Python Parsers Comparison

Thu Apr 20 10:13:34 EDT 2000

Here's part of it :)

Note: for heavy-lifting applications you'd likely want to define a custom
mxTextTools tuple tree that just appends the interpreted numbers to the
parse tree (instead of the 4-tuples). You'd likely also want to make the
grammar distinguish between hex, float and int.

from simpleparse import generator
from TextTools import TextTools
import string
class Parser:
	'''Turn a text of nested, comma-separated numeric vectors into Python values.

	The text is tagged with the module-level ROOTITEMPARSER table; every
	reported (tag, start, stop, children) tuple is then handed to the method
	of this class that carries the tag's name (element, vector, number).
	'''
	def __init__( self, string ):
		'''Remember the text to parse.

		string -- the source text; the number handler slices its values
			out of it using the start/stop offsets found in the tags
		'''
		self.data = string
		# Not read by any method of this class; kept in case outside code uses it.
		self.position = 0
	def parse( self, parseonly = 0 ):
		'''Tag self.data and return a (success, results, next) triple.

		parseonly -- when true, results is the raw tag list exactly as
			TextTools.tag returned it; otherwise results is a list with
			the interpreted value of each top-level tag

		success and next are passed through from TextTools.tag unchanged.
		'''
		success, tags, next_position = TextTools.tag( self.data, ROOTITEMPARSER )
		if parseonly:
			return success, tags, next_position
		# Build a real list (never a lazy map object) so callers can len() it.
		return success, [ self._dispatch( tag ) for tag in tags ], next_position
	def _dispatch( self, node ):
		'''Call the handler named by node's tag and return its result.

		node -- a (tag, start, stop, children) tuple
		Raises AttributeError when this class has no method named after the tag.
		'''
		return getattr( self, node[0] )( node )

	# tag handlers
	def element( self, node ):
		'''Always has a single child, either vector or number'''
		# The nested unpack fails loudly if there is not exactly one child.
		tag, start, stop, (child,) = node
		return self._dispatch( child )
	def vector( self, node ):
		'''Some arbitrary number of elements, return as list'''
		elements = node[3]
		return [ self._dispatch( element ) for element in elements ]
	def number( self, node ):
		'''Return the number spelled by self.data[start:stop].

		A 0x/0X-prefixed literal is read as a base-16 int, anything else is
		tried as a base-10 int first and as a float second.  Raises
		ValueError when the text is neither.
		'''
		tag, start, stop, children = node
		text = self.data[start:stop]
		# Pick the base explicitly instead of letting it be guessed from the
		# text: guessing reads a leading zero as octal ("010" -> 8).
		if text.lstrip( '+-' )[:2] in ( '0x', '0X' ):
			base = 16
		else:
			base = 10
		try:
			return int( text, base )
		except ValueError:
			return float( text )

# Grammar for the nested-vector text, written as a SimpleParse declaration:
#   root    -- optional separators, then any number of elements
#   vector  -- '[' and ']' around any number of elements
#   element -- a vector or a number, followed by optional separators
#   number  -- optional sign character(s), then either a 0x/0X literal with
#              hexadecimal digits (0-9, a-f, A-F) or a run of digits and
#              dots with an optional e/E exponent
#   ts      -- separators: spaces, the control characters \011-\015, commas,
#              and '#' runs that end at a newline (these look like comments)
# NOTE(review): the angle brackets on <ts> presumably keep it out of the
# reported tags (Parser defines no ts handler) -- confirm against the
# SimpleParse documentation.
PARSERDECLARATION = r'''root           := ts, element*
vector         := '[', ts, element*, ']'
element        := (vector/number), ts

number         := [-+]*, ( ('0',[xX],[0-9a-fA-F]+) / ([0-9.]+,([eE],[-+0-9.]+)?))
<ts>           :=  ( [ \011-\015,]+ / ('#',-'\012'*,'\n')+ )*
'''

# Compile the declaration once at import time and keep the table for the
# "root" production, which Parser.parse hands to TextTools.tag.
PARSERTABLE = generator.buildParser( PARSERDECLARATION )
ROOTITEMPARSER = PARSERTABLE.parserbyname( "root" )
if __name__ == "__main__":
	import time

	# Small inputs whose parse results are printed one per line.
	samples = (
		'''2''',
		'''[2]''',
		'''[ 2 ]''',
		'''[]''',
		'''[ 2 ]''',
		'''[ []]''',
		'''[ 1,2 ]''',
		'''[ [ 1,2 ],[ 3,4 ] ], [ 5.6 ]''',
	)
	for sample in samples:
		print( Parser( sample ).parse() )

	# One long input, timed twice: first with the tag handlers applied,
	# then with parseonly=1 so only the tagging itself is measured.
	payload = '''[ [ 1,2 ],[ 3,4 ] ], [ 5.6 ]''' * 5000

	print( 'starting speed test, string length %s' % len( payload ) )
	began = time.time()
	count = len( Parser( payload ).parse()[1] )
	print( '%s elements parsed in %s seconds' % ( count, time.time() - began ) )

	print( 'starting parse-only speed test' )
	began = time.time()
	count = len( Parser( payload ).parse( parseonly = 1 )[1] )
	print( '%s elements parsed in %s seconds' % ( count, time.time() - began ) )

-----Original Message-----
From: Randall Hopper [mailto:aa8vb at yahoo.com]
Sent: Thursday, April 20, 2000 9:40 AM
To: python-list at python.org
Subject: Python Parsers Comparison

I'm again faced with the "regexes not powerful enough for nested grammar"
problem, and need to choose an extension.

Does someone know of a web page or paper which compares the various Python
parser engines on a simple example grammar (or on features)?

For example, a comma-separated list of nested vectors would be a good
example:

    [ 1, 2 ], [ 3, 4 ]
    [ [ 1,2 ],[ 3,4 ] ], [ 5.6 ]
    etc.

mxTextTools, metalanguage, SimpleParse, SPARK, YAPPS, PyLR, kwParsing,
PyBison.  A comparison of any subset would be useful.

Thanks,

Randall

-- 
Randall Hopper
aa8vb at yahoo.com

-- 
http://www.python.org/mailman/listinfo/python-list