Python Parsers Comparison
Mike Fletcher
mfletch at tpresence.com
Thu Apr 20 10:13:34 EDT 2000
Here's part of it :)
Note: for heavy-lifting applications you'd likely want to define a custom
mxTextTools tuple tree that just appends the interpreted numbers to the
parse tree (instead of the 4-tuples). You'd likely also want to make the
grammar distinguish between hex, float and int.
from simpleparse import generator
from TextTools import TextTools
import string
class Parser:
    '''Turn text holding numbers and nested [...] vectors into Python values.

    The text is tagged with ROOTITEMPARSER; every resulting
    (tag, start, stop, children) 4-tuple is then handed to the method whose
    name equals the tag.
    '''

    def __init__( self, string ):
        '''Remember the text to parse.

        string -- the source text (inside this method the parameter name
                  hides the `string` module).
        '''
        self.data = string
        self.position = 0  # stored but not read anywhere in this class

    def parse( self, parseonly = 0 ):
        '''Tag self.data and, unless parseonly is true, interpret the tags.

        Returns the (success, tags, next) triple produced by TextTools.tag.
        With parseonly true the raw tag tuples are returned untouched;
        otherwise each top-level tag is replaced by its handler's result.
        '''
        success, tags, next_position = TextTools.tag( self.data,
                                                      ROOTITEMPARSER )
        if parseonly:
            return success, tags, next_position
        ## import pprint
        ## pprint.pprint( tags )
        # A real list (not a lazy map object) so callers can take len() of it.
        return success, [ self._dispatch( tag ) for tag in tags ], next_position

    def _dispatch( self, node ):
        '''Call the handler method named after the tag of a 4-tuple node.'''
        # Unpack in the body: tuple parameters in the signature are a syntax
        # error on Python 3.
        tag = node[0]
        return getattr( self, tag )( node )

    # tag handlers
    def element( self, node ):
        '''Always has a single child, either vector or number'''
        (child,) = node[3]  # raises ValueError unless exactly one child
        return self._dispatch( child )

    def vector( self, node ):
        '''Some arbitrary number of elements, return as list'''
        elements = node[3]
        # `or ()` keeps an empty vector working even if the children slot
        # is None instead of an empty list.
        return [ self._dispatch( item ) for item in ( elements or () ) ]

    def number( self, node ):
        '''Could be either int or float, check first, then second'''
        start, stop = node[1], node[2]
        text = self.data[start:stop]
        try:
            # Base 0 lets a prefix such as 0x choose the radix.
            return int( text, 0 )
        except ValueError:
            return float( text )
# Grammar handed to simpleparse's generator.buildParser:
#   root    -- optional separators followed by any number of elements
#   vector  -- a bracketed run of elements
#   element -- one vector or number plus trailing separators
#   number  -- optional sign(s), then either a hex literal or a decimal/float
#              literal with an optional exponent
#   <ts>    -- separators: a space, characters \011-\015 (tab through carriage
#              return), commas, and '#' comments running to the end of the line
#              (presumably the angle brackets keep ts out of the result tree,
#              since Parser defines no `ts` handler -- confirm against the
#              SimpleParse documentation)
# The hex alternative accepts a-f/A-F so that literals such as 0x1F are
# matched in full instead of stopping after the leading decimal digits.
# NOTE(review): `[-+]*` also admits repeated signs such as "--5", which
# neither int() nor float() can convert.
PARSERDECLARATION = r'''root := ts, element*
vector := '[', ts, element*, ']'
element := (vector/number), ts
number := [-+]*, ( ('0',[xX],[0-9a-fA-F]+) / ([0-9.]+,([eE],[-+0-9.]+)?))
<ts> := ( [ \011-\015,]+ / ('#',-'\012'*,'\n')+ )*
'''
# Build the parser once at import time and keep the table for the "root"
# production, which Parser.parse passes to TextTools.tag.
PARSERTABLE = generator.buildParser( PARSERDECLARATION )
ROOTITEMPARSER = PARSERTABLE.parserbyname( "root" )
if __name__ == "__main__":
    import time

    # Smoke test: parse a handful of small inputs and show the results.
    for testString in [
        '''2''',
        '''[2]''',
        '''[ 2 ]''',
        '''[]''',
        '''[ 2 ]''',
        '''[ []]''',
        '''[ 1,2 ]''',
        '''[ [ 1,2 ],[ 3,4 ] ], [ 5.6 ]''',
    ]:
        # A single parenthesised argument prints the same on Python 2 and 3.
        print( Parser( testString ).parse() )

    # Timing run: the same sample repeated 5000 times.
    speedtest = '''[ [ 1,2 ],[ 3,4 ] ], [ 5.6 ]''' * 5000

    # Full parse, including conversion of the tags to Python values.
    print( 'starting speed test, string length %s' % len( speedtest ) )
    t = time.time()
    elements = len( Parser( speedtest ).parse()[1] )
    print( '%s elements parsed in %s seconds' % ( elements, time.time() - t ) )

    # Tagging only, skipping the handler dispatch.
    print( 'starting parse-only speed test' )
    t = time.time()
    elements = len( Parser( speedtest ).parse( parseonly=1 )[1] )
    print( '%s elements parsed in %s seconds' % ( elements, time.time() - t ) )
-----Original Message-----
From: Randall Hopper [mailto:aa8vb at yahoo.com]
Sent: Thursday, April 20, 2000 9:40 AM
To: python-list at python.org
Subject: Python Parsers Comparison
I'm again faced with the "regexes not powerful enough for nested grammar"
problem, and need to choose an extension.
Does someone know of a web page or paper which compares the various Python
parser engines on a simple example grammar (or on features)?
For example, a comma-separated list of nested vectors would be a good
example:
[ 1, 2 ], [ 3, 4 ]
[ [ 1,2 ],[ 3,4 ] ], [ 5.6 ]
etc.
mxTextTools, metalanguage, SimpleParse, SPARK, YAPPS, PyLR, kwParsing,
PyBison. A comparison of any subset would be useful.
Thanks,
Randall
--
Randall Hopper
aa8vb at yahoo.com
--
http://www.python.org/mailman/listinfo/python-list
More information about the Python-list
mailing list