Python Parsers
Randall Hopper
aa8vb at yahoo.com
Thu Apr 20 20:36:31 EDT 2000
Mike Fletcher:
|Here's part of it :)
|
|from simpleparse import generator
|from TextTools import TextTools
Kudos to you and Mark L on an excellent tool! I had it parsing and
reformatting my syntax in no time.
I think this would make a great addition to the core distribution,
...particularly after having seen the wealth of "regexes won't work for
recursive grammar" posts in the archives.
Also using this package is "easy" (thanks to your and Mark's efforts) and
the syntax grammar is very readable -- more so than re expressions IMO,
even with the verbose option on.
This is a good tool to have up your sleeve. ;-)
Thanks again,
Randall
(For the Dejanews and the list archive, I've attached my short parser. The
grammar is briefly described in the class docstring, and the output of the
test stub is listed below.)
==============================================================================
RAW STRING TO PARSE PARSED PYTHON
------------------- -----------------------------------
1 :: [1]
2.3 :: [2.2999999999999998]
3.4e20 :: [3.4e+20]
56 78 :: [56, 78]
"string" :: ['string']
identifier :: ['@@ identifier @@']
[] :: [()]
[1] :: [(1,)]
[ 1.0 2.0 ] :: [(1.0, 2.0)]
[[1,2,3],[3,4,5 ] ] :: [((1, 2, 3), (3, 4, 5))]
{} :: [[]]
{{ "str" }} :: [[['str']]]
{ 1, [2,3], "str" } :: [[1, (2, 3), 'str']]
{ ident1, ident2 } :: [['@@ ident1 @@', '@@ ident2 @@']]
{[]{}} :: [[(), []]]
{ [ 1 2 ][ 4 5 ] } :: [[(1, 2), (4, 5)]]
==============================================================================
--
Randall Hopper
aa8vb at yahoo.com
-------------- next part --------------
#!/usr/bin/env python
#
# grammar_parse.py - Parse a recursive grammar into basic Python data types.
#
from simpleparse import generator
from TextTools import TextTools
import types, string
class Parser:
    """Parse a small recursive grammar into basic Python data types.

    GRAMMAR                    CONVERTED TO PYTHON AS
    ------------------         ----------------------
    { ... } is a list        - List
    [ ... ] is a vector      - Tuple (vector, matrix, tensor, etc.)
    "..."   is a string      - String ("string")
    ident   is an ID str     - String ("@@ ident @@")
    1       is an integer    - Integer
    2.3     is a float       - Float

    Elements may be separated by whitespace and/or commas.  A '#' starts a
    comment that runs through the end of the line.

    Usage: Parser(text).Parse() returns a list with one converted Python
    value per top-level element found in text.
    """

    # Grammar in simpleparse notation.  The text is kept exactly as
    # originally posted; each production name that produces a tag has a
    # handler method of the same name below.
    PARSERDECLARATION = r'''
root := ts, element*
element := (('"',string,'"')/vector/list/number/identifier), ts
list := '{', ts, element*, '}'
vector := '[', ts, vect_element*, ']'
vect_element := (vector/number), ts
number := [-+]*, ( ('0',[xX],[0-9]+) / ([0-9.]+,([eE],[-+0-9.]+)?))
string := (char/escapedchar)*
char := -[\134"]+
escapedchar := '\134"' / '\134\134'
identifier := [a-zA-Z_@], [a-zA-Z0-9_'@]*
<ts> := ( [ \011-\015,]+ / ('#',-'\012'*,'\n')+ )*
'''
    #'" #<-- Keep python.el mode happy with quoting :-(

    # Built once, when the class is defined, and shared by all instances.
    PARSERTABLE = generator.buildParser( PARSERDECLARATION )
    ROOTITEMPARSER = PARSERTABLE.parserbyname( "root" )

    def __init__( self, string ):
        """Remember the text to parse; no parsing happens until Parse()."""
        self.data = string

    def Parse( self, parseonly = 0 ):
        """Tag self.data with the "root" production and convert the result.

        parseonly -- if true, return the raw tag list produced by
                     TextTools.tag instead of converted Python values.

        Raises ValueError if tagging fails or if the parse stops before the
        end of the input.
        """
        success, tags, end = TextTools.tag( self.data, Parser.ROOTITEMPARSER )
        if not success:
            raise ValueError( "TextTools.tag failed" )
        if end != len( self.data ):
            raise ValueError( "Failed to parse entire expression" )
        if parseonly:
            return tags
        return self.ConvertToPython( tags )

    def ConvertToPython( self, tags ):
        """Convert a list of (tag, start, stop, children) tuples to a list
        of Python values, one per tuple."""
        return [ self._dispatch( node ) for node in tags ]

    def _dispatch( self, node ):
        """Call the handler method named after the node's tag.

        node is a (tag, start, stop, children) tuple; the whole tuple is
        passed on to the handler.
        """
        tag, start, stop, children = node
        return getattr( self, tag )( node )

    # tag handlers -- each takes one (tag, start, stop, children) tuple

    def element( self, node ):
        '''Always has a single child: either string, vector, list,
        number, or identifier.  Returns that child's converted value.'''
        tag, start, stop, (child,) = node
        return self._dispatch( child )

    def vect_element( self, node ):
        '''Always has a single child: either vector or number.  Returns
        that child's converted value.'''
        tag, start, stop, (child,) = node
        return self._dispatch( child )

    def vector( self, node ):
        '''Some arbitrary number of elements, returned as a tuple.'''
        tag, start, stop, elements = node
        # "or ()" tolerates a child list of None as well as an empty list.
        return tuple( [ self._dispatch( item ) for item in ( elements or () ) ] )

    def list( self, node ):
        '''Some arbitrary values, returned as a list.'''
        tag, start, stop, elements = node
        # "or ()" tolerates a child list of None as well as an empty list.
        return [ self._dispatch( item ) for item in ( elements or () ) ]

    def string( self, node ):
        '''A quoted string.  Returns the matched text between the quotes
        exactly as written; escape sequences are not decoded.'''
        tag, start, stop, children = node
        return self.data[ start:stop ]

    def identifier( self, node ):
        '''An identifier, returned wrapped as "@@ name @@".'''
        tag, start, stop, children = node
        return "@@ %s @@" % self.data[ start:stop ]

    def number( self, node ):
        '''Could be either int or float: try int first, then float.

        Base 0 lets int() honour a 0x prefix.  ValueError from float()
        propagates if the text is neither.
        NOTE(review): the grammar only admits [0-9] after "0x", so hex
        digits a-f never reach this handler -- confirm whether intended.
        '''
        tag, start, stop, children = node
        text = self.data[ start:stop ]
        try:
            return int( text, 0 )
        except ValueError:
            return float( text )
if __name__ == "__main__":
    # Test stub: parse a handful of sample expressions (numbers, strings,
    # identifiers, vectors and lists, with assorted whitespace and commas)
    # and print each raw input next to the repr of its converted value.
    test_expr = (
        '1', ' 2.3', '3.4e20 ', '56 78', '"string"', ' identifier',
        '[]', '[1]', ' [ 1.0 2.0 ] ', '[[1,2,3],[3,4,5 ] ]',
        '{}', ' {{ "str" }}', '{ 1, [2,3], "str" } ', '{ ident1, ident2 }',
        '{[]{}}', '{ [ 1 2 ][ 4 5 ] }'
    )
    for expr in test_expr:
        # To inspect the raw tag list instead of the converted values,
        # call Parser( expr ).Parse( parseonly = 1 ).
        # A single parenthesised argument prints identically whether
        # print is a statement or a function.
        print( '%-20s :: %s' % ( expr, repr( Parser( expr ).Parse() ) ) )
More information about the Python-list
mailing list