Python Parsers

Randall Hopper aa8vb at yahoo.com
Thu Apr 20 20:36:31 EDT 2000


Mike Fletcher:
 |Here's part of it :)
 |
 |from simpleparse import generator
 |from TextTools import TextTools

Kudos to you and Mark L on an excellent tool!  I had it parsing and
reformatting my syntax in no time.

I think this would make a great addition to the core distribution,
...particularly after having seen the wealth of "regexes won't work for
recursive grammar" posts in the archives.

Also using this package is "easy" (thanks to your and Mark's efforts) and
the syntax grammar is very readable -- more so than re expressions IMO,
even with the verbose option on.

This is a good tool to have up your sleeve. ;-)

Thanks again,

Randall


(For Dejanews and the list archive, I've attached my short parser.  The
grammar is briefly described in the class docstring, and the output of the
test stub is listed below.)

==============================================================================

RAW STRING TO PARSE       PARSED PYTHON
-------------------       -----------------------------------

1                     ::  [1]
 2.3                  ::  [2.2999999999999998]
3.4e20                ::  [3.4e+20]
56 78                 ::  [56, 78]
"string"              ::  ['string']
 identifier           ::  ['@@ identifier @@']
[]                    ::  [()]
[1]                   ::  [(1,)]
 [ 1.0 2.0 ]          ::  [(1.0, 2.0)]
[[1,2,3],[3,4,5 ] ]   ::  [((1, 2, 3), (3, 4, 5))]
{}                    ::  [[]]
 {{ "str" }}          ::  [[['str']]]
{ 1, [2,3], "str" }   ::  [[1, (2, 3), 'str']]
{ ident1, ident2 }    ::  [['@@ ident1 @@', '@@ ident2 @@']]
{[]{}}                ::  [[(), []]]
{ [ 1 2 ][ 4 5 ] }    ::  [[(1, 2), (4, 5)]]

==============================================================================


-- 
Randall Hopper
aa8vb at yahoo.com
-------------- next part --------------
#!/usr/bin/env python
#
#  grammar_parse.py  -  Parse a recursive grammar into basic Python data types.
#

from simpleparse import generator
from TextTools import TextTools
import types, string

class Parser:
  """
    GRAMMAR                 CONVERTED TO PYTHON AS
    ------------------      ----------------------
    { ... } is a list     - List
    ( ... ) is a vector   - Tuple    (vector, matrix, tensor, etc.)
    "..."   is a string   - String   ("string")
    ident   is an ID str  - String   ("@@ ident @@")
    1       is an integer - Integer
    2.3     is a float    - Float
  """

  PARSERDECLARATION = r'''
  root           := ts, element*
  element        := (('"',string,'"')/vector/list/number/identifier), ts
  list           := '{', ts, element*, '}'
  vector         := '[', ts, vect_element*, ']'
  vect_element   := (vector/number), ts
  number         := [-+]*, ( ('0',[xX],[0-9]+) / ([0-9.]+,([eE],[-+0-9.]+)?))
  string         :=  (char/escapedchar)*
  char           :=  -[\134"]+
  escapedchar    :=  '\134"' / '\134\134'
  identifier     :=  [a-zA-Z_@], [a-zA-Z0-9_'@]*
  <ts>           :=  ( [ \011-\015,]+ / ('#',-'\012'*,'\n')+ )*
  '''

  #'"    #<-- Keep python.el mode happy with quoting :-(

  PARSERTABLE = generator.buildParser( PARSERDECLARATION )
  ROOTITEMPARSER = PARSERTABLE.parserbyname( "root" )
  
  def __init__( self, string ):
    self.data = string

  def Parse( self, parseonly = 0 ):
    success, tags, next = TextTools.tag( self.data, Parser.ROOTITEMPARSER )
    if not success:
      raise ValueError, "TextTools.tag failed"
    if next != len( self.data ):
      raise ValueError, "Failed to parse entire expression"
    #import pprint
    #pprint.pprint( tags )
    if parseonly: return tags
    else:         return self.ConvertToPython( tags )

  def ConvertToPython( self, tags ):
    return map( self._dispatch, tags )

  def _dispatch( self, (tag, start, stop, children) ):
    return getattr( self, tag)( (tag, start, stop, children) )

  # tag handlers
  def element( self, (tag, start, stop, (child,) ) ):
    '''Always has a single child: either string, vector, list
       number, or identifier '''
    val = self._dispatch( child )
    return val
  def vect_element( self, (tag, start, stop, (child,)) ):
    '''Always has a single child: either vector or number'''
    return self._dispatch( child )
  def vector( self, (tag, start, stop, elements) ):
    '''Some arbitrary number of elements, return as list'''
    val = map( self._dispatch, elements )
    if type(val) == types.ListType: 
      return tuple(val)
    else:
      return val
  def list( self, (tag, start, stop, elements) ):
    '''Some arbitrary values returned as list'''
    return  map( self._dispatch, elements )
    
  def string( self, (tag, start, stop, children) ):
    '''A quoted string'''
    return self.data[ start:stop ]

  def identifier( self, (tag, start, stop, children) ):
    '''An identifier'''
    return "@@ %s @@" % self.data[ start:stop ]

  def number( self, (tag, start, stop, children) ):
    '''Could be either int or float, check first, then second'''
    try:
      return string.atoi( self.data[ start:stop ], 0 )
    except ValueError:
      return string.atof( self.data[ start:stop ] )

if __name__ == "__main__":

  # Sample inputs for the test stub, grouped by the kind of element
  # they exercise.
  test_expr = (
    # scalars: integers, floats, strings, identifiers
    '1', ' 2.3', '3.4e20 ', '56 78', '"string"', ' identifier',
    # vectors, including nested ones
    '[]', '[1]', ' [ 1.0 2.0 ] ', '[[1,2,3],[3,4,5 ] ]',
    # lists, including nested and mixed content
    '{}', ' {{ "str" }}', '{ 1, [2,3], "str" } ', '{ ident1, ident2 }',
    '{[]{}}', '{ [ 1 2 ][ 4 5 ] }'
    )

  # One output row per input: the raw text on the left, the repr of the
  # converted value on the right.  (Call Parse( parseonly = 1 ) instead
  # to look at the raw tag list.)
  row_format = '%-20s  ::  %s'
  for raw_text in test_expr:
    converted = Parser( raw_text ).Parse()
    print( row_format % ( raw_text, repr( converted ) ) )




More information about the Python-list mailing list