[pypy-svn] r12837 - in pypy/branch/pycompiler/module/recparser: . test

ludal at codespeak.net
Fri May 27 14:38:20 CEST 2005


Author: ludal
Date: Fri May 27 14:38:20 2005
New Revision: 12837

Added:
   pypy/branch/pycompiler/module/recparser/astbuilder.py
Modified:
   pypy/branch/pycompiler/module/recparser/__init__.py
   pypy/branch/pycompiler/module/recparser/ebnfparse.py
   pypy/branch/pycompiler/module/recparser/grammar.py
   pypy/branch/pycompiler/module/recparser/pythonlexer.py
   pypy/branch/pycompiler/module/recparser/test/test_samples.py
   pypy/branch/pycompiler/module/recparser/test/unittest_samples.py
Log:
 * grammar should now be LL(1), i.e. it should use less backtracking than before
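
For context on the log entry: an LL(1) parser decides which rule to try by looking only at the next token and each rule's first set, instead of trying the rules one after another and backtracking on failure. The snippet below is only an illustrative sketch of that prediction step, with made-up names rather than the recparser API; the Empty fallback mirrors the alternative reordering introduced in grammar.py.

# Illustrative LL(1) prediction sketch -- hypothetical names, not recparser code.
EMPTY = object()   # stands in for EmptyToken

def predict(alternatives, next_token):
    """Pick the alternative whose first set contains next_token.

    Branches that can match Empty are only used as a fallback, which is
    why this commit moves them to the end of each alternative."""
    fallback = None
    for first_set, rule_name in alternatives:
        if next_token in first_set:
            return rule_name
        if EMPTY in first_set and fallback is None:
            fallback = rule_name
    return fallback

# S -> 'if' ... | 'while' ... | <empty>
alts = [(['if'], 'if_stmt'), (['while'], 'while_stmt'), ([EMPTY], 'empty_rule')]
assert predict(alts, 'while') == 'while_stmt'
assert predict(alts, 'return') == 'empty_rule'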


Modified: pypy/branch/pycompiler/module/recparser/__init__.py
==============================================================================
--- pypy/branch/pycompiler/module/recparser/__init__.py	(original)
+++ pypy/branch/pycompiler/module/recparser/__init__.py	Fri May 27 14:38:20 2005
@@ -1,42 +1,44 @@
 from pypy.interpreter.error import OperationError, debug_print
 from pypy.interpreter import module
-from pypy.interpreter.mixedmodule import MixedModule 
+
+
 
 
 import pythonparse
 debug_print( "Loading grammar %s" % pythonparse.PYTHON_GRAMMAR )
 
-class Module(MixedModule):
-    """The builtin parser module. 
-    """ 
-
-
-    appleveldefs = {
-    #    'ParserError'  : 'app_class.ParserError', 
-    }
-    interpleveldefs = {
-        '__name__'     : '(space.wrap("parser"))', 
-        '__doc__'      : '(space.wrap("parser (recparser version) module"))', 
-
-        'suite'        : 'pyparser.suite',
-        'expr'         : 'pyparser.expr',
-        'STType'       : 'pyparser.STType', 
-        'ast2tuple'    : 'pyparser.ast2tuple',
-#        'ASTType'      : 'pyparser.STType', 
-        # 'sequence2st'  : 'pyparser.sequence2st',
-        #'eval_input'   : 'pyparser.eval_input', 
-        #'file_input'   : 'pyparser.file_input', 
-        #'compileast'   : 'pyparser.compileast',
-        #'st2tuple'     : 'pyparser.st2tuple',
-        #'st2list'      : 'pyparser.st2list',
-        #'issuite'      : 'pyparser.issuite',
-        #'ast2tuple'    : 'pyparser.ast2tuple',
-        #'tuple2st'     : 'pyparser.tuple2st',
-        #'isexpr'       : 'pyparser.isexpr',
-        #'ast2list'     : 'pyparser.ast2list',
-        #'sequence2ast' : 'pyparser.sequence2ast',
-        #'tuple2ast'    : 'pyparser.tuple2ast',
-        #'_pickler'     : 'pyparser._pickler',
-        #'compilest'    : 'pyparser.compilest',
-    }
+## from pypy.interpreter.mixedmodule import MixedModule 
+## class Module(MixedModule):
+##     """The builtin parser module. 
+##     """ 
+
+
+##     appleveldefs = {
+##     #    'ParserError'  : 'app_class.ParserError', 
+##     }
+##     interpleveldefs = {
+##         '__name__'     : '(space.wrap("parser"))', 
+##         '__doc__'      : '(space.wrap("parser (recparser version) module"))', 
+
+##         'suite'        : 'pyparser.suite',
+##         'expr'         : 'pyparser.expr',
+##         'STType'       : 'pyparser.STType', 
+##         'ast2tuple'    : 'pyparser.ast2tuple',
+## #        'ASTType'      : 'pyparser.STType', 
+##         # 'sequence2st'  : 'pyparser.sequence2st',
+##         #'eval_input'   : 'pyparser.eval_input', 
+##         #'file_input'   : 'pyparser.file_input', 
+##         #'compileast'   : 'pyparser.compileast',
+##         #'st2tuple'     : 'pyparser.st2tuple',
+##         #'st2list'      : 'pyparser.st2list',
+##         #'issuite'      : 'pyparser.issuite',
+##         #'ast2tuple'    : 'pyparser.ast2tuple',
+##         #'tuple2st'     : 'pyparser.tuple2st',
+##         #'isexpr'       : 'pyparser.isexpr',
+##         #'ast2list'     : 'pyparser.ast2list',
+##         #'sequence2ast' : 'pyparser.sequence2ast',
+##         #'tuple2ast'    : 'pyparser.tuple2ast',
+##         #'_pickler'     : 'pyparser._pickler',
+##         #'compilest'    : 'pyparser.compilest',
+##     }
 

Added: pypy/branch/pycompiler/module/recparser/astbuilder.py
==============================================================================
--- (empty file)
+++ pypy/branch/pycompiler/module/recparser/astbuilder.py	Fri May 27 14:38:20 2005
@@ -0,0 +1,48 @@
+
+
+from grammar import BaseGrammarBuilder
+from compiler.ast import nodes, TokenNode
+from compiler.astfactory import factory_functions, group_factory, syntaxnode_factory
+
+class AstBuilder(BaseGrammarBuilder):
+    """A builder that directly produces the AST"""
+
+    def __init__( self, rules=None, debug=0 ):
+        BaseGrammarBuilder.__init__(self, rules, debug )
+
+    def top(self, n=1):
+        toplist = []
+        for node in self.stack[-n:]:
+            toplist += node.expand()
+        return toplist
+
+    def alternative( self, rule, source ):
+        # Do nothing, keep rule on top of the stack
+        if rule.is_root():
+            ast_factory = factory_functions.get( rule.name, syntaxnode_factory )
+            elems = self.top()
+            node = ast_factory( rule.name, source, elems )
+            self.stack[-1] = node
+            if self.debug:
+                self.stack[-1].dumpstr()
+        return True
+
+    def sequence(self, rule, source, elts_number):
+        """Collapse the last elts_number stack entries into one node for this rule"""
+        items = self.top( elts_number )
+        if rule.is_root():
+            ast_factory = factory_functions.get( rule.name, syntaxnode_factory )
+        else:
+            ast_factory = group_factory
+
+        node = ast_factory( rule.name, source, items )
+        # replace N elements with 1 element regrouping them
+        if elts_number >= 1:
+            self.stack[-elts_number:] = [node]
+        else:
+            self.stack.append(node)
+        return True
+
+    def token(self, name, value, source):
+        self.stack.append(TokenNode(name, source, value))
+        return True
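
The AstBuilder above follows the builder protocol from grammar.py: the grammar calls token() for every terminal it consumes and sequence()/alternative() when a rule completes, while the builder keeps partially built nodes on a stack. The stripped-down builder below is hypothetical and only meant to show that calling convention and the stack collapsing done in sequence(); it is not part of this commit.

# Hypothetical builder with the same callback shape as AstBuilder: one stack
# entry per matched terminal, and sequence() collapses the last N entries
# into a single node standing for the completed rule.
class DummyBuilder(object):
    def __init__(self):
        self.stack = []

    def token(self, name, value, source):
        self.stack.append(('token', name, value))
        return True

    def sequence(self, rule_name, source, elts_number):
        items = self.stack[len(self.stack) - elts_number:]
        node = ('rule', rule_name, items)
        if elts_number >= 1:
            # the list wrapper replaces N stack entries with one node
            self.stack[-elts_number:] = [node]
        else:
            self.stack.append(node)
        return True

builder = DummyBuilder()
builder.token('NAME', 'x', None)
builder.token('OP', '=', None)
builder.token('NUMBER', '1', None)
builder.sequence('expr_stmt', None, 3)
assert len(builder.stack) == 1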

Modified: pypy/branch/pycompiler/module/recparser/ebnfparse.py
==============================================================================
--- pypy/branch/pycompiler/module/recparser/ebnfparse.py	(original)
+++ pypy/branch/pycompiler/module/recparser/ebnfparse.py	Fri May 27 14:38:20 2005
@@ -250,6 +250,10 @@
 
     rules = [ star, star_opt, symbol, alternative, rule, grammar, sequence,
               seq_cont_list, sequence_cont, option, group, alt ]
+    for r in rules:
+        r._trace = False
+        for tk in r.args:
+            tk._trace = False
     build_first_sets( rules )
     return grammar
 

Modified: pypy/branch/pycompiler/module/recparser/grammar.py
==============================================================================
--- pypy/branch/pycompiler/module/recparser/grammar.py	(original)
+++ pypy/branch/pycompiler/module/recparser/grammar.py	Fri May 27 14:38:20 2005
@@ -35,6 +35,14 @@
         """Returns the current line number"""
         return 0
 
+    def get_pos(self):
+        """Returns the current source position of the scanner"""
+        return 0
+
+    def get_source_text(self, pos1, pos2 ):
+        """Returns the source text between two scanner positions"""
+        return ""
+
 
 ######################################################################
 
@@ -53,6 +61,8 @@
             if new_size != size:
                 changed = True
     print "Done", loops, "loops"
+    for r in rules:
+        r.reorder_rule()
 
 from syntaxtree import SyntaxNode, TempSyntaxNode, TokenNode
 
@@ -121,6 +131,7 @@
         self.first_set = []
         self.first_set_complete = False
         self._processing = False
+        self._trace = False
 
     def is_root(self):
         """This is a root node of the grammar, that is one that will
@@ -142,21 +153,45 @@
         returns None if there is no match, or an object built by the builder
         """
         token = source.peek()
+        pos1 = source.get_pos()
         in_first_set = self.match_first_set(token)
+        if not in_first_set: # and not EmptyToken in self.first_set:
+            if EmptyToken in self.first_set:
+                ret = builder.sequence(self, source, 0 )
+                if self._trace:
+                    prefix = '%seee' % (' ' * level)
+                    print prefix, " RULE =", self
+                    print prefix, " TOKEN =", token
+                    print prefix, " FIRST SET =", self.first_set
+                return self.debug_return( ret, 0 )
+            if self._trace:
+                prefix = '%srrr' % (' ' * level)
+                print prefix, " RULE =", self
+                print prefix, " TOKEN =", token
+                print prefix, " FIRST SET =", self.first_set
+            return None
+        elif self._trace:
+            prefix = '%s>>>' % (' ' * level)
+            print prefix, " RULE =", self
+            print prefix, " TOKEN =", token
+            print prefix, " FIRST SET =", self.first_set
+            
         # <tmpdebug>
-        if 0 and token is not None:
-            if in_first_set:
-                prefix = '%s+++' % ('  ' * level)
+        res = self._match(source, builder, level)
+        if self._trace:
+            pos2 = source.get_pos()
+            if res:
+                prefix = '%s+++' % (' ' * level)
             else:
-                prefix = '%s---' % ('  ' * level)
-            print prefix, " TOKEN =", token
+                prefix = '%s---' % (' ' * level)
             print prefix, " RULE =", self
+            print prefix, " TOKEN =", token
             print prefix, " FIRST SET =", self.first_set
-            print "*" * 50
+            print prefix, " TEXT ='%s'" % source.get_source_text(pos1,pos2)
+            if res:
+                print "*" * 50
         # </tmpdebug>
-        if not in_first_set and EmptyToken not in self.first_set:
-            return None
-        return self._match(source, builder, level)
+        return res
 
     def _match(self, source, builder, level=0):
         """Try to match a grammar rule
@@ -227,11 +262,17 @@
     def in_first_set(self, other):
         return other in self.first_set
 
+    def reorder_rule(self):
+        """Called after the computation of the first sets, to allow rules to be
+        reordered to avoid ambiguities"""
+        pass
+
 class Alternative(GrammarElement):
     """Represents an alternative in a grammar rule (as in S -> A | B | C)"""
     def __init__(self, name, *args):
         GrammarElement.__init__(self, name )
         self.args = list(args)
+        self._reordered = False
         for i in self.args:
             assert isinstance( i, GrammarElement )
 
@@ -246,8 +287,9 @@
         # try instead to get the longest alternative
         # to see if this solves our problems with infinite recursion
         for rule in self.args:
-            if not rule.match_first_set(tok):
-                #print "Skipping impossible rule: %s" % (rule,)
+            if not rule.match_first_set(tok) and EmptyToken not in rule.first_set:
+                if self._trace:
+                    print "Skipping impossible rule: %s" % (rule,)
                 continue
             m = rule.match(source, builder, level+1)
             if m:
@@ -270,12 +312,31 @@
         if S -> (A | B | C):
             LAH(S) = Union( LAH(A), LAH(B), LAH(C) )
         """
+            
         # do this to avoid problems on indirect recursive rules
         for rule in self.args:
             for t in rule.first_set:
                 if t not in self.first_set:
                     self.first_set.append(t)
 
+    def reorder_rule(self):
+        # take the opportunity to reorder rules in alternatives
+        # so that rules with Empty in their first set come last
+        # warn if two rules have empty in their first set
+        empty_set = []
+        not_empty_set = []
+        for r in self.args:
+            if EmptyToken in r.first_set:
+                empty_set.append( r )
+            else:
+                not_empty_set.append( r )
+        if len(empty_set)>1 and not self._reordered:
+            print "Warning: alternative %s has more than one rule matching Empty" % self
+            self._reordered = True
+        self.args[:] = not_empty_set
+        self.args.extend( empty_set )
+
+    
 
 class Sequence(GrammarElement):
     """Represents a Sequence in a grammar rule (as in S -> A B C)"""
@@ -291,14 +352,8 @@
             print "try seq:", self.display()
         ctx = source.context()
         bctx = builder.context()
-        if self.name == 'listmaker':
-            print "----------------------------- LISTMAKER !"
         for rule in self.args:
-            if self.name == 'listmaker':
-                print "    -------------- IN LISTMAKER, rule =", rule
             m = rule.match(source, builder, level+1)
-            if self.name == 'listmaker':
-                print "    !!!!!!!!!!!!!! IN LISTMAKER, doesn't match %s" % (rule,)
             if not m:
                 # Restore needed because some rules may have been matched
                 # before the one that failed
@@ -328,12 +383,15 @@
             LAH(S) = LAH(A)
         """
         for rule in self.args:
+            if EmptyToken in self.first_set:
+                self.first_set.remove( EmptyToken )
             # while we're in this loop, keep aggregating possible tokens
             for t in rule.first_set:
                 if t not in self.first_set:
                     self.first_set.append(t)
             if EmptyToken not in rule.first_set:
                 break
+                
 
 
 class KleenStar(GrammarElement):
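
A note on the Alternative changes above: a branch whose first set contains EmptyToken can succeed without consuming anything, so trying it before the other branches hides them; that is what the reordering after build_first_sets is for. The standalone sketch below restates the idea with made-up rule names and first sets, it is not code from grammar.py.

# Why Empty-matching branches must be tried last (illustrative only).
EMPTY = None   # stands in for EmptyToken

def order_alternatives(rule_names, first_sets):
    """Move every rule that can match Empty to the end of the list,
    which is the ordering Alternative.reorder_rule establishes once
    the first sets are known."""
    non_empty = [r for r in rule_names if EMPTY not in first_sets[r]]
    empty = [r for r in rule_names if EMPTY in first_sets[r]]
    return non_empty + empty

first_sets = {'varargslist': ['NAME', '*', '**'], 'empty_suffix': [EMPTY]}
ordered = order_alternatives(['empty_suffix', 'varargslist'], first_sets)
assert ordered == ['varargslist', 'empty_suffix']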

Modified: pypy/branch/pycompiler/module/recparser/pythonlexer.py
==============================================================================
--- pypy/branch/pycompiler/module/recparser/pythonlexer.py	(original)
+++ pypy/branch/pycompiler/module/recparser/pythonlexer.py	Fri May 27 14:38:20 2005
@@ -213,18 +213,29 @@
 
     def next(self):
         if self.stack_pos >= len(self.stack):
+            pos0 = self.pos
             tok, val = self._next()
             token = Token( tok, val )
-            self.stack.append( ( token, self.line) )
+            self.stack.append( ( token, self.line, pos0) )
             self._current_line = self.line
         else:
-            token, line = self.stack[self.stack_pos]
+            token, line, pos0 = self.stack[self.stack_pos]
             self._current_line = line
         self.stack_pos += 1
         if DEBUG:
             print "%d/%d: %s, %s" % (self.stack_pos, len(self.stack), tok, val)
         return token
 
+    def get_pos(self):
+        if self.stack_pos >= len(self.stack):
+            return self.pos
+        else:
+            token, line, pos = self.stack[self.stack_pos]
+            return pos
+
+    def get_source_text(self, pos0, pos1 ):
+        return self.input[pos0:pos1]
+
     def peek(self):
         """returns next token without consuming it"""
         ctx = self.context()
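
The get_pos()/get_source_text() pair added here lets the tracing code in grammar.py print exactly which slice of the input a rule consumed: record the scanner position before matching, fetch it again afterwards, and slice the input between the two. Roughly, as in the sketch below (traced_match is not a real function in the tree; source stands for any token source exposing the two new methods):

# Sketch of how the new position hooks support rule tracing.
def traced_match(rule, source, builder):
    pos1 = source.get_pos()      # offset of the next unread token
    result = rule.match(source, builder)
    pos2 = source.get_pos()      # offset after the rule matched (or failed)
    if result:
        text = source.get_source_text(pos1, pos2)
        print("rule %s consumed %r" % (rule, text))
    return result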

Modified: pypy/branch/pycompiler/module/recparser/test/test_samples.py
==============================================================================
--- pypy/branch/pycompiler/module/recparser/test/test_samples.py	(original)
+++ pypy/branch/pycompiler/module/recparser/test/test_samples.py	Fri May 27 14:38:20 2005
@@ -53,6 +53,7 @@
         yield check_parse, abspath
 
 def check_parse(filepath):
+    print "Testing:", filepath
     pypy_tuples = pypy_parse(filepath)
     python_tuples = python_parse(filepath)
     try:

Modified: pypy/branch/pycompiler/module/recparser/test/unittest_samples.py
==============================================================================
--- pypy/branch/pycompiler/module/recparser/test/unittest_samples.py	(original)
+++ pypy/branch/pycompiler/module/recparser/test/unittest_samples.py	Fri May 27 14:38:20 2005
@@ -2,7 +2,7 @@
 
 import os, os.path as osp
 import sys
-from pypy.module.recparser.pythonutil import python_parse, pypy_parse, set_debug
+from pypy.module.recparser.pythonutil import python_parse, pypy_parse
 from pprint import pprint
 from pypy.module.recparser import grammar
 grammar.DEBUG = False
@@ -85,7 +85,8 @@
     opts, args = getopt.getopt( sys.argv[1:], "d:", [] )
     for opt, val in opts:
         if opt == "-d":
-            set_debug(int(val))
+            pass
+#            set_debug(int(val))
     if args:
         samples = args
     else:


