[pypy-svn] r12837 - in pypy/branch/pycompiler/module/recparser: . test
ludal at codespeak.net
ludal at codespeak.net
Fri May 27 14:38:20 CEST 2005
Author: ludal
Date: Fri May 27 14:38:20 2005
New Revision: 12837
Added:
pypy/branch/pycompiler/module/recparser/astbuilder.py
Modified:
pypy/branch/pycompiler/module/recparser/__init__.py
pypy/branch/pycompiler/module/recparser/ebnfparse.py
pypy/branch/pycompiler/module/recparser/grammar.py
pypy/branch/pycompiler/module/recparser/pythonlexer.py
pypy/branch/pycompiler/module/recparser/test/test_samples.py
pypy/branch/pycompiler/module/recparser/test/unittest_samples.py
Log:
* grammar should now be LL(1), i.e. it uses less backtracking than before
Modified: pypy/branch/pycompiler/module/recparser/__init__.py
==============================================================================
--- pypy/branch/pycompiler/module/recparser/__init__.py (original)
+++ pypy/branch/pycompiler/module/recparser/__init__.py Fri May 27 14:38:20 2005
@@ -1,42 +1,44 @@
from pypy.interpreter.error import OperationError, debug_print
from pypy.interpreter import module
-from pypy.interpreter.mixedmodule import MixedModule
+
+
import pythonparse
debug_print( "Loading grammar %s" % pythonparse.PYTHON_GRAMMAR )
-class Module(MixedModule):
- """The builtin parser module.
- """
-
-
- appleveldefs = {
- # 'ParserError' : 'app_class.ParserError',
- }
- interpleveldefs = {
- '__name__' : '(space.wrap("parser"))',
- '__doc__' : '(space.wrap("parser (recparser version) module"))',
-
- 'suite' : 'pyparser.suite',
- 'expr' : 'pyparser.expr',
- 'STType' : 'pyparser.STType',
- 'ast2tuple' : 'pyparser.ast2tuple',
-# 'ASTType' : 'pyparser.STType',
- # 'sequence2st' : 'pyparser.sequence2st',
- #'eval_input' : 'pyparser.eval_input',
- #'file_input' : 'pyparser.file_input',
- #'compileast' : 'pyparser.compileast',
- #'st2tuple' : 'pyparser.st2tuple',
- #'st2list' : 'pyparser.st2list',
- #'issuite' : 'pyparser.issuite',
- #'ast2tuple' : 'pyparser.ast2tuple',
- #'tuple2st' : 'pyparser.tuple2st',
- #'isexpr' : 'pyparser.isexpr',
- #'ast2list' : 'pyparser.ast2list',
- #'sequence2ast' : 'pyparser.sequence2ast',
- #'tuple2ast' : 'pyparser.tuple2ast',
- #'_pickler' : 'pyparser._pickler',
- #'compilest' : 'pyparser.compilest',
- }
+## from pypy.interpreter.mixedmodule import MixedModule
+## class Module(MixedModule):
+## """The builtin parser module.
+## """
+
+
+## appleveldefs = {
+## # 'ParserError' : 'app_class.ParserError',
+## }
+## interpleveldefs = {
+## '__name__' : '(space.wrap("parser"))',
+## '__doc__' : '(space.wrap("parser (recparser version) module"))',
+
+## 'suite' : 'pyparser.suite',
+## 'expr' : 'pyparser.expr',
+## 'STType' : 'pyparser.STType',
+## 'ast2tuple' : 'pyparser.ast2tuple',
+## # 'ASTType' : 'pyparser.STType',
+## # 'sequence2st' : 'pyparser.sequence2st',
+## #'eval_input' : 'pyparser.eval_input',
+## #'file_input' : 'pyparser.file_input',
+## #'compileast' : 'pyparser.compileast',
+## #'st2tuple' : 'pyparser.st2tuple',
+## #'st2list' : 'pyparser.st2list',
+## #'issuite' : 'pyparser.issuite',
+## #'ast2tuple' : 'pyparser.ast2tuple',
+## #'tuple2st' : 'pyparser.tuple2st',
+## #'isexpr' : 'pyparser.isexpr',
+## #'ast2list' : 'pyparser.ast2list',
+## #'sequence2ast' : 'pyparser.sequence2ast',
+## #'tuple2ast' : 'pyparser.tuple2ast',
+## #'_pickler' : 'pyparser._pickler',
+## #'compilest' : 'pyparser.compilest',
+## }
Added: pypy/branch/pycompiler/module/recparser/astbuilder.py
==============================================================================
--- (empty file)
+++ pypy/branch/pycompiler/module/recparser/astbuilder.py Fri May 27 14:38:20 2005
@@ -0,0 +1,48 @@
+
+
+from grammar import BaseGrammarBuilder
+from compiler.ast import nodes, TokenNode
+from compiler.astfactory import factory_functions, group_factory, syntaxnode_factory
+
+class AstBuilder(BaseGrammarBuilder):
+    """Grammar builder that produces AST nodes directly.
+
+    Matched tokens and rules are kept on ``self.stack``.  Each matched
+    sequence (and each matched root alternative) collapses the stack
+    entries it covers into a single node, created by the factory that
+    ``factory_functions`` registers for the rule name
+    (``syntaxnode_factory`` when nothing is registered).
+    """
+
+    def __init__( self, rules=None, debug=0 ):
+        """Forward ``rules`` and ``debug`` unchanged to BaseGrammarBuilder."""
+        BaseGrammarBuilder.__init__(self, rules, debug )
+
+    def top(self, n=1):
+        """Return the expanded content of the ``n`` topmost stack entries.
+
+        The ``expand()`` results of the entries are concatenated, in stack
+        order, into one new list; the stack itself is not modified.
+        An empty list is returned when ``n`` is smaller than 1.
+        """
+        # ``self.stack[-0:]`` is the *whole* stack, not an empty slice, so
+        # a request for zero entries has to be answered explicitly.
+        if n < 1:
+            return []
+        toplist = []
+        for node in self.stack[-n:]:
+            toplist += node.expand()
+        return toplist
+
+    def alternative( self, rule, source ):
+        """Handle a matched alternative ``rule``; always returns True.
+
+        A non-root rule is left alone: the element it matched stays on top
+        of the stack.  For a root rule the top entry is replaced by the
+        node built from that entry's expanded content.
+        """
+        if rule.is_root():
+            ast_factory = factory_functions.get( rule.name, syntaxnode_factory )
+            elems = self.top()
+            node = ast_factory( rule.name, source, elems )
+            self.stack[-1] = node
+            # NOTE(review): indentation was lost in this archive; the debug
+            # dump is assumed to belong to the root-rule branch -- confirm.
+            if self.debug:
+                self.stack[-1].dumpstr()
+        return True
+
+    def sequence(self, rule, source, elts_number):
+        """Handle a matched sequence ``rule``; always returns True.
+
+        The ``elts_number`` topmost stack entries are expanded and replaced
+        by one node regrouping them.  Root rules use their registered
+        factory (``syntaxnode_factory`` by default), other rules use
+        ``group_factory``.  With ``elts_number`` equal to 0 (an empty
+        match) a node without items is pushed instead.
+        """
+        items = self.top( elts_number )
+        if rule.is_root():
+            ast_factory = factory_functions.get( rule.name, syntaxnode_factory )
+        else:
+            ast_factory = group_factory
+
+        node = ast_factory( rule.name, source, items )
+        # replace N elements with 1 element regrouping them
+        if elts_number >= 1:
+            # Slice assignment expects an iterable of replacement entries:
+            # wrap the node so it is stored as exactly one stack entry.
+            self.stack[-elts_number:] = [node]
+        else:
+            self.stack.append(node)
+        return True
+
+    def token(self, name, value, source):
+        """Push a ``TokenNode(name, source, value)`` on the stack; returns True."""
+        self.stack.append(TokenNode(name, source, value))
+        return True
Modified: pypy/branch/pycompiler/module/recparser/ebnfparse.py
==============================================================================
--- pypy/branch/pycompiler/module/recparser/ebnfparse.py (original)
+++ pypy/branch/pycompiler/module/recparser/ebnfparse.py Fri May 27 14:38:20 2005
@@ -250,6 +250,10 @@
rules = [ star, star_opt, symbol, alternative, rule, grammar, sequence,
seq_cont_list, sequence_cont, option, group, alt ]
+ for r in rules:
+ r._trace = False
+ for tk in r.args:
+ tk._trace = False
build_first_sets( rules )
return grammar
Modified: pypy/branch/pycompiler/module/recparser/grammar.py
==============================================================================
--- pypy/branch/pycompiler/module/recparser/grammar.py (original)
+++ pypy/branch/pycompiler/module/recparser/grammar.py Fri May 27 14:38:20 2005
@@ -35,6 +35,14 @@
"""Returns the current line number"""
return 0
+ def get_pos(self):
+ """Returns the current source position of the scanner"""
+ return 0
+
+ def get_source_text(self, pos1, pos2 ):
+ """Returns the source text between two scanner positions"""
+ return ""
+
######################################################################
@@ -53,6 +61,8 @@
if new_size != size:
changed = True
print "Done", loops, "loops"
+ for r in rules:
+ r.reorder_rule()
from syntaxtree import SyntaxNode, TempSyntaxNode, TokenNode
@@ -121,6 +131,7 @@
self.first_set = []
self.first_set_complete = False
self._processing = False
+ self._trace = False
def is_root(self):
"""This is a root node of the grammar, that is one that will
@@ -142,21 +153,45 @@
returns None if no match or an object build by builder
"""
token = source.peek()
+ pos1 = source.get_pos()
in_first_set = self.match_first_set(token)
+ if not in_first_set: # and not EmptyToken in self.first_set:
+ if EmptyToken in self.first_set:
+ ret = builder.sequence(self, source, 0 )
+ if self._trace:
+ prefix = '%seee' % (' ' * level)
+ print prefix, " RULE =", self
+ print prefix, " TOKEN =", token
+ print prefix, " FIRST SET =", self.first_set
+ return self.debug_return( ret, 0 )
+ if self._trace:
+ prefix = '%srrr' % (' ' * level)
+ print prefix, " RULE =", self
+ print prefix, " TOKEN =", token
+ print prefix, " FIRST SET =", self.first_set
+ return None
+ elif self._trace:
+ prefix = '%s>>>' % (' ' * level)
+ print prefix, " RULE =", self
+ print prefix, " TOKEN =", token
+ print prefix, " FIRST SET =", self.first_set
+
# <tmpdebug>
- if 0 and token is not None:
- if in_first_set:
- prefix = '%s+++' % (' ' * level)
+ res = self._match(source, builder, level)
+ if self._trace:
+ pos2 = source.get_pos()
+ if res:
+ prefix = '%s+++' % (' ' * level)
else:
- prefix = '%s---' % (' ' * level)
- print prefix, " TOKEN =", token
+ prefix = '%s---' % (' ' * level)
print prefix, " RULE =", self
+ print prefix, " TOKEN =", token
print prefix, " FIRST SET =", self.first_set
- print "*" * 50
+ print prefix, " TEXT ='%s'" % source.get_source_text(pos1,pos2)
+ if res:
+ print "*" * 50
# </tmpdebug>
- if not in_first_set and EmptyToken not in self.first_set:
- return None
- return self._match(source, builder, level)
+ return res
def _match(self, source, builder, level=0):
"""Try to match a grammar rule
@@ -227,11 +262,17 @@
def in_first_set(self, other):
return other in self.first_set
+ def reorder_rule(self):
+ """Called after the computation of first set to allow rules to be reordered
+ to avoid ambiguities"""
+ pass
+
class Alternative(GrammarElement):
"""Represents an alternative in a grammar rule (as in S -> A | B | C)"""
def __init__(self, name, *args):
GrammarElement.__init__(self, name )
self.args = list(args)
+ self._reordered = False
for i in self.args:
assert isinstance( i, GrammarElement )
@@ -246,8 +287,9 @@
# try instead to get the longest alternative
# to see if this solve our problems with infinite recursion
for rule in self.args:
- if not rule.match_first_set(tok):
- #print "Skipping impossible rule: %s" % (rule,)
+ if not rule.match_first_set(tok) and EmptyToken not in rule.first_set:
+ if self._trace:
+ print "Skipping impossible rule: %s" % (rule,)
continue
m = rule.match(source, builder, level+1)
if m:
@@ -270,12 +312,31 @@
if S -> (A | B | C):
LAH(S) = Union( LAH(A), LAH(B), LAH(C) )
"""
+
# do this to avoid problems on indirect recursive rules
for rule in self.args:
for t in rule.first_set:
if t not in self.first_set:
self.first_set.append(t)
+    def reorder_rule(self):
+        """Move the alternatives that can match Empty to the end.
+
+        Overrides the base-class ``reorder_rule`` hook, which is invoked on
+        every rule once the first sets have been computed.  Alternatives
+        whose first set contains ``EmptyToken`` are placed after all the
+        others in ``self.args`` (relative order inside each group is
+        kept), so that they are only tried last.  A warning is printed,
+        once per alternative, when more than one of its rules can match
+        Empty.
+        """
+        empty_set = []
+        not_empty_set = []
+        for r in self.args:
+            if EmptyToken in r.first_set:
+                empty_set.append( r )
+            else:
+                not_empty_set.append( r )
+        if len(empty_set)>1 and not self._reordered:
+            print "Warning: alternative %s has more than one rule matching Empty" % self
+            # NOTE(review): indentation was lost in this archive; the flag
+            # is assumed to be set only after the warning -- confirm.
+            self._reordered = True
+        # in-place update so that other references to the list stay valid
+        self.args[:] = not_empty_set
+        self.args.extend( empty_set )
+
+    # The method used to be named ``reorder_rules``, which the hook call
+    # ``r.reorder_rule()`` never reached; keep the old name as an alias.
+    reorder_rules = reorder_rule
+
+
class Sequence(GrammarElement):
"""Reprensents a Sequence in a grammar rule (as in S -> A B C)"""
@@ -291,14 +352,8 @@
print "try seq:", self.display()
ctx = source.context()
bctx = builder.context()
- if self.name == 'listmaker':
- print "----------------------------- LISTMAKER !"
for rule in self.args:
- if self.name == 'listmaker':
- print " -------------- IN LISTMAKER, rule =", rule
m = rule.match(source, builder, level+1)
- if self.name == 'listmaker':
- print " !!!!!!!!!!!!!! IN LISTMAKER, doesn't match %s" % (rule,)
if not m:
# Restore needed because some rules may have been matched
# before the one that failed
@@ -328,12 +383,15 @@
LAH(S) = LAH(A)
"""
for rule in self.args:
+ if EmptyToken in self.first_set:
+ self.first_set.remove( EmptyToken )
# while we're in this loop, keep agregating possible tokens
for t in rule.first_set:
if t not in self.first_set:
self.first_set.append(t)
if EmptyToken not in rule.first_set:
break
+
class KleenStar(GrammarElement):
Modified: pypy/branch/pycompiler/module/recparser/pythonlexer.py
==============================================================================
--- pypy/branch/pycompiler/module/recparser/pythonlexer.py (original)
+++ pypy/branch/pycompiler/module/recparser/pythonlexer.py Fri May 27 14:38:20 2005
@@ -213,18 +213,29 @@
def next(self):
if self.stack_pos >= len(self.stack):
+ pos0 = self.pos
tok, val = self._next()
token = Token( tok, val )
- self.stack.append( ( token, self.line) )
+ self.stack.append( ( token, self.line, pos0) )
self._current_line = self.line
else:
- token, line = self.stack[self.stack_pos]
+ token, line, pos0 = self.stack[self.stack_pos]
self._current_line = line
self.stack_pos += 1
if DEBUG:
print "%d/%d: %s, %s" % (self.stack_pos, len(self.stack), tok, val)
return token
+ def get_pos(self):
+ """Return the source position from which the next token will be read.
+
+ When all buffered tokens have been consumed this is the current
+ scanner position ``self.pos``; otherwise it is the position recorded
+ in the buffered stack entry at ``self.stack_pos`` (``next`` stores
+ there the value ``self.pos`` had just before the token was scanned).
+ """
+ if self.stack_pos >= len(self.stack):
+ return self.pos
+ else:
+ token, line, pos = self.stack[self.stack_pos]
+ return pos
+
+ def get_source_text(self, pos0, pos1 ):
+ """Return the slice ``self.input[pos0:pos1]`` of the scanned input."""
+ return self.input[pos0:pos1]
+
def peek(self):
"""returns next token without consuming it"""
ctx = self.context()
Modified: pypy/branch/pycompiler/module/recparser/test/test_samples.py
==============================================================================
--- pypy/branch/pycompiler/module/recparser/test/test_samples.py (original)
+++ pypy/branch/pycompiler/module/recparser/test/test_samples.py Fri May 27 14:38:20 2005
@@ -53,6 +53,7 @@
yield check_parse, abspath
def check_parse(filepath):
+ print "Testing:", filepath
pypy_tuples = pypy_parse(filepath)
python_tuples = python_parse(filepath)
try:
Modified: pypy/branch/pycompiler/module/recparser/test/unittest_samples.py
==============================================================================
--- pypy/branch/pycompiler/module/recparser/test/unittest_samples.py (original)
+++ pypy/branch/pycompiler/module/recparser/test/unittest_samples.py Fri May 27 14:38:20 2005
@@ -2,7 +2,7 @@
import os, os.path as osp
import sys
-from pypy.module.recparser.pythonutil import python_parse, pypy_parse, set_debug
+from pypy.module.recparser.pythonutil import python_parse, pypy_parse
from pprint import pprint
from pypy.module.recparser import grammar
grammar.DEBUG = False
@@ -85,7 +85,8 @@
opts, args = getopt.getopt( sys.argv[1:], "d:", [] )
for opt, val in opts:
if opt == "-d":
- set_debug(int(val))
+ pass
+# set_debug(int(val))
if args:
samples = args
else:
More information about the Pypy-commit
mailing list