[pypy-svn] r22761 - in pypy/branch/ast-experiments/pypy: interpreter/pyparser module/recparser translator/tool
ludal at codespeak.net
Sat Jan 28 02:10:50 CET 2006
Author: ludal
Date: Sat Jan 28 02:10:44 2006
New Revision: 22761
Modified:
pypy/branch/ast-experiments/pypy/interpreter/pyparser/astbuilder.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfgrammar.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnflexer.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfparse.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/grammar.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/pysymbol.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonlexer.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonparse.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/pytoken.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/tuplebuilder.py
pypy/branch/ast-experiments/pypy/module/recparser/__init__.py
pypy/branch/ast-experiments/pypy/translator/tool/make_dot.py
Log:
big refactoring of the parser -- part I
isolates the management of symbols and grammar rules into a Parser class
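
The new Parser class itself lives in pypy/interpreter/pyparser/parser.py,
which is not part of this diff. As a reading aid, here is a minimal sketch
of its interface, reconstructed from the call sites changed below; every
name and body in it is an assumption about the real implementation, not a
copy of it:

    class Parser(object):
        """Sketch: registry for token/symbol codes and grammar rules."""

        def __init__(self):
            self._count = 0
            self.tokens = {}      # token name   -> integer code
            self.tok_name = {}    # integer code -> token name
            self.tok_values = {}  # token literal, e.g. "==" -> code
            self.symbols = {}     # rule name    -> integer code
            self.sym_name = {}    # integer code -> rule name
            self.root_rules = {}  # named rules (keyed by codename or name below)
            self.all_rules = []   # every rule, including anonymous ones
            import grammar        # lazy: grammar.py imports Parser
            # per-parser epsilon sentinel, replacing the module-level
            # EmptyToken = Token(NULLTOKEN, None) removed from grammar.py
            self.EmptyToken = grammar.Token(self, -1, None)

        def add_token(self, name, value=None):
            """Register a token, return its (stable) integer code."""
            if name in self.tokens:
                return self.tokens[name]
            code = self.tokens[name] = self._count
            self.tok_name[code] = name
            self._count += 1
            if value is not None:          # literal spelling, e.g. "!="
                self.tok_values[value] = code
            return code

        def add_symbol(self, name):
            """Register a non-terminal, return its integer code."""
            if name in self.symbols:
                return self.symbols[name]
            code = self.symbols[name] = self._count
            self.sym_name[code] = name
            self._count += 1
            return code

        def symbol_repr(self, codename):
            # grammar.py below calls this as a method, while astbuilder.py
            # indexes it like a mapping, so the real thing may be a dict.
            return self.sym_name.get(codename,
                       self.tok_name.get(codename, str(codename)))

        # Grammar-element factories: elements now carry the parser that
        # owns their codenames (see the grammar.py hunks below).
        def Token(self, name, value=None):
            import grammar
            # the literal (if any) is registered so lexers can map it back
            return grammar.Token(self, self.add_token(name, value), value)

        def Sequence(self, name, args):
            import grammar
            rule = grammar.Sequence(self, self.add_symbol(name), args)
            self.all_rules.append(rule)
            return rule

        # Alternative() and KleeneStar() would follow the same pattern;
        # build_first_sets() presumably iterates over self.all_rules.
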
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/astbuilder.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/astbuilder.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/astbuilder.py Sat Jan 28 02:10:44 2006
@@ -5,13 +5,17 @@
from grammar import BaseGrammarBuilder, AbstractContext
from pypy.interpreter.astcompiler import ast, consts
-from pypy.interpreter.pyparser.pysymbol import _cpython_symbols as sym
-import pypy.interpreter.pyparser.pytoken as tok
+from pypy.interpreter.pyparser.pythonparse import PYTHON_PARSER
from pypy.interpreter.pyparser.error import SyntaxError
from pypy.interpreter.pyparser.parsestring import parsestr
DEBUG_MODE = 0
+# XXX : use builder.parser instead
+sym = PYTHON_PARSER.symbols
+rsym = PYTHON_PARSER.symbol_repr
+tok = PYTHON_PARSER
+
### Parsing utilities ################################################
def parse_except_clause(tokens):
"""parses 'except' [test [',' test]] ':' suite
@@ -1501,8 +1505,8 @@
self.count = count
self.lineno = lineno # src.getline()
self.col = 0 # src.getcol()
-
-
+
+# XXX : replace sym and rsym by a ref to parser
class RuleObject(BaseRuleObject):
"""A simple object used to wrap a rule or token"""
def __init__(self, name, count, lineno):
@@ -1510,26 +1514,25 @@
self.rulename = name
def __str__(self):
- return "<Rule: %s/%d>" % (sym.sym_name[self.rulename], self.count)
+ return "<Rule: %s/%d>" % ( rsym[self.rulename], self.count)
def __repr__(self):
- return "<Rule: %s/%d>" % (sym.sym_name[self.rulename], self.count)
+ return "<Rule: %s/%d>" % ( rsym[self.rulename], self.count)
class TempRuleObject(BaseRuleObject):
"""used to keep track of how many items get_atom() should pop"""
-
def __init__(self, name, count, lineno):
BaseRuleObject.__init__(self, count, lineno)
self.temp_rulename = name
-
+
def __str__(self):
return "<Rule: %s/%d>" % (self.temp_rulename, self.count)
def __repr__(self):
return "<Rule: %s/%d>" % (self.temp_rulename, self.count)
-
+
class TokenObject(ast.Node):
"""A simple object used to wrap a rule or token"""
def __init__(self, name, value, lineno):
@@ -1539,7 +1542,7 @@
# self.line = 0 # src.getline()
self.col = 0 # src.getcol()
self.lineno = lineno
-
+
def get_name(self):
return tok.tok_rpunct.get(self.name,
tok.tok_name.get(self.name, str(self.name)))
@@ -1549,10 +1552,10 @@
if value is None:
value = ''
return value
-
+
def __str__(self):
return "<Token: (%s,%s)>" % (self.get_name(), self.value)
-
+
def __repr__(self):
return "<Token: (%r,%s)>" % (self.get_name(), self.value)
@@ -1622,8 +1625,10 @@
class AstBuilder(BaseGrammarBuilder):
"""A builder that directly produce the AST"""
- def __init__(self, rules=None, debug=0, space=None):
- BaseGrammarBuilder.__init__(self, rules, debug)
+ def __init__(self, parser=None, debug=0, space=None):
+ if parser is None:
+ parser = PYTHON_PARSER
+ BaseGrammarBuilder.__init__(self, parser, debug)
self.rule_stack = []
self.space = space
self.source_encoding = None
@@ -1632,8 +1637,6 @@
return AstBuilderContext(self.rule_stack)
def restore(self, ctx):
-## if DEBUG_MODE:
-## print "Restoring context (%s)" % (len(ctx.rule_stack))
assert isinstance(ctx, AstBuilderContext)
assert len(self.rule_stack) >= ctx.d
del self.rule_stack[ctx.d:]
@@ -1644,15 +1647,10 @@
def push(self, obj):
self.rule_stack.append(obj)
- if not isinstance(obj, RuleObject) and not isinstance(obj, TokenObject):
-## if DEBUG_MODE:
-## print "Pushed:", str(obj), len(self.rule_stack)
- pass
- elif isinstance(obj, TempRuleObject):
-## if DEBUG_MODE:
-## print "Pushed:", str(obj), len(self.rule_stack)
- pass
- # print "\t", self.rule_stack
+## if not isinstance(obj, RuleObject) and not isinstance(obj, TokenObject):
+## pass
+## elif isinstance(obj, TempRuleObject):
+## pass
def push_tok(self, name, value, src ):
self.push( TokenObject( name, value, src._token_lnum ) )
@@ -1664,48 +1662,29 @@
# Do nothing, keep rule on top of the stack
## rule_stack = self.rule_stack[:]
if rule.is_root():
-## if DEBUG_MODE:
-## print "ALT:", sym.sym_name[rule.codename], self.rule_stack
builder_func = ASTRULES.get(rule.codename, None)
if builder_func:
builder_func(self, 1)
else:
-## if DEBUG_MODE:
-## print "No reducing implementation for %s, just push it on stack" % (
-## sym.sym_name[rule.codename])
self.push_rule(rule.codename, 1, source)
else:
self.push_rule(rule.codename, 1, source)
-## if DEBUG_MODE > 1:
-## show_stack(rule_stack, self.rule_stack)
-## x = raw_input("Continue ?")
return True
def sequence(self, rule, source, elts_number):
""" """
## rule_stack = self.rule_stack[:]
if rule.is_root():
-## if DEBUG_MODE:
-## print "SEQ:", sym.sym_name[rule.codename]
builder_func = ASTRULES.get(rule.codename, None)
if builder_func:
- # print "REDUCING SEQUENCE %s" % sym.sym_name[rule.codename]
builder_func(self, elts_number)
else:
-## if DEBUG_MODE:
-## print "No reducing implementation for %s, just push it on stack" % (
-## sym.sym_name[rule.codename])
self.push_rule(rule.codename, elts_number, source)
else:
self.push_rule(rule.codename, elts_number, source)
-## if DEBUG_MODE > 1:
-## show_stack(rule_stack, self.rule_stack)
-## raw_input("Continue ?")
return True
def token(self, name, value, source):
-## if DEBUG_MODE:
-## print "TOK:", tok.tok_name[name], name, value
self.push_tok(name, value, source)
return True
@@ -1723,12 +1702,12 @@
l = space.builtin.get('long')
return space.call_function(l, space.wrap(value), space.wrap(base))
if value.endswith('j') or value.endswith('J'):
- c = space.builtin.get('complex')
+ c = space.builtin.get('complex')
return space.call_function(c, space.wrap(value))
try:
i = space.builtin.get('int')
return space.call_function(i, space.wrap(value), space.wrap(base))
- except:
+ except:
f = space.builtin.get('float')
return space.call_function(f, space.wrap(value))
@@ -1765,4 +1744,4 @@
else:
obj2 = "-"
print "% 3d | %30s | %30s" % (i, obj1, obj2)
-
+
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfgrammar.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfgrammar.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfgrammar.py Sat Jan 28 02:10:44 2006
@@ -2,45 +2,49 @@
# and the symbol mappings
from grammar import BaseGrammarBuilder, Alternative, Sequence, Token, \
- KleeneStar, GrammarElement, build_first_sets, EmptyToken
+ KleeneStar, GrammarElement
+from pypy.interpreter.pyparser.parser import Parser
-sym_map = {}
-sym_rmap = {}
-_count = 0
-
-def g_add_symbol( name ):
- global _count
- if name in sym_rmap:
- return sym_rmap[name]
- val = _count
- _count += 1
- sym_map[val] = name
- sym_rmap[name] = val
- globals()[name] = val
- return val
-
-
-tok_map = {}
-tok_rmap = {}
-
-def g_add_token(sym, name):
- global _count
- if name in tok_rmap:
- return tok_rmap[name]
- val = _count
- _count += 1
- tok_map[val] = name
- tok_rmap[name] = val
- sym_map[val] = sym
- sym_rmap[sym] = val
- globals()[sym] = val
- return val
+## sym_map = {}
+## sym_rmap = {}
+## _count = 0
+
+## def g_add_symbol( name ):
+## global _count
+## if name in sym_rmap:
+## return sym_rmap[name]
+## val = _count
+## _count += 1
+## sym_map[val] = name
+## sym_rmap[name] = val
+## globals()[name] = val
+## return val
+
+
+## tok_map = {}
+## tok_rmap = {}
+
+## def g_add_token(sym, name):
+## global _count
+## if name in tok_rmap:
+## return tok_rmap[name]
+## val = _count
+## _count += 1
+## tok_map[val] = name
+## tok_rmap[name] = val
+## sym_map[val] = sym
+## sym_rmap[sym] = val
+## globals()[sym] = val
+## return val
-g_add_token('EOF', 'EOF')
+## g_add_token('EOF', 'EOF')
+class GrammarParser(Parser):
+ pass
+GRAMMAR_GRAMMAR = GrammarParser()
def grammar_grammar():
@@ -60,54 +64,57 @@
group: '(' alternative ')' star?
"""
global sym_map
- S = g_add_symbol
- T = g_add_token
+ p = GRAMMAR_GRAMMAR
+ p.add_token('EOF','EOF')
+
# star: '*' | '+'
- star = Alternative( S("star"), [Token(T('TOK_STAR', '*')), Token(T('TOK_ADD', '+'))] )
- star_opt = KleeneStar ( S("star_opt"), 0, 1, rule=star )
+ star = p.Alternative( "star", [p.Token('TOK_STAR', '*'), p.Token('TOK_ADD', '+')] )
+ star_opt = p.KleeneStar ( "star_opt", 0, 1, rule=star )
# rule: SYMBOL ':' alternative
- symbol = Sequence( S("symbol"), [Token(T('TOK_SYMBOL', 'SYMBOL')), star_opt] )
- symboldef = Token( T('TOK_SYMDEF', 'SYMDEF') )
- alternative = Sequence( S("alternative"), [])
- rule = Sequence( S("rule"), [symboldef, alternative] )
+ symbol = p.Sequence( "symbol", [p.Token('TOK_SYMBOL'), star_opt] )
+ symboldef = p.Token( 'TOK_SYMDEF' )
+ alternative = p.Sequence( "alternative", [])
+ rule = p.Sequence( "rule", [symboldef, alternative] )
# grammar: rule+
- grammar = KleeneStar( S("grammar"), _min=1, rule=rule )
+ grammar = p.KleeneStar( "grammar", _min=1, rule=rule )
# alternative: sequence ( '|' sequence )*
- sequence = KleeneStar( S("sequence"), 1 )
- seq_cont_list = Sequence( S("seq_cont_list"), [Token(T('TOK_BAR', '|')), sequence] )
- sequence_cont = KleeneStar( S("sequence_cont"),0, rule=seq_cont_list )
-
+ sequence = p.KleeneStar( "sequence", 1 )
+ seq_cont_list = p.Sequence( "seq_cont_list", [p.Token('TOK_BAR', '|'), sequence] )
+ sequence_cont = p.KleeneStar( "sequence_cont",0, rule=seq_cont_list )
+
alternative.args = [ sequence, sequence_cont ]
# option: '[' alternative ']'
- option = Sequence( S("option"), [Token(T('TOK_LBRACKET', '[')), alternative, Token(T('TOK_RBRACKET', ']'))] )
+ option = p.Sequence( "option", [p.Token('TOK_LBRACKET', '['), alternative, p.Token('TOK_RBRACKET', ']')] )
# group: '(' alternative ')'
- group = Sequence( S("group"), [Token(T('TOK_LPAR', '(')), alternative, Token(T('TOK_RPAR', ')')), star_opt] )
+ group = p.Sequence( "group", [p.Token('TOK_LPAR', '('), alternative, p.Token('TOK_RPAR', ')'), star_opt] )
# sequence: (SYMBOL | STRING | option | group )+
- string = Token(T('TOK_STRING', 'STRING'))
- alt = Alternative( S("sequence_alt"), [symbol, string, option, group] )
+ string = p.Token('TOK_STRING')
+ alt = p.Alternative( "sequence_alt", [symbol, string, option, group] )
sequence.args = [ alt ]
+ p.root_rules['grammar'] = grammar
+ p.build_first_sets()
+ return p
- rules = [ star, star_opt, symbol, alternative, rule, grammar, sequence,
- seq_cont_list, sequence_cont, option, group, alt ]
- build_first_sets( rules )
- return grammar
-
-
-GRAMMAR_GRAMMAR = grammar_grammar()
-for _sym, _value in sym_rmap.items():
- globals()[_sym] = _value
+grammar_grammar()
+for _sym, _value in GRAMMAR_GRAMMAR.symbols.items():
+ assert not hasattr( GRAMMAR_GRAMMAR, _sym )
+ setattr(GRAMMAR_GRAMMAR, _sym, _value )
+
+for _sym, _value in GRAMMAR_GRAMMAR.tokens.items():
+ assert not hasattr( GRAMMAR_GRAMMAR, _sym )
+ setattr(GRAMMAR_GRAMMAR, _sym, _value )
# cleanup
-del _sym
-del _value
+## del _sym
+## del _value
del grammar_grammar
-del g_add_symbol
+## del g_add_symbol
# del g_add_token
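
The two setattr loops above are what the later hunks rely on when they
write things like self.gram.TOK_STRING or self.gram.rule: every symbol and
token code becomes an attribute of GRAMMAR_GRAMMAR. A small usage sketch,
assuming the Parser interface sketched at the top of this mail:

    from ebnfgrammar import GRAMMAR_GRAMMAR as G

    assert G.TOK_STRING == G.tokens['TOK_STRING']  # token code as attribute
    assert G.rule == G.symbols['rule']             # symbol code as attribute
    root = G.root_rules['grammar']                 # entry rule matched below
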
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnflexer.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnflexer.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnflexer.py Sat Jan 28 02:10:44 2006
@@ -4,7 +4,7 @@
"""
from grammar import TokenSource, Token, AbstractContext
-from ebnfgrammar import *
+from ebnfgrammar import GRAMMAR_GRAMMAR as G
def match_symbol( input, start, stop ):
@@ -31,8 +31,9 @@
SYMBOL: a rule symbol, usually appearing to the right of a SYMDEF
tokens: '[', ']', '(' ,')', '*', '+', '|'
"""
- def __init__(self, inpstring):
+ def __init__(self, parser, inpstring):
# TokenSource.__init__(self)
+ self.parser = parser
self.input = inpstring
self.pos = 0
self.begin = 0
@@ -58,7 +59,6 @@
assert isinstance( ctx, GrammarSourceContext )
self.pos = ctx.pos
self._peeked = ctx.peek
-
def current_linesource(self):
pos = idx = self.begin
@@ -74,7 +74,6 @@
def current_lineno(self):
return self.current_line
-
def skip_empty_lines(self, input, start, end ):
idx = start
# assume beginning of a line
@@ -130,13 +129,13 @@
peeked = self._peeked
self._peeked = None
return peeked
-
+
pos = self.pos
inp = self.input
end = len(self.input)
pos = self.skip_empty_lines(inp,pos,end)
if pos==end:
- return Token(EOF, None)
+ return self.parser.Token( 'EOF', None)
# at this point nextchar is not a white space nor \n
nextchr = inp[pos]
@@ -148,22 +147,22 @@
self.pos = npos
_endpos = npos - 1
assert _endpos>=0
- return Token(TOK_STRING,inp[pos+1:_endpos])
+ return self.parser.Token( 'TOK_STRING', inp[pos+1:_endpos])
else:
npos = match_symbol( inp, pos, end)
if npos!=pos:
self.pos = npos
if npos!=end and inp[npos]==":":
self.pos += 1
- return Token(TOK_SYMDEF,inp[pos:npos])
+ return self.parser.Token( 'TOK_SYMDEF', inp[pos:npos])
else:
- return Token(TOK_SYMBOL,inp[pos:npos])
-
+ return self.parser.Token( 'TOK_SYMBOL', inp[pos:npos])
+
# we still have pos!=end here
chr = inp[pos]
if chr in "[]()*+|":
self.pos = pos+1
- return Token(tok_rmap[chr], chr)
+ return Token( self.parser, self.parser.tok_values[chr], chr)
self.RaiseError( "Unknown token" )
def peek(self):
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfparse.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfparse.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfparse.py Sat Jan 28 02:10:44 2006
@@ -1,13 +1,12 @@
#!/usr/bin/env python
from grammar import BaseGrammarBuilder, Alternative, Sequence, Token
from grammar import GrammarProxy, KleeneStar, GrammarElement, build_first_sets
-from grammar import EmptyToken, AbstractBuilder, AbstractContext
+from grammar import AbstractBuilder, AbstractContext
from ebnflexer import GrammarSource
import ebnfgrammar
-from ebnfgrammar import GRAMMAR_GRAMMAR, sym_map
+from ebnfgrammar import GRAMMAR_GRAMMAR
from syntaxtree import AbstractSyntaxVisitor
-import pytoken
-import pysymbol
+from parser import Parser
ORDA = ord("A")
@@ -56,8 +55,8 @@
class NameToken(Token):
"""A token that is not a keyword"""
- def __init__(self, keywords=None ):
- Token.__init__(self, pytoken.NAME)
+ def __init__(self, parser, keywords=None ):
+ Token.__init__(self, parser, parser.tokens['NAME'] )
self.keywords = keywords
def match(self, source, builder, level=0):
@@ -95,191 +94,73 @@
-def ebnf_handle_grammar(self, node):
- for rule in node.nodes:
- rule.visit(self)
- # the rules are registered already
- # we do a pass through the variables to detect
- # terminal symbols from non terminals
- for r in self.items:
- for i in range(len(r.args)):
- a = r.args[i]
- if a.codename in self.rules:
- assert isinstance(a,Token)
- r.args[i] = self.rules[a.codename]
- if a.codename in self.terminals:
- del self.terminals[a.codename]
- # XXX .keywords also contains punctuations
- self.terminals['NAME'].keywords = self.keywords
-
-def ebnf_handle_rule(self, node):
- symdef = node.nodes[0].value
- self.current_rule = symdef
- self.current_subrule = 0
- alt = node.nodes[1]
- rule = alt.visit(self)
- if not isinstance(rule, Token):
- rule.codename = self.symbols.add_symbol( symdef )
- self.rules[rule.codename] = rule
-
-def ebnf_handle_alternative(self, node):
- items = [node.nodes[0].visit(self)]
- items += node.nodes[1].visit(self)
- if len(items) == 1 and not items[0].is_root():
- return items[0]
- alt = Alternative(self.new_symbol(), items)
- return self.new_item(alt)
-
-def ebnf_handle_sequence( self, node ):
- """ """
- items = []
- for n in node.nodes:
- items.append( n.visit(self) )
- if len(items)==1:
- return items[0]
- elif len(items)>1:
- return self.new_item( Sequence( self.new_symbol(), items) )
- raise RuntimeError("Found empty sequence")
-
-def ebnf_handle_sequence_cont( self, node ):
- """Returns a list of sequences (possibly empty)"""
- return [n.visit(self) for n in node.nodes]
-
-def ebnf_handle_seq_cont_list(self, node):
- return node.nodes[1].visit(self)
-
-
-def ebnf_handle_symbol(self, node):
- star_opt = node.nodes[1]
- sym = node.nodes[0].value
- terminal = self.terminals.get( sym, None )
- if not terminal:
- tokencode = pytoken.tok_values.get( sym, None )
- if tokencode is None:
- tokencode = self.symbols.add_symbol( sym )
- terminal = Token( tokencode )
- else:
- terminal = Token( tokencode )
- self.terminals[sym] = terminal
-
- return self.repeat( star_opt, terminal )
-
-def ebnf_handle_option( self, node ):
- rule = node.nodes[1].visit(self)
- return self.new_item( KleeneStar( self.new_symbol(), 0, 1, rule ) )
-
-def ebnf_handle_group( self, node ):
- rule = node.nodes[1].visit(self)
- return self.repeat( node.nodes[3], rule )
-
-def ebnf_handle_TOK_STRING( self, node ):
- value = node.value
- tokencode = pytoken.tok_punct.get( value, None )
- if tokencode is None:
- if not is_py_name( value ):
- raise RuntimeError("Unknown STRING value ('%s')" % value )
- # assume a keyword
- tok = Token( pytoken.NAME, value )
- if value not in self.keywords:
- self.keywords.append( value )
- else:
- # punctuation
- tok = Token( tokencode )
- return tok
-
-def ebnf_handle_sequence_alt( self, node ):
- res = node.nodes[0].visit(self)
- assert isinstance( res, GrammarElement )
- return res
-
-# This will setup a mapping between
-# ebnf_handle_xxx functions and ebnfgrammar.xxx
-ebnf_handles = {}
-for name, value in globals().items():
- if name.startswith("ebnf_handle_"):
- name = name[12:]
- key = getattr(ebnfgrammar, name )
- ebnf_handles[key] = value
-
-def handle_unknown( self, node ):
- raise RuntimeError("Unknown Visitor for %r" % node.name)
-
-
-
class EBNFBuilder(AbstractBuilder):
"""Build a grammar tree"""
- def __init__(self, rules=None, debug=0, symbols=None ):
- if symbols is None:
- symbols = pysymbol.SymbolMapper()
- AbstractBuilder.__init__(self, rules, debug, symbols)
+ def __init__(self, gram_parser, dest_parser ):
+ AbstractBuilder.__init__(self, dest_parser )
+ self.gram = gram_parser
self.rule_stack = []
- self.root_rules = {}
self.seqcounts = [] # number of items in the current sequence
self.altcounts = [] # number of sequence in the current alternative
self.curaltcount = 0
self.curseqcount = 0
self.current_subrule = 0
self.current_rule = -1
- self.all_rules = []
+ self.current_rule_name = ""
self.tokens = {}
self.keywords = []
- self.tokens[pytoken.NAME] = NameToken(keywords=self.keywords)
+ NAME = dest_parser.add_token('NAME')
+ self.tokens[NAME] = NameToken(dest_parser, keywords=self.keywords)
def new_symbol(self):
"""Allocate and return a new (anonymous) grammar symbol whose
name is based on the current grammar rule being parsed"""
- current_rule_name = self.symbols.sym_name.get(self.current_rule,"x")
- rule_name = ":" + current_rule_name + "_%d" % self.current_subrule
+ rule_name = ":" + self.current_rule_name + "_%d" % self.current_subrule
self.current_subrule += 1
- symval = self.symbols.add_anon_symbol( rule_name )
- return symval
+ return rule_name
def new_rule(self, rule):
"""A simple helper method that registers a new rule as 'known'"""
- self.all_rules.append(rule)
+ self.parser.all_rules.append(rule)
return rule
def resolve_rules(self):
"""Remove GrammarProxy objects"""
to_be_deleted = {}
- for rule in self.all_rules:
+ for rule in self.parser.all_rules:
for i, arg in enumerate(rule.args):
if isinstance(arg, GrammarProxy):
- real_rule = self.root_rules[arg.codename]
+ real_rule = self.parser.root_rules[arg.codename]
if isinstance(real_rule, GrammarProxy):
# If we still have a GrammarProxy associated to this codename
# this means we have encountered a terminal symbol
to_be_deleted[ arg.codename ] = True
- rule.args[i] = self.get_token( arg.codename )
+ rule.args[i] = Token( self.parser, arg.codename )
#print arg, "-> Token(",arg.rule_name,")"
else:
#print arg, "->", real_rule
rule.args[i] = real_rule
for codename in to_be_deleted.keys():
- del self.root_rules[codename]
+ del self.parser.root_rules[codename]
- def get_token(self, codename ):
- """Returns a new or existing token"""
- if codename in self.tokens:
- return self.tokens[codename]
- token = self.tokens[codename] = Token(codename)
- return token
+## def get_token(self, codename ):
+## """Returns a new or existing Token"""
+## if codename in self.tokens:
+## return self.tokens[codename]
+## token = self.tokens[codename] = self.parser.Token(codename)
+## return token
def get_symbolcode(self, name ):
- codename = self.symbols.sym_values.get( name, -1 )
- if codename == -1:
- codename = self.symbols.add_symbol( name )
- return codename
+ return self.parser.add_symbol( name )
def get_rule( self, name ):
- tokencode = pytoken.tok_values.get( name, -1 )
- if tokencode>=0:
- return self.get_token( tokencode )
+ if name in self.parser.tokens:
+ return self.parser.Token( name )
codename = self.get_symbolcode( name )
- if codename in self.root_rules:
- return self.root_rules[codename]
- proxy = GrammarProxy( name, codename )
- self.root_rules[codename] = proxy
+ if codename in self.parser.root_rules:
+ return self.parser.root_rules[codename]
+ proxy = GrammarProxy( self.parser, name, codename )
+ self.parser.root_rules[codename] = proxy
return proxy
def context(self):
@@ -291,7 +172,6 @@
assert False, "Not supported"
def alternative(self, rule, source):
-# print " alternative", rule.display(level=0,symbols=ebnfgrammar.sym_map)
return True
def pop_rules( self, count ):
@@ -302,89 +182,89 @@
return rules
def sequence(self, rule, source, elts_number):
-# print " sequence", rule.display(level=0,symbols=ebnfgrammar.sym_map)
_rule = rule.codename
- if _rule == ebnfgrammar.sequence:
+ if _rule == self.gram.sequence:
# print " -sequence", self.curaltcount, self.curseqcount
if self.curseqcount==1:
self.curseqcount = 0
self.curaltcount += 1
return True
rules = self.pop_rules(self.curseqcount)
- new_rule = self.new_rule(Sequence( self.new_symbol(), rules ))
+ new_rule = self.parser.Sequence( self.new_symbol(), rules )
self.rule_stack.append( new_rule )
self.curseqcount = 0
self.curaltcount += 1
- elif _rule == ebnfgrammar.alternative:
+ elif _rule == self.gram.alternative:
# print " -alternative", self.curaltcount, self.curseqcount
if self.curaltcount == 1:
self.curaltcount = 0
return True
rules = self.pop_rules(self.curaltcount)
- new_rule = self.new_rule(Alternative( self.new_symbol(), rules ))
+ new_rule = self.parser.Alternative( self.new_symbol(), rules )
self.rule_stack.append( new_rule )
self.curaltcount = 0
- elif _rule == ebnfgrammar.group:
+ elif _rule == self.gram.group:
# print " -group", self.curaltcount, self.curseqcount
self.curseqcount += 1
- elif _rule == ebnfgrammar.option:
+ elif _rule == self.gram.option:
# print " -option", self.curaltcount, self.curseqcount
# pops the last alternative
rules = self.pop_rules( 1 )
- new_rule = self.new_rule(KleeneStar( self.new_symbol(), _min=0, _max=1, rule=rules[0] ))
+ new_rule = self.parser.KleeneStar( self.new_symbol(), _min=0, _max=1, rule=rules[0] )
self.rule_stack.append( new_rule )
self.curseqcount += 1
- elif _rule == ebnfgrammar.rule:
+ elif _rule == self.gram.rule:
# print " -rule", self.curaltcount, self.curseqcount
assert len(self.rule_stack)==1
old_rule = self.rule_stack[0]
del self.rule_stack[0]
if isinstance(old_rule,Token):
# Wrap a token into an alternative
- old_rule = self.new_rule(Alternative( self.current_rule, [old_rule] ))
+ old_rule = self.parser.Alternative( self.current_rule_name, [old_rule] )
else:
# Make sure we use the codename from the named rule
old_rule.codename = self.current_rule
- self.root_rules[self.current_rule] = old_rule
+ self.parser.root_rules[self.current_rule] = old_rule
self.current_subrule = 0
return True
def token(self, name, value, source):
# print "token", name, value
- if name == ebnfgrammar.TOK_STRING:
+ if name == self.gram.TOK_STRING:
self.handle_TOK_STRING( name, value )
self.curseqcount += 1
- elif name == ebnfgrammar.TOK_SYMDEF:
+ elif name == self.gram.TOK_SYMDEF:
self.current_rule = self.get_symbolcode( value )
- elif name == ebnfgrammar.TOK_SYMBOL:
+ self.current_rule_name = value
+ elif name == self.gram.TOK_SYMBOL:
rule = self.get_rule( value )
self.rule_stack.append( rule )
self.curseqcount += 1
- elif name == ebnfgrammar.TOK_STAR:
+ elif name == self.gram.TOK_STAR:
top = self.rule_stack[-1]
- rule = self.new_rule(KleeneStar( self.new_symbol(), _min=0, rule=top))
+ rule = self.parser.KleeneStar( self.new_symbol(), _min=0, rule=top)
self.rule_stack[-1] = rule
- elif name == ebnfgrammar.TOK_ADD:
+ elif name == self.gram.TOK_ADD:
top = self.rule_stack[-1]
- rule = self.new_rule(KleeneStar( self.new_symbol(), _min=1, rule=top))
+ rule = self.parser.KleeneStar( self.new_symbol(), _min=1, rule=top)
self.rule_stack[-1] = rule
- elif name == ebnfgrammar.TOK_BAR:
+ elif name == self.gram.TOK_BAR:
assert self.curseqcount == 0
- elif name == ebnfgrammar.TOK_LPAR:
+ elif name == self.gram.TOK_LPAR:
self.altcounts.append( self.curaltcount )
self.seqcounts.append( self.curseqcount )
self.curseqcount = 0
self.curaltcount = 0
- elif name == ebnfgrammar.TOK_RPAR:
+ elif name == self.gram.TOK_RPAR:
assert self.curaltcount == 0
self.curaltcount = self.altcounts.pop()
self.curseqcount = self.seqcounts.pop()
- elif name == ebnfgrammar.TOK_LBRACKET:
+ elif name == self.gram.TOK_LBRACKET:
self.altcounts.append( self.curaltcount )
self.seqcounts.append( self.curseqcount )
self.curseqcount = 0
self.curaltcount = 0
- elif name == ebnfgrammar.TOK_RBRACKET:
+ elif name == self.gram.TOK_RBRACKET:
assert self.curaltcount == 0
assert self.curseqcount == 0
self.curaltcount = self.altcounts.pop()
@@ -392,95 +272,31 @@
return True
def handle_TOK_STRING( self, name, value ):
- try:
- tokencode = pytoken.tok_punct[value]
- except KeyError:
+ if value in self.parser.tok_values:
+ # punctuation
+ tokencode = self.parser.tok_values[value]
+ tok = Token( self.parser, tokencode, None )
+ else:
if not is_py_name(value):
raise RuntimeError("Unknown STRING value ('%s')" % value)
# assume a keyword
- tok = Token(pytoken.NAME, value)
+ tok = Token( self.parser, self.parser.NAME, value)
if value not in self.keywords:
self.keywords.append(value)
- else:
- # punctuation
- tok = Token(tokencode, None)
self.rule_stack.append(tok)
-class EBNFVisitor(AbstractSyntaxVisitor):
-
- def __init__(self):
- self.rules = {}
- self.terminals = {}
- self.current_rule = None
- self.current_subrule = 0
- self.keywords = []
- self.items = []
- self.terminals['NAME'] = NameToken()
- self.symbols = pysymbol.SymbolMapper( pysymbol._cpython_symbols.sym_name )
-
- def new_symbol(self):
- current_rule_name = self.symbols.sym_name.get(self.current_rule,"x")
- rule_name = ":" + self.current_rule + "_" + str(self.current_subrule)
- self.current_subrule += 1
- symval = self.symbols.add_anon_symbol( rule_name )
- return symval
-
- def new_item(self, itm):
- self.items.append(itm)
- return itm
-
- def visit_syntaxnode( self, node ):
- visit_func = ebnf_handles.get( node.name, handle_unknown )
- return visit_func( self, node )
-
- def visit_tokennode( self, node ):
- return self.visit_syntaxnode( node )
-
- def visit_tempsyntaxnode( self, node ):
- return self.visit_syntaxnode( node )
-
-
- def repeat( self, star_opt, myrule ):
- assert isinstance( myrule, GrammarElement )
- if star_opt.nodes:
- rule_name = self.new_symbol()
- tok = star_opt.nodes[0].nodes[0]
- if tok.value == '+':
- item = KleeneStar(rule_name, _min=1, rule=myrule)
- return self.new_item(item)
- elif tok.value == '*':
- item = KleeneStar(rule_name, _min=0, rule=myrule)
- return self.new_item(item)
- else:
- raise RuntimeError("Got symbol star_opt with value='%s'"
- % tok.value)
- return myrule
-
-
-
-def parse_grammar(stream):
- """parses the grammar file
-
- stream : file-like object representing the grammar to parse
- """
- source = GrammarSource(stream.read())
- builder = BaseGrammarBuilder()
- result = GRAMMAR_GRAMMAR.match(source, builder)
- node = builder.stack[-1]
- vis = EBNFVisitor()
- node.visit(vis)
- return vis
-
-def parse_grammar_text(txt):
+def parse_grammar_text( parser, txt):
"""parses a grammar input
stream : file-like object representing the grammar to parse
"""
- source = GrammarSource(txt)
- builder = EBNFBuilder(pysymbol._cpython_symbols)
- result = GRAMMAR_GRAMMAR.match(source, builder)
- return builder
+ source = GrammarSource( GRAMMAR_GRAMMAR, txt)
+ builder = EBNFBuilder(GRAMMAR_GRAMMAR, dest_parser=parser )
+ result = GRAMMAR_GRAMMAR.root_rules['grammar'].match(source, builder)
+ builder.resolve_rules()
+ parser.build_first_sets()
+ return parser
def target_parse_grammar_text(txt):
vis = parse_grammar_text(txt)
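
parse_grammar_text() now populates a caller-supplied destination Parser
instead of returning a builder (target_parse_grammar_text() just above
still uses the old one-argument call, so it appears to be left behind by
this part of the refactoring). A hedged usage sketch -- the grammar line
and the rule lookup are illustrative assumptions, mirroring the bootstrap
code in pythonparse.py further down:

    from pypy.interpreter.pyparser.parser import Parser
    from pypy.interpreter.pyparser import ebnfparse, pytoken

    p = Parser()
    pytoken.setup_tokens(p)   # token codes must exist before the rules
    ebnfparse.parse_grammar_text(p,
        "eval_input: testlist NEWLINE* ENDMARKER\n")
    goal = p.root_rules[p.symbols['eval_input']]  # rules keyed by code
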
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/grammar.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/grammar.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/grammar.py Sat Jan 28 02:10:44 2006
@@ -10,13 +10,13 @@
try:
from pypy.interpreter.baseobjspace import Wrappable
from pypy.interpreter.pyparser.pytoken import NULLTOKEN
- from pypy.interpreter.pyparser.pysymbol import SymbolMapper
+ from pypy.interpreter.pyparser.parser import Parser
except ImportError:
# allows standalone testing
Wrappable = object
NULLTOKEN = -1 # None
- from pysymbol import SymbolMapper
-
+ from parser import Parser
+
from syntaxtree import SyntaxNode, TempSyntaxNode, TokenNode
@@ -75,7 +75,8 @@
def build_first_sets(rules):
- """builds the real first tokens set for each rule in <rules>
+ """XXX : dead
+ builds the real first tokens set for each rule in <rules>
Because a rule can be recursive (directly or indirectly), the
*simplest* algorithm to build each first set is to recompute them
@@ -106,17 +107,12 @@
class AbstractBuilder(object):
"""Abstract base class for builder objects"""
- def __init__(self, symbols, rules=None, debug=0 ):
- # a dictionary of grammar rules for debug/reference
- if rules is not None:
- self.rules = rules
- else:
- self.rules = {}
+ def __init__(self, parser, debug=0 ):
# This attribute is here for convenience
self.debug = debug
- # mapping from codename to symbols
- assert isinstance( symbols, SymbolMapper )
- self.symbols = symbols
+ # the parser that represents the grammar used
+ assert isinstance( parser, Parser )
+ self.parser = parser
def context(self):
"""Return an opaque context object"""
@@ -148,24 +144,20 @@
class BaseGrammarBuilder(AbstractBuilder):
"""Base/default class for a builder"""
- def __init__(self, rules=None, debug=0, symbols={} ):
- if rules is None:
- rules = SymbolMapper()
- AbstractBuilder.__init__(self, rules, debug, symbols )
+ def __init__(self, parser, debug=0 ):
+ AbstractBuilder.__init__(self, parser, debug )
# stacks contain different objects depending on the builder class
# to be RPython they should not be defined in the base class
self.stack = []
def context(self):
"""Returns the state of the builder to be restored later"""
- #print "Save Stack:", self.stack
return BaseGrammarBuilderContext(len(self.stack))
def restore(self, ctx):
assert isinstance(ctx, BaseGrammarBuilderContext)
del self.stack[ctx.stackpos:]
- #print "Restore Stack:", self.stack
-
+
def alternative(self, rule, source):
# Do nothing, keep rule on top of the stack
if rule.is_root():
@@ -216,9 +208,11 @@
symbols = {} # dirty trick to provide a symbols mapping while printing (and not putting it in every object)
- def __init__(self, codename):
+ def __init__(self, parser, codename):
# the rule name
#assert type(codename)==int
+ assert isinstance(parser, Parser)
+ self.parser = parser
self.codename = codename # integer mapping to either a token value or rule symbol value
self.args = []
self.first_set = []
@@ -233,7 +227,6 @@
if self.codename >=0:
return True
return False
-
def match(self, source, builder, level=0):
"""Try to match a grammar rule
@@ -256,17 +249,17 @@
pos1 = source.get_pos()
in_first_set = self.match_first_set(token)
if not in_first_set: # and not EmptyToken in self.first_set:
- if EmptyToken in self.first_set:
+ if self.parser.EmptyToken in self.first_set:
ret = builder.sequence(self, source, 0 )
if self._trace:
- self._debug_display(token, level, 'eee', builder.symbols)
+ self._debug_display(token, level, 'eee' )
return ret
if self._trace:
- self._debug_display(token, level, 'rrr', builder.symbols)
+ self._debug_display(token, level, 'rrr' )
return 0
elif self._trace:
- self._debug_display(token, level, '>>>', builder.symbols)
-
+ self._debug_display(token, level, '>>>')
+
res = self._match(source, builder, level)
if self._trace:
pos2 = source.get_pos()
@@ -274,21 +267,20 @@
prefix = '+++'
else:
prefix = '---'
- self._debug_display(token, level, prefix, builder.symbols)
+ self._debug_display(token, level, prefix)
print ' '*level, prefix, " TEXT ='%s'" % (
source.get_source_text(pos1,pos2))
if res:
print "*" * 50
return res
- def _debug_display(self, token, level, prefix, symbols):
+ def _debug_display(self, token, level, prefix):
"""prints context debug informations"""
prefix = '%s%s' % (' ' * level, prefix)
print prefix, " RULE =", self
print prefix, " TOKEN =", token
print prefix, " FIRST SET =", self.first_set
-
-
+
def _match(self, source, builder, level=0):
"""Try to match a grammar rule
@@ -302,7 +294,7 @@
returns None if no match or an object build by builder
"""
return 0
-
+
def parse(self, source):
"""Returns a simplified grammar if the rule matched at the source
current context or None"""
@@ -311,43 +303,27 @@
pass
def __str__(self):
- # XXX: remove me after debug
- symbols = {}
- import pytoken
- import pysymbol
- symbols.update( pysymbol._cpython_symbols.sym_name )
- symbols.update( pytoken.tok_name )
-
- return self.display(0, symbols )
-# return self.display(0, GrammarElement.symbols )
+ return self.display(0)
def __repr__(self):
- # XXX: remove me after debug
- symbols = {}
- import pytoken
- import pysymbol
- symbols.update( pysymbol._cpython_symbols.sym_name )
- symbols.update( pytoken.tok_name )
-
- return self.display(0, symbols )
-# return self.display(0, GrammarElement.symbols )
+ return self.display(0)
- def display(self, level=0, symbols={}):
+ def display(self, level=0):
"""Helper function used to represent the grammar.
mostly used for debugging the grammar itself"""
return "GrammarElement"
- def debug_return(self, ret, symbols, arg="" ):
+ def debug_return(self, ret, arg="" ):
# FIXME: use a wrapper of match() methods instead of debug_return()
# to prevent additional indirection even better a derived
# Debugging builder class
if ret and DEBUG > 0:
print "matched %s (%s): %s" % (self.__class__.__name__,
- arg, self.display(0, symbols=symbols) )
+ arg, self.display(0) )
return ret
-
+
def calc_first_set(self):
"""returns the list of possible next tokens
*must* be implemented in subclasses
@@ -380,27 +356,27 @@
class GrammarProxy(GrammarElement):
- def __init__(self, rule_name, codename=-1 ):
- GrammarElement.__init__(self, codename )
+ def __init__(self, parser, rule_name, codename=-1 ):
+ GrammarElement.__init__(self, parser, codename )
self.rule_name = rule_name
self.object = None
- def display(self, level=0, symbols={}):
+ def display(self, level=0):
"""Helper function used to represent the grammar.
mostly used for debugging the grammar itself"""
- name = symbols.get(self.codename, self.rule_name)
+ name = self.parser.symbol_repr(self.codename)
repr = "Proxy("+name
if self.object:
- repr+=","+self.object.display(level=1,symbols=symbols)
+ repr+=","+self.object.display(1)
repr += ")"
return repr
-
+
class Alternative(GrammarElement):
"""Represents an alternative in a grammar rule (as in S -> A | B | C)"""
- def __init__(self, name, args):
- GrammarElement.__init__(self, name )
+ def __init__(self, parser, name, args):
+ GrammarElement.__init__(self, parser, name )
self.args = args
self._reordered = False
for i in self.args:
@@ -418,7 +394,7 @@
# to see if this solve our problems with infinite recursion
for rule in self.args:
if USE_LOOKAHEAD:
- if not rule.match_first_set(tok) and EmptyToken not in rule.first_set:
+ if not rule.match_first_set(tok) and self.parser.EmptyToken not in rule.first_set:
if self._trace:
print "Skipping impossible rule: %s" % (rule,)
continue
@@ -428,15 +404,15 @@
return ret
return 0
- def display(self, level=0, symbols={}):
- name = get_symbol( self.codename, symbols )
+ def display(self, level=0):
+ name = self.parser.symbol_repr( self.codename )
if level == 0:
name = name + " -> "
elif self.is_root():
return name
else:
name = ""
- items = [ a.display(1,symbols) for a in self.args ]
+ items = [ a.display(1) for a in self.args ]
return name+"(" + "|".join( items ) + ")"
def calc_first_set(self):
@@ -460,7 +436,7 @@
# <tokens> is only needed for warning / debugging purposes
tokens_set = []
for rule in self.args:
- if EmptyToken in rule.first_set:
+ if self.parser.EmptyToken in rule.first_set:
empty_set.append(rule)
else:
not_empty_set.append(rule)
@@ -469,7 +445,7 @@
# It will check if a token is part of several first sets of
# a same alternative
for token in rule.first_set:
- if token is not EmptyToken and token in tokens_set:
+ if token is not self.parser.EmptyToken and token in tokens_set:
print "Warning, token %s in\n\t%s's first set is " \
" part of a previous rule's first set in " \
" alternative\n\t%s" % (token, rule, self)
@@ -497,8 +473,8 @@
class Sequence(GrammarElement):
"""Reprensents a Sequence in a grammar rule (as in S -> A B C)"""
- def __init__(self, name, args):
- GrammarElement.__init__(self, name )
+ def __init__(self, parser, name, args):
+ GrammarElement.__init__(self, parser, name )
self.args = args
for i in self.args:
assert isinstance( i, GrammarElement )
@@ -520,15 +496,15 @@
ret = builder.sequence(self, source, len(self.args))
return ret
- def display(self, level=0, symbols={}):
- name = get_symbol( self.codename, symbols )
+ def display(self, level=0):
+ name = self.parser.symbol_repr( self.codename )
if level == 0:
name = name + " -> "
elif self.is_root():
return name
else:
name = ""
- items = [a.display(1,symbols) for a in self.args]
+ items = [a.display(1) for a in self.args]
return name + "(" + " ".join( items ) + ")"
def calc_first_set(self):
@@ -543,18 +519,18 @@
for rule in self.args:
if not rule.first_set:
break
- if EmptyToken in self.first_set:
- self.first_set.remove( EmptyToken )
+ if self.parser.EmptyToken in self.first_set:
+ self.first_set.remove( self.parser.EmptyToken )
- # del self.first_set[EmptyToken]
+ # del self.first_set[self.parser.EmptyToken]
# while we're in this loop, keep aggregating possible tokens
for t in rule.first_set:
if t not in self.first_set:
self.first_set.append(t)
# self.first_set[t] = 1
- if EmptyToken not in rule.first_set:
+ if self.parser.EmptyToken not in rule.first_set:
break
-
+
def validate( self, syntax_node ):
"""validate a syntax tree/subtree from this grammar node"""
if self.codename != syntax_node.name:
@@ -570,13 +546,10 @@
-
-
-
class KleeneStar(GrammarElement):
"""Represents a KleeneStar in a grammar rule as in (S -> A+) or (S -> A*)"""
- def __init__(self, name, _min = 0, _max = -1, rule=None):
- GrammarElement.__init__( self, name )
+ def __init__(self, parser, name, _min = 0, _max = -1, rule=None):
+ GrammarElement.__init__( self, parser, name )
self.args = [rule]
self.min = _min
if _max == 0:
@@ -584,8 +557,8 @@
self.max = _max
self.star = "x"
if self.min == 0:
- self.first_set.append( EmptyToken )
- # self.first_set[EmptyToken] = 1
+ self.first_set.append( self.parser.EmptyToken )
+ # self.first_set[self.parser.EmptyToken] = 1
def _match(self, source, builder, level=0):
"""matches a number of times self.args[0]. the number must be
@@ -616,14 +589,19 @@
ret = builder.sequence(self, source, rules)
return ret
- def display(self, level=0, symbols={}):
- name = get_symbol( self.codename, symbols )
+ def display(self, level=0):
+ name = self.parser.symbol_repr( self.codename )
if level==0:
name = name + " -> "
elif self.is_root():
return name
else:
name = ""
+ star = self.get_star()
+ s = self.args[0].display(1)
+ return name + "%s%s" % (s, star)
+
+ def get_star(self):
star = "{%d,%d}" % (self.min,self.max)
if self.min==0 and self.max==1:
star = "?"
@@ -631,23 +609,21 @@
star = "*"
elif self.min==1 and self.max==-1:
star = "+"
- s = self.args[0].display(1, symbols)
- return name + "%s%s" % (s, star)
-
+ return star
def calc_first_set(self):
"""returns the list of possible next tokens
if S -> A*:
- LAH(S) = Union( LAH(A), EmptyToken )
+ LAH(S) = Union( LAH(A), self.parser.EmptyToken )
if S -> A+:
LAH(S) = LAH(A)
"""
rule = self.args[0]
self.first_set = rule.first_set[:]
# self.first_set = dict(rule.first_set)
- if self.min == 0 and EmptyToken not in self.first_set:
- self.first_set.append(EmptyToken)
- # self.first_set[EmptyToken] = 1
+ if self.min == 0 and self.parser.EmptyToken not in self.first_set:
+ self.first_set.append(self.parser.EmptyToken)
+ # self.first_set[self.parser.EmptyToken] = 1
def validate( self, syntax_node ):
"""validate a syntax tree/subtree from this grammar node"""
@@ -666,8 +642,8 @@
class Token(GrammarElement):
"""Represents a Token in a grammar rule (a lexer token)"""
- def __init__(self, codename, value=None):
- GrammarElement.__init__(self, codename)
+ def __init__(self, parser, codename, value=None):
+ GrammarElement.__init__(self, parser, codename)
self.value = value
self.first_set = [self]
# self.first_set = {self: 1}
@@ -697,13 +673,13 @@
source.restore( ctx )
return 0
- def display(self, level=0, symbols={}):
- name = get_symbol( self.codename, symbols )
+ def display(self, level=0):
+ name = self.parser.symbol_repr( self.codename )
if self.value is None:
return "<%s>" % name
else:
return "<%s>=='%s'" % (name, self.value)
-
+
def match_token(self, other):
"""convenience '==' implementation, this is *not* a *real* equality test
@@ -715,12 +691,12 @@
"""
if not isinstance(other, Token):
raise RuntimeError("Unexpected token type")
- if other is EmptyToken:
+ if other is self.parser.EmptyToken:
return False
res = other.codename == self.codename and self.value in [None, other.value]
#print "matching", self, other, res
return res
-
+
def __eq__(self, other):
return self.codename == other.codename and self.value == other.value
@@ -740,8 +716,5 @@
return False
-EmptyToken = Token(NULLTOKEN, None)
-
-
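
With the module-level EmptyToken gone, the epsilon sentinel is assumed to
live on each Parser instance (every use in this file became
self.parser.EmptyToken). A short sketch of what that buys, under the same
assumptions as before:

    from pypy.interpreter.pyparser.parser import Parser

    gram_a, gram_b = Parser(), Parser()
    # each grammar owns its epsilon, so first sets of independent parsers
    # (e.g. GRAMMAR_GRAMMAR and PYTHON_PARSER) can no longer be confused
    assert gram_a.EmptyToken is not gram_b.EmptyToken
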
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/pysymbol.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/pysymbol.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/pysymbol.py Sat Jan 28 02:10:44 2006
@@ -10,6 +10,7 @@
# important here
class SymbolMapper(object):
+ """XXX dead"""
def __init__(self, sym_name=None ):
_anoncount = self._anoncount = -10
_count = self._count = 0
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonlexer.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonlexer.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonlexer.py Sat Jan 28 02:10:44 2006
@@ -7,8 +7,8 @@
from pypy.interpreter.pyparser.grammar import TokenSource, Token, AbstractContext
from pypy.interpreter.pyparser.error import SyntaxError
+
import pytoken
-from pytoken import NEWLINE
# Don't import string for that ...
NAMECHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
@@ -369,8 +369,8 @@
return (self._current_line, self._lineno)
# return 'line %s : %s' % ('XXX', self._current_line)
-NONE_LIST = [pytoken.ENDMARKER, pytoken.INDENT, pytoken.DEDENT]
-NAMED_LIST = [pytoken.OP]
+#NONE_LIST = [pytoken.ENDMARKER, pytoken.INDENT, pytoken.DEDENT]
+#NAMED_LIST = [pytoken.OP]
Source = PythonSource
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonparse.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonparse.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonparse.py Sat Jan 28 02:10:44 2006
@@ -5,26 +5,31 @@
helper functions are provided that use the grammar to parse
using file_input, single_input and eval_input targets
"""
+import autopath
+import sys
+import os
from pypy.interpreter.error import OperationError, debug_print
from pypy.interpreter import gateway
from pypy.interpreter.pyparser.error import SyntaxError
from pypy.tool.option import Options
-from pythonlexer import Source, match_encoding_declaration
-import pysymbol
-import ebnfparse
-import sys
-import os
-import grammar
+from pypy.interpreter.pyparser.pythonlexer import Source, match_encoding_declaration
+import pypy.interpreter.pyparser.pysymbol as pysymbol
+import pypy.interpreter.pyparser.pytoken as pytoken
+import pypy.interpreter.pyparser.ebnfparse as ebnfparse
+import pypy.interpreter.pyparser.grammar as grammar
+from pypy.interpreter.pyparser.parser import Parser
+try:
+ from pypy.interpreter.pyparser import symbol
+except ImportError:
+ # for standalone testing
+ import symbol
from codeop import PyCF_DONT_IMPLY_DEDENT
-class PythonParser(object):
+class PythonParser(Parser):
"""Wrapper class for python grammar"""
- def __init__(self, rules, items ):
- self.items = items
- self.rules = rules
- # Build first sets for each rule (including anonymous ones)
- grammar.build_first_sets(self.items)
+ def __init__(self):
+ Parser.__init__(self)
def parse_source(self, textsrc, goal, builder, flags=0):
"""Parse a python source according to goal"""
@@ -46,9 +51,9 @@
def parse_lines(self, lines, goal, builder, flags=0):
goalnumber = pysymbol._cpython_symbols.sym_values[goal]
- target = self.rules[goalnumber]
+ target = self.root_rules[goalnumber]
src = Source(lines, flags)
-
+
result = target.match(src, builder)
if not result:
line, lineno = src.debug()
@@ -56,7 +61,7 @@
raise SyntaxError("invalid syntax", lineno, -1, line)
# return None
return builder
-
+
_recode_to_utf8 = gateway.applevel(r'''
def _recode_to_utf8(text, encoding):
return unicode(text, encoding).encode("utf-8")
@@ -96,7 +101,7 @@
if eol2 < 0:
return _check_line_for_encoding(s[eol + 1:])
return _check_line_for_encoding(s[eol + 1:eol2])
-
+
def _check_line_for_encoding(line):
"""returns the declared encoding or None"""
i = 0
@@ -119,28 +124,15 @@
# unfortunately the command line options are not parsed yet
PYTHON_GRAMMAR, PYPY_VERSION = get_grammar_file( Options.version )
-def python_grammar(fname):
- """returns a PythonParser build from the specified grammar file"""
- level = grammar.DEBUG
- grammar.DEBUG = 0
- gram = ebnfparse.parse_grammar( file(fname) )
- grammar.DEBUG = level
- parser = PythonParser( gram.rules, gram.items )
- return parser
-def python_grammar_dyn(fname):
+def load_python_grammar(fname):
"""Loads the grammar using the 'dynamic' rpython parser"""
_grammar_file = file(fname)
- ebnfbuilder = ebnfparse.parse_grammar_text( file(fname).read() )
- ebnfbuilder.resolve_rules()
- parser = PythonParser( ebnfbuilder.root_rules, ebnfbuilder.all_rules )
+ parser = PYTHON_PARSER
+ # populate symbols
+ ebnfparse.parse_grammar_text( parser, file(fname).read() )
return parser
-
-debug_print( "Loading grammar %s" % PYTHON_GRAMMAR )
-PYTHON_PARSER = python_grammar( PYTHON_GRAMMAR )
-PYTHON_PARSER_DYN = python_grammar_dyn( PYTHON_GRAMMAR )
-
def reload_grammar(version):
"""helper function to test with pypy different grammars"""
global PYTHON_GRAMMAR, PYTHON_PARSER, PYPY_VERSION
@@ -151,7 +143,7 @@
def parse_file_input(pyf, gram, builder ):
"""Parse a python file"""
return gram.parse_source( pyf.read(), "file_input", builder )
-
+
def parse_single_input(textsrc, gram, builder ):
"""Parse a python single statement"""
return gram.parse_source( textsrc, "single_input", builder )
@@ -160,11 +152,82 @@
"""Parse a python expression"""
return gram.parse_source( textsrc, "eval_input", builder )
-
def grammar_rules( space ):
- return space.wrap( PYTHON_PARSER.rules )
+ return space.wrap( PYTHON_PARSER.root_rules )
+
+def dot_node( gen, rule_name, rule, symbols, edges, count ):
+ from pypy.interpreter.pyparser.grammar import KleeneStar, Sequence, Alternative, Token
+ subrule_name = symbols.get( rule.codename, rule.codename )
+ label = None
+ if not subrule_name.startswith(":"+rule_name):
+ node_name = rule_name + "_ext_" + str(count[0])
+ count[0]+=1
+ label = subrule_name
+ gen.emit_node( node_name, shape="parallelogram", label=subrule_name )
+ edges.append( (node_name, subrule_name) )
+ return node_name
+ subrule_name = subrule_name.replace(":","_")
+ if isinstance(rule, KleeneStar):
+ node = dot_node( gen, rule_name, rule.args[0], symbols, edges, count )
+ gen.emit_edge( node, node, label=rule.get_star(), style='solid' )
+ return node
+ elif isinstance(rule, Sequence):
+ gen.enter_subgraph( subrule_name )
+ first_node = None
+ for n in rule.args:
+ node_name = dot_node( gen, rule_name, n, symbols, edges, count )
+ if first_node:
+ gen.emit_edge( first_node, node_name, style='solid' )
+ first_node = node_name
+ gen.leave_subgraph()
+ return subrule_name
+ elif isinstance(rule, Alternative):
+ gen.enter_subgraph( subrule_name )
+ for n in rule.args:
+ node_name = dot_node( gen, rule_name, n, symbols, edges, count )
+ gen.leave_subgraph()
+ return subrule_name
+ elif isinstance(rule, Token):
+ node_name = rule_name + "_ext_" + str(count[0])
+ count[0]+=1
+ gen.emit_node( node_name, shape='box', label=rule.display( 0, symbols ) )
+ return node_name
+ raise RuntimeError("Unknown node type")
+
+def gen_grammar_dot( name, root_rules, rules, symbols ):
+ """Quick hack to output a dot graph of the grammar"""
+ from pypy.translator.tool.make_dot import DotGen
+ gen = DotGen(name)
+ edges = []
+ count = [0]
+ for r in root_rules:
+ rule_name = symbols.get( r.codename, r.codename )
+ gen.emit_node( rule_name, shape='hexagon', label=r.display(0,symbols) )
+ for rule in r.args:
+ node = dot_node( gen, rule_name, rule, symbols, edges, count )
+ gen.emit_edge( rule_name, node, style='solid' )
+ for left, right in edges:
+ gen.emit_edge( left, right, style='solid' )
+ gen.generate(target='ps')
+
+
+def parse_grammar(space, w_src):
+ """Loads the grammar using the 'dynamic' rpython parser"""
+ src = space.str_w( w_src )
+ ebnfbuilder = ebnfparse.parse_grammar_text( src )
+ ebnfbuilder.resolve_rules()
+ grammar.build_first_sets(ebnfbuilder.all_rules)
+ return space.wrap( ebnfbuilder.root_rules )
+
+debug_print( "Loading grammar %s" % PYTHON_GRAMMAR )
+PYTHON_PARSER = PythonParser()
+PYTHON_PARSER.load_symbols( symbol.sym_name )
+pytoken.setup_tokens( PYTHON_PARSER )
+load_python_grammar( PYTHON_GRAMMAR )
-def make_rule( space, w_rule ):
- rule = space.str_w( w_rule )
-
+if __name__=="__main__":
+ symbols = {}
+ symbols.update( pytoken.tok_name )
+ symbols.update( pysymbol._cpython_symbols.sym_name )
+ gen_grammar_dot("grammar", PYTHON_PARSER.rules.values(), PYTHON_PARSER.items, symbols )
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/pytoken.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/pytoken.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/pytoken.py Sat Jan 28 02:10:44 2006
@@ -11,125 +11,71 @@
tok_name = {-1 : 'NULLTOKEN'}
tok_values = {'NULLTOKEN' : -1}
-def add_token(name):
- global N_TOKENS
- value = N_TOKENS
- N_TOKENS += 1
- _g = globals()
- _g[name] = value
- tok_name[value] = name
- tok_values[name] = value
-
+tok_rpunct = {}
+def setup_tokens( parser ):
+ global tok_rpunct
# For compatibility, this produces the same constant values as Python 2.4.
-add_token( 'ENDMARKER' )
-add_token( 'NAME' )
-add_token( 'NUMBER' )
-add_token( 'STRING' )
-add_token( 'NEWLINE' )
-add_token( 'INDENT' )
-add_token( 'DEDENT' )
-add_token( 'LPAR' )
-add_token( 'RPAR' )
-add_token( 'LSQB' )
-add_token( 'RSQB' )
-add_token( 'COLON' )
-add_token( 'COMMA' )
-add_token( 'SEMI' )
-add_token( 'PLUS' )
-add_token( 'MINUS' )
-add_token( 'STAR' )
-add_token( 'SLASH' )
-add_token( 'VBAR' )
-add_token( 'AMPER' )
-add_token( 'LESS' )
-add_token( 'GREATER' )
-add_token( 'EQUAL' )
-add_token( 'DOT' )
-add_token( 'PERCENT' )
-add_token( 'BACKQUOTE' )
-add_token( 'LBRACE' )
-add_token( 'RBRACE' )
-add_token( 'EQEQUAL' )
-add_token( 'NOTEQUAL' )
-add_token( 'LESSEQUAL' )
-add_token( 'GREATEREQUAL' )
-add_token( 'TILDE' )
-add_token( 'CIRCUMFLEX' )
-add_token( 'LEFTSHIFT' )
-add_token( 'RIGHTSHIFT' )
-add_token( 'DOUBLESTAR' )
-add_token( 'PLUSEQUAL' )
-add_token( 'MINEQUAL' )
-add_token( 'STAREQUAL' )
-add_token( 'SLASHEQUAL' )
-add_token( 'PERCENTEQUAL' )
-add_token( 'AMPEREQUAL' )
-add_token( 'VBAREQUAL' )
-add_token( 'CIRCUMFLEXEQUAL' )
-add_token( 'LEFTSHIFTEQUAL' )
-add_token( 'RIGHTSHIFTEQUAL' )
-add_token( 'DOUBLESTAREQUAL' )
-add_token( 'DOUBLESLASH' )
-add_token( 'DOUBLESLASHEQUAL' )
-add_token( 'AT' )
-add_token( 'OP' )
-add_token( 'ERRORTOKEN' )
+ parser.add_token( 'ENDMARKER' )
+ parser.add_token( 'NAME' )
+ parser.add_token( 'NUMBER' )
+ parser.add_token( 'STRING' )
+ parser.add_token( 'NEWLINE' )
+ parser.add_token( 'INDENT' )
+ parser.add_token( 'DEDENT' )
+ parser.add_token( 'LPAR', "(" )
+ parser.add_token( 'RPAR', ")" )
+ parser.add_token( 'LSQB', "[" )
+ parser.add_token( 'RSQB', "]" )
+ parser.add_token( 'COLON', ":" )
+ parser.add_token( 'COMMA', "," )
+ parser.add_token( 'SEMI', ";" )
+ parser.add_token( 'PLUS', "+" )
+ parser.add_token( 'MINUS', "-" )
+ parser.add_token( 'STAR', "*" )
+ parser.add_token( 'SLASH', "/" )
+ parser.add_token( 'VBAR', "|" )
+ parser.add_token( 'AMPER', "&" )
+ parser.add_token( 'LESS', "<" )
+ parser.add_token( 'GREATER', ">" )
+ parser.add_token( 'EQUAL', "=" )
+ parser.add_token( 'DOT', "." )
+ parser.add_token( 'PERCENT', "%" )
+ parser.add_token( 'BACKQUOTE', "`" )
+ parser.add_token( 'LBRACE', "{" )
+ parser.add_token( 'RBRACE', "}" )
+ parser.add_token( 'EQEQUAL', "==" )
+ ne = parser.add_token( 'NOTEQUAL', "!=" )
+ parser.tok_values["<>"] = ne
+ parser.add_token( 'LESSEQUAL', "<=" )
+ parser.add_token( 'GREATEREQUAL', ">=" )
+ parser.add_token( 'TILDE', "~" )
+ parser.add_token( 'CIRCUMFLEX', "^" )
+ parser.add_token( 'LEFTSHIFT', "<<" )
+ parser.add_token( 'RIGHTSHIFT', ">>" )
+ parser.add_token( 'DOUBLESTAR', "**" )
+ parser.add_token( 'PLUSEQUAL', "+=" )
+ parser.add_token( 'MINEQUAL', "-=" )
+ parser.add_token( 'STAREQUAL', "*=" )
+ parser.add_token( 'SLASHEQUAL', "/=" )
+ parser.add_token( 'PERCENTEQUAL', "%=" )
+ parser.add_token( 'AMPEREQUAL', "&=" )
+ parser.add_token( 'VBAREQUAL', "|=" )
+ parser.add_token( 'CIRCUMFLEXEQUAL', "^=" )
+ parser.add_token( 'LEFTSHIFTEQUAL', "<<=" )
+ parser.add_token( 'RIGHTSHIFTEQUAL', ">>=" )
+ parser.add_token( 'DOUBLESTAREQUAL', "**=" )
+ parser.add_token( 'DOUBLESLASH', "//" )
+ parser.add_token( 'DOUBLESLASHEQUAL',"//=" )
+ parser.add_token( 'AT', "@" )
+ parser.add_token( 'OP' )
+ parser.add_token( 'ERRORTOKEN' )
# extra PyPy-specific tokens
-add_token( "COMMENT" )
-add_token( "NL" )
-
-# a reverse mapping from internal tokens def to more pythonic tokens
-tok_punct = {
- "&" : AMPER,
- "&=" : AMPEREQUAL,
- "`" : BACKQUOTE,
- "^" : CIRCUMFLEX,
- "^=" : CIRCUMFLEXEQUAL,
- ":" : COLON,
- "," : COMMA,
- "." : DOT,
- "//" : DOUBLESLASH,
- "//=" : DOUBLESLASHEQUAL,
- "**" : DOUBLESTAR,
- "**=" : DOUBLESTAREQUAL,
- "==" : EQEQUAL,
- "=" : EQUAL,
- ">" : GREATER,
- ">=" : GREATEREQUAL,
- "{" : LBRACE,
- "}" : RBRACE,
- "<<" : LEFTSHIFT,
- "<<=" : LEFTSHIFTEQUAL,
- "<" : LESS,
- "<=" : LESSEQUAL,
- "(" : LPAR,
- "[" : LSQB,
- "-=" : MINEQUAL,
- "-" : MINUS,
- "!=" : NOTEQUAL,
- "<>" : NOTEQUAL,
- "%" : PERCENT,
- "%=" : PERCENTEQUAL,
- "+" : PLUS,
- "+=" : PLUSEQUAL,
- ")" : RBRACE,
- ">>" : RIGHTSHIFT,
- ">>=" : RIGHTSHIFTEQUAL,
- ")" : RPAR,
- "]" : RSQB,
- ";" : SEMI,
- "/" : SLASH,
- "/=" : SLASHEQUAL,
- "*" : STAR,
- "*=" : STAREQUAL,
- "~" : TILDE,
- "|" : VBAR,
- "|=" : VBAREQUAL,
- "@": AT,
- }
-tok_rpunct = {}
-for string, value in tok_punct.items():
- tok_rpunct[value] = string
+ parser.add_token( "COMMENT" )
+ parser.add_token( "NL" )
+ tok_rpunct = parser.tok_values.copy()
+ for _name, _value in parser.tokens.items():
+ globals()[_name] = _value
+ setattr(parser, _name, _value)
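
setup_tokens() replaces both the old add_token() free function and the
hand-written tok_punct table (which, incidentally, mapped ")" to RBRACE as
well as RPAR). Note also that the new module-level tok_rpunct is a copy of
parser.tok_values, i.e. a literal -> code mapping, the inverse of the
code -> literal mapping the old tok_rpunct held. A usage sketch under the
same assumed Parser interface:

    from pypy.interpreter.pyparser.parser import Parser
    import pytoken

    p = Parser()
    pytoken.setup_tokens(p)
    # token names end up as module globals and as parser attributes
    assert pytoken.LPAR == p.LPAR == p.tokens['LPAR']
    # "<>" and "!=" share the NOTEQUAL code
    assert p.tok_values['<>'] == p.tok_values['!=']
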
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/tuplebuilder.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/tuplebuilder.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/tuplebuilder.py Sat Jan 28 02:10:44 2006
@@ -54,7 +54,7 @@
class TupleBuilderContext(AbstractContext):
def __init__(self, stackpos ):
self.stackpos = stackpos
-
+
class TupleBuilder(AbstractBuilder):
"""A builder that directly produce the AST"""
@@ -81,7 +81,7 @@
nodes = expand_nodes( [self.stack[-1]] )
self.stack[-1] = NonTerminal( rule.codename, nodes )
return True
-
+
def sequence(self, rule, source, elts_number):
""" """
num = rule.codename
Modified: pypy/branch/ast-experiments/pypy/module/recparser/__init__.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/module/recparser/__init__.py (original)
+++ pypy/branch/ast-experiments/pypy/module/recparser/__init__.py Sat Jan 28 02:10:44 2006
@@ -48,6 +48,7 @@
'decode_string_literal': 'pyparser.decode_string_literal',
'install_compiler_hook' : 'pypy.interpreter.pycompiler.install_compiler_hook',
'rules' : 'pypy.interpreter.pyparser.pythonparse.grammar_rules',
+ 'parse_grammar' : 'pypy.interpreter.pyparser.pythonparse.parse_grammar',
}
# Automatically exports each AST class
Modified: pypy/branch/ast-experiments/pypy/translator/tool/make_dot.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/translator/tool/make_dot.py (original)
+++ pypy/branch/ast-experiments/pypy/translator/tool/make_dot.py Sat Jan 28 02:10:44 2006
@@ -49,9 +49,9 @@
def leave_subgraph(self):
self.emit("}")
- def emit_edge(self, name1, name2, label="",
- style="dashed",
- color="black",
+ def emit_edge(self, name1, name2, label="",
+ style="dashed",
+ color="black",
dir="forward",
weight="5",
):