[pypy-svn] r36450 - pypy/branch/ast-experiments/pypy/interpreter/pyparser
adim at codespeak.net
Thu Jan 11 10:34:25 CET 2007
Author: adim
Date: Thu Jan 11 10:34:19 2007
New Revision: 36450
Modified:
pypy/branch/ast-experiments/pypy/interpreter/pyparser/astbuilder.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/asthelper.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfgrammar.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnflexer.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfparse.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/grammar.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonlexer.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonparse.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonutil.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/pytoken.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/tuplebuilder.py
Log:
use parser's tokens dict and get rid of setattr() usage in pytoken
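
Editor's note: in short, token codes that setup_tokens() used to inject as
parser attributes via setattr() are now looked up in the parser's tokens
dict. A minimal sketch of the before/after access pattern; Parser and
add_token here are simplified stand-ins for the real classes in grammar.py:

    class Parser(object):
        def __init__(self):
            self.tokens = {}
            self._count = 0

        def add_token(self, name):
            # map a token name to the next free integer code
            if name not in self.tokens:
                self.tokens[name] = self._count
                self._count += 1
            return self.tokens[name]

    parser = Parser()
    parser.add_token('LPAR')

    # before this commit, pytoken.setup_tokens() finished with
    #     setattr(parser, 'LPAR', parser.tokens['LPAR'])
    # so builders compared against an injected attribute:
    #     top.name == parser.LPAR
    # after this commit, the dict is used directly:
    #     top.name == parser.tokens['LPAR']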
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/astbuilder.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/astbuilder.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/astbuilder.py Thu Jan 11 10:34:19 2007
@@ -46,31 +46,31 @@
top = atoms[0]
if isinstance(top, TokenObject):
# assert isinstance(top, TokenObject) # rtyper
- if top.name == builder.parser.LPAR:
+ if top.name == builder.parser.tokens['LPAR']:
if len(atoms) == 2:
builder.push(ast.Tuple([], top.lineno))
else:
builder.push( atoms[1] )
- elif top.name == builder.parser.LSQB:
+ elif top.name == builder.parser.tokens['LSQB']:
if len(atoms) == 2:
builder.push(ast.List([], top.lineno))
else:
list_node = atoms[1]
list_node.lineno = top.lineno
builder.push(list_node)
- elif top.name == builder.parser.LBRACE:
+ elif top.name == builder.parser.tokens['LBRACE']:
items = []
for index in range(1, len(atoms)-1, 4):
# a : b , c : d
# ^ +1 +2 +3 +4
items.append((atoms[index], atoms[index+2]))
builder.push(ast.Dict(items, top.lineno))
- elif top.name == builder.parser.NAME:
+ elif top.name == builder.parser.tokens['NAME']:
val = top.get_value()
builder.push( ast.Name(val, top.lineno) )
- elif top.name == builder.parser.NUMBER:
+ elif top.name == builder.parser.tokens['NUMBER']:
builder.push(ast.Const(builder.eval_number(top.get_value()), top.lineno))
- elif top.name == builder.parser.STRING:
+ elif top.name == builder.parser.tokens['STRING']:
# need to concatenate strings in atoms
s = ''
if len(atoms) == 1:
@@ -86,7 +86,7 @@
accum.append(parsestr(builder.space, builder.source_encoding, token.get_value()))
w_s = space.call_method(empty, 'join', space.newlist(accum))
builder.push(ast.Const(w_s, top.lineno))
- elif top.name == builder.parser.BACKQUOTE:
+ elif top.name == builder.parser.tokens['BACKQUOTE']:
builder.push(ast.Backquote(atoms[1], atoms[1].lineno))
else:
raise SyntaxError("unexpected tokens", top.lineno, top.col)
@@ -107,7 +107,7 @@
else:
lineno = atoms[0].lineno
token = atoms[-2]
- if isinstance(token, TokenObject) and token.name == builder.parser.DOUBLESTAR:
+ if isinstance(token, TokenObject) and token.name == builder.parser.tokens['DOUBLESTAR']:
obj = parse_attraccess(slicecut(atoms, 0, -2), builder)
builder.push(ast.Power( obj, atoms[-1], lineno))
else:
@@ -122,11 +122,11 @@
token = atoms[0]
lineno = token.lineno
if isinstance(token, TokenObject):
- if token.name == builder.parser.PLUS:
+ if token.name == builder.parser.tokens['PLUS']:
builder.push( ast.UnaryAdd( atoms[1], lineno) )
- if token.name == builder.parser.MINUS:
+ if token.name == builder.parser.tokens['MINUS']:
builder.push( ast.UnarySub( atoms[1], lineno) )
- if token.name == builder.parser.TILDE:
+ if token.name == builder.parser.tokens['TILDE']:
builder.push( ast.Invert( atoms[1], lineno) )
def build_term(builder, nb):
@@ -137,13 +137,13 @@
right = atoms[i]
op_node = atoms[i-1]
assert isinstance(op_node, TokenObject)
- if op_node.name == builder.parser.STAR:
+ if op_node.name == builder.parser.tokens['STAR']:
left = ast.Mul( left, right, left.lineno )
- elif op_node.name == builder.parser.SLASH:
+ elif op_node.name == builder.parser.tokens['SLASH']:
left = ast.Div( left, right, left.lineno )
- elif op_node.name == builder.parser.PERCENT:
+ elif op_node.name == builder.parser.tokens['PERCENT']:
left = ast.Mod( left, right, left.lineno )
- elif op_node.name == builder.parser.DOUBLESLASH:
+ elif op_node.name == builder.parser.tokens['DOUBLESLASH']:
left = ast.FloorDiv( left, right, left.lineno )
else:
token = atoms[i-1]
@@ -158,9 +158,9 @@
right = atoms[i]
op_node = atoms[i-1]
assert isinstance(op_node, TokenObject)
- if op_node.name == builder.parser.PLUS:
+ if op_node.name == builder.parser.tokens['PLUS']:
left = ast.Add( left, right, left.lineno)
- elif op_node.name == builder.parser.MINUS:
+ elif op_node.name == builder.parser.tokens['MINUS']:
left = ast.Sub( left, right, left.lineno)
else:
token = atoms[i-1]
@@ -176,9 +176,9 @@
right = atoms[i]
op_node = atoms[i-1]
assert isinstance(op_node, TokenObject)
- if op_node.name == builder.parser.LEFTSHIFT:
+ if op_node.name == builder.parser.tokens['LEFTSHIFT']:
left = ast.LeftShift( left, right, lineno )
- elif op_node.name == builder.parser.RIGHTSHIFT:
+ elif op_node.name == builder.parser.tokens['RIGHTSHIFT']:
left = ast.RightShift( left, right, lineno )
else:
token = atoms[i-1]
@@ -255,9 +255,9 @@
lineno = token.lineno
assert isinstance(token, TokenObject)
if token.get_value() == 'not':
- builder.push(TokenObject(builder.parser.NAME, 'not in', lineno, builder.parser))
+ builder.push(TokenObject(builder.parser.tokens['NAME'], 'not in', lineno, builder.parser))
else:
- builder.push(TokenObject(builder.parser.NAME, 'is not', lineno, builder.parser))
+ builder.push(TokenObject(builder.parser.tokens['NAME'], 'is not', lineno, builder.parser))
else:
assert False, "TODO" # uh ?
@@ -309,7 +309,7 @@
return
op = atoms[1]
assert isinstance(op, TokenObject)
- if op.name == builder.parser.EQUAL:
+ if op.name == builder.parser.tokens['EQUAL']:
nodes = []
for i in range(0,l-2,2):
lvalue = to_lvalue(atoms[i], consts.OP_ASSIGN)
@@ -343,7 +343,7 @@
lineno = -1
for n in range(0,l,2):
node = atoms[n]
- if isinstance(node, TokenObject) and node.name == builder.parser.NEWLINE:
+ if isinstance(node, TokenObject) and node.name == builder.parser.tokens['NEWLINE']:
nodes.append(ast.Discard(ast.Const(builder.wrap_none()), node.lineno))
else:
nodes.append(node)
@@ -369,10 +369,10 @@
for node in atoms:
if isinstance(node, ast.Stmt):
stmts.extend(node.nodes)
- elif isinstance(node, TokenObject) and node.name == builder.parser.ENDMARKER:
+ elif isinstance(node, TokenObject) and node.name == builder.parser.tokens['ENDMARKER']:
# XXX Can't we just remove the last element of the list ?
break
- elif isinstance(node, TokenObject) and node.name == builder.parser.NEWLINE:
+ elif isinstance(node, TokenObject) and node.name == builder.parser.tokens['NEWLINE']:
continue
else:
stmts.append(node)
@@ -392,7 +392,7 @@
l = len(atoms)
if l == 1 or l==2:
atom0 = atoms[0]
- if isinstance(atom0, TokenObject) and atom0.name == builder.parser.NEWLINE:
+ if isinstance(atom0, TokenObject) and atom0.name == builder.parser.tokens['NEWLINE']:
atom0 = ast.Pass(atom0.lineno)
elif not isinstance(atom0, ast.Stmt):
atom0 = ast.Stmt([atom0], atom0.lineno)
@@ -412,7 +412,7 @@
return
items = []
token = atoms[1]
- if isinstance(token, TokenObject) and token.name == builder.parser.COMMA:
+ if isinstance(token, TokenObject) and token.name == builder.parser.tokens['COMMA']:
for i in range(0, l, 2): # this is atoms not 1
items.append(atoms[i])
else:
@@ -452,13 +452,13 @@
atoms = get_atoms(builder, nb)
first_token = atoms[0]
# Case 1 : '(' ...
- if isinstance(first_token, TokenObject) and first_token.name == builder.parser.LPAR:
- if len(atoms) == 2: # and atoms[1].token == builder.parser.RPAR:
+ if isinstance(first_token, TokenObject) and first_token.name == builder.parser.tokens['LPAR']:
+ if len(atoms) == 2: # and atoms[1].token == builder.parser.tokens['RPAR']:
builder.push(ArglistObject([], None, None, first_token.lineno))
elif len(atoms) == 3: # '(' Arglist ')'
# push arglist on the stack
builder.push(atoms[1])
- elif isinstance(first_token, TokenObject) and first_token.name == builder.parser.LSQB:
+ elif isinstance(first_token, TokenObject) and first_token.name == builder.parser.tokens['LSQB']:
if len(atoms) == 3 and isinstance(atoms[1], SlicelistObject):
builder.push(atoms[1])
else:
@@ -516,11 +516,11 @@
atoms = get_atoms(builder, nb)
token = atoms[0]
lineno = token.lineno
- if isinstance(token, TokenObject) and token.name == builder.parser.DOT:
+ if isinstance(token, TokenObject) and token.name == builder.parser.tokens['DOT']:
# Ellipsis:
builder.push(ast.Ellipsis(lineno))
elif len(atoms) == 1:
- if isinstance(token, TokenObject) and token.name == builder.parser.COLON:
+ if isinstance(token, TokenObject) and token.name == builder.parser.tokens['COLON']:
sliceinfos = [None, None, None]
builder.push(SlicelistObject('slice', sliceinfos, lineno))
else:
@@ -530,7 +530,7 @@
sliceinfos = [None, None, None]
infosindex = 0
for token in atoms:
- if isinstance(token, TokenObject) and token.name == builder.parser.COLON:
+ if isinstance(token, TokenObject) and token.name == builder.parser.tokens['COLON']:
infosindex += 1
else:
sliceinfos[infosindex] = token
@@ -578,7 +578,7 @@
# remove '@', '(' and ')' from atoms and use parse_attraccess
for token in atoms[1:]:
if isinstance(token, TokenObject) and \
- token.name in (builder.parser.LPAR, builder.parser.RPAR, builder.parser.NEWLINE):
+ token.name in (builder.parser.tokens['LPAR'], builder.parser.tokens['RPAR'], builder.parser.tokens['NEWLINE']):
# skip those ones
continue
else:
@@ -810,11 +810,11 @@
while index<l:
atom = atoms[index]
# for atom in atoms[index:]:
- if isinstance(atom, TokenObject) and atom.name == builder.parser.COMMA:
+ if isinstance(atom, TokenObject) and atom.name == builder.parser.tokens['COMMA']:
break
index += 1
## while index < l and isinstance(atoms[index], TokenObject) and \
-## atoms[index].name != builder.parser.COMMA:
+## atoms[index].name != builder.parser.tokens['COMMA']:
## index += 1
index += 1
builder.push(ast.Import(names, atoms[0].lineno))
@@ -833,10 +833,10 @@
index += (incr + 1) # skip 'import'
token = atoms[index]
assert isinstance(token, TokenObject) # XXX
- if token.name == builder.parser.STAR:
+ if token.name == builder.parser.tokens['STAR']:
names = [('*', None)]
else:
- if token.name == builder.parser.LPAR:
+ if token.name == builder.parser.tokens['LPAR']:
# multi-line imports
tokens = slicecut( atoms, index+1, -1 )
else:
@@ -917,14 +917,14 @@
start = 1
if l > 1:
token = atoms[1]
- if isinstance(token, TokenObject) and token.name == builder.parser.RIGHTSHIFT:
+ if isinstance(token, TokenObject) and token.name == builder.parser.tokens['RIGHTSHIFT']:
dest = atoms[2]
# skip following comma
start = 4
for index in range(start, l, 2):
items.append(atoms[index])
last_token = atoms[-1]
- if isinstance(last_token, TokenObject) and last_token.name == builder.parser.COMMA:
+ if isinstance(last_token, TokenObject) and last_token.name == builder.parser.tokens['COMMA']:
builder.push(ast.Print(items, dest, atoms[0].lineno))
else:
builder.push(ast.Printnl(items, dest, atoms[0].lineno))
@@ -1061,11 +1061,7 @@
class AstBuilder(Wrappable, BaseGrammarBuilder):
"""A builder that directly produce the AST"""
- def __init__(self, parser=None, debug=0, space=None):
- # XXX: parser must become mandatory
- if parser is None:
- from pythonparse import PYTHON_PARSER
- parser = pythonparse.PYTHON_PARSER
+ def __init__(self, parser, debug=0, space=None):
BaseGrammarBuilder.__init__(self, parser, debug)
self.rule_stack = []
self.space = space
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/asthelper.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/asthelper.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/asthelper.py Thu Jan 11 10:34:19 2007
@@ -43,7 +43,7 @@
for index in range(1, l, 2):
token = tokens[index]
assert isinstance(token, TokenObject)
- if token.name != builder.parser.DOT:
+ if token.name != builder.parser.tokens['DOT']:
break
token = tokens[index+1]
assert isinstance(token, TokenObject)
@@ -75,16 +75,16 @@
building_kw = False
kw_built = True
continue
- elif cur_token.name == builder.parser.COMMA:
+ elif cur_token.name == builder.parser.tokens['COMMA']:
index += 1
continue
- elif cur_token.name == builder.parser.EQUAL:
+ elif cur_token.name == builder.parser.tokens['EQUAL']:
index += 1
building_kw = True
continue
- elif cur_token.name == builder.parser.STAR or cur_token.name == builder.parser.DOUBLESTAR:
+ elif cur_token.name == builder.parser.tokens['STAR'] or cur_token.name == builder.parser.tokens['DOUBLESTAR']:
index += 1
- if cur_token.name == builder.parser.STAR:
+ if cur_token.name == builder.parser.tokens['STAR']:
stararg_token = tokens[index]
index += 1
if index >= l:
@@ -119,9 +119,9 @@
token = tokens[index]
index += 1
assert isinstance(token, TokenObject)
- if token.name == builder.parser.LPAR: # nested item
+ if token.name == builder.parser.tokens['LPAR']: # nested item
index, node = parse_fpdef(tokens, index, builder)
- elif token.name == builder.parser.RPAR: # end of current nesting
+ elif token.name == builder.parser.tokens['RPAR']: # end of current nesting
break
else: # name
val = token.get_value()
@@ -131,10 +131,10 @@
token = tokens[index]
index += 1
assert isinstance(token, TokenObject)
- if token.name == builder.parser.COMMA:
+ if token.name == builder.parser.tokens['COMMA']:
comma = True
else:
- assert token.name == builder.parser.RPAR
+ assert token.name == builder.parser.tokens['RPAR']
break
if len(nodes) == 1 and not comma:
node = nodes[0]
@@ -158,19 +158,19 @@
defaults.append(cur_token)
if first_with_default == -1:
first_with_default = len(names) - 1
- elif cur_token.name == builder.parser.COMMA:
+ elif cur_token.name == builder.parser.tokens['COMMA']:
# We could skip test COMMA by incrementing index cleverly
# but we might do some experiment on the grammar at some point
continue
- elif cur_token.name == builder.parser.LPAR:
+ elif cur_token.name == builder.parser.tokens['LPAR']:
index, node = parse_fpdef(tokens, index, builder)
names.append(node)
- elif cur_token.name == builder.parser.STAR or cur_token.name == builder.parser.DOUBLESTAR:
- if cur_token.name == builder.parser.STAR:
+ elif cur_token.name == builder.parser.tokens['STAR'] or cur_token.name == builder.parser.tokens['DOUBLESTAR']:
+ if cur_token.name == builder.parser.tokens['STAR']:
cur_token = tokens[index]
assert isinstance(cur_token, TokenObject)
index += 1
- if cur_token.name == builder.parser.NAME:
+ if cur_token.name == builder.parser.tokens['NAME']:
val = cur_token.get_value()
names.append( ast.AssName( val, consts.OP_ASSIGN ) )
flags |= consts.CO_VARARGS
@@ -185,13 +185,13 @@
raise SyntaxError("incomplete varags", cur_token.lineno,
cur_token.col)
assert isinstance(cur_token, TokenObject)
- if cur_token.name != builder.parser.DOUBLESTAR:
+ if cur_token.name != builder.parser.tokens['DOUBLESTAR']:
raise SyntaxError("Unexpected token", cur_token.lineno,
cur_token.col)
cur_token = tokens[index]
index += 1
assert isinstance(cur_token, TokenObject)
- if cur_token.name == builder.parser.NAME:
+ if cur_token.name == builder.parser.tokens['NAME']:
val = cur_token.get_value()
names.append( ast.AssName( val, consts.OP_ASSIGN ) )
flags |= consts.CO_VARKEYWORDS
@@ -203,7 +203,7 @@
token = tokens[index]
raise SyntaxError("unexpected token" , token.lineno,
token.col)
- elif cur_token.name == builder.parser.NAME:
+ elif cur_token.name == builder.parser.tokens['NAME']:
val = cur_token.get_value()
names.append( ast.AssName( val, consts.OP_ASSIGN ) )
@@ -244,7 +244,7 @@
tok2 = tokens[index]
if not isinstance(tok2, TokenObject):
break
- if tok2.name != builder.parser.COMMA:
+ if tok2.name != builder.parser.tokens['COMMA']:
break
iterables.append(tokens[index+1])
index += 2
@@ -485,7 +485,7 @@
index = 1
while index < len(tokens):
token = tokens[index]
- if isinstance(token, TokenObject) and token.name == builder.parser.DOT:
+ if isinstance(token, TokenObject) and token.name == builder.parser.tokens['DOT']:
index += 1
token = tokens[index]
assert isinstance(token, TokenObject)
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfgrammar.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfgrammar.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfgrammar.py Thu Jan 11 10:34:19 2007
@@ -67,7 +67,7 @@
grammar_grammar()
for _sym, _value in GRAMMAR_GRAMMAR.symbols.items():
- assert not hasattr( GRAMMAR_GRAMMAR, _sym )
+ assert not hasattr( GRAMMAR_GRAMMAR, _sym ), _sym
setattr(GRAMMAR_GRAMMAR, _sym, _value )
for _sym, _value in GRAMMAR_GRAMMAR.tokens.items():
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnflexer.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnflexer.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnflexer.py Thu Jan 11 10:34:19 2007
@@ -136,7 +136,7 @@
end = len(self.input)
pos = self.skip_empty_lines(inp,pos,end)
if pos==end:
- return _p.Token( _p.EOF, None)
+ return _p.build_token( _p.EOF, None)
# at this point nextchar is not a white space nor \n
nextchr = inp[pos]
@@ -148,22 +148,22 @@
self.pos = npos
_endpos = npos - 1
assert _endpos>=0
- return _p.Token( _p.TOK_STRING, inp[pos+1:_endpos])
+ return _p.build_token( _p.TOK_STRING, inp[pos+1:_endpos])
else:
npos = match_symbol( inp, pos, end)
if npos!=pos:
self.pos = npos
if npos!=end and inp[npos]==":":
self.pos += 1
- return _p.Token( _p.TOK_SYMDEF, inp[pos:npos])
+ return _p.build_token( _p.TOK_SYMDEF, inp[pos:npos])
else:
- return _p.Token( _p.TOK_SYMBOL, inp[pos:npos])
+ return _p.build_token( _p.TOK_SYMBOL, inp[pos:npos])
# we still have pos!=end here
chr = inp[pos]
if chr in "[]()*+|":
self.pos = pos+1
- return _p.Token( _p.tok_values[chr], chr)
+ return _p.build_token( _p.tok_values[chr], chr)
self.RaiseError( "Unknown token" )
def peek(self):
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfparse.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfparse.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfparse.py Thu Jan 11 10:34:19 2007
@@ -1,10 +1,5 @@
-#!/usr/bin/env python
-from grammar import BaseGrammarBuilder, Alternative, Sequence, Token
-from grammar import GrammarProxy, KleeneStar, GrammarElement, build_first_sets
-from grammar import AbstractBuilder, AbstractContext, Parser
-from ebnflexer import GrammarSource
-import ebnfgrammar
-from ebnfgrammar import GRAMMAR_GRAMMAR
+from grammar import Token, GrammarProxy
+from grammar import AbstractBuilder, AbstractContext
ORDA = ord("A")
@@ -32,7 +27,6 @@
return True
-
punct=['>=', '<>', '!=', '<', '>', '<=', '==', '\\*=',
'//=', '%=', '^=', '<<=', '\\*\\*=', '\\', '=',
'\\+=', '>>=', '=', '&=', '/=', '-=', '\n,', '^',
@@ -40,12 +34,10 @@
'%', '<<', '//', '\\', '', '\n\\)', '\\(', ';', ':',
'@', '\\[', '\\]', '`', '\\{', '\\}']
-TERMINALS = [
- 'NAME', 'NUMBER', 'STRING', 'NEWLINE', 'ENDMARKER',
- 'INDENT', 'DEDENT' ]
+TERMINALS = ['NAME', 'NUMBER', 'STRING', 'NEWLINE', 'ENDMARKER',
+ 'INDENT', 'DEDENT' ]
-# FIXME: parsertools.py ? parser/__init__.py ?
class NameToken(Token):
"""A token that is not a keyword"""
def __init__(self, parser, keywords=None):
@@ -142,7 +134,9 @@
"""Remove GrammarProxy objects"""
to_be_deleted = {}
for rule in self.parser.all_rules:
- for i, arg in enumerate(rule.args):
+ # for i, arg in enumerate(rule.args):
+ for i in range(len(rule.args)):
+ arg = rule.args[i]
if isinstance(arg, GrammarProxy):
real_rule = self.parser.root_rules[arg.codename]
if isinstance(real_rule, GrammarProxy):
@@ -161,10 +155,10 @@
"""Returns a new or existing Token"""
if codename in self.tokens:
return self.tokens[codename]
- token = self.tokens[codename] = self.parser.Token(codename)
+ token = self.tokens[codename] = self.parser.build_token(codename)
return token
- def get_symbolcode(self, name ):
+ def get_symbolcode(self, name):
return self.parser.add_symbol( name )
def get_rule( self, name ):
@@ -196,7 +190,7 @@
self.curaltcount += 1
return True
rules = self.pop_rules(self.curseqcount)
- new_rule = self.parser.Sequence( self.new_symbol(), rules )
+ new_rule = self.parser.build_sequence( self.new_symbol(), rules )
self.rule_stack.append( new_rule )
self.curseqcount = 0
self.curaltcount += 1
@@ -205,7 +199,7 @@
self.curaltcount = 0
return True
rules = self.pop_rules(self.curaltcount)
- new_rule = self.parser.Alternative( self.new_symbol(), rules )
+ new_rule = self.parser.build_alternative( self.new_symbol(), rules )
self.rule_stack.append( new_rule )
self.curaltcount = 0
elif _rule == self.gram.group:
@@ -213,7 +207,7 @@
elif _rule == self.gram.option:
# pops the last alternative
rules = self.pop_rules( 1 )
- new_rule = self.parser.KleeneStar( self.new_symbol(), _min=0, _max=1, rule=rules[0] )
+ new_rule = self.parser.build_kleenestar( self.new_symbol(), _min=0, _max=1, rule=rules[0] )
self.rule_stack.append( new_rule )
self.curseqcount += 1
elif _rule == self.gram.rule:
@@ -222,7 +216,7 @@
del self.rule_stack[0]
if isinstance(old_rule,Token):
# Wrap a token into an alternative
- old_rule = self.parser.Alternative( self.current_rule, [old_rule] )
+ old_rule = self.parser.build_alternative( self.current_rule, [old_rule] )
else:
# Make sure we use the codename from the named rule
old_rule.codename = self.current_rule
@@ -243,11 +237,11 @@
self.curseqcount += 1
elif name == self.gram.TOK_STAR:
top = self.rule_stack[-1]
- rule = self.parser.KleeneStar( self.new_symbol(), _min=0, rule=top)
+ rule = self.parser.build_kleenestar( self.new_symbol(), _min=0, rule=top)
self.rule_stack[-1] = rule
elif name == self.gram.TOK_ADD:
top = self.rule_stack[-1]
- rule = self.parser.KleeneStar( self.new_symbol(), _min=1, rule=top)
+ rule = self.parser.build_kleenestar( self.new_symbol(), _min=1, rule=top)
self.rule_stack[-1] = rule
elif name == self.gram.TOK_BAR:
assert self.curseqcount == 0
@@ -276,12 +270,12 @@
if value in self.parser.tok_values:
# punctuation
tokencode = self.parser.tok_values[value]
- tok = self.parser.Token( tokencode, None )
+ tok = self.parser.build_token( tokencode, None )
else:
if not is_py_name(value):
raise RuntimeError("Unknown STRING value ('%s')" % value)
# assume a keyword
- tok = self.parser.Token( self.parser.NAME, value)
+ tok = self.parser.build_token( self.parser.tokens['NAME'], value)
if value not in self.keywords:
self.keywords.append(value)
self.rule_stack.append(tok)
@@ -294,6 +288,8 @@
## txt : the grammar definition
## """
+## from ebnflexer import GrammarSource
+## from ebnfgrammar import GRAMMAR_GRAMMAR
## source = GrammarSource(GRAMMAR_GRAMMAR, txt)
## builder = EBNFBuilder(GRAMMAR_GRAMMAR, dest_parser=parser)
## result = GRAMMAR_GRAMMAR.root_rules['grammar'].match(source, builder)
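
Editor's note on the enumerate() removal above: the loop is rewritten to
explicit indexing, presumably because the RPython translation toolchain of
this era handled range()/getitem loops more readily than enumerate(). The
pattern, reduced to a runnable stand-in:

    args = ['rule_a', 'rule_b', 'rule_c']
    # rewritten away: for i, arg in enumerate(args): ...
    for i in range(len(args)):
        arg = args[i]
        print i, arg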
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/grammar.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/grammar.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/grammar.py Thu Jan 11 10:34:19 2007
@@ -461,7 +461,11 @@
print "Warning: alternative %s has more than one rule " \
"matching Empty" % self
self._reordered = True
- self.args[:] = not_empty_set
+ # self.args[:] = not_empty_set
+ for elt in self.args[:]:
+ self.args.remove(elt)
+ for elt in not_empty_set:
+ self.args.append(elt)
self.args.extend( empty_set )
def validate( self, syntax_node ):
@@ -758,7 +762,7 @@
return "%d" % codename
def add_symbol( self, sym ):
- assert isinstance( sym, str )
+ # assert isinstance( sym, str )
if not sym in self.symbols:
val = self._sym_count
self._sym_count += 1
@@ -768,7 +772,7 @@
return self.symbols[ sym ]
def add_anon_symbol( self, sym ):
- assert isinstance( sym, str )
+ # assert isinstance( sym, str )
if not sym in self.symbols:
val = self._ann_sym_count
self._ann_sym_count -= 1
@@ -778,7 +782,7 @@
return self.symbols[ sym ]
def add_token( self, tok, value = None ):
- assert isinstance( tok, str )
+ # assert isinstance( tok, str )
if not tok in self.tokens:
val = self._sym_count
self._sym_count += 1
@@ -827,48 +831,49 @@
r.reorder_rule()
- def Alternative( self, name_id, args ):
- assert isinstance( name_id, int )
+ def build_alternative( self, name_id, args ):
+ # assert isinstance( name_id, int )
+ assert isinstance(args, list)
alt = Alternative( self, name_id, args )
self.all_rules.append( alt )
return alt
def Alternative_n(self, name, args ):
- assert isinstance(name, str)
+ # assert isinstance(name, str)
name_id = self.add_symbol( name )
- return self.Alternative( name_id, args )
+ return self.build_alternative( name_id, args )
- def Sequence( self, name_id, args ):
- assert isinstance( name_id, int )
+ def build_sequence( self, name_id, args ):
+ # assert isinstance( name_id, int )
alt = Sequence( self, name_id, args )
self.all_rules.append( alt )
return alt
def Sequence_n(self, name, args ):
- assert isinstance(name, str)
+ # assert isinstance(name, str)
name_id = self.add_symbol( name )
- return self.Sequence( name_id, args )
+ return self.build_sequence( name_id, args )
- def KleeneStar( self, name_id, _min = 0, _max = -1, rule = None ):
- assert isinstance( name_id, int )
+ def build_kleenestar( self, name_id, _min = 0, _max = -1, rule = None ):
+ # assert isinstance( name_id, int )
alt = KleeneStar( self, name_id, _min, _max, rule )
self.all_rules.append( alt )
return alt
def KleeneStar_n(self, name, _min = 0, _max = -1, rule = None ):
- assert isinstance(name, str)
+ # assert isinstance(name, str)
name_id = self.add_symbol( name )
- return self.KleeneStar( name_id, _min, _max, rule )
+ return self.build_kleenestar( name_id, _min, _max, rule )
def Token_n(self, name, value = None ):
- assert isinstance( name, str)
- assert value is None or isinstance( value, str)
+ # assert isinstance( name, str)
+ # assert value is None or isinstance( value, str)
name_id = self.add_token( name, value )
- return self.Token( name_id, value )
+ return self.build_token( name_id, value )
- def Token(self, name_id, value = None ):
- assert isinstance( name_id, int )
- assert value is None or isinstance( value, str)
+ def build_token(self, name_id, value = None ):
+ # assert isinstance( name_id, int )
+ # assert value is None or isinstance( value, str)
tok = Token( self, name_id, value )
return tok
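
Editor's note: the grammar.py hunks above rename the Parser factory methods
from class-like names (Token, Sequence, Alternative, KleeneStar) to build_*
forms. The log does not say why, but a plausible reason is to keep the
method names from shadowing the grammar-element classes they instantiate.
A reduced sketch of the renamed factory, with Token cut down to the
constructor shape used above:

    class Token(object):
        def __init__(self, parser, codename, value=None):
            self.parser = parser
            self.codename = codename
            self.value = value

    class Parser(object):
        def build_token(self, name_id, value=None):
            # formerly Parser.Token(); same behaviour, non-shadowing name
            tok = Token(self, name_id, value)
            return tok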
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonlexer.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonlexer.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonlexer.py Thu Jan 11 10:34:19 2007
@@ -121,7 +121,7 @@
endmatch = endDFA.recognize(line)
if endmatch >= 0:
pos = end = endmatch
- tok = parser.Token(parser.STRING, contstr + line[:end])
+ tok = parser.build_token(parser.tokens['STRING'], contstr + line[:end])
token_list.append((tok, line, lnum, pos))
last_comment = ''
# token_list.append((STRING, contstr + line[:end],
@@ -130,7 +130,7 @@
contline = None
elif (needcont and not line.endswith('\\\n') and
not line.endswith('\\\r\n')):
- tok = parser.Token(parser.ERRORTOKEN, contstr + line)
+ tok = parser.build_token(parser.tokens['ERRORTOKEN'], contstr + line)
token_list.append((tok, line, lnum, pos))
last_comment = ''
# token_list.append((ERRORTOKEN, contstr + line,
@@ -156,10 +156,10 @@
if line[pos] in '#\r\n': # skip comments or blank lines
if line[pos] == '#':
- tok = parser.Token(parser.COMMENT, line[pos:])
+ tok = parser.build_token(parser.tokens['COMMENT'], line[pos:])
last_comment = line[pos:]
else:
- tok = parser.Token(parser.NL, line[pos:])
+ tok = parser.build_token(parser.tokens['NL'], line[pos:])
last_comment = ''
# XXX Skip NL and COMMENT Tokens
# token_list.append((tok, line, lnum, pos))
@@ -167,12 +167,12 @@
if column > indents[-1]: # count indents or dedents
indents.append(column)
- tok = parser.Token(parser.INDENT, line[:pos])
+ tok = parser.build_token(parser.tokens['INDENT'], line[:pos])
token_list.append((tok, line, lnum, pos))
last_comment = ''
while column < indents[-1]:
indents = indents[:-1]
- tok = parser.Token(parser.DEDENT, '')
+ tok = parser.build_token(parser.tokens['DEDENT'], '')
token_list.append((tok, line, lnum, pos))
last_comment = ''
else: # continued statement
@@ -199,22 +199,22 @@
token, initial = line[start:end], line[start]
if initial in numchars or \
(initial == '.' and token != '.'): # ordinary number
- tok = parser.Token(parser.NUMBER, token)
+ tok = parser.build_token(parser.tokens['NUMBER'], token)
token_list.append((tok, line, lnum, pos))
last_comment = ''
elif initial in '\r\n':
if parenlev > 0:
- tok = parser.Token(parser.NL, token)
+ tok = parser.build_token(parser.tokens['NL'], token)
last_comment = ''
# XXX Skip NL
else:
- tok = parser.Token(parser.NEWLINE, token)
+ tok = parser.build_token(parser.tokens['NEWLINE'], token)
# XXX YUCK !
tok.value = last_comment
token_list.append((tok, line, lnum, pos))
last_comment = ''
elif initial == '#':
- tok = parser.Token(parser.COMMENT, token)
+ tok = parser.build_token(parser.tokens['COMMENT'], token)
last_comment = token
# XXX Skip # token_list.append((tok, line, lnum, pos))
# token_list.append((COMMENT, token, spos, epos, line))
@@ -224,7 +224,7 @@
if endmatch >= 0: # all on one line
pos = endmatch
token = line[start:pos]
- tok = parser.Token(parser.STRING, token)
+ tok = parser.build_token(parser.tokens['STRING'], token)
token_list.append((tok, line, lnum, pos))
last_comment = ''
else:
@@ -241,11 +241,11 @@
contline = line
break
else: # ordinary string
- tok = parser.Token(parser.STRING, token)
+ tok = parser.build_token(parser.tokens['STRING'], token)
token_list.append((tok, line, lnum, pos))
last_comment = ''
elif initial in namechars: # ordinary name
- tok = parser.Token(parser.NAME, token)
+ tok = parser.build_token(parser.tokens['NAME'], token)
token_list.append((tok, line, lnum, pos))
last_comment = ''
elif initial == '\\': # continued stmt
@@ -261,9 +261,9 @@
(lnum-1, 0), token_list)
if token in parser.tok_values:
punct = parser.tok_values[token]
- tok = parser.Token(punct)
+ tok = parser.build_token(punct)
else:
- tok = parser.Token(parser.OP, token)
+ tok = parser.build_token(parser.tokens['OP'], token)
token_list.append((tok, line, lnum, pos))
last_comment = ''
else:
@@ -273,22 +273,22 @@
if start<max and line[start] in single_quoted:
raise TokenError("EOL while scanning single-quoted string", line,
(lnum, start), token_list)
- tok = parser.Token(parser.ERRORTOKEN, line[pos])
+ tok = parser.build_token(parser.tokens['ERRORTOKEN'], line[pos])
token_list.append((tok, line, lnum, pos))
last_comment = ''
pos = pos + 1
lnum -= 1
if not (flags & PyCF_DONT_IMPLY_DEDENT):
- if token_list and token_list[-1][0].codename != pytoken.NEWLINE:
- token_list.append((parser.Token(parser.NEWLINE, ''), '\n', lnum, 0))
+ if token_list and token_list[-1][0].codename != parser.tokens['NEWLINE']:
+ token_list.append((parser.build_token(parser.tokens['NEWLINE'], ''), '\n', lnum, 0))
for indent in indents[1:]: # pop remaining indent levels
- tok = parser.Token(parser.DEDENT, '')
+ tok = parser.build_token(parser.tokens['DEDENT'], '')
token_list.append((tok, line, lnum, pos))
#if token_list and token_list[-1][0].codename != pytoken.NEWLINE:
- token_list.append((parser.Token(parser.NEWLINE, ''), '\n', lnum, 0))
+ token_list.append((parser.build_token(parser.tokens['NEWLINE'], ''), '\n', lnum, 0))
- tok = parser.Token(parser.ENDMARKER, '',)
+ tok = parser.build_token(parser.tokens['ENDMARKER'], '',)
token_list.append((tok, line, lnum, pos))
#for t in token_list:
# print '%20s %-25s %d' % (pytoken.tok_name.get(t[0].codename, '?'), t[0], t[-2])
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonparse.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonparse.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonparse.py Thu Jan 11 10:34:19 2007
@@ -95,11 +95,11 @@
'exec' : "file_input",
}
- def __init__(self, predefined_symbols=None):
+ def __init__(self): # , predefined_symbols=None):
grammar.Parser.__init__(self)
pytoken.setup_tokens(self)
- if predefined_symbols:
- self.load_symbols(predefined_symbols)
+ # if predefined_symbols:
+ # self.load_symbols(predefined_symbols)
self.keywords = []
# XXX (adim): this is trunk's keyword management
@@ -148,7 +148,8 @@
"""update references to old rules"""
# brute force algorithm
for rule in self.all_rules:
- for i, arg in enumerate(rule.args):
+ for i in range(len(rule.args)):
+ arg = rule.args[i]
if arg.codename in self.root_rules:
real_rule = self.root_rules[arg.codename]
# This rule has been updated
@@ -174,13 +175,25 @@
def make_pyparser(version=Options.version):
- parser = PythonParser(predefined_symbols=symbol.sym_name)
+ parser = PythonParser() # predefined_symbols=symbol.sym_name)
return build_parser_for_version(version, parser=parser)
+def translation_target(grammardef):
+ parser = PythonParser() # predefined_symbols=symbol.sym_name)
+ source = GrammarSource(GRAMMAR_GRAMMAR, grammardef)
+ builder = ebnfparse.EBNFBuilder(GRAMMAR_GRAMMAR, dest_parser=parser)
+ GRAMMAR_GRAMMAR.root_rules['grammar'].match(source, builder)
+ builder.resolve_rules()
+ parser.build_first_sets()
+ parser.keywords = builder.keywords
+ return parser
+
+
+
# unfortunately the command line options are not parsed yet
# debug_print( "Loading grammar %s" % Options.version )
# XXX: remove PYTHON_PARSER
-PYTHON_PARSER = make_pyparser()
+# PYTHON_PARSER = make_pyparser()
## XXX BROKEN
## def grammar_rules( space ):
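
Editor's note: the new translation_target() entry point would be driven
roughly as follows. The grammar file path is illustrative only, and
GrammarSource, GRAMMAR_GRAMMAR and ebnfparse are assumed to be importable
from pythonparse.py's module scope:

    grammardef = file('data/Grammar2.4').read()
    parser = translation_target(grammardef)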
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonutil.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonutil.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonutil.py Thu Jan 11 10:34:19 2007
@@ -25,6 +25,8 @@
_ver = "_stablecompiler"
elif version in ("2.3","2.4","2.5a"):
_ver = version
+ else:
+ raise ValueError('no such grammar version: %s' % version)
return os.path.join( os.path.dirname(__file__), "data", "Grammar" + _ver ), _ver
@@ -34,7 +36,9 @@
parser = Parser()
setup_tokens(parser)
# XXX: clean up object dependencies
- source = GrammarSource(GRAMMAR_GRAMMAR, file(gramfile).read())
+ grammardef = file(gramfile).read()
+ assert isinstance(grammardef, str)
+ source = GrammarSource(GRAMMAR_GRAMMAR, grammardef)
builder = EBNFBuilder(GRAMMAR_GRAMMAR, dest_parser=parser)
GRAMMAR_GRAMMAR.root_rules['grammar'].match(source, builder)
builder.resolve_rules()
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/pytoken.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/pytoken.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/pytoken.py Thu Jan 11 10:34:19 2007
@@ -11,10 +11,10 @@
tok_name = {-1 : 'NULLTOKEN'}
tok_values = {'NULLTOKEN' : -1}
-tok_rpunct = {}
+# tok_rpunct = {}
def setup_tokens( parser ):
- global tok_rpunct
+ # global tok_rpunct
# For compatibility, this produces the same constant values as Python 2.4.
parser.add_token( 'ENDMARKER' )
parser.add_token( 'NAME' )
@@ -75,7 +75,7 @@
parser.add_token( "COMMENT" )
parser.add_token( "NL" )
- tok_rpunct = parser.tok_values.copy()
- for _name, _value in parser.tokens.items():
- globals()[_name] = _value
- setattr(parser, _name, _value)
+ # tok_rpunct = parser.tok_values.copy()
+ # for _name, _value in parser.tokens.items():
+ # globals()[_name] = _value
+ # setattr(parser, _name, _value)
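
Editor's note: the commented-out loop also used to inject the codes into
pytoken's module globals (pytoken.NEWLINE and friends), which is why
pythonlexer.py above stops comparing against pytoken.NEWLINE. Callers now
go through the dict; a sketch:

    parser = PythonParser()            # runs pytoken.setup_tokens()
    # no longer available: parser.NEWLINE, pytoken.NEWLINE
    newline_code = parser.tokens['NEWLINE']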
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/tuplebuilder.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/tuplebuilder.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/tuplebuilder.py Thu Jan 11 10:34:19 2007
@@ -62,8 +62,8 @@
self.source_encoding = None
self.lineno = lineno
self.stack = []
- self.space_token = ( self.parser.NEWLINE, self.parser.INDENT,
- self.parser.DEDENT, self.parser.ENDMARKER )
+ self.space_token = ( self.parser.tokens['NEWLINE'], self.parser.tokens['INDENT'],
+ self.parser.tokens['DEDENT'], self.parser.tokens['ENDMARKER'] )
def context(self):
"""Returns the state of the builder to be restored later"""