[pypy-svn] r22813 - in pypy/branch/ast-experiments/pypy/interpreter/pyparser: . test
ludal at codespeak.net
ludal at codespeak.net
Sun Jan 29 02:21:23 CET 2006
Author: ludal
Date: Sun Jan 29 02:21:18 2006
New Revision: 22813
Added:
pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_parser.py
Modified:
pypy/branch/ast-experiments/pypy/interpreter/pyparser/astbuilder.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfparse.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/grammar.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonparse.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonutil.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_astbuilder.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_astcompiler.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_lookahead.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_pytokenizer.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/tuplebuilder.py
Log:
refactoring III
Most tests pass. A problem remains with TupleBuilder messing up on `import xxx` statements.
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/astbuilder.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/astbuilder.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/astbuilder.py Sun Jan 29 02:21:18 2006
@@ -713,7 +713,7 @@
# 'is', 'is not', 'not' or 'not in' => tok.get_value()
token = atoms[i]
assert isinstance(token, TokenObject)
- op_name = tok.tok_rpunct.get(token.name, token.get_value())
+ op_name = tok.tok_rvalues.get(token.name, token.get_value())
ops.append((op_name, atoms[i+1]))
builder.push(ast.Compare(atoms[0], ops, atoms[0].lineno))
@@ -1544,7 +1544,7 @@
self.lineno = lineno
def get_name(self):
- return tok.tok_rpunct.get(self.name,
+ return tok.tok_rvalues.get(self.name,
tok.tok_name.get(self.name, str(self.name)))
def get_value(self):
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfparse.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfparse.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfparse.py Sun Jan 29 02:21:18 2006
@@ -74,7 +74,7 @@
if tk.codename == self.codename:
if tk.value not in self.keywords:
ret = builder.token( tk.codename, tk.value, source )
- return self.debug_return( ret, tk.codename, tk.value )
+ return ret
source.restore( ctx )
return 0
@@ -83,7 +83,7 @@
"""
if not isinstance(other, Token):
raise RuntimeError("Unexpected token type")
- if other is EmptyToken:
+ if other is self.parser.EmptyToken:
return False
if other.codename != self.codename:
return False
@@ -135,7 +135,7 @@
# If we still have a GrammarProxy associated to this codename
# this means we have encountered a terminal symbol
to_be_deleted[ arg.codename ] = True
- rule.args[i] = Token( self.parser, arg.codename )
+ rule.args[i] = self.get_token( arg.codename )
#print arg, "-> Token(",arg.rule_name,")"
else:
#print arg, "->", real_rule
@@ -143,19 +143,20 @@
for codename in to_be_deleted.keys():
del self.parser.root_rules[codename]
-## def get_token(self, codename ):
-## """Returns a new or existing Token"""
-## if codename in self.tokens:
-## return self.tokens[codename]
-## token = self.tokens[codename] = self.parser.Token(codename)
-## return token
+ def get_token(self, codename ):
+ """Returns a new or existing Token"""
+ if codename in self.tokens:
+ return self.tokens[codename]
+ token = self.tokens[codename] = self.parser.Token(codename)
+ return token
def get_symbolcode(self, name ):
return self.parser.add_symbol( name )
def get_rule( self, name ):
if name in self.parser.tokens:
- return self.parser.Token_n( name )
+ codename = self.parser.tokens[name]
+ return self.get_token( codename )
codename = self.get_symbolcode( name )
if codename in self.parser.root_rules:
return self.parser.root_rules[codename]
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/grammar.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/grammar.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/grammar.py Sun Jan 29 02:21:18 2006
@@ -734,6 +734,7 @@
self.EmptyToken = Token( self, -1, None )
self.tok_name = {}
self.tok_values = {}
+ self.tok_rvalues = {}
self._ann_sym_count = -10
self._sym_count = 0
self.all_rules = []
@@ -775,6 +776,9 @@
self.tok_name[val] = tok
if value is not None:
self.tok_values[value] = val
+ # XXX : this reverse mapping seemed only to be used
+ # because of pycodegen visitAugAssign
+ self.tok_rvalues[val] = value
return val
return self.tokens[ tok ]
@@ -857,3 +861,14 @@
assert value is None or isinstance( value, str)
tok = Token( self, name_id, value )
return tok
+
+
+ # Debugging functions
+ def show_rules(self, name):
+ import re
+ rex = re.compile(name)
+ rules =[]
+ for _name, _val in self.symbols.items():
+ if rex.search(_name) and _val>=0:
+ rules.append(self.root_rules[_val])
+ return rules
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonparse.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonparse.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonparse.py Sun Jan 29 02:21:18 2006
@@ -5,7 +5,6 @@
helper functions are provided that use the grammar to parse
using file_input, single_input and eval_input targets
"""
-import autopath
import sys
import os
from pypy.interpreter.error import OperationError, debug_print
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonutil.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonutil.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonutil.py Sun Jan 29 02:21:18 2006
@@ -61,7 +61,7 @@
tuples (StackElement is only a wrapper class around these tuples)
"""
- builder = TupleBuilder(pysymbol._cpython_symbols, PYTHON_PARSER.rules, lineno=False)
+ builder = TupleBuilder(PYTHON_PARSER, lineno=False)
if space is not None:
builder.space = space
target_rule = TARGET_DICT[mode]
@@ -109,6 +109,8 @@
etc. This is to be fixed in a clean way
"""
tuples = pypy_parse(input, mode, True)
+ if 'import' in input:
+ toto
ast = transformer.compile_node(tuples)
return ast
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_astbuilder.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_astbuilder.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_astbuilder.py Sun Jan 29 02:21:18 2006
@@ -1,6 +1,6 @@
import os
-from pypy.interpreter.pyparser.pythonparse import PYTHON_PARSER_DYN as PYTHON_PARSER
+from pypy.interpreter.pyparser.pythonparse import PYTHON_PARSER
from pypy.interpreter.pyparser.astbuilder import AstBuilder
from pypy.interpreter.pyparser.pythonutil import ast_from_input
from pypy.interpreter.stablecompiler.transformer import Transformer
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_astcompiler.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_astcompiler.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_astcompiler.py Sun Jan 29 02:21:18 2006
@@ -3,7 +3,6 @@
from pypy.interpreter.pyparser.astbuilder import AstBuilder
from pypy.interpreter.pyparser.tuplebuilder import TupleBuilder
from pypy.interpreter.pycode import PyCode
-from pypy.interpreter.pyparser.pysymbol import _cpython_symbols
import py.test
def setup_module(mod):
@@ -83,7 +82,7 @@
def compile_with_testcompiler(expr, target='exec', space=FakeSpace()):
target2 = TARGET_DICT['exec'] # xxx exec: single not really tested
- builder = TupleBuilder(_cpython_symbols)
+ builder = TupleBuilder(PYTHON_PARSER)
PYTHON_PARSER.parse_source(expr, target2, builder)
tuples = builder.stack[-1].as_tuple(True)
from pypy.interpreter.stablecompiler import transformer, pycodegen, misc
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_lookahead.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_lookahead.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_lookahead.py Sun Jan 29 02:21:18 2006
@@ -1,43 +1,37 @@
from pypy.interpreter.pyparser.grammar import Alternative, Sequence, KleeneStar, \
- Token, EmptyToken, build_first_sets
+ Token, Parser
class TestLookAheadBasics:
def setup_method(self, method):
- self.count = 0
- self.tok1 = Token(self.nextid(), 'foo')
- self.tok2 = Token(self.nextid(), 'bar')
- self.tok3 = Token(self.nextid(), 'foobar')
+ self.parser = Parser()
+ self.tok1 = self.parser.Token_n("t1", 'foo')
+ self.tok2 = self.parser.Token_n("t2", 'bar')
+ self.tok3 = self.parser.Token_n("t3", 'foobar')
self.tokens = [self.tok1, self.tok2, self.tok3]
- build_first_sets(self.tokens)
-
- def nextid(self):
- self.count+=1
- return self.count
+ self.parser.build_first_sets()
def test_basic_token(self):
assert self.tok1.first_set == [self.tok1]
-
def test_basic_alternative(self):
- alt = Alternative(self.nextid(), self.tokens)
- build_first_sets([alt])
+ alt = self.parser.Alternative_n("a1t", self.tokens)
+ self.parser.build_first_sets()
assert alt.first_set == self.tokens
def test_basic_sequence(self):
- seq = Sequence(self.nextid(), self.tokens)
- build_first_sets([seq])
+ seq = self.parser.Sequence_n("seq", self.tokens)
+ self.parser.build_first_sets()
assert seq.first_set == [self.tokens[0]]
def test_basic_kleenstar(self):
tok1, tok2, tok3 = self.tokens
- kstar = KleeneStar(self.nextid(), 1, 3, tok1)
- build_first_sets([kstar])
- assert kstar.first_set == [tok1]
- kstar = KleeneStar(self.nextid(), 0, 3, tok1)
- build_first_sets([kstar])
- assert kstar.first_set == [tok1, EmptyToken]
+ kstar1 = self.parser.KleeneStar_n("k", 1, 3, tok1)
+ kstar2 = self.parser.KleeneStar_n("k2", 0, 3, tok1)
+ self.parser.build_first_sets()
+ assert kstar1.first_set == [tok1]
+ assert kstar2.first_set == [tok1, self.parser.EmptyToken]
def test_maybe_empty_sequence(self):
@@ -45,11 +39,11 @@
==> S.first_set = [tok1, tok2, EmptyToken]
"""
tok1, tok2, tok3 = self.tokens
- k1 = KleeneStar(self.nextid(), 0, 2, tok1)
- k2 = KleeneStar(self.nextid(), 0, 2, tok2)
- seq = Sequence(self.nextid(), [k1, k2])
- build_first_sets([k1, k2, seq])
- assert seq.first_set == [tok1, tok2, EmptyToken]
+ k1 = self.parser.KleeneStar_n( "k1", 0, 2, tok1)
+ k2 = self.parser.KleeneStar_n("k2", 0, 2, tok2)
+ seq = self.parser.Sequence_n( "seq", [k1, k2])
+ self.parser.build_first_sets()
+ assert seq.first_set == [tok1, tok2, self.parser.EmptyToken]
def test_not_empty_sequence(self):
@@ -57,41 +51,42 @@
==> S.first_set = [tok1, tok2]
"""
tok1, tok2, tok3 = self.tokens
- k1 = KleeneStar(self.nextid(), 0, 2, tok1)
- k2 = KleeneStar(self.nextid(), 1, 2, tok2)
- seq = Sequence(self.nextid(), [k1, k2])
- build_first_sets([k1, k2, seq])
+ k1 = self.parser.KleeneStar_n("k1", 0, 2, tok1)
+ k2 = self.parser.KleeneStar_n("k2", 1, 2, tok2)
+ seq = self.parser.Sequence_n("seq", [k1, k2])
+ self.parser.build_first_sets()
assert seq.first_set == [tok1, tok2]
-def test_token_comparison():
- assert Token(1, 'foo') == Token(1, 'foo')
- assert Token(1, 'foo') != Token(2, 'foo')
- assert Token(2, 'foo') != Token(2, None)
+ def test_token_comparison(self):
+ tok1 = self.parser.Token_n( "tok1", "foo" )
+ tok1b = self.parser.Token_n( "tok1", "foo" )
+ tok2 = self.parser.Token_n( "tok2", "foo" )
+ tok3 = self.parser.Token_n( "tok2", None )
+ assert tok1 == tok1b
+ assert tok1 != tok2
+ assert tok2 != tok3
-LOW = 1
-CAP = 2
-R_A = 3
-R_B = 4
-R_C = 5
-R_k1 = 6
-R_k2 = 7
class TestLookAhead:
def setup_method(self, method):
- self.LOW = Token(LOW, 'low')
- self.CAP = Token(CAP ,'cap')
- self.A = Alternative(R_A, [])
- k1 = KleeneStar(R_k1, 0, rule=self.LOW)
- k2 = KleeneStar(R_k2, 0, rule=self.CAP)
- self.B = Sequence(R_B, [k1, self.A])
- self.C = Sequence(R_C, [k2, self.A])
+ p = self.parser = Parser()
+ self.LOW = p.Token_n( 'LOW', 'low')
+ self.CAP = p.Token_n( 'CAP' ,'cap')
+ self.A = p.Alternative_n( 'R_A', [])
+ k1 = p.KleeneStar_n( 'R_k1', 0, rule=self.LOW)
+ k2 = p.KleeneStar_n( 'R_k2', 0, rule=self.CAP)
+ self.B = p.Sequence_n( 'R_B', [k1, self.A])
+ self.C = p.Sequence_n( 'R_C', [k2, self.A])
self.A.args = [self.B, self.C]
- build_first_sets([self.A, self.B, self.C, self.LOW, self.CAP, k1, k2])
+ p.build_first_sets()
def test_S_first_set(self):
- for s in [Token(LOW, 'low'), EmptyToken, Token(CAP, 'cap')]:
+ p = self.parser
+ LOW = p.tokens['LOW']
+ CAP = p.tokens['CAP']
+ for s in [Token(p, LOW, 'low'), p.EmptyToken, Token(p, CAP, 'cap')]:
assert s in self.A.first_set
assert s in self.B.first_set
assert s in self.C.first_set
Added: pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_parser.py
==============================================================================
--- (empty file)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_parser.py Sun Jan 29 02:21:18 2006
@@ -0,0 +1,45 @@
+
+from pypy.interpreter.pyparser.grammar import Parser
+
+
+
+def test_symbols():
+ p = Parser()
+ x1 = p.add_symbol('sym')
+ x2 = p.add_token('tok')
+ x3 = p.add_anon_symbol(':sym')
+ x4 = p.add_anon_symbol(':sym1')
+ # test basic numbering assumption
+ # symbols and tokens are attributed sequentially
+ # using the same counter
+ assert x2 == x1 + 1
+ # anon symbols have negative value
+ assert x3 != x2 + 1
+ assert x4 == x3 - 1
+ assert x3 < 0
+ y1 = p.add_symbol('sym')
+ assert y1 == x1
+ y2 = p.add_token('tok')
+ assert y2 == x2
+ y3 = p.add_symbol(':sym')
+ assert y3 == x3
+ y4 = p.add_symbol(':sym1')
+ assert y4 == x4
+
+
+def test_load():
+ d = { 5 : 'sym1',
+ 6 : 'sym2',
+ 9 : 'sym3',
+ }
+ p = Parser()
+ p.load_symbols( d )
+ v = p.add_symbol('sym4')
+ # check that we avoid numbering conflicts
+ assert v>9
+ v = p.add_symbol( 'sym1' )
+ assert v == 5
+ v = p.add_symbol( 'sym2' )
+ assert v == 6
+ v = p.add_symbol( 'sym3' )
+ assert v == 9
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_pytokenizer.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_pytokenizer.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_pytokenizer.py Sun Jan 29 02:21:18 2006
@@ -1,17 +1,24 @@
from pypy.interpreter.pyparser.pythonlexer import Source, TokenError, \
match_encoding_declaration
from pypy.interpreter.pyparser.grammar import Token, GrammarElement
-from pypy.interpreter.pyparser.pytoken import EQUAL, ENDMARKER, LSQB, MINUS, NAME, NEWLINE, NULLTOKEN, NUMBER, RSQB, STRING
-
-from pypy.interpreter.pyparser.pytoken import tok_name, tok_punct
-GrammarElement.symbols = tok_name
+from pypy.interpreter.pyparser.pythonparse import PYTHON_PARSER as P
+EQUAL = P.EQUAL
+ENDMARKER = P.ENDMARKER
+LSQB = P.LSQB
+MINUS = P.MINUS
+NAME = P.NAME
+NEWLINE = P.NEWLINE
+NULLTOKEN = P.NULLTOKEN
+NUMBER = P.NUMBER
+RSQB = P.RSQB
+STRING = P.STRING
def parse_source(source):
"""returns list of parsed tokens"""
- lexer = Source(source.splitlines(True))
+ lexer = Source( P, source.splitlines(True))
tokens = []
- last_token = Token(NULLTOKEN, None)
+ last_token = Token( P, NULLTOKEN, None)
while last_token.codename != ENDMARKER:
last_token = lexer.next()
tokens.append(last_token)
@@ -49,24 +56,24 @@
s = """['a'
]"""
tokens = parse_source(s)
- assert tokens[:4] == [Token(LSQB, None), Token(STRING, "'a'"),
- Token(RSQB, None), Token(NEWLINE, '')]
+ assert tokens[:4] == [Token(P, LSQB, None), Token(P, STRING, "'a'"),
+ Token(P, RSQB, None), Token(P, NEWLINE, '')]
def test_numbers():
"""make sure all kind of numbers are correctly parsed"""
for number in NUMBERS:
- assert parse_source(number)[0] == Token(NUMBER, number)
+ assert parse_source(number)[0] == Token(P, NUMBER, number)
neg = '-%s' % number
- assert parse_source(neg)[:2] == [Token(MINUS, None),
- Token(NUMBER, number)]
+ assert parse_source(neg)[:2] == [Token(P, MINUS, None),
+ Token(P, NUMBER, number)]
for number in BAD_NUMBERS:
- assert parse_source(number)[0] != Token(NUMBER, number)
+ assert parse_source(number)[0] != Token(P, NUMBER, number)
def test_hex_number():
"""basic pasrse"""
tokens = parse_source("a = 0x12L")
- assert tokens[:4] == [Token(NAME, 'a'), Token(EQUAL, None),
- Token(NUMBER, '0x12L'), Token(NEWLINE, '')]
+ assert tokens[:4] == [Token(P, NAME, 'a'), Token(P, EQUAL, None),
+ Token(P, NUMBER, '0x12L'), Token(P, NEWLINE, '')]
def test_punct():
"""make sure each punctuation is correctly parsed"""
@@ -81,7 +88,7 @@
tokens = [tok for tok, _, _, _ in error.token_stack]
if prefix:
tokens.pop(0)
- assert tokens[0].codename == tok_punct[pstr]
+ assert tokens[0].codename == P.tok_values[pstr]
def test_encoding_declarations_match():
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/tuplebuilder.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/tuplebuilder.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/tuplebuilder.py Sun Jan 29 02:21:18 2006
@@ -1,7 +1,5 @@
-from grammar import AbstractBuilder, AbstractContext
-from pytoken import tok_name, tok_rpunct, NEWLINE, INDENT, DEDENT, ENDMARKER
-import pysymbol
+from grammar import AbstractBuilder, AbstractContext, Parser
class StackElement:
"""wraps TupleBuilder's tuples"""
@@ -58,12 +56,14 @@
class TupleBuilder(AbstractBuilder):
"""A builder that directly produce the AST"""
- def __init__(self, symbols, rules=None, debug=0, lineno=True):
- AbstractBuilder.__init__(self, symbols, rules, debug)
+ def __init__(self, parser, debug=0, lineno=True):
+ AbstractBuilder.__init__(self, parser, debug)
# This attribute is here for convenience
self.source_encoding = None
self.lineno = lineno
self.stack = []
+ self.space_token = ( self.parser.NEWLINE, self.parser.INDENT,
+ self.parser.DEDENT, self.parser.ENDMARKER )
def context(self):
"""Returns the state of the builder to be restored later"""
@@ -98,8 +98,8 @@
def token(self, codename, value, source):
lineno = source._token_lnum
if value is None:
- if codename not in ( NEWLINE, INDENT, DEDENT, ENDMARKER ):
- value = tok_rpunct.get(codename, "unknown op")
+ if codename not in self.space_token:
+ value = self.parser.tok_rvalues.get(codename, "unknown op")
else:
value = ''
self.stack.append( Terminal(codename, value, lineno) )
More information about the Pypy-commit
mailing list