[pypy-svn] r14255 - pypy/dist/pypy/interpreter/pyparser
adim at codespeak.net
Mon Jul 4 21:47:56 CEST 2005
Author: adim
Date: Mon Jul 4 21:47:55 2005
New Revision: 14255
Modified:
pypy/dist/pypy/interpreter/pyparser/ebnfparse.py
pypy/dist/pypy/interpreter/pyparser/grammar.py
pypy/dist/pypy/interpreter/pyparser/pythonlexer.py
pypy/dist/pypy/interpreter/pyparser/pytokenize.py
Log:
small cleanups
Modified: pypy/dist/pypy/interpreter/pyparser/ebnfparse.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/ebnfparse.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/ebnfparse.py Mon Jul 4 21:47:55 2005
@@ -8,7 +8,10 @@
punct=['>=', '<>', '!=', '<', '>', '<=', '==', '\\*=',
'//=', '%=', '^=', '<<=', '\\*\\*=', '\\', '=',
- '\\+=', '>>=', '=', '&=', '/=', '-=', '\n,', '^', '>>', '&', '\\+', '\\*', '-', '/', '\\.', '\\*\\*', '%', '<<', '//', '\\', '', '\n\\)', '\\(', ';', ':', '@', '\\[', '\\]', '`', '\\{', '\\}']
+ '\\+=', '>>=', '=', '&=', '/=', '-=', '\n,', '^',
+ '>>', '&', '\\+', '\\*', '-', '/', '\\.', '\\*\\*',
+ '%', '<<', '//', '\\', '', '\n\\)', '\\(', ';', ':',
+ '@', '\\[', '\\]', '`', '\\{', '\\}']
py_punct = re.compile(r"""
>=|<>|!=|<|>|<=|==|~|
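The punct list above feeds py_punct, a single regular expression that recognises every punctuation token. A minimal sketch (the helper name and the shortened list are illustrative, not from the patch) of how such a list can be compiled into one alternation, longest entry first so that '\*\*=' wins over '\*\*' and '\*':

    import re

    def build_punct_regex(punct):
        # Drop the empty entry, then sort longest-first so the regex
        # engine tries '\*\*=' before '\*\*' and '\*'.
        alternatives = [p for p in punct if p]
        alternatives.sort(key=len, reverse=True)
        return re.compile("|".join(alternatives))

    rx = build_punct_regex(['>=', '<=', '\\*\\*=', '\\*\\*', '\\*', '\\+'])
    assert rx.match('**=').group() == '**='
    assert rx.match('*').group() == '*'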
@@ -77,17 +80,16 @@
self.items = []
self.terminals['NAME'] = NameToken()
- def new_name( self ):
+ def new_name(self):
rule_name = ":%s_%s" % (self.current_rule, self.current_subrule)
self.current_subrule += 1
return rule_name
- def new_item( self, itm ):
- self.items.append( itm )
+ def new_item(self, itm):
+ self.items.append(itm)
return itm
- def visit_grammar( self, node ):
- # print "Grammar:"
+ def visit_grammar(self, node):
for rule in node.nodes:
rule.visit(self)
# the rules are registered already
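new_name() is what gives anonymous subrules their identity: each generated name is ':<current rule>_<counter>', and the leading colon is exactly what visit_alternative() below tests with startswith(':') to recognise generated rules. A minimal sketch of the scheme (NameGen is a hypothetical stand-in for the visitor's state):

    class NameGen(object):
        # Hypothetical stand-in for the visitor's naming state.
        def __init__(self, rule):
            self.current_rule = rule
            self.current_subrule = 0

        def new_name(self):
            name = ":%s_%s" % (self.current_rule, self.current_subrule)
            self.current_subrule += 1
            return name

    gen = NameGen("atom")
    assert gen.new_name() == ":atom_0"
    assert gen.new_name() == ":atom_1"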
@@ -103,23 +105,23 @@
# XXX .keywords also contains punctuations
self.terminals['NAME'].keywords = self.tokens.keys()
- def visit_rule( self, node ):
+ def visit_rule(self, node):
symdef = node.nodes[0].value
self.current_rule = symdef
self.current_subrule = 0
alt = node.nodes[1]
rule = alt.visit(self)
- if not isinstance( rule, Token ):
+ if not isinstance(rule, Token):
rule.name = symdef
self.rules[symdef] = rule
- def visit_alternative( self, node ):
- items = [ node.nodes[0].visit(self) ]
+ def visit_alternative(self, node):
+ items = [node.nodes[0].visit(self)]
items += node.nodes[1].visit(self)
if len(items) == 1 and items[0].name.startswith(':'):
return items[0]
- alt = Alternative( self.new_name(), items )
- return self.new_item( alt )
+ alt = Alternative(self.new_name(), items)
+ return self.new_item(alt)
def visit_sequence( self, node ):
""" """
@@ -181,11 +183,14 @@
rule_name = self.new_name()
tok = star_opt.nodes[0].nodes[0]
if tok.value == '+':
- return self.new_item( KleenStar( rule_name, _min=1, rule = myrule ) )
+ item = KleenStar(rule_name, _min=1, rule=myrule)
+ return self.new_item(item)
elif tok.value == '*':
- return self.new_item( KleenStar( rule_name, _min=0, rule = myrule ) )
+ item = KleenStar(rule_name, _min=0, rule=myrule)
+ return self.new_item(item)
else:
- raise SyntaxError("Got symbol star_opt with value='%s'" % tok.value )
+ raise SyntaxError("Got symbol star_opt with value='%s'"
+ % tok.value)
return myrule
rules = None
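For reference, the star_opt handling above maps EBNF repetition suffixes onto KleenStar bounds. A hedged sketch of just that mapping (repetition_bounds is hypothetical; -1 for "no upper bound" follows the convention spelled out in KleenStar's docstring further down):

    def repetition_bounds(suffix):
        # '+' means one or more, '*' zero or more; -1 encodes infinity.
        if suffix == '+':
            return (1, -1)
        elif suffix == '*':
            return (0, -1)
        raise SyntaxError("Got symbol star_opt with value='%s'" % suffix)

    assert repetition_bounds('+') == (1, -1)
    assert repetition_bounds('*') == (0, -1)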
Modified: pypy/dist/pypy/interpreter/pyparser/grammar.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/grammar.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/grammar.py Mon Jul 4 21:47:55 2005
@@ -78,8 +78,9 @@
class BaseGrammarBuilder(object):
"""Base/default class for a builder"""
def __init__(self, rules=None, debug=0):
- self.rules = rules or {} # a dictionary of grammar rules for debug/reference
- # XXX This attribute is here for convenience
+ # a dictionary of grammar rules for debug/reference
+ self.rules = rules or {}
+ # This attribute is here for convenience
self.source_encoding = None
self.debug = debug
self.stack = []
@@ -166,8 +167,8 @@
"""
if not USE_LOOKAHEAD:
return self._match(source, builder, level)
- pos1 = -1 # XXX make the annotator happy
- pos2 = -1 # XXX make the annotator happy
+ pos1 = -1 # make the annotator happy
+ pos2 = -1 # make the annotator happy
token = source.peek()
if self._trace:
pos1 = source.get_pos()
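The pos1/pos2 assignments are the usual RPython idiom: the annotator insists that every variable be bound, with a stable type, on all control-flow paths, so variables that are only meaningful under tracing get a dummy value up front. Illustration only (match_with_trace is hypothetical; get_pos() is the source API used above):

    def match_with_trace(source, trace=False):
        pos1 = -1                    # pre-bind: same type on every path
        if trace:
            pos1 = source.get_pos()  # only meaningful when tracing
        # ... the actual matching work would go here ...
        return pos1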
@@ -242,9 +243,10 @@
def debug_return(self, ret, *args ):
# FIXME: use a wrapper of match() methods instead of debug_return()
# to prevent additional indirection
- if ret and DEBUG>0:
+ if ret and DEBUG > 0:
sargs = ",".join( [ str(i) for i in args ] )
- print "matched %s (%s): %s" % (self.__class__.__name__, sargs, self.display() )
+ print "matched %s (%s): %s" % (self.__class__.__name__,
+ sargs, self.display() )
return ret
@@ -268,8 +270,9 @@
return other in self.first_set
def reorder_rule(self):
- """Called after the computation of first set to allow rules to be reordered
- to avoid ambiguities"""
+ """Called after the computation of first set to allow rules to be
+ reordered to avoid ambiguities
+ """
pass
class Alternative(GrammarElement):
@@ -285,7 +288,7 @@
"""If any of the rules in self.args matches
returns the object built from the first rules that matches
"""
- if DEBUG>1:
+ if DEBUG > 1:
print "try alt:", self.display()
tok = source.peek()
# Here we stop at the first match we should
@@ -304,7 +307,7 @@
return 0
def display(self, level=0):
- if level==0:
+ if level == 0:
name = self.name + " -> "
elif not self.name.startswith(":"):
return self.name
@@ -344,12 +347,13 @@
# a same alternative
for token in rule.first_set:
if token is not EmptyToken and token in tokens_set:
- print "Warning, token %s in\n\t%s's first set is part " \
- "of a previous rule's first set in alternative\n\t" \
- "%s" % (token, rule, self)
+ print "Warning, token %s in\n\t%s's first set is " \
+ " part of a previous rule's first set in " \
+ " alternative\n\t%s" % (token, rule, self)
tokens_set.append(token)
if len(empty_set) > 1 and not self._reordered:
- print "Warning: alternative %s has more than one rule matching Empty" % self
+ print "Warning: alternative %s has more than one rule " \
+ "matching Empty" % self
self._reordered = True
self.args[:] = not_empty_set
self.args.extend( empty_set )
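The warning above fires when two branches of an alternative share a token in their first sets, which makes a one-token-lookahead choice ambiguous. A self-contained sketch of the check (first_set_conflicts is hypothetical; the real code walks GrammarElement objects rather than plain sets):

    def first_set_conflicts(first_sets):
        # Report (token, earlier_branch, later_branch) for every token
        # that appears in more than one branch's first set.
        seen = {}
        conflicts = []
        for i, fset in enumerate(first_sets):
            for token in fset:
                if token in seen:
                    conflicts.append((token, seen[token], i))
                else:
                    seen[token] = i
        return conflicts

    assert first_set_conflicts([{'NAME'}, {'NUMBER'}]) == []
    assert first_set_conflicts([{'NAME'}, {'NAME'}]) == [('NAME', 0, 1)]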
@@ -365,7 +369,7 @@
def _match(self, source, builder, level=0):
"""matches all of the symbols in order"""
- if DEBUG>1:
+ if DEBUG > 1:
print "try seq:", self.display()
ctx = source.context()
bctx = builder.context()
@@ -381,7 +385,7 @@
return self.debug_return( ret )
def display(self, level=0):
- if level == 0:
+ if level == 0:
name = self.name + " -> "
elif not self.name.startswith(":"):
return self.name
@@ -431,9 +435,11 @@
# self.first_set[EmptyToken] = 1
def _match(self, source, builder, level=0):
- """matches a number of times self.args[0]. the number must be comprised
- between self._min and self._max inclusive. -1 is used to represent infinity"""
- if DEBUG>1:
+ """matches a number of times self.args[0]. the number must be
+ comprised between self._min and self._max inclusive. -1 is used to
+ represent infinity
+ """
+ if DEBUG > 1:
print "try kle:", self.display()
ctx = source.context()
bctx = builder.context()
@@ -507,7 +513,6 @@
"""
ctx = source.context()
tk = source.next()
- # XXX: match_token
if tk.name == self.name:
if self.value is None:
ret = builder.token( tk.name, tk.value, source )
@@ -515,7 +520,7 @@
elif self.value == tk.value:
ret = builder.token( tk.name, tk.value, source )
return self.debug_return( ret, tk.name, tk.value )
- if DEBUG>1:
+ if DEBUG > 1:
print "tried tok:", self.display()
source.restore( ctx )
return 0
@@ -534,9 +539,6 @@
must be equal
- a tuple, such as those yielded by the Python lexer, in which case
the comparison algorithm is similar to the one in match()
- XXX:
- 1/ refactor match and __eq__ ?
- 2/ make source.next and source.peek return a Token() instance
"""
if not isinstance(other, Token):
raise RuntimeError("Unexpected token type %r" % other)
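The docstring above allows comparing a Token either against another Token or against a (name, value) tuple as yielded by the lexer. A simplified stand-in (not the grammar.py class) showing both comparison forms:

    class Token(object):
        def __init__(self, name, value=None):
            self.name = name
            self.value = value

        def __eq__(self, other):
            if isinstance(other, Token):
                return self.name == other.name and self.value == other.value
            if isinstance(other, tuple) and len(other) == 2:
                return (self.name, self.value) == other
            raise RuntimeError("Unexpected token type %r" % (other,))

    assert Token('NAME', 'x') == Token('NAME', 'x')
    assert Token('NAME', 'x') == ('NAME', 'x')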
Modified: pypy/dist/pypy/interpreter/pyparser/pythonlexer.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/pythonlexer.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/pythonlexer.py Mon Jul 4 21:47:55 2005
@@ -4,7 +4,7 @@
"""
import symbol
-from grammar import TokenSource, Token
+from pypy.interpreter.pyparser.grammar import TokenSource, Token
# Don't import string for that ...
NAMECHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
NUMCHARS = '0123456789'
@@ -113,10 +113,13 @@
last_comment = ''
encoding = None
strstart = (0, 0)
+ # make the annotator happy
pos = -1
lines.append('') # XXX HACK probably not needed
- endDFA = automata.DFA([], []) # XXX Make the translator happy
- line = '' # XXX Make the translator happy
+ # make the annotator happy
+ endDFA = automata.DFA([], [])
+ # make the annotator happy
+ line = ''
for line in lines:
lnum = lnum + 1
pos, max = 0, len(line)
@@ -173,9 +176,6 @@
last_comment = ''
# XXX Skip NL and COMMENT Tokens
# token_list.append((tok, line, lnum, pos))
- # token_list.append(((NL, COMMENT)[line[pos] == '#'],
- # line[pos:],
- # (lnum, pos), (lnum, len(line)), line))
continue
if column > indents[-1]: # count indents or dedents
@@ -183,17 +183,15 @@
tok = token_from_values(tokenmod.INDENT, line[:pos])
token_list.append((tok, line, lnum, pos))
last_comment = ''
- # token_list.append((INDENT, line[:pos],(lnum, 0),(lnum,pos),line))
while column < indents[-1]:
indents = indents[:-1]
tok = token_from_values(tokenmod.DEDENT, '')
token_list.append((tok, line, lnum, pos))
last_comment = ''
- # token_list.append((DEDENT, '', (lnum, pos),(lnum,pos),line))
-
else: # continued statement
if not line:
- raise TokenError("EOF in multi-line statement", line, (lnum, 0), token_list)
+ raise TokenError("EOF in multi-line statement", line,
+ (lnum, 0), token_list)
continued = 0
while pos < max:
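The indents list above is the classic tokenizer indentation stack: a deeper column pushes one INDENT, a shallower column pops entries and emits one DEDENT per pop. A minimal runnable sketch of that rule (indent_tokens is hypothetical; the real code also builds Token objects and must cope with mismatched columns):

    def indent_tokens(column, indents):
        toks = []
        if column > indents[-1]:        # deeper: open one block
            indents.append(column)
            toks.append('INDENT')
        while column < indents[-1]:     # shallower: close blocks
            indents.pop()
            toks.append('DEDENT')
        return toks

    indents = [0]
    assert indent_tokens(4, indents) == ['INDENT']
    assert indent_tokens(0, indents) == ['DEDENT']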
@@ -213,7 +211,6 @@
tok = token_from_values(tokenmod.NUMBER, token)
token_list.append((tok, line, lnum, pos))
last_comment = ''
- # token_list.append((NUMBER, token, spos, epos, line))
elif initial in '\r\n':
if parenlev > 0:
tok = token_from_values(tokenmod.NL, token)
@@ -225,7 +222,6 @@
tok.value = last_comment
token_list.append((tok, line, lnum, pos))
last_comment = ''
- # token_list.append((parenlev > 0 and NL or NEWLINE, token, spos, epos, line))
elif initial == '#':
tok = token_from_values(tokenmod.COMMENT, token)
last_comment = token
@@ -244,7 +240,6 @@
tok = token_from_values(tokenmod.STRING, token)
token_list.append((tok, line, lnum, pos))
last_comment = ''
- # token_list.append((STRING, token, spos, (lnum, pos), line))
else:
strstart = (lnum, start) # multiple lines
contstr = line[start:]
@@ -269,29 +264,26 @@
tok = token_from_values(tokenmod.NAME, token)
token_list.append((tok, line, lnum, pos))
last_comment = ''
- # token_list.append((NAME, token, spos, epos, line))
elif initial == '\\': # continued stmt
continued = 1
else:
- if initial in '([{': parenlev = parenlev + 1
- elif initial in ')]}': parenlev = parenlev - 1
+ if initial in '([{':
+ parenlev = parenlev + 1
+ elif initial in ')]}':
+ parenlev = parenlev - 1
tok = token_from_values(tokenmod.OP, token)
token_list.append((tok, line, lnum, pos))
last_comment = ''
- # token_list.append((OP, token, spos, epos, line))
else:
tok = token_from_values(tokenmod.ERRORTOKEN, line[pos])
token_list.append((tok, line, lnum, pos))
last_comment = ''
- # token_list.append((ERRORTOKEN, line[pos],
- # (lnum, pos), (lnum, pos+1), line))
pos = pos + 1
lnum -= 1
for indent in indents[1:]: # pop remaining indent levels
tok = token_from_values(tokenmod.DEDENT, '')
token_list.append((tok, line, lnum, pos))
- # token_list.append((DEDENT, '', (lnum, 0), (lnum, 0), ''))
## <XXX> adim: this can't be (only) that, can it ?
if token_list and token_list[-1] != symbol.file_input:
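parenlev in the hunk above implements implicit line joining: open brackets bump a counter, close brackets decrement it, and a newline only ends a logical line (NEWLINE rather than NL) while no bracket is open. A hedged sketch of just that bookkeeping (both helpers are hypothetical):

    def update_parenlev(ch, parenlev):
        if ch in '([{':
            parenlev = parenlev + 1
        elif ch in ')]}':
            parenlev = parenlev - 1
        return parenlev

    def newline_token(parenlev):
        # inside brackets a newline is a non-logical NL token
        return 'NL' if parenlev > 0 else 'NEWLINE'

    level = update_parenlev('(', 0)
    assert newline_token(level) == 'NL'
    level = update_parenlev(')', level)
    assert newline_token(level) == 'NEWLINE'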
@@ -299,7 +291,7 @@
## </XXX>
tok = token_from_values(tokenmod.ENDMARKER, '',)
token_list.append((tok, line, lnum, pos))
- # token_list.append((ENDMARKER, '', (lnum, 0), (lnum, 0), ''))
+
return token_list, encoding
class PythonSource(TokenSource):
@@ -330,6 +322,7 @@
return self._current_line
def current_lineno(self):
+ """Returns the current lineno"""
return self._lineno
def context(self):
@@ -370,8 +363,8 @@
return (self._current_line, self._lineno)
# return 'line %s : %s' % ('XXX', self._current_line)
-NONE_LIST = [tokenmod.ENDMARKER, tokenmod.INDENT, tokenmod.DEDENT,]
-NAMED_LIST = [tokenmod.OP, ]
+NONE_LIST = [tokenmod.ENDMARKER, tokenmod.INDENT, tokenmod.DEDENT]
+NAMED_LIST = [tokenmod.OP]
def token_from_values(tok_type, tok_string):
"""Compatibility layer between both parsers"""
Modified: pypy/dist/pypy/interpreter/pyparser/pytokenize.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/pytokenize.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/pytokenize.py Mon Jul 4 21:47:55 2005
@@ -13,23 +13,21 @@
expressions have been replaced with hand built DFA's using the
basil.util.automata module.
-XXX This now assumes that the automata module is in the Python path.
-
$Id: pytokenize.py,v 1.3 2003/10/03 16:31:53 jriehl Exp $
"""
# ______________________________________________________________________
from __future__ import generators
-import automata
+from pypy.interpreter.pyparser import automata
# ______________________________________________________________________
# COPIED:
-from token import *
-
import token
__all__ = [x for x in dir(token) if x[0] != '_'] + ["COMMENT", "tokenize",
"generate_tokens", "NL"]
del x
+N_TOKENS = token.N_TOKENS
+tok_name = token.tok_name
del token
COMMENT = N_TOKENS
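The hunk above trades 'from token import *' for explicit bindings, then allocates new token codes past N_TOKENS. A sketch of the same pattern (the NL line is an assumption, mirroring what the stdlib tokenize historically did for its extra tokens):

    import token

    N_TOKENS = token.N_TOKENS
    tok_name = dict(token.tok_name)    # copy before extending

    COMMENT = N_TOKENS                 # first free code past the stdlib's
    tok_name[COMMENT] = 'COMMENT'
    NL = N_TOKENS + 1                  # assumed, as in stdlib tokenize
    tok_name[NL] = 'NL'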