[pypy-svn] r14255 - pypy/dist/pypy/interpreter/pyparser

adim at codespeak.net adim at codespeak.net
Mon Jul 4 21:47:56 CEST 2005


Author: adim
Date: Mon Jul  4 21:47:55 2005
New Revision: 14255

Modified:
   pypy/dist/pypy/interpreter/pyparser/ebnfparse.py
   pypy/dist/pypy/interpreter/pyparser/grammar.py
   pypy/dist/pypy/interpreter/pyparser/pythonlexer.py
   pypy/dist/pypy/interpreter/pyparser/pytokenize.py
Log:
small cleanups

Modified: pypy/dist/pypy/interpreter/pyparser/ebnfparse.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/ebnfparse.py	(original)
+++ pypy/dist/pypy/interpreter/pyparser/ebnfparse.py	Mon Jul  4 21:47:55 2005
@@ -8,7 +8,10 @@
 
 punct=['>=', '<>', '!=', '<', '>', '<=', '==', '\\*=',
        '//=', '%=', '^=', '<<=', '\\*\\*=', '\\', '=',
-       '\\+=', '>>=', '=', '&=', '/=', '-=', '\n,', '^', '>>', '&', '\\+', '\\*', '-', '/', '\\.', '\\*\\*', '%', '<<', '//', '\\', '', '\n\\)', '\\(', ';', ':', '@', '\\[', '\\]', '`', '\\{', '\\}']
+       '\\+=', '>>=', '=', '&=', '/=', '-=', '\n,', '^',
+       '>>', '&', '\\+', '\\*', '-', '/', '\\.', '\\*\\*',
+       '%', '<<', '//', '\\', '', '\n\\)', '\\(', ';', ':',
+       '@', '\\[', '\\]', '`', '\\{', '\\}']
 
 py_punct = re.compile(r"""
 >=|<>|!=|<|>|<=|==|~|
@@ -77,17 +80,16 @@
         self.items = []
         self.terminals['NAME'] = NameToken()
 
-    def new_name( self ):
+    def new_name(self):
         rule_name = ":%s_%s" % (self.current_rule, self.current_subrule)
         self.current_subrule += 1
         return rule_name
 
-    def new_item( self, itm ):
-        self.items.append( itm )
+    def new_item(self, itm):
+        self.items.append(itm)
         return itm
     
-    def visit_grammar( self, node ):
-        # print "Grammar:"
+    def visit_grammar(self, node):
         for rule in node.nodes:
             rule.visit(self)
         # the rules are registered already
@@ -103,23 +105,23 @@
         # XXX .keywords also contains punctuations
         self.terminals['NAME'].keywords = self.tokens.keys()
 
-    def visit_rule( self, node ):
+    def visit_rule(self, node):
         symdef = node.nodes[0].value
         self.current_rule = symdef
         self.current_subrule = 0
         alt = node.nodes[1]
         rule = alt.visit(self)
-        if not isinstance( rule, Token ):
+        if not isinstance(rule, Token):
             rule.name = symdef
         self.rules[symdef] = rule
         
-    def visit_alternative( self, node ):
-        items = [ node.nodes[0].visit(self) ]
+    def visit_alternative(self, node):
+        items = [node.nodes[0].visit(self)]
         items += node.nodes[1].visit(self)        
         if len(items) == 1 and items[0].name.startswith(':'):
             return items[0]
-        alt = Alternative( self.new_name(), items )
-        return self.new_item( alt )
+        alt = Alternative(self.new_name(), items)
+        return self.new_item(alt)
 
     def visit_sequence( self, node ):
         """ """
@@ -181,11 +183,14 @@
             rule_name = self.new_name()
             tok = star_opt.nodes[0].nodes[0]
             if tok.value == '+':
-                return self.new_item( KleenStar( rule_name, _min=1, rule = myrule ) )
+                item = KleenStar(rule_name, _min=1, rule=myrule)
+                return self.new_item(item)
             elif tok.value == '*':
-                return self.new_item( KleenStar( rule_name, _min=0, rule = myrule ) )
+                item = KleenStar(rule_name, _min=0, rule=myrule)
+                return self.new_item(item)
             else:
-                raise SyntaxError("Got symbol star_opt with value='%s'" % tok.value )
+                raise SyntaxError("Got symbol star_opt with value='%s'"
+                                  % tok.value)
         return myrule
 
 rules = None

Modified: pypy/dist/pypy/interpreter/pyparser/grammar.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/grammar.py	(original)
+++ pypy/dist/pypy/interpreter/pyparser/grammar.py	Mon Jul  4 21:47:55 2005
@@ -78,8 +78,9 @@
 class BaseGrammarBuilder(object):
     """Base/default class for a builder"""
     def __init__(self, rules=None, debug=0):
-        self.rules = rules or {} # a dictionary of grammar rules for debug/reference
-        # XXX This attribute is here for convenience
+        # a dictionary of grammar rules for debug/reference
+        self.rules = rules or {}
+        # This attribute is here for convenience
         self.source_encoding = None
         self.debug = debug
         self.stack = []
@@ -166,8 +167,8 @@
         """
         if not USE_LOOKAHEAD:
             return self._match(source, builder, level)
-        pos1 = -1 # XXX make the annotator happy
-        pos2 = -1 # XXX make the annotator happy
+        pos1 = -1 # make the annotator happy
+        pos2 = -1 # make the annotator happy
         token = source.peek()
         if self._trace:
             pos1 = source.get_pos()
@@ -242,9 +243,10 @@
     def debug_return(self, ret, *args ):
         # FIXME: use a wrapper of match() methods instead of debug_return()
         #        to prevent additional indirection
-        if ret and DEBUG>0:
+        if ret and DEBUG > 0:
             sargs = ",".join( [ str(i) for i in args ] )
-            print "matched %s (%s): %s" % (self.__class__.__name__, sargs, self.display() )
+            print "matched %s (%s): %s" % (self.__class__.__name__,
+                                           sargs, self.display() )
         return ret
 
     
@@ -268,8 +270,9 @@
         return other in self.first_set
 
     def reorder_rule(self):
-        """Called after the computation of first set to allow rules to be reordered
-        to avoid ambiguities"""
+        """Called after the computation of first set to allow rules to be
+        reordered to avoid ambiguities
+        """
         pass
 
 class Alternative(GrammarElement):
@@ -285,7 +288,7 @@
         """If any of the rules in self.args matches
         returns the object built from the first rules that matches
         """
-        if DEBUG>1:
+        if DEBUG > 1:
             print "try alt:", self.display()
         tok = source.peek()
         # Here we stop at the first match we should
@@ -304,7 +307,7 @@
         return 0
 
     def display(self, level=0):
-        if level==0:
+        if level == 0:
             name = self.name + " -> "
         elif not self.name.startswith(":"):
             return self.name
@@ -344,12 +347,13 @@
                 # a same alternative
                 for token in rule.first_set:
                     if token is not EmptyToken and token in tokens_set:
-                        print "Warning, token %s in\n\t%s's first set is part " \
-                              "of a previous rule's first set in alternative\n\t" \
-                              "%s" % (token, rule, self)
+                        print "Warning, token %s in\n\t%s's first set is " \
+                            "part of a previous rule's first set in " \
+                            "alternative\n\t%s" % (token, rule, self)
                     tokens_set.append(token)
         if len(empty_set) > 1 and not self._reordered:
-            print "Warning: alternative %s has more than one rule matching Empty" % self
+            print "Warning: alternative %s has more than one rule " \
+                "matching Empty" % self
             self._reordered = True
         self.args[:] = not_empty_set
         self.args.extend( empty_set )
@@ -365,7 +369,7 @@
 
     def _match(self, source, builder, level=0):
         """matches all of the symbols in order"""
-        if DEBUG>1:
+        if DEBUG > 1:
             print "try seq:", self.display()
         ctx = source.context()
         bctx = builder.context()
@@ -381,7 +385,7 @@
         return self.debug_return( ret )
 
     def display(self, level=0):
-        if level==0:
+        if level == 0:
             name = self.name + " -> "
         elif not self.name.startswith(":"):
             return self.name
@@ -431,9 +435,11 @@
             # self.first_set[EmptyToken] = 1
 
     def _match(self, source, builder, level=0):
-        """matches a number of times self.args[0]. the number must be comprised
-        between self._min and self._max inclusive. -1 is used to represent infinity"""
-        if DEBUG>1:
+        """matches a number of times self.args[0]. the number must be
+        comprised between self._min and self._max inclusive. -1 is used to
+        represent infinity
+        """
+        if DEBUG > 1:
             print "try kle:", self.display()
         ctx = source.context()
         bctx = builder.context()
@@ -507,7 +513,6 @@
         """
         ctx = source.context()
         tk = source.next()
-        # XXX: match_token
         if tk.name == self.name:
             if self.value is None:
                 ret = builder.token( tk.name, tk.value, source )
@@ -515,7 +520,7 @@
             elif self.value == tk.value:
                 ret = builder.token( tk.name, tk.value, source )
                 return self.debug_return( ret, tk.name, tk.value )
-        if DEBUG>1:
+        if DEBUG > 1:
             print "tried tok:", self.display()
         source.restore( ctx )
         return 0
@@ -534,9 +539,6 @@
            must be equal
          - a tuple, such as those yielded by the Python lexer, in which case
            the comparison algorithm is similar to the one in match()
-           XXX:
-             1/ refactor match and __eq__ ?
-             2/ make source.next and source.peek return a Token() instance
         """
         if not isinstance(other, Token):
             raise RuntimeError("Unexpected token type %r" % other)

Modified: pypy/dist/pypy/interpreter/pyparser/pythonlexer.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/pythonlexer.py	(original)
+++ pypy/dist/pypy/interpreter/pyparser/pythonlexer.py	Mon Jul  4 21:47:55 2005
@@ -4,7 +4,7 @@
 """
 import symbol
 
-from grammar import TokenSource, Token
+from pypy.interpreter.pyparser.grammar import TokenSource, Token
 # Don't import string for that ...
 NAMECHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
 NUMCHARS = '0123456789'
@@ -113,10 +113,13 @@
     last_comment = ''
     encoding = None
     strstart = (0, 0)
+    # make the annotator happy
     pos = -1
     lines.append('') # XXX HACK probably not needed
-    endDFA = automata.DFA([], []) # XXX Make the translator happy
-    line = ''                 # XXX Make the translator happy
+    # make the annotator happy
+    endDFA = automata.DFA([], [])
+    # make the annotator happy
+    line = ''
     for line in lines:
         lnum = lnum + 1
         pos, max = 0, len(line)
@@ -173,9 +176,6 @@
                     last_comment = ''
                 # XXX Skip NL and COMMENT Tokens
                 # token_list.append((tok, line, lnum, pos))
-                # token_list.append(((NL, COMMENT)[line[pos] == '#'],
-                #                    line[pos:],
-                #                    (lnum, pos), (lnum, len(line)), line))
                 continue
 
             if column > indents[-1]:           # count indents or dedents
@@ -183,17 +183,15 @@
                 tok = token_from_values(tokenmod.INDENT, line[:pos])
                 token_list.append((tok, line, lnum, pos))
                 last_comment = ''
-                # token_list.append((INDENT, line[:pos],(lnum, 0),(lnum,pos),line))
             while column < indents[-1]:
                 indents = indents[:-1]
                 tok = token_from_values(tokenmod.DEDENT, '')
                 token_list.append((tok, line, lnum, pos))
                 last_comment = ''
-                # token_list.append((DEDENT, '', (lnum, pos),(lnum,pos),line))
-
         else:                                  # continued statement
             if not line:
-                raise TokenError("EOF in multi-line statement", line, (lnum, 0), token_list)
+                raise TokenError("EOF in multi-line statement", line,
+                                 (lnum, 0), token_list)
             continued = 0
 
         while pos < max:
@@ -213,7 +211,6 @@
                     tok = token_from_values(tokenmod.NUMBER, token)
                     token_list.append((tok, line, lnum, pos))
                     last_comment = ''
-                    # token_list.append((NUMBER, token, spos, epos, line))
                 elif initial in '\r\n':
                     if parenlev > 0:
                         tok = token_from_values(tokenmod.NL, token)
@@ -225,7 +222,6 @@
                         tok.value = last_comment
                         token_list.append((tok, line, lnum, pos))
                         last_comment = ''
-                    # token_list.append((parenlev > 0 and NL or NEWLINE, token, spos, epos, line))
                 elif initial == '#':
                     tok = token_from_values(tokenmod.COMMENT, token)
                     last_comment = token
@@ -244,7 +240,6 @@
                         tok = token_from_values(tokenmod.STRING, token)
                         token_list.append((tok, line, lnum, pos))
                         last_comment = ''
-                        # token_list.append((STRING, token, spos, (lnum, pos), line))
                     else:
                         strstart = (lnum, start)           # multiple lines
                         contstr = line[start:]
@@ -269,29 +264,26 @@
                     tok = token_from_values(tokenmod.NAME, token)
                     token_list.append((tok, line, lnum, pos))
                     last_comment = ''
-                    # token_list.append((NAME, token, spos, epos, line))
                 elif initial == '\\':                      # continued stmt
                     continued = 1
                 else:
-                    if initial in '([{': parenlev = parenlev + 1
-                    elif initial in ')]}': parenlev = parenlev - 1
+                    if initial in '([{':
+                        parenlev = parenlev + 1
+                    elif initial in ')]}':
+                        parenlev = parenlev - 1
                     tok = token_from_values(tokenmod.OP, token)
                     token_list.append((tok, line, lnum, pos)) 
                     last_comment = ''
-                    # token_list.append((OP, token, spos, epos, line))
             else:
                 tok = token_from_values(tokenmod.ERRORTOKEN, line[pos])
                 token_list.append((tok, line, lnum, pos))
                 last_comment = ''
-                # token_list.append((ERRORTOKEN, line[pos],
-                #                    (lnum, pos), (lnum, pos+1), line))
                 pos = pos + 1
 
     lnum -= 1
     for indent in indents[1:]:                 # pop remaining indent levels
         tok = token_from_values(tokenmod.DEDENT, '')
         token_list.append((tok, line, lnum, pos))
-        # token_list.append((DEDENT, '', (lnum, 0), (lnum, 0), ''))
         
     ## <XXX> adim: this can't be (only) that, can it ?
     if token_list and token_list[-1] != symbol.file_input:
@@ -299,7 +291,7 @@
     ## </XXX>
     tok = token_from_values(tokenmod.ENDMARKER, '',)
     token_list.append((tok, line, lnum, pos))
-    # token_list.append((ENDMARKER, '', (lnum, 0), (lnum, 0), ''))
+
     return token_list, encoding
 
 class PythonSource(TokenSource):
@@ -330,6 +322,7 @@
         return self._current_line
 
     def current_lineno(self):
+        """Returns the current lineno"""
         return self._lineno
 
     def context(self):
@@ -370,8 +363,8 @@
         return (self._current_line, self._lineno)
         # return 'line %s : %s' % ('XXX', self._current_line)
 
-NONE_LIST = [tokenmod.ENDMARKER, tokenmod.INDENT, tokenmod.DEDENT,]
-NAMED_LIST = [tokenmod.OP, ]
+NONE_LIST = [tokenmod.ENDMARKER, tokenmod.INDENT, tokenmod.DEDENT]
+NAMED_LIST = [tokenmod.OP]
 
 def token_from_values(tok_type, tok_string):
     """Compatibility layer between both parsers"""

Modified: pypy/dist/pypy/interpreter/pyparser/pytokenize.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/pytokenize.py	(original)
+++ pypy/dist/pypy/interpreter/pyparser/pytokenize.py	Mon Jul  4 21:47:55 2005
@@ -13,23 +13,21 @@
 expressions have been replaced with hand built DFA's using the
 basil.util.automata module.
 
-XXX This now assumes that the automata module is in the Python path.
-
 $Id: pytokenize.py,v 1.3 2003/10/03 16:31:53 jriehl Exp $
 """
 # ______________________________________________________________________
 
 from __future__ import generators
-import automata
+from pypy.interpreter.pyparser import automata
 
 # ______________________________________________________________________
 # COPIED:
-from token import *
-
 import token
 __all__ = [x for x in dir(token) if x[0] != '_'] + ["COMMENT", "tokenize",
            "generate_tokens", "NL"]
 del x
+N_TOKENS = token.N_TOKENS
+tok_name = token.tok_name
 del token
 
 COMMENT = N_TOKENS


