[pypy-commit] pypy pyparser-improvements-3: introduce a Token class instead of passing 5 values around all the time

cfbolz pypy.commits at gmail.com
Sat Apr 14 05:21:20 EDT 2018


Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch: pyparser-improvements-3
Changeset: r94316:1065b72e0409
Date: 2018-04-14 10:56 +0200
http://bitbucket.org/pypy/pypy/changeset/1065b72e0409/

Log:	introduce a Token class instead of passing 5 values around all the
	time
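
For context, here is a minimal standalone sketch of the refactoring: a Token
object bundles the five values (token type, value, lineno, column, line) that
were previously threaded through every parser method. The Token class below
mirrors the one added in parser.py; the tokens_for() driver using the stdlib
StringIO and tokenize modules is only illustrative (it follows the pattern in
test_parser.py) and is not part of the commit:

    import StringIO
    import tokenize

    class Token(object):
        def __init__(self, token_type, value, lineno, column, line):
            self.token_type = token_type
            self.value = value
            self.lineno = lineno
            # 0-based offset into the source line
            self.column = column
            self.line = line

        def __repr__(self):
            return "Token(%s, %s)" % (self.token_type, self.value)

    def tokens_for(source):
        # Yield one Token object per token instead of a 5-tuple,
        # so downstream code takes a single argument.
        readline = StringIO.StringIO(source + "\n").readline
        for tp, value, (lineno, column), _end, line in tokenize.generate_tokens(readline):
            yield Token(tp, value, lineno, column, line)

    for tok in tokens_for("x = 1"):
        print tok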

diff --git a/pypy/interpreter/pyparser/parser.py b/pypy/interpreter/pyparser/parser.py
--- a/pypy/interpreter/pyparser/parser.py
+++ b/pypy/interpreter/pyparser/parser.py
@@ -65,6 +65,20 @@
             b[pos] |= bit
         return str(b)
 
+
+class Token(object):
+    def __init__(self, token_type, value, lineno, column, line):
+        self.token_type = token_type
+        self.value = value
+        self.lineno = lineno
+        # 0-based offset
+        self.column = column
+        self.line = line
+
+    def __repr__(self):
+        return "Token(%s, %s)" % (self.token_type, self.value)
+
+
 class Node(object):
 
     __slots__ = ("type", )
@@ -99,11 +113,11 @@
 
 class Terminal(Node):
     __slots__ = ("value", "lineno", "column")
-    def __init__(self, type, value, lineno, column):
-        Node.__init__(self, type)
-        self.value = value
-        self.lineno = lineno
-        self.column = column
+    def __init__(self, token):
+        Node.__init__(self, token.token_type)
+        self.value = token.value
+        self.lineno = token.lineno
+        self.column = token.column
 
     def __repr__(self):
         return "Terminal(type=%s, value=%r)" % (self.type, self.value)
@@ -193,20 +207,14 @@
 
 class ParseError(Exception):
 
-    def __init__(self, msg, token_type, value, lineno, column, line,
-                 expected=-1, expected_str=None):
+    def __init__(self, msg, token, expected=-1, expected_str=None):
         self.msg = msg
-        self.token_type = token_type
-        self.value = value
-        self.lineno = lineno
-        # this is a 0-based index
-        self.column = column
-        self.line = line
+        self.token = token
         self.expected = expected
         self.expected_str = expected_str
 
     def __str__(self):
-        return "ParserError(%s, %r)" % (self.token_type, self.value)
+        return "ParserError(%s)" % (self.token, )
 
 
 class StackEntry(object):
@@ -249,8 +257,8 @@
         self.root = None
         self.stack = StackEntry(None, self.grammar.dfas[start - 256], 0)
 
-    def add_token(self, token_type, value, lineno, column, line):
-        label_index = self.classify(token_type, value, lineno, column, line)
+    def add_token(self, token):
+        label_index = self.classify(token)
         sym_id = 0 # for the annotator
         while True:
             dfa = self.stack.dfa
@@ -261,7 +269,7 @@
                 sym_id = self.grammar.labels[i]
                 if label_index == i:
                     # We matched a terminal.
-                    self.shift(next_state, token_type, value, lineno, column)
+                    self.shift(next_state, token)
                     state = states[next_state]
                     # While the only possible action is to accept, pop nodes off
                     # the stack.
@@ -278,8 +286,7 @@
                     sub_node_dfa = self.grammar.dfas[sym_id - 256]
                     # Check if this token can start a child node.
                     if sub_node_dfa.could_match_token(label_index):
-                        self.push(sub_node_dfa, next_state, sym_id, lineno,
-                                  column)
+                        self.push(sub_node_dfa, next_state, sym_id)
                         break
             else:
                 # We failed to find any arcs to another state, so unless this
@@ -287,8 +294,7 @@
                 if is_accepting:
                     self.pop()
                     if self.stack is None:
-                        raise ParseError("too much input", token_type, value,
-                                         lineno, column, line)
+                        raise ParseError("too much input", token)
                 else:
                     # If only one possible input would satisfy, attach it to the
                     # error.
@@ -299,28 +305,26 @@
                     else:
                         expected = -1
                         expected_str = None
-                    raise ParseError("bad input", token_type, value, lineno,
-                                     column, line, expected, expected_str)
+                    raise ParseError("bad input", token, expected, expected_str)
 
-    def classify(self, token_type, value, lineno, column, line):
+    def classify(self, token):
         """Find the label for a token."""
-        if token_type == self.grammar.KEYWORD_TOKEN:
-            label_index = self.grammar.keyword_ids.get(value, -1)
+        if token.token_type == self.grammar.KEYWORD_TOKEN:
+            label_index = self.grammar.keyword_ids.get(token.value, -1)
             if label_index != -1:
                 return label_index
-        label_index = self.grammar.token_ids.get(token_type, -1)
+        label_index = self.grammar.token_ids.get(token.token_type, -1)
         if label_index == -1:
-            raise ParseError("invalid token", token_type, value, lineno, column,
-                             line)
+            raise ParseError("invalid token", token)
         return label_index
 
-    def shift(self, next_state, token_type, value, lineno, column):
+    def shift(self, next_state, token):
         """Shift a non-terminal and prepare for the next state."""
-        new_node = Terminal(token_type, value, lineno, column)
+        new_node = Terminal(token)
         self.stack.node_append_child(new_node)
         self.stack.state = next_state
 
-    def push(self, next_dfa, next_state, node_type, lineno, column):
+    def push(self, next_dfa, next_state, node_type):
         """Push a terminal and adjust the current state."""
         self.stack.state = next_state
         self.stack = self.stack.push(next_dfa, 0)
diff --git a/pypy/interpreter/pyparser/pyparse.py b/pypy/interpreter/pyparser/pyparse.py
--- a/pypy/interpreter/pyparser/pyparse.py
+++ b/pypy/interpreter/pyparser/pyparse.py
@@ -166,7 +166,7 @@
                     self.grammar = pygram.python_grammar
 
                 for tp, value, lineno, column, line in tokens:
-                    if self.add_token(tp, value, lineno, column, line):
+                    if self.add_token(parser.Token(tp, value, lineno, column, line)):
                         break
             except error.TokenError as e:
                 e.filename = compile_info.filename
@@ -190,7 +190,7 @@
 
                 # parser.ParseError(...).column is 0-based, but the offsets in the
                 # exceptions in the error module are 1-based, hence the '+ 1'
-                raise new_err(msg, e.lineno, e.column + 1, e.line,
+                raise new_err(msg, e.token.lineno, e.token.column + 1, e.token.line,
                               compile_info.filename)
             else:
                 tree = self.root
diff --git a/pypy/interpreter/pyparser/test/test_parser.py b/pypy/interpreter/pyparser/test/test_parser.py
--- a/pypy/interpreter/pyparser/test/test_parser.py
+++ b/pypy/interpreter/pyparser/test/test_parser.py
@@ -20,7 +20,7 @@
         rl = StringIO.StringIO(input + "\n").readline
         gen = tokenize.generate_tokens(rl)
         for tp, value, begin, end, line in gen:
-            if self.add_token(tp, value, begin[0], begin[1], line):
+            if self.add_token(parser.Token(tp, value, begin[0], begin[1], line)):
                 py.test.raises(StopIteration, gen.next)
         return self.root
 
@@ -58,7 +58,7 @@
                 value = "\n"
             else:
                 value = ""
-            n = parser.Terminal(tp, value, 0, 0)
+            n = parser.Terminal(parser.Token(tp, value, 0, 0, ''))
         else:
             tp = gram.symbol_ids[data[0]]
             n = parser.Nonterminal(tp)

