[pypy-commit] pypy default: parser.sequence2st: add validation of the passed tuple.

amauryfa pypy.commits at gmail.com
Wed Jan 3 17:32:55 EST 2018


Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: 
Changeset: r93616:749063799a58
Date: 2018-01-02 00:53 +0100
http://bitbucket.org/pypy/pypy/changeset/749063799a58/

Log:	parser.sequence2st: add validation of the passed tuple. Do it the
	2016 way, by walking the grammar DFA, instead of a ton of custom
	validation code.

diff --git a/pypy/module/parser/pyparser.py b/pypy/module/parser/pyparser.py
--- a/pypy/module/parser/pyparser.py
+++ b/pypy/module/parser/pyparser.py
@@ -124,9 +124,13 @@
     return space.call_args(space.getattr(w_st, space.newtext("compile")), __args__)
 
 
-def raise_parser_error(space, w_tuple, message):
+def parser_error(space, w_tuple, message):
     raise OperationError(get_error(space), space.newtuple(
-        [w_tuple, space.newtext("Illegal component tuple.")]))
+        [w_tuple, space.newtext(message)]))
+
+def parse_error(space, message):
+    return OperationError(get_error(space),
+                         space.newtext(message))
 
 
 def get_node_type(space, w_tuple):
@@ -134,7 +138,7 @@
         w_type = space.getitem(w_tuple, space.newint(0))
         return space.int_w(w_type)
     except OperationError:
-        raise_parser_error(space, w_tuple, "Illegal component tuple.")
+        raise parser_error(space, w_tuple, "Illegal component tuple.")
 
 class NodeState:
     def __init__(self):
@@ -146,7 +150,7 @@
     if 0 <= type < 256:
         # The tuple is simple, but it doesn't start with a start symbol.
         # Raise an exception now and be done with it.
-        raise_parser_error(space, w_tuple,
+        raise parser_error(space, w_tuple,
                            "Illegal syntax-tree; cannot start with terminal symbol.")
     node = pyparse.parser.Nonterminal(type, [])
     build_node_children(space, w_tuple, node, node_state)
@@ -162,7 +166,8 @@
             elif length == 3:
                 _, w_obj, w_lineno = space.unpackiterable(w_elem, 3)
             else:
-                raise_error(space, "terminal nodes must have 2 or 3 entries")
+                raise parse_error(
+                    space, "terminal nodes must have 2 or 3 entries")
             strn = space.text_w(w_obj)
             child = pyparse.parser.Terminal(type, strn, node_state.lineno, 0)
         else:
@@ -174,7 +179,36 @@
             node_state.lineno += 1
 
 
+def validate_node(space, tree):
+    assert tree.type >= 256
+    type = tree.type - 256
+    parser = pyparse.PythonParser(space)
+    if type >= len(parser.grammar.dfas):
+        raise parse_error(space, "Unrecognized node type %d." % type)
+    dfa = parser.grammar.dfas[type]
+    # Run the DFA for this nonterminal
+    states, first = dfa
+    arcs, is_accepting = states[0]
+    for pos in range(tree.num_children()):
+        ch = tree.get_child(pos)
+        for i, next_state in arcs:
+            label = parser.grammar.labels[i]
+            if label == ch.type:
+                # The child is acceptable; validate it recursively
+                if ch.type >= 256:
+                    validate_node(space, ch)
+                # Update the state, and move on to the next child.
+                arcs, is_accepting = states[next_state]
+                break
+        else:
+            raise parse_error(space, "Illegal node")
+    if not is_accepting:
+        raise parse_error(space, "Illegal number of children for %d node" %
+                          tree.type)
+
+
 def tuple2st(space, w_sequence):
     # Convert the tree to the internal form before checking it
     tree = build_node_tree(space, w_sequence)
+    validate_node(space, tree)
     return W_STType(tree, 'eval')
diff --git a/pypy/module/parser/test/test_parser.py b/pypy/module/parser/test/test_parser.py
--- a/pypy/module/parser/test/test_parser.py
+++ b/pypy/module/parser/test/test_parser.py
@@ -71,3 +71,19 @@
 
         check_expr("foo(1)")
         check_suite("def f(): yield 1")
+
+    def test_bad_tree(self):
+        import parser
+        # from import fred
+        tree = \
+            (257,
+             (267,
+              (268,
+               (269,
+                (281,
+                 (283, (1, 'from'), (1, 'import'),
+                  (286, (284, (1, 'fred')))))),
+               (4, ''))),
+             (4, ''), (0, ''))
+        raises(parser.ParserError,
+               parser.sequence2st, tree)


More information about the pypy-commit mailing list