[Python-checkins] [3.9] bpo-42218: Correctly handle errors in left-recursive rules (GH-23065) (GH-23066)

miss-islington webhook-mailer at python.org
Sat Oct 31 15:06:12 EDT 2020


https://github.com/python/cpython/commit/cfcb952e30e01d7cce430829af8edc7afc94e0b1
commit: cfcb952e30e01d7cce430829af8edc7afc94e0b1
branch: 3.9
author: Lysandros Nikolaou <lisandrosnik at gmail.com>
committer: miss-islington <31488909+miss-islington at users.noreply.github.com>
date: 2020-10-31T12:06:03-07:00
summary:

[3.9] bpo-42218: Correctly handle errors in left-recursive rules (GH-23065) (GH-23066)



Left-recursive rules need to check for errors explicitly, since
even if the rule returns NULL, the parsing might continue and lead
to long-distance failures.

Co-authored-by: Pablo Galindo <Pablogsal at gmail.com>
(cherry picked from commit 02cdfc93f82fecdb7eae97a868d4ee222b9875d9)

Automerge-Triggered-By: GH:lysnikolaou

files:
A Misc/NEWS.d/next/Core and Builtins/2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst
M Lib/test/test_syntax.py
M Parser/pegen/parse.c
M Tools/peg_generator/pegen/c_generator.py

diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py
index a95992d869e5e..1336231fbbfbf 100644
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -964,6 +964,14 @@ def func2():
 """
         self._check_error(code, "invalid syntax")
 
+    def test_invalid_line_continuation_left_recursive(self):
+        # Check bpo-42218: SyntaxErrors following left-recursive rules
+        # (t_primary_raw in this case) need to be tested explicitly
+        self._check_error("A.\u018a\\ ",
+                          "unexpected character after line continuation character")
+        self._check_error("A.\u03bc\\\n",
+                          "unexpected EOF while parsing")
+
 def test_main():
     support.run_unittest(SyntaxTestCase)
     from test import test_syntax
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst b/Misc/NEWS.d/next/Core and Builtins/2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst
new file mode 100644
index 0000000000000..a38a310e4b45b
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst	
@@ -0,0 +1,3 @@
+Fixed a bug in the PEG parser that was causing crashes in debug mode. Now errors are checked
+in left-recursive rules to avoid cases where such errors do not get handled in time and appear
+as long-distance crashes in other places.
diff --git a/Parser/pegen/parse.c b/Parser/pegen/parse.c
index bae9463e274a3..97cefa9c2a3d4 100644
--- a/Parser/pegen/parse.c
+++ b/Parser/pegen/parse.c
@@ -3460,6 +3460,8 @@ dotted_name_rule(Parser *p)
         }
         p->mark = _mark;
         void *_raw = dotted_name_raw(p);
+        if (p->error_indicator)
+            return NULL;
         if (_raw == NULL || p->mark <= _resmark)
             break;
         _resmark = p->mark;
@@ -9044,6 +9046,8 @@ bitwise_or_rule(Parser *p)
         }
         p->mark = _mark;
         void *_raw = bitwise_or_raw(p);
+        if (p->error_indicator)
+            return NULL;
         if (_raw == NULL || p->mark <= _resmark)
             break;
         _resmark = p->mark;
@@ -9158,6 +9162,8 @@ bitwise_xor_rule(Parser *p)
         }
         p->mark = _mark;
         void *_raw = bitwise_xor_raw(p);
+        if (p->error_indicator)
+            return NULL;
         if (_raw == NULL || p->mark <= _resmark)
             break;
         _resmark = p->mark;
@@ -9272,6 +9278,8 @@ bitwise_and_rule(Parser *p)
         }
         p->mark = _mark;
         void *_raw = bitwise_and_raw(p);
+        if (p->error_indicator)
+            return NULL;
         if (_raw == NULL || p->mark <= _resmark)
             break;
         _resmark = p->mark;
@@ -9386,6 +9394,8 @@ shift_expr_rule(Parser *p)
         }
         p->mark = _mark;
         void *_raw = shift_expr_raw(p);
+        if (p->error_indicator)
+            return NULL;
         if (_raw == NULL || p->mark <= _resmark)
             break;
         _resmark = p->mark;
@@ -9539,6 +9549,8 @@ sum_rule(Parser *p)
         }
         p->mark = _mark;
         void *_raw = sum_raw(p);
+        if (p->error_indicator)
+            return NULL;
         if (_raw == NULL || p->mark <= _resmark)
             break;
         _resmark = p->mark;
@@ -9698,6 +9710,8 @@ term_rule(Parser *p)
         }
         p->mark = _mark;
         void *_raw = term_raw(p);
+        if (p->error_indicator)
+            return NULL;
         if (_raw == NULL || p->mark <= _resmark)
             break;
         _resmark = p->mark;
@@ -10302,6 +10316,8 @@ primary_rule(Parser *p)
         }
         p->mark = _mark;
         void *_raw = primary_raw(p);
+        if (p->error_indicator)
+            return NULL;
         if (_raw == NULL || p->mark <= _resmark)
             break;
         _resmark = p->mark;
@@ -13962,6 +13978,8 @@ t_primary_rule(Parser *p)
         }
         p->mark = _mark;
         void *_raw = t_primary_raw(p);
+        if (p->error_indicator)
+            return NULL;
         if (_raw == NULL || p->mark <= _resmark)
             break;
         _resmark = p->mark;
diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py
index d0abc12b4026a..b4d6a0bab51f4 100644
--- a/Tools/peg_generator/pegen/c_generator.py
+++ b/Tools/peg_generator/pegen/c_generator.py
@@ -496,6 +496,9 @@ def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
                 )
                 self.print("p->mark = _mark;")
                 self.print(f"void *_raw = {node.name}_raw(p);")
+                self.print("if (p->error_indicator)")
+                with self.indent():
+                    self.print("return NULL;")
                 self.print("if (_raw == NULL || p->mark <= _resmark)")
                 with self.indent():
                     self.print("break;")



More information about the Python-checkins mailing list