[Python-checkins] [3.10] bpo-45408: Don't override previous tokenizer errors in the second parser pass (GH-28812). (GH-28813)

Thu Oct 7 19:50:19 EDT 2021

https://github.com/python/cpython/commit/4ce55a2353e07962280181df40af0135aef1cf51
commit: 4ce55a2353e07962280181df40af0135aef1cf51
branch: 3.10
author: Pablo Galindo Salgado <Pablogsal at gmail.com>
committer: pablogsal <Pablogsal at gmail.com>
date: 2021-10-08T00:50:10+01:00
summary:

[3.10] bpo-45408: Don't override previous tokenizer errors in the second parser pass (GH-28812). (GH-28813)

(cherry picked from commit 0219017df7ec41839fd0d56a3076b5f09c58d313)

Co-authored-by: Pablo Galindo Salgado <Pablogsal at gmail.com>

files:
A Misc/NEWS.d/next/Core and Builtins/2021-10-07-21-26-44.bpo-45408.qUqzcd.rst
M Lib/test/test_ast.py
M Lib/test/test_exceptions.py
M Parser/pegen.c

diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py
index 326f3ab2beb1d..39fc7e9673816 100644
--- a/Lib/test/test_ast.py
+++ b/Lib/test/test_ast.py
@@ -1044,6 +1044,14 @@ def test_literal_eval_malformed_lineno(self):
         with self.assertRaisesRegex(ValueError, msg):
             ast.literal_eval(node)
 
+    def test_literal_eval_syntax_errors(self):
+        msg = "unexpected character after line continuation character"
+        with self.assertRaisesRegex(SyntaxError, msg):
+            ast.literal_eval(r'''
+                \
+                (\
+            \ ''')
+
     def test_bad_integer(self):
         # issue13436: Bad error message with invalid numeric values
         body = [ast.ImportFrom(module='time',
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index d04e5f5573ca9..4213dabfd8e71 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -223,7 +223,7 @@ def testSyntaxErrorOffset(self):
         check('x = "a', 1, 5)
         check('lambda x: x = 2', 1, 1)
         check('f{a + b + c}', 1, 2)
-        check('[file for str(file) in []\n])', 2, 2)
+        check('[file for str(file) in []\n])', 1, 11)
         check('a = « hello » « world »', 1, 5)
         check('[\nfile\nfor str(file)\nin\n[]\n]', 3, 5)
         check('[file for\n str(file) in []]', 2, 2)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-10-07-21-26-44.bpo-45408.qUqzcd.rst b/Misc/NEWS.d/next/Core and Builtins/2021-10-07-21-26-44.bpo-45408.qUqzcd.rst
new file mode 100644
index 0000000000000..e4d4db9cb9536
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-10-07-21-26-44.bpo-45408.qUqzcd.rst	
@@ -0,0 +1,2 @@
+Fix a crash in the parser when reporting tokenizer errors that occur at the
+same time unclosed parentheses are detected. Patch by Pablo Galindo.
diff --git a/Parser/pegen.c b/Parser/pegen.c
index e20e926136828..1bb975d684a0d 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -1321,13 +1321,16 @@ _PyPegen_run_parser(Parser *p)
 {
     void *res = _PyPegen_parse(p);
     if (res == NULL) {
+        if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
+            return NULL;
+        }
         Token *last_token = p->tokens[p->fill - 1];
         reset_parser_state(p);
         _PyPegen_parse(p);
         if (PyErr_Occurred()) {
             // Prioritize tokenizer errors to custom syntax errors raised
             // on the second phase only if the errors come from the parser.
-            if (p->tok->done != E_ERROR && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
+            if (p->tok->done == E_DONE && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
                 _PyPegen_check_tokenizer_errors(p);
             }
             return NULL;