[Python-checkins] bpo-45738: Fix computation of error location for invalid continuation (GH-29550)

miss-islington webhook-mailer at python.org
Sat Nov 13 20:30:11 EST 2021


https://github.com/python/cpython/commit/bf26a6da7aaedb526c9eb1cb56b0e46d1c10384c
commit: bf26a6da7aaedb526c9eb1cb56b0e46d1c10384c
branch: 3.10
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: miss-islington <31488909+miss-islington at users.noreply.github.com>
date: 2021-11-13T17:30:03-08:00
summary:

bpo-45738: Fix computation of error location for invalid continuation (GH-29550)


Fix computation of error location for invalid continuation characters in the parser.
(cherry picked from commit 25835c518aa7446f3680b62c1fb43827e0f190d9)

Co-authored-by: Pablo Galindo Salgado <Pablogsal at gmail.com>

files:
A Misc/NEWS.d/next/Core and Builtins/2021-11-14-00-14-45.bpo-45738.e0cgKd.rst
M Lib/test/test_syntax.py
M Parser/pegen.c
M Parser/tokenizer.c

diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py
index 45b2785f34831..3b79b7a0b67b6 100644
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -1487,7 +1487,13 @@ def func2():
     def test_invalid_line_continuation_error_position(self):
         self._check_error(r"a = 3 \ 4",
                           "unexpected character after line continuation character",
-                          lineno=1, offset=9)
+                          lineno=1, offset=8)
+        self._check_error('1,\\#\n2',
+                          "unexpected character after line continuation character",
+                          lineno=1, offset=4)
+        self._check_error('\nfgdfgf\n1,\\#\n2\n',
+                          "unexpected character after line continuation character",
+                          lineno=3, offset=4)
 
     def test_invalid_line_continuation_left_recursive(self):
         # Check bpo-42218: SyntaxErrors following left-recursive rules
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-11-14-00-14-45.bpo-45738.e0cgKd.rst b/Misc/NEWS.d/next/Core and Builtins/2021-11-14-00-14-45.bpo-45738.e0cgKd.rst
new file mode 100644
index 0000000000000..b238034323c77
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-11-14-00-14-45.bpo-45738.e0cgKd.rst	
@@ -0,0 +1,2 @@
+Fix computation of error location for invalid continuation characters in the
+parser. Patch by Pablo Galindo.
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 66e4b1929711f..cfb4b8e8fb157 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -372,14 +372,7 @@ tokenizer_error(Parser *p)
             msg = "too many levels of indentation";
             break;
         case E_LINECONT: {
-            char* loc = strrchr(p->tok->buf, '\n');
-            const char* last_char = p->tok->cur - 1;
-            if (loc != NULL && loc != last_char) {
-                col_offset = p->tok->cur - loc - 1;
-                p->tok->buf = loc;
-            } else {
-                col_offset = last_char - p->tok->buf - 1;
-            }
+            col_offset = p->tok->cur - p->tok->buf - 1;
             msg = "unexpected character after line continuation character";
             break;
         }
@@ -387,7 +380,9 @@ tokenizer_error(Parser *p)
             msg = "unknown parsing error";
     }
 
-    RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno, col_offset, p->tok->lineno, -1, msg);
+    RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno,
+                               col_offset >= 0 ? col_offset : 0,
+                               p->tok->lineno, -1, msg);
     return -1;
 }
 
@@ -497,7 +492,7 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
            does not physically exist */
         assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);
 
-        if (p->tok->lineno <= lineno) {
+        if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
             Py_ssize_t size = p->tok->inp - p->tok->buf;
             error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
         }
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 519300f3731f0..76a22dab65994 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1969,7 +1969,6 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
         c = tok_nextc(tok);
         if (c != '\n') {
             tok->done = E_LINECONT;
-            tok->cur = tok->inp;
             return ERRORTOKEN;
         }
         c = tok_nextc(tok);



More information about the Python-checkins mailing list