[Python-checkins] bpo-44317: Improve tokenizer errors with more informative locations (GH-26555) (GH-27079)
pablogsal
webhook-mailer at python.org
Fri Jul 9 20:47:42 EDT 2021
https://github.com/python/cpython/commit/2a722d4fab6a9656f3c03cfdaf6d1684277b8af5
commit: 2a722d4fab6a9656f3c03cfdaf6d1684277b8af5
branch: 3.10
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: pablogsal <Pablogsal at gmail.com>
date: 2021-07-10T01:47:33+01:00
summary:
bpo-44317: Improve tokenizer errors with more informative locations (GH-26555) (GH-27079)
(cherry picked from commit f24777c2b329974b69d2a3bf5cfc37e0fcace36c)
Co-authored-by: Pablo Galindo Salgado <Pablogsal at gmail.com>
files:
A Misc/NEWS.d/next/Core and Builtins/2021-06-06-00-29-14.bpo-44317.xPPhcZ.rst
M Lib/test/test_exceptions.py
M Parser/tokenizer.c
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index 8f689546a62296..f92637f9930bfd 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -226,9 +226,9 @@ def testSyntaxErrorOffset(self):
# Errors thrown by tokenizer.c
check('(0x+1)', 1, 3)
check('x = 0xI', 1, 6)
- check('0010 + 2', 1, 4)
+ check('0010 + 2', 1, 1)
check('x = 32e-+4', 1, 8)
- check('x = 0o9', 1, 6)
+ check('x = 0o9', 1, 7)
check('\u03b1 = 0xI', 1, 6)
check(b'\xce\xb1 = 0xI', 1, 6)
check(b'# -*- coding: iso8859-7 -*-\n\xe1 = 0xI', 2, 6,
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-06-06-00-29-14.bpo-44317.xPPhcZ.rst b/Misc/NEWS.d/next/Core and Builtins/2021-06-06-00-29-14.bpo-44317.xPPhcZ.rst
new file mode 100644
index 00000000000000..8ac32adf8b5535
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-06-06-00-29-14.bpo-44317.xPPhcZ.rst
@@ -0,0 +1 @@
+Improve tokenizer error with improved locations. Patch by Pablo Galindo.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index be9b13ebabb8e3..3dea77e5a2a3bd 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1071,19 +1071,13 @@ tok_backup(struct tok_state *tok, int c)
}
}
-
static int
-syntaxerror(struct tok_state *tok, const char *format, ...)
+_syntaxerror_range(struct tok_state *tok, const char *format,
+ int col_offset, int end_col_offset,
+ va_list vargs)
{
PyObject *errmsg, *errtext, *args;
- va_list vargs;
-#ifdef HAVE_STDARG_PROTOTYPES
- va_start(vargs, format);
-#else
- va_start(vargs);
-#endif
errmsg = PyUnicode_FromFormatV(format, vargs);
- va_end(vargs);
if (!errmsg) {
goto error;
}
@@ -1093,7 +1087,14 @@ syntaxerror(struct tok_state *tok, const char *format, ...)
if (!errtext) {
goto error;
}
- int offset = (int)PyUnicode_GET_LENGTH(errtext);
+
+ if (col_offset == -1) {
+ col_offset = (int)PyUnicode_GET_LENGTH(errtext);
+ }
+ if (end_col_offset == -1) {
+ end_col_offset = col_offset;
+ }
+
Py_ssize_t line_len = strcspn(tok->line_start, "\n");
if (line_len != tok->cur - tok->line_start) {
Py_DECREF(errtext);
@@ -1104,8 +1105,8 @@ syntaxerror(struct tok_state *tok, const char *format, ...)
goto error;
}
- args = Py_BuildValue("(O(OiiN))", errmsg,
- tok->filename, tok->lineno, offset, errtext);
+ args = Py_BuildValue("(O(OiiNii))", errmsg, tok->filename, tok->lineno,
+ col_offset, errtext, tok->lineno, end_col_offset);
if (args) {
PyErr_SetObject(PyExc_SyntaxError, args);
Py_DECREF(args);
@@ -1117,6 +1118,38 @@ syntaxerror(struct tok_state *tok, const char *format, ...)
return ERRORTOKEN;
}
+static int
+syntaxerror(struct tok_state *tok, const char *format, ...)
+{
+ va_list vargs;
+#ifdef HAVE_STDARG_PROTOTYPES
+ va_start(vargs, format);
+#else
+ va_start(vargs);
+#endif
+ int ret = _syntaxerror_range(tok, format, -1, -1, vargs);
+ va_end(vargs);
+ return ret;
+}
+
+static int
+syntaxerror_known_range(struct tok_state *tok,
+ int col_offset, int end_col_offset,
+ const char *format, ...)
+{
+ va_list vargs;
+#ifdef HAVE_STDARG_PROTOTYPES
+ va_start(vargs, format);
+#else
+ va_start(vargs);
+#endif
+ int ret = _syntaxerror_range(tok, format, col_offset, end_col_offset, vargs);
+ va_end(vargs);
+ return ret;
+}
+
+
+
static int
indenterror(struct tok_state *tok)
{
@@ -1692,12 +1725,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
c = tok_nextc(tok);
}
if (c < '0' || c >= '8') {
- tok_backup(tok, c);
if (isdigit(c)) {
return syntaxerror(tok,
"invalid digit '%c' in octal literal", c);
}
else {
+ tok_backup(tok, c);
return syntaxerror(tok, "invalid octal literal");
}
}
@@ -1721,12 +1754,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
c = tok_nextc(tok);
}
if (c != '0' && c != '1') {
- tok_backup(tok, c);
if (isdigit(c)) {
return syntaxerror(tok,
"invalid digit '%c' in binary literal", c);
}
else {
+ tok_backup(tok, c);
return syntaxerror(tok, "invalid binary literal");
}
}
@@ -1759,6 +1792,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
}
c = tok_nextc(tok);
}
+ char* zeros_end = tok->cur;
if (isdigit(c)) {
nonzero = 1;
c = tok_decimal_tail(tok);
@@ -1779,10 +1813,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
else if (nonzero) {
/* Old-style octal: now disallowed. */
tok_backup(tok, c);
- return syntaxerror(tok,
- "leading zeros in decimal integer "
- "literals are not permitted; "
- "use an 0o prefix for octal integers");
+ return syntaxerror_known_range(
+ tok, (int)(tok->start + 1 - tok->line_start),
+ (int)(zeros_end - tok->line_start),
+ "leading zeros in decimal integer "
+ "literals are not permitted; "
+ "use an 0o prefix for octal integers");
}
if (!verify_end_of_number(tok, c, "decimal")) {
return ERRORTOKEN;
More information about the Python-checkins mailing list