[Python-checkins] [3.10] bpo-45848: Allow the parser to get error lines from encoded files (GH-29646) (GH-29661)
ambv
webhook-mailer at python.org
Sat Nov 20 10:35:06 EST 2021
https://github.com/python/cpython/commit/904af3de2bef6d971463a564541cb6dadf22d7f8
commit: 904af3de2bef6d971463a564541cb6dadf22d7f8
branch: 3.10
author: Łukasz Langa <lukasz at langa.pl>
committer: ambv <lukasz at langa.pl>
date: 2021-11-20T16:34:56+01:00
summary:
[3.10] bpo-45848: Allow the parser to get error lines from encoded files (GH-29646) (GH-29661)
(cherry picked from commit fdcc46d9554094994f78bedf6dc9220e5d5ee668)
Co-authored-by: Pablo Galindo Salgado <Pablogsal at gmail.com>
files:
A Misc/NEWS.d/next/Core and Builtins/2021-11-19-22-57-42.bpo-45848.HgVBJ5.rst
M .gitignore
M Include/cpython/pyerrors.h
M Lib/test/test_exceptions.py
M Parser/pegen.c
M Python/errors.c
diff --git a/.gitignore b/.gitignore
index d0b608892a87c..19b4214a9aea0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -134,3 +134,9 @@ Tools/ssl/win32
# Ignore ./python binary on Unix but still look into ./Python/ directory.
/python
!/Python/
+
+# Artifacts generated by 3.11 lying around when switching branches:
+/_bootstrap_python
+/Programs/_freeze_module
+/Python/deepfreeze/
+/Python/frozen_modules/
\ No newline at end of file
diff --git a/Include/cpython/pyerrors.h b/Include/cpython/pyerrors.h
index 5e57129c3b82c..3f952456679ec 100644
--- a/Include/cpython/pyerrors.h
+++ b/Include/cpython/pyerrors.h
@@ -185,6 +185,12 @@ Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_Create(
Py_ssize_t end,
const char *reason /* UTF-8 encoded string */
);
+
+PyAPI_FUNC(PyObject *) _PyErr_ProgramDecodedTextObject(
+ PyObject *filename,
+ int lineno,
+ const char* encoding);
+
PyAPI_FUNC(PyObject *) _PyUnicodeTranslateError_Create(
PyObject *object,
Py_ssize_t start,
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index 8419f582cbdb9..04c883cf53ba3 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -2352,6 +2352,19 @@ def test_encodings(self):
finally:
unlink(TESTFN)
+ # Check backwards tokenizer errors
+ source = '# -*- coding: ascii -*-\n\n(\n'
+ try:
+ with open(TESTFN, 'w', encoding='ascii') as testfile:
+ testfile.write(source)
+ rc, out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
+ err = err.decode('utf-8').splitlines()
+
+ self.assertEqual(err[-3], ' (')
+ self.assertEqual(err[-2], ' ^')
+ finally:
+ unlink(TESTFN)
+
def test_attributes_new_constructor(self):
args = ("bad.py", 1, 2, "abcdefg", 1, 100)
the_exception = SyntaxError("bad bad", args)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-11-19-22-57-42.bpo-45848.HgVBJ5.rst b/Misc/NEWS.d/next/Core and Builtins/2021-11-19-22-57-42.bpo-45848.HgVBJ5.rst
new file mode 100644
index 0000000000000..d9394c9c1f08b
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-11-19-22-57-42.bpo-45848.HgVBJ5.rst
@@ -0,0 +1,2 @@
+Allow the parser to obtain error lines directly from encoded files. Patch by
+Pablo Galindo.
diff --git a/Parser/pegen.c b/Parser/pegen.c
index c6570eb1bd0b4..9bf4fe7ecd89d 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -480,14 +480,12 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
goto error;
}
- // PyErr_ProgramTextObject assumes that the text is utf-8 so we cannot call it with a file
- // with an arbitrary encoding or otherwise we could get some badly decoded text.
- int uses_utf8_codec = (!p->tok->encoding || strcmp(p->tok->encoding, "utf-8") == 0);
if (p->tok->fp_interactive) {
error_line = get_error_line(p, lineno);
}
- else if (uses_utf8_codec && p->start_rule == Py_file_input) {
- error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno);
+ else if (p->start_rule == Py_file_input) {
+ error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,
+ (int) lineno, p->tok->encoding);
}
if (!error_line) {
@@ -498,15 +496,18 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
`PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
does not physically exist */
- assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);
+ assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
Py_ssize_t size = p->tok->inp - p->tok->buf;
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
}
- else {
+ else if (p->tok->fp == NULL || p->tok->fp == stdin) {
error_line = get_error_line(p, lineno);
}
+ else {
+ error_line = PyUnicode_FromStringAndSize("", 0);
+ }
if (!error_line) {
goto error;
}
diff --git a/Python/errors.c b/Python/errors.c
index 600300e263d09..bc1b55e440e8a 100644
--- a/Python/errors.c
+++ b/Python/errors.c
@@ -1724,7 +1724,7 @@ PyErr_SyntaxLocationEx(const char *filename, int lineno, int col_offset)
functionality in tb_displayline() in traceback.c. */
static PyObject *
-err_programtext(PyThreadState *tstate, FILE *fp, int lineno)
+err_programtext(PyThreadState *tstate, FILE *fp, int lineno, const char* encoding)
{
int i;
char linebuf[1000];
@@ -1752,7 +1752,11 @@ err_programtext(PyThreadState *tstate, FILE *fp, int lineno)
fclose(fp);
if (i == lineno) {
PyObject *res;
- res = PyUnicode_FromString(linebuf);
+ if (encoding != NULL) {
+ res = PyUnicode_Decode(linebuf, strlen(linebuf), encoding, "replace");
+ } else {
+ res = PyUnicode_FromString(linebuf);
+ }
if (res == NULL)
_PyErr_Clear(tstate);
return res;
@@ -1778,7 +1782,7 @@ PyErr_ProgramText(const char *filename, int lineno)
}
PyObject *
-PyErr_ProgramTextObject(PyObject *filename, int lineno)
+_PyErr_ProgramDecodedTextObject(PyObject *filename, int lineno, const char* encoding)
{
if (filename == NULL || lineno <= 0) {
return NULL;
@@ -1790,7 +1794,13 @@ PyErr_ProgramTextObject(PyObject *filename, int lineno)
_PyErr_Clear(tstate);
return NULL;
}
- return err_programtext(tstate, fp, lineno);
+ return err_programtext(tstate, fp, lineno, encoding);
+}
+
+PyObject *
+PyErr_ProgramTextObject(PyObject *filename, int lineno)
+{
+ return _PyErr_ProgramDecodedTextObject(filename, lineno, NULL);
}
#ifdef __cplusplus
More information about the Python-checkins
mailing list