[Python-checkins] [3.11] gh-99891: Fix infinite recursion in the tokenizer when showing warnings (GH-99893) (GH-99896)

miss-islington webhook-mailer at python.org
Thu Dec 1 03:57:09 EST 2022


https://github.com/python/cpython/commit/6282ef6c3f07b1effbf51d5806f92791751d362e
commit: 6282ef6c3f07b1effbf51d5806f92791751d362e
branch: 3.11
author: Pablo Galindo Salgado <Pablogsal at gmail.com>
committer: miss-islington <31488909+miss-islington at users.noreply.github.com>
date: 2022-12-01T00:57:04-08:00
summary:

[3.11] gh-99891: Fix infinite recursion in the tokenizer when showing warnings (GH-99893) (GH-99896)



Automerge-Triggered-By: GH:pablogsal.
(cherry picked from commit 417206a05c4545bde96c2bbbea92b53e6cac0d48)

Co-authored-by: Pablo Galindo Salgado <Pablogsal at gmail.com>

files:
A Misc/NEWS.d/next/Core and Builtins/2022-11-30-11-09-40.gh-issue-99891.9VomwB.rst
M Lib/test/test_source_encoding.py
M Parser/tokenizer.c
M Parser/tokenizer.h

diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py
index e357264eb1d1..5fe0f3124444 100644
--- a/Lib/test/test_source_encoding.py
+++ b/Lib/test/test_source_encoding.py
@@ -161,6 +161,18 @@ def test_file_parse_error_multiline(self):
         finally:
             os.unlink(TESTFN)
 
+    def test_tokenizer_fstring_warning_in_first_line(self):
+        source = "0b1and 2"
+        with open(TESTFN, "w") as fd:
+            fd.write("{}".format(source))
+        try:
+            retcode, stdout, stderr = script_helper.assert_python_ok(TESTFN)
+            self.assertIn(b"SyntaxWarning: invalid binary litera", stderr)
+            self.assertEqual(stderr.count(source.encode()), 1)
+        finally:
+            os.unlink(TESTFN)
+
+
 class AbstractSourceEncodingTest:
 
     def test_default_coding(self):
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-11-30-11-09-40.gh-issue-99891.9VomwB.rst b/Misc/NEWS.d/next/Core and Builtins/2022-11-30-11-09-40.gh-issue-99891.9VomwB.rst
new file mode 100644
index 000000000000..20cd361affea
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-11-30-11-09-40.gh-issue-99891.9VomwB.rst	
@@ -0,0 +1,3 @@
+Fix a bug in the tokenizer that could cause infinite recursion when showing
+syntax warnings that happen in the first line of the source. Patch by Pablo
+Galindo.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index eda38a09a995..ca11c7bebb4e 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -88,6 +88,7 @@ tok_new(void)
     tok->async_def_nl = 0;
     tok->interactive_underflow = IUNDERFLOW_NORMAL;
     tok->str = NULL;
+    tok->report_warnings = 1;
     return tok;
 }
 
@@ -1186,6 +1187,10 @@ indenterror(struct tok_state *tok)
 static int
 parser_warn(struct tok_state *tok, PyObject *category, const char *format, ...)
 {
+    if (!tok->report_warnings) {
+        return 0;
+    }
+
     PyObject *errmsg;
     va_list vargs;
 #ifdef HAVE_STDARG_PROTOTYPES
@@ -2194,6 +2199,9 @@ _PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
             return encoding;
         }
     }
+    // We don't want to report warnings here because it could cause infinite recursion
+    // if fetching the encoding shows a warning.
+    tok->report_warnings = 0;
     while (tok->lineno < 2 && tok->done == E_OK) {
         _PyTokenizer_Get(tok, &p_start, &p_end);
     }
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 0cb665104b2b..d9a5f457d9c5 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -84,6 +84,7 @@ struct tok_state {
                              NEWLINE token after it. */
     /* How to proceed when asked for a new token in interactive mode */
     enum interactive_underflow_t interactive_underflow;
+    int report_warnings;
 };
 
 extern struct tok_state *_PyTokenizer_FromString(const char *, int);



More information about the Python-checkins mailing list