[Python-checkins] gh-97556: Raise null bytes syntax error upon null in multiline string (GH-104136)

lysnikolaou webhook-mailer at python.org
Thu May 4 08:26:30 EDT 2023


https://github.com/python/cpython/commit/ef0df5284f929719b2ef3955b1b569ade0a5193c
commit: ef0df5284f929719b2ef3955b1b569ade0a5193c
branch: main
author: Lysandros Nikolaou <lisandrosnik at gmail.com>
committer: lysnikolaou <lisandrosnik at gmail.com>
date: 2023-05-04T14:26:23+02:00
summary:

gh-97556: Raise null bytes syntax error upon null in multiline string (GH-104136)

files:
M Lib/test/test_cmd_line_script.py
M Parser/tokenizer.c

diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py
index d98e23855e0c..8bf299382e9c 100644
--- a/Lib/test/test_cmd_line_script.py
+++ b/Lib/test/test_cmd_line_script.py
@@ -669,6 +669,19 @@ def test_syntaxerror_null_bytes(self):
                 ],
             )
 
+    def test_syntaxerror_null_bytes_in_multiline_string(self):
+        scripts = ["\n'''\nmultilinestring\0\n'''", "\nf'''\nmultilinestring\0\n'''"] # Both normal and f-strings
+        with os_helper.temp_dir() as script_dir:
+            for script in scripts:
+                script_name = _make_test_script(script_dir, 'script', script)
+                _, _, stderr = assert_python_failure(script_name)
+                self.assertEqual(
+                    stderr.splitlines()[-2:],
+                    [   b"    multilinestring",
+                        b'SyntaxError: source code cannot contain null bytes'
+                    ]
+                )
+
     def test_consistent_sys_path_for_direct_execution(self):
         # This test case ensures that the following all give the same
         # sys.path configuration:
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index d2f9fee110eb..7c07d2011fda 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -2301,8 +2301,12 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         /* Get rest of string */
         while (end_quote_size != quote_size) {
             c = tok_nextc(tok);
-            if (tok->done == E_DECODE)
+            if (tok->done == E_ERROR) {
+                return MAKE_TOKEN(ERRORTOKEN);
+            }
+            if (tok->done == E_DECODE) {
                 break;
+            }
             if (c == EOF || (quote_size == 1 && c == '\n')) {
                 assert(tok->multi_line_start != NULL);
                 // shift the tok_state's location into
@@ -2554,6 +2558,9 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
 
     while (end_quote_size != current_tok->f_string_quote_size) {
         int c = tok_nextc(tok);
+        if (tok->done == E_ERROR) {
+            return MAKE_TOKEN(ERRORTOKEN);
+        }
         if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
             if (tok->decoding_erred) {
                 return MAKE_TOKEN(ERRORTOKEN);



More information about the Python-checkins mailing list