[Python-checkins] GH-103727: Avoid advancing tokenizer too far in f-string mode (GH-103775)

Mon Apr 24 14:30:28 EDT 2023

https://github.com/python/cpython/commit/cb157a1a353675cb6f08bdae5d7aadd6b28bb0a9
commit: cb157a1a353675cb6f08bdae5d7aadd6b28bb0a9
branch: main
author: Lysandros Nikolaou <lisandrosnik at gmail.com>
committer: lysnikolaou <lisandrosnik at gmail.com>
date: 2023-04-24T12:30:21-06:00
summary:

GH-103727: Avoid advancing tokenizer too far in f-string mode (GH-103775)

files:
M Lib/test/test_fstring.py
M Parser/tokenizer.c

diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index b26b12d369f6..9d5e16628f04 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -940,15 +940,13 @@ def test_lambda(self):
                              "f'{lambda :x}'",
                              "f'{lambda *arg, :x}'",
                              "f'{1, lambda:x}'",
+                             "f'{lambda x:}'",
+                             "f'{lambda :}'",
                              ])
 
         # but don't emit the paren warning in general cases
-        self.assertAllRaise(SyntaxError,
-                            "f-string: expecting a valid expression after '{'",
-                            ["f'{lambda x:}'",
-                             "f'{lambda :}'",
-                             "f'{+ lambda:None}'",
-                             ])
+        with self.assertRaisesRegex(SyntaxError, "f-string: expecting a valid expression after '{'"):
+            eval("f'{+ lambda:None}'")
 
     def test_valid_prefixes(self):
         self.assertEqual(F'{1}', "1")
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 0370f75efb53..5244ab7d4f7e 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -2481,19 +2481,21 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
     // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
     // before it.
     int start_char = tok_nextc(tok);
-    int peek1 = tok_nextc(tok);
-    tok_backup(tok, peek1);
-    tok_backup(tok, start_char);
-
-    if ((start_char == '{' && peek1 != '{') || (start_char == '}' && peek1 != '}')) {
-        if (start_char == '{') {
+    if (start_char == '{') {
+        int peek1 = tok_nextc(tok);
+        tok_backup(tok, peek1);
+        tok_backup(tok, start_char);
+        if (peek1 != '{') {
             current_tok->curly_bracket_expr_start_depth++;
             if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                 return MAKE_TOKEN(syntaxerror(tok, "f-string: expressions nested too deeply"));
             }
+            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
+            return tok_get_normal_mode(tok, current_tok, token);
         }
-        TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
-        return tok_get_normal_mode(tok, current_tok, token);
+    }
+    else {
+        tok_backup(tok, start_char);
     }
 
     // Check if we are at the end of the string