[Python-checkins] bpo-33305: Improve SyntaxError for invalid numerical literals. (GH-6517)

Serhiy Storchaka webhook-mailer at python.org
Mon Jul 9 08:09:39 EDT 2018


https://github.com/python/cpython/commit/cf7303ed2aa19fb48687d7140dbc86fc23c9fca4
commit: cf7303ed2aa19fb48687d7140dbc86fc23c9fca4
branch: master
author: Serhiy Storchaka <storchaka at gmail.com>
committer: GitHub <noreply at github.com>
date: 2018-07-09T15:09:35+03:00
summary:

bpo-33305: Improve SyntaxError for invalid numerical literals. (GH-6517)

files:
A Misc/NEWS.d/next/Core and Builtins/2018-04-18-14-17-44.bpo-33305.9z3dDH.rst
M Lib/test/test_grammar.py
M Parser/tokenizer.c

diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py
index ee4136286ba1..78918ae250c4 100644
--- a/Lib/test/test_grammar.py
+++ b/Lib/test/test_grammar.py
@@ -100,6 +100,8 @@
 
 class TokenTests(unittest.TestCase):
 
+    check_syntax_error = check_syntax_error
+
     def test_backslash(self):
         # Backslash means line continuation:
         x = 1 \
@@ -184,6 +186,28 @@ def test_underscore_literals(self):
         # Sanity check: no literal begins with an underscore
         self.assertRaises(NameError, eval, "_0")
 
+    def test_bad_numerical_literals(self):
+        check = self.check_syntax_error
+        check("0b12", "invalid digit '2' in binary literal")
+        check("0b1_2", "invalid digit '2' in binary literal")
+        check("0b2", "invalid digit '2' in binary literal")
+        check("0b1_", "invalid binary literal")
+        check("0b", "invalid binary literal")
+        check("0o18", "invalid digit '8' in octal literal")
+        check("0o1_8", "invalid digit '8' in octal literal")
+        check("0o8", "invalid digit '8' in octal literal")
+        check("0o1_", "invalid octal literal")
+        check("0o", "invalid octal literal")
+        check("0x1_", "invalid hexadecimal literal")
+        check("0x", "invalid hexadecimal literal")
+        check("1_", "invalid decimal literal")
+        check("012",
+              "leading zeros in decimal integer literals are not permitted; "
+              "use an 0o prefix for octal integers")
+        check("1.2_", "invalid decimal literal")
+        check("1e2_", "invalid decimal literal")
+        check("1e+", "invalid decimal literal")
+
     def test_string_literals(self):
         x = ''; y = ""; self.assertTrue(len(x) == 0 and x == y)
         x = '\''; y = "'"; self.assertTrue(len(x) == 1 and x == y and ord(x) == 39)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-04-18-14-17-44.bpo-33305.9z3dDH.rst b/Misc/NEWS.d/next/Core and Builtins/2018-04-18-14-17-44.bpo-33305.9z3dDH.rst
new file mode 100644
index 000000000000..cae2f7f85950
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-04-18-14-17-44.bpo-33305.9z3dDH.rst	
@@ -0,0 +1 @@
+Improved syntax error messages for invalid numerical literals.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index fbc98880c9a5..f8b83c9f3d7f 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1280,6 +1280,28 @@ PyToken_ThreeChars(int c1, int c2, int c3)
     return OP;
 }
 
+static int
+syntaxerror(struct tok_state *tok, const char *format, ...)
+{
+#ifndef PGEN
+    va_list vargs;
+#ifdef HAVE_STDARG_PROTOTYPES
+    va_start(vargs, format);
+#else
+    va_start(vargs);
+#endif
+    PyErr_FormatV(PyExc_SyntaxError, format, vargs);
+    va_end(vargs);
+    PyErr_SyntaxLocationObject(tok->filename,
+                               tok->lineno,
+                               tok->cur - tok->line_start);
+    tok->done = E_ERROR;
+#else
+    tok->done = E_TOKEN;
+#endif
+    return ERRORTOKEN;
+}
+
 static int
 indenterror(struct tok_state *tok)
 {
@@ -1333,8 +1355,8 @@ tok_decimal_tail(struct tok_state *tok)
         }
         c = tok_nextc(tok);
         if (!isdigit(c)) {
-            tok->done = E_TOKEN;
             tok_backup(tok, c);
+            syntaxerror(tok, "invalid decimal literal");
             return 0;
         }
     }
@@ -1562,9 +1584,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
                         c = tok_nextc(tok);
                     }
                     if (!isxdigit(c)) {
-                        tok->done = E_TOKEN;
                         tok_backup(tok, c);
-                        return ERRORTOKEN;
+                        return syntaxerror(tok, "invalid hexadecimal literal");
                     }
                     do {
                         c = tok_nextc(tok);
@@ -1579,14 +1600,23 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
                         c = tok_nextc(tok);
                     }
                     if (c < '0' || c >= '8') {
-                        tok->done = E_TOKEN;
                         tok_backup(tok, c);
-                        return ERRORTOKEN;
+                        if (isdigit(c)) {
+                            return syntaxerror(tok,
+                                    "invalid digit '%c' in octal literal", c);
+                        }
+                        else {
+                            return syntaxerror(tok, "invalid octal literal");
+                        }
                     }
                     do {
                         c = tok_nextc(tok);
                     } while ('0' <= c && c < '8');
                 } while (c == '_');
+                if (isdigit(c)) {
+                    return syntaxerror(tok,
+                            "invalid digit '%c' in octal literal", c);
+                }
             }
             else if (c == 'b' || c == 'B') {
                 /* Binary */
@@ -1596,14 +1626,23 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
                         c = tok_nextc(tok);
                     }
                     if (c != '0' && c != '1') {
-                        tok->done = E_TOKEN;
                         tok_backup(tok, c);
-                        return ERRORTOKEN;
+                        if (isdigit(c)) {
+                            return syntaxerror(tok,
+                                    "invalid digit '%c' in binary literal", c);
+                        }
+                        else {
+                            return syntaxerror(tok, "invalid binary literal");
+                        }
                     }
                     do {
                         c = tok_nextc(tok);
                     } while (c == '0' || c == '1');
                 } while (c == '_');
+                if (isdigit(c)) {
+                    return syntaxerror(tok,
+                            "invalid digit '%c' in binary literal", c);
+                }
             }
             else {
                 int nonzero = 0;
@@ -1613,9 +1652,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
                     if (c == '_') {
                         c = tok_nextc(tok);
                         if (!isdigit(c)) {
-                            tok->done = E_TOKEN;
                             tok_backup(tok, c);
-                            return ERRORTOKEN;
+                            return syntaxerror(tok, "invalid decimal literal");
                         }
                     }
                     if (c != '0') {
@@ -1642,9 +1680,11 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
                 }
                 else if (nonzero) {
                     /* Old-style octal: now disallowed. */
-                    tok->done = E_TOKEN;
                     tok_backup(tok, c);
-                    return ERRORTOKEN;
+                    return syntaxerror(tok,
+                                       "leading zeros in decimal integer "
+                                       "literals are not permitted; "
+                                       "use an 0o prefix for octal integers");
                 }
             }
         }
@@ -1676,9 +1716,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
                     if (c == '+' || c == '-') {
                         c = tok_nextc(tok);
                         if (!isdigit(c)) {
-                            tok->done = E_TOKEN;
                             tok_backup(tok, c);
-                            return ERRORTOKEN;
+                            return syntaxerror(tok, "invalid decimal literal");
                         }
                     } else if (!isdigit(c)) {
                         tok_backup(tok, c);



More information about the Python-checkins mailing list