[Python-checkins] [3.7] bpo-36440: include node names in ParserError messages, instead of numeric IDs (GH-12565) (GH-12671)

Wed Apr 3 14:35:10 EDT 2019

https://github.com/python/cpython/commit/513d142993bb8c13e6803727fa086e44eafc360f
commit: 513d142993bb8c13e6803727fa086e44eafc360f
branch: 3.7
author: Pablo Galindo <Pablogsal at gmail.com>
committer: GitHub <noreply at github.com>
date: 2019-04-03T14:34:59-04:00
summary:

[3.7] bpo-36440: include node names in ParserError messages, instead of numeric IDs (GH-12565) (GH-12671)

The error messages in the parser module refer to nodes by their numeric IDs. To improve readability, use the node names when reporting errors.
(cherry picked from commit cb0748d3939c31168ab5d3b80e3677494497d5e3)

Co-authored-by: tyomitch <tyomitch at gmail.com>

files:
A Misc/NEWS.d/next/Core and Builtins/2019-03-25-13-45-19.bpo-36440.gkvzhi.rst
M Lib/test/test_parser.py
M Modules/parsermodule.c

diff --git a/Lib/test/test_parser.py b/Lib/test/test_parser.py
index 94e454663573..e49afd2ba1d8 100644
--- a/Lib/test/test_parser.py
+++ b/Lib/test/test_parser.py
@@ -713,6 +713,22 @@ def test_illegal_encoding(self):
         with self.assertRaises(UnicodeEncodeError):
             parser.sequence2st(tree)
 
+    def test_invalid_node_id(self):
+        tree = (257, (269, (-7, '')))
+        self.check_bad_tree(tree, "negative node id")
+        tree = (257, (269, (99, '')))
+        self.check_bad_tree(tree, "invalid token id")
+        tree = (257, (269, (9999, (0, ''))))
+        self.check_bad_tree(tree, "invalid symbol id")
+
+    def test_ParserError_message(self):
+        try:
+            parser.sequence2st((257,(269,(257,(0,'')))))
+        except parser.ParserError as why:
+            self.assertIn("simple_stmt", str(why))  # Expected
+            self.assertIn("file_input", str(why))     # Got
+
+
 
 class CompileTestCase(unittest.TestCase):
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-03-25-13-45-19.bpo-36440.gkvzhi.rst b/Misc/NEWS.d/next/Core and Builtins/2019-03-25-13-45-19.bpo-36440.gkvzhi.rst
new file mode 100644
index 000000000000..372b1f771009
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-03-25-13-45-19.bpo-36440.gkvzhi.rst	
@@ -0,0 +1,2 @@
+Include node names in ``ParserError`` messages, instead of numeric IDs.
+Patch by A. Skrobov.
diff --git a/Modules/parsermodule.c b/Modules/parsermodule.c
index 67c874267f24..799a813468f1 100644
--- a/Modules/parsermodule.c
+++ b/Modules/parsermodule.c
@@ -24,10 +24,6 @@
  *  Py_[X]DECREF() and Py_[X]INCREF() macros.  The lint annotations
  *  look like "NOTE(...)".
  *
- *  To debug parser errors like
- *      "parser.ParserError: Expected node type 12, got 333."
- *  decode symbol numbers using the automatically-generated files
- *  Lib/symbol.h and Include/token.h.
  */
 
 #include "Python.h"                     /* general Python API             */
@@ -663,6 +659,13 @@ validate_node(node *tree)
     for (pos = 0; pos < nch; ++pos) {
         node *ch = CHILD(tree, pos);
         int ch_type = TYPE(ch);
+        if ((ch_type >= NT_OFFSET + _PyParser_Grammar.g_ndfas)
+            || (ISTERMINAL(ch_type) && (ch_type >= N_TOKENS))
+            || (ch_type < 0)
+           ) {
+            PyErr_Format(parser_error, "Unrecognized node type %d.", ch_type);
+            return 0;
+        }
         for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
             short a_label = dfa_state->s_arc[arc].a_lbl;
             assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels);
@@ -691,8 +694,10 @@ validate_node(node *tree)
             const char *expected_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str;
 
             if (ISNONTERMINAL(next_type)) {
-                PyErr_Format(parser_error, "Expected node type %d, got %d.",
-                             next_type, ch_type);
+                PyErr_Format(parser_error, "Expected %s, got %s.",
+                             _PyParser_Grammar.g_dfa[next_type - NT_OFFSET].d_name,
+                             ISTERMINAL(ch_type) ? _PyParser_TokenNames[ch_type] :
+                             _PyParser_Grammar.g_dfa[ch_type - NT_OFFSET].d_name);
             }
             else if (expected_str != NULL) {
                 PyErr_Format(parser_error, "Illegal terminal: expected '%s'.",