[Python-checkins] bpo-36878: Track extra text added to 'type: ignore' in the AST (GH-13479)

Ivan Levkivskyi webhook-mailer at python.org
Wed May 22 10:54:34 EDT 2019


https://github.com/python/cpython/commit/933e1509ec6efa8e6ab8c8c7ce02059ce2b6d9b9
commit: 933e1509ec6efa8e6ab8c8c7ce02059ce2b6d9b9
branch: master
author: Michael J. Sullivan <sully at msully.net>
committer: Ivan Levkivskyi <levkivskyi at gmail.com>
date: 2019-05-22T15:54:20+01:00
summary:

bpo-36878: Track extra text added to 'type: ignore' in the AST (GH-13479)

GH-13238 made the parser accept extra text after a # type: ignore comment.
This change finishes the job by plumbing that extra text through the
parser and making it available in the AST.

files:
A Misc/NEWS.d/next/Core and Builtins/2019-05-21-16-21-22.bpo-36878.EFRHZ3.rst
M Include/Python-ast.h
M Lib/test/test_type_comments.py
M Misc/ACKS
M Parser/Python.asdl
M Parser/parsetok.c
M Parser/tokenizer.c
M Python/Python-ast.c
M Python/ast.c

diff --git a/Include/Python-ast.h b/Include/Python-ast.h
index 08d50ffcddf6..2fc50e3f53a2 100644
--- a/Include/Python-ast.h
+++ b/Include/Python-ast.h
@@ -467,6 +467,7 @@ struct _type_ignore {
     union {
         struct {
             int lineno;
+            string tag;
         } TypeIgnore;
 
     } v;
@@ -702,8 +703,8 @@ alias_ty _Py_alias(identifier name, identifier asname, PyArena *arena);
 #define withitem(a0, a1, a2) _Py_withitem(a0, a1, a2)
 withitem_ty _Py_withitem(expr_ty context_expr, expr_ty optional_vars, PyArena
                          *arena);
-#define TypeIgnore(a0, a1) _Py_TypeIgnore(a0, a1)
-type_ignore_ty _Py_TypeIgnore(int lineno, PyArena *arena);
+#define TypeIgnore(a0, a1, a2) _Py_TypeIgnore(a0, a1, a2)
+type_ignore_ty _Py_TypeIgnore(int lineno, string tag, PyArena *arena);
 
 PyObject* PyAST_mod2obj(mod_ty t);
 mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode);
diff --git a/Lib/test/test_type_comments.py b/Lib/test/test_type_comments.py
index b4318902ee34..c62894fa4255 100644
--- a/Lib/test/test_type_comments.py
+++ b/Lib/test/test_type_comments.py
@@ -272,7 +272,16 @@ def test_vardecl(self):
 
     def test_ignores(self):
         for tree in self.parse_all(ignores):
-            self.assertEqual([ti.lineno for ti in tree.type_ignores], [2, 5, 8, 9, 10, 11])
+            self.assertEqual(
+                [(ti.lineno, ti.tag) for ti in tree.type_ignores],
+                [
+                    (2, ''),
+                    (5, ''),
+                    (8, '[excuse]'),
+                    (9, '=excuse'),
+                    (10, ' [excuse]'),
+                    (11, ' whatever'),
+                ])
         tree = self.classic_parse(ignores)
         self.assertEqual(tree.type_ignores, [])
 
diff --git a/Misc/ACKS b/Misc/ACKS
index 8f0ecb7f1c37..fbed14684b31 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1594,6 +1594,7 @@ Daniel Stutzbach
 Andreas Stührk
 Colin Su
 Pal Subbiah
+Michael J. Sullivan
 Nathan Sullivan
 Mark Summerfield
 Reuben Sumner
diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-05-21-16-21-22.bpo-36878.EFRHZ3.rst b/Misc/NEWS.d/next/Core and Builtins/2019-05-21-16-21-22.bpo-36878.EFRHZ3.rst
new file mode 100644
index 000000000000..00c8b904ac2a
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-05-21-16-21-22.bpo-36878.EFRHZ3.rst	
@@ -0,0 +1,3 @@
+Store text appearing after a `# type: ignore` comment in the AST. For
+example a type ignore like `# type: ignore[E1000]` will have the string
+`"[E1000]"` stored in its AST node.
diff --git a/Parser/Python.asdl b/Parser/Python.asdl
index 626fa4fede47..882f5d1eba35 100644
--- a/Parser/Python.asdl
+++ b/Parser/Python.asdl
@@ -125,6 +125,5 @@ module Python
 
     withitem = (expr context_expr, expr? optional_vars)
 
-    type_ignore = TypeIgnore(int lineno)
+    type_ignore = TypeIgnore(int lineno, string tag)
 }
-
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 31be0ebbde2d..55fd7f7db3da 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -16,13 +16,16 @@ static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
 static int initerr(perrdetail *err_ret, PyObject * filename);
 
 typedef struct {
-    int *items;
+    struct {
+        int lineno;
+        char *comment;
+    } *items;
     size_t size;
     size_t num_items;
-} growable_int_array;
+} growable_comment_array;
 
 static int
-growable_int_array_init(growable_int_array *arr, size_t initial_size) {
+growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
     assert(initial_size > 0);
     arr->items = malloc(initial_size * sizeof(*arr->items));
     arr->size = initial_size;
@@ -32,7 +35,7 @@ growable_int_array_init(growable_int_array *arr, size_t initial_size) {
 }
 
 static int
-growable_int_array_add(growable_int_array *arr, int item) {
+growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
     if (arr->num_items >= arr->size) {
         arr->size *= 2;
         arr->items = realloc(arr->items, arr->size * sizeof(*arr->items));
@@ -41,13 +44,17 @@ growable_int_array_add(growable_int_array *arr, int item) {
         }
     }
 
-    arr->items[arr->num_items] = item;
+    arr->items[arr->num_items].lineno = lineno;
+    arr->items[arr->num_items].comment = comment;
     arr->num_items++;
     return 1;
 }
 
 static void
-growable_int_array_deallocate(growable_int_array *arr) {
+growable_comment_array_deallocate(growable_comment_array *arr) {
+    for (unsigned i = 0; i < arr->num_items; i++) {
+        PyObject_FREE(arr->items[i].comment);
+    }
     free(arr->items);
 }
 
@@ -220,9 +227,9 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
     node *n;
     int started = 0;
     int col_offset, end_col_offset;
-    growable_int_array type_ignores;
+    growable_comment_array type_ignores;
 
-    if (!growable_int_array_init(&type_ignores, 10)) {
+    if (!growable_comment_array_init(&type_ignores, 10)) {
         err_ret->error = E_NOMEM;
         PyTokenizer_Free(tok);
         return NULL;
@@ -320,8 +327,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
         }
 
         if (type == TYPE_IGNORE) {
-            PyObject_FREE(str);
-            if (!growable_int_array_add(&type_ignores, tok->lineno)) {
+            if (!growable_comment_array_add(&type_ignores, tok->lineno, str)) {
                 err_ret->error = E_NOMEM;
                 break;
             }
@@ -355,9 +361,16 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
             REQ(ch, ENDMARKER);
 
             for (i = 0; i < type_ignores.num_items; i++) {
-                PyNode_AddChild(ch, TYPE_IGNORE, NULL,
-                                type_ignores.items[i], 0,
-                                type_ignores.items[i], 0);
+                int res = PyNode_AddChild(ch, TYPE_IGNORE, type_ignores.items[i].comment,
+                                          type_ignores.items[i].lineno, 0,
+                                          type_ignores.items[i].lineno, 0);
+                if (res != 0) {
+                    err_ret->error = res;
+                    PyNode_Free(n);
+                    n = NULL;
+                    break;
+                }
+                type_ignores.items[i].comment = NULL;
             }
         }
 
@@ -365,7 +378,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
            is a single statement by looking at what is left in the
            buffer after parsing.  Trailing whitespace and comments
            are OK.  */
-        if (start == single_input) {
+        if (err_ret->error == E_DONE && start == single_input) {
             char *cur = tok->cur;
             char c = *tok->cur;
 
@@ -392,7 +405,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
     else
         n = NULL;
 
-    growable_int_array_deallocate(&type_ignores);
+    growable_comment_array_deallocate(&type_ignores);
 
 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
     *flags = ps->p_flags;
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index e52d498d5542..9b269afc429b 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1269,6 +1269,7 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
             /* This is a type comment if we matched all of type_comment_prefix. */
             if (!*prefix) {
                 int is_type_ignore = 1;
+                const char *ignore_end = p + 6;
                 tok_backup(tok, c);  /* don't eat the newline or EOF */
 
                 type_start = p;
@@ -1276,10 +1277,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
                 /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
                  * or anything non-alphanumeric. */
                 is_type_ignore = (
-                    tok->cur >= p + 6 && memcmp(p, "ignore", 6) == 0
-                    && !(tok->cur > p + 6 && isalnum(p[6])));
+                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
+                    && !(tok->cur > ignore_end && isalnum(p[6])));
 
                 if (is_type_ignore) {
+                    *p_start = (char *) ignore_end;
+                    *p_end = tok->cur;
+
                     /* If this type ignore is the only thing on the line, consume the newline also. */
                     if (blankline) {
                         tok_nextc(tok);
diff --git a/Python/Python-ast.c b/Python/Python-ast.c
index 552750584480..e84a7586a707 100644
--- a/Python/Python-ast.c
+++ b/Python/Python-ast.c
@@ -524,8 +524,10 @@ static char *withitem_fields[]={
 static PyTypeObject *type_ignore_type;
 static PyObject* ast2obj_type_ignore(void*);
 static PyTypeObject *TypeIgnore_type;
+_Py_IDENTIFIER(tag);
 static char *TypeIgnore_fields[]={
     "lineno",
+    "tag",
 };
 
 
@@ -1164,7 +1166,7 @@ static int init_types(void)
     if (!type_ignore_type) return 0;
     if (!add_attributes(type_ignore_type, NULL, 0)) return 0;
     TypeIgnore_type = make_type("TypeIgnore", type_ignore_type,
-                                TypeIgnore_fields, 1);
+                                TypeIgnore_fields, 2);
     if (!TypeIgnore_type) return 0;
     initialized = 1;
     return 1;
@@ -2667,14 +2669,20 @@ withitem(expr_ty context_expr, expr_ty optional_vars, PyArena *arena)
 }
 
 type_ignore_ty
-TypeIgnore(int lineno, PyArena *arena)
+TypeIgnore(int lineno, string tag, PyArena *arena)
 {
     type_ignore_ty p;
+    if (!tag) {
+        PyErr_SetString(PyExc_ValueError,
+                        "field tag is required for TypeIgnore");
+        return NULL;
+    }
     p = (type_ignore_ty)PyArena_Malloc(arena, sizeof(*p));
     if (!p)
         return NULL;
     p->kind = TypeIgnore_kind;
     p->v.TypeIgnore.lineno = lineno;
+    p->v.TypeIgnore.tag = tag;
     return p;
 }
 
@@ -4158,6 +4166,11 @@ ast2obj_type_ignore(void* _o)
         if (_PyObject_SetAttrId(result, &PyId_lineno, value) == -1)
             goto failed;
         Py_DECREF(value);
+        value = ast2obj_string(o->v.TypeIgnore.tag);
+        if (!value) goto failed;
+        if (_PyObject_SetAttrId(result, &PyId_tag, value) == -1)
+            goto failed;
+        Py_DECREF(value);
         break;
     }
     return result;
@@ -8738,6 +8751,7 @@ obj2ast_type_ignore(PyObject* obj, type_ignore_ty* out, PyArena* arena)
     }
     if (isinstance) {
         int lineno;
+        string tag;
 
         if (_PyObject_LookupAttrId(obj, &PyId_lineno, &tmp) < 0) {
             return 1;
@@ -8752,7 +8766,20 @@ obj2ast_type_ignore(PyObject* obj, type_ignore_ty* out, PyArena* arena)
             if (res != 0) goto failed;
             Py_CLEAR(tmp);
         }
-        *out = TypeIgnore(lineno, arena);
+        if (_PyObject_LookupAttrId(obj, &PyId_tag, &tmp) < 0) {
+            return 1;
+        }
+        if (tmp == NULL) {
+            PyErr_SetString(PyExc_TypeError, "required field \"tag\" missing from TypeIgnore");
+            return 1;
+        }
+        else {
+            int res;
+            res = obj2ast_string(tmp, &tag, arena);
+            if (res != 0) goto failed;
+            Py_CLEAR(tmp);
+        }
+        *out = TypeIgnore(lineno, tag, arena);
         if (*out == NULL) goto failed;
         return 0;
     }
diff --git a/Python/ast.c b/Python/ast.c
index abc8d89c8a38..625982735775 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -830,7 +830,10 @@ PyAST_FromNodeObject(const node *n, PyCompilerFlags *flags,
                 goto out;
 
             for (i = 0; i < num; i++) {
-                type_ignore_ty ti = TypeIgnore(LINENO(CHILD(ch, i)), arena);
+                string type_comment = new_type_comment(STR(CHILD(ch, i)), &c);
+                if (!type_comment)
+                    goto out;
+                type_ignore_ty ti = TypeIgnore(LINENO(CHILD(ch, i)), type_comment, arena);
                 if (!ti)
                    goto out;
                asdl_seq_SET(type_ignores, i, ti);



More information about the Python-checkins mailing list