[Python-checkins] bpo-30465: Fix lineno and col_offset in fstring AST nodes (#1800)

ericvsmith webhook-mailer at python.org
Wed Sep 6 20:28:01 EDT 2017


https://github.com/python/cpython/commit/e7c566caf177afe43b57f0b2723e723d880368e8
commit: e7c566caf177afe43b57f0b2723e723d880368e8
branch: master
author: Łukasz Langa <lukasz at langa.pl>
committer: ericvsmith <ericvsmith at users.noreply.github.com>
date: 2017-09-06T17:27:58-07:00
summary:

bpo-30465: Fix lineno and col_offset in fstring AST nodes (#1800)

For f-string ast nodes, fix the line and columns so that tools such as flake8 can identify them correctly.

files:
A Misc/NEWS.d/next/Core and Builtins/2017-09-06-10-47-29.bpo-30465.oe-3GD.rst
M Lib/test/test_fstring.py
M Python/ast.c

diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index 3d762b5aa9b..5e7efe25e39 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -70,6 +70,253 @@ def __call__(self):
         # Make sure x was called.
         self.assertTrue(x.called)
 
+    def test_ast_line_numbers(self):
+        expr = """
+a = 10
+f'{a * x()}'"""
+        t = ast.parse(expr)
+        self.assertEqual(type(t), ast.Module)
+        self.assertEqual(len(t.body), 2)
+        # check `a = 10`
+        self.assertEqual(type(t.body[0]), ast.Assign)
+        self.assertEqual(t.body[0].lineno, 2)
+        # check `f'...'`
+        self.assertEqual(type(t.body[1]), ast.Expr)
+        self.assertEqual(type(t.body[1].value), ast.JoinedStr)
+        self.assertEqual(len(t.body[1].value.values), 1)
+        self.assertEqual(type(t.body[1].value.values[0]), ast.FormattedValue)
+        self.assertEqual(t.body[1].lineno, 3)
+        self.assertEqual(t.body[1].value.lineno, 3)
+        self.assertEqual(t.body[1].value.values[0].lineno, 3)
+        # check the binop location
+        binop = t.body[1].value.values[0].value
+        self.assertEqual(type(binop), ast.BinOp)
+        self.assertEqual(type(binop.left), ast.Name)
+        self.assertEqual(type(binop.op), ast.Mult)
+        self.assertEqual(type(binop.right), ast.Call)
+        self.assertEqual(binop.lineno, 3)
+        self.assertEqual(binop.left.lineno, 3)
+        self.assertEqual(binop.right.lineno, 3)
+        self.assertEqual(binop.col_offset, 3)
+        self.assertEqual(binop.left.col_offset, 3)
+        self.assertEqual(binop.right.col_offset, 7)
+
+    def test_ast_line_numbers_multiple_formattedvalues(self):
+        expr = """
+f'no formatted values'
+f'eggs {a * x()} spam {b + y()}'"""
+        t = ast.parse(expr)
+        self.assertEqual(type(t), ast.Module)
+        self.assertEqual(len(t.body), 2)
+        # check `f'no formatted value'`
+        self.assertEqual(type(t.body[0]), ast.Expr)
+        self.assertEqual(type(t.body[0].value), ast.JoinedStr)
+        self.assertEqual(t.body[0].lineno, 2)
+        # check `f'...'`
+        self.assertEqual(type(t.body[1]), ast.Expr)
+        self.assertEqual(type(t.body[1].value), ast.JoinedStr)
+        self.assertEqual(len(t.body[1].value.values), 4)
+        self.assertEqual(type(t.body[1].value.values[0]), ast.Str)
+        self.assertEqual(type(t.body[1].value.values[1]), ast.FormattedValue)
+        self.assertEqual(type(t.body[1].value.values[2]), ast.Str)
+        self.assertEqual(type(t.body[1].value.values[3]), ast.FormattedValue)
+        self.assertEqual(t.body[1].lineno, 3)
+        self.assertEqual(t.body[1].value.lineno, 3)
+        self.assertEqual(t.body[1].value.values[0].lineno, 3)
+        self.assertEqual(t.body[1].value.values[1].lineno, 3)
+        self.assertEqual(t.body[1].value.values[2].lineno, 3)
+        self.assertEqual(t.body[1].value.values[3].lineno, 3)
+        # check the first binop location
+        binop1 = t.body[1].value.values[1].value
+        self.assertEqual(type(binop1), ast.BinOp)
+        self.assertEqual(type(binop1.left), ast.Name)
+        self.assertEqual(type(binop1.op), ast.Mult)
+        self.assertEqual(type(binop1.right), ast.Call)
+        self.assertEqual(binop1.lineno, 3)
+        self.assertEqual(binop1.left.lineno, 3)
+        self.assertEqual(binop1.right.lineno, 3)
+        self.assertEqual(binop1.col_offset, 8)
+        self.assertEqual(binop1.left.col_offset, 8)
+        self.assertEqual(binop1.right.col_offset, 12)
+        # check the second binop location
+        binop2 = t.body[1].value.values[3].value
+        self.assertEqual(type(binop2), ast.BinOp)
+        self.assertEqual(type(binop2.left), ast.Name)
+        self.assertEqual(type(binop2.op), ast.Add)
+        self.assertEqual(type(binop2.right), ast.Call)
+        self.assertEqual(binop2.lineno, 3)
+        self.assertEqual(binop2.left.lineno, 3)
+        self.assertEqual(binop2.right.lineno, 3)
+        self.assertEqual(binop2.col_offset, 23)
+        self.assertEqual(binop2.left.col_offset, 23)
+        self.assertEqual(binop2.right.col_offset, 27)
+
+    def test_ast_line_numbers_nested(self):
+        expr = """
+a = 10
+f'{a * f"-{x()}-"}'"""
+        t = ast.parse(expr)
+        self.assertEqual(type(t), ast.Module)
+        self.assertEqual(len(t.body), 2)
+        # check `a = 10`
+        self.assertEqual(type(t.body[0]), ast.Assign)
+        self.assertEqual(t.body[0].lineno, 2)
+        # check `f'...'`
+        self.assertEqual(type(t.body[1]), ast.Expr)
+        self.assertEqual(type(t.body[1].value), ast.JoinedStr)
+        self.assertEqual(len(t.body[1].value.values), 1)
+        self.assertEqual(type(t.body[1].value.values[0]), ast.FormattedValue)
+        self.assertEqual(t.body[1].lineno, 3)
+        self.assertEqual(t.body[1].value.lineno, 3)
+        self.assertEqual(t.body[1].value.values[0].lineno, 3)
+        # check the binop location
+        binop = t.body[1].value.values[0].value
+        self.assertEqual(type(binop), ast.BinOp)
+        self.assertEqual(type(binop.left), ast.Name)
+        self.assertEqual(type(binop.op), ast.Mult)
+        self.assertEqual(type(binop.right), ast.JoinedStr)
+        self.assertEqual(binop.lineno, 3)
+        self.assertEqual(binop.left.lineno, 3)
+        self.assertEqual(binop.right.lineno, 3)
+        self.assertEqual(binop.col_offset, 3)
+        self.assertEqual(binop.left.col_offset, 3)
+        self.assertEqual(binop.right.col_offset, 7)
+        # check the nested call location
+        self.assertEqual(len(binop.right.values), 3)
+        self.assertEqual(type(binop.right.values[0]), ast.Str)
+        self.assertEqual(type(binop.right.values[1]), ast.FormattedValue)
+        self.assertEqual(type(binop.right.values[2]), ast.Str)
+        self.assertEqual(binop.right.values[0].lineno, 3)
+        self.assertEqual(binop.right.values[1].lineno, 3)
+        self.assertEqual(binop.right.values[2].lineno, 3)
+        call = binop.right.values[1].value
+        self.assertEqual(type(call), ast.Call)
+        self.assertEqual(call.lineno, 3)
+        self.assertEqual(call.col_offset, 11)
+
+    def test_ast_line_numbers_duplicate_expression(self):
+        """Duplicate expression
+
+        NOTE: this is currently broken, always sets location of the first
+        expression.
+        """
+        expr = """
+a = 10
+f'{a * x()} {a * x()} {a * x()}'
+"""
+        t = ast.parse(expr)
+        self.assertEqual(type(t), ast.Module)
+        self.assertEqual(len(t.body), 2)
+        # check `a = 10`
+        self.assertEqual(type(t.body[0]), ast.Assign)
+        self.assertEqual(t.body[0].lineno, 2)
+        # check `f'...'`
+        self.assertEqual(type(t.body[1]), ast.Expr)
+        self.assertEqual(type(t.body[1].value), ast.JoinedStr)
+        self.assertEqual(len(t.body[1].value.values), 5)
+        self.assertEqual(type(t.body[1].value.values[0]), ast.FormattedValue)
+        self.assertEqual(type(t.body[1].value.values[1]), ast.Str)
+        self.assertEqual(type(t.body[1].value.values[2]), ast.FormattedValue)
+        self.assertEqual(type(t.body[1].value.values[3]), ast.Str)
+        self.assertEqual(type(t.body[1].value.values[4]), ast.FormattedValue)
+        self.assertEqual(t.body[1].lineno, 3)
+        self.assertEqual(t.body[1].value.lineno, 3)
+        self.assertEqual(t.body[1].value.values[0].lineno, 3)
+        self.assertEqual(t.body[1].value.values[1].lineno, 3)
+        self.assertEqual(t.body[1].value.values[2].lineno, 3)
+        self.assertEqual(t.body[1].value.values[3].lineno, 3)
+        self.assertEqual(t.body[1].value.values[4].lineno, 3)
+        # check the first binop location
+        binop = t.body[1].value.values[0].value
+        self.assertEqual(type(binop), ast.BinOp)
+        self.assertEqual(type(binop.left), ast.Name)
+        self.assertEqual(type(binop.op), ast.Mult)
+        self.assertEqual(type(binop.right), ast.Call)
+        self.assertEqual(binop.lineno, 3)
+        self.assertEqual(binop.left.lineno, 3)
+        self.assertEqual(binop.right.lineno, 3)
+        self.assertEqual(binop.col_offset, 3)
+        self.assertEqual(binop.left.col_offset, 3)
+        self.assertEqual(binop.right.col_offset, 7)
+        # check the second binop location
+        binop = t.body[1].value.values[2].value
+        self.assertEqual(type(binop), ast.BinOp)
+        self.assertEqual(type(binop.left), ast.Name)
+        self.assertEqual(type(binop.op), ast.Mult)
+        self.assertEqual(type(binop.right), ast.Call)
+        self.assertEqual(binop.lineno, 3)
+        self.assertEqual(binop.left.lineno, 3)
+        self.assertEqual(binop.right.lineno, 3)
+        self.assertEqual(binop.col_offset, 3)  # FIXME: this is wrong
+        self.assertEqual(binop.left.col_offset, 3)  # FIXME: this is wrong
+        self.assertEqual(binop.right.col_offset, 7)  # FIXME: this is wrong
+        # check the third binop location
+        binop = t.body[1].value.values[4].value
+        self.assertEqual(type(binop), ast.BinOp)
+        self.assertEqual(type(binop.left), ast.Name)
+        self.assertEqual(type(binop.op), ast.Mult)
+        self.assertEqual(type(binop.right), ast.Call)
+        self.assertEqual(binop.lineno, 3)
+        self.assertEqual(binop.left.lineno, 3)
+        self.assertEqual(binop.right.lineno, 3)
+        self.assertEqual(binop.col_offset, 3)  # FIXME: this is wrong
+        self.assertEqual(binop.left.col_offset, 3)  # FIXME: this is wrong
+        self.assertEqual(binop.right.col_offset, 7)  # FIXME: this is wrong
+
+    def test_ast_line_numbers_multiline_fstring(self):
+        # FIXME: This test demonstrates invalid behavior due to JoinedStr's
+        # immediate child nodes containing the wrong lineno.  The enclosed
+        # expressions have valid line information and column offsets.
+        # See bpo-16806 and bpo-30465 for details.
+        expr = """
+a = 10
+f'''
+  {a
+     *
+       x()}
+non-important content
+'''
+"""
+        t = ast.parse(expr)
+        self.assertEqual(type(t), ast.Module)
+        self.assertEqual(len(t.body), 2)
+        # check `a = 10`
+        self.assertEqual(type(t.body[0]), ast.Assign)
+        self.assertEqual(t.body[0].lineno, 2)
+        # check `f'...'`
+        self.assertEqual(type(t.body[1]), ast.Expr)
+        self.assertEqual(type(t.body[1].value), ast.JoinedStr)
+        self.assertEqual(len(t.body[1].value.values), 3)
+        self.assertEqual(type(t.body[1].value.values[0]), ast.Str)
+        self.assertEqual(type(t.body[1].value.values[1]), ast.FormattedValue)
+        self.assertEqual(type(t.body[1].value.values[2]), ast.Str)
+        # NOTE: the following invalid behavior is described in bpo-16806.
+        # - line number should be the *first* line (3), not the *last* (8)
+        # - column offset should not be -1
+        self.assertEqual(t.body[1].lineno, 8)
+        self.assertEqual(t.body[1].value.lineno, 8)
+        self.assertEqual(t.body[1].value.values[0].lineno, 8)
+        self.assertEqual(t.body[1].value.values[1].lineno, 8)
+        self.assertEqual(t.body[1].value.values[2].lineno, 8)
+        self.assertEqual(t.body[1].col_offset, -1)
+        self.assertEqual(t.body[1].value.col_offset, -1)
+        self.assertEqual(t.body[1].value.values[0].col_offset, -1)
+        self.assertEqual(t.body[1].value.values[1].col_offset, -1)
+        self.assertEqual(t.body[1].value.values[2].col_offset, -1)
+        # NOTE: the following lineno information and col_offset is correct for
+        # expressions within FormattedValues.
+        binop = t.body[1].value.values[1].value
+        self.assertEqual(type(binop), ast.BinOp)
+        self.assertEqual(type(binop.left), ast.Name)
+        self.assertEqual(type(binop.op), ast.Mult)
+        self.assertEqual(type(binop.right), ast.Call)
+        self.assertEqual(binop.lineno, 4)
+        self.assertEqual(binop.left.lineno, 4)
+        self.assertEqual(binop.right.lineno, 6)
+        self.assertEqual(binop.col_offset, 3)
+        self.assertEqual(binop.left.col_offset, 3)
+        self.assertEqual(binop.right.col_offset, 7)
+
     def test_docstring(self):
         def f():
             f'''Not a docstring'''
@@ -786,5 +1033,6 @@ def test_backslash_char(self):
         self.assertEqual(eval('f"\\\n"'), '')
         self.assertEqual(eval('f"\\\r"'), '')
 
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-09-06-10-47-29.bpo-30465.oe-3GD.rst b/Misc/NEWS.d/next/Core and Builtins/2017-09-06-10-47-29.bpo-30465.oe-3GD.rst
new file mode 100644
index 00000000000..0d9138ed82d
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2017-09-06-10-47-29.bpo-30465.oe-3GD.rst	
@@ -0,0 +1,3 @@
+Location information (``lineno`` and ``col_offset``) in f-strings is now
+(mostly) correct.  This fixes tools like flake8 from showing warnings on the
+wrong line (typically the first line of the file).
diff --git a/Python/ast.c b/Python/ast.c
index 89206c3464f..7d30e5f864f 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -8,6 +8,7 @@
 #include "node.h"
 #include "ast.h"
 #include "token.h"
+#include "pythonrun.h"
 
 #include <assert.h>
 
@@ -4270,6 +4271,55 @@ decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
     return result;
 }
 
+/* Shift locations for the given node and all its children by adding `lineno`
+   and `col_offset` to existing locations. */
+static void fstring_shift_node_locations(node *n, int lineno, int col_offset)
+{
+    n->n_col_offset = n->n_col_offset + col_offset;
+    for (int i = 0; i < NCH(n); ++i) {
+        if (n->n_lineno && n->n_lineno < CHILD(n, i)->n_lineno) {
+            /* Shifting column offsets unnecessary if there's been newlines. */
+            col_offset = 0;
+        }
+        fstring_shift_node_locations(CHILD(n, i), lineno, col_offset);
+    }
+    n->n_lineno = n->n_lineno + lineno;
+}
+
+/* Fix locations for the given node and its children.
+
+   `parent` is the enclosing node.
+   `n` is the node which locations are going to be fixed relative to parent.
+   `expr_str` is the child node's string representation, incuding braces.
+*/
+static void
+fstring_fix_node_location(const node *parent, node *n, char *expr_str)
+{
+    char *substr = NULL;
+    char *start;
+    int lines = LINENO(parent) - 1;
+    int cols = parent->n_col_offset;
+    /* Find the full fstring to fix location information in `n`. */
+    while (parent && parent->n_type != STRING)
+        parent = parent->n_child;
+    if (parent && parent->n_str) {
+        substr = strstr(parent->n_str, expr_str);
+        if (substr) {
+            start = substr;
+            while (start > parent->n_str) {
+                if (start[0] == '\n')
+                    break;
+                start--;
+            }
+            cols += substr - start;
+            /* Fix lineno in mulitline strings. */
+            while ((substr = strchr(substr + 1, '\n')))
+                lines--;
+        }
+    }
+    fstring_shift_node_locations(n, lines, cols);
+}
+
 /* Compile this expression in to an expr_ty.  Add parens around the
    expression, in order to allow leading spaces in the expression. */
 static expr_ty
@@ -4278,6 +4328,7 @@ fstring_compile_expr(const char *expr_start, const char *expr_end,
 
 {
     PyCompilerFlags cf;
+    node *mod_n;
     mod_ty mod;
     char *str;
     Py_ssize_t len;
@@ -4287,9 +4338,10 @@ fstring_compile_expr(const char *expr_start, const char *expr_end,
     assert(*(expr_start-1) == '{');
     assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':');
 
-    /* If the substring is all whitespace, it's an error.  We need to catch
-       this here, and not when we call PyParser_ASTFromString, because turning
-       the expression '' in to '()' would go from being invalid to valid. */
+    /* If the substring is all whitespace, it's an error.  We need to catch this
+       here, and not when we call PyParser_SimpleParseStringFlagsFilename,
+       because turning the expression '' in to '()' would go from being invalid
+       to valid. */
     for (s = expr_start; s != expr_end; s++) {
         char c = *s;
         /* The Python parser ignores only the following whitespace
@@ -4315,9 +4367,19 @@ fstring_compile_expr(const char *expr_start, const char *expr_end,
     str[len+2] = 0;
 
     cf.cf_flags = PyCF_ONLY_AST;
-    mod = PyParser_ASTFromString(str, "<fstring>",
-                                 Py_eval_input, &cf, c->c_arena);
+    mod_n = PyParser_SimpleParseStringFlagsFilename(str, "<fstring>",
+                                                    Py_eval_input, 0);
+    if (!mod_n) {
+        PyMem_RawFree(str);
+        return NULL;
+    }
+    /* Reuse str to find the correct column offset. */
+    str[0] = '{';
+    str[len+1] = '}';
+    fstring_fix_node_location(n, mod_n, str);
+    mod = PyAST_FromNode(mod_n, &cf, "<fstring>", c->c_arena);
     PyMem_RawFree(str);
+    PyNode_Free(mod_n);
     if (!mod)
         return NULL;
     return mod->v.Expression.body;



More information about the Python-checkins mailing list