[Python-checkins] gh-99103: Normalize specialized traceback anchors against the current line (GH-99145)

Sat Nov 12 18:37:44 EST 2022

https://github.com/python/cpython/commit/57be5459593bbd09583317ebdafc4d58ae51dbf4
commit: 57be5459593bbd09583317ebdafc4d58ae51dbf4
branch: main
author: Batuhan Taskaya <isidentical at gmail.com>
committer: miss-islington <31488909+miss-islington at users.noreply.github.com>
date: 2022-11-12T15:37:25-08:00
summary:

gh-99103: Normalize specialized traceback anchors against the current line (GH-99145)



Automerge-Triggered-By: GH:isidentical

files:
A Misc/NEWS.d/next/Core and Builtins/2022-11-06-00-17-58.gh-issue-99103.bFA9BX.rst
M Lib/test/test_traceback.py
M Lib/traceback.py
M Python/traceback.c

diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index 430daf69d295..c17bbb48b65b 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -559,6 +559,23 @@ def f_with_binary_operator():
         result_lines = self.get_exception(f_with_binary_operator)
         self.assertEqual(result_lines, expected_error.splitlines())
 
+    def test_caret_for_binary_operators_with_unicode(self):
+        def f_with_binary_operator():
+            áóí = 20
+            return 10 + áóí / 0 + 30
+
+        lineno_f = f_with_binary_operator.__code__.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            f'  File "{__file__}", line {lineno_f+2}, in f_with_binary_operator\n'
+            '    return 10 + áóí / 0 + 30\n'
+            '                ~~~~^~~\n'
+        )
+        result_lines = self.get_exception(f_with_binary_operator)
+        self.assertEqual(result_lines, expected_error.splitlines())
+
     def test_caret_for_binary_operators_two_char(self):
         def f_with_binary_operator():
             divisor = 20
@@ -593,6 +610,23 @@ def f_with_subscript():
         result_lines = self.get_exception(f_with_subscript)
         self.assertEqual(result_lines, expected_error.splitlines())
 
+    def test_caret_for_subscript_unicode(self):
+        def f_with_subscript():
+            some_dict = {'ó': {'á': {'í': {'theta': 1}}}}
+            return some_dict['ó']['á']['í']['beta']
+
+        lineno_f = f_with_subscript.__code__.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            f'  File "{__file__}", line {lineno_f+2}, in f_with_subscript\n'
+            "    return some_dict['ó']['á']['í']['beta']\n"
+            '           ~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^\n'
+        )
+        result_lines = self.get_exception(f_with_subscript)
+        self.assertEqual(result_lines, expected_error.splitlines())
+
     def test_traceback_specialization_with_syntax_error(self):
         bytecode = compile("1 / 0 / 1 / 2\n", TESTFN, "exec")
 
@@ -3356,7 +3390,7 @@ def func():
 
         actual = self.get_suggestion(func)
         self.assertNotIn("blech", actual)
-    
+
     def test_name_error_with_instance(self):
         class A:
             def __init__(self):
diff --git a/Lib/traceback.py b/Lib/traceback.py
index 8d518728fa1b..c43c4720ae5a 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -586,12 +586,15 @@ def _extract_caret_anchors_from_line_segment(segment):
     if len(tree.body) != 1:
         return None
 
+    normalize = lambda offset: _byte_offset_to_character_offset(segment, offset)
     statement = tree.body[0]
     match statement:
         case ast.Expr(expr):
             match expr:
                 case ast.BinOp():
-                    operator_str = segment[expr.left.end_col_offset:expr.right.col_offset]
+                    operator_start = normalize(expr.left.end_col_offset)
+                    operator_end = normalize(expr.right.col_offset)
+                    operator_str = segment[operator_start:operator_end]
                     operator_offset = len(operator_str) - len(operator_str.lstrip())
 
                     left_anchor = expr.left.end_col_offset + operator_offset
@@ -601,9 +604,11 @@ def _extract_caret_anchors_from_line_segment(segment):
                         and not operator_str[operator_offset + 1].isspace()
                     ):
                         right_anchor += 1
-                    return _Anchors(left_anchor, right_anchor)
+                    return _Anchors(normalize(left_anchor), normalize(right_anchor))
                 case ast.Subscript():
-                    return _Anchors(expr.value.end_col_offset, expr.slice.end_col_offset + 1)
+                    subscript_start = normalize(expr.value.end_col_offset)
+                    subscript_end = normalize(expr.slice.end_col_offset + 1)
+                    return _Anchors(subscript_start, subscript_end)
 
     return None
 
@@ -1044,7 +1049,7 @@ def _compute_suggestion_error(exc_value, tb, wrong_name):
             self = frame.f_locals['self']
             if hasattr(self, wrong_name):
                 return f"self.{wrong_name}"
-    
+
     # Compute closest match
 
     if len(d) > _MAX_CANDIDATE_ITEMS:
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-11-06-00-17-58.gh-issue-99103.bFA9BX.rst b/Misc/NEWS.d/next/Core and Builtins/2022-11-06-00-17-58.gh-issue-99103.bFA9BX.rst
new file mode 100644
index 000000000000..f5378eb837d1
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-11-06-00-17-58.gh-issue-99103.bFA9BX.rst	
@@ -0,0 +1,2 @@
+Fix the error reporting positions of specialized traceback anchors when the
+source line contains Unicode characters.
diff --git a/Python/traceback.c b/Python/traceback.c
index aacdb33d39b8..356e64364832 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -700,8 +700,13 @@ extract_anchors_from_line(PyObject *filename, PyObject *line,
 
 done:
     if (res > 0) {
-        *left_anchor += start_offset;
-        *right_anchor += start_offset;
+        // Normalize the AST offsets to byte offsets and adjust them with the
+        // start of the actual line (instead of the source code segment).
+        assert(segment != NULL);
+        assert(*left_anchor >= 0);
+        assert(*right_anchor >= 0);
+        *left_anchor = _PyPegen_byte_offset_to_character_offset(segment, *left_anchor) + start_offset;
+        *right_anchor = _PyPegen_byte_offset_to_character_offset(segment, *right_anchor) + start_offset;
     }
     Py_XDECREF(segment);
     if (arena) {