[Python-checkins] GH-95150: Use position and exception tables for code hashing and equality (GH-95509)

brandtbucher webhook-mailer at python.org
Mon Aug 1 14:03:03 EDT 2022


https://github.com/python/cpython/commit/c7e5bbaee88a71dc6e633e3cd451ed1798436382
commit: c7e5bbaee88a71dc6e633e3cd451ed1798436382
branch: main
author: Brandt Bucher <brandtbucher at gmail.com>
committer: brandtbucher <brandtbucher at gmail.com>
date: 2022-08-01T11:02:56-07:00
summary:

GH-95150: Use position and exception tables for code hashing and equality (GH-95509)

files:
A Misc/NEWS.d/next/Core and Builtins/2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst
M Lib/test/test_code.py
M Lib/test/test_compile.py
M Lib/test/test_syntax.py
M Objects/codeobject.c

diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py
index fd68f6dee7915..2386cf6b59f39 100644
--- a/Lib/test/test_code.py
+++ b/Lib/test/test_code.py
@@ -428,6 +428,27 @@ def func():
             self.assertIsNone(line)
             self.assertEqual(end_line, new_code.co_firstlineno + 1)
 
+    def test_code_equality(self):
+        def f():
+            try:
+                a()
+            except:
+                b()
+            else:
+                c()
+            finally:
+                d()
+        code_a = f.__code__
+        code_b = code_a.replace(co_linetable=b"")
+        code_c = code_a.replace(co_exceptiontable=b"")
+        code_d = code_b.replace(co_exceptiontable=b"")
+        self.assertNotEqual(code_a, code_b)
+        self.assertNotEqual(code_a, code_c)
+        self.assertNotEqual(code_a, code_d)
+        self.assertNotEqual(code_b, code_c)
+        self.assertNotEqual(code_b, code_d)
+        self.assertNotEqual(code_c, code_d)
+
 
 def isinterned(s):
     return s is sys.intern(('_' + s + '_')[1:-1])
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index e6194460b787d..c64e4e55f4445 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -615,7 +615,7 @@ def check_same_constant(const):
             exec(code, ns)
             f1 = ns['f1']
             f2 = ns['f2']
-            self.assertIs(f1.__code__, f2.__code__)
+            self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts)
             self.check_constant(f1, const)
             self.assertEqual(repr(f1()), repr(const))
 
@@ -628,7 +628,7 @@ def check_same_constant(const):
         # Note: "lambda: ..." emits "LOAD_CONST Ellipsis",
         # whereas "lambda: Ellipsis" emits "LOAD_GLOBAL Ellipsis"
         f1, f2 = lambda: ..., lambda: ...
-        self.assertIs(f1.__code__, f2.__code__)
+        self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts)
         self.check_constant(f1, Ellipsis)
         self.assertEqual(repr(f1()), repr(Ellipsis))
 
@@ -643,7 +643,7 @@ def check_same_constant(const):
         # {0} is converted to a constant frozenset({0}) by the peephole
         # optimizer
         f1, f2 = lambda x: x in {0}, lambda x: x in {0}
-        self.assertIs(f1.__code__, f2.__code__)
+        self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts)
         self.check_constant(f1, frozenset({0}))
         self.assertTrue(f1(0))
 
@@ -1302,6 +1302,27 @@ def f():
             self.assertIsNotNone(end_column)
             self.assertLessEqual((line, column), (end_line, end_column))
 
+    @support.cpython_only
+    def test_column_offset_deduplication(self):
+        # GH-95150: Code with different column offsets shouldn't be merged!
+        for source in [
+            "lambda: a",
+            "(a for b in c)",
+            "[a for b in c]",
+            "{a for b in c}",
+            "{a: b for c in d}",
+        ]:
+            with self.subTest(source):
+                code = compile(f"{source}, {source}", "<test>", "eval")
+                self.assertEqual(len(code.co_consts), 2)
+                self.assertIsInstance(code.co_consts[0], types.CodeType)
+                self.assertIsInstance(code.co_consts[1], types.CodeType)
+                self.assertNotEqual(code.co_consts[0], code.co_consts[1])
+                self.assertNotEqual(
+                    list(code.co_consts[0].co_positions()),
+                    list(code.co_consts[1].co_positions()),
+                )
+
 
 class TestExpressionStackSize(unittest.TestCase):
     # These tests check that the computed stack size for a code object
diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py
index b22a96b20298d..ae1066924b3cf 100644
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -2012,7 +2012,8 @@ def fib(n):
     a, b = 0, 1
 """
         try:
-            self.assertEqual(compile(s1, '<string>', 'exec'), compile(s2, '<string>', 'exec'))
+            compile(s1, '<string>', 'exec')
+            compile(s2, '<string>', 'exec')
         except SyntaxError:
             self.fail("Indented statement over multiple lines is valid")
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst
new file mode 100644
index 0000000000000..c3db4714188b3
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst	
@@ -0,0 +1,3 @@
+Update code object hashing and equality to consider all debugging and
+exception handling tables. This fixes an issue where certain non-identical
+code objects could be "deduplicated" during compilation.
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 2f757c4d8a986..7ebbfdbdec18b 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -1695,6 +1695,15 @@ code_richcompare(PyObject *self, PyObject *other, int op)
     eq = PyObject_RichCompareBool(co->co_localsplusnames,
                                   cp->co_localsplusnames, Py_EQ);
     if (eq <= 0) goto unequal;
+    eq = PyObject_RichCompareBool(co->co_linetable, cp->co_linetable, Py_EQ);
+    if (eq <= 0) {
+        goto unequal;
+    }
+    eq = PyObject_RichCompareBool(co->co_exceptiontable,
+                                  cp->co_exceptiontable, Py_EQ);
+    if (eq <= 0) {
+        goto unequal;
+    }
 
     if (op == Py_EQ)
         res = Py_True;
@@ -1727,7 +1736,15 @@ code_hash(PyCodeObject *co)
     if (h2 == -1) return -1;
     h3 = PyObject_Hash(co->co_localsplusnames);
     if (h3 == -1) return -1;
-    h = h0 ^ h1 ^ h2 ^ h3 ^
+    Py_hash_t h4 = PyObject_Hash(co->co_linetable);
+    if (h4 == -1) {
+        return -1;
+    }
+    Py_hash_t h5 = PyObject_Hash(co->co_exceptiontable);
+    if (h5 == -1) {
+        return -1;
+    }
+    h = h0 ^ h1 ^ h2 ^ h3 ^ h4 ^ h5 ^
         co->co_argcount ^ co->co_posonlyargcount ^ co->co_kwonlyargcount ^
         co->co_flags;
     if (h == -1) h = -2;



More information about the Python-checkins mailing list