[Python-checkins] bpo-46006: Revert "bpo-40521: Per-interpreter interned strings (GH-20085)" (GH-30422)

vstinner webhook-mailer at python.org
Thu Jan 6 02:53:53 EST 2022


https://github.com/python/cpython/commit/35d6540c904ef07b8602ff014e520603f84b5886
commit: 35d6540c904ef07b8602ff014e520603f84b5886
branch: main
author: Victor Stinner <vstinner at python.org>
committer: vstinner <vstinner at python.org>
date: 2022-01-06T08:53:44+01:00
summary:

bpo-46006: Revert "bpo-40521: Per-interpreter interned strings (GH-20085)" (GH-30422)

This reverts commit ea251806b8dffff11b30d2182af1e589caf88acf.

Keep "assert(interned == NULL);" in _PyUnicode_Fini(), but only for
the main interpreter.

Keep _PyUnicode_ClearInterned() changes avoiding the creation of a
temporary Python list object.

files:
A Misc/NEWS.d/next/Core and Builtins/2022-01-05-17-13-47.bpo-46006.hdH5Vn.rst
M Include/internal/pycore_unicodeobject.h
M Objects/typeobject.c
M Objects/unicodeobject.c

diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
index c50c42011a934..97d11aeb8201c 100644
--- a/Include/internal/pycore_unicodeobject.h
+++ b/Include/internal/pycore_unicodeobject.h
@@ -48,21 +48,11 @@ struct _Py_unicode_state {
     PyObject *latin1[256];
     struct _Py_unicode_fs_codec fs_codec;
 
-    /* This dictionary holds all interned unicode strings.  Note that references
-       to strings in this dictionary are *not* counted in the string's ob_refcnt.
-       When the interned string reaches a refcnt of 0 the string deallocation
-       function will delete the reference from this dictionary.
-
-       Another way to look at this is that to say that the actual reference
-       count of a string is:  s->ob_refcnt + (s->state ? 2 : 0)
-    */
-    PyObject *interned;
-
     // Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
     struct _Py_unicode_ids ids;
 };
 
-extern void _PyUnicode_ClearInterned(PyInterpreterState *);
+extern void _PyUnicode_ClearInterned(PyInterpreterState *interp);
 
 
 #ifdef __cplusplus
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-01-05-17-13-47.bpo-46006.hdH5Vn.rst b/Misc/NEWS.d/next/Core and Builtins/2022-01-05-17-13-47.bpo-46006.hdH5Vn.rst
new file mode 100644
index 0000000000000..3acd2b09390a8
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-01-05-17-13-47.bpo-46006.hdH5Vn.rst	
@@ -0,0 +1,5 @@
+Fix a regression when a type method like ``__init__()`` is modified in a
+subinterpreter. Fix a regression in ``_PyUnicode_EqualToASCIIId()`` and type
+``update_slot()``. Revert the change which made the Unicode dictionary of
+interned strings compatible with subinterpreters: the internal interned
+dictionary is shared again by all interpreters. Patch by Victor Stinner.
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index af35180cdb983..cbf806b074b9f 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -54,6 +54,11 @@ typedef struct PySlot_Offset {
 } PySlot_Offset;
 
 
+/* bpo-40521: Interned strings are shared by all subinterpreters */
+#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
+#  define INTERN_NAME_STRINGS
+#endif
+
 /* alphabetical order */
 _Py_IDENTIFIER(__abstractmethods__);
 _Py_IDENTIFIER(__annotations__);
@@ -4028,6 +4033,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
             if (name == NULL)
                 return -1;
         }
+#ifdef INTERN_NAME_STRINGS
         if (!PyUnicode_CHECK_INTERNED(name)) {
             PyUnicode_InternInPlace(&name);
             if (!PyUnicode_CHECK_INTERNED(name)) {
@@ -4037,6 +4043,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
                 return -1;
             }
         }
+#endif
     }
     else {
         /* Will fail in _PyObject_GenericSetAttrWithDict. */
@@ -8424,10 +8431,17 @@ _PyTypes_InitSlotDefs(void)
     for (slotdef *p = slotdefs; p->name; p++) {
         /* Slots must be ordered by their offset in the PyHeapTypeObject. */
         assert(!p[1].name || p->offset <= p[1].offset);
+#ifdef INTERN_NAME_STRINGS
         p->name_strobj = PyUnicode_InternFromString(p->name);
         if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
             return _PyStatus_NO_MEMORY();
         }
+#else
+        p->name_strobj = PyUnicode_FromString(p->name);
+        if (!p->name_strobj) {
+            return _PyStatus_NO_MEMORY();
+        }
+#endif
     }
     slotdefs_initialized = 1;
     return _PyStatus_OK();
@@ -8452,16 +8466,24 @@ update_slot(PyTypeObject *type, PyObject *name)
     int offset;
 
     assert(PyUnicode_CheckExact(name));
+#ifdef INTERN_NAME_STRINGS
     assert(PyUnicode_CHECK_INTERNED(name));
+#endif
 
     assert(slotdefs_initialized);
     pp = ptrs;
     for (p = slotdefs; p->name; p++) {
         assert(PyUnicode_CheckExact(p->name_strobj));
         assert(PyUnicode_CheckExact(name));
+#ifdef INTERN_NAME_STRINGS
         if (p->name_strobj == name) {
             *pp++ = p;
         }
+#else
+        if (p->name_strobj == name || _PyUnicode_EQ(p->name_strobj, name)) {
+            *pp++ = p;
+        }
+#endif
     }
     *pp = NULL;
     for (pp = ptrs; *pp; pp++) {
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 14449bce70839..31b8710defbea 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -214,6 +214,22 @@ extern "C" {
 #  define OVERALLOCATE_FACTOR 4
 #endif
 
+/* bpo-40521: Interned strings are shared by all interpreters. */
+#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
+#  define INTERNED_STRINGS
+#endif
+
+/* This dictionary holds all interned unicode strings.  Note that references
+   to strings in this dictionary are *not* counted in the string's ob_refcnt.
+   When the interned string reaches a refcnt of 0 the string deallocation
+   function will delete the reference from this dictionary.
+
+   Another way to look at this is that to say that the actual reference
+   count of a string is:  s->ob_refcnt + (s->state ? 2 : 0)
+*/
+#ifdef INTERNED_STRINGS
+static PyObject *interned = NULL;
+#endif
 
 /* Forward declaration */
 static inline int
@@ -1950,7 +1966,7 @@ unicode_dealloc(PyObject *unicode)
 
     case SSTATE_INTERNED_MORTAL:
     {
-        struct _Py_unicode_state *state = get_unicode_state();
+#ifdef INTERNED_STRINGS
         /* Revive the dead object temporarily. PyDict_DelItem() removes two
            references (key and value) which were ignored by
            PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
@@ -1958,12 +1974,13 @@ unicode_dealloc(PyObject *unicode)
            PyDict_DelItem(). */
         assert(Py_REFCNT(unicode) == 0);
         Py_SET_REFCNT(unicode, 3);
-        if (PyDict_DelItem(state->interned, unicode) != 0) {
+        if (PyDict_DelItem(interned, unicode) != 0) {
             _PyErr_WriteUnraisableMsg("deletion of interned string failed",
                                       NULL);
         }
         assert(Py_REFCNT(unicode) == 1);
         Py_SET_REFCNT(unicode, 0);
+#endif
         break;
     }
 
@@ -11342,11 +11359,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
     if (PyUnicode_CHECK_INTERNED(left))
         return 0;
 
+#ifdef INTERNED_STRINGS
     assert(_PyUnicode_HASH(right_uni) != -1);
     Py_hash_t hash = _PyUnicode_HASH(left);
     if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {
         return 0;
     }
+#endif
 
     return unicode_compare_eq(left, right_uni);
 }
@@ -15591,21 +15610,21 @@ PyUnicode_InternInPlace(PyObject **p)
         return;
     }
 
+#ifdef INTERNED_STRINGS
     if (PyUnicode_READY(s) == -1) {
         PyErr_Clear();
         return;
     }
 
-    struct _Py_unicode_state *state = get_unicode_state();
-    if (state->interned == NULL) {
-        state->interned = PyDict_New();
-        if (state->interned == NULL) {
+    if (interned == NULL) {
+        interned = PyDict_New();
+        if (interned == NULL) {
             PyErr_Clear(); /* Don't leave an exception */
             return;
         }
     }
 
-    PyObject *t = PyDict_SetDefault(state->interned, s, s);
+    PyObject *t = PyDict_SetDefault(interned, s, s);
     if (t == NULL) {
         PyErr_Clear();
         return;
@@ -15622,9 +15641,13 @@ PyUnicode_InternInPlace(PyObject **p)
        this. */
     Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
     _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
+#else
+    // PyDict expects that interned strings have their hash
+    // (PyASCIIObject.hash) already computed.
+    (void)unicode_hash(s);
+#endif
 }
 
-
 void
 PyUnicode_InternImmortal(PyObject **p)
 {
@@ -15658,11 +15681,15 @@ PyUnicode_InternFromString(const char *cp)
 void
 _PyUnicode_ClearInterned(PyInterpreterState *interp)
 {
-    struct _Py_unicode_state *state = &interp->unicode;
-    if (state->interned == NULL) {
+    if (!_Py_IsMainInterpreter(interp)) {
+        // interned dict is shared by all interpreters
         return;
     }
-    assert(PyDict_CheckExact(state->interned));
+
+    if (interned == NULL) {
+        return;
+    }
+    assert(PyDict_CheckExact(interned));
 
     /* Interned unicode strings are not forcibly deallocated; rather, we give
        them their stolen references back, and then clear and DECREF the
@@ -15670,13 +15697,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
 
 #ifdef INTERNED_STATS
     fprintf(stderr, "releasing %zd interned strings\n",
-            PyDict_GET_SIZE(state->interned));
+            PyDict_GET_SIZE(interned));
 
     Py_ssize_t immortal_size = 0, mortal_size = 0;
 #endif
     Py_ssize_t pos = 0;
     PyObject *s, *ignored_value;
-    while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {
+    while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
         assert(PyUnicode_IS_READY(s));
 
         switch (PyUnicode_CHECK_INTERNED(s)) {
@@ -15707,8 +15734,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
             mortal_size, immortal_size);
 #endif
 
-    PyDict_Clear(state->interned);
-    Py_CLEAR(state->interned);
+    PyDict_Clear(interned);
+    Py_CLEAR(interned);
 }
 
 
@@ -16079,8 +16106,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
 static inline int
 unicode_is_finalizing(void)
 {
-    struct _Py_unicode_state *state = get_unicode_state();
-    return (state->interned == NULL);
+    return (interned == NULL);
 }
 #endif
 
@@ -16090,8 +16116,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
 {
     struct _Py_unicode_state *state = &interp->unicode;
 
-    // _PyUnicode_ClearInterned() must be called before
-    assert(state->interned == NULL);
+    if (_Py_IsMainInterpreter(interp)) {
+        // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
+        assert(interned == NULL);
+    }
 
     _PyUnicode_FiniEncodings(&state->fs_codec);
 



More information about the Python-checkins mailing list