[Python-checkins] bpo-1635741: Release Unicode interned strings at exit (GH-21269)

Victor Stinner webhook-mailer at python.org
Wed Jul 1 19:20:04 EDT 2020


https://github.com/python/cpython/commit/666ecfb0957a2fa0df5e2bd03804195de74bdfbf
commit: 666ecfb0957a2fa0df5e2bd03804195de74bdfbf
branch: master
author: Victor Stinner <vstinner at python.org>
committer: GitHub <noreply at github.com>
date: 2020-07-02T01:19:57+02:00
summary:

bpo-1635741: Release Unicode interned strings at exit (GH-21269)

* PyUnicode_InternInPlace() now ensures that interned strings are
  ready.
* Add _PyUnicode_ClearInterned().
* Py_Finalize() now releases Unicode interned strings:
  call _PyUnicode_ClearInterned().

files:
M Include/internal/pycore_pylifecycle.h
M Objects/unicodeobject.c
M Python/pylifecycle.c

diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h
index bffc95b27e946..22def3dbc8b66 100644
--- a/Include/internal/pycore_pylifecycle.h
+++ b/Include/internal/pycore_pylifecycle.h
@@ -78,6 +78,7 @@ extern void _PyGC_Fini(PyThreadState *tstate);
 extern void _PyType_Fini(void);
 extern void _Py_HashRandomization_Fini(void);
 extern void _PyUnicode_Fini(PyThreadState *tstate);
+extern void _PyUnicode_ClearInterned(PyThreadState *tstate);
 extern void _PyLong_Fini(PyThreadState *tstate);
 extern void _PyFaulthandler_Fini(void);
 extern void _PyHash_Fini(void);
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index fe46de2ae4743..37e7fe5c0eff2 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -55,8 +55,8 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 #include <windows.h>
 #endif
 
-/* Uncomment to display statistics on interned strings at exit when
-   using Valgrind or Insecure++. */
+/* Uncomment to display statistics on interned strings at exit
+   in _PyUnicode_ClearInterned(). */
 /* #define INTERNED_STATS 1 */
 
 
@@ -15681,6 +15681,11 @@ PyUnicode_InternInPlace(PyObject **p)
     }
 
 #ifdef INTERNED_STRINGS
+    if (PyUnicode_READY(s) == -1) {
+        PyErr_Clear();
+        return;
+    }
+
     if (interned == NULL) {
         interned = PyDict_New();
         if (interned == NULL) {
@@ -15733,23 +15738,29 @@ PyUnicode_InternFromString(const char *cp)
 }
 
 
-#if defined(WITH_VALGRIND) || defined(__INSURE__)
-static void
-unicode_release_interned(void)
+void
+_PyUnicode_ClearInterned(PyThreadState *tstate)
 {
-    if (interned == NULL || !PyDict_Check(interned)) {
+    if (!_Py_IsMainInterpreter(tstate)) {
+        // interned dict is shared by all interpreters
+        return;
+    }
+
+    if (interned == NULL) {
         return;
     }
+    assert(PyDict_CheckExact(interned));
+
     PyObject *keys = PyDict_Keys(interned);
-    if (keys == NULL || !PyList_Check(keys)) {
+    if (keys == NULL) {
         PyErr_Clear();
         return;
     }
+    assert(PyList_CheckExact(keys));
 
-    /* Since unicode_release_interned() is intended to help a leak
-       detector, interned unicode strings are not forcibly deallocated;
-       rather, we give them their stolen references back, and then clear
-       and DECREF the interned dict. */
+    /* Interned unicode strings are not forcibly deallocated; rather, we give
+       them their stolen references back, and then clear and DECREF the
+       interned dict. */
 
     Py_ssize_t n = PyList_GET_SIZE(keys);
 #ifdef INTERNED_STATS
@@ -15759,9 +15770,8 @@ unicode_release_interned(void)
 #endif
     for (Py_ssize_t i = 0; i < n; i++) {
         PyObject *s = PyList_GET_ITEM(keys, i);
-        if (PyUnicode_READY(s) == -1) {
-            Py_UNREACHABLE();
-        }
+        assert(PyUnicode_IS_READY(s));
+
         switch (PyUnicode_CHECK_INTERNED(s)) {
         case SSTATE_INTERNED_IMMORTAL:
             Py_SET_REFCNT(s, Py_REFCNT(s) + 1);
@@ -15788,10 +15798,10 @@ unicode_release_interned(void)
             mortal_size, immortal_size);
 #endif
     Py_DECREF(keys);
+
     PyDict_Clear(interned);
     Py_CLEAR(interned);
 }
-#endif
 
 
 /********************* Unicode Iterator **************************/
@@ -16160,23 +16170,9 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
 void
 _PyUnicode_Fini(PyThreadState *tstate)
 {
-    struct _Py_unicode_state *state = &tstate->interp->unicode;
+    // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
 
-    int is_main_interp = _Py_IsMainInterpreter(tstate);
-    if (is_main_interp) {
-#if defined(WITH_VALGRIND) || defined(__INSURE__)
-        /* Insure++ is a memory analysis tool that aids in discovering
-         * memory leaks and other memory problems.  On Python exit, the
-         * interned string dictionaries are flagged as being in use at exit
-         * (which it is).  Under normal circumstances, this is fine because
-         * the memory will be automatically reclaimed by the system.  Under
-         * memory debugging, it's a huge source of useless noise, so we
-         * trade off slower shutdown for less distraction in the memory
-         * reports.  -baw
-         */
-        unicode_release_interned();
-#endif /* __INSURE__ */
-    }
+    struct _Py_unicode_state *state = &tstate->interp->unicode;
 
     Py_CLEAR(state->empty_string);
 
@@ -16184,7 +16180,7 @@ _PyUnicode_Fini(PyThreadState *tstate)
         Py_CLEAR(state->latin1[i]);
     }
 
-    if (is_main_interp) {
+    if (_Py_IsMainInterpreter(tstate)) {
         unicode_clear_static_strings();
     }
 
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index cfbaf21960b91..3ce2c41ef1ff6 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -1263,6 +1263,7 @@ finalize_interp_types(PyThreadState *tstate)
     _PyFrame_Fini(tstate);
     _PyAsyncGen_Fini(tstate);
     _PyContext_Fini(tstate);
+    _PyUnicode_ClearInterned(tstate);
 
     _PyDict_Fini(tstate);
     _PyList_Fini(tstate);



More information about the Python-checkins mailing list