[Python-checkins] bpo-40521: Make bytes singletons per interpreter (GH-21074)

Victor Stinner webhook-mailer at python.org
Tue Jun 23 09:54:45 EDT 2020


https://github.com/python/cpython/commit/c41eed1a874e2f22bde45c3c89418414b7a37f46
commit: c41eed1a874e2f22bde45c3c89418414b7a37f46
branch: master
author: Victor Stinner <vstinner at python.org>
committer: GitHub <noreply at github.com>
date: 2020-06-23T15:54:35+02:00
summary:

bpo-40521: Make bytes singletons per interpreter (GH-21074)

Each interpreter now has its own empty bytes string and single byte
character singletons.

Replace STRINGLIB_EMPTY macro with STRINGLIB_GET_EMPTY() macro.

files:
M Include/internal/pycore_interp.h
M Include/internal/pycore_pylifecycle.h
M Misc/NEWS.d/next/Core and Builtins/2020-05-20-01-17-34.bpo-40521.wvAehI.rst
M Objects/bytesobject.c
M Objects/stringlib/README.txt
M Objects/stringlib/asciilib.h
M Objects/stringlib/partition.h
M Objects/stringlib/stringdefs.h
M Objects/stringlib/ucs1lib.h
M Objects/stringlib/ucs2lib.h
M Objects/stringlib/ucs4lib.h
M Objects/stringlib/unicodedefs.h
M Python/pylifecycle.c

diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h
index 697d97a39e01f..64e891f9f6eb4 100644
--- a/Include/internal/pycore_interp.h
+++ b/Include/internal/pycore_interp.h
@@ -65,6 +65,11 @@ struct _Py_unicode_fs_codec {
     _Py_error_handler error_handler;
 };
 
+struct _Py_bytes_state {
+    PyBytesObject *characters[256];
+    PyBytesObject *empty_string;
+};
+
 struct _Py_unicode_state {
     struct _Py_unicode_fs_codec fs_codec;
 };
@@ -233,6 +238,7 @@ struct _is {
     */
     PyLongObject* small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS];
 #endif
+    struct _Py_bytes_state bytes;
     struct _Py_unicode_state unicode;
     struct _Py_float_state float_state;
     /* Using a cache is very effective since typically only a single slice is
diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h
index 83ce1d2e7468c..9a3063aa2775f 100644
--- a/Include/internal/pycore_pylifecycle.h
+++ b/Include/internal/pycore_pylifecycle.h
@@ -63,7 +63,7 @@ extern void _PyDict_Fini(PyThreadState *tstate);
 extern void _PyTuple_Fini(PyThreadState *tstate);
 extern void _PyList_Fini(PyThreadState *tstate);
 extern void _PySet_Fini(PyThreadState *tstate);
-extern void _PyBytes_Fini(void);
+extern void _PyBytes_Fini(PyThreadState *tstate);
 extern void _PyFloat_Fini(PyThreadState *tstate);
 extern void _PySlice_Fini(PyThreadState *tstate);
 extern void _PyAsyncGen_Fini(PyThreadState *tstate);
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-05-20-01-17-34.bpo-40521.wvAehI.rst b/Misc/NEWS.d/next/Core and Builtins/2020-05-20-01-17-34.bpo-40521.wvAehI.rst
index 24fd437062a51..95fab369748f0 100644
--- a/Misc/NEWS.d/next/Core and Builtins/2020-05-20-01-17-34.bpo-40521.wvAehI.rst	
+++ b/Misc/NEWS.d/next/Core and Builtins/2020-05-20-01-17-34.bpo-40521.wvAehI.rst	
@@ -1,5 +1,9 @@
-The tuple free lists, the empty tuple singleton, the list free list, the empty
-frozenset singleton, the float free list, the slice cache, the dict free lists,
-the frame free list, the asynchronous generator free lists, and the context
-free list are no longer shared by all interpreters: each interpreter now its
-has own free lists and caches.
+Each interpreter now has its own free lists, singletons and caches:
+
+* Free lists: float, tuple, list, dict, frame, context,
+  asynchronous generator.
+* Singletons: empty tuple, empty frozenset, empty bytes string,
+  single byte character.
+* Slice cache.
+
+They are no longer shared by all interpreters.
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index d39721428634f..ce006e15dce9e 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -18,9 +18,6 @@ class bytes "PyBytesObject *" "&PyBytes_Type"
 
 #include "clinic/bytesobject.c.h"
 
-static PyBytesObject *characters[UCHAR_MAX + 1];
-static PyBytesObject *nullstring;
-
 _Py_IDENTIFIER(__bytes__);
 
 /* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
@@ -35,6 +32,15 @@ _Py_IDENTIFIER(__bytes__);
 Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
                                                    char *str);
 
+
+static struct _Py_bytes_state*
+get_bytes_state(void)
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    return &interp->bytes;
+}
+
+
 /*
    For PyBytes_FromString(), the parameter `str' points to a null-terminated
    string containing exactly `size' bytes.
@@ -63,9 +69,13 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
     PyBytesObject *op;
     assert(size >= 0);
 
-    if (size == 0 && (op = nullstring) != NULL) {
-        Py_INCREF(op);
-        return (PyObject *)op;
+    if (size == 0) {
+        struct _Py_bytes_state *state = get_bytes_state();
+        op = state->empty_string;
+        if (op != NULL) {
+            Py_INCREF(op);
+            return (PyObject *)op;
+        }
     }
 
     if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
@@ -88,8 +98,9 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
         op->ob_sval[size] = '\0';
     /* empty byte string singleton */
     if (size == 0) {
-        nullstring = op;
+        struct _Py_bytes_state *state = get_bytes_state();
         Py_INCREF(op);
+        state->empty_string = op;
     }
     return (PyObject *) op;
 }
@@ -103,11 +114,13 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
             "Negative size passed to PyBytes_FromStringAndSize");
         return NULL;
     }
-    if (size == 1 && str != NULL &&
-        (op = characters[*str & UCHAR_MAX]) != NULL)
-    {
-        Py_INCREF(op);
-        return (PyObject *)op;
+    if (size == 1 && str != NULL) {
+        struct _Py_bytes_state *state = get_bytes_state();
+        op = state->characters[*str & UCHAR_MAX];
+        if (op != NULL) {
+            Py_INCREF(op);
+            return (PyObject *)op;
+        }
     }
 
     op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
@@ -119,8 +132,9 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
     memcpy(op->ob_sval, str, size);
     /* share short strings */
     if (size == 1) {
-        characters[*str & UCHAR_MAX] = op;
+        struct _Py_bytes_state *state = get_bytes_state();
         Py_INCREF(op);
+        state->characters[*str & UCHAR_MAX] = op;
     }
     return (PyObject *) op;
 }
@@ -138,13 +152,21 @@ PyBytes_FromString(const char *str)
             "byte string is too long");
         return NULL;
     }
-    if (size == 0 && (op = nullstring) != NULL) {
-        Py_INCREF(op);
-        return (PyObject *)op;
+
+    struct _Py_bytes_state *state = get_bytes_state();
+    if (size == 0) {
+        op = state->empty_string;
+        if (op != NULL) {
+            Py_INCREF(op);
+            return (PyObject *)op;
+        }
     }
-    if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
-        Py_INCREF(op);
-        return (PyObject *)op;
+    else if (size == 1) {
+        op = state->characters[*str & UCHAR_MAX];
+        if (op != NULL) {
+            Py_INCREF(op);
+            return (PyObject *)op;
+        }
     }
 
     /* Inline PyObject_NewVar */
@@ -157,11 +179,12 @@ PyBytes_FromString(const char *str)
     memcpy(op->ob_sval, str, size+1);
     /* share short strings */
     if (size == 0) {
-        nullstring = op;
         Py_INCREF(op);
-    } else if (size == 1) {
-        characters[*str & UCHAR_MAX] = op;
+        state->empty_string = op;
+    }
+    else if (size == 1) {
         Py_INCREF(op);
+        state->characters[*str & UCHAR_MAX] = op;
     }
     return (PyObject *) op;
 }
@@ -1249,6 +1272,8 @@ PyBytes_AsStringAndSize(PyObject *obj,
 /* -------------------------------------------------------------------- */
 /* Methods */
 
+#define STRINGLIB_GET_EMPTY() get_bytes_state()->empty_string
+
 #include "stringlib/stringdefs.h"
 
 #include "stringlib/fastsearch.h"
@@ -1261,6 +1286,8 @@ PyBytes_AsStringAndSize(PyObject *obj,
 
 #include "stringlib/transmogrify.h"
 
+#undef STRINGLIB_GET_EMPTY
+
 PyObject *
 PyBytes_Repr(PyObject *obj, int smartquotes)
 {
@@ -3058,12 +3085,13 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
 }
 
 void
-_PyBytes_Fini(void)
+_PyBytes_Fini(PyThreadState *tstate)
 {
-    int i;
-    for (i = 0; i < UCHAR_MAX + 1; i++)
-        Py_CLEAR(characters[i]);
-    Py_CLEAR(nullstring);
+    struct _Py_bytes_state* state = &tstate->interp->bytes;
+    for (int i = 0; i < UCHAR_MAX + 1; i++) {
+        Py_CLEAR(state->characters[i]);
+    }
+    Py_CLEAR(state->empty_string);
 }
 
 /*********************** Bytes Iterator ****************************/
diff --git a/Objects/stringlib/README.txt b/Objects/stringlib/README.txt
index 8ff6ad8c4fa0f..e1e329290acbb 100644
--- a/Objects/stringlib/README.txt
+++ b/Objects/stringlib/README.txt
@@ -11,10 +11,10 @@ STRINGLIB_CHAR
 
     the type used to hold a character (char or Py_UNICODE)
 
-STRINGLIB_EMPTY
+STRINGLIB_GET_EMPTY()
 
-    a PyObject representing the empty string, only to be used if
-    STRINGLIB_MUTABLE is 0
+    returns a PyObject representing the empty string, only to be used if
+    STRINGLIB_MUTABLE is 0. It must not be NULL.
 
 Py_ssize_t STRINGLIB_LEN(PyObject*)
 
diff --git a/Objects/stringlib/asciilib.h b/Objects/stringlib/asciilib.h
index e69a2c076e3a3..8599d38a5a7f5 100644
--- a/Objects/stringlib/asciilib.h
+++ b/Objects/stringlib/asciilib.h
@@ -11,7 +11,7 @@
 #define STRINGLIB_CHAR           Py_UCS1
 #define STRINGLIB_TYPE_NAME      "unicode"
 #define STRINGLIB_PARSE_CODE     "U"
-#define STRINGLIB_EMPTY          unicode_empty
+#define STRINGLIB_GET_EMPTY()    unicode_empty
 #define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE
 #define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK
 #define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL
diff --git a/Objects/stringlib/partition.h b/Objects/stringlib/partition.h
index ed32a6f2b382e..3731df56987fd 100644
--- a/Objects/stringlib/partition.h
+++ b/Objects/stringlib/partition.h
@@ -37,10 +37,12 @@ STRINGLIB(partition)(PyObject* str_obj,
 #else
         Py_INCREF(str_obj);
         PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
-        Py_INCREF(STRINGLIB_EMPTY);
-        PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
-        Py_INCREF(STRINGLIB_EMPTY);
-        PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY);
+        PyObject *empty = (PyObject*)STRINGLIB_GET_EMPTY();
+        assert(empty != NULL);
+        Py_INCREF(empty);
+        PyTuple_SET_ITEM(out, 1, empty);
+        Py_INCREF(empty);
+        PyTuple_SET_ITEM(out, 2, empty);
 #endif
         return out;
     }
@@ -90,10 +92,12 @@ STRINGLIB(rpartition)(PyObject* str_obj,
             return NULL;
         }
 #else
-        Py_INCREF(STRINGLIB_EMPTY);
-        PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY);
-        Py_INCREF(STRINGLIB_EMPTY);
-        PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
+        PyObject *empty = (PyObject*)STRINGLIB_GET_EMPTY();
+        assert(empty != NULL);
+        Py_INCREF(empty);
+        PyTuple_SET_ITEM(out, 0, empty);
+        Py_INCREF(empty);
+        PyTuple_SET_ITEM(out, 1, empty);
         Py_INCREF(str_obj);
         PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj);
 #endif
diff --git a/Objects/stringlib/stringdefs.h b/Objects/stringlib/stringdefs.h
index ce27f3e4081f9..c12ecc59e5c6d 100644
--- a/Objects/stringlib/stringdefs.h
+++ b/Objects/stringlib/stringdefs.h
@@ -1,6 +1,10 @@
 #ifndef STRINGLIB_STRINGDEFS_H
 #define STRINGLIB_STRINGDEFS_H
 
+#ifndef STRINGLIB_GET_EMPTY
+#  error "STRINGLIB_GET_EMPTY macro must be defined"
+#endif
+
 /* this is sort of a hack.  there's at least one place (formatting
    floats) where some stringlib code takes a different path if it's
    compiled as unicode. */
@@ -13,7 +17,6 @@
 #define STRINGLIB_CHAR           char
 #define STRINGLIB_TYPE_NAME      "string"
 #define STRINGLIB_PARSE_CODE     "S"
-#define STRINGLIB_EMPTY          nullstring
 #define STRINGLIB_ISSPACE        Py_ISSPACE
 #define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r'))
 #define STRINGLIB_ISDECIMAL(x)   ((x >= '0') && (x <= '9'))
diff --git a/Objects/stringlib/ucs1lib.h b/Objects/stringlib/ucs1lib.h
index bc4b104f112cc..bdf30356b8457 100644
--- a/Objects/stringlib/ucs1lib.h
+++ b/Objects/stringlib/ucs1lib.h
@@ -11,7 +11,7 @@
 #define STRINGLIB_CHAR           Py_UCS1
 #define STRINGLIB_TYPE_NAME      "unicode"
 #define STRINGLIB_PARSE_CODE     "U"
-#define STRINGLIB_EMPTY          unicode_empty
+#define STRINGLIB_GET_EMPTY()    unicode_empty
 #define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE
 #define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK
 #define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL
diff --git a/Objects/stringlib/ucs2lib.h b/Objects/stringlib/ucs2lib.h
index 86a1dff1b5637..9d6888801867d 100644
--- a/Objects/stringlib/ucs2lib.h
+++ b/Objects/stringlib/ucs2lib.h
@@ -11,7 +11,7 @@
 #define STRINGLIB_CHAR           Py_UCS2
 #define STRINGLIB_TYPE_NAME      "unicode"
 #define STRINGLIB_PARSE_CODE     "U"
-#define STRINGLIB_EMPTY          unicode_empty
+#define STRINGLIB_GET_EMPTY()    unicode_empty
 #define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE
 #define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK
 #define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL
diff --git a/Objects/stringlib/ucs4lib.h b/Objects/stringlib/ucs4lib.h
index 3c32a93c96a1c..c7dfa527433e3 100644
--- a/Objects/stringlib/ucs4lib.h
+++ b/Objects/stringlib/ucs4lib.h
@@ -11,7 +11,7 @@
 #define STRINGLIB_CHAR           Py_UCS4
 #define STRINGLIB_TYPE_NAME      "unicode"
 #define STRINGLIB_PARSE_CODE     "U"
-#define STRINGLIB_EMPTY          unicode_empty
+#define STRINGLIB_GET_EMPTY()    unicode_empty
 #define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE
 #define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK
 #define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL
diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h
index 3db5629e11f12..e4d4163afc2f9 100644
--- a/Objects/stringlib/unicodedefs.h
+++ b/Objects/stringlib/unicodedefs.h
@@ -13,7 +13,7 @@
 #define STRINGLIB_CHAR           Py_UNICODE
 #define STRINGLIB_TYPE_NAME      "unicode"
 #define STRINGLIB_PARSE_CODE     "U"
-#define STRINGLIB_EMPTY          unicode_empty
+#define STRINGLIB_GET_EMPTY()    unicode_empty
 #define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE
 #define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK
 #define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index aaea0454d0084..4bb32abc4be1f 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -1262,9 +1262,7 @@ finalize_interp_types(PyThreadState *tstate, int is_main_interp)
 
     _PySlice_Fini(tstate);
 
-    if (is_main_interp) {
-        _PyBytes_Fini();
-    }
+    _PyBytes_Fini(tstate);
     _PyUnicode_Fini(tstate);
     _PyFloat_Fini(tstate);
     _PyLong_Fini(tstate);



More information about the Python-checkins mailing list