[Python-checkins] GH-96458: Statically initialize utf8 representation of static strings (#96481)

gvanrossum webhook-mailer at python.org
Sat Sep 3 02:43:17 EDT 2022


https://github.com/python/cpython/commit/6dab8c95bd8db18e09619d804a938ab3e46042fc
commit: 6dab8c95bd8db18e09619d804a938ab3e46042fc
branch: main
author: Kumar Aditya <59607654+kumaraditya303 at users.noreply.github.com>
committer: gvanrossum <gvanrossum at gmail.com>
date: 2022-09-02T23:43:08-07:00
summary:

GH-96458: Statically initialize utf8 representation of static strings (#96481)

files:
M Include/internal/pycore_runtime_init.h
M Include/internal/pycore_runtime_init_generated.h
M Include/internal/pycore_unicodeobject.h
M Objects/unicodeobject.c
M Tools/scripts/deepfreeze.py
M Tools/scripts/generate_global_objects.py

diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h
index c14d2594134..621d5cc8642 100644
--- a/Include/internal/pycore_runtime_init.h
+++ b/Include/internal/pycore_runtime_init.h
@@ -113,10 +113,12 @@ extern "C" {
     ._ ## NAME = _PyASCIIObject_INIT(LITERAL)
 #define INIT_ID(NAME) \
     ._ ## NAME = _PyASCIIObject_INIT(#NAME)
-#define _PyUnicode_LATIN1_INIT(LITERAL) \
+#define _PyUnicode_LATIN1_INIT(LITERAL, UTF8) \
     { \
         ._latin1 = { \
             ._base = _PyUnicode_ASCII_BASE_INIT((LITERAL), 0), \
+            .utf8 = (UTF8), \
+            .utf8_length = sizeof(UTF8) - 1, \
         }, \
         ._data = (LITERAL), \
     }
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index 65ab098c1a2..7a760773aa7 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -1287,134 +1287,134 @@ extern "C" {
                 _PyASCIIObject_INIT("\x7f"), \
             }, \
             .latin1 = { \
-                _PyUnicode_LATIN1_INIT("\x80"), \
-                _PyUnicode_LATIN1_INIT("\x81"), \
-                _PyUnicode_LATIN1_INIT("\x82"), \
-                _PyUnicode_LATIN1_INIT("\x83"), \
-                _PyUnicode_LATIN1_INIT("\x84"), \
-                _PyUnicode_LATIN1_INIT("\x85"), \
-                _PyUnicode_LATIN1_INIT("\x86"), \
-                _PyUnicode_LATIN1_INIT("\x87"), \
-                _PyUnicode_LATIN1_INIT("\x88"), \
-                _PyUnicode_LATIN1_INIT("\x89"), \
-                _PyUnicode_LATIN1_INIT("\x8a"), \
-                _PyUnicode_LATIN1_INIT("\x8b"), \
-                _PyUnicode_LATIN1_INIT("\x8c"), \
-                _PyUnicode_LATIN1_INIT("\x8d"), \
-                _PyUnicode_LATIN1_INIT("\x8e"), \
-                _PyUnicode_LATIN1_INIT("\x8f"), \
-                _PyUnicode_LATIN1_INIT("\x90"), \
-                _PyUnicode_LATIN1_INIT("\x91"), \
-                _PyUnicode_LATIN1_INIT("\x92"), \
-                _PyUnicode_LATIN1_INIT("\x93"), \
-                _PyUnicode_LATIN1_INIT("\x94"), \
-                _PyUnicode_LATIN1_INIT("\x95"), \
-                _PyUnicode_LATIN1_INIT("\x96"), \
-                _PyUnicode_LATIN1_INIT("\x97"), \
-                _PyUnicode_LATIN1_INIT("\x98"), \
-                _PyUnicode_LATIN1_INIT("\x99"), \
-                _PyUnicode_LATIN1_INIT("\x9a"), \
-                _PyUnicode_LATIN1_INIT("\x9b"), \
-                _PyUnicode_LATIN1_INIT("\x9c"), \
-                _PyUnicode_LATIN1_INIT("\x9d"), \
-                _PyUnicode_LATIN1_INIT("\x9e"), \
-                _PyUnicode_LATIN1_INIT("\x9f"), \
-                _PyUnicode_LATIN1_INIT("\xa0"), \
-                _PyUnicode_LATIN1_INIT("\xa1"), \
-                _PyUnicode_LATIN1_INIT("\xa2"), \
-                _PyUnicode_LATIN1_INIT("\xa3"), \
-                _PyUnicode_LATIN1_INIT("\xa4"), \
-                _PyUnicode_LATIN1_INIT("\xa5"), \
-                _PyUnicode_LATIN1_INIT("\xa6"), \
-                _PyUnicode_LATIN1_INIT("\xa7"), \
-                _PyUnicode_LATIN1_INIT("\xa8"), \
-                _PyUnicode_LATIN1_INIT("\xa9"), \
-                _PyUnicode_LATIN1_INIT("\xaa"), \
-                _PyUnicode_LATIN1_INIT("\xab"), \
-                _PyUnicode_LATIN1_INIT("\xac"), \
-                _PyUnicode_LATIN1_INIT("\xad"), \
-                _PyUnicode_LATIN1_INIT("\xae"), \
-                _PyUnicode_LATIN1_INIT("\xaf"), \
-                _PyUnicode_LATIN1_INIT("\xb0"), \
-                _PyUnicode_LATIN1_INIT("\xb1"), \
-                _PyUnicode_LATIN1_INIT("\xb2"), \
-                _PyUnicode_LATIN1_INIT("\xb3"), \
-                _PyUnicode_LATIN1_INIT("\xb4"), \
-                _PyUnicode_LATIN1_INIT("\xb5"), \
-                _PyUnicode_LATIN1_INIT("\xb6"), \
-                _PyUnicode_LATIN1_INIT("\xb7"), \
-                _PyUnicode_LATIN1_INIT("\xb8"), \
-                _PyUnicode_LATIN1_INIT("\xb9"), \
-                _PyUnicode_LATIN1_INIT("\xba"), \
-                _PyUnicode_LATIN1_INIT("\xbb"), \
-                _PyUnicode_LATIN1_INIT("\xbc"), \
-                _PyUnicode_LATIN1_INIT("\xbd"), \
-                _PyUnicode_LATIN1_INIT("\xbe"), \
-                _PyUnicode_LATIN1_INIT("\xbf"), \
-                _PyUnicode_LATIN1_INIT("\xc0"), \
-                _PyUnicode_LATIN1_INIT("\xc1"), \
-                _PyUnicode_LATIN1_INIT("\xc2"), \
-                _PyUnicode_LATIN1_INIT("\xc3"), \
-                _PyUnicode_LATIN1_INIT("\xc4"), \
-                _PyUnicode_LATIN1_INIT("\xc5"), \
-                _PyUnicode_LATIN1_INIT("\xc6"), \
-                _PyUnicode_LATIN1_INIT("\xc7"), \
-                _PyUnicode_LATIN1_INIT("\xc8"), \
-                _PyUnicode_LATIN1_INIT("\xc9"), \
-                _PyUnicode_LATIN1_INIT("\xca"), \
-                _PyUnicode_LATIN1_INIT("\xcb"), \
-                _PyUnicode_LATIN1_INIT("\xcc"), \
-                _PyUnicode_LATIN1_INIT("\xcd"), \
-                _PyUnicode_LATIN1_INIT("\xce"), \
-                _PyUnicode_LATIN1_INIT("\xcf"), \
-                _PyUnicode_LATIN1_INIT("\xd0"), \
-                _PyUnicode_LATIN1_INIT("\xd1"), \
-                _PyUnicode_LATIN1_INIT("\xd2"), \
-                _PyUnicode_LATIN1_INIT("\xd3"), \
-                _PyUnicode_LATIN1_INIT("\xd4"), \
-                _PyUnicode_LATIN1_INIT("\xd5"), \
-                _PyUnicode_LATIN1_INIT("\xd6"), \
-                _PyUnicode_LATIN1_INIT("\xd7"), \
-                _PyUnicode_LATIN1_INIT("\xd8"), \
-                _PyUnicode_LATIN1_INIT("\xd9"), \
-                _PyUnicode_LATIN1_INIT("\xda"), \
-                _PyUnicode_LATIN1_INIT("\xdb"), \
-                _PyUnicode_LATIN1_INIT("\xdc"), \
-                _PyUnicode_LATIN1_INIT("\xdd"), \
-                _PyUnicode_LATIN1_INIT("\xde"), \
-                _PyUnicode_LATIN1_INIT("\xdf"), \
-                _PyUnicode_LATIN1_INIT("\xe0"), \
-                _PyUnicode_LATIN1_INIT("\xe1"), \
-                _PyUnicode_LATIN1_INIT("\xe2"), \
-                _PyUnicode_LATIN1_INIT("\xe3"), \
-                _PyUnicode_LATIN1_INIT("\xe4"), \
-                _PyUnicode_LATIN1_INIT("\xe5"), \
-                _PyUnicode_LATIN1_INIT("\xe6"), \
-                _PyUnicode_LATIN1_INIT("\xe7"), \
-                _PyUnicode_LATIN1_INIT("\xe8"), \
-                _PyUnicode_LATIN1_INIT("\xe9"), \
-                _PyUnicode_LATIN1_INIT("\xea"), \
-                _PyUnicode_LATIN1_INIT("\xeb"), \
-                _PyUnicode_LATIN1_INIT("\xec"), \
-                _PyUnicode_LATIN1_INIT("\xed"), \
-                _PyUnicode_LATIN1_INIT("\xee"), \
-                _PyUnicode_LATIN1_INIT("\xef"), \
-                _PyUnicode_LATIN1_INIT("\xf0"), \
-                _PyUnicode_LATIN1_INIT("\xf1"), \
-                _PyUnicode_LATIN1_INIT("\xf2"), \
-                _PyUnicode_LATIN1_INIT("\xf3"), \
-                _PyUnicode_LATIN1_INIT("\xf4"), \
-                _PyUnicode_LATIN1_INIT("\xf5"), \
-                _PyUnicode_LATIN1_INIT("\xf6"), \
-                _PyUnicode_LATIN1_INIT("\xf7"), \
-                _PyUnicode_LATIN1_INIT("\xf8"), \
-                _PyUnicode_LATIN1_INIT("\xf9"), \
-                _PyUnicode_LATIN1_INIT("\xfa"), \
-                _PyUnicode_LATIN1_INIT("\xfb"), \
-                _PyUnicode_LATIN1_INIT("\xfc"), \
-                _PyUnicode_LATIN1_INIT("\xfd"), \
-                _PyUnicode_LATIN1_INIT("\xfe"), \
-                _PyUnicode_LATIN1_INIT("\xff"), \
+                _PyUnicode_LATIN1_INIT("\x80", "\xc2\x80"), \
+                _PyUnicode_LATIN1_INIT("\x81", "\xc2\x81"), \
+                _PyUnicode_LATIN1_INIT("\x82", "\xc2\x82"), \
+                _PyUnicode_LATIN1_INIT("\x83", "\xc2\x83"), \
+                _PyUnicode_LATIN1_INIT("\x84", "\xc2\x84"), \
+                _PyUnicode_LATIN1_INIT("\x85", "\xc2\x85"), \
+                _PyUnicode_LATIN1_INIT("\x86", "\xc2\x86"), \
+                _PyUnicode_LATIN1_INIT("\x87", "\xc2\x87"), \
+                _PyUnicode_LATIN1_INIT("\x88", "\xc2\x88"), \
+                _PyUnicode_LATIN1_INIT("\x89", "\xc2\x89"), \
+                _PyUnicode_LATIN1_INIT("\x8a", "\xc2\x8a"), \
+                _PyUnicode_LATIN1_INIT("\x8b", "\xc2\x8b"), \
+                _PyUnicode_LATIN1_INIT("\x8c", "\xc2\x8c"), \
+                _PyUnicode_LATIN1_INIT("\x8d", "\xc2\x8d"), \
+                _PyUnicode_LATIN1_INIT("\x8e", "\xc2\x8e"), \
+                _PyUnicode_LATIN1_INIT("\x8f", "\xc2\x8f"), \
+                _PyUnicode_LATIN1_INIT("\x90", "\xc2\x90"), \
+                _PyUnicode_LATIN1_INIT("\x91", "\xc2\x91"), \
+                _PyUnicode_LATIN1_INIT("\x92", "\xc2\x92"), \
+                _PyUnicode_LATIN1_INIT("\x93", "\xc2\x93"), \
+                _PyUnicode_LATIN1_INIT("\x94", "\xc2\x94"), \
+                _PyUnicode_LATIN1_INIT("\x95", "\xc2\x95"), \
+                _PyUnicode_LATIN1_INIT("\x96", "\xc2\x96"), \
+                _PyUnicode_LATIN1_INIT("\x97", "\xc2\x97"), \
+                _PyUnicode_LATIN1_INIT("\x98", "\xc2\x98"), \
+                _PyUnicode_LATIN1_INIT("\x99", "\xc2\x99"), \
+                _PyUnicode_LATIN1_INIT("\x9a", "\xc2\x9a"), \
+                _PyUnicode_LATIN1_INIT("\x9b", "\xc2\x9b"), \
+                _PyUnicode_LATIN1_INIT("\x9c", "\xc2\x9c"), \
+                _PyUnicode_LATIN1_INIT("\x9d", "\xc2\x9d"), \
+                _PyUnicode_LATIN1_INIT("\x9e", "\xc2\x9e"), \
+                _PyUnicode_LATIN1_INIT("\x9f", "\xc2\x9f"), \
+                _PyUnicode_LATIN1_INIT("\xa0", "\xc2\xa0"), \
+                _PyUnicode_LATIN1_INIT("\xa1", "\xc2\xa1"), \
+                _PyUnicode_LATIN1_INIT("\xa2", "\xc2\xa2"), \
+                _PyUnicode_LATIN1_INIT("\xa3", "\xc2\xa3"), \
+                _PyUnicode_LATIN1_INIT("\xa4", "\xc2\xa4"), \
+                _PyUnicode_LATIN1_INIT("\xa5", "\xc2\xa5"), \
+                _PyUnicode_LATIN1_INIT("\xa6", "\xc2\xa6"), \
+                _PyUnicode_LATIN1_INIT("\xa7", "\xc2\xa7"), \
+                _PyUnicode_LATIN1_INIT("\xa8", "\xc2\xa8"), \
+                _PyUnicode_LATIN1_INIT("\xa9", "\xc2\xa9"), \
+                _PyUnicode_LATIN1_INIT("\xaa", "\xc2\xaa"), \
+                _PyUnicode_LATIN1_INIT("\xab", "\xc2\xab"), \
+                _PyUnicode_LATIN1_INIT("\xac", "\xc2\xac"), \
+                _PyUnicode_LATIN1_INIT("\xad", "\xc2\xad"), \
+                _PyUnicode_LATIN1_INIT("\xae", "\xc2\xae"), \
+                _PyUnicode_LATIN1_INIT("\xaf", "\xc2\xaf"), \
+                _PyUnicode_LATIN1_INIT("\xb0", "\xc2\xb0"), \
+                _PyUnicode_LATIN1_INIT("\xb1", "\xc2\xb1"), \
+                _PyUnicode_LATIN1_INIT("\xb2", "\xc2\xb2"), \
+                _PyUnicode_LATIN1_INIT("\xb3", "\xc2\xb3"), \
+                _PyUnicode_LATIN1_INIT("\xb4", "\xc2\xb4"), \
+                _PyUnicode_LATIN1_INIT("\xb5", "\xc2\xb5"), \
+                _PyUnicode_LATIN1_INIT("\xb6", "\xc2\xb6"), \
+                _PyUnicode_LATIN1_INIT("\xb7", "\xc2\xb7"), \
+                _PyUnicode_LATIN1_INIT("\xb8", "\xc2\xb8"), \
+                _PyUnicode_LATIN1_INIT("\xb9", "\xc2\xb9"), \
+                _PyUnicode_LATIN1_INIT("\xba", "\xc2\xba"), \
+                _PyUnicode_LATIN1_INIT("\xbb", "\xc2\xbb"), \
+                _PyUnicode_LATIN1_INIT("\xbc", "\xc2\xbc"), \
+                _PyUnicode_LATIN1_INIT("\xbd", "\xc2\xbd"), \
+                _PyUnicode_LATIN1_INIT("\xbe", "\xc2\xbe"), \
+                _PyUnicode_LATIN1_INIT("\xbf", "\xc2\xbf"), \
+                _PyUnicode_LATIN1_INIT("\xc0", "\xc3\x80"), \
+                _PyUnicode_LATIN1_INIT("\xc1", "\xc3\x81"), \
+                _PyUnicode_LATIN1_INIT("\xc2", "\xc3\x82"), \
+                _PyUnicode_LATIN1_INIT("\xc3", "\xc3\x83"), \
+                _PyUnicode_LATIN1_INIT("\xc4", "\xc3\x84"), \
+                _PyUnicode_LATIN1_INIT("\xc5", "\xc3\x85"), \
+                _PyUnicode_LATIN1_INIT("\xc6", "\xc3\x86"), \
+                _PyUnicode_LATIN1_INIT("\xc7", "\xc3\x87"), \
+                _PyUnicode_LATIN1_INIT("\xc8", "\xc3\x88"), \
+                _PyUnicode_LATIN1_INIT("\xc9", "\xc3\x89"), \
+                _PyUnicode_LATIN1_INIT("\xca", "\xc3\x8a"), \
+                _PyUnicode_LATIN1_INIT("\xcb", "\xc3\x8b"), \
+                _PyUnicode_LATIN1_INIT("\xcc", "\xc3\x8c"), \
+                _PyUnicode_LATIN1_INIT("\xcd", "\xc3\x8d"), \
+                _PyUnicode_LATIN1_INIT("\xce", "\xc3\x8e"), \
+                _PyUnicode_LATIN1_INIT("\xcf", "\xc3\x8f"), \
+                _PyUnicode_LATIN1_INIT("\xd0", "\xc3\x90"), \
+                _PyUnicode_LATIN1_INIT("\xd1", "\xc3\x91"), \
+                _PyUnicode_LATIN1_INIT("\xd2", "\xc3\x92"), \
+                _PyUnicode_LATIN1_INIT("\xd3", "\xc3\x93"), \
+                _PyUnicode_LATIN1_INIT("\xd4", "\xc3\x94"), \
+                _PyUnicode_LATIN1_INIT("\xd5", "\xc3\x95"), \
+                _PyUnicode_LATIN1_INIT("\xd6", "\xc3\x96"), \
+                _PyUnicode_LATIN1_INIT("\xd7", "\xc3\x97"), \
+                _PyUnicode_LATIN1_INIT("\xd8", "\xc3\x98"), \
+                _PyUnicode_LATIN1_INIT("\xd9", "\xc3\x99"), \
+                _PyUnicode_LATIN1_INIT("\xda", "\xc3\x9a"), \
+                _PyUnicode_LATIN1_INIT("\xdb", "\xc3\x9b"), \
+                _PyUnicode_LATIN1_INIT("\xdc", "\xc3\x9c"), \
+                _PyUnicode_LATIN1_INIT("\xdd", "\xc3\x9d"), \
+                _PyUnicode_LATIN1_INIT("\xde", "\xc3\x9e"), \
+                _PyUnicode_LATIN1_INIT("\xdf", "\xc3\x9f"), \
+                _PyUnicode_LATIN1_INIT("\xe0", "\xc3\xa0"), \
+                _PyUnicode_LATIN1_INIT("\xe1", "\xc3\xa1"), \
+                _PyUnicode_LATIN1_INIT("\xe2", "\xc3\xa2"), \
+                _PyUnicode_LATIN1_INIT("\xe3", "\xc3\xa3"), \
+                _PyUnicode_LATIN1_INIT("\xe4", "\xc3\xa4"), \
+                _PyUnicode_LATIN1_INIT("\xe5", "\xc3\xa5"), \
+                _PyUnicode_LATIN1_INIT("\xe6", "\xc3\xa6"), \
+                _PyUnicode_LATIN1_INIT("\xe7", "\xc3\xa7"), \
+                _PyUnicode_LATIN1_INIT("\xe8", "\xc3\xa8"), \
+                _PyUnicode_LATIN1_INIT("\xe9", "\xc3\xa9"), \
+                _PyUnicode_LATIN1_INIT("\xea", "\xc3\xaa"), \
+                _PyUnicode_LATIN1_INIT("\xeb", "\xc3\xab"), \
+                _PyUnicode_LATIN1_INIT("\xec", "\xc3\xac"), \
+                _PyUnicode_LATIN1_INIT("\xed", "\xc3\xad"), \
+                _PyUnicode_LATIN1_INIT("\xee", "\xc3\xae"), \
+                _PyUnicode_LATIN1_INIT("\xef", "\xc3\xaf"), \
+                _PyUnicode_LATIN1_INIT("\xf0", "\xc3\xb0"), \
+                _PyUnicode_LATIN1_INIT("\xf1", "\xc3\xb1"), \
+                _PyUnicode_LATIN1_INIT("\xf2", "\xc3\xb2"), \
+                _PyUnicode_LATIN1_INIT("\xf3", "\xc3\xb3"), \
+                _PyUnicode_LATIN1_INIT("\xf4", "\xc3\xb4"), \
+                _PyUnicode_LATIN1_INIT("\xf5", "\xc3\xb5"), \
+                _PyUnicode_LATIN1_INIT("\xf6", "\xc3\xb6"), \
+                _PyUnicode_LATIN1_INIT("\xf7", "\xc3\xb7"), \
+                _PyUnicode_LATIN1_INIT("\xf8", "\xc3\xb8"), \
+                _PyUnicode_LATIN1_INIT("\xf9", "\xc3\xb9"), \
+                _PyUnicode_LATIN1_INIT("\xfa", "\xc3\xba"), \
+                _PyUnicode_LATIN1_INIT("\xfb", "\xc3\xbb"), \
+                _PyUnicode_LATIN1_INIT("\xfc", "\xc3\xbc"), \
+                _PyUnicode_LATIN1_INIT("\xfd", "\xc3\xbd"), \
+                _PyUnicode_LATIN1_INIT("\xfe", "\xc3\xbe"), \
+                _PyUnicode_LATIN1_INIT("\xff", "\xc3\xbf"), \
             }, \
         }, \
         \
diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
index 4bee2419fbd..63bf04b3e1b 100644
--- a/Include/internal/pycore_unicodeobject.h
+++ b/Include/internal/pycore_unicodeobject.h
@@ -19,7 +19,6 @@ extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *);
 extern PyStatus _PyUnicode_InitTypes(PyInterpreterState *);
 extern void _PyUnicode_Fini(PyInterpreterState *);
 extern void _PyUnicode_FiniTypes(PyInterpreterState *);
-extern void _PyStaticUnicode_Dealloc(PyObject *);
 
 extern PyTypeObject _PyUnicodeASCIIIter_Type;
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 13f2c5b49bd..bd169ed7142 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -15184,23 +15184,6 @@ _PyUnicode_FiniTypes(PyInterpreterState *interp)
 }
 
 
-static void unicode_static_dealloc(PyObject *op)
-{
-    PyASCIIObject *ascii = _PyASCIIObject_CAST(op);
-
-    assert(ascii->state.compact);
-
-    if (!ascii->state.ascii) {
-        PyCompactUnicodeObject* compact = (PyCompactUnicodeObject*)op;
-        if (compact->utf8) {
-            PyObject_Free(compact->utf8);
-            compact->utf8 = NULL;
-            compact->utf8_length = 0;
-        }
-    }
-}
-
-
 void
 _PyUnicode_Fini(PyInterpreterState *interp)
 {
@@ -15217,24 +15200,8 @@ _PyUnicode_Fini(PyInterpreterState *interp)
     _PyUnicode_FiniEncodings(&state->fs_codec);
 
     unicode_clear_identifiers(state);
-
-    // Clear the single character singletons
-    for (int i = 0; i < 128; i++) {
-        unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).ascii[i]);
-    }
-    for (int i = 0; i < 128; i++) {
-        unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).latin1[i]);
-    }
 }
 
-
-void
-_PyStaticUnicode_Dealloc(PyObject *op)
-{
-    unicode_static_dealloc(op);
-}
-
-
 /* A _string module, to export formatter_parser and formatter_field_name_split
    to the string.Formatter class implemented in Python. */
 
diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py
index 62eeafab084..d9c6030fc17 100644
--- a/Tools/scripts/deepfreeze.py
+++ b/Tools/scripts/deepfreeze.py
@@ -195,7 +195,6 @@ def generate_unicode(self, name: str, s: str) -> str:
                 else:
                     self.write("PyCompactUnicodeObject _compact;")
                 self.write(f"{datatype} _data[{len(s)+1}];")
-        self.deallocs.append(f"_PyStaticUnicode_Dealloc((PyObject *)&{name});")
         with self.block(f"{name} =", ";"):
             if ascii:
                 with self.block("._ascii =", ","):
@@ -218,6 +217,9 @@ def generate_unicode(self, name: str, s: str) -> str:
                             self.write(f".kind = {kind},")
                             self.write(".compact = 1,")
                             self.write(".ascii = 0,")
+                    utf8 = s.encode('utf-8')
+                    self.write(f'.utf8 = {make_string_literal(utf8)},')
+                    self.write(f'.utf8_length = {len(utf8)},')
                 with self.block(f"._data =", ","):
                     for i in range(0, len(s), 16):
                         data = s[i:i+16]
diff --git a/Tools/scripts/generate_global_objects.py b/Tools/scripts/generate_global_objects.py
index f3a11f5f7e5..a50f3ba85d7 100644
--- a/Tools/scripts/generate_global_objects.py
+++ b/Tools/scripts/generate_global_objects.py
@@ -287,7 +287,11 @@ def generate_runtime_init(identifiers, strings):
                             immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).ascii[{i}]')
                     with printer.block('.latin1 =', ','):
                         for i in range(128, 256):
-                            printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}"),')
+                            utf8 = ['"']
+                            for c in chr(i).encode('utf-8'):
+                                utf8.append(f"\\x{c:02x}")
+                            utf8.append('"')
+                            printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}", {"".join(utf8)}),')
                             immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).latin1[{i} - 128]')
                 printer.write('')
                 with printer.block('.tuple_empty =', ','):



More information about the Python-checkins mailing list