[Python-checkins] bpo-42157: unicodedata avoids references to UCD_Type (GH-22990)

vstinner webhook-mailer at python.org
Mon Oct 26 14:19:50 EDT 2020


https://github.com/python/cpython/commit/920cb647ba23feab7987d0dac1bd63bfc2ffc4c0
commit: 920cb647ba23feab7987d0dac1bd63bfc2ffc4c0
branch: master
author: Victor Stinner <vstinner at python.org>
committer: vstinner <vstinner at python.org>
date: 2020-10-26T19:19:36+01:00
summary:

bpo-42157: unicodedata avoids references to UCD_Type (GH-22990)

* UCD_Check() uses PyModule_Check()
* Simplify the internal _PyUnicode_Name_CAPI structure:

  * Remove size and state members
  * Remove state and self parameters of getcode() and getname()
    functions

* Remove global_module_state

files:
A Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-42157.e3BcPM.rst
D Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst
M Doc/whatsnew/3.10.rst
M Include/internal/pycore_ucnhash.h
M Modules/unicodedata.c
M Objects/unicodeobject.c
M Python/codecs.c

diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst
index 581d3a57e8457..2ef2b5d19e585 100644
--- a/Doc/whatsnew/3.10.rst
+++ b/Doc/whatsnew/3.10.rst
@@ -408,10 +408,8 @@ Porting to Python 3.10
   (Contributed by Inada Naoki in :issue:`36346`.)
 
 * The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API
-  ``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover,
-  the structure gets a new ``state`` member which must be passed to the
-  ``getcode()`` and ``getname()`` functions.
-  (Contributed by Victor Stinner in :issue:`1635741`.)
+  ``unicodedata.ucnhash_CAPI`` moves to the internal C API.
+  (Contributed by Victor Stinner in :issue:`42157`.)
 
 Deprecated
 ----------
diff --git a/Include/internal/pycore_ucnhash.h b/Include/internal/pycore_ucnhash.h
index 380b9415d4280..5e7c035f81d2a 100644
--- a/Include/internal/pycore_ucnhash.h
+++ b/Include/internal/pycore_ucnhash.h
@@ -15,25 +15,15 @@ extern "C" {
 
 typedef struct {
 
-    /* Size of this struct */
-    int size;
-
-    // state which must be passed as the first parameter to getname()
-    // and getcode()
-    void *state;
-
-    /* Get name for a given character code.  Returns non-zero if
-       success, zero if not.  Does not set Python exceptions.
-       If self is NULL, data come from the default version of the database.
-       If it is not NULL, it should be a unicodedata.ucd_X_Y_Z object */
-    int (*getname)(void *state, PyObject *self, Py_UCS4 code,
-                   char* buffer, int buflen,
+    /* Get name for a given character code.
+       Returns non-zero if success, zero if not.
+       Does not set Python exceptions. */
+    int (*getname)(Py_UCS4 code, char* buffer, int buflen,
                    int with_alias_and_seq);
 
-    /* Get character code for a given name.  Same error handling
-       as for getname. */
-    int (*getcode)(void *state, PyObject *self,
-                   const char* name, int namelen, Py_UCS4* code,
+    /* Get character code for a given name.
+       Same error handling as for getname(). */
+    int (*getcode)(const char* name, int namelen, Py_UCS4* code,
                    int with_named_seq);
 
 } _PyUnicode_Name_CAPI;
diff --git a/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst b/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst
deleted file mode 100644
index 5272ad577265a..0000000000000
--- a/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-1635741.e3BcPM.rst	
+++ /dev/null
@@ -1,4 +0,0 @@
-The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API
-``unicodedata.ucnhash_CAPI`` moves to the internal C API. Moreover, the
-structure gets a new ``state`` member which must be passed to the
-``getcode()`` and ``getname()`` functions. Patch by Victor Stinner.
diff --git a/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-42157.e3BcPM.rst b/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-42157.e3BcPM.rst
new file mode 100644
index 0000000000000..1f05186d9e0ef
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2020-10-16-10-47-17.bpo-42157.e3BcPM.rst	
@@ -0,0 +1,3 @@
+The private ``_PyUnicode_Name_CAPI`` structure of the PyCapsule API
+``unicodedata.ucnhash_CAPI`` moves to the internal C API.
+Patch by Victor Stinner.
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index bfd8ab503c8cc..6c802ba116ffe 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -93,29 +93,19 @@ static PyMemberDef DB_members[] = {
 /* forward declaration */
 static PyTypeObject UCD_Type;
 
-typedef struct {
-    // Borrowed reference to &UCD_Type. It is used to prepare the code
-    // to convert the UCD_Type static type to a heap type.
-    PyTypeObject *ucd_type;
-
-    _PyUnicode_Name_CAPI capi;
-} unicodedata_module_state;
-
-// bpo-1635741: Temporary global state until the unicodedata module
-// gets a real module state.
-static unicodedata_module_state global_module_state;
-
-// Check if self is an instance of ucd_type.
-// Return 0 if self is NULL (when the PyCapsule C API is used).
-#define UCD_Check(self, ucd_type) (self != NULL && Py_IS_TYPE(self, ucd_type))
+// Check if self is a unicodedata.UCD instance.
+// If self is NULL (when the PyCapsule C API is used), return 0.
+// PyModule_Check() is used to avoid having to retrieve the ucd_type.
+// See the unicodedata_functions comment for the rationale of this macro.
+#define UCD_Check(self) (self != NULL && !PyModule_Check(self))
 
 static PyObject*
-new_previous_version(unicodedata_module_state *state,
+new_previous_version(PyTypeObject *ucd_type,
                      const char*name, const change_record* (*getrecord)(Py_UCS4),
                      Py_UCS4 (*normalization)(Py_UCS4))
 {
     PreviousDBVersion *self;
-    self = PyObject_New(PreviousDBVersion, state->ucd_type);
+    self = PyObject_New(PreviousDBVersion, ucd_type);
     if (self == NULL)
         return NULL;
     self->name = name;
@@ -147,12 +137,11 @@ unicodedata_UCD_decimal_impl(PyObject *self, int chr,
                              PyObject *default_value)
 /*[clinic end generated code: output=be23376e1a185231 input=933f8107993f23d0]*/
 {
-    unicodedata_module_state *state = &global_module_state;
     int have_old = 0;
     long rc;
     Py_UCS4 c = (Py_UCS4)chr;
 
-    if (UCD_Check(self, state->ucd_type)) {
+    if (UCD_Check(self)) {
         const change_record *old = get_old_record(self, c);
         if (old->category_changed == 0) {
             /* unassigned */
@@ -236,12 +225,11 @@ unicodedata_UCD_numeric_impl(PyObject *self, int chr,
                              PyObject *default_value)
 /*[clinic end generated code: output=53ce281fe85b10c4 input=fdf5871a5542893c]*/
 {
-    unicodedata_module_state *state = &global_module_state;
     int have_old = 0;
     double rc;
     Py_UCS4 c = (Py_UCS4)chr;
 
-    if (UCD_Check(self, state->ucd_type)) {
+    if (UCD_Check(self)) {
         const change_record *old = get_old_record(self, c);
         if (old->category_changed == 0) {
             /* unassigned */
@@ -283,11 +271,10 @@ static PyObject *
 unicodedata_UCD_category_impl(PyObject *self, int chr)
 /*[clinic end generated code: output=8571539ee2e6783a input=27d6f3d85050bc06]*/
 {
-    unicodedata_module_state *state = &global_module_state;
     int index;
     Py_UCS4 c = (Py_UCS4)chr;
     index = (int) _getrecord_ex(c)->category;
-    if (UCD_Check(self, state->ucd_type)) {
+    if (UCD_Check(self)) {
         const change_record *old = get_old_record(self, c);
         if (old->category_changed != 0xFF)
             index = old->category_changed;
@@ -311,11 +298,10 @@ static PyObject *
 unicodedata_UCD_bidirectional_impl(PyObject *self, int chr)
 /*[clinic end generated code: output=d36310ce2039bb92 input=b3d8f42cebfcf475]*/
 {
-    unicodedata_module_state *state = &global_module_state;
     int index;
     Py_UCS4 c = (Py_UCS4)chr;
     index = (int) _getrecord_ex(c)->bidirectional;
-    if (UCD_Check(self, state->ucd_type)) {
+    if (UCD_Check(self)) {
         const change_record *old = get_old_record(self, c);
         if (old->category_changed == 0)
             index = 0; /* unassigned */
@@ -341,11 +327,10 @@ static int
 unicodedata_UCD_combining_impl(PyObject *self, int chr)
 /*[clinic end generated code: output=cad056d0cb6a5920 input=9f2d6b2a95d0a22a]*/
 {
-    unicodedata_module_state *state = &global_module_state;
     int index;
     Py_UCS4 c = (Py_UCS4)chr;
     index = (int) _getrecord_ex(c)->combining;
-    if (UCD_Check(self, state->ucd_type)) {
+    if (UCD_Check(self)) {
         const change_record *old = get_old_record(self, c);
         if (old->category_changed == 0)
             index = 0; /* unassigned */
@@ -370,11 +355,10 @@ static int
 unicodedata_UCD_mirrored_impl(PyObject *self, int chr)
 /*[clinic end generated code: output=2532dbf8121b50e6 input=5dd400d351ae6f3b]*/
 {
-    unicodedata_module_state *state = &global_module_state;
     int index;
     Py_UCS4 c = (Py_UCS4)chr;
     index = (int) _getrecord_ex(c)->mirrored;
-    if (UCD_Check(self, state->ucd_type)) {
+    if (UCD_Check(self)) {
         const change_record *old = get_old_record(self, c);
         if (old->category_changed == 0)
             index = 0; /* unassigned */
@@ -398,11 +382,10 @@ static PyObject *
 unicodedata_UCD_east_asian_width_impl(PyObject *self, int chr)
 /*[clinic end generated code: output=484e8537d9ee8197 input=c4854798aab026e0]*/
 {
-    unicodedata_module_state *state = &global_module_state;
     int index;
     Py_UCS4 c = (Py_UCS4)chr;
     index = (int) _getrecord_ex(c)->east_asian_width;
-    if (UCD_Check(self, state->ucd_type)) {
+    if (UCD_Check(self)) {
         const change_record *old = get_old_record(self, c);
         if (old->category_changed == 0)
             index = 0; /* unassigned */
@@ -428,7 +411,6 @@ static PyObject *
 unicodedata_UCD_decomposition_impl(PyObject *self, int chr)
 /*[clinic end generated code: output=7d699f3ec7565d27 input=e4c12459ad68507b]*/
 {
-    unicodedata_module_state *state = &global_module_state;
     char decomp[256];
     int code, index, count;
     size_t i;
@@ -437,7 +419,7 @@ unicodedata_UCD_decomposition_impl(PyObject *self, int chr)
 
     code = (int)c;
 
-    if (UCD_Check(self, state->ucd_type)) {
+    if (UCD_Check(self)) {
         const change_record *old = get_old_record(self, c);
         if (old->category_changed == 0)
             return PyUnicode_FromString(""); /* unassigned */
@@ -480,13 +462,14 @@ unicodedata_UCD_decomposition_impl(PyObject *self, int chr)
 }
 
 static void
-get_decomp_record(unicodedata_module_state *state, PyObject *self,
-                  Py_UCS4 code, int *index, int *prefix, int *count)
+get_decomp_record(PyObject *self, Py_UCS4 code,
+                  int *index, int *prefix, int *count)
 {
     if (code >= 0x110000) {
         *index = 0;
-    } else if (UCD_Check(self, state->ucd_type) &&
-               get_old_record(self, code)->category_changed==0) {
+    }
+    else if (UCD_Check(self)
+             && get_old_record(self, code)->category_changed==0) {
         /* unassigned in old version */
         *index = 0;
     }
@@ -515,8 +498,7 @@ get_decomp_record(unicodedata_module_state *state, PyObject *self,
 #define SCount  (LCount*NCount)
 
 static PyObject*
-nfd_nfkd(unicodedata_module_state *state, PyObject *self,
-         PyObject *input, int k)
+nfd_nfkd(PyObject *self, PyObject *input, int k)
 {
     PyObject *result;
     Py_UCS4 *output;
@@ -584,7 +566,7 @@ nfd_nfkd(unicodedata_module_state *state, PyObject *self,
                 continue;
             }
             /* normalization changes */
-            if (UCD_Check(self, state->ucd_type)) {
+            if (UCD_Check(self)) {
                 Py_UCS4 value = ((PreviousDBVersion*)self)->normalization(code);
                 if (value != 0) {
                     stack[stackptr++] = value;
@@ -593,7 +575,7 @@ nfd_nfkd(unicodedata_module_state *state, PyObject *self,
             }
 
             /* Other decompositions. */
-            get_decomp_record(state, self, code, &index, &prefix, &count);
+            get_decomp_record(self, code, &index, &prefix, &count);
 
             /* Copy character if it is not decomposable, or has a
                compatibility decomposition, but we do NFD. */
@@ -665,7 +647,7 @@ find_nfc_index(const struct reindex* nfc, Py_UCS4 code)
 }
 
 static PyObject*
-nfc_nfkc(unicodedata_module_state *state, PyObject *self, PyObject *input, int k)
+nfc_nfkc(PyObject *self, PyObject *input, int k)
 {
     PyObject *result;
     int kind;
@@ -677,7 +659,7 @@ nfc_nfkc(unicodedata_module_state *state, PyObject *self, PyObject *input, int k
     Py_ssize_t skipped[20];
     int cskipped = 0;
 
-    result = nfd_nfkd(state, self, input, k);
+    result = nfd_nfkd(self, input, k);
     if (!result)
         return NULL;
     /* result will be "ready". */
@@ -820,13 +802,13 @@ typedef enum {YES = 0, MAYBE = 1, NO = 2} QuickcheckResult;
  *   https://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
  */
 static QuickcheckResult
-is_normalized_quickcheck(unicodedata_module_state *state, PyObject *self,
-                         PyObject *input, bool nfc, bool k, bool yes_only)
+is_normalized_quickcheck(PyObject *self, PyObject *input, bool nfc, bool k,
+                         bool yes_only)
 {
-    /* An older version of the database is requested, quickchecks must be
-       disabled. */
-    if (UCD_Check(self, state->ucd_type))
+    /* UCD 3.2.0 is requested, quickchecks must be disabled. */
+    if (UCD_Check(self)) {
         return NO;
+    }
 
     Py_ssize_t i, len;
     int kind;
@@ -885,7 +867,6 @@ unicodedata_UCD_is_normalized_impl(PyObject *self, PyObject *form,
                                    PyObject *input)
 /*[clinic end generated code: output=11e5a3694e723ca5 input=a544f14cea79e508]*/
 {
-    unicodedata_module_state *state = &global_module_state;
     if (PyUnicode_READY(input) == -1) {
         return NULL;
     }
@@ -921,10 +902,10 @@ unicodedata_UCD_is_normalized_impl(PyObject *self, PyObject *form,
         return NULL;
     }
 
-    m = is_normalized_quickcheck(state, self, input, nfc, k, false);
+    m = is_normalized_quickcheck(self, input, nfc, k, false);
 
     if (m == MAYBE) {
-        cmp = (nfc ? nfc_nfkc : nfd_nfkd)(state, self, input, k);
+        cmp = (nfc ? nfc_nfkc : nfd_nfkd)(self, input, k);
         if (cmp == NULL) {
             return NULL;
         }
@@ -959,7 +940,6 @@ unicodedata_UCD_normalize_impl(PyObject *self, PyObject *form,
                                PyObject *input)
 /*[clinic end generated code: output=05ca4385a2ad6983 input=3a5206c0ad2833fb]*/
 {
-    unicodedata_module_state *state = &global_module_state;
     if (PyUnicode_GET_LENGTH(input) == 0) {
         /* Special case empty input strings, since resizing
            them  later would cause internal errors. */
@@ -968,36 +948,36 @@ unicodedata_UCD_normalize_impl(PyObject *self, PyObject *form,
     }
 
     if (_PyUnicode_EqualToASCIIId(form, &PyId_NFC)) {
-        if (is_normalized_quickcheck(state, self, input,
+        if (is_normalized_quickcheck(self, input,
                                      true,  false, true) == YES) {
             Py_INCREF(input);
             return input;
         }
-        return nfc_nfkc(state, self, input, 0);
+        return nfc_nfkc(self, input, 0);
     }
     if (_PyUnicode_EqualToASCIIId(form, &PyId_NFKC)) {
-        if (is_normalized_quickcheck(state, self, input,
+        if (is_normalized_quickcheck(self, input,
                                      true,  true,  true) == YES) {
             Py_INCREF(input);
             return input;
         }
-        return nfc_nfkc(state, self, input, 1);
+        return nfc_nfkc(self, input, 1);
     }
     if (_PyUnicode_EqualToASCIIId(form, &PyId_NFD)) {
-        if (is_normalized_quickcheck(state, self, input,
+        if (is_normalized_quickcheck(self, input,
                                      false, false, true) == YES) {
             Py_INCREF(input);
             return input;
         }
-        return nfd_nfkd(state, self, input, 0);
+        return nfd_nfkd(self, input, 0);
     }
     if (_PyUnicode_EqualToASCIIId(form, &PyId_NFKD)) {
-        if (is_normalized_quickcheck(state, self, input,
+        if (is_normalized_quickcheck(self, input,
                                      false, true,  true) == YES) {
             Py_INCREF(input);
             return input;
         }
-        return nfd_nfkd(state, self, input, 1);
+        return nfd_nfkd(self, input, 1);
     }
     PyErr_SetString(PyExc_ValueError, "invalid normalization form");
     return NULL;
@@ -1080,7 +1060,7 @@ is_unified_ideograph(Py_UCS4 code)
                           (cp < named_sequences_end))
 
 static int
-_getucname(unicodedata_module_state *state, PyObject *self,
+_getucname(PyObject *self,
            Py_UCS4 code, char* buffer, int buflen, int with_alias_and_seq)
 {
     /* Find the name associated with the given code point.
@@ -1098,7 +1078,7 @@ _getucname(unicodedata_module_state *state, PyObject *self,
     if (!with_alias_and_seq && (IS_ALIAS(code) || IS_NAMED_SEQ(code)))
         return 0;
 
-    if (UCD_Check(self, state->ucd_type)) {
+    if (UCD_Check(self)) {
         /* in 3.2.0 there are no aliases and named sequences */
         const change_record *old;
         if (IS_ALIAS(code) || IS_NAMED_SEQ(code))
@@ -1182,23 +1162,21 @@ _getucname(unicodedata_module_state *state, PyObject *self,
 }
 
 static int
-capi_getucname(void *state_raw, PyObject *self, Py_UCS4 code,
+capi_getucname(Py_UCS4 code,
                char* buffer, int buflen,
                int with_alias_and_seq)
 {
-    unicodedata_module_state *state = (unicodedata_module_state *)state_raw;
-    return _getucname(state, self, code, buffer, buflen, with_alias_and_seq);
+    return _getucname(NULL, code, buffer, buflen, with_alias_and_seq);
 
 }
 
 static int
-_cmpname(unicodedata_module_state *state, PyObject *self,
-         int code, const char* name, int namelen)
+_cmpname(PyObject *self, int code, const char* name, int namelen)
 {
     /* check if code corresponds to the given name */
     int i;
     char buffer[NAME_MAXLEN+1];
-    if (!_getucname(state, self, code, buffer, NAME_MAXLEN, 1))
+    if (!_getucname(self, code, buffer, NAME_MAXLEN, 1))
         return 0;
     for (i = 0; i < namelen; i++) {
         if (Py_TOUPPER(name[i]) != buffer[i])
@@ -1243,7 +1221,7 @@ _check_alias_and_seq(unsigned int cp, Py_UCS4* code, int with_named_seq)
 }
 
 static int
-_getcode(unicodedata_module_state *state, PyObject* self,
+_getcode(PyObject* self,
          const char* name, int namelen, Py_UCS4* code, int with_named_seq)
 {
     /* Return the code point associated with the given name.
@@ -1305,7 +1283,7 @@ _getcode(unicodedata_module_state *state, PyObject* self,
     v = code_hash[i];
     if (!v)
         return 0;
-    if (_cmpname(state, self, v, name, namelen)) {
+    if (_cmpname(self, v, name, namelen)) {
         return _check_alias_and_seq(v, code, with_named_seq);
     }
     incr = (h ^ (h >> 3)) & mask;
@@ -1316,7 +1294,7 @@ _getcode(unicodedata_module_state *state, PyObject* self,
         v = code_hash[i];
         if (!v)
             return 0;
-        if (_cmpname(state, self, v, name, namelen)) {
+        if (_cmpname(self, v, name, namelen)) {
             return _check_alias_and_seq(v, code, with_named_seq);
         }
         incr = incr << 1;
@@ -1326,15 +1304,20 @@ _getcode(unicodedata_module_state *state, PyObject* self,
 }
 
 static int
-capi_getcode(void *state_raw, PyObject* self,
-             const char* name, int namelen, Py_UCS4* code,
+capi_getcode(const char* name, int namelen, Py_UCS4* code,
              int with_named_seq)
 {
-    unicodedata_module_state *state = (unicodedata_module_state *)state_raw;
-    return _getcode(state, self, name, namelen, code, with_named_seq);
+    return _getcode(NULL, name, namelen, code, with_named_seq);
 
 }
 
+static const _PyUnicode_Name_CAPI unicodedata_capi =
+{
+    .getname = capi_getucname,
+    .getcode = capi_getcode,
+};
+
+
 /* -------------------------------------------------------------------- */
 /* Python bindings */
 
@@ -1356,11 +1339,10 @@ static PyObject *
 unicodedata_UCD_name_impl(PyObject *self, int chr, PyObject *default_value)
 /*[clinic end generated code: output=6bbb37a326407707 input=3e0367f534de56d9]*/
 {
-    unicodedata_module_state *state = &global_module_state;
     char name[NAME_MAXLEN+1];
     Py_UCS4 c = (Py_UCS4)chr;
 
-    if (!_getucname(state, self, c, name, NAME_MAXLEN, 0)) {
+    if (!_getucname(self, c, name, NAME_MAXLEN, 0)) {
         if (default_value == NULL) {
             PyErr_SetString(PyExc_ValueError, "no such name");
             return NULL;
@@ -1392,7 +1374,6 @@ unicodedata_UCD_lookup_impl(PyObject *self, const char *name,
                             Py_ssize_clean_t name_length)
 /*[clinic end generated code: output=765cb8186788e6be input=a557be0f8607a0d6]*/
 {
-    unicodedata_module_state *state = &global_module_state;
     Py_UCS4 code;
     unsigned int index;
     if (name_length > NAME_MAXLEN) {
@@ -1400,7 +1381,7 @@ unicodedata_UCD_lookup_impl(PyObject *self, const char *name,
         return NULL;
     }
 
-    if (!_getcode(state, self, name, (int)name_length, &code, 1)) {
+    if (!_getcode(self, name, (int)name_length, &code, 1)) {
         PyErr_Format(PyExc_KeyError, "undefined character name '%s'", name);
         return NULL;
     }
@@ -1415,8 +1396,10 @@ unicodedata_UCD_lookup_impl(PyObject *self, const char *name,
     return PyUnicode_FromOrdinal(code);
 }
 
-/* XXX Add doc strings. */
-
+// List of functions used to define module functions *AND* unicodedata.UCD
+// methods. For module functions, self is the module. For UCD methods, self
+// is a UCD instance. The UCD_Check() macro is used to check if self is
+// a UCD instance.
 static PyMethodDef unicodedata_functions[] = {
     UNICODEDATA_UCD_DECIMAL_METHODDEF
     UNICODEDATA_UCD_DIGIT_METHODDEF
@@ -1501,41 +1484,64 @@ static struct PyModuleDef unicodedatamodule = {
         NULL
 };
 
-PyMODINIT_FUNC
-PyInit_unicodedata(void)
-{
-    PyObject *m, *v;
-    unicodedata_module_state *state = &global_module_state;
-
-    state->capi.size = sizeof(_PyUnicode_Name_CAPI);
-    state->capi.state = state;
-    state->capi.getname = capi_getucname;
-    state->capi.getcode = capi_getcode;
 
+static int
+unicodedata_exec(PyObject *module)
+{
     Py_SET_TYPE(&UCD_Type, &PyType_Type);
-    state->ucd_type = &UCD_Type;
+    PyTypeObject *ucd_type = &UCD_Type;
 
-    m = PyModule_Create(&unicodedatamodule);
-    if (!m)
-        return NULL;
+    if (PyModule_AddStringConstant(module, "unidata_version", UNIDATA_VERSION) < 0) {
+        return -1;
+    }
 
-    PyModule_AddStringConstant(m, "unidata_version", UNIDATA_VERSION);
-    Py_INCREF(state->ucd_type);
-    PyModule_AddObject(m, "UCD", (PyObject*)state->ucd_type);
+    if (PyModule_AddType(module, ucd_type) < 0) {
+        return -1;
+    }
 
     /* Previous versions */
-    v = new_previous_version(state, "3.2.0",
+    PyObject *v;
+    v = new_previous_version(ucd_type, "3.2.0",
                              get_change_3_2_0, normalization_3_2_0);
-    if (v != NULL)
-        PyModule_AddObject(m, "ucd_3_2_0", v);
+    if (v == NULL) {
+        return -1;
+    }
+    if (PyModule_AddObject(module, "ucd_3_2_0", v) < 0) {
+        Py_DECREF(v);
+        return -1;
+    }
 
     /* Export C API */
-    v = PyCapsule_New((void *)&state->capi, PyUnicodeData_CAPSULE_NAME, NULL);
-    if (v != NULL)
-        PyModule_AddObject(m, "ucnhash_CAPI", v);
-    return m;
+    v = PyCapsule_New((void *)&unicodedata_capi, PyUnicodeData_CAPSULE_NAME,
+                      NULL);
+    if (v == NULL) {
+        return -1;
+    }
+    if (PyModule_AddObject(module, "ucnhash_CAPI", v) < 0) {
+        Py_DECREF(v);
+        return -1;
+    }
+    return 0;
+}
+
+
+PyMODINIT_FUNC
+PyInit_unicodedata(void)
+{
+    PyObject *module = PyModule_Create(&unicodedatamodule);
+    if (!module) {
+        return NULL;
+    }
+
+    if (unicodedata_exec(module) < 0) {
+        Py_DECREF(module);
+        return NULL;
+    }
+
+    return module;
 }
 
+
 /*
 Local variables:
 c-basic-offset: 4
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ba48d35aa40b1..9058018201039 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6523,8 +6523,7 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
                     s++;
                     ch = 0xffffffff; /* in case 'getcode' messes up */
                     if (namelen <= INT_MAX &&
-                        ucnhash_capi->getcode(ucnhash_capi->state, NULL,
-                                              start, (int)namelen,
+                        ucnhash_capi->getcode(start, (int)namelen,
                                               &ch, 0)) {
                         assert(ch <= MAX_UNICODE);
                         WRITE_CHAR(ch);
diff --git a/Python/codecs.c b/Python/codecs.c
index 62d1f3f3ac0d3..fa329ce243642 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -987,8 +987,7 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
         for (i = start, ressize = 0; i < end; ++i) {
             /* object is guaranteed to be "ready" */
             c = PyUnicode_READ_CHAR(object, i);
-            if (ucnhash_capi->getname(ucnhash_capi->state, NULL,
-                                      c, buffer, sizeof(buffer), 1)) {
+            if (ucnhash_capi->getname(c, buffer, sizeof(buffer), 1)) {
                 replsize = 1+1+1+(int)strlen(buffer)+1;
             }
             else if (c >= 0x10000) {
@@ -1011,8 +1010,7 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
             i < end; ++i) {
             c = PyUnicode_READ_CHAR(object, i);
             *outp++ = '\\';
-            if (ucnhash_capi->getname(ucnhash_capi->state, NULL,
-                                      c, buffer, sizeof(buffer), 1)) {
+            if (ucnhash_capi->getname(c, buffer, sizeof(buffer), 1)) {
                 *outp++ = 'N';
                 *outp++ = '{';
                 strcpy((char *)outp, buffer);



More information about the Python-checkins mailing list