[Python-checkins] bpo-42846: Convert CJK codec extensions to multiphase init (GH-24157)

vstinner webhook-mailer at python.org
Thu Jan 7 18:15:30 EST 2021


https://github.com/python/cpython/commit/07f2cee93f1b619650403981c455f47bfed8d818
commit: 07f2cee93f1b619650403981c455f47bfed8d818
branch: master
author: Victor Stinner <vstinner at python.org>
committer: vstinner <vstinner at python.org>
date: 2021-01-08T00:15:22+01:00
summary:

bpo-42846: Convert CJK codec extensions to multiphase init (GH-24157)

Convert the 6 CJK codec extension modules (_codecs_cn, _codecs_hk,
_codecs_iso2022, _codecs_jp, _codecs_kr and _codecs_tw) to the
multiphase initialization API (PEP 489).

Remove the getmultibytecodec() local cache: always import
_multibytecodec. Getting a codec should be uncommon; for example,
this function is only called once per CJK codec module.

Fix a reference leak in the register_maps() error path.

files:
A Misc/NEWS.d/next/Library/2021-01-07-23-31-17.bpo-42846.kukDjw.rst
M Lib/test/test_multibytecodec.py
M Modules/cjkcodecs/cjkcodecs.h

diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py
index 7c3b67f3cbf6e..3efa1505e5c92 100644
--- a/Lib/test/test_multibytecodec.py
+++ b/Lib/test/test_multibytecodec.py
@@ -3,11 +3,15 @@
 #   Unit test for multibytecodec itself
 #
 
+import _multibytecodec
+import codecs
+import io
+import sys
+import textwrap
+import unittest
 from test import support
 from test.support import os_helper
 from test.support.os_helper import TESTFN
-import unittest, io, codecs, sys
-import _multibytecodec
 
 ALL_CJKENCODINGS = [
 # _codecs_cn
@@ -205,6 +209,24 @@ def test_issue5640(self):
         self.assertEqual(encoder.encode('\xff'), b'\\xff')
         self.assertEqual(encoder.encode('\n'), b'\n')
 
+    @support.cpython_only
+    def test_subinterp(self):
+        # bpo-42846: Test a CJK codec in a subinterpreter
+        import _testcapi
+        encoding = 'cp932'
+        text = "Python の開発は、1990 年ごろから開始されています。"
+        code = textwrap.dedent("""
+            import codecs
+            encoding = %r
+            text = %r
+            encoder = codecs.getincrementalencoder(encoding)()
+            text2 = encoder.encode(text).decode(encoding)
+            if text2 != text:
+                raise ValueError(f"encoding issue: {text2!a} != {text!a}")
+        """) % (encoding, text)
+        res = _testcapi.run_in_subinterp(code)
+        self.assertEqual(res, 0)
+
 class Test_IncrementalDecoder(unittest.TestCase):
 
     def test_dbcs(self):
diff --git a/Misc/NEWS.d/next/Library/2021-01-07-23-31-17.bpo-42846.kukDjw.rst b/Misc/NEWS.d/next/Library/2021-01-07-23-31-17.bpo-42846.kukDjw.rst
new file mode 100644
index 0000000000000..6f8a739ec1da2
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-01-07-23-31-17.bpo-42846.kukDjw.rst
@@ -0,0 +1,3 @@
+Convert the 6 CJK codec extension modules (_codecs_cn, _codecs_hk,
+_codecs_iso2022, _codecs_jp, _codecs_kr and _codecs_tw) to the multiphase
+initialization API (:pep:`489`). Patch by Victor Stinner.
diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h
index e41755b197ffc..3b89bc93ed142 100644
--- a/Modules/cjkcodecs/cjkcodecs.h
+++ b/Modules/cjkcodecs/cjkcodecs.h
@@ -245,15 +245,13 @@ static const struct dbcs_map *mapping_list;
 static PyObject *
 getmultibytecodec(void)
 {
-    static PyObject *cofunc = NULL;
-
-    if (cofunc == NULL) {
-        PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec");
-        if (mod == NULL)
-            return NULL;
-        cofunc = PyObject_GetAttrString(mod, "__create_codec");
-        Py_DECREF(mod);
+    PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec");
+    if (mod == NULL) {
+        return NULL;
     }
+
+    PyObject *cofunc = PyObject_GetAttrString(mod, "__create_codec");
+    Py_DECREF(mod);
     return cofunc;
 }
 
@@ -297,10 +295,6 @@ getcodec(PyObject *self, PyObject *encoding)
     return r;
 }
 
-static struct PyMethodDef __methods[] = {
-    {"getcodec", (PyCFunction)getcodec, METH_O, ""},
-    {NULL, NULL},
-};
 
 static int
 register_maps(PyObject *module)
@@ -309,12 +303,17 @@ register_maps(PyObject *module)
 
     for (h = mapping_list; h->charset[0] != '\0'; h++) {
         char mhname[256] = "__map_";
-        int r;
         strcpy(mhname + sizeof("__map_") - 1, h->charset);
-        r = PyModule_AddObject(module, mhname,
-                        PyCapsule_New((void *)h, PyMultibyteCodec_CAPSULE_NAME, NULL));
-        if (r == -1)
+
+        PyObject *capsule = PyCapsule_New((void *)h,
+                                          PyMultibyteCodec_CAPSULE_NAME, NULL);
+        if (capsule == NULL) {
+            return -1;
+        }
+        if (PyModule_AddObject(module, mhname, capsule) < 0) {
+            Py_DECREF(capsule);
             return -1;
+        }
     }
     return 0;
 }
@@ -395,25 +394,36 @@ importmap(const char *modname, const char *symbol,
 }
 #endif
 
+static int
+_cjk_exec(PyObject *module)
+{
+    return register_maps(module);
+}
+
+
+static struct PyMethodDef _cjk_methods[] = {
+    {"getcodec", (PyCFunction)getcodec, METH_O, ""},
+    {NULL, NULL},
+};
+
+static PyModuleDef_Slot _cjk_slots[] = {
+    {Py_mod_exec, _cjk_exec},
+    {0, NULL}
+};
+
 #define I_AM_A_MODULE_FOR(loc)                                          \
-    static struct PyModuleDef __module = {                              \
+    static struct PyModuleDef _cjk_module = {                           \
         PyModuleDef_HEAD_INIT,                                          \
-        "_codecs_"#loc,                                                 \
-        NULL,                                                           \
-        0,                                                              \
-        __methods,                                                      \
-        NULL,                                                           \
-        NULL,                                                           \
-        NULL,                                                           \
-        NULL                                                            \
+        .m_name = "_codecs_"#loc,                                       \
+        .m_size = 0,                                                    \
+        .m_methods = _cjk_methods,                                      \
+        .m_slots = _cjk_slots,                                          \
     };                                                                  \
+                                                                        \
     PyMODINIT_FUNC                                                      \
     PyInit__codecs_##loc(void)                                          \
     {                                                                   \
-        PyObject *m = PyModule_Create(&__module);                       \
-        if (m != NULL)                                                  \
-            (void)register_maps(m);                                     \
-        return m;                                                       \
+        return PyModuleDef_Init(&_cjk_module);                          \
     }
 
 #endif



More information about the Python-checkins mailing list