[Python-checkins] bpo-36775: Add _PyUnicode_InitEncodings() (GH-13057)

Victor Stinner webhook-mailer at python.org
Thu May 2 11:54:26 EDT 2019


https://github.com/python/cpython/commit/43fc3bb7cf0278735eb0010d7b3043775a120cb5
commit: 43fc3bb7cf0278735eb0010d7b3043775a120cb5
branch: master
author: Victor Stinner <vstinner at redhat.com>
committer: GitHub <noreply at github.com>
date: 2019-05-02T11:54:20-04:00
summary:

bpo-36775: Add _PyUnicode_InitEncodings() (GH-13057)

Move get_codec_name() and initfsencoding() from pylifecycle.c to
unicodeobject.c.

Also rename the "init" functions in pylifecycle.c.

files:
M Include/internal/pycore_pylifecycle.h
M Objects/unicodeobject.c
M Python/pylifecycle.c

diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h
index f5da1431d94c..a2383d476ee9 100644
--- a/Include/internal/pycore_pylifecycle.h
+++ b/Include/internal/pycore_pylifecycle.h
@@ -16,10 +16,11 @@ PyAPI_DATA(int) _Py_UnhandledKeyboardInterrupt;
 
 PyAPI_FUNC(int) _Py_UnixMain(int argc, char **argv);
 
-PyAPI_FUNC(int) _Py_SetFileSystemEncoding(
+extern int _Py_SetFileSystemEncoding(
     const char *encoding,
     const char *errors);
-PyAPI_FUNC(void) _Py_ClearFileSystemEncoding(void);
+extern void _Py_ClearFileSystemEncoding(void);
+extern _PyInitError _PyUnicode_InitEncodings(PyInterpreterState *interp);
 
 PyAPI_FUNC(void) _Py_ClearStandardStreamEncoding(void);
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 9991362a3330..5b6b241cb62b 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -42,6 +42,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 #include "Python.h"
 #include "pycore_fileutils.h"
 #include "pycore_object.h"
+#include "pycore_pylifecycle.h"
 #include "pycore_pystate.h"
 #include "ucnhash.h"
 #include "bytes_methods.h"
@@ -15574,6 +15575,102 @@ PyUnicode_AsUnicodeCopy(PyObject *unicode)
 }
 
 
+static char*
+get_codec_name(const char *encoding)
+{
+    PyObject *codec, *name_obj = NULL;
+
+    codec = _PyCodec_Lookup(encoding);
+    if (!codec)
+        goto error;
+
+    name_obj = PyObject_GetAttrString(codec, "name");
+    Py_CLEAR(codec);
+    if (!name_obj) {
+        goto error;
+    }
+
+    const char *name_utf8 = PyUnicode_AsUTF8(name_obj);
+    if (name_utf8 == NULL) {
+        goto error;
+    }
+
+    char *name = _PyMem_RawStrdup(name_utf8);
+    Py_DECREF(name_obj);
+    if (name == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    return name;
+
+error:
+    Py_XDECREF(codec);
+    Py_XDECREF(name_obj);
+    return NULL;
+}
+
+
+static _PyInitError
+init_stdio_encoding(PyInterpreterState *interp)
+{
+    _PyCoreConfig *config = &interp->core_config;
+
+    char *codec_name = get_codec_name(config->stdio_encoding);
+    if (codec_name == NULL) {
+        return _Py_INIT_ERR("failed to get the Python codec name "
+                            "of the stdio encoding");
+    }
+    PyMem_RawFree(config->stdio_encoding);
+    config->stdio_encoding = codec_name;
+    return _Py_INIT_OK();
+}
+
+
+static _PyInitError
+init_fs_encoding(PyInterpreterState *interp)
+{
+    _PyCoreConfig *config = &interp->core_config;
+
+    char *encoding = get_codec_name(config->filesystem_encoding);
+    if (encoding == NULL) {
+        /* Such an error can only occur in critical situations: out of
+           memory, failure to import a standard library module, etc. */
+        return _Py_INIT_ERR("failed to get the Python codec "
+                            "of the filesystem encoding");
+    }
+
+    /* Update the filesystem encoding to the normalized Python codec name.
+       For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
+       (Python codec name). */
+    PyMem_RawFree(config->filesystem_encoding);
+    config->filesystem_encoding = encoding;
+
+    /* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
+       global configuration variables. */
+    if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
+                                  config->filesystem_errors) < 0) {
+        return _Py_INIT_NO_MEMORY();
+    }
+
+    /* PyUnicode can now use the Python codec rather than the C
+       implementation for the filesystem encoding */
+    interp->fscodec_initialized = 1;
+    return _Py_INIT_OK();
+}
+
+
+_PyInitError
+_PyUnicode_InitEncodings(PyInterpreterState *interp)
+{
+    _PyInitError err = init_fs_encoding(interp);
+    if (_Py_INIT_FAILED(err)) {
+        return err;
+    }
+
+    return init_stdio_encoding(interp);
+}
+
+
 void
 _PyUnicode_Fini(void)
 {
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 40eeebdd1a7f..01ef027b9d86 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -59,10 +59,9 @@ extern grammar _PyParser_Grammar; /* From graminit.c */
 
 /* Forward */
 static _PyInitError add_main_module(PyInterpreterState *interp);
-static _PyInitError initfsencoding(PyInterpreterState *interp);
-static _PyInitError initsite(void);
+static _PyInitError init_import_size(void);
 static _PyInitError init_sys_streams(PyInterpreterState *interp);
-static _PyInitError initsigs(void);
+static _PyInitError init_signals(void);
 static void call_py_exitfuncs(PyInterpreterState *);
 static void wait_for_thread_shutdown(void);
 static void call_ll_exitfuncs(_PyRuntimeState *runtime);
@@ -144,42 +143,8 @@ Py_IsInitialized(void)
 
 */
 
-static char*
-get_codec_name(const char *encoding)
-{
-    const char *name_utf8;
-    char *name_str;
-    PyObject *codec, *name = NULL;
-
-    codec = _PyCodec_Lookup(encoding);
-    if (!codec)
-        goto error;
-
-    name = _PyObject_GetAttrId(codec, &PyId_name);
-    Py_CLEAR(codec);
-    if (!name)
-        goto error;
-
-    name_utf8 = PyUnicode_AsUTF8(name);
-    if (name_utf8 == NULL)
-        goto error;
-    name_str = _PyMem_RawStrdup(name_utf8);
-    Py_DECREF(name);
-    if (name_str == NULL) {
-        PyErr_NoMemory();
-        return NULL;
-    }
-    return name_str;
-
-error:
-    Py_XDECREF(codec);
-    Py_XDECREF(name);
-    return NULL;
-}
-
-
 static _PyInitError
-initimport(PyInterpreterState *interp, PyObject *sysmod)
+init_importlib(PyInterpreterState *interp, PyObject *sysmod)
 {
     PyObject *importlib;
     PyObject *impmod;
@@ -229,7 +194,7 @@ initimport(PyInterpreterState *interp, PyObject *sysmod)
 }
 
 static _PyInitError
-initexternalimport(PyInterpreterState *interp)
+init_importlib_external(PyInterpreterState *interp)
 {
     PyObject *value;
     value = PyObject_CallMethod(interp->importlib,
@@ -661,7 +626,7 @@ pycore_init_import_warnings(PyInterpreterState *interp, PyObject *sysmod)
 
     /* This call sets up builtin and frozen import support */
     if (interp->core_config._install_importlib) {
-        err = initimport(interp, sysmod);
+        err = init_importlib(interp, sysmod);
         if (_Py_INIT_FAILED(err)) {
             return err;
         }
@@ -940,7 +905,7 @@ _Py_InitializeMainInterpreter(_PyRuntimeState *runtime,
         return _Py_INIT_ERR("can't finish initializing sys");
     }
 
-    _PyInitError err = initexternalimport(interp);
+    _PyInitError err = init_importlib_external(interp);
     if (_Py_INIT_FAILED(err)) {
         return err;
     }
@@ -951,13 +916,13 @@ _Py_InitializeMainInterpreter(_PyRuntimeState *runtime,
         return err;
     }
 
-    err = initfsencoding(interp);
+    err = _PyUnicode_InitEncodings(interp);
     if (_Py_INIT_FAILED(err)) {
         return err;
     }
 
     if (core_config->install_signal_handlers) {
-        err = initsigs(); /* Signal handling stuff, including initintr() */
+        err = init_signals();
         if (_Py_INIT_FAILED(err)) {
             return err;
         }
@@ -992,7 +957,7 @@ _Py_InitializeMainInterpreter(_PyRuntimeState *runtime,
     runtime->initialized = 1;
 
     if (core_config->site_import) {
-        err = initsite(); /* Module site */
+        err = init_import_size(); /* Module site */
         if (_Py_INIT_FAILED(err)) {
             return err;
         }
@@ -1497,17 +1462,17 @@ new_interpreter(PyThreadState **tstate_p)
             return err;
         }
 
-        err = initimport(interp, sysmod);
+        err = init_importlib(interp, sysmod);
         if (_Py_INIT_FAILED(err)) {
             return err;
         }
 
-        err = initexternalimport(interp);
+        err = init_importlib_external(interp);
         if (_Py_INIT_FAILED(err)) {
             return err;
         }
 
-        err = initfsencoding(interp);
+        err = _PyUnicode_InitEncodings(interp);
         if (_Py_INIT_FAILED(err)) {
             return err;
         }
@@ -1523,7 +1488,7 @@ new_interpreter(PyThreadState **tstate_p)
         }
 
         if (core_config->site_import) {
-            err = initsite();
+            err = init_import_size();
             if (_Py_INIT_FAILED(err)) {
                 return err;
             }
@@ -1649,42 +1614,10 @@ add_main_module(PyInterpreterState *interp)
     return _Py_INIT_OK();
 }
 
-static _PyInitError
-initfsencoding(PyInterpreterState *interp)
-{
-    _PyCoreConfig *config = &interp->core_config;
-
-    char *encoding = get_codec_name(config->filesystem_encoding);
-    if (encoding == NULL) {
-        /* Such error can only occurs in critical situations: no more
-           memory, import a module of the standard library failed, etc. */
-        return _Py_INIT_ERR("failed to get the Python codec "
-                            "of the filesystem encoding");
-    }
-
-    /* Update the filesystem encoding to the normalized Python codec name.
-       For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
-       (Python codec name). */
-    PyMem_RawFree(config->filesystem_encoding);
-    config->filesystem_encoding = encoding;
-
-    /* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
-       global configuration variables. */
-    if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
-                                  config->filesystem_errors) < 0) {
-        return _Py_INIT_NO_MEMORY();
-    }
-
-    /* PyUnicode can now use the Python codec rather than C implementation
-       for the filesystem encoding */
-    interp->fscodec_initialized = 1;
-    return _Py_INIT_OK();
-}
-
 /* Import the site module (not into __main__ though) */
 
 static _PyInitError
-initsite(void)
+init_import_size(void)
 {
     PyObject *m;
     m = PyImport_ImportModule("site");
@@ -1880,14 +1813,6 @@ init_sys_streams(PyInterpreterState *interp)
     }
 #endif
 
-    char *codec_name = get_codec_name(config->stdio_encoding);
-    if (codec_name == NULL) {
-        return _Py_INIT_ERR("failed to get the Python codec name "
-                            "of the stdio encoding");
-    }
-    PyMem_RawFree(config->stdio_encoding);
-    config->stdio_encoding = codec_name;
-
     /* Hack to avoid a nasty recursion issue when Python is invoked
        in verbose mode: pre-import the Latin-1 and UTF-8 codecs */
     if ((m = PyImport_ImportModule("encodings.utf_8")) == NULL) {
@@ -2287,7 +2212,7 @@ Py_Exit(int sts)
 }
 
 static _PyInitError
-initsigs(void)
+init_signals(void)
 {
 #ifdef SIGPIPE
     PyOS_setsig(SIGPIPE, SIG_IGN);



More information about the Python-checkins mailing list