[Python-checkins] bpo-42208: Add _Py_GetLocaleEncoding() (GH-23050)

vstinner webhook-mailer at python.org
Fri Oct 30 20:02:17 EDT 2020


https://github.com/python/cpython/commit/710e82630775774dceba5e8f24b1b10e6dfaf9b7
commit: 710e82630775774dceba5e8f24b1b10e6dfaf9b7
branch: master
author: Victor Stinner <vstinner at python.org>
committer: vstinner <vstinner at python.org>
date: 2020-10-31T01:02:09+01:00
summary:

bpo-42208: Add _Py_GetLocaleEncoding() (GH-23050)

_io.TextIOWrapper no longer calls getpreferredencoding(False) of
_bootlocale to get the locale encoding, but calls
_Py_GetLocaleEncoding() instead.

Add config_get_fs_encoding() sub-function. Also reorganize the
config_get_locale_encoding() code.

files:
M Include/internal/pycore_fileutils.h
M Modules/_io/_iomodule.c
M Modules/_io/_iomodule.h
M Modules/_io/textio.c
M Python/fileutils.c
M Python/initconfig.c

diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h
index 9cb5fc66ee2e0..ff7bc4874c797 100644
--- a/Include/internal/pycore_fileutils.h
+++ b/Include/internal/pycore_fileutils.h
@@ -50,6 +50,8 @@ PyAPI_FUNC(int) _Py_GetLocaleconvNumeric(
 
 PyAPI_FUNC(void) _Py_closerange(int first, int last);
 
+PyAPI_FUNC(PyObject*) _Py_GetLocaleEncoding(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c
index e430352a48e21..9147648b243be 100644
--- a/Modules/_io/_iomodule.c
+++ b/Modules/_io/_iomodule.c
@@ -593,31 +593,6 @@ _PyIO_get_module_state(void)
     return state;
 }
 
-PyObject *
-_PyIO_get_locale_module(_PyIO_State *state)
-{
-    PyObject *mod;
-    if (state->locale_module != NULL) {
-        assert(PyWeakref_CheckRef(state->locale_module));
-        mod = PyWeakref_GET_OBJECT(state->locale_module);
-        if (mod != Py_None) {
-            Py_INCREF(mod);
-            return mod;
-        }
-        Py_CLEAR(state->locale_module);
-    }
-    mod = PyImport_ImportModule("_bootlocale");
-    if (mod == NULL)
-        return NULL;
-    state->locale_module = PyWeakref_NewRef(mod, NULL);
-    if (state->locale_module == NULL) {
-        Py_DECREF(mod);
-        return NULL;
-    }
-    return mod;
-}
-
-
 static int
 iomodule_traverse(PyObject *mod, visitproc visit, void *arg) {
     _PyIO_State *state = get_io_state(mod);
diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h
index a8f3951e57feb..638797fd35736 100644
--- a/Modules/_io/_iomodule.h
+++ b/Modules/_io/_iomodule.h
@@ -150,7 +150,6 @@ typedef struct {
 #define IO_STATE() _PyIO_get_module_state()
 
 extern _PyIO_State *_PyIO_get_module_state(void);
-extern PyObject *_PyIO_get_locale_module(_PyIO_State *);
 
 #ifdef MS_WINDOWS
 extern char _PyIO_get_console_type(PyObject *);
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index 699b7e94c93bb..2078bb316b282 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -10,6 +10,7 @@
 #include "Python.h"
 #include "pycore_interp.h"        // PyInterpreterState.fs_codec
 #include "pycore_long.h"          // _PyLong_GetZero()
+#include "pycore_fileutils.h"     // _Py_GetLocaleEncoding()
 #include "pycore_object.h"
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
 #include "structmember.h"         // PyMemberDef
@@ -27,7 +28,6 @@ _Py_IDENTIFIER(_dealloc_warn);
 _Py_IDENTIFIER(decode);
 _Py_IDENTIFIER(fileno);
 _Py_IDENTIFIER(flush);
-_Py_IDENTIFIER(getpreferredencoding);
 _Py_IDENTIFIER(isatty);
 _Py_IDENTIFIER(mode);
 _Py_IDENTIFIER(name);
@@ -1155,29 +1155,11 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
         }
     }
     if (encoding == NULL && self->encoding == NULL) {
-        PyObject *locale_module = _PyIO_get_locale_module(state);
-        if (locale_module == NULL)
-            goto catch_ImportError;
-        self->encoding = _PyObject_CallMethodIdOneArg(
-            locale_module, &PyId_getpreferredencoding, Py_False);
-        Py_DECREF(locale_module);
+        self->encoding = _Py_GetLocaleEncoding();
         if (self->encoding == NULL) {
-          catch_ImportError:
-            /*
-             Importing locale can raise an ImportError because of
-             _functools, and locale.getpreferredencoding can raise an
-             ImportError if _locale is not available.  These will happen
-             during module building.
-            */
-            if (PyErr_ExceptionMatches(PyExc_ImportError)) {
-                PyErr_Clear();
-                self->encoding = PyUnicode_FromString("ascii");
-            }
-            else
-                goto error;
+            goto error;
         }
-        else if (!PyUnicode_Check(self->encoding))
-            Py_CLEAR(self->encoding);
+        assert(PyUnicode_Check(self->encoding));
     }
     if (self->encoding != NULL) {
         encoding = PyUnicode_AsUTF8(self->encoding);
diff --git a/Python/fileutils.c b/Python/fileutils.c
index e125ba46c21ba..ba2690429f366 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -1,5 +1,6 @@
 #include "Python.h"
-#include "pycore_fileutils.h"
+#include "pycore_fileutils.h"     // fileutils definitions
+#include "pycore_runtime.h"       // _PyRuntime
 #include "osdefs.h"               // SEP
 #include <locale.h>
 
@@ -820,6 +821,46 @@ _Py_EncodeLocaleEx(const wchar_t *text, char **str,
 }
 
 
+// Get the current locale encoding: locale.getpreferredencoding(False).
+// See also config_get_locale_encoding()
+PyObject *
+_Py_GetLocaleEncoding(void)
+{
+#ifdef _Py_FORCE_UTF8_LOCALE
+    // On Android langinfo.h and CODESET are missing,
+    // and UTF-8 is always used in mbstowcs() and wcstombs().
+    return PyUnicode_FromString("UTF-8");
+#else
+    const PyPreConfig *preconfig = &_PyRuntime.preconfig;
+    if (preconfig->utf8_mode) {
+        return PyUnicode_FromString("UTF-8");
+    }
+
+#if defined(MS_WINDOWS)
+    return PyUnicode_FromFormat("cp%u", GetACP());
+#else
+    const char *encoding = nl_langinfo(CODESET);
+    if (!encoding || encoding[0] == '\0') {
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+        // nl_langinfo() can return an empty string when the LC_CTYPE locale is
+        // not supported. Default to UTF-8 in that case, because UTF-8 is the
+        // default charset on macOS.
+        encoding = "UTF-8";
+#else
+        PyErr_SetString(PyExc_ValueError,
+                        "failed to get the locale encoding: "
+                        "nl_langinfo(CODESET) returns an empty string");
+        return NULL;
+#endif
+    }
+    // Decode from UTF-8
+    return PyUnicode_FromString(encoding);
+#endif  // !MS_WINDOWS
+
+#endif  // !_Py_FORCE_UTF8_LOCALE
+}
+
+
 #ifdef MS_WINDOWS
 static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
 
diff --git a/Python/initconfig.c b/Python/initconfig.c
index 6a13dc52ed776..e129278d8f8ad 100644
--- a/Python/initconfig.c
+++ b/Python/initconfig.c
@@ -766,7 +766,7 @@ config_set_bytes_string(PyConfig *config, wchar_t **config_str,
    configured. */
 PyStatus
 PyConfig_SetBytesString(PyConfig *config, wchar_t **config_str,
-                           const char *str)
+                        const char *str)
 {
     return CONFIG_SET_BYTES_STR(config, config_str, str, "string");
 }
@@ -1466,8 +1466,13 @@ config_read_complex_options(PyConfig *config)
 
 
 static const wchar_t *
-config_get_stdio_errors(void)
+config_get_stdio_errors(const PyPreConfig *preconfig)
 {
+    if (preconfig->utf8_mode) {
+        /* UTF-8 Mode uses UTF-8/surrogateescape */
+        return L"surrogateescape";
+    }
+
 #ifndef MS_WINDOWS
     const char *loc = setlocale(LC_CTYPE, NULL);
     if (loc != NULL) {
@@ -1492,26 +1497,41 @@ config_get_stdio_errors(void)
 }
 
 
+// See also _Py_GetLocaleEncoding() and config_get_fs_encoding()
 static PyStatus
-config_get_locale_encoding(PyConfig *config, wchar_t **locale_encoding)
+config_get_locale_encoding(PyConfig *config, const PyPreConfig *preconfig,
+                           wchar_t **locale_encoding)
 {
+#ifdef _Py_FORCE_UTF8_LOCALE
+    return PyConfig_SetString(config, locale_encoding, L"utf-8");
+#else
+    if (preconfig->utf8_mode) {
+        return PyConfig_SetString(config, locale_encoding, L"utf-8");
+    }
+
 #ifdef MS_WINDOWS
     char encoding[20];
     PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());
     return PyConfig_SetBytesString(config, locale_encoding, encoding);
-#elif defined(_Py_FORCE_UTF8_LOCALE)
-    return PyConfig_SetString(config, locale_encoding, L"utf-8");
 #else
     const char *encoding = nl_langinfo(CODESET);
     if (!encoding || encoding[0] == '\0') {
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+        // nl_langinfo() can return an empty string when the LC_CTYPE locale is
+        // not supported. Default to UTF-8 in that case, because UTF-8 is the
+        // default charset on macOS.
+        encoding = "UTF-8";
+#else
         return _PyStatus_ERR("failed to get the locale encoding: "
-                             "nl_langinfo(CODESET) failed");
+                             "nl_langinfo(CODESET) returns an empty string");
+#endif
     }
     /* nl_langinfo(CODESET) is decoded by Py_DecodeLocale() */
     return CONFIG_SET_BYTES_STR(config,
                                 locale_encoding, encoding,
                                 "nl_langinfo(CODESET)");
-#endif
+#endif  // !MS_WINDOWS
+#endif  // !_Py_FORCE_UTF8_LOCALE
 }
 
 
@@ -1596,33 +1616,16 @@ config_init_stdio_encoding(PyConfig *config,
         PyMem_RawFree(pythonioencoding);
     }
 
-    /* UTF-8 Mode uses UTF-8/surrogateescape */
-    if (preconfig->utf8_mode) {
-        if (config->stdio_encoding == NULL) {
-            status = PyConfig_SetString(config, &config->stdio_encoding,
-                                        L"utf-8");
-            if (_PyStatus_EXCEPTION(status)) {
-                return status;
-            }
-        }
-        if (config->stdio_errors == NULL) {
-            status = PyConfig_SetString(config, &config->stdio_errors,
-                                        L"surrogateescape");
-            if (_PyStatus_EXCEPTION(status)) {
-                return status;
-            }
-        }
-    }
-
     /* Choose the default error handler based on the current locale. */
     if (config->stdio_encoding == NULL) {
-        status = config_get_locale_encoding(config, &config->stdio_encoding);
+        status = config_get_locale_encoding(config, preconfig,
+                                            &config->stdio_encoding);
         if (_PyStatus_EXCEPTION(status)) {
             return status;
         }
     }
     if (config->stdio_errors == NULL) {
-        const wchar_t *errors = config_get_stdio_errors();
+        const wchar_t *errors = config_get_stdio_errors(preconfig);
         assert(errors != NULL);
 
         status = PyConfig_SetString(config, &config->stdio_errors, errors);
@@ -1635,46 +1638,46 @@ config_init_stdio_encoding(PyConfig *config,
 }
 
 
+// See also config_get_locale_encoding()
+static PyStatus
+config_get_fs_encoding(PyConfig *config, const PyPreConfig *preconfig,
+                       wchar_t **fs_encoding)
+{
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+    return PyConfig_SetString(config, fs_encoding, L"utf-8");
+#elif defined(MS_WINDOWS)
+    const wchar_t *encoding;
+    if (preconfig->legacy_windows_fs_encoding) {
+        // Legacy Windows filesystem encoding: mbcs/replace
+        encoding = L"mbcs";
+    }
+    else {
+        // Windows defaults to utf-8/surrogatepass (PEP 529)
+        encoding = L"utf-8";
+    }
+     return PyConfig_SetString(config, fs_encoding, encoding);
+#else  // !MS_WINDOWS
+    if (preconfig->utf8_mode) {
+        return PyConfig_SetString(config, fs_encoding, L"utf-8");
+    }
+    else if (_Py_GetForceASCII()) {
+        return PyConfig_SetString(config, fs_encoding, L"ascii");
+    }
+    else {
+        return config_get_locale_encoding(config, preconfig, fs_encoding);
+    }
+#endif  // !MS_WINDOWS
+}
+
+
 static PyStatus
 config_init_fs_encoding(PyConfig *config, const PyPreConfig *preconfig)
 {
     PyStatus status;
 
     if (config->filesystem_encoding == NULL) {
-#ifdef _Py_FORCE_UTF8_FS_ENCODING
-        status = PyConfig_SetString(config, &config->filesystem_encoding, L"utf-8");
-#else
-
-#ifdef MS_WINDOWS
-        if (preconfig->legacy_windows_fs_encoding) {
-            /* Legacy Windows filesystem encoding: mbcs/replace */
-            status = PyConfig_SetString(config, &config->filesystem_encoding,
-                                        L"mbcs");
-        }
-        else
-#endif
-        if (preconfig->utf8_mode) {
-            status = PyConfig_SetString(config, &config->filesystem_encoding,
-                                        L"utf-8");
-        }
-#ifndef MS_WINDOWS
-        else if (_Py_GetForceASCII()) {
-            status = PyConfig_SetString(config, &config->filesystem_encoding,
-                                        L"ascii");
-        }
-#endif
-        else {
-#ifdef MS_WINDOWS
-            /* Windows defaults to utf-8/surrogatepass (PEP 529). */
-            status = PyConfig_SetString(config, &config->filesystem_encoding,
-                                        L"utf-8");
-#else
-            status = config_get_locale_encoding(config,
-                                                &config->filesystem_encoding);
-#endif
-        }
-#endif   /* !_Py_FORCE_UTF8_FS_ENCODING */
-
+        status = config_get_fs_encoding(config, preconfig,
+                                        &config->filesystem_encoding);
         if (_PyStatus_EXCEPTION(status)) {
             return status;
         }



More information about the Python-checkins mailing list