[Python-checkins] bpo-42208: Add _locale._get_locale_encoding() (GH-23052)

vstinner webhook-mailer at python.org
Fri Oct 30 20:32:15 EDT 2020


https://github.com/python/cpython/commit/b62bdf71ea0cd52041d49691d8ae3dc645bd48e1
commit: b62bdf71ea0cd52041d49691d8ae3dc645bd48e1
branch: master
author: Victor Stinner <vstinner at python.org>
committer: vstinner <vstinner at python.org>
date: 2020-10-31T01:32:11+01:00
summary:

bpo-42208: Add _locale._get_locale_encoding() (GH-23052)

* Add a new _locale._get_locale_encoding() function to get the
  current locale encoding.
* Modify locale.getpreferredencoding() to use it.
* Remove the _bootlocale module.

files:
D Lib/_bootlocale.py
M Lib/locale.py
M Lib/test/test_mimetypes.py
M Modules/_localemodule.c
M Modules/clinic/_localemodule.c.h
M PCbuild/lib.pyproj

diff --git a/Lib/_bootlocale.py b/Lib/_bootlocale.py
deleted file mode 100644
index 3273a3b42252b..0000000000000
--- a/Lib/_bootlocale.py
+++ /dev/null
@@ -1,46 +0,0 @@
-"""A minimal subset of the locale module used at interpreter startup
-(imported by the _io module), in order to reduce startup time.
-
-Don't import directly from third-party code; use the `locale` module instead!
-"""
-
-import sys
-import _locale
-
-if sys.platform.startswith("win"):
-    def getpreferredencoding(do_setlocale=True):
-        if sys.flags.utf8_mode:
-            return 'UTF-8'
-        return _locale._getdefaultlocale()[1]
-else:
-    try:
-        _locale.CODESET
-    except AttributeError:
-        if hasattr(sys, 'getandroidapilevel'):
-            # On Android langinfo.h and CODESET are missing, and UTF-8 is
-            # always used in mbstowcs() and wcstombs().
-            def getpreferredencoding(do_setlocale=True):
-                return 'UTF-8'
-        else:
-            def getpreferredencoding(do_setlocale=True):
-                if sys.flags.utf8_mode:
-                    return 'UTF-8'
-                # This path for legacy systems needs the more complex
-                # getdefaultlocale() function, import the full locale module.
-                import locale
-                return locale.getpreferredencoding(do_setlocale)
-    else:
-        def getpreferredencoding(do_setlocale=True):
-            assert not do_setlocale
-            if sys.flags.utf8_mode:
-                return 'UTF-8'
-            result = _locale.nl_langinfo(_locale.CODESET)
-            if not result and sys.platform == 'darwin':
-                # nl_langinfo can return an empty string
-                # when the setting has an invalid value.
-                # Default to UTF-8 in that case because
-                # UTF-8 is the default charset on OSX and
-                # returning nothing will crash the
-                # interpreter.
-                result = 'UTF-8'
-            return result
diff --git a/Lib/locale.py b/Lib/locale.py
index 1a4e9f694f309..ee841e8b8655e 100644
--- a/Lib/locale.py
+++ b/Lib/locale.py
@@ -619,53 +619,49 @@ def resetlocale(category=LC_ALL):
     """
     _setlocale(category, _build_localename(getdefaultlocale()))
 
-if sys.platform.startswith("win"):
-    # On Win32, this will return the ANSI code page
-    def getpreferredencoding(do_setlocale = True):
-        """Return the charset that the user is likely using."""
+
+try:
+    from _locale import _get_locale_encoding
+except ImportError:
+    def _get_locale_encoding():
+        if hasattr(sys, 'getandroidapilevel'):
+            # On Android langinfo.h and CODESET are missing, and UTF-8 is
+            # always used in mbstowcs() and wcstombs().
+            return 'UTF-8'
         if sys.flags.utf8_mode:
             return 'UTF-8'
-        import _bootlocale
-        return _bootlocale.getpreferredencoding(False)
+        encoding = getdefaultlocale()[1]
+        if encoding is None:
+            # LANG not set, default conservatively to ASCII
+            encoding = 'ascii'
+        return encoding
+
+try:
+    CODESET
+except NameError:
+    def getpreferredencoding(do_setlocale=True):
+        """Return the charset that the user is likely using."""
+        return _get_locale_encoding()
 else:
     # On Unix, if CODESET is available, use that.
-    try:
-        CODESET
-    except NameError:
-        if hasattr(sys, 'getandroidapilevel'):
-            # On Android langinfo.h and CODESET are missing, and UTF-8 is
-            # always used in mbstowcs() and wcstombs().
-            def getpreferredencoding(do_setlocale = True):
-                return 'UTF-8'
-        else:
-            # Fall back to parsing environment variables :-(
-            def getpreferredencoding(do_setlocale = True):
-                """Return the charset that the user is likely using,
-                by looking at environment variables."""
-                if sys.flags.utf8_mode:
-                    return 'UTF-8'
-                res = getdefaultlocale()[1]
-                if res is None:
-                    # LANG not set, default conservatively to ASCII
-                    res = 'ascii'
-                return res
-    else:
-        def getpreferredencoding(do_setlocale = True):
-            """Return the charset that the user is likely using,
-            according to the system configuration."""
-            if sys.flags.utf8_mode:
-                return 'UTF-8'
-            import _bootlocale
-            if do_setlocale:
-                oldloc = setlocale(LC_CTYPE)
-                try:
-                    setlocale(LC_CTYPE, "")
-                except Error:
-                    pass
-            result = _bootlocale.getpreferredencoding(False)
-            if do_setlocale:
-                setlocale(LC_CTYPE, oldloc)
-            return result
+    def getpreferredencoding(do_setlocale=True):
+        """Return the charset that the user is likely using,
+        according to the system configuration."""
+        if sys.flags.utf8_mode:
+            return 'UTF-8'
+
+        if not do_setlocale:
+            return _get_locale_encoding()
+
+        old_loc = setlocale(LC_CTYPE)
+        try:
+            try:
+                setlocale(LC_CTYPE, "")
+            except Error:
+                pass
+            return _get_locale_encoding()
+        finally:
+            setlocale(LC_CTYPE, old_loc)
 
 
 ### Database
diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py
index ddeae38e1372f..d63f6b66e10c9 100644
--- a/Lib/test/test_mimetypes.py
+++ b/Lib/test/test_mimetypes.py
@@ -3,7 +3,7 @@
 import mimetypes
 import pathlib
 import sys
-import unittest
+import unittest.mock
 
 from test import support
 from test.support import os_helper
@@ -71,14 +71,14 @@ def test_read_mime_types(self):
         # bpo-41048: read_mime_types should read the rule file with 'utf-8' encoding.
         # Not with locale encoding. _bootlocale has been imported because io.open(...)
         # uses it.
-        with os_helper.temp_dir() as directory:
-            data = "application/no-mans-land  Fran\u00E7ais"
-            file = pathlib.Path(directory, "sample.mimetype")
-            file.write_text(data, encoding='utf-8')
-            import _bootlocale
-            with support.swap_attr(_bootlocale, 'getpreferredencoding', lambda do_setlocale=True: 'ASCII'):
-                mime_dict = mimetypes.read_mime_types(file)
-            eq(mime_dict[".Français"], "application/no-mans-land")
+        data = "application/no-mans-land  Fran\u00E7ais"
+        filename = "filename"
+        fp = io.StringIO(data)
+        with unittest.mock.patch.object(mimetypes, 'open',
+                                        return_value=fp) as mock_open:
+            mime_dict = mimetypes.read_mime_types(filename)
+            mock_open.assert_called_with(filename, encoding='utf-8')
+        eq(mime_dict[".Français"], "application/no-mans-land")
 
     def test_non_standard_types(self):
         eq = self.assertEqual
diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c
index 9c7ce876e4059..359deb7544043 100644
--- a/Modules/_localemodule.c
+++ b/Modules/_localemodule.c
@@ -768,9 +768,24 @@ _locale_bind_textdomain_codeset_impl(PyObject *module, const char *domain,
     }
     Py_RETURN_NONE;
 }
-#endif
+#endif  // HAVE_BIND_TEXTDOMAIN_CODESET
+
+#endif  // HAVE_LIBINTL_H
+
+
+/*[clinic input]
+_locale._get_locale_encoding
+
+Get the current locale encoding.
+[clinic start generated code]*/
+
+static PyObject *
+_locale__get_locale_encoding_impl(PyObject *module)
+/*[clinic end generated code: output=e8e2f6f6f184591a input=513d9961d2f45c76]*/
+{
+    return _Py_GetLocaleEncoding();
+}
 
-#endif
 
 static struct PyMethodDef PyLocale_Methods[] = {
     _LOCALE_SETLOCALE_METHODDEF
@@ -797,6 +812,7 @@ static struct PyMethodDef PyLocale_Methods[] = {
     _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
 #endif
 #endif
+    _LOCALE__GET_LOCALE_ENCODING_METHODDEF
   {NULL, NULL}
 };
 
diff --git a/Modules/clinic/_localemodule.c.h b/Modules/clinic/_localemodule.c.h
index 5d1db3ece796d..703d034c32e80 100644
--- a/Modules/clinic/_localemodule.c.h
+++ b/Modules/clinic/_localemodule.c.h
@@ -545,6 +545,24 @@ _locale_bind_textdomain_codeset(PyObject *module, PyObject *const *args, Py_ssiz
 
 #endif /* defined(HAVE_LIBINTL_H) && defined(HAVE_BIND_TEXTDOMAIN_CODESET) */
 
+PyDoc_STRVAR(_locale__get_locale_encoding__doc__,
+"_get_locale_encoding($module, /)\n"
+"--\n"
+"\n"
+"Get the current locale encoding.");
+
+#define _LOCALE__GET_LOCALE_ENCODING_METHODDEF    \
+    {"_get_locale_encoding", (PyCFunction)_locale__get_locale_encoding, METH_NOARGS, _locale__get_locale_encoding__doc__},
+
+static PyObject *
+_locale__get_locale_encoding_impl(PyObject *module);
+
+static PyObject *
+_locale__get_locale_encoding(PyObject *module, PyObject *Py_UNUSED(ignored))
+{
+    return _locale__get_locale_encoding_impl(module);
+}
+
 #ifndef _LOCALE_STRCOLL_METHODDEF
     #define _LOCALE_STRCOLL_METHODDEF
 #endif /* !defined(_LOCALE_STRCOLL_METHODDEF) */
@@ -584,4 +602,4 @@ _locale_bind_textdomain_codeset(PyObject *module, PyObject *const *args, Py_ssiz
 #ifndef _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
     #define _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
 #endif /* !defined(_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF) */
-/*[clinic end generated code: output=fe944779cd572d8e input=a9049054013a1b77]*/
+/*[clinic end generated code: output=cd703c8a3a75fcf4 input=a9049054013a1b77]*/
diff --git a/PCbuild/lib.pyproj b/PCbuild/lib.pyproj
index f0c51edb9d1ca..a15165d92ef12 100644
--- a/PCbuild/lib.pyproj
+++ b/PCbuild/lib.pyproj
@@ -1572,7 +1572,6 @@
     <Compile Include="zoneinfo\__init__.py" />
     <Compile Include="zoneinfo\_tzpath.py" />
     <Compile Include="zoneinfo\_zoneinfo.py" />
-    <Compile Include="_bootlocale.py" />
     <Compile Include="_collections_abc.py" />
     <Compile Include="_compat_pickle.py" />
     <Compile Include="_compression.py" />



More information about the Python-checkins mailing list