[Python-checkins] bpo-31900: Fix localeconv() encoding for LC_NUMERIC (#4174)

Victor Stinner webhook-mailer at python.org
Mon Jan 15 09:58:04 EST 2018


https://github.com/python/cpython/commit/cb064fc2321ce8673fe365e9ef60445a27657f54
commit: cb064fc2321ce8673fe365e9ef60445a27657f54
branch: master
author: Victor Stinner <victor.stinner at gmail.com>
committer: GitHub <noreply at github.com>
date: 2018-01-15T15:58:02+01:00
summary:

bpo-31900: Fix localeconv() encoding for LC_NUMERIC (#4174)

* Add _Py_GetLocaleconvNumeric() function: decode decimal_point and
  thousands_sep fields of localeconv() from the LC_NUMERIC encoding,
  rather than decoding from the LC_CTYPE encoding.
* Modify locale.localeconv() and "n" formatter of str.format() (for
  int, float and complex) to use _Py_GetLocaleconvNumeric()
  internally.

files:
A Misc/NEWS.d/next/Library/2017-10-30-15-55-32.bpo-31900.-S9xc4.rst
M Doc/library/locale.rst
M Doc/library/stdtypes.rst
M Doc/whatsnew/3.7.rst
M Include/fileutils.h
M Modules/_localemodule.c
M Python/fileutils.c
M Python/formatter_unicode.c

diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst
index 7da94a23964..2fd44fe8e90 100644
--- a/Doc/library/locale.rst
+++ b/Doc/library/locale.rst
@@ -147,6 +147,16 @@ The :mod:`locale` module defines the following exception and functions:
    | ``CHAR_MAX`` | Nothing is specified in this locale.    |
    +--------------+-----------------------------------------+
 
+   The function sets temporarily the ``LC_CTYPE`` locale to the ``LC_NUMERIC``
+   locale to decode ``decimal_point`` and ``thousands_sep`` byte strings if
+   they are non-ASCII or longer than 1 byte, and the ``LC_NUMERIC`` locale is
+   different than the ``LC_CTYPE`` locale. This temporary change affects other
+   threads.
+
+   .. versionchanged:: 3.7
+      The function now sets temporarily the ``LC_CTYPE`` locale to the
+      ``LC_NUMERIC`` locale in some cases.
+
 
 .. function:: nl_langinfo(option)
 
diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst
index de2fb27c2d7..120b0d3399c 100644
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -1599,6 +1599,20 @@ expression support in the :mod:`re` module).
    See :ref:`formatstrings` for a description of the various formatting options
    that can be specified in format strings.
 
+   .. note::
+      When formatting a number (:class:`int`, :class:`float`, :class:`complex`
+      and subclasses) with the ``n`` type (ex: ``'{:n}'.format(1234)``), the
+      function sets temporarily the ``LC_CTYPE`` locale to the ``LC_NUMERIC``
+      locale to decode ``decimal_point`` and ``thousands_sep`` fields of
+      :c:func:`localeconv` if they are non-ASCII or longer than 1 byte, and the
+      ``LC_NUMERIC`` locale is different than the ``LC_CTYPE`` locale. This
+      temporary change affects other threads.
+
+   .. versionchanged:: 3.7
+      When formatting a number with the ``n`` type, the function sets
+      temporarily the ``LC_CTYPE`` locale to the ``LC_NUMERIC`` locale in some
+      cases.
+
 
 .. method:: str.format_map(mapping)
 
diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst
index 1041d31f302..009df38d8ec 100644
--- a/Doc/whatsnew/3.7.rst
+++ b/Doc/whatsnew/3.7.rst
@@ -866,6 +866,9 @@ Changes in Python behavior
 Changes in the Python API
 -------------------------
 
+* The :func:`locale.localeconv` function now sets temporarily the ``LC_CTYPE``
+  locale to the ``LC_NUMERIC`` locale in some cases.
+
 * The ``asyncio.windows_utils.socketpair()`` function has been
   removed: use directly :func:`socket.socketpair` which is available on all
   platforms since Python 3.5 (before, it wasn't available on Windows).
diff --git a/Include/fileutils.h b/Include/fileutils.h
index b4f8b11a635..21eefdef87a 100644
--- a/Include/fileutils.h
+++ b/Include/fileutils.h
@@ -160,6 +160,11 @@ PyAPI_FUNC(int) _Py_get_blocking(int fd);
 PyAPI_FUNC(int) _Py_set_blocking(int fd, int blocking);
 #endif   /* !MS_WINDOWS */
 
+PyAPI_FUNC(int) _Py_GetLocaleconvNumeric(
+    PyObject **decimal_point,
+    PyObject **thousands_sep,
+    const char **grouping);
+
 #endif   /* Py_LIMITED_API */
 
 #ifdef __cplusplus
diff --git a/Misc/NEWS.d/next/Library/2017-10-30-15-55-32.bpo-31900.-S9xc4.rst b/Misc/NEWS.d/next/Library/2017-10-30-15-55-32.bpo-31900.-S9xc4.rst
new file mode 100644
index 00000000000..2d8e3ce6002
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-10-30-15-55-32.bpo-31900.-S9xc4.rst
@@ -0,0 +1,9 @@
+The :func:`locale.localeconv` function now sets temporarily the ``LC_CTYPE``
+locale to the ``LC_NUMERIC`` locale to decode ``decimal_point`` and
+``thousands_sep`` byte strings if they are non-ASCII or longer than 1 byte, and
+the ``LC_NUMERIC`` locale is different than the ``LC_CTYPE`` locale.  This
+temporary change affects other threads.
+
+Same change for the :meth:`str.format` method when formatting a number
+(:class:`int`, :class:`float`, :class:`complex` and subclasses) with the ``n``
+type (ex: ``'{:n}'.format(1234)``).
diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c
index 324b694b830..f9eeeb72dd9 100644
--- a/Modules/_localemodule.c
+++ b/Modules/_localemodule.c
@@ -139,8 +139,9 @@ PyLocale_localeconv(PyObject* self)
     PyObject *x;
 
     result = PyDict_New();
-    if (!result)
+    if (!result) {
         return NULL;
+    }
 
     /* if LC_NUMERIC is different in the C library, use saved value */
     l = localeconv();
@@ -171,12 +172,6 @@ PyLocale_localeconv(PyObject* self)
         RESULT(#i, x); \
     } while (0)
 
-    /* Numeric information */
-    RESULT_STRING(decimal_point);
-    RESULT_STRING(thousands_sep);
-    x = copy_grouping(l->grouping);
-    RESULT("grouping", x);
-
     /* Monetary information */
     RESULT_STRING(int_curr_symbol);
     RESULT_STRING(currency_symbol);
@@ -195,10 +190,36 @@ PyLocale_localeconv(PyObject* self)
     RESULT_INT(n_sep_by_space);
     RESULT_INT(p_sign_posn);
     RESULT_INT(n_sign_posn);
+
+    /* Numeric information */
+    PyObject *decimal_point, *thousands_sep;
+    const char *grouping;
+    if (_Py_GetLocaleconvNumeric(&decimal_point,
+                                 &thousands_sep,
+                                 &grouping) < 0) {
+        goto failed;
+    }
+
+    if (PyDict_SetItemString(result, "decimal_point", decimal_point) < 0) {
+        Py_DECREF(decimal_point);
+        Py_DECREF(thousands_sep);
+        goto failed;
+    }
+    Py_DECREF(decimal_point);
+
+    if (PyDict_SetItemString(result, "thousands_sep", thousands_sep) < 0) {
+        Py_DECREF(thousands_sep);
+        goto failed;
+    }
+    Py_DECREF(thousands_sep);
+
+    x = copy_grouping(grouping);
+    RESULT("grouping", x);
+
     return result;
 
   failed:
-    Py_XDECREF(result);
+    Py_DECREF(result);
     return NULL;
 }
 
diff --git a/Python/fileutils.c b/Python/fileutils.c
index a50075eced0..9a1435cfb32 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -1746,3 +1746,80 @@ _Py_set_blocking(int fd, int blocking)
     return -1;
 }
 #endif
+
+
+int
+_Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep,
+                         const char **grouping)
+{
+    int res = -1;
+
+    struct lconv *lc = localeconv();
+
+    int change_locale = 0;
+    if (decimal_point != NULL &&
+        (strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127))
+    {
+        change_locale = 1;
+    }
+    if (thousands_sep != NULL &&
+        (strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127))
+    {
+        change_locale = 1;
+    }
+
+    /* Keep a copy of the LC_CTYPE locale */
+    char *oldloc = NULL, *loc = NULL;
+    if (change_locale) {
+        oldloc = setlocale(LC_CTYPE, NULL);
+        if (!oldloc) {
+            PyErr_SetString(PyExc_RuntimeWarning, "faild to get LC_CTYPE locale");
+            return -1;
+        }
+
+        oldloc = _PyMem_Strdup(oldloc);
+        if (!oldloc) {
+            PyErr_NoMemory();
+            return -1;
+        }
+
+        loc = setlocale(LC_NUMERIC, NULL);
+        if (loc != NULL && strcmp(loc, oldloc) == 0) {
+            loc = NULL;
+        }
+
+        if (loc != NULL) {
+            /* Only temporarily set the LC_CTYPE locale
+               if LC_NUMERIC locale is different than LC_CTYPE locale and
+               decimal_point and/or thousands_sep are non-ASCII or longer than
+               1 byte */
+            setlocale(LC_CTYPE, loc);
+        }
+    }
+
+    if (decimal_point != NULL) {
+        *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL);
+        if (*decimal_point == NULL) {
+            goto error;
+        }
+    }
+    if (thousands_sep != NULL) {
+        *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL);
+        if (*thousands_sep == NULL) {
+            goto error;
+        }
+    }
+
+    if (grouping != NULL) {
+        *grouping = lc->grouping;
+    }
+
+    res = 0;
+
+error:
+    if (loc != NULL) {
+        setlocale(LC_CTYPE, oldloc);
+    }
+    PyMem_Free(oldloc);
+    return res;
+}
diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c
index 397ae7faafc..71e673d9f83 100644
--- a/Python/formatter_unicode.c
+++ b/Python/formatter_unicode.c
@@ -704,18 +704,11 @@ get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
 {
     switch (type) {
     case LT_CURRENT_LOCALE: {
-        struct lconv *locale_data = localeconv();
-        locale_info->decimal_point = PyUnicode_DecodeLocale(
-                                         locale_data->decimal_point,
-                                         NULL);
-        if (locale_info->decimal_point == NULL)
+        if (_Py_GetLocaleconvNumeric(&locale_info->decimal_point,
+                                     &locale_info->thousands_sep,
+                                     &locale_info->grouping) < 0) {
             return -1;
-        locale_info->thousands_sep = PyUnicode_DecodeLocale(
-                                         locale_data->thousands_sep,
-                                         NULL);
-        if (locale_info->thousands_sep == NULL)
-            return -1;
-        locale_info->grouping = locale_data->grouping;
+        }
         break;
     }
     case LT_DEFAULT_LOCALE:



More information about the Python-checkins mailing list