[Python-checkins] bpo-28604: Fix localeconv() for different LC_MONETARY (GH-10606)

Victor Stinner webhook-mailer at python.org
Tue Nov 20 10:20:29 EST 2018


https://github.com/python/cpython/commit/02e6bf7f2025cddcbde6432f6b6396198ab313f4
commit: 02e6bf7f2025cddcbde6432f6b6396198ab313f4
branch: master
author: Victor Stinner <vstinner at redhat.com>
committer: GitHub <noreply at github.com>
date: 2018-11-20T16:20:16+01:00
summary:

bpo-28604: Fix localeconv() for different LC_MONETARY (GH-10606)

locale.localeconv() now sets temporarily the LC_CTYPE locale to the
LC_MONETARY locale if the two locales are different and monetary
strings are non-ASCII. This temporary change affects other threads.

Changes:

* locale.localeconv() can now set LC_CTYPE to LC_MONETARY to decode
  monetary fields.
* Add LocaleInfo.grouping_buffer: copy localeconv() grouping string
  since it can be replaced anytime if a different thread calls
  localeconv().
* _Py_GetLocaleconvNumeric() now requires a "struct lconv *"
  structure, so locale.localeconv() now longer calls localeconv()
  twice. Moreover, the function now requires all arguments to be
  non-NULL.
* Rename STATIC_LOCALE_INFO_INIT to LocaleInfo_STATIC_INIT.
* Move _Py_GetLocaleconvNumeric() definition from fileutils.h
  to pycore_fileutils.h. pycore_fileutils.h now includes locale.h.
* The _locale module is now built with Py_BUILD_CORE defined.

files:
A Misc/NEWS.d/next/Library/2018-11-20-13-34-01.bpo-28604.iiih5h.rst
M Doc/library/locale.rst
M Include/fileutils.h
M Include/internal/pycore_fileutils.h
M Modules/Setup
M Modules/_localemodule.c
M Python/fileutils.c
M Python/formatter_unicode.c

diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst
index 2fd44fe8e90a..bf57a0835591 100644
--- a/Doc/library/locale.rst
+++ b/Doc/library/locale.rst
@@ -148,10 +148,8 @@ The :mod:`locale` module defines the following exception and functions:
    +--------------+-----------------------------------------+
 
    The function sets temporarily the ``LC_CTYPE`` locale to the ``LC_NUMERIC``
-   locale to decode ``decimal_point`` and ``thousands_sep`` byte strings if
-   they are non-ASCII or longer than 1 byte, and the ``LC_NUMERIC`` locale is
-   different than the ``LC_CTYPE`` locale. This temporary change affects other
-   threads.
+   locale or the ``LC_MONETARY`` locale if locales are different and numeric or
+   monetary strings are non-ASCII. This temporary change affects other threads.
 
    .. versionchanged:: 3.7
       The function now sets temporarily the ``LC_CTYPE`` locale to the
diff --git a/Include/fileutils.h b/Include/fileutils.h
index fdd60fffcd55..830e56ad367a 100644
--- a/Include/fileutils.h
+++ b/Include/fileutils.h
@@ -170,11 +170,6 @@ PyAPI_FUNC(int) _Py_get_blocking(int fd);
 PyAPI_FUNC(int) _Py_set_blocking(int fd, int blocking);
 #endif   /* !MS_WINDOWS */
 
-PyAPI_FUNC(int) _Py_GetLocaleconvNumeric(
-    PyObject **decimal_point,
-    PyObject **thousands_sep,
-    const char **grouping);
-
 #endif   /* Py_LIMITED_API */
 
 #ifdef __cplusplus
diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h
index d577e099d1f5..98eb258fe61e 100644
--- a/Include/internal/pycore_fileutils.h
+++ b/Include/internal/pycore_fileutils.h
@@ -8,6 +8,8 @@ extern "C" {
 #  error "Py_BUILD_CORE must be defined to include this header"
 #endif
 
+#include <locale.h>   /* struct lconv */
+
 PyAPI_FUNC(int) _Py_DecodeUTF8Ex(
     const char *arg,
     Py_ssize_t arglen,
@@ -30,6 +32,11 @@ PyAPI_FUNC(wchar_t*) _Py_DecodeUTF8_surrogateescape(
 
 PyAPI_FUNC(int) _Py_GetForceASCII(void);
 
+PyAPI_FUNC(int) _Py_GetLocaleconvNumeric(
+    struct lconv *lc,
+    PyObject **decimal_point,
+    PyObject **thousands_sep);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Misc/NEWS.d/next/Library/2018-11-20-13-34-01.bpo-28604.iiih5h.rst b/Misc/NEWS.d/next/Library/2018-11-20-13-34-01.bpo-28604.iiih5h.rst
new file mode 100644
index 000000000000..289e484c35d6
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-11-20-13-34-01.bpo-28604.iiih5h.rst
@@ -0,0 +1,3 @@
+:func:`locale.localeconv` now sets temporarily the ``LC_CTYPE`` locale to the
+``LC_MONETARY`` locale if the two locales are different and monetary strings
+are non-ASCII. This temporary change affects other threads.
diff --git a/Modules/Setup b/Modules/Setup
index e7b939d55182..11ddd0c7b202 100644
--- a/Modules/Setup
+++ b/Modules/Setup
@@ -120,7 +120,7 @@ time -DPy_BUILD_CORE -I$(srcdir)/Include/internal timemodule.c	# -lm # time oper
 _thread -DPy_BUILD_CORE -I$(srcdir)/Include/internal _threadmodule.c	# low-level threading interface
 
 # access to ISO C locale support
-_locale _localemodule.c  # -lintl
+_locale -DPy_BUILD_CORE _localemodule.c  # -lintl
 
 # Standard I/O baseline
 _io -DPy_BUILD_CORE -I$(srcdir)/Include/internal -I$(srcdir)/Modules/_io _io/_iomodule.c _io/iobase.c _io/fileio.c _io/bytesio.c _io/bufferedio.c _io/textio.c _io/stringio.c
diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c
index 3fdbc5ea8122..4202cc401414 100644
--- a/Modules/_localemodule.c
+++ b/Modules/_localemodule.c
@@ -11,6 +11,7 @@ This software comes with no warranty. Use at your own risk.
 
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
+#include "pycore_fileutils.h"
 
 #include <stdio.h>
 #include <locale.h>
@@ -128,6 +129,82 @@ PyLocale_setlocale(PyObject* self, PyObject* args)
     return result_object;
 }
 
+static int
+locale_is_ascii(const char *str)
+{
+    return (strlen(str) == 1 && ((unsigned char)str[0]) <= 127);
+}
+
+static int
+locale_decode_monetary(PyObject *dict, struct lconv *lc)
+{
+    int change_locale;
+    change_locale = (!locale_is_ascii(lc->int_curr_symbol)
+                     || !locale_is_ascii(lc->currency_symbol)
+                     || !locale_is_ascii(lc->mon_decimal_point)
+                     || !locale_is_ascii(lc->mon_thousands_sep));
+
+    /* Keep a copy of the LC_CTYPE locale */
+    char *oldloc = NULL, *loc = NULL;
+    if (change_locale) {
+        oldloc = setlocale(LC_CTYPE, NULL);
+        if (!oldloc) {
+            PyErr_SetString(PyExc_RuntimeWarning,
+                            "failed to get LC_CTYPE locale");
+            return -1;
+        }
+
+        oldloc = _PyMem_Strdup(oldloc);
+        if (!oldloc) {
+            PyErr_NoMemory();
+            return -1;
+        }
+
+        loc = setlocale(LC_MONETARY, NULL);
+        if (loc != NULL && strcmp(loc, oldloc) == 0) {
+            loc = NULL;
+        }
+
+        if (loc != NULL) {
+            /* Only set the locale temporarily the LC_CTYPE locale
+               to the LC_MONETARY locale if the two locales are different and
+               at least one string is non-ASCII. */
+            setlocale(LC_CTYPE, loc);
+        }
+    }
+
+    int res = -1;
+
+#define RESULT_STRING(ATTR) \
+    do { \
+        PyObject *obj; \
+        obj = PyUnicode_DecodeLocale(lc->ATTR, NULL); \
+        if (obj == NULL) { \
+            goto done; \
+        } \
+        if (PyDict_SetItemString(dict, Py_STRINGIFY(ATTR), obj) < 0) { \
+            Py_DECREF(obj); \
+            goto done; \
+        } \
+        Py_DECREF(obj); \
+    } while (0)
+
+    RESULT_STRING(int_curr_symbol);
+    RESULT_STRING(currency_symbol);
+    RESULT_STRING(mon_decimal_point);
+    RESULT_STRING(mon_thousands_sep);
+#undef RESULT_STRING
+
+    res = 0;
+
+done:
+    if (loc != NULL) {
+        setlocale(LC_CTYPE, oldloc);
+    }
+    PyMem_Free(oldloc);
+    return res;
+}
+
 PyDoc_STRVAR(localeconv__doc__,
 "() -> dict. Returns numeric and monetary locale-specific parameters.");
 
@@ -135,7 +212,7 @@ static PyObject*
 PyLocale_localeconv(PyObject* self, PyObject *Py_UNUSED(ignored))
 {
     PyObject* result;
-    struct lconv *l;
+    struct lconv *lc;
     PyObject *x;
 
     result = PyDict_New();
@@ -144,7 +221,7 @@ PyLocale_localeconv(PyObject* self, PyObject *Py_UNUSED(ignored))
     }
 
     /* if LC_NUMERIC is different in the C library, use saved value */
-    l = localeconv();
+    lc = localeconv();
 
     /* hopefully, the localeconv result survives the C library calls
        involved herein */
@@ -162,22 +239,21 @@ PyLocale_localeconv(PyObject* self, PyObject *Py_UNUSED(ignored))
 
 #define RESULT_STRING(s)\
     do { \
-        x = PyUnicode_DecodeLocale(l->s, NULL); \
+        x = PyUnicode_DecodeLocale(lc->s, NULL); \
         RESULT(#s, x); \
     } while (0)
 
 #define RESULT_INT(i)\
     do { \
-        x = PyLong_FromLong(l->i); \
+        x = PyLong_FromLong(lc->i); \
         RESULT(#i, x); \
     } while (0)
 
-    /* Monetary information */
-    RESULT_STRING(int_curr_symbol);
-    RESULT_STRING(currency_symbol);
-    RESULT_STRING(mon_decimal_point);
-    RESULT_STRING(mon_thousands_sep);
-    x = copy_grouping(l->mon_grouping);
+    /* Monetary information: LC_MONETARY encoding */
+    if (locale_decode_monetary(result, lc) < 0) {
+        goto failed;
+    }
+    x = copy_grouping(lc->mon_grouping);
     RESULT("mon_grouping", x);
 
     RESULT_STRING(positive_sign);
@@ -191,12 +267,9 @@ PyLocale_localeconv(PyObject* self, PyObject *Py_UNUSED(ignored))
     RESULT_INT(p_sign_posn);
     RESULT_INT(n_sign_posn);
 
-    /* Numeric information */
+    /* Numeric information: LC_NUMERIC encoding */
     PyObject *decimal_point, *thousands_sep;
-    const char *grouping;
-    if (_Py_GetLocaleconvNumeric(&decimal_point,
-                                 &thousands_sep,
-                                 &grouping) < 0) {
+    if (_Py_GetLocaleconvNumeric(lc, &decimal_point, &thousands_sep) < 0) {
         goto failed;
     }
 
@@ -213,7 +286,7 @@ PyLocale_localeconv(PyObject* self, PyObject *Py_UNUSED(ignored))
     }
     Py_DECREF(thousands_sep);
 
-    x = copy_grouping(grouping);
+    x = copy_grouping(lc->grouping);
     RESULT("grouping", x);
 
     return result;
@@ -221,6 +294,10 @@ PyLocale_localeconv(PyObject* self, PyObject *Py_UNUSED(ignored))
   failed:
     Py_DECREF(result);
     return NULL;
+
+#undef RESULT
+#undef RESULT_STRING
+#undef RESULT_INT
 }
 
 #if defined(HAVE_WCSCOLL)
diff --git a/Python/fileutils.c b/Python/fileutils.c
index c9a8e58dd122..033c2ff71b91 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -1868,22 +1868,17 @@ _Py_set_blocking(int fd, int blocking)
 
 
 int
-_Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep,
-                         const char **grouping)
+_Py_GetLocaleconvNumeric(struct lconv *lc,
+                         PyObject **decimal_point, PyObject **thousands_sep)
 {
-    int res = -1;
-
-    struct lconv *lc = localeconv();
+    assert(decimal_point != NULL);
+    assert(thousands_sep != NULL);
 
     int change_locale = 0;
-    if (decimal_point != NULL &&
-        (strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127))
-    {
+    if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
         change_locale = 1;
     }
-    if (thousands_sep != NULL &&
-        (strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127))
-    {
+    if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
         change_locale = 1;
     }
 
@@ -1892,7 +1887,8 @@ _Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep,
     if (change_locale) {
         oldloc = setlocale(LC_CTYPE, NULL);
         if (!oldloc) {
-            PyErr_SetString(PyExc_RuntimeWarning, "faild to get LC_CTYPE locale");
+            PyErr_SetString(PyExc_RuntimeWarning,
+                            "failed to get LC_CTYPE locale");
             return -1;
         }
 
@@ -1908,7 +1904,7 @@ _Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep,
         }
 
         if (loc != NULL) {
-            /* Only set the locale temporarilty the LC_CTYPE locale
+            /* Only set the locale temporarily the LC_CTYPE locale
                if LC_NUMERIC locale is different than LC_CTYPE locale and
                decimal_point and/or thousands_sep are non-ASCII or longer than
                1 byte */
@@ -1916,26 +1912,21 @@ _Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep,
         }
     }
 
-    if (decimal_point != NULL) {
-        *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL);
-        if (*decimal_point == NULL) {
-            goto error;
-        }
-    }
-    if (thousands_sep != NULL) {
-        *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL);
-        if (*thousands_sep == NULL) {
-            goto error;
-        }
+    int res = -1;
+
+    *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL);
+    if (*decimal_point == NULL) {
+        goto done;
     }
 
-    if (grouping != NULL) {
-        *grouping = lc->grouping;
+    *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL);
+    if (*thousands_sep == NULL) {
+        goto done;
     }
 
     res = 0;
 
-error:
+done:
     if (loc != NULL) {
         setlocale(LC_CTYPE, oldloc);
     }
diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c
index ba09cc67becf..e12ba49bd2c1 100644
--- a/Python/formatter_unicode.c
+++ b/Python/formatter_unicode.c
@@ -3,6 +3,7 @@
    of int.__float__, etc., that take and return unicode objects */
 
 #include "Python.h"
+#include "pycore_fileutils.h"
 #include <locale.h>
 
 /* Raises an exception about an unknown presentation type for this
@@ -396,9 +397,10 @@ typedef struct {
     PyObject *decimal_point;
     PyObject *thousands_sep;
     const char *grouping;
+    char *grouping_buffer;
 } LocaleInfo;
 
-#define STATIC_LOCALE_INFO_INIT {0, 0, 0}
+#define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
 
 /* describes the layout for an integer, see the comment in
    calc_number_widths() for details */
@@ -705,11 +707,22 @@ get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
 {
     switch (type) {
     case LT_CURRENT_LOCALE: {
-        if (_Py_GetLocaleconvNumeric(&locale_info->decimal_point,
-                                     &locale_info->thousands_sep,
-                                     &locale_info->grouping) < 0) {
+        struct lconv *lc = localeconv();
+        if (_Py_GetLocaleconvNumeric(lc,
+                                     &locale_info->decimal_point,
+                                     &locale_info->thousands_sep) < 0) {
             return -1;
         }
+
+        /* localeconv() grouping can become a dangling pointer or point
+           to a different string if another thread calls localeconv() during
+           the string formatting. Copy the string to avoid this risk. */
+        locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
+        if (locale_info->grouping_buffer == NULL) {
+            PyErr_NoMemory();
+            return -1;
+        }
+        locale_info->grouping = locale_info->grouping_buffer;
         break;
     }
     case LT_DEFAULT_LOCALE:
@@ -743,6 +756,7 @@ free_locale_info(LocaleInfo *locale_info)
 {
     Py_XDECREF(locale_info->decimal_point);
     Py_XDECREF(locale_info->thousands_sep);
+    PyMem_Free(locale_info->grouping_buffer);
 }
 
 /************************************************************************/
@@ -855,7 +869,7 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,
 
     /* Locale settings, either from the actual locale or
        from a hard-code pseudo-locale */
-    LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
+    LocaleInfo locale = LocaleInfo_STATIC_INIT;
 
     /* no precision allowed on integers */
     if (format->precision != -1) {
@@ -1027,7 +1041,7 @@ format_float_internal(PyObject *value,
 
     /* Locale settings, either from the actual locale or
        from a hard-code pseudo-locale */
-    LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
+    LocaleInfo locale = LocaleInfo_STATIC_INIT;
 
     if (format->precision > INT_MAX) {
         PyErr_SetString(PyExc_ValueError, "precision too big");
@@ -1190,7 +1204,7 @@ format_complex_internal(PyObject *value,
 
     /* Locale settings, either from the actual locale or
        from a hard-code pseudo-locale */
-    LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
+    LocaleInfo locale = LocaleInfo_STATIC_INIT;
 
     if (format->precision > INT_MAX) {
         PyErr_SetString(PyExc_ValueError, "precision too big");



More information about the Python-checkins mailing list