[Python-checkins] bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986)

Victor Stinner webhook-mailer at python.org
Wed Aug 29 03:58:22 EDT 2018


https://github.com/python/cpython/commit/315877dc361d554bec34b4b62c270479ad36a1be
commit: 315877dc361d554bec34b4b62c270479ad36a1be
branch: master
author: Victor Stinner <vstinner at redhat.com>
committer: GitHub <noreply at github.com>
date: 2018-08-29T09:58:12+02:00
summary:

bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986)

Standard streams like sys.stdout now use the "surrogateescape" error
handler, instead of "strict", on the POSIX locale (when the C locale is not
coerced and the UTF-8 Mode is disabled).

Add tests on sys.stdout.errors with LC_ALL=POSIX.

files:
A Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst
M Lib/test/test_sys.py
M Python/pylifecycle.c

diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 005c82d13dc7..f3dd3bb67b38 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -654,10 +654,10 @@ def test_getfilesystemencoding(self):
             expected = None
         self.check_fsencoding(fs_encoding, expected)
 
-    def c_locale_get_error_handler(self, isolated=False, encoding=None):
+    def c_locale_get_error_handler(self, locale, isolated=False, encoding=None):
         # Force the POSIX locale
         env = os.environ.copy()
-        env["LC_ALL"] = "C"
+        env["LC_ALL"] = locale
         env["PYTHONCOERCECLOCALE"] = "0"
         code = '\n'.join((
             'import sys',
@@ -683,44 +683,50 @@ def c_locale_get_error_handler(self, isolated=False, encoding=None):
         stdout, stderr = p.communicate()
         return stdout
 
-    def test_c_locale_surrogateescape(self):
-        out = self.c_locale_get_error_handler(isolated=True)
+    def check_locale_surrogateescape(self, locale):
+        out = self.c_locale_get_error_handler(locale, isolated=True)
         self.assertEqual(out,
                          'stdin: surrogateescape\n'
                          'stdout: surrogateescape\n'
                          'stderr: backslashreplace\n')
 
         # replace the default error handler
-        out = self.c_locale_get_error_handler(encoding=':ignore')
+        out = self.c_locale_get_error_handler(locale, encoding=':ignore')
         self.assertEqual(out,
                          'stdin: ignore\n'
                          'stdout: ignore\n'
                          'stderr: backslashreplace\n')
 
         # force the encoding
-        out = self.c_locale_get_error_handler(encoding='iso8859-1')
+        out = self.c_locale_get_error_handler(locale, encoding='iso8859-1')
         self.assertEqual(out,
                          'stdin: strict\n'
                          'stdout: strict\n'
                          'stderr: backslashreplace\n')
-        out = self.c_locale_get_error_handler(encoding='iso8859-1:')
+        out = self.c_locale_get_error_handler(locale, encoding='iso8859-1:')
         self.assertEqual(out,
                          'stdin: strict\n'
                          'stdout: strict\n'
                          'stderr: backslashreplace\n')
 
         # have no any effect
-        out = self.c_locale_get_error_handler(encoding=':')
+        out = self.c_locale_get_error_handler(locale, encoding=':')
         self.assertEqual(out,
                          'stdin: surrogateescape\n'
                          'stdout: surrogateescape\n'
                          'stderr: backslashreplace\n')
-        out = self.c_locale_get_error_handler(encoding='')
+        out = self.c_locale_get_error_handler(locale, encoding='')
         self.assertEqual(out,
                          'stdin: surrogateescape\n'
                          'stdout: surrogateescape\n'
                          'stderr: backslashreplace\n')
 
+    def test_c_locale_surrogateescape(self):
+        self.check_locale_surrogateescape('C')
+
+    def test_posix_locale_surrogateescape(self):
+        self.check_locale_surrogateescape('POSIX')
+
     def test_implementation(self):
         # This test applies to all implementations equally.
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst
new file mode 100644
index 000000000000..893e4f573f16
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst	
@@ -0,0 +1,3 @@
+Standard streams like sys.stdout now use the "surrogateescape" error
+handler, instead of "strict", on the POSIX locale (when the C locale is not
+coerced and the UTF-8 Mode is disabled).
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 8c77859209ab..33af06ec18b9 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -345,13 +345,13 @@ get_stdio_errors(void)
 {
     const char *ctype_loc = setlocale(LC_CTYPE, NULL);
     if (ctype_loc != NULL) {
-        /* "surrogateescape" is the default in the legacy C locale */
-        if (strcmp(ctype_loc, "C") == 0) {
+        /* surrogateescape is the default in the legacy C and POSIX locales */
+        if (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0) {
             return "surrogateescape";
         }
 
 #ifdef PY_COERCE_C_LOCALE
-        /* "surrogateescape" is the default in locale coercion target locales */
+        /* surrogateescape is the default in locale coercion target locales */
         const _LocaleCoercionTarget *target = NULL;
         for (target = _TARGET_LOCALES; target->locale_name; target++) {
             if (strcmp(ctype_loc, target->locale_name) == 0) {
@@ -1791,16 +1791,29 @@ init_sys_streams(PyInterpreterState *interp)
             if (err) {
                 *err = '\0';
                 err++;
-                if (*err && !errors) {
-                    errors = err;
+                if (!err[0]) {
+                    err = NULL;
                 }
             }
-            if (!encoding && *pythonioencoding) {
-                encoding = pythonioencoding;
-                if (!errors) {
-                    errors = "strict";
+
+            /* Does PYTHONIOENCODING contain an encoding? */
+            if (pythonioencoding[0]) {
+                if (!encoding) {
+                    encoding = pythonioencoding;
+                }
+
+                /* If the encoding is set but not the error handler,
+                   use "strict" error handler by default.
+                   PYTHONIOENCODING=latin1 behaves as
+                   PYTHONIOENCODING=latin1:strict. */
+                if (!err) {
+                    err = "strict";
                 }
             }
+
+            if (!errors && err != NULL) {
+                errors = err;
+            }
         }
 
         if (interp->core_config.utf8_mode) {



More information about the Python-checkins mailing list