[Python-checkins] r87054 - in python/branches/py3k: Doc/library/sys.rst Lib/test/test_cmd_line.py Misc/NEWS Python/sysmodule.c

victor.stinner python-checkins at python.org
Sat Dec 4 18:24:34 CET 2010


Author: victor.stinner
Date: Sat Dec  4 18:24:33 2010
New Revision: 87054

Log:
Issue #10601: sys.displayhook uses 'backslashreplace' error handler on
UnicodeEncodeError.


Modified:
   python/branches/py3k/Doc/library/sys.rst
   python/branches/py3k/Lib/test/test_cmd_line.py
   python/branches/py3k/Misc/NEWS
   python/branches/py3k/Python/sysmodule.c

Modified: python/branches/py3k/Doc/library/sys.rst
==============================================================================
--- python/branches/py3k/Doc/library/sys.rst	(original)
+++ python/branches/py3k/Doc/library/sys.rst	Sat Dec  4 18:24:33 2010
@@ -99,13 +99,39 @@
 
 .. function:: displayhook(value)
 
-   If *value* is not ``None``, this function prints it to ``sys.stdout``, and saves
-   it in ``builtins._``.
+   If *value* is not ``None``, this function prints ``repr(value)`` to
+   ``sys.stdout``, and saves *value* in ``builtins._``. If ``repr(value)`` is
+   not encodable to ``sys.stdout.encoding`` with ``sys.stdout.errors`` error
+   handler (which is probably ``'strict'``), encode it to
+   ``sys.stdout.encoding`` with ``'backslashreplace'`` error handler.
 
    ``sys.displayhook`` is called on the result of evaluating an :term:`expression`
    entered in an interactive Python session.  The display of these values can be
    customized by assigning another one-argument function to ``sys.displayhook``.
 
+   Pseudo-code::
+
+       def displayhook(value):
+           if value is None:
+               return
+           # Set '_' to None to avoid recursion
+           builtins._ = None
+           text = repr(value)
+           try:
+               sys.stdout.write(text)
+           except UnicodeEncodeError:
+               bytes = text.encode(sys.stdout.encoding, 'backslashreplace')
+               if hasattr(sys.stdout, 'buffer'):
+                   sys.stdout.buffer.write(bytes)
+               else:
+                   text = bytes.decode(sys.stdout.encoding, 'strict')
+                   sys.stdout.write(text)
+           sys.stdout.write("\n")
+           builtins._ = value
+
+   .. versionchanged:: 3.2
+      Use ``'backslashreplace'`` error handler on :exc:`UnicodeEncodeError`.
+
 
 .. function:: excepthook(type, value, traceback)
 

Modified: python/branches/py3k/Lib/test/test_cmd_line.py
==============================================================================
--- python/branches/py3k/Lib/test/test_cmd_line.py	(original)
+++ python/branches/py3k/Lib/test/test_cmd_line.py	Sat Dec  4 18:24:33 2010
@@ -221,6 +221,24 @@
         self.assertIn(path1.encode('ascii'), out)
         self.assertIn(path2.encode('ascii'), out)
 
+    def test_displayhook_unencodable(self):
+        for encoding in ('ascii', 'latin1', 'utf8'):
+            env = os.environ.copy()
+            env['PYTHONIOENCODING'] = encoding
+            p = subprocess.Popen(
+                [sys.executable, '-i'],
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                env=env)
+            # non-ascii, surrogate, non-BMP printable, non-BMP unprintable
+            text = "a=\xe9 b=\uDC80 c=\U00010000 d=\U0010FFFF"
+            p.stdin.write(ascii(text).encode('ascii') + b"\n")
+            p.stdin.write(b'exit()\n')
+            data = kill_python(p)
+            escaped = repr(text).encode(encoding, 'backslashreplace')
+            self.assertIn(escaped, data)
+
 
 def test_main():
     test.support.run_unittest(CmdLineTest)

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Sat Dec  4 18:24:33 2010
@@ -49,6 +49,9 @@
 Library
 -------
 
+- Issue #10601: sys.displayhook uses 'backslashreplace' error handler on
+  UnicodeEncodeError.
+
 - Add the "display" and "undisplay" pdb commands.
 
 - Issue #7245: Add a SIGINT handler in pdb that allows to break a program

Modified: python/branches/py3k/Python/sysmodule.c
==============================================================================
--- python/branches/py3k/Python/sysmodule.c	(original)
+++ python/branches/py3k/Python/sysmodule.c	Sat Dec  4 18:24:33 2010
@@ -65,6 +65,68 @@
         return PyDict_SetItemString(sd, name, v);
 }
 
+/* Write repr(o) to sys.stdout using sys.stdout.encoding and 'backslashreplace'
+   error handler. If sys.stdout has a buffer attribute, use
+   sys.stdout.buffer.write(encoded), otherwise redecode the string and use
+   sys.stdout.write(redecoded).
+
+   Helper function for sys_displayhook(). */
+static int
+sys_displayhook_unencodable(PyObject *outf, PyObject *o)
+{
+    PyObject *stdout_encoding = NULL;
+    PyObject *encoded, *escaped_str, *repr_str, *buffer, *result;
+    char *stdout_encoding_str;
+    int ret;
+
+    stdout_encoding = PyObject_GetAttrString(outf, "encoding");
+    if (stdout_encoding == NULL)
+        goto error;
+    stdout_encoding_str = _PyUnicode_AsString(stdout_encoding);
+    if (stdout_encoding_str == NULL)
+        goto error;
+
+    repr_str = PyObject_Repr(o);
+    if (repr_str == NULL)
+        goto error;
+    encoded = PyUnicode_AsEncodedString(repr_str,
+                                        stdout_encoding_str,
+                                        "backslashreplace");
+    Py_DECREF(repr_str);
+    if (encoded == NULL)
+        goto error;
+
+    buffer = PyObject_GetAttrString(outf, "buffer");
+    if (buffer) {
+        result = PyObject_CallMethod(buffer, "write", "(O)", encoded);
+        Py_DECREF(buffer);
+        Py_DECREF(encoded);
+        if (result == NULL)
+            goto error;
+        Py_DECREF(result);
+    }
+    else {
+        PyErr_Clear();
+        escaped_str = PyUnicode_FromEncodedObject(encoded,
+                                                  stdout_encoding_str,
+                                                  "strict");
+        Py_DECREF(encoded);
+        if (PyFile_WriteObject(escaped_str, outf, Py_PRINT_RAW) != 0) {
+            Py_DECREF(escaped_str);
+            goto error;
+        }
+        Py_DECREF(escaped_str);
+    }
+    ret = 0;
+    goto finally;
+
+error:
+    ret = -1;
+finally:
+    Py_XDECREF(stdout_encoding);
+    return ret;
+}
+
 static PyObject *
 sys_displayhook(PyObject *self, PyObject *o)
 {
@@ -72,6 +134,7 @@
     PyInterpreterState *interp = PyThreadState_GET()->interp;
     PyObject *modules = interp->modules;
     PyObject *builtins = PyDict_GetItemString(modules, "builtins");
+    int err;
 
     if (builtins == NULL) {
         PyErr_SetString(PyExc_RuntimeError, "lost builtins module");
@@ -92,8 +155,19 @@
         PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout");
         return NULL;
     }
-    if (PyFile_WriteObject(o, outf, 0) != 0)
-        return NULL;
+    if (PyFile_WriteObject(o, outf, 0) != 0) {
+        if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) {
+            /* repr(o) is not encodable to sys.stdout.encoding with
+             * sys.stdout.errors error handler (which is probably 'strict') */
+            PyErr_Clear();
+            err = sys_displayhook_unencodable(outf, o);
+            if (err)
+                return NULL;
+        }
+        else {
+            return NULL;
+        }
+    }
     if (PyFile_WriteString("\n", outf) != 0)
         return NULL;
     if (PyObject_SetAttrString(builtins, "_", o) != 0)


More information about the Python-checkins mailing list