[Python-checkins] r87054 - in python/branches/py3k: Doc/library/sys.rst Lib/test/test_cmd_line.py Misc/NEWS Python/sysmodule.c
victor.stinner
python-checkins at python.org
Sat Dec 4 18:24:34 CET 2010
Author: victor.stinner
Date: Sat Dec 4 18:24:33 2010
New Revision: 87054
Log:
Issue #10601: sys.displayhook uses 'backslashreplace' error handler on
UnicodeEncodeError.
Modified:
python/branches/py3k/Doc/library/sys.rst
python/branches/py3k/Lib/test/test_cmd_line.py
python/branches/py3k/Misc/NEWS
python/branches/py3k/Python/sysmodule.c
Modified: python/branches/py3k/Doc/library/sys.rst
==============================================================================
--- python/branches/py3k/Doc/library/sys.rst (original)
+++ python/branches/py3k/Doc/library/sys.rst Sat Dec 4 18:24:33 2010
@@ -99,13 +99,39 @@
.. function:: displayhook(value)
- If *value* is not ``None``, this function prints it to ``sys.stdout``, and saves
- it in ``builtins._``.
+ If *value* is not ``None``, this function prints ``repr(value)`` to
+ ``sys.stdout``, and saves *value* in ``builtins._``. If ``repr(value)`` is
+ not encodable to ``sys.stdout.encoding`` with ``sys.stdout.errors`` error
+ handler (which is probably ``'strict'``), encode it to
+ ``sys.stdout.encoding`` with ``'backslashreplace'`` error handler.
``sys.displayhook`` is called on the result of evaluating an :term:`expression`
entered in an interactive Python session. The display of these values can be
customized by assigning another one-argument function to ``sys.displayhook``.
+ Pseudo-code::
+
+ def displayhook(value):
+ if value is None:
+ return
+ # Set '_' to None to avoid recursion
+ builtins._ = None
+ text = repr(value)
+ try:
+ sys.stdout.write(text)
+ except UnicodeEncodeError:
+ bytes = text.encode(sys.stdout.encoding, 'backslashreplace')
+ if hasattr(sys.stdout, 'buffer'):
+ sys.stdout.buffer.write(bytes)
+ else:
+ text = bytes.decode(sys.stdout.encoding, 'strict')
+ sys.stdout.write(text)
+ sys.stdout.write("\n")
+ builtins._ = value
+
+ .. versionchanged:: 3.2
+ Use ``'backslashreplace'`` error handler on :exc:`UnicodeEncodeError`.
+
.. function:: excepthook(type, value, traceback)
Modified: python/branches/py3k/Lib/test/test_cmd_line.py
==============================================================================
--- python/branches/py3k/Lib/test/test_cmd_line.py (original)
+++ python/branches/py3k/Lib/test/test_cmd_line.py Sat Dec 4 18:24:33 2010
@@ -221,6 +221,24 @@
self.assertIn(path1.encode('ascii'), out)
self.assertIn(path2.encode('ascii'), out)
+ def test_displayhook_unencodable(self):  # checks that a child interpreter prints repr(text) escaped with 'backslashreplace' for each tested stdout encoding
+ for encoding in ('ascii', 'latin1', 'utf8'):
+ env = os.environ.copy()
+ env['PYTHONIOENCODING'] = encoding  # handed to the child through env= below
+ p = subprocess.Popen(
+ [sys.executable, '-i'],
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,  # stderr is merged into the captured stdout pipe
+ env=env)
+ # non-ascii, surrogate, non-BMP printable, non-BMP unprintable
+ text = "a=\xe9 b=\uDC80 c=\U00010000 d=\U0010FFFF"
+ p.stdin.write(ascii(text).encode('ascii') + b"\n")  # ascii() yields an escaped, pure-ASCII literal, so the child's input does not depend on the encoding under test
+ p.stdin.write(b'exit()\n')
+ data = kill_python(p)  # NOTE(review): presumably returns the child's captured output as bytes; confirm against kill_python
+ escaped = repr(text).encode(encoding, 'backslashreplace')  # expected bytes: characters the encoding cannot represent become backslash escapes
+ self.assertIn(escaped, data)
+
def test_main():
test.support.run_unittest(CmdLineTest)
Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS (original)
+++ python/branches/py3k/Misc/NEWS Sat Dec 4 18:24:33 2010
@@ -49,6 +49,9 @@
Library
-------
+- Issue #10601: sys.displayhook uses 'backslashreplace' error handler on
+ UnicodeEncodeError.
+
- Add the "display" and "undisplay" pdb commands.
- Issue #7245: Add a SIGINT handler in pdb that allows to break a program
Modified: python/branches/py3k/Python/sysmodule.c
==============================================================================
--- python/branches/py3k/Python/sysmodule.c (original)
+++ python/branches/py3k/Python/sysmodule.c Sat Dec 4 18:24:33 2010
@@ -65,6 +65,68 @@
return PyDict_SetItemString(sd, name, v);
}
+/* Write repr(o) to sys.stdout using sys.stdout.encoding and 'backslashreplace'
+   error handler. If sys.stdout has a buffer attribute, use
+   sys.stdout.buffer.write(encoded), otherwise redecode the string and use
+   sys.stdout.write(redecoded).
+
+   outf is the sys.stdout object and o the value to display; neither
+   reference is consumed.
+
+   Return 0 on success, -1 on failure (the exception raised by the failing
+   call is left set for the caller).
+
+   Helper function for sys_displayhook(). */
+static int
+sys_displayhook_unencodable(PyObject *outf, PyObject *o)
+{
+    PyObject *stdout_encoding = NULL;
+    PyObject *repr_str = NULL, *encoded = NULL, *buffer = NULL;
+    PyObject *escaped_str = NULL, *result = NULL;
+    char *stdout_encoding_str;
+    int ret = -1;
+
+    /* stdout_encoding is released only in the cleanup section because
+       stdout_encoding_str is still used by the calls below. */
+    stdout_encoding = PyObject_GetAttrString(outf, "encoding");
+    if (stdout_encoding == NULL)
+        goto finally;
+    stdout_encoding_str = _PyUnicode_AsString(stdout_encoding);
+    if (stdout_encoding_str == NULL)
+        goto finally;
+
+    repr_str = PyObject_Repr(o);
+    if (repr_str == NULL)
+        goto finally;
+    encoded = PyUnicode_AsEncodedString(repr_str,
+                                        stdout_encoding_str,
+                                        "backslashreplace");
+    if (encoded == NULL)
+        goto finally;
+
+    buffer = PyObject_GetAttrString(outf, "buffer");
+    if (buffer != NULL) {
+        /* Binary layer available: write the escaped bytes directly. */
+        result = PyObject_CallMethod(buffer, "write", "(O)", encoded);
+        if (result == NULL)
+            goto finally;
+    }
+    else {
+        /* Only a missing attribute selects the text fallback; any other
+           error raised while looking up 'buffer' is propagated instead of
+           being silently discarded. */
+        if (!PyErr_ExceptionMatches(PyExc_AttributeError))
+            goto finally;
+        PyErr_Clear();
+        escaped_str = PyUnicode_FromEncodedObject(encoded,
+                                                  stdout_encoding_str,
+                                                  "strict");
+        /* Decoding may fail: never hand NULL to PyFile_WriteObject() or
+           Py_DECREF(). */
+        if (escaped_str == NULL)
+            goto finally;
+        if (PyFile_WriteObject(escaped_str, outf, Py_PRINT_RAW) != 0)
+            goto finally;
+    }
+    ret = 0;
+
+finally:
+    /* Single cleanup path: every reference starts as NULL, so Py_XDECREF
+       is safe whichever step failed. */
+    Py_XDECREF(result);
+    Py_XDECREF(escaped_str);
+    Py_XDECREF(buffer);
+    Py_XDECREF(encoded);
+    Py_XDECREF(repr_str);
+    Py_XDECREF(stdout_encoding);
+    return ret;
+}
+
static PyObject *
sys_displayhook(PyObject *self, PyObject *o)
{
@@ -72,6 +134,7 @@
PyInterpreterState *interp = PyThreadState_GET()->interp;
PyObject *modules = interp->modules;
PyObject *builtins = PyDict_GetItemString(modules, "builtins");
+ int err;
if (builtins == NULL) {
PyErr_SetString(PyExc_RuntimeError, "lost builtins module");
@@ -92,8 +155,19 @@
PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout");
return NULL;
}
- if (PyFile_WriteObject(o, outf, 0) != 0)
- return NULL;
+ if (PyFile_WriteObject(o, outf, 0) != 0) {
+ if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) {
+ /* repr(o) is not encodable to sys.stdout.encoding with
+ * sys.stdout.errors error handler (which is probably 'strict') */
+ PyErr_Clear();
+ err = sys_displayhook_unencodable(outf, o);
+ if (err)
+ return NULL;
+ }
+ else {
+ return NULL;
+ }
+ }
if (PyFile_WriteString("\n", outf) != 0)
return NULL;
if (PyObject_SetAttrString(builtins, "_", o) != 0)
More information about the Python-checkins
mailing list