[Python-checkins] cpython (3.3): support encoding error handlers that return bytes (closes #16585)

benjamin.peterson python-checkins at python.org
Sun Dec 2 17:21:15 CET 2012


http://hg.python.org/cpython/rev/5c88c72dec60
changeset:   80691:5c88c72dec60
branch:      3.3
parent:      80689:b1db531736a3
user:        Benjamin Peterson <benjamin at python.org>
date:        Sun Dec 02 11:20:28 2012 -0500
summary:
  support encoding error handlers that return bytes (closes #16585)

files:
  Lib/test/test_multibytecodec.py    |  4 ++++
  Misc/NEWS                          |  3 +++
  Modules/cjkcodecs/multibytecodec.c |  8 ++++++--
  3 files changed, 13 insertions(+), 2 deletions(-)


diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py
--- a/Lib/test/test_multibytecodec.py
+++ b/Lib/test/test_multibytecodec.py
@@ -45,6 +45,10 @@
         self.assertRaises(IndexError, dec,
                           b'apple\x92ham\x93spam', 'test.cjktest')
 
+    def test_errorhandler_returns_bytes(self):
+        enc = "\u30fb\udc80".encode('gb18030', 'surrogateescape')
+        self.assertEqual(enc, b'\x819\xa79\x80')
+
     def test_codingspec(self):
         try:
             for enc in ALL_CJKENCODINGS:
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -98,6 +98,9 @@
 Library
 -------
 
+- Issue #16585: Make CJK encoders support error handlers that return bytes per
+  PEP 383.
+
 - Issue #10182: The re module doesn't truncate indices to 32 bits anymore.
   Patch by Serhiy Storchaka.
 
diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -316,7 +316,7 @@
         goto errorexit;
 
     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
-        !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) ||
+        (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
         !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
         PyErr_SetString(PyExc_TypeError,
                         "encoding error handler must return "
@@ -324,7 +324,7 @@
         goto errorexit;
     }
 
-    {
+    if (PyUnicode_Check(tobj)) {
         const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj);
 
         retstr = multibytecodec_encode(codec, state, &uraw,
@@ -333,6 +333,10 @@
         if (retstr == NULL)
             goto errorexit;
     }
+    else {
+        Py_INCREF(tobj);
+        retstr = tobj;
+    }
 
     assert(PyBytes_Check(retstr));
     retstrsize = PyBytes_GET_SIZE(retstr);

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list