[pypy-commit] pypy merge-2.7.2: CPython Issue #12100: Don't reset incremental encoders of CJK codecs at each call to encode().

amauryfa noreply at buildbot.pypy.org
Sun Jan 22 22:35:50 CET 2012


Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: merge-2.7.2
Changeset: r51668:0c697ef6b87f
Date: 2012-01-22 22:32 +0100
http://bitbucket.org/pypy/pypy/changeset/0c697ef6b87f/

Log:	CPython Issue #12100: Don't reset incremental encoders of CJK codecs
	at each call to encode().

diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -230,14 +230,14 @@
         if ignore_error == 0:
             flags = MBENC_FLUSH | MBENC_RESET
         else:
-            flags = MBENC_RESET
+            flags = 0
         while True:
             r = pypy_cjk_enc_chunk(encodebuf, flags)
             if r == 0 or r == ignore_error:
                 break
             multibytecodec_encerror(encodebuf, r, errors,
                                     errorcb, namecb, unicodedata)
-        while True:
+        while flags & MBENC_RESET:
             r = pypy_cjk_enc_reset(encodebuf)
             if r == 0:
                 break
diff --git a/pypy/module/_multibytecodec/test/test_app_incremental.py b/pypy/module/_multibytecodec/test/test_app_incremental.py
--- a/pypy/module/_multibytecodec/test/test_app_incremental.py
+++ b/pypy/module/_multibytecodec/test/test_app_incremental.py
@@ -129,6 +129,15 @@
         r = e.encode(u"xyz\u5f95\u6c85")
         assert r == 'xyz~{abcd~}'
 
+    def test_encode_hz_noreset(self):
+        text = (u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
+                u'Bye.')
+        out = ''
+        e = self.IncrementalHzEncoder()
+        for c in text:
+            out += e.encode(c)
+        assert out == b'~{<:Ky2;S{#,NpJ)l6HK!#~}Bye.'
+
     def test_encode_hz_error(self):
         e = self.IncrementalHzEncoder()
         raises(UnicodeEncodeError, e.encode, u"\u4321", True)
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.c b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.c
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
@@ -187,7 +187,7 @@
       Py_ssize_t r;
       Py_ssize_t inleft = (Py_ssize_t)(d->inbuf_end - d->inbuf);
       Py_ssize_t outleft = (Py_ssize_t)(d->outbuf_end - d->outbuf);
-      if (inleft == 0)
+      if (inleft == 0 && !(flags & MBENC_RESET))
         return 0;
       r = d->codec->encode(&d->state, d->codec->config,
                            &d->inbuf, inleft, &d->outbuf, outleft, flags);
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.h b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.h
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
@@ -84,6 +84,7 @@
 #define MBERR_NOMEMORY          (-4) /* out of memory */
 
 #define MBENC_FLUSH             0x0001 /* encode all characters encodable */
+#define MBENC_RESET             0x0002 /* reset after an encoding session */
 #define MBENC_MAX               MBENC_FLUSH
 
 


More information about the pypy-commit mailing list