[pypy-commit] pypy merge-2.7.2: CPython Issue #12100: Don't reset incremental encoders of CJK codecs at each call to encode().
amauryfa
noreply at buildbot.pypy.org
Sun Jan 22 22:35:50 CET 2012
Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: merge-2.7.2
Changeset: r51668:0c697ef6b87f
Date: 2012-01-22 22:32 +0100
http://bitbucket.org/pypy/pypy/changeset/0c697ef6b87f/
Log: CPython Issue #12100: Don't reset incremental encoders of CJK codecs
at each call to encode().
diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -230,14 +230,14 @@
if ignore_error == 0:
flags = MBENC_FLUSH | MBENC_RESET
else:
- flags = MBENC_RESET
+ flags = 0
while True:
r = pypy_cjk_enc_chunk(encodebuf, flags)
if r == 0 or r == ignore_error:
break
multibytecodec_encerror(encodebuf, r, errors,
errorcb, namecb, unicodedata)
- while True:
+ while flags & MBENC_RESET:
r = pypy_cjk_enc_reset(encodebuf)
if r == 0:
break
diff --git a/pypy/module/_multibytecodec/test/test_app_incremental.py b/pypy/module/_multibytecodec/test/test_app_incremental.py
--- a/pypy/module/_multibytecodec/test/test_app_incremental.py
+++ b/pypy/module/_multibytecodec/test/test_app_incremental.py
@@ -129,6 +129,15 @@
r = e.encode(u"xyz\u5f95\u6c85")
assert r == 'xyz~{abcd~}'
+ def test_encode_hz_noreset(self):
+ text = (u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
+ u'Bye.')
+ out = ''
+ e = self.IncrementalHzEncoder()
+ for c in text:
+ out += e.encode(c)
+ assert out == b'~{<:Ky2;S{#,NpJ)l6HK!#~}Bye.'
+
def test_encode_hz_error(self):
e = self.IncrementalHzEncoder()
raises(UnicodeEncodeError, e.encode, u"\u4321", True)
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.c b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.c
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
@@ -187,7 +187,7 @@
Py_ssize_t r;
Py_ssize_t inleft = (Py_ssize_t)(d->inbuf_end - d->inbuf);
Py_ssize_t outleft = (Py_ssize_t)(d->outbuf_end - d->outbuf);
- if (inleft == 0)
+ if (inleft == 0 && !(flags & MBENC_RESET))
return 0;
r = d->codec->encode(&d->state, d->codec->config,
&d->inbuf, inleft, &d->outbuf, outleft, flags);
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.h b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.h
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
@@ -84,6 +84,7 @@
#define MBERR_NOMEMORY (-4) /* out of memory */
#define MBENC_FLUSH 0x0001 /* encode all characters encodable */
+#define MBENC_RESET 0x0002 /* reset after an encoding session */
#define MBENC_MAX MBENC_FLUSH
More information about the pypy-commit mailing list