[pypy-commit] pypy default: errors="replace" in decode.
Armin Rigo
noreply at buildbot.pypy.org
Sun Jun 5 11:14:36 CEST 2011
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r44709:ab73d694925f
Date: 2011-06-05 11:06 +0200
http://bitbucket.org/pypy/pypy/changeset/ab73d694925f/
Log: errors="replace" in decode.
diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -104,7 +104,8 @@
pypy_cjk_dec_inbuf_consumed = llexternal('pypy_cjk_dec_inbuf_consumed',
[DECODEBUF_P], rffi.SSIZE_T)
pypy_cjk_dec_inbuf_add = llexternal('pypy_cjk_dec_inbuf_add',
- [DECODEBUF_P, rffi.SSIZE_T], lltype.Void)
+ [DECODEBUF_P, rffi.SSIZE_T, rffi.INT],
+ rffi.INT)
def decode(codec, stringdata, errors="strict"):
inleft = len(stringdata)
@@ -141,9 +142,13 @@
else:
raise RuntimeError
#
- # if errors == ERROR_REPLACE:...
- if errors == "ignore": # or errors == ERROR_REPLACE
- pypy_cjk_dec_inbuf_add(decodebuf, esize)
+ if errors == "ignore":
+ pypy_cjk_dec_inbuf_add(decodebuf, esize, 0)
+ return # continue decoding
+ if errors == "replace":
+ e = pypy_cjk_dec_inbuf_add(decodebuf, esize, 1)
+ if e == MBERR_NOMEMORY:
+ raise MemoryError
return # continue decoding
start = pypy_cjk_dec_inbuf_consumed(decodebuf)
end = start + esize
diff --git a/pypy/module/_multibytecodec/test/test_app_codecs.py b/pypy/module/_multibytecodec/test/test_app_codecs.py
--- a/pypy/module/_multibytecodec/test/test_app_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_app_codecs.py
@@ -44,6 +44,14 @@
r = codec.decode("def~{}abc", 'ignore')
assert r == (u'def\u5fcf', 9)
+ def test_decode_hz_replace(self):
+ import _codecs_cn
+ codec = _codecs_cn.getcodec("hz")
+ r = codec.decode("def~{}abc", errors='replace')
+ assert r == (u'def\ufffd\u5fcf', 9)
+ r = codec.decode("def~{}abc", 'replace')
+ assert r == (u'def\ufffd\u5fcf', 9)
+
def test_encode_hz(self):
import _codecs_cn
codec = _codecs_cn.getcodec("hz")
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -41,6 +41,11 @@
u = decode(c, 'def~{}abc', 'ignore')
assert u == u'def\u5fcf'
+def test_decode_hz_replace():
+ c = getcodec("hz")
+ u = decode(c, 'def~{}abc', 'replace')
+ assert u == u'def\ufffd\u5fcf'
+
def test_encode_hz():
c = getcodec("hz")
s = encode(c, u'foobar')
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.c b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.c
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
@@ -1,6 +1,8 @@
#include <stdlib.h>
#include "src/cjkcodecs/multibytecodec.h"
+#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
+
struct pypy_cjk_dec_s *pypy_cjk_dec_init(const MultibyteCodec *codec,
char *inbuf, Py_ssize_t inlen)
@@ -93,9 +95,18 @@
return d->inbuf - d->inbuf_start;
}
-void pypy_cjk_dec_inbuf_add(struct pypy_cjk_dec_s* d, Py_ssize_t skip)
+int pypy_cjk_dec_inbuf_add(struct pypy_cjk_dec_s* d, Py_ssize_t skip,
+ int add_replacement_character)
{
+ if (add_replacement_character)
+ {
+ if (d->outbuf >= d->outbuf_end)
+ if (expand_decodebuffer(d, 1) == -1)
+ return MBERR_NOMEMORY;
+ *d->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER;
+ }
d->inbuf += skip;
+ return 0;
}
/************************************************************/
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.h b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.h
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
@@ -102,7 +102,7 @@
Py_ssize_t pypy_cjk_dec_outlen(struct pypy_cjk_dec_s *);
Py_ssize_t pypy_cjk_dec_inbuf_remaining(struct pypy_cjk_dec_s *d);
Py_ssize_t pypy_cjk_dec_inbuf_consumed(struct pypy_cjk_dec_s* d);
-void pypy_cjk_dec_inbuf_add(struct pypy_cjk_dec_s*, Py_ssize_t);
+int pypy_cjk_dec_inbuf_add(struct pypy_cjk_dec_s*, Py_ssize_t, int);
struct pypy_cjk_enc_s {
const MultibyteCodec *codec;
More information about the pypy-commit mailing list