[pypy-commit] pypy unicode-utf8: Revert some changes that are not needed any more here
arigo
pypy.commits at gmail.com
Sat Oct 14 05:43:23 EDT 2017
Author: Armin Rigo <arigo at tunes.org>
Branch: unicode-utf8
Changeset: r92749:e6e4622c8bc2
Date: 2017-10-14 07:04 +0200
http://bitbucket.org/pypy/pypy/changeset/e6e4622c8bc2/
Log: Revert some changes that are not needed any more here
diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -3,7 +3,7 @@
from rpython.translator.tool.cbuild import ExternalCompilationInfo
from rpython.translator import cdir
-UNICODE_REPLACEMENT_CHARACTER = u'\uFFFD'.encode("utf8")
+UNICODE_REPLACEMENT_CHARACTER = u'\uFFFD'
class EncodeDecodeError(Exception):
@@ -148,17 +148,16 @@
if errors == "strict":
raise EncodeDecodeError(start, end, reason)
elif errors == "ignore":
- replace = ""
- lgt = 0
+ replace = u""
elif errors == "replace":
replace = UNICODE_REPLACEMENT_CHARACTER
- lgt = 1
else:
assert errorcb
- replace, end, lgt = errorcb(errors, namecb, reason,
+ replace, end = errorcb(errors, namecb, reason,
stringdata, start, end)
- with rffi.scoped_nonmoving_unicodebuffer(replace.decode("utf8")) as inbuf:
- r = pypy_cjk_dec_replace_on_error(decodebuf, inbuf, lgt, end)
+ # 'replace' is RPython unicode here
+ with rffi.scoped_nonmoving_unicodebuffer(replace) as inbuf:
+ r = pypy_cjk_dec_replace_on_error(decodebuf, inbuf, len(replace), end)
if r == MBERR_NOMEMORY:
raise MemoryError
@@ -257,7 +256,7 @@
replace = "?"
else:
assert errorcb
- retu, rets, end, lgt = errorcb(errors, namecb, reason,
+ retu, rets, end = errorcb(errors, namecb, reason,
unicodedata.encode("utf8"), start, end)
if rets is not None:
# py3k only
diff --git a/pypy/module/_multibytecodec/interp_incremental.py b/pypy/module/_multibytecodec/interp_incremental.py
--- a/pypy/module/_multibytecodec/interp_incremental.py
+++ b/pypy/module/_multibytecodec/interp_incremental.py
@@ -96,24 +96,25 @@
c_codecs.pypy_cjk_enc_free(self.encodebuf)
self.encodebuf = lltype.nullptr(c_codecs.ENCODEBUF_P.TO)
- @unwrap_spec(utf8object='utf8', final=bool)
- def encode_w(self, utf8object, objlen, final=False):
- object = utf8object.decode('utf8')
+ @unwrap_spec(object='utf8', final=bool)
+ def encode_w(self, object, final=False):
+ u_object = object.decode('utf8')
space = self.space
state = space.fromcache(CodecState)
if len(self.pending) > 0:
- object = self.pending + object
+ u_object = self.pending + u_object
try:
- output = c_codecs.encodeex(self.encodebuf, object, self.errors,
+ output = c_codecs.encodeex(self.encodebuf, u_object, self.errors,
state.encode_error_handler, self.name,
get_ignore_error(final))
except c_codecs.EncodeDecodeError as e:
- raise wrap_unicodeencodeerror(space, e, utf8object, self.name)
+ raise wrap_unicodeencodeerror(space, e, object, len(u_object),
+ self.name)
except RuntimeError:
raise wrap_runtimeerror(space)
pos = c_codecs.pypy_cjk_enc_inbuf_consumed(self.encodebuf)
- assert 0 <= pos <= len(object)
- self.pending = object[pos:]
+ assert 0 <= pos <= len(u_object)
+ self.pending = u_object[pos:]
return space.newbytes(output)
diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py
--- a/pypy/module/_multibytecodec/interp_multibytecodec.py
+++ b/pypy/module/_multibytecodec/interp_multibytecodec.py
@@ -28,20 +28,22 @@
space.newint(len(input))])
@unwrap_spec(input='utf8', errors="text_or_none")
- def encode(self, space, input, inputlen, errors=None):
+ def encode(self, space, input, errors=None):
if errors is None:
errors = 'strict'
state = space.fromcache(CodecState)
#
+ u_input = input.decode('utf8')
try:
- output = c_codecs.encode(self.codec, input.decode('utf8'), errors,
+ output = c_codecs.encode(self.codec, u_input, errors,
state.encode_error_handler, self.name)
except c_codecs.EncodeDecodeError as e:
- raise wrap_unicodeencodeerror(space, e, input, self.name)
+ raise wrap_unicodeencodeerror(space, e, input, len(u_input),
+ self.name)
except RuntimeError:
raise wrap_runtimeerror(space)
return space.newtuple([space.newbytes(output),
- space.newint(inputlen)])
+ space.newint(len(u_input))])
MultibyteCodec.typedef = TypeDef(
@@ -71,12 +73,12 @@
space.newint(e.end),
space.newtext(e.reason)]))
-def wrap_unicodeencodeerror(space, e, input, name):
+def wrap_unicodeencodeerror(space, e, input, inputlen, name):
raise OperationError(
space.w_UnicodeEncodeError,
space.newtuple([
space.newtext(name),
- space.newutf8(input, -1),
+ space.newutf8(input, inputlen),
space.newint(e.start),
space.newint(e.end),
space.newtext(e.reason)]))
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -126,6 +126,6 @@
def test_encode_custom_error_handler_bytes():
c = getcodec("hz")
def errorhandler(errors, enc, msg, t, startingpos, endingpos):
- return None, '\xc3', endingpos, -1
+ return None, '\xc3', endingpos
s = encode(c, u'abc\u1234def', 'foo', errorhandler)
assert '\xc3' in s
More information about the pypy-commit mailing list