[pypy-commit] pypy unicode-utf8-py3: fix one _codecs test
mattip
pypy.commits at gmail.com
Sat Jun 30 22:48:23 EDT 2018
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r94788:d1d68fcf4b34
Date: 2018-06-30 19:33 -0700
http://bitbucket.org/pypy/pypy/changeset/d1d68fcf4b34/
Log: fix one _codecs test
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -3,10 +3,8 @@
from rpython.rlib.objectmodel import we_are_translated, not_rpython
from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
from rpython.rlib import runicode
-from rpython.rlib.runicode import (
- code_to_unichr, MAXUNICODE,
- raw_unicode_escape_helper_unicode)
-from rpython.rlib.runicode import code_to_unichr, MAXUNICODE
+from rpython.rlib.runicode import ( raw_unicode_escape_helper_unicode)
+from rpython.rlib import rutf8
from pypy.interpreter.error import OperationError, oefmt
from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
@@ -96,24 +94,10 @@
return call_errorhandler
def make_decode_errorhandler(self, space):
- errorhandler = self._make_errorhandler(space, True)
- def decode_call_errorhandler(errors, encoding, reason, input,
- startpos, endpos):
- w_replace, newpos = errorhandler(errors, encoding, reason, input,
- startpos, endpos)
- return space.utf8_w(w_replace), newpos
- return decode_call_errorhandler
+ return self._make_errorhandler(space, True)
def make_encode_errorhandler(self, space):
- errorhandler = self._make_errorhandler(space, False)
- def encode_call_errorhandler(errors, encoding, reason, input,
- startpos, endpos):
- w_replace, newpos = errorhandler(errors, encoding, reason, input,
- startpos, endpos)
- if space.isinstance_w(w_replace, space.w_unicode):
- return space.utf8_w(w_replace), None, newpos
- return None, space.bytes_w(w_replace), newpos
- return encode_call_errorhandler
+ return self._make_errorhandler(space, False)
def get_unicodedata_handler(self, space):
if self.unicodedata_handler:
@@ -336,9 +320,9 @@
except KeyError:
raw_unicode_escape_helper_unicode(builder, oc)
else:
- builder.append(u'\\N{')
- builder.append(unicode(name))
- builder.append(u'}')
+ builder.append('\\N{')
+ builder.append(name)
+ builder.append('}')
pos = rutf8.next_codepoint_pos(obj, pos)
r = builder.build()
lgt = rutf8.check_utf8(r, True)
@@ -662,14 +646,18 @@
def make_utf_encoder_wrapper(name):
rname = "unicode_encode_%s" % (name.replace("_encode", ""), )
func = _find_implementation(rname)
- @unwrap_spec(uni=unicode, errors='text_or_none')
- def wrap_encoder(space, uni, errors="strict"):
+ @unwrap_spec(errors='text_or_none')
+ def wrap_encoder(space, w_arg, errors="strict"):
+ from pypy.interpreter import unicodehelper
+
+ w_arg = unicodehelper.convert_arg_to_w_unicode(space, w_arg, rname)
if errors is None:
errors = 'strict'
state = space.fromcache(CodecState)
- result = func(uni, len(uni), errors, state.encode_error_handler,
+ utf8len = w_arg._length
+ result = func(w_arg._utf8, errors, state.encode_error_handler,
allow_surrogates=False)
- return space.newtuple([space.newbytes(result), space.newint(len(uni))])
+ return space.newtuple([space.newbytes(result), space.newint(utf8len)])
wrap_encoder.__name__ = func.__name__
globals()[name] = wrap_encoder
@@ -750,8 +738,9 @@
# utf-8 functions are not regular, because we have to pass
# "allow_surrogates=False"
-@unwrap_spec(uni=unicode, errors='text_or_none')
-def utf_8_encode(space, uni, errors="strict"):
+@unwrap_spec(errors='text_or_none')
+def utf_8_encode(space, w_obj, errors="strict"):
+ utf8, lgt = space.utf8_len_w(w_obj)
if errors is None:
errors = 'strict'
state = space.fromcache(CodecState)
@@ -759,9 +748,9 @@
# an @elidable function nowadays. Instead, we need the _impl().
# (The problem is the errorhandler, which calls arbitrary Python.)
result = runicode.unicode_encode_utf_8_impl(
- uni, len(uni), errors, state.encode_error_handler,
+ utf8, lgt, errors, state.encode_error_handler,
allow_surrogates=False)
- return space.newtuple([space.newbytes(result), space.newint(len(uni))])
+ return space.newtuple([space.newbytes(result), space.newint(lgt)])
@unwrap_spec(string='bufferstr', errors='text_or_none',
w_final = WrappedDefault(False))
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1234,6 +1234,15 @@
w_encoded = encode_object(space, w_repr, 'ascii', 'backslashreplace')
return decode_object(space, w_encoded, 'ascii', None)
+def unicode_from_string(space, w_bytes):
+ # this is a performance and bootstrapping hack
+ encoding = getdefaultencoding(space)
+ if encoding != 'ascii':
+ return unicode_from_encoded_object(space, w_bytes, encoding, "strict")
+ s = space.bytes_w(w_bytes)
+ unicodehelper.check_ascii_or_raise(space, s)
+ return W_UnicodeObject(s, len(s))
+
class UnicodeDocstrings:
"""str(object='') -> str
More information about the pypy-commit
mailing list