[pypy-commit] pypy unicode-utf8-py3: fix one _codecs test

Sat Jun 30 22:48:23 EDT 2018

Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r94788:d1d68fcf4b34
Date: 2018-06-30 19:33 -0700
http://bitbucket.org/pypy/pypy/changeset/d1d68fcf4b34/

Log:	fix one _codecs test

diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -3,10 +3,8 @@
 from rpython.rlib.objectmodel import we_are_translated, not_rpython
 from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
 from rpython.rlib import runicode
-from rpython.rlib.runicode import (
-    code_to_unichr, MAXUNICODE,
-    raw_unicode_escape_helper_unicode)
-from rpython.rlib.runicode import code_to_unichr, MAXUNICODE
+from rpython.rlib.runicode import ( raw_unicode_escape_helper_unicode)
+from rpython.rlib import rutf8
 
 from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
@@ -96,24 +94,10 @@
         return call_errorhandler
 
     def make_decode_errorhandler(self, space):
-        errorhandler = self._make_errorhandler(space, True)
-        def decode_call_errorhandler(errors, encoding, reason, input,
-                                     startpos, endpos):
-            w_replace, newpos = errorhandler(errors, encoding, reason, input,
-                                             startpos, endpos)
-            return space.utf8_w(w_replace), newpos
-        return decode_call_errorhandler
+        return self._make_errorhandler(space, True)
 
     def make_encode_errorhandler(self, space):
-        errorhandler = self._make_errorhandler(space, False)
-        def encode_call_errorhandler(errors, encoding, reason, input,
-                                     startpos, endpos):
-            w_replace, newpos = errorhandler(errors, encoding, reason, input,
-                                             startpos, endpos)
-            if space.isinstance_w(w_replace, space.w_unicode):
-                return space.utf8_w(w_replace), None, newpos
-            return None, space.bytes_w(w_replace), newpos
-        return encode_call_errorhandler
+        return self._make_errorhandler(space, False)
 
     def get_unicodedata_handler(self, space):
         if self.unicodedata_handler:
@@ -336,9 +320,9 @@
             except KeyError:
                 raw_unicode_escape_helper_unicode(builder, oc)
             else:
-                builder.append(u'\\N{')
-                builder.append(unicode(name))
-                builder.append(u'}')
+                builder.append('\\N{')
+                builder.append(name)
+                builder.append('}')
             pos = rutf8.next_codepoint_pos(obj, pos)
         r = builder.build()
         lgt = rutf8.check_utf8(r, True)
@@ -662,14 +646,18 @@
 def make_utf_encoder_wrapper(name):
     rname = "unicode_encode_%s" % (name.replace("_encode", ""), )
     func = _find_implementation(rname)
-    @unwrap_spec(uni=unicode, errors='text_or_none')
-    def wrap_encoder(space, uni, errors="strict"):
+    @unwrap_spec(errors='text_or_none')
+    def wrap_encoder(space, w_arg, errors="strict"):
+        from pypy.interpreter import unicodehelper
+
+        w_arg = unicodehelper.convert_arg_to_w_unicode(space, w_arg, rname)
         if errors is None:
             errors = 'strict'
         state = space.fromcache(CodecState)
-        result = func(uni, len(uni), errors, state.encode_error_handler,
+        utf8len = w_arg._length
+        result = func(w_arg._utf8, errors, state.encode_error_handler,
                       allow_surrogates=False)
-        return space.newtuple([space.newbytes(result), space.newint(len(uni))])
+        return space.newtuple([space.newbytes(result), space.newint(utf8len)])
     wrap_encoder.__name__ = func.__name__
     globals()[name] = wrap_encoder
 
@@ -750,8 +738,9 @@
 
 # utf-8 functions are not regular, because we have to pass
 # "allow_surrogates=False"
-@unwrap_spec(uni=unicode, errors='text_or_none')
-def utf_8_encode(space, uni, errors="strict"):
+@unwrap_spec(errors='text_or_none')
+def utf_8_encode(space, w_obj, errors="strict"):
+    utf8, lgt = space.utf8_len_w(w_obj)
     if errors is None:
         errors = 'strict'
     state = space.fromcache(CodecState)
@@ -759,9 +748,9 @@
     # an @elidable function nowadays.  Instead, we need the _impl().
     # (The problem is the errorhandler, which calls arbitrary Python.)
     result = runicode.unicode_encode_utf_8_impl(
-        uni, len(uni), errors, state.encode_error_handler,
+        utf8, lgt, errors, state.encode_error_handler,
         allow_surrogates=False)
-    return space.newtuple([space.newbytes(result), space.newint(len(uni))])
+    return space.newtuple([space.newbytes(result), space.newint(lgt)])
 
 @unwrap_spec(string='bufferstr', errors='text_or_none',
              w_final = WrappedDefault(False))
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1234,6 +1234,15 @@
     w_encoded = encode_object(space, w_repr, 'ascii', 'backslashreplace')
     return decode_object(space, w_encoded, 'ascii', None)
 
+def unicode_from_string(space, w_bytes):
+    # this is a performance and bootstrapping hack
+    encoding = getdefaultencoding(space)
+    if encoding != 'ascii':
+        return unicode_from_encoded_object(space, w_bytes, encoding, "strict")
+    s = space.bytes_w(w_bytes)
+    unicodehelper.check_ascii_or_raise(space, s)
+    return W_UnicodeObject(s, len(s))
+
 
 class UnicodeDocstrings:
     """str(object='') -> str