[pypy-commit] pypy unicode-utf8: get enough plumbing to start running more tests

Sat Feb 25 10:18:43 EST 2017

Author: fijal
Branch: unicode-utf8
Changeset: r90354:6af99b57c74f
Date: 2017-02-25 16:18 +0100
http://bitbucket.org/pypy/pypy/changeset/6af99b57c74f/

Log:	get enough plumbing to start running more tests

diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -159,6 +159,7 @@
 
     def visit_utf8(self, el, app_sig):
         self.checked_space_method(el, app_sig)
+        self.orig_arg() # iterate
 
     def visit_nonnegint(self, el, app_sig):
         self.checked_space_method(el, app_sig)
@@ -607,6 +608,16 @@
                              "the name of an argument of the following "
                              "function" % (name,))
 
+    if kw_spec:
+        filtered = []
+        i = 0
+        while i < len(unwrap_spec):
+            elem = unwrap_spec[i]
+            filtered.append(elem)
+            if elem == 'utf8':
+                i += 1
+            i += 1
+        unwrap_spec = filtered
     return unwrap_spec
 
 
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -74,3 +74,8 @@
         uni, len(uni), "strict",
         errorhandler=raise_unicode_exception_encode,
         allow_surrogates=True)
+
+def utf8_encode_ascii(utf8, utf8len, errors, errorhandler):
+    if len(utf8) == utf8len:
+        return utf8
+    xxx
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -371,15 +371,16 @@
 from rpython.rlib import runicode
 
 def make_encoder_wrapper(name):
-    rname = "unicode_encode_%s" % (name.replace("_encode", ""), )
-    assert hasattr(runicode, rname)
-    @unwrap_spec(uni='utf8', errors='str_or_None')
+    rname = "utf8_encode_%s" % (name.replace("_encode", ""), )
+    @unwrap_spec(utf8='utf8', errors='str_or_None')
     def wrap_encoder(space, utf8, utf8len, errors="strict"):
+        from pypy.interpreter import unicodehelper
+
         if errors is None:
             errors = 'strict'
         state = space.fromcache(CodecState)
-        func = getattr(runicode, rname)
-        result = func(utf8, len(utf8), utf8len,
+        func = getattr(unicodehelper, rname)
+        result = func(utf8, utf8len,
             errors, state.encode_error_handler)
         return space.newtuple([space.newbytes(result), space.newint(utf8len)])
     wrap_encoder.func_name = rname
@@ -438,10 +439,11 @@
 
 # utf-8 functions are not regular, because we have to pass
 # "allow_surrogates=True"
-@unwrap_spec(uni=unicode, errors='str_or_None')
-def utf_8_encode(space, uni, errors="strict"):
+@unwrap_spec(utf8='utf8', errors='str_or_None')
+def utf_8_encode(space, utf8, utf8len, errors="strict"):
     if errors is None:
         errors = 'strict'
+    xxx
     state = space.fromcache(CodecState)
     # NB. can't call unicode_encode_utf_8() directly because that's
     # an @elidable function nowadays.  Instead, we need the _impl().
@@ -605,8 +607,9 @@
         final, state.decode_error_handler, mapping)
     return space.newtuple([space.newunicode(result), space.newint(consumed)])
 
-@unwrap_spec(uni=unicode, errors='str_or_None')
-def charmap_encode(space, uni, errors="strict", w_mapping=None):
+@unwrap_spec(utf8='utf8', errors='str_or_None')
+def charmap_encode(space, utf8, utf8len, errors="strict", w_mapping=None):
+    xxx
     if errors is None:
         errors = 'strict'
     if space.is_none(w_mapping):
@@ -621,9 +624,10 @@
     return space.newtuple([space.newbytes(result), space.newint(len(uni))])
 
 
-@unwrap_spec(chars=unicode)
-def charmap_build(space, chars):
+@unwrap_spec(chars='utf8')
+def charmap_build(space, chars, charslen):
     # XXX CPython sometimes uses a three-level trie
+    xxx
     w_charmap = space.newdict()
     for num in range(len(chars)):
         elem = chars[num]
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -4,7 +4,7 @@
     compute_hash, compute_unique_id, import_from_mixin,
     enforceargs, newlist_hint)
 from rpython.rlib.buffer import StringBuffer
-from rpython.rlib.rstring import StringBuilder, split, rsplit
+from rpython.rlib.rstring import StringBuilder, split, rsplit, UnicodeBuilder
 from rpython.rlib.runicode import (
     make_unicode_escape_function, str_decode_ascii, str_decode_utf_8,
     unicode_encode_ascii, unicode_encode_utf_8, fast_str_decode_ascii)
@@ -111,7 +111,7 @@
         return space.newint(ord(self._value[0]))
 
     def _new(self, value):
-        return W_UnicodeObject(value.encode('utf8', len(value)))
+        return W_UnicodeObject(value.encode('utf8'), len(value))
 
     def _new_from_list(self, value):
         xxx
@@ -153,7 +153,7 @@
         assert len(char) == 1
         return char[0]
 
-    _builder = StringBuilder
+    _builder = UnicodeBuilder
 
     def _isupper(self, ch):
         return unicodedb.isupper(ord(ch))
@@ -411,7 +411,8 @@
 
     def descr_islower(self, space):
         cased = False
-        for uchar in self._value:
+        val = self._val(space)
+        for uchar in val:
             if (unicodedb.isupper(ord(uchar)) or
                 unicodedb.istitle(ord(uchar))):
                 return space.w_False
@@ -421,7 +422,7 @@
 
     def descr_isupper(self, space):
         cased = False
-        for uchar in self._value:
+        for uchar in self._val(space):
             if (unicodedb.islower(ord(uchar)) or
                 unicodedb.istitle(ord(uchar))):
                 return space.w_False
@@ -466,7 +467,7 @@
             lgt += w_u._length
             prealloc_size += len(unwrapped[i])
 
-        sb = self._builder(prealloc_size)
+        sb = StringBuilder(prealloc_size)
         for i in range(size):
             if value and i != 0:
                 sb.append(value)
@@ -508,7 +509,7 @@
         if num_zeros <= 0:
             # cannot return self, in case it is a subclass of str
             return W_UnicodeObject(selfval, self._len())
-        builder = self._builder(num_zeros + len(selfval))
+        builder = StringBuilder(num_zeros + len(selfval))
         if len(selfval) > 0 and (selfval[0] == '+' or selfval[0] == '-'):
             # copy sign to first position
             builder.append(selfval[0])
@@ -569,9 +570,6 @@
 
         return W_UnicodeObject(centered, self._len() + d)
 
-    def descr_title(self, space):
-        return 
-
 def wrapunicode(space, uni):
     return W_UnicodeObject(uni)