[pypy-commit] pypy unicode-utf8: get enough plumbing to start running more tests
fijal
pypy.commits at gmail.com
Sat Feb 25 10:18:43 EST 2017
Author: fijal
Branch: unicode-utf8
Changeset: r90354:6af99b57c74f
Date: 2017-02-25 16:18 +0100
http://bitbucket.org/pypy/pypy/changeset/6af99b57c74f/
Log: get enough plumbing to start running more tests
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -159,6 +159,7 @@
def visit_utf8(self, el, app_sig):
self.checked_space_method(el, app_sig)
+ self.orig_arg() # iterate
def visit_nonnegint(self, el, app_sig):
self.checked_space_method(el, app_sig)
@@ -607,6 +608,16 @@
"the name of an argument of the following "
"function" % (name,))
+ if kw_spec:
+ filtered = []
+ i = 0
+ while i < len(unwrap_spec):
+ elem = unwrap_spec[i]
+ filtered.append(elem)
+ if elem == 'utf8':
+ i += 1
+ i += 1
+ unwrap_spec = filtered
return unwrap_spec
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -74,3 +74,8 @@
uni, len(uni), "strict",
errorhandler=raise_unicode_exception_encode,
allow_surrogates=True)
+
+def utf8_encode_ascii(utf8, utf8len, errors, errorhandler):
+ if len(utf8) == utf8len:
+ return utf8
+ xxx
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -371,15 +371,16 @@
from rpython.rlib import runicode
def make_encoder_wrapper(name):
- rname = "unicode_encode_%s" % (name.replace("_encode", ""), )
- assert hasattr(runicode, rname)
- @unwrap_spec(uni='utf8', errors='str_or_None')
+ rname = "utf8_encode_%s" % (name.replace("_encode", ""), )
+ @unwrap_spec(utf8='utf8', errors='str_or_None')
def wrap_encoder(space, utf8, utf8len, errors="strict"):
+ from pypy.interpreter import unicodehelper
+
if errors is None:
errors = 'strict'
state = space.fromcache(CodecState)
- func = getattr(runicode, rname)
- result = func(utf8, len(utf8), utf8len,
+ func = getattr(unicodehelper, rname)
+ result = func(utf8, utf8len,
errors, state.encode_error_handler)
return space.newtuple([space.newbytes(result), space.newint(utf8len)])
wrap_encoder.func_name = rname
@@ -438,10 +439,11 @@
# utf-8 functions are not regular, because we have to pass
# "allow_surrogates=True"
-@unwrap_spec(uni=unicode, errors='str_or_None')
-def utf_8_encode(space, uni, errors="strict"):
+@unwrap_spec(utf8='utf8', errors='str_or_None')
+def utf_8_encode(space, utf8, utf8len, errors="strict"):
if errors is None:
errors = 'strict'
+ xxx
state = space.fromcache(CodecState)
# NB. can't call unicode_encode_utf_8() directly because that's
# an @elidable function nowadays. Instead, we need the _impl().
@@ -605,8 +607,9 @@
final, state.decode_error_handler, mapping)
return space.newtuple([space.newunicode(result), space.newint(consumed)])
-@unwrap_spec(uni=unicode, errors='str_or_None')
-def charmap_encode(space, uni, errors="strict", w_mapping=None):
+@unwrap_spec(utf8='utf8', errors='str_or_None')
+def charmap_encode(space, utf8, utf8len, errors="strict", w_mapping=None):
+ xxx
if errors is None:
errors = 'strict'
if space.is_none(w_mapping):
@@ -621,9 +624,10 @@
return space.newtuple([space.newbytes(result), space.newint(len(uni))])
-@unwrap_spec(chars=unicode)
-def charmap_build(space, chars):
+@unwrap_spec(chars='utf8')
+def charmap_build(space, chars, charslen):
# XXX CPython sometimes uses a three-level trie
+ xxx
w_charmap = space.newdict()
for num in range(len(chars)):
elem = chars[num]
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -4,7 +4,7 @@
compute_hash, compute_unique_id, import_from_mixin,
enforceargs, newlist_hint)
from rpython.rlib.buffer import StringBuffer
-from rpython.rlib.rstring import StringBuilder, split, rsplit
+from rpython.rlib.rstring import StringBuilder, split, rsplit, UnicodeBuilder
from rpython.rlib.runicode import (
make_unicode_escape_function, str_decode_ascii, str_decode_utf_8,
unicode_encode_ascii, unicode_encode_utf_8, fast_str_decode_ascii)
@@ -111,7 +111,7 @@
return space.newint(ord(self._value[0]))
def _new(self, value):
- return W_UnicodeObject(value.encode('utf8', len(value)))
+ return W_UnicodeObject(value.encode('utf8'), len(value))
def _new_from_list(self, value):
xxx
@@ -153,7 +153,7 @@
assert len(char) == 1
return char[0]
- _builder = StringBuilder
+ _builder = UnicodeBuilder
def _isupper(self, ch):
return unicodedb.isupper(ord(ch))
@@ -411,7 +411,8 @@
def descr_islower(self, space):
cased = False
- for uchar in self._value:
+ val = self._val(space)
+ for uchar in val:
if (unicodedb.isupper(ord(uchar)) or
unicodedb.istitle(ord(uchar))):
return space.w_False
@@ -421,7 +422,7 @@
def descr_isupper(self, space):
cased = False
- for uchar in self._value:
+ for uchar in self._val(space):
if (unicodedb.islower(ord(uchar)) or
unicodedb.istitle(ord(uchar))):
return space.w_False
@@ -466,7 +467,7 @@
lgt += w_u._length
prealloc_size += len(unwrapped[i])
- sb = self._builder(prealloc_size)
+ sb = StringBuilder(prealloc_size)
for i in range(size):
if value and i != 0:
sb.append(value)
@@ -508,7 +509,7 @@
if num_zeros <= 0:
# cannot return self, in case it is a subclass of str
return W_UnicodeObject(selfval, self._len())
- builder = self._builder(num_zeros + len(selfval))
+ builder = StringBuilder(num_zeros + len(selfval))
if len(selfval) > 0 and (selfval[0] == '+' or selfval[0] == '-'):
# copy sign to first position
builder.append(selfval[0])
@@ -569,9 +570,6 @@
return W_UnicodeObject(centered, self._len() + d)
- def descr_title(self, space):
- return
-
def wrapunicode(space, uni):
return W_UnicodeObject(uni)
More information about the pypy-commit
mailing list