[pypy-commit] pypy unicode-utf8: whack the slowpath too
fijal
pypy.commits at gmail.com
Fri Dec 8 06:10:56 EST 2017
Author: fijal
Branch: unicode-utf8
Changeset: r93305:a50930e1db6b
Date: 2017-12-07 18:07 +0200
http://bitbucket.org/pypy/pypy/changeset/a50930e1db6b/
Log: whack the slowpath too
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -460,10 +460,12 @@
# utf-8 functions are not regular, because we have to pass
# "allow_surrogates=True"
-@unwrap_spec(utf8='utf8', errors='text_or_none')
-def utf_8_encode(space, utf8, errors="strict"):
- length, _ = rutf8.check_utf8(utf8, allow_surrogates=True)
- return space.newtuple([space.newbytes(utf8), space.newint(length)])
+@unwrap_spec(errors='text_or_none')
+def utf_8_encode(space, w_obj, errors="strict"):
+ utf8, lgt = space.utf8_len_w(w_obj)
+ if rutf8.has_surrogates(utf8):
+ utf8 = rutf8.reencode_utf8_with_surrogates(utf8)
+ return space.newtuple([space.newbytes(utf8), space.newint(lgt)])
#@unwrap_spec(uni=unicode, errors='text_or_none')
#def utf_8_encode(space, uni, errors="strict"):
# if errors is None:
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -741,6 +741,8 @@
assert u'\u20ac'.encode('utf-8') == '\xe2\x82\xac'
assert u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82'
assert u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96'
+ assert u'\ud800\udc02'.encode('uTf-8') == '\xf0\x90\x80\x82'
+ assert u'\ud84d\udc56'.encode('Utf8') == '\xf0\xa3\x91\x96'
assert u'\ud800'.encode('utf-8') == '\xed\xa0\x80'
assert u'\udc00'.encode('utf-8') == '\xed\xb0\x80'
assert (u'\ud800\udc02'*1000).encode('utf-8') == '\xf0\x90\x80\x82'*1000
More information about the pypy-commit
mailing list