[pypy-commit] pypy unicode-utf8: fix enough to pass all the tests in test_unicodeobject

Sat Nov 4 18:17:01 EDT 2017

Author: fijal
Branch: unicode-utf8
Changeset: r92938:94c9ccfbd63c
Date: 2017-11-04 19:17 +0100
http://bitbucket.org/pypy/pypy/changeset/94c9ccfbd63c/

Log:	fix enough to pass all the tests in test_unicodeobject

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -69,7 +69,7 @@
         final=True, errorhandler=DecodeWrapper(decode_error_handler(space)).handle,
         unicodedata_handler=unicodedata_handler)
     # XXX argh.  we want each surrogate to be encoded separately
-    utf8 = ''.join([u.encode('utf8') for u in result_u])
+    utf8 = result_u.encode('utf8')
     if rutf8.first_non_ascii_char(utf8) == -1:
         flag = rutf8.FLAG_ASCII
     elif _has_surrogate(result_u):
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -475,12 +475,11 @@
     # call the fast version for checking
     try:
         lgt, flag = rutf8.check_utf8(string, allow_surrogates=True)
-    except rutf8.CheckError as e:
+    except rutf8.CheckError:
         # XXX do the way around runicode - we can optimize it later if we
         # decide we care about obscure cases
-        res, consumed, lgt = unicodehelper.str_decode_utf8(string, len(string),
-            errors, final, state.decode_error_handler)
-        flag = unicodehelper._get_flag(res.decode("utf8"))
+        res, consumed, lgt, flag = unicodehelper.str_decode_utf8(string,
+            len(string), errors, final, state.decode_error_handler)
         return space.newtuple([space.newutf8(res, lgt, flag),
                                space.newint(consumed)])
     else:
@@ -695,12 +694,11 @@
 
     unicode_name_handler = state.get_unicodedata_handler(space)
 
-    result, consumed, lgt = unicodehelper.str_decode_unicode_escape(
+    result, consumed, lgt, flag = unicodehelper.str_decode_unicode_escape(
         string, len(string), errors,
         final, state.decode_error_handler,
         unicode_name_handler)
 
-    flag = unicodehelper._get_flag(result.decode('utf8'))
     return space.newtuple([space.newutf8(result, lgt, flag), space.newint(consumed)])
 
 # ____________________________________________________________