[pypy-commit] pypy unicode-utf8: small fixes, for revisiting later once we actually want tests to pass

fijal pypy.commits at gmail.com
Sat Nov 4 18:16:57 EDT 2017


Author: fijal
Branch: unicode-utf8
Changeset: r92936:71debd44669a
Date: 2017-11-04 15:31 +0100
http://bitbucket.org/pypy/pypy/changeset/71debd44669a/

Log:	small fixes, for revisiting later once we actually want tests to pass

diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -478,10 +478,10 @@
     except rutf8.CheckError as e:
         # XXX do the way around runicode - we can optimize it later if we
         # decide we care about obscure cases
-        xxx
         res, consumed, lgt = unicodehelper.str_decode_utf8(string, len(string),
             errors, final, state.decode_error_handler)
-        return space.newtuple([space.newutf8(res, lgt),
+        flag = unicodehelper._get_flag(res.decode("utf8"))
+        return space.newtuple([space.newutf8(res, lgt, flag),
                                space.newint(consumed)])
     else:
         return space.newtuple([space.newutf8(string, lgt, flag),
@@ -700,7 +700,8 @@
         final, state.decode_error_handler,
         unicode_name_handler)
 
-    return space.newtuple([space.newutf8(result, lgt), space.newint(consumed)])
+    flag = unicodehelper._get_flag(result.decode('utf8'))
+    return space.newtuple([space.newutf8(result, lgt, flag), space.newint(consumed)])
 
 # ____________________________________________________________
 # Unicode-internal
diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py
--- a/pypy/module/_multibytecodec/interp_multibytecodec.py
+++ b/pypy/module/_multibytecodec/interp_multibytecodec.py
@@ -74,11 +74,12 @@
             space.newtext(e.reason)]))
 
 def wrap_unicodeencodeerror(space, e, input, inputlen, name):
+    flag = 13
     raise OperationError(
         space.w_UnicodeEncodeError,
         space.newtuple([
             space.newtext(name),
-            space.newutf8(input, inputlen),
+            space.newutf8(input, inputlen, flag),
             space.newint(e.start),
             space.newint(e.end),
             space.newtext(e.reason)]))
diff --git a/pypy/module/pyexpat/interp_pyexpat.py b/pypy/module/pyexpat/interp_pyexpat.py
--- a/pypy/module/pyexpat/interp_pyexpat.py
+++ b/pypy/module/pyexpat/interp_pyexpat.py
@@ -478,8 +478,8 @@
             # I suppose this is a valid utf8, but there is noone to check
             # and noone to catch an error either
             try:
-                lgt = rutf8.check_utf8(s, True)
-                return space.newutf8(s, lgt)
+                lgt, flag = rutf8.check_utf8(s, True)
+                return space.newutf8(s, lgt, flag)
             except rutf8.CheckError:
                 from pypy.interpreter import unicodehelper
                 # get the correct error msg


More information about the pypy-commit mailing list