[pypy-commit] pypy unicode-utf8-py3: avoid elidable failure; should revisit and figure out why

Tue Aug 7 16:03:36 EDT 2018

Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r94970:fbb06dc84f8e
Date: 2018-08-07 09:20 -0700
http://bitbucket.org/pypy/pypy/changeset/fbb06dc84f8e/

Log:	avoid elidable failure; should revisit and figure out why

diff --git a/TODO b/TODO
--- a/TODO
+++ b/TODO
@@ -5,3 +5,4 @@
 * fix _pypyjson to not use a wrapped dict when decoding an object
 * make sure we review all the places that call ord(unichr) to check for ValueErrors
 * rewrite unicodeobject.unicode_to_decimal_w to only use utf8 encoded bytes
+* revisit why runicode import str_decode_utf_8_impl needed instead of runicode import str_decode_utf_8
diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py
--- a/pypy/interpreter/pyparser/error.py
+++ b/pypy/interpreter/pyparser/error.py
@@ -29,7 +29,7 @@
                     except:  # we can't allow any exceptions here!
                         return None""")
         elif self.text is not None:
-            from rpython.rlib.runicode import str_decode_utf_8
+            from rpython.rlib.runicode import str_decode_utf_8_impl
             # self.text may not be UTF-8 in case of decoding errors.
             # adjust the encoded text offset to a decoded offset
             # XXX do the right thing about continuation lines, which
@@ -40,12 +40,12 @@
                 return u'\ufffd', endpos
             if offset > len(self.text):
                 offset = len(self.text)
-            text, _ = str_decode_utf_8(self.text, offset,
-                             'replace', errorhandler=replace_error_handler)
+            text, _ = str_decode_utf_8_impl(self.text, offset,
+                             'replace', False, replace_error_handler, True)
             offset = len(text)
             if len(self.text) != offset:
-                text, _ = str_decode_utf_8(self.text, len(self.text),
-                             'replace', errorhandler=replace_error_handler)
+                text, _ = str_decode_utf_8_impl(self.text, len(self.text),
+                             'replace', False, replace_error_handler, True)
             w_text = space.newtext(text)
         return space.newtuple([
             space.newtext(self.msg),
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -49,6 +49,7 @@
                                              space.newint(startingpos),
                                              space.newint(endingpos),
                                              space.newtext(msg)]))
+        return u'', None, 0
     return raise_unicode_exception_encode
 
 def default_error_encode(
@@ -106,8 +107,8 @@
                                     force_replace=False)
     elif _MACOSX:
         uni = space.utf8_w(w_uni)
-        bytes = runicode.unicode_encode_utf_8_impl(
-            uni, len(uni), 'surrogateescape',
+        bytes = unicodehelper.utf8_encode_utf_8(
+            uni, 'surrogateescape',
             errorhandler=state.encode_error_handler,
             allow_surrogates=False)
     elif space.sys.filesystemencoding is None or state.codec_need_encodings:
@@ -120,8 +121,7 @@
         uni = space.realunicode_w(w_uni)
         if u'\x00' in uni:
             raise oefmt(space.w_ValueError, "embedded null character")
-        bytes = unicode_encode_locale_surrogateescape(
-            uni, errorhandler=encode_error_handler(space))
+        bytes = unicode_encode_locale_surrogateescape(uni)
     else:
         from pypy.module.sys.interp_encoding import getfilesystemencoding
         return space.call_method(w_uni, 'encode',