[pypy-commit] pypy unicode-utf8-py3: avoid elidable fail, should revisit and figure out why
mattip
pypy.commits at gmail.com
Tue Aug 7 16:03:36 EDT 2018
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r94970:fbb06dc84f8e
Date: 2018-08-07 09:20 -0700
http://bitbucket.org/pypy/pypy/changeset/fbb06dc84f8e/
Log: avoid elidable fail, should revisit and figure out why
diff --git a/TODO b/TODO
--- a/TODO
+++ b/TODO
@@ -5,3 +5,4 @@
* fix _pypyjson to not use a wrapped dict when decoding an object
* make sure we review all the places that call ord(unichr) to check for ValueErrors
* rewrite unicodeobject.unicode_to_decimal_w to only use utf8 encoded bytes
+* revisit why runicode import str_decode_utf_8_impl needed instead of runicode import str_decode_utf_8
diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py
--- a/pypy/interpreter/pyparser/error.py
+++ b/pypy/interpreter/pyparser/error.py
@@ -29,7 +29,7 @@
except: # we can't allow any exceptions here!
return None""")
elif self.text is not None:
- from rpython.rlib.runicode import str_decode_utf_8
+ from rpython.rlib.runicode import str_decode_utf_8_impl
# self.text may not be UTF-8 in case of decoding errors.
# adjust the encoded text offset to a decoded offset
# XXX do the right thing about continuation lines, which
@@ -40,12 +40,12 @@
return u'\ufffd', endpos
if offset > len(self.text):
offset = len(self.text)
- text, _ = str_decode_utf_8(self.text, offset,
- 'replace', errorhandler=replace_error_handler)
+ text, _ = str_decode_utf_8_impl(self.text, offset,
+ 'replace', False, replace_error_handler, True)
offset = len(text)
if len(self.text) != offset:
- text, _ = str_decode_utf_8(self.text, len(self.text),
- 'replace', errorhandler=replace_error_handler)
+ text, _ = str_decode_utf_8_impl(self.text, len(self.text),
+ 'replace', False, replace_error_handler, True)
w_text = space.newtext(text)
return space.newtuple([
space.newtext(self.msg),
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -49,6 +49,7 @@
space.newint(startingpos),
space.newint(endingpos),
space.newtext(msg)]))
+ return u'', None, 0
return raise_unicode_exception_encode
def default_error_encode(
@@ -106,8 +107,8 @@
force_replace=False)
elif _MACOSX:
uni = space.utf8_w(w_uni)
- bytes = runicode.unicode_encode_utf_8_impl(
- uni, len(uni), 'surrogateescape',
+ bytes = unicodehelper.utf8_encode_utf_8(
+ uni, 'surrogateescape',
errorhandler=state.encode_error_handler,
allow_surrogates=False)
elif space.sys.filesystemencoding is None or state.codec_need_encodings:
@@ -120,8 +121,7 @@
uni = space.realunicode_w(w_uni)
if u'\x00' in uni:
raise oefmt(space.w_ValueError, "embedded null character")
- bytes = unicode_encode_locale_surrogateescape(
- uni, errorhandler=encode_error_handler(space))
+ bytes = unicode_encode_locale_surrogateescape(uni)
else:
from pypy.module.sys.interp_encoding import getfilesystemencoding
return space.call_method(w_uni, 'encode',
More information about the pypy-commit mailing list