[pypy-commit] pypy unicode-utf8-py3: try and fail to make progress with test_unicodehelper
mattip
pypy.commits at gmail.com
Thu Jun 14 01:43:30 EDT 2018
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r94763:52d2576ff698
Date: 2018-06-13 22:42 -0700
http://bitbucket.org/pypy/pypy/changeset/52d2576ff698/
Log: try and fail to make progress with test_unicodehelper
diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py
--- a/pypy/interpreter/test/test_unicodehelper.py
+++ b/pypy/interpreter/test/test_unicodehelper.py
@@ -6,14 +6,19 @@
from pypy.interpreter.unicodehelper import (
encode_utf8, str_decode_utf8, utf8_encode_utf_32_be, str_decode_utf_32_be)
from pypy.interpreter.unicodehelper import encode_utf8sp, decode_utf8sp
-
+from pypy.interpreter.unicodehelper import utf8_encode_ascii, str_decode_ascii
+from pypy.interpreter import unicodehelper as uh
+from pypy.module._codecs.interp_codecs import CodecState
class Hit(Exception):
pass
-from pypy.interpreter.unicodehelper import utf8_encode_ascii, str_decode_ascii
-from pypy.interpreter import unicodehelper as uh
-from pypy.module._codecs.interp_codecs import CodecState
+class FakeSpace:
+ def __getattr__(self, name):
+ if name in ('w_UnicodeEncodeError', 'w_UnicodeDecodeError'):
+ raise Hit
+ raise AttributeError(name)
+
def decode_utf8(u):
return str_decode_utf8(u, "strict", True, None)
@@ -82,18 +87,23 @@
@pytest.mark.parametrize('unich', [u"\ud800", u"\udc80"])
def test_utf32_surrogates(unich):
- assert (unicode_encode_utf_32_be(unich, 1, None) ==
+ assert (utf8_encode_utf_32_be(unich.encode('utf-8'), None) ==
struct.pack('>i', ord(unich)))
with pytest.raises(UnicodeEncodeError):
- unicode_encode_utf_32_be(unich, 1, None, allow_surrogates=False)
+ def errorhandler(errors, enc, msg, b, startingpos, endingpos):
+ u = b.decode('utf-8')
+ raise UnicodeEncodeError(enc, u, startingpos, endingpos, msg)
+ utf8_encode_utf_32_be(unich.encode('utf-8'), None, errorhandler,
+ allow_surrogates=False)
def replace_with(ru, rs):
def errorhandler(errors, enc, msg, u, startingpos, endingpos):
if errors == 'strict':
raise UnicodeEncodeError(enc, u, startingpos, endingpos, msg)
- return ru, rs, endingpos
- return unicode_encode_utf_32_be(
- u"<%s>" % unich, 3, None,
+ return ru.encode('utf-8'), endingpos
+ uch = u"<%s>" % unich
+ return utf8_encode_utf_32_be(
+ uch.encode('utf8'), None,
errorhandler, allow_surrogates=False)
assert replace_with(u'rep', None) == u'<rep>'.encode('utf-32-be')
assert (replace_with(None, '\xca\xfe\xca\xfe') ==
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -93,12 +93,12 @@
from pypy.module._codecs import interp_codecs
state = space.fromcache(interp_codecs.CodecState)
if _WIN32:
- uni = space.unicode_w(w_uni)
+ uni = space.utf8_w(w_uni)
bytes = unicode_encode_mbcs(uni, len(uni), 'strict',
errorhandler=encode_error_handler(space),
force_replace=False)
elif _MACOSX:
- uni = space.unicode_w(w_uni)
+ uni = space.utf8_w(w_uni)
bytes = runicode.unicode_encode_utf_8_impl(
uni, len(uni), 'surrogateescape',
errorhandler=state.encode_error_handler,
@@ -110,8 +110,8 @@
# instead
from pypy.module._codecs.locale import (
unicode_encode_locale_surrogateescape)
- uni = space.unicode_w(w_uni)
- if u'\x00' in uni:
+ uni = space.utf8_w(w_uni)
+ if b'\x00' in uni:
raise oefmt(space.w_ValueError, "embedded null character")
bytes = unicode_encode_locale_surrogateescape(
uni, errorhandler=encode_error_handler(space))
More information about the pypy-commit mailing list