[pypy-commit] pypy unicode-utf8-py3: try and fail to make progress with test_unicodehelper

Thu Jun 14 01:43:30 EDT 2018

Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r94763:52d2576ff698
Date: 2018-06-13 22:42 -0700
http://bitbucket.org/pypy/pypy/changeset/52d2576ff698/

Log:	try and fail to make progress with test_unicodehelper

diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py
--- a/pypy/interpreter/test/test_unicodehelper.py
+++ b/pypy/interpreter/test/test_unicodehelper.py
@@ -6,14 +6,19 @@
 from pypy.interpreter.unicodehelper import (
     encode_utf8, str_decode_utf8, utf8_encode_utf_32_be, str_decode_utf_32_be)
 from pypy.interpreter.unicodehelper import encode_utf8sp, decode_utf8sp
-
+from pypy.interpreter.unicodehelper import utf8_encode_ascii, str_decode_ascii
+from pypy.interpreter import unicodehelper as uh
+from pypy.module._codecs.interp_codecs import CodecState
 
 class Hit(Exception):
     pass
 
-from pypy.interpreter.unicodehelper import utf8_encode_ascii, str_decode_ascii
-from pypy.interpreter import unicodehelper as uh
-from pypy.module._codecs.interp_codecs import CodecState
+class FakeSpace:
+    def __getattr__(self, name):
+        if name in ('w_UnicodeEncodeError', 'w_UnicodeDecodeError'):
+            raise Hit
+        raise AttributeError(name)
+
 
 def decode_utf8(u):
     return str_decode_utf8(u, "strict", True, None)
@@ -82,18 +87,23 @@
 
 @pytest.mark.parametrize('unich', [u"\ud800", u"\udc80"])
 def test_utf32_surrogates(unich):
-    assert (unicode_encode_utf_32_be(unich, 1, None) ==
+    assert (utf8_encode_utf_32_be(unich.encode('utf-8'), None) ==
             struct.pack('>i', ord(unich)))
     with pytest.raises(UnicodeEncodeError):
-        unicode_encode_utf_32_be(unich, 1, None, allow_surrogates=False)
+        def errorhandler(errors, enc, msg, b, startingpos, endingpos):
+             u = b.decode('utf-8')
+             raise UnicodeEncodeError(enc, u, startingpos, endingpos, msg)
+        utf8_encode_utf_32_be(unich.encode('utf-8'), None, errorhandler,
+                              allow_surrogates=False)
 
     def replace_with(ru, rs):
         def errorhandler(errors, enc, msg, u, startingpos, endingpos):
             if errors == 'strict':
                 raise UnicodeEncodeError(enc, u, startingpos, endingpos, msg)
-            return ru, rs, endingpos
-        return unicode_encode_utf_32_be(
-            u"<%s>" % unich, 3, None,
+            return ru.encode('utf-8'), endingpos
+        uch = u"<%s>" % unich
+        return utf8_encode_utf_32_be(
+            uch.encode('utf8'), None,
             errorhandler, allow_surrogates=False)
     assert replace_with(u'rep', None) == u'<rep>'.encode('utf-32-be')
     assert (replace_with(None, '\xca\xfe\xca\xfe') ==
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -93,12 +93,12 @@
     from pypy.module._codecs import interp_codecs
     state = space.fromcache(interp_codecs.CodecState)
     if _WIN32:
-        uni = space.unicode_w(w_uni)
+        uni = space.utf8_w(w_uni)
         bytes = unicode_encode_mbcs(uni, len(uni), 'strict',
                                     errorhandler=encode_error_handler(space),
                                     force_replace=False)
     elif _MACOSX:
-        uni = space.unicode_w(w_uni)
+        uni = space.utf8_w(w_uni)
         bytes = runicode.unicode_encode_utf_8_impl(
             uni, len(uni), 'surrogateescape',
             errorhandler=state.encode_error_handler,
@@ -110,8 +110,8 @@
         # instead
         from pypy.module._codecs.locale import (
             unicode_encode_locale_surrogateescape)
-        uni = space.unicode_w(w_uni)
-        if u'\x00' in uni:
+        uni = space.utf8_w(w_uni)
+        if b'\x00' in uni:
             raise oefmt(space.w_ValueError, "embedded null character")
         bytes = unicode_encode_locale_surrogateescape(
             uni, errorhandler=encode_error_handler(space))