[pypy-commit] pypy py3.6: fix merge, tests
mattip
pypy.commits at gmail.com
Mon Feb 18 10:07:38 EST 2019
Author: Matti Picus <matti.picus at gmail.com>
Branch: py3.6
Changeset: r96060:6bfc3b49077d
Date: 2019-02-17 18:17 +0200
http://bitbucket.org/pypy/pypy/changeset/6bfc3b49077d/
Log: fix merge, tests
diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py
--- a/pypy/interpreter/test/test_unicodehelper.py
+++ b/pypy/interpreter/test/test_unicodehelper.py
@@ -1,13 +1,6 @@
-import py
-import pytest
-import struct
-import sys
from pypy.interpreter.unicodehelper import (
- encode_utf8, decode_utf8,
- unicode_encode_utf_8,
- unicode_encode_utf_32_be, str_decode_utf_32_be
+ utf8_encode_utf_8, decode_utf8sp,
)
-from pypy.interpreter.unicodehelper import encode_utf8sp, decode_utf8sp
class Hit(Exception):
@@ -20,18 +13,6 @@
raise AttributeError(name)
-def test_encode_utf8():
- space = FakeSpace()
- assert encode_utf8(space, u"abc") == "abc"
- assert encode_utf8(space, u"\u1234") == "\xe1\x88\xb4"
- py.test.raises(Hit, encode_utf8, space, u"\ud800")
- py.test.raises(Hit, encode_utf8, space, u"\udc00")
- # for the following test, go to lengths to avoid CPython's optimizer
- # and .pyc file storage, which collapse the two surrogates into one
- c = u"\udc00"
- py.test.raises(Hit, encode_utf8, space, u"\ud800" + c)
-
-
def test_encode_utf_8_combine_surrogates():
"""
In the case of a surrogate pair, the error handler should
@@ -52,80 +33,20 @@
that is a valid surrogate pair.
"""
assert s[start:end] in [u'\udc80', u'\uD800\uDFFF']
- return [], None, end
+ return '', 0, end
- unicode_encode_utf_8(
- u, len(u), True,
+ utf8_encode_utf_8(
+ u, 'strict',
errorhandler=errorhandler,
allow_surrogates=False
)
-def test_encode_utf8_allow_surrogates():
- sp = FakeSpace()
- assert encode_utf8(sp, u"\ud800", allow_surrogates=True) == "\xed\xa0\x80"
- assert encode_utf8(sp, u"\udc00", allow_surrogates=True) == "\xed\xb0\x80"
- c = u"\udc00"
- got = encode_utf8(sp, u"\ud800" + c, allow_surrogates=True)
- assert got == "\xf0\x90\x80\x80"
-
-def test_encode_utf8sp():
- sp = FakeSpace()
- assert encode_utf8sp(sp, u"\ud800") == "\xed\xa0\x80"
- assert encode_utf8sp(sp, u"\udc00") == "\xed\xb0\x80"
- c = u"\udc00"
- got = encode_utf8sp(sp, u"\ud800" + c)
- assert got == "\xed\xa0\x80\xed\xb0\x80"
-
-def test_decode_utf8():
- space = FakeSpace()
- assert decode_utf8(space, "abc") == u"abc"
- assert decode_utf8(space, "\xe1\x88\xb4") == u"\u1234"
- py.test.raises(Hit, decode_utf8, space, "\xed\xa0\x80")
- py.test.raises(Hit, decode_utf8, space, "\xed\xb0\x80")
- py.test.raises(Hit, decode_utf8, space, "\xed\xa0\x80\xed\xb0\x80")
- got = decode_utf8(space, "\xf0\x90\x80\x80")
- if sys.maxunicode > 65535:
- assert map(ord, got) == [0x10000]
- else:
- assert map(ord, got) == [55296, 56320]
-
-def test_decode_utf8_allow_surrogates():
- sp = FakeSpace()
- assert decode_utf8(sp, "\xed\xa0\x80", allow_surrogates=True) == u"\ud800"
- assert decode_utf8(sp, "\xed\xb0\x80", allow_surrogates=True) == u"\udc00"
- got = decode_utf8(sp, "\xed\xa0\x80\xed\xb0\x80", allow_surrogates=True)
- assert map(ord, got) == [0xd800, 0xdc00]
- got = decode_utf8(sp, "\xf0\x90\x80\x80", allow_surrogates=True)
- assert map(ord, got) == [0x10000]
-
def test_decode_utf8sp():
space = FakeSpace()
- assert decode_utf8sp(space, "\xed\xa0\x80") == u"\ud800"
- assert decode_utf8sp(space, "\xed\xb0\x80") == u"\udc00"
+ assert decode_utf8sp(space, "\xed\xa0\x80") == ("\xed\xa0\x80", 1, 3)
+ assert decode_utf8sp(space, "\xed\xb0\x80") == ("\xed\xb0\x80", 1, 3)
got = decode_utf8sp(space, "\xed\xa0\x80\xed\xb0\x80")
- assert map(ord, got) == [0xd800, 0xdc00]
+ assert map(ord, got[0].decode('utf8')) == [0xd800, 0xdc00]
got = decode_utf8sp(space, "\xf0\x90\x80\x80")
- assert map(ord, got) == [0x10000]
+ assert map(ord, got[0].decode('utf8')) == [0x10000]
-@pytest.mark.parametrize('unich', [u"\ud800", u"\udc80"])
-def test_utf32_surrogates(unich):
- assert (unicode_encode_utf_32_be(unich, 1, None) ==
- struct.pack('>i', ord(unich)))
- with pytest.raises(UnicodeEncodeError):
- unicode_encode_utf_32_be(unich, 1, None, allow_surrogates=False)
-
- def replace_with(ru, rs):
- def errorhandler(errors, enc, msg, u, startingpos, endingpos):
- if errors == 'strict':
- raise UnicodeEncodeError(enc, u, startingpos, endingpos, msg)
- return ru, rs, endingpos
- return unicode_encode_utf_32_be(
- u"<%s>" % unich, 3, None,
- errorhandler, allow_surrogates=False)
-
- assert replace_with(u'rep', None) == u'<rep>'.encode('utf-32-be')
- assert (replace_with(None, '\xca\xfe\xca\xfe') ==
- '\x00\x00\x00<\xca\xfe\xca\xfe\x00\x00\x00>')
-
- with pytest.raises(UnicodeDecodeError):
- str_decode_utf_32_be(b"\x00\x00\xdc\x80", 4, None)
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -1013,8 +1013,6 @@
return result.build()
- # used only in (unused) encode_utf8
- xxx
def decode_utf8sp(space, string):
# Surrogate-preserving utf-8 decoding. Assuming there is no
# encoding error, it should always be reversible, and the reverse is
More information about the pypy-commit
mailing list