[pypy-commit] pypy code_page-utf8: test, fix encoding code_pages
mattip
pypy.commits at gmail.com
Tue Nov 12 22:35:54 EST 2019
Author: Matti Picus <matti.picus at gmail.com>
Branch: code_page-utf8
Changeset: r98040:5db4b2b481b5
Date: 2019-11-12 22:01 -0500
http://bitbucket.org/pypy/pypy/changeset/5db4b2b481b5/
Log: test, fix encoding code_pages
diff --git a/pypy/interpreter/unicodehelper_win32.py b/pypy/interpreter/unicodehelper_win32.py
--- a/pypy/interpreter/unicodehelper_win32.py
+++ b/pypy/interpreter/unicodehelper_win32.py
@@ -163,12 +163,18 @@
charsize = 1
else:
chars[0] = Py_UNICODE_HIGH_SURROGATE(uni)
- chars[0] = Py_UNICODE_LOW_SURROGATE(uni)
+ chars[1] = Py_UNICODE_LOW_SURROGATE(uni)
charsize = 2
# first get the size of the result
outsize = WideCharToMultiByte(cp, flags, chars, charsize, None, 0,
None, used_default_p)
+
if outsize == 0:
+ if rwin32.GetLastError_saved() == rwin32.ERROR_NO_UNICODE_TRANSLATION:
+ r, pos, retype = errorhandler(errors, name,
+ "invalid character", s, pos, pos+1)
+ res.append(r)
+ continue
raise rwin32.lastSavedWindowsError()
# If we used a default char, then we failed!
if (used_default_p and rffi.cast(lltype.Bool, used_default_p[0])):
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -1,4 +1,5 @@
import sys
+import pytest
class AppTestCodecs:
spaceconfig = {
@@ -525,6 +526,57 @@
):
check_decode(1252, test)
+ def test_encode_65001(self):
+ tests = [
+ ('abc', 'strict', b'abc'),
+ ('\xe9\u20ac', 'strict', b'\xc3\xa9\xe2\x82\xac'),
+ ('\U0010ffff', 'strict', b'\xf4\x8f\xbf\xbf'),
+ ('\udc80', 'strict', None),
+ ('\udc80', 'ignore', b''),
+ ('\udc80', 'replace', b'?'),
+ ('\udc80', 'backslashreplace', b'\\udc80'),
+ ('\udc80', 'namereplace', b'\\udc80'),
+ ('\udc80', 'surrogatepass', b'\xed\xb2\x80'),
+ ]
+ for text, errors, expected in tests:
+ if expected is not None:
+ try:
+ encoded = text.encode('cp65001', errors)
+ except UnicodeEncodeError as err:
+ assert False, ('Unable to encode %a to cp65001 with '
+ 'errors=%r: %s' % (text, errors, err))
+ assert encoded ==expected, ('%a.encode("cp65001", %r)=%a != %a'
+ % (text, errors, encoded, expected))
+ else:
+ raises(UnicodeEncodeError, text.encode, "cp65001", errors)
+
+ def test_decode_65001(self):
+ tests = [
+ (b'abc', 'strict', 'abc'),
+ (b'\xc3\xa9\xe2\x82\xac', 'strict', '\xe9\u20ac'),
+ (b'\xf4\x8f\xbf\xbf', 'strict', '\U0010ffff'),
+ (b'\xef\xbf\xbd', 'strict', '\ufffd'),
+ (b'[\xc3\xa9]', 'strict', '[\xe9]'),
+ # invalid bytes
+ (b'[\xff]', 'strict', None),
+ (b'[\xff]', 'ignore', '[]'),
+ (b'[\xff]', 'replace', '[\ufffd]'),
+ (b'[\xff]', 'surrogateescape', '[\udcff]'),
+ (b'[\xed\xb2\x80]', 'strict', None),
+ (b'[\xed\xb2\x80]', 'ignore', '[]'),
+ (b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'),
+ ]
+ for raw, errors, expected in tests:
+ if expected is not None:
+ try:
+ decoded = raw.decode('cp65001', errors)
+ except UnicodeDecodeError as err:
+ assert False, ('Unable to decode %a from cp65001 with '
+ 'errors=%r: %s' % (raw, errors, err))
+ assert decoded == expected, ('%a.decode("cp65001", %r)=%a != %a'
+ % (raw, errors, decoded, expected))
+ else:
+ raises(UnicodeDecodeError, raw.decode, 'cp65001', errors)
class AppTestPartialEvaluation:
More information about the pypy-commit mailing list