[pypy-commit] pypy default: PyUnicode_AsUTF{16,32}String()
arigo
pypy.commits at gmail.com
Sun Jan 21 03:58:43 EST 2018
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r93692:84180176fef1
Date: 2018-01-21 09:58 +0100
http://bitbucket.org/pypy/pypy/changeset/84180176fef1/
Log: PyUnicode_AsUTF{16,32}String()
diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -1552,14 +1552,6 @@
"""
raise NotImplementedError
-@cpython_api([PyObject], PyObject)
-def PyUnicode_AsUTF32String(space, unicode):
- """Return a Python string using the UTF-32 encoding in native byte order. The
- string always starts with a BOM mark. Error handling is "strict". Return
- NULL if an exception was raised by the codec.
- """
- raise NotImplementedError
-
@cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP, Py_ssize_t], PyObject)
def PyUnicode_DecodeUTF16Stateful(space, s, size, errors, byteorder, consumed):
"""If consumed is NULL, behave like PyUnicode_DecodeUTF16(). If
@@ -1595,13 +1587,6 @@
changes in your code for properly supporting 64-bit systems."""
raise NotImplementedError
-@cpython_api([PyObject], PyObject)
-def PyUnicode_AsUTF16String(space, unicode):
- """Return a Python string using the UTF-16 encoding in native byte order. The
- string always starts with a BOM mark. Error handling is "strict". Return
- NULL if an exception was raised by the codec."""
- raise NotImplementedError
-
@cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP], PyObject)
def PyUnicode_DecodeUTF7(space, s, size, errors):
"""Create a Unicode object by decoding size bytes of the UTF-7 encoded string
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -145,6 +145,20 @@
res = module.test_unicode_format(1, "xyz")
assert res == u"bla 1 ble xyz\n"
+ def test_AsUTFNString(self):
+ module = self.import_extension('foo', [
+ ("asutf8", "METH_O", "return PyUnicode_AsUTF8String(args);"),
+ ("asutf16", "METH_O", "return PyUnicode_AsUTF16String(args);"),
+ ("asutf32", "METH_O", "return PyUnicode_AsUTF32String(args);"),
+ ])
+ u = u'sp\x09m\u1234\U00012345'
+ s = module.asutf8(u)
+ assert s == u.encode('utf-8')
+ s = module.asutf16(u)
+ assert s == u.encode('utf-16')
+ s = module.asutf32(u)
+ assert s == u.encode('utf-32')
+
class TestUnicode(BaseApiTest):
def test_unicodeobject(self, space):
@@ -247,10 +261,24 @@
lltype.free(ar, flavor='raw')
def test_AsUTF8String(self, space):
- w_u = space.wrap(u'sp\x09m')
+ w_u = space.wrap(u'sp\x09m\u1234')
w_res = PyUnicode_AsUTF8String(space, w_u)
assert space.type(w_res) is space.w_bytes
- assert space.unwrap(w_res) == 'sp\tm'
+ assert space.unwrap(w_res) == 'sp\tm\xe1\x88\xb4'
+
+ def test_AsUTF16String(self, space):
+ u = u'sp\x09m\u1234\U00012345'
+ w_u = space.wrap(u)
+ w_res = PyUnicode_AsUTF16String(space, w_u)
+ assert space.type(w_res) is space.w_bytes
+ assert space.unwrap(w_res) == u.encode('utf-16')
+
+ def test_AsUTF32String(self, space):
+ u = u'sp\x09m\u1234\U00012345'
+ w_u = space.wrap(u)
+ w_res = PyUnicode_AsUTF32String(space, w_u)
+ assert space.type(w_res) is space.w_bytes
+ assert space.unwrap(w_res) == u.encode('utf-32')
def test_decode_utf8(self, space):
u = rffi.str2charp(u'sp\x134m'.encode("utf-8"))
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -474,7 +474,7 @@
ref[0] = rffi.cast(PyObject, py_newuni)
return 0
-def make_conversion_functions(suffix, encoding):
+def make_conversion_functions(suffix, encoding, only_for_asstring=False):
@cpython_api([PyObject], PyObject)
@func_renamer('PyUnicode_As%sString' % suffix)
def PyUnicode_AsXXXString(space, w_unicode):
@@ -486,6 +486,9 @@
return unicodeobject.encode_object(space, w_unicode, encoding, "strict")
globals()['PyUnicode_As%sString' % suffix] = PyUnicode_AsXXXString
+ if only_for_asstring:
+ return
+
@cpython_api([CONST_STRING, Py_ssize_t, CONST_STRING], PyObject)
@func_renamer('PyUnicode_Decode%s' % suffix)
def PyUnicode_DecodeXXX(space, s, size, errors):
@@ -516,6 +519,8 @@
globals()['PyUnicode_Encode%s' % suffix] = PyUnicode_EncodeXXX
make_conversion_functions('UTF8', 'utf-8')
+make_conversion_functions('UTF16', 'utf-16', only_for_asstring=True)
+make_conversion_functions('UTF32', 'utf-32', only_for_asstring=True)
make_conversion_functions('ASCII', 'ascii')
make_conversion_functions('Latin1', 'latin-1')
if sys.platform == 'win32':
More information about the pypy-commit
mailing list