[pypy-commit] pypy py3.5: Add PyUnicode_AsUTF8AndSize, change PyUnicode_AsUTF8 to use it
rlamy
pypy.commits at gmail.com
Sat Mar 4 07:37:10 EST 2017
Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch: py3.5
Changeset: r90535:6ba16736df1d
Date: 2017-03-04 13:35 +0100
http://bitbucket.org/pypy/pypy/changeset/6ba16736df1d/
Log: Add PyUnicode_AsUTF8AndSize, change PyUnicode_AsUTF8 to use it
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -235,6 +235,18 @@
""")])
assert module.test_macro_invocations() == u''
+ def test_AsUTF8AndSize(self):
+ module = self.import_extension('foo', [
+ ("utf8", "METH_O",
+ """
+ Py_ssize_t size;
+ char *utf8 = PyUnicode_AsUTF8AndSize(args, &size);
+ return PyBytes_FromStringAndSize(utf8, size);
+ """)])
+ assert module.utf8('xyz') == b'xyz'
+ assert module.utf8('café') == 'café'.encode('utf-8')
+
+
class TestUnicode(BaseApiTest):
def test_unicodeobject(self, space):
encoding = rffi.charp2str(PyUnicode_GetDefaultEncoding(space, ))
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -114,6 +114,9 @@
def set_ascii(py_obj, value):
get_state(py_obj).c_ascii = cts.cast('unsigned int', value)
+def get_ready(py_obj):
+ return get_state(py_obj).c_ready
+
def set_ready(py_obj, value):
get_state(py_obj).c_ready = cts.cast('unsigned int', value)
@@ -318,8 +321,13 @@
set_wbuffer(ref, rffi.unicode2wcharp(u))
return get_wbuffer(ref)
-@cts.decl("char * PyUnicode_AsUTF8(PyObject *unicode)")
-def PyUnicode_AsUTF8(space, ref):
+@cts.decl("char * PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)")
+def PyUnicode_AsUTF8AndSize(space, ref, psize):
+ if not PyUnicode_Check(space, ref):
+ PyErr_BadArgument(space)
+ if not get_ready(ref):
+ res = _PyUnicode_Ready(space, ref)
+
if not get_utf8(ref):
# Copy unicode buffer
w_unicode = from_ref(space, ref)
@@ -327,8 +335,15 @@
"strict")
s = space.bytes_w(w_encoded)
set_utf8(ref, rffi.str2charp(s))
+ set_utf8_len(ref, len(s))
+ if psize:
+ psize[0] = get_utf8_len(ref)
return get_utf8(ref)
+@cts.decl("char * PyUnicode_AsUTF8(PyObject *unicode)")
+def PyUnicode_AsUTF8(space, ref):
+ return PyUnicode_AsUTF8AndSize(space, ref, cts.cast('Py_ssize_t *', 0))
+
@cpython_api([PyObject, rffi.CWCHARP, Py_ssize_t], Py_ssize_t, error=-1)
def PyUnicode_AsWideChar(space, ref, buf, size):
"""Copy the Unicode object contents into the wchar_t buffer w. At most
More information about the pypy-commit
mailing list