[pypy-commit] pypy py3.5: Add PyUnicode_AsUTF8AndSize, change PyUnicode_AsUTF8 to use it

rlamy pypy.commits at gmail.com
Sat Mar 4 07:37:10 EST 2017


Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch: py3.5
Changeset: r90535:6ba16736df1d
Date: 2017-03-04 13:35 +0100
http://bitbucket.org/pypy/pypy/changeset/6ba16736df1d/

Log:	Add PyUnicode_AsUTF8AndSize, change PyUnicode_AsUTF8 to use it

diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -235,6 +235,18 @@
              """)])
         assert module.test_macro_invocations() == u''
 
+    def test_AsUTF8AndSize(self):
+        module = self.import_extension('foo', [
+             ("utf8", "METH_O",
+             """
+                Py_ssize_t size;
+                char *utf8 = PyUnicode_AsUTF8AndSize(args, &size);
+                return PyBytes_FromStringAndSize(utf8, size);
+             """)])
+        assert module.utf8('xyz') == b'xyz'
+        assert module.utf8('café') == 'café'.encode('utf-8')
+
+
 class TestUnicode(BaseApiTest):
     def test_unicodeobject(self, space):
         encoding = rffi.charp2str(PyUnicode_GetDefaultEncoding(space, ))
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -114,6 +114,9 @@
 def set_ascii(py_obj, value):
     get_state(py_obj).c_ascii = cts.cast('unsigned int', value)
 
+def get_ready(py_obj):
+    return get_state(py_obj).c_ready
+
 def set_ready(py_obj, value):
     get_state(py_obj).c_ready = cts.cast('unsigned int', value)
 
@@ -318,8 +321,13 @@
         set_wbuffer(ref, rffi.unicode2wcharp(u))
     return get_wbuffer(ref)
 
-@cts.decl("char * PyUnicode_AsUTF8(PyObject *unicode)")
-def PyUnicode_AsUTF8(space, ref):
+@cts.decl("char * PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)")
+def PyUnicode_AsUTF8AndSize(space, ref, psize):
+    if not PyUnicode_Check(space, ref):
+        PyErr_BadArgument(space)
+    if not get_ready(ref):
+        res = _PyUnicode_Ready(space, ref)
+
     if not get_utf8(ref):
         # Copy unicode buffer
         w_unicode = from_ref(space, ref)
@@ -327,8 +335,15 @@
                                                 "strict")
         s = space.bytes_w(w_encoded)
         set_utf8(ref, rffi.str2charp(s))
+        set_utf8_len(ref, len(s))
+    if psize:
+        psize[0] = get_utf8_len(ref)
     return get_utf8(ref)
 
+@cts.decl("char * PyUnicode_AsUTF8(PyObject *unicode)")
+def PyUnicode_AsUTF8(space, ref):
+    return PyUnicode_AsUTF8AndSize(space, ref, cts.cast('Py_ssize_t *', 0))
+
 @cpython_api([PyObject, rffi.CWCHARP, Py_ssize_t], Py_ssize_t, error=-1)
 def PyUnicode_AsWideChar(space, ref, buf, size):
     """Copy the Unicode object contents into the wchar_t buffer w.  At most


More information about the pypy-commit mailing list