[pypy-commit] pypy PEP393: Reimplement PyUnicode_AS_UNICODE and PyUnicode_AS_DATA as C macros

Tue Jan 31 15:33:26 EST 2017

Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch: PEP393
Changeset: r89863:47a51ca1c26f
Date: 2017-01-31 20:32 +0000
http://bitbucket.org/pypy/pypy/changeset/47a51ca1c26f/

Log:	Reimplement PyUnicode_AS_UNICODE and PyUnicode_AS_DATA as C macros

diff --git a/pypy/module/cpyext/include/unicodeobject.h b/pypy/module/cpyext/include/unicodeobject.h
--- a/pypy/module/cpyext/include/unicodeobject.h
+++ b/pypy/module/cpyext/include/unicodeobject.h
@@ -7,6 +7,20 @@
 
 #include <cpyext_unicodeobject.h>
 
+/* Alias for PyUnicode_AsUnicode().  This will create a wchar_t/Py_UNICODE
+   representation on demand.  Using this macro is very inefficient now,
+   try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
+   use PyUnicode_WRITE() and PyUnicode_READ(). */
+
+#define PyUnicode_AS_UNICODE(op) \
+    (assert(PyUnicode_Check(op)), \
+     (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
+      PyUnicode_AsUnicode((PyObject *)(op)))
+
+#define PyUnicode_AS_DATA(op) \
+    ((const char *)(PyUnicode_AS_UNICODE(op)))
+
+
 PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char *format, va_list vargs);
 PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char *format, ...);
 
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -226,13 +226,11 @@
 
     def test_AS(self, space):
         word = space.wrap(u'spam')
-        array = rffi.cast(rffi.CWCHARP, PyUnicode_AS_DATA(space, word))
-        array2 = PyUnicode_AS_UNICODE(space, word)
-        array3 = PyUnicode_AsUnicode(space, word)
+        array = rffi.cast(rffi.CWCHARP, PyUnicode_AsUnicode(space, word))
+        array2 = PyUnicode_AsUnicode(space, word)
         for (i, char) in enumerate(space.unwrap(word)):
             assert array[i] == char
             assert array2[i] == char
-            assert array3[i] == char
         with raises_w(space, TypeError):
             PyUnicode_AsUnicode(space, space.newbytes('spam'))
 
@@ -625,7 +623,7 @@
         count1 = space.int_w(space.len(w_x))
         target_chunk = lltype.malloc(rffi.CWCHARP.TO, count1, flavor='raw')
 
-        x_chunk = PyUnicode_AS_UNICODE(space, w_x)
+        x_chunk = PyUnicode_AsUnicode(space, w_x)
         Py_UNICODE_COPY(space, target_chunk, x_chunk, 4)
         w_y = space.wrap(rffi.wcharpsize2unicode(target_chunk, 4))
 
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -253,12 +253,6 @@
     """Get the maximum ordinal for a Unicode character."""
     return runicode.UNICHR(runicode.MAXUNICODE)
 
-@cpython_api([rffi.VOIDP], rffi.CCHARP, error=CANNOT_FAIL)
-def PyUnicode_AS_DATA(space, ref):
-    """Return a pointer to the internal buffer of the object. o has to be a
-    PyUnicodeObject (not checked)."""
-    return rffi.cast(rffi.CCHARP, PyUnicode_AS_UNICODE(space, ref))
-
 @cpython_api([rffi.VOIDP], Py_ssize_t, error=CANNOT_FAIL)
 def PyUnicode_GET_DATA_SIZE(space, w_obj):
     """Return the size of the object's internal buffer in bytes.  o has to be a
@@ -330,25 +324,6 @@
         set_utf8_len(py_obj, 0)
 
 
-@cpython_api([rffi.VOIDP], rffi.CWCHARP, error=CANNOT_FAIL)
-def PyUnicode_AS_UNICODE(space, ref):
-    """Return a pointer to the internal Py_UNICODE buffer of the object.  ref
-    has to be a PyUnicodeObject (not checked).
-
-    CPython description:
-
-    Alias for PyUnicode_AsUnicode().  This will create a wchar_t/Py_UNICODE
-    representation on demand.  Using this macro is very inefficient now,
-    try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
-    use PyUnicode_WRITE() and PyUnicode_READ().
-    """
-    if not get_wbuffer(ref):
-        # Copy unicode buffer
-        w_unicode = from_ref(space, rffi.cast(PyObject, ref))
-        u = space.unicode_w(w_unicode)
-        set_wbuffer(ref, rffi.unicode2wcharp(u))
-    return get_wbuffer(ref)
-
 @cpython_api([PyObject], rffi.CWCHARP)
 def PyUnicode_AsUnicode(space, ref):
     """Return a read-only pointer to the Unicode object's internal Py_UNICODE
@@ -357,7 +332,12 @@
     w_type = from_ref(space, rffi.cast(PyObject, ref.c_ob_type))
     if not space.issubtype_w(w_type, space.w_unicode):
         raise oefmt(space.w_TypeError, "expected unicode object")
-    return PyUnicode_AS_UNICODE(space, rffi.cast(rffi.VOIDP, ref))
+    if not get_wbuffer(ref):
+        # Copy unicode buffer
+        w_unicode = from_ref(space, rffi.cast(PyObject, ref))
+        u = space.unicode_w(w_unicode)
+        set_wbuffer(ref, rffi.unicode2wcharp(u))
+    return get_wbuffer(ref)
 
 @cts.decl("char * PyUnicode_AsUTF8(PyObject *unicode)")
 def PyUnicode_AsUTF8(space, ref):
@@ -402,8 +382,7 @@
     string may or may not be 0-terminated.  It is the responsibility of the caller
     to make sure that the wchar_t string is 0-terminated in case this is
     required by the application."""
-    ref = rffi.cast(PyUnicodeObject, ref)
-    c_buffer = PyUnicode_AS_UNICODE(space, rffi.cast(rffi.VOIDP, ref))
+    c_buffer = PyUnicode_AsUnicode(space, ref)
     c_length = get_wsize(ref)
 
     # If possible, try to copy the 0-termination as well