[pypy-commit] pypy PEP393: Reimplement PyUnicode_AS_UNICODE and PyUnicode_AS_DATA as C macros
rlamy
pypy.commits at gmail.com
Tue Jan 31 15:33:26 EST 2017
Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch: PEP393
Changeset: r89863:47a51ca1c26f
Date: 2017-01-31 20:32 +0000
http://bitbucket.org/pypy/pypy/changeset/47a51ca1c26f/
Log: Reimplement PyUnicode_AS_UNICODE and PyUnicode_AS_DATA as C macros
diff --git a/pypy/module/cpyext/include/unicodeobject.h b/pypy/module/cpyext/include/unicodeobject.h
--- a/pypy/module/cpyext/include/unicodeobject.h
+++ b/pypy/module/cpyext/include/unicodeobject.h
@@ -7,6 +7,20 @@
#include <cpyext_unicodeobject.h>
+/* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE
+ representation on demand. Using this macro is very inefficient now,
+ try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
+ use PyUnicode_WRITE() and PyUnicode_READ(). */
+
+#define PyUnicode_AS_UNICODE(op) \
+ (assert(PyUnicode_Check(op)), \
+ (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
+ PyUnicode_AsUnicode((PyObject *)(op)))
+
+#define PyUnicode_AS_DATA(op) \
+ ((const char *)(PyUnicode_AS_UNICODE(op)))
+
+
PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char *format, va_list vargs);
PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char *format, ...);
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -226,13 +226,11 @@
def test_AS(self, space):
word = space.wrap(u'spam')
- array = rffi.cast(rffi.CWCHARP, PyUnicode_AS_DATA(space, word))
- array2 = PyUnicode_AS_UNICODE(space, word)
- array3 = PyUnicode_AsUnicode(space, word)
+ array = rffi.cast(rffi.CWCHARP, PyUnicode_AsUnicode(space, word))
+ array2 = PyUnicode_AsUnicode(space, word)
for (i, char) in enumerate(space.unwrap(word)):
assert array[i] == char
assert array2[i] == char
- assert array3[i] == char
with raises_w(space, TypeError):
PyUnicode_AsUnicode(space, space.newbytes('spam'))
@@ -625,7 +623,7 @@
count1 = space.int_w(space.len(w_x))
target_chunk = lltype.malloc(rffi.CWCHARP.TO, count1, flavor='raw')
- x_chunk = PyUnicode_AS_UNICODE(space, w_x)
+ x_chunk = PyUnicode_AsUnicode(space, w_x)
Py_UNICODE_COPY(space, target_chunk, x_chunk, 4)
w_y = space.wrap(rffi.wcharpsize2unicode(target_chunk, 4))
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -253,12 +253,6 @@
"""Get the maximum ordinal for a Unicode character."""
return runicode.UNICHR(runicode.MAXUNICODE)
-@cpython_api([rffi.VOIDP], rffi.CCHARP, error=CANNOT_FAIL)
-def PyUnicode_AS_DATA(space, ref):
- """Return a pointer to the internal buffer of the object. o has to be a
- PyUnicodeObject (not checked)."""
- return rffi.cast(rffi.CCHARP, PyUnicode_AS_UNICODE(space, ref))
-
@cpython_api([rffi.VOIDP], Py_ssize_t, error=CANNOT_FAIL)
def PyUnicode_GET_DATA_SIZE(space, w_obj):
"""Return the size of the object's internal buffer in bytes. o has to be a
@@ -330,25 +324,6 @@
set_utf8_len(py_obj, 0)
-@cpython_api([rffi.VOIDP], rffi.CWCHARP, error=CANNOT_FAIL)
-def PyUnicode_AS_UNICODE(space, ref):
- """Return a pointer to the internal Py_UNICODE buffer of the object. ref
- has to be a PyUnicodeObject (not checked).
-
- CPython description:
-
- Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE
- representation on demand. Using this macro is very inefficient now,
- try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
- use PyUnicode_WRITE() and PyUnicode_READ().
- """
- if not get_wbuffer(ref):
- # Copy unicode buffer
- w_unicode = from_ref(space, rffi.cast(PyObject, ref))
- u = space.unicode_w(w_unicode)
- set_wbuffer(ref, rffi.unicode2wcharp(u))
- return get_wbuffer(ref)
-
@cpython_api([PyObject], rffi.CWCHARP)
def PyUnicode_AsUnicode(space, ref):
"""Return a read-only pointer to the Unicode object's internal Py_UNICODE
@@ -357,7 +332,12 @@
w_type = from_ref(space, rffi.cast(PyObject, ref.c_ob_type))
if not space.issubtype_w(w_type, space.w_unicode):
raise oefmt(space.w_TypeError, "expected unicode object")
- return PyUnicode_AS_UNICODE(space, rffi.cast(rffi.VOIDP, ref))
+ if not get_wbuffer(ref):
+ # Copy unicode buffer
+ w_unicode = from_ref(space, rffi.cast(PyObject, ref))
+ u = space.unicode_w(w_unicode)
+ set_wbuffer(ref, rffi.unicode2wcharp(u))
+ return get_wbuffer(ref)
@cts.decl("char * PyUnicode_AsUTF8(PyObject *unicode)")
def PyUnicode_AsUTF8(space, ref):
@@ -402,8 +382,7 @@
string may or may not be 0-terminated. It is the responsibility of the caller
to make sure that the wchar_t string is 0-terminated in case this is
required by the application."""
- ref = rffi.cast(PyUnicodeObject, ref)
- c_buffer = PyUnicode_AS_UNICODE(space, rffi.cast(rffi.VOIDP, ref))
+ c_buffer = PyUnicode_AsUnicode(space, ref)
c_length = get_wsize(ref)
# If possible, try to copy the 0-termination as well
More information about the pypy-commit
mailing list