[pypy-commit] pypy py3k: cpyext: implement PyUnicode_AsWideCharString.

Thu Nov 22 23:50:05 CET 2012

Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: py3k
Changeset: r59056:040de362d4bf
Date: 2012-11-22 22:46 +0100
http://bitbucket.org/pypy/pypy/changeset/040de362d4bf/

Log:	cpyext: implement PyUnicode_AsWideCharString. Also correctly export
	PyUnicode_FromFormat.

diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -356,6 +356,7 @@
     'PyArg_ParseTuple', 'PyArg_UnpackTuple', 'PyArg_ParseTupleAndKeywords',
     'PyArg_VaParse', 'PyArg_VaParseTupleAndKeywords', '_PyArg_NoKeywords',
     'PyString_FromFormat', 'PyString_FromFormatV',
+    'PyUnicode_FromFormat', 'PyUnicode_FromFormatV', 'PyUnicode_AsWideCharString',
     'PyModule_AddObject', 'PyModule_AddIntConstant', 'PyModule_AddStringConstant',
     'Py_BuildValue', 'Py_VaBuildValue', 'PyTuple_Pack',
 
diff --git a/pypy/module/cpyext/include/unicodeobject.h b/pypy/module/cpyext/include/unicodeobject.h
--- a/pypy/module/cpyext/include/unicodeobject.h
+++ b/pypy/module/cpyext/include/unicodeobject.h
@@ -26,8 +26,10 @@
 } PyUnicodeObject;
 
 
-PyObject *PyUnicode_FromFormatV(const char *format, va_list vargs);
-PyObject *PyUnicode_FromFormat(const char *format, ...);
+PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char *format, va_list vargs);
+PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char *format, ...);
+
+PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(PyObject *unicode, Py_ssize_t *size);
 
 Py_LOCAL_INLINE(size_t) Py_UNICODE_strlen(const Py_UNICODE *u)
 {
diff --git a/pypy/module/cpyext/src/unicodeobject.c b/pypy/module/cpyext/src/unicodeobject.c
--- a/pypy/module/cpyext/src/unicodeobject.c
+++ b/pypy/module/cpyext/src/unicodeobject.c
@@ -522,3 +522,50 @@
     return ret;
 }
 
+/* Convert a unicode object to a newly allocated wide character string.
+ *
+ * The returned buffer always ends with a nul character.  If size is not
+ * NULL, the number of wide characters (excluding the trailing nul) is
+ * stored in *size.
+ *
+ * On success, returns a buffer obtained from PyMem_MALLOC(); the caller
+ * releases it with PyMem_Free().  On error, returns NULL and leaves
+ * *size untouched.
+ */
+wchar_t*
+PyUnicode_AsWideCharString(PyObject *unicode,
+                           Py_ssize_t *size)
+{
+    wchar_t* buffer;
+    Py_ssize_t buflen;
+
+    if (unicode == NULL) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+
+    /* One extra slot for the trailing nul. */
+    buflen = PyUnicode_GET_SIZE(unicode) + 1;
+    /* Guard the byte-size multiplication below against overflow. */
+    if (PY_SSIZE_T_MAX / sizeof(wchar_t) < buflen) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    /* PyPy shortcut: Unicode is already an array of wchar_t */
+    buffer = PyMem_MALLOC(buflen * sizeof(wchar_t));
+    if (buffer == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    if (PyUnicode_AsWideChar(unicode, buffer, buflen) < 0) {
+        /* Do not leak the buffer when the copy fails. */
+        PyMem_FREE(buffer);
+        return NULL;
+    }
+    if (size != NULL)
+        *size = buflen - 1;
+    return buffer;
+}
+
diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -2173,22 +2173,6 @@
     raise NotImplementedError
     
 
-@cpython_api([PyObject, Py_ssize_t], rffi.CWCHARP)
-def PyUnicode_AsWideCharString(space, unicode, size):
-    """Convert the Unicode object to a wide character string. The output string
-    always ends with a nul character. If size is not NULL, write the number
-    of wide characters (excluding the trailing 0-termination character) into
-    *size.
-    
-    Returns a buffer allocated by PyMem_Alloc() (use
-    PyMem_Free() to free it) on success. On error, returns NULL,
-    *size is undefined and raises a MemoryError. Note that the
-    resulting wchar_t* string might contain null characters, which
-    would cause the string to be truncated when used with most C functions.
-    """
-    raise NotImplementedError
-    
-
 @cpython_api([rffi.CArrayPtr(Py_UNICODE), Py_ssize_t, rffi.CCHARP, rffi.CCHARP], PyObject)
 def PyUnicode_Encode(space, s, size, encoding, errors):
     """Encode the Py_UNICODE buffer s of the given size and return a Python
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -110,6 +110,27 @@
         res = module.test_unicode_format(1, "xyz")
         assert res == "bla 1 ble xyz\n"
 
+    def test_aswidecharstring(self):  # exercises PyUnicode_AsWideCharString through a C helper
+        module = self.import_extension('foo', [
+            ("aswidecharstring", "METH_O",
+             '''
+             PyObject *result;
+             Py_ssize_t size;
+             wchar_t *buffer;
+
+             buffer = PyUnicode_AsWideCharString(args, &size);
+             if (buffer == NULL)
+                 return NULL;
+
+             result = PyUnicode_FromWideChar(buffer, size + 1);
+             PyMem_Free(buffer);
+             if (result == NULL)
+                 return NULL;
+             return Py_BuildValue("(Nn)", result, size);
+             ''')])
+        res = module.aswidecharstring("Caf\xe9")  # input contains a non-ASCII character
+        assert res == ("Caf\xe9\0", 4)  # helper rebuilds size + 1 chars, so the trailing nul is visible
+
 
 class TestUnicode(BaseApiTest):
     def test_unicodeobject(self, space, api):
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -255,13 +255,13 @@
     to make sure that the wchar_t string is 0-terminated in case this is
     required by the application."""
     c_buffer = PyUnicode_AS_UNICODE(space, ref)
+    ref = rffi.cast(PyUnicodeObject, ref)
     c_size = ref.c_size
 
     # If possible, try to copy the 0-termination as well
     if size > c_size:
         size = c_size + 1
 
-
     i = 0
     while i < size:
         buf[i] = c_buffer[i]