[pypy-commit] cffi default: There is no reason to restrict ffi.unpack() to primitives.

Sat Apr 16 18:17:53 EDT 2016

Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r2663:f8852464e468
Date: 2016-04-17 00:18 +0200
http://bitbucket.org/cffi/cffi/changeset/f8852464e468/

Log:	There is no reason to restrict ffi.unpack() to primitives.

diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c
--- a/c/_cffi_backend.c
+++ b/c/_cffi_backend.c
@@ -5586,7 +5586,7 @@
 {
     CDataObject *cd;
     CTypeDescrObject *ctitem;
-    Py_ssize_t i, length, itemsize, best_alignment;
+    Py_ssize_t i, length, itemsize;
     PyObject *result;
     char *src;
     int casenum;
@@ -5596,11 +5596,9 @@
                                      &CData_Type, &cd, &length))
         return NULL;
 
-    ctitem = cd->c_type->ct_itemdescr;
-    if (!(cd->c_type->ct_flags & (CT_ARRAY|CT_POINTER)) ||
-        !(ctitem->ct_flags & CT_PRIMITIVE_ANY)) {
+    if (!(cd->c_type->ct_flags & (CT_ARRAY|CT_POINTER))) {
         PyErr_Format(PyExc_TypeError,
-                     "expected a pointer to a primitive type, got '%s'",
+                     "expected a pointer or array, got '%s'",
                      cd->c_type->ct_name);
         return NULL;
     }
@@ -5620,6 +5618,7 @@
     }
 
     /* byte- and unicode strings */
+    ctitem = cd->c_type->ct_itemdescr;
     if (ctitem->ct_flags & CT_PRIMITIVE_CHAR) {
         if (ctitem->ct_size == sizeof(char))
             return PyBytes_FromStringAndSize(cd->c_data, length);
@@ -5630,17 +5629,34 @@
     }
 
     /* else, the result is a list.  This implementation should be
-       equivalent to, but on CPython much faster than, 'list(p[0:length])'.
+       equivalent to but much faster than '[p[i] for i in range(length)]'.
+       (Note that on PyPy, 'list(p[0:length])' should be equally fast,
+       but arguably, finding out that there *is* such an unexpected way
+       to write things down is the real problem.)
     */
-    result = PyList_New(length); if (result == NULL) return NULL;
+    result = PyList_New(length);
+    if (result == NULL)
+        return NULL;
 
     src = cd->c_data;
     itemsize = ctitem->ct_size;
-    best_alignment = ctitem->ct_length;
+    if (itemsize < 0) {
+        PyErr_Format(PyExc_ValueError, "'%s' points to items of unknown size",
+                     cd->c_type->ct_name);
+        return NULL;
+    }
+
+    /* Determine some common fast-paths for the loop below.  The case -1
+       is the fall-back, which always gives the right answer. */
+
+#define ALIGNMENT_CHECK(align)                          \
+        (((align) & ((align) - 1)) == 0 &&              \
+         (((uintptr_t)src) & ((align) - 1)) == 0)
 
     casenum = -1;
-    if ((best_alignment & (best_alignment - 1)) == 0 &&
-        (((uintptr_t)src) & (best_alignment - 1)) == 0) {
+
+    if ((ctitem->ct_flags & CT_PRIMITIVE_ANY) &&
+            ALIGNMENT_CHECK(ctitem->ct_length)) {
         /* Source data is fully aligned; we can directly read without
            memcpy().  The unaligned case is expected to be rare; in
            this situation it is ok to fall back to the general
@@ -5667,6 +5683,10 @@
             else if (itemsize == sizeof(float))  casenum = 8;
         }
     }
+    else if (ctitem->ct_flags & (CT_POINTER | CT_FUNCTIONPTR)) {
+        casenum = 10;    /* any pointer */
+    }
+#undef ALIGNMENT_CHECK
 
     for (i = 0; i < length; i++) {
         PyObject *x;
@@ -5685,6 +5705,7 @@
         case 7: x = PyLong_FromUnsignedLong(*(unsigned long *)src); break;
         case 8: x = PyFloat_FromDouble(*(float *)src); break;
         case 9: x = PyFloat_FromDouble(*(double *)src); break;
+        case 10: x = new_simple_cdata(*(char **)src, ctitem); break;
         }
         if (x == NULL) {
             Py_DECREF(result);
diff --git a/c/ffi_obj.c b/c/ffi_obj.c
--- a/c/ffi_obj.c
+++ b/c/ffi_obj.c
@@ -460,17 +460,19 @@
                                     from _cffi_backend.c */
 
 PyDoc_STRVAR(ffi_unpack_doc,
-"Unpack an array of primitive C data of the given length,\n"
+"Unpack an array of C data of the given length,\n"
 "returning a Python string/unicode/list.\n"
 "\n"
 "If 'cdata' is a pointer to 'char', returns a byte string.\n"
-"Unlike ffi.string(), it does not stop at the first null.\n"
+"It does not stop at the first null.  This is equivalent to:\n"
+"ffi.buffer(cdata, length)[:]\n"
 "\n"
 "If 'cdata' is a pointer to 'wchar_t', returns a unicode string.\n"
 "'length' is measured in wchar_t's; it is not the size in bytes.\n"
 "\n"
-"If 'cdata' is a pointer to some other integer or floating-point\n"
-"type, returns a list of 'length' integers or floats.");
+"If 'cdata' is a pointer to anything else, returns a list of\n"
+"'length' items.  This is a faster equivalent to:\n"
+"[cdata[i] for i in range(length)]");
 
 #define ffi_unpack  b_unpack     /* ffi_unpack() => b_unpack()
                                     from _cffi_backend.c */
diff --git a/c/test_c.py b/c/test_c.py
--- a/c/test_c.py
+++ b/c/test_c.py
@@ -3563,8 +3563,30 @@
     py.test.raises(TypeError, unpack, p)
     py.test.raises(TypeError, unpack, b"foobar", 6)
     py.test.raises(TypeError, unpack, cast(BInt, 42), 1)
+    #
+    BPtr = new_pointer_type(BInt)
+    random_ptr = cast(BPtr, -424344)
+    other_ptr = cast(BPtr, 54321)
+    BArray = new_array_type(new_pointer_type(BPtr), None)
+    lst = unpack(newp(BArray, [random_ptr, other_ptr]), 2)
+    assert lst == [random_ptr, other_ptr]
+    #
     BFunc = new_function_type((BInt, BInt), BInt, False)
-    py.test.raises(TypeError, unpack, cast(new_pointer_type(BFunc), 42), 1)
+    BFuncPtr = new_pointer_type(BFunc)
+    lst = unpack(newp(new_array_type(BFuncPtr, None), 2), 2)
+    assert len(lst) == 2
+    assert not lst[0] and not lst[1]
+    assert typeof(lst[0]) is BFunc
+    #
+    BStruct = new_struct_type("foo")
+    BStructPtr = new_pointer_type(BStruct)
+    e = py.test.raises(ValueError, unpack, cast(BStructPtr, 42), 5)
+    assert str(e.value) == "'foo *' points to items of unknown size"
+    complete_struct_or_union(BStruct, [('a1', BInt, -1),
+                                       ('a2', BInt, -1)])
+    lst = unpack(newp(new_array_type(BStructPtr, None), [[4,5], [6,7]]), 2)
+    assert typeof(lst[0]) is BStruct
+    assert lst[0].a1 == 4 and lst[1].a2 == 7
     #
     py.test.raises(RuntimeError, unpack, cast(new_pointer_type(BChar), 0), 0)
     py.test.raises(RuntimeError, unpack, cast(new_pointer_type(BChar), 0), 10)
diff --git a/cffi/api.py b/cffi/api.py
--- a/cffi/api.py
+++ b/cffi/api.py
@@ -300,17 +300,19 @@
         return self._backend.string(cdata, maxlen)
 
     def unpack(self, cdata, length):
-        """Unpack an array of primitive C data of the given length,
+        """Unpack an array of C data of the given length, 
         returning a Python string/unicode/list.
 
         If 'cdata' is a pointer to 'char', returns a byte string.
-        Unlike ffi.string(), it does not stop at the first null.
+        It does not stop at the first null.  This is equivalent to:
+        ffi.buffer(cdata, length)[:]
 
         If 'cdata' is a pointer to 'wchar_t', returns a unicode string.
         'length' is measured in wchar_t's; it is not the size in bytes.
 
-        If 'cdata' is a pointer to some other integer or floating-point
-        type, returns a list of 'length' integers or floats.
+        If 'cdata' is a pointer to anything else, returns a list of
+        'length' items.  This is a faster equivalent to:
+        [cdata[i] for i in range(length)]
         """
         return self._backend.unpack(cdata, length)