[pypy-commit] pypy default: Implement ffi.unpack(); the fast-paths are coming next

Sun Apr 17 04:24:33 EDT 2016

Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r83706:343cbe027c00
Date: 2016-04-17 09:55 +0200
http://bitbucket.org/pypy/pypy/changeset/343cbe027c00/

Log:	Implement ffi.unpack(); the fast-paths are coming next

diff --git a/pypy/module/_cffi_backend/__init__.py b/pypy/module/_cffi_backend/__init__.py
--- a/pypy/module/_cffi_backend/__init__.py
+++ b/pypy/module/_cffi_backend/__init__.py
@@ -48,7 +48,7 @@
         'from_buffer': 'func.from_buffer',
 
         'string': 'func.string',
-        'rawstring': 'func.rawstring',
+        'unpack': 'func.unpack',
         'buffer': 'cbuffer.buffer',
         'memmove': 'func.memmove',
 
diff --git a/pypy/module/_cffi_backend/cdataobj.py b/pypy/module/_cffi_backend/cdataobj.py
--- a/pypy/module/_cffi_backend/cdataobj.py
+++ b/pypy/module/_cffi_backend/cdataobj.py
@@ -367,6 +367,25 @@
         with self as ptr:
             return W_CDataGCP(self.space, ptr, self.ctype, self, w_destructor)
 
+    def unpack(self, length):
+        from pypy.module._cffi_backend.ctypeptr import W_CTypePtrOrArray
+        space = self.space
+        if not self.ctype.is_nonfunc_pointer_or_array:
+            raise oefmt(space.w_TypeError,
+                        "expected a pointer or array, got '%s'",
+                        self.ctype.name)
+        if length < 0:
+            raise oefmt(space.w_ValueError, "'length' cannot be negative")
+        ctype = self.ctype
+        assert isinstance(ctype, W_CTypePtrOrArray)
+        with self as ptr:
+            if not ptr:
+                raise oefmt(space.w_RuntimeError,
+                            "cannot use unpack() on %s",
+                            space.str_w(self.repr()))
+            w_result = ctype.ctitem.unpack_ptr(ctype, ptr, length)
+        return w_result
+
 
 class W_CDataMem(W_CData):
     """This is used only by the results of cffi.cast('int', x)
diff --git a/pypy/module/_cffi_backend/ctypearray.py b/pypy/module/_cffi_backend/ctypearray.py
--- a/pypy/module/_cffi_backend/ctypearray.py
+++ b/pypy/module/_cffi_backend/ctypearray.py
@@ -109,21 +109,6 @@
     def typeoffsetof_index(self, index):
         return self.ctptr.typeoffsetof_index(index)
 
-    def rawstring(self, w_cdata):
-        if isinstance(self.ctitem, ctypeprim.W_CTypePrimitive):
-            space = self.space
-            length = w_cdata.get_array_length()
-            if self.ctitem.size == rffi.sizeof(lltype.Char):
-                with w_cdata as ptr:
-                    s = rffi.charpsize2str(ptr, length)
-                return space.wrapbytes(s)
-            elif self.is_unichar_ptr_or_array():
-                with w_cdata as ptr:
-                    cdata = rffi.cast(rffi.CWCHARP, ptr)
-                    u = rffi.wcharpsize2unicode(cdata, length)
-                return space.wrap(u)
-        return W_CTypePtrOrArray.rawstring(self, w_cdata)
-
 
 class W_CDataIter(W_Root):
     _immutable_fields_ = ['ctitem', 'cdata', '_stop']    # but not '_next'
diff --git a/pypy/module/_cffi_backend/ctypeobj.py b/pypy/module/_cffi_backend/ctypeobj.py
--- a/pypy/module/_cffi_backend/ctypeobj.py
+++ b/pypy/module/_cffi_backend/ctypeobj.py
@@ -127,11 +127,20 @@
         raise oefmt(space.w_TypeError,
                     "string(): unexpected cdata '%s' argument", self.name)
 
-    def rawstring(self, cdataobj):
+    def unpack_ptr(self, w_ctypeptr, ptr, length):
+        # generic implementation, when the type of items is not known to
+        # be one for which a fast-case exists
         space = self.space
-        raise oefmt(space.w_TypeError,
-                    "expected a 'char[]' or 'uint8_t[]' or 'int8_t[]' "
-                    "or 'wchar_t[]', got '%s'", self.name)
+        itemsize = self.size
+        if itemsize < 0:
+            raise oefmt(space.w_ValueError,
+                        "'%s' points to items of unknown size",
+                        w_ctypeptr.name)
+        result_w = [None] * length
+        for i in range(length):
+            result_w[i] = self.convert_to_object(ptr)
+            ptr = rffi.ptradd(ptr, itemsize)
+        return space.newlist(result_w)
 
     def add(self, cdata, i):
         space = self.space
diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py
--- a/pypy/module/_cffi_backend/ctypeprim.py
+++ b/pypy/module/_cffi_backend/ctypeprim.py
@@ -125,6 +125,10 @@
         value = self._convert_to_char(w_ob)
         cdata[0] = value
 
+    def unpack_ptr(self, w_ctypeptr, ptr, length):
+        s = rffi.charpsize2str(ptr, length)
+        return self.space.wrapbytes(s)
+
 
 # XXX explicitly use an integer type instead of lltype.UniChar here,
 # because for now the latter is defined as unsigned by RPython (even
@@ -171,6 +175,10 @@
         value = self._convert_to_unichar(w_ob)
         rffi.cast(rffi.CWCHARP, cdata)[0] = value
 
+    def unpack_ptr(self, w_ctypeptr, ptr, length):
+        u = rffi.wcharpsize2unicode(rffi.cast(rffi.CWCHARP, ptr), length)
+        return self.space.wrap(u)
+
 
 class W_CTypePrimitiveSigned(W_CTypePrimitive):
     _attrs_            = ['value_fits_long', 'value_smaller_than_long']
diff --git a/pypy/module/_cffi_backend/ffi_obj.py b/pypy/module/_cffi_backend/ffi_obj.py
--- a/pypy/module/_cffi_backend/ffi_obj.py
+++ b/pypy/module/_cffi_backend/ffi_obj.py
@@ -542,19 +542,23 @@
         return w_cdata.ctype.string(w_cdata, maxlen)
 
 
-    @unwrap_spec(w_cdata=W_CData)
-    def descr_rawstring(self, w_cdata):
-        """\
-Convert a cdata that is an array of 'char' or 'wchar_t' to
-a byte or unicode string.  Unlike ffi.string(), it does not stop
-at the first null.
+    @unwrap_spec(w_cdata=W_CData, length=int)
+    def descr_unpack(self, w_cdata, length):
+        """Unpack an array of C data of the given length,
+returning a Python string/unicode/list.
 
-Note that if you have a pointer and an explicit length, you
-can use 'p[0:length]' to make an array view.  This is similar to
-the construct 'list(p[0:length])', which returns a list of chars/
-unichars/ints/floats."""
+If 'cdata' is a pointer to 'char', returns a byte string.
+It does not stop at the first null.  This is equivalent to:
+ffi.buffer(cdata, length)[:]
+
+If 'cdata' is a pointer to 'wchar_t', returns a unicode string.
+'length' is measured in wchar_t's; it is not the size in bytes.
+
+If 'cdata' is a pointer to anything else, returns a list of
+'length' items.  This is a faster equivalent to:
+[cdata[i] for i in range(length)]"""
         #
-        return w_cdata.ctype.rawstring(w_cdata)
+        return w_cdata.unpack(length)
 
 
     def descr_sizeof(self, w_arg):
@@ -751,8 +755,8 @@
         new_allocator = interp2app(W_FFIObject.descr_new_allocator),
         new_handle  = interp2app(W_FFIObject.descr_new_handle),
         offsetof    = interp2app(W_FFIObject.descr_offsetof),
-        rawstring   = interp2app(W_FFIObject.descr_rawstring),
         sizeof      = interp2app(W_FFIObject.descr_sizeof),
         string      = interp2app(W_FFIObject.descr_string),
         typeof      = interp2app(W_FFIObject.descr_typeof),
+        unpack      = interp2app(W_FFIObject.descr_unpack),
         **_extras)
diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py
--- a/pypy/module/_cffi_backend/func.py
+++ b/pypy/module/_cffi_backend/func.py
@@ -78,9 +78,9 @@
 
 # ____________________________________________________________
 
-@unwrap_spec(w_cdata=cdataobj.W_CData)
-def rawstring(space, w_cdata):
-    return w_cdata.ctype.rawstring(w_cdata)
+@unwrap_spec(w_cdata=cdataobj.W_CData, length=int)
+def unpack(space, w_cdata, length):
+    return w_cdata.unpack(length)
 
 # ____________________________________________________________
 
diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py
--- a/pypy/module/_cffi_backend/test/_backend_test_c.py
+++ b/pypy/module/_cffi_backend/test/_backend_test_c.py
@@ -3515,21 +3515,71 @@
     _get_common_types(d)
     assert d['bool'] == '_Bool'
 
-def test_rawstring():
+def test_unpack():
     BChar = new_primitive_type("char")
     BArray = new_array_type(new_pointer_type(BChar), 10)   # char[10]
     p = newp(BArray, b"abc\x00def")
-    assert rawstring(p) == b"abc\x00def\x00\x00\x00"
-    assert rawstring(p[1:6]) == b"bc\x00de"
+    p0 = p
+    assert unpack(p, 10) == b"abc\x00def\x00\x00\x00"
+    assert unpack(p+1, 5) == b"bc\x00de"
     BWChar = new_primitive_type("wchar_t")
     BArray = new_array_type(new_pointer_type(BWChar), 10)   # wchar_t[10]
     p = newp(BArray, u"abc\x00def")
-    assert rawstring(p) == u"abc\x00def\x00\x00\x00"
-    assert rawstring(p[1:6]) == u"bc\x00de"
-    BChar = new_primitive_type("uint8_t")
-    BArray = new_array_type(new_pointer_type(BChar), 10)   # uint8_t[10]
-    p = newp(BArray, [65 + i for i in range(10)])
-    assert rawstring(p) == b"ABCDEFGHIJ"
+    assert unpack(p, 10) == u"abc\x00def\x00\x00\x00"
+
+    for typename, samples in [
+            ("uint8_t",  [0, 2**8-1]),
+            ("uint16_t", [0, 2**16-1]),
+            ("uint32_t", [0, 2**32-1]),
+            ("uint64_t", [0, 2**64-1]),
+            ("int8_t",  [-2**7, 2**7-1]),
+            ("int16_t", [-2**15, 2**15-1]),
+            ("int32_t", [-2**31, 2**31-1]),
+            ("int64_t", [-2**63, 2**63-1]),
+            ("_Bool", [0, 1]),
+            ("float", [0.0, 10.5]),
+            ("double", [12.34, 56.78]),
+            ]:
+        BItem = new_primitive_type(typename)
+        BArray = new_array_type(new_pointer_type(BItem), 10)
+        p = newp(BArray, samples)
+        result = unpack(p, len(samples))
+        assert result == samples
+        for i in range(len(samples)):
+            assert result[i] == p[i] and type(result[i]) is type(p[i])
     #
-    py.test.raises(TypeError, rawstring, "foobar")
-    py.test.raises(TypeError, rawstring, p + 1)
+    BInt = new_primitive_type("int")
+    py.test.raises(TypeError, unpack, p)
+    py.test.raises(TypeError, unpack, b"foobar", 6)
+    py.test.raises(TypeError, unpack, cast(BInt, 42), 1)
+    #
+    BPtr = new_pointer_type(BInt)
+    random_ptr = cast(BPtr, -424344)
+    other_ptr = cast(BPtr, 54321)
+    BArray = new_array_type(new_pointer_type(BPtr), None)
+    lst = unpack(newp(BArray, [random_ptr, other_ptr]), 2)
+    assert lst == [random_ptr, other_ptr]
+    #
+    BFunc = new_function_type((BInt, BInt), BInt, False)
+    BFuncPtr = new_pointer_type(BFunc)
+    lst = unpack(newp(new_array_type(BFuncPtr, None), 2), 2)
+    assert len(lst) == 2
+    assert not lst[0] and not lst[1]
+    assert typeof(lst[0]) is BFunc
+    #
+    BStruct = new_struct_type("foo")
+    BStructPtr = new_pointer_type(BStruct)
+    e = py.test.raises(ValueError, unpack, cast(BStructPtr, 42), 5)
+    assert str(e.value) == "'foo *' points to items of unknown size"
+    complete_struct_or_union(BStruct, [('a1', BInt, -1),
+                                       ('a2', BInt, -1)])
+    array_of_structs = newp(new_array_type(BStructPtr, None), [[4,5], [6,7]])
+    lst = unpack(array_of_structs, 2)
+    assert typeof(lst[0]) is BStruct
+    assert lst[0].a1 == 4 and lst[1].a2 == 7
+    #
+    py.test.raises(RuntimeError, unpack, cast(new_pointer_type(BChar), 0), 0)
+    py.test.raises(RuntimeError, unpack, cast(new_pointer_type(BChar), 0), 10)
+    #
+    py.test.raises(ValueError, unpack, p0, -1)
+    py.test.raises(ValueError, unpack, p, -1)
diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py
--- a/pypy/module/_cffi_backend/test/test_ffi_obj.py
+++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py
@@ -477,15 +477,10 @@
             raises(ValueError, ffi.init_once, do_init, "tag")
             assert seen == [1] * (i + 1)
 
-    def test_rawstring(self):
+    def test_unpack(self):
         import _cffi_backend as _cffi1_backend
         ffi = _cffi1_backend.FFI()
-        p = ffi.new("char[]", "abc\x00def")
-        assert ffi.rawstring(p) == "abc\x00def\x00"
-        assert ffi.rawstring(p[1:6]) == "bc\x00de"
-        p = ffi.new("wchar_t[]", u"abc\x00def")
-        assert ffi.rawstring(p) == u"abc\x00def\x00"
-        assert ffi.rawstring(p[1:6]) == u"bc\x00de"
-        #
-        raises(TypeError, ffi.rawstring, "foobar")
-        raises(TypeError, ffi.rawstring, p + 1)
+        p = ffi.new("char[]", b"abc\x00def")
+        assert ffi.unpack(p+1, 7) == b"bc\x00def\x00"
+        p = ffi.new("int[]", [-123456789])
+        assert ffi.unpack(p, 1) == [-123456789]