[pypy-commit] pypy default: Merged numpy-full-fromstring

Thu Dec 15 21:22:07 CET 2011

Author: Jeff Terrace <jterrace at gmail.com>
Branch: 
Changeset: r50592:65311ed125b7
Date: 2011-12-15 15:21 -0500
http://bitbucket.org/pypy/pypy/changeset/65311ed125b7/

Log:	Merged numpy-full-fromstring

diff --git a/pypy/module/micronumpy/interp_support.py b/pypy/module/micronumpy/interp_support.py
--- a/pypy/module/micronumpy/interp_support.py
+++ b/pypy/module/micronumpy/interp_support.py
@@ -1,34 +1,90 @@
-from pypy.interpreter.error import OperationError
+from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.interpreter.gateway import unwrap_spec
-from pypy.module.micronumpy.interp_dtype import get_dtype_cache
-from pypy.rlib.rstruct.runpack import runpack
 from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.module.micronumpy import interp_dtype
+from pypy.objspace.std.strutil import strip_spaces
 
 
 FLOAT_SIZE = rffi.sizeof(lltype.Float)
 
-@unwrap_spec(s=str)
-def fromstring(space, s):
+def _fromstring_text(space, s, count, sep, length, dtype):
     from pypy.module.micronumpy.interp_numarray import W_NDimArray
+
+    sep_stripped = strip_spaces(sep)
+    skip_bad_vals = len(sep_stripped) == 0
+
+    items = []
+    num_items = 0
+    idx = 0
+    
+    while (num_items < count or count == -1) and idx < len(s):
+        nextidx = s.find(sep, idx)
+        if nextidx < 0:
+            nextidx = length
+        piece = strip_spaces(s[idx:nextidx])
+        if len(piece) > 0 or not skip_bad_vals:
+            if len(piece) == 0 and not skip_bad_vals:
+                val = dtype.itemtype.default_fromstring(space)
+            else:
+                try:
+                    val = dtype.coerce(space, space.wrap(piece))
+                except OperationError, e:
+                    if not e.match(space, space.w_ValueError):
+                        raise
+                    gotit = False
+                    while not gotit and len(piece) > 0:
+                        piece = piece[:-1]
+                        try:
+                            val = dtype.coerce(space, space.wrap(piece))
+                            gotit = True
+                        except OperationError, e:
+                            if not e.match(space, space.w_ValueError):
+                                raise
+                    if not gotit:
+                        val = dtype.itemtype.default_fromstring(space)
+                    nextidx = length
+            items.append(val)
+            num_items += 1
+        idx = nextidx + 1
+    
+    if count > num_items:
+        raise OperationError(space.w_ValueError, space.wrap(
+            "string is smaller than requested size"))
+
+    a = W_NDimArray(num_items, [num_items], dtype=dtype)
+    for i, val in enumerate(items):
+        a.dtype.setitem(a.storage, i, val)
+    
+    return space.wrap(a)
+
+def _fromstring_bin(space, s, count, length, dtype):
+    from pypy.module.micronumpy.interp_numarray import W_NDimArray
+    
+    itemsize = dtype.itemtype.get_element_size()
+    if count == -1:
+        count = length / itemsize
+    if length % itemsize != 0:
+        raise operationerrfmt(space.w_ValueError,
+                              "string length %d not divisable by item size %d",
+                              length, itemsize)
+    if count * itemsize > length:
+        raise OperationError(space.w_ValueError, space.wrap(
+            "string is smaller than requested size"))
+        
+    a = W_NDimArray(count, [count], dtype=dtype)
+    for i in range(count):
+        val = dtype.itemtype.runpack_str(s[i*itemsize:i*itemsize + itemsize])
+        a.dtype.setitem(a.storage, i, val)
+        
+    return space.wrap(a)
+
+@unwrap_spec(s=str, count=int, sep=str)
+def fromstring(space, s, w_dtype=None, count=-1, sep=''):
+    dtype = space.interp_w(interp_dtype.W_Dtype,
+        space.call_function(space.gettypefor(interp_dtype.W_Dtype), w_dtype)
+    )
     length = len(s)
-
-    if length % FLOAT_SIZE == 0:
-        number = length/FLOAT_SIZE
+    if sep == '':
+        return _fromstring_bin(space, s, count, length, dtype)
     else:
-        raise OperationError(space.w_ValueError, space.wrap(
-            "string length %d not divisable by %d" % (length, FLOAT_SIZE)))
-
-    dtype = get_dtype_cache(space).w_float64dtype
-    a = W_NDimArray(number, [number], dtype=dtype)
-
-    start = 0
-    end = FLOAT_SIZE
-    i = 0
-    while i < number:
-        part = s[start:end]
-        a.dtype.setitem(a.storage, i, dtype.box(runpack('d', part)))
-        i += 1
-        start += FLOAT_SIZE
-        end += FLOAT_SIZE
-
-    return space.wrap(a)
+        return _fromstring_text(space, s, count, sep, length, dtype)
diff --git a/pypy/module/micronumpy/test/test_numarray.py b/pypy/module/micronumpy/test/test_numarray.py
--- a/pypy/module/micronumpy/test/test_numarray.py
+++ b/pypy/module/micronumpy/test/test_numarray.py
@@ -1194,13 +1194,107 @@
         import struct
         BaseNumpyAppTest.setup_class.im_func(cls)
         cls.w_data = cls.space.wrap(struct.pack('dddd', 1, 2, 3, 4))
+        cls.w_fdata = cls.space.wrap(struct.pack('f', 2.3))
+        cls.w_float32val = cls.space.wrap(struct.pack('f', 5.2))
+        cls.w_float64val = cls.space.wrap(struct.pack('d', 300.4))
 
     def test_fromstring(self):
-        from numpypy import fromstring
+        from numpypy import fromstring, array, uint8, float32, int32
+        import sys
         a = fromstring(self.data)
         for i in range(4):
             assert a[i] == i + 1
-        raises(ValueError, fromstring, "abc")
+        b = fromstring('\x01\x02', dtype=uint8)
+        assert a[0] == 1
+        assert a[1] == 2
+        c = fromstring(self.fdata, dtype=float32)
+        assert c[0] == float32(2.3)
+        d = fromstring("1 2", sep=' ', count=2, dtype=uint8)
+        assert len(d) == 2
+        assert d[0] == 1
+        assert d[1] == 2
+        e = fromstring('3, 4,5', dtype=uint8, sep=',')
+        assert len(e) == 3
+        assert e[0] == 3
+        assert e[1] == 4
+        assert e[2] == 5
+        f = fromstring('\x01\x02\x03\x04\x05', dtype=uint8, count=3)
+        assert len(f) == 3
+        assert f[0] == 1
+        assert f[1] == 2
+        assert f[2] == 3
+        g = fromstring("1  2    3 ", dtype=uint8, sep=" ")
+        assert len(g) == 3
+        assert g[0] == 1
+        assert g[1] == 2
+        assert g[2] == 3
+        h = fromstring("1, , 2, 3", dtype=uint8, sep=",")
+        assert (h == [1,0,2,3]).all()
+        i = fromstring("1    2 3", dtype=uint8, sep=" ")
+        assert (i == [1,2,3]).all()
+        j = fromstring("1\t\t\t\t2\t3", dtype=uint8, sep="\t")
+        assert (j == [1,2,3]).all()
+        k = fromstring("1,x,2,3", dtype=uint8, sep=",")
+        assert (k == [1,0]).all()
+        l = fromstring("1,x,2,3", dtype='float32', sep=",")
+        assert (l == [1.0,-1.0]).all()
+        m = fromstring("1,,2,3", sep=",")
+        assert (m == [1.0,-1.0,2.0,3.0]).all()
+        n = fromstring("3.4 2.0 3.8 2.2", dtype=int32, sep=" ")
+        assert (n == [3]).all()
+        o = fromstring("1.0 2f.0f 3.8 2.2", dtype=float32, sep=" ")
+        assert len(o) == 2
+        assert o[0] == 1.0
+        assert o[1] == 2.0
+        p = fromstring("1.0,,2.0,3.0", sep=",")
+        assert (p == [1.0, -1.0, 2.0, 3.0]).all()
+        q = fromstring("1.0,,2.0,3.0", sep=" ")
+        assert (q == [1.0]).all()
+        r = fromstring("\x01\x00\x02", dtype='bool')
+        assert (r == [True, False, True]).all()
+        s = fromstring("1,2,3,,5", dtype=bool, sep=",")
+        assert (s == [True, True, True, False, True]).all()
+        t = fromstring("", bool)
+        assert (t == []).all()
+        u = fromstring("\x01\x00\x00\x00\x00\x00\x00\x00", dtype=int)
+        if sys.maxint > 2 ** 31 - 1:
+            assert (u == [1]).all()
+        else:
+            assert (u == [1, 0]).all()
+        
+    def test_fromstring_types(self):
+        from numpypy import fromstring
+        from numpypy import int8, int16, int32, int64
+        from numpypy import uint8, uint16, uint32
+        from numpypy import float32, float64
+        a = fromstring('\xFF', dtype=int8)
+        assert a[0] == -1
+        b = fromstring('\xFF', dtype=uint8)
+        assert b[0] == 255
+        c = fromstring('\xFF\xFF', dtype=int16)
+        assert c[0] == -1
+        d = fromstring('\xFF\xFF', dtype=uint16)
+        assert d[0] == 65535
+        e = fromstring('\xFF\xFF\xFF\xFF', dtype=int32)
+        assert e[0] == -1
+        f = fromstring('\xFF\xFF\xFF\xFF', dtype=uint32)
+        assert repr(f[0]) == '4294967295'
+        g = fromstring('\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF', dtype=int64)
+        assert g[0] == -1
+        h = fromstring(self.float32val, dtype=float32)
+        assert h[0] == float32(5.2)
+        i = fromstring(self.float64val, dtype=float64)
+        assert i[0] == float64(300.4)
+        
+        
+    def test_fromstring_invalid(self):
+        from numpypy import fromstring, uint16, uint8, int32
+        #default dtype is 64-bit float, so 3 bytes should fail
+        raises(ValueError, fromstring, "\x01\x02\x03")
+        #3 bytes is not modulo 2 bytes (int16)
+        raises(ValueError, fromstring, "\x01\x03\x03", dtype=uint16)
+        #5 bytes is larger than 3 bytes
+        raises(ValueError, fromstring, "\x01\x02\x03", count=5, dtype=uint8)
 
 
 class AppTestRepr(BaseNumpyAppTest):
diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py
--- a/pypy/module/micronumpy/types.py
+++ b/pypy/module/micronumpy/types.py
@@ -8,6 +8,7 @@
 from pypy.rlib.objectmodel import specialize
 from pypy.rlib.rarithmetic import LONG_BIT, widen
 from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rlib.rstruct.runpack import runpack
 
 
 def simple_unary_op(func):
@@ -55,6 +56,8 @@
 
 class Primitive(object):
     _mixin_ = True
+    format_code = '?'
+    
     def get_element_size(self):
         return rffi.sizeof(self.T)
 
@@ -84,6 +87,9 @@
     def _coerce(self, space, w_item):
         raise NotImplementedError
 
+    def default_fromstring(self, space):
+        raise NotImplementedError
+
     def read(self, storage, width, i, offset):
         return self.box(libffi.array_getitem(clibffi.cast_type_to_ffitype(self.T),
             width, storage, i, offset
@@ -102,6 +108,9 @@
                 width, storage, i, offset, value
             )
 
+    def runpack_str(self, s):
+        return self.box(runpack(self.format_code, s))
+
     @simple_binary_op
     def add(self, v1, v2):
         return v1 + v2
@@ -164,6 +173,7 @@
 class Bool(BaseType, Primitive):
     T = lltype.Bool
     BoxType = interp_boxes.W_BoolBox
+    format_code = '?'
 
     True = BoxType(True)
     False = BoxType(False)
@@ -192,6 +202,9 @@
 
     def for_computation(self, v):
         return int(v)
+    
+    def default_fromstring(self, space):
+        return self.box(False)
 
 class Integer(Primitive):
     _mixin_ = True
@@ -205,6 +218,9 @@
 
     def for_computation(self, v):
         return widen(v)
+    
+    def default_fromstring(self, space):
+        return self.box(0)
 
     @simple_binary_op
     def div(self, v1, v2):
@@ -241,30 +257,37 @@
 class Int8(BaseType, Integer):
     T = rffi.SIGNEDCHAR
     BoxType = interp_boxes.W_Int8Box
+    format_code = "b"
 
 class UInt8(BaseType, Integer):
     T = rffi.UCHAR
     BoxType = interp_boxes.W_UInt8Box
+    format_code = "B"
 
 class Int16(BaseType, Integer):
     T = rffi.SHORT
     BoxType = interp_boxes.W_Int16Box
+    format_code = "h"
 
 class UInt16(BaseType, Integer):
     T = rffi.USHORT
     BoxType = interp_boxes.W_UInt16Box
+    format_code = "H"
 
 class Int32(BaseType, Integer):
     T = rffi.INT
     BoxType = interp_boxes.W_Int32Box
+    format_code = "i"
 
 class UInt32(BaseType, Integer):
     T = rffi.UINT
     BoxType = interp_boxes.W_UInt32Box
+    format_code = "I"
 
 class Long(BaseType, Integer):
     T = rffi.LONG
     BoxType = interp_boxes.W_LongBox
+    format_code = 'l'
 
 class ULong(BaseType, Integer):
     T = rffi.ULONG
@@ -273,10 +296,12 @@
 class Int64(BaseType, Integer):
     T = rffi.LONGLONG
     BoxType = interp_boxes.W_Int64Box
+    format_code = "q"
 
 class UInt64(BaseType, Integer):
     T = rffi.ULONGLONG
     BoxType = interp_boxes.W_UInt64Box
+    format_code = "Q"
 
     def _coerce(self, space, w_item):
         try:
@@ -304,6 +329,9 @@
     def for_computation(self, v):
         return float(v)
 
+    def default_fromstring(self, space):
+        return self.box(-1.0)
+
     @simple_binary_op
     def div(self, v1, v2):
         try:
@@ -403,7 +431,9 @@
 class Float32(BaseType, Float):
     T = rffi.FLOAT
     BoxType = interp_boxes.W_Float32Box
+    format_code = "f"
 
 class Float64(BaseType, Float):
     T = rffi.DOUBLE
-    BoxType = interp_boxes.W_Float64Box
\ No newline at end of file
+    BoxType = interp_boxes.W_Float64Box
+    format_code = "d"
\ No newline at end of file