[pypy-commit] pypy default: backout the backout of str-dtype-improvement in c967eefd1789 after discussion with fijal

bdkearns noreply at buildbot.pypy.org
Fri Mar 22 22:46:41 CET 2013


Author: Brian Kearns <bdkearns at gmail.com>
Branch: 
Changeset: r62664:f7ec62daeb1b
Date: 2013-03-22 17:45 -0400
http://bitbucket.org/pypy/pypy/changeset/f7ec62daeb1b/

Log:	backout the backout of str-dtype-improvement in c967eefd1789 after
	discussion with fijal

diff --git a/pypy/module/micronumpy/arrayimpl/concrete.py b/pypy/module/micronumpy/arrayimpl/concrete.py
--- a/pypy/module/micronumpy/arrayimpl/concrete.py
+++ b/pypy/module/micronumpy/arrayimpl/concrete.py
@@ -49,8 +49,8 @@
             return
         shape = shape_agreement(space, self.get_shape(), arr)
         if impl.storage == self.storage:
-            impl = impl.copy()
-        loop.setslice(shape, self, impl)
+            impl = impl.copy(space)
+        loop.setslice(space, shape, self, impl)
 
     def get_size(self):
         return self.size // self.dtype.itemtype.get_element_size()
@@ -245,12 +245,12 @@
         return SliceArray(self.start, strides,
                           backstrides, shape, self, orig_array)
 
-    def copy(self):
+    def copy(self, space):
         strides, backstrides = support.calc_strides(self.get_shape(), self.dtype,
                                                     self.order)
         impl = ConcreteArray(self.get_shape(), self.dtype, self.order, strides,
                              backstrides)
-        return loop.setslice(self.get_shape(), impl, self)
+        return loop.setslice(space, self.get_shape(), impl, self)
 
     def create_axis_iter(self, shape, dim, cum):
         return iter.AxisIterator(self, shape, dim, cum)
@@ -281,7 +281,11 @@
 
     def astype(self, space, dtype):
         new_arr = W_NDimArray.from_shape(self.get_shape(), dtype)
-        loop.copy_from_to(self, new_arr.implementation, dtype)
+        if dtype.is_str_or_unicode():
+            raise OperationError(space.w_NotImplementedError, space.wrap(
+                "astype(%s) not implemented yet" % self.dtype))
+        else:    
+            loop.setslice(space, new_arr.get_shape(), new_arr.implementation, self)
         return new_arr
 
 class ConcreteArrayNotOwning(BaseConcreteArray):
diff --git a/pypy/module/micronumpy/arrayimpl/scalar.py b/pypy/module/micronumpy/arrayimpl/scalar.py
--- a/pypy/module/micronumpy/arrayimpl/scalar.py
+++ b/pypy/module/micronumpy/arrayimpl/scalar.py
@@ -50,7 +50,7 @@
     def set_scalar_value(self, w_val):
         self.value = w_val.convert_to(self.dtype)
 
-    def copy(self):
+    def copy(self, space):
         scalar = Scalar(self.dtype)
         scalar.value = self.value
         return scalar
diff --git a/pypy/module/micronumpy/interp_arrayops.py b/pypy/module/micronumpy/interp_arrayops.py
--- a/pypy/module/micronumpy/interp_arrayops.py
+++ b/pypy/module/micronumpy/interp_arrayops.py
@@ -116,12 +116,21 @@
                     "all the input arrays must have same number of dimensions"))
             elif i == _axis:
                 shape[i] += axis_size
+        a_dt = arr.get_dtype()
+        if dtype.is_record_type() and a_dt.is_record_type():
+            #Record types must match
+            for f in dtype.fields:
+                if f not in a_dt.fields or \
+                             dtype.fields[f] != a_dt.fields[f]:
+                    raise OperationError(space.w_TypeError, 
+                               space.wrap("record type mismatch"))
+        elif dtype.is_record_type() or a_dt.is_record_type():
+            raise OperationError(space.w_TypeError, 
+                        space.wrap("invalid type promotion"))
         dtype = interp_ufuncs.find_binop_result_dtype(space, dtype,
                                                       arr.get_dtype())
         if _axis < 0 or len(arr.get_shape()) <= _axis:
             raise operationerrfmt(space.w_IndexError, "axis %d out of bounds [0, %d)", axis, len(shape))
-    if _axis < 0 or len(shape) <= _axis:
-        raise operationerrfmt(space.w_IndexError, "axis %d out of bounds [0, %d)", axis, len(shape))
     res = W_NDimArray.from_shape(shape, dtype, 'C')
     chunks = [Chunk(0, i, 1, i) for i in shape]
     axis_start = 0
diff --git a/pypy/module/micronumpy/interp_boxes.py b/pypy/module/micronumpy/interp_boxes.py
--- a/pypy/module/micronumpy/interp_boxes.py
+++ b/pypy/module/micronumpy/interp_boxes.py
@@ -290,6 +290,10 @@
         dtype.itemtype.store(self.arr, self.ofs, ofs,
                              dtype.coerce(space, w_value))
 
+    def convert_to(self, dtype):
+        # if we reach here, the record fields are guaranteed to match.
+        return self
+
 class W_CharacterBox(W_FlexibleBox):
     pass
 
@@ -303,10 +307,6 @@
             arr.storage[i] = arg[i]
         return W_StringBox(arr, 0, arr.dtype)
 
-    def convert_to(self, dtype):
-        from pypy.module.micronumpy import types
-        assert isinstance(dtype.itemtype, types.StringType)
-        return self
 
 class W_UnicodeBox(W_CharacterBox):
     def descr__new__unicode_box(space, w_subtype, w_arg):
@@ -320,11 +320,6 @@
         #    arr.storage[i] = arg[i]
         return W_UnicodeBox(arr, 0, arr.dtype)
 
-    def convert_to(self, dtype):
-        from pypy.module.micronumpy import types
-        assert isinstance(dtype.itemtype, types.UnicodeType)
-        return self
-
 
 class W_ComplexFloatingBox(W_InexactBox):
     _attrs_ = ()
diff --git a/pypy/module/micronumpy/interp_dtype.py b/pypy/module/micronumpy/interp_dtype.py
--- a/pypy/module/micronumpy/interp_dtype.py
+++ b/pypy/module/micronumpy/interp_dtype.py
@@ -71,6 +71,8 @@
     def box_complex(self, real, imag):
         return self.itemtype.box_complex(real, imag)
 
+    def build_and_convert(self, space, box):
+        return self.itemtype.build_and_convert(space, self, box)
     def coerce(self, space, w_item):
         return self.itemtype.coerce(space, self, w_item)
 
diff --git a/pypy/module/micronumpy/interp_flatiter.py b/pypy/module/micronumpy/interp_flatiter.py
--- a/pypy/module/micronumpy/interp_flatiter.py
+++ b/pypy/module/micronumpy/interp_flatiter.py
@@ -76,7 +76,7 @@
         base = self.base
         start, stop, step, length = space.decode_index4(w_idx, base.get_size())
         arr = convert_to_array(space, w_value)
-        loop.flatiter_setitem(self.base, arr, start, step, length)
+        loop.flatiter_setitem(space, self.base, arr, start, step, length)
 
     def descr_iter(self):
         return self
diff --git a/pypy/module/micronumpy/interp_numarray.py b/pypy/module/micronumpy/interp_numarray.py
--- a/pypy/module/micronumpy/interp_numarray.py
+++ b/pypy/module/micronumpy/interp_numarray.py
@@ -258,17 +258,14 @@
         return self.implementation.get_scalar_value()
 
     def descr_copy(self, space):
-        return W_NDimArray(self.implementation.copy())
+        return W_NDimArray(self.implementation.copy(space))
 
     def descr_get_real(self, space):
         return W_NDimArray(self.implementation.get_real(self))
 
     def descr_get_imag(self, space):
         ret = self.implementation.get_imag(self)
-        if ret:
-            return W_NDimArray(ret)
-        raise OperationError(space.w_NotImplementedError,
-                    space.wrap('imag not implemented for this dtype'))
+        return W_NDimArray(ret)
 
     def descr_set_real(self, space, w_value):
         # copy (broadcast) values into self
diff --git a/pypy/module/micronumpy/interp_ufuncs.py b/pypy/module/micronumpy/interp_ufuncs.py
--- a/pypy/module/micronumpy/interp_ufuncs.py
+++ b/pypy/module/micronumpy/interp_ufuncs.py
@@ -414,7 +414,7 @@
     if promote_to_float:
         return find_unaryop_result_dtype(space, dt2, promote_to_float=True)
     # If they're the same kind, choose the greater one.
-    if dt1.kind == dt2.kind:
+    if dt1.kind == dt2.kind and not dt2.is_flexible_type():
         return dt2
 
     # Everything promotes to float, and bool promotes to everything.
@@ -434,7 +434,23 @@
     elif dt2.num == 10 or (LONG_BIT == 64 and dt2.num == 8):
         # UInt64 + signed = Float64
         dtypenum = 12
-    else:
+    elif dt2.is_flexible_type():
+        # For those operations that get here (concatenate, stack),
+        # flexible types take precedence over numeric type
+        if dt2.is_record_type():
+            return dt2
+        if dt1.is_str_or_unicode():
+            if dt2.num == 18:
+                if dt2.itemtype.get_element_size() >= \
+                           dt1.itemtype.get_element_size():
+                    return dt2
+                return dt1
+            if dt2.itemtype.get_element_size() >= \
+                       dt1.itemtype.get_element_size():
+                return dt2
+            return dt1
+        return dt2
+    else:    
         # increase to the next signed type
         dtypenum = dt2.num + 1
     newdtype = interp_dtype.get_dtype_cache(space).dtypes_by_num[dtypenum]
diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py
--- a/pypy/module/micronumpy/loop.py
+++ b/pypy/module/micronumpy/loop.py
@@ -65,12 +65,19 @@
         obj_iter.next()
     return out
 
-setslice_driver = jit.JitDriver(name='numpy_setslice',
+setslice_driver1 = jit.JitDriver(name='numpy_setslice1',
                                 greens = ['shapelen', 'dtype'],
-                                reds = ['target', 'source', 'target_iter',
-                                        'source_iter'])
+                                reds = 'auto')
+setslice_driver2 = jit.JitDriver(name='numpy_setslice2',
+                                greens = ['shapelen', 'dtype'],
+                                reds = 'auto')
 
-def setslice(shape, target, source):
+def setslice(space, shape, target, source):
+    if target.dtype.is_str_or_unicode():
+        return setslice_build_and_convert(space, shape, target, source)
+    return setslice_to(space, shape, target, source)
+
+def setslice_to(space, shape, target, source):
     # note that unlike everything else, target and source here are
     # array implementations, not arrays
     target_iter = target.create_iter(shape)
@@ -78,15 +85,26 @@
     dtype = target.dtype
     shapelen = len(shape)
     while not target_iter.done():
-        setslice_driver.jit_merge_point(shapelen=shapelen, dtype=dtype,
-                                        target=target, source=source,
-                                        target_iter=target_iter,
-                                        source_iter=source_iter)
+        setslice_driver1.jit_merge_point(shapelen=shapelen, dtype=dtype)
         target_iter.setitem(source_iter.getitem().convert_to(dtype))
         target_iter.next()
         source_iter.next()
     return target
 
+def setslice_build_and_convert(space, shape, target, source):
+    # note that unlike everything else, target and source here are
+    # array implementations, not arrays
+    target_iter = target.create_iter(shape)
+    source_iter = source.create_iter(shape)
+    dtype = target.dtype
+    shapelen = len(shape)
+    while not target_iter.done():
+        setslice_driver2.jit_merge_point(shapelen=shapelen, dtype=dtype)
+        target_iter.setitem(dtype.build_and_convert(space, source_iter.getitem()))
+        target_iter.next()
+        source_iter.next()
+    return target
+
 reduce_driver = jit.JitDriver(name='numpy_reduce',
                               greens = ['shapelen', 'func', 'done_func',
                                         'calc_dtype', 'identity'],
@@ -358,17 +376,27 @@
         ri.next()
     return res
 
-flatiter_setitem_driver = jit.JitDriver(name = 'numpy_flatiter_setitem',
+flatiter_setitem_driver1 = jit.JitDriver(name = 'numpy_flatiter_setitem1',
                                         greens = ['dtype'],
                                         reds = 'auto')
 
-def flatiter_setitem(arr, val, start, step, length):
+flatiter_setitem_driver2 = jit.JitDriver(name = 'numpy_flatiter_setitem2',
+                                        greens = ['dtype'],
+                                        reds = 'auto')
+
+def flatiter_setitem(space, arr, val, start, step, length):
+    dtype = arr.get_dtype()
+    if dtype.is_str_or_unicode():
+        return flatiter_setitem_build_and_convert(space, arr, val, start, step, length)
+    return flatiter_setitem_to(space, arr, val, start, step, length)
+
+def flatiter_setitem_to(space, arr, val, start, step, length):
     dtype = arr.get_dtype()
     arr_iter = arr.create_iter()
     val_iter = val.create_iter()
     arr_iter.next_skip_x(start)
     while length > 0:
-        flatiter_setitem_driver.jit_merge_point(dtype=dtype)
+        flatiter_setitem_driver1.jit_merge_point(dtype=dtype)
         arr_iter.setitem(val_iter.getitem().convert_to(dtype))
         # need to repeat i_nput values until all assignments are done
         arr_iter.next_skip_x(step)
@@ -377,6 +405,21 @@
         # WTF numpy?
         val_iter.reset()
 
+def flatiter_setitem_build_and_convert(space, arr, val, start, step, length):
+    dtype = arr.get_dtype()
+    arr_iter = arr.create_iter()
+    val_iter = val.create_iter()
+    arr_iter.next_skip_x(start)
+    while length > 0:
+        flatiter_setitem_driver2.jit_merge_point(dtype=dtype)
+        arr_iter.setitem(dtype.build_and_convert(space, val_iter.getitem()))
+        # need to repeat input values until all assignments are done
+        arr_iter.next_skip_x(step)
+        length -= 1
+        val_iter.next()
+        # WTF numpy?
+        val_iter.reset()
+
 fromstring_driver = jit.JitDriver(name = 'numpy_fromstring',
                                   greens = ['itemsize', 'dtype'],
                                   reds = 'auto')
@@ -461,18 +504,6 @@
                           val_arr.descr_getitem(space, w_idx))
         iter.next()
 
-copy_from_to_driver = jit.JitDriver(greens = ['dtype'],
-                                    reds = 'auto')
-
-def copy_from_to(from_, to, dtype):
-    from_iter = from_.create_iter()
-    to_iter = to.create_iter()
-    while not from_iter.done():
-        copy_from_to_driver.jit_merge_point(dtype=dtype)
-        to_iter.setitem(from_iter.getitem().convert_to(dtype))
-        to_iter.next()
-        from_iter.next()
-
 byteswap_driver = jit.JitDriver(greens = ['dtype'],
                                     reds = 'auto')
 
diff --git a/pypy/module/micronumpy/test/test_numarray.py b/pypy/module/micronumpy/test/test_numarray.py
--- a/pypy/module/micronumpy/test/test_numarray.py
+++ b/pypy/module/micronumpy/test/test_numarray.py
@@ -1480,6 +1480,32 @@
         a = (a + a)[::2]
         b = concatenate((a[:3], a[-3:]))
         assert (b == [2, 6, 10, 2, 6, 10]).all()
+        a = concatenate((array([1]), array(['abc'])))
+        assert str(a.dtype) == '|S3'
+        a = concatenate((array([]), array(['abc'])))
+        assert a[0] == 'abc'
+        a = concatenate((['abcdef'], ['abc']))
+        assert a[0] == 'abcdef'
+        assert str(a.dtype) == '|S6'
+    
+    def test_record_concatenate(self):
+        # only an exact match can succeed
+        from numpypy import zeros, concatenate
+        a = concatenate((zeros((2,),dtype=[('x', int), ('y', float)]),
+                         zeros((2,),dtype=[('x', int), ('y', float)])))
+        assert a.shape == (4,)
+        exc = raises(TypeError, concatenate, 
+                            (zeros((2,), dtype=[('x', int), ('y', float)]),
+                            (zeros((2,), dtype=[('x', float), ('y', float)]))))
+        assert str(exc.value).startswith('record type mismatch')
+        exc = raises(TypeError, concatenate, ([1], zeros((2,),
+                                            dtype=[('x', int), ('y', float)])))
+        assert str(exc.value).startswith('invalid type promotion')
+        exc = raises(TypeError, concatenate, (['abc'], zeros((2,),
+                                            dtype=[('x', int), ('y', float)])))
+        assert str(exc.value).startswith('invalid type promotion')
+
+
 
     def test_std(self):
         from numpypy import array
@@ -1650,6 +1676,12 @@
 
         a = array('x').astype('S3').dtype
         assert a.itemsize == 3
+        # scalar vs. array
+        try:
+            a = array([1, 2, 3.14156]).astype('S3').dtype
+            assert a.itemsize == 3
+        except NotImplementedError:
+            skip('astype("S3") not implemented for numeric arrays')
 
     def test_base(self):
         from numpypy import array
@@ -1955,7 +1987,7 @@
         assert (a.transpose() == b).all()
 
     def test_flatiter(self):
-        from numpypy import array, flatiter, arange
+        from numpypy import array, flatiter, arange, zeros
         a = array([[10, 30], [40, 60]])
         f_iter = a.flat
         assert f_iter.next() == 10
@@ -1971,6 +2003,9 @@
         a = arange(10).reshape(5, 2)
         raises(IndexError, 'a.flat[(1, 2)]')
         assert a.flat.base is a
+        m = zeros((2,2), dtype='S3')
+        m.flat[1] = 1
+        assert m[0,1] == '1'
 
     def test_flatiter_array_conv(self):
         from numpypy import array, dot
diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py
--- a/pypy/module/micronumpy/types.py
+++ b/pypy/module/micronumpy/types.py
@@ -1634,6 +1634,7 @@
     def get_size(self):
         return self.size
 
+
 class StringType(BaseType, BaseStringType):
     T = lltype.Char
 
@@ -1641,7 +1642,7 @@
     def coerce(self, space, dtype, w_item):
         from pypy.module.micronumpy.interp_dtype import new_string_dtype
         arg = space.str_w(space.str(w_item))
-        arr = interp_boxes.VoidBoxStorage(len(arg), new_string_dtype(space, len(arg)))
+        arr = VoidBoxStorage(len(arg), new_string_dtype(space, len(arg)))
         for i in range(len(arg)):
             arr.storage[i] = arg[i]
         return interp_boxes.W_StringBox(arr,  0, arr.dtype)
@@ -1682,6 +1683,20 @@
     def to_builtin_type(self, space, box):
         return space.wrap(self.to_str(box))
 
+    def build_and_convert(self, space, mydtype, box):
+        if box.get_dtype(space).is_str_or_unicode():
+            arg = box.get_dtype(space).itemtype.to_str(box)
+        else:
+            w_arg = box.descr_str(space)
+            arg = space.str_w(space.str(w_arg))
+        arr = VoidBoxStorage(self.size, mydtype)
+        i = 0
+        for i in range(min(len(arg), self.size)):
+            arr.storage[i] = arg[i]
+        for j in range(i + 1, self.size):
+            arr.storage[j] = '\x00'
+        return interp_boxes.W_StringBox(arr,  0, arr.dtype)
+        
 class VoidType(BaseType, BaseStringType):
     T = lltype.Char
 


More information about the pypy-commit mailing list