[pypy-commit] pypy vecopt: distinguish between input/output arguments in vector type conversion

plan_rich noreply at buildbot.pypy.org
Mon Jun 1 09:14:31 CEST 2015


Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77736:cb7dddccc7f0
Date: 2015-06-01 09:14 +0200
http://bitbucket.org/pypy/pypy/changeset/cb7dddccc7f0/

Log:	distinguish between input/output arguments in vector type conversion
	call2 uses a list to track iterators and their states

diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py
--- a/pypy/module/micronumpy/loop.py
+++ b/pypy/module/micronumpy/loop.py
@@ -15,7 +15,7 @@
 
 call2_driver = jit.JitDriver(
     name='numpy_call2',
-    greens=['shapelen', 'func', 'left_advance', 'right_advance', 'calc_dtype', 'res_dtype' ],
+    greens=['shapelen', 'func', 'left_iter_index', 'right_iter_index', 'calc_dtype', 'res_dtype' ],
     reds='auto', vectorize=True)
 
 def call2(space, shape, func, calc_dtype, res_dtype, w_lhs, w_rhs, out):
@@ -43,9 +43,12 @@
 
     # TODO handle __array_priorities__ and maybe flip the order
 
+    left_iter_index = 1
+    right_iter_index = 2
     if w_lhs.get_size() == 1:
         w_left = w_lhs.get_scalar_value().convert_to(space, calc_dtype)
         left_iter = left_state = None
+        left_iter_index = -1
     else:
         w_left = None
         left_iter, left_state = w_lhs.create_iter(shape)
@@ -54,6 +57,7 @@
     if w_rhs.get_size() == 1:
         w_right = w_rhs.get_scalar_value().convert_to(space, calc_dtype)
         right_iter = right_state = None
+        right_iter_index = -1
     else:
         w_right = None
         right_iter, right_state = w_rhs.create_iter(shape)
@@ -63,34 +67,34 @@
                                      w_instance=lhs_for_subtype)
     out_iter, out_state = out.create_iter(shape)
 
-    left_advance = True
-    right_advance = True
-    if left_iter and left_iter.matches_range(out_iter):
-        left_advance = False
-        left_state = out_state
-    if right_iter and right_iter.matches_range(out_iter):
-        right_advance = False
-        right_state = out_state
+    iter_list = [out_iter, left_iter, right_iter]
+    state_list = [out_state, left_state, right_state]
+
+    if left_iter_index > 0 and left_iter.matches_range(out_iter):
+        left_iter_index = 0
+    if right_iter_index > 0 and right_iter.matches_range(out_iter):
+        right_iter_index = 0
 
     shapelen = len(shape)
     while not out_iter.done(out_state):
-        call2_driver.jit_merge_point(shapelen=shapelen, left_advance=left_advance, right_advance=right_advance,
+        call2_driver.jit_merge_point(shapelen=shapelen, left_iter_index=left_iter_index,
+                                     right_iter_index=right_iter_index,
                                      func=func, calc_dtype=calc_dtype, res_dtype=res_dtype)
-        if left_iter:
-            w_left = left_iter.getitem(left_state).convert_to(space, calc_dtype)
-            if left_advance:
-                left_state = left_iter.next(left_state)
-        if right_iter:
-            w_right = right_iter.getitem(right_state).convert_to(space, calc_dtype)
-            if right_advance:
-                right_state = right_iter.next(right_state)
+        if left_iter_index > 0:
+            iter = iter_list[left_iter_index]
+            state = state_list[left_iter_index]
+            w_left = iter.getitem(state).convert_to(space, calc_dtype)
+            if left_iter_index == 1:
+                state_list[left_iter_index] = iter.next(state)
+        if right_iter_index > 0:
+            iter = iter_list[right_iter_index]
+            state = state_list[right_iter_index]
+            w_right = iter.getitem(state).convert_to(space, calc_dtype)
+            if right_iter_index == 2:
+                state_list[right_iter_index] = iter.next(state)
         out_iter.setitem(out_state, func(calc_dtype, w_left, w_right).convert_to(
             space, res_dtype))
-        out_state = out_iter.next(out_state)
-        if not left_advance:
-            left_state = out_state
-        if not right_advance:
-            right_state = out_state
+        state_list[0] = out_state = out_iter.next(out_state)
 
     return out
 
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -15,6 +15,7 @@
         ns = {
             'double': self.floatarraydescr,
             'float': self.singlefloatarraydescr,
+            'long': self.intarraydescr,
         }
         loop = opparse("        [p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,f0,f1,f2,f3,f4,f5]\n" + source + \
                        "\n        jump(p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,f0,f1,f2,f3,f4,f5)",
@@ -24,7 +25,7 @@
         return loop
 
     def pack(self, loop, l, r):
-        return [Node(op,i) for i,op in enumerate(loop.operations[l:r])]
+        return [Node(op,l+i) for i,op in enumerate(loop.operations[l:r])]
 
     def schedule(self, loop_orig, packs, vec_reg_size=16):
         loop = get_model(False).ExtendedTreeLoop("loop")
@@ -35,7 +36,7 @@
         vsd = VecScheduleData(vec_reg_size)
         for pack in packs:
             if len(pack) == 1:
-                ops.append(pack[0])
+                ops.append(pack[0].getoperation())
             else:
                 for op in vsd.as_vector_operation(Pack(pack)):
                     ops.append(op)
@@ -58,22 +59,42 @@
         loop2 = self.schedule(loop1, [pack1])
         loop3 = self.parse("""
         v1[i32#4] = vec_raw_load(p0, i0, 4, descr=float)
-        i14 = vec_raw_load(p0, i4, descr=float)
-        i15 = vec_raw_load(p0, i5, descr=float)
+        i14 = raw_load(p0, i4, descr=float)
+        i15 = raw_load(p0, i5, descr=float)
+        """)
+        self.assert_equal(loop2, loop3)
+
+    def test_int_to_float(self):
+        loop1 = self.parse("""
+        i10 = raw_load(p0, i0, descr=long)
+        i11 = raw_load(p0, i1, descr=long)
+        f10 = cast_int_to_float(i10)
+        f11 = cast_int_to_float(i11)
+        """)
+        pack1 = self.pack(loop1, 0, 2)
+        pack2 = self.pack(loop1, 2, 4)
+        print pack1
+        print pack2
+        loop2 = self.schedule(loop1, [pack1, pack2])
+        loop3 = self.parse("""
+        v1[i64#2] = vec_raw_load(p0, i0, 2, descr=long)
+        v2[i32#2] = vec_int_signext(v1[i64#2], 4)
+        v3[f64#2] = vec_cast_int_to_float(v2[i32#2])
         """)
         self.assert_equal(loop2, loop3)
 
     def test_cost_model_reject_only_load_vectorizable(self):
         loop1 = self.parse("""
-        f10 = raw_load(p0, i0, descr=double)
-        f11 = raw_load(p0, i1, descr=double)
-        i1 = int_add(1,1)
-        guard_true(i1) [f10]
+        f10 = raw_load(p0, i0, descr=long)
+        f11 = raw_load(p0, i1, descr=long)
+        guard_true(i0) [f10]
         guard_true(i1) [f11]
         """)
         try:
-            pack1 = self.pack(loop1, 0, 6)
-            loop2 = self.schedule(loop1, [pack1])
+            pack1 = self.pack(loop1, 0, 2)
+            pack2 = self.pack(loop1, 2, 3)
+            pack3 = self.pack(loop1, 3, 4)
+            loop2 = self.schedule(loop1, [pack1, pack2, pack3])
             py.test.fail("this loops should have bailed out")
         except NotAProfitableLoop:
             pass
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -789,44 +789,63 @@
         self.preamble_ops = None
         self.sched_data = None
         self.pack = None
+        self.input_type = None
+        self.output_type = None
 
     def is_vector_arg(self, i):
         if i < 0 or i >= len(self.arg_ptypes):
             return False
         return self.arg_ptypes[i] is not None
 
-    def pack_ptype(self, op):
+    def getsplitsize(self):
+        return self.input_type.getsize()
+
+    def determine_input_type(self, op):
         _, vbox = self.sched_data.getvector_of_box(op.getarg(0))
         if vbox:
             return PackType.of(vbox)
         else:
             raise RuntimeError("fatal: box %s is not in a vector box" % (op.getarg(0),))
 
+    def determine_output_type(self, op):
+        return self.determine_input_type(op)
+
     def as_vector_operation(self, pack, sched_data, oplist):
         self.sched_data = sched_data
         self.preamble_ops = oplist
         op0 = pack.operations[0].getoperation()
-        self.ptype = self.pack_ptype(op0)
+        self.input_type = self.determine_input_type(op0)
+        self.output_type = self.determine_output_type(op0)
 
         off = 0
         stride = self.split_pack(pack)
+        left = len(pack.operations)
         assert stride > 0
         while off < len(pack.operations):
+            if left < stride:
+                self.preamble_ops.append(pack.operations[off].getoperation())
+                off += 1
+                continue
             ops = pack.operations[off:off+stride]
             self.pack = Pack(ops)
             self.transform_pack(ops, off, stride)
             off += stride
+            left -= stride
 
         self.pack = None
         self.preamble_ops = None
         self.sched_data = None
-        self.ptype = None
+        self.input_type = None
+        self.output_type = None
 
     def split_pack(self, pack):
         pack_count = len(pack.operations)
         vec_reg_size = self.sched_data.vec_reg_size
-        if pack_count * self.ptype.getsize() > vec_reg_size:
-            return vec_reg_size // self.ptype.getsize()
+        bytes = pack_count * self.getsplitsize()
+        if bytes > vec_reg_size:
+            return vec_reg_size // self.getsplitsize()
+        if bytes < vec_reg_size:
+            return 1
         return pack_count
 
     def before_argument_transform(self, args):
@@ -838,11 +857,11 @@
         #
         self.before_argument_transform(args)
         #
-        result = op.result
         for i,arg in enumerate(args):
             if self.is_vector_arg(i):
                 args[i] = self.transform_argument(args[i], i, off)
         #
+        result = op.result
         result = self.transform_result(result, off)
         #
         vop = ResOperation(op.vector, args, result, op.getdescr())
@@ -860,31 +879,23 @@
         return vbox
 
     def new_result_vector_box(self):
-        size = self.ptype.getsize()
-        count = min(self.ptype.getcount(), len(self.pack.operations))
-        return BoxVector(self.ptype.gettype(), count, size, self.ptype.signed)
+        type = self.output_type.gettype()
+        size = self.output_type.getsize()
+        count = min(self.output_type.getcount(), len(self.pack.operations))
+        signed = self.output_type.signed
+        return BoxVector(type, count, size, signed)
 
     def transform_argument(self, arg, argidx, off):
         ops = self.pack.operations
         box_pos, vbox = self.sched_data.getvector_of_box(arg)
         if not vbox:
             # constant/variable expand this box
-            vbox = self.ptype.new_vector_box(len(ops))
+            vbox = self.input_type.new_vector_box(len(ops))
             vbox = self.expand_box_to_vector_box(vbox, ops, arg, argidx)
             box_pos = 0
 
-        enforced_type = self.ptype
-        # convert type f -> i, i -> f
-        # if enforced_type.gettype() != vbox.gettype():
-        #     raise NotImplementedError("cannot yet convert between types")
-
-        # convert size i64 -> i32, i32 -> i64, ...
-        if enforced_type.getsize() != vbox.getsize():
-            vbox = self.extend(vbox, self.ptype)
-
         # use the input as an indicator for the pack type
-        arg_ptype = PackType.of(vbox)
-        packable = self.sched_data.vec_reg_size // arg_ptype.getsize()
+        packable = self.sched_data.vec_reg_size // self.input_type.getsize()
         packed = vbox.item_count
         assert packed >= 0
         assert packable >= 0
@@ -894,21 +905,24 @@
             vbox = self._pack(vbox, packed, args, packable)
         elif packed > packable:
             # the argument has more items than the operation is able to process!
-            vbox = self.unpack(vbox, off, packable, arg_ptype)
+            vbox = self.unpack(vbox, off, packable, self.input_type)
         #
         if off != 0 and box_pos != 0:
             # The original box is at a position != 0 but it
             # is required to be at position 0. Unpack it!
-            vbox = self.unpack(vbox, off, len(ops), arg_ptype)
+            vbox = self.unpack(vbox, off, len(ops), self.input_type)
+        # convert type f -> i, i -> f
+        if self.input_type.gettype() != vbox.gettype():
+            raise NotImplementedError("cannot yet convert between types")
+        # convert size i64 -> i32, i32 -> i64, ...
+        if self.input_type.getsize() > 0 and \
+           self.input_type.getsize() != vbox.getsize():
+            vbox = self.extend(vbox, self.input_type)
         #
         return vbox
 
     def extend(self, vbox, newtype):
-        if vbox.item_count * vbox.item_size == self.sched_data.vec_reg_size:
-            return vbox
         assert vbox.gettype() == newtype.gettype()
-        assert (vbox.item_count * newtype.getsize()) == \
-               self.sched_data.vec_reg_size
         if vbox.gettype() == INT:
             return self.extend_int(vbox, newtype)
         else:
@@ -1025,6 +1039,12 @@
         self.to_size = outtype.getsize()
         OpToVectorOp.__init__(self, (intype, ), outtype)
 
+    def determine_input_type(self, op):
+        return self.arg_ptypes[0]
+
+    def determine_output_type(self, op):
+        return self.result_ptype
+
     def split_pack(self, pack):
         if self.from_size > self.to_size:
             # cast down
@@ -1037,12 +1057,14 @@
         return len(pack.operations)
 
     def new_result_vector_box(self):
+        type = self.output_type.gettype()
         size = self.to_size
-        count = self.ptype.getcount()
+        count = self.output_type.getcount()
         vec_reg_size = self.sched_data.vec_reg_size
         if count * size > vec_reg_size:
             count = vec_reg_size // size
-        return BoxVector(self.result_ptype.gettype(), count, size, self.ptype.signed)
+        signed = self.output_type.signed
+        return BoxVector(type, count, size, signed)
 
 class SignExtToVectorOp(OpToVectorOp):
     def __init__(self, intype, outtype):
@@ -1054,7 +1076,7 @@
         sizearg = op0.getarg(1)
         assert isinstance(sizearg, ConstInt)
         self.size = sizearg.value
-        if self.ptype.getsize() > self.size:
+        if self.input_type.getsize() > self.size:
             # cast down
             return OpToVectorOp.split_pack(self, pack)
         _, vbox = self.sched_data.getvector_of_box(op0.getarg(0))
@@ -1064,11 +1086,11 @@
         return vbox.getcount()
 
     def new_result_vector_box(self):
-        count = self.ptype.getcount()
+        count = self.input_type.getcount()
         vec_reg_size = self.sched_data.vec_reg_size
         if count * self.size > vec_reg_size:
             count = vec_reg_size // self.size
-        return BoxVector(self.result_ptype.gettype(), count, self.size, self.ptype.signed)
+        return BoxVector(self.result_ptype.gettype(), count, self.size, self.input_type.signed)
 
 PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, False)
 
@@ -1076,22 +1098,38 @@
     def __init__(self):
         OpToVectorOp.__init__(self, (), PT_GENERIC)
 
-    def pack_ptype(self, op):
+    def determine_input_type(self, op):
+        return None
+
+    def determine_output_type(self, op):
         return PackType.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
 
     def before_argument_transform(self, args):
         args.append(ConstInt(len(self.pack.operations)))
 
+    def getsplitsize(self):
+        return self.output_type.getsize()
+
+    def new_result_vector_box(self):
+        type = self.output_type.gettype()
+        size = self.output_type.getsize()
+        count = len(self.pack.operations)
+        signed = self.output_type.signed
+        return BoxVector(type, count, size, signed)
+
 class StoreToVectorStore(OpToVectorOp):
     def __init__(self):
         OpToVectorOp.__init__(self, (None, None, PT_GENERIC), None)
         self.has_descr = True
 
-    def pack_ptype(self, op):
+    def determine_input_type(self, op):
         return PackType.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
 
-PT_FLOAT = PackType(FLOAT, 4, False)
-PT_DOUBLE = PackType(FLOAT, 8, False)
+    def determine_output_type(self, op):
+        return None
+
+PT_FLOAT_2 = PackType(FLOAT, 4, False, 2)
+PT_DOUBLE_2 = PackType(FLOAT, 8, False, 2)
 PT_FLOAT_GENERIC = PackType(INT, -1, True)
 PT_INT64 = PackType(INT, 8, True)
 PT_INT32 = PackType(INT, 4, True)
@@ -1107,6 +1145,8 @@
 LOAD_TRANS = LoadToVectorLoad()
 STORE_TRANS = StoreToVectorStore()
 
+# note that the following definition is x86 machine
+# specific.
 ROP_ARG_RES_VECTOR = {
     rop.VEC_INT_ADD:     INT_OP_TO_VOP,
     rop.VEC_INT_SUB:     INT_OP_TO_VOP,
@@ -1130,10 +1170,10 @@
     rop.VEC_RAW_STORE:        STORE_TRANS,
     rop.VEC_SETARRAYITEM_RAW: STORE_TRANS,
 
-    rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOpConv(PT_DOUBLE, PT_FLOAT),
-    rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(PT_FLOAT, PT_DOUBLE),
-    rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOpConv(PT_DOUBLE, PT_INT32),
-    rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOpConv(PT_INT32, PT_DOUBLE),
+    rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOpConv(PT_DOUBLE_2, PT_FLOAT_2),
+    rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(PT_FLOAT_2, PT_DOUBLE_2),
+    rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOpConv(PT_DOUBLE_2, PT_INT32),
+    rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOpConv(PT_INT32, PT_DOUBLE_2),
 }
 
 class VecScheduleData(SchedulerData):
@@ -1274,7 +1314,6 @@
     def __init__(self, ops):
         self.operations = ops
         self.savings = 0
-        self.ptype = None
         for node in self.operations:
             node.pack = self
 
@@ -1288,13 +1327,6 @@
         leftmost = other.operations[0]
         return rightmost == leftmost
 
-    def size_in_bytes(self):
-        return self.ptype.get_byte_size() * len(self.operations)
-
-    def is_overloaded(self, vec_reg_byte_size):
-        size = self.size_in_bytes()
-        return size > vec_reg_byte_size
-
     def __repr__(self):
         return "Pack(%r)" % self.operations
 
diff --git a/rpython/jit/tool/oparser.py b/rpython/jit/tool/oparser.py
--- a/rpython/jit/tool/oparser.py
+++ b/rpython/jit/tool/oparser.py
@@ -123,13 +123,13 @@
             box = ts.BoxRef()
             _box_counter_more_than(self.model, elem[1:])
         elif elem.startswith('v'):
-            pattern = re.compile('.*\[(-?)(i|f)(\d+)#(\d+)\]')
+            pattern = re.compile('.*\[(u?)(i|f)(\d+)#(\d+)\]')
             match = pattern.match(elem)
             if match:
                 item_type = match.group(2)[0]
                 item_size = int(match.group(3)) // 8
                 item_count = int(match.group(4))
-                item_signed = match.group(1) == 's'
+                item_signed = not (match.group(1) == 'u')
                 box = self.model.BoxVector(item_type, item_count, item_size, item_signed)
                 lbracket = elem.find('[')
                 number = elem[1:lbracket]


More information about the pypy-commit mailing list