[pypy-commit] pypy vecopt: split box_(un)pack into float_(un)pack and int_(un)pack, adjusted some tests that unrolled too often (scheduler splits packed instructions if size is too big)

plan_rich noreply at buildbot.pypy.org
Fri May 15 18:40:22 CEST 2015


Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77339:2861d6325f12
Date: 2015-05-15 11:07 +0200
http://bitbucket.org/pypy/pypy/changeset/2861d6325f12/

Log:	split box_(un)pack into float_(un)pack and int_(un)pack, adjusted
	some tests that unrolled too often (scheduler splits packed
	instructions if size is too big)

diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -122,7 +122,7 @@
         return """
         a = |30|
         b = a + a
-        b -> 15
+        b -> 17
         """
     def test_float_add(self):
         result = self.run("float_add")
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2554,7 +2554,7 @@
         elif count == 2:
             self.mc.MOVDDUP(resloc, loc0)
 
-    def genop_vec_box_unpack(self, op, arglocs, resloc):
+    def genop_vec_float_unpack(self, op, arglocs, resloc):
         loc0, tmploc, indexloc, countloc = arglocs
         count = countloc.value
         index = indexloc.value
@@ -2566,10 +2566,11 @@
             tmploc = self._shuffle_by_index(loc0, tmploc, item_type, size, index, count)
             self.mc.MOVD32_rx(resloc.value, tmploc.value)
         elif size == 8:
-            if index == 0:
-                self.mc.UNPCKLPD(resloc, loc0)
-            else:
-                self.mc.UNPCKHPD(resloc, loc0)
+            pass
+            #if index == 1:
+            #    self.mc.SHUFPD_xxi(resloc, loc0, 0|(1<<2))
+            #else:
+            #    self.mc.UNPCKHPD(resloc, loc0)
 
     def _shuffle_by_index(self, src_loc, tmp_loc, item_type, size, index, count):
         if index == 0 and count == 1:
@@ -2592,29 +2593,34 @@
             raise NotImplementedError("shuffle by index for non floats")
 
 
-    def genop_vec_box_pack(self, op, arglocs, resloc):
-        toloc, fromloc, tmploc = arglocs
+    def genop_vec_float_pack(self, op, arglocs, resloc):
+        resultloc, fromloc, tmploc = arglocs
         result = op.result
         indexarg = op.getarg(2)
+        countarg = op.getarg(2)
         assert isinstance(result, BoxVector)
         assert isinstance(indexarg, ConstInt)
+        assert isinstance(countarg, ConstInt)
         index = indexarg.value
+        count = countarg.value
         size = result.item_size
-        #py.test.set_trace()
         if size == 4:
-            select = (1 << 2) # move 0 -> 0, 1 -> 1 for toloc
-            # TODO
-            if index == 2:
-                select |= (1<<6) # move 0 -> 2, 1 -> 3 for fromloc
+            if count == 1:
+                raise NotImplementedError("pack: float single pack")
+            elif count == 2:
+                select = (1 << 2) # move 0 -> 0, 1 -> 1 for toloc
+                if index == 0:
+                    # move 0 -> 2, 1 -> 3 for fromloc
+                    self.mc.SHUFPS_xxi(resultloc.value, fromloc.value, select | (1 << 2))
+                elif index == 2:
+                    # move 0 -> 2, 1 -> 3 for fromloc
+                    self.mc.SHUFPS_xxi(resultloc.value, fromloc.value, select | (1 << 6))
+                else:
+                    raise NotImplementedError("pack: only index in {0,2} supported")
             else:
-                raise NotImplementedError("index is not equal to 2")
-
-            self.mc.SHUFPS_xxi(toloc.value, fromloc.value, select)
+                raise NotImplementedError("pack: count 3 for single float pack not supported")
         elif size == 8:
-            if indexloc.value == 0:
-                self.mc.UNPCKLPD(resloc, loc0)
-            else:
-                self.mc.UNPCKHPD(resloc, loc0)
+            raise NotImplementedError("pack: float double pack")
 
     def genop_vec_cast_float_to_singlefloat(self, op, arglocs, resloc):
         argloc, _ = arglocs
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1505,7 +1505,6 @@
 
     consider_vec_raw_store = consider_vec_setarrayitem_raw
 
-    
     def consider_vec_arith(self, op):
         count = op.getarg(2)
         assert isinstance(count, ConstInt)
@@ -1535,7 +1534,7 @@
     consider_vec_float_eq = consider_vec_logic
     del consider_vec_logic
 
-    def consider_vec_box_pack(self, op):
+    def consider_vec_float_pack(self, op):
         args = op.getarglist()
         loc1 = self.make_sure_var_in_reg(op.getarg(1), args)
         result =  self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
@@ -1544,7 +1543,7 @@
         self.xrm.possibly_free_var(tmpxvar)
         self.perform(op, [result, loc1, tmploc], result)
 
-    def consider_vec_box_unpack(self, op):
+    def consider_vec_float_unpack(self, op):
         count = op.getarg(2)
         index = op.getarg(1)
         assert isinstance(count, ConstInt)
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -878,7 +878,7 @@
         vopt = self.schedule(loop,1)
         self.assert_equal(loop, self.parse_loop(vops))
 
-    @pytest.mark.parametrize('unroll', range(2,16,3))
+    @pytest.mark.parametrize('unroll', [1])
     def test_vectorize_index_variable_combination(self, unroll):
         ops = """
         [p0,i0]
@@ -940,7 +940,7 @@
         ops = """
         [p0,i0]
         guard_early_exit() [p0,i0]
-        i1 = getarrayitem_raw(p0, i0, descr=intarraydescr)
+        i1 = getarrayitem_raw(p0, i0, descr=chararraydescr)
         i2 = int_add(i0, 1)
         i3 = int_lt(i2, 102)
         guard_true(i3) [p0,i0]
@@ -957,7 +957,7 @@
         i2 = int_add(i0, 16)
         i3 = int_lt(i2, 102)
         guard_true(i3) [p0,i0]
-        i1 = vec_getarrayitem_raw(p0, i0, 16, descr=intarraydescr)
+        i1 = vec_getarrayitem_raw(p0, i0, 16, descr=chararraydescr)
         jump(p0,i2)
         """.format(dead_code=dead_code)
         vopt = self.vectorize(self.parse_loop(ops),15)
@@ -1071,8 +1071,8 @@
         v62 = vec_raw_load(i4, i41, 2, descr=floatarraydescr) 
         v63 = vec_float_add(v61, v62, 2) 
         vec_raw_store(i0, i37, v63, 2, descr=floatarraydescr) 
-        f100 = vec_box_unpack(v61, 1)
-        f101 = vec_box_unpack(v62, 1)
+        f100 = vec_float_unpack(v61, 1, 1)
+        f101 = vec_float_unpack(v62, 1, 1)
         jump(p36, i53, p9, i56, p14, f100, p12, p38, f101, p39, i40, i54, p42, i43, i55, i21, i4, i0, i18)
         """
         vopt = self.vectorize(self.parse_loop(ops))
@@ -1137,8 +1137,8 @@
         v18 = vec_getarrayitem_raw(p0, i5, 2, descr=floatarraydescr)
         v19 = vec_cast_float_to_singlefloat(v17, 2)
         v20 = vec_cast_float_to_singlefloat(v18, 2)
-        vec_box_pack(v19, v20, 2)
-        vec_setarrayitem_raw(p1, i1, v19, 4, descr=singlefloatarraydescr)
+        v21 = vec_float_pack(v19, v20, 2, 2)
+        vec_setarrayitem_raw(p1, i1, v21, 4, descr=singlefloatarraydescr)
         jump(p0, p1, i7)
         """
         vopt = self.vectorize(self.parse_loop(ops))
@@ -1192,7 +1192,7 @@
         v224 = vec_float_add(v219, v222, 2)
         v225 = vec_cast_float_to_singlefloat(v223, 2)
         v226 = vec_cast_float_to_singlefloat(v224, 2)
-        v227 = vec_box_pack(v225, v226, 2, 2)
+        v227 = vec_float_pack(v225, v226, 2, 2)
         vec_raw_store(p2, i4, v227, 4, descr=singlefloatarraydescr)
         jump(p0, p1, p2, i210, i189)
         """
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -404,7 +404,10 @@
             arg_cloned = arg.clonebox()
             cj = ConstInt(j)
             ci = ConstInt(1)
-            unpack_op = ResOperation(rop.VEC_BOX_UNPACK, [vbox, cj, ci], arg_cloned)
+            opnum = rop.VEC_FLOAT_UNPACK
+            if vbox.type == INT:
+                opnum = rop.VEC_INT_UNPACK
+            unpack_op = ResOperation(opnum, [vbox, cj, ci], arg_cloned)
             self.emit_operation(unpack_op)
             sched_data.rename_unpacked(arg, arg_cloned)
             args[i] = arg_cloned
@@ -741,6 +744,9 @@
           this function creates a box pack instruction to merge them to:
           v1/2 = [A,B,X,Y]
         """
+        opnum = rop.VEC_FLOAT_PACK
+        if tgt_box.type == INT:
+            opnum = rop.VEC_INT_PACK
         arg_count = len(args)
         i = index
         while i < arg_count and tgt_box.item_count < packable:
@@ -751,9 +757,8 @@
                 continue
             new_box = tgt_box.clonebox()
             new_box.item_count += src_box.item_count
-            op = ResOperation(rop.VEC_BOX_PACK,
-                              [tgt_box, src_box, ConstInt(i),
-                               ConstInt(src_box.item_count)], new_box)
+            op = ResOperation(opnum, [tgt_box, src_box, ConstInt(i),
+                                      ConstInt(src_box.item_count)], new_box)
             self.preamble_ops.append(op)
             self._check_vec_pack(op)
             i += src_box.item_count
@@ -803,9 +808,12 @@
         else:
             resop = ResOperation(rop.VEC_BOX, [ConstInt(self.pack_ops)], vbox)
             self.preamble_ops.append(resop)
+            opnum = rop.VEC_FLOAT_PACK
+            if arg.type == INT:
+                opnum = rop.VEC_INT_PACK
             for i,op in enumerate(ops):
                 arg = op.getoperation().getarg(argidx)
-                resop = ResOperation(rop.VEC_BOX_PACK,
+                resop = ResOperation(opnum,
                                      [vbox,ConstInt(i),arg], None)
                 self.preamble_ops.append(resop)
         return vbox
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -471,8 +471,10 @@
     'VEC_CAST_FLOAT_TO_INT/2',
     'VEC_CAST_INT_TO_FLOAT/2',
 
-    'VEC_BOX_UNPACK/3',          # iX|fX = VEC_BOX_UNPACK(vX, index, item_count)
-    'VEC_BOX_PACK/4',            # VEC_BOX_PACK(vX, var/const, index, item_count)
+    'VEC_FLOAT_UNPACK/3',        # iX|fX = VEC_FLOAT_UNPACK(vX, index, item_count)
+    'VEC_FLOAT_PACK/4',          # VEC_FLOAT_PACK(vX, var/const, index, item_count)
+    'VEC_INT_UNPACK/3',          # iX|fX = VEC_INT_UNPACK(vX, index, item_count)
+    'VEC_INT_PACK/4',            # VEC_INT_PACK(vX, var/const, index, item_count)
     'VEC_EXPAND/2',              # vX = VEC_EXPAND(var/const, item_count)
     'VEC_BOX/1',
     '_VEC_PURE_LAST',


More information about the pypy-commit mailing list