[pypy-commit] pypy vecopt: added missing result for pack instruction (expand box to vector)

plan_rich noreply at buildbot.pypy.org
Tue May 26 16:56:42 CEST 2015


Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77584:0aebdb7b396b
Date: 2015-05-26 16:56 +0200
http://bitbucket.org/pypy/pypy/changeset/0aebdb7b396b/

Log:	added missing result for pack instruction (expand box to vector)
	added some float/double instructions; doubles are now packed correctly
	(previously they were not); cumsum now works

diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -323,20 +323,9 @@
         """
 
     def test_cumsum(self):
-        py.test.skip()
         result = self.run("cumsum")
         assert result == 15
         self.check_trace_count(1)
-        self.check_simple_loop({
-            'float_add': 1,
-            'guard_false': 1,
-            'guard_not_invalidated': 1,
-            'int_add': 3,
-            'int_ge': 1,
-            'jump': 1,
-            'raw_load': 1,
-            'raw_store': 1,
-        })
 
     def define_axissum():
         return """
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2696,7 +2696,7 @@
                         # if source is a normal register (unpack)
                         assert count == 1
                         assert si == 0
-                        self.mc.MOVSD(X86_64_XMM_SCRATCH_REG, srcloc)
+                        self.mc.MOVAPS(X86_64_XMM_SCRATCH_REG, srcloc)
                         src = X86_64_XMM_SCRATCH_REG.value
                     select = ((si & 0x3) << 6)|((ri & 0x3) << 4)
                     self.mc.INSERTPS_xxi(resloc.value, src, select)
@@ -2719,15 +2719,17 @@
                 else:
                     assert srcidx == 1
                     if residx == 0:
-                        source = resloc.value
-                        if resloc.value != srcloc.value:
-                            self.mc.MOVUPD(resloc, srcloc)
-                        # r = (s[1], r[0])
-                        self.mc.SHUFPD_xxi(resloc.value, source, 1)
+                        # r = (s[1], r[1])
+                        if resloc != srcloc:
+                            self.mc.UNPCKHPD(resloc, srcloc)
+                        self.mc.SHUFPD_xxi(resloc.value, resloc.value, 1)
                     else:
                         assert residx == 1
                         # r = (r[0], s[1])
-                        self.mc.SHUFPD_xxi(resloc.value, srcloc.value, 2)
+                        if resloc != srcloc:
+                            self.mc.SHUFPS_xxi(resloc.value, resloc.value, 1)
+                            self.mc.UNPCKHPD(resloc, srcloc)
+                        # if they are equal nothing is to be done
 
     genop_vec_float_unpack = genop_vec_float_pack
 
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1579,18 +1579,20 @@
     del consider_vec_logic
 
     def consider_vec_int_pack(self, op):
-        index = op.getarg(2)
-        count = op.getarg(3)
+        index = op.getarg(1)
+        arg = op.getarg(2)
         assert isinstance(index, ConstInt)
-        assert isinstance(count, ConstInt)
         args = op.getarglist()
-        srcloc = self.make_sure_var_in_reg(op.getarg(1), args)
+        srcloc = self.make_sure_var_in_reg(arg, args)
         resloc =  self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
         residx = 0
         assert isinstance(op.result, BoxVector)
         args = op.getarglist()
         size = op.result.getsize()
-        arglocs = [resloc, srcloc, imm(index.value), imm(0), imm(count.value), imm(size)]
+        count = 1
+        if isinstance(arg, BoxVector):
+            count = arg.getcount()
+        arglocs = [resloc, srcloc, imm(index.value), imm(0), imm(count), imm(size)]
         self.perform(op, arglocs, resloc)
 
     consider_vec_float_pack = consider_vec_int_pack
@@ -1643,7 +1645,7 @@
 
     def consider_vec_box(self, op):
         # pseudo instruction, needed to create a new variable
-        pass
+        self.xrm.force_allocate_reg(op.result)
 
     def consider_guard_early_exit(self, op):
         pass
diff --git a/rpython/jit/backend/x86/regloc.py b/rpython/jit/backend/x86/regloc.py
--- a/rpython/jit/backend/x86/regloc.py
+++ b/rpython/jit/backend/x86/regloc.py
@@ -643,10 +643,11 @@
     MOVSD = _binaryop('MOVSD')
     MOVSS = _binaryop('MOVSS')
     MOVAPD = _binaryop('MOVAPD')
+    MOVAPS = _binaryop('MOVAPS')
     MOVDQA = _binaryop('MOVDQA')
     MOVDQU = _binaryop('MOVDQU')
+    MOVUPD = _binaryop('MOVUPD')
     MOVUPS = _binaryop('MOVUPS')
-    MOVUPD = _binaryop('MOVUPD')
     ADDSD = _binaryop('ADDSD')
     SUBSD = _binaryop('SUBSD')
     MULSD = _binaryop('MULSD')
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -918,10 +918,10 @@
 define_modrm_modes('MOVSD_*x', ['\xF2', rex_nw, '\x0F\x11', register(2,8)], regtype='XMM')
 define_modrm_modes('MOVSS_x*', ['\xF3', rex_nw, '\x0F\x10', register(1,8)], regtype='XMM')
 define_modrm_modes('MOVSS_*x', ['\xF3', rex_nw, '\x0F\x11', register(2,8)], regtype='XMM')
-define_modrm_modes('MOVAPD_x*', ['\x66', rex_nw, '\x0F\x28', register(1,8)],
-                   regtype='XMM')
-define_modrm_modes('MOVAPD_*x', ['\x66', rex_nw, '\x0F\x29', register(2,8)],
-                   regtype='XMM')
+define_modrm_modes('MOVAPD_x*', ['\x66', rex_nw, '\x0F\x28', register(1,8)], regtype='XMM')
+define_modrm_modes('MOVAPD_*x', ['\x66', rex_nw, '\x0F\x29', register(2,8)], regtype='XMM')
+define_modrm_modes('MOVAPS_x*', [        rex_nw, '\x0F\x28', register(1,8)], regtype='XMM')
+define_modrm_modes('MOVAPS_*x', [        rex_nw, '\x0F\x29', register(2,8)], regtype='XMM')
 
 define_modrm_modes('MOVDQA_x*', ['\x66', rex_nw, '\x0F\x6F', register(1, 8)], regtype='XMM')
 define_modrm_modes('MOVDQA_*x', ['\x66', rex_nw, '\x0F\x7F', register(2, 8)], regtype='XMM')
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -736,7 +736,7 @@
         assert isinstance(box, BoxVector)
         if count == -1:
             count = box.item_count
-        return PackType(box.item_type, box.item_size, box.signed, count)
+        return PackType(box.item_type, box.item_size, box.item_signed, count)
 
     def clone(self):
         return PackType(self.type, self.size, self.signed, self.count)
@@ -957,8 +957,10 @@
                 opnum = rop.VEC_INT_PACK
             for i,op in enumerate(ops):
                 arg = op.getoperation().getarg(argidx)
+                new_box = vbox.clonebox()
                 resop = ResOperation(opnum,
-                                     [vbox,ConstInt(i),arg], None)
+                                     [vbox,ConstInt(i),arg], new_box)
+                vbox = new_box
                 self.preamble_ops.append(resop)
         return vbox
 


More information about the pypy-commit mailing list