[pypy-commit] pypy vecopt: reverted the 8 immediate suffix (solved differently for tests)

plan_rich noreply at buildbot.pypy.org
Mon Jun 8 14:15:43 CEST 2015


Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77953:6e0e98c3d70a
Date: 2015-06-08 10:50 +0200
http://bitbucket.org/pypy/pypy/changeset/6e0e98c3d70a/

Log:	reverted the 8 immediate suffix (solved differently for tests)

diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -243,17 +243,18 @@
 
     def define_int8_expand():
         return """
-        a = astype(|30|, int16)
-        c = astype(|1|, int16)
+        a = astype(|30|, int8)
+        c = astype(|1|, int8)
         c[0] = 8i
         b = a + c
         d = b -> 0:17
         sum(d)
         """
     def test_int8_expand(self):
+        py.test.skip("TODO implement assembler")
         result = self.run("int8_expand")
-        assert int(result) == 16*8 + sum(range(0,17))
-        self.check_vectorized(3, 2)
+        assert int(result) == 8*8 + sum(range(0,17))
+        self.check_vectorized(3, 2) # TODO sum at the end
 
     def define_int32_add_const():
         return """
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2615,24 +2615,24 @@
             return # already the right size
         if size == 4 and tosize == 2:
             scratch = X86_64_SCRATCH_REG
-            self.mc.PSHUFLW_xxi8(resloc.value, srcloc.value, 0b11111000)
-            self.mc.PEXTRW_rxi8(scratch.value, srcloc.value, 4)
-            self.mc.PINSRW_xri8(resloc.value, scratch.value, 2)
-            self.mc.PEXTRW_rxi8(scratch.value, srcloc.value, 6)
-            self.mc.PINSRW_xri8(resloc.value, scratch.value, 3)
+            self.mc.PSHUFLW_xxi(resloc.value, srcloc.value, 0b11111000)
+            self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 4)
+            self.mc.PINSRW_xri(resloc.value, scratch.value, 2)
+            self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 6)
+            self.mc.PINSRW_xri(resloc.value, scratch.value, 3)
         elif size == 4 and tosize == 8:
             scratch = X86_64_SCRATCH_REG.value
-            self.mc.PEXTRD_rxi8(scratch, srcloc.value, 1)
-            self.mc.PINSRQ_xri8(resloc.value, scratch, 1)
-            self.mc.PEXTRD_rxi8(scratch, srcloc.value, 0)
-            self.mc.PINSRQ_xri8(resloc.value, scratch, 0)
+            self.mc.PEXTRD_rxi(scratch, srcloc.value, 1)
+            self.mc.PINSRQ_xri(resloc.value, scratch, 1)
+            self.mc.PEXTRD_rxi(scratch, srcloc.value, 0)
+            self.mc.PINSRQ_xri(resloc.value, scratch, 0)
         elif size == 8 and tosize == 4:
             # is there a better sequence to move them?
             scratch = X86_64_SCRATCH_REG.value
-            self.mc.PEXTRQ_rxi8(scratch, srcloc.value, 0)
-            self.mc.PINSRD_xri8(resloc.value, scratch, 0)
-            self.mc.PEXTRQ_rxi8(scratch, srcloc.value, 1)
-            self.mc.PINSRD_xri8(resloc.value, scratch, 1)
+            self.mc.PEXTRQ_rxi(scratch, srcloc.value, 0)
+            self.mc.PINSRD_xri(resloc.value, scratch, 0)
+            self.mc.PEXTRQ_rxi(scratch, srcloc.value, 1)
+            self.mc.PINSRD_xri(resloc.value, scratch, 1)
         else:
             raise NotImplementedError("sign ext missing: " + str(size) + " -> " + str(tosize))
 
@@ -2653,19 +2653,19 @@
         assert not srcloc.is_xmm
         size = sizeloc.value
         if size == 1:
-            self.mc.PINSRB_xri8(resloc.value, srcloc.value, 0)
+            self.mc.PINSRB_xri(resloc.value, srcloc.value, 0)
             self.mc.PSHUFB(resloc, heap(self.expand_byte_mask_addr))
         elif size == 2:
-            self.mc.PINSRW_xri8(resloc.value, srcloc.value, 0)
-            self.mc.PINSRW_xri8(resloc.value, srcloc.value, 4)
-            self.mc.PSHUFLW_xxi8(resloc.value, resloc.value, 0)
-            self.mc.PSHUFHW_xxi8(resloc.value, resloc.value, 0)
+            self.mc.PINSRW_xri(resloc.value, srcloc.value, 0)
+            self.mc.PINSRW_xri(resloc.value, srcloc.value, 4)
+            self.mc.PSHUFLW_xxi(resloc.value, resloc.value, 0)
+            self.mc.PSHUFHW_xxi(resloc.value, resloc.value, 0)
         elif size == 4:
-            self.mc.PINSRD_xri8(resloc.value, srcloc.value, 0)
-            self.mc.PSHUFD_xxi8(resloc.value, resloc.value, 0)
+            self.mc.PINSRD_xri(resloc.value, srcloc.value, 0)
+            self.mc.PSHUFD_xxi(resloc.value, resloc.value, 0)
         elif size == 8:
-            self.mc.PINSRQ_xri8(resloc.value, srcloc.value, 0)
-            self.mc.PINSRQ_xri8(resloc.value, srcloc.value, 1)
+            self.mc.PINSRQ_xri(resloc.value, srcloc.value, 0)
+            self.mc.PINSRQ_xri(resloc.value, srcloc.value, 1)
         else:
             raise NotImplementedError("missing size %d for int expand" % (size,))
 
@@ -2684,28 +2684,28 @@
         while k > 0:
             if size == 8:
                 if resultloc.is_xmm:
-                    self.mc.PEXTRQ_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si)
-                    self.mc.PINSRQ_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri)
+                    self.mc.PEXTRQ_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+                    self.mc.PINSRQ_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
                 else:
-                    self.mc.PEXTRQ_rxi8(resultloc.value, sourceloc.value, si)
+                    self.mc.PEXTRQ_rxi(resultloc.value, sourceloc.value, si)
             elif size == 4:
                 if resultloc.is_xmm:
-                    self.mc.PEXTRD_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si)
-                    self.mc.PINSRD_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri)
+                    self.mc.PEXTRD_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+                    self.mc.PINSRD_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
                 else:
-                    self.mc.PEXTRD_rxi8(resultloc.value, sourceloc.value, si)
+                    self.mc.PEXTRD_rxi(resultloc.value, sourceloc.value, si)
             elif size == 2:
                 if resultloc.is_xmm:
-                    self.mc.PEXTRW_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si)
-                    self.mc.PINSRW_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri)
+                    self.mc.PEXTRW_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+                    self.mc.PINSRW_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
                 else:
-                    self.mc.PEXTRW_rxi8(resultloc.value, sourceloc.value, si)
+                    self.mc.PEXTRW_rxi(resultloc.value, sourceloc.value, si)
             elif size == 1:
                 if resultloc.is_xmm:
-                    self.mc.PEXTRB_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si)
-                    self.mc.PINSRB_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri)
+                    self.mc.PEXTRB_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+                    self.mc.PINSRB_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
                 else:
-                    self.mc.PEXTRB_rxi8(resultloc.value, sourceloc.value, si)
+                    self.mc.PEXTRB_rxi(resultloc.value, sourceloc.value, si)
             si += 1
             ri += 1
             k -= 1
@@ -2734,9 +2734,9 @@
                         self.mov(X86_64_XMM_SCRATCH_REG, srcloc)
                         src = X86_64_XMM_SCRATCH_REG.value
                     select = ((si & 0x3) << 6)|((ri & 0x3) << 4)
-                    self.mc.INSERTPS_xxi8(resloc.value, src, select)
+                    self.mc.INSERTPS_xxi(resloc.value, src, select)
                 else:
-                    self.mc.PEXTRD_rxi8(resloc.value, srcloc.value, si)
+                    self.mc.PEXTRD_rxi(resloc.value, srcloc.value, si)
                 si += 1
                 ri += 1
                 k -= 1
@@ -2757,12 +2757,12 @@
                         # r = (s[1], r[1])
                         if resloc != srcloc:
                             self.mc.UNPCKHPD(resloc, srcloc)
-                        self.mc.SHUFPD_xxi8(resloc.value, resloc.value, 1)
+                        self.mc.SHUFPD_xxi(resloc.value, resloc.value, 1)
                     else:
                         assert residx == 1
                         # r = (r[0], s[1])
                         if resloc != srcloc:
-                            self.mc.SHUFPD_xxi8(resloc.value, resloc.value, 1)
+                            self.mc.SHUFPD_xxi(resloc.value, resloc.value, 1)
                             self.mc.UNPCKHPD(resloc, srcloc)
                         # if they are equal nothing is to be done
 
diff --git a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
--- a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
+++ b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
@@ -332,6 +332,7 @@
 
     def should_skip_instruction_bit32(self, instrname, argmodes):
         if self.WORD != 8:
+            # those are tested in the 64 bit test case
             return (
                 # the test suite uses 64 bit registers instead of 32 bit...
                 (instrname == 'PEXTRQ') or
diff --git a/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
--- a/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
+++ b/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
@@ -26,6 +26,7 @@
                 # Not testing FSTP on 64-bit for now
                 (instrname == 'FSTP') or
                 # the test suite uses 64 bit registers instead of 32 bit...
+                # it is tested in the 32 bit test!
                 (instrname == 'PEXTRD') or
                 (instrname == 'PINSRD')
         )


More information about the pypy-commit mailing list