[pypy-commit] pypy vecopt: reverted the 8 immediate suffix (solved differently for tests)
plan_rich
noreply at buildbot.pypy.org
Mon Jun 8 14:15:43 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77953:6e0e98c3d70a
Date: 2015-06-08 10:50 +0200
http://bitbucket.org/pypy/pypy/changeset/6e0e98c3d70a/
Log: reverted the 8 immediate suffix (solved differently for tests)
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -243,17 +243,18 @@
def define_int8_expand():
return """
- a = astype(|30|, int16)
- c = astype(|1|, int16)
+ a = astype(|30|, int8)
+ c = astype(|1|, int8)
c[0] = 8i
b = a + c
d = b -> 0:17
sum(d)
"""
def test_int8_expand(self):
+ py.test.skip("TODO implement assembler")
result = self.run("int8_expand")
- assert int(result) == 16*8 + sum(range(0,17))
- self.check_vectorized(3, 2)
+ assert int(result) == 8*8 + sum(range(0,17))
+ self.check_vectorized(3, 2) # TODO sum at the end
def define_int32_add_const():
return """
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2615,24 +2615,24 @@
return # already the right size
if size == 4 and tosize == 2:
scratch = X86_64_SCRATCH_REG
- self.mc.PSHUFLW_xxi8(resloc.value, srcloc.value, 0b11111000)
- self.mc.PEXTRW_rxi8(scratch.value, srcloc.value, 4)
- self.mc.PINSRW_xri8(resloc.value, scratch.value, 2)
- self.mc.PEXTRW_rxi8(scratch.value, srcloc.value, 6)
- self.mc.PINSRW_xri8(resloc.value, scratch.value, 3)
+ self.mc.PSHUFLW_xxi(resloc.value, srcloc.value, 0b11111000)
+ self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 4)
+ self.mc.PINSRW_xri(resloc.value, scratch.value, 2)
+ self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 6)
+ self.mc.PINSRW_xri(resloc.value, scratch.value, 3)
elif size == 4 and tosize == 8:
scratch = X86_64_SCRATCH_REG.value
- self.mc.PEXTRD_rxi8(scratch, srcloc.value, 1)
- self.mc.PINSRQ_xri8(resloc.value, scratch, 1)
- self.mc.PEXTRD_rxi8(scratch, srcloc.value, 0)
- self.mc.PINSRQ_xri8(resloc.value, scratch, 0)
+ self.mc.PEXTRD_rxi(scratch, srcloc.value, 1)
+ self.mc.PINSRQ_xri(resloc.value, scratch, 1)
+ self.mc.PEXTRD_rxi(scratch, srcloc.value, 0)
+ self.mc.PINSRQ_xri(resloc.value, scratch, 0)
elif size == 8 and tosize == 4:
# is there a better sequence to move them?
scratch = X86_64_SCRATCH_REG.value
- self.mc.PEXTRQ_rxi8(scratch, srcloc.value, 0)
- self.mc.PINSRD_xri8(resloc.value, scratch, 0)
- self.mc.PEXTRQ_rxi8(scratch, srcloc.value, 1)
- self.mc.PINSRD_xri8(resloc.value, scratch, 1)
+ self.mc.PEXTRQ_rxi(scratch, srcloc.value, 0)
+ self.mc.PINSRD_xri(resloc.value, scratch, 0)
+ self.mc.PEXTRQ_rxi(scratch, srcloc.value, 1)
+ self.mc.PINSRD_xri(resloc.value, scratch, 1)
else:
raise NotImplementedError("sign ext missing: " + str(size) + " -> " + str(tosize))
@@ -2653,19 +2653,19 @@
assert not srcloc.is_xmm
size = sizeloc.value
if size == 1:
- self.mc.PINSRB_xri8(resloc.value, srcloc.value, 0)
+ self.mc.PINSRB_xri(resloc.value, srcloc.value, 0)
self.mc.PSHUFB(resloc, heap(self.expand_byte_mask_addr))
elif size == 2:
- self.mc.PINSRW_xri8(resloc.value, srcloc.value, 0)
- self.mc.PINSRW_xri8(resloc.value, srcloc.value, 4)
- self.mc.PSHUFLW_xxi8(resloc.value, resloc.value, 0)
- self.mc.PSHUFHW_xxi8(resloc.value, resloc.value, 0)
+ self.mc.PINSRW_xri(resloc.value, srcloc.value, 0)
+ self.mc.PINSRW_xri(resloc.value, srcloc.value, 4)
+ self.mc.PSHUFLW_xxi(resloc.value, resloc.value, 0)
+ self.mc.PSHUFHW_xxi(resloc.value, resloc.value, 0)
elif size == 4:
- self.mc.PINSRD_xri8(resloc.value, srcloc.value, 0)
- self.mc.PSHUFD_xxi8(resloc.value, resloc.value, 0)
+ self.mc.PINSRD_xri(resloc.value, srcloc.value, 0)
+ self.mc.PSHUFD_xxi(resloc.value, resloc.value, 0)
elif size == 8:
- self.mc.PINSRQ_xri8(resloc.value, srcloc.value, 0)
- self.mc.PINSRQ_xri8(resloc.value, srcloc.value, 1)
+ self.mc.PINSRQ_xri(resloc.value, srcloc.value, 0)
+ self.mc.PINSRQ_xri(resloc.value, srcloc.value, 1)
else:
raise NotImplementedError("missing size %d for int expand" % (size,))
@@ -2684,28 +2684,28 @@
while k > 0:
if size == 8:
if resultloc.is_xmm:
- self.mc.PEXTRQ_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si)
- self.mc.PINSRQ_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri)
+ self.mc.PEXTRQ_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+ self.mc.PINSRQ_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
else:
- self.mc.PEXTRQ_rxi8(resultloc.value, sourceloc.value, si)
+ self.mc.PEXTRQ_rxi(resultloc.value, sourceloc.value, si)
elif size == 4:
if resultloc.is_xmm:
- self.mc.PEXTRD_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si)
- self.mc.PINSRD_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri)
+ self.mc.PEXTRD_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+ self.mc.PINSRD_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
else:
- self.mc.PEXTRD_rxi8(resultloc.value, sourceloc.value, si)
+ self.mc.PEXTRD_rxi(resultloc.value, sourceloc.value, si)
elif size == 2:
if resultloc.is_xmm:
- self.mc.PEXTRW_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si)
- self.mc.PINSRW_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri)
+ self.mc.PEXTRW_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+ self.mc.PINSRW_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
else:
- self.mc.PEXTRW_rxi8(resultloc.value, sourceloc.value, si)
+ self.mc.PEXTRW_rxi(resultloc.value, sourceloc.value, si)
elif size == 1:
if resultloc.is_xmm:
- self.mc.PEXTRB_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si)
- self.mc.PINSRB_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri)
+ self.mc.PEXTRB_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+ self.mc.PINSRB_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
else:
- self.mc.PEXTRB_rxi8(resultloc.value, sourceloc.value, si)
+ self.mc.PEXTRB_rxi(resultloc.value, sourceloc.value, si)
si += 1
ri += 1
k -= 1
@@ -2734,9 +2734,9 @@
self.mov(X86_64_XMM_SCRATCH_REG, srcloc)
src = X86_64_XMM_SCRATCH_REG.value
select = ((si & 0x3) << 6)|((ri & 0x3) << 4)
- self.mc.INSERTPS_xxi8(resloc.value, src, select)
+ self.mc.INSERTPS_xxi(resloc.value, src, select)
else:
- self.mc.PEXTRD_rxi8(resloc.value, srcloc.value, si)
+ self.mc.PEXTRD_rxi(resloc.value, srcloc.value, si)
si += 1
ri += 1
k -= 1
@@ -2757,12 +2757,12 @@
# r = (s[1], r[1])
if resloc != srcloc:
self.mc.UNPCKHPD(resloc, srcloc)
- self.mc.SHUFPD_xxi8(resloc.value, resloc.value, 1)
+ self.mc.SHUFPD_xxi(resloc.value, resloc.value, 1)
else:
assert residx == 1
# r = (r[0], s[1])
if resloc != srcloc:
- self.mc.SHUFPD_xxi8(resloc.value, resloc.value, 1)
+ self.mc.SHUFPD_xxi(resloc.value, resloc.value, 1)
self.mc.UNPCKHPD(resloc, srcloc)
# if they are equal nothing is to be done
diff --git a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
--- a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
+++ b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
@@ -332,6 +332,7 @@
def should_skip_instruction_bit32(self, instrname, argmodes):
if self.WORD != 8:
+ # those are tested in the 64 bit test case
return (
# the test suite uses 64 bit registers instead of 32 bit...
(instrname == 'PEXTRQ') or
diff --git a/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
--- a/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
+++ b/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
@@ -26,6 +26,7 @@
# Not testing FSTP on 64-bit for now
(instrname == 'FSTP') or
# the test suite uses 64 bit registers instead of 32 bit...
+ # it is tested in the 32 bit test!
(instrname == 'PEXTRD') or
(instrname == 'PINSRD')
)
More information about the pypy-commit mailing list