[pypy-commit] pypy vecopt: removed manual test since it is tested in auto test now
plan_rich
noreply at buildbot.pypy.org
Mon Jun 8 10:28:38 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77949:1fc0d9cd2612
Date: 2015-06-08 10:28 +0200
http://bitbucket.org/pypy/pypy/changeset/1fc0d9cd2612/
Log: removed manual test since it is tested in auto test now; fixed some
other tests in the x86 backend
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -232,14 +232,14 @@
c = astype(|1|, int16)
c[0] = 16i
b = a + c
- d = b -> 7:9
+ d = b -> 7:15
sum(d)
"""
def test_int16_expand(self):
result = self.run("int16_expand")
- i = 2
+ i = 8
assert int(result) == i*16 + sum(range(7,7+i))
- self.check_vectorized(2, 2)
+ self.check_vectorized(3, 2) # TODO sum at the end
def define_int8_expand():
return """
@@ -253,7 +253,7 @@
def test_int8_expand(self):
result = self.run("int8_expand")
assert int(result) == 16*8 + sum(range(0,17))
- self.check_vectorized(2, 2)
+ self.check_vectorized(3, 2)
def define_int32_add_const():
return """
diff --git a/rpython/jit/backend/tool/viewcode.py b/rpython/jit/backend/tool/viewcode.py
--- a/rpython/jit/backend/tool/viewcode.py
+++ b/rpython/jit/backend/tool/viewcode.py
@@ -57,6 +57,7 @@
'x86_32': 'i386',
'x86_64': 'i386:x86-64',
'x86-64': 'i386:x86-64',
+ 'x86-64-sse4': 'i386:x86-64',
'i386': 'i386',
'arm': 'arm',
'arm_32': 'arm',
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2615,24 +2615,24 @@
return # already the right size
if size == 4 and tosize == 2:
scratch = X86_64_SCRATCH_REG
- self.mc.PSHUFLW_xxi(resloc.value, srcloc.value, 0b11111000)
- self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 4)
- self.mc.PINSRW_xri(resloc.value, scratch.value, 2)
- self.mc.PEXTRW_rxi(scratch.value, srcloc.value, 6)
- self.mc.PINSRW_xri(resloc.value, scratch.value, 3)
+ self.mc.PSHUFLW_xxi8(resloc.value, srcloc.value, 0b11111000)
+ self.mc.PEXTRW_rxi8(scratch.value, srcloc.value, 4)
+ self.mc.PINSRW_xri8(resloc.value, scratch.value, 2)
+ self.mc.PEXTRW_rxi8(scratch.value, srcloc.value, 6)
+ self.mc.PINSRW_xri8(resloc.value, scratch.value, 3)
elif size == 4 and tosize == 8:
scratch = X86_64_SCRATCH_REG.value
- self.mc.PEXTRD_rxi(scratch, srcloc.value, 1)
- self.mc.PINSRQ_xri(resloc.value, scratch, 1)
- self.mc.PEXTRD_rxi(scratch, srcloc.value, 0)
- self.mc.PINSRQ_xri(resloc.value, scratch, 0)
+ self.mc.PEXTRD_rxi8(scratch, srcloc.value, 1)
+ self.mc.PINSRQ_xri8(resloc.value, scratch, 1)
+ self.mc.PEXTRD_rxi8(scratch, srcloc.value, 0)
+ self.mc.PINSRQ_xri8(resloc.value, scratch, 0)
elif size == 8 and tosize == 4:
# is there a better sequence to move them?
scratch = X86_64_SCRATCH_REG.value
- self.mc.PEXTRQ_rxi(scratch, srcloc.value, 0)
- self.mc.PINSRD_xri(resloc.value, scratch, 0)
- self.mc.PEXTRQ_rxi(scratch, srcloc.value, 1)
- self.mc.PINSRD_xri(resloc.value, scratch, 1)
+ self.mc.PEXTRQ_rxi8(scratch, srcloc.value, 0)
+ self.mc.PINSRD_xri8(resloc.value, scratch, 0)
+ self.mc.PEXTRQ_rxi8(scratch, srcloc.value, 1)
+ self.mc.PINSRD_xri8(resloc.value, scratch, 1)
else:
raise NotImplementedError("sign ext missing: " + str(size) + " -> " + str(tosize))
@@ -2653,19 +2653,19 @@
assert not srcloc.is_xmm
size = sizeloc.value
if size == 1:
- self.mc.PINSRB_xri(resloc.value, srcloc.value, 0)
+ self.mc.PINSRB_xri8(resloc.value, srcloc.value, 0)
self.mc.PSHUFB(resloc, heap(self.expand_byte_mask_addr))
elif size == 2:
- self.mc.PINSRW_xri(resloc.value, srcloc.value, 0)
- self.mc.PINSRW_xri(resloc.value, srcloc.value, 4)
- self.mc.PSHUFLW_xxi(resloc.value, resloc.value, 0)
- self.mc.PSHUFHW_xxi(resloc.value, resloc.value, 0)
+ self.mc.PINSRW_xri8(resloc.value, srcloc.value, 0)
+ self.mc.PINSRW_xri8(resloc.value, srcloc.value, 4)
+ self.mc.PSHUFLW_xxi8(resloc.value, resloc.value, 0)
+ self.mc.PSHUFHW_xxi8(resloc.value, resloc.value, 0)
elif size == 4:
- self.mc.PINSRD_xri(resloc.value, srcloc.value, 0)
- self.mc.PSHUFD_xxi(resloc.value, resloc.value, 0)
+ self.mc.PINSRD_xri8(resloc.value, srcloc.value, 0)
+ self.mc.PSHUFD_xxi8(resloc.value, resloc.value, 0)
elif size == 8:
- self.mc.PINSRQ_xri(resloc.value, srcloc.value, 0)
- self.mc.PINSRQ_xri(resloc.value, srcloc.value, 1)
+ self.mc.PINSRQ_xri8(resloc.value, srcloc.value, 0)
+ self.mc.PINSRQ_xri8(resloc.value, srcloc.value, 1)
else:
raise NotImplementedError("missing size %d for int expand" % (size,))
@@ -2676,34 +2676,36 @@
srcidx = srcidxloc.value
residx = residxloc.value
count = countloc.value
+ # for small data type conversion this can be quite costly
+ # j = pack(i,4,4)
si = srcidx
ri = residx
k = count
while k > 0:
if size == 8:
if resultloc.is_xmm:
- self.mc.PEXTRQ_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
- self.mc.PINSRQ_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
+ self.mc.PEXTRQ_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+ self.mc.PINSRQ_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri)
else:
- self.mc.PEXTRQ_rxi(resultloc.value, sourceloc.value, si)
+ self.mc.PEXTRQ_rxi8(resultloc.value, sourceloc.value, si)
elif size == 4:
if resultloc.is_xmm:
- self.mc.PEXTRD_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
- self.mc.PINSRD_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
+ self.mc.PEXTRD_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+ self.mc.PINSRD_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri)
else:
- self.mc.PEXTRD_rxi(resultloc.value, sourceloc.value, si)
+ self.mc.PEXTRD_rxi8(resultloc.value, sourceloc.value, si)
elif size == 2:
if resultloc.is_xmm:
- self.mc.PEXTRW_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
- self.mc.PINSRW_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
+ self.mc.PEXTRW_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+ self.mc.PINSRW_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri)
else:
- self.mc.PEXTRW_rxi(resultloc.value, sourceloc.value, si)
+ self.mc.PEXTRW_rxi8(resultloc.value, sourceloc.value, si)
elif size == 1:
if resultloc.is_xmm:
- self.mc.PEXTRB_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
- self.mc.PINSRB_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
+ self.mc.PEXTRB_rxi8(X86_64_SCRATCH_REG.value, sourceloc.value, si)
+ self.mc.PINSRB_xri8(resultloc.value, X86_64_SCRATCH_REG.value, ri)
else:
- self.mc.PEXTRB_rxi(resultloc.value, sourceloc.value, si)
+ self.mc.PEXTRB_rxi8(resultloc.value, sourceloc.value, si)
si += 1
ri += 1
k -= 1
@@ -2732,9 +2734,9 @@
self.mov(X86_64_XMM_SCRATCH_REG, srcloc)
src = X86_64_XMM_SCRATCH_REG.value
select = ((si & 0x3) << 6)|((ri & 0x3) << 4)
- self.mc.INSERTPS_xxi(resloc.value, src, select)
+ self.mc.INSERTPS_xxi8(resloc.value, src, select)
else:
- self.mc.PEXTRD_rxi(resloc.value, srcloc.value, si)
+ self.mc.PEXTRD_rxi8(resloc.value, srcloc.value, si)
si += 1
ri += 1
k -= 1
@@ -2755,12 +2757,12 @@
# r = (s[1], r[1])
if resloc != srcloc:
self.mc.UNPCKHPD(resloc, srcloc)
- self.mc.SHUFPD_xxi(resloc.value, resloc.value, 1)
+ self.mc.SHUFPD_xxi8(resloc.value, resloc.value, 1)
else:
assert residx == 1
# r = (r[0], s[1])
if resloc != srcloc:
- self.mc.SHUFPD_xxi(resloc.value, resloc.value, 1)
+ self.mc.SHUFPD_xxi8(resloc.value, resloc.value, 1)
self.mc.UNPCKHPD(resloc, srcloc)
# if they are equal nothing is to be done
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -728,10 +728,9 @@
MOVD32_xb = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_bp(2))
MOVD32_xs = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_sp(2))
- PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b'))
-
MOVSS_xx = xmminsn('\xF3', rex_nw, '\x0F\x10', register(1,8), register(2), '\xC0')
+ PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b'))
PSRLDQ_xi = xmminsn('\x66', rex_nw, '\x0F\x73', register(1),
orbyte(0x3 << 3), '\xC0', immediate(2, 'b'))
UNPCKLPD_xx = xmminsn('\x66', rex_nw, '\x0F\x14', register(1, 8), register(2), '\xC0')
diff --git a/rpython/jit/backend/x86/test/test_rx86.py b/rpython/jit/backend/x86/test/test_rx86.py
--- a/rpython/jit/backend/x86/test/test_rx86.py
+++ b/rpython/jit/backend/x86/test/test_rx86.py
@@ -245,77 +245,3 @@
assert len(cls.MULTIBYTE_NOPs) == 16
for i in range(16):
assert len(cls.MULTIBYTE_NOPs[i]) == i
-
-def test_pextr():
- s = CodeBuilder64()
- s.PEXTRW_rxi(R.r11, R.xmm0,0)
- assert s.getvalue() == '\x66\x44\x0f\xc5\xd8\x00'
- s.clear()
- s.PEXTRW_rxi(R.edi, R.xmm15, 15)
- assert s.getvalue() == '\x66\x41\x0f\xc5\xff\x0f'
- s.clear()
- s.PEXTRD_rxi(R.eax, R.xmm11, 2)
- assert s.getvalue() == '\x66\x44\x0f\x3a\x16\xd8\x02'
- s.clear()
- s.PEXTRD_rxi(R.r11, R.xmm5, 2)
- assert s.getvalue() == '\x66\x41\x0f\x3a\x16\xeb\x02'
- s.clear()
- s.PEXTRQ_rxi(R.ebp, R.xmm0, 7)
- assert s.getvalue() == '\x66\x48\x0f\x3a\x16\xc5\x07'
- # BYTE
- s.clear()
- s.PEXTRB_rxi(R.eax, R.xmm13, 24)
- assert s.getvalue() == '\x66\x44\x0f\x3a\x14\xe8\x18'
- s.clear()
- s.PEXTRB_rxi(R.r15, R.xmm5, 33)
- assert s.getvalue() == '\x66\x41\x0f\x3a\x14\xef\x21'
- # EXTR SINGLE FLOAT
- s.clear()
- s.EXTRACTPS_rxi(R.eax, R.xmm15, 2)
- assert s.getvalue() == '\x66\x44\x0f\x3a\x17\xf8\x02'
- s.clear()
- s.EXTRACTPS_rxi(R.r11, R.xmm0, 1)
- assert s.getvalue() == '\x66\x41\x0f\x3a\x17\xc3\x01'
- s.clear()
- s.EXTRACTPS_rxi(R.eax, R.xmm0, 1)
- assert s.getvalue() == '\x66\x0f\x3a\x17\xc0\x01'
- s.clear()
- s.EXTRACTPS_rxi(R.r15, R.xmm15, 4)
- assert s.getvalue() == '\x66\x45\x0f\x3a\x17\xff\x04'
-
-def test_pinsr():
- s = CodeBuilder64()
- s.PINSRW_xri(R.xmm0, R.r11,0)
- assert s.getvalue() == '\x66\x41\x0f\xc4\xc3\x00'
- s.clear()
- s.PINSRW_xri(R.xmm15, R.edi, 15)
- assert s.getvalue() == '\x66\x44\x0f\xc4\xff\x0f'
- s.clear()
- s.PINSRD_xri(R.xmm11, R.eax, 2)
- assert s.getvalue() == '\x66\x44\x0f\x3a\x22\xd8\x02'
- s.clear()
- s.PINSRD_xri(R.xmm5, R.r11, 2)
- assert s.getvalue() == '\x66\x41\x0f\x3a\x22\xeb\x02'
- s.clear()
- s.PINSRQ_xri(R.xmm0, R.ebp, 7)
- assert s.getvalue() == '\x66\x48\x0f\x3a\x22\xc5\x07'
- # BYTE
- s.clear()
- s.PINSRB_xri(R.xmm13, R.eax, 24)
- assert s.getvalue() == '\x66\x44\x0f\x3a\x20\xe8\x18'
- s.clear()
- s.PINSRB_xri(R.xmm5, R.r15, 33)
- assert s.getvalue() == '\x66\x41\x0f\x3a\x20\xef\x21'
- # EXTR SINGLE FLOAT
- s.clear()
- s.INSERTPS_xxi(R.xmm15, R.xmm0, 2)
- assert s.getvalue() == '\x66\x44\x0f\x3a\x21\xf8\x02'
- s.clear()
- s.INSERTPS_xxi(R.xmm0, R.xmm11, 1)
- assert s.getvalue() == '\x66\x41\x0f\x3a\x21\xc3\x01'
- s.clear()
- s.INSERTPS_xxi(R.xmm0, R.xmm0, 1)
- assert s.getvalue() == '\x66\x0f\x3a\x21\xc0\x01'
- s.clear()
- s.INSERTPS_xxi(R.xmm15, R.xmm15, 4)
- assert s.getvalue() == '\x66\x45\x0f\x3a\x21\xff\x04'
diff --git a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
--- a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
+++ b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
@@ -196,6 +196,8 @@
instrname = 'MOVD'
if argmodes == 'xb':
py.test.skip('"as" uses an undocumented alternate encoding??')
+ if argmodes == 'xx' and self.WORD != 8:
+ instrname = 'MOVQ'
#
for args in args_lists:
suffix = ""
@@ -328,6 +330,15 @@
(instrname == 'MULTIBYTE')
)
+ def should_skip_instruction_bit32(self, instrname, argmodes):
+ if self.WORD != 8:
+ return (
+ # the test suite uses 64 bit registers instead of 32 bit...
+ (instrname == 'PEXTRQ') or
+ (instrname == 'PINSRQ')
+ )
+
+ return False
def complete_test(self, methname):
@@ -336,7 +347,8 @@
else:
instrname, argmodes = methname, ''
- if self.should_skip_instruction(instrname, argmodes):
+ if self.should_skip_instruction(instrname, argmodes) or \
+ self.should_skip_instruction_bit32(instrname, argmodes):
print "Skipping %s" % methname
return
@@ -370,6 +382,19 @@
else:
instr_suffix = None
+ if instrname.find('EXTR') != -1 or \
+ instrname.find('INSR') != -1 or \
+ instrname.find('INSERT') != -1 or \
+ instrname.find('EXTRACT') != -1 or \
+ instrname.find('SRLDQ') != -1 or \
+ instrname.find('SHUF') != -1:
+ realargmodes = []
+ for mode in argmodes:
+ if mode == 'i':
+ mode = 'i8'
+ realargmodes.append(mode)
+ argmodes = realargmodes
+
print "Testing %s with argmodes=%r" % (instrname, argmodes)
self.methname = methname
self.is_xmm_insn = getattr(getattr(self.X86_CodeBuilder,
diff --git a/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
--- a/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
+++ b/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
@@ -24,7 +24,10 @@
return (
super(TestRx86_64, self).should_skip_instruction(instrname, argmodes) or
# Not testing FSTP on 64-bit for now
- (instrname == 'FSTP')
+ (instrname == 'FSTP') or
+ # the test suite uses 64 bit registers instead of 32 bit...
+ (instrname == 'PEXTRD') or
+ (instrname == 'PINSRD')
)
def array_tests(self):
More information about the pypy-commit
mailing list