[pypy-commit] pypy zarch-simd-support: more vector ops, add, subtract, logic or, and, xor, float mul, float div
plan_rich
pypy.commits at gmail.com
Wed Sep 14 07:01:07 EDT 2016
Author: Richard Plangger <planrichi at gmail.com>
Branch: zarch-simd-support
Changeset: r87106:155d0af20ad7
Date: 2016-09-14 12:54 +0200
http://bitbucket.org/pypy/pypy/changeset/155d0af20ad7/
Log: more vector ops, add, subtract, logic or, and, xor, float mul, float
div
diff --git a/rpython/jit/backend/zarch/instruction_builder.py b/rpython/jit/backend/zarch/instruction_builder.py
--- a/rpython/jit/backend/zarch/instruction_builder.py
+++ b/rpython/jit/backend/zarch/instruction_builder.py
@@ -488,7 +488,7 @@
def build_vrr_c(mnemonic, (opcode1,opcode2), argtypes='v,v,v,m,m'):
@builder.arguments(argtypes)
- def encode_vrr_c(self, v1, v2, v3, mask1, mask2):
+ def encode_vrr_c(self, v1, v2, v3, mask1=0, mask2=0):
self.writechar(opcode1)
rbx = (v1 >= 16) << 3
rbx |= (v2 >= 16) << 2
diff --git a/rpython/jit/backend/zarch/instructions.py b/rpython/jit/backend/zarch/instructions.py
--- a/rpython/jit/backend/zarch/instructions.py
+++ b/rpython/jit/backend/zarch/instructions.py
@@ -299,9 +299,22 @@
'VST': ('vrx', ['\xE7','\x0E'], 'v,bid'),
+ # integral
+ # -> arith
+ 'VA': ('vrr_c', ['\xE7','\xF3'], 'v,v,v,m'),
+ 'VS': ('vrr_c', ['\xE7','\xF7'], 'v,v,v,m'),
+
+ # -> logic
+ 'VO': ('vrr_c', ['\xE7','\x6A'], 'v,v,v'),
+ 'VNO': ('vrr_c', ['\xE7','\x6B'], 'v,v,v'),
+ 'VN': ('vrr_c', ['\xE7','\x68'], 'v,v,v'),
+ 'VX': ('vrr_c', ['\xE7','\x6D'], 'v,v,v'),
+
# floating point
'VFA': ('vrr_c', ['\xE7','\xE3']),
'VFS': ('vrr_c', ['\xE7','\xE2']),
+ 'VFM': ('vrr_c', ['\xE7','\xE7']),
+ 'VFD': ('vrr_c', ['\xE7','\xE5']),
# '': ('', ['','']),
}
diff --git a/rpython/jit/backend/zarch/locations.py b/rpython/jit/backend/zarch/locations.py
--- a/rpython/jit/backend/zarch/locations.py
+++ b/rpython/jit/backend/zarch/locations.py
@@ -250,3 +250,28 @@
imm1 = imm(1)
imm0 = imm(0)
+# Element-size codes passed as the mask operand of the vector instructions
+# (e.g. VA/VS): the value selects the width of each vector element.
+MASK_VEC_BYTE = 0
+MASK_VEC_HWORD = 1
+MASK_VEC_WORD = 2
+MASK_VEC_DWORD = 3
+MASK_VEC_QWORD = 4
+
+def itemsize_to_mask(v):
+    """Translate an element size in bytes into its MASK_VEC_* code.
+
+    Supported sizes are 1, 2, 4, 8 and 16; any other value trips the
+    assertion at the end of the function.
+    """
+    if v == 16:
+        return MASK_VEC_QWORD
+    elif v == 8:
+        return MASK_VEC_DWORD
+    elif v == 4:
+        return MASK_VEC_WORD
+    elif v == 2:
+        return MASK_VEC_HWORD
+    elif v == 1:
+        return MASK_VEC_BYTE
+    assert 0, "not supported itemsize to mask!"
diff --git a/rpython/jit/backend/zarch/vector_ext.py b/rpython/jit/backend/zarch/vector_ext.py
--- a/rpython/jit/backend/zarch/vector_ext.py
+++ b/rpython/jit/backend/zarch/vector_ext.py
@@ -107,29 +107,13 @@
def emit_vec_int_add(self, op, arglocs, regalloc):
resloc, loc0, loc1, size_loc = arglocs
size = size_loc.value
- if size == 1:
- self.mc.vaddubm(resloc.value, loc0.value, loc1.value)
- elif size == 2:
- self.mc.vadduhm(resloc.value, loc0.value, loc1.value)
- elif size == 4:
- self.mc.vadduwm(resloc.value, loc0.value, loc1.value)
- elif size == 8:
- self.mc.vaddudm(resloc.value, loc0.value, loc1.value)
+ mask = l.itemsize_to_mask(size_loc.value)
+ self.mc.VA(resloc, loc0, loc1, mask)
def emit_vec_int_sub(self, op, arglocs, regalloc):
resloc, loc0, loc1, size_loc = arglocs
- size = size_loc.value
- if size == 1:
- # TODO verify if unsigned subtract is the wanted feature
- self.mc.vsububm(resloc.value, loc0.value, loc1.value)
- elif size == 2:
- # TODO verify if unsigned subtract is the wanted feature
- self.mc.vsubuhm(resloc.value, loc0.value, loc1.value)
- elif size == 4:
- # TODO verify if unsigned subtract is the wanted feature
- self.mc.vsubuwm(resloc.value, loc0.value, loc1.value)
- elif size == 8:
- self.mc.vsubudm(resloc.value, loc0.value, loc1.value)
+ mask = l.itemsize_to_mask(size_loc.value)
+ self.mc.VS(resloc, loc0, loc1, mask)
def emit_vec_float_add(self, op, arglocs, regalloc):
resloc, loc0, loc1, itemsize_loc = arglocs
@@ -145,39 +129,40 @@
if itemsize == 8:
self.mc.VFS(resloc, loc0, loc1, 3, 0)
return
- not_implemented("vec_float_add of size %d" % itemsize)
+ not_implemented("vec_float_sub of size %d" % itemsize)
def emit_vec_float_mul(self, op, arglocs, regalloc):
resloc, loc0, loc1, itemsize_loc = arglocs
itemsize = itemsize_loc.value
- if itemsize == 4:
- self.mc.xvmulsp(resloc.value, loc0.value, loc1.value)
- elif itemsize == 8:
- self.mc.xvmuldp(resloc.value, loc0.value, loc1.value)
+ if itemsize == 8:
+ self.mc.VFM(resloc, loc0, loc1, 3, 0)
+ return
+ not_implemented("vec_float_mul of size %d" % itemsize)
def emit_vec_float_truediv(self, op, arglocs, regalloc):
resloc, loc0, loc1, itemsize_loc = arglocs
itemsize = itemsize_loc.value
- if itemsize == 4:
- self.mc.xvdivsp(resloc.value, loc0.value, loc1.value)
- elif itemsize == 8:
- self.mc.xvdivdp(resloc.value, loc0.value, loc1.value)
+ if itemsize == 8:
+ self.mc.VFD(resloc, loc0, loc1, 3, 0)
+ return
+ not_implemented("vec_float_truediv of size %d" % itemsize)
def emit_vec_int_and(self, op, arglocs, regalloc):
resloc, loc0, loc1, sizeloc = arglocs
- self.mc.vand(resloc.value, loc0.value, loc1.value)
+ self.mc.VN(resloc, loc0, loc1)
def emit_vec_int_or(self, op, arglocs, regalloc):
resloc, loc0, loc1, sizeloc = arglocs
- self.mc.vor(resloc.value, loc0.value, loc1.value)
+ self.mc.VO(resloc, loc0, loc1)
def emit_vec_int_xor(self, op, arglocs, regalloc):
resloc, loc0, loc1, sizeloc = arglocs
- self.mc.vxor(resloc.value, loc0.value, loc1.value)
+ self.mc.VX(resloc, loc0, loc1)
def emit_vec_int_signext(self, op, arglocs, regalloc):
resloc, loc0 = arglocs
- # TODO
+ # signext is only allowed if the data type sizes do not change.
+ # e.g. [byte,byte] = sign_ext([byte, byte]), a simple move is sufficient!
self.regalloc_mov(loc0, resloc)
def emit_vec_float_abs(self, op, arglocs, regalloc):
More information about the pypy-commit
mailing list