[pypy-commit] pypy zarch-simd-support: some more pack/unpack cases implemented
plan_rich
pypy.commits at gmail.com
Fri Sep 16 08:42:05 EDT 2016
Author: Richard Plangger <planrichi at gmail.com>
Branch: zarch-simd-support
Changeset: r87131:2cd9f79ff7de
Date: 2016-09-16 11:59 +0200
http://bitbucket.org/pypy/pypy/changeset/2cd9f79ff7de/
Log: some more pack/unpack cases implemented
diff --git a/rpython/jit/backend/zarch/instruction_builder.py b/rpython/jit/backend/zarch/instruction_builder.py
--- a/rpython/jit/backend/zarch/instruction_builder.py
+++ b/rpython/jit/backend/zarch/instruction_builder.py
@@ -546,6 +546,30 @@
self.writechar(opcode2)
return encode_vri_a
+def build_vrs_b(mnemonic, (opcode1,opcode2), argtypes='v,r,db,m'):
+ @builder.arguments(argtypes)
+ def encode_vrs_b(self, v1, r2, db3, m4):
+ self.writechar(opcode1)
+ rbx = (v1 >= 16) << 3
+ byte = (v1 & BIT_MASK_4) << 4 | (r2 & BIT_MASK_4)
+ self.writechar(chr(byte))
+ encode_base_displace(self, db3)
+ self.writechar(chr((m4 & BIT_MASK_4) << 4 | (rbx & BIT_MASK_4)))
+ self.writechar(opcode2)
+ return encode_vrs_b
+
+def build_vrs_c(mnemonic, (opcode1,opcode2), argtypes='r,v,db,m'):
+ @builder.arguments(argtypes)
+ def encode_vrs_c(self, r1, v2, db3, m4):
+ self.writechar(opcode1)
+ rbx = (v2 >= 16) << 2
+ byte = (r1 & BIT_MASK_4) << 4 | (v2 & BIT_MASK_4)
+ self.writechar(chr(byte))
+ encode_base_displace(self, db3)
+ self.writechar(chr((m4 & BIT_MASK_4) << 4 | (rbx & BIT_MASK_4)))
+ self.writechar(opcode2)
+ return encode_vrs_c
+
def build_unpack_func(mnemonic, func):
@always_inline
diff --git a/rpython/jit/backend/zarch/instructions.py b/rpython/jit/backend/zarch/instructions.py
--- a/rpython/jit/backend/zarch/instructions.py
+++ b/rpython/jit/backend/zarch/instructions.py
@@ -334,6 +334,8 @@
'VMRL': ('vrr_c', ['\xE7','\x60'], 'v,v,v,m'),
'VMRH': ('vrr_c', ['\xE7','\x61'], 'v,v,v,m'),
'VPDI': ('vrr_c', ['\xE7','\x84'], 'v,v,v,m'),
+ 'VLVG': ('vrs_b', ['\xE7','\x22']),
+ 'VLGV': ('vrs_c', ['\xE7','\x21']),
# '': ('', ['','']),
}
diff --git a/rpython/jit/backend/zarch/vector_ext.py b/rpython/jit/backend/zarch/vector_ext.py
--- a/rpython/jit/backend/zarch/vector_ext.py
+++ b/rpython/jit/backend/zarch/vector_ext.py
@@ -263,24 +263,6 @@
# 4 => bit 1 from the MSB: XxC
self.mc.VCGD(resloc, loc0, 3, 4, mask.RND_TOZERO.value)
- def emit_vec_expand_f(self, op, arglocs, regalloc):
- assert isinstance(op, VectorOp)
- resloc, srcloc = arglocs
- size = op.bytesize
- res = resloc.value
- if isinstance(srcloc, l.ConstFloatLoc):
- # they are aligned!
- assert size == 8
- tloc = regalloc.rm.get_scratch_reg()
- self.mc.load_imm(tloc, srcloc.value)
- self.mc.lxvd2x(res, 0, tloc.value)
- elif size == 8:
- # splat the low of src to both slots in res
- src = srcloc.value
- self.mc.xxspltdl(res, src, src)
- else:
- not_implemented("vec expand in this combination not supported")
-
def emit_vec_expand_i(self, op, arglocs, regalloc):
assert isinstance(op, VectorOp)
resloc, loc0 = arglocs
@@ -292,13 +274,12 @@
def _accum_reduce(self, op, arg, accumloc, targetloc):
# Currently the accumulator can ONLY be 64 bit float/int
if arg.type == FLOAT:
- # r = (r[0]+r[1],r[0]+r[1])
- self.mc.VMRL(targetloc, accumloc, accumloc, l.MASK_VEC_DWORD)
+ self.mc.VPDI(targetloc, accumloc, accumloc, permi(1,0))
if op == '+':
- self.mc.VFA(targetloc, targetloc, accumloc, 3, 0, 0)
+ self.mc.VFA(targetloc, targetloc, accumloc, 3, 0b1000, 0)
return
elif op == '*':
- self.mc.VFM(targetloc, targetloc, accumloc, 3, 0, 0)
+ self.mc.VFM(targetloc, targetloc, accumloc, 3, 0b1000, 0)
return
else:
assert arg.type == INT
@@ -317,41 +298,14 @@
def emit_vec_pack_i(self, op, arglocs, regalloc):
assert isinstance(op, VectorOp)
- resultloc, vloc, sourceloc, residxloc, srcidxloc, countloc = arglocs
- srcidx = srcidxloc.value
+ resloc, vloc, sourceloc, residxloc, srcidxloc, countloc = arglocs
residx = residxloc.value
count = countloc.value
- res = resultloc.value
- vector = vloc.value
- src = sourceloc.value
size = op.bytesize
-        assert resultloc.is_vector_reg() # vector <- reg
+        assert resloc.is_vector_reg() # vector <- reg
- self.mc.load_imm(r.SCRATCH2, PARAM_SAVE_AREA_OFFSET)
- self.mc.stvx(vector, r.SCRATCH2.value, r.SP.value)
- idx = residx
- if size == 8:
- if not IS_BIG_ENDIAN:
- idx = (16 // size) - 1 - idx
- self.mc.store(src, r.SP.value, PARAM_SAVE_AREA_OFFSET+8*idx)
- elif size == 4:
- for j in range(count):
- idx = j + residx
- if not IS_BIG_ENDIAN:
- idx = (16 // size) - 1 - idx
- self.mc.stw(src, r.SP.value, PARAM_SAVE_AREA_OFFSET+4*idx)
- elif size == 2:
- for j in range(count):
- idx = j + residx
- if not IS_BIG_ENDIAN:
- idx = (16 // size) - 1 - idx
- self.mc.sth(src, r.SP.value, PARAM_SAVE_AREA_OFFSET+2*idx)
- elif size == 1:
- for j in range(count):
- idx = j + residx
- if not IS_BIG_ENDIAN:
- idx = (16 // size) - 1 - idx
- self.mc.stb(src, r.SP.value, PARAM_SAVE_AREA_OFFSET+idx)
- self.mc.lvx(res, r.SCRATCH2.value, r.SP.value)
+ for j in range(count):
+ index = l.addr(j + residx)
+ self.mc.VLVG(resloc, sourceloc, index, l.itemsize_to_mask(size))
def emit_vec_unpack_i(self, op, arglocs, regalloc):
assert isinstance(op, VectorOp)
@@ -364,44 +318,26 @@
if count == 1:
assert srcloc.is_vector_reg()
assert not resloc.is_vector_reg()
- off = PARAM_SAVE_AREA_OFFSET
- self.mc.load_imm(r.SCRATCH2, off)
- self.mc.stvx(src, r.SCRATCH2.value, r.SP.value)
- if not IS_BIG_ENDIAN:
- idx = (16 // size) - 1 - idx
- off += size * idx
- if size == 8:
- self.mc.load(res, r.SP.value, off)
- return
- elif size == 4:
- self.mc.lwa(res, r.SP.value, off)
- return
- elif size == 2:
- self.mc.lha(res, r.SP.value, off)
- return
- elif size == 1:
- self.mc.lbz(res, r.SP.value, off)
- self.mc.extsb(res, res)
- return
+ self.mc.VLGV(resloc, srcloc, index, l.itemsize_to_mask(size))
else:
# count is not 1, but only 2 is supported for i32
# 4 for i16 and 8 for i8.
src = srcloc.value
res = resloc.value
- self.mc.load_imm(r.SCRATCH2, PARAM_SAVE_AREA_OFFSET)
- self.mc.stvx(src, r.SCRATCH2.value, r.SP.value)
- self.mc.load_imm(r.SCRATCH2, PARAM_SAVE_AREA_OFFSET+16)
- self.mc.stvx(res, r.SCRATCH2.value, r.SP.value)
+ #self.mc.load_imm(r.SCRATCH2, PARAM_SAVE_AREA_OFFSET)
+ #self.mc.stvx(src, r.SCRATCH2.value, r.SP.value)
+ #self.mc.load_imm(r.SCRATCH2, PARAM_SAVE_AREA_OFFSET+16)
+ #self.mc.stvx(res, r.SCRATCH2.value, r.SP.value)
if count * size == 8:
if not IS_BIG_ENDIAN:
endian_off = 8
- off = PARAM_SAVE_AREA_OFFSET
- off = off + endian_off - (idx * size)
- assert idx * size + 8 <= 16
- self.mc.load(r.SCRATCH.value, r.SP.value, off)
- self.mc.store(r.SCRATCH.value, r.SP.value, PARAM_SAVE_AREA_OFFSET+16+endian_off)
- self.mc.lvx(res, r.SCRATCH2.value, r.SP.value)
+ #off = PARAM_SAVE_AREA_OFFSET
+ #off = off + endian_off - (idx * size)
+ #assert idx * size + 8 <= 16
+ #self.mc.load(r.SCRATCH.value, r.SP.value, off)
+ #self.mc.store(r.SCRATCH.value, r.SP.value, PARAM_SAVE_AREA_OFFSET+16+endian_off)
+ #self.mc.lvx(res, r.SCRATCH2.value, r.SP.value)
return
not_implemented("%d bit integer, count %d" % \
diff --git a/rpython/jit/metainterp/test/test_vector.py b/rpython/jit/metainterp/test/test_vector.py
--- a/rpython/jit/metainterp/test/test_vector.py
+++ b/rpython/jit/metainterp/test/test_vector.py
@@ -402,10 +402,10 @@
bits = 64
la = data.draw(st.lists(strat, min_size=10, max_size=150))
- #la = [1.0] * 10
+ la = [1.0] * 10
l = len(la)
- accum = 0 #data.draw(strat)
+ accum = data.draw(strat)
rawstorage = RawStorage()
va = rawstorage.new(la, type)
res = self.meta_interp(f, [accum, l*size, va])
More information about the pypy-commit
mailing list