[pypy-commit] pypy ppc-vsx-support: vectorized reduction test now passes (ppc)
plan_rich
pypy.commits at gmail.com
Mon Jul 4 09:30:34 EDT 2016
Author: Richard Plangger <planrichi at gmail.com>
Branch: ppc-vsx-support
Changeset: r85536:1f00adc7b0fd
Date: 2016-07-04 15:29 +0200
http://bitbucket.org/pypy/pypy/changeset/1f00adc7b0fd/
Log: vectorized reduction test now passes (ppc)
diff --git a/rpython/jit/backend/ppc/codebuilder.py b/rpython/jit/backend/ppc/codebuilder.py
--- a/rpython/jit/backend/ppc/codebuilder.py
+++ b/rpython/jit/backend/ppc/codebuilder.py
@@ -713,6 +713,8 @@
vperm = VA(4, XO10=43)
vsel = VA(4, XO10=42)
vspltisb = VXI(4, XO8=780)
+ vspltish = VXI(4, XO8=844)
+ vspltisw = VXI(4, XO8=908)
VX_splat = Form("ivrT", "ivrB", "ivrA", "XO8")
vspltb = VX_splat(4, XO8=524)
diff --git a/rpython/jit/backend/ppc/condition.py b/rpython/jit/backend/ppc/condition.py
--- a/rpython/jit/backend/ppc/condition.py
+++ b/rpython/jit/backend/ppc/condition.py
@@ -6,6 +6,10 @@
GE = 5
SO = 6
NS = 7
+VEQ = 8
+VEQI = 9
+VNE = 10
+VNEI = 11
cond_none = -1 # invalid
def negate(cond):
@@ -19,6 +23,8 @@
assert negate(GE) == LT
assert negate(SO) == NS
assert negate(NS) == SO
+assert negate(VEQ) == VEQI
+assert negate(VNE) == VNEI
encoding = [
(2, 12), # EQ
@@ -29,4 +35,8 @@
(0, 4), # GE
(3, 12), # SO
(3, 4), # NS
+ (24, 12), # VEQ
+ (24, 4), # VEQI
+ (26, 12), # VNE
+ (26, 4), # VNEI
]
diff --git a/rpython/jit/backend/ppc/ppc_assembler.py b/rpython/jit/backend/ppc/ppc_assembler.py
--- a/rpython/jit/backend/ppc/ppc_assembler.py
+++ b/rpython/jit/backend/ppc/ppc_assembler.py
@@ -1369,7 +1369,6 @@
allocation. This needs remapping which is done here for both normal registers
and accumulation registers.
"""
- import pdb; pdb.set_trace()
asminfo, bridge_faildescr, version, looptoken = target
assert isinstance(bridge_faildescr, ResumeGuardDescr)
assert isinstance(faildescr, ResumeGuardDescr)
@@ -1385,7 +1384,6 @@
# if accumulation is saved at the guard, we need to update it here!
guard_locs = self.rebuild_faillocs_from_descr(faildescr, version.inputargs)
bridge_locs = self.rebuild_faillocs_from_descr(bridge_faildescr, version.inputargs)
- #import pdb; pdb.set_trace()
guard_accum_info = faildescr.rd_vector_info
# O(n**2), but usually you only have at most 1 fail argument
while guard_accum_info:
diff --git a/rpython/jit/backend/ppc/vector_ext.py b/rpython/jit/backend/ppc/vector_ext.py
--- a/rpython/jit/backend/ppc/vector_ext.py
+++ b/rpython/jit/backend/ppc/vector_ext.py
@@ -34,7 +34,18 @@
# "propagate it between this operation and the next guard by keeping
# it in the cc". In the uncommon case, result_loc is another
# register, and we emit a load from the cc into this register.
+
+ # Possibly invert the bit in the CR
+ bit, invert = c.encoding[condition]
+ assert 24 <= bit <= 27
+ if invert == 12:
+ pass
+ elif invert == 4:
+ asm.mc.crnor(bit, bit, bit)
+ else:
+ assert 0
assert asm.guard_success_cc == c.cond_none
+ #
if result_loc is r.SPP:
asm.guard_success_cc = condition
else:
@@ -386,6 +397,7 @@
self.mc.vcmpgtuwx(resloc.value, argloc.value, tmp)
elif size == 8:
self.mc.vcmpgtudx(resloc.value, argloc.value, tmp)
+ flush_vec_cc(self, regalloc, c.VNEI, op.bytesize, resloc)
def emit_vec_float_eq(self, op, arglocs, regalloc):
resloc, loc1, loc2, sizeloc = arglocs
@@ -404,7 +416,7 @@
else:
notimplemented("[ppc/assembler] float == for size %d" % size)
self.mc.lvx(resloc.value, off, r.SP.value)
- flush_vec_cc(self, regalloc, c.EQ, op.bytesize, resloc)
+ flush_vec_cc(self, regalloc, c.VEQI, op.bytesize, resloc)
def emit_vec_float_xor(self, op, arglocs, regalloc):
resloc, l0, l1, sizeloc = arglocs
@@ -432,7 +444,7 @@
res = resloc.value
self.mc.lvx(res, off, r.SP.value)
self.mc.vnor(res, res, res) # complement
- flush_vec_cc(self, regalloc, c.NE, op.bytesize, resloc)
+ flush_vec_cc(self, regalloc, c.VNEI, op.bytesize, resloc)
def emit_vec_cast_int_to_float(self, op, arglocs, regalloc):
res, l0 = arglocs
@@ -455,7 +467,7 @@
self.mc.vcmpequwx(res.value, l0.value, l1.value)
elif size == 8:
self.mc.vcmpequdx(res.value, l0.value, l1.value)
- flush_vec_cc(self, regalloc, c.EQ, op.bytesize, res)
+ flush_vec_cc(self, regalloc, c.VEQI, op.bytesize, res)
def emit_vec_int_ne(self, op, arglocs, regalloc):
res, l0, l1, sizeloc = arglocs
@@ -471,7 +483,7 @@
elif size == 8:
self.mc.vcmpequdx(res.value, res.value, tmp)
self.mc.vnor(res.value, res.value, res.value)
- flush_vec_cc(self, regalloc, c.NE, op.bytesize, res)
+ flush_vec_cc(self, regalloc, c.VEQI, op.bytesize, res)
def emit_vec_expand_f(self, op, arglocs, regalloc):
resloc, srcloc = arglocs
@@ -549,7 +561,7 @@
if size == 8:
if srcloc.is_vector_reg(): # reg <- vector
assert not resloc.is_vector_reg()
- self.mc.load_imm(r.SCRATCH, PARAM_SAVE_AREA_OFFSET)
+ self.mc.load_imm(r.SCRATCH2, PARAM_SAVE_AREA_OFFSET)
self.mc.stvx(src, r.SCRATCH2.value, r.SP.value)
self.mc.load(res, r.SP.value, PARAM_SAVE_AREA_OFFSET+8*idx)
else:
@@ -621,6 +633,17 @@
else:
return self.ivrm.force_allocate_reg(op, forbidden_vars)
+ def force_allocate_vector_reg_or_cc(self, op):
+ assert op.type == INT
+ if self.next_op_can_accept_cc(self.operations, self.rm.position):
+ # hack: return the SPP location to mean "lives in CC". This
+ # SPP will not actually be used, and the location will be freed
+ # after the next op as usual.
+ self.rm.force_allocate_frame_reg(op)
+ return r.SPP
+ else:
+ return self.force_allocate_vector_reg(op)
+
def ensure_vector_reg(self, box):
if box.type == FLOAT:
return self.vrm.make_sure_var_in_reg(box,
@@ -691,14 +714,25 @@
prepare_vec_int_and = prepare_vec_arith
prepare_vec_int_or = prepare_vec_arith
prepare_vec_int_xor = prepare_vec_arith
-
- prepare_vec_float_eq = prepare_vec_arith
- prepare_vec_float_ne = prepare_vec_arith
- prepare_vec_int_eq = prepare_vec_arith
- prepare_vec_int_ne = prepare_vec_arith
prepare_vec_float_xor = prepare_vec_arith
del prepare_vec_arith
+ def prepare_vec_bool(self, op):
+ a0 = op.getarg(0)
+ a1 = op.getarg(1)
+ assert isinstance(op, VectorOp)
+ size = op.bytesize
+ args = op.getarglist()
+ loc0 = self.ensure_vector_reg(a0)
+ loc1 = self.ensure_vector_reg(a1)
+ resloc = self.force_allocate_vector_reg_or_cc(op)
+ return [resloc, loc0, loc1, imm(size)]
+
+ prepare_vec_float_eq = prepare_vec_bool
+ prepare_vec_float_ne = prepare_vec_bool
+ prepare_vec_int_eq = prepare_vec_bool
+ prepare_vec_int_ne = prepare_vec_bool
+ del prepare_vec_bool
def prepare_vec_store(self, op):
descr = op.getdescr()
@@ -826,7 +860,7 @@
arg = op.getarg(0)
assert isinstance(arg, VectorOp)
argloc = self.ensure_vector_reg(arg)
- resloc = self.force_allocate_vector_reg(op)
+ resloc = self.force_allocate_vector_reg_or_cc(op)
return [resloc, argloc, imm(arg.bytesize)]
def _prepare_vec(self, op):
@@ -843,19 +877,11 @@
prepare_vec_cast_int_to_float = prepare_vec_cast_float_to_int
- def load_vector_condition_into_cc(self, box):
- if self.assembler.guard_success_cc == c.cond_none:
- # compare happended before
- #loc = self.ensure_reg(box)
- #mc = self.assembler.mc
- #mc.cmp_op(0, loc.value, 0, imm=True)
- self.assembler.guard_success_cc = c.NE
-
def prepare_vec_guard_true(self, op):
- self.load_vector_condition_into_cc(op.getarg(0))
+ self.assembler.guard_success_cc = c.VEQ
return self._prepare_guard(op)
def prepare_vec_guard_false(self, op):
- self.load_vector_condition_into_cc(op.getarg(0))
+ self.assembler.guard_success_cc = c.VNE
return self._prepare_guard(op)
diff --git a/rpython/jit/metainterp/test/test_vector.py b/rpython/jit/metainterp/test/test_vector.py
--- a/rpython/jit/metainterp/test/test_vector.py
+++ b/rpython/jit/metainterp/test/test_vector.py
@@ -332,29 +332,29 @@
@py.test.mark.parametrize('type,func,init,insert,at,count,breaks',
# all
- [#(rffi.DOUBLE, lambda x: not bool(x), 1.0, None, -1,32, False),
- #(rffi.DOUBLE, lambda x: x == 0.0, 1.0, None, -1,33, False),
- #(rffi.DOUBLE, lambda x: x == 0.0, 1.0, 0.0, 33,34, True),
- #(rffi.DOUBLE, lambda x: x == 0.0, 1.0, 0.1, 4,34, False),
- #(lltype.Signed, lambda x: not bool(x), 1, None, -1,32, False),
+ [(rffi.DOUBLE, lambda x: not bool(x), 1.0, None, -1,32, False),
+ (rffi.DOUBLE, lambda x: x == 0.0, 1.0, None, -1,33, False),
+ (rffi.DOUBLE, lambda x: x == 0.0, 1.0, 0.0, 33,34, True),
+ (rffi.DOUBLE, lambda x: x == 0.0, 1.0, 0.1, 4,34, False),
+ (lltype.Signed, lambda x: not bool(x), 1, None, -1,32, False),
(lltype.Signed, lambda x: not bool(x), 1, 0, 14,32, True),
- #(lltype.Signed, lambda x: not bool(x), 1, 0, 15,31, True),
- #(lltype.Signed, lambda x: not bool(x), 1, 0, 4,30, True),
- #(lltype.Signed, lambda x: x == 0, 1, None, -1,33, False),
- #(lltype.Signed, lambda x: x == 0, 1, 0, 33,34, True),
- ## any
- #(rffi.DOUBLE, lambda x: x != 0.0, 0.0, 1.0, 33,35, True),
- #(rffi.DOUBLE, lambda x: x != 0.0, 0.0, 1.0, -1,36, False),
- #(rffi.DOUBLE, lambda x: bool(x), 0.0, 1.0, 33,37, True),
- #(rffi.DOUBLE, lambda x: bool(x), 0.0, 1.0, -1,38, False),
- #(lltype.Signed, lambda x: x != 0, 0, 1, 33,35, True),
- #(lltype.Signed, lambda x: x != 0, 0, 1, -1,36, False),
- #(lltype.Signed, lambda x: bool(x), 0, 1, 33,37, True),
- #(lltype.Signed, lambda x: bool(x), 0, 1, -1,38, False),
- #(rffi.INT, lambda x: intmask(x) != 0, rffi.r_int(0), rffi.r_int(1), 33,35, True),
- #(rffi.INT, lambda x: intmask(x) != 0, rffi.r_int(0), rffi.r_int(1), -1,36, False),
- #(rffi.INT, lambda x: bool(intmask(x)), rffi.r_int(0), rffi.r_int(1), 33,37, True),
- #(rffi.INT, lambda x: bool(intmask(x)), rffi.r_int(0), rffi.r_int(1), -1,38, False),
+ (lltype.Signed, lambda x: not bool(x), 1, 0, 15,31, True),
+ (lltype.Signed, lambda x: not bool(x), 1, 0, 4,30, True),
+ (lltype.Signed, lambda x: x == 0, 1, None, -1,33, False),
+ (lltype.Signed, lambda x: x == 0, 1, 0, 33,34, True),
+ # any
+ (rffi.DOUBLE, lambda x: x != 0.0, 0.0, 1.0, 33,35, True),
+ (rffi.DOUBLE, lambda x: x != 0.0, 0.0, 1.0, -1,36, False),
+ (rffi.DOUBLE, lambda x: bool(x), 0.0, 1.0, 33,37, True),
+ (rffi.DOUBLE, lambda x: bool(x), 0.0, 1.0, -1,38, False),
+ (lltype.Signed, lambda x: x != 0, 0, 1, 33,35, True),
+ (lltype.Signed, lambda x: x != 0, 0, 1, -1,36, False),
+ (lltype.Signed, lambda x: bool(x), 0, 1, 33,37, True),
+ (lltype.Signed, lambda x: bool(x), 0, 1, -1,38, False),
+ (rffi.INT, lambda x: intmask(x) != 0, rffi.r_int(0), rffi.r_int(1), 33,35, True),
+ (rffi.INT, lambda x: intmask(x) != 0, rffi.r_int(0), rffi.r_int(1), -1,36, False),
+ (rffi.INT, lambda x: bool(intmask(x)), rffi.r_int(0), rffi.r_int(1), 33,37, True),
+ (rffi.INT, lambda x: bool(intmask(x)), rffi.r_int(0), rffi.r_int(1), -1,38, False),
])
def test_bool_reduction(self, type, func, init, insert, at, count, breaks):
myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True)
More information about the pypy-commit
mailing list