[pypy-commit] pypy vecopt: split box_(un)pack into float_(un)pack and int_(un)pack, adjusted some tests that unrolled too often (scheduler splits packed instructions if size is too big)
plan_rich
noreply at buildbot.pypy.org
Fri May 15 18:40:22 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77339:2861d6325f12
Date: 2015-05-15 11:07 +0200
http://bitbucket.org/pypy/pypy/changeset/2861d6325f12/
Log: split box_(un)pack into float_(un)pack and int_(un)pack, adjusted
some tests that unrolled too often (scheduler splits packed
instructions if size is too big)
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -122,7 +122,7 @@
return """
a = |30|
b = a + a
- b -> 15
+ b -> 17
"""
def test_float_add(self):
result = self.run("float_add")
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2554,7 +2554,7 @@
elif count == 2:
self.mc.MOVDDUP(resloc, loc0)
- def genop_vec_box_unpack(self, op, arglocs, resloc):
+ def genop_vec_float_unpack(self, op, arglocs, resloc):
loc0, tmploc, indexloc, countloc = arglocs
count = countloc.value
index = indexloc.value
@@ -2566,10 +2566,11 @@
tmploc = self._shuffle_by_index(loc0, tmploc, item_type, size, index, count)
self.mc.MOVD32_rx(resloc.value, tmploc.value)
elif size == 8:
- if index == 0:
- self.mc.UNPCKLPD(resloc, loc0)
- else:
- self.mc.UNPCKHPD(resloc, loc0)
+ pass
+ #if index == 1:
+ # self.mc.SHUFPD_xxi(resloc, loc0, 0|(1<<2))
+ #else:
+ # self.mc.UNPCKHPD(resloc, loc0)
def _shuffle_by_index(self, src_loc, tmp_loc, item_type, size, index, count):
if index == 0 and count == 1:
@@ -2592,29 +2593,34 @@
raise NotImplementedError("shuffle by index for non floats")
- def genop_vec_box_pack(self, op, arglocs, resloc):
- toloc, fromloc, tmploc = arglocs
+ def genop_vec_float_pack(self, op, arglocs, resloc):
+ resultloc, fromloc, tmploc = arglocs
result = op.result
indexarg = op.getarg(2)
+ countarg = op.getarg(2)
assert isinstance(result, BoxVector)
assert isinstance(indexarg, ConstInt)
+ assert isinstance(countarg, ConstInt)
index = indexarg.value
+ count = countarg.value
size = result.item_size
- #py.test.set_trace()
if size == 4:
- select = (1 << 2) # move 0 -> 0, 1 -> 1 for toloc
- # TODO
- if index == 2:
- select |= (1<<6) # move 0 -> 2, 1 -> 3 for fromloc
+ if count == 1:
+ raise NotImplementedError("pack: float single pack")
+ elif count == 2:
+ select = (1 << 2) # move 0 -> 0, 1 -> 1 for toloc
+ if index == 0:
+ # move 0 -> 2, 1 -> 3 for fromloc
+ self.mc.SHUFPS_xxi(resultloc.value, fromloc.value, select | (1 << 2))
+ elif index == 2:
+ # move 0 -> 2, 1 -> 3 for fromloc
+ self.mc.SHUFPS_xxi(resultloc.value, fromloc.value, select | (1 << 6))
+ else:
+ raise NotImplementedError("pack: only index in {0,2} supported")
else:
- raise NotImplementedError("index is not equal to 2")
-
- self.mc.SHUFPS_xxi(toloc.value, fromloc.value, select)
+ raise NotImplementedError("pack: count 3 for single float pack not supported")
elif size == 8:
- if indexloc.value == 0:
- self.mc.UNPCKLPD(resloc, loc0)
- else:
- self.mc.UNPCKHPD(resloc, loc0)
+ raise NotImplementedError("pack: float double pack")
def genop_vec_cast_float_to_singlefloat(self, op, arglocs, resloc):
argloc, _ = arglocs
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1505,7 +1505,6 @@
consider_vec_raw_store = consider_vec_setarrayitem_raw
-
def consider_vec_arith(self, op):
count = op.getarg(2)
assert isinstance(count, ConstInt)
@@ -1535,7 +1534,7 @@
consider_vec_float_eq = consider_vec_logic
del consider_vec_logic
- def consider_vec_box_pack(self, op):
+ def consider_vec_float_pack(self, op):
args = op.getarglist()
loc1 = self.make_sure_var_in_reg(op.getarg(1), args)
result = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
@@ -1544,7 +1543,7 @@
self.xrm.possibly_free_var(tmpxvar)
self.perform(op, [result, loc1, tmploc], result)
- def consider_vec_box_unpack(self, op):
+ def consider_vec_float_unpack(self, op):
count = op.getarg(2)
index = op.getarg(1)
assert isinstance(count, ConstInt)
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -878,7 +878,7 @@
vopt = self.schedule(loop,1)
self.assert_equal(loop, self.parse_loop(vops))
- @pytest.mark.parametrize('unroll', range(2,16,3))
+ @pytest.mark.parametrize('unroll', [1])
def test_vectorize_index_variable_combination(self, unroll):
ops = """
[p0,i0]
@@ -940,7 +940,7 @@
ops = """
[p0,i0]
guard_early_exit() [p0,i0]
- i1 = getarrayitem_raw(p0, i0, descr=intarraydescr)
+ i1 = getarrayitem_raw(p0, i0, descr=chararraydescr)
i2 = int_add(i0, 1)
i3 = int_lt(i2, 102)
guard_true(i3) [p0,i0]
@@ -957,7 +957,7 @@
i2 = int_add(i0, 16)
i3 = int_lt(i2, 102)
guard_true(i3) [p0,i0]
- i1 = vec_getarrayitem_raw(p0, i0, 16, descr=intarraydescr)
+ i1 = vec_getarrayitem_raw(p0, i0, 16, descr=chararraydescr)
jump(p0,i2)
""".format(dead_code=dead_code)
vopt = self.vectorize(self.parse_loop(ops),15)
@@ -1071,8 +1071,8 @@
v62 = vec_raw_load(i4, i41, 2, descr=floatarraydescr)
v63 = vec_float_add(v61, v62, 2)
vec_raw_store(i0, i37, v63, 2, descr=floatarraydescr)
- f100 = vec_box_unpack(v61, 1)
- f101 = vec_box_unpack(v62, 1)
+ f100 = vec_float_unpack(v61, 1, 1)
+ f101 = vec_float_unpack(v62, 1, 1)
jump(p36, i53, p9, i56, p14, f100, p12, p38, f101, p39, i40, i54, p42, i43, i55, i21, i4, i0, i18)
"""
vopt = self.vectorize(self.parse_loop(ops))
@@ -1137,8 +1137,8 @@
v18 = vec_getarrayitem_raw(p0, i5, 2, descr=floatarraydescr)
v19 = vec_cast_float_to_singlefloat(v17, 2)
v20 = vec_cast_float_to_singlefloat(v18, 2)
- vec_box_pack(v19, v20, 2)
- vec_setarrayitem_raw(p1, i1, v19, 4, descr=singlefloatarraydescr)
+ v21 = vec_float_pack(v19, v20, 2, 2)
+ vec_setarrayitem_raw(p1, i1, v21, 4, descr=singlefloatarraydescr)
jump(p0, p1, i7)
"""
vopt = self.vectorize(self.parse_loop(ops))
@@ -1192,7 +1192,7 @@
v224 = vec_float_add(v219, v222, 2)
v225 = vec_cast_float_to_singlefloat(v223, 2)
v226 = vec_cast_float_to_singlefloat(v224, 2)
- v227 = vec_box_pack(v225, v226, 2, 2)
+ v227 = vec_float_pack(v225, v226, 2, 2)
vec_raw_store(p2, i4, v227, 4, descr=singlefloatarraydescr)
jump(p0, p1, p2, i210, i189)
"""
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -404,7 +404,10 @@
arg_cloned = arg.clonebox()
cj = ConstInt(j)
ci = ConstInt(1)
- unpack_op = ResOperation(rop.VEC_BOX_UNPACK, [vbox, cj, ci], arg_cloned)
+ opnum = rop.VEC_FLOAT_UNPACK
+ if vbox.type == INT:
+ opnum = rop.VEC_INT_UNPACK
+ unpack_op = ResOperation(opnum, [vbox, cj, ci], arg_cloned)
self.emit_operation(unpack_op)
sched_data.rename_unpacked(arg, arg_cloned)
args[i] = arg_cloned
@@ -741,6 +744,9 @@
this function creates a box pack instruction to merge them to:
v1/2 = [A,B,X,Y]
"""
+ opnum = rop.VEC_FLOAT_PACK
+ if tgt_box.type == INT:
+ opnum = rop.VEC_INT_PACK
arg_count = len(args)
i = index
while i < arg_count and tgt_box.item_count < packable:
@@ -751,9 +757,8 @@
continue
new_box = tgt_box.clonebox()
new_box.item_count += src_box.item_count
- op = ResOperation(rop.VEC_BOX_PACK,
- [tgt_box, src_box, ConstInt(i),
- ConstInt(src_box.item_count)], new_box)
+ op = ResOperation(opnum, [tgt_box, src_box, ConstInt(i),
+ ConstInt(src_box.item_count)], new_box)
self.preamble_ops.append(op)
self._check_vec_pack(op)
i += src_box.item_count
@@ -803,9 +808,12 @@
else:
resop = ResOperation(rop.VEC_BOX, [ConstInt(self.pack_ops)], vbox)
self.preamble_ops.append(resop)
+ opnum = rop.VEC_FLOAT_PACK
+ if arg.type == INT:
+ opnum = rop.VEC_INT_PACK
for i,op in enumerate(ops):
arg = op.getoperation().getarg(argidx)
- resop = ResOperation(rop.VEC_BOX_PACK,
+ resop = ResOperation(opnum,
[vbox,ConstInt(i),arg], None)
self.preamble_ops.append(resop)
return vbox
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -471,8 +471,10 @@
'VEC_CAST_FLOAT_TO_INT/2',
'VEC_CAST_INT_TO_FLOAT/2',
- 'VEC_BOX_UNPACK/3', # iX|fX = VEC_BOX_UNPACK(vX, index, item_count)
- 'VEC_BOX_PACK/4', # VEC_BOX_PACK(vX, var/const, index, item_count)
+ 'VEC_FLOAT_UNPACK/3', # iX|fX = VEC_FLOAT_UNPACK(vX, index, item_count)
+ 'VEC_FLOAT_PACK/4', # VEC_FLOAT_PACK(vX, var/const, index, item_count)
+ 'VEC_INT_UNPACK/3', # iX|fX = VEC_INT_UNPACK(vX, index, item_count)
+ 'VEC_INT_PACK/4', # VEC_INT_PACK(vX, var/const, index, item_count)
'VEC_EXPAND/2', # vX = VEC_EXPAND(var/const, item_count)
'VEC_BOX/1',
'_VEC_PURE_LAST',
More information about the pypy-commit
mailing list