[pypy-commit] pypy vecopt: reworked the dependency construction; statements with side effects need stronger dependencies
plan_rich
noreply at buildbot.pypy.org
Thu May 28 13:04:07 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77640:f83b729acb89
Date: 2015-05-28 13:04 +0200
http://bitbucket.org/pypy/pypy/changeset/f83b729acb89/
Log: Reworked the dependency construction: statements with side effects
need stronger dependencies. Improved the guard-strengthening
optimization. Removed a glitch in constructing pack operations
(arguments were missing and intermixed).
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2696,7 +2696,7 @@
# if source is a normal register (unpack)
assert count == 1
assert si == 0
- self.mc.MOVAPS(X86_64_XMM_SCRATCH_REG, srcloc)
+ self.mc.move(X86_64_XMM_SCRATCH_REG, srcloc)
src = X86_64_XMM_SCRATCH_REG.value
select = ((si & 0x3) << 6)|((ri & 0x3) << 4)
self.mc.INSERTPS_xxi(resloc.value, src, select)
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1579,20 +1579,20 @@
del consider_vec_logic
def consider_vec_int_pack(self, op):
- index = op.getarg(1)
- arg = op.getarg(2)
+ # new_res = vec_int_pack(res, src, index, count)
+ arg = op.getarg(1)
+ index = op.getarg(2)
+ count = op.getarg(3)
assert isinstance(index, ConstInt)
+ assert isinstance(count, ConstInt)
args = op.getarglist()
srcloc = self.make_sure_var_in_reg(arg, args)
resloc = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
- residx = 0
+ residx = index.value # where to put it in result?
+ srcidx = 0
assert isinstance(op.result, BoxVector)
- args = op.getarglist()
size = op.result.getsize()
- count = 1
- if isinstance(arg, BoxVector):
- count = arg.getcount()
- arglocs = [resloc, srcloc, imm(index.value), imm(0), imm(count), imm(size)]
+ arglocs = [resloc, srcloc, imm(residx), imm(srcidx), imm(count.value), imm(size)]
self.perform(op, arglocs, resloc)
consider_vec_float_pack = consider_vec_int_pack
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -392,6 +392,10 @@
def __init__(self, graph):
self.graph = graph
self.defs = {}
+ self.non_pure = []
+
+ def add_non_pure(self, node):
+ self.non_pure.append(node)
def define(self, arg, node, argcell=None):
if isinstance(arg, Const):
@@ -537,9 +541,13 @@
if node.exits_early():
pass
else:
+ # consider cross iterations?
if len(self.guards) > 0:
last_guard = self.guards[-1]
last_guard.edge_to(node, "guardorder")
+ for nonpure in tracker.non_pure:
+ nonpure.edge_to(node, failarg=True)
+ tracker.non_pure = []
self.guards.append(node)
else:
self.build_non_pure_dependencies(node, tracker)
@@ -689,6 +697,8 @@
if len(self.guards) > 0:
last_guard = self.guards[-1]
last_guard.edge_to(node, "sideeffect")
+ # and the next guard instruction
+ tracker.add_non_pure(node)
def __repr__(self):
graph = "graph([\n"
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -937,10 +937,10 @@
i3 = int_lt(i2, 102)
guard_true(i3) [p0,i0]
{dead_code}
- i500 = same_as(i2)
- i300 = int_lt(i500, 102)
+ i500 = int_add(i0, 16)
+ i501 = int_lt(i2, 102)
i1 = vec_getarrayitem_raw(p0, i0, 16, descr=chararraydescr)
- jump(p0,i500)
+ jump(p0,i2)
""".format(dead_code=dead_code)
vopt = self.vectorize(self.parse_loop(ops),15)
self.assert_equal(vopt.loop, self.parse_loop(opt))
@@ -982,12 +982,12 @@
i2 = int_add(i0, 2)
i3 = int_lt(i2, 10)
guard_true(i3) [p0,i0]
- i4 = same_as(i2)
- i5 = int_lt(i4, 10)
+ i4 = int_add(i0, 2)
+ i5 = int_lt(i2, 10)
v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr)
v3 = vec_int_expand(42)
v2 = vec_int_mul(v1, v3)
- jump(p0,i4)
+ jump(p0,i2)
"""
vopt = self.vectorize(self.parse_loop(ops),1)
self.assert_equal(vopt.loop, self.parse_loop(opt))
@@ -1011,12 +1011,12 @@
i2 = int_add(i0, 2)
i3 = int_lt(i2, 10)
guard_true(i3) [p0,i0]
- i4 = same_as(i2)
- i5 = int_lt(i4, 10)
+ i4 = int_add(i0, 2)
+ i5 = int_lt(i2, 10)
v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr)
v3 = vec_float_expand(f3)
v2 = vec_int_mul(v1, v3)
- jump(p0,i4,f3)
+ jump(p0,i2,f3)
"""
vopt = self.vectorize(self.parse_loop(ops),1)
self.assert_equal(vopt.loop, self.parse_loop(opt))
@@ -1053,15 +1053,15 @@
i55 = int_add(i44, 16)
i54 = int_add(i41, 16)
i56 = int_add(i37, 16)
- i629 = same_as(i637)
- i57 = int_ge(i629, i18)
+ i629 = int_add(i28, 2)
+ i57 = int_ge(i637, i18)
v61 = vec_raw_load(i21, i44, 2, descr=floatarraydescr)
v62 = vec_raw_load(i4, i41, 2, descr=floatarraydescr)
v63 = vec_float_add(v61, v62)
vec_raw_store(i0, i37, v63, descr=floatarraydescr)
f100 = vec_float_unpack(v61, 1, 1)
f101 = vec_float_unpack(v62, 1, 1)
- jump(p36, i629, p9, i56, p14, f100, p12, p38, f101, p39, i40, i54, p42, i43, i55, i21, i4, i0, i18)
+ jump(p36, i637, p9, i56, p14, f100, p12, p38, f101, p39, i40, i54, p42, i43, i55, i21, i4, i0, i18)
"""
vopt = self.vectorize(self.parse_loop(ops))
self.assert_equal(vopt.loop, self.parse_loop(opt))
@@ -1090,15 +1090,15 @@
i8 = int_ge(i5, 36)
i6 = int_add(i1, 3)
i11 = int_ge(i6, 36)
- i7 = same_as(i50)
- i14 = int_ge(i7, 36)
+ i7 = int_add(i1, 4)
+ i14 = int_ge(i50, 36)
v17 = vec_getarrayitem_raw(p0, i1, 2, descr=floatarraydescr)
v18 = vec_getarrayitem_raw(p0, i5, 2, descr=floatarraydescr)
v19 = vec_cast_float_to_singlefloat(v17)
v20 = vec_cast_float_to_singlefloat(v18)
v21 = vec_float_pack(v19, v20, 2, 2)
vec_setarrayitem_raw(p1, i1, v21, descr=singlefloatarraydescr)
- jump(p0, p1, i7)
+ jump(p0, p1, i50)
"""
vopt = self.vectorize(self.parse_loop(ops))
self.assert_equal(vopt.loop, self.parse_loop(opt))
@@ -1136,8 +1136,8 @@
i207 = int_add(i0, 16)
i196 = int_add(i4, 12)
i197 = int_lt(i196, 100)
- i205 = same_as(i500)
- i206 = int_lt(i205, 100)
+ i205 = int_add(i4, 16)
+ i206 = int_lt(i500, 100)
v228 = vec_raw_load(p0, i0, 4, descr=singlefloatarraydescr)
v229 = vec_cast_singlefloat_to_float(v228)
v230 = vec_int_unpack(v228, 2, 2)
@@ -1152,7 +1152,7 @@
v239 = vec_cast_float_to_singlefloat(v237)
v240 = vec_float_pack(v238, v239, 2, 2)
vec_raw_store(p2, i4, v240, descr=singlefloatarraydescr)
- jump(p0, p1, p2, i207, i205)
+ jump(p0, p1, p2, i207, i500)
"""
vopt = self.vectorize(self.parse_loop(ops))
self.assert_equal(vopt.loop, self.parse_loop(opt))
@@ -1237,6 +1237,47 @@
opt = self.vectorize(self.parse_loop(trace))
self.debug_print_operations(opt.loop)
+ def test_cast_1(self):
+ trace = """
+ [i9, i10, p2, p11, i12, i13, p4, p5, p14, i15, p8, i16, p17, i18, i19, i20, i21, i22, i23]
+ guard_early_exit() [p8, p5, p4, p2, p17, i13, i12, i10, i19, p14, p11, i18, i15, i16, i9]
+ i24 = raw_load(i20, i16, descr=singlefloatarraydescr)
+ guard_not_invalidated() [p8, p5, p4, p2, i24, p17, i13, i12, i10, i19, p14, p11, i18, i15, i16, None]
+ i27 = int_add(i16, 4)
+ i28 = raw_load(i21, i19, descr=singlefloatarraydescr)
+ i30 = int_add(i19, 4)
+ f31 = cast_singlefloat_to_float(i24)
+ f32 = cast_singlefloat_to_float(i28)
+ f33 = float_add(f31, f32)
+ i34 = cast_float_to_singlefloat(f33)
+ raw_store(i22, i13, i34, descr=singlefloatarraydescr)
+ i36 = int_add(i12, 1)
+ i38 = int_add(i13, 4)
+ i39 = int_ge(i36, i23)
+ guard_false(i39) [p8, p5, p4, p2, i27, i28, i30, i24, i38, i36, p17, None, None, None, None, p14, p11, i18, i15, None, None]
+ jump(i24, i28, p2, p11, i36, i38, p4, p5, p14, i15, p8, i27, p17, i18, i30, i20, i21, i22, i23)
+ """
+ opt = self.vectorize(self.parse_loop(trace))
+ self.debug_print_operations(opt.loop)
+
+ def test_all_guard(self):
+ trace = """
+ [p0, p3, i4, i5, i6, i7]
+ guard_early_exit() [p0, p3, i5, i4]
+ f8 = raw_load(i6, i5, descr=floatarraydescr)
+ guard_not_invalidated() [p0, f8, p3, i5, i4]
+ i9 = cast_float_to_int(f8)
+ i11 = int_and(i9, 255)
+ guard_false(i11) [p0, p3, i5, i4]
+ i13 = int_add(i4, 1)
+ i15 = int_add(i5, 8)
+ i16 = int_ge(i13, i7)
+ guard_false(i16) [p0, i13, i15, p3, None, None]
+ jump(p0, p3, i13, i15, i6, i7)
+ """
+ opt = self.vectorize(self.parse_loop(trace))
+ self.debug_print_operations(opt.loop)
+
def test_reduction_basic(self):
trace = """
[p5, i6, p2, i7, p1, p8, i9, i10, f11, i12, i13, i14]
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -507,14 +507,15 @@
""" An object wrapper around a guard. Helps to determine
if one guard implies another
"""
- def __init__(self, op, cmp_op, lhs, lhs_arg, rhs, rhs_arg):
+ def __init__(self, index, op, cmp_op, lhs, lhs_arg, rhs, rhs_arg):
+ self.index = index
self.op = op
self.cmp_op = cmp_op
self.lhs = lhs
self.rhs = rhs
self.lhs_arg = lhs_arg
self.rhs_arg = rhs_arg
- self.emitted = False
+ self.implied = False
self.stronger = False
def implies(self, guard, opt):
@@ -638,7 +639,7 @@
def propagate_all_forward(self, loop):
""" strengthens the guards that protect an integral value """
strongest_guards = {}
- implied_guards = {}
+ guards = {}
# the guards are ordered. guards[i] is before guards[j] iff i < j
operations = loop.operations
last_guard = None
@@ -652,44 +653,43 @@
lhs = self.index_vars.get(lhs_arg, lhs_arg)
rhs_arg = cmp_op.getarg(1)
rhs = self.index_vars.get(rhs_arg, rhs_arg)
- strongest = strongest_guards.get(key, None)
- if not strongest:
- strongest_guards[key] = Guard(op, cmp_op,
- lhs, lhs_arg,
- rhs, rhs_arg)
+ other = strongest_guards.get(key, None)
+ if not other:
+ guard = Guard(i, op, cmp_op,
+ lhs, lhs_arg,
+ rhs, rhs_arg)
+ strongest_guards[key] = guard
+ # nothing known, at this position emit the guard
+ guards[i] = guard
else: # implicit index(strongest) < index(current)
- guard = Guard(op, cmp_op,
+ guard = Guard(i, op, cmp_op,
lhs, lhs_arg, rhs, rhs_arg)
- if guard.implies(strongest, self):
+ if guard.implies(other, self):
+ strongest_guards[key] = guard
guard.stronger = True
- strongest_guards[key] = guard
- elif strongest.implies(guard, self):
- implied_guards[op] = True
+ guard.index = other.index
+ guards[other.index] = guard
+ # do not mark as emit
+ continue
+ elif other.implies(guard, self):
+ guard.implied = True
+ # mark as emit
+ guards[i] = guard
+ strongest_guards = None
#
self.renamer = Renamer()
last_op_idx = len(operations)-1
for i,op in enumerate(operations):
op = operations[i]
if op.is_guard() and op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE):
- if implied_guards.get(op, False):
- # this guard is implied, thus removed
+ guard = guards.get(i, None)
+ if not guard or guard.implied:
+ # this guard is implied or marked as not emitted (= None)
continue
- key = self.get_key(op, operations, i)
- if key[0] is not None:
- strongest = strongest_guards.get(key, None)
- if not strongest or not strongest.stronger:
- # If the key is not None and there _must_ be a strongest
- # guard. If strongest is None, this operation implies the
- # strongest guard that has been already been emitted.
- self.emit_operation(op)
- continue
- elif strongest.emitted:
- continue
- strongest.emit_operations(self)
- strongest.emitted = True
+ if guard.stronger:
+ guard.emit_operations(self)
continue
if op.result:
- # emit a same_as op if a box uses the same index variable
index_var = self.index_vars.get(op.result, None)
if index_var:
if not index_var.is_identity():
@@ -981,7 +981,7 @@
arg = op.getoperation().getarg(argidx)
new_box = vbox.clonebox()
resop = ResOperation(opnum,
- [vbox,ConstInt(i),arg], new_box)
+ [vbox,arg,ConstInt(i),ConstInt(0)], new_box)
vbox = new_box
self.preamble_ops.append(resop)
return vbox
More information about the pypy-commit
mailing list