[pypy-commit] pypy vecopt2: weaker guards are stripped from the trace
plan_rich
noreply at buildbot.pypy.org
Tue May 5 09:46:23 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt2
Changeset: r77128:d16c3d437d4e
Date: 2015-05-01 09:43 +0200
http://bitbucket.org/pypy/pypy/changeset/d16c3d437d4e/
Log: weaker guards are stripped from the trace. Quick and dirty
implementation to remove redundant index calculations (j=i+1;k=j+1
=> j=i+1;k=i+2). Consider moving this into the rewrite optimizer (as
fijal suggested).
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -54,7 +54,9 @@
if exclude_last:
count -= 1
while i < count:
- if not self.path[i].op.has_no_side_effect():
+ op = self.path[i].getoperation()
+ if not op.has_no_side_effect() \
+ and op.getopnum() != rop.GUARD_EARLY_EXIT:
return False
i += 1
return True
@@ -62,6 +64,9 @@
def walk(self, node):
self.path.append(node)
+ def cut_off_at(self, index):
+ self.path = self.path[:index]
+
def clone(self):
return Path(self.path[:])
@@ -89,26 +94,26 @@
def getfailarg_set(self):
op = self.getoperation()
assert isinstance(op, GuardResOp)
- args = []
+ args = {}
if op.getfailargs():
for arg in op.getfailargs():
- args.append(arg)
- return args
+ args[arg] = None
+ return args.keys()
elif op.rd_snapshot:
ss = op.rd_snapshot
assert isinstance(ss, Snapshot)
while ss:
for box in ss.boxes:
- args.append(box)
+ args[box] = None
ss = ss.prev
- return args
+ return args.keys()
def relax_guard_to(self, guard):
""" Relaxes a guard operation to an earlier guard. """
tgt_op = self.getoperation()
- op = guard
+ op = guard.getoperation()
assert isinstance(tgt_op, GuardResOp)
assert isinstance(op, GuardResOp)
#descr = compile.ResumeAtLoopHeaderDescr()
@@ -237,24 +242,34 @@
worklist.append(dep.to)
return True
- def iterate_paths(self, to, backwards=False):
+ def iterate_paths(self, to, backwards=False, path_max_len=-1):
""" yield all nodes from self leading to 'to' """
if self == to:
return
- worklist = [(Path([self]),self)]
+ path = Path([self])
+ worklist = [(0, self, 1)]
while len(worklist) > 0:
- path,node = worklist.pop()
+ index,node,pathlen = worklist.pop()
if backwards:
iterdir = node.depends()
else:
iterdir = node.provides()
- for dep in iterdir:
- cloned_path = path.clone()
- cloned_path.walk(dep.to)
- if dep.to == to:
- yield cloned_path
+ if index >= len(iterdir):
+ continue
+ else:
+ next_dep = iterdir[index]
+ next_node = next_dep.to
+ index += 1
+ if index < len(iterdir):
+ worklist.append((index, node, pathlen))
+ path.cut_off_at(pathlen)
+ path.walk(next_node)
+ pathlen += 1
+
+ if next_node is to or (path_max_len > 0 and pathlen >= path_max_len):
+ yield path
else:
- worklist.append((cloned_path,dep.to))
+ worklist.append((0, next_node, pathlen))
def remove_edge_to(self, node):
i = 0
@@ -661,7 +676,10 @@
to = dep.to
node.remove_edge_to(to)
if not to.emitted and to.depends_count() == 0:
- self.schedulable_nodes.append(to)
+ if to.pack:
+ self.schedulable_nodes.append(to)
+ else:
+ self.schedulable_nodes.insert(0, to)
node.clear_dependencies()
node.emitted = True
@@ -682,6 +700,18 @@
var = self.index_vars[arg] = IndexVar(arg)
return var
+ def operation_INT_LT(self, op, node):
+ box_a0 = op.getarg(0)
+ box_a1 = op.getarg(1)
+ left = None
+ right = None
+ if not self.is_const_integral(box_a0):
+ left = self.get_or_create(box_a0)
+ if not self.is_const_integral(box_a1):
+ right = self.get_or_create(box_a1)
+ box_r = op.result
+ self.index_vars[box_r] = IndexGuard(op.getopnum(), left, right)
+
additive_func_source = """
def operation_{name}(self, op, node):
box_r = op.result
@@ -762,6 +792,25 @@
IntegralForwardModification.inspect_operation = integral_dispatch_opt
del integral_dispatch_opt
+class IndexGuard(object):
+ def __init__(self, opnum, lindex_var, rindex_var):
+ self.opnum = opnum
+ self.lindex_var = lindex_var
+ self.rindex_var = rindex_var
+
+ def getindex_vars(self):
+ if self.lindex_var and self.rindex_var:
+ return (self.lindex_var, self.rindex_var)
+ elif self.lindex_var:
+ return (self.lindex_var,)
+ elif self.rindex_var:
+ return (self.rindex_var,)
+ else:
+ assert False, "integer comparison must have left or right index"
+
+ def adapt_operation(self, op):
+ pass
+
class IndexVar(object):
def __init__(self, var):
self.var = var
@@ -769,6 +818,9 @@
self.coefficient_div = 1
self.constant = 0
+ def getvariable(self):
+ return self.var
+
def __eq__(self, other):
if self.same_variable(other):
return self.diff(other) == 0
@@ -777,6 +829,10 @@
def __ne__(self, other):
return not self.__eq__(other)
+ def less(self, other):
+ if self.same_variable(other):
+ return self.diff(other) < 0
+
def clone(self):
c = IndexVar(self.var)
c.coefficient_mul = self.coefficient_mul
@@ -799,6 +855,18 @@
return 'IndexVar(%s*(%s/%s)+%s)' % (self.var, self.coefficient_mul,
self.coefficient_div, self.constant)
+ def adapt_operation(self, op):
+ # TODO
+ if self.coefficient_mul == 1 and \
+ self.coefficient_div == 1 and \
+ op.getopnum() == rop.INT_ADD:
+ if isinstance(op.getarg(0), Box) and isinstance(op.getarg(1), Const):
+ op.setarg(0, self.var)
+ op.setarg(1, ConstInt(self.constant))
+ elif isinstance(op.getarg(1), Box) and isinstance(op.getarg(0), Const):
+ op.setarg(1, self.var)
+ op.setarg(0, ConstInt(self.constant))
+
class MemoryRef(object):
""" a memory reference to an array object. IntegralForwardModification is able
to propagate changes to this object if applied in backwards direction.
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
@@ -45,7 +45,7 @@
node_b = graph.getnode(idx_b)
dependency = node_a.getedge_to(node_b)
if dependency is None and idx_b not in exceptions.setdefault(idx,[]):
- #self._write_dot_and_convert_to_svg(graph, graph.nodes, 'except')
+ self._write_dot_and_convert_to_svg(graph, 'except')
assert dependency is not None, \
" it is expected that instruction at index" + \
" %s depends on instr on index %s but it does not.\n%s" \
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -26,7 +26,7 @@
class VecTestHelper(DependencyBaseTest):
- enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unfold"
+ enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap"
jitdriver_sd = FakeJitDriverStaticData()
@@ -57,6 +57,10 @@
raise NotAVectorizeableLoop()
if unroll_factor == -1:
unroll_factor = opt.get_unroll_count(ARCH_VEC_REG_SIZE)
+ opt.analyse_index_calculations()
+ if opt.dependency_graph is not None:
+ self._write_dot_and_convert_to_svg(opt.dependency_graph, "ee" + self.test_name)
+ opt.schedule()
opt.unroll_loop_iterations(loop, unroll_factor)
opt.loop.operations = opt.get_newoperations()
opt.clear_newoperations()
@@ -91,6 +95,16 @@
opt.schedule()
return opt
+ def vectorize(self, loop, unroll_factor = -1):
+ opt = self.vectoroptimizer_unrolled(loop, unroll_factor)
+ opt.find_adjacent_memory_refs()
+ opt.extend_packset()
+ opt.combine_packset()
+ opt.schedule()
+ opt.collapse_index_guards()
+ self._do_optimize_loop(loop, {}, export_state=False)
+ return opt
+
def assert_unroll_loop_equals(self, loop, expected_loop, \
unroll_factor = -1):
vectoroptimizer = self.vectoroptimizer_unrolled(loop, unroll_factor)
@@ -696,12 +710,12 @@
loop = self.parse_loop(ops)
vopt = self.extend_packset(loop,1)
assert len(vopt.dependency_graph.memory_refs) == 4
+ self.assert_independent(4,10)
self.assert_independent(5,11)
self.assert_independent(6,12)
- self.assert_independent(7,13)
assert len(vopt.packset.packs) == 3
self.assert_packset_empty(vopt.packset, len(loop.operations),
- [(6,12), (5,11), (7,13)])
+ [(5,11), (4,10), (6,12)])
@pytest.mark.parametrize("descr", ['char','float','int','singlefloat'])
def test_packset_combine_simple(self,descr):
@@ -853,8 +867,8 @@
i11 = int_le(i1, 128)
guard_true(i11) []
i12 = int_add(i1, {stride})
+ v2 = vec_getarrayitem_raw(p1, i0, 2, descr={descr}arraydescr)
v1 = vec_getarrayitem_raw(p0, i0, 2, descr={descr}arraydescr)
- v2 = vec_getarrayitem_raw(p1, i0, 2, descr={descr}arraydescr)
v3 = {op}(v1,v2,2)
vec_setarrayitem_raw(p2, i0, v3, 2, descr={descr}arraydescr)
jump(p0,p1,p2,i12)
@@ -919,6 +933,7 @@
"""
opt="""
[i0, i1, i2, i3, i4]
+ i6 = int_mul(i0, 8)
i11 = int_add(i0, 1)
i12 = int_lt(i11, i1)
guard_true(i12) []
@@ -926,9 +941,8 @@
i13 = int_add(i11, 1)
i18 = int_lt(i13, i1)
guard_true(i18) []
- i6 = int_mul(i0, 8)
+ v20 = vec_raw_load(i3, i6, 2, descr=intarraydescr)
v19 = vec_raw_load(i2, i6, 2, descr=intarraydescr)
- v20 = vec_raw_load(i3, i6, 2, descr=intarraydescr)
v21 = vec_int_add(v19, v20, 2)
vec_raw_store(i4, i6, v21, 2, descr=intarraydescr)
jump(i13, i1, i2, i3, i4)
@@ -976,22 +990,27 @@
def test_collapse_index_guard_1(self):
ops = """
[p0,i0]
- guard_early_exit() []
+ guard_early_exit() [p0,i0]
i1 = getarrayitem_raw(p0, i0, descr=intarraydescr)
i2 = int_add(i0, 1)
i3 = int_lt(i2, 102)
guard_true(i3) [p0,i0]
jump(p0,i2)
"""
+ dead_code = '\n '.join([
+ "i{t} = int_add(i0,{i})\n i{s} = int_lt(i{t}, 102)".format(
+ i=i+1, t=i+4, s=i+20)
+ for i in range(0,15)])
opt="""
[p0,i0]
+ {dead_code}
i2 = int_add(i0, 16)
i3 = int_lt(i2, 102)
guard_true(i3) [p0,i0]
i1 = vec_getarrayitem_raw(p0, i0, 16, descr=intarraydescr)
jump(p0,i2)
- """
- vopt = self.schedule(self.parse_loop(ops),15)
+ """.format(dead_code=dead_code)
+ vopt = self.vectorize(self.parse_loop(ops),15)
self.assert_equal(vopt.loop, self.parse_loop(opt))
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -53,19 +53,22 @@
def_opt = Optimizer(metainterp_sd, jitdriver_sd, loop, optimizations)
def_opt.propagate_all_forward()
+#class CollapseGuardOptimization(Optimization):
+# def __init__(self, index_vars = None):
+# self.index_vars = index_vars or {}
+#
+# def propagate_forward(
+
class VectorizingOptimizer(Optimizer):
""" Try to unroll the loop and find instructions to group """
def __init__(self, metainterp_sd, jitdriver_sd, loop, optimizations):
Optimizer.__init__(self, metainterp_sd, jitdriver_sd, loop, optimizations)
- self.memory_refs = []
self.dependency_graph = None
- self.first_debug_merge_point = False
self.packset = None
self.unroll_count = 0
self.smallest_type_bytes = 0
- self.early_exit = None
- self.future_condition = None
+ self.early_exit_idx = -1
def propagate_all_forward(self, clear=True):
self.clear_newoperations()
@@ -74,7 +77,6 @@
if jump.getopnum() not in (rop.LABEL, rop.JUMP):
# compile_loop appends a additional label to all loops
# we cannot optimize normal traces
- assert False
raise NotAVectorizeableLoop()
self.linear_find_smallest_type(self.loop)
@@ -85,6 +87,12 @@
# we cannot optimize normal traces (if there is no label)
raise NotAVectorizeableLoop()
+ # find index guards and move to the earliest position
+ self.analyse_index_calculations()
+ if self.dependency_graph is not None:
+ self.schedule() # reorder the trace
+
+
# unroll
self.unroll_count = self.get_unroll_count(vsize)
self.unroll_loop_iterations(self.loop, self.unroll_count)
@@ -96,12 +104,13 @@
self.find_adjacent_memory_refs()
self.extend_packset()
self.combine_packset()
- self.collapse_index_guards()
self.schedule()
+ self.collapse_index_guards()
+
def emit_operation(self, op):
- if op.getopnum() == rop.GUARD_EARLY_EXIT or \
- op.getopnum() == rop.DEBUG_MERGE_POINT:
+ #if op.getopnum() == rop.GUARD_EARLY_EXIT or \
+ if op.getopnum() == rop.DEBUG_MERGE_POINT:
return
self._last_emitted_op = op
self._newoperations.append(op)
@@ -114,6 +123,7 @@
""" Unroll the loop X times. unroll_count is an integral how
often to further unroll the loop.
"""
+
op_count = len(loop.operations)
label_op = loop.operations[0].clone()
@@ -125,7 +135,7 @@
jump_op = ResOperation(rop.JUMP, jump_op.getarglist(), None, label_op.getdescr())
else:
jump_op = jump_op.clone()
- jump_op.setdescr(label_op.getdescr())
+ #jump_op.setdescr(label_op.getdescr())
assert jump_op.is_final()
self.emit_unrolled_operation(label_op)
@@ -134,12 +144,11 @@
#self.emit_unrolled_operation(guard_ee_op)
operations = []
+ start_index = 1
for i in range(1,op_count-1):
op = loop.operations[i].clone()
- if loop.operations[i].getopnum() == rop.GUARD_FUTURE_CONDITION:
- pass
if loop.operations[i].getopnum() == rop.GUARD_EARLY_EXIT:
- self.future_condition = op
+ continue
operations.append(op)
self.emit_unrolled_operation(op)
@@ -157,11 +166,13 @@
if la != ja:
rename_map[la] = ja
#
+ emitted_ee = False
for op in operations:
if op.getopnum() == rop.GUARD_FUTURE_CONDITION:
continue # do not unroll this operation twice
if op.getopnum() == rop.GUARD_EARLY_EXIT:
- continue # do not unroll this operation twice
+ emitted_ee = True
+ pass # do not unroll this operation twice
copied_op = op.clone()
if copied_op.result is not None:
# every result assigns a new box, thus creates an entry
@@ -180,7 +191,7 @@
# not only the arguments, but also the fail args need
# to be adjusted. rd_snapshot stores the live variables
# that are needed to resume.
- if copied_op.is_guard():
+ if copied_op.is_guard() and emitted_ee:
assert isinstance(copied_op, GuardResOp)
snapshot = self.clone_snapshot(copied_op.rd_snapshot, rename_map)
copied_op.rd_snapshot = snapshot
@@ -231,6 +242,8 @@
def linear_find_smallest_type(self, loop):
# O(#operations)
for i,op in enumerate(loop.operations):
+ if op.getopnum() == rop.GUARD_EARLY_EXIT:
+ self.early_exit_idx = i
if op.is_array_op():
descr = op.getdescr()
if not descr.is_array_of_pointers():
@@ -250,7 +263,6 @@
def build_dependency_graph(self):
self.dependency_graph = DependencyGraph(self.loop.operations)
- self.relax_index_guards()
def find_adjacent_memory_refs(self):
""" the pre pass already builds a hash of memory references and the
@@ -346,12 +358,11 @@
break
def schedule(self):
+ self.guard_early_exit = -1
self.clear_newoperations()
scheduler = Scheduler(self.dependency_graph, VecScheduleData())
- #dprint("scheduling loop. scheduleable are: " + str(scheduler.schedulable_nodes))
while scheduler.has_more():
candidate = scheduler.next()
- #dprint(" candidate", candidate, "has pack?", candidate.pack != None, "pack", candidate.pack)
if candidate.pack:
pack = candidate.pack
if scheduler.schedulable(pack.operations):
@@ -362,8 +373,6 @@
else:
scheduler.schedule_later(0)
else:
- if candidate.getopnum() == rop.GUARD_EARLY_EXIT:
- pass
position = len(self._newoperations)
self.emit_operation(candidate.getoperation())
scheduler.schedule(0, position)
@@ -372,69 +381,90 @@
for node in self.dependency_graph.nodes:
assert node.emitted
self.loop.operations = self._newoperations[:]
+ self.clear_newoperations()
- def relax_index_guards(self):
- label_idx = 0
- early_exit_idx = 1
- label = self.dependency_graph.getnode(label_idx)
- ee_guard = self.dependency_graph.getnode(early_exit_idx)
- if not ee_guard.is_guard_early_exit():
- return # cannot relax
+ def analyse_index_calculations(self):
+ if len(self.loop.operations) <= 1 or self.early_exit_idx == -1:
+ return
- #self.early_exit = ee_guard
+ self.dependency_graph = dependencies = DependencyGraph(self.loop.operations)
- for guard_node in self.dependency_graph.guards:
- if guard_node == ee_guard:
- continue
- if guard_node.getopnum() not in (rop.GUARD_TRUE,rop.GUARD_FALSE):
+ label_node = dependencies.getnode(0)
+ ee_guard_node = dependencies.getnode(self.early_exit_idx)
+ guards = dependencies.guards
+ fail_args = []
+ for guard_node in guards:
+ if guard_node is ee_guard_node:
continue
del_deps = []
pullup = []
- iterb = guard_node.iterate_paths(ee_guard, True)
last_prev_node = None
- for path in iterb:
+ for path in guard_node.iterate_paths(ee_guard_node, True):
prev_node = path.second()
- if fail_args_break_dependency(guard_node, prev_node, ee_guard):
+ if fail_args_break_dependency(guard_node, prev_node, ee_guard_node):
if prev_node == last_prev_node:
continue
- dprint("relax) ", prev_node, "=>", guard_node)
- del_deps.append((prev_node,guard_node))
+ del_deps.append((prev_node, guard_node))
else:
- pullup.append(path)
+ if path.has_no_side_effects(exclude_first=True, exclude_last=True):
+ #index_guards[guard.getindex()] = IndexGuard(guard, path.path[:])
+ pullup.append(path.last_but_one())
last_prev_node = prev_node
for a,b in del_deps:
a.remove_edge_to(b)
- for candidate in pullup:
- lbo = candidate.last_but_one()
- if candidate.has_no_side_effects(exclude_first=True, exclude_last=True):
- ee_guard.remove_edge_to(lbo)
- label.edge_to(lbo, label='pullup')
- guard_node.edge_to(ee_guard, label='pullup')
- label.remove_edge_to(ee_guard)
-
- guard_node.relax_guard_to(self.future_condition)
+ for lbo in pullup:
+ if lbo is ee_guard_node:
+ continue
+ ee_guard_node.remove_edge_to(lbo)
+ label_node.edge_to(lbo, label='pullup')
+ # only the last guard needs a connection
+ guard_node.edge_to(ee_guard_node, label='pullup-last-guard')
+ guard_node.relax_guard_to(ee_guard_node)
def collapse_index_guards(self):
- pass
- #final_ops = []
- #last_guard = None
- #is_after_relax = False
- #for op in self._newoperations:
- # if op.getopnum() == rop.GUARD_EARLY_EXIT:
- # assert last_guard is not None
- # final_ops.append(last_guard)
- # is_after_relax = True
- # continue
- # if not is_after_relax:
- # if op.is_guard():
- # last_guard = op
- # else:
- # final_ops.append(op)
- # else:
- # final_ops.append(op)
- #assert is_after_relax
- #return final_ops
+ strongest_guards = {}
+ strongest_guards_var = {}
+ index_vars = self.dependency_graph.index_vars
+ operations = self.loop.operations
+ var_for_guard = {}
+ for i in range(len(operations)-1, -1, -1):
+ op = operations[i]
+ if op.is_guard():
+ for arg in op.getarglist():
+ var_for_guard[arg] = True
+ try:
+ comparison = index_vars[arg]
+ for index_var in comparison.getindex_vars():
+ var = index_var.getvariable()
+ strongest_known = strongest_guards_var.get(var, None)
+ if not strongest_known:
+ strongest_guards_var[var] = index_var
+ continue
+ if index_var.less(strongest_known):
+ strongest_guards_var[var] = strongest_known
+ strongest_guards[op] = strongest_known
+ except KeyError:
+ pass
+ last_op_idx = len(operations)-1
+ for op in operations:
+ if op.is_guard():
+ stronger_guard = strongest_guards.get(op, None)
+ if stronger_guard:
+ # there is a stronger guard
+ continue
+ else:
+ self.emit_operation(op)
+ continue
+ if op.is_always_pure() and op.result:
+ try:
+ var_index = index_vars[op.result]
+ var_index.adapt_operation(op)
+ except KeyError:
+ pass
+ self.emit_operation(op)
+
+ self.loop.operations = self._newoperations[:]
def must_unpack_result_to_exec(op, target_op):
# TODO either move to resop or util
@@ -445,7 +475,6 @@
def prohibit_packing(op1, op2):
if op1.is_array_op():
if op1.getarg(1) == op2.result:
- dprint("prohibit)", op1, op2)
return True
return False
More information about the pypy-commit
mailing list