[pypy-commit] pypy vecopt2: relaxing guards dependency works for the first simple case
plan_rich
noreply at buildbot.pypy.org
Tue May 5 09:46:05 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt2
Changeset: r77113:e92fd08ff586
Date: 2015-04-13 11:52 +0200
http://bitbucket.org/pypy/pypy/changeset/e92fd08ff586/
Log: relaxing guards dependency works for the first simple case
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -32,8 +32,32 @@
def __init__(self,path):
self.path = path
- def walk(self, idx):
- self.path.append(idx)
+ def second(self):
+ if len(self.path) <= 1:
+ return None
+ return self.path[1]
+
+ def last_but_one(self):
+ if len(self.path) < 2:
+ return None
+ return self.path[len(self.path)-2]
+
+ def has_no_side_effects(self, exclude_first=False, exclude_last=False):
+ last = len(self.path)-1
+ count = len(self.path)
+ i = 0
+ if exclude_first:
+ i += 1
+ if exclude_last:
+ count -= 1
+ while i < count:
+ if not self.path[i].op.has_no_side_effect():
+ return False
+ i += 1
+ return True
+
+ def walk(self, node):
+ self.path.append(node)
def clone(self):
return Path(self.path[:])
@@ -60,7 +84,7 @@
def getopname(self):
return self.op.getopname()
- def edge_to(self, to, arg, label=None):
+ def edge_to(self, to, arg=None, label=None):
assert self != to
dep = self.depends_on(to)
if not dep:
@@ -91,8 +115,8 @@
return None
def clear_dependencies(self):
- self.adjacent_list.clear()
- self.adjacent_list_back.clear()
+ self.adjacent_list = []
+ self.adjacent_list_back = []
def is_guard_early_exit(self):
return self.op.getopnum() == rop.GUARD_NO_EARLY_EXIT
@@ -134,7 +158,7 @@
def provides(self):
return self.adjacent_list
- def depends_count(self, idx):
+ def depends_count(self):
return len(self.adjacent_list_back)
def depends(self):
@@ -178,24 +202,40 @@
worklist.append(dep.to)
return True
- def iterate_paths_backward(self, ai, bi):
- if ai == bi:
+ def iterate_paths(self, to, backwards=False):
+ """ yield all nodes from self leading to 'to' """
+ if self == to:
return
- if ai > bi:
- ai, bi = bi, ai
- worklist = [(Path([bi]),bi)]
+ worklist = [(Path([self]),self)]
while len(worklist) > 0:
- path,idx = worklist.pop()
- for dep in self.depends(idx):
- if ai > dep.idx_from or dep.points_backward():
- # this points above ai (thus unrelevant)
- continue
+ path,node = worklist.pop()
+ if backwards:
+ iterdir = node.depends()
+ else:
+ iterdir = node.provides()
+ for dep in iterdir:
cloned_path = path.clone()
- cloned_path.walk(dep.idx_from)
- if dep.idx_from == ai:
+ cloned_path.walk(dep.to)
+ if dep.to == to:
yield cloned_path
else:
- worklist.append((cloned_path,dep.idx_from))
+ worklist.append((cloned_path,dep.to))
+
+ def remove_edge_to(self, node):
+ i = 0
+ while i < len(self.adjacent_list):
+ dep = self.adjacent_list[i]
+ if dep.to == node:
+ del self.adjacent_list[i]
+ break
+ i += 1
+ i = 0
+ while i < len(node.adjacent_list_back):
+ dep = node.adjacent_list_back[i]
+ if dep.to == self:
+ del node.adjacent_list_back[i]
+ break
+ i += 1
def getedge_to(self, other):
for dep in self.adjacent_list:
@@ -203,18 +243,6 @@
return dep
return None
- def i_remove_dependency(self, idx_at, idx_to):
- at = self.nodes[idx_at]
- to = self.nodes[idx_to]
- self.remove_dependency(at, to)
- def remove_dependency(self, at, to):
- """ Removes a all dependencies that point to 'to' """
- self.adjacent_list[at] = \
- [d for d in self.adjacent_list[at] if d.to != to]
- self.adjacent_list[to] = \
- [d for d in self.adjacent_list[to] if d.at != at]
-
- return args
def __repr__(self):
return "Node(opidx: %d)"%self.opidx
@@ -222,6 +250,8 @@
return not self.__eq__(other)
def __eq__(self, other):
+ if other is None:
+ return False
assert isinstance(other, Node)
return self.opidx == other.opidx
@@ -347,7 +377,7 @@
def __init__(self, operations):
self.nodes = [ Node(op,i) for i,op in enumerate(operations) ]
self.memory_refs = {}
- self.schedulable_nodes = [0] # label is always scheduleable
+ self.schedulable_nodes = []
self.index_vars = {}
self.guards = []
self.build_dependencies()
@@ -365,13 +395,15 @@
"""
tracker = DefTracker(self)
#
+ label_pos = 0
+ jump_pos = len(self.nodes)-1
intformod = IntegralForwardModification(self.memory_refs, self.index_vars)
# pass 1
for i,node in enumerate(self.nodes):
op = node.op
# the label operation defines all operations at the
# beginning of the loop
- if op.getopnum() == rop.LABEL:
+ if op.getopnum() == rop.LABEL and i != jump_pos:
# TODO is it valid that a label occurs at the end of a trace?
ee_node = self.nodes[i+1]
if ee_node.is_guard_early_exit():
@@ -398,16 +430,18 @@
for guard_node in self.guards:
self._build_guard_dependencies(guard_node, op.getopnum(), tracker)
# pass 3 find schedulable nodes
- jump_pos = len(self.nodes)-1
jump_node = self.nodes[jump_pos]
+ label_node = self.nodes[label_pos]
+ self.schedulable_nodes.append(label_node)
for node in self.nodes:
- if node.dependency_count() == 0:
- self.schedulable_nodes.append(node.opidx)
- # every leaf instruction points to the jump_op. in theory every instruction
- # points to jump_op. this forces the jump/finish op to be the last operation
if node != jump_node:
+ if node.dependency_count() == 0:
+ self.schedulable_nodes.append(node)
+ # every leaf instruction points to the jump_op. in theory every instruction
+ # points to jump_op. this forces the jump/finish op to be the last operation
if node.provides_count() == 0:
node.edge_to(jump_node, None, label='jump')
+ print "\n\neee", self.schedulable_nodes
def _build_guard_dependencies(self, guard_node, guard_opnum, tracker):
if guard_opnum >= rop.GUARD_NOT_INVALIDATED:
@@ -557,10 +591,10 @@
self.schedulable_nodes = self.graph.schedulable_nodes
self.sched_data = sched_data
- def has_more_to_schedule(self):
+ def has_more(self):
return len(self.schedulable_nodes) > 0
- def next_schedule_index(self):
+ def next(self):
return self.schedulable_nodes[0]
def schedulable(self, indices):
@@ -578,37 +612,24 @@
def schedule_all(self, opindices):
while len(opindices) > 0:
- print "sched"
opidx = opindices.pop()
for i,node in enumerate(self.schedulable_nodes):
if node == opidx:
+ self.schedule(i)
break
- else:
- i = -1
- if i != -1:
- self.schedule(i)
def schedule(self, index):
node = self.schedulable_nodes[index]
del self.schedulable_nodes[index]
to_del = []
- adj_list = self.graph.adjacent_list[node]
- for dep in adj_list:
- self.graph.remove_dependency_by_index(node, dep.idx_to)
- self.graph.remove_dependency_by_index(dep.idx_to, node)
- print "remove", node, "<=>", dep.idx_to
- if self.is_schedulable(dep.idx_to):
- print "sched", dep.idx_to
- self.schedulable_nodes.append(dep.idx_to)
- #
- # TODO for dep in self.graph.provides(node):
- # candidate = dep.idx_to
+ print " schedule", node.getoperation()
+ for dep in node.provides()[:]:
+ node.remove_edge_to(dep.to)
+ print " >=X=>", node, dep.to, "count",dep.to.depends_count()
+ if dep.to.depends_count() == 0:
+ self.schedulable_nodes.append(dep.to)
node.clear_dependencies()
- def is_schedulable(self, idx):
- print "is sched", idx, "count:", self.graph.depends_count(idx), self.graph.adjacent_list[idx]
- return self.graph.depends_count(idx) == 0
-
class IntegralForwardModification(object):
""" Calculates integral modifications on an integer box. """
def __init__(self, memory_refs, index_vars):
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
@@ -121,7 +121,6 @@
node = self.last_graph.getnode(idx)
return self.last_graph.memory_refs[node]
-
class BaseTestDependencyGraph(DependencyBaseTest):
def test_dependency_empty(self):
ops = """
@@ -298,22 +297,10 @@
self.assert_dependent(1,2)
self.assert_dependent(0,3)
- def test_setarrayitem_depend_with_no_memref_info(self):
- ops="""
- [p0, i1] # 0: 1,2,3?,4?
- setarrayitem_raw(p0, i1, 1, descr=floatarraydescr) # 1: 4?
- i2 = int_add(i1,1) # 2: 3
- setarrayitem_raw(p0, i2, 2, descr=floatarraydescr) # 3: 4
- jump(p0, i1) # 4:
- """
- self.assert_dependencies(ops, full_check=True)
- self.assert_independent(1,2)
- self.assert_independent(1,3)
-
def test_setarrayitem_dont_depend_with_memref_info(self):
ops="""
[p0, i1] # 0: 1,2,3?,4?
- setarrayitem_raw(p0, i1, 1, descr=chararraydescr) # 1: 3?,4?
+ setarrayitem_raw(p0, i1, 1, descr=chararraydescr) # 1: 4
i2 = int_add(i1,1) # 2: 3
setarrayitem_raw(p0, i2, 2, descr=chararraydescr) # 3: 4
jump(p0, i1) # 4:
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -809,7 +809,6 @@
""".format(op=op,descr=descr,stride=stride)
loop = self.parse_loop(ops)
vopt = self.combine_packset(loop,3)
- self.debug_print_operations(loop)
assert len(vopt.dependency_graph.memory_refs) == 12
if len(vopt.packset.packs) != 4:
for pack in vopt.packset.packs:
@@ -905,6 +904,7 @@
def test_123(self):
ops = """
[i0, i1, i2, i3, i4]
+ guard_no_early_exit() []
debug_merge_point(0, 0, '1')
i6 = int_mul(i0, 8)
i7 = raw_load(i2, i6, descr=intarraydescr)
@@ -915,9 +915,10 @@
i12 = int_lt(i11, i1)
guard_true(i12) [i4, i3, i2, i1, i11]
debug_merge_point(0, 0, '2')
- label(i11, i1, i2, i3, i4)
+ jump(i11, i1, i2, i3, i4)
"""
vopt = self.schedule(self.parse_loop(ops),1)
+ self.debug_print_operations(vopt.loop)
def test_schedule_vectorized_trace_1(self):
ops = """
@@ -937,6 +938,5 @@
vopt = self.schedule(self.parse_loop(ops),1)
self.debug_print_operations(vopt.loop)
-
class TestLLtype(BaseTestVectorize, LLtypeMixin):
pass
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -189,8 +189,6 @@
copied_op.setfailargs(args)
#
self.emit_unrolled_operation(copied_op)
- #self.vec_info.index = len(self._newoperations)-1
- #self.vec_info.inspect_operation(copied_op)
# the jump arguments have been changed
# if label(iX) ... jump(i(X+1)) is called, at the next unrolled loop
@@ -248,7 +246,7 @@
def build_dependency_graph(self):
self.dependency_graph = DependencyGraph(self.loop.operations)
- #self.relax_guard_dependencies()
+ self.relax_guard_dependencies()
def find_adjacent_memory_refs(self):
""" the pre pass already builds a hash of memory references and the
@@ -279,7 +277,6 @@
self.packset.add_pair(node_a, node_b)
def extend_packset(self):
- print "extend_packset"
pack_count = self.packset.pack_count()
while True:
for pack in self.packset.packs:
@@ -348,68 +345,67 @@
def schedule(self):
self.clear_newoperations()
scheduler = Scheduler(self.dependency_graph, VecScheduleData())
- while scheduler.has_more_to_schedule():
- candidate = scheduler.next_to_schedule()
- pack = self.packset.pack_for_operation(candidate)
- if pack:
- self._schedule_pack(scheduler, pack)
+ print "scheduling loop"
+ while scheduler.has_more():
+ candidate = scheduler.next()
+ print " candidate", candidate
+ if candidate.pack:
+ pack = candidate.pack
+ if scheduler.schedulable(pack.operations):
+ vop = scheduler.sched_data.as_vector_operation(pack)
+ self.emit_operation(vop)
+ scheduler.schedule_all(pack.operations)
+ else:
+ scheduler.schedule_later(0)
else:
self.emit_operation(candidate.getoperation())
scheduler.schedule(0)
self.loop.operations = self._newoperations[:]
- def _schedule_pack(self, scheduler, pack):
- opindices = [ e.opidx for e in pack.operations ]
- if scheduler.schedulable(opindices):
- vop = scheduler.sched_data \
- .as_vector_operation(pack, self.loop.operations)
- self.emit_operation(vop)
- scheduler.schedule_all(opindices)
- else:
- scheduler.schedule_later(0)
-
def relax_guard_dependencies(self):
early_exit_idx = 1
- operations = self.loop.operations
- assert operations[early_exit_idx].getopnum() == \
- rop.GUARD_NO_EARLY_EXIT
- target_guard = operations[early_exit_idx]
- for guard_idx in self.dependency_graph.guards:
- if guard_idx == early_exit_idx:
+ label_idx = 0
+ label = self.dependency_graph.getnode(label_idx)
+ ee_guard = self.dependency_graph.getnode(early_exit_idx)
+ if not ee_guard.getopnum() == rop.GUARD_NO_EARLY_EXIT:
+ return # cannot relax
+
+ for guard_node in self.dependency_graph.guards:
+ if guard_node == ee_guard:
continue
- guard = operations[guard_idx]
- if guard.getopnum() not in (rop.GUARD_TRUE,rop.GUARD_FALSE):
+ if guard_node.getopnum() not in (rop.GUARD_TRUE,rop.GUARD_FALSE):
continue
- self.dependency_graph.edge(early_exit_idx, guard_idx, early_exit_idx, label='EE')
- print "put", guard_idx, "=>", early_exit_idx
del_deps = []
- for path in self.dependency_graph.iterate_paths_backward(guard_idx, early_exit_idx):
- op_idx = path.path[1]
- print "path", path.path
- op = operations[op_idx]
- if fail_args_break_dependency(guard, guard_idx, target_guard, early_exit_idx, op, op_idx):
- print " +>+>==> break", op_idx, "=>", guard_idx
- del_deps.append(op_idx)
- for dep_idx in del_deps:
- self.dependency_graph.remove_dependency_by_index(dep_idx, guard_idx)
+ pullup = []
+ iterb = guard_node.iterate_paths(ee_guard, True)
+ last_prev_node = None
+ for path in iterb:
+ prev_node = path.second()
+ if fail_args_break_dependency(guard_node, prev_node, ee_guard):
+ if prev_node == last_prev_node:
+ continue
+ print ">=XXX=> ", prev_node, "=>", guard_node
+ del_deps.append((prev_node,guard_node))
+ else:
+ pullup.append(path)
+ last_prev_node = prev_node
+ for a,b in del_deps:
+ a.remove_edge_to(b)
+ for candidate in pullup:
+ lbo = candidate.last_but_one()
+ if candidate.has_no_side_effects(exclude_first=True, exclude_last=True):
+ ee_guard.remove_edge_to(lbo)
+ label.edge_to(lbo, label='pullup')
+ guard_node.edge_to(ee_guard, label='pullup')
+ label.remove_edge_to(ee_guard)
- del_deps = []
- for dep in self.dependency_graph.provides(early_exit_idx):
- del_deps.append(dep.idx_to)
- for dep_idx in del_deps:
- self.dependency_graph.remove_dependency_by_index(1, dep_idx)
- self.dependency_graph.edge(dep_idx, 0, dep_idx)
- last_idx = len(operations) - 1
- self.dependency_graph.remove_dependency_by_index(0,1)
- self.dependency_graph.edge(last_idx, early_exit_idx, last_idx)
+def fail_args_break_dependency(guard, prev_op, target_guard):
+ failargs = set(guard.getoperation().getfailargs())
+ new_failargs = set(target_guard.getoperation().getfailargs())
-def fail_args_break_dependency(guard, guard_idx, target_guard, target_guard_idx, op, op_idx):
- failargs = set(guard.getfailargs())
- new_failargs = set(target_guard.getfailargs())
-
- print " args:", [op.result] + op.getarglist()[:], " &&& ", failargs, " !!! ", new_failargs
- if op.is_array_op():
+ op = prev_op.getoperation()
+ if not op.has_no_side_effect():
return True
if op.result is not None:
arg = op.result
@@ -420,28 +416,29 @@
if arg not in failargs or \
arg in failargs and arg in new_failargs:
return False
+ # THINK about: increased index in fail arg, but normal index on arglist
+ # this might be an indicator for edge removal
return True
class VecScheduleData(SchedulerData):
def __init__(self):
self.box_to_vbox = {}
- def as_vector_operation(self, pack, operations):
+ def as_vector_operation(self, pack):
assert len(pack.operations) > 1
self.pack = pack
- ops = [operations[w.opidx] for w in pack.operations]
- op0 = operations[pack.operations[0].opidx]
+ op0 = pack.operations[0].getoperation()
assert op0.vector != -1
args = op0.getarglist()[:]
if op0.vector in (rop.VEC_RAW_LOAD, rop.VEC_RAW_STORE):
args.append(ConstInt(0))
- vopt = ResOperation(op0.vector, args,
+ vop = ResOperation(op0.vector, args,
op0.result, op0.getdescr())
- self._inspect_operation(vopt,ops) # op0 is for dispatch only
+ self._inspect_operation(vop) # op0 is for dispatch only
#if op0.vector not in (rop.VEC_RAW_LOAD, rop.VEC_RAW_STORE):
# op_count = len(pack.operations)
# args.append(ConstInt(op_count))
- return vopt
+ return vop
def _pack_vector_arg(self, vop, op, i, vbox):
arg = op.getarg(i)
@@ -463,11 +460,13 @@
return vbox
bin_arith_trans = """
- def _vectorize_{name}(self, vop, ops):
+ def _vectorize_{name}(self, vop):
vbox_arg_0 = None
vbox_arg_1 = None
vbox_result = None
- for i, op in enumerate(ops):
+ ops = self.pack.operations
+ for i, node in enumerate(ops):
+ op = node.getoperation()
vbox_arg_0 = self._pack_vector_arg(vop, op, 0, vbox_arg_0)
vbox_arg_1 = self._pack_vector_arg(vop, op, 1, vbox_arg_1)
vbox_result= self._pack_vector_result(vop, op, vbox_result)
@@ -482,16 +481,20 @@
exec py.code.Source(bin_arith_trans.format(name='VEC_FLOAT_SUB')).compile()
del bin_arith_trans
- def _vectorize_VEC_RAW_LOAD(self, vop, ops):
+ def _vectorize_VEC_RAW_LOAD(self, vop):
vbox_result = None
- for i, op in enumerate(ops):
+ ops = self.pack.operations
+ for i, node in enumerate(ops):
+ op = node.getoperation()
vbox_result= self._pack_vector_result(vop, op, vbox_result)
vbox_result.item_count = len(ops)
vop.setarg(vop.numargs()-1,ConstInt(len(ops)))
- def _vectorize_VEC_RAW_STORE(self, vop, ops):
+ def _vectorize_VEC_RAW_STORE(self, vop):
vbox_arg_2 = None
- for i, op in enumerate(ops):
+ ops = self.pack.operations
+ for i, node in enumerate(ops):
+ op = node.getoperation()
vbox_arg_2 = self._pack_vector_arg(vop, op, 2, vbox_arg_2)
vbox_arg_2.item_count = len(ops)
vop.setarg(vop.numargs()-1,ConstInt(len(ops)))
@@ -499,28 +502,12 @@
VecScheduleData._inspect_operation = \
make_dispatcher_method(VecScheduleData, '_vectorize_')
-
def isomorphic(l_op, r_op):
- """ Described in the paper ``Instruction-Isomorphism in Program Execution''.
- I think this definition is to strict. TODO -> find another reference
- For now it must have the same instruction type, the array parameter must be equal,
- and it must be of the same type (both size in bytes and type of array).
+ """ Same instructions have the same operation name.
+ TODO what about parameters?
"""
if l_op.getopnum() == r_op.getopnum():
return True
- # the stronger counterpart. TODO which structural equivalence is
- # needed here?
- #if l_op.getopnum() == r_op.getopnum() and \
- # l_op.getarg(0) == r_op.getarg(0):
- # l_d = l_op.getdescr()
- # r_d = r_op.getdescr()
- # if l_d is not None and r_d is not None:
- # if l_d.get_item_size_in_bytes() == r_d.get_item_size_in_bytes():
- # if l_d.getflag() == r_d.getflag():
- # return True
- # elif l_d is None and r_d is None:
- # return True
- #return False
class PackSet(object):
@@ -536,7 +523,6 @@
return len(self.packs)
def add_pair(self, l, r):
- print "adds", l, r
self.packs.append(Pair(l,r))
def can_be_packed(self, lnode, rnode):
@@ -611,6 +597,8 @@
def __init__(self, ops):
self.operations = ops
self.savings = 0
+ for node in self.operations:
+ node.pack = self
def rightmost_match_leftmost(self, other):
assert isinstance(other, Pack)
More information about the pypy-commit
mailing list