[pypy-commit] pypy vecopt2: extend packset now checks both sides of the pack
plan_rich
noreply at buildbot.pypy.org
Tue May 5 09:46:06 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt2
Changeset: r77114:2c2953e1a8d5
Date: 2015-04-13 16:14 +0200
http://bitbucket.org/pypy/pypy/changeset/2c2953e1a8d5/
Log: extend packset now checks both sides of the pack
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -70,6 +70,7 @@
self.adjacent_list_back = []
self.memory_ref = None
self.pack = None
+ self.emitted = False
def getoperation(self):
return self.op
@@ -84,6 +85,17 @@
def getopname(self):
return self.op.getopname()
+ def relax_guard_to(self, guard):
+ """ Relaxes a guard operation to an earlier guard. """
+ assert self.op.is_guard()
+ assert guard.op.is_guard()
+
+ my_op = self.getoperation()
+ op = guard.getoperation()
+ my_op.setdescr(op.getdescr())
+ my_op.setfailargs(op.getfailargs())
+ my_op.rd_snapshot = op.rd_snapshot
+
def edge_to(self, to, arg=None, label=None):
assert self != to
dep = self.depends_on(to)
@@ -165,7 +177,7 @@
return self.adjacent_list_back
def dependencies(self):
- return self.adjacent_list[:] + self.adjacent_list_back[:]
+ return self.adjacent_list[:] + self.adjacent_list_back[:] # COPY
def is_after(self, other):
return self.opidx > other.opidx
@@ -441,7 +453,6 @@
# points to jump_op. this forces the jump/finish op to be the last operation
if node.provides_count() == 0:
node.edge_to(jump_node, None, label='jump')
- print "\n\neee", self.schedulable_nodes
def _build_guard_dependencies(self, guard_node, guard_opnum, tracker):
if guard_opnum >= rop.GUARD_NOT_INVALIDATED:
@@ -600,12 +611,14 @@
def schedulable(self, indices):
for index in indices:
if index not in self.schedulable_nodes:
+ print "pack", index, "not sched"
break
else:
return True
return False
def schedule_later(self, index):
+ assert len(self.schedulable_nodes) != 1, "not possible! " + str(self.schedulable_nodes[0].getoperation())
node = self.schedulable_nodes[index]
del self.schedulable_nodes[index]
self.schedulable_nodes.append(node)
@@ -620,15 +633,17 @@
def schedule(self, index):
node = self.schedulable_nodes[index]
+ assert not node.emitted
del self.schedulable_nodes[index]
to_del = []
print " schedule", node.getoperation()
- for dep in node.provides()[:]:
+ for dep in node.provides()[:]: # COPY
node.remove_edge_to(dep.to)
print " >=X=>", node, dep.to, "count",dep.to.depends_count()
if dep.to.depends_count() == 0:
self.schedulable_nodes.append(dep.to)
node.clear_dependencies()
+ node.emitted = True
class IntegralForwardModification(object):
""" Calculates integral modifications on an integer box. """
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
@@ -105,7 +105,10 @@
for i,op in enumerate(loop.operations):
print "[",i,"]",op,
if op.is_guard():
- print op.rd_snapshot.boxes
+ if op.rd_snapshot:
+ print op.rd_snapshot.boxes
+ else:
+ print op.getfailargs()
else:
print ""
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -683,6 +683,7 @@
def test_packset_extend_load_modify_store(self):
ops = """
[p0,i0]
+ guard_no_early_exit() []
i1 = int_add(i0, 1)
i2 = int_le(i1, 16)
guard_true(i2) [p0, i0]
@@ -694,12 +695,12 @@
loop = self.parse_loop(ops)
vopt = self.extend_packset(loop,1)
assert len(vopt.dependency_graph.memory_refs) == 4
- self.assert_independent(4,10)
self.assert_independent(5,11)
self.assert_independent(6,12)
+ self.assert_independent(7,13)
assert len(vopt.packset.packs) == 3
self.assert_packset_empty(vopt.packset, len(loop.operations),
- [(5,11), (4,10), (6,12)])
+ [(6,12), (5,11), (7,13)])
@pytest.mark.parametrize("descr", ['char','float','int','singlefloat'])
def test_packset_combine_simple(self,descr):
@@ -810,9 +811,6 @@
loop = self.parse_loop(ops)
vopt = self.combine_packset(loop,3)
assert len(vopt.dependency_graph.memory_refs) == 12
- if len(vopt.packset.packs) != 4:
- for pack in vopt.packset.packs:
- print vopt.packset.packs
assert len(vopt.packset.packs) == 4
for opindices in [(4,11,18,25),(5,12,19,26),
@@ -836,6 +834,7 @@
def test_schedule_vector_operation(self, op, descr, stride):
ops = """
[p0,p1,p2,i0] # 0
+ guard_no_early_exit() []
i10 = int_le(i0, 128) # 1, 8, 15, 22
guard_true(i10) [p0,p1,p2,i0] # 2, 9, 16, 23
i2 = getarrayitem_gc(p0, i0, descr={descr}arraydescr) # 3, 10, 17, 24
@@ -848,13 +847,14 @@
vops = """
[p0,p1,p2,i0]
i10 = int_le(i0, 128)
- guard_true(i10) [p0,p1,p2,i0]
+ guard_true(i10) []
i1 = int_add(i0, {stride})
i11 = int_le(i1, 128)
- guard_true(i11) [p0,p1,p2,i0]
+ guard_true(i11) []
+ i12 = int_add(i1, {stride})
+ guard_no_early_exit() []
v1 = vec_raw_load(p0, i0, 2, descr={descr}arraydescr)
v2 = vec_raw_load(p1, i0, 2, descr={descr}arraydescr)
- i12 = int_add(i1, {stride})
v3 = {op}(v1,v2)
vec_raw_store(p2, i0, v3, 2, descr={descr}arraydescr)
jump(p0,p1,p2,i12)
@@ -884,8 +884,10 @@
def test_vectorize_raw_load_mul_index(self):
+ pytest.skip("")
ops = """
[i0, i1, i2, i3, i4, i5, i6, i7]
+ guard_no_early_exit() []
i9 = int_mul(i0, 8)
i10 = raw_load(i3, i9, descr=intarraydescr)
i11 = int_mul(i0, 8)
@@ -901,11 +903,10 @@
"""
vopt = self.schedule(self.parse_loop(ops),1)
- def test_123(self):
+ def test_vschedule_trace_1(self):
ops = """
[i0, i1, i2, i3, i4]
guard_no_early_exit() []
- debug_merge_point(0, 0, '1')
i6 = int_mul(i0, 8)
i7 = raw_load(i2, i6, descr=intarraydescr)
i8 = raw_load(i3, i6, descr=intarraydescr)
@@ -914,13 +915,30 @@
i11 = int_add(i0, 1)
i12 = int_lt(i11, i1)
guard_true(i12) [i4, i3, i2, i1, i11]
- debug_merge_point(0, 0, '2')
jump(i11, i1, i2, i3, i4)
"""
+ opt="""
+ [i0, i1, i2, i3, i4]
+ i11 = int_add(i0, 1)
+ i12 = int_lt(i11, i1)
+ guard_true(i12) []
+ i14 = int_mul(i11, 8)
+ i13 = int_add(i11, 1)
+ i18 = int_lt(i13, i1)
+ guard_true(i18) []
+ guard_no_early_exit() []
+ i6 = int_mul(i0, 8)
+ v19 = vec_raw_load(i2, i6, 2, descr=intarraydescr)
+ v20 = vec_raw_load(i3, i6, 2, descr=intarraydescr)
+ v21 = vec_int_add(v19, v20)
+ vec_raw_store(i4, i6, v21, 2, descr=intarraydescr)
+ jump(i13, i1, i2, i3, i4)
+ """
vopt = self.schedule(self.parse_loop(ops),1)
- self.debug_print_operations(vopt.loop)
+ self.assert_equal(vopt.loop, self.parse_loop(opt))
- def test_schedule_vectorized_trace_1(self):
+ def test_vschedule_trace_2(self):
+ pytest.skip()
ops = """
[i0, i1, i2, i3, i4, i5, i6, i7]
guard_no_early_exit() []
@@ -935,8 +953,27 @@
guard_future_condition() []
jump(i12, i8, i9, i3, i4, i5, i10, i7)
"""
+ opt = """
+ [i0, i1, i2, i3, i4, i5, i6, i7]
+ i12 = int_add(i0, 8)
+ i14 = int_mul(i7, 8)
+ i20 = int_mul(i7, 8)
+ i15 = int_lt(i12, i14)
+ guard_true(i15) []
+ i16 = int_add(i12, 8)
+ i21 = int_lt(i16, i20)
+ guard_true(i21) []
+ guard_no_early_exit() []
+ v22 = vec_raw_load(i3, i0, 2, descr=intarraydescr)
+ v23 = vec_raw_load(i4, i0, 2, descr=intarraydescr)
+ v24 = vec_int_add(v22, v23)
+ vec_raw_store(i5, i0, v24, 2, descr=intarraydescr)
+ i17 = vec_unpack(v22, 0)
+ i18 = vec_unpack(v22, 1)
+ jump(i16, i17, i18, i3, i4, i5, i19, i7)
+ """
vopt = self.schedule(self.parse_loop(ops),1)
- self.debug_print_operations(vopt.loop)
+ self.assert_equal(vopt.loop, self.parse_loop(opt))
class TestLLtype(BaseTestVectorize, LLtypeMixin):
pass
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -33,18 +33,6 @@
else:
print ""
-def must_unpack_result_to_exec(op, target_op):
- # TODO either move to resop or util
- if op.getoperation().vector != -1:
- return False
- return True
-
-def prohibit_packing(op1, op2):
- if op2.is_array_op():
- if op2.getarg(1) == op1.result:
- return True
- return False
-
def optimize_vector(metainterp_sd, jitdriver_sd, loop, optimizations):
opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, optimizations)
try:
@@ -246,7 +234,7 @@
def build_dependency_graph(self):
self.dependency_graph = DependencyGraph(self.loop.operations)
- self.relax_guard_dependencies()
+ self.relax_index_guards()
def find_adjacent_memory_refs(self):
""" the pre pass already builds a hash of memory references and the
@@ -341,14 +329,19 @@
i += 1
if len_before == len(self.packset.packs):
break
+ if not we_are_translated():
+ print "packs:"
+ for pack in self.packset.packs:
+ print " P:", pack
def schedule(self):
self.clear_newoperations()
scheduler = Scheduler(self.dependency_graph, VecScheduleData())
print "scheduling loop"
+ i = 100
while scheduler.has_more():
candidate = scheduler.next()
- print " candidate", candidate
+ print " candidate", candidate, "has pack?", candidate.pack != None, "pack", candidate.pack
if candidate.pack:
pack = candidate.pack
if scheduler.schedulable(pack.operations):
@@ -360,12 +353,18 @@
else:
self.emit_operation(candidate.getoperation())
scheduler.schedule(0)
+ i += 1
+ if i > 200:
+ assert False
self.loop.operations = self._newoperations[:]
+ if not we_are_translated():
+ for node in self.dependency_graph.nodes:
+ assert node.emitted
- def relax_guard_dependencies(self):
+ def relax_index_guards(self):
+ label_idx = 0
early_exit_idx = 1
- label_idx = 0
label = self.dependency_graph.getnode(label_idx)
ee_guard = self.dependency_graph.getnode(early_exit_idx)
if not ee_guard.getopnum() == rop.GUARD_NO_EARLY_EXIT:
@@ -400,12 +399,27 @@
guard_node.edge_to(ee_guard, label='pullup')
label.remove_edge_to(ee_guard)
+ guard_node.relax_guard_to(ee_guard)
+
+def must_unpack_result_to_exec(op, target_op):
+ # TODO either move to resop or util
+ if op.getoperation().vector != -1:
+ return False
+ return True
+
+def prohibit_packing(op1, op2):
+ if op1.is_array_op():
+ if op1.getarg(1) == op2.result:
+ print "prohibit", op1, op2
+ return True
+ return False
+
def fail_args_break_dependency(guard, prev_op, target_guard):
failargs = set(guard.getoperation().getfailargs())
new_failargs = set(target_guard.getoperation().getfailargs())
op = prev_op.getoperation()
- if not op.has_no_side_effect():
+ if not op.is_always_pure(): # TODO has_no_side_effect():
return True
if op.result is not None:
arg = op.result
@@ -544,21 +558,27 @@
"""
savings = -1
- # without loss of generatlity: only check 'left' operation
lpacknode = pack.left
- if prohibit_packing(lnode.getoperation(), lpacknode.getoperation()):
+ if prohibit_packing(lpacknode.getoperation(), lnode.getoperation()):
+ return -1
+ rpacknode = pack.right
+ if prohibit_packing(rpacknode.getoperation(), rnode.getoperation()):
return -1
if not expand_forward:
#print " backward savings", savings
- if not must_unpack_result_to_exec(lpacknode, lnode):
+ if not must_unpack_result_to_exec(lpacknode, lnode) and \
+ not must_unpack_result_to_exec(rpacknode, rnode):
savings += 1
#print " => backward savings", savings
else:
#print " forward savings", savings
- if not must_unpack_result_to_exec(lpacknode, lnode):
+ if not must_unpack_result_to_exec(lpacknode, lnode) and \
+ not must_unpack_result_to_exec(rpacknode, rnode):
savings += 1
#print " => forward savings", savings
+ if savings >= 0:
+ print "estimated " + str(savings) + " for lpack,lnode", lpacknode, lnode
return savings
@@ -567,10 +587,14 @@
is not iterated when calling this method. """
pack_i = self.packs[i]
pack_j = self.packs[j]
+ pack_i.clear()
+ pack_j.clear()
operations = pack_i.operations
for op in pack_j.operations[1:]:
operations.append(op)
self.packs[i] = Pack(operations)
+
+
# instead of deleting an item in the center of pack array,
# the last element is assigned to position j and
# the last slot is freed. Order of packs doesn't matter
@@ -600,6 +624,10 @@
for node in self.operations:
node.pack = self
+ def clear(self):
+ for node in self.operations:
+ node.pack = None
+
def rightmost_match_leftmost(self, other):
assert isinstance(other, Pack)
rightmost = self.operations[-1]
More information about the pypy-commit
mailing list