[pypy-commit] pypy vecopt2: extend packset now checks both sides of the pack

plan_rich noreply at buildbot.pypy.org
Tue May 5 09:46:06 CEST 2015


Author: Richard Plangger <rich at pasra.at>
Branch: vecopt2
Changeset: r77114:2c2953e1a8d5
Date: 2015-04-13 16:14 +0200
http://bitbucket.org/pypy/pypy/changeset/2c2953e1a8d5/

Log:	extend packset now checks both sides of the pack

diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -70,6 +70,7 @@
         self.adjacent_list_back = []
         self.memory_ref = None
         self.pack = None
+        self.emitted = False
 
     def getoperation(self):
         return self.op
@@ -84,6 +85,17 @@
     def getopname(self):
         return self.op.getopname()
 
+    def relax_guard_to(self, guard):
+        """ Relaxes a guard operation to an earlier guard. """
+        assert self.op.is_guard()
+        assert guard.op.is_guard()
+
+        my_op = self.getoperation()
+        op = guard.getoperation()
+        my_op.setdescr(op.getdescr())
+        my_op.setfailargs(op.getfailargs())
+        my_op.rd_snapshot = op.rd_snapshot
+
     def edge_to(self, to, arg=None, label=None):
         assert self != to
         dep = self.depends_on(to)
@@ -165,7 +177,7 @@
         return self.adjacent_list_back
 
     def dependencies(self):
-        return self.adjacent_list[:] + self.adjacent_list_back[:]
+        return self.adjacent_list[:] + self.adjacent_list_back[:] # COPY
 
     def is_after(self, other):
         return self.opidx > other.opidx
@@ -441,7 +453,6 @@
                 # points to jump_op. this forces the jump/finish op to be the last operation
                 if node.provides_count() == 0:
                     node.edge_to(jump_node, None, label='jump')
-        print "\n\neee", self.schedulable_nodes
 
     def _build_guard_dependencies(self, guard_node, guard_opnum, tracker):
         if guard_opnum >= rop.GUARD_NOT_INVALIDATED:
@@ -600,12 +611,14 @@
     def schedulable(self, indices):
         for index in indices:
             if index not in self.schedulable_nodes:
+                print "pack", index, "not sched"
                 break
         else:
             return True
         return False
 
     def schedule_later(self, index):
+        assert len(self.schedulable_nodes) != 1, "not possible! " + str(self.schedulable_nodes[0].getoperation())
         node = self.schedulable_nodes[index]
         del self.schedulable_nodes[index]
         self.schedulable_nodes.append(node)
@@ -620,15 +633,17 @@
 
     def schedule(self, index):
         node = self.schedulable_nodes[index]
+        assert not node.emitted
         del self.schedulable_nodes[index]
         to_del = []
         print "  schedule", node.getoperation()
-        for dep in node.provides()[:]:
+        for dep in node.provides()[:]: # COPY
             node.remove_edge_to(dep.to)
             print "    >=X=>", node, dep.to, "count",dep.to.depends_count()
             if dep.to.depends_count() == 0:
                 self.schedulable_nodes.append(dep.to)
         node.clear_dependencies()
+        node.emitted = True
 
 class IntegralForwardModification(object):
     """ Calculates integral modifications on an integer box. """
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
@@ -105,7 +105,10 @@
         for i,op in enumerate(loop.operations):
             print "[",i,"]",op,
             if op.is_guard():
-                print op.rd_snapshot.boxes
+                if op.rd_snapshot:
+                    print op.rd_snapshot.boxes
+                else:
+                    print op.getfailargs()
             else:
                 print ""
 
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -683,6 +683,7 @@
     def test_packset_extend_load_modify_store(self):
         ops = """
         [p0,i0]
+        guard_no_early_exit() []
         i1 = int_add(i0, 1)
         i2 = int_le(i1, 16)
         guard_true(i2) [p0, i0]
@@ -694,12 +695,12 @@
         loop = self.parse_loop(ops)
         vopt = self.extend_packset(loop,1)
         assert len(vopt.dependency_graph.memory_refs) == 4
-        self.assert_independent(4,10)
         self.assert_independent(5,11)
         self.assert_independent(6,12)
+        self.assert_independent(7,13)
         assert len(vopt.packset.packs) == 3
         self.assert_packset_empty(vopt.packset, len(loop.operations),
-                                  [(5,11), (4,10), (6,12)])
+                                  [(6,12), (5,11), (7,13)])
 
     @pytest.mark.parametrize("descr", ['char','float','int','singlefloat'])
     def test_packset_combine_simple(self,descr):
@@ -810,9 +811,6 @@
         loop = self.parse_loop(ops)
         vopt = self.combine_packset(loop,3)
         assert len(vopt.dependency_graph.memory_refs) == 12
-        if len(vopt.packset.packs) != 4:
-            for pack in vopt.packset.packs:
-                print vopt.packset.packs
         assert len(vopt.packset.packs) == 4
 
         for opindices in [(4,11,18,25),(5,12,19,26),
@@ -836,6 +834,7 @@
     def test_schedule_vector_operation(self, op, descr, stride):
         ops = """
         [p0,p1,p2,i0] # 0
+        guard_no_early_exit() []
         i10 = int_le(i0, 128)  # 1, 8, 15, 22
         guard_true(i10) [p0,p1,p2,i0] # 2, 9, 16, 23
         i2 = getarrayitem_gc(p0, i0, descr={descr}arraydescr) # 3, 10, 17, 24
@@ -848,13 +847,14 @@
         vops = """
         [p0,p1,p2,i0]
         i10 = int_le(i0, 128)
-        guard_true(i10) [p0,p1,p2,i0]
+        guard_true(i10) []
         i1 = int_add(i0, {stride})
         i11 = int_le(i1, 128)
-        guard_true(i11) [p0,p1,p2,i0]
+        guard_true(i11) []
+        i12 = int_add(i1, {stride})
+        guard_no_early_exit() []
         v1 = vec_raw_load(p0, i0, 2, descr={descr}arraydescr)
         v2 = vec_raw_load(p1, i0, 2, descr={descr}arraydescr)
-        i12 = int_add(i1, {stride})
         v3 = {op}(v1,v2)
         vec_raw_store(p2, i0, v3, 2, descr={descr}arraydescr)
         jump(p0,p1,p2,i12)
@@ -884,8 +884,10 @@
 
 
     def test_vectorize_raw_load_mul_index(self):
+        pytest.skip("")
         ops = """
         [i0, i1, i2, i3, i4, i5, i6, i7]
+        guard_no_early_exit() []
         i9 = int_mul(i0, 8)
         i10 = raw_load(i3, i9, descr=intarraydescr)
         i11 = int_mul(i0, 8)
@@ -901,11 +903,10 @@
         """
         vopt = self.schedule(self.parse_loop(ops),1)
 
-    def test_123(self):
+    def test_vschedule_trace_1(self):
         ops = """
         [i0, i1, i2, i3, i4]
         guard_no_early_exit() []
-        debug_merge_point(0, 0, '1')
         i6 = int_mul(i0, 8)
         i7 = raw_load(i2, i6, descr=intarraydescr)
         i8 = raw_load(i3, i6, descr=intarraydescr)
@@ -914,13 +915,30 @@
         i11 = int_add(i0, 1)
         i12 = int_lt(i11, i1)
         guard_true(i12) [i4, i3, i2, i1, i11]
-        debug_merge_point(0, 0, '2')
         jump(i11, i1, i2, i3, i4)
         """
+        opt="""
+        [i0, i1, i2, i3, i4]
+        i11 = int_add(i0, 1) 
+        i12 = int_lt(i11, i1) 
+        guard_true(i12) []
+        i14 = int_mul(i11, 8) 
+        i13 = int_add(i11, 1) 
+        i18 = int_lt(i13, i1) 
+        guard_true(i18) []
+        guard_no_early_exit() []
+        i6 = int_mul(i0, 8) 
+        v19 = vec_raw_load(i2, i6, 2, descr=intarraydescr) 
+        v20 = vec_raw_load(i3, i6, 2, descr=intarraydescr) 
+        v21 = vec_int_add(v19, v20) 
+        vec_raw_store(i4, i6, v21, 2, descr=intarraydescr) 
+        jump(i13, i1, i2, i3, i4)
+        """
         vopt = self.schedule(self.parse_loop(ops),1)
-        self.debug_print_operations(vopt.loop)
+        self.assert_equal(vopt.loop, self.parse_loop(opt))
 
-    def test_schedule_vectorized_trace_1(self):
+    def test_vschedule_trace_2(self):
+        pytest.skip()
         ops = """
         [i0, i1, i2, i3, i4, i5, i6, i7]
         guard_no_early_exit() []
@@ -935,8 +953,27 @@
         guard_future_condition() []
         jump(i12, i8, i9, i3, i4, i5, i10, i7)
         """
+        opt = """
+        [i0, i1, i2, i3, i4, i5, i6, i7]
+        i12 = int_add(i0, 8) 
+        i14 = int_mul(i7, 8) 
+        i20 = int_mul(i7, 8) 
+        i15 = int_lt(i12, i14) 
+        guard_true(i15) []
+        i16 = int_add(i12, 8) 
+        i21 = int_lt(i16, i20) 
+        guard_true(i21) []
+        guard_no_early_exit() []
+        v22 = vec_raw_load(i3, i0, 2, descr=intarraydescr) 
+        v23 = vec_raw_load(i4, i0, 2, descr=intarraydescr) 
+        v24 = vec_int_add(v22, v23) 
+        vec_raw_store(i5, i0, v24, 2, descr=intarraydescr) 
+        i17 = vec_unpack(v22, 0)
+        i18 = vec_unpack(v22, 1)
+        jump(i16, i17, i18, i3, i4, i5, i19, i7)
+        """
         vopt = self.schedule(self.parse_loop(ops),1)
-        self.debug_print_operations(vopt.loop)
+        self.assert_equal(vopt.loop, self.parse_loop(opt))
 
 class TestLLtype(BaseTestVectorize, LLtypeMixin):
     pass
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -33,18 +33,6 @@
         else:
             print ""
 
-def must_unpack_result_to_exec(op, target_op):
-    # TODO either move to resop or util
-    if op.getoperation().vector != -1:
-        return False
-    return True
-
-def prohibit_packing(op1, op2):
-    if op2.is_array_op():
-        if op2.getarg(1) == op1.result:
-            return True
-    return False
-
 def optimize_vector(metainterp_sd, jitdriver_sd, loop, optimizations):
     opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, optimizations)
     try:
@@ -246,7 +234,7 @@
 
     def build_dependency_graph(self):
         self.dependency_graph = DependencyGraph(self.loop.operations)
-        self.relax_guard_dependencies()
+        self.relax_index_guards()
 
     def find_adjacent_memory_refs(self):
         """ the pre pass already builds a hash of memory references and the
@@ -341,14 +329,19 @@
                 i += 1
             if len_before == len(self.packset.packs):
                 break
+        if not we_are_translated():
+            print "packs:"
+            for pack in self.packset.packs:
+                print " P:", pack
 
     def schedule(self):
         self.clear_newoperations()
         scheduler = Scheduler(self.dependency_graph, VecScheduleData())
         print "scheduling loop"
+        i = 100
         while scheduler.has_more():
             candidate = scheduler.next()
-            print "  candidate", candidate
+            print "  candidate", candidate, "has pack?", candidate.pack != None, "pack", candidate.pack
             if candidate.pack:
                 pack = candidate.pack
                 if scheduler.schedulable(pack.operations):
@@ -360,12 +353,18 @@
             else:
                 self.emit_operation(candidate.getoperation())
                 scheduler.schedule(0)
+            i += 1
+            if i > 200:
+                assert False
 
         self.loop.operations = self._newoperations[:]
+        if not we_are_translated():
+            for node in self.dependency_graph.nodes:
+                assert node.emitted
 
-    def relax_guard_dependencies(self):
+    def relax_index_guards(self):
+        label_idx = 0
         early_exit_idx = 1
-        label_idx = 0
         label = self.dependency_graph.getnode(label_idx)
         ee_guard = self.dependency_graph.getnode(early_exit_idx)
         if not ee_guard.getopnum() == rop.GUARD_NO_EARLY_EXIT:
@@ -400,12 +399,27 @@
             guard_node.edge_to(ee_guard, label='pullup')
             label.remove_edge_to(ee_guard)
 
+            guard_node.relax_guard_to(ee_guard)
+
+def must_unpack_result_to_exec(op, target_op):
+    # TODO either move to resop or util
+    if op.getoperation().vector != -1:
+        return False
+    return True
+
+def prohibit_packing(op1, op2):
+    if op1.is_array_op():
+        if op1.getarg(1) == op2.result:
+            print "prohibit", op1, op2
+            return True
+    return False
+
 def fail_args_break_dependency(guard, prev_op, target_guard):
     failargs = set(guard.getoperation().getfailargs())
     new_failargs = set(target_guard.getoperation().getfailargs())
 
     op = prev_op.getoperation()
-    if not op.has_no_side_effect():
+    if not op.is_always_pure(): # TODO has_no_side_effect():
         return True
     if op.result is not None:
         arg = op.result
@@ -544,21 +558,27 @@
         """
         savings = -1
 
-        # without loss of generatlity: only check 'left' operation
         lpacknode = pack.left
-        if prohibit_packing(lnode.getoperation(), lpacknode.getoperation()):
+        if prohibit_packing(lpacknode.getoperation(), lnode.getoperation()):
+            return -1
+        rpacknode = pack.right
+        if prohibit_packing(rpacknode.getoperation(), rnode.getoperation()):
             return -1
 
         if not expand_forward:
             #print " backward savings", savings
-            if not must_unpack_result_to_exec(lpacknode, lnode):
+            if not must_unpack_result_to_exec(lpacknode, lnode) and \
+               not must_unpack_result_to_exec(rpacknode, rnode):
                 savings += 1
             #print " => backward savings", savings
         else:
             #print " forward savings", savings
-            if not must_unpack_result_to_exec(lpacknode, lnode):
+            if not must_unpack_result_to_exec(lpacknode, lnode) and \
+               not must_unpack_result_to_exec(rpacknode, rnode):
                 savings += 1
             #print " => forward savings", savings
+        if savings >= 0:
+            print "estimated " + str(savings) + " for lpack,lnode", lpacknode, lnode
 
         return savings
 
@@ -567,10 +587,14 @@
         is not iterated when calling this method. """
         pack_i = self.packs[i]
         pack_j = self.packs[j]
+        pack_i.clear()
+        pack_j.clear()
         operations = pack_i.operations
         for op in pack_j.operations[1:]:
             operations.append(op)
         self.packs[i] = Pack(operations)
+
+
         # instead of deleting an item in the center of pack array,
         # the last element is assigned to position j and
         # the last slot is freed. Order of packs doesn't matter
@@ -600,6 +624,10 @@
         for node in self.operations:
             node.pack = self
 
+    def clear(self):
+        for node in self.operations:
+            node.pack = None
+
     def rightmost_match_leftmost(self, other):
         assert isinstance(other, Pack)
         rightmost = self.operations[-1]


More information about the pypy-commit mailing list