[pypy-commit] pypy vecopt: costmodel impl extended

plan_rich noreply at buildbot.pypy.org
Mon Jun 1 12:56:05 CEST 2015


Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77740:7ce427746614
Date: 2015-06-01 12:56 +0200
http://bitbucket.org/pypy/pypy/changeset/7ce427746614/

Log:	costmodel impl extended; added tests for the cost model; extracted tests
	into another file

diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -80,6 +80,7 @@
         self.adjacent_list_back = []
         self.memory_ref = None
         self.pack = None
+        self.pack_position = -1
         self.emitted = False
         self.schedule_position = -1
         self.priority = 0
@@ -962,12 +963,6 @@
             self.current_end.next_nonconst = idxvar
         self.current_end = idxvar
 
-    def is_adjacent_with_runtime_check(self, other, graph):
-        return self.next_nonconst is not None and \
-               self.next_nonconst is self.current_end and \
-               self.next_nonconst.opnum == rop.INT_ADD and \
-               self.next_nonconst.is_identity()
-
     def getvariable(self):
         return self.var
 
@@ -1086,15 +1081,6 @@
             return abs(self.index_var.diff(other.index_var)) - stride == 0
         return False
 
-    def is_adjacent_with_runtime_check(self, other, graph):
-        """there are many cases where the stride is variable
-           it is a priori not known if two unrolled memory accesses are
-           tightly packed"""
-        assert isinstance(other, MemoryRef)
-        if self.array == other.array and self.descr == other.descr:
-            return self.index_var.is_adjacent_with_runtime_check(other.index_var, graph)
-        return False
-
     def match(self, other):
         assert isinstance(other, MemoryRef)
         if self.array == other.array and self.descr == other.descr:
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
@@ -0,0 +1,131 @@
+import py
+
+from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop
+from rpython.jit.metainterp.optimizeopt.util import equaloplists
+from rpython.jit.metainterp.optimizeopt.vectorize import (VecScheduleData,
+        Pack, NotAProfitableLoop, VectorizingOptimizer)
+from rpython.jit.metainterp.optimizeopt.dependency import Node
+from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
+from rpython.jit.metainterp.optimizeopt.test.test_schedule import SchedulerBaseTest
+from rpython.jit.metainterp.optimizeopt.test.test_vectorize import (FakeMetaInterpStaticData,
+        FakeJitDriverStaticData)
+from rpython.jit.metainterp.resoperation import rop, ResOperation
+from rpython.jit.tool.oparser import parse as opparse
+from rpython.jit.tool.oparser_model import get_model
+
+class FakeMemoryRef(object):
+    def __init__(self, iv):
+        self.index_var = iv
+
+    def is_adjacent_to(self, other):
+        iv = self.index_var
+        ov = other.index_var
+        val = (int(str(ov.var)[1:]) - int(str(iv.var)[1:]))
+        print iv, ov, "adja?", val == 1
+        # i0 and i1 are adjacent
+        # i1 and i2 ...
+        # but not i0, i2
+        # ...
+        return val == 1
+
+class CostModelBaseTest(SchedulerBaseTest):
+    def savings(self, loop):
+        metainterp_sd = FakeMetaInterpStaticData(self.cpu)
+        jitdriver_sd = FakeJitDriverStaticData()
+        opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, [])
+        opt.build_dependency_graph()
+        graph = opt.dependency_graph
+        for k,m in graph.memory_refs.items():
+            graph.memory_refs[k] = FakeMemoryRef(m.index_var)
+            print "memory ref", k, m
+        opt.find_adjacent_memory_refs()
+        opt.extend_packset()
+        opt.combine_packset()
+        for pack in opt.packset.packs:
+            print "apck:"
+            print '\n'.join([str(op.getoperation()) for op in pack.operations])
+            print
+        return opt.costmodel.calculate_savings(opt.packset)
+
+    def assert_operations_match(self, loop_a, loop_b):
+        assert equaloplists(loop_a.operations, loop_b.operations)
+
+    def test_load_2_unpack(self):
+        loop1 = self.parse("""
+        f10 = raw_load(p0, i0, descr=double)
+        f11 = raw_load(p0, i1, descr=double)
+        guard_true(i0) [f10]
+        guard_true(i1) [f11]
+        """)
+        # for double the costs are
+        # unpack index 1 savings: -2
+        # unpack index 0 savings: -1
+        savings = self.savings(loop1)
+        assert savings == -2
+
+    def test_load_4_unpack(self):
+        loop1 = self.parse("""
+        i10 = raw_load(p0, i0, descr=float)
+        i11 = raw_load(p0, i1, descr=float)
+        i12 = raw_load(p0, i2, descr=float)
+        i13 = raw_load(p0, i3, descr=float)
+        guard_true(i0) [i10]
+        guard_true(i1) [i11]
+        guard_true(i2) [i12]
+        guard_true(i3) [i13]
+        """)
+        savings = self.savings(loop1)
+        assert savings == -1
+
+    def test_load_2_unpack_1(self):
+        loop1 = self.parse("""
+        f10 = raw_load(p0, i0, descr=double)
+        f11 = raw_load(p0, i1, descr=double)
+        guard_true(i0) [f10]
+        """)
+        savings = self.savings(loop1)
+        assert savings == 0
+
+    def test_load_2_unpack_1_index1(self):
+        loop1 = self.parse("""
+        f10 = raw_load(p0, i0, descr=double)
+        f11 = raw_load(p0, i1, descr=double)
+        guard_true(i0) [f11]
+        """)
+        savings = self.savings(loop1)
+        assert savings == -1
+
+    def test_load_arith(self):
+        loop1 = self.parse("""
+        i10 = raw_load(p0, i0, descr=int)
+        i11 = raw_load(p0, i1, descr=int)
+        i12 = raw_load(p0, i2, descr=int)
+        i13 = raw_load(p0, i3, descr=int)
+        i15 = int_add(i10, 1)
+        i16 = int_add(i11, 1)
+        i17 = int_add(i12, 1)
+        i18 = int_add(i13, 1)
+        """)
+        savings = self.savings(loop1)
+        assert savings == 6
+
+    def test_load_arith_store(self):
+        loop1 = self.parse("""
+        i10 = raw_load(p0, i0, descr=int)
+        i11 = raw_load(p0, i1, descr=int)
+        i12 = raw_load(p0, i2, descr=int)
+        i13 = raw_load(p0, i3, descr=int)
+        i15 = int_add(i10, 1)
+        i16 = int_add(i11, 1)
+        i17 = int_add(i12, 1)
+        i18 = int_add(i13, 1)
+        raw_store(p1, i4, i15, descr=int)
+        raw_store(p1, i5, i16, descr=int)
+        raw_store(p1, i6, i17, descr=int)
+        raw_store(p1, i7, i18, descr=int)
+        """)
+        savings = self.savings(loop1)
+        assert savings == 6
+
+class Test(CostModelBaseTest, LLtypeMixin):
+    pass
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -1,31 +1,43 @@
 import py
 
+from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop
 from rpython.jit.metainterp.optimizeopt.util import equaloplists
 from rpython.jit.metainterp.optimizeopt.vectorize import (VecScheduleData,
-        Pack, NotAProfitableLoop)
+        Pack, NotAProfitableLoop, VectorizingOptimizer)
 from rpython.jit.metainterp.optimizeopt.dependency import Node
 from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
 from rpython.jit.metainterp.optimizeopt.test.test_dependency import DependencyBaseTest
+from rpython.jit.metainterp.optimizeopt.test.test_vectorize import (FakeMetaInterpStaticData,
+        FakeJitDriverStaticData)
+from rpython.jit.metainterp.resoperation import rop, ResOperation
 from rpython.jit.tool.oparser import parse as opparse
 from rpython.jit.tool.oparser_model import get_model
 
 class SchedulerBaseTest(DependencyBaseTest):
 
-    def parse(self, source):
+    def parse(self, source, inc_label_jump=True):
         ns = {
             'double': self.floatarraydescr,
             'float': self.singlefloatarraydescr,
             'long': self.intarraydescr,
+            'int': self.int32arraydescr,
         }
-        loop = opparse("        [p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,f0,f1,f2,f3,f4,f5]\n" + source + \
-                       "\n        jump(p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,f0,f1,f2,f3,f4,f5)",
+        loop = opparse("        [p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,i6,i7,i8,i9,f0,f1,f2,f3,f4,f5]\n" + source + \
+                       "\n        jump(p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,i6,i7,i8,i9,f0,f1,f2,f3,f4,f5)",
                        cpu=self.cpu,
                        namespace=ns)
+        if inc_label_jump:
+            token = JitCellToken()
+            loop.operations = \
+                [ResOperation(rop.LABEL, loop.inputargs, None, descr=TargetToken(token))] + \
+                loop.operations
+            return loop
+
         del loop.operations[-1]
         return loop
 
     def pack(self, loop, l, r):
-        return [Node(op,l+i) for i,op in enumerate(loop.operations[l:r])]
+        return [Node(op,1+l+i) for i,op in enumerate(loop.operations[1+l:1+r])]
 
     def schedule(self, loop_orig, packs, vec_reg_size=16):
         loop = get_model(False).ExtendedTreeLoop("loop")
@@ -46,6 +58,7 @@
     def assert_operations_match(self, loop_a, loop_b):
         assert equaloplists(loop_a.operations, loop_b.operations)
 
+class Test(SchedulerBaseTest, LLtypeMixin):
     def test_schedule_split_load(self):
         loop1 = self.parse("""
         i10 = raw_load(p0, i0, descr=float)
@@ -61,7 +74,7 @@
         v1[i32#4] = vec_raw_load(p0, i0, 4, descr=float)
         i14 = raw_load(p0, i4, descr=float)
         i15 = raw_load(p0, i5, descr=float)
-        """)
+        """, False)
         self.assert_equal(loop2, loop3)
 
     def test_int_to_float(self):
@@ -73,31 +86,10 @@
         """)
         pack1 = self.pack(loop1, 0, 2)
         pack2 = self.pack(loop1, 2, 4)
-        print pack1
-        print pack2
         loop2 = self.schedule(loop1, [pack1, pack2])
         loop3 = self.parse("""
         v1[i64#2] = vec_raw_load(p0, i0, 2, descr=long)
         v2[i32#2] = vec_int_signext(v1[i64#2], 4)
         v3[f64#2] = vec_cast_int_to_float(v2[i32#2])
-        """)
+        """, False)
         self.assert_equal(loop2, loop3)
-
-    def test_cost_model_reject_only_load_vectorizable(self):
-        loop1 = self.parse("""
-        f10 = raw_load(p0, i0, descr=long)
-        f11 = raw_load(p0, i1, descr=long)
-        guard_true(i0) [f10]
-        guard_true(i1) [f11]
-        """)
-        try:
-            pack1 = self.pack(loop1, 0, 2)
-            pack2 = self.pack(loop1, 2, 3)
-            pack3 = self.pack(loop1, 3, 4)
-            loop2 = self.schedule(loop1, [pack1, pack2, pack3])
-            py.test.fail("this loops should have bailed out")
-        except NotAProfitableLoop:
-            pass
-
-class TestLLType(SchedulerBaseTest, LLtypeMixin):
-    pass
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_util.py b/rpython/jit/metainterp/optimizeopt/test/test_util.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_util.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_util.py
@@ -155,6 +155,7 @@
     arraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Signed))
     floatarraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Float))
     intarraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Signed))
+    int32arraydescr = cpu.arraydescrof(lltype.GcArray(rffi.INT))
     uintarraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Unsigned))
     chararraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Char))
     singlefloatarraydescr = cpu.arraydescrof(lltype.GcArray(lltype.SingleFloat))
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -112,6 +112,8 @@
         self.find_adjacent_memory_refs()
         self.extend_packset()
         self.combine_packset()
+        if not self.costmodel.profitable(self.packset):
+            raise NotAProfitableLoop()
         self.schedule()
 
         gso = GuardStrengthenOpt(self.dependency_graph.index_vars)
@@ -284,7 +286,7 @@
                 # that point forward:
                 if node_a.is_before(node_b):
                     if memref_a.is_adjacent_to(memref_b):
-                        if self.packset.can_be_packed(node_a, node_b):
+                        if self.packset.can_be_packed(node_a, node_b, None):
                             pair = Pair(node_a,node_b)
                             self.packset.packs.append(pair)
 
@@ -304,31 +306,21 @@
             for rdep in pack.right.depends():
                 lnode = ldep.to
                 rnode = rdep.to
-                if lnode.is_before(rnode) and self.packset.can_be_packed(lnode, rnode):
-                    savings = self.costmodel.estimate_savings(lnode, rnode, pack, False)
-                    if savings >= 0:
+                isomorph = isomorphic(lnode.getoperation(), rnode.getoperation())
+                if isomorph and lnode.is_before(rnode):
+                    if self.packset.can_be_packed(lnode, rnode, pack):
                         self.packset.add_pair(lnode, rnode)
 
     def follow_def_uses(self, pack):
         assert isinstance(pack, Pair)
-        savings = -1
-        candidate = (None,None)
         for ldep in pack.left.provides():
             for rdep in pack.right.provides():
                 lnode = ldep.to
                 rnode = rdep.to
-                if lnode.is_before(rnode) and \
-                   self.packset.can_be_packed(lnode, rnode):
-                    est_savings = \
-                        self.costmodel.estimate_savings(lnode, rnode, pack, True)
-                    if est_savings > savings:
-                        savings = est_savings
-                        candidate = (lnode, rnode)
-        #
-        if savings >= 0:
-            assert candidate[0] is not None
-            assert candidate[1] is not None
-            self.packset.add_pair(candidate[0], candidate[1])
+                isomorph = isomorphic(lnode.getoperation(), rnode.getoperation())
+                if isomorph and lnode.is_before(rnode):
+                    if self.packset.can_be_packed(lnode, rnode, pack):
+                        self.packset.add_pair(lnode, rnode)
 
     def combine_packset(self):
         if len(self.packset.packs) == 0:
@@ -729,41 +721,54 @@
         self._newoperations.append(op)
 
 class CostModel(object):
-    def estimate_savings(self, lnode, rnode, origin_pack, expand_forward):
-        """ Estimate the number of savings to add this pair.
-        Zero is the minimum value returned. This should take
-        into account the benefit of executing this instruction
-        as SIMD instruction.
-        """
+    def unpack_cost(self, index, op):
+        raise NotImplementedError
 
-        lpacknode = origin_pack.left
-        if self.prohibit_packing(lpacknode.getoperation(), lnode.getoperation()):
-            return -1
-        rpacknode = origin_pack.right
-        if self.prohibit_packing(rpacknode.getoperation(), rnode.getoperation()):
-            return -1
+    def savings_for_pack(self, opnum, times):
+        raise NotImplementedError
 
-        return 0
+    def savings_for_unpacking(self, node, index):
+        savings = 0
+        result = node.getoperation().result
+        print node.op, "[", index, "]===>"
+        for use in node.provides():
+            if use.to.pack is None and use.because_of(result):
+                savings -= self.unpack_cost(index, node.getoperation())
+                print "   - ", savings, use.to.op
+        return savings
 
-    def prohibit_packing(self, packed, inquestion):
-        """ Blocks the packing of some operations """
-        if inquestion.vector == -1:
-            return True
-        if packed.is_array_op():
-            if packed.getarg(1) == inquestion.result:
-                return True
-        return False
+    def calculate_savings(self, packset):  # sums pack savings and (negative) unpack savings over all packs
+        savings = 0
+        for pack in packset.packs:
+            savings += self.savings_for_pack(pack.opnum(), pack.opcount())  # opnum is a method on Pack: call it
+            print
+            print "pack", savings
+            op0 = pack.operations[0].getoperation()
+            if op0.result:  # unpack costs are only counted when the first op has a result
+                for i,node in enumerate(pack.operations):
+                    savings += self.savings_for_unpacking(node, i)  # i is the node's index within the pack
+                    print " +=> sss", savings
+        return savings
 
-    def must_unpack_result_to_exec(self, op, target_op):
-        # TODO either move to resop or util
-        if op.getoperation().vector != -1:
-            return False
-        return True
+    def profitable(self, packset):
+        return self.calculate_savings(packset) >= 0
 
 class X86_CostModel(CostModel):
 
-    def savings(self, op, times):
-        return 0
+    COST_BENEFIT = {
+    }
+
+    def savings_for_pack(self, opnum, times):
+        cost, benefit_factor = X86_CostModel.COST_BENEFIT.get(opnum, (1,1))
+        return benefit_factor * times - cost
+
+    def unpack_cost(self, index, op):
+        if op.getdescr():
+            if op.getdescr().is_array_of_floats():
+                if index == 1:
+                    return 2
+        return 1
+
 
 class PackType(PrimitiveTypeMixin):
     UNKNOWN_TYPE = '-'
@@ -1242,9 +1247,7 @@
         self.box_to_vbox[box] = (off, vector)
 
 def isomorphic(l_op, r_op):
-    """ Same instructions have the same operation name.
-    TODO what about parameters?
-    """
+    """ Subject of definition """
     if l_op.getopnum() == r_op.getopnum():
         return True
     return False
@@ -1266,13 +1269,34 @@
         p = Pair(l,r)
         self.packs.append(p)
 
-    def can_be_packed(self, lnode, rnode):
+    def can_be_packed(self, lnode, rnode, origin_pack):
         if isomorphic(lnode.getoperation(), rnode.getoperation()):
             if lnode.independent(rnode):
                 for pack in self.packs:
                     if pack.left == lnode or \
                        pack.right == rnode:
                         return False
+                if origin_pack is None:
+                    return True
+                return self.profitable_pack(lnode, rnode, origin_pack)
+        return False
+
+    def profitable_pack(self, lnode, rnode, origin_pack):
+        lpacknode = origin_pack.left
+        if self.prohibit_packing(lpacknode.getoperation(), lnode.getoperation()):
+            return False
+        rpacknode = origin_pack.right
+        if self.prohibit_packing(rpacknode.getoperation(), rnode.getoperation()):
+            return False
+
+        return True
+
+    def prohibit_packing(self, packed, inquestion):
+        """ Blocks the packing of some operations """
+        if inquestion.vector == -1:
+            return True
+        if packed.is_array_op():
+            if packed.getarg(1) == inquestion.result:
                 return True
         return False
 
@@ -1313,13 +1337,21 @@
     """
     def __init__(self, ops):
         self.operations = ops
-        self.savings = 0
-        for node in self.operations:
+        for i,node in enumerate(self.operations):
             node.pack = self
+            node.pack_position = i
+
+    def opcount(self):
+        return len(self.operations)
+
+    def opnum(self):
+        assert len(self.operations) > 0
+        return self.operations[0].getoperation().getopnum()
 
     def clear(self):
         for node in self.operations:
             node.pack = None
+            node.pack_position = -1
 
     def rightmost_match_leftmost(self, other):
         assert isinstance(other, Pack)


More information about the pypy-commit mailing list