[pypy-commit] pypy vecopt: costmodel impl extended
plan_rich
noreply at buildbot.pypy.org
Mon Jun 1 12:56:05 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77740:7ce427746614
Date: 2015-06-01 12:56 +0200
http://bitbucket.org/pypy/pypy/changeset/7ce427746614/
Log: costmodel impl extended; added tests for the cost model; extracted tests
into another file
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -80,6 +80,7 @@
self.adjacent_list_back = []
self.memory_ref = None
self.pack = None
+ self.pack_position = -1
self.emitted = False
self.schedule_position = -1
self.priority = 0
@@ -962,12 +963,6 @@
self.current_end.next_nonconst = idxvar
self.current_end = idxvar
- def is_adjacent_with_runtime_check(self, other, graph):
- return self.next_nonconst is not None and \
- self.next_nonconst is self.current_end and \
- self.next_nonconst.opnum == rop.INT_ADD and \
- self.next_nonconst.is_identity()
-
def getvariable(self):
return self.var
@@ -1086,15 +1081,6 @@
return abs(self.index_var.diff(other.index_var)) - stride == 0
return False
- def is_adjacent_with_runtime_check(self, other, graph):
- """there are many cases where the stride is variable
- it is a priori not known if two unrolled memory accesses are
- tightly packed"""
- assert isinstance(other, MemoryRef)
- if self.array == other.array and self.descr == other.descr:
- return self.index_var.is_adjacent_with_runtime_check(other.index_var, graph)
- return False
-
def match(self, other):
assert isinstance(other, MemoryRef)
if self.array == other.array and self.descr == other.descr:
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
@@ -0,0 +1,131 @@
+import py
+
+from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop
+from rpython.jit.metainterp.optimizeopt.util import equaloplists
+from rpython.jit.metainterp.optimizeopt.vectorize import (VecScheduleData,
+ Pack, NotAProfitableLoop, VectorizingOptimizer)
+from rpython.jit.metainterp.optimizeopt.dependency import Node
+from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
+from rpython.jit.metainterp.optimizeopt.test.test_schedule import SchedulerBaseTest
+from rpython.jit.metainterp.optimizeopt.test.test_vectorize import (FakeMetaInterpStaticData,
+ FakeJitDriverStaticData)
+from rpython.jit.metainterp.resoperation import rop, ResOperation
+from rpython.jit.tool.oparser import parse as opparse
+from rpython.jit.tool.oparser_model import get_model
+
+class FakeMemoryRef(object):
+ def __init__(self, iv):
+ self.index_var = iv
+
+ def is_adjacent_to(self, other):
+ iv = self.index_var
+ ov = other.index_var
+ val = (int(str(ov.var)[1:]) - int(str(iv.var)[1:]))
+ print iv, ov, "adja?", val == 1
+ # i0 and i1 are adjacent
+ # i1 and i2 ...
+ # but not i0, i2
+ # ...
+ return val == 1
+
+class CostModelBaseTest(SchedulerBaseTest):
+ def savings(self, loop):
+ metainterp_sd = FakeMetaInterpStaticData(self.cpu)
+ jitdriver_sd = FakeJitDriverStaticData()
+ opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, [])
+ opt.build_dependency_graph()
+ graph = opt.dependency_graph
+ for k,m in graph.memory_refs.items():
+ graph.memory_refs[k] = FakeMemoryRef(m.index_var)
+ print "memory ref", k, m
+ opt.find_adjacent_memory_refs()
+ opt.extend_packset()
+ opt.combine_packset()
+ for pack in opt.packset.packs:
+ print "apck:"
+ print '\n'.join([str(op.getoperation()) for op in pack.operations])
+ print
+ return opt.costmodel.calculate_savings(opt.packset)
+
+ def assert_operations_match(self, loop_a, loop_b):
+ assert equaloplists(loop_a.operations, loop_b.operations)
+
+ def test_load_2_unpack(self):
+ loop1 = self.parse("""
+ f10 = raw_load(p0, i0, descr=double)
+ f11 = raw_load(p0, i1, descr=double)
+ guard_true(i0) [f10]
+ guard_true(i1) [f11]
+ """)
+ # for double the costs are
+ # unpack index 1 savings: -2
+ # unpack index 0 savings: -1
+ savings = self.savings(loop1)
+ assert savings == -2
+
+ def test_load_4_unpack(self):
+ loop1 = self.parse("""
+ i10 = raw_load(p0, i0, descr=float)
+ i11 = raw_load(p0, i1, descr=float)
+ i12 = raw_load(p0, i2, descr=float)
+ i13 = raw_load(p0, i3, descr=float)
+ guard_true(i0) [i10]
+ guard_true(i1) [i11]
+ guard_true(i2) [i12]
+ guard_true(i3) [i13]
+ """)
+ savings = self.savings(loop1)
+ assert savings == -1
+
+ def test_load_2_unpack_1(self):
+ loop1 = self.parse("""
+ f10 = raw_load(p0, i0, descr=double)
+ f11 = raw_load(p0, i1, descr=double)
+ guard_true(i0) [f10]
+ """)
+ savings = self.savings(loop1)
+ assert savings == 0
+
+ def test_load_2_unpack_1_index1(self):
+ loop1 = self.parse("""
+ f10 = raw_load(p0, i0, descr=double)
+ f11 = raw_load(p0, i1, descr=double)
+ guard_true(i0) [f11]
+ """)
+ savings = self.savings(loop1)
+ assert savings == -1
+
+ def test_load_arith(self):
+ loop1 = self.parse("""
+ i10 = raw_load(p0, i0, descr=int)
+ i11 = raw_load(p0, i1, descr=int)
+ i12 = raw_load(p0, i2, descr=int)
+ i13 = raw_load(p0, i3, descr=int)
+ i15 = int_add(i10, 1)
+ i16 = int_add(i11, 1)
+ i17 = int_add(i12, 1)
+ i18 = int_add(i13, 1)
+ """)
+ savings = self.savings(loop1)
+ assert savings == 6
+
+ def test_load_arith_store(self):
+ loop1 = self.parse("""
+ i10 = raw_load(p0, i0, descr=int)
+ i11 = raw_load(p0, i1, descr=int)
+ i12 = raw_load(p0, i2, descr=int)
+ i13 = raw_load(p0, i3, descr=int)
+ i15 = int_add(i10, 1)
+ i16 = int_add(i11, 1)
+ i17 = int_add(i12, 1)
+ i18 = int_add(i13, 1)
+ raw_store(p1, i4, i15, descr=int)
+ raw_store(p1, i5, i16, descr=int)
+ raw_store(p1, i6, i17, descr=int)
+ raw_store(p1, i7, i18, descr=int)
+ """)
+ savings = self.savings(loop1)
+ assert savings == 6
+
+class Test(CostModelBaseTest, LLtypeMixin):
+ pass
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -1,31 +1,43 @@
import py
+from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop
from rpython.jit.metainterp.optimizeopt.util import equaloplists
from rpython.jit.metainterp.optimizeopt.vectorize import (VecScheduleData,
- Pack, NotAProfitableLoop)
+ Pack, NotAProfitableLoop, VectorizingOptimizer)
from rpython.jit.metainterp.optimizeopt.dependency import Node
from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
from rpython.jit.metainterp.optimizeopt.test.test_dependency import DependencyBaseTest
+from rpython.jit.metainterp.optimizeopt.test.test_vectorize import (FakeMetaInterpStaticData,
+ FakeJitDriverStaticData)
+from rpython.jit.metainterp.resoperation import rop, ResOperation
from rpython.jit.tool.oparser import parse as opparse
from rpython.jit.tool.oparser_model import get_model
class SchedulerBaseTest(DependencyBaseTest):
- def parse(self, source):
+ def parse(self, source, inc_label_jump=True):
ns = {
'double': self.floatarraydescr,
'float': self.singlefloatarraydescr,
'long': self.intarraydescr,
+ 'int': self.int32arraydescr,
}
- loop = opparse(" [p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,f0,f1,f2,f3,f4,f5]\n" + source + \
- "\n jump(p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,f0,f1,f2,f3,f4,f5)",
+ loop = opparse(" [p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,i6,i7,i8,i9,f0,f1,f2,f3,f4,f5]\n" + source + \
+ "\n jump(p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,i6,i7,i8,i9,f0,f1,f2,f3,f4,f5)",
cpu=self.cpu,
namespace=ns)
+ if inc_label_jump:
+ token = JitCellToken()
+ loop.operations = \
+ [ResOperation(rop.LABEL, loop.inputargs, None, descr=TargetToken(token))] + \
+ loop.operations
+ return loop
+
del loop.operations[-1]
return loop
def pack(self, loop, l, r):
- return [Node(op,l+i) for i,op in enumerate(loop.operations[l:r])]
+ return [Node(op,1+l+i) for i,op in enumerate(loop.operations[1+l:1+r])]
def schedule(self, loop_orig, packs, vec_reg_size=16):
loop = get_model(False).ExtendedTreeLoop("loop")
@@ -46,6 +58,7 @@
def assert_operations_match(self, loop_a, loop_b):
assert equaloplists(loop_a.operations, loop_b.operations)
+class Test(SchedulerBaseTest, LLtypeMixin):
def test_schedule_split_load(self):
loop1 = self.parse("""
i10 = raw_load(p0, i0, descr=float)
@@ -61,7 +74,7 @@
v1[i32#4] = vec_raw_load(p0, i0, 4, descr=float)
i14 = raw_load(p0, i4, descr=float)
i15 = raw_load(p0, i5, descr=float)
- """)
+ """, False)
self.assert_equal(loop2, loop3)
def test_int_to_float(self):
@@ -73,31 +86,10 @@
""")
pack1 = self.pack(loop1, 0, 2)
pack2 = self.pack(loop1, 2, 4)
- print pack1
- print pack2
loop2 = self.schedule(loop1, [pack1, pack2])
loop3 = self.parse("""
v1[i64#2] = vec_raw_load(p0, i0, 2, descr=long)
v2[i32#2] = vec_int_signext(v1[i64#2], 4)
v3[f64#2] = vec_cast_int_to_float(v2[i32#2])
- """)
+ """, False)
self.assert_equal(loop2, loop3)
-
- def test_cost_model_reject_only_load_vectorizable(self):
- loop1 = self.parse("""
- f10 = raw_load(p0, i0, descr=long)
- f11 = raw_load(p0, i1, descr=long)
- guard_true(i0) [f10]
- guard_true(i1) [f11]
- """)
- try:
- pack1 = self.pack(loop1, 0, 2)
- pack2 = self.pack(loop1, 2, 3)
- pack3 = self.pack(loop1, 3, 4)
- loop2 = self.schedule(loop1, [pack1, pack2, pack3])
- py.test.fail("this loops should have bailed out")
- except NotAProfitableLoop:
- pass
-
-class TestLLType(SchedulerBaseTest, LLtypeMixin):
- pass
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_util.py b/rpython/jit/metainterp/optimizeopt/test/test_util.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_util.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_util.py
@@ -155,6 +155,7 @@
arraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Signed))
floatarraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Float))
intarraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Signed))
+ int32arraydescr = cpu.arraydescrof(lltype.GcArray(rffi.INT))
uintarraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Unsigned))
chararraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Char))
singlefloatarraydescr = cpu.arraydescrof(lltype.GcArray(lltype.SingleFloat))
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -112,6 +112,8 @@
self.find_adjacent_memory_refs()
self.extend_packset()
self.combine_packset()
+ if not self.costmodel.profitable(self.packset):
+ raise NotAProfitableLoop()
self.schedule()
gso = GuardStrengthenOpt(self.dependency_graph.index_vars)
@@ -284,7 +286,7 @@
# that point forward:
if node_a.is_before(node_b):
if memref_a.is_adjacent_to(memref_b):
- if self.packset.can_be_packed(node_a, node_b):
+ if self.packset.can_be_packed(node_a, node_b, None):
pair = Pair(node_a,node_b)
self.packset.packs.append(pair)
@@ -304,31 +306,21 @@
for rdep in pack.right.depends():
lnode = ldep.to
rnode = rdep.to
- if lnode.is_before(rnode) and self.packset.can_be_packed(lnode, rnode):
- savings = self.costmodel.estimate_savings(lnode, rnode, pack, False)
- if savings >= 0:
+ isomorph = isomorphic(lnode.getoperation(), rnode.getoperation())
+ if isomorph and lnode.is_before(rnode):
+ if self.packset.can_be_packed(lnode, rnode, pack):
self.packset.add_pair(lnode, rnode)
def follow_def_uses(self, pack):
assert isinstance(pack, Pair)
- savings = -1
- candidate = (None,None)
for ldep in pack.left.provides():
for rdep in pack.right.provides():
lnode = ldep.to
rnode = rdep.to
- if lnode.is_before(rnode) and \
- self.packset.can_be_packed(lnode, rnode):
- est_savings = \
- self.costmodel.estimate_savings(lnode, rnode, pack, True)
- if est_savings > savings:
- savings = est_savings
- candidate = (lnode, rnode)
- #
- if savings >= 0:
- assert candidate[0] is not None
- assert candidate[1] is not None
- self.packset.add_pair(candidate[0], candidate[1])
+ isomorph = isomorphic(lnode.getoperation(), rnode.getoperation())
+ if isomorph and lnode.is_before(rnode):
+ if self.packset.can_be_packed(lnode, rnode, pack):
+ self.packset.add_pair(lnode, rnode)
def combine_packset(self):
if len(self.packset.packs) == 0:
@@ -729,41 +721,54 @@
self._newoperations.append(op)
class CostModel(object):
- def estimate_savings(self, lnode, rnode, origin_pack, expand_forward):
- """ Estimate the number of savings to add this pair.
- Zero is the minimum value returned. This should take
- into account the benefit of executing this instruction
- as SIMD instruction.
- """
+ def unpack_cost(self, index, op):
+ raise NotImplementedError
- lpacknode = origin_pack.left
- if self.prohibit_packing(lpacknode.getoperation(), lnode.getoperation()):
- return -1
- rpacknode = origin_pack.right
- if self.prohibit_packing(rpacknode.getoperation(), rnode.getoperation()):
- return -1
+ def savings_for_pack(self, opnum, times):
+ raise NotImplementedError
- return 0
+ def savings_for_unpacking(self, node, index):
+ savings = 0
+ result = node.getoperation().result
+ print node.op, "[", index, "]===>"
+ for use in node.provides():
+ if use.to.pack is None and use.because_of(result):
+ savings -= self.unpack_cost(index, node.getoperation())
+ print " - ", savings, use.to.op
+ return savings
- def prohibit_packing(self, packed, inquestion):
- """ Blocks the packing of some operations """
- if inquestion.vector == -1:
- return True
- if packed.is_array_op():
- if packed.getarg(1) == inquestion.result:
- return True
- return False
+ def calculate_savings(self, packset):
+ savings = 0
+ for pack in packset.packs:
+ savings += self.savings_for_pack(pack.opnum, pack.opcount())
+ print
+ print "pack", savings
+ op0 = pack.operations[0].getoperation()
+ if op0.result:
+ for i,node in enumerate(pack.operations):
+ savings += self.savings_for_unpacking(node, i)
+ print " +=> sss", savings
+ return savings
- def must_unpack_result_to_exec(self, op, target_op):
- # TODO either move to resop or util
- if op.getoperation().vector != -1:
- return False
- return True
+ def profitable(self, packset):
+ return self.calculate_savings(packset) >= 0
class X86_CostModel(CostModel):
- def savings(self, op, times):
- return 0
+ COST_BENEFIT = {
+ }
+
+ def savings_for_pack(self, opnum, times):
+ cost, benefit_factor = X86_CostModel.COST_BENEFIT.get(opnum, (1,1))
+ return benefit_factor * times - cost
+
+ def unpack_cost(self, index, op):
+ if op.getdescr():
+ if op.getdescr().is_array_of_floats():
+ if index == 1:
+ return 2
+ return 1
+
class PackType(PrimitiveTypeMixin):
UNKNOWN_TYPE = '-'
@@ -1242,9 +1247,7 @@
self.box_to_vbox[box] = (off, vector)
def isomorphic(l_op, r_op):
- """ Same instructions have the same operation name.
- TODO what about parameters?
- """
+ """ Subject of definition """
if l_op.getopnum() == r_op.getopnum():
return True
return False
@@ -1266,13 +1269,34 @@
p = Pair(l,r)
self.packs.append(p)
- def can_be_packed(self, lnode, rnode):
+ def can_be_packed(self, lnode, rnode, origin_pack):
if isomorphic(lnode.getoperation(), rnode.getoperation()):
if lnode.independent(rnode):
for pack in self.packs:
if pack.left == lnode or \
pack.right == rnode:
return False
+ if origin_pack is None:
+ return True
+ return self.profitable_pack(lnode, rnode, origin_pack)
+ return False
+
+ def profitable_pack(self, lnode, rnode, origin_pack):
+ lpacknode = origin_pack.left
+ if self.prohibit_packing(lpacknode.getoperation(), lnode.getoperation()):
+ return False
+ rpacknode = origin_pack.right
+ if self.prohibit_packing(rpacknode.getoperation(), rnode.getoperation()):
+ return False
+
+ return True
+
+ def prohibit_packing(self, packed, inquestion):
+ """ Blocks the packing of some operations """
+ if inquestion.vector == -1:
+ return True
+ if packed.is_array_op():
+ if packed.getarg(1) == inquestion.result:
return True
return False
@@ -1313,13 +1337,21 @@
"""
def __init__(self, ops):
self.operations = ops
- self.savings = 0
- for node in self.operations:
+ for i,node in enumerate(self.operations):
node.pack = self
+ node.pack_position = i
+
+ def opcount(self):
+ return len(self.operations)
+
+ def opnum(self):
+ assert len(self.operations) > 0
+ return self.operations[0].getoperation().getopnum()
def clear(self):
for node in self.operations:
node.pack = None
+ node.pack_position = -1
def rightmost_match_leftmost(self, other):
assert isinstance(other, Pack)
More information about the pypy-commit
mailing list