[pypy-commit] pypy vecopt: started to find reduce/accumulation functions that are vectorizable (e.g. sum)
plan_rich
noreply at buildbot.pypy.org
Mon Jun 8 14:15:45 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77954:90b97695ef67
Date: 2015-06-08 14:15 +0200
http://bitbucket.org/pypy/pypy/changeset/90b97695ef67/
Log: started to find reduce/accumulation functions that are vectorizable
(e.g. sum)
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -330,8 +330,7 @@
def test_sum(self):
result = self.run("sum")
assert result == sum(range(30))
- # TODO impl reduce
- self.check_vectorized(1, 0)
+ self.check_vectorized(1, 1)
def define_cumsum():
return """
@@ -343,6 +342,7 @@
def test_cumsum(self):
result = self.run("cumsum")
assert result == 15
+ # not vectorizable, has one back edge
self.check_vectorized(1, 0)
def define_axissum():
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -1041,6 +1041,33 @@
vopt = self.vectorize(self.parse_loop(ops),1)
self.assert_equal(vopt.loop, self.parse_loop(opt, add_label=False))
+ def test_accumulate_basic(self):
+ # Vectorizes a simple float accumulation loop (f2 = f0 + f1 is carried
+ # over the back edge by the jump) and prints the resulting operations.
+ trace = """
+ [p0, i0, f0]
+ guard_early_exit() [p0, i0, f0]
+ f1 = raw_load(p0, i0, descr=floatarraydescr)
+ f2 = float_add(f0, f1)
+ i1 = int_add(i0, 8)
+ i2 = int_lt(i1, 100)
+ guard_false(i2) [p0, i0, f2]
+ jump(p0, i1, f2)
+ """
+ # NOTE(review): trace_opt is not compared against the optimizer output
+ # below; it only sketches the intended result. The guard_false failargs
+ # name v[f64|2], which is not defined anywhere in this trace -- this
+ # looks like a typo for v2[f64|2]; confirm before asserting on it.
+ trace_opt = """
+ [p0, i0, v2[f64|2]]
+ guard_early_exit() [p0, i0, v2[f64|2]]
+ i1 = int_add(i0, 16)
+ i2 = int_lt(i1, 100)
+ guard_false(i2) [p0, i0, v[f64|2]]
+ i10 = int_add(i0, 16)
+ i20 = int_lt(i10, 100)
+ v1[f64|2] = vec_raw_load(p0, i0, 2, descr=floatarraydescr)
+ v3[f64|2] = vec_float_hadd(v2[f64|2], v1[f64|2])
+ jump(p0, i1, v3[f64|2])
+ """
+ # no assertion yet: the vectorized loop is only printed for inspection
+ opt = self.vectorize(self.parse_loop(trace))
+ self.debug_print_operations(opt.loop)
+
+
def test_element_f45_in_guard_failargs(self):
ops = """
[p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18]
@@ -1325,23 +1352,5 @@
opt = self.vectorize(self.parse_loop(trace))
self.debug_print_operations(opt.loop)
- def test_reduction_basic(self):
- trace = """
- [p5, i6, p2, i7, p1, p8, i9, i10, f11, i12, i13, i14]
- guard_early_exit() [p2, p1, p5, f11, i9, i6, i10, i7, p8]
- f15 = raw_load(i12, i10, descr=floatarraydescr)
- guard_not_invalidated() [p2, p1, f15, p5, f11, i9, i6, i10, i7, p8]
- f16 = float_add(f11, f15)
- raw_store(i13, i7, f16, descr=floatarraydescr)
- i18 = int_add(i7, 8)
- i20 = int_add(i9, 1)
- i22 = int_add(i10, 8)
- i23 = int_ge(i20, i14)
- guard_false(i23) [p2, p1, i20, i18, f16, i22, p5, None, None, i6, None, None, p8]
- jump(p5, i6, p2, i18, p1, p8, i20, i22, f16, i12, i13, i14)
- """
- opt = self.vectorize(self.parse_loop(trace))
- self.debug_print_operations(opt.loop)
-
class TestLLtype(BaseTestVectorize, LLtypeMixin):
pass
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -136,10 +136,7 @@
self._newoperations.append(op)
def unroll_loop_iterations(self, loop, unroll_count):
- """ Unroll the loop X times. unroll_count is an integral how
- often to further unroll the loop.
- """
-
+ """ Unroll the loop X times. unroll_count + 1 = unroll_factor """
op_count = len(loop.operations)
label_op = loop.operations[0].clone()
@@ -293,8 +290,8 @@
# that point forward:
if node_a.is_before(node_b):
if memref_a.is_adjacent_to(memref_b):
- if self.packset.can_be_packed(node_a, node_b, None):
- pair = Pair(node_a,node_b)
+ pair = self.packset.can_be_packed(node_a, node_b, None)
+ if pair:
self.packset.packs.append(pair)
def extend_packset(self):
@@ -315,8 +312,9 @@
rnode = rdep.to
isomorph = isomorphic(lnode.getoperation(), rnode.getoperation())
if isomorph and lnode.is_before(rnode):
- if self.packset.can_be_packed(lnode, rnode, pack):
- self.packset.add_pair(lnode, rnode)
+ pair = self.packset.can_be_packed(lnode, rnode, pack)
+ if pair:
+ self.packset.packs.append(pair)
def follow_def_uses(self, pack):
assert isinstance(pack, Pair)
@@ -1322,17 +1320,50 @@
p = Pair(l,r)
self.packs.append(p)
+ def accumulates(self, lnode, rnode, origin_pack):
+ """ Return True if rnode depends on lnode only through an
+ accumulator (the result of lnode is an argument of rnode, as in
+ a running sum), otherwise False.
+ origin_pack is only forwarded to getaccumulator_variable. """
+ # lnode and rnode are isomorphic and dependent
+ lop = lnode.getoperation()
+ opnum = lop.getopnum()
+ # this local must not be called 'rop': that would shadow the name
+ # the opcode constants (rop.FLOAT_ADD, rop.INT_ADD) are read from.
+ # NOTE(review): assumes 'rop' is the module-level opcode namespace
+ roper = rnode.getoperation()
+
+ if opnum in (rop.FLOAT_ADD, rop.INT_ADD):
+ assert lop.numargs() == 2 and lop.result is not None
+ accum, accum_pos = self.getaccumulator_variable(lop, roper, origin_pack)
+ if not accum:
+ return False
+ # position of the non accumulated argument (not used yet)
+ loaded_pos = (accum_pos + 1) % 2
+ # the dependency exists only because of the result of lnode
+ for dep in lnode.provides():
+ if dep.to is rnode:
+ if not dep.because_of(accum):
+ # not quite ... this is not handlable
+ return False
+ # this can be handled by accumulation
+ return True
+
+ return False
+
+ def getaccumulator_variable(self, lop, rop, origin_pack):
+ """ Find the argument of rop that is the result of lop.
+ Returns the tuple (argument, position in the argument list),
+ or (None, -1) if no argument of rop is the result of lop.
+ origin_pack is currently unused. """
+ args = rop.getarglist()
+ # enumerate yields (index, item) pairs, in that order
+ for i, arg in enumerate(args):
+ if arg is lop.result:
+ return arg, i
+
+ return None, -1
+
def can_be_packed(self, lnode, rnode, origin_pack):
+ # Returns a new Pair(lnode, rnode) if the two nodes can be packed
+ # together, otherwise None. The callers test the result for truth
+ # and append the returned pair to the packset.
if isomorphic(lnode.getoperation(), rnode.getoperation()):
- if lnode.independent(rnode):
+ independent = lnode.independent(rnode)
+ # dependent nodes can still be packed if they form an accumulation
+ if independent or self.accumulates(lnode, rnode, origin_pack):
for pack in self.packs:
if pack.left == lnode or \
pack.right == rnode:
- return False
+ return None
if origin_pack is None:
- return True
- return self.profitable_pack(lnode, rnode, origin_pack)
- return False
+ return Pair(lnode, rnode)
+ if self.profitable_pack(lnode, rnode, origin_pack):
+ return Pair(lnode, rnode)
+ return None
def profitable_pack(self, lnode, rnode, origin_pack):
lpacknode = origin_pack.left
More information about the pypy-commit
mailing list