[pypy-commit] pypy vecopt-merge: merged iterator sharing into the vecopt-merge
plan_rich
noreply at buildbot.pypy.org
Mon Aug 17 10:30:30 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt-merge
Changeset: r79002:06ec92fa38c0
Date: 2015-08-17 10:30 +0200
http://bitbucket.org/pypy/pypy/changeset/06ec92fa38c0/
Log: merged iterator sharing into the vecopt-merge
diff --git a/pypy/module/micronumpy/iterators.py b/pypy/module/micronumpy/iterators.py
--- a/pypy/module/micronumpy/iterators.py
+++ b/pypy/module/micronumpy/iterators.py
@@ -83,6 +83,10 @@
self._indices = indices
self.offset = offset
+ def same(self, other):
+ """ Return True if `other` is at the same offset as this state and
+ self.iterator.same_shape(other.iterator) holds, False otherwise. """
+ if self.offset == other.offset:
+ return self.iterator.same_shape(other.iterator)
+ return False
class ArrayIter(object):
_immutable_fields_ = ['contiguous', 'array', 'size', 'ndim_m1', 'shape_m1[*]',
@@ -100,6 +104,7 @@
self.array = array
self.size = size
self.ndim_m1 = len(shape) - 1
+ #
self.shape_m1 = [s - 1 for s in shape]
self.strides = strides
self.backstrides = backstrides
@@ -113,6 +118,17 @@
factors[ndim-i-1] = factors[ndim-i] * shape[ndim-i]
self.factors = factors
+ def same_shape(self, other):
+ """ Return True if `other` iterates exactly like this iterator.
+
+ Compared are: the contiguous flag, the dtype of the underlying
+ arrays, shape_m1, strides, backstrides and factors.
+ If two iterators share the same shape,
+ next() only needs to be called on one!
+ """
+ # the dtype must be compared against the *other* iterator's array;
+ # comparing self.array.dtype with itself is always true
+ return (self.contiguous == other.contiguous and
+ self.array.dtype is other.array.dtype and
+ self.shape_m1 == other.shape_m1 and
+ self.strides == other.strides and
+ self.backstrides == other.backstrides and
+ self.factors == other.factors)
+
@jit.unroll_safe
def reset(self, state=None, mutate=False):
index = 0
@@ -196,7 +212,7 @@
return state.index >= self.size
def getitem(self, state):
- assert state.iterator is self
+ # assert state.iterator is self
return self.array.getitem(state.offset)
def getitem_bool(self, state):
@@ -207,7 +223,6 @@
assert state.iterator is self
self.array.setitem(state.offset, elem)
-
def AxisIter(array, shape, axis):
strides = array.get_strides()
backstrides = array.get_backstrides()
diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py
--- a/pypy/module/micronumpy/loop.py
+++ b/pypy/module/micronumpy/loop.py
@@ -2,6 +2,7 @@
operations. This is the place to look for all the computations that iterate
over all the array elements.
"""
+import py
from pypy.interpreter.error import OperationError
from rpython.rlib import jit
from rpython.rlib.rstring import StringBuilder
@@ -13,11 +14,6 @@
from pypy.interpreter.argument import Arguments
-call2_driver = jit.JitDriver(
- name='numpy_call2',
- greens=['shapelen', 'func', 'left', 'right', 'calc_dtype', 'res_dtype'],
- reds='auto', vectorize=True)
-
def call2(space, shape, func, calc_dtype, w_lhs, w_rhs, out):
if w_lhs.get_size() == 1:
w_left = w_lhs.get_scalar_value().convert_to(space, calc_dtype)
@@ -38,28 +34,96 @@
out_iter, out_state = out.create_iter(shape)
shapelen = len(shape)
res_dtype = out.get_dtype()
- while not out_iter.done(out_state):
- call2_driver.jit_merge_point(shapelen=shapelen, func=func,
- left=left_iter is None,
- right=right_iter is None,
- calc_dtype=calc_dtype, res_dtype=res_dtype)
- if left_iter:
- w_left = left_iter.getitem(left_state).convert_to(space, calc_dtype)
- left_state = left_iter.next(left_state)
- if right_iter:
- w_right = right_iter.getitem(right_state).convert_to(space, calc_dtype)
- right_state = right_iter.next(right_state)
- w_out = func(calc_dtype, w_left, w_right)
- out_iter.setitem(out_state, w_out.convert_to(space, res_dtype))
- out_state = out_iter.next(out_state)
- # if not set to None, the values will be loop carried
- # (for the var,var case), forcing the vectorization to unpack
- # the vector registers at the end of the loop
- if left_iter:
- w_left = None
- if right_iter:
- w_right = None
- return out
+ call2_func = try_to_share_iterators_call2(left_iter, right_iter,
+ left_state, right_state, out_state)
+ params = (space, shapelen, func, calc_dtype, res_dtype, out,
+ w_left, w_right, left_iter, right_iter, out_iter,
+ left_state, right_state, out_state)
+ return call2_func(*params)
+
+def try_to_share_iterators_call2(left_iter, right_iter, left_state, right_state, out_state):
+ """ Select the specialized call2 loop for the given operands.
+
+ left_iter/right_iter are falsy when the operand is a scalar.
+ Two states can be shared when same() returns True for the pair.
+ Returns one of the call2_advance_* functions defined at module level.
+ """
+ # these are all possible iterator sharing combinations
+ # left == right == out
+ # left == right
+ # left == out
+ # right == out
+ right_out_equal = False
+ if right_iter:
+ # rhs is not a scalar
+ if out_state.same(right_state):
+ right_out_equal = True
+ #
+ if not left_iter:
+ # lhs is a scalar
+ if right_out_equal:
+ # rhs is read through out_state; there is no left iterator to advance
+ return call2_advance_out_left
+ else:
+ # left is a scalar, and right and out do not match
+ return call2_advance_out_left_right
+ else:
+ # lhs is NOT a scalar
+ if out_state.same(left_state):
+ # (2) out and left are the same -> remove left
+ if right_out_equal:
+ # the best case
+ return call2_advance_out
+ else:
+ return call2_advance_out_right
+ else:
+ if right_out_equal:
+ # only rhs shares the out state, lhs keeps its own state
+ return call2_advance_out_left
+ else:
+ # out shares with nobody; rhs may still reuse the lhs state
+ if right_iter and right_state.same(left_state):
+ return call2_advance_out_left_eq_right
+ else:
+ return call2_advance_out_left_right
+
+ # every branch above returns, so this line is never reached
+ assert 0, "logical problem with the selection of the call 2 case"
+
+def generate_call2_cases(name, left_state, right_state):
+ """ Build one specialized call2 loop that has its own JitDriver.
+
+ name: suffix of the driver name ('numpy_call2_' + name) and of the
+ name given to the generated function ('call2_' + name).
+ left_state / right_state: the *names* of the state variables that are
+ substituted into the template for left_iter.getitem(...) and
+ right_iter.getitem(...), e.g. "out_state" to read an operand through
+ the state of the output iterator.
+ Returns the generated function.
+ """
+ call2_driver = jit.JitDriver(name='numpy_call2_' + name,
+ greens=['shapelen', 'func', 'calc_dtype', 'res_dtype'],
+ reds='auto', vectorize=True)
+ #
+ # an operand's own state is only advanced when the template reads the
+ # operand through that very state (and not through a shared one)
+ advance_left_state = left_state == "left_state"
+ advance_right_state = right_state == "right_state"
+ code = """
+ def method(space, shapelen, func, calc_dtype, res_dtype, out,
+ w_left, w_right, left_iter, right_iter, out_iter,
+ left_state, right_state, out_state):
+ while not out_iter.done(out_state):
+ call2_driver.jit_merge_point(shapelen=shapelen, func=func,
+ calc_dtype=calc_dtype, res_dtype=res_dtype)
+ if left_iter:
+ w_left = left_iter.getitem({left_state}).convert_to(space, calc_dtype)
+ if right_iter:
+ w_right = right_iter.getitem({right_state}).convert_to(space, calc_dtype)
+ w_out = func(calc_dtype, w_left, w_right)
+ out_iter.setitem(out_state, w_out.convert_to(space, res_dtype))
+ out_state = out_iter.next(out_state)
+ if advance_left_state and left_iter:
+ left_state = left_iter.next(left_state)
+ if advance_right_state and right_iter:
+ right_state = right_iter.next(right_state)
+ #
+ # if not set to None, the values will be loop carried
+ # (for the var,var case), forcing the vectorization to unpack
+ # the vector registers at the end of the loop
+ if left_iter:
+ w_left = None
+ if right_iter:
+ w_right = None
+ return out
+ """
+ # the formatted template is executed with this function's locals() as
+ # its namespace: that is where it finds call2_driver and the advance_*
+ # flags, and where the resulting `method` is picked up from
+ exec(py.code.Source(code.format(left_state=left_state,right_state=right_state)).compile(), locals())
+ method.__name__ = "call2_" + name
+ return method
+
+call2_advance_out = generate_call2_cases("inc_out", "out_state", "out_state")
+call2_advance_out_left = generate_call2_cases("inc_out_left", "left_state", "out_state")
+call2_advance_out_right = generate_call2_cases("inc_out_right", "out_state", "right_state")
+call2_advance_out_left_eq_right = generate_call2_cases("inc_out_left_eq_right", "left_state", "left_state")
+call2_advance_out_left_right = generate_call2_cases("inc_out_left_right", "left_state", "right_state")
call1_driver = jit.JitDriver(
name='numpy_call1',
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -911,8 +911,10 @@
def test_multidim_slice(self):
result = self.run('multidim_slice')
assert result == 12
- self.check_trace_count(2)
- self.check_vectorized(1,0) # TODO?
+ self.check_trace_count(3)
+ # ::2 creates a view object -> needs an inner loop
+ # that iterates contiguous chunks of the matrix
+ self.check_vectorized(1,1)
# NOT WORKING
diff --git a/rpython/jit/backend/llgraph/runner.py b/rpython/jit/backend/llgraph/runner.py
--- a/rpython/jit/backend/llgraph/runner.py
+++ b/rpython/jit/backend/llgraph/runner.py
@@ -155,6 +155,13 @@
def __repr__(self):
return 'ArrayDescr(%r)' % (self.OUTERA,)
+ def is_array_of_primitives(self):
+ """ True if getkind() of the array's item type (self.A.OF) is
+ 'float', 'int' or the empty string. """
+ kind = getkind(self.A.OF)
+ # NOTE(review): it is not visible here which item types getkind()
+ # reports as '' -- confirm that this case is intended
+ return kind == 'float' or \
+ kind == 'int' or \
+ kind == ''
+
+
def is_array_of_pointers(self):
return getkind(self.A.OF) == 'ref'
diff --git a/rpython/jit/backend/llsupport/descr.py b/rpython/jit/backend/llsupport/descr.py
--- a/rpython/jit/backend/llsupport/descr.py
+++ b/rpython/jit/backend/llsupport/descr.py
@@ -203,6 +203,11 @@
def getconcrete_type(self):
return self.concrete_type
+ def is_array_of_primitives(self):
+ """ True if the flag of this descr is FLAG_FLOAT, FLAG_SIGNED or
+ FLAG_UNSIGNED. """
+ return self.flag == FLAG_FLOAT or \
+ self.flag == FLAG_SIGNED or \
+ self.flag == FLAG_UNSIGNED
+
def is_array_of_pointers(self):
return self.flag == FLAG_POINTER
diff --git a/rpython/jit/metainterp/executor.py b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -344,6 +344,8 @@
rop.VEC_RAW_STORE,
rop.VEC_GETARRAYITEM_RAW,
rop.VEC_SETARRAYITEM_RAW,
+ rop.VEC_GETARRAYITEM_GC,
+ rop.VEC_SETARRAYITEM_GC,
): # list of opcodes never executed by pyjitpl
continue
if rop._VEC_PURE_FIRST <= value <= rop._VEC_PURE_LAST:
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -805,8 +805,9 @@
def operation_{name}(self, op, node):
descr = op.getdescr()
idx_ref = self.get_or_create(op.getarg(1))
- node.memory_ref = MemoryRef(op, idx_ref, {raw_access})
- self.memory_refs[node] = node.memory_ref
+ if descr.is_array_of_primitives():
+ node.memory_ref = MemoryRef(op, idx_ref, {raw_access})
+ self.memory_refs[node] = node.memory_ref
"""
exec py.code.Source(array_access_source
.format(name='RAW_LOAD',raw_access=True)).compile()
@@ -816,6 +817,10 @@
.format(name='GETARRAYITEM_RAW',raw_access=False)).compile()
exec py.code.Source(array_access_source
.format(name='SETARRAYITEM_RAW',raw_access=False)).compile()
+ exec py.code.Source(array_access_source
+ .format(name='GETARRAYITEM_GC',raw_access=False)).compile()
+ exec py.code.Source(array_access_source
+ .format(name='SETARRAYITEM_GC',raw_access=False)).compile()
del array_access_source
integral_dispatch_opt = make_dispatcher_method(IntegralForwardModification, 'operation_')
IntegralForwardModification.inspect_operation = integral_dispatch_opt
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -692,8 +692,10 @@
rop.VEC_RAW_LOAD: LOAD_TRANS,
rop.VEC_GETARRAYITEM_RAW: LOAD_TRANS,
+ rop.VEC_GETARRAYITEM_GC: LOAD_TRANS,
rop.VEC_RAW_STORE: STORE_TRANS,
rop.VEC_SETARRAYITEM_RAW: STORE_TRANS,
+ rop.VEC_SETARRAYITEM_GC: STORE_TRANS,
rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOpConv(PT_DOUBLE_2, PT_FLOAT_2),
rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(PT_FLOAT_2, PT_DOUBLE_2),
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -212,21 +212,6 @@
"""
self.assert_vectorize(self.parse_loop(ops), self.parse_loop(ops))
- def test_vectorize_skip_impossible_2(self):
- ops = """
- [p0,i0]
- i1 = int_add(i0,1)
- i2 = int_le(i1, 10)
- guard_true(i2) []
- i3 = getarrayitem_gc(p0,i0,descr=intarraydescr)
- jump(p0,i1)
- """
- try:
- self.vectorize(self.parse_loop(ops))
- py.test.fail("should not happend")
- except NotAVectorizeableLoop:
- pass
-
def test_unroll_empty_stays_empty(self):
""" has no operations in this trace, thus it stays empty
after unrolling it 2 times """
@@ -264,6 +249,26 @@
"""
self.assert_vectorize(self.parse_loop(ops), self.parse_loop(ops))
+ def test_load_primitive_python_list(self):
+ """ two getarrayitem_gc loads at index i0 and i0+1 that use
+ floatarraydescr are expected to be combined into a single
+ vec_getarrayitem_gc; it currently rejects pointer arrays """
+ # input trace: two loads from the same gc array at adjacent indices
+ ops = """
+ [p0,i0]
+ i2 = getarrayitem_gc(p0,i0,descr=floatarraydescr)
+ i1 = int_add(i0,1)
+ i3 = getarrayitem_gc(p0,i1,descr=floatarraydescr)
+ i4 = int_add(i1,1)
+ jump(p0,i4)
+ """
+ # expected trace: one vector load of 2 items replaces both loads
+ opt = """
+ [p0,i0]
+ i1 = int_add(i0,1)
+ i2 = int_add(i0,2)
+ i3 = vec_getarrayitem_gc(p0,i0,2,descr=floatarraydescr)
+ jump(p0,i2)
+ """
+ vopt = self.vectorize(self.parse_loop(ops),0)
+ self.assert_equal(vopt.loop, self.parse_loop(opt))
+
def test_vect_unroll_char(self):
""" a 16 byte vector register can hold 16 bytes thus
it is unrolled 16 times. (it is the smallest type in the trace) """
@@ -316,7 +321,7 @@
def test_estimate_unroll_factor_smallest_byte_zero(self):
ops = """
[p0,i0]
- raw_load(p0,i0,descr=arraydescr2)
+ raw_load(p0,i0,descr=arraydescr)
jump(p0,i0)
"""
vopt = self.vectoroptimizer(self.parse_loop(ops))
@@ -326,7 +331,7 @@
def test_array_operation_indices_not_unrolled(self):
ops = """
[p0,i0]
- raw_load(p0,i0,descr=arraydescr2)
+ raw_load(p0,i0,descr=arraydescr)
jump(p0,i0)
"""
vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -253,13 +253,12 @@
def linear_find_smallest_type(self, loop):
# O(#operations)
for i,op in enumerate(loop.operations):
- if op.is_raw_array_access():
+ if op.is_primitive_array_access():
descr = op.getdescr()
- if not descr.is_array_of_pointers():
- byte_count = descr.get_item_size_in_bytes()
- if self.smallest_type_bytes == 0 \
- or byte_count < self.smallest_type_bytes:
- self.smallest_type_bytes = byte_count
+ byte_count = descr.get_item_size_in_bytes()
+ if self.smallest_type_bytes == 0 \
+ or byte_count < self.smallest_type_bytes:
+ self.smallest_type_bytes = byte_count
def get_unroll_count(self, simd_vec_reg_bytes):
""" This is an estimated number of further unrolls """
@@ -667,7 +666,7 @@
if origin_pack is None:
descr = lnode.getoperation().getdescr()
ptype = PackType.by_descr(descr, self.vec_reg_size)
- if lnode.getoperation().is_raw_load():
+ if lnode.getoperation().is_primitive_load():
# load outputs value, no input
return Pair(lnode, rnode, None, ptype)
else:
@@ -710,7 +709,7 @@
""" Blocks the packing of some operations """
if inquestion.vector == -1:
return True
- if packed.is_raw_array_access():
+ if packed.is_primitive_array_access():
if packed.getarg(1) == inquestion.result:
return True
if not forward and inquestion.getopnum() == rop.INT_SIGNEXT:
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -174,10 +174,19 @@
def is_raw_array_access(self):
- return self.is_raw_load() or self.is_raw_store()
+ """ Indicates that the opcode of this operation lies in the
+ _RAW_LOAD_* or in the _RAW_STORE_* opcode range. """
+ # is_raw_load()/is_raw_store() are renamed to is_primitive_load()/
+ # is_primitive_store() by this changeset; the old names are gone,
+ # so the new ones must be called here
+ return self.is_primitive_load() or self.is_primitive_store()
- def is_raw_load(self):
+ def is_primitive_array_access(self):
+ """ Indicates that this operation loads/stores a
+ primitive type (int,float) """
+ # the opcode must be a load or a store (is_primitive_load() /
+ # is_primitive_store()) AND the descr must say that the array
+ # holds primitives
+ if self.is_primitive_load() or self.is_primitive_store():
+ descr = self.getdescr()
+ if descr.is_array_of_primitives():
+ return True
+ return False
+
+ def is_primitive_load(self):
return rop._RAW_LOAD_FIRST < self.getopnum() < rop._RAW_LOAD_LAST
- def is_raw_store(self):
+ def is_primitive_store(self):
return rop._RAW_STORE_FIRST < self.getopnum() < rop._RAW_STORE_LAST
def is_comparison(self):
@@ -568,13 +577,13 @@
#
'_ALWAYS_PURE_LAST', # ----- end of always_pure operations -----
+ '_RAW_LOAD_FIRST',
'GETARRAYITEM_GC/2d',
-
- '_RAW_LOAD_FIRST',
'GETARRAYITEM_RAW/2d',
'VEC_GETARRAYITEM_RAW/3d',
'RAW_LOAD/2d',
'VEC_RAW_LOAD/3d',
+ 'VEC_GETARRAYITEM_GC/3d',
'_RAW_LOAD_LAST',
'GETINTERIORFIELD_GC/2d',
@@ -596,13 +605,14 @@
'_NOSIDEEFFECT_LAST', # ----- end of no_side_effect operations -----
'INCREMENT_DEBUG_COUNTER/1',
- 'SETARRAYITEM_GC/3d',
'_RAW_STORE_FIRST',
+ 'SETARRAYITEM_GC/3d',
'SETARRAYITEM_RAW/3d',
'VEC_SETARRAYITEM_RAW/3d',
'RAW_STORE/3d',
'VEC_RAW_STORE/3d',
+ 'VEC_SETARRAYITEM_GC/3d',
'_RAW_STORE_LAST',
'SETINTERIORFIELD_GC/3d',
@@ -796,8 +806,10 @@
_opvector = {
rop.RAW_LOAD: rop.VEC_RAW_LOAD,
rop.GETARRAYITEM_RAW: rop.VEC_GETARRAYITEM_RAW,
+ rop.GETARRAYITEM_GC: rop.VEC_GETARRAYITEM_GC,
rop.RAW_STORE: rop.VEC_RAW_STORE,
rop.SETARRAYITEM_RAW: rop.VEC_SETARRAYITEM_RAW,
+ rop.SETARRAYITEM_GC: rop.VEC_SETARRAYITEM_GC,
rop.INT_ADD: rop.VEC_INT_ADD,
rop.INT_SUB: rop.VEC_INT_SUB,
More information about the pypy-commit
mailing list