[pypy-commit] pypy vecopt-merge: merged iterator sharing into the vecopt-merge

plan_rich noreply at buildbot.pypy.org
Mon Aug 17 10:30:30 CEST 2015


Author: Richard Plangger <rich at pasra.at>
Branch: vecopt-merge
Changeset: r79002:06ec92fa38c0
Date: 2015-08-17 10:30 +0200
http://bitbucket.org/pypy/pypy/changeset/06ec92fa38c0/

Log:	merged iterator sharing into the vecopt-merge

diff --git a/pypy/module/micronumpy/iterators.py b/pypy/module/micronumpy/iterators.py
--- a/pypy/module/micronumpy/iterators.py
+++ b/pypy/module/micronumpy/iterators.py
@@ -83,6 +83,10 @@
         self._indices = indices
         self.offset = offset
 
+    def same(self, other):
+        """ True if both states have equal offsets and same_shape() iterators """
+        return (self.offset == other.offset and
+                self.iterator.same_shape(other.iterator))
 
 class ArrayIter(object):
     _immutable_fields_ = ['contiguous', 'array', 'size', 'ndim_m1', 'shape_m1[*]',
@@ -100,6 +104,7 @@
         self.array = array
         self.size = size
         self.ndim_m1 = len(shape) - 1
+        #
         self.shape_m1 = [s - 1 for s in shape]
         self.strides = strides
         self.backstrides = backstrides
@@ -113,6 +118,17 @@
                 factors[ndim-i-1] = factors[ndim-i] * shape[ndim-i]
         self.factors = factors
 
+    def same_shape(self, other):
+        """ True if both iterators walk identically laid out arrays of the
+        same dtype; next() then only needs to be called on one of them!
+        """
+        return (self.contiguous == other.contiguous and
+                self.array.dtype is other.array.dtype and
+                self.shape_m1 == other.shape_m1 and
+                self.strides == other.strides and
+                self.backstrides == other.backstrides and
+                self.factors == other.factors)
+
     @jit.unroll_safe
     def reset(self, state=None, mutate=False):
         index = 0
@@ -196,7 +212,7 @@
         return state.index >= self.size
 
     def getitem(self, state):
-        assert state.iterator is self
+        # assert state.iterator is self
         return self.array.getitem(state.offset)
 
     def getitem_bool(self, state):
@@ -207,7 +223,6 @@
         assert state.iterator is self
         self.array.setitem(state.offset, elem)
 
-
 def AxisIter(array, shape, axis):
     strides = array.get_strides()
     backstrides = array.get_backstrides()
diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py
--- a/pypy/module/micronumpy/loop.py
+++ b/pypy/module/micronumpy/loop.py
@@ -2,6 +2,7 @@
 operations. This is the place to look for all the computations that iterate
 over all the array elements.
 """
+import py
 from pypy.interpreter.error import OperationError
 from rpython.rlib import jit
 from rpython.rlib.rstring import StringBuilder
@@ -13,11 +14,6 @@
 from pypy.interpreter.argument import Arguments
 
 
-call2_driver = jit.JitDriver(
-    name='numpy_call2',
-    greens=['shapelen', 'func', 'left', 'right', 'calc_dtype', 'res_dtype'],
-    reds='auto', vectorize=True)
-
 def call2(space, shape, func, calc_dtype, w_lhs, w_rhs, out):
     if w_lhs.get_size() == 1:
         w_left = w_lhs.get_scalar_value().convert_to(space, calc_dtype)
@@ -38,28 +34,96 @@
     out_iter, out_state = out.create_iter(shape)
     shapelen = len(shape)
     res_dtype = out.get_dtype()
-    while not out_iter.done(out_state):
-        call2_driver.jit_merge_point(shapelen=shapelen, func=func,
-                                     left=left_iter is None,
-                                     right=right_iter is None,
-                                     calc_dtype=calc_dtype, res_dtype=res_dtype)
-        if left_iter:
-            w_left = left_iter.getitem(left_state).convert_to(space, calc_dtype)
-            left_state = left_iter.next(left_state)
-        if right_iter:
-            w_right = right_iter.getitem(right_state).convert_to(space, calc_dtype)
-            right_state = right_iter.next(right_state)
-        w_out = func(calc_dtype, w_left, w_right)
-        out_iter.setitem(out_state, w_out.convert_to(space, res_dtype))
-        out_state = out_iter.next(out_state)
-        # if not set to None, the values will be loop carried
-        # (for the var,var case), forcing the vectorization to unpack
-        # the vector registers at the end of the loop
-        if left_iter:
-            w_left = None
-        if right_iter:
-            w_right = None
-    return out
+    call2_func = try_to_share_iterators_call2(left_iter, right_iter,
+            left_state, right_state, out_state)
+    params = (space, shapelen, func, calc_dtype, res_dtype, out,
+              w_left, w_right, left_iter, right_iter, out_iter,
+              left_state, right_state, out_state)
+    return call2_func(*params)
+
+def try_to_share_iterators_call2(left_iter, right_iter, left_state, right_state, out_state):
+    # Select the call2 loop variant that shares iterator states where
+    # possible: a state that is the same() as another one is replaced by
+    # it in the loop. The sharing combinations handled below are:
+    #   left == right == out,  left == out,  right == out,
+    #   left == right (only checked if neither of them matches out)
+    right_out_equal = False
+    if right_iter:
+        # rhs is not a scalar
+        if out_state.same(right_state):
+            right_out_equal = True
+    # from here on right_out_equal implies that right_iter is set
+    if not left_iter:
+        # lhs is a scalar, only rhs can share its state with out
+        if right_out_equal:
+            return call2_advance_out_left
+        else:
+            # left is a scalar, and right and out do not match
+            return call2_advance_out_left_right
+    else:
+        # lhs is NOT a scalar
+        if out_state.same(left_state):
+            # out and left are the same -> left reuses out_state
+            if right_out_equal:
+                # the best case: all three are the same
+                return call2_advance_out
+            else:
+                return call2_advance_out_right
+        else:
+            if right_out_equal:
+                return call2_advance_out_left
+            else:
+                if right_iter and right_state.same(left_state):
+                    return call2_advance_out_left_eq_right
+                else:
+                    return call2_advance_out_left_right
+    # not reachable: every branch above returns one of the variants
+    assert 0, "logical problem with the selection of the call 2 case"
+
+def generate_call2_cases(name, left_state, right_state):
+    call2_driver = jit.JitDriver(name='numpy_call2_' + name,
+        greens=['shapelen', 'func', 'calc_dtype', 'res_dtype'],
+        reds='auto', vectorize=True)
+    # a state is only advanced if it is used under its own name, i.e. not shared
+    advance_left_state = left_state == "left_state"
+    advance_right_state = right_state == "right_state"
+    code = """
+    def method(space, shapelen, func, calc_dtype, res_dtype, out,
+               w_left, w_right, left_iter, right_iter, out_iter,
+               left_state, right_state, out_state):
+        while not out_iter.done(out_state):
+            call2_driver.jit_merge_point(shapelen=shapelen, func=func,
+                    calc_dtype=calc_dtype, res_dtype=res_dtype)
+            if left_iter:
+                w_left = left_iter.getitem({left_state}).convert_to(space, calc_dtype)
+            if right_iter:
+                w_right = right_iter.getitem({right_state}).convert_to(space, calc_dtype)
+            w_out = func(calc_dtype, w_left, w_right)
+            out_iter.setitem(out_state, w_out.convert_to(space, res_dtype))
+            out_state = out_iter.next(out_state)
+            if advance_left_state and left_iter:
+                left_state = left_iter.next(left_state)
+            if advance_right_state and right_iter:
+                right_state = right_iter.next(right_state)
+            #
+            # if not set to None, the values will be loop carried
+            # (for the var,var case), forcing the vectorization to unpack
+            # the vector registers at the end of the loop
+            if left_iter:
+                w_left = None
+            if right_iter:
+                w_right = None
+        return out
+    """
+    exec(py.code.Source(code.format(left_state=left_state,right_state=right_state)).compile(), locals())
+    method.__name__ = "call2_" + name
+    return method
+
+call2_advance_out = generate_call2_cases("inc_out", "out_state", "out_state")
+call2_advance_out_left = generate_call2_cases("inc_out_left", "left_state", "out_state")
+call2_advance_out_right = generate_call2_cases("inc_out_right", "out_state", "right_state")
+call2_advance_out_left_eq_right = generate_call2_cases("inc_out_left_eq_right", "left_state", "left_state")
+call2_advance_out_left_right = generate_call2_cases("inc_out_left_right", "left_state", "right_state")
 
 call1_driver = jit.JitDriver(
     name='numpy_call1',
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -911,8 +911,10 @@
     def test_multidim_slice(self):
         result = self.run('multidim_slice')
         assert result == 12
-        self.check_trace_count(2)
-        self.check_vectorized(1,0) # TODO?
+        self.check_trace_count(3)
+        # ::2 creates a view object -> needs an inner loop
+        # that iterates contiguous chunks of the matrix
+        self.check_vectorized(1,1) 
 
     # NOT WORKING
 
diff --git a/rpython/jit/backend/llgraph/runner.py b/rpython/jit/backend/llgraph/runner.py
--- a/rpython/jit/backend/llgraph/runner.py
+++ b/rpython/jit/backend/llgraph/runner.py
@@ -155,6 +155,13 @@
     def __repr__(self):
         return 'ArrayDescr(%r)' % (self.OUTERA,)
 
+    def is_array_of_primitives(self):
+        """ True if the item kind is 'float', 'int' or '' """
+        # NOTE(review): which item types getkind() reports as '' is not visible here
+        kind = getkind(self.A.OF)
+        return kind in ('float', 'int', '')
+
+
     def is_array_of_pointers(self):
         return getkind(self.A.OF) == 'ref'
 
diff --git a/rpython/jit/backend/llsupport/descr.py b/rpython/jit/backend/llsupport/descr.py
--- a/rpython/jit/backend/llsupport/descr.py
+++ b/rpython/jit/backend/llsupport/descr.py
@@ -203,6 +203,11 @@
     def getconcrete_type(self):
         return self.concrete_type
 
+    def is_array_of_primitives(self):
+        """ True if the items are flagged as float, signed or unsigned """
+        flag = self.flag
+        return flag == FLAG_FLOAT or flag == FLAG_SIGNED or flag == FLAG_UNSIGNED
+
     def is_array_of_pointers(self):
         return self.flag == FLAG_POINTER
 
diff --git a/rpython/jit/metainterp/executor.py b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -344,6 +344,8 @@
                          rop.VEC_RAW_STORE,
                          rop.VEC_GETARRAYITEM_RAW,
                          rop.VEC_SETARRAYITEM_RAW,
+                         rop.VEC_GETARRAYITEM_GC,
+                         rop.VEC_SETARRAYITEM_GC,
                          ):      # list of opcodes never executed by pyjitpl
                 continue
             if rop._VEC_PURE_FIRST <= value <= rop._VEC_PURE_LAST:
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -805,8 +805,9 @@
     def operation_{name}(self, op, node):
         descr = op.getdescr()
         idx_ref = self.get_or_create(op.getarg(1))
-        node.memory_ref = MemoryRef(op, idx_ref, {raw_access})
-        self.memory_refs[node] = node.memory_ref
+        if descr.is_array_of_primitives():
+            node.memory_ref = MemoryRef(op, idx_ref, {raw_access})
+            self.memory_refs[node] = node.memory_ref
     """
     exec py.code.Source(array_access_source
            .format(name='RAW_LOAD',raw_access=True)).compile()
@@ -816,6 +817,10 @@
            .format(name='GETARRAYITEM_RAW',raw_access=False)).compile()
     exec py.code.Source(array_access_source
            .format(name='SETARRAYITEM_RAW',raw_access=False)).compile()
+    exec py.code.Source(array_access_source
+           .format(name='GETARRAYITEM_GC',raw_access=False)).compile()
+    exec py.code.Source(array_access_source
+           .format(name='SETARRAYITEM_GC',raw_access=False)).compile()
     del array_access_source
 integral_dispatch_opt = make_dispatcher_method(IntegralForwardModification, 'operation_')
 IntegralForwardModification.inspect_operation = integral_dispatch_opt
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -692,8 +692,10 @@
 
     rop.VEC_RAW_LOAD:         LOAD_TRANS,
     rop.VEC_GETARRAYITEM_RAW: LOAD_TRANS,
+    rop.VEC_GETARRAYITEM_GC: LOAD_TRANS,
     rop.VEC_RAW_STORE:        STORE_TRANS,
     rop.VEC_SETARRAYITEM_RAW: STORE_TRANS,
+    rop.VEC_SETARRAYITEM_GC: STORE_TRANS,
 
     rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOpConv(PT_DOUBLE_2, PT_FLOAT_2),
     rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(PT_FLOAT_2, PT_DOUBLE_2),
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -212,21 +212,6 @@
         """
         self.assert_vectorize(self.parse_loop(ops), self.parse_loop(ops))
 
-    def test_vectorize_skip_impossible_2(self):
-        ops = """
-        [p0,i0]
-        i1 = int_add(i0,1)
-        i2 = int_le(i1, 10)
-        guard_true(i2) []
-        i3 = getarrayitem_gc(p0,i0,descr=intarraydescr)
-        jump(p0,i1)
-        """
-        try:
-            self.vectorize(self.parse_loop(ops))
-            py.test.fail("should not happend")
-        except NotAVectorizeableLoop:
-            pass
-
     def test_unroll_empty_stays_empty(self):
         """ has no operations in this trace, thus it stays empty
         after unrolling it 2 times """
@@ -264,6 +249,26 @@
         """
         self.assert_vectorize(self.parse_loop(ops), self.parse_loop(ops))
 
+    def test_load_primitive_python_list(self):
+        """ two getarrayitem_gc loads of a float array become one vec_getarrayitem_gc """
+        ops = """
+        [p0,i0]
+        i2 = getarrayitem_gc(p0,i0,descr=floatarraydescr)
+        i1 = int_add(i0,1)
+        i3 = getarrayitem_gc(p0,i1,descr=floatarraydescr)
+        i4 = int_add(i1,1)
+        jump(p0,i4)
+        """
+        opt = """
+        [p0,i0]
+        i1 = int_add(i0,1)
+        i2 = int_add(i0,2)
+        i3 = vec_getarrayitem_gc(p0,i0,2,descr=floatarraydescr)
+        jump(p0,i2)
+        """
+        vopt = self.vectorize(self.parse_loop(ops),0)
+        self.assert_equal(vopt.loop, self.parse_loop(opt))
+
     def test_vect_unroll_char(self):
         """ a 16 byte vector register can hold 16 bytes thus 
         it is unrolled 16 times. (it is the smallest type in the trace) """
@@ -316,7 +321,7 @@
     def test_estimate_unroll_factor_smallest_byte_zero(self):
         ops = """
         [p0,i0]
-        raw_load(p0,i0,descr=arraydescr2)
+        raw_load(p0,i0,descr=arraydescr)
         jump(p0,i0)
         """
         vopt = self.vectoroptimizer(self.parse_loop(ops))
@@ -326,7 +331,7 @@
     def test_array_operation_indices_not_unrolled(self):
         ops = """
         [p0,i0]
-        raw_load(p0,i0,descr=arraydescr2)
+        raw_load(p0,i0,descr=arraydescr)
         jump(p0,i0)
         """
         vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -253,13 +253,12 @@
     def linear_find_smallest_type(self, loop):
         # O(#operations)
         for i,op in enumerate(loop.operations):
-            if op.is_raw_array_access():
+            if op.is_primitive_array_access():
                 descr = op.getdescr()
-                if not descr.is_array_of_pointers():
-                    byte_count = descr.get_item_size_in_bytes()
-                    if self.smallest_type_bytes == 0 \
-                       or byte_count < self.smallest_type_bytes:
-                        self.smallest_type_bytes = byte_count
+                byte_count = descr.get_item_size_in_bytes()
+                if self.smallest_type_bytes == 0 \
+                   or byte_count < self.smallest_type_bytes:
+                    self.smallest_type_bytes = byte_count
 
     def get_unroll_count(self, simd_vec_reg_bytes):
         """ This is an estimated number of further unrolls """
@@ -667,7 +666,7 @@
                 if origin_pack is None:
                     descr = lnode.getoperation().getdescr()
                     ptype = PackType.by_descr(descr, self.vec_reg_size)
-                    if lnode.getoperation().is_raw_load():
+                    if lnode.getoperation().is_primitive_load():
                         # load outputs value, no input
                         return Pair(lnode, rnode, None, ptype)
                     else:
@@ -710,7 +709,7 @@
         """ Blocks the packing of some operations """
         if inquestion.vector == -1:
             return True
-        if packed.is_raw_array_access():
+        if packed.is_primitive_array_access():
             if packed.getarg(1) == inquestion.result:
                 return True
         if not forward and inquestion.getopnum() == rop.INT_SIGNEXT:
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -174,10 +174,19 @@
     def is_raw_array_access(self):
         return self.is_raw_load() or self.is_raw_store()
 
-    def is_raw_load(self):
+    def is_primitive_array_access(self):
+        """ Indicates that this operation loads/stores a
+        primitive type (int,float) """
+        if not self.is_primitive_load() and not self.is_primitive_store():
+            return False
+        if self.getdescr().is_array_of_primitives():
+            return True
+        return False
+
+    def is_primitive_load(self):
         return rop._RAW_LOAD_FIRST < self.getopnum() < rop._RAW_LOAD_LAST
 
-    def is_raw_store(self):
+    def is_primitive_store(self):
         return rop._RAW_STORE_FIRST < self.getopnum() < rop._RAW_STORE_LAST
 
     def is_comparison(self):
@@ -568,13 +577,13 @@
     #
     '_ALWAYS_PURE_LAST',  # ----- end of always_pure operations -----
 
+    '_RAW_LOAD_FIRST',
     'GETARRAYITEM_GC/2d',
-
-    '_RAW_LOAD_FIRST',
     'GETARRAYITEM_RAW/2d',
     'VEC_GETARRAYITEM_RAW/3d',
     'RAW_LOAD/2d',
     'VEC_RAW_LOAD/3d',
+    'VEC_GETARRAYITEM_GC/3d',
     '_RAW_LOAD_LAST',
 
     'GETINTERIORFIELD_GC/2d',
@@ -596,13 +605,14 @@
     '_NOSIDEEFFECT_LAST', # ----- end of no_side_effect operations -----
 
     'INCREMENT_DEBUG_COUNTER/1',
-    'SETARRAYITEM_GC/3d',
 
     '_RAW_STORE_FIRST',
+    'SETARRAYITEM_GC/3d',
     'SETARRAYITEM_RAW/3d',
     'VEC_SETARRAYITEM_RAW/3d',
     'RAW_STORE/3d',
     'VEC_RAW_STORE/3d',
+    'VEC_SETARRAYITEM_GC/3d',
     '_RAW_STORE_LAST',
 
     'SETINTERIORFIELD_GC/3d',
@@ -796,8 +806,10 @@
 _opvector = {
     rop.RAW_LOAD:         rop.VEC_RAW_LOAD,
     rop.GETARRAYITEM_RAW: rop.VEC_GETARRAYITEM_RAW,
+    rop.GETARRAYITEM_GC: rop.VEC_GETARRAYITEM_GC,
     rop.RAW_STORE:        rop.VEC_RAW_STORE,
     rop.SETARRAYITEM_RAW: rop.VEC_SETARRAYITEM_RAW,
+    rop.SETARRAYITEM_GC: rop.VEC_SETARRAYITEM_GC,
 
     rop.INT_ADD:   rop.VEC_INT_ADD,
     rop.INT_SUB:   rop.VEC_INT_SUB,


More information about the pypy-commit mailing list