[pypy-commit] pypy vecopt2: added vector IR operations (load/store & arithmetic)

plan_rich noreply at buildbot.pypy.org
Tue May 5 09:45:48 CEST 2015


Author: Richard Plangger <rich at pasra.at>
Branch: vecopt2
Changeset: r77098:6c8dc39764dc
Date: 2015-03-27 18:04 +0100
http://bitbucket.org/pypy/pypy/changeset/6c8dc39764dc/

Log:	added vector IR operations (load/store & arithmetic); adapted the
	blackhole interpreter/executor to ignore those for now; started to
	implement the scheduling of operations

diff --git a/rpython/jit/metainterp/executor.py b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -340,6 +340,12 @@
                          rop.LABEL,
                          ):      # list of opcodes never executed by pyjitpl
                 continue
+            # XXX this is temporary! After the algorithm works, I have to adjust the
+            # black hole interpreter!
+            if rop._VEC_ARITHMETIC_FIRST <= value <= rop._VEC_ARITHMETIC_LAST or \
+               value == rop.VEC_RAW_LOAD or value == rop.VEC_RAW_STORE:
+                continue
+
             raise AssertionError("missing %r" % (key,))
     return execute_by_num_args
 
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -106,6 +106,7 @@
         self.memory_refs = memory_refs
         self.adjacent_list = [ [] for i in range(len(self.operations)) ]
         self.integral_mod = IntegralMod()
+        self.schedulable_nodes = [0] # label is always scheduleable
         self.build_dependencies(self.operations)
 
     def build_dependencies(self, operations):
@@ -146,6 +147,9 @@
                 if i > 0:
                     self._guard_dependency(op, i, operations, tracker)
 
+            if len(self.adjacent_list[i]) == 0:
+                self.schedulable_nodes.append(i)
+
     def update_memory_ref(self, op, index, tracker):
         if index not in self.memory_refs:
             return
@@ -375,6 +379,26 @@
         opnum = op.getopnum()
         return rop.SETARRAYITEM_GC<= opnum and opnum <= rop.UNICODESETITEM
 
+class Scheduler(object):
+    def __init__(self, graph):
+        self.graph = graph
+        self.schedulable_nodes = self.graph.schedulable_nodes
+
+    def has_more_to_schedule(self):
+        return len(self.schedulable_nodes) > 0
+
+    def next_schedule_index(self):
+        return self.schedulable_nodes[0]
+
+    def schedule(self, index):
+        node = self.schedulable_nodes[index]
+        del self.schedulable_nodes[index]
+        #
+        for dep in self.graph.get_uses(node):
+            self.schedulable_nodes.append(dep.idx_to)
+        #
+        # self.graph.adjacent_list[node] = None
+
 class IntegralMod(object):
     """ Calculates integral modifications on an integer object.
     The operations must be provided in backwards direction and of one
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -11,7 +11,7 @@
 from rpython.jit.metainterp.optimizeopt.dependency import DependencyGraph
 from rpython.jit.metainterp.optimizeopt.unroll import Inliner
 from rpython.jit.metainterp.optimizeopt.vectorize import (VectorizingOptimizer, MemoryRef,
-        isomorphic, Pair)
+        isomorphic, Pair, NotAVectorizeableLoop)
 from rpython.jit.metainterp.optimize import InvalidLoop
 from rpython.jit.metainterp.history import ConstInt, BoxInt, get_const_ptr_for_string
 from rpython.jit.metainterp import executor, compile, resume
@@ -81,6 +81,15 @@
         opt.combine_packset()
         return opt
 
+    def schedule(self, loop, unroll_factor = -1):
+        opt = self.vec_optimizer_unrolled(loop, unroll_factor)
+        opt.build_dependency_graph()
+        opt.find_adjacent_memory_refs()
+        opt.extend_packset()
+        opt.combine_packset()
+        opt.schedule()
+        return opt
+
     def assert_unroll_loop_equals(self, loop, expected_loop, \
                      unroll_factor = -1):
         vec_optimizer = self.vec_optimizer_unrolled(loop, unroll_factor)
@@ -783,19 +792,23 @@
         []
         jump()
         """
-        vopt = self.combine_packset(self.parse_loop(ops),15)
-        assert len(vopt.vec_info.memory_refs) == 0
-        assert len(vopt.packset.packs) == 0
+        try:
+            self.combine_packset(self.parse_loop(ops),15)
+            pytest.fail("combine should raise an exception if no pack "
+                        "statements are present")
+        except NotAVectorizeableLoop:
+            pass
 
         ops = """
         [p0,i0]
         i3 = getarrayitem_gc(p0, i0, descr=floatarraydescr)
         jump(p0,i3)
         """
-        loop = self.parse_loop(ops)
-        vopt = self.combine_packset(loop,15)
-        assert len(vopt.vec_info.memory_refs) == 16
-        assert len(vopt.packset.packs) == 0
+        try:
+            loop = self.parse_loop(ops)
+            self.combine_packset(loop,15)
+        except NotAVectorizeableLoop:
+            pass
 
     def test_packset_vector_operation(self):
         for op in ['int_add', 'int_sub', 'int_mul']:
@@ -849,13 +862,13 @@
             guard_true(i16) []
             i2 = vec_raw_load(p0, i0, 4, descr=floatarraydescr)
             i3 = vec_raw_load(p1, i0, 4, descr=floatarraydescr)
-            i4 = {op}(i2,i3)
+            i4 = {op}(i2,i3,4,descr=floatarraydescr)
             vec_raw_store(p2, i0, i4, 4, descr=floatarraydescr)
             jump(p0,p1,p2,i15)
             """.format(op=vop)
             loop = self.parse_loop(ops)
             vopt = self.schedule(loop,3)
-            self.assert_equals(loop, self.parse_loop(vops)
+            self.assert_equal(loop, self.parse_loop(vops))
 
 class TestLLtype(BaseTestVectorize, LLtypeMixin):
     pass
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -4,7 +4,7 @@
 from rpython.jit.metainterp.optimizeopt.optimizer import Optimizer, Optimization
 from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method
 from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph, 
-        MemoryRef, IntegralMod)
+        MemoryRef, IntegralMod, Scheduler)
 from rpython.jit.metainterp.resoperation import rop
 from rpython.jit.metainterp.resume import Snapshot
 from rpython.rlib.debug import debug_print, debug_start, debug_stop
@@ -39,6 +39,10 @@
         self.packset = None
         self.unroll_count = 0
 
+    def emit_operation(self, op):
+        self._last_emitted_op = op
+        self._newoperations.append(op)
+
     def emit_unrolled_operation(self, op):
         if op.getopnum() == rop.DEBUG_MERGE_POINT:
             self.last_debug_merge_point = op
@@ -263,26 +267,42 @@
             self.packset.add_pair(*candidate)
 
     def combine_packset(self):
-        changed = False
+        if len(self.packset.packs) == 0:
+            raise NotAVectorizeableLoop()
         while True:
-            changed = False
+            len_before = len(self.packset.packs)
             for i,pack1 in enumerate(self.packset.packs):
                 for j,pack2 in enumerate(self.packset.packs):
                     if i == j:
                         continue
                     if pack1.rightmost_match_leftmost(pack2):
                         self.packset.combine(i,j)
-                        changed = True
-                        break
+                        continue
                     if pack2.rightmost_match_leftmost(pack1):
                         self.packset.combine(j,i)
-                        changed = True
-                        break
-                if changed:
-                    break
-            if not changed:
+                        continue
+            if len_before == len(self.packset.packs):
                 break
 
+    def schedule(self):
+        scheduler = Scheduler(self.dependency_graph)
+        while scheduler.has_more_to_schedule():
+            candidate_index = scheduler.next_schedule_index()
+            candidate = self.loop.operations[candidate_index]
+            pack = self.packset.pack_for_operation(candidate, candidate_index)
+            if pack:
+                self._schedule_pack(scheduler, pack)
+            else:
+                self.emit_operation(candidate)
+                scheduler.schedule(0)
+
+    def _schedule_pack(self, scheduler, pack):
+        if scheduler.all_schedulable([ e.opidx for e in pack.operations ]):
+            self.emit_vec_operation(pack)
+
+    def emit_vec_operation(self, pack):
+        pass
+
 def isomorphic(l_op, r_op):
     """ Described in the paper ``Instruction-Isomorphism in Program Execution''.
     I think this definition is to strict. TODO -> find another reference
@@ -350,7 +370,15 @@
         for op in pack_j.operations[1:]:
             operations.append(op)
         self.packs[i] = Pack(operations)
-        del self.packs[j]
+        # instead of deleting an item in the center of the pack array,
+        # the last element is assigned to position j and
+        # the last slot is freed. The order of packs doesn't matter
+        last_pos = len(self.packs) - 1
+        if j == last_pos:
+            del self.packs[j]
+        else:
+            self.packs[j] = self.packs[last_pos]
+            del self.packs[last_pos]
 
 class Pack(object):
     """ A pack is a set of n statements that are:
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -167,6 +167,9 @@
     def is_ovf(self):
         return rop._OVF_FIRST <= self.getopnum() <= rop._OVF_LAST
 
+    def is_vector_arithmetic(self):
+        return rop._VEC_ARITHMETIC_FIRST <= self.getopnum() <= rop._VEC_ARITHMETIC_LAST
+
     def is_comparison(self):
         return self.is_always_pure() and self.returns_bool_result()
 
@@ -440,6 +443,28 @@
     'CONVERT_FLOAT_BYTES_TO_LONGLONG/1',
     'CONVERT_LONGLONG_BYTES_TO_FLOAT/1',
     #
+    # vector operations
+    '_VEC_ARITHMETIC_FIRST',
+    'VEC_CHAR_ADD/3d',
+    'VEC_CHAR_SUB/3d',
+    'VEC_CHAR_MUL/3d',
+    'VEC_SHORT_ADD/3d',
+    'VEC_SHORT_SUB/3d',
+    'VEC_SHORT_MUL/3d',
+    'VEC_INT_ADD/3d',
+    'VEC_INT_SUB/3d',
+    'VEC_INT_MUL/3d',
+    'VEC_UINT_ADD/3d',
+    'VEC_UINT_SUB/3d',
+    'VEC_UINT_MUL/3d',
+    'VEC_SP_FLOAT_ADD/3d',
+    'VEC_SP_FLOAT_SUB/3d',
+    'VEC_SP_FLOAT_MUL/3d',
+    'VEC_FLOAT_ADD/3d',
+    'VEC_FLOAT_SUB/3d',
+    'VEC_FLOAT_MUL/3d',
+    '_VEC_ARITHMETIC_LAST',
+    #
     'INT_LT/2b',
     'INT_LE/2b',
     'INT_EQ/2b',
@@ -488,6 +513,7 @@
     'GETARRAYITEM_RAW/2d',
     'GETINTERIORFIELD_GC/2d',
     'RAW_LOAD/2d',
+    'VEC_RAW_LOAD/3d',
     'GETFIELD_GC/1d',
     'GETFIELD_RAW/1d',
     '_MALLOC_FIRST',
@@ -511,6 +537,7 @@
     'SETINTERIORFIELD_GC/3d',
     'SETINTERIORFIELD_RAW/3d',    # right now, only used by tests
     'RAW_STORE/3d',
+    'VEC_RAW_STORE/4d',
     'SETFIELD_GC/2d',
     'ZERO_PTR_FIELD/2', # only emitted by the rewrite, clears a pointer field
                         # at a given constant offset, no descr


More information about the pypy-commit mailing list