[pypy-commit] pypy vecopt: ironed out the problems with the bridge creation

plan_rich noreply at buildbot.pypy.org
Wed Jul 8 09:34:02 CEST 2015


Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r78492:a026d96015e4
Date: 2015-07-08 09:34 +0200
http://bitbucket.org/pypy/pypy/changeset/a026d96015e4/

Log:	ironed out the problems with the bridge creation: the fail arguments
	now save the regloc of the scalar variable; the actual position is
	saved on the descriptor and reconstructed later

diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -528,14 +528,12 @@
     def test_prod(self):
         result = self.run("prod")
         assert int(result) == 576
-        self.check_trace_count(1)
-        self.check_vectorized(2, 1)
+        self.check_vectorized(1, 1)
 
     def test_prod_zero(self):
         result = self.run("prod_zero")
         assert int(result) == 0
-        self.check_trace_count(1)
-        self.check_vectorized(2, 1)
+        self.check_vectorized(1, 1)
 
 
     def define_max():
@@ -767,8 +765,7 @@
     def test_setslice(self):
         result = self.run("setslice")
         assert result == 5.5
-        self.check_trace_count(1)
-        self.check_vectorized(2, 1)
+        self.check_vectorized(1, 1)
 
     def define_virtual_slice():
         return """
@@ -806,7 +803,6 @@
     def test_flat_getitem(self):
         result = self.run("flat_getitem")
         assert result == 10.0
-        self.check_trace_count(1)
         self.check_vectorized(0,0)
 
     def define_flat_setitem():
@@ -820,7 +816,6 @@
     def test_flat_setitem(self):
         result = self.run("flat_setitem")
         assert result == 1.0
-        self.check_trace_count(1)
         self.check_vectorized(1,0) # TODO this can be improved
 
     def define_dot():
@@ -847,8 +842,7 @@
     def test_argsort(self):
         result = self.run("argsort")
         assert result == 6
-        self.check_trace_count(1)
-        self.check_vectorized(2,1) # vec. setslice
+        self.check_vectorized(1,1) # vec. setslice
 
     def define_where():
         return """
@@ -862,7 +856,6 @@
     def test_where(self):
         result = self.run("where")
         assert result == -40
-        self.check_trace_count(1)
         self.check_vectorized(1, 0) # TODO might be possible to vectorize
 
     def define_searchsorted():
@@ -877,7 +870,6 @@
         result = self.run("searchsorted")
         assert result == 0
         self.check_trace_count(6)
-        # TODO?
 
     def define_int_mul_array():
         return """
@@ -908,8 +900,7 @@
     def test_slice(self):
         result = self.run("slice")
         assert result == 18
-        self.check_trace_count(1)
-        self.check_vectorized(2,1)
+        self.check_vectorized(1,1)
 
     def define_multidim_slice():
         return """
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -8,7 +8,7 @@
 from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
 from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
 from rpython.jit.metainterp.history import (Const, Box, VOID,
-    BoxVector, ConstInt, BoxVectorAccum)
+    BoxVector, ConstInt)
 from rpython.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
 from rpython.jit.metainterp.compile import CompileLoopVersionDescr
 from rpython.rtyper.lltypesystem import lltype, rffi, rstr, llmemory
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -26,6 +26,7 @@
     ConstFloat, BoxInt, BoxFloat, BoxVector, BoxVectorAccum, INT, REF,
     FLOAT, VECTOR, TargetToken)
 from rpython.jit.metainterp.resoperation import rop, ResOperation
+from rpython.jit.metainterp.compile import ResumeGuardDescr
 from rpython.rlib import rgc
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.rlib.rarithmetic import r_longlong, r_uint
@@ -304,7 +305,32 @@
         self.assembler.regalloc_perform_math(op, arglocs, result_loc)
 
     def locs_for_fail(self, guard_op):
-        return [self.loc(v) for v in guard_op.getfailargs()]
+        faillocs = []
+        descr = guard_op.getdescr()
+        for v in guard_op.getfailargs():
+            if v is not None and isinstance(v, BoxVectorAccum):
+                loc = self.loc(v.scalar_var)
+                self.update_accumulation_loc(v, descr)
+                faillocs.append(loc)
+            else:
+                faillocs.append(self.loc(v))
+
+        return faillocs
+
+    def update_accumulation_loc(self, accumbox, descr):
+        """ Saves the location to the AccumInfo object.
+        Necessary to reconstruct the values at a guard exit.
+        """
+        box = accumbox.scalar_var
+        assert isinstance(descr, ResumeGuardDescr)
+        accum_info = descr.rd_accum_list
+        while accum_info:
+            if accum_info.box is box:
+                accum_info.loc = self.loc(accumbox)
+                break
+            accum_info = accum_info.prev
+        else:
+            raise AssertionError("accum box has no accum_info entry")
 
     def perform_with_guard(self, op, guard_op, arglocs, result_loc):
         faillocs = self.locs_for_fail(guard_op)
diff --git a/rpython/jit/backend/x86/vector_ext.py b/rpython/jit/backend/x86/vector_ext.py
--- a/rpython/jit/backend/x86/vector_ext.py
+++ b/rpython/jit/backend/x86/vector_ext.py
@@ -12,6 +12,7 @@
 from rpython.jit.backend.llsupport.regalloc import (get_scale, valid_addressing_size)
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.rtyper.lltypesystem.lloperation import llop
+from rpython.rtyper.lltypesystem import lltype
 
 # duplicated for easy migration, def in assembler.py as well
 # DUP START
@@ -65,23 +66,21 @@
         accum_info = faildescr.rd_accum_list
         while accum_info:
             pos = accum_info.position
-            loc = fail_locs[pos]
+            loc = accum_info.loc
+            tgtloc = fail_locs[pos]
+            # the upper elements will be lost if saved to the stack!
             assert isinstance(loc, RegLoc)
-            arg = fail_args[pos]
-            if isinstance(arg, BoxVectorAccum):
-                arg = arg.scalar_var
+            if not isinstance(tgtloc, RegLoc):
+                tgtloc = regalloc.force_allocate_reg(accum_info.box)
+            arg = accum_info.box
             assert arg is not None
-            tgtloc = regalloc.force_allocate_reg(arg, fail_args)
             if accum_info.operation == '+':
-                # reduction using plus
                 self._accum_reduce_sum(arg, loc, tgtloc)
             elif accum_info.operation == '*':
                 self._accum_reduce_mul(arg, loc, tgtloc)
             else:
                 not_implemented("accum operator %s not implemented" %
                                             (accum_info.operation)) 
-            fail_locs[pos] = tgtloc
-            regalloc.possibly_free_var(arg)
             accum_info = accum_info.prev
 
     def _accum_reduce_mul(self, arg, accumloc, targetloc):
diff --git a/rpython/jit/metainterp/history.py b/rpython/jit/metainterp/history.py
--- a/rpython/jit/metainterp/history.py
+++ b/rpython/jit/metainterp/history.py
@@ -711,9 +711,9 @@
             i += 1
         assert label.getopnum() == rop.LABEL
         self.label_pos = i
-        self.parent_trace_label_args = None
-        self.bridge_label_args = label.getarglist()
-        self.inputargs = None
+        #self.parent_trace_label_args = None
+        #self.bridge_label_args = label.getarglist()
+        self.inputargs = label.getarglist()
 
     def adddescr(self, op, descr):
         self.faildescrs.append((op, descr))
@@ -730,14 +730,6 @@
         label.setdescr(token)
         jump.setdescr(token)
 
-        assert len(self.bridge_label_args) <= len(self.parent_trace_label_args)
-        for i in range(len(self.bridge_label_args)):
-            arg = self.parent_trace_label_args[i]
-            if isinstance(arg, BoxVectorAccum):
-                self.bridge_label_args[i] = arg
-                label.setarg(i, arg)
-        self.inputargs = self.bridge_label_args
-
         return token
 
 class TreeLoop(object):
@@ -809,30 +801,38 @@
     def seen_args(inputargs):
         seen = {}
         for arg in inputargs:
+            if arg is None:
+                continue
             if isinstance(arg, BoxVectorAccum):
                 seen[arg.scalar_var] = None
-                seen[arg] = None
             else:
                 seen[arg] = None
         return seen
 
     @staticmethod
+    def check_if_box_was_seen(box, seen):
+        if box is not None:
+            assert isinstance(box, Box)
+            if isinstance(box, BoxVectorAccum):
+                assert box in seen or box.scalar_var in seen
+            else:
+                assert box in seen
+
+    @staticmethod
     def check_consistency_of_branch(operations, seen):
         "NOT_RPYTHON"
         for op in operations:
             for i in range(op.numargs()):
                 box = op.getarg(i)
                 if isinstance(box, Box):
-                    assert box in seen
+                    TreeLoop.check_if_box_was_seen(box, seen)
             if op.is_guard():
                 assert op.getdescr() is not None
                 if hasattr(op.getdescr(), '_debug_suboperations'):
                     ops = op.getdescr()._debug_suboperations
                     TreeLoop.check_consistency_of_branch(ops, seen.copy())
                 for box in op.getfailargs() or []:
-                    if box is not None:
-                        assert isinstance(box, Box)
-                        assert box in seen
+                    TreeLoop.check_if_box_was_seen(box, seen)
             else:
                 assert op.getfailargs() is None
             box = op.result
@@ -844,8 +844,12 @@
                 inputargs = op.getarglist()
                 for box in inputargs:
                     assert isinstance(box, Box), "LABEL contains %r" % (box,)
-                seen = dict.fromkeys(inputargs)
-                assert len(seen) == len(inputargs), (
+                seen = TreeLoop.seen_args(inputargs)
+                seen_count = len(seen)
+                for arg in seen:
+                    if isinstance(arg, BoxVectorAccum):
+                        seen_count -= 1
+                assert seen_count == len(inputargs), (
                     "duplicate Box in the LABEL arguments")
 
         assert operations[-1].is_final()
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -861,10 +861,12 @@
     PLUS = '+'
     MULTIPLY = '*'
 
-    def __init__(self, var, pos, operator):
+    def __init__(self, opnum, var, pos):
         self.var = var
         self.pos = pos
-        self.operator = operator
+        self.operator = Accum.PLUS
+        if opnum == rop.FLOAT_MUL:
+            self.operator = Accum.MULTIPLY
 
 class Pack(object):
     """ A pack is a set of n statements that are:
diff --git a/rpython/jit/metainterp/optimizeopt/util.py b/rpython/jit/metainterp/optimizeopt/util.py
--- a/rpython/jit/metainterp/optimizeopt/util.py
+++ b/rpython/jit/metainterp/optimizeopt/util.py
@@ -222,11 +222,6 @@
                 args = guard.getfailargs()
             for i,arg in enumerate(args):
                 value = self.rename_map.get(arg,arg)
-                if value is not arg and isinstance(value, BoxVectorAccum):
-                    descr = guard.getdescr()
-                    assert isinstance(descr,ResumeGuardDescr)
-                    ai = AccumInfo(descr.rd_accum_list, i, value.operator)
-                    descr.rd_accum_list = ai
                 args[i] = value
             return args
         return None
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -8,11 +8,11 @@
 import py
 import time
 
-from rpython.jit.metainterp.resume import Snapshot
+from rpython.jit.metainterp.resume import Snapshot, AccumInfo
 from rpython.jit.metainterp.jitexc import NotAVectorizeableLoop, NotAProfitableLoop
 from rpython.jit.metainterp.optimizeopt.unroll import optimize_unroll
 from rpython.jit.metainterp.compile import (ResumeAtLoopHeaderDescr,
-        CompileLoopVersionDescr, invent_fail_descr_for_op)
+        CompileLoopVersionDescr, invent_fail_descr_for_op, ResumeGuardDescr)
 from rpython.jit.metainterp.history import (ConstInt, VECTOR, FLOAT, INT,
         BoxVector, BoxFloat, BoxInt, ConstFloat, TargetToken, JitCellToken, Box,
         BoxVectorAccum, LoopVersion)
@@ -31,21 +31,6 @@
 from rpython.rlib.jit import Counters
 from rpython.rtyper.lltypesystem import lltype, rffi
 
-def debug_print_operations(loop):
-    """ NOT_RPYTHON """
-    if not we_are_translated():
-        print('--- loop instr numbered ---')
-        def ps(snap):
-            if snap.prev is None:
-                return []
-            return ps(snap.prev) + snap.boxes[:]
-        for i,op in enumerate(loop.operations):
-            print "[",str(i).center(2," "),"]",op,
-            if op.is_guard():
-                print op.getfailargs()
-            else:
-                print ""
-
 def optimize_vector(metainterp_sd, jitdriver_sd, loop, optimizations,
                     inline_short_preamble, start_state, cost_threshold):
     optimize_unroll(metainterp_sd, jitdriver_sd, loop, optimizations,
@@ -72,7 +57,7 @@
 
         aligned_vector_version = LoopVersion(loop, aligned=True)
 
-        loop.versions = [orig_version] #, aligned_vector_version]
+        loop.versions = [orig_version]
 
         metainterp_sd.profiler.count(Counters.OPT_VECTORIZED)
         metainterp_sd.logger_noopt.log_loop(loop.inputargs, loop.operations, -2, None, None, "post vectorize")
@@ -198,8 +183,6 @@
 
         self.emit_unrolled_operation(label_op)
 
-        self.orig_loop_version.parent_trace_label_args = label_op.getarglist()[:]
-
         renamer = Renamer()
         oi = 0
         pure = True
@@ -495,6 +478,17 @@
                 assert node.emitted
         if vector and not self.costmodel.profitable():
             return
+        if vector:
+            # add accumulation info to the descriptor
+            for guard_node in self.dependency_graph.guards:
+                op = guard_node.getoperation()
+                failargs = op.getfailargs()
+                for i,arg in enumerate(failargs):
+                    if isinstance(arg, BoxVectorAccum):
+                        descr = op.getdescr()
+                        assert isinstance(descr,ResumeGuardDescr)
+                        ai = AccumInfo(descr.rd_accum_list, i, arg.operator, arg.scalar_var)
+                        descr.rd_accum_list = ai
         self.loop.operations = \
             sched_data.prepend_invariant_operations(self._newoperations)
         self.clear_newoperations()
@@ -837,10 +831,7 @@
                 # of leading/preceding signext/floatcast instructions needs to be
                 # considered. => tree pattern matching problem.
                 return None
-            operator = Accum.PLUS
-            if opnum == rop.FLOAT_MUL:
-                operator = Accum.MULTIPLY
-            accum = Accum(accum_var, accum_pos, operator)
+            accum = Accum(opnum, accum_var, accum_pos)
             return AccumPair(lnode, rnode, ptype, ptype, accum)
 
         return None
diff --git a/rpython/jit/metainterp/resume.py b/rpython/jit/metainterp/resume.py
--- a/rpython/jit/metainterp/resume.py
+++ b/rpython/jit/metainterp/resume.py
@@ -35,11 +35,13 @@
         self.pc = pc
 
 class AccumInfo(object):
-    __slots__ = ('prev', 'position', 'operation')
-    def __init__(self, prev, position, operation):
+    __slots__ = ('prev', 'position', 'operation', 'box', 'loc')
+    def __init__(self, prev, position, operation, box):
         self.prev = prev
         self.operation = operation
         self.position = position
+        self.box = box
+        self.loc = None
 
 def _ensure_parent_resumedata(framestack, n):
     target = framestack[n]


More information about the pypy-commit mailing list