[pypy-commit] pypy vecopt: retinkering the dependency construction, statements with sideeffects need stronger dependencies

plan_rich noreply at buildbot.pypy.org
Thu May 28 13:04:07 CEST 2015


Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77640:f83b729acb89
Date: 2015-05-28 13:04 +0200
http://bitbucket.org/pypy/pypy/changeset/f83b729acb89/

Log:	Retinkering the dependency construction: statements with side effects
	need stronger dependencies. Improved the guard-strengthening
	optimization. Removed a glitch in constructing pack operations
	(arguments were missing and intermixed).

diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2696,7 +2696,7 @@
                         # if source is a normal register (unpack)
                         assert count == 1
                         assert si == 0
-                        self.mc.MOVAPS(X86_64_XMM_SCRATCH_REG, srcloc)
+                        self.mc.move(X86_64_XMM_SCRATCH_REG, srcloc)
                         src = X86_64_XMM_SCRATCH_REG.value
                     select = ((si & 0x3) << 6)|((ri & 0x3) << 4)
                     self.mc.INSERTPS_xxi(resloc.value, src, select)
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1579,20 +1579,20 @@
     del consider_vec_logic
 
     def consider_vec_int_pack(self, op):
-        index = op.getarg(1)
-        arg = op.getarg(2)
+        # new_res = vec_int_pack(res, src, index, count)
+        arg = op.getarg(1)
+        index = op.getarg(2)
+        count = op.getarg(3)
         assert isinstance(index, ConstInt)
+        assert isinstance(count, ConstInt)
         args = op.getarglist()
         srcloc = self.make_sure_var_in_reg(arg, args)
         resloc =  self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
-        residx = 0
+        residx = index.value # where to put it in result?
+        srcidx = 0
         assert isinstance(op.result, BoxVector)
-        args = op.getarglist()
         size = op.result.getsize()
-        count = 1
-        if isinstance(arg, BoxVector):
-            count = arg.getcount()
-        arglocs = [resloc, srcloc, imm(index.value), imm(0), imm(count), imm(size)]
+        arglocs = [resloc, srcloc, imm(residx), imm(srcidx), imm(count.value), imm(size)]
         self.perform(op, arglocs, resloc)
 
     consider_vec_float_pack = consider_vec_int_pack
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -392,6 +392,10 @@
     def __init__(self, graph):
         self.graph = graph
         self.defs = {}
+        self.non_pure = []
+
+    def add_non_pure(self, node):
+        self.non_pure.append(node)
 
     def define(self, arg, node, argcell=None):
         if isinstance(arg, Const):
@@ -537,9 +541,13 @@
                 if node.exits_early():
                     pass
                 else:
+                    # consider cross iterations?
                     if len(self.guards) > 0:
                         last_guard = self.guards[-1]
                         last_guard.edge_to(node, "guardorder")
+                    for nonpure in tracker.non_pure:
+                        nonpure.edge_to(node, failarg=True)
+                    tracker.non_pure = []
                 self.guards.append(node)
             else:
                 self.build_non_pure_dependencies(node, tracker)
@@ -689,6 +697,8 @@
             if len(self.guards) > 0:
                 last_guard = self.guards[-1]
                 last_guard.edge_to(node, "sideeffect")
+            # and the next guard instruction
+            tracker.add_non_pure(node)
 
     def __repr__(self):
         graph = "graph([\n"
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -937,10 +937,10 @@
         i3 = int_lt(i2, 102)
         guard_true(i3) [p0,i0]
         {dead_code}
-        i500 = same_as(i2)
-        i300 = int_lt(i500, 102)
+        i500 = int_add(i0, 16)
+        i501 = int_lt(i2, 102)
         i1 = vec_getarrayitem_raw(p0, i0, 16, descr=chararraydescr)
-        jump(p0,i500)
+        jump(p0,i2)
         """.format(dead_code=dead_code)
         vopt = self.vectorize(self.parse_loop(ops),15)
         self.assert_equal(vopt.loop, self.parse_loop(opt))
@@ -982,12 +982,12 @@
         i2 = int_add(i0, 2)
         i3 = int_lt(i2, 10)
         guard_true(i3) [p0,i0]
-        i4 = same_as(i2)
-        i5 = int_lt(i4, 10)
+        i4 = int_add(i0, 2)
+        i5 = int_lt(i2, 10)
         v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr)
         v3 = vec_int_expand(42)
         v2 = vec_int_mul(v1, v3)
-        jump(p0,i4)
+        jump(p0,i2)
         """
         vopt = self.vectorize(self.parse_loop(ops),1)
         self.assert_equal(vopt.loop, self.parse_loop(opt))
@@ -1011,12 +1011,12 @@
         i2 = int_add(i0, 2)
         i3 = int_lt(i2, 10)
         guard_true(i3) [p0,i0]
-        i4 = same_as(i2)
-        i5 = int_lt(i4, 10)
+        i4 = int_add(i0, 2)
+        i5 = int_lt(i2, 10)
         v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr)
         v3 = vec_float_expand(f3)
         v2 = vec_int_mul(v1, v3)
-        jump(p0,i4,f3)
+        jump(p0,i2,f3)
         """
         vopt = self.vectorize(self.parse_loop(ops),1)
         self.assert_equal(vopt.loop, self.parse_loop(opt))
@@ -1053,15 +1053,15 @@
         i55 = int_add(i44, 16) 
         i54 = int_add(i41, 16) 
         i56 = int_add(i37, 16) 
-        i629 = same_as(i637)
-        i57 = int_ge(i629, i18) 
+        i629 = int_add(i28, 2)
+        i57 = int_ge(i637, i18) 
         v61 = vec_raw_load(i21, i44, 2, descr=floatarraydescr) 
         v62 = vec_raw_load(i4, i41, 2, descr=floatarraydescr) 
         v63 = vec_float_add(v61, v62) 
         vec_raw_store(i0, i37, v63, descr=floatarraydescr) 
         f100 = vec_float_unpack(v61, 1, 1)
         f101 = vec_float_unpack(v62, 1, 1)
-        jump(p36, i629, p9, i56, p14, f100, p12, p38, f101, p39, i40, i54, p42, i43, i55, i21, i4, i0, i18)
+        jump(p36, i637, p9, i56, p14, f100, p12, p38, f101, p39, i40, i54, p42, i43, i55, i21, i4, i0, i18)
         """
         vopt = self.vectorize(self.parse_loop(ops))
         self.assert_equal(vopt.loop, self.parse_loop(opt))
@@ -1090,15 +1090,15 @@
         i8 = int_ge(i5, 36)
         i6 = int_add(i1, 3)
         i11 = int_ge(i6, 36)
-        i7 = same_as(i50)
-        i14 = int_ge(i7, 36)
+        i7 = int_add(i1, 4)
+        i14 = int_ge(i50, 36)
         v17 = vec_getarrayitem_raw(p0, i1, 2, descr=floatarraydescr)
         v18 = vec_getarrayitem_raw(p0, i5, 2, descr=floatarraydescr)
         v19 = vec_cast_float_to_singlefloat(v17)
         v20 = vec_cast_float_to_singlefloat(v18)
         v21 = vec_float_pack(v19, v20, 2, 2)
         vec_setarrayitem_raw(p1, i1, v21, descr=singlefloatarraydescr)
-        jump(p0, p1, i7)
+        jump(p0, p1, i50)
         """
         vopt = self.vectorize(self.parse_loop(ops))
         self.assert_equal(vopt.loop, self.parse_loop(opt))
@@ -1136,8 +1136,8 @@
         i207 = int_add(i0, 16)
         i196 = int_add(i4, 12)
         i197 = int_lt(i196, 100)
-        i205 = same_as(i500)
-        i206 = int_lt(i205, 100)
+        i205 = int_add(i4, 16)
+        i206 = int_lt(i500, 100)
         v228 = vec_raw_load(p0, i0, 4, descr=singlefloatarraydescr)
         v229 = vec_cast_singlefloat_to_float(v228)
         v230 = vec_int_unpack(v228, 2, 2)
@@ -1152,7 +1152,7 @@
         v239 = vec_cast_float_to_singlefloat(v237)
         v240 = vec_float_pack(v238, v239, 2, 2)
         vec_raw_store(p2, i4, v240, descr=singlefloatarraydescr)
-        jump(p0, p1, p2, i207, i205)
+        jump(p0, p1, p2, i207, i500)
         """
         vopt = self.vectorize(self.parse_loop(ops))
         self.assert_equal(vopt.loop, self.parse_loop(opt))
@@ -1237,6 +1237,47 @@
         opt = self.vectorize(self.parse_loop(trace))
         self.debug_print_operations(opt.loop)
 
+    def test_cast_1(self):
+        trace = """
+        [i9, i10, p2, p11, i12, i13, p4, p5, p14, i15, p8, i16, p17, i18, i19, i20, i21, i22, i23]
+        guard_early_exit() [p8, p5, p4, p2, p17, i13, i12, i10, i19, p14, p11, i18, i15, i16, i9]
+        i24 = raw_load(i20, i16, descr=singlefloatarraydescr)
+        guard_not_invalidated() [p8, p5, p4, p2, i24, p17, i13, i12, i10, i19, p14, p11, i18, i15, i16, None]
+        i27 = int_add(i16, 4)
+        i28 = raw_load(i21, i19, descr=singlefloatarraydescr)
+        i30 = int_add(i19, 4)
+        f31 = cast_singlefloat_to_float(i24)
+        f32 = cast_singlefloat_to_float(i28)
+        f33 = float_add(f31, f32)
+        i34 = cast_float_to_singlefloat(f33)
+        raw_store(i22, i13, i34, descr=singlefloatarraydescr)
+        i36 = int_add(i12, 1)
+        i38 = int_add(i13, 4)
+        i39 = int_ge(i36, i23)
+        guard_false(i39) [p8, p5, p4, p2, i27, i28, i30, i24, i38, i36, p17, None, None, None, None, p14, p11, i18, i15, None, None]
+        jump(i24, i28, p2, p11, i36, i38, p4, p5, p14, i15, p8, i27, p17, i18, i30, i20, i21, i22, i23)
+        """
+        opt = self.vectorize(self.parse_loop(trace))
+        self.debug_print_operations(opt.loop)
+
+    def test_all_guard(self):
+        trace = """
+        [p0, p3, i4, i5, i6, i7]
+        guard_early_exit() [p0, p3, i5, i4]
+        f8 = raw_load(i6, i5, descr=floatarraydescr)
+        guard_not_invalidated() [p0, f8, p3, i5, i4]
+        i9 = cast_float_to_int(f8)
+        i11 = int_and(i9, 255)
+        guard_false(i11) [p0, p3, i5, i4]
+        i13 = int_add(i4, 1)
+        i15 = int_add(i5, 8)
+        i16 = int_ge(i13, i7)
+        guard_false(i16) [p0, i13, i15, p3, None, None]
+        jump(p0, p3, i13, i15, i6, i7)
+        """
+        opt = self.vectorize(self.parse_loop(trace))
+        self.debug_print_operations(opt.loop)
+
     def test_reduction_basic(self):
         trace = """
         [p5, i6, p2, i7, p1, p8, i9, i10, f11, i12, i13, i14]
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -507,14 +507,15 @@
     """ An object wrapper around a guard. Helps to determine
         if one guard implies another
     """
-    def __init__(self, op, cmp_op, lhs, lhs_arg, rhs, rhs_arg):
+    def __init__(self, index, op, cmp_op, lhs, lhs_arg, rhs, rhs_arg):
+        self.index = index
         self.op = op
         self.cmp_op = cmp_op
         self.lhs = lhs
         self.rhs = rhs
         self.lhs_arg = lhs_arg
         self.rhs_arg = rhs_arg
-        self.emitted = False
+        self.implied = False
         self.stronger = False
 
     def implies(self, guard, opt):
@@ -638,7 +639,7 @@
     def propagate_all_forward(self, loop):
         """ strengthens the guards that protect an integral value """
         strongest_guards = {}
-        implied_guards = {}
+        guards = {}
         # the guards are ordered. guards[i] is before guards[j] iff i < j
         operations = loop.operations
         last_guard = None
@@ -652,44 +653,43 @@
                     lhs = self.index_vars.get(lhs_arg, lhs_arg)
                     rhs_arg = cmp_op.getarg(1)
                     rhs = self.index_vars.get(rhs_arg, rhs_arg)
-                    strongest = strongest_guards.get(key, None)
-                    if not strongest:
-                        strongest_guards[key] = Guard(op, cmp_op,
-                                                      lhs, lhs_arg,
-                                                      rhs, rhs_arg)
+                    other = strongest_guards.get(key, None)
+                    if not other:
+                        guard = Guard(i, op, cmp_op,
+                                      lhs, lhs_arg,
+                                      rhs, rhs_arg)
+                        strongest_guards[key] = guard
+                        # nothing known, at this position emit the guard
+                        guards[i] = guard
                     else: # implicit index(strongest) < index(current)
-                        guard = Guard(op, cmp_op,
+                        guard = Guard(i, op, cmp_op,
                                       lhs, lhs_arg, rhs, rhs_arg)
-                        if guard.implies(strongest, self):
+                        if guard.implies(other, self):
+                            strongest_guards[key] = guard
                             guard.stronger = True
-                            strongest_guards[key] = guard
-                        elif strongest.implies(guard, self):
-                            implied_guards[op] = True
+                            guard.index = other.index
+                            guards[other.index] = guard
+                            # do not mark as emit
+                            continue
+                        elif other.implies(guard, self):
+                            guard.implied = True
+                        # mark as emit
+                        guards[i] = guard
+        strongest_guards = None
         #
         self.renamer = Renamer()
         last_op_idx = len(operations)-1
         for i,op in enumerate(operations):
             op = operations[i]
             if op.is_guard() and op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE):
-                if implied_guards.get(op, False):
-                    # this guard is implied, thus removed
+                guard = guards.get(i, None)
+                if not guard or guard.implied:
+                    # this guard is implied or marked as not emitted (= None)
                     continue
-                key = self.get_key(op, operations, i)
-                if key[0] is not None:
-                    strongest = strongest_guards.get(key, None)
-                    if not strongest or not strongest.stronger:
-                        # If the key is not None and there _must_ be a strongest
-                        # guard. If strongest is None, this operation implies the
-                        # strongest guard that has been already been emitted.
-                        self.emit_operation(op)
-                        continue
-                    elif strongest.emitted:
-                        continue
-                    strongest.emit_operations(self)
-                    strongest.emitted = True
+                if guard.stronger:
+                    guard.emit_operations(self)
                     continue
             if op.result:
-                # emit a same_as op if a box uses the same index variable
                 index_var = self.index_vars.get(op.result, None)
                 if index_var:
                     if not index_var.is_identity():
@@ -981,7 +981,7 @@
                 arg = op.getoperation().getarg(argidx)
                 new_box = vbox.clonebox()
                 resop = ResOperation(opnum,
-                                     [vbox,ConstInt(i),arg], new_box)
+                                     [vbox,arg,ConstInt(i),ConstInt(0)], new_box)
                 vbox = new_box
                 self.preamble_ops.append(resop)
         return vbox


More information about the pypy-commit mailing list