[pypy-commit] pypy vecopt-merge: costmodel now working again and ported most part of accum as well

plan_rich noreply at buildbot.pypy.org
Thu Sep 17 11:28:57 CEST 2015


Author: Richard Plangger <planrichi at gmail.com>
Branch: vecopt-merge
Changeset: r79663:b304567d9f23
Date: 2015-09-17 11:29 +0200
http://bitbucket.org/pypy/pypy/changeset/b304567d9f23/

Log:	costmodel now working again and ported most part of accum as well

diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -1,13 +1,13 @@
 from rpython.jit.metainterp.history import (VECTOR, FLOAT, INT,
         ConstInt, ConstFloat, TargetToken)
 from rpython.jit.metainterp.resoperation import (rop, ResOperation,
-        GuardResOp, VecOperation, OpHelpers)
+        GuardResOp, VecOperation, OpHelpers, VecOperationNew)
 from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph,
         MemoryRef, Node, IndexVar)
 from rpython.jit.metainterp.optimizeopt.renamer import Renamer
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.jit.metainterp.jitexc import NotAProfitableLoop
-from rpython.rlib.objectmodel import specialize
+from rpython.rlib.objectmodel import specialize, always_inline
 
 
 class SchedulerState(object):
@@ -133,27 +133,52 @@
                 assert node.emitted
 
 class TypeRestrict(object):
-    ANY_TYPE = -1
+    ANY_TYPE = '\x00'
     ANY_SIZE = -1
     ANY_SIGN = -1
     ANY_COUNT = -1
     SIGNED = 1
     UNSIGNED = 0
 
-    def __init__(self, type=-1, bytesize=-1, count=-1, sign=-1):
+    def __init__(self,
+                 type=ANY_TYPE,
+                 bytesize=ANY_SIZE,
+                 count=ANY_SIGN,
+                 sign=ANY_COUNT):
         self.type = type
         self.bytesize = bytesize
         self.sign = sign
         self.count = count
 
-    def allows(self, type, count):
-        if self.type != ANY_TYPE:
-            if self.type != type.type:
-                return False
+    @always_inline
+    def any_size(self):
+        return self.bytesize == TypeRestrict.ANY_SIZE
 
-        # TODO
+    def check(self, value):
+        assert value.datatype != '\x00'
+        if self.type != TypeRestrict.ANY_TYPE:
+            if self.type != value.datatype:
+                assert 0, "type mismatch"
 
-        return True
+        assert value.bytesize > 0
+        if not self.any_size():
+            if self.bytesize != value.bytesize:
+                assert 0, "size mismatch"
+
+        assert value.count > 0
+        if self.count != TypeRestrict.ANY_COUNT:
+            if self.count != value.count:
+                assert 0, "count mismatch"
+
+        if self.sign != TypeRestrict.ANY_SIGN:
+            if bool(self.sign) != value.sign:
+                assert 0, "sign mismatch"
+
+    def max_input_count(self, count):
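+        """ How many elements may be used as input: the count of this restriction if one is set, otherwise the given count """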
+        if self.count != TypeRestrict.ANY_COUNT:
+            return self.count
+        return count
 
 class trans(object):
 
@@ -205,32 +230,22 @@
 
 def turn_into_vector(state, pack):
     """ Turn a pack into a vector instruction """
-    #
-    # TODO self.check_if_pack_supported(pack)
-    op = pack.leftmost()
-    args = op.getarglist()
+    check_if_pack_supported(state, pack)
+    state.costmodel.record_pack_savings(pack, pack.numops())
+    left = pack.leftmost()
+    args = left.getarglist_copy()
     prepare_arguments(state, pack, args)
-    vop = VecOperation(op.vector, args, op, pack.numops(), op.getdescr())
+    vecop = VecOperation(left.vector, args, left,
+                         pack.numops(), left.getdescr())
+    state.oplist.append(vecop)
     for i,node in enumerate(pack.operations):
         op = node.getoperation()
-        state.setvector_of_box(op,i,vop)
-    #
+        state.setvector_of_box(op,i,vecop)
     if op.is_guard():
         assert isinstance(op, GuardResOp)
-        assert isinstance(vop, GuardResOp)
-        vop.setfailargs(op.getfailargs())
-        vop.rd_snapshot = op.rd_snapshot
-    state.costmodel.record_pack_savings(pack, pack.numops())
-    #
-    if pack.is_accumulating():
-        box = oplist[position].result
-        assert box is not None
-        for node in pack.operations:
-            op = node.getoperation()
-            assert not op.returns_void()
-            state.renamer.start_renaming(op, box)
-    #
-    state.oplist.append(vop)
+        assert isinstance(vecop, GuardResOp)
+        vecop.setfailargs(op.getfailargs())
+        vecop.rd_snapshot = op.rd_snapshot
 
 
 def prepare_arguments(state, pack, args):
@@ -238,7 +253,9 @@
     # The following cases can occur:
     # 1) argument is present in the box_to_vbox map.
     #    a) vector can be reused immediatly (simple case)
-    #    b) an operation forces the unpacking of a vector
+    #    b) the size of the input is mismatching (crop the vector)
+    #    c) values are scattered in different registers
+    #    d) the operand is not at the right position in the vector
     # 2) argument is not known to reside in a vector
     #    a) expand vars/consts before the label and add as argument
     #    b) expand vars created in the loop body
@@ -250,24 +267,49 @@
         if i >= len(restrictions) or restrictions[i] is None:
             # ignore this argument
             continue
+        restrict = restrictions[i]
         if arg.returns_vector():
+            restrict.check(arg)
             continue
         pos, vecop = state.getvector_of_box(arg)
         if not vecop:
             # 2) constant/variable expand this box
             expand(state, pack, args, arg, i)
+            restrict.check(args[i])
             continue
+        # 1)
+        args[i] = vecop # a)
+        assemble_scattered_values(state, pack, args, i) # c)
+        crop_vector(state, restrict, pack, args, i) # b)
+        position_values(state, restrict, pack, args, i, pos) # d)
+        restrict.check(args[i])
+
+@always_inline
+def crop_vector(state, restrict, pack, args, i):
+    # convert size i64 -> i32, i32 -> i64, ...
+    arg = args[i]
+    newsize, size = restrict.bytesize, arg.bytesize
+    if not restrict.any_size() and newsize != size:
+        assert arg.type == 'i'
+        state._prevent_signext(newsize, size)
+        count = arg.count
+        vecop = VecOperationNew(rop.VEC_INT_SIGNEXT, [arg, ConstInt(newsize)],
+                                'i', newsize, arg.signed, count)
+        state.oplist.append(vecop)
+        state.costmodel.record_cast_int(size, newsize, count)
         args[i] = vecop
-        assemble_scattered_values(state, pack, args, i)
-        position_values(state, pack, args, i, pos)
 
+@always_inline
 def assemble_scattered_values(state, pack, args, index):
-    vectors = pack.argument_vectors(state, pack, index)
+    args_at_index = [node.getoperation().getarg(index) for node in pack.operations]
+    args_at_index[0] = args[index]
+    vectors = pack.argument_vectors(state, pack, index, args_at_index)
     if len(vectors) > 1:
         # the argument is scattered along different vector boxes
         args[index] = gather(state, vectors, pack.numops())
         state.remember_args_in_vector(pack, index, args[index])
 
+@always_inline
 def gather(state, vectors, count): # packed < packable and packed < stride:
     (_, arg) = vectors[0]
     i = 1
@@ -278,39 +320,32 @@
         i += 1
     return arg
 
-def position_values(state, pack, args, index, position):
+@always_inline
+def position_values(state, restrict, pack, args, index, position):
     if position != 0:
         # The vector box is at a position != 0 but it
         # is required to be at position 0. Unpack it!
         arg = args[index]
-        args[index] = unpack_from_vector(state, arg, position, arg.count - position)
+        count = restrict.max_input_count(arg.count)
+        args[index] = unpack_from_vector(state, arg, position, count)
         state.remember_args_in_vector(pack, index, args[index])
 
-        # convert size i64 -> i32, i32 -> i64, ...
-        # TODO if self.bytesize > 0:
-        #   determine_trans(
-        #   self.input_type.getsize() != vecop.getsize():
-        #    vecop = self.extend(vecop, self.input_type)
-
-def check_if_pack_supported(self, pack):
-    op0 = pack.operations[0].getoperation()
-    if self.input_type is None:
-        # must be a load/guard op
-        return
-    insize = self.input_type.getsize()
-    if op0.is_typecast():
+def check_if_pack_supported(state, pack):
+    left = pack.leftmost()
+    insize = left.bytesize
+    if left.is_typecast():
         # prohibit the packing of signext calls that
         # cast to int16/int8.
-        _, outsize = op0.cast_to()
-        self.sched_data._prevent_signext(outsize, insize)
-    if op0.getopnum() == rop.INT_MUL:
+        state._prevent_signext(left.cast_to_bytesize(),
+                               left.cast_from_bytesize())
+    if left.getopnum() == rop.INT_MUL:
         if insize == 8 or insize == 1:
             # see assembler for comment why
             raise NotAProfitableLoop
 
 def unpack_from_vector(state, arg, index, count):
     """ Extract parts of the vector box into another vector box """
-    print "unpack i", index, "c", count, "v", arg
+    #print "unpack i", index, "c", count, "v", arg
     assert count > 0
     assert index + count <= arg.count
     args = [arg, ConstInt(index), ConstInt(count)]
@@ -702,12 +737,12 @@
             vector register.
         """
         before_count = len(packlist)
-        print "splitting pack", self
+        #print "splitting pack", self
         pack = self
         while pack.pack_load(vec_reg_size) > Pack.FULL:
             pack.clear()
             oplist, newoplist = pack.slice_operations(vec_reg_size)
-            print "  split of %dx, left: %d" % (len(oplist), len(newoplist))
+            #print "  split of %dx, left: %d" % (len(oplist), len(newoplist))
             pack.operations = oplist
             pack.update_pack_of_nodes()
             if not pack.leftmost().is_typecast():
@@ -723,7 +758,7 @@
                 newpack.clear()
                 newpack.operations = []
                 break
-        print "  => %dx packs out of %d operations" % (-before_count + len(packlist) + 1, sum([pack.numops() for pack in packlist[before_count:]]))
+        #print "  => %dx packs out of %d operations" % (-before_count + len(packlist) + 1, sum([pack.numops() for pack in packlist[before_count:]]))
         pack.update_pack_of_nodes()
 
     def slice_operations(self, vec_reg_size):
@@ -749,11 +784,10 @@
                 accum = False
         return rightmost is leftmost and accum
 
-    def argument_vectors(self, state, pack, index):
-        args = [node.getoperation().getarg(index) for node in pack.operations]
+    def argument_vectors(self, state, pack, index, pack_args_index):
         vectors = []
         last = None
-        for arg in args:
+        for arg in pack_args_index:
             pos, vecop = state.getvector_of_box(arg)
             if vecop is not last and vecop is not None:
                 vectors.append((pos, vecop))
@@ -792,23 +826,3 @@
         assert isinstance(right, Node)
         Pair.__init__(self, left, right)
         self.accum = accum
-
-#def extend(self, vbox, newtype):
-#    assert vbox.gettype() == newtype.gettype()
-#    if vbox.gettype() == INT:
-#        return self.extend_int(vbox, newtype)
-#    else:
-#        raise NotImplementedError("cannot yet extend float")
-#
-#def extend_int(self, vbox, newtype):
-#    vbox_cloned = newtype.new_vector_box(vbox.getcount())
-#    self.sched_data._prevent_signext(newtype.getsize(), vbox.getsize())
-#    newsize = newtype.getsize()
-#    assert newsize > 0
-#    op = ResOperation(rop.VEC_INT_SIGNEXT, 
-#                      [vbox, ConstInt(newsize)],
-#                      vbox_cloned)
-#    self.costmodel.record_cast_int(vbox.getsize(), newtype.getsize(), vbox.getcount())
-#    self.vecops.append(op)
-#    return vbox_cloned
-
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
@@ -141,7 +141,7 @@
         savings = self.savings(loop1)
         assert savings == 2
 
-    @py.test.mark.parametrize("bytes,s", [(1,None),(2,None),(4,0),(8,0)])
+    @py.test.mark.parametrize("bytes,s", [(1,0),(2,0),(4,0),(8,0)])
     def test_sum_float_to_int(self, bytes, s):
         loop1 = self.parse_trace("""
         f10 = raw_load_f(p0, i0, descr=double)
@@ -200,5 +200,16 @@
         except NotAProfitableLoop:
             pass
 
+    def test_force_long_to_int_cast(self):
+        trace = self.parse_trace("""
+        i10 = raw_load_i(p0, i1, descr=long)
+        i11 = raw_load_i(p0, i2, descr=long)
+        f10 = cast_int_to_float(i10)
+        f11 = cast_int_to_float(i11)
+        """)
+        number = self.savings(trace)
+        assert number == 1
+
+
 class Test(CostModelBaseTest, LLtypeMixin):
     pass
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -23,7 +23,8 @@
 from rpython.jit.metainterp.optimizeopt.schedule import (VecScheduleState,
         Scheduler, Pack, Pair, AccumPair)
 from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt
-from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp, Accum)
+from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp,
+        Accum, OpHelpers, VecOperation)
 from rpython.rlib import listsort
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.rlib.debug import debug_print, debug_start, debug_stop
@@ -643,8 +644,10 @@
 
     def record_cast_int(self, fromsize, tosize, count):
         # for each move there is 1 instruction
-        self.savings += -count
-        print "$$$ cast", -count, "now", self.savings
+        if fromsize == 8 and tosize == 4 and count == 2:
+            self.savings -= 1
+        else:
+            self.savings += -count
 
     def record_vector_pack(self, src, index, count):
         if src.datatype == FLOAT:
@@ -700,7 +703,6 @@
                 if self.profitable_pack(lnode, rnode, origin_pack, forward):
                     return Pair(lnode, rnode)
             else:
-                print "dependent"
                 if self.contains_pair(lnode, rnode):
                     return None
                 if origin_pack is not None:
@@ -787,7 +789,7 @@
             size = INT_WORD
             if left.type == 'f':
                 size = FLOAT_WORD
-            if left.bytesize == right.bytesize and left.bytesize == size:
+            if not (left.bytesize == right.bytesize and left.bytesize == size):
                 # do not support if if the type size is smaller
                 # than the cpu word size.
                 # WHY?
@@ -811,35 +813,34 @@
         for pack in self.packs:
             if not pack.is_accumulating():
                 continue
-            xxx
             accum = pack.accum
-            # create a new vector box for the parameters
-            box = pack.input_type.new_vector_box()
-            size = vec_reg_size // pack.input_type.getsize()
+            datatype = accum.getdatatype()
+            bytesize = accum.getbytesize()
+            count = vec_reg_size // bytesize
+            signed = datatype == 'i'
+            oplist = state.invariant_oplist
             # reset the box to zeros or ones
             if accum.operator == Accum.PLUS:
-                op = ResOperation(rop.VEC_BOX, [ConstInt(size)], box)
-                state.invariant_oplist.append(op)
-                result = box.clonebox()
-                op = ResOperation(rop.VEC_INT_XOR, [box, box], result)
-                state.invariant_oplist.append(op)
-                box = result
+                vecop = OpHelpers.create_vec(datatype, bytesize, signed)
+                oplist.append(vecop)
+                vecop = VecOperation(rop.VEC_INT_XOR, [vecop, vecop],
+                                     vecop, count)
+                oplist.append(vecop)
             elif accum.operator == Accum.MULTIPLY:
                 # multiply is only supported by floats
-                op = ResOperation(rop.VEC_FLOAT_EXPAND, [ConstFloat(1.0), ConstInt(size)], box)
-                state.invariant_oplist.append(op)
+                vecop = OpHelpers.create_vec_expand(ConstFloat(1.0), bytesize,
+                                                    signed, count)
+                oplist.append(vecop)
             else:
-                raise NotImplementedError("can only handle %s" % accum.operator)
-            result = box.clonebox()
-            assert isinstance(result, BoxVector)
-            result.accum = accum
+                raise NotImplementedError("cannot handle %s" % accum.operator)
             # pack the scalar value
-            op = ResOperation(getpackopnum(box.gettype()),
-                              [box, accum.var, ConstInt(0), ConstInt(1)], result)
-            state.invariant_oplist.append(op)
+            args = [vecop, accum.getseed(), ConstInt(0), ConstInt(1)]
+            vecop = OpHelpers.create_vec_pack(datatype, args, bytesize,
+                                              signed, count)
+            oplist.append(vecop)
             # rename the variable with the box
-            state.setvector_of_box(accum.getoriginalbox(), 0, result) # prevent it from expansion
-            state.renamer.start_renaming(accum.getoriginalbox(), result)
+            state.setvector_of_box(accum.getseed(), 0, vecop) # prevent it from expansion
+            state.renamer.start_renaming(accum.getseed(), vecop)
 
     def split_overloaded_packs(self):
         newpacks = []
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -637,15 +637,16 @@
         if opnum == rop.FLOAT_MUL:
             self.operator = Accum.MULTIPLY
 
-    def getoriginalbox(self):
+    def getdatatype(self):
+        return self.var.datatype
+
+    def getbytesize(self):
+        return self.var.bytesize
+
+    def getseed(self):
+        """ The variable holding the seed value """
         return self.var
 
-    def getop(self):
-        return self.operator
-
-    def accumulates_value(self):
-        return True
-
 class CastOp(object):
     _mixin_ = True
 
@@ -653,7 +654,7 @@
         return True
 
     def cast_to(self):
-        _, _, to_type, size = self.casts
+        to_type, size = self.casts[2], self.casts[3]
         if self.casts[3] == 0:
             if self.getopnum() == rop.INT_SIGNEXT:
                 from rpython.jit.metainterp.history import ConstInt


More information about the pypy-commit mailing list