[pypy-commit] pypy vecopt-merge: costmodel now working again and ported most part of accum as well
plan_rich
noreply at buildbot.pypy.org
Thu Sep 17 11:28:57 CEST 2015
Author: Richard Plangger <planrichi at gmail.com>
Branch: vecopt-merge
Changeset: r79663:b304567d9f23
Date: 2015-09-17 11:29 +0200
http://bitbucket.org/pypy/pypy/changeset/b304567d9f23/
Log: costmodel now working again and ported most part of accum as well
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -1,13 +1,13 @@
from rpython.jit.metainterp.history import (VECTOR, FLOAT, INT,
ConstInt, ConstFloat, TargetToken)
from rpython.jit.metainterp.resoperation import (rop, ResOperation,
- GuardResOp, VecOperation, OpHelpers)
+ GuardResOp, VecOperation, OpHelpers, VecOperationNew)
from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph,
MemoryRef, Node, IndexVar)
from rpython.jit.metainterp.optimizeopt.renamer import Renamer
from rpython.rlib.objectmodel import we_are_translated
from rpython.jit.metainterp.jitexc import NotAProfitableLoop
-from rpython.rlib.objectmodel import specialize
+from rpython.rlib.objectmodel import specialize, always_inline
class SchedulerState(object):
@@ -133,27 +133,52 @@
assert node.emitted
class TypeRestrict(object):
- ANY_TYPE = -1
+ ANY_TYPE = '\x00'
ANY_SIZE = -1
ANY_SIGN = -1
ANY_COUNT = -1
SIGNED = 1
UNSIGNED = 0
- def __init__(self, type=-1, bytesize=-1, count=-1, sign=-1):
+ def __init__(self,
+ type=ANY_TYPE,
+ bytesize=ANY_SIZE,
+ count=ANY_SIGN,
+ sign=ANY_COUNT):
self.type = type
self.bytesize = bytesize
self.sign = sign
self.count = count
- def allows(self, type, count):
- if self.type != ANY_TYPE:
- if self.type != type.type:
- return False
+ @always_inline
+ def any_size(self):
+ return self.bytesize == TypeRestrict.ANY_SIZE
- # TODO
+ def check(self, value):
+ assert value.datatype != '\x00'
+ if self.type != TypeRestrict.ANY_TYPE:
+ if self.type != value.datatype:
+ assert 0, "type mismatch"
- return True
+ assert value.bytesize > 0
+ if not self.any_size():
+ if self.bytesize != value.bytesize:
+ assert 0, "size mismatch"
+
+ assert value.count > 0
+ if self.count != TypeRestrict.ANY_COUNT:
+ if self.count != value.count:
+ assert 0, "count mismatch"
+
+ if self.sign != TypeRestrict.ANY_SIGN:
+ if bool(self.sign) != value.sign:
+ assert 0, "sign mismatch"
+
+ def max_input_count(self, count):
+ """ How many values may be consumed: the restricted count if one is set, otherwise the given count """
+ if self.count != TypeRestrict.ANY_COUNT:
+ return self.count
+ return count
class trans(object):
@@ -205,32 +230,22 @@
def turn_into_vector(state, pack):
""" Turn a pack into a vector instruction """
- #
- # TODO self.check_if_pack_supported(pack)
- op = pack.leftmost()
- args = op.getarglist()
+ check_if_pack_supported(state, pack)
+ state.costmodel.record_pack_savings(pack, pack.numops())
+ left = pack.leftmost()
+ args = left.getarglist_copy()
prepare_arguments(state, pack, args)
- vop = VecOperation(op.vector, args, op, pack.numops(), op.getdescr())
+ vecop = VecOperation(left.vector, args, left,
+ pack.numops(), left.getdescr())
+ state.oplist.append(vecop)
for i,node in enumerate(pack.operations):
op = node.getoperation()
- state.setvector_of_box(op,i,vop)
- #
+ state.setvector_of_box(op,i,vecop)
if op.is_guard():
assert isinstance(op, GuardResOp)
- assert isinstance(vop, GuardResOp)
- vop.setfailargs(op.getfailargs())
- vop.rd_snapshot = op.rd_snapshot
- state.costmodel.record_pack_savings(pack, pack.numops())
- #
- if pack.is_accumulating():
- box = oplist[position].result
- assert box is not None
- for node in pack.operations:
- op = node.getoperation()
- assert not op.returns_void()
- state.renamer.start_renaming(op, box)
- #
- state.oplist.append(vop)
+ assert isinstance(vecop, GuardResOp)
+ vecop.setfailargs(op.getfailargs())
+ vecop.rd_snapshot = op.rd_snapshot
def prepare_arguments(state, pack, args):
@@ -238,7 +253,9 @@
# The following cases can occur:
# 1) argument is present in the box_to_vbox map.
# a) vector can be reused immediately (simple case)
- # b) an operation forces the unpacking of a vector
+ # b) the size of the input is mismatching (crop the vector)
+ # c) values are scattered in different registers
+ # d) the operand is not at the right position in the vector
# 2) argument is not known to reside in a vector
# a) expand vars/consts before the label and add as argument
# b) expand vars created in the loop body
@@ -250,24 +267,49 @@
if i >= len(restrictions) or restrictions[i] is None:
# ignore this argument
continue
+ restrict = restrictions[i]
if arg.returns_vector():
+ restrict.check(arg)
continue
pos, vecop = state.getvector_of_box(arg)
if not vecop:
# 2) constant/variable expand this box
expand(state, pack, args, arg, i)
+ restrict.check(args[i])
continue
+ # 1)
+ args[i] = vecop # a)
+ assemble_scattered_values(state, pack, args, i) # c)
+ crop_vector(state, restrict, pack, args, i) # b)
+ position_values(state, restrict, pack, args, i, pos) # d)
+ restrict.check(args[i])
+
+@always_inline
+def crop_vector(state, restrict, pack, args, i):
+ # convert size i64 -> i32, i32 -> i64, ...
+ arg = args[i]
+ newsize, size = restrict.bytesize, arg.bytesize
+ if not restrict.any_size() and newsize != size:
+ assert arg.type == 'i'
+ state._prevent_signext(newsize, size)
+ count = arg.count
+ vecop = VecOperationNew(rop.VEC_INT_SIGNEXT, [arg, ConstInt(newsize)],
+ 'i', newsize, arg.signed, count)
+ state.oplist.append(vecop)
+ state.costmodel.record_cast_int(size, newsize, count)
args[i] = vecop
- assemble_scattered_values(state, pack, args, i)
- position_values(state, pack, args, i, pos)
+@always_inline
def assemble_scattered_values(state, pack, args, index):
- vectors = pack.argument_vectors(state, pack, index)
+ args_at_index = [node.getoperation().getarg(index) for node in pack.operations]
+ args_at_index[0] = args[index]
+ vectors = pack.argument_vectors(state, pack, index, args_at_index)
if len(vectors) > 1:
# the argument is scattered along different vector boxes
args[index] = gather(state, vectors, pack.numops())
state.remember_args_in_vector(pack, index, args[index])
+@always_inline
def gather(state, vectors, count): # packed < packable and packed < stride:
(_, arg) = vectors[0]
i = 1
@@ -278,39 +320,32 @@
i += 1
return arg
-def position_values(state, pack, args, index, position):
+@always_inline
+def position_values(state, restrict, pack, args, index, position):
if position != 0:
# The vector box is at a position != 0 but it
# is required to be at position 0. Unpack it!
arg = args[index]
- args[index] = unpack_from_vector(state, arg, position, arg.count - position)
+ count = restrict.max_input_count(arg.count)
+ args[index] = unpack_from_vector(state, arg, position, count)
state.remember_args_in_vector(pack, index, args[index])
- # convert size i64 -> i32, i32 -> i64, ...
- # TODO if self.bytesize > 0:
- # determine_trans(
- # self.input_type.getsize() != vecop.getsize():
- # vecop = self.extend(vecop, self.input_type)
-
-def check_if_pack_supported(self, pack):
- op0 = pack.operations[0].getoperation()
- if self.input_type is None:
- # must be a load/guard op
- return
- insize = self.input_type.getsize()
- if op0.is_typecast():
+def check_if_pack_supported(state, pack):
+ left = pack.leftmost()
+ insize = left.bytesize
+ if left.is_typecast():
# prohibit the packing of signext calls that
# cast to int16/int8.
- _, outsize = op0.cast_to()
- self.sched_data._prevent_signext(outsize, insize)
- if op0.getopnum() == rop.INT_MUL:
+ state._prevent_signext(left.cast_to_bytesize(),
+ left.cast_from_bytesize())
+ if left.getopnum() == rop.INT_MUL:
if insize == 8 or insize == 1:
# see assembler for comment why
raise NotAProfitableLoop
def unpack_from_vector(state, arg, index, count):
""" Extract parts of the vector box into another vector box """
- print "unpack i", index, "c", count, "v", arg
+ #print "unpack i", index, "c", count, "v", arg
assert count > 0
assert index + count <= arg.count
args = [arg, ConstInt(index), ConstInt(count)]
@@ -702,12 +737,12 @@
vector register.
"""
before_count = len(packlist)
- print "splitting pack", self
+ #print "splitting pack", self
pack = self
while pack.pack_load(vec_reg_size) > Pack.FULL:
pack.clear()
oplist, newoplist = pack.slice_operations(vec_reg_size)
- print " split of %dx, left: %d" % (len(oplist), len(newoplist))
+ #print " split of %dx, left: %d" % (len(oplist), len(newoplist))
pack.operations = oplist
pack.update_pack_of_nodes()
if not pack.leftmost().is_typecast():
@@ -723,7 +758,7 @@
newpack.clear()
newpack.operations = []
break
- print " => %dx packs out of %d operations" % (-before_count + len(packlist) + 1, sum([pack.numops() for pack in packlist[before_count:]]))
+ #print " => %dx packs out of %d operations" % (-before_count + len(packlist) + 1, sum([pack.numops() for pack in packlist[before_count:]]))
pack.update_pack_of_nodes()
def slice_operations(self, vec_reg_size):
@@ -749,11 +784,10 @@
accum = False
return rightmost is leftmost and accum
- def argument_vectors(self, state, pack, index):
- args = [node.getoperation().getarg(index) for node in pack.operations]
+ def argument_vectors(self, state, pack, index, pack_args_index):
vectors = []
last = None
- for arg in args:
+ for arg in pack_args_index:
pos, vecop = state.getvector_of_box(arg)
if vecop is not last and vecop is not None:
vectors.append((pos, vecop))
@@ -792,23 +826,3 @@
assert isinstance(right, Node)
Pair.__init__(self, left, right)
self.accum = accum
-
-#def extend(self, vbox, newtype):
-# assert vbox.gettype() == newtype.gettype()
-# if vbox.gettype() == INT:
-# return self.extend_int(vbox, newtype)
-# else:
-# raise NotImplementedError("cannot yet extend float")
-#
-#def extend_int(self, vbox, newtype):
-# vbox_cloned = newtype.new_vector_box(vbox.getcount())
-# self.sched_data._prevent_signext(newtype.getsize(), vbox.getsize())
-# newsize = newtype.getsize()
-# assert newsize > 0
-# op = ResOperation(rop.VEC_INT_SIGNEXT,
-# [vbox, ConstInt(newsize)],
-# vbox_cloned)
-# self.costmodel.record_cast_int(vbox.getsize(), newtype.getsize(), vbox.getcount())
-# self.vecops.append(op)
-# return vbox_cloned
-
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
@@ -141,7 +141,7 @@
savings = self.savings(loop1)
assert savings == 2
- @py.test.mark.parametrize("bytes,s", [(1,None),(2,None),(4,0),(8,0)])
+ @py.test.mark.parametrize("bytes,s", [(1,0),(2,0),(4,0),(8,0)])
def test_sum_float_to_int(self, bytes, s):
loop1 = self.parse_trace("""
f10 = raw_load_f(p0, i0, descr=double)
@@ -200,5 +200,16 @@
except NotAProfitableLoop:
pass
+ def test_force_long_to_int_cast(self):
+ trace = self.parse_trace("""
+ i10 = raw_load_i(p0, i1, descr=long)
+ i11 = raw_load_i(p0, i2, descr=long)
+ f10 = cast_int_to_float(i10)
+ f11 = cast_int_to_float(i11)
+ """)
+ number = self.savings(trace)
+ assert number == 1
+
+
class Test(CostModelBaseTest, LLtypeMixin):
pass
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -23,7 +23,8 @@
from rpython.jit.metainterp.optimizeopt.schedule import (VecScheduleState,
Scheduler, Pack, Pair, AccumPair)
from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt
-from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp, Accum)
+from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp,
+ Accum, OpHelpers, VecOperation)
from rpython.rlib import listsort
from rpython.rlib.objectmodel import we_are_translated
from rpython.rlib.debug import debug_print, debug_start, debug_stop
@@ -643,8 +644,10 @@
def record_cast_int(self, fromsize, tosize, count):
# for each move there is 1 instruction
- self.savings += -count
- print "$$$ cast", -count, "now", self.savings
+ if fromsize == 8 and tosize == 4 and count == 2:
+ self.savings -= 1
+ else:
+ self.savings += -count
def record_vector_pack(self, src, index, count):
if src.datatype == FLOAT:
@@ -700,7 +703,6 @@
if self.profitable_pack(lnode, rnode, origin_pack, forward):
return Pair(lnode, rnode)
else:
- print "dependent"
if self.contains_pair(lnode, rnode):
return None
if origin_pack is not None:
@@ -787,7 +789,7 @@
size = INT_WORD
if left.type == 'f':
size = FLOAT_WORD
- if left.bytesize == right.bytesize and left.bytesize == size:
+ if not (left.bytesize == right.bytesize and left.bytesize == size):
# do not support if the type size is smaller
# than the cpu word size.
# WHY?
@@ -811,35 +813,34 @@
for pack in self.packs:
if not pack.is_accumulating():
continue
- xxx
accum = pack.accum
- # create a new vector box for the parameters
- box = pack.input_type.new_vector_box()
- size = vec_reg_size // pack.input_type.getsize()
+ datatype = accum.getdatatype()
+ bytesize = accum.getbytesize()
+ count = vec_reg_size // bytesize
+ signed = datatype == 'i'
+ oplist = state.invariant_oplist
# reset the box to zeros or ones
if accum.operator == Accum.PLUS:
- op = ResOperation(rop.VEC_BOX, [ConstInt(size)], box)
- state.invariant_oplist.append(op)
- result = box.clonebox()
- op = ResOperation(rop.VEC_INT_XOR, [box, box], result)
- state.invariant_oplist.append(op)
- box = result
+ vecop = OpHelpers.create_vec(datatype, bytesize, signed)
+ oplist.append(vecop)
+ vecop = VecOperation(rop.VEC_INT_XOR, [vecop, vecop],
+ vecop, count)
+ oplist.append(vecop)
elif accum.operator == Accum.MULTIPLY:
# multiply is only supported by floats
- op = ResOperation(rop.VEC_FLOAT_EXPAND, [ConstFloat(1.0), ConstInt(size)], box)
- state.invariant_oplist.append(op)
+ vecop = OpHelpers.create_vec_expand(ConstFloat(1.0), bytesize,
+ signed, count)
+ oplist.append(vecop)
else:
- raise NotImplementedError("can only handle %s" % accum.operator)
- result = box.clonebox()
- assert isinstance(result, BoxVector)
- result.accum = accum
+ raise NotImplementedError("cannot handle %s" % accum.operator)
# pack the scalar value
- op = ResOperation(getpackopnum(box.gettype()),
- [box, accum.var, ConstInt(0), ConstInt(1)], result)
- state.invariant_oplist.append(op)
+ args = [vecop, accum.getseed(), ConstInt(0), ConstInt(1)]
+ vecop = OpHelpers.create_vec_pack(datatype, args, bytesize,
+ signed, count)
+ oplist.append(vecop)
# rename the variable with the box
- state.setvector_of_box(accum.getoriginalbox(), 0, result) # prevent it from expansion
- state.renamer.start_renaming(accum.getoriginalbox(), result)
+ state.setvector_of_box(accum.getseed(), 0, vecop) # prevent it from expansion
+ state.renamer.start_renaming(accum.getseed(), vecop)
def split_overloaded_packs(self):
newpacks = []
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -637,15 +637,16 @@
if opnum == rop.FLOAT_MUL:
self.operator = Accum.MULTIPLY
- def getoriginalbox(self):
+ def getdatatype(self):
+ return self.var.datatype
+
+ def getbytesize(self):
+ return self.var.bytesize
+
+ def getseed(self):
+ """ The variable holding the seed value """
return self.var
- def getop(self):
- return self.operator
-
- def accumulates_value(self):
- return True
-
class CastOp(object):
_mixin_ = True
@@ -653,7 +654,7 @@
return True
def cast_to(self):
- _, _, to_type, size = self.casts
+ to_type, size = self.casts[2], self.casts[3]
if self.casts[3] == 0:
if self.getopnum() == rop.INT_SIGNEXT:
from rpython.jit.metainterp.history import ConstInt
More information about the pypy-commit
mailing list