[pypy-commit] pypy vecopt: rewritten splitting of packs, added asserts to ensure the impl assumptions are correct. some tests broke (it is not yet finished)
plan_rich
noreply at buildbot.pypy.org
Fri Jun 26 12:29:07 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r78321:2e935c4aa59d
Date: 2015-06-26 12:29 +0200
http://bitbucket.org/pypy/pypy/changeset/2e935c4aa59d/
Log: Rewrote the splitting of packs and added asserts to ensure the
implementation's assumptions are correct. Some tests broke (the work is not yet finished).
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -264,23 +264,8 @@
#
self.check_if_pack_supported(pack)
#
- off = 0
- stride = self.split_pack(pack, self.sched_data.vec_reg_size)
- left = len(pack.operations)
- assert stride > 0
- while off < len(pack.operations):
- print left, "<", stride
- if stride == 1:
- op = pack.operations[off].getoperation()
- self.preamble_ops.append(op)
- off += 1
- continue
- ops = pack.operations[off:off+stride]
- self.pack = Pack(ops, pack.input_type, pack.output_type)
- self.costmodel.record_pack_savings(self.pack)
- self.transform_pack(ops, off, stride)
- off += stride
- left -= stride
+ self.pack = pack
+ self.transform_pack()
self.pack = None
self.costmodel = None
@@ -305,35 +290,52 @@
def before_argument_transform(self, args):
pass
- def transform_pack(self, ops, off, stride):
- op = self.pack.operations[0].getoperation()
- args = op.getarglist()
- #
- self.before_argument_transform(args)
- #
- for i,arg in enumerate(args):
- if isinstance(arg, BoxVector):
- continue
- if self.is_vector_arg(i):
- args[i] = self.transform_argument(args[i], i, off, stride)
- #
- result = op.result
- result = self.transform_result(result, off)
- #
- vop = ResOperation(op.vector, args, result, op.getdescr())
- if op.is_guard():
- assert isinstance(op, GuardResOp)
- vop.setfailargs(op.getfailargs())
- vop.rd_snapshot = op.rd_snapshot
- self.preamble_ops.append(vop)
+ def transform_pack(self):
+ self.off = 0
+ while self.off < self.pack.opcount():
+ op = self.pack.operations[self.off].getoperation()
+ args = op.getarglist()
+ #
+ self.before_argument_transform(args)
+ #
+ argument_infos = []
+ self.transform_arguments(args, argument_infos)
+ #
+ result = op.result
+ result = self.transform_result(result)
+ #
+ vop = ResOperation(op.vector, args, result, op.getdescr())
+ if op.is_guard():
+ assert isinstance(op, GuardResOp)
+ vop.setfailargs(op.getfailargs())
+ vop.rd_snapshot = op.rd_snapshot
+ self.preamble_ops.append(vop)
+ stride = self.consumed_operations(argument_infos, result)
+ self.costmodel.record_pack_savings(self.pack, stride)
+ assert stride != 0
+ self.off += stride
- def transform_result(self, result, off):
+ def consumed_operations(self, argument_infos, result):
+ ops = self.getoperations()
+ if len(argument_infos) == 0:
+ return result.getcount()
+ if len(argument_infos) == 1:
+ return argument_infos[0]
+ if not we_are_translated():
+ first = argument_infos[0]
+ for ai in argument_infos:
+ assert first == ai
+ return argument_infos[0]
+
+ def transform_result(self, result):
if result is None:
return None
vbox = self.new_result_vector_box()
#
# mark the position and the vbox in the hash
- for i, node in enumerate(self.pack.operations):
+ for i, node in enumerate(self.getoperations()):
+ if i >= vbox.item_count:
+ break
op = node.getoperation()
self.sched_data.setvector_of_box(op.result, i, vbox)
return vbox
@@ -345,56 +347,99 @@
signed = self.output_type.signed
return BoxVector(type, count, size, signed)
- def transform_argument(self, arg, argidx, off, stride):
- ops = self.pack.operations
- box_pos, vbox = self.sched_data.getvector_of_box(arg)
- if not vbox:
- # constant/variable expand this box
- vbox = self.expand(ops, arg, argidx)
- box_pos = 0
- # convert size i64 -> i32, i32 -> i64, ...
- if self.input_type.getsize() > 0 and \
- self.input_type.getsize() != vbox.getsize():
- vbox = self.extend(vbox, self.input_type)
+ def getoperations(self):
+ return self.pack.operations[self.off:]
- # use the input as an indicator for the pack type
- packable = self.input_type.getcount()
- packed = vbox.item_count
- assert packed >= 0
- assert packable >= 0
- vboxes = self.vector_boxes_for_args(argidx)
- if len(vboxes) > 1: # packed < packable and packed < stride:
- # the argument is scattered along different vector boxes
- args = [op.getoperation().getarg(argidx) for op in ops]
- vbox = self._pack(vbox, packed, args, packable)
- self.update_input_output(self.pack)
- box_pos = 0
- elif packed > packable:
- # box_pos == 0 then it is already at the right place
- # the argument has more items than the operation is able to process!
- args = [op.getoperation().getarg(argidx) for op in ops]
- vbox = self.unpack(vbox, args, off, packable, self.input_type)
- self.update_input_output(self.pack)
- box_pos = 0
- elif off != 0 and box_pos != 0:
- import py; py.test.set_trace()
- # The original box is at a position != 0 but it
- # is required to be at position 0. Unpack it!
- args = [op.getoperation().getarg(argidx) for op in ops]
- vbox = self.unpack(vbox, args, off, len(ops), self.input_type)
- self.update_input_output(self.pack)
- #
- assert vbox is not None
- return vbox
+ def transform_arguments(self, args, argument_info):
+ for i,arg in enumerate(args):
+ if isinstance(arg, BoxVector):
+ continue
+ if not self.is_vector_arg(i):
+ continue
+ box_pos, vbox = self.sched_data.getvector_of_box(arg)
+ if not vbox:
+ # constant/variable expand this box
+ vbox = self.expand(arg, i)
+ self.sched_data.setvector_of_box(arg, 0, vbox)
+ box_pos = 0
+ # convert size i64 -> i32, i32 -> i64, ...
+ if self.input_type.getsize() > 0 and \
+ self.input_type.getsize() != vbox.getsize():
+ vbox = self.extend(vbox, self.input_type)
+
+ # use the input as an indicator for the pack type
+ packable = self.input_type.getcount()
+ packed = vbox.item_count
+ assert packed >= 0
+ assert packable >= 0
+ if packed > packable:
+ # the argument has more items than the operation is able to process!
+ # box_pos == 0 then it is already at the right place
+ argument_info.append(packable)
+ if box_pos != 0:
+ args[i] = self.unpack(vbox, self.off, packable, self.input_type)
+ self.update_arg_in_vector_pos(i, args[i])
+ #self.update_input_output(self.pack)
+ continue
+ else:
+ assert vbox is not None
+ args[i] = vbox
+ continue
+ vboxes = self.vector_boxes_for_args(i)
+ if packed < packable and len(vboxes) > 1:
+ # the argument is scattered along different vector boxes
+ args[i] = self.gather(vboxes, packable)
+ self.update_arg_in_vector_pos(i, args[i])
+ argument_info.append(args[i].item_count)
+ continue
+ if box_pos != 0:
+ # The vector box is at a position != 0 but it
+ # is required to be at position 0. Unpack it!
+ args[i] = self.unpack(vbox, self.off, packable, self.input_type)
+ self.update_arg_in_vector_pos(i, args[i])
+ argument_info.append(args[i].item_count)
+ continue
+ #self.update_input_output(self.pack)
+ #
+ assert vbox is not None
+ args[i] = vbox
+ argument_info.append(args[i].item_count)
+
+ def gather(self, vboxes, target_count): # packed < packable and packed < stride:
+ i = 0
+ (_, box) = vboxes[0]
+ while i < len(vboxes):
+ if i+1 >= len(vboxes):
+ break
+ (box2_pos, box2) = vboxes[i+1]
+ if box.getcount() + box2.getcount() <= target_count:
+ box = self.package(box, box.getcount(),
+ box2, box2_pos, box2.getcount())
+ i += 2
+ return box
+ pass
+ # OLD
+ #args = [op.getoperation().getarg(argidx) for op in ops]
+ #vbox = self._pack(vbox, packed, args, packable)
+ #self.update_input_output(self.pack)
+ #box_pos = 0
+
+ def update_arg_in_vector_pos(self, argidx, box):
+ arguments = [op.getoperation().getarg(argidx) for op in self.getoperations()]
+ for i,arg in enumerate(arguments):
+ if i >= box.item_count:
+ break
+ self.sched_data.setvector_of_box(arg, i, box)
def vector_boxes_for_args(self, index):
- args = [op.getoperation().getarg(index) for op in self.pack.operations]
+ args = [op.getoperation().getarg(index) for op in self.getoperations()]
vboxes = []
last_vbox = None
for arg in args:
pos, vbox = self.sched_data.getvector_of_box(arg)
- if vbox != last_vbox and vbox is not None:
- vboxes.append(vbox)
+ if vbox is not last_vbox and vbox is not None:
+ vboxes.append((pos, vbox))
+ last_vbox = vbox
return vboxes
@@ -415,22 +460,37 @@
self.preamble_ops.append(op)
return vbox_cloned
- def unpack(self, vbox, args, index, count, arg_ptype):
+ def unpack(self, vbox, index, count, arg_ptype):
+ assert index < vbox.item_count
+ assert index + count <= vbox.item_count
vbox_cloned = vectorbox_clone_set(vbox, count=count)
opnum = getunpackopnum(vbox.item_type)
op = ResOperation(opnum, [vbox, ConstInt(index), ConstInt(count)], vbox_cloned)
self.costmodel.record_vector_unpack(vbox, index, count)
self.preamble_ops.append(op)
#
- for i,arg in enumerate(args):
- self.sched_data.setvector_of_box(arg, i, vbox_cloned)
- #
return vbox_cloned
- def _pack(self, tgt_box, index, args, packable):
+ def package(self, tgt, tidx, src, sidx, scount):
+ """ tgt = [1,2,3,4,_,_,_,_]
+ src = [5,6,_,_]
+ new_box = [1,2,3,4,5,6,_,_] after the operation, tidx=4, scount=2
+ """
+ assert sidx == 0 # restriction
+ count = tgt.item_count + src.item_count
+ new_box = vectorbox_clone_set(tgt, count=count)
+ opnum = getpackopnum(tgt.item_type)
+ op = ResOperation(opnum, [tgt, src, ConstInt(tidx), ConstInt(scount)], new_box)
+ self.preamble_ops.append(op)
+ self.costmodel.record_vector_pack(src, sidx, scount)
+ if not we_are_translated():
+ self._check_vec_pack(op)
+ return new_box
+
+ def package2(self, tgt_box, index, args, packable):
""" If there are two vector boxes:
- v1 = [<empty>,<emtpy>,X,Y]
- v2 = [A,B,<empty>,<empty>]
+ v1 = [_,_,X,Y]
+ v2 = [A,B,_,_]
this function creates a box pack instruction to merge them to:
v1/2 = [A,B,X,Y]
"""
@@ -482,8 +542,9 @@
assert index.value + count.value <= result.item_count
assert result.item_count > arg0.item_count
- def expand(self, nodes, arg, argidx):
- vbox = self.input_type.new_vector_box(len(nodes))
+ def expand(self, arg, argidx):
+ elem_count = self.input_type.getcount()
+ vbox = self.input_type.new_vector_box(elem_count)
box_type = arg.type
expanded_map = self.sched_data.expanded_map
invariant_ops = self.sched_data.invariant_oplist
@@ -496,7 +557,7 @@
if already_expanded:
return already_expanded
- for i, node in enumerate(nodes):
+ for i, node in enumerate(self.getoperations()):
op = node.getoperation()
if not arg.same_box(op.getarg(argidx)):
break
@@ -509,10 +570,10 @@
expanded_map[arg] = vbox
return vbox
- op = ResOperation(rop.VEC_BOX, [ConstInt(len(nodes))], vbox)
+ op = ResOperation(rop.VEC_BOX, [ConstInt(elem_count)], vbox)
invariant_ops.append(op)
opnum = getpackopnum(arg.type)
- for i,node in enumerate(nodes):
+ for i,node in enumerate(self.getoperations()):
op = node.getoperation()
arg = op.getarg(argidx)
new_box = vbox.clonebox()
@@ -737,6 +798,7 @@
return self.box_to_vbox.get(arg, (-1, None))
def setvector_of_box(self, box, off, vector):
+ assert off < vector.item_count
self.box_to_vbox[box] = (off, vector)
def prepend_invariant_operations(self, oplist):
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -530,7 +530,7 @@
def record_cast_int(self, op):
raise NotImplementedError
- def record_pack_savings(self, pack):
+ def record_pack_savings(self, pack, times):
raise NotImplementedError
def record_vector_pack(self, box, index, count):
@@ -550,8 +550,7 @@
class X86_CostModel(CostModel):
- def record_pack_savings(self, pack):
- times = pack.opcount()
+ def record_pack_savings(self, pack, times):
cost, benefit_factor = (1,1)
node = pack.operations[0]
op = node.getoperation()
More information about the pypy-commit mailing list