[pypy-commit] pypy vecopt: distinguish between input/output arguments in vector type conversion
plan_rich
noreply at buildbot.pypy.org
Mon Jun 1 09:14:31 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77736:cb7dddccc7f0
Date: 2015-06-01 09:14 +0200
http://bitbucket.org/pypy/pypy/changeset/cb7dddccc7f0/
Log: distinguish between input/output arguments in vector type conversion;
call2 uses a list to track the iterators and their states
diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py
--- a/pypy/module/micronumpy/loop.py
+++ b/pypy/module/micronumpy/loop.py
@@ -15,7 +15,7 @@
call2_driver = jit.JitDriver(
name='numpy_call2',
- greens=['shapelen', 'func', 'left_advance', 'right_advance', 'calc_dtype', 'res_dtype' ],
+ greens=['shapelen', 'func', 'left_iter_index', 'right_iter_index', 'calc_dtype', 'res_dtype' ],
reds='auto', vectorize=True)
def call2(space, shape, func, calc_dtype, res_dtype, w_lhs, w_rhs, out):
@@ -43,9 +43,12 @@
# TODO handle __array_priorities__ and maybe flip the order
+ left_iter_index = 1
+ right_iter_index = 2
if w_lhs.get_size() == 1:
w_left = w_lhs.get_scalar_value().convert_to(space, calc_dtype)
left_iter = left_state = None
+ left_iter_index = -1
else:
w_left = None
left_iter, left_state = w_lhs.create_iter(shape)
@@ -54,6 +57,7 @@
if w_rhs.get_size() == 1:
w_right = w_rhs.get_scalar_value().convert_to(space, calc_dtype)
right_iter = right_state = None
+ right_iter_index = -1
else:
w_right = None
right_iter, right_state = w_rhs.create_iter(shape)
@@ -63,34 +67,34 @@
w_instance=lhs_for_subtype)
out_iter, out_state = out.create_iter(shape)
- left_advance = True
- right_advance = True
- if left_iter and left_iter.matches_range(out_iter):
- left_advance = False
- left_state = out_state
- if right_iter and right_iter.matches_range(out_iter):
- right_advance = False
- right_state = out_state
+ iter_list = [out_iter, left_iter, right_iter]
+ state_list = [out_state, left_state, right_state]
+
+ if left_iter_index > 0 and left_iter.matches_range(out_iter):
+ left_iter_index = 0
+ if right_iter_index > 0 and right_iter.matches_range(out_iter):
+ right_iter_index = 0
shapelen = len(shape)
while not out_iter.done(out_state):
- call2_driver.jit_merge_point(shapelen=shapelen, left_advance=left_advance, right_advance=right_advance,
+ call2_driver.jit_merge_point(shapelen=shapelen, left_iter_index=left_iter_index,
+ right_iter_index=right_iter_index,
func=func, calc_dtype=calc_dtype, res_dtype=res_dtype)
- if left_iter:
- w_left = left_iter.getitem(left_state).convert_to(space, calc_dtype)
- if left_advance:
- left_state = left_iter.next(left_state)
- if right_iter:
- w_right = right_iter.getitem(right_state).convert_to(space, calc_dtype)
- if right_advance:
- right_state = right_iter.next(right_state)
+ if left_iter_index > 0:
+ iter = iter_list[left_iter_index]
+ state = state_list[left_iter_index]
+ w_left = iter.getitem(state).convert_to(space, calc_dtype)
+ if left_iter_index == 1:
+ state_list[left_iter_index] = iter.next(state)
+ if right_iter_index > 0:
+ iter = iter_list[right_iter_index]
+ state = state_list[right_iter_index]
+ w_right = iter.getitem(state).convert_to(space, calc_dtype)
+ if right_iter_index == 2:
+ state_list[right_iter_index] = iter.next(state)
out_iter.setitem(out_state, func(calc_dtype, w_left, w_right).convert_to(
space, res_dtype))
- out_state = out_iter.next(out_state)
- if not left_advance:
- left_state = out_state
- if not right_advance:
- right_state = out_state
+ state_list[0] = out_state = out_iter.next(out_state)
return out
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -15,6 +15,7 @@
ns = {
'double': self.floatarraydescr,
'float': self.singlefloatarraydescr,
+ 'long': self.intarraydescr,
}
loop = opparse(" [p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,f0,f1,f2,f3,f4,f5]\n" + source + \
"\n jump(p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,f0,f1,f2,f3,f4,f5)",
@@ -24,7 +25,7 @@
return loop
def pack(self, loop, l, r):
- return [Node(op,i) for i,op in enumerate(loop.operations[l:r])]
+ return [Node(op,l+i) for i,op in enumerate(loop.operations[l:r])]
def schedule(self, loop_orig, packs, vec_reg_size=16):
loop = get_model(False).ExtendedTreeLoop("loop")
@@ -35,7 +36,7 @@
vsd = VecScheduleData(vec_reg_size)
for pack in packs:
if len(pack) == 1:
- ops.append(pack[0])
+ ops.append(pack[0].getoperation())
else:
for op in vsd.as_vector_operation(Pack(pack)):
ops.append(op)
@@ -58,22 +59,42 @@
loop2 = self.schedule(loop1, [pack1])
loop3 = self.parse("""
v1[i32#4] = vec_raw_load(p0, i0, 4, descr=float)
- i14 = vec_raw_load(p0, i4, descr=float)
- i15 = vec_raw_load(p0, i5, descr=float)
+ i14 = raw_load(p0, i4, descr=float)
+ i15 = raw_load(p0, i5, descr=float)
+ """)
+ self.assert_equal(loop2, loop3)
+
+ def test_int_to_float(self):
+ loop1 = self.parse("""
+ i10 = raw_load(p0, i0, descr=long)
+ i11 = raw_load(p0, i1, descr=long)
+ f10 = cast_int_to_float(i10)
+ f11 = cast_int_to_float(i11)
+ """)
+ pack1 = self.pack(loop1, 0, 2)
+ pack2 = self.pack(loop1, 2, 4)
+ print pack1
+ print pack2
+ loop2 = self.schedule(loop1, [pack1, pack2])
+ loop3 = self.parse("""
+ v1[i64#2] = vec_raw_load(p0, i0, 2, descr=long)
+ v2[i32#2] = vec_int_signext(v1[i64#2], 4)
+ v3[f64#2] = vec_cast_int_to_float(v2[i32#2])
""")
self.assert_equal(loop2, loop3)
def test_cost_model_reject_only_load_vectorizable(self):
loop1 = self.parse("""
- f10 = raw_load(p0, i0, descr=double)
- f11 = raw_load(p0, i1, descr=double)
- i1 = int_add(1,1)
- guard_true(i1) [f10]
+ f10 = raw_load(p0, i0, descr=long)
+ f11 = raw_load(p0, i1, descr=long)
+ guard_true(i0) [f10]
guard_true(i1) [f11]
""")
try:
- pack1 = self.pack(loop1, 0, 6)
- loop2 = self.schedule(loop1, [pack1])
+ pack1 = self.pack(loop1, 0, 2)
+ pack2 = self.pack(loop1, 2, 3)
+ pack3 = self.pack(loop1, 3, 4)
+ loop2 = self.schedule(loop1, [pack1, pack2, pack3])
py.test.fail("this loops should have bailed out")
except NotAProfitableLoop:
pass
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -789,44 +789,63 @@
self.preamble_ops = None
self.sched_data = None
self.pack = None
+ self.input_type = None
+ self.output_type = None
def is_vector_arg(self, i):
if i < 0 or i >= len(self.arg_ptypes):
return False
return self.arg_ptypes[i] is not None
- def pack_ptype(self, op):
+ def getsplitsize(self):
+ return self.input_type.getsize()
+
+ def determine_input_type(self, op):
_, vbox = self.sched_data.getvector_of_box(op.getarg(0))
if vbox:
return PackType.of(vbox)
else:
raise RuntimeError("fatal: box %s is not in a vector box" % (op.getarg(0),))
+ def determine_output_type(self, op):
+ return self.determine_input_type(op)
+
def as_vector_operation(self, pack, sched_data, oplist):
self.sched_data = sched_data
self.preamble_ops = oplist
op0 = pack.operations[0].getoperation()
- self.ptype = self.pack_ptype(op0)
+ self.input_type = self.determine_input_type(op0)
+ self.output_type = self.determine_output_type(op0)
off = 0
stride = self.split_pack(pack)
+ left = len(pack.operations)
assert stride > 0
while off < len(pack.operations):
+ if left < stride:
+ self.preamble_ops.append(pack.operations[off].getoperation())
+ off += 1
+ continue
ops = pack.operations[off:off+stride]
self.pack = Pack(ops)
self.transform_pack(ops, off, stride)
off += stride
+ left -= stride
self.pack = None
self.preamble_ops = None
self.sched_data = None
- self.ptype = None
+ self.input_type = None
+ self.output_type = None
def split_pack(self, pack):
pack_count = len(pack.operations)
vec_reg_size = self.sched_data.vec_reg_size
- if pack_count * self.ptype.getsize() > vec_reg_size:
- return vec_reg_size // self.ptype.getsize()
+ bytes = pack_count * self.getsplitsize()
+ if bytes > vec_reg_size:
+ return vec_reg_size // self.getsplitsize()
+ if bytes < vec_reg_size:
+ return 1
return pack_count
def before_argument_transform(self, args):
@@ -838,11 +857,11 @@
#
self.before_argument_transform(args)
#
- result = op.result
for i,arg in enumerate(args):
if self.is_vector_arg(i):
args[i] = self.transform_argument(args[i], i, off)
#
+ result = op.result
result = self.transform_result(result, off)
#
vop = ResOperation(op.vector, args, result, op.getdescr())
@@ -860,31 +879,23 @@
return vbox
def new_result_vector_box(self):
- size = self.ptype.getsize()
- count = min(self.ptype.getcount(), len(self.pack.operations))
- return BoxVector(self.ptype.gettype(), count, size, self.ptype.signed)
+ type = self.output_type.gettype()
+ size = self.output_type.getsize()
+ count = min(self.output_type.getcount(), len(self.pack.operations))
+ signed = self.output_type.signed
+ return BoxVector(type, count, size, signed)
def transform_argument(self, arg, argidx, off):
ops = self.pack.operations
box_pos, vbox = self.sched_data.getvector_of_box(arg)
if not vbox:
# constant/variable expand this box
- vbox = self.ptype.new_vector_box(len(ops))
+ vbox = self.input_type.new_vector_box(len(ops))
vbox = self.expand_box_to_vector_box(vbox, ops, arg, argidx)
box_pos = 0
- enforced_type = self.ptype
- # convert type f -> i, i -> f
- # if enforced_type.gettype() != vbox.gettype():
- # raise NotImplementedError("cannot yet convert between types")
-
- # convert size i64 -> i32, i32 -> i64, ...
- if enforced_type.getsize() != vbox.getsize():
- vbox = self.extend(vbox, self.ptype)
-
# use the input as an indicator for the pack type
- arg_ptype = PackType.of(vbox)
- packable = self.sched_data.vec_reg_size // arg_ptype.getsize()
+ packable = self.sched_data.vec_reg_size // self.input_type.getsize()
packed = vbox.item_count
assert packed >= 0
assert packable >= 0
@@ -894,21 +905,24 @@
vbox = self._pack(vbox, packed, args, packable)
elif packed > packable:
# the argument has more items than the operation is able to process!
- vbox = self.unpack(vbox, off, packable, arg_ptype)
+ vbox = self.unpack(vbox, off, packable, self.input_type)
#
if off != 0 and box_pos != 0:
# The original box is at a position != 0 but it
# is required to be at position 0. Unpack it!
- vbox = self.unpack(vbox, off, len(ops), arg_ptype)
+ vbox = self.unpack(vbox, off, len(ops), self.input_type)
+ # convert type f -> i, i -> f
+ if self.input_type.gettype() != vbox.gettype():
+ raise NotImplementedError("cannot yet convert between types")
+ # convert size i64 -> i32, i32 -> i64, ...
+ if self.input_type.getsize() > 0 and \
+ self.input_type.getsize() != vbox.getsize():
+ vbox = self.extend(vbox, self.input_type)
#
return vbox
def extend(self, vbox, newtype):
- if vbox.item_count * vbox.item_size == self.sched_data.vec_reg_size:
- return vbox
assert vbox.gettype() == newtype.gettype()
- assert (vbox.item_count * newtype.getsize()) == \
- self.sched_data.vec_reg_size
if vbox.gettype() == INT:
return self.extend_int(vbox, newtype)
else:
@@ -1025,6 +1039,12 @@
self.to_size = outtype.getsize()
OpToVectorOp.__init__(self, (intype, ), outtype)
+ def determine_input_type(self, op):
+ return self.arg_ptypes[0]
+
+ def determine_output_type(self, op):
+ return self.result_ptype
+
def split_pack(self, pack):
if self.from_size > self.to_size:
# cast down
@@ -1037,12 +1057,14 @@
return len(pack.operations)
def new_result_vector_box(self):
+ type = self.output_type.gettype()
size = self.to_size
- count = self.ptype.getcount()
+ count = self.output_type.getcount()
vec_reg_size = self.sched_data.vec_reg_size
if count * size > vec_reg_size:
count = vec_reg_size // size
- return BoxVector(self.result_ptype.gettype(), count, size, self.ptype.signed)
+ signed = self.output_type.signed
+ return BoxVector(type, count, size, signed)
class SignExtToVectorOp(OpToVectorOp):
def __init__(self, intype, outtype):
@@ -1054,7 +1076,7 @@
sizearg = op0.getarg(1)
assert isinstance(sizearg, ConstInt)
self.size = sizearg.value
- if self.ptype.getsize() > self.size:
+ if self.input_type.getsize() > self.size:
# cast down
return OpToVectorOp.split_pack(self, pack)
_, vbox = self.sched_data.getvector_of_box(op0.getarg(0))
@@ -1064,11 +1086,11 @@
return vbox.getcount()
def new_result_vector_box(self):
- count = self.ptype.getcount()
+ count = self.input_type.getcount()
vec_reg_size = self.sched_data.vec_reg_size
if count * self.size > vec_reg_size:
count = vec_reg_size // self.size
- return BoxVector(self.result_ptype.gettype(), count, self.size, self.ptype.signed)
+ return BoxVector(self.result_ptype.gettype(), count, self.size, self.input_type.signed)
PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, False)
@@ -1076,22 +1098,38 @@
def __init__(self):
OpToVectorOp.__init__(self, (), PT_GENERIC)
- def pack_ptype(self, op):
+ def determine_input_type(self, op):
+ return None
+
+ def determine_output_type(self, op):
return PackType.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
def before_argument_transform(self, args):
args.append(ConstInt(len(self.pack.operations)))
+ def getsplitsize(self):
+ return self.output_type.getsize()
+
+ def new_result_vector_box(self):
+ type = self.output_type.gettype()
+ size = self.output_type.getsize()
+ count = len(self.pack.operations)
+ signed = self.output_type.signed
+ return BoxVector(type, count, size, signed)
+
class StoreToVectorStore(OpToVectorOp):
def __init__(self):
OpToVectorOp.__init__(self, (None, None, PT_GENERIC), None)
self.has_descr = True
- def pack_ptype(self, op):
+ def determine_input_type(self, op):
return PackType.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
-PT_FLOAT = PackType(FLOAT, 4, False)
-PT_DOUBLE = PackType(FLOAT, 8, False)
+ def determine_output_type(self, op):
+ return None
+
+PT_FLOAT_2 = PackType(FLOAT, 4, False, 2)
+PT_DOUBLE_2 = PackType(FLOAT, 8, False, 2)
PT_FLOAT_GENERIC = PackType(INT, -1, True)
PT_INT64 = PackType(INT, 8, True)
PT_INT32 = PackType(INT, 4, True)
@@ -1107,6 +1145,8 @@
LOAD_TRANS = LoadToVectorLoad()
STORE_TRANS = StoreToVectorStore()
+# note that the following definition is x86 machine
+# specific.
ROP_ARG_RES_VECTOR = {
rop.VEC_INT_ADD: INT_OP_TO_VOP,
rop.VEC_INT_SUB: INT_OP_TO_VOP,
@@ -1130,10 +1170,10 @@
rop.VEC_RAW_STORE: STORE_TRANS,
rop.VEC_SETARRAYITEM_RAW: STORE_TRANS,
- rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOpConv(PT_DOUBLE, PT_FLOAT),
- rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(PT_FLOAT, PT_DOUBLE),
- rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOpConv(PT_DOUBLE, PT_INT32),
- rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOpConv(PT_INT32, PT_DOUBLE),
+ rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOpConv(PT_DOUBLE_2, PT_FLOAT_2),
+ rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(PT_FLOAT_2, PT_DOUBLE_2),
+ rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOpConv(PT_DOUBLE_2, PT_INT32),
+ rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOpConv(PT_INT32, PT_DOUBLE_2),
}
class VecScheduleData(SchedulerData):
@@ -1274,7 +1314,6 @@
def __init__(self, ops):
self.operations = ops
self.savings = 0
- self.ptype = None
for node in self.operations:
node.pack = self
@@ -1288,13 +1327,6 @@
leftmost = other.operations[0]
return rightmost == leftmost
- def size_in_bytes(self):
- return self.ptype.get_byte_size() * len(self.operations)
-
- def is_overloaded(self, vec_reg_byte_size):
- size = self.size_in_bytes()
- return size > vec_reg_byte_size
-
def __repr__(self):
return "Pack(%r)" % self.operations
diff --git a/rpython/jit/tool/oparser.py b/rpython/jit/tool/oparser.py
--- a/rpython/jit/tool/oparser.py
+++ b/rpython/jit/tool/oparser.py
@@ -123,13 +123,13 @@
box = ts.BoxRef()
_box_counter_more_than(self.model, elem[1:])
elif elem.startswith('v'):
- pattern = re.compile('.*\[(-?)(i|f)(\d+)#(\d+)\]')
+ pattern = re.compile('.*\[(u?)(i|f)(\d+)#(\d+)\]')
match = pattern.match(elem)
if match:
item_type = match.group(2)[0]
item_size = int(match.group(3)) // 8
item_count = int(match.group(4))
- item_signed = match.group(1) == 's'
+ item_signed = not (match.group(1) == 'u')
box = self.model.BoxVector(item_type, item_count, item_size, item_signed)
lbracket = elem.find('[')
number = elem[1:lbracket]
More information about the pypy-commit
mailing list