[pypy-commit] pypy vecopt: improved the scheduling (pack/unpack ops were previously not emitted), work in progress
plan_rich
noreply at buildbot.pypy.org
Mon May 18 15:17:24 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77373:c0d72e0205ae
Date: 2015-05-18 15:17 +0200
http://bitbucket.org/pypy/pypy/changeset/c0d72e0205ae/
Log: improved the scheduling (pack/unpack ops were previously not
emitted), work in progress
diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py
--- a/pypy/module/micronumpy/compile.py
+++ b/pypy/module/micronumpy/compile.py
@@ -2,6 +2,7 @@
It should not be imported by the module itself
"""
import re
+import py
from pypy.interpreter import special
from pypy.interpreter.baseobjspace import InternalSpaceCache, W_Root, ObjSpace
from pypy.interpreter.error import OperationError
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -170,16 +170,23 @@
return """
a = astype(|30|, int32)
b = a + 1i
- c = a + 2.0
x1 = b -> 7
x2 = b -> 8
- x3 = c -> 11
- x4 = c -> 12
- x1 + x2 + x3 + x4
+ x1 + x2
"""
+ #return """
+ #a = astype(|30|, int32)
+ #b = a + 1i
+ #c = a + 2.0
+ #x1 = b -> 7
+ #x2 = b -> 8
+ #x3 = c -> 11
+ #x4 = c -> 12
+ #x1 + x2 + x3 + x4
+ #"""
def test_int32_add_const(self):
result = self.run("int32_add_const")
- assert int(result) == 7+1+8+1+11+2+12+2
+ assert int(result) == 7+1+8+1
self.check_vectorized(1, 1)
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -865,8 +865,14 @@
# ------------------------------------------------------------
def mov(self, from_loc, to_loc):
- if (isinstance(from_loc, RegLoc) and from_loc.is_xmm) or (isinstance(to_loc, RegLoc) and to_loc.is_xmm):
- self.mc.MOVSD(to_loc, from_loc)
+ from_xmm = isinstance(from_loc, RegLoc) and from_loc.is_xmm
+ to_xmm = isinstance(to_loc, RegLoc) and to_loc.is_xmm
+ if from_xmm or to_xmm:
+ if from_xmm and to_xmm:
+ # copy 128-bit from -> to
+ self.mc.MOVAPD(to_loc, from_loc)
+ else:
+ self.mc.MOVSD(to_loc, from_loc)
else:
assert to_loc is not ebp
self.mc.MOV(to_loc, from_loc)
@@ -2547,17 +2553,29 @@
srcloc, sizeloc, tosizeloc = arglocs
size = sizeloc.value
tosize = tosizeloc.value
- if size == 8 and tosize == 4:
+ if size == 4 and tosize == 8:
+ scratch = X86_64_SCRATCH_REG.value
+ print resloc, "[0] <- int64(", srcloc, "[0])"
+ print resloc, "[1] <- int64(", srcloc, "[1])"
+ self.mc.PEXTRD_rxi(scratch, srcloc.value, 1)
+ self.mc.PINSRQ_xri(resloc.value, scratch, 1)
+ self.mc.PEXTRD_rxi(scratch, srcloc.value, 0)
+ self.mc.PINSRQ_xri(resloc.value, scratch, 0)
+ elif size == 8 and tosize == 4:
# is there a better sequence to move them?
- self.mc.MOVDQU(resloc, srcloc)
- self.mc.PSRLDQ(srcloc, 8)
- self.mc.PUNPCKLDQ(resloc, srcloc)
+ scratch = X86_64_SCRATCH_REG.value
+ print resloc, "[0] <- int32(", srcloc, "[0])"
+ print resloc, "[1] <- int32(", srcloc, "[1])"
+ self.mc.PEXTRQ_rxi(scratch, srcloc.value, 0)
+ self.mc.PINSRD_xri(resloc.value, scratch, 0)
+ self.mc.PEXTRQ_rxi(scratch, srcloc.value, 1)
+ self.mc.PINSRD_xri(resloc.value, scratch, 1)
else:
py.test.set_trace()
raise NotImplementedError("sign ext missing")
def genop_vec_float_expand(self, op, arglocs, resloc):
- loc0, countloc = arglocs
+ loc0, sizeloc, countloc = arglocs
count = countloc.value
if count == 1:
raise NotImplementedError("expand count 1")
@@ -2620,31 +2638,32 @@
si = srcidx
ri = residx
k = count
+ print resultloc,"[", residx, "] <- ",sourceloc,"[",srcidx,"] count", count
while k > 0:
if size == 8:
if resultloc.is_xmm:
self.mc.PEXTRQ_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
- self.mc.PINSRQ_xri(resloc.value, X86_64_SCRATCH_REG.value, ri)
+ self.mc.PINSRQ_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
else:
- self.mc.PEXTRQ_rxi(resloc.value, sourceloc.value, si)
+ self.mc.PEXTRQ_rxi(resultloc.value, sourceloc.value, si)
elif size == 4:
if resultloc.is_xmm:
self.mc.PEXTRD_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
- self.mc.PINSRD_xri(resloc.value, X86_64_SCRATCH_REG.value, ri)
+ self.mc.PINSRD_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
else:
- self.mc.PEXTRD_rxi(resloc.value, sourceloc.value, si)
+ self.mc.PEXTRD_rxi(resultloc.value, sourceloc.value, si)
elif size == 2:
if resultloc.is_xmm:
self.mc.PEXTRW_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
- self.mc.PINSRW_xri(resloc.value, X86_64_SCRATCH_REG.value, ri)
+ self.mc.PINSRW_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
else:
- self.mc.PEXTRW_rxi(resloc.value, sourceloc.value, si)
+ self.mc.PEXTRW_rxi(resultloc.value, sourceloc.value, si)
elif size == 1:
if resultloc.is_xmm:
self.mc.PEXTRB_rxi(X86_64_SCRATCH_REG.value, sourceloc.value, si)
- self.mc.PINSRB_xri(resloc.value, X86_64_SCRATCH_REG.value, ri)
+ self.mc.PINSRB_xri(resultloc.value, X86_64_SCRATCH_REG.value, ri)
else:
- self.mc.PEXTRB_rxi(resloc.value, sourceloc.value, si)
+ self.mc.PEXTRB_rxi(resultloc.value, sourceloc.value, si)
si += 1
ri += 1
k -= 1
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1553,7 +1553,7 @@
loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0), args)
result = self.force_allocate_reg(op.result, args)
tmpxvar = TempBox()
- tmploc = self.xrm.force_allocate_reg(tmpxvar)
+ tmploc = self.xrm.force_allocate_reg(tmpxvar, args)
self.xrm.possibly_free_var(tmpxvar)
self.perform(op, [loc0, tmploc, imm(index.value), imm(count.value)], result)
@@ -1569,7 +1569,7 @@
assert isinstance(op.result, BoxVector)
args = op.getarglist()
size = op.result.item_size
- arglocs = [resloc, srcloc, imm(residx), imm(index.value), imm(count.value), imm(size)]
+ arglocs = [resloc, srcloc, imm(index.value), imm(0), imm(count.value), imm(size)]
self.perform(op, arglocs, resloc)
def consider_vec_int_unpack(self, op):
@@ -1599,7 +1599,6 @@
def consider_vec_int_signext(self, op):
args = op.getarglist()
- srcloc = self.make_sure_var_in_reg(op.getarg(0), args)
resloc = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
sizearg = op.getarg(0)
result = op.result
@@ -1607,7 +1606,7 @@
assert isinstance(result, BoxVector)
size = sizearg.item_size
tosize = result.item_size
- self.perform(op, [srcloc, imm(size), imm(tosize)], resloc)
+ self.perform(op, [resloc, imm(size), imm(tosize)], resloc)
def consider_vec_box(self, op):
# pseudo instruction, needed to create a new variable
@@ -1617,7 +1616,7 @@
pass
def consider_vec_cast_float_to_singlefloat(self, op):
- count = op.getarg(1)
+ count = op.getarg(2)
assert isinstance(count, ConstInt)
args = op.getarglist()
loc0 = self.make_sure_var_in_reg(op.getarg(0), args)
@@ -1636,12 +1635,12 @@
self.perform(op, [loc0, tmploc, imm(index.value)], result)
def consider_vec_cast_float_to_int(self, op):
- count = op.getarg(1)
- assert isinstance(count, ConstInt)
+ src = op.getarg(0)
+ res = op.result
args = op.getarglist()
- loc0 = self.make_sure_var_in_reg(op.getarg(0), args)
- result = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
- self.perform(op, [loc0, imm(count.value)], result)
+ srcloc = self.make_sure_var_in_reg(src, args)
+ resloc = self.xrm.force_result_in_reg(res, src, args)
+ self.perform(op, [srcloc], resloc)
consider_vec_cast_int_to_float = consider_vec_cast_float_to_int
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -732,7 +732,8 @@
MOVSS_xx = xmminsn('\xF3', rex_nw, '\x0F\x10', register(1,8), register(2), '\xC0')
- PSRLDQ_xi = xmminsn('\x66', rex_nw, '\x0F\x73', register(1,8), immediate(2, 'b'))
+ PSRLDQ_xi = xmminsn('\x66', rex_nw, '\x0F\x73', register(1),
+ orbyte(0x3 << 3), '\xC0', immediate(2, 'b'))
UNPCKLPD_xx = xmminsn('\x66', rex_nw, '\x0F\x14', register(1, 8), register(2), '\xC0')
UNPCKHPD_xx = xmminsn('\x66', rex_nw, '\x0F\x15', register(1, 8), register(2), '\xC0')
UNPCKLPS_xx = xmminsn( rex_nw, '\x0F\x14', register(1, 8), register(2), '\xC0')
@@ -743,10 +744,10 @@
PSHUFD_xxi = xmminsn('\x66', rex_nw, '\x0F\x70', register(1,8), register(2), '\xC0', immediate(3, 'b'))
# following require SSE4_1
- PEXTRQ_rxi = xmminsn('\x66', rex_w, '\x0F\x3A\x16', register(1,8), register(2), '\xC0', immediate(3, 'b'))
- PEXTRD_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x16', register(1,8), register(2), '\xC0', immediate(3, 'b'))
- PEXTRW_rxi = xmminsn('\x66', rex_nw, '\x0F\xC4', register(1,8), register(2), '\xC0', immediate(3, 'b'))
- PEXTRB_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x14', register(1,8), register(2), '\xC0', immediate(3, 'b'))
+ PEXTRQ_rxi = xmminsn('\x66', rex_w, '\x0F\x3A\x16', register(2,8), register(1), '\xC0', immediate(3, 'b'))
+ PEXTRD_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x16', register(2,8), register(1), '\xC0', immediate(3, 'b'))
+ PEXTRW_rxi = xmminsn('\x66', rex_nw, '\x0F\xC4', register(2,8), register(1), '\xC0', immediate(3, 'b'))
+ PEXTRB_rxi = xmminsn('\x66', rex_nw, '\x0F\x3A\x14', register(2,8), register(1), '\xC0', immediate(3, 'b'))
PINSRQ_xri = xmminsn('\x66', rex_w, '\x0F\x3A\x22', register(1,8), register(2), '\xC0', immediate(3, 'b'))
PINSRD_xri = xmminsn('\x66', rex_nw, '\x0F\x3A\x22', register(1,8), register(2), '\xC0', immediate(3, 'b'))
PINSRW_xri = xmminsn('\x66', rex_nw, '\x0F\xC5', register(1,8), register(2), '\xC0', immediate(3, 'b'))
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -1001,7 +1001,7 @@
i3 = int_lt(i2, 10)
guard_true(i3) [p0,i0]
v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr)
- v3 = vec_expand(42, 2)
+ v3 = vec_int_expand(42)
v2 = vec_int_mul(v1, v3, 2)
jump(p0,i2)
"""
@@ -1028,7 +1028,7 @@
i3 = int_lt(i2, 10)
guard_true(i3) [p0,i0]
v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr)
- v3 = vec_expand(f3, 2)
+ v3 = vec_float_expand(f3)
v2 = vec_int_mul(v1, v3, 2)
jump(p0,i2,f3)
"""
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -562,6 +562,9 @@
def is_valid(self):
return self.type != PackType.UNKNOWN_TYPE and self.size > 0
+ def new_vector_box(self, count):
+ return BoxVector(self.type, count, self.size, self.signed)
+
def record_vbox(self, vbox):
if self.type == PackType.UNKNOWN_TYPE:
self.type = vbox.item_type
@@ -577,44 +580,56 @@
return PackType(self.type, self.size, self.signed)
-class PackArgs(object):
- def __init__(self, arg_pos, result_type=None, result=True, index=-1):
- self.mask = 0
- self.result_type = result_type
- self.result = result
+class OpToVectorOp(object):
+ def __init__(self, arg_ptypes, result_ptype, index=-1, result_vsize_arg=-1):
+ self.arg_ptypes = arg_ptypes
+ self.result_ptype = result_ptype
+ # TODO remove them?
+ self.result = result_ptype != None
+ self.result_vsize_arg = result_vsize_arg
self.index = index
- for p in arg_pos:
- self.mask |= (1<<p)
- def getpacktype(self):
- if self.result_type is not None:
- return self.result_type.clone()
- return PackType(PackType.UNKNOWN_TYPE, 0, True)
+ def get_result_ptype(self):
+ return self.result_ptype
+
+ def get_arg_ptype(self, i):
+ if i < 0 or i >= len(self.arg_ptypes):
+ return None
+ return self.arg_ptypes[i]
def vector_arg(self, i):
- return bool((1<<(i)) & self.mask)
+ if i < 0 or i >= len(self.arg_ptypes):
+ return False
+ return self.arg_ptypes[i] is not None
+PT_FLOAT = PackType(FLOAT, 4, False)
+PT_DOUBLE = PackType(FLOAT, 8, False)
+PT_INT_GENERIC = PackType(INT, -1, True)
+PT_INT64 = PackType(INT, 8, True)
+PT_FLOAT_GENERIC = PackType(INT, -1, True)
+PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, True)
ROP_ARG_RES_VECTOR = {
- rop.VEC_INT_ADD: PackArgs((0,1)),
- rop.VEC_INT_SUB: PackArgs((0,1)),
- rop.VEC_INT_MUL: PackArgs((0,1)),
- rop.VEC_INT_SIGNEXT: PackArgs((0,)),
+ rop.VEC_INT_ADD: OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC), PT_INT_GENERIC),
+ rop.VEC_INT_SUB: OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC), PT_INT_GENERIC),
+ rop.VEC_INT_MUL: OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC), PT_INT_GENERIC),
+ rop.VEC_INT_SIGNEXT: OpToVectorOp((PT_INT_GENERIC,), PT_INT_GENERIC, result_vsize_arg=1),
- rop.VEC_FLOAT_ADD: PackArgs((0,1)),
- rop.VEC_FLOAT_SUB: PackArgs((0,1)),
- rop.VEC_FLOAT_MUL: PackArgs((0,1)),
- rop.VEC_FLOAT_EQ: PackArgs((0,1), result_type=PackType(INT, -1, True)),
+ rop.VEC_FLOAT_ADD: OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC), PT_FLOAT_GENERIC),
+ rop.VEC_FLOAT_SUB: OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC), PT_FLOAT_GENERIC),
+ rop.VEC_FLOAT_MUL: OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC), PT_FLOAT_GENERIC),
+ rop.VEC_FLOAT_EQ: OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC), PT_INT_GENERIC),
- rop.VEC_RAW_LOAD: PackArgs(()),
- rop.VEC_GETARRAYITEM_RAW: PackArgs(()),
- rop.VEC_RAW_STORE: PackArgs((2,), result=False),
- rop.VEC_SETARRAYITEM_RAW: PackArgs((2,), result=False),
+ rop.VEC_RAW_LOAD: OpToVectorOp((), PT_GENERIC),
+ rop.VEC_GETARRAYITEM_RAW: OpToVectorOp((), PT_GENERIC),
+ rop.VEC_RAW_STORE: OpToVectorOp((None,None,PT_INT_GENERIC,), None),
+ rop.VEC_SETARRAYITEM_RAW: OpToVectorOp((None,None,PT_INT_GENERIC,), None),
- rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: PackArgs((0,), result_type=PackType(FLOAT, 4, False)),
- rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: PackArgs((0,), result_type=PackType(FLOAT, 8, False), index=1),
- rop.VEC_CAST_FLOAT_TO_INT: PackArgs((0,), result_type=PackType(INT, 8, True)),
- rop.VEC_CAST_INT_TO_FLOAT: PackArgs((0,), result_type=PackType(FLOAT, 8, False)),
+ rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOp((PT_DOUBLE,), PT_FLOAT),
+ # TODO remove index
+ rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOp((PT_FLOAT,), PT_DOUBLE, index=1),
+ rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOp((PT_DOUBLE,), PT_INT64),
+ rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOp((PT_INT64,), PT_DOUBLE),
}
@@ -639,7 +654,11 @@
assert op_count > 1
self.pack = pack
# properties that hold for the pack are:
- # isomorphism (see func above)
+ # + isomorphism (see func above)
+ # + tight packed (no room between vector elems)
+ if pack.operations[0].op.vector == rop.VEC_RAW_LOAD:
+ assert pack.ptype is not None
+ print pack.ptype
if pack.ptype is None:
self.propagate_ptype()
@@ -663,51 +682,61 @@
assert op0.vector != -1
args = op0.getarglist()[:]
- packargs = ROP_ARG_RES_VECTOR.get(op0.vector, None)
- if packargs is None:
+ tovector = ROP_ARG_RES_VECTOR.get(op0.vector, None)
+ if tovector is None:
raise NotImplementedError("vecop map entry missing. trans: pack -> vop")
- if packargs.index != -1:
+ if tovector.index != -1:
args.append(ConstInt(self.pack_off))
args.append(ConstInt(self.pack_ops))
vop = ResOperation(op0.vector, args, op0.result, op0.getdescr())
for i,arg in enumerate(args):
- if packargs.vector_arg(i):
- self.vector_arg(vop, i, True)
- if packargs.result:
- self.vector_result(vop, packargs)
+ arg_ptype = tovector.get_arg_ptype(i)
+ if arg_ptype is not None:
+ if arg_ptype.size == -1:
+ arg_ptype = self.pack.ptype
+ self.vector_arg(vop, i, arg_ptype)
+ if tovector.result:
+ self.vector_result(vop, tovector)
self.preamble_ops.append(vop)
def propagate_ptype(self):
op0 = self.pack.operations[0].getoperation()
- packargs = ROP_ARG_RES_VECTOR.get(op0.vector, None)
- if packargs is None:
+ tovector = ROP_ARG_RES_VECTOR.get(op0.vector, None)
+ if tovector is None:
raise NotImplementedError("vecop map entry missing. trans: pack -> vop")
args = op0.getarglist()[:]
- ptype = packargs.getpacktype()
+ res_ptype = tovector.get_result_ptype()
for i,arg in enumerate(args):
- if packargs.vector_arg(i):
+ if tovector.vector_arg(i):
_, vbox = self.box_to_vbox.get(arg, (-1, None))
if vbox is not None:
- ptype.record_vbox(vbox)
+ res_ptype.record_vbox(vbox)
else:
# vbox of a variable/constant is not present here
pass
- self.pack.ptype = ptype
+ self.pack.ptype = res_ptype
- def vector_result(self, vop, packargs):
+ def vector_result(self, vop, tovector):
ops = self.pack.operations
result = vop.result
- if packargs.result_type is not None:
- ptype = packargs.getpacktype()
+ ptype = tovector.get_result_ptype()
+ if ptype is not None and ptype.gettype() != PackType.UNKNOWN_TYPE:
if ptype.size == -1:
ptype.size = self.pack.ptype.size
vbox = self.box_vector(ptype)
else:
vbox = self.box_vector(self.pack.ptype)
+ if tovector.result_vsize_arg != -1:
+ # vec_int_signext specifies the size in bytes on the
+ # first argument.
+ arg = vop.getarg(tovector.result_vsize_arg)
+ assert isinstance(arg, ConstInt)
+ vbox.item_size = arg.value
+ #
vop.result = vbox
i = self.pack_off
end = i + self.pack_ops
@@ -720,24 +749,50 @@
""" TODO remove this? """
return BoxVector(ptype.type, self.pack_ops, ptype.size, ptype.signed)
- def vector_arg(self, vop, argidx, expand):
+ def vector_arg(self, vop, argidx, arg_ptype):
ops = self.pack.operations
_, vbox = self.box_to_vbox.get(vop.getarg(argidx), (-1, None))
if not vbox:
vbox = self.expand_box_to_vector_box(vop, argidx)
# vbox is a primitive type mixin
- packable = self.vec_reg_size // self.pack.ptype.getsize()
+ packable = self.vec_reg_size // arg_ptype.getsize()
packed = vbox.item_count
+ assert packed >= 0
+ assert packable >= 0
if packed < packable:
- # due to casting problems values might be scattered along
- # different vector boxes
+ # the argument is scattered along different vector boxes
args = [op.getoperation().getarg(argidx) for op in ops]
- self.package(vbox, packed, args, packable)
- _, vbox = self.box_to_vbox.get(vop.getarg(argidx), (-1, None))
+ vbox = self._pack(vbox, packed, args, packable)
+ elif packed > packable:
+ # the argument has more items than the operation is able to process!
+ vbox = self.unpack(vbox, self.pack_off, packable, arg_ptype)
+ vbox = self.extend(vbox, arg_ptype)
vop.setarg(argidx, vbox)
return vbox
- def package(self, tgt_box, index, args, packable):
+ def extend(self, vbox, arg_ptype):
+ py.test.set_trace()
+ if vbox.item_count * vbox.item_size == self.vec_reg_size:
+ return vbox
+ size = arg_ptype.getsize()
+ assert (vbox.item_count * size) == self.vec_reg_size
+ opnum = rop.VEC_INT_SIGNEXT
+ vbox_cloned = arg_ptype.new_vector_box(vbox.item_count)
+ op = ResOperation(opnum, [vbox, ConstInt(size), ConstInt(vbox.item_count)], vbox_cloned)
+ self.preamble_ops.append(op)
+ return vbox_cloned
+
+ def unpack(self, vbox, index, count, arg_ptype):
+ vbox_cloned = vbox.clonebox()
+ vbox_cloned.item_count = count
+ opnum = rop.VEC_FLOAT_UNPACK
+ if vbox.item_type == INT:
+ opnum = rop.VEC_INT_UNPACK
+ op = ResOperation(opnum, [vbox, ConstInt(index), ConstInt(count)], vbox_cloned)
+ self.preamble_ops.append(op)
+ return vbox_cloned
+
+ def _pack(self, tgt_box, index, args, packable):
""" If there are two vector boxes:
v1 = [<empty>,<emtpy>,X,Y]
v2 = [A,B,<empty>,<empty>]
@@ -747,6 +802,7 @@
opnum = rop.VEC_FLOAT_PACK
if tgt_box.item_type == INT:
opnum = rop.VEC_INT_PACK
+ py.test.set_trace()
arg_count = len(args)
i = index
while i < arg_count and tgt_box.item_count < packable:
@@ -768,6 +824,8 @@
for j in range(i):
arg = args[j]
self.box_to_vbox[arg] = (j, new_box)
+ _, vbox = self.box_to_vbox.get(args[0], (-1, None))
+ return vbox
def _check_vec_pack(self, op):
result = op.result
@@ -808,6 +866,7 @@
if box_type == INT:
expand_opnum = rop.VEC_INT_EXPAND
+ # TODO
vbox = BoxVector(box_type, self.pack_ops)
if all_same_box:
expand_op = ResOperation(expand_opnum, [arg], vbox)
More information about the pypy-commit
mailing list