[pypy-commit] pypy vecopt-merge: x86 assembler halfway through; accumulation leaves behind a fail descr that the code tries to stitch (but that was removed from the trace)
plan_rich
noreply at buildbot.pypy.org
Thu Sep 24 17:50:56 CEST 2015
Author: Richard Plangger <planrichi at gmail.com>
Branch: vecopt-merge
Changeset: r79812:fd39e085206b
Date: 2015-09-24 17:50 +0200
http://bitbucket.org/pypy/pypy/changeset/fd39e085206b/
Log: x86 assembler halfway through; accumulation leaves behind a fail
descr that the code tries to stitch (but that was removed from the trace)
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -1787,8 +1787,7 @@
self.guard_success_cc = rx86.Conditions['E']
self.implement_guard(guard_token)
- def genop_guard_guard_nonnull_class(self, ign_1, guard_op,
- guard_token, locs, ign_2):
+ def genop_guard_guard_nonnull_class(self, guard_op, guard_token, locs, ign):
self.mc.CMP(locs[0], imm1)
# Patched below
self.mc.J_il8(rx86.Conditions['B'], 0)
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -63,8 +63,7 @@
save_around_call_regs = [eax, ecx, edx, esi, edi, r8, r9, r10]
class X86XMMRegisterManager(RegisterManager):
-
- box_types = [FLOAT, VECTOR]
+ box_types = [FLOAT, INT] # yes INT!
all_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7]
# we never need lower byte I hope
save_around_call_regs = all_regs
@@ -203,7 +202,7 @@
return self.fm.get_frame_depth()
def possibly_free_var(self, var):
- if var.type == FLOAT or var.type == VECTOR:
+ if var.type == FLOAT or var.is_vector():
self.xrm.possibly_free_var(var)
else:
self.rm.possibly_free_var(var)
@@ -223,7 +222,7 @@
def make_sure_var_in_reg(self, var, forbidden_vars=[],
selected_reg=None, need_lower_byte=False):
- if var.type == FLOAT or var.type == VECTOR:
+ if var.type == FLOAT or var.is_vector():
if isinstance(var, ConstFloat):
return FloatImmedLoc(var.getfloatstorage())
return self.xrm.make_sure_var_in_reg(var, forbidden_vars,
@@ -234,7 +233,7 @@
def force_allocate_reg(self, var, forbidden_vars=[], selected_reg=None,
need_lower_byte=False):
- if var.type == FLOAT or var.type == VECTOR:
+ if var.type == FLOAT or var.is_vector():
return self.xrm.force_allocate_reg(var, forbidden_vars,
selected_reg, need_lower_byte)
else:
@@ -317,26 +316,15 @@
self.assembler.regalloc_perform_math(op, arglocs, result_loc)
def locs_for_fail(self, guard_op):
- faillocs = []
+ faillocs = [self.loc(arg) for arg in guard_op.getfailargs()]
descr = guard_op.getdescr()
- for i,arg in enumerate(guard_op.getfailargs()):
- if arg is None:
- faillocs.append(None)
- continue
- if arg.is_vector() and arg.getaccum():
- # for an accumulator store the position of the original
- # box and in llsupport/assembler save restore information
- # on the descriptor
- loc = self.loc(accum.getoriginalbox())
- faillocs.append(loc)
- assert isinstance(descr, ResumeGuardDescr)
- descr.rd_accum_list = AccumInfo(descr.rd_accum_list,
- i, accum.operator,
- accum.getoriginalbox(),
- self.loc(arg))
- else:
- faillocs.append(self.loc(arg))
-
+ if descr and descr.rd_accum_list:
+ accuminfo = descr.rd_accum_list
+ while accuminfo:
+ accuminfo.vector_loc = faillocs[accuminfo.getpos_in_failargs()]
+ loc = self.loc(accuminfo.getoriginal())
+ faillocs[accuminfo.getpos_in_failargs()] = loc
+ accuminfo = accuminfo.next()
return faillocs
def perform_guard(self, guard_op, arglocs, result_loc):
@@ -406,7 +394,7 @@
def loc(self, v):
if v is None: # xxx kludgy
return None
- if v.type == FLOAT or v.type == VECTOR:
+ if v.type == FLOAT or v.is_vector():
return self.xrm.loc(v)
return self.rm.loc(v)
@@ -1392,7 +1380,7 @@
box = op.getarg(i)
src_loc = self.loc(box)
dst_loc = arglocs[i]
- if box.type != FLOAT and box.type != VECTOR:
+ if box.type != FLOAT and not box.is_vector():
src_locations1.append(src_loc)
dst_locations1.append(dst_loc)
else:
diff --git a/rpython/jit/backend/x86/test/test_x86vector.py b/rpython/jit/backend/x86/test/test_x86vector.py
--- a/rpython/jit/backend/x86/test/test_x86vector.py
+++ b/rpython/jit/backend/x86/test/test_x86vector.py
@@ -14,9 +14,18 @@
class TestBasic(test_basic.Jit386Mixin, test_vector.VectorizeTests):
# for the individual tests see
# ====> ../../../metainterp/test/test_basic.py
+ def setup_method(self, method):
+ clazz = self.CPUClass
+ def init(*args, **kwargs):
+ cpu = clazz(*args, **kwargs)
+ cpu.supports_guard_gc_type = True
+ return cpu
+ self.CPUClass = init
+
enable_opts = 'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll'
class TestAssembler(BaseTestAssembler):
+
def imm_4_int32(self, a, b, c, d):
adr = self.xrm.assembler.datablockwrapper.malloc_aligned(16, 16)
ptr = rffi.cast(rffi.CArrayPtr(rffi.INT), adr)
diff --git a/rpython/jit/backend/x86/vector_ext.py b/rpython/jit/backend/x86/vector_ext.py
--- a/rpython/jit/backend/x86/vector_ext.py
+++ b/rpython/jit/backend/x86/vector_ext.py
@@ -568,7 +568,7 @@
args = op.getarglist()
base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
- result_loc = self.force_allocate_reg(op.result)
+ result_loc = self.force_allocate_reg(op)
self.perform(op, [base_loc, ofs_loc, imm(itemsize), imm(ofs),
imm(integer), imm(aligned)], result_loc)
@@ -601,11 +601,10 @@
def consider_vec_arith(self, op):
lhs = op.getarg(0)
- assert isinstance(lhs, BoxVector)
- size = lhs.item_size
+ size = lhs.bytesize
args = op.getarglist()
loc1 = self.make_sure_var_in_reg(op.getarg(1), args)
- loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
+ loc0 = self.xrm.force_result_in_reg(op, op.getarg(0), args)
self.perform(op, [loc0, loc1, imm(size)], loc0)
consider_vec_int_add = consider_vec_arith
@@ -622,7 +621,7 @@
assert isinstance(lhs, BoxVector)
size = lhs.item_size
args = op.getarglist()
- res = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
+ res = self.xrm.force_result_in_reg(op, op.getarg(0), args)
self.perform(op, [res, imm(size)], res)
consider_vec_float_neg = consider_vec_arith_unary
@@ -631,19 +630,17 @@
def consider_vec_logic(self, op):
lhs = op.getarg(0)
- assert isinstance(lhs, BoxVector)
- size = lhs.item_size
args = op.getarglist()
source = self.make_sure_var_in_reg(op.getarg(1), args)
- result = self.force_result_in_reg(op.result, op.getarg(0), args)
- self.perform(op, [source, imm(size)], result)
+ result = self.xrm.force_result_in_reg(op, op.getarg(0), args)
+ self.perform(op, [source, imm(lhs.bytesize)], result)
def consider_vec_float_eq(self, op, guard_op):
lhs = op.getarg(0)
assert isinstance(lhs, BoxVector)
size = lhs.item_size
args = op.getarglist()
- lhsloc = self.force_result_in_reg(op.result, op.getarg(0), args)
+ lhsloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
rhsloc = self.make_sure_var_in_reg(op.getarg(1), args)
if guard_op:
self.perform_with_guard(op, guard_op, [lhsloc, rhsloc, imm(size)], None)
@@ -668,12 +665,11 @@
assert isinstance(count, ConstInt)
args = op.getarglist()
srcloc = self.make_sure_var_in_reg(arg, args)
- resloc = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
+ resloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
residx = index.value # where to put it in result?
srcidx = 0
- assert isinstance(op.result, BoxVector)
- size = op.result.getsize()
- arglocs = [resloc, srcloc, imm(residx), imm(srcidx), imm(count.value), imm(size)]
+ arglocs = [resloc, srcloc, imm(residx), imm(srcidx),
+ imm(count.value), imm(op.bytesize)]
self.perform(op, arglocs, resloc)
consider_vec_pack_f = consider_vec_pack_i
@@ -703,19 +699,15 @@
consider_vec_unpack_f = consider_vec_unpack_i
def consider_vec_expand_f(self, op):
- result = op.result
- assert isinstance(result, BoxVector)
arg = op.getarg(0)
args = op.getarglist()
if arg.is_constant():
- resloc = self.xrm.force_allocate_reg(result)
- srcloc = self.xrm.expand_float(result.getsize(), arg)
+ resloc = self.xrm.force_allocate_reg(op)
+ srcloc = self.xrm.expand_float(op.bytesize, arg)
else:
- resloc = self.xrm.force_result_in_reg(op.result, arg, args)
+ resloc = self.xrm.force_result_in_reg(op, arg, args)
srcloc = resloc
-
- size = op.result.getsize()
- self.perform(op, [srcloc, imm(size)], resloc)
+ self.perform(op, [srcloc, imm(op.bytesize)], resloc)
def consider_vec_expand_i(self, op):
arg = op.getarg(0)
@@ -724,21 +716,15 @@
srcloc = self.rm.convert_to_imm(arg)
else:
srcloc = self.make_sure_var_in_reg(arg, args)
- resloc = self.xrm.force_allocate_reg(op.result, args)
- assert isinstance(op.result, BoxVector)
- size = op.result.getsize()
- self.perform(op, [srcloc, imm(size)], resloc)
+ resloc = self.xrm.force_allocate_reg(op, args)
+ self.perform(op, [srcloc, imm(op.bytesize)], resloc)
def consider_vec_int_signext(self, op):
args = op.getarglist()
- resloc = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
- sizearg = op.getarg(0)
- result = op.result
- assert isinstance(sizearg, BoxVector)
- assert isinstance(result, BoxVector)
- size = sizearg.getsize()
- tosize = result.getsize()
- self.perform(op, [resloc, imm(size), imm(tosize)], resloc)
+ resloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
+ size = op.cast_from_bytesize()
+ assert size > 0
+ self.perform(op, [resloc, imm(size), imm(op.bytesize)], resloc)
def consider_vec_int_is_true(self, op, guard_op):
args = op.getarglist()
@@ -753,8 +739,8 @@
def _consider_vec(self, op):
# pseudo instruction, needed to create a new variable
- self.xrm.force_allocate_reg(op.result)
-
+ self.xrm.force_allocate_reg(op)
+
consider_vec_i = _consider_vec
consider_vec_f = _consider_vec
@@ -764,7 +750,7 @@
def consider_vec_cast_float_to_int(self, op):
args = op.getarglist()
srcloc = self.make_sure_var_in_reg(op.getarg(0), args)
- resloc = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
+ resloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
self.perform(op, [srcloc], resloc)
consider_vec_cast_int_to_float = consider_vec_cast_float_to_int
diff --git a/rpython/jit/metainterp/compile.py b/rpython/jit/metainterp/compile.py
--- a/rpython/jit/metainterp/compile.py
+++ b/rpython/jit/metainterp/compile.py
@@ -301,7 +301,8 @@
from rpython.jit.metainterp.optimizeopt.vector import optimize_vector
loop_info, loop_ops = optimize_vector(metainterp_sd,
jitdriver_sd, warmstate,
- loop_info, loop_ops)
+ loop_info, loop_ops,
+ jitcell_token)
#
loop = create_empty_loop(metainterp)
loop.original_jitcell_token = jitcell_token
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -122,7 +122,6 @@
break
i -= 1
else:
- print "insert at 0", target
worklist.insert(0, target)
node.clear_dependencies()
node.emitted = True
@@ -379,7 +378,6 @@
def unpack_from_vector(state, arg, index, count):
""" Extract parts of the vector box into another vector box """
- #print "unpack i", index, "c", count, "v", arg
assert count > 0
assert index + count <= arg.count
args = [arg, ConstInt(index), ConstInt(count)]
@@ -555,9 +553,7 @@
descr.rd_accum_list = AccumInfo(descr.rd_accum_list, i,
accum.operator, arg, None)
seed = accum.getleftmostseed()
- print "pre", failargs[i], "=>",
failargs[i] = self.renamer.rename_map.get(seed, seed)
- print failargs[i]
def profitable(self):
return self.costmodel.profitable()
@@ -613,7 +609,6 @@
if argument and not argument.is_constant():
arg = self.ensure_unpacked(i, argument)
if argument is not arg:
- print "exchange at", i, fail_args[i], "=", arg
fail_args[i] = arg
def ensure_unpacked(self, index, arg):
@@ -780,12 +775,10 @@
vector register.
"""
before_count = len(packlist)
- print "splitting pack", self
pack = self
while pack.pack_load(vec_reg_size) > Pack.FULL:
pack.clear()
oplist, newoplist = pack.slice_operations(vec_reg_size)
- print " split of %dx, left: %d" % (len(oplist), len(newoplist))
pack.operations = oplist
pack.update_pack_of_nodes()
if not pack.leftmost().is_typecast():
@@ -801,7 +794,6 @@
newpack.clear()
newpack.operations = []
break
- print " => %dx packs out of %d operations" % (-before_count + len(packlist) + 1, sum([pack.numops() for pack in packlist[before_count:]]))
pack.update_pack_of_nodes()
def slice_operations(self, vec_reg_size):
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -44,19 +44,22 @@
self.jump = jump
assert self.jump.getopnum() == rop.JUMP
- def finaloplist(self, jitcell_token=None, label=False):
+ def finaloplist(self, jitcell_token=None, reset_label_token=True, label=False):
oplist = []
if jitcell_token:
- token = TargetToken(jitcell_token)
- token.original_jitcell_token = jitcell_token
- jitcell_token.target_tokens.append(token)
- self.label.setdescr(token)
+ if reset_label_token:
+ token = TargetToken(jitcell_token)
+ token.original_jitcell_token = jitcell_token
+ jitcell_token.target_tokens.append(token)
+ self.label.setdescr(token)
if self.prefix_label:
token = TargetToken(jitcell_token)
token.original_jitcell_token = jitcell_token
jitcell_token.target_tokens.append(token)
self.prefix_label.setdescr(token)
- self.jump.setdescr(token)
+ self.jump.setdescr(token)
+ if reset_label_token:
+ self.jump.setdescr(token)
if self.prefix_label:
oplist = self.prefix + [self.prefix_label]
elif self.prefix:
@@ -93,7 +96,8 @@
loop.prefix_label = prefix_label
return loop
-def optimize_vector(metainterp_sd, jitdriver_sd, warmstate, loop_info, loop_ops):
+def optimize_vector(metainterp_sd, jitdriver_sd, warmstate,
+ loop_info, loop_ops, jitcell_token=None):
""" Enter the world of SIMD. Bails if it cannot transform the trace. """
user_code = not jitdriver_sd.vec and warmstate.vec_all
loop = VectorLoop(loop_info.label_op, loop_ops[1:-1], loop_ops[-1])
@@ -122,7 +126,7 @@
debug_stop("vec-opt-loop")
#
info.label_op = loop.label
- return info, loop.finaloplist()
+ return info, loop.finaloplist(jitcell_token=jitcell_token, reset_label_token=False)
except NotAVectorizeableLoop:
debug_stop("vec-opt-loop")
# vectorization is not possible
diff --git a/rpython/jit/metainterp/optimizeopt/version.py b/rpython/jit/metainterp/optimizeopt/version.py
--- a/rpython/jit/metainterp/optimizeopt/version.py
+++ b/rpython/jit/metainterp/optimizeopt/version.py
@@ -112,62 +112,10 @@
else:
assert 0, "olddescr must be found"
- def update_token(self, jitcell_token, all_target_tokens):
- # this is only invoked for versioned loops!
- # TODO
- label_index = index_of_first(rop.LABEL, self.operations, 0)
- label = self.operations[label_index]
- jump = self.operations[-1]
- #
- assert jump.getopnum() == rop.JUMP
- #
- token = TargetToken(jitcell_token)
- token.original_jitcell_token = jitcell_token
- all_target_tokens.append(token)
- if label.getdescr() is None or label.getdescr() is not jump.getdescr():
- label_index = index_of_first(rop.LABEL, self.operations, 1)
- if label_index > 0:
- second_label = self.operations[label_index]
- # set the inner loop
- second_label.setdescr(token)
- jump.setdescr(token)
- # set the first label
- token = TargetToken(jitcell_token)
- token.original_jitcell_token = jitcell_token
- all_target_tokens.append(token)
- label.setdescr(token)
- return
- label.setdescr(token)
- jump.setdescr(token)
-
def create_backend_loop(self, metainterp, jitcell_token):
vl = create_empty_loop(metainterp)
- vl.operations = self.loop.finaloplist(jitcell_token,True)
+ vl.operations = self.loop.finaloplist(jitcell_token,True,True)
vl.inputargs = self.loop.label.getarglist_copy()
vl.original_jitcell_token = jitcell_token
return vl
-
-#def index_of_first(opnum, operations, pass_by=0):
-# """ returns the position of the first operation matching the opnum.
-# Or -1 if non is found
-# """
-# for i,op in enumerate(operations):
-# if op.getopnum() == opnum:
-# if pass_by == 0:
-# return i
-# else:
-# pass_by -= 1
-# return -1
-#
-#def find_first_index(self, opnum, pass_by=0):
-# """ return the first index of the operation having the same opnum or -1 """
-# return index_of_first(opnum, self.operations, pass_by)
-#
-#def find_first(self, opnum, pass_by=0):
-# index = self.find_first_index(opnum, pass_by)
-# if index != -1:
-# return self.operations[index]
-# return None
-
-
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -1185,12 +1185,15 @@
_cast_ops = {
'CAST_FLOAT_TO_INT': ('f', 8, 'i', 4, 2),
+ 'VEC_CAST_FLOAT_TO_INT': ('f', 8, 'i', 4, 2),
'CAST_INT_TO_FLOAT': ('i', 4, 'f', 8, 2),
+ 'VEC_CAST_INT_TO_FLOAT': ('i', 4, 'f', 8, 2),
'CAST_FLOAT_TO_SINGLEFLOAT': ('f', 8, 'f', 4, 2),
+ 'VEC_CAST_FLOAT_TO_SINGLEFLOAT': ('f', 8, 'f', 4, 2),
'CAST_SINGLEFLOAT_TO_FLOAT': ('f', 4, 'f', 8, 2),
+ 'VEC_CAST_SINGLEFLOAT_TO_FLOAT': ('f', 4, 'f', 8, 2),
'INT_SIGNEXT': ('i', 0, 'i', 0, 0),
- #'CAST_PTR_TO_INT': ('r', 0, 'i', 4),
- #'CAST_INT_TO_PTR': ('i', 4, 'r', 0),
+ 'VEC_INT_SIGNEXT': ('i', 0, 'i', 0, 0),
}
# ____________________________________________________________
@@ -1292,7 +1295,7 @@
else:
assert result_type == 'n'
if name in _cast_ops:
- if name == "INT_SIGNEXT":
+ if "INT_SIGNEXT" in name:
mixins.append(SignExtOp)
mixins.append(CastOp)
@@ -1302,7 +1305,6 @@
return type(cls_name, bases, dic)
setup(__name__ == '__main__') # print out the table when run directly
-del _oplist
_opboolinverse = {
rop.INT_EQ: rop.INT_NE,
diff --git a/rpython/jit/metainterp/resume.py b/rpython/jit/metainterp/resume.py
--- a/rpython/jit/metainterp/resume.py
+++ b/rpython/jit/metainterp/resume.py
@@ -57,6 +57,9 @@
self.scalar_box = box
self.vector_loc = loc
+ def getoriginal(self):
+ return self.scalar_box
+
def getpos_in_failargs(self):
return self.scalar_position
More information about the pypy-commit
mailing list