[pypy-commit] pypy vecopt-merge: vecopt.py tests passing again, now let's finally head to the assembler
plan_rich
noreply at buildbot.pypy.org
Mon Sep 21 11:40:20 CEST 2015
Author: Richard Plangger <planrichi at gmail.com>
Branch: vecopt-merge
Changeset: r79737:664117c201a8
Date: 2015-09-21 11:40 +0200
http://bitbucket.org/pypy/pypy/changeset/664117c201a8/
Log: vecopt.py tests are passing again; now let's finally head to the
assembler
diff --git a/rpython/jit/metainterp/optimizeopt/guard.py b/rpython/jit/metainterp/optimizeopt/guard.py
--- a/rpython/jit/metainterp/optimizeopt/guard.py
+++ b/rpython/jit/metainterp/optimizeopt/guard.py
@@ -91,7 +91,7 @@
guard.setdescr(descr.clone())
guard.setarg(0, box_result)
label = loop.find_first(rop.LABEL)
- guard.setfailargs(label.getarglist())
+ guard.setfailargs(label.getarglist()[:])
opt.emit_operation(guard)
return guard
@@ -120,7 +120,7 @@
descr = myop.getdescr()
descr.copy_all_attributes_from(other.op.getdescr())
myop.rd_frame_info_list = otherop.rd_frame_info_list
- myop.setfailargs(otherop.getfailargs())
+ myop.setfailargs(otherop.getfailargs()[:])
myop.rd_snapshot = otherop.rd_snapshot
def emit_varops(self, opt, var, old_arg):
@@ -140,6 +140,7 @@
opt.emit_operation(cmp_op)
# emit that actual guard
guard = ResOperation(self.op.getopnum(), [cmp_op], self.op.getdescr())
+ guard.setfailargs(self.op.getfailargs()[:])
opt.emit_operation(guard)
self.setindex(opt.operation_position()-1)
self.setoperation(guard)
@@ -173,6 +174,7 @@
self.strength_reduced = 0 # how many guards could be removed?
self.strongest_guards = {}
self.guards = {}
+ self.delayed = {}
def collect_guard_information(self, loop):
operations = loop.operations
@@ -271,8 +273,30 @@
def emit_operation(self, op):
self.renamer.rename(op)
+ #if op.is_always_pure():
+ # self.delay(op)
+ # return
+ #self.emit_delayed_for(op)
+ #if not op.is_always_pure():
self._newoperations.append(op)
+ def delay(self, op):
+ self.delayed[op] = None
+ print "delayed", op
+
+ def emit_delayed_for(self, op):
+ if op.is_inputarg():
+ return
+ additional = []
+ if op.is_guard():
+ additional = op.getfailargs()
+ for arg in op.getarglist() + additional:
+ if arg in self.delayed:
+ del self.delayed[arg]
+ self.emit_delayed_for(arg)
+ self._newoperations.append(op)
+
+
def operation_position(self):
return len(self._newoperations)
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -5,6 +5,7 @@
from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph,
MemoryRef, Node, IndexVar)
from rpython.jit.metainterp.optimizeopt.renamer import Renamer
+from rpython.jit.metainterp.resume import AccumInfo
from rpython.rlib.objectmodel import we_are_translated
from rpython.jit.metainterp.jitexc import NotAProfitableLoop
from rpython.rlib.objectmodel import specialize, always_inline
@@ -23,14 +24,16 @@
def post_schedule(self):
loop = self.graph.loop
self.renamer.rename(loop.jump)
+ self.ensure_args_unpacked(loop.jump)
loop.operations = self.oplist
loop.prefix = self.invariant_oplist
- if len(self.invariant_vector_vars) > 0:
- # TODO, accum?
+ if len(self.invariant_vector_vars) + len(self.invariant_oplist) > 0:
args = loop.label.getarglist_copy() + self.invariant_vector_vars
opnum = loop.label.getopnum()
# TODO descr?
- loop.prefix_label = loop.label.copy_and_change(opnum, args)
+ op = loop.label.copy_and_change(opnum, args)
+ self.renamer.rename(op)
+ loop.prefix_label = op
def profitable(self):
return True
@@ -172,25 +175,22 @@
def any_size(self):
return self.bytesize == TypeRestrict.ANY_SIZE
+ @always_inline
+ def any_count(self):
+ return self.count == TypeRestrict.ANY_COUNT
+
def check(self, value):
assert value.datatype != '\x00'
if self.type != TypeRestrict.ANY_TYPE:
- if self.type != value.datatype:
- assert 0, "type mismatch"
-
+ assert self.type == value.datatype
assert value.bytesize > 0
if not self.any_size():
- if self.bytesize != value.bytesize:
- assert 0, "size mismatch"
-
+ assert self.bytesize == value.bytesize
assert value.count > 0
if self.count != TypeRestrict.ANY_COUNT:
- if self.count != value.count:
- assert 0, "count mismatch"
-
+ assert value.count >= self.count
if self.sign != TypeRestrict.ANY_SIGN:
- if bool(self.sign) != value.sign:
- assert 0, "sign mismatch"
+ assert bool(self.sign) == value.sign
def max_input_count(self, count):
""" How many """
@@ -205,8 +205,7 @@
TR_ANY_INTEGER = TypeRestrict(INT)
TR_FLOAT_2 = TypeRestrict(FLOAT, 4, 2)
TR_DOUBLE_2 = TypeRestrict(FLOAT, 8, 2)
- TR_LONG = TypeRestrict(INT, 8, 2)
- TR_INT_2 = TypeRestrict(INT, 4, 2)
+ TR_INT32_2 = TypeRestrict(INT, 4, 2)
# note that the following definition is x86 arch specific
MAPPING = {
@@ -237,9 +236,10 @@
rop.VEC_INT_SIGNEXT: [TR_ANY_INTEGER],
rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: [TR_DOUBLE_2],
- rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: [TR_FLOAT_2],
+ # weird but the trace will store single floats in int boxes
+ rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: [TR_INT32_2],
rop.VEC_CAST_FLOAT_TO_INT: [TR_DOUBLE_2],
- rop.VEC_CAST_INT_TO_FLOAT: [TR_INT_2],
+ rop.VEC_CAST_INT_TO_FLOAT: [TR_INT32_2],
rop.VEC_FLOAT_EQ: [TR_ANY_FLOAT,TR_ANY_FLOAT],
rop.VEC_FLOAT_NE: [TR_ANY_FLOAT,TR_ANY_FLOAT],
@@ -264,11 +264,6 @@
assert isinstance(vecop, GuardResOp)
vecop.setfailargs(op.getfailargs())
vecop.rd_snapshot = op.rd_snapshot
- if pack.is_accumulating():
- for i,node in enumerate(pack.operations):
- op = node.getoperation()
- state.accumulation[op] = pack
-
def prepare_arguments(state, pack, args):
# Transforming one argument to a vector box argument
@@ -344,6 +339,12 @@
@always_inline
def position_values(state, restrict, pack, args, index, position):
+ arg = args[index]
+ newcount, count = restrict.count, arg.count
+ if not restrict.any_count() and newcount != count:
+ if position == 0:
+ pass
+ pass
if position != 0:
# The vector box is at a position != 0 but it
# is required to be at position 0. Unpack it!
@@ -527,18 +528,17 @@
#self.appendedvar_pos_arg_count = len(sched_data.invariant_vector_vars)
failargs = op.getfailargs()
descr = op.getdescr()
+ # note: stitching a guard must resemble the order of the label
+ # otherwise a wrong mapping is handed to the register allocator
for i,arg in enumerate(failargs):
if arg is None:
continue
accum = self.accumulation.get(arg, None)
if accum:
assert isinstance(accum, AccumPack)
- accum.attach_accum_info(descr.rd_accum_list, i)
-
- def post_schedule(self):
- loop = self.graph.loop
- self.ensure_args_unpacked(loop.jump)
- SchedulerState.post_schedule(self)
+ accum.attach_accum_info(descr, i, arg)
+ seed = accum.getseed()
+ failargs[i] = self.renamer.rename_map.get(seed, seed)
def profitable(self):
return self.costmodel.profitable()
@@ -602,6 +602,8 @@
if var:
if var in self.invariant_vector_vars:
return arg
+ if arg in self.accumulation:
+ return var
args = [var, ConstInt(pos), ConstInt(1)]
vecop = OpHelpers.create_vec_unpack(var.type, args, var.bytesize,
var.signed, 1)
@@ -757,12 +759,12 @@
vector register.
"""
before_count = len(packlist)
- #print "splitting pack", self
+ print "splitting pack", self
pack = self
while pack.pack_load(vec_reg_size) > Pack.FULL:
pack.clear()
oplist, newoplist = pack.slice_operations(vec_reg_size)
- #print " split of %dx, left: %d" % (len(oplist), len(newoplist))
+ print " split of %dx, left: %d" % (len(oplist), len(newoplist))
pack.operations = oplist
pack.update_pack_of_nodes()
if not pack.leftmost().is_typecast():
@@ -778,7 +780,7 @@
newpack.clear()
newpack.operations = []
break
- #print " => %dx packs out of %d operations" % (-before_count + len(packlist) + 1, sum([pack.numops() for pack in packlist[before_count:]]))
+ print " => %dx packs out of %d operations" % (-before_count + len(packlist) + 1, sum([pack.numops() for pack in packlist[before_count:]]))
pack.update_pack_of_nodes()
def slice_operations(self, vec_reg_size):
@@ -864,9 +866,8 @@
return 0
def attach_accum_info(self, descr, position, scalar):
- descr.rd_accum_list = AccumInfo(descr.rd_accum_list,
- position, self.operator,
- self.scalar, None)
+ descr.rd_accum_list = AccumInfo(descr.rd_accum_list, position, self.operator,
+ scalar, None)
def is_accumulating(self):
return True
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
@@ -1085,7 +1085,7 @@
f2 = float_add(f0, f1)
i1 = int_add(i0, 8)
i2 = int_lt(i1, 100)
- guard_false(i2) [p0, i0, f2]
+ guard_true(i2) [p0, i0, f2]
jump(p0, i1, f2)
"""
trace_opt = """
@@ -1094,9 +1094,11 @@
v7[2xf64] = vec_int_xor(v6[0xf64], v6[0xf64])
v2[2xf64] = vec_pack_f(v7[2xf64], f0, 0, 1)
label(p0, i0, v2[2xf64])
+ i100 = int_add(i0, 8)
+ i200 = int_lt(i100, 100)
i1 = int_add(i0, 16)
i2 = int_lt(i1, 100)
- guard_false(i2) [p0, i0, v2[2xf64]]
+ guard_true(i2) [p0, i0, v2[2xf64]]
i10 = int_add(i0, 16)
i20 = int_lt(i10, 100)
v1[2xf64] = vec_raw_load_f(p0, i0, descr=floatarraydescr)
@@ -1108,7 +1110,7 @@
self.assert_equal(loop, self.parse_loop(trace_opt))
def test_element_f45_in_guard_failargs(self):
- ops = """
+ trace = self.parse_loop("""
[p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18]
f45 = raw_load_f(i21, i44, descr=floatarraydescr)
guard_not_invalidated() [p38, p12, p9, p14, f45, p39, i37, i44, f35, i40, p42, i43, None, i28, p36, i41]
@@ -1122,33 +1124,33 @@
i52 = int_ge(i50, i18)
guard_false(i52) [p38, p12, p9, p14, i48, i46, f47, i51, i50, f45, p39, None, None, None, i40, p42, i43, None, None, p36, None]
jump(p36, i50, p9, i51, p14, f45, p12, p38, f47, p39, i40, i48, p42, i43, i46, i21, i4, i0, i18)
- """
- opt = """
+ """)
+ trace_opt = self.parse_loop("""
[p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18]
- guard_not_invalidated() [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42, i43, f34, i28, p36, i41]
+ guard_not_invalidated() [p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18]
i50 = int_add(i28, 1)
- i48 = int_add(i41, 8)
- i51 = int_add(i37, 8)
- i54 = int_add(i41, 16)
- i46 = int_add(i44, 8)
- i56 = int_add(i37, 16)
- i52 = int_ge(i50, i18)
- i637 = int_add(i28, 2)
- i638 = int_ge(i637, i18)
+ i20 = int_ge(i50, i18)
+ i54 = int_add(i28, 2)
+ i638 = int_ge(i54, i18)
guard_false(i638) [p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18]
+ i12 = int_add(i44, 8)
+ i56 = int_add(i41, 8)
+ i46 = int_add(i37, 8)
+ i47 = int_add(i28, 2)
+ i52 = int_ge(i47, i18)
i55 = int_add(i44, 16)
- i629 = int_add(i28, 2)
- i57 = int_ge(i637, i18)
- v61 = vec_raw_load_f(i21, i44, 2, descr=floatarraydescr)
- v62 = vec_raw_load_f(i4, i41, 2, descr=floatarraydescr)
- v63 = vec_float_add(v61, v62)
+ i629 = int_add(i41, 16)
+ i637 = int_add(i37, 16)
+ v61[2xf64] = vec_raw_load_f(i21, i44, descr=floatarraydescr)
+ v62[2xf64] = vec_raw_load_f(i4, i41, descr=floatarraydescr)
+ v63[2xf64] = vec_float_add(v61, v62)
vec_raw_store(i0, i37, v63, descr=floatarraydescr)
- f100 = vec_float_unpack(v61, 1, 1)
- f101 = vec_float_unpack(v62, 1, 1)
+ f100 = vec_unpack_f(v61, 1, 1)
+ f101 = vec_unpack_f(v62, 1, 1)
jump(p36, i637, p9, i56, p14, f100, p12, p38, f101, p39, i40, i54, p42, i43, i55, i21, i4, i0, i18)
- """
- vopt = self.vectorize(self.parse_loop(ops))
- self.assert_equal(vopt.loop, self.parse_loop(opt))
+ """)
+ vopt = self.vectorize(trace)
+ self.assert_equal(trace, trace_opt)
def test_shrink_vector_size(self):
ops = """
@@ -1187,7 +1189,7 @@
self.assert_equal(loop, self.parse_loop(opt))
def test_castup_arith_castdown(self):
- ops = """
+ trace = self.parse_loop("""
[p0,p1,p2,i0,i4]
i10 = raw_load_i(p0, i0, descr=float32arraydescr)
i1 = int_add(i0, 4)
@@ -1201,76 +1203,57 @@
i186 = int_lt(i5, 100)
guard_true(i186) []
jump(p0,p1,p2,i1,i5)
- """
- opt = """
+ """)
+ trace_opt = self.parse_loop("""
[p0, p1, p2, i0, i4]
i5 = int_add(i4, 4)
- i1 = int_add(i0, 4)
i186 = int_lt(i5, 100)
i500 = int_add(i4, 16)
i501 = int_lt(i500, 100)
guard_true(i501) [p0, p1, p2, i0, i4]
- i189 = int_add(i0, 8)
+ i189 = int_add(i0, 4)
i187 = int_add(i4, 8)
- i198 = int_add(i0, 12)
i188 = int_lt(i187, 100)
- i207 = int_add(i0, 16)
+ i207 = int_add(i0, 8)
i196 = int_add(i4, 12)
i197 = int_lt(i196, 100)
- i205 = int_add(i4, 16)
- i206 = int_lt(i205, 100)
- v228 = vec_raw_load_i(p0, i0, 4, descr=float32arraydescr)
- v229 = vec_cast_singlefloat_to_float(v228)
- v230 = vec_int_unpack(v228, 2, 2)
+ i205 = int_add(i0, 12)
+ i400 = int_add(i4, 16)
+ i401= int_lt(i400, 100)
+ i402 = int_add(i0, 16)
+ v228[4xi32] = vec_raw_load_i(p0, i0, descr=float32arraydescr)
+ v229[2xf64] = vec_cast_singlefloat_to_float(v228)
+ v230 = vec_unpack_i(v228, 2, 2)
v231 = vec_cast_singlefloat_to_float(v230)
- v232 = vec_raw_load_i(p1, i1, 4, descr=float32arraydescr)
+ v232 = vec_raw_load_i(p1, i189, descr=float32arraydescr)
v233 = vec_cast_singlefloat_to_float(v232)
- v234 = vec_int_unpack(v232, 2, 2)
+ v236 = vec_float_add(v229, v233)
+ v238 = vec_cast_float_to_singlefloat(v236)
+ v234 = vec_unpack_i(v232, 2, 2)
v235 = vec_cast_singlefloat_to_float(v234)
v237 = vec_float_add(v231, v235)
v239 = vec_cast_float_to_singlefloat(v237)
- v236 = vec_float_add(v229, v233)
- v238 = vec_cast_float_to_singlefloat(v236)
- v240 = vec_pack_f(v238, v239, 2, 2)
+ v240 = vec_pack_i(v238, v239, 2, 2)
vec_raw_store(p2, i4, v240, descr=float32arraydescr)
jump(p0, p1, p2, i207, i500)
- """
- vopt = self.vectorize(self.parse_loop(ops))
- self.assert_equal(vopt.loop, self.parse_loop(opt))
-
- def test_truediv_abs_neg_float(self):
- ops = """
- [f9,p10,i11,p4,i12,p2,p5,p13,i14,p7,i15,p8,i16,f17,i18,i19]
- f20 = raw_load(i16, i12, descr=floatarraydescr)
- guard_not_invalidated() [p8, p7, p5, p4, p2, f20, None, i12, i11, p10, i15, i14, p13]
- i23 = int_add(i12, 8)
- f24 = float_truediv(f20, f17)
- f25 = float_abs(f20)
- f26 = float_neg(f20)
- raw_store(i18, i15, f24, descr=floatarraydescr)
- i26 = int_add(i14, 1)
- i28 = int_add(i15, 8)
- i29 = int_ge(i26, i19)
- guard_false(i29) [p8, p7, p5, p4, p2, f20, i23, i28, None, p13]
- jump(f20, p10, i11, p4, i23, p2, p5, p13, i26, p7, i28, p8, i16, f17, i18, i19)
- """
- opt = self.vectorize(self.parse_loop(ops))
- self.debug_print_operations(opt.loop)
+ """)
+ vopt = self.vectorize(trace)
+ self.assert_equal(trace, trace_opt)
def test_axis_sum(self):
trace = """
[i1, p10, i11, p8, i12, p3, p4, p13, i14, i15, p6, p9, i16, i17, i18, i19, i20, i21, i22, i23]
- f24 = raw_load(i16, i12, descr=floatarraydescr)
+ f24 = raw_load_f(i16, i12, descr=floatarraydescr)
guard_not_invalidated() [i1, p9, p8, p6, p4, p3, f24, i11, i15, p13, i12, i14, p10]
i26 = int_add(i12, 8)
- i27 = getarrayitem_gc(p13, i1, descr=floatarraydescr)
+ i27 = getarrayitem_gc_f(p13, i1, descr=floatarraydescr)
i28 = int_is_zero(i27)
guard_false(i28) [i1, p9, p8, p6, p4, p3, f24, i26, i11, i15, p13, None, i14, p10]
- f30 = raw_load(i17, i15, descr=floatarraydescr)
+ f30 = raw_load_f(i17, i15, descr=floatarraydescr)
f31 = float_add(f30, f24)
raw_store(i18, i15, f31, descr=floatarraydescr)
i33 = int_add(i14, 1)
- i34 = getarrayitem_gc(p13, i19, descr=floatarraydescr)
+ i34 = getarrayitem_gc_f(p13, i19, descr=floatarraydescr)
i35 = int_lt(i34, i20)
guard_true(i35) [i1, p9, p8, p6, p4, p3, i21, i34, i15, i33, i19, p13, f31, None, i26, i11, None, None, None, i14, p10]
i37 = int_add(i34, 1)
@@ -1287,7 +1270,8 @@
pass
def test_cast_1(self):
- trace = """
+ # TODO
+ trace = self.parse_loop("""
[i9, i10, p2, p11, i12, i13, p4, p5, p14, i15, p8, i16, p17, i18, i19, i20, i21, i22, i23]
i24 = raw_load_i(i20, i16, descr=float32arraydescr)
guard_not_invalidated() [p8, p5, p4, p2, i24, p17, i13, i12, i10, i19, p14, p11, i18, i15, i16, None]
@@ -1304,11 +1288,33 @@
i39 = int_ge(i36, i23)
guard_false(i39) [p8, p5, p4, p2, i27, i28, i30, i24, i38, i36, p17, None, None, None, None, p14, p11, i18, i15, None, None]
jump(i24, i28, p2, p11, i36, i38, p4, p5, p14, i15, p8, i27, p17, i18, i30, i20, i21, i22, i23)
- """
- opt = self.vectorize(self.parse_loop(trace))
- self.debug_print_operations(opt.loop)
+ """)
+ opt = self.vectorize(trace)
+ self.debug_print_operations(trace)
+
+ def test_truediv_abs_neg_float(self):
+ # TODO
+ trace = self.parse_loop("""
+ [f9,p10,i11,p4,i12,p2,p5,p13,i14,p7,i15,p8,i16,f17,i18,i19]
+ f20 = raw_load_f(i16, i12, descr=floatarraydescr)
+ guard_not_invalidated() [p8, p7, p5, p4, p2, f20, None, i12, i11, p10, i15, i14, p13]
+ i23 = int_add(i12, 8)
+ f24 = float_truediv(f20, f17)
+ f25 = float_abs(f20)
+ f26 = float_neg(f20)
+ raw_store(i18, i15, f24, descr=floatarraydescr)
+ i26 = int_add(i14, 1)
+ i28 = int_add(i15, 8)
+ i29 = int_ge(i26, i19)
+ guard_false(i29) [p8, p7, p5, p4, p2, f20, i23, i28, None, p13]
+ jump(f20, p10, i11, p4, i23, p2, p5, p13, i26, p7, i28, p8, i16, f17, i18, i19)
+ """)
+ opt = self.vectorize(trace)
+ self.debug_print_operations(trace)
+
def test_all_guard(self):
+ # TODO
trace = """
[p0, p3, i4, i5, i6, i7]
f8 = raw_load_f(i6, i5, descr=floatarraydescr)
@@ -1327,6 +1333,7 @@
self.debug_print_operations(loop)
def test_max(self):
+ # TODO
trace = """
[p3, i4, p2, i5, f6, i7, i8]
f9 = raw_load_f(i7, i5, descr=floatarraydescr)
diff --git a/rpython/jit/metainterp/optimizeopt/util.py b/rpython/jit/metainterp/optimizeopt/util.py
--- a/rpython/jit/metainterp/optimizeopt/util.py
+++ b/rpython/jit/metainterp/optimizeopt/util.py
@@ -148,6 +148,7 @@
x = op1.getarg(i)
y = op2.getarg(i)
assert x.same_box(remap.get(y, y))
+ assert x.same_shape(remap.get(y, y))
if op2 in remap:
assert op1.same_box(remap[op2])
else:
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -36,7 +36,7 @@
class VectorLoop(object):
def __init__(self, label, oplist, jump):
self.label = label
- self.inputargs = label.getarglist()
+ self.inputargs = label.getarglist_copy()
self.prefix = []
self.prefix_label = None
assert self.label.getopnum() == rop.LABEL
@@ -160,15 +160,6 @@
self.has_two_labels = False
def propagate_all_forward(self, info, loop):
- #label = loop.label
- #jump = loop.jump
- #if jump.getopnum() not in (rop.LABEL, rop.JUMP) or \
- # label.getopnum() != rop.LABEL:
- # import pdb; pdb. set_trace()
- # raise NotAVectorizeableLoop()
- #if jump.numargs() != label.numargs():
- # import pdb; pdb. set_trace()
- # raise NotAVectorizeableLoop()
self.orig_label_args = loop.label.getarglist_copy()
self.linear_find_smallest_type(loop)
byte_count = self.smallest_type_bytes
@@ -207,29 +198,6 @@
def unroll_loop_iterations(self, loop, unroll_count):
""" Unroll the loop X times. unroll_count + 1 = unroll_factor """
numops = len(loop.operations)
- # use the target token of the label
- #target_token = label_op.getdescr()
- #if not we_are_translated():
- # target_token.assumed_classes = {}
- #if jump_op.getopnum() == rop.LABEL:
- # jump_op = ResOperation(rop.JUMP, jump_op.getarglist(), target_token)
- #else:
- # jump_op = jump_op.clone()
- # jump_op.setdescr(target_token)
- #assert jump_op.is_final()
-
- #self.emit_unrolled_operation(label_op)
-
- #for i in range(0,numops):
- # op = loop.operations[i].copy()
- # if op.is_guard():
- # assert isinstance(op, GuardResOp)
- # failargs = renamer.rename_failargs(op, clone=True)
- # snapshot = renamer.rename_rd_snapshot(op.rd_snapshot, clone=True)
- # op.setfailargs(failargs)
- # op.rd_snapshot = snapshot
- # operations.append(op)
- # self.emit_unrolled_operation(op)
renamer = Renamer()
operations = loop.operations
@@ -560,16 +528,12 @@
""" Marks this guard as an early exit! """
op = node.getoperation()
assert isinstance(op, GuardResOp)
- descr = None
if op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE):
descr = CompileLoopVersionDescr()
- else:
- descr = ResumeAtLoopHeaderDescr()
- if op.getdescr():
- descr.copy_all_attributes_from(op.getdescr())
- #
- op.setdescr(descr)
- op.setfailargs(loop.inputargs)
+ if op.getdescr():
+ descr.copy_all_attributes_from(op.getdescr())
+ op.setdescr(descr)
+ op.setfailargs(loop.label.getarglist_copy())
class CostModel(object):
""" Utility to estimate the savings for the new trace loop.
@@ -789,6 +753,9 @@
for pack in self.packs:
if not pack.is_accumulating():
continue
+ for i,node in enumerate(pack.operations):
+ op = node.getoperation()
+ state.accumulation[op] = pack
assert isinstance(pack, AccumPack)
datatype = pack.getdatatype()
bytesize = pack.getbytesize()
@@ -818,6 +785,7 @@
state.setvector_of_box(pack.getseed(), 0, vecop) # prevent it from expansion
state.renamer.start_renaming(pack.getseed(), vecop)
+
def split_overloaded_packs(self):
newpacks = []
for i,pack in enumerate(self.packs):
diff --git a/rpython/jit/metainterp/optimizeopt/version.py b/rpython/jit/metainterp/optimizeopt/version.py
--- a/rpython/jit/metainterp/optimizeopt/version.py
+++ b/rpython/jit/metainterp/optimizeopt/version.py
@@ -28,9 +28,6 @@
else:
self.descrs.append(descr)
self.leads_to[descr] = version
- # note: stitching a guard must resemble the order of the label
- # otherwise a wrong mapping is handed to the register allocator
- op.setfailargs(version.renamed_inputargs)
assert version.renamed_inputargs is not None
def remove(self, descr):
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -37,7 +37,7 @@
return self is other
def same_shape(self, other):
- return self is other
+ return True
def repr_short(self, memo):
return self.repr(memo)
More information about the pypy-commit
mailing list