[pypy-commit] pypy vecopt: turned off vectorize opt for all jit drivers and enabled vectorize opt in micronumpy loop jit drivers
plan_rich
noreply at buildbot.pypy.org
Wed May 20 14:39:52 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r77428:46704e37a322
Date: 2015-05-20 10:39 +0200
http://bitbucket.org/pypy/pypy/changeset/46704e37a322/
Log: turned off vectorize opt for all jit drivers and enabled vectorize
opt in micronumpy loop jit drivers; resolved a problem in a test case;
added a general exception clause printing debug information when vecopt
fails
diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py
--- a/pypy/module/micronumpy/loop.py
+++ b/pypy/module/micronumpy/loop.py
@@ -16,7 +16,7 @@
call2_driver = jit.JitDriver(
name='numpy_call2',
greens=['shapelen', 'func', 'calc_dtype', 'res_dtype'],
- reds='auto')
+ reds='auto', vectorize=True)
def call2(space, shape, func, calc_dtype, res_dtype, w_lhs, w_rhs, out):
# handle array_priority
@@ -81,7 +81,7 @@
call1_driver = jit.JitDriver(
name='numpy_call1',
greens=['shapelen', 'func', 'calc_dtype', 'res_dtype'],
- reds='auto')
+ reds='auto', vectorize=True)
def call1(space, shape, func, calc_dtype, res_dtype, w_obj, out):
obj_iter, obj_state = w_obj.create_iter(shape)
@@ -103,7 +103,7 @@
call_many_to_one_driver = jit.JitDriver(
name='numpy_call_many_to_one',
greens=['shapelen', 'nin', 'func', 'res_dtype'],
- reds='auto')
+ reds='auto', vectorize=True)
def call_many_to_one(space, shape, func, res_dtype, in_args, out):
# out must hav been built. func needs no calc_type, is usually an
@@ -137,7 +137,7 @@
call_many_to_many_driver = jit.JitDriver(
name='numpy_call_many_to_many',
greens=['shapelen', 'nin', 'nout', 'func', 'res_dtype'],
- reds='auto')
+ reds='auto', vectorize=True)
def call_many_to_many(space, shape, func, res_dtype, in_args, out_args):
# out must hav been built. func needs no calc_type, is usually an
@@ -184,7 +184,7 @@
setslice_driver = jit.JitDriver(name='numpy_setslice',
greens = ['shapelen', 'dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def setslice(space, shape, target, source):
if not shape:
@@ -221,7 +221,7 @@
reduce_driver = jit.JitDriver(name='numpy_reduce',
greens = ['shapelen', 'func', 'done_func',
'calc_dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def compute_reduce(space, obj, calc_dtype, func, done_func, identity):
obj_iter, obj_state = obj.create_iter()
@@ -244,7 +244,7 @@
reduce_cum_driver = jit.JitDriver(name='numpy_reduce_cum_driver',
greens = ['shapelen', 'func', 'dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def compute_reduce_cumulative(space, obj, out, calc_dtype, func, identity):
obj_iter, obj_state = obj.create_iter()
@@ -282,7 +282,7 @@
where_driver = jit.JitDriver(name='numpy_where',
greens = ['shapelen', 'dtype', 'arr_dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def where(space, out, shape, arr, x, y, dtype):
out_iter, out_state = out.create_iter(shape)
@@ -325,7 +325,7 @@
axis_reduce_driver = jit.JitDriver(name='numpy_axis_reduce',
greens=['shapelen', 'func', 'dtype'],
- reds='auto')
+ reds='auto', vectorize=True)
def do_axis_reduce(space, shape, func, arr, dtype, axis, out, identity, cumulative,
temp):
@@ -369,7 +369,7 @@
def _new_argmin_argmax(op_name):
arg_driver = jit.JitDriver(name='numpy_' + op_name,
greens = ['shapelen', 'dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def argmin_argmax(arr):
result = 0
@@ -395,7 +395,7 @@
dot_driver = jit.JitDriver(name = 'numpy_dot',
greens = ['dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def multidim_dot(space, left, right, result, dtype, right_critical_dim):
''' assumes left, right are concrete arrays
@@ -449,7 +449,7 @@
count_all_true_driver = jit.JitDriver(name = 'numpy_count',
greens = ['shapelen', 'dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def count_all_true_concrete(impl):
s = 0
@@ -470,7 +470,7 @@
nonzero_driver = jit.JitDriver(name = 'numpy_nonzero',
greens = ['shapelen', 'dims', 'dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def nonzero(res, arr, box):
res_iter, res_state = res.create_iter()
@@ -492,7 +492,7 @@
getitem_filter_driver = jit.JitDriver(name = 'numpy_getitem_bool',
greens = ['shapelen', 'arr_dtype',
'index_dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def getitem_filter(res, arr, index):
res_iter, res_state = res.create_iter()
@@ -520,7 +520,7 @@
setitem_filter_driver = jit.JitDriver(name = 'numpy_setitem_bool',
greens = ['shapelen', 'arr_dtype',
'index_dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def setitem_filter(space, arr, index, value):
arr_iter, arr_state = arr.create_iter()
@@ -563,7 +563,7 @@
flatiter_setitem_driver = jit.JitDriver(name = 'numpy_flatiter_setitem',
greens = ['dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def flatiter_setitem(space, dtype, val, arr_iter, arr_state, step, length):
val_iter, val_state = val.create_iter()
@@ -583,7 +583,7 @@
fromstring_driver = jit.JitDriver(name = 'numpy_fromstring',
greens = ['itemsize', 'dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def fromstring_loop(space, a, dtype, itemsize, s):
i = 0
@@ -617,7 +617,7 @@
getitem_int_driver = jit.JitDriver(name = 'numpy_getitem_int',
greens = ['shapelen', 'indexlen',
'prefixlen', 'dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def getitem_array_int(space, arr, res, iter_shape, indexes_w, prefix_w):
shapelen = len(iter_shape)
@@ -645,7 +645,7 @@
setitem_int_driver = jit.JitDriver(name = 'numpy_setitem_int',
greens = ['shapelen', 'indexlen',
'prefixlen', 'dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def setitem_array_int(space, arr, iter_shape, indexes_w, val_arr,
prefix_w):
@@ -675,7 +675,7 @@
byteswap_driver = jit.JitDriver(name='numpy_byteswap_driver',
greens = ['dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def byteswap(from_, to):
dtype = from_.dtype
@@ -690,7 +690,7 @@
choose_driver = jit.JitDriver(name='numpy_choose_driver',
greens = ['shapelen', 'mode', 'dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def choose(space, arr, choices, shape, dtype, out, mode):
shapelen = len(shape)
@@ -724,7 +724,7 @@
clip_driver = jit.JitDriver(name='numpy_clip_driver',
greens = ['shapelen', 'dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def clip(space, arr, shape, min, max, out):
assert min or max
@@ -759,7 +759,7 @@
round_driver = jit.JitDriver(name='numpy_round_driver',
greens = ['shapelen', 'dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def round(space, arr, dtype, shape, decimals, out):
arr_iter, arr_state = arr.create_iter(shape)
@@ -775,7 +775,7 @@
diagonal_simple_driver = jit.JitDriver(name='numpy_diagonal_simple_driver',
greens = ['axis1', 'axis2'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def diagonal_simple(space, arr, out, offset, axis1, axis2, size):
out_iter, out_state = out.create_iter()
@@ -819,7 +819,7 @@
def _new_binsearch(side, op_name):
binsearch_driver = jit.JitDriver(name='numpy_binsearch_' + side,
greens=['dtype'],
- reds='auto')
+ reds='auto', vectorize=True)
def binsearch(space, arr, key, ret):
assert len(arr.get_shape()) == 1
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -140,7 +140,9 @@
tgt_op.setfailargs(op.getfailargs())
def edge_to(self, to, arg=None, label=None):
- assert self != to
+ if self is to:
+ print "debug: tried to put edge from: ", self.op, "to:", to.op
+ return
dep = self.depends_on(to)
if not dep:
#if force or self.independent(idx_from, idx_to):
@@ -818,34 +820,6 @@
.format(name='INT_SUB', op='-')).compile()
del additive_func_source
- #def operation_INT_ADD(self, op, node):
- # box_r = op.result
- # if not box_r:
- # return
- # box_a0 = op.getarg(0)
- # box_a1 = op.getarg(1)
- # if self.is_const_integral(box_a0) and self.is_const_integral(box_a1):
- # idx_ref = IndexVar(box_r)
- # idx_ref.constant = box_a0.getint() + box_a1.getint()
- # self.index_vars[box_r] = idx_ref
- # elif self.is_const_integral(box_a0):
- # idx_ref = self.get_or_create(box_a1)
- # idx_ref = idx_ref.clone()
- # idx_ref.constant {op}= box_a0.getint()
- # self.index_vars[box_r] = idx_ref
- # elif self.is_const_integral(box_a1):
- # idx_ref = self.get_or_create(box_a0)
- # idx_ref = idx_ref.clone()
- # idx_ref.add_const(box_a1.getint())
- # self.index_vars[box_r] = idx_ref
- # else:
- # # both variables are boxes
- # if box_a1 in self.invariant_vars:
- # idx_var = self.get_or_create(box_a0)
- # idx_var = idx_var.clone()
- # idx_var.set_next_nonconst_mod(BoxedIndexVar(box_a1, op.getopnum(), box_a0))
- # self.index_vars[box_r] = idx_var
-
multiplicative_func_source = """
def operation_{name}(self, op, node):
box_r = op.result
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -63,6 +63,7 @@
opt.schedule()
opt.unroll_loop_iterations(loop, unroll_factor)
opt.loop.operations = opt.get_newoperations()
+ self.debug_print_operations(opt.loop)
opt.clear_newoperations()
opt.build_dependency_graph()
self.last_graph = opt.dependency_graph
@@ -1151,7 +1152,6 @@
i10 = raw_load(p0, i0, descr=singlefloatarraydescr)
i1 = int_add(i0, 4)
i11 = raw_load(p1, i1, descr=singlefloatarraydescr)
- i2 = int_add(i1, 4)
f1 = cast_singlefloat_to_float(i10)
f2 = cast_singlefloat_to_float(i11)
f3 = float_add(f1, f2)
@@ -1160,7 +1160,7 @@
i5 = int_add(i4, 4)
i186 = int_lt(i5, 100)
guard_false(i186) []
- jump(p0,p1,p2,i2,i5)
+ jump(p0,p1,p2,i1,i5)
"""
opt = """
[p0, p1, p2, i0, i4]
@@ -1168,33 +1168,31 @@
i5 = int_add(i4, 4)
i1 = int_add(i0, 4)
i186 = int_lt(i5, 100)
- i2 = int_add(i0, 8)
+ i189 = int_add(i0, 8)
i187 = int_add(i4, 8)
- i191 = int_add(i0, 12)
- i190 = int_lt(i187, 100)
- i192 = int_add(i0, 16)
- i188 = int_add(i4, 12)
- i200 = int_add(i0, 20)
- i199 = int_lt(i188, 100)
- i201 = int_add(i0, 24)
- i189 = int_add(i4, 16)
- i209 = int_add(i0, 28)
- i208 = int_lt(i189, 100)
- guard_false(i208) []
- i210 = int_add(i0, 32)
- v217 = vec_raw_load(p0, i0, 4, descr=singlefloatarraydescr)
- v218 = vec_cast_singlefloat_to_float(v217, 0, 2)
- v219 = vec_cast_singlefloat_to_float(v217, 2, 2)
- v220 = vec_raw_load(p1, i1, 4, descr=singlefloatarraydescr)
- v221 = vec_cast_singlefloat_to_float(v220, 0, 2)
- v222 = vec_cast_singlefloat_to_float(v220, 2, 2)
- v223 = vec_float_add(v218, v221, 2)
- v224 = vec_float_add(v219, v222, 2)
- v225 = vec_cast_float_to_singlefloat(v223, 2)
- v226 = vec_cast_float_to_singlefloat(v224, 2)
- v227 = vec_float_pack(v225, v226, 2, 2)
- vec_raw_store(p2, i4, v227, 4, descr=singlefloatarraydescr)
- jump(p0, p1, p2, i210, i189)
+ i198 = int_add(i0, 12)
+ i188 = int_lt(i187, 100)
+ i207 = int_add(i0, 16)
+ i196 = int_add(i4, 12)
+ i197 = int_lt(i196, 100)
+ i205 = int_add(i4, 16)
+ i206 = int_lt(i205, 100)
+ guard_false(i206) []
+ v228 = vec_raw_load(p0, i0, 4, descr=singlefloatarraydescr)
+ v229 = vec_cast_singlefloat_to_float(v228, 2)
+ v230 = vec_int_unpack(v228, 2, 2)
+ v231 = vec_cast_singlefloat_to_float(v230, 2)
+ v232 = vec_raw_load(p1, i1, 4, descr=singlefloatarraydescr)
+ v233 = vec_cast_singlefloat_to_float(v232, 2)
+ v234 = vec_int_unpack(v232, 2, 2)
+ v235 = vec_cast_singlefloat_to_float(v234, 2)
+ v236 = vec_float_add(v229, v233, 2)
+ v237 = vec_float_add(v231, v235, 2)
+ v238 = vec_cast_float_to_singlefloat(v236, 2)
+ v239 = vec_cast_float_to_singlefloat(v237, 2)
+ v240 = vec_float_pack(v238, v239, 2, 2)
+ vec_raw_store(p2, i4, v240, 4, descr=singlefloatarraydescr)
+ jump(p0, p1, p2, i207, i205)
"""
vopt = self.vectorize(self.parse_loop(ops))
self.assert_equal(vopt.loop, self.parse_loop(opt))
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -50,6 +50,18 @@
except NotAVectorizeableLoop:
# vectorization is not possible, propagate only normal optimizations
loop.operations = orig_ops
+ except Exception as e:
+ loop.operations = orig_ops
+ print 'loop with %d instructions failed! ' % (len(orig_ops),)
+ print('--- loop instr numbered ---')
+ for i,op in enumerate(loop.operations):
+ print "[",i,"]",op,
+ if op.is_guard():
+ print op.getfailargs()
+ else:
+ print ""
+ #import traceback
+ #traceback.print_exc()
class VectorizingOptimizer(Optimizer):
""" Try to unroll the loop and find instructions to group """
diff --git a/rpython/rlib/jit.py b/rpython/rlib/jit.py
--- a/rpython/rlib/jit.py
+++ b/rpython/rlib/jit.py
@@ -552,7 +552,7 @@
'enable_opts': 'INTERNAL USE ONLY (MAY NOT WORK OR LEAD TO CRASHES): '
'optimizations to enable, or all = %s' % ENABLE_ALL_OPTS,
'max_unroll_recursion': 'how many levels deep to unroll a recursive function',
- 'vectorize': 'turn on the vectorization optimization. default off. requirement: (sse2)',
+ 'vectorize': 'turn on the vectorization optimization. requires sse4.1',
}
PARAMETERS = {'threshold': 1039, # just above 1024, prime
@@ -590,7 +590,7 @@
get_jitcell_at=None, set_jitcell_at=None,
get_printable_location=None, confirm_enter_jit=None,
can_never_inline=None, should_unroll_one_iteration=None,
- name='jitdriver', check_untranslated=True, vectorize=True,
+ name='jitdriver', check_untranslated=True, vectorize=False,
get_unique_id=None):
if greens is not None:
self.greens = greens
More information about the pypy-commit
mailing list