[pypy-commit] pypy vecopt: ironed out the problems with the bridge creation
plan_rich
noreply at buildbot.pypy.org
Wed Jul 8 09:34:02 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r78492:a026d96015e4
Date: 2015-07-08 09:34 +0200
http://bitbucket.org/pypy/pypy/changeset/a026d96015e4/
Log: ironed out the problems with the bridge creation: the fail arguments
now save the regloc of the scalar variable; the actual position is
saved on the descriptor and reconstructed later
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -528,14 +528,12 @@
def test_prod(self):
result = self.run("prod")
assert int(result) == 576
- self.check_trace_count(1)
- self.check_vectorized(2, 1)
+ self.check_vectorized(1, 1)
def test_prod_zero(self):
result = self.run("prod_zero")
assert int(result) == 0
- self.check_trace_count(1)
- self.check_vectorized(2, 1)
+ self.check_vectorized(1, 1)
def define_max():
@@ -767,8 +765,7 @@
def test_setslice(self):
result = self.run("setslice")
assert result == 5.5
- self.check_trace_count(1)
- self.check_vectorized(2, 1)
+ self.check_vectorized(1, 1)
def define_virtual_slice():
return """
@@ -806,7 +803,6 @@
def test_flat_getitem(self):
result = self.run("flat_getitem")
assert result == 10.0
- self.check_trace_count(1)
self.check_vectorized(0,0)
def define_flat_setitem():
@@ -820,7 +816,6 @@
def test_flat_setitem(self):
result = self.run("flat_setitem")
assert result == 1.0
- self.check_trace_count(1)
self.check_vectorized(1,0) # TODO this can be improved
def define_dot():
@@ -847,8 +842,7 @@
def test_argsort(self):
result = self.run("argsort")
assert result == 6
- self.check_trace_count(1)
- self.check_vectorized(2,1) # vec. setslice
+ self.check_vectorized(1,1) # vec. setslice
def define_where():
return """
@@ -862,7 +856,6 @@
def test_where(self):
result = self.run("where")
assert result == -40
- self.check_trace_count(1)
self.check_vectorized(1, 0) # TODO might be possible to vectorize
def define_searchsorted():
@@ -877,7 +870,6 @@
result = self.run("searchsorted")
assert result == 0
self.check_trace_count(6)
- # TODO?
def define_int_mul_array():
return """
@@ -908,8 +900,7 @@
def test_slice(self):
result = self.run("slice")
assert result == 18
- self.check_trace_count(1)
- self.check_vectorized(2,1)
+ self.check_vectorized(1,1)
def define_multidim_slice():
return """
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -8,7 +8,7 @@
from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
from rpython.jit.metainterp.history import (Const, Box, VOID,
- BoxVector, ConstInt, BoxVectorAccum)
+ BoxVector, ConstInt)
from rpython.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
from rpython.jit.metainterp.compile import CompileLoopVersionDescr
from rpython.rtyper.lltypesystem import lltype, rffi, rstr, llmemory
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -26,6 +26,7 @@
ConstFloat, BoxInt, BoxFloat, BoxVector, BoxVectorAccum, INT, REF,
FLOAT, VECTOR, TargetToken)
from rpython.jit.metainterp.resoperation import rop, ResOperation
+from rpython.jit.metainterp.compile import ResumeGuardDescr
from rpython.rlib import rgc
from rpython.rlib.objectmodel import we_are_translated
from rpython.rlib.rarithmetic import r_longlong, r_uint
@@ -304,7 +305,32 @@
self.assembler.regalloc_perform_math(op, arglocs, result_loc)
def locs_for_fail(self, guard_op):
- return [self.loc(v) for v in guard_op.getfailargs()]
+ faillocs = []
+ descr = guard_op.getdescr()
+ for v in guard_op.getfailargs():
+ if v is not None and isinstance(v, BoxVectorAccum):
+ loc = self.loc(v.scalar_var)
+ self.update_accumulation_loc(v, descr)
+ faillocs.append(loc)
+ else:
+ faillocs.append(self.loc(v))
+
+ return faillocs
+
+ def update_accumulation_loc(self, accumbox, descr):
+ """ Saves the location to the AccumInfo object.
+ Necessary to reconstruct the values at a guard exit.
+ """
+ box = accumbox.scalar_var
+ assert isinstance(descr, ResumeGuardDescr)
+ accum_info = descr.rd_accum_list
+ while accum_info:
+ if accum_info.box is box:
+ accum_info.loc = self.loc(accumbox)
+ break
+ accum_info = accum_info.prev
+ else:
+ raise AssertionError("accum box has no accum_info entry")
def perform_with_guard(self, op, guard_op, arglocs, result_loc):
faillocs = self.locs_for_fail(guard_op)
diff --git a/rpython/jit/backend/x86/vector_ext.py b/rpython/jit/backend/x86/vector_ext.py
--- a/rpython/jit/backend/x86/vector_ext.py
+++ b/rpython/jit/backend/x86/vector_ext.py
@@ -12,6 +12,7 @@
from rpython.jit.backend.llsupport.regalloc import (get_scale, valid_addressing_size)
from rpython.rlib.objectmodel import we_are_translated
from rpython.rtyper.lltypesystem.lloperation import llop
+from rpython.rtyper.lltypesystem import lltype
# duplicated for easy migration, def in assembler.py as well
# DUP START
@@ -65,23 +66,21 @@
accum_info = faildescr.rd_accum_list
while accum_info:
pos = accum_info.position
- loc = fail_locs[pos]
+ loc = accum_info.loc
+ tgtloc = fail_locs[pos]
+ # the upper elements will be lost if saved to the stack!
assert isinstance(loc, RegLoc)
- arg = fail_args[pos]
- if isinstance(arg, BoxVectorAccum):
- arg = arg.scalar_var
+ if not isinstance(tgtloc, RegLoc):
+ tgtloc = regalloc.force_allocate_reg(accum_info.box)
+ arg = accum_info.box
assert arg is not None
- tgtloc = regalloc.force_allocate_reg(arg, fail_args)
if accum_info.operation == '+':
- # reduction using plus
self._accum_reduce_sum(arg, loc, tgtloc)
elif accum_info.operation == '*':
self._accum_reduce_mul(arg, loc, tgtloc)
else:
not_implemented("accum operator %s not implemented" %
(accum_info.operation))
- fail_locs[pos] = tgtloc
- regalloc.possibly_free_var(arg)
accum_info = accum_info.prev
def _accum_reduce_mul(self, arg, accumloc, targetloc):
diff --git a/rpython/jit/metainterp/history.py b/rpython/jit/metainterp/history.py
--- a/rpython/jit/metainterp/history.py
+++ b/rpython/jit/metainterp/history.py
@@ -711,9 +711,9 @@
i += 1
assert label.getopnum() == rop.LABEL
self.label_pos = i
- self.parent_trace_label_args = None
- self.bridge_label_args = label.getarglist()
- self.inputargs = None
+ #self.parent_trace_label_args = None
+ #self.bridge_label_args = label.getarglist()
+ self.inputargs = label.getarglist()
def adddescr(self, op, descr):
self.faildescrs.append((op, descr))
@@ -730,14 +730,6 @@
label.setdescr(token)
jump.setdescr(token)
- assert len(self.bridge_label_args) <= len(self.parent_trace_label_args)
- for i in range(len(self.bridge_label_args)):
- arg = self.parent_trace_label_args[i]
- if isinstance(arg, BoxVectorAccum):
- self.bridge_label_args[i] = arg
- label.setarg(i, arg)
- self.inputargs = self.bridge_label_args
-
return token
class TreeLoop(object):
@@ -809,30 +801,38 @@
def seen_args(inputargs):
seen = {}
for arg in inputargs:
+ if arg is None:
+ continue
if isinstance(arg, BoxVectorAccum):
seen[arg.scalar_var] = None
- seen[arg] = None
else:
seen[arg] = None
return seen
@staticmethod
+ def check_if_box_was_seen(box, seen):
+ if box is not None:
+ assert isinstance(box, Box)
+ if isinstance(box, BoxVectorAccum):
+ assert box in seen or box.scalar_var in seen
+ else:
+ assert box in seen
+
+ @staticmethod
def check_consistency_of_branch(operations, seen):
"NOT_RPYTHON"
for op in operations:
for i in range(op.numargs()):
box = op.getarg(i)
if isinstance(box, Box):
- assert box in seen
+ TreeLoop.check_if_box_was_seen(box, seen)
if op.is_guard():
assert op.getdescr() is not None
if hasattr(op.getdescr(), '_debug_suboperations'):
ops = op.getdescr()._debug_suboperations
TreeLoop.check_consistency_of_branch(ops, seen.copy())
for box in op.getfailargs() or []:
- if box is not None:
- assert isinstance(box, Box)
- assert box in seen
+ TreeLoop.check_if_box_was_seen(box, seen)
else:
assert op.getfailargs() is None
box = op.result
@@ -844,8 +844,12 @@
inputargs = op.getarglist()
for box in inputargs:
assert isinstance(box, Box), "LABEL contains %r" % (box,)
- seen = dict.fromkeys(inputargs)
- assert len(seen) == len(inputargs), (
+ seen = TreeLoop.seen_args(inputargs)
+ seen_count = len(seen)
+ for arg in seen:
+ if isinstance(arg, BoxVectorAccum):
+ seen_count -= 1
+ assert seen_count == len(inputargs), (
"duplicate Box in the LABEL arguments")
assert operations[-1].is_final()
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -861,10 +861,12 @@
PLUS = '+'
MULTIPLY = '*'
- def __init__(self, var, pos, operator):
+ def __init__(self, opnum, var, pos):
self.var = var
self.pos = pos
- self.operator = operator
+ self.operator = Accum.PLUS
+ if opnum == rop.FLOAT_MUL:
+ self.operator = Accum.MULTIPLY
class Pack(object):
""" A pack is a set of n statements that are:
diff --git a/rpython/jit/metainterp/optimizeopt/util.py b/rpython/jit/metainterp/optimizeopt/util.py
--- a/rpython/jit/metainterp/optimizeopt/util.py
+++ b/rpython/jit/metainterp/optimizeopt/util.py
@@ -222,11 +222,6 @@
args = guard.getfailargs()
for i,arg in enumerate(args):
value = self.rename_map.get(arg,arg)
- if value is not arg and isinstance(value, BoxVectorAccum):
- descr = guard.getdescr()
- assert isinstance(descr,ResumeGuardDescr)
- ai = AccumInfo(descr.rd_accum_list, i, value.operator)
- descr.rd_accum_list = ai
args[i] = value
return args
return None
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -8,11 +8,11 @@
import py
import time
-from rpython.jit.metainterp.resume import Snapshot
+from rpython.jit.metainterp.resume import Snapshot, AccumInfo
from rpython.jit.metainterp.jitexc import NotAVectorizeableLoop, NotAProfitableLoop
from rpython.jit.metainterp.optimizeopt.unroll import optimize_unroll
from rpython.jit.metainterp.compile import (ResumeAtLoopHeaderDescr,
- CompileLoopVersionDescr, invent_fail_descr_for_op)
+ CompileLoopVersionDescr, invent_fail_descr_for_op, ResumeGuardDescr)
from rpython.jit.metainterp.history import (ConstInt, VECTOR, FLOAT, INT,
BoxVector, BoxFloat, BoxInt, ConstFloat, TargetToken, JitCellToken, Box,
BoxVectorAccum, LoopVersion)
@@ -31,21 +31,6 @@
from rpython.rlib.jit import Counters
from rpython.rtyper.lltypesystem import lltype, rffi
-def debug_print_operations(loop):
- """ NOT_RPYTHON """
- if not we_are_translated():
- print('--- loop instr numbered ---')
- def ps(snap):
- if snap.prev is None:
- return []
- return ps(snap.prev) + snap.boxes[:]
- for i,op in enumerate(loop.operations):
- print "[",str(i).center(2," "),"]",op,
- if op.is_guard():
- print op.getfailargs()
- else:
- print ""
-
def optimize_vector(metainterp_sd, jitdriver_sd, loop, optimizations,
inline_short_preamble, start_state, cost_threshold):
optimize_unroll(metainterp_sd, jitdriver_sd, loop, optimizations,
@@ -72,7 +57,7 @@
aligned_vector_version = LoopVersion(loop, aligned=True)
- loop.versions = [orig_version] #, aligned_vector_version]
+ loop.versions = [orig_version]
metainterp_sd.profiler.count(Counters.OPT_VECTORIZED)
metainterp_sd.logger_noopt.log_loop(loop.inputargs, loop.operations, -2, None, None, "post vectorize")
@@ -198,8 +183,6 @@
self.emit_unrolled_operation(label_op)
- self.orig_loop_version.parent_trace_label_args = label_op.getarglist()[:]
-
renamer = Renamer()
oi = 0
pure = True
@@ -495,6 +478,17 @@
assert node.emitted
if vector and not self.costmodel.profitable():
return
+ if vector:
+ # add accumulation info to the descriptor
+ for guard_node in self.dependency_graph.guards:
+ op = guard_node.getoperation()
+ failargs = op.getfailargs()
+ for i,arg in enumerate(failargs):
+ if isinstance(arg, BoxVectorAccum):
+ descr = op.getdescr()
+ assert isinstance(descr,ResumeGuardDescr)
+ ai = AccumInfo(descr.rd_accum_list, i, arg.operator, arg.scalar_var)
+ descr.rd_accum_list = ai
self.loop.operations = \
sched_data.prepend_invariant_operations(self._newoperations)
self.clear_newoperations()
@@ -837,10 +831,7 @@
# of leading/preceding signext/floatcast instructions needs to be
# considered. => tree pattern matching problem.
return None
- operator = Accum.PLUS
- if opnum == rop.FLOAT_MUL:
- operator = Accum.MULTIPLY
- accum = Accum(accum_var, accum_pos, operator)
+ accum = Accum(opnum, accum_var, accum_pos)
return AccumPair(lnode, rnode, ptype, ptype, accum)
return None
diff --git a/rpython/jit/metainterp/resume.py b/rpython/jit/metainterp/resume.py
--- a/rpython/jit/metainterp/resume.py
+++ b/rpython/jit/metainterp/resume.py
@@ -35,11 +35,13 @@
self.pc = pc
class AccumInfo(object):
- __slots__ = ('prev', 'position', 'operation')
- def __init__(self, prev, position, operation):
+ __slots__ = ('prev', 'position', 'operation', 'box', 'loc')
+ def __init__(self, prev, position, operation, box):
self.prev = prev
self.operation = operation
self.position = position
+ self.box = box
+ self.loc = None
def _ensure_parent_resumedata(framestack, n):
target = framestack[n]
More information about the pypy-commit
mailing list