[pypy-commit] pypy vecopt2: started to unroll a trace in the optimizer. work in progress
plan_rich
noreply at buildbot.pypy.org
Tue May 5 09:45:06 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt2
Changeset: r77061:3b35c7d89697
Date: 2015-03-05 17:45 +0100
http://bitbucket.org/pypy/pypy/changeset/3b35c7d89697/
Log: started to unroll a trace in the optimizer. work in progress
diff --git a/rpython/jit/metainterp/optimizeopt/__init__.py b/rpython/jit/metainterp/optimizeopt/__init__.py
--- a/rpython/jit/metainterp/optimizeopt/__init__.py
+++ b/rpython/jit/metainterp/optimizeopt/__init__.py
@@ -8,6 +8,7 @@
from rpython.jit.metainterp.optimizeopt.simplify import OptSimplify
from rpython.jit.metainterp.optimizeopt.pure import OptPure
from rpython.jit.metainterp.optimizeopt.earlyforce import OptEarlyForce
+from rpython.jit.metainterp.optimizeopt.unfold import optimize_unfold
from rpython.rlib.jit import PARAMETERS, ENABLE_ALL_OPTS
from rpython.rlib.unroll import unrolling_iterable
from rpython.rlib.debug import debug_start, debug_stop, debug_print
@@ -20,7 +21,8 @@
('earlyforce', OptEarlyForce),
('pure', OptPure),
('heap', OptHeap),
- ('unroll', None)]
+ ('unroll', None),
+ ('unfold', None)]
# no direct instantiation of unroll
unroll_all_opts = unrolling_iterable(ALL_OPTS)
@@ -34,6 +36,7 @@
def build_opt_chain(metainterp_sd, enable_opts):
optimizations = []
unroll = 'unroll' in enable_opts # 'enable_opts' is normally a dict
+ unfold = 'unfold' in enable_opts
for name, opt in unroll_all_opts:
if name in enable_opts:
if opt is not None:
@@ -43,9 +46,10 @@
if ('rewrite' not in enable_opts or 'virtualize' not in enable_opts
or 'heap' not in enable_opts or 'unroll' not in enable_opts
or 'pure' not in enable_opts):
- optimizations.append(OptSimplify(unroll))
+ if 'unfold' not in enable_opts: # TODO
+ optimizations.append(OptSimplify(unroll))
- return optimizations, unroll
+ return optimizations, unroll, unfold
def optimize_trace(metainterp_sd, jitdriver_sd, loop, enable_opts,
inline_short_preamble=True, start_state=None,
@@ -57,8 +61,15 @@
try:
loop.logops = metainterp_sd.logger_noopt.log_loop(loop.inputargs,
loop.operations)
- optimizations, unroll = build_opt_chain(metainterp_sd, enable_opts)
- if unroll:
+ optimizations, unroll, unfold = build_opt_chain(metainterp_sd, enable_opts)
+ if unfold:
+ return optimize_unfold(metainterp_sd,
+ jitdriver_sd,
+ loop,
+ optimizations,
+ start_state,
+ export_state)
+ elif unroll:
return optimize_unroll(metainterp_sd, jitdriver_sd, loop,
optimizations,
inline_short_preamble, start_state,
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -29,6 +29,9 @@
""" This is basically building the definition-use chain and saving this
information in a graph structure. This is the same as calculating
the reaching definitions and the 'looking back' whenever it is used.
+
+ Write After Read, Write After Write dependencies are not possible,
+ as the operations are in SSA form
"""
defining_indices = {}
@@ -38,9 +41,12 @@
for arg in op.getarglist():
defining_indices[arg] = 0
+ # TODO what about a JUMP operation? it often has many parameters (10+) and uses
+ # nearly every definition in the trace (for loops). Maybe we can skip this operation
+
if op.result is not None:
- # overwrites redefinition. This is not a problem
- # if the trace is in SSA form.
+ # the trace is always in SSA form, thus it is neither possible to have a WAR
+ # nor a WAW dependency
defining_indices[op.result] = i
for arg in op.getarglist():
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
@@ -2,11 +2,13 @@
from rpython.rlib.objectmodel import instantiate
from rpython.jit.metainterp.optimizeopt.test.test_util import (
LLtypeMixin, BaseTest, FakeMetaInterpStaticData, convert_old_style_to_targets)
-from rpython.jit.metainterp.history import TargetToken, JitCellToken
+from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop
from rpython.jit.metainterp.optimizeopt import optimize_trace
import rpython.jit.metainterp.optimizeopt.optimizer as optimizeopt
import rpython.jit.metainterp.optimizeopt.virtualize as virtualize
from rpython.jit.metainterp.optimizeopt.dependency import DependencyGraph
+from rpython.jit.metainterp.optimizeopt.unroll import Inliner
+from rpython.jit.metainterp.optimizeopt.unfold import OptUnfold
from rpython.jit.metainterp.optimize import InvalidLoop
from rpython.jit.metainterp.history import ConstInt, BoxInt, get_const_ptr_for_string
from rpython.jit.metainterp import executor, compile, resume
@@ -15,20 +17,25 @@
class DepTestHelper(BaseTest):
- enable_opts = "vectorize"
+ enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unfold"
def build_dependency(self, ops):
+ loop = self.parse_loop(ops)
+ return DependencyGraph(None, loop)
+
+ def parse_loop(self, ops):
loop = self.parse(ops, postprocess=self.postprocess)
token = JitCellToken()
loop.operations = [ResOperation(rop.LABEL, loop.inputargs, None,
descr=TargetToken(token))] + loop.operations
if loop.operations[-1].getopnum() == rop.JUMP:
loop.operations[-1].setdescr(token)
- #self._do_optimize_loop(loop, call_pure_results, export_state=False)
- #print '\n'.join([str(o) for o in loop.operations])
- #self.assert_equal(loop, expected)
+ return loop
- return DependencyGraph(None, loop)
+ def assert_unfold_loop(self, loop, unroll_factor, unfolded_loop, call_pure_results=None):
+ OptUnfold.force_unroll_factor = unroll_factor
+ optloop = self._do_optimize_loop(loop, call_pure_results, export_state=True)
+ self.assert_equal(optloop, unfolded_loop)
def assert_def_use(self, graph, from_instr_index, to_instr_index):
assert graph.instr_dependency(from_instr_index,
@@ -62,5 +69,37 @@
self.assert_def_use(dep_graph, 1, 2)
self.assert_def_use(dep_graph, 1, 3)
+ def test_unroll(self):
+ ops = """
+ [p0,p1,p2,i0]
+ i1 = raw_load(p1, i0, descr=floatarraydescr)
+ i2 = raw_load(p2, i0, descr=floatarraydescr)
+ i3 = int_add(i1,i2)
+ raw_store(p0, i0, i3, descr=floatarraydescr)
+ i4 = int_add(i0, 1)
+ i5 = int_le(i4, 10)
+ guard_true(i5) [p0,p1,p2,i4]
+ jump(p0,p1,p2,i4)
+ """
+ unfolded_ops = """
+ [p0,p1,p2,i0]
+ i1 = raw_load(p1, i0, descr=floatarraydescr)
+ i2 = raw_load(p2, i0, descr=floatarraydescr)
+ i3 = int_add(i1,i2)
+ raw_store(p0, i0, i3, descr=floatarraydescr)
+ i4 = int_add(i0, 1)
+ i5 = int_le(i4, 10)
+ guard_true(i5) [p0,p1,p2,i4]
+ i10 = raw_load(p1, i4, descr=floatarraydescr)
+ i11 = raw_load(p2, i4, descr=floatarraydescr)
+ i12 = int_add(i10,i11)
+ raw_store(p0, i4, i12, descr=floatarraydescr)
+ i20 = int_add(i4, 1)
+ i21 = int_le(i20, 10)
+ guard_true(i21) [p0,p1,p2,i20]
+ jump(p0,p1,p2,i21)
+ """
+ self.assert_unfold_loop(self.parse_loop(ops),4, self.parse_loop(unfolded_ops))
+
class TestLLtype(BaseTestDependencyGraph, LLtypeMixin):
pass
diff --git a/rpython/jit/metainterp/optimizeopt/unfold.py b/rpython/jit/metainterp/optimizeopt/unfold.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/metainterp/optimizeopt/unfold.py
@@ -0,0 +1,664 @@
+import sys
+
+from rpython.jit.metainterp.history import TargetToken, JitCellToken, Const
+from rpython.jit.metainterp.inliner import Inliner
+from rpython.jit.metainterp.optimize import InvalidLoop
+from rpython.jit.metainterp.optimizeopt.generalize import KillHugeIntBounds
+from rpython.jit.metainterp.optimizeopt.optimizer import Optimizer, Optimization
+from rpython.jit.metainterp.optimizeopt.virtualstate import (VirtualStateConstructor,
+ ShortBoxes, BadVirtualState, VirtualStatesCantMatch)
+from rpython.jit.metainterp.resoperation import rop, ResOperation, GuardResOp
+from rpython.jit.metainterp.resume import Snapshot
+from rpython.jit.metainterp import compile
+from rpython.rlib.debug import debug_print, debug_start, debug_stop
+
+
+def optimize_unfold(metainterp_sd, jitdriver_sd, loop, optimizations, start_state=None,
+ export_state=True):
+ opt = OptUnfold(metainterp_sd, jitdriver_sd, loop, optimizations)
+ return opt.propagate_all_forward(start_state, export_state)
+
+
+class UnfoldOptimizer(Optimizer):
+ def setup(self):
+ self.importable_values = {}
+ self.emitting_dissabled = False
+ self.emitted_guards = 0
+
+ def ensure_imported(self, value):
+ if not self.emitting_dissabled and value in self.importable_values:
+ imp = self.importable_values[value]
+ del self.importable_values[value]
+ imp.import_value(value)
+
+ def emit_operation(self, op):
+ if op.returns_bool_result():
+ self.bool_boxes[self.getvalue(op.result)] = None
+ if self.emitting_dissabled:
+ return
+ if op.is_guard():
+ self.emitted_guards += 1 # FIXME: can we use counter in self._emit_operation?
+ self._emit_operation(op)
+
+
+class OptUnfold(Optimization):
+ """ In contrast to the loop unroll optimization this optimization
+ unrolls the loop many times instead of just peeling off one trace.
+ """
+
+ inline_short_preamble = True
+
+ # for testing purpose only
+ # TODO: hide it from rpython
+ _force_unroll_factor = -1
+
+ def __init__(self, metainterp_sd, jitdriver_sd, loop, optimizations):
+ self.optimizer = UnfoldOptimizer(metainterp_sd, jitdriver_sd,
+ loop, optimizations)
+ self.boxes_created_this_iteration = None
+
+ def get_virtual_state(self, args):
+ modifier = VirtualStateConstructor(self.optimizer)
+ return modifier.get_virtual_state(args)
+
+ def fix_snapshot(self, jump_args, snapshot):
+ if snapshot is None:
+ return None
+ snapshot_args = snapshot.boxes
+ new_snapshot_args = []
+ for a in snapshot_args:
+ a = self.getvalue(a).get_key_box()
+ new_snapshot_args.append(a)
+ prev = self.fix_snapshot(jump_args, snapshot.prev)
+ return Snapshot(prev, new_snapshot_args)
+
+ def _rename_arguments_ssa(rename_map, label_args, jump_args):
+
+ for la,ja in zip(label_args, jump_args):
+ if la != ja:
+ rename_map[la] = ja
+
+ return new_jump_args
+
+ def propagate_all_forward(self, starting_state, export_state=True):
+
+ unroll_factor = 2
+
+ self.optimizer.exporting_state = export_state
+ loop = self.optimizer.loop
+ self.optimizer.clear_newoperations()
+
+
+ label_op = loop.operations[0]
+ jump_op = loop.operations[-1]
+ operations = loop.operations[1:-1]
+ loop.operations = []
+
+ iterations = [[op.clone() for op in operations]]
+ label_op_args = label_op.getarglist()
+
+ jump_op_args = jump_op.getarglist()
+
+ rename_map = {}
+ for unroll_i in range(2, unroll_factor+1):
+ _rename_arguments_ssa(rename_map, label_op_args, jump_op_args)
+ iteration_ops = []
+ for op in operations:
+ cop = op.clone()
+ iteration_ops.append(cop)
+ iterations.append(iteration_ops)
+
+ loop.operations.append(label_op)
+ for iteration in iterations:
+ for op in iteration:
+ loop.operations.append(op)
+ loop.operations.append(jump_op)
+
+ #start_label = loop.operations[0]
+ #if start_label.getopnum() == rop.LABEL:
+ # loop.operations = loop.operations[1:]
+ # # We need to emit the label op before import_state() as emitting it
+ # # will clear heap caches
+ # self.optimizer.send_extra_operation(start_label)
+ #else:
+ # start_label = None
+
+ #patchguardop = None
+ #if len(loop.operations) > 1:
+ # patchguardop = loop.operations[-2]
+ # if patchguardop.getopnum() != rop.GUARD_FUTURE_CONDITION:
+ # patchguardop = None
+
+ #jumpop = loop.operations[-1]
+ #if jumpop.getopnum() == rop.JUMP or jumpop.getopnum() == rop.LABEL:
+ # loop.operations = loop.operations[:-1]
+ #else:
+ # jumpop = None
+
+ #self.import_state(start_label, starting_state)
+ #self.optimizer.propagate_all_forward(clear=False)
+
+ #if not jumpop:
+ # return
+
+ #cell_token = jumpop.getdescr()
+ #assert isinstance(cell_token, JitCellToken)
+ #stop_label = ResOperation(rop.LABEL, jumpop.getarglist(), None, TargetToken(cell_token))
+
+ #if jumpop.getopnum() == rop.JUMP:
+ # if self.jump_to_already_compiled_trace(jumpop, patchguardop):
+ # # Found a compiled trace to jump to
+ # if self.short:
+ # # Construct our short preamble
+ # assert start_label
+ # self.close_bridge(start_label)
+ # return
+
+ # if start_label and self.jump_to_start_label(start_label, stop_label):
+ # # Initial label matches, jump to it
+ # jumpop = ResOperation(rop.JUMP, stop_label.getarglist(), None,
+ # descr=start_label.getdescr())
+ # if self.short:
+ # # Construct our short preamble
+ # self.close_loop(start_label, jumpop, patchguardop)
+ # else:
+ # self.optimizer.send_extra_operation(jumpop)
+ # return
+
+ # if cell_token.target_tokens:
+ # limit = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.retrace_limit
+ # if cell_token.retraced_count < limit:
+ # cell_token.retraced_count += 1
+ # debug_print('Retracing (%d/%d)' % (cell_token.retraced_count, limit))
+ # else:
+ # debug_print("Retrace count reached, jumping to preamble")
+ # assert cell_token.target_tokens[0].virtual_state is None
+ # jumpop = jumpop.clone()
+ # jumpop.setdescr(cell_token.target_tokens[0])
+ # self.optimizer.send_extra_operation(jumpop)
+ # return
+
+ ## Found nothing to jump to, emit a label instead
+
+ #if self.short:
+ # # Construct our short preamble
+ # assert start_label
+ # self.close_bridge(start_label)
+
+ #self.optimizer.flush()
+ #if export_state:
+ # KillHugeIntBounds(self.optimizer).apply()
+
+ #loop.operations = self.optimizer.get_newoperations()
+ #if export_state:
+ # final_state = self.export_state(stop_label)
+ #else:
+ # final_state = None
+ #loop.operations.append(stop_label)
+ #return final_state
+ return loop
+
+ def jump_to_start_label(self, start_label, stop_label):
+ if not start_label or not stop_label:
+ return False
+
+ stop_target = stop_label.getdescr()
+ start_target = start_label.getdescr()
+ assert isinstance(stop_target, TargetToken)
+ assert isinstance(start_target, TargetToken)
+ return stop_target.targeting_jitcell_token is start_target.targeting_jitcell_token
+
+
+ def export_state(self, targetop):
+ original_jump_args = targetop.getarglist()
+ jump_args = [self.getvalue(a).get_key_box() for a in original_jump_args]
+
+ virtual_state = self.get_virtual_state(jump_args)
+
+ values = [self.getvalue(arg) for arg in jump_args]
+ inputargs = virtual_state.make_inputargs(values, self.optimizer)
+ short_inputargs = virtual_state.make_inputargs(values, self.optimizer, keyboxes=True)
+
+ if self.boxes_created_this_iteration is not None:
+ for box in self.inputargs:
+ self.boxes_created_this_iteration[box] = None
+
+ short_boxes = ShortBoxes(self.optimizer, inputargs)
+
+ self.optimizer.clear_newoperations()
+ for i in range(len(original_jump_args)):
+ srcbox = jump_args[i]
+ if values[i].is_virtual():
+ srcbox = values[i].force_box(self.optimizer)
+ if original_jump_args[i] is not srcbox:
+ op = ResOperation(rop.SAME_AS, [srcbox], original_jump_args[i])
+ self.optimizer.emit_operation(op)
+ inputarg_setup_ops = self.optimizer.get_newoperations()
+
+ target_token = targetop.getdescr()
+ assert isinstance(target_token, TargetToken)
+ targetop.initarglist(inputargs)
+ target_token.virtual_state = virtual_state
+ target_token.short_preamble = [ResOperation(rop.LABEL, short_inputargs, None)]
+
+ exported_values = {}
+ for box in inputargs:
+ exported_values[box] = self.optimizer.getvalue(box)
+ for op in short_boxes.operations():
+ if op and op.result:
+ box = op.result
+ exported_values[box] = self.optimizer.getvalue(box)
+
+ return ExportedState(short_boxes, inputarg_setup_ops, exported_values)
+
+ def import_state(self, targetop, exported_state):
+ if not targetop: # Trace did not start with a label
+ self.inputargs = self.optimizer.loop.inputargs
+ self.short = None
+ self.initial_virtual_state = None
+ return
+
+ self.inputargs = targetop.getarglist()
+ target_token = targetop.getdescr()
+ assert isinstance(target_token, TargetToken)
+ if not exported_state:
+ # No state exported, construct one without virtuals
+ self.short = None
+ virtual_state = self.get_virtual_state(self.inputargs)
+ self.initial_virtual_state = virtual_state
+ return
+
+ self.short = target_token.short_preamble[:]
+ self.short_seen = {}
+ self.short_boxes = exported_state.short_boxes
+ self.initial_virtual_state = target_token.virtual_state
+
+ for box in self.inputargs:
+ preamble_value = exported_state.exported_values[box]
+ value = self.optimizer.getvalue(box)
+ value.import_from(preamble_value, self.optimizer)
+
+ # Set up the state of the new optimizer by emitting the
+ # short operations and discarding the result
+ self.optimizer.emitting_dissabled = True
+ for op in exported_state.inputarg_setup_ops:
+ self.optimizer.send_extra_operation(op)
+
+ seen = {}
+ for op in self.short_boxes.operations():
+ self.ensure_short_op_emitted(op, self.optimizer, seen)
+ if op and op.result:
+ preamble_value = exported_state.exported_values[op.result]
+ value = self.optimizer.getvalue(op.result)
+ if not value.is_virtual() and not value.is_constant():
+ imp = ValueImporter(self, preamble_value, op)
+ self.optimizer.importable_values[value] = imp
+ newvalue = self.optimizer.getvalue(op.result)
+ newresult = newvalue.get_key_box()
+ # note that emitting here SAME_AS should not happen, but
+ # in case it does, we would prefer to be suboptimal in asm
+ # to a fatal RPython exception.
+ if newresult is not op.result and \
+ not self.short_boxes.has_producer(newresult) and \
+ not newvalue.is_constant():
+ op = ResOperation(rop.SAME_AS, [op.result], newresult)
+ self.optimizer._newoperations.append(op)
+ #if self.optimizer.loop.logops:
+ # debug_print(' Falling back to add extra: ' +
+ # self.optimizer.loop.logops.repr_of_resop(op))
+
+ self.optimizer.flush()
+ self.optimizer.emitting_dissabled = False
+
+ def close_bridge(self, start_label):
+ inputargs = self.inputargs
+ short_jumpargs = inputargs[:]
+
+ # We don't need to inline the short preamble we are creating as we are connecting
+ # the bridge to a different trace with a different short preamble
+ self.short_inliner = None
+
+ newoperations = self.optimizer.get_newoperations()
+ self.boxes_created_this_iteration = {}
+ i = 0
+ while i < len(newoperations):
+ self._import_op(newoperations[i], inputargs, short_jumpargs, [])
+ i += 1
+ newoperations = self.optimizer.get_newoperations()
+ self.short.append(ResOperation(rop.JUMP, short_jumpargs, None, descr=start_label.getdescr()))
+ self.finalize_short_preamble(start_label)
+
+ def close_loop(self, start_label, jumpop, patchguardop):
+ virtual_state = self.initial_virtual_state
+ short_inputargs = self.short[0].getarglist()
+ inputargs = self.inputargs
+ short_jumpargs = inputargs[:]
+
+ # Construct jumpargs from the virtual state
+ original_jumpargs = jumpop.getarglist()[:]
+ values = [self.getvalue(arg) for arg in jumpop.getarglist()]
+ try:
+ jumpargs = virtual_state.make_inputargs(values, self.optimizer)
+ except BadVirtualState:
+ raise InvalidLoop('The state of the optimizer at the end of ' +
+ 'peeled loop is inconsistent with the ' +
+ 'VirtualState at the beginning of the peeled ' +
+ 'loop')
+ jumpop.initarglist(jumpargs)
+
+ # Inline the short preamble at the end of the loop
+ jmp_to_short_args = virtual_state.make_inputargs(values,
+ self.optimizer,
+ keyboxes=True)
+ assert len(short_inputargs) == len(jmp_to_short_args)
+ args = {}
+ for i in range(len(short_inputargs)):
+ if short_inputargs[i] in args:
+ if args[short_inputargs[i]] != jmp_to_short_args[i]:
+ raise InvalidLoop('The short preamble wants the ' +
+ 'same box passed to multiple of its ' +
+ 'inputargs, but the jump at the ' +
+ 'end of this bridge does not do that.')
+
+ args[short_inputargs[i]] = jmp_to_short_args[i]
+ self.short_inliner = Inliner(short_inputargs, jmp_to_short_args)
+ self._inline_short_preamble(self.short, self.short_inliner,
+ patchguardop, self.short_boxes.assumed_classes)
+
+ # Import boxes produced in the preamble but used in the loop
+ newoperations = self.optimizer.get_newoperations()
+ self.boxes_created_this_iteration = {}
+ i = j = 0
+ while i < len(newoperations) or j < len(jumpargs):
+ if i == len(newoperations):
+ while j < len(jumpargs):
+ a = jumpargs[j]
+ #if self.optimizer.loop.logops:
+ # debug_print('J: ' + self.optimizer.loop.logops.repr_of_arg(a))
+ self.import_box(a, inputargs, short_jumpargs, jumpargs)
+ j += 1
+ else:
+ self._import_op(newoperations[i], inputargs, short_jumpargs, jumpargs)
+ i += 1
+ newoperations = self.optimizer.get_newoperations()
+
+ jumpop.initarglist(jumpargs)
+ self.optimizer.send_extra_operation(jumpop)
+ self.short.append(ResOperation(rop.JUMP, short_jumpargs, None, descr=jumpop.getdescr()))
+
+ # Verify that the virtual state at the end of the loop is one
+ # that is compatible with the virtual state at the start of the loop
+ final_virtual_state = self.get_virtual_state(original_jumpargs)
+ #debug_start('jit-log-virtualstate')
+ #virtual_state.debug_print('Closed loop with ')
+ bad = {}
+ if not virtual_state.generalization_of(final_virtual_state, bad,
+ cpu=self.optimizer.cpu):
+ # We ended up with a virtual state that is not compatible
+ # and we are thus unable to jump to the start of the loop
+ #final_virtual_state.debug_print("Bad virtual state at end of loop, ",
+ # bad)
+ #debug_stop('jit-log-virtualstate')
+ raise InvalidLoop('The virtual state at the end of the peeled ' +
+ 'loop is not compatible with the virtual ' +
+ 'state at the start of the loop which makes ' +
+ 'it impossible to close the loop')
+
+ #debug_stop('jit-log-virtualstate')
+
+ maxguards = self.optimizer.metainterp_sd.warmrunnerdesc.memory_manager.max_retrace_guards
+ if self.optimizer.emitted_guards > maxguards:
+ target_token = jumpop.getdescr()
+ assert isinstance(target_token, TargetToken)
+ target_token.targeting_jitcell_token.retraced_count = sys.maxint
+
+ self.finalize_short_preamble(start_label)
+
+ def finalize_short_preamble(self, start_label):
+ short = self.short
+ assert short[-1].getopnum() == rop.JUMP
+ target_token = start_label.getdescr()
+ assert isinstance(target_token, TargetToken)
+
+ # Turn guards into conditional jumps to the preamble
+ for i in range(len(short)):
+ op = short[i]
+ if op.is_guard():
+ op = op.clone()
+ op.setfailargs(None)
+ op.setdescr(None) # will be set to a proper descr when the preamble is used
+ short[i] = op
+
+ # Clone ops and boxes to get private versions and
+ short_inputargs = short[0].getarglist()
+ boxmap = {}
+ newargs = [None] * len(short_inputargs)
+ for i in range(len(short_inputargs)):
+ a = short_inputargs[i]
+ if a in boxmap:
+ newargs[i] = boxmap[a]
+ else:
+ newargs[i] = a.clonebox()
+ boxmap[a] = newargs[i]
+ inliner = Inliner(short_inputargs, newargs)
+ target_token.assumed_classes = {}
+ for i in range(len(short)):
+ op = short[i]
+ newop = inliner.inline_op(op)
+ if op.result and op.result in self.short_boxes.assumed_classes:
+ target_token.assumed_classes[newop.result] = self.short_boxes.assumed_classes[op.result]
+ short[i] = newop
+
+ # Forget the values to allow them to be freed
+ for box in short[0].getarglist():
+ box.forget_value()
+ for op in short:
+ if op.result:
+ op.result.forget_value()
+ target_token.short_preamble = self.short
+
+ def ensure_short_op_emitted(self, op, optimizer, seen):
+ if op is None:
+ return
+ if op.result is not None and op.result in seen:
+ return
+ for a in op.getarglist():
+ if not isinstance(a, Const) and a not in seen:
+ self.ensure_short_op_emitted(self.short_boxes.producer(a), optimizer,
+ seen)
+
+ #if self.optimizer.loop.logops:
+ # debug_print(' Emitting short op: ' +
+ # self.optimizer.loop.logops.repr_of_resop(op))
+
+ optimizer.send_extra_operation(op)
+ seen[op.result] = None
+ if op.is_ovf():
+ guard = ResOperation(rop.GUARD_NO_OVERFLOW, [], None)
+ optimizer.send_extra_operation(guard)
+
+ def add_op_to_short(self, op, emit=True, guards_needed=False):
+ if op is None:
+ return None
+ if op.result is not None and op.result in self.short_seen:
+ if emit and self.short_inliner:
+ return self.short_inliner.inline_arg(op.result)
+ else:
+ return None
+
+ for a in op.getarglist():
+ if not isinstance(a, Const) and a not in self.short_seen:
+ self.add_op_to_short(self.short_boxes.producer(a), emit, guards_needed)
+ if op.is_guard():
+ op.setdescr(None) # will be set to a proper descr when the preamble is used
+
+ if guards_needed and self.short_boxes.has_producer(op.result):
+ value_guards = self.getvalue(op.result).make_guards(op.result)
+ else:
+ value_guards = []
+
+ self.short.append(op)
+ self.short_seen[op.result] = None
+ if emit and self.short_inliner:
+ newop = self.short_inliner.inline_op(op)
+ self.optimizer.send_extra_operation(newop)
+ else:
+ newop = None
+
+ if op.is_ovf():
+ # FIXME: ensure that GUARD_OVERFLOW:ed ops not end up here
+ guard = ResOperation(rop.GUARD_NO_OVERFLOW, [], None)
+ self.add_op_to_short(guard, emit, guards_needed)
+ for guard in value_guards:
+ self.add_op_to_short(guard, emit, guards_needed)
+
+ if newop:
+ return newop.result
+ return None
+
+ def import_box(self, box, inputargs, short_jumpargs, jumpargs):
+ if isinstance(box, Const) or box in inputargs:
+ return
+ if box in self.boxes_created_this_iteration:
+ return
+
+ short_op = self.short_boxes.producer(box)
+ newresult = self.add_op_to_short(short_op)
+
+ short_jumpargs.append(short_op.result)
+ inputargs.append(box)
+ box = newresult
+ if box in self.optimizer.values:
+ box = self.optimizer.values[box].force_box(self.optimizer)
+ jumpargs.append(box)
+
+
+ def _import_op(self, op, inputargs, short_jumpargs, jumpargs):
+ self.boxes_created_this_iteration[op.result] = None
+ args = op.getarglist()
+ if op.is_guard():
+ args = args + op.getfailargs()
+
+ for a in args:
+ self.import_box(a, inputargs, short_jumpargs, jumpargs)
+
+ def jump_to_already_compiled_trace(self, jumpop, patchguardop):
+ jumpop = jumpop.clone()
+ assert jumpop.getopnum() == rop.JUMP
+ cell_token = jumpop.getdescr()
+
+ assert isinstance(cell_token, JitCellToken)
+ if not cell_token.target_tokens:
+ return False
+
+ if not self.inline_short_preamble:
+ assert cell_token.target_tokens[0].virtual_state is None
+ jumpop.setdescr(cell_token.target_tokens[0])
+ self.optimizer.send_extra_operation(jumpop)
+ return True
+
+ args = jumpop.getarglist()
+ virtual_state = self.get_virtual_state(args)
+ values = [self.getvalue(arg)
+ for arg in jumpop.getarglist()]
+ debug_start('jit-log-virtualstate')
+ virtual_state.debug_print("Looking for ", metainterp_sd=self.optimizer.metainterp_sd)
+
+ for target in cell_token.target_tokens:
+ if not target.virtual_state:
+ continue
+ extra_guards = []
+
+ try:
+ cpu = self.optimizer.cpu
+ state = target.virtual_state.generate_guards(virtual_state,
+ values,
+ cpu)
+
+ extra_guards = state.extra_guards
+ if extra_guards:
+ debugmsg = 'Guarded to match '
+ else:
+ debugmsg = 'Matched '
+ except VirtualStatesCantMatch, e:
+ debugmsg = 'Did not match:\n%s\n' % (e.msg, )
+ target.virtual_state.debug_print(debugmsg, e.state.bad, metainterp_sd=self.optimizer.metainterp_sd)
+ continue
+
+ assert patchguardop is not None or (extra_guards == [] and len(target.short_preamble) == 1)
+
+ target.virtual_state.debug_print(debugmsg, {})
+
+ debug_stop('jit-log-virtualstate')
+
+ args = target.virtual_state.make_inputargs(values, self.optimizer,
+ keyboxes=True)
+ short_inputargs = target.short_preamble[0].getarglist()
+ inliner = Inliner(short_inputargs, args)
+
+ for guard in extra_guards:
+ if guard.is_guard():
+ assert isinstance(patchguardop, GuardResOp)
+ assert isinstance(guard, GuardResOp)
+ guard.rd_snapshot = patchguardop.rd_snapshot
+ guard.rd_frame_info_list = patchguardop.rd_frame_info_list
+ guard.setdescr(compile.ResumeAtPositionDescr())
+ self.optimizer.send_extra_operation(guard)
+
+ try:
+ # NB: the short_preamble ends with a jump
+ self._inline_short_preamble(target.short_preamble, inliner,
+ patchguardop,
+ target.assumed_classes)
+ except InvalidLoop:
+ #debug_print("Inlining failed unexpectedly",
+ # "jumping to preamble instead")
+ assert cell_token.target_tokens[0].virtual_state is None
+ jumpop.setdescr(cell_token.target_tokens[0])
+ self.optimizer.send_extra_operation(jumpop)
+ return True
+ debug_stop('jit-log-virtualstate')
+ return False
+
+ def _inline_short_preamble(self, short_preamble, inliner, patchguardop,
+ assumed_classes):
+ i = 1
+ # XXX this is intentional :-(. short_preamble can change during the
+ # loop in some cases
+ while i < len(short_preamble):
+ shop = short_preamble[i]
+ newop = inliner.inline_op(shop)
+ if newop.is_guard():
+ if not patchguardop:
+ raise InvalidLoop("would like to have short preamble, but it has a guard and there's no guard_future_condition")
+ assert isinstance(newop, GuardResOp)
+ assert isinstance(patchguardop, GuardResOp)
+ newop.rd_snapshot = patchguardop.rd_snapshot
+ newop.rd_frame_info_list = patchguardop.rd_frame_info_list
+ newop.setdescr(compile.ResumeAtPositionDescr())
+ self.optimizer.send_extra_operation(newop)
+ if shop.result in assumed_classes:
+ classbox = self.getvalue(newop.result).get_constant_class(self.optimizer.cpu)
+ if not classbox or not classbox.same_constant(assumed_classes[shop.result]):
+ raise InvalidLoop('The class of an opaque pointer before the jump ' +
+ 'does not mach the class ' +
+ 'it has at the start of the target loop')
+ i += 1
+
+
+class ValueImporter(object):
+ def __init__(self, unroll, value, op):
+ self.unroll = unroll
+ self.preamble_value = value
+ self.op = op
+
+ def import_value(self, value):
+ value.import_from(self.preamble_value, self.unroll.optimizer)
+ self.unroll.add_op_to_short(self.op, False, True)
+
+
+class ExportedState(object):
+ def __init__(self, short_boxes, inputarg_setup_ops, exported_values):
+ self.short_boxes = short_boxes
+ self.inputarg_setup_ops = inputarg_setup_ops
+ self.exported_values = exported_values
diff --git a/rpython/rlib/jit.py b/rpython/rlib/jit.py
--- a/rpython/rlib/jit.py
+++ b/rpython/rlib/jit.py
@@ -448,7 +448,7 @@
"""Inconsistency in the JIT hints."""
ENABLE_ALL_OPTS = (
- 'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll')
+ 'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll:unfold')
PARAMETER_DOCS = {
'threshold': 'number of times a loop has to run for it to become hot',
More information about the pypy-commit
mailing list