[pypy-commit] pypy bridgeopt-improvements: experimental attempt to reduce the cost of call_loopinvariant in every bridge
cfbolz
pypy.commits at gmail.com
Sun Aug 6 18:11:47 EDT 2017
Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch: bridgeopt-improvements
Changeset: r92102:3ab4221d7876
Date: 2017-08-07 00:09 +0200
http://bitbucket.org/pypy/pypy/changeset/3ab4221d7876/
Log: Experimental attempt to reduce the cost of call_loopinvariant in
every bridge that calls a method.
Approach: pass the call_loopinvariant result into the failargs of guards
(more or less everywhere) and then reuse that result in the bridge.
diff --git a/rpython/jit/metainterp/optimizeopt/bridgeopt.py b/rpython/jit/metainterp/optimizeopt/bridgeopt.py
--- a/rpython/jit/metainterp/optimizeopt/bridgeopt.py
+++ b/rpython/jit/metainterp/optimizeopt/bridgeopt.py
@@ -2,6 +2,7 @@
optimizer of the bridge attached to a guard. """
from rpython.jit.metainterp import resumecode
+from rpython.rlib.objectmodel import we_are_translated
# adds the following sections at the end of the resume code:
@@ -22,17 +23,22 @@
# (<box1> <index> <descr> <box2>) length times, if getarrayitem_gc(box1, index, descr) == box2
# both boxes should be in the liveboxes
#
+# <length>
+# (<const> <descr> <box1>) length times, if call_loop_invariant(const, descr) == box1
+# the box should be in the liveboxes
# ----
# maybe should be delegated to the optimization classes?
-def tag_box(box, liveboxes_from_env, memo):
+def tag_box(box, adder):
from rpython.jit.metainterp.history import Const
if isinstance(box, Const):
- return memo.getconst(box)
+ return adder.memo.getconst(box)
else:
- return liveboxes_from_env[box] # has to exist
+ if box in adder.liveboxes_from_env:
+ return adder.liveboxes_from_env[box]
+ return adder.liveboxes[box] # has to exist
def decode_box(resumestorage, tagged, liveboxes, cpu):
from rpython.jit.metainterp.resume import untag, TAGCONST, TAGINT, TAGBOX
@@ -54,10 +60,13 @@
raise AssertionError("unreachable")
return box
-def serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, liveboxes_from_env, memo):
+def serialize_optimizer_knowledge(adder, numb_state, liveboxes):
+ optimizer = adder.optimizer
+ liveboxes_from_env = adder.liveboxes_from_env
available_boxes = {}
for box in liveboxes:
- if box is not None and box in liveboxes_from_env:
+ if box is not None and (
+ box in adder.liveboxes_from_env or box in adder.liveboxes):
available_boxes[box] = None
metainterp_sd = optimizer.metainterp_sd
@@ -84,7 +93,6 @@
# heap knowledge: we store triples of known heap fields in non-virtual
# structs
- # XXX could be extended to arrays
if optimizer.optheap:
triples_struct, triples_array = optimizer.optheap.serialize_optheap(available_boxes)
# can only encode descrs that have a known index into
@@ -93,20 +101,32 @@
numb_state.append_int(len(triples_struct))
for box1, descr, box2 in triples_struct:
descr_index = descr.descr_index
- numb_state.append_short(tag_box(box1, liveboxes_from_env, memo))
+ numb_state.append_short(tag_box(box1, adder))
numb_state.append_int(descr_index)
- numb_state.append_short(tag_box(box2, liveboxes_from_env, memo))
+ numb_state.append_short(tag_box(box2, adder))
numb_state.append_int(len(triples_array))
for box1, index, descr, box2 in triples_array:
descr_index = descr.descr_index
- numb_state.append_short(tag_box(box1, liveboxes_from_env, memo))
+ numb_state.append_short(tag_box(box1, adder))
numb_state.append_int(index)
numb_state.append_int(descr_index)
- numb_state.append_short(tag_box(box2, liveboxes_from_env, memo))
+ numb_state.append_short(tag_box(box2, adder))
else:
numb_state.append_int(0)
numb_state.append_int(0)
+ # loop invariant calls
+ if optimizer.optrewrite:
+ triples = optimizer.optrewrite.serialize_optrewrite(available_boxes)
+ numb_state.append_int(len(triples))
+ for const, descr, box in triples:
+ descr_index = descr.descr_index
+ numb_state.append_short(tag_box(const, adder))
+ numb_state.append_int(descr_index)
+ numb_state.append_short(tag_box(box, adder))
+ else:
+ numb_state.append_int(0)
+
def deserialize_optimizer_knowledge(optimizer, resumestorage, frontend_boxes, liveboxes):
reader = resumecode.Reader(resumestorage.rd_numb)
assert len(frontend_boxes) == len(liveboxes)
@@ -115,6 +135,8 @@
# skip resume section
startcount = reader.next_item()
reader.jump(startcount - 1)
+ extracount = reader.next_item()
+ reader.jump(extracount)
# class knowledge
bitfield = 0
@@ -132,8 +154,6 @@
optimizer.make_constant_class(box, cls)
# heap knowledge
- if not optimizer.optheap:
- return
length = reader.next_item()
result_struct = []
for i in range(length):
@@ -155,4 +175,59 @@
tagged = reader.next_item()
box2 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu)
result_array.append((box1, index, descr, box2))
- optimizer.optheap.deserialize_optheap(result_struct, result_array)
+ if result_struct or result_array:
+ optimizer.optheap.deserialize_optheap(result_struct, result_array)
+
+ # loop_invariant knowledge
+ length = reader.next_item()
+ results = []
+ for i in range(length):
+ tagged = reader.next_item()
+ box1 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu)
+ descr_index = reader.next_item()
+ descr = metainterp_sd.all_descrs[descr_index]
+ tagged = reader.next_item()
+ box2 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu)
+ results.append((box1, descr, box2))
+ if results:
+ optimizer.optrewrite.deserialize_optrewrite(results)
+
+def consistency_checking_numbering(numb, liveboxes):
+ if we_are_translated():
+ return
+ # very much a "does not crash" kind of affair
+ reader = resumecode.Reader(numb)
+
+ # skip resume section
+ startcount = reader.next_item()
+ reader.jump(startcount - 1)
+ extracount = reader.next_item()
+ reader.jump(extracount)
+
+ mask = 0
+ for i, box in enumerate(liveboxes):
+ if box.type != "r":
+ continue
+ if not mask:
+ bitfield = reader.next_item()
+ mask = 0b100000
+ mask >>= 1
+
+ length = reader.next_item()
+ for i in range(length):
+ tagged = reader.next_item()
+ descr_index = reader.next_item()
+ tagged = reader.next_item()
+ length = reader.next_item()
+ for i in range(length):
+ tagged = reader.next_item()
+ index = reader.next_item()
+ descr_index = reader.next_item()
+ tagged = reader.next_item()
+
+ # loop_invariant knowledge
+ length = reader.next_item()
+ for i in range(length):
+ tagged = reader.next_item()
+ descr_index = reader.next_item()
+ tagged = reader.next_item()
diff --git a/rpython/jit/metainterp/optimizeopt/optimizer.py b/rpython/jit/metainterp/optimizeopt/optimizer.py
--- a/rpython/jit/metainterp/optimizeopt/optimizer.py
+++ b/rpython/jit/metainterp/optimizeopt/optimizer.py
@@ -620,7 +620,12 @@
del self.replaces_guard[orig_op]
return
else:
- op = self.emit_guard_operation(op, pendingfields)
+ extra_liveboxes = []
+ # hack, but probably a good one
+ if len(self.optrewrite.loop_invariant_results) == 1:
+ extra_liveboxes = [
+ self.optrewrite.loop_invariant_results.values()[0][0].get_box_replacement()]
+ op = self.emit_guard_operation(op, pendingfields, extra_liveboxes)
elif op.can_raise():
self.exception_might_have_happened = True
opnum = op.opnum
@@ -633,7 +638,7 @@
self._really_emitted_operation = op
self._newoperations.append(op)
- def emit_guard_operation(self, op, pendingfields):
+ def emit_guard_operation(self, op, pendingfields, extra_liveboxes):
guard_op = op # self.replace_op_with(op, op.getopnum())
opnum = guard_op.getopnum()
# If guard_(no)_exception is merged with another previous guard, then
@@ -653,7 +658,8 @@
op = self._copy_resume_data_from(guard_op,
self._last_guard_op)
else:
- op = self.store_final_boxes_in_guard(guard_op, pendingfields)
+ op = self.store_final_boxes_in_guard(
+ guard_op, pendingfields, extra_liveboxes)
self._last_guard_op = op
# for unrolling
for farg in op.getfailargs():
@@ -723,7 +729,7 @@
new_descr.copy_all_attributes_from(old_descr)
self._newoperations[old_op_pos] = new_op
- def store_final_boxes_in_guard(self, op, pendingfields):
+ def store_final_boxes_in_guard(self, op, pendingfields, extra_liveboxes):
assert pendingfields is not None
if op.getdescr() is not None:
descr = op.getdescr()
@@ -736,7 +742,7 @@
modifier = resume.ResumeDataVirtualAdder(self, descr, op, self.trace,
self.resumedata_memo)
try:
- newboxes = modifier.finish(pendingfields)
+ newboxes = modifier.finish(pendingfields, extra_liveboxes)
if (newboxes is not None and
len(newboxes) > self.metainterp_sd.options.failargs_limit):
raise resume.TagOverflow
diff --git a/rpython/jit/metainterp/optimizeopt/rewrite.py b/rpython/jit/metainterp/optimizeopt/rewrite.py
--- a/rpython/jit/metainterp/optimizeopt/rewrite.py
+++ b/rpython/jit/metainterp/optimizeopt/rewrite.py
@@ -28,7 +28,7 @@
def _callback(self, op, old_op):
key = make_hashable_int(op.getarg(0).getint())
self.opt.loop_invariant_producer[key] = self.opt.optimizer.getlastop()
- self.opt.loop_invariant_results[key] = old_op
+ self.opt.loop_invariant_results[key] = old_op, op.getarg(0), old_op.getdescr()
class OptRewrite(Optimization):
@@ -568,13 +568,15 @@
arg = op.getarg(0)
# 'arg' must be a Const, because residual_call in codewriter
# expects a compile-time constant
+ # XXX the descr is ignored! let's hope there are no different
+ # call_loop_invariant around
assert isinstance(arg, Const)
key = make_hashable_int(arg.getint())
- resvalue = self.loop_invariant_results.get(key, None)
+ resvalue, arg0, descr = self.loop_invariant_results.get(key, (None, None, None))
if resvalue is not None:
resvalue = self.optimizer.force_op_from_preamble(resvalue)
- self.loop_invariant_results[key] = resvalue
+ self.loop_invariant_results[key] = resvalue, arg0, descr
self.make_equal_to(op, resvalue)
self.last_emitted_operation = REMOVED
return
@@ -867,6 +869,18 @@
optimize_SAME_AS_R = optimize_SAME_AS_I
optimize_SAME_AS_F = optimize_SAME_AS_I
+ def serialize_optrewrite(self, available_boxes):
+ triples = []
+ for box, arg0, descr in self.loop_invariant_results.values():
+ triples.append((arg0, descr, box.get_box_replacement()))
+ return triples
+
+ def deserialize_optrewrite(self, triples):
+ for arg, descr, resvalue in triples:
+ assert isinstance(arg, Const)
+ key = make_hashable_int(arg.getint())
+ self.loop_invariant_results[key] = resvalue, arg, descr
+
dispatch_opt = make_dispatcher_method(OptRewrite, 'optimize_',
default=OptRewrite.emit)
optimize_guards = _findall(OptRewrite, 'optimize_', 'GUARD')
diff --git a/rpython/jit/metainterp/optimizeopt/shortpreamble.py b/rpython/jit/metainterp/optimizeopt/shortpreamble.py
--- a/rpython/jit/metainterp/optimizeopt/shortpreamble.py
+++ b/rpython/jit/metainterp/optimizeopt/shortpreamble.py
@@ -154,8 +154,11 @@
return
op = self.res
key = make_hashable_int(op.getarg(0).getint())
- optrewrite.loop_invariant_results[key] = PreambleOp(op, preamble_op,
- invented_name)
+ optrewrite.loop_invariant_results[key] = (
+ PreambleOp(op, preamble_op, invented_name),
+ op.getarg(0),
+ op.getdescr()
+ )
def add_op_to_short(self, sb):
op = self.res
diff --git a/rpython/jit/metainterp/resume.py b/rpython/jit/metainterp/resume.py
--- a/rpython/jit/metainterp/resume.py
+++ b/rpython/jit/metainterp/resume.py
@@ -412,7 +412,8 @@
_, tagbits = untag(tagged)
return tagbits == TAGVIRTUAL
- def finish(self, pending_setfields=[]):
+ def finish(self, pending_setfields=[], extra_liveboxes=[]):
+ from rpython.jit.metainterp.optimizeopt.bridgeopt import consistency_checking_numbering
optimizer = self.optimizer
# compute the numbering
storage = self.storage
@@ -458,17 +459,31 @@
info = optimizer.getptrinfo(fieldbox)
assert info is not None and info.is_virtual()
info.visitor_walk_recursive(fieldbox, self, optimizer)
+ for box in extra_liveboxes:
+ box = optimizer.get_box_replacement(box)
+ self.register_box(box)
+ info = optimizer.getptrinfo(box)
+ assert info is None or not info.is_virtual()
self._number_virtuals(liveboxes, optimizer, num_virtuals)
self._add_pending_fields(optimizer, pending_setfields)
numb_state.patch(1, len(liveboxes))
- self._add_optimizer_sections(numb_state, liveboxes, liveboxes_from_env)
- storage.rd_numb = numb_state.create_numbering()
+ self._add_extra_box_section(extra_liveboxes, numb_state)
+
+ self._add_optimizer_sections(numb_state, liveboxes)
+ rd_numb = numb_state.create_numbering()
+ consistency_checking_numbering(rd_numb, liveboxes)
+ storage.rd_numb = rd_numb
storage.rd_consts = self.memo.consts
return liveboxes[:]
+ def _add_extra_box_section(self, extra_liveboxes, numb_state):
+ numb_state.append_int(len(extra_liveboxes))
+ for box in extra_liveboxes:
+ numb_state.append_short(self._gettagged(box.get_box_replacement()))
+
def _number_virtuals(self, liveboxes, optimizer, num_env_virtuals):
from rpython.jit.metainterp.optimizeopt.info import AbstractVirtualPtrInfo
@@ -584,11 +599,10 @@
return self.liveboxes_from_env[box]
return self.liveboxes[box]
- def _add_optimizer_sections(self, numb_state, liveboxes, liveboxes_from_env):
+ def _add_optimizer_sections(self, numb_state, liveboxes):
# add extra information about things the optimizer learned
from rpython.jit.metainterp.optimizeopt.bridgeopt import serialize_optimizer_knowledge
- serialize_optimizer_knowledge(
- self.optimizer, numb_state, liveboxes, liveboxes_from_env, self.memo)
+ serialize_optimizer_knowledge(self, numb_state, liveboxes)
class AbstractVirtualInfo(object):
kind = REF
@@ -1067,6 +1081,7 @@
resumereader.consume_boxes(f.get_current_position_info(),
f.registers_i, f.registers_r, f.registers_f)
f.handle_rvmprof_enter_on_resume()
+ resumereader.consume_extra_boxes()
return resumereader.liveboxes, virtualizable_boxes, virtualref_boxes
@@ -1113,6 +1128,11 @@
virtualref_boxes = self.consume_virtualref_boxes()
return virtualizable_boxes, virtualref_boxes
+ def consume_extra_boxes(self):
+ extra_boxes_size = self.resumecodereader.next_item()
+ for i in range(extra_boxes_size):
+ self.next_ref() # does nothing but read the box!
+
def allocate_with_vtable(self, descr=None):
return self.metainterp.execute_new_with_vtable(descr=descr)
diff --git a/rpython/jit/metainterp/resumecode.py b/rpython/jit/metainterp/resumecode.py
--- a/rpython/jit/metainterp/resumecode.py
+++ b/rpython/jit/metainterp/resumecode.py
@@ -18,6 +18,8 @@
until the size of the resume section
+ [<length> <numb> <numb> ... <numb>] more boxes for the optimizer section
+
# ----- optimization section
<more code> further sections according to bridgeopt.py
"""
diff --git a/rpython/jit/metainterp/test/test_bridgeopt.py b/rpython/jit/metainterp/test/test_bridgeopt.py
--- a/rpython/jit/metainterp/test/test_bridgeopt.py
+++ b/rpython/jit/metainterp/test/test_bridgeopt.py
@@ -27,6 +27,7 @@
class FakeOptimizer(object):
metainterp_sd = None
optheap = None
+ optrewrite = None
def __init__(self, dct={}, cpu=None):
self.dct = dct
@@ -46,6 +47,13 @@
def __init__(self, numb):
self.rd_numb = numb
+class FakeAdder(object):
+ def __init__(self, optimizer, liveboxes_from_env, liveboxes, memo):
+ self.optimizer = optimizer
+ self.liveboxes_from_env = liveboxes_from_env
+ self.liveboxes = liveboxes
+ self.memo = memo
+
def test_known_classes():
box1 = InputArgRef()
box2 = InputArgRef()
@@ -57,11 +65,13 @@
numb_state = NumberingState(4)
numb_state.append_int(1) # size of resume block
+ numb_state.append_int(0) # size of extra arg block
liveboxes = [InputArgInt(), box2, box1, box3]
+ adder = FakeAdder(optimizer, {}, {}, None)
- serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, {}, None)
+ serialize_optimizer_knowledge(adder, numb_state, liveboxes)
- assert unpack_numbering(numb_state.create_numbering()) == [1, 0b010000, 0, 0]
+ assert unpack_numbering(numb_state.create_numbering()) == [1, 0, 0b010000, 0, 0, 0]
rbox1 = InputArgRef()
rbox2 = InputArgRef()
@@ -93,11 +103,14 @@
numb_state = NumberingState(1)
numb_state.append_int(1) # size of resume block
+ numb_state.append_int(0) # size of extra arg block
liveboxes = [box for (box, _) in boxes_known_classes]
- serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, {}, None)
+ adder = FakeAdder(optimizer, {}, {}, None)
- assert len(numb_state.create_numbering().code) == 3 + math.ceil(len(refboxes) / 6.0)
+ serialize_optimizer_knowledge(adder, numb_state, liveboxes)
+
+ assert len(numb_state.create_numbering().code) == 5 + math.ceil(len(refboxes) / 6.0)
dct = {box: cls
for box, known_class in boxes_known_classes
@@ -140,6 +153,40 @@
self.check_trace_count(3)
self.check_resops(guard_class=1)
+ def Xtest_bridge_guard_class_virtual(self):
+ myjitdriver = jit.JitDriver(greens=[], reds='auto')
+ class A(object):
+ def f(self):
+ return 1
+ class B(A):
+ def f(self):
+ return 2
+ class Box(object):
+ def __init__(self, a):
+ self.a = a
+ def f(x, y, n):
+ if x:
+ a = A()
+ else:
+ a = B()
+ a.x = 0
+ box = Box(a)
+ res = 0
+ while y > 0:
+ myjitdriver.jit_merge_point()
+ res += box.a.f()
+ a.x += 1
+ if y > n:
+ res += 1
+ res += box.a.f()
+ y -= 1
+ box = Box(box.a)
+ return res
+ res = self.meta_interp(f, [6, 32, 16])
+ assert res == f(6, 32, 16)
+ self.check_trace_count(3)
+ self.check_resops(guard_class=1)
+
def test_bridge_field_read(self):
myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n', 'a'])
class A(object):
@@ -282,3 +329,33 @@
self.check_trace_count(3)
self.check_resops(guard_value=1)
self.check_resops(getarrayitem_gc_i=5)
+
+ def test_loop_invariant_bridge(self):
+ myjitdriver = jit.JitDriver(greens = [], reds = ['x', 'res'])
+ class A(object):
+ pass
+ a = A()
+ a.current_a = A()
+ a.current_a.x = 12
+ @jit.loop_invariant
+ def f():
+ return a.current_a
+
+ def g(x):
+ res = 0
+ while x > 0:
+ myjitdriver.can_enter_jit(x=x, res=res)
+ myjitdriver.jit_merge_point(x=x, res=res)
+ res += jit.promote(f().x)
+ if x % 5 == 1:
+ res += 5
+ res += jit.promote(f().x)
+ res += jit.promote(f().x)
+ x -= 1
+ a.current_a = A()
+ a.current_a.x = 2
+ return res
+ res = self.meta_interp(g, [21])
+ assert res == g(21)
+ self.check_resops(call_r=1)
+
More information about the pypy-commit mailing list