[pypy-commit] pypy bridgeopt-improvements: experimental attempt to reduce the cost of call_loopinvariant in every bridge

cfbolz pypy.commits at gmail.com
Sun Aug 6 18:11:47 EDT 2017


Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch: bridgeopt-improvements
Changeset: r92102:3ab4221d7876
Date: 2017-08-07 00:09 +0200
http://bitbucket.org/pypy/pypy/changeset/3ab4221d7876/

Log:	experimental attempt to reduce the cost of call_loopinvariant in
	every bridge that calls a method.

	approach: pass call_loopinvariant results into the failargs of guards
	(pretty much everywhere) and then reuse the result in the bridge.

diff --git a/rpython/jit/metainterp/optimizeopt/bridgeopt.py b/rpython/jit/metainterp/optimizeopt/bridgeopt.py
--- a/rpython/jit/metainterp/optimizeopt/bridgeopt.py
+++ b/rpython/jit/metainterp/optimizeopt/bridgeopt.py
@@ -2,6 +2,7 @@
 optimizer of the bridge attached to a guard. """
 
 from rpython.jit.metainterp import resumecode
+from rpython.rlib.objectmodel import we_are_translated
 
 
 # adds the following sections at the end of the resume code:
@@ -22,17 +23,22 @@
 # (<box1> <index> <descr> <box2>) length times, if getarrayitem_gc(box1, index, descr) == box2
 #                                 both boxes should be in the liveboxes
 #
+# <length>
+# (<const> <descr> <box1>) length times, if call_loop_invariant(const, descr) == box1
+#                          the box should be in the liveboxes
 # ----
 
 
 # maybe should be delegated to the optimization classes?
 
-def tag_box(box, liveboxes_from_env, memo):
+def tag_box(box, adder):
     from rpython.jit.metainterp.history import Const
     if isinstance(box, Const):
-        return memo.getconst(box)
+        return adder.memo.getconst(box)
     else:
-        return liveboxes_from_env[box] # has to exist
+        if box in adder.liveboxes_from_env:
+            return adder.liveboxes_from_env[box]
+        return adder.liveboxes[box] # has to exist
 
 def decode_box(resumestorage, tagged, liveboxes, cpu):
     from rpython.jit.metainterp.resume import untag, TAGCONST, TAGINT, TAGBOX
@@ -54,10 +60,13 @@
         raise AssertionError("unreachable")
     return box
 
-def serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, liveboxes_from_env, memo):
+def serialize_optimizer_knowledge(adder, numb_state, liveboxes):
+    optimizer = adder.optimizer
+    liveboxes_from_env = adder.liveboxes_from_env
     available_boxes = {}
     for box in liveboxes:
-        if box is not None and box in liveboxes_from_env:
+        if box is not None and (
+                box in adder.liveboxes_from_env or box in adder.liveboxes):
             available_boxes[box] = None
     metainterp_sd = optimizer.metainterp_sd
 
@@ -84,7 +93,6 @@
 
     # heap knowledge: we store triples of known heap fields in non-virtual
     # structs
-    # XXX could be extended to arrays
     if optimizer.optheap:
         triples_struct, triples_array = optimizer.optheap.serialize_optheap(available_boxes)
         # can only encode descrs that have a known index into
@@ -93,20 +101,32 @@
         numb_state.append_int(len(triples_struct))
         for box1, descr, box2 in triples_struct:
             descr_index = descr.descr_index
-            numb_state.append_short(tag_box(box1, liveboxes_from_env, memo))
+            numb_state.append_short(tag_box(box1, adder))
             numb_state.append_int(descr_index)
-            numb_state.append_short(tag_box(box2, liveboxes_from_env, memo))
+            numb_state.append_short(tag_box(box2, adder))
         numb_state.append_int(len(triples_array))
         for box1, index, descr, box2 in triples_array:
             descr_index = descr.descr_index
-            numb_state.append_short(tag_box(box1, liveboxes_from_env, memo))
+            numb_state.append_short(tag_box(box1, adder))
             numb_state.append_int(index)
             numb_state.append_int(descr_index)
-            numb_state.append_short(tag_box(box2, liveboxes_from_env, memo))
+            numb_state.append_short(tag_box(box2, adder))
     else:
         numb_state.append_int(0)
         numb_state.append_int(0)
 
+    # loop invariant calls
+    if optimizer.optrewrite:
+        triples = optimizer.optrewrite.serialize_optrewrite(available_boxes)
+        numb_state.append_int(len(triples))
+        for const, descr, box in triples:
+            descr_index = descr.descr_index
+            numb_state.append_short(tag_box(const, adder))
+            numb_state.append_int(descr_index)
+            numb_state.append_short(tag_box(box, adder))
+    else:
+        numb_state.append_int(0)
+
 def deserialize_optimizer_knowledge(optimizer, resumestorage, frontend_boxes, liveboxes):
     reader = resumecode.Reader(resumestorage.rd_numb)
     assert len(frontend_boxes) == len(liveboxes)
@@ -115,6 +135,8 @@
     # skip resume section
     startcount = reader.next_item()
     reader.jump(startcount - 1)
+    extracount = reader.next_item()
+    reader.jump(extracount)
 
     # class knowledge
     bitfield = 0
@@ -132,8 +154,6 @@
             optimizer.make_constant_class(box, cls)
 
     # heap knowledge
-    if not optimizer.optheap:
-        return
     length = reader.next_item()
     result_struct = []
     for i in range(length):
@@ -155,4 +175,59 @@
         tagged = reader.next_item()
         box2 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu)
         result_array.append((box1, index, descr, box2))
-    optimizer.optheap.deserialize_optheap(result_struct, result_array)
+    if result_struct or result_array:
+        optimizer.optheap.deserialize_optheap(result_struct, result_array)
+
+    # loop_invariant knowledge
+    length = reader.next_item()
+    results = []
+    for i in range(length):
+        tagged = reader.next_item()
+        box1 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu)
+        descr_index = reader.next_item()
+        descr = metainterp_sd.all_descrs[descr_index]
+        tagged = reader.next_item()
+        box2 = decode_box(resumestorage, tagged, liveboxes, metainterp_sd.cpu)
+        results.append((box1, descr, box2))
+    if results:
+        optimizer.optrewrite.deserialize_optrewrite(results)
+
+def consistency_checking_numbering(numb, liveboxes):
+    if we_are_translated():
+        return
+    # very much a "does not crash" kind of affair
+    reader = resumecode.Reader(numb)
+
+    # skip resume section
+    startcount = reader.next_item()
+    reader.jump(startcount - 1)
+    extracount = reader.next_item()
+    reader.jump(extracount)
+
+    mask = 0
+    for i, box in enumerate(liveboxes):
+        if box.type != "r":
+            continue
+        if not mask:
+            bitfield = reader.next_item()
+            mask = 0b100000
+        mask >>= 1
+
+    length = reader.next_item()
+    for i in range(length):
+        tagged = reader.next_item()
+        descr_index = reader.next_item()
+        tagged = reader.next_item()
+    length = reader.next_item()
+    for i in range(length):
+        tagged = reader.next_item()
+        index = reader.next_item()
+        descr_index = reader.next_item()
+        tagged = reader.next_item()
+
+    # loop_invariant knowledge
+    length = reader.next_item()
+    for i in range(length):
+        tagged = reader.next_item()
+        descr_index = reader.next_item()
+        tagged = reader.next_item()
diff --git a/rpython/jit/metainterp/optimizeopt/optimizer.py b/rpython/jit/metainterp/optimizeopt/optimizer.py
--- a/rpython/jit/metainterp/optimizeopt/optimizer.py
+++ b/rpython/jit/metainterp/optimizeopt/optimizer.py
@@ -620,7 +620,12 @@
                 del self.replaces_guard[orig_op]
                 return
             else:
-                op = self.emit_guard_operation(op, pendingfields)
+                extra_liveboxes = []
+                # hack, but probably a good one
+                if len(self.optrewrite.loop_invariant_results) == 1:
+                    extra_liveboxes = [
+                        self.optrewrite.loop_invariant_results.values()[0][0].get_box_replacement()]
+                op = self.emit_guard_operation(op, pendingfields, extra_liveboxes)
         elif op.can_raise():
             self.exception_might_have_happened = True
         opnum = op.opnum
@@ -633,7 +638,7 @@
         self._really_emitted_operation = op
         self._newoperations.append(op)
 
-    def emit_guard_operation(self, op, pendingfields):
+    def emit_guard_operation(self, op, pendingfields, extra_liveboxes):
         guard_op = op # self.replace_op_with(op, op.getopnum())
         opnum = guard_op.getopnum()
         # If guard_(no)_exception is merged with another previous guard, then
@@ -653,7 +658,8 @@
             op = self._copy_resume_data_from(guard_op,
                                              self._last_guard_op)
         else:
-            op = self.store_final_boxes_in_guard(guard_op, pendingfields)
+            op = self.store_final_boxes_in_guard(
+                    guard_op, pendingfields, extra_liveboxes)
             self._last_guard_op = op
             # for unrolling
             for farg in op.getfailargs():
@@ -723,7 +729,7 @@
         new_descr.copy_all_attributes_from(old_descr)
         self._newoperations[old_op_pos] = new_op
 
-    def store_final_boxes_in_guard(self, op, pendingfields):
+    def store_final_boxes_in_guard(self, op, pendingfields, extra_liveboxes):
         assert pendingfields is not None
         if op.getdescr() is not None:
             descr = op.getdescr()
@@ -736,7 +742,7 @@
         modifier = resume.ResumeDataVirtualAdder(self, descr, op, self.trace,
                                                  self.resumedata_memo)
         try:
-            newboxes = modifier.finish(pendingfields)
+            newboxes = modifier.finish(pendingfields, extra_liveboxes)
             if (newboxes is not None and
                 len(newboxes) > self.metainterp_sd.options.failargs_limit):
                 raise resume.TagOverflow
diff --git a/rpython/jit/metainterp/optimizeopt/rewrite.py b/rpython/jit/metainterp/optimizeopt/rewrite.py
--- a/rpython/jit/metainterp/optimizeopt/rewrite.py
+++ b/rpython/jit/metainterp/optimizeopt/rewrite.py
@@ -28,7 +28,7 @@
     def _callback(self, op, old_op):
         key = make_hashable_int(op.getarg(0).getint())
         self.opt.loop_invariant_producer[key] = self.opt.optimizer.getlastop()
-        self.opt.loop_invariant_results[key] = old_op
+        self.opt.loop_invariant_results[key] = old_op, op.getarg(0), old_op.getdescr()
 
 
 class OptRewrite(Optimization):
@@ -568,13 +568,15 @@
         arg = op.getarg(0)
         # 'arg' must be a Const, because residual_call in codewriter
         # expects a compile-time constant
+        # XXX the descr is ignored! let's hope there are no different
+        # call_loop_invariant around
         assert isinstance(arg, Const)
         key = make_hashable_int(arg.getint())
 
-        resvalue = self.loop_invariant_results.get(key, None)
+        resvalue, arg0, descr = self.loop_invariant_results.get(key, (None, None, None))
         if resvalue is not None:
             resvalue = self.optimizer.force_op_from_preamble(resvalue)
-            self.loop_invariant_results[key] = resvalue
+            self.loop_invariant_results[key] = resvalue, arg0, descr
             self.make_equal_to(op, resvalue)
             self.last_emitted_operation = REMOVED
             return
@@ -867,6 +869,18 @@
     optimize_SAME_AS_R = optimize_SAME_AS_I
     optimize_SAME_AS_F = optimize_SAME_AS_I
 
+    def serialize_optrewrite(self, available_boxes):
+        triples = []
+        for box, arg0, descr in self.loop_invariant_results.values():
+            triples.append((arg0, descr, box.get_box_replacement()))
+        return triples
+
+    def deserialize_optrewrite(self, triples):
+        for arg, descr, resvalue in triples:
+            assert isinstance(arg, Const)
+            key = make_hashable_int(arg.getint())
+            self.loop_invariant_results[key] = resvalue, arg, descr
+
 dispatch_opt = make_dispatcher_method(OptRewrite, 'optimize_',
                                       default=OptRewrite.emit)
 optimize_guards = _findall(OptRewrite, 'optimize_', 'GUARD')
diff --git a/rpython/jit/metainterp/optimizeopt/shortpreamble.py b/rpython/jit/metainterp/optimizeopt/shortpreamble.py
--- a/rpython/jit/metainterp/optimizeopt/shortpreamble.py
+++ b/rpython/jit/metainterp/optimizeopt/shortpreamble.py
@@ -154,8 +154,11 @@
             return
         op = self.res
         key = make_hashable_int(op.getarg(0).getint())
-        optrewrite.loop_invariant_results[key] = PreambleOp(op, preamble_op,
-                                                            invented_name)
+        optrewrite.loop_invariant_results[key] = (
+            PreambleOp(op, preamble_op, invented_name),
+            op.getarg(0),
+            op.getdescr()
+        )
 
     def add_op_to_short(self, sb):
         op = self.res
diff --git a/rpython/jit/metainterp/resume.py b/rpython/jit/metainterp/resume.py
--- a/rpython/jit/metainterp/resume.py
+++ b/rpython/jit/metainterp/resume.py
@@ -412,7 +412,8 @@
         _, tagbits = untag(tagged)
         return tagbits == TAGVIRTUAL
 
-    def finish(self, pending_setfields=[]):
+    def finish(self, pending_setfields=[], extra_liveboxes=[]):
+        from rpython.jit.metainterp.optimizeopt.bridgeopt import consistency_checking_numbering
         optimizer = self.optimizer
         # compute the numbering
         storage = self.storage
@@ -458,17 +459,31 @@
             info = optimizer.getptrinfo(fieldbox)
             assert info is not None and info.is_virtual()
             info.visitor_walk_recursive(fieldbox, self, optimizer)
+        for box in extra_liveboxes:
+            box = optimizer.get_box_replacement(box)
+            self.register_box(box)
+            info = optimizer.getptrinfo(box)
+            assert info is None or not info.is_virtual()
 
         self._number_virtuals(liveboxes, optimizer, num_virtuals)
         self._add_pending_fields(optimizer, pending_setfields)
 
         numb_state.patch(1, len(liveboxes))
 
-        self._add_optimizer_sections(numb_state, liveboxes, liveboxes_from_env)
-        storage.rd_numb = numb_state.create_numbering()
+        self._add_extra_box_section(extra_liveboxes, numb_state)
+
+        self._add_optimizer_sections(numb_state, liveboxes)
+        rd_numb = numb_state.create_numbering()
+        consistency_checking_numbering(rd_numb, liveboxes)
+        storage.rd_numb = rd_numb
         storage.rd_consts = self.memo.consts
         return liveboxes[:]
 
+    def _add_extra_box_section(self, extra_liveboxes, numb_state):
+        numb_state.append_int(len(extra_liveboxes))
+        for box in extra_liveboxes:
+            numb_state.append_short(self._gettagged(box.get_box_replacement()))
+
     def _number_virtuals(self, liveboxes, optimizer, num_env_virtuals):
         from rpython.jit.metainterp.optimizeopt.info import AbstractVirtualPtrInfo
         
@@ -584,11 +599,10 @@
                 return self.liveboxes_from_env[box]
             return self.liveboxes[box]
 
-    def _add_optimizer_sections(self, numb_state, liveboxes, liveboxes_from_env):
+    def _add_optimizer_sections(self, numb_state, liveboxes):
         # add extra information about things the optimizer learned
         from rpython.jit.metainterp.optimizeopt.bridgeopt import serialize_optimizer_knowledge
-        serialize_optimizer_knowledge(
-            self.optimizer, numb_state, liveboxes, liveboxes_from_env, self.memo)
+        serialize_optimizer_knowledge(self, numb_state, liveboxes)
 
 class AbstractVirtualInfo(object):
     kind = REF
@@ -1067,6 +1081,7 @@
         resumereader.consume_boxes(f.get_current_position_info(),
                                    f.registers_i, f.registers_r, f.registers_f)
         f.handle_rvmprof_enter_on_resume()
+    resumereader.consume_extra_boxes()
     return resumereader.liveboxes, virtualizable_boxes, virtualref_boxes
 
 
@@ -1113,6 +1128,11 @@
         virtualref_boxes = self.consume_virtualref_boxes()
         return virtualizable_boxes, virtualref_boxes
 
+    def consume_extra_boxes(self):
+        extra_boxes_size = self.resumecodereader.next_item()
+        for i in range(extra_boxes_size):
+            self.next_ref() # does nothing but read the box!
+
     def allocate_with_vtable(self, descr=None):
         return self.metainterp.execute_new_with_vtable(descr=descr)
 
diff --git a/rpython/jit/metainterp/resumecode.py b/rpython/jit/metainterp/resumecode.py
--- a/rpython/jit/metainterp/resumecode.py
+++ b/rpython/jit/metainterp/resumecode.py
@@ -18,6 +18,8 @@
 
   until the size of the resume section
 
+  [<length> <numb> <numb> ... <numb>]              more boxes for the optimizer section
+
   # ----- optimization section
   <more code>                                      further sections according to bridgeopt.py
 """
diff --git a/rpython/jit/metainterp/test/test_bridgeopt.py b/rpython/jit/metainterp/test/test_bridgeopt.py
--- a/rpython/jit/metainterp/test/test_bridgeopt.py
+++ b/rpython/jit/metainterp/test/test_bridgeopt.py
@@ -27,6 +27,7 @@
 class FakeOptimizer(object):
     metainterp_sd = None
     optheap = None
+    optrewrite = None
 
     def __init__(self, dct={}, cpu=None):
         self.dct = dct
@@ -46,6 +47,13 @@
     def __init__(self, numb):
         self.rd_numb = numb
 
+class FakeAdder(object):
+    def __init__(self, optimizer, liveboxes_from_env, liveboxes, memo):
+        self.optimizer = optimizer
+        self.liveboxes_from_env = liveboxes_from_env
+        self.liveboxes = liveboxes
+        self.memo = memo
+
 def test_known_classes():
     box1 = InputArgRef()
     box2 = InputArgRef()
@@ -57,11 +65,13 @@
 
     numb_state = NumberingState(4)
     numb_state.append_int(1) # size of resume block
+    numb_state.append_int(0) # size of extra arg block
     liveboxes = [InputArgInt(), box2, box1, box3]
+    adder = FakeAdder(optimizer, {}, {}, None)
 
-    serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, {}, None)
+    serialize_optimizer_knowledge(adder, numb_state, liveboxes)
 
-    assert unpack_numbering(numb_state.create_numbering()) == [1, 0b010000, 0, 0]
+    assert unpack_numbering(numb_state.create_numbering()) == [1, 0, 0b010000, 0, 0, 0]
 
     rbox1 = InputArgRef()
     rbox2 = InputArgRef()
@@ -93,11 +103,14 @@
 
     numb_state = NumberingState(1)
     numb_state.append_int(1) # size of resume block
+    numb_state.append_int(0) # size of extra arg block
     liveboxes = [box for (box, _) in boxes_known_classes]
 
-    serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, {}, None)
+    adder = FakeAdder(optimizer, {}, {}, None)
 
-    assert len(numb_state.create_numbering().code) == 3 + math.ceil(len(refboxes) / 6.0)
+    serialize_optimizer_knowledge(adder, numb_state, liveboxes)
+
+    assert len(numb_state.create_numbering().code) == 5 + math.ceil(len(refboxes) / 6.0)
 
     dct = {box: cls
               for box, known_class in boxes_known_classes
@@ -140,6 +153,40 @@
         self.check_trace_count(3)
         self.check_resops(guard_class=1)
 
+    def Xtest_bridge_guard_class_virtual(self):
+        myjitdriver = jit.JitDriver(greens=[], reds='auto')
+        class A(object):
+            def f(self):
+                return 1
+        class B(A):
+            def f(self):
+                return 2
+        class Box(object):
+            def __init__(self, a):
+                self.a = a
+        def f(x, y, n):
+            if x:
+                a = A()
+            else:
+                a = B()
+            a.x = 0
+            box = Box(a)
+            res = 0
+            while y > 0:
+                myjitdriver.jit_merge_point()
+                res += box.a.f()
+                a.x += 1
+                if y > n:
+                    res += 1
+                res += box.a.f()
+                y -= 1
+                box = Box(box.a)
+            return res
+        res = self.meta_interp(f, [6, 32, 16])
+        assert res == f(6, 32, 16)
+        self.check_trace_count(3)
+        self.check_resops(guard_class=1)
+
     def test_bridge_field_read(self):
         myjitdriver = jit.JitDriver(greens=[], reds=['y', 'res', 'n', 'a'])
         class A(object):
@@ -282,3 +329,33 @@
         self.check_trace_count(3)
         self.check_resops(guard_value=1)
         self.check_resops(getarrayitem_gc_i=5)
+
+    def test_loop_invariant_bridge(self):
+        myjitdriver = jit.JitDriver(greens = [], reds = ['x', 'res'])
+        class A(object):
+            pass
+        a = A()
+        a.current_a = A()
+        a.current_a.x = 12
+        @jit.loop_invariant
+        def f():
+            return a.current_a
+
+        def g(x):
+            res = 0
+            while x > 0:
+                myjitdriver.can_enter_jit(x=x, res=res)
+                myjitdriver.jit_merge_point(x=x, res=res)
+                res += jit.promote(f().x)
+                if x % 5 == 1:
+                    res += 5
+                res += jit.promote(f().x)
+                res += jit.promote(f().x)
+                x -= 1
+            a.current_a = A()
+            a.current_a.x = 2
+            return res
+        res = self.meta_interp(g, [21])
+        assert res == g(21)
+        self.check_resops(call_r=1)
+


More information about the pypy-commit mailing list