[pypy-commit] pypy branch-prediction: Fix WriteBarrierSlowPath. Add ReacqGilSlowPath.

arigo pypy.commits at gmail.com
Fri Apr 7 07:25:56 EDT 2017


Author: Armin Rigo <arigo at tunes.org>
Branch: branch-prediction
Changeset: r91014:a47cbc896847
Date: 2017-04-07 13:24 +0200
http://bitbucket.org/pypy/pypy/changeset/a47cbc896847/

Log:	Fix WriteBarrierSlowPath. Add ReacqGilSlowPath.

diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -41,33 +41,6 @@
 from rpython.rlib.objectmodel import compute_unique_id
 
 
-class SlowPath(object):
-    def __init__(self, mc, condition):
-        mc.J_il(condition, 0xfffff)     # patched later
-        self.cond_jump_addr = mc.get_relative_pos(break_basic_block=False)
-        self.saved_scratch_value_1 = mc.get_scratch_register_known_value()
-
-    def set_continue_addr(self, mc):
-        self.continue_addr = mc.get_relative_pos(break_basic_block=False)
-        self.saved_scratch_value_2 = mc.get_scratch_register_known_value()
-
-    def generate(self, assembler, mc):
-        # no alignment here, prefer compactness for these slow-paths.
-        # patch the original jump to go here
-        offset = mc.get_relative_pos() - self.cond_jump_addr
-        mc.overwrite32(self.cond_jump_addr-4, offset)
-        # restore the knowledge of the scratch register value
-        # (this does not emit any code)
-        mc.restore_scratch_register_known_value(self.saved_scratch_value_1)
-        # generate the body of the slow-path
-        self.generate_body(assembler, mc)
-        # reload (if needed) the (possibly different) scratch register value
-        mc.load_scratch_if_known(self.saved_scratch_value_2)
-        # jump back
-        curpos = mc.get_relative_pos() + 5
-        mc.JMP_l(self.continue_addr - curpos)
-
-
 class Assembler386(BaseAssembler, VectorAssemblerMixin):
     _regalloc = None
     _output_loop_log = None
@@ -865,7 +838,7 @@
         for ofs in self.frame_depth_to_patch:
             self._patch_frame_depth(ofs + rawstart, framedepth)
 
-    class IncreaseStackSlowPath(SlowPath):
+    class IncreaseStackSlowPath(codebuf.SlowPath):
         def generate_body(self, assembler, mc):
             mc.MOV_si(WORD, 0xffffff)     # force writing 32 bit
             ofs2 = mc.get_relative_pos(break_basic_block=False) - 4
@@ -1033,7 +1006,7 @@
         if gcrootmap and gcrootmap.is_shadow_stack:
             self._call_header_shadowstack(gcrootmap)
 
-    class StackCheckSlowPath(SlowPath):
+    class StackCheckSlowPath(codebuf.SlowPath):
         def generate_body(self, assembler, mc):
             mc.CALL(imm(assembler.stack_check_slowpath))
 
@@ -2291,8 +2264,9 @@
 
     # ------------------- END CALL ASSEMBLER -----------------------
 
-    class WriteBarrierSlowPath(SlowPath):
+    class WriteBarrierSlowPath(codebuf.SlowPath):
         def generate_body(self, assembler, mc):
+            mc.force_frame_size(DEFAULT_FRAME_BYTES)
             # for cond_call_gc_wb_array, also add another fast path:
             # if GCFLAG_CARDS_SET, then we can just set one bit and be done
             card_marking = (self.loc_index is not None)
@@ -2312,6 +2286,8 @@
             elif (assembler._regalloc is not None and
                   assembler._regalloc.xrm.reg_bindings):
                 helper_num += 2
+            descr = self.descr
+            loc_base = self.loc_base
             if assembler.wb_slowpath[helper_num] == 0:    # tests only
                 assert not we_are_translated()
                 assembler.cpu.gc_ll_descr.write_barrier_descr = descr
@@ -2404,8 +2380,10 @@
             loc = addr_add_const(loc_base, descr.jit_wb_if_flag_byteofs)
         mc.TEST8(loc, imm(mask))
         sp = self.WriteBarrierSlowPath(mc, rx86.Conditions['NZ'])
+        sp.loc_base = loc_base
         sp.loc_index = loc_index
         sp.is_frame = is_frame
+        sp.descr = descr
         sp.set_continue_addr(mc)
         self.pending_slowpaths.append(sp)
 
@@ -2438,7 +2416,7 @@
     def label(self):
         self._check_frame_depth_debug(self.mc)
 
-    class CondCallSlowPath(SlowPath):
+    class CondCallSlowPath(codebuf.SlowPath):
         guard_token_no_exception = None
 
         def generate_body(self, assembler, mc):
@@ -2508,7 +2486,7 @@
         sp.resloc = resloc
         self.pending_slowpaths.append(sp)
 
-    class MallocCondSlowPath(SlowPath):
+    class MallocCondSlowPath(codebuf.SlowPath):
         def generate_body(self, assembler, mc):
             assembler.push_gcmap(mc, self.gcmap, store=True)
             mc.CALL(imm(follow_jump(assembler.malloc_slowpath)))
@@ -2541,7 +2519,7 @@
         sp.set_continue_addr(self.mc)
         self.pending_slowpaths.append(sp)
 
-    class MallocCondVarsizeSlowPath(SlowPath):
+    class MallocCondVarsizeSlowPath(codebuf.SlowPath):
         def generate_body(self, assembler, mc):
             # save the gcmap
             assembler.push_gcmap(mc, self.gcmap, store=True)
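A note on the WriteBarrierSlowPath fix above: a SlowPath body is emitted later, out of line, so everything that generate_body() needs (here 'descr' and 'loc_base') must be attached to the SlowPath object at the creation site and read back from 'self'; the hunks add exactly that, plus a force_frame_size() call because the body is no longer emitted in the frame state of the surrounding code. The queued objects are drained only after the main code, which is what turns all these conditional jumps into forward jumps that the CPU statically predicts as not taken. A sketch of the draining step, assuming a hook of roughly this shape on the assembler (the name flush_pending_slowpaths is an assumption, not shown in this diff):

    def flush_pending_slowpaths(self, mc):
        # hypothetical: emit every queued slow-path body past the end
        # of the main code; each generate() patches its own conditional
        # jump and emits a JMP back to its recorded continue_addr
        pending = self.pending_slowpaths
        self.pending_slowpaths = []
        for sp in pending:
            sp.generate(self, mc)
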
diff --git a/rpython/jit/backend/x86/callbuilder.py b/rpython/jit/backend/x86/callbuilder.py
--- a/rpython/jit/backend/x86/callbuilder.py
+++ b/rpython/jit/backend/x86/callbuilder.py
@@ -11,6 +11,7 @@
     r12, r13, r14, r15, X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG,
     RegLoc, RawEspLoc, RawEbpLoc, imm, ImmedLoc)
 from rpython.jit.backend.x86.jump import remap_frame_layout
+from rpython.jit.backend.x86 import codebuf
 from rpython.jit.backend.llsupport.callbuilder import AbstractCallBuilder
 from rpython.jit.backend.llsupport import llerrno
 from rpython.rtyper.lltypesystem import llmemory, rffi
@@ -293,6 +294,41 @@
             tlofsreg = self.get_tlofs_reg()    # => esi (possibly reused)
             mc.MOV32_mr((tlofsreg.value, lasterror), eax.value)
 
+    class ReacqGilSlowPath(codebuf.SlowPath):
+        early_jump_addr = 0
+
+        def generate_body(self, assembler, mc):
+            if self.early_jump_addr != 0:
+                # This slow-path has two entry points, with two
+                # conditional jumps.  We can jump to the regular start
+                # of this slow-path with the 2nd conditional jump.  Or,
+                # we can jump past the "MOV(heap(fastgil), ecx)"
+                # instruction from the 1st conditional jump.
+                # This instruction reverts the rpy_fastgil acquired
+                # previously, so that the general 'reacqgil_addr'
+                # function can acquire it again.  It must only be done
+                # if we actually succeeded in acquiring rpy_fastgil.
+                from rpython.jit.backend.x86.assembler import heap
+                mc.MOV(heap(self.fastgil), ecx)
+                offset = mc.get_relative_pos() - self.early_jump_addr
+                mc.overwrite32(self.early_jump_addr-4, offset)
+                # scratch register forgotten here, by get_relative_pos()
+
+            # call the reacqgil() function
+            cb = self.callbuilder
+            if not cb.result_value_saved_early:
+                cb.save_result_value(save_edx=False)
+            if assembler._is_asmgcc():
+                if IS_X86_32:
+                    css_value = edx
+                    old_value = ecx
+                    mc.MOV_sr(4, old_value.value)
+                    mc.MOV_sr(0, css_value.value)
+                # on X86_64, they are already in the right registers
+            mc.CALL(imm(follow_jump(assembler.reacqgil_addr)))
+            if not cb.result_value_saved_early:
+                cb.restore_result_value(save_edx=False)
+
     def move_real_result_and_call_reacqgil_addr(self, fastgil):
         from rpython.jit.backend.x86 import rx86
         #
@@ -314,8 +350,8 @@
                     if not self.result_value_saved_early:
                         mc.MOV_sr(12, edx.value)
                         restore_edx = True
-                css_value = edx
-                old_value = ecx
+                css_value = edx    # note: duplicated in ReacqGilSlowPath
+                old_value = ecx    #
             elif IS_X86_64:
                 css_value = edi
                 old_value = esi
@@ -341,36 +377,25 @@
             # thread.  So here we check if the shadowstack pointer
             # is still the same as before we released the GIL (saved
             # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
-            jne_location = mc.emit_forward_jump('NE')
+            mc.J_il(rx86.Conditions['NE'], 0xfffff)     # patched later
+            early_jump_addr = mc.get_relative_pos(break_basic_block=False)
+            # ^^^ this jump will go to almost the same place as the one
+            # computed by ReacqGilSlowPath(), but one instruction
+            # farther, i.e. just after the "MOV(heap(fastgil), ecx)".
+
             # here, ecx (=old_value) is zero (so rpy_fastgil was in 'released'
             # state before the XCHG, but the XCHG acquired it by writing 1)
             rst = gcrootmap.get_root_stack_top_addr()
             mc = self.mc
             mc.CMP(ebx, heap(rst))
-            # PPP FIX ME
-            je_location = mc.emit_forward_jump('E')
-            # revert the rpy_fastgil acquired above, so that the
-            # general 'reacqgil_addr' below can acquire it again...
-            mc.MOV(heap(fastgil), ecx)
-            # patch the JNE above
-            mc.patch_forward_jump(jne_location)
+            sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
+            sp.early_jump_addr = early_jump_addr
+            sp.fastgil = fastgil
         else:
-            je_location = mc.emit_forward_jump('E')
-        #
-        # Yes, we need to call the reacqgil() function
-        if not self.result_value_saved_early:
-            self.save_result_value(save_edx=False)
-        if self.asm._is_asmgcc():
-            if IS_X86_32:
-                mc.MOV_sr(4, old_value.value)
-                mc.MOV_sr(0, css_value.value)
-            # on X86_64, they are already in the right registers
-        mc.CALL(imm(follow_jump(self.asm.reacqgil_addr)))
-        if not self.result_value_saved_early:
-            self.restore_result_value(save_edx=False)
-        #
-        # patch the JE above
-        mc.patch_forward_jump(je_location)
+            sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
+        sp.callbuilder = self
+        sp.set_continue_addr(mc)
+        self.asm.pending_slowpaths.append(sp)
         #
         if restore_edx:
             mc.MOV_rs(edx.value, 12)   # restore this
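The two entry points of ReacqGilSlowPath are easy to lose in the offset patching above, so here is a plain-Python model of the control flow in the shadowstack case (ToyGilState, xchg_acquire and call_reacqgil are hypothetical stand-ins for the emitted XCHG/CMP/CALL instructions; the real code only manipulates registers and memory):

    class ToyGilState(object):
        def __init__(self):
            self.rpy_fastgil = 0              # 0 == released, 1 == held
            self.shadowstack_top = 1000       # current root stack top
            self.saved_shadowstack_top = 1000 # the value saved in ebx

    def xchg_acquire(st):
        # models "XCHG rpy_fastgil, 1": atomically write 1 and return
        # whether the old value was 0, i.e. whether we acquired it
        old = st.rpy_fastgil
        st.rpy_fastgil = 1
        return old == 0

    def call_reacqgil(st):
        # models "CALL reacqgil_addr": the general blocking reacquire
        st.rpy_fastgil = 1

    def after_external_call(st):
        if not xchg_acquire(st):
            # 1st conditional jump (early_jump_addr): rpy_fastgil was
            # never ours, so enter the slow path past the reverting
            # MOV; writing 0 here would release somebody else's gil
            call_reacqgil(st)
        elif st.shadowstack_top != st.saved_shadowstack_top:
            # 2nd conditional jump (the SlowPath's own): we did get
            # rpy_fastgil, but another thread ran in the meantime, so
            # revert it first (the "MOV(heap(fastgil), ecx)", ecx
            # being 0 here) and fall back to the general reacquire
            st.rpy_fastgil = 0
            call_reacqgil(st)
        # else: fast path, rpy_fastgil is ours and we just continue
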
diff --git a/rpython/jit/backend/x86/codebuf.py b/rpython/jit/backend/x86/codebuf.py
--- a/rpython/jit/backend/x86/codebuf.py
+++ b/rpython/jit/backend/x86/codebuf.py
@@ -81,3 +81,30 @@
         if break_basic_block:
             self.forget_scratch_register()
         return BlockBuilderMixin.get_relative_pos(self)
+
+
+class SlowPath(object):
+    def __init__(self, mc, condition):
+        mc.J_il(condition, 0xfffff)     # patched later
+        self.cond_jump_addr = mc.get_relative_pos(break_basic_block=False)
+        self.saved_scratch_value_1 = mc.get_scratch_register_known_value()
+
+    def set_continue_addr(self, mc):
+        self.continue_addr = mc.get_relative_pos(break_basic_block=False)
+        self.saved_scratch_value_2 = mc.get_scratch_register_known_value()
+
+    def generate(self, assembler, mc):
+        # no alignment here, prefer compactness for these slow-paths.
+        # patch the original jump to go here
+        offset = mc.get_relative_pos() - self.cond_jump_addr
+        mc.overwrite32(self.cond_jump_addr-4, offset)
+        # restore the knowledge of the scratch register value
+        # (this does not emit any code)
+        mc.restore_scratch_register_known_value(self.saved_scratch_value_1)
+        # generate the body of the slow-path
+        self.generate_body(assembler, mc)
+        # reload (if needed) the (possibly different) scratch register value
+        mc.load_scratch_if_known(self.saved_scratch_value_2)
+        # jump back; JMP_l is 5 bytes (opcode + rel32), hence the "+ 5"
+        curpos = mc.get_relative_pos() + 5
+        mc.JMP_l(self.continue_addr - curpos)
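
Finally, a self-contained toy version of the SlowPath mechanics above, runnable as plain Python: ToyMC is a hypothetical stand-in for the real machine-code buffer (the scratch-register bookkeeping is left out). It shows the placeholder rel32 being patched and the JMP rel32 back to continue_addr:

    import binascii, struct

    class ToyMC(object):
        def __init__(self):
            self.buf = bytearray()
        def get_relative_pos(self):
            return len(self.buf)
        def emit(self, data):
            self.buf += data
        def emit32(self, value):
            self.buf += struct.pack('<i', value)   # little-endian rel32
        def overwrite32(self, pos, value):
            self.buf[pos:pos+4] = struct.pack('<i', value)

    class ToySlowPath(object):
        def __init__(self, mc):
            mc.emit(b'\x0f\x85')     # JNE rel32, with a placeholder
            mc.emit32(0)             # offset patched in generate()
            self.cond_jump_addr = mc.get_relative_pos()
        def set_continue_addr(self, mc):
            self.continue_addr = mc.get_relative_pos()
        def generate(self, mc):
            # patch the placeholder: x86 rel32 offsets are measured
            # from the end of the jump instruction
            mc.overwrite32(self.cond_jump_addr - 4,
                           mc.get_relative_pos() - self.cond_jump_addr)
            self.generate_body(mc)
            # JMP rel32 back; the "+ 5" is the size of the JMP
            # instruction itself (one opcode byte + rel32)
            curpos = mc.get_relative_pos() + 5
            mc.emit(b'\xe9')
            mc.emit32(self.continue_addr - curpos)

    class NopSlowPath(ToySlowPath):
        def generate_body(self, mc):
            mc.emit(b'\x90')         # out-of-line body: a single NOP

    mc = ToyMC()
    sp = NopSlowPath(mc)             # fast path: forward JNE emitted
    sp.set_continue_addr(mc)         # execution resumes right here
    pending = [sp]
    mc.emit(b'\xc3')                 # rest of the fast path: RET
    for sp in pending:               # drained after the main code
        sp.generate(mc)
    print(binascii.hexlify(bytes(mc.buf)))
    # prints 0f8501000000c390e9f9ffffff: the patched JNE (rel32 == 1)
    # skips over the RET to the NOP, and the JMP comes back to the RET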

