[pypy-commit] pypy stmgc-c8-gil-like: in-progress: pypy jit

arigo noreply at buildbot.pypy.org
Sat Jun 13 17:59:06 CEST 2015


Author: Armin Rigo <arigo at tunes.org>
Branch: stmgc-c8-gil-like
Changeset: r78084:720b5a8e744f
Date: 2015-06-13 17:59 +0200
http://bitbucket.org/pypy/pypy/changeset/720b5a8e744f/

Log:	in-progress: pypy jit
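
The changes below inline the fast paths of stm_leave_transactional_zone()
and stm_enter_transactional_zone() around CALL_RELEASE_GIL, instead of
calling pypy_stm_commit_if_not_atomic()/pypy_stm_start_if_not_atomic().
A rough, illustrative Python model of the detach/reattach protocol being
inlined (not the real stmgc API; all names below are invented for the
sketch):

# 'detached_from_thread' models the C global
# _stm_detached_inevitable_from_thread; 'tl' models &stm_thread_local.
detached_from_thread = 0

def leave_transactional_zone(tl, is_inevitable, leave_slow_path):
    global detached_from_thread
    if is_inevitable:
        # fast path: publish 'tl' and leave the inevitable transaction
        # detached while the external C call runs
        detached_from_thread = tl
    else:
        # slow path: helper called with all argument registers preserved
        leave_slow_path()

def enter_transactional_zone(tl, reattach_slow_path):
    global detached_from_thread
    # models 'lock cmpxchg(&detached_from_thread, expected=tl, new=0)'
    if detached_from_thread == tl:
        detached_from_thread = 0          # fast path: still ours, re-attach
    else:
        reattach_slow_path(tl)            # slow path: _stm_reattach_transaction()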

diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -78,6 +78,8 @@
         if self.cpu.supports_floats:
             support.ensure_sse2_floats()
             self._build_float_constants()
+        if self.cpu.gc_ll_descr.stm:
+            self._build_stm_enter_leave_transactional_zone_helpers()
 
     def setup(self, looptoken):
         assert self.memcpy_addr != 0, "setup_once() not called?"
@@ -125,6 +127,36 @@
         self.float_const_neg_addr = float_constants
         self.float_const_abs_addr = float_constants + 16
 
+    def _build_stm_enter_leave_transactional_zone_helpers(self):
+        assert IS_X86_64 and self.cpu.supports_floats
+        # a helper to call _stm_leave_noninevitable_transactional_zone(),
+        # preserving all registers that are used to pass arguments.
+        # (Push an odd total number of registers, to align the stack.)
+        mc = codebuf.MachineCodeBlockWrapper()
+        self._push_all_regs_to_frame(mc, [eax], True, callee_only=True)
+        mc.CALL(imm(rstm.adr_stm_leave_noninevitable_transactional_zone))
+        self._pop_all_regs_from_frame(mc, [eax], True, callee_only=True)
+        mc.RET()
+        self._stm_leave_noninevitable_tr_slowpath = mc.materialize(
+            self.cpu.asmmemmgr, [])
+        #
+        # a second helper to call _stm_reattach_transaction(tl),
+        # preserving only registers that might store the result of a call
+        mc = codebuf.MachineCodeBlockWrapper()
+        mc.SUB_ri(esp.value, 3 * WORD)     # 3 instead of 2 to align the stack
+        mc.MOV_sr(0, eax.value)     # not edx, we're not running 32-bit
+        mc.MOVSD_sx(1, xmm0.value)
+        # load the value of tl (== tl->self) into edi as argument
+        mc.MOV(edi, self.heap_stm_thread_local_self())
+        mc.CALL(imm(rstm.adr_stm_reattach_transaction))
+        # pop
+        mc.MOVSD_xs(xmm0.value, 1)
+        mc.MOV_rs(eax.value, 0)
+        mc.ADD_ri(esp.value, 3 * WORD)
+        mc.RET()
+        self._stm_reattach_tr_slowpath = mc.materialize(self.cpu.asmmemmgr, [])
+
+
     def set_extra_stack_depth(self, mc, value):
         if self._is_asmgcc():
             extra_ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
@@ -898,6 +930,16 @@
         """STM: AddressLoc for '&stm_thread_local.rjthread.moved_off_base'."""
         return self.heap_tl(rstm.adr_rjthread_moved_off_base)
 
+    def heap_stm_thread_local_self(self):
+        """STM: AddressLoc for '&stm_thread_local.self', i.e. such that
+        reading it returns the (absolute) address of 'stm_thread_local'."""
+        return self.heap_tl(rstm.adr_stm_thread_local_self)
+
+    def heap_stm_detached_inevitable_from_thread(self):
+        """STM: AddressLoc for '&stm_detached_inevitable_from_thread'."""
+        return heap(self.SEGMENT_NO,
+                    rstm.adr_stm_detached_inevitable_from_thread)
+
     def _call_header_shadowstack(self):
         # put the frame in ebp on the shadowstack for the GC to find
         # (ebp is a writeable object and does not need a write-barrier
diff --git a/rpython/jit/backend/x86/callbuilder.py b/rpython/jit/backend/x86/callbuilder.py
--- a/rpython/jit/backend/x86/callbuilder.py
+++ b/rpython/jit/backend/x86/callbuilder.py
@@ -684,41 +684,70 @@
             self.mc.MOV_rs(eax.value, 0)
 
     def call_stm_before_ex_call(self):
+        from rpython.jit.backend.x86 import rx86
         from rpython.rlib import rstm
-        # XXX slowish: before any CALL_RELEASE_GIL, invoke the
-        # pypy_stm_commit_if_not_atomic() function.  Messy because
-        # we need to save the register arguments first.
+        # Generate the same logic as stm_leave_transactional_zone()
         #
-        n = min(self.next_arg_gpr, len(self.ARGUMENTS_GPR))
-        for i in range(n):
-            self.mc.PUSH_r(self.ARGUMENTS_GPR[i].value)    # PUSH gpr arg
-        m = min(self.next_arg_xmm, len(self.ARGUMENTS_XMM))
-        extra = m + ((n + m) & 1)
-        # in total the stack is moved down by (n + extra) words,
-        # which needs to be an even value for alignment:
-        assert ((n + extra) & 1) == 0
-        if extra > 0:
-            self.mc.SUB_ri(esp.value, extra * WORD)        # SUB rsp, extra
-            for i in range(m):
-                self.mc.MOVSD_sx(i * WORD, self.ARGUMENTS_XMM[i].value)
-                                                           # MOVSD [rsp+..], xmm
+        # First, stm_is_inevitable(), which is '!rewind_jmp_armed()',
+        # which is 'moved_off_base == 0':
+        rjmovd_o_b = self.asm.heap_rjthread_moved_off_base()
+        mc = self.mc
+        mc.CMP(rjmovd_o_b, imm(0))
+        mc.J_il8(rx86.Conditions['E'], 0)
+        je_location = mc.get_relative_pos()
         #
-        self.mc.CALL(imm(rstm.adr_pypy_stm_commit_if_not_atomic))
+        # Slow path: call a helper that will save all registers and
+        # call _stm_leave_noninevitable_transactional_zone()
+        mc.CALL(imm(self.asm._stm_leave_noninevitable_tr_slowpath))
+        mc.JMP_l8(0)      # jump to done, patched later
+        jmp_location = mc.get_relative_pos()
         #
-        if extra > 0:
-            for i in range(m):
-                self.mc.MOVSD_xs(self.ARGUMENTS_XMM[i].value, i * WORD)
-            self.mc.ADD_ri(esp.value, extra * WORD)
-        for i in range(n-1, -1, -1):
-            self.mc.POP_r(self.ARGUMENTS_GPR[i].value)
+        offset = jmp_location - je_location
+        assert 0 < offset <= 127
+        mc.overwrite(je_location - 1, chr(offset))
+        #
+        # Fast path: inline _stm_detach_inevitable_transaction()
+        # <- Here comes the write_fence(), which is not needed in x86 assembler
+        # assert(_stm_detached_inevitable_from_thread == 0): dropped
+        # _stm_detached_inevitable_from_thread = tl (== tl->self):
+        mc.MOV(eax, self.asm.heap_stm_thread_local_self())
+        mc.MOV(self.asm.heap_stm_detached_inevitable_from_thread(), eax)
+        #
+        offset = mc.get_relative_pos() - jmp_location
+        assert 0 < offset <= 127
+        mc.overwrite(jmp_location - 1, chr(offset))
 
     def call_stm_after_ex_call(self):
+        from rpython.jit.backend.x86 import rx86
         from rpython.rlib import rstm
-        # after any CALL_RELEASE_GIL, invoke the
-        # pypy_stm_start_if_not_atomic() function
-        self.save_result_value(True)
-        self.mc.CALL(imm(rstm.adr_pypy_stm_start_if_not_atomic))
-        self.restore_result_value(True)
+        # Generate the same logic as stm_enter_transactional_zone()
+        #
+        # Need to save away the result value, which is (likely) in eax
+        assert not self.result_value_saved_early
+        mc = self.mc
+        mc.MOV(edi, eax)
+        #
+        # compare_and_swap(&_stm_detached_inevitable_from_thread, tl, 0)
+        mc.MOV(eax, self.asm.heap_stm_thread_local_self())
+        mc.XOR(esi, esi)
+        adr = self.asm.heap_stm_detached_inevitable_from_thread()
+        m_address = mc._addr_as_reg_offset(adr.value_j())
+        mc.LOCK()
+        mc.CMPXCHG_mr(m_address, esi.value)
+        #
+        # restore the result value, back to eax
+        mc.MOV(eax, edi)
+        #
+        # if successful, jump over the next CALL
+        mc.J_il8(rx86.Conditions['Z'], 0)
+        jz_location = mc.get_relative_pos()
+        #
+        # if unsuccessful, invoke _stm_reattach_transaction()
+        mc.CALL(imm(self.asm._stm_reattach_tr_slowpath))
+        #
+        offset = mc.get_relative_pos() - jz_location
+        assert 0 < offset <= 127
+        mc.overwrite(jz_location - 1, chr(offset))
 
 
 if IS_X86_32:
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -630,6 +630,11 @@
     FLDL_s  = insn('\xDD', orbyte(0<<3), stack_sp(1))
     FLDS_s  = insn('\xD9', orbyte(0<<3), stack_sp(1))
 
+    # the 'lock' and 'cmpxchg' instructions
+    LOCK = insn('\xF0')
+    CMPXCHG_mr = insn(rex_w, '\x0F\xB1', register(2,8), mem_reg_plus_const(1))
+    CMPXCHG_jr = insn(rex_w, '\x0F\xB1', register(2,8), abs_(1))
+
     # ------------------------------ Random mess -----------------------
     RDTSC = insn('\x0F\x31')
 
diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py
--- a/rpython/rlib/rstm.py
+++ b/rpython/rlib/rstm.py
@@ -43,10 +43,14 @@
 
 adr_pypy__rewind_jmp_copy_stack_slice = (
     CFlexSymbolic('((long)&pypy__rewind_jmp_copy_stack_slice)'))
-#adr_pypy_stm_commit_if_not_atomic = (
-#    CFlexSymbolic('((long)&pypy_stm_commit_if_not_atomic)'))
-#adr_pypy_stm_start_if_not_atomic = (
-#    CFlexSymbolic('((long)&pypy_stm_start_if_not_atomic)'))
+adr_stm_detached_inevitable_from_thread = (
+    CFlexSymbolic('((long)&_stm_detached_inevitable_from_thread)'))
+adr_stm_thread_local_self = (
+    CFlexSymbolic('((long)&stm_thread_local.self)'))
+adr_stm_leave_noninevitable_transactional_zone = (
+    CFlexSymbolic('((long)&_stm_leave_noninevitable_transactional_zone)'))
+adr_stm_reattach_transaction = (
+    CFlexSymbolic('((long)&_stm_reattach_transaction)'))
 
 
 def rewind_jmp_frame():

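Both call_stm_before_ex_call() and call_stm_after_ex_call() use the same
short-forward-jump pattern: emit J_il8/JMP_l8 with a 0 placeholder, emit the
code to be skipped, then patch the 8-bit displacement with mc.overwrite().
A reduced sketch of that pattern over a plain bytearray (hypothetical
'emit_jcc_rel8' helper, not the real MachineCodeBlockWrapper):

buf = bytearray()

def emit_jcc_rel8(buf, cond):
    buf += bytes([0x70 | cond, 0x00])   # Jcc rel8, offset byte left as 0
    return len(buf)                     # position right after the placeholder

je_location = emit_jcc_rel8(buf, 0x4)   # 0x70 | 4 == 0x74 == JE
buf += b'\x90' * 10                     # stand-in for the slow-path code
offset = len(buf) - je_location         # distance from the end of the Jcc
assert 0 < offset <= 127                # rel8 only reaches 127 bytes forward
buf[je_location - 1] = offset           # patch the placeholder, like mc.overwrite()
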
