[pypy-commit] pypy stmgc-c8-gil-like: in-progress: pypy jit
arigo
noreply at buildbot.pypy.org
Sat Jun 13 17:59:06 CEST 2015
Author: Armin Rigo <arigo at tunes.org>
Branch: stmgc-c8-gil-like
Changeset: r78084:720b5a8e744f
Date: 2015-06-13 17:59 +0200
http://bitbucket.org/pypy/pypy/changeset/720b5a8e744f/
Log: in-progress: pypy jit
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -78,6 +78,8 @@
if self.cpu.supports_floats:
support.ensure_sse2_floats()
self._build_float_constants()
+ if self.cpu.gc_ll_descr.stm:
+ self._build_stm_enter_leave_transactional_zone_helpers()
def setup(self, looptoken):
assert self.memcpy_addr != 0, "setup_once() not called?"
@@ -125,6 +127,36 @@
self.float_const_neg_addr = float_constants
self.float_const_abs_addr = float_constants + 16
+ def _build_stm_enter_leave_transactional_zone_helpers(self):
+ assert IS_X86_64 and self.cpu.supports_floats
+ # a helper to call _stm_leave_noninevitable_transactional_zone(),
+ # preserving all registers that are used to pass arguments.
+ # (Push an odd total number of registers, to align the stack.)
+ mc = codebuf.MachineCodeBlockWrapper()
+ self._push_all_regs_to_frame(mc, [eax], True, callee_only=True)
+ mc.CALL(imm(rstm.adr_stm_leave_noninevitable_transactional_zone))
+ self._pop_all_regs_from_frame(mc, [eax], True, callee_only=True)
+ mc.RET()
+ self._stm_leave_noninevitable_tr_slowpath = mc.materialize(
+ self.cpu.asmmemmgr, [])
+ #
+ # a second helper to call _stm_reattach_transaction(tl),
+ # preserving only registers that might store the result of a call
+ mc = codebuf.MachineCodeBlockWrapper()
+ mc.SUB_ri(esp.value, 3 * WORD) # 3 instead of 2 to align the stack
+ mc.MOV_sr(0, eax.value) # not edx, we're not running 32-bit
+ mc.MOVSD_sx(1 * WORD, xmm0.value) # byte offset: must not overlap the eax slot
+ # load the value of tl (== tl->self) into edi as argument
+ mc.MOV(edi, self.heap_stm_thread_local_self())
+ mc.CALL(imm(rstm.adr_stm_reattach_transaction))
+ # pop
+ mc.MOVSD_xs(xmm0.value, 1 * WORD)
+ mc.MOV_rs(eax.value, 0)
+ mc.ADD_ri(esp.value, 3 * WORD)
+ mc.RET()
+ self._stm_reattach_tr_slowpath = mc.materialize(self.cpu.asmmemmgr, [])
+
+
def set_extra_stack_depth(self, mc, value):
if self._is_asmgcc():
extra_ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
@@ -898,6 +930,16 @@
"""STM: AddressLoc for '&stm_thread_local.rjthread.moved_off_base'."""
return self.heap_tl(rstm.adr_rjthread_moved_off_base)
+ def heap_stm_thread_local_self(self):
+ """STM: AddressLoc for '&stm_thread_local.self', i.e. such that
+ reading it returns the (absolute) address of 'stm_thread_local'."""
+ return self.heap_tl(rstm.adr_stm_thread_local_self)
+
+ def heap_stm_detached_inevitable_from_thread(self):
+ """STM: AddressLoc for '&stm_detached_inevitable_from_thread'."""
+ return heap(self.SEGMENT_NO,
+ rstm.adr_stm_detached_inevitable_from_thread)
+
def _call_header_shadowstack(self):
# put the frame in ebp on the shadowstack for the GC to find
# (ebp is a writeable object and does not need a write-barrier
diff --git a/rpython/jit/backend/x86/callbuilder.py b/rpython/jit/backend/x86/callbuilder.py
--- a/rpython/jit/backend/x86/callbuilder.py
+++ b/rpython/jit/backend/x86/callbuilder.py
@@ -684,41 +684,70 @@
self.mc.MOV_rs(eax.value, 0)
def call_stm_before_ex_call(self):
+ from rpython.jit.backend.x86 import rx86
from rpython.rlib import rstm
- # XXX slowish: before any CALL_RELEASE_GIL, invoke the
- # pypy_stm_commit_if_not_atomic() function. Messy because
- # we need to save the register arguments first.
+ # Generate the same logic as stm_leave_transactional_zone()
#
- n = min(self.next_arg_gpr, len(self.ARGUMENTS_GPR))
- for i in range(n):
- self.mc.PUSH_r(self.ARGUMENTS_GPR[i].value) # PUSH gpr arg
- m = min(self.next_arg_xmm, len(self.ARGUMENTS_XMM))
- extra = m + ((n + m) & 1)
- # in total the stack is moved down by (n + extra) words,
- # which needs to be an even value for alignment:
- assert ((n + extra) & 1) == 0
- if extra > 0:
- self.mc.SUB_ri(esp.value, extra * WORD) # SUB rsp, extra
- for i in range(m):
- self.mc.MOVSD_sx(i * WORD, self.ARGUMENTS_XMM[i].value)
- # MOVSD [rsp+..], xmm
+ # First, stm_is_inevitable(), which is '!rewind_jmp_armed()',
+ # which is 'moved_off_base == 0':
+ rjmovd_o_b = self.asm.heap_rjthread_moved_off_base()
+ mc = self.mc
+ mc.CMP(rjmovd_o_b, imm(0))
+ mc.J_il8(rx86.Conditions['E'], 0)
+ je_location = mc.get_relative_pos()
#
- self.mc.CALL(imm(rstm.adr_pypy_stm_commit_if_not_atomic))
+ # Slow path: call a helper that will save all registers and
+ # call _stm_leave_noninevitable_transactional_zone()
+ mc.CALL(imm(self.asm._stm_leave_noninevitable_tr_slowpath))
+ mc.JMP_l8(0) # jump to done, patched later
+ jmp_location = mc.get_relative_pos()
#
- if extra > 0:
- for i in range(m):
- self.mc.MOVSD_xs(self.ARGUMENTS_XMM[i].value, i * WORD)
- self.mc.ADD_ri(esp.value, extra * WORD)
- for i in range(n-1, -1, -1):
- self.mc.POP_r(self.ARGUMENTS_GPR[i].value)
+ offset = jmp_location - je_location
+ assert 0 < offset <= 127
+ mc.overwrite(je_location - 1, chr(offset))
+ #
+ # Fast path: inline _stm_detach_inevitable_transaction()
+ # <- A write_fence() would go here, but none is needed on x86:
+ # its TSO memory model already orders the following store
+ # assert(_stm_detached_inevitable_from_thread == 0): dropped
+ # _stm_detached_inevitable_from_thread = tl (== tl->self):
+ mc.MOV(eax, self.asm.heap_stm_thread_local_self())
+ mc.MOV(self.asm.heap_stm_detached_inevitable_from_thread(), eax)
+ #
+ offset = mc.get_relative_pos() - jmp_location
+ assert 0 < offset <= 127
+ mc.overwrite(jmp_location - 1, chr(offset))
def call_stm_after_ex_call(self):
+ from rpython.jit.backend.x86 import rx86
from rpython.rlib import rstm
- # after any CALL_RELEASE_GIL, invoke the
- # pypy_stm_start_if_not_atomic() function
- self.save_result_value(True)
- self.mc.CALL(imm(rstm.adr_pypy_stm_start_if_not_atomic))
- self.restore_result_value(True)
+ # Generate the same logic as stm_enter_transactional_zone()
+ #
+ # Need to save away the result value, which is (likely) in eax
+ assert not self.result_value_saved_early
+ mc = self.mc
+ mc.MOV(edi, eax)
+ #
+ # compare_and_swap(&_stm_detached_inevitable_from_thread, tl, 0)
+ mc.MOV(eax, self.asm.heap_stm_thread_local_self())
+ mc.XOR(esi, esi)
+ adr = self.asm.heap_stm_detached_inevitable_from_thread()
+ m_address = mc._addr_as_reg_offset(adr.value_j())
+ mc.LOCK()
+ mc.CMPXCHG_mr(m_address, esi.value)
+ #
+ # restore the result value, back to eax
+ mc.MOV(eax, edi)
+ #
+ # if successful, jump over the next CALL
+ mc.J_il8(rx86.Conditions['Z'], 0)
+ jz_location = mc.get_relative_pos()
+ #
+ # if unsuccessful, invoke _stm_reattach_transaction()
+ mc.CALL(imm(self.asm._stm_reattach_tr_slowpath))
+ #
+ offset = mc.get_relative_pos() - jz_location
+ assert 0 < offset <= 127
+ mc.overwrite(jz_location - 1, chr(offset))
if IS_X86_32:
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -630,6 +630,11 @@
FLDL_s = insn('\xDD', orbyte(0<<3), stack_sp(1))
FLDS_s = insn('\xD9', orbyte(0<<3), stack_sp(1))
+ # the 'lock' and 'cmpxchg' instructions
+ LOCK = insn('\xF0')
+ CMPXCHG_mr = insn(rex_w, '\x0F\xB1', register(2,8), mem_reg_plus_const(1))
+ CMPXCHG_jr = insn(rex_w, '\x0F\xB1', register(2,8), abs_(1))
+
# ------------------------------ Random mess -----------------------
RDTSC = insn('\x0F\x31')
diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py
--- a/rpython/rlib/rstm.py
+++ b/rpython/rlib/rstm.py
@@ -43,10 +43,14 @@
adr_pypy__rewind_jmp_copy_stack_slice = (
CFlexSymbolic('((long)&pypy__rewind_jmp_copy_stack_slice)'))
-#adr_pypy_stm_commit_if_not_atomic = (
-# CFlexSymbolic('((long)&pypy_stm_commit_if_not_atomic)'))
-#adr_pypy_stm_start_if_not_atomic = (
-# CFlexSymbolic('((long)&pypy_stm_start_if_not_atomic)'))
+adr_stm_detached_inevitable_from_thread = (
+ CFlexSymbolic('((long)&_stm_detached_inevitable_from_thread)'))
+adr_stm_thread_local_self = (
+ CFlexSymbolic('((long)&stm_thread_local.self)'))
+adr_stm_leave_noninevitable_transactional_zone = (
+ CFlexSymbolic('((long)&_stm_leave_noninevitable_transactional_zone)'))
+adr_stm_reattach_transaction = (
+ CFlexSymbolic('((long)&_stm_reattach_transaction)'))
def rewind_jmp_frame():
More information about the pypy-commit
mailing list