[pypy-commit] pypy stmgc-c8: (arigo, remi) fix jit support for card marking in c8
Raemi
noreply at buildbot.pypy.org
Sat Feb 28 12:19:51 CET 2015
Author: Remi Meier <remi.meier at gmail.com>
Branch: stmgc-c8
Changeset: r76196:1257140b7eef
Date: 2015-02-28 12:19 +0100
http://bitbucket.org/pypy/pypy/changeset/1257140b7eef/
Log: (arigo, remi) fix jit support for card marking in c8
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -88,6 +88,8 @@
self._build_wb_slowpath(False)
self._build_wb_slowpath(True)
self._build_wb_slowpath(False, for_frame=True)
+ if gc_ll_descr.stm:
+ self._build_stm_wb_card_slowpath(False)
# only one of those
self.build_frame_realloc_slowpath()
if self.cpu.supports_floats:
@@ -95,6 +97,8 @@
self._build_failure_recovery(True, withfloats=True)
self._build_wb_slowpath(False, withfloats=True)
self._build_wb_slowpath(True, withfloats=True)
+ if gc_ll_descr.stm:
+ self._build_stm_wb_card_slowpath(True)
self._build_propagate_exception_path()
if gc_ll_descr.get_malloc_slowpath_addr() is not None:
@@ -390,4 +394,3 @@
(r_uint(descr_number), r_uint(rawstart),
r_uint(rawstart + codeendpos)))
debug_stop("jit-backend-addr")
-
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -54,6 +54,7 @@
self.malloc_slowpath = 0
self.malloc_slowpath_varsize = 0
self.wb_slowpath = [0, 0, 0, 0, 0]
+ self.wb_card_slowpath = [0, 0]
self.setup_failure_recovery()
self.datablockwrapper = None
self.stack_check_slowpath = 0
@@ -363,6 +364,26 @@
rawstart = mc.materialize(self.cpu.asmmemmgr, [])
self.stack_check_slowpath = rawstart
+ def _build_stm_wb_card_slowpath(self, withfloats):
+ mc = codebuf.MachineCodeBlockWrapper()
+ # Out-of-line helper that calls _stm_write_slowpath_card; built once per 'withfloats' value.
+ self._push_all_regs_to_frame(mc, [], withfloats, callee_only=True)
+ # The call site pushes obj, then index, then CALLs: so [esp]=return address, then index, then obj.
+ mc.MOV_rs(esi.value, WORD) # index (presumably the 2nd C argument register; confirm against the ABI)
+ mc.MOV_rs(edi.value, 2*WORD) # obj (presumably the 1st C argument register; confirm against the ABI)
+ # The PUSH/POP pair around the call only moves the stack pointer by one word.
+ mc.PUSH(r11) # for alignment
+ func = rstm.adr_write_slowpath_card # address of the C function _stm_write_slowpath_card
+ mc.CALL(imm(func))
+ mc.POP(r11) # undo the alignment push
+ # Restore what was saved on entry, with the same arguments as the push above.
+ self._pop_all_regs_from_frame(mc, [], withfloats, callee_only=True)
+ mc.RET16_i(2 * WORD) # return and drop the two words pushed by the call site (presumably 'ret imm16')
+ # Publish the helper: the bool 'withfloats' selects slot 0 or 1 of the two-entry list.
+ rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+ self.wb_card_slowpath[withfloats] = rawstart
+
+
def _build_wb_slowpath(self, withcards, withfloats=False, for_frame=False):
descr = self.cpu.gc_ll_descr.write_barrier_descr
exc0, exc1 = None, None
@@ -2366,10 +2387,11 @@
# Write only a CALL to the helper prepared in advance, passing it as
# argument the address of the structure we are writing into
# (the first argument to COND_CALL_GC_WB).
+ withfloats = self._regalloc is not None and bool(self._regalloc.xrm.reg_bindings)
helper_num = card_marking
if is_frame:
helper_num = 4
- elif self._regalloc is not None and self._regalloc.xrm.reg_bindings:
+ elif withfloats:
helper_num += 2
if self.wb_slowpath[helper_num] == 0: # tests only
assert not we_are_translated()
@@ -2400,6 +2422,7 @@
# So here, we can simply write again a 'JNS', which will be
# taken if GCFLAG_CARDS_SET is still not set.
if stm:
+ # here it's actually the result of _stm_write_slowpath_card_extra
mc.J_il8(rx86.Conditions['Z'], 0) # patched later
else:
mc.J_il8(rx86.Conditions['NS'], 0) # patched later
@@ -2415,10 +2438,9 @@
loc_index = arglocs[1]
if stm:
- # must write the value CARD_MARKED into the byte at:
- # write_locks_base + (object >> 4) + (index / CARD_SIZE)
+ # if CARD_MARKED, we are done
+ # (object >> 4) + (index / CARD_SIZE) + 1
#
- write_locks_base = rstm.adr__stm_write_slowpath_card_extra_base
if rstm.CARD_SIZE == 32:
card_bits = 5
elif rstm.CARD_SIZE == 64:
@@ -2428,12 +2450,12 @@
else:
raise AssertionError("CARD_SIZE should be 32/64/128")
#
- # idea: mov r11, write_locks_base<<4
- # add r11, loc_base # the object
+ # idea:
+ # mov r11, loc_base # the object
# and r11, ~15 # align
# lea r11, [loc_index + r11<<(card_bits-4)]
# shr r11, card_bits
- # mov [r11], card_marked
+ # cmp [r11+1], card_marked
#
# this assumes that the value computed up to the
# "shr r11, card_bits" instruction does not overflow
@@ -2444,15 +2466,13 @@
# and 2**X, for X <= 56).
#
r11 = X86_64_SCRATCH_REG
- initial_value = write_locks_base << 4
if isinstance(loc_index, RegLoc):
if isinstance(loc_base, RegLoc):
- mc.MOV_ri(r11.value, initial_value)
- mc.ADD_rr(r11.value, loc_base.value)
+ mc.MOV_ri(r11.value, loc_base.value)
mc.AND_ri(r11.value, ~15)
else:
assert isinstance(loc_base, ImmedLoc)
- initial_value += loc_base.value & ~15
+ initial_value = loc_base.value & ~15
mc.MOV_ri(r11.value, initial_value)
mc.LEA_ra(r11.value, (self.SEGMENT_NO,
loc_index.value,
@@ -2462,7 +2482,7 @@
mc.SHR_ri(r11.value, card_bits)
else:
assert isinstance(loc_index, ImmedLoc)
- initial_value += (loc_index.value >> card_bits) << 4
+ initial_value = (loc_index.value >> card_bits) << 4
if isinstance(loc_base, RegLoc):
mc.MOV_ri(r11.value, initial_value)
mc.ADD_rr(r11.value, loc_base.value)
@@ -2473,8 +2493,18 @@
initial_value >>= 4
mc.MOV_ri(r11.value, initial_value)
#
- mc.MOV8_mi((self.SEGMENT_NO, r11.value, 0),
+ mc.CMP8_mi((self.SEGMENT_GC, r11.value, 1),
rstm.CARD_MARKED)
+ mc.J_il8(rx86.Conditions['E'], 0) # patched later
+ before_loc = mc.get_relative_pos()
+ # slowpath: call _stm_write_slowpath_card
+ mc.PUSH(loc_base)
+ mc.PUSH(loc_index)
+ mc.CALL(imm(self.wb_card_slowpath[withfloats]))
+
+ offset = mc.get_relative_pos() - before_loc
+ assert 0 < offset <= 127
+ mc.overwrite(before_loc-1, chr(offset))
elif isinstance(loc_index, RegLoc):
if IS_X86_64 and isinstance(loc_base, RegLoc):
@@ -2811,7 +2841,7 @@
# It is only supported if 'translate_support_code' is
# true; otherwise, the execute_token() was done with a
# dummy value for the stack location THREADLOCAL_OFS
- #
+ #
assert self.cpu.translate_support_code
assert isinstance(resloc, RegLoc)
self.mc.MOV_rs(resloc.value, THREADLOCAL_OFS)
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -516,6 +516,7 @@
CMP32_mi = insn(rex_nw, '\x81', orbyte(7<<3), mem_reg_plus_const(1), immediate(2))
CMP16_mi = insn('\x66', rex_nw, '\x81', orbyte(7<<3), mem_reg_plus_const(1), immediate(2, 'h'))
CMP8_ri = insn(rex_fw, '\x80', byte_register(1), '\xF8', immediate(2, 'b'))
+ CMP8_mi = insn(rex_nw, '\x80', orbyte(7<<3), mem_reg_plus_const(1), immediate(2, 'b'))
AND8_rr = insn(rex_fw, '\x20', byte_register(1), byte_register(2,8), '\xC0')
diff --git a/rpython/memory/gc/stmgc.py b/rpython/memory/gc/stmgc.py
--- a/rpython/memory/gc/stmgc.py
+++ b/rpython/memory/gc/stmgc.py
@@ -11,6 +11,7 @@
from rpython.rlib.rarithmetic import LONG_BIT, r_uint
from rpython.rtyper.extregistry import ExtRegistryEntry
from rpython.translator.stm import stmgcintf
+from rpython.rlib import rstm
WORD = LONG_BIT // 8
NULL = llmemory.NULL
@@ -33,9 +34,9 @@
VISIT_FPTR = lltype.Ptr(lltype.FuncType([llmemory.Address], lltype.Void))
- JIT_WB_IF_FLAG = 0x01 # value of _STM_GCFLAG_WRITE_BARRIER
- JIT_WB_CARDS_SET = 0x08 # value of _STM_GCFLAG_CARDS_SET
- stm_fast_alloc = 66*1024 # value of _STM_FAST_ALLOC in stmgc.h
+ JIT_WB_IF_FLAG = 0x1 # from stmgc.h
+ JIT_WB_CARDS_SET = 0x8 # from stmgc.h
+ stm_fast_alloc = rstm.FAST_ALLOC
minimal_size_in_nursery = 16 # hard-coded lower limit
TRANSLATION_PARAMS = {
diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py
--- a/rpython/rlib/rstm.py
+++ b/rpython/rlib/rstm.py
@@ -30,11 +30,16 @@
adr_write_slowpath = CFlexSymbolic('((long)&_stm_write_slowpath)')
adr_write_slowpath_card_extra = (
CFlexSymbolic('((long)&_stm_write_slowpath_card_extra)'))
-adr__stm_write_slowpath_card_extra_base = (
- CFlexSymbolic('(_stm_write_slowpath_card_extra_base()-0x4000000000000000L)'))
+adr_write_slowpath_card = (
+ CFlexSymbolic('((long)&_stm_write_slowpath_card)'))
+
CARD_MARKED = CFlexSymbolic('_STM_CARD_MARKED')
CARD_SIZE = CFlexSymbolic('_STM_CARD_SIZE')
+GCFLAG_CARDS_SET = CFlexSymbolic('_STM_GCFLAG_CARDS_SET')
+GCFLAG_WRITE_BARRIER = CFlexSymbolic('_STM_GCFLAG_WRITE_BARRIER')
+FAST_ALLOC = CFlexSymbolic('_STM_FAST_ALLOC')
+
adr_pypy__rewind_jmp_copy_stack_slice = (
CFlexSymbolic('((long)&pypy__rewind_jmp_copy_stack_slice)'))
adr_pypy_stm_commit_if_not_atomic = (
More information about the pypy-commit
mailing list