[pypy-commit] pypy stmgc-c8: (arigo, remi) fix jit support for card marking in c8

Raemi noreply at buildbot.pypy.org
Sat Feb 28 12:19:51 CET 2015


Author: Remi Meier <remi.meier at gmail.com>
Branch: stmgc-c8
Changeset: r76196:1257140b7eef
Date: 2015-02-28 12:19 +0100
http://bitbucket.org/pypy/pypy/changeset/1257140b7eef/

Log:	(arigo, remi) fix jit support for card marking in c8

diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -88,6 +88,8 @@
         self._build_wb_slowpath(False)
         self._build_wb_slowpath(True)
         self._build_wb_slowpath(False, for_frame=True)
+        if gc_ll_descr.stm:
+            self._build_stm_wb_card_slowpath(False)
         # only one of those
         self.build_frame_realloc_slowpath()
         if self.cpu.supports_floats:
@@ -95,6 +97,8 @@
             self._build_failure_recovery(True, withfloats=True)
             self._build_wb_slowpath(False, withfloats=True)
             self._build_wb_slowpath(True, withfloats=True)
+            if gc_ll_descr.stm:
+                self._build_stm_wb_card_slowpath(True)
         self._build_propagate_exception_path()
 
         if gc_ll_descr.get_malloc_slowpath_addr() is not None:
@@ -390,4 +394,3 @@
                 (r_uint(descr_number), r_uint(rawstart),
                     r_uint(rawstart + codeendpos)))
     debug_stop("jit-backend-addr")
-
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -54,6 +54,7 @@
         self.malloc_slowpath = 0
         self.malloc_slowpath_varsize = 0
         self.wb_slowpath = [0, 0, 0, 0, 0]
+        self.wb_card_slowpath = [0, 0]
         self.setup_failure_recovery()
         self.datablockwrapper = None
         self.stack_check_slowpath = 0
@@ -363,6 +364,26 @@
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.stack_check_slowpath = rawstart
 
+    def _build_stm_wb_card_slowpath(self, withfloats):
+        mc = codebuf.MachineCodeBlockWrapper()
+
+        self._push_all_regs_to_frame(mc, [], withfloats, callee_only=True)
+
+        mc.MOV_rs(esi.value, WORD) #index
+        mc.MOV_rs(edi.value, 2*WORD) #obj
+
+        mc.PUSH(r11) # for alignment
+        func = rstm.adr_write_slowpath_card
+        mc.CALL(imm(func))
+        mc.POP(r11)
+
+        self._pop_all_regs_from_frame(mc, [], withfloats, callee_only=True)
+        mc.RET16_i(2 * WORD)
+
+        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+        self.wb_card_slowpath[withfloats] = rawstart
+
+
     def _build_wb_slowpath(self, withcards, withfloats=False, for_frame=False):
         descr = self.cpu.gc_ll_descr.write_barrier_descr
         exc0, exc1 = None, None
@@ -2366,10 +2387,11 @@
         # Write only a CALL to the helper prepared in advance, passing it as
         # argument the address of the structure we are writing into
         # (the first argument to COND_CALL_GC_WB).
+        withfloats = self._regalloc is not None and bool(self._regalloc.xrm.reg_bindings)
         helper_num = card_marking
         if is_frame:
             helper_num = 4
-        elif self._regalloc is not None and self._regalloc.xrm.reg_bindings:
+        elif withfloats:
             helper_num += 2
         if self.wb_slowpath[helper_num] == 0:    # tests only
             assert not we_are_translated()
@@ -2400,6 +2422,7 @@
             # So here, we can simply write again a 'JNS', which will be
             # taken if GCFLAG_CARDS_SET is still not set.
             if stm:
+                # here it's actually the result of _stm_write_slowpath_card_extra
                 mc.J_il8(rx86.Conditions['Z'], 0) # patched later
             else:
                 mc.J_il8(rx86.Conditions['NS'], 0) # patched later
@@ -2415,10 +2438,9 @@
             loc_index = arglocs[1]
 
             if stm:
-                # must write the value CARD_MARKED into the byte at:
-                #     write_locks_base + (object >> 4) + (index / CARD_SIZE)
+                # if the card's byte already equals CARD_MARKED, we are done; its address:
+                #     (object >> 4) + (index / CARD_SIZE) + 1
                 #
-                write_locks_base = rstm.adr__stm_write_slowpath_card_extra_base
                 if rstm.CARD_SIZE == 32:
                     card_bits = 5
                 elif rstm.CARD_SIZE == 64:
@@ -2428,12 +2450,12 @@
                 else:
                     raise AssertionError("CARD_SIZE should be 32/64/128")
                 #
-                # idea:  mov r11, write_locks_base<<4
-                #        add r11, loc_base    # the object
+                # idea:
+                #        mov r11, loc_base    # the object
                 #        and r11, ~15         # align
                 #        lea r11, [loc_index + r11<<(card_bits-4)]
                 #        shr r11, card_bits
-                #        mov [r11], card_marked
+                #        cmp [r11+1], card_marked
                 #
                 # this assumes that the value computed up to the
                 # "shr r11, card_bits" instruction does not overflow
@@ -2444,15 +2466,13 @@
                 # and 2**X, for X <= 56).
                 #
                 r11 = X86_64_SCRATCH_REG
-                initial_value = write_locks_base << 4
                 if isinstance(loc_index, RegLoc):
                     if isinstance(loc_base, RegLoc):
-                        mc.MOV_ri(r11.value, initial_value)
-                        mc.ADD_rr(r11.value, loc_base.value)
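+                        mc.MOV_ri(r11.value, loc_base.value)  # NOTE(review): loc_base is a RegLoc here, so this looks like it loads the register number as an immediate -- MOV_rr intended? confirm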
                         mc.AND_ri(r11.value, ~15)
                     else:
                         assert isinstance(loc_base, ImmedLoc)
-                        initial_value += loc_base.value & ~15
+                        initial_value = loc_base.value & ~15
                         mc.MOV_ri(r11.value, initial_value)
                     mc.LEA_ra(r11.value, (self.SEGMENT_NO,
                                           loc_index.value,
@@ -2462,7 +2482,7 @@
                     mc.SHR_ri(r11.value, card_bits)
                 else:
                     assert isinstance(loc_index, ImmedLoc)
-                    initial_value += (loc_index.value >> card_bits) << 4
+                    initial_value = (loc_index.value >> card_bits) << 4
                     if isinstance(loc_base, RegLoc):
                         mc.MOV_ri(r11.value, initial_value)
                         mc.ADD_rr(r11.value, loc_base.value)
@@ -2473,8 +2493,18 @@
                         initial_value >>= 4
                         mc.MOV_ri(r11.value, initial_value)
                 #
-                mc.MOV8_mi((self.SEGMENT_NO, r11.value, 0),
+                mc.CMP8_mi((self.SEGMENT_GC, r11.value, 1),
                            rstm.CARD_MARKED)
+                mc.J_il8(rx86.Conditions['E'], 0) # patched later
+                before_loc = mc.get_relative_pos()
+                # slowpath: pass (obj, index) on the stack to the _stm_write_slowpath_card helper
+                mc.PUSH(loc_base)
+                mc.PUSH(loc_index)
+                mc.CALL(imm(self.wb_card_slowpath[withfloats]))
+
+                offset = mc.get_relative_pos() - before_loc
+                assert 0 < offset <= 127
+                mc.overwrite(before_loc-1, chr(offset))
 
             elif isinstance(loc_index, RegLoc):
                 if IS_X86_64 and isinstance(loc_base, RegLoc):
@@ -2811,7 +2841,7 @@
         # It is only supported if 'translate_support_code' is
         # true; otherwise, the execute_token() was done with a
         # dummy value for the stack location THREADLOCAL_OFS
-        # 
+        #
         assert self.cpu.translate_support_code
         assert isinstance(resloc, RegLoc)
         self.mc.MOV_rs(resloc.value, THREADLOCAL_OFS)
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -516,6 +516,7 @@
     CMP32_mi = insn(rex_nw, '\x81', orbyte(7<<3), mem_reg_plus_const(1), immediate(2))
     CMP16_mi = insn('\x66', rex_nw, '\x81', orbyte(7<<3), mem_reg_plus_const(1), immediate(2, 'h'))
     CMP8_ri = insn(rex_fw, '\x80', byte_register(1), '\xF8', immediate(2, 'b'))
+    CMP8_mi = insn(rex_nw, '\x80', orbyte(7<<3), mem_reg_plus_const(1), immediate(2, 'b'))
 
     AND8_rr = insn(rex_fw, '\x20', byte_register(1), byte_register(2,8), '\xC0')
 
diff --git a/rpython/memory/gc/stmgc.py b/rpython/memory/gc/stmgc.py
--- a/rpython/memory/gc/stmgc.py
+++ b/rpython/memory/gc/stmgc.py
@@ -11,6 +11,7 @@
 from rpython.rlib.rarithmetic import LONG_BIT, r_uint
 from rpython.rtyper.extregistry import ExtRegistryEntry
 from rpython.translator.stm import stmgcintf
+from rpython.rlib import rstm
 
 WORD = LONG_BIT // 8
 NULL = llmemory.NULL
@@ -33,9 +34,9 @@
 
     VISIT_FPTR = lltype.Ptr(lltype.FuncType([llmemory.Address], lltype.Void))
 
-    JIT_WB_IF_FLAG = 0x01            # value of _STM_GCFLAG_WRITE_BARRIER
-    JIT_WB_CARDS_SET = 0x08          # value of _STM_GCFLAG_CARDS_SET
-    stm_fast_alloc = 66*1024         # value of _STM_FAST_ALLOC in stmgc.h
+    JIT_WB_IF_FLAG = 0x1   # value of _STM_GCFLAG_WRITE_BARRIER in stmgc.h
+    JIT_WB_CARDS_SET = 0x8 # value of _STM_GCFLAG_CARDS_SET in stmgc.h
+    stm_fast_alloc = rstm.FAST_ALLOC
     minimal_size_in_nursery = 16     # hard-coded lower limit
 
     TRANSLATION_PARAMS = {
diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py
--- a/rpython/rlib/rstm.py
+++ b/rpython/rlib/rstm.py
@@ -30,11 +30,16 @@
 adr_write_slowpath = CFlexSymbolic('((long)&_stm_write_slowpath)')
 adr_write_slowpath_card_extra = (
     CFlexSymbolic('((long)&_stm_write_slowpath_card_extra)'))
-adr__stm_write_slowpath_card_extra_base = (
-   CFlexSymbolic('(_stm_write_slowpath_card_extra_base()-0x4000000000000000L)'))
+adr_write_slowpath_card = (
+    CFlexSymbolic('((long)&_stm_write_slowpath_card)'))
+
 CARD_MARKED = CFlexSymbolic('_STM_CARD_MARKED')
 CARD_SIZE   = CFlexSymbolic('_STM_CARD_SIZE')
 
+GCFLAG_CARDS_SET = CFlexSymbolic('_STM_GCFLAG_CARDS_SET')
+GCFLAG_WRITE_BARRIER = CFlexSymbolic('_STM_GCFLAG_WRITE_BARRIER')
+FAST_ALLOC = CFlexSymbolic('_STM_FAST_ALLOC')
+
 adr_pypy__rewind_jmp_copy_stack_slice = (
     CFlexSymbolic('((long)&pypy__rewind_jmp_copy_stack_slice)'))
 adr_pypy_stm_commit_if_not_atomic = (


More information about the pypy-commit mailing list