[pypy-commit] pypy default: hg merge

arigo noreply at buildbot.pypy.org
Sun Jun 3 12:19:03 CEST 2012


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r55281:b513e71c997d
Date: 2012-06-03 12:18 +0200
http://bitbucket.org/pypy/pypy/changeset/b513e71c997d/

Log:	hg merge

diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -577,7 +577,6 @@
     def __init__(self, gc_ll_descr):
         self.llop1 = gc_ll_descr.llop1
         self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR
-        self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR
         self.fielddescr_tid = gc_ll_descr.fielddescr_tid
         #
         GCClass = gc_ll_descr.GCClass
@@ -592,6 +591,11 @@
             self.jit_wb_card_page_shift = GCClass.JIT_WB_CARD_PAGE_SHIFT
             self.jit_wb_cards_set_byteofs, self.jit_wb_cards_set_singlebyte = (
                 self.extract_flag_byte(self.jit_wb_cards_set))
+            #
+            # the x86 backend uses the following "accidental" facts to
+            # avoid one instruction:
+            assert self.jit_wb_cards_set_byteofs == self.jit_wb_if_flag_byteofs
+            assert self.jit_wb_cards_set_singlebyte == -0x80
         else:
             self.jit_wb_cards_set = 0
 
@@ -615,7 +619,7 @@
         # returns a function with arguments [array, index, newvalue]
         llop1 = self.llop1
         funcptr = llop1.get_write_barrier_from_array_failing_case(
-            self.WB_ARRAY_FUNCPTR)
+            self.WB_FUNCPTR)
         funcaddr = llmemory.cast_ptr_to_adr(funcptr)
         return cpu.cast_adr_to_int(funcaddr)    # this may return 0
 
@@ -699,9 +703,7 @@
 
     def _setup_write_barrier(self):
         self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType(
-            [llmemory.Address, llmemory.Address], lltype.Void))
-        self.WB_ARRAY_FUNCPTR = lltype.Ptr(lltype.FuncType(
-            [llmemory.Address, lltype.Signed, llmemory.Address], lltype.Void))
+            [llmemory.Address], lltype.Void))
         self.write_barrier_descr = WriteBarrierDescr(self)
 
     def _make_functions(self, really_not_translated):
@@ -859,8 +861,7 @@
             # the GC, and call it immediately
             llop1 = self.llop1
             funcptr = llop1.get_write_barrier_failing_case(self.WB_FUNCPTR)
-            funcptr(llmemory.cast_ptr_to_adr(gcref_struct),
-                    llmemory.cast_ptr_to_adr(gcref_newptr))
+            funcptr(llmemory.cast_ptr_to_adr(gcref_struct))
 
     def can_use_nursery_malloc(self, size):
         return size < self.max_size_of_young_obj
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -1835,12 +1835,12 @@
         assert not excvalue
 
     def test_cond_call_gc_wb(self):
-        def func_void(a, b):
-            record.append((a, b))
+        def func_void(a):
+            record.append(a)
         record = []
         #
         S = lltype.GcStruct('S', ('tid', lltype.Signed))
-        FUNC = self.FuncType([lltype.Ptr(S), lltype.Ptr(S)], lltype.Void)
+        FUNC = self.FuncType([lltype.Ptr(S)], lltype.Void)
         func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
         funcbox = self.get_funcbox(self.cpu, func_ptr)
         class WriteBarrierDescr(AbstractDescr):
@@ -1866,26 +1866,25 @@
                                    [BoxPtr(sgcref), ConstPtr(tgcref)],
                                    'void', descr=WriteBarrierDescr())
             if cond:
-                assert record == [(s, t)]
+                assert record == [s]
             else:
                 assert record == []
 
     def test_cond_call_gc_wb_array(self):
-        def func_void(a, b, c):
-            record.append((a, b, c))
+        def func_void(a):
+            record.append(a)
         record = []
         #
         S = lltype.GcStruct('S', ('tid', lltype.Signed))
-        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed, lltype.Ptr(S)],
-                             lltype.Void)
+        FUNC = self.FuncType([lltype.Ptr(S)], lltype.Void)
         func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
         funcbox = self.get_funcbox(self.cpu, func_ptr)
         class WriteBarrierDescr(AbstractDescr):
             jit_wb_if_flag = 4096
             jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
             jit_wb_if_flag_singlebyte = 0x10
-            jit_wb_cards_set = 0
-            def get_write_barrier_from_array_fn(self, cpu):
+            jit_wb_cards_set = 0       # <= without card marking
+            def get_write_barrier_fn(self, cpu):
                 return funcbox.getint()
         #
         for cond in [False, True]:
@@ -1902,13 +1901,15 @@
                        [BoxPtr(sgcref), ConstInt(123), BoxPtr(sgcref)],
                        'void', descr=WriteBarrierDescr())
             if cond:
-                assert record == [(s, 123, s)]
+                assert record == [s]
             else:
                 assert record == []
 
     def test_cond_call_gc_wb_array_card_marking_fast_path(self):
-        def func_void(a, b, c):
-            record.append((a, b, c))
+        def func_void(a):
+            record.append(a)
+            if cond == 1:      # the write barrier sets the flag
+                s.data.tid |= 32768
         record = []
         #
         S = lltype.Struct('S', ('tid', lltype.Signed))
@@ -1922,34 +1923,40 @@
                                      ('card6', lltype.Char),
                                      ('card7', lltype.Char),
                                      ('data',  S))
-        FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed, lltype.Ptr(S)],
-                             lltype.Void)
+        FUNC = self.FuncType([lltype.Ptr(S)], lltype.Void)
         func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
         funcbox = self.get_funcbox(self.cpu, func_ptr)
         class WriteBarrierDescr(AbstractDescr):
             jit_wb_if_flag = 4096
             jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
             jit_wb_if_flag_singlebyte = 0x10
-            jit_wb_cards_set = 8192
-            jit_wb_cards_set_byteofs = struct.pack("i", 8192).index('\x20')
-            jit_wb_cards_set_singlebyte = 0x20
+            jit_wb_cards_set = 32768
+            jit_wb_cards_set_byteofs = struct.pack("i", 32768).index('\x80')
+            jit_wb_cards_set_singlebyte = -0x80
             jit_wb_card_page_shift = 7
             def get_write_barrier_from_array_fn(self, cpu):
                 return funcbox.getint()
         #
-        for BoxIndexCls in [BoxInt, ConstInt]:
-            for cond in [False, True]:
+        for BoxIndexCls in [BoxInt, ConstInt]*3:
+            for cond in [-1, 0, 1, 2]:
+                # cond=-1:GCFLAG_TRACK_YOUNG_PTRS, GCFLAG_CARDS_SET are not set
+                # cond=0: GCFLAG_CARDS_SET is never set
+                # cond=1: GCFLAG_CARDS_SET is not set, but the wb sets it
+                # cond=2: GCFLAG_CARDS_SET is already set
                 print
                 print '_'*79
                 print 'BoxIndexCls =', BoxIndexCls
-                print 'JIT_WB_CARDS_SET =', cond
+                print 'testing cond =', cond
                 print
                 value = random.randrange(-sys.maxint, sys.maxint)
-                value |= 4096
-                if cond:
-                    value |= 8192
+                if cond >= 0:
+                    value |= 4096
                 else:
-                    value &= ~8192
+                    value &= ~4096
+                if cond == 2:
+                    value |= 32768
+                else:
+                    value &= ~32768
                 s = lltype.malloc(S_WITH_CARDS, immortal=True, zero=True)
                 s.data.tid = value
                 sgcref = rffi.cast(llmemory.GCREF, s.data)
@@ -1958,11 +1965,13 @@
                 self.execute_operation(rop.COND_CALL_GC_WB_ARRAY,
                            [BoxPtr(sgcref), box_index, BoxPtr(sgcref)],
                            'void', descr=WriteBarrierDescr())
-                if cond:
+                if cond in [0, 1]:
+                    assert record == [s.data]
+                else:
                     assert record == []
+                if cond in [1, 2]:
                     assert s.card6 == '\x02'
                 else:
-                    assert record == [(s.data, (9<<7) + 17, s.data)]
                     assert s.card6 == '\x00'
                 assert s.card0 == '\x00'
                 assert s.card1 == '\x00'
@@ -1971,6 +1980,9 @@
                 assert s.card4 == '\x00'
                 assert s.card5 == '\x00'
                 assert s.card7 == '\x00'
+                if cond == 1:
+                    value |= 32768
+                assert s.data.tid == value
 
     def test_force_operations_returning_void(self):
         values = []
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -10,7 +10,7 @@
 from pypy.rlib.jit import AsmInfo
 from pypy.jit.backend.model import CompiledLoopToken
 from pypy.jit.backend.x86.regalloc import (RegAlloc, get_ebp_ofs, _get_scale,
-    gpr_reg_mgr_cls, _valid_addressing_size)
+    gpr_reg_mgr_cls, xmm_reg_mgr_cls, _valid_addressing_size)
 
 from pypy.jit.backend.x86.arch import (FRAME_FIXED_SIZE, FORCE_INDEX_OFS, WORD,
                                        IS_X86_32, IS_X86_64)
@@ -83,6 +83,7 @@
         self.float_const_abs_addr = 0
         self.malloc_slowpath1 = 0
         self.malloc_slowpath2 = 0
+        self.wb_slowpath = [0, 0, 0, 0]
         self.memcpy_addr = 0
         self.setup_failure_recovery()
         self._debug = False
@@ -109,9 +110,13 @@
         self.memcpy_addr = self.cpu.cast_ptr_to_int(support.memcpy_fn)
         self._build_failure_recovery(False)
         self._build_failure_recovery(True)
+        self._build_wb_slowpath(False)
+        self._build_wb_slowpath(True)
         if self.cpu.supports_floats:
             self._build_failure_recovery(False, withfloats=True)
             self._build_failure_recovery(True, withfloats=True)
+            self._build_wb_slowpath(False, withfloats=True)
+            self._build_wb_slowpath(True, withfloats=True)
             support.ensure_sse2_floats()
             self._build_float_constants()
         self._build_propagate_exception_path()
@@ -344,6 +349,82 @@
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.stack_check_slowpath = rawstart
 
+    def _build_wb_slowpath(self, withcards, withfloats=False):
+        descr = self.cpu.gc_ll_descr.write_barrier_descr
+        if descr is None:
+            return
+        if not withcards:
+            func = descr.get_write_barrier_fn(self.cpu)
+        else:
+            if descr.jit_wb_cards_set == 0:
+                return
+            func = descr.get_write_barrier_from_array_fn(self.cpu)
+            if func == 0:
+                return
+        #
+        # This builds a helper function called from the slow path of
+        # write barriers.  It must save all registers, and optionally
+        # all XMM registers.  It takes a single argument just pushed
+        # on the stack even on X86_64.  It must restore stack alignment
+        # accordingly.
+        mc = codebuf.MachineCodeBlockWrapper()
+        #
+        frame_size = (1 +     # my argument, considered part of my frame
+                      1 +     # my return address
+                      len(gpr_reg_mgr_cls.save_around_call_regs))
+        if withfloats:
+            frame_size += 16     # X86_32: 16 words for 8 registers;
+                                 # X86_64: just 16 registers
+        if IS_X86_32:
+            frame_size += 1      # argument to pass to the call
+        #
+        # align to a multiple of 16 bytes
+        frame_size = (frame_size + (CALL_ALIGN-1)) & ~(CALL_ALIGN-1)
+        #
+        correct_esp_by = (frame_size - 2) * WORD
+        mc.SUB_ri(esp.value, correct_esp_by)
+        #
+        ofs = correct_esp_by
+        if withfloats:
+            for reg in xmm_reg_mgr_cls.save_around_call_regs:
+                ofs -= 8
+                mc.MOVSD_sx(ofs, reg.value)
+        for reg in gpr_reg_mgr_cls.save_around_call_regs:
+            ofs -= WORD
+            mc.MOV_sr(ofs, reg.value)
+        #
+        if IS_X86_32:
+            mc.MOV_rs(eax.value, (frame_size - 1) * WORD)
+            mc.MOV_sr(0, eax.value)
+        elif IS_X86_64:
+            mc.MOV_rs(edi.value, (frame_size - 1) * WORD)
+        mc.CALL(imm(func))
+        #
+        if withcards:
+            # A final TEST8 before the RET, for the caller.  Careful to
+            # not follow this instruction with another one that changes
+            # the status of the CPU flags!
+            mc.MOV_rs(eax.value, (frame_size - 1) * WORD)
+            mc.TEST8(addr_add_const(eax, descr.jit_wb_if_flag_byteofs),
+                     imm(-0x80))
+        #
+        ofs = correct_esp_by
+        if withfloats:
+            for reg in xmm_reg_mgr_cls.save_around_call_regs:
+                ofs -= 8
+                mc.MOVSD_xs(reg.value, ofs)
+        for reg in gpr_reg_mgr_cls.save_around_call_regs:
+            ofs -= WORD
+            mc.MOV_rs(reg.value, ofs)
+        #
+        # ADD esp, correct_esp_by --- but cannot use ADD, because
+        # of its effects on the CPU flags
+        mc.LEA_rs(esp.value, correct_esp_by)
+        mc.RET16_i(WORD)
+        #
+        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+        self.wb_slowpath[withcards + 2 * withfloats] = rawstart
+
     @staticmethod
     @rgc.no_collect
     def _release_gil_asmgcc(css):
@@ -2324,102 +2405,83 @@
 
     def genop_discard_cond_call_gc_wb(self, op, arglocs):
         # Write code equivalent to write_barrier() in the GC: it checks
-        # a flag in the object at arglocs[0], and if set, it calls the
-        # function remember_young_pointer() from the GC.  The arguments
-        # to the call are in arglocs[:N].  The rest, arglocs[N:], contains
-        # registers that need to be saved and restored across the call.
-        # N is either 2 (regular write barrier) or 3 (array write barrier).
+        # a flag in the object at arglocs[0], and if set, it calls a
+        # helper piece of assembler.  The latter saves registers as needed
+        # and call the function jit_remember_young_pointer() from the GC.
         descr = op.getdescr()
         if we_are_translated():
             cls = self.cpu.gc_ll_descr.has_write_barrier_class()
             assert cls is not None and isinstance(descr, cls)
         #
         opnum = op.getopnum()
-        if opnum == rop.COND_CALL_GC_WB:
-            N = 2
-            func = descr.get_write_barrier_fn(self.cpu)
-            card_marking = False
-        elif opnum == rop.COND_CALL_GC_WB_ARRAY:
-            N = 3
-            func = descr.get_write_barrier_from_array_fn(self.cpu)
-            assert func != 0
-            card_marking = descr.jit_wb_cards_set != 0
-        else:
-            raise AssertionError(opnum)
+        card_marking = False
+        mask = descr.jit_wb_if_flag_singlebyte
+        if opnum == rop.COND_CALL_GC_WB_ARRAY and descr.jit_wb_cards_set != 0:
+            # assumptions the rest of the function depends on:
+            assert (descr.jit_wb_cards_set_byteofs ==
+                    descr.jit_wb_if_flag_byteofs)
+            assert descr.jit_wb_cards_set_singlebyte == -0x80
+            card_marking = True
+            mask = descr.jit_wb_if_flag_singlebyte | -0x80
         #
         loc_base = arglocs[0]
         self.mc.TEST8(addr_add_const(loc_base, descr.jit_wb_if_flag_byteofs),
-                      imm(descr.jit_wb_if_flag_singlebyte))
+                      imm(mask))
         self.mc.J_il8(rx86.Conditions['Z'], 0) # patched later
         jz_location = self.mc.get_relative_pos()
 
         # for cond_call_gc_wb_array, also add another fast path:
         # if GCFLAG_CARDS_SET, then we can just set one bit and be done
         if card_marking:
-            self.mc.TEST8(addr_add_const(loc_base,
-                                         descr.jit_wb_cards_set_byteofs),
-                          imm(descr.jit_wb_cards_set_singlebyte))
-            self.mc.J_il8(rx86.Conditions['NZ'], 0) # patched later
-            jnz_location = self.mc.get_relative_pos()
+            # GCFLAG_CARDS_SET is in this byte at 0x80, so this fact can
+            # been checked by the status flags of the previous TEST8
+            self.mc.J_il8(rx86.Conditions['S'], 0) # patched later
+            js_location = self.mc.get_relative_pos()
         else:
-            jnz_location = 0
+            js_location = 0
 
-        # the following is supposed to be the slow path, so whenever possible
-        # we choose the most compact encoding over the most efficient one.
-        if IS_X86_32:
-            limit = -1      # push all arglocs on the stack
-        elif IS_X86_64:
-            limit = N - 1   # push only arglocs[N:] on the stack
-        for i in range(len(arglocs)-1, limit, -1):
-            loc = arglocs[i]
-            if isinstance(loc, RegLoc):
-                self.mc.PUSH_r(loc.value)
-            else:
-                assert not IS_X86_64 # there should only be regs in arglocs[N:]
-                self.mc.PUSH_i32(loc.getint())
-        if IS_X86_64:
-            # We clobber these registers to pass the arguments, but that's
-            # okay, because consider_cond_call_gc_wb makes sure that any
-            # caller-save registers with values in them are present in
-            # arglocs[N:] too, so they are saved on the stack above and
-            # restored below.
-            if N == 2:
-                callargs = [edi, esi]
-            else:
-                callargs = [edi, esi, edx]
-            remap_frame_layout(self, arglocs[:N], callargs,
-                               X86_64_SCRATCH_REG)
+        # Write only a CALL to the helper prepared in advance, passing it as
+        # argument the address of the structure we are writing into
+        # (the first argument to COND_CALL_GC_WB).
+        helper_num = card_marking
+        if self._regalloc.xrm.reg_bindings:
+            helper_num += 2
+        if self.wb_slowpath[helper_num] == 0:    # tests only
+            assert not we_are_translated()
+            self.cpu.gc_ll_descr.write_barrier_descr = descr
+            self._build_wb_slowpath(card_marking,
+                                    bool(self._regalloc.xrm.reg_bindings))
+            assert self.wb_slowpath[helper_num] != 0
         #
-        # misaligned stack in the call, but it's ok because the write barrier
-        # is not going to call anything more.  Also, this assumes that the
-        # write barrier does not touch the xmm registers.  (Slightly delicate
-        # assumption, given that the write barrier can end up calling the
-        # platform's malloc() from AddressStack.append().  XXX may need to
-        # be done properly)
-        self.mc.CALL(imm(func))
-        if IS_X86_32:
-            self.mc.ADD_ri(esp.value, N*WORD)
-        for i in range(N, len(arglocs)):
-            loc = arglocs[i]
-            assert isinstance(loc, RegLoc)
-            self.mc.POP_r(loc.value)
+        self.mc.PUSH(loc_base)
+        self.mc.CALL(imm(self.wb_slowpath[helper_num]))
 
-        # if GCFLAG_CARDS_SET, then we can do the whole thing that would
-        # be done in the CALL above with just four instructions, so here
-        # is an inline copy of them
         if card_marking:
-            self.mc.JMP_l8(0) # jump to the exit, patched later
-            jmp_location = self.mc.get_relative_pos()
-            # patch the JNZ above
-            offset = self.mc.get_relative_pos() - jnz_location
+            # The helper ends again with a check of the flag in the object.
+            # So here, we can simply write again a 'JNS', which will be
+            # taken if GCFLAG_CARDS_SET is still not set.
+            self.mc.J_il8(rx86.Conditions['NS'], 0) # patched later
+            jns_location = self.mc.get_relative_pos()
+            #
+            # patch the JS above
+            offset = self.mc.get_relative_pos() - js_location
             assert 0 < offset <= 127
-            self.mc.overwrite(jnz_location-1, chr(offset))
+            self.mc.overwrite(js_location-1, chr(offset))
             #
+            # case GCFLAG_CARDS_SET: emit a few instructions to do
+            # directly the card flag setting
             loc_index = arglocs[1]
             if isinstance(loc_index, RegLoc):
-                # choose a scratch register
-                tmp1 = loc_index
-                self.mc.PUSH_r(tmp1.value)
+                if IS_X86_64 and isinstance(loc_base, RegLoc):
+                    # copy loc_index into r11
+                    tmp1 = X86_64_SCRATCH_REG
+                    self.mc.MOV_rr(tmp1.value, loc_index.value)
+                    final_pop = False
+                else:
+                    # must save the register loc_index before it is mutated
+                    self.mc.PUSH_r(loc_index.value)
+                    tmp1 = loc_index
+                    final_pop = True
                 # SHR tmp, card_page_shift
                 self.mc.SHR_ri(tmp1.value, descr.jit_wb_card_page_shift)
                 # XOR tmp, -8
@@ -2427,7 +2489,9 @@
                 # BTS [loc_base], tmp
                 self.mc.BTS(addr_add_const(loc_base, 0), tmp1)
                 # done
-                self.mc.POP_r(tmp1.value)
+                if final_pop:
+                    self.mc.POP_r(loc_index.value)
+                #
             elif isinstance(loc_index, ImmedLoc):
                 byte_index = loc_index.value >> descr.jit_wb_card_page_shift
                 byte_ofs = ~(byte_index >> 3)
@@ -2435,11 +2499,12 @@
                 self.mc.OR8(addr_add_const(loc_base, byte_ofs), imm(byte_val))
             else:
                 raise AssertionError("index is neither RegLoc nor ImmedLoc")
-            # patch the JMP above
-            offset = self.mc.get_relative_pos() - jmp_location
+            #
+            # patch the JNS above
+            offset = self.mc.get_relative_pos() - jns_location
             assert 0 < offset <= 127
-            self.mc.overwrite(jmp_location-1, chr(offset))
-        #
+            self.mc.overwrite(jns_location-1, chr(offset))
+
         # patch the JZ above
         offset = self.mc.get_relative_pos() - jz_location
         assert 0 < offset <= 127
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -980,16 +980,6 @@
         # or setarrayitem_gc. It avoids loading it twice from the memory.
         arglocs = [self.rm.make_sure_var_in_reg(op.getarg(i), args)
                    for i in range(N)]
-        # add eax, ecx and edx as extra "arguments" to ensure they are
-        # saved and restored.  Fish in self.rm to know which of these
-        # registers really need to be saved (a bit of a hack).  Moreover,
-        # we don't save and restore any SSE register because the called
-        # function, a GC write barrier, is known not to touch them.
-        # See remember_young_pointer() in rpython/memory/gc/generation.py.
-        for v, reg in self.rm.reg_bindings.items():
-            if (reg in self.rm.save_around_call_regs
-                and self.rm.stays_alive(v)):
-                arglocs.append(reg)
         self.PerformDiscard(op, arglocs)
         self.rm.possibly_free_vars_for_op(op)
 
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -316,6 +316,13 @@
         assert rexbyte == 0
     return 0
 
+# REX prefixes: 'rex_w' generates a REX_W, forcing the instruction
+# to operate on 64-bit.  'rex_nw' doesn't, so the instruction operates
+# on 32-bit or less; the complete REX prefix is omitted if unnecessary.
+# 'rex_fw' is a special case which doesn't generate a REX_W but forces
+# the REX prefix in all cases.  It is only useful on instructions which
+# have an 8-bit register argument, to force access to the "sil" or "dil"
+# registers (as opposed to "ah-dh").
 rex_w  = encode_rex, 0, (0x40 | REX_W), None      # a REX.W prefix
 rex_nw = encode_rex, 0, 0, None                   # an optional REX prefix
 rex_fw = encode_rex, 0, 0x40, None                # a forced REX prefix
@@ -496,9 +503,9 @@
     AND8_rr = insn(rex_fw, '\x20', byte_register(1), byte_register(2,8), '\xC0')
 
     OR8_rr = insn(rex_fw, '\x08', byte_register(1), byte_register(2,8), '\xC0')
-    OR8_mi = insn(rex_fw, '\x80', orbyte(1<<3), mem_reg_plus_const(1),
+    OR8_mi = insn(rex_nw, '\x80', orbyte(1<<3), mem_reg_plus_const(1),
                   immediate(2, 'b'))
-    OR8_ji = insn(rex_fw, '\x80', orbyte(1<<3), abs_, immediate(1),
+    OR8_ji = insn(rex_nw, '\x80', orbyte(1<<3), abs_, immediate(1),
                   immediate(2, 'b'))
 
     NEG_r = insn(rex_w, '\xF7', register(1), '\xD8')
@@ -531,7 +538,13 @@
 
     PUSH_r = insn(rex_nw, register(1), '\x50')
     PUSH_b = insn(rex_nw, '\xFF', orbyte(6<<3), stack_bp(1))
+    PUSH_i8 = insn('\x6A', immediate(1, 'b'))
     PUSH_i32 = insn('\x68', immediate(1, 'i'))
+    def PUSH_i(mc, immed):
+        if single_byte(immed):
+            mc.PUSH_i8(immed)
+        else:
+            mc.PUSH_i32(immed)
 
     POP_r = insn(rex_nw, register(1), '\x58')
     POP_b = insn(rex_nw, '\x8F', orbyte(0<<3), stack_bp(1))
diff --git a/pypy/jit/backend/x86/test/test_rx86.py b/pypy/jit/backend/x86/test/test_rx86.py
--- a/pypy/jit/backend/x86/test/test_rx86.py
+++ b/pypy/jit/backend/x86/test/test_rx86.py
@@ -183,7 +183,8 @@
 
 def test_push32():
     cb = CodeBuilder32
-    assert_encodes_as(cb, 'PUSH_i32', (9,), '\x68\x09\x00\x00\x00')
+    assert_encodes_as(cb, 'PUSH_i', (0x10009,), '\x68\x09\x00\x01\x00')
+    assert_encodes_as(cb, 'PUSH_i', (9,), '\x6A\x09')
 
 def test_sub_ji8():
     cb = CodeBuilder32
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -111,10 +111,13 @@
 # The following flag is set on externally raw_malloc'ed arrays of pointers.
 # They are allocated with some extra space in front of them for a bitfield,
 # one bit per 'card_page_indices' indices.
-GCFLAG_HAS_CARDS    = first_gcflag << 5
-GCFLAG_CARDS_SET    = first_gcflag << 6     # <- at least one card bit is set
+GCFLAG_HAS_CARDS    = first_gcflag << 6
+GCFLAG_CARDS_SET    = first_gcflag << 7     # <- at least one card bit is set
+# note that GCFLAG_CARDS_SET is the most significant bit of a byte:
+# this is required for the JIT (x86)
 
-TID_MASK            = (first_gcflag << 7) - 1
+#GCFLAG_UNUSED      = first_gcflag << 5     # this flag is free
+TID_MASK            = (first_gcflag << 8) - 1
 
 
 FORWARDSTUB = lltype.GcStruct('forwarding_stub',
@@ -994,12 +997,9 @@
     def _init_writebarrier_logic(self):
         DEBUG = self.DEBUG
         # The purpose of attaching remember_young_pointer to the instance
-        # instead of keeping it as a regular method is to help the JIT call it.
-        # Additionally, it makes the code in write_barrier() marginally smaller
+        # instead of keeping it as a regular method is to
+        # make the code in write_barrier() marginally smaller
         # (which is important because it is inlined *everywhere*).
-        # For x86, there is also an extra requirement: when the JIT calls
-        # remember_young_pointer(), it assumes that it will not touch the SSE
-        # registers, so it does not save and restore them (that's a *hack*!).
         def remember_young_pointer(addr_struct, newvalue):
             # 'addr_struct' is the address of the object in which we write.
             # 'newvalue' is the address that we are going to write in there.
@@ -1033,6 +1033,17 @@
         remember_young_pointer._dont_inline_ = True
         self.remember_young_pointer = remember_young_pointer
         #
+        def jit_remember_young_pointer(addr_struct):
+            # minimal version of the above, with just one argument,
+            # called by the JIT when GCFLAG_TRACK_YOUNG_PTRS is set
+            self.old_objects_pointing_to_young.append(addr_struct)
+            objhdr = self.header(addr_struct)
+            objhdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS
+            if objhdr.tid & GCFLAG_NO_HEAP_PTRS:
+                objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
+                self.prebuilt_root_objects.append(addr_struct)
+        self.jit_remember_young_pointer = jit_remember_young_pointer
+        #
         if self.card_page_indices > 0:
             self._init_writebarrier_with_card_marker()
 
@@ -1087,60 +1098,21 @@
         self.remember_young_pointer_from_array2 = (
             remember_young_pointer_from_array2)
 
-        # xxx trying it out for the JIT: a 3-arguments version of the above
-        def remember_young_pointer_from_array3(addr_array, index, newvalue):
+        def jit_remember_young_pointer_from_array(addr_array):
+            # minimal version of the above, with just one argument,
+            # called by the JIT when GCFLAG_TRACK_YOUNG_PTRS is set
+            # but GCFLAG_CARDS_SET is cleared.  This tries to set
+            # GCFLAG_CARDS_SET if possible; otherwise, it falls back
+            # to jit_remember_young_pointer().
             objhdr = self.header(addr_array)
-            #
-            # a single check for the common case of neither GCFLAG_HAS_CARDS
-            # nor GCFLAG_NO_HEAP_PTRS
-            if objhdr.tid & (GCFLAG_HAS_CARDS | GCFLAG_NO_HEAP_PTRS) == 0:
-                # common case: fast path, jump to the end of the function
-                pass
-            elif objhdr.tid & GCFLAG_HAS_CARDS == 0:
-                # no cards, but GCFLAG_NO_HEAP_PTRS is set.
-                objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
-                self.prebuilt_root_objects.append(addr_array)
-                # jump to the end of the function
+            if objhdr.tid & GCFLAG_HAS_CARDS:
+                self.old_objects_with_cards_set.append(addr_array)
+                objhdr.tid |= GCFLAG_CARDS_SET
             else:
-                # case with cards.
-                #
-                # If the newly written address does not actually point to a
-                # young object, leave now.
-                if not self.appears_to_be_young(newvalue):
-                    return
-                #
-                # 'addr_array' is a raw_malloc'ed array with card markers
-                # in front.  Compute the index of the bit to set:
-                bitindex = index >> self.card_page_shift
-                byteindex = bitindex >> 3
-                bitmask = 1 << (bitindex & 7)
-                #
-                # If the bit is already set, leave now.
-                addr_byte = self.get_card(addr_array, byteindex)
-                byte = ord(addr_byte.char[0])
-                if byte & bitmask:
-                    return
-                addr_byte.char[0] = chr(byte | bitmask)
-                #
-                if objhdr.tid & GCFLAG_CARDS_SET == 0:
-                    self.old_objects_with_cards_set.append(addr_array)
-                    objhdr.tid |= GCFLAG_CARDS_SET
-                return
-            #
-            # Logic for the no-cards case, put here to minimize the number
-            # of checks done at the start of the function
-            if DEBUG:   # note: PYPY_GC_DEBUG=1 does not enable this
-                ll_assert(self.debug_is_old_object(addr_array),
-                        "young array with no card but GCFLAG_TRACK_YOUNG_PTRS")
-            #
-            if self.appears_to_be_young(newvalue):
-                self.old_objects_pointing_to_young.append(addr_array)
-                objhdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS
+                self.jit_remember_young_pointer(addr_array)
 
-        remember_young_pointer_from_array3._dont_inline_ = True
-        assert self.card_page_indices > 0
-        self.remember_young_pointer_from_array3 = (
-            remember_young_pointer_from_array3)
+        self.jit_remember_young_pointer_from_array = (
+            jit_remember_young_pointer_from_array)
 
     def get_card(self, obj, byteindex):
         size_gc_header = self.gcheaderbuilder.size_gc_header
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -455,13 +455,12 @@
                                             annmodel.SomeAddress()],
                                            annmodel.s_None,
                                            inline=True)
-            func = getattr(gcdata.gc, 'remember_young_pointer', None)
+            func = getattr(gcdata.gc, 'jit_remember_young_pointer', None)
             if func is not None:
                 # func should not be a bound method, but a real function
                 assert isinstance(func, types.FunctionType)
                 self.write_barrier_failing_case_ptr = getfn(func,
-                                               [annmodel.SomeAddress(),
-                                                annmodel.SomeAddress()],
+                                               [annmodel.SomeAddress()],
                                                annmodel.s_None)
             func = getattr(GCClass, 'write_barrier_from_array', None)
             if func is not None:
@@ -472,16 +471,15 @@
                                             annmodel.SomeInteger()],
                                            annmodel.s_None,
                                            inline=True)
-                func = getattr(gcdata.gc, 'remember_young_pointer_from_array3',
+                func = getattr(gcdata.gc,
+                               'jit_remember_young_pointer_from_array',
                                None)
                 if func is not None:
                     # func should not be a bound method, but a real function
                     assert isinstance(func, types.FunctionType)
                     self.write_barrier_from_array_failing_case_ptr = \
                                              getfn(func,
-                                                   [annmodel.SomeAddress(),
-                                                    annmodel.SomeInteger(),
-                                                    annmodel.SomeAddress()],
+                                                   [annmodel.SomeAddress()],
                                                    annmodel.s_None)
         self.statistics_ptr = getfn(GCClass.statistics.im_func,
                                     [s_gc, annmodel.SomeInteger()],


More information about the pypy-commit mailing list