[pypy-commit] pypy default: hg merge
arigo
noreply at buildbot.pypy.org
Sun Jun 3 12:19:03 CEST 2012
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r55281:b513e71c997d
Date: 2012-06-03 12:18 +0200
http://bitbucket.org/pypy/pypy/changeset/b513e71c997d/
Log: hg merge
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -577,7 +577,6 @@
def __init__(self, gc_ll_descr):
self.llop1 = gc_ll_descr.llop1
self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR
- self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR
self.fielddescr_tid = gc_ll_descr.fielddescr_tid
#
GCClass = gc_ll_descr.GCClass
@@ -592,6 +591,11 @@
self.jit_wb_card_page_shift = GCClass.JIT_WB_CARD_PAGE_SHIFT
self.jit_wb_cards_set_byteofs, self.jit_wb_cards_set_singlebyte = (
self.extract_flag_byte(self.jit_wb_cards_set))
+ #
+ # the x86 backend uses the following "accidental" facts to
+ # avoid one instruction:
+ assert self.jit_wb_cards_set_byteofs == self.jit_wb_if_flag_byteofs
+ assert self.jit_wb_cards_set_singlebyte == -0x80
else:
self.jit_wb_cards_set = 0
@@ -615,7 +619,7 @@
# returns a function with arguments [array, index, newvalue]
llop1 = self.llop1
funcptr = llop1.get_write_barrier_from_array_failing_case(
- self.WB_ARRAY_FUNCPTR)
+ self.WB_FUNCPTR)
funcaddr = llmemory.cast_ptr_to_adr(funcptr)
return cpu.cast_adr_to_int(funcaddr) # this may return 0
@@ -699,9 +703,7 @@
def _setup_write_barrier(self):
self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType(
- [llmemory.Address, llmemory.Address], lltype.Void))
- self.WB_ARRAY_FUNCPTR = lltype.Ptr(lltype.FuncType(
- [llmemory.Address, lltype.Signed, llmemory.Address], lltype.Void))
+ [llmemory.Address], lltype.Void))
self.write_barrier_descr = WriteBarrierDescr(self)
def _make_functions(self, really_not_translated):
@@ -859,8 +861,7 @@
# the GC, and call it immediately
llop1 = self.llop1
funcptr = llop1.get_write_barrier_failing_case(self.WB_FUNCPTR)
- funcptr(llmemory.cast_ptr_to_adr(gcref_struct),
- llmemory.cast_ptr_to_adr(gcref_newptr))
+ funcptr(llmemory.cast_ptr_to_adr(gcref_struct))
def can_use_nursery_malloc(self, size):
return size < self.max_size_of_young_obj
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -1835,12 +1835,12 @@
assert not excvalue
def test_cond_call_gc_wb(self):
- def func_void(a, b):
- record.append((a, b))
+ def func_void(a):
+ record.append(a)
record = []
#
S = lltype.GcStruct('S', ('tid', lltype.Signed))
- FUNC = self.FuncType([lltype.Ptr(S), lltype.Ptr(S)], lltype.Void)
+ FUNC = self.FuncType([lltype.Ptr(S)], lltype.Void)
func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
funcbox = self.get_funcbox(self.cpu, func_ptr)
class WriteBarrierDescr(AbstractDescr):
@@ -1866,26 +1866,25 @@
[BoxPtr(sgcref), ConstPtr(tgcref)],
'void', descr=WriteBarrierDescr())
if cond:
- assert record == [(s, t)]
+ assert record == [s]
else:
assert record == []
def test_cond_call_gc_wb_array(self):
- def func_void(a, b, c):
- record.append((a, b, c))
+ def func_void(a):
+ record.append(a)
record = []
#
S = lltype.GcStruct('S', ('tid', lltype.Signed))
- FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed, lltype.Ptr(S)],
- lltype.Void)
+ FUNC = self.FuncType([lltype.Ptr(S)], lltype.Void)
func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
funcbox = self.get_funcbox(self.cpu, func_ptr)
class WriteBarrierDescr(AbstractDescr):
jit_wb_if_flag = 4096
jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
jit_wb_if_flag_singlebyte = 0x10
- jit_wb_cards_set = 0
- def get_write_barrier_from_array_fn(self, cpu):
+ jit_wb_cards_set = 0 # <= without card marking
+ def get_write_barrier_fn(self, cpu):
return funcbox.getint()
#
for cond in [False, True]:
@@ -1902,13 +1901,15 @@
[BoxPtr(sgcref), ConstInt(123), BoxPtr(sgcref)],
'void', descr=WriteBarrierDescr())
if cond:
- assert record == [(s, 123, s)]
+ assert record == [s]
else:
assert record == []
def test_cond_call_gc_wb_array_card_marking_fast_path(self):
- def func_void(a, b, c):
- record.append((a, b, c))
+ def func_void(a):
+ record.append(a)
+ if cond == 1: # the write barrier sets the flag
+ s.data.tid |= 32768
record = []
#
S = lltype.Struct('S', ('tid', lltype.Signed))
@@ -1922,34 +1923,40 @@
('card6', lltype.Char),
('card7', lltype.Char),
('data', S))
- FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed, lltype.Ptr(S)],
- lltype.Void)
+ FUNC = self.FuncType([lltype.Ptr(S)], lltype.Void)
func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
funcbox = self.get_funcbox(self.cpu, func_ptr)
class WriteBarrierDescr(AbstractDescr):
jit_wb_if_flag = 4096
jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
jit_wb_if_flag_singlebyte = 0x10
- jit_wb_cards_set = 8192
- jit_wb_cards_set_byteofs = struct.pack("i", 8192).index('\x20')
- jit_wb_cards_set_singlebyte = 0x20
+ jit_wb_cards_set = 32768
+ jit_wb_cards_set_byteofs = struct.pack("i", 32768).index('\x80')
+ jit_wb_cards_set_singlebyte = -0x80
jit_wb_card_page_shift = 7
def get_write_barrier_from_array_fn(self, cpu):
return funcbox.getint()
#
- for BoxIndexCls in [BoxInt, ConstInt]:
- for cond in [False, True]:
+ for BoxIndexCls in [BoxInt, ConstInt]*3:
+ for cond in [-1, 0, 1, 2]:
+ # cond=-1: GCFLAG_TRACK_YOUNG_PTRS, GCFLAG_CARDS_SET are not set
+ # cond=0: GCFLAG_CARDS_SET is never set
+ # cond=1: GCFLAG_CARDS_SET is not set, but the wb sets it
+ # cond=2: GCFLAG_CARDS_SET is already set
print
print '_'*79
print 'BoxIndexCls =', BoxIndexCls
- print 'JIT_WB_CARDS_SET =', cond
+ print 'testing cond =', cond
print
value = random.randrange(-sys.maxint, sys.maxint)
- value |= 4096
- if cond:
- value |= 8192
+ if cond >= 0:
+ value |= 4096
else:
- value &= ~8192
+ value &= ~4096
+ if cond == 2:
+ value |= 32768
+ else:
+ value &= ~32768
s = lltype.malloc(S_WITH_CARDS, immortal=True, zero=True)
s.data.tid = value
sgcref = rffi.cast(llmemory.GCREF, s.data)
@@ -1958,11 +1965,13 @@
self.execute_operation(rop.COND_CALL_GC_WB_ARRAY,
[BoxPtr(sgcref), box_index, BoxPtr(sgcref)],
'void', descr=WriteBarrierDescr())
- if cond:
+ if cond in [0, 1]:
+ assert record == [s.data]
+ else:
assert record == []
+ if cond in [1, 2]:
assert s.card6 == '\x02'
else:
- assert record == [(s.data, (9<<7) + 17, s.data)]
assert s.card6 == '\x00'
assert s.card0 == '\x00'
assert s.card1 == '\x00'
@@ -1971,6 +1980,9 @@
assert s.card4 == '\x00'
assert s.card5 == '\x00'
assert s.card7 == '\x00'
+ if cond == 1:
+ value |= 32768
+ assert s.data.tid == value
def test_force_operations_returning_void(self):
values = []
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -10,7 +10,7 @@
from pypy.rlib.jit import AsmInfo
from pypy.jit.backend.model import CompiledLoopToken
from pypy.jit.backend.x86.regalloc import (RegAlloc, get_ebp_ofs, _get_scale,
- gpr_reg_mgr_cls, _valid_addressing_size)
+ gpr_reg_mgr_cls, xmm_reg_mgr_cls, _valid_addressing_size)
from pypy.jit.backend.x86.arch import (FRAME_FIXED_SIZE, FORCE_INDEX_OFS, WORD,
IS_X86_32, IS_X86_64)
@@ -83,6 +83,7 @@
self.float_const_abs_addr = 0
self.malloc_slowpath1 = 0
self.malloc_slowpath2 = 0
+ self.wb_slowpath = [0, 0, 0, 0]
self.memcpy_addr = 0
self.setup_failure_recovery()
self._debug = False
@@ -109,9 +110,13 @@
self.memcpy_addr = self.cpu.cast_ptr_to_int(support.memcpy_fn)
self._build_failure_recovery(False)
self._build_failure_recovery(True)
+ self._build_wb_slowpath(False)
+ self._build_wb_slowpath(True)
if self.cpu.supports_floats:
self._build_failure_recovery(False, withfloats=True)
self._build_failure_recovery(True, withfloats=True)
+ self._build_wb_slowpath(False, withfloats=True)
+ self._build_wb_slowpath(True, withfloats=True)
support.ensure_sse2_floats()
self._build_float_constants()
self._build_propagate_exception_path()
@@ -344,6 +349,82 @@
rawstart = mc.materialize(self.cpu.asmmemmgr, [])
self.stack_check_slowpath = rawstart
+ def _build_wb_slowpath(self, withcards, withfloats=False):
+ descr = self.cpu.gc_ll_descr.write_barrier_descr
+ if descr is None:
+ return
+ if not withcards:
+ func = descr.get_write_barrier_fn(self.cpu)
+ else:
+ if descr.jit_wb_cards_set == 0:
+ return
+ func = descr.get_write_barrier_from_array_fn(self.cpu)
+ if func == 0:
+ return
+ #
+ # This builds a helper function called from the slow path of
+ # write barriers. It must save all registers, and optionally
+ # all XMM registers. It takes a single argument just pushed
+ # on the stack even on X86_64. It must restore stack alignment
+ # accordingly.
+ mc = codebuf.MachineCodeBlockWrapper()
+ #
+ frame_size = (1 + # my argument, considered part of my frame
+ 1 + # my return address
+ len(gpr_reg_mgr_cls.save_around_call_regs))
+ if withfloats:
+ frame_size += 16 # X86_32: 16 words for 8 registers;
+ # X86_64: just 16 registers
+ if IS_X86_32:
+ frame_size += 1 # argument to pass to the call
+ #
+ # align to a multiple of 16 bytes
+ frame_size = (frame_size + (CALL_ALIGN-1)) & ~(CALL_ALIGN-1)
+ #
+ correct_esp_by = (frame_size - 2) * WORD
+ mc.SUB_ri(esp.value, correct_esp_by)
+ #
+ ofs = correct_esp_by
+ if withfloats:
+ for reg in xmm_reg_mgr_cls.save_around_call_regs:
+ ofs -= 8
+ mc.MOVSD_sx(ofs, reg.value)
+ for reg in gpr_reg_mgr_cls.save_around_call_regs:
+ ofs -= WORD
+ mc.MOV_sr(ofs, reg.value)
+ #
+ if IS_X86_32:
+ mc.MOV_rs(eax.value, (frame_size - 1) * WORD)
+ mc.MOV_sr(0, eax.value)
+ elif IS_X86_64:
+ mc.MOV_rs(edi.value, (frame_size - 1) * WORD)
+ mc.CALL(imm(func))
+ #
+ if withcards:
+ # A final TEST8 before the RET, for the caller. Careful to
+ # not follow this instruction with another one that changes
+ # the status of the CPU flags!
+ mc.MOV_rs(eax.value, (frame_size - 1) * WORD)
+ mc.TEST8(addr_add_const(eax, descr.jit_wb_if_flag_byteofs),
+ imm(-0x80))
+ #
+ ofs = correct_esp_by
+ if withfloats:
+ for reg in xmm_reg_mgr_cls.save_around_call_regs:
+ ofs -= 8
+ mc.MOVSD_xs(reg.value, ofs)
+ for reg in gpr_reg_mgr_cls.save_around_call_regs:
+ ofs -= WORD
+ mc.MOV_rs(reg.value, ofs)
+ #
+ # ADD esp, correct_esp_by --- but cannot use ADD, because
+ # of its effects on the CPU flags
+ mc.LEA_rs(esp.value, correct_esp_by)
+ mc.RET16_i(WORD)
+ #
+ rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+ self.wb_slowpath[withcards + 2 * withfloats] = rawstart
+
@staticmethod
@rgc.no_collect
def _release_gil_asmgcc(css):
@@ -2324,102 +2405,83 @@
def genop_discard_cond_call_gc_wb(self, op, arglocs):
# Write code equivalent to write_barrier() in the GC: it checks
- # a flag in the object at arglocs[0], and if set, it calls the
- # function remember_young_pointer() from the GC. The arguments
- # to the call are in arglocs[:N]. The rest, arglocs[N:], contains
- # registers that need to be saved and restored across the call.
- # N is either 2 (regular write barrier) or 3 (array write barrier).
+ # a flag in the object at arglocs[0], and if set, it calls a
+ # helper piece of assembler. The latter saves registers as needed
+ # and calls the function jit_remember_young_pointer() from the GC.
descr = op.getdescr()
if we_are_translated():
cls = self.cpu.gc_ll_descr.has_write_barrier_class()
assert cls is not None and isinstance(descr, cls)
#
opnum = op.getopnum()
- if opnum == rop.COND_CALL_GC_WB:
- N = 2
- func = descr.get_write_barrier_fn(self.cpu)
- card_marking = False
- elif opnum == rop.COND_CALL_GC_WB_ARRAY:
- N = 3
- func = descr.get_write_barrier_from_array_fn(self.cpu)
- assert func != 0
- card_marking = descr.jit_wb_cards_set != 0
- else:
- raise AssertionError(opnum)
+ card_marking = False
+ mask = descr.jit_wb_if_flag_singlebyte
+ if opnum == rop.COND_CALL_GC_WB_ARRAY and descr.jit_wb_cards_set != 0:
+ # assumptions the rest of the function depends on:
+ assert (descr.jit_wb_cards_set_byteofs ==
+ descr.jit_wb_if_flag_byteofs)
+ assert descr.jit_wb_cards_set_singlebyte == -0x80
+ card_marking = True
+ mask = descr.jit_wb_if_flag_singlebyte | -0x80
#
loc_base = arglocs[0]
self.mc.TEST8(addr_add_const(loc_base, descr.jit_wb_if_flag_byteofs),
- imm(descr.jit_wb_if_flag_singlebyte))
+ imm(mask))
self.mc.J_il8(rx86.Conditions['Z'], 0) # patched later
jz_location = self.mc.get_relative_pos()
# for cond_call_gc_wb_array, also add another fast path:
# if GCFLAG_CARDS_SET, then we can just set one bit and be done
if card_marking:
- self.mc.TEST8(addr_add_const(loc_base,
- descr.jit_wb_cards_set_byteofs),
- imm(descr.jit_wb_cards_set_singlebyte))
- self.mc.J_il8(rx86.Conditions['NZ'], 0) # patched later
- jnz_location = self.mc.get_relative_pos()
+ # GCFLAG_CARDS_SET is in this byte at 0x80, so this fact can
+ # be checked by the status flags of the previous TEST8
+ self.mc.J_il8(rx86.Conditions['S'], 0) # patched later
+ js_location = self.mc.get_relative_pos()
else:
- jnz_location = 0
+ js_location = 0
- # the following is supposed to be the slow path, so whenever possible
- # we choose the most compact encoding over the most efficient one.
- if IS_X86_32:
- limit = -1 # push all arglocs on the stack
- elif IS_X86_64:
- limit = N - 1 # push only arglocs[N:] on the stack
- for i in range(len(arglocs)-1, limit, -1):
- loc = arglocs[i]
- if isinstance(loc, RegLoc):
- self.mc.PUSH_r(loc.value)
- else:
- assert not IS_X86_64 # there should only be regs in arglocs[N:]
- self.mc.PUSH_i32(loc.getint())
- if IS_X86_64:
- # We clobber these registers to pass the arguments, but that's
- # okay, because consider_cond_call_gc_wb makes sure that any
- # caller-save registers with values in them are present in
- # arglocs[N:] too, so they are saved on the stack above and
- # restored below.
- if N == 2:
- callargs = [edi, esi]
- else:
- callargs = [edi, esi, edx]
- remap_frame_layout(self, arglocs[:N], callargs,
- X86_64_SCRATCH_REG)
+ # Write only a CALL to the helper prepared in advance, passing it as
+ # argument the address of the structure we are writing into
+ # (the first argument to COND_CALL_GC_WB).
+ helper_num = card_marking
+ if self._regalloc.xrm.reg_bindings:
+ helper_num += 2
+ if self.wb_slowpath[helper_num] == 0: # tests only
+ assert not we_are_translated()
+ self.cpu.gc_ll_descr.write_barrier_descr = descr
+ self._build_wb_slowpath(card_marking,
+ bool(self._regalloc.xrm.reg_bindings))
+ assert self.wb_slowpath[helper_num] != 0
#
- # misaligned stack in the call, but it's ok because the write barrier
- # is not going to call anything more. Also, this assumes that the
- # write barrier does not touch the xmm registers. (Slightly delicate
- # assumption, given that the write barrier can end up calling the
- # platform's malloc() from AddressStack.append(). XXX may need to
- # be done properly)
- self.mc.CALL(imm(func))
- if IS_X86_32:
- self.mc.ADD_ri(esp.value, N*WORD)
- for i in range(N, len(arglocs)):
- loc = arglocs[i]
- assert isinstance(loc, RegLoc)
- self.mc.POP_r(loc.value)
+ self.mc.PUSH(loc_base)
+ self.mc.CALL(imm(self.wb_slowpath[helper_num]))
- # if GCFLAG_CARDS_SET, then we can do the whole thing that would
- # be done in the CALL above with just four instructions, so here
- # is an inline copy of them
if card_marking:
- self.mc.JMP_l8(0) # jump to the exit, patched later
- jmp_location = self.mc.get_relative_pos()
- # patch the JNZ above
- offset = self.mc.get_relative_pos() - jnz_location
+ # The helper ends again with a check of the flag in the object.
+ # So here, we can simply write again a 'JNS', which will be
+ # taken if GCFLAG_CARDS_SET is still not set.
+ self.mc.J_il8(rx86.Conditions['NS'], 0) # patched later
+ jns_location = self.mc.get_relative_pos()
+ #
+ # patch the JS above
+ offset = self.mc.get_relative_pos() - js_location
assert 0 < offset <= 127
- self.mc.overwrite(jnz_location-1, chr(offset))
+ self.mc.overwrite(js_location-1, chr(offset))
#
+ # case GCFLAG_CARDS_SET: emit a few instructions to do
+ # directly the card flag setting
loc_index = arglocs[1]
if isinstance(loc_index, RegLoc):
- # choose a scratch register
- tmp1 = loc_index
- self.mc.PUSH_r(tmp1.value)
+ if IS_X86_64 and isinstance(loc_base, RegLoc):
+ # copy loc_index into r11
+ tmp1 = X86_64_SCRATCH_REG
+ self.mc.MOV_rr(tmp1.value, loc_index.value)
+ final_pop = False
+ else:
+ # must save the register loc_index before it is mutated
+ self.mc.PUSH_r(loc_index.value)
+ tmp1 = loc_index
+ final_pop = True
# SHR tmp, card_page_shift
self.mc.SHR_ri(tmp1.value, descr.jit_wb_card_page_shift)
# XOR tmp, -8
@@ -2427,7 +2489,9 @@
# BTS [loc_base], tmp
self.mc.BTS(addr_add_const(loc_base, 0), tmp1)
# done
- self.mc.POP_r(tmp1.value)
+ if final_pop:
+ self.mc.POP_r(loc_index.value)
+ #
elif isinstance(loc_index, ImmedLoc):
byte_index = loc_index.value >> descr.jit_wb_card_page_shift
byte_ofs = ~(byte_index >> 3)
@@ -2435,11 +2499,12 @@
self.mc.OR8(addr_add_const(loc_base, byte_ofs), imm(byte_val))
else:
raise AssertionError("index is neither RegLoc nor ImmedLoc")
- # patch the JMP above
- offset = self.mc.get_relative_pos() - jmp_location
+ #
+ # patch the JNS above
+ offset = self.mc.get_relative_pos() - jns_location
assert 0 < offset <= 127
- self.mc.overwrite(jmp_location-1, chr(offset))
- #
+ self.mc.overwrite(jns_location-1, chr(offset))
+
# patch the JZ above
offset = self.mc.get_relative_pos() - jz_location
assert 0 < offset <= 127
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -980,16 +980,6 @@
# or setarrayitem_gc. It avoids loading it twice from the memory.
arglocs = [self.rm.make_sure_var_in_reg(op.getarg(i), args)
for i in range(N)]
- # add eax, ecx and edx as extra "arguments" to ensure they are
- # saved and restored. Fish in self.rm to know which of these
- # registers really need to be saved (a bit of a hack). Moreover,
- # we don't save and restore any SSE register because the called
- # function, a GC write barrier, is known not to touch them.
- # See remember_young_pointer() in rpython/memory/gc/generation.py.
- for v, reg in self.rm.reg_bindings.items():
- if (reg in self.rm.save_around_call_regs
- and self.rm.stays_alive(v)):
- arglocs.append(reg)
self.PerformDiscard(op, arglocs)
self.rm.possibly_free_vars_for_op(op)
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -316,6 +316,13 @@
assert rexbyte == 0
return 0
+# REX prefixes: 'rex_w' generates a REX_W, forcing the instruction
+# to operate on 64-bit. 'rex_nw' doesn't, so the instruction operates
+# on 32-bit or less; the complete REX prefix is omitted if unnecessary.
+# 'rex_fw' is a special case which doesn't generate a REX_W but forces
+# the REX prefix in all cases. It is only useful on instructions which
+# have an 8-bit register argument, to force access to the "sil" or "dil"
+# registers (as opposed to "ah-dh").
rex_w = encode_rex, 0, (0x40 | REX_W), None # a REX.W prefix
rex_nw = encode_rex, 0, 0, None # an optional REX prefix
rex_fw = encode_rex, 0, 0x40, None # a forced REX prefix
@@ -496,9 +503,9 @@
AND8_rr = insn(rex_fw, '\x20', byte_register(1), byte_register(2,8), '\xC0')
OR8_rr = insn(rex_fw, '\x08', byte_register(1), byte_register(2,8), '\xC0')
- OR8_mi = insn(rex_fw, '\x80', orbyte(1<<3), mem_reg_plus_const(1),
+ OR8_mi = insn(rex_nw, '\x80', orbyte(1<<3), mem_reg_plus_const(1),
immediate(2, 'b'))
- OR8_ji = insn(rex_fw, '\x80', orbyte(1<<3), abs_, immediate(1),
+ OR8_ji = insn(rex_nw, '\x80', orbyte(1<<3), abs_, immediate(1),
immediate(2, 'b'))
NEG_r = insn(rex_w, '\xF7', register(1), '\xD8')
@@ -531,7 +538,13 @@
PUSH_r = insn(rex_nw, register(1), '\x50')
PUSH_b = insn(rex_nw, '\xFF', orbyte(6<<3), stack_bp(1))
+ PUSH_i8 = insn('\x6A', immediate(1, 'b'))
PUSH_i32 = insn('\x68', immediate(1, 'i'))
+ def PUSH_i(mc, immed):
+ if single_byte(immed):
+ mc.PUSH_i8(immed)
+ else:
+ mc.PUSH_i32(immed)
POP_r = insn(rex_nw, register(1), '\x58')
POP_b = insn(rex_nw, '\x8F', orbyte(0<<3), stack_bp(1))
diff --git a/pypy/jit/backend/x86/test/test_rx86.py b/pypy/jit/backend/x86/test/test_rx86.py
--- a/pypy/jit/backend/x86/test/test_rx86.py
+++ b/pypy/jit/backend/x86/test/test_rx86.py
@@ -183,7 +183,8 @@
def test_push32():
cb = CodeBuilder32
- assert_encodes_as(cb, 'PUSH_i32', (9,), '\x68\x09\x00\x00\x00')
+ assert_encodes_as(cb, 'PUSH_i', (0x10009,), '\x68\x09\x00\x01\x00')
+ assert_encodes_as(cb, 'PUSH_i', (9,), '\x6A\x09')
def test_sub_ji8():
cb = CodeBuilder32
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -111,10 +111,13 @@
# The following flag is set on externally raw_malloc'ed arrays of pointers.
# They are allocated with some extra space in front of them for a bitfield,
# one bit per 'card_page_indices' indices.
-GCFLAG_HAS_CARDS = first_gcflag << 5
-GCFLAG_CARDS_SET = first_gcflag << 6 # <- at least one card bit is set
+GCFLAG_HAS_CARDS = first_gcflag << 6
+GCFLAG_CARDS_SET = first_gcflag << 7 # <- at least one card bit is set
+# note that GCFLAG_CARDS_SET is the most significant bit of a byte:
+# this is required for the JIT (x86)
-TID_MASK = (first_gcflag << 7) - 1
+#GCFLAG_UNUSED = first_gcflag << 5 # this flag is free
+TID_MASK = (first_gcflag << 8) - 1
FORWARDSTUB = lltype.GcStruct('forwarding_stub',
@@ -994,12 +997,9 @@
def _init_writebarrier_logic(self):
DEBUG = self.DEBUG
# The purpose of attaching remember_young_pointer to the instance
- # instead of keeping it as a regular method is to help the JIT call it.
- # Additionally, it makes the code in write_barrier() marginally smaller
+ # instead of keeping it as a regular method is to
+ # make the code in write_barrier() marginally smaller
# (which is important because it is inlined *everywhere*).
- # For x86, there is also an extra requirement: when the JIT calls
- # remember_young_pointer(), it assumes that it will not touch the SSE
- # registers, so it does not save and restore them (that's a *hack*!).
def remember_young_pointer(addr_struct, newvalue):
# 'addr_struct' is the address of the object in which we write.
# 'newvalue' is the address that we are going to write in there.
@@ -1033,6 +1033,17 @@
remember_young_pointer._dont_inline_ = True
self.remember_young_pointer = remember_young_pointer
#
+ def jit_remember_young_pointer(addr_struct):
+ # minimal version of the above, with just one argument,
+ # called by the JIT when GCFLAG_TRACK_YOUNG_PTRS is set
+ self.old_objects_pointing_to_young.append(addr_struct)
+ objhdr = self.header(addr_struct)
+ objhdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS
+ if objhdr.tid & GCFLAG_NO_HEAP_PTRS:
+ objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
+ self.prebuilt_root_objects.append(addr_struct)
+ self.jit_remember_young_pointer = jit_remember_young_pointer
+ #
if self.card_page_indices > 0:
self._init_writebarrier_with_card_marker()
@@ -1087,60 +1098,21 @@
self.remember_young_pointer_from_array2 = (
remember_young_pointer_from_array2)
- # xxx trying it out for the JIT: a 3-arguments version of the above
- def remember_young_pointer_from_array3(addr_array, index, newvalue):
+ def jit_remember_young_pointer_from_array(addr_array):
+ # minimal version of the above, with just one argument,
+ # called by the JIT when GCFLAG_TRACK_YOUNG_PTRS is set
+ # but GCFLAG_CARDS_SET is cleared. This tries to set
+ # GCFLAG_CARDS_SET if possible; otherwise, it falls back
+ # to jit_remember_young_pointer().
objhdr = self.header(addr_array)
- #
- # a single check for the common case of neither GCFLAG_HAS_CARDS
- # nor GCFLAG_NO_HEAP_PTRS
- if objhdr.tid & (GCFLAG_HAS_CARDS | GCFLAG_NO_HEAP_PTRS) == 0:
- # common case: fast path, jump to the end of the function
- pass
- elif objhdr.tid & GCFLAG_HAS_CARDS == 0:
- # no cards, but GCFLAG_NO_HEAP_PTRS is set.
- objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
- self.prebuilt_root_objects.append(addr_array)
- # jump to the end of the function
+ if objhdr.tid & GCFLAG_HAS_CARDS:
+ self.old_objects_with_cards_set.append(addr_array)
+ objhdr.tid |= GCFLAG_CARDS_SET
else:
- # case with cards.
- #
- # If the newly written address does not actually point to a
- # young object, leave now.
- if not self.appears_to_be_young(newvalue):
- return
- #
- # 'addr_array' is a raw_malloc'ed array with card markers
- # in front. Compute the index of the bit to set:
- bitindex = index >> self.card_page_shift
- byteindex = bitindex >> 3
- bitmask = 1 << (bitindex & 7)
- #
- # If the bit is already set, leave now.
- addr_byte = self.get_card(addr_array, byteindex)
- byte = ord(addr_byte.char[0])
- if byte & bitmask:
- return
- addr_byte.char[0] = chr(byte | bitmask)
- #
- if objhdr.tid & GCFLAG_CARDS_SET == 0:
- self.old_objects_with_cards_set.append(addr_array)
- objhdr.tid |= GCFLAG_CARDS_SET
- return
- #
- # Logic for the no-cards case, put here to minimize the number
- # of checks done at the start of the function
- if DEBUG: # note: PYPY_GC_DEBUG=1 does not enable this
- ll_assert(self.debug_is_old_object(addr_array),
- "young array with no card but GCFLAG_TRACK_YOUNG_PTRS")
- #
- if self.appears_to_be_young(newvalue):
- self.old_objects_pointing_to_young.append(addr_array)
- objhdr.tid &= ~GCFLAG_TRACK_YOUNG_PTRS
+ self.jit_remember_young_pointer(addr_array)
- remember_young_pointer_from_array3._dont_inline_ = True
- assert self.card_page_indices > 0
- self.remember_young_pointer_from_array3 = (
- remember_young_pointer_from_array3)
+ self.jit_remember_young_pointer_from_array = (
+ jit_remember_young_pointer_from_array)
def get_card(self, obj, byteindex):
size_gc_header = self.gcheaderbuilder.size_gc_header
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -455,13 +455,12 @@
annmodel.SomeAddress()],
annmodel.s_None,
inline=True)
- func = getattr(gcdata.gc, 'remember_young_pointer', None)
+ func = getattr(gcdata.gc, 'jit_remember_young_pointer', None)
if func is not None:
# func should not be a bound method, but a real function
assert isinstance(func, types.FunctionType)
self.write_barrier_failing_case_ptr = getfn(func,
- [annmodel.SomeAddress(),
- annmodel.SomeAddress()],
+ [annmodel.SomeAddress()],
annmodel.s_None)
func = getattr(GCClass, 'write_barrier_from_array', None)
if func is not None:
@@ -472,16 +471,15 @@
annmodel.SomeInteger()],
annmodel.s_None,
inline=True)
- func = getattr(gcdata.gc, 'remember_young_pointer_from_array3',
+ func = getattr(gcdata.gc,
+ 'jit_remember_young_pointer_from_array',
None)
if func is not None:
# func should not be a bound method, but a real function
assert isinstance(func, types.FunctionType)
self.write_barrier_from_array_failing_case_ptr = \
getfn(func,
- [annmodel.SomeAddress(),
- annmodel.SomeInteger(),
- annmodel.SomeAddress()],
+ [annmodel.SomeAddress()],
annmodel.s_None)
self.statistics_ptr = getfn(GCClass.statistics.im_func,
[s_gc, annmodel.SomeInteger()],
More information about the pypy-commit
mailing list