[pypy-commit] pypy branch-prediction: WriteBarrierSlowPath
arigo
pypy.commits at gmail.com
Thu Apr 6 11:23:08 EDT 2017
Author: Armin Rigo <arigo at tunes.org>
Branch: branch-prediction
Changeset: r90998:f34b447a2a19
Date: 2017-04-06 17:22 +0200
http://bitbucket.org/pypy/pypy/changeset/f34b447a2a19/
Log: WriteBarrierSlowPath
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -51,9 +51,8 @@
self.continue_addr = mc.get_relative_pos(break_basic_block=False)
self.saved_scratch_value_2 = mc.get_scratch_register_known_value()
- def generate(self, assembler):
- # no alignment here, prefer compactness for these slow-paths
- mc = assembler.mc
+ def generate(self, assembler, mc):
+ # no alignment here, prefer compactness for these slow-paths.
# patch the original jump to go here
offset = mc.get_relative_pos() - self.cond_jump_addr
mc.overwrite32(self.cond_jump_addr-4, offset)
@@ -119,7 +118,6 @@
self.frame_depth_to_patch = []
def teardown(self):
- self.pending_slowpaths = None
self.pending_guard_tokens = None
if WORD == 8:
self.pending_memoryerror_trampoline_from = None
@@ -212,6 +210,7 @@
""" This builds a general call slowpath, for whatever call happens to
come.
"""
+ self.pending_slowpaths = []
mc = codebuf.MachineCodeBlockWrapper()
# copy registers to the frame, with the exception of the
# 'cond_call_register_arguments' and eax, because these have already
@@ -242,6 +241,7 @@
self.pop_gcmap(mc) # cancel the push_gcmap(store=True) in the caller
self._pop_all_regs_from_frame(mc, [eax], supports_floats, callee_only)
mc.RET()
+ self.flush_pending_slowpaths(mc)
return mc.materialize(self.cpu, [])
def _build_malloc_slowpath(self, kind):
@@ -258,6 +258,7 @@
This function must preserve all registers apart from ecx and edx.
"""
assert kind in ['fixed', 'str', 'unicode', 'var']
+ self.pending_slowpaths = []
mc = codebuf.MachineCodeBlockWrapper()
self._push_all_regs_to_frame(mc, [ecx, edx], self.cpu.supports_floats)
# the caller already did push_gcmap(store=True)
@@ -330,6 +331,7 @@
mc.force_frame_size(DEFAULT_FRAME_BYTES + WORD)
mc.ADD_ri(esp.value, WORD)
mc.JMP(imm(self.propagate_exception_path))
+ self.flush_pending_slowpaths(mc)
#
rawstart = mc.materialize(self.cpu, [])
return rawstart
@@ -788,12 +790,17 @@
gcreftracers.append(tracer) # keepalive
self.teardown_gcrefs_list()
- def write_pending_failure_recoveries(self, regalloc):
+ def flush_pending_slowpaths(self, mc):
# for each pending slowpath, generate it now. Note that this
# may occasionally add an extra guard_token in
- # pending_guard_tokens, so it must be done before the next loop.
+ # pending_guard_tokens, so it must be done before the
+ # following loop in write_pending_failure_recoveries().
for sp in self.pending_slowpaths:
- sp.generate(self)
+ sp.generate(self, mc)
+ self.pending_slowpaths = None
+
+ def write_pending_failure_recoveries(self, regalloc):
+ self.flush_pending_slowpaths(self.mc)
# for each pending guard, generate the code of the recovery stub
# at the end of self.mc.
for tok in self.pending_guard_tokens:
@@ -2285,6 +2292,91 @@
# ------------------- END CALL ASSEMBLER -----------------------
+ class WriteBarrierSlowPath(SlowPath):
+ def generate_body(self, assembler, mc):
+ # for cond_call_gc_wb_array, also add another fast path:
+ # if GCFLAG_CARDS_SET, then we can just set one bit and be done
+ card_marking = (self.loc_index is not None)
+ if card_marking:
+ # GCFLAG_CARDS_SET is in this byte at 0x80, so this fact can
+ # been checked by the sign flags of the previous TEST8
+ js_location = mc.emit_forward_jump('S') # patched later
+ else:
+ js_location = 0
+
+ # Write only a CALL to the helper prepared in advance, passing it as
+ # argument the address of the structure we are writing into
+ # (the first argument to COND_CALL_GC_WB).
+ helper_num = card_marking
+ if self.is_frame:
+ helper_num = 4
+ elif (assembler._regalloc is not None and
+ assembler._regalloc.xrm.reg_bindings):
+ helper_num += 2
+ if assembler.wb_slowpath[helper_num] == 0: # tests only
+ assert not we_are_translated()
+ assembler.cpu.gc_ll_descr.write_barrier_descr = descr
+ assembler._build_wb_slowpath(card_marking,
+ bool(assembler._regalloc.xrm.reg_bindings))
+ assert assembler.wb_slowpath[helper_num] != 0
+ #
+ if not self.is_frame:
+ mc.PUSH(loc_base)
+ mc.CALL(imm(assembler.wb_slowpath[helper_num]))
+ if not self.is_frame:
+ mc.stack_frame_size_delta(-WORD)
+
+ if card_marking:
+ # The helper ends again with a check of the flag in the object.
+ # So here, we can simply write again a 'JNS', which will be
+ # taken if GCFLAG_CARDS_SET is still not set.
+ jns_location = mc.emit_forward_jump('NS') # patched later
+ #
+ # patch the JS above
+ mc.patch_forward_jump(js_location)
+ #
+ # case GCFLAG_CARDS_SET: emit a few instructions to do
+ # directly the card flag setting
+ loc_index = self.loc_index
+ if isinstance(loc_index, RegLoc):
+ if IS_X86_64 and isinstance(loc_base, RegLoc):
+ # copy loc_index into r11
+ tmp1 = X86_64_SCRATCH_REG
+ mc.forget_scratch_register()
+ mc.MOV_rr(tmp1.value, loc_index.value)
+ final_pop = False
+ else:
+ # must save the register loc_index before it is mutated
+ mc.PUSH_r(loc_index.value)
+ tmp1 = loc_index
+ final_pop = True
+ # SHR tmp, card_page_shift
+ mc.SHR_ri(tmp1.value, descr.jit_wb_card_page_shift)
+ # XOR tmp, -8
+ mc.XOR_ri(tmp1.value, -8)
+ # BTS [loc_base], tmp
+ if final_pop:
+ # r11 is not specially used, fall back to regloc.py
+ mc.BTS(addr_add_const(loc_base, 0), tmp1)
+ else:
+ # tmp1 is r11! but in this case, loc_base is a
+ # register so we can invoke directly rx86.py
+ mc.BTS_mr((loc_base.value, 0), tmp1.value)
+ # done
+ if final_pop:
+ mc.POP_r(loc_index.value)
+ #
+ elif isinstance(loc_index, ImmedLoc):
+ byte_index = loc_index.value >> descr.jit_wb_card_page_shift
+ byte_ofs = ~(byte_index >> 3)
+ byte_val = 1 << (byte_index & 7)
+ mc.OR8(addr_add_const(loc_base, byte_ofs), imm(byte_val))
+ else:
+ raise AssertionError("index is neither RegLoc nor ImmedLoc")
+ #
+ # patch the JNS above
+ mc.patch_forward_jump(jns_location)
+
def _write_barrier_fastpath(self, mc, descr, arglocs, array=False,
is_frame=False):
# Write code equivalent to write_barrier() in the GC: it checks
@@ -2295,14 +2387,14 @@
cls = self.cpu.gc_ll_descr.has_write_barrier_class()
assert cls is not None and isinstance(descr, cls)
#
- card_marking = False
+ loc_index = None
mask = descr.jit_wb_if_flag_singlebyte
if array and descr.jit_wb_cards_set != 0:
# assumptions the rest of the function depends on:
assert (descr.jit_wb_cards_set_byteofs ==
descr.jit_wb_if_flag_byteofs)
assert descr.jit_wb_cards_set_singlebyte == -0x80
- card_marking = True
+ loc_index = arglocs[1]
mask = descr.jit_wb_if_flag_singlebyte | -0x80
#
loc_base = arglocs[0]
@@ -2312,92 +2404,11 @@
else:
loc = addr_add_const(loc_base, descr.jit_wb_if_flag_byteofs)
mc.TEST8(loc, imm(mask))
- # PPP FIX ME
- jz_location = mc.emit_forward_jump('Z') # patched later
-
- # for cond_call_gc_wb_array, also add another fast path:
- # if GCFLAG_CARDS_SET, then we can just set one bit and be done
- if card_marking:
- # GCFLAG_CARDS_SET is in this byte at 0x80, so this fact can
- # been checked by the sign flags of the previous TEST8
- js_location = mc.emit_forward_jump('S') # patched later
- else:
- js_location = 0
-
- # Write only a CALL to the helper prepared in advance, passing it as
- # argument the address of the structure we are writing into
- # (the first argument to COND_CALL_GC_WB).
- helper_num = card_marking
- if is_frame:
- helper_num = 4
- elif self._regalloc is not None and self._regalloc.xrm.reg_bindings:
- helper_num += 2
- if self.wb_slowpath[helper_num] == 0: # tests only
- assert not we_are_translated()
- self.cpu.gc_ll_descr.write_barrier_descr = descr
- self._build_wb_slowpath(card_marking,
- bool(self._regalloc.xrm.reg_bindings))
- assert self.wb_slowpath[helper_num] != 0
- #
- if not is_frame:
- mc.PUSH(loc_base)
- mc.CALL(imm(self.wb_slowpath[helper_num]))
- if not is_frame:
- mc.stack_frame_size_delta(-WORD)
-
- if card_marking:
- # The helper ends again with a check of the flag in the object.
- # So here, we can simply write again a 'JNS', which will be
- # taken if GCFLAG_CARDS_SET is still not set.
- jns_location = mc.emit_forward_jump('NS') # patched later
- #
- # patch the JS above
- mc.patch_forward_jump(js_location)
- #
- # case GCFLAG_CARDS_SET: emit a few instructions to do
- # directly the card flag setting
- loc_index = arglocs[1]
- if isinstance(loc_index, RegLoc):
- if IS_X86_64 and isinstance(loc_base, RegLoc):
- # copy loc_index into r11
- tmp1 = X86_64_SCRATCH_REG
- mc.forget_scratch_register()
- mc.MOV_rr(tmp1.value, loc_index.value)
- final_pop = False
- else:
- # must save the register loc_index before it is mutated
- mc.PUSH_r(loc_index.value)
- tmp1 = loc_index
- final_pop = True
- # SHR tmp, card_page_shift
- mc.SHR_ri(tmp1.value, descr.jit_wb_card_page_shift)
- # XOR tmp, -8
- mc.XOR_ri(tmp1.value, -8)
- # BTS [loc_base], tmp
- if final_pop:
- # r11 is not specially used, fall back to regloc.py
- mc.BTS(addr_add_const(loc_base, 0), tmp1)
- else:
- # tmp1 is r11! but in this case, loc_base is a
- # register so we can invoke directly rx86.py
- mc.BTS_mr((loc_base.value, 0), tmp1.value)
- # done
- if final_pop:
- mc.POP_r(loc_index.value)
- #
- elif isinstance(loc_index, ImmedLoc):
- byte_index = loc_index.value >> descr.jit_wb_card_page_shift
- byte_ofs = ~(byte_index >> 3)
- byte_val = 1 << (byte_index & 7)
- mc.OR8(addr_add_const(loc_base, byte_ofs), imm(byte_val))
- else:
- raise AssertionError("index is neither RegLoc nor ImmedLoc")
- #
- # patch the JNS above
- mc.patch_forward_jump(jns_location)
-
- # patch the JZ above
- mc.patch_forward_jump(jz_location)
+ sp = self.WriteBarrierSlowPath(mc, rx86.Conditions['NZ'])
+ sp.loc_index = loc_index
+ sp.is_frame = is_frame
+ sp.set_continue_addr(mc)
+ self.pending_slowpaths.append(sp)
def genop_discard_cond_call_gc_wb(self, op, arglocs):
self._write_barrier_fastpath(self.mc, op.getdescr(), arglocs)
More information about the pypy-commit mailing list