[pypy-commit] pypy branch-prediction: Fix WriteBarrierSlowPath. Add ReacqGilSlowPath.
arigo
pypy.commits at gmail.com
Fri Apr 7 07:25:56 EDT 2017
Author: Armin Rigo <arigo at tunes.org>
Branch: branch-prediction
Changeset: r91014:a47cbc896847
Date: 2017-04-07 13:24 +0200
http://bitbucket.org/pypy/pypy/changeset/a47cbc896847/
Log: Fix WriteBarrierSlowPath. Add ReacqGilSlowPath.
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -41,33 +41,6 @@
from rpython.rlib.objectmodel import compute_unique_id
-class SlowPath(object):
- def __init__(self, mc, condition):
- mc.J_il(condition, 0xfffff) # patched later
- self.cond_jump_addr = mc.get_relative_pos(break_basic_block=False)
- self.saved_scratch_value_1 = mc.get_scratch_register_known_value()
-
- def set_continue_addr(self, mc):
- self.continue_addr = mc.get_relative_pos(break_basic_block=False)
- self.saved_scratch_value_2 = mc.get_scratch_register_known_value()
-
- def generate(self, assembler, mc):
- # no alignment here, prefer compactness for these slow-paths.
- # patch the original jump to go here
- offset = mc.get_relative_pos() - self.cond_jump_addr
- mc.overwrite32(self.cond_jump_addr-4, offset)
- # restore the knowledge of the scratch register value
- # (this does not emit any code)
- mc.restore_scratch_register_known_value(self.saved_scratch_value_1)
- # generate the body of the slow-path
- self.generate_body(assembler, mc)
- # reload (if needed) the (possibly different) scratch register value
- mc.load_scratch_if_known(self.saved_scratch_value_2)
- # jump back
- curpos = mc.get_relative_pos() + 5
- mc.JMP_l(self.continue_addr - curpos)
-
-
class Assembler386(BaseAssembler, VectorAssemblerMixin):
_regalloc = None
_output_loop_log = None
@@ -865,7 +838,7 @@
for ofs in self.frame_depth_to_patch:
self._patch_frame_depth(ofs + rawstart, framedepth)
- class IncreaseStackSlowPath(SlowPath):
+ class IncreaseStackSlowPath(codebuf.SlowPath):
def generate_body(self, assembler, mc):
mc.MOV_si(WORD, 0xffffff) # force writing 32 bit
ofs2 = mc.get_relative_pos(break_basic_block=False) - 4
@@ -1033,7 +1006,7 @@
if gcrootmap and gcrootmap.is_shadow_stack:
self._call_header_shadowstack(gcrootmap)
- class StackCheckSlowPath(SlowPath):
+ class StackCheckSlowPath(codebuf.SlowPath):
def generate_body(self, assembler, mc):
mc.CALL(imm(assembler.stack_check_slowpath))
@@ -2291,8 +2264,9 @@
# ------------------- END CALL ASSEMBLER -----------------------
- class WriteBarrierSlowPath(SlowPath):
+ class WriteBarrierSlowPath(codebuf.SlowPath):
def generate_body(self, assembler, mc):
+ mc.force_frame_size(DEFAULT_FRAME_BYTES)
# for cond_call_gc_wb_array, also add another fast path:
# if GCFLAG_CARDS_SET, then we can just set one bit and be done
card_marking = (self.loc_index is not None)
@@ -2312,6 +2286,8 @@
elif (assembler._regalloc is not None and
assembler._regalloc.xrm.reg_bindings):
helper_num += 2
+ descr = self.descr
+ loc_base = self.loc_base
if assembler.wb_slowpath[helper_num] == 0: # tests only
assert not we_are_translated()
assembler.cpu.gc_ll_descr.write_barrier_descr = descr
@@ -2404,8 +2380,10 @@
loc = addr_add_const(loc_base, descr.jit_wb_if_flag_byteofs)
mc.TEST8(loc, imm(mask))
sp = self.WriteBarrierSlowPath(mc, rx86.Conditions['NZ'])
+ sp.loc_base = loc_base
sp.loc_index = loc_index
sp.is_frame = is_frame
+ sp.descr = descr
sp.set_continue_addr(mc)
self.pending_slowpaths.append(sp)
@@ -2438,7 +2416,7 @@
def label(self):
self._check_frame_depth_debug(self.mc)
- class CondCallSlowPath(SlowPath):
+ class CondCallSlowPath(codebuf.SlowPath):
guard_token_no_exception = None
def generate_body(self, assembler, mc):
@@ -2508,7 +2486,7 @@
sp.resloc = resloc
self.pending_slowpaths.append(sp)
- class MallocCondSlowPath(SlowPath):
+ class MallocCondSlowPath(codebuf.SlowPath):
def generate_body(self, assembler, mc):
assembler.push_gcmap(mc, self.gcmap, store=True)
mc.CALL(imm(follow_jump(assembler.malloc_slowpath)))
@@ -2541,7 +2519,7 @@
sp.set_continue_addr(self.mc)
self.pending_slowpaths.append(sp)
- class MallocCondVarsizeSlowPath(SlowPath):
+ class MallocCondVarsizeSlowPath(codebuf.SlowPath):
def generate_body(self, assembler, mc):
# save the gcmap
assembler.push_gcmap(mc, self.gcmap, store=True)
diff --git a/rpython/jit/backend/x86/callbuilder.py b/rpython/jit/backend/x86/callbuilder.py
--- a/rpython/jit/backend/x86/callbuilder.py
+++ b/rpython/jit/backend/x86/callbuilder.py
@@ -11,6 +11,7 @@
r12, r13, r14, r15, X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG,
RegLoc, RawEspLoc, RawEbpLoc, imm, ImmedLoc)
from rpython.jit.backend.x86.jump import remap_frame_layout
+from rpython.jit.backend.x86 import codebuf
from rpython.jit.backend.llsupport.callbuilder import AbstractCallBuilder
from rpython.jit.backend.llsupport import llerrno
from rpython.rtyper.lltypesystem import llmemory, rffi
@@ -293,6 +294,41 @@
tlofsreg = self.get_tlofs_reg() # => esi (possibly reused)
mc.MOV32_mr((tlofsreg.value, lasterror), eax.value)
+ class ReacqGilSlowPath(codebuf.SlowPath):
+ early_jump_addr = 0
+
+ def generate_body(self, assembler, mc):
+ if self.early_jump_addr != 0:
+ # This slow-path has two entry points, with two
+ # conditional jumps. We can jump to the regular start
+ # of this slow-path with the 2nd conditional jump. Or,
+ # we can jump past the "MOV(heap(fastgil), ecx)"
+ # instruction from the 1st conditional jump.
+ # This instruction reverts the rpy_fastgil acquired
+ # previously, so that the general 'reacqgil_addr'
+ # function can acquire it again. It must only be done
+ # if we actually succeeded in acquiring rpy_fastgil.
+ from rpython.jit.backend.x86.assembler import heap
+ mc.MOV(heap(self.fastgil), ecx)
+ offset = mc.get_relative_pos() - self.early_jump_addr
+ mc.overwrite32(self.early_jump_addr-4, offset)
+ # scratch register forgotten here, by get_relative_pos()
+
+ # call the reacqgil() function
+ cb = self.callbuilder
+ if not cb.result_value_saved_early:
+ cb.save_result_value(save_edx=False)
+ if assembler._is_asmgcc():
+ if IS_X86_32:
+ css_value = edx
+ old_value = ecx
+ mc.MOV_sr(4, old_value.value)
+ mc.MOV_sr(0, css_value.value)
+ # on X86_64, they are already in the right registers
+ mc.CALL(imm(follow_jump(assembler.reacqgil_addr)))
+ if not cb.result_value_saved_early:
+ cb.restore_result_value(save_edx=False)
+
def move_real_result_and_call_reacqgil_addr(self, fastgil):
from rpython.jit.backend.x86 import rx86
#
@@ -314,8 +350,8 @@
if not self.result_value_saved_early:
mc.MOV_sr(12, edx.value)
restore_edx = True
- css_value = edx
- old_value = ecx
+ css_value = edx # note: duplicated in ReacqGilSlowPath
+ old_value = ecx #
elif IS_X86_64:
css_value = edi
old_value = esi
@@ -341,36 +377,25 @@
# thread. So here we check if the shadowstack pointer
# is still the same as before we released the GIL (saved
# in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
- jne_location = mc.emit_forward_jump('NE')
+ mc.J_il(rx86.Conditions['NE'], 0xfffff) # patched later
+ early_jump_addr = mc.get_relative_pos(break_basic_block=False)
+ # ^^^ this jump will go to almost the same place as the
+ # ReacqGilSlowPath() computes, but one instruction farther,
+ # i.e. just after the "MOV(heap(fastgil), ecx)".
+
# here, ecx (=old_value) is zero (so rpy_fastgil was in 'released'
# state before the XCHG, but the XCHG acquired it by writing 1)
rst = gcrootmap.get_root_stack_top_addr()
mc = self.mc
mc.CMP(ebx, heap(rst))
- # PPP FIX ME
- je_location = mc.emit_forward_jump('E')
- # revert the rpy_fastgil acquired above, so that the
- # general 'reacqgil_addr' below can acquire it again...
- mc.MOV(heap(fastgil), ecx)
- # patch the JNE above
- mc.patch_forward_jump(jne_location)
+ sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
+ sp.early_jump_addr = early_jump_addr
+ sp.fastgil = fastgil
else:
- je_location = mc.emit_forward_jump('E')
- #
- # Yes, we need to call the reacqgil() function
- if not self.result_value_saved_early:
- self.save_result_value(save_edx=False)
- if self.asm._is_asmgcc():
- if IS_X86_32:
- mc.MOV_sr(4, old_value.value)
- mc.MOV_sr(0, css_value.value)
- # on X86_64, they are already in the right registers
- mc.CALL(imm(follow_jump(self.asm.reacqgil_addr)))
- if not self.result_value_saved_early:
- self.restore_result_value(save_edx=False)
- #
- # patch the JE above
- mc.patch_forward_jump(je_location)
+ sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
+ sp.callbuilder = self
+ sp.set_continue_addr(mc)
+ self.asm.pending_slowpaths.append(sp)
#
if restore_edx:
mc.MOV_rs(edx.value, 12) # restore this
diff --git a/rpython/jit/backend/x86/codebuf.py b/rpython/jit/backend/x86/codebuf.py
--- a/rpython/jit/backend/x86/codebuf.py
+++ b/rpython/jit/backend/x86/codebuf.py
@@ -81,3 +81,30 @@
if break_basic_block:
self.forget_scratch_register()
return BlockBuilderMixin.get_relative_pos(self)
+
+
+class SlowPath(object):
+ def __init__(self, mc, condition):
+ mc.J_il(condition, 0xfffff) # patched later
+ self.cond_jump_addr = mc.get_relative_pos(break_basic_block=False)
+ self.saved_scratch_value_1 = mc.get_scratch_register_known_value()
+
+ def set_continue_addr(self, mc):
+ self.continue_addr = mc.get_relative_pos(break_basic_block=False)
+ self.saved_scratch_value_2 = mc.get_scratch_register_known_value()
+
+ def generate(self, assembler, mc):
+ # no alignment here, prefer compactness for these slow-paths.
+ # patch the original jump to go here
+ offset = mc.get_relative_pos() - self.cond_jump_addr
+ mc.overwrite32(self.cond_jump_addr-4, offset)
+ # restore the knowledge of the scratch register value
+ # (this does not emit any code)
+ mc.restore_scratch_register_known_value(self.saved_scratch_value_1)
+ # generate the body of the slow-path
+ self.generate_body(assembler, mc)
+ # reload (if needed) the (possibly different) scratch register value
+ mc.load_scratch_if_known(self.saved_scratch_value_2)
+ # jump back
+ curpos = mc.get_relative_pos() + 5
+ mc.JMP_l(self.continue_addr - curpos)
More information about the pypy-commit mailing list