[pypy-commit] pypy vecopt-merge: an all-new stitch bridge that considers register mapping. Works for accumulation values as well
plan_rich
noreply at buildbot.pypy.org
Sun Aug 23 17:24:05 CEST 2015
Author: Richard Plangger <rich at pasra.at>
Branch: vecopt-merge
Changeset: r79159:0a4078644343
Date: 2015-08-23 17:24 +0200
http://bitbucket.org/pypy/pypy/changeset/0a4078644343/
Log: an all-new stitch bridge that considers register mapping. Works for
accumulation values as well
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -590,9 +590,57 @@
rawstart, fullsize)
return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos, rawstart)
- def stitch_bridge(self, faildescr, target):
- assert target.rawstart != 0
- self.patch_jump_for_descr(faildescr, target.rawstart)
+ def stitch_bridge(self, faildescr, version):
+ """ Stitching means that one can enter a bridge with a complete different register
+ allocation. This needs remapping which is done here for both normal registers
+ and accumulation registers.
+ Why? Because this only generates a very small junk of memory, instead of
+ duplicating the loop assembler!
+ """
+ asminfo, bridge_faildescr, compiled_version, looptoken = version._compiled
+ assert asminfo.rawstart != 0
+ self.mc = codebuf.MachineCodeBlockWrapper()
+ allblocks = self.get_asmmemmgr_blocks(looptoken)
+ self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
+ allblocks)
+ frame_info = self.datablockwrapper.malloc_aligned(
+ jitframe.JITFRAMEINFO_SIZE, alignment=WORD)
+
+ self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
+ # if accumulation is saved at the guard, we need to update it here!
+ guard_locs = self.rebuild_faillocs_from_descr(faildescr, version.inputargs)
+ bridge_locs = self.rebuild_faillocs_from_descr(bridge_faildescr, compiled_version.inputargs)
+ guard_accum_info = faildescr.rd_accum_list
+ # O(n^2), but usually you only have at most 1 fail argument
+ while guard_accum_info:
+ bridge_accum_info = bridge_faildescr.rd_accum_list
+ while bridge_accum_info:
+ if bridge_accum_info.scalar_position == guard_accum_info.scalar_position:
+ # the mapping might be wrong!
+ if bridge_accum_info.vector_loc is not guard_accum_info.vector_loc:
+ self.mov(guard_accum_info.vector_loc, bridge_accum_info.vector_loc)
+ bridge_accum_info = bridge_accum_info.prev
+ guard_accum_info = guard_accum_info.prev
+
+ # register mapping is most likely NOT valid, thus remap it in this
+ # short piece of assembler
+ assert len(guard_locs) == len(bridge_locs)
+ for i,gloc in enumerate(guard_locs):
+ bloc = bridge_locs[i]
+ bstack = bloc.location_code() == 'b'
+ gstack = gloc.location_code() == 'b'
+ if bstack and gstack:
+ pass
+ elif gloc is not bloc:
+ self.mov(gloc, bloc)
+ self.mc.JMP_l(0)
+ self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
+ offset = self.mc.get_relative_pos() - 4
+ rawstart = self.materialize_loop(looptoken)
+ # update the exit target
+ self._patch_jump_for_descr(rawstart + offset, asminfo.rawstart)
+ # update the guard to jump right to this custom piece of assembler
+ self.patch_jump_for_descr(faildescr, rawstart)
def write_pending_failure_recoveries(self, regalloc):
# for each pending guard, generate the code of the recovery stub
@@ -732,6 +780,10 @@
def patch_jump_for_descr(self, faildescr, adr_new_target):
adr_jump_offset = faildescr.adr_jump_offset
+ self._patch_jump_for_descr(adr_jump_offset, adr_new_target)
+ faildescr.adr_jump_offset = 0 # means "patched"
+
+ def _patch_jump_for_descr(self, adr_jump_offset, adr_new_target):
assert adr_jump_offset != 0
offset = adr_new_target - (adr_jump_offset + 4)
# If the new target fits within a rel32 of the jump, just patch
@@ -752,7 +804,6 @@
p = rffi.cast(rffi.INTP, adr_jump_offset)
adr_target = adr_jump_offset + 4 + rffi.cast(lltype.Signed, p[0])
mc.copy_to_raw_memory(adr_target)
- faildescr.adr_jump_offset = 0 # means "patched"
def fixup_target_tokens(self, rawstart):
for targettoken in self.target_tokens_currently_compiling:
diff --git a/rpython/jit/metainterp/compile.py b/rpython/jit/metainterp/compile.py
--- a/rpython/jit/metainterp/compile.py
+++ b/rpython/jit/metainterp/compile.py
@@ -209,9 +209,8 @@
version.operations, jitcell_token)
record_loop_or_bridge(metainterp_sd, vl)
assert asminfo is not None
- version._compiled = asminfo
+ version._compiled = (asminfo, faildescr, faildescr.version, jitcell_token)
faildescr.version = None
- # stitch the rest of the traces
for lv in loop.versions:
if not lv.compiled():
# the version was never compiled, do not bother
@@ -221,7 +220,7 @@
assert isinstance(faildescr, CompileLoopVersionDescr)
version = faildescr.version
if version and version.compiled():
- cpu.stitch_bridge(faildescr, version._compiled)
+ cpu.stitch_bridge(faildescr, version)
faildescr.version = None
loop.versions = None
More information about the pypy-commit
mailing list