[pypy-commit] pypy ppc-updated-backend: PPC Backend #1: merged "default" and the unfinished
arigo
noreply at buildbot.pypy.org
Fri Aug 28 09:13:03 CEST 2015
Author: Armin Rigo <arigo at tunes.org>
Branch: ppc-updated-backend
Changeset: r79264:2b7e225663d7
Date: 2015-08-22 05:41 -0700
http://bitbucket.org/pypy/pypy/changeset/2b7e225663d7/
Log: PPC Backend #1: merged "default" and the unfinished "ppc-updated-
backend" branch, and then fixed stuff until we reach a state where
it doesn't completely segfault whenever we try to run any test. At
this point, a bit more than half the tests of test_runner.py pass.
diff --git a/rpython/jit/backend/ppc/TODO b/rpython/jit/backend/ppc/TODO
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/ppc/TODO
@@ -0,0 +1,4 @@
+
+prepare_guard_int_lt & friends: re-enable in walk_operations()
+
+guard_xyz: fail if the target of the branch is too far away (+32k?)
diff --git a/rpython/jit/backend/ppc/arch.py b/rpython/jit/backend/ppc/arch.py
--- a/rpython/jit/backend/ppc/arch.py
+++ b/rpython/jit/backend/ppc/arch.py
@@ -72,10 +72,3 @@
STD_FRAME_SIZE_IN_BYTES = GPR_SAVE_AREA_OFFSET + len(REGISTERS_SAVED) * WORD
assert STD_FRAME_SIZE_IN_BYTES % 16 == 0
-
-
-# The JITFRAME_FIXED_SIZE is measured in words, and should be the
-# number of registers that need to be saved into the jitframe when
-# failing a guard, for example. (Note: it is about the jitframe,
-# not the frame.)
-JITFRAME_FIXED_SIZE = len(r.MANAGED_REGS) + len(r.MANAGED_FP_REGS)
diff --git a/rpython/jit/backend/ppc/codebuilder.py b/rpython/jit/backend/ppc/codebuilder.py
--- a/rpython/jit/backend/ppc/codebuilder.py
+++ b/rpython/jit/backend/ppc/codebuilder.py
@@ -923,15 +923,15 @@
def flush_icache(x, y): pass
class PPCGuardToken(GuardToken):
- # We may have to find a suitable default value for fcond
- def __init__(self, cpu, gcmap, descr, failargs, faillocs, offset,
+ def __init__(self, cpu, gcmap, descr, failargs, faillocs,
exc, frame_depth, is_guard_not_invalidated=False,
- is_guard_not_forced=False, fcond=c.EQ):
+ is_guard_not_forced=False, fcond=c.UH):
+ assert fcond != c.UH
GuardToken.__init__(self, cpu, gcmap, descr, failargs, faillocs, exc,
frame_depth, is_guard_not_invalidated,
is_guard_not_forced)
self.fcond = fcond
- self.offset = offset
+ #self.offset = offset
class OverwritingBuilder(PPCAssembler):
def __init__(self, cb, start, num_insts):
@@ -990,14 +990,6 @@
else:
self.ldx(rD.value, 0, rD.value)
- def store_reg(self, source_reg, addr):
- with scratch_reg(self):
- self.load_imm(r.SCRATCH, addr)
- if IS_PPC_32:
- self.stwx(source_reg.value, 0, r.SCRATCH.value)
- else:
- self.stdx(source_reg.value, 0, r.SCRATCH.value)
-
def b_offset(self, target):
curpos = self.currpos()
offset = target - curpos
@@ -1160,7 +1152,7 @@
self.writechar(chr(word & 0xFF))
def currpos(self):
- return self.get_rel_pos()
+ return self.get_relative_pos()
def flush_cache(self, addr):
startaddr = rffi.cast(lltype.Signed, addr)
diff --git a/rpython/jit/backend/ppc/condition.py b/rpython/jit/backend/ppc/condition.py
--- a/rpython/jit/backend/ppc/condition.py
+++ b/rpython/jit/backend/ppc/condition.py
@@ -9,6 +9,7 @@
LT = (0, SET)
EQ = (2, SET)
GE = (0, UNSET)
+UH = (-1, -1) # invalid
# values below are random ...
diff --git a/rpython/jit/backend/ppc/jump.py b/rpython/jit/backend/ppc/jump.py
--- a/rpython/jit/backend/ppc/jump.py
+++ b/rpython/jit/backend/ppc/jump.py
@@ -1,7 +1,5 @@
# ../x86/jump.py
# XXX combine with ../x86/jump.py and move to llsupport
-import sys
-from rpython.tool.pairtype import extendabletype
def remap_frame_layout(assembler, src_locations, dst_locations, tmpreg):
pending_dests = len(dst_locations)
@@ -77,9 +75,9 @@
def remap_frame_layout_mixed(assembler,
src_locations1, dst_locations1, tmpreg1,
src_locations2, dst_locations2, tmpreg2):
- # find and push the xmm stack locations from src_locations2 that
+ # find and push the fp stack locations from src_locations2 that
# are going to be overwritten by dst_locations1
- from pypy.jit.backend.ppc.arch import WORD
+ from rpython.jit.backend.ppc.arch import WORD
extrapushes = []
dst_keys = {}
for loc in dst_locations1:
@@ -104,10 +102,10 @@
# remap the integer and pointer registers and stack locations
remap_frame_layout(assembler, src_locations1, dst_locations1, tmpreg1)
#
- # remap the vfp registers and stack locations
+ # remap the fp registers and stack locations
remap_frame_layout(assembler, src_locations2, dst_locations2, tmpreg2)
#
- # finally, pop the extra xmm stack locations
+ # finally, pop the extra fp stack locations
while len(extrapushes) > 0:
loc = extrapushes.pop()
assembler.regalloc_pop(loc)
diff --git a/rpython/jit/backend/ppc/locations.py b/rpython/jit/backend/ppc/locations.py
--- a/rpython/jit/backend/ppc/locations.py
+++ b/rpython/jit/backend/ppc/locations.py
@@ -1,9 +1,7 @@
from rpython.jit.metainterp.history import INT, FLOAT
import sys
-# TODO: solve the circular import: runner -> arch -> register -> locations ->
-# arch
-# XXX import from arch.py, currently we have a circular import
+# cannot import from arch.py, currently we have a circular import
if sys.maxint == (2**31 - 1):
WORD = 4
FWORD = 8
@@ -12,8 +10,6 @@
FWORD = 8
DWORD = 2 * WORD
-# JITFRAME_FIXED_SIZE is also duplicated because of the circular import
-JITFRAME_FIXED_SIZE = 27 + 31 + 1 + 4 + 1
class AssemblerLocation(object):
_immutable_ = True
@@ -148,11 +144,8 @@
def imm(val):
return ImmLocation(val)
-def get_spp_offset(pos):
- if pos < 0:
- return -pos * WORD
- else:
- return -(pos + 1) * WORD
-
def get_fp_offset(base_ofs, position):
- return base_ofs + position
+ from rpython.jit.backend.ppc.register import JITFRAME_FIXED_SIZE
+ # Argument is a frame position (0, 1, 2...).
+ # Returns the n'th word beyond the fixed frame size.
+ return base_ofs + WORD * (position + JITFRAME_FIXED_SIZE)
diff --git a/rpython/jit/backend/ppc/opassembler.py b/rpython/jit/backend/ppc/opassembler.py
--- a/rpython/jit/backend/ppc/opassembler.py
+++ b/rpython/jit/backend/ppc/opassembler.py
@@ -211,24 +211,24 @@
def emit_cast_float_to_int(self, op, arglocs, regalloc):
l0, temp_loc, res = arglocs
self.mc.fctidz(temp_loc.value, l0.value)
- self.mc.stfd(temp_loc.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
- self.mc.ld(res.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
+ self.mc.stfd(temp_loc.value, r.SP.value, -16)
+ self.mc.ld(res.value, r.SP.value, -16)
def emit_cast_int_to_float(self, op, arglocs, regalloc):
l0, temp_loc, res = arglocs
- self.mc.std(l0.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
- self.mc.lfd(temp_loc.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
+ self.mc.std(l0.value, r.SP.value, -16)
+ self.mc.lfd(temp_loc.value, r.SP.value, -16)
self.mc.fcfid(res.value, temp_loc.value)
def emit_convert_float_bytes_to_longlong(self, op, arglocs, regalloc):
l0, res = arglocs
- self.mc.stfd(l0.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
- self.mc.ld(res.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
+ self.mc.stfd(l0.value, r.SP.value, -16)
+ self.mc.ld(res.value, r.SP.value, -16)
def emit_convert_longlong_bytes_to_float(self, op, arglocs, regalloc):
l0, res = arglocs
- self.mc.std(l0.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
- self.mc.lfd(res.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
+ self.mc.std(l0.value, r.SP.value, -16)
+ self.mc.lfd(res.value, r.SP.value, -16)
class GuardOpAssembler(object):
@@ -237,22 +237,22 @@
def _emit_guard(self, op, arglocs, fcond, save_exc=False,
is_guard_not_invalidated=False,
is_guard_not_forced=False):
- pos = self.mc.currpos()
- self.mc.nop() # has to be patched later on
token = self.build_guard_token(op, arglocs[0].value, arglocs[1:],
fcond, save_exc, is_guard_not_invalidated,
is_guard_not_forced)
- self.pending_guards.append(token)
+ token.pos_jump_offset = self.mc.currpos()
+ self.mc.nop() # has to be patched later on
+ self.pending_guard_tokens.append(token)
def build_guard_token(self, op, frame_depth, arglocs, fcond, save_exc,
is_guard_not_invalidated=False,
is_guard_not_forced=False):
descr = op.getdescr()
- offset = self.mc.currpos()
- gcmap = allocate_gcmap(self, frame_depth, JITFRAME_FIXED_SIZE)
+ gcmap = allocate_gcmap(self, frame_depth, r.JITFRAME_FIXED_SIZE)
token = PPCGuardToken(self.cpu, gcmap, descr, op.getfailargs(),
arglocs, save_exc, frame_depth,
- is_guard_not_invalidated, is_guard_not_forced)
+ is_guard_not_invalidated, is_guard_not_forced,
+ fcond)
return token
def emit_guard_true(self, op, arglocs, regalloc):
@@ -356,7 +356,10 @@
base_ofs = self.cpu.get_baseofs_of_frame_field()
if len(arglocs) == 2:
[return_val, fail_descr_loc] = arglocs
- self.mc.std(return_val.value, r.SPP.value, base_ofs)
+ if op.getarg(0).type == FLOAT:
+ self.mc.stfd(return_val.value, r.SPP.value, base_ofs)
+ else:
+ self.mc.std(return_val.value, r.SPP.value, base_ofs)
else:
[fail_descr_loc] = arglocs
diff --git a/rpython/jit/backend/ppc/ppc_assembler.py b/rpython/jit/backend/ppc/ppc_assembler.py
--- a/rpython/jit/backend/ppc/ppc_assembler.py
+++ b/rpython/jit/backend/ppc/ppc_assembler.py
@@ -7,12 +7,12 @@
LR_BC_OFFSET, REGISTERS_SAVED,
GPR_SAVE_AREA_OFFSET,
THREADLOCAL_ADDR_OFFSET,
- STD_FRAME_SIZE_IN_BYTES,
- JITFRAME_FIXED_SIZE)
+ STD_FRAME_SIZE_IN_BYTES)
from rpython.jit.backend.ppc.helper.assembler import Saved_Volatiles
from rpython.jit.backend.ppc.helper.regalloc import _check_imm_arg
import rpython.jit.backend.ppc.register as r
import rpython.jit.backend.ppc.condition as c
+from rpython.jit.backend.ppc.register import JITFRAME_FIXED_SIZE
from rpython.jit.metainterp.history import AbstractFailDescr
from rpython.jit.metainterp.history import ConstInt, BoxInt
from rpython.jit.backend.llsupport import jitframe
@@ -30,7 +30,7 @@
from rpython.rtyper.annlowlevel import llhelper
from rpython.rlib.objectmodel import we_are_translated, specialize
from rpython.rtyper.lltypesystem.lloperation import llop
-from rpython.jit.backend.ppc.locations import StackLocation, get_spp_offset, imm
+from rpython.jit.backend.ppc.locations import StackLocation, get_fp_offset, imm
from rpython.rlib.jit import AsmInfo
from rpython.rlib.objectmodel import compute_unique_id
from rpython.rlib.rarithmetic import r_uint
@@ -166,50 +166,29 @@
mc.addi(r.r15.value, r.r15.value, -2 * WORD) # SUB r15, r15, 2*WORD
mc.store(r.r15.value, r.r14.value, 0) # STR r15, [rootstacktop]
+ def new_stack_loc(self, i, tp):
+ base_ofs = self.cpu.get_baseofs_of_frame_field()
+ return StackLocation(i, get_fp_offset(base_ofs, i), tp)
+
def setup_failure_recovery(self):
-
- @rgc.no_collect
- def failure_recovery_func(mem_loc, spilling_pointer,
- managed_registers_pointer):
- """
- mem_loc is a pointer to the beginning of the encoding.
-
- spilling_pointer is the address of the spilling area.
- """
- regs = rffi.cast(rffi.LONGP, managed_registers_pointer)
- fpregs = rffi.ptradd(regs, len(r.MANAGED_REGS))
- fpregs = rffi.cast(rffi.LONGP, fpregs)
- return self.decode_registers_and_descr(mem_loc,
- spilling_pointer,
- regs, fpregs)
-
- self.failure_recovery_func = failure_recovery_func
- self.failure_recovery_code = [0, 0, 0]
-
- recovery_func_sign = lltype.Ptr(lltype.FuncType([lltype.Signed] * 3,
- lltype.Signed))
+ self.failure_recovery_code = [0, 0, 0, 0]
# TODO: see with we really need the ignored_regs argument
def _push_all_regs_to_jitframe(self, mc, ignored_regs, withfloats,
callee_only=False):
base_ofs = self.cpu.get_baseofs_of_frame_field()
if callee_only:
- # Only push registers used to pass arguments to the callee
- regs = r.VOLATILES
+ regs = XXX
else:
- regs = r.ALL_REGS
+ regs = r.MANAGED_REGS
# For now, just push all regs to the jitframe
- for i, reg in enumerate(regs):
- # XXX should we progress to higher addresses?
- mc.store_reg(reg, base_ofs - (i * WORD))
-
+ for reg in regs:
+ v = r.ALL_REG_INDEXES[reg]
+ mc.std(reg.value, r.SPP.value, base_ofs + v * WORD)
if withfloats:
- if callee_only:
- regs = r.VOLATILES_FLOAT
- else:
- regs = r.ALL_FLOAT_REGS
- for i, reg in enumerate(regs):
- pass # TODO find or create the proper store indexed for fpr's
+ for reg in r.MANAGED_FP_REGS:
+ v = r.ALL_REG_INDEXES[reg]
+ mc.stfd(reg.value, r.SPP.value, base_ofs + v * WORD)
def _pop_all_regs_from_jitframe(self, mc, ignored_regs, withfloats,
callee_only=False):
@@ -230,135 +209,36 @@
for i, reg in enumerate(regs):
pass # TODO find or create the proper load indexed for fpr's
- @rgc.no_collect
- def decode_registers_and_descr(self, mem_loc, spp, registers, fp_registers):
- """Decode locations encoded in memory at mem_loc and write the values
- to the failboxes. Values for spilled vars and registers are stored on
- stack at frame_loc """
- assert spp & 1 == 0
- self.fail_force_index = spp + FORCE_INDEX_OFS
- bytecode = rffi.cast(rffi.UCHARP, mem_loc)
- num = 0
- value = 0
- fvalue = 0
- code_inputarg = False
- while True:
- code = rffi.cast(lltype.Signed, bytecode[0])
- bytecode = rffi.ptradd(bytecode, 1)
- if code >= self.CODE_FROMSTACK:
- if code > 0x7F:
- shift = 7
- code &= 0x7F
- while True:
- nextcode = rffi.cast(lltype.Signed, bytecode[0])
- bytecode = rffi.ptradd(bytecode, 1)
- code |= (nextcode & 0x7F) << shift
- shift += 7
- if nextcode <= 0x7F:
- break
- # load the value from the stack
- kind = code & 3
- code = int((code - self.CODE_FROMSTACK) >> 2)
- if code_inputarg:
- code = ~code
- code_inputarg = False
- if kind == self.DESCR_FLOAT:
- start = spp + get_spp_offset(int(code))
- fvalue = rffi.cast(rffi.LONGP, start)[0]
- else:
- start = spp + get_spp_offset(int(code))
- value = rffi.cast(rffi.LONGP, start)[0]
- else:
- # 'code' identifies a register: load its value
- kind = code & 3
- if kind == self.DESCR_SPECIAL:
- if code == self.CODE_HOLE:
- num += 1
- continue
- if code == self.CODE_INPUTARG:
- code_inputarg = True
- continue
- assert code == self.CODE_STOP
- break
- code >>= 2
- if kind == self.DESCR_FLOAT:
- reg_index = r.get_managed_fpreg_index(code)
- fvalue = fp_registers[reg_index]
- else:
- reg_index = r.get_managed_reg_index(code)
- value = registers[reg_index]
- # store the loaded value into fail_boxes_<type>
- if kind == self.DESCR_FLOAT:
- tgt = self.fail_boxes_float.get_addr_for_num(num)
- rffi.cast(rffi.LONGP, tgt)[0] = fvalue
- else:
- if kind == self.DESCR_INT:
- tgt = self.fail_boxes_int.get_addr_for_num(num)
- elif kind == self.DESCR_REF:
- assert (value & 3) == 0, "misaligned pointer"
- tgt = self.fail_boxes_ptr.get_addr_for_num(num)
- else:
- assert 0, "bogus kind"
- rffi.cast(rffi.LONGP, tgt)[0] = value
- num += 1
- self.fail_boxes_count = num
- fail_index = rffi.cast(rffi.INTP, bytecode)[0]
- fail_index = rffi.cast(lltype.Signed, fail_index)
- return fail_index
+ def _build_failure_recovery(self, exc, withfloats=False):
+ mc = PPCBuilder()
+ self.mc = mc
- def decode_inputargs(self, code):
- descr_to_box_type = [REF, INT, FLOAT]
- bytecode = rffi.cast(rffi.UCHARP, code)
- arglocs = []
- code_inputarg = False
- while 1:
- # decode the next instruction from the bytecode
- code = rffi.cast(lltype.Signed, bytecode[0])
- bytecode = rffi.ptradd(bytecode, 1)
- if code >= self.CODE_FROMSTACK:
- # 'code' identifies a stack location
- if code > 0x7F:
- shift = 7
- code &= 0x7F
- while True:
- nextcode = rffi.cast(lltype.Signed, bytecode[0])
- bytecode = rffi.ptradd(bytecode, 1)
- code |= (nextcode & 0x7F) << shift
- shift += 7
- if nextcode <= 0x7F:
- break
- kind = code & 3
- code = (code - self.CODE_FROMSTACK) >> 2
- if code_inputarg:
- code = ~code
- code_inputarg = False
- loc = PPCFrameManager.frame_pos(code, descr_to_box_type[kind])
- elif code == self.CODE_STOP:
- break
- elif code == self.CODE_HOLE:
- continue
- elif code == self.CODE_INPUTARG:
- code_inputarg = True
- continue
- else:
- # 'code' identifies a register
- kind = code & 3
- code >>= 2
- if kind == self.DESCR_FLOAT:
- assert (r.ALL_FLOAT_REGS[code] is
- r.MANAGED_FP_REGS[r.get_managed_fpreg_index(code)])
- loc = r.ALL_FLOAT_REGS[code]
- else:
- #loc = r.all_regs[code]
- assert (r.ALL_REGS[code] is
- r.MANAGED_REGS[r.get_managed_reg_index(code)])
- loc = r.ALL_REGS[code]
- arglocs.append(loc)
- return arglocs[:]
+ # fill in the jf_descr and jf_gcmap fields of the frame according
+ # to which failure we are resuming from. These are set before
+ # this function is called (see generate_quick_failure()).
+ ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
+ ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcmap')
+ mc.store(r.r0.value, r.SPP.value, ofs)
+ mc.store(r.r2.value, r.SPP.value, ofs2)
- # TODO
- def _build_failure_recovery(self, exc, withfloats=False):
- pass
+ self._push_all_regs_to_jitframe(mc, [], withfloats)
+
+ if exc:
+ # We might have an exception pending. Load it into r2...
+ mc.write32(0)
+ #mc.MOV(ebx, heap(self.cpu.pos_exc_value()))
+ #mc.MOV(heap(self.cpu.pos_exception()), imm0)
+ #mc.MOV(heap(self.cpu.pos_exc_value()), imm0)
+ ## ...and save ebx into 'jf_guard_exc'
+ #offset = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
+ #mc.MOV_br(offset, ebx.value)
+
+ # now we return from the complete frame, which starts from
+ # _call_header_with_stack_check(). The _call_footer below does it.
+ self._call_footer()
+ rawstart = mc.materialize(self.cpu, [])
+ self.failure_recovery_code[exc + 2 * withfloats] = rawstart
+ self.mc = None
# TODO
def build_frame_realloc_slowpath(self):
@@ -836,7 +716,7 @@
#
self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE,
rawstart)
- looptoken._ll_loop_code = looppos + rawstart
+ looptoken._ppc_loop_code = looppos + rawstart
debug_start("jit-backend-addr")
debug_print("Loop %d (%s) has address 0x%x to 0x%x (bootstrap 0x%x)" % (
looptoken.number, loopname,
@@ -881,56 +761,48 @@
frame_depth = max(frame_depth, target_frame_depth)
return frame_depth
- def assemble_bridge(self, faildescr, inputargs, operations, looptoken, log):
+ @rgc.no_release_gil
+ def assemble_bridge(self, faildescr, inputargs, operations,
+ original_loop_token, log, logger):
if not we_are_translated():
+ # Arguments should be unique
assert len(set(inputargs)) == len(inputargs)
- self.setup(looptoken)
+ self.setup(original_loop_token)
descr_number = compute_unique_id(faildescr)
if log:
operations = self._inject_debugging_code(faildescr, operations,
'b', descr_number)
- assert isinstance(faildescr, AbstractFailDescr)
+
arglocs = self.rebuild_faillocs_from_descr(faildescr, inputargs)
-
regalloc = Regalloc(assembler=self)
+ startpos = self.mc.get_relative_pos()
operations = regalloc.prepare_bridge(inputargs, arglocs,
operations,
self.current_clt.allgcrefs,
self.current_clt.frame_info)
-
- startpos = self.mc.currpos()
- spilling_area, param_depth = self._assemble(operations, regalloc)
- codeendpos = self.mc.currpos()
-
+ #self._check_frame_depth(self.mc, regalloc.get_gcmap())
+ frame_depth_no_fixed_size = self._assemble(regalloc, inputargs, operations)
+ codeendpos = self.mc.get_relative_pos()
self.write_pending_failure_recoveries()
-
- rawstart = self.materialize_loop(looptoken, False)
- self.process_pending_guards(rawstart)
- self.patch_trace(faildescr, looptoken, rawstart, regalloc)
+ fullsize = self.mc.get_relative_pos()
+ #
+ rawstart = self.materialize_loop(original_loop_token)
+ self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE,
+ rawstart)
+ debug_bridge(descr_number, rawstart, codeendpos)
+ self.patch_pending_failure_recoveries(rawstart)
+ # patch the jump from original guard
+ self.patch_jump_for_descr(faildescr, rawstart)
+ ops_offset = self.mc.ops_offset
+ frame_depth = max(self.current_clt.frame_info.jfi_frame_depth,
+ frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
+ if logger:
+ logger.log_bridge(inputargs, operations, "rewritten",
+ ops_offset=ops_offset)
self.fixup_target_tokens(rawstart)
- self.current_clt.frame_depth = max(self.current_clt.frame_depth,
- spilling_area)
- self.current_clt.param_depth = max(self.current_clt.param_depth, param_depth)
-
- if not we_are_translated():
- # for the benefit of tests
- faildescr._ppc_bridge_frame_depth = self.current_clt.frame_depth
- faildescr._ppc_bridge_param_depth = self.current_clt.param_depth
- if log:
- self.mc._dump_trace(rawstart, 'bridge_%d.asm' %
- self.cpu.total_compiled_bridges)
-
- self._patch_sp_offset(sp_patch_location, rawstart)
-
- ops_offset = self.mc.ops_offset
+ self.update_frame_depth(frame_depth)
self.teardown()
-
- debug_start("jit-backend-addr")
- debug_print("bridge out of Guard %d has address %x to %x" %
- (descr_number, rawstart, rawstart + codeendpos))
- debug_stop("jit-backend-addr")
-
return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
def _patch_sp_offset(self, sp_patch_location, rawstart):
@@ -1053,21 +925,20 @@
#print "=== Loop start is at %s ===" % hex(r_uint(start))
return start
- def push_gcmap(self, mc, gcmap, push=False, store=False):
+ def load_gcmap(self, mc, gcmap):
+ # load the current gcmap into register r2
ptr = rffi.cast(lltype.Signed, gcmap)
- if push:
- with scratch_reg(mc):
- mc.load_imm(r.SCRATCH, ptr)
- mc.stdu(r.SCRATCH.value, r.SP.value, -WORD)
- elif store:
- assert False, "Not implemented"
+ mc.load_imm(r.r2, ptr)
def generate_quick_failure(self, guardtok):
startpos = self.mc.currpos()
fail_descr, target = self.store_info_on_descr(startpos, guardtok)
- self.regalloc_push(imm(fail_descr))
- self.push_gcmap(self.mc, gcmap=guardtok.gcmap, push=True)
- self.mc.call(target)
+ assert target != 0
+ self.load_gcmap(self.mc, gcmap=guardtok.gcmap) # -> r2
+ self.mc.load_imm(r.r0, target)
+ self.mc.mtctr(r.r0.value)
+ self.mc.load_imm(r.r0, fail_descr)
+ self.mc.bctr()
return startpos
def write_pending_failure_recoveries(self):
@@ -1079,32 +950,37 @@
def patch_pending_failure_recoveries(self, rawstart):
clt = self.current_clt
for tok in self.pending_guard_tokens:
- xxxxxxxxx
-
- def process_pending_guards(self, block_start):
- clt = self.current_clt
- for tok in self.pending_guards:
- descr = tok.faildescr
- assert isinstance(descr, AbstractFailDescr)
- descr._ppc_block_start = block_start
-
+ addr = rawstart + tok.pos_jump_offset
+ #
+ # XXX see patch_jump_for_descr()
+ #tok.faildescr.adr_jump_offset = addr
+ tok.faildescr.adr_recovery_stub = rawstart + tok.pos_recovery_stub
+ #
+ relative_target = tok.pos_recovery_stub - tok.pos_jump_offset
+ #
if not tok.is_guard_not_invalidated:
mc = PPCBuilder()
- offset = tok.pos_recovery_stub - tok.offset
- mc.b_cond_offset(offset, tok.fcond)
- mc.copy_to_raw_memory(block_start + tok.offset)
+ mc.b_cond_offset(relative_target, tok.fcond)
+ mc.copy_to_raw_memory(addr)
else:
- clt.invalidate_positions.append((block_start + tok.offset,
- descr._ppc_guard_pos - tok.offset))
+ # GUARD_NOT_INVALIDATED, record an entry in
+ # clt.invalidate_positions of the form:
+ # (addr-in-the-code-of-the-not-yet-written-jump-target,
+ # relative-target-to-use)
+ relpos = tok.pos_jump_offset
+ clt.invalidate_positions.append((rawstart + relpos,
+ relative_target))
- def patch_trace(self, faildescr, looptoken, bridge_addr, regalloc):
- # The first instruction (word) is not overwritten, because it is the
- # one that actually checks the condition
+ def patch_jump_for_descr(self, faildescr, adr_new_target):
+ # 'faildescr.adr_jump_offset' is the address of an instruction that is a
+ # conditional jump. We must patch this conditional jump to go
+ # to 'adr_new_target'. If the target is too far away, we can't
+ # patch it inplace, and instead we patch the quick failure code
+ # (which should be at least 5 instructions, so enough).
+ # --- XXX for now we always use the second solution ---
mc = PPCBuilder()
- patch_addr = faildescr._ppc_block_start + faildescr._ppc_guard_pos
- mc.b_abs(bridge_addr)
- mc.copy_to_raw_memory(patch_addr)
- faildescr._failure_recovery_code_ofs = 0
+ mc.b_abs(adr_new_target)
+ mc.copy_to_raw_memory(faildescr.adr_recovery_stub)
def get_asmmemmgr_blocks(self, looptoken):
clt = looptoken.compiled_loop_token
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -5,7 +5,7 @@
from rpython.jit.codewriter import longlong
from rpython.jit.backend.ppc.jump import (remap_frame_layout,
remap_frame_layout_mixed)
-from rpython.jit.backend.ppc.locations import imm, get_fp_offset, get_spp_offset
+from rpython.jit.backend.ppc.locations import imm, get_fp_offset
from rpython.jit.backend.ppc.helper.regalloc import (_check_imm_arg,
prepare_cmp_op,
prepare_unary_int_op,
@@ -192,7 +192,7 @@
@staticmethod
def get_loc_index(loc):
- assert loc.is_stack()
+ assert isinstance(loc, locations.StackLocation)
return loc.position
class Regalloc(BaseRegalloc):
@@ -227,28 +227,41 @@
# note: we need to make a copy of inputargs because possibly_free_vars
# is also used on op args, which is a non-resizable list
self.possibly_free_vars(list(inputargs))
+ self.min_bytes_before_label = 4 # for redirect_call_assembler()
return operations
- def prepare_bridge(self, inputargs, arglocs, ops):
- self._prepare(inputargs, ops)
+ def prepare_bridge(self, inputargs, arglocs, operations, allgcrefs,
+ frame_info):
+ operations = self._prepare(inputargs, operations, allgcrefs)
self._update_bindings(arglocs, inputargs)
+ self.min_bytes_before_label = 0
+ return operations
+
+ def ensure_next_label_is_at_least_at_position(self, at_least_position):
+ self.min_bytes_before_label = max(self.min_bytes_before_label,
+ at_least_position)
def _update_bindings(self, locs, inputargs):
+ # XXX this should probably go to llsupport/regalloc.py
used = {}
i = 0
for loc in locs:
+ if loc is None: # xxx bit kludgy
+ loc = r.SPP
arg = inputargs[i]
i += 1
if loc.is_reg():
- self.rm.reg_bindings[arg] = loc
+ if loc is r.SPP:
+ self.rm.bindings_to_frame_reg[arg] = None
+ else:
+ self.rm.reg_bindings[arg] = loc
+ used[loc] = None
elif loc.is_fp_reg():
self.fprm.reg_bindings[arg] = loc
+ used[loc] = None
else:
assert loc.is_stack()
- self.frame_manager.set_binding(arg, loc)
- used[loc] = None
-
- # XXX combine with x86 code and move to llsupport
+ self.fm.bind(arg, loc)
self.rm.free_regs = []
for reg in self.rm.all_regs:
if reg not in used:
@@ -257,9 +270,10 @@
for reg in self.fprm.all_regs:
if reg not in used:
self.fprm.free_regs.append(reg)
- # note: we need to make a copy of inputargs because possibly_free_vars
- # is also used on op args, which is a non-resizable list
self.possibly_free_vars(list(inputargs))
+ self.fm.finish_binding()
+ self.rm._check_invariants()
+ self.fprm._check_invariants()
def get_final_frame_depth(self):
return self.fm.get_frame_depth()
@@ -317,7 +331,12 @@
i += 1
self.possibly_free_vars_for_op(op)
continue
- if self.can_merge_with_next_guard(op, i, operations):
+ if self.can_merge_with_next_guard(op, i, operations) and (
+ # XXX FIX
+ op.getopnum() in (rop.CALL_RELEASE_GIL, rop.CALL_ASSEMBLER,
+ rop.CALL_MAY_FORCE)
+ # XXX FIX
+ ):
arglocs = oplist_with_guard[op.getopnum()](self, op,
operations[i + 1])
assert arglocs is not None
@@ -339,11 +358,18 @@
i += 1
assert not self.rm.reg_bindings
assert not self.fprm.reg_bindings
- #self.flush_loop()
+ self.flush_loop()
self.assembler.mc.mark_op(None) # end of the loop
for arg in inputargs:
self.possibly_free_var(arg)
+ def flush_loop(self):
+ # Emit a nop in the rare case where we have a guard_not_invalidated
+ # immediately before a label
+ mc = self.assembler.mc
+ while self.min_bytes_before_label > mc.get_relative_pos():
+ mc.nop()
+
def loc(self, var):
if var.type == FLOAT:
return self.fprm.loc(var)
@@ -360,6 +386,10 @@
else:
self.rm.force_spill_var(var)
+ def _consider_force_spill(self, op):
+ # This operation is used only for testing
+ self.force_spill_var(op.getarg(0))
+
def before_call(self, force_store=[], save_all_regs=False):
self.rm.before_call(force_store, save_all_regs)
self.fprm.before_call(force_store, save_all_regs)
@@ -561,12 +591,13 @@
def _prepare_guard(self, op, args=None):
if args is None:
args = []
- args.append(imm(len(self.frame_manager.used)))
+ args.append(imm(self.fm.get_frame_depth()))
for arg in op.getfailargs():
if arg:
args.append(self.loc(arg))
else:
args.append(None)
+ self.possibly_free_vars(op.getfailargs())
return args
def prepare_guard_true(self, op):
@@ -695,7 +726,7 @@
if isinstance(box, Box):
loc = arglocs[i]
if loc is not None and loc.is_stack():
- self.frame_manager.hint_frame_locations[box] = loc
+ self.fm.hint_frame_pos[box] = self.fm.get_loc_index(loc)
def prepare_jump(self, op):
descr = op.getdescr()
@@ -1067,7 +1098,6 @@
return [res_loc]
def prepare_label(self, op):
- # XXX big refactoring needed?
descr = op.getdescr()
assert isinstance(descr, TargetToken)
inputargs = op.getarglist()
@@ -1082,15 +1112,26 @@
assert isinstance(arg, Box)
if self.last_real_usage.get(arg, -1) <= position:
self.force_spill_var(arg)
-
+ #
+ # we need to make sure that no variable is stored in spp (=r31)
+ for arg in inputargs:
+ if self.loc(arg) is r.SPP:
+ loc2 = self.fm.loc(arg)
+ self.assembler.mc.store(r.SPP, loc2)
+ self.rm.bindings_to_frame_reg.clear()
#
for i in range(len(inputargs)):
arg = inputargs[i]
assert isinstance(arg, Box)
loc = self.loc(arg)
+ assert loc is not r.SPP
arglocs[i] = loc
if loc.is_reg():
- self.frame_manager.mark_as_free(arg)
+ self.fm.mark_as_free(arg)
+ #
+ # if we are too close to the start of the loop, the label's target may
+ # get overridden by redirect_call_assembler(). (rare case)
+ self.flush_loop()
#
descr._ppc_arglocs = arglocs
descr._ppc_loop_code = self.assembler.mc.currpos()
diff --git a/rpython/jit/backend/ppc/register.py b/rpython/jit/backend/ppc/register.py
--- a/rpython/jit/backend/ppc/register.py
+++ b/rpython/jit/backend/ppc/register.py
@@ -37,6 +37,16 @@
MANAGED_FP_REGS = VOLATILES_FLOAT[1:] #+ NONVOLATILES_FLOAT
+
+# The JITFRAME_FIXED_SIZE is measured in words, and should be the
+# number of registers that need to be saved into the jitframe when
+# failing a guard, for example.
+ALL_REG_INDEXES = {}
+for _r in MANAGED_REGS + MANAGED_FP_REGS:
+ ALL_REG_INDEXES[_r] = len(ALL_REG_INDEXES)
+JITFRAME_FIXED_SIZE = len(ALL_REG_INDEXES)
+
+
PARAM_REGS = [r3, r4, r5, r6, r7, r8, r9, r10]
PARAM_FPREGS = [f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13]
diff --git a/rpython/jit/backend/ppc/runner.py b/rpython/jit/backend/ppc/runner.py
--- a/rpython/jit/backend/ppc/runner.py
+++ b/rpython/jit/backend/ppc/runner.py
@@ -18,8 +18,13 @@
IS_64_BIT = True
BOOTSTRAP_TP = lltype.FuncType([], lltype.Signed)
+ from rpython.jit.backend.ppc.register import JITFRAME_FIXED_SIZE
frame_reg = r.SP
- all_reg_indexes = range(len(r.ALL_REGS))
+ all_reg_indexes = [-1] * 32
+ for _i, _r in enumerate(r.MANAGED_REGS):
+ all_reg_indexes[_r.value] = _i
+ gen_regs = r.MANAGED_REGS
+ float_regs = r.MANAGED_FP_REGS
def __init__(self, rtyper, stats, opts=None, translate_support_code=False,
gcdescr=None):
@@ -31,7 +36,7 @@
AbstractLLCPU.__init__(self, rtyper, stats, opts,
translate_support_code, gcdescr)
- # floats are not supported yet
+ # floats are supported. singlefloats are not supported yet
self.supports_floats = True
def setup(self):
@@ -44,11 +49,11 @@
self.assembler.finish_once()
def compile_bridge(self, faildescr, inputargs, operations,
- original_loop_token, log=False):
+ original_loop_token, log=True, logger=None):
clt = original_loop_token.compiled_loop_token
clt.compiling_a_bridge()
return self.assembler.assemble_bridge(faildescr, inputargs, operations,
- original_loop_token, log=log)
+ original_loop_token, log, logger)
@staticmethod
def cast_ptr_to_int(x):
diff --git a/rpython/jit/backend/ppc/test/test_runner.py b/rpython/jit/backend/ppc/test/test_runner.py
--- a/rpython/jit/backend/ppc/test/test_runner.py
+++ b/rpython/jit/backend/ppc/test/test_runner.py
@@ -76,10 +76,11 @@
ARGS = [lltype.Signed] * numargs
RES = lltype.Signed
args = [i+1 for i in range(numargs)]
- res = self.cpu.execute_token(looptoken, *args)
- assert res is faildescr
+ deadframe = self.cpu.execute_token(looptoken, *args)
+ fail = self.cpu.get_latest_descr(deadframe)
+ assert fail is faildescr
for i in range(numargs):
- assert self.cpu.get_latest_value_int(i) == i + 1
+ assert self.cpu.get_int_value(deadframe, i) == i + 1
bridgeops = [arglist]
bridgeops.append("guard_value(i1, -5) %s" % arglist)
@@ -88,12 +89,14 @@
faildescr2 = bridge.operations[-1].getdescr()
self.cpu.compile_bridge(faildescr, bridge.inputargs, bridge.operations, looptoken)
- res2 = self.cpu.execute_token(looptoken, *args)
- assert res2 is faildescr2
+ deadframe = self.cpu.execute_token(looptoken, *args)
+ fail = self.cpu.get_latest_descr(deadframe)
+ assert fail is faildescr2
for i in range(numargs):
- assert self.cpu.get_latest_value_int(i) == i + 1
+ assert self.cpu.get_int_value(deadframe, i) == i + 1
def test_unicodesetitem_really_needs_temploc(self):
+ py.test.skip("XXX")
u_box = self.alloc_unicode(u"abcdsdasdsaddefg")
i0 = BoxInt()
@@ -128,6 +131,7 @@
assert self.cpu.get_latest_value_int(i) == args[i]
def test_debugger_on(self):
+ py.test.skip("XXX")
from pypy.rlib import debug
targettoken, preambletoken = TargetToken(), TargetToken()
diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -230,6 +230,8 @@
self.cpu.compile_loop(inputargs, operations, looptoken)
if hasattr(looptoken, '_x86_ops_offset'):
del looptoken._x86_ops_offset # else it's kept alive
+ if hasattr(looptoken, '_ppc_ops_offset'):
+ del looptoken._ppc_ops_offset # else it's kept alive
del i0, i1, i2
del inputargs
del operations
More information about the pypy-commit
mailing list