[pypy-commit] pypy ppc-updated-backend: copy copy copy from x86
arigo
noreply at buildbot.pypy.org
Fri Aug 21 10:50:45 CEST 2015
Author: Armin Rigo <arigo at tunes.org>
Branch: ppc-updated-backend
Changeset: r79094:e7a2b3e7bbff
Date: 2015-08-20 09:14 -0700
http://bitbucket.org/pypy/pypy/changeset/e7a2b3e7bbff/
Log: copy copy copy from x86
diff --git a/rpython/jit/backend/ppc/arch.py b/rpython/jit/backend/ppc/arch.py
--- a/rpython/jit/backend/ppc/arch.py
+++ b/rpython/jit/backend/ppc/arch.py
@@ -61,15 +61,21 @@
# register r31.
+LR_BC_OFFSET = 16
+PARAM_SAVE_AREA_OFFSET = 48
+THREADLOCAL_ADDR_OFFSET = 112
+GPR_SAVE_AREA_OFFSET = 120
+
REGISTERS_SAVED = [r.r25, r.r26, r.r27, r.r28, r.r29, r.r30, r.r31]
assert REGISTERS_SAVED == [_r for _r in r.NONVOLATILES
if _r in r.MANAGED_REGS or _r == r.r31]
-STD_FRAME_SIZE_IN_BYTES = 120 + len(REGISTERS_SAVED) * WORD
+STD_FRAME_SIZE_IN_BYTES = GPR_SAVE_AREA_OFFSET + len(REGISTERS_SAVED) * WORD
assert STD_FRAME_SIZE_IN_BYTES % 16 == 0
-# offset to LR in BACKCHAIN
-if IS_PPC_32:
- LR_BC_OFFSET = WORD
-else:
- LR_BC_OFFSET = 16
+
+# The JITFRAME_FIXED_SIZE is measured in words, and should be the
+# number of registers that need to be saved into the jitframe when
+# failing a guard, for example. (Note: it is about the jitframe,
+# not the frame.)
+JITFRAME_FIXED_SIZE = len(r.MANAGED_REGS) + len(r.MANAGED_FP_REGS)
diff --git a/rpython/jit/backend/ppc/ppc_assembler.py b/rpython/jit/backend/ppc/ppc_assembler.py
--- a/rpython/jit/backend/ppc/ppc_assembler.py
+++ b/rpython/jit/backend/ppc/ppc_assembler.py
@@ -4,8 +4,11 @@
from rpython.jit.backend.ppc.codebuilder import (PPCBuilder, OverwritingBuilder,
scratch_reg)
from rpython.jit.backend.ppc.arch import (IS_PPC_32, IS_PPC_64, WORD,
- LR_BC_OFFSET,
- STD_FRAME_SIZE_IN_BYTES)
+ LR_BC_OFFSET, REGISTERS_SAVED,
+ GPR_SAVE_AREA_OFFSET,
+ THREADLOCAL_ADDR_OFFSET,
+ STD_FRAME_SIZE_IN_BYTES,
+ JITFRAME_FIXED_SIZE)
from rpython.jit.backend.ppc.helper.assembler import Saved_Volatiles
from rpython.jit.backend.ppc.helper.regalloc import _check_imm_arg
import rpython.jit.backend.ppc.register as r
@@ -30,6 +33,7 @@
from rpython.jit.backend.ppc.locations import StackLocation, get_spp_offset, imm
from rpython.rlib.jit import AsmInfo
from rpython.rlib.objectmodel import compute_unique_id
+from rpython.rlib.rarithmetic import r_uint
memcpy_fn = rffi.llexternal('memcpy', [llmemory.Address, llmemory.Address,
rffi.SIZE_T], lltype.Void,
@@ -128,6 +132,7 @@
# The code generated here allocates a new stackframe
# and is the first machine code to be executed.
def _make_frame(self, frame_depth):
+ XXX
self.mc.make_function_prologue(frame_depth)
# save SPP at the bottom of the stack frame
@@ -683,26 +688,20 @@
self.mc.store(r.SCRATCH.value, r.SP.value,
STD_FRAME_SIZE_IN_BYTES + LR_BC_OFFSET)
- XXXX
- # save SPP at the bottom of the stack frame
- self.mc.store(r.SPP.value, r.SP.value, WORD)
+ # save registers r25 to r31
+ for i, reg in enumerate(REGISTERS_SAVED):
+ self.mc.store(reg.value, r.SP.value,
+ GPR_SAVE_AREA_OFFSET + i * WORD)
- # compute spilling pointer (SPP)
- self.mc.addi(r.SPP.value, r.SP.value,
- frame_depth - self.OFFSET_SPP_TO_OLD_BACKCHAIN)
+ # save r4, the second argument, to THREADLOCAL_ADDR_OFFSET
+ self.mc.store(r.r4.value, r.SP.value, THREADLOCAL_ADDR_OFFSET)
- # save nonvolatile registers
- self._save_nonvolatiles()
+ # move r3, the first argument, to r31 (SPP): the jitframe object
+ self.mc.mr(r.SPP.value, r.r3.value)
- # save r31, use r30 as scratch register
- # this is safe because r30 has been saved already
- assert NONVOLATILES[-1] == r.SPP
- ofs_to_r31 = (self.OFFSET_SPP_TO_GPR_SAVE_AREA +
- WORD * (len(NONVOLATILES)-1))
- self.mc.load(r.r30.value, r.SP.value, WORD)
- self.mc.store(r.r30.value, r.SPP.value, ofs_to_r31)
gcrootmap = self.cpu.gc_ll_descr.gcrootmap
if gcrootmap and gcrootmap.is_shadow_stack:
+ XXX
self.gen_shadowstack_header(gcrootmap)
def _call_header_with_stack_check(self):
@@ -813,6 +812,10 @@
baseofs = self.cpu.get_baseofs_of_frame_field()
self.current_clt.frame_info.update_frame_depth(baseofs, frame_depth)
+ def patch_stack_checks(self, framedepth, rawstart):
+ for ofs in self.frame_depth_to_patch:
+ self._patch_frame_depth(ofs + rawstart, framedepth)
+
@rgc.no_release_gil
def assemble_loop(self, jd_id, unique_id, logger, loopname, inputargs,
operations, looptoken, log):
@@ -864,9 +867,9 @@
ops_offset = self.mc.ops_offset
if not we_are_translated():
# used only by looptoken.dump() -- useful in tests
- looptoken._x86_rawstart = rawstart
- looptoken._x86_fullsize = full_size
- looptoken._x86_ops_offset = ops_offset
+ looptoken._ppc_rawstart = rawstart
+ looptoken._ppc_fullsize = full_size
+ looptoken._ppc_ops_offset = ops_offset
looptoken._ll_function_addr = rawstart
if logger:
logger.log_loop(inputargs, operations, 0, "rewritten",
@@ -875,26 +878,26 @@
self.fixup_target_tokens(rawstart)
self.teardown()
# oprofile support
- if self.cpu.profile_agent is not None:
- name = "Loop # %s: %s" % (looptoken.number, loopname)
- self.cpu.profile_agent.native_code_written(name,
- rawstart, full_size)
+ #if self.cpu.profile_agent is not None:
+ # name = "Loop # %s: %s" % (looptoken.number, loopname)
+ # self.cpu.profile_agent.native_code_written(name,
+ # rawstart, full_size)
return AsmInfo(ops_offset, rawstart + looppos,
size_excluding_failure_stuff - looppos)
- def _assemble(self, operations, regalloc):
+ def _assemble(self, regalloc, inputargs, operations):
+ self._regalloc = regalloc
regalloc.compute_hint_frame_locations(operations)
- self._walk_operations(operations, regalloc)
- frame_depth = regalloc.frame_manager.get_frame_depth()
- param_depth = self.max_stack_params
+ regalloc.walk_operations(inputargs, operations)
+ if 1: # we_are_translated() or self.cpu.dont_keepalive_stuff:
+ self._regalloc = None # else keep it around for debugging
+ frame_depth = regalloc.get_final_frame_depth()
jump_target_descr = regalloc.jump_target_descr
if jump_target_descr is not None:
- frame_depth = max(frame_depth,
- jump_target_descr._ppc_clt.frame_depth)
- param_depth = max(param_depth,
- jump_target_descr._ppc_clt.param_depth)
- return frame_depth, param_depth
-
+ tgt_depth = jump_target_descr._ppc_clt.frame_info.jfi_frame_depth
+ target_frame_depth = tgt_depth - JITFRAME_FIXED_SIZE
+ frame_depth = max(frame_depth, target_frame_depth)
+ return frame_depth
def assemble_bridge(self, faildescr, inputargs, operations, looptoken, log):
if not we_are_translated():
@@ -1032,64 +1035,6 @@
assert self.datablockwrapper is None
self.max_stack_params = 0
- def _walk_operations(self, operations, regalloc):
- self._regalloc = regalloc
- while regalloc.position() < len(operations) - 1:
- regalloc.next_instruction()
- pos = regalloc.position()
- op = operations[pos]
- opnum = op.getopnum()
- if op.has_no_side_effect() and op.result not in regalloc.longevity:
- regalloc.possibly_free_vars_for_op(op)
- elif self.can_merge_with_next_guard(op, pos, operations)\
- and opnum in (rop.CALL_RELEASE_GIL, rop.CALL_ASSEMBLER,\
- rop.CALL_MAY_FORCE): # XXX fix
- guard = operations[pos + 1]
- assert guard.is_guard()
- arglocs = regalloc.operations_with_guard[opnum](regalloc, op,
- guard)
- operations_with_guard[opnum](self, op,
- guard, arglocs, regalloc)
- regalloc.next_instruction()
- regalloc.possibly_free_vars_for_op(guard)
- regalloc.possibly_free_vars(guard.getfailargs())
- elif not we_are_translated() and op.getopnum() == -124:
- regalloc.prepare_force_spill(op)
- else:
- arglocs = regalloc.operations[opnum](regalloc, op)
- if arglocs is not None:
- self.operations[opnum](self, op, arglocs, regalloc)
- if op.is_guard():
- regalloc.possibly_free_vars(op.getfailargs())
- if op.result:
- regalloc.possibly_free_var(op.result)
- regalloc.possibly_free_vars_for_op(op)
- regalloc.free_temp_vars()
- regalloc._check_invariants()
-
- def can_merge_with_next_guard(self, op, i, operations):
- if (op.getopnum() == rop.CALL_MAY_FORCE or
- op.getopnum() == rop.CALL_ASSEMBLER or
- op.getopnum() == rop.CALL_RELEASE_GIL):
- assert operations[i + 1].getopnum() == rop.GUARD_NOT_FORCED
- return True
- if not op.is_comparison():
- if op.is_ovf():
- if (operations[i + 1].getopnum() != rop.GUARD_NO_OVERFLOW and
- operations[i + 1].getopnum() != rop.GUARD_OVERFLOW):
- assert 0, "int_xxx_ovf not followed by guard_(no)_overflow"
- return True
- return False
- if (operations[i + 1].getopnum() != rop.GUARD_TRUE and
- operations[i + 1].getopnum() != rop.GUARD_FALSE):
- return False
- if operations[i + 1].getarg(0) is not op.result:
- return False
- if (self._regalloc.longevity[op.result][1] > i + 1 or
- op.result in operations[i + 1].getfailargs()):
- return False
- return True
-
def gen_64_bit_func_descr(self):
return self.datablockwrapper.malloc_aligned(3*WORD, alignment=1)
@@ -1138,7 +1083,6 @@
allblocks = self.get_asmmemmgr_blocks(looptoken)
start = self.mc.materialize(self.cpu, allblocks,
self.cpu.gc_ll_descr.gcrootmap)
- #from pypy.rlib.rarithmetic import r_uint
#print "=== Loop start is at %s ===" % hex(r_uint(start))
return start
@@ -1160,9 +1104,16 @@
return startpos
def write_pending_failure_recoveries(self):
- for tok in self.pending_guards:
+ # for each pending guard, generate the code of the recovery stub
+ # at the end of self.mc.
+ for tok in self.pending_guard_tokens:
tok.pos_recovery_stub = self.generate_quick_failure(tok)
+ def patch_pending_failure_recoveries(self, rawstart):
+ clt = self.current_clt
+ for tok in self.pending_guard_tokens:
+ xxxxxxxxx
+
def process_pending_guards(self, block_start):
clt = self.current_clt
for tok in self.pending_guards:
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -198,26 +198,35 @@
def __init__(self, assembler=None):
self.cpu = assembler.cpu
- self.frame_manager = PPCFrameManager(self.cpu.get_baseofs_of_frame_field())
+ #self.frame_manager = PPCFrameManager(self.cpu.get_baseofs_of_frame_field())
self.assembler = assembler
self.jump_target_descr = None
self.final_jump_op = None
- def _prepare(self, inputargs, operations):
- self.fm = self.frame_manager
- longevity, last_real_usage = compute_vars_longevity(inputargs,
- operations)
+ def _prepare(self, inputargs, operations, allgcrefs):
+ cpu = self.assembler.cpu
+ self.fm = PPCFrameManager(cpu.get_baseofs_of_frame_field())
+ operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
+ allgcrefs)
+ # compute longevity of variables
+ longevity, last_real_usage = compute_vars_longevity(
+ inputargs, operations)
self.longevity = longevity
self.last_real_usage = last_real_usage
- fm = self.frame_manager
- asm = self.assembler
- self.fprm = FPRegisterManager(longevity, fm, asm)
- self.rm = PPCRegisterManager(longevity, fm, asm)
+ self.rm = PPCRegisterManager(self.longevity,
+ frame_manager = self.fm,
+ assembler = self.assembler)
+ self.fprm = FPRegisterManager(self.longevity, frame_manager = self.fm,
+ assembler = self.assembler)
+ return operations
- def prepare_loop(self, inputargs, operations, looptoken):
- self._prepare(inputargs, operations)
+ def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
+ operations = self._prepare(inputargs, operations, allgcrefs)
self._set_initial_bindings(inputargs, looptoken)
- self.possibly_free_vars(inputargs)
+ # note: we need to make a copy of inputargs because possibly_free_vars
+ # is also used on op args, which is a non-resizable list
+ self.possibly_free_vars(list(inputargs))
+ return operations
def prepare_bridge(self, inputargs, arglocs, ops):
self._prepare(inputargs, ops)
@@ -251,6 +260,9 @@
# is also used on op args, which is a non-resizable list
self.possibly_free_vars(list(inputargs))
+ def get_final_frame_depth(self):
+ return self.fm.get_frame_depth()
+
def possibly_free_var(self, var):
if var.type == FLOAT:
self.fprm.possibly_free_var(var)
@@ -289,9 +301,35 @@
forbidden_vars=forbidden_vars,
selected_reg=selected_reg)
- def _check_invariants(self):
- self.rm._check_invariants()
- self.fprm._check_invariants()
+ def walk_operations(self, inputargs, operations):
+ i = 0
+ #self.operations = operations
+ while i < len(operations):
+ op = operations[i]
+ self.assembler.mc.mark_op(op)
+ self.rm.position = i
+ self.fprm.position = i
+ if op.has_no_side_effect() and op.result not in self.longevity:
+ i += 1
+ self.possibly_free_vars_for_op(op)
+ continue
+ if self.can_merge_with_next_guard(op, i, operations):
+ oplist_with_guard[op.getopnum()](self, op, operations[i + 1])
+ i += 1
+ elif not we_are_translated() and op.getopnum() == -124:
+ self._consider_force_spill(op)
+ else:
+ oplist[op.getopnum()](self, op)
+ self.possibly_free_vars_for_op(op)
+ self.rm._check_invariants()
+ self.fprm._check_invariants()
+ i += 1
+ assert not self.rm.reg_bindings
+ assert not self.fprm.reg_bindings
+ #self.flush_loop()
+ self.assembler.mc.mark_op(None) # end of the loop
+ for arg in inputargs:
+ self.possibly_free_var(arg)
def loc(self, var):
if var.type == FLOAT:
@@ -299,9 +337,6 @@
else:
return self.rm.loc(var)
- def position(self):
- return self.rm.position
-
def next_instruction(self):
self.rm.next_instruction()
self.fprm.next_instruction()
@@ -681,11 +716,10 @@
return []
def prepare_setfield_gc(self, op):
- boxes = op.getarglist()
- a0, a1 = boxes
ofs, size, sign = unpack_fielddescr(op.getdescr())
- base_loc = self._ensure_value_is_boxed(a0, boxes)
- value_loc = self._ensure_value_is_boxed(a1, boxes)
+ args = op.getarglist()
+ base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
+ value_loc = self.make_sure_var_in_reg(op.getarg(1), args)
if _check_imm_arg(ofs):
ofs_loc = imm(ofs)
else:
@@ -1103,8 +1137,8 @@
-operations = [notimplemented] * (rop._LAST + 1)
-operations_with_guard = [notimplemented_with_guard] * (rop._LAST + 1)
+oplist = [notimplemented] * (rop._LAST + 1)
+oplist_with_guard = [notimplemented_with_guard] * (rop._LAST + 1)
def get_scale(size):
scale = 0
@@ -1120,7 +1154,7 @@
methname = 'prepare_%s' % key
if hasattr(Regalloc, methname):
func = getattr(Regalloc, methname).im_func
- operations[value] = func
+ oplist[value] = func
for key, value in rop.__dict__.items():
key = key.lower()
@@ -1129,8 +1163,5 @@
methname = 'prepare_guard_%s' % key
if hasattr(Regalloc, methname):
func = getattr(Regalloc, methname).im_func
- operations_with_guard[value] = func
- operations[value] = add_none_argument(func)
-
-Regalloc.operations = operations
-Regalloc.operations_with_guard = operations_with_guard
+ oplist_with_guard[value] = func
+ oplist[value] = add_none_argument(func)
More information about the pypy-commit
mailing list