[pypy-commit] pypy s390x-backend: rewrote many calls to use one fewer stack frame
plan_rich
pypy.commits at gmail.com
Mon Jan 25 07:33:03 EST 2016
Author: Richard Plangger <planrichi at gmail.com>
Branch: s390x-backend
Changeset: r81928:6a1b2984c003
Date: 2016-01-25 13:31 +0100
http://bitbucket.org/pypy/pypy/changeset/6a1b2984c003/
Log: rewrote many calls to use one fewer stack frame
diff --git a/rpython/jit/backend/zarch/arch.py b/rpython/jit/backend/zarch/arch.py
--- a/rpython/jit/backend/zarch/arch.py
+++ b/rpython/jit/backend/zarch/arch.py
@@ -34,7 +34,8 @@
# in reverse order to SP
STD_FRAME_SIZE_IN_BYTES = 160
-THREADLOCAL_ADDR_OFFSET = 16 # at position of r2, but r2 is never saved!!
+THREADLOCAL_ON_ENTER_JIT = 8
+THREADLOCAL_ADDR_OFFSET = STD_FRAME_SIZE_IN_BYTES + THREADLOCAL_ON_ENTER_JIT
assert STD_FRAME_SIZE_IN_BYTES % 2 == 0
diff --git a/rpython/jit/backend/zarch/assembler.py b/rpython/jit/backend/zarch/assembler.py
--- a/rpython/jit/backend/zarch/assembler.py
+++ b/rpython/jit/backend/zarch/assembler.py
@@ -16,7 +16,8 @@
from rpython.jit.backend.zarch.arch import (WORD,
STD_FRAME_SIZE_IN_BYTES, THREADLOCAL_ADDR_OFFSET,
RECOVERY_GCMAP_POOL_OFFSET, RECOVERY_TARGET_POOL_OFFSET,
- JUMPABS_TARGET_ADDR__POOL_OFFSET, JUMPABS_POOL_ADDR_POOL_OFFSET)
+ JUMPABS_TARGET_ADDR__POOL_OFFSET, JUMPABS_POOL_ADDR_POOL_OFFSET,
+ THREADLOCAL_ON_ENTER_JIT)
from rpython.jit.backend.zarch.opassembler import OpAssembler
from rpython.jit.backend.zarch.regalloc import Regalloc
from rpython.jit.codewriter.effectinfo import EffectInfo
@@ -382,7 +383,7 @@
"""
# signature of these cond_call_slowpath functions:
# * on entry, r12 contains the function to call
- # * r3, r4, r5, r6 contain arguments for the call
+ # * r2, r3, r4, r5 contain arguments for the call
# * r0 is the gcmap
# * the old value of these regs must already be stored in the jitframe
# * on exit, all registers are restored from the jitframe
@@ -391,6 +392,8 @@
self.mc = mc
ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcmap')
mc.STG(r.SCRATCH2, l.addr(ofs2,r.SPP))
+ mc.STMG(r.r14,r.r15,l.addr(14*WORD, r.SP))
+ mc.push_std_frame()
# copy registers to the frame, with the exception of r3 to r6 and r12,
# because these have already been saved by the caller. Note that
@@ -406,21 +409,21 @@
reg is not r.r4 and
reg is not r.r5 and
reg is not r.r12]
- self._push_core_regs_to_jitframe(mc, regs + [r.r14])
+ self._push_core_regs_to_jitframe(mc, regs)
if supports_floats:
self._push_fp_regs_to_jitframe(mc)
# allocate a stack frame!
- mc.push_std_frame()
mc.raw_call(r.r12)
- mc.pop_std_frame()
# Finish
self._reload_frame_if_necessary(mc)
- self._pop_core_regs_from_jitframe(mc, saved_regs + [r.r14])
+ self._pop_core_regs_from_jitframe(mc, saved_regs)
if supports_floats:
self._pop_fp_regs_from_jitframe(mc)
+ size = STD_FRAME_SIZE_IN_BYTES
+ mc.LMG(r.r14, r.r15, l.addr(size+14*WORD, r.SP))
mc.BCR(c.ANY, r.RETURN)
self.mc = None
return mc.materialize(self.cpu, [])
@@ -446,8 +449,11 @@
mc.STG(r.SCRATCH, l.addr(ofs2, r.SPP))
saved_regs = [reg for reg in r.MANAGED_REGS
if reg is not r.RES and reg is not r.RSZ]
- self._push_core_regs_to_jitframe(mc, saved_regs + [r.r14])
+ self._push_core_regs_to_jitframe(mc, saved_regs)
self._push_fp_regs_to_jitframe(mc)
+ # alloc a frame for the callee
+ mc.STMG(r.r14, r.r15, l.addr(14*WORD, r.SP))
+ mc.push_std_frame()
#
if kind == 'fixed':
addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
@@ -478,10 +484,8 @@
# Do the call
addr = rffi.cast(lltype.Signed, addr)
- mc.push_std_frame()
mc.load_imm(mc.RAW_CALL_REG, addr)
mc.raw_call()
- mc.pop_std_frame()
self._reload_frame_if_necessary(mc)
@@ -490,7 +494,7 @@
# emit_call_malloc_gc()).
self.propagate_memoryerror_if_r2_is_null()
- self._pop_core_regs_from_jitframe(mc, saved_regs + [r.r14])
+ self._pop_core_regs_from_jitframe(mc, saved_regs)
self._pop_fp_regs_from_jitframe(mc)
nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
@@ -501,6 +505,8 @@
# r.RSZ is loaded from [SCRATCH], to make the caller's store a no-op here
mc.load(r.RSZ, r.r1, 0)
#
+ size = STD_FRAME_SIZE_IN_BYTES
+ mc.LMG(r.r14, r.r15, l.addr(size+14*WORD, r.SP))
mc.BCR(c.ANY, r.r14)
self.mc = None
return mc.materialize(self.cpu, [])
@@ -517,7 +523,7 @@
mc = InstrBuilder()
#
# store the link backwards
- self.mc.STMG(r.r14, r.r15, l.addr(14*WORD, r.SP))
+ mc.STMG(r.r14, r.r15, l.addr(14*WORD, r.SP))
mc.push_std_frame()
mc.LGR(r.r2, r.SP)
@@ -532,7 +538,7 @@
mc.cmp_op(r.SCRATCH, l.imm(0), imm=True)
#
size = STD_FRAME_SIZE_IN_BYTES
- self.mc.LMG(r.r14, r.r15, l.addr(size+14*WORD, r.SP)) # restore the link
+ mc.LMG(r.r14, r.r15, l.addr(size+14*WORD, r.SP)) # restore the link
# So we return to our caller, conditionally if "EQ"
mc.BCR(c.EQ, r.r14)
mc.trap() # debug if this is EVER executed!
@@ -590,11 +596,11 @@
# LGHI r0, ... (4 bytes)
# sum -> (14 bytes)
mc.write('\x00'*14)
- self.mc.push_std_frame()
+ mc.push_std_frame()
mc.load_imm(r.RETURN, self._frame_realloc_slowpath)
self.load_gcmap(mc, r.r1, gcmap)
mc.raw_call()
- self.mc.pop_std_frame()
+ mc.pop_std_frame()
self.frame_depth_to_patch.append((patch_pos, mc.currpos()))
@@ -1006,8 +1012,8 @@
# save the back chain
self.mc.STG(r.SP, l.addr(0, r.SP))
- # save r3, the second argument, to THREADLOCAL_ADDR_OFFSET
- self.mc.STG(r.r3, l.addr(THREADLOCAL_ADDR_OFFSET, r.SP))
+ # save r3, the second argument, to the thread local position
+ self.mc.STG(r.r3, l.addr(THREADLOCAL_ON_ENTER_JIT, r.SP))
# push a standard frame for any call
self.mc.push_std_frame()
@@ -1418,9 +1424,7 @@
raise AssertionError(kind)
#
# call!
- mc.push_std_frame()
mc.branch_absolute(addr)
- mc.pop_std_frame()
jmp_location = mc.currpos()
mc.reserve_cond_jump(short=True) # jump forward, patched later
diff --git a/rpython/jit/backend/zarch/callbuilder.py b/rpython/jit/backend/zarch/callbuilder.py
--- a/rpython/jit/backend/zarch/callbuilder.py
+++ b/rpython/jit/backend/zarch/callbuilder.py
@@ -62,6 +62,7 @@
# called function will in turn call further functions (which must be passed the
# address of the new frame). This stack grows downwards from high addresses
# """
+ self.subtracted_to_sp = 0
gpr_regs = 0
fpr_regs = 0
@@ -83,18 +84,18 @@
stack_params.append(i)
self.subtracted_to_sp += len(stack_params) * WORD
- base = -len(stack_params) * WORD
+ base = len(stack_params) * WORD
if self.is_call_release_gil:
self.subtracted_to_sp += 8*WORD
- base -= 8*WORD
- # one additional owrd for remap frame layout
+ base += 8*WORD
+ # one additional word for remap frame layout
# regalloc_push will overwrite -8(r.SP) and destroy
# a parameter if we would not reserve that space
- base -= WORD
- self.subtracted_to_sp += WORD
+ # base += WORD
+ # TODO self.subtracted_to_sp += WORD
for idx,i in enumerate(stack_params):
loc = arglocs[i]
- offset = base + 8 * idx
+ offset = STD_FRAME_SIZE_IN_BYTES - base + 8 * idx
if loc.type == FLOAT:
if loc.is_fp_reg():
src = loc
@@ -148,15 +149,23 @@
def emit_raw_call(self):
# always allocate a stack frame for the new function
# save the SP back chain
- self.mc.STG(r.SP, l.addr(-self.subtracted_to_sp, r.SP))
+ #self.mc.STG(r.SP, l.addr(-self.subtracted_to_sp, r.SP))
# move the frame pointer
if self.subtracted_to_sp != 0:
self.mc.LAY(r.SP, l.addr(-self.subtracted_to_sp, r.SP))
self.mc.raw_call()
+
+
+ def restore_stack_pointer(self):
+ # it must be at LEAST 160 bytes
+ if self.subtracted_to_sp != 0:
+ self.mc.LAY(r.SP, l.addr(self.subtracted_to_sp, r.SP))
+
+ def load_result(self):
+ assert (self.resloc is None or
+ self.resloc is r.GPR_RETURN or
+ self.resloc is r.FPR_RETURN)
#
- self.ensure_correct_signzero_extension()
-
- def ensure_correct_signzero_extension(self):
if self.restype == 'i' and self.ressize != WORD:
# we must be sure! libffi (s390x impl) will not return
# a sane 64 bit zero/sign extended value. fix for this
@@ -177,25 +186,14 @@
else:
assert 0, "cannot zero extend size %d" % self.ressize
-
- def restore_stack_pointer(self):
- # it must at LEAST be 160 bytes
- if self.subtracted_to_sp != 0:
- self.mc.LAY(r.SP, l.addr(self.subtracted_to_sp, r.SP))
-
- def load_result(self):
- assert (self.resloc is None or
- self.resloc is r.GPR_RETURN or
- self.resloc is r.FPR_RETURN)
-
-
def call_releasegil_addr_and_move_real_arguments(self, fastgil):
assert self.is_call_release_gil
RSHADOWOLD = self.RSHADOWOLD
RSHADOWPTR = self.RSHADOWPTR
RFASTGILPTR = self.RFASTGILPTR
#
- self.mc.STMG(r.r8, r.r13, l.addr(-7*WORD, r.SP))
+ pos = STD_FRAME_SIZE_IN_BYTES - 7*WORD
+ self.mc.STMG(r.r8, r.r13, l.addr(pos, r.SP))
# 6 registers, 1 for a floating point return value!
# registered by prepare_arguments!
#
@@ -268,26 +266,27 @@
PARAM_SAVE_AREA_OFFSET = 0
if reg is not None:
# save 1 word below the stack pointer
+ pos = STD_FRAME_SIZE_IN_BYTES
if reg.is_core_reg():
- self.mc.STG(reg, l.addr(-1*WORD, r.SP))
+ self.mc.STG(reg, l.addr(pos-1*WORD, r.SP))
elif reg.is_fp_reg():
- self.mc.STD(reg, l.addr(-1*WORD, r.SP))
- self.mc.push_std_frame(8*WORD)
+ self.mc.STD(reg, l.addr(pos-1*WORD, r.SP))
self.mc.load_imm(self.mc.RAW_CALL_REG, self.asm.reacqgil_addr)
self.mc.raw_call()
- self.mc.pop_std_frame(8*WORD)
if reg is not None:
+ pos = STD_FRAME_SIZE_IN_BYTES
if reg.is_core_reg():
- self.mc.LG(reg, l.addr(-1*WORD, r.SP))
+ self.mc.LG(reg, l.addr(pos-1*WORD, r.SP))
elif reg.is_fp_reg():
- self.mc.LD(reg, l.addr(-1*WORD, r.SP))
+ self.mc.LD(reg, l.addr(pos-1*WORD, r.SP))
# replace b1_location with BEQ(here)
pmc = OverwritingBuilder(self.mc, b1_location, 1)
pmc.BRCL(c.EQ, l.imm(self.mc.currpos() - b1_location))
pmc.overwrite()
- self.mc.LMG(r.r8, r.r13, l.addr(-7*WORD, r.SP))
+ pos = STD_FRAME_SIZE_IN_BYTES - 7*WORD
+ self.mc.LMG(r.r8, r.r13, l.addr(pos, r.SP))
def write_real_errno(self, save_err):
if save_err & rffi.RFFI_READSAVED_ERRNO:
diff --git a/rpython/jit/backend/zarch/codebuilder.py b/rpython/jit/backend/zarch/codebuilder.py
--- a/rpython/jit/backend/zarch/codebuilder.py
+++ b/rpython/jit/backend/zarch/codebuilder.py
@@ -198,7 +198,7 @@
function pointer, which means on big-endian that it is actually
the address of a three-words descriptor.
"""
- self.BASR(r.RETURN, call_reg)
+ self.BASR(r.r14, call_reg)
def reserve_cond_jump(self, short=False):
self.trap() # conditional jump, patched later
diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -530,11 +530,7 @@
mc.LGR(r.r0, loc_base) # unusual argument location
mc.load_imm(r.r14, self.wb_slowpath[helper_num])
- # alloc a stack frame
- mc.push_std_frame()
mc.BASR(r.r14, r.r14)
- # destory the frame
- mc.pop_std_frame()
if card_marking_mask:
# The helper ends again with a check of the flag in the object.
More information about the pypy-commit mailing list