[pypy-commit] pypy s390x-backend: merged the speed improvements from s390x-enhance-speed
plan_rich
pypy.commits at gmail.com
Wed Mar 9 03:53:50 EST 2016
Author: Richard Plangger <planrichi at gmail.com>
Branch: s390x-backend
Changeset: r82899:c63346ce0b33
Date: 2016-03-09 09:46 +0100
http://bitbucket.org/pypy/pypy/changeset/c63346ce0b33/
Log: merged the speed improvements from s390x-enhance-speed
diff --git a/rpython/jit/backend/zarch/assembler.py b/rpython/jit/backend/zarch/assembler.py
--- a/rpython/jit/backend/zarch/assembler.py
+++ b/rpython/jit/backend/zarch/assembler.py
@@ -50,7 +50,7 @@
self.gcrootmap_retaddr_forced = 0
self.failure_recovery_code = [0, 0, 0, 0]
self.wb_slowpath = [0,0,0,0,0]
- # self.pool = None
+ self.pool = None
def setup(self, looptoken):
BaseAssembler.setup(self, looptoken)
@@ -58,7 +58,7 @@
if we_are_translated():
self.debug = False
self.current_clt = looptoken.compiled_loop_token
- # POOL self.pool = LiteralPool()
+ self.pool = LiteralPool()
self.mc = InstrBuilder(None)
self.pending_guard_tokens = []
self.pending_guard_tokens_recovered = 0
@@ -76,7 +76,7 @@
self.current_clt = None
self._regalloc = None
self.mc = None
- # self.pool = None
+ self.pool = None
def target_arglocs(self, looptoken):
@@ -350,8 +350,8 @@
gcrootmap = self.cpu.gc_ll_descr.gcrootmap
if gcrootmap and gcrootmap.is_shadow_stack:
- diff = mc.load_imm_plus(r.r5, gcrootmap.get_root_stack_top_addr())
- mc.load(r.r5, r.r5, diff)
+ diff = mc.load_imm(r.r5, gcrootmap.get_root_stack_top_addr())
+ mc.load(r.r5, r.r5, 0)
mc.store(r.r2, r.r5, -WORD)
self._pop_core_regs_from_jitframe(mc, r.MANAGED_REGS)
@@ -636,7 +636,7 @@
#
operations = regalloc.prepare_loop(inputargs, operations,
looptoken, clt.allgcrefs)
- # POOL self.pool.pre_assemble(self, operations)
+ self.pool.pre_assemble(self, operations)
entrypos = self.mc.get_relative_pos()
self._call_header_with_stack_check()
looppos = self.mc.get_relative_pos()
@@ -645,7 +645,7 @@
self.update_frame_depth(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
#
size_excluding_failure_stuff = self.mc.get_relative_pos()
- # POOL self.pool.post_assemble(self)
+ #self.pool.post_assemble(self)
self.write_pending_failure_recoveries()
full_size = self.mc.get_relative_pos()
#
@@ -704,13 +704,13 @@
operations,
self.current_clt.allgcrefs,
self.current_clt.frame_info)
- # POOL self.pool.pre_assemble(self, operations, bridge=True)
+ self.pool.pre_assemble(self, operations, bridge=True)
startpos = self.mc.get_relative_pos()
- # POOL self.mc.LARL(r.POOL, l.halfword(self.pool.pool_start - startpos))
+ self.mc.LARL(r.POOL, l.halfword(self.pool.pool_start - startpos))
self._check_frame_depth(self.mc, regalloc.get_gcmap())
frame_depth_no_fixed_size = self._assemble(regalloc, inputargs, operations)
codeendpos = self.mc.get_relative_pos()
- # POOL self.pool.post_assemble(self)
+ #self.pool.post_assemble(self)
self.write_pending_failure_recoveries()
fullsize = self.mc.get_relative_pos()
#
@@ -735,7 +735,6 @@
# 'faildescr.adr_jump_offset' is the address of an instruction that is a
# conditional jump. We must patch this conditional jump to go
# to 'adr_new_target'.
- # Updates the pool address
mc = InstrBuilder()
mc.b_abs(adr_new_target)
mc.copy_to_raw_memory(faildescr.adr_jump_offset)
@@ -922,14 +921,17 @@
return
assert 0, "not supported location"
elif prev_loc.is_in_pool():
+ if loc.is_core_reg():
+ self.mc.LG(loc, prev_loc)
+ return
# move immediate value to fp register
if loc.is_fp_reg():
- self.mc.LD(loc, prev_loc)
+ self.mc.LDY(loc, prev_loc)
return
# move immediate value to memory
elif loc.is_stack():
offset = loc.value
- self.mc.LD(r.FP_SCRATCH, prev_loc)
+ self.mc.LDY(r.FP_SCRATCH, prev_loc)
self.mc.STDY(r.FP_SCRATCH, l.addr(offset, r.SPP))
return
assert 0, "not supported location"
@@ -976,9 +978,8 @@
if gcrootmap:
if gcrootmap.is_shadow_stack:
if shadowstack_reg is None:
- diff = mc.load_imm_plus(r.SPP,
- gcrootmap.get_root_stack_top_addr())
- mc.load(r.SPP, r.SPP, diff)
+ diff = mc.load_imm(r.SPP, gcrootmap.get_root_stack_top_addr())
+ mc.load(r.SPP, r.SPP, 0)
shadowstack_reg = r.SPP
mc.load(r.SPP, shadowstack_reg, -WORD)
wbdescr = self.cpu.gc_ll_descr.write_barrier_descr
@@ -1019,7 +1020,7 @@
# Build a new stackframe of size STD_FRAME_SIZE_IN_BYTES
fpoff = JIT_ENTER_EXTRA_STACK_SPACE
self.mc.STMG(r.r6, r.r15, l.addr(-fpoff+6*WORD, r.SP))
- # POOL self.mc.LARL(r.POOL, l.halfword(self.pool.pool_start - self.mc.get_relative_pos()))
+ self.mc.LARL(r.POOL, l.halfword(self.pool.pool_start - self.mc.get_relative_pos()))
# f8 through f15 are saved registers (= non volatile)
# TODO it would be good to detect if any float is used in the loop
# and to skip this push/pop whenever no float operation occurs
@@ -1046,38 +1047,39 @@
def _call_header_shadowstack(self, gcrootmap):
# we need to put one word into the shadowstack: the jitframe (SPP)
# we saved all registers to the stack
- RCS1 = r.r2
- RCS2 = r.r3
- RCS3 = r.r4
+ RCS1 = r.r3
+ RCS2 = r.r4
+ RCS3 = r.r5
mc = self.mc
- diff = mc.load_imm_plus(RCS1, gcrootmap.get_root_stack_top_addr())
- mc.load(RCS2, RCS1, diff) # ld RCS2, [rootstacktop]
+ mc.load_imm(RCS1, gcrootmap.get_root_stack_top_addr())
+ mc.load(RCS2, RCS1, 0) # ld RCS2, [rootstacktop]
#
mc.LGR(RCS3, RCS2)
mc.AGHI(RCS3, l.imm(WORD)) # add RCS3, RCS2, WORD
mc.store(r.SPP, RCS2, 0) # std SPP, RCS2
#
- mc.store(RCS3, RCS1, diff) # std RCS3, [rootstacktop]
+ mc.store(RCS3, RCS1, 0) # std RCS3, [rootstacktop]
def _call_footer_shadowstack(self, gcrootmap):
# r6 -> r15 can be used freely, they will be restored by
# _call_footer after this call
- RCS1 = r.r9
- RCS2 = r.r10
+ RCS1 = r.r8
+ RCS2 = r.r7
mc = self.mc
- diff = mc.load_imm_plus(RCS1, gcrootmap.get_root_stack_top_addr())
- mc.load(RCS2, RCS1, diff) # ld RCS2, [rootstacktop]
+ mc.load_imm(RCS1, gcrootmap.get_root_stack_top_addr())
+ mc.load(RCS2, RCS1, 0) # ld RCS2, [rootstacktop]
mc.AGHI(RCS2, l.imm(-WORD)) # sub RCS2, RCS2, WORD
- mc.store(RCS2, RCS1, diff) # std RCS2, [rootstacktop]
+ mc.store(RCS2, RCS1, 0) # std RCS2, [rootstacktop]
def _call_footer(self):
- # the return value is the jitframe
- self.mc.LGR(r.r2, r.SPP)
gcrootmap = self.cpu.gc_ll_descr.gcrootmap
if gcrootmap and gcrootmap.is_shadow_stack:
self._call_footer_shadowstack(gcrootmap)
+ # the return value is the jitframe
+ self.mc.LGR(r.r2, r.SPP)
+
size = STD_FRAME_SIZE_IN_BYTES
# f8 through f15 are saved registers (= non volatile)
# TODO it would be good to detect if any float is used in the loop
@@ -1180,11 +1182,9 @@
# ASSEMBLER EMISSION
def emit_label(self, op, arglocs, regalloc):
- pass
- # POOL
- #offset = self.pool.pool_start - self.mc.get_relative_pos()
+ offset = self.pool.pool_start - self.mc.get_relative_pos()
# load the pool address at each label
- #self.mc.LARL(r.POOL, l.halfword(offset))
+ self.mc.LARL(r.POOL, l.halfword(offset))
def emit_jump(self, op, arglocs, regalloc):
# The backend's logic assumes that the target code is in a piece of
@@ -1201,7 +1201,7 @@
if descr in self.target_tokens_currently_compiling:
# a label has a LARL instruction that does not need
# to be executed, thus remove the first opcode
- self.mc.b_offset(descr._ll_loop_code) # POOL + self.mc.LARL_byte_count)
+ self.mc.b_offset(descr._ll_loop_code + self.mc.LARL_byte_count)
else:
# POOL
#offset = self.pool.get_descr_offset(descr) + \
@@ -1249,11 +1249,11 @@
gcmap = self._finish_gcmap
else:
gcmap = lltype.nullptr(jitframe.GCMAP)
- self.load_gcmap(self.mc, r.r2, gcmap)
+ self.load_gcmap(self.mc, r.r9, gcmap)
- self.mc.load_imm(r.r3, fail_descr_loc.getint())
- self.mc.STG(r.r3, l.addr(ofs, r.SPP))
- self.mc.STG(r.r2, l.addr(ofs2, r.SPP))
+ self.mc.load_imm(r.r10, fail_descr_loc.getint())
+ self.mc.STG(r.r9, l.addr(ofs2, r.SPP))
+ self.mc.STG(r.r10, l.addr(ofs, r.SPP))
# exit function
self._call_footer()
diff --git a/rpython/jit/backend/zarch/codebuilder.py b/rpython/jit/backend/zarch/codebuilder.py
--- a/rpython/jit/backend/zarch/codebuilder.py
+++ b/rpython/jit/backend/zarch/codebuilder.py
@@ -35,7 +35,6 @@
GuardToken.__init__(self, cpu, gcmap, descr, failargs, faillocs,
guard_opnum, frame_depth)
self.fcond = fcond
- # POOL self._pool_offset = -1
class AbstractZARCHBuilder(object):
diff --git a/rpython/jit/backend/zarch/helper/assembler.py b/rpython/jit/backend/zarch/helper/assembler.py
--- a/rpython/jit/backend/zarch/helper/assembler.py
+++ b/rpython/jit/backend/zarch/helper/assembler.py
@@ -12,8 +12,7 @@
l1 = arglocs[1]
assert not l0.is_imm()
# do the comparison
- # POOL self.mc.cmp_op(l0, l1, pool=l1.is_in_pool(), imm=l1.is_imm(), signed=signed, fp=fp)
- self.mc.cmp_op(l0, l1, imm=l1.is_imm(), signed=signed, fp=fp)
+ self.mc.cmp_op(l0, l1, pool=l1.is_in_pool(), imm=l1.is_imm(), signed=signed, fp=fp)
self.flush_cc(condition, arglocs[2])
@@ -30,31 +29,21 @@
f.name = 'emit_shift_' + func
return f
-def gen_emit_rr(rr_func):
+def gen_emit_rr_rp(rr_func, rp_func):
def f(self, op, arglocs, regalloc):
l0, l1 = arglocs
- getattr(self.mc, rr_func)(l0, l1)
+ if l1.is_in_pool():
+ getattr(self.mc, rp_func)(l0, l1)
+ else:
+ getattr(self.mc, rr_func)(l0, l1)
return f
-# POOL
-#def gen_emit_rr_or_rpool(rr_func, rp_func):
-# """ the parameters can either be both in registers or
-# the first is in the register, second in literal pool.
-# """
-# def f(self, op, arglocs, regalloc):
-# l0, l1 = arglocs
-# if l1.is_imm() and not l1.is_in_pool():
-# assert 0, "logical imm must reside in pool!"
-# if l1.is_in_pool():
-# getattr(self.mc, rp_func)(l0, l1)
-# else:
-# getattr(self.mc, rr_func)(l0, l1)
-# return f
-
-def gen_emit_rr_rh_ri(rr_func, rh_func, ri_func):
+def gen_emit_rr_rh_ri_rp(rr_func, rh_func, ri_func, rp_func):
def emit(self, op, arglocs, regalloc):
l0, l1 = arglocs
- if l1.is_imm():
+ if l1.is_in_pool():
+ getattr(self.mc, rp_func)(l0, l1)
+ elif l1.is_imm():
if check_imm_value(l1.value):
getattr(self.mc, rh_func)(l0, l1)
else:
@@ -63,27 +52,18 @@
getattr(self.mc, rr_func)(l0, l1)
return emit
-# POOL
-#def gen_emit_imm_pool_rr(imm_func, pool_func, rr_func):
-# def emit(self, op, arglocs, regalloc):
-# l0, l1 = arglocs
-# if l1.is_in_pool():
-# getattr(self.mc, pool_func)(l0, l1)
-# elif l1.is_imm():
-# getattr(self.mc, imm_func)(l0, l1)
-# else:
-# getattr(self.mc, rr_func)(l0, l1)
-# return emit
-
-def gen_emit_div_mod(rr_func):
+def gen_emit_div_mod(rr_func, rp_func):
def emit(self, op, arglocs, regalloc):
lr, lq, l1 = arglocs # lr == remainder, lq == quotient
# when entering the function lr contains the dividend
# after this operation either lr or lq is used further
assert not l1.is_imm(), "imm divider not supported"
- # remainer is always a even register r0, r2, ... , r14
+ # remainder is always an even register r0, r2, ... , r14
assert lr.is_even()
assert lq.is_odd()
self.mc.XGR(lr, lr)
- getattr(self.mc,rr_func)(lr, l1)
+ if l1.is_in_pool():
+ getattr(self.mc,rp_func)(lr, l1)
+ else:
+ getattr(self.mc,rr_func)(lr, l1)
return emit
diff --git a/rpython/jit/backend/zarch/helper/regalloc.py b/rpython/jit/backend/zarch/helper/regalloc.py
--- a/rpython/jit/backend/zarch/helper/regalloc.py
+++ b/rpython/jit/backend/zarch/helper/regalloc.py
@@ -26,8 +26,7 @@
if check_imm32(a1):
l1 = imm(a1.getint())
else:
- # POOL l1 = self.ensure_reg_or_pool(a1)
- l1 = self.ensure_reg(a1)
+ l1 = self.ensure_reg_or_pool(a1)
l0 = self.force_result_in_reg(op, a0)
return [l0, l1]
@@ -39,7 +38,7 @@
if check_imm32(a1):
l1 = imm(a1.getint())
else:
- l1 = self.ensure_reg(a1)
+ l1 = self.ensure_reg_or_pool(a1)
l0 = self.force_result_in_reg(op, a0)
return [l0, l1]
@@ -51,7 +50,7 @@
if check_imm32(a1):
l1 = imm(a1.getint())
else:
- l1 = self.ensure_reg(a1)
+ l1 = self.ensure_reg_or_pool(a1)
lr,lq = self.rm.ensure_even_odd_pair(a0, op, bind_first=False)
return [lr, lq, l1]
@@ -61,7 +60,7 @@
a1 = op.getarg(1)
l1 = self.ensure_reg(a1)
if isinstance(a0, Const):
- loc = self.ensure_reg(a0)
+ loc = self.ensure_reg_or_pool(a0)
lr,lq = self.rm.ensure_even_odd_pair(a0, op,
bind_first=modulus, must_exist=False,
move_regs=False)
@@ -78,7 +77,6 @@
a0 = op.getarg(0)
a1 = op.getarg(1)
# sub is not commutative, thus cannot swap operands
- # POOL l1 = self.ensure_reg_or_pool(a1)
l0 = self.ensure_reg(a0)
l1 = self.ensure_reg(a1)
res = self.force_allocate_reg(op)
diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -3,7 +3,7 @@
STD_FRAME_SIZE_IN_BYTES)
from rpython.jit.backend.zarch.arch import THREADLOCAL_ADDR_OFFSET
from rpython.jit.backend.zarch.helper.assembler import (gen_emit_cmp_op,
- gen_emit_rr, gen_emit_shift, gen_emit_rr_rh_ri, gen_emit_div_mod)
+ gen_emit_rr_rp, gen_emit_shift, gen_emit_rr_rh_ri_rp, gen_emit_div_mod)
from rpython.jit.backend.zarch.helper.regalloc import (check_imm,
check_imm_value)
from rpython.jit.metainterp.history import (ConstInt)
@@ -28,7 +28,7 @@
class IntOpAssembler(object):
_mixin_ = True
- emit_int_add = gen_emit_rr_rh_ri('AGR', 'AGHI', 'AGFI')
+ emit_int_add = gen_emit_rr_rh_ri_rp('AGR', 'AGHI', 'AGFI', 'AG')
emit_int_add_ovf = emit_int_add
emit_nursery_ptr_increment = emit_int_add
@@ -36,25 +36,16 @@
def emit_int_sub(self, op, arglocs, regalloc):
res, l0, l1 = arglocs
self.mc.SGRK(res, l0, l1)
- # POOL
- #if l1.is_imm() and not l1.is_in_pool():
- # assert 0, "logical imm must reside in pool!"
- #if l1.is_in_pool():
- # self.mc.SG(l0, l1)
- #else:
- # self.mc.SGR(l0, l1)
emit_int_sub_ovf = emit_int_sub
- emit_int_mul = gen_emit_rr_rh_ri('MSGR', 'MGHI', 'MSGFI')
+ emit_int_mul = gen_emit_rr_rh_ri_rp('MSGR', 'MGHI', 'MSGFI', 'MSG')
def emit_int_mul_ovf(self, op, arglocs, regalloc):
lr, lq, l1 = arglocs
- # POOL
- # if l1.is_in_pool():
- # self.mc.LG(r.SCRATCH, l1)
- # l1 = r.SCRATCH
- # elif
- if l1.is_imm():
+ if l1.is_in_pool():
+ self.mc.LG(r.SCRATCH, l1)
+ l1 = r.SCRATCH
+ elif l1.is_imm():
self.mc.LGFI(r.SCRATCH, l1)
l1 = r.SCRATCH
else:
@@ -169,11 +160,11 @@
omc.BRC(c.ANY, l.imm(label_end - jmp_neither_lqlr_overflow))
omc.overwrite()
- emit_int_floordiv = gen_emit_div_mod('DSGR')
- emit_uint_floordiv = gen_emit_div_mod('DLGR')
+ emit_int_floordiv = gen_emit_div_mod('DSGR', 'DSG')
+ emit_uint_floordiv = gen_emit_div_mod('DLGR', 'DLG')
# NOTE division sets one register with the modulo value, thus
# the regalloc ensures the right register survives.
- emit_int_mod = gen_emit_div_mod('DSGR')
+ emit_int_mod = gen_emit_div_mod('DSGR', 'DSG')
def emit_int_invert(self, op, arglocs, regalloc):
l0, = arglocs
@@ -213,9 +204,9 @@
self.mc.CGHI(l0, l.imm(0))
self.flush_cc(c.NE, res)
- emit_int_and = gen_emit_rr("NGR")
- emit_int_or = gen_emit_rr("OGR")
- emit_int_xor = gen_emit_rr("XGR")
+ emit_int_and = gen_emit_rr_rp("NGR", "NG")
+ emit_int_or = gen_emit_rr_rp("OGR", "OG")
+ emit_int_xor = gen_emit_rr_rp("XGR", "XG")
emit_int_rshift = gen_emit_shift("SRAG")
emit_int_lshift = gen_emit_shift("SLLG")
@@ -242,10 +233,10 @@
class FloatOpAssembler(object):
_mixin_ = True
- emit_float_add = gen_emit_rr('ADBR')
- emit_float_sub = gen_emit_rr('SDBR')
- emit_float_mul = gen_emit_rr('MDBR')
- emit_float_truediv = gen_emit_rr('DDBR')
+ emit_float_add = gen_emit_rr_rp('ADBR', 'ADB')
+ emit_float_sub = gen_emit_rr_rp('SDBR', 'SDB')
+ emit_float_mul = gen_emit_rr_rp('MDBR', 'MDB')
+ emit_float_truediv = gen_emit_rr_rp('DDBR', 'DDB')
# Support for NaNs: S390X sets condition code to 0x3 (unordered)
# whenever any operand is nan.
@@ -1072,7 +1063,7 @@
self._store_force_index(self._find_nearby_operation(regalloc, +1))
# 'result_loc' is either r2, f0 or None
self.call_assembler(op, argloc, vloc, result_loc, r.r2)
- # POOL self.mc.LARL(r.POOL, l.halfword(self.pool.pool_start - self.mc.get_relative_pos()))
+ self.mc.LARL(r.POOL, l.halfword(self.pool.pool_start - self.mc.get_relative_pos()))
emit_call_assembler_i = _genop_call_assembler
emit_call_assembler_r = _genop_call_assembler
diff --git a/rpython/jit/backend/zarch/pool.py b/rpython/jit/backend/zarch/pool.py
--- a/rpython/jit/backend/zarch/pool.py
+++ b/rpython/jit/backend/zarch/pool.py
@@ -1,5 +1,6 @@
from rpython.jit.backend.zarch import registers as r
from rpython.jit.backend.zarch import locations as l
+from rpython.rlib import rgil
from rpython.jit.metainterp.history import (INT, REF, FLOAT,
TargetToken)
from rpython.rlib.objectmodel import we_are_translated
@@ -19,81 +20,25 @@
self.size = 0
# the offset to index the pool
self.pool_start = 0
- self.label_offset = 0
- self.label_count = 0
# for constant offsets
self.offset_map = {}
# for descriptors
self.offset_descr = {}
- self.constant_64_zeros = -1
- self.constant_64_ones = -1
- self.constant_64_sign_bit = -1
- self.constant_max_64_positive = -1
+
+ def reset(self):
+ self.pool_start = 0
+ self.size = 0
+ self.offset_map = {}
+ self.offset_descr = {}
def ensure_can_hold_constants(self, asm, op):
- opnum = op.getopnum()
- if op.is_guard():
- # 1x gcmap pointer
- # 1x target address
- self.offset_descr[op.getdescr()] = self.size
- self.allocate_slot(2*8)
- elif op.getopnum() == rop.JUMP:
- descr = op.getdescr()
- if descr not in asm.target_tokens_currently_compiling:
- # this is a 'long' jump instead of a relative jump
- self.offset_descr[descr] = self.size
- self.allocate_slot(8)
- elif op.getopnum() == rop.LABEL:
- descr = op.getdescr()
- if descr not in asm.target_tokens_currently_compiling:
- # this is a 'long' jump instead of a relative jump
- self.offset_descr[descr] = self.size
- self.allocate_slot(8)
- elif op.getopnum() == rop.INT_INVERT:
- self.constant_64_ones = 1 # we need constant ones!!!
- elif op.getopnum() == rop.INT_MUL_OVF:
- self.constant_64_sign_bit = 1
- self.constant_max_64_positive = 1
- elif opnum == rop.INT_RSHIFT or opnum == rop.INT_LSHIFT or \
- opnum == rop.UINT_RSHIFT:
- a0 = op.getarg(0)
- if a0.is_constant():
- self.reserve_literal(8, a0)
+ # allocates 8 bytes in memory for pointers, long integers or floats
+ if op.is_jit_debug():
return
- elif opnum == rop.GC_STORE or opnum == rop.GC_STORE_INDEXED:
- arg = op.getarg(0)
- if arg.is_constant():
- self.reserve_literal(8, arg)
- arg = op.getarg(1)
- if arg.is_constant():
- self.reserve_literal(8, arg)
- arg = op.getarg(2)
- if arg.is_constant():
- self.reserve_literal(8, arg)
- return
- elif opnum in (rop.GC_LOAD_F,
- rop.GC_LOAD_I,
- rop.GC_LOAD_R,) \
- or opnum in (rop.GC_LOAD_INDEXED_F,
- rop.GC_LOAD_INDEXED_R,
- rop.GC_LOAD_INDEXED_I,):
- arg = op.getarg(0)
- if arg.is_constant():
- self.reserve_literal(8, arg)
- arg = op.getarg(1)
- if arg.is_constant():
- self.reserve_literal(8, arg)
- return
- elif op.is_call_release_gil():
- for arg in op.getarglist()[1:]:
- if arg.is_constant():
- self.reserve_literal(8, arg)
- return
- elif opnum == rop.COND_CALL_GC_WB_ARRAY:
- self.constant_64_ones = 1 # we need constant ones!!!
+
for arg in op.getarglist():
if arg.is_constant():
- self.reserve_literal(8, arg)
+ self.reserve_literal(8, arg, asm)
def contains_constant(self, unique_val):
return unique_val in self.offset_map
@@ -101,6 +46,10 @@
def get_descr_offset(self, descr):
return self.offset_descr[descr]
+ def contains_box(self, box):
+ uvalue = self.unique_value(box)
+ return self.contains_constant(uvalue)
+
def get_offset(self, box):
assert box.is_constant()
uvalue = self.unique_value(box)
@@ -108,11 +57,6 @@
assert self.offset_map[uvalue] >= 0
return self.offset_map[uvalue]
- def get_direct_offset(self, unique_val):
- """ Get the offset directly using a unique value,
- use get_offset if you have a Const box """
- return self.offset_map[unique_val]
-
def unique_value(self, val):
if val.type == FLOAT:
if val.getfloat() == 0.0:
@@ -124,21 +68,14 @@
assert val.type == REF
return rffi.cast(lltype.Signed, val.getref_base())
- def reserve_literal(self, size, box):
+ def reserve_literal(self, size, box, asm):
uvalue = self.unique_value(box)
- if uvalue not in self.offset_map:
- self.offset_map[uvalue] = self.size
- self.allocate_slot(size)
-
- def reset(self):
- self.pool_start = 0
- self.label_offset = 0
- self.size = 0
- self.offset_map = {}
- self.constant_64_zeros = -1
- self.constant_64_ones = -1
- self.constant_64_sign_bit = -1
- self.constant_max_64_positive = -1
+ if box.type == INT and -2**31 <= uvalue <= 2**31-1:
+ # we do not allocate non 64 bit values, these
+ # can be loaded as imm by LGHI/LGFI
+ return
+ #
+ self._ensure_value(uvalue, asm)
def check_size(self, size=-1):
if size == -1:
@@ -149,18 +86,19 @@
llop.debug_print(lltype.Void, msg)
raise PoolOverflow(msg)
+ def _ensure_value(self, uvalue, asm):
+ if uvalue not in self.offset_map:
+ self.offset_map[uvalue] = self.size
+ self.allocate_slot(8)
+ asm.mc.write_i64(uvalue)
+ return self.offset_map[uvalue]
+
def allocate_slot(self, size):
val = self.size + size
self.check_size(val)
self.size = val
assert val >= 0
- def ensure_value(self, val):
- if val not in self.offset_map:
- self.offset_map[val] = self.size
- self.allocate_slot(8)
- return self.offset_map[val]
-
def pre_assemble(self, asm, operations, bridge=False):
# O(len(operations)). I do not think there is a way
# around this.
@@ -179,27 +117,34 @@
self.pool_start = asm.mc.get_relative_pos()
for op in operations:
self.ensure_can_hold_constants(asm, op)
- self.ensure_value(asm.cpu.pos_exc_value())
+ self._ensure_value(asm.cpu.pos_exc_value(), asm)
+ # the top of shadow stack
+ gcrootmap = asm.cpu.gc_ll_descr.gcrootmap
+ if gcrootmap and gcrootmap.is_shadow_stack:
+ self._ensure_value(gcrootmap.get_root_stack_top_addr(), asm)
+ # endaddr of insert stack check
+ endaddr, lengthaddr, _ = asm.cpu.insert_stack_check()
+ self._ensure_value(endaddr, asm)
+ # fast gil
+ fastgil = rffi.cast(lltype.Signed, rgil.gil_fetch_fastgil())
+ self._ensure_value(fastgil, asm)
# TODO add more values that are loaded with load_imm
- if self.size == 0:
- # no pool needed!
- return
- assert self.size % 2 == 0, "not aligned properly"
- if self.constant_64_ones != -1:
- self.constant_64_ones = self.ensure_value(-1)
- if self.constant_64_zeros != -1:
- self.constant_64_zeros = self.ensure_value(0x0)
- if self.constant_64_sign_bit != -1:
- self.constant_64_sign_bit = self.ensure_value(-2**63) # == 0x8000000000000000
- if self.constant_max_64_positive != -1:
- self.constant_max_64_positive = self.ensure_value(0x7fffFFFFffffFFFF)
- asm.mc.write('\x00' * self.size)
- wrote = 0
- for val, offset in self.offset_map.items():
- self.overwrite_64(asm.mc, offset, val)
- wrote += 8
- def overwrite_64(self, mc, index, value):
+ # XXX def post_assemble(self, asm):
+ # XXX mc = asm.mc
+ # XXX pending_guard_tokens = asm.pending_guard_tokens
+ # XXX if self.size == 0:
+ # XXX return
+ # XXX for guard_token in pending_guard_tokens:
+ # XXX descr = guard_token.faildescr
+ # XXX offset = self.offset_descr[descr]
+ # XXX assert isinstance(offset, int)
+ # XXX assert offset >= 0
+ # XXX assert guard_token._pool_offset != -1
+ # XXX ptr = rffi.cast(lltype.Signed, guard_token.gcmap)
+ # XXX self._overwrite_64(mc, offset + RECOVERY_GCMAP_POOL_OFFSET, ptr)
+
+ def _overwrite_64(self, mc, index, value):
index += self.pool_start
mc.overwrite(index, chr(value >> 56 & 0xff))
@@ -210,17 +155,3 @@
mc.overwrite(index+5, chr(value >> 16 & 0xff))
mc.overwrite(index+6, chr(value >> 8 & 0xff))
mc.overwrite(index+7, chr(value & 0xff))
-
- def post_assemble(self, asm):
- mc = asm.mc
- pending_guard_tokens = asm.pending_guard_tokens
- if self.size == 0:
- return
- for guard_token in pending_guard_tokens:
- descr = guard_token.faildescr
- offset = self.offset_descr[descr]
- assert isinstance(offset, int)
- assert offset >= 0
- assert guard_token._pool_offset != -1
- ptr = rffi.cast(lltype.Signed, guard_token.gcmap)
- self.overwrite_64(mc, offset + RECOVERY_GCMAP_POOL_OFFSET, ptr)
diff --git a/rpython/jit/backend/zarch/regalloc.py b/rpython/jit/backend/zarch/regalloc.py
--- a/rpython/jit/backend/zarch/regalloc.py
+++ b/rpython/jit/backend/zarch/regalloc.py
@@ -62,44 +62,24 @@
assert set(save_around_call_regs).issubset(all_regs)
pool = None
- def convert_to_adr(self, c):
- assert isinstance(c, ConstFloat)
- adr = self.assembler.datablockwrapper.malloc_aligned(8, 8)
- x = c.getfloatstorage()
- rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = x
- return adr
-
- def convert_to_imm(self, c):
- adr = self.convert_to_adr(c)
- return l.ConstFloatLoc(adr)
-
- # POOL
- #def convert_to_imm(self, c):
- # off = self.pool.get_offset(c)
- # return l.pool(off, float=True)
-
def __init__(self, longevity, frame_manager=None, assembler=None):
RegisterManager.__init__(self, longevity, frame_manager, assembler)
def call_result_location(self, v):
return r.FPR_RETURN
- # POOL
- # def place_in_pool(self, var):
- # offset = self.assembler.pool.get_offset(var)
- # return l.pool(offset, float=True)
+ def convert_to_imm(self, c):
+ return l.pool(self.assembler.pool.get_offset(c), float=True)
- # POOL
- #def ensure_reg_or_pool(self, box):
- # if isinstance(box, Const):
- # loc = self.get_scratch_reg()
- # immvalue = self.convert_to_int(box)
- # self.assembler.mc.load_imm(loc, immvalue)
- # else:
- # assert box in self.temp_boxes
- # loc = self.make_sure_var_in_reg(box,
- # forbidden_vars=self.temp_boxes)
- # return loc
+ def ensure_reg_or_pool(self, box):
+ if isinstance(box, Const):
+ offset = self.assembler.pool.get_offset(box)
+ return l.pool(offset, float=True)
+ else:
+ assert box in self.temp_boxes
+ loc = self.make_sure_var_in_reg(box,
+ forbidden_vars=self.temp_boxes)
+ return loc
def get_scratch_reg(self):
box = TempVar()
@@ -109,21 +89,14 @@
def ensure_reg(self, box):
if isinstance(box, Const):
- # POOL
- #poolloc = self.place_in_pool(box)
- #tmp = TempVar()
- #reg = self.force_allocate_reg(tmp, self.temp_boxes)
- #self.temp_boxes.append(tmp)
- #assert poolloc.displace >= 0
- #if poolloc.displace <= 2**12-1:
- # self.assembler.mc.LD(reg, poolloc)
- #else:
- # self.assembler.mc.LDY(reg, poolloc)
- loc = self.get_scratch_reg()
- immadrvalue = self.convert_to_adr(box)
- mc = self.assembler.mc
- mc.load_imm(r.SCRATCH, immadrvalue)
- mc.LD(loc, l.addr(0, r.SCRATCH))
+ offset = self.assembler.pool.get_offset(box)
+ poolloc = l.pool(offset, float=True)
+ reg = self.get_scratch_reg()
+ if poolloc.displace <= 2**11-1:
+ self.assembler.mc.LD(reg, poolloc)
+ else:
+ self.assembler.mc.LDY(reg, poolloc)
+ return reg
else:
assert box in self.temp_boxes
loc = self.make_sure_var_in_reg(box,
@@ -159,32 +132,25 @@
assert isinstance(c, ConstPtr)
return rffi.cast(lltype.Signed, c.value)
+ def ensure_reg_or_pool(self, box):
+ if isinstance(box, Const):
+ if self.assembler.pool.contains_box(box):
+ offset = self.assembler.pool.get_offset(box)
+ return l.pool(offset)
+ else:
+ return self.ensure_reg(box)
+ else:
+ assert box in self.temp_boxes
+ loc = self.make_sure_var_in_reg(box,
+ forbidden_vars=self.temp_boxes)
+ return loc
+
def convert_to_imm(self, c):
- val = self.convert_to_int(c)
- return l.imm(val)
+ if self.assembler.pool.contains_box(c):
+ return l.pool(self.assembler.pool.get_offset(c))
+ immvalue = self.convert_to_int(c)
+ return l.imm(immvalue)
- # POOL
- #def convert_to_imm(self, c):
- # off = self.pool.get_offset(c)
- # return l.pool(off)
-
- #def ensure_reg_or_pool(self, box):
- # if isinstance(box, Const):
- # offset = self.assembler.pool.get_offset(box)
- # return l.pool(offset)
- # else:
- # assert box in self.temp_boxes
- # loc = self.make_sure_var_in_reg(box,
- # forbidden_vars=self.temp_boxes)
- # return loc
-
- # POOL
- #offset = self.assembler.pool.get_offset(box)
- #poolloc = l.pool(offset)
- #tmp = TempInt()
- #reg = self.force_allocate_reg(tmp, forbidden_vars=self.temp_boxes)
- #self.temp_boxes.append(tmp)
- #self.assembler.mc.LG(reg, poolloc)
def ensure_reg(self, box):
if isinstance(box, Const):
loc = self.get_scratch_reg()
@@ -388,10 +354,10 @@
self.rm = ZARCHRegisterManager(self.longevity,
frame_manager = self.fm,
assembler = self.assembler)
- #self.rm.pool = self.assembler.pool
+ self.rm.pool = self.assembler.pool
self.fprm = FPRegisterManager(self.longevity, frame_manager = self.fm,
assembler = self.assembler)
- #self.fprm.pool = self.assembler.pool
+ self.fprm.pool = self.assembler.pool
return operations
def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
@@ -607,12 +573,11 @@
else:
return self.rm.call_result_location(v)
- # POOL
- #def ensure_reg_or_pool(self, box):
- # if box.type == FLOAT:
- # return self.fprm.ensure_reg_or_pool(box)
- # else:
- # return self.rm.ensure_reg_or_pool(box)
+ def ensure_reg_or_pool(self, box):
+ if box.type == FLOAT:
+ return self.fprm.ensure_reg_or_pool(box)
+ else:
+ return self.rm.ensure_reg_or_pool(box)
def ensure_reg(self, box):
if box.type == FLOAT:
diff --git a/rpython/jit/backend/zarch/registers.py b/rpython/jit/backend/zarch/registers.py
--- a/rpython/jit/backend/zarch/registers.py
+++ b/rpython/jit/backend/zarch/registers.py
@@ -7,7 +7,7 @@
[r0,r1,r2,r3,r4,r5,r6,r7,r8,
r9,r10,r11,r12,r13,r14,r15] = registers
-MANAGED_REGS = [r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r13] # keep this list sorted (asc)!
+MANAGED_REGS = [r2,r3,r4,r5,r6,r7,r8,r9,r10,r11] # keep this list sorted (asc)!
MANAGED_REG_PAIRS = [(r2,r3), (r4,r5), (r6,r7), (r8,r9), (r10,r11)]
VOLATILES = [r2,r3,r4,r5,r6]
SP = r15
@@ -39,6 +39,7 @@
for _r in MANAGED_FP_REGS:
ALL_REG_INDEXES[_r] = len(ALL_REG_INDEXES)
# NOT used, but keeps JITFRAME_FIXED_SIZE even
+ALL_REG_INDEXES[f15] = len(ALL_REG_INDEXES)
JITFRAME_FIXED_SIZE = len(ALL_REG_INDEXES)
def odd_reg(r):
diff --git a/rpython/jit/backend/zarch/test/test_pool.py b/rpython/jit/backend/zarch/test/test_pool.py
--- a/rpython/jit/backend/zarch/test/test_pool.py
+++ b/rpython/jit/backend/zarch/test/test_pool.py
@@ -12,13 +12,18 @@
from rpython.jit.backend.detect_cpu import getcpuclass
from rpython.jit.tool.oparser import parse
+class FakeAsm(object):
+ def write_i64(self, val):
+ pass
+
class TestPoolZARCH(object):
def setup_class(self):
self.calldescr = None
def setup_method(self, name):
self.pool = LiteralPool()
- self.asm = None
+ self.asm = FakeAsm()
+ self.asm.mc = FakeAsm()
self.cpu = getcpuclass()(None, None)
self.cpu.setup_once()
@@ -34,20 +39,20 @@
return False
def test_constant_in_call_malloc(self):
- c = ConstPtr(rffi.cast(llmemory.GCREF, 0xdeadbeef))
+ c = ConstPtr(rffi.cast(llmemory.GCREF, 0xdeadbeef1234))
self.ensure_can_hold(rop.CALL_MALLOC_GC, [c], descr=self.calldescr)
assert self.const_in_pool(c)
- assert self.const_in_pool(ConstPtr(rffi.cast(llmemory.GCREF, 0xdeadbeef)))
+ assert self.const_in_pool(ConstPtr(rffi.cast(llmemory.GCREF, 0xdeadbeef1234)))
@py.test.mark.parametrize('opnum',
[rop.INT_ADD, rop.INT_SUB, rop.INT_MUL])
def test_constants_arith(self, opnum):
for c1 in [ConstInt(1), ConstInt(2**44), InputArgInt(1)]:
- for c2 in [InputArgInt(1), ConstInt(1), ConstInt(2**55)]:
+ for c2 in [InputArgInt(1), ConstInt(-2**33), ConstInt(2**55)]:
self.ensure_can_hold(opnum, [c1,c2])
- if c1.is_constant():
+ if c1.is_constant() and not -2**31 <= c1.getint() <= 2**31-1:
assert self.const_in_pool(c1)
- if c2.is_constant():
+ if c2.is_constant() and not -2**31 <= c2.getint() <= 2**31-1:
assert self.const_in_pool(c2)
def test_pool_overflow(self):
diff --git a/rpython/jit/backend/zarch/test/test_runner.py b/rpython/jit/backend/zarch/test/test_runner.py
--- a/rpython/jit/backend/zarch/test/test_runner.py
+++ b/rpython/jit/backend/zarch/test/test_runner.py
@@ -24,6 +24,6 @@
cpu.setup_once()
return cpu
- add_loop_instructions = "lg; lgr; agr; cgfi; jge; j;$"
- bridge_loop_instructions = "lg; cgfi; jnl; lghi; " \
- "iilf;( iihf;)? iilf;( iihf;)? basr; iilf;( iihf;)? br;$"
+ add_loop_instructions = "lg; lgr; larl; agr; cgfi; jge; j;$"
+ bridge_loop_instructions = "larl; lg; cgfi; jnl; lghi; " \
+ "(lgfi|iilf);( iihf;)? (lgfi|iilf);( iihf;)? basr; (lgfi|iilf);( iihf;)? br;$"
More information about the pypy-commit
mailing list