[pypy-commit] pypy default: merge emit-call-arm
bivab
noreply at buildbot.pypy.org
Mon May 27 14:52:32 CEST 2013
Author: David Schneider <david.schneider at picle.org>
Branch:
Changeset: r64580:9e31743395b4
Date: 2013-05-27 07:43 -0500
http://bitbucket.org/pypy/pypy/changeset/9e31743395b4/
Log: merge emit-call-arm
diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -19,7 +19,7 @@
from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
from rpython.jit.backend.model import CompiledLoopToken
from rpython.jit.codewriter.effectinfo import EffectInfo
-from rpython.jit.metainterp.history import AbstractFailDescr, FLOAT
+from rpython.jit.metainterp.history import AbstractFailDescr, FLOAT, INT, VOID
from rpython.jit.metainterp.resoperation import rop
from rpython.rlib.debug import debug_print, debug_start, debug_stop
from rpython.rlib.jit import AsmInfo
@@ -27,6 +27,7 @@
from rpython.rlib.rarithmetic import r_uint
from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref
from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.jit.backend.arm import callbuilder
class AssemblerARM(ResOpAssembler):
@@ -934,23 +935,6 @@
asm_math_operations[oopspecindex](self, op, arglocs, regalloc, fcond)
return fcond
- def _ensure_result_bit_extension(self, resloc, size, signed):
- if size == 4:
- return
- if size == 1:
- if not signed: # unsigned char
- self.mc.AND_ri(resloc.value, resloc.value, 0xFF)
- else:
- self.mc.LSL_ri(resloc.value, resloc.value, 24)
- self.mc.ASR_ri(resloc.value, resloc.value, 24)
- elif size == 2:
- if not signed:
- self.mc.LSL_ri(resloc.value, resloc.value, 16)
- self.mc.LSR_ri(resloc.value, resloc.value, 16)
- else:
- self.mc.LSL_ri(resloc.value, resloc.value, 16)
- self.mc.ASR_ri(resloc.value, resloc.value, 16)
-
def patch_trace(self, faildescr, looptoken, bridge_addr, regalloc):
b = InstrBuilder(self.cpu.cpuinfo.arch_version)
patch_addr = faildescr._arm_failure_recovery_block
@@ -1012,20 +996,32 @@
mc.gen_load_int(helper.value, ofs, cond=cond)
mc.STR_rr(source.value, base.value, helper.value, cond=cond)
+ def get_tmp_reg(self, forbidden_regs=None):
+ if forbidden_regs is None:
+ return r.ip, False
+ for x in [r.ip, r.lr]:
+ if x not in forbidden_regs:
+ return x, False
+ # pick some reg that we need to save
+ for x in r.all_regs:
+ if x not in forbidden_regs:
+ return x, True
+ assert 0
+
def _mov_imm_to_loc(self, prev_loc, loc, cond=c.AL):
- if not loc.is_reg() and not (loc.is_stack() and loc.type != FLOAT):
+ if loc.type == FLOAT:
raise AssertionError("invalid target for move from imm value")
if loc.is_reg():
new_loc = loc
- elif loc.is_stack():
- self.mc.PUSH([r.lr.value], cond=cond)
+ elif loc.is_stack() or loc.is_raw_sp():
new_loc = r.lr
else:
raise AssertionError("invalid target for move from imm value")
self.mc.gen_load_int(new_loc.value, prev_loc.value, cond=cond)
if loc.is_stack():
self.regalloc_mov(new_loc, loc)
- self.mc.POP([r.lr.value], cond=cond)
+ elif loc.is_raw_sp():
+ self.store_reg(self.mc, new_loc, r.sp, loc.value, cond=cond, helper=r.ip)
def _mov_reg_to_loc(self, prev_loc, loc, cond=c.AL):
if loc.is_imm():
@@ -1034,60 +1030,77 @@
self.mc.MOV_rr(loc.value, prev_loc.value, cond=cond)
elif loc.is_stack() and loc.type != FLOAT:
# spill a core register
- if prev_loc is r.ip:
- temp = r.lr
- else:
- temp = r.ip
+ temp, save = self.get_tmp_reg([prev_loc, loc])
offset = loc.value
is_imm = check_imm_arg(offset, size=0xFFF)
- if not is_imm:
+ if not is_imm and save:
self.mc.PUSH([temp.value], cond=cond)
self.store_reg(self.mc, prev_loc, r.fp, offset, helper=temp, cond=cond)
- if not is_imm:
+ if not is_imm and save:
self.mc.POP([temp.value], cond=cond)
+ elif loc.is_raw_sp() and loc.type != FLOAT:
+ temp, save = self.get_tmp_reg([prev_loc])
+ assert not save
+ self.store_reg(self.mc, prev_loc, r.sp, loc.value, cond=cond, helper=temp)
else:
assert 0, 'unsupported case'
def _mov_stack_to_loc(self, prev_loc, loc, cond=c.AL):
- # disabled for now, has side effects in combination with remap_frame_layout when called from a jump
- helper = None # self._regalloc.get_free_reg()
+ helper = None
+ offset = prev_loc.value
+ tmp = None
if loc.is_reg():
assert prev_loc.type != FLOAT, 'trying to load from an \
incompatible location into a core register'
- assert loc is not r.lr, 'lr is not supported as a target \
- when moving from the stack'
# unspill a core register
- offset = prev_loc.value
is_imm = check_imm_arg(offset, size=0xFFF)
- helper = r.lr if helper is None else helper
- save_helper = not is_imm and helper is r.lr
+ helper, save = self.get_tmp_reg([loc])
+ save_helper = not is_imm and save
elif loc.is_vfp_reg():
assert prev_loc.type == FLOAT, 'trying to load from an \
incompatible location into a float register'
# load spilled value into vfp reg
- offset = prev_loc.value
is_imm = check_imm_arg(offset)
- helper = r.ip if helper is None else helper
- save_helper = not is_imm and helper is r.ip
+ helper, save = self.get_tmp_reg()
+ save_helper = not is_imm and save
+ elif loc.is_raw_sp():
+ assert (loc.type == prev_loc.type == FLOAT
+ or (loc.type != FLOAT and prev_loc.type != FLOAT))
+ tmp = loc
+ if loc.is_float():
+ loc = r.vfp_ip
+ else:
+ loc, save_helper = self.get_tmp_reg()
+ assert not save_helper
+ helper, save_helper = self.get_tmp_reg([loc])
+ assert not save_helper
else:
assert 0, 'unsupported case'
+
if save_helper:
self.mc.PUSH([helper.value], cond=cond)
self.load_reg(self.mc, loc, r.fp, offset, cond=cond, helper=helper)
if save_helper:
self.mc.POP([helper.value], cond=cond)
+ if tmp and tmp.is_raw_sp():
+ self.store_reg(self.mc, loc, r.sp, tmp.value, cond=cond, helper=helper)
+
def _mov_imm_float_to_loc(self, prev_loc, loc, cond=c.AL):
if loc.is_vfp_reg():
- self.mc.PUSH([r.ip.value], cond=cond)
- self.mc.gen_load_int(r.ip.value, prev_loc.getint(), cond=cond)
- self.load_reg(self.mc, loc, r.ip, 0, cond=cond)
- self.mc.POP([r.ip.value], cond=cond)
- elif loc.is_stack():
- self.regalloc_push(r.vfp_ip)
+ helper, save_helper = self.get_tmp_reg([loc])
+ if save_helper:
+ self.mc.PUSH([helper.value], cond=cond)
+ self.mc.gen_load_int(helper.value, prev_loc.getint(), cond=cond)
+ self.load_reg(self.mc, loc, helper, 0, cond=cond)
+ if save_helper:
+ self.mc.POP([helper.value], cond=cond)
+ elif loc.is_stack() and loc.type == FLOAT:
self.regalloc_mov(prev_loc, r.vfp_ip, cond)
self.regalloc_mov(r.vfp_ip, loc, cond)
- self.regalloc_pop(r.vfp_ip)
+ elif loc.is_raw_sp() and loc.type == FLOAT:
+ self.regalloc_mov(prev_loc, r.vfp_ip, cond)
+ self.regalloc_mov(r.vfp_ip, loc, cond)
else:
assert 0, 'unsupported case'
@@ -1100,11 +1113,11 @@
# spill vfp register
offset = loc.value
is_imm = check_imm_arg(offset)
- if not is_imm:
- self.mc.PUSH([r.ip.value], cond=cond)
- self.store_reg(self.mc, prev_loc, r.fp, offset, cond=cond)
- if not is_imm:
- self.mc.POP([r.ip.value], cond=cond)
+ self.store_reg(self.mc, prev_loc, r.fp, offset, cond=cond, helper=r.ip)
+ elif loc.is_raw_sp():
+ assert loc.type == FLOAT, 'trying to store to an \
+ incompatible location from a float register'
+ self.store_reg(self.mc, prev_loc, r.sp, loc.value, cond=cond)
else:
assert 0, 'unsupported case'
@@ -1120,6 +1133,8 @@
self._mov_imm_float_to_loc(prev_loc, loc, cond)
elif prev_loc.is_vfp_reg():
self._mov_vfp_reg_to_loc(prev_loc, loc, cond)
+ elif prev_loc.is_raw_sp():
+ assert 0, 'raw sp locs are not supported as source loc'
else:
assert 0, 'unsupported case'
mov_loc_loc = regalloc_mov
@@ -1131,23 +1146,29 @@
if vfp_loc.is_vfp_reg():
self.mc.VMOV_rc(reg1.value, reg2.value, vfp_loc.value, cond=cond)
elif vfp_loc.is_imm_float():
- self.mc.PUSH([r.ip.value], cond=cond)
- self.mc.gen_load_int(r.ip.value, vfp_loc.getint(), cond=cond)
+ helper, save_helper = self.get_tmp_reg([reg1, reg2])
+ if save_helper:
+ self.mc.PUSH([helper.value], cond=cond)
+ self.mc.gen_load_int(helper.value, vfp_loc.getint(), cond=cond)
# we need to load one word to loc and one to loc+1 which are
# two 32-bit core registers
- self.mc.LDR_ri(reg1.value, r.ip.value, cond=cond)
- self.mc.LDR_ri(reg2.value, r.ip.value, imm=WORD, cond=cond)
- self.mc.POP([r.ip.value], cond=cond)
+ self.mc.LDR_ri(reg1.value, helper.value, cond=cond)
+ self.mc.LDR_ri(reg2.value, helper.value, imm=WORD, cond=cond)
+ if save_helper:
+ self.mc.POP([helper.value], cond=cond)
elif vfp_loc.is_stack() and vfp_loc.type == FLOAT:
# load spilled vfp value into two core registers
offset = vfp_loc.value
if not check_imm_arg(offset, size=0xFFF):
- self.mc.PUSH([r.ip.value], cond=cond)
- self.mc.gen_load_int(r.ip.value, offset, cond=cond)
- self.mc.LDR_rr(reg1.value, r.fp.value, r.ip.value, cond=cond)
- self.mc.ADD_ri(r.ip.value, r.ip.value, imm=WORD, cond=cond)
- self.mc.LDR_rr(reg2.value, r.fp.value, r.ip.value, cond=cond)
- self.mc.POP([r.ip.value], cond=cond)
+ helper, save_helper = self.get_tmp_reg([reg1, reg2])
+ if save_helper:
+ self.mc.PUSH([helper.value], cond=cond)
+ self.mc.gen_load_int(helper.value, offset, cond=cond)
+ self.mc.LDR_rr(reg1.value, r.fp.value, helper.value, cond=cond)
+ self.mc.ADD_ri(helper.value, helper.value, imm=WORD, cond=cond)
+ self.mc.LDR_rr(reg2.value, r.fp.value, helper.value, cond=cond)
+ if save_helper:
+ self.mc.POP([helper.value], cond=cond)
else:
self.mc.LDR_ri(reg1.value, r.fp.value, imm=offset, cond=cond)
self.mc.LDR_ri(reg2.value, r.fp.value,
@@ -1165,12 +1186,15 @@
# move from two core registers to a float stack location
offset = vfp_loc.value
if not check_imm_arg(offset + WORD, size=0xFFF):
- self.mc.PUSH([r.ip.value], cond=cond)
- self.mc.gen_load_int(r.ip.value, offset, cond=cond)
- self.mc.STR_rr(reg1.value, r.fp.value, r.ip.value, cond=cond)
- self.mc.ADD_ri(r.ip.value, r.ip.value, imm=WORD, cond=cond)
- self.mc.STR_rr(reg2.value, r.fp.value, r.ip.value, cond=cond)
- self.mc.POP([r.ip.value], cond=cond)
+ helper, save_helper = self.get_tmp_reg([reg1, reg2])
+ if save_helper:
+ self.mc.PUSH([helper.value], cond=cond)
+ self.mc.gen_load_int(helper.value, offset, cond=cond)
+ self.mc.STR_rr(reg1.value, r.fp.value, helper.value, cond=cond)
+ self.mc.ADD_ri(helper.value, helper.value, imm=WORD, cond=cond)
+ self.mc.STR_rr(reg2.value, r.fp.value, helper.value, cond=cond)
+ if save_helper:
+ self.mc.POP([helper.value], cond=cond)
else:
self.mc.STR_ri(reg1.value, r.fp.value, imm=offset, cond=cond)
self.mc.STR_ri(reg2.value, r.fp.value,
@@ -1417,6 +1441,26 @@
#
return shiftsize
+ def simple_call(self, fnloc, arglocs, result_loc=r.r0):
+ if result_loc is None:
+ result_type = VOID
+ result_size = 0
+ elif result_loc.is_vfp_reg():
+ result_type = FLOAT
+ result_size = DOUBLE_WORD
+ else:
+ result_type = INT
+ result_size = WORD
+ cb = callbuilder.get_callbuilder(self.cpu, self, fnloc, arglocs,
+ result_loc, result_type,
+ result_size)
+ cb.emit()
+
+ def simple_call_no_collect(self, fnloc, arglocs):
+ cb = callbuilder.get_callbuilder(self.cpu, self, fnloc, arglocs)
+ cb.emit_no_collect()
+
+
def not_implemented(msg):
os.write(2, '[ARM/asm] %s\n' % msg)
raise NotImplementedError(msg)
diff --git a/rpython/jit/backend/arm/callbuilder.py b/rpython/jit/backend/arm/callbuilder.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/arm/callbuilder.py
@@ -0,0 +1,304 @@
+from rpython.rlib.clibffi import FFI_DEFAULT_ABI
+from rpython.rlib.objectmodel import we_are_translated
+from rpython.jit.metainterp.history import INT, FLOAT, REF
+from rpython.jit.backend.arm.arch import WORD
+from rpython.jit.backend.arm import registers as r
+from rpython.jit.backend.arm import conditions as c
+from rpython.jit.backend.arm.locations import RawSPStackLocation
+from rpython.jit.backend.arm.jump import remap_frame_layout
+from rpython.jit.backend.llsupport.callbuilder import AbstractCallBuilder
+from rpython.jit.backend.arm.helper.assembler import count_reg_args
+from rpython.jit.backend.arm.helper.assembler import saved_registers
+from rpython.jit.backend.arm.helper.regalloc import check_imm_arg
+
+
+class ARMCallbuilder(AbstractCallBuilder):
+ def __init__(self, assembler, fnloc, arglocs,
+ resloc=r.r0, restype=INT, ressize=WORD, ressigned=True):
+ AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
+ resloc, restype, ressize)
+ self.current_sp = 0
+
+ def push_gcmap(self):
+ assert not self.is_call_release_gil
+ # we push *now* the gcmap, describing the status of GC registers
+ # after the rearrangements done just above, ignoring the return
+ # value eax, if necessary
+ noregs = self.asm.cpu.gc_ll_descr.is_shadow_stack()
+ gcmap = self.asm._regalloc.get_gcmap([r.r0], noregs=noregs)
+ self.asm.push_gcmap(self.mc, gcmap, store=True)
+
+ def pop_gcmap(self):
+ self.asm._reload_frame_if_necessary(self.mc)
+ self.asm.pop_gcmap(self.mc)
+
+ def emit_raw_call(self):
+ #the actual call
+ if self.fnloc.is_imm():
+ self.mc.BL(self.fnloc.value)
+ return
+ if self.fnloc.is_stack():
+ self.asm.mov_loc_loc(self.fnloc, r.ip)
+ self.fnloc = r.ip
+ assert self.fnloc.is_reg()
+ self.mc.BLX(self.fnloc.value)
+
+ def restore_stack_pointer(self):
+ # readjust the sp in case we passed some args on the stack
+ assert self.current_sp % 8 == 0 # sanity check
+ if self.current_sp != 0:
+ self._adjust_sp(self.current_sp)
+ self.current_sp = 0
+
+ def _push_stack_args(self, stack_args, on_stack):
+ assert on_stack % 8 == 0
+ self._adjust_sp(-on_stack)
+ self.current_sp = on_stack
+ ofs = 0
+ for i, arg in enumerate(stack_args):
+ if arg is not None:
+ sp_loc = RawSPStackLocation(ofs, arg.type)
+ self.asm.regalloc_mov(arg, sp_loc)
+ ofs += sp_loc.width
+ else: # alignment word
+ ofs += WORD
+
+ def _adjust_sp(self, n):
+ # adjust the current stack pointer by n bytes
+ if n > 0:
+ if check_imm_arg(n):
+ self.mc.ADD_ri(r.sp.value, r.sp.value, n)
+ else:
+ self.mc.gen_load_int(r.ip.value, n)
+ self.mc.ADD_rr(r.sp.value, r.sp.value, r.ip.value)
+ else:
+ n = abs(n)
+ if check_imm_arg(n):
+ self.mc.SUB_ri(r.sp.value, r.sp.value, n)
+ else:
+ self.mc.gen_load_int(r.ip.value, n)
+ self.mc.SUB_rr(r.sp.value, r.sp.value, r.ip.value)
+
+ def select_call_release_gil_mode(self):
+ AbstractCallBuilder.select_call_release_gil_mode(self)
+
+ def call_releasegil_addr_and_move_real_arguments(self):
+ assert not self.asm._is_asmgcc()
+ from rpython.jit.backend.arm.regalloc import CoreRegisterManager
+ with saved_registers(self.mc,
+ CoreRegisterManager.save_around_call_regs):
+ self.mc.BL(self.asm.releasegil_addr)
+
+ if not we_are_translated(): # for testing: we should not access
+ self.mc.ADD_ri(r.fp.value, r.fp.value, 1) # fp any more
+
+ def move_real_result_and_call_reacqgil_addr(self):
+ # save the result we just got
+ assert not self.asm._is_asmgcc()
+ gpr_to_save, vfp_to_save = self.get_result_locs()
+ with saved_registers(self.mc, gpr_to_save, vfp_to_save):
+ self.mc.BL(self.asm.reacqgil_addr)
+
+ if not we_are_translated(): # for testing: now we can access
+ self.mc.SUB_ri(r.fp.value, r.fp.value, 1) # fp again
+
+ # for shadowstack, done for us by _reload_frame_if_necessary()
+
+ def get_result_locs(self):
+ raise NotImplementedError
+
+ def _ensure_result_bit_extension(self, resloc, size, signed):
+ if size == 4:
+ return
+ if size == 1:
+ if not signed: # unsigned char
+ self.mc.AND_ri(resloc.value, resloc.value, 0xFF)
+ else:
+ self.mc.LSL_ri(resloc.value, resloc.value, 24)
+ self.mc.ASR_ri(resloc.value, resloc.value, 24)
+ elif size == 2:
+ if not signed:
+ self.mc.LSL_ri(resloc.value, resloc.value, 16)
+ self.mc.LSR_ri(resloc.value, resloc.value, 16)
+ else:
+ self.mc.LSL_ri(resloc.value, resloc.value, 16)
+ self.mc.ASR_ri(resloc.value, resloc.value, 16)
+
+
+
+class SoftFloatCallBuilder(ARMCallbuilder):
+
+ def get_result_locs(self):
+ if self.resloc is None:
+ return [], []
+ if self.resloc.is_vfp_reg():
+ return [r.r0, r.r1], []
+ assert self.resloc.is_reg()
+ return [r.r0], []
+
+ def load_result(self):
+ # ensure the result is wellformed and stored in the correct location
+ resloc = self.resloc
+ if resloc is None:
+ return
+ if resloc.is_vfp_reg():
+ # move result to the allocated register
+ self.asm.mov_to_vfp_loc(r.r0, r.r1, resloc)
+ elif resloc.is_reg():
+ # move result to the allocated register
+ if resloc is not r.r0:
+ self.asm.mov_loc_loc(r.r0, resloc)
+ self._ensure_result_bit_extension(resloc,
+ self.ressize, self.ressign)
+
+
+ def _collect_and_push_stack_args(self, arglocs):
+ n_args = len(arglocs)
+ reg_args = count_reg_args(arglocs)
+ # all arguments past the 4th go on the stack
+ # first we need to prepare the list so it stays aligned
+ stack_args = []
+ count = 0
+ on_stack = 0
+ if n_args > reg_args:
+ for i in range(reg_args, n_args):
+ arg = arglocs[i]
+ if arg.type != FLOAT:
+ count += 1
+ on_stack += 1
+ else:
+ on_stack += 2
+ if count % 2 != 0:
+ stack_args.append(None)
+ count = 0
+ on_stack += 1
+ stack_args.append(arg)
+ if count % 2 != 0:
+ on_stack += 1
+ stack_args.append(None)
+ if on_stack > 0:
+ self._push_stack_args(stack_args, on_stack*WORD)
+
+ def prepare_arguments(self):
+ arglocs = self.arglocs
+ reg_args = count_reg_args(arglocs)
+ self._collect_and_push_stack_args(arglocs)
+ # collect variables that need to go in registers and the registers they
+ # will be stored in
+ num = 0
+ count = 0
+ non_float_locs = []
+ non_float_regs = []
+ float_locs = []
+ for i in range(reg_args):
+ arg = arglocs[i]
+ if arg.type == FLOAT and count % 2 != 0:
+ num += 1
+ count = 0
+ reg = r.caller_resp[num]
+
+ if arg.type == FLOAT:
+ float_locs.append((arg, reg))
+ else:
+ non_float_locs.append(arg)
+ non_float_regs.append(reg)
+
+ if arg.type == FLOAT:
+ num += 2
+ else:
+ num += 1
+ count += 1
+ # Check that the address of the function we want to call is not
+ # currently stored in one of the registers used to pass the arguments
+ # or on the stack, which we can not access later
+ # If this happens to be the case we remap the register to r4 and use r4
+ # to call the function
+ if self.fnloc in r.argument_regs or self.fnloc.is_stack():
+ non_float_locs.append(self.fnloc)
+ non_float_regs.append(r.r4)
+ self.fnloc = r.r4
+ # remap values stored in core registers
+ remap_frame_layout(self.asm, non_float_locs, non_float_regs, r.ip)
+
+ for loc, reg in float_locs:
+ self.asm.mov_from_vfp_loc(loc, reg, r.all_regs[reg.value + 1])
+
+class HardFloatCallBuilder(ARMCallbuilder):
+
+ def prepare_arguments(self):
+ non_float_locs = []
+ non_float_regs = []
+ float_locs = []
+ float_regs = []
+ stack_args = []
+
+ arglocs = self.arglocs
+ argtypes = self.argtypes
+
+ count = 0 # stack alignment counter
+ on_stack = 0
+ for arg in arglocs:
+ if arg.type != FLOAT:
+ if len(non_float_regs) < len(r.argument_regs):
+ reg = r.argument_regs[len(non_float_regs)]
+ non_float_locs.append(arg)
+ non_float_regs.append(reg)
+ else: # non-float argument that needs to go on the stack
+ count += 1
+ on_stack += 1
+ stack_args.append(arg)
+ else:
+ if len(float_regs) < len(r.vfp_argument_regs):
+ reg = r.vfp_argument_regs[len(float_regs)]
+ float_locs.append(arg)
+ float_regs.append(reg)
+ else: # float argument that needs to go on the stack
+ if count % 2 != 0:
+ stack_args.append(None)
+ count = 0
+ on_stack += 1
+ stack_args.append(arg)
+ on_stack += 2
+ # align the stack
+ if count % 2 != 0:
+ stack_args.append(None)
+ on_stack += 1
+ self._push_stack_args(stack_args, on_stack*WORD)
+ # Check that the address of the function we want to call is not
+ # currently stored in one of the registers used to pass the arguments
+ # or on the stack, which we can not access later
+ # If this happens to be the case we remap the register to r4 and use r4
+ # to call the function
+ if self.fnloc in non_float_regs or self.fnloc.is_stack():
+ non_float_locs.append(self.fnloc)
+ non_float_regs.append(r.r4)
+ self.fnloc = r.r4
+ # remap values stored in core registers
+ remap_frame_layout(self.asm, non_float_locs, non_float_regs, r.ip)
+ # remap values stored in vfp registers
+ remap_frame_layout(self.asm, float_locs, float_regs, r.vfp_ip)
+
+ def load_result(self):
+ resloc = self.resloc
+ # ensure the result is wellformed and stored in the correct location
+ if resloc is not None and resloc.is_reg():
+ self._ensure_result_bit_extension(resloc,
+ self.ressize, self.ressign)
+
+ def get_result_locs(self):
+ if self.resloc is None:
+ return [], []
+ if self.resloc.is_vfp_reg():
+ return [], [r.d0]
+ assert self.resloc.is_reg()
+ return [r.r0], []
+
+
+def get_callbuilder(cpu, assembler, fnloc, arglocs,
+ resloc=r.r0, restype=INT, ressize=WORD, ressigned=True):
+ if cpu.cpuinfo.hf_abi:
+ return HardFloatCallBuilder(assembler, fnloc, arglocs, resloc,
+ restype, ressize, ressigned)
+ else:
+ return SoftFloatCallBuilder(assembler, fnloc, arglocs, resloc,
+ restype, ressize, ressigned)
diff --git a/rpython/jit/backend/arm/locations.py b/rpython/jit/backend/arm/locations.py
--- a/rpython/jit/backend/arm/locations.py
+++ b/rpython/jit/backend/arm/locations.py
@@ -12,6 +12,9 @@
def is_stack(self):
return False
+ def is_raw_sp(self):
+ return False
+
def is_reg(self):
return False
@@ -145,7 +148,27 @@
return self.position + 10000
def is_float(self):
- return type == FLOAT
+ return self.type == FLOAT
+
+class RawSPStackLocation(AssemblerLocation):
+ _immutable_ = True
+
+ def __init__(self, sp_offset, type=INT):
+ if type == FLOAT:
+ self.width = DOUBLE_WORD
+ else:
+ self.width = WORD
+ self.value = sp_offset
+ self.type = type
+
+ def __repr__(self):
+ return 'SP(%s)+%d' % (self.type, self.value,)
+
+ def is_raw_sp(self):
+ return True
+
+ def is_float(self):
+ return self.type == FLOAT
def imm(i):
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -13,8 +13,7 @@
gen_emit_float_cmp_op,
gen_emit_float_cmp_op_guard,
gen_emit_unary_float_op,
- saved_registers,
- count_reg_args)
+ saved_registers)
from rpython.jit.backend.arm.helper.regalloc import check_imm_arg
from rpython.jit.backend.arm.codebuilder import InstrBuilder, OverwritingBuilder
from rpython.jit.backend.arm.jump import remap_frame_layout
@@ -31,8 +30,7 @@
from rpython.rlib.objectmodel import we_are_translated
from rpython.rtyper.lltypesystem import rstr, rffi, lltype
from rpython.rtyper.annlowlevel import cast_instance_to_gcref
-
-NO_FORCE_INDEX = -1
+from rpython.jit.backend.arm import callbuilder
class ArmGuardToken(GuardToken):
@@ -339,217 +337,36 @@
return fcond
def emit_op_call(self, op, arglocs, regalloc, fcond):
- resloc = arglocs[0]
- adr = arglocs[1]
- arglist = arglocs[2:]
+ return self._emit_call(op, arglocs, fcond=fcond)
+
+ def _emit_call(self, op, arglocs, is_call_release_gil=False, fcond=c.AL):
+ # args = [resloc, size, sign, args...]
+ from rpython.jit.backend.llsupport.descr import CallDescr
+
+ cb = callbuilder.get_callbuilder(self.cpu, self, arglocs[3], arglocs[4:], arglocs[0])
+
descr = op.getdescr()
- size = descr.get_result_size()
- signed = descr.is_result_signed()
- cond = self._emit_call(adr, arglist,
- fcond, resloc, (size, signed))
- return cond
+ assert isinstance(descr, CallDescr)
+ cb.callconv = descr.get_call_conv()
+ cb.argtypes = descr.get_arg_types()
+ cb.restype = descr.get_result_type()
+ sizeloc = arglocs[1]
+ assert sizeloc.is_imm()
+ cb.ressize = sizeloc.value
+ signloc = arglocs[2]
+ assert signloc.is_imm()
+ cb.ressign = signloc.value
- def _emit_call(self, adr, arglocs, fcond=c.AL, resloc=None,
- result_info=(-1, -1),
- # whether to worry about a CALL that can collect; this
- # is always true except in call_release_gil
- can_collect=True):
- if self.cpu.cpuinfo.hf_abi:
- stack_args, adr = self._setup_call_hf(adr, arglocs, fcond,
- resloc, result_info)
+ if is_call_release_gil:
+ cb.emit_call_release_gil()
else:
- stack_args, adr = self._setup_call_sf(adr, arglocs, fcond,
- resloc, result_info)
-
- if can_collect:
- # we push *now* the gcmap, describing the status of GC registers
- # after the rearrangements done just above, ignoring the return
- # value eax, if necessary
- noregs = self.cpu.gc_ll_descr.is_shadow_stack()
- gcmap = self._regalloc.get_gcmap([r.r0], noregs=noregs)
- self.push_gcmap(self.mc, gcmap, store=True)
- #the actual call
- if adr.is_imm():
- self.mc.BL(adr.value)
- elif adr.is_stack():
- self.mov_loc_loc(adr, r.ip)
- adr = r.ip
- else:
- assert adr.is_reg()
- if adr.is_reg():
- self.mc.BLX(adr.value)
- self._restore_sp(stack_args, fcond)
-
- # ensure the result is wellformed and stored in the correct location
- if resloc is not None:
- if resloc.is_vfp_reg() and not self.cpu.cpuinfo.hf_abi:
- # move result to the allocated register
- self.mov_to_vfp_loc(r.r0, r.r1, resloc)
- elif resloc.is_reg() and result_info != (-1, -1):
- self._ensure_result_bit_extension(resloc, result_info[0],
- result_info[1])
- if can_collect:
- self._reload_frame_if_necessary(self.mc)
- self.pop_gcmap(self.mc)
+ cb.emit()
return fcond
- def _restore_sp(self, stack_args, fcond):
- # readjust the sp in case we passed some args on the stack
- if len(stack_args) > 0:
- n = 0
- for arg in stack_args:
- if arg is None or arg.type != FLOAT:
- n += WORD
- else:
- n += DOUBLE_WORD
- self._adjust_sp(-n, fcond=fcond)
- assert n % 8 == 0 # sanity check
-
- def _adjust_sp(self, n, cb=None, fcond=c.AL, base_reg=r.sp):
- if cb is None:
- cb = self.mc
- if n < 0:
- n = -n
- rev = True
- else:
- rev = False
- if n <= 0xFF and fcond == c.AL:
- if rev:
- cb.ADD_ri(r.sp.value, base_reg.value, n)
- else:
- cb.SUB_ri(r.sp.value, base_reg.value, n)
- else:
- cb.gen_load_int(r.ip.value, n, cond=fcond)
- if rev:
- cb.ADD_rr(r.sp.value, base_reg.value, r.ip.value, cond=fcond)
- else:
- cb.SUB_rr(r.sp.value, base_reg.value, r.ip.value, cond=fcond)
-
-
- def _collect_stack_args_sf(self, arglocs):
- n_args = len(arglocs)
- reg_args = count_reg_args(arglocs)
- # all arguments past the 4th go on the stack
- # first we need to prepare the list so it stays aligned
- stack_args = []
- count = 0
- if n_args > reg_args:
- for i in range(reg_args, n_args):
- arg = arglocs[i]
- if arg.type != FLOAT:
- count += 1
- else:
- if count % 2 != 0:
- stack_args.append(None)
- count = 0
- stack_args.append(arg)
- if count % 2 != 0:
- stack_args.append(None)
- return stack_args
-
- def _push_stack_args(self, stack_args):
- #then we push every thing on the stack
- for i in range(len(stack_args) - 1, -1, -1):
- arg = stack_args[i]
- if arg is None:
- self.mc.PUSH([r.ip.value])
- else:
- self.regalloc_push(arg)
-
- def _setup_call_sf(self, adr, arglocs, fcond=c.AL,
- resloc=None, result_info=(-1, -1)):
- reg_args = count_reg_args(arglocs)
- stack_args = self._collect_stack_args_sf(arglocs)
- self._push_stack_args(stack_args)
- # collect variables that need to go in registers and the registers they
- # will be stored in
- num = 0
- count = 0
- non_float_locs = []
- non_float_regs = []
- float_locs = []
- for i in range(reg_args):
- arg = arglocs[i]
- if arg.type == FLOAT and count % 2 != 0:
- num += 1
- count = 0
- reg = r.caller_resp[num]
-
- if arg.type == FLOAT:
- float_locs.append((arg, reg))
- else:
- non_float_locs.append(arg)
- non_float_regs.append(reg)
-
- if arg.type == FLOAT:
- num += 2
- else:
- num += 1
- count += 1
- # Check that the address of the function we want to call is not
- # currently stored in one of the registers used to pass the arguments.
- # If this happens to be the case we remap the register to r4 and use r4
- # to call the function
- if adr in non_float_regs:
- non_float_locs.append(adr)
- non_float_regs.append(r.r4)
- adr = r.r4
- # remap values stored in core registers
- remap_frame_layout(self, non_float_locs, non_float_regs, r.ip)
-
- for loc, reg in float_locs:
- self.mov_from_vfp_loc(loc, reg, r.all_regs[reg.value + 1])
- return stack_args, adr
-
- def _setup_call_hf(self, adr, arglocs, fcond=c.AL,
- resloc=None, result_info=(-1, -1)):
- non_float_locs = []
- non_float_regs = []
- float_locs = []
- float_regs = []
- stack_args = []
- count = 0 # stack alignment counter
- for arg in arglocs:
- if arg.type != FLOAT:
- if len(non_float_regs) < len(r.argument_regs):
- reg = r.argument_regs[len(non_float_regs)]
- non_float_locs.append(arg)
- non_float_regs.append(reg)
- else: # non-float argument that needs to go on the stack
- count += 1
- stack_args.append(arg)
- else:
- if len(float_regs) < len(r.vfp_argument_regs):
- reg = r.vfp_argument_regs[len(float_regs)]
- float_locs.append(arg)
- float_regs.append(reg)
- else: # float argument that needs to go on the stack
- if count % 2 != 0:
- stack_args.append(None)
- count = 0
- stack_args.append(arg)
- # align the stack
- if count % 2 != 0:
- stack_args.append(None)
- self._push_stack_args(stack_args)
- # Check that the address of the function we want to call is not
- # currently stored in one of the registers used to pass the arguments.
- # If this happens to be the case we remap the register to r4 and use r4
- # to call the function
- if adr in non_float_regs:
- non_float_locs.append(adr)
- non_float_regs.append(r.r4)
- adr = r.r4
- # remap values stored in core registers
- remap_frame_layout(self, non_float_locs, non_float_regs, r.ip)
- # remap values stored in vfp registers
- remap_frame_layout(self, float_locs, float_regs, r.vfp_ip)
-
- return stack_args, adr
-
def emit_op_same_as(self, op, arglocs, regalloc, fcond):
argloc, resloc = arglocs
- self.mov_loc_loc(argloc, resloc)
+ if argloc is not resloc:
+ self.mov_loc_loc(argloc, resloc)
return fcond
emit_op_cast_ptr_to_int = emit_op_same_as
@@ -1037,9 +854,8 @@
length_loc = bytes_loc
# call memcpy()
regalloc.before_call()
- self._emit_call(imm(self.memcpy_addr),
- [dstaddr_loc, srcaddr_loc, length_loc],
- can_collect=False)
+ self.simple_call_no_collect(imm(self.memcpy_addr),
+ [dstaddr_loc, srcaddr_loc, length_loc])
regalloc.rm.possibly_free_var(length_box)
regalloc.rm.possibly_free_var(dstaddr_box)
regalloc.rm.possibly_free_var(srcaddr_box)
@@ -1127,14 +943,14 @@
vloc = imm(0)
self.call_assembler(op, guard_op, argloc, vloc, result_loc, tmploc)
self._emit_guard_may_force(guard_op,
- regalloc._prepare_guard(guard_op), guard_op.numargs())
+ regalloc._prepare_guard(guard_op))
return fcond
def _call_assembler_emit_call(self, addr, argloc, resloc):
- self._emit_call(addr, [argloc], resloc=resloc)
+ self.simple_call(addr, [argloc], result_loc=resloc)
def _call_assembler_emit_helper_call(self, addr, arglocs, resloc):
- self._emit_call(addr, arglocs, resloc=resloc)
+ self.simple_call(addr, arglocs, result_loc=resloc)
def _call_assembler_check_descr(self, value, tmploc):
ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
@@ -1213,20 +1029,14 @@
fcond):
self._store_force_index(guard_op)
numargs = op.numargs()
- callargs = arglocs[2:numargs + 1] # extract the arguments to the call
- adr = arglocs[1]
- resloc = arglocs[0]
+ callargs = arglocs[:numargs + 3] # extract the arguments to the call
+ guardargs = arglocs[len(callargs):]
#
- descr = op.getdescr()
- size = descr.get_result_size()
- signed = descr.is_result_signed()
- #
- self._emit_call(adr, callargs, fcond,
- resloc, (size, signed))
- self._emit_guard_may_force(guard_op, arglocs[1 + numargs:], numargs)
+ self._emit_call(op, callargs, fcond=fcond)
+ self._emit_guard_may_force(guard_op, guardargs)
return fcond
- def _emit_guard_may_force(self, guard_op, arglocs, numargs):
+ def _emit_guard_may_force(self, guard_op, arglocs):
ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
self.mc.LDR_ri(r.ip.value, r.fp.value, imm=ofs)
self.mc.CMP_ri(r.ip.value, 0)
@@ -1235,68 +1045,14 @@
def emit_guard_call_release_gil(self, op, guard_op, arglocs, regalloc,
fcond):
-
+ numargs = op.numargs()
+ callargs = arglocs[:numargs + 3] # extract the arguments to the call
+ guardargs = arglocs[len(callargs):] # extrat the arguments for the guard
self._store_force_index(guard_op)
- # first, close the stack in the sense of the asmgcc GC root tracker
- gcrootmap = self.cpu.gc_ll_descr.gcrootmap
- numargs = op.numargs()
- callargs = arglocs[2:numargs + 1] # extract the arguments to the call
- adr = arglocs[1]
- resloc = arglocs[0]
-
- if gcrootmap:
- # we put the gcmap now into the frame before releasing the GIL,
- # and pop it below after reacquiring the GIL. The assumption
- # is that this gcmap describes correctly the situation at any
- # point in-between: all values containing GC pointers should
- # be safely saved out of registers by now, and will not be
- # manipulated by any of the following CALLs.
- gcmap = self._regalloc.get_gcmap(noregs=True)
- self.push_gcmap(self.mc, gcmap, store=True)
- self.call_release_gil(gcrootmap, arglocs, regalloc, fcond)
- # do the call
- descr = op.getdescr()
- size = descr.get_result_size()
- signed = descr.is_result_signed()
- #
- self._emit_call(adr, callargs, fcond,
- resloc, (size, signed),
- can_collect=False)
- # then reopen the stack
- if gcrootmap:
- self.call_reacquire_gil(gcrootmap, resloc, regalloc, fcond)
- self.pop_gcmap(self.mc) # remove the gcmap saved above
-
- self._emit_guard_may_force(guard_op, arglocs[numargs+1:], numargs)
+ self._emit_call(op, callargs, is_call_release_gil=True)
+ self._emit_guard_may_force(guard_op, guardargs)
return fcond
- def call_release_gil(self, gcrootmap, save_registers, regalloc, fcond):
- # Save caller saved registers and do the call
- # NOTE: We assume that the floating point registers won't be modified.
- assert gcrootmap.is_shadow_stack
- with saved_registers(self.mc, regalloc.rm.save_around_call_regs):
- self._emit_call(imm(self.releasegil_addr), [],
- fcond, can_collect=False)
-
- def call_reacquire_gil(self, gcrootmap, save_loc, regalloc, fcond):
- # save the previous result into the stack temporarily, in case it is in
- # a caller saved register.
- # NOTE: like with call_release_gil(), we assume that we don't need to
- # save vfp regs in this case. Besides the result location
- regs_to_save = []
- vfp_regs_to_save = []
- if save_loc and save_loc in regalloc.rm.save_around_call_regs:
- regs_to_save.append(save_loc)
- regs_to_save.append(r.ip) # for alingment
- elif save_loc and save_loc in regalloc.vfprm.save_around_call_regs:
- vfp_regs_to_save.append(save_loc)
- assert gcrootmap.is_shadow_stack
- # call the reopenstack() function (also reacquiring the GIL)
- with saved_registers(self.mc, regs_to_save, vfp_regs_to_save):
- self._emit_call(imm(self.reacqgil_addr), [], fcond,
- can_collect=False)
- self._reload_frame_if_necessary(self.mc)
-
def _store_force_index(self, guard_op):
faildescr = guard_op.getdescr()
ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr')
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -34,6 +34,7 @@
from rpython.jit.backend.llsupport.descr import unpack_fielddescr
from rpython.jit.backend.llsupport.descr import unpack_interiorfielddescr
from rpython.rlib.rarithmetic import r_uint
+from rpython.jit.backend.llsupport.descr import CallDescr
# xxx hack: set a default value for TargetToken._ll_loop_code. If 0, we know
@@ -555,9 +556,27 @@
return self._prepare_call(op)
def _prepare_call(self, op, force_store=[], save_all_regs=False):
- args = [None] * (op.numargs() + 1)
+ args = [None] * (op.numargs() + 3)
+ calldescr = op.getdescr()
+ assert isinstance(calldescr, CallDescr)
+ assert len(calldescr.arg_classes) == op.numargs() - 1
+
for i in range(op.numargs()):
- args[i + 1] = self.loc(op.getarg(i))
+ args[i + 3] = self.loc(op.getarg(i))
+
+ size = calldescr.get_result_size()
+ sign = calldescr.is_result_signed()
+ if sign:
+ sign_loc = imm(1)
+ else:
+ sign_loc = imm(0)
+ args[1] = imm(size)
+ args[2] = sign_loc
+
+ args[0] = self._call(op, args, force_store, save_all_regs)
+ return args
+
+ def _call(self, op, arglocs, force_store=[], save_all_regs=False):
# spill variables that need to be saved around calls
self.vfprm.before_call(save_all_regs=save_all_regs)
if not save_all_regs:
@@ -565,11 +584,11 @@
if gcrootmap and gcrootmap.is_shadow_stack:
save_all_regs = 2
self.rm.before_call(save_all_regs=save_all_regs)
+ self.before_call_called = True
+ resloc = None
if op.result:
resloc = self.after_call(op.result)
- args[0] = resloc
- self.before_call_called = True
- return args
+ return resloc
def prepare_op_call_malloc_gc(self, op, fcond):
return self._prepare_call(op)
@@ -1153,9 +1172,9 @@
def prepare_guard_call_assembler(self, op, guard_op, fcond):
locs = self.locs_for_call_assembler(op, guard_op)
tmploc = self.get_scratch_reg(INT, selected_reg=r.r0)
- call_locs = self._prepare_call(op, save_all_regs=True)
+ resloc = self._call(op, locs + [tmploc], save_all_regs=True)
self.possibly_free_vars(guard_op.getfailargs())
- return locs + [call_locs[0], tmploc]
+ return locs + [resloc, tmploc]
def _prepare_args_for_new_op(self, new_args):
gc_ll_descr = self.cpu.gc_ll_descr
diff --git a/rpython/jit/backend/arm/test/test_regalloc_mov.py b/rpython/jit/backend/arm/test/test_regalloc_mov.py
--- a/rpython/jit/backend/arm/test/test_regalloc_mov.py
+++ b/rpython/jit/backend/arm/test/test_regalloc_mov.py
@@ -1,9 +1,10 @@
from rpython.rlib.objectmodel import instantiate
from rpython.jit.backend.arm.assembler import AssemblerARM
-from rpython.jit.backend.arm.locations import imm, ConstFloatLoc,\
- RegisterLocation, StackLocation, \
- VFPRegisterLocation, get_fp_offset
-from rpython.jit.backend.arm.registers import lr, ip, fp, vfp_ip
+from rpython.jit.backend.arm.locations import imm, ConstFloatLoc
+from rpython.jit.backend.arm.locations import RegisterLocation, StackLocation
+from rpython.jit.backend.arm.locations import VFPRegisterLocation, get_fp_offset
+from rpython.jit.backend.arm.locations import RawSPStackLocation
+from rpython.jit.backend.arm.registers import lr, ip, fp, vfp_ip, sp
from rpython.jit.backend.arm.conditions import AL
from rpython.jit.backend.arm.arch import WORD
from rpython.jit.metainterp.history import FLOAT
@@ -54,6 +55,12 @@
addr = int(value) # whatever
return ConstFloatLoc(addr)
+def raw_stack(i):
+ return RawSPStackLocation(i)
+
+def raw_stack_float(i):
+ return RawSPStackLocation(i, type=FLOAT)
+
class MockBuilder(object):
def __init__(self):
@@ -79,13 +86,13 @@
result = self.builder.instrs
assert result == expected
-
-class TestRegallocMov(BaseMovTest):
-
def mov(self, a, b, expected=None):
self.asm.regalloc_mov(a, b)
self.validate(expected)
+
+class TestRegallocMov(BaseMovTest):
+
def test_mov_imm_to_reg(self):
val = imm(123)
reg = r(7)
@@ -102,45 +109,37 @@
val = imm(100)
s = stack(7)
expected = [
- mi('PUSH', [lr.value], cond=AL),
mi('gen_load_int', lr.value, 100, cond=AL),
mi('STR_ri', lr.value, fp.value, imm=s.value, cond=AL),
- mi('POP', [lr.value], cond=AL)]
+ ]
self.mov(val, s, expected)
def test_mov_big_imm_to_stacklock(self):
val = imm(65536)
s = stack(7)
expected = [
- mi('PUSH', [lr.value], cond=AL),
mi('gen_load_int', lr.value, 65536, cond=AL),
mi('STR_ri', lr.value, fp.value, imm=s.value, cond=AL),
- mi('POP', [lr.value], cond=AL)]
-
+ ]
self.mov(val, s, expected)
def test_mov_imm_to_big_stacklock(self):
val = imm(100)
s = stack(8191)
- expected = [mi('PUSH', [lr.value], cond=AL),
- mi('gen_load_int', lr.value, 100, cond=AL),
- mi('PUSH', [ip.value], cond=AL),
+ expected = [ mi('gen_load_int', lr.value, 100, cond=AL),
mi('gen_load_int', ip.value, s.value, cond=AL),
mi('STR_rr', lr.value, fp.value, ip.value, cond=AL),
- mi('POP', [ip.value], cond=AL),
- mi('POP', [lr.value], cond=AL)]
+ ]
self.mov(val, s, expected)
def test_mov_big_imm_to_big_stacklock(self):
val = imm(65536)
s = stack(8191)
- expected = [mi('PUSH', [lr.value], cond=AL),
+ expected = [
mi('gen_load_int', lr.value, 65536, cond=AL),
- mi('PUSH', [ip.value], cond=AL),
mi('gen_load_int', ip.value, s.value, cond=AL),
mi('STR_rr', lr.value, fp.value, ip.value, cond=AL),
- mi('POP', [ip.value], cond=AL),
- mi('POP', [lr.value], cond=AL)]
+ ]
self.mov(val, s, expected)
def test_mov_reg_to_reg(self):
@@ -158,10 +157,10 @@
def test_mov_reg_to_big_stackloc(self):
s = stack(8191)
r6 = r(6)
- expected = [mi('PUSH', [ip.value], cond=AL),
+ expected = [
mi('gen_load_int', ip.value, s.value, cond=AL),
mi('STR_rr', r6.value, fp.value, ip.value, cond=AL),
- mi('POP', [ip.value], cond=AL)]
+ ]
self.mov(r6, s, expected)
def test_mov_stack_to_reg(self):
@@ -174,10 +173,8 @@
s = stack(8191)
r6 = r(6)
expected = [
- mi('PUSH', [lr.value], cond=AL),
- mi('gen_load_int', lr.value, 32940, cond=AL),
- mi('LDR_rr', r6.value, fp.value, lr.value, cond=AL),
- mi('POP', [lr.value], cond=AL),
+ mi('gen_load_int', ip.value, 32940, cond=AL),
+ mi('LDR_rr', r6.value, fp.value, ip.value, cond=AL),
]
self.mov(s, r6, expected)
@@ -185,10 +182,9 @@
f = imm_float(3.5)
reg = vfp(5)
expected = [
- mi('PUSH', [ip.value], cond=AL),
mi('gen_load_int', ip.value, f.value, cond=AL),
mi('VLDR', 5, ip.value, imm=0, cond=AL),
- mi('POP', [ip.value], cond=AL)]
+ ]
self.mov(f, reg, expected)
def test_mov_vfp_reg_to_vfp_reg(self):
@@ -206,11 +202,11 @@
def test_mov_vfp_reg_to_large_stackloc(self):
reg = vfp(7)
s = stack_float(800)
- expected = [mi('PUSH', [ip.value], cond=AL),
+ expected = [
mi('gen_load_int', ip.value, s.value, cond=AL),
mi('ADD_rr', ip.value, fp.value, ip.value, cond=AL),
mi('VSTR', reg.value, ip.value, cond=AL),
- mi('POP', [ip.value], cond=AL)]
+ ]
self.mov(reg, s, expected)
def test_mov_stack_to_vfp_reg(self):
@@ -222,11 +218,11 @@
def test_mov_big_stackloc_to_vfp_reg(self):
reg = vfp(7)
s = stack_float(800)
- expected = [mi('PUSH', [ip.value], cond=AL),
+ expected = [
mi('gen_load_int', ip.value, s.value, cond=AL),
mi('ADD_rr', ip.value, fp.value, ip.value, cond=AL),
mi('VSTR', reg.value, ip.value, cond=AL),
- mi('POP', [ip.value], cond=AL)]
+ ]
self.mov(reg, s, expected)
def test_unsopported_cases(self):
@@ -265,8 +261,6 @@
py.test.raises(AssertionError,
'self.asm.regalloc_mov(stack(1), vfp(2))')
py.test.raises(AssertionError,
- 'self.asm.regalloc_mov(stack(1), lr)')
- py.test.raises(AssertionError,
'self.asm.regalloc_mov(stack_float(1), imm(2))')
py.test.raises(AssertionError,
'self.asm.regalloc_mov(stack_float(1), imm_float(2))')
@@ -312,12 +306,11 @@
r1 = r(1)
r2 = r(2)
e = [
- mi('PUSH', [ip.value], cond=AL),
mi('gen_load_int', ip.value, s.value, cond=AL),
mi('LDR_rr', r1.value, fp.value, ip.value, cond=AL),
mi('ADD_ri', ip.value, ip.value, imm=WORD, cond=AL),
mi('LDR_rr', r2.value, fp.value, ip.value, cond=AL),
- mi('POP', [ip.value], cond=AL)]
+ ]
self.mov(s, r1, r2, e)
def test_from_imm_float(self):
@@ -325,11 +318,10 @@
r1 = r(1)
r2 = r(2)
e = [
- mi('PUSH', [ip.value], cond=AL),
mi('gen_load_int', ip.value, i.value, cond=AL),
mi('LDR_ri', r1.value, ip.value, cond=AL),
mi('LDR_ri', r2.value, ip.value, imm=4, cond=AL),
- mi('POP', [ip.value], cond=AL)]
+ ]
self.mov(i, r1, r2, e)
def test_unsupported(self):
@@ -369,12 +361,11 @@
r1 = r(1)
r2 = r(2)
e = [
- mi('PUSH', [ip.value], cond=AL),
mi('gen_load_int', ip.value, s.value, cond=AL),
mi('STR_rr', r1.value, fp.value, ip.value, cond=AL),
mi('ADD_ri', ip.value, ip.value, imm=4, cond=AL),
mi('STR_rr', r2.value, fp.value, ip.value, cond=AL),
- mi('POP', [ip.value], cond=AL)]
+ ]
self.mov(r1, r2, s, e)
def unsupported(self):
@@ -408,10 +399,9 @@
def test_push_imm_float(self):
f = imm_float(7)
- e = [mi('PUSH', [ip.value], cond=AL),
+ e = [
mi('gen_load_int', ip.value, 7, cond=AL),
mi('VLDR', vfp_ip.value, ip.value, imm=0, cond=AL),
- mi('POP', [ip.value], cond=AL),
mi('VPUSH', [vfp_ip.value], cond=AL)
]
self.push(f, e)
@@ -426,10 +416,8 @@
def test_push_big_stack(self):
s = stack(1025)
e = [
- mi('PUSH', [lr.value], cond=AL),
mi('gen_load_int', lr.value, s.value, cond=AL),
mi('LDR_rr', ip.value, fp.value, lr.value, cond=AL),
- mi('POP', [lr.value], cond=AL),
mi('PUSH', [ip.value], cond=AL)
]
self.push(s, e)
@@ -450,11 +438,9 @@
def test_push_large_stackfloat(self):
sf = stack_float(100)
e = [
- mi('PUSH', [ip.value], cond=AL),
mi('gen_load_int', ip.value, sf.value, cond=AL),
mi('ADD_rr', ip.value, fp.value, ip.value, cond=AL),
mi('VLDR', vfp_ip.value, ip.value, cond=AL),
- mi('POP', [ip.value], cond=AL),
mi('VPUSH', [vfp_ip.value], cond=AL),
]
self.push(sf, e)
@@ -486,10 +472,8 @@
s = stack(1200)
e = [
mi('POP', [ip.value], cond=AL),
- mi('PUSH', [lr.value], cond=AL),
mi('gen_load_int', lr.value, s.value, cond=AL),
mi('STR_rr', ip.value, fp.value, lr.value, cond=AL),
- mi('POP', [lr.value], cond=AL)
]
self.pop(s, e)
@@ -505,13 +489,88 @@
s = stack_float(1200)
e = [
mi('VPOP', [vfp_ip.value], cond=AL),
- mi('PUSH', [ip.value], cond=AL),
mi('gen_load_int', ip.value, s.value, cond=AL),
mi('ADD_rr', ip.value, fp.value, ip.value, cond=AL),
mi('VSTR', vfp_ip.value, ip.value, cond=AL),
- mi('POP', [ip.value], cond=AL)]
+ ]
self.pop(s, e)
def test_unsupported(self):
py.test.raises(AssertionError, 'self.asm.regalloc_pop(imm(1))')
py.test.raises(AssertionError, 'self.asm.regalloc_pop(imm_float(1))')
+
+class TestRawStackLocs(BaseMovTest):
+ def test_unsupported(self):
+ py.test.raises(AssertionError, 'self.asm.regalloc_mov(raw_stack(0), imm(1))')
+ py.test.raises(AssertionError, 'self.asm.regalloc_mov(raw_stack(0), imm_float(1))')
+ py.test.raises(AssertionError, 'self.asm.regalloc_mov(raw_stack(0), r(1))')
+ py.test.raises(AssertionError, 'self.asm.regalloc_mov(raw_stack(0), vfp(1))')
+ py.test.raises(AssertionError, 'self.asm.regalloc_mov(raw_stack(0), stack(1))')
+ py.test.raises(AssertionError, 'self.asm.regalloc_mov(raw_stack(0), stack_float(1))')
+
+ py.test.raises(AssertionError, 'self.asm.regalloc_mov(imm_float(1), raw_stack(1))')
+ py.test.raises(AssertionError, 'self.asm.regalloc_mov(imm(1), raw_stack_float(1))')
+
+ py.test.raises(AssertionError, 'self.asm.regalloc_mov(vfp(1), raw_stack(1))')
+ py.test.raises(AssertionError, 'self.asm.regalloc_mov(r(1), raw_stack_float(1))')
+
+ py.test.raises(AssertionError, 'self.asm.regalloc_mov(stack_float(1), raw_stack(1))')
+ py.test.raises(AssertionError, 'self.asm.regalloc_mov(stack(1), raw_stack_float(1))')
+
+ def test_from_imm(self):
+ s = raw_stack(1024)
+ i = imm(999)
+ e = [
+ mi('gen_load_int', lr.value, i.value, cond=AL),
+ mi('gen_load_int', ip.value, s.value, cond=AL),
+ mi('STR_rr', lr.value, sp.value, ip.value, cond=AL),
+ ]
+ self.mov(i, s, e)
+
+ def test_from_vfp_imm(self):
+ s = raw_stack_float(1024)
+ i = imm_float(999)
+ e = [
+ mi('gen_load_int', ip.value, i.value, cond=AL),
+ mi('VLDR', vfp_ip.value, ip.value, cond=AL, imm=0),
+ mi('gen_load_int', ip.value, s.value, cond=AL),
+ mi('ADD_rr', ip.value, sp.value, ip.value, cond=AL),
+ mi('VSTR', vfp_ip.value, ip.value, cond=AL),
+ ]
+ self.mov(i, s, e)
+
+ def test_from_reg(self):
+ s = raw_stack(1024)
+ reg = r(10)
+ e = [mi('gen_load_int', ip.value, s.value, cond=AL),
+ mi('STR_rr', reg.value, sp.value, ip.value, cond=AL),
+ ]
+ self.mov(reg, s, e)
+
+ def test_from_vfp_reg(self):
+ s = raw_stack_float(1024)
+ reg = vfp(10)
+ e = [mi('gen_load_int', ip.value, s.value, cond=AL),
+ mi('ADD_rr', ip.value, sp.value, ip.value, cond=AL),
+ mi('VSTR', reg.value, ip.value, cond=AL),
+ ]
+ self.mov(reg, s, e)
+
+ def test_from_stack(self):
+ s = raw_stack(1024)
+ reg = stack(10)
+ e = [mi('LDR_ri', ip.value, fp.value, imm=216, cond=AL),
+ mi('gen_load_int', lr.value, s.value, cond=AL),
+ mi('STR_rr', ip.value, sp.value, lr.value, cond=AL),
+ ]
+ self.mov(reg, s, e)
+
+ def test_from_vfp_stack(self):
+ s = raw_stack_float(1024)
+ reg = stack_float(10)
+ e = [mi('VLDR', vfp_ip.value, fp.value, imm=220, cond=AL),
+ mi('gen_load_int', ip.value, s.value, cond=AL),
+ mi('ADD_rr', ip.value, sp.value, ip.value, cond=AL),
+ mi('VSTR', vfp_ip.value, ip.value, cond=AL),
+ ]
+ self.mov(reg, s, e)
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -372,6 +372,9 @@
self.releasegil_addr = self.cpu.cast_ptr_to_int(releasegil_func)
self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)
+ def _is_asmgcc(self):
+ gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+ return bool(gcrootmap) and not gcrootmap.is_shadow_stack
def debug_bridge(descr_number, rawstart, codeendpos):
diff --git a/rpython/jit/backend/llsupport/callbuilder.py b/rpython/jit/backend/llsupport/callbuilder.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/llsupport/callbuilder.py
@@ -0,0 +1,92 @@
+from rpython.rlib.clibffi import FFI_DEFAULT_ABI
+
+class AbstractCallBuilder(object):
+
+ # this is the calling convention (can be FFI_STDCALL on Windows)
+ callconv = FFI_DEFAULT_ABI
+
+ # is it for the main CALL of a call_release_gil?
+ is_call_release_gil = False
+
+ # this can be set to guide more complex calls: gives the detailed
+ # type of the arguments
+ argtypes = ""
+ ressign = False
+
+
+ def __init__(self, assembler, fnloc, arglocs, resloc, restype, ressize):
+ self.fnloc = fnloc
+ self.arglocs = arglocs
+ self.asm = assembler
+ self.mc = assembler.mc
+ self.resloc = resloc
+ self.restype = restype
+ self.ressize = ressize
+
+ def emit_no_collect(self):
+ """Emit a call that cannot collect."""
+ self.prepare_arguments()
+ self.emit_raw_call()
+ self.restore_stack_pointer()
+ self.load_result()
+
+ def emit(self):
+ """Emit a regular call; not for CALL_RELEASE_GIL."""
+ self.prepare_arguments()
+ self.push_gcmap()
+ self.emit_raw_call()
+ self.restore_stack_pointer()
+ self.pop_gcmap()
+ self.load_result()
+
+ def emit_call_release_gil(self):
+ """Emit a CALL_RELEASE_GIL, including calls to releasegil_addr
+ and reacqgil_addr."""
+ self.select_call_release_gil_mode()
+ self.prepare_arguments()
+ self.push_gcmap_for_call_release_gil()
+ self.call_releasegil_addr_and_move_real_arguments()
+ self.emit_raw_call()
+ self.restore_stack_pointer()
+ self.move_real_result_and_call_reacqgil_addr()
+ self.pop_gcmap()
+ self.load_result()
+
+ def call_releasegil_addr_and_move_real_arguments(self):
+ raise NotImplementedError
+
+ def move_real_result_and_call_reacqgil_addr(self):
+ raise NotImplementedError
+
+ def select_call_release_gil_mode(self):
+ """Overridden in CallBuilder64"""
+ self.is_call_release_gil = True
+
+ def prepare_arguments(self):
+ raise NotImplementedError
+
+ def push_gcmap(self):
+ raise NotImplementedError
+
+ def push_gcmap_for_call_release_gil(self):
+ assert self.is_call_release_gil
+ # we put the gcmap now into the frame before releasing the GIL,
+ # and pop it after reacquiring the GIL. The assumption
+ # is that this gcmap describes correctly the situation at any
+ # point in-between: all values containing GC pointers should
+ # be safely saved out of registers by now, and will not be
+ # manipulated by any of the following CALLs.
+ gcmap = self.asm._regalloc.get_gcmap(noregs=True)
+ self.asm.push_gcmap(self.mc, gcmap, store=True)
+
+ def pop_gcmap(self):
+ raise NotImplementedError
+
+ def emit_raw_call(self):
+ raise NotImplementedError
+
+ def restore_stack_pointer(self):
+ raise NotImplementedError
+
+ def load_result(self):
+ raise NotImplementedError
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -999,10 +999,6 @@
self.implement_guard(guard_token, checkfalsecond)
return genop_cmp_guard_float
- def _is_asmgcc(self):
- gcrootmap = self.cpu.gc_ll_descr.gcrootmap
- return bool(gcrootmap) and not gcrootmap.is_shadow_stack
-
def simple_call(self, fnloc, arglocs, result_loc=eax):
if result_loc is xmm0:
result_type = FLOAT
diff --git a/rpython/jit/backend/x86/callbuilder.py b/rpython/jit/backend/x86/callbuilder.py
--- a/rpython/jit/backend/x86/callbuilder.py
+++ b/rpython/jit/backend/x86/callbuilder.py
@@ -8,6 +8,7 @@
r12, r13, r14, r15, X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG,
RegLoc, RawEspLoc, RawEbpLoc, imm, ImmedLoc)
from rpython.jit.backend.x86.jump import remap_frame_layout
+from rpython.jit.backend.llsupport.callbuilder import AbstractCallBuilder
# darwin requires the stack to be 16 bytes aligned on calls.
@@ -18,77 +19,30 @@
return (words + CALL_ALIGN - 1) & ~(CALL_ALIGN-1)
-
-class AbstractCallBuilder(object):
+class CallBuilderX86(AbstractCallBuilder):
# max number of words we have room in esp; if we need more for
# arguments, we need to decrease esp temporarily
stack_max = PASS_ON_MY_FRAME
- # this can be set to guide more complex calls: gives the detailed
- # type of the arguments
- argtypes = ""
- ressign = False
-
- # this is the calling convention (can be FFI_STDCALL on Windows)
- callconv = FFI_DEFAULT_ABI
-
- # is it for the main CALL of a call_release_gil?
- is_call_release_gil = False
-
# set by save_result_value()
tmpresloc = None
-
def __init__(self, assembler, fnloc, arglocs,
resloc=eax, restype=INT, ressize=WORD):
+ AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
+ resloc, restype, ressize)
# Avoid tons of issues with a non-immediate fnloc by sticking it
# as an extra argument if needed
self.fnloc_is_immediate = isinstance(fnloc, ImmedLoc)
- if self.fnloc_is_immediate:
- self.fnloc = fnloc
- self.arglocs = arglocs
- else:
+ if not self.fnloc_is_immediate:
+ self.fnloc = None
self.arglocs = arglocs + [fnloc]
- self.asm = assembler
- self.mc = assembler.mc
- self.resloc = resloc
- self.restype = restype
- self.ressize = ressize
self.current_esp = 0 # 0 or (usually) negative, counted in bytes
- def emit_no_collect(self):
- """Emit a call that cannot collect."""
- self.prepare_arguments()
- self.emit_raw_call()
- self.restore_esp()
- self.load_result()
-
- def emit(self):
- """Emit a regular call; not for CALL_RELEASE_GIL."""
- self.prepare_arguments()
- self.push_gcmap()
- self.emit_raw_call()
- self.restore_esp()
- self.pop_gcmap()
- self.load_result()
-
- def emit_call_release_gil(self):
- """Emit a CALL_RELEASE_GIL, including calls to releasegil_addr
- and reacqgil_addr."""
- self.select_call_release_gil_mode()
- self.prepare_arguments()
- self.push_gcmap_for_call_release_gil()
- self.call_releasegil_addr_and_move_real_arguments()
- self.emit_raw_call()
- self.restore_esp()
- self.move_real_result_and_call_reacqgil_addr()
- self.pop_gcmap()
- self.load_result()
-
def select_call_release_gil_mode(self):
"""Overridden in CallBuilder64"""
- self.is_call_release_gil = True
+ AbstractCallBuilder.select_call_release_gil_mode(self)
if self.asm._is_asmgcc():
from rpython.memory.gctransform import asmgcroot
self.stack_max = PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS
@@ -105,7 +59,7 @@
self.current_esp -= align * WORD
self.mc.SUB_ri(esp.value, align * WORD)
- def restore_esp(self, target_esp=0):
+ def restore_stack_pointer(self, target_esp=0):
if self.current_esp != target_esp:
self.mc.ADD_ri(esp.value, target_esp - self.current_esp)
self.current_esp = target_esp
@@ -140,17 +94,6 @@
gcmap = self.asm._regalloc.get_gcmap([eax], noregs=noregs)
self.asm.push_gcmap(self.mc, gcmap, store=True)
- def push_gcmap_for_call_release_gil(self):
- assert self.is_call_release_gil
- # we put the gcmap now into the frame before releasing the GIL,
- # and pop it after reacquiring the GIL. The assumption
- # is that this gcmap describes correctly the situation at any
- # point in-between: all values containing GC pointers should
- # be safely saved out of registers by now, and will not be
- # manipulated by any of the following CALLs.
- gcmap = self.asm._regalloc.get_gcmap(noregs=True)
- self.asm.push_gcmap(self.mc, gcmap, store=True)
-
def pop_gcmap(self):
self.asm._reload_frame_if_necessary(self.mc)
if self.change_extra_stack_depth:
@@ -204,7 +147,7 @@
self.mc.ADD(ebp, imm(1)) # ebp any more
#
self.restore_register_arguments()
- self.restore_esp(initial_esp)
+ self.restore_stack_pointer(initial_esp)
def save_register_arguments(self):
"""Overridden in CallBuilder64"""
@@ -248,7 +191,7 @@
raise NotImplementedError
-class CallBuilder32(AbstractCallBuilder):
+class CallBuilder32(CallBuilderX86):
def prepare_arguments(self):
arglocs = self.arglocs
@@ -318,7 +261,7 @@
else:
self.mc.MOV(resloc, self.tmpresloc)
else:
- AbstractCallBuilder.load_result(self)
+ CallBuilderX86.load_result(self)
def save_result_value(self):
# Temporarily save the result value into [ESP+4]. We use "+4"
@@ -343,7 +286,7 @@
self.mc.MOV_sr(4, eax.value)
-class CallBuilder64(AbstractCallBuilder):
+class CallBuilder64(CallBuilderX86):
ARGUMENTS_GPR = [edi, esi, edx, ecx, r8, r9]
ARGUMENTS_XMM = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7]
@@ -389,7 +332,7 @@
i += 1
def select_call_release_gil_mode(self):
- AbstractCallBuilder.select_call_release_gil_mode(self)
+ CallBuilderX86.select_call_release_gil_mode(self)
# We have to copy the arguments around a bit more in this mode,
# but on the other hand we don't need prepare_arguments() moving
# them in precisely the final registers. Here we look around for
@@ -502,7 +445,7 @@
# from the lower 32 bits of XMM0
self.mc.MOVD(self.resloc, xmm0)
else:
- AbstractCallBuilder.load_result(self)
+ CallBuilderX86.load_result(self)
def save_result_value(self):
# Temporarily save the result value into [ESP].
More information about the pypy-commit
mailing list