[pypy-commit] pypy ppc-jit-backend: update some arm files for comparison.
edelsohn
noreply at buildbot.pypy.org
Sun Aug 26 05:18:10 CEST 2012
Author: edelsohn
Branch: ppc-jit-backend
Changeset: r56860:6b8cbecfe63f
Date: 2012-08-25 23:17 -0400
http://bitbucket.org/pypy/pypy/changeset/6b8cbecfe63f/
Log: update some arm files for comparison.
diff --git a/pypy/jit/backend/arm/assembler.py b/pypy/jit/backend/arm/assembler.py
--- a/pypy/jit/backend/arm/assembler.py
+++ b/pypy/jit/backend/arm/assembler.py
@@ -59,6 +59,7 @@
self._exit_code_addr = 0
self.current_clt = None
self.malloc_slowpath = 0
+ self.wb_slowpath = [0, 0, 0, 0]
self._regalloc = None
self.datablockwrapper = None
self.propagate_exception_path = 0
@@ -107,6 +108,11 @@
# Addresses of functions called by new_xxx operations
gc_ll_descr = self.cpu.gc_ll_descr
gc_ll_descr.initialize()
+ self._build_wb_slowpath(False)
+ self._build_wb_slowpath(True)
+ if self.cpu.supports_floats:
+ self._build_wb_slowpath(False, withfloats=True)
+ self._build_wb_slowpath(True, withfloats=True)
self._build_propagate_exception_path()
if gc_ll_descr.get_malloc_slowpath_addr is not None:
self._build_malloc_slowpath()
@@ -286,6 +292,45 @@
rawstart = mc.materialize(self.cpu.asmmemmgr, [])
self.stack_check_slowpath = rawstart
+ def _build_wb_slowpath(self, withcards, withfloats=False):
+ descr = self.cpu.gc_ll_descr.write_barrier_descr
+ if descr is None:
+ return
+ if not withcards:
+ func = descr.get_write_barrier_fn(self.cpu)
+ else:
+ if descr.jit_wb_cards_set == 0:
+ return
+ func = descr.get_write_barrier_from_array_fn(self.cpu)
+ if func == 0:
+ return
+ #
+ # This builds a helper function called from the slow path of
+ # write barriers. It must save all registers, and optionally
+ # all vfp registers. It takes a single argument which is in r0.
+ # It must keep stack alignment accordingly.
+ mc = ARMv7Builder()
+ #
+ if withfloats:
+ floats = r.caller_vfp_resp
+ else:
+ floats = []
+ with saved_registers(mc, r.caller_resp + [r.ip, r.lr], floats):
+ mc.BL(func)
+ #
+ if withcards:
+ # A final flag test (LDRB + TST) before returning, for the caller.
+ # Careful not to follow it with another instruction that changes
+ # the status of the CPU flags!
+ mc.LDRB_ri(r.ip.value, r.r0.value,
+ imm=descr.jit_wb_if_flag_byteofs)
+ mc.TST_ri(r.ip.value, imm=0x80)
+ #
+ mc.MOV_rr(r.pc.value, r.lr.value)
+ #
+ rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+ self.wb_slowpath[withcards + 2 * withfloats] = rawstart
+
def setup_failure_recovery(self):
@rgc.no_collect
@@ -429,11 +474,14 @@
def _build_malloc_slowpath(self):
mc = ARMv7Builder()
- assert self.cpu.supports_floats
+ if self.cpu.supports_floats:
+ vfp_regs = r.all_vfp_regs
+ else:
+ vfp_regs = []
# We need to push two registers here because we are going to make a
# call and therefore the stack needs to be 8-byte aligned
mc.PUSH([r.ip.value, r.lr.value])
- with saved_registers(mc, [], r.all_vfp_regs):
+ with saved_registers(mc, [], vfp_regs):
# At this point we know that the values we need to compute the size
# are stored in r0 and r1.
mc.SUB_rr(r.r0.value, r.r1.value, r.r0.value)
diff --git a/pypy/jit/backend/arm/opassembler.py b/pypy/jit/backend/arm/opassembler.py
--- a/pypy/jit/backend/arm/opassembler.py
+++ b/pypy/jit/backend/arm/opassembler.py
@@ -2,7 +2,7 @@
from pypy.jit.backend.arm import conditions as c
from pypy.jit.backend.arm import registers as r
from pypy.jit.backend.arm import shift
-from pypy.jit.backend.arm.arch import WORD
+from pypy.jit.backend.arm.arch import WORD, DOUBLE_WORD
from pypy.jit.backend.arm.helper.assembler import (gen_emit_op_by_helper_call,
gen_emit_op_unary_cmp,
@@ -370,31 +370,69 @@
def _emit_call(self, force_index, adr, arglocs, fcond=c.AL,
resloc=None, result_info=(-1,-1)):
+ if self.cpu.use_hf_abi:
+ stack_args, adr = self._setup_call_hf(force_index, adr, arglocs, fcond, resloc, result_info)
+ else:
+ stack_args, adr = self._setup_call_sf(force_index, adr, arglocs, fcond, resloc, result_info)
+
+ #the actual call
+ #self.mc.BKPT()
+ if adr.is_imm():
+ self.mc.BL(adr.value)
+ elif adr.is_stack():
+ self.mov_loc_loc(adr, r.ip)
+ adr = r.ip
+ else:
+ assert adr.is_reg()
+ if adr.is_reg():
+ self.mc.BLX(adr.value)
+ self.mark_gc_roots(force_index)
+ self._restore_sp(stack_args, fcond)
+
+ # ensure the result is wellformed and stored in the correct location
+ if resloc is not None:
+ if resloc.is_vfp_reg() and not self.cpu.use_hf_abi:
+ # move result to the allocated register
+ self.mov_to_vfp_loc(r.r0, r.r1, resloc)
+ elif resloc.is_reg() and result_info != (-1, -1):
+ self._ensure_result_bit_extension(resloc, result_info[0],
+ result_info[1])
+ return fcond
+
+ def _restore_sp(self, stack_args, fcond):
+ # readjust the sp in case we passed some args on the stack
+ if len(stack_args) > 0:
+ n = 0
+ for arg in stack_args:
+ if arg is None or arg.type != FLOAT:
+ n += WORD
+ else:
+ n += DOUBLE_WORD
+ self._adjust_sp(-n, fcond=fcond)
+ assert n % 8 == 0 # sanity check
+
+ def _collect_stack_args_sf(self, arglocs):
n_args = len(arglocs)
reg_args = count_reg_args(arglocs)
# all arguments past the 4th go on the stack
- n = 0 # used to count the number of words pushed on the stack, so we
- #can later modify the SP back to its original value
+ # first we need to prepare the list so it stays aligned
+ stack_args = []
+ count = 0
if n_args > reg_args:
- # first we need to prepare the list so it stays aligned
- stack_args = []
- count = 0
for i in range(reg_args, n_args):
arg = arglocs[i]
if arg.type != FLOAT:
count += 1
- n += WORD
else:
- n += 2 * WORD
if count % 2 != 0:
stack_args.append(None)
- n += WORD
count = 0
stack_args.append(arg)
if count % 2 != 0:
- n += WORD
stack_args.append(None)
+ return stack_args
+ def _push_stack_args(self, stack_args):
# then we push everything on the stack
for i in range(len(stack_args) - 1, -1, -1):
arg = stack_args[i]
@@ -402,6 +440,13 @@
self.mc.PUSH([r.ip.value])
else:
self.regalloc_push(arg)
+
+ def _setup_call_sf(self, force_index, adr, arglocs, fcond=c.AL,
+ resloc=None, result_info=(-1,-1)):
+ n_args = len(arglocs)
+ reg_args = count_reg_args(arglocs)
+ stack_args = self._collect_stack_args_sf(arglocs)
+ self._push_stack_args(stack_args)
# collect variables that need to go in registers and the registers they
# will be stored in
num = 0
@@ -440,32 +485,55 @@
for loc, reg in float_locs:
self.mov_from_vfp_loc(loc, reg, r.all_regs[reg.value + 1])
+ return stack_args, adr
- #the actual call
- if adr.is_imm():
- self.mc.BL(adr.value)
- elif adr.is_stack():
- self.mov_loc_loc(adr, r.ip)
- adr = r.ip
- else:
- assert adr.is_reg()
- if adr.is_reg():
- self.mc.BLX(adr.value)
- self.mark_gc_roots(force_index)
- # readjust the sp in case we passed some args on the stack
- if n > 0:
- self._adjust_sp(-n, fcond=fcond)
- # ensure the result is wellformed and stored in the correct location
- if resloc is not None:
- if resloc.is_vfp_reg():
- # move result to the allocated register
- self.mov_to_vfp_loc(r.r0, r.r1, resloc)
- elif result_info != (-1, -1):
- self._ensure_result_bit_extension(resloc, result_info[0],
- result_info[1])
+ def _setup_call_hf(self, force_index, adr, arglocs, fcond=c.AL,
+ resloc=None, result_info=(-1,-1)):
+ n_reg_args = n_vfp_args = 0
+ non_float_locs = []
+ non_float_regs = []
+ float_locs = []
+ float_regs = []
+ stack_args = []
+ count = 0 # stack alignment counter
+ for arg in arglocs:
+ if arg.type != FLOAT:
+ if len(non_float_regs) < len(r.argument_regs):
+ reg = r.argument_regs[len(non_float_regs)]
+ non_float_locs.append(arg)
+ non_float_regs.append(reg)
+ else: # non-float argument that needs to go on the stack
+ count += 1
+ stack_args.append(arg)
+ else:
+ if len(float_regs) < len(r.vfp_argument_regs):
+ reg = r.vfp_argument_regs[len(float_regs)]
+ float_locs.append(arg)
+ float_regs.append(reg)
+ else: # float argument that needs to go on the stack
+ if count % 2 != 0:
+ stack_args.append(None)
+ count = 0
+ stack_args.append(arg)
+ # align the stack
+ if count % 2 != 0:
+ stack_args.append(None)
+ self._push_stack_args(stack_args)
+ # Check that the address of the function we want to call is not
+ # currently stored in one of the registers used to pass the arguments.
+ # If this happens to be the case we remap the register to r4 and use r4
+ # to call the function
+ if adr in non_float_regs:
+ non_float_locs.append(adr)
+ non_float_regs.append(r.r4)
+ adr = r.r4
+ # remap values stored in core registers
+ remap_frame_layout(self, non_float_locs, non_float_regs, r.ip)
+ # remap values stored in vfp registers
+ remap_frame_layout(self, float_locs, float_regs, r.vfp_ip)
- return fcond
+ return stack_args, adr
def emit_op_same_as(self, op, arglocs, regalloc, fcond):
argloc, resloc = arglocs
@@ -506,32 +574,30 @@
def emit_op_cond_call_gc_wb(self, op, arglocs, regalloc, fcond):
# Write code equivalent to write_barrier() in the GC: it checks
- # a flag in the object at arglocs[0], and if set, it calls the
- # function remember_young_pointer() from the GC. The two arguments
- # to the call are in arglocs[:2]. The rest, arglocs[2:], contains
- # registers that need to be saved and restored across the call.
+ # a flag in the object at arglocs[0], and if set, it calls a
+ # helper piece of assembler. The latter saves registers as needed
+ # and calls the function jit_remember_young_pointer() from the GC.
descr = op.getdescr()
if we_are_translated():
cls = self.cpu.gc_ll_descr.has_write_barrier_class()
assert cls is not None and isinstance(descr, cls)
-
+ #
opnum = op.getopnum()
- if opnum == rop.COND_CALL_GC_WB:
- N = 2
- addr = descr.get_write_barrier_fn(self.cpu)
- card_marking = False
- elif opnum == rop.COND_CALL_GC_WB_ARRAY:
- N = 3
- addr = descr.get_write_barrier_from_array_fn(self.cpu)
- assert addr != 0
- card_marking = descr.jit_wb_cards_set != 0
- else:
- raise AssertionError(opnum)
+ card_marking = False
+ mask = descr.jit_wb_if_flag_singlebyte
+ if opnum == rop.COND_CALL_GC_WB_ARRAY and descr.jit_wb_cards_set != 0:
+ # assumptions the rest of the function depends on:
+ assert (descr.jit_wb_cards_set_byteofs ==
+ descr.jit_wb_if_flag_byteofs)
+ assert descr.jit_wb_cards_set_singlebyte == -0x80
+ card_marking = True
+ mask = descr.jit_wb_if_flag_singlebyte | -0x80
+ #
loc_base = arglocs[0]
- assert check_imm_arg(descr.jit_wb_if_flag_byteofs)
- assert check_imm_arg(descr.jit_wb_if_flag_singlebyte)
- self.mc.LDRB_ri(r.ip.value, loc_base.value, imm=descr.jit_wb_if_flag_byteofs)
- self.mc.TST_ri(r.ip.value, imm=descr.jit_wb_if_flag_singlebyte)
+ self.mc.LDRB_ri(r.ip.value, loc_base.value,
+ imm=descr.jit_wb_if_flag_byteofs)
+ mask &= 0xFF
+ self.mc.TST_ri(r.ip.value, imm=mask)
jz_location = self.mc.currpos()
self.mc.BKPT()
@@ -539,68 +605,80 @@
# for cond_call_gc_wb_array, also add another fast path:
# if GCFLAG_CARDS_SET, then we can just set one bit and be done
if card_marking:
- assert check_imm_arg(descr.jit_wb_cards_set_byteofs)
- assert check_imm_arg(descr.jit_wb_cards_set_singlebyte)
- self.mc.LDRB_ri(r.ip.value, loc_base.value, imm=descr.jit_wb_cards_set_byteofs)
- self.mc.TST_ri(r.ip.value, imm=descr.jit_wb_cards_set_singlebyte)
- #
- jnz_location = self.mc.currpos()
+ # GCFLAG_CARDS_SET is in this byte at 0x80
+ self.mc.TST_ri(r.ip.value, imm=0x80)
+
+ js_location = self.mc.currpos() #
+ self.mc.BKPT()
+ else:
+ js_location = 0
+
+ # Write only a CALL to the helper prepared in advance, passing it as
+ # argument the address of the structure we are writing into
+ # (the first argument to COND_CALL_GC_WB).
+ helper_num = card_marking
+ if self._regalloc.vfprm.reg_bindings:
+ helper_num += 2
+ if self.wb_slowpath[helper_num] == 0: # tests only
+ assert not we_are_translated()
+ self.cpu.gc_ll_descr.write_barrier_descr = descr
+ self._build_wb_slowpath(card_marking,
+ bool(self._regalloc.vfprm.reg_bindings))
+ assert self.wb_slowpath[helper_num] != 0
+ #
+ if loc_base is not r.r0:
+ # push two registers to keep stack aligned
+ self.mc.PUSH([r.r0.value, loc_base.value])
+ remap_frame_layout(self, [loc_base], [r.r0], r.ip)
+ self.mc.BL(self.wb_slowpath[helper_num])
+ if loc_base is not r.r0:
+ self.mc.POP([r.r0.value, loc_base.value])
+
+ if card_marking:
+ # The helper ends again with a check of the flag in the object. So
+ # here, we can simply write again a conditional jump, which will be
+ # taken if GCFLAG_CARDS_SET is still not set.
+ jns_location = self.mc.currpos()
self.mc.BKPT()
#
- else:
- jnz_location = 0
-
- # the following is supposed to be the slow path, so whenever possible
- # we choose the most compact encoding over the most efficient one.
- with saved_registers(self.mc, r.caller_resp):
- if N == 2:
- callargs = [r.r0, r.r1]
- else:
- callargs = [r.r0, r.r1, r.r2]
- remap_frame_layout(self, arglocs, callargs, r.ip)
- func = rffi.cast(lltype.Signed, addr)
- # misaligned stack in the call, but it's ok because the write
- # barrier is not going to call anything more.
- self.mc.BL(func)
-
- # if GCFLAG_CARDS_SET, then we can do the whole thing that would
- # be done in the CALL above with just four instructions, so here
- # is an inline copy of them
- if card_marking:
- jmp_location = self.mc.get_relative_pos()
- self.mc.BKPT() # jump to the exit, patched later
- # patch the JNZ above
+ # patch the JS above
offset = self.mc.currpos()
- pmc = OverwritingBuilder(self.mc, jnz_location, WORD)
- pmc.B_offs(offset, c.NE)
+ pmc = OverwritingBuilder(self.mc, js_location, WORD)
+ pmc.B_offs(offset, c.NE) # We want to jump if the z flag is not set
#
+ # case GCFLAG_CARDS_SET: emit a few instructions to do
+ # directly the card flag setting
loc_index = arglocs[1]
assert loc_index.is_reg()
- tmp1 = arglocs[-2]
- tmp2 = arglocs[-1]
- #byteofs
- s = 3 + descr.jit_wb_card_page_shift
- self.mc.MVN_rr(r.lr.value, loc_index.value,
- imm=s, shifttype=shift.LSR)
- # byte_index
- self.mc.MOV_ri(r.ip.value, imm=7)
- self.mc.AND_rr(tmp1.value, r.ip.value, loc_index.value,
- imm=descr.jit_wb_card_page_shift, shifttype=shift.LSR)
+ # must save the register loc_index before it is mutated
+ self.mc.PUSH([loc_index.value])
+ tmp1 = loc_index
+ tmp2 = arglocs[2]
+ # lr = byteofs
+ s = 3 + descr.jit_wb_card_page_shift
+ self.mc.MVN_rr(r.lr.value, loc_index.value,
+ imm=s, shifttype=shift.LSR)
+
+ # tmp1 = byte_index
+ self.mc.MOV_ri(r.ip.value, imm=7)
+ self.mc.AND_rr(tmp1.value, r.ip.value, loc_index.value,
+ imm=descr.jit_wb_card_page_shift, shifttype=shift.LSR)
+
+ # set the bit
+ self.mc.MOV_ri(tmp2.value, imm=1)
+ self.mc.LDRB_rr(r.ip.value, loc_base.value, r.lr.value)
+ self.mc.ORR_rr_sr(r.ip.value, r.ip.value, tmp2.value,
+ tmp1.value, shifttype=shift.LSL)
+ self.mc.STRB_rr(r.ip.value, loc_base.value, r.lr.value)
+ # done
+ self.mc.POP([loc_index.value])
+ #
+ #
+ # patch the JNS above
+ offset = self.mc.currpos()
+ pmc = OverwritingBuilder(self.mc, jns_location, WORD)
+ pmc.B_offs(offset, c.EQ) # We want to jump if the z flag is set
- # set the bit
- self.mc.MOV_ri(tmp2.value, imm=1)
- self.mc.LDRB_rr(r.ip.value, loc_base.value, r.lr.value)
- self.mc.ORR_rr_sr(r.ip.value, r.ip.value, tmp2.value,
- tmp1.value, shifttype=shift.LSL)
- self.mc.STRB_rr(r.ip.value, loc_base.value, r.lr.value)
- # done
-
- # patch the JMP above
- offset = self.mc.currpos()
- pmc = OverwritingBuilder(self.mc, jmp_location, WORD)
- pmc.B_offs(offset)
- #
- # patch the JZ above
offset = self.mc.currpos()
pmc = OverwritingBuilder(self.mc, jz_location, WORD)
pmc.B_offs(offset, c.EQ)
More information about the pypy-commit
mailing list