[pypy-commit] pypy arm64: enough scaffolding to actually run the first loop test. It does not pass yet, but it does run!
fijal
pypy.commits at gmail.com
Wed Mar 6 05:33:01 EST 2019
Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: arm64
Changeset: r96214:173ed1a7572d
Date: 2019-03-06 10:32 +0000
http://bitbucket.org/pypy/pypy/changeset/173ed1a7572d/
Log: enough scaffolding to actually run the first loop test. It does not
pass yet, but it does run!
diff --git a/rpython/jit/backend/aarch64/assembler.py b/rpython/jit/backend/aarch64/assembler.py
--- a/rpython/jit/backend/aarch64/assembler.py
+++ b/rpython/jit/backend/aarch64/assembler.py
@@ -9,6 +9,7 @@
# CoreRegisterManager, check_imm_arg, VFPRegisterManager,
#from rpython.jit.backend.arm import callbuilder
from rpython.jit.backend.aarch64 import registers as r
+from rpython.jit.backend.arm import conditions as c
from rpython.jit.backend.llsupport import jitframe
from rpython.jit.backend.llsupport.assembler import BaseAssembler
from rpython.jit.backend.llsupport.regalloc import get_scale, valid_addressing_size
@@ -113,6 +114,7 @@
def setup(self, looptoken):
BaseAssembler.setup(self, looptoken)
+ self.failure_recovery_code = [0, 0, 0, 0]
assert self.memcpy_addr != 0, 'setup_once() not called?'
if we_are_translated():
self.debug = False
@@ -135,7 +137,48 @@
self.pending_guards = None
def _build_failure_recovery(self, exc, withfloats=False):
- pass # XXX
+ return # XXX
+ mc = InstrBuilder()
+ self._push_all_regs_to_jitframe(mc, [], withfloats)
+
+ if exc:
+ XXX
+ # We might have an exception pending. Load it into r4
+ # (this is a register saved across calls)
+ mc.gen_load_int(r.r5.value, self.cpu.pos_exc_value())
+ mc.LDR_ri(r.r4.value, r.r5.value)
+ # clear the exc flags
+ mc.gen_load_int(r.r6.value, 0)
+ mc.STR_ri(r.r6.value, r.r5.value) # pos_exc_value is still in r5
+ mc.gen_load_int(r.r5.value, self.cpu.pos_exception())
+ mc.STR_ri(r.r6.value, r.r5.value)
+ # save r4 into 'jf_guard_exc'
+ offset = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
+ assert check_imm_arg(abs(offset))
+ mc.STR_ri(r.r4.value, r.fp.value, imm=offset)
+ # now we return from the complete frame, which starts from
+ # _call_header_with_stack_check(). The LEA in _call_footer below
+ # throws away most of the frame, including all the PUSHes that we
+ # did just above.
+ ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
+ assert check_imm_arg(abs(ofs))
+ ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcmap')
+ assert check_imm_arg(abs(ofs2))
+ base_ofs = self.cpu.get_baseofs_of_frame_field()
+ # store the gcmap
+ mc.POP([r.ip.value])
+ mc.STR_ri(r.ip.value, r.fp.value, imm=ofs2)
+ # store the descr
+ mc.POP([r.ip.value])
+ mc.STR_ri(r.ip.value, r.fp.value, imm=ofs)
+
+ # set return value
+ assert check_imm_arg(base_ofs)
+ mc.MOV_rr(r.r0.value, r.fp.value)
+ #
+ self.gen_func_epilog(mc)
+ rawstart = mc.materialize(self.cpu, [])
+ self.failure_recovery_code[exc + 2 * withfloats] = rawstart
def _build_wb_slowpath(self, withcards, withfloats=False, for_frame=False):
pass # XXX
@@ -159,8 +202,25 @@
baseofs = self.cpu.get_baseofs_of_frame_field()
self.current_clt.frame_info.update_frame_depth(baseofs, frame_depth)
+ def generate_quick_failure(self, guardtok):
+ startpos = self.mc.currpos()
+ faildescrindex, target = self.store_info_on_descr(startpos, guardtok)
+ self.mc.SUB_ri(r.sp.value, r.sp.value, 2 * WORD)
+ self.load_from_gc_table(r.ip0.value, faildescrindex)
+ self.store_reg(self.mc, r.ip0, r.fp, WORD)
+ self.push_gcmap(self.mc, gcmap=guardtok.gcmap, ofs=0)
+ self.mc.BL(target)
+ return startpos
+
+ def push_gcmap(self, mc, gcmap, ofs):
+ ptr = rffi.cast(lltype.Signed, gcmap)
+ mc.gen_load_int(r.ip0.value, ptr)
+ self.store_reg(mc, r.ip0, r.fp, ofs)
+
def write_pending_failure_recoveries(self):
- pass # XXX
+ for tok in self.pending_guards:
+            # generate the exit stub and the encoded representation
+ tok.pos_recovery_stub = self.generate_quick_failure(tok)
def reserve_gcref_table(self, allgcrefs):
gcref_table_size = len(allgcrefs) * WORD
@@ -200,8 +260,25 @@
# self.codemap.get_final_bytecode(res, size))
return res
- def process_pending_guards(self, rawstart):
- pass
+ def process_pending_guards(self, block_start):
+ clt = self.current_clt
+ for tok in self.pending_guards:
+ descr = tok.faildescr
+ assert isinstance(descr, AbstractFailDescr)
+ failure_recovery_pos = block_start + tok.pos_recovery_stub
+ descr.adr_jump_offset = failure_recovery_pos
+ relative_offset = tok.pos_recovery_stub - tok.offset
+ guard_pos = block_start + tok.offset
+ if not tok.guard_not_invalidated():
+ # patch the guard jump to the stub
+            # overwrite the generated BRK with a B_ofs_cond jump to the pos
+            # of the stub
+ mc = InstrBuilder()
+ mc.B_ofs_cond(relative_offset, c.get_opposite_of(tok.fcond))
+ mc.copy_to_raw_memory(guard_pos)
+ else:
+ XX
+ clt.invalidate_positions.append((guard_pos, relative_offset))
def fixup_target_tokens(self, rawstart):
for targettoken in self.target_tokens_currently_compiling:
@@ -277,10 +354,11 @@
elif not we_are_translated() and op.getopnum() == rop.FORCE_SPILL:
regalloc.prepare_force_spill(op)
elif i < len(operations) - 1 and regalloc.next_op_can_accept_cc(operations, i):
- arglocs = guard_operations[operations[i + 1].getopnum()](
- regalloc, operations[i + 1], op)
+ guard_op = operations[i + 1]
+ guard_num = guard_op.getopnum()
+ arglocs, fcond = guard_operations[guard_num](regalloc, guard_op, op)
if arglocs is not None:
- xxx
+ asm_guard_operations[guard_num](self, guard_op, fcond, arglocs)
regalloc.next_instruction() # advance one more
else:
arglocs = regalloc_operations[opnum](regalloc, op)
@@ -302,8 +380,7 @@
opnum = op.getopnum()
arglocs = comp_operations[opnum](self._regalloc, op, True)
assert arglocs is not None
- asm_comp_operations[opnum](self, op, arglocs)
- return arglocs
+ return asm_comp_operations[opnum](self, op, arglocs)
# regalloc support
def load(self, loc, value):
@@ -353,6 +430,14 @@
# if save_helper:
# self.mc.POP([helper.value], cond=cond)
+ def _mov_reg_to_loc(self, prev_loc, loc):
+ if loc.is_core_reg():
+ self.mc.MOV_rr(loc.value, prev_loc.value)
+ elif loc.is_stack():
+ self.mc.STR_ri(r.fp.value, prev_loc.value, loc.value)
+ else:
+ XXX
+
def regalloc_mov(self, prev_loc, loc):
"""Moves a value from a previous location to some other location"""
if prev_loc.is_imm():
@@ -420,6 +505,18 @@
# mc.gen_load_int(r.ip1, ofs)
# mc.STR_rr(source.value, base.value, r.ip1)
+ def check_frame_before_jump(self, target_token):
+ if target_token in self.target_tokens_currently_compiling:
+ return
+ if target_token._arm_clt is self.current_clt:
+ return
+ # We can have a frame coming from god knows where that's
+ # passed to a jump to another loop. Make sure it has the
+ # correct depth
+ expected_size = target_token._arm_clt.frame_info.jfi_frame_depth
+ self._check_frame_depth(self.mc, self._regalloc.get_gcmap(),
+ expected_size=expected_size)
+
def not_implemented(msg):
msg = '[ARM/asm] %s\n' % msg
@@ -436,7 +533,12 @@
print "[ARM/asm] %s not implemented" % op.getopname()
raise NotImplementedError(op)
+def notimplemented_guard_op(self, op, fcond, arglocs):
+ print "[ARM/asm] %s not implemented" % op.getopname()
+ raise NotImplementedError(op)
+
asm_operations = [notimplemented_op] * (rop._LAST + 1)
+asm_guard_operations = [notimplemented_guard_op] * (rop._LAST + 1)
asm_comp_operations = [notimplemented_comp_op] * (rop._LAST + 1)
asm_extra_operations = {}
@@ -449,6 +551,10 @@
opname = name[len('emit_op_'):]
num = getattr(rop, opname.upper())
asm_operations[num] = value
+ elif name.startswith('emit_guard_op_'):
+ opname = name[len('emit_guard_op_'):]
+ num = getattr(rop, opname.upper())
+ asm_guard_operations[num] = value
elif name.startswith('emit_comp_op_'):
opname = name[len('emit_comp_op_'):]
num = getattr(rop, opname.upper())
diff --git a/rpython/jit/backend/aarch64/codebuilder.py b/rpython/jit/backend/aarch64/codebuilder.py
--- a/rpython/jit/backend/aarch64/codebuilder.py
+++ b/rpython/jit/backend/aarch64/codebuilder.py
@@ -7,6 +7,7 @@
from rpython.rtyper.lltypesystem import lltype, rffi
from rpython.tool.udir import udir
+PC_OFFSET = 8
class AbstractAarch64Builder(object):
def write32(self, word):
@@ -70,6 +71,11 @@
self.write32((base << 22) | (constant << 10) |
(rn << 5) | rd)
+ def SUB_ri(self, rd, rn, constant):
+ base = 0b1101000100
+ assert 0 <= constant < 4096
+ self.write32((base << 22) | (constant << 10) | (rn << 5) | rd)
+
def LDP_rri(self, reg1, reg2, rn, offset):
base = 0b1010100101
assert -512 <= offset < 512
@@ -108,6 +114,34 @@
base = 0b11101011000
self.write32((base << 21) | (rm << 16) | (rn << 5) | 0b11111)
+ def B_ofs(self, ofs):
+ base = 0b000101
+ assert ofs & 0x3 == 0
+ pos = self.currpos()
+ target_ofs = ofs - (pos + PC_OFFSET)
+ assert -(1 << (26 + 2)) < target_ofs < 1<<(26 + 2)
+ if target_ofs < 0:
+ target_ofs = 1<<25 | (~target_ofs)
+ self.write32((base << 26) | (target_ofs >> 2))
+
+ def B_ofs_cond(self, ofs, cond):
+ base = 0b01010100
+ assert ofs & 0x3 == 0
+ assert -1 << 10 < ofs < 1 << 10
+ imm = ofs >> 2
+ if imm < 0:
+ xxx
+ self.write32((base << 24) | (imm << 5) | cond)
+
+ def BL(self, target):
+ target = rffi.cast(lltype.Signed, target)
+ self.gen_load_int(r.ip0.value, target)
+ self.BR(r.ip0.value)
+
+ def BR(self, reg):
+ base = 0b1101011000011111000000
+ self.write32((base << 10) | (reg << 5))
+
def BRK(self):
self.write32(0b11010100001 << 21)
@@ -116,9 +150,12 @@
register"""
# XXX optimize!
self.MOVZ_r_u16(r, value & 0xFFFF, 0)
- self.MOVK_r_u16(r, (value >> 16) & 0xFFFF, 16)
- self.MOVK_r_u16(r, (value >> 32) & 0xFFFF, 32)
- self.MOVK_r_u16(r, (value >> 48) & 0xFFFF, 48)
+ value = value >> 16
+ shift = 16
+ while value:
+ self.MOVK_r_u16(r, (value >> 16) & 0xFFFF, shift)
+ shift += 16
+ value >>= 16
class InstrBuilder(BlockBuilderMixin, AbstractAarch64Builder):
diff --git a/rpython/jit/backend/aarch64/locations.py b/rpython/jit/backend/aarch64/locations.py
--- a/rpython/jit/backend/aarch64/locations.py
+++ b/rpython/jit/backend/aarch64/locations.py
@@ -47,7 +47,6 @@
return True
def as_key(self): # 0 <= as_key <= 30, 31 being zero register
- xxx
return self.value
class VFPRegisterLocation(RegisterLocation):
@@ -64,7 +63,6 @@
return True
def as_key(self): # 40 <= as_key <= 71
- xxx
return self.value + 40
def is_float(self):
@@ -110,7 +108,6 @@
return True
def as_key(self): # an aligned word + 10000
- XXX
return self.position + 10000
def is_float(self):
@@ -127,7 +124,10 @@
return "xzr"
def as_key(self):
- return 31
+ raise ValueError("should never make it to jump")
+
+def imm(i):
+ return ImmLocation(i)
def get_fp_offset(base_ofs, position):
return base_ofs + WORD * (position + JITFRAME_FIXED_SIZE)
diff --git a/rpython/jit/backend/aarch64/opassembler.py b/rpython/jit/backend/aarch64/opassembler.py
--- a/rpython/jit/backend/aarch64/opassembler.py
+++ b/rpython/jit/backend/aarch64/opassembler.py
@@ -2,8 +2,12 @@
from rpython.jit.metainterp.history import (AbstractFailDescr, ConstInt,
INT, FLOAT, REF)
from rpython.jit.backend.aarch64 import registers as r
-from rpython.jit.backend.arm import conditions as c # yes, arm, not aarch64
+from rpython.jit.backend.arm import conditions as c
+from rpython.jit.backend.aarch64.arch import JITFRAME_FIXED_SIZE
from rpython.jit.backend.llsupport.assembler import GuardToken, BaseAssembler
+from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
+from rpython.jit.metainterp.history import TargetToken
+
class ResOpAssembler(BaseAssembler):
def emit_op_int_add(self, op, arglocs):
@@ -34,7 +38,13 @@
else:
self.mc.CMP_rr(l0.value, l1.value)
- emit_comp_op_int_le = emit_int_comp_op
+ def emit_comp_op_int_lt(self, op, arglocs):
+ self.emit_int_comp_op(op, arglocs)
+ return c.LT
+
+ def emit_comp_op_int_le(self, op, arglocs):
+ self.emit_int_comp_op(op, arglocs)
+ return c.LE
def emit_op_increment_debug_counter(self, op, arglocs):
return # XXXX
@@ -43,9 +53,52 @@
self.mc.ADD_ri(value_loc.value, value_loc.value, 1)
self.mc.STR_ri(value_loc.value, base_loc.value, 0)
+ def build_guard_token(self, op, frame_depth, arglocs, offset, fcond):
+ descr = op.getdescr()
+ assert isinstance(descr, AbstractFailDescr)
+
+ gcmap = allocate_gcmap(self, frame_depth, JITFRAME_FIXED_SIZE)
+ faildescrindex = self.get_gcref_from_faildescr(descr)
+ token = GuardToken(self.cpu, gcmap, descr,
+ failargs=op.getfailargs(),
+ fail_locs=arglocs,
+ guard_opnum=op.getopnum(),
+ frame_depth=frame_depth,
+ faildescrindex=faildescrindex)
+ token.fcond = fcond
+ return token
+
+ def _emit_guard(self, op, fcond, arglocs, is_guard_not_invalidated=False):
+ pos = self.mc.currpos()
+ token = self.build_guard_token(op, arglocs[0].value, arglocs[1:], pos,
+ fcond)
+ token.offset = pos
+ self.pending_guards.append(token)
+ assert token.guard_not_invalidated() == is_guard_not_invalidated
+ # For all guards that are not GUARD_NOT_INVALIDATED we emit a
+ # breakpoint to ensure the location is patched correctly. In the case
+ # of GUARD_NOT_INVALIDATED we use just a NOP, because it is only
+ # eventually patched at a later point.
+ if is_guard_not_invalidated:
+ self.mc.NOP()
+ else:
+ self.mc.BRK()
+
+ def emit_guard_op_guard_true(self, guard_op, fcond, arglocs):
+ self._emit_guard(guard_op, fcond, arglocs)
+
def emit_op_label(self, op, arglocs):
pass
+ def emit_op_jump(self, op, arglocs):
+ target_token = op.getdescr()
+ assert isinstance(target_token, TargetToken)
+ target = target_token._ll_loop_code
+ if target_token in self.target_tokens_currently_compiling:
+ self.mc.B_ofs(target)
+ else:
+ self.mc.B(target)
+
def emit_op_finish(self, op, arglocs):
base_ofs = self.cpu.get_baseofs_of_frame_field()
if len(arglocs) > 0:
diff --git a/rpython/jit/backend/aarch64/regalloc.py b/rpython/jit/backend/aarch64/regalloc.py
--- a/rpython/jit/backend/aarch64/regalloc.py
+++ b/rpython/jit/backend/aarch64/regalloc.py
@@ -12,6 +12,8 @@
get_scale
from rpython.rtyper.lltypesystem import lltype, rffi, rstr, llmemory
from rpython.jit.backend.aarch64 import registers as r
+from rpython.jit.backend.arm.jump import remap_frame_layout_mixed
+from rpython.jit.backend.aarch64.locations import imm
class TempInt(TempVar):
@@ -363,11 +365,54 @@
return locs
def prepare_guard_op_guard_true(self, op, prevop):
- arglocs = self.assembler.dispatch_comparison(prevop)
- xxx
+ fcond = self.assembler.dispatch_comparison(prevop)
+ # result is in CC
+
+ arglocs = [None] * (len(op.getfailargs()) + 1)
+ arglocs[0] = imm(self.frame_manager.get_frame_depth())
+ failargs = op.getfailargs()
+ for i in range(len(failargs)):
+ if failargs[i]:
+ arglocs[i + 1] = self.loc(failargs[i])
+ return arglocs, fcond
prepare_op_nursery_ptr_increment = prepare_op_int_add
+ def prepare_op_jump(self, op):
+ assert self.jump_target_descr is None
+ descr = op.getdescr()
+ assert isinstance(descr, TargetToken)
+ self.jump_target_descr = descr
+ arglocs = descr._arm_arglocs
+
+ # get temporary locs
+ tmploc = r.ip0
+ vfptmploc = None # XXX r.vfp_ip
+
+ # Part about non-floats
+ src_locations1 = []
+ dst_locations1 = []
+ # Part about floats
+ src_locations2 = []
+ dst_locations2 = []
+
+ # Build the four lists
+ for i in range(op.numargs()):
+ box = op.getarg(i)
+ src_loc = self.loc(box)
+ dst_loc = arglocs[i]
+ if box.type != FLOAT:
+ src_locations1.append(src_loc)
+ dst_locations1.append(dst_loc)
+ else:
+ src_locations2.append(src_loc)
+ dst_locations2.append(dst_loc)
+ self.assembler.check_frame_before_jump(self.jump_target_descr)
+ remap_frame_layout_mixed(self.assembler,
+ src_locations1, dst_locations1, tmploc,
+ src_locations2, dst_locations2, vfptmploc)
+ return []
+
def force_allocate_reg(self, var, forbidden_vars=[], selected_reg=None):
if var.type == FLOAT:
return self.vfprm.force_allocate_reg(var, forbidden_vars,
diff --git a/rpython/jit/backend/aarch64/runner.py b/rpython/jit/backend/aarch64/runner.py
--- a/rpython/jit/backend/aarch64/runner.py
+++ b/rpython/jit/backend/aarch64/runner.py
@@ -1,11 +1,14 @@
from rpython.rtyper.lltypesystem import llmemory, lltype
from rpython.jit.backend.aarch64.assembler import AssemblerARM64
+from rpython.jit.backend.aarch64 import registers as r
from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
class CPU_ARM64(AbstractLLCPU):
"""ARM 64"""
backend_name = "aarch64"
+ frame_reg = r.fp
+ all_reg_indexes = range(len(r.all_regs))
IS_64_BIT = True
diff --git a/rpython/jit/backend/aarch64/test/test_instr_builder.py b/rpython/jit/backend/aarch64/test/test_instr_builder.py
--- a/rpython/jit/backend/aarch64/test/test_instr_builder.py
+++ b/rpython/jit/backend/aarch64/test/test_instr_builder.py
@@ -11,6 +11,9 @@
def writechar(self, char):
self.buffer.append(char)
+ def currpos(self):
+ return 0
+
def hexdump(self):
return ''.join(self.buffer)
@@ -125,6 +128,15 @@
assert cb.hexdump() == assemble("ADD %r, %r, %r" % (rd, rn, rm))
@settings(max_examples=20)
+ @given(rd=st.sampled_from(r.registers),
+ rn=st.sampled_from(r.registers),
+ ofs=st.integers(min_value=0, max_value=4095))
+ def test_SUB_ri(self, rd, rn, ofs):
+ cb = CodeBuilder()
+ cb.SUB_ri(rd.value, rn.value, ofs)
+ assert cb.hexdump() == assemble("SUB %r, %r, %d" % (rd, rn, ofs))
+
+ @settings(max_examples=20)
@given(rn=st.sampled_from(r.registers),
rm=st.sampled_from(r.registers))
def test_CMP_rr(self, rn, rm):
diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -185,6 +185,11 @@
""", namespace={'targettoken': targettoken,
'fdescr': BasicFailDescr(2)})
self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+ print "ONE"
+ deadframe = self.cpu.execute_token(looptoken, 10)
+ print "TWO"
+ fail = self.cpu.get_latest_descr(deadframe)
+ assert fail.identifier == 2
deadframe = self.cpu.execute_token(looptoken, 2)
fail = self.cpu.get_latest_descr(deadframe)
assert fail.identifier == 2
More information about the pypy-commit
mailing list