[pypy-commit] pypy arm64: add missing files and have some basic progress
fijal
pypy.commits at gmail.com
Sat Jun 22 11:45:01 EDT 2019
Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: arm64
Changeset: r96842:ea0ee0f66afd
Date: 2019-06-22 15:44 +0000
http://bitbucket.org/pypy/pypy/changeset/ea0ee0f66afd/
Log: add missing files and have some basic progress
diff --git a/rpython/jit/backend/aarch64/assembler.py b/rpython/jit/backend/aarch64/assembler.py
--- a/rpython/jit/backend/aarch64/assembler.py
+++ b/rpython/jit/backend/aarch64/assembler.py
@@ -624,7 +624,8 @@
self.mc.BL(target)
return startpos
- def push_gcmap(self, mc, gcmap):
+ def push_gcmap(self, mc, gcmap, store=True):
+ assert store
ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
ptr = rffi.cast(lltype.Signed, gcmap)
mc.gen_load_int(r.ip0.value, ptr)
@@ -796,7 +797,8 @@
if guard_op.is_guard(): # can be also cond_call
regalloc.possibly_free_vars(guard_op.getfailargs())
regalloc.possibly_free_vars_for_op(guard_op)
- elif rop.is_call_may_force(op.getopnum()):
+ elif (rop.is_call_may_force(op.getopnum()) or
+ rop.is_call_release_gil(op.getopnum())):
guard_op = operations[i + 1] # has to exist
guard_num = guard_op.getopnum()
assert guard_num in (rop.GUARD_NOT_FORCED, rop.GUARD_NOT_FORCED_2)
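
The hunk above makes CALL_RELEASE_GIL behave like CALL_MAY_FORCE when walking
the operations: either kind of call must be immediately followed by a
GUARD_NOT_FORCED (or GUARD_NOT_FORCED_2), and the pair is compiled as one unit.
A minimal sketch of that pairing walk, with made-up callback names standing in
for the real assembler entry points:

    # Illustration only: pairing a forcing call with the guard that follows it.
    # `rop` is rpython.jit.metainterp.resoperation.rop; compile_op and
    # compile_op_with_guard are hypothetical callbacks, not backend methods.
    def walk_operations(operations, rop, compile_op, compile_op_with_guard):
        i = 0
        while i < len(operations):
            op = operations[i]
            num = op.getopnum()
            if rop.is_call_may_force(num) or rop.is_call_release_gil(num):
                guard_op = operations[i + 1]          # has to exist
                assert guard_op.getopnum() in (rop.GUARD_NOT_FORCED,
                                               rop.GUARD_NOT_FORCED_2)
                compile_op_with_guard(op, guard_op)   # emitted together
                i += 2
            else:
                compile_op(op)
                i += 1
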
diff --git a/rpython/jit/backend/aarch64/callbuilder.py b/rpython/jit/backend/aarch64/callbuilder.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/aarch64/callbuilder.py
@@ -0,0 +1,163 @@
+
+from rpython.jit.backend.llsupport.callbuilder import AbstractCallBuilder
+from rpython.jit.backend.aarch64.arch import WORD
+from rpython.jit.metainterp.history import INT, FLOAT, REF
+from rpython.jit.backend.aarch64 import registers as r
+from rpython.jit.backend.aarch64.jump import remap_frame_layout # same algorithm as the ARM backend
+
+from rpython.rlib.objectmodel import we_are_translated
+from rpython.rtyper.lltypesystem import rffi
+
+class Aarch64CallBuilder(AbstractCallBuilder):
+ def __init__(self, assembler, fnloc, arglocs,
+ resloc=r.x0, restype=INT, ressize=WORD, ressigned=True):
+ AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
+ resloc, restype, ressize)
+ self.current_sp = 0
+
+ def prepare_arguments(self):
+ arglocs = self.arglocs
+ non_float_locs = []
+ non_float_regs = []
+ float_locs = []
+ float_regs = []
+ stack_locs = []
+ free_regs = [r.x7, r.x6, r.x5, r.x4, r.x3, r.x2, r.x1, r.x0]
+ free_float_regs = [r.d7, r.d6, r.d5, r.d4, r.d3, r.d2, r.d1, r.d0]
+ for arg in arglocs:
+ if arg.type == FLOAT:
+ if free_float_regs:
+ float_locs.append(arg)
+ float_regs.append(free_float_regs.pop())
+ else:
+ stack_locs.append(arg)
+ else:
+ if free_regs:
+ non_float_locs.append(arg)
+ non_float_regs.append(free_regs.pop())
+ else:
+ stack_locs.append(arg)
+ remap_frame_layout(self.asm, non_float_locs, non_float_regs, r.ip0)
+ if float_locs:
+ remap_frame_layout(self.asm, float_locs, float_regs, r.d8)
+ # move the remaining things to stack and adjust the stack
+ if not stack_locs:
+ return
+ adj = len(stack_locs) + (len(stack_locs) & 1)
+ self.mc.SUB_ri(r.sp.value, r.sp.value, adj * WORD)
+ self.current_sp = adj
+ c = 0
+ for loc in stack_locs:
+ self.asm.mov_loc_to_raw_stack(loc, c)
+ c += WORD
+
+ def push_gcmap(self):
+ noregs = self.asm.cpu.gc_ll_descr.is_shadow_stack()
+ gcmap = self.asm._regalloc.get_gcmap([r.x0], noregs=noregs)
+ self.asm.push_gcmap(self.mc, gcmap)
+
+ def pop_gcmap(self):
+ self.asm._reload_frame_if_necessary(self.mc)
+ self.asm.pop_gcmap(self.mc)
+
+ def emit_raw_call(self):
+ # the actual call
+ if self.fnloc.is_imm():
+ self.mc.BL(self.fnloc.value)
+ return
+ if self.fnloc.is_stack():
+ self.mc.LDR_ri(r.ip0.value, r.fp.value, self.fnloc.value)
+ self.mc.BLR_r(r.ip0.value)
+ else:
+ assert self.fnloc.is_core_reg()
+ self.mc.BLR_r(self.fnloc.value)
+
+ def restore_stack_pointer(self):
+ assert self.current_sp & 1 == 0 # always adjusted to 16 bytes
+ if self.current_sp == 0:
+ return
+ self.mc.ADD_ri(r.sp.value, r.sp.value, self.current_sp * WORD)
+ self.current_sp = 0
+
+ def load_result(self):
+ resloc = self.resloc
+ if self.restype == 'S':
+ XXX
+ self.mc.VMOV_sc(resloc.value, r.s0.value)
+ elif self.restype == 'L':
+ YYY
+ assert resloc.is_vfp_reg()
+ self.mc.FMDRR(resloc.value, r.r0.value, r.r1.value)
+ # ensure the result is well-formed and stored in the correct location
+ if resloc is not None and resloc.is_core_reg():
+ self._ensure_result_bit_extension(resloc,
+ self.ressize, self.ressign)
+
+ def _ensure_result_bit_extension(self, resloc, size, signed):
+ if size == WORD:
+ return
+ if size == 4:
+ if not signed: # unsigned int
+ self.mc.LSL_ri(resloc.value, resloc.value, 32)
+ self.mc.LSR_ri(resloc.value, resloc.value, 32)
+ else: # signed int
+ self.mc.LSL_ri(resloc.value, resloc.value, 32)
+ self.mc.ASR_ri(resloc.value, resloc.value, 32)
+ elif size == 2:
+ if not signed:
+ self.mc.LSL_ri(resloc.value, resloc.value, 48)
+ self.mc.LSR_ri(resloc.value, resloc.value, 48)
+ else:
+ self.mc.LSL_ri(resloc.value, resloc.value, 48)
+ self.mc.ASR_ri(resloc.value, resloc.value, 48)
+ elif size == 1:
+ if not signed: # unsigned char
+ self.mc.AND_ri(resloc.value, resloc.value, 0xFF)
+ else:
+ self.mc.LSL_ri(resloc.value, resloc.value, 56)
+ self.mc.ASR_ri(resloc.value, resloc.value, 56)
+
+ def call_releasegil_addr_and_move_real_arguments(self, fastgil):
+ assert self.is_call_release_gil
+ assert not self.asm._is_asmgcc()
+
+ # Save this thread's shadowstack pointer into r7, for later comparison
+ gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
+ if gcrootmap:
+ XXX
+ rst = gcrootmap.get_root_stack_top_addr()
+ self.mc.gen_load_int(r.r5.value, rst)
+ self.mc.LDR_ri(r.r7.value, r.r5.value)
+
+ # change 'rpy_fastgil' to 0 (it should be non-zero right now)
+ self.mc.DMB()
+ self.mc.gen_load_int(r.ip1.value, fastgil)
+ self.mc.MOVZ_r_u16(r.ip0.value, 0, 0)
+ self.mc.STR_ri(r.ip0.value, r.ip1.value, 0)
+
+ if not we_are_translated(): # for testing: we should not access
+ self.mc.ADD_ri(r.fp.value, r.fp.value, 1) # fp any more
+
+ def write_real_errno(self, save_err):
+ if save_err & rffi.RFFI_READSAVED_ERRNO:
+ xxx
+ elif save_err & rffi.RFFI_ZERO_ERRNO_BEFORE:
+ yyy
+
+ def read_real_errno(self, save_err):
+ if save_err & rffi.RFFI_SAVE_ERRNO:
+ xxx
+
+ def move_real_result_and_call_reacqgil_addr(self, fastgil):
+ xxx
+
+ def get_result_locs(self):
+ if self.resloc is None:
+ return [], []
+ if self.resloc.is_vfp_reg():
+ if self.restype == 'L': # long long
+ return [r.r0, r.r1], []
+ else:
+ return [], [r.d0]
+ assert self.resloc.is_core_reg()
+ return [r.r0], []
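
prepare_arguments in the new callbuilder follows the standard AArch64 calling
convention: up to eight integer/pointer arguments go in x0-x7, up to eight
floating-point arguments in d0-d7, and anything left over is spilled to the
stack, with sp kept 16-byte aligned (hence rounding the number of spilled
8-byte slots up to an even count). A self-contained sketch of that
classification, using plain type tags instead of the real location objects
(all names below are invented for the example):

    WORD = 8  # bytes; AArch64 general registers are 64-bit

    def classify_call_args(args):
        # args: list of 'int'/'float' tags standing in for argument locations
        int_regs = ['x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7']
        float_regs = ['d0', 'd1', 'd2', 'd3', 'd4', 'd5', 'd6', 'd7']
        in_regs, on_stack = [], []
        for tag in args:
            pool = float_regs if tag == 'float' else int_regs
            if pool:
                in_regs.append((tag, pool.pop(0)))
            else:
                on_stack.append(tag)
        # sp must stay 16-byte aligned, so round the number of spilled 8-byte
        # slots up to an even count (same as len + (len & 1) above)
        sp_adjust = (len(on_stack) + (len(on_stack) & 1)) * WORD
        return in_regs, on_stack, sp_adjust

    regs, stack, adj = classify_call_args(['int'] * 9 + ['float'])
    assert [reg for _, reg in regs][:8] == ['x0', 'x1', 'x2', 'x3',
                                            'x4', 'x5', 'x6', 'x7']
    assert stack == ['int'] and adj == 16   # one spilled word, padded to 16
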
diff --git a/rpython/jit/backend/aarch64/codebuilder.py b/rpython/jit/backend/aarch64/codebuilder.py
--- a/rpython/jit/backend/aarch64/codebuilder.py
+++ b/rpython/jit/backend/aarch64/codebuilder.py
@@ -372,6 +372,9 @@
def BRK(self):
self.write32(0b11010100001 << 21)
+ def DMB(self):
+ self.write32(0b11010101000000110011111110111111)
+
def gen_load_int_full(self, r, value):
self.MOVZ_r_u16(r, value & 0xFFFF, 0)
self.MOVK_r_u16(r, (value >> 16) & 0xFFFF, 16)
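
The new DMB method emits an ARMv8 data memory barrier. The instruction word is
a fixed pattern in which the CRm field (bits 11 to 8) selects the barrier
option; the intended constant is the encoding of 'dmb sy' (0xD5033FBF). A small
sketch of the encoding, with helper names invented for this note:

    DMB_BASE = 0xD50330BF          # DMB with CRm = 0

    BARRIER_OPTION = {
        'SY':  0b1111,             # full-system barrier
        'ISH': 0b1011,             # inner-shareable domain only
    }

    def encode_dmb(option='SY'):
        # assumption for this sketch: CRm sits in bits [11:8] of the word
        return DMB_BASE | (BARRIER_OPTION[option] << 8)

    assert encode_dmb('SY') == 0b11010101000000110011111110111111  # 0xD5033FBF
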
diff --git a/rpython/jit/backend/aarch64/jump.py b/rpython/jit/backend/aarch64/jump.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/aarch64/jump.py
@@ -0,0 +1,113 @@
+
+from rpython.jit.backend.aarch64 import registers as r
+
+def remap_frame_layout(assembler, src_locations, dst_locations, tmpreg):
+ pending_dests = len(dst_locations)
+ srccount = {} # maps dst_locations to how many times the same
+ # location appears in src_locations
+ for dst in dst_locations:
+ key = dst.as_key()
+ assert key not in srccount, "duplicate value in dst_locations!"
+ srccount[key] = 0
+ for i in range(len(dst_locations)):
+ src = src_locations[i]
+ if src.is_imm():
+ continue
+ key = src.as_key()
+ if key in srccount:
+ if key == dst_locations[i].as_key():
+ # ignore a move "x = x"
+ # setting any "large enough" negative value is ok, but
+ # be careful of overflows, don't use -sys.maxint
+ srccount[key] = -len(dst_locations) - 1
+ pending_dests -= 1
+ else:
+ srccount[key] += 1
+
+ while pending_dests > 0:
+ progress = False
+ for i in range(len(dst_locations)):
+ dst = dst_locations[i]
+ key = dst.as_key()
+ if srccount[key] == 0:
+ srccount[key] = -1 # means "it's done"
+ pending_dests -= 1
+ src = src_locations[i]
+ if not src.is_imm():
+ key = src.as_key()
+ if key in srccount:
+ srccount[key] -= 1
+ _move(assembler, src, dst, tmpreg)
+ progress = True
+ if not progress:
+ # we are left with only pure disjoint cycles
+ sources = {} # maps dst_locations to src_locations
+ for i in range(len(dst_locations)):
+ src = src_locations[i]
+ dst = dst_locations[i]
+ sources[dst.as_key()] = src
+ #
+ for i in range(len(dst_locations)):
+ dst = dst_locations[i]
+ originalkey = dst.as_key()
+ if srccount[originalkey] >= 0:
+ assembler.push_locations([dst])
+ while True:
+ key = dst.as_key()
+ assert srccount[key] == 1
+ # ^^^ because we are in a simple cycle
+ srccount[key] = -1
+ pending_dests -= 1
+ src = sources[key]
+ if src.as_key() == originalkey:
+ break
+ _move(assembler, src, dst, tmpreg)
+ dst = src
+ assembler.pop_locations([dst])
+ assert pending_dests == 0
+
+
+def _move(assembler, src, dst, tmpreg):
+ if dst.is_stack() and src.is_stack():
+ assembler.regalloc_mov(src, tmpreg)
+ src = tmpreg
+ assembler.regalloc_mov(src, dst)
+
+
+def remap_frame_layout_mixed(assembler,
+ src_locations1, dst_locations1, tmpreg1,
+ src_locations2, dst_locations2, tmpreg2):
+ # find and push the xmm stack locations from src_locations2 that
+ # are going to be overwritten by dst_locations1
+ extrapushes = []
+ extrapops = []
+ dst_keys = {}
+ for loc in dst_locations1:
+ dst_keys[loc.as_key()] = None
+ src_locations2red = []
+ dst_locations2red = []
+ for i in range(len(src_locations2)):
+ loc = src_locations2[i]
+ dstloc = dst_locations2[i]
+ if loc.is_stack():
+ key = loc.as_key()
+ if key in dst_keys:
+ extrapushes.append(loc)
+ extrapops.append(dstloc)
+ continue
+ src_locations2red.append(loc)
+ dst_locations2red.append(dstloc)
+ src_locations2 = src_locations2red
+ dst_locations2 = dst_locations2red
+
+ assembler.push_locations(extrapushes)
+
+ #
+ # remap the integer and pointer registers and stack locations
+ remap_frame_layout(assembler, src_locations1, dst_locations1, tmpreg1)
+ #
+ # remap the vfp registers and stack locations
+ remap_frame_layout(assembler, src_locations2, dst_locations2, tmpreg2)
+ #
+ # finally, pop the extra xmm stack locations
+ assembler.pop_locations(extrapops)
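
remap_frame_layout above is the usual parallel-move algorithm, reused from the
ARM backend: first emit every move whose destination is no longer needed as a
source, then break the remaining pure cycles by pushing one location, rotating
the rest of the cycle, and popping into the last slot. A simplified,
self-contained illustration of the same idea on plain register names (the real
code also handles immediates and stack locations):

    def parallel_move(srcs, dsts):
        # Conceptually dsts[i] = srcs[i] for all i at once; dsts are assumed
        # to be distinct register names.  Returns the emitted instructions.
        moves = []
        pending = [i for i in range(len(dsts)) if srcs[i] != dsts[i]]
        sources = {dsts[i]: srcs[i] for i in pending}
        # how many pending moves still want to read each destination
        readers = dict.fromkeys(sources, 0)
        for i in pending:
            if srcs[i] in readers:
                readers[srcs[i]] += 1
        while pending:
            ready = [i for i in pending if readers[dsts[i]] == 0]
            if ready:
                # safe moves: nothing still needs to read these destinations
                for i in ready:
                    moves.append('mov %s, %s' % (dsts[i], srcs[i]))
                    if srcs[i] in readers:
                        readers[srcs[i]] -= 1
                    pending.remove(i)
            else:
                # only disjoint cycles remain: save one value, rotate, restore
                start = dsts[pending[0]]
                moves.append('push %s' % start)
                dst = start
                while sources[dst] != start:
                    src = sources[dst]
                    moves.append('mov %s, %s' % (dst, src))
                    pending.remove(next(i for i in pending if dsts[i] == dst))
                    dst = src
                moves.append('pop %s' % dst)
                pending.remove(next(i for i in pending if dsts[i] == dst))
        return moves

    # a two-register swap exercises the cycle-breaking path:
    assert parallel_move(['x1', 'x0'], ['x0', 'x1']) == [
        'push x0', 'mov x0, x1', 'pop x1']
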
diff --git a/rpython/jit/backend/aarch64/opassembler.py b/rpython/jit/backend/aarch64/opassembler.py
--- a/rpython/jit/backend/aarch64/opassembler.py
+++ b/rpython/jit/backend/aarch64/opassembler.py
@@ -774,7 +774,8 @@
emit_op_call_f = _genop_call
emit_op_call_n = _genop_call
- def _emit_call(self, op, arglocs, is_call_release_gil=False):
+ def _emit_call(self, op, arglocs):
+ is_call_release_gil = rop.is_call_release_gil(op.getopnum())
# args = [resloc, size, sign, args...]
from rpython.jit.backend.llsupport.descr import CallDescr
diff --git a/rpython/jit/backend/aarch64/regalloc.py b/rpython/jit/backend/aarch64/regalloc.py
--- a/rpython/jit/backend/aarch64/regalloc.py
+++ b/rpython/jit/backend/aarch64/regalloc.py
@@ -311,6 +311,8 @@
return []
prepare_op_jit_debug = void
+ prepare_op_enter_portal_frame = void
+ prepare_op_leave_portal_frame = void
def prepare_int_ri(self, op, res_in_cc):
boxes = op.getarglist()
@@ -635,7 +637,11 @@
return self.rm.after_call(v)
def prepare_guard_op_guard_not_forced(self, op, prev_op):
- arglocs = self._prepare_call(prev_op, save_all_regs=True)
+ if rop.is_call_release_gil(prev_op.getopnum()):
+ arglocs = self._prepare_call(prev_op, save_all_regs=True,
+ first_arg_index=2)
+ else:
+ arglocs = self._prepare_call(prev_op, save_all_regs=True)
guard_locs = self._guard_impl(op)
return arglocs + guard_locs, len(arglocs)
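
The first_arg_index=2 special case reflects how CALL_RELEASE_GIL operations are
laid out in the trace: argument 0 is the errno-saving flag constant and
argument 1 is the function address, so the real call arguments only start at
index 2, while plain calls carry the function address at index 0 and arguments
from index 1. A tiny illustration of that slicing (the helper below is invented
for this note, not part of the backend):

    # assumed layouts, matching first_arg_index above:
    #   CALL_RELEASE_GIL: [save_err_flags, func_addr, arg0, arg1, ...]
    #   CALL:             [func_addr, arg0, arg1, ...]
    def split_call_args(op_args, is_call_release_gil):
        first_arg_index = 2 if is_call_release_gil else 1
        func_addr = op_args[first_arg_index - 1]
        return func_addr, op_args[first_arg_index:]

    assert split_call_args(['errflags', 'fn', 'a', 'b'], True) == ('fn', ['a', 'b'])
    assert split_call_args(['fn', 'a', 'b'], False) == ('fn', ['a', 'b'])
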
diff --git a/rpython/jit/backend/aarch64/test/test_basic.py b/rpython/jit/backend/aarch64/test/test_basic.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/aarch64/test/test_basic.py
@@ -0,0 +1,42 @@
+import py
+from rpython.jit.metainterp.test import test_ajit
+from rpython.rlib.jit import JitDriver
+from rpython.jit.metainterp.test.support import LLJitMixin
+from rpython.jit.backend.detect_cpu import getcpuclass
+
+class JitAarch64Mixin(LLJitMixin):
+ CPUClass = getcpuclass()
+ # we have to disable unroll
+ enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap"
+ basic = False
+
+ def check_jumps(self, maxcount):
+ pass
+
+class TestBasic(JitAarch64Mixin, test_ajit.BaseLLtypeTests):
+ # for the individual tests see
+ # ====> ../../../metainterp/test/test_ajit.py
+ def test_bug(self):
+ jitdriver = JitDriver(greens = [], reds = ['n'])
+ class X(object):
+ pass
+ def f(n):
+ while n > -100:
+ jitdriver.can_enter_jit(n=n)
+ jitdriver.jit_merge_point(n=n)
+ x = X()
+ x.arg = 5
+ if n <= 0: break
+ n -= x.arg
+ x.arg = 6 # prevents 'x.arg' from being annotated as constant
+ return n
+ res = self.meta_interp(f, [31], enable_opts='')
+ assert res == -4
+
+ def test_r_dict(self):
+ # a Struct that belongs to the hash table is not seen as being
+ # included in the larger Array
+ py.test.skip("issue with ll2ctypes")
+
+ def test_free_object(self):
+ py.test.skip("issue of freeing, probably with ll2ctypes")