[pypy-commit] pypy arm64: add missing files and have some basic progress

fijal pypy.commits at gmail.com
Sat Jun 22 11:45:01 EDT 2019


Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: arm64
Changeset: r96842:ea0ee0f66afd
Date: 2019-06-22 15:44 +0000
http://bitbucket.org/pypy/pypy/changeset/ea0ee0f66afd/

Log:	add missing files and have some basic progress

diff --git a/rpython/jit/backend/aarch64/assembler.py b/rpython/jit/backend/aarch64/assembler.py
--- a/rpython/jit/backend/aarch64/assembler.py
+++ b/rpython/jit/backend/aarch64/assembler.py
@@ -624,7 +624,8 @@
         self.mc.BL(target)
         return startpos
 
-    def push_gcmap(self, mc, gcmap):
+    def push_gcmap(self, mc, gcmap, store=True):
+        assert store
         ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
         ptr = rffi.cast(lltype.Signed, gcmap)
         mc.gen_load_int(r.ip0.value, ptr)
@@ -796,7 +797,8 @@
                 if guard_op.is_guard(): # can be also cond_call
                     regalloc.possibly_free_vars(guard_op.getfailargs())
                 regalloc.possibly_free_vars_for_op(guard_op)
-            elif rop.is_call_may_force(op.getopnum()):
+            elif (rop.is_call_may_force(op.getopnum()) or
+                  rop.is_call_release_gil(op.getopnum())):
                 guard_op = operations[i + 1] # has to exist
                 guard_num = guard_op.getopnum()
                 assert guard_num in (rop.GUARD_NOT_FORCED, rop.GUARD_NOT_FORCED_2)
diff --git a/rpython/jit/backend/aarch64/callbuilder.py b/rpython/jit/backend/aarch64/callbuilder.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/aarch64/callbuilder.py
@@ -0,0 +1,163 @@
+
+from rpython.jit.backend.llsupport.callbuilder import AbstractCallBuilder
+from rpython.jit.backend.aarch64.arch import WORD
+from rpython.jit.metainterp.history import INT, FLOAT, REF
+from rpython.jit.backend.aarch64 import registers as r
+from rpython.jit.backend.aarch64.jump import remap_frame_layout # we use arm algo
+
+from rpython.rlib.objectmodel import we_are_translated
+from rpython.rtyper.lltypesystem import rffi
+
+class Aarch64CallBuilder(AbstractCallBuilder):
+    def __init__(self, assembler, fnloc, arglocs,
+                 resloc=r.x0, restype=INT, ressize=WORD, ressigned=True):
+        AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
+                                     resloc, restype, ressize)
+        self.current_sp = 0
+
+    def prepare_arguments(self):
+        arglocs = self.arglocs
+        non_float_locs = []
+        non_float_regs = []
+        float_locs = []
+        float_regs = []
+        stack_locs = []
+        free_regs = [r.x7, r.x6, r.x5, r.x4, r.x3, r.x2, r.x1, r.x0]
+        free_float_regs = [r.d7, r.d6, r.d5, r.d4, r.d3, r.d2, r.d1, r.d0]
+        for arg in arglocs:
+            if arg.type == FLOAT:
+                if free_float_regs:
+                    float_locs.append(arg)
+                    float_regs.append(free_float_regs.pop())
+                else:
+                    stack_locs.append(arg)
+            else:
+                if free_regs:
+                    non_float_locs.append(arg)
+                    non_float_regs.append(free_regs.pop())
+                else:
+                    stack_locs.append(arg)
+        remap_frame_layout(self.asm, non_float_locs, non_float_regs, r.ip0)
+        if float_locs:
+            remap_frame_layout(self.asm, float_locs, float_regs, r.d8)
+        # move the remaining things to stack and adjust the stack
+        if not stack_locs:
+            return
+        adj = len(stack_locs) + (len(stack_locs) & 1)
+        self.mc.SUB_ri(r.sp.value, r.sp.value, adj * WORD)
+        self.current_sp = adj
+        c = 0
+        for loc in stack_locs:
+            self.asm.mov_loc_to_raw_stack(loc, c)
+            c += WORD
+
+    def push_gcmap(self):
+        noregs = self.asm.cpu.gc_ll_descr.is_shadow_stack()
+        gcmap = self.asm._regalloc.get_gcmap([r.x0], noregs=noregs)
+        self.asm.push_gcmap(self.mc, gcmap)
+
+    def pop_gcmap(self):
+        self.asm._reload_frame_if_necessary(self.mc)
+        self.asm.pop_gcmap(self.mc)        
+
+    def emit_raw_call(self):
+        # the actual call
+        if self.fnloc.is_imm():
+            self.mc.BL(self.fnloc.value)
+            return
+        if self.fnloc.is_stack():
+            self.mc.LDR_ri(r.ip0.value, r.fp.value, self.fnloc.value)
+            self.mc.BLR_r(r.ip0.value)
+        else:
+            assert self.fnloc.is_core_reg()
+            self.mc.BLR_r(self.fnloc.value)
+
+    def restore_stack_pointer(self):
+        assert self.current_sp & 1 == 0 # always adjusted to 16 bytes
+        if self.current_sp == 0:
+            return
+        self.mc.ADD_ri(r.sp.value, r.sp.value, self.current_sp * WORD)
+        self.current_sp = 0
+
+    def load_result(self):
+        resloc = self.resloc
+        if self.restype == 'S':
+            XXX
+            self.mc.VMOV_sc(resloc.value, r.s0.value)
+        elif self.restype == 'L':
+            YYY
+            assert resloc.is_vfp_reg()
+            self.mc.FMDRR(resloc.value, r.r0.value, r.r1.value)
+        # ensure the result is well-formed and stored in the correct location
+        if resloc is not None and resloc.is_core_reg():
+            self._ensure_result_bit_extension(resloc,
+                                                  self.ressize, self.ressign)
+
+    def _ensure_result_bit_extension(self, resloc, size, signed):
+        if size == WORD:
+            return
+        if size == 4:
+            if not signed: # unsigned int
+                self.mc.LSL_ri(resloc.value, resloc.value, 32)
+                self.mc.LSR_ri(resloc.value, resloc.value, 32)
+            else: # signed int
+                self.mc.LSL_ri(resloc.value, resloc.value, 32)
+                self.mc.ASR_ri(resloc.value, resloc.value, 32)
+        elif size == 2:
+            if not signed:
+                self.mc.LSL_ri(resloc.value, resloc.value, 48)
+                self.mc.LSR_ri(resloc.value, resloc.value, 48)
+            else:
+                self.mc.LSL_ri(resloc.value, resloc.value, 48)
+                self.mc.ASR_ri(resloc.value, resloc.value, 48)
+        elif size == 1:
+            if not signed:  # unsigned char
+                self.mc.AND_ri(resloc.value, resloc.value, 0xFF)
+            else:
+                self.mc.LSL_ri(resloc.value, resloc.value, 56)
+                self.mc.ASR_ri(resloc.value, resloc.value, 56)
+
+    def call_releasegil_addr_and_move_real_arguments(self, fastgil):
+        assert self.is_call_release_gil
+        assert not self.asm._is_asmgcc()
+
+        # Save this thread's shadowstack pointer into r7, for later comparison
+        gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
+        if gcrootmap:
+            XXX
+            rst = gcrootmap.get_root_stack_top_addr()
+            self.mc.gen_load_int(r.r5.value, rst)
+            self.mc.LDR_ri(r.r7.value, r.r5.value)
+
+        # change 'rpy_fastgil' to 0 (it should be non-zero right now)
+        self.mc.DMB()
+        self.mc.gen_load_int(r.ip1.value, fastgil)
+        self.mc.MOVZ_r_u16(r.ip0.value, 0, 0)
+        self.mc.STR_ri(r.ip0.value, r.ip1.value, 0)
+
+        if not we_are_translated():                     # for testing: we should not access
+            self.mc.ADD_ri(r.fp.value, r.fp.value, 1)   # fp any more
+
+    def write_real_errno(self, save_err):
+        if save_err & rffi.RFFI_READSAVED_ERRNO:
+            xxx
+        elif save_err & rffi.RFFI_ZERO_ERRNO_BEFORE:
+            yyy
+
+    def read_real_errno(self, save_err):
+        if save_err & rffi.RFFI_SAVE_ERRNO:
+            xxx        
+
+    def move_real_result_and_call_reacqgil_addr(self, fastgil):
+        xxx
+
+    def get_result_locs(self):
+        if self.resloc is None:
+            return [], []
+        if self.resloc.is_vfp_reg():
+            if self.restype == 'L':      # long long
+                return [r.r0, r.r1], []
+            else:
+                return [], [r.d0]
+        assert self.resloc.is_core_reg()
+        return [r.r0], []
diff --git a/rpython/jit/backend/aarch64/codebuilder.py b/rpython/jit/backend/aarch64/codebuilder.py
--- a/rpython/jit/backend/aarch64/codebuilder.py
+++ b/rpython/jit/backend/aarch64/codebuilder.py
@@ -372,6 +372,9 @@
     def BRK(self):
         self.write32(0b11010100001 << 21)
 
+    def DMB(self):
+        self.write32(0b1101010100000011001111110111111)
+
     def gen_load_int_full(self, r, value):
         self.MOVZ_r_u16(r, value & 0xFFFF, 0)
         self.MOVK_r_u16(r, (value >> 16) & 0xFFFF, 16)
diff --git a/rpython/jit/backend/aarch64/jump.py b/rpython/jit/backend/aarch64/jump.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/aarch64/jump.py
@@ -0,0 +1,113 @@
+
+from rpython.jit.backend.aarch64 import registers as r
+
+def remap_frame_layout(assembler, src_locations, dst_locations, tmpreg):
+    pending_dests = len(dst_locations)
+    srccount = {}    # maps dst_locations to how many times the same
+                     # location appears in src_locations
+    for dst in dst_locations:
+        key = dst.as_key()
+        assert key not in srccount, "duplicate value in dst_locations!"
+        srccount[key] = 0
+    for i in range(len(dst_locations)):
+        src = src_locations[i]
+        if src.is_imm():
+            continue
+        key = src.as_key()
+        if key in srccount:
+            if key == dst_locations[i].as_key():
+                # ignore a move "x = x"
+                # setting any "large enough" negative value is ok, but
+                # be careful of overflows, don't use -sys.maxint
+                srccount[key] = -len(dst_locations) - 1
+                pending_dests -= 1
+            else:
+                srccount[key] += 1
+
+    while pending_dests > 0:
+        progress = False
+        for i in range(len(dst_locations)):
+            dst = dst_locations[i]
+            key = dst.as_key()
+            if srccount[key] == 0:
+                srccount[key] = -1       # means "it's done"
+                pending_dests -= 1
+                src = src_locations[i]
+                if not src.is_imm():
+                    key = src.as_key()
+                    if key in srccount:
+                        srccount[key] -= 1
+                _move(assembler, src, dst, tmpreg)
+                progress = True
+        if not progress:
+            # we are left with only pure disjoint cycles
+            sources = {}     # maps dst_locations to src_locations
+            for i in range(len(dst_locations)):
+                src = src_locations[i]
+                dst = dst_locations[i]
+                sources[dst.as_key()] = src
+            #
+            for i in range(len(dst_locations)):
+                dst = dst_locations[i]
+                originalkey = dst.as_key()
+                if srccount[originalkey] >= 0:
+                    assembler.push_locations([dst])
+                    while True:
+                        key = dst.as_key()
+                        assert srccount[key] == 1
+                        # ^^^ because we are in a simple cycle
+                        srccount[key] = -1
+                        pending_dests -= 1
+                        src = sources[key]
+                        if src.as_key() == originalkey:
+                            break
+                        _move(assembler, src, dst, tmpreg)
+                        dst = src
+                    assembler.pop_locations([dst])
+            assert pending_dests == 0
+
+
+def _move(assembler, src, dst, tmpreg):
+    if dst.is_stack() and src.is_stack():
+        assembler.regalloc_mov(src, tmpreg)
+        src = tmpreg
+    assembler.regalloc_mov(src, dst)
+
+
+def remap_frame_layout_mixed(assembler,
+                             src_locations1, dst_locations1, tmpreg1,
+                             src_locations2, dst_locations2, tmpreg2):
+    # find and push the xmm stack locations from src_locations2 that
+    # are going to be overwritten by dst_locations1
+    extrapushes = []
+    extrapops = []
+    dst_keys = {}
+    for loc in dst_locations1:
+        dst_keys[loc.as_key()] = None
+    src_locations2red = []
+    dst_locations2red = []
+    for i in range(len(src_locations2)):
+        loc = src_locations2[i]
+        dstloc = dst_locations2[i]
+        if loc.is_stack():
+            key = loc.as_key()
+            if key in dst_keys:
+                extrapushes.append(loc)
+                extrapops.append(dstloc)
+                continue
+        src_locations2red.append(loc)
+        dst_locations2red.append(dstloc)
+    src_locations2 = src_locations2red
+    dst_locations2 = dst_locations2red
+
+    assembler.push_locations(extrapushes)
+
+    #
+    # remap the integer and pointer registers and stack locations
+    remap_frame_layout(assembler, src_locations1, dst_locations1, tmpreg1)
+    #
+    # remap the vfp registers and stack locations
+    remap_frame_layout(assembler, src_locations2, dst_locations2, tmpreg2)
+    #
+    # finally, pop the extra xmm stack locations
+    assembler.pop_locations(extrapops)
diff --git a/rpython/jit/backend/aarch64/opassembler.py b/rpython/jit/backend/aarch64/opassembler.py
--- a/rpython/jit/backend/aarch64/opassembler.py
+++ b/rpython/jit/backend/aarch64/opassembler.py
@@ -774,7 +774,8 @@
     emit_op_call_f = _genop_call
     emit_op_call_n = _genop_call
 
-    def _emit_call(self, op, arglocs, is_call_release_gil=False):
+    def _emit_call(self, op, arglocs):
+        is_call_release_gil = rop.is_call_release_gil(op.getopnum())
         # args = [resloc, size, sign, args...]
         from rpython.jit.backend.llsupport.descr import CallDescr
 
diff --git a/rpython/jit/backend/aarch64/regalloc.py b/rpython/jit/backend/aarch64/regalloc.py
--- a/rpython/jit/backend/aarch64/regalloc.py
+++ b/rpython/jit/backend/aarch64/regalloc.py
@@ -311,6 +311,8 @@
         return []
 
     prepare_op_jit_debug = void
+    prepare_op_enter_portal_frame = void
+    prepare_op_leave_portal_frame = void
 
     def prepare_int_ri(self, op, res_in_cc):
         boxes = op.getarglist()
@@ -635,7 +637,11 @@
             return self.rm.after_call(v)
 
     def prepare_guard_op_guard_not_forced(self, op, prev_op):
-        arglocs = self._prepare_call(prev_op, save_all_regs=True)
+        if rop.is_call_release_gil(prev_op.getopnum()):
+            arglocs = self._prepare_call(prev_op, save_all_regs=True,
+                                         first_arg_index=2)
+        else:
+            arglocs = self._prepare_call(prev_op, save_all_regs=True)
         guard_locs = self._guard_impl(op)
         return arglocs + guard_locs, len(arglocs)
 
diff --git a/rpython/jit/backend/aarch64/test/test_basic.py b/rpython/jit/backend/aarch64/test/test_basic.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/aarch64/test/test_basic.py
@@ -0,0 +1,42 @@
+import py
+from rpython.jit.metainterp.test import test_ajit
+from rpython.rlib.jit import JitDriver
+from rpython.jit.metainterp.test.support import LLJitMixin
+from rpython.jit.backend.detect_cpu import getcpuclass
+
+class JitAarch64Mixin(LLJitMixin):
+    CPUClass = getcpuclass()
+    # we have to disable unroll
+    enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap"
+    basic = False
+
+    def check_jumps(self, maxcount):
+        pass
+
+class TestBasic(JitAarch64Mixin, test_ajit.BaseLLtypeTests):
+    # for the individual tests see
+    # ====> ../../../metainterp/test/test_ajit.py
+    def test_bug(self):
+        jitdriver = JitDriver(greens = [], reds = ['n'])
+        class X(object):
+            pass
+        def f(n):
+            while n > -100:
+                jitdriver.can_enter_jit(n=n)
+                jitdriver.jit_merge_point(n=n)
+                x = X()
+                x.arg = 5
+                if n <= 0: break
+                n -= x.arg
+                x.arg = 6   # prevents 'x.arg' from being annotated as constant
+            return n
+        res = self.meta_interp(f, [31], enable_opts='')
+        assert res == -4
+
+    def test_r_dict(self):
+        # a Struct that belongs to the hash table is not seen as being
+        # included in the larger Array
+        py.test.skip("issue with ll2ctypes")
+
+    def test_free_object(self):
+        py.test.skip("issue of freeing, probably with ll2ctypes")


More information about the pypy-commit mailing list