[pypy-commit] pypy arm64: bridge basics

fijal pypy.commits at gmail.com
Wed Mar 6 10:05:14 EST 2019


Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: arm64
Changeset: r96219:a50f3a71ef1d
Date: 2019-03-06 14:49 +0000
http://bitbucket.org/pypy/pypy/changeset/a50f3a71ef1d/

Log:	bridge basics

diff --git a/rpython/jit/backend/aarch64/assembler.py b/rpython/jit/backend/aarch64/assembler.py
--- a/rpython/jit/backend/aarch64/assembler.py
+++ b/rpython/jit/backend/aarch64/assembler.py
@@ -117,6 +117,89 @@
         return AsmInfo(ops_offset, rawstart + loop_head,
                        size_excluding_failure_stuff - loop_head)
 
+    def assemble_bridge(self, logger, faildescr, inputargs, operations,
+                        original_loop_token, log):  # assemble a bridge and patch faildescr's guard to jump to it
+        if not we_are_translated():
+            # Untranslated-only sanity check: inputargs must not contain duplicates
+            assert len(set(inputargs)) == len(inputargs)
+
+        self.setup(original_loop_token)
+        #self.codemap.inherit_code_from_position(faildescr.adr_jump_offset)
+        descr_number = compute_unique_id(faildescr)  # used for debug code, debug_print and log_patch_guard below
+        if log:
+            operations = self._inject_debugging_code(faildescr, operations,
+                                                     'b', descr_number)
+
+        assert isinstance(faildescr, AbstractFailDescr)
+
+        arglocs = self.rebuild_faillocs_from_descr(faildescr, inputargs)  # presumably where the guard left each inputarg -- TODO confirm
+
+        regalloc = Regalloc(assembler=self)
+        allgcrefs = []  # handed to prepare_bridge, then to reserve_gcref_table
+        operations = regalloc.prepare_bridge(inputargs, arglocs,
+                                             operations,
+                                             allgcrefs,
+                                             self.current_clt.frame_info)
+        self.reserve_gcref_table(allgcrefs)
+        startpos = self.mc.get_relative_pos()  # bridge entry point: patch_trace below targets rawstart + startpos
+
+        self._check_frame_depth(self.mc, regalloc.get_gcmap())
+
+        bridgestartpos = self.mc.get_relative_pos()  # reported as "resops" in the debug output
+        frame_depth_no_fixed_size = self._assemble(regalloc, inputargs, operations)
+
+        codeendpos = self.mc.get_relative_pos()  # end of the bridge code; failure recoveries are written after it
+
+        self.write_pending_failure_recoveries()
+
+        fullsize = self.mc.get_relative_pos()  # position after the failure recoveries
+        rawstart = self.materialize_loop(original_loop_token)  # base address that all relative positions are added to
+
+        self.patch_gcref_table(original_loop_token, rawstart)
+        self.process_pending_guards(rawstart)
+
+        debug_start("jit-backend-addr")
+        debug_print("bridge out of Guard 0x%x has address 0x%x to 0x%x" %
+                    (r_uint(descr_number), r_uint(rawstart + startpos),
+                        r_uint(rawstart + codeendpos)))
+        debug_print("       gc table: 0x%x" % r_uint(rawstart))
+        debug_print("    jump target: 0x%x" % r_uint(rawstart + startpos))
+        debug_print("         resops: 0x%x" % r_uint(rawstart + bridgestartpos))
+        debug_print("       failures: 0x%x" % r_uint(rawstart + codeendpos))
+        debug_print("            end: 0x%x" % r_uint(rawstart + fullsize))
+        debug_stop("jit-backend-addr")
+
+        # patch the jump from original guard
+        self.patch_trace(faildescr, original_loop_token,
+                                    rawstart + startpos, regalloc)
+
+        self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE,
+                                rawstart)
+        if not we_are_translated():
+            if log:
+                self.mc._dump_trace(rawstart, 'bridge.asm')  # untranslated runs only
+
+        ops_offset = self.mc.ops_offset
+        frame_depth = max(self.current_clt.frame_info.jfi_frame_depth,
+                          frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)  # never below the depth already recorded
+        self.fixup_target_tokens(rawstart)
+        self.update_frame_depth(frame_depth)
+
+        if logger:
+            log = logger.log_trace(jl.MARK_TRACE_ASM, None, self.mc)  # NB: rebinds the 'log' parameter; the flag is not read again
+            log.write(inputargs, operations, ops_offset)
+            # log that the already written bridge is stitched to a descr!
+            logger.log_patch_guard(descr_number, rawstart)
+
+            # legacy
+            if logger.logger_ops:
+                logger.logger_ops.log_bridge(inputargs, operations, "rewritten",
+                                          faildescr, ops_offset=ops_offset)
+
+        self.teardown()
+
+        return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)  # bridge start address and code length excluding failure recoveries
+
     def setup(self, looptoken):
         BaseAssembler.setup(self, looptoken)
         assert self.memcpy_addr != 0, 'setup_once() not called?'
@@ -220,6 +303,9 @@
     def _check_frame_depth_debug(self, mc):
         pass
 
+    def _check_frame_depth(self, mc, gcmap):
+        pass # XXX placeholder: does nothing yet, both arguments are ignored
+
     def update_frame_depth(self, frame_depth):
         baseofs = self.cpu.get_baseofs_of_frame_field()
         self.current_clt.frame_info.update_frame_depth(baseofs, frame_depth)
@@ -264,6 +350,9 @@
         gcreftracers.append(tracer)    # keepalive
         self.teardown_gcrefs_list()
 
+    def patch_stack_checks(self, framedepth, rawstart):
+        pass # XXX placeholder: does nothing yet, both arguments are ignored
+
     def load_from_gc_table(self, regnum, index):
         address_in_buffer = index * WORD   # at the start of the buffer
         p_location = self.mc.get_relative_pos(break_basic_block=False)
@@ -281,6 +370,14 @@
         #    self.codemap.get_final_bytecode(res, size))
         return res
 
+    def patch_trace(self, faildescr, looptoken, bridge_addr, regalloc):  # looptoken and regalloc are not used in this body
+        b = InstrBuilder()  # separate builder that receives only the new jump
+        patch_addr = faildescr.adr_jump_offset
+        assert patch_addr != 0  # NOTE(review): 0 presumably marks an already patched guard -- confirm
+        b.BL(bridge_addr)  # emit the jump to the bridge
+        b.copy_to_raw_memory(patch_addr)  # presumably writes the emitted code at patch_addr -- confirm
+        faildescr.adr_jump_offset = 0  # reset, so a second call on this descr trips the assert above
+
     def process_pending_guards(self, block_start):
         clt = self.current_clt
         for tok in self.pending_guards:
diff --git a/rpython/jit/backend/aarch64/codebuilder.py b/rpython/jit/backend/aarch64/codebuilder.py
--- a/rpython/jit/backend/aarch64/codebuilder.py
+++ b/rpython/jit/backend/aarch64/codebuilder.py
@@ -137,7 +137,7 @@
 
     def BL(self, target):
         target = rffi.cast(lltype.Signed, target)
-        self.gen_load_int(r.ip0.value, target)
+        self.gen_load_int_full(r.ip0.value, target)
         self.BR(r.ip0.value)
 
     def BR(self, reg):
@@ -147,6 +147,12 @@
     def BRK(self):
         self.write32(0b11010100001 << 21)
 
+    def gen_load_int_full(self, r, value):  # r is a register number here; it shadows the module-level name 'r'
+        self.MOVZ_r_u16(r, value & 0xFFFF, 0)  # bits 0-15; MOVZ zeroes the remaining bits
+        self.MOVK_r_u16(r, (value >> 16) & 0xFFFF, 16)  # bits 16-31; MOVK keeps the other bits
+        self.MOVK_r_u16(r, (value >> 32) & 0xFFFF, 32)  # bits 32-47
+        self.MOVK_r_u16(r, (value >> 48) & 0xFFFF, 48)  # bits 48-63; always four instructions, whatever the value
+
     def gen_load_int(self, r, value):
         """r is the register number, value is the value to be loaded to the
         register"""
diff --git a/rpython/jit/backend/aarch64/opassembler.py b/rpython/jit/backend/aarch64/opassembler.py
--- a/rpython/jit/backend/aarch64/opassembler.py
+++ b/rpython/jit/backend/aarch64/opassembler.py
@@ -120,7 +120,7 @@
         if target_token in self.target_tokens_currently_compiling:
             self.mc.B_ofs(target)
         else:
-            self.mc.B(target)
+            self.mc.BL(target)
 
     def emit_op_finish(self, op, arglocs):
         base_ofs = self.cpu.get_baseofs_of_frame_field()
diff --git a/rpython/jit/backend/aarch64/regalloc.py b/rpython/jit/backend/aarch64/regalloc.py
--- a/rpython/jit/backend/aarch64/regalloc.py
+++ b/rpython/jit/backend/aarch64/regalloc.py
@@ -1,6 +1,7 @@
 
 from rpython.jit.backend.aarch64 import registers as r
 from rpython.jit.backend.aarch64 import locations
+from rpython.jit.backend.aarch64.arch import WORD, JITFRAME_FIXED_SIZE
 
 from rpython.jit.metainterp.history import (Const, ConstInt, ConstFloat,
                                             ConstPtr,
@@ -14,6 +15,8 @@
 from rpython.jit.backend.aarch64 import registers as r
 from rpython.jit.backend.arm.jump import remap_frame_layout_mixed
 from rpython.jit.backend.aarch64.locations import imm
+from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
+
 
 
 class TempInt(TempVar):
@@ -450,6 +453,64 @@
         self.rm._check_invariants()
         self.vfprm._check_invariants()
 
+    def prepare_bridge(self, inputargs, arglocs, operations, allgcrefs,
+                       frame_info):  # frame_info is accepted but not used in this body
+        operations = self._prepare(inputargs, operations, allgcrefs)
+        self._update_bindings(arglocs, inputargs)  # bind each inputarg to the location given in arglocs
+        return operations  # the list returned by _prepare, which may differ from the one passed in
+
+    def _update_bindings(self, locs, inputargs):  # locs[i] is the location for inputargs[i]
+        used = {}  # registers taken by an inputarg (dict used as a set)
+        i = 0
+        for loc in locs:
+            if loc is None:
+                loc = r.fp  # NOTE(review): None is replaced by r.fp; the reason is not visible here
+            arg = inputargs[i]
+            i += 1
+            if loc.is_core_reg():
+                self.rm.reg_bindings[arg] = loc
+                used[loc] = None
+            elif loc.is_vfp_reg():
+                self.vfprm.reg_bindings[arg] = loc
+                used[loc] = None
+            else:
+                assert loc.is_stack()  # anything that is not a register must be a stack location
+                self.frame_manager.bind(arg, loc)
+
+        # XXX combine with x86 code and move to llsupport
+        self.rm.free_regs = []  # rebuilt from scratch: every core register not in 'used'
+        for reg in self.rm.all_regs:
+            if reg not in used:
+                self.rm.free_regs.append(reg)
+        self.vfprm.free_regs = []  # same rebuild for the VFP registers
+        for reg in self.vfprm.all_regs:
+            if reg not in used:
+                self.vfprm.free_regs.append(reg)
+        # note: we need to make a copy of inputargs because possibly_free_vars
+        # is also used on op args, which is a non-resizable list
+        self.possibly_free_vars(list(inputargs))
+        self.fm.finish_binding()
+        self._check_invariants()
+
+    def get_gcmap(self, forbidden_regs=[], noregs=False):  # the default list is only read here, never mutated
+        frame_depth = self.fm.get_frame_depth()
+        gcmap = allocate_gcmap(self.assembler,
+                        frame_depth, JITFRAME_FIXED_SIZE)
+        for box, loc in self.rm.reg_bindings.iteritems():  # pass 1: boxes bound to core registers
+            if loc in forbidden_regs:
+                continue  # the caller asked to leave this register out of the map
+            if box.type == REF and self.rm.is_still_alive(box):
+                assert not noregs  # with noregs=True no live REF may be in a register
+                assert loc.is_core_reg()
+                val = loc.value
+                gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8))  # set bit 'val'; each gcmap word holds WORD * 8 bits
+        for box, loc in self.fm.bindings.iteritems():  # pass 2: boxes bound by the frame manager
+            if box.type == REF and self.rm.is_still_alive(box):
+                assert loc.is_stack()
+                val = loc.position + JITFRAME_FIXED_SIZE  # bit index is the position shifted past the fixed part
+                gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8))  # same bit-setting as above
+        return gcmap
+
     def get_final_frame_depth(self):
         return self.frame_manager.get_frame_depth()
 
diff --git a/rpython/jit/backend/aarch64/runner.py b/rpython/jit/backend/aarch64/runner.py
--- a/rpython/jit/backend/aarch64/runner.py
+++ b/rpython/jit/backend/aarch64/runner.py
@@ -2,6 +2,7 @@
 from rpython.rtyper.lltypesystem import llmemory, lltype
 from rpython.jit.backend.aarch64.assembler import AssemblerARM64
 from rpython.jit.backend.aarch64 import registers as r
+from rpython.jit.backend.aarch64.regalloc import VFPRegisterManager
 from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
 
 class CPU_ARM64(AbstractLLCPU):
@@ -9,6 +10,9 @@
     backend_name = "aarch64"
     frame_reg = r.fp
     all_reg_indexes = range(len(r.all_regs))
+    gen_regs = r.all_regs
+    float_regs = VFPRegisterManager.all_regs
+
 
     IS_64_BIT = True
 
@@ -23,6 +27,14 @@
     def setup_once(self):
         self.assembler.setup_once()
 
+    def compile_bridge(self, faildescr, inputargs, operations,
+                       original_loop_token, log=True, logger=None):  # thin wrapper around assembler.assemble_bridge
+        clt = original_loop_token.compiled_loop_token
+        clt.compiling_a_bridge()  # called before assembling; its effect is not visible here
+        return self.assembler.assemble_bridge(logger, faildescr, inputargs,
+                                              operations,
+                                              original_loop_token, log=log)  # returns assemble_bridge's result unchanged
+
     def cast_ptr_to_int(x):
         adr = llmemory.cast_ptr_to_adr(x)
         return CPU_ARM64.cast_adr_to_int(adr)


More information about the pypy-commit mailing list