[pypy-commit] pypy py3.6: hg merge default
rlamy
pypy.commits at gmail.com
Wed Jul 24 10:16:35 EDT 2019
Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch: py3.6
Changeset: r97020:7560947dd35d
Date: 2019-07-24 15:15 +0100
http://bitbucket.org/pypy/pypy/changeset/7560947dd35d/
Log: hg merge default
diff too long, truncating to 2000 out of 7363 lines
diff --git a/extra_tests/cffi_tests/cffi1/test_recompiler.py b/extra_tests/cffi_tests/cffi1/test_recompiler.py
--- a/extra_tests/cffi_tests/cffi1/test_recompiler.py
+++ b/extra_tests/cffi_tests/cffi1/test_recompiler.py
@@ -2413,3 +2413,15 @@
a = ffi.new("struct A *")
assert ffi.sizeof(a[0]) == ffi.sizeof("unsigned")
assert ffi.sizeof(b[0]) == ffi.sizeof(a[0])
+
+def test_struct_with_func_with_struct_arg():
+ ffi = FFI()
+ ffi.cdef("""struct BinaryTree {
+ int (* CompareKey)(struct BinaryTree tree);
+ };""")
+ lib = verify(ffi, "test_struct_with_func_with_struct_arg", """
+ struct BinaryTree {
+ int (* CompareKey)(struct BinaryTree tree);
+ };
+ """)
+ py.test.raises(RuntimeError, ffi.new, "struct BinaryTree *")
diff --git a/lib_pypy/cffi/cparser.py b/lib_pypy/cffi/cparser.py
--- a/lib_pypy/cffi/cparser.py
+++ b/lib_pypy/cffi/cparser.py
@@ -145,12 +145,16 @@
return ''.join(parts)
def _warn_for_string_literal(csource):
- if '"' in csource:
- import warnings
- warnings.warn("String literal found in cdef() or type source. "
- "String literals are ignored here, but you should "
- "remove them anyway because some character sequences "
- "confuse pre-parsing.")
+ if '"' not in csource:
+ return
+ for line in csource.splitlines():
+ if '"' in line and not line.lstrip().startswith('#'):
+ import warnings
+ warnings.warn("String literal found in cdef() or type source. "
+ "String literals are ignored here, but you should "
+ "remove them anyway because some character sequences "
+ "confuse pre-parsing.")
+ break
def _preprocess(csource):
# Remove comments. NOTE: this only work because the cdef() section
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -49,3 +49,12 @@
Instead, replace it in ``rewrite.py`` with a direct call to ``memcpy()`` and
new basic operation, ``load_effective_address``, which the backend can
even decide not to implement.
+
+.. branch: arm64
+Add a JIT backend for ARM64 (aarch64)
+
+.. branch: fix-test-vmprof-closed-file
+
+
+.. branch: fix_darwin_list_dir_test
+
diff --git a/pypy/module/_cffi_backend/ctypefunc.py b/pypy/module/_cffi_backend/ctypefunc.py
--- a/pypy/module/_cffi_backend/ctypefunc.py
+++ b/pypy/module/_cffi_backend/ctypefunc.py
@@ -10,6 +10,7 @@
from rpython.rlib.objectmodel import we_are_translated, instantiate
from rpython.rlib.objectmodel import keepalive_until_here
from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
+from rpython.rtyper.annlowlevel import llstr
from pypy.interpreter.error import OperationError, oefmt
from pypy.module import _cffi_backend
@@ -163,9 +164,9 @@
cif_descr = self.cif_descr # 'self' should have been promoted here
size = cif_descr.exchange_size
mustfree_max_plus_1 = 0
+ keepalives = [llstr(None)] * len(args_w) # llstrings
buffer = lltype.malloc(rffi.CCHARP.TO, size, flavor='raw')
try:
- keepalives = [None] * len(args_w) # None or strings
for i in range(len(args_w)):
data = rffi.ptradd(buffer, cif_descr.exchange_args[i])
w_obj = args_w[i]
@@ -191,9 +192,10 @@
if flag == 1:
lltype.free(raw_cdata, flavor='raw')
elif flag >= 4:
- value = keepalives[i]
- assert value is not None
- rffi.free_nonmovingbuffer(value, raw_cdata, chr(flag))
+ llobj = keepalives[i]
+ assert llobj # not NULL
+ rffi.free_nonmovingbuffer_ll(raw_cdata,
+ llobj, chr(flag))
lltype.free(buffer, flavor='raw')
keepalive_until_here(args_w)
return w_res
diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py
--- a/pypy/module/_cffi_backend/ctypeptr.py
+++ b/pypy/module/_cffi_backend/ctypeptr.py
@@ -305,16 +305,8 @@
else:
return lltype.nullptr(rffi.CCHARP.TO)
- def _prepare_pointer_call_argument(self, w_init, cdata, keepalives, i):
+ def _prepare_pointer_call_argument(self, w_init, cdata):
space = self.space
- if self.accept_str and space.isinstance_w(w_init, space.w_bytes):
- # special case to optimize strings passed to a "char *" argument
- value = space.bytes_w(w_init)
- if isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveBool):
- self._must_be_string_of_zero_or_one(value)
- keepalives[i] = value
- return misc.write_string_as_charp(cdata, value)
- #
if (space.isinstance_w(w_init, space.w_list) or
space.isinstance_w(w_init, space.w_tuple)):
length = space.int_w(space.len(w_init))
@@ -360,14 +352,27 @@
return 1
def convert_argument_from_object(self, cdata, w_ob, keepalives, i):
+ # writes the pointer to cdata[0], writes the must-free flag in
+ # the byte just before cdata[0], and returns True if something
+ # must be done later to free.
from pypy.module._cffi_backend.ctypefunc import set_mustfree_flag
- result = (not isinstance(w_ob, cdataobj.W_CData) and
- self._prepare_pointer_call_argument(w_ob, cdata,
- keepalives, i))
+ if isinstance(w_ob, cdataobj.W_CData):
+ result = 0
+ else:
+ space = self.space
+ if self.accept_str and space.isinstance_w(w_ob, space.w_bytes):
+ # special case to optimize strings passed to a "char *" argument
+ value = space.bytes_w(w_ob)
+ if isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveBool):
+ self._must_be_string_of_zero_or_one(value)
+ keepalives[i] = misc.write_string_as_charp(cdata, value)
+ return True
+ result = self._prepare_pointer_call_argument(w_ob, cdata)
+
if result == 0:
self.convert_from_object(cdata, w_ob)
set_mustfree_flag(cdata, result)
- return result
+ return result == 1 # 0 or 2 => False, nothing to do later
def getcfield(self, attr):
from pypy.module._cffi_backend.ctypestruct import W_CTypeStructOrUnion
diff --git a/pypy/module/_cffi_backend/misc.py b/pypy/module/_cffi_backend/misc.py
--- a/pypy/module/_cffi_backend/misc.py
+++ b/pypy/module/_cffi_backend/misc.py
@@ -112,11 +112,13 @@
def write_raw_longdouble_data(target, source):
rffi.cast(rffi.LONGDOUBLEP, target)[0] = source
-@jit.dont_look_inside # lets get_nonmovingbuffer_final_null be inlined
+@jit.dont_look_inside # lets get_nonmovingbuffer_ll_final_null be inlined
def write_string_as_charp(target, string):
- buf, buf_flag = rffi.get_nonmovingbuffer_final_null(string)
+ from pypy.module._cffi_backend.ctypefunc import set_mustfree_flag
+ buf, llobj, buf_flag = rffi.get_nonmovingbuffer_ll_final_null(string)
+ set_mustfree_flag(target, ord(buf_flag)) # 4, 5 or 6
rffi.cast(rffi.CCHARPP, target)[0] = buf
- return ord(buf_flag) # 4, 5 or 6
+ return llobj
# ____________________________________________________________
diff --git a/pypy/module/_cffi_backend/realize_c_type.py b/pypy/module/_cffi_backend/realize_c_type.py
--- a/pypy/module/_cffi_backend/realize_c_type.py
+++ b/pypy/module/_cffi_backend/realize_c_type.py
@@ -405,6 +405,20 @@
if from_ffi and ffi.cached_types[index] is not None:
return ffi.cached_types[index]
+ opcodes[index] = rffi.cast(rffi.VOIDP, 255)
+ try:
+ x = realize_c_type_or_func_now(ffi, op, opcodes, index)
+ finally:
+ if opcodes[index] == rffi.cast(rffi.VOIDP, 255):
+ opcodes[index] = op
+
+ if from_ffi:
+ assert ffi.cached_types[index] is None or ffi.cached_types[index] is x
+ ffi.cached_types[index] = x
+
+ return x
+
+def realize_c_type_or_func_now(ffi, op, opcodes, index):
case = getop(op)
if case == cffi_opcode.OP_PRIMITIVE:
@@ -446,13 +460,16 @@
'c_type_index')
x = realize_c_type_or_func(ffi, ffi.ctxobj.ctx.c_types, type_index)
+ elif case == 255:
+ raise oefmt(ffi.space.w_RuntimeError,
+ "found a situation in which we try to build a type recursively. "
+ "This is known to occur e.g. in ``struct s { void(*callable)"
+ "(struct s); }''. Please report if you get this error and "
+ "really need support for your case.")
+
else:
raise oefmt(ffi.space.w_NotImplementedError, "op=%d", case)
- if from_ffi:
- assert ffi.cached_types[index] is None or ffi.cached_types[index] is x
- ffi.cached_types[index] = x
-
return x
diff --git a/pypy/module/_cffi_backend/test/test_recompiler.py b/pypy/module/_cffi_backend/test/test_recompiler.py
--- a/pypy/module/_cffi_backend/test/test_recompiler.py
+++ b/pypy/module/_cffi_backend/test/test_recompiler.py
@@ -2141,3 +2141,19 @@
assert seen == [2 * 4, p]
ffi.release(p) # no effect
assert seen == [2 * 4, p]
+
+ def test_struct_with_func_with_struct_arg(self):
+ ffi, lib = self.prepare("""struct BinaryTree {
+ int (* CompareKey)(struct BinaryTree tree);
+ };""",
+ "test_struct_with_func_with_struct_arg", """
+ struct BinaryTree {
+ int (* CompareKey)(struct BinaryTree tree);
+ };
+ """)
+ e = raises(RuntimeError, ffi.new, "struct BinaryTree *")
+ assert str(e.value) == (
+ "found a situation in which we try to build a type recursively. "
+ "This is known to occur e.g. in ``struct s { void(*callable)"
+ "(struct s); }''. Please report if you get this error and "
+ "really need support for your case.")
diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py
--- a/pypy/module/_vmprof/test/test__vmprof.py
+++ b/pypy/module/_vmprof/test/test__vmprof.py
@@ -114,14 +114,17 @@
@py.test.mark.xfail(sys.platform.startswith('freebsd'), reason = "not implemented")
def test_get_profile_path(self):
import _vmprof
- tmpfile = open(self.tmpfilename, 'wb')
- assert _vmprof.get_profile_path() is None
- _vmprof.enable(tmpfile.fileno(), 0.01, 0, 0, 0, 0)
- path = _vmprof.get_profile_path()
+ with open(self.tmpfilename, "wb") as tmpfile:
+ assert _vmprof.get_profile_path() is None
+ _vmprof.enable(tmpfile.fileno(), 0.01, 0, 0, 0, 0)
+ path = _vmprof.get_profile_path()
+ _vmprof.disable()
+
if path != tmpfile.name:
with open(path, "rb") as fd1:
- assert fd1.read() == tmpfile.read()
- _vmprof.disable()
+ with open(self.tmpfilename, "rb") as fd2:
+ assert fd1.read() == fd2.read()
+
assert _vmprof.get_profile_path() is None
def test_stop_sampling(self):
diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py
--- a/rpython/config/translationoption.py
+++ b/rpython/config/translationoption.py
@@ -1,5 +1,6 @@
import sys
import os
+import platform as _stdlib_platform
from rpython.config.config import OptionDescription, BoolOption, IntOption, ArbitraryOption, FloatOption
from rpython.config.config import ChoiceOption, StrOption, Config, ConflictConfigError
from rpython.config.config import ConfigError
@@ -30,7 +31,9 @@
False)
# Windows doesn't work. Please
# add other platforms here if it works on them.
-
+MACHINE = _stdlib_platform.machine()
+if MACHINE == 'aarch64':
+ SUPPORT__THREAD = False
# (*) NOTE: __thread on OS/X does not work together with
# pthread_key_create(): when the destructor is called, the __thread is
# already freed!
diff --git a/rpython/jit/backend/aarch64/TODO b/rpython/jit/backend/aarch64/TODO
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/aarch64/TODO
@@ -0,0 +1,35 @@
+* cond_call and following guard_exception
+
+
+* We can try to make generate_quick_failure() emit two instructions less:
+ the two store_reg() [one in generate_quick_failure and the other in
+ push_gcmap]. Instead we'd load the values in ip2 and ip3, and the
+ store_regs would occur inside self.failure_recovery_code
+ (which 'target' points to).
+
+
+* use STP instead of STR in all long sequences of STR. Same with LDR
+
+* use "STR xzr, [..]" instead of "gen_load_int(ip, 0); STR ip, [..]".
+ Search around for gen_load_int(...0): it occurs at least in pop_gcmap()
+ _build_failure_recovery(), build_frame_realloc_slowpath(), etc.
+
+
+* malloc_cond() and malloc_cond_varsize_frame() hard-code forward jump
+ distances by guessing the number of instructions that follow. Bad
+ idea because some of these instructions could easily be optimized in
+ the future to be a bit shorter. Rewrite this two places to use the
+ proper way instead of a magic "40" (or at least assert that it was
+ really 40).
+
+
+* use "CBNZ register, offset" (compare-and-branch-if-not-zero)
+ instead of a CMP+BNE pair. Same with CBZ instead of CMP+BEQ
+
+
+* when we need to save things on the stack, we typically push two words
+ and pop them later. It would be cheaper if we reserved two locations
+ in the stack from _call_header, then we could just write there.
+ *OR*
+ maybe it's enough if we use the form "str x0, [sp, !#offset]" which
+ combines in a single instruction the "str" with the change of sp
diff --git a/rpython/jit/backend/aarch64/__init__.py b/rpython/jit/backend/aarch64/__init__.py
new file mode 100644
diff --git a/rpython/jit/backend/aarch64/arch.py b/rpython/jit/backend/aarch64/arch.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/aarch64/arch.py
@@ -0,0 +1,14 @@
+
+WORD = 8
+
+# The stack contains the force_index and the callee saved registers and
+# ABI required information
+# All the rest of the data is in a GC-managed variable-size "frame".
+# This jitframe object's address is always stored in the register FP
+# A jitframe is a jit.backend.llsupport.llmodel.jitframe.JITFRAME
+# Stack frame fixed area
+# Currently only the force_index
+NUM_MANAGED_REGS = 16
+NUM_VFP_REGS = 8
+JITFRAME_FIXED_SIZE = NUM_MANAGED_REGS + NUM_VFP_REGS
+# 16 GPR + 8 VFP Regs, for now
diff --git a/rpython/jit/backend/aarch64/assembler.py b/rpython/jit/backend/aarch64/assembler.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/aarch64/assembler.py
@@ -0,0 +1,1482 @@
+
+from rpython.jit.backend.aarch64.arch import WORD, JITFRAME_FIXED_SIZE
+from rpython.jit.backend.aarch64.codebuilder import InstrBuilder, OverwritingBuilder
+from rpython.jit.backend.aarch64.locations import imm, StackLocation, get_fp_offset
+#from rpython.jit.backend.arm.helper.regalloc import VMEM_imm_size
+from rpython.jit.backend.aarch64.opassembler import ResOpAssembler
+from rpython.jit.backend.aarch64.regalloc import (Regalloc, check_imm_arg,
+ operations as regalloc_operations, guard_operations, comp_operations,
+ CoreRegisterManager, VFPRegisterManager)
+from rpython.jit.backend.aarch64 import registers as r
+from rpython.jit.backend.arm import conditions as c
+from rpython.jit.backend.llsupport import jitframe, rewrite
+from rpython.jit.backend.llsupport.assembler import BaseAssembler
+from rpython.jit.backend.llsupport.regalloc import get_scale, valid_addressing_size
+from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
+from rpython.jit.backend.model import CompiledLoopToken
+from rpython.jit.codewriter.effectinfo import EffectInfo
+from rpython.jit.metainterp.history import AbstractFailDescr, FLOAT, INT, VOID
+from rpython.jit.metainterp.resoperation import rop
+from rpython.rlib.debug import debug_print, debug_start, debug_stop
+from rpython.rlib.jit import AsmInfo
+from rpython.rlib.objectmodel import we_are_translated, specialize, compute_unique_id
+from rpython.rlib.rarithmetic import r_uint
+from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref
+from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rtyper.lltypesystem.lloperation import llop
+from rpython.rlib.rjitlog import rjitlog as jl
+
+class AssemblerARM64(ResOpAssembler):
+ def __init__(self, cpu, translate_support_code=False):
+ ResOpAssembler.__init__(self, cpu, translate_support_code)
+ self.failure_recovery_code = [0, 0, 0, 0]
+ self.wb_slowpath = [0, 0, 0, 0, 0]
+
+ def assemble_loop(self, jd_id, unique_id, logger, loopname, inputargs,
+ operations, looptoken, log):
+ clt = CompiledLoopToken(self.cpu, looptoken.number)
+ clt._debug_nbargs = len(inputargs)
+ looptoken.compiled_loop_token = clt
+
+ if not we_are_translated():
+ # Arguments should be unique
+ assert len(set(inputargs)) == len(inputargs)
+
+ self.setup(looptoken)
+
+ frame_info = self.datablockwrapper.malloc_aligned(
+ jitframe.JITFRAMEINFO_SIZE, alignment=WORD)
+ clt.frame_info = rffi.cast(jitframe.JITFRAMEINFOPTR, frame_info)
+ clt.frame_info.clear() # for now
+
+ if log:
+ operations = self._inject_debugging_code(looptoken, operations,
+ 'e', looptoken.number)
+
+ regalloc = Regalloc(assembler=self)
+ allgcrefs = []
+ operations = regalloc.prepare_loop(inputargs, operations, looptoken,
+ allgcrefs)
+ self.reserve_gcref_table(allgcrefs)
+ functionpos = self.mc.get_relative_pos()
+
+ self._call_header_with_stack_check()
+ self._check_frame_depth_debug(self.mc)
+
+ loop_head = self.mc.get_relative_pos()
+ looptoken._ll_loop_code = loop_head
+ #
+ frame_depth_no_fixed_size = self._assemble(regalloc, inputargs, operations)
+ self.update_frame_depth(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
+ #
+ size_excluding_failure_stuff = self.mc.get_relative_pos()
+
+ self.write_pending_failure_recoveries()
+
+ full_size = self.mc.get_relative_pos()
+ rawstart = self.materialize_loop(looptoken)
+ looptoken._ll_function_addr = rawstart + functionpos
+
+ self.patch_gcref_table(looptoken, rawstart)
+ self.process_pending_guards(rawstart)
+ self.fixup_target_tokens(rawstart)
+
+ if log and not we_are_translated():
+ self.mc._dump_trace(rawstart,
+ 'loop.asm')
+
+ ops_offset = self.mc.ops_offset
+
+ if logger:
+ log = logger.log_trace(jl.MARK_TRACE_ASM, None, self.mc)
+ log.write(inputargs, operations, ops_offset=ops_offset)
+
+ # legacy
+ if logger.logger_ops:
+ logger.logger_ops.log_loop(inputargs, operations, 0,
+ "rewritten", name=loopname,
+ ops_offset=ops_offset)
+
+ self.teardown()
+
+ debug_start("jit-backend-addr")
+ debug_print("Loop %d (%s) has address 0x%x to 0x%x (bootstrap 0x%x)" % (
+ looptoken.number, loopname,
+ r_uint(rawstart + loop_head),
+ r_uint(rawstart + size_excluding_failure_stuff),
+ r_uint(rawstart + functionpos)))
+ debug_print(" gc table: 0x%x" % r_uint(rawstart))
+ debug_print(" function: 0x%x" % r_uint(rawstart + functionpos))
+ debug_print(" resops: 0x%x" % r_uint(rawstart + loop_head))
+ debug_print(" failures: 0x%x" % r_uint(rawstart +
+ size_excluding_failure_stuff))
+ debug_print(" end: 0x%x" % r_uint(rawstart + full_size))
+ debug_stop("jit-backend-addr")
+
+ return AsmInfo(ops_offset, rawstart + loop_head,
+ size_excluding_failure_stuff - loop_head)
+
+ def assemble_bridge(self, logger, faildescr, inputargs, operations,
+ original_loop_token, log):
+ if not we_are_translated():
+ # Arguments should be unique
+ assert len(set(inputargs)) == len(inputargs)
+
+ self.setup(original_loop_token)
+ #self.codemap.inherit_code_from_position(faildescr.adr_jump_offset)
+ descr_number = compute_unique_id(faildescr)
+ if log:
+ operations = self._inject_debugging_code(faildescr, operations,
+ 'b', descr_number)
+
+ assert isinstance(faildescr, AbstractFailDescr)
+
+ arglocs = self.rebuild_faillocs_from_descr(faildescr, inputargs)
+
+ regalloc = Regalloc(assembler=self)
+ allgcrefs = []
+ operations = regalloc.prepare_bridge(inputargs, arglocs,
+ operations,
+ allgcrefs,
+ self.current_clt.frame_info)
+ self.reserve_gcref_table(allgcrefs)
+ startpos = self.mc.get_relative_pos()
+
+ self._check_frame_depth(self.mc, regalloc.get_gcmap())
+
+ bridgestartpos = self.mc.get_relative_pos()
+ frame_depth_no_fixed_size = self._assemble(regalloc, inputargs, operations)
+
+ codeendpos = self.mc.get_relative_pos()
+
+ self.write_pending_failure_recoveries()
+
+ fullsize = self.mc.get_relative_pos()
+ rawstart = self.materialize_loop(original_loop_token)
+
+ self.patch_gcref_table(original_loop_token, rawstart)
+ self.process_pending_guards(rawstart)
+
+ debug_start("jit-backend-addr")
+ debug_print("bridge out of Guard 0x%x has address 0x%x to 0x%x" %
+ (r_uint(descr_number), r_uint(rawstart + startpos),
+ r_uint(rawstart + codeendpos)))
+ debug_print(" gc table: 0x%x" % r_uint(rawstart))
+ debug_print(" jump target: 0x%x" % r_uint(rawstart + startpos))
+ debug_print(" resops: 0x%x" % r_uint(rawstart + bridgestartpos))
+ debug_print(" failures: 0x%x" % r_uint(rawstart + codeendpos))
+ debug_print(" end: 0x%x" % r_uint(rawstart + fullsize))
+ debug_stop("jit-backend-addr")
+
+ # patch the jump from original guard
+ self.patch_trace(faildescr, original_loop_token,
+ rawstart + startpos, regalloc)
+
+ self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE,
+ rawstart)
+ if not we_are_translated():
+ if log:
+ self.mc._dump_trace(rawstart, 'bridge.asm')
+
+ ops_offset = self.mc.ops_offset
+ frame_depth = max(self.current_clt.frame_info.jfi_frame_depth,
+ frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
+ self.fixup_target_tokens(rawstart)
+ self.update_frame_depth(frame_depth)
+
+ if logger:
+ log = logger.log_trace(jl.MARK_TRACE_ASM, None, self.mc)
+ log.write(inputargs, operations, ops_offset)
+ # log that the already written bridge is stitched to a descr!
+ logger.log_patch_guard(descr_number, rawstart)
+
+ # legacy
+ if logger.logger_ops:
+ logger.logger_ops.log_bridge(inputargs, operations, "rewritten",
+ faildescr, ops_offset=ops_offset)
+
+ self.teardown()
+
+ return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
+
+ def setup(self, looptoken):
+ BaseAssembler.setup(self, looptoken)
+ assert self.memcpy_addr != 0, 'setup_once() not called?'
+ if we_are_translated():
+ self.debug = False
+ self.current_clt = looptoken.compiled_loop_token
+ self.mc = InstrBuilder()
+ self.pending_guards = []
+ #assert self.datablockwrapper is None --- but obscure case
+ # possible, e.g. getting MemoryError and continuing
+ allblocks = self.get_asmmemmgr_blocks(looptoken)
+ self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
+ allblocks)
+ self.mc.datablockwrapper = self.datablockwrapper
+ self.target_tokens_currently_compiling = {}
+ self.frame_depth_to_patch = []
+
+ def teardown(self):
+ self.current_clt = None
+ self._regalloc = None
+ self.mc = None
+ self.pending_guards = None
+
+ def _push_all_regs_to_jitframe(self, mc, ignored_regs, withfloats,
+ callee_only=False):
+ # Push general purpose registers
+ base_ofs = self.cpu.get_baseofs_of_frame_field()
+ if callee_only:
+ regs = CoreRegisterManager.save_around_call_regs
+ else:
+ regs = CoreRegisterManager.all_regs
+ # XXX add special case if ignored_regs are a block at the start of regs
+ if not ignored_regs: # we want to push a contiguous block of regs
+ assert base_ofs < 0x100
+ for i, reg in enumerate(regs):
+ mc.STR_ri(reg.value, r.fp.value, base_ofs + i * WORD)
+ else:
+ for reg in ignored_regs:
+ assert not reg.is_vfp_reg() # sanity check
+ # we can have holes in the list of regs
+ for i, gpr in enumerate(regs):
+ if gpr in ignored_regs:
+ continue
+ self.store_reg(mc, gpr, r.fp, base_ofs + i * WORD)
+
+ if withfloats:
+ # Push VFP regs
+ regs = VFPRegisterManager.all_regs
+ ofs = len(CoreRegisterManager.all_regs) * WORD
+ for reg in regs:
+ mc.STR_di(reg.value, r.fp.value, ofs + base_ofs + reg.value * WORD)
+
+ def _pop_all_regs_from_jitframe(self, mc, ignored_regs, withfloats,
+ callee_only=False):
+ # Pop general purpose registers
+ base_ofs = self.cpu.get_baseofs_of_frame_field()
+ if callee_only:
+ regs = CoreRegisterManager.save_around_call_regs
+ else:
+ regs = CoreRegisterManager.all_regs
+ # XXX add special case if ignored_regs are a block at the start of regs
+ if not ignored_regs: # we want to pop a contiguous block of regs
+ assert base_ofs < 0x100
+ for i, reg in enumerate(regs):
+ mc.LDR_ri(reg.value, r.fp.value, base_ofs + i * WORD)
+ else:
+ for reg in ignored_regs:
+ assert not reg.is_vfp_reg() # sanity check
+ # we can have holes in the list of regs
+ for i, gpr in enumerate(regs):
+ if gpr in ignored_regs:
+ continue
+ ofs = i * WORD + base_ofs
+ self.load_reg(mc, gpr, r.fp, ofs)
+ if withfloats:
+ # Pop VFP regs
+ regs = VFPRegisterManager.all_regs
+ ofs = len(CoreRegisterManager.all_regs) * WORD
+ for reg in regs:
+ mc.LDR_di(reg.value, r.fp.value, ofs + base_ofs + reg.value * WORD)
+
+ def _build_failure_recovery(self, exc, withfloats=False):
+ mc = InstrBuilder()
+ self._push_all_regs_to_jitframe(mc, [], withfloats)
+
+ if exc:
+ # We might have an exception pending. Load it into r4
+ # (this is a register saved across calls)
+ mc.gen_load_int(r.x5.value, self.cpu.pos_exc_value())
+ mc.LDR_ri(r.x4.value, r.x5.value, 0)
+ # clear the exc flags
+ mc.gen_load_int(r.x6.value, 0)
+ mc.STR_ri(r.x6.value, r.x5.value, 0) # pos_exc_value is still in r5
+ mc.gen_load_int(r.x5.value, self.cpu.pos_exception())
+ mc.STR_ri(r.x6.value, r.x5.value, 0)
+ # save r4 into 'jf_guard_exc'
+ offset = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
+ assert check_imm_arg(abs(offset))
+ mc.STR_ri(r.x4.value, r.fp.value, offset)
+ # now we return from the complete frame, which starts from
+ # _call_header_with_stack_check(). The LEA in _call_footer below
+ # throws away most of the frame, including all the PUSHes that we
+ # did just above.
+
+ # set return value
+ mc.MOV_rr(r.x0.value, r.fp.value)
+
+ self.gen_func_epilog(mc)
+ rawstart = mc.materialize(self.cpu, [])
+ self.failure_recovery_code[exc + 2 * withfloats] = rawstart
+
+ def propagate_memoryerror_if_reg_is_null(self, reg_loc):
+ # see ../x86/assembler.py:genop_discard_check_memory_error()
+ self.mc.CMP_ri(reg_loc.value, 0)
+ self.mc.B_ofs_cond(6 * 4, c.NE)
+ self.mc.B(self.propagate_exception_path)
+
+ def _build_wb_slowpath(self, withcards, withfloats=False, for_frame=False):
+ descr = self.cpu.gc_ll_descr.write_barrier_descr
+ if descr is None:
+ return
+ if not withcards:
+ func = descr.get_write_barrier_fn(self.cpu)
+ else:
+ if descr.jit_wb_cards_set == 0:
+ return
+ func = descr.get_write_barrier_from_array_fn(self.cpu)
+ if func == 0:
+ return
+ #
+ # This builds a helper function called from the slow path of
+ # write barriers. It must save all registers, and optionally
+ # all vfp registers. It takes a single argument which is in x0.
+ # It must keep stack alignment accordingly.
+ mc = InstrBuilder()
+ #
+ mc.SUB_ri(r.sp.value, r.sp.value, 2 * WORD)
+ mc.STR_ri(r.lr.value, r.sp.value, 0)
+ if not for_frame:
+ self._push_all_regs_to_jitframe(mc, [], withfloats, callee_only=True)
+ else:
+ # NOTE: don't save registers on the jitframe here! It might
+ # override already-saved values that will be restored
+ # later...
+ #
+ # we're possibly called from the slowpath of malloc
+ # save the caller saved registers
+ # assuming we do not collect here
+ exc0, exc1 = r.x19, r.x20
+ mc.SUB_ri(r.sp.value, r.sp.value, (len(r.caller_resp) + 2 + len(r.caller_vfp_resp)) * WORD)
+ cur_stack = 0
+ for i in range(0, len(r.caller_resp), 2):
+ mc.STP_rri(r.caller_resp[i].value, r.caller_resp[i + 1].value, r.sp.value, i * WORD)
+ cur_stack = len(r.caller_resp)
+ mc.STP_rri(exc0.value, exc1.value, r.sp.value, cur_stack * WORD)
+ cur_stack += 2
+ for i in range(len(r.caller_vfp_resp)):
+ mc.STR_di(r.caller_vfp_resp[i].value, r.sp.value, cur_stack * WORD)
+ cur_stack += 1
+
+ self._store_and_reset_exception(mc, exc0, exc1)
+ mc.BL(func)
+ #
+ if not for_frame:
+ self._pop_all_regs_from_jitframe(mc, [], withfloats, callee_only=True)
+ else:
+ exc0, exc1 = r.x19, r.x20
+ self._restore_exception(mc, exc0, exc1)
+
+ cur_stack = 0
+ for i in range(0, len(r.caller_resp), 2):
+ mc.LDP_rri(r.caller_resp[i].value, r.caller_resp[i + 1].value, r.sp.value, i * WORD)
+ cur_stack = len(r.caller_resp)
+ mc.LDP_rri(exc0.value, exc1.value, r.sp.value, cur_stack * WORD)
+ cur_stack += 2
+ for i in range(len(r.caller_vfp_resp)):
+ mc.LDR_di(r.caller_vfp_resp[i].value, r.sp.value, cur_stack * WORD)
+ cur_stack += 1
+
+ assert exc0 is not None
+ assert exc1 is not None
+
+ mc.ADD_ri(r.sp.value, r.sp.value, (len(r.caller_resp) + 2 + len(r.caller_vfp_resp)) * WORD)
+
+ #
+ if withcards:
+ # A final TEST8 before the RET, for the caller. Careful to
+ # not follow this instruction with another one that changes
+ # the status of the CPU flags!
+ mc.LDRB_ri(r.ip0.value, r.x0.value, descr.jit_wb_if_flag_byteofs)
+ mc.MOVZ_r_u16(r.ip1.value, 0x80, 0)
+ mc.TST_rr_shift(r.ip0.value, r.ip1.value, 0)
+ #
+ mc.LDR_ri(r.ip1.value, r.sp.value, 0)
+ mc.ADD_ri(r.sp.value, r.sp.value, 2 * WORD)
+ mc.RET_r(r.ip1.value)
+ #
+ rawstart = mc.materialize(self.cpu, [])
+ if for_frame:
+ self.wb_slowpath[4] = rawstart
+ else:
+ self.wb_slowpath[withcards + 2 * withfloats] = rawstart
+
+ def build_frame_realloc_slowpath(self):
+ # this code should do the following steps
+ # a) store all registers in the jitframe
+ # b) fish for the arguments passed by the caller
+ # c) store the gcmap in the jitframe
+ # d) call realloc_frame
+ # e) set the fp to point to the new jitframe
+ # f) store the address of the new jitframe in the shadowstack
+ # c) set the gcmap field to 0 in the new jitframe
+ # g) restore registers and return
+ mc = InstrBuilder()
+ self._push_all_regs_to_jitframe(mc, [], self.cpu.supports_floats)
+ # this is the gcmap stored by push_gcmap(mov=True) in _check_stack_frame
+ # and the expected_size pushed in _check_stack_frame
+ # pop the values passed on the stack, gcmap -> r0, expected_size -> r1
+ mc.LDP_rri(r.x0.value, r.x1.value, r.sp.value, 0)
+
+ mc.STR_ri(r.lr.value, r.sp.value, 0)
+
+ # store the current gcmap(r0) in the jitframe
+ gcmap_ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
+ mc.STR_ri(r.x0.value, r.fp.value, gcmap_ofs)
+
+ # set first arg, which is the old jitframe address
+ mc.MOV_rr(r.x0.value, r.fp.value)
+
+ # store a possibly present exception
+ self._store_and_reset_exception(mc, None, r.x19, on_frame=True)
+
+ # call realloc_frame, it takes two arguments
+ # arg0: the old jitframe
+ # arg1: the new size
+ #
+ mc.BL(self.cpu.realloc_frame)
+
+ # set fp to the new jitframe returned from the previous call
+ mc.MOV_rr(r.fp.value, r.x0.value)
+
+ # restore a possibly present exception
+ self._restore_exception(mc, None, r.x19)
+
+ gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+ if gcrootmap and gcrootmap.is_shadow_stack:
+ self._load_shadowstack_top(mc, r.x19, gcrootmap)
+ # store the new jitframe addr in the shadowstack
+ mc.SUB_ri(r.x19.value, r.x19.value, WORD)
+ mc.STR_ri(r.x0.value, r.x19.value, 0)
+
+ # reset the jf_gcmap field in the jitframe
+ mc.gen_load_int(r.ip0.value, 0)
+ mc.STR_ri(r.ip0.value, r.fp.value, gcmap_ofs)
+
+ # restore registers
+ self._pop_all_regs_from_jitframe(mc, [], self.cpu.supports_floats)
+
+ # return
+ mc.LDR_ri(r.lr.value, r.sp.value, 0)
+ mc.ADD_ri(r.sp.value, r.sp.value, 2*WORD)
+ mc.RET_r(r.lr.value)
+ self._frame_realloc_slowpath = mc.materialize(self.cpu, [])
+
+ def _load_shadowstack_top(self, mc, reg, gcrootmap):
+ rst = gcrootmap.get_root_stack_top_addr()
+ mc.gen_load_int(reg.value, rst)
+ self.load_reg(mc, reg, reg)
+ return rst
+
+ def _store_and_reset_exception(self, mc, excvalloc=None, exctploc=None,
+ on_frame=False):
+ """ Reset the exception. If excvalloc is None, then store it on the
+ frame in jf_guard_exc
+ """
+ assert excvalloc is not r.ip0
+ assert exctploc is not r.ip0
+ tmpreg = r.ip1
+ mc.gen_load_int(r.ip0.value, self.cpu.pos_exc_value())
+ if excvalloc is not None: # store
+ assert excvalloc.is_core_reg()
+ self.load_reg(mc, excvalloc, r.ip0)
+ if on_frame:
+ # store exc_value in JITFRAME
+ ofs = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
+ assert check_imm_arg(ofs)
+ #
+ self.load_reg(mc, r.ip0, r.ip0, helper=tmpreg)
+ #
+ self.store_reg(mc, r.ip0, r.fp, ofs, helper=tmpreg)
+ if exctploc is not None:
+ # store pos_exception in exctploc
+ assert exctploc.is_core_reg()
+ mc.gen_load_int(r.ip0.value, self.cpu.pos_exception())
+ self.load_reg(mc, exctploc, r.ip0, helper=tmpreg)
+
+ if on_frame or exctploc is not None:
+ mc.gen_load_int(r.ip0.value, self.cpu.pos_exc_value())
+
+ # reset exception
+ mc.gen_load_int(tmpreg.value, 0)
+
+ self.store_reg(mc, tmpreg, r.ip0, 0)
+
+ mc.gen_load_int(r.ip0.value, self.cpu.pos_exception())
+ self.store_reg(mc, tmpreg, r.ip0, 0)
+
+ def _restore_exception(self, mc, excvalloc, exctploc):
+ assert excvalloc is not r.ip0
+ assert exctploc is not r.ip0
+ mc.gen_load_int(r.ip0.value, self.cpu.pos_exc_value())
+ if excvalloc is not None:
+ assert excvalloc.is_core_reg()
+ self.store_reg(mc, excvalloc, r.ip0)
+ else:
+ assert exctploc is not r.fp
+ # load exc_value from JITFRAME and put it in pos_exc_value
+ ofs = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
+ self.load_reg(mc, r.ip1, r.fp, ofs)
+ self.store_reg(mc, r.ip1, r.ip0)
+ # reset exc_value in the JITFRAME
+ mc.gen_load_int(r.ip1.value, 0)
+ self.store_reg(mc, r.ip1, r.fp, ofs)
+
+ # restore pos_exception from exctploc register
+ mc.gen_load_int(r.ip0.value, self.cpu.pos_exception())
+ self.store_reg(mc, exctploc, r.ip0)
+
+ def _build_propagate_exception_path(self):
+ mc = InstrBuilder()
+ self._store_and_reset_exception(mc, r.x0)
+ ofs = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
+ # make sure ofs fits into a register
+ assert check_imm_arg(ofs)
+ self.store_reg(mc, r.x0, r.fp, ofs)
+ propagate_exception_descr = rffi.cast(lltype.Signed,
+ cast_instance_to_gcref(self.cpu.propagate_exception_descr))
+ # put propagate_exception_descr into frame
+ ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
+ # make sure ofs fits into a register
+ assert check_imm_arg(ofs)
+ mc.gen_load_int(r.x0.value, propagate_exception_descr)
+ self.store_reg(mc, r.x0, r.fp, ofs)
+ mc.MOV_rr(r.x0.value, r.fp.value)
+ self.gen_func_epilog(mc)
+ rawstart = mc.materialize(self.cpu, [])
+ self.propagate_exception_path = rawstart
+
+ def _build_cond_call_slowpath(self, supports_floats, callee_only):
+ """ This builds a general call slowpath, for whatever call happens to
+ come.
+
+ The address of function to call comes in ip1. The result is also stored
+ in ip1 or ivfp
+ """
+ mc = InstrBuilder()
+ #
+ self._push_all_regs_to_jitframe(mc, [], self.cpu.supports_floats, callee_only)
+ ## args are in their respective positions
+ mc.SUB_ri(r.sp.value, r.sp.value, 2 * WORD)
+ mc.STR_ri(r.ip0.value, r.sp.value, WORD)
+ mc.STR_ri(r.lr.value, r.sp.value, 0)
+ mc.BLR_r(r.ip1.value)
+ # callee saved
+ self._reload_frame_if_necessary(mc) # <- this will not touch x0
+ mc.MOV_rr(r.ip1.value, r.x0.value)
+ self._pop_all_regs_from_jitframe(mc, [], supports_floats,
+ callee_only) # <- this does not touch ip1
+ # return
+ mc.LDR_ri(r.ip0.value, r.sp.value, 0)
+ mc.ADD_ri(r.sp.value, r.sp.value, 2 * WORD)
+ mc.RET_r(r.ip0.value)
+ return mc.materialize(self.cpu, [])
+
+ def _build_malloc_slowpath(self, kind):
+ """ While arriving on slowpath, we have a gcpattern on stack 0.
+ The arguments are passed in r0 and r1, as follows:
+
+ kind == 'fixed': nursery_head in r0 and the size in r1 - r0.
+
+ kind == 'str/unicode': length of the string to allocate in r0.
+
+ kind == 'var': length to allocate in r1, tid in r0,
+ and itemsize on the stack.
+
+ This function must preserve all registers apart from r0 and r1.
+ """
+ assert kind in ['fixed', 'str', 'unicode', 'var']
+ mc = InstrBuilder()
+ #
+ self._push_all_regs_to_jitframe(mc, [r.x0, r.x1], True)
+ #
+ if kind == 'fixed':
+ addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
+ elif kind == 'str':
+ addr = self.cpu.gc_ll_descr.get_malloc_fn_addr('malloc_str')
+ elif kind == 'unicode':
+ addr = self.cpu.gc_ll_descr.get_malloc_fn_addr('malloc_unicode')
+ else:
+ addr = self.cpu.gc_ll_descr.get_malloc_slowpath_array_addr()
+ if kind == 'fixed':
+ # At this point we know that the values we need to compute the size
+ # are stored in x0 and x1.
+ mc.SUB_rr(r.x0.value, r.x1.value, r.x0.value) # compute the size we want
+
+ if hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
+ mc.MOV_rr(r.x1.value, r.fp.value)
+ elif kind == 'str' or kind == 'unicode':
+ mc.MOV_rr(r.x0.value, r.x1.value)
+ else: # var
+ # tid is in x0
+ # length is in x1
+ # gcmap in ip1
+ # itemsize in ip2
+ mc.MOV_rr(r.x2.value, r.x1.value)
+ mc.MOV_rr(r.x1.value, r.x0.value)
+ mc.MOV_rr(r.x0.value, r.ip2.value) # load itemsize, ip2 now free
+ # store the gc pattern
+ ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
+ mc.STR_ri(r.ip1.value, r.fp.value, ofs)
+ #
+ mc.SUB_ri(r.sp.value, r.sp.value, 2 * WORD)
+ mc.STR_ri(r.lr.value, r.sp.value, 0)
+ #
+ mc.BL(addr)
+ #
+ # If the slowpath malloc failed, we raise a MemoryError that
+ # always interrupts the current loop, as a "good enough"
+ # approximation.
+ mc.CMP_ri(r.x0.value, 0)
+ mc.B_ofs_cond(4 * 6, c.NE)
+ mc.B(self.propagate_exception_path)
+ # jump here
+ self._reload_frame_if_necessary(mc)
+ self._pop_all_regs_from_jitframe(mc, [r.x0, r.x1], self.cpu.supports_floats)
+ #
+ nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
+ mc.gen_load_int(r.x1.value, nursery_free_adr)
+ mc.LDR_ri(r.x1.value, r.x1.value, 0)
+ # clear the gc pattern
+ mc.gen_load_int(r.ip0.value, 0)
+ self.store_reg(mc, r.ip0, r.fp, ofs)
+ # return
+ mc.LDR_ri(r.lr.value, r.sp.value, 0)
+ mc.ADD_ri(r.sp.value, r.sp.value, 2 * WORD)
+ mc.RET_r(r.lr.value)
+
+ #
+ rawstart = mc.materialize(self.cpu, [])
+ return rawstart
+
+ def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, gcmap):
+ assert size & (WORD-1) == 0
+
+ self.mc.gen_load_int(r.x0.value, nursery_free_adr)
+ self.mc.LDR_ri(r.x0.value, r.x0.value, 0)
+
+ if check_imm_arg(size):
+ self.mc.ADD_ri(r.x1.value, r.x0.value, size)
+ else:
+ self.mc.gen_load_int(r.x1.value, size)
+ self.mc.ADD_rr(r.x1.value, r.x0.value, r.x1.value)
+
+ self.mc.gen_load_int(r.ip0.value, nursery_top_adr)
+ self.mc.LDR_ri(r.ip0.value, r.ip0.value, 0)
+
+ self.mc.CMP_rr(r.x1.value, r.ip0.value)
+
+ # We load into r0 the address stored at nursery_free_adr. We calculate
+ # the new value for nursery_free_adr and store it in r1. Then we load the
+ # address stored in nursery_top_adr into IP. If the value in r1 is
+ # (unsigned) bigger than the one in ip we conditionally call
+ # malloc_slowpath in case we called malloc_slowpath, which returns the
+ # new value of nursery_free_adr in r1 and the adr of the new object in
+ # r0.
+
+ self.mc.B_ofs_cond(10 * 4, c.LO) # 4 for gcmap load, 5 for BL, 1 for B_ofs_cond
+ self.mc.gen_load_int_full(r.ip1.value, rffi.cast(lltype.Signed, gcmap))
+
+ self.mc.BL(self.malloc_slowpath)
+
+ self.mc.gen_load_int(r.ip0.value, nursery_free_adr)
+ self.mc.STR_ri(r.x1.value, r.ip0.value, 0)
+
+ def malloc_cond_varsize_frame(self, nursery_free_adr, nursery_top_adr,
+ sizeloc, gcmap):
+ if sizeloc is r.x0:
+ self.mc.MOV_rr(r.x1.value, r.x0.value)
+ sizeloc = r.x1
+ self.mc.gen_load_int(r.x0.value, nursery_free_adr)
+ self.mc.LDR_ri(r.x0.value, r.x0.value, 0)
+ #
+ self.mc.ADD_rr(r.x1.value, r.x0.value, sizeloc.value)
+ #
+ self.mc.gen_load_int(r.ip0.value, nursery_top_adr)
+ self.mc.LDR_ri(r.ip0.value, r.ip0.value, 0)
+
+ self.mc.CMP_rr(r.x1.value, r.ip0.value)
+ #
+ self.mc.B_ofs_cond(40, c.LO) # see calculations in malloc_cond
+ self.mc.gen_load_int_full(r.ip1.value, rffi.cast(lltype.Signed, gcmap))
+
+ self.mc.BL(self.malloc_slowpath)
+
+ self.mc.gen_load_int(r.ip0.value, nursery_free_adr)
+ self.mc.STR_ri(r.x1.value, r.ip0.value, 0)
+
+ def malloc_cond_varsize(self, kind, nursery_free_adr, nursery_top_adr,
+ lengthloc, itemsize, maxlength, gcmap,
+ arraydescr):
+ from rpython.jit.backend.llsupport.descr import ArrayDescr
+ assert isinstance(arraydescr, ArrayDescr)
+
+ # lengthloc is the length of the array, which we must not modify!
+ assert lengthloc is not r.x0 and lengthloc is not r.x1
+ if lengthloc.is_core_reg():
+ varsizeloc = lengthloc
+ else:
+ assert lengthloc.is_stack()
+ self.regalloc_mov(lengthloc, r.x1)
+ varsizeloc = r.x1
+ #
+ if check_imm_arg(maxlength):
+ self.mc.CMP_ri(varsizeloc.value, maxlength)
+ else:
+ self.mc.gen_load_int(r.ip0.value, maxlength)
+ self.mc.CMP_rr(varsizeloc.value, r.ip0.value)
+ jmp_adr0 = self.mc.currpos() # jump to (large)
+ self.mc.BRK()
+ #
+ self.mc.gen_load_int(r.x0.value, nursery_free_adr)
+ self.mc.LDR_ri(r.x0.value, r.x0.value, 0)
+
+
+ if valid_addressing_size(itemsize):
+ shiftsize = get_scale(itemsize)
+ else:
+ shiftsize = self._mul_const_scaled(self.mc, r.lr, varsizeloc,
+ itemsize)
+ varsizeloc = r.lr
+ # now varsizeloc is a register != x0. The size of
+ # the variable part of the array is (varsizeloc << shiftsize)
+ assert arraydescr.basesize >= self.gc_minimal_size_in_nursery
+ constsize = arraydescr.basesize + self.gc_size_of_header
+ force_realignment = (itemsize % WORD) != 0
+ if force_realignment:
+ constsize += WORD - 1
+ self.mc.gen_load_int(r.ip0.value, constsize)
+ # constsize + (varsizeloc << shiftsize)
+ self.mc.ADD_rr_shifted(r.x1.value, r.ip0.value, varsizeloc.value,
+ shiftsize)
+ self.mc.ADD_rr(r.x1.value, r.x1.value, r.x0.value)
+ if force_realignment:
+ # -WORD = 0xfffffffffffffff8
+ self.mc.gen_load_int(r.ip0.value, -WORD)
+ self.mc.AND_rr(r.x1.value, r.x1.value, r.ip0.value)
+ # now x1 contains the total size in bytes, rounded up to a multiple
+ # of WORD, plus nursery_free_adr
+ #
+ self.mc.gen_load_int(r.ip0.value, nursery_top_adr)
+ self.mc.LDR_ri(r.ip0.value, r.ip0.value, 0)
+
+ self.mc.CMP_rr(r.x1.value, r.ip0.value)
+ jmp_adr1 = self.mc.currpos() # jump to (after-call)
+ self.mc.BRK()
+ #
+ # (large)
+ currpos = self.mc.currpos()
+ pmc = OverwritingBuilder(self.mc, jmp_adr0, WORD)
+ pmc.B_ofs_cond(currpos - jmp_adr0, c.GT)
+ #
+ # save the gcmap
+ self.mc.gen_load_int_full(r.ip1.value, rffi.cast(lltype.Signed, gcmap))
+ #
+
+ if kind == rewrite.FLAG_ARRAY:
+ self.mc.gen_load_int(r.x0.value, arraydescr.tid)
+ self.regalloc_mov(lengthloc, r.x1)
+ self.mc.gen_load_int(r.ip2.value, itemsize)
+ addr = self.malloc_slowpath_varsize
+ else:
+ if kind == rewrite.FLAG_STR:
+ addr = self.malloc_slowpath_str
+ else:
+ assert kind == rewrite.FLAG_UNICODE
+ addr = self.malloc_slowpath_unicode
+ self.regalloc_mov(lengthloc, r.x1)
+ self.mc.BL(addr)
+ #
+ jmp_location = self.mc.currpos() # jump to (done)
+ self.mc.BRK()
+ # (after-call)
+ currpos = self.mc.currpos()
+ pmc = OverwritingBuilder(self.mc, jmp_adr1, WORD)
+ pmc.B_ofs_cond(currpos - jmp_adr1, c.LS)
+ #
+ # write down the tid, but not if it's the result of the CALL
+ self.mc.gen_load_int(r.ip0.value, arraydescr.tid)
+ self.mc.STR_ri(r.ip0.value, r.x0.value, 0)
+
+ # while we're at it, this line is not needed if we've done the CALL
+ self.mc.gen_load_int(r.ip0.value, nursery_free_adr)
+ self.mc.STR_ri(r.x1.value, r.ip0.value, 0)
+ # (done)
+ # skip instructions after call
+ currpos = self.mc.currpos()
+ pmc = OverwritingBuilder(self.mc, jmp_location, WORD)
+ pmc.B_ofs(currpos - jmp_location)
+
+ def _mul_const_scaled(self, mc, targetreg, sourcereg, itemsize):
+ """Produce one operation to do roughly
+ targetreg = sourcereg * itemsize
+ except that the targetreg may still need shifting by 0,1,2,3.
+ """
+ if (itemsize & 7) == 0:
+ shiftsize = 3
+ elif (itemsize & 3) == 0:
+ shiftsize = 2
+ elif (itemsize & 1) == 0:
+ shiftsize = 1
+ else:
+ shiftsize = 0
+ itemsize >>= shiftsize
+ #
+ if valid_addressing_size(itemsize - 1):
+ self.mc.ADD_rr_shifted(targetreg.value, sourcereg.value, sourcereg.value,
+ get_scale(itemsize - 1))
+ elif valid_addressing_size(itemsize):
+ self.mc.LSL_ri(targetreg.value, sourcereg.value,
+ get_scale(itemsize))
+ else:
+ mc.gen_load_int(targetreg.value, itemsize)
+ mc.MUL_rr(targetreg.value, sourcereg.value, targetreg.value)
+ #
+ return shiftsize
+
+
+ def _build_stack_check_slowpath(self):
+ _, _, slowpathaddr = self.cpu.insert_stack_check()
+ if slowpathaddr == 0 or not self.cpu.propagate_exception_descr:
+ return # no stack check (for tests, or non-translated)
+ #
+ # make a "function" that is called immediately at the start of
+ # an assembler function. In particular, the stack looks like:
+ #
+ # | retaddr of caller | <-- aligned to a multiple of 16
+ # | saved argument regs |
+ # | my own retaddr | <-- sp
+ # +-----------------------+
+ #
+ mc = InstrBuilder()
+ # save argument registers and return address
+ mc.SUB_ri(r.sp.value, r.sp.value, (len(r.argument_regs) + 2) * WORD)
+ mc.STR_ri(r.lr.value, r.sp.value, 0)
+ for i in range(0, len(r.argument_regs), 2):
+ mc.STP_rri(r.argument_regs[i].value, r.argument_regs[i + 1].value,
+ r.sp.value, (i + 2) * WORD)
+ # stack is aligned here
+ # Pass current stack pointer as argument to the call
+ mc.SUB_ri(r.x0.value, r.sp.value, 0)
+ #
+ mc.BL(slowpathaddr)
+
+ # check for an exception
+ mc.gen_load_int(r.x0.value, self.cpu.pos_exception())
+ mc.LDR_ri(r.x0.value, r.x0.value, 0)
+ mc.TST_rr_shift(r.x0.value, r.x0.value, 0)
+ #
+ # restore registers and return
+ # We check for c.EQ here, meaning all bits zero in this case
+
+ jmp = mc.currpos()
+ mc.BRK()
+
+ for i in range(0, len(r.argument_regs), 2):
+ mc.LDP_rri(r.argument_regs[i].value, r.argument_regs[i + 1].value,
+ r.sp.value, (i + 2) * WORD)
+ mc.LDR_ri(r.ip0.value, r.sp.value, 0)
+ mc.ADD_ri(r.sp.value, r.sp.value, (len(r.argument_regs) + 2) * WORD)
+ mc.RET_r(r.ip0.value)
+
+ # jump here
+
+ pmc = OverwritingBuilder(mc, jmp, WORD)
+ pmc.B_ofs_cond(mc.currpos() - jmp, c.NE)
+
+ mc.ADD_ri(r.sp.value, r.sp.value, (len(r.argument_regs) + 2) * WORD)
+ mc.B(self.propagate_exception_path)
+ #
+
+ rawstart = mc.materialize(self.cpu, [])
+ self.stack_check_slowpath = rawstart
+
+ def _check_frame_depth_debug(self, mc):
+ pass
+
+ def _check_frame_depth(self, mc, gcmap, expected_size=-1):
+ """ check if the frame is of enough depth to follow this bridge.
+ Otherwise reallocate the frame in a helper.
+ There are other potential solutions
+ to that, but this one does not sound too bad.
+ """
+ descrs = self.cpu.gc_ll_descr.getframedescrs(self.cpu)
+ ofs = self.cpu.unpack_fielddescr(descrs.arraydescr.lendescr)
+ mc.LDR_ri(r.ip0.value, r.fp.value, ofs)
+ stack_check_cmp_ofs = mc.currpos()
+ if expected_size == -1:
+ for _ in range(mc.get_max_size_of_gen_load_int()):
+ mc.NOP()
+ else:
+ mc.gen_load_int(r.ip1.value, expected_size)
+ mc.CMP_rr(r.ip0.value, r.ip1.value)
+
+ jg_location = mc.currpos()
+ mc.BRK()
+
+ # the size value is still stored in ip1
+ mc.SUB_ri(r.sp.value, r.sp.value, 2*WORD)
+ mc.STR_ri(r.ip1.value, r.sp.value, WORD)
+
+ mc.gen_load_int(r.ip0.value, rffi.cast(lltype.Signed, gcmap))
+ mc.STR_ri(r.ip0.value, r.sp.value, 0)
+
+ mc.BL(self._frame_realloc_slowpath)
+
+ # patch jg_location above
+ currpos = mc.currpos()
+ pmc = OverwritingBuilder(mc, jg_location, WORD)
+ pmc.B_ofs_cond(currpos - jg_location, c.GE)
+
+ self.frame_depth_to_patch.append(stack_check_cmp_ofs)
+
+ def update_frame_depth(self, frame_depth):
+ baseofs = self.cpu.get_baseofs_of_frame_field()
+ self.current_clt.frame_info.update_frame_depth(baseofs, frame_depth)
+
+ def _reload_frame_if_necessary(self, mc):
+ gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+ if gcrootmap and gcrootmap.is_shadow_stack:
+ rst = gcrootmap.get_root_stack_top_addr()
+ mc.gen_load_int(r.ip0.value, rst)
+ self.load_reg(mc, r.ip0, r.ip0)
+ mc.SUB_ri(r.ip0.value, r.ip0.value, WORD)
+ mc.LDR_ri(r.fp.value, r.ip0.value, 0)
+ wbdescr = self.cpu.gc_ll_descr.write_barrier_descr
+ if gcrootmap and wbdescr:
+ # frame never uses card marking, so we enforce this is not
+ # an array
+ self._write_barrier_fastpath(mc, wbdescr, [r.fp], array=False,
+ is_frame=True)
+
+ def generate_quick_failure(self, guardtok):
+ startpos = self.mc.currpos()
+ faildescrindex, target = self.store_info_on_descr(startpos, guardtok)
+ self.load_from_gc_table(r.ip0.value, faildescrindex)
+ ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
+ self.store_reg(self.mc, r.ip0, r.fp, ofs)
+ self.push_gcmap(self.mc, gcmap=guardtok.gcmap)
+ assert target
+ self.mc.BL(target)
+ return startpos
+
+ def push_gcmap(self, mc, gcmap, store=True):
+ assert store
+ ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
+ ptr = rffi.cast(lltype.Signed, gcmap)
+ mc.gen_load_int(r.ip0.value, ptr)
+ self.store_reg(mc, r.ip0, r.fp, ofs)
+
+ def pop_gcmap(self, mc):
+ ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
+ mc.gen_load_int(r.ip0.value, 0)
+ self.store_reg(mc, r.ip0, r.fp, ofs)
+
+ def write_pending_failure_recoveries(self):
+ for tok in self.pending_guards:
+ #generate the exit stub and the encoded representation
+ tok.pos_recovery_stub = self.generate_quick_failure(tok)
+
+ def reserve_gcref_table(self, allgcrefs):
+ gcref_table_size = len(allgcrefs) * WORD
+ # align to a multiple of 16 and reserve space at the beginning
+ # of the machine code for the gc table. This lets us write
+ # machine code with relative addressing (LDR literal).
+ gcref_table_size = (gcref_table_size + 15) & ~15
+ mc = self.mc
+ assert mc.get_relative_pos() == 0
+ for i in range(gcref_table_size):
+ mc.writechar('\x00')
+ self.setup_gcrefs_list(allgcrefs)
+
+ def patch_gcref_table(self, looptoken, rawstart):
+ # the gc table is at the start of the machine code
+ self.gc_table_addr = rawstart
+ tracer = self.cpu.gc_ll_descr.make_gcref_tracer(rawstart,
+ self._allgcrefs)
+ gcreftracers = self.get_asmmemmgr_gcreftracers(looptoken)
+ gcreftracers.append(tracer) # keepalive
+ self.teardown_gcrefs_list()
+
+ def patch_stack_checks(self, framedepth, rawstart):
+ for ofs in self.frame_depth_to_patch:
+ mc = InstrBuilder()
+ mc.gen_load_int(r.ip1.value, framedepth)
+ mc.copy_to_raw_memory(ofs + rawstart)
+
+ def load_from_gc_table(self, regnum, index):
+ address_in_buffer = index * WORD # at the start of the buffer
+ p_location = self.mc.get_relative_pos(break_basic_block=False)
+ offset = address_in_buffer - p_location
+ self.mc.LDR_r_literal(regnum, offset)
+
+ def materialize_loop(self, looptoken):
+ self.datablockwrapper.done() # finish using cpu.asmmemmgr
+ self.datablockwrapper = None
+ allblocks = self.get_asmmemmgr_blocks(looptoken)
+ size = self.mc.get_relative_pos()
+ res = self.mc.materialize(self.cpu, allblocks,
+ self.cpu.gc_ll_descr.gcrootmap)
+ #self.cpu.codemap.register_codemap(
+ # self.codemap.get_final_bytecode(res, size))
+ return res
+
+ def patch_trace(self, faildescr, looptoken, bridge_addr, regalloc):
+ b = InstrBuilder()
+ patch_addr = faildescr.adr_jump_offset
+ assert patch_addr != 0
+ b.BL(bridge_addr)
+ b.copy_to_raw_memory(patch_addr)
+ faildescr.adr_jump_offset = 0
+
+ def process_pending_guards(self, block_start):
+ clt = self.current_clt
+ for tok in self.pending_guards:
+ descr = tok.faildescr
+ assert isinstance(descr, AbstractFailDescr)
+ failure_recovery_pos = block_start + tok.pos_recovery_stub
+ descr.adr_jump_offset = failure_recovery_pos
+ relative_offset = tok.pos_recovery_stub - tok.offset
+ guard_pos = block_start + tok.offset
+ if not tok.guard_not_invalidated():
+ # patch the guard jump to the stub
+ # overwrite the generate BRK with a B_offs to the pos of the
+ # stub
+ mc = InstrBuilder()
+ mc.B_ofs_cond(relative_offset, c.get_opposite_of(tok.fcond))
+ mc.copy_to_raw_memory(guard_pos)
+ if tok.extra_offset != -1:
+ mc = InstrBuilder()
+ relative_offset = tok.pos_recovery_stub - tok.extra_offset
+ guard_pos = block_start + tok.extra_offset
+ mc.B_ofs_cond(relative_offset, c.get_opposite_of(tok.extra_cond))
+ mc.copy_to_raw_memory(guard_pos)
+ else:
+ clt.invalidate_positions.append((guard_pos, relative_offset))
+
+ def fixup_target_tokens(self, rawstart):
+ for targettoken in self.target_tokens_currently_compiling:
+ targettoken._ll_loop_code += rawstart
+ self.target_tokens_currently_compiling = None
+
+ def _call_header_with_stack_check(self):
+ self._call_header()
+ if self.stack_check_slowpath == 0:
+ pass # no stack check (e.g. not translated)
+ else:
+ endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
+ # load stack end
+ self.mc.gen_load_int(r.lr.value, endaddr) # load lr, [end]
+ self.mc.LDR_ri(r.lr.value, r.lr.value, 0) # LDR lr, lr
+ # load stack length
+ self.mc.gen_load_int(r.ip1.value, lengthaddr) # load ip1, length
+ self.mc.LDR_ri(r.ip1.value, r.ip1.value, 0) # ldr ip1, *length
+ # calculate ofs
+ self.mc.SUB_ri(r.ip0.value, r.sp.value, 0) # ip0 = sp
+ # otherwise we can't use sp
+ self.mc.SUB_rr(r.lr.value, r.lr.value, r.ip0.value) # lr = lr - ip0
+ # if ofs
+ self.mc.CMP_rr(r.lr.value, r.ip1.value) # CMP lr, ip1
+ pos = self.mc.currpos()
+ self.mc.BRK()
+ self.mc.BL(self.stack_check_slowpath) # call if ip0 > ip1
+ pmc = OverwritingBuilder(self.mc, pos, WORD)
+ pmc.B_ofs_cond(self.mc.currpos() - pos, c.LS)
+
+ def _call_header(self):
+ stack_size = (len(r.callee_saved_registers) + 4) * WORD
+ self.mc.STP_rr_preindex(r.lr.value, r.fp.value, r.sp.value, -stack_size)
+ for i in range(0, len(r.callee_saved_registers), 2):
+ self.mc.STP_rri(r.callee_saved_registers[i].value,
+ r.callee_saved_registers[i + 1].value,
+ r.sp.value,
+ (i + 4) * WORD)
+
+ self.saved_threadlocal_addr = 3 * WORD # at offset 3 from location 'sp'
+ self.mc.STR_ri(r.x1.value, r.sp.value, 3 * WORD)
+
+ # set fp to point to the JITFRAME, passed in argument 'x0'
+ self.mc.MOV_rr(r.fp.value, r.x0.value)
+ #
+ gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+ if gcrootmap and gcrootmap.is_shadow_stack:
+ self.gen_shadowstack_header(gcrootmap)
+
+ def _assemble(self, regalloc, inputargs, operations):
+ #self.guard_success_cc = c.cond_none
+ regalloc.compute_hint_frame_locations(operations)
+ self._walk_operations(inputargs, operations, regalloc)
+ #assert self.guard_success_cc == c.cond_none
+ frame_depth = regalloc.get_final_frame_depth()
+ jump_target_descr = regalloc.jump_target_descr
+ if jump_target_descr is not None:
+ tgt_depth = jump_target_descr._arm_clt.frame_info.jfi_frame_depth
+ target_frame_depth = tgt_depth - JITFRAME_FIXED_SIZE
+ frame_depth = max(frame_depth, target_frame_depth)
+ return frame_depth
+
+ def _walk_operations(self, inputargs, operations, regalloc):
+ self._regalloc = regalloc
+ regalloc.operations = operations
+ while regalloc.position() < len(operations) - 1:
+ regalloc.next_instruction()
+ i = regalloc.position()
+ op = operations[i]
+ self.mc.mark_op(op)
+ opnum = op.getopnum()
+ if rop.has_no_side_effect(opnum) and op not in regalloc.longevity:
+ regalloc.possibly_free_vars_for_op(op)
+ elif not we_are_translated() and op.getopnum() == rop.FORCE_SPILL:
+ regalloc.force_spill_var(op.getarg(0))
+ elif ((rop.returns_bool_result(opnum) or op.is_ovf()) and
+ i < len(operations) - 1 and
+ regalloc.next_op_can_accept_cc(operations, i) or
+ operations[i].is_ovf()):
+ if operations[i].is_ovf():
+ assert operations[i + 1].getopnum() in [rop.GUARD_OVERFLOW,
+ rop.GUARD_NO_OVERFLOW]
+ guard_op = operations[i + 1]
+ guard_num = guard_op.getopnum()
+ arglocs, fcond = guard_operations[guard_num](regalloc, guard_op, op)
+ if arglocs is not None:
+ asm_guard_operations[guard_num](self, op, guard_op, fcond, arglocs)
+ regalloc.next_instruction() # advance one more
+ if guard_op.is_guard(): # can be also cond_call
+ regalloc.possibly_free_vars(guard_op.getfailargs())
+ regalloc.possibly_free_vars_for_op(guard_op)
+ elif (rop.is_call_may_force(op.getopnum()) or
+ rop.is_call_release_gil(op.getopnum()) or
+ rop.is_call_assembler(op.getopnum())):
+ guard_op = operations[i + 1] # has to exist
+ guard_num = guard_op.getopnum()
+ assert guard_num in (rop.GUARD_NOT_FORCED, rop.GUARD_NOT_FORCED_2)
+ arglocs, fcond = guard_operations[guard_num](regalloc, guard_op, op)
+ if arglocs is not None:
+ asm_guard_operations[guard_num](self, op, guard_op, fcond, arglocs)
+ # fcond is abused here to pass the number of args
+ regalloc.next_instruction() # advance one more
+ regalloc.possibly_free_vars(guard_op.getfailargs())
+ regalloc.possibly_free_vars_for_op(guard_op)
+ else:
+ arglocs = regalloc_operations[opnum](regalloc, op)
+ if arglocs is not None:
+ asm_operations[opnum](self, op, arglocs)
+ if rop.is_guard(opnum):
+ regalloc.possibly_free_vars(op.getfailargs())
+ if op.type != 'v':
+ regalloc.possibly_free_var(op)
+ regalloc.possibly_free_vars_for_op(op)
+ regalloc.free_temp_vars()
+ regalloc._check_invariants()
+ if not we_are_translated():
+ self.mc.BRK()
+ self.mc.mark_op(None) # end of the loop
+ regalloc.operations = None
+
+ def dispatch_comparison(self, op):
+ opnum = op.getopnum()
+ arglocs = comp_operations[opnum](self._regalloc, op, True)
+ assert arglocs is not None
+ return asm_comp_operations[opnum](self, op, arglocs)
+
+ # regalloc support
+ def load(self, loc, value):
+ """load an immediate value into a register"""
+ assert (loc.is_core_reg() and value.is_imm()
+ or loc.is_vfp_reg() and value.is_imm_float())
+ if value.is_imm():
+ self.mc.gen_load_int(loc.value, value.getint())
+ elif value.is_imm_float():
+ self.mc.gen_load_int(r.ip0.value, value.getint())
+ self.mc.LDR_di(loc.value, r.ip0.value, 0)
+
+ def _mov_stack_to_loc(self, prev_loc, loc):
+ offset = prev_loc.value
+ if loc.is_core_reg():
+ assert prev_loc.type != FLOAT, 'trying to load from an \
+ incompatible location into a core register'
+ # unspill a core register
+ assert 0 <= offset <= (1<<15) - 1
+ self.mc.LDR_ri(loc.value, r.fp.value, offset)
+ return
+ if loc.is_vfp_reg():
+ assert prev_loc.type == FLOAT, 'trying to load from an \
+ incompatible location into a float register'
+ assert 0 <= offset <= (1 << 15) - 1
+ self.mc.LDR_di(loc.value, r.fp.value, offset)
+ return
+ assert False
+ # elif loc.is_vfp_reg():
+ # assert prev_loc.type == FLOAT, 'trying to load from an \
+ # incompatible location into a float register'
+ # # load spilled value into vfp reg
+ # is_imm = check_imm_arg(offset)
+ # helper, save = self.get_tmp_reg()
+ # save_helper = not is_imm and save
+ # elif loc.is_raw_sp():
+ # assert (loc.type == prev_loc.type == FLOAT
+ # or (loc.type != FLOAT and prev_loc.type != FLOAT))
+ # tmp = loc
+ # if loc.is_float():
+ # loc = r.vfp_ip
+ # else:
+ # loc, save_helper = self.get_tmp_reg()
+ # assert not save_helper
+ # helper, save_helper = self.get_tmp_reg([loc])
+ # assert not save_helper
+ # else:
+ # assert 0, 'unsupported case'
+
+ # if save_helper:
+ # self.mc.PUSH([helper.value], cond=cond)
+ # self.load_reg(self.mc, loc, r.fp, offset, cond=cond, helper=helper)
+ # if save_helper:
+ # self.mc.POP([helper.value], cond=cond)
+
+ def _mov_reg_to_loc(self, prev_loc, loc):
+ if loc.is_core_reg():
+ self.mc.MOV_rr(loc.value, prev_loc.value)
+ elif loc.is_stack():
+ self.mc.STR_ri(prev_loc.value, r.fp.value, loc.value)
+ else:
+ assert False
+
+ def _mov_imm_to_loc(self, prev_loc, loc):
+ if loc.is_core_reg():
+ self.mc.gen_load_int(loc.value, prev_loc.value)
+ elif loc.is_stack():
+ self.mc.gen_load_int(r.ip0.value, prev_loc.value)
+ self.mc.STR_ri(r.ip0.value, r.fp.value, loc.value)
+ else:
+ assert False
+
+ def new_stack_loc(self, i, tp):
+ base_ofs = self.cpu.get_baseofs_of_frame_field()
+ return StackLocation(i, get_fp_offset(base_ofs, i), tp)
+
+ def mov_loc_to_raw_stack(self, loc, pos):
+ if loc.is_core_reg():
+ self.mc.STR_ri(loc.value, r.sp.value, pos)
+ elif loc.is_stack():
+ self.mc.LDR_ri(r.ip0.value, r.fp.value, loc.value)
+ self.mc.STR_ri(r.ip0.value, r.sp.value, pos)
+ elif loc.is_vfp_reg():
+ self.mc.STR_di(loc.value, r.sp.value, pos)
+ elif loc.is_imm():
+ self.mc.gen_load_int(r.ip0.value, loc.value)
+ self.mc.STR_ri(r.ip0.value, r.sp.value, pos)
+ else:
+ assert False, "wrong loc"
+
+ def mov_raw_stack_to_loc(self, pos, loc):
+ if loc.is_core_reg():
+ self.mc.LDR_ri(loc.value, r.sp.value, pos)
+ elif loc.is_stack():
+ self.mc.LDR_ri(r.ip0.value, r.sp.value, pos)
+ self.mc.STR_ri(r.ip0.value, r.fp.value, loc.value)
+ elif loc.is_vfp_reg():
+ self.mc.LDR_di(loc.value, r.sp.value, pos)
+ else:
+ assert False, "wrong loc"
+
+ def _mov_imm_float_to_loc(self, prev_loc, loc):
+ if loc.is_vfp_reg():
+ self.load(loc, prev_loc)
+ elif loc.is_stack():
+ self.load(r.vfp_ip, prev_loc)
+ self._mov_vfp_reg_to_loc(r.vfp_ip, loc)
+ else:
+ assert False, "wrong loc"
+
+ def _mov_vfp_reg_to_loc(self, prev_loc, loc):
+ if loc.is_stack():
+ self.mc.STR_di(prev_loc.value, r.fp.value, loc.value)
+ elif loc.is_vfp_reg():
+ self.mc.FMOV_dd(loc.value, prev_loc.value)
+ else:
+ assert False, "wrong loc"
+
+ def push_locations(self, locs):
+ if not locs:
+ return
+ depth = len(locs) * WORD
+ depth += depth & WORD # align
+ self.mc.SUB_ri(r.sp.value, r.sp.value, depth)
+ for i, loc in enumerate(locs):
+ self.mov_loc_to_raw_stack(loc, i * WORD)
+
+ def pop_locations(self, locs):
+ if not locs:
+ return
+ depth = len(locs) * WORD
+ depth += depth & WORD # align
+ for i, loc in enumerate(locs):
+ self.mov_raw_stack_to_loc(i * WORD, loc)
+ self.mc.ADD_ri(r.sp.value, r.sp.value, depth)
+
+ def regalloc_mov(self, prev_loc, loc):
+ """Moves a value from a previous location to some other location"""
+ if prev_loc.is_imm():
+ return self._mov_imm_to_loc(prev_loc, loc)
+ elif prev_loc.is_core_reg():
+ self._mov_reg_to_loc(prev_loc, loc)
+ elif prev_loc.is_stack():
+ self._mov_stack_to_loc(prev_loc, loc)
+ elif prev_loc.is_imm_float():
+ self._mov_imm_float_to_loc(prev_loc, loc)
+ elif prev_loc.is_vfp_reg():
+ self._mov_vfp_reg_to_loc(prev_loc, loc)
+ else:
+ assert 0, 'unsupported case'
+ mov_loc_loc = regalloc_mov
+
+ def gen_func_epilog(self, mc=None):
+ gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+ if mc is None:
+ mc = self.mc
+ if gcrootmap and gcrootmap.is_shadow_stack:
+ self.gen_footer_shadowstack(gcrootmap, mc)
+
+ # pop all callee saved registers
+
+ stack_size = (len(r.callee_saved_registers) + 4) * WORD
+
+ for i in range(0, len(r.callee_saved_registers), 2):
+ mc.LDP_rri(r.callee_saved_registers[i].value,
+ r.callee_saved_registers[i + 1].value,
+ r.sp.value,
+ (i + 4) * WORD)
+ mc.LDP_rr_postindex(r.lr.value, r.fp.value, r.sp.value, stack_size)
+
+
+ mc.RET_r(r.lr.value)
+
+ def gen_shadowstack_header(self, gcrootmap):
+ # we push two words, like the x86 backend does:
+ # the '1' is to benefit from the shadowstack 'is_minor' optimization
+ rst = gcrootmap.get_root_stack_top_addr()
+ self.mc.gen_load_int(r.ip1.value, rst)
+ # x8 = *ip1
+ self.load_reg(self.mc, r.x8, r.ip1)
+ # x8[0] = 1
+ self.mc.gen_load_int(r.ip0.value, 1)
+ self.store_reg(self.mc, r.ip0, r.x8)
+ # x8[1] = r.fp
+ self.store_reg(self.mc, r.fp, r.x8, WORD)
+ # *ip1 = x8 + 2 * WORD
+ self.mc.ADD_ri(r.x8.value, r.x8.value, 2 * WORD)
+ self.store_reg(self.mc, r.x8, r.ip1)
+
+ def gen_footer_shadowstack(self, gcrootmap, mc):
+ rst = gcrootmap.get_root_stack_top_addr()
+ mc.gen_load_int(r.ip0.value, rst)
+ self.load_reg(mc, r.ip1, r.ip0)
+ mc.SUB_ri(r.ip1.value, r.ip1.value, 2 * WORD) # two words, see above
+ self.store_reg(mc, r.ip1, r.ip0)
+
+ def store_reg(self, mc, source, base, ofs=0, helper=None):
+ if source.is_vfp_reg():
+ return self._store_vfp_reg(mc, source, base, ofs)
+ else:
+ return self._store_core_reg(mc, source, base, ofs)
+
+ def _store_vfp_reg(self, mc, source, base, ofs):
+ assert ofs <= (1 << 15) - 1
+ mc.STR_di(source.value, base.value, ofs)
+
+ def _store_core_reg(self, mc, source, base, ofs):
+ # XXX fix:
+ assert ofs & 0x7 == 0
+ assert 0 <= ofs < 32768
+ mc.STR_ri(source.value, base.value, ofs)
+ #if check_imm_arg(ofs):
+ # mc.STR_ri(source.value, base.value, imm=ofs)
+ #else:
+ # mc.gen_load_int(r.ip1, ofs)
+ # mc.STR_rr(source.value, base.value, r.ip1)
+
+ def load_reg(self, mc, target, base, ofs=0, helper=r.ip0):
+ assert target.is_core_reg()
+ if check_imm_arg(abs(ofs)):
+ mc.LDR_ri(target.value, base.value, ofs)
+ else:
+ mc.gen_load_int(helper.value, ofs)
+ mc.LDR_rr(target.value, base.value, helper.value)
+
+ def check_frame_before_jump(self, target_token):
+ if target_token in self.target_tokens_currently_compiling:
+ return
+ if target_token._arm_clt is self.current_clt:
+ return
+ # We can have a frame coming from god knows where that's
+ # passed to a jump to another loop. Make sure it has the
+ # correct depth
+ expected_size = target_token._arm_clt.frame_info.jfi_frame_depth
+ self._check_frame_depth(self.mc, self._regalloc.get_gcmap(),
+ expected_size=expected_size)
+
+ # ../x86/assembler.py:668
+ def redirect_call_assembler(self, oldlooptoken, newlooptoken):
+ # some minimal sanity checking
+ old_nbargs = oldlooptoken.compiled_loop_token._debug_nbargs
+ new_nbargs = newlooptoken.compiled_loop_token._debug_nbargs
+ assert old_nbargs == new_nbargs
+ # we overwrite the instructions at the old _ll_function_addr
+ # to start with a JMP to the new _ll_function_addr.
+ # Ideally we should rather patch all existing CALLs, but well.
+ oldadr = oldlooptoken._ll_function_addr
+ target = newlooptoken._ll_function_addr
+ # copy frame-info data
+ baseofs = self.cpu.get_baseofs_of_frame_field()
+ newlooptoken.compiled_loop_token.update_frame_info(
+ oldlooptoken.compiled_loop_token, baseofs)
+ mc = InstrBuilder()
+ mc.B(target)
+ mc.copy_to_raw_memory(oldadr)
+ #
+ jl.redirect_assembler(oldlooptoken, newlooptoken, newlooptoken.number)
+
+
+
+def not_implemented(msg):
+ msg = '[ARM64/asm] %s\n' % msg
+ if we_are_translated():
+ llop.debug_print(lltype.Void, msg)
+ raise NotImplementedError(msg)
+
+
+def notimplemented_op(self, op, arglocs):
+ print "[ARM64/asm] %s not implemented" % op.getopname()
+ raise NotImplementedError(op)
+
+def notimplemented_comp_op(self, op, arglocs):
+ print "[ARM64/asm] %s not implemented" % op.getopname()
+ raise NotImplementedError(op)
+
+def notimplemented_guard_op(self, op, guard_op, fcond, arglocs):
+ print "[ARM64/asm] %s not implemented" % op.getopname()
+ raise NotImplementedError(op)
+
+asm_operations = [notimplemented_op] * (rop._LAST + 1)
+asm_guard_operations = [notimplemented_guard_op] * (rop._LAST + 1)
+asm_comp_operations = [notimplemented_comp_op] * (rop._LAST + 1)
+asm_extra_operations = {}
+
+for name, value in ResOpAssembler.__dict__.iteritems():
+ if name.startswith('emit_opx_'):
+ opname = name[len('emit_opx_'):]
+ num = getattr(EffectInfo, 'OS_' + opname.upper())
+ asm_extra_operations[num] = value
+ elif name.startswith('emit_op_'):
+ opname = name[len('emit_op_'):]
+ num = getattr(rop, opname.upper())
+ asm_operations[num] = value
+ elif name.startswith('emit_guard_op_'):
+ opname = name[len('emit_guard_op_'):]
+ num = getattr(rop, opname.upper())
+ asm_guard_operations[num] = value
+ elif name.startswith('emit_comp_op_'):
+ opname = name[len('emit_comp_op_'):]
+ num = getattr(rop, opname.upper())
+ asm_comp_operations[num] = value
diff --git a/rpython/jit/backend/aarch64/callbuilder.py b/rpython/jit/backend/aarch64/callbuilder.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/aarch64/callbuilder.py
@@ -0,0 +1,291 @@
+
+from rpython.jit.backend.llsupport.callbuilder import AbstractCallBuilder
+from rpython.jit.backend.aarch64.arch import WORD
+from rpython.jit.metainterp.history import INT, FLOAT, REF
+from rpython.jit.backend.aarch64 import registers as r
+from rpython.jit.backend.arm import conditions as c
+from rpython.jit.backend.aarch64.jump import remap_frame_layout # we use arm algo
+from rpython.jit.backend.llsupport import llerrno
+from rpython.jit.backend.aarch64.codebuilder import OverwritingBuilder
+
+from rpython.rlib.objectmodel import we_are_translated
+from rpython.rtyper.lltypesystem import rffi
+
+class Aarch64CallBuilder(AbstractCallBuilder):
+    def __init__(self, assembler, fnloc, arglocs,
+                 resloc=r.x0, restype=INT, ressize=WORD, ressigned=True):
+        # Integer/pointer results land in x0 by default (AArch64 ABI).
+        # NOTE(review): 'ressigned' is accepted here but neither stored
+        # nor forwarded to the base class, while load_result() reads
+        # self.ressign -- presumably supplied by AbstractCallBuilder;
+        # verify against llsupport/callbuilder.py.
+        AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
+                                     resloc, restype, ressize)
+        # bytes of extra SP adjustment made for stack-passed arguments;
+        # set by prepare_arguments(), undone by restore_stack_pointer()
+        self.current_sp = 0
+
+    def prepare_arguments(self):
+        """Move call arguments into their AAPCS64 locations.
+
+        Core-register args fill x0..x7, float args fill d0..d7; anything
+        beyond that overflows onto the stack.  Stack space is rounded up
+        to an even number of WORDs (16-byte SP alignment).  A
+        non-immediate call target is parked in x8 for emit_raw_call().
+        """
+        arglocs = self.arglocs
+        non_float_locs = []
+        non_float_regs = []
+        float_locs = []
+        float_regs = []
+        stack_locs = []
+        # pop() takes from the end, so registers are handed out in
+        # ascending order: x0 first, then x1, ... (same for d0..d7)
+        free_regs = [r.x7, r.x6, r.x5, r.x4, r.x3, r.x2, r.x1, r.x0]
+        free_float_regs = [r.d7, r.d6, r.d5, r.d4, r.d3, r.d2, r.d1, r.d0]
+        for arg in arglocs:
+            if arg.type == FLOAT:
+                if free_float_regs:
+                    float_locs.append(arg)
+                    float_regs.append(free_float_regs.pop())
+                else:
+                    stack_locs.append(arg)
+            else:
+                if free_regs:
+                    non_float_locs.append(arg)
+                    non_float_regs.append(free_regs.pop())
+                else:
+                    stack_locs.append(arg)
+
+        if stack_locs:
+            # round up to an even WORD count so SP stays 16-byte aligned
+            adj = len(stack_locs) + (len(stack_locs) & 1)
+            self.mc.SUB_ri(r.sp.value, r.sp.value, adj * WORD)
+            self.current_sp = adj * WORD
+            c = 0
+            for loc in stack_locs:
+                self.asm.mov_loc_to_raw_stack(loc, c)
+                c += WORD
+
+        # A non-immediate target first goes to scratch ip1 so that the
+        # register shuffling below cannot clobber it; it is moved into
+        # x8 only after remap_frame_layout is done.
+        move_back = False
+        if not self.fnloc.is_imm():
+            if self.fnloc.is_core_reg():
+                self.mc.MOV_rr(r.ip1.value, self.fnloc.value)
+            else:
+                assert self.fnloc.is_stack()
+                self.mc.LDR_ri(r.ip1.value, r.fp.value, self.fnloc.value)
+            self.fnloc = r.x8
+            move_back = True
+
+        # parallel-move arguments into their target registers
+        # (ip0 / d8 serve as the scratch registers for cycle breaking)
+        remap_frame_layout(self.asm, non_float_locs, non_float_regs, r.ip0)
+        if float_locs:
+            remap_frame_layout(self.asm, float_locs, float_regs, r.d8)
+
+        if move_back:
+            self.mc.MOV_rr(r.x8.value, r.ip1.value)
+
+    def push_gcmap(self):
+        # Record the live-register gcmap for the duration of the call.
+        # With shadowstack there are no registers to track (noregs=True);
+        # otherwise x0 is listed as possibly holding a GC pointer.
+        noregs = self.asm.cpu.gc_ll_descr.is_shadow_stack()
+        gcmap = self.asm._regalloc.get_gcmap([r.x0], noregs=noregs)
+        self.asm.push_gcmap(self.mc, gcmap)
+
+    def pop_gcmap(self):
+        # Reload the jitframe register first -- presumably because a GC
+        # triggered during the call may have moved the frame -- then drop
+        # the gcmap pushed by push_gcmap().
+        self.asm._reload_frame_if_necessary(self.mc)
+        self.asm.pop_gcmap(self.mc)
+
+    def emit_raw_call(self):
+        # the actual call: direct BL for an immediate target, otherwise
+        # BLR through x8 (prepare_arguments moved any non-immediate
+        # fnloc into x8, so a stack fnloc can no longer occur here)
+        if self.fnloc.is_imm():
+            self.mc.BL(self.fnloc.value)
+            return
+        if self.fnloc.is_stack():
+            assert False, "we should never be here"
+        else:
+            assert self.fnloc.is_core_reg()
+            assert self.fnloc is r.x8
+            self.mc.BLR_r(self.fnloc.value)
+
+ def restore_stack_pointer(self):
+ assert self.current_sp & 1 == 0 # always adjusted to 16 bytes
+ if self.current_sp == 0:
+ return
+ self.mc.ADD_ri(r.sp.value, r.sp.value, self.current_sp)
+ self.current_sp = 0
+
+    def load_result(self):
+        """Post-process the call result sitting in the return register.
+
+        Single-precision ('S') and longlong ('L') result types are
+        placeholders: the 'assert False' lines make the code after them
+        (XXX / YYY, copied from the 32-bit ARM backend) unreachable.
+        Sub-word integer results are widened to a full register.
+        """
+        resloc = self.resloc
+        if self.restype == 'S':
+            assert False, "not supported yet"
+            XXX
+            self.mc.VMOV_sc(resloc.value, r.s0.value)
+        elif self.restype == 'L':
+            assert False, "not possible on 64bit backend"
+            YYY
+            assert resloc.is_vfp_reg()
+            self.mc.FMDRR(resloc.value, r.r0.value, r.r1.value)
+        # ensure the result is wellformed and stored in the correct location
+        if resloc is not None and resloc.is_core_reg():
+            self._ensure_result_bit_extension(resloc,
+                                          self.ressize, self.ressign)
+
+    def _ensure_result_bit_extension(self, resloc, size, signed):
+        # Widen a sub-word result (size in bytes: 4, 2 or 1) to a full
+        # 64-bit register value.  The idiom is: shift left to put the
+        # low 'size' bytes at the top, then shift right logically (LSR,
+        # zero-extend) or arithmetically (ASR, sign-extend).  A full
+        # WORD-sized result needs no adjustment.
+        if size == WORD:
+            return
+        if size == 4:
+            if not signed: # unsigned int
+                self.mc.LSL_ri(resloc.value, resloc.value, 32)
+                self.mc.LSR_ri(resloc.value, resloc.value, 32)
+            else: # signed int
+                self.mc.LSL_ri(resloc.value, resloc.value, 32)
+                self.mc.ASR_ri(resloc.value, resloc.value, 32)
+        elif size == 2:
+            if not signed:
+                self.mc.LSL_ri(resloc.value, resloc.value, 48)
+                self.mc.LSR_ri(resloc.value, resloc.value, 48)
+            else:
+                self.mc.LSL_ri(resloc.value, resloc.value, 48)
+                self.mc.ASR_ri(resloc.value, resloc.value, 48)
+        elif size == 1:
+            if not signed: # unsigned char
+                # a single AND is enough to zero-extend one byte
+                self.mc.AND_ri(resloc.value, resloc.value, 0xFF)
+            else:
+                self.mc.LSL_ri(resloc.value, resloc.value, 56)
+                self.mc.ASR_ri(resloc.value, resloc.value, 56)
+
+    def call_releasegil_addr_and_move_real_arguments(self, fastgil):
+        """Release the GIL immediately before a call_release_gil call.
+
+        x19/x20 are callee-saved on AArch64, so the values stashed in
+        them survive the C call and can be compared when the GIL is
+        re-acquired afterwards.
+        """
+        assert self.is_call_release_gil
+        assert not self.asm._is_asmgcc()
+        RFASTGILPTR = r.x19 # constant &rpy_fastgil
+        RSHADOWOLD = r.x20 # old value of the shadowstack pointer,
+                            # which we save here for later comparison
+
+        gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
+        if gcrootmap:
+            # snapshot the current shadowstack top into x20
+            rst = gcrootmap.get_root_stack_top_addr()
+            self.mc.gen_load_int(r.ip1.value, rst)
+            self.mc.LDR_ri(RSHADOWOLD.value, r.ip1.value, 0)
+
+        # change 'rpy_fastgil' to 0 (it should be non-zero right now)
+        # STLR (store-release of the zero register) orders all earlier
+        # accesses before the GIL becomes visible as free to other threads
+        self.mc.gen_load_int(RFASTGILPTR.value, fastgil)
+        self.mc.STLR(r.xzr.value, RFASTGILPTR.value)
+
+        if not we_are_translated(): # for testing: we should not access
+            self.mc.ADD_ri(r.fp.value, r.fp.value, 1) # fp any more
+
+ def write_real_errno(self, save_err):
+ if save_err & rffi.RFFI_READSAVED_ERRNO:
+ # Just before a call, read '*_errno' and write it into the
+ # real 'errno'. The x0-x7 registers contain arguments to the
More information about the pypy-commit
mailing list