[pypy-commit] pypy reflex-support: merge default into branch
wlav
noreply at buildbot.pypy.org
Sat Sep 29 02:49:20 CEST 2012
Author: Wim Lavrijsen <WLavrijsen at lbl.gov>
Branch: reflex-support
Changeset: r57644:4455780f16f1
Date: 2012-09-05 12:55 -0700
http://bitbucket.org/pypy/pypy/changeset/4455780f16f1/
Log: merge default into branch
diff too long, truncating to 2000 out of 17500 lines
diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -1,5 +1,6 @@
syntax: glob
*.py[co]
+*.sw[po]
*~
.*.swp
.idea
diff --git a/lib-python/2.7/test/test_winreg.py b/lib-python/2.7/test/test_winreg.py
--- a/lib-python/2.7/test/test_winreg.py
+++ b/lib-python/2.7/test/test_winreg.py
@@ -1,7 +1,7 @@
# Test the windows specific win32reg module.
# Only win32reg functions not hit here: FlushKey, LoadKey and SaveKey
-import os, sys
+import os, sys, errno
import unittest
from test import test_support
threading = test_support.import_module("threading")
@@ -283,7 +283,13 @@
def test_dynamic_key(self):
# Issue2810, when the value is dynamically generated, these
# throw "WindowsError: More data is available" in 2.6 and 3.1
- EnumValue(HKEY_PERFORMANCE_DATA, 0)
+ try:
+ EnumValue(HKEY_PERFORMANCE_DATA, 0)
+ except OSError as e:
+ if e.errno in (errno.EPERM, errno.EACCES):
+ self.skipTest("access denied to registry key "
+ "(are you running in a non-interactive session?)")
+ raise
QueryValueEx(HKEY_PERFORMANCE_DATA, None)
# Reflection requires XP x64/Vista at a minimum. XP doesn't have this stuff
diff --git a/lib-python/conftest.py b/lib-python/conftest.py
--- a/lib-python/conftest.py
+++ b/lib-python/conftest.py
@@ -281,7 +281,7 @@
RegrTest('test_isinstance.py', core=True),
RegrTest('test_iter.py', core=True),
RegrTest('test_iterlen.py', skip="undocumented internal API behavior __length_hint__"),
- RegrTest('test_itertools.py', core=True),
+ RegrTest('test_itertools.py', core=True, usemodules="itertools struct"),
RegrTest('test_json.py'),
RegrTest('test_kqueue.py'),
RegrTest('test_largefile.py'),
diff --git a/pypy/annotation/policy.py b/pypy/annotation/policy.py
--- a/pypy/annotation/policy.py
+++ b/pypy/annotation/policy.py
@@ -27,11 +27,6 @@
callback()
del annotator.bookkeeper.pending_specializations[:]
- def _adjust_space_config(self, space):
- # allow to override space options.
- if getattr(self, 'do_imports_immediately', None) is not None:
- space.do_imports_immediately = self.do_imports_immediately
-
class AnnotatorPolicy(BasicAnnotatorPolicy):
"""
Possibly subclass and pass an instance to the annotator to control special casing during annotation
@@ -67,7 +62,7 @@
def specialize_with_parms(funcdesc, args_s):
return specializer(funcdesc, args_s, *parms)
return specialize_with_parms
-
+
# common specializations
default_specialize = staticmethod(default)
diff --git a/pypy/annotation/unaryop.py b/pypy/annotation/unaryop.py
--- a/pypy/annotation/unaryop.py
+++ b/pypy/annotation/unaryop.py
@@ -530,7 +530,7 @@
if not s_enc.is_constant():
raise TypeError("Non-constant encoding not supported")
enc = s_enc.const
- if enc not in ('ascii', 'latin-1'):
+ if enc not in ('ascii', 'latin-1', 'utf-8'):
raise TypeError("Encoding %s not supported for unicode" % (enc,))
return SomeString()
method_encode.can_only_throw = [UnicodeEncodeError]
@@ -553,7 +553,7 @@
if not s_enc.is_constant():
raise TypeError("Non-constant encoding not supported")
enc = s_enc.const
- if enc not in ('ascii', 'latin-1'):
+ if enc not in ('ascii', 'latin-1', 'utf-8'):
raise TypeError("Encoding %s not supported for strings" % (enc,))
return SomeUnicodeString()
method_decode.can_only_throw = [UnicodeDecodeError]
diff --git a/pypy/config/translationoption.py b/pypy/config/translationoption.py
--- a/pypy/config/translationoption.py
+++ b/pypy/config/translationoption.py
@@ -24,6 +24,7 @@
'maemo',
'host',
'distutils',
+ 'arm',
]
translation_optiondescription = OptionDescription(
@@ -117,7 +118,7 @@
("translation.gcrootfinder", DEFL_ROOTFINDER_WITHJIT),
("translation.list_comprehension_operations", True)]),
ChoiceOption("jit_backend", "choose the backend for the JIT",
- ["auto", "x86", "x86-without-sse2", "llvm"],
+ ["auto", "x86", "x86-without-sse2", "llvm", 'arm'],
default="auto", cmdline="--jit-backend"),
ChoiceOption("jit_profiler", "integrate profiler support into the JIT",
["off", "oprofile"],
@@ -406,7 +407,7 @@
set_platform(config.translation.platform, config.translation.cc)
def get_platform(config):
- from pypy.translator.platform import pick_platform
+ from pypy.translator.platform import pick_platform
opt = config.translation.platform
cc = config.translation.cc
return pick_platform(opt, cc)
diff --git a/pypy/conftest.py b/pypy/conftest.py
--- a/pypy/conftest.py
+++ b/pypy/conftest.py
@@ -553,6 +553,7 @@
def _spawn(self, *args, **kwds):
import pexpect
+ kwds.setdefault('timeout', 600)
child = pexpect.spawn(*args, **kwds)
child.logfile = sys.stdout
return child
diff --git a/pypy/doc/arm.rst b/pypy/doc/arm.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/arm.rst
@@ -0,0 +1,150 @@
+=========================
+Cross-translating for ARM
+=========================
+
+
+Here we describe the setup required and the steps needed to follow to translate
+an interpreter using the RPython translator to target ARM using a cross
+compilation toolchain.
+
+To translate an RPython program for ARM we can either translate directly on an
+ARM device following the normal translation steps, or cross-translate using a
+cross-compilation toolchain. Unfortunately, translating directly is not really
+feasible on most ARM machines.
+
+To cross-translate we run the translation on a more powerful (usually
+x86) machine and generate a binary for ARM using a cross-compiler to compile
+the generated C code. There are several constraints when doing this. In
+particular we currently only support Linux as translation host and target
+platforms (tested on Ubuntu). Also we need a 32-bit environment to run the
+translation. This can be done either on a 32bit host or in 32bit chroot.
+
+
+Requirements
+------------
+
+The tools required to cross translate from a Linux based host to an ARM based Linux target are:
+
+- A checkout of PyPy's arm-backend-2 branch.
+- The GCC ARM cross compiler (on Ubuntu it is the ``gcc-arm-linux-gnueabi`` package), but other toolchains should also work.
+- Scratchbox 2, a cross-compilation engine (``scratchbox2`` Ubuntu package).
+- A 32-bit PyPy or Python.
+- And the following (or corresponding) packages need to be installed to create an ARM based chroot:
+
+ * ``debootstrap``
+ * ``schroot``
+ * ``binfmt-support``
+ * ``qemu-system``
+ * ``qemu-user-static``
+
+
+Creating a Qemu based ARM chroot
+--------------------------------
+
+First we will need to create a rootfs containing the packages and dependencies
+required in order to translate PyPy or other interpreters. We are going to
+assume that the files will be placed in ``/srv/chroot/precise_arm``.
+
+Create the rootfs by calling:
+
+::
+
+ mkdir -p /srv/chroot/precise_arm
+ qemu-debootstrap --variant=buildd --arch=armel precise /srv/chroot/precise_arm/ http://ports.ubuntu.com/ubuntu-ports/
+
+Next, copy the qemu-arm-static binary to the rootfs.
+
+::
+
+ cp /usr/bin/qemu-arm-static /srv/chroot/precise_arm/usr/bin/qemu-arm-static
+
+For easier configuration and management we will create a schroot pointing to
+the rootfs. We need to add a configuration block (like the one below) to the
+schroot configuration file in /etc/schroot/schroot.conf.
+
+
+::
+
+ [precise_arm]
+ directory=/srv/chroot/precise_arm
+ users=USERNAME
+ root-users=USERNAME
+ groups=users
+ aliases=default
+ type=directory
+
+
+To verify that everything is working in the chroot, running ``schroot -c
+precise_arm`` should start a shell running in the schroot environment using
+qemu-arm to execute the ARM binaries. Running ``uname -m`` in the chroot should
+yield a result like ``armv7l``, showing that we are emulating an ARM system.
+
+Start the schroot as the user root in order to configure the apt sources and
+to install the following packages:
+
+
+::
+
+ schroot -c precise_arm -u root
+ echo "deb http://ports.ubuntu.com/ubuntu-ports/ precise main universe restricted" > /etc/apt/sources.list
+ apt-get update
+ apt-get install libffi-dev libgc-dev python-dev build-essential libncurses5-dev libbz2-dev
+
+
+Now all dependencies should be in place and we can exit the schroot environment.
+
+
+Configuring scratchbox2
+-----------------------
+
+To configure the scratchbox we need to cd into the root directory of the rootfs
+we created before. From there we can call the sb2 configuration tools which
+will take the current directory as the base directory for the scratchbox2
+environment.
+
+::
+
+ cd /srv/chroot/precise_arm
+ sb2-init -c `which qemu-arm` ARM `which arm-linux-gnueabi-gcc`
+
+This will create a scratchbox2 based environment called ARM that maps calls to
+gcc done within the scratchbox to the arm-linux-gnueabi-gcc outside the
+scratchbox. Now we should have a working cross compilation toolchain in place
+and can start cross-translating programs for ARM.
+
+Translation
+-----------
+
+Having performed all the preliminary steps we should now be able to cross
+translate a program for ARM. You can use this_ minimal
+target to test your setup before applying it to a larger project.
+
+Before starting the translator we need to set two environment variables, so the
+translator knows how to use the scratchbox environment. We need to set the
+**SB2** environment variable to point to the rootfs and the **SB2OPT** should
+contain the command line options for the sb2 command. If our rootfs is in the
+folder /srv/chroot/precise_arm and the scratchbox environment is called "ARM",
+the variables would be defined as follows.
+
+
+::
+
+ export SB2=/srv/chroot/precise_arm
+ export SB2OPT='-t ARM'
+
+Once this is set, you can call the translator
+
+::
+
+ pypy ~/path_to_pypy_checkout/pypy/translator/goal/translate.py -O1 --platform=arm target.py
+
+If everything worked correctly this should yield an ARM binary. Running this binary in the ARM chroot or on an ARM device should produce the output ``"Hello World"``.
+
+.. _`this`:
+
+::
+
+ def main(args):
+ print "Hello World"
+ return 0
+
+ def target(*args):
+ return main, None
\ No newline at end of file
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -35,3 +35,7 @@
.. branch: better-enforceargs
.. branch: rpython-unicode-formatting
.. branch: jit-opaque-licm
+.. branch: rpython-utf8
+Support for utf-8 encoding in RPython
+.. branch: arm-backend-2
+Support ARM in the JIT.
diff --git a/pypy/interpreter/astcompiler/optimize.py b/pypy/interpreter/astcompiler/optimize.py
--- a/pypy/interpreter/astcompiler/optimize.py
+++ b/pypy/interpreter/astcompiler/optimize.py
@@ -21,28 +21,22 @@
def as_constant_truth(self, space):
"""Return the truth of this node if known."""
- raise AssertionError("only for expressions")
-
- def as_constant(self):
- """Return the value of this node as a wrapped constant if possible."""
- raise AssertionError("only for expressions")
-
- def accept_jump_if(self, gen, condition, target):
- raise AssertionError("only for expressions")
-
-
-class __extend__(ast.expr):
-
- def as_constant_truth(self, space):
const = self.as_constant()
if const is None:
return CONST_NOT_CONST
return int(space.is_true(const))
def as_constant(self):
+ """Return the value of this node as a wrapped constant if possible."""
return None
def accept_jump_if(self, gen, condition, target):
+ raise AssertionError("only for expressions")
+
+
+class __extend__(ast.expr):
+
+ def accept_jump_if(self, gen, condition, target):
self.walkabout(gen)
if condition:
gen.emit_jump(ops.POP_JUMP_IF_TRUE, target, True)
diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py
--- a/pypy/interpreter/pycode.py
+++ b/pypy/interpreter/pycode.py
@@ -19,6 +19,10 @@
from pypy.rlib.objectmodel import compute_hash
from pypy.tool.stdlib_opcode import opcodedesc, HAVE_ARGUMENT
+
+class BytecodeCorruption(Exception):
+ """Detected bytecode corruption. Never caught; it's an error."""
+
# helper
def unpack_str_tuple(space,w_str_tuple):
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -8,7 +8,7 @@
from pypy.interpreter.error import OperationError, operationerrfmt
from pypy.interpreter.baseobjspace import Wrappable
from pypy.interpreter import gateway, function, eval, pyframe, pytraceback
-from pypy.interpreter.pycode import PyCode
+from pypy.interpreter.pycode import PyCode, BytecodeCorruption
from pypy.tool.sourcetools import func_with_new_name
from pypy.rlib.objectmodel import we_are_translated
from pypy.rlib import jit, rstackovf
@@ -1172,9 +1172,6 @@
def __init__(self, operr):
self.operr = operr
-class BytecodeCorruption(Exception):
- """Detected bytecode corruption. Never caught; it's an error."""
-
### Frame Blocks ###
diff --git a/pypy/interpreter/test/test_compiler.py b/pypy/interpreter/test/test_compiler.py
--- a/pypy/interpreter/test/test_compiler.py
+++ b/pypy/interpreter/test/test_compiler.py
@@ -851,7 +851,7 @@
('a = 14%4', '(2)'), # binary modulo
('a = 2+3', '(5)'), # binary add
('a = 13-4', '(9)'), # binary subtract
- # ('a = (12,13)[1]', '(13)'), # binary subscr - pointless optimization
+ ('a = (12,13)[1]', '(13)'), # binary subscr
('a = 13 << 2', '(52)'), # binary lshift
('a = 13 >> 2', '(3)'), # binary rshift
('a = 13 & 7', '(5)'), # binary and
@@ -872,6 +872,10 @@
asm = dis_single('a="x"*1000')
assert '(1000)' in asm
+ def test_folding_of_binops_on_constants_crash(self):
+ compile('()[...]', '', 'eval')
+ # assert did not crash
+
def test_dis_stopcode(self):
source = """def _f(a):
print a
diff --git a/pypy/jit/backend/arm/__init__.py b/pypy/jit/backend/arm/__init__.py
new file mode 100644
diff --git a/pypy/jit/backend/arm/arch.py b/pypy/jit/backend/arm/arch.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/arch.py
@@ -0,0 +1,67 @@
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rlib.rarithmetic import r_uint
+
+
+FUNC_ALIGN = 8
+WORD = 4
+DOUBLE_WORD = 8
+
+# the number of registers that we need to save around malloc calls
+N_REGISTERS_SAVED_BY_MALLOC = 9
+# the offset from the FP where the list of the registers mentioned above starts
+MY_COPY_OF_REGS = WORD
+# The address stored in the PC points two words behind the current instruction
+PC_OFFSET = 8
+FORCE_INDEX_OFS = 0
+
+from pypy.translator.tool.cbuild import ExternalCompilationInfo
+eci = ExternalCompilationInfo(post_include_bits=["""
+static int pypy__arm_int_div(int a, int b) {
+ return a/b;
+}
+static uint pypy__arm_uint_div(uint a, uint b) {
+ return a/b;
+}
+static int pypy__arm_int_mod(uint a, uint b) {
+ return a % b;
+}
+"""])
+
+
+def arm_int_div_emulator(a, b):
+ return int(a / float(b))
+arm_int_div_sign = lltype.Ptr(
+ lltype.FuncType([lltype.Signed, lltype.Signed], lltype.Signed))
+arm_int_div = rffi.llexternal(
+ "pypy__arm_int_div", [lltype.Signed, lltype.Signed], lltype.Signed,
+ _callable=arm_int_div_emulator,
+ compilation_info=eci,
+ _nowrapper=True, elidable_function=True)
+
+
+def arm_uint_div_emulator(a, b):
+ return r_uint(a) / r_uint(b)
+arm_uint_div_sign = lltype.Ptr(
+ lltype.FuncType([lltype.Unsigned, lltype.Unsigned], lltype.Unsigned))
+arm_uint_div = rffi.llexternal(
+ "pypy__arm_uint_div", [lltype.Unsigned, lltype.Unsigned], lltype.Unsigned,
+ _callable=arm_uint_div_emulator,
+ compilation_info=eci,
+ _nowrapper=True, elidable_function=True)
+
+
+def arm_int_mod_emulator(a, b):
+ sign = 1
+ if a < 0:
+ a = -1 * a
+ sign = -1
+ if b < 0:
+ b = -1 * b
+ res = a % b
+ return sign * res
+arm_int_mod_sign = arm_int_div_sign
+arm_int_mod = rffi.llexternal(
+ "pypy__arm_int_mod", [lltype.Signed, lltype.Signed], lltype.Signed,
+ _callable=arm_int_mod_emulator,
+ compilation_info=eci,
+ _nowrapper=True, elidable_function=True)
diff --git a/pypy/jit/backend/arm/assembler.py b/pypy/jit/backend/arm/assembler.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/assembler.py
@@ -0,0 +1,1358 @@
+from __future__ import with_statement
+import os
+from pypy.jit.backend.arm.helper.assembler import saved_registers
+from pypy.jit.backend.arm import conditions as c
+from pypy.jit.backend.arm import registers as r
+from pypy.jit.backend.arm.arch import WORD, DOUBLE_WORD, FUNC_ALIGN, \
+ N_REGISTERS_SAVED_BY_MALLOC
+from pypy.jit.backend.arm.codebuilder import ARMv7Builder, OverwritingBuilder
+from pypy.jit.backend.arm.locations import get_fp_offset
+from pypy.jit.backend.arm.regalloc import (Regalloc, ARMFrameManager,
+ ARMv7RegisterManager, check_imm_arg,
+ operations as regalloc_operations,
+ operations_with_guard as regalloc_operations_with_guard)
+from pypy.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
+from pypy.jit.backend.model import CompiledLoopToken
+from pypy.jit.codewriter import longlong
+from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
+from pypy.jit.metainterp.history import BoxInt, ConstInt
+from pypy.jit.metainterp.resoperation import rop, ResOperation
+from pypy.rlib import rgc
+from pypy.rlib.objectmodel import we_are_translated, specialize
+from pypy.rpython.annlowlevel import llhelper
+from pypy.rpython.lltypesystem import lltype, rffi, llmemory
+from pypy.rpython.lltypesystem.lloperation import llop
+from pypy.jit.backend.arm.opassembler import ResOpAssembler
+from pypy.rlib.debug import (debug_print, debug_start, debug_stop,
+ have_debug_prints)
+from pypy.rlib.jit import AsmInfo
+from pypy.rlib.objectmodel import compute_unique_id
+
+# XXX Move to llsupport
+from pypy.jit.backend.x86.support import values_array, memcpy_fn
+
+DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
+ ('type', lltype.Char), # 'b'ridge, 'l'abel or
+ # 'e'ntry point
+ ('number', lltype.Signed))
+
+
+class AssemblerARM(ResOpAssembler):
+
+ STACK_FIXED_AREA = -1
+
+ debug = True
+
+ def __init__(self, cpu, failargs_limit=1000):
+ self.cpu = cpu
+ self.fail_boxes_int = values_array(lltype.Signed, failargs_limit)
+ self.fail_boxes_float = values_array(longlong.FLOATSTORAGE,
+ failargs_limit)
+ self.fail_boxes_ptr = values_array(llmemory.GCREF, failargs_limit)
+ self.fail_boxes_count = 0
+ self.fail_force_index = 0
+ self.setup_failure_recovery()
+ self.mc = None
+ self.memcpy_addr = 0
+ self.pending_guards = None
+ self._exit_code_addr = 0
+ self.current_clt = None
+ self.malloc_slowpath = 0
+ self.wb_slowpath = [0, 0, 0, 0]
+ self._regalloc = None
+ self.datablockwrapper = None
+ self.propagate_exception_path = 0
+ self.stack_check_slowpath = 0
+ self._compute_stack_size()
+ self._debug = False
+ self.loop_run_counters = []
+ self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i')
+
+ def set_debug(self, v):
+ r = self._debug
+ self._debug = v
+ return r
+
+ def _compute_stack_size(self):
+ self.STACK_FIXED_AREA = len(r.callee_saved_registers) * WORD
+ self.STACK_FIXED_AREA += WORD # FORCE_TOKEN
+ self.STACK_FIXED_AREA += N_REGISTERS_SAVED_BY_MALLOC * WORD
+ if self.cpu.supports_floats:
+ self.STACK_FIXED_AREA += (len(r.callee_saved_vfp_registers)
+ * DOUBLE_WORD)
+ if self.STACK_FIXED_AREA % 8 != 0:
+ self.STACK_FIXED_AREA += WORD # Stack alignment
+ assert self.STACK_FIXED_AREA % 8 == 0
+
+ def setup(self, looptoken, operations):
+ self.current_clt = looptoken.compiled_loop_token
+ operations = self.cpu.gc_ll_descr.rewrite_assembler(self.cpu,
+ operations, self.current_clt.allgcrefs)
+ assert self.memcpy_addr != 0, 'setup_once() not called?'
+ self.mc = ARMv7Builder()
+ self.pending_guards = []
+ assert self.datablockwrapper is None
+ allblocks = self.get_asmmemmgr_blocks(looptoken)
+ self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
+ allblocks)
+ self.target_tokens_currently_compiling = {}
+ return operations
+
+ def teardown(self):
+ self.current_clt = None
+ self._regalloc = None
+ self.mc = None
+ self.pending_guards = None
+ assert self.datablockwrapper is None
+
+ def setup_once(self):
+ # Addresses of functions called by new_xxx operations
+ gc_ll_descr = self.cpu.gc_ll_descr
+ gc_ll_descr.initialize()
+ self._build_wb_slowpath(False)
+ self._build_wb_slowpath(True)
+ if self.cpu.supports_floats:
+ self._build_wb_slowpath(False, withfloats=True)
+ self._build_wb_slowpath(True, withfloats=True)
+ self._build_propagate_exception_path()
+ if gc_ll_descr.get_malloc_slowpath_addr is not None:
+ self._build_malloc_slowpath()
+ self._build_stack_check_slowpath()
+ if gc_ll_descr.gcrootmap and gc_ll_descr.gcrootmap.is_shadow_stack:
+ self._build_release_gil(gc_ll_descr.gcrootmap)
+ self.memcpy_addr = self.cpu.cast_ptr_to_int(memcpy_fn)
+ self._exit_code_addr = self._gen_exit_path()
+ self._leave_jitted_hook_save_exc = \
+ self._gen_leave_jitted_hook_code(True)
+ self._leave_jitted_hook = self._gen_leave_jitted_hook_code(False)
+ if not self._debug:
+ # if self._debug is already set it means that someone called
+ # set_debug by hand before initializing the assembler. Leave it
+ # as it is
+ debug_start('jit-backend-counts')
+ self.set_debug(have_debug_prints())
+ debug_stop('jit-backend-counts')
+
+ def finish_once(self):
+ if self._debug:
+ debug_start('jit-backend-counts')
+ for i in range(len(self.loop_run_counters)):
+ struct = self.loop_run_counters[i]
+ if struct.type == 'l':
+ prefix = 'TargetToken(%d)' % struct.number
+ elif struct.type == 'b':
+ prefix = 'bridge ' + str(struct.number)
+ else:
+ prefix = 'entry ' + str(struct.number)
+ debug_print(prefix + ':' + str(struct.i))
+ debug_stop('jit-backend-counts')
+
+ # XXX: merge with x86
+ def _register_counter(self, tp, number, token):
+ # YYY very minor leak -- we need the counters to stay alive
+ # forever, just because we want to report them at the end
+ # of the process
+ struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
+ track_allocation=False)
+ struct.i = 0
+ struct.type = tp
+ if tp == 'b' or tp == 'e':
+ struct.number = number
+ else:
+ assert token
+ struct.number = compute_unique_id(token)
+ self.loop_run_counters.append(struct)
+ return struct
+
+ def _append_debugging_code(self, operations, tp, number, token):
+ counter = self._register_counter(tp, number, token)
+ c_adr = ConstInt(rffi.cast(lltype.Signed, counter))
+ box = BoxInt()
+ box2 = BoxInt()
+ ops = [ResOperation(rop.GETFIELD_RAW, [c_adr],
+ box, descr=self.debug_counter_descr),
+ ResOperation(rop.INT_ADD, [box, ConstInt(1)], box2),
+ ResOperation(rop.SETFIELD_RAW, [c_adr, box2],
+ None, descr=self.debug_counter_descr)]
+ operations.extend(ops)
+
+ @specialize.argtype(1)
+ def _inject_debugging_code(self, looptoken, operations, tp, number):
+ if self._debug:
+ # before doing anything, let's increase a counter
+ s = 0
+ for op in operations:
+ s += op.getopnum()
+ looptoken._arm_debug_checksum = s
+
+ newoperations = []
+ self._append_debugging_code(newoperations, tp, number,
+ None)
+ for op in operations:
+ newoperations.append(op)
+ if op.getopnum() == rop.LABEL:
+ self._append_debugging_code(newoperations, 'l', number,
+ op.getdescr())
+ operations = newoperations
+ return operations
+
+ @staticmethod
+ def _release_gil_shadowstack():
+ before = rffi.aroundstate.before
+ if before:
+ before()
+
+ @staticmethod
+ def _reacquire_gil_shadowstack():
+ after = rffi.aroundstate.after
+ if after:
+ after()
+
+ _NOARG_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
+
+ def _build_release_gil(self, gcrootmap):
+ assert gcrootmap.is_shadow_stack
+ releasegil_func = llhelper(self._NOARG_FUNC,
+ self._release_gil_shadowstack)
+ reacqgil_func = llhelper(self._NOARG_FUNC,
+ self._reacquire_gil_shadowstack)
+ self.releasegil_addr = rffi.cast(lltype.Signed, releasegil_func)
+ self.reacqgil_addr = rffi.cast(lltype.Signed, reacqgil_func)
+
+ def _gen_leave_jitted_hook_code(self, save_exc):
+ mc = ARMv7Builder()
+ if self.cpu.supports_floats:
+ floats = r.caller_vfp_resp
+ else:
+ floats = []
+ with saved_registers(mc, r.caller_resp + [r.lr], floats):
+ addr = self.cpu.get_on_leave_jitted_int(save_exception=save_exc)
+ mc.BL(addr)
+ assert self._exit_code_addr != 0
+ mc.B(self._exit_code_addr)
+ return mc.materialize(self.cpu.asmmemmgr, [],
+ self.cpu.gc_ll_descr.gcrootmap)
+
+ def _build_propagate_exception_path(self):
+ if self.cpu.propagate_exception_v < 0:
+ return # not supported (for tests, or non-translated)
+ #
+ mc = ARMv7Builder()
+ # call on_leave_jitted_save_exc()
+ if self.cpu.supports_floats:
+ floats = r.caller_vfp_resp
+ else:
+ floats = []
+ with saved_registers(mc, r.caller_resp + [r.lr], floats):
+ addr = self.cpu.get_on_leave_jitted_int(save_exception=True,
+ default_to_memoryerror=True)
+ mc.BL(addr)
+ mc.gen_load_int(r.ip.value, self.cpu.propagate_exception_v)
+ mc.MOV_rr(r.r0.value, r.ip.value)
+ self.gen_func_epilog(mc=mc)
+ self.propagate_exception_path = mc.materialize(self.cpu.asmmemmgr, [])
+
+ def _build_stack_check_slowpath(self):
+ _, _, slowpathaddr = self.cpu.insert_stack_check()
+ if slowpathaddr == 0 or self.cpu.propagate_exception_v < 0:
+ return # no stack check (for tests, or non-translated)
+ #
+ # make a "function" that is called immediately at the start of
+ # an assembler function. In particular, the stack looks like:
+ #
+ # | retaddr of caller | <-- aligned to a multiple of 16
+ # | saved argument regs |
+ # | my own retaddr | <-- sp
+ # +-----------------------+
+ #
+ mc = ARMv7Builder()
+ # save argument registers and return address
+ mc.PUSH([reg.value for reg in r.argument_regs] + [r.lr.value])
+ # stack is aligned here
+ # Pass current stack pointer as argument to the call
+ mc.MOV_rr(r.r0.value, r.sp.value)
+ #
+ mc.BL(slowpathaddr)
+
+ # check for an exception
+ mc.gen_load_int(r.r0.value, self.cpu.pos_exception())
+ mc.LDR_ri(r.r0.value, r.r0.value)
+ mc.TST_rr(r.r0.value, r.r0.value)
+ # restore registers and return
+ # We check for c.EQ here, meaning all bits zero in this case
+ mc.POP([reg.value for reg in r.argument_regs] + [r.pc.value], cond=c.EQ)
+ # call on_leave_jitted_save_exc()
+ addr = self.cpu.get_on_leave_jitted_int(save_exception=True)
+ mc.BL(addr)
+ #
+ mc.gen_load_int(r.r0.value, self.cpu.propagate_exception_v)
+ #
+ # footer -- note the ADD, which skips the return address of this
+ # function, and will instead return to the caller's caller. Note
+ # also that we completely ignore the saved arguments, because we
+ # are interrupting the function.
+ mc.ADD_ri(r.sp.value, r.sp.value, (len(r.argument_regs) + 1) * WORD)
+ mc.POP([r.pc.value])
+ #
+ rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+ self.stack_check_slowpath = rawstart
+
+ def _build_wb_slowpath(self, withcards, withfloats=False):
+ descr = self.cpu.gc_ll_descr.write_barrier_descr
+ if descr is None:
+ return
+ if not withcards:
+ func = descr.get_write_barrier_fn(self.cpu)
+ else:
+ if descr.jit_wb_cards_set == 0:
+ return
+ func = descr.get_write_barrier_from_array_fn(self.cpu)
+ if func == 0:
+ return
+ #
+ # This builds a helper function called from the slow path of
+ # write barriers. It must save all registers, and optionally
+ # all vfp registers. It takes a single argument which is in r0.
+ # It must keep stack alignment accordingly.
+ mc = ARMv7Builder()
+ #
+ if withfloats:
+ floats = r.caller_vfp_resp
+ else:
+ floats = []
+ with saved_registers(mc, r.caller_resp + [r.ip, r.lr], floats):
+ mc.BL(func)
+ #
+ if withcards:
+ # A final TEST8 before the RET, for the caller. Careful to
+ # not follow this instruction with another one that changes
+ # the status of the CPU flags!
+ mc.LDRB_ri(r.ip.value, r.r0.value,
+ imm=descr.jit_wb_if_flag_byteofs)
+ mc.TST_ri(r.ip.value, imm=0x80)
+ #
+ mc.MOV_rr(r.pc.value, r.lr.value)
+ #
+ rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+ self.wb_slowpath[withcards + 2 * withfloats] = rawstart
+
+ def setup_failure_recovery(self):
+
+ @rgc.no_collect
+ def failure_recovery_func(mem_loc, frame_pointer, stack_pointer):
+ """mem_loc is a structure in memory describing where the values for
+ the failargs are stored. frame_pointer is the address of the frame
+ pointer of the frame to be decoded. """
+ vfp_registers = rffi.cast(rffi.LONGLONGP, stack_pointer)
+ registers = rffi.ptradd(vfp_registers, len(r.all_vfp_regs))
+ registers = rffi.cast(rffi.LONGP, registers)
+ return self.decode_registers_and_descr(mem_loc, frame_pointer,
+ registers, vfp_registers)
+
+ self.failure_recovery_func = failure_recovery_func
+
+ recovery_func_sign = lltype.Ptr(lltype.FuncType([lltype.Signed] * 3,
+ lltype.Signed))
+
+ @rgc.no_collect
+ def decode_registers_and_descr(self, mem_loc, frame_pointer,
+ registers, vfp_registers):
+ """Decode locations encoded in memory at mem_loc and write the values
+ to the failboxes. Values for spilled vars and registers are stored on
+ stack at frame_loc """
+ assert frame_pointer & 1 == 0
+ self.fail_force_index = frame_pointer
+ bytecode = rffi.cast(rffi.UCHARP, mem_loc)
+ num = 0
+ value = 0
+ fvalue = 0
+ code_inputarg = False
+ while True:
+ code = rffi.cast(lltype.Signed, bytecode[0])
+ bytecode = rffi.ptradd(bytecode, 1)
+ if code >= self.CODE_FROMSTACK:
+ if code > 0x7F:
+ shift = 7
+ code &= 0x7F
+ while True:
+ nextcode = rffi.cast(lltype.Signed, bytecode[0])
+ bytecode = rffi.ptradd(bytecode, 1)
+ code |= (nextcode & 0x7F) << shift
+ shift += 7
+ if nextcode <= 0x7F:
+ break
+ # load the value from the stack
+ kind = code & 3
+ code = int((code - self.CODE_FROMSTACK) >> 2)
+ if code_inputarg:
+ code = ~code
+ code_inputarg = False
+ if kind == self.DESCR_FLOAT:
+ # we use code + 1 to get the hi word of the double worded float
+ stackloc = frame_pointer - get_fp_offset(int(code) + 1)
+ assert stackloc & 3 == 0
+ fvalue = rffi.cast(rffi.LONGLONGP, stackloc)[0]
+ else:
+ stackloc = frame_pointer - get_fp_offset(int(code))
+ assert stackloc & 1 == 0
+ value = rffi.cast(rffi.LONGP, stackloc)[0]
+ else:
+ # 'code' identifies a register: load its value
+ kind = code & 3
+ if kind == self.DESCR_SPECIAL:
+ if code == self.CODE_HOLE:
+ num += 1
+ continue
+ if code == self.CODE_INPUTARG:
+ code_inputarg = True
+ continue
+ assert code == self.CODE_STOP
+ break
+ code >>= 2
+ if kind == self.DESCR_FLOAT:
+ fvalue = vfp_registers[code]
+ else:
+ value = registers[code]
+ # store the loaded value into fail_boxes_<type>
+ if kind == self.DESCR_FLOAT:
+ tgt = self.fail_boxes_float.get_addr_for_num(num)
+ rffi.cast(rffi.LONGLONGP, tgt)[0] = fvalue
+ else:
+ if kind == self.DESCR_INT:
+ tgt = self.fail_boxes_int.get_addr_for_num(num)
+ elif kind == self.DESCR_REF:
+ assert (value & 3) == 0, "misaligned pointer"
+ tgt = self.fail_boxes_ptr.get_addr_for_num(num)
+ else:
+ assert 0, "bogus kind"
+ rffi.cast(rffi.LONGP, tgt)[0] = value
+ num += 1
+ self.fail_boxes_count = num
+ fail_index = rffi.cast(rffi.INTP, bytecode)[0]
+ fail_index = rffi.cast(lltype.Signed, fail_index)
+ return fail_index
+
    def decode_inputargs(self, code):
        """Decode a guard's failure-recovery bytecode into the list of
        locations (core/vfp registers or frame positions) holding the
        inputargs of a future bridge.

        The encoding (see gen_descr_encoding) packs, per argument, a
        2-bit kind (DESCR_*) plus either a register number or a stack
        position; values >= CODE_FROMSTACK are stack positions stored
        as a little-endian 7-bit varint.
        """
        descr_to_box_type = [REF, INT, FLOAT]   # indexed by DESCR_REF/INT/FLOAT
        bytecode = rffi.cast(rffi.UCHARP, code)
        arglocs = []
        code_inputarg = False   # CODE_INPUTARG flags that the next stack pos is ~pos
        while 1:
            # decode the next instruction from the bytecode
            code = rffi.cast(lltype.Signed, bytecode[0])
            bytecode = rffi.ptradd(bytecode, 1)
            if code >= self.CODE_FROMSTACK:
                # 'code' identifies a stack location
                if code > 0x7F:
                    # continuation bytes: 7 payload bits each, low bits first
                    shift = 7
                    code &= 0x7F
                    while True:
                        nextcode = rffi.cast(lltype.Signed, bytecode[0])
                        bytecode = rffi.ptradd(bytecode, 1)
                        code |= (nextcode & 0x7F) << shift
                        shift += 7
                        if nextcode <= 0x7F:
                            break
                kind = code & 3
                code = (code - self.CODE_FROMSTACK) >> 2
                if code_inputarg:
                    # negative positions were encoded as ~pos (see encoder)
                    code = ~code
                    code_inputarg = False
                loc = ARMFrameManager.frame_pos(code, descr_to_box_type[kind])
            elif code == self.CODE_STOP:
                break
            elif code == self.CODE_HOLE:
                continue
            elif code == self.CODE_INPUTARG:
                code_inputarg = True
                continue
            else:
                # 'code' identifies a register
                kind = code & 3
                code >>= 2
                if kind == self.DESCR_FLOAT:
                    loc = r.all_vfp_regs[code]
                else:
                    loc = r.all_regs[code]
            arglocs.append(loc)
        return arglocs[:]
+
    def _build_malloc_slowpath(self):
        """Assemble the out-of-line slow path taken when a nursery
        allocation overflows; stores its address in self.malloc_slowpath.
        On entry r0/r1 hold the values needed to compute the size; on
        exit r0 is the new object (or we branch to the exception path
        if the GC returned NULL)."""
        mc = ARMv7Builder()
        if self.cpu.supports_floats:
            vfp_regs = r.all_vfp_regs
        else:
            vfp_regs = []
        # We need to push two registers here because we are going to make a
        # call and therefore the stack needs to be 8-byte aligned
        mc.PUSH([r.ip.value, r.lr.value])
        with saved_registers(mc, [], vfp_regs):
            # At this point we know that the values we need to compute the size
            # are stored in r0 and r1.
            mc.SUB_rr(r.r0.value, r.r1.value, r.r0.value)
            addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
            # spill the copy-area registers around the call (shadowstack GC)
            for reg, ofs in ARMv7RegisterManager.REGLOC_TO_COPY_AREA_OFS.items():
                mc.STR_ri(reg.value, r.fp.value, imm=ofs)
            mc.BL(addr)
            for reg, ofs in ARMv7RegisterManager.REGLOC_TO_COPY_AREA_OFS.items():
                mc.LDR_ri(reg.value, r.fp.value, imm=ofs)

        # r0 == 0 means the GC raised MemoryError
        mc.CMP_ri(r.r0.value, 0)
        mc.B(self.propagate_exception_path, c=c.EQ)
        nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
        mc.gen_load_int(r.r1.value, nursery_free_adr)
        mc.LDR_ri(r.r1.value, r.r1.value)
        # see above: popping lr into pc returns to the caller
        mc.POP([r.ip.value, r.pc.value])

        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
        self.malloc_slowpath = rawstart
+
    def propagate_memoryerror_if_r0_is_null(self):
        """Branch to the exception path when r0 (a fresh allocation) is NULL."""
        # see ../x86/assembler.py:propagate_memoryerror_if_eax_is_null
        self.mc.CMP_ri(r.r0.value, 0)
        self.mc.B(self.propagate_exception_path, c=c.EQ)
+
    def _gen_exit_path(self):
        """Assemble the shared guard-failure exit stub.

        Saves all registers, calls the failure_recovery_func helper with
        (bytecode address from lr, frame pointer, stack pointer), and
        leaves the returned fail index in r0 before running the epilog.
        Returns the raw address of the materialized stub."""
        mc = ARMv7Builder()
        decode_registers_addr = llhelper(self.recovery_func_sign,
                                         self.failure_recovery_func)
        self._insert_checks(mc)
        with saved_registers(mc, r.all_regs, r.all_vfp_regs):
            # move mem block address, to r0 to pass as first param
            mc.MOV_rr(r.r0.value, r.lr.value)
            # pass the current frame pointer as second param
            mc.MOV_rr(r.r1.value, r.fp.value)
            # pass the current stack pointer as third param
            mc.MOV_rr(r.r2.value, r.sp.value)
            self._insert_checks(mc)
            mc.BL(rffi.cast(lltype.Signed, decode_registers_addr))
            # stash the return value in ip so it survives the register
            # restore done when leaving the saved_registers block
            mc.MOV_rr(r.ip.value, r.r0.value)
        mc.MOV_rr(r.r0.value, r.ip.value)
        self.gen_func_epilog(mc=mc)
        return mc.materialize(self.cpu.asmmemmgr, [],
                              self.cpu.gc_ll_descr.gcrootmap)
+
    # Failure-recovery bytecode constants.  The low 2 bits of every byte
    # are the kind; DESCR_SPECIAL marks the control codes below, which use
    # the next 2 bits to distinguish STOP / HOLE / INPUTARG.  Values below
    # CODE_FROMSTACK encode registers, values at or above it stack slots.
    DESCR_REF = 0x00
    DESCR_INT = 0x01
    DESCR_FLOAT = 0x02
    DESCR_SPECIAL = 0x03
    CODE_FROMSTACK = 64
    CODE_STOP = 0 | DESCR_SPECIAL
    CODE_HOLE = 4 | DESCR_SPECIAL
    CODE_INPUTARG = 8 | DESCR_SPECIAL
+
    def gen_descr_encoding(self, descr, failargs, locs):
        """Append to self.mc the failure-recovery bytecode describing
        where each failarg lives (see decode_inputargs for the decoder),
        followed by CODE_STOP, the 32-bit fail descr number, and padding
        to the function alignment."""
        assert self.mc is not None
        for i in range(len(failargs)):
            arg = failargs[i]
            if arg is not None:
                if arg.type == REF:
                    kind = self.DESCR_REF
                elif arg.type == INT:
                    kind = self.DESCR_INT
                elif arg.type == FLOAT:
                    kind = self.DESCR_FLOAT
                else:
                    raise AssertionError("bogus kind")
                loc = locs[i]
                if loc.is_stack():
                    pos = loc.position
                    if pos < 0:
                        # negative positions (inputargs) are flagged and
                        # stored as ~pos so the varint stays non-negative
                        self.mc.writechar(chr(self.CODE_INPUTARG))
                        pos = ~pos
                    n = self.CODE_FROMSTACK // 4 + pos
                else:
                    assert loc.is_reg() or loc.is_vfp_reg()
                    n = loc.value
                # pack kind into the low 2 bits, emit as 7-bit varint
                n = kind + 4 * n
                while n > 0x7F:
                    self.mc.writechar(chr((n & 0x7F) | 0x80))
                    n >>= 7
            else:
                n = self.CODE_HOLE
            self.mc.writechar(chr(n))
        self.mc.writechar(chr(self.CODE_STOP))

        fdescr = self.cpu.get_fail_descr_number(descr)
        self.mc.write32(fdescr)
        self.align()

        # assert that the fail_boxes lists are big enough
        assert len(failargs) <= self.fail_boxes_int.SIZE
+
    def _gen_path_to_exit_path(self, descr, args, arglocs,
                               save_exc, fcond=c.AL):
        """Emit a guard's recovery stub: the jump to the shared exit code
        followed by the encoded description of the failargs.  arglocs[0]
        is skipped (it holds the frame depth, not an argument)."""
        assert isinstance(save_exc, bool)
        self.gen_exit_code(self.mc, save_exc, fcond)
        self.gen_descr_encoding(descr, args, arglocs[1:])
+
    def gen_exit_code(self, mc, save_exc, fcond=c.AL):
        """Emit the call into the leave-jitted hook, choosing the variant
        that saves the current exception when save_exc is set."""
        assert isinstance(save_exc, bool)
        if save_exc:
            path = self._leave_jitted_hook_save_exc
        else:
            path = self._leave_jitted_hook
        mc.BL(path)
+
    def align(self):
        """Pad self.mc with zero bytes up to the next FUNC_ALIGN boundary."""
        while(self.mc.currpos() % FUNC_ALIGN != 0):
            self.mc.writechar(chr(0))
+
    def gen_func_epilog(self, mc=None, cond=c.AL):
        """Emit the function epilog: unwind the shadowstack entry (if the
        GC uses one), restore sp from fp, pop callee-saved vfp and core
        registers.  Mirror of gen_func_prolog."""
        stack_size = self.STACK_FIXED_AREA
        stack_size -= len(r.callee_saved_registers) * WORD
        if self.cpu.supports_floats:
            stack_size -= len(r.callee_saved_vfp_registers) * 2 * WORD

        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if mc is None:
            mc = self.mc
        if gcrootmap and gcrootmap.is_shadow_stack:
            self.gen_footer_shadowstack(gcrootmap, mc)
        mc.MOV_rr(r.sp.value, r.fp.value, cond=cond)
        mc.ADD_ri(r.sp.value, r.sp.value, stack_size, cond=cond)
        if self.cpu.supports_floats:
            mc.VPOP([reg.value for reg in r.callee_saved_vfp_registers],
                    cond=cond)
        # note: pops into pc via callee_restored_registers, returning
        # to the caller (presumably lr was saved as part of the prolog's
        # callee_saved_registers -- confirm against registers.py)
        mc.POP([reg.value for reg in r.callee_restored_registers], cond=cond)
+
    def gen_func_prolog(self):
        """Emit the function prolog: push callee-saved registers, reserve
        the fixed stack area, set up fp, and push a shadowstack frame if
        the GC requires one."""
        stack_size = self.STACK_FIXED_AREA
        stack_size -= len(r.callee_saved_registers) * WORD
        if self.cpu.supports_floats:
            stack_size -= len(r.callee_saved_vfp_registers) * 2 * WORD

        self.mc.PUSH([reg.value for reg in r.callee_saved_registers])
        if self.cpu.supports_floats:
            self.mc.VPUSH([reg.value for reg in r.callee_saved_vfp_registers])
        # here we modify the stack pointer to leave room for the 9 registers
        # that are going to be saved here around malloc calls and one word to
        # store the force index
        self.mc.SUB_ri(r.sp.value, r.sp.value, stack_size)
        self.mc.MOV_rr(r.fp.value, r.sp.value)
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            self.gen_shadowstack_header(gcrootmap)
+
    def gen_shadowstack_header(self, gcrootmap):
        """Push this frame onto the GC shadowstack and bump its top.
        Clobbers ip, r4, r5 and r6 (scratch)."""
        # we need to put two words into the shadowstack: the MARKER_FRAME
        # and the address of the frame (fp, actually)
        rst = gcrootmap.get_root_stack_top_addr()
        self.mc.gen_load_int(r.ip.value, rst)
        self.mc.LDR_ri(r.r4.value, r.ip.value)          # LDR r4, [rootstacktop]
        #
        MARKER = gcrootmap.MARKER_FRAME
        self.mc.ADD_ri(r.r5.value, r.r4.value,
                       imm=2 * WORD)                    # ADD r5, r4 [2*WORD]
        self.mc.gen_load_int(r.r6.value, MARKER)
        self.mc.STR_ri(r.r6.value, r.r4.value, WORD)    # STR MARKER, r4 [WORD]
        self.mc.STR_ri(r.fp.value, r.r4.value)          # STR fp, r4
        #
        self.mc.STR_ri(r.r5.value, r.ip.value)          # STR r5 [rootstacktop]
+
    def gen_footer_shadowstack(self, gcrootmap, mc):
        """Pop the two-word frame pushed by gen_shadowstack_header by
        moving the shadowstack top back down.  Clobbers ip, r4, r5."""
        rst = gcrootmap.get_root_stack_top_addr()
        mc.gen_load_int(r.ip.value, rst)
        mc.LDR_ri(r.r4.value, r.ip.value)               # LDR r4, [rootstacktop]
        mc.SUB_ri(r.r5.value, r.r4.value, imm=2 * WORD) # SUB r5, r4 [2*WORD]
        mc.STR_ri(r.r5.value, r.ip.value)
+
    def _dump(self, ops, type='loop'):
        """Log the given resoperations under the 'jit-backend-ops' debug
        section, labelled with *type* ('loop' or 'bridge')."""
        debug_start('jit-backend-ops')
        debug_print(type)
        for op in ops:
            debug_print(op.repr())
        debug_stop('jit-backend-ops')
+
    def _call_header(self):
        """Emit the aligned function prolog for a compiled loop."""
        self.align()
        self.gen_func_prolog()
+
    def _call_header_with_stack_check(self):
        """Emit an optional stack-overflow check (call into the slow path
        when the remaining stack is smaller than the configured length),
        then the regular call header."""
        if self.stack_check_slowpath == 0:
            pass                  # no stack check (e.g. not translated)
        else:
            endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
            self.mc.PUSH([r.lr.value])
            # load stack end
            self.mc.gen_load_int(r.ip.value, endaddr)          # load ip, [end]
            self.mc.LDR_ri(r.ip.value, r.ip.value)             # LDR ip, ip
            # load stack length
            self.mc.gen_load_int(r.lr.value, lengthaddr)       # load lr, length
            self.mc.LDR_ri(r.lr.value, r.lr.value)             # ldr lr, *length
            # calculate ofs
            self.mc.SUB_rr(r.ip.value, r.ip.value, r.sp.value) # SUB ip, current
            # if ofs > length, we are past the red zone: call the slow path
            self.mc.CMP_rr(r.ip.value, r.lr.value)             # CMP ip, lr
            self.mc.BL(self.stack_check_slowpath, c=c.HI)      # call if ip > lr
            #
            self.mc.POP([r.lr.value])
        self._call_header()
+
+ # cpu interface
    # cpu interface
    def assemble_loop(self, loopname, inputargs, operations, looptoken, log):
        """Compile a whole loop to machine code.

        Creates the CompiledLoopToken, emits prolog + body + guard
        recovery stubs, materializes the code, patches relative offsets
        to absolute addresses, and returns an AsmInfo describing the
        generated code.
        """
        clt = CompiledLoopToken(self.cpu, looptoken.number)
        clt.allgcrefs = []
        looptoken.compiled_loop_token = clt
        clt._debug_nbargs = len(inputargs)

        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)

        operations = self.setup(looptoken, operations)
        if log:
            operations = self._inject_debugging_code(looptoken, operations,
                                                     'e', looptoken.number)

        self._call_header_with_stack_check()
        # placeholder NOPs: the sp adjustment is patched in once the
        # final frame depth is known (see _patch_sp_offset)
        sp_patch_location = self._prepare_sp_patch_position()

        regalloc = Regalloc(assembler=self, frame_manager=ARMFrameManager())
        regalloc.prepare_loop(inputargs, operations)

        loop_head = self.mc.get_relative_pos()
        looptoken._arm_loop_code = loop_head
        #
        clt.frame_depth = -1            # sentinel until _assemble computes it
        frame_depth = self._assemble(operations, regalloc)
        clt.frame_depth = frame_depth
        #
        size_excluding_failure_stuff = self.mc.get_relative_pos()

        self._patch_sp_offset(sp_patch_location, frame_depth)
        self.write_pending_failure_recoveries()

        rawstart = self.materialize_loop(looptoken)
        looptoken._arm_func_addr = rawstart

        self.process_pending_guards(rawstart)
        self.fixup_target_tokens(rawstart)

        if log and not we_are_translated():
            self.mc._dump_trace(rawstart,
                    'loop_%s.asm' % self.cpu.total_compiled_loops)

        ops_offset = self.mc.ops_offset
        self.teardown()

        debug_start("jit-backend-addr")
        debug_print("Loop %d (%s) has address %x to %x (bootstrap %x)" % (
            looptoken.number, loopname,
            rawstart + loop_head,
            rawstart + size_excluding_failure_stuff,
            rawstart))
        debug_stop("jit-backend-addr")

        return AsmInfo(ops_offset, rawstart + loop_head,
                       size_excluding_failure_stuff - loop_head)
+
    def _assemble(self, operations, regalloc):
        """Run register allocation and code emission over *operations*;
        return the frame depth, widened to that of a jump target's loop
        when the trace ends in a JUMP."""
        regalloc.compute_hint_frame_locations(operations)
        self._walk_operations(operations, regalloc)
        frame_depth = regalloc.frame_manager.get_frame_depth()
        jump_target_descr = regalloc.jump_target_descr
        if jump_target_descr is not None:
            frame_depth = max(frame_depth,
                              jump_target_descr._arm_clt.frame_depth)
        return frame_depth
+
    def assemble_bridge(self, faildescr, inputargs, operations,
                        original_loop_token, log):
        """Compile a bridge out of the guard described by *faildescr* and
        patch that guard to jump to it.  Returns an AsmInfo for the
        generated code."""
        operations = self.setup(original_loop_token, operations)
        descr_number = self.cpu.get_fail_descr_number(faildescr)
        if log:
            operations = self._inject_debugging_code(faildescr, operations,
                                                     'b', descr_number)
        assert isinstance(faildescr, AbstractFailDescr)
        # recover the argument locations from the guard's recovery bytecode
        code = self._find_failure_recovery_bytecode(faildescr)
        frame_depth = faildescr._arm_current_frame_depth
        arglocs = self.decode_inputargs(code)
        if not we_are_translated():
            assert len(inputargs) == len(arglocs)

        regalloc = Regalloc(assembler=self, frame_manager=ARMFrameManager())
        regalloc.prepare_bridge(inputargs, arglocs, operations)

        sp_patch_location = self._prepare_sp_patch_position()

        startpos = self.mc.get_relative_pos()

        frame_depth = self._assemble(operations, regalloc)

        codeendpos = self.mc.get_relative_pos()

        self._patch_sp_offset(sp_patch_location, frame_depth)

        self.write_pending_failure_recoveries()

        rawstart = self.materialize_loop(original_loop_token)

        self.process_pending_guards(rawstart)
        self.fixup_target_tokens(rawstart)

        # redirect the failing guard to the freshly compiled bridge
        self.patch_trace(faildescr, original_loop_token,
                         rawstart, regalloc)

        if not we_are_translated():
            # for the benefit of tests
            faildescr._arm_bridge_frame_depth = frame_depth
            if log:
                self.mc._dump_trace(rawstart, 'bridge_%d.asm' %
                self.cpu.total_compiled_bridges)
        # the loop's frame must be deep enough for this bridge too
        self.current_clt.frame_depth = max(self.current_clt.frame_depth,
                                           frame_depth)
        ops_offset = self.mc.ops_offset
        self.teardown()

        debug_start("jit-backend-addr")
        debug_print("bridge out of Guard %d has address %x to %x" %
                    (descr_number, rawstart, rawstart + codeendpos))
        debug_stop("jit-backend-addr")

        return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
+
    def _find_failure_recovery_bytecode(self, faildescr):
        """Return the address of the recovery bytecode written after the
        guard's exit stub (the stub itself is 3 words of jump code).
        Raises BridgeAlreadyCompiled if the stub was already patched away."""
        guard_stub_addr = faildescr._arm_failure_recovery_block
        if guard_stub_addr == 0:
            # This case should be prevented by the logic in compile.py:
            # look for CNT_BUSY_FLAG, which disables tracing from a guard
            # when another tracing from the same guard is already in progress.
            raise BridgeAlreadyCompiled
        # a guard requires 3 words to encode the jump to the exit code.
        return guard_stub_addr + 3 * WORD
+
+ def fixup_target_tokens(self, rawstart):
+ for targettoken in self.target_tokens_currently_compiling:
+ targettoken._arm_loop_code += rawstart
+ self.target_tokens_currently_compiling = None
+
    def target_arglocs(self, loop_token):
        """Return the argument locations recorded on *loop_token*."""
        return loop_token._arm_arglocs
+
    def materialize_loop(self, looptoken):
        """Copy the generated code into executable memory owned by
        *looptoken* and return its start address."""
        self.datablockwrapper.done()      # finish using cpu.asmmemmgr
        self.datablockwrapper = None
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
                                   self.cpu.gc_ll_descr.gcrootmap)
+
    def write_pending_failure_recoveries(self):
        """After the loop/bridge body, emit one recovery stub per pending
        guard and record its position and frame depth on the guard token."""
        for tok in self.pending_guards:
            descr = tok.descr
            # generate the exit stub and the encoded representation
            pos = self.mc.currpos()
            tok.pos_recovery_stub = pos

            self._gen_path_to_exit_path(descr, tok.failargs,
                                        tok.faillocs, save_exc=tok.save_exc)
            # store info on the descr; faillocs[0] carries the frame depth
            descr._arm_current_frame_depth = tok.faillocs[0].getint()
+
    def process_pending_guards(self, block_start):
        """Once the code is materialized at *block_start*, patch each
        guard's placeholder NOP into a conditional branch to its recovery
        stub (or, for invalidatable guards, just record the position)."""
        clt = self.current_clt
        for tok in self.pending_guards:
            descr = tok.descr
            assert isinstance(descr, AbstractFailDescr)
            failure_recovery_pos = block_start + tok.pos_recovery_stub
            descr._arm_failure_recovery_block = failure_recovery_pos
            relative_offset = tok.pos_recovery_stub - tok.offset
            guard_pos = block_start + tok.offset
            if not tok.is_invalidate:
                # patch the guard jump to the stub
                # overwrite the generated NOP with a B_offs to the pos of the
                # stub
                mc = ARMv7Builder()
                mc.B_offs(relative_offset, c.get_opposite_of(tok.fcond))
                mc.copy_to_raw_memory(guard_pos)
            else:
                clt.invalidate_positions.append((guard_pos, relative_offset))
+
+ def get_asmmemmgr_blocks(self, looptoken):
+ clt = looptoken.compiled_loop_token
+ if clt.asmmemmgr_blocks is None:
+ clt.asmmemmgr_blocks = []
+ return clt.asmmemmgr_blocks
+
    def _prepare_sp_patch_position(self):
        """Generate NOPs as placeholder to patch the instruction(s) to update
        the sp according to the number of spilled variables"""
        # worst case: a full gen_load_int plus one add/sub instruction
        size = (self.mc.size_of_gen_load_int + WORD)
        l = self.mc.currpos()
        for _ in range(size // WORD):
            self.mc.NOP()
        return l
+
    def _patch_sp_offset(self, pos, frame_depth):
        """Overwrite the NOPs emitted by _prepare_sp_patch_position with
        the real sp adjustment for *frame_depth* spilled words."""
        cb = OverwritingBuilder(self.mc, pos,
                                OverwritingBuilder.size_of_gen_load_int + WORD)
        n = frame_depth * WORD

        # ensure the sp is 8 byte aligned when patching it
        if n % 8 != 0:
            n += WORD
        assert n % 8 == 0

        self._adjust_sp(n, cb, base_reg=r.fp)
+
    def _adjust_sp(self, n, cb=None, fcond=c.AL, base_reg=r.sp):
        """Emit sp = base_reg -/+ n (subtract for positive n, add for
        negative).  Uses an immediate form when n fits in 8 bits and the
        condition is AL; otherwise loads n into ip first."""
        if cb is None:
            cb = self.mc
        if n < 0:
            n = -n
            rev = True
        else:
            rev = False
        if n <= 0xFF and fcond == c.AL:
            if rev:
                cb.ADD_ri(r.sp.value, base_reg.value, n)
            else:
                cb.SUB_ri(r.sp.value, base_reg.value, n)
        else:
            cb.gen_load_int(r.ip.value, n, cond=fcond)
            if rev:
                cb.ADD_rr(r.sp.value, base_reg.value, r.ip.value, cond=fcond)
            else:
                cb.SUB_rr(r.sp.value, base_reg.value, r.ip.value, cond=fcond)
+
    def _walk_operations(self, operations, regalloc):
        """Main emission loop: for each operation, run its regalloc
        preparation and then its assembler method, merging comparison /
        overflow / call operations with the guard that follows them
        when possible."""
        fcond = c.AL
        self._regalloc = regalloc
        while regalloc.position() < len(operations) - 1:
            regalloc.next_instruction()
            i = regalloc.position()
            op = operations[i]
            self.mc.mark_op(op)
            opnum = op.getopnum()
            if op.has_no_side_effect() and op.result not in regalloc.longevity:
                # dead pure operation: emit nothing
                regalloc.possibly_free_vars_for_op(op)
            elif self.can_merge_with_next_guard(op, i, operations):
                # emit op and its guard as one combined instruction sequence
                guard = operations[i + 1]
                assert guard.is_guard()
                arglocs = regalloc_operations_with_guard[opnum](regalloc, op,
                                                                guard, fcond)
                fcond = asm_operations_with_guard[opnum](self, op,
                                                guard, arglocs, regalloc, fcond)
                regalloc.next_instruction()
                regalloc.possibly_free_vars_for_op(guard)
                regalloc.possibly_free_vars(guard.getfailargs())
            elif not we_are_translated() and op.getopnum() == -124:
                # FORCE_SPILL pseudo-op, only used by tests
                regalloc.prepare_force_spill(op, fcond)
            else:
                arglocs = regalloc_operations[opnum](regalloc, op, fcond)
                if arglocs is not None:
                    fcond = asm_operations[opnum](self, op, arglocs,
                                                  regalloc, fcond)
                if op.is_guard():
                    regalloc.possibly_free_vars(op.getfailargs())
            if op.result:
                regalloc.possibly_free_var(op.result)
            regalloc.possibly_free_vars_for_op(op)
            regalloc.free_temp_vars()
            regalloc._check_invariants()
        self.mc.mark_op(None)           # end of the loop
+
    # from ../x86/regalloc.py
    def can_merge_with_next_guard(self, op, i, operations):
        """Return True when operations[i] should be emitted together with
        the guard at operations[i + 1]: forced calls with GUARD_NOT_FORCED,
        overflow ops with their overflow guard, and comparisons whose sole
        consumer is the following GUARD_TRUE/GUARD_FALSE."""
        if (op.getopnum() == rop.CALL_MAY_FORCE or
            op.getopnum() == rop.CALL_ASSEMBLER or
            op.getopnum() == rop.CALL_RELEASE_GIL):
            assert operations[i + 1].getopnum() == rop.GUARD_NOT_FORCED
            return True
        if not op.is_comparison():
            if op.is_ovf():
                if (operations[i + 1].getopnum() != rop.GUARD_NO_OVERFLOW and
                    operations[i + 1].getopnum() != rop.GUARD_OVERFLOW):
                    not_implemented("int_xxx_ovf not followed by "
                                    "guard_(no)_overflow")
                return True
            return False
        if (operations[i + 1].getopnum() != rop.GUARD_TRUE and
            operations[i + 1].getopnum() != rop.GUARD_FALSE):
            return False
        if operations[i + 1].getarg(0) is not op.result:
            return False
        # the comparison result must die at the guard and not appear in
        # its failargs, otherwise it needs to be materialized
        if (self._regalloc.longevity[op.result][1] > i + 1 or
            op.result in operations[i + 1].getfailargs()):
            return False
        return True
+
    def regalloc_emit_llong(self, op, arglocs, fcond, regalloc):
        """Dispatch an oopspec long-long call to its specialized emitter."""
        effectinfo = op.getdescr().get_extra_info()
        oopspecindex = effectinfo.oopspecindex
        asm_llong_operations[oopspecindex](self, op, arglocs, regalloc, fcond)
        return fcond
+
    def regalloc_emit_math(self, op, arglocs, fcond, regalloc):
        """Dispatch an oopspec math call to its specialized emitter."""
        effectinfo = op.getdescr().get_extra_info()
        oopspecindex = effectinfo.oopspecindex
        asm_math_operations[oopspecindex](self, op, arglocs, regalloc, fcond)
        return fcond
+
+
    def _insert_checks(self, mc=None):
        """Debug-only (untranslated, self._debug): emit a BKPT that fires
        when fp < sp, i.e. the frame pointer no longer dominates the
        stack pointer."""
        if not we_are_translated() and self._debug:
            if mc is None:
                mc = self.mc
            mc.CMP_rr(r.fp.value, r.sp.value)
            mc.MOV_rr(r.pc.value, r.pc.value, cond=c.GE)  # skip BKPT if fp >= sp
            mc.BKPT()
+
    def _ensure_result_bit_extension(self, resloc, size, signed):
        """Zero- or sign-extend the low *size* bytes of the register in
        *resloc* to a full 32-bit word.  No-op for size == 4."""
        if size == 4:
            return
        if size == 1:
            if not signed:              # unsigned char
                self.mc.AND_ri(resloc.value, resloc.value, 0xFF)
            else:
                # shift up then arithmetic-shift back down to sign-extend
                self.mc.LSL_ri(resloc.value, resloc.value, 24)
                self.mc.ASR_ri(resloc.value, resloc.value, 24)
        elif size == 2:
            if not signed:
                self.mc.LSL_ri(resloc.value, resloc.value, 16)
                self.mc.LSR_ri(resloc.value, resloc.value, 16)
            else:
                self.mc.LSL_ri(resloc.value, resloc.value, 16)
                self.mc.ASR_ri(resloc.value, resloc.value, 16)
+
    def patch_trace(self, faildescr, looptoken, bridge_addr, regalloc):
        """Overwrite the guard's recovery stub with a direct branch to the
        freshly compiled bridge, and mark the stub as consumed (0)."""
        b = ARMv7Builder()
        patch_addr = faildescr._arm_failure_recovery_block
        assert patch_addr != 0
        b.B(bridge_addr)
        b.copy_to_raw_memory(patch_addr)
        faildescr._arm_failure_recovery_block = 0
+
    # regalloc support
    def load(self, loc, value):
        """Load the immediate *value* into *loc*: an int immediate into a
        core register, or a float immediate (by address) into a vfp
        register via ip."""
        assert (loc.is_reg() and value.is_imm()
                or loc.is_vfp_reg() and value.is_imm_float())
        if value.is_imm():
            self.mc.gen_load_int(loc.value, value.getint())
        elif value.is_imm_float():
            # the float immediate's payload is the address of the value
            self.mc.gen_load_int(r.ip.value, value.getint())
            self.mc.VLDR(loc.value, r.ip.value)
+
    def _mov_imm_to_loc(self, prev_loc, loc, cond=c.AL):
        """Move an int immediate into a core register or a non-float stack
        slot (going through lr, which is saved/restored around the spill)."""
        if not loc.is_reg() and not (loc.is_stack() and loc.type != FLOAT):
            raise AssertionError("invalid target for move from imm value")
        if loc.is_reg():
            new_loc = loc
        elif loc.is_stack():
            self.mc.PUSH([r.lr.value], cond=cond)
            new_loc = r.lr
        else:
            raise AssertionError("invalid target for move from imm value")
        self.mc.gen_load_int(new_loc.value, prev_loc.value, cond=cond)
        if loc.is_stack():
            self.regalloc_mov(new_loc, loc)
            self.mc.POP([r.lr.value], cond=cond)
+
    def _mov_reg_to_loc(self, prev_loc, loc, cond=c.AL):
        """Move a core register to another core register or spill it to a
        non-float stack slot (frame offsets are negative from fp)."""
        if loc.is_imm():
            raise AssertionError("mov reg to imm doesn't make sense")
        if loc.is_reg():
            self.mc.MOV_rr(loc.value, prev_loc.value, cond=cond)
        elif loc.is_stack() and loc.type != FLOAT:
            # spill a core register
            if prev_loc is r.ip:
                temp = r.lr       # can't use ip as scratch: it is the source
            else:
                temp = r.ip
            offset = loc.value
            if not check_imm_arg(offset, size=0xFFF):
                # offset too large for STR immediate: go through a register
                self.mc.PUSH([temp.value], cond=cond)
                self.mc.gen_load_int(temp.value, -offset, cond=cond)
                self.mc.STR_rr(prev_loc.value, r.fp.value,
                               temp.value, cond=cond)
                self.mc.POP([temp.value], cond=cond)
            else:
                self.mc.STR_ri(prev_loc.value, r.fp.value,
                               imm=-offset, cond=cond)
        else:
            assert 0, 'unsupported case'
+
    def _mov_stack_to_loc(self, prev_loc, loc, cond=c.AL):
        """Unspill a stack slot into a core register (int/ref) or a vfp
        register (float).  Uses lr resp. ip as scratch for large offsets."""
        pushed = False
        if loc.is_reg():
            assert prev_loc.type != FLOAT, 'trying to load from an \
                incompatible location into a core register'
            assert loc is not r.lr, 'lr is not supported as a target \
                when moving from the stack'
            # unspill a core register
            offset = prev_loc.value
            if not check_imm_arg(offset, size=0xFFF):
                # offset too large for LDR immediate: go through lr
                self.mc.PUSH([r.lr.value], cond=cond)
                pushed = True
                self.mc.gen_load_int(r.lr.value, -offset, cond=cond)
                self.mc.LDR_rr(loc.value, r.fp.value, r.lr.value, cond=cond)
            else:
                self.mc.LDR_ri(loc.value, r.fp.value, imm=-offset, cond=cond)
            if pushed:
                self.mc.POP([r.lr.value], cond=cond)
        elif loc.is_vfp_reg():
            assert prev_loc.type == FLOAT, 'trying to load from an \
                incompatible location into a float register'
            # load spilled value into vfp reg
            offset = prev_loc.value
            self.mc.PUSH([r.ip.value], cond=cond)
            pushed = True
            # compute the slot address in ip, then VLDR from it
            if not check_imm_arg(offset):
                self.mc.gen_load_int(r.ip.value, offset, cond=cond)
                self.mc.SUB_rr(r.ip.value, r.fp.value, r.ip.value, cond=cond)
            else:
                self.mc.SUB_ri(r.ip.value, r.fp.value, offset, cond=cond)
            self.mc.VLDR(loc.value, r.ip.value, cond=cond)
            if pushed:
                self.mc.POP([r.ip.value], cond=cond)
        else:
            assert 0, 'unsupported case'
+
    def _mov_imm_float_to_loc(self, prev_loc, loc, cond=c.AL):
        """Move a float immediate (whose payload is an address) into a vfp
        register, or into a stack slot via the vfp scratch register."""
        if loc.is_vfp_reg():
            self.mc.PUSH([r.ip.value], cond=cond)
            self.mc.gen_load_int(r.ip.value, prev_loc.getint(), cond=cond)
            self.mc.VLDR(loc.value, r.ip.value, cond=cond)
            self.mc.POP([r.ip.value], cond=cond)
        elif loc.is_stack():
            # bounce through vfp_ip: imm -> vfp reg -> stack
            self.regalloc_push(r.vfp_ip)
            self.regalloc_mov(prev_loc, r.vfp_ip, cond)
            self.regalloc_mov(r.vfp_ip, loc, cond)
            self.regalloc_pop(r.vfp_ip)
        else:
            assert 0, 'unsupported case'
+
    def _mov_vfp_reg_to_loc(self, prev_loc, loc, cond=c.AL):
        """Move a vfp register to another vfp register or spill it to a
        float stack slot (address computed in ip)."""
        if loc.is_vfp_reg():
            self.mc.VMOV_cc(loc.value, prev_loc.value, cond=cond)
        elif loc.is_stack():
            assert loc.type == FLOAT, 'trying to store to an \
                incompatible location from a float register'
            # spill vfp register
            self.mc.PUSH([r.ip.value], cond=cond)
            offset = loc.value
            if not check_imm_arg(offset):
                self.mc.gen_load_int(r.ip.value, offset, cond=cond)
                self.mc.SUB_rr(r.ip.value, r.fp.value, r.ip.value, cond=cond)
            else:
                self.mc.SUB_ri(r.ip.value, r.fp.value, offset, cond=cond)
            self.mc.VSTR(prev_loc.value, r.ip.value, cond=cond)
            self.mc.POP([r.ip.value], cond=cond)
        else:
            assert 0, 'unsupported case'
+
    def regalloc_mov(self, prev_loc, loc, cond=c.AL):
        """Moves a value from a previous location to some other location,
        dispatching on the source kind (imm / reg / stack / imm float /
        vfp reg)."""
        if prev_loc.is_imm():
            return self._mov_imm_to_loc(prev_loc, loc, cond)
        elif prev_loc.is_reg():
            self._mov_reg_to_loc(prev_loc, loc, cond)
        elif prev_loc.is_stack():
            self._mov_stack_to_loc(prev_loc, loc, cond)
        elif prev_loc.is_imm_float():
            self._mov_imm_float_to_loc(prev_loc, loc, cond)
        elif prev_loc.is_vfp_reg():
            self._mov_vfp_reg_to_loc(prev_loc, loc, cond)
        else:
            assert 0, 'unsupported case'
    mov_loc_loc = regalloc_mov    # alias used by the shared regalloc code
+
    def mov_from_vfp_loc(self, vfp_loc, reg1, reg2, cond=c.AL):
        """Moves floating point values either as an immediate, in a vfp
        register or at a stack location to a pair of core registers.
        reg1/reg2 must be consecutive (low word first)."""
        assert reg1.value + 1 == reg2.value
        if vfp_loc.is_vfp_reg():
            self.mc.VMOV_rc(reg1.value, reg2.value, vfp_loc.value, cond=cond)
        elif vfp_loc.is_imm_float():
            self.mc.PUSH([r.ip.value], cond=cond)
            self.mc.gen_load_int(r.ip.value, vfp_loc.getint(), cond=cond)
            # we need to load one word to loc and one to loc+1 which are
            # two 32-bit core registers
            self.mc.LDR_ri(reg1.value, r.ip.value, cond=cond)
            self.mc.LDR_ri(reg2.value, r.ip.value, imm=WORD, cond=cond)
            self.mc.POP([r.ip.value], cond=cond)
        elif vfp_loc.is_stack() and vfp_loc.type == FLOAT:
            # load spilled vfp value into two core registers
            offset = vfp_loc.value
            if not check_imm_arg(offset, size=0xFFF):
                # offset too large for LDR immediate: go through ip
                self.mc.PUSH([r.ip.value], cond=cond)
                self.mc.gen_load_int(r.ip.value, -offset, cond=cond)
                self.mc.LDR_rr(reg1.value, r.fp.value, r.ip.value, cond=cond)
                self.mc.ADD_ri(r.ip.value, r.ip.value, imm=WORD, cond=cond)
                self.mc.LDR_rr(reg2.value, r.fp.value, r.ip.value, cond=cond)
                self.mc.POP([r.ip.value], cond=cond)
            else:
                self.mc.LDR_ri(reg1.value, r.fp.value, imm=-offset, cond=cond)
                self.mc.LDR_ri(reg2.value, r.fp.value,
                               imm=-offset + WORD, cond=cond)
        else:
            assert 0, 'unsupported case'
+
    def mov_to_vfp_loc(self, reg1, reg2, vfp_loc, cond=c.AL):
        """Moves a floating point value from two consecutive core registers
        to a vfp location, either a vfp register or a stack location."""
        assert reg1.value + 1 == reg2.value
        if vfp_loc.is_vfp_reg():
            self.mc.VMOV_cr(vfp_loc.value, reg1.value, reg2.value, cond=cond)
        elif vfp_loc.is_stack():
            # move from two core registers to a float stack location
            offset = vfp_loc.value
            if not check_imm_arg(offset, size=0xFFF):
                # offset too large for STR immediate: go through ip
                self.mc.PUSH([r.ip.value], cond=cond)
                self.mc.gen_load_int(r.ip.value, -offset, cond=cond)
                self.mc.STR_rr(reg1.value, r.fp.value, r.ip.value, cond=cond)
                self.mc.ADD_ri(r.ip.value, r.ip.value, imm=WORD, cond=cond)
                self.mc.STR_rr(reg2.value, r.fp.value, r.ip.value, cond=cond)
                self.mc.POP([r.ip.value], cond=cond)
            else:
                self.mc.STR_ri(reg1.value, r.fp.value, imm=-offset, cond=cond)
                self.mc.STR_ri(reg2.value, r.fp.value,
                               imm=-offset + WORD, cond=cond)
        else:
            assert 0, 'unsupported case'
+
    def regalloc_push(self, loc, cond=c.AL):
        """Pushes the value stored in loc to the stack
        Can trash the current value of the IP register when pushing a stack
        loc"""

        if loc.is_stack():
            # bounce through the appropriate scratch register first
            if loc.type != FLOAT:
                scratch_reg = r.ip
            else:
                scratch_reg = r.vfp_ip
            self.regalloc_mov(loc, scratch_reg, cond)
            self.regalloc_push(scratch_reg, cond)
        elif loc.is_reg():
            self.mc.PUSH([loc.value], cond=cond)
        elif loc.is_vfp_reg():
            self.mc.VPUSH([loc.value], cond=cond)
        elif loc.is_imm():
            self.regalloc_mov(loc, r.ip)
            self.mc.PUSH([r.ip.value], cond=cond)
        elif loc.is_imm_float():
            self.regalloc_mov(loc, r.vfp_ip)
            self.mc.VPUSH([r.vfp_ip.value], cond=cond)
        else:
            raise AssertionError('Trying to push an invalid location')
+
    def regalloc_pop(self, loc, cond=c.AL):
        """Pops the value on top of the stack to loc Can trash the current
        value of the IP register when popping to a stack loc"""
        if loc.is_stack():
            # bounce through the appropriate scratch register first
            if loc.type != FLOAT:
                scratch_reg = r.ip
            else:
                scratch_reg = r.vfp_ip
            self.regalloc_pop(scratch_reg)
            self.regalloc_mov(scratch_reg, loc)
        elif loc.is_reg():
            self.mc.POP([loc.value], cond=cond)
        elif loc.is_vfp_reg():
            self.mc.VPOP([loc.value], cond=cond)
        else:
            raise AssertionError('Trying to pop to an invalid location')
+
    def leave_jitted_hook(self):
        """Called when leaving jitted code: tell the GC that the pointers
        stored in fail_boxes_ptr may be young and must be traced."""
        ptrs = self.fail_boxes_ptr.ar
        llop.gc_assume_young_pointers(lltype.Void,
                                      llmemory.cast_ptr_to_adr(ptrs))
+
    def malloc_cond(self, nursery_free_adr, nursery_top_adr, size):
        """Emit an inline nursery allocation of *size* bytes, calling
        malloc_slowpath when the nursery is exhausted.  Leaves the new
        object in r0 and the new nursery-free pointer in r1."""
        assert size & (WORD - 1) == 0   # must be correctly aligned

        self.mc.gen_load_int(r.r0.value, nursery_free_adr)
        self.mc.LDR_ri(r.r0.value, r.r0.value)

        if check_imm_arg(size):
            self.mc.ADD_ri(r.r1.value, r.r0.value, size)
        else:
            self.mc.gen_load_int(r.r1.value, size)
            self.mc.ADD_rr(r.r1.value, r.r0.value, r.r1.value)

        self.mc.gen_load_int(r.ip.value, nursery_top_adr)
        self.mc.LDR_ri(r.ip.value, r.ip.value)

        self.mc.CMP_rr(r.r1.value, r.ip.value)

        # We load into r0 the address stored at nursery_free_adr.  We
        # calculate the new value for nursery_free_adr and store it in r1.
        # Then we load the address stored in nursery_top_adr into IP.  If
        # the value in r1 is (unsigned) bigger than the one in ip we
        # conditionally call malloc_slowpath, which returns the
        # new value of nursery_free_adr in r1 and the adr of the new object
        # in r0.
        self.mark_gc_roots(self.write_new_force_index(),
                           use_copy_area=True)
        self.mc.BL(self.malloc_slowpath, c=c.HI)

        # commit the (possibly updated) nursery-free pointer
        self.mc.gen_load_int(r.ip.value, nursery_free_adr)
        self.mc.STR_ri(r.r1.value, r.ip.value)
+
    def mark_gc_roots(self, force_index, use_copy_area=False):
        """Record, for the given force_index, where the GC pointers live
        (shadowstack only).  force_index < 0 means nothing to record."""
        if force_index < 0:
            return      # not needed
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap:
            mark = self._regalloc.get_mark_gc_roots(gcrootmap, use_copy_area)
            assert gcrootmap.is_shadow_stack
            gcrootmap.write_callshape(mark, force_index)
+
    def write_new_force_index(self):
        # for shadowstack only: get a new, unused force_index number and
        # write it to FORCE_INDEX_OFS. Used to record the call shape
        # (i.e. where the GC pointers are in the stack) around a CALL
        # instruction that doesn't already have a force_index.
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            clt = self.current_clt
            force_index = clt.reserve_and_record_some_faildescr_index()
            self._write_fail_index(force_index)
            return force_index
        else:
            return 0
+
+
def not_implemented(msg):
    """Report an unimplemented backend feature on stderr and raise."""
    os.write(2, '[ARM/asm] %s\n' % msg)
    raise NotImplementedError(msg)
+
+
# fallback entry for asm_operations: reached for any resop without an
# emit_op_* implementation
def notimplemented_op(self, op, arglocs, regalloc, fcond):
    print "[ARM/asm] %s not implemented" % op.getopname()
    raise NotImplementedError(op)
+
+
# fallback entry for asm_operations_with_guard: reached for any op+guard
# pair without an emit_guard_* implementation
def notimplemented_op_with_guard(self, op, guard_op, arglocs, regalloc, fcond):
    print "[ARM/asm] %s with guard %s not implemented" % \
        (op.getopname(), guard_op.getopname())
    raise NotImplementedError(op)
+
# Dispatch tables filled from ResOpAssembler by naming convention:
#   emit_op_<name>        -> asm_operations[rop.<NAME>]
#   emit_guard_<name>     -> asm_operations_with_guard[rop.<NAME>]
#   emit_op_llong_<name>  -> asm_llong_operations[EffectInfo.OS_LLONG_<NAME>]
#   emit_op_math_<name>   -> asm_math_operations[EffectInfo.OS_MATH_<NAME>]
# Unfilled entries keep the notimplemented_* fallbacks above.
asm_operations = [notimplemented_op] * (rop._LAST + 1)
asm_operations_with_guard = [notimplemented_op_with_guard] * (rop._LAST + 1)
asm_llong_operations = {}
asm_math_operations = {}

for name, value in ResOpAssembler.__dict__.iteritems():
    if name.startswith('emit_guard_'):
        opname = name[len('emit_guard_'):]
        num = getattr(rop, opname.upper())
        asm_operations_with_guard[num] = value
    elif name.startswith('emit_op_llong_'):
        opname = name[len('emit_op_llong_'):]
        num = getattr(EffectInfo, 'OS_LLONG_' + opname.upper())
        asm_llong_operations[num] = value
    elif name.startswith('emit_op_math_'):
        opname = name[len('emit_op_math_'):]
        num = getattr(EffectInfo, 'OS_MATH_' + opname.upper())
        asm_math_operations[num] = value
    elif name.startswith('emit_op_'):
        opname = name[len('emit_op_'):]
        num = getattr(rop, opname.upper())
        asm_operations[num] = value
+
+
class BridgeAlreadyCompiled(Exception):
    """Raised by _find_failure_recovery_bytecode when the guard's recovery
    stub was already consumed, i.e. a bridge is already attached."""
    pass
diff --git a/pypy/jit/backend/arm/codebuilder.py b/pypy/jit/backend/arm/codebuilder.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/codebuilder.py
@@ -0,0 +1,311 @@
+from pypy.jit.backend.arm import arch
+from pypy.jit.backend.arm import conditions as cond
+from pypy.jit.backend.arm import registers as reg
+from pypy.jit.backend.arm.arch import (WORD, FUNC_ALIGN)
+from pypy.jit.backend.arm.instruction_builder import define_instructions
+from pypy.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin
+from pypy.rlib.objectmodel import we_are_translated
+from pypy.rpython.lltypesystem import lltype, rffi, llmemory
+from pypy.tool.udir import udir
+
+# Foreign-function binding to the compiler runtime's __clear_cache(begin, end),
+# which flushes the CPU instruction cache for the given address range --
+# required on ARM after writing freshly generated machine code before
+# executing it.  _nowrapper/sandboxsafe: raw call, no GIL/GC interaction.
+clear_cache = rffi.llexternal(
+    "__clear_cache",
+    [llmemory.Address, llmemory.Address],
+    lltype.Void,
+    _nowrapper=True,
+    sandboxsafe=True)
+
+
+def binary_helper_call(name):
+    # Factory for builder methods that emit a BL (branch-and-link) to one of
+    # the arm_<name> helper functions defined in the arch module.
+    function = getattr(arch, 'arm_%s' % name)
+
+    def f(self, c=cond.AL):
+        """Generates a call to a helper function, takes its
+        arguments in r0 and r1, result is placed in r0"""
+        # Cast the helper's function pointer to a plain machine word for BL.
+        addr = rffi.cast(lltype.Signed, function)
+        self.BL(addr, c)
+    return f
+
+
+class AbstractARMv7Builder(object):
+
+    def __init__(self):
+        # Nothing to initialize here; concrete subclasses supply the buffer
+        # machinery (writechar/write32/currpos) this class emits into.
+        pass
+
+    def align(self):
+        # Pad the buffer with zero bytes until the current position is
+        # FUNC_ALIGN-aligned (function entry alignment).
+        while(self.currpos() % FUNC_ALIGN != 0):
+            self.writechar(chr(0))
+
+    def NOP(self):
+        # Emit a no-op as MOV r0, r0 (the classic ARM idiom for NOP).
+        self.MOV_rr(0, 0)
+
+    def PUSH(self, regs, cond=cond.AL):
+        """Emit PUSH<c> of the given list of core-register numbers.
+
+        Encoding: condition in bits 28-31, fixed opcode field 0x92D in bits
+        16-27, one bit per register from _encode_reg_list in bits 0-15.
+        sp itself must not appear in the list.
+        Note: the 'cond' parameter shadows the module-level conditions import
+        inside this body; here it is the condition-code integer.
+        """
+        assert reg.sp.value not in regs
+        instr = self._encode_reg_list(cond << 28 | 0x92D << 16, regs)
+        self.write32(instr)
+
+    def VPUSH(self, regs, cond=cond.AL):
+        """Emit VPUSH<c> of 1..16 consecutive doubleword VFP registers.
+
+        Only the first register number is encoded (split into the D bit and
+        the 4-bit Dd field); the register-count field holds nregs * 2 because
+        the immediate counts 32-bit words and each D register is two words.
+        Assumes 'regs' is a consecutive run starting at regs[0] -- TODO
+        confirm at the call sites.
+        """
+        nregs = len(regs)
+        assert nregs > 0 and nregs <= 16
+        freg = regs[0]
+        D = (freg & 0x10) >> 4
+        Dd = (freg & 0xF)
+        nregs *= 2
+        instr = (cond << 28
+                | 0xD2D << 16
+                | D << 22
+                | Dd << 12
+                | 0xB << 8
+                | nregs)
+        self.write32(instr)
+
+    def VPOP(self, regs, cond=cond.AL):
+        """Emit VPOP<c> of 1..16 consecutive doubleword VFP registers.
+
+        Mirror image of VPUSH above: same operand encoding (D bit + Dd field
+        from regs[0], word count = nregs * 2), differing only in the opcode
+        field (0xCBD instead of 0xD2D).
+        """
+        nregs = len(regs)
+        assert nregs > 0 and nregs <= 16
+        freg = regs[0]
+        D = (freg & 0x10) >> 4
+        Dd = (freg & 0xF)
+        nregs *= 2
+        instr = (cond << 28
+                | 0xCBD << 16
+                | D << 22
+                | Dd << 12
+                | 0xB << 8
+                | nregs)
+        self.write32(instr)
+
+    def VMOV_rc(self, rt, rt2, dm, cond=cond.AL):
+        """This instruction copies two words from two ARM core registers into a
+        doubleword extension register, or from a doubleword extension register
+        to two ARM core registers.
+        """
+        # op = 1 selects one transfer direction of the VMOV (two core regs <->
+        # doubleword reg) encoding; judging by the argument order (rt, rt2
+        # first) this variant moves Dm -> Rt, Rt2 -- TODO confirm against the
+        # ARM ARM, the docstring covers both directions.
+        op = 1
+        instr = (cond << 28
+                | 0xC << 24
+                | 0x4 << 20
+                | op << 20
+                | (rt2 & 0xF) << 16
+                | (rt & 0xF) << 12
+                | 0xB << 8
+                | 0x1 << 4
+                | (dm & 0xF))
+        self.write32(instr)
+
+    # VMOV<c> <Dm>, <Rt>, <Rt2>
+    def VMOV_cr(self, dm, rt, rt2, cond=cond.AL):
+        """This instruction copies two words from two ARM core registers into a
+        doubleword extension register, or from a doubleword extension register
+        to two ARM core registers.
+        """
+        # Identical encoding to VMOV_rc above except op = 0, i.e. the opposite
+        # transfer direction: Rt, Rt2 -> Dm (matching the comment above the
+        # def) -- TODO confirm against the ARM ARM.
+        op = 0
+        instr = (cond << 28
+                | 0xC << 24
+                | 0x4 << 20
+                | op << 20
+                | (rt2 & 0xF) << 16
+                | (rt & 0xF) << 12
+                | 0xB << 8
+                | 0x1 << 4
+                | (dm & 0xF))
+        self.write32(instr)
+
+    def VMOV_cc(self, dd, dm, cond=cond.AL):
+        """Emit a register-to-register VFP move, Dd <- Dm.
+
+        sz = 1 selects the 64-bit (double-precision) form; only the low 4
+        bits of each register number are encoded, so this covers D0-D15.
+        """
+        sz = 1  # for 64-bit mode
+        instr = (cond << 28
+                | 0xEB << 20
+                | (dd & 0xF) << 12
+                | 0x5 << 9
+                | (sz & 0x1) << 8
+                | 0x1 << 6
+                | (dm & 0xF))
+        self.write32(instr)
+
+    def VCVT_float_to_int(self, target, source, cond=cond.AL):
+        # VCVT double -> integer: opc2 = 0x5 selects the to-signed-int
+        # conversion, sz = 1 the double-precision source (see _VCVT).
+        opc2 = 0x5
+        sz = 1
+        self._VCVT(target, source, cond, opc2, sz)
+
+    def VCVT_int_to_float(self, target, source, cond=cond.AL):
+        # VCVT integer -> double: opc2 = 0 selects the from-int conversion,
+        # sz = 1 the double-precision destination (see _VCVT).
+        self._VCVT(target, source, cond, 0, 1)
+
+    def _VCVT(self, target, source, cond, opc2, sz):
+        """Common VCVT encoder used by the two public conversion helpers.
+
+        'opc2' is OR-ed into bits 16-18 on top of the 0xEB8 opcode field to
+        pick the conversion kind; 'sz' (bit 8) picks single vs. double
+        precision.  D and M (the high bits of the register numbers) are
+        hard-wired to 0, so only registers 0-15 are addressable here.
+        """
+        D = 0x0
+        M = 0
+        op = 1
+        instr = (cond << 28
+                | 0xEB8 << 16
+                | D << 22
+                | opc2 << 16
+                | (target & 0xF) << 12
+                | 0x5 << 9
+                | sz << 8
+                | op << 7
+                | 1 << 6
+                | M << 5
+                | (source & 0xF))
+        self.write32(instr)
+
+    def POP(self, regs, cond=cond.AL):
+        # Emit POP<c> of the given core-register list; same register-list
+        # encoding as PUSH, with opcode field 0x8BD instead of 0x92D.
+        instr = self._encode_reg_list(cond << 28 | 0x8BD << 16, regs)
+        self.write32(instr)
+
+    def BKPT(self):
+        """Unconditional breakpoint (BKPT with cond = AL, immediate 0)."""
+        self.write32(cond.AL << 28 | 0x1200070)
+
+    # corresponds to the instruction vmrs APSR_nzcv, fpscr
+    def VMRS(self, cond=cond.AL):
+        # Copies the VFP comparison flags (FPSCR N/Z/C/V) into the core APSR
+        # so ordinary conditional instructions can test a float compare.
+        self.write32(cond << 28 | 0xEF1FA10)
+
More information about the pypy-commit
mailing list