[pypy-commit] pypy reflex-support: merge default into branch

wlav noreply at buildbot.pypy.org
Sat Sep 29 02:49:20 CEST 2012


Author: Wim Lavrijsen <WLavrijsen at lbl.gov>
Branch: reflex-support
Changeset: r57644:4455780f16f1
Date: 2012-09-05 12:55 -0700
http://bitbucket.org/pypy/pypy/changeset/4455780f16f1/

Log:	merge default into branch

diff too long, truncating to 2000 out of 17500 lines

diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -1,5 +1,6 @@
 syntax: glob
 *.py[co]
+*.sw[po]
 *~
 .*.swp
 .idea
diff --git a/lib-python/2.7/test/test_winreg.py b/lib-python/2.7/test/test_winreg.py
--- a/lib-python/2.7/test/test_winreg.py
+++ b/lib-python/2.7/test/test_winreg.py
@@ -1,7 +1,7 @@
 # Test the windows specific win32reg module.
 # Only win32reg functions not hit here: FlushKey, LoadKey and SaveKey
 
-import os, sys
+import os, sys, errno
 import unittest
 from test import test_support
 threading = test_support.import_module("threading")
@@ -283,7 +283,13 @@
     def test_dynamic_key(self):
         # Issue2810, when the value is dynamically generated, these
         # throw "WindowsError: More data is available" in 2.6 and 3.1
-        EnumValue(HKEY_PERFORMANCE_DATA, 0)
+        try:
+            EnumValue(HKEY_PERFORMANCE_DATA, 0)
+        except OSError as e:
+            if e.errno in (errno.EPERM, errno.EACCES):
+                self.skipTest("access denied to registry key "
+                              "(are you running in a non-interactive session?)")
+            raise
         QueryValueEx(HKEY_PERFORMANCE_DATA, None)
 
     # Reflection requires XP x64/Vista at a minimum. XP doesn't have this stuff
diff --git a/lib-python/conftest.py b/lib-python/conftest.py
--- a/lib-python/conftest.py
+++ b/lib-python/conftest.py
@@ -281,7 +281,7 @@
     RegrTest('test_isinstance.py', core=True),
     RegrTest('test_iter.py', core=True),
     RegrTest('test_iterlen.py', skip="undocumented internal API behavior __length_hint__"),
-    RegrTest('test_itertools.py', core=True),
+    RegrTest('test_itertools.py', core=True, usemodules="itertools struct"),
     RegrTest('test_json.py'),
     RegrTest('test_kqueue.py'),
     RegrTest('test_largefile.py'),
diff --git a/pypy/annotation/policy.py b/pypy/annotation/policy.py
--- a/pypy/annotation/policy.py
+++ b/pypy/annotation/policy.py
@@ -27,11 +27,6 @@
             callback()
         del annotator.bookkeeper.pending_specializations[:]
 
-    def _adjust_space_config(self, space):
-        # allow to override space options.
-        if getattr(self, 'do_imports_immediately', None) is not None:
-            space.do_imports_immediately = self.do_imports_immediately
-
 class AnnotatorPolicy(BasicAnnotatorPolicy):
     """
     Possibly subclass and pass an instance to the annotator to control special casing during annotation
@@ -67,7 +62,7 @@
                 def specialize_with_parms(funcdesc, args_s):
                     return specializer(funcdesc, args_s, *parms)
                 return specialize_with_parms
-        
+
     # common specializations
 
     default_specialize = staticmethod(default)
diff --git a/pypy/annotation/unaryop.py b/pypy/annotation/unaryop.py
--- a/pypy/annotation/unaryop.py
+++ b/pypy/annotation/unaryop.py
@@ -530,7 +530,7 @@
         if not s_enc.is_constant():
             raise TypeError("Non-constant encoding not supported")
         enc = s_enc.const
-        if enc not in ('ascii', 'latin-1'):
+        if enc not in ('ascii', 'latin-1', 'utf-8'):
             raise TypeError("Encoding %s not supported for unicode" % (enc,))
         return SomeString()
     method_encode.can_only_throw = [UnicodeEncodeError]
@@ -553,7 +553,7 @@
         if not s_enc.is_constant():
             raise TypeError("Non-constant encoding not supported")
         enc = s_enc.const
-        if enc not in ('ascii', 'latin-1'):
+        if enc not in ('ascii', 'latin-1', 'utf-8'):
             raise TypeError("Encoding %s not supported for strings" % (enc,))
         return SomeUnicodeString()
     method_decode.can_only_throw = [UnicodeDecodeError]
diff --git a/pypy/config/translationoption.py b/pypy/config/translationoption.py
--- a/pypy/config/translationoption.py
+++ b/pypy/config/translationoption.py
@@ -24,6 +24,7 @@
     'maemo',
     'host',
     'distutils',
+    'arm',
 ]
 
 translation_optiondescription = OptionDescription(
@@ -117,7 +118,7 @@
                          ("translation.gcrootfinder", DEFL_ROOTFINDER_WITHJIT),
                          ("translation.list_comprehension_operations", True)]),
     ChoiceOption("jit_backend", "choose the backend for the JIT",
-                 ["auto", "x86", "x86-without-sse2", "llvm"],
+                 ["auto", "x86", "x86-without-sse2", "llvm", 'arm'],
                  default="auto", cmdline="--jit-backend"),
     ChoiceOption("jit_profiler", "integrate profiler support into the JIT",
                  ["off", "oprofile"],
@@ -406,7 +407,7 @@
     set_platform(config.translation.platform, config.translation.cc)
 
 def get_platform(config):
-    from pypy.translator.platform import pick_platform    
+    from pypy.translator.platform import pick_platform
     opt = config.translation.platform
     cc = config.translation.cc
     return pick_platform(opt, cc)
diff --git a/pypy/conftest.py b/pypy/conftest.py
--- a/pypy/conftest.py
+++ b/pypy/conftest.py
@@ -553,6 +553,7 @@
 
     def _spawn(self, *args, **kwds):
         import pexpect
+        kwds.setdefault('timeout', 600)
         child = pexpect.spawn(*args, **kwds)
         child.logfile = sys.stdout
         return child
diff --git a/pypy/doc/arm.rst b/pypy/doc/arm.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/arm.rst
@@ -0,0 +1,150 @@
+=========================
+Cross-translating for ARM
+=========================
+
+
+Here we describe the setup required and the steps needed to follow to translate
+an interpreter using the RPython translator to target ARM using a cross
+compilation toolchain.
+
+To translate an RPython program for ARM we can
+translate directly on an ARM device following the normal translation steps. Unfortunately this is not really feasible on most ARM machines; the alternative is to cross-translate using a cross-compilation toolchain.
+
+To cross-translate we run the translation on a more powerful (usually
+x86) machine and generate a binary for ARM using a cross-compiler to compile
+the generated C code. There are several constraints when doing this. In
+particular we currently only support Linux as translation host and target
+platforms (tested on Ubuntu). Also we need a 32-bit environment to run the
+translation. This can be done either on a 32bit host or in 32bit chroot.
+
+
+Requirements
+------------
+
+The tools required to cross translate from a Linux based host to an ARM based Linux target are:
+
+- A checkout of PyPy's arm-backend-2 branch.
+- The GCC ARM cross compiler (on Ubuntu it is the ``gcc-arm-linux-gnueabi`` package) but other toolchains should also work.
+- Scratchbox 2, a cross-compilation engine (``scratchbox2`` Ubuntu package).
+- A 32-bit PyPy or Python.
+- And the following (or corresponding) packages need to be installed to create an ARM based chroot:
+
+  * ``debootstrap`` 
+  * ``schroot``
+  * ``binfmt-support``
+  * ``qemu-system``
+  * ``qemu-user-static``
+
+
+Creating a Qemu based ARM chroot
+--------------------------------
+
+First we will need to create a rootfs containing the packages and dependencies
+required in order to translate PyPy or other interpreters. We are going to
+assume that the files will be placed in ``/srv/chroot/precise_arm``.
+
+Create the rootfs by calling:
+
+::
+
+   mkdir -p /srv/chroot/precise_arm
+   qemu-debootstrap --variant=buildd --arch=armel precise /srv/chroot/precise_arm/  http://ports.ubuntu.com/ubuntu-ports/
+
+Next, copy the qemu-arm-static binary to the rootfs.
+
+:: 
+
+  cp /usr/bin/qemu-arm-static /srv/chroot/precise_arm/usr/bin/qemu-arm-static
+
+For easier configuration and management we will create a schroot pointing to
+the rootfs. We need to add a configuration block (like the one below) to the
+schroot configuration file in /etc/schroot/schroot.conf.
+
+
+::
+
+  [precise_arm]
+  directory=/srv/chroot/precise_arm
+  users=USERNAME
+  root-users=USERNAME
+  groups=users
+  aliases=default
+  type=directory
+
+
+To verify that everything is working in the chroot, running ``schroot -c
+precise_arm`` should start a shell running in the schroot environment using
+qemu-arm to execute the ARM binaries. Running ``uname -m`` in the chroot should
+yield a result like ``armv7l``, showing that we are emulating an ARM system.
+
+Start the schroot as the user root in order to configure the apt sources and
+to install the following packages:
+
+
+::
+
+  schroot -c precise_arm -u root
+  echo "deb http://ports.ubuntu.com/ubuntu-ports/ precise main universe restricted" > /etc/apt/sources.list
+  apt-get update
+  apt-get install libffi-dev libgc-dev python-dev build-essential libncurses5-dev libbz2-dev
+
+
+Now all dependencies should be in place and we can exit the schroot environment.
+
+
+Configuring scratchbox2
+-----------------------
+
+To configure the scratchbox we need to cd into the root directory of the rootfs
+we created before. From there we can call the sb2 configuration tools which
+will take the current directory as the base directory for the scratchbox2
+environment.
+
+::
+
+  cd /srv/chroot/precise_arm
+  sb2-init -c `which qemu-arm` ARM `which arm-linux-gnueabi-gcc`
+
+This will create a scratchbox2 based environment called ARM that maps calls to
+gcc done within the scratchbox to the arm-linux-gnueabi-gcc outside the
+scratchbox. Now we should have a working cross compilation toolchain in place
+and can start cross-translating programs for ARM.
+
+Translation
+-----------
+
+Having performed all the preliminary steps we should now be able to cross
+translate a program for ARM.  You can use this_ minimal
+target to test your setup before applying it to a larger project.
+
+Before starting the translator we need to set two environment variables, so the
+translator knows how to use the scratchbox environment. We need to set the
+**SB2** environment variable to point to the rootfs and the **SB2OPT** should
+contain the command line options for the sb2 command. If our rootfs is in the
+folder /srv/chroot/precise_arm and the scratchbox environment is called "ARM",
+the variables would be defined as follows.
+
+
+::
+
+  export SB2=/srv/chroot/precise_arm
+  export SB2OPT='-t ARM'
+
+Once this is set, you can call the translator 
+
+::
+
+  pypy ~/path_to_pypy_checkout/pypy/translator/goal/translate.py -O1 --platform=arm target.py
+
+If everything worked correctly this should yield an ARM binary. Running this binary in the ARM chroot or on an ARM device should produce the output ``"Hello World"``.
+
+.. _`this`:
+
+::
+
+  def main(args):
+      print "Hello World"
+      return 0
+
+  def target(*args):
+      return main, None
\ No newline at end of file
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -35,3 +35,7 @@
 .. branch: better-enforceargs
 .. branch: rpython-unicode-formatting
 .. branch: jit-opaque-licm
+.. branch: rpython-utf8
+Support for utf-8 encoding in RPython
+.. branch: arm-backend-2
+Support ARM in the JIT.
diff --git a/pypy/interpreter/astcompiler/optimize.py b/pypy/interpreter/astcompiler/optimize.py
--- a/pypy/interpreter/astcompiler/optimize.py
+++ b/pypy/interpreter/astcompiler/optimize.py
@@ -21,28 +21,22 @@
 
     def as_constant_truth(self, space):
         """Return the truth of this node if known."""
-        raise AssertionError("only for expressions")
-
-    def as_constant(self):
-        """Return the value of this node as a wrapped constant if possible."""
-        raise AssertionError("only for expressions")
-
-    def accept_jump_if(self, gen, condition, target):
-        raise AssertionError("only for expressions")
-
-
-class __extend__(ast.expr):
-
-    def as_constant_truth(self, space):
         const = self.as_constant()
         if const is None:
             return CONST_NOT_CONST
         return int(space.is_true(const))
 
     def as_constant(self):
+        """Return the value of this node as a wrapped constant if possible."""
         return None
 
     def accept_jump_if(self, gen, condition, target):
+        raise AssertionError("only for expressions")
+
+
+class __extend__(ast.expr):
+
+    def accept_jump_if(self, gen, condition, target):
         self.walkabout(gen)
         if condition:
             gen.emit_jump(ops.POP_JUMP_IF_TRUE, target, True)
diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py
--- a/pypy/interpreter/pycode.py
+++ b/pypy/interpreter/pycode.py
@@ -19,6 +19,10 @@
 from pypy.rlib.objectmodel import compute_hash
 from pypy.tool.stdlib_opcode import opcodedesc, HAVE_ARGUMENT
 
+
+class BytecodeCorruption(Exception):
+    """Detected bytecode corruption.  Never caught; it's an error."""
+
 # helper
 
 def unpack_str_tuple(space,w_str_tuple):
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -8,7 +8,7 @@
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.interpreter.baseobjspace import Wrappable
 from pypy.interpreter import gateway, function, eval, pyframe, pytraceback
-from pypy.interpreter.pycode import PyCode
+from pypy.interpreter.pycode import PyCode, BytecodeCorruption
 from pypy.tool.sourcetools import func_with_new_name
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib import jit, rstackovf
@@ -1172,9 +1172,6 @@
     def __init__(self, operr):
         self.operr = operr
 
-class BytecodeCorruption(Exception):
-    """Detected bytecode corruption.  Never caught; it's an error."""
-
 
 ### Frame Blocks ###
 
diff --git a/pypy/interpreter/test/test_compiler.py b/pypy/interpreter/test/test_compiler.py
--- a/pypy/interpreter/test/test_compiler.py
+++ b/pypy/interpreter/test/test_compiler.py
@@ -851,7 +851,7 @@
             ('a = 14%4', '(2)'),                    # binary modulo
             ('a = 2+3', '(5)'),                     # binary add
             ('a = 13-4', '(9)'),                    # binary subtract
-            # ('a = (12,13)[1]', '(13)'),             # binary subscr - pointless optimization
+            ('a = (12,13)[1]', '(13)'),             # binary subscr
             ('a = 13 << 2', '(52)'),                # binary lshift
             ('a = 13 >> 2', '(3)'),                 # binary rshift
             ('a = 13 & 7', '(5)'),                  # binary and
@@ -872,6 +872,10 @@
         asm = dis_single('a="x"*1000')
         assert '(1000)' in asm
 
+    def test_folding_of_binops_on_constants_crash(self):
+        compile('()[...]', '', 'eval')
+        # assert did not crash
+
     def test_dis_stopcode(self):
         source = """def _f(a):
                 print a
diff --git a/pypy/jit/backend/arm/__init__.py b/pypy/jit/backend/arm/__init__.py
new file mode 100644
diff --git a/pypy/jit/backend/arm/arch.py b/pypy/jit/backend/arm/arch.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/arch.py
@@ -0,0 +1,67 @@
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rlib.rarithmetic import r_uint
+
+
+FUNC_ALIGN = 8
+WORD = 4
+DOUBLE_WORD = 8
+
+# the number of registers that we need to save around malloc calls
+N_REGISTERS_SAVED_BY_MALLOC = 9
+# the offset from the FP where the list of the registers mentioned above starts
+MY_COPY_OF_REGS = WORD
+# The address in the PC points two words behind the current instruction
+PC_OFFSET = 8
+FORCE_INDEX_OFS = 0
+
+from pypy.translator.tool.cbuild import ExternalCompilationInfo
+eci = ExternalCompilationInfo(post_include_bits=["""
+static int pypy__arm_int_div(int a, int b) {
+    return a/b;
+}
+static uint pypy__arm_uint_div(uint a, uint b) {
+    return a/b;
+}
+static int pypy__arm_int_mod(uint a, uint b) {
+    return a % b;
+}
+"""])
+
+
+def arm_int_div_emulator(a, b):
+    return int(a / float(b))
+arm_int_div_sign = lltype.Ptr(
+        lltype.FuncType([lltype.Signed, lltype.Signed], lltype.Signed))
+arm_int_div = rffi.llexternal(
+    "pypy__arm_int_div", [lltype.Signed, lltype.Signed], lltype.Signed,
+                        _callable=arm_int_div_emulator,
+                        compilation_info=eci,
+                        _nowrapper=True, elidable_function=True)
+
+
+def arm_uint_div_emulator(a, b):
+    return r_uint(a) / r_uint(b)
+arm_uint_div_sign = lltype.Ptr(
+        lltype.FuncType([lltype.Unsigned, lltype.Unsigned], lltype.Unsigned))
+arm_uint_div = rffi.llexternal(
+    "pypy__arm_uint_div", [lltype.Unsigned, lltype.Unsigned], lltype.Unsigned,
+                        _callable=arm_uint_div_emulator,
+                        compilation_info=eci,
+                        _nowrapper=True, elidable_function=True)
+
+
+def arm_int_mod_emulator(a, b):
+    sign = 1
+    if a < 0:
+        a = -1 * a
+        sign = -1
+    if b < 0:
+        b = -1 * b
+    res = a % b
+    return sign * res
+arm_int_mod_sign = arm_int_div_sign
+arm_int_mod = rffi.llexternal(
+    "pypy__arm_int_mod", [lltype.Signed, lltype.Signed], lltype.Signed,
+                        _callable=arm_int_mod_emulator,
+                        compilation_info=eci,
+                        _nowrapper=True, elidable_function=True)
diff --git a/pypy/jit/backend/arm/assembler.py b/pypy/jit/backend/arm/assembler.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/assembler.py
@@ -0,0 +1,1358 @@
+from __future__ import with_statement
+import os
+from pypy.jit.backend.arm.helper.assembler import saved_registers
+from pypy.jit.backend.arm import conditions as c
+from pypy.jit.backend.arm import registers as r
+from pypy.jit.backend.arm.arch import WORD, DOUBLE_WORD, FUNC_ALIGN, \
+                                    N_REGISTERS_SAVED_BY_MALLOC
+from pypy.jit.backend.arm.codebuilder import ARMv7Builder, OverwritingBuilder
+from pypy.jit.backend.arm.locations import get_fp_offset
+from pypy.jit.backend.arm.regalloc import (Regalloc, ARMFrameManager,
+                    ARMv7RegisterManager, check_imm_arg,
+                    operations as regalloc_operations,
+                    operations_with_guard as regalloc_operations_with_guard)
+from pypy.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
+from pypy.jit.backend.model import CompiledLoopToken
+from pypy.jit.codewriter import longlong
+from pypy.jit.codewriter.effectinfo import EffectInfo
+from pypy.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
+from pypy.jit.metainterp.history import BoxInt, ConstInt
+from pypy.jit.metainterp.resoperation import rop, ResOperation
+from pypy.rlib import rgc
+from pypy.rlib.objectmodel import we_are_translated, specialize
+from pypy.rpython.annlowlevel import llhelper
+from pypy.rpython.lltypesystem import lltype, rffi, llmemory
+from pypy.rpython.lltypesystem.lloperation import llop
+from pypy.jit.backend.arm.opassembler import ResOpAssembler
+from pypy.rlib.debug import (debug_print, debug_start, debug_stop,
+                             have_debug_prints)
+from pypy.rlib.jit import AsmInfo
+from pypy.rlib.objectmodel import compute_unique_id
+
+# XXX Move to llsupport
+from pypy.jit.backend.x86.support import values_array, memcpy_fn
+
+DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
+                              ('type', lltype.Char),  # 'b'ridge, 'l'abel or
+                                                      # 'e'ntry point
+                              ('number', lltype.Signed))
+
+
+class AssemblerARM(ResOpAssembler):
+
+    STACK_FIXED_AREA = -1
+
+    debug = True
+
+    def __init__(self, cpu, failargs_limit=1000):
+        self.cpu = cpu
+        self.fail_boxes_int = values_array(lltype.Signed, failargs_limit)
+        self.fail_boxes_float = values_array(longlong.FLOATSTORAGE,
+                                                            failargs_limit)
+        self.fail_boxes_ptr = values_array(llmemory.GCREF, failargs_limit)
+        self.fail_boxes_count = 0
+        self.fail_force_index = 0
+        self.setup_failure_recovery()
+        self.mc = None
+        self.memcpy_addr = 0
+        self.pending_guards = None
+        self._exit_code_addr = 0
+        self.current_clt = None
+        self.malloc_slowpath = 0
+        self.wb_slowpath = [0, 0, 0, 0]
+        self._regalloc = None
+        self.datablockwrapper = None
+        self.propagate_exception_path = 0
+        self.stack_check_slowpath = 0
+        self._compute_stack_size()
+        self._debug = False
+        self.loop_run_counters = []
+        self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i')
+
+    def set_debug(self, v):
+        r = self._debug
+        self._debug = v
+        return r
+
+    def _compute_stack_size(self):
+        self.STACK_FIXED_AREA = len(r.callee_saved_registers) * WORD
+        self.STACK_FIXED_AREA += WORD  # FORCE_TOKEN
+        self.STACK_FIXED_AREA += N_REGISTERS_SAVED_BY_MALLOC * WORD
+        if self.cpu.supports_floats:
+            self.STACK_FIXED_AREA += (len(r.callee_saved_vfp_registers)
+                                        * DOUBLE_WORD)
+        if self.STACK_FIXED_AREA % 8 != 0:
+            self.STACK_FIXED_AREA += WORD  # Stack alignment
+        assert self.STACK_FIXED_AREA % 8 == 0
+
+    def setup(self, looptoken, operations):
+        self.current_clt = looptoken.compiled_loop_token
+        operations = self.cpu.gc_ll_descr.rewrite_assembler(self.cpu,
+                        operations, self.current_clt.allgcrefs)
+        assert self.memcpy_addr != 0, 'setup_once() not called?'
+        self.mc = ARMv7Builder()
+        self.pending_guards = []
+        assert self.datablockwrapper is None
+        allblocks = self.get_asmmemmgr_blocks(looptoken)
+        self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
+                                                        allblocks)
+        self.target_tokens_currently_compiling = {}
+        return operations
+
+    def teardown(self):
+        self.current_clt = None
+        self._regalloc = None
+        self.mc = None
+        self.pending_guards = None
+        assert self.datablockwrapper is None
+
+    def setup_once(self):
+        # Addresses of functions called by new_xxx operations
+        gc_ll_descr = self.cpu.gc_ll_descr
+        gc_ll_descr.initialize()
+        self._build_wb_slowpath(False)
+        self._build_wb_slowpath(True)
+        if self.cpu.supports_floats:
+            self._build_wb_slowpath(False, withfloats=True)
+            self._build_wb_slowpath(True, withfloats=True)
+        self._build_propagate_exception_path()
+        if gc_ll_descr.get_malloc_slowpath_addr is not None:
+            self._build_malloc_slowpath()
+        self._build_stack_check_slowpath()
+        if gc_ll_descr.gcrootmap and gc_ll_descr.gcrootmap.is_shadow_stack:
+            self._build_release_gil(gc_ll_descr.gcrootmap)
+        self.memcpy_addr = self.cpu.cast_ptr_to_int(memcpy_fn)
+        self._exit_code_addr = self._gen_exit_path()
+        self._leave_jitted_hook_save_exc = \
+                                    self._gen_leave_jitted_hook_code(True)
+        self._leave_jitted_hook = self._gen_leave_jitted_hook_code(False)
+        if not self._debug:
+            # if self._debug is already set it means that someone called
+            # set_debug by hand before initializing the assembler. Leave it
+            # as it is
+            debug_start('jit-backend-counts')
+            self.set_debug(have_debug_prints())
+            debug_stop('jit-backend-counts')
+
+    def finish_once(self):
+        if self._debug:
+            debug_start('jit-backend-counts')
+            for i in range(len(self.loop_run_counters)):
+                struct = self.loop_run_counters[i]
+                if struct.type == 'l':
+                    prefix = 'TargetToken(%d)' % struct.number
+                elif struct.type == 'b':
+                    prefix = 'bridge ' + str(struct.number)
+                else:
+                    prefix = 'entry ' + str(struct.number)
+                debug_print(prefix + ':' + str(struct.i))
+            debug_stop('jit-backend-counts')
+
+    # XXX: merge with x86
+    def _register_counter(self, tp, number, token):
+        # YYY very minor leak -- we need the counters to stay alive
+        # forever, just because we want to report them at the end
+        # of the process
+        struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
+                               track_allocation=False)
+        struct.i = 0
+        struct.type = tp
+        if tp == 'b' or tp == 'e':
+            struct.number = number
+        else:
+            assert token
+            struct.number = compute_unique_id(token)
+        self.loop_run_counters.append(struct)
+        return struct
+
+    def _append_debugging_code(self, operations, tp, number, token):
+        counter = self._register_counter(tp, number, token)
+        c_adr = ConstInt(rffi.cast(lltype.Signed, counter))
+        box = BoxInt()
+        box2 = BoxInt()
+        ops = [ResOperation(rop.GETFIELD_RAW, [c_adr],
+                            box, descr=self.debug_counter_descr),
+               ResOperation(rop.INT_ADD, [box, ConstInt(1)], box2),
+               ResOperation(rop.SETFIELD_RAW, [c_adr, box2],
+                            None, descr=self.debug_counter_descr)]
+        operations.extend(ops)
+
+    @specialize.argtype(1)
+    def _inject_debugging_code(self, looptoken, operations, tp, number):
+        if self._debug:
+            # before doing anything, let's increase a counter
+            s = 0
+            for op in operations:
+                s += op.getopnum()
+            looptoken._arm_debug_checksum = s
+
+            newoperations = []
+            self._append_debugging_code(newoperations, tp, number,
+                                        None)
+            for op in operations:
+                newoperations.append(op)
+                if op.getopnum() == rop.LABEL:
+                    self._append_debugging_code(newoperations, 'l', number,
+                                                op.getdescr())
+            operations = newoperations
+        return operations
+
+    @staticmethod
+    def _release_gil_shadowstack():
+        before = rffi.aroundstate.before
+        if before:
+            before()
+
+    @staticmethod
+    def _reacquire_gil_shadowstack():
+        after = rffi.aroundstate.after
+        if after:
+            after()
+
+    _NOARG_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
+
+    def _build_release_gil(self, gcrootmap):
+        assert gcrootmap.is_shadow_stack
+        releasegil_func = llhelper(self._NOARG_FUNC,
+                                   self._release_gil_shadowstack)
+        reacqgil_func = llhelper(self._NOARG_FUNC,
+                                 self._reacquire_gil_shadowstack)
+        self.releasegil_addr = rffi.cast(lltype.Signed, releasegil_func)
+        self.reacqgil_addr = rffi.cast(lltype.Signed, reacqgil_func)
+
+    def _gen_leave_jitted_hook_code(self, save_exc):
+        mc = ARMv7Builder()
+        if self.cpu.supports_floats:
+            floats = r.caller_vfp_resp
+        else:
+            floats = []
+        with saved_registers(mc, r.caller_resp + [r.lr], floats):
+            addr = self.cpu.get_on_leave_jitted_int(save_exception=save_exc)
+            mc.BL(addr)
+        assert self._exit_code_addr != 0
+        mc.B(self._exit_code_addr)
+        return mc.materialize(self.cpu.asmmemmgr, [],
+                               self.cpu.gc_ll_descr.gcrootmap)
+
+    def _build_propagate_exception_path(self):
+        if self.cpu.propagate_exception_v < 0:
+            return      # not supported (for tests, or non-translated)
+        #
+        mc = ARMv7Builder()
+        # call on_leave_jitted_save_exc()
+        if self.cpu.supports_floats:
+            floats = r.caller_vfp_resp
+        else:
+            floats = []
+        with saved_registers(mc, r.caller_resp + [r.lr], floats):
+            addr = self.cpu.get_on_leave_jitted_int(save_exception=True,
+                                                default_to_memoryerror=True)
+            mc.BL(addr)
+        mc.gen_load_int(r.ip.value, self.cpu.propagate_exception_v)
+        mc.MOV_rr(r.r0.value, r.ip.value)
+        self.gen_func_epilog(mc=mc)
+        self.propagate_exception_path = mc.materialize(self.cpu.asmmemmgr, [])
+
+    def _build_stack_check_slowpath(self):
+        _, _, slowpathaddr = self.cpu.insert_stack_check()
+        if slowpathaddr == 0 or self.cpu.propagate_exception_v < 0:
+            return      # no stack check (for tests, or non-translated)
+        #
+        # make a "function" that is called immediately at the start of
+        # an assembler function.  In particular, the stack looks like:
+        #
+        #    |  retaddr of caller    |   <-- aligned to a multiple of 16
+        #    |  saved argument regs  |
+        #    |  my own retaddr       |    <-- sp
+        #    +-----------------------+
+        #
+        mc = ARMv7Builder()
+        # save argument registers and return address
+        mc.PUSH([reg.value for reg in r.argument_regs] + [r.lr.value])
+        # stack is aligned here
+        # Pass current stack pointer as argument to the call
+        mc.MOV_rr(r.r0.value, r.sp.value)
+        #
+        mc.BL(slowpathaddr)
+
+        # check for an exception
+        mc.gen_load_int(r.r0.value, self.cpu.pos_exception())
+        mc.LDR_ri(r.r0.value, r.r0.value)
+        mc.TST_rr(r.r0.value, r.r0.value)
+        # restore registers and return 
+        # We check for c.EQ here, meaning all bits zero in this case
+        mc.POP([reg.value for reg in r.argument_regs] + [r.pc.value], cond=c.EQ)
+        # call on_leave_jitted_save_exc()
+        addr = self.cpu.get_on_leave_jitted_int(save_exception=True)
+        mc.BL(addr)
+        #
+        mc.gen_load_int(r.r0.value, self.cpu.propagate_exception_v)
+        #
+        # footer -- note the ADD, which skips the return address of this
+        # function, and will instead return to the caller's caller.  Note
+        # also that we completely ignore the saved arguments, because we
+        # are interrupting the function.
+        mc.ADD_ri(r.sp.value, r.sp.value, (len(r.argument_regs) + 1) * WORD)
+        mc.POP([r.pc.value])
+        #
+        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+        self.stack_check_slowpath = rawstart
+
+    def _build_wb_slowpath(self, withcards, withfloats=False):
+        # Generate one write-barrier slow-path stub and record its raw
+        # address in self.wb_slowpath[withcards + 2 * withfloats].
+        # Returns early (emitting nothing) when the GC provides no
+        # write-barrier descriptor, or no from-array barrier for the
+        # card-marking variant.
+        descr = self.cpu.gc_ll_descr.write_barrier_descr
+        if descr is None:
+            return
+        if not withcards:
+            func = descr.get_write_barrier_fn(self.cpu)
+        else:
+            if descr.jit_wb_cards_set == 0:
+                return
+            func = descr.get_write_barrier_from_array_fn(self.cpu)
+            if func == 0:
+                return
+        #
+        # This builds a helper function called from the slow path of
+        # write barriers.  It must save all registers, and optionally
+        # all vfp registers.  It takes a single argument which is in r0.
+        # It must keep stack alignment accordingly.
+        mc = ARMv7Builder()
+        #
+        if withfloats:
+            floats = r.caller_vfp_resp
+        else:
+            floats = []
+        with saved_registers(mc, r.caller_resp + [r.ip, r.lr], floats):
+            mc.BL(func)
+        #
+        if withcards:
+            # A final TEST8 before the RET, for the caller.  Careful to
+            # not follow this instruction with another one that changes
+            # the status of the CPU flags!
+            mc.LDRB_ri(r.ip.value, r.r0.value,
+                                    imm=descr.jit_wb_if_flag_byteofs)
+            mc.TST_ri(r.ip.value, imm=0x80)
+        #
+        mc.MOV_rr(r.pc.value, r.lr.value)  # return to the caller (pc <- lr)
+        #
+        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+        self.wb_slowpath[withcards + 2 * withfloats] = rawstart
+
+    def setup_failure_recovery(self):
+        # Build the closure invoked on guard failure and keep it on
+        # self so it can later be wrapped via llhelper (see
+        # _gen_exit_path).  Marked @rgc.no_collect: it must not
+        # trigger a GC collection while decoding the raw frame.
+
+        @rgc.no_collect
+        def failure_recovery_func(mem_loc, frame_pointer, stack_pointer):
+            """mem_loc is a structure in memory describing where the values for
+            the failargs are stored.  frame loc is the address of the frame
+            pointer for the frame to be decoded frame """
+            # stack_pointer points at the block of saved vfp registers
+            # (64-bit each); the saved core registers follow right after
+            # the len(r.all_vfp_regs) vfp slots.
+            vfp_registers = rffi.cast(rffi.LONGLONGP, stack_pointer)
+            registers = rffi.ptradd(vfp_registers, len(r.all_vfp_regs))
+            registers = rffi.cast(rffi.LONGP, registers)
+            return self.decode_registers_and_descr(mem_loc, frame_pointer,
+                                                    registers, vfp_registers)
+
+        self.failure_recovery_func = failure_recovery_func
+
+    # lltype signature of failure_recovery_func: three Signed arguments
+    # (mem_loc, frame_pointer, stack_pointer) returning a Signed fail
+    # index; used by llhelper in _gen_exit_path.
+    recovery_func_sign = lltype.Ptr(lltype.FuncType([lltype.Signed] * 3,
+                                                        lltype.Signed))
+
+    @rgc.no_collect
+    def decode_registers_and_descr(self, mem_loc, frame_pointer,
+                                                registers, vfp_registers):
+        """Decode the failure-recovery bytecode at mem_loc and copy the
+        failarg values into the fail_boxes_<type> arrays.
+
+        Values are read either from the stack (relative to
+        frame_pointer) or from the saved core/vfp register arrays.
+        Returns the fail index, stored as a 32-bit int immediately
+        after the CODE_STOP byte.
+        """
+        assert frame_pointer & 1 == 0
+        self.fail_force_index = frame_pointer
+        bytecode = rffi.cast(rffi.UCHARP, mem_loc)
+        num = 0
+        value = 0
+        fvalue = 0
+        code_inputarg = False
+        while True:
+            code = rffi.cast(lltype.Signed, bytecode[0])
+            bytecode = rffi.ptradd(bytecode, 1)
+            if code >= self.CODE_FROMSTACK:
+                # variable-length encoding: a byte > 0x7F means further
+                # 7-bit chunks follow, least-significant first
+                if code > 0x7F:
+                    shift = 7
+                    code &= 0x7F
+                    while True:
+                        nextcode = rffi.cast(lltype.Signed, bytecode[0])
+                        bytecode = rffi.ptradd(bytecode, 1)
+                        code |= (nextcode & 0x7F) << shift
+                        shift += 7
+                        if nextcode <= 0x7F:
+                            break
+                # load the value from the stack
+                kind = code & 3
+                code = int((code - self.CODE_FROMSTACK) >> 2)
+                if code_inputarg:
+                    # a preceding CODE_INPUTARG marker flags a negated
+                    # (input-argument) stack position
+                    code = ~code
+                    code_inputarg = False
+                if kind == self.DESCR_FLOAT:
+                    # we use code + 1 to get the hi word of the double worded float
+                    stackloc = frame_pointer - get_fp_offset(int(code) + 1)
+                    assert stackloc & 3 == 0
+                    fvalue = rffi.cast(rffi.LONGLONGP, stackloc)[0]
+                else:
+                    stackloc = frame_pointer - get_fp_offset(int(code))
+                    assert stackloc & 1 == 0
+                    value = rffi.cast(rffi.LONGP, stackloc)[0]
+            else:
+                # 'code' identifies a register: load its value
+                kind = code & 3
+                if kind == self.DESCR_SPECIAL:
+                    if code == self.CODE_HOLE:
+                        num += 1
+                        continue
+                    if code == self.CODE_INPUTARG:
+                        code_inputarg = True
+                        continue
+                    assert code == self.CODE_STOP
+                    break
+                code >>= 2
+                if kind == self.DESCR_FLOAT:
+                    fvalue = vfp_registers[code]
+                else:
+                    value = registers[code]
+            # store the loaded value into fail_boxes_<type>
+            if kind == self.DESCR_FLOAT:
+                tgt = self.fail_boxes_float.get_addr_for_num(num)
+                rffi.cast(rffi.LONGLONGP, tgt)[0] = fvalue
+            else:
+                if kind == self.DESCR_INT:
+                    tgt = self.fail_boxes_int.get_addr_for_num(num)
+                elif kind == self.DESCR_REF:
+                    assert (value & 3) == 0, "misaligned pointer"
+                    tgt = self.fail_boxes_ptr.get_addr_for_num(num)
+                else:
+                    assert 0, "bogus kind"
+                rffi.cast(rffi.LONGP, tgt)[0] = value
+            num += 1
+        self.fail_boxes_count = num
+        # the 32-bit fail descr number follows the CODE_STOP byte
+        fail_index = rffi.cast(rffi.INTP, bytecode)[0]
+        fail_index = rffi.cast(lltype.Signed, fail_index)
+        return fail_index
+
+    def decode_inputargs(self, code):
+        """Decode a failure-recovery bytecode into a list of locations
+        (stack positions or core/vfp registers); CODE_HOLE entries are
+        skipped.  Mirrors the decoding in decode_registers_and_descr
+        but yields locations rather than values.
+        """
+        descr_to_box_type = [REF, INT, FLOAT]
+        bytecode = rffi.cast(rffi.UCHARP, code)
+        arglocs = []
+        code_inputarg = False
+        while 1:
+            # decode the next instruction from the bytecode
+            code = rffi.cast(lltype.Signed, bytecode[0])
+            bytecode = rffi.ptradd(bytecode, 1)
+            if code >= self.CODE_FROMSTACK:
+                # 'code' identifies a stack location
+                if code > 0x7F:
+                    # multi-byte encoding, 7 bits per byte, LSB first
+                    shift = 7
+                    code &= 0x7F
+                    while True:
+                        nextcode = rffi.cast(lltype.Signed, bytecode[0])
+                        bytecode = rffi.ptradd(bytecode, 1)
+                        code |= (nextcode & 0x7F) << shift
+                        shift += 7
+                        if nextcode <= 0x7F:
+                            break
+                kind = code & 3
+                code = (code - self.CODE_FROMSTACK) >> 2
+                if code_inputarg:
+                    # CODE_INPUTARG marks a negated stack position
+                    code = ~code
+                    code_inputarg = False
+                loc = ARMFrameManager.frame_pos(code, descr_to_box_type[kind])
+            elif code == self.CODE_STOP:
+                break
+            elif code == self.CODE_HOLE:
+                continue
+            elif code == self.CODE_INPUTARG:
+                code_inputarg = True
+                continue
+            else:
+                # 'code' identifies a register
+                kind = code & 3
+                code >>= 2
+                if kind == self.DESCR_FLOAT:
+                    loc = r.all_vfp_regs[code]
+                else:
+                    loc = r.all_regs[code]
+            arglocs.append(loc)
+        return arglocs[:]
+
+    def _build_malloc_slowpath(self):
+        # Build the stub jumped to when nursery allocation fails.  On
+        # entry r0/r1 hold the values needed to compute the requested
+        # size; on a successful return r0 holds the result and r1 is
+        # reloaded with the current nursery-free pointer.
+        mc = ARMv7Builder()
+        if self.cpu.supports_floats:
+            vfp_regs = r.all_vfp_regs
+        else:
+            vfp_regs = []
+        # We need to push two registers here because we are going to make a
+        # call an therefore the stack needs to be 8-byte aligned
+        mc.PUSH([r.ip.value, r.lr.value])
+        with saved_registers(mc, [], vfp_regs):
+            # At this point we know that the values we need to compute the size
+            # are stored in r0 and r1.
+            mc.SUB_rr(r.r0.value, r.r1.value, r.r0.value)
+            addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
+            # spill the registers listed in REGLOC_TO_COPY_AREA_OFS to
+            # the copy area (off fp) around the call, then reload them
+            for reg, ofs in ARMv7RegisterManager.REGLOC_TO_COPY_AREA_OFS.items():
+                mc.STR_ri(reg.value, r.fp.value, imm=ofs)
+            mc.BL(addr)
+            for reg, ofs in ARMv7RegisterManager.REGLOC_TO_COPY_AREA_OFS.items():
+                mc.LDR_ri(reg.value, r.fp.value, imm=ofs)
+
+        # r0 == 0 means the allocation raised; propagate the exception
+        mc.CMP_ri(r.r0.value, 0)
+        mc.B(self.propagate_exception_path, c=c.EQ)
+        nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
+        mc.gen_load_int(r.r1.value, nursery_free_adr)
+        mc.LDR_ri(r.r1.value, r.r1.value)
+        # see above
+        mc.POP([r.ip.value, r.pc.value])
+
+        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+        self.malloc_slowpath = rawstart
+
+    def propagate_memoryerror_if_r0_is_null(self):
+        # Emit a conditional jump to the exception-propagation path
+        # taken when r0 (the allocation result) is NULL.
+        # see ../x86/assembler.py:propagate_memoryerror_if_eax_is_null
+        self.mc.CMP_ri(r.r0.value, 0)
+        self.mc.B(self.propagate_exception_path, c=c.EQ)
+
+    def _gen_exit_path(self):
+        # Build the common guard-exit stub: save all core and vfp
+        # registers, call failure_recovery_func (via llhelper) with the
+        # bytecode address (in lr), the frame pointer and the stack
+        # pointer, then return its fail index in r0 via the epilog.
+        mc = ARMv7Builder()
+        decode_registers_addr = llhelper(self.recovery_func_sign,
+                                            self.failure_recovery_func)
+        self._insert_checks(mc)
+        with saved_registers(mc, r.all_regs, r.all_vfp_regs):
+            # move mem block address, to r0 to pass as
+            mc.MOV_rr(r.r0.value, r.lr.value)
+            # pass the current frame pointer as second param
+            mc.MOV_rr(r.r1.value, r.fp.value)
+            # pass the current stack pointer as third param
+            mc.MOV_rr(r.r2.value, r.sp.value)
+            self._insert_checks(mc)
+            mc.BL(rffi.cast(lltype.Signed, decode_registers_addr))
+            # stash the result in ip: the register restore that closes
+            # this 'with' block would clobber r0
+            mc.MOV_rr(r.ip.value, r.r0.value)
+        mc.MOV_rr(r.r0.value, r.ip.value)
+        self.gen_func_epilog(mc=mc)
+        return mc.materialize(self.cpu.asmmemmgr, [],
+                                   self.cpu.gc_ll_descr.gcrootmap)
+
+    # Failure-recovery bytecode encoding.  The low two bits of a code
+    # select the kind (REF/INT/FLOAT/SPECIAL); codes >= CODE_FROMSTACK
+    # denote stack positions, smaller ones registers.  The SPECIAL kind
+    # carries the STOP / HOLE / INPUTARG markers.
+    DESCR_REF       = 0x00
+    DESCR_INT       = 0x01
+    DESCR_FLOAT     = 0x02
+    DESCR_SPECIAL   = 0x03
+    CODE_FROMSTACK  = 64
+    CODE_STOP       = 0 | DESCR_SPECIAL
+    CODE_HOLE       = 4 | DESCR_SPECIAL
+    CODE_INPUTARG   = 8 | DESCR_SPECIAL
+
+    def gen_descr_encoding(self, descr, failargs, locs):
+        """Write into self.mc the bytecode describing where each
+        failarg lives (see the CODE_* constants above), terminated by
+        CODE_STOP, followed by the 32-bit fail descr number and
+        alignment padding.  Decoded later by decode_registers_and_descr
+        / decode_inputargs.
+        """
+        assert self.mc is not None
+        for i in range(len(failargs)):
+            arg = failargs[i]
+            if arg is not None:
+                if arg.type == REF:
+                    kind = self.DESCR_REF
+                elif arg.type == INT:
+                    kind = self.DESCR_INT
+                elif arg.type == FLOAT:
+                    kind = self.DESCR_FLOAT
+                else:
+                    raise AssertionError("bogus kind")
+                loc = locs[i]
+                if loc.is_stack():
+                    pos = loc.position
+                    if pos < 0:
+                        # negative positions (input args) are flagged
+                        # with a CODE_INPUTARG marker and negated
+                        self.mc.writechar(chr(self.CODE_INPUTARG))
+                        pos = ~pos
+                    n = self.CODE_FROMSTACK // 4 + pos
+                else:
+                    assert loc.is_reg() or loc.is_vfp_reg()
+                    n = loc.value
+                n = kind + 4 * n
+                # variable-length: emit 7 bits per byte, high bit set
+                # on all but the last byte
+                while n > 0x7F:
+                    self.mc.writechar(chr((n & 0x7F) | 0x80))
+                    n >>= 7
+            else:
+                n = self.CODE_HOLE
+            self.mc.writechar(chr(n))
+        self.mc.writechar(chr(self.CODE_STOP))
+
+        fdescr = self.cpu.get_fail_descr_number(descr)
+        self.mc.write32(fdescr)
+        self.align()
+
+        # assert that the fail_boxes lists are big enough
+        assert len(failargs) <= self.fail_boxes_int.SIZE
+
+    def _gen_path_to_exit_path(self, descr, args, arglocs,
+                                            save_exc, fcond=c.AL):
+        # Emit the call into the leave-jitted hook followed by the
+        # encoded failarg locations (arglocs[0] is skipped; it holds
+        # the frame depth, see write_pending_failure_recoveries).
+        assert isinstance(save_exc, bool)
+        self.gen_exit_code(self.mc, save_exc, fcond)
+        self.gen_descr_encoding(descr, args, arglocs[1:])
+
+    def gen_exit_code(self, mc, save_exc, fcond=c.AL):
+        # Branch-and-link into the leave-jitted hook, choosing the
+        # exception-saving variant when save_exc is set.
+        assert isinstance(save_exc, bool)
+        if save_exc:
+            path = self._leave_jitted_hook_save_exc
+        else:
+            path = self._leave_jitted_hook
+        mc.BL(path)
+
+    def align(self):
+        # Pad the current buffer position with zero bytes up to the
+        # next FUNC_ALIGN boundary.
+        while(self.mc.currpos() % FUNC_ALIGN != 0):
+            self.mc.writechar(chr(0))
+
+    def gen_func_epilog(self, mc=None, cond=c.AL):
+        # Undo gen_func_prolog: pop the shadowstack entry if any,
+        # restore sp from fp plus the fixed frame size, then pop the
+        # callee-saved vfp and core registers (the final POP of
+        # callee_restored_registers performs the actual return).
+        stack_size = self.STACK_FIXED_AREA
+        stack_size -= len(r.callee_saved_registers) * WORD
+        if self.cpu.supports_floats:
+            stack_size -= len(r.callee_saved_vfp_registers) * 2 * WORD
+
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        if mc is None:
+            mc = self.mc
+        if gcrootmap and gcrootmap.is_shadow_stack:
+            self.gen_footer_shadowstack(gcrootmap, mc)
+        mc.MOV_rr(r.sp.value, r.fp.value, cond=cond)
+        mc.ADD_ri(r.sp.value, r.sp.value, stack_size, cond=cond)
+        if self.cpu.supports_floats:
+            mc.VPOP([reg.value for reg in r.callee_saved_vfp_registers],
+                                                                    cond=cond)
+        mc.POP([reg.value for reg in r.callee_restored_registers], cond=cond)
+
+    def gen_func_prolog(self):
+        # Emit the standard entry sequence: push callee-saved core (and
+        # vfp) registers, reserve the fixed frame area, set fp = sp and
+        # push a shadowstack entry when the GC uses one.
+        stack_size = self.STACK_FIXED_AREA
+        stack_size -= len(r.callee_saved_registers) * WORD
+        if self.cpu.supports_floats:
+            stack_size -= len(r.callee_saved_vfp_registers) * 2 * WORD
+
+        self.mc.PUSH([reg.value for reg in r.callee_saved_registers])
+        if self.cpu.supports_floats:
+            self.mc.VPUSH([reg.value for reg in r.callee_saved_vfp_registers])
+        # here we modify the stack pointer to leave room for the 9 registers
+        # that are going to be saved here around malloc calls and one word to
+        # store the force index
+        self.mc.SUB_ri(r.sp.value, r.sp.value, stack_size)
+        self.mc.MOV_rr(r.fp.value, r.sp.value)
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        if gcrootmap and gcrootmap.is_shadow_stack:
+            self.gen_shadowstack_header(gcrootmap)
+
+    def gen_shadowstack_header(self, gcrootmap):
+        # we need to put two words into the shadowstack: the MARKER_FRAME
+        # and the address of the frame (fp, actually)
+        rst = gcrootmap.get_root_stack_top_addr()
+        self.mc.gen_load_int(r.ip.value, rst)
+        self.mc.LDR_ri(r.r4.value, r.ip.value)  # LDR r4, [rootstacktop]
+        #
+        MARKER = gcrootmap.MARKER_FRAME
+        self.mc.ADD_ri(r.r5.value, r.r4.value,
+                                    imm=2 * WORD)  # ADD r5, r4 [2*WORD]
+        self.mc.gen_load_int(r.r6.value, MARKER)
+        self.mc.STR_ri(r.r6.value, r.r4.value, WORD)  # STR MARKER, r4 [WORD]
+        self.mc.STR_ri(r.fp.value, r.r4.value)  # STR fp, r4
+        #
+        # bump rootstacktop by the two words just written
+        self.mc.STR_ri(r.r5.value, r.ip.value)  # STR r5 [rootstacktop]
+
+    def gen_footer_shadowstack(self, gcrootmap, mc):
+        # Pop the two-word entry pushed by gen_shadowstack_header by
+        # moving rootstacktop back down by 2*WORD.
+        rst = gcrootmap.get_root_stack_top_addr()
+        mc.gen_load_int(r.ip.value, rst)
+        mc.LDR_ri(r.r4.value, r.ip.value)  # LDR r4, [rootstacktop]
+        # NOTE(review): the trailing comment says ADD but the code
+        # subtracts, which matches "pop"; comment looks copy-pasted.
+        mc.SUB_ri(r.r5.value, r.r4.value, imm=2 * WORD)  # ADD r5, r4 [2*WORD]
+        mc.STR_ri(r.r5.value, r.ip.value)
+
+    def _dump(self, ops, type='loop'):
+        # Debug-log the given operations under the 'jit-backend-ops'
+        # debug section, tagged with the given type ('loop'/'bridge').
+        debug_start('jit-backend-ops')
+        debug_print(type)
+        for op in ops:
+            debug_print(op.repr())
+        debug_stop('jit-backend-ops')
+
+    def _call_header(self):
+        # Align the code buffer, then emit the function prolog.
+        self.align()
+        self.gen_func_prolog()
+
+    def _call_header_with_stack_check(self):
+        # Like _call_header, but first emit a stack-depth check that
+        # calls stack_check_slowpath when (stackend - sp) exceeds the
+        # allowed length.  Skipped entirely when no slowpath was built
+        # (e.g. not translated).
+        if self.stack_check_slowpath == 0:
+            pass                # no stack check (e.g. not translated)
+        else:
+            endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
+            self.mc.PUSH([r.lr.value])
+            # load stack end
+            self.mc.gen_load_int(r.ip.value, endaddr)          # load ip, [end]
+            self.mc.LDR_ri(r.ip.value, r.ip.value)             # LDR ip, ip
+            # load stack length
+            self.mc.gen_load_int(r.lr.value, lengthaddr)       # load lr, lengh
+            self.mc.LDR_ri(r.lr.value, r.lr.value)             # ldr lr, *lengh
+            # calculate ofs
+            self.mc.SUB_rr(r.ip.value, r.ip.value, r.sp.value) # SUB ip, current
+            # if ofs 
+            self.mc.CMP_rr(r.ip.value, r.lr.value)             # CMP ip, lr
+            self.mc.BL(self.stack_check_slowpath, c=c.HI)      # call if ip > lr
+            #
+            self.mc.POP([r.lr.value])
+        self._call_header()
+
+    # cpu interface
+    def assemble_loop(self, loopname, inputargs, operations, looptoken, log):
+        """Assemble a complete loop: emit prolog and stack check,
+        register-allocate and assemble the operations, emit the pending
+        guard-failure stubs, materialize the machine code and patch all
+        position-dependent data.  Returns an AsmInfo describing the
+        generated code.
+        """
+        clt = CompiledLoopToken(self.cpu, looptoken.number)
+        clt.allgcrefs = []
+        looptoken.compiled_loop_token = clt
+        clt._debug_nbargs = len(inputargs)
+
+        if not we_are_translated():
+            # Arguments should be unique
+            assert len(set(inputargs)) == len(inputargs)
+
+        operations = self.setup(looptoken, operations)
+        if log:
+            operations = self._inject_debugging_code(looptoken, operations,
+                                                     'e', looptoken.number)
+
+        self._call_header_with_stack_check()
+        # NOPs here; patched with the real sp adjustment once the final
+        # frame depth is known
+        sp_patch_location = self._prepare_sp_patch_position()
+
+        regalloc = Regalloc(assembler=self, frame_manager=ARMFrameManager())
+        regalloc.prepare_loop(inputargs, operations)
+
+        loop_head = self.mc.get_relative_pos()
+        looptoken._arm_loop_code = loop_head
+        #
+        clt.frame_depth = -1
+        frame_depth = self._assemble(operations, regalloc)
+        clt.frame_depth = frame_depth
+        #
+        size_excluding_failure_stuff = self.mc.get_relative_pos()
+
+        self._patch_sp_offset(sp_patch_location, frame_depth)
+        self.write_pending_failure_recoveries()
+
+        rawstart = self.materialize_loop(looptoken)
+        looptoken._arm_func_addr = rawstart
+
+        self.process_pending_guards(rawstart)
+        self.fixup_target_tokens(rawstart)
+
+        if log and not we_are_translated():
+            self.mc._dump_trace(rawstart,
+                    'loop_%s.asm' % self.cpu.total_compiled_loops)
+
+        ops_offset = self.mc.ops_offset
+        self.teardown()
+
+        debug_start("jit-backend-addr")
+        debug_print("Loop %d (%s) has address %x to %x (bootstrap %x)" % (
+            looptoken.number, loopname,
+            rawstart + loop_head,
+            rawstart + size_excluding_failure_stuff,
+            rawstart))
+        debug_stop("jit-backend-addr")
+
+        return AsmInfo(ops_offset, rawstart + loop_head,
+                       size_excluding_failure_stuff - loop_head)
+
+    def _assemble(self, operations, regalloc):
+        # Drive the regalloc/assembly walk over the operations and
+        # return the resulting frame depth, widened to the depth of the
+        # jump target (if the trace ends in a JUMP to another loop).
+        regalloc.compute_hint_frame_locations(operations)
+        self._walk_operations(operations, regalloc)
+        frame_depth = regalloc.frame_manager.get_frame_depth()
+        jump_target_descr = regalloc.jump_target_descr
+        if jump_target_descr is not None:
+            frame_depth = max(frame_depth,
+                                jump_target_descr._arm_clt.frame_depth)
+        return frame_depth
+
+    def assemble_bridge(self, faildescr, inputargs, operations,
+                                                    original_loop_token, log):
+        """Assemble a bridge out of the guard described by faildescr:
+        recover the input locations from the guard's failure-recovery
+        bytecode, assemble the new operations, then patch the guard in
+        the original loop to jump here.  Returns an AsmInfo.
+        """
+        operations = self.setup(original_loop_token, operations)
+        descr_number = self.cpu.get_fail_descr_number(faildescr)
+        if log:
+            operations = self._inject_debugging_code(faildescr, operations,
+                                                     'b', descr_number)
+        assert isinstance(faildescr, AbstractFailDescr)
+        code = self._find_failure_recovery_bytecode(faildescr)
+        frame_depth = faildescr._arm_current_frame_depth
+        arglocs = self.decode_inputargs(code)
+        if not we_are_translated():
+            assert len(inputargs) == len(arglocs)
+
+        regalloc = Regalloc(assembler=self, frame_manager=ARMFrameManager())
+        regalloc.prepare_bridge(inputargs, arglocs, operations)
+
+        # placeholder NOPs, patched once the frame depth is known
+        sp_patch_location = self._prepare_sp_patch_position()
+
+        startpos = self.mc.get_relative_pos()
+
+        frame_depth = self._assemble(operations, regalloc)
+
+        codeendpos = self.mc.get_relative_pos()
+
+        self._patch_sp_offset(sp_patch_location, frame_depth)
+
+        self.write_pending_failure_recoveries()
+
+        rawstart = self.materialize_loop(original_loop_token)
+
+        self.process_pending_guards(rawstart)
+        self.fixup_target_tokens(rawstart)
+
+        # redirect the failing guard in the original code to this bridge
+        self.patch_trace(faildescr, original_loop_token,
+                                    rawstart, regalloc)
+
+        if not we_are_translated():
+            # for the benefit of tests
+            faildescr._arm_bridge_frame_depth = frame_depth
+            if log:
+                self.mc._dump_trace(rawstart, 'bridge_%d.asm' %
+                self.cpu.total_compiled_bridges)
+        self.current_clt.frame_depth = max(self.current_clt.frame_depth,
+                                                                frame_depth)
+        ops_offset = self.mc.ops_offset
+        self.teardown()
+
+        debug_start("jit-backend-addr")
+        debug_print("bridge out of Guard %d has address %x to %x" %
+                    (descr_number, rawstart, rawstart + codeendpos))
+        debug_stop("jit-backend-addr")
+
+        return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
+
+    def _find_failure_recovery_bytecode(self, faildescr):
+        # Return the address of the failure-recovery bytecode for the
+        # given guard, skipping the 3-word jump stub that precedes it.
+        guard_stub_addr = faildescr._arm_failure_recovery_block
+        if guard_stub_addr == 0:
+            # This case should be prevented by the logic in compile.py:
+            # look for CNT_BUSY_FLAG, which disables tracing from a guard
+            # when another tracing from the same guard is already in progress.
+            raise BridgeAlreadyCompiled
+        # a guard requires 3 words to encode the jump to the exit code.
+        return guard_stub_addr + 3 * WORD
+
+    def fixup_target_tokens(self, rawstart):
+        # Turn the relative label positions collected while assembling
+        # into absolute addresses, now that rawstart is known.
+        for targettoken in self.target_tokens_currently_compiling:
+            targettoken._arm_loop_code += rawstart
+        self.target_tokens_currently_compiling = None
+
+    def target_arglocs(self, loop_token):
+        # Return the argument locations recorded on the loop token.
+        return loop_token._arm_arglocs
+
+    def materialize_loop(self, looptoken):
+        # Flush the data block and copy the assembled code into
+        # executable memory; returns the raw start address.
+        self.datablockwrapper.done()      # finish using cpu.asmmemmgr
+        self.datablockwrapper = None
+        allblocks = self.get_asmmemmgr_blocks(looptoken)
+        return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
+                                   self.cpu.gc_ll_descr.gcrootmap)
+
+    def write_pending_failure_recoveries(self):
+        # After the main trace is assembled, append one exit stub plus
+        # encoded failarg locations per pending guard, and remember the
+        # frame depth (faillocs[0]) on each guard descr.
+        for tok in self.pending_guards:
+            descr = tok.descr
+            #generate the exit stub and the encoded representation
+            pos = self.mc.currpos()
+            tok.pos_recovery_stub = pos
+
+            self._gen_path_to_exit_path(descr, tok.failargs,
+                                        tok.faillocs, save_exc=tok.save_exc)
+            # store info on the descr
+            descr._arm_current_frame_depth = tok.faillocs[0].getint()
+
+    def process_pending_guards(self, block_start):
+        # Resolve each pending guard now that the code's absolute start
+        # address is known: record the recovery-stub address on the
+        # descr, and either patch the guard's placeholder NOP into a
+        # conditional branch to the stub, or (for invalidatable guards)
+        # record the position for later invalidation.
+        clt = self.current_clt
+        for tok in self.pending_guards:
+            descr = tok.descr
+            assert isinstance(descr, AbstractFailDescr)
+            failure_recovery_pos = block_start + tok.pos_recovery_stub
+            descr._arm_failure_recovery_block = failure_recovery_pos
+            relative_offset = tok.pos_recovery_stub - tok.offset
+            guard_pos = block_start + tok.offset
+            if not tok.is_invalidate:
+                # patch the guard jumpt to the stub
+                # overwrite the generate NOP with a B_offs to the pos of the
+                # stub
+                mc = ARMv7Builder()
+                mc.B_offs(relative_offset, c.get_opposite_of(tok.fcond))
+                mc.copy_to_raw_memory(guard_pos)
+            else:
+                clt.invalidate_positions.append((guard_pos, relative_offset))
+
+    def get_asmmemmgr_blocks(self, looptoken):
+        # Lazily create and return the list of asmmemmgr blocks held by
+        # the loop token's compiled_loop_token.
+        clt = looptoken.compiled_loop_token
+        if clt.asmmemmgr_blocks is None:
+            clt.asmmemmgr_blocks = []
+        return clt.asmmemmgr_blocks
+
+    def _prepare_sp_patch_position(self):
+        """Generate NOPs as placeholder to patch the instruction(s) to update
+        the sp according to the number of spilled variables"""
+        # reserve room for a gen_load_int sequence plus one instruction
+        # (see _patch_sp_offset / _adjust_sp)
+        size = (self.mc.size_of_gen_load_int + WORD)
+        l = self.mc.currpos()
+        for _ in range(size // WORD):
+            self.mc.NOP()
+        return l
+
+    def _patch_sp_offset(self, pos, frame_depth):
+        # Overwrite the NOPs emitted by _prepare_sp_patch_position with
+        # the real sp adjustment for the final frame depth, rounded up
+        # to keep sp 8-byte aligned.
+        cb = OverwritingBuilder(self.mc, pos,
+                                OverwritingBuilder.size_of_gen_load_int + WORD)
+        n = frame_depth * WORD
+
+        # ensure the sp is 8 byte aligned when patching it
+        if n % 8 != 0:
+            n += WORD
+        assert n % 8 == 0
+
+        self._adjust_sp(n, cb, base_reg=r.fp)
+
+    def _adjust_sp(self, n, cb=None, fcond=c.AL, base_reg=r.sp):
+        # Emit sp = base_reg -/+ n (positive n grows the frame via SUB,
+        # negative n shrinks it via ADD).  Uses the immediate forms
+        # when n fits in 8 bits and the condition is AL; otherwise
+        # loads n into ip first.
+        if cb is None:
+            cb = self.mc
+        if n < 0:
+            n = -n
+            rev = True
+        else:
+            rev = False
+        if n <= 0xFF and fcond == c.AL:
+            if rev:
+                cb.ADD_ri(r.sp.value, base_reg.value, n)
+            else:
+                cb.SUB_ri(r.sp.value, base_reg.value, n)
+        else:
+            cb.gen_load_int(r.ip.value, n, cond=fcond)
+            if rev:
+                cb.ADD_rr(r.sp.value, base_reg.value, r.ip.value, cond=fcond)
+            else:
+                cb.SUB_rr(r.sp.value, base_reg.value, r.ip.value, cond=fcond)
+
+    def _walk_operations(self, operations, regalloc):
+        # Main assembly loop: for each operation, either skip it (dead
+        # result, no side effect), assemble it fused with the guard
+        # that follows (see can_merge_with_next_guard), or assemble it
+        # on its own; the regalloc is advanced and checked in lockstep.
+        fcond = c.AL
+        self._regalloc = regalloc
+        while regalloc.position() < len(operations) - 1:
+            regalloc.next_instruction()
+            i = regalloc.position()
+            op = operations[i]
+            self.mc.mark_op(op)
+            opnum = op.getopnum()
+            if op.has_no_side_effect() and op.result not in regalloc.longevity:
+                # result is never used: just free its operands
+                regalloc.possibly_free_vars_for_op(op)
+            elif self.can_merge_with_next_guard(op, i, operations):
+                guard = operations[i + 1]
+                assert guard.is_guard()
+                arglocs = regalloc_operations_with_guard[opnum](regalloc, op,
+                                        guard, fcond)
+                fcond = asm_operations_with_guard[opnum](self, op,
+                                        guard, arglocs, regalloc, fcond)
+                regalloc.next_instruction()
+                regalloc.possibly_free_vars_for_op(guard)
+                regalloc.possibly_free_vars(guard.getfailargs())
+            elif not we_are_translated() and op.getopnum() == -124:
+                # -124: force-spill pseudo-operation, tests only
+                regalloc.prepare_force_spill(op, fcond)
+            else:
+                arglocs = regalloc_operations[opnum](regalloc, op, fcond)
+                if arglocs is not None:
+                    fcond = asm_operations[opnum](self, op, arglocs,
+                                                        regalloc, fcond)
+            if op.is_guard():
+                regalloc.possibly_free_vars(op.getfailargs())
+            if op.result:
+                regalloc.possibly_free_var(op.result)
+            regalloc.possibly_free_vars_for_op(op)
+            regalloc.free_temp_vars()
+            regalloc._check_invariants()
+        self.mc.mark_op(None)  # end of the loop
+
+    # from ../x86/regalloc.py
+    def can_merge_with_next_guard(self, op, i, operations):
+        # Decide whether op can be assembled fused with operations[i+1]
+        # (a guard): always for CALL_MAY_FORCE/CALL_ASSEMBLER/
+        # CALL_RELEASE_GIL + GUARD_NOT_FORCED and for overflow ops +
+        # their overflow guards; for comparisons only when the guard
+        # tests this op's result and the result dies immediately after.
+        if (op.getopnum() == rop.CALL_MAY_FORCE or
+            op.getopnum() == rop.CALL_ASSEMBLER or
+            op.getopnum() == rop.CALL_RELEASE_GIL):
+            assert operations[i + 1].getopnum() == rop.GUARD_NOT_FORCED
+            return True
+        if not op.is_comparison():
+            if op.is_ovf():
+                if (operations[i + 1].getopnum() != rop.GUARD_NO_OVERFLOW and
+                    operations[i + 1].getopnum() != rop.GUARD_OVERFLOW):
+                    not_implemented("int_xxx_ovf not followed by "
+                                    "guard_(no)_overflow")
+                return True
+            return False
+        if (operations[i + 1].getopnum() != rop.GUARD_TRUE and
+            operations[i + 1].getopnum() != rop.GUARD_FALSE):
+            return False
+        if operations[i + 1].getarg(0) is not op.result:
+            return False
+        if (self._regalloc.longevity[op.result][1] > i + 1 or
+            op.result in operations[i + 1].getfailargs()):
+            return False
+        return True
+
+    def regalloc_emit_llong(self, op, arglocs, fcond, regalloc):
+        # Dispatch a long-long oopspec call to its specialized
+        # assembler routine, selected by the effectinfo's oopspecindex.
+        effectinfo = op.getdescr().get_extra_info()
+        oopspecindex = effectinfo.oopspecindex
+        asm_llong_operations[oopspecindex](self, op, arglocs, regalloc, fcond)
+        return fcond 
+
+    def regalloc_emit_math(self, op, arglocs, fcond, regalloc):
+        # Dispatch a math oopspec call to its specialized assembler
+        # routine, selected by the effectinfo's oopspecindex.
+        effectinfo = op.getdescr().get_extra_info()
+        oopspecindex = effectinfo.oopspecindex
+        asm_math_operations[oopspecindex](self, op, arglocs, regalloc, fcond)
+        return fcond
+
+
+    def _insert_checks(self, mc=None):
+        # Debug-only (untranslated + self._debug): emit a sanity check
+        # that fp >= sp, trapping with BKPT when it is violated.
+        if not we_are_translated() and self._debug:
+            if mc is None:
+                mc = self.mc
+            mc.CMP_rr(r.fp.value, r.sp.value)
+            mc.MOV_rr(r.pc.value, r.pc.value, cond=c.GE)  # skip BKPT if fp >= sp
+            mc.BKPT()
+
+    def _ensure_result_bit_extension(self, resloc, size, signed):
+        # Zero- or sign-extend the 8/16-bit value in resloc to a full
+        # 32-bit word; a no-op for word-sized (4-byte) results.
+        if size == 4:
+            return
+        if size == 1:
+            if not signed:  # unsigned char
+                self.mc.AND_ri(resloc.value, resloc.value, 0xFF)
+            else:
+                # sign-extend byte: shift left then arithmetic right
+                self.mc.LSL_ri(resloc.value, resloc.value, 24)
+                self.mc.ASR_ri(resloc.value, resloc.value, 24)
+        elif size == 2:
+            if not signed:
+                # zero-extend halfword: shift left then logical right
+                self.mc.LSL_ri(resloc.value, resloc.value, 16)
+                self.mc.LSR_ri(resloc.value, resloc.value, 16)
+            else:
+                self.mc.LSL_ri(resloc.value, resloc.value, 16)
+                self.mc.ASR_ri(resloc.value, resloc.value, 16)
+
+    def patch_trace(self, faildescr, looptoken, bridge_addr, regalloc):
+        # Overwrite the guard's recovery stub with an unconditional
+        # branch to the freshly compiled bridge, then clear the stub
+        # address on the descr (the stub is no longer reachable).
+        b = ARMv7Builder()
+        patch_addr = faildescr._arm_failure_recovery_block
+        assert patch_addr != 0
+        b.B(bridge_addr)
+        b.copy_to_raw_memory(patch_addr)
+        faildescr._arm_failure_recovery_block = 0
+
+    # regalloc support
+    def load(self, loc, value):
+        # Load an immediate into a location: an integer immediate into
+        # a core register, or a float immediate (via its address in ip)
+        # into a vfp register.
+        assert (loc.is_reg() and value.is_imm()
+                    or loc.is_vfp_reg() and value.is_imm_float())
+        if value.is_imm():
+            self.mc.gen_load_int(loc.value, value.getint())
+        elif value.is_imm_float():
+            self.mc.gen_load_int(r.ip.value, value.getint())
+            self.mc.VLDR(loc.value, r.ip.value)
+
+    def _mov_imm_to_loc(self, prev_loc, loc, cond=c.AL):
+        # Move an integer immediate into a core register or a non-float
+        # stack slot.  The stack case routes through lr (saved and
+        # restored around the move).
+        if not loc.is_reg() and not (loc.is_stack() and loc.type != FLOAT):
+            raise AssertionError("invalid target for move from imm value")
+        if loc.is_reg():
+            new_loc = loc
+        elif loc.is_stack():
+            self.mc.PUSH([r.lr.value], cond=cond)
+            new_loc = r.lr
+        else:
+            raise AssertionError("invalid target for move from imm value")
+        self.mc.gen_load_int(new_loc.value, prev_loc.value, cond=cond)
+        if loc.is_stack():
+            self.regalloc_mov(new_loc, loc)
+            self.mc.POP([r.lr.value], cond=cond)
+
+    def _mov_reg_to_loc(self, prev_loc, loc, cond=c.AL):
+        # Move a core register either into another core register or
+        # spill it to a non-float stack slot (offset from fp); large
+        # offsets go through a scratch register (ip, or lr when the
+        # source itself is ip).
+        if loc.is_imm():
+            raise AssertionError("mov reg to imm doesn't make sense")
+        if loc.is_reg():
+            self.mc.MOV_rr(loc.value, prev_loc.value, cond=cond)
+        elif loc.is_stack() and loc.type != FLOAT:
+            # spill a core register
+            if prev_loc is r.ip:
+                temp = r.lr
+            else:
+                temp = r.ip
+            offset = loc.value
+            if not check_imm_arg(offset, size=0xFFF):
+                # offset does not fit the STR immediate field: load the
+                # negated offset into temp and use register addressing
+                self.mc.PUSH([temp.value], cond=cond)
+                self.mc.gen_load_int(temp.value, -offset, cond=cond)
+                self.mc.STR_rr(prev_loc.value, r.fp.value,
+                                            temp.value, cond=cond)
+                self.mc.POP([temp.value], cond=cond)
+            else:
+                self.mc.STR_ri(prev_loc.value, r.fp.value,
+                                            imm=-offset, cond=cond)
+        else:
+            assert 0, 'unsupported case'
+
+    def _mov_stack_to_loc(self, prev_loc, loc, cond=c.AL):
+        pushed = False
+        if loc.is_reg():
+            assert prev_loc.type != FLOAT, 'trying to load from an \
+                incompatible location into a core register'
+            assert loc is not r.lr, 'lr is not supported as a target \
+                when moving from the stack'
+            # unspill a core register
+            offset = prev_loc.value
+            if not check_imm_arg(offset, size=0xFFF):
+                self.mc.PUSH([r.lr.value], cond=cond)
+                pushed = True
+                self.mc.gen_load_int(r.lr.value, -offset, cond=cond)
+                self.mc.LDR_rr(loc.value, r.fp.value, r.lr.value, cond=cond)
+            else:
+                self.mc.LDR_ri(loc.value, r.fp.value, imm=-offset, cond=cond)
+            if pushed:
+                self.mc.POP([r.lr.value], cond=cond)
+        elif loc.is_vfp_reg():
+            assert prev_loc.type == FLOAT, 'trying to load from an \
+                incompatible location into a float register'
+            # load spilled value into vfp reg
+            offset = prev_loc.value
+            self.mc.PUSH([r.ip.value], cond=cond)
+            pushed = True
+            if not check_imm_arg(offset):
+                self.mc.gen_load_int(r.ip.value, offset, cond=cond)
+                self.mc.SUB_rr(r.ip.value, r.fp.value, r.ip.value, cond=cond)
+            else:
+                self.mc.SUB_ri(r.ip.value, r.fp.value, offset, cond=cond)
+            self.mc.VLDR(loc.value, r.ip.value, cond=cond)
+            if pushed:
+                self.mc.POP([r.ip.value], cond=cond)
+        else:
+            assert 0, 'unsupported case'
+
+    def _mov_imm_float_to_loc(self, prev_loc, loc, cond=c.AL):
+        if loc.is_vfp_reg():
+            self.mc.PUSH([r.ip.value], cond=cond)
+            self.mc.gen_load_int(r.ip.value, prev_loc.getint(), cond=cond)
+            self.mc.VLDR(loc.value, r.ip.value, cond=cond)
+            self.mc.POP([r.ip.value], cond=cond)
+        elif loc.is_stack():
+            self.regalloc_push(r.vfp_ip)
+            self.regalloc_mov(prev_loc, r.vfp_ip, cond)
+            self.regalloc_mov(r.vfp_ip, loc, cond)
+            self.regalloc_pop(r.vfp_ip)
+        else:
+            assert 0, 'unsupported case'
+
+    def _mov_vfp_reg_to_loc(self, prev_loc, loc, cond=c.AL):
+        if loc.is_vfp_reg():
+            self.mc.VMOV_cc(loc.value, prev_loc.value, cond=cond)
+        elif loc.is_stack():
+            assert loc.type == FLOAT, 'trying to store to an \
+                incompatible location from a float register'
+            # spill vfp register
+            self.mc.PUSH([r.ip.value], cond=cond)
+            offset = loc.value
+            if not check_imm_arg(offset):
+                self.mc.gen_load_int(r.ip.value, offset, cond=cond)
+                self.mc.SUB_rr(r.ip.value, r.fp.value, r.ip.value, cond=cond)
+            else:
+                self.mc.SUB_ri(r.ip.value, r.fp.value, offset, cond=cond)
+            self.mc.VSTR(prev_loc.value, r.ip.value, cond=cond)
+            self.mc.POP([r.ip.value], cond=cond)
+        else:
+            assert 0, 'unsupported case'
+
+    def regalloc_mov(self, prev_loc, loc, cond=c.AL):
+        """Moves a value from a previous location to some other location"""
+        if prev_loc.is_imm():
+            return self._mov_imm_to_loc(prev_loc, loc, cond)
+        elif prev_loc.is_reg():
+            self._mov_reg_to_loc(prev_loc, loc, cond)
+        elif prev_loc.is_stack():
+            self._mov_stack_to_loc(prev_loc, loc, cond)
+        elif prev_loc.is_imm_float():
+            self._mov_imm_float_to_loc(prev_loc, loc, cond)
+        elif prev_loc.is_vfp_reg():
+            self._mov_vfp_reg_to_loc(prev_loc, loc, cond)
+        else:
+            assert 0, 'unsupported case'
+    mov_loc_loc = regalloc_mov
+
+    def mov_from_vfp_loc(self, vfp_loc, reg1, reg2, cond=c.AL):
+        """Moves a floating point value, given either as an immediate, in a
+        vfp register, or at a stack location, to a pair of core registers"""
+        assert reg1.value + 1 == reg2.value
+        if vfp_loc.is_vfp_reg():
+            self.mc.VMOV_rc(reg1.value, reg2.value, vfp_loc.value, cond=cond)
+        elif vfp_loc.is_imm_float():
+            self.mc.PUSH([r.ip.value], cond=cond)
+            self.mc.gen_load_int(r.ip.value, vfp_loc.getint(), cond=cond)
+            # we need to load one word to loc and one to loc+1 which are
+            # two 32-bit core registers
+            self.mc.LDR_ri(reg1.value, r.ip.value, cond=cond)
+            self.mc.LDR_ri(reg2.value, r.ip.value, imm=WORD, cond=cond)
+            self.mc.POP([r.ip.value], cond=cond)
+        elif vfp_loc.is_stack() and vfp_loc.type == FLOAT:
+            # load spilled vfp value into two core registers
+            offset = vfp_loc.value
+            if not check_imm_arg(offset, size=0xFFF):
+                self.mc.PUSH([r.ip.value], cond=cond)
+                self.mc.gen_load_int(r.ip.value, -offset, cond=cond)
+                self.mc.LDR_rr(reg1.value, r.fp.value, r.ip.value, cond=cond)
+                self.mc.ADD_ri(r.ip.value, r.ip.value, imm=WORD, cond=cond)
+                self.mc.LDR_rr(reg2.value, r.fp.value, r.ip.value, cond=cond)
+                self.mc.POP([r.ip.value], cond=cond)
+            else:
+                self.mc.LDR_ri(reg1.value, r.fp.value, imm=-offset, cond=cond)
+                self.mc.LDR_ri(reg2.value, r.fp.value,
+                                                imm=-offset + WORD, cond=cond)
+        else:
+            assert 0, 'unsupported case'
+
+    def mov_to_vfp_loc(self, reg1, reg2, vfp_loc, cond=c.AL):
+        """Moves a floating point value from two consecutive core registers
+        to a vfp location, either a vfp register or a stack location"""
+        assert reg1.value + 1 == reg2.value
+        if vfp_loc.is_vfp_reg():
+            self.mc.VMOV_cr(vfp_loc.value, reg1.value, reg2.value, cond=cond)
+        elif vfp_loc.is_stack():
+            # move from two core registers to a float stack location
+            offset = vfp_loc.value
+            if not check_imm_arg(offset, size=0xFFF):
+                self.mc.PUSH([r.ip.value], cond=cond)
+                self.mc.gen_load_int(r.ip.value, -offset, cond=cond)
+                self.mc.STR_rr(reg1.value, r.fp.value, r.ip.value, cond=cond)
+                self.mc.ADD_ri(r.ip.value, r.ip.value, imm=WORD, cond=cond)
+                self.mc.STR_rr(reg2.value, r.fp.value, r.ip.value, cond=cond)
+                self.mc.POP([r.ip.value], cond=cond)
+            else:
+                self.mc.STR_ri(reg1.value, r.fp.value, imm=-offset, cond=cond)
+                self.mc.STR_ri(reg2.value, r.fp.value,
+                                                imm=-offset + WORD, cond=cond)
+        else:
+            assert 0, 'unsupported case'
+
+    def regalloc_push(self, loc, cond=c.AL):
+        """Pushes the value stored in loc to the stack.
+        Can trash the current value of the IP register when pushing a stack
+        loc"""
+
+        if loc.is_stack():
+            if loc.type != FLOAT:
+                scratch_reg = r.ip
+            else:
+                scratch_reg = r.vfp_ip
+            self.regalloc_mov(loc, scratch_reg, cond)
+            self.regalloc_push(scratch_reg, cond)
+        elif loc.is_reg():
+            self.mc.PUSH([loc.value], cond=cond)
+        elif loc.is_vfp_reg():
+            self.mc.VPUSH([loc.value], cond=cond)
+        elif loc.is_imm():
+            self.regalloc_mov(loc, r.ip)
+            self.mc.PUSH([r.ip.value], cond=cond)
+        elif loc.is_imm_float():
+            self.regalloc_mov(loc, r.vfp_ip)
+            self.mc.VPUSH([r.vfp_ip.value], cond=cond)
+        else:
+            raise AssertionError('Trying to push an invalid location')
+
+    def regalloc_pop(self, loc, cond=c.AL):
+        """Pops the value on top of the stack to loc. Can trash the current
+        value of the IP register when popping to a stack loc"""
+        if loc.is_stack():
+            if loc.type != FLOAT:
+                scratch_reg = r.ip
+            else:
+                scratch_reg = r.vfp_ip
+            self.regalloc_pop(scratch_reg)
+            self.regalloc_mov(scratch_reg, loc)
+        elif loc.is_reg():
+            self.mc.POP([loc.value], cond=cond)
+        elif loc.is_vfp_reg():
+            self.mc.VPOP([loc.value], cond=cond)
+        else:
+            raise AssertionError('Trying to pop to an invalid location')
+
+    def leave_jitted_hook(self):
+        ptrs = self.fail_boxes_ptr.ar
+        llop.gc_assume_young_pointers(lltype.Void,
+                                      llmemory.cast_ptr_to_adr(ptrs))
+
+    def malloc_cond(self, nursery_free_adr, nursery_top_adr, size):
+        assert size & (WORD-1) == 0     # must be correctly aligned
+
+        self.mc.gen_load_int(r.r0.value, nursery_free_adr)
+        self.mc.LDR_ri(r.r0.value, r.r0.value)
+
+        if check_imm_arg(size):
+            self.mc.ADD_ri(r.r1.value, r.r0.value, size)
+        else:
+            self.mc.gen_load_int(r.r1.value, size)
+            self.mc.ADD_rr(r.r1.value, r.r0.value, r.r1.value)
+
+        self.mc.gen_load_int(r.ip.value, nursery_top_adr)
+        self.mc.LDR_ri(r.ip.value, r.ip.value)
+
+        self.mc.CMP_rr(r.r1.value, r.ip.value)
+
+        # We load into r0 the address stored at nursery_free_adr. We calculate
+        # the new value for nursery_free_adr and store it in r1. Then we load
+        # the address stored in nursery_top_adr into IP. If the value in r1 is
+        # (unsigned) bigger than the one in ip we conditionally call
+        # malloc_slowpath. In case we called malloc_slowpath, it returns the
+        # new value of nursery_free_adr in r1 and the adr of the new object
+        # in r0.
+        self.mark_gc_roots(self.write_new_force_index(),
+                           use_copy_area=True)
+        self.mc.BL(self.malloc_slowpath, c=c.HI)
+
+        self.mc.gen_load_int(r.ip.value, nursery_free_adr)
+        self.mc.STR_ri(r.r1.value, r.ip.value)
+
+    def mark_gc_roots(self, force_index, use_copy_area=False):
+        if force_index < 0:
+            return     # not needed
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        if gcrootmap:
+            mark = self._regalloc.get_mark_gc_roots(gcrootmap, use_copy_area)
+            assert gcrootmap.is_shadow_stack
+            gcrootmap.write_callshape(mark, force_index)
+
+    def write_new_force_index(self):
+        # for shadowstack only: get a new, unused force_index number and
+        # write it to FORCE_INDEX_OFS.  Used to record the call shape
+        # (i.e. where the GC pointers are in the stack) around a CALL
+        # instruction that doesn't already have a force_index.
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        if gcrootmap and gcrootmap.is_shadow_stack:
+            clt = self.current_clt
+            force_index = clt.reserve_and_record_some_faildescr_index()
+            self._write_fail_index(force_index)
+            return force_index
+        else:
+            return 0
+
+
+def not_implemented(msg):
+    os.write(2, '[ARM/asm] %s\n' % msg)
+    raise NotImplementedError(msg)
+
+
+def notimplemented_op(self, op, arglocs, regalloc, fcond):
+    print "[ARM/asm] %s not implemented" % op.getopname()
+    raise NotImplementedError(op)
+
+
+def notimplemented_op_with_guard(self, op, guard_op, arglocs, regalloc, fcond):
+    print "[ARM/asm] %s with guard %s not implemented" % \
+                        (op.getopname(), guard_op.getopname())
+    raise NotImplementedError(op)
+
+asm_operations = [notimplemented_op] * (rop._LAST + 1)
+asm_operations_with_guard = [notimplemented_op_with_guard] * (rop._LAST + 1)
+asm_llong_operations = {}
+asm_math_operations = {}
+
+for name, value in ResOpAssembler.__dict__.iteritems():
+    if name.startswith('emit_guard_'):
+        opname = name[len('emit_guard_'):]
+        num = getattr(rop, opname.upper())
+        asm_operations_with_guard[num] = value
+    elif name.startswith('emit_op_llong_'):
+        opname = name[len('emit_op_llong_'):]
+        num = getattr(EffectInfo, 'OS_LLONG_' + opname.upper())
+        asm_llong_operations[num] = value
+    elif name.startswith('emit_op_math_'):
+        opname = name[len('emit_op_math_'):]
+        num = getattr(EffectInfo, 'OS_MATH_' + opname.upper())
+        asm_math_operations[num] = value
+    elif name.startswith('emit_op_'):
+        opname = name[len('emit_op_'):]
+        num = getattr(rop, opname.upper())
+        asm_operations[num] = value
+
+
+class BridgeAlreadyCompiled(Exception):
+    pass
diff --git a/pypy/jit/backend/arm/codebuilder.py b/pypy/jit/backend/arm/codebuilder.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/arm/codebuilder.py
@@ -0,0 +1,311 @@
+from pypy.jit.backend.arm import arch
+from pypy.jit.backend.arm import conditions as cond
+from pypy.jit.backend.arm import registers as reg
+from pypy.jit.backend.arm.arch import (WORD, FUNC_ALIGN)
+from pypy.jit.backend.arm.instruction_builder import define_instructions
+from pypy.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin
+from pypy.rlib.objectmodel import we_are_translated
+from pypy.rpython.lltypesystem import lltype, rffi, llmemory
+from pypy.tool.udir import udir
+
+clear_cache = rffi.llexternal(
+    "__clear_cache",
+    [llmemory.Address, llmemory.Address],
+    lltype.Void,
+    _nowrapper=True,
+    sandboxsafe=True)
+
+
+def binary_helper_call(name):
+    function = getattr(arch, 'arm_%s' % name)
+
+    def f(self, c=cond.AL):
+        """Generates a call to a helper function, takes its
+        arguments in r0 and r1, result is placed in r0"""
+        addr = rffi.cast(lltype.Signed, function)
+        self.BL(addr, c)
+    return f
+
+
+class AbstractARMv7Builder(object):
+
+    def __init__(self):
+        pass
+
+    def align(self):
+        while(self.currpos() % FUNC_ALIGN != 0):
+            self.writechar(chr(0))
+
+    def NOP(self):
+        self.MOV_rr(0, 0)
+
+    def PUSH(self, regs, cond=cond.AL):
+        assert reg.sp.value not in regs
+        instr = self._encode_reg_list(cond << 28 | 0x92D << 16, regs)
+        self.write32(instr)
+
+    def VPUSH(self, regs, cond=cond.AL):
+        nregs = len(regs)
+        assert nregs > 0 and nregs <= 16
+        freg = regs[0]
+        D = (freg & 0x10) >> 4
+        Dd = (freg & 0xF)
+        nregs *= 2
+        instr = (cond << 28
+                | 0xD2D << 16
+                | D << 22
+                | Dd << 12
+                | 0xB << 8
+                | nregs)
+        self.write32(instr)
+
+    def VPOP(self, regs, cond=cond.AL):
+        nregs = len(regs)
+        assert nregs > 0 and nregs <= 16
+        freg = regs[0]
+        D = (freg & 0x10) >> 4
+        Dd = (freg & 0xF)
+        nregs *= 2
+        instr = (cond << 28
+                | 0xCBD << 16
+                | D << 22
+                | Dd << 12
+                | 0xB << 8
+                | nregs)
+        self.write32(instr)
+
+    def VMOV_rc(self, rt, rt2, dm, cond=cond.AL):
+        """This instruction copies two words from two ARM core registers into a
+        doubleword extension register, or from a doubleword extension register
+        to two ARM core registers.
+        """
+        op = 1
+        instr = (cond << 28
+                | 0xC << 24
+                | 0x4 << 20
+                | op << 20
+                | (rt2 & 0xF) << 16
+                | (rt & 0xF) << 12
+                | 0xB << 8
+                | 0x1 << 4
+                | (dm & 0xF))
+        self.write32(instr)
+
+    # VMOV<c> <Dm>, <Rt>, <Rt2>
+    def VMOV_cr(self, dm, rt, rt2, cond=cond.AL):
+        """This instruction copies two words from two ARM core registers into a
+        doubleword extension register, or from a doubleword extension register
+        to two ARM core registers.
+        """
+        op = 0
+        instr = (cond << 28
+                | 0xC << 24
+                | 0x4 << 20
+                | op << 20
+                | (rt2 & 0xF) << 16
+                | (rt & 0xF) << 12
+                | 0xB << 8
+                | 0x1 << 4
+                | (dm & 0xF))
+        self.write32(instr)
+
+    def VMOV_cc(self, dd, dm, cond=cond.AL):
+        sz = 1  # for 64-bit mode
+        instr = (cond << 28
+                | 0xEB << 20
+                | (dd & 0xF) << 12
+                | 0x5 << 9
+                | (sz & 0x1) << 8
+                | 0x1 << 6
+                | (dm & 0xF))
+        self.write32(instr)
+
+    def VCVT_float_to_int(self, target, source, cond=cond.AL):
+        opc2 = 0x5
+        sz = 1
+        self._VCVT(target, source, cond, opc2, sz)
+
+    def VCVT_int_to_float(self, target, source, cond=cond.AL):
+        self._VCVT(target, source, cond, 0, 1)
+
+    def _VCVT(self, target, source, cond, opc2, sz):
+        D = 0x0
+        M = 0
+        op = 1
+        instr = (cond << 28
+                | 0xEB8 << 16
+                | D << 22
+                | opc2 << 16
+                | (target & 0xF) << 12
+                | 0x5 << 9
+                | sz << 8
+                | op << 7
+                | 1 << 6
+                | M << 5
+                | (source & 0xF))
+        self.write32(instr)
+
+    def POP(self, regs, cond=cond.AL):
+        instr = self._encode_reg_list(cond << 28 | 0x8BD << 16, regs)
+        self.write32(instr)
+
+    def BKPT(self):
+        """Unconditional breakpoint"""
+        self.write32(cond.AL << 28 | 0x1200070)
+
+    # corresponds to the instruction vmrs APSR_nzcv, fpscr
+    def VMRS(self, cond=cond.AL):
+        self.write32(cond << 28 | 0xEF1FA10)
+


More information about the pypy-commit mailing list