[pypy-commit] pypy refactor-str-types: hg merge default

Tue May 28 14:55:43 CEST 2013

Author: Manuel Jacob
Branch: refactor-str-types
Changeset: r64617:3032a93d573d
Date: 2013-05-28 14:53 +0200
http://bitbucket.org/pypy/pypy/changeset/3032a93d573d/

Log:	hg merge default

diff too long, truncating to 2000 out of 2697 lines

diff --git a/lib-python/2.7/logging/__init__.py b/lib-python/2.7/logging/__init__.py
--- a/lib-python/2.7/logging/__init__.py
+++ b/lib-python/2.7/logging/__init__.py
@@ -134,20 +134,22 @@
 DEBUG = 10
 NOTSET = 0
 
-_levelNames = {
-    CRITICAL : 'CRITICAL',
-    ERROR : 'ERROR',
-    WARNING : 'WARNING',
-    INFO : 'INFO',
-    DEBUG : 'DEBUG',
-    NOTSET : 'NOTSET',
-    'CRITICAL' : CRITICAL,
-    'ERROR' : ERROR,
-    'WARN' : WARNING,
-    'WARNING' : WARNING,
-    'INFO' : INFO,
-    'DEBUG' : DEBUG,
-    'NOTSET' : NOTSET,
+_levelToName = {
+    CRITICAL: 'CRITICAL',
+    ERROR: 'ERROR',
+    WARNING: 'WARNING',
+    INFO: 'INFO',
+    DEBUG: 'DEBUG',
+    NOTSET: 'NOTSET',
+}
+_nameToLevel = {
+    'CRITICAL': CRITICAL,
+    'ERROR': ERROR,
+    'WARN': WARNING,
+    'WARNING': WARNING,
+    'INFO': INFO,
+    'DEBUG': DEBUG,
+    'NOTSET': NOTSET,
 }
 
 def getLevelName(level):
@@ -164,7 +166,7 @@
 
     Otherwise, the string "Level %s" % level is returned.
     """
-    return _levelNames.get(level, ("Level %s" % level))
+    return _levelToName.get(level, ("Level %s" % level))
 
 def addLevelName(level, levelName):
     """
@@ -174,8 +176,8 @@
     """
     _acquireLock()
     try:    #unlikely to cause an exception, but you never know...
-        _levelNames[level] = levelName
-        _levelNames[levelName] = level
+        _levelToName[level] = levelName
+        _nameToLevel[levelName] = level
     finally:
         _releaseLock()
 
@@ -183,9 +185,9 @@
     if isinstance(level, int):
         rv = level
     elif str(level) == level:
-        if level not in _levelNames:
+        if level not in _nameToLevel:
             raise ValueError("Unknown level: %r" % level)
-        rv = _levelNames[level]
+        rv = _nameToLevel[level]
     else:
         raise TypeError("Level not an integer or a valid string: %r" % level)
     return rv
@@ -277,7 +279,7 @@
         self.lineno = lineno
         self.funcName = func
         self.created = ct
-        self.msecs = (ct - long(ct)) * 1000
+        self.msecs = (ct - int(ct)) * 1000
         self.relativeCreated = (self.created - _startTime) * 1000
         if logThreads and thread:
             self.thread = thread.get_ident()
diff --git a/lib-python/2.7/logging/config.py b/lib-python/2.7/logging/config.py
--- a/lib-python/2.7/logging/config.py
+++ b/lib-python/2.7/logging/config.py
@@ -156,7 +156,7 @@
         h = klass(*args)
         if "level" in opts:
             level = cp.get(sectname, "level")
-            h.setLevel(logging._levelNames[level])
+            h.setLevel(level)
         if len(fmt):
             h.setFormatter(formatters[fmt])
         if issubclass(klass, logging.handlers.MemoryHandler):
@@ -187,7 +187,7 @@
     opts = cp.options(sectname)
     if "level" in opts:
         level = cp.get(sectname, "level")
-        log.setLevel(logging._levelNames[level])
+        log.setLevel(level)
     for h in root.handlers[:]:
         root.removeHandler(h)
     hlist = cp.get(sectname, "handlers")
@@ -237,7 +237,7 @@
             existing.remove(qn)
         if "level" in opts:
             level = cp.get(sectname, "level")
-            logger.setLevel(logging._levelNames[level])
+            logger.setLevel(level)
         for h in logger.handlers[:]:
             logger.removeHandler(h)
         logger.propagate = propagate
diff --git a/lib-python/2.7/test/test_logging.py b/lib-python/2.7/test/test_logging.py
--- a/lib-python/2.7/test/test_logging.py
+++ b/lib-python/2.7/test/test_logging.py
@@ -65,7 +65,8 @@
             self.saved_handlers = logging._handlers.copy()
             self.saved_handler_list = logging._handlerList[:]
             self.saved_loggers = logger_dict.copy()
-            self.saved_level_names = logging._levelNames.copy()
+            self.saved_name_to_level = logging._nameToLevel.copy()
+            self.saved_level_to_name = logging._levelToName.copy()
         finally:
             logging._releaseLock()
 
@@ -97,8 +98,10 @@
         self.root_logger.setLevel(self.original_logging_level)
         logging._acquireLock()
         try:
-            logging._levelNames.clear()
-            logging._levelNames.update(self.saved_level_names)
+            logging._levelToName.clear()
+            logging._levelToName.update(self.saved_level_to_name)
+            logging._nameToLevel.clear()
+            logging._nameToLevel.update(self.saved_name_to_level)
             logging._handlers.clear()
             logging._handlers.update(self.saved_handlers)
             logging._handlerList[:] = self.saved_handler_list
diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py
--- a/lib_pypy/_ctypes/structure.py
+++ b/lib_pypy/_ctypes/structure.py
@@ -166,8 +166,7 @@
         if self is StructOrUnion:
             return
         if '_fields_' not in self.__dict__:
-            self._fields_ = []
-            _set_shape(self, [], self._is_union)
+            self._fields_ = []  # As a side effect, this also sets the ffishape.
 
     __setattr__ = struct_setattr
 
diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py
--- a/pypy/goal/targetpypystandalone.py
+++ b/pypy/goal/targetpypystandalone.py
@@ -124,14 +124,21 @@
 
     @entrypoint('main', [], c_name='pypy_init_threads')
     def pypy_init_threads():
-        if space.config.objspace.usemodules.thread:
-            os_thread.setup_threads(space)
-            rffi.aroundstate.before()
+        if not space.config.objspace.usemodules.thread:
+            return
+        os_thread.setup_threads(space)
+        rffi.aroundstate.before()
 
     @entrypoint('main', [], c_name='pypy_thread_attach')
     def pypy_thread_attach():
-        if space.config.objspace.usemodules.thread:
-            rthread.gc_thread_start()
+        if not space.config.objspace.usemodules.thread:
+            return
+        os_thread.setup_threads(space)
+        os_thread.bootstrapper.acquire(space, None, None)
+        rthread.gc_thread_start()
+        os_thread.bootstrapper.nbthreads += 1
+        os_thread.bootstrapper.release()
+        rffi.aroundstate.before()
 
     w_globals = space.newdict()
     space.setitem(w_globals, space.wrap('__builtins__'),
diff --git a/pypy/module/_io/interp_iobase.py b/pypy/module/_io/interp_iobase.py
--- a/pypy/module/_io/interp_iobase.py
+++ b/pypy/module/_io/interp_iobase.py
@@ -16,6 +16,11 @@
     else:
         return space.int_w(w_size)
 
+def unsupported(space, message):
+    w_exc = space.getattr(space.getbuiltinmodule('_io'),
+                          space.wrap('UnsupportedOperation'))
+    return OperationError(w_exc, space.wrap(message))
+
 # May be called with any object
 def check_readable_w(space, w_obj):
     if not space.is_true(space.call_method(w_obj, 'readable')):
@@ -86,6 +91,9 @@
         # attribute as returned by whatever subclass.
         return self.__IOBase_closed
 
+    def _unsupportedoperation(self, space, message):
+        raise unsupported(space, message)
+
     def _check_closed(self, space, message=None):
         if message is None:
             message = "I/O operation on closed file"
@@ -111,9 +119,18 @@
                 space.w_ValueError,
                 space.wrap("I/O operation on closed file"))
 
+    def seek_w(self, space, w_offset, w_whence=None):
+        self._unsupportedoperation(space, "seek")
+
     def tell_w(self, space):
         return space.call_method(self, "seek", space.wrap(0), space.wrap(1))
 
+    def truncate_w(self, space, w_size=None):
+        self._unsupportedoperation(space, "truncate")
+
+    def fileno_w(self, space):
+        self._unsupportedoperation(space, "fileno")
+
     def enter_w(self, space):
         self._check_closed(space)
         return space.wrap(self)
@@ -248,11 +265,15 @@
     next = interp2app(W_IOBase.next_w),
     close = interp2app(W_IOBase.close_w),
     flush = interp2app(W_IOBase.flush_w),
+    seek = interp2app(W_IOBase.seek_w),
     tell = interp2app(W_IOBase.tell_w),
+    truncate = interp2app(W_IOBase.truncate_w),
+    fileno = interp2app(W_IOBase.fileno_w),
     isatty = interp2app(W_IOBase.isatty_w),
     readable = interp2app(W_IOBase.readable_w),
     writable = interp2app(W_IOBase.writable_w),
     seekable = interp2app(W_IOBase.seekable_w),
+
     _checkReadable = interp2app(check_readable_w),
     _checkWritable = interp2app(check_writable_w),
     _checkSeekable = interp2app(check_seekable_w),
diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -196,11 +196,6 @@
     def __init__(self, space):
         W_IOBase.__init__(self, space)
 
-    def _unsupportedoperation(self, space, message):
-        w_exc = space.getattr(space.getbuiltinmodule('_io'),
-                              space.wrap('UnsupportedOperation'))
-        raise OperationError(w_exc, space.wrap(message))
-
     def read_w(self, space, w_size=None):
         self._unsupportedoperation(space, "read")
 
diff --git a/pypy/module/_io/test/test_io.py b/pypy/module/_io/test/test_io.py
--- a/pypy/module/_io/test/test_io.py
+++ b/pypy/module/_io/test/test_io.py
@@ -43,6 +43,13 @@
         import _io
         e = _io.UnsupportedOperation("seek")
 
+    def test_default_implementations(self):
+        import _io
+        file = _io._IOBase()
+        raises(_io.UnsupportedOperation, file.seek, 0, 1)
+        raises(_io.UnsupportedOperation, file.fileno)
+        raises(_io.UnsupportedOperation, file.truncate)
+
     def test_blockingerror(self):
         import _io
         try:
diff --git a/pypy/module/_io/test/test_textio.py b/pypy/module/_io/test/test_textio.py
--- a/pypy/module/_io/test/test_textio.py
+++ b/pypy/module/_io/test/test_textio.py
@@ -26,6 +26,14 @@
         assert t.readable()
         assert t.seekable()
 
+    def test_default_implementations(self):
+        import _io
+        file = _io._TextIOBase()
+        raises(_io.UnsupportedOperation, file.read)
+        raises(_io.UnsupportedOperation, file.seek, 0)
+        raises(_io.UnsupportedOperation, file.readline)
+        raises(_io.UnsupportedOperation, file.detach)
+
     def test_unreadable(self):
         import _io
         class UnReadable(_io.BytesIO):
diff --git a/pypy/module/micronumpy/__init__.py b/pypy/module/micronumpy/__init__.py
--- a/pypy/module/micronumpy/__init__.py
+++ b/pypy/module/micronumpy/__init__.py
@@ -13,6 +13,7 @@
         'empty': 'interp_numarray.zeros',
         'ones': 'interp_numarray.ones',
         '_reconstruct' : 'interp_numarray._reconstruct',
+        'scalar' : 'interp_numarray.build_scalar',
         'dot': 'interp_arrayops.dot',
         'fromstring': 'interp_support.fromstring',
         'flatiter': 'interp_flatiter.W_FlatIterator',
diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py
--- a/pypy/module/micronumpy/compile.py
+++ b/pypy/module/micronumpy/compile.py
@@ -46,11 +46,11 @@
         self.name = name
 
 class FakeSpace(object):
-    w_ValueError = "ValueError"
-    w_TypeError = "TypeError"
-    w_IndexError = "IndexError"
-    w_OverflowError = "OverflowError"
-    w_NotImplementedError = "NotImplementedError"
+    w_ValueError = W_TypeObject("ValueError")
+    w_TypeError = W_TypeObject("TypeError")
+    w_IndexError = W_TypeObject("IndexError")
+    w_OverflowError = W_TypeObject("OverflowError")
+    w_NotImplementedError = W_TypeObject("NotImplementedError")
     w_None = None
 
     w_bool = W_TypeObject("bool")
diff --git a/pypy/module/micronumpy/interp_boxes.py b/pypy/module/micronumpy/interp_boxes.py
--- a/pypy/module/micronumpy/interp_boxes.py
+++ b/pypy/module/micronumpy/interp_boxes.py
@@ -11,6 +11,8 @@
 from rpython.rtyper.lltypesystem import rffi
 from rpython.tool.sourcetools import func_with_new_name
 from pypy.module.micronumpy.arrayimpl.voidbox import VoidBoxStorage
+from rpython.rlib.objectmodel import specialize
+from pypy.interpreter.mixedmodule import MixedModule
 
 MIXIN_32 = (int_typedef,) if LONG_BIT == 32 else ()
 MIXIN_64 = (int_typedef,) if LONG_BIT == 64 else ()
@@ -33,7 +35,11 @@
     def new(space, w_subtype, w_value):
         dtype = _get_dtype(space)
         return dtype.itemtype.coerce_subtype(space, w_subtype, w_value)
-    return func_with_new_name(new, name + "_box_new"), staticmethod(_get_dtype)
+
+    def descr_reduce(self, space):
+        return self.reduce(space)
+
+    return func_with_new_name(new, name + "_box_new"), staticmethod(_get_dtype), func_with_new_name(descr_reduce, "descr_reduce")
 
 
 class PrimitiveBox(object):
@@ -48,6 +54,26 @@
     def __repr__(self):
         return '%s(%s)' % (self.__class__.__name__, self.value)
 
+    def reduce(self, space):
+        from rpython.rlib.rstring import StringBuilder
+        from rpython.rtyper.lltypesystem import rffi, lltype
+
+        numpypy = space.getbuiltinmodule("_numpypy")
+        assert isinstance(numpypy, MixedModule)
+        multiarray = numpypy.get("multiarray")
+        assert isinstance(multiarray, MixedModule)
+        scalar = multiarray.get("scalar")
+
+        value = lltype.malloc(rffi.CArray(lltype.typeOf(self.value)), 1, flavor="raw")
+        value[0] = self.value
+
+        builder = StringBuilder()
+        builder.append_charpsize(rffi.cast(rffi.CCHARP, value), rffi.sizeof(lltype.typeOf(self.value)))
+
+        ret = space.newtuple([scalar, space.newtuple([space.wrap(self._get_dtype(space)), space.wrap(builder.build())])])
+        lltype.free(value, flavor="raw")
+        return ret
+
 class ComplexBox(object):
     _mixin_ = True
 
@@ -64,6 +90,26 @@
     def convert_imag_to(self, dtype):
         return dtype.box(self.imag)
 
+    def reduce(self, space):
+        from rpython.rlib.rstring import StringBuilder
+        from rpython.rtyper.lltypesystem import rffi, lltype
+
+        numpypy = space.getbuiltinmodule("_numpypy")
+        assert isinstance(numpypy, MixedModule)
+        multiarray = numpypy.get("multiarray")
+        assert isinstance(multiarray, MixedModule)
+        scalar = multiarray.get("scalar")
+
+        value = lltype.malloc(rffi.CArray(lltype.typeOf(self.real)), 2, flavor="raw")
+        value[0] = self.real
+        value[1] = self.imag
+
+        builder = StringBuilder()
+        builder.append_charpsize(rffi.cast(rffi.CCHARP, value), rffi.sizeof(lltype.typeOf(self.real)) * 2)
+
+        ret = space.newtuple([scalar, space.newtuple([space.wrap(self._get_dtype(space)), space.wrap(builder.build())])])
+        lltype.free(value, flavor="raw")
+        return ret
 
 class W_GenericBox(W_Root):
     _attrs_ = ()
@@ -187,7 +233,7 @@
         return convert_to_array(space, w_values)
 
 class W_BoolBox(W_GenericBox, PrimitiveBox):
-    descr__new__, _get_dtype = new_dtype_getter("bool")
+    descr__new__, _get_dtype, descr_reduce = new_dtype_getter("bool")
 
 class W_NumberBox(W_GenericBox):
     _attrs_ = ()
@@ -203,40 +249,40 @@
     pass
 
 class W_Int8Box(W_SignedIntegerBox, PrimitiveBox):
-    descr__new__, _get_dtype = new_dtype_getter("int8")
+    descr__new__, _get_dtype, descr_reduce = new_dtype_getter("int8")
 
 class W_UInt8Box(W_UnsignedIntegerBox, PrimitiveBox):
-    descr__new__, _get_dtype = new_dtype_getter("uint8")
+    descr__new__, _get_dtype, descr_reduce = new_dtype_getter("uint8")
 
 class W_Int16Box(W_SignedIntegerBox, PrimitiveBox):
-    descr__new__, _get_dtype = new_dtype_getter("int16")
+    descr__new__, _get_dtype, descr_reduce = new_dtype_getter("int16")
 
 class W_UInt16Box(W_UnsignedIntegerBox, PrimitiveBox):
-    descr__new__, _get_dtype = new_dtype_getter("uint16")
+    descr__new__, _get_dtype, descr_reduce = new_dtype_getter("uint16")
 
 class W_Int32Box(W_SignedIntegerBox, PrimitiveBox):
-    descr__new__, _get_dtype = new_dtype_getter("int32")
+    descr__new__, _get_dtype, descr_reduce = new_dtype_getter("int32")
 
 class W_UInt32Box(W_UnsignedIntegerBox, PrimitiveBox):
-    descr__new__, _get_dtype = new_dtype_getter("uint32")
+    descr__new__, _get_dtype, descr_reduce = new_dtype_getter("uint32")
 
 class W_LongBox(W_SignedIntegerBox, PrimitiveBox):
-    descr__new__, _get_dtype = new_dtype_getter("long")
+    descr__new__, _get_dtype, descr_reduce = new_dtype_getter("long")
 
 class W_ULongBox(W_UnsignedIntegerBox, PrimitiveBox):
-    descr__new__, _get_dtype = new_dtype_getter("ulong")
+    descr__new__, _get_dtype, descr_reduce = new_dtype_getter("ulong")
 
 class W_Int64Box(W_SignedIntegerBox, PrimitiveBox):
-    descr__new__, _get_dtype = new_dtype_getter("int64")
+    descr__new__, _get_dtype, descr_reduce = new_dtype_getter("int64")
 
 class W_LongLongBox(W_SignedIntegerBox, PrimitiveBox):
-    descr__new__, _get_dtype = new_dtype_getter('longlong')
+    descr__new__, _get_dtype, descr_reduce = new_dtype_getter('longlong')
 
 class W_UInt64Box(W_UnsignedIntegerBox, PrimitiveBox):
-    descr__new__, _get_dtype = new_dtype_getter("uint64")
+    descr__new__, _get_dtype, descr_reduce = new_dtype_getter("uint64")
 
 class W_ULongLongBox(W_SignedIntegerBox, PrimitiveBox):
-    descr__new__, _get_dtype = new_dtype_getter('ulonglong')
+    descr__new__, _get_dtype, descr_reduce = new_dtype_getter('ulonglong')
 
 class W_InexactBox(W_NumberBox):
     _attrs_ = ()
@@ -245,15 +291,17 @@
     _attrs_ = ()
 
 class W_Float16Box(W_FloatingBox, PrimitiveBox):
-    descr__new__, _get_dtype = new_dtype_getter("float16")
+    descr__new__, _get_dtype, descr_reduce = new_dtype_getter("float16")
 
 class W_Float32Box(W_FloatingBox, PrimitiveBox):
-    descr__new__, _get_dtype = new_dtype_getter("float32")
+    descr__new__, _get_dtype, descr_reduce = new_dtype_getter("float32")
 
 class W_Float64Box(W_FloatingBox, PrimitiveBox):
-    descr__new__, _get_dtype = new_dtype_getter("float64")
+    descr__new__, _get_dtype, descr_reduce = new_dtype_getter("float64")
 
 class W_FlexibleBox(W_GenericBox):
+    _attrs_ = ['ofs', 'dtype', 'arr']
+    _immutable_fields_ = ['ofs']
     def __init__(self, arr, ofs, dtype):
         self.arr = arr # we have to keep array alive
         self.ofs = ofs
@@ -354,33 +402,33 @@
 
 
 class W_Complex64Box(ComplexBox, W_ComplexFloatingBox):
-    descr__new__, _get_dtype = new_dtype_getter("complex64")
+    descr__new__, _get_dtype, descr_reduce = new_dtype_getter("complex64")
     _COMPONENTS_BOX = W_Float32Box
 
 
 class W_Complex128Box(ComplexBox, W_ComplexFloatingBox):
-    descr__new__, _get_dtype = new_dtype_getter("complex128")
+    descr__new__, _get_dtype, descr_reduce = new_dtype_getter("complex128")
     _COMPONENTS_BOX = W_Float64Box
 
 if ENABLED_LONG_DOUBLE and long_double_size == 12:
     class W_Float96Box(W_FloatingBox, PrimitiveBox):
-        descr__new__, _get_dtype = new_dtype_getter("float96")
+        descr__new__, _get_dtype, descr_reduce = new_dtype_getter("float96")
 
     W_LongDoubleBox = W_Float96Box
 
     class W_Complex192Box(ComplexBox, W_ComplexFloatingBox):
-        descr__new__, _get_dtype = new_dtype_getter("complex192")
+        descr__new__, _get_dtype, descr_reduce = new_dtype_getter("complex192")
         _COMPONENTS_BOX = W_Float96Box
 
     W_CLongDoubleBox = W_Complex192Box
 
 elif ENABLED_LONG_DOUBLE and long_double_size == 16:
     class W_Float128Box(W_FloatingBox, PrimitiveBox):
-        descr__new__, _get_dtype = new_dtype_getter("float128")
+        descr__new__, _get_dtype, descr_reduce = new_dtype_getter("float128")
     W_LongDoubleBox = W_Float128Box
 
     class W_Complex256Box(ComplexBox, W_ComplexFloatingBox):
-        descr__new__, _get_dtype = new_dtype_getter("complex256")
+        descr__new__, _get_dtype, descr_reduce = new_dtype_getter("complex256")
         _COMPONENTS_BOX = W_Float128Box
 
     W_CLongDoubleBox = W_Complex256Box
@@ -456,6 +504,7 @@
     __module__ = "numpypy",
     __new__ = interp2app(W_BoolBox.descr__new__.im_func),
     __index__ = interp2app(descr_index),
+    __reduce__ = interp2app(W_BoolBox.descr_reduce),
 )
 
 W_NumberBox.typedef = TypeDef("number", W_GenericBox.typedef,
@@ -478,42 +527,49 @@
     __module__ = "numpypy",
     __new__ = interp2app(W_Int8Box.descr__new__.im_func),
     __index__ = interp2app(descr_index),
+    __reduce__ = interp2app(W_Int8Box.descr_reduce),
 )
 
 W_UInt8Box.typedef = TypeDef("uint8", W_UnsignedIntegerBox.typedef,
     __module__ = "numpypy",
     __new__ = interp2app(W_UInt8Box.descr__new__.im_func),
     __index__ = interp2app(descr_index),
+    __reduce__ = interp2app(W_UInt8Box.descr_reduce),
 )
 
 W_Int16Box.typedef = TypeDef("int16", W_SignedIntegerBox.typedef,
     __module__ = "numpypy",
     __new__ = interp2app(W_Int16Box.descr__new__.im_func),
     __index__ = interp2app(descr_index),
+    __reduce__ = interp2app(W_Int16Box.descr_reduce),
 )
 
 W_UInt16Box.typedef = TypeDef("uint16", W_UnsignedIntegerBox.typedef,
     __module__ = "numpypy",
     __new__ = interp2app(W_UInt16Box.descr__new__.im_func),
     __index__ = interp2app(descr_index),
+    __reduce__ = interp2app(W_UInt16Box.descr_reduce),
 )
 
 W_Int32Box.typedef = TypeDef("int32", (W_SignedIntegerBox.typedef,) + MIXIN_32,
     __module__ = "numpypy",
     __new__ = interp2app(W_Int32Box.descr__new__.im_func),
     __index__ = interp2app(descr_index),
+    __reduce__ = interp2app(W_Int32Box.descr_reduce),
 )
 
 W_UInt32Box.typedef = TypeDef("uint32", W_UnsignedIntegerBox.typedef,
     __module__ = "numpypy",
     __new__ = interp2app(W_UInt32Box.descr__new__.im_func),
     __index__ = interp2app(descr_index),
+    __reduce__ = interp2app(W_UInt32Box.descr_reduce),
 )
 
 W_Int64Box.typedef = TypeDef("int64", (W_SignedIntegerBox.typedef,) + MIXIN_64,
     __module__ = "numpypy",
     __new__ = interp2app(W_Int64Box.descr__new__.im_func),
     __index__ = interp2app(descr_index),
+    __reduce__ = interp2app(W_Int64Box.descr_reduce),
 )
 
 if LONG_BIT == 32:
@@ -527,6 +583,7 @@
     __module__ = "numpypy",
     __new__ = interp2app(W_UInt64Box.descr__new__.im_func),
     __index__ = interp2app(descr_index),
+    __reduce__ = interp2app(W_UInt64Box.descr_reduce),
 )
 
 W_InexactBox.typedef = TypeDef("inexact", W_NumberBox.typedef,
@@ -541,23 +598,27 @@
     __module__ = "numpypy",
 
     __new__ = interp2app(W_Float16Box.descr__new__.im_func),
+    __reduce__ = interp2app(W_Float16Box.descr_reduce),
 )
 
 W_Float32Box.typedef = TypeDef("float32", W_FloatingBox.typedef,
     __module__ = "numpypy",
 
     __new__ = interp2app(W_Float32Box.descr__new__.im_func),
+    __reduce__ = interp2app(W_Float32Box.descr_reduce),
 )
 
 W_Float64Box.typedef = TypeDef("float64", (W_FloatingBox.typedef, float_typedef),
     __module__ = "numpypy",
 
     __new__ = interp2app(W_Float64Box.descr__new__.im_func),
+    __reduce__ = interp2app(W_Float64Box.descr_reduce),
 )
 
 if ENABLED_LONG_DOUBLE and long_double_size == 12:
     W_Float96Box.typedef = TypeDef("float96", (W_FloatingBox.typedef),
         __module__ = "numpypy",
+        __reduce__ = interp2app(W_Float96Box.descr_reduce),
 
         __new__ = interp2app(W_Float96Box.descr__new__.im_func),
     )
@@ -565,6 +626,7 @@
     W_Complex192Box.typedef = TypeDef("complex192", (W_ComplexFloatingBox.typedef, complex_typedef),
         __module__ = "numpypy",
         __new__ = interp2app(W_Complex192Box.descr__new__.im_func),
+        __reduce__ = interp2app(W_Complex192Box.descr_reduce),
         real = GetSetProperty(W_ComplexFloatingBox.descr_get_real),
         imag = GetSetProperty(W_ComplexFloatingBox.descr_get_imag),
     )
@@ -574,11 +636,13 @@
         __module__ = "numpypy",
 
         __new__ = interp2app(W_Float128Box.descr__new__.im_func),
+        __reduce__ = interp2app(W_Float128Box.descr_reduce),
     )
 
     W_Complex256Box.typedef = TypeDef("complex256", (W_ComplexFloatingBox.typedef, complex_typedef),
         __module__ = "numpypy",
         __new__ = interp2app(W_Complex256Box.descr__new__.im_func),
+        __reduce__ = interp2app(W_Complex256Box.descr_reduce),
         real = GetSetProperty(W_ComplexFloatingBox.descr_get_real),
         imag = GetSetProperty(W_ComplexFloatingBox.descr_get_imag),
     )
@@ -616,6 +680,7 @@
 W_Complex128Box.typedef = TypeDef("complex128", (W_ComplexFloatingBox.typedef, complex_typedef),
     __module__ = "numpypy",
     __new__ = interp2app(W_Complex128Box.descr__new__.im_func),
+    __reduce__ = interp2app(W_Complex128Box.descr_reduce),
     real = GetSetProperty(W_ComplexFloatingBox.descr_get_real),
     imag = GetSetProperty(W_ComplexFloatingBox.descr_get_imag),
 )
@@ -623,6 +688,7 @@
 W_Complex64Box.typedef = TypeDef("complex64", (W_ComplexFloatingBox.typedef),
     __module__ = "numpypy",
     __new__ = interp2app(W_Complex64Box.descr__new__.im_func),
+    __reduce__ = interp2app(W_Complex64Box.descr_reduce),
     real = GetSetProperty(W_ComplexFloatingBox .descr_get_real),
     imag = GetSetProperty(W_ComplexFloatingBox.descr_get_imag),
 )
diff --git a/pypy/module/micronumpy/interp_numarray.py b/pypy/module/micronumpy/interp_numarray.py
--- a/pypy/module/micronumpy/interp_numarray.py
+++ b/pypy/module/micronumpy/interp_numarray.py
@@ -1051,6 +1051,17 @@
 def _reconstruct(space, w_subtype, w_shape, w_dtype):
     return descr_new_array(space, w_subtype, w_shape, w_dtype)
 
+def build_scalar(space, w_dtype, w_state):
+    from rpython.rtyper.lltypesystem import rffi, lltype
+
+    assert isinstance(w_dtype, interp_dtype.W_Dtype)
+
+    state = rffi.str2charp(space.str_w(w_state))
+    box = w_dtype.itemtype.box_raw_data(state)
+    lltype.free(state, flavor="raw")
+    return box
+
+
 W_FlatIterator.typedef = TypeDef(
     'flatiter',
     __iter__ = interp2app(W_FlatIterator.descr_iter),
diff --git a/pypy/module/micronumpy/test/test_dtypes.py b/pypy/module/micronumpy/test/test_dtypes.py
--- a/pypy/module/micronumpy/test/test_dtypes.py
+++ b/pypy/module/micronumpy/test/test_dtypes.py
@@ -786,7 +786,7 @@
 
     def test_create_subarrays(self):
         from numpypy import dtype
-        d = dtype([("x", "float", (2,)), ("y", "int", (2,))])
+        d = dtype([("x", "float", (2,)), ("y", "int64", (2,))])
         assert d.itemsize == 32
         assert d.name == "void256"
         keys = d.fields.keys()
diff --git a/pypy/module/micronumpy/test/test_numarray.py b/pypy/module/micronumpy/test/test_numarray.py
--- a/pypy/module/micronumpy/test/test_numarray.py
+++ b/pypy/module/micronumpy/test/test_numarray.py
@@ -2715,7 +2715,7 @@
         a[0]["x"][0] = 200
         assert a[0]["x"][0] == 200
 
-        d = dtype([("x", "int", (2, 3))])
+        d = dtype([("x", "int64", (2, 3))])
         a = array([([[1, 2, 3], [4, 5, 6]],)], dtype=d)
 
         assert a[0]["x"].dtype == dtype("int64")
@@ -2735,7 +2735,7 @@
     def test_multidim_subarray(self):
         from numpypy import dtype, array
 
-        d = dtype([("x", "int", (2, 3))])
+        d = dtype([("x", "int64", (2, 3))])
         a = array([([[1, 2, 3], [4, 5, 6]],)], dtype=d)
 
         assert a[0]["x"].dtype == dtype("int64")
diff --git a/pypy/module/micronumpy/test/test_scalar.py b/pypy/module/micronumpy/test/test_scalar.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/micronumpy/test/test_scalar.py
@@ -0,0 +1,23 @@
+from pypy.module.micronumpy.test.test_base import BaseNumpyAppTest
+
+class AppTestScalar(BaseNumpyAppTest):
+    spaceconfig = dict(usemodules=["micronumpy", "binascii", "struct"])
+
+    def test_pickle(self):
+        from numpypy import dtype, int32, float64, complex128, zeros, sum
+        from numpypy.core.multiarray import scalar
+        from cPickle import loads, dumps
+        i = int32(1337)
+        f = float64(13.37)
+        c = complex128(13 + 37.j)
+
+        assert i.__reduce__() == (scalar, (dtype('int32'), '9\x05\x00\x00'))
+        assert f.__reduce__() == (scalar, (dtype('float64'), '=\n\xd7\xa3p\xbd*@'))
+        assert c.__reduce__() == (scalar, (dtype('complex128'), '\x00\x00\x00\x00\x00\x00*@\x00\x00\x00\x00\x00\x80B@'))
+
+        assert loads(dumps(i)) == i
+        assert loads(dumps(f)) == f
+        assert loads(dumps(c)) == c
+
+        a = zeros(3)
+        assert loads(dumps(sum(a))) == sum(a)
diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py
--- a/pypy/module/micronumpy/types.py
+++ b/pypy/module/micronumpy/types.py
@@ -145,6 +145,11 @@
         #XXX this is the place to display a warning
         return self.box(real)
 
+    def box_raw_data(self, data):
+        # For pickle
+        array = rffi.cast(rffi.CArrayPtr(self.T), data)
+        return self.box(array[0])
+
     @specialize.argtype(1)
     def unbox(self, box):
         assert isinstance(box, self.BoxType)
@@ -1108,6 +1113,11 @@
             rffi.cast(self.T, real),
             rffi.cast(self.T, imag))
 
+    def box_raw_data(self, data):
+        # For pickle
+        array = rffi.cast(rffi.CArrayPtr(self.T), data)
+        return self.box_complex(array[0], array[1])
+
     def unbox(self, box):
         assert isinstance(box, self.BoxType)
         # do this in two stages since real, imag are read only
@@ -1705,8 +1715,10 @@
 
     def _coerce(self, space, arr, ofs, dtype, w_items, shape):
         # TODO: Make sure the shape and the array match
+        from interp_dtype import W_Dtype
         items_w = space.fixedview(w_items)
         subdtype = dtype.subdtype
+        assert isinstance(subdtype, W_Dtype)
         itemtype = subdtype.itemtype
         if len(shape) <= 1:
             for i in range(len(items_w)):
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py
@@ -239,6 +239,9 @@
             pass
         pos = POSITION(1, 2)
         assert (pos.x, pos.y) == (1, 2)
+        # Try a second time, result may be different (cf. issue1498)
+        pos = POSITION(1, 2)
+        assert (pos.x, pos.y) == (1, 2)
         
 
     def test_invalid_field_types(self):
diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py
--- a/pypy/objspace/std/listobject.py
+++ b/pypy/objspace/std/listobject.py
@@ -412,7 +412,7 @@
             # No more items to compare -- compare sizes
             return space.newbool(op(self.length(), w_list2.length()))
 
-        return func_with_new_name(compare_unwrappeditems, name + '__List_List')
+        return func_with_new_name(compare_unwrappeditems, 'descr_' + name)
 
     descr_lt = _make_list_comparison('lt')
     descr_le = _make_list_comparison('le')
diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -19,7 +19,7 @@
 from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
 from rpython.jit.backend.model import CompiledLoopToken
 from rpython.jit.codewriter.effectinfo import EffectInfo
-from rpython.jit.metainterp.history import AbstractFailDescr, FLOAT
+from rpython.jit.metainterp.history import AbstractFailDescr, FLOAT, INT, VOID
 from rpython.jit.metainterp.resoperation import rop
 from rpython.rlib.debug import debug_print, debug_start, debug_stop
 from rpython.rlib.jit import AsmInfo
@@ -27,6 +27,7 @@
 from rpython.rlib.rarithmetic import r_uint
 from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref
 from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.jit.backend.arm import callbuilder
 
 class AssemblerARM(ResOpAssembler):
 
@@ -934,23 +935,6 @@
         asm_math_operations[oopspecindex](self, op, arglocs, regalloc, fcond)
         return fcond
 
-    def _ensure_result_bit_extension(self, resloc, size, signed):
-        if size == 4:
-            return
-        if size == 1:
-            if not signed:  # unsigned char
-                self.mc.AND_ri(resloc.value, resloc.value, 0xFF)
-            else:
-                self.mc.LSL_ri(resloc.value, resloc.value, 24)
-                self.mc.ASR_ri(resloc.value, resloc.value, 24)
-        elif size == 2:
-            if not signed:
-                self.mc.LSL_ri(resloc.value, resloc.value, 16)
-                self.mc.LSR_ri(resloc.value, resloc.value, 16)
-            else:
-                self.mc.LSL_ri(resloc.value, resloc.value, 16)
-                self.mc.ASR_ri(resloc.value, resloc.value, 16)
-
     def patch_trace(self, faildescr, looptoken, bridge_addr, regalloc):
         b = InstrBuilder(self.cpu.cpuinfo.arch_version)
         patch_addr = faildescr._arm_failure_recovery_block
@@ -1012,20 +996,32 @@
             mc.gen_load_int(helper.value, ofs, cond=cond)
             mc.STR_rr(source.value, base.value, helper.value, cond=cond)
 
+    def get_tmp_reg(self, forbidden_regs=None):
+        if forbidden_regs is None:
+            return r.ip, False
+        for x in [r.ip, r.lr]:
+            if x not in forbidden_regs:
+                return x, False
+        # otherwise pick any other register; the caller has to save/restore it
+        for x in r.all_regs:
+            if x not in forbidden_regs:
+                return x, True
+        assert 0
+
     def _mov_imm_to_loc(self, prev_loc, loc, cond=c.AL):
-        if not loc.is_reg() and not (loc.is_stack() and loc.type != FLOAT):
+        if loc.type == FLOAT:
             raise AssertionError("invalid target for move from imm value")
         if loc.is_reg():
             new_loc = loc
-        elif loc.is_stack():
-            self.mc.PUSH([r.lr.value], cond=cond)
+        elif loc.is_stack() or loc.is_raw_sp():
             new_loc = r.lr
         else:
             raise AssertionError("invalid target for move from imm value")
         self.mc.gen_load_int(new_loc.value, prev_loc.value, cond=cond)
         if loc.is_stack():
             self.regalloc_mov(new_loc, loc)
-            self.mc.POP([r.lr.value], cond=cond)
+        elif loc.is_raw_sp():
+            self.store_reg(self.mc, new_loc, r.sp, loc.value, cond=cond, helper=r.ip)
 
     def _mov_reg_to_loc(self, prev_loc, loc, cond=c.AL):
         if loc.is_imm():
@@ -1034,60 +1030,77 @@
             self.mc.MOV_rr(loc.value, prev_loc.value, cond=cond)
         elif loc.is_stack() and loc.type != FLOAT:
             # spill a core register
-            if prev_loc is r.ip:
-                temp = r.lr
-            else:
-                temp = r.ip
+            temp, save = self.get_tmp_reg([prev_loc, loc])
             offset = loc.value
             is_imm = check_imm_arg(offset, size=0xFFF)
-            if not is_imm:
+            if not is_imm and save:
                 self.mc.PUSH([temp.value], cond=cond)
             self.store_reg(self.mc, prev_loc, r.fp, offset, helper=temp, cond=cond)
-            if not is_imm:
+            if not is_imm and save:
                 self.mc.POP([temp.value], cond=cond)
+        elif loc.is_raw_sp() and loc.type != FLOAT:
+            temp, save = self.get_tmp_reg([prev_loc])
+            assert not save
+            self.store_reg(self.mc, prev_loc, r.sp, loc.value, cond=cond, helper=temp)
         else:
             assert 0, 'unsupported case'
 
     def _mov_stack_to_loc(self, prev_loc, loc, cond=c.AL):
-        # disabled for now, has side effects in combination with remap_frame_layout when called from a jump
-        helper = None # self._regalloc.get_free_reg()
+        helper = None
+        offset = prev_loc.value
+        tmp = None
         if loc.is_reg():
             assert prev_loc.type != FLOAT, 'trying to load from an \
                 incompatible location into a core register'
-            assert loc is not r.lr, 'lr is not supported as a target \
-                when moving from the stack'
             # unspill a core register
-            offset = prev_loc.value
             is_imm = check_imm_arg(offset, size=0xFFF)
-            helper = r.lr if helper is None else helper
-            save_helper = not is_imm and helper is r.lr
+            helper, save = self.get_tmp_reg([loc])
+            save_helper = not is_imm and save
         elif loc.is_vfp_reg():
             assert prev_loc.type == FLOAT, 'trying to load from an \
                 incompatible location into a float register'
             # load spilled value into vfp reg
-            offset = prev_loc.value
             is_imm = check_imm_arg(offset)
-            helper = r.ip if helper is None else helper
-            save_helper = not is_imm and helper is r.ip
+            helper, save = self.get_tmp_reg()
+            save_helper = not is_imm and save
+        elif loc.is_raw_sp():
+            assert (loc.type == prev_loc.type == FLOAT
+                    or (loc.type != FLOAT and prev_loc.type != FLOAT))
+            tmp = loc
+            if loc.is_float():
+                loc = r.vfp_ip
+            else:
+                loc, save_helper = self.get_tmp_reg()
+                assert not save_helper
+            helper, save_helper = self.get_tmp_reg([loc])
+            assert not save_helper
         else:
             assert 0, 'unsupported case'
+
         if save_helper:
             self.mc.PUSH([helper.value], cond=cond)
         self.load_reg(self.mc, loc, r.fp, offset, cond=cond, helper=helper)
         if save_helper:
             self.mc.POP([helper.value], cond=cond)
 
+        if tmp and tmp.is_raw_sp():
+            self.store_reg(self.mc, loc, r.sp, tmp.value, cond=cond, helper=helper)
+
     def _mov_imm_float_to_loc(self, prev_loc, loc, cond=c.AL):
         if loc.is_vfp_reg():
-            self.mc.PUSH([r.ip.value], cond=cond)
-            self.mc.gen_load_int(r.ip.value, prev_loc.getint(), cond=cond)
-            self.load_reg(self.mc, loc, r.ip, 0, cond=cond)
-            self.mc.POP([r.ip.value], cond=cond)
-        elif loc.is_stack():
-            self.regalloc_push(r.vfp_ip)
+            helper, save_helper = self.get_tmp_reg([loc])
+            if save_helper:
+                self.mc.PUSH([helper.value], cond=cond)
+            self.mc.gen_load_int(helper.value, prev_loc.getint(), cond=cond)
+            self.load_reg(self.mc, loc, helper, 0, cond=cond)
+            if save_helper:
+                self.mc.POP([helper.value], cond=cond)
+        elif loc.is_stack() and loc.type == FLOAT:
             self.regalloc_mov(prev_loc, r.vfp_ip, cond)
             self.regalloc_mov(r.vfp_ip, loc, cond)
-            self.regalloc_pop(r.vfp_ip)
+        elif loc.is_raw_sp() and loc.type == FLOAT:
+            self.regalloc_mov(prev_loc, r.vfp_ip, cond)
+            self.regalloc_mov(r.vfp_ip, loc, cond)
         else:
             assert 0, 'unsupported case'
 
@@ -1100,11 +1113,11 @@
             # spill vfp register
             offset = loc.value
             is_imm = check_imm_arg(offset)
-            if not is_imm:
-                self.mc.PUSH([r.ip.value], cond=cond)
-            self.store_reg(self.mc, prev_loc, r.fp, offset, cond=cond)
-            if not is_imm:
-                self.mc.POP([r.ip.value], cond=cond)
+            self.store_reg(self.mc, prev_loc, r.fp, offset, cond=cond, helper=r.ip)
+        elif loc.is_raw_sp():
+            assert loc.type == FLOAT, 'trying to store to an \
+                incompatible location from a float register'
+            self.store_reg(self.mc, prev_loc, r.sp, loc.value, cond=cond)
         else:
             assert 0, 'unsupported case'
 
@@ -1120,6 +1133,8 @@
             self._mov_imm_float_to_loc(prev_loc, loc, cond)
         elif prev_loc.is_vfp_reg():
             self._mov_vfp_reg_to_loc(prev_loc, loc, cond)
+        elif prev_loc.is_raw_sp():
+            assert 0, 'raw sp locs are not supported as source loc'
         else:
             assert 0, 'unsupported case'
     mov_loc_loc = regalloc_mov
@@ -1131,23 +1146,29 @@
         if vfp_loc.is_vfp_reg():
             self.mc.VMOV_rc(reg1.value, reg2.value, vfp_loc.value, cond=cond)
         elif vfp_loc.is_imm_float():
-            self.mc.PUSH([r.ip.value], cond=cond)
-            self.mc.gen_load_int(r.ip.value, vfp_loc.getint(), cond=cond)
+            helper, save_helper = self.get_tmp_reg([reg1, reg2])
+            if save_helper:
+                self.mc.PUSH([helper.value], cond=cond)
+            self.mc.gen_load_int(helper.value, vfp_loc.getint(), cond=cond)
             # we need to load one word to loc and one to loc+1 which are
             # two 32-bit core registers
-            self.mc.LDR_ri(reg1.value, r.ip.value, cond=cond)
-            self.mc.LDR_ri(reg2.value, r.ip.value, imm=WORD, cond=cond)
-            self.mc.POP([r.ip.value], cond=cond)
+            self.mc.LDR_ri(reg1.value, helper.value, cond=cond)
+            self.mc.LDR_ri(reg2.value, helper.value, imm=WORD, cond=cond)
+            if save_helper:
+                self.mc.POP([helper.value], cond=cond)
         elif vfp_loc.is_stack() and vfp_loc.type == FLOAT:
             # load spilled vfp value into two core registers
             offset = vfp_loc.value
             if not check_imm_arg(offset, size=0xFFF):
-                self.mc.PUSH([r.ip.value], cond=cond)
-                self.mc.gen_load_int(r.ip.value, offset, cond=cond)
-                self.mc.LDR_rr(reg1.value, r.fp.value, r.ip.value, cond=cond)
-                self.mc.ADD_ri(r.ip.value, r.ip.value, imm=WORD, cond=cond)
-                self.mc.LDR_rr(reg2.value, r.fp.value, r.ip.value, cond=cond)
-                self.mc.POP([r.ip.value], cond=cond)
+                helper, save_helper = self.get_tmp_reg([reg1, reg2])
+                if save_helper:
+                    self.mc.PUSH([helper.value], cond=cond)
+                self.mc.gen_load_int(helper.value, offset, cond=cond)
+                self.mc.LDR_rr(reg1.value, r.fp.value, helper.value, cond=cond)
+                self.mc.ADD_ri(helper.value, helper.value, imm=WORD, cond=cond)
+                self.mc.LDR_rr(reg2.value, r.fp.value, helper.value, cond=cond)
+                if save_helper:
+                    self.mc.POP([helper.value], cond=cond)
             else:
                 self.mc.LDR_ri(reg1.value, r.fp.value, imm=offset, cond=cond)
                 self.mc.LDR_ri(reg2.value, r.fp.value,
@@ -1165,12 +1186,15 @@
             # move from two core registers to a float stack location
             offset = vfp_loc.value
             if not check_imm_arg(offset + WORD, size=0xFFF):
-                self.mc.PUSH([r.ip.value], cond=cond)
-                self.mc.gen_load_int(r.ip.value, offset, cond=cond)
-                self.mc.STR_rr(reg1.value, r.fp.value, r.ip.value, cond=cond)
-                self.mc.ADD_ri(r.ip.value, r.ip.value, imm=WORD, cond=cond)
-                self.mc.STR_rr(reg2.value, r.fp.value, r.ip.value, cond=cond)
-                self.mc.POP([r.ip.value], cond=cond)
+                helper, save_helper = self.get_tmp_reg([reg1, reg2])
+                if save_helper:
+                    self.mc.PUSH([helper.value], cond=cond)
+                self.mc.gen_load_int(helper.value, offset, cond=cond)
+                self.mc.STR_rr(reg1.value, r.fp.value, helper.value, cond=cond)
+                self.mc.ADD_ri(helper.value, helper.value, imm=WORD, cond=cond)
+                self.mc.STR_rr(reg2.value, r.fp.value, helper.value, cond=cond)
+                if save_helper:
+                    self.mc.POP([helper.value], cond=cond)
             else:
                 self.mc.STR_ri(reg1.value, r.fp.value, imm=offset, cond=cond)
                 self.mc.STR_ri(reg2.value, r.fp.value,
@@ -1417,6 +1441,26 @@
         #
         return shiftsize
 
+    def simple_call(self, fnloc, arglocs, result_loc=r.r0):
+        if result_loc is None:
+            result_type = VOID
+            result_size = 0
+        elif result_loc.is_vfp_reg():
+            result_type = FLOAT
+            result_size = DOUBLE_WORD
+        else:
+            result_type = INT
+            result_size = WORD
+        cb = callbuilder.get_callbuilder(self.cpu, self, fnloc, arglocs,
+                                     result_loc, result_type,
+                                     result_size)
+        cb.emit()
+
+    def simple_call_no_collect(self, fnloc, arglocs):
+        cb = callbuilder.get_callbuilder(self.cpu, self, fnloc, arglocs)
+        cb.emit_no_collect()
+
+
 def not_implemented(msg):
     os.write(2, '[ARM/asm] %s\n' % msg)
     raise NotImplementedError(msg)
diff --git a/rpython/jit/backend/arm/callbuilder.py b/rpython/jit/backend/arm/callbuilder.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/arm/callbuilder.py
@@ -0,0 +1,304 @@
+from rpython.rlib.clibffi import FFI_DEFAULT_ABI
+from rpython.rlib.objectmodel import we_are_translated
+from rpython.jit.metainterp.history import INT, FLOAT, REF
+from rpython.jit.backend.arm.arch import WORD
+from rpython.jit.backend.arm import registers as r
+from rpython.jit.backend.arm import conditions as c
+from rpython.jit.backend.arm.locations import RawSPStackLocation
+from rpython.jit.backend.arm.jump import remap_frame_layout
+from rpython.jit.backend.llsupport.callbuilder import AbstractCallBuilder
+from rpython.jit.backend.arm.helper.assembler import count_reg_args
+from rpython.jit.backend.arm.helper.assembler import saved_registers
+from rpython.jit.backend.arm.helper.regalloc import check_imm_arg
+
+
+class ARMCallbuilder(AbstractCallBuilder):
+    def __init__(self, assembler, fnloc, arglocs,
+                 resloc=r.r0, restype=INT, ressize=WORD, ressigned=True):
+        AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
+                                     resloc, restype, ressize)
+        self.current_sp = 0
+
+    def push_gcmap(self):
+        assert not self.is_call_release_gil
+        # we push *now* the gcmap, describing the status of GC registers
+        # after the rearrangements done just above, ignoring the return
+        # value r0, if necessary
+        noregs = self.asm.cpu.gc_ll_descr.is_shadow_stack()
+        gcmap = self.asm._regalloc.get_gcmap([r.r0], noregs=noregs)
+        self.asm.push_gcmap(self.mc, gcmap, store=True)
+
+    def pop_gcmap(self):
+        self.asm._reload_frame_if_necessary(self.mc)
+        self.asm.pop_gcmap(self.mc)
+
+    def emit_raw_call(self):
+        # the actual call
+        if self.fnloc.is_imm():
+            self.mc.BL(self.fnloc.value)
+            return
+        if self.fnloc.is_stack():
+            self.asm.mov_loc_loc(self.fnloc, r.ip)
+            self.fnloc = r.ip
+        assert self.fnloc.is_reg()
+        self.mc.BLX(self.fnloc.value)
+
+    def restore_stack_pointer(self):
+        # readjust the sp in case we passed some args on the stack
+        assert self.current_sp % 8 == 0  # sanity check
+        if self.current_sp != 0:
+            self._adjust_sp(self.current_sp)
+        self.current_sp = 0
+
+    def _push_stack_args(self, stack_args, on_stack):
+        assert on_stack % 8 == 0
+        self._adjust_sp(-on_stack)
+        self.current_sp = on_stack
+        ofs = 0
+        for i, arg in enumerate(stack_args):
+            if arg is not None:
+                sp_loc = RawSPStackLocation(ofs, arg.type)
+                self.asm.regalloc_mov(arg, sp_loc)
+                ofs += sp_loc.width
+            else:  # alignment word
+                ofs += WORD
+
+    def _adjust_sp(self, n):
+        # adjust the current stack pointer by n bytes
+        if n > 0:
+            if check_imm_arg(n):
+                self.mc.ADD_ri(r.sp.value, r.sp.value, n)
+            else:
+                self.mc.gen_load_int(r.ip.value, n)
+                self.mc.ADD_rr(r.sp.value, r.sp.value, r.ip.value)
+        else:
+            n = abs(n)
+            if check_imm_arg(n):
+                self.mc.SUB_ri(r.sp.value, r.sp.value, n)
+            else:
+                self.mc.gen_load_int(r.ip.value, n)
+                self.mc.SUB_rr(r.sp.value, r.sp.value, r.ip.value)
+
+    def select_call_release_gil_mode(self):
+        AbstractCallBuilder.select_call_release_gil_mode(self)
+
+    def call_releasegil_addr_and_move_real_arguments(self):
+        assert not self.asm._is_asmgcc()
+        from rpython.jit.backend.arm.regalloc import CoreRegisterManager
+        with saved_registers(self.mc,
+                            CoreRegisterManager.save_around_call_regs):
+            self.mc.BL(self.asm.releasegil_addr)
+
+        if not we_are_translated():                     # for testing: we should not access
+            self.mc.ADD_ri(r.fp.value, r.fp.value, 1)   # fp any more
+
+    def move_real_result_and_call_reacqgil_addr(self):
+        # save the result we just got
+        assert not self.asm._is_asmgcc()
+        gpr_to_save, vfp_to_save = self.get_result_locs()
+        with saved_registers(self.mc, gpr_to_save, vfp_to_save):
+            self.mc.BL(self.asm.reacqgil_addr)
+
+        if not we_are_translated():                    # for testing: now we can access
+            self.mc.SUB_ri(r.fp.value, r.fp.value, 1)  # fp again
+
+        #   for shadowstack, done for us by _reload_frame_if_necessary()
+
+    def get_result_locs(self):
+        raise NotImplementedError
+
+    def _ensure_result_bit_extension(self, resloc, size, signed):
+        if size == 4:
+            return
+        if size == 1:
+            if not signed:  # unsigned char
+                self.mc.AND_ri(resloc.value, resloc.value, 0xFF)
+            else:
+                self.mc.LSL_ri(resloc.value, resloc.value, 24)
+                self.mc.ASR_ri(resloc.value, resloc.value, 24)
+        elif size == 2:
+            if not signed:
+                self.mc.LSL_ri(resloc.value, resloc.value, 16)
+                self.mc.LSR_ri(resloc.value, resloc.value, 16)
+            else:
+                self.mc.LSL_ri(resloc.value, resloc.value, 16)
+                self.mc.ASR_ri(resloc.value, resloc.value, 16)
+
+
+
+class SoftFloatCallBuilder(ARMCallbuilder):
+
+    def get_result_locs(self):
+        if self.resloc is None:
+            return [], []
+        if self.resloc.is_vfp_reg():
+            return [r.r0, r.r1], []
+        assert self.resloc.is_reg()
+        return [r.r0], []
+
+    def load_result(self):
+        # ensure the result is well-formed and stored in the correct location
+        resloc = self.resloc
+        if resloc is None:
+            return
+        if resloc.is_vfp_reg():
+            # move result to the allocated register
+            self.asm.mov_to_vfp_loc(r.r0, r.r1, resloc)
+        elif resloc.is_reg():
+            # move result to the allocated register
+            if resloc is not r.r0:
+                self.asm.mov_loc_loc(r.r0, resloc)
+            self._ensure_result_bit_extension(resloc,
+                                              self.ressize, self.ressign)
+
+
+    def _collect_and_push_stack_args(self, arglocs):
+        n_args = len(arglocs)
+        reg_args = count_reg_args(arglocs)
+        # all arguments past the 4th go on the stack
+        # first we need to prepare the list so it stays aligned
+        stack_args = []
+        count = 0
+        on_stack = 0
+        if n_args > reg_args:
+            for i in range(reg_args, n_args):
+                arg = arglocs[i]
+                if arg.type != FLOAT:
+                    count += 1
+                    on_stack += 1
+                else:
+                    on_stack += 2
+                    if count % 2 != 0:
+                        stack_args.append(None)
+                        count = 0
+                        on_stack += 1
+                stack_args.append(arg)
+            if count % 2 != 0:
+                on_stack += 1
+                stack_args.append(None)
+        if on_stack > 0:
+            self._push_stack_args(stack_args, on_stack*WORD)
+
+    def prepare_arguments(self):
+        arglocs = self.arglocs
+        reg_args = count_reg_args(arglocs)
+        self._collect_and_push_stack_args(arglocs)
+        # collect variables that need to go in registers and the registers they
+        # will be stored in
+        num = 0
+        count = 0
+        non_float_locs = []
+        non_float_regs = []
+        float_locs = []
+        for i in range(reg_args):
+            arg = arglocs[i]
+            if arg.type == FLOAT and count % 2 != 0:
+                    num += 1
+                    count = 0
+            reg = r.caller_resp[num]
+
+            if arg.type == FLOAT:
+                float_locs.append((arg, reg))
+            else:
+                non_float_locs.append(arg)
+                non_float_regs.append(reg)
+
+            if arg.type == FLOAT:
+                num += 2
+            else:
+                num += 1
+                count += 1
+        # Check that the address of the function we want to call is not
+        # currently stored in one of the registers used to pass the arguments
+        # or on the stack, which we cannot access later.
+        # If this happens to be the case, we remap the address to r4 and use
+        # r4 to call the function.
+        if self.fnloc in r.argument_regs or self.fnloc.is_stack():
+            non_float_locs.append(self.fnloc)
+            non_float_regs.append(r.r4)
+            self.fnloc = r.r4
+        # remap values stored in core registers
+        remap_frame_layout(self.asm, non_float_locs, non_float_regs, r.ip)
+
+        for loc, reg in float_locs:
+            self.asm.mov_from_vfp_loc(loc, reg, r.all_regs[reg.value + 1])
+
+class HardFloatCallBuilder(ARMCallbuilder):
+
+    def prepare_arguments(self):
+        non_float_locs = []
+        non_float_regs = []
+        float_locs = []
+        float_regs = []
+        stack_args = []
+
+        arglocs = self.arglocs
+        argtypes = self.argtypes
+
+        count = 0                      # stack alignment counter
+        on_stack = 0
+        for arg in arglocs:
+            if arg.type != FLOAT:
+                if len(non_float_regs) < len(r.argument_regs):
+                    reg = r.argument_regs[len(non_float_regs)]
+                    non_float_locs.append(arg)
+                    non_float_regs.append(reg)
+                else:  # non-float argument that needs to go on the stack
+                    count += 1
+                    on_stack += 1
+                    stack_args.append(arg)
+            else:
+                if len(float_regs) < len(r.vfp_argument_regs):
+                    reg = r.vfp_argument_regs[len(float_regs)]
+                    float_locs.append(arg)
+                    float_regs.append(reg)
+                else:  # float argument that needs to go on the stack
+                    if count % 2 != 0:
+                        stack_args.append(None)
+                        count = 0
+                        on_stack += 1
+                    stack_args.append(arg)
+                    on_stack += 2
+        # align the stack
+        if count % 2 != 0:
+            stack_args.append(None)
+            on_stack += 1
+        self._push_stack_args(stack_args, on_stack*WORD)
+        # Check that the address of the function we want to call is not
+        # currently stored in one of the registers used to pass the arguments
+        # or on the stack, which we cannot access later.
+        # If this happens to be the case, we remap the address to r4 and use
+        # r4 to call the function.
+        if self.fnloc in non_float_regs or self.fnloc.is_stack():
+            non_float_locs.append(self.fnloc)
+            non_float_regs.append(r.r4)
+            self.fnloc = r.r4
+        # remap values stored in core registers
+        remap_frame_layout(self.asm, non_float_locs, non_float_regs, r.ip)
+        # remap values stored in vfp registers
+        remap_frame_layout(self.asm, float_locs, float_regs, r.vfp_ip)
+
+    def load_result(self):
+        resloc = self.resloc
+        # ensure the result is well-formed and stored in the correct location
+        if resloc is not None and resloc.is_reg():
+            self._ensure_result_bit_extension(resloc,
+                                                  self.ressize, self.ressign)
+
+    def get_result_locs(self):
+        if self.resloc is None:
+            return [], []
+        if self.resloc.is_vfp_reg():
+            return [], [r.d0]
+        assert self.resloc.is_reg()
+        return [r.r0], []
+
+
+def get_callbuilder(cpu, assembler, fnloc, arglocs,
+                 resloc=r.r0, restype=INT, ressize=WORD, ressigned=True):
+    if cpu.cpuinfo.hf_abi:
+        return HardFloatCallBuilder(assembler, fnloc, arglocs, resloc,
+                                        restype, ressize, ressigned)
+    else:
+        return SoftFloatCallBuilder(assembler, fnloc, arglocs, resloc,
+                                        restype, ressize, ressigned)
diff --git a/rpython/jit/backend/arm/locations.py b/rpython/jit/backend/arm/locations.py
--- a/rpython/jit/backend/arm/locations.py
+++ b/rpython/jit/backend/arm/locations.py
@@ -12,6 +12,9 @@
     def is_stack(self):
         return False
 
+    def is_raw_sp(self):
+        return False
+
     def is_reg(self):
         return False
 
@@ -145,7 +148,27 @@
         return self.position + 10000
 
     def is_float(self):
-        return type == FLOAT
+        return self.type == FLOAT
+
+class RawSPStackLocation(AssemblerLocation):
+    _immutable_ = True
+
+    def __init__(self, sp_offset, type=INT):
+        if type == FLOAT:
+            self.width = DOUBLE_WORD
+        else:
+            self.width = WORD
+        self.value = sp_offset
+        self.type = type
+
+    def __repr__(self):
+        return 'SP(%s)+%d' % (self.type, self.value,)
+
+    def is_raw_sp(self):
+        return True
+
+    def is_float(self):
+        return self.type == FLOAT
 
 
 def imm(i):
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -13,8 +13,7 @@
                                                 gen_emit_float_cmp_op,
                                                 gen_emit_float_cmp_op_guard,
                                                 gen_emit_unary_float_op,
-                                                saved_registers,
-                                                count_reg_args)
+                                                saved_registers)
 from rpython.jit.backend.arm.helper.regalloc import check_imm_arg
 from rpython.jit.backend.arm.codebuilder import InstrBuilder, OverwritingBuilder
 from rpython.jit.backend.arm.jump import remap_frame_layout
@@ -31,8 +30,7 @@
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.rtyper.lltypesystem import rstr, rffi, lltype
 from rpython.rtyper.annlowlevel import cast_instance_to_gcref
-
-NO_FORCE_INDEX = -1
+from rpython.jit.backend.arm import callbuilder
 
 
 class ArmGuardToken(GuardToken):
@@ -339,217 +337,36 @@
         return fcond
 
     def emit_op_call(self, op, arglocs, regalloc, fcond):
-        resloc = arglocs[0]
-        adr = arglocs[1]
-        arglist = arglocs[2:]
+        return self._emit_call(op, arglocs, fcond=fcond)
+
+    def _emit_call(self, op, arglocs, is_call_release_gil=False, fcond=c.AL):
+        # arglocs = [resloc, size, sign, adr, args...]
+        from rpython.jit.backend.llsupport.descr import CallDescr
+
+        cb = callbuilder.get_callbuilder(self.cpu, self, arglocs[3], arglocs[4:], arglocs[0])
+
         descr = op.getdescr()
-        size = descr.get_result_size()
-        signed = descr.is_result_signed()
-        cond = self._emit_call(adr, arglist,
-                                            fcond, resloc, (size, signed))
-        return cond
+        assert isinstance(descr, CallDescr)
+        cb.callconv = descr.get_call_conv()
+        cb.argtypes = descr.get_arg_types()
+        cb.restype  = descr.get_result_type()
+        sizeloc = arglocs[1]
+        assert sizeloc.is_imm()
+        cb.ressize = sizeloc.value
+        signloc = arglocs[2]
+        assert signloc.is_imm()
+        cb.ressign = signloc.value
 
-    def _emit_call(self, adr, arglocs, fcond=c.AL, resloc=None,
-                    result_info=(-1, -1),
-                    # whether to worry about a CALL that can collect; this
-                    # is always true except in call_release_gil
-                    can_collect=True):
-        if self.cpu.cpuinfo.hf_abi:
-            stack_args, adr = self._setup_call_hf(adr, arglocs, fcond,
-                                            resloc, result_info)
+        if is_call_release_gil:
+            cb.emit_call_release_gil()
         else:
-            stack_args, adr = self._setup_call_sf(adr, arglocs, fcond,
-                                            resloc, result_info)
-
-        if can_collect:
-            # we push *now* the gcmap, describing the status of GC registers
-            # after the rearrangements done just above, ignoring the return
-            # value eax, if necessary
-            noregs = self.cpu.gc_ll_descr.is_shadow_stack()
-            gcmap = self._regalloc.get_gcmap([r.r0], noregs=noregs)
-            self.push_gcmap(self.mc, gcmap, store=True)
-        #the actual call
-        if adr.is_imm():
-            self.mc.BL(adr.value)
-        elif adr.is_stack():
-            self.mov_loc_loc(adr, r.ip)
-            adr = r.ip
-        else:
-            assert adr.is_reg()
-        if adr.is_reg():
-            self.mc.BLX(adr.value)
-        self._restore_sp(stack_args, fcond)
-
-        # ensure the result is wellformed and stored in the correct location
-        if resloc is not None:
-            if resloc.is_vfp_reg() and not self.cpu.cpuinfo.hf_abi:
-                # move result to the allocated register
-                self.mov_to_vfp_loc(r.r0, r.r1, resloc)
-            elif resloc.is_reg() and result_info != (-1, -1):
-                self._ensure_result_bit_extension(resloc, result_info[0],
-                                                          result_info[1])
-        if can_collect:
-            self._reload_frame_if_necessary(self.mc)
-            self.pop_gcmap(self.mc)
+            cb.emit()
         return fcond
 
-    def _restore_sp(self, stack_args, fcond):
-        # readjust the sp in case we passed some args on the stack
-        if len(stack_args) > 0:
-            n = 0
-            for arg in stack_args:
-                if arg is None or arg.type != FLOAT:
-                    n += WORD
-                else:
-                    n += DOUBLE_WORD
-            self._adjust_sp(-n, fcond=fcond)
-            assert n % 8 == 0  # sanity check
-
-    def _adjust_sp(self, n, cb=None, fcond=c.AL, base_reg=r.sp):
-        if cb is None:
-            cb = self.mc
-        if n < 0:
-            n = -n
-            rev = True
-        else:
-            rev = False
-        if n <= 0xFF and fcond == c.AL:
-            if rev:
-                cb.ADD_ri(r.sp.value, base_reg.value, n)
-            else:
-                cb.SUB_ri(r.sp.value, base_reg.value, n)
-        else:
-            cb.gen_load_int(r.ip.value, n, cond=fcond)
-            if rev:
-                cb.ADD_rr(r.sp.value, base_reg.value, r.ip.value, cond=fcond)
-            else:
-                cb.SUB_rr(r.sp.value, base_reg.value, r.ip.value, cond=fcond)
-
-
-    def _collect_stack_args_sf(self, arglocs):
-        n_args = len(arglocs)
-        reg_args = count_reg_args(arglocs)
-        # all arguments past the 4th go on the stack
-        # first we need to prepare the list so it stays aligned
-        stack_args = []
-        count = 0
-        if n_args > reg_args:
-            for i in range(reg_args, n_args):
-                arg = arglocs[i]
-                if arg.type != FLOAT:
-                    count += 1
-                else:
-                    if count % 2 != 0:
-                        stack_args.append(None)
-                        count = 0
-                stack_args.append(arg)
-            if count % 2 != 0:
-                stack_args.append(None)
-        return stack_args
-
-    def _push_stack_args(self, stack_args):
-            #then we push every thing on the stack
-            for i in range(len(stack_args) - 1, -1, -1):
-                arg = stack_args[i]
-                if arg is None:
-                    self.mc.PUSH([r.ip.value])
-                else:
-                    self.regalloc_push(arg)
-
-    def _setup_call_sf(self, adr, arglocs, fcond=c.AL,
-                                         resloc=None, result_info=(-1, -1)):
-        reg_args = count_reg_args(arglocs)
-        stack_args = self._collect_stack_args_sf(arglocs)
-        self._push_stack_args(stack_args)
-        # collect variables that need to go in registers and the registers they
-        # will be stored in
-        num = 0
-        count = 0
-        non_float_locs = []
-        non_float_regs = []
-        float_locs = []
-        for i in range(reg_args):
-            arg = arglocs[i]
-            if arg.type == FLOAT and count % 2 != 0:
-                    num += 1
-                    count = 0
-            reg = r.caller_resp[num]
-
-            if arg.type == FLOAT:
-                float_locs.append((arg, reg))
-            else:
-                non_float_locs.append(arg)
-                non_float_regs.append(reg)
-
-            if arg.type == FLOAT:
-                num += 2
-            else:
-                num += 1
-                count += 1
-        # Check that the address of the function we want to call is not
-        # currently stored in one of the registers used to pass the arguments.
-        # If this happens to be the case we remap the register to r4 and use r4
-        # to call the function
-        if adr in non_float_regs:
-            non_float_locs.append(adr)
-            non_float_regs.append(r.r4)
-            adr = r.r4
-        # remap values stored in core registers
-        remap_frame_layout(self, non_float_locs, non_float_regs, r.ip)
-
-        for loc, reg in float_locs:
-            self.mov_from_vfp_loc(loc, reg, r.all_regs[reg.value + 1])
-        return stack_args, adr
-
-    def _setup_call_hf(self, adr, arglocs, fcond=c.AL,
-                                         resloc=None, result_info=(-1, -1)):
-        non_float_locs = []
-        non_float_regs = []
-        float_locs = []
-        float_regs = []
-        stack_args = []
-        count = 0                      # stack alignment counter
-        for arg in arglocs:
-            if arg.type != FLOAT:
-                if len(non_float_regs) < len(r.argument_regs):
-                    reg = r.argument_regs[len(non_float_regs)]
-                    non_float_locs.append(arg)
-                    non_float_regs.append(reg)
-                else:  # non-float argument that needs to go on the stack
-                    count += 1
-                    stack_args.append(arg)
-            else:
-                if len(float_regs) < len(r.vfp_argument_regs):
-                    reg = r.vfp_argument_regs[len(float_regs)]
-                    float_locs.append(arg)
-                    float_regs.append(reg)
-                else:  # float argument that needs to go on the stack
-                    if count % 2 != 0:
-                        stack_args.append(None)
-                        count = 0
-                    stack_args.append(arg)
-        # align the stack
-        if count % 2 != 0:
-            stack_args.append(None)
-        self._push_stack_args(stack_args)
-        # Check that the address of the function we want to call is not
-        # currently stored in one of the registers used to pass the arguments.
-        # If this happens to be the case we remap the register to r4 and use r4
-        # to call the function
-        if adr in non_float_regs:
-            non_float_locs.append(adr)
-            non_float_regs.append(r.r4)
-            adr = r.r4
-        # remap values stored in core registers
-        remap_frame_layout(self, non_float_locs, non_float_regs, r.ip)
-        # remap values stored in vfp registers
-        remap_frame_layout(self, float_locs, float_regs, r.vfp_ip)
-
-        return stack_args, adr
-
     def emit_op_same_as(self, op, arglocs, regalloc, fcond):
         argloc, resloc = arglocs
-        self.mov_loc_loc(argloc, resloc)
+        if argloc is not resloc:
+            self.mov_loc_loc(argloc, resloc)
         return fcond
 
     emit_op_cast_ptr_to_int = emit_op_same_as
@@ -1037,9 +854,8 @@
             length_loc = bytes_loc
         # call memcpy()
         regalloc.before_call()
-        self._emit_call(imm(self.memcpy_addr),
-                                  [dstaddr_loc, srcaddr_loc, length_loc],
-                                  can_collect=False)
+        self.simple_call_no_collect(imm(self.memcpy_addr),
+                                  [dstaddr_loc, srcaddr_loc, length_loc])
         regalloc.rm.possibly_free_var(length_box)
         regalloc.rm.possibly_free_var(dstaddr_box)
         regalloc.rm.possibly_free_var(srcaddr_box)
@@ -1127,14 +943,14 @@
             vloc = imm(0)
         self.call_assembler(op, guard_op, argloc, vloc, result_loc, tmploc)
         self._emit_guard_may_force(guard_op,
-                        regalloc._prepare_guard(guard_op), guard_op.numargs())
+                        regalloc._prepare_guard(guard_op))
         return fcond
 
     def _call_assembler_emit_call(self, addr, argloc, resloc):
-        self._emit_call(addr, [argloc], resloc=resloc)
+        self.simple_call(addr, [argloc], result_loc=resloc)
 
     def _call_assembler_emit_helper_call(self, addr, arglocs, resloc):
-        self._emit_call(addr, arglocs, resloc=resloc)
+        self.simple_call(addr, arglocs, result_loc=resloc)
 
     def _call_assembler_check_descr(self, value, tmploc):
         ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
@@ -1213,20 +1029,14 @@
                                                                     fcond):
         self._store_force_index(guard_op)
         numargs = op.numargs()
-        callargs = arglocs[2:numargs + 1]  # extract the arguments to the call
-        adr = arglocs[1]
-        resloc = arglocs[0]
+        callargs = arglocs[:numargs + 3]  # extract the arguments to the call
+        guardargs = arglocs[len(callargs):]
         #
-        descr = op.getdescr()
-        size = descr.get_result_size()
-        signed = descr.is_result_signed()
-        #
-        self._emit_call(adr, callargs, fcond,
-                                    resloc, (size, signed))
-        self._emit_guard_may_force(guard_op, arglocs[1 + numargs:], numargs)
+        self._emit_call(op, callargs, fcond=fcond)
+        self._emit_guard_may_force(guard_op, guardargs)
         return fcond
 
-    def _emit_guard_may_force(self, guard_op, arglocs, numargs):
+    def _emit_guard_may_force(self, guard_op, arglocs):
         ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
         self.mc.LDR_ri(r.ip.value, r.fp.value, imm=ofs)
         self.mc.CMP_ri(r.ip.value, 0)
@@ -1235,68 +1045,14 @@
 
     def emit_guard_call_release_gil(self, op, guard_op, arglocs, regalloc,
                                                                     fcond):
-
+        numargs = op.numargs()
+        callargs = arglocs[:numargs + 3]     # extract the arguments to the call
+        guardargs = arglocs[len(callargs):]  # extract the arguments for the guard
         self._store_force_index(guard_op)
-        # first, close the stack in the sense of the asmgcc GC root tracker
-        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
-        numargs = op.numargs()
-        callargs = arglocs[2:numargs + 1]  # extract the arguments to the call
-        adr = arglocs[1]
-        resloc = arglocs[0]
-
-        if gcrootmap:
-            # we put the gcmap now into the frame before releasing the GIL,
-            # and pop it below after reacquiring the GIL.  The assumption
-            # is that this gcmap describes correctly the situation at any
-            # point in-between: all values containing GC pointers should
-            # be safely saved out of registers by now, and will not be
-            # manipulated by any of the following CALLs.
-            gcmap = self._regalloc.get_gcmap(noregs=True)
-            self.push_gcmap(self.mc, gcmap, store=True)
-            self.call_release_gil(gcrootmap, arglocs, regalloc, fcond)
-        # do the call
-        descr = op.getdescr()
-        size = descr.get_result_size()
-        signed = descr.is_result_signed()
-        #
-        self._emit_call(adr, callargs, fcond,
-                                    resloc, (size, signed),
-                                    can_collect=False)
-        # then reopen the stack
-        if gcrootmap:
-            self.call_reacquire_gil(gcrootmap, resloc, regalloc, fcond)
-            self.pop_gcmap(self.mc)     # remove the gcmap saved above
-
-        self._emit_guard_may_force(guard_op, arglocs[numargs+1:], numargs)
+        self._emit_call(op, callargs, is_call_release_gil=True)
+        self._emit_guard_may_force(guard_op, guardargs)
         return fcond
 
-    def call_release_gil(self, gcrootmap, save_registers, regalloc, fcond):
-        # Save caller saved registers and do the call
-        # NOTE: We assume that  the floating point registers won't be modified.
-        assert gcrootmap.is_shadow_stack
-        with saved_registers(self.mc, regalloc.rm.save_around_call_regs):
-            self._emit_call(imm(self.releasegil_addr), [],
-                                        fcond, can_collect=False)
-
-    def call_reacquire_gil(self, gcrootmap, save_loc, regalloc, fcond):
-        # save the previous result into the stack temporarily, in case it is in
-        # a caller saved register.
-        # NOTE: like with call_release_gil(), we assume that we don't need to
-        # save vfp regs in this case. Besides the result location
-        regs_to_save = []
-        vfp_regs_to_save = []
-        if save_loc and save_loc in regalloc.rm.save_around_call_regs:
-            regs_to_save.append(save_loc)
-            regs_to_save.append(r.ip)  # for alingment
-        elif save_loc and save_loc in regalloc.vfprm.save_around_call_regs:
-            vfp_regs_to_save.append(save_loc)
-        assert gcrootmap.is_shadow_stack
-        # call the reopenstack() function (also reacquiring the GIL)
-        with saved_registers(self.mc, regs_to_save, vfp_regs_to_save):
-            self._emit_call(imm(self.reacqgil_addr), [], fcond,
-                    can_collect=False)
-        self._reload_frame_if_necessary(self.mc)
-
     def _store_force_index(self, guard_op):
         faildescr = guard_op.getdescr()
         ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr')
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -34,6 +34,7 @@
 from rpython.jit.backend.llsupport.descr import unpack_fielddescr
 from rpython.jit.backend.llsupport.descr import unpack_interiorfielddescr
 from rpython.rlib.rarithmetic import r_uint
+from rpython.jit.backend.llsupport.descr import CallDescr
 
 
 # xxx hack: set a default value for TargetToken._ll_loop_code.  If 0, we know
@@ -555,9 +556,27 @@
         return self._prepare_call(op)
 
     def _prepare_call(self, op, force_store=[], save_all_regs=False):
-        args = [None] * (op.numargs() + 1)
+        args = [None] * (op.numargs() + 3)
+        calldescr = op.getdescr()
+        assert isinstance(calldescr, CallDescr)
+        assert len(calldescr.arg_classes) == op.numargs() - 1
+
         for i in range(op.numargs()):
-            args[i + 1] = self.loc(op.getarg(i))
+            args[i + 3] = self.loc(op.getarg(i))
+
+        size = calldescr.get_result_size()
+        sign = calldescr.is_result_signed()
+        if sign:
+            sign_loc = imm(1)
+        else:
+            sign_loc = imm(0)
+        args[1] = imm(size)
+        args[2] = sign_loc
+
+        args[0] = self._call(op, args, force_store, save_all_regs)
+        return args
+
+    def _call(self, op, arglocs, force_store=[], save_all_regs=False):
         # spill variables that need to be saved around calls
         self.vfprm.before_call(save_all_regs=save_all_regs)
         if not save_all_regs:
@@ -565,11 +584,11 @@
             if gcrootmap and gcrootmap.is_shadow_stack:
                 save_all_regs = 2
         self.rm.before_call(save_all_regs=save_all_regs)
+        self.before_call_called = True
+        resloc = None
         if op.result:
             resloc = self.after_call(op.result)
-            args[0] = resloc
-        self.before_call_called = True
-        return args
+        return resloc
 
     def prepare_op_call_malloc_gc(self, op, fcond):
         return self._prepare_call(op)
@@ -1153,9 +1172,9 @@
     def prepare_guard_call_assembler(self, op, guard_op, fcond):
         locs = self.locs_for_call_assembler(op, guard_op)
         tmploc = self.get_scratch_reg(INT, selected_reg=r.r0)
-        call_locs = self._prepare_call(op, save_all_regs=True)
+        resloc = self._call(op, locs + [tmploc], save_all_regs=True)
         self.possibly_free_vars(guard_op.getfailargs())
-        return locs + [call_locs[0], tmploc]
+        return locs + [resloc, tmploc]
 
     def _prepare_args_for_new_op(self, new_args):
         gc_ll_descr = self.cpu.gc_ll_descr
diff --git a/rpython/jit/backend/arm/test/test_regalloc_mov.py b/rpython/jit/backend/arm/test/test_regalloc_mov.py
--- a/rpython/jit/backend/arm/test/test_regalloc_mov.py
+++ b/rpython/jit/backend/arm/test/test_regalloc_mov.py
@@ -1,9 +1,10 @@
 from rpython.rlib.objectmodel import instantiate
 from rpython.jit.backend.arm.assembler import AssemblerARM
-from rpython.jit.backend.arm.locations import imm, ConstFloatLoc,\
-                                        RegisterLocation, StackLocation, \
-                                        VFPRegisterLocation, get_fp_offset
-from rpython.jit.backend.arm.registers import lr, ip, fp, vfp_ip
+from rpython.jit.backend.arm.locations import imm, ConstFloatLoc
+from rpython.jit.backend.arm.locations import RegisterLocation, StackLocation
+from rpython.jit.backend.arm.locations import VFPRegisterLocation, get_fp_offset
+from rpython.jit.backend.arm.locations import RawSPStackLocation
+from rpython.jit.backend.arm.registers import lr, ip, fp, vfp_ip, sp
 from rpython.jit.backend.arm.conditions import AL
 from rpython.jit.backend.arm.arch import WORD
 from rpython.jit.metainterp.history import FLOAT
@@ -54,6 +55,12 @@
     addr = int(value)  # whatever
     return ConstFloatLoc(addr)
 
+def raw_stack(i):
+    return RawSPStackLocation(i)
+
+def raw_stack_float(i):
+    return RawSPStackLocation(i, type=FLOAT)
+
 
 class MockBuilder(object):
     def __init__(self):
@@ -79,13 +86,13 @@
         result = self.builder.instrs
         assert result == expected
 
-
-class TestRegallocMov(BaseMovTest):
-
     def mov(self, a, b, expected=None):
         self.asm.regalloc_mov(a, b)
         self.validate(expected)
 
+
+class TestRegallocMov(BaseMovTest):
+
     def test_mov_imm_to_reg(self):
         val = imm(123)
         reg = r(7)
@@ -102,45 +109,37 @@
         val = imm(100)
         s = stack(7)
         expected = [
-                mi('PUSH', [lr.value], cond=AL),
                 mi('gen_load_int', lr.value, 100, cond=AL),
                 mi('STR_ri', lr.value, fp.value, imm=s.value, cond=AL),
-                mi('POP', [lr.value], cond=AL)]
+        ]
         self.mov(val, s, expected)
 
     def test_mov_big_imm_to_stacklock(self):
         val = imm(65536)
         s = stack(7)
         expected = [
-                mi('PUSH', [lr.value], cond=AL),
                 mi('gen_load_int', lr.value, 65536, cond=AL),
                 mi('STR_ri', lr.value, fp.value, imm=s.value, cond=AL),
-                mi('POP', [lr.value], cond=AL)]
-
+                ]
         self.mov(val, s, expected)
 
     def test_mov_imm_to_big_stacklock(self):
         val = imm(100)
         s = stack(8191)
-        expected = [mi('PUSH', [lr.value], cond=AL),
-                    mi('gen_load_int', lr.value, 100, cond=AL),
-                    mi('PUSH', [ip.value], cond=AL),
+        expected = [ mi('gen_load_int', lr.value, 100, cond=AL),
                     mi('gen_load_int', ip.value, s.value, cond=AL),
                     mi('STR_rr', lr.value, fp.value, ip.value, cond=AL),
-                    mi('POP', [ip.value], cond=AL),
-                    mi('POP', [lr.value], cond=AL)]
+                    ]
         self.mov(val, s, expected)
 
     def test_mov_big_imm_to_big_stacklock(self):
         val = imm(65536)
         s = stack(8191)
-        expected = [mi('PUSH', [lr.value], cond=AL),