[pypy-commit] pypy cpyext-ext: merge default into branch

Mon Mar 14 15:42:23 EDT 2016

Author: mattip <matti.picus at gmail.com>
Branch: cpyext-ext
Changeset: r83055:ef1022e1d513
Date: 2016-03-14 21:41 +0200
http://bitbucket.org/pypy/pypy/changeset/ef1022e1d513/

Log:	merge default into branch

diff too long, truncating to 2000 out of 13184 lines

diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -74,5 +74,6 @@
 ^rpython/doc/_build/.*$
 ^compiled
 ^.git/
+^.hypothesis/
 ^release/
 ^rpython/_cache$
diff --git a/lib_pypy/ctypes_config_cache/rebuild.py b/lib_pypy/ctypes_config_cache/rebuild.py
--- a/lib_pypy/ctypes_config_cache/rebuild.py
+++ b/lib_pypy/ctypes_config_cache/rebuild.py
@@ -9,9 +9,8 @@
 
 _dirpath = os.path.dirname(__file__) or os.curdir
 
-from rpython.tool.ansi_print import ansi_log
-log = py.log.Producer("ctypes_config_cache")
-py.log.setconsumer("ctypes_config_cache", ansi_log)
+from rpython.tool.ansi_print import AnsiLogger
+log = AnsiLogger("ctypes_config_cache")
 
 
 def rebuild_one(name):
diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst
--- a/pypy/doc/how-to-release.rst
+++ b/pypy/doc/how-to-release.rst
@@ -76,5 +76,4 @@
 
 * add a tag on the pypy/jitviewer repo that corresponds to pypy release
 * add a tag on the codespeed web site that corresponds to pypy release
-* update the version number in {rpython,pypy}/doc/conf.py.
 * revise versioning at https://readthedocs.org/projects/pypy
diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst
--- a/pypy/doc/project-ideas.rst
+++ b/pypy/doc/project-ideas.rst
@@ -167,22 +167,13 @@
 * `hg`
 
 
-Embedding PyPy and improving CFFI
----------------------------------
-
-PyPy has some basic :doc:`embedding infrastructure <embedding>`. The idea would be to improve
-upon that with cffi hacks that can automatically generate embeddable .so/.dll
-library
-
-
 Optimising cpyext (CPython C-API compatibility layer)
 -----------------------------------------------------
 
 A lot of work has gone into PyPy's implementation of CPython's C-API over
 the last years to let it reach a practical level of compatibility, so that
 C extensions for CPython work on PyPy without major rewrites. However,
-there are still many edges and corner cases where it misbehaves, and it has
-not received any substantial optimisation so far.
+there are still many edges and corner cases where it misbehaves.
 
 The objective of this project is to fix bugs in cpyext and to optimise
 several performance critical parts of it, such as the reference counting
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -5,3 +5,21 @@
 .. this is a revision shortly after release-5.0
 .. startrev: b238b48f9138
 
+.. branch: s390x-backend
+
+The jit compiler backend implementation for the s390x architecture.
+The backend manages 64-bit values in the literal pool of the assembly instead of loading them as immediates.
+It includes a simplification for the operation 'zero_array'. Start and length parameters are bytes instead of size.
+
+.. branch: remove-py-log
+
+Replace py.log with something simpler, which should speed up logging
+
+.. branch: where_1_arg
+
+Implemented numpy.where for 1 argument (thanks sergem)
+
+.. branch: fix_indexing_by_numpy_int
+
+Implement yet another strange numpy indexing compatibility; indexing by a scalar 
+returns a scalar
diff --git a/pypy/interpreter/test/test_app_main.py b/pypy/interpreter/test/test_app_main.py
--- a/pypy/interpreter/test/test_app_main.py
+++ b/pypy/interpreter/test/test_app_main.py
@@ -9,6 +9,11 @@
 from pypy.conftest import pypydir
 from lib_pypy._pypy_interact import irc_header
 
+try:
+    import __pypy__
+except ImportError:
+    __pypy__ = None
+
 banner = sys.version.splitlines()[0]
 
 app_main = os.path.join(os.path.realpath(os.path.dirname(__file__)), os.pardir, 'app_main.py')
@@ -106,6 +111,8 @@
             sys.argv[:] = saved_sys_argv
             sys.stdout = saved_sys_stdout
             sys.stderr = saved_sys_stderr
+            if __pypy__:
+                __pypy__.set_debug(True)
 
     def test_all_combinations_I_can_think_of(self):
         self.check([], {}, sys_argv=[''], run_stdin=True)
@@ -601,9 +608,7 @@
     def run_with_status_code(self, cmdline, senddata='', expect_prompt=False,
             expect_banner=False, python_flags='', env=None):
         if os.name == 'nt':
-            try:
-                import __pypy__
-            except:
+            if __pypy__ is None:
                 py.test.skip('app_main cannot run on non-pypy for windows')
         cmdline = '%s %s "%s" %s' % (sys.executable, python_flags,
                                      app_main, cmdline)
diff --git a/pypy/module/__pypy__/test/test_signal.py b/pypy/module/__pypy__/test/test_signal.py
--- a/pypy/module/__pypy__/test/test_signal.py
+++ b/pypy/module/__pypy__/test/test_signal.py
@@ -2,7 +2,6 @@
 
 from pypy.module.thread.test.support import GenericTestThread
 
-
 class AppTestMinimal:
     spaceconfig = dict(usemodules=['__pypy__'])
 
diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py
--- a/pypy/module/_cffi_backend/ctypeptr.py
+++ b/pypy/module/_cffi_backend/ctypeptr.py
@@ -353,10 +353,11 @@
 # ____________________________________________________________
 
 
-rffi_fdopen = rffi.llexternal("fdopen", [rffi.INT, rffi.CCHARP], rffi.CCHARP,
+FILEP = rffi.COpaquePtr("FILE")
+rffi_fdopen = rffi.llexternal("fdopen", [rffi.INT, rffi.CCHARP], FILEP,
                               save_err=rffi.RFFI_SAVE_ERRNO)
-rffi_setbuf = rffi.llexternal("setbuf", [rffi.CCHARP, rffi.CCHARP], lltype.Void)
-rffi_fclose = rffi.llexternal("fclose", [rffi.CCHARP], rffi.INT)
+rffi_setbuf = rffi.llexternal("setbuf", [FILEP, rffi.CCHARP], lltype.Void)
+rffi_fclose = rffi.llexternal("fclose", [FILEP], rffi.INT)
 
 class CffiFileObj(object):
     _immutable_ = True
@@ -382,4 +383,4 @@
             fileobj.cffi_fileobj = CffiFileObj(fd, fileobj.mode)
         except OSError, e:
             raise wrap_oserror(space, e)
-    return fileobj.cffi_fileobj.llf
+    return rffi.cast(rffi.CCHARP, fileobj.cffi_fileobj.llf)
diff --git a/pypy/module/_file/test/test_file_extra.py b/pypy/module/_file/test/test_file_extra.py
--- a/pypy/module/_file/test/test_file_extra.py
+++ b/pypy/module/_file/test/test_file_extra.py
@@ -389,6 +389,7 @@
 
     def test_writelines(self):
         import array
+        import sys
         fn = self.temptestfile
         with file(fn, 'w') as f:
             f.writelines(['abc'])
@@ -406,7 +407,10 @@
             exc = raises(TypeError, f.writelines, [memoryview('jkl')])
             assert str(exc.value) == "writelines() argument must be a sequence of strings"
         out = open(fn, 'rb').readlines()[0]
-        assert out[0:5] == 'abcd\x00'
+        if sys.byteorder == 'big':
+            assert out[0:7] == 'abc\x00\x00\x00d'
+        else:
+            assert out[0:5] == 'abcd\x00'
         assert out[-3:] == 'ghi'
 
         with file(fn, 'wb') as f:
diff --git a/pypy/module/_rawffi/callback.py b/pypy/module/_rawffi/callback.py
--- a/pypy/module/_rawffi/callback.py
+++ b/pypy/module/_rawffi/callback.py
@@ -1,17 +1,23 @@
-
+import sys
 from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.typedef import TypeDef, GetSetProperty
 from rpython.rtyper.lltypesystem import lltype, rffi
 from pypy.module._rawffi.interp_rawffi import write_ptr
 from pypy.module._rawffi.structure import W_Structure
 from pypy.module._rawffi.interp_rawffi import (W_DataInstance, letter2tp,
-     unwrap_value, unpack_argshapes, got_libffi_error)
+     unwrap_value, unpack_argshapes, got_libffi_error, is_narrow_integer_type,
+     LL_TYPEMAP, NARROW_INTEGER_TYPES)
 from rpython.rlib.clibffi import USERDATA_P, CallbackFuncPtr, FUNCFLAG_CDECL
 from rpython.rlib.clibffi import ffi_type_void, LibFFIError
 from rpython.rlib import rweakref
 from pypy.module._rawffi.tracker import tracker
 from pypy.interpreter.error import OperationError
 from pypy.interpreter import gateway
+from rpython.rlib.unroll import unrolling_iterable
+
+BIGENDIAN = sys.byteorder == 'big'
+
+unroll_narrow_integer_types = unrolling_iterable(NARROW_INTEGER_TYPES)
 
 app = gateway.applevel('''
     def tbprint(tb, err):
@@ -42,8 +48,17 @@
                 args_w[i] = space.wrap(rffi.cast(rffi.ULONG, ll_args[i]))
         w_res = space.call(w_callable, space.newtuple(args_w))
         if callback_ptr.result is not None: # don't return void
-            unwrap_value(space, write_ptr, ll_res, 0,
-                         callback_ptr.result, w_res)
+            ptr = ll_res
+            letter = callback_ptr.result
+            if BIGENDIAN:
+                # take care of narrow integers!
+                for int_type in unroll_narrow_integer_types:
+                    if int_type == letter:
+                        T = LL_TYPEMAP[int_type]
+                        n = rffi.sizeof(lltype.Signed) - rffi.sizeof(T)
+                        ptr = rffi.ptradd(ptr, n)
+                        break
+            unwrap_value(space, write_ptr, ptr, 0, letter, w_res)
     except OperationError, e:
         tbprint(space, space.wrap(e.get_traceback()),
                 space.wrap(e.errorstr(space)))
diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py
--- a/pypy/module/_rawffi/interp_rawffi.py
+++ b/pypy/module/_rawffi/interp_rawffi.py
@@ -1,3 +1,4 @@
+import sys
 from pypy.interpreter.baseobjspace import W_Root
 from pypy.interpreter.error import OperationError, oefmt, wrap_oserror
 from pypy.interpreter.gateway import interp2app, unwrap_spec
@@ -19,6 +20,8 @@
 from pypy.module._rawffi.buffer import RawFFIBuffer
 from pypy.module._rawffi.tracker import tracker
 
+BIGENDIAN = sys.byteorder == 'big'
+
 TYPEMAP = {
     # XXX A mess with unsigned/signed/normal chars :-/
     'c' : ffi_type_uchar,
@@ -331,10 +334,14 @@
             if tracker.DO_TRACING:
                 ll_buf = rffi.cast(lltype.Signed, self.ll_buffer)
                 tracker.trace_allocation(ll_buf, self)
+        self._ll_buffer = self.ll_buffer
 
     def getbuffer(self, space):
         return space.wrap(rffi.cast(lltype.Unsigned, self.ll_buffer))
 
+    def buffer_advance(self, n):
+        self.ll_buffer = rffi.ptradd(self.ll_buffer, n)
+
     def byptr(self, space):
         from pypy.module._rawffi.array import ARRAY_OF_PTRS
         array = ARRAY_OF_PTRS.allocate(space, 1)
@@ -342,16 +349,17 @@
         return space.wrap(array)
 
     def free(self, space):
-        if not self.ll_buffer:
+        if not self._ll_buffer:
             raise segfault_exception(space, "freeing NULL pointer")
         self._free()
 
     def _free(self):
         if tracker.DO_TRACING:
-            ll_buf = rffi.cast(lltype.Signed, self.ll_buffer)
+            ll_buf = rffi.cast(lltype.Signed, self._ll_buffer)
             tracker.trace_free(ll_buf)
-        lltype.free(self.ll_buffer, flavor='raw')
+        lltype.free(self._ll_buffer, flavor='raw')
         self.ll_buffer = lltype.nullptr(rffi.VOIDP.TO)
+        self._ll_buffer = self.ll_buffer
 
     def buffer_w(self, space, flags):
         return RawFFIBuffer(self)
@@ -432,12 +440,19 @@
                          space.wrap("cannot directly read value"))
 wrap_value._annspecialcase_ = 'specialize:arg(1)'
 
+NARROW_INTEGER_TYPES = 'cbhiBIH?'
+
+def is_narrow_integer_type(letter):
+    return letter in NARROW_INTEGER_TYPES
 
 class W_FuncPtr(W_Root):
     def __init__(self, space, ptr, argshapes, resshape):
         self.ptr = ptr
         self.argshapes = argshapes
         self.resshape = resshape
+        self.narrow_integer = False
+        if resshape is not None:
+            self.narrow_integer = is_narrow_integer_type(resshape.itemcode.lower())
 
     def getbuffer(self, space):
         return space.wrap(rffi.cast(lltype.Unsigned, self.ptr.funcsym))
@@ -497,6 +512,10 @@
                 result = self.resshape.allocate(space, 1, autofree=True)
                 # adjust_return_size() was used here on result.ll_buffer
                 self.ptr.call(args_ll, result.ll_buffer)
+                if BIGENDIAN and self.narrow_integer:
+                    # we get an 8 byte value in big endian
+                    n = rffi.sizeof(lltype.Signed) - result.shape.size
+                    result.buffer_advance(n)
                 return space.wrap(result)
             else:
                 self.ptr.call(args_ll, lltype.nullptr(rffi.VOIDP.TO))
diff --git a/pypy/module/_rawffi/structure.py b/pypy/module/_rawffi/structure.py
--- a/pypy/module/_rawffi/structure.py
+++ b/pypy/module/_rawffi/structure.py
@@ -18,6 +18,9 @@
 from rpython.rlib.rarithmetic import intmask, signedtype, r_uint, \
     r_ulonglong
 from rpython.rtyper.lltypesystem import lltype, rffi
+import sys
+
+IS_BIG_ENDIAN = sys.byteorder == 'big'
 
 
 
@@ -114,20 +117,32 @@
                 size += intmask(fieldsize)
                 bitsizes.append(fieldsize)
             elif field_type == NEW_BITFIELD:
-                bitsizes.append((bitsize << 16) + bitoffset)
+                if IS_BIG_ENDIAN:
+                    off = last_size - bitoffset - bitsize
+                else:
+                    off = bitoffset
+                bitsizes.append((bitsize << 16) + off)
                 bitoffset = bitsize
                 size = round_up(size, fieldalignment)
                 pos.append(size)
                 size += fieldsize
             elif field_type == CONT_BITFIELD:
-                bitsizes.append((bitsize << 16) + bitoffset)
+                if IS_BIG_ENDIAN:
+                    off = last_size - bitoffset - bitsize
+                else:
+                    off = bitoffset
+                bitsizes.append((bitsize << 16) + off)
                 bitoffset += bitsize
                 # offset is already updated for the NEXT field
                 pos.append(size - fieldsize)
             elif field_type == EXPAND_BITFIELD:
                 size += fieldsize - last_size / 8
                 last_size = fieldsize * 8
-                bitsizes.append((bitsize << 16) + bitoffset)
+                if IS_BIG_ENDIAN:
+                    off = last_size - bitoffset - bitsize
+                else:
+                    off = bitoffset
+                bitsizes.append((bitsize << 16) + off)
                 bitoffset += bitsize
                 # offset is already updated for the NEXT field
                 pos.append(size - fieldsize)
diff --git a/pypy/module/_rawffi/test/test__rawffi.py b/pypy/module/_rawffi/test/test__rawffi.py
--- a/pypy/module/_rawffi/test/test__rawffi.py
+++ b/pypy/module/_rawffi/test/test__rawffi.py
@@ -704,7 +704,6 @@
         def compare(a, b):
             a1 = _rawffi.Array('i').fromaddress(_rawffi.Array('P').fromaddress(a, 1)[0], 1)
             a2 = _rawffi.Array('i').fromaddress(_rawffi.Array('P').fromaddress(b, 1)[0], 1)
-            print "comparing", a1[0], "with", a2[0]
             if a1[0] not in [1,2,3,4] or a2[0] not in [1,2,3,4]:
                 bogus_args.append((a1[0], a2[0]))
             if a1[0] > a2[0]:
@@ -715,7 +714,7 @@
         a2[0] = len(ll_to_sort)
         a3 = _rawffi.Array('l')(1)
         a3[0] = struct.calcsize('i')
-        cb = _rawffi.CallbackPtr(compare, ['P', 'P'], 'i')
+        cb = _rawffi.CallbackPtr(compare, ['P', 'P'], 'l')
         a4 = cb.byptr()
         qsort(a1, a2, a3, a4)
         res = [ll_to_sort[i] for i in range(len(ll_to_sort))]
@@ -896,11 +895,21 @@
         b = _rawffi.Array('c').fromaddress(a.buffer, 38)
         if sys.maxunicode > 65535:
             # UCS4 build
-            assert b[0] == 'x'
-            assert b[1] == '\x00'
-            assert b[2] == '\x00'
-            assert b[3] == '\x00'
-            assert b[4] == 'y'
+            if sys.byteorder == 'big':
+                assert b[0] == '\x00'
+                assert b[1] == '\x00'
+                assert b[2] == '\x00'
+                assert b[3] == 'x'
+                assert b[4] == '\x00'
+                assert b[5] == '\x00'
+                assert b[6] == '\x00'
+                assert b[7] == 'y'
+            else:
+                assert b[0] == 'x'
+                assert b[1] == '\x00'
+                assert b[2] == '\x00'
+                assert b[3] == '\x00'
+                assert b[4] == 'y'
         else:
             # UCS2 build
             assert b[0] == 'x'
diff --git a/pypy/module/_rawffi/test/test_struct.py b/pypy/module/_rawffi/test/test_struct.py
--- a/pypy/module/_rawffi/test/test_struct.py
+++ b/pypy/module/_rawffi/test/test_struct.py
@@ -1,4 +1,4 @@
-
+import sys
 from pypy.module._rawffi.structure import size_alignment_pos
 from pypy.module._rawffi.interp_rawffi import TYPEMAP, letter2tp
 
@@ -63,4 +63,7 @@
          for (name, t, size) in fields])
     assert size == 8
     assert pos == [0, 0, 0]
-    assert bitsizes == [0x10000, 0x3e0001, 0x1003f]
+    if sys.byteorder == 'little':
+        assert bitsizes == [0x10000, 0x3e0001, 0x1003f]
+    else:
+        assert bitsizes == [0x1003f, 0x3e0001, 0x10000]
diff --git a/pypy/module/_vmprof/conftest.py b/pypy/module/_vmprof/conftest.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_vmprof/conftest.py
@@ -0,0 +1,6 @@
+import py, platform
+
+def pytest_collect_directory(path, parent):
+    if platform.machine() == 's390x':
+        py.test.skip("zarch tests skipped")
+pytest_collect_file = pytest_collect_directory
diff --git a/pypy/module/cppyy/src/dummy_backend.cxx b/pypy/module/cppyy/src/dummy_backend.cxx
--- a/pypy/module/cppyy/src/dummy_backend.cxx
+++ b/pypy/module/cppyy/src/dummy_backend.cxx
@@ -390,7 +390,7 @@
         ((dummy::cppyy_test_data*)self)->destroy_arrays();
     } else if (idx == s_methods["cppyy_test_data::set_bool"]) {
         assert(self && nargs == 1);
-        ((dummy::cppyy_test_data*)self)->set_bool((bool)((CPPYY_G__value*)args)[0].obj.in);
+        ((dummy::cppyy_test_data*)self)->set_bool((bool)((CPPYY_G__value*)args)[0].obj.i);
     } else if (idx == s_methods["cppyy_test_data::set_char"]) {
         assert(self && nargs == 1);
         ((dummy::cppyy_test_data*)self)->set_char(((CPPYY_G__value*)args)[0].obj.ch);
diff --git a/pypy/module/cpyext/test/test_arraymodule.py b/pypy/module/cpyext/test/test_arraymodule.py
--- a/pypy/module/cpyext/test/test_arraymodule.py
+++ b/pypy/module/cpyext/test/test_arraymodule.py
@@ -51,13 +51,19 @@
         assert arr.tolist() == [1, 23, 4]
 
     def test_buffer(self):
+        import sys
         module = self.import_module(name='array')
         arr = module.array('i', [1,2,3,4])
         buf = buffer(arr)
         exc = raises(TypeError, "buf[1] = '1'")
         assert str(exc.value) == "buffer is read-only"
-        # XXX big-endian
-        assert str(buf) == ('\x01\0\0\0'
-                            '\x02\0\0\0'
-                            '\x03\0\0\0'
-                            '\x04\0\0\0')
+        if sys.byteorder == 'big':
+            assert str(buf) == ('\0\0\0\x01'
+                                '\0\0\0\x02'
+                                '\0\0\0\x03'
+                                '\0\0\0\x04')
+        else:
+            assert str(buf) == ('\x01\0\0\0'
+                                '\x02\0\0\0'
+                                '\x03\0\0\0'
+                                '\x04\0\0\0')
diff --git a/pypy/module/cpyext/test/test_methodobject.py b/pypy/module/cpyext/test/test_methodobject.py
--- a/pypy/module/cpyext/test/test_methodobject.py
+++ b/pypy/module/cpyext/test/test_methodobject.py
@@ -103,7 +103,7 @@
         c_func = ApiFunction([PyObject, PyObject], PyObject, func)
         func.api_func = c_func
         ml = lltype.malloc(PyMethodDef, flavor='raw', zero=True)
-        namebuf = rffi.str2charp('func')
+        namebuf = rffi.cast(rffi.CONST_CCHARP, rffi.str2charp('func'))
         ml.c_ml_name = namebuf
         ml.c_ml_meth = rffi.cast(PyCFunction_typedef,
                                  c_func.get_llhelper(space))
diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py
--- a/pypy/module/cpyext/test/test_typeobject.py
+++ b/pypy/module/cpyext/test/test_typeobject.py
@@ -729,7 +729,7 @@
                 int intval;
                 PyObject *name;
 
-                if (!PyArg_ParseTuple(args, "i", &intval))
+                if (!PyArg_ParseTuple(args, "l", &intval))
                     return NULL;
 
                 IntLike_Type.tp_flags |= Py_TPFLAGS_DEFAULT;
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -398,11 +398,11 @@
                 lltype.free(pendian, flavor='raw')
 
         test("\x61\x00\x62\x00\x63\x00\x64\x00", -1)
-
-        test("\x61\x00\x62\x00\x63\x00\x64\x00", None)
-
+        if sys.byteorder == 'big':
+            test("\x00\x61\x00\x62\x00\x63\x00\x64", None)
+        else:
+            test("\x61\x00\x62\x00\x63\x00\x64\x00", None)
         test("\x00\x61\x00\x62\x00\x63\x00\x64", 1)
-
         test("\xFE\xFF\x00\x61\x00\x62\x00\x63\x00\x64", 0, 1)
         test("\xFF\xFE\x61\x00\x62\x00\x63\x00\x64\x00", 0, -1)
 
@@ -435,7 +435,10 @@
 
         test("\x61\x00\x00\x00\x62\x00\x00\x00", -1)
 
-        test("\x61\x00\x00\x00\x62\x00\x00\x00", None)
+        if sys.byteorder == 'big':
+            test("\x00\x00\x00\x61\x00\x00\x00\x62", None)
+        else:
+            test("\x61\x00\x00\x00\x62\x00\x00\x00", None)
 
         test("\x00\x00\x00\x61\x00\x00\x00\x62", 1)
 
diff --git a/pypy/module/marshal/test/test_marshalimpl.py b/pypy/module/marshal/test/test_marshalimpl.py
--- a/pypy/module/marshal/test/test_marshalimpl.py
+++ b/pypy/module/marshal/test/test_marshalimpl.py
@@ -64,14 +64,17 @@
     import marshal, struct
 
     class FakeM:
+        # NOTE: marshal is platform independent, running this test must assume
+        # that self.seen gets values from the endianness of the marshal module.
+        # (which is little endian!)
         def __init__(self):
             self.seen = []
         def start(self, code):
             self.seen.append(code)
         def put_int(self, value):
-            self.seen.append(struct.pack("i", value))
+            self.seen.append(struct.pack("<i", value))
         def put_short(self, value):
-            self.seen.append(struct.pack("h", value))
+            self.seen.append(struct.pack("<h", value))
 
     def _marshal_check(x):
         expected = marshal.dumps(long(x))
diff --git a/pypy/module/micronumpy/arrayops.py b/pypy/module/micronumpy/arrayops.py
--- a/pypy/module/micronumpy/arrayops.py
+++ b/pypy/module/micronumpy/arrayops.py
@@ -71,8 +71,8 @@
     """
     if space.is_none(w_y):
         if space.is_none(w_x):
-            raise OperationError(space.w_NotImplementedError, space.wrap(
-                "1-arg where unsupported right now"))
+            arr = convert_to_array(space, w_arr)
+            return arr.descr_nonzero(space)
         raise OperationError(space.w_ValueError, space.wrap(
             "Where should be called with either 1 or 3 arguments"))
     if space.is_none(w_x):
diff --git a/pypy/module/micronumpy/ndarray.py b/pypy/module/micronumpy/ndarray.py
--- a/pypy/module/micronumpy/ndarray.py
+++ b/pypy/module/micronumpy/ndarray.py
@@ -267,6 +267,11 @@
                         "interpreted as a valid boolean index")
         elif isinstance(w_idx, boxes.W_GenericBox):
             w_ret = self.getitem_array_int(space, w_idx)
+
+            if isinstance(w_idx, boxes.W_IntegerBox):
+                # if w_idx is integer then getitem_array_int must contain a single value and we must return it.
+                # Get 0-th element of the w_ret.
+                w_ret = w_ret.implementation.descr_getitem(space, self, space.wrap(0))
         else:
             try:
                 w_ret = self.implementation.descr_getitem(space, self, w_idx)
diff --git a/pypy/module/micronumpy/test/test_arrayops.py b/pypy/module/micronumpy/test/test_arrayops.py
--- a/pypy/module/micronumpy/test/test_arrayops.py
+++ b/pypy/module/micronumpy/test/test_arrayops.py
@@ -54,8 +54,24 @@
         assert (where(False, 1, [1, 2, 3]) == [1, 2, 3]).all()
         assert (where([1, 2, 3], True, False) == [True, True, True]).all()
 
-    #def test_where_1_arg(self):
-    #    xxx
+    def test_where_1_arg(self):
+        from numpy import where, array
+
+        result = where([1,0,1])
+
+        assert isinstance(result, tuple)
+        assert len(result) == 1
+        assert (result[0] == array([0, 2])).all()
+
+    def test_where_1_arg_2d(self):
+        from numpy import where, array
+
+        result = where([[1,0,1],[2,-1,-1]])
+
+        assert isinstance(result, tuple)
+        assert len(result) == 2
+        assert (result[0] == array([0, 0, 1, 1, 1])).all()
+        assert (result[1] == array([0, 2, 0, 1, 2])).all()
 
     def test_where_invalidates(self):
         from numpy import where, ones, zeros, array
diff --git a/pypy/module/micronumpy/test/test_dtypes.py b/pypy/module/micronumpy/test/test_dtypes.py
--- a/pypy/module/micronumpy/test/test_dtypes.py
+++ b/pypy/module/micronumpy/test/test_dtypes.py
@@ -351,7 +351,10 @@
         assert np.dtype(xyz).name == 'xyz'
         # another obscure API, used in numpy record.py
         a = np.dtype((xyz, [('x', 'int32'), ('y', 'float32')]))
-        assert "[('x', '<i4'), ('y', '<f4')]" in repr(a)
+        if sys.byteorder == 'big':
+            assert "[('x', '>i4'), ('y', '>f4')]" in repr(a)
+        else:
+            assert "[('x', '<i4'), ('y', '<f4')]" in repr(a)
         assert 'xyz' in repr(a)
         data = [(1, 'a'), (2, 'bbb')]
         b = np.dtype((xyz, [('a', int), ('b', object)]))
@@ -361,7 +364,10 @@
             arr = np.array(data, dtype=b)
             assert arr[0][0] == 1
             assert arr[0][1] == 'a'
-        b = np.dtype((xyz, [("col1", "<i4"), ("col2", "<i4"), ("col3", "<i4")]))
+        # NOTE if micronumpy is completed, we might extend this test to check both
+        # "<i4" and ">i4"
+        E = '<' if sys.byteorder == 'little' else '>'
+        b = np.dtype((xyz, [("col1", E+"i4"), ("col2", E+"i4"), ("col3", E+"i4")]))
         data = [(1, 2,3), (4, 5, 6)]
         a = np.array(data, dtype=b)
         x = pickle.loads(pickle.dumps(a))
@@ -423,18 +429,20 @@
         assert hash(t5) != hash(t6)
 
     def test_pickle(self):
+        import sys
         import numpy as np
         from numpy import array, dtype
         from cPickle import loads, dumps
         a = array([1,2,3])
+        E = '<' if sys.byteorder == 'little' else '>'
         if self.ptr_size == 8:
-            assert a.dtype.__reduce__() == (dtype, ('i8', 0, 1), (3, '<', None, None, None, -1, -1, 0))
+            assert a.dtype.__reduce__() == (dtype, ('i8', 0, 1), (3, E, None, None, None, -1, -1, 0))
         else:
-            assert a.dtype.__reduce__() == (dtype, ('i4', 0, 1), (3, '<', None, None, None, -1, -1, 0))
+            assert a.dtype.__reduce__() == (dtype, ('i4', 0, 1), (3, E, None, None, None, -1, -1, 0))
         assert loads(dumps(a.dtype)) == a.dtype
         assert np.dtype('bool').__reduce__() == (dtype, ('b1', 0, 1), (3, '|', None, None, None, -1, -1, 0))
         assert np.dtype('|V16').__reduce__() == (dtype, ('V16', 0, 1), (3, '|', None, None, None, 16, 1, 0))
-        assert np.dtype(('<f8', 2)).__reduce__() == (dtype, ('V16', 0, 1), (3, '|', (dtype('float64'), (2,)), None, None, 16, 8, 0))
+        assert np.dtype((E+'f8', 2)).__reduce__() == (dtype, ('V16', 0, 1), (3, '|', (dtype('float64'), (2,)), None, None, 16, 8, 0))
 
     def test_newbyteorder(self):
         import numpy as np
@@ -916,6 +924,7 @@
 
     def test_dtype_str(self):
         from numpy import dtype
+        import sys
         byteorder = self.native_prefix
         assert dtype('i8').str == byteorder + 'i8'
         assert dtype('<i8').str == '<i8'
@@ -937,7 +946,8 @@
         assert dtype('unicode').str == byteorder + 'U0'
         assert dtype(('string', 7)).str == '|S7'
         assert dtype('=S5').str == '|S5'
-        assert dtype(('unicode', 7)).str == '<U7'
+        assert dtype(('unicode', 7)).str == \
+               ('<' if sys.byteorder == 'little' else '>')+'U7'
         assert dtype([('', 'f8')]).str == "|V8"
         assert dtype(('f8', 2)).str == "|V16"
 
@@ -968,8 +978,12 @@
 
     def test_isnative(self):
         from numpy import dtype
+        import sys
         assert dtype('i4').isnative == True
-        assert dtype('>i8').isnative == False
+        if sys.byteorder == 'big':
+            assert dtype('<i8').isnative == False
+        else:
+            assert dtype('>i8').isnative == False
 
     def test_any_all_nonzero(self):
         import numpy
@@ -1185,6 +1199,7 @@
     def test_setstate(self):
         import numpy as np
         import sys
+        E = '<' if sys.byteorder == 'little' else '>'
         d = np.dtype('f8')
         d.__setstate__((3, '|', (np.dtype('float64'), (2,)), None, None, 20, 1, 0))
         assert d.str == ('<' if sys.byteorder == 'little' else '>') + 'f8'
@@ -1201,7 +1216,7 @@
         assert d.shape == (2,)
         assert d.itemsize == 8
         assert d.subdtype is not None
-        assert repr(d) == "dtype(('<f8', (2,)))"
+        assert repr(d) == "dtype(('{E}f8', (2,)))".format(E=E)
 
         d = np.dtype(('<f8', 2))
         assert d.fields is None
@@ -1216,7 +1231,7 @@
         assert d.shape == (2,)
         assert d.itemsize == 20
         assert d.subdtype is not None
-        assert repr(d) == "dtype(('<f8', (2,)))"
+        assert repr(d) == "dtype(('{E}f8', (2,)))".format(E=E)
 
         d = np.dtype(('<f8', 2))
         d.__setstate__((3, '|', (np.dtype('float64'), 2), None, None, 20, 1, 0))
@@ -1224,7 +1239,7 @@
         assert d.shape == (2,)
         assert d.itemsize == 20
         assert d.subdtype is not None
-        assert repr(d) == "dtype(('<f8', (2,)))"
+        assert repr(d) == "dtype(('{E}f8', (2,)))".format(E=E)
 
         d = np.dtype(('<f8', 2))
         exc = raises(ValueError, "d.__setstate__((3, '|', None, ('f0', 'f1'), None, 16, 1, 0))")
@@ -1256,14 +1271,14 @@
         assert d.fields is not None
         assert d.shape == (2,)
         assert d.subdtype is not None
-        assert repr(d) == "dtype([('f0', '<f8'), ('f1', '<f8')])"
+        assert repr(d) == "dtype([('f0', '{E}f8'), ('f1', '{E}f8')])".format(E=E)
 
         d = np.dtype(('<f8', 2))
         d.__setstate__((3, '|', None, ('f0', 'f1'), {'f0': (np.dtype('float64'), 0), 'f1': (np.dtype('float64'), 8)}, 16, 1, 0))
         assert d.fields is not None
         assert d.shape == ()
         assert d.subdtype is None
-        assert repr(d) == "dtype([('f0', '<f8'), ('f1', '<f8')])"
+        assert repr(d) == "dtype([('f0', '{E}f8'), ('f1', '{E}f8')])".format(E=E)
 
         d = np.dtype(('<f8', 2))
         d.__setstate__((3, '|', None, ('f0', 'f1'), {'f0': (np.dtype('float64'), 0), 'f1': (np.dtype('float64'), 8)}, 16, 1, 0))
@@ -1271,7 +1286,7 @@
         assert d.fields is not None
         assert d.shape == (2,)
         assert d.subdtype is not None
-        assert repr(d) == "dtype([('f0', '<f8'), ('f1', '<f8')])"
+        assert repr(d) == "dtype([('f0', '{E}f8'), ('f1', '{E}f8')])".format(E=E)
 
     def test_pickle_record(self):
         from numpy import array, dtype
@@ -1317,6 +1332,7 @@
         raises(ValueError, np.dtype, [('a', 'f4', (-1, -1))])
 
     def test_aligned_size(self):
+        import sys
         import numpy as np
         if self.test_for_core_internal:
             try:
@@ -1335,9 +1351,10 @@
         dt = np.dtype({'f0': ('i4', 0), 'f1':('u1', 4)}, align=True)
         assert dt.itemsize == 8
         assert dt.alignment == 4
-        assert str(dt) == "{'names':['f0','f1'], 'formats':['<i4','u1'], 'offsets':[0,4], 'itemsize':8, 'aligned':True}"
+        E = '<' if sys.byteorder == 'little' else '>'
+        assert str(dt) == "{'names':['f0','f1'], 'formats':['%si4','u1'], 'offsets':[0,4], 'itemsize':8, 'aligned':True}" % E
         dt = np.dtype([('f1', 'u1'), ('f0', 'i4')], align=True)
-        assert str(dt) == "{'names':['f1','f0'], 'formats':['u1','<i4'], 'offsets':[0,4], 'itemsize':8, 'aligned':True}"
+        assert str(dt) == "{'names':['f1','f0'], 'formats':['u1','%si4'], 'offsets':[0,4], 'itemsize':8, 'aligned':True}" % E
         # Nesting should preserve that alignment
         dt1 = np.dtype([('f0', 'i4'),
                        ('f1', [('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')]),
@@ -1357,12 +1374,12 @@
         assert dt3.itemsize == 20
         assert dt1 == dt2
         answer = "{'names':['f0','f1','f2'], " + \
-                    "'formats':['<i4',{'names':['f1','f2','f3'], " + \
-                                      "'formats':['i1','<i4','i1'], " + \
+                    "'formats':['%si4',{'names':['f1','f2','f3'], " + \
+                                      "'formats':['i1','%si4','i1'], " + \
                                       "'offsets':[0,4,8], 'itemsize':12}," + \
                                  "'i1'], " + \
                     "'offsets':[0,4,16], 'itemsize':20, 'aligned':True}"
-        assert str(dt3) == answer
+        assert str(dt3) == answer % (E,E)
         assert dt2 == dt3
         # Nesting should preserve packing
         dt1 = np.dtype([('f0', 'i4'),
diff --git a/pypy/module/micronumpy/test/test_ndarray.py b/pypy/module/micronumpy/test/test_ndarray.py
--- a/pypy/module/micronumpy/test/test_ndarray.py
+++ b/pypy/module/micronumpy/test/test_ndarray.py
@@ -1791,6 +1791,7 @@
 
     def test_scalar_view(self):
         from numpy import array
+        import sys
         a = array(3, dtype='int32')
         b = a.view(dtype='float32')
         assert b.shape == ()
@@ -1799,17 +1800,27 @@
         assert exc.value[0] == "new type not compatible with array."
         exc = raises(TypeError, a.view, 'string')
         assert exc.value[0] == "data-type must not be 0-sized"
-        assert a.view('S4') == '\x03'
+        if sys.byteorder == 'big':
+            assert a.view('S4') == '\x00\x00\x00\x03'
+        else:
+            assert a.view('S4') == '\x03'
         a = array('abc1', dtype='c')
         assert (a == ['a', 'b', 'c', '1']).all()
         assert a.view('S4') == 'abc1'
         b = a.view([('a', 'i2'), ('b', 'i2')])
         assert b.shape == (1,)
-        assert b[0][0] == 25185
-        assert b[0][1] == 12643
+        if sys.byteorder == 'big':
+            assert b[0][0] == 0x6162
+            assert b[0][1] == 0x6331
+        else:
+            assert b[0][0] == 25185
+            assert b[0][1] == 12643
         a = array([(1, 2)], dtype=[('a', 'int64'), ('b', 'int64')])[0]
         assert a.shape == ()
-        assert a.view('S16') == '\x01' + '\x00' * 7 + '\x02'
+        if sys.byteorder == 'big':
+            assert a.view('S16') == '\x00' * 7 + '\x01' + '\x00' * 7 + '\x02'
+        else:
+            assert a.view('S16') == '\x01' + '\x00' * 7 + '\x02'
         a = array(2, dtype='<i8')
         b = a.view('<c8')
         assert 0 < b.real < 1
@@ -1818,15 +1829,23 @@
 
     def test_array_view(self):
         from numpy import array, dtype
+        import sys
         x = array((1, 2), dtype='int8')
         assert x.shape == (2,)
         y = x.view(dtype='int16')
         assert x.shape == (2,)
-        assert y[0] == 513
+        if sys.byteorder == 'big':
+            assert y[0] == 0x0102
+        else:
+            assert y[0] == 513 == 0x0201
         assert y.dtype == dtype('int16')
         y[0] = 670
-        assert x[0] == -98
-        assert x[1] == 2
+        if sys.byteorder == 'little':
+            assert x[0] == -98
+            assert x[1] == 2
+        else:
+            assert x[0] == 2
+            assert x[1] == -98
         f = array([1000, -1234], dtype='i4')
         nnp = self.non_native_prefix
         d = f.view(dtype=nnp + 'i4')
@@ -1847,7 +1866,10 @@
         assert x.view('S4')[0] == 'abc'
         assert x.view('S4')[1] == 'defg'
         a = array([(1, 2)], dtype=[('a', 'int64'), ('b', 'int64')])
-        assert a.view('S16')[0] == '\x01' + '\x00' * 7 + '\x02'
+        if sys.byteorder == 'big':
+            assert a.view('S16')[0] == '\x00' * 7 + '\x01' + '\x00' * 7 + '\x02'
+        else:
+            assert a.view('S16')[0] == '\x01' + '\x00' * 7 + '\x02'
 
     def test_half_conversions(self):
         from numpy import array, arange
@@ -2425,11 +2447,16 @@
         from numpy import array
         import sys
         a = array([1, 2, 3, 4], dtype='i4')
-        assert a.data[0] == '\x01'
+        assert a.data[0] == ('\x01' if sys.byteorder == 'little' else '\x00')
         assert a.data[1] == '\x00'
-        assert a.data[4] == '\x02'
-        a.data[4] = '\xff'
-        assert a[1] == 0xff
+        assert a.data[3] == ('\x00' if sys.byteorder == 'little' else '\x01')
+        assert a.data[4] == ('\x02' if sys.byteorder == 'little' else '\x00')
+        a.data[4] = '\x7f'
+        if sys.byteorder == 'big':
+            a.data[7] = '\x00' # make sure 0x02 is reset to 0
+            assert a[1] == (0x7f000000)
+        else:
+            assert a[1] == 0x7f
         assert len(a.data) == 16
         assert type(a.data) is buffer
         if '__pypy__' in sys.builtin_module_names:
@@ -2501,12 +2528,17 @@
     def test__reduce__(self):
         from numpy import array, dtype
         from cPickle import loads, dumps
+        import sys
 
         a = array([1, 2], dtype="int64")
         data = a.__reduce__()
 
-        assert data[2][4] == '\x01\x00\x00\x00\x00\x00\x00\x00' \
-                             '\x02\x00\x00\x00\x00\x00\x00\x00'
+        if sys.byteorder == 'big':
+            assert data[2][4] == '\x00\x00\x00\x00\x00\x00\x00\x01' \
+                                 '\x00\x00\x00\x00\x00\x00\x00\x02'
+        else:
+            assert data[2][4] == '\x01\x00\x00\x00\x00\x00\x00\x00' \
+                                 '\x02\x00\x00\x00\x00\x00\x00\x00'
 
         pickled_data = dumps(a)
         assert (loads(pickled_data) == a).all()
@@ -2639,12 +2671,16 @@
     def test_ndarray_from_buffer(self):
         import numpy as np
         import array
+        import sys
         buf = array.array('c', ['\x00']*2*3)
         a = np.ndarray((3,), buffer=buf, dtype='i2')
         a[0] = ord('b')
         a[1] = ord('a')
         a[2] = ord('r')
-        assert list(buf) == ['b', '\x00', 'a', '\x00', 'r', '\x00']
+        if sys.byteorder == 'big':
+            assert list(buf) == ['\x00', 'b', '\x00', 'a', '\x00', 'r']
+        else:
+            assert list(buf) == ['b', '\x00', 'a', '\x00', 'r', '\x00']
         assert a.base is buf
 
     def test_ndarray_subclass_from_buffer(self):
@@ -2659,13 +2695,17 @@
     def test_ndarray_from_buffer_and_offset(self):
         import numpy as np
         import array
+        import sys
         buf = array.array('c', ['\x00']*7)
         buf[0] = 'X'
         a = np.ndarray((3,), buffer=buf, offset=1, dtype='i2')
         a[0] = ord('b')
         a[1] = ord('a')
         a[2] = ord('r')
-        assert list(buf) == ['X', 'b', '\x00', 'a', '\x00', 'r', '\x00']
+        if sys.byteorder == 'big':
+            assert list(buf) == ['X', '\x00', 'b', '\x00', 'a', '\x00', 'r']
+        else:
+            assert list(buf) == ['X', 'b', '\x00', 'a', '\x00', 'r', '\x00']
 
     def test_ndarray_from_buffer_out_of_bounds(self):
         import numpy as np
@@ -3397,6 +3437,21 @@
         a.itemset(1, 2, 100)
         assert a[1, 2] == 100
 
+    def test_index_int(self):
+        import numpy as np
+        a = np.array([10, 20, 30])
+        res = a[np.int64(1)]
+        assert isinstance(res, np.int64)
+        assert res == 20
+        res = a[np.int32(0)]
+        assert isinstance(res, np.int64)
+        assert res == 10
+
+        b = a.astype(float)
+        res = b[np.int64(1)]
+        assert res == 20.0
+        assert isinstance(res, np.float64)
+
     def test_index(self):
         import numpy as np
         a = np.array([1], np.uint16)
@@ -3408,6 +3463,7 @@
             assert exc.value.message == 'only integer arrays with one element ' \
                                         'can be converted to an index'
 
+
     def test_int_array_index(self):
         from numpy import array
         assert (array([])[[]] == []).all()
@@ -3501,7 +3557,11 @@
         BaseNumpyAppTest.setup_class.im_func(cls)
         cls.w_data = cls.space.wrap(struct.pack('dddd', 1, 2, 3, 4))
         cls.w_fdata = cls.space.wrap(struct.pack('f', 2.3))
-        cls.w_float16val = cls.space.wrap('\x00E') # 5.0 in float16
+        import sys
+        if sys.byteorder == 'big':
+            cls.w_float16val = cls.space.wrap('E\x00') # 5.0 in float16
+        else:
+            cls.w_float16val = cls.space.wrap('\x00E') # 5.0 in float16
         cls.w_float32val = cls.space.wrap(struct.pack('f', 5.2))
         cls.w_float64val = cls.space.wrap(struct.pack('d', 300.4))
         cls.w_ulongval = cls.space.wrap(struct.pack('L', 12))
@@ -3609,9 +3669,15 @@
         assert (t == []).all()
         u = fromstring("\x01\x00\x00\x00\x00\x00\x00\x00", dtype=int)
         if sys.maxint > 2 ** 31 - 1:
-            assert (u == [1]).all()
+            if sys.byteorder == 'big':
+                assert (u == [0x0100000000000000]).all()
+            else:
+                assert (u == [1]).all()
         else:
-            assert (u == [1, 0]).all()
+            if sys.byteorder == 'big':
+                assert (u == [0x01000000, 0]).all()
+            else:
+                assert (u == [1, 0]).all()
         v = fromstring("abcd", dtype="|S2")
         assert v[0] == "ab"
         assert v[1] == "cd"
@@ -3668,9 +3734,15 @@
         k = fromstring(self.float16val, dtype='float16')
         assert k[0] == dtype('float16').type(5.)
         dt =  array([5], dtype='longfloat').dtype
+        print(dt.itemsize)
         if dt.itemsize == 8:
-            m = fromstring('\x00\x00\x00\x00\x00\x00\x14@',
-                           dtype='float64')
+            import sys
+            if sys.byteorder == 'big':
+                m = fromstring('@\x14\x00\x00\x00\x00\x00\x00',
+                               dtype='float64')
+            else:
+                m = fromstring('\x00\x00\x00\x00\x00\x00\x14@',
+                               dtype='float64')
         elif dt.itemsize == 12:
             m = fromstring('\x00\x00\x00\x00\x00\x00\x00\xa0\x01@\x00\x00',
                            dtype='float96')
@@ -3692,8 +3764,13 @@
 
     def test_tostring(self):
         from numpy import array
-        assert array([1, 2, 3], 'i2').tostring() == '\x01\x00\x02\x00\x03\x00'
-        assert array([1, 2, 3], 'i2')[::2].tostring() == '\x01\x00\x03\x00'
+        import sys
+        if sys.byteorder == 'big':
+            assert array([1, 2, 3], 'i2').tostring() == '\x00\x01\x00\x02\x00\x03'
+            assert array([1, 2, 3], 'i2')[::2].tostring() == '\x00\x01\x00\x03'
+        else:
+            assert array([1, 2, 3], 'i2').tostring() == '\x01\x00\x02\x00\x03\x00'
+            assert array([1, 2, 3], 'i2')[::2].tostring() == '\x01\x00\x03\x00'
         assert array([1, 2, 3], '<i2')[::2].tostring() == '\x01\x00\x03\x00'
         assert array([1, 2, 3], '>i2')[::2].tostring() == '\x00\x01\x00\x03'
         assert array(0, dtype='i2').tostring() == '\x00\x00'
@@ -4189,7 +4266,11 @@
         v = a.view(('float32', 4))
         assert v.dtype == np.dtype('float32')
         assert v.shape == (10, 4)
-        assert v[0][-1] == 2.53125
+        import sys
+        if sys.byteorder == 'big':
+            assert v[0][-2] == 2.53125
+        else:
+            assert v[0][-1] == 2.53125
         exc = raises(ValueError, "a.view(('float32', 2))")
         assert exc.value[0] == 'new type not compatible with array.'
 
diff --git a/pypy/module/micronumpy/test/test_scalar.py b/pypy/module/micronumpy/test/test_scalar.py
--- a/pypy/module/micronumpy/test/test_scalar.py
+++ b/pypy/module/micronumpy/test/test_scalar.py
@@ -109,6 +109,7 @@
 
     def test_pickle(self):
         from numpy import dtype, zeros
+        import sys
         try:
             from numpy.core.multiarray import scalar
         except ImportError:
@@ -119,9 +120,11 @@
         f = dtype('float64').type(13.37)
         c = dtype('complex128').type(13 + 37.j)
 
-        assert i.__reduce__() == (scalar, (dtype('int32'), '9\x05\x00\x00'))
-        assert f.__reduce__() == (scalar, (dtype('float64'), '=\n\xd7\xa3p\xbd*@'))
-        assert c.__reduce__() == (scalar, (dtype('complex128'), '\x00\x00\x00\x00\x00\x00*@\x00\x00\x00\x00\x00\x80B@'))
+        swap = lambda s: (''.join(reversed(s))) if sys.byteorder == 'big' else s
+        assert i.__reduce__() == (scalar, (dtype('int32'), swap('9\x05\x00\x00')))
+        assert f.__reduce__() == (scalar, (dtype('float64'), swap('=\n\xd7\xa3p\xbd*@')))
+        assert c.__reduce__() == (scalar, (dtype('complex128'), swap('\x00\x00\x00\x00\x00\x00*@') + \
+                                                                swap('\x00\x00\x00\x00\x00\x80B@')))
 
         assert loads(dumps(i)) == i
         assert loads(dumps(f)) == f
@@ -256,13 +259,20 @@
         assert t < 7e-323
         t = s.view('complex64')
         assert type(t) is np.complex64
-        assert 0 < t.real < 1
-        assert t.imag == 0
+        if sys.byteorder == 'big':
+            assert 0 < t.imag < 1
+            assert t.real == 0
+        else:
+            assert 0 < t.real < 1
+            assert t.imag == 0
         exc = raises(TypeError, s.view, 'string')
         assert exc.value[0] == "data-type must not be 0-sized"
         t = s.view('S8')
         assert type(t) is np.string_
-        assert t == '\x0c'
+        if sys.byteorder == 'big':
+            assert t == '\x00' * 7 + '\x0c'
+        else:
+            assert t == '\x0c'
         s = np.dtype('string').type('abc1')
         assert s.view('S4') == 'abc1'
         if '__pypy__' in sys.builtin_module_names:
diff --git a/pypy/module/micronumpy/test/test_selection.py b/pypy/module/micronumpy/test/test_selection.py
--- a/pypy/module/micronumpy/test/test_selection.py
+++ b/pypy/module/micronumpy/test/test_selection.py
@@ -327,10 +327,15 @@
 # tests from numpy/core/tests/test_regression.py
     def test_sort_bigendian(self):
         from numpy import array, dtype
-        a = array(range(11), dtype='float64')
-        c = a.astype(dtype('<f8'))
-        c.sort()
-        assert max(abs(a-c)) < 1e-32
+        import sys
+
+        # little endian sorting for big endian machine
+        # is not yet supported! IMPL ME
+        if sys.byteorder == 'little':
+            a = array(range(11), dtype='float64')
+            c = a.astype(dtype('<f8'))
+            c.sort()
+            assert max(abs(a-c)) < 1e-32
 
     def test_string_argsort_with_zeros(self):
         import numpy as np
diff --git a/pypy/module/micronumpy/test/test_subtype.py b/pypy/module/micronumpy/test/test_subtype.py
--- a/pypy/module/micronumpy/test/test_subtype.py
+++ b/pypy/module/micronumpy/test/test_subtype.py
@@ -478,6 +478,7 @@
                 (version, shp, typ, isf, raw) = state
                 ndarray.__setstate__(self, (shp, typ, isf, raw))
 
+        E = '<' if sys.byteorder == 'little' else '>'
         D.__module__ = 'mod'
         mod = new.module('mod')
         mod.D = D
@@ -510,7 +511,7 @@
             tp9
             Rp10
             (I3
-            S'<'
+            S'{E}'
             p11
             NNNI-1
             I-1
@@ -520,7 +521,7 @@
             S'\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00@'
             p13
             tp14
-            b.'''.replace('            ','')
+            b.'''.replace('            ','').format(E=E)
         for ss,sn in zip(s.split('\n')[1:],s_from_numpy.split('\n')[1:]):
             if len(ss)>10:
                 # ignore binary data, it will be checked later
diff --git a/pypy/module/pypyjit/test_pypy_c/test_buffers.py b/pypy/module/pypyjit/test_pypy_c/test_buffers.py
--- a/pypy/module/pypyjit/test_pypy_c/test_buffers.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_buffers.py
@@ -34,7 +34,7 @@
             i = 0
             while i < n:
                 i += 1
-                struct.unpack('i', a)  # ID: unpack
+                struct.unpack('<i', a)  # ID: unpack
             return i
         log = self.run(main, [1000])
         assert log.result == 1000
diff --git a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py
--- a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py
@@ -1,5 +1,5 @@
 import py
-
+import sys
 from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
 from rpython.rlib.rawstorage import misaligned_is_fine
 
@@ -99,6 +99,10 @@
         assert log.result is False
         assert len(log.loops) == 1
         loop = log._filter(log.loops[0])
+        if sys.byteorder == 'big':
+            bit = ord('>')
+        else:
+            bit = ord('<')
         assert loop.match("""
             guard_class(p1, #, descr=...)
             p4 = getfield_gc_r(p1, descr=<FieldP pypy.module.micronumpy.iterators.ArrayIter.inst_array \d+ pure>)
@@ -109,7 +113,7 @@
             i9 = getfield_gc_i(p4, descr=<FieldU pypy.module.micronumpy.concrete.BaseConcreteArray.inst_storage \d+ pure>)
             i10 = getfield_gc_i(p6, descr=<FieldU pypy.module.micronumpy.descriptor.W_Dtype.inst_byteorder \d+ pure>)
             i12 = int_eq(i10, 61)
-            i14 = int_eq(i10, 60)
+            i14 = int_eq(i10, %d)
             i15 = int_or(i12, i14)
             f16 = raw_load_f(i9, i5, descr=<ArrayF \d+>)
             guard_true(i15, descr=...)
@@ -142,7 +146,7 @@
             setfield_gc(p34, i30, descr=<FieldS pypy.module.micronumpy.iterators.IterState.inst_offset \d+>)
             }}}
             jump(..., descr=...)
-        """)
+        """ % (bit,))
 
     def test_reduce_logical_and(self):
         def main():
diff --git a/pypy/module/pypyjit/test_pypy_c/test_struct.py b/pypy/module/pypyjit/test_pypy_c/test_struct.py
--- a/pypy/module/pypyjit/test_pypy_c/test_struct.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_struct.py
@@ -19,8 +19,8 @@
             import struct
             i = 1
             while i < n:
-                buf = struct.pack("i", i)       # ID: pack
-                x = struct.unpack("i", buf)[0]  # ID: unpack
+                buf = struct.pack("<i", i)       # ID: pack
+                x = struct.unpack("<i", buf)[0]  # ID: unpack
                 i += x / i
             return i
 
@@ -43,20 +43,36 @@
             i20 = int_and(i19, 255)
         """ % extra)
 
-        # the newstr and the strsetitems are because the string is forced,
-        # which is in turn because the optimizer doesn't know how to handle a
-        # gc_load_indexed_i on a virtual string. It could be improved, but it
-        # is also true that in real life cases struct.unpack is called on
-        # strings which come from the outside, so it's a minor issue.
-        assert loop.match_by_id("unpack", """
-            # struct.unpack
-            p88 = newstr(4)
-            strsetitem(p88, 0, i11)
-            strsetitem(p88, 1, i14)
-            strsetitem(p88, 2, i17)
-            strsetitem(p88, 3, i20)
-            i91 = gc_load_indexed_i(p88, 0, 1, _, -4)
-        """)
+        if sys.byteorder == 'little':
+            # the newstr and the strsetitems are because the string is forced,
+            # which is in turn because the optimizer doesn't know how to handle a
+            # gc_load_indexed_i on a virtual string. It could be improved, but it
+            # is also true that in real life cases struct.unpack is called on
+            # strings which come from the outside, so it's a minor issue.
+            assert loop.match_by_id("unpack", """
+                # struct.unpack
+                p88 = newstr(4)
+                strsetitem(p88, 0, i11)
+                strsetitem(p88, 1, i14)
+                strsetitem(p88, 2, i17)
+                strsetitem(p88, 3, i20)
+                i91 = gc_load_indexed_i(p88, 0, 1, _, -4)
+            """)
+        else:
+            # on a big endian machine we cannot just write into
+            # a char buffer and then use load gc to read the integer,
+            # here manual shifting is applied
+            assert loop.match_by_id("unpack", """
+                # struct.unpack
+                i95 = int_lshift(i90, 8)
+                i96 = int_or(i88, i95)
+                i97 = int_lshift(i92, 16)
+                i98 = int_or(i96, i97)
+                i99 = int_ge(i94, 128)
+                guard_false(i99, descr=...)
+                i100 = int_lshift(i94, 24)
+                i101 = int_or(i98, i100)
+            """)
 
     def test_struct_object(self):
         def main(n):
@@ -72,31 +88,32 @@
         log = self.run(main, [1000])
         assert log.result == main(1000)
 
-        loop, = log.loops_by_filename(self.filepath)
-        assert loop.match_by_id('pack', """
-            guard_not_invalidated(descr=...)
-            # struct.pack
-            %s
-            i11 = int_and(i4, 255)
-            i13 = int_rshift(i4, 8)
-            i14 = int_and(i13, 255)
-            i16 = int_rshift(i13, 8)
-            i17 = int_and(i16, 255)
-            i19 = int_rshift(i16, 8)
-            i20 = int_and(i19, 255)
-        """ % extra)
+        if sys.byteorder == 'little':
+            loop, = log.loops_by_filename(self.filepath)
+            assert loop.match_by_id('pack', """
+                guard_not_invalidated(descr=...)
+                # struct.pack
+                %s
+                i11 = int_and(i4, 255)
+                i13 = int_rshift(i4, 8)
+                i14 = int_and(i13, 255)
+                i16 = int_rshift(i13, 8)
+                i17 = int_and(i16, 255)
+                i19 = int_rshift(i16, 8)
+                i20 = int_and(i19, 255)
+            """ % extra)
 
-        assert loop.match_by_id('unpack', """
-            # struct.unpack
-            p88 = newstr(8)
-            strsetitem(p88, 0, 255)
-            strsetitem(p88, 1, 255)
-            strsetitem(p88, 2, 255)
-            strsetitem(p88, 3, 255)
-            strsetitem(p88, 4, i11)
-            strsetitem(p88, 5, i14)
-            strsetitem(p88, 6, i17)
-            strsetitem(p88, 7, i20)
-            i90 = gc_load_indexed_i(p88, 0, 1, _, -4)
-            i91 = gc_load_indexed_i(p88, 4, 1, _, -4)
-        """)
+            assert loop.match_by_id('unpack', """
+                # struct.unpack
+                p88 = newstr(8)
+                strsetitem(p88, 0, 255)
+                strsetitem(p88, 1, 255)
+                strsetitem(p88, 2, 255)
+                strsetitem(p88, 3, 255)
+                strsetitem(p88, 4, i11)
+                strsetitem(p88, 5, i14)
+                strsetitem(p88, 6, i17)
+                strsetitem(p88, 7, i20)
+                i90 = gc_load_indexed_i(p88, 0, 1, _, -4)
+                i91 = gc_load_indexed_i(p88, 4, 1, _, -4)
+            """)
diff --git a/pypy/module/select/test/test_select.py b/pypy/module/select/test/test_select.py
--- a/pypy/module/select/test/test_select.py
+++ b/pypy/module/select/test/test_select.py
@@ -287,7 +287,8 @@
             t = thread.start_new_thread(pollster.poll, ())
             try:
                 time.sleep(0.3)
-                for i in range(5): print '',  # to release GIL untranslated
+                # TODO restore print '', if this is not the reason
+                for i in range(5): print 'release gil select'  # to release GIL untranslated
                 # trigger ufds array reallocation
                 for fd in rfds:
                     pollster.unregister(fd)
@@ -328,6 +329,10 @@
         "usemodules": ["select", "_socket", "time", "thread"],
     }
 
+    import os
+    if os.uname()[4] == 's390x':
+        py.test.skip("build bot for s390x cannot open sockets")
+
     def w_make_server(self):
         import socket
         if hasattr(self, 'sock'):
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_unions.py b/pypy/module/test_lib_pypy/ctypes_tests/test_unions.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_unions.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_unions.py
@@ -1,4 +1,4 @@
-
+import sys
 from ctypes import *
 from support import BaseCTypesTestChecker
 
@@ -8,8 +8,11 @@
             _fields_ = [('x', c_char), ('y', c_int)]
 
         stuff = Stuff()
-        stuff.y = ord('x')
-        assert stuff.x == 'x'
+        stuff.y = ord('x') | (ord('z') << 24)
+        if sys.byteorder == 'little':
+            assert stuff.x == 'x'
+        else:
+            assert stuff.x == 'z'
 
     def test_union_of_structures(self):
         class Stuff(Structure):
diff --git a/pypy/module/thread/test/test_lock.py b/pypy/module/thread/test/test_lock.py
--- a/pypy/module/thread/test/test_lock.py
+++ b/pypy/module/thread/test/test_lock.py
@@ -3,6 +3,7 @@
 import sys, os
 from pypy.module.thread.test.support import GenericTestThread
 from rpython.translator.c.test.test_genc import compile
+import platform
 
 
 class AppTestLock(GenericTestThread):
@@ -63,6 +64,8 @@
         else:
             assert self.runappdirect, "missing lock._py3k_acquire()"
 
+    @py.test.mark.xfail(platform.machine() == 's390x',
+                        reason='may fail this test under heavy load')
     def test_ping_pong(self):
         # The purpose of this test is that doing a large number of ping-pongs
         # between two threads, using locks, should complete in a reasonable
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -609,13 +609,16 @@
     def descr_startswith(self, space, w_prefix, w_start=None, w_end=None):
         (value, start, end) = self._convert_idx_params(space, w_start, w_end)
         if space.isinstance_w(w_prefix, space.w_tuple):
-            for w_prefix in space.fixedview(w_prefix):
-                if self._startswith(space, value, w_prefix, start, end):
-                    return space.w_True
-            return space.w_False
+            return self._startswith_tuple(space, value, w_prefix, start, end)
         return space.newbool(self._startswith(space, value, w_prefix, start,
                                               end))
 
+    def _startswith_tuple(self, space, value, w_prefix, start, end):
+        for w_prefix in space.fixedview(w_prefix):
+            if self._startswith(space, value, w_prefix, start, end):
+                return space.w_True
+        return space.w_False
+
     def _startswith(self, space, value, w_prefix, start, end):
         prefix = self._op_val(space, w_prefix)
         if start > len(value):
@@ -629,13 +632,16 @@
     def descr_endswith(self, space, w_suffix, w_start=None, w_end=None):
         (value, start, end) = self._convert_idx_params(space, w_start, w_end)
         if space.isinstance_w(w_suffix, space.w_tuple):
-            for w_suffix in space.fixedview(w_suffix):
-                if self._endswith(space, value, w_suffix, start, end):
-                    return space.w_True
-            return space.w_False
+            return self._endswith_tuple(space, value, w_suffix, start, end)
         return space.newbool(self._endswith(space, value, w_suffix, start,
                                             end))
 
+    def _endswith_tuple(self, space, value, w_suffix, start, end):
+        for w_suffix in space.fixedview(w_suffix):
+            if self._endswith(space, value, w_suffix, start, end):
+                return space.w_True
+        return space.w_False
+
     def _endswith(self, space, value, w_prefix, start, end):
         prefix = self._op_val(space, w_prefix)
         if start > len(value):
@@ -795,5 +801,3 @@
 
 def _get_buffer(space, w_obj):
     return space.buffer_w(w_obj, space.BUF_SIMPLE)
-
-
diff --git a/pypy/testrunner_cfg.py b/pypy/testrunner_cfg.py
--- a/pypy/testrunner_cfg.py
+++ b/pypy/testrunner_cfg.py
@@ -5,6 +5,7 @@
     'translator/c', 'rlib',
     'memory/test', 'jit/metainterp',
     'jit/backend/arm', 'jit/backend/x86',
+    'jit/backend/zarch',
 ]
 
 def collect_one_testdir(testdirs, reldir, tests):
diff --git a/pypy/tool/release/repackage.sh b/pypy/tool/release/repackage.sh
--- a/pypy/tool/release/repackage.sh
+++ b/pypy/tool/release/repackage.sh
@@ -9,7 +9,7 @@
 # download source, assuming a tag for the release already exists, and repackage them.
 # The script should be run in an empty directory, i.e. /tmp/release_xxx
 
-for plat in linux linux64 linux-armhf-raspbian linux-armhf-raring linux-armel osx64 freebsd64
+for plat in linux linux64 linux-armhf-raspbian linux-armhf-raring linux-armel osx64
   do
     wget http://buildbot.pypy.org/nightly/$branchname/pypy-c-jit-latest-$plat.tar.bz2
     tar -xf pypy-c-jit-latest-$plat.tar.bz2
@@ -29,15 +29,16 @@
 # Do this after creating a tag, note the untarred directory is pypy-pypy-<hash>
 # so make sure there is not another one
 wget https://bitbucket.org/pypy/pypy/get/$tagname.tar.bz2
-tar -xf release-$maj.$min.$rev.tar.bz2
+tar -xf $tagname.tar.bz2
 mv pypy-pypy-* pypy-$maj.$min.$rev-src
 tar --owner=root --group=root --numeric-owner -cvjf pypy-$maj.$min.$rev-src.tar.bz2 pypy-$maj.$min.$rev-src
 zip -r pypy-$maj.$min.$rev-src.zip pypy-$maj.$min.$rev-src
 rm -rf pypy-$maj.$min.$rev-src
 
-# Print out the md5, sha1
+# Print out the md5, sha1, sha256
 md5sum *.bz2 *.zip
 sha1sum *.bz2 *.zip
+sha256sum *.bz2 *.zip
 
 # Now upload all the bz2 and zip
 
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 # hypothesis is used for test generation on untranslated jit tests
 hypothesis
 enum>=0.4.6 # is a dependency, but old pip does not pick it up
+enum34>=1.1.2
diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py
--- a/rpython/annotator/annrpython.py
+++ b/rpython/annotator/annrpython.py
@@ -3,7 +3,7 @@
 import types
 from collections import defaultdict
 
-from rpython.tool.ansi_print import ansi_log
+from rpython.tool.ansi_print import AnsiLogger
 from rpython.tool.pairtype import pair
 from rpython.tool.error import (format_blocked_annotation_error,
                              gather_error, source_lines)
@@ -15,9 +15,7 @@
 from rpython.annotator.bookkeeper import Bookkeeper
 from rpython.rtyper.normalizecalls import perform_normalizations
 
-import py
-log = py.log.Producer("annrpython")
-py.log.setconsumer("annrpython", ansi_log)
+log = AnsiLogger("annrpython")
 
 
 class RPythonAnnotator(object):
diff --git a/rpython/doc/index.rst b/rpython/doc/index.rst
--- a/rpython/doc/index.rst
+++ b/rpython/doc/index.rst
@@ -37,6 +37,7 @@
 
    arm
    logging
+   s390x
 
 
 Writing your own interpreter in RPython
diff --git a/rpython/doc/s390x.rst b/rpython/doc/s390x.rst
new file mode 100644
--- /dev/null
+++ b/rpython/doc/s390x.rst
@@ -0,0 +1,20 @@
+.. _s390x:
+
+S390X JIT Backend
+=================
+
+Our JIT implements the 64 bit version of the IBM Mainframe called s390x.
+Note that this architecture is big endian.
+
+The following facilities need to be installed for the JIT to operate
+correctly (they were installed on all of the machines used for development):
+
+* General-Instructions-Extension
+* Long-Displacement
+* Binary Floating Point (IEEE)
+
+Translating
+-----------
+
+Ensure that libffi is installed (any version > 3.0.+ should do).
+CPython should be version 2.7.+.
diff --git a/rpython/jit/backend/arm/locations.py b/rpython/jit/backend/arm/locations.py
--- a/rpython/jit/backend/arm/locations.py
+++ b/rpython/jit/backend/arm/locations.py
@@ -1,7 +1,6 @@
 from rpython.jit.metainterp.history import INT, FLOAT
 from rpython.jit.backend.arm.arch import WORD, DOUBLE_WORD, JITFRAME_FIXED_SIZE
 
-
 class AssemblerLocation(object):
     _immutable_ = True
     type = INT
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -1143,35 +1143,42 @@
     def emit_op_zero_array(self, op, arglocs, regalloc, fcond):
         from rpython.jit.backend.llsupport.descr import unpack_arraydescr
         assert len(arglocs) == 0
-        length_box = op.getarg(2)
-        if isinstance(length_box, ConstInt) and length_box.getint() == 0:
+        size_box = op.getarg(2)
+        if isinstance(size_box, ConstInt) and size_box.getint() == 0:
             return fcond     # nothing to do
         itemsize, baseofs, _ = unpack_arraydescr(op.getdescr())
         args = op.getarglist()
+        #
+        # ZERO_ARRAY(base_loc, start, size, 1, 1)
+        # 'start' and 'size' are both expressed in bytes,
+        # and the two scaling arguments should always be ConstInt(1) on ARM.
+        assert args[3].getint() == 1
+        assert args[4].getint() == 1
+        #
         base_loc = regalloc.rm.make_sure_var_in_reg(args[0], args)
-        sibox = args[1]
-        if isinstance(sibox, ConstInt):
-            startindex_loc = None
-            startindex = sibox.getint()
-            assert startindex >= 0
+        startbyte_box = args[1]
+        if isinstance(startbyte_box, ConstInt):
+            startbyte_loc = None
+            startbyte = startbyte_box.getint()
+            assert startbyte >= 0
         else:
-            startindex_loc = regalloc.rm.make_sure_var_in_reg(sibox, args)
-            startindex = -1
+            startbyte_loc = regalloc.rm.make_sure_var_in_reg(startbyte_box,
+                                                             args)
+            startbyte = -1
 
-        # base_loc and startindex_loc are in two regs here (or they are
-        # immediates).  Compute the dstaddr_loc, which is the raw
+        # base_loc and startbyte_loc are in two regs here (or startbyte_loc
+        # is an immediate).  Compute the dstaddr_loc, which is the raw
         # address that we will pass as first argument to memset().
         # It can be in the same register as either one, but not in
         # args[2], because we're still needing the latter.
         dstaddr_box = TempVar()
         dstaddr_loc = regalloc.rm.force_allocate_reg(dstaddr_box, [args[2]])
-        if startindex >= 0:    # a constant
-            ofs = baseofs + startindex * itemsize
+        if startbyte >= 0:    # a constant
+            ofs = baseofs + startbyte
             reg = base_loc.value
         else:
-            self.mc.gen_load_int(r.ip.value, itemsize)
-            self.mc.MLA(dstaddr_loc.value, r.ip.value,
-                        startindex_loc.value, base_loc.value)
+            self.mc.ADD_rr(dstaddr_loc.value,
+                           base_loc.value, startbyte_loc.value)
             ofs = baseofs
             reg = dstaddr_loc.value
         if check_imm_arg(ofs):
@@ -1180,20 +1187,27 @@
             self.mc.gen_load_int(r.ip.value, ofs)
             self.mc.ADD_rr(dstaddr_loc.value, reg, r.ip.value)
 
-        if (isinstance(length_box, ConstInt) and
-                length_box.getint() <= 14 and     # same limit as GCC
-                itemsize in (4, 2, 1)):
+        # We use STRB, STRH or STR based on whether we know the array
+        # item size is a multiple of 1, 2 or 4.
+        if   itemsize & 1: itemsize = 1
+        elif itemsize & 2: itemsize = 2
+        else:              itemsize = 4
+        limit = itemsize
+        next_group = -1
+        if itemsize < 4 and startbyte >= 0:
+            # we optimize STRB/STRH into STR, but this needs care:
+            # it only works if the start byte is a constant, otherwise
+            # we'd be doing unaligned accesses.
+            next_group = (-startbyte) & 3
+            limit = 4
+
+        if (isinstance(size_box, ConstInt) and
+                size_box.getint() <= 14 * limit):     # same limit as GCC
             # Inline a series of STR operations, starting at 'dstaddr_loc'.
-            next_group = -1
-            if itemsize < 4 and startindex >= 0:
-                # we optimize STRB/STRH into STR, but this needs care:
-                # it only works if startindex_loc is a constant, otherwise
-                # we'd be doing unaligned accesses.
-                next_group = (-startindex * itemsize) & 3
             #
             self.mc.gen_load_int(r.ip.value, 0)
             i = 0
-            total_size = length_box.getint() * itemsize
+            total_size = size_box.getint()
             while i < total_size:
                 sz = itemsize
                 if i == next_group:
@@ -1209,29 +1223,18 @@
                 i += sz
 
         else:
-            if isinstance(length_box, ConstInt):
-                length_loc = imm(length_box.getint() * itemsize)
+            if isinstance(size_box, ConstInt):
+                size_loc = imm(size_box.getint())
             else:
-                # load length_loc in a register different than dstaddr_loc
-                length_loc = regalloc.rm.make_sure_var_in_reg(length_box,
-                                                              [dstaddr_box])
-                if itemsize > 1:
-                    # we need a register that is different from dstaddr_loc,
-                    # but which can be identical to length_loc (as usual,
-                    # only if the length_box is not used by future operations)
-                    bytes_box = TempVar()
-                    bytes_loc = regalloc.rm.force_allocate_reg(bytes_box,
-                                                               [dstaddr_box])
-                    self.mc.gen_load_int(r.ip.value, itemsize)
-                    self.mc.MUL(bytes_loc.value, r.ip.value, length_loc.value)
-                    length_box = bytes_box
-                    length_loc = bytes_loc
+                # load size_loc in a register different than dstaddr_loc
+                size_loc = regalloc.rm.make_sure_var_in_reg(size_box,
+                                                            [dstaddr_box])
             #
             # call memset()
             regalloc.before_call()
             self.simple_call_no_collect(imm(self.memset_addr),
-                                        [dstaddr_loc, imm(0), length_loc])
-            regalloc.rm.possibly_free_var(length_box)
+                                        [dstaddr_loc, imm(0), size_loc])
+            regalloc.rm.possibly_free_var(size_box)
         regalloc.rm.possibly_free_var(dstaddr_box)
         return fcond
 
diff --git a/rpython/jit/backend/detect_cpu.py b/rpython/jit/backend/detect_cpu.py
--- a/rpython/jit/backend/detect_cpu.py
+++ b/rpython/jit/backend/detect_cpu.py
@@ -16,6 +16,7 @@
 MODEL_X86_64_SSE4 = 'x86-64-sse4'
 MODEL_ARM         = 'arm'
 MODEL_PPC_64      = 'ppc-64'
+MODEL_S390_64     = 's390x'
 # don't use '_' in the model strings; they are replaced by '-'
 
 
@@ -27,6 +28,7 @@
         MODEL_ARM:    ['__arm__', '__thumb__','_M_ARM_EP'],
         MODEL_X86:    ['i386', '__i386', '__i386__', '__i686__','_M_IX86'],
         MODEL_PPC_64: ['__powerpc64__'],
+        MODEL_S390_64:['__s390x__'],
     }
     for k, v in mapping.iteritems():
         for macro in v:
@@ -67,6 +69,7 @@
             'armv7l': MODEL_ARM,
             'armv6l': MODEL_ARM,
             'arm': MODEL_ARM,      # freebsd
+            's390x': MODEL_S390_64
             }.get(mach)
 
     if result is None:
@@ -88,7 +91,6 @@
             if feature.detect_x32_mode():
                 raise ProcessorAutodetectError(
                     'JITting in x32 mode is not implemented')
-
     #
     if result.startswith('arm'):
         from rpython.jit.backend.arm.detect import detect_float
@@ -122,6 +124,8 @@
         return "rpython.jit.backend.arm.runner", "CPU_ARM"
     elif backend_name == MODEL_PPC_64:
         return "rpython.jit.backend.ppc.runner", "PPC_CPU"
+    elif backend_name == MODEL_S390_64:
+        return "rpython.jit.backend.zarch.runner", "CPU_S390_64"
     else:
         raise ProcessorAutodetectError, (
             "we have no JIT backend for this cpu: '%s'" % backend_name)
@@ -142,6 +146,7 @@
         MODEL_X86_64_SSE4: ['floats', 'singlefloats'],
         MODEL_ARM: ['floats', 'singlefloats', 'longlong'],
         MODEL_PPC_64: [], # we don't even have PPC directory, so no
+        MODEL_S390_64: ['floats'],
     }[backend_name]
 
 if __name__ == '__main__':
diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -164,13 +164,11 @@
             array_index = moving_obj_tracker.get_array_index(v)
 
             size, offset, _ = unpack_arraydescr(moving_obj_tracker.ptr_array_descr)
-            scale = size
+            array_index = array_index * size + offset
             args = [moving_obj_tracker.const_ptr_gcref_array,
                     ConstInt(array_index),
-                    ConstInt(scale),
-                    ConstInt(offset),
                     ConstInt(size)]
-            load_op = ResOperation(rop.GC_LOAD_INDEXED_R, args)
+            load_op = ResOperation(rop.GC_LOAD_R, args)
             newops.append(load_op)
             op.setarg(arg_i, load_op)
         #
diff --git a/rpython/jit/backend/llsupport/jump.py b/rpython/jit/backend/llsupport/jump.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/llsupport/jump.py
@@ -0,0 +1,107 @@
+def remap_frame_layout(assembler, src_locations, dst_locations, tmpreg):
+    pending_dests = len(dst_locations)
+    srccount = {}    # maps dst_locations to how many times the same
+                     # location appears in src_locations
+    for dst in dst_locations:
+        key = dst.as_key()
+        assert key not in srccount, "duplicate value in dst_locations!"
+        srccount[key] = 0
+    for i in range(len(dst_locations)):
+        src = src_locations[i]
+        if src.is_imm():
+            continue
+        key = src.as_key()
+        if key in srccount:
+            if key == dst_locations[i].as_key():
+                # ignore a move "x = x"
+                # setting any "large enough" negative value is ok, but
+                # be careful of overflows, don't use -sys.maxint
+                srccount[key] = -len(dst_locations) - 1
+                pending_dests -= 1
+            else:
+                srccount[key] += 1
+
+    while pending_dests > 0:
+        progress = False
+        for i in range(len(dst_locations)):
+            dst = dst_locations[i]
+            key = dst.as_key()
+            if srccount[key] == 0:
+                srccount[key] = -1       # means "it's done"
+                pending_dests -= 1
+                src = src_locations[i]
+                if not src.is_imm():
+                    key = src.as_key()
+                    if key in srccount:
+                        srccount[key] -= 1
+                _move(assembler, src, dst, tmpreg)
+                progress = True
+        if not progress:
+            # we are left with only pure disjoint cycles
+            sources = {}     # maps dst_locations to src_locations
+            for i in range(len(dst_locations)):
+                src = src_locations[i]
+                dst = dst_locations[i]
+                sources[dst.as_key()] = src
+            #
+            for i in range(len(dst_locations)):
+                dst = dst_locations[i]
+                originalkey = dst.as_key()
+                if srccount[originalkey] >= 0:
+                    assembler.regalloc_push(dst, 0)
+                    while True:
+                        key = dst.as_key()
+                        assert srccount[key] == 1
+                        # ^^^ because we are in a simple cycle
+                        srccount[key] = -1
+                        pending_dests -= 1
+                        src = sources[key]
+                        if src.as_key() == originalkey:
+                            break
+                        _move(assembler, src, dst, tmpreg)
+                        dst = src
+                    assembler.regalloc_pop(dst, 0)
+            assert pending_dests == 0
+
+def _move(assembler, src, dst, tmpreg):
+    # some assemblers cannot handle memory-to-memory moves without
+    # a tmp register, so prepare src according to the ISA capabilities
+    src = assembler.regalloc_prepare_move(src, dst, tmpreg)
+    assembler.regalloc_mov(src, dst)
+
+def remap_frame_layout_mixed(assembler,
+                             src_locations1, dst_locations1, tmpreg1,
+                             src_locations2, dst_locations2, tmpreg2, WORD):
+    # find and push the fp stack locations from src_locations2 that
+    # are going to be overwritten by dst_locations1
+    extrapushes = []
+    dst_keys = {}
+    for loc in dst_locations1:
+        dst_keys[loc.as_key()] = None
+    src_locations2red = []
+    dst_locations2red = []
+    for i in range(len(src_locations2)):
+        loc    = src_locations2[i]
+        dstloc = dst_locations2[i]
+        if loc.is_stack():
+            key = loc.as_key()
+            if (key in dst_keys or (loc.width > WORD and
+                                    (key + 1) in dst_keys)):
+                assembler.regalloc_push(loc, len(extrapushes))
+                extrapushes.append(dstloc)
+                continue
+        src_locations2red.append(loc)
+        dst_locations2red.append(dstloc)
+    src_locations2 = src_locations2red
+    dst_locations2 = dst_locations2red
+    #
+    # remap the integer and pointer registers and stack locations
+    remap_frame_layout(assembler, src_locations1, dst_locations1, tmpreg1)
+    #
+    # remap the fp registers and stack locations
+    remap_frame_layout(assembler, src_locations2, dst_locations2, tmpreg2)
+    #
+    # finally, pop the extra fp stack locations
+    while len(extrapushes) > 0:
+        loc = extrapushes.pop()
+        assembler.regalloc_pop(loc, len(extrapushes))
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -203,38 +203,47 @@
     def transform_to_gc_load(self, op):
         NOT_SIGNED = 0
         CINT_ZERO = ConstInt(0)
+        opnum = op.getopnum()
+        #if opnum == rop.CALL_MALLOC_NURSERY_VARSIZE:
+        #    v_length = op.getarg(2)
+        #    scale = op.getarg(1).getint()
+        #    if scale not in self.cpu.load_supported_factors:
+        #        scale, offset, v_length = \
+        #                self._emit_mul_if_factor_offset_not_supported(v_length, scale, 0)
+        #        op.setarg(1, ConstInt(scale))
+        #        op.setarg(2, v_length)
         if op.is_getarrayitem() or \
-           op.getopnum() in (rop.GETARRAYITEM_RAW_I,
-                             rop.GETARRAYITEM_RAW_F):
+           opnum in (rop.GETARRAYITEM_RAW_I,
+                     rop.GETARRAYITEM_RAW_F):
             self.handle_getarrayitem(op)
-        elif op.getopnum() in (rop.SETARRAYITEM_GC, rop.SETARRAYITEM_RAW):
+        elif opnum in (rop.SETARRAYITEM_GC, rop.SETARRAYITEM_RAW):
             self.handle_setarrayitem(op)
-        elif op.getopnum() == rop.RAW_STORE:
+        elif opnum == rop.RAW_STORE:
             itemsize, ofs, _ = unpack_arraydescr(op.getdescr())
             ptr_box = op.getarg(0)
             index_box = op.getarg(1)
             value_box = op.getarg(2)
             self.emit_gc_store_or_indexed(op, ptr_box, index_box, value_box, itemsize, 1, ofs)
-        elif op.getopnum() in (rop.RAW_LOAD_I, rop.RAW_LOAD_F):
+        elif opnum in (rop.RAW_LOAD_I, rop.RAW_LOAD_F):
             itemsize, ofs, sign = unpack_arraydescr(op.getdescr())
             ptr_box = op.getarg(0)
             index_box = op.getarg(1)
             self.emit_gc_load_or_indexed(op, ptr_box, index_box, itemsize, 1, ofs, sign)
-        elif op.getopnum() in (rop.GETINTERIORFIELD_GC_I, rop.GETINTERIORFIELD_GC_R,
-                               rop.GETINTERIORFIELD_GC_F):
+        elif opnum in (rop.GETINTERIORFIELD_GC_I, rop.GETINTERIORFIELD_GC_R,
+                       rop.GETINTERIORFIELD_GC_F):
             ofs, itemsize, fieldsize, sign = unpack_interiorfielddescr(op.getdescr())
             ptr_box = op.getarg(0)
             index_box = op.getarg(1)
             self.emit_gc_load_or_indexed(op, ptr_box, index_box, fieldsize, itemsize, ofs, sign)
-        elif op.getopnum() in (rop.SETINTERIORFIELD_RAW, rop.SETINTERIORFIELD_GC):
+        elif opnum in (rop.SETINTERIORFIELD_RAW, rop.SETINTERIORFIELD_GC):
             ofs, itemsize, fieldsize, sign = unpack_interiorfielddescr(op.getdescr())
             ptr_box = op.getarg(0)
             index_box = op.getarg(1)
             value_box = op.getarg(2)
             self.emit_gc_store_or_indexed(op, ptr_box, index_box, value_box,
                                           fieldsize, itemsize, ofs)
-        elif op.getopnum() in (rop.GETFIELD_GC_I, rop.GETFIELD_GC_F, rop.GETFIELD_GC_R,
-                               rop.GETFIELD_RAW_I, rop.GETFIELD_RAW_F, rop.GETFIELD_RAW_R):
+        elif opnum in (rop.GETFIELD_GC_I, rop.GETFIELD_GC_F, rop.GETFIELD_GC_R,
+                       rop.GETFIELD_RAW_I, rop.GETFIELD_RAW_F, rop.GETFIELD_RAW_R):
             ofs, itemsize, sign = unpack_fielddescr(op.getdescr())
             ptr_box = op.getarg(0)
             if op.getopnum() in (rop.GETFIELD_GC_F, rop.GETFIELD_GC_I, rop.GETFIELD_GC_R):
@@ -249,45 +258,45 @@
                 self.emit_op(op)
                 return True
             self.emit_gc_load_or_indexed(op, ptr_box, ConstInt(0), itemsize, 1, ofs, sign)
-        elif op.getopnum() in (rop.SETFIELD_GC, rop.SETFIELD_RAW):
+        elif opnum in (rop.SETFIELD_GC, rop.SETFIELD_RAW):
             ofs, itemsize, sign = unpack_fielddescr(op.getdescr())
             ptr_box = op.getarg(0)
             value_box = op.getarg(1)
             self.emit_gc_store_or_indexed(op, ptr_box, ConstInt(0), value_box, itemsize, 1, ofs)
-        elif op.getopnum() == rop.ARRAYLEN_GC:
+        elif opnum == rop.ARRAYLEN_GC:
             descr = op.getdescr()
             assert isinstance(descr, ArrayDescr)
             ofs = descr.lendescr.offset
             self.emit_gc_load_or_indexed(op, op.getarg(0), ConstInt(0),
                                          WORD, 1, ofs, NOT_SIGNED)
-        elif op.getopnum() == rop.STRLEN:
+        elif opnum == rop.STRLEN:
             basesize, itemsize, ofs_length = get_array_token(rstr.STR,
                                                  self.cpu.translate_support_code)
             self.emit_gc_load_or_indexed(op, op.getarg(0), ConstInt(0),
                                          WORD, 1, ofs_length, NOT_SIGNED)
-        elif op.getopnum() == rop.UNICODELEN:
+        elif opnum == rop.UNICODELEN:
             basesize, itemsize, ofs_length = get_array_token(rstr.UNICODE,
                                                  self.cpu.translate_support_code)
             self.emit_gc_load_or_indexed(op, op.getarg(0), ConstInt(0),
                                          WORD, 1, ofs_length, NOT_SIGNED)
-        elif op.getopnum() == rop.STRGETITEM:
+        elif opnum == rop.STRGETITEM:
             basesize, itemsize, ofs_length = get_array_token(rstr.STR,
                                                  self.cpu.translate_support_code)
             assert itemsize == 1
             self.emit_gc_load_or_indexed(op, op.getarg(0), op.getarg(1),
                                          itemsize, itemsize, basesize, NOT_SIGNED)
-        elif op.getopnum() == rop.UNICODEGETITEM:
+        elif opnum == rop.UNICODEGETITEM:
             basesize, itemsize, ofs_length = get_array_token(rstr.UNICODE,
                                                  self.cpu.translate_support_code)