[pypy-commit] pypy ppc-updated-backend: PPC Backend #4: get test_runner fully passing.

arigo noreply at buildbot.pypy.org
Fri Sep 18 08:19:45 CEST 2015


Author: Armin Rigo <arigo at tunes.org>
Branch: ppc-updated-backend
Changeset: r79681:72dfc868373f
Date: 2015-09-14 10:45 +0200
http://bitbucket.org/pypy/pypy/changeset/72dfc868373f/

Log:	PPC Backend #4: get test_runner fully passing.

	Fix many details, remove old code, etc.

diff --git a/rpython/jit/backend/detect_cpu.py b/rpython/jit/backend/detect_cpu.py
--- a/rpython/jit/backend/detect_cpu.py
+++ b/rpython/jit/backend/detect_cpu.py
@@ -59,6 +59,7 @@
             'x86': MODEL_X86,      # Apple
             'Power Macintosh': MODEL_PPC_64,
             'ppc64': MODEL_PPC_64,
+            'ppc64le': MODEL_PPC_64,
             'x86_64': MODEL_X86,
             'amd64': MODEL_X86,    # freebsd
             'AMD64': MODEL_X86,    # win64
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -213,6 +213,23 @@
                                                     self.mc.get_relative_pos())
 
     def call_assembler(self, op, argloc, vloc, result_loc, tmploc):
+        """
+            * argloc: location of the frame argument that we're passing to
+                      the called assembler (this is the first return value
+                      of locs_for_call_assembler())
+
+            * vloc:   location of the virtualizable (not in a register;
+                      this is the optional second return value of
+                      locs_for_call_assembler(), or imm(0) if none returned)
+
+            * result_loc: location of op.result (which is not to be
+                          confused with the next one)
+
+            * tmploc: location where the actual call to the other piece
+                      of assembler will return its jitframe result
+                      (which is always a REF), before the helper may be
+                      called
+        """
         descr = op.getdescr()
         assert isinstance(descr, JitCellToken)
         #
diff --git a/rpython/jit/backend/ppc/_flush_icache.c b/rpython/jit/backend/ppc/_flush_icache.c
deleted file mode 100644
--- a/rpython/jit/backend/ppc/_flush_icache.c
+++ /dev/null
@@ -1,26 +0,0 @@
-#include <Python.h>
-#include "../../../translator/c/src/asm_ppc.h"
-
-static PyObject*
-_flush_icache(PyObject *self, PyObject *args)
-{
-	long base, size;
-
-	if (!PyArg_ParseTuple(args, "ii:_flush_icache", &base, &size))
-		return NULL;
-
-	LL_flush_icache(base, size);
-	Py_INCREF(Py_None);
-	return Py_None;
-}
-
-PyMethodDef _flush_icache_methods[] = {
-	{"_flush_icache", _flush_icache, METH_VARARGS, ""},
-	{0, 0}
-};
-
-PyMODINIT_FUNC
-init_flush_icache(void)
-{
-	Py_InitModule("_flush_icache", _flush_icache_methods);
-}
diff --git a/rpython/jit/backend/ppc/_ppcgen.c b/rpython/jit/backend/ppc/_ppcgen.c
deleted file mode 100644
--- a/rpython/jit/backend/ppc/_ppcgen.c
+++ /dev/null
@@ -1,154 +0,0 @@
-#include <Python.h>
-#include <sys/mman.h>
-
-#define __dcbf(base, index)     \
-        __asm__ ("dcbf %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory")
-
-
-static PyTypeObject* mmap_type;
-
-#if defined(__APPLE__)
-
-#include <mach-o/dyld.h>
-
-static PyObject*
-_ppy_NSLookupAndBindSymbol(PyObject* self, PyObject* args)
-{
-	char *s;
-	NSSymbol sym;
-
-	if (!PyArg_ParseTuple(args, "s", &s))
-		return NULL;
-
-	if (!NSIsSymbolNameDefined(s)) {
-		return PyErr_Format(PyExc_ValueError,
-				    "symbol '%s' not found", s);
-	}
-		
-	sym = NSLookupAndBindSymbol(s);
-	
-	return PyInt_FromLong((long)NSAddressOfSymbol(sym));
-}
-
-
-#elif defined(linux)
-
-#include <dlfcn.h>
-
-static PyObject*
-_ppy_dlsym(PyObject* self, PyObject* args)
-{
-	char *s;
-	void *handle;
-	void *sym;
-
-	if (!PyArg_ParseTuple(args, "s", &s))
-		return NULL;
-
-	handle = dlopen(RTLD_DEFAULT, RTLD_LAZY);
-	sym = dlsym(handle, s);
-	if (sym == NULL) {
-		return PyErr_Format(PyExc_ValueError,
-				    "symbol '%s' not found", s);
-	}
-	return PyInt_FromLong((long)sym);
-}
-
-#else
-
-#error "OS not supported"
-
-#endif
-
-
-static PyObject*
-_ppy_mmap_exec(PyObject* self, PyObject* args)
-{
-	PyObject* code_args;
-	PyObject* r;
-	PyObject* mmap_obj;
-	char* code;
-	size_t size;
-
-	if (!PyArg_ParseTuple(args, "O!O!:mmap_exec",
-			      mmap_type, &mmap_obj, 
-			      &PyTuple_Type, &code_args)) 
-		return NULL;
-
-	code = *((char**)mmap_obj + 2);
-	size = *((size_t*)mmap_obj + 3);
-
-	r = ((PyCFunction)code)(NULL, code_args);
-
-	Py_DECREF(args);
-
-	return r;
-}
-
-static PyObject*
-_ppy_mmap_flush(PyObject* self, PyObject* arg)
-{
-	char* code;
-	size_t size;
-	int i = 0;
-
-	if (!PyObject_TypeCheck(arg, mmap_type)) {
-		PyErr_SetString(PyExc_TypeError,
-			"mmap_flush: single argument must be mmap object");
-	}
-
-	code = *((char**)arg + 2);
-	size = *((size_t*)arg + 3);
-
-	for (; i < size; i += 32){
-		__dcbf(code, i);
-	}
-
-	Py_INCREF(Py_None);
-	return Py_None;
-}
-
-
-PyMethodDef _ppy_methods[] = {
-#if defined(__APPLE__)
-	{"NSLookupAndBindSymbol", _ppy_NSLookupAndBindSymbol, 
-	 METH_VARARGS, ""},
-#elif defined(linux)
-	{"dlsym", _ppy_dlsym, METH_VARARGS, ""},
-#endif
-	{"mmap_exec", _ppy_mmap_exec, METH_VARARGS, ""},
-	{"mmap_flush", _ppy_mmap_flush, METH_O, ""},
-	{0, 0}
-};
-
-#if !defined(MAP_ANON) && defined(__APPLE__)
-#define MAP_ANON 0x1000
-#endif
-
-PyMODINIT_FUNC
-init_ppcgen(void)
-{
-    PyObject* m;
-    PyObject* mmap_module;
-    PyObject* mmap_func;
-    PyObject* mmap_obj;
-
-    m =	Py_InitModule("_ppcgen", _ppy_methods);
-
-    /* argh */
-    /* time to campaign for a C API for the mmap module! */
-    mmap_module = PyImport_ImportModule("mmap");
-    if (!mmap_module)
-	    return;
-    mmap_func = PyObject_GetAttrString(mmap_module, "mmap");
-    if (!mmap_func)
-	    return;
-    mmap_obj = PyEval_CallFunction(mmap_func, "iii", -1, 0, MAP_ANON);
-    if (!mmap_obj)
-	    return;
-    mmap_type = mmap_obj->ob_type;
-    Py_INCREF(mmap_type);
-    Py_DECREF(mmap_obj);
-    Py_DECREF(mmap_func);
-    Py_DECREF(mmap_module);
-}
diff --git a/rpython/jit/backend/ppc/callbuilder.py b/rpython/jit/backend/ppc/callbuilder.py
--- a/rpython/jit/backend/ppc/callbuilder.py
+++ b/rpython/jit/backend/ppc/callbuilder.py
@@ -214,7 +214,7 @@
         # replace b1_location with BEQ(here)
         jmp_target = self.mc.currpos()
         pmc = OverwritingBuilder(self.mc, b1_location, 1)
-        pmc.bc(12, 2, jmp_target - b1_location)    # "beq"
+        pmc.beq(jmp_target - b1_location)
         pmc.overwrite()
 
         if not we_are_translated():        # for testing: now we can access
diff --git a/rpython/jit/backend/ppc/codebuilder.py b/rpython/jit/backend/ppc/codebuilder.py
--- a/rpython/jit/backend/ppc/codebuilder.py
+++ b/rpython/jit/backend/ppc/codebuilder.py
@@ -16,6 +16,14 @@
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
 from rpython.jit.backend.ppc.rassemblermaker import make_rassembler
 
+
+# these are the *forbidden* encodings that don't accept register r0:
+#    addi rX, r0, immed
+#    subi rX, r0, immed
+#    addis rX, r0, immed
+#    subis rX, r0, immed
+
+
 A = Form("frD", "frA", "frB", "XO3", "Rc")
 A1 = Form("frD", "frB", "XO3", "Rc")
 A2 = Form("frD", "frA", "frC", "XO3", "Rc")
@@ -910,30 +918,27 @@
 def high(w):
     return (w >> 16) & 0x0000FFFF
 
-# XXX check this
-if we_are_translated():
-    eci = ExternalCompilationInfo(includes = ['asm_ppc.h'])
+_eci = ExternalCompilationInfo(post_include_bits=[
+    '#define rpython_flush_icache()  asm("isync":::"memory")\n'
+    ])
+flush_icache = rffi.llexternal(
+    "rpython_flush_icache",
+    [],
+    lltype.Void,
+    compilation_info=_eci,
+    _nowrapper=True,
+    sandboxsafe=True)
 
-    flush_icache = rffi.llexternal(
-        "LL_flush_icache",
-        [lltype.Signed, lltype.Signed],
-        lltype.Void,
-        compilation_info=eci,
-        _nowrapper=True,
-        sandboxsafe=True)
-else:
-    def flush_icache(x, y): pass
 
 class PPCGuardToken(GuardToken):
     def __init__(self, cpu, gcmap, descr, failargs, faillocs,
                  exc, frame_depth, is_guard_not_invalidated=False,
                  is_guard_not_forced=False, fcond=c.cond_none):
-        assert fcond != c.cond_none
         GuardToken.__init__(self, cpu, gcmap, descr, failargs, faillocs, exc,
                             frame_depth, is_guard_not_invalidated,
                             is_guard_not_forced)
         self.fcond = fcond
-        #self.offset = offset
+
 
 class OverwritingBuilder(PPCAssembler):
     def __init__(self, mc, start, num_insts=0):
@@ -1205,14 +1210,10 @@
     def currpos(self):
         return self.get_relative_pos()
 
-    def flush_cache(self, addr):
-        startaddr = rffi.cast(lltype.Signed, addr)
-        size = rffi.cast(lltype.Signed, self.get_relative_pos())
-        flush_icache(startaddr, size)
-
     def copy_to_raw_memory(self, addr):
         self._copy_to_raw_memory(addr)
-        self.flush_cache(addr)
+        if we_are_translated():
+            flush_icache()
         self._dump(addr, "jit-backend-dump", 'ppc')
 
     def cmp_op(self, block, a, b, imm=False, signed=True, fp=False):
diff --git a/rpython/jit/backend/ppc/opassembler.py b/rpython/jit/backend/ppc/opassembler.py
--- a/rpython/jit/backend/ppc/opassembler.py
+++ b/rpython/jit/backend/ppc/opassembler.py
@@ -6,7 +6,9 @@
 from rpython.jit.backend.ppc.locations import imm as make_imm_loc
 from rpython.jit.backend.ppc.arch import (IS_PPC_32, IS_PPC_64, WORD,
                                           MAX_REG_PARAMS, MAX_FREG_PARAMS,
-                                          PARAM_SAVE_AREA_OFFSET)
+                                          PARAM_SAVE_AREA_OFFSET,
+                                          THREADLOCAL_ADDR_OFFSET,
+                                          IS_BIG_ENDIAN)
 
 from rpython.jit.metainterp.history import (JitCellToken, TargetToken, Box,
                                             AbstractFailDescr, FLOAT, INT, REF)
@@ -22,6 +24,7 @@
 from rpython.rtyper.lltypesystem import rstr, rffi, lltype
 from rpython.rtyper.annlowlevel import cast_instance_to_gcref
 from rpython.jit.metainterp.resoperation import rop
+from rpython.jit.codewriter.effectinfo import EffectInfo
 from rpython.jit.backend.ppc import callbuilder
 
 class IntOpAssembler(object):
@@ -209,7 +212,7 @@
         l0, res = arglocs
         self.mc.fabs(res.value, l0.value)
 
-    def emit_math_sqrt(self, op, arglocs, regalloc):
+    def _emit_math_sqrt(self, op, arglocs, regalloc):
         l0, res = arglocs
         self.mc.fsqrt(res.value, l0.value)
 
@@ -320,7 +323,7 @@
         self.mc.trap()
         self._cmp_guard_class(op, arglocs, regalloc)
         pmc = OverwritingBuilder(self.mc, patch_pos, 1)
-        pmc.bc(12, 0, self.mc.currpos() - patch_pos)    # LT
+        pmc.blt(self.mc.currpos() - patch_pos)
         pmc.overwrite()
         self.guard_success_cc = c.EQ
         self._emit_guard(op, arglocs[3:])
@@ -355,6 +358,13 @@
         self.guard_success_cc = c.EQ
         self._emit_guard(op, arglocs)
 
+    def emit_guard_not_forced_2(self, op, arglocs, regalloc):
+        guard_token = self.build_guard_token(op, arglocs[0].value, arglocs[1:],
+                                             c.cond_none, save_exc=False)
+        self._finish_gcmap = guard_token.gcmap
+        self._store_force_index(op)
+        self.store_info_on_descr(0, guard_token)
+
 
 class MiscOpAssembler(object):
 
@@ -448,6 +458,8 @@
             pmc.overwrite()
 
     def emit_guard_exception(self, op, arglocs, regalloc):
+        # XXX FIXME
+        # XXX pos_exc_value and pos_exception are 8 bytes apart, don't need both
         loc, loc1, resloc, pos_exc_value, pos_exception = arglocs[:5]
         failargs = arglocs[5:]
         self.mc.load_imm(loc1, pos_exception.value)
@@ -490,6 +502,9 @@
             cb.emit()
 
     def emit_call(self, op, arglocs, regalloc):
+        oopspecindex = regalloc.get_oopspecindex(op)
+        if oopspecindex == EffectInfo.OS_MATH_SQRT:
+            return self._emit_math_sqrt(op, arglocs, regalloc)
         self._emit_call(op, arglocs)
 
     def emit_call_may_force(self, op, arglocs, regalloc):
@@ -832,7 +847,7 @@
 
             if jz_location != -1:
                 pmc = OverwritingBuilder(self.mc, jz_location, 1)
-                pmc.bc(4, 1, self.mc.currpos() - jz_location)    # !GT
+                pmc.ble(self.mc.currpos() - jz_location)    # !GT
                 pmc.overwrite()
 
 class StrOpAssembler(object):
@@ -843,118 +858,61 @@
     emit_strgetitem = FieldOpAssembler.emit_getarrayitem_gc
     emit_strsetitem = FieldOpAssembler.emit_setarrayitem_gc
 
-    #from ../x86/regalloc.py:928 ff.
     def emit_copystrcontent(self, op, arglocs, regalloc):
-        assert len(arglocs) == 0
-        self._emit_copystrcontent(op, regalloc, is_unicode=False)
+        self._emit_copycontent(arglocs, is_unicode=False)
 
     def emit_copyunicodecontent(self, op, arglocs, regalloc):
-        assert len(arglocs) == 0
-        self._emit_copystrcontent(op, regalloc, is_unicode=True)
+        self._emit_copycontent(arglocs, is_unicode=True)
 
-    def _emit_copystrcontent(self, op, regalloc, is_unicode):
-        # compute the source address
-        args = op.getarglist()
-        base_loc = regalloc._ensure_value_is_boxed(args[0], args)
-        ofs_loc = regalloc._ensure_value_is_boxed(args[2], args)
-        assert args[0] is not args[1]    # forbidden case of aliasing
-        regalloc.possibly_free_var(args[0])
-        if args[3] is not args[2] is not args[4]:  # MESS MESS MESS: don't free
-            regalloc.possibly_free_var(args[2])     # it if ==args[3] or args[4]
-        srcaddr_box = TempPtr()
-        forbidden_vars = [args[1], args[3], args[4], srcaddr_box]
-        srcaddr_loc = regalloc.force_allocate_reg(srcaddr_box)
-        self._gen_address_inside_string(base_loc, ofs_loc, srcaddr_loc,
-                                        is_unicode=is_unicode)
+    def _emit_load_for_copycontent(self, dst, src_ptr, src_ofs, scale):
+        if src_ofs.is_imm():
+            value = src_ofs.value << scale
+            if value < 32768:
+                self.mc.addi(dst.value, src_ptr.value, value)
+            else:
+                self.mc.load_imm(dst, value)
+                self.mc.add(dst.value, src_ptr.value, dst.value)
+        elif scale == 0:
+            self.mc.add(dst.value, src_ptr.value, src_ofs.value)
+        else:
+            self.mc.sldi(dst.value, src_ofs.value, scale)
+            self.mc.add(dst.value, src_ptr.value, dst.value)
 
-        # compute the destination address
-        forbidden_vars = [args[4], args[3], srcaddr_box]
-        dstaddr_box = TempPtr()
-        dstaddr_loc = regalloc.force_allocate_reg(dstaddr_box)
-        forbidden_vars.append(dstaddr_box)
-        base_loc = regalloc._ensure_value_is_boxed(args[1], forbidden_vars)
-        ofs_loc = regalloc._ensure_value_is_boxed(args[3], forbidden_vars)
-        assert base_loc.is_reg()
-        assert ofs_loc.is_reg()
-        regalloc.possibly_free_var(args[1])
-        if args[3] is not args[4]:     # more of the MESS described above
-            regalloc.possibly_free_var(args[3])
-        regalloc.free_temp_vars()
-        self._gen_address_inside_string(base_loc, ofs_loc, dstaddr_loc,
-                                        is_unicode=is_unicode)
+    def _emit_copycontent(self, arglocs, is_unicode):
+        [src_ptr_loc, dst_ptr_loc,
+         src_ofs_loc, dst_ofs_loc, length_loc] = arglocs
 
-        # compute the length in bytes
-        forbidden_vars = [srcaddr_box, dstaddr_box]
-        if isinstance(args[4], Box):
-            length_box = args[4]
-            length_loc = regalloc.make_sure_var_in_reg(args[4], forbidden_vars)
+        if is_unicode:
+            basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
+                                        self.cpu.translate_support_code)
+            if   itemsize == 2: scale = 1
+            elif itemsize == 4: scale = 2
+            else: raise AssertionError
         else:
-            length_box = TempInt()
-            length_loc = regalloc.force_allocate_reg(length_box, forbidden_vars)
-            xxxxxxxxxxxxxxxxxxxxxxxx
-            imm = regalloc.convert_to_imm(args[4])
-            self.load(length_loc, imm)
-        if is_unicode:
-            bytes_box = TempPtr()
-            bytes_loc = regalloc.force_allocate_reg(bytes_box, forbidden_vars)
-            scale = self._get_unicode_item_scale()
-            assert length_loc.is_reg()
-            with scratch_reg(self.mc):
-                self.mc.load_imm(r.SCRATCH, 1 << scale)
-                if IS_PPC_32:
-                    self.mc.mullw(bytes_loc.value, r.SCRATCH.value, length_loc.value)
-                else:
-                    self.mc.mulld(bytes_loc.value, r.SCRATCH.value, length_loc.value)
-            length_box = bytes_box
-            length_loc = bytes_loc
-        # call memcpy()
-        regalloc.before_call()
-        imm_addr = make_imm_loc(self.memcpy_addr)
-        self._emit_call(imm_addr,
-                            [dstaddr_loc, srcaddr_loc, length_loc])
-
-        regalloc.possibly_free_var(length_box)
-        regalloc.possibly_free_var(dstaddr_box)
-        regalloc.possibly_free_var(srcaddr_box)
-
-    def _gen_address_inside_string(self, baseloc, ofsloc, resloc, is_unicode):
-        if is_unicode:
-            ofs_items, _, _ = symbolic.get_array_token(rstr.UNICODE,
-                                                  self.cpu.translate_support_code)
-            scale = self._get_unicode_item_scale()
-        else:
-            ofs_items, itemsize, _ = symbolic.get_array_token(rstr.STR,
-                                                  self.cpu.translate_support_code)
+            basesize, itemsize, _ = symbolic.get_array_token(rstr.STR,
+                                        self.cpu.translate_support_code)
             assert itemsize == 1
             scale = 0
-        self._gen_address(ofsloc, ofs_items, scale, resloc, baseloc)
 
-    def _gen_address(self, sizereg, baseofs, scale, result, baseloc=None):
-        assert sizereg.is_reg()
-        if scale > 0:
-            scaled_loc = r.r0
-            if IS_PPC_32:
-                self.mc.slwi(scaled_loc.value, sizereg.value, scale)
-            else:
-                self.mc.sldi(scaled_loc.value, sizereg.value, scale)
+        self._emit_load_for_copycontent(r.r0, src_ptr_loc, src_ofs_loc, scale)
+        self._emit_load_for_copycontent(r.r2, dst_ptr_loc, dst_ofs_loc, scale)
+
+        if length_loc.is_imm():
+            length = length_loc.getint()
+            self.mc.load_imm(r.r5, length << scale)
         else:
-            scaled_loc = sizereg
-        if baseloc is not None:
-            assert baseloc.is_reg()
-            self.mc.add(result.value, baseloc.value, scaled_loc.value)
-            self.mc.addi(result.value, result.value, baseofs)
-        else:
-            self.mc.addi(result.value, scaled_loc.value, baseofs)
+            if scale > 0:
+                self.mc.sldi(r.r5.value, length_loc.value, scale)
+            elif length_loc is not r.r5:
+                self.mc.mr(r.r5.value, length_loc.value)
 
-    def _get_unicode_item_scale(self):
-        _, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
-                                                  self.cpu.translate_support_code)
-        if itemsize == 4:
-            return 2
-        elif itemsize == 2:
-            return 1
-        else:
-            raise AssertionError("bad unicode item size")
+        self.mc.mr(r.r4.value, r.r0.value)
+        self.mc.addi(r.r4.value, r.r4.value, basesize)
+        self.mc.addi(r.r3.value, r.r2.value, basesize)
+
+        cb = callbuilder.CallBuilder(self, imm(self.memcpy_addr),
+                                     [r.r3, r.r4, r.r5], None)
+        cb.emit()
 
 
 class UnicodeOpAssembler(object):
@@ -991,135 +949,142 @@
     emit_jit_debug = emit_debug_merge_point
     emit_keepalive = emit_debug_merge_point
 
-    def emit_cond_call_gc_wb(self, op, arglocs, regalloc):
+    def _write_barrier_fastpath(self, mc, descr, arglocs, regalloc, array=False,
+                                is_frame=False, align_stack=False):
         # Write code equivalent to write_barrier() in the GC: it checks
-        # a flag in the object at arglocs[0], and if set, it calls the
-        # function remember_young_pointer() from the GC.  The two arguments
-        # to the call are in arglocs[:2].  The latter saves registers as needed
-        # and call the function jit_remember_young_pointer() from the GC.
-        descr = op.getdescr()
+        # a flag in the object at arglocs[0], and if set, it calls a
+        # helper piece of assembler.  The latter saves registers as needed
+        # and calls the function remember_young_pointer() from the GC.
         if we_are_translated():
             cls = self.cpu.gc_ll_descr.has_write_barrier_class()
             assert cls is not None and isinstance(descr, cls)
         #
-        opnum = op.getopnum()
-        card_marking = False
+        card_marking_mask = 0
         mask = descr.jit_wb_if_flag_singlebyte
-        if opnum == rop.COND_CALL_GC_WB_ARRAY and descr.jit_wb_cards_set != 0:
+        if array and descr.jit_wb_cards_set != 0:
             # assumptions the rest of the function depends on:
             assert (descr.jit_wb_cards_set_byteofs ==
                     descr.jit_wb_if_flag_byteofs)
-            assert descr.jit_wb_cards_set_singlebyte == -0x80
-            card_marking = True
-            mask = descr.jit_wb_if_flag_singlebyte | -0x80
+            card_marking_mask = descr.jit_wb_cards_set_singlebyte
         #
         loc_base = arglocs[0]
+        assert loc_base.is_reg()
+        if is_frame:
+            assert loc_base is r.SPP
         assert _check_imm_arg(descr.jit_wb_if_flag_byteofs)
-        with scratch_reg(self.mc):
-            self.mc.lbz(r.SCRATCH.value, loc_base.value,
-                        descr.jit_wb_if_flag_byteofs)
-            # test whether this bit is set
-            mask &= 0xFF
-            self.mc.andix(r.SCRATCH.value, r.SCRATCH.value, mask)
+        mc.lbz(r.SCRATCH2.value, loc_base.value, descr.jit_wb_if_flag_byteofs)
+        mc.andix(r.SCRATCH.value, r.SCRATCH2.value, mask & 0xFF)
 
-        jz_location = self.mc.currpos()
-        self.mc.nop()
+        jz_location = mc.get_relative_pos()
+        mc.trap()        # patched later with 'beq'
 
         # for cond_call_gc_wb_array, also add another fast path:
         # if GCFLAG_CARDS_SET, then we can just set one bit and be done
-        if card_marking:
-            with scratch_reg(self.mc):
-                self.mc.lbz(r.SCRATCH.value, loc_base.value,
-                            descr.jit_wb_if_flag_byteofs)
-                self.mc.extsb(r.SCRATCH.value, r.SCRATCH.value)
-
-                # test whether this bit is set
-                self.mc.cmpwi(0, r.SCRATCH.value, 0)
-
-                js_location = self.mc.currpos()
-                self.mc.nop()
+        if card_marking_mask:
+            # GCFLAG_CARDS_SET is in the same byte, loaded in r2 already
+            mc.andix(r.SCRATCH.value, r.SCRATCH2.value,
+                     card_marking_mask & 0xFF)
+            js_location = mc.get_relative_pos()
+            mc.trap()        # patched later with 'bne'
         else:
             js_location = 0
 
         # Write only a CALL to the helper prepared in advance, passing it as
         # argument the address of the structure we are writing into
         # (the first argument to COND_CALL_GC_WB).
-        helper_num = card_marking
-
-        if self._regalloc.fprm.reg_bindings:
+        helper_num = (card_marking_mask != 0)
+        if is_frame:
+            helper_num = 4
+        elif regalloc.fprm.reg_bindings:
             helper_num += 2
         if self.wb_slowpath[helper_num] == 0:    # tests only
             assert not we_are_translated()
             self.cpu.gc_ll_descr.write_barrier_descr = descr
-            self._build_wb_slowpath(card_marking,
-                                    bool(self._regalloc.fprm.reg_bindings))
+            self._build_wb_slowpath(card_marking_mask != 0,
+                                    bool(regalloc.fprm.reg_bindings))
             assert self.wb_slowpath[helper_num] != 0
         #
-        if loc_base is not r.r3:
-            self.mc.store(r.r3.value, r.SP.value, 24)
-            remap_frame_layout(self, [loc_base], [r.r3], r.SCRATCH)
-        addr = self.wb_slowpath[helper_num]
-        func = rffi.cast(lltype.Signed, addr)
-        self.mc.bl_abs(func)
-        if loc_base is not r.r3:
-            self.mc.load(r.r3.value, r.SP.value, 24)
+        if not is_frame:
+            mc.mr(r.r0.value, loc_base.value)    # unusual argument location
+        if is_frame and align_stack:
+            XXXX
+            mc.SUB_ri(esp.value, 16 - WORD) # erase the return address
+        mc.load_imm(r.SCRATCH2, self.wb_slowpath[helper_num])
+        mc.mtctr(r.SCRATCH2.value)
+        mc.bctrl()
+        if is_frame and align_stack:
+            XXXX
+            mc.ADD_ri(esp.value, 16 - WORD) # erase the return address
 
-        # if GCFLAG_CARDS_SET, then we can do the whole thing that would
-        # be done in the CALL above with just four instructions, so here
-        # is an inline copy of them
-        if card_marking:
-            with scratch_reg(self.mc):
-                jns_location = self.mc.currpos()
-                self.mc.nop()  # jump to the exit, patched later
-                # patch the JS above
-                offset = self.mc.currpos()
-                pmc = OverwritingBuilder(self.mc, js_location, 1)
-                # Jump if JS comparison is less than (bit set)
-                pmc.bc(12, 0, offset - js_location)
-                pmc.overwrite()
-                #
-                # case GCFLAG_CARDS_SET: emit a few instructions to do
-                # directly the card flag setting
-                loc_index = arglocs[1]
-                assert loc_index.is_reg()
-                tmp1 = arglocs[-1]
-                tmp2 = arglocs[-2]
-                tmp3 = arglocs[-3]
-                #byteofs
-                s = 3 + descr.jit_wb_card_page_shift
+        if card_marking_mask:
+            # The helper ends again with a check of the flag in the object.
+            # So here, we can simply write again a beq, which will be
+            # taken if GCFLAG_CARDS_SET is still not set.
+            jns_location = mc.get_relative_pos()
+            mc.trap()
+            #
+            # patch the 'bne' above
+            currpos = mc.currpos()
+            pmc = OverwritingBuilder(mc, js_location, 1)
+            pmc.bne(currpos - js_location)
+            pmc.overwrite()
+            #
+            # case GCFLAG_CARDS_SET: emit a few instructions to do
+            # directly the card flag setting
+            loc_index = arglocs[1]
+            if loc_index.is_reg():
 
-                self.mc.srli_op(tmp3.value, loc_index.value, s)
-                self.mc.not_(tmp3.value, tmp3.value)
+                tmp_loc = arglocs[2]
+                n = descr.jit_wb_card_page_shift
 
-                # byte_index
-                self.mc.li(r.SCRATCH.value, 7)
-                self.mc.srli_op(loc_index.value, loc_index.value,
-                                descr.jit_wb_card_page_shift)
-                self.mc.and_(tmp1.value, r.SCRATCH.value, loc_index.value)
+                # compute in tmp_loc the byte offset:
+                #     ~(index >> (card_page_shift + 3))   ('~' is 'not_' below)
+                mc.srli_op(tmp_loc.value, loc_index.value, n + 3)
 
-                # set the bit
-                self.mc.li(tmp2.value, 1)
-                self.mc.lbzx(r.SCRATCH.value, loc_base.value, tmp3.value)
-                self.mc.sl_op(tmp2.value, tmp2.value, tmp1.value)
-                self.mc.or_(r.SCRATCH.value, r.SCRATCH.value, tmp2.value)
-                self.mc.stbx(r.SCRATCH.value, loc_base.value, tmp3.value)
+                # compute in r2 the index of the bit inside the byte:
+                #     (index >> card_page_shift) & 7
+                mc.rldicl(r.SCRATCH2.value, loc_index.value, 64 - n, 61)
+                mc.li(r.SCRATCH.value, 1)
+                mc.not_(tmp_loc.value, tmp_loc.value)
+
+                # set r2 to 1 << r2
+                mc.sl_op(r.SCRATCH2.value, r.SCRATCH.value, r.SCRATCH2.value)
+
+                # set this bit inside the byte of interest
+                mc.lbzx(r.SCRATCH.value, loc_base.value, tmp_loc.value)
+                mc.or_(r.SCRATCH.value, r.SCRATCH.value, r.SCRATCH2.value)
+                mc.stbx(r.SCRATCH.value, loc_base.value, tmp_loc.value)
                 # done
 
-                # patch the JNS above
-                offset = self.mc.currpos()
-                pmc = OverwritingBuilder(self.mc, jns_location, 1)
-                # Jump if JNS comparison is not less than (bit not set)
-                pmc.bc(4, 0, offset - jns_location)
-                pmc.overwrite()
+            else:
+                byte_index = loc_index.value >> descr.jit_wb_card_page_shift
+                byte_ofs = ~(byte_index >> 3)
+                byte_val = 1 << (byte_index & 7)
+                assert _check_imm_arg(byte_ofs)
+
+                mc.lbz(r.SCRATCH.value, loc_base.value, byte_ofs)
+                mc.ori(r.SCRATCH.value, r.SCRATCH.value, byte_val)
+                mc.stb(r.SCRATCH.value, loc_base.value, byte_ofs)
+            #
+            # patch the beq just above
+            currpos = mc.currpos()
+            pmc = OverwritingBuilder(mc, jns_location, 1)
+            pmc.beq(currpos - jns_location)
+            pmc.overwrite()
 
         # patch the JZ above
-        offset = self.mc.currpos()
-        pmc = OverwritingBuilder(self.mc, jz_location, 1)
-        # Jump if JZ comparison is zero (CMP 0 is equal)
-        pmc.bc(12, 2, offset - jz_location)
+        currpos = mc.currpos()
+        pmc = OverwritingBuilder(mc, jz_location, 1)
+        pmc.beq(currpos - jz_location)
         pmc.overwrite()
 
-    emit_cond_call_gc_wb_array = emit_cond_call_gc_wb
+    def emit_cond_call_gc_wb(self, op, arglocs, regalloc):
+        self._write_barrier_fastpath(self.mc, op.getdescr(), arglocs, regalloc)
+
+    def emit_cond_call_gc_wb_array(self, op, arglocs, regalloc):
+        self._write_barrier_fastpath(self.mc, op.getdescr(), arglocs, regalloc,
+                                     array=True)
+
 
 class ForceOpAssembler(object):
 
@@ -1129,215 +1094,95 @@
         res_loc = arglocs[0]
         self.mc.mr(res_loc.value, r.SPP.value)
 
-    #    self._emit_guard(guard_op, regalloc._prepare_guard(guard_op), c.LT)
-    # from: ../x86/assembler.py:1668
-    # XXX Split into some helper methods
-    def emit_guard_call_assembler(self, op, guard_op, arglocs, regalloc):
-        tmploc = arglocs[1]
-        resloc = arglocs[2]
-        callargs = arglocs[3:]
+    def emit_call_assembler(self, op, arglocs, regalloc):
+        if len(arglocs) == 3:
+            [result_loc, argloc, vloc] = arglocs
+        else:
+            [result_loc, argloc] = arglocs
+            vloc = imm(0)
+        self._store_force_index(self._find_nearby_operation(regalloc, +1))
+        # 'result_loc' is either r3 or f1
+        self.call_assembler(op, argloc, vloc, result_loc, r.r3)
 
-        faildescr = guard_op.getdescr()
-        fail_index = self.cpu.get_fail_descr_number(faildescr)
-        self._write_fail_index(fail_index)
-        descr = op.getdescr()
-        assert isinstance(descr, JitCellToken)
-        # check value
-        assert tmploc is r.RES
-        xxxxxxxxxxxx
-        self._emit_call(fail_index, imm(descr._ppc_func_addr),
-                                callargs, result=tmploc)
-        if op.result is None:
-            value = self.cpu.done_with_this_frame_void_v
+    imm = staticmethod(imm)   # for call_assembler()
+
+    def _call_assembler_emit_call(self, addr, argloc, _):
+        self.regalloc_mov(argloc, r.r3)
+        self.mc.ld(r.r4.value, r.SP.value, THREADLOCAL_ADDR_OFFSET)
+
+        cb = callbuilder.CallBuilder(self, addr, [r.r3, r.r4], r.r3)
+        cb.emit()
+
+    def _call_assembler_emit_helper_call(self, addr, arglocs, result_loc):
+        cb = callbuilder.CallBuilder(self, addr, arglocs, result_loc)
+        cb.emit()
+
+    def _call_assembler_check_descr(self, value, tmploc):
+        ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
+        self.mc.ld(r.r5.value, r.r3.value, ofs)
+        if _check_imm_arg(value):
+            self.mc.cmp_op(0, r.r5.value, value, imm=True)
         else:
+            self.mc.load_imm(r.r4, value)
+            self.mc.cmp_op(0, r.r5.value, r.r4.value, imm=False)
+        jump_if_eq = self.mc.currpos()
+        self.mc.nop()      # patched later
+        return jump_if_eq
+
+    def _call_assembler_patch_je(self, result_loc, je_location):
+        jump_to_done = self.mc.currpos()
+        self.mc.nop()      # patched later
+        #
+        currpos = self.mc.currpos()
+        pmc = OverwritingBuilder(self.mc, je_location, 1)
+        pmc.beq(currpos - je_location)
+        pmc.overwrite()
+        #
+        return jump_to_done
+
+    def _call_assembler_load_result(self, op, result_loc):
+        if op.result is not None:
+            # load the return value from the dead frame's value index 0
             kind = op.result.type
-            if kind == INT:
-                value = self.cpu.done_with_this_frame_int_v
-            elif kind == REF:
-                value = self.cpu.done_with_this_frame_ref_v
-            elif kind == FLOAT:
-                value = self.cpu.done_with_this_frame_float_v
+            descr = self.cpu.getarraydescr_for_frame(kind)
+            ofs = self.cpu.unpack_arraydescr(descr)
+            if kind == FLOAT:
+                assert result_loc is r.f1
+                self.mc.lfd(r.f1.value, r.r3.value, ofs)
             else:
-                raise AssertionError(kind)
+                assert result_loc is r.r3
+                self.mc.ld(r.r3.value, r.r3.value, ofs)
 
-        # take fast path on equality
-        # => jump on inequality
-        with scratch_reg(self.mc):
-            self.mc.load_imm(r.SCRATCH, value)
-            self.mc.cmp_op(0, tmploc.value, r.SCRATCH.value)
-
-        #if values are equal we take the fast path
-        # Slow path, calling helper
-        # jump to merge point
-
-        jd = descr.outermost_jitdriver_sd
-        assert jd is not None
-
-        # Path A: load return value and reset token
-        # Fast Path using result boxes
-
-        fast_jump_pos = self.mc.currpos()
-        self.mc.nop()
-
-        # Reset the vable token --- XXX really too much special logic here:-(
-        if jd.index_of_virtualizable >= 0:
-            from pypy.jit.backend.llsupport.descr import FieldDescr
-            fielddescr = jd.vable_token_descr
-            assert isinstance(fielddescr, FieldDescr)
-            ofs = fielddescr.offset
-            tmploc = regalloc.get_scratch_reg(INT)
-            with scratch_reg(self.mc):
-                self.mov_loc_loc(arglocs[0], r.SCRATCH)
-                self.mc.li(tmploc.value, 0)
-                self.mc.storex(tmploc.value, 0, r.SCRATCH.value)
-
-        if op.result is not None:
-            # load the return value from fail_boxes_xxx[0]
-            kind = op.result.type
-            if kind == INT:
-                adr = self.fail_boxes_int.get_addr_for_num(0)
-            elif kind == REF:
-                adr = self.fail_boxes_ptr.get_addr_for_num(0)
-            elif kind == FLOAT:
-                adr = self.fail_boxes_float.get_addr_for_num(0)
-            else:
-                raise AssertionError(kind)
-            with scratch_reg(self.mc):
-                self.mc.load_imm(r.SCRATCH, adr)
-                if op.result.type == FLOAT:
-                    self.mc.lfdx(resloc.value, 0, r.SCRATCH.value)
-                else:
-                    self.mc.loadx(resloc.value, 0, r.SCRATCH.value)
-
-        # jump to merge point, patched later
-        fast_path_to_end_jump_pos = self.mc.currpos()
-        self.mc.nop()
-
-        jmp_pos = self.mc.currpos()
-        pmc = OverwritingBuilder(self.mc, fast_jump_pos, 1)
-        pmc.bc(4, 2, jmp_pos - fast_jump_pos)
+    def _call_assembler_patch_jmp(self, jmp_location):
+        currpos = self.mc.currpos()
+        pmc = OverwritingBuilder(self.mc, jmp_location, 1)
+        pmc.b(currpos - jmp_location)
         pmc.overwrite()
 
-        # Path B: use assembler helper
-        asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
-        if self.cpu.supports_floats:
-            floats = r.VOLATILES_FLOAT
-        else:
-            floats = []
-
-        with Saved_Volatiles(self.mc, save_RES=False):
-            # result of previous call is in r3
-            self.mov_loc_loc(arglocs[0], r.r4)
-            self.mc.call(asm_helper_adr)
-
-        # merge point
-        currpos = self.mc.currpos()
-        pmc = OverwritingBuilder(self.mc, fast_path_to_end_jump_pos, 1)
-        pmc.b(currpos - fast_path_to_end_jump_pos)
-        pmc.overwrite()
-
-        with scratch_reg(self.mc):
-            self.mc.load(r.SCRATCH.value, r.SPP.value, FORCE_INDEX_OFS)
-            self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
-
-        self._emit_guard(guard_op, regalloc._prepare_guard(guard_op),
-                                        xxxxxxxxxxxxxxxxx+c.LT, save_exc=True)
-
-    # ../x86/assembler.py:668
     def redirect_call_assembler(self, oldlooptoken, newlooptoken):
         # some minimal sanity checking
         old_nbargs = oldlooptoken.compiled_loop_token._debug_nbargs
         new_nbargs = newlooptoken.compiled_loop_token._debug_nbargs
         assert old_nbargs == new_nbargs
-        oldadr = oldlooptoken._ppc_func_addr
-        target = newlooptoken._ppc_func_addr
-        if IS_PPC_32:
-            # we overwrite the instructions at the old _ppc_func_addr
-            # to start with a JMP to the new _ppc_func_addr.
+        oldadr = oldlooptoken._ll_function_addr
+        target = newlooptoken._ll_function_addr
+        if IS_PPC_32 or not IS_BIG_ENDIAN:
+            # we overwrite the instructions at the old _ll_function_addr
+            # to start with a JMP to the new _ll_function_addr.
             # Ideally we should rather patch all existing CALLs, but well.
             mc = PPCBuilder()
             mc.b_abs(target)
             mc.copy_to_raw_memory(oldadr)
         else:
-            # PPC64 trampolines are data so overwrite the code address
-            # in the function descriptor at the old address
-            # (TOC and static chain pointer are the same).
+            # PPC64 big-endian trampolines are data so overwrite the code
+            # address in the function descriptor at the old address.
+            # Copy the whole 3-word trampoline, even though the other
+            # words are always zero so far.
             odata = rffi.cast(rffi.CArrayPtr(lltype.Signed), oldadr)
             tdata = rffi.cast(rffi.CArrayPtr(lltype.Signed), target)
             odata[0] = tdata[0]
-
-    def emit_guard_call_may_force(self, op, guard_op, arglocs, regalloc):
-        faildescr = guard_op.getdescr()
-        fail_index = self.cpu.get_fail_descr_number(faildescr)
-        self._write_fail_index(fail_index)
-        numargs = op.numargs()
-        callargs = arglocs[2:numargs + 1]  # extract the arguments to the call
-        adr = arglocs[1]
-        resloc = arglocs[0]
-        #
-        descr = op.getdescr()
-        size = descr.get_result_size()
-        signed = descr.is_result_signed()
-        #
-        xxxxxxxxxxxxxx
-        self._emit_call(fail_index, adr, callargs, resloc, (size, signed))
-
-        with scratch_reg(self.mc):
-            self.mc.load(r.SCRATCH.value, r.SPP.value, FORCE_INDEX_OFS)
-            self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
-
-        self._emit_guard(guard_op, arglocs[1 + numargs:],
-                         xxxxxxxxxxxxxx+c.LT, save_exc=True)
-
-    def emit_guard_call_release_gil(self, op, guard_op, arglocs, regalloc):
-
-        # first, close the stack in the sense of the asmgcc GC root tracker
-        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
-        numargs = op.numargs()
-        callargs = arglocs[2:numargs + 1]  # extract the arguments to the call
-        adr = arglocs[1]
-        resloc = arglocs[0]
-
-        if gcrootmap:
-            self.call_release_gil(gcrootmap, arglocs)
-        # do the call
-        faildescr = guard_op.getdescr()
-        fail_index = self.cpu.get_fail_descr_number(faildescr)
-        self._write_fail_index(fail_index)
-        #
-        descr = op.getdescr()
-        size = descr.get_result_size()
-        signed = descr.is_result_signed()
-        #
-        xxxxxxxxxxxxxxx
-        self._emit_call(fail_index, adr, callargs, resloc, (size, signed))
-        # then reopen the stack
-        if gcrootmap:
-            self.call_reacquire_gil(gcrootmap, resloc)
-
-        with scratch_reg(self.mc):
-            self.mc.load(r.SCRATCH.value, r.SPP.value, 0)
-            self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
-
-        self._emit_guard(guard_op, arglocs[1 + numargs:],
-                         xxxxxxxxxxxxxxxxxx+c.LT, save_exc=True)
-
-    def call_release_gil(self, gcrootmap, save_registers):
-        # XXX don't know whether this is correct
-        # XXX use save_registers here
-        assert gcrootmap.is_shadow_stack
-        with Saved_Volatiles(self.mc):
-            #self._emit_call(NO_FORCE_INDEX, self.releasegil_addr, 
-            #                [], self._regalloc)
-            self._emit_call(imm(self.releasegil_addr), [])
-
-    def call_reacquire_gil(self, gcrootmap, save_loc):
-        # save the previous result into the stack temporarily.
-        # XXX like with call_release_gil(), we assume that we don't need
-        # to save vfp regs in this case. Besides the result location
-        assert gcrootmap.is_shadow_stack
-        with Saved_Volatiles(self.mc):
-            self._emit_call(imm(self.reacqgil_addr), [])
+            odata[1] = tdata[1]
+            odata[2] = tdata[2]
 
 
 class OpAssembler(IntOpAssembler, GuardOpAssembler,
diff --git a/rpython/jit/backend/ppc/ppc_assembler.py b/rpython/jit/backend/ppc/ppc_assembler.py
--- a/rpython/jit/backend/ppc/ppc_assembler.py
+++ b/rpython/jit/backend/ppc/ppc_assembler.py
@@ -28,7 +28,7 @@
 from rpython.rlib.debug import (debug_print, debug_start, debug_stop,
                                 have_debug_prints)
 from rpython.rlib import rgc
-from rpython.rtyper.annlowlevel import llhelper
+from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref
 from rpython.rlib.objectmodel import we_are_translated, specialize
 from rpython.rtyper.lltypesystem.lloperation import llop
 from rpython.jit.backend.ppc.locations import StackLocation, get_fp_offset, imm
@@ -92,8 +92,10 @@
     def __init__(self, cpu, translate_support_code=False):
         BaseAssembler.__init__(self, cpu, translate_support_code)
         self.loop_run_counters = []
+        self.wb_slowpath = [0, 0, 0, 0, 0]
         self.setup_failure_recovery()
         self.stack_check_slowpath = 0
+        self.propagate_exception_path = 0
         self.teardown()
 
     def set_debug(self, v):
@@ -122,33 +124,6 @@
             mc.lfd(reg.value, spp_reg.value,
                         self.OFFSET_SPP_TO_FPR_SAVE_AREA + WORD * i)
 
-    # The code generated here allocates a new stackframe 
-    # and is the first machine code to be executed.
-    def _make_frame(self, frame_depth):
-        XXX
-        self.mc.make_function_prologue(frame_depth)
-
-        # save SPP at the bottom of the stack frame
-        self.mc.store(r.SPP.value, r.SP.value, WORD)
-
-        # compute spilling pointer (SPP)
-        self.mc.addi(r.SPP.value, r.SP.value, 
-                frame_depth - self.OFFSET_SPP_TO_OLD_BACKCHAIN)
-
-        # save nonvolatile registers
-        self._save_nonvolatiles()
-
-        # save r31, use r30 as scratch register
-        # this is safe because r30 has been saved already
-        assert NONVOLATILES[-1] == r.SPP
-        ofs_to_r31 = (self.OFFSET_SPP_TO_GPR_SAVE_AREA +
-                      WORD * (len(NONVOLATILES)-1))
-        self.mc.load(r.r30.value, r.SP.value, WORD)
-        self.mc.store(r.r30.value, r.SPP.value, ofs_to_r31)
-        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
-        if gcrootmap and gcrootmap.is_shadow_stack:
-            self.gen_shadowstack_header(gcrootmap)
-
     def gen_shadowstack_header(self, gcrootmap):
         # we need to put two words into the shadowstack: the MARKER_FRAME
         # and the address of the frame (fp, actually)
@@ -296,7 +271,7 @@
         self._frame_realloc_slowpath = mc.materialize(self.cpu, [])
         self.mc = None
 
-    def _store_and_reset_exception(self, mc, excvalloc, exctploc):
+    def _store_and_reset_exception(self, mc, excvalloc, exctploc=None):
         """Reset the exception, after fetching it inside the two regs.
         """
         mc.load_imm(r.r2, self.cpu.pos_exc_value())
@@ -304,7 +279,8 @@
         assert _check_imm_arg(diff)
         # Load the exception fields into the two registers
         mc.load(excvalloc.value, r.r2.value, 0)
-        mc.load(exctploc.value, r.r2.value, diff)
+        if exctploc is not None:
+            mc.load(exctploc.value, r.r2.value, diff)
         # Zero out the exception fields
         mc.li(r.r0.value, 0)
         mc.store(r.r0.value, r.r2.value, 0)
@@ -359,6 +335,7 @@
         return mc.materialize(self.cpu, [])
 
     def _build_malloc_slowpath(self):
+        xxxxxxx
         mc = PPCBuilder()
         frame_size = (len(r.MANAGED_FP_REGS) * WORD
                     + (BACKCHAIN_SIZE + MAX_REG_PARAMS) * WORD)
@@ -405,7 +382,7 @@
         # if r3 == 0 we skip the return above and jump to the exception path
         offset = mc.currpos() - jmp_pos
         pmc = OverwritingBuilder(mc, jmp_pos, 1)
-        pmc.bc(12, 2, offset) 
+        pmc.beq(offset)
         pmc.overwrite()
         # restore the frame before leaving
         with scratch_reg(mc):
@@ -500,7 +477,7 @@
         mc.b(self.propagate_exception_path)
 
         pmc = OverwritingBuilder(mc, jnz_location, 1)
-        pmc.bc(4, 2, mc.currpos() - jnz_location)
+        pmc.bne(mc.currpos() - jnz_location)
         pmc.overwrite()
 
         # restore link register out of preprevious frame
@@ -520,7 +497,6 @@
             self.write_64_bit_func_descr(rawstart, rawstart+3*WORD)
         self.stack_check_slowpath = rawstart
 
-    # TODO: see what need to be done when for_frame is True
     def _build_wb_slowpath(self, withcards, withfloats=False, for_frame=False):
         descr = self.cpu.gc_ll_descr.write_barrier_descr
         if descr is None:
@@ -536,56 +512,108 @@
         #
         # This builds a helper function called from the slow path of
         # write barriers.  It must save all registers, and optionally
-        # all fp registers.
+        # all fp registers.  It takes its single argument in r0.
         mc = PPCBuilder()
+        old_mc = self.mc
+        self.mc = mc
         #
-        frame_size = ((len(r.VOLATILES) + len(r.VOLATILES_FLOAT)
-                      + BACKCHAIN_SIZE + MAX_REG_PARAMS) * WORD)
-        mc.make_function_prologue(frame_size)
-        for i in range(len(r.VOLATILES)):
-                       mc.store(r.VOLATILES[i].value, r.SP.value,
-                              (BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD)
-        if self.cpu.supports_floats:
-            for i in range(len(r.VOLATILES_FLOAT)):
-                           mc.stfd(r.VOLATILES_FLOAT[i].value, r.SP.value,
-                                  (len(r.VOLATILES) + BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD)
+        ignored_regs = [reg for reg in r.MANAGED_REGS if not (
+                            # 'reg' will be pushed if the following is true:
+                            reg in r.VOLATILES or
+                            reg is r.RCS1 or
+                            (withcards and reg is r.RCS2))]
+        if not for_frame:
+            # push all volatile registers, push RCS1, and sometimes push RCS2
+            self._push_all_regs_to_jitframe(mc, ignored_regs, withfloats)
+        else:
+            return #XXXXX
+            # we have one word to align
+            mc.SUB_ri(esp.value, 7 * WORD) # align and reserve some space
+            mc.MOV_sr(WORD, eax.value) # save for later
+            if self.cpu.supports_floats:
+                mc.MOVSD_sx(2 * WORD, xmm0.value)   # 32-bit: also 3 * WORD
+            if IS_X86_32:
+                mc.MOV_sr(4 * WORD, edx.value)
+                mc.MOV_sr(0, ebp.value)
+                exc0, exc1 = esi, edi
+            else:
+                mc.MOV_rr(edi.value, ebp.value)
+                exc0, exc1 = ebx, r12
+            mc.MOV(RawEspLoc(WORD * 5, REF), exc0)
+            mc.MOV(RawEspLoc(WORD * 6, INT), exc1)
+            # note that it's safe to store the exception in a register,
+            # since the call to write barrier can't collect
+            # (and this is assumed a bit left and right here, like lack
+            # of _reload_frame_if_necessary)
+            self._store_and_reset_exception(mc, exc0, exc1)
 
-        mc.call(rffi.cast(lltype.Signed, func))
-        if self.cpu.supports_floats:
-            for i in range(len(r.VOLATILES_FLOAT)):
-                           mc.lfd(r.VOLATILES_FLOAT[i].value, r.SP.value,
-                                  (len(r.VOLATILES) + BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD)
-        for i in range(len(r.VOLATILES)):
-                       mc.load(r.VOLATILES[i].value, r.SP.value,
-                              (BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD)
-        mc.restore_LR_from_caller_frame(frame_size)
+        if withcards:
+            mc.mr(r.RCS2.value, r.r0.value)
+        #
+        # Save the lr into r.RCS1
+        mc.mflr(r.RCS1.value)
+        #
+        func = rffi.cast(lltype.Signed, func)
+        cb = callbuilder.CallBuilder(self, imm(func), [r.r0], None)
+        cb.emit()
+        #
+        # Restore lr
+        mc.mtlr(r.RCS1.value)
         #
         if withcards:
-            # A final compare before the RET, for the caller.  Careful to
+            # A final andix before the blr, for the caller.  Careful to
             # not follow this instruction with another one that changes
-            # the status of the CPU flags!
-            mc.lbz(r.SCRATCH.value, r.r3.value,
-                   descr.jit_wb_if_flag_byteofs)
-            mc.extsb(r.SCRATCH.value, r.SCRATCH.value)
-            mc.cmpwi(0, r.SCRATCH.value, 0)
+            # the status of cr0!
+            card_marking_mask = descr.jit_wb_cards_set_singlebyte
+            mc.lbz(r.RCS2.value, r.RCS2.value, descr.jit_wb_if_flag_byteofs)
+            mc.andix(r.RCS2.value, r.RCS2.value, card_marking_mask & 0xFF)
         #
-        mc.addi(r.SP.value, r.SP.value, frame_size)
-        mc.blr()
-        #
+
+        if not for_frame:
+            self._pop_all_regs_from_jitframe(mc, ignored_regs, withfloats)
+            mc.blr()
+        else:
+            XXXXXXX
+            if IS_X86_32:
+                mc.MOV_rs(edx.value, 4 * WORD)
+            if self.cpu.supports_floats:
+                mc.MOVSD_xs(xmm0.value, 2 * WORD)
+            mc.MOV_rs(eax.value, WORD) # restore
+            self._restore_exception(mc, exc0, exc1)
+            mc.MOV(exc0, RawEspLoc(WORD * 5, REF))
+            mc.MOV(exc1, RawEspLoc(WORD * 6, INT))
+            mc.LEA_rs(esp.value, 7 * WORD)
+            mc.RET()
+
+        self.mc = old_mc
         rawstart = mc.materialize(self.cpu, [])
-        self.wb_slowpath[withcards + 2 * withfloats] = rawstart
+        if for_frame:
+            self.wb_slowpath[4] = rawstart
+        else:
+            self.wb_slowpath[withcards + 2 * withfloats] = rawstart
 
     def _build_propagate_exception_path(self):
         if not self.cpu.propagate_exception_descr:
             return
 
-        mc = PPCBuilder()
-        # the following call may be needed in the future:
-        # self._store_and_reset_exception()
+        self.mc = PPCBuilder()
+        #
+        # read and reset the current exception
 
-        mc.load_imm(r.RES, self.cpu.propagate_exception_descr)
-        self._gen_epilogue(mc)
-        self.propagate_exception_path = mc.materialize(self.cpu, [])
+        propagate_exception_descr = rffi.cast(lltype.Signed,
+                  cast_instance_to_gcref(self.cpu.propagate_exception_descr))
+        ofs3 = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
+        ofs4 = self.cpu.get_ofs_of_frame_field('jf_descr')
+
+        self._store_and_reset_exception(self.mc, r.r3)
+        self.mc.load_imm(r.r4, propagate_exception_descr)
+        self.mc.std(r.r3.value, r.SPP.value, ofs3)
+        self.mc.std(r.r4.value, r.SPP.value, ofs4)
+        #
+        self._call_footer()
+        rawstart = self.mc.materialize(self.cpu, [])
+        self.propagate_exception_path = rawstart
+        self.mc = None
 
     # The code generated here serves as an exit stub from
     # the executed machine code.
@@ -617,28 +645,6 @@
 
         return mc.materialize(self.cpu, [], self.cpu.gc_ll_descr.gcrootmap)
 
-    def _gen_epilogue(self, mc):
-        XXX
-        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
-        if gcrootmap and gcrootmap.is_shadow_stack:
-            self.gen_footer_shadowstack(gcrootmap, mc)
-
-        # save SPP back in r3
-        mc.mr(r.r5.value, r.SPP.value)
-        self._restore_nonvolatiles(mc, r.r5)
-        # load old backchain into r4
-        if IS_PPC_32:
-            ofs = WORD
-        else:
-            ofs = WORD * 2
-        mc.load(r.r4.value, r.r5.value, self.OFFSET_SPP_TO_OLD_BACKCHAIN + ofs) 
-        mc.mtlr(r.r4.value)     # restore LR
-        # From SPP, we have a constant offset to the old backchain. We use the
-        # SPP to re-establish the old backchain because this exit stub is
-        # generated before we know how much space the entire frame will need.
-        mc.addi(r.SP.value, r.r5.value, self.OFFSET_SPP_TO_OLD_BACKCHAIN) # restore old SP
-        mc.blr()
-
     def _save_managed_regs(self, mc):
         """ store managed registers in ENCODING AREA
         """
@@ -735,7 +741,7 @@
             offset = self.mc.currpos() - patch_loc
             #
             pmc = OverwritingBuilder(self.mc, patch_loc, 1)
-            pmc.bc(4, 1, offset) # jump if SCRATCH <= r16, i. e. not(SCRATCH > r16)
+            pmc.ble(offset) # jump if SCRATCH <= r16, i. e. not(SCRATCH > r16)
             pmc.overwrite()
 
     def _call_footer(self):
@@ -944,97 +950,11 @@
         self.teardown()
         return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
 
-    DESCR_REF       = 0x00
-    DESCR_INT       = 0x01
-    DESCR_FLOAT     = 0x02
-    DESCR_SPECIAL   = 0x03
-    CODE_FROMSTACK  = 128
-    CODE_STOP       = 0 | DESCR_SPECIAL
-    CODE_HOLE       = 4 | DESCR_SPECIAL
-    CODE_INPUTARG   = 8 | DESCR_SPECIAL
-
-    def gen_descr_encoding(self, descr, failargs, locs):
-        assert self.mc is not None
-        buf = []
-        for i in range(len(failargs)):
-            arg = failargs[i]
-            if arg is not None:
-                if arg.type == REF:
-                    kind = self.DESCR_REF
-                elif arg.type == INT:
-                    kind = self.DESCR_INT
-                elif arg.type == FLOAT:
-                    kind = self.DESCR_FLOAT
-                else:
-                    raise AssertionError("bogus kind")
-                loc = locs[i]
-                if loc.is_stack():
-                    pos = loc.position
-                    if pos < 0:
-                        buf.append(self.CODE_INPUTARG)
-                        pos = ~pos
-                    n = self.CODE_FROMSTACK // 4 + pos
-                else:
-                    assert loc.is_reg() or loc.is_fp_reg()
-                    n = loc.value
-                n = kind + 4 * n
-                while n > 0x7F:
-                    buf.append((n & 0x7F) | 0x80)
-                    n >>= 7
-            else:
-                n = self.CODE_HOLE
-            buf.append(n)
-        buf.append(self.CODE_STOP)
-
-        fdescr = self.cpu.get_fail_descr_number(descr)
-
-        buf.append((fdescr >> 24) & 0xFF)
-        buf.append((fdescr >> 16) & 0xFF)
-        buf.append((fdescr >>  8) & 0xFF)
-        buf.append( fdescr        & 0xFF)
-        
-        lenbuf = len(buf)
-        # XXX fix memory leaks
-        enc_arr = lltype.malloc(rffi.CArray(rffi.CHAR), lenbuf, 
-                                flavor='raw', track_allocation=False)
-        enc_ptr = rffi.cast(lltype.Signed, enc_arr)
-        for i, byte in enumerate(buf):
-            enc_arr[i] = chr(byte)
-        # assert that the fail_boxes lists are big enough
-        assert len(failargs) <= self.fail_boxes_int.SIZE
-        return enc_ptr
-
-    def align(self, size):
-        while size % 8 != 0:
-            size += 1
-        return size
-
     def teardown(self):
         self.pending_guard_tokens = None
         self.mc = None
         self.current_clt = None
 
-    def compute_frame_depth(self, spilling_area, param_depth):
-        PARAMETER_AREA = param_depth * WORD
-        if IS_PPC_64:
-            PARAMETER_AREA += MAX_REG_PARAMS * WORD
-        SPILLING_AREA = spilling_area * WORD
-
-        frame_depth = (  GPR_SAVE_AREA
-                       + FPR_SAVE_AREA
-                       + FLOAT_INT_CONVERSION
-                       + FORCE_INDEX
-                       + self.ENCODING_AREA
-                       + SPILLING_AREA
-                       + PARAMETER_AREA
-                       + BACKCHAIN_SIZE * WORD)
-
-        # align stack pointer
-        while frame_depth % (4 * WORD) != 0:
-            frame_depth += WORD
-
-        return frame_depth
-    
     def _find_failure_recovery_bytecode(self, faildescr):
         return faildescr._failure_recovery_code_adr
 
@@ -1207,7 +1127,8 @@
                 with scratch_reg(self.mc):
                     offset = loc.value
                     self.mc.load_imm(r.SCRATCH, value)
-                    self.mc.store(r.SCRATCH.value, r.SPP.value, offset)
+                    self.mc.lfdx(r.FP_SCRATCH.value, 0, r.SCRATCH.value)
+                    self.mc.stfd(r.FP_SCRATCH.value, r.SPP.value, offset)
                 return
             assert 0, "not supported location"
         elif prev_loc.is_fp_reg():
@@ -1258,13 +1179,13 @@
                 self.mc.lfd(loc.value, r.SP.value, index)
             else:
                 self.mc.lfd(r.FP_SCRATCH.value, r.SP.value, index)
-                self.regalloc_mov(r.FP_SCRATCH.value, loc)
+                self.regalloc_mov(r.FP_SCRATCH, loc)
         else:
             if loc.is_core_reg():
                 self.mc.ld(loc.value, r.SP.value, index)
             else:
                 self.mc.ld(r.SCRATCH.value, r.SP.value, index)
-                self.regalloc_mov(r.SCRATCH.value, loc)
+                self.regalloc_mov(r.SCRATCH, loc)
 
     def malloc_cond(self, nursery_free_adr, nursery_top_adr, size):
         assert size & (WORD-1) == 0     # must be correctly aligned
@@ -1301,7 +1222,7 @@
 
         offset = self.mc.currpos() - fast_jmp_pos
         pmc = OverwritingBuilder(self.mc, fast_jmp_pos, 1)
-        pmc.bc(4, 1, offset) # jump if LE (not GT)
+        pmc.ble(offset) # jump if LE (not GT)
         pmc.overwrite()
         
         with scratch_reg(self.mc):
@@ -1318,8 +1239,10 @@
             gcrootmap.write_callshape(mark, force_index)
 
     def propagate_memoryerror_if_r3_is_null(self):
-        return # XXXXXXXXX
-        self.mc.cmp_op(0, r.RES.value, 0, imm=True)
+        # if self.propagate_exception_path == 0 (tests), this may jump to 0
+        # and segfault.  too bad.  the alternative is to continue anyway
+        # with r3==0, but that will segfault too.
+        self.mc.cmp_op(0, r.r3.value, 0, imm=True)
         self.mc.b_cond_abs(self.propagate_exception_path, c.EQ)
 
     def write_new_force_index(self):
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -490,7 +490,7 @@
 
     prepare_int_force_ge_zero = helper.prepare_unary_op
 
-    def prepare_math_sqrt(self, op):
+    def _prepare_math_sqrt(self, op):
         loc = self.ensure_reg(op.getarg(1))
         self.free_op_vars()
         res = self.fprm.force_allocate_reg(op.result)
@@ -839,8 +839,17 @@
         return [base_loc, index_loc, value_loc, ofs_loc,
                 imm_size, imm_size]
 
-    #prepare_copystrcontent = void
-    #prepare_copyunicodecontent = void
+    def prepare_copystrcontent(self, op):
+        src_ptr_loc = self.ensure_reg(op.getarg(0))
+        dst_ptr_loc = self.ensure_reg(op.getarg(1))
+        src_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(2))
+        dst_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(3))
+        length_loc  = self.ensure_reg_or_any_imm(op.getarg(4))
+        self._spill_before_call(save_all_regs=False)
+        return [src_ptr_loc, dst_ptr_loc,
+                src_ofs_loc, dst_ofs_loc, length_loc]
+
+    prepare_copyunicodecontent = prepare_copystrcontent
 
     def prepare_unicodelen(self, op):
         basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
@@ -877,22 +886,21 @@
     prepare_cast_ptr_to_int = prepare_same_as
     prepare_cast_int_to_ptr = prepare_same_as
 
+    def get_oopspecindex(self, op):
+        descr = op.getdescr()
+        assert descr is not None
+        effectinfo = descr.get_extra_info()
+        if effectinfo is not None:
+            return effectinfo.oopspecindex
+        return EffectInfo.OS_NONE
+
     def prepare_call(self, op):
-        effectinfo = op.getdescr().get_extra_info()
-        if effectinfo is not None:
-            oopspecindex = effectinfo.oopspecindex
-            if oopspecindex == EffectInfo.OS_MATH_SQRT:
-                xxxxxxxxx
-                args = self.prepare_math_sqrt(op)
-                self.assembler.emit_math_sqrt(op, args, self)
-                return
+        oopspecindex = self.get_oopspecindex(op)
+        if oopspecindex == EffectInfo.OS_MATH_SQRT:
+            return self._prepare_math_sqrt(op)
         return self._prepare_call(op)
 
-    def _prepare_call(self, op, save_all_regs=False):
-        args = []
-        args.append(None)
-        for i in range(op.numargs()):
-            args.append(self.loc(op.getarg(i)))
+    def _spill_before_call(self, save_all_regs=False):
         # spill variables that need to be saved around calls
         self.fprm.before_call(save_all_regs=save_all_regs)
         if not save_all_regs:
@@ -900,10 +908,16 @@
             if gcrootmap and gcrootmap.is_shadow_stack:
                 save_all_regs = 2
         self.rm.before_call(save_all_regs=save_all_regs)
+
+    def _prepare_call(self, op, save_all_regs=False):
+        args = []
+        args.append(None)
+        for i in range(op.numargs()):
+            args.append(self.loc(op.getarg(i)))
+        self._spill_before_call(save_all_regs)
         if op.result:
             resloc = self.after_call(op.result)
             args[0] = resloc
-        self.before_call_called = True
         return args
 
     def prepare_call_malloc_nursery(self, op):
@@ -943,31 +957,16 @@
     prepare_keepalive = void
 
     def prepare_cond_call_gc_wb(self, op):
-        assert op.result is None
-        # we force all arguments in a reg because it will be needed anyway by
-        # the following setfield_gc or setarrayitem_gc. It avoids loading it
-        # twice from the memory.
-        N = op.numargs()
-        args = op.getarglist()
-        arglocs = [self._ensure_value_is_boxed(op.getarg(i), args)
-                   for i in range(N)]
-        card_marking = False
-        if op.getopnum() == rop.COND_CALL_GC_WB_ARRAY:
-            descr = op.getdescr()
-            if we_are_translated():
-                cls = self.cpu.gc_ll_descr.has_write_barrier_class()
-                assert cls is not None and isinstance(descr, cls)
-            card_marking = descr.jit_wb_cards_set != 0
-        if card_marking:  # allocate scratch registers
-            tmp1 = self.get_scratch_reg(INT)
-            tmp2 = self.get_scratch_reg(INT)
-            tmp3 = self.get_scratch_reg(INT)
-            arglocs.append(tmp1)
-            arglocs.append(tmp2)
-            arglocs.append(tmp3)
+        arglocs = [self.ensure_reg(op.getarg(0))]
         return arglocs
 
-    prepare_cond_call_gc_wb_array = prepare_cond_call_gc_wb
+    def prepare_cond_call_gc_wb_array(self, op):
+        arglocs = [self.ensure_reg(op.getarg(0)),
+                   self.ensure_reg_or_16bit_imm(op.getarg(1)),
+                   None]
+        if arglocs[1].is_reg():
+            arglocs[2] = self.get_scratch_reg(INT)
+        return arglocs
 
     def prepare_force_token(self, op):
         res_loc = self.force_allocate_reg(op.result)
@@ -1028,21 +1027,11 @@
 
     prepare_call_release_gil = prepare_call_may_force
 
-    def prepare_guard_call_assembler(self, op, guard_op):
-        descr = op.getdescr()
-        assert isinstance(descr, JitCellToken)
-        jd = descr.outermost_jitdriver_sd
-        assert jd is not None
-        vable_index = jd.index_of_virtualizable
-        if vable_index >= 0:
-            self._sync_var(op.getarg(vable_index))
-            vable = self.frame_manager.loc(op.getarg(vable_index))
-        else:
-            vable = imm(0)
-        # make sure the call result location is free
-        tmploc = self.get_scratch_reg(INT, selected_reg=r.RES)
-        self.possibly_free_vars(guard_op.getfailargs())
-        return [vable, tmploc] + self._prepare_call(op, save_all_regs=True)
+    def prepare_call_assembler(self, op):
+        locs = self.locs_for_call_assembler(op)
+        self._spill_before_call(save_all_regs=True)
+        resloc = self.after_call(op.result)
+        return [resloc] + locs
 
     def _prepare_args_for_new_op(self, new_args):
         gc_ll_descr = self.cpu.gc_ll_descr
@@ -1060,6 +1049,11 @@
         self.force_spill_var(op.getarg(0))
         return []
 
+    def prepare_guard_not_forced_2(self, op):
+        self.rm.before_call(op.getfailargs(), save_all_regs=True)
+        arglocs = self._prepare_guard(op)
+        return arglocs
+
     def prepare_zero_ptr_field(self, op):
         base_loc = self.ensure_reg(op.getarg(0))
         ofs_loc = self.ensure_reg_or_16bit_imm(op.getarg(1))
diff --git a/rpython/jit/backend/ppc/runner.py b/rpython/jit/backend/ppc/runner.py
--- a/rpython/jit/backend/ppc/runner.py
+++ b/rpython/jit/backend/ppc/runner.py
@@ -2,7 +2,6 @@
 from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
 from rpython.rtyper.llinterp import LLInterpreter
 from rpython.rlib import rgc
-#from rpython.jit.backend.ppc.arch import FORCE_INDEX_OFS
 from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
 from rpython.jit.backend.ppc.ppc_assembler import AssemblerPPC
 from rpython.jit.backend.ppc.arch import WORD
@@ -33,11 +32,6 @@
 
     def __init__(self, rtyper, stats, opts=None, translate_support_code=False,
                  gcdescr=None):
-        if gcdescr is not None:
-            gcdescr.force_index_ofs = FORCE_INDEX_OFS
-            # XXX for now the ppc backend does not support the gcremovetypeptr
-            # translation option
-            # assert gcdescr.config.translation.gcremovetypeptr is False
         AbstractLLCPU.__init__(self, rtyper, stats, opts,
                                translate_support_code, gcdescr)
 
@@ -80,8 +74,7 @@
 
         for jmp, tgt in looptoken.compiled_loop_token.invalidate_positions:
             mc = PPCBuilder()
-            mc.b_offset(tgt)
-            mc.prepare_insts_blocks()
+            mc.b_offset(tgt)     # a single instruction
             mc.copy_to_raw_memory(jmp)
         # positions invalidated
         looptoken.compiled_loop_token.invalidate_positions = []
diff --git a/rpython/jit/backend/ppc/symbol_lookup.py b/rpython/jit/backend/ppc/symbol_lookup.py
deleted file mode 100644
--- a/rpython/jit/backend/ppc/symbol_lookup.py
+++ /dev/null
@@ -1,15 +0,0 @@
-
-def lookup(sym):
-    global lookup
-    import py
-
-    _ppcgen = py.magic.autopath().dirpath().join('_ppcgen.c')._getpymodule()
-
-    try:
-        from _ppcgen import NSLookupAndBindSymbol
-
-        def lookup(sym):
-            return NSLookupAndBindSymbol('_' + sym)
-    except ImportError:
-        from _ppcgen import dlsym as lookup
-    return lookup(sym)
diff --git a/rpython/jit/backend/ppc/test/test_ppc.py b/rpython/jit/backend/ppc/test/test_ppc.py
--- a/rpython/jit/backend/ppc/test/test_ppc.py
+++ b/rpython/jit/backend/ppc/test/test_ppc.py
@@ -2,7 +2,6 @@
 import random, sys, os
 
 from rpython.jit.backend.ppc.codebuilder import BasicPPCAssembler, PPCBuilder
-from rpython.jit.backend.ppc.symbol_lookup import lookup
 from rpython.jit.backend.ppc.regname import *
 from rpython.jit.backend.ppc.register import *
 from rpython.jit.backend.ppc import form
diff --git a/rpython/jit/backend/ppc/test/test_runner.py b/rpython/jit/backend/ppc/test/test_runner.py
--- a/rpython/jit/backend/ppc/test/test_runner.py
+++ b/rpython/jit/backend/ppc/test/test_runner.py
@@ -23,16 +23,22 @@
     # ====> ../../test/runner_test.py
 
     if IS_PPC_32:
-        add_loop_instructions = ["mr", "add", "cmpwi", "beq", "b"]
+        add_loop_instructions = ["ld", "add", "cmpwi", "beq", "b"]
     else:
-        add_loop_instructions = ["mr", "add", "cmpdi", "beq", "b"]
-    bridge_loop_instructions_short = ["lis", "ori", "mtctr", "bctr"]
-    bridge_loop_instructions_long = ["lis", "ori", "rldicr", "oris", "ori",
-                                     "mtctr", "bctr"]
-   
-    def setup_method(self, meth):
-        self.cpu = PPC_CPU(rtyper=None, stats=FakeStats())
-        self.cpu.setup_once()
+        add_loop_instructions = ["ld", "add", "cmpdi", "beq", "b"]
+    bridge_loop_instructions = [
+        "ld", "cmpdi", "bge+",
+        "li", "lis", "ori", "mtctr", "bctrl",
+        "lis", "ori", "mtctr", "bctr"]
+    bridge_loop_instructions_alternative = [
+        "ld", "cmpdi", "bge+",
+        "li", "li", "rldicr", "oris", "ori", "mtctr", "bctrl",
+        "li", "rldicr", "oris", "ori", "mtctr", "bctr"]
+
+    def get_cpu(self):
+        cpu = PPC_CPU(rtyper=None, stats=FakeStats())
+        cpu.setup_once()
+        return cpu
 
     def test_compile_loop_many_int_args(self):
         for numargs in range(2, 16):
diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -1113,12 +1113,12 @@
                             r_box = self.alloc_string("!???????!")
                             if r_box_is_const:
                                 r_box = r_box.constbox()
-                                self.execute_operation(rop.COPYSTRCONTENT,
-                                                       [s_box, r_box,
-                                                        srcstart_box,
-                                                        dststart_box,
-                                                        length_box], 'void')
-                                assert self.look_string(r_box) == "!??cdef?!"
+                            self.execute_operation(rop.COPYSTRCONTENT,
+                                                   [s_box, r_box,
+                                                    srcstart_box,
+                                                    dststart_box,
+                                                    length_box], 'void')
+                            assert self.look_string(r_box) == "!??cdef?!"
 
     def test_copyunicodecontent(self):
         s_box = self.alloc_unicode(u"abcdef")
@@ -1130,12 +1130,12 @@
                             r_box = self.alloc_unicode(u"!???????!")
                             if r_box_is_const:
                                 r_box = r_box.constbox()
-                                self.execute_operation(rop.COPYUNICODECONTENT,
-                                                       [s_box, r_box,
-                                                        srcstart_box,
-                                                        dststart_box,
-                                                        length_box], 'void')
-                                assert self.look_unicode(r_box) == u"!??cdef?!"
+                            self.execute_operation(rop.COPYUNICODECONTENT,
+                                                   [s_box, r_box,
+                                                    srcstart_box,
+                                                    dststart_box,
+                                                    length_box], 'void')
+                            assert self.look_unicode(r_box) == u"!??cdef?!"
 
     def test_do_unicode_basic(self):
         u = self.cpu.bh_newunicode(5)
@@ -2178,7 +2178,7 @@
         funcbox = self.get_funcbox(self.cpu, func_ptr)
         class WriteBarrierDescr(AbstractDescr):
             jit_wb_if_flag = 4096
-            jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
+            jit_wb_if_flag_byteofs = struct.pack("l", 4096).index('\x10')
             jit_wb_if_flag_singlebyte = 0x10
             def get_write_barrier_fn(self, cpu):
                 return funcbox.getint()
@@ -2212,7 +2212,7 @@
         funcbox = self.get_funcbox(self.cpu, func_ptr)
         class WriteBarrierDescr(AbstractDescr):
             jit_wb_if_flag = 4096
-            jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
+            jit_wb_if_flag_byteofs = struct.pack("l", 4096).index('\x10')
             jit_wb_if_flag_singlebyte = 0x10
             jit_wb_cards_set = 0       # <= without card marking
             def get_write_barrier_fn(self, cpu):
@@ -2259,10 +2259,10 @@
         funcbox = self.get_funcbox(self.cpu, func_ptr)
         class WriteBarrierDescr(AbstractDescr):
             jit_wb_if_flag = 4096
-            jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
+            jit_wb_if_flag_byteofs = struct.pack("l", 4096).index('\x10')
             jit_wb_if_flag_singlebyte = 0x10
             jit_wb_cards_set = 32768
-            jit_wb_cards_set_byteofs = struct.pack("i", 32768).index('\x80')
+            jit_wb_cards_set_byteofs = struct.pack("l", 32768).index('\x80')
             jit_wb_cards_set_singlebyte = -0x80
             jit_wb_card_page_shift = 7
             def get_write_barrier_from_array_fn(self, cpu):
@@ -3674,6 +3674,7 @@
         assert not called
 
     def test_assembler_call_propagate_exc(self):
+        # WARNING: this test depends on test_memoryerror having passed first
         if not isinstance(self.cpu, AbstractLLCPU):
             py.test.skip("llgraph can't fake exceptions well enough, give up")
 
@@ -4985,3 +4986,35 @@
                                 assert a[i].a == a[i].b == val
                             else:
                                 assert a[i] == rffi.cast(OF, val)
+
+    def test_jump_float_constant(self):
+        f0 = BoxFloat()
+        f1 = BoxFloat()
+        i2 = BoxInt()
+        f3 = BoxFloat()
+        i4 = BoxInt()
+        looptoken = JitCellToken()
+        targettoken = TargetToken()
+        operations = [
+            ResOperation(rop.LABEL, [f0, f1], None, descr=targettoken),
+            ResOperation(rop.CAST_FLOAT_TO_INT, [f1], i2),
+            ResOperation(rop.GUARD_VALUE, [i2, ConstInt(123456)], None,
+                         descr=BasicFailDescr(6)),
+            ResOperation(rop.FLOAT_ADD, [f0, ConstFloat(-0.5)], f3),
+            ResOperation(rop.FLOAT_GT, [f3, ConstFloat(9.12)], i4),
+            ResOperation(rop.GUARD_TRUE, [i4], None, descr=BasicFailDescr(2)),
+            ResOperation(rop.JUMP, [f3, ConstFloat(123456.78912)], None,
+                         descr=targettoken),
+            ]
+        inputargs = [f0, f1]
+        operations[2].setfailargs([])
+        operations[-2].setfailargs([f1, f3])
+
+        self.cpu.compile_loop(inputargs, operations, looptoken)
+        deadframe = self.cpu.execute_token(looptoken, 12.25, 123456.01)
+        fail = self.cpu.get_latest_descr(deadframe)
+        assert fail.identifier == 2
+        res = longlong.getrealfloat(self.cpu.get_float_value(deadframe, 0))
+        assert res == 123456.78912
+        res = longlong.getrealfloat(self.cpu.get_float_value(deadframe, 1))
+        assert res == 8.75
diff --git a/rpython/jit/backend/tool/viewcode.py b/rpython/jit/backend/tool/viewcode.py
--- a/rpython/jit/backend/tool/viewcode.py
+++ b/rpython/jit/backend/tool/viewcode.py
@@ -49,10 +49,12 @@
         'arm': 'arm',
         'arm_32': 'arm',
         'ppc' : 'powerpc:common64',
+        'ppc-64' : 'powerpc:common64',
     }
     machine_endianness = {
         # default value: 'little'
         'ppc' : sys.byteorder,     # i.e. same as the running machine...
+        'ppc-64' : sys.byteorder,     # i.e. same as the running machine...
     }
     cmd = find_objdump()
     objdump = ('%(command)s -b binary -m %(machine)s '


More information about the pypy-commit mailing list