[pypy-commit] pypy ppc-updated-backend: PPC Backend #4: get test_runner fully passing.
arigo
noreply at buildbot.pypy.org
Fri Sep 18 08:19:45 CEST 2015
Author: Armin Rigo <arigo at tunes.org>
Branch: ppc-updated-backend
Changeset: r79681:72dfc868373f
Date: 2015-09-14 10:45 +0200
http://bitbucket.org/pypy/pypy/changeset/72dfc868373f/
Log: PPC Backend #4: get test_runner fully passing.
Fix many details, remove old code, etc.
diff --git a/rpython/jit/backend/detect_cpu.py b/rpython/jit/backend/detect_cpu.py
--- a/rpython/jit/backend/detect_cpu.py
+++ b/rpython/jit/backend/detect_cpu.py
@@ -59,6 +59,7 @@
'x86': MODEL_X86, # Apple
'Power Macintosh': MODEL_PPC_64,
'ppc64': MODEL_PPC_64,
+ 'ppc64le': MODEL_PPC_64,
'x86_64': MODEL_X86,
'amd64': MODEL_X86, # freebsd
'AMD64': MODEL_X86, # win64
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -213,6 +213,23 @@
self.mc.get_relative_pos())
def call_assembler(self, op, argloc, vloc, result_loc, tmploc):
+ """
+ * argloc: location of the frame argument that we're passing to
+ the called assembler (this is the first return value
+ of locs_for_call_assembler())
+
+ * vloc: location of the virtualizable (not in a register;
+ this is the optional second return value of
+ locs_for_call_assembler(), or imm(0) if none returned)
+
+ * result_loc: location of op.result (which is not to be
+ confused with the next one)
+
+ * tmploc: location where the actual call to the other piece
+ of assembler will return its jitframe result
+ (which is always a REF), before the helper may be
+ called
+ """
descr = op.getdescr()
assert isinstance(descr, JitCellToken)
#
diff --git a/rpython/jit/backend/ppc/_flush_icache.c b/rpython/jit/backend/ppc/_flush_icache.c
deleted file mode 100644
--- a/rpython/jit/backend/ppc/_flush_icache.c
+++ /dev/null
@@ -1,26 +0,0 @@
-#include <Python.h>
-#include "../../../translator/c/src/asm_ppc.h"
-
-static PyObject*
-_flush_icache(PyObject *self, PyObject *args)
-{
- long base, size;
-
- if (!PyArg_ParseTuple(args, "ii:_flush_icache", &base, &size))
- return NULL;
-
- LL_flush_icache(base, size);
- Py_INCREF(Py_None);
- return Py_None;
-}
-
-PyMethodDef _flush_icache_methods[] = {
- {"_flush_icache", _flush_icache, METH_VARARGS, ""},
- {0, 0}
-};
-
-PyMODINIT_FUNC
-init_flush_icache(void)
-{
- Py_InitModule("_flush_icache", _flush_icache_methods);
-}
diff --git a/rpython/jit/backend/ppc/_ppcgen.c b/rpython/jit/backend/ppc/_ppcgen.c
deleted file mode 100644
--- a/rpython/jit/backend/ppc/_ppcgen.c
+++ /dev/null
@@ -1,154 +0,0 @@
-#include <Python.h>
-#include <sys/mman.h>
-
-#define __dcbf(base, index) \
- __asm__ ("dcbf %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory")
-
-
-static PyTypeObject* mmap_type;
-
-#if defined(__APPLE__)
-
-#include <mach-o/dyld.h>
-
-static PyObject*
-_ppy_NSLookupAndBindSymbol(PyObject* self, PyObject* args)
-{
- char *s;
- NSSymbol sym;
-
- if (!PyArg_ParseTuple(args, "s", &s))
- return NULL;
-
- if (!NSIsSymbolNameDefined(s)) {
- return PyErr_Format(PyExc_ValueError,
- "symbol '%s' not found", s);
- }
-
- sym = NSLookupAndBindSymbol(s);
-
- return PyInt_FromLong((long)NSAddressOfSymbol(sym));
-}
-
-
-#elif defined(linux)
-
-#include <dlfcn.h>
-
-static PyObject*
-_ppy_dlsym(PyObject* self, PyObject* args)
-{
- char *s;
- void *handle;
- void *sym;
-
- if (!PyArg_ParseTuple(args, "s", &s))
- return NULL;
-
- handle = dlopen(RTLD_DEFAULT, RTLD_LAZY);
- sym = dlsym(handle, s);
- if (sym == NULL) {
- return PyErr_Format(PyExc_ValueError,
- "symbol '%s' not found", s);
- }
- return PyInt_FromLong((long)sym);
-}
-
-#else
-
-#error "OS not supported"
-
-#endif
-
-
-static PyObject*
-_ppy_mmap_exec(PyObject* self, PyObject* args)
-{
- PyObject* code_args;
- PyObject* r;
- PyObject* mmap_obj;
- char* code;
- size_t size;
-
- if (!PyArg_ParseTuple(args, "O!O!:mmap_exec",
- mmap_type, &mmap_obj,
- &PyTuple_Type, &code_args))
- return NULL;
-
- code = *((char**)mmap_obj + 2);
- size = *((size_t*)mmap_obj + 3);
-
- r = ((PyCFunction)code)(NULL, code_args);
-
- Py_DECREF(args);
-
- return r;
-}
-
-static PyObject*
-_ppy_mmap_flush(PyObject* self, PyObject* arg)
-{
- char* code;
- size_t size;
- int i = 0;
-
- if (!PyObject_TypeCheck(arg, mmap_type)) {
- PyErr_SetString(PyExc_TypeError,
- "mmap_flush: single argument must be mmap object");
- }
-
- code = *((char**)arg + 2);
- size = *((size_t*)arg + 3);
-
- for (; i < size; i += 32){
- __dcbf(code, i);
- }
-
- Py_INCREF(Py_None);
- return Py_None;
-}
-
-
-PyMethodDef _ppy_methods[] = {
-#if defined(__APPLE__)
- {"NSLookupAndBindSymbol", _ppy_NSLookupAndBindSymbol,
- METH_VARARGS, ""},
-#elif defined(linux)
- {"dlsym", _ppy_dlsym, METH_VARARGS, ""},
-#endif
- {"mmap_exec", _ppy_mmap_exec, METH_VARARGS, ""},
- {"mmap_flush", _ppy_mmap_flush, METH_O, ""},
- {0, 0}
-};
-
-#if !defined(MAP_ANON) && defined(__APPLE__)
-#define MAP_ANON 0x1000
-#endif
-
-PyMODINIT_FUNC
-init_ppcgen(void)
-{
- PyObject* m;
- PyObject* mmap_module;
- PyObject* mmap_func;
- PyObject* mmap_obj;
-
- m = Py_InitModule("_ppcgen", _ppy_methods);
-
- /* argh */
- /* time to campaign for a C API for the mmap module! */
- mmap_module = PyImport_ImportModule("mmap");
- if (!mmap_module)
- return;
- mmap_func = PyObject_GetAttrString(mmap_module, "mmap");
- if (!mmap_func)
- return;
- mmap_obj = PyEval_CallFunction(mmap_func, "iii", -1, 0, MAP_ANON);
- if (!mmap_obj)
- return;
- mmap_type = mmap_obj->ob_type;
- Py_INCREF(mmap_type);
- Py_DECREF(mmap_obj);
- Py_DECREF(mmap_func);
- Py_DECREF(mmap_module);
-}
diff --git a/rpython/jit/backend/ppc/callbuilder.py b/rpython/jit/backend/ppc/callbuilder.py
--- a/rpython/jit/backend/ppc/callbuilder.py
+++ b/rpython/jit/backend/ppc/callbuilder.py
@@ -214,7 +214,7 @@
# replace b1_location with BEQ(here)
jmp_target = self.mc.currpos()
pmc = OverwritingBuilder(self.mc, b1_location, 1)
- pmc.bc(12, 2, jmp_target - b1_location) # "beq"
+ pmc.beq(jmp_target - b1_location)
pmc.overwrite()
if not we_are_translated(): # for testing: now we can access
diff --git a/rpython/jit/backend/ppc/codebuilder.py b/rpython/jit/backend/ppc/codebuilder.py
--- a/rpython/jit/backend/ppc/codebuilder.py
+++ b/rpython/jit/backend/ppc/codebuilder.py
@@ -16,6 +16,14 @@
from rpython.translator.tool.cbuild import ExternalCompilationInfo
from rpython.jit.backend.ppc.rassemblermaker import make_rassembler
+
+# these are the *forbidden* encodings that don't accept register r0:
+# addi rX, r0, immed
+# subi rX, r0, immed
+# addis rX, r0, immed
+# subis rX, r0, immed
+
+
A = Form("frD", "frA", "frB", "XO3", "Rc")
A1 = Form("frD", "frB", "XO3", "Rc")
A2 = Form("frD", "frA", "frC", "XO3", "Rc")
@@ -910,30 +918,27 @@
def high(w):
return (w >> 16) & 0x0000FFFF
-# XXX check this
-if we_are_translated():
- eci = ExternalCompilationInfo(includes = ['asm_ppc.h'])
+_eci = ExternalCompilationInfo(post_include_bits=[
+ '#define rpython_flush_icache() asm("isync":::"memory")\n'
+ ])
+flush_icache = rffi.llexternal(
+ "rpython_flush_icache",
+ [],
+ lltype.Void,
+ compilation_info=_eci,
+ _nowrapper=True,
+ sandboxsafe=True)
- flush_icache = rffi.llexternal(
- "LL_flush_icache",
- [lltype.Signed, lltype.Signed],
- lltype.Void,
- compilation_info=eci,
- _nowrapper=True,
- sandboxsafe=True)
-else:
- def flush_icache(x, y): pass
class PPCGuardToken(GuardToken):
def __init__(self, cpu, gcmap, descr, failargs, faillocs,
exc, frame_depth, is_guard_not_invalidated=False,
is_guard_not_forced=False, fcond=c.cond_none):
- assert fcond != c.cond_none
GuardToken.__init__(self, cpu, gcmap, descr, failargs, faillocs, exc,
frame_depth, is_guard_not_invalidated,
is_guard_not_forced)
self.fcond = fcond
- #self.offset = offset
+
class OverwritingBuilder(PPCAssembler):
def __init__(self, mc, start, num_insts=0):
@@ -1205,14 +1210,10 @@
def currpos(self):
return self.get_relative_pos()
- def flush_cache(self, addr):
- startaddr = rffi.cast(lltype.Signed, addr)
- size = rffi.cast(lltype.Signed, self.get_relative_pos())
- flush_icache(startaddr, size)
-
def copy_to_raw_memory(self, addr):
self._copy_to_raw_memory(addr)
- self.flush_cache(addr)
+ if we_are_translated():
+ flush_icache()
self._dump(addr, "jit-backend-dump", 'ppc')
def cmp_op(self, block, a, b, imm=False, signed=True, fp=False):
diff --git a/rpython/jit/backend/ppc/opassembler.py b/rpython/jit/backend/ppc/opassembler.py
--- a/rpython/jit/backend/ppc/opassembler.py
+++ b/rpython/jit/backend/ppc/opassembler.py
@@ -6,7 +6,9 @@
from rpython.jit.backend.ppc.locations import imm as make_imm_loc
from rpython.jit.backend.ppc.arch import (IS_PPC_32, IS_PPC_64, WORD,
MAX_REG_PARAMS, MAX_FREG_PARAMS,
- PARAM_SAVE_AREA_OFFSET)
+ PARAM_SAVE_AREA_OFFSET,
+ THREADLOCAL_ADDR_OFFSET,
+ IS_BIG_ENDIAN)
from rpython.jit.metainterp.history import (JitCellToken, TargetToken, Box,
AbstractFailDescr, FLOAT, INT, REF)
@@ -22,6 +24,7 @@
from rpython.rtyper.lltypesystem import rstr, rffi, lltype
from rpython.rtyper.annlowlevel import cast_instance_to_gcref
from rpython.jit.metainterp.resoperation import rop
+from rpython.jit.codewriter.effectinfo import EffectInfo
from rpython.jit.backend.ppc import callbuilder
class IntOpAssembler(object):
@@ -209,7 +212,7 @@
l0, res = arglocs
self.mc.fabs(res.value, l0.value)
- def emit_math_sqrt(self, op, arglocs, regalloc):
+ def _emit_math_sqrt(self, op, arglocs, regalloc):
l0, res = arglocs
self.mc.fsqrt(res.value, l0.value)
@@ -320,7 +323,7 @@
self.mc.trap()
self._cmp_guard_class(op, arglocs, regalloc)
pmc = OverwritingBuilder(self.mc, patch_pos, 1)
- pmc.bc(12, 0, self.mc.currpos() - patch_pos) # LT
+ pmc.blt(self.mc.currpos() - patch_pos)
pmc.overwrite()
self.guard_success_cc = c.EQ
self._emit_guard(op, arglocs[3:])
@@ -355,6 +358,13 @@
self.guard_success_cc = c.EQ
self._emit_guard(op, arglocs)
+ def emit_guard_not_forced_2(self, op, arglocs, regalloc):
+ guard_token = self.build_guard_token(op, arglocs[0].value, arglocs[1:],
+ c.cond_none, save_exc=False)
+ self._finish_gcmap = guard_token.gcmap
+ self._store_force_index(op)
+ self.store_info_on_descr(0, guard_token)
+
class MiscOpAssembler(object):
@@ -448,6 +458,8 @@
pmc.overwrite()
def emit_guard_exception(self, op, arglocs, regalloc):
+ # XXX FIXME
+ # XXX pos_exc_value and pos_exception are 8 bytes apart, don't need both
loc, loc1, resloc, pos_exc_value, pos_exception = arglocs[:5]
failargs = arglocs[5:]
self.mc.load_imm(loc1, pos_exception.value)
@@ -490,6 +502,9 @@
cb.emit()
def emit_call(self, op, arglocs, regalloc):
+ oopspecindex = regalloc.get_oopspecindex(op)
+ if oopspecindex == EffectInfo.OS_MATH_SQRT:
+ return self._emit_math_sqrt(op, arglocs, regalloc)
self._emit_call(op, arglocs)
def emit_call_may_force(self, op, arglocs, regalloc):
@@ -832,7 +847,7 @@
if jz_location != -1:
pmc = OverwritingBuilder(self.mc, jz_location, 1)
- pmc.bc(4, 1, self.mc.currpos() - jz_location) # !GT
+ pmc.ble(self.mc.currpos() - jz_location) # !GT
pmc.overwrite()
class StrOpAssembler(object):
@@ -843,118 +858,61 @@
emit_strgetitem = FieldOpAssembler.emit_getarrayitem_gc
emit_strsetitem = FieldOpAssembler.emit_setarrayitem_gc
- #from ../x86/regalloc.py:928 ff.
def emit_copystrcontent(self, op, arglocs, regalloc):
- assert len(arglocs) == 0
- self._emit_copystrcontent(op, regalloc, is_unicode=False)
+ self._emit_copycontent(arglocs, is_unicode=False)
def emit_copyunicodecontent(self, op, arglocs, regalloc):
- assert len(arglocs) == 0
- self._emit_copystrcontent(op, regalloc, is_unicode=True)
+ self._emit_copycontent(arglocs, is_unicode=True)
- def _emit_copystrcontent(self, op, regalloc, is_unicode):
- # compute the source address
- args = op.getarglist()
- base_loc = regalloc._ensure_value_is_boxed(args[0], args)
- ofs_loc = regalloc._ensure_value_is_boxed(args[2], args)
- assert args[0] is not args[1] # forbidden case of aliasing
- regalloc.possibly_free_var(args[0])
- if args[3] is not args[2] is not args[4]: # MESS MESS MESS: don't free
- regalloc.possibly_free_var(args[2]) # it if ==args[3] or args[4]
- srcaddr_box = TempPtr()
- forbidden_vars = [args[1], args[3], args[4], srcaddr_box]
- srcaddr_loc = regalloc.force_allocate_reg(srcaddr_box)
- self._gen_address_inside_string(base_loc, ofs_loc, srcaddr_loc,
- is_unicode=is_unicode)
+ def _emit_load_for_copycontent(self, dst, src_ptr, src_ofs, scale):
+ if src_ofs.is_imm():
+ value = src_ofs.value << scale
+ if value < 32768:
+ self.mc.addi(dst.value, src_ptr.value, value)
+ else:
+ self.mc.load_imm(dst, value)
+ self.mc.add(dst.value, src_ptr.value, dst.value)
+ elif scale == 0:
+ self.mc.add(dst.value, src_ptr.value, src_ofs.value)
+ else:
+ self.mc.sldi(dst.value, src_ofs.value, scale)
+ self.mc.add(dst.value, src_ptr.value, dst.value)
- # compute the destination address
- forbidden_vars = [args[4], args[3], srcaddr_box]
- dstaddr_box = TempPtr()
- dstaddr_loc = regalloc.force_allocate_reg(dstaddr_box)
- forbidden_vars.append(dstaddr_box)
- base_loc = regalloc._ensure_value_is_boxed(args[1], forbidden_vars)
- ofs_loc = regalloc._ensure_value_is_boxed(args[3], forbidden_vars)
- assert base_loc.is_reg()
- assert ofs_loc.is_reg()
- regalloc.possibly_free_var(args[1])
- if args[3] is not args[4]: # more of the MESS described above
- regalloc.possibly_free_var(args[3])
- regalloc.free_temp_vars()
- self._gen_address_inside_string(base_loc, ofs_loc, dstaddr_loc,
- is_unicode=is_unicode)
+ def _emit_copycontent(self, arglocs, is_unicode):
+ [src_ptr_loc, dst_ptr_loc,
+ src_ofs_loc, dst_ofs_loc, length_loc] = arglocs
- # compute the length in bytes
- forbidden_vars = [srcaddr_box, dstaddr_box]
- if isinstance(args[4], Box):
- length_box = args[4]
- length_loc = regalloc.make_sure_var_in_reg(args[4], forbidden_vars)
+ if is_unicode:
+ basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
+ self.cpu.translate_support_code)
+ if itemsize == 2: scale = 1
+ elif itemsize == 4: scale = 2
+ else: raise AssertionError
else:
- length_box = TempInt()
- length_loc = regalloc.force_allocate_reg(length_box, forbidden_vars)
- xxxxxxxxxxxxxxxxxxxxxxxx
- imm = regalloc.convert_to_imm(args[4])
- self.load(length_loc, imm)
- if is_unicode:
- bytes_box = TempPtr()
- bytes_loc = regalloc.force_allocate_reg(bytes_box, forbidden_vars)
- scale = self._get_unicode_item_scale()
- assert length_loc.is_reg()
- with scratch_reg(self.mc):
- self.mc.load_imm(r.SCRATCH, 1 << scale)
- if IS_PPC_32:
- self.mc.mullw(bytes_loc.value, r.SCRATCH.value, length_loc.value)
- else:
- self.mc.mulld(bytes_loc.value, r.SCRATCH.value, length_loc.value)
- length_box = bytes_box
- length_loc = bytes_loc
- # call memcpy()
- regalloc.before_call()
- imm_addr = make_imm_loc(self.memcpy_addr)
- self._emit_call(imm_addr,
- [dstaddr_loc, srcaddr_loc, length_loc])
-
- regalloc.possibly_free_var(length_box)
- regalloc.possibly_free_var(dstaddr_box)
- regalloc.possibly_free_var(srcaddr_box)
-
- def _gen_address_inside_string(self, baseloc, ofsloc, resloc, is_unicode):
- if is_unicode:
- ofs_items, _, _ = symbolic.get_array_token(rstr.UNICODE,
- self.cpu.translate_support_code)
- scale = self._get_unicode_item_scale()
- else:
- ofs_items, itemsize, _ = symbolic.get_array_token(rstr.STR,
- self.cpu.translate_support_code)
+ basesize, itemsize, _ = symbolic.get_array_token(rstr.STR,
+ self.cpu.translate_support_code)
assert itemsize == 1
scale = 0
- self._gen_address(ofsloc, ofs_items, scale, resloc, baseloc)
- def _gen_address(self, sizereg, baseofs, scale, result, baseloc=None):
- assert sizereg.is_reg()
- if scale > 0:
- scaled_loc = r.r0
- if IS_PPC_32:
- self.mc.slwi(scaled_loc.value, sizereg.value, scale)
- else:
- self.mc.sldi(scaled_loc.value, sizereg.value, scale)
+ self._emit_load_for_copycontent(r.r0, src_ptr_loc, src_ofs_loc, scale)
+ self._emit_load_for_copycontent(r.r2, dst_ptr_loc, dst_ofs_loc, scale)
+
+ if length_loc.is_imm():
+ length = length_loc.getint()
+ self.mc.load_imm(r.r5, length << scale)
else:
- scaled_loc = sizereg
- if baseloc is not None:
- assert baseloc.is_reg()
- self.mc.add(result.value, baseloc.value, scaled_loc.value)
- self.mc.addi(result.value, result.value, baseofs)
- else:
- self.mc.addi(result.value, scaled_loc.value, baseofs)
+ if scale > 0:
+ self.mc.sldi(r.r5.value, length_loc.value, scale)
+ elif length_loc is not r.r5:
+ self.mc.mr(r.r5.value, length_loc.value)
- def _get_unicode_item_scale(self):
- _, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
- self.cpu.translate_support_code)
- if itemsize == 4:
- return 2
- elif itemsize == 2:
- return 1
- else:
- raise AssertionError("bad unicode item size")
+ self.mc.mr(r.r4.value, r.r0.value)
+ self.mc.addi(r.r4.value, r.r4.value, basesize)
+ self.mc.addi(r.r3.value, r.r2.value, basesize)
+
+ cb = callbuilder.CallBuilder(self, imm(self.memcpy_addr),
+ [r.r3, r.r4, r.r5], None)
+ cb.emit()
class UnicodeOpAssembler(object):
@@ -991,135 +949,142 @@
emit_jit_debug = emit_debug_merge_point
emit_keepalive = emit_debug_merge_point
- def emit_cond_call_gc_wb(self, op, arglocs, regalloc):
+ def _write_barrier_fastpath(self, mc, descr, arglocs, regalloc, array=False,
+ is_frame=False, align_stack=False):
# Write code equivalent to write_barrier() in the GC: it checks
- # a flag in the object at arglocs[0], and if set, it calls the
- # function remember_young_pointer() from the GC. The two arguments
- # to the call are in arglocs[:2]. The latter saves registers as needed
- # and call the function jit_remember_young_pointer() from the GC.
- descr = op.getdescr()
+ # a flag in the object at arglocs[0], and if set, it calls a
+ # helper piece of assembler. The latter saves registers as needed
+ # and calls the function remember_young_pointer() from the GC.
if we_are_translated():
cls = self.cpu.gc_ll_descr.has_write_barrier_class()
assert cls is not None and isinstance(descr, cls)
#
- opnum = op.getopnum()
- card_marking = False
+ card_marking_mask = 0
mask = descr.jit_wb_if_flag_singlebyte
- if opnum == rop.COND_CALL_GC_WB_ARRAY and descr.jit_wb_cards_set != 0:
+ if array and descr.jit_wb_cards_set != 0:
# assumptions the rest of the function depends on:
assert (descr.jit_wb_cards_set_byteofs ==
descr.jit_wb_if_flag_byteofs)
- assert descr.jit_wb_cards_set_singlebyte == -0x80
- card_marking = True
- mask = descr.jit_wb_if_flag_singlebyte | -0x80
+ card_marking_mask = descr.jit_wb_cards_set_singlebyte
#
loc_base = arglocs[0]
+ assert loc_base.is_reg()
+ if is_frame:
+ assert loc_base is r.SPP
assert _check_imm_arg(descr.jit_wb_if_flag_byteofs)
- with scratch_reg(self.mc):
- self.mc.lbz(r.SCRATCH.value, loc_base.value,
- descr.jit_wb_if_flag_byteofs)
- # test whether this bit is set
- mask &= 0xFF
- self.mc.andix(r.SCRATCH.value, r.SCRATCH.value, mask)
+ mc.lbz(r.SCRATCH2.value, loc_base.value, descr.jit_wb_if_flag_byteofs)
+ mc.andix(r.SCRATCH.value, r.SCRATCH2.value, mask & 0xFF)
- jz_location = self.mc.currpos()
- self.mc.nop()
+ jz_location = mc.get_relative_pos()
+ mc.trap() # patched later with 'beq'
# for cond_call_gc_wb_array, also add another fast path:
# if GCFLAG_CARDS_SET, then we can just set one bit and be done
- if card_marking:
- with scratch_reg(self.mc):
- self.mc.lbz(r.SCRATCH.value, loc_base.value,
- descr.jit_wb_if_flag_byteofs)
- self.mc.extsb(r.SCRATCH.value, r.SCRATCH.value)
-
- # test whether this bit is set
- self.mc.cmpwi(0, r.SCRATCH.value, 0)
-
- js_location = self.mc.currpos()
- self.mc.nop()
+ if card_marking_mask:
+ # GCFLAG_CARDS_SET is in the same byte, loaded in r2 already
+ mc.andix(r.SCRATCH.value, r.SCRATCH2.value,
+ card_marking_mask & 0xFF)
+ js_location = mc.get_relative_pos()
+ mc.trap() # patched later with 'bne'
else:
js_location = 0
# Write only a CALL to the helper prepared in advance, passing it as
# argument the address of the structure we are writing into
# (the first argument to COND_CALL_GC_WB).
- helper_num = card_marking
-
- if self._regalloc.fprm.reg_bindings:
+ helper_num = (card_marking_mask != 0)
+ if is_frame:
+ helper_num = 4
+ elif regalloc.fprm.reg_bindings:
helper_num += 2
if self.wb_slowpath[helper_num] == 0: # tests only
assert not we_are_translated()
self.cpu.gc_ll_descr.write_barrier_descr = descr
- self._build_wb_slowpath(card_marking,
- bool(self._regalloc.fprm.reg_bindings))
+ self._build_wb_slowpath(card_marking_mask != 0,
+ bool(regalloc.fprm.reg_bindings))
assert self.wb_slowpath[helper_num] != 0
#
- if loc_base is not r.r3:
- self.mc.store(r.r3.value, r.SP.value, 24)
- remap_frame_layout(self, [loc_base], [r.r3], r.SCRATCH)
- addr = self.wb_slowpath[helper_num]
- func = rffi.cast(lltype.Signed, addr)
- self.mc.bl_abs(func)
- if loc_base is not r.r3:
- self.mc.load(r.r3.value, r.SP.value, 24)
+ if not is_frame:
+ mc.mr(r.r0.value, loc_base.value) # unusual argument location
+ if is_frame and align_stack:
+ XXXX
+ mc.SUB_ri(esp.value, 16 - WORD) # erase the return address
+ mc.load_imm(r.SCRATCH2, self.wb_slowpath[helper_num])
+ mc.mtctr(r.SCRATCH2.value)
+ mc.bctrl()
+ if is_frame and align_stack:
+ XXXX
+ mc.ADD_ri(esp.value, 16 - WORD) # erase the return address
- # if GCFLAG_CARDS_SET, then we can do the whole thing that would
- # be done in the CALL above with just four instructions, so here
- # is an inline copy of them
- if card_marking:
- with scratch_reg(self.mc):
- jns_location = self.mc.currpos()
- self.mc.nop() # jump to the exit, patched later
- # patch the JS above
- offset = self.mc.currpos()
- pmc = OverwritingBuilder(self.mc, js_location, 1)
- # Jump if JS comparison is less than (bit set)
- pmc.bc(12, 0, offset - js_location)
- pmc.overwrite()
- #
- # case GCFLAG_CARDS_SET: emit a few instructions to do
- # directly the card flag setting
- loc_index = arglocs[1]
- assert loc_index.is_reg()
- tmp1 = arglocs[-1]
- tmp2 = arglocs[-2]
- tmp3 = arglocs[-3]
- #byteofs
- s = 3 + descr.jit_wb_card_page_shift
+ if card_marking_mask:
+ # The helper ends again with a check of the flag in the object.
+ # So here, we can simply write again a beq, which will be
+ # taken if GCFLAG_CARDS_SET is still not set.
+ jns_location = mc.get_relative_pos()
+ mc.trap()
+ #
+ # patch the 'bne' above
+ currpos = mc.currpos()
+ pmc = OverwritingBuilder(mc, js_location, 1)
+ pmc.bne(currpos - js_location)
+ pmc.overwrite()
+ #
+ # case GCFLAG_CARDS_SET: emit a few instructions to do
+ # directly the card flag setting
+ loc_index = arglocs[1]
+ if loc_index.is_reg():
- self.mc.srli_op(tmp3.value, loc_index.value, s)
- self.mc.not_(tmp3.value, tmp3.value)
+ tmp_loc = arglocs[2]
+ n = descr.jit_wb_card_page_shift
- # byte_index
- self.mc.li(r.SCRATCH.value, 7)
- self.mc.srli_op(loc_index.value, loc_index.value,
- descr.jit_wb_card_page_shift)
- self.mc.and_(tmp1.value, r.SCRATCH.value, loc_index.value)
+ # compute in tmp_loc the byte offset:
+ # ~(index >> (card_page_shift + 3)) ('~' is 'not_' below)
+ mc.srli_op(tmp_loc.value, loc_index.value, n + 3)
- # set the bit
- self.mc.li(tmp2.value, 1)
- self.mc.lbzx(r.SCRATCH.value, loc_base.value, tmp3.value)
- self.mc.sl_op(tmp2.value, tmp2.value, tmp1.value)
- self.mc.or_(r.SCRATCH.value, r.SCRATCH.value, tmp2.value)
- self.mc.stbx(r.SCRATCH.value, loc_base.value, tmp3.value)
+ # compute in r2 the index of the bit inside the byte:
+ # (index >> card_page_shift) & 7
+ mc.rldicl(r.SCRATCH2.value, loc_index.value, 64 - n, 61)
+ mc.li(r.SCRATCH.value, 1)
+ mc.not_(tmp_loc.value, tmp_loc.value)
+
+ # set r2 to 1 << r2
+ mc.sl_op(r.SCRATCH2.value, r.SCRATCH.value, r.SCRATCH2.value)
+
+ # set this bit inside the byte of interest
+ mc.lbzx(r.SCRATCH.value, loc_base.value, tmp_loc.value)
+ mc.or_(r.SCRATCH.value, r.SCRATCH.value, r.SCRATCH2.value)
+ mc.stbx(r.SCRATCH.value, loc_base.value, tmp_loc.value)
# done
- # patch the JNS above
- offset = self.mc.currpos()
- pmc = OverwritingBuilder(self.mc, jns_location, 1)
- # Jump if JNS comparison is not less than (bit not set)
- pmc.bc(4, 0, offset - jns_location)
- pmc.overwrite()
+ else:
+ byte_index = loc_index.value >> descr.jit_wb_card_page_shift
+ byte_ofs = ~(byte_index >> 3)
+ byte_val = 1 << (byte_index & 7)
+ assert _check_imm_arg(byte_ofs)
+
+ mc.lbz(r.SCRATCH.value, loc_base.value, byte_ofs)
+ mc.ori(r.SCRATCH.value, r.SCRATCH.value, byte_val)
+ mc.stb(r.SCRATCH.value, loc_base.value, byte_ofs)
+ #
+ # patch the beq just above
+ currpos = mc.currpos()
+ pmc = OverwritingBuilder(mc, jns_location, 1)
+ pmc.beq(currpos - jns_location)
+ pmc.overwrite()
# patch the JZ above
- offset = self.mc.currpos()
- pmc = OverwritingBuilder(self.mc, jz_location, 1)
- # Jump if JZ comparison is zero (CMP 0 is equal)
- pmc.bc(12, 2, offset - jz_location)
+ currpos = mc.currpos()
+ pmc = OverwritingBuilder(mc, jz_location, 1)
+ pmc.beq(currpos - jz_location)
pmc.overwrite()
- emit_cond_call_gc_wb_array = emit_cond_call_gc_wb
+ def emit_cond_call_gc_wb(self, op, arglocs, regalloc):
+ self._write_barrier_fastpath(self.mc, op.getdescr(), arglocs, regalloc)
+
+ def emit_cond_call_gc_wb_array(self, op, arglocs, regalloc):
+ self._write_barrier_fastpath(self.mc, op.getdescr(), arglocs, regalloc,
+ array=True)
+
class ForceOpAssembler(object):
@@ -1129,215 +1094,95 @@
res_loc = arglocs[0]
self.mc.mr(res_loc.value, r.SPP.value)
- # self._emit_guard(guard_op, regalloc._prepare_guard(guard_op), c.LT)
- # from: ../x86/assembler.py:1668
- # XXX Split into some helper methods
- def emit_guard_call_assembler(self, op, guard_op, arglocs, regalloc):
- tmploc = arglocs[1]
- resloc = arglocs[2]
- callargs = arglocs[3:]
+ def emit_call_assembler(self, op, arglocs, regalloc):
+ if len(arglocs) == 3:
+ [result_loc, argloc, vloc] = arglocs
+ else:
+ [result_loc, argloc] = arglocs
+ vloc = imm(0)
+ self._store_force_index(self._find_nearby_operation(regalloc, +1))
+ # 'result_loc' is either r3 or f1
+ self.call_assembler(op, argloc, vloc, result_loc, r.r3)
- faildescr = guard_op.getdescr()
- fail_index = self.cpu.get_fail_descr_number(faildescr)
- self._write_fail_index(fail_index)
- descr = op.getdescr()
- assert isinstance(descr, JitCellToken)
- # check value
- assert tmploc is r.RES
- xxxxxxxxxxxx
- self._emit_call(fail_index, imm(descr._ppc_func_addr),
- callargs, result=tmploc)
- if op.result is None:
- value = self.cpu.done_with_this_frame_void_v
+ imm = staticmethod(imm) # for call_assembler()
+
+ def _call_assembler_emit_call(self, addr, argloc, _):
+ self.regalloc_mov(argloc, r.r3)
+ self.mc.ld(r.r4.value, r.SP.value, THREADLOCAL_ADDR_OFFSET)
+
+ cb = callbuilder.CallBuilder(self, addr, [r.r3, r.r4], r.r3)
+ cb.emit()
+
+ def _call_assembler_emit_helper_call(self, addr, arglocs, result_loc):
+ cb = callbuilder.CallBuilder(self, addr, arglocs, result_loc)
+ cb.emit()
+
+ def _call_assembler_check_descr(self, value, tmploc):
+ ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
+ self.mc.ld(r.r5.value, r.r3.value, ofs)
+ if _check_imm_arg(value):
+ self.mc.cmp_op(0, r.r5.value, value, imm=True)
else:
+ self.mc.load_imm(r.r4, value)
+ self.mc.cmp_op(0, r.r5.value, r.r4.value, imm=False)
+ jump_if_eq = self.mc.currpos()
+ self.mc.nop() # patched later
+ return jump_if_eq
+
+ def _call_assembler_patch_je(self, result_loc, je_location):
+ jump_to_done = self.mc.currpos()
+ self.mc.nop() # patched later
+ #
+ currpos = self.mc.currpos()
+ pmc = OverwritingBuilder(self.mc, je_location, 1)
+ pmc.beq(currpos - je_location)
+ pmc.overwrite()
+ #
+ return jump_to_done
+
+ def _call_assembler_load_result(self, op, result_loc):
+ if op.result is not None:
+ # load the return value from the dead frame's value index 0
kind = op.result.type
- if kind == INT:
- value = self.cpu.done_with_this_frame_int_v
- elif kind == REF:
- value = self.cpu.done_with_this_frame_ref_v
- elif kind == FLOAT:
- value = self.cpu.done_with_this_frame_float_v
+ descr = self.cpu.getarraydescr_for_frame(kind)
+ ofs = self.cpu.unpack_arraydescr(descr)
+ if kind == FLOAT:
+ assert result_loc is r.f1
+ self.mc.lfd(r.f1.value, r.r3.value, ofs)
else:
- raise AssertionError(kind)
+ assert result_loc is r.r3
+ self.mc.ld(r.r3.value, r.r3.value, ofs)
- # take fast path on equality
- # => jump on inequality
- with scratch_reg(self.mc):
- self.mc.load_imm(r.SCRATCH, value)
- self.mc.cmp_op(0, tmploc.value, r.SCRATCH.value)
-
- #if values are equal we take the fast path
- # Slow path, calling helper
- # jump to merge point
-
- jd = descr.outermost_jitdriver_sd
- assert jd is not None
-
- # Path A: load return value and reset token
- # Fast Path using result boxes
-
- fast_jump_pos = self.mc.currpos()
- self.mc.nop()
-
- # Reset the vable token --- XXX really too much special logic here:-(
- if jd.index_of_virtualizable >= 0:
- from pypy.jit.backend.llsupport.descr import FieldDescr
- fielddescr = jd.vable_token_descr
- assert isinstance(fielddescr, FieldDescr)
- ofs = fielddescr.offset
- tmploc = regalloc.get_scratch_reg(INT)
- with scratch_reg(self.mc):
- self.mov_loc_loc(arglocs[0], r.SCRATCH)
- self.mc.li(tmploc.value, 0)
- self.mc.storex(tmploc.value, 0, r.SCRATCH.value)
-
- if op.result is not None:
- # load the return value from fail_boxes_xxx[0]
- kind = op.result.type
- if kind == INT:
- adr = self.fail_boxes_int.get_addr_for_num(0)
- elif kind == REF:
- adr = self.fail_boxes_ptr.get_addr_for_num(0)
- elif kind == FLOAT:
- adr = self.fail_boxes_float.get_addr_for_num(0)
- else:
- raise AssertionError(kind)
- with scratch_reg(self.mc):
- self.mc.load_imm(r.SCRATCH, adr)
- if op.result.type == FLOAT:
- self.mc.lfdx(resloc.value, 0, r.SCRATCH.value)
- else:
- self.mc.loadx(resloc.value, 0, r.SCRATCH.value)
-
- # jump to merge point, patched later
- fast_path_to_end_jump_pos = self.mc.currpos()
- self.mc.nop()
-
- jmp_pos = self.mc.currpos()
- pmc = OverwritingBuilder(self.mc, fast_jump_pos, 1)
- pmc.bc(4, 2, jmp_pos - fast_jump_pos)
+ def _call_assembler_patch_jmp(self, jmp_location):
+ currpos = self.mc.currpos()
+ pmc = OverwritingBuilder(self.mc, jmp_location, 1)
+ pmc.b(currpos - jmp_location)
pmc.overwrite()
- # Path B: use assembler helper
- asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
- if self.cpu.supports_floats:
- floats = r.VOLATILES_FLOAT
- else:
- floats = []
-
- with Saved_Volatiles(self.mc, save_RES=False):
- # result of previous call is in r3
- self.mov_loc_loc(arglocs[0], r.r4)
- self.mc.call(asm_helper_adr)
-
- # merge point
- currpos = self.mc.currpos()
- pmc = OverwritingBuilder(self.mc, fast_path_to_end_jump_pos, 1)
- pmc.b(currpos - fast_path_to_end_jump_pos)
- pmc.overwrite()
-
- with scratch_reg(self.mc):
- self.mc.load(r.SCRATCH.value, r.SPP.value, FORCE_INDEX_OFS)
- self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
-
- self._emit_guard(guard_op, regalloc._prepare_guard(guard_op),
- xxxxxxxxxxxxxxxxx+c.LT, save_exc=True)
-
- # ../x86/assembler.py:668
def redirect_call_assembler(self, oldlooptoken, newlooptoken):
# some minimal sanity checking
old_nbargs = oldlooptoken.compiled_loop_token._debug_nbargs
new_nbargs = newlooptoken.compiled_loop_token._debug_nbargs
assert old_nbargs == new_nbargs
- oldadr = oldlooptoken._ppc_func_addr
- target = newlooptoken._ppc_func_addr
- if IS_PPC_32:
- # we overwrite the instructions at the old _ppc_func_addr
- # to start with a JMP to the new _ppc_func_addr.
+ oldadr = oldlooptoken._ll_function_addr
+ target = newlooptoken._ll_function_addr
+ if IS_PPC_32 or not IS_BIG_ENDIAN:
+ # we overwrite the instructions at the old _ll_function_addr
+ # to start with a JMP to the new _ll_function_addr.
# Ideally we should rather patch all existing CALLs, but well.
mc = PPCBuilder()
mc.b_abs(target)
mc.copy_to_raw_memory(oldadr)
else:
- # PPC64 trampolines are data so overwrite the code address
- # in the function descriptor at the old address
- # (TOC and static chain pointer are the same).
+ # PPC64 big-endian trampolines are data so overwrite the code
+ # address in the function descriptor at the old address.
+ # Copy the whole 3-word trampoline, even though the other
+ # words are always zero so far.
odata = rffi.cast(rffi.CArrayPtr(lltype.Signed), oldadr)
tdata = rffi.cast(rffi.CArrayPtr(lltype.Signed), target)
odata[0] = tdata[0]
-
- def emit_guard_call_may_force(self, op, guard_op, arglocs, regalloc):
- faildescr = guard_op.getdescr()
- fail_index = self.cpu.get_fail_descr_number(faildescr)
- self._write_fail_index(fail_index)
- numargs = op.numargs()
- callargs = arglocs[2:numargs + 1] # extract the arguments to the call
- adr = arglocs[1]
- resloc = arglocs[0]
- #
- descr = op.getdescr()
- size = descr.get_result_size()
- signed = descr.is_result_signed()
- #
- xxxxxxxxxxxxxx
- self._emit_call(fail_index, adr, callargs, resloc, (size, signed))
-
- with scratch_reg(self.mc):
- self.mc.load(r.SCRATCH.value, r.SPP.value, FORCE_INDEX_OFS)
- self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
-
- self._emit_guard(guard_op, arglocs[1 + numargs:],
- xxxxxxxxxxxxxx+c.LT, save_exc=True)
-
- def emit_guard_call_release_gil(self, op, guard_op, arglocs, regalloc):
-
- # first, close the stack in the sense of the asmgcc GC root tracker
- gcrootmap = self.cpu.gc_ll_descr.gcrootmap
- numargs = op.numargs()
- callargs = arglocs[2:numargs + 1] # extract the arguments to the call
- adr = arglocs[1]
- resloc = arglocs[0]
-
- if gcrootmap:
- self.call_release_gil(gcrootmap, arglocs)
- # do the call
- faildescr = guard_op.getdescr()
- fail_index = self.cpu.get_fail_descr_number(faildescr)
- self._write_fail_index(fail_index)
- #
- descr = op.getdescr()
- size = descr.get_result_size()
- signed = descr.is_result_signed()
- #
- xxxxxxxxxxxxxxx
- self._emit_call(fail_index, adr, callargs, resloc, (size, signed))
- # then reopen the stack
- if gcrootmap:
- self.call_reacquire_gil(gcrootmap, resloc)
-
- with scratch_reg(self.mc):
- self.mc.load(r.SCRATCH.value, r.SPP.value, 0)
- self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
-
- self._emit_guard(guard_op, arglocs[1 + numargs:],
- xxxxxxxxxxxxxxxxxx+c.LT, save_exc=True)
-
- def call_release_gil(self, gcrootmap, save_registers):
- # XXX don't know whether this is correct
- # XXX use save_registers here
- assert gcrootmap.is_shadow_stack
- with Saved_Volatiles(self.mc):
- #self._emit_call(NO_FORCE_INDEX, self.releasegil_addr,
- # [], self._regalloc)
- self._emit_call(imm(self.releasegil_addr), [])
-
- def call_reacquire_gil(self, gcrootmap, save_loc):
- # save the previous result into the stack temporarily.
- # XXX like with call_release_gil(), we assume that we don't need
- # to save vfp regs in this case. Besides the result location
- assert gcrootmap.is_shadow_stack
- with Saved_Volatiles(self.mc):
- self._emit_call(imm(self.reacqgil_addr), [])
+ odata[1] = tdata[1]
+ odata[2] = tdata[2]
class OpAssembler(IntOpAssembler, GuardOpAssembler,
diff --git a/rpython/jit/backend/ppc/ppc_assembler.py b/rpython/jit/backend/ppc/ppc_assembler.py
--- a/rpython/jit/backend/ppc/ppc_assembler.py
+++ b/rpython/jit/backend/ppc/ppc_assembler.py
@@ -28,7 +28,7 @@
from rpython.rlib.debug import (debug_print, debug_start, debug_stop,
have_debug_prints)
from rpython.rlib import rgc
-from rpython.rtyper.annlowlevel import llhelper
+from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref
from rpython.rlib.objectmodel import we_are_translated, specialize
from rpython.rtyper.lltypesystem.lloperation import llop
from rpython.jit.backend.ppc.locations import StackLocation, get_fp_offset, imm
@@ -92,8 +92,10 @@
def __init__(self, cpu, translate_support_code=False):
BaseAssembler.__init__(self, cpu, translate_support_code)
self.loop_run_counters = []
+ self.wb_slowpath = [0, 0, 0, 0, 0]
self.setup_failure_recovery()
self.stack_check_slowpath = 0
+ self.propagate_exception_path = 0
self.teardown()
def set_debug(self, v):
@@ -122,33 +124,6 @@
mc.lfd(reg.value, spp_reg.value,
self.OFFSET_SPP_TO_FPR_SAVE_AREA + WORD * i)
- # The code generated here allocates a new stackframe
- # and is the first machine code to be executed.
- def _make_frame(self, frame_depth):
- XXX
- self.mc.make_function_prologue(frame_depth)
-
- # save SPP at the bottom of the stack frame
- self.mc.store(r.SPP.value, r.SP.value, WORD)
-
- # compute spilling pointer (SPP)
- self.mc.addi(r.SPP.value, r.SP.value,
- frame_depth - self.OFFSET_SPP_TO_OLD_BACKCHAIN)
-
- # save nonvolatile registers
- self._save_nonvolatiles()
-
- # save r31, use r30 as scratch register
- # this is safe because r30 has been saved already
- assert NONVOLATILES[-1] == r.SPP
- ofs_to_r31 = (self.OFFSET_SPP_TO_GPR_SAVE_AREA +
- WORD * (len(NONVOLATILES)-1))
- self.mc.load(r.r30.value, r.SP.value, WORD)
- self.mc.store(r.r30.value, r.SPP.value, ofs_to_r31)
- gcrootmap = self.cpu.gc_ll_descr.gcrootmap
- if gcrootmap and gcrootmap.is_shadow_stack:
- self.gen_shadowstack_header(gcrootmap)
-
def gen_shadowstack_header(self, gcrootmap):
# we need to put two words into the shadowstack: the MARKER_FRAME
# and the address of the frame (fp, actually)
@@ -296,7 +271,7 @@
self._frame_realloc_slowpath = mc.materialize(self.cpu, [])
self.mc = None
- def _store_and_reset_exception(self, mc, excvalloc, exctploc):
+ def _store_and_reset_exception(self, mc, excvalloc, exctploc=None):
"""Reset the exception, after fetching it inside the two regs.
"""
mc.load_imm(r.r2, self.cpu.pos_exc_value())
@@ -304,7 +279,8 @@
assert _check_imm_arg(diff)
# Load the exception fields into the two registers
mc.load(excvalloc.value, r.r2.value, 0)
- mc.load(exctploc.value, r.r2.value, diff)
+ if exctploc is not None:
+ mc.load(exctploc.value, r.r2.value, diff)
# Zero out the exception fields
mc.li(r.r0.value, 0)
mc.store(r.r0.value, r.r2.value, 0)
@@ -359,6 +335,7 @@
return mc.materialize(self.cpu, [])
def _build_malloc_slowpath(self):
+ xxxxxxx
mc = PPCBuilder()
frame_size = (len(r.MANAGED_FP_REGS) * WORD
+ (BACKCHAIN_SIZE + MAX_REG_PARAMS) * WORD)
@@ -405,7 +382,7 @@
# if r3 == 0 we skip the return above and jump to the exception path
offset = mc.currpos() - jmp_pos
pmc = OverwritingBuilder(mc, jmp_pos, 1)
- pmc.bc(12, 2, offset)
+ pmc.beq(offset)
pmc.overwrite()
# restore the frame before leaving
with scratch_reg(mc):
@@ -500,7 +477,7 @@
mc.b(self.propagate_exception_path)
pmc = OverwritingBuilder(mc, jnz_location, 1)
- pmc.bc(4, 2, mc.currpos() - jnz_location)
+ pmc.bne(mc.currpos() - jnz_location)
pmc.overwrite()
# restore link register out of preprevious frame
@@ -520,7 +497,6 @@
self.write_64_bit_func_descr(rawstart, rawstart+3*WORD)
self.stack_check_slowpath = rawstart
- # TODO: see what need to be done when for_frame is True
def _build_wb_slowpath(self, withcards, withfloats=False, for_frame=False):
descr = self.cpu.gc_ll_descr.write_barrier_descr
if descr is None:
@@ -536,56 +512,108 @@
#
# This builds a helper function called from the slow path of
# write barriers. It must save all registers, and optionally
- # all fp registers.
+ # all fp registers. It takes its single argument in r0.
mc = PPCBuilder()
+ old_mc = self.mc
+ self.mc = mc
#
- frame_size = ((len(r.VOLATILES) + len(r.VOLATILES_FLOAT)
- + BACKCHAIN_SIZE + MAX_REG_PARAMS) * WORD)
- mc.make_function_prologue(frame_size)
- for i in range(len(r.VOLATILES)):
- mc.store(r.VOLATILES[i].value, r.SP.value,
- (BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD)
- if self.cpu.supports_floats:
- for i in range(len(r.VOLATILES_FLOAT)):
- mc.stfd(r.VOLATILES_FLOAT[i].value, r.SP.value,
- (len(r.VOLATILES) + BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD)
+ ignored_regs = [reg for reg in r.MANAGED_REGS if not (
+ # 'reg' will be pushed if the following is true:
+ reg in r.VOLATILES or
+ reg is r.RCS1 or
+ (withcards and reg is r.RCS2))]
+ if not for_frame:
+ # push all volatile registers, push RCS1, and sometimes push RCS2
+ self._push_all_regs_to_jitframe(mc, ignored_regs, withfloats)
+ else:
+ return #XXXXX
+ # we have one word to align
+ mc.SUB_ri(esp.value, 7 * WORD) # align and reserve some space
+ mc.MOV_sr(WORD, eax.value) # save for later
+ if self.cpu.supports_floats:
+ mc.MOVSD_sx(2 * WORD, xmm0.value) # 32-bit: also 3 * WORD
+ if IS_X86_32:
+ mc.MOV_sr(4 * WORD, edx.value)
+ mc.MOV_sr(0, ebp.value)
+ exc0, exc1 = esi, edi
+ else:
+ mc.MOV_rr(edi.value, ebp.value)
+ exc0, exc1 = ebx, r12
+ mc.MOV(RawEspLoc(WORD * 5, REF), exc0)
+ mc.MOV(RawEspLoc(WORD * 6, INT), exc1)
+ # note that it's safe to store the exception in register,
+ # since the call to write barrier can't collect
+ # (and this is assumed a bit left and right here, like lack
+ # of _reload_frame_if_necessary)
+ self._store_and_reset_exception(mc, exc0, exc1)
- mc.call(rffi.cast(lltype.Signed, func))
- if self.cpu.supports_floats:
- for i in range(len(r.VOLATILES_FLOAT)):
- mc.lfd(r.VOLATILES_FLOAT[i].value, r.SP.value,
- (len(r.VOLATILES) + BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD)
- for i in range(len(r.VOLATILES)):
- mc.load(r.VOLATILES[i].value, r.SP.value,
- (BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD)
- mc.restore_LR_from_caller_frame(frame_size)
+ if withcards:
+ mc.mr(r.RCS2.value, r.r0.value)
+ #
+ # Save the lr into r.RCS1
+ mc.mflr(r.RCS1.value)
+ #
+ func = rffi.cast(lltype.Signed, func)
+ cb = callbuilder.CallBuilder(self, imm(func), [r.r0], None)
+ cb.emit()
+ #
+ # Restore lr
+ mc.mtlr(r.RCS1.value)
#
if withcards:
- # A final compare before the RET, for the caller. Careful to
+ # A final andix before the blr, for the caller. Careful to
# not follow this instruction with another one that changes
- # the status of the CPU flags!
- mc.lbz(r.SCRATCH.value, r.r3.value,
- descr.jit_wb_if_flag_byteofs)
- mc.extsb(r.SCRATCH.value, r.SCRATCH.value)
- mc.cmpwi(0, r.SCRATCH.value, 0)
+ # the status of cr0!
+ card_marking_mask = descr.jit_wb_cards_set_singlebyte
+ mc.lbz(r.RCS2.value, r.RCS2.value, descr.jit_wb_if_flag_byteofs)
+ mc.andix(r.RCS2.value, r.RCS2.value, card_marking_mask & 0xFF)
#
- mc.addi(r.SP.value, r.SP.value, frame_size)
- mc.blr()
- #
+
+ if not for_frame:
+ self._pop_all_regs_from_jitframe(mc, ignored_regs, withfloats)
+ mc.blr()
+ else:
+ XXXXXXX
+ if IS_X86_32:
+ mc.MOV_rs(edx.value, 4 * WORD)
+ if self.cpu.supports_floats:
+ mc.MOVSD_xs(xmm0.value, 2 * WORD)
+ mc.MOV_rs(eax.value, WORD) # restore
+ self._restore_exception(mc, exc0, exc1)
+ mc.MOV(exc0, RawEspLoc(WORD * 5, REF))
+ mc.MOV(exc1, RawEspLoc(WORD * 6, INT))
+ mc.LEA_rs(esp.value, 7 * WORD)
+ mc.RET()
+
+ self.mc = old_mc
rawstart = mc.materialize(self.cpu, [])
- self.wb_slowpath[withcards + 2 * withfloats] = rawstart
+ if for_frame:
+ self.wb_slowpath[4] = rawstart
+ else:
+ self.wb_slowpath[withcards + 2 * withfloats] = rawstart
def _build_propagate_exception_path(self):
if not self.cpu.propagate_exception_descr:
return
- mc = PPCBuilder()
- # the following call may be needed in the future:
- # self._store_and_reset_exception()
+ self.mc = PPCBuilder()
+ #
+ # read and reset the current exception
- mc.load_imm(r.RES, self.cpu.propagate_exception_descr)
- self._gen_epilogue(mc)
- self.propagate_exception_path = mc.materialize(self.cpu, [])
+ propagate_exception_descr = rffi.cast(lltype.Signed,
+ cast_instance_to_gcref(self.cpu.propagate_exception_descr))
+ ofs3 = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
+ ofs4 = self.cpu.get_ofs_of_frame_field('jf_descr')
+
+ self._store_and_reset_exception(self.mc, r.r3)
+ self.mc.load_imm(r.r4, propagate_exception_descr)
+ self.mc.std(r.r3.value, r.SPP.value, ofs3)
+ self.mc.std(r.r4.value, r.SPP.value, ofs4)
+ #
+ self._call_footer()
+ rawstart = self.mc.materialize(self.cpu, [])
+ self.propagate_exception_path = rawstart
+ self.mc = None
# The code generated here serves as an exit stub from
# the executed machine code.
@@ -617,28 +645,6 @@
return mc.materialize(self.cpu, [], self.cpu.gc_ll_descr.gcrootmap)
- def _gen_epilogue(self, mc):
- XXX
- gcrootmap = self.cpu.gc_ll_descr.gcrootmap
- if gcrootmap and gcrootmap.is_shadow_stack:
- self.gen_footer_shadowstack(gcrootmap, mc)
-
- # save SPP back in r3
- mc.mr(r.r5.value, r.SPP.value)
- self._restore_nonvolatiles(mc, r.r5)
- # load old backchain into r4
- if IS_PPC_32:
- ofs = WORD
- else:
- ofs = WORD * 2
- mc.load(r.r4.value, r.r5.value, self.OFFSET_SPP_TO_OLD_BACKCHAIN + ofs)
- mc.mtlr(r.r4.value) # restore LR
- # From SPP, we have a constant offset to the old backchain. We use the
- # SPP to re-establish the old backchain because this exit stub is
- # generated before we know how much space the entire frame will need.
- mc.addi(r.SP.value, r.r5.value, self.OFFSET_SPP_TO_OLD_BACKCHAIN) # restore old SP
- mc.blr()
-
def _save_managed_regs(self, mc):
""" store managed registers in ENCODING AREA
"""
@@ -735,7 +741,7 @@
offset = self.mc.currpos() - patch_loc
#
pmc = OverwritingBuilder(self.mc, patch_loc, 1)
- pmc.bc(4, 1, offset) # jump if SCRATCH <= r16, i. e. not(SCRATCH > r16)
+ pmc.ble(offset) # jump if SCRATCH <= r16, i. e. not(SCRATCH > r16)
pmc.overwrite()
def _call_footer(self):
@@ -944,97 +950,11 @@
self.teardown()
return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
- DESCR_REF = 0x00
- DESCR_INT = 0x01
- DESCR_FLOAT = 0x02
- DESCR_SPECIAL = 0x03
- CODE_FROMSTACK = 128
- CODE_STOP = 0 | DESCR_SPECIAL
- CODE_HOLE = 4 | DESCR_SPECIAL
- CODE_INPUTARG = 8 | DESCR_SPECIAL
-
- def gen_descr_encoding(self, descr, failargs, locs):
- assert self.mc is not None
- buf = []
- for i in range(len(failargs)):
- arg = failargs[i]
- if arg is not None:
- if arg.type == REF:
- kind = self.DESCR_REF
- elif arg.type == INT:
- kind = self.DESCR_INT
- elif arg.type == FLOAT:
- kind = self.DESCR_FLOAT
- else:
- raise AssertionError("bogus kind")
- loc = locs[i]
- if loc.is_stack():
- pos = loc.position
- if pos < 0:
- buf.append(self.CODE_INPUTARG)
- pos = ~pos
- n = self.CODE_FROMSTACK // 4 + pos
- else:
- assert loc.is_reg() or loc.is_fp_reg()
- n = loc.value
- n = kind + 4 * n
- while n > 0x7F:
- buf.append((n & 0x7F) | 0x80)
- n >>= 7
- else:
- n = self.CODE_HOLE
- buf.append(n)
- buf.append(self.CODE_STOP)
-
- fdescr = self.cpu.get_fail_descr_number(descr)
-
- buf.append((fdescr >> 24) & 0xFF)
- buf.append((fdescr >> 16) & 0xFF)
- buf.append((fdescr >> 8) & 0xFF)
- buf.append( fdescr & 0xFF)
-
- lenbuf = len(buf)
- # XXX fix memory leaks
- enc_arr = lltype.malloc(rffi.CArray(rffi.CHAR), lenbuf,
- flavor='raw', track_allocation=False)
- enc_ptr = rffi.cast(lltype.Signed, enc_arr)
- for i, byte in enumerate(buf):
- enc_arr[i] = chr(byte)
- # assert that the fail_boxes lists are big enough
- assert len(failargs) <= self.fail_boxes_int.SIZE
- return enc_ptr
-
- def align(self, size):
- while size % 8 != 0:
- size += 1
- return size
-
def teardown(self):
self.pending_guard_tokens = None
self.mc = None
self.current_clt = None
- def compute_frame_depth(self, spilling_area, param_depth):
- PARAMETER_AREA = param_depth * WORD
- if IS_PPC_64:
- PARAMETER_AREA += MAX_REG_PARAMS * WORD
- SPILLING_AREA = spilling_area * WORD
-
- frame_depth = ( GPR_SAVE_AREA
- + FPR_SAVE_AREA
- + FLOAT_INT_CONVERSION
- + FORCE_INDEX
- + self.ENCODING_AREA
- + SPILLING_AREA
- + PARAMETER_AREA
- + BACKCHAIN_SIZE * WORD)
-
- # align stack pointer
- while frame_depth % (4 * WORD) != 0:
- frame_depth += WORD
-
- return frame_depth
-
def _find_failure_recovery_bytecode(self, faildescr):
return faildescr._failure_recovery_code_adr
@@ -1207,7 +1127,8 @@
with scratch_reg(self.mc):
offset = loc.value
self.mc.load_imm(r.SCRATCH, value)
- self.mc.store(r.SCRATCH.value, r.SPP.value, offset)
+ self.mc.lfdx(r.FP_SCRATCH.value, 0, r.SCRATCH.value)
+ self.mc.stfd(r.FP_SCRATCH.value, r.SPP.value, offset)
return
assert 0, "not supported location"
elif prev_loc.is_fp_reg():
@@ -1258,13 +1179,13 @@
self.mc.lfd(loc.value, r.SP.value, index)
else:
self.mc.lfd(r.FP_SCRATCH.value, r.SP.value, index)
- self.regalloc_mov(r.FP_SCRATCH.value, loc)
+ self.regalloc_mov(r.FP_SCRATCH, loc)
else:
if loc.is_core_reg():
self.mc.ld(loc.value, r.SP.value, index)
else:
self.mc.ld(r.SCRATCH.value, r.SP.value, index)
- self.regalloc_mov(r.SCRATCH.value, loc)
+ self.regalloc_mov(r.SCRATCH, loc)
def malloc_cond(self, nursery_free_adr, nursery_top_adr, size):
assert size & (WORD-1) == 0 # must be correctly aligned
@@ -1301,7 +1222,7 @@
offset = self.mc.currpos() - fast_jmp_pos
pmc = OverwritingBuilder(self.mc, fast_jmp_pos, 1)
- pmc.bc(4, 1, offset) # jump if LE (not GT)
+ pmc.ble(offset) # jump if LE (not GT)
pmc.overwrite()
with scratch_reg(self.mc):
@@ -1318,8 +1239,10 @@
gcrootmap.write_callshape(mark, force_index)
def propagate_memoryerror_if_r3_is_null(self):
- return # XXXXXXXXX
- self.mc.cmp_op(0, r.RES.value, 0, imm=True)
+ # if self.propagate_exception_path == 0 (tests), this may jump to 0
+ # and segfaults. too bad. the alternative is to continue anyway
+ # with r3==0, but that will segfault too.
+ self.mc.cmp_op(0, r.r3.value, 0, imm=True)
self.mc.b_cond_abs(self.propagate_exception_path, c.EQ)
def write_new_force_index(self):
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -490,7 +490,7 @@
prepare_int_force_ge_zero = helper.prepare_unary_op
- def prepare_math_sqrt(self, op):
+ def _prepare_math_sqrt(self, op):
loc = self.ensure_reg(op.getarg(1))
self.free_op_vars()
res = self.fprm.force_allocate_reg(op.result)
@@ -839,8 +839,17 @@
return [base_loc, index_loc, value_loc, ofs_loc,
imm_size, imm_size]
- #prepare_copystrcontent = void
- #prepare_copyunicodecontent = void
+ def prepare_copystrcontent(self, op):
+ src_ptr_loc = self.ensure_reg(op.getarg(0))
+ dst_ptr_loc = self.ensure_reg(op.getarg(1))
+ src_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(2))
+ dst_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(3))
+ length_loc = self.ensure_reg_or_any_imm(op.getarg(4))
+ self._spill_before_call(save_all_regs=False)
+ return [src_ptr_loc, dst_ptr_loc,
+ src_ofs_loc, dst_ofs_loc, length_loc]
+
+ prepare_copyunicodecontent = prepare_copystrcontent
def prepare_unicodelen(self, op):
basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
@@ -877,22 +886,21 @@
prepare_cast_ptr_to_int = prepare_same_as
prepare_cast_int_to_ptr = prepare_same_as
+ def get_oopspecindex(self, op):
+ descr = op.getdescr()
+ assert descr is not None
+ effectinfo = descr.get_extra_info()
+ if effectinfo is not None:
+ return effectinfo.oopspecindex
+ return EffectInfo.OS_NONE
+
def prepare_call(self, op):
- effectinfo = op.getdescr().get_extra_info()
- if effectinfo is not None:
- oopspecindex = effectinfo.oopspecindex
- if oopspecindex == EffectInfo.OS_MATH_SQRT:
- xxxxxxxxx
- args = self.prepare_math_sqrt(op)
- self.assembler.emit_math_sqrt(op, args, self)
- return
+ oopspecindex = self.get_oopspecindex(op)
+ if oopspecindex == EffectInfo.OS_MATH_SQRT:
+ return self._prepare_math_sqrt(op)
return self._prepare_call(op)
- def _prepare_call(self, op, save_all_regs=False):
- args = []
- args.append(None)
- for i in range(op.numargs()):
- args.append(self.loc(op.getarg(i)))
+ def _spill_before_call(self, save_all_regs=False):
# spill variables that need to be saved around calls
self.fprm.before_call(save_all_regs=save_all_regs)
if not save_all_regs:
@@ -900,10 +908,16 @@
if gcrootmap and gcrootmap.is_shadow_stack:
save_all_regs = 2
self.rm.before_call(save_all_regs=save_all_regs)
+
+ def _prepare_call(self, op, save_all_regs=False):
+ args = []
+ args.append(None)
+ for i in range(op.numargs()):
+ args.append(self.loc(op.getarg(i)))
+ self._spill_before_call(save_all_regs)
if op.result:
resloc = self.after_call(op.result)
args[0] = resloc
- self.before_call_called = True
return args
def prepare_call_malloc_nursery(self, op):
@@ -943,31 +957,16 @@
prepare_keepalive = void
def prepare_cond_call_gc_wb(self, op):
- assert op.result is None
- # we force all arguments in a reg because it will be needed anyway by
- # the following setfield_gc or setarrayitem_gc. It avoids loading it
- # twice from the memory.
- N = op.numargs()
- args = op.getarglist()
- arglocs = [self._ensure_value_is_boxed(op.getarg(i), args)
- for i in range(N)]
- card_marking = False
- if op.getopnum() == rop.COND_CALL_GC_WB_ARRAY:
- descr = op.getdescr()
- if we_are_translated():
- cls = self.cpu.gc_ll_descr.has_write_barrier_class()
- assert cls is not None and isinstance(descr, cls)
- card_marking = descr.jit_wb_cards_set != 0
- if card_marking: # allocate scratch registers
- tmp1 = self.get_scratch_reg(INT)
- tmp2 = self.get_scratch_reg(INT)
- tmp3 = self.get_scratch_reg(INT)
- arglocs.append(tmp1)
- arglocs.append(tmp2)
- arglocs.append(tmp3)
+ arglocs = [self.ensure_reg(op.getarg(0))]
return arglocs
- prepare_cond_call_gc_wb_array = prepare_cond_call_gc_wb
+ def prepare_cond_call_gc_wb_array(self, op):
+ arglocs = [self.ensure_reg(op.getarg(0)),
+ self.ensure_reg_or_16bit_imm(op.getarg(1)),
+ None]
+ if arglocs[1].is_reg():
+ arglocs[2] = self.get_scratch_reg(INT)
+ return arglocs
def prepare_force_token(self, op):
res_loc = self.force_allocate_reg(op.result)
@@ -1028,21 +1027,11 @@
prepare_call_release_gil = prepare_call_may_force
- def prepare_guard_call_assembler(self, op, guard_op):
- descr = op.getdescr()
- assert isinstance(descr, JitCellToken)
- jd = descr.outermost_jitdriver_sd
- assert jd is not None
- vable_index = jd.index_of_virtualizable
- if vable_index >= 0:
- self._sync_var(op.getarg(vable_index))
- vable = self.frame_manager.loc(op.getarg(vable_index))
- else:
- vable = imm(0)
- # make sure the call result location is free
- tmploc = self.get_scratch_reg(INT, selected_reg=r.RES)
- self.possibly_free_vars(guard_op.getfailargs())
- return [vable, tmploc] + self._prepare_call(op, save_all_regs=True)
+ def prepare_call_assembler(self, op):
+ locs = self.locs_for_call_assembler(op)
+ self._spill_before_call(save_all_regs=True)
+ resloc = self.after_call(op.result)
+ return [resloc] + locs
def _prepare_args_for_new_op(self, new_args):
gc_ll_descr = self.cpu.gc_ll_descr
@@ -1060,6 +1049,11 @@
self.force_spill_var(op.getarg(0))
return []
+ def prepare_guard_not_forced_2(self, op):
+ self.rm.before_call(op.getfailargs(), save_all_regs=True)
+ arglocs = self._prepare_guard(op)
+ return arglocs
+
def prepare_zero_ptr_field(self, op):
base_loc = self.ensure_reg(op.getarg(0))
ofs_loc = self.ensure_reg_or_16bit_imm(op.getarg(1))
diff --git a/rpython/jit/backend/ppc/runner.py b/rpython/jit/backend/ppc/runner.py
--- a/rpython/jit/backend/ppc/runner.py
+++ b/rpython/jit/backend/ppc/runner.py
@@ -2,7 +2,6 @@
from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
from rpython.rtyper.llinterp import LLInterpreter
from rpython.rlib import rgc
-#from rpython.jit.backend.ppc.arch import FORCE_INDEX_OFS
from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
from rpython.jit.backend.ppc.ppc_assembler import AssemblerPPC
from rpython.jit.backend.ppc.arch import WORD
@@ -33,11 +32,6 @@
def __init__(self, rtyper, stats, opts=None, translate_support_code=False,
gcdescr=None):
- if gcdescr is not None:
- gcdescr.force_index_ofs = FORCE_INDEX_OFS
- # XXX for now the ppc backend does not support the gcremovetypeptr
- # translation option
- # assert gcdescr.config.translation.gcremovetypeptr is False
AbstractLLCPU.__init__(self, rtyper, stats, opts,
translate_support_code, gcdescr)
@@ -80,8 +74,7 @@
for jmp, tgt in looptoken.compiled_loop_token.invalidate_positions:
mc = PPCBuilder()
- mc.b_offset(tgt)
- mc.prepare_insts_blocks()
+ mc.b_offset(tgt) # a single instruction
mc.copy_to_raw_memory(jmp)
# positions invalidated
looptoken.compiled_loop_token.invalidate_positions = []
diff --git a/rpython/jit/backend/ppc/symbol_lookup.py b/rpython/jit/backend/ppc/symbol_lookup.py
deleted file mode 100644
--- a/rpython/jit/backend/ppc/symbol_lookup.py
+++ /dev/null
@@ -1,15 +0,0 @@
-
-def lookup(sym):
- global lookup
- import py
-
- _ppcgen = py.magic.autopath().dirpath().join('_ppcgen.c')._getpymodule()
-
- try:
- from _ppcgen import NSLookupAndBindSymbol
-
- def lookup(sym):
- return NSLookupAndBindSymbol('_' + sym)
- except ImportError:
- from _ppcgen import dlsym as lookup
- return lookup(sym)
diff --git a/rpython/jit/backend/ppc/test/test_ppc.py b/rpython/jit/backend/ppc/test/test_ppc.py
--- a/rpython/jit/backend/ppc/test/test_ppc.py
+++ b/rpython/jit/backend/ppc/test/test_ppc.py
@@ -2,7 +2,6 @@
import random, sys, os
from rpython.jit.backend.ppc.codebuilder import BasicPPCAssembler, PPCBuilder
-from rpython.jit.backend.ppc.symbol_lookup import lookup
from rpython.jit.backend.ppc.regname import *
from rpython.jit.backend.ppc.register import *
from rpython.jit.backend.ppc import form
diff --git a/rpython/jit/backend/ppc/test/test_runner.py b/rpython/jit/backend/ppc/test/test_runner.py
--- a/rpython/jit/backend/ppc/test/test_runner.py
+++ b/rpython/jit/backend/ppc/test/test_runner.py
@@ -23,16 +23,22 @@
# ====> ../../test/runner_test.py
if IS_PPC_32:
- add_loop_instructions = ["mr", "add", "cmpwi", "beq", "b"]
+ add_loop_instructions = ["ld", "add", "cmpwi", "beq", "b"]
else:
- add_loop_instructions = ["mr", "add", "cmpdi", "beq", "b"]
- bridge_loop_instructions_short = ["lis", "ori", "mtctr", "bctr"]
- bridge_loop_instructions_long = ["lis", "ori", "rldicr", "oris", "ori",
- "mtctr", "bctr"]
-
- def setup_method(self, meth):
- self.cpu = PPC_CPU(rtyper=None, stats=FakeStats())
- self.cpu.setup_once()
+ add_loop_instructions = ["ld", "add", "cmpdi", "beq", "b"]
+ bridge_loop_instructions = [
+ "ld", "cmpdi", "bge+",
+ "li", "lis", "ori", "mtctr", "bctrl",
+ "lis", "ori", "mtctr", "bctr"]
+ bridge_loop_instructions_alternative = [
+ "ld", "cmpdi", "bge+",
+ "li", "li", "rldicr", "oris", "ori", "mtctr", "bctrl",
+ "li", "rldicr", "oris", "ori", "mtctr", "bctr"]
+
+ def get_cpu(self):
+ cpu = PPC_CPU(rtyper=None, stats=FakeStats())
+ cpu.setup_once()
+ return cpu
def test_compile_loop_many_int_args(self):
for numargs in range(2, 16):
diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -1113,12 +1113,12 @@
r_box = self.alloc_string("!???????!")
if r_box_is_const:
r_box = r_box.constbox()
- self.execute_operation(rop.COPYSTRCONTENT,
- [s_box, r_box,
- srcstart_box,
- dststart_box,
- length_box], 'void')
- assert self.look_string(r_box) == "!??cdef?!"
+ self.execute_operation(rop.COPYSTRCONTENT,
+ [s_box, r_box,
+ srcstart_box,
+ dststart_box,
+ length_box], 'void')
+ assert self.look_string(r_box) == "!??cdef?!"
def test_copyunicodecontent(self):
s_box = self.alloc_unicode(u"abcdef")
@@ -1130,12 +1130,12 @@
r_box = self.alloc_unicode(u"!???????!")
if r_box_is_const:
r_box = r_box.constbox()
- self.execute_operation(rop.COPYUNICODECONTENT,
- [s_box, r_box,
- srcstart_box,
- dststart_box,
- length_box], 'void')
- assert self.look_unicode(r_box) == u"!??cdef?!"
+ self.execute_operation(rop.COPYUNICODECONTENT,
+ [s_box, r_box,
+ srcstart_box,
+ dststart_box,
+ length_box], 'void')
+ assert self.look_unicode(r_box) == u"!??cdef?!"
def test_do_unicode_basic(self):
u = self.cpu.bh_newunicode(5)
@@ -2178,7 +2178,7 @@
funcbox = self.get_funcbox(self.cpu, func_ptr)
class WriteBarrierDescr(AbstractDescr):
jit_wb_if_flag = 4096
- jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
+ jit_wb_if_flag_byteofs = struct.pack("l", 4096).index('\x10')
jit_wb_if_flag_singlebyte = 0x10
def get_write_barrier_fn(self, cpu):
return funcbox.getint()
@@ -2212,7 +2212,7 @@
funcbox = self.get_funcbox(self.cpu, func_ptr)
class WriteBarrierDescr(AbstractDescr):
jit_wb_if_flag = 4096
- jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
+ jit_wb_if_flag_byteofs = struct.pack("l", 4096).index('\x10')
jit_wb_if_flag_singlebyte = 0x10
jit_wb_cards_set = 0 # <= without card marking
def get_write_barrier_fn(self, cpu):
@@ -2259,10 +2259,10 @@
funcbox = self.get_funcbox(self.cpu, func_ptr)
class WriteBarrierDescr(AbstractDescr):
jit_wb_if_flag = 4096
- jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
+ jit_wb_if_flag_byteofs = struct.pack("l", 4096).index('\x10')
jit_wb_if_flag_singlebyte = 0x10
jit_wb_cards_set = 32768
- jit_wb_cards_set_byteofs = struct.pack("i", 32768).index('\x80')
+ jit_wb_cards_set_byteofs = struct.pack("l", 32768).index('\x80')
jit_wb_cards_set_singlebyte = -0x80
jit_wb_card_page_shift = 7
def get_write_barrier_from_array_fn(self, cpu):
@@ -3674,6 +3674,7 @@
assert not called
def test_assembler_call_propagate_exc(self):
+ # WARNING: this test depends on test_memoryerror first passing
if not isinstance(self.cpu, AbstractLLCPU):
py.test.skip("llgraph can't fake exceptions well enough, give up")
@@ -4985,3 +4986,35 @@
assert a[i].a == a[i].b == val
else:
assert a[i] == rffi.cast(OF, val)
+
+ def test_jump_float_constant(self):
+ f0 = BoxFloat()
+ f1 = BoxFloat()
+ i2 = BoxInt()
+ f3 = BoxFloat()
+ i4 = BoxInt()
+ looptoken = JitCellToken()
+ targettoken = TargetToken()
+ operations = [
+ ResOperation(rop.LABEL, [f0, f1], None, descr=targettoken),
+ ResOperation(rop.CAST_FLOAT_TO_INT, [f1], i2),
+ ResOperation(rop.GUARD_VALUE, [i2, ConstInt(123456)], None,
+ descr=BasicFailDescr(6)),
+ ResOperation(rop.FLOAT_ADD, [f0, ConstFloat(-0.5)], f3),
+ ResOperation(rop.FLOAT_GT, [f3, ConstFloat(9.12)], i4),
+ ResOperation(rop.GUARD_TRUE, [i4], None, descr=BasicFailDescr(2)),
+ ResOperation(rop.JUMP, [f3, ConstFloat(123456.78912)], None,
+ descr=targettoken),
+ ]
+ inputargs = [f0, f1]
+ operations[2].setfailargs([])
+ operations[-2].setfailargs([f1, f3])
+
+ self.cpu.compile_loop(inputargs, operations, looptoken)
+ deadframe = self.cpu.execute_token(looptoken, 12.25, 123456.01)
+ fail = self.cpu.get_latest_descr(deadframe)
+ assert fail.identifier == 2
+ res = longlong.getrealfloat(self.cpu.get_float_value(deadframe, 0))
+ assert res == 123456.78912
+ res = longlong.getrealfloat(self.cpu.get_float_value(deadframe, 1))
+ assert res == 8.75
diff --git a/rpython/jit/backend/tool/viewcode.py b/rpython/jit/backend/tool/viewcode.py
--- a/rpython/jit/backend/tool/viewcode.py
+++ b/rpython/jit/backend/tool/viewcode.py
@@ -49,10 +49,12 @@
'arm': 'arm',
'arm_32': 'arm',
'ppc' : 'powerpc:common64',
+ 'ppc-64' : 'powerpc:common64',
}
machine_endianness = {
# default value: 'little'
'ppc' : sys.byteorder, # i.e. same as the running machine...
+ 'ppc-64' : sys.byteorder, # i.e. same as the running machine...
}
cmd = find_objdump()
objdump = ('%(command)s -b binary -m %(machine)s '
More information about the pypy-commit
mailing list