[pypy-commit] pypy ppc-updated-backend: PPC Backend #2: test_runner.py is now "3/4 passing".
arigo
noreply at buildbot.pypy.org
Fri Sep 4 08:21:55 CEST 2015
Author: Armin Rigo <arigo at tunes.org>
Branch: ppc-updated-backend
Changeset: r79405:733ec999d18b
Date: 2015-08-29 13:51 +0200
http://bitbucket.org/pypy/pypy/changeset/733ec999d18b/
Log:	PPC Backend #2: test_runner.py is now "3/4 passing".
Various fixes. Also, optimized "prepare_int_lt" and other similar
operations: before, a "int_lt; guard_true" combination was produced
as highly inefficient assembler. And, the way we built "guard_xyz"
operations would fail if the target of the branch is too far away
(>32k); test and fix for this case.
diff too long, truncating to 2000 out of 3139 lines
diff --git a/rpython/jit/backend/ppc/TODO b/rpython/jit/backend/ppc/TODO
deleted file mode 100644
--- a/rpython/jit/backend/ppc/TODO
+++ /dev/null
@@ -1,4 +0,0 @@
-
-prepare_guard_int_lt & friends: re-enable in walk_operations()
-
-guard_xyz: fail if the target of the branch is too far away (+32k?)
diff --git a/rpython/jit/backend/ppc/callbuilder.py b/rpython/jit/backend/ppc/callbuilder.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/ppc/callbuilder.py
@@ -0,0 +1,60 @@
+from rpython.jit.backend.ppc.arch import IS_PPC_64, WORD
+import rpython.jit.backend.ppc.register as r
+from rpython.jit.metainterp.history import INT
+from rpython.jit.backend.llsupport.callbuilder import AbstractCallBuilder
+from rpython.jit.backend.ppc.jump import remap_frame_layout
+
+
+def follow_jump(addr):
+ # xxx implement me
+ return addr
+
+
+class CallBuilder(AbstractCallBuilder):
+
+ def __init__(self, assembler, fnloc, arglocs, resloc):
+ AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
+ resloc, restype=INT, ressize=None)
+
+ def prepare_arguments(self):
+ assert IS_PPC_64
+
+ # First, copy fnloc into r2
+ self.asm.regalloc_mov(self.fnloc, r.r2)
+
+ # Prepare arguments
+ arglocs = self.arglocs
+ argtypes = self.argtypes
+
+ assert len(argtypes) <= 8, "XXX"
+ non_float_locs = arglocs
+ non_float_regs = ( # XXX
+ [r.r3, r.r4, r.r5, r.r6, r.r7, r.r8, r.r9, r.r10][:len(argtypes)])
+
+ remap_frame_layout(self.asm, non_float_locs, non_float_regs,
+ r.SCRATCH)
+
+
+ def push_gcmap(self):
+ pass # XXX
+
+ def pop_gcmap(self):
+ pass # XXX
+
+ def emit_raw_call(self):
+ # Load the function descriptor (currently in r2) from memory:
+ # [r2 + 0] -> ctr
+ # [r2 + 16] -> r11
+ # [r2 + 8] -> r2 (= TOC)
+ self.mc.ld(r.SCRATCH.value, r.r2.value, 0)
+ self.mc.ld(r.r11.value, r.r2.value, 16)
+ self.mc.mtctr(r.SCRATCH.value)
+ self.mc.ld(r.TOC.value, r.r2.value, 8)
+ # Call it
+ self.mc.bctrl()
+
+ def restore_stack_pointer(self):
+ pass # XXX
+
+ def load_result(self):
+ pass
diff --git a/rpython/jit/backend/ppc/codebuilder.py b/rpython/jit/backend/ppc/codebuilder.py
--- a/rpython/jit/backend/ppc/codebuilder.py
+++ b/rpython/jit/backend/ppc/codebuilder.py
@@ -643,7 +643,7 @@
bdnz = BA.bc(BO=16, BI=0)
bdnzt = BA.bc(BO=8)
bdnzf = BA.bc(BO=0)
- bdz = BA.bc(BO=18)
+ bdz = BA.bc(BO=18, BI=0)
bdzt = BA.bc(BO=10)
bdzf = BA.bc(BO=2)
@@ -652,7 +652,7 @@
bdnza = BA.bca(BO=16, BI=0)
bdnzta = BA.bca(BO=8)
bdnzfa = BA.bca(BO=0)
- bdza = BA.bca(BO=18)
+ bdza = BA.bca(BO=18, BI=0)
bdzta = BA.bca(BO=10)
bdzfa = BA.bca(BO=2)
@@ -661,7 +661,7 @@
bdnzl = BA.bcl(BO=16, BI=0)
bdnztl = BA.bcl(BO=8)
bdnzfl = BA.bcl(BO=0)
- bdzl = BA.bcl(BO=18)
+ bdzl = BA.bcl(BO=18, BI=0)
bdztl = BA.bcl(BO=10)
bdzfl = BA.bcl(BO=2)
@@ -670,7 +670,7 @@
bdnzla = BA.bcla(BO=16, BI=0)
bdnztla = BA.bcla(BO=8)
bdnzfla = BA.bcla(BO=0)
- bdzla = BA.bcla(BO=18)
+ bdzla = BA.bcla(BO=18, BI=0)
bdztla = BA.bcla(BO=10)
bdzfla = BA.bcla(BO=2)
@@ -934,20 +934,24 @@
#self.offset = offset
class OverwritingBuilder(PPCAssembler):
- def __init__(self, cb, start, num_insts):
+ def __init__(self, mc, start, num_insts=0):
PPCAssembler.__init__(self)
- self.cb = cb
+ self.mc = mc
self.index = start
- self.num_insts = num_insts
def currpos(self):
assert 0, "not implemented"
+ def write32(self, word):
+ index = self.index
+ self.mc.overwrite(index, chr((word >> 24) & 0xff))
+ self.mc.overwrite(index + 1, chr((word >> 16) & 0xff))
+ self.mc.overwrite(index + 2, chr((word >> 8) & 0xff))
+ self.mc.overwrite(index + 3, chr(word & 0xff))
+ self.index = index + 4
+
def overwrite(self):
- assert len(self.insts) <= self.num_insts
- startindex = self.index / 4
- for i, new_inst in enumerate(self.insts):
- self.cb.insts[i + startindex] = new_inst
+ pass
class PPCBuilder(BlockBuilderMixin, PPCAssembler):
def __init__(self):
@@ -997,16 +1001,16 @@
self.b(offset)
def b_cond_offset(self, offset, condition):
- BI = condition[0]
- BO = condition[1]
+ assert condition != c.UH
+ BI, BO = c.encoding[condition]
pos = self.currpos()
target_ofs = offset - pos
self.bc(BO, BI, target_ofs)
def b_cond_abs(self, addr, condition):
- BI = condition[0]
- BO = condition[1]
+ assert condition != c.UH
+ BI, BO = c.encoding[condition]
with scratch_reg(self):
self.load_imm(r.SCRATCH, addr)
diff --git a/rpython/jit/backend/ppc/condition.py b/rpython/jit/backend/ppc/condition.py
--- a/rpython/jit/backend/ppc/condition.py
+++ b/rpython/jit/backend/ppc/condition.py
@@ -1,22 +1,32 @@
-# CONDITION = (BI (number of bit tested in CR), BO (12 if bit is 1, else 4))
+EQ = 0
+NE = 1
+LE = 2
+GT = 3
+LT = 4
+GE = 5
+SO = 6
+NS = 7
+UH = -1 # invalid
-SET = 12
-UNSET = 4
+def negate(cond):
+ return cond ^ 1
-LE = (1, UNSET)
-NE = (2, UNSET)
-GT = (1, SET)
-LT = (0, SET)
-EQ = (2, SET)
-GE = (0, UNSET)
-UH = (-1, -1) # invalid
+assert negate(EQ) == NE
+assert negate(NE) == EQ
+assert negate(LE) == GT
+assert negate(GT) == LE
+assert negate(LT) == GE
+assert negate(GE) == LT
+assert negate(SO) == NS
+assert negate(NS) == SO
-# values below are random ...
-
-U_LT = 50
-U_LE = 60
-U_GT = 70
-U_GE = 80
-
-IS_TRUE = 90
-IS_ZERO = 100
+encoding = [
+ (2, 12), # EQ
+ (2, 4), # NE
+ (1, 4), # LE
+ (1, 12), # GT
+ (0, 12), # LT
+ (0, 4), # GE
+ (3, 12), # SO
+ (3, 4), # NS
+]
diff --git a/rpython/jit/backend/ppc/helper/assembler.py b/rpython/jit/backend/ppc/helper/assembler.py
--- a/rpython/jit/backend/ppc/helper/assembler.py
+++ b/rpython/jit/backend/ppc/helper/assembler.py
@@ -2,53 +2,70 @@
from rpython.rlib.rarithmetic import intmask
from rpython.jit.backend.ppc.arch import MAX_REG_PARAMS, IS_PPC_32, WORD
from rpython.jit.metainterp.history import FLOAT
+from rpython.jit.metainterp.resoperation import rop
import rpython.jit.backend.ppc.register as r
from rpython.rtyper.lltypesystem import rffi, lltype
+def test_condition_for(condition, guard_op):
+ opnum = guard_op.getopnum()
+ if opnum == rop.GUARD_FALSE:
+ return condition
+ elif opnum == rop.GUARD_TRUE:
+ return c.negate(condition)
+ assert 0, opnum
+
+def do_emit_cmp_op(self, guard_op, arglocs, condition, signed, fp):
+ l0 = arglocs[0]
+ l1 = arglocs[1]
+ assert not l0.is_imm()
+ # do the comparison
+ self.mc.cmp_op(0, l0.value, l1.value,
+ imm=l1.is_imm(), signed=signed, fp=fp)
+
+ # CR bits:
+ # 0: LT
+ # 1: GT
+ # 2: EQ
+ # 3: UNordered
+
+ if fp:
+ # Support for NaNs: with LE or GE, if one of the operands is a
+ # NaN, we get CR=1,0,0,0 (unordered bit only). We're about to
+ # check "not GT" or "not LT", but in case of NaN we want to
+ # get the answer False.
+ if condition == c.LE:
+ self.mc.crnor(1, 1, 3)
+ condition = c.GT
+ elif condition == c.GE:
+ self.mc.crnor(0, 0, 3)
+ condition = c.LT
+
+ if guard_op is None:
+ # After the comparison, place the result in a single bit of the CR
+ bit, invert = c.encoding[condition]
+ assert 0 <= bit <= 3
+ if invert == 12:
+ pass
+ elif invert == 4:
+ self.mc.crnor(bit, bit, bit)
+ else:
+ assert 0
+
+ assert len(arglocs) == 3
+ res = arglocs[2]
+ resval = res.value
+ # move the content of the CR to resval
+ self.mc.mfcr(resval)
+ # zero out everything except of the result
+ self.mc.rlwinm(resval, resval, 1 + bit, 31, 31)
+ else:
+ failargs = arglocs[2:]
+ fcond = test_condition_for(condition, guard_op)
+ self._emit_guard(guard_op, failargs, fcond)
+
def gen_emit_cmp_op(condition, signed=True, fp=False):
- def f(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
- # do the comparison
- self.mc.cmp_op(0, l0.value, l1.value,
- imm=l1.is_imm(), signed=signed, fp=fp)
- # After the comparison, place the result
- # in the first bit of the CR
- if condition == c.LT or condition == c.U_LT:
- self.mc.cror(0, 0, 0)
- elif condition == c.LE or condition == c.U_LE:
- self.mc.cror(0, 0, 2)
- elif condition == c.EQ:
- self.mc.cror(0, 2, 2)
- elif condition == c.GE or condition == c.U_GE:
- self.mc.cror(0, 1, 2)
- elif condition == c.GT or condition == c.U_GT:
- self.mc.cror(0, 1, 1)
- elif condition == c.NE:
- self.mc.crnor(0, 2, 2)
- else:
- assert 0, "condition not known"
-
- resval = res.value
- # move the content of the CR to resval
- self.mc.mfcr(resval)
- # zero out everything except of the result
- self.mc.rlwinm(resval, resval, 1, 31, 31)
- return f
-
-def gen_emit_unary_cmp_op(condition):
- def f(self, op, arglocs, regalloc):
- reg, res = arglocs
-
- self.mc.cmp_op(0, reg.value, 0, imm=True)
- if condition == c.IS_ZERO:
- self.mc.cror(0, 2, 2)
- elif condition == c.IS_TRUE:
- self.mc.cror(0, 0, 1)
- else:
- assert 0, "condition not known"
-
- self.mc.mfcr(res.value)
- self.mc.rlwinm(res.value, res.value, 1, 31, 31)
+ def f(self, op, guard_op, arglocs, regalloc):
+ do_emit_cmp_op(self, guard_op, arglocs, condition, signed, fp)
return f
def count_reg_args(args):
diff --git a/rpython/jit/backend/ppc/helper/regalloc.py b/rpython/jit/backend/ppc/helper/regalloc.py
--- a/rpython/jit/backend/ppc/helper/regalloc.py
+++ b/rpython/jit/backend/ppc/helper/regalloc.py
@@ -1,127 +1,97 @@
from rpython.jit.metainterp.history import ConstInt, Box, FLOAT
+from rpython.jit.backend.ppc.locations import imm
-IMM_SIZE = 2 ** 15 - 1
-
-def check_imm_box(arg, size=IMM_SIZE, allow_zero=True):
+def check_imm_box(arg, lower_bound=-2**15, upper_bound=2**15-1):
if isinstance(arg, ConstInt):
- return _check_imm_arg(arg.getint(), size, allow_zero)
+ i = arg.getint()
+ return lower_bound <= i <= upper_bound
return False
-def _check_imm_arg(arg, size=IMM_SIZE, allow_zero=True):
- assert not isinstance(arg, ConstInt)
- #if not we_are_translated():
- # if not isinstance(arg, int):
- # import pdb; pdb.set_trace()
- i = arg
- if allow_zero:
- lower_bound = i >= 0
- else:
- lower_bound = i > 0
- return i <= size and lower_bound
+def _check_imm_arg(i):
+ return (-2**15) <= i <= (2**15-1)
-def prepare_cmp_op():
- def f(self, op):
- boxes = op.getarglist()
- arg0, arg1 = boxes
- imm_a0 = check_imm_box(arg0)
- imm_a1 = check_imm_box(arg1)
- l0 = self._ensure_value_is_boxed(arg0, forbidden_vars=boxes)
+def _prepare_cmp_op(signed):
+ lower_bound = -2**15 if signed else 0
+ upper_bound = 2**15-1 if signed else 2**16-1
+ def f(self, op, guard_op):
+ l0 = self.ensure_reg(op.getarg(0))
+ a1 = op.getarg(1)
+ if check_imm_box(a1, lower_bound, upper_bound):
+ l1 = imm(a1.getint())
+ else:
+ l1 = self.ensure_reg(a1)
+ self.free_op_vars()
+ if guard_op is None:
+ res = self.force_allocate_reg(op.result)
+ return [l0, l1, res]
+ else:
+ return self._prepare_guard(guard_op, [l0, l1])
+ return f
+prepare_cmp_op = _prepare_cmp_op(signed=True)
+prepare_cmp_op_unsigned = _prepare_cmp_op(signed=False)
- if imm_a1 and not imm_a0:
- l1 = self._ensure_value_is_boxed(arg1, boxes)
- else:
- l1 = self._ensure_value_is_boxed(arg1, forbidden_vars=boxes)
-
- self.possibly_free_vars_for_op(op)
- self.free_temp_vars()
+def prepare_unary_cmp(self, op, guard_op):
+ l0 = self.ensure_reg(op.getarg(0))
+ l1 = imm(0)
+ self.free_op_vars()
+ if guard_op is None:
res = self.force_allocate_reg(op.result)
return [l0, l1, res]
- return f
+ else:
+ return self._prepare_guard(guard_op, [l0, l1])
-def prepare_unary_cmp():
- def f(self, op):
- a0 = op.getarg(0)
- assert isinstance(a0, Box)
- reg = self._ensure_value_is_boxed(a0)
- self.possibly_free_vars_for_op(op)
- res = self.force_allocate_reg(op.result, [a0])
- return [reg, res]
- return f
+def prepare_float_cmp(self, op, guard_op):
+ l0 = self.ensure_reg(op.getarg(0))
+ l1 = self.ensure_reg(op.getarg(1))
+ self.free_op_vars()
+ if guard_op is None:
+ res = self.force_allocate_reg(op.result)
+ return [l0, l1, res]
+ else:
+ return self._prepare_guard(guard_op, [l0, l1])
-def prepare_unary_int_op():
- def f(self, op):
- l0 = self._ensure_value_is_boxed(op.getarg(0))
- self.possibly_free_vars_for_op(op)
- self.free_temp_vars()
- res = self.force_allocate_reg(op.result)
- return [l0, res]
- return f
+def prepare_unary_op(self, op):
+ l0 = self.ensure_reg(op.getarg(0))
+ self.free_op_vars()
+ res = self.force_allocate_reg(op.result)
+ return [l0, res]
-def prepare_binary_int_op_with_imm():
- def f(self, op):
- a0 = op.getarg(0)
- a1 = op.getarg(1)
- boxes = op.getarglist()
- l0 = self._ensure_value_is_boxed(a0, boxes)
- if isinstance(a1, ConstInt) and _check_imm_arg(a1.getint()):
- l1 = self.convert_to_imm(a1)
- else:
- l1 = self._ensure_value_is_boxed(a1, boxes)
- locs = [l0, l1]
- self.possibly_free_vars_for_op(op)
- self.free_temp_vars()
- res = self.force_allocate_reg(op.result)
- return locs + [res]
- return f
+def prepare_binary_op(self, op):
+ reg1 = self.ensure_reg(op.getarg(0))
+ reg2 = self.ensure_reg(op.getarg(1))
+ self.free_op_vars()
+ res = self.force_allocate_reg(op.result)
+ return [reg1, reg2, res]
-def prepare_binary_int_op():
- def f(self, op):
- boxes = op.getarglist()
- b0, b1 = boxes
+def prepare_int_add_or_mul(self, op):
+ a0 = op.getarg(0)
+ a1 = op.getarg(1)
+ if check_imm_box(a0):
+ a0, a1 = a1, a0
+ l0 = self.ensure_reg(a0)
+ if check_imm_box(a1):
+ l1 = imm(a1.getint())
+ else:
+ l1 = self.ensure_reg(a1)
+ self.free_op_vars()
+ res = self.force_allocate_reg(op.result)
+ return [l0, l1, res]
- reg1 = self._ensure_value_is_boxed(b0, forbidden_vars=boxes)
- reg2 = self._ensure_value_is_boxed(b1, forbidden_vars=boxes)
+def prepare_int_sub(self, op):
+ l0 = self.ensure_reg(op.getarg(0))
+ a1 = op.getarg(1)
+ if check_imm_box(a1, -2**15+1, 2**15):
+ l1 = imm(a1.getint())
+ else:
+ l1 = self.ensure_reg(a1)
+ self.free_op_vars()
+ res = self.force_allocate_reg(op.result)
+ return [l0, l1, res]
- self.possibly_free_vars_for_op(op)
- res = self.force_allocate_reg(op.result)
- self.possibly_free_var(op.result)
- return [reg1, reg2, res]
- return f
-
-def prepare_float_op(name=None, base=True, float_result=True, guard=False):
- if guard:
- def f(self, op, guard_op):
- locs = []
- loc1 = self._ensure_value_is_boxed(op.getarg(0))
- locs.append(loc1)
- if base:
- loc2 = self._ensure_value_is_boxed(op.getarg(1))
- locs.append(loc2)
- self.possibly_free_vars_for_op(op)
- self.free_temp_vars()
- if guard_op is None:
- res = self.force_allocate_reg(op.result)
- assert float_result == (op.result.type == FLOAT)
- locs.append(res)
- return locs
- else:
- args = self._prepare_guard(guard_op, locs)
- return args
- else:
- def f(self, op):
- locs = []
- loc1 = self._ensure_value_is_boxed(op.getarg(0))
- locs.append(loc1)
- if base:
- loc2 = self._ensure_value_is_boxed(op.getarg(1))
- locs.append(loc2)
- self.possibly_free_vars_for_op(op)
- self.free_temp_vars()
- res = self.force_allocate_reg(op.result)
- assert float_result == (op.result.type == FLOAT)
- locs.append(res)
- return locs
- if name:
- f.__name__ = name
- return f
-
+def prepare_int_binary_ovf(self, op, guard_op):
+ reg1 = self.ensure_reg(op.getarg(0))
+ reg2 = self.ensure_reg(op.getarg(1))
+ self.free_op_vars()
+ res = self.force_allocate_reg(op.result)
+ assert guard_op is not None
+ return self._prepare_guard(guard_op, [reg1, reg2, res])
diff --git a/rpython/jit/backend/ppc/locations.py b/rpython/jit/backend/ppc/locations.py
--- a/rpython/jit/backend/ppc/locations.py
+++ b/rpython/jit/backend/ppc/locations.py
@@ -24,6 +24,9 @@
def is_reg(self):
return False
+ def is_core_reg(self): # for llsupport/assembler.py
+ return self.is_reg()
+
def is_fp_reg(self):
return False
diff --git a/rpython/jit/backend/ppc/opassembler.py b/rpython/jit/backend/ppc/opassembler.py
--- a/rpython/jit/backend/ppc/opassembler.py
+++ b/rpython/jit/backend/ppc/opassembler.py
@@ -1,11 +1,10 @@
-from rpython.jit.backend.ppc.helper.assembler import (gen_emit_cmp_op,
- gen_emit_unary_cmp_op)
+from rpython.jit.backend.ppc.helper.assembler import gen_emit_cmp_op
from rpython.jit.backend.ppc.helper.regalloc import _check_imm_arg
import rpython.jit.backend.ppc.condition as c
import rpython.jit.backend.ppc.register as r
from rpython.jit.backend.ppc.locations import imm
from rpython.jit.backend.ppc.locations import imm as make_imm_loc
-from rpython.jit.backend.ppc.arch import (IS_PPC_32, WORD,
+from rpython.jit.backend.ppc.arch import (IS_PPC_32, IS_PPC_64, WORD,
MAX_REG_PARAMS, MAX_FREG_PARAMS)
from rpython.jit.metainterp.history import (JitCellToken, TargetToken, Box,
@@ -17,12 +16,11 @@
PPCBuilder, PPCGuardToken)
from rpython.jit.backend.ppc.regalloc import TempPtr, TempInt
from rpython.jit.backend.llsupport import symbolic
-from rpython.jit.backend.llsupport.descr import InteriorFieldDescr
+from rpython.jit.backend.llsupport.descr import InteriorFieldDescr, CallDescr
from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
from rpython.rtyper.lltypesystem import rstr, rffi, lltype
from rpython.jit.metainterp.resoperation import rop
-
-NO_FORCE_INDEX = -1
+from rpython.jit.backend.ppc import callbuilder
class IntOpAssembler(object):
@@ -30,47 +28,58 @@
def emit_int_add(self, op, arglocs, regalloc):
l0, l1, res = arglocs
- if l0.is_imm():
- self.mc.addi(res.value, l1.value, l0.value)
- elif l1.is_imm():
+ assert not l0.is_imm()
+ if l1.is_imm():
self.mc.addi(res.value, l0.value, l1.value)
else:
self.mc.add(res.value, l0.value, l1.value)
- def emit_int_add_ovf(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
- self.mc.addo(res.value, l0.value, l1.value)
-
def emit_int_sub(self, op, arglocs, regalloc):
l0, l1, res = arglocs
- if l0.is_imm():
- self.mc.subfic(res.value, l1.value, l0.value)
- elif l1.is_imm():
+ assert not l0.is_imm()
+ if l1.is_imm():
self.mc.subi(res.value, l0.value, l1.value)
else:
self.mc.sub(res.value, l0.value, l1.value)
- def emit_int_sub_ovf(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
- self.mc.subfo(res.value, l1.value, l0.value)
-
def emit_int_mul(self, op, arglocs, regalloc):
l0, l1, res = arglocs
- if l0.is_imm():
- self.mc.mulli(res.value, l1.value, l0.value)
- elif l1.is_imm():
+ assert not l0.is_imm()
+ if l1.is_imm():
self.mc.mulli(res.value, l0.value, l1.value)
elif IS_PPC_32:
self.mc.mullw(res.value, l0.value, l1.value)
else:
self.mc.mulld(res.value, l0.value, l1.value)
- def emit_int_mul_ovf(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
+ def do_emit_int_binary_ovf(self, op, guard_op, arglocs, emit):
+ l0, l1, res = arglocs[0], arglocs[1], arglocs[2]
+ self.mc.load_imm(r.SCRATCH, 0)
+ self.mc.mtxer(r.SCRATCH.value)
+ emit(res.value, l0.value, l1.value)
+ #
+ failargs = arglocs[3:]
+ assert guard_op is not None
+ opnum = guard_op.getopnum()
+ if opnum == rop.GUARD_NO_OVERFLOW:
+ fcond = c.SO
+ elif opnum == rop.GUARD_OVERFLOW:
+ fcond = c.NS
+ else:
+ assert 0
+ self._emit_guard(guard_op, failargs, fcond)
+
+ def emit_guard_int_add_ovf(self, op, guard_op, arglocs, regalloc):
+ self.do_emit_int_binary_ovf(op, guard_op, arglocs, self.mc.addox)
+
+ def emit_guard_int_sub_ovf(self, op, guard_op, arglocs, regalloc):
+ self.do_emit_int_binary_ovf(op, guard_op, arglocs, self.mc.subox)
+
+ def emit_guard_int_mul_ovf(self, op, guard_op, arglocs, regalloc):
if IS_PPC_32:
- self.mc.mullwo(res.value, l0.value, l1.value)
+ self.do_emit_int_binary_ovf(op, guard_op, arglocs, self.mc.mullwox)
else:
- self.mc.mulldo(res.value, l0.value, l1.value)
+ self.do_emit_int_binary_ovf(op, guard_op, arglocs, self.mc.mulldox)
def emit_int_floordiv(self, op, arglocs, regalloc):
l0, l1, res = arglocs
@@ -130,26 +139,26 @@
else:
self.mc.divdu(res.value, l0.value, l1.value)
- emit_int_le = gen_emit_cmp_op(c.LE)
- emit_int_lt = gen_emit_cmp_op(c.LT)
- emit_int_gt = gen_emit_cmp_op(c.GT)
- emit_int_ge = gen_emit_cmp_op(c.GE)
- emit_int_eq = gen_emit_cmp_op(c.EQ)
- emit_int_ne = gen_emit_cmp_op(c.NE)
+ emit_guard_int_le = gen_emit_cmp_op(c.LE)
+ emit_guard_int_lt = gen_emit_cmp_op(c.LT)
+ emit_guard_int_gt = gen_emit_cmp_op(c.GT)
+ emit_guard_int_ge = gen_emit_cmp_op(c.GE)
+ emit_guard_int_eq = gen_emit_cmp_op(c.EQ)
+ emit_guard_int_ne = gen_emit_cmp_op(c.NE)
- emit_uint_lt = gen_emit_cmp_op(c.U_LT, signed=False)
- emit_uint_le = gen_emit_cmp_op(c.U_LE, signed=False)
- emit_uint_gt = gen_emit_cmp_op(c.U_GT, signed=False)
- emit_uint_ge = gen_emit_cmp_op(c.U_GE, signed=False)
+ emit_guard_uint_lt = gen_emit_cmp_op(c.LT, signed=False)
+ emit_guard_uint_le = gen_emit_cmp_op(c.LE, signed=False)
+ emit_guard_uint_gt = gen_emit_cmp_op(c.GT, signed=False)
+ emit_guard_uint_ge = gen_emit_cmp_op(c.GE, signed=False)
- emit_int_is_zero = gen_emit_unary_cmp_op(c.IS_ZERO)
- emit_int_is_true = gen_emit_unary_cmp_op(c.IS_TRUE)
+ emit_guard_int_is_zero = emit_guard_int_eq # EQ to 0
+ emit_guard_int_is_true = emit_guard_int_ne # NE to 0
- emit_ptr_eq = emit_int_eq
- emit_ptr_ne = emit_int_ne
+ emit_guard_ptr_eq = emit_guard_int_eq
+ emit_guard_ptr_ne = emit_guard_int_ne
- emit_instance_ptr_eq = emit_ptr_eq
- emit_instance_ptr_ne = emit_ptr_ne
+ emit_guard_instance_ptr_eq = emit_guard_ptr_eq
+ emit_guard_instance_ptr_ne = emit_guard_ptr_ne
def emit_int_neg(self, op, arglocs, regalloc):
l0, res = arglocs
@@ -159,6 +168,18 @@
l0, res = arglocs
self.mc.not_(res.value, l0.value)
+ def emit_int_signext(self, op, arglocs, regalloc):
+ l0, res = arglocs
+ extend_from = op.getarg(1).getint()
+ if extend_from == 1:
+ self.mc.extsb(res.value, l0.value)
+ elif extend_from == 2:
+ self.mc.extsh(res.value, l0.value)
+ elif extend_from == 4:
+ self.mc.extsw(res.value, l0.value)
+ else:
+ raise AssertionError(extend_from)
+
def emit_int_force_ge_zero(self, op, arglocs, regalloc):
arg, res = arglocs
with scratch_reg(self.mc):
@@ -201,12 +222,12 @@
l0, res = arglocs
self.mc.fsqrt(res.value, l0.value)
- emit_float_le = gen_emit_cmp_op(c.LE, fp=True)
- emit_float_lt = gen_emit_cmp_op(c.LT, fp=True)
- emit_float_gt = gen_emit_cmp_op(c.GT, fp=True)
- emit_float_ge = gen_emit_cmp_op(c.GE, fp=True)
- emit_float_eq = gen_emit_cmp_op(c.EQ, fp=True)
- emit_float_ne = gen_emit_cmp_op(c.NE, fp=True)
+ emit_guard_float_le = gen_emit_cmp_op(c.LE, fp=True)
+ emit_guard_float_lt = gen_emit_cmp_op(c.LT, fp=True)
+ emit_guard_float_gt = gen_emit_cmp_op(c.GT, fp=True)
+ emit_guard_float_ge = gen_emit_cmp_op(c.GE, fp=True)
+ emit_guard_float_eq = gen_emit_cmp_op(c.EQ, fp=True)
+ emit_guard_float_ne = gen_emit_cmp_op(c.NE, fp=True)
def emit_cast_float_to_int(self, op, arglocs, regalloc):
l0, temp_loc, res = arglocs
@@ -215,10 +236,10 @@
self.mc.ld(res.value, r.SP.value, -16)
def emit_cast_int_to_float(self, op, arglocs, regalloc):
- l0, temp_loc, res = arglocs
+ l0, res = arglocs
self.mc.std(l0.value, r.SP.value, -16)
- self.mc.lfd(temp_loc.value, r.SP.value, -16)
- self.mc.fcfid(res.value, temp_loc.value)
+ self.mc.lfd(res.value, r.SP.value, -16)
+ self.mc.fcfid(res.value, res.value)
def emit_convert_float_bytes_to_longlong(self, op, arglocs, regalloc):
l0, res = arglocs
@@ -241,7 +262,8 @@
fcond, save_exc, is_guard_not_invalidated,
is_guard_not_forced)
token.pos_jump_offset = self.mc.currpos()
- self.mc.nop() # has to be patched later on
+ if not is_guard_not_invalidated:
+ self.mc.trap() # has to be patched later on
self.pending_guard_tokens.append(token)
def build_guard_token(self, op, frame_depth, arglocs, fcond, save_exc,
@@ -269,24 +291,6 @@
self.mc.cmp_op(0, l0.value, 0, imm=True)
self._emit_guard(op, failargs, c.NE)
- # TODO - Evaluate whether this can be done with
- # SO bit instead of OV bit => usage of CR
- # instead of XER could be more efficient
- def _emit_ovf_guard(self, op, arglocs, cond):
- # move content of XER to GPR
- with scratch_reg(self.mc):
- self.mc.mfspr(r.SCRATCH.value, 1)
- # shift and mask to get comparison result
- self.mc.rlwinm(r.SCRATCH.value, r.SCRATCH.value, 1, 0, 0)
- self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
- self._emit_guard(op, arglocs, cond)
-
- def emit_guard_no_overflow(self, op, arglocs, regalloc):
- self._emit_ovf_guard(op, arglocs, c.NE)
-
- def emit_guard_overflow(self, op, arglocs, regalloc):
- self._emit_ovf_guard(op, arglocs, c.EQ)
-
def emit_guard_value(self, op, arglocs, regalloc):
l0 = arglocs[0]
l1 = arglocs[1]
@@ -307,15 +311,15 @@
def emit_guard_class(self, op, arglocs, regalloc):
self._cmp_guard_class(op, arglocs, regalloc)
- self._emit_guard(op, arglocs[3:], c.NE, save_exc=False)
+ self._emit_guard(op, arglocs[3:], c.NE)
def emit_guard_nonnull_class(self, op, arglocs, regalloc):
self.mc.cmp_op(0, arglocs[0].value, 1, imm=True, signed=False)
patch_pos = self.mc.currpos()
- self.mc.nop()
+ self.mc.trap()
self._cmp_guard_class(op, arglocs, regalloc)
pmc = OverwritingBuilder(self.mc, patch_pos, 1)
- pmc.bc(12, 0, self.mc.currpos() - patch_pos)
+ pmc.bc(12, 0, self.mc.currpos() - patch_pos) # LT
pmc.overwrite()
self._emit_guard(op, arglocs[3:], c.NE, save_exc=False)
@@ -330,7 +334,7 @@
# here, we have to go back from 'classptr' to the value expected
# from reading the half-word in the object header. Note that
# this half-word is at offset 0 on a little-endian machine;
- # it would be at offset 2 (32 bit) or 4 (64 bit) on a
+ # but it is at offset 2 (32 bit) or 4 (64 bit) on a
# big-endian machine.
with scratch_reg(self.mc):
if IS_PPC_32:
@@ -340,7 +344,7 @@
self.mc.cmp_op(0, r.SCRATCH.value, typeid.value, imm=typeid.is_imm())
def emit_guard_not_invalidated(self, op, locs, regalloc):
- return self._emit_guard(op, locs, c.EQ, is_guard_not_invalidated=True)
+ return self._emit_guard(op, locs, c.UH, is_guard_not_invalidated=True)
class MiscOpAssembler(object):
@@ -349,12 +353,12 @@
def emit_increment_debug_counter(self, op, arglocs, regalloc):
[addr_loc, value_loc] = arglocs
self.mc.load(value_loc.value, addr_loc.value, 0)
- self.mc.addi(value_loc.value, value_loc.value, 1)
+ self.mc.addi(value_loc.value, value_loc.value, 1) # can't use r0!
self.mc.store(value_loc.value, addr_loc.value, 0)
def emit_finish(self, op, arglocs, regalloc):
base_ofs = self.cpu.get_baseofs_of_frame_field()
- if len(arglocs) == 2:
+ if len(arglocs) > 1:
[return_val, fail_descr_loc] = arglocs
if op.getarg(0).type == FLOAT:
self.mc.stfd(return_val.value, r.SPP.value, base_ofs)
@@ -409,7 +413,8 @@
def emit_same_as(self, op, arglocs, regalloc):
argloc, resloc = arglocs
- self.regalloc_mov(argloc, resloc)
+ if argloc is not resloc:
+ self.regalloc_mov(argloc, resloc)
emit_cast_ptr_to_int = emit_same_as
emit_cast_int_to_ptr = emit_same_as
@@ -427,131 +432,129 @@
loc, loc1, resloc, pos_exc_value, pos_exception = arglocs[:5]
failargs = arglocs[5:]
self.mc.load_imm(loc1, pos_exception.value)
-
- with scratch_reg(self.mc):
- self.mc.load(r.SCRATCH.value, loc1.value, 0)
- self.mc.cmp_op(0, r.SCRATCH.value, loc.value)
+ self.mc.load(r.SCRATCH.value, loc1.value, 0)
+ self.mc.cmp_op(0, r.SCRATCH.value, loc.value)
self._emit_guard(op, failargs, c.NE, save_exc=True)
self.mc.load_imm(loc, pos_exc_value.value)
if resloc:
self.mc.load(resloc.value, loc.value, 0)
+
+ self.mc.load_imm(r.SCRATCH, 0)
+ self.mc.store(r.SCRATCH.value, loc.value, 0)
+ self.mc.store(r.SCRATCH.value, loc1.value, 0)
- with scratch_reg(self.mc):
- self.mc.load_imm(r.SCRATCH, 0)
- self.mc.store(r.SCRATCH.value, loc.value, 0)
- self.mc.store(r.SCRATCH.value, loc1.value, 0)
-
- def emit_call(self, op, arglocs, regalloc, force_index=NO_FORCE_INDEX):
- if force_index == NO_FORCE_INDEX:
- force_index = self.write_new_force_index()
+ def emit_call(self, op, arglocs, regalloc):
resloc = arglocs[0]
adr = arglocs[1]
arglist = arglocs[2:]
+
+ cb = callbuilder.CallBuilder(self, adr, arglist, resloc)
+
descr = op.getdescr()
- size = descr.get_result_size()
- signed = descr.is_result_signed()
- self._emit_call(force_index, adr, arglist, resloc, (size, signed))
+ assert isinstance(descr, CallDescr)
+ cb.argtypes = descr.get_arg_types()
+ cb.restype = descr.get_result_type()
- def _emit_call(self, force_index, adr, arglocs,
- result=None, result_info=(-1,-1)):
- n_args = len(arglocs)
+ cb.emit()
- # collect variables that need to go in registers
- # and the registers they will be stored in
- num = 0
- fpnum = 0
- count = 0
- non_float_locs = []
- non_float_regs = []
- float_locs = []
- float_regs = []
- stack_args = []
- float_stack_arg = False
- for i in range(n_args):
- arg = arglocs[i]
+ ## def _emit_call(self, adr, arglocs, result=None, result_info=(-1,-1)):
+ ## n_args = len(arglocs)
- if arg.type == FLOAT:
- if fpnum < MAX_FREG_PARAMS:
- fpreg = r.PARAM_FPREGS[fpnum]
- float_locs.append(arg)
- float_regs.append(fpreg)
- fpnum += 1
- # XXX Duplicate float arguments in GPR slots
- if num < MAX_REG_PARAMS:
- num += 1
- else:
- stack_args.append(arg)
- else:
- stack_args.append(arg)
- else:
- if num < MAX_REG_PARAMS:
- reg = r.PARAM_REGS[num]
- non_float_locs.append(arg)
- non_float_regs.append(reg)
- num += 1
- else:
- stack_args.append(arg)
- float_stack_arg = True
+ ## # collect variables that need to go in registers
+ ## # and the registers they will be stored in
+ ## num = 0
+ ## fpnum = 0
+ ## count = 0
+ ## non_float_locs = []
+ ## non_float_regs = []
+ ## float_locs = []
+ ## float_regs = []
+ ## stack_args = []
+ ## float_stack_arg = False
+ ## for i in range(n_args):
+ ## arg = arglocs[i]
- if adr in non_float_regs:
- non_float_locs.append(adr)
- non_float_regs.append(r.r11)
- adr = r.r11
+ ## if arg.type == FLOAT:
+ ## if fpnum < MAX_FREG_PARAMS:
+ ## fpreg = r.PARAM_FPREGS[fpnum]
+ ## float_locs.append(arg)
+ ## float_regs.append(fpreg)
+ ## fpnum += 1
+ ## # XXX Duplicate float arguments in GPR slots
+ ## if num < MAX_REG_PARAMS:
+ ## num += 1
+ ## else:
+ ## stack_args.append(arg)
+ ## else:
+ ## stack_args.append(arg)
+ ## else:
+ ## if num < MAX_REG_PARAMS:
+ ## reg = r.PARAM_REGS[num]
+ ## non_float_locs.append(arg)
+ ## non_float_regs.append(reg)
+ ## num += 1
+ ## else:
+ ## stack_args.append(arg)
+ ## float_stack_arg = True
- # compute maximum of parameters passed
- self.max_stack_params = max(self.max_stack_params, len(stack_args))
+ ## if adr in non_float_regs:
+ ## non_float_locs.append(adr)
+ ## non_float_regs.append(r.r11)
+ ## adr = r.r11
- # compute offset at which parameters are stored
- if IS_PPC_32:
- param_offset = BACKCHAIN_SIZE * WORD
- else:
- # space for first 8 parameters
- param_offset = ((BACKCHAIN_SIZE + MAX_REG_PARAMS) * WORD)
+ ## # compute maximum of parameters passed
+ ## self.max_stack_params = max(self.max_stack_params, len(stack_args))
- with scratch_reg(self.mc):
- if float_stack_arg:
- self.mc.stfd(r.f0.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
- for i, arg in enumerate(stack_args):
- offset = param_offset + i * WORD
- if arg is not None:
- if arg.type == FLOAT:
- self.regalloc_mov(arg, r.f0)
- self.mc.stfd(r.f0.value, r.SP.value, offset)
- else:
- self.regalloc_mov(arg, r.SCRATCH)
- self.mc.store(r.SCRATCH.value, r.SP.value, offset)
- if float_stack_arg:
- self.mc.lfd(r.f0.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
+ ## # compute offset at which parameters are stored
+ ## if IS_PPC_32:
+ ## param_offset = BACKCHAIN_SIZE * WORD
+ ## else:
+ ## # space for first 8 parameters
+ ## param_offset = ((BACKCHAIN_SIZE + MAX_REG_PARAMS) * WORD)
- # remap values stored in core registers
- remap_frame_layout(self, float_locs, float_regs, r.f0)
- remap_frame_layout(self, non_float_locs, non_float_regs, r.SCRATCH)
+ ## with scratch_reg(self.mc):
+ ## if float_stack_arg:
+ ## self.mc.stfd(r.f0.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
+ ## for i, arg in enumerate(stack_args):
+ ## offset = param_offset + i * WORD
+ ## if arg is not None:
+ ## if arg.type == FLOAT:
+ ## self.regalloc_mov(arg, r.f0)
+ ## self.mc.stfd(r.f0.value, r.SP.value, offset)
+ ## else:
+ ## self.regalloc_mov(arg, r.SCRATCH)
+ ## self.mc.store(r.SCRATCH.value, r.SP.value, offset)
+ ## if float_stack_arg:
+ ## self.mc.lfd(r.f0.value, r.SPP.value, FORCE_INDEX_OFS + WORD)
- # the actual call
- if adr.is_imm():
- self.mc.call(adr.value)
- elif adr.is_stack():
- self.regalloc_mov(adr, r.SCRATCH)
- self.mc.call_register(r.SCRATCH)
- elif adr.is_reg():
- self.mc.call_register(adr)
- else:
- assert 0, "should not reach here"
+ ## # remap values stored in core registers
+ ## remap_frame_layout(self, float_locs, float_regs, r.f0)
+ ## remap_frame_layout(self, non_float_locs, non_float_regs, r.SCRATCH)
- self.mark_gc_roots(force_index)
- # ensure the result is wellformed and stored in the correct location
- if result is not None and result_info != (-1, -1):
- self._ensure_result_bit_extension(result, result_info[0],
- result_info[1])
+ ## # the actual call
+ ## if adr.is_imm():
+ ## self.mc.call(adr.value)
+ ## elif adr.is_stack():
+ ## self.regalloc_mov(adr, r.SCRATCH)
+ ## self.mc.call_register(r.SCRATCH)
+ ## elif adr.is_reg():
+ ## self.mc.call_register(adr)
+ ## else:
+ ## assert 0, "should not reach here"
+
+ ## self.mark_gc_roots(force_index)
+ ## # ensure the result is wellformed and stored in the correct location
+ ## if result is not None and result_info != (-1, -1):
+ ## self._ensure_result_bit_extension(result, result_info[0],
+ ## result_info[1])
class FieldOpAssembler(object):
_mixin_ = True
- def emit_setfield_gc(self, op, arglocs, regalloc):
- value_loc, base_loc, ofs, size = arglocs
+ def _write_to_mem(self, value_loc, base_loc, ofs, size):
if size.value == 8:
if value_loc.is_fp_reg():
if ofs.is_imm():
@@ -581,10 +584,17 @@
else:
assert 0, "size not supported"
+ def emit_setfield_gc(self, op, arglocs, regalloc):
+ value_loc, base_loc, ofs, size = arglocs
+ self._write_to_mem(value_loc, base_loc, ofs, size)
+
emit_setfield_raw = emit_setfield_gc
+ emit_zero_ptr_field = emit_setfield_gc
- def emit_getfield_gc(self, op, arglocs, regalloc):
- base_loc, ofs, res, size = arglocs
+ def _load_from_mem(self, res, base_loc, ofs, size, signed):
+ # res, base_loc, ofs, size and signed are all locations
+ assert base_loc is not r.SCRATCH
+ sign = signed.value
if size.value == 8:
if res.is_fp_reg():
if ofs.is_imm():
@@ -597,261 +607,229 @@
else:
self.mc.ldx(res.value, base_loc.value, ofs.value)
elif size.value == 4:
- if ofs.is_imm():
- self.mc.lwz(res.value, base_loc.value, ofs.value)
+ if IS_PPC_64 and sign:
+ if ofs.is_imm():
+ self.mc.lwa(res.value, base_loc.value, ofs.value)
+ else:
+ self.mc.lwax(res.value, base_loc.value, ofs.value)
else:
- self.mc.lwzx(res.value, base_loc.value, ofs.value)
+ if ofs.is_imm():
+ self.mc.lwz(res.value, base_loc.value, ofs.value)
+ else:
+ self.mc.lwzx(res.value, base_loc.value, ofs.value)
elif size.value == 2:
- if ofs.is_imm():
- self.mc.lhz(res.value, base_loc.value, ofs.value)
+ if sign:
+ if ofs.is_imm():
+ self.mc.lha(res.value, base_loc.value, ofs.value)
+ else:
+ self.mc.lhax(res.value, base_loc.value, ofs.value)
else:
- self.mc.lhzx(res.value, base_loc.value, ofs.value)
+ if ofs.is_imm():
+ self.mc.lhz(res.value, base_loc.value, ofs.value)
+ else:
+ self.mc.lhzx(res.value, base_loc.value, ofs.value)
elif size.value == 1:
if ofs.is_imm():
self.mc.lbz(res.value, base_loc.value, ofs.value)
else:
self.mc.lbzx(res.value, base_loc.value, ofs.value)
+ if sign:
+ self.mc.extsb(res.value, res.value)
else:
assert 0, "size not supported"
- signed = op.getdescr().is_field_signed()
- if signed:
- self._ensure_result_bit_extension(res, size.value, signed)
+ def emit_getfield_gc(self, op, arglocs, regalloc):
+ base_loc, ofs, res, size, sign = arglocs
+ self._load_from_mem(res, base_loc, ofs, size, sign)
emit_getfield_raw = emit_getfield_gc
emit_getfield_raw_pure = emit_getfield_gc
emit_getfield_gc_pure = emit_getfield_gc
+ SIZE2SCALE = dict([(1<<_i, _i) for _i in range(32)])
+
+ def _multiply_by_constant(self, loc, multiply_by, scratch_loc):
+ if multiply_by == 1:
+ return loc
+ try:
+ scale = self.SIZE2SCALE[multiply_by]
+ except KeyError:
+ if _check_imm_arg(multiply_by):
+ self.mc.mulli(scratch_loc.value, loc.value, multiply_by)
+ else:
+ self.mc.load_imm(scratch_loc.value, multiply_by)
+ if IS_PPC_32:
+ self.mc.mullw(scratch_loc.value, loc.value,
+ scratch_loc.value)
+ else:
+ self.mc.mulld(scratch_loc.value, loc.value,
+ scratch_loc.value)
+ else:
+ self.mc.sldi(scratch_loc.value, loc.value, scale)
+ return scratch_loc
+
+ def _apply_scale(self, ofs, index_loc, itemsize):
+ # For arrayitem and interiorfield reads and writes: this returns an
+ # offset suitable for use in ld/ldx or similar instructions.
+ # The result will be either the register r2 or a 16-bit immediate.
+ # The arguments stand for "ofs + index_loc * itemsize",
+ # with the following constraints:
+ assert ofs.is_imm() # must be an immediate...
+ assert _check_imm_arg(ofs.getint()) # ...that fits 16 bits
+ assert index_loc is not r.SCRATCH2 # can be a reg or imm (any size)
+ assert itemsize.is_imm() # must be an immediate (any size)
+
+ multiply_by = itemsize.value
+ offset = ofs.getint()
+ if index_loc.is_imm():
+ offset += index_loc.getint() * multiply_by
+ if _check_imm_arg(offset):
+ return imm(offset)
+ else:
+ self.mc.load_imm(r.SCRATCH2, offset)
+ return r.SCRATCH2
+ else:
+ index_loc = self._multiply_by_constant(index_loc, multiply_by,
+ r.SCRATCH2)
+ # here, the new index_loc contains 'index_loc * itemsize'.
+ # If offset != 0 then we have to add it here. Note that
+ # mc.addi() would not be valid with operand r0.
+ if offset != 0:
+ self.mc.addi(r.SCRATCH2.value, index_loc.value, offset)
+ index_loc = r.SCRATCH2
+ return index_loc
+
def emit_getinteriorfield_gc(self, op, arglocs, regalloc):
- (base_loc, index_loc, res_loc,
- ofs_loc, ofs, itemsize, fieldsize) = arglocs
- with scratch_reg(self.mc):
- if _check_imm_arg(itemsize.value):
- self.mc.mulli(r.SCRATCH.value, index_loc.value, itemsize.value)
- else:
- self.mc.load_imm(r.SCRATCH, itemsize.value)
- self.mc.mullw(r.SCRATCH.value, index_loc.value, r.SCRATCH.value)
- descr = op.getdescr()
- assert isinstance(descr, InteriorFieldDescr)
- signed = descr.fielddescr.is_field_signed()
- if ofs.value > 0:
- if ofs_loc.is_imm():
- self.mc.addic(r.SCRATCH.value, r.SCRATCH.value, ofs_loc.value)
- else:
- self.mc.add(r.SCRATCH.value, r.SCRATCH.value, ofs_loc.value)
-
- if fieldsize.value == 8:
- if res_loc.is_fp_reg():
- self.mc.lfdx(res_loc.value, base_loc.value, r.SCRATCH.value)
- else:
- self.mc.ldx(res_loc.value, base_loc.value, r.SCRATCH.value)
- elif fieldsize.value == 4:
- self.mc.lwzx(res_loc.value, base_loc.value, r.SCRATCH.value)
- if signed:
- self.mc.extsw(res_loc.value, res_loc.value)
- elif fieldsize.value == 2:
- self.mc.lhzx(res_loc.value, base_loc.value, r.SCRATCH.value)
- if signed:
- self.mc.extsh(res_loc.value, res_loc.value)
- elif fieldsize.value == 1:
- self.mc.lbzx(res_loc.value, base_loc.value, r.SCRATCH.value)
- if signed:
- self.mc.extsb(res_loc.value, res_loc.value)
- else:
- assert 0
+ (base_loc, index_loc, res_loc, ofs_loc,
+ itemsize, fieldsize, fieldsign) = arglocs
+ ofs_loc = self._apply_scale(ofs_loc, index_loc, itemsize)
+ self._load_from_mem(res_loc, base_loc, ofs_loc, fieldsize, fieldsign)
emit_getinteriorfield_raw = emit_getinteriorfield_gc
def emit_setinteriorfield_gc(self, op, arglocs, regalloc):
- (base_loc, index_loc, value_loc,
- ofs_loc, ofs, itemsize, fieldsize) = arglocs
- with scratch_reg(self.mc):
- if _check_imm_arg(itemsize.value):
- self.mc.mulli(r.SCRATCH.value, index_loc.value, itemsize.value)
- else:
- self.mc.load_imm(r.SCRATCH, itemsize.value)
- self.mc.mullw(r.SCRATCH.value, index_loc.value, r.SCRATCH.value)
- if ofs.value > 0:
- if ofs_loc.is_imm():
- self.mc.addic(r.SCRATCH.value, r.SCRATCH.value, ofs_loc.value)
- else:
- self.mc.add(r.SCRATCH.value, r.SCRATCH.value, ofs_loc.value)
- if fieldsize.value == 8:
- if value_loc.is_fp_reg():
- self.mc.stfdx(value_loc.value, base_loc.value, r.SCRATCH.value)
- else:
- self.mc.stdx(value_loc.value, base_loc.value, r.SCRATCH.value)
- elif fieldsize.value == 4:
- self.mc.stwx(value_loc.value, base_loc.value, r.SCRATCH.value)
- elif fieldsize.value == 2:
- self.mc.sthx(value_loc.value, base_loc.value, r.SCRATCH.value)
- elif fieldsize.value == 1:
- self.mc.stbx(value_loc.value, base_loc.value, r.SCRATCH.value)
- else:
- assert 0
+ (base_loc, index_loc, value_loc, ofs_loc,
+ itemsize, fieldsize) = arglocs
+ ofs_loc = self._apply_scale(ofs_loc, index_loc, itemsize)
+ self._write_to_mem(value_loc, base_loc, ofs_loc, fieldsize)
emit_setinteriorfield_raw = emit_setinteriorfield_gc
-class ArrayOpAssembler(object):
-
- _mixin_ = True
-
def emit_arraylen_gc(self, op, arglocs, regalloc):
res, base_loc, ofs = arglocs
self.mc.load(res.value, base_loc.value, ofs.value)
- def emit_setarrayitem_gc(self, op, arglocs, regalloc):
- value_loc, base_loc, ofs_loc, scratch_loc, scale, ofs = arglocs
- assert ofs_loc.is_reg()
-
- if scale.value > 0:
- #scale_loc = r.SCRATCH
- scale_loc = scratch_loc
- if IS_PPC_32:
- self.mc.slwi(scale_loc.value, ofs_loc.value, scale.value)
- else:
- self.mc.sldi(scale_loc.value, ofs_loc.value, scale.value)
- else:
- scale_loc = ofs_loc
-
- # add the base offset
- if ofs.value > 0:
- self.mc.addi(r.SCRATCH.value, scale_loc.value, ofs.value)
- scale_loc = r.SCRATCH
-
- if scale.value == 3:
- if value_loc.is_fp_reg():
- self.mc.stfdx(value_loc.value, base_loc.value, scale_loc.value)
- else:
- self.mc.stdx(value_loc.value, base_loc.value, scale_loc.value)
- elif scale.value == 2:
- self.mc.stwx(value_loc.value, base_loc.value, scale_loc.value)
- elif scale.value == 1:
- self.mc.sthx(value_loc.value, base_loc.value, scale_loc.value)
- elif scale.value == 0:
- self.mc.stbx(value_loc.value, base_loc.value, scale_loc.value)
- else:
- assert 0, "scale %s not supported" % (scale.value)
-
+ emit_setarrayitem_gc = emit_setinteriorfield_gc
emit_setarrayitem_raw = emit_setarrayitem_gc
- def _write_to_mem(self, value_loc, base_loc, ofs_loc, scale):
- if scale.value == 3:
- if value_loc.is_fp_reg():
- self.mc.stfdx(value_loc.value, base_loc.value, ofs_loc.value)
- else:
- self.mc.stdx(value_loc.value, base_loc.value, ofs_loc.value)
- elif scale.value == 2:
- self.mc.stwx(value_loc.value, base_loc.value, ofs_loc.value)
- elif scale.value == 1:
- self.mc.sthx(value_loc.value, base_loc.value, ofs_loc.value)
- elif scale.value == 0:
- self.mc.stbx(value_loc.value, base_loc.value, ofs_loc.value)
- else:
- assert 0
-
- def emit_raw_store(self, op, arglocs, regalloc):
- value_loc, base_loc, ofs_loc, scale, ofs = arglocs
- assert ofs_loc.is_reg()
- self._write_to_mem(value_loc, base_loc, ofs_loc, scale)
-
- def emit_getarrayitem_gc(self, op, arglocs, regalloc):
- res, base_loc, ofs_loc, scratch_loc, scale, ofs = arglocs
- assert ofs_loc.is_reg()
- signed = op.getdescr().is_item_signed()
-
- if scale.value > 0:
- scale_loc = scratch_loc
- if IS_PPC_32:
- self.mc.slwi(scale_loc.value, ofs_loc.value, scale.value)
- else:
- self.mc.sldi(scale_loc.value, ofs_loc.value, scale.value)
- else:
- scale_loc = ofs_loc
-
- # add the base offset
- if ofs.value > 0:
- self.mc.addi(r.SCRATCH.value, scale_loc.value, ofs.value)
- scale_loc = r.SCRATCH
-
- if scale.value == 3:
- if res.is_fp_reg():
- self.mc.lfdx(res.value, base_loc.value, scale_loc.value)
- else:
- self.mc.ldx(res.value, base_loc.value, scale_loc.value)
- elif scale.value == 2:
- self.mc.lwzx(res.value, base_loc.value, scale_loc.value)
- if signed:
- self.mc.extsw(res.value, res.value)
- elif scale.value == 1:
- self.mc.lhzx(res.value, base_loc.value, scale_loc.value)
- if signed:
- self.mc.extsh(res.value, res.value)
- elif scale.value == 0:
- self.mc.lbzx(res.value, base_loc.value, scale_loc.value)
- if signed:
- self.mc.extsb(res.value, res.value)
- else:
- assert 0
-
+ emit_getarrayitem_gc = emit_getinteriorfield_gc
emit_getarrayitem_raw = emit_getarrayitem_gc
emit_getarrayitem_gc_pure = emit_getarrayitem_gc
- def _load_from_mem(self, res_loc, base_loc, ofs_loc, scale, signed=False):
- if scale.value == 3:
- if res_loc.is_fp_reg():
- self.mc.lfdx(res_loc.value, base_loc.value, ofs_loc.value)
+ emit_raw_store = emit_setarrayitem_gc
+ emit_raw_load = emit_getarrayitem_gc
+
+ def _copy_in_scratch2(self, loc):
+ if loc.is_imm():
+ self.mc.li(r.SCRATCH2.value, loc.value)
+ elif loc is not r.SCRATCH2:
+ self.mc.mr(r.SCRATCH2.value, loc.value)
+ return r.SCRATCH2
+
+ def emit_zero_array(self, op, arglocs, regalloc):
+ base_loc, startindex_loc, length_loc, ofs_loc, itemsize_loc = arglocs
+
+ # assume that an array where an item size is N:
+ # * if N is even, then all items are aligned to a multiple of 2
+ # * if N % 4 == 0, then all items are aligned to a multiple of 4
+ # * if N % 8 == 0, then all items are aligned to a multiple of 8
+ itemsize = itemsize_loc.getint()
+ if itemsize & 1:
+ stepsize = 1
+ stXux = self.mc.stbux
+ stXu = self.mc.stbu
+ stX = self.mc.stb
+ elif itemsize & 2:
+ stepsize = 2
+ stXux = self.mc.sthux
+ stXu = self.mc.sthu
+ stX = self.mc.sth
+ elif (itemsize & 4) or IS_PPC_32:
+ stepsize = 4
+ stXux = self.mc.stwux
+ stXu = self.mc.stwu
+ stX = self.mc.stw
+ else:
+ stepsize = WORD
+ stXux = self.mc.stdux
+ stXu = self.mc.stdu
+ stX = self.mc.std
+
+ repeat_factor = itemsize // stepsize
+ if repeat_factor != 1:
+ # This is only for itemsize not in (1, 2, 4, WORD).
+ # Include the repeat_factor inside length_loc if it is a constant
+ if length_loc.is_imm():
+ length_loc = imm(length_loc.value * repeat_factor)
+ repeat_factor = 1 # included
+
+ unroll = -1
+ if length_loc.is_imm():
+ if length_loc.value <= 8:
+ unroll = length_loc.value
+ if unroll <= 0:
+ return # nothing to do
+
+ ofs_loc = self._apply_scale(ofs_loc, startindex_loc, itemsize_loc)
+ ofs_loc = self._copy_in_scratch2(ofs_loc)
+
+ if unroll > 0:
+ assert repeat_factor == 1
+ self.mc.li(r.SCRATCH.value, 0)
+ stXux(r.SCRATCH.value, ofs_loc.value, base_loc.value)
+ for i in range(1, unroll):
+ stX(r.SCRATCH.value, ofs_loc.value, i * stepsize)
+
+ else:
+ if length_loc.is_imm():
+ self.mc.load_imm(r.SCRATCH, length_loc.value)
+ length_loc = r.SCRATCH
+ jz_location = -1
+ assert repeat_factor == 1
else:
- self.mc.ldx(res_loc.value, base_loc.value, ofs_loc.value)
- elif scale.value == 2:
- self.mc.lwzx(res_loc.value, base_loc.value, ofs_loc.value)
- if signed:
- self.mc.extsw(res_loc.value, res_loc.value)
- elif scale.value == 1:
- self.mc.lhzx(res_loc.value, base_loc.value, ofs_loc.value)
- if signed:
- self.mc.extsh(res_loc.value, res_loc.value)
- elif scale.value == 0:
- self.mc.lbzx(res_loc.value, base_loc.value, ofs_loc.value)
- if signed:
- self.mc.extsb(res_loc.value, res_loc.value)
- else:
- assert 0
+ self.mc.cmp_op(0, length_loc.value, 0, imm=True)
+ jz_location = self.mc.currpos()
+ self.mc.trap()
+ length_loc = self._multiply_by_constant(length_loc,
+ repeat_factor,
+ r.SCRATCH)
+ self.mc.mtctr(length_loc.value)
+ self.mc.li(r.SCRATCH.value, 0)
- def emit_raw_load(self, op, arglocs, regalloc):
- res_loc, base_loc, ofs_loc, scale, ofs = arglocs
- assert ofs_loc.is_reg()
- # no base offset
- assert ofs.value == 0
- signed = op.getdescr().is_item_signed()
- self._load_from_mem(res_loc, base_loc, ofs_loc, scale, signed)
+ stXux(r.SCRATCH.value, ofs_loc.value, base_loc.value)
+ bdz_location = self.mc.currpos()
+ self.mc.trap()
+
+ loop_location = self.mc.currpos()
+ stXu(r.SCRATCH.value, ofs_loc.value, stepsize)
+ self.mc.bdnz(loop_location - self.mc.currpos())
+
+ pmc = OverwritingBuilder(self.mc, bdz_location, 1)
+ pmc.bdz(self.mc.currpos() - bdz_location)
+ pmc.overwrite()
+
+ if jz_location != -1:
+ pmc = OverwritingBuilder(self.mc, jz_location, 1)
+ pmc.bc(4, 1, self.mc.currpos() - jz_location) # !GT
+ pmc.overwrite()
class StrOpAssembler(object):
_mixin_ = True
- def emit_strlen(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
- if l1.is_imm():
- self.mc.load(res.value, l0.value, l1.getint())
- else:
- self.mc.loadx(res.value, l0.value, l1.value)
-
- def emit_strgetitem(self, op, arglocs, regalloc):
- res, base_loc, ofs_loc, basesize = arglocs
- if ofs_loc.is_imm():
- self.mc.addi(res.value, base_loc.value, ofs_loc.getint())
- else:
- self.mc.add(res.value, base_loc.value, ofs_loc.value)
- self.mc.lbz(res.value, res.value, basesize.value)
-
- def emit_strsetitem(self, op, arglocs, regalloc):
- value_loc, base_loc, ofs_loc, temp_loc, basesize = arglocs
- if ofs_loc.is_imm():
- self.mc.addi(temp_loc.value, base_loc.value, ofs_loc.getint())
- else:
- self.mc.add(temp_loc.value, base_loc.value, ofs_loc.value)
- self.mc.stb(value_loc.value, temp_loc.value, basesize.value)
+ emit_strlen = FieldOpAssembler.emit_getfield_gc
+ emit_strgetitem = FieldOpAssembler.emit_getarrayitem_gc
+ emit_strsetitem = FieldOpAssembler.emit_setarrayitem_gc
#from ../x86/regalloc.py:928 ff.
def emit_copystrcontent(self, op, arglocs, regalloc):
@@ -901,6 +879,7 @@
else:
length_box = TempInt()
length_loc = regalloc.force_allocate_reg(length_box, forbidden_vars)
+ xxxxxxxxxxxxxxxxxxxxxxxx
imm = regalloc.convert_to_imm(args[4])
self.load(length_loc, imm)
if is_unicode:
@@ -919,7 +898,7 @@
# call memcpy()
regalloc.before_call()
imm_addr = make_imm_loc(self.memcpy_addr)
- self._emit_call(NO_FORCE_INDEX, imm_addr,
+ self._emit_call(imm_addr,
[dstaddr_loc, srcaddr_loc, length_loc])
regalloc.possibly_free_var(length_box)
@@ -970,41 +949,9 @@
_mixin_ = True
- emit_unicodelen = StrOpAssembler.emit_strlen
-
- def emit_unicodegetitem(self, op, arglocs, regalloc):
- # res is used as a temporary location
- # => it is save to use it before loading the result
- res, base_loc, ofs_loc, scale, basesize, itemsize = arglocs
-
- if IS_PPC_32:
- self.mc.slwi(res.value, ofs_loc.value, scale.value)
- else:
- self.mc.sldi(res.value, ofs_loc.value, scale.value)
- self.mc.add(res.value, base_loc.value, res.value)
-
- if scale.value == 2:
- self.mc.lwz(res.value, res.value, basesize.value)
- elif scale.value == 1:
- self.mc.lhz(res.value, res.value, basesize.value)
- else:
- assert 0, itemsize.value
-
- def emit_unicodesetitem(self, op, arglocs, regalloc):
- value_loc, base_loc, ofs_loc, temp_loc, scale, basesize, itemsize = arglocs
-
- if IS_PPC_32:
- self.mc.slwi(temp_loc.value, ofs_loc.value, scale.value)
- else:
- self.mc.sldi(temp_loc.value, ofs_loc.value, scale.value)
- self.mc.add(temp_loc.value, base_loc.value, temp_loc.value)
-
- if scale.value == 2:
- self.mc.stw(value_loc.value, temp_loc.value, basesize.value)
- elif scale.value == 1:
- self.mc.sth(value_loc.value, temp_loc.value, basesize.value)
- else:
- assert 0, itemsize.value
+ emit_unicodelen = FieldOpAssembler.emit_getfield_gc
+ emit_unicodegetitem = FieldOpAssembler.emit_getarrayitem_gc
+ emit_unicodesetitem = FieldOpAssembler.emit_setarrayitem_gc
class AllocOpAssembler(object):
@@ -1169,7 +1116,6 @@
def emit_force_token(self, op, arglocs, regalloc):
res_loc = arglocs[0]
self.mc.mr(res_loc.value, r.SPP.value)
- self.mc.addi(res_loc.value, res_loc.value, FORCE_INDEX_OFS)
# self._emit_guard(guard_op, regalloc._prepare_guard(guard_op), c.LT)
# from: ../x86/assembler.py:1668
@@ -1186,6 +1132,7 @@
assert isinstance(descr, JitCellToken)
# check value
assert tmploc is r.RES
+ xxxxxxxxxxxx
self._emit_call(fail_index, imm(descr._ppc_func_addr),
callargs, result=tmploc)
if op.result is None:
@@ -1320,6 +1267,7 @@
size = descr.get_result_size()
signed = descr.is_result_signed()
#
+ xxxxxxxxxxxxxx
self._emit_call(fail_index, adr, callargs, resloc, (size, signed))
with scratch_reg(self.mc):
@@ -1348,6 +1296,7 @@
size = descr.get_result_size()
signed = descr.is_result_signed()
#
+ xxxxxxxxxxxxxxx
self._emit_call(fail_index, adr, callargs, resloc, (size, signed))
# then reopen the stack
if gcrootmap:
@@ -1366,7 +1315,7 @@
with Saved_Volatiles(self.mc):
#self._emit_call(NO_FORCE_INDEX, self.releasegil_addr,
# [], self._regalloc)
- self._emit_call(NO_FORCE_INDEX, imm(self.releasegil_addr), [])
+ self._emit_call(imm(self.releasegil_addr), [])
def call_reacquire_gil(self, gcrootmap, save_loc):
# save the previous result into the stack temporarily.
@@ -1374,12 +1323,12 @@
# to save vfp regs in this case. Besides the result location
assert gcrootmap.is_shadow_stack
with Saved_Volatiles(self.mc):
- self._emit_call(NO_FORCE_INDEX, imm(self.reacqgil_addr), [])
+ self._emit_call(imm(self.reacqgil_addr), [])
class OpAssembler(IntOpAssembler, GuardOpAssembler,
MiscOpAssembler, FieldOpAssembler,
- ArrayOpAssembler, StrOpAssembler,
+ StrOpAssembler,
UnicodeOpAssembler, ForceOpAssembler,
AllocOpAssembler, FloatOpAssembler):
diff --git a/rpython/jit/backend/ppc/ppc_assembler.py b/rpython/jit/backend/ppc/ppc_assembler.py
--- a/rpython/jit/backend/ppc/ppc_assembler.py
+++ b/rpython/jit/backend/ppc/ppc_assembler.py
@@ -224,14 +224,18 @@
self._push_all_regs_to_jitframe(mc, [], withfloats)
if exc:
- # We might have an exception pending. Load it into r2...
- mc.write32(0)
- #mc.MOV(ebx, heap(self.cpu.pos_exc_value()))
- #mc.MOV(heap(self.cpu.pos_exception()), imm0)
- #mc.MOV(heap(self.cpu.pos_exc_value()), imm0)
- ## ...and save ebx into 'jf_guard_exc'
- #offset = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
- #mc.MOV_br(offset, ebx.value)
+ # We might have an exception pending.
+ mc.load_imm(r.r2, self.cpu.pos_exc_value())
+ # Copy it into 'jf_guard_exc'
+ offset = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
+ mc.load(r.r0.value, r.r2.value, 0)
+ mc.store(r.r0.value, r.SPP.value, offset)
+ # Zero out the exception fields
+ diff = self.cpu.pos_exception() - self.cpu.pos_exc_value()
+ assert _check_imm_arg(diff)
+ mc.li(r.r0.value, 0)
+ mc.store(r.r0.value, r.r2.value, 0)
+ mc.store(r.r0.value, r.r2.value, diff)
# now we return from the complete frame, which starts from
# _call_header_with_stack_check(). The _call_footer below does it.
@@ -275,7 +279,7 @@
mc.cmp_op(0, r.RES.value, 0, imm=True)
jmp_pos = mc.currpos()
- mc.nop()
+ mc.trap()
nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
mc.load_imm(r.r4, nursery_free_adr)
@@ -375,7 +379,7 @@
mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
jnz_location = mc.currpos()
- mc.nop()
+ mc.trap()
# restore parameter registers
for i, reg in enumerate(r.PARAM_REGS):
@@ -600,7 +604,7 @@
self.mc.mfctr(r.r16.value)
patch_loc = self.mc.currpos()
- self.mc.nop()
+ self.mc.trap()
# make minimal frame which contains the LR
#
@@ -636,15 +640,16 @@
if gcrootmap and gcrootmap.is_shadow_stack:
self._call_footer_shadowstack(gcrootmap)
- # load old backchain into r4
- self.mc.load(r.r4.value, r.SP.value,
- STD_FRAME_SIZE_IN_BYTES + LR_BC_OFFSET)
-
# restore registers r25 to r31
for i, reg in enumerate(REGISTERS_SAVED):
self.mc.load(reg.value, r.SP.value,
GPR_SAVE_AREA_OFFSET + i * WORD)
+ # load the return address into r4
+ self.mc.load(r.r4.value, r.SP.value,
+ STD_FRAME_SIZE_IN_BYTES + LR_BC_OFFSET)
+
+ # throw away the stack frame and return to r4
self.mc.addi(r.SP.value, r.SP.value, STD_FRAME_SIZE_IN_BYTES)
self.mc.mtlr(r.r4.value) # restore LR
self.mc.blr()
@@ -654,6 +659,7 @@
assert self.memcpy_addr != 0, "setup_once() not called?"
self.current_clt = looptoken.compiled_loop_token
self.pending_guard_tokens = []
+ self.pending_guard_tokens_recovered = 0
#if WORD == 8:
# self.pending_memoryerror_trampoline_from = []
# self.error_trampoline_64 = 0
@@ -930,6 +936,21 @@
ptr = rffi.cast(lltype.Signed, gcmap)
mc.load_imm(r.r2, ptr)
+ def break_long_loop(self):
+ # If the loop is too long, the guards in it will jump forward
+ # more than 32 KB. We use an approximate hack to know if we
+ # should break the loop here with an unconditional "b" that
+ # jumps over the target code.
+ jmp_pos = self.mc.currpos()
+ self.mc.trap()
+
+ self.write_pending_failure_recoveries()
+
+ currpos = self.mc.currpos()
+ pmc = OverwritingBuilder(self.mc, jmp_pos, 1)
+ pmc.b(currpos - jmp_pos)
+ pmc.overwrite()
+
def generate_quick_failure(self, guardtok):
startpos = self.mc.currpos()
fail_descr, target = self.store_info_on_descr(startpos, guardtok)
@@ -944,10 +965,15 @@
def write_pending_failure_recoveries(self):
# for each pending guard, generate the code of the recovery stub
# at the end of self.mc.
- for tok in self.pending_guard_tokens:
+ for i in range(self.pending_guard_tokens_recovered,
+ len(self.pending_guard_tokens)):
+ tok = self.pending_guard_tokens[i]
tok.pos_recovery_stub = self.generate_quick_failure(tok)
+ self.pending_guard_tokens_recovered = len(self.pending_guard_tokens)
def patch_pending_failure_recoveries(self, rawstart):
+ assert (self.pending_guard_tokens_recovered ==
+ len(self.pending_guard_tokens))
clt = self.current_clt
for tok in self.pending_guard_tokens:
addr = rawstart + tok.pos_jump_offset
@@ -988,15 +1014,6 @@
clt.asmmemmgr_blocks = []
return clt.asmmemmgr_blocks
- def _prepare_sp_patch_position(self):
- """Generate NOPs as placeholder to patch the instruction(s) to update
- the sp according to the number of spilled variables"""
- size = SIZE_LOAD_IMM_PATCH_SP
- l = self.mc.currpos()
- for _ in range(size):
- self.mc.nop()
- return l
-
def regalloc_mov(self, prev_loc, loc):
if prev_loc.is_imm():
value = prev_loc.getint()
@@ -1137,29 +1154,6 @@
else:
raise AssertionError('Trying to pop to an invalid location')
- def _ensure_result_bit_extension(self, resloc, size, signed):
- if size == 1:
- if not signed: #unsigned char
- if IS_PPC_32:
- self.mc.rlwinm(resloc.value, resloc.value, 0, 24, 31)
- else:
- self.mc.rldicl(resloc.value, resloc.value, 0, 56)
- else:
- self.mc.extsb(resloc.value, resloc.value)
- elif size == 2:
- if not signed:
- if IS_PPC_32:
- self.mc.rlwinm(resloc.value, resloc.value, 0, 16, 31)
- else:
- self.mc.rldicl(resloc.value, resloc.value, 0, 48)
- else:
- self.mc.extsh(resloc.value, resloc.value)
- elif size == 4:
- if not signed:
- self.mc.rldicl(resloc.value, resloc.value, 0, 32)
- else:
- self.mc.extsw(resloc.value, resloc.value)
-
def malloc_cond(self, nursery_free_adr, nursery_top_adr, size):
assert size & (WORD-1) == 0 # must be correctly aligned
@@ -1178,7 +1172,7 @@
self.mc.cmp_op(0, r.r4.value, r.SCRATCH.value, signed=False)
fast_jmp_pos = self.mc.currpos()
- self.mc.nop()
+ self.mc.trap()
# We load into r3 the address stored at nursery_free_adr. We calculate
+ # the new value for nursery_free_adr and store in r1. Then we load
@@ -1212,6 +1206,7 @@
gcrootmap.write_callshape(mark, force_index)
def propagate_memoryerror_if_r3_is_null(self):
+ return # XXXXXXXXX
self.mc.cmp_op(0, r.RES.value, 0, imm=True)
self.mc.b_cond_abs(self.propagate_exception_path, c.EQ)
@@ -1253,6 +1248,10 @@
(op.getopname(), guard_op.getopname())
raise NotImplementedError(op)
+def add_none_argument(fn):
+ return (lambda self, op, arglocs, regalloc:
+ fn(self, op, None, arglocs, regalloc))
+
operations = [notimplemented_op] * (rop._LAST + 1)
operations_with_guard = [notimplemented_op_with_guard] * (rop._LAST + 1)
@@ -1271,8 +1270,10 @@
continue
methname = 'emit_guard_%s' % key
if hasattr(AssemblerPPC, methname):
+ assert operations[value] is notimplemented_op
func = getattr(AssemblerPPC, methname).im_func
operations_with_guard[value] = func
+ operations[value] = add_none_argument(func)
class BridgeAlreadyCompiled(Exception):
pass
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -6,13 +6,8 @@
from rpython.jit.backend.ppc.jump import (remap_frame_layout,
remap_frame_layout_mixed)
from rpython.jit.backend.ppc.locations import imm, get_fp_offset
-from rpython.jit.backend.ppc.helper.regalloc import (_check_imm_arg,
- prepare_cmp_op,
- prepare_unary_int_op,
- prepare_binary_int_op,
- prepare_binary_int_op_with_imm,
- prepare_unary_cmp,
- prepare_float_op)
+from rpython.jit.backend.ppc.helper.regalloc import _check_imm_arg, check_imm_box
+from rpython.jit.backend.ppc.helper import regalloc as helper
from rpython.jit.metainterp.history import (Const, ConstInt, ConstFloat, ConstPtr,
Box, BoxPtr,
INT, REF, FLOAT)
@@ -32,6 +27,8 @@
from rpython.jit.codewriter.effectinfo import EffectInfo
from rpython.rlib import rgc
+LIMIT_LOOP_BREAK = 15000 # should be much smaller than 32 KB
+
# xxx hack: set a default value for TargetToken._arm_loop_code. If 0, we know
# that it is a LABEL that was not compiled yet.
TargetToken._ppc_loop_code = 0
@@ -56,11 +53,12 @@
class FPRegisterManager(RegisterManager):
- all_regs = r.ALL_FLOAT_REGS
+ all_regs = r.MANAGED_FP_REGS
box_types = [FLOAT]
save_around_call_regs = r.VOLATILES_FLOAT
def convert_to_imm(self, c):
+ assert isinstance(c, ConstFloat)
adr = self.assembler.datablockwrapper.malloc_aligned(8, 8)
x = c.getfloatstorage()
rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = x
@@ -72,25 +70,21 @@
def call_result_location(self, v):
return r.f1
- def ensure_value_is_boxed(self, thing, forbidden_vars=[]):
- loc = None
- if isinstance(thing, Const):
- assert isinstance(thing, ConstFloat)
- loc = self.get_scratch_reg(FLOAT, self.temp_boxes + forbidden_vars)
- immvalue = self.convert_to_imm(thing)
+ def ensure_reg(self, box):
+ if isinstance(box, Const):
+ loc = self.get_scratch_reg()
+ immvalue = self.convert_to_imm(box)
self.assembler.load(loc, immvalue)
else:
- loc = self.make_sure_var_in_reg(thing,
- forbidden_vars=self.temp_boxes + forbidden_vars)
+ assert box in self.temp_boxes
+ loc = self.make_sure_var_in_reg(box,
+ forbidden_vars=self.temp_boxes)
return loc
- def get_scratch_reg(self, type=FLOAT, forbidden_vars=[],
- selected_reg=None):
- assert type == FLOAT # for now
+ def get_scratch_reg(self):
box = TempFloat()
+ reg = self.force_allocate_reg(box, forbidden_vars=self.temp_boxes)
self.temp_boxes.append(box)
- reg = self.force_allocate_reg(box, forbidden_vars=forbidden_vars,
- selected_reg=selected_reg)
return reg
@@ -142,39 +136,23 @@
assert isinstance(c, ConstPtr)
return locations.ImmLocation(rffi.cast(lltype.Signed, c.value))
- def ensure_value_is_boxed(self, thing, forbidden_vars=None):
- loc = None
- if isinstance(thing, Const):
- if isinstance(thing, ConstPtr):
- tp = REF
- else:
- tp = INT
- loc = self.get_scratch_reg(tp, forbidden_vars=self.temp_boxes
- + forbidden_vars)
- immvalue = self.convert_to_imm(thing)
+ def ensure_reg(self, box):
+ if isinstance(box, Const):
+ loc = self.get_scratch_reg()
+ immvalue = self.convert_to_imm(box)
self.assembler.load(loc, immvalue)
else:
- loc = self.make_sure_var_in_reg(thing,
- forbidden_vars=self.temp_boxes + forbidden_vars)
+ assert box in self.temp_boxes
+ loc = self.make_sure_var_in_reg(box,
+ forbidden_vars=self.temp_boxes)
return loc
- def allocate_scratch_reg(self, type=INT, selected_reg=None, forbidden_vars=None):
- """Allocate a scratch register, possibly spilling a managed register.
- This register is freed after emitting the current operation and can not
- be spilled"""
+ def get_scratch_reg(self):
box = TempBox()
More information about the pypy-commit
mailing list