[pypy-commit] pypy default: hg merge optimize-cond-call
arigo
noreply at buildbot.pypy.org
Sat Sep 5 11:03:38 CEST 2015
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r79450:95f8d1976013
Date: 2015-09-05 11:03 +0200
http://bitbucket.org/pypy/pypy/changeset/95f8d1976013/
Log: hg merge optimize-cond-call
Stop merging operations with the guard that immediately follows them,
which removes a fair amount of messy code. Instead, operations like
INT_EQ check whether they should leave their result in the condition
code; if so, the next GUARD_TRUE or GUARD_FALSE picks up the condition
code without emitting any additional code. Finally, this simpler
arrangement also lets COND_CALL pick up the condition code, which
would have been a total mess before.
diff too long, truncating to 2000 out of 2119 lines
diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -12,8 +12,7 @@
from rpython.jit.backend.arm.opassembler import ResOpAssembler
from rpython.jit.backend.arm.regalloc import (Regalloc,
CoreRegisterManager, check_imm_arg, VFPRegisterManager,
- operations as regalloc_operations,
- operations_with_guard as regalloc_operations_with_guard)
+ operations as regalloc_operations)
from rpython.jit.backend.llsupport import jitframe, rewrite
from rpython.jit.backend.llsupport.assembler import DEBUG_COUNTER, debug_bridge, BaseAssembler
from rpython.jit.backend.llsupport.regalloc import get_scale, valid_addressing_size
@@ -645,8 +644,10 @@
size_excluding_failure_stuff - loop_head)
def _assemble(self, regalloc, inputargs, operations):
+ self.guard_success_cc = c.cond_none
regalloc.compute_hint_frame_locations(operations)
self._walk_operations(inputargs, operations, regalloc)
+ assert self.guard_success_cc == c.cond_none
frame_depth = regalloc.get_final_frame_depth()
jump_target_descr = regalloc.jump_target_descr
if jump_target_descr is not None:
@@ -927,6 +928,7 @@
def _walk_operations(self, inputargs, operations, regalloc):
fcond = c.AL
self._regalloc = regalloc
+ regalloc.operations = operations
while regalloc.position() < len(operations) - 1:
regalloc.next_instruction()
i = regalloc.position()
@@ -935,18 +937,7 @@
opnum = op.getopnum()
if op.has_no_side_effect() and op.result not in regalloc.longevity:
regalloc.possibly_free_vars_for_op(op)
- elif self._regalloc.can_merge_with_next_guard(op, i, operations):
- guard = operations[i + 1]
- assert guard.is_guard()
- arglocs = regalloc_operations_with_guard[opnum](regalloc, op,
- guard, fcond)
- fcond = asm_operations_with_guard[opnum](self, op,
- guard, arglocs, regalloc, fcond)
- assert fcond is not None
- regalloc.next_instruction()
- regalloc.possibly_free_vars_for_op(guard)
- regalloc.possibly_free_vars(guard.getfailargs())
- elif not we_are_translated() and op.getopnum() == -124:
+ if not we_are_translated() and op.getopnum() == -124:
regalloc.prepare_force_spill(op, fcond)
else:
arglocs = regalloc_operations[opnum](regalloc, op, fcond)
@@ -962,6 +953,7 @@
regalloc.free_temp_vars()
regalloc._check_invariants()
self.mc.mark_op(None) # end of the loop
+ regalloc.operations = None
def regalloc_emit_extra(self, op, arglocs, fcond, regalloc):
# for calls to a function with a specifically-supported OS_xxx
@@ -1516,21 +1508,11 @@
raise NotImplementedError(op)
-def notimplemented_op_with_guard(self, op, guard_op, arglocs, regalloc, fcond):
- print "[ARM/asm] %s with guard %s not implemented" % \
- (op.getopname(), guard_op.getopname())
- raise NotImplementedError(op)
-
asm_operations = [notimplemented_op] * (rop._LAST + 1)
-asm_operations_with_guard = [notimplemented_op_with_guard] * (rop._LAST + 1)
asm_extra_operations = {}
for name, value in ResOpAssembler.__dict__.iteritems():
- if name.startswith('emit_guard_'):
- opname = name[len('emit_guard_'):]
- num = getattr(rop, opname.upper())
- asm_operations_with_guard[num] = value
- elif name.startswith('emit_opx_'):
+ if name.startswith('emit_opx_'):
opname = name[len('emit_opx_'):]
num = getattr(EffectInfo, 'OS_' + opname.upper())
asm_extra_operations[num] = value
diff --git a/rpython/jit/backend/arm/conditions.py b/rpython/jit/backend/arm/conditions.py
--- a/rpython/jit/backend/arm/conditions.py
+++ b/rpython/jit/backend/arm/conditions.py
@@ -13,11 +13,13 @@
GT = 0xC
LE = 0xD
AL = 0xE
+cond_none = -1
opposites = [NE, EQ, CC, CS, PL, MI, VC, VS, LS, HI, LT, GE, LE, GT, AL]
def get_opposite_of(operation):
+ assert operation >= 0
return opposites[operation]
# see mapping for floating point according to
diff --git a/rpython/jit/backend/arm/helper/assembler.py b/rpython/jit/backend/arm/helper/assembler.py
--- a/rpython/jit/backend/arm/helper/assembler.py
+++ b/rpython/jit/backend/arm/helper/assembler.py
@@ -6,33 +6,32 @@
from rpython.rlib.rarithmetic import r_uint, r_longlong, intmask
from rpython.jit.metainterp.resoperation import rop
+
+def flush_cc(asm, condition, result_loc):
+ # After emitting an instruction that leaves a boolean result in
+ # a condition code (cc), call this. In the common case, result_loc
+ # will be set to 'fp' by the regalloc, which in this case means
+ # "propagate it between this operation and the next guard by keeping
+ # it in the cc". In the uncommon case, result_loc is another
+ # register, and we emit a load from the cc into this register.
+ assert asm.guard_success_cc == c.cond_none
+ if result_loc is r.fp:
+ asm.guard_success_cc = condition
+ else:
+ asm.mc.MOV_ri(result_loc.value, 1, condition)
+ asm.mc.MOV_ri(result_loc.value, 0, c.get_opposite_of(condition))
+
+
def gen_emit_op_unary_cmp(name, true_cond):
- false_cond = c.get_opposite_of(true_cond)
def f(self, op, arglocs, regalloc, fcond):
assert fcond is not None
reg, res = arglocs
self.mc.CMP_ri(reg.value, 0)
- self.mc.MOV_ri(res.value, 1, true_cond)
- self.mc.MOV_ri(res.value, 0, false_cond)
+ flush_cc(self, true_cond, res)
return fcond
f.__name__ = 'emit_op_%s' % name
return f
-def gen_emit_guard_unary_cmp(name, true_cond):
- false_cond = c.get_opposite_of(true_cond)
- def f(self, op, guard, arglocs, regalloc, fcond):
- assert fcond is not None
- assert guard is not None
- reg = arglocs[0]
- self.mc.CMP_ri(reg.value, 0)
- cond = true_cond
- guard_opnum = guard.getopnum()
- if guard_opnum == rop.GUARD_FALSE:
- cond = false_cond
- return self._emit_guard(guard, arglocs[1:], cond, save_exc=False)
- f.__name__ = 'emit_guard_%s' % name
- return f
-
def gen_emit_op_ri(name, opname):
ri_op = getattr(InstrBuilder, '%s_ri' % opname)
rr_op = getattr(InstrBuilder, '%s_rr' % opname)
@@ -61,8 +60,7 @@
f.__name__ = 'emit_op_%s' % name
return f
-def gen_emit_cmp_op(name, condition):
- inv = c.get_opposite_of(condition)
+def gen_emit_cmp_op(name, true_cond):
def f(self, op, arglocs, regalloc, fcond):
l0, l1, res = arglocs
@@ -70,32 +68,11 @@
self.mc.CMP_ri(l0.value, imm=l1.getint(), cond=fcond)
else:
self.mc.CMP_rr(l0.value, l1.value, cond=fcond)
- self.mc.MOV_ri(res.value, 1, cond=condition)
- self.mc.MOV_ri(res.value, 0, cond=inv)
+ flush_cc(self, true_cond, res)
return fcond
f.__name__ = 'emit_op_%s' % name
return f
-def gen_emit_cmp_op_guard(name, true_cond):
- false_cond = c.get_opposite_of(true_cond)
- def f(self, op, guard, arglocs, regalloc, fcond):
- assert guard is not None
- l0 = arglocs[0]
- l1 = arglocs[1]
- assert l0.is_core_reg()
-
- if l1.is_imm():
- self.mc.CMP_ri(l0.value, imm=l1.getint(), cond=fcond)
- else:
- self.mc.CMP_rr(l0.value, l1.value, cond=fcond)
- guard_opnum = guard.getopnum()
- cond = true_cond
- if guard_opnum == rop.GUARD_FALSE:
- cond = false_cond
- return self._emit_guard(guard, arglocs[2:], cond, save_exc=False)
- f.__name__ = 'emit_guard_%s' % name
- return f
-
def gen_emit_float_op(name, opname):
op_rr = getattr(InstrBuilder, opname)
def f(self, op, arglocs, regalloc, fcond):
@@ -104,6 +81,7 @@
return fcond
f.__name__ = 'emit_op_%s' % name
return f
+
def gen_emit_unary_float_op(name, opname):
op_rr = getattr(InstrBuilder, opname)
def f(self, op, arglocs, regalloc, fcond):
@@ -113,34 +91,16 @@
f.__name__ = 'emit_op_%s' % name
return f
-def gen_emit_float_cmp_op(name, cond):
- inv = c.get_opposite_of(cond)
+def gen_emit_float_cmp_op(name, true_cond):
def f(self, op, arglocs, regalloc, fcond):
arg1, arg2, res = arglocs
self.mc.VCMP(arg1.value, arg2.value)
self.mc.VMRS(cond=fcond)
- self.mc.MOV_ri(res.value, 1, cond=cond)
- self.mc.MOV_ri(res.value, 0, cond=inv)
+ flush_cc(self, true_cond, res)
return fcond
f.__name__ = 'emit_op_%s' % name
return f
-def gen_emit_float_cmp_op_guard(name, true_cond):
- false_cond = c.get_opposite_of(true_cond)
- def f(self, op, guard, arglocs, regalloc, fcond):
- assert guard is not None
- arg1 = arglocs[0]
- arg2 = arglocs[1]
- self.mc.VCMP(arg1.value, arg2.value)
- self.mc.VMRS(cond=fcond)
- cond = true_cond
- guard_opnum = guard.getopnum()
- if guard_opnum == rop.GUARD_FALSE:
- cond = false_cond
- return self._emit_guard(guard, arglocs[2:], cond, save_exc=False)
- f.__name__ = 'emit_guard_%s' % name
- return f
-
class saved_registers(object):
def __init__(self, cb, regs_to_save, vfp_regs_to_save=None):
diff --git a/rpython/jit/backend/arm/helper/regalloc.py b/rpython/jit/backend/arm/helper/regalloc.py
--- a/rpython/jit/backend/arm/helper/regalloc.py
+++ b/rpython/jit/backend/arm/helper/regalloc.py
@@ -50,42 +50,28 @@
f.__name__ = name
return f
-def prepare_float_op(name=None, base=True, float_result=True, guard=False):
- if guard:
- def f(self, op, guard_op, fcond):
- locs = []
- loc1 = self.make_sure_var_in_reg(op.getarg(0))
- locs.append(loc1)
- if base:
- loc2 = self.make_sure_var_in_reg(op.getarg(1))
- locs.append(loc2)
- self.possibly_free_vars_for_op(op)
- self.free_temp_vars()
- if guard_op is None:
- res = self.force_allocate_reg(op.result)
- assert float_result == (op.result.type == FLOAT)
- locs.append(res)
- return locs
- else:
- args = self._prepare_guard(guard_op, locs)
- return args
- else:
- def f(self, op, fcond):
- locs = []
- loc1 = self.make_sure_var_in_reg(op.getarg(0))
- locs.append(loc1)
- if base:
- loc2 = self.make_sure_var_in_reg(op.getarg(1))
- locs.append(loc2)
- self.possibly_free_vars_for_op(op)
- self.free_temp_vars()
- res = self.force_allocate_reg(op.result)
- assert float_result == (op.result.type == FLOAT)
- locs.append(res)
- return locs
- if name:
- f.__name__ = name
- return f
+def prepare_unary_op(self, op, fcond):
+ loc1 = self.make_sure_var_in_reg(op.getarg(0))
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.force_allocate_reg(op.result)
+ return [loc1, res]
+
+def prepare_two_regs_op(self, op, fcond):
+ loc1 = self.make_sure_var_in_reg(op.getarg(0))
+ loc2 = self.make_sure_var_in_reg(op.getarg(1))
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.force_allocate_reg(op.result)
+ return [loc1, loc2, res]
+
+def prepare_float_cmp(self, op, fcond):
+ loc1 = self.make_sure_var_in_reg(op.getarg(0))
+ loc2 = self.make_sure_var_in_reg(op.getarg(1))
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.force_allocate_reg_or_cc(op.result)
+ return [loc1, loc2, res]
def prepare_op_by_helper_call(name):
def f(self, op, fcond):
@@ -106,43 +92,28 @@
f.__name__ = name
return f
-def prepare_cmp_op(name=None):
- def f(self, op, guard_op, fcond):
- assert fcond is not None
- boxes = list(op.getarglist())
- arg0, arg1 = boxes
- imm_a1 = check_imm_box(arg1)
+def prepare_int_cmp(self, op, fcond):
+ assert fcond is not None
+ boxes = list(op.getarglist())
+ arg0, arg1 = boxes
+ imm_a1 = check_imm_box(arg1)
- l0 = self.make_sure_var_in_reg(arg0, forbidden_vars=boxes)
- if imm_a1:
- l1 = self.convert_to_imm(arg1)
- else:
- l1 = self.make_sure_var_in_reg(arg1, forbidden_vars=boxes)
+ l0 = self.make_sure_var_in_reg(arg0, forbidden_vars=boxes)
+ if imm_a1:
+ l1 = self.convert_to_imm(arg1)
+ else:
+ l1 = self.make_sure_var_in_reg(arg1, forbidden_vars=boxes)
- self.possibly_free_vars_for_op(op)
- self.free_temp_vars()
- if guard_op is None:
- res = self.force_allocate_reg(op.result)
- return [l0, l1, res]
- else:
- args = self._prepare_guard(guard_op, [l0, l1])
- return args
- if name:
- f.__name__ = name
- return f
+ self.possibly_free_vars_for_op(op)
+ self.free_temp_vars()
+ res = self.force_allocate_reg_or_cc(op.result)
+ return [l0, l1, res]
-def prepare_op_unary_cmp(name=None):
- def f(self, op, guard_op, fcond):
- assert fcond is not None
- a0 = op.getarg(0)
- assert isinstance(a0, Box)
- reg = self.make_sure_var_in_reg(a0)
- self.possibly_free_vars_for_op(op)
- if guard_op is None:
- res = self.force_allocate_reg(op.result, [a0])
- return [reg, res]
- else:
- return self._prepare_guard(guard_op, [reg])
- if name:
- f.__name__ = name
- return f
+def prepare_unary_cmp(self, op, fcond):
+ assert fcond is not None
+ a0 = op.getarg(0)
+ assert isinstance(a0, Box)
+ reg = self.make_sure_var_in_reg(a0)
+ self.possibly_free_vars_for_op(op)
+ res = self.force_allocate_reg_or_cc(op.result)
+ return [reg, res]
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -5,13 +5,10 @@
from rpython.jit.backend.arm.arch import WORD, DOUBLE_WORD, JITFRAME_FIXED_SIZE
from rpython.jit.backend.arm.helper.assembler import (gen_emit_op_by_helper_call,
gen_emit_op_unary_cmp,
- gen_emit_guard_unary_cmp,
gen_emit_op_ri,
gen_emit_cmp_op,
- gen_emit_cmp_op_guard,
gen_emit_float_op,
gen_emit_float_cmp_op,
- gen_emit_float_cmp_op_guard,
gen_emit_unary_float_op,
saved_registers)
from rpython.jit.backend.arm.helper.regalloc import check_imm_arg
@@ -114,32 +111,25 @@
return fcond
#ref: http://blogs.arm.com/software-enablement/detecting-overflow-from-mul/
- def emit_guard_int_mul_ovf(self, op, guard, arglocs, regalloc, fcond):
+ def emit_op_int_mul_ovf(self, op, arglocs, regalloc, fcond):
reg1 = arglocs[0]
reg2 = arglocs[1]
res = arglocs[2]
- failargs = arglocs[3:]
self.mc.SMULL(res.value, r.ip.value, reg1.value, reg2.value,
cond=fcond)
self.mc.CMP_rr(r.ip.value, res.value, shifttype=shift.ASR,
imm=31, cond=fcond)
-
- if guard.getopnum() == rop.GUARD_OVERFLOW:
- fcond = self._emit_guard(guard, failargs, c.NE, save_exc=False)
- elif guard.getopnum() == rop.GUARD_NO_OVERFLOW:
- fcond = self._emit_guard(guard, failargs, c.EQ, save_exc=False)
- else:
- assert 0
+ self.guard_success_cc = c.EQ
return fcond
- def emit_guard_int_add_ovf(self, op, guard, arglocs, regalloc, fcond):
- self.int_add_impl(op, arglocs[0:3], regalloc, fcond, flags=True)
- self._emit_guard_overflow(guard, arglocs[3:], fcond)
+ def emit_op_int_add_ovf(self, op, arglocs, regalloc, fcond):
+ fcond = self.int_add_impl(op, arglocs, regalloc, fcond, flags=True)
+ self.guard_success_cc = c.VC
return fcond
- def emit_guard_int_sub_ovf(self, op, guard, arglocs, regalloc, fcond):
- self.int_sub_impl(op, arglocs[0:3], regalloc, fcond, flags=True)
- self._emit_guard_overflow(guard, arglocs[3:], fcond)
+ def emit_op_int_sub_ovf(self, op, arglocs, regalloc, fcond):
+ fcond = self.int_sub_impl(op, arglocs, regalloc, fcond, flags=True)
+ self.guard_success_cc = c.VC
return fcond
emit_op_int_floordiv = gen_emit_op_by_helper_call('int_floordiv', 'DIV')
@@ -160,37 +150,17 @@
emit_op_int_gt = gen_emit_cmp_op('int_gt', c.GT)
emit_op_int_ge = gen_emit_cmp_op('int_ge', c.GE)
- emit_guard_int_lt = gen_emit_cmp_op_guard('int_lt', c.LT)
- emit_guard_int_le = gen_emit_cmp_op_guard('int_le', c.LE)
- emit_guard_int_eq = gen_emit_cmp_op_guard('int_eq', c.EQ)
- emit_guard_int_ne = gen_emit_cmp_op_guard('int_ne', c.NE)
- emit_guard_int_gt = gen_emit_cmp_op_guard('int_gt', c.GT)
- emit_guard_int_ge = gen_emit_cmp_op_guard('int_ge', c.GE)
-
emit_op_uint_le = gen_emit_cmp_op('uint_le', c.LS)
emit_op_uint_gt = gen_emit_cmp_op('uint_gt', c.HI)
emit_op_uint_lt = gen_emit_cmp_op('uint_lt', c.LO)
emit_op_uint_ge = gen_emit_cmp_op('uint_ge', c.HS)
- emit_guard_uint_le = gen_emit_cmp_op_guard('uint_le', c.LS)
- emit_guard_uint_gt = gen_emit_cmp_op_guard('uint_gt', c.HI)
- emit_guard_uint_lt = gen_emit_cmp_op_guard('uint_lt', c.LO)
- emit_guard_uint_ge = gen_emit_cmp_op_guard('uint_ge', c.HS)
-
emit_op_ptr_eq = emit_op_instance_ptr_eq = emit_op_int_eq
emit_op_ptr_ne = emit_op_instance_ptr_ne = emit_op_int_ne
- emit_guard_ptr_eq = emit_guard_instance_ptr_eq = emit_guard_int_eq
- emit_guard_ptr_ne = emit_guard_instance_ptr_ne = emit_guard_int_ne
-
- emit_op_int_add_ovf = emit_op_int_add
- emit_op_int_sub_ovf = emit_op_int_sub
emit_op_int_is_true = gen_emit_op_unary_cmp('int_is_true', c.NE)
emit_op_int_is_zero = gen_emit_op_unary_cmp('int_is_zero', c.EQ)
- emit_guard_int_is_true = gen_emit_guard_unary_cmp('int_is_true', c.NE)
- emit_guard_int_is_zero = gen_emit_guard_unary_cmp('int_is_zero', c.EQ)
-
def emit_op_int_invert(self, op, arglocs, regalloc, fcond):
reg, res = arglocs
@@ -223,9 +193,15 @@
fcond=fcond)
return token
- def _emit_guard(self, op, arglocs, fcond, save_exc,
+ def _emit_guard(self, op, arglocs, save_exc,
is_guard_not_invalidated=False,
is_guard_not_forced=False):
+ if is_guard_not_invalidated:
+ fcond = c.cond_none
+ else:
+ fcond = self.guard_success_cc
+ self.guard_success_cc = c.cond_none
+ assert fcond != c.cond_none
pos = self.mc.currpos()
token = self.build_guard_token(op, arglocs[0].value, arglocs[1:], pos, fcond, save_exc,
is_guard_not_invalidated,
@@ -241,27 +217,13 @@
self.mc.BKPT()
return c.AL
- def _emit_guard_overflow(self, guard, failargs, fcond):
- if guard.getopnum() == rop.GUARD_OVERFLOW:
- fcond = self._emit_guard(guard, failargs, c.VS, save_exc=False)
- elif guard.getopnum() == rop.GUARD_NO_OVERFLOW:
- fcond = self._emit_guard(guard, failargs, c.VC, save_exc=False)
- else:
- assert 0
- return fcond
-
def emit_op_guard_true(self, op, arglocs, regalloc, fcond):
- l0 = arglocs[0]
- failargs = arglocs[1:]
- self.mc.CMP_ri(l0.value, 0)
- fcond = self._emit_guard(op, failargs, c.NE, save_exc=False)
+ fcond = self._emit_guard(op, arglocs, save_exc=False)
return fcond
def emit_op_guard_false(self, op, arglocs, regalloc, fcond):
- l0 = arglocs[0]
- failargs = arglocs[1:]
- self.mc.CMP_ri(l0.value, 0)
- fcond = self._emit_guard(op, failargs, c.EQ, save_exc=False)
+ self.guard_success_cc = c.get_opposite_of(self.guard_success_cc)
+ fcond = self._emit_guard(op, arglocs, save_exc=False)
return fcond
def emit_op_guard_value(self, op, arglocs, regalloc, fcond):
@@ -278,27 +240,27 @@
assert l1.is_vfp_reg()
self.mc.VCMP(l0.value, l1.value)
self.mc.VMRS(cond=fcond)
- fcond = self._emit_guard(op, failargs, c.EQ, save_exc=False)
+ self.guard_success_cc = c.EQ
+ fcond = self._emit_guard(op, failargs, save_exc=False)
return fcond
emit_op_guard_nonnull = emit_op_guard_true
emit_op_guard_isnull = emit_op_guard_false
- def emit_op_guard_no_overflow(self, op, arglocs, regalloc, fcond):
- return self._emit_guard(op, arglocs, c.VC, save_exc=False)
-
- def emit_op_guard_overflow(self, op, arglocs, regalloc, fcond):
- return self._emit_guard(op, arglocs, c.VS, save_exc=False)
+ emit_op_guard_no_overflow = emit_op_guard_true
+ emit_op_guard_overflow = emit_op_guard_false
def emit_op_guard_class(self, op, arglocs, regalloc, fcond):
self._cmp_guard_class(op, arglocs, regalloc, fcond)
- self._emit_guard(op, arglocs[3:], c.EQ, save_exc=False)
+ self.guard_success_cc = c.EQ
+ self._emit_guard(op, arglocs[3:], save_exc=False)
return fcond
def emit_op_guard_nonnull_class(self, op, arglocs, regalloc, fcond):
self.mc.CMP_ri(arglocs[0].value, 1)
self._cmp_guard_class(op, arglocs, regalloc, c.HS)
- self._emit_guard(op, arglocs[3:], c.EQ, save_exc=False)
+ self.guard_success_cc = c.EQ
+ self._emit_guard(op, arglocs[3:], save_exc=False)
return fcond
def _cmp_guard_class(self, op, locs, regalloc, fcond):
@@ -315,18 +277,20 @@
self.mc.CMP_rr(r.ip.value, typeid.value, cond=fcond)
def emit_op_guard_not_invalidated(self, op, locs, regalloc, fcond):
- return self._emit_guard(op, locs, fcond, save_exc=False,
+ return self._emit_guard(op, locs, save_exc=False,
is_guard_not_invalidated=True)
def emit_op_label(self, op, arglocs, regalloc, fcond):
self._check_frame_depth_debug(self.mc)
return fcond
- def cond_call(self, op, gcmap, cond_loc, call_loc, fcond):
+ def emit_op_cond_call(self, op, arglocs, regalloc, fcond):
+ [call_loc] = arglocs
+ gcmap = regalloc.get_gcmap([call_loc])
+
assert call_loc is r.r4
- self.mc.TST_rr(cond_loc.value, cond_loc.value)
jmp_adr = self.mc.currpos()
- self.mc.BKPT() # patched later
+ self.mc.BKPT() # patched later: the conditional jump
#
self.push_gcmap(self.mc, gcmap, store=True)
#
@@ -344,8 +308,13 @@
self.mc.BL(cond_call_adr)
self.pop_gcmap(self.mc)
# never any result value
+ cond = c.get_opposite_of(self.guard_success_cc)
+ self.guard_success_cc = c.cond_none
pmc = OverwritingBuilder(self.mc, jmp_adr, WORD)
- pmc.B_offs(self.mc.currpos(), c.EQ) # equivalent to 0 as result of TST above
+ pmc.B_offs(self.mc.currpos(), cond)
+ # might be overridden again to skip over the following
+ # guard_no_exception too
+ self.previous_cond_call_jcond = jmp_adr, cond
return fcond
def emit_op_jump(self, op, arglocs, regalloc, fcond):
@@ -441,8 +410,15 @@
failargs = arglocs[1:]
self.mc.LDR_ri(loc.value, loc.value)
self.mc.CMP_ri(loc.value, 0)
- cond = self._emit_guard(op, failargs, c.EQ, save_exc=True)
- return cond
+ self.guard_success_cc = c.EQ
+ fcond = self._emit_guard(op, failargs, save_exc=True)
+ # If the previous operation was a COND_CALL, overwrite its conditional
+ # jump to jump over this GUARD_NO_EXCEPTION as well, if we can
+ if self._find_nearby_operation(-1).getopnum() == rop.COND_CALL:
+ jmp_adr, prev_cond = self.previous_cond_call_jcond
+ pmc = OverwritingBuilder(self.mc, jmp_adr, WORD)
+ pmc.B_offs(self.mc.currpos(), prev_cond)
+ return fcond
def emit_op_guard_exception(self, op, arglocs, regalloc, fcond):
loc, loc1, resloc, pos_exc_value, pos_exception = arglocs[:5]
@@ -451,7 +427,8 @@
self.mc.LDR_ri(r.ip.value, loc1.value)
self.mc.CMP_rr(r.ip.value, loc.value)
- self._emit_guard(op, failargs, c.EQ, save_exc=True)
+ self.guard_success_cc = c.EQ
+ self._emit_guard(op, failargs, save_exc=True)
self._store_and_reset_exception(self.mc, resloc)
return fcond
@@ -975,16 +952,14 @@
def imm(self, v):
return imm(v)
- def emit_guard_call_assembler(self, op, guard_op, arglocs, regalloc,
- fcond):
+ def emit_op_call_assembler(self, op, arglocs, regalloc, fcond):
if len(arglocs) == 4:
[argloc, vloc, result_loc, tmploc] = arglocs
else:
[argloc, result_loc, tmploc] = arglocs
vloc = imm(0)
- self.call_assembler(op, guard_op, argloc, vloc, result_loc, tmploc)
- self._emit_guard_may_force(guard_op,
- regalloc._prepare_guard(guard_op))
+ self._store_force_index(self._find_nearby_operation(+1))
+ self.call_assembler(op, argloc, vloc, result_loc, tmploc)
return fcond
def _call_assembler_emit_call(self, addr, argloc, resloc):
@@ -1058,41 +1033,37 @@
mc.B(target)
mc.copy_to_raw_memory(oldadr)
- def emit_guard_call_may_force(self, op, guard_op, arglocs, regalloc,
- fcond):
- self._store_force_index(guard_op)
- numargs = op.numargs()
- callargs = arglocs[:numargs + 3] # extract the arguments to the call
- guardargs = arglocs[len(callargs):]
- #
- self._emit_call(op, callargs, fcond=fcond)
- self._emit_guard_may_force(guard_op, guardargs)
- return fcond
-
- def _emit_guard_may_force(self, guard_op, arglocs):
+ def emit_op_guard_not_forced(self, op, arglocs, regalloc, fcond):
ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
self.mc.LDR_ri(r.ip.value, r.fp.value, imm=ofs)
self.mc.CMP_ri(r.ip.value, 0)
- self._emit_guard(guard_op, arglocs, c.EQ,
- save_exc=True, is_guard_not_forced=True)
+ self.guard_success_cc = c.EQ
+ self._emit_guard(op, arglocs, save_exc=True, is_guard_not_forced=True)
+ return fcond
- def emit_guard_call_release_gil(self, op, guard_op, arglocs, regalloc,
- fcond):
- numargs = op.numargs()
- callargs = arglocs[:numargs + 3] # extract the arguments to the call
- guardargs = arglocs[len(callargs):] # extrat the arguments for the guard
- self._store_force_index(guard_op)
- self._emit_call(op, callargs, is_call_release_gil=True)
- self._emit_guard_may_force(guard_op, guardargs)
+ def emit_op_call_may_force(self, op, arglocs, regalloc, fcond):
+ self._store_force_index(self._find_nearby_operation(+1))
+ self._emit_call(op, arglocs, fcond=fcond)
+ return fcond
+
+ def emit_op_call_release_gil(self, op, arglocs, regalloc, fcond):
+ self._store_force_index(self._find_nearby_operation(+1))
+ self._emit_call(op, arglocs, is_call_release_gil=True)
return fcond
def _store_force_index(self, guard_op):
+ assert (guard_op.getopnum() == rop.GUARD_NOT_FORCED or
+ guard_op.getopnum() == rop.GUARD_NOT_FORCED_2)
faildescr = guard_op.getdescr()
ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr')
value = rffi.cast(lltype.Signed, cast_instance_to_gcref(faildescr))
self.mc.gen_load_int(r.ip.value, value)
self.store_reg(self.mc, r.ip, r.fp, ofs)
+ def _find_nearby_operation(self, delta):
+ regalloc = self._regalloc
+ return regalloc.operations[regalloc.rm.position + delta]
+
def emit_op_call_malloc_gc(self, op, arglocs, regalloc, fcond):
self.emit_op_call(op, arglocs, regalloc, fcond)
self.propagate_memoryerror_if_r0_is_null()
@@ -1125,13 +1096,6 @@
emit_op_float_gt = gen_emit_float_cmp_op('float_gt', c.GT)
emit_op_float_ge = gen_emit_float_cmp_op('float_ge', c.GE)
- emit_guard_float_lt = gen_emit_float_cmp_op_guard('float_lt', c.VFP_LT)
- emit_guard_float_le = gen_emit_float_cmp_op_guard('float_le', c.VFP_LE)
- emit_guard_float_eq = gen_emit_float_cmp_op_guard('float_eq', c.EQ)
- emit_guard_float_ne = gen_emit_float_cmp_op_guard('float_ne', c.NE)
- emit_guard_float_gt = gen_emit_float_cmp_op_guard('float_gt', c.GT)
- emit_guard_float_ge = gen_emit_float_cmp_op_guard('float_ge', c.GE)
-
def emit_op_cast_float_to_int(self, op, arglocs, regalloc, fcond):
arg, res = arglocs
assert arg.is_vfp_reg()
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -5,13 +5,16 @@
RegisterManager, TempBox, compute_vars_longevity, BaseRegalloc, \
get_scale
from rpython.jit.backend.arm import registers as r
+from rpython.jit.backend.arm import conditions as c
from rpython.jit.backend.arm import locations
from rpython.jit.backend.arm.locations import imm, get_fp_offset
from rpython.jit.backend.arm.helper.regalloc import (prepare_op_by_helper_call,
- prepare_op_unary_cmp,
+ prepare_unary_cmp,
prepare_op_ri,
- prepare_cmp_op,
- prepare_float_op,
+ prepare_int_cmp,
+ prepare_unary_op,
+ prepare_two_regs_op,
+ prepare_float_cmp,
check_imm_arg,
check_imm_box,
VMEM_imm_size,
@@ -146,6 +149,7 @@
box_types = None # or a list of acceptable types
no_lower_byte_regs = all_regs
save_around_call_regs = r.caller_resp
+ frame_reg = r.fp
def __init__(self, longevity, frame_manager=None, assembler=None):
RegisterManager.__init__(self, longevity, frame_manager, assembler)
@@ -235,6 +239,18 @@
return self.rm.force_allocate_reg(var, forbidden_vars,
selected_reg, need_lower_byte)
+ def force_allocate_reg_or_cc(self, var, forbidden_vars=[]):
+ assert var.type == INT
+ if self.next_op_can_accept_cc(self.operations, self.rm.position):
+ # hack: return the 'fp' location to mean "lives in CC". This
+ # fp will not actually be used, and the location will be freed
+ # after the next op as usual.
+ self.rm.force_allocate_frame_reg(var)
+ return r.fp
+ else:
+ # else, return a regular register (not fp).
+ return self.rm.force_allocate_reg(var)
+
def try_allocate_reg(self, v, selected_reg=None, need_lower_byte=False):
if v.type == FLOAT:
return self.vfprm.try_allocate_reg(v, selected_reg,
@@ -467,25 +483,6 @@
resloc = self.force_allocate_reg(op.result)
return [argloc, imm(numbytes), resloc]
- def prepare_guard_int_mul_ovf(self, op, guard, fcond):
- boxes = op.getarglist()
- reg1 = self.make_sure_var_in_reg(boxes[0], forbidden_vars=boxes)
- reg2 = self.make_sure_var_in_reg(boxes[1], forbidden_vars=boxes)
- res = self.force_allocate_reg(op.result)
- return self._prepare_guard(guard, [reg1, reg2, res])
-
- def prepare_guard_int_add_ovf(self, op, guard, fcond):
- locs = self._prepare_op_int_add(op, fcond)
- res = self.force_allocate_reg(op.result)
- locs.append(res)
- return self._prepare_guard(guard, locs)
-
- def prepare_guard_int_sub_ovf(self, op, guard, fcond):
- locs = self._prepare_op_int_sub(op, fcond)
- res = self.force_allocate_reg(op.result)
- locs.append(res)
- return self._prepare_guard(guard, locs)
-
prepare_op_int_floordiv = prepare_op_by_helper_call('int_floordiv')
prepare_op_int_mod = prepare_op_by_helper_call('int_mod')
prepare_op_uint_floordiv = prepare_op_by_helper_call('unit_floordiv')
@@ -500,58 +497,36 @@
prepare_op_uint_rshift = prepare_op_ri('uint_rshift', imm_size=0x1F,
allow_zero=False, commutative=False)
- prepare_op_int_lt = prepare_cmp_op('int_lt')
- prepare_op_int_le = prepare_cmp_op('int_le')
- prepare_op_int_eq = prepare_cmp_op('int_eq')
- prepare_op_int_ne = prepare_cmp_op('int_ne')
- prepare_op_int_gt = prepare_cmp_op('int_gt')
- prepare_op_int_ge = prepare_cmp_op('int_ge')
+ prepare_op_int_lt = prepare_int_cmp
+ prepare_op_int_le = prepare_int_cmp
+ prepare_op_int_eq = prepare_int_cmp
+ prepare_op_int_ne = prepare_int_cmp
+ prepare_op_int_gt = prepare_int_cmp
+ prepare_op_int_ge = prepare_int_cmp
- prepare_op_uint_le = prepare_cmp_op('uint_le')
- prepare_op_uint_gt = prepare_cmp_op('uint_gt')
+ prepare_op_uint_le = prepare_int_cmp
+ prepare_op_uint_gt = prepare_int_cmp
- prepare_op_uint_lt = prepare_cmp_op('uint_lt')
- prepare_op_uint_ge = prepare_cmp_op('uint_ge')
+ prepare_op_uint_lt = prepare_int_cmp
+ prepare_op_uint_ge = prepare_int_cmp
prepare_op_ptr_eq = prepare_op_instance_ptr_eq = prepare_op_int_eq
prepare_op_ptr_ne = prepare_op_instance_ptr_ne = prepare_op_int_ne
- prepare_guard_int_lt = prepare_cmp_op('guard_int_lt')
- prepare_guard_int_le = prepare_cmp_op('guard_int_le')
- prepare_guard_int_eq = prepare_cmp_op('guard_int_eq')
- prepare_guard_int_ne = prepare_cmp_op('guard_int_ne')
- prepare_guard_int_gt = prepare_cmp_op('guard_int_gt')
- prepare_guard_int_ge = prepare_cmp_op('guard_int_ge')
-
- prepare_guard_uint_le = prepare_cmp_op('guard_uint_le')
- prepare_guard_uint_gt = prepare_cmp_op('guard_uint_gt')
-
- prepare_guard_uint_lt = prepare_cmp_op('guard_uint_lt')
- prepare_guard_uint_ge = prepare_cmp_op('guard_uint_ge')
-
- prepare_guard_ptr_eq = prepare_guard_instance_ptr_eq = prepare_guard_int_eq
- prepare_guard_ptr_ne = prepare_guard_instance_ptr_ne = prepare_guard_int_ne
-
prepare_op_int_add_ovf = prepare_op_int_add
prepare_op_int_sub_ovf = prepare_op_int_sub
+ prepare_op_int_mul_ovf = prepare_op_int_mul
- prepare_op_int_is_true = prepare_op_unary_cmp('int_is_true')
- prepare_op_int_is_zero = prepare_op_unary_cmp('int_is_zero')
+ prepare_op_int_is_true = prepare_unary_cmp
+ prepare_op_int_is_zero = prepare_unary_cmp
- prepare_guard_int_is_true = prepare_op_unary_cmp('int_is_true')
- prepare_guard_int_is_zero = prepare_op_unary_cmp('int_is_zero')
-
- def prepare_op_int_neg(self, op, fcond):
- l0 = self.make_sure_var_in_reg(op.getarg(0))
- self.possibly_free_vars_for_op(op)
- self.free_temp_vars()
- resloc = self.force_allocate_reg(op.result)
- return [l0, resloc]
-
- prepare_op_int_invert = prepare_op_int_neg
+ prepare_op_int_neg = prepare_unary_op
+ prepare_op_int_invert = prepare_unary_op
def prepare_op_call(self, op, fcond):
- effectinfo = op.getdescr().get_extra_info()
+ calldescr = op.getdescr()
+ assert calldescr is not None
+ effectinfo = calldescr.get_extra_info()
if effectinfo is not None:
oopspecindex = effectinfo.oopspecindex
if oopspecindex in (EffectInfo.OS_LLONG_ADD,
@@ -603,13 +578,12 @@
def _call(self, op, arglocs, force_store=[], save_all_regs=False):
# spill variables that need to be saved around calls
- self.vfprm.before_call(save_all_regs=save_all_regs)
+ self.vfprm.before_call(force_store, save_all_regs=save_all_regs)
if not save_all_regs:
gcrootmap = self.cpu.gc_ll_descr.gcrootmap
if gcrootmap and gcrootmap.is_shadow_stack:
save_all_regs = 2
- self.rm.before_call(save_all_regs=save_all_regs)
- self.before_call_called = True
+ self.rm.before_call(force_store, save_all_regs=save_all_regs)
resloc = None
if op.result:
resloc = self.after_call(op.result)
@@ -666,14 +640,25 @@
locs = [imm(fail_descr)]
return locs
- def prepare_op_guard_true(self, op, fcond):
- l0 = self.make_sure_var_in_reg(op.getarg(0))
- args = self._prepare_guard(op, [l0])
+ def load_condition_into_cc(self, box):
+ if self.assembler.guard_success_cc == c.cond_none:
+ loc = self.loc(box)
+ if not loc.is_core_reg():
+ assert loc.is_stack()
+ self.assembler.regalloc_mov(loc, r.lr)
+ loc = r.lr
+ self.assembler.mc.CMP_ri(loc.value, 0)
+ self.assembler.guard_success_cc = c.NE
+
+ def _prepare_guard_cc(self, op, fcond):
+ self.load_condition_into_cc(op.getarg(0))
+ args = self._prepare_guard(op, [])
return args
- prepare_op_guard_false = prepare_op_guard_true
- prepare_op_guard_nonnull = prepare_op_guard_true
- prepare_op_guard_isnull = prepare_op_guard_true
+ prepare_op_guard_true = _prepare_guard_cc
+ prepare_op_guard_false = _prepare_guard_cc
+ prepare_op_guard_nonnull = _prepare_guard_cc
+ prepare_op_guard_isnull = _prepare_guard_cc
def prepare_op_guard_value(self, op, fcond):
boxes = op.getarglist()
@@ -697,6 +682,7 @@
prepare_op_guard_overflow = prepare_op_guard_no_overflow
prepare_op_guard_not_invalidated = prepare_op_guard_no_overflow
+ prepare_op_guard_not_forced = prepare_op_guard_no_overflow
def prepare_op_guard_exception(self, op, fcond):
boxes = op.getarglist()
@@ -1188,9 +1174,8 @@
arg = op.getarg(i)
self.make_sure_var_in_reg(arg, args_so_far, selected_reg=reg)
args_so_far.append(arg)
- loc_cond = self.make_sure_var_in_reg(op.getarg(0), args_so_far)
- gcmap = self.get_gcmap([tmpreg])
- self.assembler.cond_call(op, gcmap, loc_cond, tmpreg, fcond)
+ self.load_condition_into_cc(op.getarg(0))
+ return [tmpreg]
def prepare_op_force_token(self, op, fcond):
# XXX for now we return a regular reg
@@ -1244,19 +1229,16 @@
self.assembler.store_force_descr(op, fail_locs[1:], fail_locs[0].value)
self.possibly_free_vars(op.getfailargs())
- def prepare_guard_call_may_force(self, op, guard_op, fcond):
- args = self._prepare_call(op, save_all_regs=True)
- return self._prepare_guard(guard_op, args)
+ def prepare_op_call_may_force(self, op, fcond):
+ return self._prepare_call(op, save_all_regs=True)
- def prepare_guard_call_release_gil(self, op, guard_op, fcond):
- args = self._prepare_call(op, save_all_regs=True, first_arg_index=2)
- return self._prepare_guard(guard_op, args)
+ def prepare_op_call_release_gil(self, op, fcond):
+ return self._prepare_call(op, save_all_regs=True, first_arg_index=2)
- def prepare_guard_call_assembler(self, op, guard_op, fcond):
- locs = self.locs_for_call_assembler(op, guard_op)
+ def prepare_op_call_assembler(self, op, fcond):
+ locs = self.locs_for_call_assembler(op)
tmploc = self.get_scratch_reg(INT, selected_reg=r.r0)
resloc = self._call(op, locs + [tmploc], save_all_regs=True)
- self.possibly_free_vars(guard_op.getfailargs())
return locs + [resloc, tmploc]
def _prepare_args_for_new_op(self, new_args):
@@ -1271,39 +1253,18 @@
arglocs.append(t)
return arglocs
- prepare_op_float_add = prepare_float_op(name='prepare_op_float_add')
- prepare_op_float_sub = prepare_float_op(name='prepare_op_float_sub')
- prepare_op_float_mul = prepare_float_op(name='prepare_op_float_mul')
- prepare_op_float_truediv = prepare_float_op(name='prepare_op_float_truediv')
- prepare_op_float_lt = prepare_float_op(float_result=False,
- name='prepare_op_float_lt')
- prepare_op_float_le = prepare_float_op(float_result=False,
- name='prepare_op_float_le')
- prepare_op_float_eq = prepare_float_op(float_result=False,
- name='prepare_op_float_eq')
- prepare_op_float_ne = prepare_float_op(float_result=False,
- name='prepare_op_float_ne')
- prepare_op_float_gt = prepare_float_op(float_result=False,
- name='prepare_op_float_gt')
- prepare_op_float_ge = prepare_float_op(float_result=False,
- name='prepare_op_float_ge')
- prepare_op_float_neg = prepare_float_op(base=False,
- name='prepare_op_float_neg')
- prepare_op_float_abs = prepare_float_op(base=False,
- name='prepare_op_float_abs')
-
- prepare_guard_float_lt = prepare_float_op(guard=True,
- float_result=False, name='prepare_guard_float_lt')
- prepare_guard_float_le = prepare_float_op(guard=True,
- float_result=False, name='prepare_guard_float_le')
- prepare_guard_float_eq = prepare_float_op(guard=True,
- float_result=False, name='prepare_guard_float_eq')
- prepare_guard_float_ne = prepare_float_op(guard=True,
- float_result=False, name='prepare_guard_float_ne')
- prepare_guard_float_gt = prepare_float_op(guard=True,
- float_result=False, name='prepare_guard_float_gt')
- prepare_guard_float_ge = prepare_float_op(guard=True,
- float_result=False, name='prepare_guard_float_ge')
+ prepare_op_float_add = prepare_two_regs_op
+ prepare_op_float_sub = prepare_two_regs_op
+ prepare_op_float_mul = prepare_two_regs_op
+ prepare_op_float_truediv = prepare_two_regs_op
+ prepare_op_float_lt = prepare_float_cmp
+ prepare_op_float_le = prepare_float_cmp
+ prepare_op_float_eq = prepare_float_cmp
+ prepare_op_float_ne = prepare_float_cmp
+ prepare_op_float_gt = prepare_float_cmp
+ prepare_op_float_ge = prepare_float_cmp
+ prepare_op_float_neg = prepare_unary_op
+ prepare_op_float_abs = prepare_unary_op
def _prepare_op_math_sqrt(self, op, fcond):
loc = self.make_sure_var_in_reg(op.getarg(1))
@@ -1327,10 +1288,8 @@
self.force_spill_var(op.getarg(0))
return []
- prepare_op_convert_float_bytes_to_longlong = prepare_float_op(base=False,
- name='prepare_op_convert_float_bytes_to_longlong')
- prepare_op_convert_longlong_bytes_to_float = prepare_float_op(base=False,
- name='prepare_op_convert_longlong_bytes_to_float')
+ prepare_op_convert_float_bytes_to_longlong = prepare_unary_op
+ prepare_op_convert_longlong_bytes_to_float = prepare_unary_op
#def prepare_op_read_timestamp(self, op, fcond):
# loc = self.get_scratch_reg(INT)
@@ -1348,22 +1307,12 @@
return [loc1, res]
-def add_none_argument(fn):
- return lambda self, op, fcond: fn(self, op, None, fcond)
-
-
def notimplemented(self, op, fcond):
print "[ARM/regalloc] %s not implemented" % op.getopname()
raise NotImplementedError(op)
-def notimplemented_with_guard(self, op, guard_op, fcond):
- print "[ARM/regalloc] %s with guard %s not implemented" % \
- (op.getopname(), guard_op.getopname())
- raise NotImplementedError(op)
-
operations = [notimplemented] * (rop._LAST + 1)
-operations_with_guard = [notimplemented_with_guard] * (rop._LAST + 1)
for key, value in rop.__dict__.items():
@@ -1374,13 +1323,3 @@
if hasattr(Regalloc, methname):
func = getattr(Regalloc, methname).im_func
operations[value] = func
-
-for key, value in rop.__dict__.items():
- key = key.lower()
- if key.startswith('_'):
- continue
- methname = 'prepare_guard_%s' % key
- if hasattr(Regalloc, methname):
- func = getattr(Regalloc, methname).im_func
- operations_with_guard[value] = func
- operations[value] = add_none_argument(func)
diff --git a/rpython/jit/backend/arm/test/conftest.py b/rpython/jit/backend/arm/test/conftest.py
--- a/rpython/jit/backend/arm/test/conftest.py
+++ b/rpython/jit/backend/arm/test/conftest.py
@@ -1,21 +1,12 @@
"""
-This conftest adds an option to run the translation tests which by default will
-be disabled.
-Also it disables the backend tests on non ARMv7 platforms
+This disables the backend tests on non ARMv7 platforms.
+Note that you need "--slow" to run translation tests.
"""
import py, os
from rpython.jit.backend import detect_cpu
cpu = detect_cpu.autodetect()
-def pytest_addoption(parser):
- group = parser.getgroup('translation test options')
- group.addoption('--run-translation-tests',
- action="store_true",
- default=False,
- dest="run_translation_tests",
- help="run tests that translate code")
-
def pytest_collect_directory(path, parent):
if not cpu.startswith('arm'):
py.test.skip("ARM(v7) tests skipped: cpu is %r" % (cpu,))
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -212,8 +212,7 @@
self.codemap_builder.leave_portal_frame(op.getarg(0).getint(),
self.mc.get_relative_pos())
- def call_assembler(self, op, guard_op, argloc, vloc, result_loc, tmploc):
- self._store_force_index(guard_op)
+ def call_assembler(self, op, argloc, vloc, result_loc, tmploc):
descr = op.getdescr()
assert isinstance(descr, JitCellToken)
#
@@ -262,9 +261,6 @@
#
# Here we join Path A and Path B again
self._call_assembler_patch_jmp(jmp_location)
- # XXX here should be emitted guard_not_forced, but due
- # to incompatibilities in how it's done, we leave it for the
- # caller to deal with
@specialize.argtype(1)
def _inject_debugging_code(self, looptoken, operations, tp, number):
diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py
--- a/rpython/jit/backend/llsupport/regalloc.py
+++ b/rpython/jit/backend/llsupport/regalloc.py
@@ -639,31 +639,26 @@
if looptoken.compiled_loop_token is not None: # <- for tests
looptoken.compiled_loop_token._ll_initial_locs = locs
- def can_merge_with_next_guard(self, op, i, operations):
- if (op.getopnum() == rop.CALL_MAY_FORCE or
- op.getopnum() == rop.CALL_ASSEMBLER or
- op.getopnum() == rop.CALL_RELEASE_GIL):
- assert operations[i + 1].getopnum() == rop.GUARD_NOT_FORCED
- return True
- if not op.is_comparison():
- if op.is_ovf():
- if (operations[i + 1].getopnum() != rop.GUARD_NO_OVERFLOW and
- operations[i + 1].getopnum() != rop.GUARD_OVERFLOW):
- not_implemented("int_xxx_ovf not followed by "
- "guard_(no)_overflow")
- return True
+ def next_op_can_accept_cc(self, operations, i):
+ op = operations[i]
+ next_op = operations[i + 1]
+ opnum = next_op.getopnum()
+ if (opnum != rop.GUARD_TRUE and opnum != rop.GUARD_FALSE
+ and opnum != rop.COND_CALL):
return False
- if (operations[i + 1].getopnum() != rop.GUARD_TRUE and
- operations[i + 1].getopnum() != rop.GUARD_FALSE):
+ if next_op.getarg(0) is not op.result:
return False
- if operations[i + 1].getarg(0) is not op.result:
+ if self.longevity[op.result][1] > i + 1:
return False
- if (self.longevity[op.result][1] > i + 1 or
- op.result in operations[i + 1].getfailargs()):
- return False
+ if opnum != rop.COND_CALL:
+ if op.result in operations[i + 1].getfailargs():
+ return False
+ else:
+ if op.result in operations[i + 1].getarglist()[1:]:
+ return False
return True
- def locs_for_call_assembler(self, op, guard_op):
+ def locs_for_call_assembler(self, op):
descr = op.getdescr()
assert isinstance(descr, JitCellToken)
if op.numargs() == 2:
diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -2293,7 +2293,7 @@
value |= 32768
assert s.data.tid == value
- def test_cond_call(self):
+ def test_cond_call_1(self):
def func_void(*args):
called.append(args)
@@ -2330,6 +2330,52 @@
assert longlong.getrealfloat(self.cpu.get_float_value(frame, 6)) == 1.2
assert longlong.getrealfloat(self.cpu.get_float_value(frame, 7)) == 3.4
+ def test_cond_call_2(self):
+ def func_void(*args):
+ called.append(args)
+
+ FUNC = self.FuncType([lltype.Signed, lltype.Signed], lltype.Void)
+ func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
+ calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT,
+ EffectInfo.MOST_GENERAL)
+
+ for (operation, arg1, arg2_if_true, arg2_if_false) in [
+ ('int_lt', -5, 2, -5),
+ ('int_le', 5, 5, -6),
+ ('int_eq', 11, 11, 12),
+ ('int_ne', 11, 12, 11),
+ ('int_gt', 8, -1, 8),
+ ('int_xor', 7, 3, 7), # test without a comparison at all
+ ('int_is_true', 4242, 1, 0),
+ ('int_is_zero', 4242, 0, 1),
+ ('float_lt', -0.5, 0.2, -0.5),
+ ('float_eq', 1.1, 1.1, 1.2),
+ ]:
+ called = []
+
+ ops = '''
+ [%s, %s, i3, i4]
+ i2 = %s(%s)
+ cond_call(i2, ConstClass(func_ptr), i3, i4, descr=calldescr)
+ guard_no_exception(descr=faildescr) []
+ finish()
+ ''' % ("i0" if operation.startswith('int') else "f0",
+ "i1" if operation.startswith('int') else "f1",
+ operation,
+ ("i1" if operation.startswith('int_is_') else
+ "i0, i1" if operation.startswith('int') else
+ "f0, f1"))
+ loop = parse(ops, namespace={'func_ptr': func_ptr,
+ 'calldescr': calldescr,
+ 'faildescr': BasicFailDescr()})
+ looptoken = JitCellToken()
+ self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+ frame = self.cpu.execute_token(looptoken, arg1, arg2_if_false, 0, 0)
+ assert called == []
+ frame = self.cpu.execute_token(looptoken, arg1, arg2_if_true,
+ 67, 89)
+ assert called == [(67, 89)]
+
def test_force_operations_returning_void(self):
values = []
def maybe_force(token, flag):
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -726,8 +726,10 @@
def _assemble(self, regalloc, inputargs, operations):
self._regalloc = regalloc
+ self.guard_success_cc = rx86.cond_none
regalloc.compute_hint_frame_locations(operations)
regalloc.walk_operations(inputargs, operations)
+ assert self.guard_success_cc == rx86.cond_none
if we_are_translated() or self.cpu.dont_keepalive_stuff:
self._regalloc = None # else keep it around for debugging
frame_depth = regalloc.get_final_frame_depth()
@@ -922,8 +924,8 @@
oopspecindex = effectinfo.oopspecindex
genop_math_list[oopspecindex](self, op, arglocs, resloc)
- def regalloc_perform_with_guard(self, op, guard_op, faillocs,
- arglocs, resloc, frame_depth):
+ def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc,
+ frame_depth):
faildescr = guard_op.getdescr()
assert isinstance(faildescr, AbstractFailDescr)
failargs = guard_op.getfailargs()
@@ -931,21 +933,12 @@
guard_token = self.implement_guard_recovery(guard_opnum,
faildescr, failargs,
faillocs, frame_depth)
- if op is None:
- dispatch_opnum = guard_opnum
- else:
- dispatch_opnum = op.getopnum()
- genop_guard_list[dispatch_opnum](self, op, guard_op, guard_token,
- arglocs, resloc)
+ genop_guard_list[guard_opnum](self, guard_op, guard_token,
+ arglocs, resloc)
if not we_are_translated():
# must be added by the genop_guard_list[]()
assert guard_token is self.pending_guard_tokens[-1]
- def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc,
- frame_depth):
- self.regalloc_perform_with_guard(None, guard_op, faillocs, arglocs,
- resloc, frame_depth)
-
def load_effective_addr(self, sizereg, baseofs, scale, result, frm=imm0):
self.mc.LEA(result, addr_add(frm, sizereg, baseofs, scale))
@@ -977,88 +970,70 @@
self.mc.LEA_rm(result_loc.value, (loc.value, delta))
return genop_binary_or_lea
+ def flush_cc(self, cond, result_loc):
+ # After emitting a instruction that leaves a boolean result in
+ # a condition code (cc), call this. In the common case, result_loc
+ # will be set to ebp by the regalloc, which in this case means
+ # "propagate it between this operation and the next guard by keeping
+ # it in the cc". In the uncommon case, result_loc is another
+ # register, and we emit a load from the cc into this register.
+ assert self.guard_success_cc == rx86.cond_none
+ if result_loc is ebp:
+ self.guard_success_cc = cond
+ else:
+ rl = result_loc.lowest8bits()
+ self.mc.SET_ir(cond, rl.value)
+ self.mc.MOVZX8_rr(result_loc.value, rl.value)
+
def _cmpop(cond, rev_cond):
+ cond = rx86.Conditions[cond]
+ rev_cond = rx86.Conditions[rev_cond]
+ #
def genop_cmp(self, op, arglocs, result_loc):
- rl = result_loc.lowest8bits()
if isinstance(op.getarg(0), Const):
self.mc.CMP(arglocs[1], arglocs[0])
- self.mc.SET_ir(rx86.Conditions[rev_cond], rl.value)
+ self.flush_cc(rev_cond, result_loc)
else:
self.mc.CMP(arglocs[0], arglocs[1])
- self.mc.SET_ir(rx86.Conditions[cond], rl.value)
- self.mc.MOVZX8_rr(result_loc.value, rl.value)
+ self.flush_cc(cond, result_loc)
return genop_cmp
- def _cmpop_float(cond, rev_cond, is_ne=False):
- def genop_cmp(self, op, arglocs, result_loc):
- if isinstance(arglocs[0], RegLoc):
+ def _if_parity_clear_zero_and_carry(self):
+ self.mc.J_il8(rx86.Conditions['NP'], 0)
+ jnp_location = self.mc.get_relative_pos()
+ # CMP EBP, 0: as EBP cannot be null here, that operation should
+ # always clear zero and carry
+ self.mc.CMP_ri(ebp.value, 0)
+ # patch the JNP above
+ offset = self.mc.get_relative_pos() - jnp_location
+ assert 0 < offset <= 127
+ self.mc.overwrite(jnp_location-1, chr(offset))
+
+ def _cmpop_float(cond, rev_cond):
+ is_ne = cond == 'NE'
+ need_direct_p = 'A' not in cond
+ need_rev_p = 'A' not in rev_cond
+ cond_contains_e = ('E' in cond) ^ ('N' in cond)
+ cond = rx86.Conditions[cond]
+ rev_cond = rx86.Conditions[rev_cond]
+ #
+ def genop_cmp_float(self, op, arglocs, result_loc):
+ if need_direct_p:
+ direct_case = not isinstance(arglocs[1], RegLoc)
+ else:
+ direct_case = isinstance(arglocs[0], RegLoc)
+ if direct_case:
self.mc.UCOMISD(arglocs[0], arglocs[1])
checkcond = cond
+ need_p = need_direct_p
else:
self.mc.UCOMISD(arglocs[1], arglocs[0])
checkcond = rev_cond
-
- tmp1 = result_loc.lowest8bits()
- if IS_X86_32:
- tmp2 = result_loc.higher8bits()
- elif IS_X86_64:
- tmp2 = X86_64_SCRATCH_REG.lowest8bits()
-
- self.mc.SET_ir(rx86.Conditions[checkcond], tmp1.value)
- if is_ne:
- self.mc.SET_ir(rx86.Conditions['P'], tmp2.value)
- self.mc.OR8_rr(tmp1.value, tmp2.value)
- else:
- self.mc.SET_ir(rx86.Conditions['NP'], tmp2.value)
- self.mc.AND8_rr(tmp1.value, tmp2.value)
- self.mc.MOVZX8_rr(result_loc.value, tmp1.value)
- return genop_cmp
-
- def _cmpop_guard(cond, rev_cond, false_cond, false_rev_cond):
- def genop_cmp_guard(self, op, guard_op, guard_token, arglocs, result_loc):
- guard_opnum = guard_op.getopnum()
- if isinstance(op.getarg(0), Const):
- self.mc.CMP(arglocs[1], arglocs[0])
- if guard_opnum == rop.GUARD_FALSE:
- self.implement_guard(guard_token, rev_cond)
- else:
- self.implement_guard(guard_token, false_rev_cond)
- else:
- self.mc.CMP(arglocs[0], arglocs[1])
- if guard_opnum == rop.GUARD_FALSE:
- self.implement_guard(guard_token, cond)
- else:
- self.implement_guard(guard_token, false_cond)
- return genop_cmp_guard
-
- def _cmpop_guard_float(cond, rev_cond, false_cond, false_rev_cond):
- need_direct_jp = 'A' not in cond
- need_rev_jp = 'A' not in rev_cond
- def genop_cmp_guard_float(self, op, guard_op, guard_token, arglocs,
- result_loc):
- guard_opnum = guard_op.getopnum()
- if isinstance(arglocs[0], RegLoc):
- self.mc.UCOMISD(arglocs[0], arglocs[1])
- checkcond = cond
- checkfalsecond = false_cond
- need_jp = need_direct_jp
- else:
- self.mc.UCOMISD(arglocs[1], arglocs[0])
- checkcond = rev_cond
- checkfalsecond = false_rev_cond
- need_jp = need_rev_jp
- if guard_opnum == rop.GUARD_FALSE:
- if need_jp:
- self.mc.J_il8(rx86.Conditions['P'], 6)
- self.implement_guard(guard_token, checkcond)
- else:
- if need_jp:
- self.mc.J_il8(rx86.Conditions['P'], 2)
- self.mc.J_il8(rx86.Conditions[checkcond], 5)
- self.implement_guard(guard_token)
- else:
- self.implement_guard(guard_token, checkfalsecond)
- return genop_cmp_guard_float
+ need_p = need_rev_p
+ if need_p:
+ self._if_parity_clear_zero_and_carry()
+ self.flush_cc(checkcond, result_loc)
+ return genop_cmp_float
def simple_call(self, fnloc, arglocs, result_loc=eax):
if result_loc is xmm0:
@@ -1121,37 +1096,17 @@
genop_ptr_eq = genop_instance_ptr_eq = genop_int_eq
genop_ptr_ne = genop_instance_ptr_ne = genop_int_ne
- genop_float_lt = _cmpop_float('B', 'A')
- genop_float_le = _cmpop_float('BE', 'AE')
- genop_float_ne = _cmpop_float('NE', 'NE', is_ne=True)
- genop_float_eq = _cmpop_float('E', 'E')
- genop_float_gt = _cmpop_float('A', 'B')
- genop_float_ge = _cmpop_float('AE', 'BE')
-
genop_uint_gt = _cmpop("A", "B")
genop_uint_lt = _cmpop("B", "A")
genop_uint_le = _cmpop("BE", "AE")
genop_uint_ge = _cmpop("AE", "BE")
- genop_guard_int_lt = _cmpop_guard("L", "G", "GE", "LE")
- genop_guard_int_le = _cmpop_guard("LE", "GE", "G", "L")
- genop_guard_int_eq = _cmpop_guard("E", "E", "NE", "NE")
- genop_guard_int_ne = _cmpop_guard("NE", "NE", "E", "E")
- genop_guard_int_gt = _cmpop_guard("G", "L", "LE", "GE")
- genop_guard_int_ge = _cmpop_guard("GE", "LE", "L", "G")
- genop_guard_ptr_eq = genop_guard_instance_ptr_eq = genop_guard_int_eq
- genop_guard_ptr_ne = genop_guard_instance_ptr_ne = genop_guard_int_ne
-
- genop_guard_uint_gt = _cmpop_guard("A", "B", "BE", "AE")
- genop_guard_uint_lt = _cmpop_guard("B", "A", "AE", "BE")
- genop_guard_uint_le = _cmpop_guard("BE", "AE", "A", "B")
- genop_guard_uint_ge = _cmpop_guard("AE", "BE", "B", "A")
-
- genop_guard_float_lt = _cmpop_guard_float("B", "A", "AE","BE")
- genop_guard_float_le = _cmpop_guard_float("BE","AE", "A", "B")
- genop_guard_float_eq = _cmpop_guard_float("E", "E", "NE","NE")
- genop_guard_float_gt = _cmpop_guard_float("A", "B", "BE","AE")
- genop_guard_float_ge = _cmpop_guard_float("AE","BE", "B", "A")
+ genop_float_lt = _cmpop_float("B", "A")
+ genop_float_le = _cmpop_float("BE","AE")
+ genop_float_eq = _cmpop_float("E", "E")
+ genop_float_ne = _cmpop_float("NE", "NE")
+ genop_float_gt = _cmpop_float("A", "B")
+ genop_float_ge = _cmpop_float("AE","BE")
def genop_math_sqrt(self, op, arglocs, resloc):
self.mc.SQRTSD(arglocs[0], resloc)
@@ -1181,20 +1136,6 @@
else:
raise AssertionError("bad number of bytes")
- def genop_guard_float_ne(self, op, guard_op, guard_token, arglocs, result_loc):
- guard_opnum = guard_op.getopnum()
- if isinstance(arglocs[0], RegLoc):
- self.mc.UCOMISD(arglocs[0], arglocs[1])
- else:
- self.mc.UCOMISD(arglocs[1], arglocs[0])
- if guard_opnum == rop.GUARD_TRUE:
- self.mc.J_il8(rx86.Conditions['P'], 6)
- self.implement_guard(guard_token, 'E')
- else:
- self.mc.J_il8(rx86.Conditions['P'], 2)
- self.mc.J_il8(rx86.Conditions['E'], 5)
- self.implement_guard(guard_token)
-
def genop_float_neg(self, op, arglocs, resloc):
# Following what gcc does: res = x ^ 0x8000000000000000
self.mc.XORPD(arglocs[0], heap(self.float_const_neg_addr))
@@ -1241,33 +1182,20 @@
else:
self.mov(loc0, resloc)
- def genop_guard_int_is_true(self, op, guard_op, guard_token, arglocs, resloc):
- guard_opnum = guard_op.getopnum()
- self.mc.CMP(arglocs[0], imm0)
- if guard_opnum == rop.GUARD_TRUE:
- self.implement_guard(guard_token, 'Z')
+ def test_location(self, loc):
+ assert not isinstance(loc, ImmedLoc)
+ if isinstance(loc, RegLoc):
+ self.mc.TEST_rr(loc.value, loc.value) # more compact
else:
- self.implement_guard(guard_token, 'NZ')
+ self.mc.CMP(loc, imm0) # works from memory too
def genop_int_is_true(self, op, arglocs, resloc):
- self.mc.CMP(arglocs[0], imm0)
- rl = resloc.lowest8bits()
- self.mc.SET_ir(rx86.Conditions['NE'], rl.value)
- self.mc.MOVZX8(resloc, rl)
-
- def genop_guard_int_is_zero(self, op, guard_op, guard_token, arglocs, resloc):
- guard_opnum = guard_op.getopnum()
- self.mc.CMP(arglocs[0], imm0)
- if guard_opnum == rop.GUARD_TRUE:
- self.implement_guard(guard_token, 'NZ')
- else:
- self.implement_guard(guard_token, 'Z')
+ self.test_location(arglocs[0])
+ self.flush_cc(rx86.Conditions['NZ'], resloc)
def genop_int_is_zero(self, op, arglocs, resloc):
- self.mc.CMP(arglocs[0], imm0)
- rl = resloc.lowest8bits()
- self.mc.SET_ir(rx86.Conditions['E'], rl.value)
- self.mc.MOVZX8(resloc, rl)
+ self.test_location(arglocs[0])
+ self.flush_cc(rx86.Conditions['Z'], resloc)
def genop_same_as(self, op, arglocs, resloc):
self.mov(arglocs[0], resloc)
@@ -1618,30 +1546,40 @@
self.mc.MOVD32_xr(resloc.value, eax.value)
self.mc.PUNPCKLDQ_xx(resloc.value, loc1.value)
- def genop_guard_guard_true(self, ign_1, guard_op, guard_token, locs, ign_2):
- loc = locs[0]
- self.mc.TEST(loc, loc)
- self.implement_guard(guard_token, 'Z')
+ def genop_guard_guard_true(self, guard_op, guard_token, locs, resloc):
+ self.implement_guard(guard_token)
genop_guard_guard_nonnull = genop_guard_guard_true
- def genop_guard_guard_no_exception(self, ign_1, guard_op, guard_token,
- locs, ign_2):
+ def genop_guard_guard_false(self, guard_op, guard_token, locs, resloc):
+ self.guard_success_cc = rx86.invert_condition(self.guard_success_cc)
+ self.implement_guard(guard_token)
+ genop_guard_guard_isnull = genop_guard_guard_false
+
+ def genop_guard_guard_no_exception(self, guard_op, guard_token, locs, ign):
self.mc.CMP(heap(self.cpu.pos_exception()), imm0)
- self.implement_guard(guard_token, 'NZ')
+ self.guard_success_cc = rx86.Conditions['Z']
+ self.implement_guard(guard_token)
+ # If the previous operation was a COND_CALL, overwrite its conditional
+ # jump to jump over this GUARD_NO_EXCEPTION as well, if we can
+ if self._find_nearby_operation(-1).getopnum() == rop.COND_CALL:
+ jmp_adr = self.previous_cond_call_jcond
+ offset = self.mc.get_relative_pos() - jmp_adr
+ if offset <= 127:
+ self.mc.overwrite(jmp_adr-1, chr(offset))
- def genop_guard_guard_not_invalidated(self, ign_1, guard_op, guard_token,
- locs, ign_2):
+ def genop_guard_guard_not_invalidated(self, guard_op, guard_token,
+ locs, ign):
pos = self.mc.get_relative_pos() + 1 # after potential jmp
guard_token.pos_jump_offset = pos
self.pending_guard_tokens.append(guard_token)
- def genop_guard_guard_exception(self, ign_1, guard_op, guard_token,
- locs, resloc):
+ def genop_guard_guard_exception(self, guard_op, guard_token, locs, resloc):
loc = locs[0]
loc1 = locs[1]
self.mc.MOV(loc1, heap(self.cpu.pos_exception()))
self.mc.CMP(loc1, loc)
- self.implement_guard(guard_token, 'NE')
+ self.guard_success_cc = rx86.Conditions['E']
+ self.implement_guard(guard_token)
self._store_and_reset_exception(self.mc, resloc)
def _store_and_reset_exception(self, mc, excvalloc=None, exctploc=None,
@@ -1674,41 +1612,29 @@
mc.MOV(heap(self.cpu.pos_exc_value()), tmploc)
mc.MOV(heap(self.cpu.pos_exception()), exctploc)
- def _gen_guard_overflow(self, guard_op, guard_token):
- guard_opnum = guard_op.getopnum()
- if guard_opnum == rop.GUARD_NO_OVERFLOW:
- self.implement_guard(guard_token, 'O')
- elif guard_opnum == rop.GUARD_OVERFLOW:
- self.implement_guard(guard_token, 'NO')
- else:
- not_implemented("int_xxx_ovf followed by %s" %
- guard_op.getopname())
+ def genop_int_add_ovf(self, op, arglocs, resloc):
+ self.genop_int_add(op, arglocs, resloc)
+ self.guard_success_cc = rx86.Conditions['NO']
- def genop_guard_int_add_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
- self.mc.ADD(arglocs[0], arglocs[1])
- return self._gen_guard_overflow(guard_op, guard_token)
+ def genop_int_sub_ovf(self, op, arglocs, resloc):
+ self.genop_int_sub(op, arglocs, resloc)
+ self.guard_success_cc = rx86.Conditions['NO']
- def genop_guard_int_sub_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
- self.mc.SUB(arglocs[0], arglocs[1])
- return self._gen_guard_overflow(guard_op, guard_token)
+ def genop_int_mul_ovf(self, op, arglocs, resloc):
+ self.genop_int_mul(op, arglocs, resloc)
+ self.guard_success_cc = rx86.Conditions['NO']
- def genop_guard_int_mul_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
- self.mc.IMUL(arglocs[0], arglocs[1])
- return self._gen_guard_overflow(guard_op, guard_token)
+ genop_guard_guard_no_overflow = genop_guard_guard_true
+ genop_guard_guard_overflow = genop_guard_guard_false
- def genop_guard_guard_false(self, ign_1, guard_op, guard_token, locs, ign_2):
- loc = locs[0]
- self.mc.TEST(loc, loc)
- self.implement_guard(guard_token, 'NZ')
- genop_guard_guard_isnull = genop_guard_guard_false
-
- def genop_guard_guard_value(self, ign_1, guard_op, guard_token, locs, ign_2):
+ def genop_guard_guard_value(self, guard_op, guard_token, locs, ign):
if guard_op.getarg(0).type == FLOAT:
assert guard_op.getarg(1).type == FLOAT
self.mc.UCOMISD(locs[0], locs[1])
else:
self.mc.CMP(locs[0], locs[1])
- self.implement_guard(guard_token, 'NE')
+ self.guard_success_cc = rx86.Conditions['E']
+ self.implement_guard(guard_token)
def _cmp_guard_class(self, locs):
offset = self.cpu.vtable_offset
@@ -1743,12 +1669,12 @@
elif IS_X86_64:
self.mc.CMP32_mi((locs[0].value, 0), expected_typeid)
- def genop_guard_guard_class(self, ign_1, guard_op, guard_token, locs, ign_2):
+ def genop_guard_guard_class(self, guard_op, guard_token, locs, ign):
self._cmp_guard_class(locs)
- self.implement_guard(guard_token, 'NE')
+ self.guard_success_cc = rx86.Conditions['E']
+ self.implement_guard(guard_token)
- def genop_guard_guard_nonnull_class(self, ign_1, guard_op,
- guard_token, locs, ign_2):
+ def genop_guard_guard_nonnull_class(self, guard_op, guard_token, locs, ign):
self.mc.CMP(locs[0], imm1)
# Patched below
self.mc.J_il8(rx86.Conditions['B'], 0)
@@ -1759,7 +1685,8 @@
assert 0 < offset <= 127
self.mc.overwrite(jb_location-1, chr(offset))
#
- self.implement_guard(guard_token, 'NE')
+ self.guard_success_cc = rx86.Conditions['E']
+ self.implement_guard(guard_token)
def implement_guard_recovery(self, guard_opnum, faildescr, failargs,
fail_locs, frame_depth):
@@ -1924,13 +1851,11 @@
# exit function
self._call_footer()
- def implement_guard(self, guard_token, condition=None):
+ def implement_guard(self, guard_token):
# These jumps are patched later.
- if condition:
- self.mc.J_il(rx86.Conditions[condition], 0)
- else:
- self.mc.JMP_l(0)
- self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
+ assert self.guard_success_cc >= 0
+ self.mc.J_il(rx86.invert_condition(self.guard_success_cc), 0)
+ self.guard_success_cc = rx86.cond_none
guard_token.pos_jump_offset = self.mc.get_relative_pos() - 4
self.pending_guard_tokens.append(guard_token)
@@ -1964,42 +1889,44 @@
cb.emit()
def _store_force_index(self, guard_op):
+ assert (guard_op.getopnum() == rop.GUARD_NOT_FORCED or
+ guard_op.getopnum() == rop.GUARD_NOT_FORCED_2)
faildescr = guard_op.getdescr()
ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr')
self.mc.MOV(raw_stack(ofs), imm(rffi.cast(lltype.Signed,
cast_instance_to_gcref(faildescr))))
- def _emit_guard_not_forced(self, guard_token):
+ def _find_nearby_operation(self, delta):
+ regalloc = self._regalloc
+ return regalloc.operations[regalloc.rm.position + delta]
+
+ def genop_guard_guard_not_forced(self, guard_op, guard_token, locs, resloc):
ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
self.mc.CMP_bi(ofs, 0)
- self.implement_guard(guard_token, 'NE')
+ self.guard_success_cc = rx86.Conditions['E']
+ self.implement_guard(guard_token)
- def genop_guard_call_may_force(self, op, guard_op, guard_token,
- arglocs, result_loc):
- self._store_force_index(guard_op)
+ def genop_call_may_force(self, op, arglocs, result_loc):
+ self._store_force_index(self._find_nearby_operation(+1))
self._genop_call(op, arglocs, result_loc)
- self._emit_guard_not_forced(guard_token)
- def genop_guard_call_release_gil(self, op, guard_op, guard_token,
- arglocs, result_loc):
- self._store_force_index(guard_op)
+ def genop_call_release_gil(self, op, arglocs, result_loc):
+ self._store_force_index(self._find_nearby_operation(+1))
self._genop_call(op, arglocs, result_loc, is_call_release_gil=True)
- self._emit_guard_not_forced(guard_token)
def imm(self, v):
return imm(v)
# ------------------- CALL ASSEMBLER --------------------------
- def genop_guard_call_assembler(self, op, guard_op, guard_token,
- arglocs, result_loc):
+ def genop_call_assembler(self, op, arglocs, result_loc):
if len(arglocs) == 2:
[argloc, vloc] = arglocs
else:
[argloc] = arglocs
vloc = self.imm(0)
- self.call_assembler(op, guard_op, argloc, vloc, result_loc, eax)
- self._emit_guard_not_forced(guard_token)
+ self._store_force_index(self._find_nearby_operation(+1))
+ self.call_assembler(op, argloc, vloc, result_loc, eax)
def _call_assembler_emit_call(self, addr, argloc, _):
threadlocal_loc = RawEspLoc(THREADLOCAL_OFS, INT)
@@ -2200,10 +2127,9 @@
not_implemented("not implemented operation with res: %s" %
op.getopname())
- def not_implemented_op_guard(self, op, guard_op,
- failaddr, arglocs, resloc):
+ def not_implemented_op_guard(self, guard_op, guard_token, locs, resloc):
not_implemented("not implemented operation (guard): %s" %
- op.getopname())
+ guard_op.getopname())
def closing_jump(self, target_token):
target = target_token._ll_loop_code
@@ -2216,10 +2142,12 @@
def label(self):
self._check_frame_depth_debug(self.mc)
- def cond_call(self, op, gcmap, loc_cond, imm_func, arglocs):
- self.mc.TEST(loc_cond, loc_cond)
- self.mc.J_il8(rx86.Conditions['Z'], 0) # patched later
+ def cond_call(self, op, gcmap, imm_func, arglocs):
+ assert self.guard_success_cc >= 0
+ self.mc.J_il8(rx86.invert_condition(self.guard_success_cc), 0)
+ # patched later
jmp_adr = self.mc.get_relative_pos()
+ self.guard_success_cc = rx86.cond_none
#
self.push_gcmap(self.mc, gcmap, store=True)
#
@@ -2260,8 +2188,9 @@
offset = self.mc.get_relative_pos() - jmp_adr
assert 0 < offset <= 127
self.mc.overwrite(jmp_adr-1, chr(offset))
- # XXX if the next operation is a GUARD_NO_EXCEPTION, we should
- # somehow jump over it too in the fast path
+ # might be overridden again to skip over the following
+ # guard_no_exception too
+ self.previous_cond_call_jcond = jmp_adr
def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, gcmap):
assert size & (WORD-1) == 0 # must be correctly aligned
@@ -2454,7 +2383,7 @@
opname = name[len('genop_discard_'):]
num = getattr(rop, opname.upper())
genop_discard_list[num] = value
- elif name.startswith('genop_guard_') and name != 'genop_guard_exception':
+ elif name.startswith('genop_guard_'):
opname = name[len('genop_guard_'):]
num = getattr(rop, opname.upper())
genop_guard_list[num] = value
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -215,6 +215,18 @@
return self.rm.force_allocate_reg(var, forbidden_vars,
selected_reg, need_lower_byte)
+ def force_allocate_reg_or_cc(self, var):
+ assert var.type == INT
+ if self.next_op_can_accept_cc(self.operations, self.rm.position):
+ # hack: return the ebp location to mean "lives in CC". This
+ # ebp will not actually be used, and the location will be freed
+ # after the next op as usual.
+ self.rm.force_allocate_frame_reg(var)
+ return ebp
+ else:
+ # else, return a regular register (not ebp).
+ return self.rm.force_allocate_reg(var, need_lower_byte=True)
+
def force_spill_var(self, var):
if var.type == FLOAT:
return self.xrm.force_spill_var(var)
@@ -278,20 +290,8 @@
self.assembler.dump('%s <- %s(%s)' % (result_loc, op, arglocs))
self.assembler.regalloc_perform_math(op, arglocs, result_loc)
- def locs_for_fail(self, guard_op):
- return [self.loc(v) for v in guard_op.getfailargs()]
-
- def perform_with_guard(self, op, guard_op, arglocs, result_loc):
- faillocs = self.locs_for_fail(guard_op)
- self.rm.position += 1
- self.xrm.position += 1
- self.assembler.regalloc_perform_with_guard(op, guard_op, faillocs,
- arglocs, result_loc,
- self.fm.get_frame_depth())
- self.possibly_free_vars(guard_op.getfailargs())
-
def perform_guard(self, guard_op, arglocs, result_loc):
- faillocs = self.locs_for_fail(guard_op)
+ faillocs = [self.loc(v) for v in guard_op.getfailargs()]
if not we_are_translated():
if result_loc is not None:
self.assembler.dump('%s <- %s(%s)' % (result_loc, guard_op,
@@ -310,7 +310,7 @@
def walk_operations(self, inputargs, operations):
i = 0
- #self.operations = operations
+ self.operations = operations
while i < len(operations):
op = operations[i]
self.assembler.mc.mark_op(op)
@@ -321,10 +321,7 @@
i += 1
self.possibly_free_vars_for_op(op)
continue
- if self.can_merge_with_next_guard(op, i, operations):
- oplist_with_guard[op.getopnum()](self, op, operations[i + 1])
- i += 1
- elif not we_are_translated() and op.getopnum() == -124:
+ if not we_are_translated() and op.getopnum() == -124:
self._consider_force_spill(op)
else:
oplist[op.getopnum()](self, op)
@@ -336,6 +333,7 @@
assert not self.xrm.reg_bindings
self.flush_loop()
self.assembler.mc.mark_op(None) # end of the loop
+ self.operations = None
for arg in inputargs:
self.possibly_free_var(arg)
@@ -363,14 +361,19 @@
return self.xrm.loc(v)
return self.rm.loc(v)
- def _consider_guard(self, op):
- loc = self.rm.make_sure_var_in_reg(op.getarg(0))
- self.perform_guard(op, [loc], None)
+ def load_condition_into_cc(self, box):
+ if self.assembler.guard_success_cc == rx86.cond_none:
+ self.assembler.test_location(self.loc(box))
+ self.assembler.guard_success_cc = rx86.Conditions['NZ']
- consider_guard_true = _consider_guard
- consider_guard_false = _consider_guard
- consider_guard_nonnull = _consider_guard
- consider_guard_isnull = _consider_guard
+ def _consider_guard_cc(self, op):
+ self.load_condition_into_cc(op.getarg(0))
+ self.perform_guard(op, [], None)
+
+ consider_guard_true = _consider_guard_cc
+ consider_guard_false = _consider_guard_cc
+ consider_guard_nonnull = _consider_guard_cc
+ consider_guard_isnull = _consider_guard_cc
def consider_finish(self, op):
# the frame is in ebp, but we have to point where in the frame is
@@ -415,6 +418,7 @@
consider_guard_no_overflow = consider_guard_no_exception
consider_guard_overflow = consider_guard_no_exception
+ consider_guard_not_forced = consider_guard_no_exception
def consider_guard_value(self, op):
x = self.make_sure_var_in_reg(op.getarg(0))
@@ -482,17 +486,9 @@
consider_int_or = _consider_binop_symm
consider_int_xor = _consider_binop_symm
- def _consider_binop_with_guard(self, op, guard_op):
- loc, argloc = self._consider_binop_part(op)
- self.perform_with_guard(op, guard_op, [loc, argloc], loc)
-
- def _consider_binop_with_guard_symm(self, op, guard_op):
- loc, argloc = self._consider_binop_part(op, symm=True)
- self.perform_with_guard(op, guard_op, [loc, argloc], loc)
-
- consider_int_mul_ovf = _consider_binop_with_guard_symm
- consider_int_sub_ovf = _consider_binop_with_guard
- consider_int_add_ovf = _consider_binop_with_guard_symm
+ consider_int_mul_ovf = _consider_binop_symm
+ consider_int_sub_ovf = _consider_binop
+ consider_int_add_ovf = _consider_binop_symm
def consider_int_neg(self, op):
res = self.rm.force_result_in_reg(op.result, op.getarg(0))
@@ -541,7 +537,7 @@
consider_uint_floordiv = consider_int_floordiv
- def _consider_compop(self, op, guard_op):
+ def _consider_compop(self, op):
vx = op.getarg(0)
vy = op.getarg(1)
arglocs = [self.loc(vx), self.loc(vy)]
@@ -551,12 +547,9 @@
pass
else:
arglocs[0] = self.rm.make_sure_var_in_reg(vx)
- if guard_op is None:
- loc = self.rm.force_allocate_reg(op.result, args,
- need_lower_byte=True)
- self.perform(op, arglocs, loc)
- else:
- self.perform_with_guard(op, guard_op, arglocs, None)
+ self.possibly_free_vars(args)
+ loc = self.force_allocate_reg_or_cc(op.result)
+ self.perform(op, arglocs, loc)
consider_int_lt = _consider_compop
consider_int_gt = _consider_compop
@@ -582,7 +575,7 @@
consider_float_mul = _consider_float_op # xxx could be _symm
consider_float_truediv = _consider_float_op
- def _consider_float_cmp(self, op, guard_op):
+ def _consider_float_cmp(self, op):
vx = op.getarg(0)
vy = op.getarg(1)
arglocs = [self.loc(vx), self.loc(vy)]
@@ -592,11 +585,9 @@
arglocs[1] = self.xrm.make_sure_var_in_reg(vy)
else:
arglocs[0] = self.xrm.make_sure_var_in_reg(vx)
- if guard_op is None:
- res = self.rm.force_allocate_reg(op.result, need_lower_byte=True)
- self.perform(op, arglocs, res)
- else:
- self.perform_with_guard(op, guard_op, arglocs, None)
+ self.possibly_free_vars(op.getarglist())
+ loc = self.force_allocate_reg_or_cc(op.result)
+ self.perform(op, arglocs, loc)
consider_float_lt = _consider_float_cmp
consider_float_le = _consider_float_cmp
@@ -737,7 +728,7 @@
else:
self._consider_call(op)
- def _call(self, op, arglocs, force_store=[], guard_not_forced_op=None):
+ def _call(self, op, arglocs, force_store=[], guard_not_forced=False):
# we need to save registers on the stack:
#
# - at least the non-callee-saved registers
@@ -750,7 +741,7 @@
# grab_frame_values() would not be able to locate values in
# callee-saved registers.
#
- save_all_regs = guard_not_forced_op is not None
+ save_all_regs = guard_not_forced
self.xrm.before_call(force_store, save_all_regs=save_all_regs)
if not save_all_regs:
gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
@@ -768,12 +759,9 @@
resloc = self.rm.after_call(op.result)
else:
resloc = None
- if guard_not_forced_op is not None:
- self.perform_with_guard(op, guard_not_forced_op, arglocs, resloc)
- else:
- self.perform(op, arglocs, resloc)
+ self.perform(op, arglocs, resloc)
- def _consider_call(self, op, guard_not_forced_op=None, first_arg_index=1):
More information about the pypy-commit mailing list