[pypy-commit] pypy s390x-backend: adding resoperations to regalloc/assembler (label, int_(lt, eq, ...), guards)

plan_rich noreply at buildbot.pypy.org
Wed Oct 28 07:02:25 EDT 2015


Author: Richard Plangger <planrichi at gmail.com>
Branch: s390x-backend
Changeset: r80471:3a2cb683d03e
Date: 2015-10-28 10:11 +0100
http://bitbucket.org/pypy/pypy/changeset/3a2cb683d03e/

Log:	adding resoperations to regalloc/assembler (label,int_(lt,eq,...),
	guards)

diff --git a/rpython/jit/backend/zarch/assembler.py b/rpython/jit/backend/zarch/assembler.py
--- a/rpython/jit/backend/zarch/assembler.py
+++ b/rpython/jit/backend/zarch/assembler.py
@@ -11,7 +11,7 @@
         STD_FRAME_SIZE_IN_BYTES, GPR_STACK_SAVE_IN_BYTES,
         THREADLOCAL_ADDR_OFFSET)
 from rpython.jit.backend.zarch.opassembler import (IntOpAssembler,
-    FloatOpAssembler)
+    FloatOpAssembler, GuardOpAssembler)
 from rpython.jit.backend.zarch.regalloc import Regalloc
 from rpython.jit.metainterp.resoperation import rop
 from rpython.rlib.debug import (debug_print, debug_start, debug_stop,
@@ -105,7 +105,8 @@
         self.places = []
 
 class AssemblerZARCH(BaseAssembler,
-        IntOpAssembler, FloatOpAssembler):
+        IntOpAssembler, FloatOpAssembler,
+        GuardOpAssembler):
 
     def __init__(self, cpu, translate_support_code=False):
         BaseAssembler.__init__(self, cpu, translate_support_code)
@@ -145,6 +146,9 @@
         self.mc = None
         self.pending_guards = None
 
+    def target_arglocs(self, looptoken):
+        return looptoken._zarch_arglocs
+
     def get_asmmemmgr_blocks(self, looptoken):
         clt = looptoken.compiled_loop_token
         if clt.asmmemmgr_blocks is None:
@@ -333,7 +337,7 @@
         if prev_loc.is_imm():
             value = prev_loc.getint()
             # move immediate value to register
-            if loc.is_core_reg():
+            if loc.is_reg():
                 self.mc.load_imm(loc, value)
                 return
             # move immediate value to memory
@@ -347,7 +351,7 @@
         elif prev_loc.is_stack():
             offset = prev_loc.value
             # move from memory to register
-            if loc.is_core_reg():
+            if loc.is_reg():
                 self.mc.load(loc, r.SPP, offset)
                 return
             # move in memory
@@ -363,17 +367,15 @@
                 self.mc.LDY(loc, l.addr(offset, r.SPP))
                 return
             assert 0, "not supported location"
-        elif prev_loc.is_core_reg():
-            reg = prev_loc.value
+        elif prev_loc.is_reg():
             # move to another register
-            if loc.is_core_reg():
-                other_reg = loc.value
-                self.mc.mr(other_reg, reg)
+            if loc.is_reg():
+                self.mc.LGR(loc, prev_loc)
                 return
             # move to memory
             elif loc.is_stack():
                 offset = loc.value
-                self.mc.store(reg, r.SPP, offset)
+                self.mc.store(prev_loc, r.SPP, offset)
                 return
             assert 0, "not supported location"
         elif prev_loc.is_imm_float():
@@ -517,6 +519,9 @@
     def emit_increment_debug_counter(self, op, arglocs, regalloc):
         pass # TODO
 
+    def emit_label(self, op, arglocs, regalloc):
+        pass
+
     def emit_finish(self, op, arglocs, regalloc):
         base_ofs = self.cpu.get_baseofs_of_frame_field()
         if len(arglocs) > 1:
diff --git a/rpython/jit/backend/zarch/codebuilder.py b/rpython/jit/backend/zarch/codebuilder.py
--- a/rpython/jit/backend/zarch/codebuilder.py
+++ b/rpython/jit/backend/zarch/codebuilder.py
@@ -1,8 +1,9 @@
-from rpython.jit.backend.zarch import conditions as cond
-from rpython.jit.backend.zarch import registers as reg
-from rpython.jit.backend.zarch import locations as loc
+from rpython.jit.backend.zarch import conditions as c
+from rpython.jit.backend.zarch import registers as r
+from rpython.jit.backend.zarch import locations as l
 from rpython.jit.backend.zarch.instruction_builder import build_instr_codes
 from rpython.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin
+from rpython.jit.backend.llsupport.assembler import GuardToken
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.rlib.unroll import unrolling_iterable
 from rpython.rtyper.lltypesystem import lltype, rffi, llmemory
@@ -19,15 +20,19 @@
 def binary_helper_call(name):
     function = getattr(support, 'arm_%s' % name)
 
-    def f(self, c=cond.AL):
+    def f(self, c=c.AL):
         """Generates a call to a helper function, takes its
         arguments in r0 and r1, result is placed in r0"""
         addr = rffi.cast(lltype.Signed, function)
         self.BL(addr, c)
     return f
 
-class Operand(object):
-    pass
+class ZARCHGuardToken(GuardToken):
+    def __init__(self, cpu, gcmap, descr, failargs, faillocs,
+                 guard_opnum, frame_depth, fcond=c.cond_none):
+        GuardToken.__init__(self, cpu, gcmap, descr, failargs, faillocs,
+                            guard_opnum, frame_depth)
+        self.fcond = fcond
 
 class AbstractZARCHBuilder(object):
     def write_i32(self, word):
@@ -85,11 +90,32 @@
         self._dump(addr, "jit-backend-dump", "s390x")
 
     def load(self, treg, sreg, offset):
-        self.LG(treg, loc.addr(offset, sreg))
+        self.LG(treg, l.addr(offset, sreg))
 
     def currpos(self):
         return self.get_relative_pos()
 
+    def cmp_op(self, a, b, pool=False, signed=True, fp=False):
+        if fp:
+            xxx   # intentional crash marker: float compare not implemented
+            self.fcmpu(a, b)
+        else:
+            if signed:
+                if pool:
+                    # 64 bit signed comparison, second operand in literal pool
+                    self.CG(a, b)
+                else:
+                    # 64 bit signed comparison, register-register
+                    self.CGR(a, b)
+            else:
+                if pool:
+                    # 64 bit unsigned comparison, second operand in literal pool
+                    self.CLG(a, b)
+                else:
+                    # 64 bit unsigned comparison, register-register
+                    self.CLGR(a, b)
+
+
 _classes = (AbstractZARCHBuilder,)
 
 # Used to build the MachineCodeBlockWrapper
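To see the new cmp_op dispatch in isolation, here is a minimal,
self-contained sketch; FakeBuilder is a hypothetical recorder standing in
for the real MachineCodeBlockWrapper, which is generated from
AbstractZARCHBuilder and the instruction tables. The mnemonic rule is the
usual z/Architecture one: C* compares are signed, CL* ("logical") compares
are unsigned, and the R suffix means both operands are registers.

    class FakeBuilder(object):
        """Records mnemonics instead of emitting machine code."""
        def __init__(self):
            self.emitted = []
        def __getattr__(self, name):
            return lambda *args: self.emitted.append((name, args))

    def cmp_op(mc, a, b, pool=False, signed=True, fp=False):
        # mirrors the dispatch above: C*/CL* picks signed vs. logical,
        # pool=True picks the register-memory form for a pool operand
        if fp:
            mc.fcmpu(a, b)
        elif signed:
            (mc.CG if pool else mc.CGR)(a, b)
        else:
            (mc.CLG if pool else mc.CLGR)(a, b)

    mc = FakeBuilder()
    cmp_op(mc, 'r2', 'r3')                  # ('CGR',  ('r2', 'r3'))
    cmp_op(mc, 'r2', 'pool+16', pool=True)  # ('CG',   ('r2', 'pool+16'))
    cmp_op(mc, 'r2', 'r3', signed=False)    # ('CLGR', ('r2', 'r3'))
    print(mc.emitted)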
diff --git a/rpython/jit/backend/zarch/conditions.py b/rpython/jit/backend/zarch/conditions.py
--- a/rpython/jit/backend/zarch/conditions.py
+++ b/rpython/jit/backend/zarch/conditions.py
@@ -6,6 +6,10 @@
 GT = loc.imm(0x2)
 LE = loc.imm(EQ.value | LT.value)
 GE = loc.imm(EQ.value | GT.value)
+NE = loc.imm(LT.value | GT.value)
 OVERFLOW = loc.imm(0x1)
 
 cond_none = loc.imm(0x0)
+
+def negate(cond):
+    # XXX placeholder: returns the condition unchanged; the guards in
+    # opassembler.py rely on this, so they branch on the wrong condition
+    # until a real mask complement is implemented
+    return cond
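The condition objects are 4-bit branch masks: a conditional branch on
s390x tests the current condition code (0-3) against the mask bits
8/4/2/1. Assuming the definitions just above this hunk are EQ =
loc.imm(0x8) and LT = loc.imm(0x4), the compound conditions are plain
bitwise ORs, and a real negate() would complement the mask; the version
added here is still a stub. A sketch under those assumptions:

    # assumed mask values; EQ and LT are defined above the visible hunk
    EQ, LT, GT, OVERFLOW = 0x8, 0x4, 0x2, 0x1
    NE = LT | GT                      # 0x6: branch on "low" or "high"
    LE, GE = EQ | LT, EQ | GT         # 0xc, 0xa

    def negate(mask):
        # complement over the bits an integer comparison can set
        return (~mask) & (EQ | LT | GT)

    assert negate(EQ) == NE
    assert negate(LE) == GT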
diff --git a/rpython/jit/backend/zarch/helper/assembler.py b/rpython/jit/backend/zarch/helper/assembler.py
--- a/rpython/jit/backend/zarch/helper/assembler.py
+++ b/rpython/jit/backend/zarch/helper/assembler.py
@@ -0,0 +1,69 @@
+import rpython.jit.backend.zarch.conditions as c
+import rpython.jit.backend.zarch.registers as r
+from rpython.rlib.rarithmetic import intmask
+from rpython.jit.backend.zarch.arch import WORD
+from rpython.jit.metainterp.history import FLOAT
+from rpython.jit.metainterp.resoperation import rop
+from rpython.rtyper.lltypesystem import rffi, lltype
+
+def flush_cc(asm, condition, result_loc):
+    # After emitting an instruction that leaves a boolean result in
+    # a condition code (cc), call this.  In the common case, result_loc
+    # will be set to SPP by the regalloc, which in this case means
+    # "propagate it between this operation and the next guard by keeping
+    # it in the cc".  In the uncommon case, result_loc is another
+    # register, and we emit a load from the cc into this register.
+    assert asm.guard_success_cc == c.cond_none
+    if result_loc is r.SPP:
+        asm.guard_success_cc = condition
+    else:
+        # XXX this branch still uses PPC instructions (crnor, mfcr, rlwinm)
+        # and PPC's CR encoding; it has to be ported to s390x
+        # Possibly invert the bit in the CR
+        bit, invert = c.encoding[condition]
+        assert 0 <= bit <= 3
+        if invert == 12:
+            pass
+        elif invert == 4:
+            asm.mc.crnor(bit, bit, bit)
+        else:
+            assert 0
+
+        resval = result_loc.value
+        # move the content of the CR to resval
+        asm.mc.mfcr(resval)
+        # zero out everything except the result
+        asm.mc.rlwinm(resval, resval, 1 + bit, 31, 31)
+
+
+def do_emit_cmp_op(self, arglocs, condition, signed, fp):
+    l0 = arglocs[0]
+    l1 = arglocs[1]
+    assert not l0.is_imm()
+    # do the comparison
+    self.mc.cmp_op(l0, l1, pool=l1.is_in_pool(), signed=signed, fp=fp)
+
+    # CR bits (numbering as on PPC, where this helper originates):
+    #     0: LT
+    #     1: GT
+    #     2: EQ
+    #     3: UNordered
+
+    if fp:
+        # Support for NaNs: with LE or GE, if one of the operands is a
+        # NaN, we get CR=1,0,0,0 (unordered bit only).  We're about to
+        # check "not GT" or "not LT", but in case of NaN we want to
+        # get the answer False.
+        #if condition == c.LE:
+        #    self.mc.crnor(1, 1, 3)
+        #    condition = c.GT
+        #elif condition == c.GE:
+        #    self.mc.crnor(0, 0, 3)
+        #    condition = c.LT
+        pass
+
+    flush_cc(self, condition, r.SPP)
+
+
+def gen_emit_cmp_op(condition, signed=True, fp=False):
+    def f(self, op, arglocs, regalloc):
+        do_emit_cmp_op(self, arglocs, condition, signed, fp)
+    return f
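The comment in flush_cc describes a contract between comparisons and
guards: when the result location is SPP, the boolean is never
materialized; it stays in the condition code and the assembler remembers
which condition means success. A toy model of that hand-off (class and
method names are illustrative only):

    cond_none, LT = 0x0, 0x4

    class ToyAssembler(object):
        def __init__(self):
            self.guard_success_cc = cond_none

        def emit_int_lt(self):
            # the compare records its condition instead of loading 0/1
            assert self.guard_success_cc == cond_none
            self.guard_success_cc = LT

        def emit_guard_true(self):
            # the guard consumes the condition and clears the slot
            fcond = self.guard_success_cc
            self.guard_success_cc = cond_none
            assert fcond != cond_none
            return fcond

    asm = ToyAssembler()
    asm.emit_int_lt()
    assert asm.emit_guard_true() == LT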
diff --git a/rpython/jit/backend/zarch/helper/regalloc.py b/rpython/jit/backend/zarch/helper/regalloc.py
--- a/rpython/jit/backend/zarch/helper/regalloc.py
+++ b/rpython/jit/backend/zarch/helper/regalloc.py
@@ -7,7 +7,7 @@
         return lower_bound <= i <= upper_bound
     return False
 
-def _prepare_int_binary_arith(self, op):
+def prepare_int_add_or_mul(self, op):
     a0 = op.getarg(0)
     a1 = op.getarg(1)
     if check_imm(a0):
@@ -21,7 +21,32 @@
     self.force_result_in_reg(op, a0)
     return [l0, l1]
 
-def _prepare_float_binary_arith(self, op):
+def prepare_int_sub(self, op):
+    a0 = op.getarg(0)
+    a1 = op.getarg(1)
+    if isinstance(a0, ConstInt):
+        a0, a1 = a1, a0
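+        # FIXME: subtraction is not commutative; after this swap the code
+        # computes a1 - a0, not a0 - a1, and nothing compensates for it yet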
+    l0 = self.ensure_reg(a0)
+    l1 = self.ensure_reg(a1)
+    self.free_op_vars()
+    self.force_result_in_reg(op, a0)
+    return [l0, l1]
+
+def prepare_cmp_op(self, op):
+    a0 = op.getarg(0)
+    a1 = op.getarg(1)
+    if check_imm(a0):
+        a0, a1 = a1, a0
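+        # FIXME: swapping the operands turns "a0 < a1" into "a1 < a0";
+        # the condition emitted later is not flipped to match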
+    l0 = self.ensure_reg(a0)
+    if check_imm(a1):
+        l1 = imm(a1.getint())
+    else:
+        l1 = self.ensure_reg(a1)
+    self.free_op_vars()
+    self.force_result_in_reg(op, a0)
+    return [l0, l1]
+
+def prepare_binary_op(self, op):
     a0 = op.getarg(0)
     a1 = op.getarg(1)
     l0 = self.ensure_reg(a0)
diff --git a/rpython/jit/backend/zarch/instructions.py b/rpython/jit/backend/zarch/instructions.py
--- a/rpython/jit/backend/zarch/instructions.py
+++ b/rpython/jit/backend/zarch/instructions.py
@@ -21,6 +21,13 @@
     'AGF':     ('rxy',   ['\xE3','\x18']),
     'AHI':     ('ri',    ['\xA7','\x0A']),
     'AGHI':    ('ri',    ['\xA7','\x0B']),
+
+
+    # comparison
+    'CGR':     ('rre',    ['\xB9','\x20']),
+    'CG':      ('rxy',    ['\xE3','\x20']),
+    'CLGR':    ('rre',    ['\xB9','\x21']),
+    'CLG':     ('rxy',    ['\xE3','\x21']),
 }
 
 logic_mnemonic_codes = {
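Each entry in these tables pairs an instruction format with its opcode
bytes. For the RRE format that means a 16-bit opcode, 8 unused bits, and
the two register numbers packed into the last byte. A hand-rolled sketch
of the bytes the generated CGR method should produce (register numbers
are illustrative):

    # encoding CGR r2, r3 in the 4-byte RRE format:
    # [ opcode(16) | unused(8) | r1(4) r2(4) ]
    opcode = ['\xB9', '\x20']              # taken from the table above
    r1, r2 = 2, 3
    encoded = opcode[0] + opcode[1] + '\x00' + chr((r1 << 4) | r2)
    assert encoded == '\xb9\x20\x00\x23'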
diff --git a/rpython/jit/backend/zarch/locations.py b/rpython/jit/backend/zarch/locations.py
--- a/rpython/jit/backend/zarch/locations.py
+++ b/rpython/jit/backend/zarch/locations.py
@@ -14,7 +14,7 @@
     def is_raw_sp(self):
         return False
 
-    def is_core_reg(self):
+    def is_reg(self):
         return False
 
     def is_fp_reg(self):
@@ -45,7 +45,7 @@
     def __repr__(self):
         return 'r%d' % self.value
 
-    def is_core_reg(self):
+    def is_reg(self):
         return True
 
     def as_key(self):       # 0 <= as_key <= 15
@@ -60,7 +60,7 @@
     def __repr__(self):
         return 'f%d' % self.value
 
-    def is_core_reg(self):
+    def is_reg(self):
         return False
 
     def is_fp_reg(self):
diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -1,15 +1,28 @@
+from rpython.jit.backend.zarch.helper.assembler import gen_emit_cmp_op
+from rpython.jit.backend.zarch.codebuilder import ZARCHGuardToken
+import rpython.jit.backend.zarch.conditions as c
+import rpython.jit.backend.zarch.registers as r
+from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
+from rpython.rtyper.lltypesystem import rffi
+from rpython.rtyper import rclass
 
 class IntOpAssembler(object):
     _mixin_ = True
 
     def emit_int_add(self, op, arglocs, regalloc):
         l0, l1 = arglocs
-        assert not l0.is_imm()
         if l1.is_imm():
             self.mc.AGHI(l0, l1)
+        elif l1.is_in_pool():
+            self.mc.AG(l0, l1)
         else:
             self.mc.AGR(l0, l1)
 
+    emit_int_le = gen_emit_cmp_op(c.LE)
+    emit_int_lt = gen_emit_cmp_op(c.LT)
+    emit_int_gt = gen_emit_cmp_op(c.GT)
+    emit_int_ge = gen_emit_cmp_op(c.GE)
+    emit_int_eq = gen_emit_cmp_op(c.EQ)
+    emit_int_ne = gen_emit_cmp_op(c.NE)
+
 class FloatOpAssembler(object):
     _mixin_ = True
 
@@ -40,3 +53,192 @@
             self.mc.DDB(l0, l1)
         else:
             self.mc.DDBR(l0, l1)
+
+class GuardOpAssembler(object):
+    _mixin_ = True
+
+    def _emit_guard(self, op, arglocs, is_guard_not_invalidated=False):
+        if is_guard_not_invalidated:
+            fcond = c.cond_none
+        else:
+            fcond = self.guard_success_cc
+            self.guard_success_cc = c.cond_none
+            assert fcond != c.cond_none
+            fcond = c.negate(fcond)
+        token = self.build_guard_token(op, arglocs[0].value, arglocs[1:], fcond)
+        token.pos_jump_offset = self.mc.currpos()
+        assert token.guard_not_invalidated() == is_guard_not_invalidated
+        if not is_guard_not_invalidated:
+            self.mc.trap()     # has to be patched later on
+        self.pending_guard_tokens.append(token)
+
+    def build_guard_token(self, op, frame_depth, arglocs, fcond):
+        descr = op.getdescr()
+        gcmap = allocate_gcmap(self, frame_depth, r.JITFRAME_FIXED_SIZE)
+        token = ZARCHGuardToken(self.cpu, gcmap, descr, op.getfailargs(),
+                              arglocs, op.getopnum(), frame_depth,
+                              fcond)
+        return token
+
+    def emit_guard_true(self, op, arglocs, regalloc):
+        self._emit_guard(op, arglocs)
+
+    def emit_guard_false(self, op, arglocs, regalloc):
+        self.guard_success_cc = c.negate(self.guard_success_cc)
+        self._emit_guard(op, arglocs)
+
+    def emit_guard_overflow(self, op, arglocs, regalloc):
+        self.guard_success_cc = c.SO
+        self._emit_guard(op, arglocs)
+
+    def emit_guard_no_overflow(self, op, arglocs, regalloc):
+        self.guard_success_cc = c.NS
+        self._emit_guard(op, arglocs)
+
+    def emit_guard_value(self, op, arglocs, regalloc):
+        l0 = arglocs[0]
+        l1 = arglocs[1]
+        failargs = arglocs[2:]
+
+        if l0.is_reg():
+            if l1.is_imm():
+                # XXX PPC-style cmp_op signature (leading CR field, imm=);
+                # it does not match the cmp_op added to codebuilder.py
+                self.mc.cmp_op(0, l0.value, l1.getint(), imm=True)
+            else:
+                self.mc.cmp_op(0, l0.value, l1.value)
+        elif l0.is_fp_reg():
+            assert l1.is_fp_reg()
+            self.mc.cmp_op(0, l0.value, l1.value, fp=True)
+        self.guard_success_cc = c.EQ
+        self._emit_guard(op, failargs)
+
+    emit_guard_nonnull = emit_guard_true
+    emit_guard_isnull = emit_guard_false
+
+    def emit_guard_class(self, op, arglocs, regalloc):
+        self._cmp_guard_class(op, arglocs, regalloc)
+        self.guard_success_cc = c.EQ
+        self._emit_guard(op, arglocs[2:])
+
+    def emit_guard_nonnull_class(self, op, arglocs, regalloc):
+        self.mc.cmp_op(0, arglocs[0].value, 1, imm=True, signed=False)
+        patch_pos = self.mc.currpos()
+        self.mc.trap()
+        self._cmp_guard_class(op, arglocs, regalloc)
+        # XXX OverwritingBuilder and blt are PPC leftovers that do not
+        # exist in the zarch backend yet
+        pmc = OverwritingBuilder(self.mc, patch_pos, 1)
+        pmc.blt(self.mc.currpos() - patch_pos)
+        pmc.overwrite()
+        self.guard_success_cc = c.EQ
+        self._emit_guard(op, arglocs[2:])
+
+    def _cmp_guard_class(self, op, locs, regalloc):
+        offset = self.cpu.vtable_offset
+        if offset is not None:
+            # could be one instruction shorter, but we don't care because
+            # this is not the case that is commonly translated
+            self.mc.load(r.SCRATCH.value, locs[0].value, offset)
+            self.mc.load_imm(r.SCRATCH2, locs[1].value)
+            self.mc.cmp_op(0, r.SCRATCH.value, r.SCRATCH2.value)
+        else:
+            expected_typeid = (self.cpu.gc_ll_descr
+                    .get_typeid_from_classptr_if_gcremovetypeptr(locs[1].value))
+            self._cmp_guard_gc_type(locs[0], expected_typeid)
+
+    def _read_typeid(self, targetreg, loc_ptr):
+        # Note that the typeid half-word is at offset 0 on a little-endian
+        # machine; it is at offset 2 or 4 on a big-endian machine.
+        assert self.cpu.supports_guard_gc_type
+        # XXX unported from the PPC backend: IS_PPC_32, IS_BIG_ENDIAN, lhz
+        # and lwz are not defined for s390x and need replacing
+        if IS_PPC_32:
+            self.mc.lhz(targetreg.value, loc_ptr.value, 2 * IS_BIG_ENDIAN)
+        else:
+            self.mc.lwz(targetreg.value, loc_ptr.value, 4 * IS_BIG_ENDIAN)
+
+    def _cmp_guard_gc_type(self, loc_ptr, expected_typeid):
+        self._read_typeid(r.SCRATCH2, loc_ptr)
+        assert 0 <= expected_typeid <= 0x7fffffff   # 4 bytes are always enough
+        if expected_typeid > 0xffff:     # if 2 bytes are not enough
+            self.mc.subis(r.SCRATCH2.value, r.SCRATCH2.value,
+                          expected_typeid >> 16)
+            expected_typeid = expected_typeid & 0xffff
+        self.mc.cmp_op(0, r.SCRATCH2.value, expected_typeid,
+                       imm=True, signed=False)
+
+    def emit_guard_gc_type(self, op, arglocs, regalloc):
+        self._cmp_guard_gc_type(arglocs[0], arglocs[1].value)
+        self.guard_success_cc = c.EQ
+        self._emit_guard(op, arglocs[2:])
+
+    def emit_guard_is_object(self, op, arglocs, regalloc):
+        assert self.cpu.supports_guard_gc_type
+        loc_object = arglocs[0]
+        # idea: read the typeid, fetch one byte of the field 'infobits' from
+        # the big typeinfo table, and check the flag 'T_IS_RPYTHON_INSTANCE'.
+        base_type_info, shift_by, sizeof_ti = (
+            self.cpu.gc_ll_descr.get_translated_info_for_typeinfo())
+        infobits_offset, IS_OBJECT_FLAG = (
+            self.cpu.gc_ll_descr.get_translated_info_for_guard_is_object())
+
+        self._read_typeid(r.SCRATCH2, loc_object)
+        self.mc.load_imm(r.SCRATCH, base_type_info + infobits_offset)
+        assert shift_by == 0     # on PPC64; fixme for PPC32
+        self.mc.lbzx(r.SCRATCH2.value, r.SCRATCH2.value, r.SCRATCH.value)
+        self.mc.andix(r.SCRATCH2.value, r.SCRATCH2.value, IS_OBJECT_FLAG & 0xff)
+        self.guard_success_cc = c.NE
+        self._emit_guard(op, arglocs[1:])
+
+    def emit_guard_subclass(self, op, arglocs, regalloc):
+        assert self.cpu.supports_guard_gc_type
+        loc_object = arglocs[0]
+        loc_check_against_class = arglocs[1]
+        offset = self.cpu.vtable_offset
+        offset2 = self.cpu.subclassrange_min_offset
+        if offset is not None:
+            # read this field to get the vtable pointer
+            self.mc.load(r.SCRATCH2.value, loc_object.value, offset)
+            # read the vtable's subclassrange_min field
+            assert _check_imm_arg(offset2)
+            self.mc.ld(r.SCRATCH2.value, r.SCRATCH2.value, offset2)
+        else:
+            # read the typeid
+            self._read_typeid(r.SCRATCH, loc_object)
+            # read the vtable's subclassrange_min field, as a single
+            # step with the correct offset
+            base_type_info, shift_by, sizeof_ti = (
+                self.cpu.gc_ll_descr.get_translated_info_for_typeinfo())
+            self.mc.load_imm(r.SCRATCH2, base_type_info + sizeof_ti + offset2)
+            assert shift_by == 0     # on PPC64; fixme for PPC32
+            self.mc.ldx(r.SCRATCH2.value, r.SCRATCH2.value, r.SCRATCH.value)
+        # get the two bounds to check against
+        vtable_ptr = loc_check_against_class.getint()
+        vtable_ptr = rffi.cast(rclass.CLASSTYPE, vtable_ptr)
+        check_min = vtable_ptr.subclassrange_min
+        check_max = vtable_ptr.subclassrange_max
+        assert check_max > check_min
+        check_diff = check_max - check_min - 1
+        # right now, a full PyPy uses less than 6000 numbers,
+        # so we'll assert here that it always fits inside 15 bits
+        assert 0 <= check_min <= 0x7fff
+        assert 0 <= check_diff <= 0xffff
+        # check by doing the unsigned comparison (tmp - min) < (max - min)
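+        # e.g. check_min=10, check_max=15 => check_diff=4: tmp=12 gives
+        # 2 <= 4 (pass), tmp=15 gives 5 > 4 (fail), and tmp=9 wraps to a
+        # huge unsigned value (fail)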
+        self.mc.subi(r.SCRATCH2.value, r.SCRATCH2.value, check_min)
+        self.mc.cmp_op(0, r.SCRATCH2.value, check_diff, imm=True, signed=False)
+        # the guard passes if we get a result of "below or equal"
+        self.guard_success_cc = c.LE
+        self._emit_guard(op, arglocs[2:])
+
+    def emit_guard_not_invalidated(self, op, arglocs, regalloc):
+        self._emit_guard(op, arglocs, is_guard_not_invalidated=True)
+
+    def emit_guard_not_forced(self, op, arglocs, regalloc):
+        ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
+        self.mc.ld(r.SCRATCH.value, r.SPP.value, ofs)
+        self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
+        self.guard_success_cc = c.EQ
+        self._emit_guard(op, arglocs)
+
+    def emit_guard_not_forced_2(self, op, arglocs, regalloc):
+        guard_token = self.build_guard_token(op, arglocs[0].value, arglocs[1:],
+                                             c.cond_none)
+        self._finish_gcmap = guard_token.gcmap
+        self._store_force_index(op)
+        self.store_info_on_descr(0, guard_token)
+
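One detail of _emit_guard worth spelling out: the recorded condition says
when the guard succeeds and execution falls through, so the branch patched
in over the trap() must jump to the failure path on the negated condition.
In miniature, with the mask values assumed for conditions.py above:

    EQ, LT, GT = 0x8, 0x4, 0x2

    def negate(mask):                     # what the stub in conditions.py
        return (~mask) & (EQ | LT | GT)   # will eventually have to do

    guard_success_cc = LT                 # set by emit_int_lt + guard_true
    branch_mask = negate(guard_success_cc)
    assert branch_mask == EQ | GT         # branch to recovery when a >= b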
diff --git a/rpython/jit/backend/zarch/regalloc.py b/rpython/jit/backend/zarch/regalloc.py
--- a/rpython/jit/backend/zarch/regalloc.py
+++ b/rpython/jit/backend/zarch/regalloc.py
@@ -8,7 +8,7 @@
                                             INT, REF, FLOAT, VOID)
 from rpython.jit.metainterp.history import JitCellToken, TargetToken
 from rpython.jit.metainterp.resoperation import rop
-from rpython.jit.backend.zarch import locations
+from rpython.jit.backend.zarch import locations as l
 from rpython.rtyper.lltypesystem import rffi, lltype, rstr, llmemory
 from rpython.rtyper.lltypesystem.lloperation import llop
 from rpython.rtyper.annlowlevel import cast_instance_to_gcref
@@ -16,7 +16,7 @@
 from rpython.jit.backend.llsupport.descr import ArrayDescr
 import rpython.jit.backend.zarch.registers as r
 import rpython.jit.backend.zarch.conditions as c
-import rpython.jit.backend.zarch.helper.regalloc as regallochelp
+import rpython.jit.backend.zarch.helper.regalloc as helper
 from rpython.jit.backend.llsupport.descr import unpack_arraydescr
 from rpython.jit.backend.llsupport.descr import unpack_fielddescr
 from rpython.jit.backend.llsupport.descr import unpack_interiorfielddescr
@@ -64,7 +64,7 @@
 
     def convert_to_imm(self, c):
         adr = self.convert_to_adr(c)
-        return locations.ConstFloatLoc(adr)
+        return l.ConstFloatLoc(adr)
 
     def __init__(self, longevity, frame_manager=None, assembler=None):
         RegisterManager.__init__(self, longevity, frame_manager, assembler)
@@ -74,7 +74,7 @@
 
     def place_in_pool(self, var):
         offset = self.assembler.pool.place(var)
-        return locations.pool(offset, r.POOL)
+        return l.pool(offset, r.POOL)
 
     def ensure_reg(self, box):
         if isinstance(box, Const):
@@ -116,7 +116,7 @@
 
     def convert_to_imm(self, c):
         val = self.convert_to_int(c)
-        return locations.ImmLocation(val)
+        return l.ImmLocation(val)
 
     def ensure_reg(self, box):
         if isinstance(box, Const):
@@ -143,8 +143,8 @@
         self.base_ofs = base_ofs
 
     def frame_pos(self, loc, box_type):
-        #return locations.StackLocation(loc, get_fp_offset(self.base_ofs, loc), box_type)
-        return locations.StackLocation(loc, get_fp_offset(self.base_ofs, loc), box_type)
+        #return l.StackLocation(loc, get_fp_offset(self.base_ofs, loc), box_type)
+        return l.StackLocation(loc, get_fp_offset(self.base_ofs, loc), box_type)
 
     @staticmethod
     def frame_size(type):
@@ -152,7 +152,7 @@
 
     @staticmethod
     def get_loc_index(loc):
-        assert isinstance(loc, locations.StackLocation)
+        assert isinstance(loc, l.StackLocation)
         return loc.position
 
 
@@ -350,7 +350,7 @@
                 gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8))
         for box, loc in self.fm.bindings.iteritems():
             if box.type == REF and self.rm.is_still_alive(box):
-                assert isinstance(loc, locations.StackLocation)
+                assert isinstance(loc, l.StackLocation)
                 val = loc.get_position() + r.JITFRAME_FIXED_SIZE
                 gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8))
         return gcmap
@@ -463,11 +463,103 @@
     def prepare_increment_debug_counter(self, op):
         pass # XXX
 
-    prepare_int_add = regallochelp._prepare_int_binary_arith
-    prepare_float_add = regallochelp._prepare_float_binary_arith
-    prepare_float_sub = regallochelp._prepare_float_binary_arith
-    prepare_float_mul = regallochelp._prepare_float_binary_arith
-    prepare_float_div = regallochelp._prepare_float_binary_arith
+    prepare_int_add = helper.prepare_int_add_or_mul
+    prepare_int_sub = helper.prepare_int_sub
+    prepare_int_mul = helper.prepare_int_add_or_mul
+
+    prepare_int_le = helper.prepare_cmp_op
+    prepare_int_lt = helper.prepare_cmp_op
+    prepare_int_ge = helper.prepare_cmp_op
+    prepare_int_gt = helper.prepare_cmp_op
+    prepare_int_eq = helper.prepare_cmp_op
+    prepare_int_ne = helper.prepare_cmp_op
+
+    prepare_float_add = helper.prepare_binary_op
+    prepare_float_sub = helper.prepare_binary_op
+    prepare_float_mul = helper.prepare_binary_op
+    prepare_float_truediv = helper.prepare_binary_op
+
+    def _prepare_guard(self, op, args=None):
+        if args is None:
+            args = []
+        args.append(imm(self.fm.get_frame_depth()))
+        for arg in op.getfailargs():
+            if arg:
+                args.append(self.loc(arg))
+            else:
+                args.append(None)
+        self.possibly_free_vars(op.getfailargs())
+        #
+        # generate_quick_failure() produces up to 14 instructions per guard
+        self.limit_loop_break -= 14 * 4
+        #
+        return args
+
+    def load_condition_into_cc(self, box):
+        if self.assembler.guard_success_cc == c.cond_none:
+            xxx   # intentional crash marker: this path not implemented yet
+            loc = self.ensure_reg(box)
+            mc = self.assembler.mc
+            mc.cmp_op(loc, l.imm(0), imm=True)
+            self.assembler.guard_success_cc = c.NE
+
+    def _prepare_guard_cc(self, op):
+        self.load_condition_into_cc(op.getarg(0))
+        return self._prepare_guard(op)
+
+    prepare_guard_true = _prepare_guard_cc
+    prepare_guard_false = _prepare_guard_cc
+    prepare_guard_nonnull = _prepare_guard_cc
+    prepare_guard_isnull = _prepare_guard_cc
+
+    def prepare_label(self, op):
+        descr = op.getdescr()
+        assert isinstance(descr, TargetToken)
+        inputargs = op.getarglist()
+        arglocs = [None] * len(inputargs)
+        #
+        # we use force_spill() on the boxes that are not going to be really
+        # used any more in the loop, but that are kept alive anyway
+        # by being in a next LABEL's or a JUMP's argument or fail_args
+        # of some guard
+        position = self.rm.position
+        for arg in inputargs:
+            assert not isinstance(arg, Const)
+            if self.last_real_usage.get(arg, -1) <= position:
+                self.force_spill_var(arg)
+        #
+        # we need to make sure that no variable is stored in spp (the
+        # frame register; the "=r31" note only held for the PPC original)
+        for arg in inputargs:
+            assert self.loc(arg) is not r.SPP, (
+                "variable stored in spp in prepare_label")
+        self.rm.bindings_to_frame_reg.clear()
+        #
+        for i in range(len(inputargs)):
+            arg = inputargs[i]
+            assert not isinstance(arg, Const)
+            loc = self.loc(arg)
+            assert loc is not r.SPP
+            arglocs[i] = loc
+            if loc.is_reg():
+                self.fm.mark_as_free(arg)
+        #
+        # if we are too close to the start of the loop, the label's target may
+        # get overridden by redirect_call_assembler().  (rare case)
+        self.flush_loop()
+        #
+        descr._zarch_arglocs = arglocs
+        descr._ll_loop_code = self.assembler.mc.currpos()
+        descr._zarch_clt = self.assembler.current_clt
+        self.assembler.target_tokens_currently_compiling[descr] = None
+        self.possibly_free_vars_for_op(op)
+        #
+        # if the LABEL's descr is precisely the target of the JUMP at the
+        # end of the same loop, i.e. if what we are compiling is a single
+        # loop that ends up jumping to this LABEL, then we can now provide
+        # the hints about the expected position of the spilled variables.
+        jump_op = self.final_jump_op
+        if jump_op is not None and jump_op.getdescr() is descr:
+            self._compute_hint_frame_locations_from_descr(descr)
 
     def prepare_finish(self, op):
         descr = op.getdescr()

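For cross-reference, _prepare_guard here and _emit_guard in opassembler.py
agree on a single arglocs layout: index 0 is the frame depth as an
immediate, and the rest are the fail-arg locations (None marking a hole).
Illustrated with stand-in values:

    frame_depth = 12
    fail_locs = ['r2', None, 'fp(8)']       # None marks a hole in failargs
    arglocs = [frame_depth] + fail_locs     # built by _prepare_guard
    depth = arglocs[0]                      # arglocs[0].value in real code
    faillocs = arglocs[1:]                  # passed to build_guard_token
    assert depth == 12 and faillocs == fail_locs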