[pypy-commit] pypy vecopt: remembering the position of the guard exit to resume the regallocator at a guard exit

Thu Jun 18 11:41:42 CEST 2015

Author: Richard Plangger <rich at pasra.at>
Branch: vecopt
Changeset: r78169:6466422700f1
Date: 2015-06-18 11:41 +0200
http://bitbucket.org/pypy/pypy/changeset/6466422700f1/

Log:	record the operation position at which each guard is emitted, so
	that the register allocator can be reset to that position when the
	guard's exit (recovery stub) is generated

diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -269,7 +269,7 @@
         #
         self._push_all_regs_to_jitframe(mc, [], self.cpu.supports_floats, callee_only)
         ## args are in their respective positions
-        mc.PUSH([r.ip.value, r.lr.value])
+        mc.PUSH([r.ip.value, r.lr.value])
         mc.BLX(r.r4.value)
         self._reload_frame_if_necessary(mc)
         self._pop_all_regs_from_jitframe(mc, [], supports_floats,
@@ -930,6 +930,7 @@
         while regalloc.position() < len(operations) - 1:
             regalloc.next_instruction()
             i = regalloc.position()
+            self.position = i
             op = operations[i]
             self.mc.mark_op(op)
             opnum = op.getopnum()
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -37,10 +37,10 @@
 
 
 class ArmGuardToken(GuardToken):
-    def __init__(self, cpu, gcmap, faildescr, failargs, fail_locs,
+    def __init__(self, cpu, pos, gcmap, faildescr, failargs, fail_locs,
                  offset, exc, frame_depth, is_guard_not_invalidated=False,
                  is_guard_not_forced=False, fcond=c.AL):
-        GuardToken.__init__(self, cpu, gcmap, faildescr, failargs, fail_locs,
+        GuardToken.__init__(self, cpu, pos, gcmap, faildescr, failargs, fail_locs,
                             exc, frame_depth, is_guard_not_invalidated,
                             is_guard_not_forced)
         self.fcond = fcond
@@ -211,16 +211,16 @@
         assert isinstance(descr, AbstractFailDescr)
 
         gcmap = allocate_gcmap(self, frame_depth, JITFRAME_FIXED_SIZE)
-        token = ArmGuardToken(self.cpu, gcmap,
-                                    descr,
-                                    failargs=op.getfailargs(),
-                                    fail_locs=arglocs,
-                                    offset=offset,
-                                    exc=save_exc,
-                                    frame_depth=frame_depth,
-                                    is_guard_not_invalidated=is_guard_not_invalidated,
-                                    is_guard_not_forced=is_guard_not_forced,
-                                    fcond=fcond)
+        token = ArmGuardToken(self.cpu, self.position, gcmap,
+                              descr,
+                              failargs=op.getfailargs(),
+                              fail_locs=arglocs,
+                              offset=offset,
+                              exc=save_exc,
+                              frame_depth=frame_depth,
+                              is_guard_not_invalidated=is_guard_not_invalidated,
+                              is_guard_not_forced=is_guard_not_forced,
+                              fcond=fcond)
         return token
 
     def _emit_guard(self, op, arglocs, fcond, save_exc,
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -22,10 +22,11 @@
 )
 
 class GuardToken(object):
-    def __init__(self, cpu, gcmap, faildescr, failargs, fail_locs, exc,
+    def __init__(self, cpu, pos, gcmap, faildescr, failargs, fail_locs, exc,
                  frame_depth, is_guard_not_invalidated, is_guard_not_forced):
         assert isinstance(faildescr, AbstractFailDescr)
         self.cpu = cpu
+        self.position = pos
         self.faildescr = faildescr
         self.failargs = failargs
         self.fail_locs = fail_locs
@@ -62,6 +63,7 @@
 
     def __init__(self, cpu, translate_support_code=False):
         self.cpu = cpu
+        self.position = 0
         self.memcpy_addr = 0
         self.memset_addr = 0
         self.rtyper = cpu.rtyper
@@ -127,6 +129,7 @@
         self.gcmap_for_finish[0] = r_uint(1)
 
     def setup(self, looptoken):
+        self.position = 0
         if self.cpu.HAS_CODEMAP:
             self.codemap_builder = CodemapBuilder()
         self._finish_gcmap = lltype.nullptr(jitframe.GCMAP)
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -591,6 +591,7 @@
         # for each pending guard, generate the code of the recovery stub
         # at the end of self.mc.
         for tok in self.pending_guard_tokens:
+            regalloc.position = tok.position
             tok.pos_recovery_stub = self.generate_quick_failure(tok, regalloc)
         if WORD == 8 and len(self.pending_memoryerror_trampoline_from) > 0:
             self.error_trampoline_64 = self.generate_propagate_error_64()
@@ -1794,7 +1795,7 @@
         is_guard_not_invalidated = guard_opnum == rop.GUARD_NOT_INVALIDATED
         is_guard_not_forced = guard_opnum == rop.GUARD_NOT_FORCED
         gcmap = allocate_gcmap(self, frame_depth, JITFRAME_FIXED_SIZE)
-        return GuardToken(self.cpu, gcmap, faildescr, failargs,
+        return GuardToken(self.cpu, self.position, gcmap, faildescr, failargs,
                           fail_locs, exc, frame_depth,
                           is_guard_not_invalidated, is_guard_not_forced)
 
@@ -2483,14 +2484,18 @@
         for i,arg in enumerate(fail_args):
             if arg is None:
                 continue
+            assert arg.scalar_var is not None
             if isinstance(arg, BoxVectorAccum):
                 loc = fail_locs[i]
+                assert isinstance(loc, RegLoc)
+                assert loc.is_xmm
                 tgtloc = regalloc.force_allocate_reg(arg.scalar_var, fail_args)
+                assert tgtloc is not None
                 if arg.operator == '+':
                     # reduction using plus
                     self._accum_reduce_sum(arg, loc, tgtloc)
                     fail_locs[i] = tgtloc
-                    self._regalloc.possibly_free_var(arg)
+                    regalloc.possibly_free_var(arg)
                     fail_args[i] = arg.scalar_var
                 else:
                     raise NotImplementedError("accum operator %s not implemented" %
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -341,6 +341,7 @@
             op = operations[i]
             self.assembler.mc.mark_op(op)
             assert self.assembler.mc._frame_size == DEFAULT_FRAME_BYTES
+            self.assembler.position = i
             self.rm.position = i
             self.xrm.position = i
             if op.has_no_side_effect() and op.result not in self.longevity: