[pypy-commit] pypy default: Be slightly more vigilant when it comes to allocating registers for variables

Fri Dec 2 09:34:51 CET 2011

Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: 
Changeset: r50055:4b2e7974fd92
Date: 2011-12-02 10:34 +0200
http://bitbucket.org/pypy/pypy/changeset/4b2e7974fd92/

Log:	Be slightly more vigilant when it comes to allocating registers for
	variables at the beginning of the loop. Not *much* of an
	improvement, but should help for tight loops

diff --git a/pypy/jit/backend/llsupport/test/test_regalloc.py b/pypy/jit/backend/llsupport/test/test_regalloc.py
--- a/pypy/jit/backend/llsupport/test/test_regalloc.py
+++ b/pypy/jit/backend/llsupport/test/test_regalloc.py
@@ -2,6 +2,8 @@
 from pypy.jit.metainterp.history import BoxInt, ConstInt, BoxFloat, INT, FLOAT
 from pypy.jit.backend.llsupport.regalloc import FrameManager
 from pypy.jit.backend.llsupport.regalloc import RegisterManager as BaseRegMan
+from pypy.jit.tool.oparser import parse
+from pypy.jit.backend.detect_cpu import getcpuclass
 
 def newboxes(*values):
     return [BoxInt(v) for v in values]
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -167,26 +167,22 @@
         operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
                                                        allgcrefs)
         # compute longevity of variables
-        longevity = self._compute_vars_longevity(inputargs, operations)
+        longevity, useful = self._compute_vars_longevity(inputargs, operations)
         self.longevity = longevity
         self.rm = gpr_reg_mgr_cls(longevity,
                                   frame_manager = self.fm,
                                   assembler = self.assembler)
         self.xrm = xmm_reg_mgr_cls(longevity, frame_manager = self.fm,
                                    assembler = self.assembler)
-        return operations
+        return operations, useful
 
     def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
-        operations = self._prepare(inputargs, operations, allgcrefs)
-        jump = operations[-1]
-        loop_consts = self._compute_loop_consts(inputargs, jump, looptoken)
-        self.loop_consts = loop_consts
-        return self._process_inputargs(inputargs), operations
+        operations, useful = self._prepare(inputargs, operations, allgcrefs)
+        return self._process_inputargs(inputargs, useful), operations
 
     def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
                        allgcrefs):
-        operations = self._prepare(inputargs, operations, allgcrefs)
-        self.loop_consts = {}
+        operations, _ = self._prepare(inputargs, operations, allgcrefs)
         self._update_bindings(arglocs, inputargs)
         self.fm.frame_depth = prev_depths[0]
         self.param_depth = prev_depths[1]
@@ -195,7 +191,7 @@
     def reserve_param(self, n):
         self.param_depth = max(self.param_depth, n)
 
-    def _process_inputargs(self, inputargs):
+    def _process_inputargs(self, inputargs, useful):
         # XXX we can sort out here by longevity if we need something
         # more optimal
         floatlocs = [None] * len(inputargs)
@@ -211,7 +207,7 @@
             arg = inputargs[i]
             assert not isinstance(arg, Const)
             reg = None
-            if arg not in self.loop_consts and self.longevity[arg][1] > -1:
+            if self.longevity[arg][1] > -1 and arg in useful:
                 if arg.type == FLOAT:
                     # xxx is it really a good idea?  at the first CALL they
                     # will all be flushed anyway
@@ -287,15 +283,15 @@
         else:
             return self.xrm.make_sure_var_in_reg(var, forbidden_vars)
 
-    def _compute_loop_consts(self, inputargs, jump, looptoken):
-        if jump.getopnum() != rop.JUMP or jump.getdescr() is not looptoken:
-            loop_consts = {}
-        else:
-            loop_consts = {}
-            for i in range(len(inputargs)):
-                if inputargs[i] is jump.getarg(i):
-                    loop_consts[inputargs[i]] = i
-        return loop_consts
+    #def _compute_loop_consts(self, inputargs, jump, looptoken):
+    #    if jump.getopnum() != rop.JUMP or jump.getdescr() is not looptoken:
+    #        loop_consts = {}
+    #    else:
+    #        loop_consts = {}
+    #        for i in range(len(inputargs)):
+    #            if inputargs[i] is jump.getarg(i):
+    #                loop_consts[inputargs[i]] = i
+    #    return loop_consts
 
     def _update_bindings(self, locs, inputargs):
         # XXX this should probably go to llsupport/regalloc.py
@@ -450,8 +446,14 @@
     def _compute_vars_longevity(self, inputargs, operations):
         # compute a dictionary that maps variables to index in
         # operations that is a "last-time-seen"
+
+        # returns a pair longevity/useful. Non-useful variables are ones that
+        # never appear in the assembler or it does not matter if they appear on
+        # stack or in registers. Main example is loop arguments that go
+        # only to guard operations or to jump or to finish
         produced = {}
         last_used = {}
+        useful = {}
         for i in range(len(operations)-1, -1, -1):
             op = operations[i]
             if op.result:
@@ -459,8 +461,11 @@
                     continue
                 assert op.result not in produced
                 produced[op.result] = i
+            opnum = op.getopnum()
             for j in range(op.numargs()):
                 arg = op.getarg(j)
+                if opnum != rop.JUMP and opnum != rop.FINISH:
+                    useful[arg] = None
                 if isinstance(arg, Box) and arg not in last_used:
                     last_used[arg] = i
             if op.is_guard():
@@ -486,7 +491,7 @@
                 longevity[arg] = (0, last_used[arg])
                 del last_used[arg]
         assert len(last_used) == 0
-        return longevity
+        return longevity, useful
 
     def loc(self, v):
         if v is None: # xxx kludgy
diff --git a/pypy/jit/backend/x86/test/test_regalloc.py b/pypy/jit/backend/x86/test/test_regalloc.py
--- a/pypy/jit/backend/x86/test/test_regalloc.py
+++ b/pypy/jit/backend/x86/test/test_regalloc.py
@@ -149,6 +149,13 @@
             self.cpu.execute_token(loop.token)
         return loop
 
+    def prepare_loop(self, ops):
+        loop = self.parse(ops)
+        regalloc = RegAlloc(self.cpu.assembler, False)
+        regalloc.prepare_loop(loop.inputargs, loop.operations,
+                              loop.token, [])
+        return regalloc
+
     def getint(self, index):
         return self.cpu.get_latest_value_int(index)
 
@@ -422,6 +429,35 @@
         self.run(loop)
         assert self.getints(9) == range(9)
 
+    def test_loopargs(self):
+        ops = """
+        [i0, i1, i2, i3]
+        i4 = int_add(i0, i1)
+        jump(i4, i1, i2, i3)
+        """
+        regalloc = self.prepare_loop(ops)
+        assert len(regalloc.rm.reg_bindings) == 2
+
+    def test_loopargs_2(self):
+        ops = """
+        [i0, i1, i2, i3]
+        i4 = int_add(i0, i1)
+        finish(i4, i1, i2, i3)
+        """
+        regalloc = self.prepare_loop(ops)
+        assert len(regalloc.rm.reg_bindings) == 2
+
+    def test_loopargs_3(self):
+        ops = """
+        [i0, i1, i2, i3]
+        i4 = int_add(i0, i1)
+        guard_true(i4) [i0, i1, i2, i3, i4]
+        jump(i4, i1, i2, i3)
+        """
+        regalloc = self.prepare_loop(ops)
+        assert len(regalloc.rm.reg_bindings) == 2
+    
+
 class TestRegallocCompOps(BaseTestRegalloc):
     
     def test_cmp_op_0(self):