[pypy-commit] pypy default: Be slightly more vigilant when it comes to allocating registers for variables
fijal
noreply at buildbot.pypy.org
Fri Dec 2 09:34:51 CET 2011
Author: Maciej Fijalkowski <fijall at gmail.com>
Branch:
Changeset: r50055:4b2e7974fd92
Date: 2011-12-02 10:34 +0200
http://bitbucket.org/pypy/pypy/changeset/4b2e7974fd92/
Log: Be slightly more vigilant when it comes to allocating registers for
variables at the beginning of the loop. Not *much* of an
improvement, but it should help for tight loops.
diff --git a/pypy/jit/backend/llsupport/test/test_regalloc.py b/pypy/jit/backend/llsupport/test/test_regalloc.py
--- a/pypy/jit/backend/llsupport/test/test_regalloc.py
+++ b/pypy/jit/backend/llsupport/test/test_regalloc.py
@@ -2,6 +2,8 @@
from pypy.jit.metainterp.history import BoxInt, ConstInt, BoxFloat, INT, FLOAT
from pypy.jit.backend.llsupport.regalloc import FrameManager
from pypy.jit.backend.llsupport.regalloc import RegisterManager as BaseRegMan
+from pypy.jit.tool.oparser import parse
+from pypy.jit.backend.detect_cpu import getcpuclass
def newboxes(*values):
return [BoxInt(v) for v in values]
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -167,26 +167,22 @@
operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
allgcrefs)
# compute longevity of variables
- longevity = self._compute_vars_longevity(inputargs, operations)
+ longevity, useful = self._compute_vars_longevity(inputargs, operations)
self.longevity = longevity
self.rm = gpr_reg_mgr_cls(longevity,
frame_manager = self.fm,
assembler = self.assembler)
self.xrm = xmm_reg_mgr_cls(longevity, frame_manager = self.fm,
assembler = self.assembler)
- return operations
+ return operations, useful
def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
- operations = self._prepare(inputargs, operations, allgcrefs)
- jump = operations[-1]
- loop_consts = self._compute_loop_consts(inputargs, jump, looptoken)
- self.loop_consts = loop_consts
- return self._process_inputargs(inputargs), operations
+ operations, useful = self._prepare(inputargs, operations, allgcrefs)
+ return self._process_inputargs(inputargs, useful), operations
def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
allgcrefs):
- operations = self._prepare(inputargs, operations, allgcrefs)
- self.loop_consts = {}
+ operations, _ = self._prepare(inputargs, operations, allgcrefs)
self._update_bindings(arglocs, inputargs)
self.fm.frame_depth = prev_depths[0]
self.param_depth = prev_depths[1]
@@ -195,7 +191,7 @@
def reserve_param(self, n):
self.param_depth = max(self.param_depth, n)
- def _process_inputargs(self, inputargs):
+ def _process_inputargs(self, inputargs, useful):
# XXX we can sort out here by longevity if we need something
# more optimal
floatlocs = [None] * len(inputargs)
@@ -211,7 +207,7 @@
arg = inputargs[i]
assert not isinstance(arg, Const)
reg = None
- if arg not in self.loop_consts and self.longevity[arg][1] > -1:
+ if self.longevity[arg][1] > -1 and arg in useful:
if arg.type == FLOAT:
# xxx is it really a good idea? at the first CALL they
# will all be flushed anyway
@@ -287,15 +283,15 @@
else:
return self.xrm.make_sure_var_in_reg(var, forbidden_vars)
- def _compute_loop_consts(self, inputargs, jump, looptoken):
- if jump.getopnum() != rop.JUMP or jump.getdescr() is not looptoken:
- loop_consts = {}
- else:
- loop_consts = {}
- for i in range(len(inputargs)):
- if inputargs[i] is jump.getarg(i):
- loop_consts[inputargs[i]] = i
- return loop_consts
+ #def _compute_loop_consts(self, inputargs, jump, looptoken):
+ # if jump.getopnum() != rop.JUMP or jump.getdescr() is not looptoken:
+ # loop_consts = {}
+ # else:
+ # loop_consts = {}
+ # for i in range(len(inputargs)):
+ # if inputargs[i] is jump.getarg(i):
+ # loop_consts[inputargs[i]] = i
+ # return loop_consts
def _update_bindings(self, locs, inputargs):
# XXX this should probably go to llsupport/regalloc.py
@@ -450,8 +446,14 @@
def _compute_vars_longevity(self, inputargs, operations):
# compute a dictionary that maps variables to index in
# operations that is a "last-time-seen"
+
+ # Returns a pair (longevity, useful). Non-useful variables are ones that
+ # never appear in the assembler, or for which it does not matter whether
+ # they live on the stack or in registers. The main example is loop
+ # arguments that go only to guard operations, to jump, or to finish.
produced = {}
last_used = {}
+ useful = {}
for i in range(len(operations)-1, -1, -1):
op = operations[i]
if op.result:
@@ -459,8 +461,11 @@
continue
assert op.result not in produced
produced[op.result] = i
+ opnum = op.getopnum()
for j in range(op.numargs()):
arg = op.getarg(j)
+ if opnum != rop.JUMP and opnum != rop.FINISH:
+ useful[arg] = None
if isinstance(arg, Box) and arg not in last_used:
last_used[arg] = i
if op.is_guard():
@@ -486,7 +491,7 @@
longevity[arg] = (0, last_used[arg])
del last_used[arg]
assert len(last_used) == 0
- return longevity
+ return longevity, useful
def loc(self, v):
if v is None: # xxx kludgy
diff --git a/pypy/jit/backend/x86/test/test_regalloc.py b/pypy/jit/backend/x86/test/test_regalloc.py
--- a/pypy/jit/backend/x86/test/test_regalloc.py
+++ b/pypy/jit/backend/x86/test/test_regalloc.py
@@ -149,6 +149,13 @@
self.cpu.execute_token(loop.token)
return loop
+ def prepare_loop(self, ops):
+ loop = self.parse(ops)
+ regalloc = RegAlloc(self.cpu.assembler, False)
+ regalloc.prepare_loop(loop.inputargs, loop.operations,
+ loop.token, [])
+ return regalloc
+
def getint(self, index):
return self.cpu.get_latest_value_int(index)
@@ -422,6 +429,35 @@
self.run(loop)
assert self.getints(9) == range(9)
+ def test_loopargs(self):
+ ops = """
+ [i0, i1, i2, i3]
+ i4 = int_add(i0, i1)
+ jump(i4, i1, i2, i3)
+ """
+ regalloc = self.prepare_loop(ops)
+ assert len(regalloc.rm.reg_bindings) == 2
+
+ def test_loopargs_2(self):
+ ops = """
+ [i0, i1, i2, i3]
+ i4 = int_add(i0, i1)
+ finish(i4, i1, i2, i3)
+ """
+ regalloc = self.prepare_loop(ops)
+ assert len(regalloc.rm.reg_bindings) == 2
+
+ def test_loopargs_3(self):
+ ops = """
+ [i0, i1, i2, i3]
+ i4 = int_add(i0, i1)
+ guard_true(i4) [i0, i1, i2, i3, i4]
+ jump(i4, i1, i2, i3)
+ """
+ regalloc = self.prepare_loop(ops)
+ assert len(regalloc.rm.reg_bindings) == 2
+
+
class TestRegallocCompOps(BaseTestRegalloc):
def test_cmp_op_0(self):
More information about the pypy-commit
mailing list