[pypy-commit] pypy default: Improve the hack 062e9d06c908: revert the changes done in the
arigo
noreply at buildbot.pypy.org
Sat Dec 10 00:09:55 CET 2011
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r50332:9b45755e2c2b
Date: 2011-12-09 22:58 +0100
http://bitbucket.org/pypy/pypy/changeset/9b45755e2c2b/
Log: Improve the hack 062e9d06c908: revert the changes done in the
RegisterManager, and instead write a more involved but cleaner
solution in the FrameManager. As a bonus you get tests too. This
solution should work even when assembling bridges, not just loops.
diff --git a/pypy/jit/backend/llsupport/regalloc.py b/pypy/jit/backend/llsupport/regalloc.py
--- a/pypy/jit/backend/llsupport/regalloc.py
+++ b/pypy/jit/backend/llsupport/regalloc.py
@@ -16,35 +16,101 @@
""" Manage frame positions
"""
def __init__(self):
- self.frame_bindings = {}
- self.frame_depth = 0
+ self.bindings = {}
+ self.used = [] # list of bools
+ self.hint_frame_locations = {}
+
+ frame_depth = property(lambda:xxx, lambda:xxx) # XXX kill me
+
+ def get_frame_depth(self):
+ return len(self.used)
def get(self, box):
- return self.frame_bindings.get(box, None)
+ return self.bindings.get(box, None)
def loc(self, box):
+ """Return or create the frame location associated with 'box'."""
+ # first check if it's already in the frame_manager
try:
- return self.frame_bindings[box]
+ return self.bindings[box]
except KeyError:
- return self.get_new_loc(box)
+ pass
+ # check if we have a hint for this box
+ if box in self.hint_frame_locations:
+ # if we do, try to reuse the location for this box
+ loc = self.hint_frame_locations[box]
+ if self.try_to_reuse_location(box, loc):
+ return loc
+ # no valid hint. make up a new free location
+ return self.get_new_loc(box)
def get_new_loc(self, box):
size = self.frame_size(box.type)
- self.frame_depth += ((-self.frame_depth) & (size-1))
- # ^^^ frame_depth is rounded up to a multiple of 'size', assuming
+ # frame_depth is rounded up to a multiple of 'size', assuming
# that 'size' is a power of two. The reason for doing so is to
# avoid obscure issues in jump.py with stack locations that try
# to move from position (6,7) to position (7,8).
- newloc = self.frame_pos(self.frame_depth, box.type)
- self.frame_bindings[box] = newloc
- self.frame_depth += size
+ while self.get_frame_depth() & (size - 1):
+ self.used.append(False)
+ #
+ index = self.get_frame_depth()
+ newloc = self.frame_pos(index, box.type)
+ for i in range(size):
+ self.used.append(True)
+ #
+ if not we_are_translated(): # extra testing
+ testindex = self.get_loc_index(newloc)
+ assert testindex == index
+ #
+ self.bindings[box] = newloc
return newloc
+ def set_binding(self, box, loc):
+ self.bindings[box] = loc
+ #
+ index = self.get_loc_index(loc)
+ endindex = index + self.frame_size(box)
+ while len(self.used) < endindex:
+ self.used.append(False)
+ while index < endindex:
+ self.used[index] = True
+ index += 1
+
def reserve_location_in_frame(self, size):
- frame_depth = self.frame_depth
- self.frame_depth += size
+ frame_depth = self.get_frame_depth()
+ for i in range(size):
+ self.used.append(True)
return frame_depth
+ def mark_as_free(self, box):
+ try:
+ loc = self.bindings[box]
+ except KeyError:
+ return # already gone
+ del self.bindings[box]
+ #
+ size = self.frame_size(box.type)
+ baseindex = self.get_loc_index(loc)
+ for i in range(size):
+ index = baseindex + i
+ assert 0 <= index < len(self.used)
+ self.used[index] = False
+
+ def try_to_reuse_location(self, box, loc):
+ index = self.get_loc_index(loc)
+ assert index >= 0
+ size = self.frame_size(box.type)
+ for i in range(size):
+ while (index + i) >= len(self.used):
+ self.used.append(False)
+ if self.used[index + i]:
+ return False # already in use
+ # good, we can reuse the location
+ for i in range(size):
+ self.used[index + i] = True
+ self.bindings[box] = loc
+ return True
+
# abstract methods that need to be overwritten for specific assemblers
@staticmethod
def frame_pos(loc, type):
@@ -52,6 +118,10 @@
@staticmethod
def frame_size(type):
return 1
+ @staticmethod
+ def get_loc_index(loc):
+ raise NotImplementedError("Purely abstract")
+
class RegisterManager(object):
""" Class that keeps track of register allocations
@@ -70,8 +140,6 @@
self.position = -1
self.frame_manager = frame_manager
self.assembler = assembler
- self.hint_frame_locations = {} # {Box: StackLoc}
- self.freed_frame_locations = {} # {StackLoc: None}
def is_still_alive(self, v):
# Check if 'v' is alive at the current position.
@@ -103,9 +171,7 @@
self.free_regs.append(self.reg_bindings[v])
del self.reg_bindings[v]
if self.frame_manager is not None:
- if v in self.frame_manager.frame_bindings:
- loc = self.frame_manager.frame_bindings[v]
- self.freed_frame_locations[loc] = None
+ self.frame_manager.mark_as_free(v)
def possibly_free_vars(self, vars):
""" Same as 'possibly_free_var', but for all v in vars.
@@ -177,23 +243,6 @@
self.reg_bindings[v] = loc
return loc
- def _frame_loc(self, v):
- # first check if it's already in the frame_manager
- try:
- return self.frame_manager.frame_bindings[v]
- except KeyError:
- pass
- # check if we have a hint for this box
- if v in self.hint_frame_locations:
- # if we do, check that the hinted location is known to be free
- loc = self.hint_frame_locations[v]
- if loc in self.freed_frame_locations:
- del self.freed_frame_locations[loc]
- self.frame_manager.frame_bindings[v] = loc
- return loc
- # no valid hint. make up a new free location
- return self.frame_manager.get_new_loc(v)
-
def _spill_var(self, v, forbidden_vars, selected_reg,
need_lower_byte=False):
v_to_spill = self._pick_variable_to_spill(v, forbidden_vars,
@@ -201,7 +250,7 @@
loc = self.reg_bindings[v_to_spill]
del self.reg_bindings[v_to_spill]
if self.frame_manager.get(v_to_spill) is None:
- newloc = self._frame_loc(v_to_spill)
+ newloc = self.frame_manager.loc(v_to_spill)
self.assembler.regalloc_mov(loc, newloc)
return loc
@@ -278,7 +327,7 @@
except KeyError:
if box in self.bindings_to_frame_reg:
return self.frame_reg
- return self._frame_loc(box)
+ return self.frame_manager.loc(box)
def return_constant(self, v, forbidden_vars=[], selected_reg=None):
""" Return the location of the constant v. If 'selected_reg' is
@@ -326,7 +375,7 @@
self.reg_bindings[v] = loc
self.assembler.regalloc_mov(prev_loc, loc)
else:
- loc = self._frame_loc(v)
+ loc = self.frame_manager.loc(v)
self.assembler.regalloc_mov(prev_loc, loc)
def force_result_in_reg(self, result_v, v, forbidden_vars=[]):
@@ -345,7 +394,7 @@
self.reg_bindings[result_v] = loc
return loc
if v not in self.reg_bindings:
- prev_loc = self._frame_loc(v)
+ prev_loc = self.frame_manager.loc(v)
loc = self.force_allocate_reg(v, forbidden_vars)
self.assembler.regalloc_mov(prev_loc, loc)
assert v in self.reg_bindings
@@ -365,7 +414,7 @@
def _sync_var(self, v):
if not self.frame_manager.get(v):
reg = self.reg_bindings[v]
- to = self._frame_loc(v)
+ to = self.frame_manager.loc(v)
self.assembler.regalloc_mov(reg, to)
# otherwise it's clean
diff --git a/pypy/jit/backend/llsupport/test/test_regalloc.py b/pypy/jit/backend/llsupport/test/test_regalloc.py
--- a/pypy/jit/backend/llsupport/test/test_regalloc.py
+++ b/pypy/jit/backend/llsupport/test/test_regalloc.py
@@ -44,6 +44,9 @@
return 2
else:
return 1
+ def get_loc_index(self, loc):
+ assert isinstance(loc, FakeFramePos)
+ return loc.pos
class MockAsm(object):
def __init__(self):
@@ -282,7 +285,7 @@
rm.force_allocate_reg(b)
rm.before_call()
assert len(rm.reg_bindings) == 2
- assert fm.frame_depth == 2
+ assert fm.get_frame_depth() == 2
assert len(asm.moves) == 2
rm._check_invariants()
rm.after_call(boxes[-1])
@@ -305,7 +308,7 @@
rm.force_allocate_reg(b)
rm.before_call(save_all_regs=True)
assert len(rm.reg_bindings) == 0
- assert fm.frame_depth == 4
+ assert fm.get_frame_depth() == 4
assert len(asm.moves) == 4
rm._check_invariants()
rm.after_call(boxes[-1])
@@ -327,7 +330,7 @@
xrm = XRegisterManager(longevity, frame_manager=fm, assembler=asm)
xrm.loc(f0)
rm.loc(b0)
- assert fm.frame_depth == 3
+ assert fm.get_frame_depth() == 3
@@ -351,20 +354,14 @@
def test_hint_frame_locations_1(self):
- b0, b1 = newboxes(0, 1)
- longevity = {b0: (0, 1), b1: (0, 1)}
+ b0, = newboxes(0)
fm = TFrameManager()
- asm = MockAsm()
- rm = RegisterManager(longevity, frame_manager=fm, assembler=asm)
- rm.hint_frame_locations[b0] = "some_stack_loc"
- rm.freed_frame_locations["some_stack_loc"] = None
- rm.force_allocate_reg(b0)
- rm.force_allocate_reg(b1)
- rm.force_spill_var(b0)
- rm.force_spill_var(b1)
- assert rm.loc(b0) == "some_stack_loc"
- assert isinstance(rm.loc(b1), FakeFramePos)
- rm._check_invariants()
+ loc123 = FakeFramePos(123, INT)
+ fm.hint_frame_locations[b0] = loc123
+ assert fm.get_frame_depth() == 0
+ loc = fm.loc(b0)
+ assert loc == loc123
+ assert fm.get_frame_depth() == 124
def test_hint_frame_locations_2(self):
b0, b1, b2 = newboxes(0, 1, 2)
@@ -378,20 +375,99 @@
rm.force_spill_var(b0)
loc = rm.loc(b0)
assert isinstance(loc, FakeFramePos)
+ assert fm.get_loc_index(loc) == 0
rm.position = 1
- assert loc not in rm.freed_frame_locations
+ assert fm.used == [True]
rm.possibly_free_var(b0)
- assert loc in rm.freed_frame_locations
+ assert fm.used == [False]
#
- rm.hint_frame_locations[b1] = loc
+ fm.hint_frame_locations[b1] = loc
rm.force_spill_var(b1)
loc1 = rm.loc(b1)
- assert loc1 is loc
- assert rm.freed_frame_locations == {}
+ assert loc1 == loc
+ assert fm.used == [True]
#
- rm.hint_frame_locations[b2] = loc
+ fm.hint_frame_locations[b2] = loc
rm.force_spill_var(b2)
loc2 = rm.loc(b2)
- assert loc2 is not loc1 # because it's not in freed_frame_locations
+ assert loc2 != loc1 # because it was not free
+ assert fm.used == [True, True]
#
rm._check_invariants()
+
+ def test_frame_manager_basic(self):
+ b0, b1 = newboxes(0, 1)
+ fm = TFrameManager()
+ loc0 = fm.loc(b0)
+ assert fm.get_loc_index(loc0) == 0
+ #
+ assert fm.get(b1) is None
+ loc1 = fm.loc(b1)
+ assert fm.get_loc_index(loc1) == 1
+ assert fm.get(b1) == loc1
+ #
+ loc0b = fm.loc(b0)
+ assert loc0b == loc0
+ #
+ fm.loc(BoxInt())
+ assert fm.get_frame_depth() == 3
+ #
+ f0 = BoxFloat()
+ locf0 = fm.loc(f0)
+ assert fm.get_loc_index(locf0) == 4
+ assert fm.get_frame_depth() == 6
+ #
+ f1 = BoxFloat()
+ locf1 = fm.loc(f1)
+ assert fm.get_loc_index(locf1) == 6
+ assert fm.get_frame_depth() == 8
+ assert fm.used == [True, True, True, False, True, True, True, True]
+ #
+ fm.mark_as_free(b0)
+ assert fm.used == [False, True, True, False, True, True, True, True]
+ fm.mark_as_free(b0)
+ assert fm.used == [False, True, True, False, True, True, True, True]
+ fm.mark_as_free(f1)
+ assert fm.used == [False, True, True, False, True, True, False, False]
+ #
+ fm.reserve_location_in_frame(1)
+ assert fm.get_frame_depth() == 9
+ assert fm.used == [False, True, True, False, True, True, False, False, True]
+ #
+ assert b0 not in fm.bindings
+ fm.set_binding(b0, loc0)
+ assert b0 in fm.bindings
+ assert fm.used == [True, True, True, False, True, True, False, False, True]
+ #
+ b3 = BoxInt()
+ assert not fm.try_to_reuse_location(b3, loc0)
+ assert fm.used == [True, True, True, False, True, True, False, False, True]
+ #
+ fm.mark_as_free(b0)
+ assert fm.used == [False, True, True, False, True, True, False, False, True]
+ assert fm.try_to_reuse_location(b3, loc0)
+ assert fm.used == [True, True, True, False, True, True, False, False, True]
+ #
+ fm.mark_as_free(b0) # already free
+ assert fm.used == [True, True, True, False, True, True, False, False, True]
+ #
+ fm.mark_as_free(b3)
+ assert fm.used == [False, True, True, False, True, True, False, False, True]
+ f3 = BoxFloat()
+ assert not fm.try_to_reuse_location(f3, fm.frame_pos(0, FLOAT))
+ assert not fm.try_to_reuse_location(f3, fm.frame_pos(1, FLOAT))
+ assert not fm.try_to_reuse_location(f3, fm.frame_pos(2, FLOAT))
+ assert not fm.try_to_reuse_location(f3, fm.frame_pos(3, FLOAT))
+ assert not fm.try_to_reuse_location(f3, fm.frame_pos(4, FLOAT))
+ assert not fm.try_to_reuse_location(f3, fm.frame_pos(5, FLOAT))
+ assert fm.used == [False, True, True, False, True, True, False, False, True]
+ assert fm.try_to_reuse_location(f3, fm.frame_pos(6, FLOAT))
+ assert fm.used == [False, True, True, False, True, True, True, True, True]
+ #
+ fm.used = [False]
+ assert fm.try_to_reuse_location(BoxFloat(), fm.frame_pos(0, FLOAT))
+ assert fm.used == [True, True]
+ #
+ fm.used = [True]
+ assert not fm.try_to_reuse_location(BoxFloat(), fm.frame_pos(0, FLOAT))
+ assert fm.used == [True]
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -694,7 +694,7 @@
regalloc.walk_operations(operations)
if we_are_translated() or self.cpu.dont_keepalive_stuff:
self._regalloc = None # else keep it around for debugging
- frame_depth = regalloc.fm.frame_depth
+ frame_depth = regalloc.fm.get_frame_depth()
param_depth = regalloc.param_depth
jump_target_descr = regalloc.jump_target_descr
if jump_target_descr is not None:
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -138,6 +138,10 @@
return 2
else:
return 1
+ @staticmethod
+ def get_loc_index(loc):
+ assert isinstance(loc, StackLoc)
+ return loc.position
if WORD == 4:
gpr_reg_mgr_cls = X86RegisterManager
@@ -184,7 +188,6 @@
allgcrefs):
operations, _ = self._prepare(inputargs, operations, allgcrefs)
self._update_bindings(arglocs, inputargs)
- self.fm.frame_depth = prev_depths[0]
self.param_depth = prev_depths[1]
return operations
@@ -307,7 +310,7 @@
self.xrm.reg_bindings[arg] = loc
used[loc] = None
else:
- self.fm.frame_bindings[arg] = loc
+ self.fm.set_binding(arg, loc)
else:
if isinstance(loc, RegLoc):
if loc is ebp:
@@ -316,7 +319,7 @@
self.rm.reg_bindings[arg] = loc
used[loc] = None
else:
- self.fm.frame_bindings[arg] = loc
+ self.fm.set_binding(arg, loc)
self.rm.free_regs = []
for reg in self.rm.all_regs:
if reg not in used:
@@ -352,7 +355,7 @@
def get_current_depth(self):
# return (self.fm.frame_depth, self.param_depth), but trying to share
# the resulting tuple among several calls
- arg0 = self.fm.frame_depth
+ arg0 = self.fm.get_frame_depth()
arg1 = self.param_depth
result = self.assembler._current_depths_cache
if result[0] != arg0 or result[1] != arg1:
@@ -1334,12 +1337,12 @@
loc = nonfloatlocs[i]
if isinstance(loc, StackLoc):
assert box.type != FLOAT
- self.rm.hint_frame_locations[box] = loc
+ self.fm.hint_frame_locations[box] = loc
else:
loc = floatlocs[i]
if isinstance(loc, StackLoc):
assert box.type == FLOAT
- self.xrm.hint_frame_locations[box] = loc
+ self.fm.hint_frame_locations[box] = loc
def consider_jump(self, op):
assembler = self.assembler
@@ -1385,7 +1388,7 @@
def get_mark_gc_roots(self, gcrootmap, use_copy_area=False):
shape = gcrootmap.get_basic_shape(IS_X86_64)
- for v, val in self.fm.frame_bindings.items():
+ for v, val in self.fm.bindings.items():
if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
assert isinstance(val, StackLoc)
gcrootmap.add_frame_offset(shape, get_ebp_ofs(val.position))
diff --git a/pypy/jit/backend/x86/test/test_recompilation.py b/pypy/jit/backend/x86/test/test_recompilation.py
--- a/pypy/jit/backend/x86/test/test_recompilation.py
+++ b/pypy/jit/backend/x86/test/test_recompilation.py
@@ -42,6 +42,7 @@
i5 = int_add(i4, 1)
i6 = int_add(i5, 1)
i7 = int_add(i5, i4)
+ force_spill(i5)
i8 = int_add(i7, 1)
i9 = int_add(i8, 1)
finish(i3, i4, i5, i6, i7, i8, i9, descr=fdescr2)
@@ -49,10 +50,9 @@
bridge = self.attach_bridge(ops, loop, -2)
descr = loop.operations[2].getdescr()
new = descr._x86_bridge_frame_depth
- assert descr._x86_bridge_param_depth == 0
- # XXX: Maybe add enough ops to force stack on 64-bit as well?
- if IS_X86_32:
- assert new > previous
+ assert descr._x86_bridge_param_depth == 0
+ # the force_spill() forces the stack to grow
+ assert new > previous
self.cpu.set_future_value_int(0, 0)
fail = self.run(loop)
assert fail.identifier == 2
@@ -104,6 +104,9 @@
i8 = int_add(i3, 1)
i6 = int_add(i8, i10)
i7 = int_add(i3, i6)
+ force_spill(i6)
+ force_spill(i7)
+ force_spill(i8)
i12 = int_add(i7, i8)
i11 = int_add(i12, i6)
jump(i3, i12, i11, i10, i6, i7, descr=looptoken)
@@ -112,9 +115,8 @@
guard_op = loop.operations[5]
loop_frame_depth = loop.token._x86_frame_depth
assert loop.token._x86_param_depth == 0
- # XXX: Maybe add enough ops to force stack on 64-bit as well?
- if IS_X86_32:
- assert guard_op.getdescr()._x86_bridge_frame_depth > loop_frame_depth
+ # the force_spill() forces the stack to grow
+ assert guard_op.getdescr()._x86_bridge_frame_depth > loop_frame_depth
assert guard_op.getdescr()._x86_bridge_param_depth == 0
self.cpu.set_future_value_int(0, 0)
self.cpu.set_future_value_int(1, 0)
More information about the pypy-commit
mailing list