[pypy-commit] pypy default: Improve the hack 062e9d06c908: revert the changes done in the

arigo noreply at buildbot.pypy.org
Sat Dec 10 00:09:55 CET 2011


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r50332:9b45755e2c2b
Date: 2011-12-09 22:58 +0100
http://bitbucket.org/pypy/pypy/changeset/9b45755e2c2b/

Log:	Improve the hack 062e9d06c908: revert the changes done in the
	RegisterManager, and instead write a more involved but cleaner
	solution in the FrameManager. As a bonus you get tests too. This
	solution should work even when assembling bridges, not just loops.

diff --git a/pypy/jit/backend/llsupport/regalloc.py b/pypy/jit/backend/llsupport/regalloc.py
--- a/pypy/jit/backend/llsupport/regalloc.py
+++ b/pypy/jit/backend/llsupport/regalloc.py
@@ -16,35 +16,101 @@
     """ Manage frame positions
     """
     def __init__(self):
-        self.frame_bindings = {}
-        self.frame_depth    = 0
+        self.bindings = {}
+        self.used = []      # list of bools
+        self.hint_frame_locations = {}
+
+    frame_depth = property(lambda:xxx, lambda:xxx)   # XXX kill me
+
+    def get_frame_depth(self):
+        return len(self.used)
 
     def get(self, box):
-        return self.frame_bindings.get(box, None)
+        return self.bindings.get(box, None)
 
     def loc(self, box):
+        """Return or create the frame location associated with 'box'."""
+        # first check if it's already in the frame_manager
         try:
-            return self.frame_bindings[box]
+            return self.bindings[box]
         except KeyError:
-            return self.get_new_loc(box)
+            pass
+        # check if we have a hint for this box
+        if box in self.hint_frame_locations:
+            # if we do, try to reuse the location for this box
+            loc = self.hint_frame_locations[box]
+            if self.try_to_reuse_location(box, loc):
+                return loc
+        # no valid hint.  make up a new free location
+        return self.get_new_loc(box)
 
     def get_new_loc(self, box):
         size = self.frame_size(box.type)
-        self.frame_depth += ((-self.frame_depth) & (size-1))
-        # ^^^ frame_depth is rounded up to a multiple of 'size', assuming
+        # frame_depth is rounded up to a multiple of 'size', assuming
         # that 'size' is a power of two.  The reason for doing so is to
         # avoid obscure issues in jump.py with stack locations that try
         # to move from position (6,7) to position (7,8).
-        newloc = self.frame_pos(self.frame_depth, box.type)
-        self.frame_bindings[box] = newloc
-        self.frame_depth += size
+        while self.get_frame_depth() & (size - 1):
+            self.used.append(False)
+        #
+        index = self.get_frame_depth()
+        newloc = self.frame_pos(index, box.type)
+        for i in range(size):
+            self.used.append(True)
+        #
+        if not we_are_translated():    # extra testing
+            testindex = self.get_loc_index(newloc)
+            assert testindex == index
+        #
+        self.bindings[box] = newloc
         return newloc
 
+    def set_binding(self, box, loc):
+        self.bindings[box] = loc
+        #
+        index = self.get_loc_index(loc)
+        endindex = index + self.frame_size(box)
+        while len(self.used) < endindex:
+            self.used.append(False)
+        while index < endindex:
+            self.used[index] = True
+            index += 1
+
     def reserve_location_in_frame(self, size):
-        frame_depth = self.frame_depth
-        self.frame_depth += size
+        frame_depth = self.get_frame_depth()
+        for i in range(size):
+            self.used.append(True)
         return frame_depth
 
+    def mark_as_free(self, box):
+        try:
+            loc = self.bindings[box]
+        except KeyError:
+            return    # already gone
+        del self.bindings[box]
+        #
+        size = self.frame_size(box.type)
+        baseindex = self.get_loc_index(loc)
+        for i in range(size):
+            index = baseindex + i
+            assert 0 <= index < len(self.used)
+            self.used[index] = False
+
+    def try_to_reuse_location(self, box, loc):
+        index = self.get_loc_index(loc)
+        assert index >= 0
+        size = self.frame_size(box.type)
+        for i in range(size):
+            while (index + i) >= len(self.used):
+                self.used.append(False)
+            if self.used[index + i]:
+                return False    # already in use
+        # good, we can reuse the location
+        for i in range(size):
+            self.used[index + i] = True
+        self.bindings[box] = loc
+        return True
+
     # abstract methods that need to be overwritten for specific assemblers
     @staticmethod
     def frame_pos(loc, type):
@@ -52,6 +118,10 @@
     @staticmethod
     def frame_size(type):
         return 1
+    @staticmethod
+    def get_loc_index(loc):
+        raise NotImplementedError("Purely abstract")
+
 
 class RegisterManager(object):
     """ Class that keeps track of register allocations
@@ -70,8 +140,6 @@
         self.position = -1
         self.frame_manager = frame_manager
         self.assembler = assembler
-        self.hint_frame_locations = {}     # {Box: StackLoc}
-        self.freed_frame_locations = {}    # {StackLoc: None}
 
     def is_still_alive(self, v):
         # Check if 'v' is alive at the current position.
@@ -103,9 +171,7 @@
                 self.free_regs.append(self.reg_bindings[v])
                 del self.reg_bindings[v]
             if self.frame_manager is not None:
-                if v in self.frame_manager.frame_bindings:
-                    loc = self.frame_manager.frame_bindings[v]
-                    self.freed_frame_locations[loc] = None
+                self.frame_manager.mark_as_free(v)
 
     def possibly_free_vars(self, vars):
         """ Same as 'possibly_free_var', but for all v in vars.
@@ -177,23 +243,6 @@
                 self.reg_bindings[v] = loc
                 return loc
 
-    def _frame_loc(self, v):
-        # first check if it's already in the frame_manager
-        try:
-            return self.frame_manager.frame_bindings[v]
-        except KeyError:
-            pass
-        # check if we have a hint for this box
-        if v in self.hint_frame_locations:
-            # if we do, check that the hinted location is known to be free
-            loc = self.hint_frame_locations[v]
-            if loc in self.freed_frame_locations:
-                del self.freed_frame_locations[loc]
-                self.frame_manager.frame_bindings[v] = loc
-                return loc
-        # no valid hint.  make up a new free location
-        return self.frame_manager.get_new_loc(v)
-
     def _spill_var(self, v, forbidden_vars, selected_reg,
                    need_lower_byte=False):
         v_to_spill = self._pick_variable_to_spill(v, forbidden_vars,
@@ -201,7 +250,7 @@
         loc = self.reg_bindings[v_to_spill]
         del self.reg_bindings[v_to_spill]
         if self.frame_manager.get(v_to_spill) is None:
-            newloc = self._frame_loc(v_to_spill)
+            newloc = self.frame_manager.loc(v_to_spill)
             self.assembler.regalloc_mov(loc, newloc)
         return loc
 
@@ -278,7 +327,7 @@
         except KeyError:
             if box in self.bindings_to_frame_reg:
                 return self.frame_reg
-            return self._frame_loc(box)
+            return self.frame_manager.loc(box)
 
     def return_constant(self, v, forbidden_vars=[], selected_reg=None):
         """ Return the location of the constant v.  If 'selected_reg' is
@@ -326,7 +375,7 @@
             self.reg_bindings[v] = loc
             self.assembler.regalloc_mov(prev_loc, loc)
         else:
-            loc = self._frame_loc(v)
+            loc = self.frame_manager.loc(v)
             self.assembler.regalloc_mov(prev_loc, loc)
 
     def force_result_in_reg(self, result_v, v, forbidden_vars=[]):
@@ -345,7 +394,7 @@
             self.reg_bindings[result_v] = loc
             return loc
         if v not in self.reg_bindings:
-            prev_loc = self._frame_loc(v)
+            prev_loc = self.frame_manager.loc(v)
             loc = self.force_allocate_reg(v, forbidden_vars)
             self.assembler.regalloc_mov(prev_loc, loc)
         assert v in self.reg_bindings
@@ -365,7 +414,7 @@
     def _sync_var(self, v):
         if not self.frame_manager.get(v):
             reg = self.reg_bindings[v]
-            to = self._frame_loc(v)
+            to = self.frame_manager.loc(v)
             self.assembler.regalloc_mov(reg, to)
         # otherwise it's clean
 
diff --git a/pypy/jit/backend/llsupport/test/test_regalloc.py b/pypy/jit/backend/llsupport/test/test_regalloc.py
--- a/pypy/jit/backend/llsupport/test/test_regalloc.py
+++ b/pypy/jit/backend/llsupport/test/test_regalloc.py
@@ -44,6 +44,9 @@
             return 2
         else:
             return 1
+    def get_loc_index(self, loc):
+        assert isinstance(loc, FakeFramePos)
+        return loc.pos
 
 class MockAsm(object):
     def __init__(self):
@@ -282,7 +285,7 @@
             rm.force_allocate_reg(b)
         rm.before_call()
         assert len(rm.reg_bindings) == 2
-        assert fm.frame_depth == 2
+        assert fm.get_frame_depth() == 2
         assert len(asm.moves) == 2
         rm._check_invariants()
         rm.after_call(boxes[-1])
@@ -305,7 +308,7 @@
             rm.force_allocate_reg(b)
         rm.before_call(save_all_regs=True)
         assert len(rm.reg_bindings) == 0
-        assert fm.frame_depth == 4
+        assert fm.get_frame_depth() == 4
         assert len(asm.moves) == 4
         rm._check_invariants()
         rm.after_call(boxes[-1])
@@ -327,7 +330,7 @@
         xrm = XRegisterManager(longevity, frame_manager=fm, assembler=asm)
         xrm.loc(f0)
         rm.loc(b0)
-        assert fm.frame_depth == 3
+        assert fm.get_frame_depth() == 3
         
         
 
@@ -351,20 +354,14 @@
 
 
     def test_hint_frame_locations_1(self):
-        b0, b1 = newboxes(0, 1)
-        longevity = {b0: (0, 1), b1: (0, 1)}
+        b0, = newboxes(0)
         fm = TFrameManager()
-        asm = MockAsm()
-        rm = RegisterManager(longevity, frame_manager=fm, assembler=asm)
-        rm.hint_frame_locations[b0] = "some_stack_loc"
-        rm.freed_frame_locations["some_stack_loc"] = None
-        rm.force_allocate_reg(b0)
-        rm.force_allocate_reg(b1)
-        rm.force_spill_var(b0)
-        rm.force_spill_var(b1)
-        assert rm.loc(b0) == "some_stack_loc"
-        assert isinstance(rm.loc(b1), FakeFramePos)
-        rm._check_invariants()
+        loc123 = FakeFramePos(123, INT)
+        fm.hint_frame_locations[b0] = loc123
+        assert fm.get_frame_depth() == 0
+        loc = fm.loc(b0)
+        assert loc == loc123
+        assert fm.get_frame_depth() == 124
 
     def test_hint_frame_locations_2(self):
         b0, b1, b2 = newboxes(0, 1, 2)
@@ -378,20 +375,99 @@
         rm.force_spill_var(b0)
         loc = rm.loc(b0)
         assert isinstance(loc, FakeFramePos)
+        assert fm.get_loc_index(loc) == 0
         rm.position = 1
-        assert loc not in rm.freed_frame_locations
+        assert fm.used == [True]
         rm.possibly_free_var(b0)
-        assert loc in rm.freed_frame_locations
+        assert fm.used == [False]
         #
-        rm.hint_frame_locations[b1] = loc
+        fm.hint_frame_locations[b1] = loc
         rm.force_spill_var(b1)
         loc1 = rm.loc(b1)
-        assert loc1 is loc
-        assert rm.freed_frame_locations == {}
+        assert loc1 == loc
+        assert fm.used == [True]
         #
-        rm.hint_frame_locations[b2] = loc
+        fm.hint_frame_locations[b2] = loc
         rm.force_spill_var(b2)
         loc2 = rm.loc(b2)
-        assert loc2 is not loc1     # because it's not in freed_frame_locations
+        assert loc2 != loc1     # because it was not free
+        assert fm.used == [True, True]
         #
         rm._check_invariants()
+
+    def test_frame_manager_basic(self):
+        b0, b1 = newboxes(0, 1)
+        fm = TFrameManager()
+        loc0 = fm.loc(b0)
+        assert fm.get_loc_index(loc0) == 0
+        #
+        assert fm.get(b1) is None
+        loc1 = fm.loc(b1)
+        assert fm.get_loc_index(loc1) == 1
+        assert fm.get(b1) == loc1
+        #
+        loc0b = fm.loc(b0)
+        assert loc0b == loc0
+        #
+        fm.loc(BoxInt())
+        assert fm.get_frame_depth() == 3
+        #
+        f0 = BoxFloat()
+        locf0 = fm.loc(f0)
+        assert fm.get_loc_index(locf0) == 4
+        assert fm.get_frame_depth() == 6
+        #
+        f1 = BoxFloat()
+        locf1 = fm.loc(f1)
+        assert fm.get_loc_index(locf1) == 6
+        assert fm.get_frame_depth() == 8
+        assert fm.used == [True, True, True, False, True, True, True, True]
+        #
+        fm.mark_as_free(b0)
+        assert fm.used == [False, True, True, False, True, True, True, True]
+        fm.mark_as_free(b0)
+        assert fm.used == [False, True, True, False, True, True, True, True]
+        fm.mark_as_free(f1)
+        assert fm.used == [False, True, True, False, True, True, False, False]
+        #
+        fm.reserve_location_in_frame(1)
+        assert fm.get_frame_depth() == 9
+        assert fm.used == [False, True, True, False, True, True, False, False, True]
+        #
+        assert b0 not in fm.bindings
+        fm.set_binding(b0, loc0)
+        assert b0 in fm.bindings
+        assert fm.used == [True, True, True, False, True, True, False, False, True]
+        #
+        b3 = BoxInt()
+        assert not fm.try_to_reuse_location(b3, loc0)
+        assert fm.used == [True, True, True, False, True, True, False, False, True]
+        #
+        fm.mark_as_free(b0)
+        assert fm.used == [False, True, True, False, True, True, False, False, True]
+        assert fm.try_to_reuse_location(b3, loc0)
+        assert fm.used == [True, True, True, False, True, True, False, False, True]
+        #
+        fm.mark_as_free(b0)   # already free
+        assert fm.used == [True, True, True, False, True, True, False, False, True]
+        #
+        fm.mark_as_free(b3)
+        assert fm.used == [False, True, True, False, True, True, False, False, True]
+        f3 = BoxFloat()
+        assert not fm.try_to_reuse_location(f3, fm.frame_pos(0, FLOAT))
+        assert not fm.try_to_reuse_location(f3, fm.frame_pos(1, FLOAT))
+        assert not fm.try_to_reuse_location(f3, fm.frame_pos(2, FLOAT))
+        assert not fm.try_to_reuse_location(f3, fm.frame_pos(3, FLOAT))
+        assert not fm.try_to_reuse_location(f3, fm.frame_pos(4, FLOAT))
+        assert not fm.try_to_reuse_location(f3, fm.frame_pos(5, FLOAT))
+        assert fm.used == [False, True, True, False, True, True, False, False, True]
+        assert fm.try_to_reuse_location(f3, fm.frame_pos(6, FLOAT))
+        assert fm.used == [False, True, True, False, True, True, True, True, True]
+        #
+        fm.used = [False]
+        assert fm.try_to_reuse_location(BoxFloat(), fm.frame_pos(0, FLOAT))
+        assert fm.used == [True, True]
+        #
+        fm.used = [True]
+        assert not fm.try_to_reuse_location(BoxFloat(), fm.frame_pos(0, FLOAT))
+        assert fm.used == [True]
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -694,7 +694,7 @@
         regalloc.walk_operations(operations)
         if we_are_translated() or self.cpu.dont_keepalive_stuff:
             self._regalloc = None   # else keep it around for debugging
-        frame_depth = regalloc.fm.frame_depth
+        frame_depth = regalloc.fm.get_frame_depth()
         param_depth = regalloc.param_depth
         jump_target_descr = regalloc.jump_target_descr
         if jump_target_descr is not None:
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -138,6 +138,10 @@
             return 2
         else:
             return 1
+    @staticmethod
+    def get_loc_index(loc):
+        assert isinstance(loc, StackLoc)
+        return loc.position
 
 if WORD == 4:
     gpr_reg_mgr_cls = X86RegisterManager
@@ -184,7 +188,6 @@
                        allgcrefs):
         operations, _ = self._prepare(inputargs, operations, allgcrefs)
         self._update_bindings(arglocs, inputargs)
-        self.fm.frame_depth = prev_depths[0]
         self.param_depth = prev_depths[1]
         return operations
 
@@ -307,7 +310,7 @@
                     self.xrm.reg_bindings[arg] = loc
                     used[loc] = None
                 else:
-                    self.fm.frame_bindings[arg] = loc
+                    self.fm.set_binding(arg, loc)
             else:
                 if isinstance(loc, RegLoc):
                     if loc is ebp:
@@ -316,7 +319,7 @@
                         self.rm.reg_bindings[arg] = loc
                         used[loc] = None
                 else:
-                    self.fm.frame_bindings[arg] = loc
+                    self.fm.set_binding(arg, loc)
         self.rm.free_regs = []
         for reg in self.rm.all_regs:
             if reg not in used:
@@ -352,7 +355,7 @@
     def get_current_depth(self):
         # return (self.fm.frame_depth, self.param_depth), but trying to share
         # the resulting tuple among several calls
-        arg0 = self.fm.frame_depth
+        arg0 = self.fm.get_frame_depth()
         arg1 = self.param_depth
         result = self.assembler._current_depths_cache
         if result[0] != arg0 or result[1] != arg1:
@@ -1334,12 +1337,12 @@
                 loc = nonfloatlocs[i]
                 if isinstance(loc, StackLoc):
                     assert box.type != FLOAT
-                    self.rm.hint_frame_locations[box] = loc
+                    self.fm.hint_frame_locations[box] = loc
                 else:
                     loc = floatlocs[i]
                     if isinstance(loc, StackLoc):
                         assert box.type == FLOAT
-                        self.xrm.hint_frame_locations[box] = loc
+                        self.fm.hint_frame_locations[box] = loc
 
     def consider_jump(self, op):
         assembler = self.assembler
@@ -1385,7 +1388,7 @@
 
     def get_mark_gc_roots(self, gcrootmap, use_copy_area=False):
         shape = gcrootmap.get_basic_shape(IS_X86_64)
-        for v, val in self.fm.frame_bindings.items():
+        for v, val in self.fm.bindings.items():
             if (isinstance(v, BoxPtr) and self.rm.stays_alive(v)):
                 assert isinstance(val, StackLoc)
                 gcrootmap.add_frame_offset(shape, get_ebp_ofs(val.position))
diff --git a/pypy/jit/backend/x86/test/test_recompilation.py b/pypy/jit/backend/x86/test/test_recompilation.py
--- a/pypy/jit/backend/x86/test/test_recompilation.py
+++ b/pypy/jit/backend/x86/test/test_recompilation.py
@@ -42,6 +42,7 @@
         i5 = int_add(i4, 1)
         i6 = int_add(i5, 1)
         i7 = int_add(i5, i4)
+        force_spill(i5)
         i8 = int_add(i7, 1)
         i9 = int_add(i8, 1)
         finish(i3, i4, i5, i6, i7, i8, i9, descr=fdescr2)
@@ -49,10 +50,9 @@
         bridge = self.attach_bridge(ops, loop, -2)
         descr = loop.operations[2].getdescr()
         new = descr._x86_bridge_frame_depth
-        assert descr._x86_bridge_param_depth == 0        
-        # XXX: Maybe add enough ops to force stack on 64-bit as well?
-        if IS_X86_32:
-            assert new > previous
+        assert descr._x86_bridge_param_depth == 0
+        # the force_spill() forces the stack to grow
+        assert new > previous
         self.cpu.set_future_value_int(0, 0)
         fail = self.run(loop)
         assert fail.identifier == 2
@@ -104,6 +104,9 @@
         i8 = int_add(i3, 1)
         i6 = int_add(i8, i10)
         i7 = int_add(i3, i6)
+        force_spill(i6)
+        force_spill(i7)
+        force_spill(i8)
         i12 = int_add(i7, i8)
         i11 = int_add(i12, i6)
         jump(i3, i12, i11, i10, i6, i7, descr=looptoken)
@@ -112,9 +115,8 @@
         guard_op = loop.operations[5]
         loop_frame_depth = loop.token._x86_frame_depth
         assert loop.token._x86_param_depth == 0
-        # XXX: Maybe add enough ops to force stack on 64-bit as well?
-        if IS_X86_32:
-            assert guard_op.getdescr()._x86_bridge_frame_depth > loop_frame_depth
+        # the force_spill() forces the stack to grow
+        assert guard_op.getdescr()._x86_bridge_frame_depth > loop_frame_depth
         assert guard_op.getdescr()._x86_bridge_param_depth == 0
         self.cpu.set_future_value_int(0, 0)
         self.cpu.set_future_value_int(1, 0)


More information about the pypy-commit mailing list