[pypy-commit] pypy regalloc-playground: collect some statistics about the register allocator

cfbolz pypy.commits at gmail.com
Sun Aug 20 13:09:37 EDT 2017


Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch: regalloc-playground
Changeset: r92187:9ee8caf969f5
Date: 2017-08-20 16:04 +0200
http://bitbucket.org/pypy/pypy/changeset/9ee8caf969f5/

Log:	collect some statistics about the register allocator

diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py
--- a/rpython/jit/backend/llsupport/regalloc.py
+++ b/rpython/jit/backend/llsupport/regalloc.py
@@ -406,9 +406,12 @@
                                selected_reg, need_lower_byte=need_lower_byte)
         loc = self.reg_bindings[v_to_spill]
         del self.reg_bindings[v_to_spill]
+        self.assembler.num_spills += 1
         if self.frame_manager.get(v_to_spill) is None:
             newloc = self.frame_manager.loc(v_to_spill)
             self.assembler.regalloc_mov(loc, newloc)
+        else:
+            self.assembler.num_spills_to_existing += 1
         return loc
 
     def _pick_variable_to_spill(self, v, forbidden_vars, selected_reg=None,
@@ -522,6 +525,7 @@
         loc = self.force_allocate_reg(v, forbidden_vars, selected_reg,
                                       need_lower_byte=need_lower_byte)
         if prev_loc is not loc:
+            self.assembler.num_reloads += 1
             self.assembler.regalloc_mov(prev_loc, loc)
         return loc
 
@@ -576,6 +580,7 @@
 
     def _sync_var(self, v):
         if not self.frame_manager.get(v):
+            self.assembler.num_moves_calls += 1  # stats counters live on the assembler, not on the register manager
             reg = self.reg_bindings[v]
             to = self.frame_manager.loc(v)
             self.assembler.regalloc_mov(reg, to)
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -90,6 +90,20 @@
         self.target_tokens_currently_compiling = {}
         self.frame_depth_to_patch = []
 
+        # XXX register allocation statistics to be removed later
+        self.num_moves_calls = 0
+        self.num_moves_jump = 0
+        self.num_spills = 0
+        self.num_spills_to_existing = 0
+        self.num_reloads = 0
+
+        self.preamble_num_moves_calls = 0
+        self.preamble_num_moves_jump = 0
+        self.preamble_num_spills = 0
+        self.preamble_num_spills_to_existing = 0
+        self.preamble_num_reloads = 0
+
+
     def teardown(self):
         self.pending_guard_tokens = None
         if WORD == 8:
@@ -545,6 +559,25 @@
                                                  size_excluding_failure_stuff))
         debug_print("            end: 0x%x" % r_uint(rawstart + full_size))
         debug_stop("jit-backend-addr")
+        debug_start("jit-regalloc-stats")
+        debug_print("Loop %d (%s) has address 0x%x to 0x%x (bootstrap 0x%x)" % (
+            looptoken.number, loopname,
+            r_uint(rawstart + looppos),
+            r_uint(rawstart + size_excluding_failure_stuff),
+            r_uint(rawstart + functionpos)))
+        debug_print("assembler size: ", size_excluding_failure_stuff)
+        debug_print("number ops: ", len(operations))
+        debug_print("preamble num moves calls: ", self.preamble_num_moves_calls)
+        debug_print("preamble num moves jump:", self.preamble_num_moves_jump)
+        debug_print("preamble num moves spills:", self.preamble_num_spills)
+        debug_print("preamble num moves spills to existing:", self.preamble_num_spills_to_existing)
+        debug_print("preamble num register reloads:", self.preamble_num_reloads)
+        debug_print("num moves calls: ", self.num_moves_calls)
+        debug_print("num moves jump:", self.num_moves_jump)
+        debug_print("num moves spills:", self.num_spills)
+        debug_print("num moves spills to existing:", self.num_spills_to_existing)
+        debug_print("num moves register reloads:", self.num_reloads)
+        debug_stop("jit-regalloc-stats")
         self.patch_pending_failure_recoveries(rawstart)
         #
         ops_offset = self.mc.ops_offset
@@ -624,6 +657,24 @@
         debug_print("       failures: 0x%x" % r_uint(rawstart + codeendpos))
         debug_print("            end: 0x%x" % r_uint(rawstart + fullsize))
         debug_stop("jit-backend-addr")
+        debug_start("jit-regalloc-stats")
+        debug_print("bridge out of Guard 0x%x has address 0x%x to 0x%x" %
+                    (r_uint(descr_number), r_uint(rawstart + startpos),
+                        r_uint(rawstart + codeendpos)))
+
+        debug_print("assembler size: ", fullsize)
+        debug_print("number ops: ", len(operations))
+        debug_print("preamble num moves calls: ", self.preamble_num_moves_calls)
+        debug_print("preamble num moves jump:", self.preamble_num_moves_jump)
+        debug_print("preamble num moves spills:", self.preamble_num_spills)
+        debug_print("preamble num moves spills to existing:", self.preamble_num_spills_to_existing)
+        debug_print("preamble num register reloads:", self.preamble_num_reloads)
+        debug_print("num moves calls: ", self.num_moves_calls)
+        debug_print("num moves jump:", self.num_moves_jump)
+        debug_print("num moves spills:", self.num_spills)
+        debug_print("num moves spills to existing:", self.num_spills_to_existing)
+        debug_print("num moves register reloads:", self.num_reloads)
+        debug_stop("jit-regalloc-stats")
         self.patch_pending_failure_recoveries(rawstart)
         # patch the jump from original guard
         self.patch_jump_for_descr(faildescr, rawstart + startpos)
@@ -1291,10 +1342,12 @@
                                      result_loc, result_type,
                                      result_size)
         cb.emit()
+        self.num_moves_calls += cb.num_moves
 
     def simple_call_no_collect(self, fnloc, arglocs):
         cb = callbuilder.CallBuilder(self, fnloc, arglocs)
         cb.emit_no_collect()
+        self.num_moves_calls += cb.num_moves
 
     def _reload_frame_if_necessary(self, mc, shadowstack_reg=None):
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
@@ -2144,6 +2197,7 @@
                 cb.emit()
             else:
                 cb.emit_no_collect()
+        self.num_moves_calls += cb.num_moves
 
     def _store_force_index(self, guard_op):
         assert (guard_op.getopnum() == rop.GUARD_NOT_FORCED or
@@ -2422,6 +2476,16 @@
             self.mc.JMP(imm(target))
 
     def label(self):
+        self.preamble_num_moves_calls += self.num_moves_calls
+        self.preamble_num_moves_jump += self.num_moves_jump
+        self.preamble_num_spills += self.num_spills
+        self.preamble_num_spills_to_existing += self.num_spills_to_existing
+        self.preamble_num_reloads += self.num_reloads
+        self.num_moves_calls = 0
+        self.num_moves_jump = 0
+        self.num_spills = 0
+        self.num_spills_to_existing = 0
+        self.num_reloads = 0
         self._check_frame_depth_debug(self.mc)
 
     class CondCallSlowPath(codebuf.SlowPath):
diff --git a/rpython/jit/backend/x86/callbuilder.py b/rpython/jit/backend/x86/callbuilder.py
--- a/rpython/jit/backend/x86/callbuilder.py
+++ b/rpython/jit/backend/x86/callbuilder.py
@@ -632,25 +632,30 @@
         self.subtract_esp_aligned(on_stack - self.stack_max)
 
         # Handle register arguments: first remap the xmm arguments
-        remap_frame_layout(self.asm, xmm_src_locs, xmm_dst_locs,
-                           X86_64_XMM_SCRATCH_REG)
+        num_moves = remap_frame_layout(self.asm, xmm_src_locs, xmm_dst_locs,
+                                       X86_64_XMM_SCRATCH_REG)
         # Load the singlefloat arguments from main regs or stack to xmm regs
         if singlefloats is not None:
             for src, dst in singlefloats:
                 if isinstance(dst, RawEspLoc):
                     # XXX too much special logic
                     if isinstance(src, RawEbpLoc):
+                        num_moves += 2
                         self.mc.MOV32(X86_64_SCRATCH_REG, src)
                         self.mc.MOV32(dst, X86_64_SCRATCH_REG)
                     else:
+                        num_moves += 1
                         self.mc.MOV32(dst, src)
                     continue
                 if isinstance(src, ImmedLoc):
+                    num_moves += 1
                     self.mc.MOV(X86_64_SCRATCH_REG, src)
                     src = X86_64_SCRATCH_REG
+                num_moves += 1
                 self.mc.MOVD32(dst, src)
         # Finally remap the arguments in the main regs
-        remap_frame_layout(self.asm, src_locs, dst_locs, X86_64_SCRATCH_REG)
+        num_moves += remap_frame_layout(self.asm, src_locs, dst_locs, X86_64_SCRATCH_REG)
+        self.num_moves = num_moves
 
 
     def emit_raw_call(self):
diff --git a/rpython/jit/backend/x86/jump.py b/rpython/jit/backend/x86/jump.py
--- a/rpython/jit/backend/x86/jump.py
+++ b/rpython/jit/backend/x86/jump.py
@@ -6,6 +6,7 @@
     pending_dests = len(dst_locations)
     srccount = {}    # maps dst_locations to how many times the same
                      # location appears in src_locations
+    num_moves = 0
     for dst in dst_locations:
         key = dst._getregkey()
         assert key not in srccount, "duplicate value in dst_locations!"
@@ -39,6 +40,7 @@
                     if key in srccount:
                         srccount[key] -= 1
                 _move(assembler, src, dst, tmpreg)
+                num_moves += 1
                 progress = True
         if not progress:
             # we are left with only pure disjoint cycles
@@ -53,6 +55,7 @@
                 originalkey = dst._getregkey()
                 if srccount[originalkey] >= 0:
                     assembler.regalloc_push(dst)
+                    num_moves += 1
                     while True:
                         key = dst._getregkey()
                         assert srccount[key] == 1
@@ -63,9 +66,12 @@
                         if src._getregkey() == originalkey:
                             break
                         _move(assembler, src, dst, tmpreg)
+                        num_moves += 1
                         dst = src
                     assembler.regalloc_pop(dst)
+                    num_moves += 1
             assert pending_dests == 0
+    return num_moves
 
 def _move(assembler, src, dst, tmpreg):
     if dst.is_memory_reference() and src.is_memory_reference():
@@ -93,6 +99,7 @@
         dst_keys[loc._getregkey()] = None
     src_locations2red = []
     dst_locations2red = []
+    num_moves = 0
     for i in range(len(src_locations2)):
         loc    = src_locations2[i]
         dstloc = dst_locations2[i]
@@ -100,6 +107,7 @@
             key = loc._getregkey()
             if (key in dst_keys or (loc.get_width() > WORD and
                                     (key + WORD) in dst_keys)):
+                num_moves += 1
                 assembler.regalloc_push(loc)
                 extrapushes.append(dstloc)
                 continue
@@ -109,12 +117,14 @@
     dst_locations2 = dst_locations2red
     #
     # remap the integer and pointer registers and stack locations
-    remap_frame_layout(assembler, src_locations1, dst_locations1, tmpreg1)
+    num_moves += remap_frame_layout(assembler, src_locations1, dst_locations1, tmpreg1)
     #
     # remap the xmm registers and stack locations
-    remap_frame_layout(assembler, src_locations2, dst_locations2, tmpreg2)
+    num_moves += remap_frame_layout(assembler, src_locations2, dst_locations2, tmpreg2)
     #
     # finally, pop the extra xmm stack locations
     while len(extrapushes) > 0:
         loc = extrapushes.pop()
         assembler.regalloc_pop(loc)
+        num_moves += 1
+    return num_moves
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1365,11 +1365,12 @@
             tmpreg = None
             xmmtmp = None
         # Do the remapping
-        remap_frame_layout_mixed(assembler,
+        num_moves = remap_frame_layout_mixed(assembler,
                                  src_locations1, dst_locations1, tmpreg,
                                  src_locations2, dst_locations2, xmmtmp)
         self.possibly_free_vars_for_op(op)
         assembler.closing_jump(self.jump_target_descr)
+        assembler.num_moves_jump += num_moves
 
     def consider_enter_portal_frame(self, op):
         self.assembler.enter_portal_frame(op)


More information about the pypy-commit mailing list