[pypy-commit] pypy default: Tweak: 16% speed increase of PyPy annotation, by avoiding the

Sun Nov 19 18:33:13 EST 2017

Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r93086:60c4fa1b0539
Date: 2017-11-20 00:32 +0100
http://bitbucket.org/pypy/pypy/changeset/60c4fa1b0539/

Log:	Tweak: 16% speed increase of PyPy annotation, by avoiding the
	situation where a block is rescheduled many many times before it is
	finally resolved (e.g. because resolving it requires (re)flowing
	through a very long chain of blocks).

diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py
--- a/rpython/annotator/annrpython.py
+++ b/rpython/annotator/annrpython.py
@@ -33,7 +33,7 @@
             translator = TranslationContext()
             translator.annotator = self
         self.translator = translator
-        self.pendingblocks = {}  # map {block: graph-containing-it}
+        self.genpendingblocks=[{}] # [{block: graph-containing-it}] * generation
         self.annotated = {}      # set of blocks already seen
         self.added_blocks = None # see processblock() below
         self.links_followed = {} # set of links that have ever been followed
@@ -57,7 +57,7 @@
         self.errors = []
 
     def __getstate__(self):
-        attrs = """translator pendingblocks annotated links_followed
+        attrs = """translator genpendingblocks annotated links_followed
         notify bookkeeper frozen policy added_blocks""".split()
         ret = self.__dict__.copy()
         for key, value in ret.items():
@@ -188,18 +188,39 @@
             else:
                 self.mergeinputargs(graph, block, cells)
             if not self.annotated[block]:
-                self.pendingblocks[block] = graph
+                self.schedulependingblock(graph, block)
+
+    def schedulependingblock(self, graph, block):
+        # 'self.genpendingblocks' is a list of dictionaries which is
+        # logically equivalent to just one dictionary.  But we keep a
+        # 'generation' number on each block (=key), and whenever we
+        # process a block, we increase its generation number.  The
+        # block is added to the 'genpendingblocks' indexed by its
+        # generation number.  See complete_pending_blocks() below.
+        generation = getattr(block, 'generation', 0)
+        self.genpendingblocks[generation][block] = graph
 
     def complete_pending_blocks(self):
-        while self.pendingblocks:
-            # Grab all blocks from 'self.pendingblocks' in a list, and
-            # walk that list.  This prevents a situation where the same
-            # block is added over and over again to 'self.pendingblocks'
-            # and the code here would pop that same block from the dict
-            # over and over again, without ever looking at other blocks.
-            all_blocks = self.pendingblocks.keys()
-            for block in all_blocks:
-                graph = self.pendingblocks.pop(block)
+        while True:
+            # Find the first of the dictionaries in 'self.genpendingblocks'
+            # which is not empty
+            gen = 0
+            for pendingblocks in self.genpendingblocks:
+                if pendingblocks:
+                    break
+                gen += 1
+            else:
+                return    # all empty => done
+
+            gen += 1   # next generation number
+            if len(self.genpendingblocks) == gen:
+                self.genpendingblocks.append({})
+
+            # Process all blocks at this level
+            # (if any gets re-inserted, it will be into the next level)
+            while pendingblocks:
+                block, graph = pendingblocks.popitem()
+                block.generation = gen
                 self.processblock(graph, block)
 
     def complete(self):
@@ -207,7 +228,7 @@
         while True:
             self.complete_pending_blocks()
             self.policy.no_more_blocks_to_annotate(self)
-            if not self.pendingblocks:
+            if not any(self.genpendingblocks):
                 break   # finished
         # make sure that the return variables of all graphs is annotated
         if self.added_blocks is not None:
@@ -393,7 +414,7 @@
     def reflowpendingblock(self, graph, block):
         assert not self.frozen
         assert graph not in self.fixed_graphs
-        self.pendingblocks[block] = graph
+        self.schedulependingblock(graph, block)
         assert block in self.annotated
         self.annotated[block] = False  # must re-flow
         self.blocked_blocks[block] = (graph, None)
diff --git a/rpython/flowspace/model.py b/rpython/flowspace/model.py
--- a/rpython/flowspace/model.py
+++ b/rpython/flowspace/model.py
@@ -170,7 +170,7 @@
 
 class Block(object):
     __slots__ = """inputargs operations exitswitch
-                exits blockcolor""".split()
+                exits blockcolor generation""".split()
 
     def __init__(self, inputargs):
         self.inputargs = list(inputargs)  # mixed list of variable/const XXX