[pypy-commit] pypy better-storesink: don't optimize array fields of virtualizables

cfbolz pypy.commits at gmail.com
Fri Sep 16 17:13:35 EDT 2016


Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch: better-storesink
Changeset: r87174:6efe86960ad3
Date: 2016-09-16 22:49 +0200
http://bitbucket.org/pypy/pypy/changeset/6efe86960ad3/

Log:	don't optimize array fields of virtualizables

	(otherwise they can end up being passed around too much)

diff --git a/rpython/translator/backendopt/cse.py b/rpython/translator/backendopt/cse.py
--- a/rpython/translator/backendopt/cse.py
+++ b/rpython/translator/backendopt/cse.py
@@ -255,11 +255,27 @@
                 self._clear_heapcache_for(concretetype, fieldname)
                 self.heapcache[target, concretetype, fieldname] = op.args[2]
                 continue
+            if op.opname == "jit_force_virtualizable":
+                T = op.args[0].concretetype
+                FIELD = getattr(T.TO, op.args[1].value)
+                if hasattr(FIELD, 'TO') and isinstance(FIELD.TO, lltype.GcArray):
+                    # clear the cache for the virtualizable array fields, as
+                    # they run the risk of being passed around too much
+                    self._clear_heapcache_for_effects(
+                        {('struct', T, op.args[1].value)})
+
             if has_side_effects(op):
                 self._clear_heapcache_for_effects_of_op(op)
                 continue
 
             # foldable operations
+            if op.opname == "cast_pointer":
+                # cast_pointer is a pretty strange operation! It introduces
+                # more aliases, which confuse the CSE pass. Therefore we unify
+                # the two variables in new_unions to improve the folding.
+                self.new_unions.union(op.args[0], op.result)
+                # don't do anything further
+                continue
             if not can_fold(op):
                 continue
             key = (op.opname, op.result.concretetype,
@@ -270,11 +286,6 @@
                 added_same_as += 1
             else:
                 self.purecache[key] = op.result
-            if op.opname == "cast_pointer":
-                # cast_pointer is a pretty strange operation! it introduces
-                # more aliases, that confuse the CSE pass. Therefore we unify
-                # the two variables in new_unions, to improve the folding.
-                self.new_unions.union(op.args[0], op.result)
         return added_same_as
 
 def _merge(tuples, variable_families, analyzer, loop_blocks, backedges):
diff --git a/rpython/translator/backendopt/stat.py b/rpython/translator/backendopt/stat.py
--- a/rpython/translator/backendopt/stat.py
+++ b/rpython/translator/backendopt/stat.py
@@ -1,65 +1,99 @@
 from rpython.translator.simplify import get_graph
 from hashlib import md5
+from collections import defaultdict
+
+def find_reachable_graphs(graph, translator, ignore_stack_checks=False):
+    seen_graphs = set()
+    stack = [graph]
+    while stack:
+        graph = stack.pop()
+        if graph in seen_graphs:
+            continue
+        seen_graphs.add(graph)
+        yield graph
+        for block, op in graph.iterblockops():
+            if op.opname == "direct_call":
+                called_graph = get_graph(op.args[0], translator)
+                if called_graph is not None and ignore_stack_checks:
+                    if called_graph.name.startswith('ll_stack_check'):
+                        continue
+                if called_graph is not None:
+                    stack.append(called_graph)
+            elif op.opname == "indirect_call":
+                called_graphs = op.args[-1].value
+                if called_graphs is not None:
+                    stack.extend(called_graphs)
+
 
 def get_statistics(graph, translator, save_per_graph_details=None, ignore_stack_checks=False):
-    seen_graphs = {}
-    stack = [graph]
     num_graphs = 0
     num_blocks = 0
     num_ops = 0
     num_mallocs = 0
+    num_memory = 0
     per_graph = {}
-    while stack:
-        graph = stack.pop()
-        if graph in seen_graphs:
-            continue
-        seen_graphs[graph] = True
+    for graph in find_reachable_graphs(graph, translator, ignore_stack_checks):
         num_graphs += 1
         old_num_blocks = num_blocks
         old_num_ops = num_ops
         old_num_mallocs = num_mallocs
+        old_num_memory = num_memory
         for block in graph.iterblocks():
             num_blocks += 1
             for op in block.operations:
-                if op.opname == "direct_call":
-                    called_graph = get_graph(op.args[0], translator)
-                    if called_graph is not None and ignore_stack_checks:
-                        if called_graph.name.startswith('ll_stack_check'):
-                            continue
-                    if called_graph is not None:
-                        stack.append(called_graph)
-                elif op.opname == "indirect_call":
-                    called_graphs = op.args[-1].value
-                    if called_graphs is not None:
-                        stack.extend(called_graphs)
-                elif op.opname.startswith("malloc"):
+                if op.opname.startswith("malloc"):
                     num_mallocs += 1
+                elif op.opname.startswith(("get", "set")):
+                    num_memory += 1
                 num_ops += 1
-        per_graph[graph] = (num_blocks-old_num_blocks, num_ops-old_num_ops, num_mallocs-old_num_mallocs)
+        per_graph[graph] = (num_blocks-old_num_blocks, num_ops-old_num_ops, num_mallocs-old_num_mallocs, num_memory-old_num_memory)
     if save_per_graph_details:
         details = []
-        for graph, (nblocks, nops, nmallocs) in per_graph.iteritems():
+        for graph, (nblocks, nops, nmallocs, nmemory) in per_graph.iteritems():
             try:
                 code = graph.func.func_code.co_code
             except AttributeError:
                 code = "None"
             hash = md5(code).hexdigest()
-            details.append((hash, graph.name, nblocks, nops, nmallocs))
+            details.append((hash, graph.name, nblocks, nops, nmallocs, nmemory))
         details.sort()
         f = open(save_per_graph_details, "w")
         try:
-            for hash, name, nblocks, nops, nmallocs in details:
-                print >>f, hash, name, nblocks, nops, nmallocs
+            for hash, name, nblocks, nops, nmallocs, nmemory in details:
+                print >>f, hash, name, nblocks, nops, nmallocs, nmemory
         finally:
             f.close()
-    return num_graphs, num_blocks, num_ops, num_mallocs
+    return num_graphs, num_blocks, num_ops, num_mallocs, num_memory
 
 def print_statistics(graph, translator, save_per_graph_details=None, ignore_stack_checks=False):
-    num_graphs, num_blocks, num_ops, num_mallocs = get_statistics(
+    num_graphs, num_blocks, num_ops, num_mallocs, num_memory = get_statistics(
             graph, translator, save_per_graph_details,
             ignore_stack_checks=ignore_stack_checks)
     print ("Statistics:\nnumber of graphs %s\n"
            "number of blocks %s\n"
            "number of operations %s\n"
            "number of mallocs %s\n"
-           ) % (num_graphs, num_blocks, num_ops, num_mallocs)
+           "number of memory operations %s\n"
+           ) % (num_graphs, num_blocks, num_ops, num_mallocs, num_memory)
+    calls = defaultdict(int)
+    opnames = defaultdict(int)
+    for graph in find_reachable_graphs(graph, translator):
+        for block, op in graph.iterblockops():
+            opnames[op.opname] += 1
+            if op.opname == "direct_call":
+                called_graph = get_graph(op.args[0], translator)
+                if called_graph is not None and ignore_stack_checks:
+                    if called_graph.name.startswith('ll_stack_check'):
+                        continue
+                if called_graph is not None:
+                    calls[called_graph] += 1
+            elif op.opname == "indirect_call":
+                called_graphs = op.args[-1].value
+                if called_graphs is not None:
+                    for called_graph in called_graphs:
+                        calls[called_graph] += 1
+    for num, name in sorted((num, name) for (name, num) in opnames.iteritems()):
+        print name, num
+    print
+    for num, graph in sorted((num, graph) for (graph, num) in calls.iteritems())[-100:]:
+        print graph.name, num
diff --git a/rpython/translator/backendopt/test/test_cse.py b/rpython/translator/backendopt/test/test_cse.py
--- a/rpython/translator/backendopt/test/test_cse.py
+++ b/rpython/translator/backendopt/test/test_cse.py
@@ -465,6 +465,28 @@
                 return cls.user_overridden_class
         self.check(f, [int], getfield=0)
 
+    def test_dont_fold_virtualizable(self):
+        class A(object):
+            _virtualizable_ = ["x[*]", "y"]
+
+        a1 = A()
+        a1.x = [1, 2, 3]
+        a1.y = 2
+        a2 = A()
+        a2.x = [65, 4, 3]
+        a2.y = 8
+        def f(i):
+            if i:
+                a = a1
+            else:
+                a = a2
+            res = a.x[0]
+            res += a.y
+            if i == 10:
+                res += a.x[1]
+                res += a.y
+            return res
+        self.check(f, [int], getfield=3)
 
 
 def fakevar(name='v'):


More information about the pypy-commit mailing list