[pypy-commit] pypy better-storesink: experiment with a more general common subexpression elimination.

cfbolz pypy.commits at gmail.com
Fri Sep 16 17:12:46 EDT 2016


Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch: better-storesink
Changeset: r87150:c204353fe193
Date: 2016-08-22 13:47 +0100
http://bitbucket.org/pypy/pypy/changeset/c204353fe193/

Log:	experiment with a more general common subexpression elimination.

	The algorithm is a simple forward pass that merges information at
	control flow merge points. It subsumes storesink.py and
	remove_duplicate_casts.
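
The new pass can be exercised on a single graph by following the pattern of the
tests added below. This is only a minimal sketch, assuming an RPython checkout;
TranslationContext, graphof, summary and removenoops are the same helpers that
test_cse.py already uses:

    from rpython.translator.translator import TranslationContext, graphof
    from rpython.translator.backendopt.cse import CSE
    from rpython.translator.backendopt import removenoops
    from rpython.flowspace.model import summary

    def f(i):
        # both (i + 1) * (i + 1) computations should collapse into a
        # single int_add/int_mul pair after the pass
        x = (i + 1) * (i + 1)
        y = (i + 1) * (i + 1)
        return x - y

    t = TranslationContext()
    t.buildannotator().build_types(f, [int])
    t.buildrtyper().specialize()
    graph = graphof(t, f)
    removenoops.remove_same_as(graph)   # mirror the test harness
    CSE(t).transform(graph)
    print summary(graph)                # expect int_add=1, int_mul=1

The merge case, where a value computed before a branch is reused after the
control flow merge, is what test_optimize_across_merge below checks.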

diff --git a/rpython/translator/backendopt/cse.py b/rpython/translator/backendopt/cse.py
new file mode 100644
--- /dev/null
+++ b/rpython/translator/backendopt/cse.py
@@ -0,0 +1,178 @@
+import collections
+
+from rpython.translator.backendopt import support
+from rpython.rtyper.lltypesystem.lloperation import llop
+from rpython.flowspace.model import mkentrymap, Variable
+from rpython.translator.backendopt import removenoops
+from rpython.translator import simplify
+from rpython.translator.backendopt import ssa
+
+def has_side_effects(op):
+    if op.opname == 'debug_assert' or op.opname == 'jit_force_virtualizable':
+        return False
+    try:
+        return getattr(llop, op.opname).sideeffects
+    except AttributeError:
+        return True
+
+def cse_graph(graph):
+    return CSE([graph]).transform(graph)
+
+def can_fold(op):
+    return getattr(llop, op.opname).canfold
+
+class Cache(object):
+    def __init__(self, variable_families, purecache=None, heapcache=None):
+        if purecache is None:
+            purecache = {}
+        if heapcache is None:
+            heapcache = {}
+        self.purecache = purecache
+        self.heapcache = heapcache
+        self.variable_families = variable_families
+
+    def copy(self):
+        return Cache(
+                self.variable_families, self.purecache.copy(),
+                self.heapcache.copy())
+
+
+    def merge(self, firstlink, tuples):
+        purecache = {}
+        block = firstlink.target
+        # copy all operations that exist in *all* blocks over. need to add a new
+        # inputarg if the result is really a variable
+        for key, res in self.purecache.iteritems():
+            for link, cache in tuples[1:]:
+                val = cache.purecache.get(key, None)
+                if val is None:
+                    break
+            else:
+                newres = res
+                if isinstance(res, Variable):
+                    newres = res.copy()
+                    for link, cache in tuples:
+                        link.args.append(cache.purecache[key])
+                    block.inputargs.append(newres)
+                purecache[key] = newres
+
+        # merge heapcache
+        heapcache = {}
+        for key, res in self.heapcache.iteritems():
+            for link, cache in tuples[1:]:
+                val = cache.heapcache.get(key, None)
+                if val is None:
+                    break
+            else:
+                newres = res
+                if isinstance(res, Variable):
+                    newres = res.copy()
+                    for link, cache in tuples:
+                        link.args.append(cache.heapcache[key])
+                    block.inputargs.append(newres)
+                heapcache[key] = newres
+
+        return Cache(self.variable_families, purecache, heapcache)
+
+    def _clear_heapcache_for(self, concretetype, fieldname):
+        for k in self.heapcache.keys():
+            if k[0].concretetype == concretetype and k[1] == fieldname:
+                del self.heapcache[k]
+
+    def cse_block(self, block):
+        def representative_arg(arg):
+            if isinstance(arg, Variable):
+                return self.variable_families.find_rep(arg)
+            return arg
+        added_some_same_as = False
+        for op in block.operations:
+            # heap operations
+            if op.opname == 'getfield':
+                tup = (representative_arg(op.args[0]), op.args[1].value)
+                res = self.heapcache.get(tup, None)
+                if res is not None:
+                    op.opname = 'same_as'
+                    op.args = [res]
+                    added_some_same_as = True
+                else:
+                    self.heapcache[tup] = op.result
+                continue
+            if op.opname in ('setarrayitem', 'setinteriorfield', "malloc", "malloc_varsize"):
+                continue
+            if op.opname == 'setfield':
+                target = representative_arg(op.args[0])
+                field = op.args[1].value
+                self._clear_heapcache_for(target.concretetype, field)
+                self.heapcache[target, field] = op.args[2]
+                continue
+            if has_side_effects(op):
+                self.heapcache.clear()
+                continue
+
+            # foldable operations
+            if not can_fold(op):
+                continue
+            key = (op.opname, op.result.concretetype,
+                   tuple([representative_arg(arg) for arg in op.args]))
+            res = self.purecache.get(key, None)
+            if res is not None:
+                op.opname = 'same_as'
+                op.args = [res]
+                added_some_same_as = True
+                self.variable_families.union(res, op.result)
+            else:
+                self.purecache[key] = op.result
+        return added_some_same_as
+
+def _merge(tuples, variable_families):
+    if not tuples:
+        return Cache(variable_families)
+    if len(tuples) == 1:
+        (link, cache), = tuples
+        return cache.copy()
+    firstlink, firstcache = tuples[0]
+    return firstcache.merge(firstlink, tuples)
+
+class CSE(object):
+    def __init__(self, translator):
+        self.translator = translator
+
+    def transform(self, graph):
+        variable_families = ssa.DataFlowFamilyBuilder(graph).get_variable_families()
+        entrymap = mkentrymap(graph)
+        backedges = support.find_backedges(graph)
+        todo = collections.deque([graph.startblock])
+        caches_to_merge = collections.defaultdict(list)
+        done = set()
+
+        added_some_same_as = False
+
+        while todo:
+            block = todo.popleft()
+            can_cache = True
+            for link in entrymap[block]:
+                if link in backedges:
+                    can_cache = False
+
+            if block.operations:
+                if not can_cache:
+                    cache = Cache(variable_families)
+                else:
+                    cache = _merge(caches_to_merge[block], variable_families)
+                changed_block = cache.cse_block(block)
+                added_some_same_as = changed_block or added_some_same_as
+            done.add(block)
+            # add all target blocks where all predecessors are already done
+            for exit in block.exits:
+                for lnk in entrymap[exit.target]:
+                    if lnk.prevblock not in done and lnk not in backedges:
+                        break
+                else:
+                    if exit.target not in done:
+                        todo.append(exit.target)
+                caches_to_merge[exit.target].append((exit, cache))
+        if added_some_same_as:
+            ssa.SSA_to_SSI(graph)
+            removenoops.remove_same_as(graph)
+        simplify.transform_dead_op_vars(graph)
+
diff --git a/rpython/translator/backendopt/test/test_cse.py b/rpython/translator/backendopt/test/test_cse.py
new file mode 100644
--- /dev/null
+++ b/rpython/translator/backendopt/test/test_cse.py
@@ -0,0 +1,214 @@
+import pytest
+from rpython.translator.translator import TranslationContext, graphof
+from rpython.translator.backendopt.cse import CSE
+from rpython.translator.backendopt import removenoops
+from rpython.flowspace.model import checkgraph, summary
+from rpython.conftest import option
+
+class TestStoreSink(object):
+    def translate(self, func, argtypes):
+        t = TranslationContext()
+        t.buildannotator().build_types(func, argtypes)
+        t.buildrtyper().specialize()
+        return t
+
+    def check(self, f, argtypes, **expected):
+        from rpython.translator.backendopt import inline, all, constfold
+        t = self.translate(f, argtypes)
+        getfields = 0
+        graph = graphof(t, f)
+        if option.view:
+            t.view()
+        removenoops.remove_same_as(graph)
+        checkgraph(graph)
+        cse = CSE(t)
+        cse.transform(graph)
+        if option.view:
+            t.view()
+        checkgraph(graph)
+        s = summary(graph)
+        for key, val in expected.items():
+            assert s.get(key, 0) == val
+        assert "same_as" not in s
+
+    def test_infrastructure(self):
+        def f(i):
+            x = (i + 1) * (i + 1)
+            y = (i + 1) * (i + 1)
+            return x - y
+
+        self.check(f, [int], int_add=1, int_mul=1)
+
+    def test_split(self):
+        def f(i, j):
+            k = i + 1
+            if j:
+                return i + 1
+            return k * (i + 1)
+
+        self.check(f, [int, int], int_add=1)
+
+    def test_merge(self):
+        def f(i, j):
+            if j:
+                k = i + 1
+                j = 1
+            else:
+                j = i + 1
+                k = 5
+            return k * j * (i + 1)
+
+        # an add in each branch, but not the final block
+        self.check(f, [int, int], int_add=2)
+
+    def test_optimize_across_merge(self):
+        def f(i, j):
+            k = i + 1
+            if j:
+                j = 1
+            else:
+                j = i + 1
+            return k * j * (i + 1)
+        self.check(f, [int, int], int_add=1)
+
+
+    def test_getfield(self):
+        class A(object):
+            pass
+
+        def f(i):
+            a = A()
+            a.x = i
+            return a.x + a.x
+
+        self.check(f, [int], getfield=0)
+
+    def test_irrelevant_setfield(self):
+        class A(object):
+            pass
+
+        def f(i):
+            a = A()
+            a.x = i
+            one = a.x
+            a.y = 3
+            two = a.x
+            return one + two
+
+        self.check(f, [int], getfield=0)
+
+    def test_relevant_setfield(self):
+        class A(object):
+            pass
+
+        def f(i):
+            a = A()
+            b = A()
+            a.x = i
+            b.x = i + 1
+            one = a.x
+            b.x = i
+            two = a.x
+            return one + two
+
+        self.check(f, [int], getfield=2)
+
+    def test_different_concretetype(self):
+        class A(object):
+            pass
+
+        class B(object):
+            pass
+
+        def f(i):
+            a = A()
+            b = B()
+            a.x = i
+            one = a.x
+            b.x = i + 1
+            two = a.x
+            return one + two
+
+        self.check(f, [int], getfield=0)
+
+    def test_subclass(self):
+        class A(object):
+            pass
+
+        class B(A):
+            pass
+
+        def f(i):
+            a = A()
+            b = B()
+            a.x = i
+            one = a.x
+            b.x = i + 1
+            two = a.x
+            return one + two
+
+        self.check(f, [int], getfield=1)
+
+    def test_bug_1(self):
+        class A(object):
+            pass
+
+        def f(i):
+            a = A()
+            a.cond = i > 0
+            n = a.cond
+            if a.cond:
+                return True
+            return n
+
+        self.check(f, [int], getfield=0)
+
+
+    def test_cfg_splits_getfield(self):
+        class A(object):
+            pass
+
+        def f(i):
+            a = A()
+            j = i
+            for i in range(i):
+                a.x = i
+                if i:
+                    j = a.x + a.x
+                else:
+                    j = a.x * 5
+            return j
+
+        self.check(f, [int], getfield=0)
+
+    def test_malloc_does_not_invalidate(self):
+        class A(object):
+            pass
+        class B(object):
+            pass
+
+        def f(i):
+            a = A()
+            a.x = i
+            b = B()
+            return a.x
+
+        self.check(f, [int], getfield=0)
+
+    def test_merge_heapcache(self):
+        class A(object):
+            pass
+
+        def f(i):
+            a = A()
+            j = i
+            for i in range(i):
+                a.x = i
+                if i:
+                    j = a.x + a.x
+                else:
+                    j = a.x * 5
+                j += a.x
+            return j
+
+        self.check(f, [int], getfield=0)

