[pypy-commit] pypy better-storesink: document and cleanup stuff
cfbolz
pypy.commits at gmail.com
Sun Nov 20 16:53:51 EST 2016
Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch: better-storesink
Changeset: r88500:94adb0d43fce
Date: 2016-11-20 22:45 +0100
http://bitbucket.org/pypy/pypy/changeset/94adb0d43fce/
Log: document and cleanup stuff
diff --git a/rpython/translator/backendopt/cse.py b/rpython/translator/backendopt/cse.py
--- a/rpython/translator/backendopt/cse.py
+++ b/rpython/translator/backendopt/cse.py
@@ -1,3 +1,9 @@
+""" A very simple common subexpression elimination pass. It's a very simple
+forward pass, that simply eliminates operations that were executed in all paths
+leading to the current block. Information flows strictly forward, using a cache
+of already seen operations. Caches are merged at control flow merges.
+
+No loop invariant code motion occurs (yet). """
import collections
from rpython.translator.backendopt import support
@@ -40,6 +46,7 @@
heapcache = {}
if new_unions is None:
new_unions = unionfind.UnionFind()
+ # (opname, concretetype of result, args) -> previous (live) result
self.purecache = purecache
self.heapcache = heapcache
self.variable_families = variable_families
@@ -61,6 +68,8 @@
# the *cache dictionaries, never to actually put any new variable into
# the graph, because the concretetypes can change when calling
# _var_rep.
+ if not isinstance(var, Variable):
+ return var
var = self.new_unions.find_rep(var)
return self.variable_families.find_rep(var)
@@ -71,6 +80,9 @@
return (opname, concretetype, tuple(listargs))
def _find_new_res(self, results):
+ """ merges a list of results into a new variable. If all the results
+ are the same, just use that, in which case it's not necessary to pass
+ it along any links either. """
# helper function for _merge_results
first = self._var_rep(results[0])
newres = None
@@ -101,17 +113,27 @@
backedge.args.append(newres)
return newres
- def merge(self, firstlink, tuples, backedges):
+ def _merge(self, firstlink, tuples, backedges):
+ """ The core algorithm of merging: actually merge many caches. """
purecache = {}
block = firstlink.target
- # copy all operations that exist in *all* blocks over. need to add a new
- # inputarg if the result is really a variable
+ # copy all operations that exist in *all* blocks over.
# note that a backedge is not a problem for regular pure operations:
# since the argument is a phi node iff it is not loop invariant,
# copying things over is always safe (yay SSA form!)
- # try non-straight merges
+ # try non-straight merges: they are merges where the operands are
+ # different in the previous blocks, but where the arguments themselves
+ # are merged into a new variable in the target block
+ # this is code like this:
+ # if <cond>
+ # x = i + 1
+ # a = i
+ # else:
+ # y = j + 1
+ # a = j
+ # here, a + 1 is redundant, and can be replaced by the merge of x and y
for argindex in range(len(block.inputargs)):
inputarg = block.inputargs[argindex]
# bit slow, but probably ok
@@ -136,8 +158,9 @@
newres = self._merge_results(tuples, results, backedges)
purecache[newkey] = newres
+ # the simple case: the operation is really performed on the *same*
+ # operands. This is the case if the key exists in all other caches
for key, res in self.purecache.iteritems():
- # "straight" merge: the variable is in all other caches
results = [res]
for link, cache in tuples[1:]:
val = cache.purecache.get(key, None)
@@ -228,10 +251,7 @@
self.new_unions.union(res, op.result)
def cse_block(self, block):
- def representative_arg(arg):
- if isinstance(arg, Variable):
- return self._var_rep(arg)
- return arg
+ """ perform common subexpression elimination on block. """
added_same_as = 0
for opindex in range(len(block.operations) - block.canraise):
op = block.operations[opindex]
@@ -239,7 +259,7 @@
if op.opname == 'getfield':
fieldname = op.args[1].value
concretetype = op.args[0].concretetype
- arg0 = representative_arg(op.args[0])
+ arg0 = self._var_rep(op.args[0])
key = (arg0, op.args[0].concretetype, fieldname)
res = self.heapcache.get(key, None)
if res is not None:
@@ -250,20 +270,20 @@
continue
if op.opname == 'setfield':
concretetype = op.args[0].concretetype
- target = representative_arg(op.args[0])
+ target = self._var_rep(op.args[0])
fieldname = op.args[1].value
key = (target, concretetype, fieldname)
res = self.heapcache.get(key, None)
if (res is not None and
- representative_arg(res) ==
- representative_arg(op.args[2])):
+ self._var_rep(res) ==
+ self._var_rep(op.args[2])):
# writing the same value that's already there
op.opname = "same_as"
op.args = [Constant("not needed setfield", lltype.Void)]
added_same_as += 1
continue
self._clear_heapcache_for(concretetype, fieldname)
- self.heapcache[target, concretetype, fieldname] = op.args[2]
+ self.heapcache[key] = op.args[2]
continue
if op.opname == "jit_force_virtualizable":
T = op.args[0].concretetype
@@ -276,7 +296,7 @@
if op.opname == "malloc_varsize":
# we can remember the size of the malloced object
key = ("getarraysize", lltype.Signed,
- (representative_arg(op.result), ))
+ (self._var_rep(op.result), ))
self.purecache[key] = op.args[2]
can_fold_op = can_fold(op)
@@ -289,12 +309,12 @@
# can't hash pointers, so use the graph directly
key = ("direct_call", op.result.concretetype,
(funcobj.graph, ) +
- tuple([representative_arg(arg)
+ tuple([self._var_rep(arg)
for arg in op.args[1:]]))
can_fold_op = True
elif can_fold_op:
key = (op.opname, op.result.concretetype,
- tuple([representative_arg(arg) for arg in op.args]))
+ tuple([self._var_rep(arg) for arg in op.args]))
if has_side_effects_op:
@@ -318,21 +338,26 @@
self.purecache[key] = op.result
return added_same_as
-def _merge(tuples, variable_families, analyzer, loop_blocks, backedges):
- if not tuples:
- return Cache(variable_families, analyzer)
- if len(tuples) == 1:
- (link, cache), = tuples
- result = cache.copy()
- else:
- firstlink, firstcache = tuples[0]
- result = firstcache.merge(firstlink, tuples, backedges)
- if loop_blocks:
- # for all blocks in the loop, clean the heapcache for their effects
- # that way, loop-invariant reads can be removed, if no one writes to
- # anything that can alias with them.
- result._clear_heapcache_for_loop_blocks(loop_blocks)
- return result
+ @staticmethod
+ def merge(tuples, variable_families, analyzer, loop_blocks, backedges):
+ """ merge list of
+ (incoming link, cache in that link's prevblock)
+ tuples into one new cache that holds in the target block.
+ """
+ if not tuples:
+ return Cache(variable_families, analyzer)
+ if len(tuples) == 1:
+ (link, cache), = tuples
+ result = cache.copy()
+ else:
+ firstlink, firstcache = tuples[0]
+ result = firstcache._merge(firstlink, tuples, backedges)
+ if loop_blocks:
+ # for all blocks in the loop, clean the heapcache for their effects
+ # that way, loop-invariant reads can be removed, if no one writes to
+ # anything that can alias with them.
+ result._clear_heapcache_for_loop_blocks(loop_blocks)
+ return result
def compute_reachability_no_backedges(graph, backedges):
reachable = {}
@@ -374,25 +399,27 @@
result[block] = loop_blocks
return result
- loop_blocks = support.find_loop_blocks(graph)
- result = {}
- for loop_block, start in loop_blocks.iteritems():
- result.setdefault(start, []).append(loop_block)
- return result
-
class CSE(object):
def __init__(self, translator):
self.translator = translator
self.analyzer = WriteAnalyzer(translator)
def transform(self, graph):
+ """ Perform CSE on graph. """
+ # this optimization really works on SSA graphs. don't transform the
+ # graph, but use the data flow analysis of SSA to figure out which SSI
+ # variables would be the same SSA variable
variable_families = ssa.DataFlowFamilyBuilder(graph).get_variable_families()
entrymap = mkentrymap(graph)
backedges = support.find_backedges(graph)
loops = loop_blocks(graph, backedges, entrymap)
todo = collections.deque([graph.startblock])
+
+ # map blocks to a list of
+ # (incoming link, cache in that link's prevblock)
caches_to_merge = collections.defaultdict(list)
done = set()
+ enqueued = {graph.startblock}
added_same_as = 0
@@ -404,21 +431,24 @@
if link in backedges]
if not block.is_final_block():
- cache = _merge(
+ cache = Cache.merge(
caches_to_merge[block], variable_families, self.analyzer,
loops.get(block, None), current_backedges)
added_same_as += cache.cse_block(block)
else:
- cache = Cache(variable_families, self.analyzer)
+ cache = None
done.add(block)
- # add all target blocks where all predecessors are already done
+ # add all target blocks where all predecessors are already done,
+ # or are backedges
for exit in block.exits:
for lnk in entrymap[exit.target]:
if lnk.prevblock not in done and lnk not in backedges:
break
else:
- if exit.target not in done and exit.target not in todo: # XXX
+ if exit.target not in done and exit.target not in enqueued:
todo.append(exit.target)
+ enqueued.add(exit.target)
+ assert cache is not None # final blocks don't have exits
caches_to_merge[exit.target].append((exit, cache))
simplify.transform_dead_op_vars(graph)
if added_same_as:
More information about the pypy-commit
mailing list