[pypy-commit] pypy stm-gc: A tracker that attempts to follow globally where GC pointers go.

arigo noreply at buildbot.pypy.org
Thu Feb 16 12:24:38 CET 2012


Author: Armin Rigo <arigo at tunes.org>
Branch: stm-gc
Changeset: r52545:ac10a97ed5c2
Date: 2012-02-16 12:14 +0100
http://bitbucket.org/pypy/pypy/changeset/ac10a97ed5c2/

Log:	A tracker that attempts to follow globally where GC pointers go.

diff --git a/pypy/translator/stm/gcsource.py b/pypy/translator/stm/gcsource.py
new file mode 100644
--- /dev/null
+++ b/pypy/translator/stm/gcsource.py
@@ -0,0 +1,106 @@
+from pypy.objspace.flow.model import Variable
+from pypy.rpython.lltypesystem import lltype
+from pypy.translator.simplify import get_graph
+
+
+COPIES_POINTER = set([   # opnames whose result is treated as a copy of args[0]
+    'force_cast', 'cast_pointer', 'same_as', 'cast_opaque_ptr',
+    ])
+
+
+def _is_gc(var_or_const):   # true if concretetype is an lltype.Ptr to a 'gc' kind
+    TYPE = var_or_const.concretetype
+    return isinstance(TYPE, lltype.Ptr) and TYPE.TO._gckind == 'gc'
+
+def enum_gc_dependencies(translator):
+    """Return a list of pairs (var-or-const-or-op, var) that together
+    describe the flow of GC pointers through all of translator.graphs.
+    If the source is a SpaceOperation, it means 'produced by this operation
+    but we can't follow what this operation does'.  If the source is None,
+    it means 'coming from somewhere, unsure where'.
+    """
+    # Tracking dependencies of only GC pointers simplifies the logic here.
+    # We don't have to worry about external calls and callbacks.
+    # This works by assuming that each graph's callers are fully tracked by
+    # 'direct_call' and by the last argument to 'indirect_call'.  Graphs that
+    # are never found as the target of such a call are assumed to be called
+    # 'from the outside', with arbitrary arguments passed to them.
+    resultlist = []
+    was_a_callee = set()      # graphs seen as the target of a tracked call
+    # helper: record the GC pointers flowing in and out of one call to 'graph'
+    def call(graph, args, result):
+        inputargs = graph.getargs()
+        assert len(args) == len(inputargs)
+        for v1, v2 in zip(args, inputargs):
+            if _is_gc(v2):
+                assert _is_gc(v1)
+                resultlist.append((v1, v2))     # caller's arg -> callee's arg
+        if _is_gc(result):
+            v = graph.getreturnvar()
+            assert _is_gc(v)
+            resultlist.append((v, result))      # callee's return -> op.result
+        was_a_callee.add(graph)
+    # main loop: look at every operation and every link of every graph
+    for graph in translator.graphs:
+        for block in graph.iterblocks():
+            for op in block.operations:
+                # pointer copy: the result simply comes from args[0]
+                if op.opname in COPIES_POINTER:
+                    if _is_gc(op.result) and _is_gc(op.args[0]):
+                        resultlist.append((op.args[0], op.result))
+                        continue
+                # direct call: tracked if get_graph() finds the target graph
+                if op.opname == 'direct_call':
+                    tograph = get_graph(op.args[0], translator)
+                    if tograph is not None:
+                        call(tograph, op.args[1:], op.result)
+                        continue
+                # indirect call: the last arg holds the candidate graphs or None
+                if op.opname == 'indirect_call':
+                    tographs = op.args[-1].value
+                    if tographs is not None:
+                        for tograph in tographs:
+                            call(tograph, op.args[1:-1], op.result)
+                        continue
+                # everything not handled above: the op itself is the source
+                if _is_gc(op.result):
+                    resultlist.append((op, op.result))
+            # links: each passed value flows into the target block's inputarg
+            for link in block.exits:
+                for v1, v2 in zip(link.args, link.target.inputargs):
+                    if _is_gc(v2):
+                        assert _is_gc(v1)
+                        resultlist.append((v1, v2))
+    # graphs never seen as a callee get GC arguments of unknown origin (None)
+    for graph in translator.graphs:
+        if graph not in was_a_callee:
+            for v in graph.getargs():
+                if _is_gc(v):
+                    resultlist.append((None, v))
+    return resultlist
+
+
+class GcSource(object):
+    """Works like a read-only dict {gcptr-var: set-of-sources}.  A source
+    is a Constant, a SpaceOperation that creates the value, or None which
+    means 'no clue'.  A variable with no recorded source gives an empty set."""
+
+    def __init__(self, translator):
+        self.translator = translator
+        self._backmapping = {}      # {var: [direct sources of var]}
+        for v1, v2 in enum_gc_dependencies(translator):
+            self._backmapping.setdefault(v2, []).append(v1)
+
+    def __getitem__(self, variable):    # transitive sources of 'variable'
+        result = set()
+        pending = [variable]        # worklist; grows while it is iterated
+        seen = set(pending)         # variables already queued, each visited once
+        for v2 in pending:
+            for v1 in self._backmapping.get(v2, ()):
+                if isinstance(v1, Variable):    # intermediate: follow it too
+                    if v1 not in seen:
+                        seen.add(v1)
+                        pending.append(v1)
+                else:       # not a Variable: a final source, keep it
+                    result.add(v1)
+        return result
diff --git a/pypy/translator/stm/test/test_gcsource.py b/pypy/translator/stm/test/test_gcsource.py
new file mode 100644
--- /dev/null
+++ b/pypy/translator/stm/test/test_gcsource.py
@@ -0,0 +1,117 @@
+from pypy.translator.translator import TranslationContext
+from pypy.translator.stm.gcsource import GcSource
+from pypy.objspace.flow.model import SpaceOperation, Constant
+from pypy.rpython.lltypesystem import lltype
+
+
+class X:     # small helper class instantiated by the tests in this file
+    def __init__(self, n):
+        self.n = n
+
+
+def gcsource(func, sig):    # annotate and rtype 'func', then build a GcSource
+    t = TranslationContext()
+    t.buildannotator().build_types(func, sig)
+    t.buildrtyper().specialize()
+    gsrc = GcSource(t)      # analyses every graph of the translation context
+    return gsrc
+
+def test_simple():    # a new X(n) has the 'malloc' operation as its only source
+    def main(n):
+        return X(n)
+    gsrc = gcsource(main, [int])
+    v_result = gsrc.translator.graphs[0].getreturnvar()   # assumes graphs[0] is main
+    s = gsrc[v_result]
+    assert len(s) == 1
+    [op] = list(s)
+    assert isinstance(op, SpaceOperation)
+    assert op.opname == 'malloc'
+
+def test_two_sources():    # two branches: one prebuilt constant, one 'malloc'
+    foo = X(42)
+    def main(n):
+        if n > 5:
+            return X(n)
+        else:
+            return foo
+    gsrc = gcsource(main, [int])
+    v_result = gsrc.translator.graphs[0].getreturnvar()
+    s = gsrc[v_result]
+    assert len(s) == 2
+    [s1, s2] = list(s)
+    if isinstance(s1, SpaceOperation):    # set order is arbitrary: normalize
+        s1, s2 = s2, s1
+    assert isinstance(s1, Constant)
+    assert s1.value.inst_n == 42
+    assert isinstance(s2, SpaceOperation)
+    assert s2.opname == 'malloc'
+
+def test_call():    # the source is followed through a called function's return
+    def f1(n):
+        return X(n)
+    def main(n):
+        return f1(n)
+    gsrc = gcsource(main, [int])
+    v_result = gsrc.translator.graphs[0].getreturnvar()
+    s = gsrc[v_result]
+    assert len(s) == 1
+    assert list(s)[0].opname == 'malloc'
+
+def test_indirect_call():    # both possible callees contribute their sources
+    foo = X(42)
+    def f1(n):
+        return X(n)
+    def f2(n):
+        return foo
+    lst = [f1, f2]
+    def main(n):
+        return lst[n % 2](n)    # callee is chosen at run time from lst
+    gsrc = gcsource(main, [int])
+    v_result = gsrc.translator.graphs[0].getreturnvar()
+    s = gsrc[v_result]
+    assert len(s) == 2
+    [s1, s2] = list(s)
+    if isinstance(s1, SpaceOperation):    # set order is arbitrary: normalize
+        s1, s2 = s2, s1
+    assert isinstance(s1, Constant)
+    assert s1.value.inst_n == 42
+    assert isinstance(s2, SpaceOperation)
+    assert s2.opname == 'malloc'
+
+def test_argument():    # the source is followed into a callee and back out
+    def f1(x):
+        return x
+    def main(n):
+        return f1(X(5))
+    gsrc = gcsource(main, [int])
+    v_result = gsrc.translator.graphs[0].getreturnvar()
+    s = gsrc[v_result]
+    assert len(s) == 1
+    assert list(s)[0].opname == 'malloc'
+
+def test_argument_twice():    # the sources of both call sites of f1 are merged
+    foo = X(42)
+    def f1(x):
+        return x
+    def main(n):
+        f1(foo)             # result unused, yet 'foo' still shows up below
+        return f1(X(5))
+    gsrc = gcsource(main, [int])
+    v_result = gsrc.translator.graphs[0].getreturnvar()
+    s = gsrc[v_result]
+    assert len(s) == 2
+    [s1, s2] = list(s)
+    if isinstance(s1, SpaceOperation):    # set order is arbitrary: normalize
+        s1, s2 = s2, s1
+    assert isinstance(s1, Constant)
+    assert s1.value.inst_n == 42
+    assert isinstance(s2, SpaceOperation)
+    assert s2.opname == 'malloc'
+
+def test_unknown_source():    # an argument of a never-called graph has source None
+    def main(x):
+        return x
+    gsrc = gcsource(main, [lltype.Ptr(lltype.GcStruct('S'))])
+    v_result = gsrc.translator.graphs[0].getreturnvar()
+    s = gsrc[v_result]
+    assert list(s) == [None]


More information about the pypy-commit mailing list