[pypy-svn] r13275 - pypy/dist/pypy/tool

cfbolz at codespeak.net cfbolz at codespeak.net
Fri Jun 10 20:48:19 CEST 2005


Author: cfbolz
Date: Fri Jun 10 20:48:18 2005
New Revision: 13275

Added:
   pypy/dist/pypy/tool/import_graph.py
Log:
Added a simple tool that creates a module import dependency graph as a
.dot file. At the moment the result is not usable (too many edges); I'm
working on it.


Added: pypy/dist/pypy/tool/import_graph.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/tool/import_graph.py	Fri Jun 10 20:48:18 2005
@@ -0,0 +1,215 @@
+from __future__ import division
+import autopath
+import py
+
+import math
+import random
+import sets
+
+# Basenames that never become graph nodes: include_file() rejects any file
+# whose basename appears in this list.
+exclude_files = ["__init__.py", "autopath.py", "conftest.py"]
+
+def include_file(path):
+    """Tell whether the file at `path` should become a node of the graph.
+
+    The file is rejected when the string form of its path contains one of
+    the fragments "test", "tool", "documentation", "pyrex" or "_cache"
+    anywhere (directory names included), or when its basename is listed in
+    exclude_files.  Returns True for every other file.
+    """
+    full_name = str(path)
+    for fragment in ("test", "tool", "documentation", "pyrex", "_cache"):
+        if fragment in full_name:
+            return False
+    return path.basename not in exclude_files
+
+def get_mod_from_path(path):
+    """Return the dotted module name for `path`, starting at "pypy".
+
+    The directory part of the path is split on "/" and everything before
+    the first component named "pypy" is dropped; whatever
+    path.get("purebasename") returns is appended (presumably the file name
+    without its extension -- confirm against py.path).  list.index raises
+    ValueError when no directory component is called "pypy".
+    """
+    directories = path.get("dirname")[0].split("/")
+    package_parts = directories[directories.index("pypy"):]
+    # path.get() hands back a list, so it can be concatenated directly
+    return ".".join(package_parts + path.get("purebasename"))
+
+
+def find_references(path):
+    """Collect the top-level pypy imports of the file at `path`.
+
+    Returns a list of (dotted_name, alias) tuples; alias is None unless the
+    import statement used "as".  For "from a import b" the name is "a.b",
+    which may denote an object inside module "a" rather than a module.
+    Lines starting with four spaces and lines containing a backslash are
+    skipped.  Statements that cannot be parsed are reported on stdout and
+    ignored.
+    """
+    refs = []
+    source = path.open("r")
+    try:
+        for line in source:
+            if line.startswith("    "): # ignore local imports to reduce graph size
+                continue
+            if "\\" in line: # ignore line continuations
+                continue
+            # drop surrounding whitespace and any trailing comment
+            line = line.strip().split("#")[0].strip()
+            if line.startswith("import pypy."): # import pypy.bla.whatever
+                if " as " not in line:
+                    refs.append((line[7:].strip(), None))
+                else: # import pypy.bla.whatever as somethingelse
+                    assert line.count(" as ") == 1
+                    name, alias = line.split(" as ")
+                    refs.append((name[7:].strip(), alias.strip()))
+            elif line.startswith("from ") and "pypy" in line: # from pypy.b import a
+                line = line[5:]
+                if " import " not in line:
+                    # nothing to split on: report it instead of failing
+                    # with an IndexError further down
+                    print "can't handle this: " + line
+                    continue
+                if " as " not in line:
+                    package, names = line.split(" import ")[:2]
+                    for name in names.split(","):
+                        refs.append((package.strip() + "." + name.strip(), None))
+                else: # from pypy.b import a as c
+                    if line.count(" as ") != 1 or "," in line:
+                        print "can't handle this: " + line
+                        continue
+                    what, alias = line.split(" as ")
+                    what = what.replace(" import ", ".").replace(" ", "")
+                    refs.append((what, alias.strip()))
+    finally:
+        # release the file handle even when a line trips the assert above
+        source.close()
+    return refs
+
+def get_module(ref, imports):
+    """Return the longest dotted prefix of `ref` that is found in `imports`.
+
+    `ref` itself is tried first, then it is shortened one trailing
+    component at a time.  Returns None when no prefix is contained in
+    `imports`.
+    """
+    parts = ref.split(".")
+    for length in range(len(parts), 0, -1):
+        candidate = ".".join(parts[:length])
+        if candidate in imports:
+            return candidate
+    return None
+
+def casteljeau(points, t):
+    """Evaluate the Bezier curve with control values `points` at `t`.
+
+    De Casteljau's scheme: neighbouring values are blended with the weights
+    (1 - t) and t, which shortens the list by one per round, until a single
+    value is left.  The work is done on a copy, so the caller's list is not
+    modified.  An empty `points` raises IndexError.
+    """
+    work = points[:]
+    remaining = len(work) - 1
+    while remaining > 0:
+        for index in range(remaining):
+            # work[index + 1] still holds the value from the previous round
+            work[index] = work[index] * (1 - t) + work[index + 1] * t
+        work.pop()
+        remaining -= 1
+    return work[0]
+
+def color(t):
+    """Map `t` to a bump-shaped value: 0 at t=0 and t=1, 1.0 at t=0.5.
+
+    The curve with control values [0, 0, 1, 0, 0] peaks at 0.375 (reached
+    at t=0.5); dividing by that value normalises the peak to 1.0.
+    """
+    points = [0, 0, 1, 0, 0]
+    return casteljeau(points, t) / 0.375
+
+class ModuleGraph(object):
+    """Import graph of the modules found by scanning a directory.
+
+    Attributes:
+      imports             -- module name -> list of (imported module, alias)
+      incoming            -- module name -> Set of the modules importing it
+      clusters            -- cluster name -> Set of member module names
+      mod_to_cluster      -- module name -> name of its cluster
+      clustered           -- True once create_clusters() has run
+      topgraph_properties -- lines written at the top of the dot graph
+    """
+    def __init__(self, path):
+        """Build the graph from the "*.py" files yielded by path.visit()."""
+        self.imports = {}
+        self.clusters = {}
+        self.mod_to_cluster = {}
+        self.clustered = False
+        for f in path.visit("*.py"):
+            if include_file(f):
+                self.imports[get_mod_from_path(f)] = find_references(f)
+        self.remove_object_refs()
+        self.remove_double_refs()
+        self.incoming = {}
+        for mod in self.imports:
+            self.incoming[mod] = sets.Set()
+        for mod, refs in self.imports.iteritems():
+            for ref in refs:
+                if ref[0] in self.incoming:
+                    self.incoming[ref[0]].add(mod)
+        self.remove_single_nodes()
+        self.topgraph_properties = ["rankdir=LR"]
+
+    def remove_object_refs(self):
+        """Make every reference point at a scanned module.
+
+        Reduces cases like import pypy.translator.genc.basetype.CType to
+        import pypy.translator.genc.basetype (the alias is dropped in that
+        case).  References with no scanned module as prefix are removed.
+        """
+        for mod, refs in self.imports.iteritems():
+            resolved = []
+            for ref in refs:
+                if ref[0] in self.imports:
+                    resolved.append(ref)
+                    continue
+                nref = get_module(ref[0], self.imports)
+                if nref is None:
+                    print "removing", repr(ref)
+                else:
+                    resolved.append((nref, None))
+            # update in place so the list stored in self.imports is kept
+            refs[:] = resolved
+
+    def remove_double_refs(self):
+        """Keep only the first reference to each module per importer.
+
+        Duplicates are recognised by module name alone; comparing the whole
+        (module, alias) tuple would keep one edge per distinct alias.
+        """
+        for mod, refs in self.imports.iteritems():
+            seen = sets.Set()
+            unique = []
+            for ref in refs:
+                if ref[0] not in seen:
+                    seen.add(ref[0])
+                    unique.append(ref)
+            refs[:] = unique
+
+    def remove_single_nodes(self):
+        """Remove nodes that have neither outgoing nor incoming edges."""
+        # collect first: the dicts must not change size while iterated
+        rem = []
+        for mod, refs in self.imports.iteritems():
+            if not refs and not self.incoming[mod]:
+                rem.append(mod)
+        for m in rem:
+            del self.incoming[m]
+            del self.imports[m]
+
+    def create_clusters(self):
+        """Group the modules into clusters.
+
+        The cluster of a module is its name component at the first package
+        level where the module names differ.  Modules whose name ends at or
+        before that level are not put into any cluster.
+        """
+        self.topgraph_properties.append("compound=true;")
+        self.clustered = True
+        split_names = {}
+        depth = 0
+        for mod in self.imports:
+            split_names[mod] = mod.split(".")
+            depth = max(depth, len(split_names[mod]))
+        # one Set of component names per level, as deep as the longest name
+        hierarchy = [sets.Set() for i in range(depth)]
+        for parts in split_names.itervalues():
+            for i, d in enumerate(parts):
+                hierarchy[i].add(d)
+        level = 0
+        for level in range(depth):
+            if len(hierarchy[level]) != 1:
+                break
+        for mod, parts in split_names.iteritems():
+            if level >= len(parts) - 1:
+                # the name has no package component at this level
+                continue
+            cluster = parts[level]
+            if cluster not in self.clusters:
+                self.clusters[cluster] = sets.Set()
+            self.clusters[cluster].add(mod)
+            self.mod_to_cluster[mod] = cluster
+
+    def remove_tangling_randomly(self):
+        """Thin out the edges of modules with more than 10 importers.
+
+        For each such module randomly chosen importers, at least 80% of
+        them, lose their edge to it.  Nodes left without any edge are
+        removed afterwards.
+        """
+        tangled = []
+        for mod, incoming in self.incoming.iteritems():
+            if len(incoming) > 10:
+                tangled.append(mod)
+        for mod in tangled:
+            remove = sets.Set()
+            incoming = self.incoming[mod]
+            candidates = list(incoming)
+            while len(remove) < len(incoming) * 0.80:
+                remove.add(random.choice(candidates))
+            for rem in remove:
+                refs = self.imports[rem]
+                # ref[0] is the imported module, ref[1] only its alias
+                refs[:] = [ref for ref in refs if ref[0] != mod]
+                incoming.remove(rem)
+                print "removing", mod, "<-", rem
+        self.remove_single_nodes()
+
+    def dotfile(self, dot):
+        """Write the graph in dot syntax to the file `dot`.
+
+        `dot` must offer open("w").  The first five characters of every
+        module name are cut off in the output (presumably the "pypy."
+        prefix -- confirm).
+        """
+        f = dot.open("w")
+        try:
+            f.write("digraph G {\n")
+            for prop in self.topgraph_properties:
+                f.write("\t%s\n" % prop)
+            #write clusters and inter-cluster edges
+            for cluster, nodes in self.clusters.iteritems():
+                f.write("\tsubgraph cluster_%s {\n" % cluster)
+                f.write("\t\tstyle=filled;\n\t\tcolor=lightgrey\n")
+                for node in nodes:
+                    f.write('\t\t"%s";\n' % node[5:])
+                for mod, refs in self.imports.iteritems():
+                    for ref in refs:
+                        if mod in nodes and ref[0] in nodes:
+                            f.write('\t\t"%s" -> "%s";\n' % (mod[5:], ref[0][5:]))
+                f.write("\t}\n")
+            #write edges between clusters
+            for mod, refs in self.imports.iteritems():
+                try:
+                    nodes = self.clusters[self.mod_to_cluster[mod]]
+                except KeyError:
+                    # unclustered module: all of its edges are written here
+                    nodes = sets.Set()
+                for ref in refs:
+                    if ref[0] not in nodes:
+                        f.write('\t"%s" -> "%s";\n' % (mod[5:], ref[0][5:]))
+            f.write("}")
+        finally:
+            # close the file even when a write fails
+            f.close()
+
+if __name__ == "__main__":
+    # Usage: import_graph.py [directory]
+    # Scans the given directory (default: the current one) and writes the
+    # clustered graph to "import_graph.dot" inside that directory.
+    import sys
+    target = "."
+    if len(sys.argv) > 1:
+        target = sys.argv[1]
+    path = py.path.local(target)
+    gr = ModuleGraph(path)
+    gr.create_clusters()
+    gr.dotfile(path.join("import_graph.dot"))



More information about the Pypy-commit mailing list