[Python-Dev] example checkers based on compiler package

Jeremy Hylton jeremy@cnri.reston.va.us
Mon, 6 Mar 2000 14:28:12 -0500 (EST)


There was some discussion on python-dev over the weekend about
generating warnings, and Moshe Zadka posted a selfnanny that warned
about methods that didn't have self as the first argument.

I think these kinds of warnings are useful, and I'd like to see a more
general framework for them built around the Python abstract syntax
originally from P2C.  Ideally, they would be available as command line
tools and integrated into GUIs like IDLE in some useful way.

I've included a couple of quick examples I coded up last night based
on the compiler package (recently re-factored) that is resident in
python/nondist/src/Compiler.  The analysis on the one that checks for
name errors is a bit of a mess, but the overall structure seems right.

I'm hoping to collect a few more examples of checkers and generalize
from them to develop a framework for checking for errors and reporting
them.

Jeremy

------------ checkself.py ------------
"""Check for methods that do not have self as the first argument"""

from compiler import parseFile, walk, ast, misc

class Warning:
    """One diagnostic about a method of a class.

    Instances only carry data.  Converting one to a string fills
    _template in from the instance attributes.
    """

    _template = "%(filename)s:%(lineno)s %(klass)s.%(method)s: %(msg)s"

    def __init__(self, filename, klass, method, lineno, msg):
        # where the method lives
        self.filename, self.lineno = filename, lineno
        # which method it is
        self.klass, self.method = klass, method
        # what is wrong with it
        self.msg = msg

    def __str__(self):
        fields = vars(self)
        return self._template % fields

class NoArgsWarning(Warning):
    """Warning for a method that is declared without any arguments."""

    # Handle on the base-class constructor so __init__ can chain to it.
    super_init = Warning.__init__

    def __init__(self, filename, klass, method, lineno):
        msg = "no arguments"
        self.super_init(filename, klass, method, lineno, msg)

class NotSelfWarning(Warning):
    """Warning for a method whose first argument is not named self.

    argname is the name found in the first argument slot; it is quoted
    in the message.
    """

    # Handle on the base-class constructor so __init__ can chain to it.
    super_init = Warning.__init__

    def __init__(self, filename, klass, method, lineno, argname):
        # Wrap argname in a one-tuple: if the first parameter is a
        # tuple-unpacking parameter, argname is itself a tuple and a
        # bare "%s" % argname would raise TypeError.
        self.super_init(filename, klass, method, lineno,
                        "self slot is named %s" % (argname,))

class CheckSelf:
    """Visitor that warns about methods lacking self as first argument.

    Warnings are collected in self.warnings.  self.scope is the stack of
    class and function nodes that enclose the node being visited.
    """

    def __init__(self, filename):
        self.filename = filename
        self.warnings = []
        self.scope = misc.Stack()

    def inClass(self):
        """Return true if the innermost enclosing scope is a class."""
        if not self.scope:
            return 0
        innermost = self.scope.top()
        return isinstance(innermost, ast.Class)

    def _descend(self, node):
        # Visit the body of node with node as the innermost scope.
        self.scope.push(node)
        self.visit(node.code)
        self.scope.pop()

    def _checkSelfSlot(self, func):
        # Record a warning if func, a method of the class on top of the
        # scope stack, has no arguments or a first argument not named self.
        classname = self.scope.top().name
        argnames = func.argnames
        if len(argnames) == 0:
            found = NoArgsWarning(self.filename, classname, func.name,
                                  func.lineno)
        elif argnames[0] != "self":
            found = NotSelfWarning(self.filename, classname, func.name,
                                   func.lineno, argnames[0])
        else:
            return
        self.warnings.append(found)

    def visitClass(self, klass):
        self._descend(klass)
        return 1

    def visitFunction(self, func):
        # Only functions defined directly in a class body are methods;
        # the test has to happen before func itself becomes the scope.
        if self.inClass():
            self._checkSelfSlot(func)
        self._descend(func)
        return 1

def check(filename):
    global p, check
    p = parseFile(filename)
    check = CheckSelf(filename)
    walk(p, check)
    for w in check.warnings:
        print w

if __name__ == "__main__":
    import sys

    # XXX need to do real arg processing
    # Only the first argument is used; without one, print a usage
    # message instead of failing with an IndexError traceback.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s file.py\n" % sys.argv[0])
        sys.exit(2)
    check(sys.argv[1])

------------ badself.py ------------
# Module-level function with no arguments.  It is not defined inside a
# class, so CheckSelf.visitFunction does not look at its argument list.
def foo():
    return 12

# Methods of this class exercise each branch of CheckSelf.visitFunction.
class Foo:
    # No arguments at all: a NoArgsWarning is recorded.
    def __init__():
        pass

    # First argument is named self: nothing is recorded.
    def foo(self, foo):
        pass

    # First argument is named this: a NotSelfWarning is recorded.
    def bar(this, that):
        # baz is nested in a function, not directly in the class body,
        # so its first argument is not checked.
        def baz(this=that):
            return this
        return baz

# A class defined inside a function.  Its methods are still checked,
# because the class is the innermost scope while they are visited.
def bar():
    class Quux:
        def __init__(self):
            self.sum = 1
        # First argument is named x: a NotSelfWarning is recorded.
        # The body also refers to a name self that quam never binds.
        def quam(x, y):
            self.sum = self.sum + (x * y)
    return Quux()

------------ checknames.py ------------
"""Check for NameErrors"""

from compiler import parseFile, walk
from compiler.misc import Stack, Set

import __builtin__
from UserDict import UserDict

class Warning:
    """Base class for the diagnostics collected by the name checker.

    The class itself defines no _template; __str__ expects one to be
    available (the subclasses provide it) and fills it in from the
    instance attributes.
    """

    def __init__(self, filename, funcname, lineno):
        self.filename, self.funcname, self.lineno = filename, funcname, lineno

    def __str__(self):
        fields = vars(self)
        return self._template % fields

class UndefinedLocal(Warning):
    """Warning about a local name that is used before it is defined.

    Adds the offending name to the location fields kept by Warning.
    """

    _template = "%(filename)s:%(lineno)s  %(funcname)s undefined local %(name)s"

    # Handle on the base-class constructor so __init__ can chain to it.
    super_init = Warning.__init__

    def __init__(self, filename, funcname, lineno, name):
        self.name = name
        self.super_init(filename, funcname, lineno)

class NameError(UndefinedLocal):
    """Warning about a name that has no definition at all.

    Within this module the class shadows the builtin exception of the
    same name.
    """

    _template = ("%(filename)s:%(lineno)s  %(funcname)s "
                 "undefined name %(name)s")

class NameSet(UserDict):
    """Track names and the line numbers where they are referenced"""

    def __init__(self):
        # names and data are one and the same dictionary, so the mapping
        # interface inherited from UserDict sees everything add() stores.
        table = {}
        self.names = table
        self.data = table

    def add(self, name, lineno):
        """Append lineno to the list of line numbers kept for name."""
        try:
            self.names[name].append(lineno)
        except KeyError:
            # first time this name is seen
            self.names[name] = [lineno]

class CheckNames:
    """Visitor that collects name definitions and uses, scope by scope.

    Walking a parse tree with an instance only gathers data.  Call
    check() afterwards to analyse every function and class namespace
    and append UndefinedLocal / NameError warnings to self.warnings.
    """

    def __init__(self, filename):
        self.filename = filename
        self.warnings = []
        self.scope = Stack()
        # module-level uses and definitions
        self.gUse = NameSet()
        self.gDef = NameSet()
        # _locals is the stack of local namespaces; every entry is a
        # (use, def, globals) tuple of NameSets.
        # lUse, lDef and lGlobals are the top of that stack, or None
        # while no function or class namespace is open.
        self._locals = Stack()
        self.lUse = None
        self.lDef = None
        self.lGlobals = None # var declared global
        # holds (scope, def, use, global) tuples for later analysis
        self.todo = []

    def enterNamespace(self, node):
        """Open a fresh local namespace for the function or class node."""
        self.scope.push(node)
        self.lUse = use = NameSet()
        self.lDef = _def = NameSet()
        self.lGlobals = gbl = NameSet()
        self._locals.push((use, _def, gbl))

    def exitNamespace(self):
        """Close the current namespace and queue it for check()."""
        self.todo.append((self.scope.top(), self.lDef, self.lUse,
                          self.lGlobals))
        self.scope.pop()
        self._locals.pop()
        # make the enclosing namespace, if any, current again
        if self._locals:
            self.lUse, self.lDef, self.lGlobals = self._locals.top()
        else:
            self.lUse = self.lDef = self.lGlobals = None

    def warn(self, warning, funcname, lineno, *args):
        """Record warning(filename, funcname, lineno, *args).

        warning is the warning class to instantiate.
        """
        args = (self.filename, funcname, lineno) + args
        self.warnings.append(apply(warning, args))

    def defName(self, name, lineno, local=1):
        """Record that name is defined at lineno.

        At module level the definition is global.  Inside a namespace
        it is local unless local is 0, in which case it is recorded as
        a global definition and as a name declared global.
        """
        if self.lUse is None:
            self.gDef.add(name, lineno)
        elif local == 0:
            self.gDef.add(name, lineno)
            self.lGlobals.add(name, lineno)
        else:
            self.lDef.add(name, lineno)

    def useName(self, name, lineno, local=1):
        """Record that name is used at lineno.

        At module level the use is global.  Inside a namespace it is
        recorded as a local use; with local 0 it is recorded as a
        global use as well.
        """
        if self.lUse is None:
            self.gUse.add(name, lineno)
        elif local == 0:
            self.gUse.add(name, lineno)
            self.lUse.add(name, lineno)
        else:
            self.lUse.add(name, lineno)

    def check(self):
        """Analyse every namespace queued by exitNamespace()."""
        for s, d, u, g in self.todo:
            self._check(s, d, u, g, self.gDef)
        # XXX then check the globals

    def _check(self, scope, _def, use, gbl, globals):
        """Warn about undefined names and undefined locals in scope.

        _def and use map the names defined and used in scope to lists
        of line numbers; globals does the same for module-level
        definitions.  gbl (names declared global in scope) is accepted
        but not consulted.
        """
        # check for NameError
        # a name is defined iff it is in defs.keys()
        # a name is global iff it is in gdefs.keys()
        gdefs = UserDict()
        gdefs.update(globals)
        gdefs.update(__builtin__.__dict__)
        defs = UserDict()
        defs.update(gdefs)
        defs.update(_def)
        errors = Set()
        for name in use.keys():
            if not defs.has_key(name):
                firstuse = use[name][0]
                self.warn(NameError, scope.name, firstuse, name)
                errors.add(name)

        # check for UndefinedLocalNameError
        # order == use & def sorted by lineno
        # elements are lineno, flag, name
        # flag = 0 if use, flag = 1 if def
        # (so a use sorts before a def on the same line)
        order = []
        for name, lines in use.items():
            if gdefs.has_key(name) and not _def.has_key(name):
                # this is a global ref, we can skip it
                continue
            for lineno in lines:
                # list.append takes exactly one argument, so the
                # element has to be built as an explicit tuple
                order.append((lineno, 0, name))
        for name, lines in _def.items():
            for lineno in lines:
                order.append((lineno, 1, name))
        order.sort()
        # ready contains names that have been defined or warned about
        ready = Set()
        for lineno, flag, name in order:
            if flag == 0: # use
                if not ready.has_elt(name) and not errors.has_elt(name):
                    self.warn(UndefinedLocal, scope.name, lineno, name)
                    ready.add(name) # don't warn again
            else:
                ready.add(name)

    # below are visitor methods
    # (returning 1 presumably tells the walker that the children have
    # already been visited -- confirm against the visitor protocol)

    def visitFunction(self, node, noname=0):
        # defaults are visited before the new namespace is entered, so
        # the names they use are recorded in the enclosing scope
        for expr in node.defaults:
            self.visit(expr)
        if not noname:
            self.defName(node.name, node.lineno)
        self.enterNamespace(node)
        # arguments count as defined on the line of the definition
        for name in node.argnames:
            self.defName(name, node.lineno)
        self.visit(node.code)
        self.exitNamespace()
        return 1

    def visitLambda(self, node):
        # same as a function, except that no name is defined
        return self.visitFunction(node, noname=1)

    def visitClass(self, node):
        # base class expressions belong to the enclosing scope
        for expr in node.bases:
            self.visit(expr)
        self.defName(node.name, node.lineno)
        self.enterNamespace(node)
        self.visit(node.code)
        self.exitNamespace()
        return 1

    def visitName(self, node):
        self.useName(node.name, node.lineno)

    def visitGlobal(self, node):
        for name in node.names:
            self.defName(name, node.lineno, local=0)

    def visitImport(self, node):
        for name in node.names:
            self.defName(name, node.lineno)

    visitFrom = visitImport

    def visitAssName(self, node):
        self.defName(node.name, node.lineno)
    
def check(filename):
    global p, checker
    p = parseFile(filename)
    checker = CheckNames(filename)
    walk(p, checker)
    checker.check()
    for w in checker.warnings:
        print w

if __name__ == "__main__":
    import sys

    # XXX need to do real arg processing
    # Only the first argument is used; without one, print a usage
    # message instead of failing with an IndexError traceback.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s file.py\n" % sys.argv[0])
        sys.exit(2)
    check(sys.argv[1])

------------ badnames.py ------------
# XXX can we detect race conditions on accesses to global variables?
#     probably can (conservatively) by noting variables _created_ by
#     global decls in funcs
import string
import time

# y is defined neither in foo nor at module level in this file, so
# CheckNames is expected to report "undefined name y".
def foo(x):
    return x + y

# z is not local to foo2.  It counts as a global only because bar
# declares "global z": visitGlobal records such names as global defs.
def foo2(x):
    return x + z

# Module-level binding of a; foo3 below also assigns a as a local.
a = 4

# a is assigned in foo3, so it is recorded as a local definition.  The
# right-hand side reads a on the very line that first binds it; since
# uses sort before definitions on the same line, CheckNames is expected
# to report "undefined local a".
def foo3(x):
    a, b = x, a

# z is assigned before the global declaration appears.  CheckNames
# records the assignment as a local definition of z and the
# declaration as a global definition of z.
def bar(x):
    z = x
    global z

# The import inside the function is meant to make string a local name
# (visitImport records imported names as definitions), so the use of
# string.strip on the first line precedes that definition.
# Expected report: "undefined local string".
def bar2(x):
    f = string.strip
    a = f(x)
    import string
    return string.lower(a)

# x and y are arguments; z is resolved as in foo2, through the global
# declaration made in bar.
def baz(x, y):
    return x + y + z

# inner reads x, which belongs to outer.  CheckNames compares each
# namespace only with its own definitions, the globals and the
# builtins, so "undefined name x" is the expected report for inner.
def outer(x):
    def inner(y):
        return x + y
    return inner