garbage collection / reference cycles (cont.)

Aaron Brady castironpi at gmail.com
Wed Mar 25 01:12:30 EDT 2009


On Mar 25, 12:11 am, Aaron Brady <castiro... at gmail.com> wrote:
> Hello,
>
> I am posting the code I mentioned on Saturday that collects garbage
> and cyclic garbage in a flattened two-step process.  The code takes
> 122 lines incl. comments, with 100 in tests.  It should be in a reply
> to this.
>
> My aim is a buffer-like object which can contain reference-counted
> objects.  This is a preliminary Python version of the cycle detector.
> I expect to port it to C++, but the buffer object as well as object
> proxies are Python objects.  The memory management strategy,
> synchronization, etc., are other modules.  It is similar in principle
> to Python's own 'gc'.  If it's sound, it may have some educational and
> explanatory value also.
>
> Anyway, since I received a little interest in it, I wanted to follow
> up.  It is free to play with.  If there's a better group to ask about
> this, or there are more scholarly, widely-used, or thorough treatments
> or implementations, I'm interested.

from collections import deque

class Globals:
    to_collect= deque() # FIFO of garbage that has been decref'ed;
                        # Queue them instead of nested 'gc' calls

    to_collect_set= set() # hash lookup of the same information
    ser_gc_running= False # bool flag if the GC is running

    def schedule_collect( ob ):
        ''' Add to FIFO- no gc call '''
        if ob in Globals.to_collect_set:
            return
        Globals.to_collect.append( ob )
        Globals.to_collect_set.add( ob )

    def serial_gc( ):
        ''' Visit objects which have been decref'ed.  If they
        have left reachability, enqueue the entire cycle they
        are in; this as opposed to nested 'final' calls. '''
        if Globals.ser_gc_running:
            return
        Globals.ser_gc_running= True

        while Globals.to_collect:
            ob= Globals.to_collect.popleft( )
            Globals.to_collect_set.remove( ob )
            if ob.ref_ct== 0:
                ob.final( )
            else:
                incycle= Globals.cycle_detect( ob )
                if incycle:
                    # Request object to drop its referenecs;
                    # re-queue the object.  (Potential
                    # infinite loop, if objects do not comply.)
                    for k, v in list( ob.__dict__.items( ) ):
                        if not isinstance( v, ManagedOb ):
                            continue
                        ob.final_attr( k )
                    Globals.schedule_collect( ob )


        Globals.ser_gc_running= False

    def cycle_detect( ob ):
        ''' Detect an unreachable reference cycle in the
        descendants of 'ob'.  Return True if so, False if
        still reachable.  Only called when walking the
        'to_collect' queue. '''
        parents= { } # disjunction( ancestors, descendants )
        bfs= deque( [ ob ] )
        refct_copy= { ob: ob.ref_ct }
        # copy the ref_ct's to a map;
        # decrement the copies on visit (breadth-first)
        while bfs:
            x= bfs.popleft( )
            for k, v in x.__dict__.items( ):
                if not isinstance( v, ManagedOb ):
                    continue
                if v not in refct_copy:
                    refct_copy[ v ]= v.ref_ct
                    bfs.append( v )
                if v not in parents:
                    parents[ v ]= set( )
                refct_copy[ v ]-= 1
                parents[ v ].add( ( x, k ) )
        print( 'parents', parents )
        print( 'refct_copy', refct_copy )

        # any extra-cyclic references?
        if refct_copy[ ob ]:
            return False

        # (ancestors && descendants) all zero?
        # --(breadth-first)
        bfs= deque( [ ob ] )
        visited= set( [ ob ] )
        while bfs:
            x= bfs.popleft( )
            for n, _ in parents[ x ]:
                if n in visited:
                    continue
                if refct_copy[ n ]:
                    return False
                visited.add( n )
                bfs.append( n )
        print( 'cycle of', ob, 'found' )
        return True

class ManagedOb:
    def __init__( self, name ):
        self.ref_ct= 1
        self.name= name
    def assign( self, attr, other ):
        ''' setattr function (basically) '''
        if hasattr( self, attr ):
            getattr( self, attr ).decref( )
        other.incref( )
        setattr( self, attr, other )
    def incref( self ):
        self.ref_ct+= 1
    def decref( self ):
        print( self, 'decref' )
        self.ref_ct-= 1
        # check for cycles and poss. delete
        Globals.schedule_collect( self )
        Globals.serial_gc( ) # trip the collector
    def final_attr( self, attr ):
        ''' magic function.  your object has left
        reachability and is requested to drop its
        reference to 'attr'. '''
        ob= getattr( self, attr )
        delattr( self, attr )
        ob.decref( )
    def final( self ):
        for _, v in self.__dict__.items( ):
            if not isinstance( v, ManagedOb ):
                continue
            v.decref( )
        print( 'finalizing', self )
    def __repr__( self ):
        return '<%s,%i>'% ( self.name, self.ref_ct )

def run( externals ):
    ''' create six global variables, which refer
    to eachother in different shapes.  only keep
    references to those in the 'externals' string. '''
    global p, q, r, s, t, u
    print( )
    p= ManagedOb( 'p' ) # created with reference count 1
    q= ManagedOb( 'q' )
    r= ManagedOb( 'r' )
    s= ManagedOb( 's' )
    t= ManagedOb( 't' )
    u= ManagedOb( 'u' )

    p.assign( 'at', q )
    q.assign( 'at', p )
    q.assign( 'at1', q )
    q.assign( 'at2', r )
    r.assign( 'at', q )
    s.assign( 'at', p )
    q.assign( 'at3', t )
    u.assign( 'at', t )

    # release references not in 'externals'
    for x in p, q, r, s, t, u:
        if x.name not in externals:
            x.decref()

    print( 'p: (q), q: (pqrt), r: (q), s: (p), t: (), u: (t)' )

def assert_exist( *args ):
    for arg in args:
        print( arg, 'exists' )
        assert arg.ref_ct> 0

def assert_destroyed( *args ):
    for arg in args:
        print( arg, 'destroyed' )
        assert arg.ref_ct== 0

run( 'psu' ) # external references to 'p', 's', and 'u'

p.decref() # decref 'p'.  should not free any.

assert_exist( p, q, r, s, t, u )

run( 'psu' ) # start over

p.decref()

s.decref() # decref 'p' and 's'.  should decref 'q', 'r',
           # and 't'.  should finalize 's', 'p', 'r', 'q'.

assert_exist( t, u )
assert_destroyed( p, q, r, s )

run( 'psu' )

s.decref()

p.decref() # same result, different order

assert_exist( t, u )
assert_destroyed( p, q, r, s )

run( 'psu' )

s.decref() # should finalize 's'.

assert_exist( p, q, r, t, u )
assert_destroyed( s )

run( 'qsu' ) # more.

q.decref()

assert_exist( p, q, r, s, t, u )

run( 'qsu' )

q.decref()

s.decref()

assert_exist( t, u )
assert_destroyed( p, q, r, s )

run( 'qsu' )

s.decref()

q.decref()

assert_exist( t, u )
assert_destroyed( p, q, r, s )

run( 'qsu' )

s.decref()

assert_exist( p, q, r, t, u )
assert_destroyed( s )




More information about the Python-list mailing list