Recursive list comprehension

Wed Dec 8 16:08:17 EST 2004

On Wed, 2004-12-08 at 15:02, Steven Bethard wrote:
> Adam DePrince wrote:
> > def flatten( i ):
> >     try:
> >         i = i.__iter__()
> >         while 1:
> >             j = flatten( i.next() )
> >             try:
> >                 while 1:
> >                     yield j.next()
> >             except StopIteration:
> >                 pass
> >     except AttributeError:
> >         yield i
> 
> 
> Probably you want to catch a TypeError instead of an AttributeError; 
> objects may support the iterator protocol without defining an __iter__ 
> method:
> 
>  >>> class C:
> ...     def __getitem__(self, index):
> ...         if index > 3:
> ...             raise IndexError(index)
> ...         return index
> ...
>  >>> list(iter(C()))
> [0, 1, 2, 3]
> 
> I would write your code as something like:
> 
>  >>> def flatten(i):
> ...     try:
> ...         if isinstance(i, basestring):
> ...             raise TypeError('strings are atomic')
> ...         iterable = iter(i)
> ...     except TypeError:
> ...         yield i
> ...     else:
> ...         for item in iterable:
> ...             for sub_item in flatten(item):
> ...                 yield sub_item
> ...
>  >>> list(flatten([['N','F'],['E'],['D']]))
> ['N', 'F', 'E', 'D']
>  >>> list(flatten([C(), 'A', 'B', ['C', C()]]))
> [0, 1, 2, 3, 'A', 'B', 'C', 0, 1, 2, 3]
> 
> Note that I special-case strings because, while strings support the 
> iterator protocol, in this case we want to consider them 'atomic'.  By 
> catching the TypeError instead of an AttributeError, I can support 
> old-style iterators as well.

Of course, iter( "a" ).next() is "a" -- if you don't look for the
special case of a string you will spin until you blow your stack.   The
problem with a special case is it misses objects that have string like
behavior but are not members of basestring.  This change deals with that
case, albeit at the expense of some performance (it adds a small
O(depth) factor)).  

def flatten(i, history=[]):
    try:
        if reduce( lambda x,y:x or y, map( lambda x:i is x, history ),\
False ):
            raise TypeError('Dej' )
        iterable = iter(i)
    except TypeError:
        yiel>>> list(flatten([C(), 'A', 'B', ['C', C()]]))
> [0, 1, 2, 3, 'A', 'B', 'C', 0, 1, 2, 3]
> 
> Note that I special-case strings because, while strings support the 
> iterator protocol, in this case we want to consider them 'atomic'.  By 
> catching the TypeError instead of an AttributeError, I can support 
> old-style iterators as well.

Of course, iter( "a" ).next() is "a" -- if you don't look for the
special case of a string you will spin until you blow your stack.   The
problem with a special case is it misses objects that have string like
behavior but are not members of basestring.  This change deals with that
case, albeit at the expense of some performance (it adds a small
O(depth) factor)).  

def flatten(i, history=[]):
    try:
        if isinstance( i,basestring) or reduce( lambda x,y:x or y, map(
lambda x:i is x, history ),\ False ):
            raise TypeError('strings are atomic' )
        iterable = iter(i)
    except TypeError:
        yield i
    else:
        history = history + [i] 
        for item in iterable:
            for sub_item in flatten(item, history ):
                yield sub_item

if __name__ == "__main__":
    print list( flatten( a ) )
    print list( flatten( b ) )
    print list( flatten( c ) )

> 
> Steve
Adam DePrince