Recursing into directories
Steven Majewski
sdm7g at Virginia.EDU
Tue Dec 11 12:01:08 EST 2001
On Mon, 10 Dec 2001, Gerhard [iso-8859-1] Häring wrote:
> Le 10/12/01 17:22, Ron écrivit:
> > Hi,
> >
> > Is there a built-in class (in dircache, os.path????) that
> > recurses thru a directory tree?
> >
> > I've written one using dircache.listdir, but if there is some-
> > thing pre-written, I'd rather use that...
>
> Only a function: os.path.walk
>
> And because I usually want a class, too, I didn't find it especially
> useful.
>
A function like os.path.walk is awkward because it's inverted -- you
need to do all of the work inside with a callback. Generators are
much neater -- you can invert it back again so that your processing
is on the outside where it belongs, and the directory walking is
hidden inside the generator which just produces the next pathname
on each call. Something like:
# recursive file iterator as a generator:
from __future__ import generators
from os import listdir, path, curdir
def Files( *paths ):
    """Recursively yield the path of every file found under *paths*.

    Each argument is a directory to list; with no arguments the current
    directory (``curdir``) is used.  Entries are visited in ``listdir``
    order: an entry for which ``path.isfile`` is true is yielded, an
    entry for which ``path.isdir`` is true is descended into at once,
    and anything else is skipped.
    """
    roots = paths or ( curdir, )
    for root in roots:
        for entry in listdir( root ):
            full = path.join( root, entry )
            if path.isfile( full ):
                yield full
                continue
            if not path.isdir( full ):
                continue
            # Depth-first: drain the sub-directory before moving on.
            for nested in Files( full ):
                yield nested
Then you can do: 'for file in Files( ... )' ,
Or better yet, wrap the generator in a class.
The example below (which I've posted before) allows you to do something like:
for x in Files() & isGif & fileOwner('sdm7g') & fileLargerThan(512) |range(20):
to loop over the first 20 Gif files larger than 512 bytes owned by 'sdm7g'.
(or if you don't like the overloading of "&" and "|", use a test in a
list comprehension -- both are clearer than the functional notation. )
-- Steve Majewski
------
from __future__ import generators,nested_scopes
## You need:
## a generator: (you can also start with a list)
def Ints():
    """Endless generator of the integers 0, 1, 2, ... in ascending order."""
    current = 0
    while True:
        yield current
        current = current + 1
def Always( arg ):
    """Endless generator that yields the same object, *arg*, every time."""
    while True:
        yield arg
## one or more filters:
def Test( gen, test ):
    """Filter: yield only the items of *gen* for which ``test(item)`` is true."""
    for item in gen:
        if not test( item ):
            continue
        yield item
## and a terminator:
## by condition:
def Quit( gen, test ):
    """Terminator: yield items of *gen* until ``test(item)`` is true.

    The item that triggers the test is not yielded, and nothing after it
    is pulled from *gen*.
    """
    for x in gen:
        if test( x ):
            # A plain return ends the generator.  Raising StopIteration
            # inside a generator body is turned into RuntimeError on
            # Python 3.7+ (PEP 479).
            return
        yield x
# or by count:
def Count( gen, n ):
    """Terminator: yield at most the first *n* items of *gen*.

    A count of zero (or less) yields nothing.  Previously such a count
    never reached the ``n == 0`` exit test and the whole of *gen* was
    passed through, which never ends for an infinite generator.
    """
    if n <= 0:
        return
    for x in gen:
        yield x
        n -= 1
        # Stop right after the n-th item so no extra item is pulled
        # from the underlying generator.
        if n <= 0:
            break
## shorthand names so the lines don't get too long...
def odd( x ):
    """Filter generator *x* down to the odd values."""
    return Test( x, lambda y: y % 2 )

def enough( x ):
    """Cut generator *x* off at the first value greater than 100."""
    return Quit( x, lambda y: y > 100 )

def notdiv3( x ):
    """Filter generator *x* down to values not divisible by 3."""
    return Test( x, lambda y: y % 3 )

print( "\n odd ints that are not divisible by 3 under 100:" )
for i in notdiv3( enough( odd( Ints() ) ) ):
    print( i )

print( "\n first 20 odd ints not divisible by 3:" )
for i in Count( notdiv3( odd( Ints() ) ), 20 ):
    print( i )
# recursive file iterator as a generator:
from os import listdir, path, curdir
def Files( *paths ):
    """Recursively yield the path of every file found under *paths*.

    Each argument is a directory to list; with no arguments the current
    directory (``curdir``) is used.  Entries are visited in ``listdir``
    order: an entry for which ``path.isfile`` is true is yielded, an
    entry for which ``path.isdir`` is true is descended into at once,
    and anything else is skipped.
    """
    roots = paths or ( curdir, )
    for root in roots:
        for entry in listdir( root ):
            full = path.join( root, entry )
            if path.isfile( full ):
                yield full
                continue
            if not path.isdir( full ):
                continue
            # Depth-first: drain the sub-directory before moving on.
            for nested in Files( full ):
                yield nested
import os, stat
def fileLargerThan( n ):
    """Return a predicate that is true for paths whose file size exceeds *n*.

    The size is read from ``os.stat(path)[stat.ST_SIZE]`` each time the
    predicate is called.
    """
    # The threshold is bound through the default argument `size`, and the
    # comparison now reads that binding.  The original bound `size` but
    # compared against the enclosing `n`, leaving the default dead and
    # making the lambda depend on nested scopes.
    return lambda s, size=n: os.stat( s )[stat.ST_SIZE] > size
def fileExt( ext ):
    """Return a predicate that is true for paths whose extension equals *ext*.

    The comparison is case-insensitive, and *ext* includes the leading
    dot (for example ``'.gif'``), as produced by ``os.path.splitext``.
    """
    def has_ext( s ):
        suffix = os.path.splitext( s )[1]
        return suffix.lower() == ext.lower()
    return has_ext
## predicate: true for paths ending in '.gif' (case-insensitive)
isGif = fileExt( '.gif' )

## This is MUCH nicer than using os.path.walk() with a callback!
## find the first 20 gifs in your cwd...
for f in Count( Test( Files(), isGif ), 20 ):
    print( f )
def fileSize( test ):
    """Return a predicate that applies *test* to the size of a named file.

    The returned function takes a path, reads its size from
    ``os.stat(path)[stat.ST_SIZE]`` and returns ``test(size)``.
    """
    def check_size( name ):
        size = os.stat( name )[stat.ST_SIZE]
        return test( size )
    return check_size
import pwd
def fileOwner( uname ):
    """Return a predicate that is true for paths owned by user *uname*.

    The user name is resolved to a numeric uid once, via
    ``pwd.getpwnam``, when fileOwner itself is called.  The returned
    function then compares that uid with ``os.stat(path)[stat.ST_UID]``.
    """
    owner_uid = pwd.getpwnam( uname )[2]
    def owned_by( fname ):
        return os.stat( fname )[stat.ST_UID] == owner_uid
    return owned_by
# Module-level aliases for the Test and Count generator functions.
# NOTE(review): nothing in the visible source reads these aliases; presumably
# they were meant to keep the functions reachable next to the Genpipe methods
# of the same names -- confirm before removing.
_test = Test
_count = Count
class Gen:
    """Thin wrapper that lets a generator or other iterable be used in ``for``.

    The wrapped object is kept in ``self.generator``.
    """
    def __init__( self, generator ):
        self.generator = generator
    def __iter__( self ):
        # __iter__ must return an iterator.  iter() hands a generator back
        # unchanged but converts a plain list (or other iterable), which
        # the original returned as-is and therefore failed on.
        return iter( self.generator )
class Genpipe(Gen):
    """Gen with chainable pipeline stages.

    Each stage replaces ``self.generator`` with a wrapped generator and
    returns ``self``, so calls can be strung together:
    ``Genpipe(src).Test(pred).Count(10)``.
    """
    def Test( self, pred ):
        """Add a filter stage built by the module-level Test() with *pred*."""
        filtered = Test( self.generator, pred )
        self.generator = filtered
        return self
    def Count( self, n ):
        """Add a truncation stage built by the module-level Count() with *n*."""
        limited = Count( self.generator, n )
        self.generator = limited
        return self
    def __or__( self, other ):
        """``pipe | pred``: filter by *pred* when it is callable.

        A non-callable right-hand operand is ignored and the pipe is
        returned unchanged.
        """
        if not callable( other ):
            return self
        self.generator = Test( self.generator, other )
        return self
print( 'Genpipe test...' )
# First ten gif files found under the current directory.
gif_pipe = Genpipe( Files('.') ).Test( isGif ).Count(10)
for file in gif_pipe:
    print( file )
def Append( *generators ):
    """Concatenate: yield everything from each argument, one after another.

    The arguments are drained in the order given; each may be any
    iterable.
    """
    for source in generators:
        for item in source:
            yield item
def Alternate( *generators ):
    """Round-robin: yield one item from each argument in turn.

    Iteration stops as soon as any argument runs out, so items already
    yielded in the final, partial round are kept.  With no arguments
    nothing is yielded (the original looped forever without yielding).
    """
    sources = [ iter( g ) for g in generators ]
    if not sources:
        return
    while 1:
        for source in sources:
            try:
                # Built-in next() works on Python 2.6+ and 3; the .next()
                # method exists only on Python 2.
                item = next( source )
            except StopIteration:
                # End cleanly.  A StopIteration escaping a generator body
                # becomes RuntimeError on Python 3.7+ (PEP 479).
                return
            yield item
def Combine( *generators ):
    """Lock-step: yield tuples holding one item from each argument.

    Iteration stops as soon as any argument runs out; a partially built
    tuple is discarded.  With no arguments nothing is yielded (the
    original yielded empty tuples forever).
    """
    sources = [ iter( g ) for g in generators ]
    if not sources:
        return
    while 1:
        row = []
        for source in sources:
            try:
                # Built-in next() works on Python 2.6+ and 3; the .next()
                # method exists only on Python 2.
                row.append( next( source ) )
            except StopIteration:
                # End cleanly.  A StopIteration escaping a generator body
                # becomes RuntimeError on Python 3.7+ (PEP 479).
                return
        yield tuple( row )
class Genops(Genpipe):
    """Genpipe with operator shorthand for combining generators.

    ``&`` filters, ``|`` alternates with another iterable and ``+``
    appends another iterable.  Every operator rewrites
    ``self.generator`` in place and returns ``self``.
    """
    def __and__( self, other ):
        """``pipe & pred``: filter by *pred* when it is callable.

        A non-callable right-hand operand is ignored.
        """
        if not callable( other ):
            return self
        self.generator = Test( self.generator, other )
        return self
    def __or__( self, other ):
        """``pipe | other``: interleave with *other* via Alternate()."""
        interleaved = Alternate( self.generator, other )
        self.generator = interleaved
        return self
    def __add__( self, other ):
        """``pipe + other``: follow this pipe with *other* via Append()."""
        chained = Append( self.generator, other )
        self.generator = chained
        return self
# Keep a handle on the Files generator function: the class statement that
# follows rebinds the name Files, and its __init__ calls this alias.
_files = Files
class Files(Genops):
    """Genops pipeline whose source is the recursive file generator.

    ``Files(*dirs)`` passes its arguments straight to the generator
    function saved as ``_files`` and stores the resulting generator in
    ``self.generator``.
    """
    def __init__( self, *args ):
        # Argument unpacking replaces apply(), which is deprecated in
        # Python 2 and gone from Python 3.
        self.generator = _files( *args )
print( 'Last test...' )
# Operator form of the pipeline: '&' adds filters, '|' alternates with range(20).
pipeline = Files() & isGif & fileOwner('sdm7g') & fileLargerThan(512) | range(20)
for x in pipeline:
    print( x )
More information about the Python-list
mailing list