Recursing into directories

Steven Majewski sdm7g at Virginia.EDU
Tue Dec 11 12:01:08 EST 2001


On Mon, 10 Dec 2001, Gerhard [iso-8859-1] Häring wrote:

> Le 10/12/01  17:22, Ron écrivit:
> > Hi,
> >
> > Is there a built-in class (in dircache, os.path????) that
> > recurses thru a directory tree?
> >
> > I've written one using dircache.listdir, but if there is some-
> > thing pre-written, I'd rather use that...
>
> Only a function: os.path.walk
>
> And because I usually want a class, too, I didn't find it especially
> useful.
>

A function like os.path.walk is awkward because it's inverted -- you
need to do all of the work inside with a callback. Generators are
much neater -- you can invert it back again so that your processing
is on the outside where it belongs, and the directory walking is
hidden inside the generator which just produces the next pathname
on each call. Something like:

# recursive file iterator as a generator:
from __future__ import generators
from os import listdir, path, curdir

def Files( *paths ):
        """Generate the path of every regular file found under *paths*.

        Each starting directory is listed with listdir(); regular files are
        yielded as joined paths and sub-directories are walked recursively.
        With no arguments the walk starts at curdir.
        """
        roots = paths or ( curdir, )
        for root in roots:
                for name in listdir( root ):
                        full = path.join( root, name )
                        if path.isfile( full ):
                                yield full
                                continue
                        if not path.isdir( full ):
                                # neither a regular file nor a directory: skip it
                                continue
                        for nested in Files( full ):
                                yield nested

Then you can do: 'for file in Files( ... )' ,
Or better yet, wrap the generator in a class.
The example below (which I've posted before) allows you to do something like:

for x in Files() & isGif & fileOwner('sdm7g') & fileLargerThan(512) |range(20):

to loop over the first 20 Gif files larger than 512 bytes owned by 'sdm7g'.
(or if you don't like the overloading of "&" and "|", use a test in a
 list comprehension -- both are clearer than the functional notation. )



-- Steve Majewski

------

from __future__ import generators,nested_scopes

## You need:

## a generator: (you can also start with a list)

def Ints():
	"""Endless generator of the integers 0, 1, 2, ... in increasing order."""
	current = 0
	while 1:
		yield current
		current = current + 1

def Always( arg ):
	"""Endless generator that yields the same object, *arg*, on every step."""
	constant = arg
	while 1:
		yield constant


## one or more filters:

def Test( gen, test ):
	"""Filter: yield only those items of *gen* for which test(item) is true."""
	for item in gen:
		if not test(item):
			continue
		yield item

## and a terminator:

## by condition:
def Quit( gen, test ):
	"""Terminator: yield items of *gen* until test(item) is true, then stop.

	The item that satisfies *test* is not yielded, and nothing after it is
	pulled from *gen*.
	"""
	for x in gen:
		if test(x):
			# End the generator with a plain return.  Raising StopIteration
			# inside a generator body is converted to RuntimeError by
			# PEP 479, while a bare return works on every version.
			return
		yield x

# or by count:
def Count( gen, n ):
	"""Terminator: yield at most *n* items from *gen*.

	A count of zero or less yields nothing.  After the n-th item has been
	yielded the loop stops without pulling a further item from *gen*.
	"""
	# Guard first: with n <= 0 a countdown tested with "== 0" never fires,
	# which would pass the whole (possibly endless) source through.
	if n <= 0:
		return
	for x in gen:
		yield x
		n -= 1
		if n <= 0:
			break


## shorthand names so the lines don't get too long...

def odd( x ):
	"""Filter *x* down to the items whose remainder modulo 2 is non-zero."""
	return Test( x, lambda y: y % 2 )

def enough( x ):
	"""Pass the items of *x* through until one greater than 100 shows up."""
	return Quit( x, lambda y: y > 100 )

def notdiv3( x ):
	"""Filter *x* down to the items whose remainder modulo 3 is non-zero."""
	return Test( x, lambda y: y % 3 )


# Demo 1: stack the filters on the endless Ints() stream.  enough() (a Quit
# with the test y > 100) ends the stream at the first odd int above 100, so
# this loop terminates even though Ints() never does.
print "\n odd ints that are not divisible by 3 under 100:"
for i in notdiv3( enough( odd( Ints() ))):
	print i

# Demo 2: same filters, but terminated by item count instead of by value.
print "\n first 20 odd ints not divisible by 3:"
for i in Count( notdiv3(odd(Ints())), 20 ):
	print i


# recursive file iterator as a generator:

from os import listdir, path, curdir

def Files( *paths ):
	"""Recursive file iterator: yield every regular file below *paths*.

	Every starting directory is listed with listdir(); plain files are
	yielded as path.join(directory, name) and directories are descended
	into.  Without arguments the walk starts at curdir.
	"""
	if len( paths ) == 0:
		paths = ( curdir, )
	for top in paths:
		for entry in listdir( top ):
			candidate = path.join( top, entry )
			if path.isfile( candidate ):
				yield candidate
			elif path.isdir( candidate ):
				# The recursion goes through the module-level name Files,
				# looked up each time this line runs.
				for found in Files( candidate ):
					yield found


import os, stat

def fileLargerThan( n ):
	"""Return a predicate: true for a path whose size is strictly above *n*.

	The predicate takes a path name and compares the ST_SIZE field of
	os.stat(path) with the threshold; os.stat errors propagate to the caller.
	"""
	# The threshold is bound through the default argument and the body now
	# actually reads it (it previously ignored `size` and used `n`), so the
	# lambda no longer depends on nested scopes to see the threshold.
	return lambda s, size=n: os.stat(s)[stat.ST_SIZE] > size

def fileExt(ext):
	"""Return a predicate: true for a path whose extension equals *ext*.

	The extension is the last element of os.path.splitext(path); both sides
	are lower-cased before they are compared.
	"""
	def hasExt( s ):
		suffix = os.path.splitext( s )[-1]
		return suffix.lower() == ext.lower()
	return hasExt

# Predicate for paths whose extension is '.gif' (fileExt lower-cases both sides).
isGif = fileExt( '.gif' )

## This is MUCH nicer than using os.path.walk() with a callback!

## find the first 20 gifs at or below your cwd (Files() with no arguments
## starts at curdir and recurses into sub-directories)...
for f in Count(Test( Files(), isGif ), 20 ): print f

def fileSize( test ):
	"""Return a path predicate that applies *test* to the file's size.

	The predicate reads the ST_SIZE field of os.stat(path) and returns
	whatever test(size) returns.
	"""
	def checkSize( name ):
		size = os.stat( name )[stat.ST_SIZE]
		return test( size )
	return checkSize

import pwd
def fileOwner( uname ):
	"""Return a path predicate: true when the file's owner is user *uname*.

	The user name is resolved once, here, with pwd.getpwnam(); the predicate
	then compares the ST_UID field of os.stat(path) with that uid.
	"""
	entry = pwd.getpwnam( uname )
	uid = entry[2]
	def ownedBy( fname ):
		return os.stat( fname )[stat.ST_UID] == uid
	return ownedBy


# Module-level aliases for the Test and Count generator functions.
# NOTE(review): nothing in the visible part of the file reads these aliases;
# presumably they keep the functions reachable beside the methods of the same
# names defined on Genpipe -- confirm before removing.
_test = Test
_count = Count

class Gen:
	"""Minimal iterable wrapper around a generator (or any other iterable).

	The wrapped object is kept in the `generator` attribute so subclasses
	can replace it with a filtered or combined version.
	"""
	def __init__( self, generator ):
		self.generator = generator
	def __iter__( self ):
		# __iter__ has to hand back an iterator.  iter() returns a generator
		# unchanged, and also makes a wrapped list or tuple work instead of
		# failing with "iter() returned non-iterator".
		return iter( self.generator )

class Genpipe(Gen):
	"""Gen with chainable filter and limit steps.

	Each step replaces self.generator with a wrapped version and returns
	self, so calls can be strung together:
	Genpipe(source).Test(pred).Count(10).
	"""
	def Test( self, pred ):
		"""Keep only the items for which pred(item) is true; return self."""
		self.generator = Test( self.generator, pred )
		return self
	def Count( self, n ):
		"""Limit the stream with the Count generator; return self."""
		self.generator = Count( self.generator, n )
		return self
	def __or__( self, other ):
		"""`pipe | pred`: same filtering as self.Test(pred)."""
		if not callable(other):
			# Previously this fell off the end and produced None, which only
			# failed later at the `for` loop.  NotImplemented lets Python
			# raise TypeError for the unsupported operand right away.
			return NotImplemented
		self.generator = Test( self.generator, other )
		return self

# Demo: method-chaining form -- up to 10 '.gif' paths found at or below '.'.
print 'Genpipe test...'
for file in Genpipe( Files('.') ).Test( isGif ).Count(10):
	print file

def Append( *generators ):
	"""Chain: yield every item of each argument, one argument after another."""
	for source in generators:
		for item in source:
			yield item


def Alternate( *generators ):
	"""Round-robin: yield one item from each argument in turn, repeatedly.

	The stream ends as soon as the source whose turn it is has run out;
	items already yielded earlier in that round stay yielded.  With no
	arguments nothing is yielded.
	"""
	gs = [ iter(g) for g in generators ]
	if not gs:
		# Without this guard the loop below would spin forever, never yielding.
		return
	while 1:
		for g in gs:
			# Take exactly one item from g.  The inner for/else replaces the
			# explicit g.next() call: that call relied on StopIteration
			# leaking out of the generator body to end the stream, which
			# PEP 479 turns into RuntimeError.
			for x in g:
				yield x
				break
			else:
				return



def Combine( *generators ):
	"""Lock-step: yield tuples holding one item from each argument.

	The stream ends as soon as any source runs out; items already pulled
	from earlier sources in that unfinished round are dropped.  With no
	arguments nothing is yielded.
	"""
	gs = [ iter(g) for g in generators ]
	if not gs:
		# Without this guard the loop below would yield () endlessly.
		return
	while 1:
		lis = []
		for g in gs:
			# Take exactly one item from g.  The inner for/else replaces the
			# explicit g.next() call, which relied on StopIteration leaking
			# out of the generator body (RuntimeError under PEP 479).
			for x in g:
				lis.append( x )
				break
			else:
				return
		yield tuple(lis)



class Genops(Genpipe):
	"""Genpipe with operator shorthands for combining streams.

	`&` filters with a predicate, `|` interleaves with another iterable via
	Alternate, and `+` appends another iterable via Append.  Each operator
	replaces self.generator and returns self.
	"""
	def __and__( self, other ):
		"""`pipe & pred`: keep only the items for which pred(item) is true."""
		if not callable(other):
			# Previously this fell off the end and produced None, which only
			# failed later at the `for` loop.  NotImplemented lets Python
			# raise TypeError for the unsupported operand right away.
			return NotImplemented
		self.generator = Test( self.generator, other )
		return self
	def __or__( self, other ):
		"""`pipe | other`: take items from self and *other* alternately."""
		self.generator = Alternate( self.generator, other )
		return self
	def __add__( self, other ):
		"""`pipe + other`: all of self's items, then all of *other*'s."""
		self.generator = Append( self.generator, other )
		return self


# Keep a handle on the generator function before the class below takes over
# the name Files.
_files = Files
class Files(Genops):
	"""Genops pipeline fed by the recursive file generator.

	Files(*paths) takes the same arguments as the generator function it
	replaces.  NOTE: that function's recursive call looks up the global
	name Files, which from here on is this class; the recursion still
	works because instances are iterable through Gen.__iter__.
	"""
	def __init__( self, *args ):
		# Extended call syntax instead of the deprecated apply() builtin.
		self.generator = _files( *args )

# Demo: operator form.  `&` binds tighter than `|`, so the three predicates
# are applied first and the filtered stream is then combined with range(20).
# NOTE(review): on Genops `|` is Alternate, so this loop prints matching paths
# interleaved with the integers 0..19 and stops when either side runs out --
# it is not a plain "first 20 files" limit (that would be .Count(20)).
# fileOwner('sdm7g') calls pwd.getpwnam() while the expression is being built.
print 'Last test...'
for x in Files() & isGif & fileOwner('sdm7g') & fileLargerThan(512) |range(20):
	print x








More information about the Python-list mailing list