[Spambayes-checkins] spambayes splitndirs.py,1.4,1.5
Guido van Rossum
gvanrossum@users.sourceforge.net
Tue, 24 Sep 2002 11:26:13 -0700
Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv5034
Modified Files:
splitndirs.py
Log Message:
Add -g option to glob each input path. This is handy on Windows.
Patch contributed by Alexander Leidinger.
Index: splitndirs.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/splitndirs.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** splitndirs.py 23 Sep 2002 21:20:10 -0000 1.4
--- splitndirs.py 24 Sep 2002 18:26:11 -0000 1.5
***************
*** 3,7 ****
"""Split an mbox into N random directories of files.
! Usage: %(program)s [-h] [-s seed] [-v] -n N sourcembox ... outdirbase
Options:
--- 3,7 ----
"""Split an mbox into N random directories of files.
! Usage: %(program)s [-h] [-g] [-s seed] [-v] -n N sourcembox ... outdirbase
Options:
***************
*** 9,12 ****
--- 9,17 ----
Print this help message and exit
+ -g
+ Do globbing on each sourcepath. This is helpful on Windows, where
+ the native shells don't glob, or when you have more mboxes than
+ your shell allows you to specify on the commandline.
+
-s seed
Seed the random number generator with seed (an integer).
***************
*** 22,26 ****
Arguments:
sourcembox
! The mbox to split.
outdirbase
--- 27,31 ----
Arguments:
sourcembox
! The mbox or path to an mbox to split.
outdirbase
***************
*** 46,49 ****
--- 51,55 ----
import email
import getopt
+ import glob
import mboxutils
***************
*** 65,72 ****
def main():
try:
! opts, args = getopt.getopt(sys.argv[1:], 'hn:s:v', ['help'])
except getopt.error, msg:
usage(1, msg)
n = None
verbose = False
--- 71,79 ----
def main():
try:
! opts, args = getopt.getopt(sys.argv[1:], 'hgn:s:v', ['help'])
except getopt.error, msg:
usage(1, msg)
+ doglob = False
n = None
verbose = False
***************
*** 74,77 ****
--- 81,86 ----
if opt in ('-h', '--help'):
usage(0)
+ elif opt == '-g':
+ doglob = True
elif opt == '-s':
random.seed(int(arg))
***************
*** 95,111 ****
counter = 0
for inputpath in inputpaths:
! mbox = mboxutils.getmbox(inputpath)
! for msg in mbox:
! i = random.randrange(n)
! astext = str(msg)
! #assert astext.endswith('\n')
! counter += 1
! msgfile = open('%s/%d' % (outdirs[i], counter), 'wb')
! msgfile.write(astext)
! msgfile.close()
! if verbose:
! if counter % 100 == 0:
! sys.stdout.write('.')
! sys.stdout.flush()
if verbose:
--- 104,126 ----
counter = 0
for inputpath in inputpaths:
! if doglob:
! inpaths = glob.glob(inputpath)
! else:
! inpaths = [inputpath]
!
! for inpath in inpaths:
! mbox = mboxutils.getmbox(inpath)
! for msg in mbox:
! i = random.randrange(n)
! astext = str(msg)
! #assert astext.endswith('\n')
! counter += 1
! msgfile = open('%s/%d' % (outdirs[i], counter), 'wb')
! msgfile.write(astext)
! msgfile.close()
! if verbose:
! if counter % 100 == 0:
! sys.stdout.write('.')
! sys.stdout.flush()
if verbose: