[Spambayes-checkins] spambayes/utilities rebal.py,1.4,1.5
Tim Peters
tim_one at users.sourceforge.net
Sat Dec 27 21:14:52 EST 2003
Update of /cvsroot/spambayes/spambayes/utilities
In directory sc8-pr-cvs1:/tmp/cvs-serv31501
Modified Files:
rebal.py
Log Message:
A long time ago, this script was easy to use with a standard test setup.
Then it grew -r and -s options, apparently for non-standard test
setups (I'm unsure), and it then became both clumsy and error-prone to use
with a standard test setup. Screw that. Added a new -t option, which
makes it again easy to use with a standard test setup. -t can't be used
in the same run with -r or -s. I intended that -r and -s still work,
but since I've never had a use for those options apart from long-windedly
getting the script to believe a standard test setup is in use, if I broke them
I'll never know. Someone who cares about those options should ensure they
still work as *they* intended.
Index: rebal.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/utilities/rebal.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** rebal.py 26 Dec 2003 07:02:22 -0000 1.4
--- rebal.py 28 Dec 2003 02:14:50 -0000 1.5
***************
*** 7,25 ****
options:
-d - dry run; display what would be moved, but don't do it [%(DRYRUN)s]
- -r res - specify an alternate reservoir [%(RESDIR)s]
- -s set - specify an alternate Set prefix [%(SETPREFIX)s]
-n num - specify number of files per Set dir desired [%(NPERDIR)s]
! -v - tell user what's happening [%(VERBOSE)s]
! -q - be quiet about what's happening [not %(VERBOSE)s]
! -c - confirm file moves into Set directory [%(CONFIRM)s]
! -Q - don't confirm moves; this is independent of -v/-q
-h - display this message and quit
! Moves files among the Set subdirectories and a reservoir directory as
! necessary. You should execute this script from the directory containing your
! Data directory. By default, the Set1, Set2, ..., and reservoir subdirectories
! under (relative path) Data/Ham/ are rebalanced; this can be changed with the
! -s argument. The script will work with a variable number of Set directories,
! but they must already exist, and the reservoir directory must also exist.
It's recommended that you run with the -d (dry run) option first, to see what
--- 7,31 ----
options:
-d - dry run; display what would be moved, but don't do it [%(DRYRUN)s]
-n num - specify number of files per Set dir desired [%(NPERDIR)s]
! -t - top directory, holding Set and reservoir subdirs [%(TOPDIR)s]
!
! -v - tell user what's happening; opposite of -q [%(VERBOSE)s]
! -q - be quiet about what's happening; opposite of -v [not %(VERBOSE)s]
!
! -c - confirm file moves into Set directory; opposite of -Q [%(CONFIRM)s]
! -Q - don't confirm moves; opposite of -c; independent of -v/-q
!
-h - display this message and quit
! If you have a non-standard test setup, you can use -r/-s instead of -t:
! -r res - specify an alternate reservoir [%(RESDIR)s]
! -s set - specify an alternate Set prefix [%(SETPREFIX)s]
!
! Moves files randomly among the Set subdirectories and a reservoir directory to
! leave -n files in each Set directory. By default, the Set1, Set2, ..., and
! reservoir subdirectories under (relative path) Data/Ham/ are rebalanced; this
! can be changed with the -t option. The script will work with a variable
! number of Set directories, but they must already exist, and the reservoir
! directory must also exist.
It's recommended that you run with the -d (dry run) option first, to see what
***************
*** 28,52 ****
difficult to recover from that mistake.
! Example:
!
! rebal.py -r reservoir -s Set -n 300
!
! This will move random files between the directory 'reservoir' and the
! various subdirectories prefixed with 'Set', making sure no more than 300
! files are left in the 'Set' directories when finished.
!
! Example:
!
! Suppose you want to shuffle your Set files around, winding up with 300 files
! in each one, you can execute:
!
! rebal.py -n 0
! rebal.py -n 300 -Q
!
! The first run will move all files from the various Data/Ham/Set directories
! to the Data/Ham/reservoir directory. The second run will randomly parcel
! out 300 files to each of the Data/Ham/Set directories.
"""
import os
import sys
--- 34,66 ----
difficult to recover from that mistake.
! See the module comments for examples.
"""
+ # Examples:
+ #
+ # rebal.py -n 300
+ #
+ # Moves files among the Set1, Set2, ..., and reservoir directories under
+ # Data/Ham/, leaving 300 files in each Set directory.
+ #
+ # rebal.py -t Data/Spam -n 300
+ #
+ # The same, but under Data/Spam/.
+ #
+ # rebal.py -r reservoir -s Set -n 300
+ #
+ # The same, but under the Set1, Set2, ..., and reservoir directories
+ # in the current directory.
+ #
+ # Supposing you want to shuffle your Set files around randomly, winding up
+ # with 300 files in each one, you can execute:
+ #
+ # rebal.py -n 0
+ # rebal.py -n 300 -Q
+ #
+ # The first moves all files from the various Data/Ham/Set directories to the
+ # Data/Ham/reservoir directory. The second run randomly parcels out 300 files
+ # to each of the Data/Ham/Set directories.
+
import os
import sys
***************
*** 64,69 ****
# defaults
NPERDIR = 4000
! RESDIR = 'Data/Ham/reservoir'
! SETPREFIX = 'Data/Ham/Set'
VERBOSE = True
CONFIRM = True
--- 78,84 ----
# defaults
NPERDIR = 4000
! TOPDIR = os.path.join('Data', 'Ham')
! RESDIR = os.path.join(TOPDIR, 'reservoir')
! SETPREFIX = os.path.join(TOPDIR, 'Set')
VERBOSE = True
CONFIRM = True
***************
*** 73,76 ****
--- 88,92 ----
if msg:
print >> sys.stderr, str(msg)
+ print >> sys.stderr
print >> sys.stderr, __doc__ % globals()
***************
*** 83,87 ****
"""
! base = os.path.split(f)[-1]
out = os.path.join(targetdir, base)
while os.path.exists(out):
--- 99,103 ----
"""
! base = os.path.basename(f)
out = os.path.join(targetdir, base)
while os.path.exists(out):
***************
*** 92,107 ****
print "moving", f, "to", out
os.rename(f, out)
! return os.path.split(out)[-1]
def main(args):
nperdir = NPERDIR
- resdir = RESDIR
- setprefix = SETPREFIX
verbose = VERBOSE
confirm = CONFIRM
dryrun = DRYRUN
try:
! opts, args = getopt.getopt(args, "dr:s:n:vqcQh")
except getopt.GetoptError, msg:
usage(msg)
--- 108,122 ----
print "moving", f, "to", out
os.rename(f, out)
! return os.path.basename(out)
def main(args):
nperdir = NPERDIR
verbose = VERBOSE
confirm = CONFIRM
dryrun = DRYRUN
+ topdir = resdir = setprefix = None
try:
! opts, args = getopt.getopt(args, "dr:s:t:n:vqcQh")
except getopt.GetoptError, msg:
usage(msg)
***************
*** 111,114 ****
--- 126,131 ----
if opt == "-n":
nperdir = int(arg)
+ elif opt == "-t":
+ topdir = arg
elif opt == "-r":
resdir = arg
***************
*** 131,138 ****
raise SystemError("internal error on option '%s'" % opt)
res = os.listdir(resdir)
dirs = glob.glob(setprefix + "*")
! if dirs == []:
print >> sys.stderr, "no directories starting with", setprefix, "exist."
return 1
--- 148,171 ----
raise SystemError("internal error on option '%s'" % opt)
+ # Derive setprefix and resdir from topdir, if the latter was given.
+ if topdir is not None:
+ if resdir is not None or setprefix is not None:
+ usage("-t can't be specified with -r or -s")
+ return -1
+ setprefix = os.path.join(topdir, "Set")
+ resdir = os.path.join(topdir, "reservoir")
+ else:
+ if setprefix is None:
+ setprefix = SETPREFIX
+ if resdir is None:
+ resdir = RESDIR
+
+ if not os.path.exists(resdir):
+ print >> sys.stderr, "reservoir directory %s doesn't exist" % resdir
+ return 1
res = os.listdir(resdir)
dirs = glob.glob(setprefix + "*")
! if not dirs:
print >> sys.stderr, "no directories starting with", setprefix, "exist."
return 1
***************
*** 141,145 ****
# name of a Set subdirectory, and files is a list of files in that dir.
stuff = []
! n = len(res)
for d in dirs:
fs = os.listdir(d)
--- 174,178 ----
# name of a Set subdirectory, and files is a list of files in that dir.
stuff = []
! n = len(res) # total number of all files
for d in dirs:
fs = os.listdir(d)
More information about the Spambayes-checkins
mailing list