[Spambayes-checkins] spambayes mboxtrain.py,1.11,1.12
Tony Meyer
anadelonbrin at users.sourceforge.net
Wed Aug 13 16:59:54 EDT 2003
Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv1580
Modified Files:
mboxtrain.py
Log Message:
Implement patches from:
[ 788001 ] mboxtrain.py maildir bugfix and feature
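The main change is the addition of an "-r" switch that removes each
message after it has been trained on (Maildir only). There are also a
couple of other minor improvements: the spam/ham marker strings are now
read from the "Headers" options instead of being hard-coded,
subdirectories inside a Maildir are skipped, and the progress message
now shows the Maildir path being read.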
Index: mboxtrain.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/mboxtrain.py,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -d -r1.11 -r1.12
*** mboxtrain.py 9 Jul 2003 06:35:49 -0000 1.11
--- mboxtrain.py 13 Aug 2003 22:59:52 -0000 1.12
***************
*** 35,39 ****
-n train mail residing in "new" directory, in addition to "cur" directory,
! which is always trained
"""
--- 35,41 ----
-n train mail residing in "new" directory, in addition to "cur" directory,
! which is always trained (Maildir only)
!
! -r remove mail which was trained on (Maildir only)
"""
***************
*** 46,49 ****
--- 48,52 ----
import sys, os, getopt
from spambayes import hammie, mboxutils
+ from spambayes.Options import options
program = sys.argv[0]
***************
*** 63,69 ****
if is_spam:
! spamtxt = "spam"
else:
! spamtxt = "ham"
oldtxt = msg.get(TRAINED_HDR)
if force:
--- 66,72 ----
if is_spam:
! spamtxt = options["Headers", "header_spam_string"]
else:
! spamtxt = options["Headers", "header_ham_string"]
oldtxt = msg.get(TRAINED_HDR)
if force:
***************
*** 83,90 ****
return True
! def maildir_train(h, path, is_spam, force):
"""Train bayes with all messages from a maildir."""
! if loud: print " Reading as Maildir"
import time
--- 86,93 ----
return True
! def maildir_train(h, path, is_spam, force, removetrained):
"""Train bayes with all messages from a maildir."""
! if loud: print " Reading %s as Maildir" % (path,)
import time
***************
*** 97,105 ****
for fn in os.listdir(path):
- counter += 1
cfn = os.path.join(path, fn)
tfn = os.path.normpath(os.path.join(path, "..", "tmp",
"%d.%d_%d.%s" % (time.time(), pid,
counter, host)))
if loud:
sys.stdout.write(" %s \r" % fn)
--- 100,110 ----
for fn in os.listdir(path):
cfn = os.path.join(path, fn)
tfn = os.path.normpath(os.path.join(path, "..", "tmp",
"%d.%d_%d.%s" % (time.time(), pid,
counter, host)))
+ if (os.path.isdir(cfn)):
+ continue
+ counter += 1
if loud:
sys.stdout.write(" %s \r" % fn)
***************
*** 117,120 ****
--- 122,127 ----
# people actually use Maildirs?
os.rename(tfn, cfn)
+ if (removetrained):
+ os.unlink(cfn)
if loud:
***************
*** 208,212 ****
(trained, counter))
! def train(h, path, is_spam, force, trainnew):
if not os.path.exists(path):
raise ValueError("Nonexistent path: %s" % path)
--- 215,219 ----
(trained, counter))
! def train(h, path, is_spam, force, trainnew, removetrained):
if not os.path.exists(path):
raise ValueError("Nonexistent path: %s" % path)
***************
*** 214,220 ****
mbox_train(h, path, is_spam, force)
elif os.path.isdir(os.path.join(path, "cur")):
! maildir_train(h, os.path.join(path, "cur"), is_spam, force)
if trainnew:
! maildir_train(h, os.path.join(path, "new"), is_spam, force)
elif os.path.isdir(path):
mhdir_train(h, path, is_spam, force)
--- 221,227 ----
mbox_train(h, path, is_spam, force)
elif os.path.isdir(os.path.join(path, "cur")):
! maildir_train(h, os.path.join(path, "cur"), is_spam, force, removetrained)
if trainnew:
! maildir_train(h, os.path.join(path, "new"), is_spam, force, removetrained)
elif os.path.isdir(path):
mhdir_train(h, path, is_spam, force)
***************
*** 237,241 ****
try:
! opts, args = getopt.getopt(sys.argv[1:], 'hfqnd:D:g:s:')
except getopt.error, msg:
usage(2, msg)
--- 244,248 ----
try:
! opts, args = getopt.getopt(sys.argv[1:], 'hfqnrd:D:g:s:')
except getopt.error, msg:
usage(2, msg)
***************
*** 248,251 ****
--- 255,259 ----
force = False
trainnew = False
+ removetrained = False
good = []
spam = []
***************
*** 263,266 ****
--- 271,276 ----
elif opt == '-s':
spam.append(arg)
+ elif opt == "-r":
+ removetrained = True
elif opt == "-d":
usedb = True
***************
*** 279,288 ****
for g in good:
if loud: print "Training ham (%s):" % g
! train(h, g, False, force, trainnew)
save = True
for s in spam:
if loud: print "Training spam (%s):" % s
! train(h, s, True, force, trainnew)
save = True
--- 289,298 ----
for g in good:
if loud: print "Training ham (%s):" % g
! train(h, g, False, force, trainnew, removetrained)
save = True
for s in spam:
if loud: print "Training spam (%s):" % s
! train(h, s, True, force, trainnew, removetrained)
save = True
More information about the Spambayes-checkins mailing list