[Spambayes-checkins] spambayes/contrib tte.py,1.12,1.13
Skip Montanaro
montanaro at users.sourceforge.net
Mon Jul 26 04:46:52 CEST 2004
Update of /cvsroot/spambayes/spambayes/contrib
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12291
Modified Files:
tte.py
Log Message:
Generalize the spam:ham ratio flag to include the ham value instead of
having it be implicitly 1.
Index: tte.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/contrib/tte.py,v
retrieving revision 1.12
retrieving revision 1.13
diff -C2 -d -r1.12 -r1.13
*** tte.py 25 Jul 2004 03:00:53 -0000 1.12
--- tte.py 26 Jul 2004 02:46:49 -0000 1.13
***************
*** 36,43 ****
-R Walk backwards through the mailbox.
! --ratio=n Define the number of spam messages to be trained for each ham.
! The default is 1, but given the sorry state of the Net's email
! infrastructure these days you'll probably want to raise it. Keep
! it as close to 1 as you can...
Note: The -c command line argument isn't quite as benign as it might first
--- 36,43 ----
-R Walk backwards through the mailbox.
! --ratio=n Define the ratio of spam:ham messages to be trained at once.
! The default is 1:1, but given the sorry state of the Net's email
! infrastructure these days you'll probably want to raise it (3:2 or
! 2:1, etc). Keep it as close to 1 as you can...
Note: The -c command line argument isn't quite as benign as it might first
***************
*** 101,104 ****
--- 101,106 ----
spam_cutoff = Options.options["Categorization", "spam_cutoff"]
+ nspam, nham = ratio
+
while round < maxrounds and (hmisses or smisses or round == 0):
hambone = mboxutils.getmbox(hambox)
***************
*** 116,122 ****
try:
while not maxmsgs or nmsgs < maxmsgs:
! ham = hambone.next()
spams = []
! for i in range(ratio):
try:
spams.append(spamcan.next())
--- 118,134 ----
try:
while not maxmsgs or nmsgs < maxmsgs:
! hams = []
! for i in range(nham):
! try:
! hams.append(hambone.next())
! except StopIteration:
! # no hams left so exit
! if not hams:
! raise
! # use what we've collected
! break
!
spams = []
! for i in range(nspam):
try:
spams.append(spamcan.next())
***************
*** 128,144 ****
break
! nmsgs += 1 + len(spams)
sys.stdout.write("\r%5d" % nmsgs)
sys.stdout.flush()
! score = store.spamprob(tokenize(ham))
! selector = ham["message-id"] or ham["subject"]
! if score > ham_cutoff and selector is not None:
! if verbose:
! print >> sys.stderr, "miss ham: %.6f %s" % (
! score, selector)
! hmisses += 1
! tdict[ham["message-id"]] = True
! store.learn(tokenize(ham), False)
for spam in spams:
--- 140,157 ----
break
! nmsgs += len(hams) + len(spams)
sys.stdout.write("\r%5d" % nmsgs)
sys.stdout.flush()
! for ham in hams:
! score = store.spamprob(tokenize(ham))
! selector = ham["message-id"] or ham["subject"]
! if score > ham_cutoff and selector is not None:
! if verbose:
! print >> sys.stderr, "miss ham: %.6f %s" % (
! score, selector)
! hmisses += 1
! tdict[ham["message-id"]] = True
! store.learn(tokenize(ham), False)
for spam in spams:
***************
*** 200,204 ****
verbose = False
reverse = False
! sh_ratio = 1
for opt, arg in opts:
if opt in ("-h", "--help"):
--- 213,217 ----
verbose = False
reverse = False
! sh_ratio = (1, 1)
for opt, arg in opts:
if opt in ("-h", "--help"):
***************
*** 222,226 ****
Options.options.set_from_cmdline(arg, sys.stderr)
elif opt == '--ratio':
! sh_ratio = int(arg)
if ham is None or spam is None:
--- 235,240 ----
Options.options.set_from_cmdline(arg, sys.stderr)
elif opt == '--ratio':
! arg = arg.split(":")
! sh_ratio = (int(arg[0]), int(arg[1]))
if ham is None or spam is None:
More information about the Spambayes-checkins
mailing list