From mhammond at users.sourceforge.net Wed Mar 3 22:13:46 2004 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Wed Mar 3 22:20:52 2004 Subject: [Spambayes-checkins] spambayes/Outlook2000 addin.py,1.127,1.128 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31926 Modified Files: addin.py Log Message: set pythoncom.frozen along with sys.frozen in our nasty registration hacks Index: addin.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/addin.py,v retrieving revision 1.127 retrieving revision 1.128 diff -C2 -d -r1.127 -r1.128 *** addin.py 9 Feb 2004 03:59:24 -0000 1.127 --- addin.py 4 Mar 2004 03:13:44 -0000 1.128 *************** *** 1538,1542 **** if hasattr(sys, "frozen"): sys.frozendllhandle = win32api.LoadLibrary("outlook_addin.dll") ! sys.frozen = "dll" # Without this, com registration will look at class.__module__, and # get all confused about the module name holding our class in the DLL --- 1538,1542 ---- if hasattr(sys, "frozen"): sys.frozendllhandle = win32api.LoadLibrary("outlook_addin.dll") ! pythoncom.frozen = sys.frozen = "dll" # Without this, com registration will look at class.__module__, and # get all confused about the module name holding our class in the DLL From mhammond at users.sourceforge.net Sat Mar 6 20:58:40 2004 From: mhammond at users.sourceforge.net (Mark Hammond) Date: Sat Mar 6 21:06:16 2004 Subject: [Spambayes-checkins] spambayes/Outlook2000 msgstore.py,1.84,1.85 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30572/Outlook2000 Modified Files: msgstore.py Log Message: Catch all MAPI errors fetching the HTML for a message, and remove the warning about old win32all versions. 
Index: msgstore.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/msgstore.py,v retrieving revision 1.84 retrieving revision 1.85 diff -C2 -d -r1.84 -r1.85 *** msgstore.py 27 Feb 2004 02:57:40 -0000 1.84 --- msgstore.py 7 Mar 2004 01:58:38 -0000 1.85 *************** *** 442,468 **** # Some nasty stuff for getting RTF out of the message - _have_complained_about_missing_rtf = False def GetHTMLFromRTFProperty(mapi_object, prop_tag = PR_RTF_COMPRESSED): - global _have_complained_about_missing_rtf try: rtf_stream = mapi_object.OpenProperty(prop_tag, pythoncom.IID_IStream, 0, 0) except pythoncom.com_error, details: if not IsNotFoundCOMException(details): print "ERROR getting RTF body", details return "" ! try: ! html_stream = mapi.WrapCompressedRTFStream(rtf_stream, 0) ! except AttributeError: ! if not _have_complained_about_missing_rtf: ! print "*" * 50 ! print "Sorry, but you need to update to a new win32all (158 or " ! print "later), so we correctly get the HTML from messages." ! print "See http://starship.python.net/crew/mhammond/win32" ! print "*" * 50 ! _have_complained_about_missing_rtf = True ! return "" ! html = mapi.RTFStreamToHTML(html_stream) ! # html may be None if not RTF originally from HTML, but here we # always want a string return html or '' --- 442,456 ---- # Some nasty stuff for getting RTF out of the message def GetHTMLFromRTFProperty(mapi_object, prop_tag = PR_RTF_COMPRESSED): try: rtf_stream = mapi_object.OpenProperty(prop_tag, pythoncom.IID_IStream, 0, 0) + html_stream = mapi.WrapCompressedRTFStream(rtf_stream, 0) + html = mapi.RTFStreamToHTML(html_stream) except pythoncom.com_error, details: if not IsNotFoundCOMException(details): print "ERROR getting RTF body", details return "" ! 
# html may be None if RTF not originally from HTML, but here we # always want a string return html or '' From montanaro at users.sourceforge.net Sun Mar 7 09:51:10 2004 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Sun Mar 7 09:58:51 2004 Subject: [Spambayes-checkins] spambayes/contrib tte.py,1.6,1.7 Message-ID: Update of /cvsroot/spambayes/spambayes/contrib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv26024 Modified Files: tte.py Log Message: Add note in docstring about using more extreme values for ham_cutoff and spam_cutoff to force weaker ham and spam to be used in training. Index: tte.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/contrib/tte.py,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** tte.py 26 Feb 2004 15:45:48 -0000 1.6 --- tte.py 7 Mar 2004 14:51:07 -0000 1.7 *************** *** 32,44 **** Set [sect, opt] in the options database to val. ! Note that the -c command line argument isn't quite as benign as it might ! first appear. Since the tte protocol trains on the same number of ham and ! spam messages, if you use the output of one run as input into a later run ! you will almost certainly train on fewer messages than before since the two files will probably not have the same number of messages. The extra messages in the longer file will be ignored in future runs until you add more messages to the shorter file. ! For more detail on the notion of training to exhaustion see Gary Robinson's blog: http://www.garyrobinson.net/2004/02/spam_filtering_.html --- 32,56 ---- Set [sect, opt] in the options database to val. ! Note: The -c command line argument isn't quite as benign as it might first ! appear. Since the tte protocol trains on the same number of ham and spam ! messages, if you use the output of one run as input into a later run you ! 
will almost certainly train on fewer messages than before since the two files will probably not have the same number of messages. The extra messages in the longer file will be ignored in future runs until you add more messages to the shorter file. ! Note: Adding messages which train correctly won't affect anything other than ! adding more ham or spam to the respective training pile. To force such ! messages to have an effect you should set your ham_cutoff and spam_cutoff ! values closer to 0.0 and 1.0 than your normal settings during scoring. For ! example, if your normal ham_cutoff and spam_cutoff values are 0.2 and 0.8, ! you might run %(prog)s like ! ! %(prog)s -o Categorization:ham_cutoff:0.05 \ ! -o Categorization:spam_cutoff:0.95 \ ! [ other args ] ! ! For more detail on the notion of training to exhaustion see Gary Robinson's ! blog: http://www.garyrobinson.net/2004/02/spam_filtering_.html *************** *** 98,102 **** except StopIteration: pass ! delta = datetime.datetime.now()-start seconds = delta.seconds + delta.microseconds/1000000 --- 110,114 ---- except StopIteration: pass ! delta = datetime.datetime.now()-start seconds = delta.seconds + delta.microseconds/1000000 From anadelonbrin at users.sourceforge.net Mon Mar 15 18:06:48 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Mon Mar 15 18:15:53 2004 Subject: [Spambayes-checkins] spambayes/spambayes/resources ui.html, 1.32, 1.33 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes/resources In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31961/spambayes/resources Modified Files: ui.html Log Message: Add a note warning about [915466] Sorting review page loses classifications. This is non-trivial to fix at the moment, so the warning will have to do until someone has more time to fix it properly. 
Index: ui.html =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/resources/ui.html,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** ui.html 22 Feb 2004 02:31:18 -0000 1.32 --- ui.html 15 Mar 2004 23:06:44 -0000 1.33 *************** *** 242,246 **** buttons in that section in one go. Click one of the other headers to sort messages (within their classification) by that ! header.

--- 242,247 ---- buttons in that section in one go. Click one of the other headers to sort messages (within their classification) by that ! header (note that sorting will lose any changes you have ! made to the page).

From anadelonbrin at users.sourceforge.net Mon Mar 15 18:25:37 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Mon Mar 15 18:34:42 2004 Subject: [Spambayes-checkins] spambayes/spambayes ProxyUI.py,1.42,1.43 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3202/spambayes Modified Files: ProxyUI.py Log Message: Fix [ 906581 ] Assertion failed in search subject Can an email message's items/headers be non-strings? It appears from the bug report that this must be the case, although I'm not sure what would cause that to be so. The email package's documentation is very difficult (for me) to make sense of, so I'm not really sure. In any case, forcing them to strings won't hurt really, except that a search might be a bit slower (but that's a rare occurrence). Index: ProxyUI.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/ProxyUI.py,v retrieving revision 1.42 retrieving revision 1.43 diff -C2 -d -r1.42 -r1.43 *** ProxyUI.py 5 Feb 2004 09:33:02 -0000 1.42 --- ProxyUI.py 15 Mar 2004 23:25:24 -0000 1.43 *************** *** 467,473 **** msg.load() if params.has_key('subject'): ! if self._contains(msg['Subject'], key, ic): push((k, corp)) if params.has_key('body'): msg_body = msg.as_string() msg_body = msg_body[msg_body.index('\r\n\r\n'):] --- 467,476 ---- msg.load() if params.has_key('subject'): ! subj = str(msg['Subject']) ! if self._contains(subj, key, ic): push((k, corp)) if params.has_key('body'): + # For [ 906581 ] Assertion failed in search + # subject. Can the headers be a non-string? msg_body = msg.as_string() msg_body = msg_body[msg_body.index('\r\n\r\n'):] *************** *** 476,479 **** --- 479,487 ---- if params.has_key('headers'): for nm, val in msg.items(): + # For [ 906581 ] Assertion failed in + # search subject. Can the headers be + # a non-string? 
+ nm = str(nm) + val = str(val) if self._contains(nm, key, ic) or \ self._contains(val, key, ic): From montanaro at users.sourceforge.net Mon Mar 15 20:05:36 2004 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Mon Mar 15 20:14:43 2004 Subject: [Spambayes-checkins] spambayes/contrib tte.py,1.7,1.8 Message-ID: Update of /cvsroot/spambayes/spambayes/contrib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23407 Modified Files: tte.py Log Message: added a verbose flag Index: tte.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/contrib/tte.py,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** tte.py 7 Mar 2004 14:51:07 -0000 1.7 --- tte.py 16 Mar 2004 01:05:33 -0000 1.8 *************** *** 6,10 **** usage %(prog)s [ -h ] -g file -s file [ -d file | -p file ] \ ! [ -m N ] [ -r N ] [ -c ext ] [ -o sect:opt:val ] -h - Print this usage message and exit. --- 6,10 ---- usage %(prog)s [ -h ] -g file -s file [ -d file | -p file ] \ ! [ -m N ] [ -r N ] [ -c ext ] [ -o sect:opt:val ] [ -v ] -h - Print this usage message and exit. *************** *** 32,35 **** --- 32,38 ---- Set [sect, opt] in the options database to val. + -v Be very verbose, spewing all sorts of stuff out to stderr. + + Note: The -c command line argument isn't quite as benign as it might first appear. Since the tte protocol trains on the same number of ham and spam *************** *** 78,82 **** print >> sys.stderr, __doc__.strip() % globals() ! def train(store, ham, spam, maxmsgs, maxrounds, tdict): smisses = hmisses = round = 0 ham_cutoff = Options.options["Categorization", "ham_cutoff"] --- 81,85 ---- print >> sys.stderr, __doc__.strip() % globals() ! 
def train(store, ham, spam, maxmsgs, maxrounds, tdict, verbose): smisses = hmisses = round = 0 ham_cutoff = Options.options["Categorization", "ham_cutoff"] *************** *** 87,90 **** --- 90,97 ---- spamcan = mboxutils.getmbox(spam) round += 1 + + if verbose: + print >> sys.stderr, "*** round", round, "***" + hmisses = smisses = nmsgs = 0 start = datetime.datetime.now() *************** *** 98,107 **** sys.stdout.flush() ! if store.spamprob(tokenize(hammsg)) > ham_cutoff: hmisses += 1 tdict[hammsg["message-id"]] = True store.learn(tokenize(hammsg), False) ! if store.spamprob(tokenize(spammsg)) < spam_cutoff: smisses += 1 tdict[spammsg["message-id"]] = True --- 105,120 ---- sys.stdout.flush() ! score = store.spamprob(tokenize(hammsg)) ! if score > ham_cutoff: ! if verbose: ! print >> sys.stderr, "miss ham: %.6f %s" % (score, hammsg["message-id"]) hmisses += 1 tdict[hammsg["message-id"]] = True store.learn(tokenize(hammsg), False) ! score = store.spamprob(tokenize(spammsg)) ! if score < spam_cutoff: ! if verbose: ! print >> sys.stderr, "miss spam: %.6f %s" % (score, spammsg["message-id"]) smisses += 1 tdict[spammsg["message-id"]] = True *************** *** 140,146 **** def main(args): try: ! opts, args = getopt.getopt(args, "hg:s:d:p:o:m:r:c:", ["help", "good=", "spam=", ! "database=", "pickle=", "option=", "max=", "maxrounds=", "cullext="]) --- 153,159 ---- def main(args): try: ! opts, args = getopt.getopt(args, "hg:s:d:p:o:m:r:c:v", ["help", "good=", "spam=", ! "database=", "pickle=", "verbose", "option=", "max=", "maxrounds=", "cullext="]) *************** *** 152,159 **** --- 165,175 ---- maxmsgs = 0 maxrounds = MAXROUNDS + verbose = False for opt, arg in opts: if opt in ("-h", "--help"): usage() return 0 + elif opt in ("-v", "--verbose"): + verbose = True elif opt in ("-g", "--good"): ham = arg *************** *** 183,187 **** tdict = {} ! train(store, ham, spam, maxmsgs, maxrounds, tdict) store.store() --- 199,203 ---- tdict = {} ! 
train(store, ham, spam, maxmsgs, maxrounds, tdict, verbose) store.store() From anadelonbrin at users.sourceforge.net Mon Mar 15 22:08:10 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Mon Mar 15 22:17:16 2004 Subject: [Spambayes-checkins] spambayes/spambayes Corpus.py, 1.13, 1.14 ProxyUI.py, 1.43, 1.44 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12621/spambayes Modified Files: Corpus.py ProxyUI.py Log Message: Fix [851785] Pop3proxy stores Ham/Spam/Unsure subject line in message cache If the notate_to or notate_subject options are used, then the text that gets stored in the sb_server cache includes these modifications. Later, when these messages are used for training, this information is also used (so there is a subject:spam clue that is introduced by spambayes, for example). This isn't really ideal (although nor is modifying these headers in the first place). This changes the review page to ask the corpus to strip out the classifications from the to and subject lines if (and only if) the appropriate options are set. The only flaw I can see with this is that if there are cached messages that were added *before* the option was set, and so don't have the classification, but do have an identical one from some other source (an upstream filter, for example), this information will be stripped. It doesn't seem likely that this would be common. Index: Corpus.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Corpus.py,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** Corpus.py 17 Dec 2003 08:55:04 -0000 1.13 --- Corpus.py 16 Mar 2004 03:08:07 -0000 1.14 *************** *** 180,187 **** self.msgs[key] = None ! 
def takeMessage(self, key, fromcorpus): '''Move a Message from another corpus to this corpus''' msg = fromcorpus[key] msg.load() # ensure that the substance has been loaded fromcorpus.removeMessage(msg) self.addMessage(msg) --- 180,213 ---- self.msgs[key] = None ! def takeMessage(self, key, fromcorpus, fromCache=False): '''Move a Message from another corpus to this corpus''' msg = fromcorpus[key] msg.load() # ensure that the substance has been loaded + + # If the notate_to or notate_subject options are set, then the + # message in the cache has this information, and it will get used + # in training, which is not ideal. So if that option is set, strip + # that data before training. The only time I can see this failing + # is if the option is changed at some point, so older messages + # don't have the notation, but some other program did do the same + # notation, which would be lost. This shouldn't be a big deal, + # though. + if fromCache: + for header, header_opt in (("Subject", "notate_subject"), + ("To", "notate_to")): + # For Python 2.2, which doesn't allow "string in string". + if isinstance(options["Headers", header_opt], + types.StringsTypes): + notate_opt = (options["Headers", header_opt],) + else: + notate_opt = options["Headers", header_opt] + + for opt, tag in (("ham", "header_ham_string"), + ("spam", "header_spam_string"), + ("unsure", "header_unsure_string")): + if opt in notate_opt and \ + msg[header].startswith("%s," % options["Headers", tag]): + msg.replace_header(header, msg[header][len(tag)+1:]) + fromcorpus.removeMessage(msg) self.addMessage(msg) Index: ProxyUI.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/ProxyUI.py,v retrieving revision 1.43 retrieving revision 1.44 diff -C2 -d -r1.43 -r1.44 *** ProxyUI.py 15 Mar 2004 23:25:24 -0000 1.43 --- ProxyUI.py 16 Mar 2004 03:08:07 -0000 1.44 *************** *** 384,388 **** if sourceCorpus is not None: try: ! 
targetCorpus.takeMessage(id, sourceCorpus) if numTrained == 0: self.write("

Training... ") --- 384,391 ---- if sourceCorpus is not None: try: ! # fromCache is a fix for sf #851785. ! # See the comments in Corpus.py ! targetCorpus.takeMessage(id, sourceCorpus, ! fromCache=True) if numTrained == 0: self.write("

Training... ") From anadelonbrin at users.sourceforge.net Mon Mar 15 23:41:51 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Mon Mar 15 23:50:59 2004 Subject: [Spambayes-checkins] spambayes/spambayes/resources ui_html.py, 1.31, 1.32 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes/resources In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27019/spambayes/resources Modified Files: ui_html.py Log Message: Opps. Forgot to check in the generated file with the last modification to ui.html. This adds extra information about sorting resetting the review page. Index: ui_html.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/resources/ui_html.py,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** ui_html.py 22 Feb 2004 02:31:18 -0000 1.31 --- ui_html.py 16 Mar 2004 04:41:48 -0000 1.32 *************** *** 67,121 **** T\031Z@ex\012̄\013!\032-\027\030\012\010Ǻ\010\023c\"Q\000J@0cz+4\ cTW\024#\035k3,Ǥ\017B@*\003l\010:]Q\010)?\002\001QRQފ\0222f\ ! `.;U\033d\006e;@{V\0160\006DSApȦk\032΅Q\027s\010FL\006ı/0\024rr\ ! b})Go.C\017]˹\005@\002N\0359q35~]\015\003\032\014%͖UІ\032\ ! /Y,KQ\001cRL*s\017\026`I\0260}x\036Z>MkUs\023we\ ! jLRE!\013Ö=WO`pW^L^DL;T'[\0045\023UD3v\ ! \020ʠW\037ӧ\037d\016A\034V]s_[A\004\015,b\027M\017\000\017\ ! zSqD%v\000\004{\034ǙӋ\003\037vi\"\"\005\017k[L̘zr\ ! _ȷr%+X\016\016\017۽\016*i\030?+E\007\015?rn\031\014~\007%a\032f⏛\021ol\ ! *v|5P6ͫF)f5ް_\007NI\"\0303,\037m̪#n@NAV\031\017us\ ! ݪl\005\003tB\012(6\016Q\0246mؗF\014z55]8q|iy\ ! 8Q\037\025>މFf'e`o\021pgUFpT:\037?\002Hy1`\006&_qq\"Xx\011\ ! \"8\011P\027#0x%\017*\022\033H\002\"1d\026S?Unл\013\033\ ! \0174y\027]\024)6\036bIs?D\"\022:JB0Xtѓ\007}\011),2tL.ք\ ! Aڄ56H>m\0055id6#FNe2{54Y[\004b̋#v|nD\ ! NSvhx::>(\012[(-\007bE8Avj'\021ԈS]kv\002'!biA9B6\017\004aV\022\ ! %U\025B#NnRhS<ԁv*͕\004*\022\012*\013\005z7J-\036\ ! \011z+Jwm+pF\"mJ\0003zIx&ƹSՈ\031|'\011dhc礮M\011VU\ ! 
,\023KyOg쁙ý\023+]lzU\001\0374C<^\012l\035gϞw#\024 0\ ! p\032+v\033$Ouz2=F\022i)A\002sr(tf=զ^x5a#mH<\012\0145\ ! [Ưw:\007c.|^4\004\007-\014)&b砿\017\022*1e|]c#1\ ! \033\033\025hi\0268TQvMiRgFӺF3!'}єfb.\036I\\\032iHC\0071\ ! \032O\001qGI\005\037fw\036Yk\024$ذvz\031#\034\031xC\ ! ,L?Y@0:,ÚeD:sV\033\007Iy@\002\0326\006\001ze;\015}v\ ! E|k824a͢N7gt.ǧ;M\026P\035B)wm\033۽]3UW\030u\ ! \013\"HͯWmQ\0361x\034\004N([\004Q*{&?\004jˌhK\006\ ! ֬eB6ɨOUS'.\017l\003Ugoh$<\033|$}9S4|{:7X\ ! ӽo_\033}G\\p+\032\026ܨ\004w,tC\001V\005j\031\002:{|\003t\000%bO\ ! .P\0079\024\007\021A\023B6\021jYo<7\033\006\005\011\037F5n9%rR+9\007zs+W\021\007\ ! \021\007H\026L$&MG2.$OZ\034\003K2\010a83\005#ACc*1\026NJ\ ! P'+\022},l\017揕tL.n4r*w|%fg<\031\0101\026\ ! \011h~\024\007`=\004g&R\001|h^:nc]9\026Mgw0\015\011`s\036\035e\000\ ! $\013=={8jYIM-;/M\011:J㎍\015b\025\\M~7\023:\004\007\034[?\002\ ! lsn(\003o1\0036\026\022^Ϣ\033qov;\0156\005Tx5zf\006Ӡ\ ! (ʬ:2\022S3PA٨[T\027w&ͧ\023xF\016?ro\001\006;e{\ ! nS78\023B\005oT:_k\020\036ɕ4\015!PUt^ayKu#\ ! \0004\000OD䪂\012\025îT\012\0152~\012Z=/\026\011U=\014t\035 Տj\016>^U\037&\027\ ! /߾vt\0378\0321\002\027\027L^iH-=^aSB;o|}~>b8s\ ! \035\011g[p\027of6\025\036\023g2ǘ?sHS\035&ºoWgoV\037QWU\002BhlBU6\ ! `.\033N\001W\002:iʖ3\032\037\022D_YG\016\014\0305\015\033@4_*ӧ\022\003C\ ! k1wM\037E\0371\016\034Yh;1U)wA\026\013[W1\005F\025꫏l[f2\ ! (Be?ڦ\022Պ\001mH};\034>\013')5G)\007+\025\023\027\030\0063^xU~އ\ ! S&\007bU&C]ߍ\007\020N:(؈D*UȦ2/Dn11c2K~!ʕȮ`<8IW)v\032=\035\033,.\020Qij2aݹn\024\ ! @X\012}|r\002\014ia\001={\001{:\022Qsl@\027@\013҃\007\003\020A\023B6\021Dz[o<\ ! 7\033\006f\006\011\037F5n9rR9\007zs+W\021\007\021\007H\026L$&MG2.$OZ\034\003\ ! K2\010a83\005#AC%h+1V`JP'+\022},l\017揕\ ! tL.n4rJ\016&fg<\031\0101V\011h~\024\007`=\004g&R\001|\ ! h^:nc]9_M0\015\011`s\0369 e\000H\014==-|8j\035YM-;\ ! /q\012:J㎍\015b\025\\M~7\023:M\007\034[?\002lsnݨ'o1\0036\026\022^\ ! \033qov;\0156\005TGx5zf\006Ӡ(ʬ:\022S3Peڨ[T\ ! \027w&ͧ\023xF\016Ǵo\001\006;e{nS78TћL\0134'0=Hwu>\ ! 
\017=!<+U:G\010T򥦾\027\032xpS&ɭ$\015\023Qwx4jG3\013SM5M\ ! gk1X$T0\007\002\006~TsVv\0255xӦE\031\007\02757fJK\ ! Gj\012\022\026?3bWqv:SrN \007׎<[ނxiKs0]\0220\ ! A볹m#oqn9 *\035\037=$=\027.?t1s\0128\011?ўC\ ! (5]\026֝~\023:\023Kӽ\022G\024DCg\023R\001su\0128J\027IS5&\ ! 2\005T\034|\037Z\034m( ܝqhJGۙ7\ ! ܭJ9\023\014BXغ)h/7\030VdK65sF\021*68\036\016V\014hmm{E#T\ ! Y N>Iݭ9Y?0\034«\023\026n>2A>->'4\02432iM\037j\007n\ ! ?x\010CAF\015f'RqB6yᙽ[ufc\007`_hvJA~\014!\025Y[<ތ\ ! 8\004_;c|4EYrtW\026\016R?#}'e8]W_CNJڟ\0226ۭ\034\ ! t]c{b\014O*5m,\")l`X1ess1Wb0³a\ ! ,\005\004\001x$3\001%\012\035g\023,\015<шۇ9})jշ\011\003\007\021o_b\016\015s\ ! `Nhx=\032̏HW+Lh]\"T7qe \\/]zJ^̌ق\ ! Nn\0243\031\024:wHc-\012(\014\013\013\007&:$") ### end From anadelonbrin at users.sourceforge.net Tue Mar 16 00:08:33 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Tue Mar 16 00:17:41 2004 Subject: [Spambayes-checkins] spambayes/scripts sb_server.py,1.20,1.21 Message-ID: Update of /cvsroot/spambayes/spambayes/scripts In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv32222/scripts Modified Files: sb_server.py Log Message: Fix the potential open relay problem with smtpproxy. Fix 1: The listening_ports option for smtproxy is now of type SERVER rather than type PORT, as the pop3proxy one already was. This means that you can set the listening port to be (for example) localhost:25, and only localhost connections will be accepted. Fix 2: There are two new options, one in each of the pop3proxy and smtpproxy sections, both called allow_remote_connections. These do the same thing as the option of the same name in the html_ui section (it's basically the same code!). By default, connections from outside localhost will return a POP3/SMTP error message and close. You can explicitly open this up to certain IPs or to anyone, if you really want to. 
Fix 3: I've added to the smtpproxy option documentation to point out that entering in your smtp server details isn't necessary if you're not going to use it to train (although it does also allow the bug report to be sent...) --- I've tested this as much as I can, using my machine and another machine on my network. I'm behind a firewall I don't control, so can't test anything more remote than that, or use one of the available testing websites to check if this works properly. Given that this is a major concern, it would be great if someone else was able to test this. This also closes [ 797579 ] Disable connections to POP3 and SMTP from remote hosts Index: sb_server.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/scripts/sb_server.py,v retrieving revision 1.20 retrieving revision 1.21 diff -C2 -d -r1.20 -r1.21 *** sb_server.py 5 Feb 2004 08:13:26 -0000 1.20 --- sb_server.py 16 Mar 2004 05:08:31 -0000 1.21 *************** *** 188,194 **** --- 188,219 ---- self.seenAllHeaders = False # For the current RETR or TOP self.startTime = 0 # (ditto) + + if not self.onIncomingConnection(clientSocket): + # We must refuse this connection, so pass an error back + # to the mail client. 
+ self.push("-ERR Connection not allowed\r\n") + self.close_when_done() + return + self.serverSocket = ServerLineReader(serverName, serverPort, self.onServerLine) + def onIncomingConnection(self, clientSocket): + """Checks the security settings.""" + # Stolen from UserInterface.py + + remoteIP = clientSocket.getpeername()[0] + trustedIPs = options["pop3proxy", "allow_remote_connections"] + + if trustedIPs == "*" or remoteIP == clientSocket.getsockname()[0]: + return True + + trustedIPs = trustedIPs.replace('.', '\.').replace('*', '([01]?\d\d?|2[04]\d|25[0-5])') + for trusted in trustedIPs.split(','): + if re.search("^" + trusted + "$", remoteIP): + return True + + return False + def onTransaction(self, command, args, response): """Overide this. Takes the raw request and the response, and From anadelonbrin at users.sourceforge.net Tue Mar 16 00:08:34 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Tue Mar 16 00:17:42 2004 Subject: [Spambayes-checkins] spambayes/spambayes Options.py, 1.104, 1.105 ProxyUI.py, 1.44, 1.45 smtpproxy.py, 1.6, 1.7 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv32222/spambayes Modified Files: Options.py ProxyUI.py smtpproxy.py Log Message: Fix the potential open relay problem with smtpproxy. Fix 1: The listening_ports option for smtpproxy is now of type SERVER rather than type PORT, as the pop3proxy one already was. This means that you can set the listening port to be (for example) localhost:25, and only localhost connections will be accepted. Fix 2: There are two new options, one in each of the pop3proxy and smtpproxy sections, both called allow_remote_connections. These do the same thing as the option of the same name in the html_ui section (it's basically the same code!). By default, connections from outside localhost will return a POP3/SMTP error message and close. You can explicitly open this up to certain IPs or to anyone, if you really want to. 
Fix 3: I've added to the smtpproxy option documentation to point out that entering in your smtp server details isn't necessary if you're not going to use it to train (although it does also allow the bug report to be sent...) --- I've tested this as much as I can, using my machine and another machine on my network. I'm behind a firewall I don't control, so can't test anything more remote than that, or use one of the available testing websites to check if this works properly. Given that this is a major concern, it would be great if someone else was able to test this. This also closes [ 797579 ] Disable connections to POP3 and SMTP from remote hosts Index: Options.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Options.py,v retrieving revision 1.104 retrieving revision 1.105 diff -C2 -d -r1.104 -r1.105 *** Options.py 5 Feb 2004 08:13:26 -0000 1.104 --- Options.py 16 Mar 2004 05:08:31 -0000 1.105 *************** *** 795,815 **** specify the same number of ports as servers, separated by commas.""", SERVER, DO_NOT_RESTORE), ), "smtpproxy" : ( ("remote_servers", "Remote Servers", (), ! """The Spambayes SMTP proxy intercepts outgoing email - if you ! forward mail to one of the addresses below, it is examined for an id ! and the message corresponding to that id is trained as ham/spam. All ! other mail is sent along to your outgoing mail server. You need to ! specify which SMTP server(s) you wish it to intercept - a SMTP server ! address typically looks like "smtp.myisp.net". If you use more than ! one server, simply separate their names with commas. You can get ! these server names from your existing email configuration, or from ! your ISP or system administrator. If you are using Web-based email, ! you can't use the Spambayes SMTP proxy (sorry!). In your email ! client's configuration, where you would normally put your SMTP server ! address, you should now put the address of the machine running ! 
Spambayes.""", SERVER, DO_NOT_RESTORE), --- 795,826 ---- specify the same number of ports as servers, separated by commas.""", SERVER, DO_NOT_RESTORE), + + ("allow_remote_connections", "Allowed remote connections", "localhost", + """Enter a list of trusted IPs, separated by commas. Remote POP + connections from any of them will be allowed. You can trust any + IP using a single '*' as field value. You can also trust ranges of + IPs using the '*' character as a wildcard (for instance 192.168.0.*). + The localhost IP will always be trusted. Type 'localhost' in the + field to trust this only address.""", + IP_LIST, RESTORE), ), "smtpproxy" : ( ("remote_servers", "Remote Servers", (), ! """Use of the SMTP proxy is optional - if you would rather just train ! via the web interface, or the pop3dnd or mboxtrain scripts, then you ! can safely leave this option blank. The Spambayes SMTP proxy ! intercepts outgoing email - if you forward mail to one of the ! addresses below, it is examined for an id and the message ! corresponding to that id is trained as ham/spam. All other mail is ! sent along to your outgoing mail server. You need to specify which ! SMTP server(s) you wish it to intercept - a SMTP server address ! typically looks like "smtp.myisp.net". If you use more than one ! server, simply separate their names with commas. You can get these ! server names from your existing email configuration, or from your ISP ! or system administrator. If you are using Web-based email, you can't ! use the Spambayes SMTP proxy (sorry!). In your email client's ! configuration, where you would normally put your SMTP server address, ! you should now put the address of the machine running SpamBayes.""", SERVER, DO_NOT_RESTORE), *************** *** 821,825 **** client to use this port. If there are multiple servers, you must specify the same number of ports as servers, separated by commas.""", ! 
PORT, DO_NOT_RESTORE), ("ham_address", "Train as ham address", "spambayes_ham@localhost", --- 832,850 ---- client to use this port. If there are multiple servers, you must specify the same number of ports as servers, separated by commas.""", ! SERVER, DO_NOT_RESTORE), ! ! ("allow_remote_connections", "Allowed remote connections", "localhost", ! """Enter a list of trusted IPs, separated by commas. Remote SMTP ! connections from any of them will be allowed. You can trust any ! IP using a single '*' as field value. You can also trust ranges of ! IPs using the '*' character as a wildcard (for instance 192.168.0.*). ! The localhost IP will always be trusted. Type 'localhost' in the ! field to trust this only address. Note that you can unwittingly ! turn a SMTP server into an open proxy if you open this up, as ! connections to the server will appear to be from your machine, even ! if they are from a remote machine *through* your machine, to the ! server. We do not recommend opening this up fully (i.e. using '*'). ! """, ! 
IP_LIST, RESTORE), ("ham_address", "Train as ham address", "spambayes_ham@localhost", Index: ProxyUI.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/ProxyUI.py,v retrieving revision 1.44 retrieving revision 1.45 diff -C2 -d -r1.44 -r1.45 *** ProxyUI.py 16 Mar 2004 03:08:07 -0000 1.44 --- ProxyUI.py 16 Mar 2004 05:08:31 -0000 1.45 *************** *** 144,147 **** --- 144,149 ---- ('html_ui', 'http_user_name'), ('html_ui', 'http_password'), + ('pop3proxy', 'allow_remote_connections'), + ('smtpproxy', 'allow_remote_connections'), ) Index: smtpproxy.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/smtpproxy.py,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** smtpproxy.py 16 Feb 2004 08:59:14 -0000 1.6 --- smtpproxy.py 16 Mar 2004 05:08:31 -0000 1.7 *************** *** 159,165 **** --- 159,191 ---- self.data = "" self.blockData = False + + if not self.onIncomingConnection(clientSocket): + # We must refuse this connection, so pass an error back + # to the mail client. + self.push("421 Connection not allowed\r\n") + self.close_when_done() + return + self.serverSocket = ServerLineReader(serverName, serverPort, self.onServerLine) + + def onIncomingConnection(self, clientSocket): + """Checks the security settings.""" + # Stolen from UserInterface.py + + remoteIP = clientSocket.getpeername()[0] + trustedIPs = options["smtpproxy", "allow_remote_connections"] + + if trustedIPs == "*" or remoteIP == clientSocket.getsockname()[0]: + return True + + trustedIPs = trustedIPs.replace('.', '\.').replace('*', '([01]?\d\d?|2[04]\d|25[0-5])') + for trusted in trustedIPs.split(','): + if re.search("^" + trusted + "$", remoteIP): + return True + + return False + def onTransaction(self, command, args): """Overide this. 
Takes the raw command and returns the (possibly From anadelonbrin at users.sourceforge.net Tue Mar 16 02:04:33 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Tue Mar 16 02:13:42 2004 Subject: [Spambayes-checkins] spambayes/scripts sb_imapfilter.py,1.27,1.28 Message-ID: Update of /cvsroot/spambayes/spambayes/scripts In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19356/scripts Modified Files: sb_imapfilter.py Log Message: The earlier change to using BODY.PEEK was faulty in two ways, so fix that. 1. Python's imaplib is quirky in that it quotes when it shouldn't, unless you put the command in (), so do that. 2. The extract_fetch_data function wasn't setup to handle BODY.PEEK[] responses, and then the creation of the message from that wasn't set up to handle them either, so add those in. My testing indicates that (at least with my server) using BODY.PEEK[] now works, and retains the /Seen status as it should. (Note that even during the failing, it would still work, since it would fall back to RFC822, but it would lose the /Seen status). Index: sb_imapfilter.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/scripts/sb_imapfilter.py,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** sb_imapfilter.py 17 Feb 2004 04:38:18 -0000 1.27 --- sb_imapfilter.py 16 Mar 2004 07:04:31 -0000 1.28 *************** *** 151,154 **** --- 151,155 ---- r"[\+\-]\d{4,4}\")") RFC822_RE = re.compile(r"(RFC822) (\{[\d]+\})") + BODY_PEEK_RE = re.compile(r"(BODY\[\]) (\{[\d]+\})") RFC822_HEADER_RE = re.compile(r"(RFC822.HEADER) (\{[\d]+\})") UID_RE = re.compile(r"(UID) ([\d]+)") *************** *** 184,190 **** # UID # RFC822.HEADER # All others are ignored. for r in [FLAGS_RE, INTERNALDATE_RE, RFC822_RE, UID_RE, ! RFC822_HEADER_RE]: mo = r.search(response) if mo is not None: --- 185,192 ---- # UID # RFC822.HEADER + # BODY.PEEK # All others are ignored. 
for r in [FLAGS_RE, INTERNALDATE_RE, RFC822_RE, UID_RE, ! RFC822_HEADER_RE, BODY_PEEK_RE]: mo = r.search(response) if mo is not None: *************** *** 332,336 **** self.folder = None self.previous_folder = None ! self.rfc822_command = "BODY.PEEK[]" self.got_substance = False self.invalid = False --- 334,339 ---- self.folder = None self.previous_folder = None ! self.rfc822_command = "(BODY.PEEK[])" ! self.rfc822_key = "BODY[]" self.got_substance = False self.invalid = False *************** *** 382,388 **** --- 385,393 ---- except IMAP4.error: self.rfc822_command = "RFC822" + self.rfc822_key = "RFC822" response = imap.uid("FETCH", self.uid, self.rfc822_command) if response[0] != "OK": self.rfc822_command = "RFC822" + self.rfc822_key = "RFC822" response = imap.uid("FETCH", self.uid, self.rfc822_command) self._check(response, "uid fetch") *************** *** 393,397 **** # copying over all its internals. try: ! new_msg = email.Parser.Parser().parsestr(data["RFC822"]) except email.Errors.MessageParseError, e: # Yikes! Barry set this to return at this point, which --- 398,402 ---- # copying over all its internals. try: ! new_msg = email.Parser.Parser().parsestr(data[self.rfc822_key]) except email.Errors.MessageParseError, e: # Yikes! Barry set this to return at this point, which From montanaro at users.sourceforge.net Tue Mar 16 16:36:24 2004 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Tue Mar 16 16:45:42 2004 Subject: [Spambayes-checkins] spambayes CHANGELOG.txt,1.36,1.37 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13619 Modified Files: CHANGELOG.txt Log Message: Modify sb_dbexpimp.py to use csv as the interchange format. Add compatcsv.py to create the minimum amount of csv knowledge needed by sb_dbexpimp.py on Python 2.2 which doesn't have a csv module. 
Index: CHANGELOG.txt =================================================================== RCS file: /cvsroot/spambayes/spambayes/CHANGELOG.txt,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** CHANGELOG.txt 9 Feb 2004 05:34:16 -0000 1.36 --- CHANGELOG.txt 16 Mar 2004 21:36:22 -0000 1.37 *************** *** 1,4 **** --- 1,8 ---- [Note that all dates are in English, not American format - i.e. day/month/year] + Alpha Release 10 (Beta Release 1?) + ================================== + Skip Montanaro 16/03/2004 Change sb_dbexpimp.py to use csv as interchange format. + Alpha Release 9 =============== From montanaro at users.sourceforge.net Tue Mar 16 16:36:24 2004 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Tue Mar 16 16:45:44 2004 Subject: [Spambayes-checkins] spambayes/spambayes compatcsv.py,NONE,1.1 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13619/spambayes Added Files: compatcsv.py Log Message: Modify sb_dbexpimp.py to use csv as the interchange format. Add compatcsv.py to create the minimum amount of csv knowledge needed by sb_dbexpimp.py on Python 2.2 which doesn't have a csv module. --- NEW FILE: compatcsv.py --- #!/usr/bin/env python """Implement just enough of a csv parser to support sb_dbexpimp.py's needs.""" import sys import re if sys.platform == "windows": EOL = "\r\n" elif sys.platform == "mac": EOL = "\r" else: EOL = "\n" class reader: def __init__(self, fp): self.fp = fp def __iter__(self): return self def next(self): return self.parse_line(self.fp.next()) def parse_line(self, line): """parse the line. 
very simple assumptions: * separator is a comma * fields are only quoted with quotation marks and only quoted if the field contains a comma or a quotation mark * embedded quotation marks are doubled """ result = [] while line: if line[0] == '"': # search for ending quotation mark match = re.match('"(.*?)"[^"]', line) if match is None: # embedded newline line = line + self.fp.next() continue else: field = match.group(1) field = field.replace('""', '"') try: dummy = unicode(field, "ascii") except UnicodeError: field = unicode(field, "utf-8") result.append(field) line = line[len(field)+3:] else: # field is terminated by a comma or EOL match = re.match("(.*?)(,|%s)"%EOL, line) if match is None: print "parse error:", line raise field = match.group(1) try: dummy = unicode(field, "ascii") except UnicodeError: field = unicode(field, "utf-8") result.append(field) line = line[len(field)+len(match.group(2))] return result class writer: def __init__(self, fp): self.fp = fp def writerow(self, row): result = [] for item in row: if isinstance(item, unicode): item = item.encode("utf-8") else: item = str(item) if re.search('["\n,]', item) is not None: item = '"%s"' % item.replace('"', '""') result.append(item) result = ",".join(result) self.fp.write(result+EOL) From montanaro at users.sourceforge.net Tue Mar 16 16:36:24 2004 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Tue Mar 16 16:45:48 2004 Subject: [Spambayes-checkins] spambayes/scripts sb_dbexpimp.py,1.7,1.8 Message-ID: Update of /cvsroot/spambayes/spambayes/scripts In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13619/scripts Modified Files: sb_dbexpimp.py Log Message: Modify sb_dbexpimp.py to use csv as the interchange format. Add compatcsv.py to create the minimum amount of csv knowledge needed by sb_dbexpimp.py on Python 2.2 which doesn't have a csv module. 
Index: sb_dbexpimp.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/scripts/sb_dbexpimp.py,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** sb_dbexpimp.py 10 Feb 2004 18:54:38 -0000 1.7 --- sb_dbexpimp.py 16 Mar 2004 21:36:22 -0000 1.8 *************** *** 101,104 **** --- 101,109 ---- True, False = 1, 0 + try: + import csv + except ImportError: + import spambayes.compatcsv as csv + import spambayes.storage from spambayes.Options import options *************** *** 110,117 **** if isinstance(s, UnicodeType): s = s.encode('utf-8') ! return urllib.quote(s) def uunquote(s): ! return unicode(urllib.unquote(s), 'utf-8') def runExport(dbFN, useDBM, outFN): --- 115,125 ---- if isinstance(s, UnicodeType): s = s.encode('utf-8') ! return s def uunquote(s): ! try: ! return unicode(s, 'utf-8') ! except UnicodeDecodeError: ! return s def runExport(dbFN, useDBM, outFN): *************** *** 129,132 **** --- 137,142 ---- raise + writer = csv.writer(fp) + nham = bayes.nham; nspam = bayes.nspam; *************** *** 136,140 **** % (nham, nspam, len(words)) ! fp.write("%s,%s,\n" % (nham, nspam)) for word in words: --- 146,150 ---- % (nham, nspam, len(words)) ! writer.writerow([nham, nspam]) for word in words: *************** *** 143,149 **** spamcount = wi.spamcount word = uquote(word) ! fp.write("%s`%s`%s`\n" % (word, hamcount, spamcount)) ! ! fp.close() def runImport(dbFN, useDBM, newDBM, inFN): --- 153,157 ---- spamcount = wi.spamcount word = uquote(word) ! writer.writerow([word, hamcount, spamcount]) def runImport(dbFN, useDBM, newDBM, inFN): *************** *** 152,181 **** try: os.unlink(dbFN) ! except OSError, e: ! if e.errno != 2: # errno. ! raise try: os.unlink(dbFN+".dat") ! except OSError, e: ! if e.errno != 2: # errno. ! raise try: os.unlink(dbFN+".dir") ! except OSError, e: ! if e.errno != 2: # errno. ! raise bayes = spambayes.storage.open_storage(dbFN, useDBM) try: ! 
fp = open(inFN, 'r') except IOError, e: if e.errno != errno.ENOENT: raise ! nline = fp.readline() ! (nham, nspam, junk) = re.split(',', nline) if newDBM: --- 160,186 ---- try: os.unlink(dbFN) ! except OSError: ! pass try: os.unlink(dbFN+".dat") ! except OSError: ! pass try: os.unlink(dbFN+".dir") ! except OSError: ! pass bayes = spambayes.storage.open_storage(dbFN, useDBM) try: ! fp = open(inFN, 'rb') except IOError, e: if e.errno != errno.ENOENT: raise ! rdr = csv.reader(fp) ! (nham, nspam) = rdr.next() if newDBM: *************** *** 193,200 **** print "%s database %s using file %s" % (impType, dbFN, inFN) ! lines = fp.readlines() ! ! for line in lines: ! (word, hamcount, spamcount, junk) = re.split('`', line) word = uunquote(word) --- 198,202 ---- print "%s database %s using file %s" % (impType, dbFN, inFN) ! for (word, hamcount, spamcount) in rdr: word = uunquote(word) *************** *** 209,214 **** bayes._wordinfoset(word, wi) - fp.close() - print "Storing database, please be patient. Even moderately sized" print "databases may take a very long time to store." --- 211,214 ---- From anadelonbrin at users.sourceforge.net Tue Mar 16 17:25:25 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Tue Mar 16 17:34:41 2004 Subject: [Spambayes-checkins] spambayes CHANGELOG.txt, 1.37, 1.38 WHAT_IS_NEW.txt, 1.28, 1.29 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv26583 Modified Files: CHANGELOG.txt WHAT_IS_NEW.txt Log Message: Bring up to date. Also (should have done this last time!) run through a spell/grammar checker. 
Index: CHANGELOG.txt =================================================================== RCS file: /cvsroot/spambayes/spambayes/CHANGELOG.txt,v retrieving revision 1.37 retrieving revision 1.38 diff -C2 -d -r1.37 -r1.38 *** CHANGELOG.txt 16 Mar 2004 21:36:22 -0000 1.37 --- CHANGELOG.txt 16 Mar 2004 22:25:20 -0000 1.38 *************** *** 4,7 **** --- 4,32 ---- ================================== Skip Montanaro 16/03/2004 Change sb_dbexpimp.py to use csv as interchange format. + Tony Meyer 16/03/2004 Add [ 797579 ] Disable connections to POP3 and SMTP from remote hosts + Tony Meyer 16/03/2004 Fix [851785] Pop3proxy stores Ham/Spam/Unsure subject line in message cache + Tony Meyer 16/03/2004 Fix [ 906581 ] Assertion failed in search subject + Tony Meyer 16/03/2004 Add a note warning about [915466] Sorting review page loses classifications. + Skip Montanaro 16/03/2004 Add a verbose flag to tte.py + Mark Hammond 07/03/2004 Outlook: Catch all MAPI errors fetching the HTML for a message, and remove the warning about old win32all versions. + Mark Hammond 04/03/2004 Outlook: set pythoncom.frozen along with sys.frozen in our nasty registration hacks + Mark Hammond 27/02/2004 Outlook: Handle the fact that GetParent() may raise an exception, in which case we aren't able to show the item in the tree. + Mark Hammond 27/02/2004 Outlook: GetParent() catches MAPI errors and raises a MsgStoreException + Mark Hammond 27/02/2004 Outlook: Improve speed by calling .SetColumns() before .Restrict() + Skip Montanaro 26/02/2004 Add a -c flag to tte.py + Tony Meyer 22/02/2004 Simplify the auto bug reports via the web interface a bit, and get the user to enter a subject. + Tony Meyer 17/02/2004 Add [ 848311 ] sb_imapfilter.py obeys launch_browser + Tony Meyer 17/02/2004 The service was built, but not included in the installer for 1.0a9. Fix that. + Tony Meyer 16/02/2004 Fix the line wrapping in autogenerated bug reports via the web interface. 
+ Tony Meyer 16/02/2004 Fix a NameError in smtpproxy.py + Tony Meyer 16/02/2004 imapfilter: Use BODY.PEEK[] instead of RFC822.PEEK. + Tony Meyer 16/02/2004 imapfilter: Report time taken a little less pedantically. + Tony Meyer 16/02/2004 Fix [ 737967 ] Malformed messages break pop3proxy (et al) + Tony Meyer 15/02/2004 Fix half of [ 896366 ] Crashes in the web interface. + Skip Montanaro 13/02/2004 tte.py: record time (in seconds) to execute each round and count the number of leftover hams and spams at the end + Skip Montanaro 13/02/2004 Collect all potential MTA complaints, not just sendmail's "may be forged" (from Tim Peters). + Tony Meyer 13/02/2004 Fix [ 895606 ] 1.0a9 proxy raises an X-Spambayes-Exception + Skip Montanaro 12/02/2004 Big speedup when using sb_filter.py to process an entire mailbox. Instead of opening the database for each filter operation, cache the open db object and reuse as long as its mode is the same as the last time we used it. + Kenny Pitt 11/02/2004 Fix typo in sb_dbexpimp.py usage statement. Alpha Release 9 Index: WHAT_IS_NEW.txt =================================================================== RCS file: /cvsroot/spambayes/spambayes/WHAT_IS_NEW.txt,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** WHAT_IS_NEW.txt 5 Feb 2004 09:41:49 -0000 1.28 --- WHAT_IS_NEW.txt 16 Mar 2004 22:25:21 -0000 1.29 *************** *** 11,26 **** changes" section. ! New in Alpha Release 9 ! ====================== ! ! ------------------------------------ ! ** What happened to Alpha Release 8? ! ------------------------------------ ! ! There was no Alpha Release 8! Apart from 8 being a very dangerous number ! , we wanted to match the version numbers of the binary releases, as ! this time that includes more than just the Outlook plug-in, and we are ! going to attempt to release source and binary roughly simultaneously. This ! should hopefully make things clearer when discussing the various versions. 
-------------------------- --- 11,16 ---- changes" section. ! New in Alpha (Beta?) Release 10 (1?) ! ==================================== -------------------------- *************** *** 28,62 **** -------------------------- ! o The way pathnames in option files are handled has changed, as has the ! default values for some pathname options, in some situations. All ! pathnames in option values that are not absolute (with Windows, this ! means they will start with a drive letter) are now relative to the ! directory of the last configuration file to be loaded, rather than to ! the current working directory. ! ! What does this mean for you? Nothing, as long as your pathnames ! (the cache directories and databases, primarily) are either absolute ! or in the same directory as your configuration file. If, after ! upgrading, your database is suddenly empty, then you need to fix your ! configuration so that it points to the correct place. ! ! o Specifying the database name and type is now consistent across all ! scripts. Use "-p FILE" for a pickle, and "-d FILE" for a dbm. We ! still recommend setting these in a configuration file, rather than ! via the command line, so that if they change you remain unaffected. ! ! o The -p option for sb_imapfilter.py (to specify the password manually) ! is now -P (-p is universally used to signal that a pickle is to be used). ! ! o The -p option for sb_notesfilter.py (to prompt before ending) is now ! -P (-p is universally used to signal that a pickle is to be used). ! o The experimental (and probably unused) format for specifying that ! MySQL or PostgrepSQL should be used for the database ("mysql::" and ! "pgsql::") has been removed. These are now set like any other database ! type, using the persistent_use_database option (these cannot be set via ! the command line, apart from with the generic -o option). ! ! There should be no other incompatible changes (from 1.0a7) in this release. --- 18,32 ---- -------------------------- ! 
o The 'flat' or interchange format used by the sb_dbexpimp.py script has ! changed to CSV (comma-separated-values, as popularised by Microsoft ! Excel), rather than the old '`' separated format. If you have any ! existing files in the old format, you should use the old 1.0a9 script to ! convert these back to a pickle (or some other format), and then use the ! new script to convert them to CSV (if desired). This change should ! alleviate problems with some unicode characters in the database, and ! make it easier for other programs (such as Excel) to use the exported ! database. ! There should be no other incompatible changes (from 1.0a9) in this release. *************** *** 67,243 **** Outlook Plugin -------------- ! o Add slightly better statistics. ! o When installing, along with checking Outlook isn't running, check ! Outlook's mail transport also isn't running. ! o Fix uninstall problem - uninstall should be 100% clean, assuming Outlook ! isn't running. ! o Default to background filtering being on. ! o When doing a "batch train" (eg, selecting multiple messages and saying ! "Delete as" or "Recover from") the database was saved in between each ! and every message. Now it is only saved at the end. ! o Ensure that the Spam and Unsure folders aren't also being watched for ! new messages. ! o Include the folder name in many messages, to help track down wierd bugs ! from user logs, and also say what we are watching a folder for. ! o Don't allow top-level folders to be selected. ! o Don't allow a single-select dialog to be closed without a selection. ! o Add the Spam field to the 'Unsure' folder as is done with the Spam and ! watched folders. ! o Improve matters when the default (Outlook message) store is offline. ! o If the user attempts to close the Manager dialog while there is a ! problem preventing us being enabled, confirm they really want to close ! it. ! o Try and tone down the toolbar message in the log to prevent people ! reporting it as a bug. ! 
o When the 'New Folder' button was used to create a folder, that folder ! was not used when you closed the dialog, even though it was selected. ! o Add Spam field to unsure and empty folders. ! o Fix things so that the plug-in should better appear in the "COM Addins" ! list when running the binary version. ! o Add a warning when the Spam/Unsure folder cannot be found, with a ! suggestion about how this might have happened, and what to do about it. ! o Fix log message that indicated that timer delay values are in milliseconds ! (they are in seconds). POP3 Proxy / SMTP Proxy ----------------------- ! o smtpproxy is now only a module, not a script. Use sb_server instead. ! o sb_server was ignoring command-line options; fix. Web Interface ------------- ! o Add the start of a user-friendly interface to the testing tools that are ! part of SpamBayes (for testing new features against corpora of ham and ! spam). ! o Fix a bug in the web interface where the probability would be ! incorrectly calculated on 'show clues'. ! o More robust code for parsing score headers - copes with the presence of ! logarithms. ! o More robust code for parsing evidence headers. Copes with ';' and ': ' ! being part of a clue. ! o Increased the auth-digest login timeout from one minute to twenty. ! o Improve the 'Find Message' query on the front page of the web UI. ! o Add an 'advanced' word query (off by default). ! o Make the review messages page more customizable. ! o Provide a partially filled-in bug report message (please use it!). ! o Add basic statistics information. ! o Add a basic help system. ! o Add warning information, for example if the user has imbalanced ! training, or insufficient training. ! o Fixed an infinite loop when you break the browser connection to ! sb_server when sb_server is busy training. ! o New options "Ham Discard Level" and "Spam Discard Level". These make the ! interface default to discarding hams/spams in the training interface. ! 
o UserInterface: Split digest auth info properly. ! o Default to twenty search results rather than just one. ! o The status message wasn't updated as often as it should have been. ! o Output plurals correctly in stats information. ! o We printed out false positive numbers in the false negatives section of ! the stats, and vice versa. ! o Quote IMAP folder names when displaying them. ! o Added a third configuration page (via the "Experimental Configuration" ! button on the main Configuration page). This shows all the current ! experimental options (see the "New Experimental Options" section below). ! Note that these are, as the name implies, experimental, and they might ! not improve the results that you get. POP3 Proxy Service / POP3 Proxy Tray Application ------------------------------------------------ ! o Fix checking for most recent version. ! o Better icons and icon loading code. ! o Change the double-click (default) action to "review messages" and ! display the default in bold. ! o If a proxy is already running, don't start the service. ! o When we stopped the proxy and then restarted it didn't work. IMAP Filter ----------- ! o When a message is marked as deleted, it is now marked as read (seen) as ! well. ! o If sb_imapfilter.py is run without any switches, just serve the web ! interface (but don't launch a browser). ! o Ignore errors that occur when parsing a message. ! o Under certain circumstances, the fitler would add two identical ! SpamBayes ID headers; this is no longer the case. sb_filter --------- ! o Print each message once, not once per argument. ! o Now obeys the notate_to and notate_subject options. ! o Added -o/--option command line argument, which allows setting any ! option via the command line. ! o Expanded the documentation. ! o Allow multiple types of mailboxes to be processed using mboxutils.getmbox. ! If any mailbox files are given on the command line, the output is always ! a Unix-style mailbox containing From_ lines. ! 
o If the -n switch was before the -d/-p switch, then the name wouldn't be ! used; this has been changed so that the -d/-p name is used wherever the ! -n switch is. ! ! Testing Suite ! ------------- ! o Many improvements to the mksets.py script. ! o Many improvements to the rebal.py script. ! o Many improvements to the sort+group.py script. ! o Many improvements to the export.py script (for Outlook). ! o Added additional input/output methods to mkgraph.py. ! o Improvements to the documentation for mkgraph.py, regimes.py and ! incremental.py. ! o Added a makefile to the testtools directory to make using timcv.py easier. ! o Added a new regime - "balanced_corrected". Tokenizer --------- ! o Sendmail annotates the Received: header with "(may be forged)" if it ! thinks the sender is forging its identity. Generate a token for this, ! if we are mining received headers. ! o Solved the "backwards breakdown" problem with IP addresses in Received: ! headers. ! o Tightened up recognition of hostnames and accepted bracketed or ! parenthesized IP addresses without requiring a leading space. ! o Add the missing code for the Habeas headers tokenizing (and deprecated it). ! o Removed support code for the defunct experimental_ham_spam_imbalance_adjustment ! option. General ------- ! o sb_mboxtrain.py now preserves modtimes in Maildir and MH mailboxes. ! o loosecksum.py now allows multiple mailboxes on the command line. ! o Option names are always case insensitive, no matter what. ! o All scripts can use a new "-o" option, to set artibrary global options ! from the command line (instead of via a configuration file). ! o Non-absolute file/path options are relative to the last configuration ! file loaded, not the current working directory, as previously. ! o Moved the option loading code to a function. ! o Generalized the DirOfTxtFileMailbox class in mboxutils to assume all ! non-directory files contain a single message and to recursively descend ! 
into subdirectories of the argument directory. ! o Loosened constraints on HEADER_VALUE regular expression. ! o Import/Export data as utf-8 with sb_expimp.py. ! o Fixes to the which_database.py utility script. ! o Fix bug where if one was using Python 2.2, Windows and bsddb ! the database would never open correctly. ! o Fix the pspam scripts, muttrc and spambayes.el so that they work with ! the current SpamBayes package. ! o New script: sb_evoscore.py - A shim script between sb_xmlrpcserver.py ! and Ximian Evolution. ! o New script: mkreversemap.py - generates a pickle file mapping features ! to mailbox files and message-id's. ! o New script: extractmessages.py - use with mkreversemap.py to identify ! messages in your training database which contain interesting tokens. ! o New script: hammer.py: Hammers the core SpamBayes code, repeatedly ! training and classifying using faked-up messages. ! o New script: findbest.py, to find the next 'best' unsure message to train ! on. ! o Previous releases have included the sb_pop3dnd.py script (once named ! sb_overkill.py). With this release, this script should be fully ! usable. It provides the same POP3 proxy as sb_server, but also ! provides a local IMAP server so that you can train messages by dragging ! and dropping them within the mail client. Transition ========== ! If you are transitioning from a version older than 1.0a7, please also read the notes in the previous release notes (accessible from ). --- 37,79 ---- Outlook Plugin -------------- ! o Various minor improvements. POP3 Proxy / SMTP Proxy ----------------------- ! o Fix an error that stopped the SMTP proxy working at all in many cases. Web Interface ------------- ! o The pre-filled out bug reports have been simplified somewhat, and ! the readability of the resulting message has been improved. POP3 Proxy Service / POP3 Proxy Tray Application ------------------------------------------------ ! o The POP3 Proxy service was left out of the 1.0a9 installer, although ! 
it was present in the source archive. This has been corrected. IMAP Filter ----------- ! o The filter better handles invalid messages. ! o The filter should leave the "Seen" status of messages alone with ! more IMAP servers than previously. sb_filter --------- ! o Improved speed when using sb_filter.py to process an entire mailbox. Tokenizer --------- ! o Collect all potential MTA complaints, not just sendmail's ! "may be forged". General ------- ! o Added new flags and results data to the contrib/tte.py script. Transition ========== ! If you are transitioning from a version older than 1.0a9, please also read the notes in the previous release notes (accessible from ). *************** *** 247,254 **** however. ! o If you use the scripts in the testtools directory, you should examine ! how these scripts have changed (some command line switches are altered). ! ! See also the the changes listed in "Incompatible changes" at the top of this document. --- 83,87 ---- however. ! See also the changes listed in "Incompatible changes" at the top of this document. *************** *** 257,262 **** =================== The following bugs tracked via the Sourceforge system were fixed: ! 818871, 833439, 803798, 787676, 860410, 856628, 859215, 856141, 842984, ! 872044, 805852, 874784, 824628, 890645, 870799, 881427 A URL containing the details of these bugs can be made by appending the --- 90,94 ---- =================== The following bugs tracked via the Sourceforge system were fixed: ! 851785, 906581, 737967, 895606 A URL containing the details of these bugs can be made by appending the *************** *** 267,283 **** Feature Requests Added ====================== ! The following feature request tracked via the Sourceforge system was ! added for this release: ! 827138 ! ! A url containing the details of these feature requests can be made by ! appending the request number to this url: ! 
http://sourceforge.net/tracker/index.php?func=detail&group_id=61702&atid=498104&aid= Patches integrated =================== The following patches tracked via the Sourceforge system were integrated ! for this release. ! 842464, 831388, 809008, 831388, 857595, 857597, 861656 A url containing the details of these feature requests can be made by --- 99,110 ---- Feature Requests Added ====================== ! No feature requests tracked via the Sourceforge system were added for this ! release. Patches integrated =================== The following patches tracked via the Sourceforge system were integrated ! for this release: ! 797579, 848311 A url containing the details of these feature requests can be made by *************** *** 285,301 **** http://sourceforge.net/tracker/index.php?func=detail&group_id=61702&atid=498105&aid= ! Deprecated Options ! ================== ! SpamBayes now has a method of noting options that are deprecated and which ! will not be available in future releases (it is likely that options will ! only be deprecated for one release before being removed). Deprecated ! options will not be offered in the graphical interfaces (Outlook plugin ! and web interface), and will be listed in the "What's New" file (this ! file) for each release. Deprecated options have the same name as previously, but now begin with "x-" (so "extract_dow" is now "x-extract_dow"). You can continue to use ! the original name (eg "extract_dow") in your configuration file, but will receive warnings in your log file or console window. We recommend that you examine this output every time you upgrade SpamBayes to ensure that you are --- 112,128 ---- http://sourceforge.net/tracker/index.php?func=detail&group_id=61702&atid=498105&aid= ! Newly Deprecated Options ! ======================== ! Since 1.0a9, SpamBayes has had a method of noting options that are ! deprecated and which will not be available in future releases (it is ! 
likely that options will only be deprecated for one release before being ! removed). Deprecated options will not be offered in the graphical ! interfaces (Outlook plugin and web interface), and will be listed in ! the "What's New" file (this file) for each release. Deprecated options have the same name as previously, but now begin with "x-" (so "extract_dow" is now "x-extract_dow"). You can continue to use ! the original name (e.g. "extract_dow") in your configuration file, but will receive warnings in your log file or console window. We recommend that you examine this output every time you upgrade SpamBayes to ensure that you are *************** *** 305,309 **** the spambayes-dev archives (at ). ! The following options have been deprecated in this release: o [Tokenizer] generate_time_buckets o [Tokenizer] extract_dow --- 132,139 ---- the spambayes-dev archives (at ). ! No options have been deprecated in this release. ! ! The following options are still deprecated and will be removed in the near ! future, unless testing indicates otherwise: o [Tokenizer] generate_time_buckets o [Tokenizer] extract_dow *************** *** 314,323 **** ======================== ! SpamBayes now has a method of noting options that are experimental and ! which may be removed or made permanent in future releases (many experimental ! options will only be experimental for one release before being removed or ! fully integrated). Experimental options will not be offered in the ! graphical interfaces (Outlook plugin and web interface), and will be ! listed in the "What's New" file (this file) for each release. Experimental options begin with "x-" (as do deprecated options). If you --- 144,154 ---- ======================== ! Since 1.0a9, SpamBayes has had a method of noting options that are ! experimental and which may be removed or made permanent in future releases ! (many experimental options will only be experimental for one release before ! being removed or fully integrated). 
Experimental options are not exposed ! by the Outlook plugin, and are listed on a separate ! "Experimental Configuration" page in the web interface. The options will ! be listed in the "What's New" file (this file) for each release. Experimental options begin with "x-" (as do deprecated options). If you *************** *** 333,337 **** users to test these options out on their mail and let us know the results. This can be as simple as turning on the option and emailing ! spambayes@python.org with anacdotal results after a period of time, or the full testtools scripts can be used. For details about using these, please read the "README-DEVEL.txt" file that comes with the SpamBayes source --- 164,168 ---- users to test these options out on their mail and let us know the results. This can be as simple as turning on the option and emailing ! spambayes@python.org with anecdotal results after a period of time, or the full testtools scripts can be used. For details about using these, please read the "README-DEVEL.txt" file that comes with the SpamBayes source *************** *** 340,344 **** Experimental options are always turned off by default. ! The following experimental options have been added in this release: o [Tokenizer] x-search_for_habeas_headers o [Tokenizer] x-reduce_habeas_headers --- 171,178 ---- Experimental options are always turned off by default. ! No experimental options have been added in this release. ! ! Experimental options that are currently available (which we invite you to ! try out and report back your results) include: o [Tokenizer] x-search_for_habeas_headers o [Tokenizer] x-reduce_habeas_headers *************** *** 359,363 **** o [URLRetriever] x-web_prefix If these are used, if a message is scored as 'unsure', and could use ! more tokens in it's classification, then text from any URLs in the message is retrieved and used, if it makes a difference to the classification. 
--- 193,197 ---- o [URLRetriever] x-web_prefix If these are used, if a message is scored as 'unsure', and could use ! more tokens in its classification, then text from any URLs in the message is retrieved and used, if it makes a difference to the classification. From anadelonbrin at users.sourceforge.net Tue Mar 16 19:08:40 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Tue Mar 16 19:17:55 2004 Subject: [Spambayes-checkins] spambayes/scripts sb_dbexpimp.py,1.8,1.9 Message-ID: Update of /cvsroot/spambayes/spambayes/scripts In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20193/scripts Modified Files: sb_dbexpimp.py Log Message: CSV files opened with the csv module should be opened for writing with the "wb" mode. (As pointed out by Skip). Index: sb_dbexpimp.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/scripts/sb_dbexpimp.py,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** sb_dbexpimp.py 16 Mar 2004 21:36:22 -0000 1.8 --- sb_dbexpimp.py 17 Mar 2004 00:08:38 -0000 1.9 *************** *** 132,136 **** try: ! fp = open(outFN, 'w') except IOError, e: if e.errno != errno.ENOENT: --- 132,136 ---- try: ! fp = open(outFN, 'wb') except IOError, e: if e.errno != errno.ENOENT: From kpitt at users.sourceforge.net Wed Mar 17 09:11:25 2004 From: kpitt at users.sourceforge.net (Kenny Pitt) Date: Wed Mar 17 09:20:49 2004 Subject: [Spambayes-checkins] spambayes/Outlook2000 filter.py,1.37,1.38 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14803 Modified Files: filter.py Log Message: Don't record classification in stats unless all_actions is true so that rescoring messages doesn't skew the statistics counters. 
Index: filter.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/filter.py,v retrieving revision 1.37 retrieving revision 1.38 diff -C2 -d -r1.37 -r1.38 *** filter.py 19 Dec 2003 06:27:29 -0000 1.37 --- filter.py 17 Mar 2004 14:11:22 -0000 1.38 *************** *** 103,107 **** raise RuntimeError, "Eeek - bad action '%r'" % (action,) ! mgr.stats.RecordClassification(prob) return disposition except: --- 103,108 ---- raise RuntimeError, "Eeek - bad action '%r'" % (action,) ! if all_actions: ! mgr.stats.RecordClassification(prob) return disposition except: From anadelonbrin at users.sourceforge.net Thu Mar 18 22:12:08 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Thu Mar 18 22:21:45 2004 Subject: [Spambayes-checkins] website faq.txt,1.64,1.65 Message-ID: Update of /cvsroot/spambayes/website In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13479 Modified Files: faq.txt Log Message: Expand the "can I use OE" faq to include tips on creating the OE rules, particularly how to avoid catching "[Spambayes] blah" messages. Index: faq.txt =================================================================== RCS file: /cvsroot/spambayes/website/faq.txt,v retrieving revision 1.64 retrieving revision 1.65 diff -C2 -d -r1.64 -r1.65 *** faq.txt 19 Feb 2004 22:44:03 -0000 1.64 --- faq.txt 19 Mar 2004 03:12:06 -0000 1.65 *************** *** 304,308 **** Outlook Express does not let you filter on arbitrary headers (like X-Spambayes-Classification), sb_server must add the classification to the ! "To:" line, or the "Subject" line. sb_server/sb_imapfilter aren't quite as 'transparent' as the Outlook plugin, --- 304,322 ---- Outlook Express does not let you filter on arbitrary headers (like X-Spambayes-Classification), sb_server must add the classification to the ! "To:" line, or the "Subject" line. The configuration page has options that ! let you do this. ! ! 
Once you've set up sb_server, you also need to create rules (like any other ! rules) in Outlook Express, to take the appropriate action on mail based on ! its classification (move spam to a spam folder, for example). You do this ! by searching the "To:" or "Subject:" line for the classification - since ! you can't set the rule to look only at the start of the line, we recommend ! that you search for (e.g.) "spam," rather than simply "spam", so that you ! don't catch other messages, like ones from the SpamBayes mailing list. ! ! Even this can cause troubles if you get messages with subjects like "I get ! a lot of spam, do you?". In this case, you probably need to alter the tag ! that SpamBayes uses (to 'SBSpam' or something else), which you can do by ! editing the configuration file to include the header_spam_string option. sb_server/sb_imapfilter aren't quite as 'transparent' as the Outlook plugin, From anadelonbrin at users.sourceforge.net Sun Mar 21 18:56:49 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Sun Mar 21 19:07:00 2004 Subject: [Spambayes-checkins] spambayes/spambayes Corpus.py,1.14,1.15 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv28900/spambayes Modified Files: Corpus.py Log Message: In the fix for 851785 two (simple) bugs were introduced: fix those. This should hopefully fix the problems that Remi was experiencing, too. 
Index: Corpus.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Corpus.py,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** Corpus.py 16 Mar 2004 03:08:07 -0000 1.14 --- Corpus.py 21 Mar 2004 23:56:47 -0000 1.15 *************** *** 92,95 **** --- 92,96 ---- import sys # for output of docstring import time + import types from spambayes.Options import options *************** *** 198,202 **** # For Python 2.2, which doesn't allow "string in string". if isinstance(options["Headers", header_opt], ! types.StringsTypes): notate_opt = (options["Headers", header_opt],) else: --- 199,203 ---- # For Python 2.2, which doesn't allow "string in string". if isinstance(options["Headers", header_opt], ! types.StringTypes): notate_opt = (options["Headers", header_opt],) else: From anadelonbrin at users.sourceforge.net Mon Mar 22 23:39:17 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Mon Mar 22 23:49:36 2004 Subject: [Spambayes-checkins] spambayes/spambayes OptionsClass.py,1.23,1.24 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20020/spambayes Modified Files: OptionsClass.py Log Message: When figuring out whether an option can have multiple values or not we should use the default value rather than the current one. Otherwise we end up in situations like where notate_to is "spam", so a single value, but multiple values are allowed. This shouldn't affect anything else, because all the single-value defaults for options that allow multiple values already are tuples. Fixes radio button/checkbox problem reported on the list by Amir Katz. 
Index: OptionsClass.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/OptionsClass.py,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** OptionsClass.py 5 Feb 2004 09:33:02 -0000 1.23 --- OptionsClass.py 23 Mar 2004 04:39:15 -0000 1.24 *************** *** 145,149 **** def multiple_values_allowed(self): '''Multiple values are allowed for this option.''' ! return type(self.value) in MultiContainerTypes def is_valid(self, value): --- 145,149 ---- def multiple_values_allowed(self): '''Multiple values are allowed for this option.''' ! return type(self.default_value) in MultiContainerTypes def is_valid(self, value): From anadelonbrin at users.sourceforge.net Mon Mar 22 23:42:54 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Mon Mar 22 23:53:12 2004 Subject: [Spambayes-checkins] spambayes CHANGELOG.txt, 1.38, 1.39 WHAT_IS_NEW.txt, 1.29, 1.30 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20744 Modified Files: CHANGELOG.txt WHAT_IS_NEW.txt Log Message: Bring up to date. Index: CHANGELOG.txt =================================================================== RCS file: /cvsroot/spambayes/spambayes/CHANGELOG.txt,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** CHANGELOG.txt 16 Mar 2004 22:25:20 -0000 1.38 --- CHANGELOG.txt 23 Mar 2004 04:42:52 -0000 1.39 *************** *** 3,6 **** --- 3,8 ---- Alpha Release 10 (Beta Release 1?) ================================== + Tony Meyer 23/03/2004 Fix a subtle bug where if one option was selected for notate_to/subject the option would be presented with radio buttons not checkboxes (so only one, and never zero or 2/3 options could be chosen). + Kenny Pitt 18/03/2004 Outlook: Don't record classification in stats unless all_actions is true so that rescoring messages doesn't skew the statistics counters. 
Skip Montanaro 16/03/2004 Change sb_dbexpimp.py to use csv as interchange format. Tony Meyer 16/03/2004 Add [ 797579 ] Disable connections to POP3 and SMTP from remote hosts Index: WHAT_IS_NEW.txt =================================================================== RCS file: /cvsroot/spambayes/spambayes/WHAT_IS_NEW.txt,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** WHAT_IS_NEW.txt 16 Mar 2004 22:25:21 -0000 1.29 --- WHAT_IS_NEW.txt 23 Mar 2004 04:42:52 -0000 1.30 *************** *** 47,50 **** --- 47,55 ---- o The pre-filled out bug reports have been simplified somewhat, and the readability of the resulting message has been improved. + o If a single choice was made for the "Notate To" or "Notate Subject" + options, the configuration page would in future present that option + with radio buttons instead of checkboxes, meaning that the option + could not be turned off, and that adding an extra choice could not be + made. This has been fixed. POP3 Proxy Service / POP3 Proxy Tray Application From anadelonbrin at users.sourceforge.net Tue Mar 23 21:13:00 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Tue Mar 23 21:23:27 2004 Subject: [Spambayes-checkins] spambayes/scripts sb_dbexpimp.py,1.9,1.10 Message-ID: Update of /cvsroot/spambayes/spambayes/scripts In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3715/scripts Modified Files: sb_dbexpimp.py Log Message: I found the -n that someone mentioned! Fix the docstring to reflect the current (and previous?) usage - there is a -m switch, and there is not a -n one. 
Index: sb_dbexpimp.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/scripts/sb_dbexpimp.py,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** sb_dbexpimp.py 17 Mar 2004 00:08:38 -0000 1.9 --- sb_dbexpimp.py 24 Mar 2004 02:12:58 -0000 1.10 *************** *** 28,32 **** Database merging - multiple databases can be merged into one quite easily ! by simply not specifying -n on an import. This will add the two database nham and nspams together (assuming the two databases do not share corpora) and for wordinfo conflicts, will add spamcount and hamcount together. --- 28,32 ---- Database merging - multiple databases can be merged into one quite easily ! by specifying -m on an import. This will add the two database nham and nspams together (assuming the two databases do not share corpora) and for wordinfo conflicts, will add spamcount and hamcount together. From anadelonbrin at users.sourceforge.net Wed Mar 24 01:29:50 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Wed Mar 24 01:40:21 2004 Subject: [Spambayes-checkins] spambayes/spambayes Corpus.py, 1.15, 1.16 storage.py, 1.39, 1.40 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv11633/spambayes Modified Files: Corpus.py storage.py Log Message: I find it hard to believe that this bug has never been seen before, when it would have been there for a number of releases. Maybe I'm wrong, but testing seems to indicate that this is an actual bug. To experience it (before this update) - best done with a fresh corpus directory, rather than the one that has all your stuff in it: 1. Run sb_server with -o globals:verbose:True 2. Set your days to expiry to 0. 3. Receive a message via sb_server. 4. Train the message. 5. 
Telnet to localhost:110 (e.g.) and do USER username then QUIT You'll see in the verbose info that the message that you just trained has been *untrained*. This is not good! This fixes the problem by adding a 'flags' argument to the various corpus observers (to my knowledge, this is only the storage.Trainer). The only use of the flag at the moment is to pass a "don't train" flag to the observers (specifically removeMessage) when expiring messages, which then doesn't train. Messages that are removed in other ways (like regular use of the review page) are still trained as appropriate. I would really appreciate it if someone else could give this a wizz to confirm both the bug and the fix. (This arose at some point because of two things: adding in an expiry with every USER command, so that long-running sb_servers still expired (the initial one happens before the observers are attached, so doesn't hurt), and expiring the ham/spam caches, so that users didn't end up with really big directories). Index: Corpus.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Corpus.py,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** Corpus.py 21 Mar 2004 23:56:47 -0000 1.15 --- Corpus.py 24 Mar 2004 06:29:41 -0000 1.16 *************** *** 118,122 **** self.observers.append(observer) ! def addMessage(self, message): '''Add a Message to this corpus''' --- 118,122 ---- self.observers.append(observer) ! def addMessage(self, message, observer_flags=None): '''Add a Message to this corpus''' *************** *** 132,138 **** # training related if hasattr(obs, "onAddMessage"): ! obs.onAddMessage(message) ! def removeMessage(self, message): '''Remove a Message from this corpus''' key = message.key() --- 132,138 ---- # training related if hasattr(obs, "onAddMessage"): ! obs.onAddMessage(message, observer_flags) ! 
def removeMessage(self, message, observer_flags=None): '''Remove a Message from this corpus''' key = message.key() *************** *** 145,149 **** # see comments in event loop in addMessage if hasattr(obs, "onRemoveMessage"): ! obs.onRemoveMessage(message) def cacheMessage(self, message): --- 145,149 ---- # see comments in event loop in addMessage if hasattr(obs, "onRemoveMessage"): ! obs.onRemoveMessage(message, observer_flags) def cacheMessage(self, message): *************** *** 273,277 **** if options["globals", "verbose"]: print 'message %s has expired' % (msg.key()) ! self.removeMessage(msg) --- 273,278 ---- if options["globals", "verbose"]: print 'message %s has expired' % (msg.key()) ! from spambayes.storage import NO_TRAINING_FLAG ! self.removeMessage(msg, observer_flags=NO_TRAINING_FLAG) Index: storage.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/storage.py,v retrieving revision 1.39 retrieving revision 1.40 diff -C2 -d -r1.39 -r1.40 *** storage.py 5 Feb 2004 08:13:27 -0000 1.39 --- storage.py 24 Mar 2004 06:29:47 -0000 1.40 *************** *** 566,569 **** --- 566,572 ---- + # Flags that the Trainer will recognise. + NO_TRAINING_FLAG = "no_training" + class Trainer: '''Associates a Classifier object and one or more Corpora, \ *************** *** 577,582 **** self.updateprobs = updateprobs ! def onAddMessage(self, message): '''A message is being added to an observed corpus.''' self.train(message) --- 580,588 ---- self.updateprobs = updateprobs ! def onAddMessage(self, message, flags=None): '''A message is being added to an observed corpus.''' + # There are no flags that we currently care about, so + # get rid of the variable so that PyChecker doesn't bother us. + del flags self.train(message) *************** *** 592,598 **** message.RememberTrained(self.is_spam) ! def onRemoveMessage(self, message): '''A message is being removed from an observed corpus.''' ! 
self.untrain(message) def untrain(self, message): --- 598,609 ---- message.RememberTrained(self.is_spam) ! def onRemoveMessage(self, message, flags=None): '''A message is being removed from an observed corpus.''' ! # If a message is being expired from the corpus, we do ! # *NOT* want to untrain it, because that's not what's happening. ! # If this is the case, then flags will include NO_TRAINING_FLAG. ! # There are no other flags we currently use. ! if flags.find(NO_TRAINING_FLAG) < 0: ! self.untrain(message) def untrain(self, message): From anadelonbrin at users.sourceforge.net Wed Mar 24 01:36:01 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Wed Mar 24 01:46:28 2004 Subject: [Spambayes-checkins] spambayes CHANGELOG.txt, 1.39, 1.40 WHAT_IS_NEW.txt, 1.30, 1.31 Message-ID: Update of /cvsroot/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13082 Modified Files: CHANGELOG.txt WHAT_IS_NEW.txt Log Message: Bring up to date. Also pick the 1.0b1 name, since no-one seems to care all that much, and it's potentially more clear than having a two digit (10) alpha number. Index: CHANGELOG.txt =================================================================== RCS file: /cvsroot/spambayes/spambayes/CHANGELOG.txt,v retrieving revision 1.39 retrieving revision 1.40 diff -C2 -d -r1.39 -r1.40 *** CHANGELOG.txt 23 Mar 2004 04:42:52 -0000 1.39 --- CHANGELOG.txt 24 Mar 2004 06:35:58 -0000 1.40 *************** *** 1,6 **** [Note that all dates are in English, not American format - i.e. day/month/year] ! Alpha Release 10 (Beta Release 1?) ! ================================== Tony Meyer 23/03/2004 Fix a subtle bug where if one option was selected for notate_to/subject the option would be presented with radio buttons not checkboxes (so only one, and never zero or 2/3 options could be chosen). 
Kenny Pitt 18/03/2004 Outlook: Don't record classification in stats unless all_actions is true so that rescoring messages doesn't skew the statistics counters. --- 1,7 ---- [Note that all dates are in English, not American format - i.e. day/month/year] ! Beta Release 1 ! ============== ! Tony Meyer 24/03/2004 When messages expired from the sb_server caches during a USER check (rather than on launch) they would be untrained. This is not right at all, and so was fixed. Tony Meyer 23/03/2004 Fix a subtle bug where if one option was selected for notate_to/subject the option would be presented with radio buttons not checkboxes (so only one, and never zero or 2/3 options could be chosen). Kenny Pitt 18/03/2004 Outlook: Don't record classification in stats unless all_actions is true so that rescoring messages doesn't skew the statistics counters. Index: WHAT_IS_NEW.txt =================================================================== RCS file: /cvsroot/spambayes/spambayes/WHAT_IS_NEW.txt,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** WHAT_IS_NEW.txt 23 Mar 2004 04:42:52 -0000 1.30 --- WHAT_IS_NEW.txt 24 Mar 2004 06:35:58 -0000 1.31 *************** *** 11,16 **** changes" section. ! New in Alpha (Beta?) Release 10 (1?) ! ==================================== -------------------------- --- 11,16 ---- changes" section. ! New in Beta Release 1 ! ===================== -------------------------- *************** *** 52,55 **** --- 52,63 ---- could not be turned off, and that adding an extra choice could not be made. This has been fixed. + o Messages stored in the cache directories are 'expired' after a certain + number of days, to save space. This occurs when launching sb_server + and when executing a POP3 USER command (for those that have very long + running instances of sb_server). 
When a message was due to expire on + the USER command check (rather than on launch), sb_server would + incorrectly attempt to (un)train the message, resulting in a loss of + training data or a "can't find this file" error, depending on whether + the message was in memory. This is now fixed. POP3 Proxy Service / POP3 Proxy Tray Application From montanaro at users.sourceforge.net Wed Mar 24 15:18:12 2004 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Wed Mar 24 15:28:50 2004 Subject: [Spambayes-checkins] spambayes/scripts sb_dbexpimp.py,1.10,1.11 Message-ID: Update of /cvsroot/spambayes/spambayes/scripts In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv18494 Modified Files: sb_dbexpimp.py Log Message: Dump odd import/export example in favor of a simple merge example. Rearrange the import/merge message to read a bit better. Index: sb_dbexpimp.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/scripts/sb_dbexpimp.py,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** sb_dbexpimp.py 24 Mar 2004 02:12:58 -0000 1.10 --- sb_dbexpimp.py 24 Mar 2004 20:18:10 -0000 1.11 *************** *** 27,34 **** have a 'seed' database to start with. ! Database merging - multiple databases can be merged into one quite easily ! by specifying -m on an import. This will add the two database ! nham and nspams together (assuming the two databases do not share corpora) ! and for wordinfo conflicts, will add spamcount and hamcount together. Spambayes software release migration - an export can be executed before --- 27,35 ---- have a 'seed' database to start with. ! Database merging - multiple databases can be merged into one quite ! easily by specifying -m on an import. This will add the two database ! nham and nspams together (assuming the two databases do not share ! corpora) and for wordinfo conflicts, will add spamcount and hamcount ! together. 
Spambayes software release migration - an export can be executed before *************** *** 68,73 **** sb_dbexpimp -i -d mybayes.db -f mybayes.db.export ! Export, then import (reorganize) new pickled mybayes.db ! sb_dbexpimp -e -i -n -p mybayes.db -f mybayes.db.export Convert a bayes database from pickle to DBM --- 69,74 ---- sb_dbexpimp -i -d mybayes.db -f mybayes.db.export ! Merge home.db.export into an existing DBM work.db ! sb_dbexpimp -i -m -d work.db -f home.db.export Convert a bayes database from pickle to DBM *************** *** 196,200 **** impType = "Merging" ! print "%s database %s using file %s" % (impType, dbFN, inFN) for (word, hamcount, spamcount) in rdr: --- 197,201 ---- impType = "Merging" ! print "%s file %s into database %s" % (impType, inFN, dbFN) for (word, hamcount, spamcount) in rdr: From montanaro at users.sourceforge.net Wed Mar 24 16:30:46 2004 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Wed Mar 24 16:41:24 2004 Subject: [Spambayes-checkins] spambayes/scripts sb_dbexpimp.py,1.11,1.12 Message-ID: Update of /cvsroot/spambayes/spambayes/scripts In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3245 Modified Files: sb_dbexpimp.py Log Message: Delete merge example. Last example already demonstrates use of the -m flag. 
Index: sb_dbexpimp.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/scripts/sb_dbexpimp.py,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** sb_dbexpimp.py 24 Mar 2004 20:18:10 -0000 1.11 --- sb_dbexpimp.py 24 Mar 2004 21:30:43 -0000 1.12 *************** *** 69,75 **** sb_dbexpimp -i -d mybayes.db -f mybayes.db.export - Merge home.db.export into an existing DBM work.db - sb_dbexpimp -i -m -d work.db -f home.db.export - Convert a bayes database from pickle to DBM sb_dbexpimp -e -p abayes.db -f abayes.export --- 69,72 ---- From anadelonbrin at users.sourceforge.net Wed Mar 24 17:29:02 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Wed Mar 24 17:39:38 2004 Subject: [Spambayes-checkins] spambayes/spambayes Options.py,1.105,1.106 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16164/spambayes Modified Files: Options.py Log Message: Add a description for [html_ui] launch_browser. Index: Options.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Options.py,v retrieving revision 1.105 retrieving revision 1.106 diff -C2 -d -r1.105 -r1.106 *** Options.py 16 Mar 2004 05:08:31 -0000 1.105 --- Options.py 24 Mar 2004 22:28:59 -0000 1.106 *************** *** 885,889 **** ("launch_browser", "Launch browser", False, ! """""", BOOLEAN, RESTORE), --- 885,892 ---- ("launch_browser", "Launch browser", False, ! """If this option is set, then whenever sb_server or sb_imapfilter is ! started the default web browser will be opened to the main web ! interface page. Use of the -b switch when starting from the command ! 
line overrides this option.""", BOOLEAN, RESTORE), From htrd at users.sourceforge.net Thu Mar 25 14:53:17 2004 From: htrd at users.sourceforge.net (Toby Dickenson) Date: Thu Mar 25 15:04:03 2004 Subject: [Spambayes-checkins] spambayes/contrib sb_bnfilter.py, NONE, 1.1 sb_bnserver.py, NONE, 1.1 README, 1.1, 1.2 Message-ID: Update of /cvsroot/spambayes/spambayes/contrib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17211 Modified Files: README Added Files: sb_bnfilter.py sb_bnserver.py Log Message: first add of sb_bnfilter - like sb_filter but without the startup overhead --- NEW FILE: sb_bnfilter.py --- #! /usr/bin/env python # This script has a similar interface and purpose to sb_filter, but avoids # re-initialising spambayes for consecutive requests using a short-lived # server process. This is intended to give the performance advantages of # sb_xmlrpcserver, without the administrative complications. # # The strategy is: # # * while we cant connect to a unix domain socket # * fork a seperate process that runs in the background # * in the child process: # * exec sb_bnserver. it listens on that same unix domain socket. # * in the parent process: # * sleep a little, to give the child chance to start up # * write the filtering/training command line options to the socket # * copy the content of stdin to the socket # * meanwhile..... sb_bnserver gets to work on that data in the same manner # as sb_filter. it writes its response back through that socket # * read a line from the socket containing a success/failure code # * read a line from the socket containing a byte count # * copy the remainder of the content of the socket to stdout or stderr, # depending on whether it reported success or failure. # * if the number of bytes read from the socket is different to the byte # count, exit with an error # * if the reported exit code is non-zero, exit with an error # # sb_bnfilter will only terminate with a zero exit code if everything # is ok. 
If it terminates with a non-zero exit code then its stdout should # be ignored. # # sb_bnserver will close itself and remove its socket after a period of # inactivity to ensure it does not use up resources indefinitely. # # Author: Toby Dickenson # """Usage: %(program)s [options] Where: -h show usage and exit * -f filter (default if no processing options are given) * -g [EXPERIMENTAL] (re)train as a good (ham) message * -s [EXPERIMENTAL] (re)train as a bad (spam) message * -t [EXPERIMENTAL] filter and train based on the result -- you must make sure to untrain all mistakes later. Not recommended. * -G [EXPERIMENTAL] untrain ham (only use if you've already trained this message) * -S [EXPERIMENTAL] untrain spam (only use if you've already trained this message) -k FILE Unix domain socket used to communicate with a short-lived server process. Default is ~/.sbbnsock- These options will not take effect when connecting to a preloaded server: -p FILE use pickle FILE as the persistent store. loads data from this file if it exists, and saves data to this file at the end. -d FILE use DBM store FILE as the persistent store. 
-o section:option:value set [section, option] in the options database to value -a seconds timeout in seconds between requests before this server terminates -A number terminate this server after this many requests """ import sys, getopt, socket, errno, os, time def usage(code, msg=''): """Print usage message and sys.exit(code).""" if msg: print >> sys.stderr, msg print >> sys.stderr print >> sys.stderr, __doc__ sys.exit(code) def main(): try: opts, args = getopt.getopt(sys.argv[1:], 'hfgstGSd:p:o:a:A:k:') except getopt.error, msg: usage(2, msg) # build the default socket filename from environment variables filename = os.path.expanduser('~/.sbbnsock-'+socket.gethostname()) action_options = [] server_options = [] for opt, arg in opts: if opt == '-h': usage(0) elif opt in ('-f', '-g', '-s', '-t', '-G', '-S'): action_options.append(opt) elif opt in ('-d', '-p', '-o', '-a', '-A'): server_options.append(opt) server_options.append(arg) elif opt == '-k': filename = arg if len(args) != 0: usage(2) server_options.append(filename) s = make_socket(server_options, filename) # We have a connection to the existing shared server w_file = s.makefile('w') r_file = s.makefile('r') # pass our command line on the first line into the socket w_file.write(' '.join(action_options)+'\n') # copy entire contents of stdin into the socket while 1: b = sys.stdin.read(1024*64) if not b: break w_file.write(b) w_file.flush() w_file.close() s.shutdown(1) # expect to get back a line containing the size of the rest of the response error = int(r_file.readline()) expected_size = int(r_file.readline()) if error: output = sys.stderr else: output = sys.stdout total_size = 0 # copy entire contents of socket into stdout or stderr while 1: b = r_file.read(1024*64) if not b: break output.write(b) total_size += len(b) output.flush() # If we didnt receive the right amount then something has gone wrong. # exit now, and procmail will ignore everything we have sent to stdout. 
# Note that this policy is different to the xmlrpc client, which # tries to handle errors internally by constructing a stdout that is # the same as stdin was. if total_size != expected_size: print >> sys.stderr, 'size mismatch %d != %d' % (total_size, expected_size) sys.exit(3) if error: sys.exit(error) def make_socket(server_options, file): refused_count = 0 no_server_count = 0 while 1: try: s = socket.socket(socket.AF_UNIX,socket.SOCK_STREAM) s.connect(file) except socket.error,e: if e[0] == errno.EAGAIN: # baaah pass elif e[0] == errno.ENOENT: # no such file.... no such server. create one. no_server_count += 1 if no_server_count>4: raise fork_server(server_options) elif e[0] == errno.ECONNREFUSED: # socket file exists but noone listening. refused_count += 1 if refused_count == 2: # This is the second time we havent been able to connect. Maybe that socket # file has got orphaned. remove it, wait, and try again try: os.unlink(file) except EnvironmentError: pass elif refused_count>2: raise else: raise # some other problem time.sleep(0.2 * 2.0**no_server_count * 2.0**refused_count) else: return s def fork_server(options): if os.fork(): # parent return os.close(0) sys.stdin = sys.__stdin__ = open("/dev/null") os.close(1) sys.stdout = sys.__stdout__ = open("/dev/null", "w") # leave stderr # os.close(2) # sys.stderr = sys.__stderr__ = open("/dev/null", "w") os.setsid() # Use exec rather than import here because eventually it may be nice to reimplement this one file in C os.execv(sys.executable,[sys.executable, os.path.join(os.path.split(sys.argv[0])[0],'sb_bnserver.py') ]+options) # should never get here sys._exit(1) if __name__ == "__main__": main() --- NEW FILE: sb_bnserver.py --- #! /usr/bin/env python # Another server version of hammie.py # This is not intended to be run manually, it is the opportunistic # daemon backend of sb_bnfilter. 
# # Author: Toby Dickenson # """Usage: %(program)s [options] FILE Where: -h show usage and exit -p FILE use pickle FILE as the persistent store. loads data from this file if it exists, and saves data to this file at the end. -d FILE use DBM store FILE as the persistent store. -o section:option:value set [section, option] in the options database to value -a seconds timeout in seconds between requests before this server terminates -A number terminate this server after this many requests FILE unix domain socket used on which we listen """ import os, getopt, sys, SocketServer, time, traceback, select, socket, errno try: True, False except NameError: # Maintain compatibility with Python 2.2 True, False = 1, 0 # See Options.py for explanations of these properties program = sys.argv[0] def usage(code, msg=''): """Print usage message and sys.exit(code).""" if msg: print >> sys.stderr, msg print >> sys.stderr print >> sys.stderr, __doc__ sys.exit(code) def main(): """Main program; parse options and go.""" try: opts, args = getopt.getopt(sys.argv[1:], 'hd:p:o:a:A:') except getopt.error, msg: usage(2, msg) if len(args) != 1: usage(2, "socket not specified") # get the server up before initializing spambayes, so that # we havent wasted time if we later find we cant start the server try: server = BNServer(args[0], BNRequest) except socket.error,e: if e[0]==errno.EADDRINUSE: pass # in use, no need else: raise # a real error else: try: from spambayes import Options, storage options = Options.options for opt, arg in opts: if opt == '-h': usage(0) elif opt == '-o': options.set_from_cmdline(arg, sys.stderr) elif opt == '-a': server.timeout = float(arg) elif opt == '-A': server.number = int(arg) h = make_HammieFilter() h.dbname, h.usedb = storage.database_type(opts) server.hammie = h server.serve_until_idle() h.close() finally: try: os.unlink(args[0]) except EnvironmentError: pass class NowIdle(Exception): pass class BNServer(SocketServer.UnixStreamServer): allow_reuse_address = True 
timeout = 10.0 number = 100 def serve_until_idle(self): try: for i in range(self.number): self.handle_request() except NowIdle: pass def get_request(self): r,w,e = select.select([self.socket], [], [], self.timeout) if r: return self.socket.accept() else: raise NowIdle() class BNRequest(SocketServer.StreamRequestHandler): def handle(self): switches = self.rfile.readline() body = self.rfile.read() try: response = self._calc_response(switches,body) self.wfile.write('0\n%d\n'%(len(response),)) self.wfile.write(response) except: response = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0] self.wfile.write('1\n%d\n'%(len(response),)) self.wfile.write(response) def _calc_response(self,switches,body): switches = switches.split() actions = [] opts, args = getopt.getopt(switches, 'fgstGS') h = self.server.hammie for opt, arg in opts: if opt == '-f': actions.append(h.filter) elif opt == '-g': actions.append(h.train_ham) elif opt == '-s': actions.append(h.train_spam) elif opt == '-t': actions.append(h.filter_train) elif opt == '-G': actions.append(h.untrain_ham) elif opt == '-S': actions.append(h.untrain_spam) if actions == []: actions = [h.filter] from spambayes import mboxutils msg = mboxutils.get_message(body) for action in actions: action(msg) return msg.as_string(1) def make_HammieFilter(): # The sb_hammie script has some logic in the HammieFiler class that we need here too. # Ideally that should be moved into the spambayes package, but for now lets just # abuse sys.path, make assumptions about the directory layout, and import it direct # from the sb_filter script. 
from spambayes import Options path = os.path.split(Options.__file__)[0]+'/../scripts' print >> sys.stderr, path if path not in sys.path: sys.path.append(path) from sb_filter import HammieFilter return HammieFilter() if __name__ == "__main__": main() Index: README =================================================================== RCS file: /cvsroot/spambayes/spambayes/contrib/README,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** README 20 Nov 2003 02:45:20 -0000 1.1 --- README 25 Mar 2004 19:53:15 -0000 1.2 *************** *** 23,24 **** --- 23,30 ---- messages for for select tokens + sb_bnfilter.py - alternative to sb_filter that avoids re-initialising + spambayes for consecutive requests using a short-lived server process. + This is intended to give the performance advantages of sb_xmlrpcserver, + without the administrative complications. + + sb_bnserver.py - component of sb_bnfilter.py From htrd at users.sourceforge.net Fri Mar 26 02:36:39 2004 From: htrd at users.sourceforge.net (Toby Dickenson) Date: Fri Mar 26 02:47:28 2004 Subject: [Spambayes-checkins] spambayes/contrib sb_bnserver.py,1.1,1.2 Message-ID: Update of /cvsroot/spambayes/spambayes/contrib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv21237 Modified Files: sb_bnserver.py Log Message: remove debugging print Index: sb_bnserver.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/contrib/sb_bnserver.py,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** sb_bnserver.py 25 Mar 2004 19:53:15 -0000 1.1 --- sb_bnserver.py 26 Mar 2004 07:36:36 -0000 1.2 *************** *** 162,166 **** from spambayes import Options path = os.path.split(Options.__file__)[0]+'/../scripts' - print >> sys.stderr, path if path not in sys.path: sys.path.append(path) --- 162,165 ---- From htrd at users.sourceforge.net Mon Mar 29 01:52:42 2004 From: htrd at users.sourceforge.net (Toby Dickenson) Date: Mon Mar 
29 02:04:02 2004 Subject: [Spambayes-checkins] spambayes/contrib sb_bnfilter.py,1.1,1.2 Message-ID: Update of /cvsroot/spambayes/spambayes/contrib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12589/contrib Modified Files: sb_bnfilter.py Log Message: less aggressive timings. allow plenty of time to start the rest of spambayes in the server Index: sb_bnfilter.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/contrib/sb_bnfilter.py,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** sb_bnfilter.py 25 Mar 2004 19:53:15 -0000 1.1 --- sb_bnfilter.py 29 Mar 2004 06:52:39 -0000 1.2 *************** *** 177,188 **** # socket file exists but noone listening. refused_count += 1 ! if refused_count == 2: ! # This is the second time we havent been able to connect. Maybe that socket ! # file has got orphaned. remove it, wait, and try again try: os.unlink(file) except EnvironmentError: pass ! elif refused_count>2: raise else: --- 177,189 ---- # socket file exists but noone listening. refused_count += 1 ! if refused_count == 6: ! # We have been waiting ages and still havent been able to connect. Maybe that socket ! # file has got orphaned. remove it, wait, and try again. We need to allow ! # enough time for sb_bnserver to initialise the rest of spambayes try: os.unlink(file) except EnvironmentError: pass ! elif refused_count>6: raise else: From kpitt at users.sourceforge.net Tue Mar 30 11:30:22 2004 From: kpitt at users.sourceforge.net (Kenny Pitt) Date: Tue Mar 30 11:42:18 2004 Subject: [Spambayes-checkins] spambayes/spambayes FileCorpus.py,1.9,1.10 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv32386 Modified Files: FileCorpus.py Log Message: Add observer_flags param to the FileCorpus version of removeMessage(). 
Index: FileCorpus.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/FileCorpus.py,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** FileCorpus.py 11 Dec 2003 18:41:34 -0000 1.9 --- FileCorpus.py 30 Mar 2004 16:30:16 -0000 1.10 *************** *** 140,144 **** Corpus.Corpus.addMessage(self, message) ! def removeMessage(self, message): '''Remove a Message from this corpus''' if options["globals", "verbose"]: --- 140,144 ---- Corpus.Corpus.addMessage(self, message) ! def removeMessage(self, message, observer_flags=None): '''Remove a Message from this corpus''' if options["globals", "verbose"]: *************** *** 149,153 **** # superclass processing *MUST* be done # perform superclass processing *LAST!* ! Corpus.Corpus.removeMessage(self, message) def __repr__(self): --- 149,153 ---- # superclass processing *MUST* be done # perform superclass processing *LAST!* ! Corpus.Corpus.removeMessage(self, message, observer_flags) def __repr__(self):