From anadelonbrin at users.sourceforge.net Tue Aug 3 08:51:03 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Tue Aug 3 08:51:05 2004 Subject: [Spambayes-checkins] spambayes/spambayes/test test_sb-server.py, 1.5, 1.6 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23435/spambayes/test Modified Files: test_sb-server.py Log Message: Implement [ 909088 ] remove STLS pop3 capability Remove STLS like we remove PIPELINING, using Richie's suggested code. Also update the test. Index: test_sb-server.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/test/test_sb-server.py,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** test_sb-server.py 3 Dec 2003 00:35:58 -0000 1.5 --- test_sb-server.py 3 Aug 2004 06:51:00 -0000 1.6 *************** *** 167,173 **** test purposes - the POP3 proxy *doesn't* support pipelining, and we test that it correctly filters out that capability from the ! proxied capability list.""" lines = ["+OK Capability list follows", "PIPELINING", "TOP", ".", --- 167,174 ---- test purposes - the POP3 proxy *doesn't* support pipelining, and we test that it correctly filters out that capability from the ! proxied capability list. Ditto for STLS.""" lines = ["+OK Capability list follows", "PIPELINING", + "STLS", "TOP", ".", *************** *** 284,287 **** --- 285,299 ---- assert response.find("PIPELINING") == -1 + # Verify that the test server claims to support STLS. + pop3Server.send("capa\r\n") + response = pop3Server.recv(1000) + assert response.find("STLS") >= 0 + + # Ask for the capabilities via the proxy, and verify that the proxy + # is filtering out the PIPELINING capability. + proxy.send("capa\r\n") + response = proxy.recv(1000) + assert response.find("STLS") == -1 + # Stat the mailbox to get the number of messages. 
proxy.send("stat\r\n") From anadelonbrin at users.sourceforge.net Tue Aug 3 08:51:55 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Tue Aug 3 08:51:58 2004 Subject: [Spambayes-checkins] spambayes/scripts sb_server.py,1.25,1.26 Message-ID: Update of /cvsroot/spambayes/spambayes/scripts In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23574/scripts Modified Files: sb_server.py Log Message: Implement [ 909088 ] remove STLS pop3 capability Remove STLS like we remove PIPELINING, using Richie's suggested code. Also update the test. Also clean up the getopt stuff, and add a couple of clarifying comments. Index: sb_server.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/scripts/sb_server.py,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** sb_server.py 19 Jul 2004 09:57:07 -0000 1.25 --- sb_server.py 3 Aug 2004 06:51:53 -0000 1.26 *************** *** 303,311 **** def onResponse(self): ! # We don't support pipelining, so if the command is CAPA and the ! # response includes PIPELINING, hack out that line of the response. ! if self.command == 'CAPA': ! pipelineRE = r'(?im)^PIPELINING[^\n]*\n' ! self.response = re.sub(pipelineRE, '', self.response) # Pass the request and the raw response to the subclass and --- 303,312 ---- def onResponse(self): ! # There are some features, tested by clients using CAPA, ! # that we don't support. We strip them from the CAPA ! # response here, so that the client won't use them. ! for unsupported in ['PIPELINING', 'STLS', ]: ! unsupportedLine = r'(?im)^%s[^\n]*\n' % (unsupported,) ! self.response = re.sub(unsupportedLine, '', self.response) # Pass the request and the raw response to the subclass and *************** *** 916,920 **** # Read the arguments. try: ! opts, args = getopt.getopt(sys.argv[1:], 'hbpsd:p:l:u:o:') except getopt.error, msg: print >>sys.stderr, str(msg) + '\n\n' + __doc__ --- 917,921 ---- # Read the arguments. 
try: ! opts, args = getopt.getopt(sys.argv[1:], 'hbd:p:l:u:o:') except getopt.error, msg: print >>sys.stderr, str(msg) + '\n\n' + __doc__ *************** *** 928,931 **** --- 929,934 ---- elif opt == '-b': state.launchUI = True + # '-p' and '-d' are handled by the storage.database_type call + # below, in case you are wondering why they are missing. elif opt == '-l': state.proxyPorts = [_addressAndPort(arg)] From sjoerd at users.sourceforge.net Tue Aug 3 13:54:25 2004 From: sjoerd at users.sourceforge.net (Sjoerd Mullender) Date: Tue Aug 3 13:54:32 2004 Subject: [Spambayes-checkins] spambayes/spambayes message.py,1.51,1.52 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3194 Modified Files: message.py Log Message: Don't round-trip the message being tokenized to a string. Index: message.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/message.py,v retrieving revision 1.51 retrieving revision 1.52 diff -C2 -d -r1.51 -r1.52 *** message.py 14 Jul 2004 07:08:51 -0000 1.51 --- message.py 3 Aug 2004 11:54:22 -0000 1.52 *************** *** 273,277 **** def asTokens(self): ! return tokenize(self.as_string()) def tokenize(self): --- 273,277 ---- def asTokens(self): ! return tokenize(self) def tokenize(self): From anadelonbrin at users.sourceforge.net Wed Aug 4 10:18:20 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Wed Aug 4 10:18:23 2004 Subject: [Spambayes-checkins] spambayes/spambayes message.py,1.52,1.53 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8575/spambayes Modified Files: message.py Log Message: The StringIO import is meant to fall back to StringIO if cStringIO is not available, I believe. Add a insert_exception_header utility function. Both sb_server and sb_imapfilter do this, so it makes sense to factor the code out to here. 
(This can't be a method of the Message object itself, because we use it when we can't make a Message object). Index: message.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/message.py,v retrieving revision 1.52 retrieving revision 1.53 diff -C2 -d -r1.52 -r1.53 *** message.py 3 Aug 2004 11:54:22 -0000 1.52 --- message.py 4 Aug 2004 08:18:18 -0000 1.53 *************** *** 95,98 **** --- 95,99 ---- import shelve import pickle + import traceback import email *************** *** 105,109 **** from spambayes.tokenizer import tokenize ! from cStringIO import StringIO CRLF_RE = re.compile(r'\r\n|\r|\n') --- 106,113 ---- from spambayes.tokenizer import tokenize ! try: ! import cStringIO as StringIO ! except ImportError: ! import StringIO CRLF_RE = re.compile(r'\r\n|\r|\n') *************** *** 248,252 **** Use *_from_string as described above.""" prs = email.Parser.Parser() ! fp = StringIO(payload) # this is kindof a hack, due to the fact that the parser creates a # new message object, and we already have the message object --- 252,256 ---- Use *_from_string as described above.""" prs = email.Parser.Parser() ! fp = StringIO.StringIO(payload) # this is kindof a hack, due to the fact that the parser creates a # new message object, and we already have the message object *************** *** 482,483 **** --- 486,515 ---- def sbheadermessage_from_string(s, _class=SBHeaderMessage, strict=False): return email.message_from_string(s, _class, strict) + + # Utility function to insert an exception header into the given RFC822 text. + # This is used by both sb_server and sb_imapfilter, so it's handy to have + # it available separately. + def insert_exception_header(string_msg): + """Insert an exception header into the given RFC822 message (as text). 
+ + Returns a tuple of the new message text and the exception details.""" + stream = StringIO.StringIO() + traceback.print_exc(None, stream) + details = stream.getvalue() + + # Build the header. This will strip leading whitespace from + # the lines, so we add a leading dot to maintain indentation. + detailLines = details.strip().split('\n') + dottedDetails = '\n.'.join(detailLines) + headerName = 'X-Spambayes-Exception' + header = email.Header.Header(dottedDetails, header_name=headerName) + + # Insert the exception header, and also insert the id header, + # otherwise we might keep doing this message over and over again. + # We also ensure that the line endings are /r/n as RFC822 requires. + headers, body = re.split(r'\n\r?\n', string_msg, 1) + header = re.sub(r'\r?\n', '\r\n', str(header)) + headers += "\n%s: %s\r\n%s: %s\r\n\r\n" % \ + (headerName, header, + options["Headers", "mailid_header_name"], self.id) + return (headers + body, details) From anadelonbrin at users.sourceforge.net Wed Aug 4 10:32:29 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Wed Aug 4 10:32:31 2004 Subject: [Spambayes-checkins] spambayes/scripts sb_imapfilter.py,1.34,1.35 Message-ID: Update of /cvsroot/spambayes/spambayes/scripts In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv10643/scripts Modified Files: sb_imapfilter.py Log Message: Update some comments. Move _extract_fetch_data to be a method of the IMAPSession. Add doctsrings. Be less restrictive about the error returned when logging in fails. Centralise checking the IMAP response into an IMAPSession method. Raise a BadIMAPResponseError exception if it is not right (and that can either be caught or not, depending on whether it's fatal). Remove FindMessage() which no-one uses. Don't use setFolder, just do self.folder = folder. Change the way the get_substance method works (renaming it in the process). Now we return a new message object, which is the same, but has the contents. 
This way we can avoid using private members of email objects. Remove global imap object and make it a member of the various objects. Remove the layers of attempting to fetch. These were legacy from the days when I wrongly used RFC822.PEEK. BODY.PEEK should always work. Move adding exception header out to spambayes.message Try to do a better job of waiting for the new message to appear (to fix sf#941596). Have to check for sure that this fixes it, yet. Remove a useless except KeyError from __iter__ Remove recent_uids(), which no-one uses. Switch to using the Message-ID header id as our id, unless one can't be found, in which case we use our one. This is often requested, as it means we don't need to resave the message all the time. We look for our header first, so we should still work with old messages, but this might mean that existing messages are retrained or refiltered - I have yet to confirm this (for the WHAT_IS_NEW file). Tidy up the Train() function. Keep going if just one folder is bad (training/filtering). These are *major* changes to imapfilter, so could well break something. I have done testing here, and have half a unittest script (checking it in shortly), but even still. I would appreciate testing from anyone that's running from CVS & using imapfilter. Index: sb_imapfilter.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/scripts/sb_imapfilter.py,v retrieving revision 1.34 retrieving revision 1.35 diff -C2 -d -r1.34 -r1.35 *** sb_imapfilter.py 19 Jul 2004 09:55:21 -0000 1.34 --- sb_imapfilter.py 4 Aug 2004 08:32:26 -0000 1.35 *************** *** 34,38 **** to value - Examples: --- 34,37 ---- *************** *** 47,73 **** Warnings: [...1340 lines suppressed...] if not (doClassify or doTrain): ! if server != "": imap = IMAPSession(server, port, imapDebug, doExpunge) httpServer = UserInterfaceServer(options["html_ui", "port"]) --- 971,977 ---- # Web interface if not (doClassify or doTrain): !
if server == "": ! imap = None ! else: imap = IMAPSession(server, port, imapDebug, doExpunge) httpServer = UserInterfaceServer(options["html_ui", "port"]) *************** *** 937,940 **** --- 984,988 ---- imap = IMAPSession(server, port, imapDebug, doExpunge) imap.login(username, pwd) + imap_filter.imap_server = imap if doTrain: From anadelonbrin at users.sourceforge.net Wed Aug 4 10:36:35 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Wed Aug 4 10:36:37 2004 Subject: [Spambayes-checkins] spambayes/spambayes/test test_sb_imapfilter.py, NONE, 1.1 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv11096/spambayes/test Added Files: test_sb_imapfilter.py Log Message: First stab at some unit tests for sb_imapfilter.py. These are pretty limited at the moment, but I will add to them as I have time. At the moment, many require an IMAP server (hardcoded values in the script). At some point I'll improve this with a dummy server like test_sb-server.py has, but too much work for now. These all pass here right now, with the updated imapfilter script. --- NEW FILE: test_sb_imapfilter.py --- # Test sb_imapfilter script. # At the moment, the script needs to be provided with an IMAP server to # use for the testing. It would be nice if we provided a dummy server # like test_sb-server.py does for POP, but this will do for the moment. 
import sys import time import imaplib import unittest import sb_test_support sb_test_support.fix_sys_path() from spambayes.Options import options from sb_imapfilter import BadIMAPResponseError from sb_imapfilter import IMAPSession, IMAPMessage, IMAPFolder IMAP_SERVER = "mail.madsods.gen.nz" IMAP_PORT = 143 IMAP_USERNAME = "test_account+madsods.gen.nz" IMAP_PASSWORD = "" IMAP_FOLDER_LIST = ["INBOX", "unsure", "ham_to_train", "spam", "spam_to_train", ".mailboxlist"] IMAP_FOLDER_LIST.sort() class IMAPSessionTest(unittest.TestCase): def setUp(self): self.imap = IMAPSession(IMAP_SERVER, IMAP_PORT) def tearDown(self): try: self.imap.logout() except imaplib.error: pass def testGoodLogin(self): self.imap.login(IMAP_USERNAME, IMAP_PASSWORD) self.assert_(self.imap.logged_in) def testBadLogin(self): self.assertRaises(SystemExit, self.imap.login, IMAP_USERNAME, "wrong password") def test_check_response(self): test_data = "IMAP response data" response = ("OK", test_data) data = self.imap.check_response("", response) self.assertEqual(data, test_data) response = ("NO", test_data) self.assertRaises(BadIMAPResponseError, self.imap.check_response, "", response) def testSelectFolder(self): # This test will fail if testGoodLogin fails. self.imap.login(IMAP_USERNAME, IMAP_PASSWORD) # Check handling of Python (not SpamBayes) bug #845560. self.assertRaises(BadIMAPResponseError, self.imap.SelectFolder, "") # Check selection. self.imap.SelectFolder("Inbox") response = self.imap.response('OK') self.assert_(response[0] == "OK") self.assert_(response[1] != [None]) # Check that we don't reselect if we are already in that folder. self.imap.SelectFolder("Inbox") response = self.imap.response('OK') self.assert_(response[0] == "OK") self.assert_(response[1] == [None]) def test_folder_list(self): # This test will fail if testGoodLogin fails. 
self.imap.login(IMAP_USERNAME, IMAP_PASSWORD) # If we had more control over what the IMAP server returned # (say we had our own one, as suggested above), then we could # test returning literals, getting an error, and a bad literal, # but since we don't, just do a simple test for now. folders = self.imap.folder_list() self.assertEqual(folders, IMAP_FOLDER_LIST) def test_extract_fetch_data(self): response = "bad response" self.assertRaises(BadIMAPResponseError, self.imap.extract_fetch_data, response) # Check UID and message_number. message_number = "123" uid = "5432" response = "%s (UID %s)" % (message_number, uid) data = self.imap.extract_fetch_data(response) self.assertEqual(data["message_number"], message_number) self.assertEqual(data["UID"], uid) # Check INTERNALDATE, FLAGS. flags = r"(\Seen \Deleted)" date = '"27-Jul-2004 13:11:56 +1200"' response = "%s (FLAGS %s INTERNALDATE %s)" % \ (message_number, flags, date) data = self.imap.extract_fetch_data(response) self.assertEqual(data["FLAGS"], flags) self.assertEqual(data["INTERNALDATE"], date) # Check RFC822 and literals. rfc = "Subject: Test\r\n\r\nThis is a test message." response = ("%s (RFC822 {%s}" % (message_number, len(rfc)), rfc) data = self.imap.extract_fetch_data(response) self.assertEqual(data["message_number"], message_number) self.assertEqual(data["RFC822"], rfc) # Check RFC822.HEADER. headers = "Subject: Foo\r\nX-SpamBayes-ID: 1231-1\r\n" response = ("%s (RFC822.HEADER {%s}" % (message_number, len(headers)), headers) data = self.imap.extract_fetch_data(response) self.assertEqual(data["RFC822.HEADER"], headers) # Check BODY.PEEK. peek = "Subject: Test2\r\n\r\nThis is another test message." 
response = ("%s (BODY[] {%s}" % (message_number, len(peek)), peek) data = self.imap.extract_fetch_data(response) self.assertEqual(data["BODY[]"], peek) class IMAPMessageTest(unittest.TestCase): def setUp(self): imap = IMAPSession(IMAP_SERVER, IMAP_PORT) self.msg = IMAPMessage() self.msg.imap_server = imap def tearDown(self): try: self.msg.imap_server.logout() except imaplib.error: pass # These tests might fail if more than one second passes # between the call and the assert. We could make it more robust, # or you could just run this on a faster machine, like me . def test_extract_time_no_date(self): date = self.msg.extractTime() self.assertEqual(date, imaplib.Time2Internaldate(time.time())) def test_extract_time_date(self): self.msg["Date"] = "Wed, 19 May 2004 20:05:15 +1200" date = self.msg.extractTime() self.assertEqual(date, '"19-May-2004 20:05:15 +1200"') def test_extract_time_bad_date(self): self.msg["Date"] = "Mon, 06 May 0102 10:51:16 -0100" date = self.msg.extractTime() self.assertEqual(date, imaplib.Time2Internaldate(time.time())) def test_as_string_invalid(self): content = "This is example content.\nThis is more\r\n" self.msg.invalid = True self.msg.invalid_content = content as_string = self.msg.as_string() self.assertEqual(self.msg._force_CRLF(content), as_string) def testMoveTo(self): fol1 = "Folder1" fol2 = "Folder2" self.msg.MoveTo(fol1) self.assertEqual(self.msg.folder, fol1) self.msg.MoveTo(fol2) self.assertEqual(self.msg.previous_folder, fol1) self.assertEqual(self.msg.folder, fol2) def test_get_full_message(self): self.assertRaises(AssertionError, self.msg.get_full_message) self.msg.id = "unittest" self.assertRaises(AttributeError, self.msg.get_full_message) self.msg.imap_server.login(IMAP_USERNAME, IMAP_PASSWORD) self.msg.imap_server.select() response = self.msg.imap_server.fetch(1, "UID") self.assertEqual(response[0], "OK") self.msg.uid = response[1][0][7:-1] self.msg.folder = IMAPFolder("Inbox", self.msg.imap_server) # When we have a dummy 
server, check for MemoryError here. # And also an unparseable message (for Python < 2.4). new_msg = self.msg.get_full_message() self.assertEqual(new_msg.folder, self.msg.folder) self.assertEqual(new_msg.previous_folder, self.msg.previous_folder) self.assertEqual(new_msg.uid, self.msg.uid) self.assertEqual(new_msg.id, self.msg.id) self.assertEqual(new_msg.rfc822_key, self.msg.rfc822_key) self.assertEqual(new_msg.rfc822_command, self.msg.rfc822_command) self.assertEqual(new_msg.imap_server, self.msg.imap_server) id_header = options["Headers", "mailid_header_name"] self.assertEqual(new_msg[id_header], self.msg.id) new_msg2 = new_msg.get_full_message() # These should be the same object, not just equal. self.assert_(new_msg is new_msg2) def suite(): suite = unittest.TestSuite() for cls in (IMAPSessionTest, IMAPMessageTest, ): suite.addTest(unittest.makeSuite(cls)) return suite if __name__=='__main__': sb_test_support.unittest_main(argv=sys.argv + ['suite']) From anadelonbrin at users.sourceforge.net Thu Aug 5 02:56:06 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Thu Aug 5 02:56:10 2004 Subject: [Spambayes-checkins] spambayes/spambayes Options.py,1.110,1.111 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17846/spambayes Modified Files: Options.py Log Message: Goodbye to the last remnants of the experimental imbalance option. Having this in your configuration file would do nothing these days anyway. Also goodbye to two deprecated options: [Tokenizer] x-extract_dow and [Tokenizer] x-generate_time_buckets. No-one objected on the list (and some agreed), and they've been deprecated for a while. 1.0 (1.0.1, etc) users will continue to get a "you are using a deprecated option" warning in their logs, so by the time they move to 1.1, they should have stopped.
Index: Options.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Options.py,v retrieving revision 1.110 retrieving revision 1.111 diff -C2 -d -r1.110 -r1.111 *** Options.py 21 Jul 2004 18:58:51 -0000 1.110 --- Options.py 5 Aug 2004 00:55:54 -0000 1.111 *************** *** 137,149 **** INTEGER, RESTORE), - ("x-generate_time_buckets", "Generate time buckets", False, - """(DEPRECATED) Generate tokens which resemble the posting time - in 10-minute buckets: 'time:' hour ':' minute//10""", - BOOLEAN, RESTORE), - - ("x-extract_dow", "Extract day-of-week", False, - """(DEPRECATED) Extract day of the week tokens from the Date: header.""", - BOOLEAN, RESTORE), - ("x-pick_apart_urls", "Extract clues about url structure", False, """(EXPERIMENTAL) Note whether url contains non-standard port or --- 137,140 ---- *************** *** 468,499 **** BOOLEAN, RESTORE), - # If the # of ham and spam in training data are out of balance, the - # spamprob guesses can get stronger in the direction of the category - # with more training msgs. In one sense this must be so, since the more - # data we have of one flavor, the more we know about that flavor. But - # that allows the accidental appearance of a strong word of that flavor - # in a msg of the other flavor much more power than an accident in the - # other direction. Enable experimental_ham_spam_imbalance_adjustment if - # you have more ham than spam training data (or more spam than ham), and - # the Bayesian probability adjustment won't 'believe' raw counts more - # than min(# ham trained on, # spam trained on) justifies. I *expect* - # this option will go away (and become the default), but people *with* - # strong imbalance need to test it first.\ - # LATER: this option sucked, creating more problems than it solved. - # It's deprecated, and the support code has gone away. 
- - ("x-experimental_ham_spam_imbalance_adjustment", "Compensate for unequal numbers of spam and ham", False, - """(DEPRECATED) If your training database has significantly more ham - than spam, or vice versa, you may start seeing an increase in - incorrect classifications (messages put in the wrong category, not - just marked as unsure). If so, this option allows you to compensate - for this, at the cost of increasing the number of messages classified - as "unsure". - - Note that the effect is subtle, and you should experiment with both - settings to choose the option that suits you best. You do not have - to retrain your database if you change this option.""", - BOOLEAN, RESTORE), - ("x-use_bigrams", "Use mixed uni/bi-grams scheme", False, """(EXPERIMENTAL) Generate both unigrams (words) and bigrams (pairs of --- 459,462 ---- From anadelonbrin at users.sourceforge.net Thu Aug 5 02:56:56 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Thu Aug 5 02:56:59 2004 Subject: [Spambayes-checkins] spambayes/spambayes tokenizer.py,1.31,1.32 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17978/spambayes Modified Files: tokenizer.py Log Message: Goodbye to support code for two deprecated options: [Tokenizer] x-extract_dow and [Tokenizer] x-generate_time_buckets. No-one objected on the list (and some agreed), and they've been deprecated for a while. 
Index: tokenizer.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/tokenizer.py,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** tokenizer.py 12 Feb 2004 22:07:55 -0000 1.31 --- tokenizer.py 5 Aug 2004 00:56:53 -0000 1.32 *************** *** 1469,1498 **** yield 'received:' + tok - # Date: - if options["Tokenizer", "x-generate_time_buckets"]: - for header in msg.get_all("date", ()): - mat = self.date_hms_re.search(header) - # return the time in Date: headers arranged in - # 10-minute buckets - if mat is not None: - h = int(mat.group('hour')) - bucket = int(mat.group('minute')) // 10 - yield 'time:%02d:%d' % (h, bucket) - - if options["Tokenizer", "x-extract_dow"]: - for header in msg.get_all("date", ()): - # extract the day of the week - for fmt in self.date_formats: - try: - timetuple = time.strptime(header, fmt) - except ValueError: - pass - else: - yield 'dow:%d' % timetuple[6] - break - else: - # if nothing matches, declare the Date: header invalid - yield 'dow:invalid' - # Message-Id: This seems to be a small win and should not # adversely affect a mixed source corpus so it's always enabled. --- 1469,1472 ---- From anadelonbrin at users.sourceforge.net Fri Aug 6 07:23:24 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Fri Aug 6 07:23:28 2004 Subject: [Spambayes-checkins] spambayes/scripts sb_imapfilter.py,1.35,1.36 Message-ID: Update of /cvsroot/spambayes/spambayes/scripts In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25032/scripts Modified Files: sb_imapfilter.py Log Message: Fix three bugs found by Sjoerd Mullender: imap.close() should be self.imap_server.close() There were some instances of BadIMAPResponse instead of BadIMAPResponseError (obviously I was confused about what I was calling it when I wrote them). Need to pass the message id to insert_exception_header, and use self.rfc822_key as the key. 
And one in response to a problem from Aaron Konstam: isinstance check is wrong, so will never be true, so literals in the folder list will never be handled correctly. Index: sb_imapfilter.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/scripts/sb_imapfilter.py,v retrieving revision 1.35 retrieving revision 1.36 diff -C2 -d -r1.35 -r1.36 *** sb_imapfilter.py 4 Aug 2004 08:32:26 -0000 1.35 --- sb_imapfilter.py 6 Aug 2004 05:23:21 -0000 1.36 *************** *** 205,209 **** # a list of all the deleted messages which we don't do # anything with). ! imap.close() if folder == "": --- 205,209 ---- # a list of all the deleted messages which we don't do # anything with). ! self.imap_server.close() if folder == "": *************** *** 236,242 **** # Sigh. Some servers may give us back the folder name as a # literal, so we need to crunch this out. ! if isinstance(fol, ()): ! r = re.compile(r"{\d+}") ! m = r.search(fol[0]) if not m: # Something is wrong here! Skip this folder. --- 236,241 ---- # Sigh. Some servers may give us back the folder name as a # literal, so we need to crunch this out. ! if isinstance(fol, types.TupleType): ! m = re.search(r"{\d+}", fol[0]) if not m: # Something is wrong here! Skip this folder. *************** *** 381,385 **** try: self.imap_server.SelectFolder(self.folder.name) ! except BadIMAPResponse: # Can't select the folder, so getting the substance will not # work. --- 380,384 ---- try: self.imap_server.SelectFolder(self.folder.name) ! except BadIMAPResponseError: # Can't select the folder, so getting the substance will not # work. *************** *** 435,439 **** # exception data and then the original message. self.invalid = True ! text, details = message.insert_exception_header(data["RFC822"]) self.invalid_content = text self.got_substance = True --- 434,439 ---- # exception data and then the original message. self.invalid = True ! text, details = message.insert_exception_header( ! 
data[self.rfc822_key], self.id) self.invalid_content = text self.got_substance = True *************** *** 518,522 **** try: self.imap_server.check_response("", response) ! except BadIMAPResponse: pass else: --- 518,522 ---- try: self.imap_server.check_response("", response) ! except BadIMAPResponseError: pass else: *************** *** 848,852 **** try: self.imap_server.SelectFolder(self.unsure_folder.name) ! except BadIMAPResponse: print "Cannot select spam folder. Please check configuration." sys.exit(-1) --- 848,852 ---- try: self.imap_server.SelectFolder(self.unsure_folder.name) ! except BadIMAPResponseError: print "Cannot select spam folder. Please check configuration." sys.exit(-1) *************** *** 856,860 **** try: self.imap_server.SelectFolder(filter_folder) ! except BadIMAPResponse: print "Cannot select %s, skipping." % (filter_folder,) continue --- 856,860 ---- try: self.imap_server.SelectFolder(filter_folder) ! except BadIMAPResponseError: print "Cannot select %s, skipping." % (filter_folder,) continue From anadelonbrin at users.sourceforge.net Fri Aug 6 07:25:01 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Fri Aug 6 07:25:05 2004 Subject: [Spambayes-checkins] spambayes/spambayes message.py,1.53,1.54 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25177/spambayes Modified Files: message.py Log Message: Pass message id as a parameter - self isn't available! Index: message.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/message.py,v retrieving revision 1.53 retrieving revision 1.54 diff -C2 -d -r1.53 -r1.54 *** message.py 4 Aug 2004 08:18:18 -0000 1.53 --- message.py 6 Aug 2004 05:24:58 -0000 1.54 *************** *** 490,494 **** # This is used by both sb_server and sb_imapfilter, so it's handy to have # it available separately. ! 
def insert_exception_header(string_msg): """Insert an exception header into the given RFC822 message (as text). --- 490,494 ---- # This is used by both sb_server and sb_imapfilter, so it's handy to have # it available separately. ! def insert_exception_header(string_msg, msg_id): """Insert an exception header into the given RFC822 message (as text). *************** *** 512,515 **** headers += "\n%s: %s\r\n%s: %s\r\n\r\n" % \ (headerName, header, ! options["Headers", "mailid_header_name"], self.id) return (headers + body, details) --- 512,515 ---- headers += "\n%s: %s\r\n%s: %s\r\n\r\n" % \ (headerName, header, ! options["Headers", "mailid_header_name"], msg_id) return (headers + body, details) From anadelonbrin at users.sourceforge.net Mon Aug 9 08:16:35 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Mon Aug 9 08:16:38 2004 Subject: [Spambayes-checkins] website background.ht,1.19,1.20 Message-ID: Update of /cvsroot/spambayes/website In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30459 Modified Files: background.ht Log Message: Add a link to Gary's Linux Journal paper. Add a link to my (and Brendon's) CEAS'04 paper. Index: background.ht =================================================================== RCS file: /cvsroot/spambayes/website/background.ht,v retrieving revision 1.19 retrieving revision 1.20 diff -C2 -d -r1.19 -r1.20 *** background.ht 9 Jul 2004 00:37:25 -0000 1.19 --- background.ht 9 Aug 2004 06:16:32 -0000 1.20 *************** *** 13,19 **** interesting essay suggesting some improvements to Graham's original approach. !
  • Gary also wrote a Linux Journal ! article ! about this.
  • more links? mail --- 13,18 ---- interesting essay suggesting some improvements to Graham's original approach. !

  • Gary Robinson's ! Linux Journal article discussed using the chi squared distribution.
  • more links? mail *************** *** 265,266 **** --- 264,278 ---- +

    Papers about SpamBayes

    +

    Tony Meyer and Brendon Whateley wrote a + paper introducing SpamBayes for + the 2004 Conference on Email and Spam (CEAS 04). + The aim of the paper was to introduce SpamBayes (and so provide a paper to + reference for future work, allowing other authors to skip past the basic + stuff), and to introduce some of the main concepts that SpamBayes is based + on (like the importance of the 'unsure' range). Limited results from + testing the unigrams/bigrams tiling experimental option, and from various + training regimes are also included. The intent was not + to demonstrate the superiority of SpamBayes. Note that a great deal of the + background information about the history of SpamBayes is sourced from this + page.

    From anadelonbrin at users.sourceforge.net Mon Aug 9 08:18:44 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Mon Aug 9 08:18:47 2004 Subject: [Spambayes-checkins] website quotes.ht,1.9,1.10 Message-ID: Update of /cvsroot/spambayes/website In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30674 Modified Files: quotes.ht Log Message: Add a couple of extra links from Erik Brown. Index: quotes.ht =================================================================== RCS file: /cvsroot/spambayes/website/quotes.ht,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** quotes.ht 16 Apr 2004 22:09:14 -0000 1.9 --- quotes.ht 9 Aug 2004 06:18:41 -0000 1.10 *************** *** 75,78 **** --- 75,96 ----

    +

    + I have found the Outlook plug-in to be very easy to install and use. + It has not caused any problems with Outlook and has been amazingly + effective at getting spam out of my Inbox.
    + Chris Taylor in an Ottawa PC Users' Group, Inc. + Product Review. +

    + +

    + The program is easy to use and setup and probably one of the most + accurate filtering tools we have come across...
    + SnapFiles + review. +

    + +

    Spamotomy users have a + bit to say, too! +

    What we are saying about us

    From anadelonbrin at users.sourceforge.net Mon Aug 9 08:21:18 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Mon Aug 9 08:21:21 2004 Subject: [Spambayes-checkins] website unix.ht,1.10,1.11 Message-ID: Update of /cvsroot/spambayes/website In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31103 Modified Files: unix.ht Log Message: A name was causing havoc in the generated html, so simplify it (putting in the accent would be better, I guess). Add a link to Toby's KMail instructions. Index: unix.ht =================================================================== RCS file: /cvsroot/spambayes/website/unix.ht,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** unix.ht 7 Jun 2004 02:46:37 -0000 1.10 --- unix.ht 9 Aug 2004 06:21:15 -0000 1.11 *************** *** 61,65 **** href="applications.html#sb_server">sb_server.py.

    !

    You might wish to set sb_server.py to run as a daemon - Fernando NIO and Dave Handley have provided these scripts (respectively) which will allow you to do this:

    --- 61,65 ---- href="applications.html#sb_server">sb_server.py.

    !

    You might wish to set sb_server.py to run as a daemon - Fernando Nino and Dave Handley have provided these scripts (respectively) which will allow you to do this:

    *************** *** 195,198 **** --- 195,204 ---- +

    KMail

    +

    Toby Dickenson has written a + description of his SpamBayes and KMail setup (using sb_bnfilter.py), + which is an effective guide to setting up your system if you are a KMail + user.

    +

    IMAP

    From anadelonbrin at users.sourceforge.net Mon Aug 9 08:23:05 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Mon Aug 9 08:23:08 2004 Subject: [Spambayes-checkins] website server_side.ht,1.5,1.6 Message-ID: Update of /cvsroot/spambayes/website In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31250 Modified Files: server_side.ht Log Message: Add description from Aaron Konstam. Note that this isn't really what you would think of as server side filtering (AFAICT) - it's more like making client side filtering much easier, but it can't hurt to have the description here. Index: server_side.ht =================================================================== RCS file: /cvsroot/spambayes/website/server_side.ht,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** server_side.ht 22 Jul 2004 03:20:26 -0000 1.5 --- server_side.ht 9 Aug 2004 06:22:47 -0000 1.6 *************** *** 130,131 **** --- 130,183 ---- an IMAP server, using imapfilter.py (now sb_imapfilter.py) and hammiefilter.py (now sb_filter.py).

    + +

    An Alternate method of Server Mail filtering in Linux or Unix + environments

    +

    Aaron Konstam has given us this description of the setup used at + Trinity University.

    + +

    As opposed to other suggested server filtering setups with SpamBayes + this approach has the advantage that although the server is doing all the + filtering, each user on a client machine has complete control of the + training of the filtering process to meet his or her own tastes. It is + ideal for the university student lab environment but could be used in + commercial environments as well.

    + +

    The basis of this method is that all the user directories as well as + the password authentication data are kept on the server. The + authentication data is made available to all the client machines through a + well known Unix and Linux service called NIS. Any user can sit at any + machine and log in using the same password, change passwords and make any + other changes to their user environment.

    + +

    The home directories are NFS mounted from the server on all the client + machines. Therefore, the user's home directory on the client machine is + identical to the one on the server. The user has access to his hammie.db + file, his personal configuration file and all the SpamBayes software that + has been installed on the clients. Of course the SpamBayes software is also + installed on the server.

    + +

    Mail is filtered by the server using a .procmailrc file in the user's + directory that runs sb_filter.py. One further thing, which should be + obvious, is that we have created MX records so that all mail addressed to a + client is actually delivered to the server.

    + +

    Training can easily be done with a simple script such as:

    + +
    + #!/bin/bash
    + #script: trainsb
    + 
    + /usr/bin/sb_mboxtrain.py -d $HOME/.hammie.db -g  $HOME/Mail/$1 -s $HOME/Mail/$2
    + 
    + +

    used as follows:

    trainsb ham spam

    + +

    Notice that no proxy servers of any kind are necessary for the user to + read their mail, train it, manipulate it or do anything else they want to + do. However, if they want to use the web interface on the local client + machine to train their mail that is also available to them.

    + +

    As a side note we run our lab Windows machines in exactly the same way. + There is a server for authenticating users and users' directories are kept + on a central server. One imagines one could train users' mail in exactly + the same way on our Windows machines in our labs.

    From anadelonbrin at users.sourceforge.net Mon Aug 9 08:50:06 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Mon Aug 9 08:50:13 2004 Subject: [Spambayes-checkins] spambayes/spambayes Options.py, 1.111, 1.112 classifier.py, 1.25, 1.26 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2027/spambayes Modified Files: Options.py classifier.py Log Message: Change [Classifier] x-use_bigrams to a normal, not experimental option. i.e. it's now [Classifier] use_bigrams. Note that this has almost no effect (the only one I can think of is that it won't be listed on the Experimental options page in sb_server/sb_imapfilter). The option is still False by default, and if you have "x-use_bigrams" in your config file, it'll still (silently) work. You can remove the x- if you like. Index: Options.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Options.py,v retrieving revision 1.111 retrieving revision 1.112 diff -C2 -d -r1.111 -r1.112 *** Options.py 5 Aug 2004 00:55:54 -0000 1.111 --- Options.py 9 Aug 2004 06:50:03 -0000 1.112 *************** *** 459,464 **** BOOLEAN, RESTORE), ! ("x-use_bigrams", "Use mixed uni/bi-grams scheme", False, ! """(EXPERIMENTAL) Generate both unigrams (words) and bigrams (pairs of words). However, extending an idea originally from Gary Robinson, the message is 'tiled' into non-overlapping unigrams and bigrams, --- 459,464 ---- BOOLEAN, RESTORE), ! ("use_bigrams", "Use mixed uni/bi-grams scheme", False, ! """Generate both unigrams (words) and bigrams (pairs of words). 
However, extending an idea originally from Gary Robinson, the message is 'tiled' into non-overlapping unigrams and bigrams, Index: classifier.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/classifier.py,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** classifier.py 19 Jul 2004 09:58:59 -0000 1.25 --- classifier.py 9 Aug 2004 06:50:04 -0000 1.26 *************** *** 267,271 **** else that it's definitely not spam. """ ! if options["Classifier", "x-use_bigrams"]: wordstream = self._enhance_wordstream(wordstream) if options["URLRetriever", "x-slurp_urls"]: --- 267,271 ---- else that it's definitely not spam. """ ! if options["Classifier", "use_bigrams"]: wordstream = self._enhance_wordstream(wordstream) if options["URLRetriever", "x-slurp_urls"]: *************** *** 278,282 **** Pass the same arguments you passed to learn(). """ ! if options["Classifier", "x-use_bigrams"]: wordstream = self._enhance_wordstream(wordstream) if options["URLRetriever", "x-slurp_urls"]: --- 278,282 ---- Pass the same arguments you passed to learn(). """ ! if options["Classifier", "use_bigrams"]: wordstream = self._enhance_wordstream(wordstream) if options["URLRetriever", "x-slurp_urls"]: *************** *** 430,434 **** mindist = options["Classifier", "minimum_prob_strength"] ! if options["Classifier", "x-use_bigrams"]: # This scheme mixes single tokens with pairs of adjacent tokens. # wordstream is "tiled" into non-overlapping unigrams and --- 430,434 ---- mindist = options["Classifier", "minimum_prob_strength"] ! if options["Classifier", "use_bigrams"]: # This scheme mixes single tokens with pairs of adjacent tokens. # wordstream is "tiled" into non-overlapping unigrams and *************** *** 534,539 **** "word"). ! If the experimental "Classifier":"x-use_bigrams" option is ! removed, this function can be removed, too. """ --- 534,539 ---- "word"). ! 
If the "Classifier":"use_bigrams" option is removed, this function ! can be removed, too. """ From anadelonbrin at users.sourceforge.net Mon Aug 9 09:01:41 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Mon Aug 9 09:01:43 2004 Subject: [Spambayes-checkins] spambayes/scripts sb_imapfilter.py,1.36,1.37 Message-ID: Update of /cvsroot/spambayes/spambayes/scripts In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3374/scripts Modified Files: sb_imapfilter.py Log Message: Fix remaining references to the old imap global found by Sjoerd Mullender. Updated test suite coming soon... Index: sb_imapfilter.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/scripts/sb_imapfilter.py,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** sb_imapfilter.py 6 Aug 2004 05:23:21 -0000 1.36 --- sb_imapfilter.py 9 Aug 2004 07:01:38 -0000 1.37 *************** *** 205,209 **** # a list of all the deleted messages which we don't do # anything with). ! self.imap_server.close() if folder == "": --- 205,209 ---- # a list of all the deleted messages which we don't do # anything with). ! self.close() if folder == "": *************** *** 227,231 **** try: all_folders = self.check_response("list", response) ! except BadIMAPResponse: # We want to keep going, so just print out a warning, and # return an empty list. --- 227,231 ---- try: all_folders = self.check_response("list", response) ! except BadIMAPResponseError: # We want to keep going, so just print out a warning, and # return an empty list. *************** *** 574,579 **** multiple_ids = new_id.split() for id_to_remove in multiple_ids[:-1]: ! response = imap.uid("STORE", id_to_remove, "+FLAGS.SILENT", ! "(\\Deleted \\Seen)") command = "silently delete and make seen %s" % (id_to_remove,) self.imap_server.check_response(command, response) --- 574,580 ---- multiple_ids = new_id.split() for id_to_remove in multiple_ids[:-1]: ! 
response = self.imap_server.uid("STORE", id_to_remove, ! "+FLAGS.SILENT", ! "(\\Deleted \\Seen)") command = "silently delete and make seen %s" % (id_to_remove,) self.imap_server.check_response(command, response) *************** *** 598,602 **** # ok as long as another message hasn't also arrived). if new_id == "": ! response = imap.uid("SEARCH", "ALL") data = self.imap_server.check_response("search all", response) --- 599,603 ---- # ok as long as another message hasn't also arrived). if new_id == "": ! response = self.imap_server.uid("SEARCH", "ALL") data = self.imap_server.check_response("search all", response) From anadelonbrin at users.sourceforge.net Mon Aug 9 09:45:05 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Mon Aug 9 09:45:08 2004 Subject: [Spambayes-checkins] spambayes/spambayes/test test_sb_imapfilter.py, 1.1, 1.2 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes/test In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv10014/spambayes/test Modified Files: test_sb_imapfilter.py Log Message: No new tests, but a better system. Use a dummy IMAP server like test_sb-server.py does, so that we can control what returns and don't need to have an IMAP server available to run the tests. Index: test_sb_imapfilter.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/test/test_sb_imapfilter.py,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** test_sb_imapfilter.py 4 Aug 2004 08:36:32 -0000 1.1 --- test_sb_imapfilter.py 9 Aug 2004 07:45:02 -0000 1.2 *************** *** 7,31 **** import sys import time import imaplib import unittest import sb_test_support sb_test_support.fix_sys_path() from spambayes.Options import options from sb_imapfilter import BadIMAPResponseError from sb_imapfilter import IMAPSession, IMAPMessage, IMAPFolder ! IMAP_SERVER = "mail.madsods.gen.nz" ! IMAP_PORT = 143 ! IMAP_USERNAME = "test_account+madsods.gen.nz" ! 
IMAP_PASSWORD = "" ! IMAP_FOLDER_LIST = ["INBOX", "unsure", "ham_to_train", "spam", ! "spam_to_train", ".mailboxlist"] ! IMAP_FOLDER_LIST.sort() ! class IMAPSessionTest(unittest.TestCase): def setUp(self): ! self.imap = IMAPSession(IMAP_SERVER, IMAP_PORT) def tearDown(self): --- 7,176 ---- import sys import time + import types + import socket + import thread import imaplib import unittest + import asyncore import sb_test_support sb_test_support.fix_sys_path() + from spambayes import Dibbler from spambayes.Options import options from sb_imapfilter import BadIMAPResponseError from sb_imapfilter import IMAPSession, IMAPMessage, IMAPFolder ! IMAP_PORT = 8143 ! IMAP_USERNAME = "testu" ! IMAP_PASSWORD = "testp" ! IMAP_FOLDER_LIST = ["INBOX", "unsure", "ham_to_train", "spam"] ! # Key is UID. ! IMAP_MESSAGES = {101 : """Subject: Test\r\n\r\nBody test.""", ! 102 : """Subject: Test2\r\n\r\nAnother body test."""} ! # Map of ID -> UID ! IMAP_UIDS = {1 : 101, 2: 102} ! class TestListener(Dibbler.Listener): ! """Listener for TestIMAP4Server. Works on port 8143, to co-exist ! with real IMAP4 servers.""" ! def __init__(self, socketMap=asyncore.socket_map): ! Dibbler.Listener.__init__(self, IMAP_PORT, TestIMAP4Server, ! (socketMap,), socketMap=socketMap) ! ! ! class TestIMAP4Server(Dibbler.BrighterAsyncChat): ! """Minimal IMAP4 server, for testing purposes. Accepts a limited ! subset of commands, and also a KILL command, to terminate.""" ! def __init__(self, clientSocket, socketMap): ! # Grumble: asynchat.__init__ doesn't take a 'map' argument, ! # hence the two-stage construction. ! Dibbler.BrighterAsyncChat.__init__(self) ! Dibbler.BrighterAsyncChat.set_socket(self, clientSocket, socketMap) ! self.set_terminator('\r\n') ! # okCommands are just ignored (we pass back a happy this-was-fine ! # answer, and do nothing. ! self.okCommands = ['NOOP', 'LOGOUT', 'CAPABILITY', 'KILL'] ! # These commands actually result in something. ! self.handlers = {'LIST' : self.onList, ! 
'LOGIN' : self.onLogin, ! 'SELECT' : self.onSelect, ! 'FETCH' : self.onFetch, ! 'UID' : self.onUID, ! } ! self.push("* OK [CAPABILITY IMAP4REV1 AUTH=LOGIN] " \ ! "localhost IMAP4rev1\r\n") ! self.request = '' ! ! def collect_incoming_data(self, data): ! """Asynchat override.""" ! self.request = self.request + data ! ! def found_terminator(self): ! """Asynchat override.""" ! id, command = self.request.split(None, 1) ! if ' ' in command: ! command, args = command.split(None, 1) ! else: ! args = '' ! command = command.upper() ! if command in self.okCommands: ! self.push("%s OK (we hope)\r\n" % (id,)) ! if command == 'LOGOUT': ! self.close_when_done() ! if command == 'KILL': ! self.socket.shutdown(2) ! self.close() ! raise SystemExit() ! else: ! handler = self.handlers.get(command, self.onUnknown) ! self.push(handler(id, command, args, False)) # Or push_slowly for testing ! self.request = '' ! ! def push_slowly(self, response): ! """Useful for testing.""" ! for c in response: ! self.push(c) ! time.sleep(0.02) ! ! def onLogin(self, id, command, args, uid=False): ! """Log in to server.""" ! username, password = args.split(None, 1) ! username = username.strip('"') ! password = password.strip('"') ! if username == IMAP_USERNAME and password == IMAP_PASSWORD: ! return "%s OK [CAPABILITY IMAP4REV1] User %s " \ ! "authenticated.\r\n" % (id, username) ! return "%s NO LOGIN failed\r\n" % (id,) ! ! def onList(self, id, command, args, uid=False): ! """Return list of folders.""" ! base = '\r\n* LIST (\\NoInferiors \\UnMarked) "/" ' ! return "%s%s\r\n%s OK LIST completed\r\n" % \ ! (base[2:], base.join(IMAP_FOLDER_LIST), id) ! ! def onSelect(self, id, command, args, uid=False): ! exists = "* %d EXISTS" % (len(IMAP_MESSAGES),) ! recent = "* 0 RECENT" ! uidv = "* OK [UIDVALIDITY 1091599302] UID validity status" ! next_uid = "* OK [UIDNEXT 23] Predicted next UID" ! flags = "* FLAGS (\Answered \Flagged \Deleted \Draft \Seen)" ! 
perm_flags = "* OK [PERMANENTFLAGS (\* \Answered \Flagged " \ ! "\Deleted \Draft \Seen)] Permanent flags" ! complete = "%s OK [READ-WRITE] SELECT completed" % (id,) ! return "%s\r\n" % ("\r\n".join([exists, recent, uidv, next_uid, ! flags, perm_flags, complete]),) ! ! def onFetch(self, id, command, args, uid=False): ! msg_nums, msg_parts = args.split(None, 1) ! msg_nums = msg_nums.split() ! response = {} ! for msg in msg_nums: ! response[msg] = [] ! if "UID" in msg_parts: ! if uid: ! for msg in msg_nums: ! response[msg].append("FETCH (UID %s)" % (msg,)) ! else: ! for msg in msg_nums: ! response[msg].append("FETCH (UID %s)" % ! (IMAP_UIDS[int(msg)])) ! if "BODY.PEEK[]" in msg_parts: ! for msg in msg_nums: ! if uid: ! msg_uid = int(msg) ! else: ! msg_uid = IMAP_UIDS[int(msg)] ! response[msg].append(("FETCH (BODY[] {%s}" % ! (len(IMAP_MESSAGES[msg_uid])), ! IMAP_MESSAGES[msg_uid])) ! for msg in msg_nums: ! try: ! simple = " ".join(response[msg]) ! except TypeError: ! simple = [] ! for part in response[msg]: ! if isinstance(part, types.StringTypes): ! simple.append(part) ! else: ! simple.append('%s\r\n%s)' % (part[0], part[1])) ! simple = " ".join(simple) ! response[msg] = "* %s %s" % (msg, simple) ! response_text = "\r\n".join(response.values()) ! return "%s\r\n%s OK FETCH completed\r\n" % (response_text, id) ! ! def onUID(self, id, command, args, uid=False): ! actual_command, args = args.split(None, 1) ! handler = self.handlers.get(actual_command, self.onUnknown) ! return handler(id, command, args, uid=True) ! ! def onUnknown(self, id, command, args, uid=False): ! """Unknown IMAP4 command.""" ! return "%s BAD Command unrecognised: %s\r\n" % (id, repr(command)) ! ! ! class BaseIMAPFilterTest(unittest.TestCase): def setUp(self): ! 
self.imap = IMAPSession("localhost", IMAP_PORT) def tearDown(self): *************** *** 35,38 **** --- 180,185 ---- pass + + class IMAPSessionTest(BaseIMAPFilterTest): def testGoodLogin(self): self.imap.login(IMAP_USERNAME, IMAP_PASSWORD) *************** *** 81,85 **** folders = self.imap.folder_list() ! self.assertEqual(folders, IMAP_FOLDER_LIST) def test_extract_fetch_data(self): --- 228,234 ---- folders = self.imap.folder_list() ! correct = IMAP_FOLDER_LIST[:] ! correct.sort() ! self.assertEqual(folders, correct) def test_extract_fetch_data(self): *************** *** 127,141 **** ! class IMAPMessageTest(unittest.TestCase): def setUp(self): ! imap = IMAPSession(IMAP_SERVER, IMAP_PORT) self.msg = IMAPMessage() ! self.msg.imap_server = imap ! ! def tearDown(self): ! try: ! self.msg.imap_server.logout() ! except imaplib.error: ! pass # These tests might fail if more than one second passes --- 276,284 ---- ! class IMAPMessageTest(BaseIMAPFilterTest): def setUp(self): ! BaseIMAPFilterTest.setUp(self) self.msg = IMAPMessage() ! self.msg.imap_server = self.imap # These tests might fail if more than one second passes *************** *** 210,212 **** --- 353,359 ---- if __name__=='__main__': + def runTestServer(): + TestListener() + asyncore.loop() + thread.start_new_thread(runTestServer, ()) sb_test_support.unittest_main(argv=sys.argv + ['suite']) From anadelonbrin at users.sourceforge.net Tue Aug 10 08:48:11 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Tue Aug 10 08:48:15 2004 Subject: [Spambayes-checkins] spambayes/scripts sb_server.py,1.26,1.27 Message-ID: Update of /cvsroot/spambayes/spambayes/scripts In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv9437/scripts Modified Files: sb_server.py Log Message: Apparently, some people are using spam_cutoff values of 0.3 or lower. This doesn't seem like a sensible idea to me at all! 
Add a warning about it so that people at least know that it's not generally what you ought to be doing (and a corresponding one about the ham_cutoff). While we're there, warn them if the ham_cutoff is higher than the spam_cutoff, which means that nothing will work like it is meant to. Index: sb_server.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/scripts/sb_server.py,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** sb_server.py 3 Aug 2004 06:51:53 -0000 1.26 --- sb_server.py 10 Aug 2004 06:48:09 -0000 1.27 *************** *** 750,753 **** --- 750,767 ---- "will classify all messages as 'unsure', " \ "ready for you to train." + # Add an additional warning message if the user's thresholds are + # truly odd. + spam_cut = options["Categorization", "spam_cutoff"] + ham_cut = options["Categorization", "ham_cutoff"] + if spam_cut < 0.5: + self.warning += "
    Warning: we do not recommend setting " \ + "the spam threshold less than 0.5." + if ham_cut > 0.5: + self.warning += "
    Warning: we do not recommend setting " \ + "the ham threshold greater than 0.5." + if ham_cut > spam_cut: + self.warning += "
    Warning: your ham threshold is " \ + "higher than your spam threshold. " \ + "Results are unpredictable." def createWorkers(self): From kpitt at users.sourceforge.net Tue Aug 10 16:20:28 2004 From: kpitt at users.sourceforge.net (Kenny Pitt) Date: Tue Aug 10 16:20:31 2004 Subject: [Spambayes-checkins] spambayes/spambayes UserInterface.py, 1.47, 1.48 PyMeldLite.py, 1.7, 1.8 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16527 Modified Files: UserInterface.py PyMeldLite.py Log Message: First pass at moving help text out of the Python source and into the ui.html file. Along the way, correct a few minor typos and make the capitalization of SpamBayes consistent in the UI text. Index: UserInterface.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/UserInterface.py,v retrieving revision 1.47 retrieving revision 1.48 diff -C2 -d -r1.47 -r1.48 *** UserInterface.py 13 May 2004 04:05:50 -0000 1.47 --- UserInterface.py 10 Aug 2004 14:20:24 -0000 1.48 *************** *** 896,1009 **** helppage = self.html.helppage.clone() if topic: ! # Present help specific to a certain page. We probably want to ! # load this from a file, rather than fill up UserInterface.py, ! # but for demonstration purposes, do this for now. ! # (Note that this, of course, should be in ProxyUI, not here.) ! if topic == "review": ! helppage.helpheader = "Review Page Help" ! helppage.helptext = """

    When you first start using ! SpamBayes, all your mail will be classified as 'unsure' because SpamBayes ! doesn't have any preconceived ideas about what good or bad mail looks like. ! As soon as you start training the classification will improve, and by the ! time you've classified even 20 messages of each you'll be seeing quite ! reasonable results.

    ! !

    SpamBayes saves a temporary copy of all incoming mail ! so that classification can be independant of whatever mail client you are ! using. You need to run through these messages and tell SpamBayes how to ! handle mail like that in the future. This page lists messages that have ! arrived in the last %s days and that have not yet been trained. For each ! message listed, you need to choose to either discard ! (don't train on this message), defer (leave training on ! this message until later), or train (as either good - ! ham), or bad - spam). You do this by ! simply clicking in the circle in the appropriate column; if you wish to ! change all the messages to the same action, you can simply click the column ! heading.

    ! !

    You are presented with the subject and sender of each message, but, if ! this isn't enough information for you to make a decision on the message, ! you can also view the message text (this is the raw text, so you can't do ! any damage if the message contains a virus or any other malignant data). ! To do this, simply click on the subject of the message.

    ! !

    Once you have chosen the actions you wish to perform on all the ! displayed messages, click the Train button at the end of the page. ! SpamBayes will then update its database to reflect this data.

    ! !

    Note that the messages are split up into the classification that ! SpamBayes would place the message with current training data (if this is ! correct, you might choose to Discard the message, rather than ! train on it - see the SpamBayes wiki ! for discussion of training techniques). You can also see the ! Tokens that the message contains (the words in the message, ! plus some additional tokens that SpamBayes generates) and the Clues ! that SpamBayes used in classifying the message (not all tokens are ! used in classification).

    ! !

    So that the page isn't overwhelmingly long, messages waiting for review ! are split by the day they arrived. You can use the Previous Day ! or Next Day buttons at the top of the page to move between days. ! If mail arrives while the review page is open the new messages will ! not be automatically added to the displayed list; to add ! the new message, click the Refresh button at the top of the page. !


    """ % (options["Storage", "cache_expiry_days"],) ! elif topic == "stats": ! # This does belong with UserInterface.py, but should ! # still probably be loaded from a file or something to ! # avoid all this clutter. Someone come up with the ! # best solution! (A pickle? A single text file? A text ! # file per help page in a directory?) ! helppage.helpheader = "Statistics Page Help" ! helppage.helptext = """

    SpamBayes keeps track of certain ! information about the messages that are classified. For your interest, ! this page displays statistics about the messages that have been classified ! and trained so far.

    ! !

    Currently the page displays information about the ! number of messages that have been classified as good, bad and unsure, how ! many of these were false negatives or positives, and how many messages ! were classified as unsure (and what their correct classification was).

    ! !

    Note that the data for this page resides in the "message info" ! database that SpamBayes uses, and so only reflects messages since the ! last time this database was created.


    """ ! elif topic == "home_proxy": ! # Also belongs with UserInterface.py, and probably ! # not with the source! ! helppage.helpheader = "Home Page Help" ! helppage.helptext = """

    This is the main page for the ! SpamBayes web interface. You are presented with some information about ! the current status of SpamBayes, and can follow links to review messages ! or alter your configuration.

    ! !

    If you have messages stored in a mbox or dbx (Outlook Express) file ! that you wish to 'bulk' train, or if you wish to train on a message ! that you type in, you can do this on this page. Click the ! "Browse" button (or paste the text in, including headers), ! and then click the Train as Ham or Train as Spam ! button.

    ! !

    Likewise, if you have a message that you wish to classify, you ! can do this. Either paste the message into the text box, or click ! "Browse" and locate the text file that the message is ! located in. Click Classify, and you will be taken to a ! page describing the classification of that message.

    ! !

    If you want to find out information about a word in the statistics ! database that forms the heart of SpamBayes, you can use the "Word ! Query" facility. Enter in the word that you wish to search for ! and click Tell me about this word. If you enable the advanced ! find query, you can also search using wildcards or regular expressions.

    ! !

    You can also search for a specific message in the cache of temporary ! copies of messages that have been proxied. You might wish to do this if ! you realise that you have incorrectly trained a message and need to correct ! the training. You can search the subject, headers, or message body, or ! for the SpamBayes ID (which is in the headers of messages that SpamBayes ! proxies). Messages that are found will be presented in the standard ! review page. Note that once messages expire from the cache (after %s ! days), you can no longer find them.

    !
    """ % (options["Storage", "cache_expiry_days"],) self.write(helppage) self._writePostamble() --- 896,908 ---- helppage = self.html.helppage.clone() if topic: ! # Present help specific to a certain page. ! headerelem_name = "helpheader_" + topic ! textelem_name = "helptext_" + topic ! try: ! helppage.helpheader = self.html[headerelem_name]._content ! helppage.helptext = self.html[textelem_name]._content % \ ! { "cache_expiry_days": options["Storage", "cache_expiry_days"] } ! except KeyError: ! pass self.write(helppage) self._writePostamble() Index: PyMeldLite.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/PyMeldLite.py,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** PyMeldLite.py 31 Jan 2003 18:32:27 -0000 1.7 --- PyMeldLite.py 10 Aug 2004 14:20:24 -0000 1.8 *************** *** 773,776 **** --- 773,835 ---- raise AttributeError, "No element or attribute named %r" % name + def __getitem__(self, name): + """`object[]`, if this Meld contains an element with an `id` + attribute of `name`, returns a Meld representing that element. + + If no such element exists, a KeyError is raised. + + >>> p = Meld('

    Hello World

    ') + >>> print p["who"] + World + >>> print p["who"]._content + World + """ + + node = self._findByID(self._tree, name) + if node: + return Meld(node, self._readonly) + raise KeyError, "No element named %r" % name + + def __setitem__(self, name, value): + """`object[<name>] = value` sets the XML content of the element with an + `id` of `name`. + + If no such element exists, a KeyError is raised because there is no + info about the type of element to add. + + >>> p = Meld('

    Hello World

    ') + >>> p["who"] = "Richie" + >>> p["who"].id = "newwho" + >>> print p +

    Hello Richie

    + """ + + if self._readonly: + raise ReadOnlyError, READ_ONLY_MESSAGE + node = self._findByID(self._tree, name) + if hasattr(value, '_tree') and value._tree is node: + return # x["y"] = x.y + if node: + self._replaceNodeContent(node, value) + return + raise KeyError, "No element named %r" % name + + def __delitem__(self, name): + """Deletes the named element from the `Meld`: + + >>> p = Meld('

    Hello World

    ') + >>> del p["who"] + >>> print p +

    Hello

    + """ + + if self._readonly: + raise ReadOnlyError, READ_ONLY_MESSAGE + node = self._findByID(self._tree, name) + if node: + node.parent.children.remove(node) + return + raise KeyError, "No element named %r" % name + def __iadd__(self, other): """`object1 += object2` appends a string or a clone of a Meld to From kpitt at users.sourceforge.net Tue Aug 10 16:20:28 2004 From: kpitt at users.sourceforge.net (Kenny Pitt) Date: Tue Aug 10 16:20:32 2004 Subject: [Spambayes-checkins] spambayes/spambayes/resources ui.html, 1.33, 1.34 ui_html.py, 1.32, 1.33 Message-ID: Update of /cvsroot/spambayes/spambayes/spambayes/resources In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16527/resources Modified Files: ui.html ui_html.py Log Message: First pass at moving help text out of the Python source and into the ui.html file. Along the way, correct a few minor typos and make the capitalization of SpamBayes consistent in the UI text. Index: ui.html =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/resources/ui.html,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** ui.html 15 Mar 2004 23:06:44 -0000 1.33 --- ui.html 10 Aug 2004 14:20:25 -0000 1.34 *************** *** 2,6 **** ! Spambayes User Interface --- 2,6 ---- ! SpamBayes User Interface *************** *** 45,49 ****   ! Spambayes Web Interface: Home > ui.html --- 45,49 ----   ! SpamBayes Web Interface: Home > ui.html *************** *** 56,62 ****

    This file, ui.html, defines the look-and-feel ! of the user interface of the Spambayes Server. The various pieces of HTML defined here are extracted and manipulated at ! runtime to dynamically produce the HTML that the Spambayes Server serves up - this file acts as a palette of HTML components. PyMeldLite is the module that provides --- 56,62 ----

    This file, ui.html, defines the look-and-feel ! of the user interface of the SpamBayes Server. The various pieces of HTML defined here are extracted and manipulated at ! runtime to dynamically produce the HTML that the SpamBayes Server serves up - this file acts as a palette of HTML components. PyMeldLite is the module that provides *************** *** 117,128 **** ! - !
    SpamBayes Help
      Sorry, there isn't any specific help available for that section. !
      If you believe you may have found a bug in SpamBayes, or are --- 117,131 ---- ! ! ! ! !
    SpamBayes Help
      Sorry, there isn't any specific help available for that section. !

    If you believe you may have found a bug in SpamBayes, or are *************** *** 142,146 **** using this, please ensure that you have all the pertinent information. (Otherwise your response is likely to be "please tell us ..."). !
    --- 145,257 ---- using this, please ensure that you have all the pertinent information. (Otherwise your response is likely to be "please tell us ..."). !
    ! ! !


    !

    Context-sensitive help page text

    ! !
    Review Page Help
    !
    !

    When you first start using ! SpamBayes, all your mail will be classified as 'unsure' because SpamBayes ! doesn't have any preconceived ideas about what good or bad mail looks like. ! As soon as you start training the classification will improve, and by the ! time you've classified even 20 messages of each you'll be seeing quite ! reasonable results.

    ! !

    SpamBayes saves a temporary copy of all incoming mail ! so that classification can be independent of whatever mail client you are ! using. You need to run through these messages and tell SpamBayes how to ! handle mail like that in the future. This page lists messages that have ! arrived in the last %(cache_expiry_days)s days and that have not yet been ! trained. For each message listed, you need to choose to either ! discard (don't train on this message), ! defer (leave training on this message until later), or ! train (as either good - ham, or bad - ! spam). You do this by simply clicking in the circle in ! the appropriate column; if you wish to change all the messages to the same ! action, you can simply click the column heading.

    ! !

    You are presented with the subject and sender of each message, but, if ! this isn't enough information for you to make a decision on the message, ! you can also view the message text (this is the raw text, so you can't do ! any damage if the message contains a virus or any other malignant data). ! To do this, simply click on the subject of the message.

    ! !

    Once you have chosen the actions you wish to perform on all the ! displayed messages, click the Train button at the end of the page. ! SpamBayes will then update its database to reflect this data.

    ! !

    Note that the messages are split up into the classification that ! SpamBayes would place the message with current training data (if this is ! correct, you might choose to Discard the message, rather than ! train on it - see the SpamBayes wiki ! for discussion of training techniques). You can also see the ! Tokens that the message contains (the words in the message, ! plus some additional tokens that SpamBayes generates) and the Clues ! that SpamBayes used in classifying the message (not all tokens are ! used in classification).

    ! !

    So that the page isn't overwhelmingly long, messages waiting for review ! are split by the day they arrived. You can use the Previous Day ! or Next Day buttons at the top of the page to move between days. ! If mail arrives while the review page is open the new messages will ! not be automatically added to the displayed list; to add ! the new messages, click the Refresh button at the top of the page. !

    !
    ! !
    Statistics Page Help
    !
    !

    SpamBayes keeps track of certain ! information about the messages that are classified. For your interest, ! this page displays statistics about the messages that have been classified ! and trained so far.

    ! !

    Currently the page displays information about the ! number of messages that have been classified as good, bad and unsure, how ! many of these were false negatives or positives, and how many messages ! were classified as unsure (and what their correct classification was).

    ! !

    Note that the data for this page resides in the "message info" ! database that SpamBayes uses, and so only reflects messages since the ! last time this database was created.

    !
    ! !
    Home Page Help
    !
    !

    This is the main page for the ! SpamBayes web interface. You are presented with some information about ! the current status of SpamBayes, and can follow links to review messages ! or alter your configuration.

    ! !

    If you have messages stored in a mbox or dbx (Outlook Express) file ! that you wish to 'bulk' train, or if you wish to train on a message ! that you type in, you can do this on this page. Click the ! "Browse" button (or paste the text in, including headers), ! and then click the Train as Ham or Train as Spam ! button.

    ! !

    Likewise, if you have a message that you wish to classify, you ! can do this. Either paste the message into the text box, or click ! "Browse" and locate the text file that the message is ! located in. Click Classify, and you will be taken to a ! page describing the classification of that message.

    ! !

    If you want to find out information about a word in the statistics ! database that forms the heart of SpamBayes, you can use the "Word ! Query" facility. Enter in the word that you wish to search for ! and click Tell me about this word. If you enable the advanced ! find query, you can also search using wildcards or regular expressions.

    ! !

    You can also search for a specific message in the cache of temporary ! copies of messages that have been proxied. You might wish to do this if ! you realise that you have incorrectly trained a message and need to correct ! the training. You can search the subject, headers, or message body, or ! for the SpamBayes ID (which is in the headers of messages that SpamBayes ! proxies). Messages that are found will be presented in the standard ! review page. Note that once messages expire from the cache (after ! %(cache_expiry_days)s days), you can no longer find them.

    *************** *** 209,213 **** !       You can configure your Spambayes
          system using the Configuration page. --- 320,324 ---- !       You can configure your SpamBayes
          system using the Configuration page. *************** *** 225,229 ****

    ! The Spambayes proxy stores all the messages it sees. You can train the classifier based on those messages using the Review messages page. --- 336,340 ----

    ! The SpamBayes proxy stores all the messages it sees. You can train the classifier based on those messages using the Review messages page. *************** *** 318,322 **** ! Re: Spambayes and PyMeld rock! 8-) --- 429,433 ---- ! Re: SpamBayes and PyMeld rock! 8-) *************** *** 502,506 ****

    This page allows you to change the options that control how ! Spambayes processes your email. Your options are stored in /example/pathname.

    --- 613,617 ----

    This page allows you to change the options that control how ! SpamBayes processes your email. Your options are stored in /example/pathname.

    *************** *** 597,601 **** Version 0.00
    ! Spambayes Web Interface, Mon Dec 30 14:04:32 2002. Spambayes.org --- 708,712 ---- Version 0.00
    ! SpamBayes Web Interface, Mon Dec 30 14:04:32 2002. Spambayes.org *************** *** 613,614 **** --- 724,726 ---- + Index: ui_html.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/resources/ui_html.py,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** ui_html.py 16 Mar 2004 04:41:48 -0000 1.32 --- ui_html.py 10 Aug 2004 14:20:25 -0000 1.33 *************** *** 6,121 **** import zlib ! data = zlib.decompress("xa\016\0072و}9^}\004\ ! '\015o\027\036&z|x6]_q\\o%\002p8\026Xȭ&\011c]ʓ\006n2\ ! \011a`(`s03{ً\014`\034?;3f2\013E+d\012\007K\0176\001fW,\012l5%Q\ ! n\011!9-\032\025ыs>\027j\012dh\003_P)>3ꛋ\000u̵E˯9[\ ! Ea\0009x\030\003&\017g\031\027\016D\\\031Sk\0010}Г\016\012\003ҡ8N\011\035\014\ ! Zv\013n\\\026Nm\"Rq+\02355\000\015\032u\014[_\023n]\014\030h\011\ ! D4x3dE\030ݳ y~+\015M\036`\033yh9gy\026\\I\036|\006\006k\\|/\ ! 0?\030?Myz\016x\000\0324=\0053P}4a\031\024e$'#?Wʥ)\"\0233\030\010\ ! 5{1Á%4yqʨN\035\0150Lq\0249 ?\"vO\005\020b\0016Dc\007p5\027ԕ\027\010\025&\ ! /Ә0Yȕw%;ZaR#*|\005X(\000\013P4L_i\025,H\033&\036+֩\000o\015\ ! \003Ka_{\014ڥw<\006Z9c09[aV%B0m/PB6QJI\017Y{{>K\ ! \015qׂg\035sw*|\032zc\030\0339ˬp\010۳\037>d2\025Ɂmwļ\016a\035\ ! ថB\031\000\012>)0W\";@I30+\010_ͮӸ8~;\004avb\020'OЀ\ ! Fj\010C\014\012M\006 gC$֪:Ev`-\"߰f倻\023__0qS\ ! \011\013D\034C\000\036YE6\000'Ԁ^~З\011P\ ! 44#_E\000\005я@\020`\\\0375B\006|?}\025\000\005*\003'\000󕻙\027(sngC\037\ ! 1t|x:hrzW$s\030\017ZNE˕\033G\012\003ʟp=$T\005#\023\ ! 6[;\"\007h\026UD\023%,uaư>\"a\027\033\023d&\020AY4k}\015~NE2 $\ ! H2\031ٵ1\027.\005\016ШkxIx_qkzrR\011Š\012]Tod\ ! P\\ȯC9ɚ\010Y\024<3Y\015%\030\025s8=M&\006$Vi9\004zH{3#gp\012\ ! \0271\\|\006:\012P)t*WC9\030\034v\014%@O)FP\001XW)\003XA\0030e\025\ ! \037{1E\005\021\001d%\016<\011PXN1@*\017\0048\030:$cp\0056^\000Z\034Yk3\ ! \020e(\013\014\021,6lP\036;\001\000/[A q'jk04\025\0347\012\022XZ\033\036\ ! 
\014E\030jrD.W~)\010\032x\001\030e@t\011\030U\0378q\037q\020\004\024{-\035\015\0069P\020\010`\\\ ! \023Q\0140\023蜧 <Ϗ\032x5.b\027FYXbC\010ˍ\017\000\020\012\003Xr\ ! \024\035! #\0032\0052_3\005\000f|TScL\000}VTf#\001:0sh3|\027L\ ! 3f%t>:F3\020%U\006\035&\021\003M!13]\002\002K\014F=8O9i$^,z +\025%\ ! \006\023L\005F+\023v+`$\0036\000v:Tl\000I=\010\026i\024\014طj\037,Id\005\ ! $b)\012Z\033\011\023L$52\\a\005Fɹd+x\007?E]EP\005K3x~9MK\034ݡbzO\ ! V\005X~\017I\"m_X_?\003\031\\\036---~ůJpچA\022'@إt\\PN\ ! ߽tE)F\001yB\004t\031C)\004\030+X@Uj%\011A\024I\025.=\ ! %^\012i\002ӡ\015\012[\026-y?ߞ\016=>y,,\003FI(\036.^MZMc\002\ ! \0300\012.\00427n/dJ:\007S[\026{\030`0#n\007\036\033Dn11c2K~!ʕȮ`<8IW)v\032=\035\033,.\020Qij2aݹn\024\ ! @X\012}|r\002\014ia\001={\001{:\022Qsl@\027@\013҃\007\003\020A\023B6\021Dz[o<\ ! 7\033\006f\006\011\037F5n9rR9\007zs+W\021\007\021\007H\026L$&MG2.$OZ\034\003\ ! K2\010a83\005#AC%h+1V`JP'+\022},l\017揕\ ! tL.n4rJ\016&fg<\031\0101V\011h~\024\007`=\004g&R\001|\ ! h^:nc]9_M0\015\011`s\0369 e\000H\014==-|8j\035YM-;\ ! /q\012:J㎍\015b\025\\M~7\023:M\007\034[?\002lsnݨ'o1\0036\026\022^\ ! \033qov;\0156\005TGx5zf\006Ӡ(ʬ:\022S3Peڨ[T\ ! \027w&ͧ\023xF\016Ǵo\001\006;e{nS78TћL\0134'0=Hwu>\ ! \017=!<+U:G\010T򥦾\027\032xpS&ɭ$\015\023Qwx4jG3\013SM5M\ ! gk1X$T0\007\002\006~TsVv\0255xӦE\031\007\02757fJK\ ! Gj\012\022\026?3bWqv:SrN \007׎<[ނxiKs0]\0220\ ! A볹m#oqn9 *\035\037=$=\027.?t1s\0128\011?ўC\ ! (5]\026֝~\023:\023Kӽ\022G\024DCg\023R\001su\0128J\027IS5&\ ! 2\005T\034|\037Z\034m( ܝqhJGۙ7\ ! ܭJ9\023\014BXغ)h/7\030VdK65sF\021*68\036\016V\014hmm{E#T\ ! Y N>Iݭ9Y?0\034«\023\026n>2A>->'4\02432iM\037j\007n\ ! ?x\010CAF\015f'RqB6yᙽ[ufc\007`_hvJA~\014!\025Y[<ތ\ ! 8\004_;c|4EYrtW\026\016R?#}'e8]W_CNJڟ\0226ۭ\034\ ! t]c{b\014O*5m,\")l`X1ess1Wb0³a\ ! ,\005\004\001x$3\001%\012\035g\023,\015<шۇ9})jշ\011\003\007\021o_b\016\015s\ ! `Nhx=\032̏HW+Lh]\"T7qe \\/]zJ^̌ق\ ! Nn\0243\031\024:wHc-\012(\014\013\013\007&:$") ### end --- 6,155 ---- import zlib ! 
data = zlib.decompress("x=ksFS0\015\")v\022=kN;;ʥR)\020\030X\000(nncf0x!\ ! ;WuJ\"\001LOwO\007廫뿽%^_}#\017W;\032~>\032^^\033Ocqˤ\ ! (Md<\032ћ|Ţ\\[\020Q\031+\021\036}&\0373|!ת\020?\025*\027?$g2P\027#z\000,U\ ! )Ţ,#[\025]zW2X(O\004)<^\036\005|i,Uጸ^T\003\0018L_x|F\ ! ^o\002C\036]\001<{\037eUGq\024\022.LsYQi\032b\006\020ķGB摌\007XE\ ! E1\020\013\025ߩ2\012X|\036%gb,\027\002\030\017|\0116\000|<\026\007igOc\011<\ ! `T\"\005=\033?\031fxoY!Fh\002pqHOEjE3w\036I\000O\0313véL\022d*\ ! yVI\0103\005c\0053La/ڏG\001_~!4C\037i\006{Q\036b\032\003̾iY\ ! Σ7-!D8\027?\024ANΘ\006FK\036@{0 \035B\ ! Jddk\002yh\0330̣Lݤ\011/h*U\032>9\031?:п!l\030\031\ ! [q1Ҷ\023>\022\023x\007!/N\004,K5ȟ;E\006\026\016Gm\000M.\\\024yp<\002\014\ ! \030e\ ! <=Ghrń\"]14%g\017\032SaTN{p\010\0016.\000/i0@*x\021%YU6ԏx\ ! \\g\0125\020\037\017\004.\0114k\033iͪL\"\027bV\016j\021er%\013YzTJ2`κ\ ! \030\026.\0036\034\015]ce|\032{\023\030\0359˼t\010s\034꾸\024i\003O\002o \ ! \016pOHa\032\000\032!)0W*?@r0+\010lXY<_~\004avb\024M'\ ! bHЀFj!\012C\014\012mF g\035ucJU\033uD\025/\"߲\012]\025/Z\030\034\ ! 8\020Vi\012g(za@T\011?\020i\023]_[P6ӥ(\\`\"F\013l$ #\0015\ ! 1\000Oƪm\0005?\022\001\011'G`Br JL\003A\036jX\021N\025\022\016VT\021\014b\ ! Vm1鈬4;BYl\023۬źm<_\030%>VWkf\023)\023\037^K^\0310\022)\ ! L\006|ބ\005ZW\007''\016ӷ=#M\034ʍ}\0218\001&=~m|r\001A\001\007wx}G\ ! /\023\010hjA\014A\000N\007\037 \\>j\004EG-\030\000\033\005Ai\034n\026,M=\ ! \0077/}61\023!WM4PЊ\032x9#(*C')\034ƃ\026M?*L \005S\001/`:\036h\022\ ! I\035c@O;˙=<;\015\025\004Ȼ\024\\]Xe1G$\"Cc\002?(+f\035OQ;)\ ! H\006\004\031Y>;xv\011\027\032:}\015W7\011\003Ug;Gt\021Ԃ{z\" \ ! 6c\001\016x(S2Y\"SA4\002^'y'VaV\033\"bY\023;-\022\016>*.r1\013\001\ ! #5 i\027?(>B\025\020P!I\020Ł\0020\023+\001\032)\024C\014\005\016\010*\000\ ! \013af\000Nu`//s\037\000cJ\024\"0\015Q@\035>$@%1-U,r\025\010\002`^p\0214^C\ ! 4\012|\012o*\025\014Q\016~Z\001d\000\006wm&A<\001\000zXAnqĊjn\031-G)\ ! \011\017\032\022XVad\036\034GF\014o9N-i\004)\033\004\006@L\024t\011\030p \000$q\010ry\ ! 
=,F\005P\020(`\\\015\023U0\027U/\030g7 <ώ\036x\015%!\020AS .JB\000׻\ ! \012yW\000#\024\0060\016#)ށ%\032m!d$\0072\005ӪX\013\006\000+BaHV\000]W>jH3\010B\023\ ! \000\035\0347L9<~\020*LQo)\026CiY(B-xXD=U)\006o\023\020h:\010\011\023OOQ\ ! xYtO\002W+VĶS\030¨\\٥\014؜jT\037Pz>\011 )]1GpH\ ! `¡U`\005BճD,\0053RC\012#^1{JA=8c`mKc\023\017ޡa]Eb\002o\015\ ! `A\022G\030rJph\030\016m6/v*L۽.1\021\013wr`7#^*8oXJ\037Za_\000p\027^DeA\014Ji^b$7Ե}c\ ! \014\031y{\033Xs58OZ!\004.R#1\015gTLWm\030e:UQ*EUH\"Z\ ! ʐ\012'&(-?\006&k\021?+\010MP,8\035\"w\000{\0200ì\011D\030\037j\ ! v\025W\0348ԃ !W{m\"\015\001z\034\022oӺ0-% \011'f\010;.VO\013brP\ ! JFx\032plnH)8\0358Tg10P3G\020)0\\\023\007\020\030@{?2׵2\026F\0334s\014$\ ! Y\012/ygRQ\010).`<\012\020VSN&:Z9^\014\030^R\010zdUvC\011}9\032\ ! 'qe,m\023AAz^,P!KN\012;Q;A\013P'\005DAGt[scDE\ ! 8nz`*In\013R\035{8]\014r`\"b\001A~\023p\013\030|9S+\013M,n&*\\E\ ! ׵Dى{qI\005\031dn40mi@\021\"Ĺ\000X\004$\017\013b\011\014\026\012\\$,儭\ ! \002C`\032h\020@H493\005\021\034Yirc;,\016{'\027\\\0004\004K[J>Jsc|\ ! u\035kEJ\023X\0144`\035qι{xU\013\004\010\016=\003{=n _{rC4\ ! \012\033;-$rjZe&ɱt\030\020y\003E͞hkgiRRp\024B6\025\027\014b\ ! \026E*EK\010t\021^\021l5$w#\022\033\0029O\024ZG&\036c\013(&VU|\ ! RЊm\000!ʹ\0150*\"'7mT\012qe,fq|5yş=@=\003b/AQ3\002\007+*\ ! V0N3OHTט!^;\010n\015X1\032\030p)ی4q\034n\000\003^qV\ ! \023WYD-,\"\002QWHv\006,\021&Y\024\023\020\0340\010\026[\035XnNqREfY\025A\ ! \036M7\\ȀbM\006\005x\011yG\030We\024\031kV‘ \027yRu+a\014\021U*\ ! _k- Z:m0s\023\036.`.(g[^c\004cR}\003\024hDSx;\016\022\ ! 7k'\\\027\005\0121'9\026bET/_nCz{E^X,z\"\025'%YeM.\027r\ ! dM[\014ӌ]\033\003'\021GE/\030\017踡:ds&4ĉv5N;0\006@}*S\ ! oN\030?\024\007\\5\035\035f4\017ʆv\"1~1AFOBɢDˍ),SLq\033H\ ! \007rVr=ks)\026$\003T\021,{\012s$\010\014R#vSzo*.|\012\ ! \0367ť*\027)\014յstёx˛\0126Ya.x:a\007\024X\033U^6vg\001w\000\ ! ʳ\0361\025a\00725}EcKTʣZѢC6=L^5O7o<{ab\ ! {5:MgO\002[(*0,\034l7v_\031c=\031w{\020\021=N\0230\016\ ! R:B˹|tj~hÚ\017&\037\021Qvfx\035h\031\017Dh$78\ ! 
7>CMx(\037ل~\002L6g6{YS1u\030>L7/o\033~t\ ! $\" qIC\030\014\024m.FaVY\033B^\006\014\022K\014{ܲ'\036g\014B%\ ! ߤs%?YqT\022\0235\005_;Fm2B^!]ypl\027i\036+\002t\022\"f\ ! ۉ_9\013VC|''FB6D4ݼ>CJZAݜͮ\024\014t~R]\ ! 6޿{JQ\032\017͞t\032\036wZZXw]A\023$=O\031F\004vBXv\015\ ! [Yř\001m\023p#H=\036w/iG\035d{\023aD\0318\000.\030®ʴ~5Љ\006\ ! \026n\035j̍Yj\021lkB[^wG-v\016jca\\0k\023yΑ$\006-J\020\034y\023~s\ ! E{DfDWiߦ\016ֱI\004\014>Ǡ3u6\"\030ѝƞ:Ҷ[\ ! s\013\004^o\024,.M\022f\000\033 \017H\005A%\\z]4>$.W9\031vV\ ! fcã\0331\022}͜ (ĺ\023QM~g)JAF—m2aW.M\010Z_ӭp\ ! \002y\027q<[4\030_7_Tj,{\0216Bs,:W\015\"\004\033`H:2b\ ! w26݁1{0s\001ߪ\017\020W2\012Љ:m}L\026Қ`}R%IBW0a霟=rÂANP++[\007\003ؾ֎*v]\037M\ ! Y]\034AP\\d\005qJaMXW\013\003!#\037H&>Y=\003bӌڏֶ-ȣ̐\ ! NU2\032]|gȪu7Mj\036HzD\031,S}\032hpAu!\037\ ! ?\026+X%s'\036{e\034zyL\034D\\D₆\011Ÿ||RQ\004\036U\ ! \007 8!n\034ga\016P;Pty)N^{\003ŋ-q9)KaIW*\002G\ ! ypx\035vdyzhgUԿF_\023i\015߿xOکouo@\ ! z1\032|\012Dc\023[uon\013nζ\004VMP&<|)u\020xm\005\033\017\016NV\ ! A\030}\015y\020\005麹\016);ז}i%nx7\006w?i\027t͗\006\013Aѫ\"';\ ! $\033[\004ܟ\"x\021\035n)CBb\007\012_;Hȼ\014\ ! 7HT\011^4I\0179\031q_` 4Jo\016\020ki\010X\003q|,>Lǧ۳\ ! qϙL]9\011s\031F\003M\031|0\020g\006Lm\010_\004\033J\ ! \027\0051|\"@?\007\001&;\017y\0023\036Ǐzhoo j\024~c2y\ ! \037\0227\024wǘibE;Ln?\0217\013\012\010{@Cz*[R.]Ћ6ZoE\0269w\ ! ։,Neح\000 Gg_PC\032PO\021}*.#}ְKrOo>t\030Kk]o7L\035\ ! Q f\026Ull\001ӻ%IOƱ.\024[gMP\036\012wiw@~uڷhJ\ ! \006Ԛ֬cB6ͨϱU\017S'\010ɦ\000Hx6Hj'XmKp\0334|{\006\ ! xRo\020Q\036CĴ\\Z^V\012}|r\006\014ia\001=yl\003&:ϢO\033V\ ! ۗ797)d\033ql&|ažWFaH7\033\026@w^wuAM̡\ ! S\034=\010H\036u9\006ğy\004a8+\007@C-hM@ԁtC,\ ! Y!ON?\015?aIG/阴}zi\001>^39PGA\013-D\033\ ! Vw1W\025$Ւ\001>em3MXWp^/J/²\002+\033\036\023'<կ\034_\000\036\ ! K,svK6=w?h#w3aENz626Vp\015]à^ӛcڝ\007m\014\ ! 7nw\030L\0113\026\022^׋čE[\006xm\017<5Q'\0314h\ ! *d\013\003|ޖu_Ȅ:U\037zx\\3e>]3jv\0101VԷ\035@`}S|\ ! &s3\014@s\016Ӟ[tbW\036x]\022ϳ\027HeO˗v|A\031Vڼ[A4;I\003\ ! 
D.Gcڨw4&t{72ѽ\030{Y.\022z\030\003\001L_j\004+;xU\007z7g\ ! \\\020\027\021\\Zzn¦\016/W4/~ٜg3>sL'9xq\024\ ! -\006śZG_ړ.8\016]Mm\033\001ھãlr@G\020=S\027>~]\ ! CjS\027\037p\022~=4Q}\026]~:\013C_Y;\022G\024D`Rpu\ ! \0328J\027I[\026Z\036G\020/zH3`l?~:n\001|M\037\003-,\007އb&G\025\ ! I\"ܝqx̑ggi\014\025n`\0226Uبbtz&[>\035}\026kk\024Nm\ ! Sxjň|k7*@\001\005ԓXn̙\011Mf\002`sXS)OX6zw`a\016/\ ! ᓾ\032JZ;ځ\036\033\017 \036z(؈D2jd(=Wyg6v {\016/\032\ ! \0302MOAZg*p{Kƛ\021>\003W\034CcbQaJvTU\011(x\025ߧi8]\ ! 0JO\036+Lf5o\0366hxQ_us/ε\013J\033?\0102ƹjt\ ! nw\031_LFzḬsb\026\022\000<\022\005t,`kFd=7aV\ ! -\015|>bDvU- 3\007s\035E`~ߥh\021$M\017vrn\ ! (A/]z*Q̌قJWݝPS7\000=%<\027fo w[\002k\007h\ ! \001\013b") ### end From anadelonbrin at users.sourceforge.net Wed Aug 11 06:50:45 2004 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Wed Aug 11 06:50:47 2004 Subject: [Spambayes-checkins] website faq.txt,1.80,1.81 Message-ID: Update of /cvsroot/spambayes/website In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv7222 Modified Files: faq.txt Log Message: Typo spotted by Terry Speirs. Index: faq.txt =================================================================== RCS file: /cvsroot/spambayes/website/faq.txt,v retrieving revision 1.80 retrieving revision 1.81 diff -C2 -d -r1.80 -r1.81 *** faq.txt 22 Jul 2004 01:10:27 -0000 1.80 --- faq.txt 11 Aug 2004 04:50:42 -0000 1.81 *************** *** 247,251 **** 20 hams), you'll find that it's getting it right most of the time. The web training interface automatically checks the Ham/Spam boxes according ! to what it thinks, so all you need to do it correct the odd mistake - it's very quick and easy. --- 247,251 ---- 20 hams), you'll find that it's getting it right most of the time. The web training interface automatically checks the Ham/Spam boxes according ! to what it thinks, so all you need to do is correct the odd mistake - it's very quick and easy. 
From kpitt at users.sourceforge.net Wed Aug 11 17:16:18 2004 From: kpitt at users.sourceforge.net (Kenny Pitt) Date: Wed Aug 11 17:16:22 2004 Subject: [Spambayes-checkins] spambayes/Outlook2000/sandbox .cvsignore, NONE, 1.1 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000/sandbox In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13644 Added Files: .cvsignore Log Message: During development we may run scripts from the sandbox directory, so ignore the resulting .pyc files. --- NEW FILE: .cvsignore --- *.pyc From kpitt at users.sourceforge.net Wed Aug 11 17:22:40 2004 From: kpitt at users.sourceforge.net (Kenny Pitt) Date: Wed Aug 11 17:22:43 2004 Subject: [Spambayes-checkins] spambayes/windows/docs/images .cvsignore, NONE, 1.1 Message-ID: Update of /cvsroot/spambayes/spambayes/windows/docs/images In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14617/windows/docs/images Added Files: .cvsignore Log Message: Ignore the Thumbs.db file that WinXP creates if you view an image directory in Thumbnail or Filmstrip view. --- NEW FILE: .cvsignore --- Thumbs.db From kpitt at users.sourceforge.net Wed Aug 11 17:22:39 2004 From: kpitt at users.sourceforge.net (Kenny Pitt) Date: Wed Aug 11 17:22:45 2004 Subject: [Spambayes-checkins] spambayes/Outlook2000/docs/images .cvsignore, NONE, 1.1 Message-ID: Update of /cvsroot/spambayes/spambayes/Outlook2000/docs/images In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14617/Outlook2000/docs/images Added Files: .cvsignore Log Message: Ignore the Thumbs.db file that WinXP creates if you view an image directory in Thumbnail or Filmstrip view. 
--- NEW FILE: .cvsignore --- Thumbs.db From montanaro at users.sourceforge.net Tue Aug 17 19:05:00 2004 From: montanaro at users.sourceforge.net (Skip Montanaro) Date: Tue Aug 17 19:05:15 2004 Subject: [Spambayes-checkins] spambayes/contrib tte.py,1.13,1.14 Message-ID: Update of /cvsroot/spambayes/spambayes/contrib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv28787 Modified Files: tte.py Log Message: Seems better to try and alternate ham/spam scoring instead of scoring all the hams in a batch and all the spams. After implementing the ratio stuff I began to have problems. I think the "score all the hams then all the spams" in a chunk was the cause. Index: tte.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/contrib/tte.py,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** tte.py 26 Jul 2004 02:46:49 -0000 1.13 --- tte.py 17 Aug 2004 17:04:42 -0000 1.14 *************** *** 144,169 **** sys.stdout.flush() ! for ham in hams: ! score = store.spamprob(tokenize(ham)) ! selector = ham["message-id"] or ham["subject"] ! if score > ham_cutoff and selector is not None: ! if verbose: ! print >> sys.stderr, "miss ham: %.6f %s" % ( ! score, selector) ! hmisses += 1 ! tdict[ham["message-id"]] = True ! store.learn(tokenize(ham), False) ! for spam in spams: ! score = store.spamprob(tokenize(spam)) ! selector = (spam["message-id"] or ! spam["subject"]) ! if score < spam_cutoff and selector is not None: ! if verbose: ! print >> sys.stderr, "miss spam: %.6f %s" % ( ! score, selector) ! smisses += 1 ! tdict[spam["message-id"]] = True ! store.learn(tokenize(spam), True) except StopIteration: --- 144,170 ---- sys.stdout.flush() ! for (ham, spam) in map(None, hams, spams): ! if ham is not None: ! score = store.spamprob(tokenize(ham)) ! selector = ham["message-id"] or ham["subject"] ! if score > ham_cutoff and selector is not None: ! if verbose: ! 
print >> sys.stderr, "miss ham: %.6f %s" % ( ! score, selector) ! hmisses += 1 ! tdict[ham["message-id"]] = True ! store.learn(tokenize(ham), False) ! if spam is not None: ! score = store.spamprob(tokenize(spam)) ! selector = (spam["message-id"] or ! spam["subject"]) ! if score < spam_cutoff and selector is not None: ! if verbose: ! print >> sys.stderr, "miss spam: %.6f %s" % ( ! score, selector) ! smisses += 1 ! tdict[spam["message-id"]] = True ! store.learn(tokenize(spam), True) except StopIteration: From sjoerd at users.sourceforge.net Thu Aug 19 15:11:48 2004 From: sjoerd at users.sourceforge.net (Sjoerd Mullender) Date: Thu Aug 19 15:11:56 2004 Subject: [Spambayes-checkins] spambayes/scripts sb_imapfilter.py,1.37,1.38 Message-ID: Update of /cvsroot/spambayes/spambayes/scripts In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31678/scripts Modified Files: sb_imapfilter.py Log Message: Fix the regular expression to match the Message-ID header by stopping on newline. I have seen a couple of spam with a header Message-ID: in the header which was a few lines later). This eventually resulted in a crash in IMAPMessage.Save when the message couldn't be found again after it was saved. Index: sb_imapfilter.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/scripts/sb_imapfilter.py,v retrieving revision 1.37 retrieving revision 1.38 diff -C2 -d -r1.37 -r1.38 *** sb_imapfilter.py 9 Aug 2004 07:01:38 -0000 1.37 --- sb_imapfilter.py 19 Aug 2004 13:11:45 -0000 1.38 *************** *** 672,676 **** "\:\s*(\d+(?:\-\d)?)" # Search for our custom id first, for backwards compatibility. ! for id_header in [custom_header_id, "Message-ID\: ?\<([^\>]+)\>"]: mo = re.search(id_header, data["RFC822.HEADER"], re.IGNORECASE) if mo: --- 672,676 ---- "\:\s*(\d+(?:\-\d)?)" # Search for our custom id first, for backwards compatibility. ! 
for id_header in [custom_header_id, "Message-ID\: ?\<([^\n\>]+)\>"]: mo = re.search(id_header, data["RFC822.HEADER"], re.IGNORECASE) if mo: