From anadelonbrin at users.sourceforge.net Sat Jun 10 06:57:13 2006 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Fri, 09 Jun 2006 21:57:13 -0700 Subject: [Spambayes-checkins] spambayes/Outlook2000 addin.py,1.152,1.153 Message-ID: <20060610045715.ACAD51E400C@bag.python.org> Update of /cvsroot/spambayes/spambayes/Outlook2000 In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv30085/Outlook2000 Modified Files: addin.py Log Message: Add simple parts of [ 824651 ] Multibyte (CJK etc.) message support (Lets extractmessages and scoremsg work with charsets other than us-ascii, and lets Outlook plug-in handle tokens that aren't in the right encoding). Index: addin.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/Outlook2000/addin.py,v retrieving revision 1.152 retrieving revision 1.153 diff -C2 -d -r1.152 -r1.153 *** addin.py 22 Apr 2005 06:18:09 -0000 1.152 --- addin.py 10 Jun 2006 04:57:10 -0000 1.153 *************** *** 5,8 **** --- 5,9 ---- import traceback import _winreg + from types import UnicodeType # *sigh* - this is for the binary installer, and for the sake of one line *************** *** 522,526 **** else: nham = nspam = "-" ! word = repr(word) push(escape(word) + " " * (35-len(word))) push(format % (prob, nham, nspam)) --- 523,530 ---- else: nham = nspam = "-" ! if isinstance(word, UnicodeType): ! word = word.encode('mbcs', 'replace') ! else: ! word = repr(word) push(escape(word) + " " * (35-len(word))) push(format % (prob, nham, nspam)) *************** *** 550,554 **** # could use pprint, but not worth it. for token in toks: ! push("" + repr(token) + "
\n") # Put the body together, then the rest of the message. --- 554,562 ---- # could use pprint, but not worth it. for token in toks: ! if isinstance(token, UnicodeType): ! token = token.encode('mbcs', 'replace') ! else: ! token = repr(token) ! push("" + token + "
\n") # Put the body together, then the rest of the message. From anadelonbrin at users.sourceforge.net Sat Jun 10 06:57:13 2006 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Fri, 09 Jun 2006 21:57:13 -0700 Subject: [Spambayes-checkins] spambayes/pspam scoremsg.py,1.4,1.5 Message-ID: <20060610045715.B1EB11E400D@bag.python.org> Update of /cvsroot/spambayes/spambayes/pspam In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv30085/pspam Modified Files: scoremsg.py Log Message: Add simple parts of [ 824651 ] Multibyte (CJK etc.) message support (Lets extractmessages and scoremsg work with charsets other than us-ascii, and lets Outlook plug-in handle tokens that aren't in the right encoding). Index: scoremsg.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/pspam/scoremsg.py,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** scoremsg.py 18 Dec 2003 06:41:51 -0000 1.4 --- scoremsg.py 10 Jun 2006 04:57:11 -0000 1.5 *************** *** 4,7 **** --- 4,9 ---- import sys import email + import locale + from types import UnicodeType import ZODB *************** *** 20,23 **** --- 22,29 ---- def main(fp): + charset = locale.getdefaultlocale()[1] + if not charset: + charset = 'us-ascii' + db = pspam.database.open() r = db.open().root() *************** *** 32,35 **** --- 38,43 ---- print "-----" for clue, prob in evidence: + if isinstance(clue, UnicodeType): + clue = clue.encode(charset, 'replace') print clue, prob ## print From anadelonbrin at users.sourceforge.net Sat Jun 10 06:57:13 2006 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Fri, 09 Jun 2006 21:57:13 -0700 Subject: [Spambayes-checkins] spambayes/utilities extractmessages.py, 1.3, 1.4 Message-ID: <20060610045715.E37C51E400C@bag.python.org> Update of /cvsroot/spambayes/spambayes/utilities In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv30085/utilities Modified Files: extractmessages.py Log Message: 
Add simple parts of [ 824651 ] Multibyte (CJK etc.) message support (Lets extractmessages and scoremsg work with charsets other than us-ascii, and lets Outlook plug-in handle tokens that aren't in the right encoding). Index: extractmessages.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/utilities/extractmessages.py,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** extractmessages.py 15 Jan 2004 03:05:22 -0000 1.3 --- extractmessages.py 10 Jun 2006 04:57:11 -0000 1.4 *************** *** 25,28 **** --- 25,30 ---- import re import cPickle as pickle + import locale + from email.Header import make_header, decode_header from spambayes.mboxutils import getmbox *************** *** 86,89 **** --- 88,95 ---- return 1 + charset = locale.getdefaultlocale()[1] + if not charset: + charset = 'us-ascii' + mapfile = spamfile = hamfile = None features = set() *************** *** 99,103 **** spamfile = arg elif opt in ("-f", "--feature"): ! features.add(arg) if hamfile is None and spamfile is None: --- 105,109 ---- spamfile = arg elif opt in ("-f", "--feature"): ! features.add(unicode(arg, charset)) if hamfile is None and spamfile is None: *************** *** 126,132 **** evidence = msg.get("X-Spambayes-Evidence", "") evidence = re.sub(r"\s+", " ", evidence) ! features = [e.rsplit(": ", 1)[0] ! for e in evidence.split("; ")[2:]] ! features = set([eval(f) for f in features]) if not features: usage("No X-Spambayes-Evidence headers found") --- 132,143 ---- evidence = msg.get("X-Spambayes-Evidence", "") evidence = re.sub(r"\s+", " ", evidence) ! l = [e.rsplit(": ", 1)[0] ! for e in evidence.split("; ")[2:]] ! for s in l: ! try: ! s = make_header(decode_header(s)).__unicode__() ! except: ! s = unicode(s, 'us-ascii', 'replace') ! 
features.add(s) if not features: usage("No X-Spambayes-Evidence headers found") From anadelonbrin at users.sourceforge.net Sat Jun 10 07:00:49 2006 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Fri, 09 Jun 2006 22:00:49 -0700 Subject: [Spambayes-checkins] spambayes/spambayes Dibbler.py,1.17,1.18 Message-ID: <20060610050051.485F71E4014@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv31488/spambayes Modified Files: Dibbler.py Log Message: More of [ 824651 ] Multibyte (CJK etc.) message support Specify character set in web interface. Index: Dibbler.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/Dibbler.py,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -d -r1.17 -r1.18 *** Dibbler.py 15 Nov 2005 00:40:28 -0000 1.17 --- Dibbler.py 10 Jun 2006 05:00:44 -0000 1.18 *************** *** 534,538 **** headers.append("HTTP/1.1 200 OK") headers.append("Connection: close") ! headers.append("Content-Type: %s" % contentType) headers.append("Date: %s" % httpNow) for name, value in extraHeaders.items(): --- 534,538 ---- headers.append("HTTP/1.1 200 OK") headers.append("Connection: close") ! headers.append('Content-Type: %s; charset="utf-8"' % contentType) headers.append("Date: %s" % httpNow) for name, value in extraHeaders.items(): *************** *** 550,554 **** headers.append("HTTP/1.0 %d Error" % code) headers.append("Connection: close") ! headers.append("Content-Type: text/html") headers.append("") headers.append("") --- 550,554 ---- headers.append("HTTP/1.0 %d Error" % code) headers.append("Connection: close") ! headers.append('Content-Type: text/html; charset="utf-8"') headers.append("") headers.append("") *************** *** 584,588 **** headers.append('WWW-Authenticate: ' + authString) headers.append('Connection: close') ! 
headers.append('Content-Type: text/html') headers.append('') headers.append('') --- 584,588 ---- headers.append('WWW-Authenticate: ' + authString) headers.append('Connection: close') ! headers.append('Content-Type: text/html; charset="utf-8"') headers.append('') headers.append('') From htrd at users.sourceforge.net Mon Jun 12 16:08:39 2006 From: htrd at users.sourceforge.net (Toby Dickenson) Date: Mon, 12 Jun 2006 07:08:39 -0700 Subject: [Spambayes-checkins] spambayes/scripts sb_bnserver.py, 1.2.2.1, 1.2.2.2 Message-ID: <20060612140841.E99351E400C@bag.python.org> Update of /cvsroot/spambayes/spambayes/scripts In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv31048 Modified Files: Tag: bnfilter_in_c_branch sb_bnserver.py Log Message: Previously sb_bnfilter was always adding a dummy From line if it did not exist. One user reported it adding a second From line. This makes it behave the same as sb_filter; emit the From line in the output only if it existed in the input. Thanks to Peter Barker. Index: sb_bnserver.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/scripts/sb_bnserver.py,v retrieving revision 1.2.2.1 retrieving revision 1.2.2.2 diff -C2 -d -r1.2.2.1 -r1.2.2.2 *** sb_bnserver.py 5 May 2004 21:58:44 -0000 1.2.2.1 --- sb_bnserver.py 12 Jun 2006 14:08:36 -0000 1.2.2.2 *************** *** 161,165 **** for action in actions: action(msg) ! return mboxutils.as_string(msg, 1) --- 161,165 ---- for action in actions: action(msg) ! 
return mboxutils.as_string(msg, msg.get_unixfrom() is not None) From anadelonbrin at users.sourceforge.net Thu Jun 22 12:37:02 2006 From: anadelonbrin at users.sourceforge.net (Tony Meyer) Date: Thu, 22 Jun 2006 03:37:02 -0700 Subject: [Spambayes-checkins] spambayes/spambayes OptionsClass.py, 1.31, 1.32 Message-ID: <20060622103707.8475E1E4007@bag.python.org> Update of /cvsroot/spambayes/spambayes/spambayes In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv32062/spambayes Modified Files: OptionsClass.py Log Message: Fix bug in regex preventing valid IPs. Index: OptionsClass.py =================================================================== RCS file: /cvsroot/spambayes/spambayes/spambayes/OptionsClass.py,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** OptionsClass.py 6 Jan 2006 08:38:52 -0000 1.31 --- OptionsClass.py 22 Jun 2006 10:36:58 -0000 1.32 *************** *** 845,851 **** FILE = r"[\S]+" FILE_WITH_PATH = PATH ! IP_LIST = r"\*|localhost|((\*|[01]?\d\d?|2[04]\d|25[0-5])\.(\*|[01]?\d" \ ! r"\d?|2[04]\d|25[0-5])\.(\*|[01]?\d\d?|2[04]\d|25[0-5])\.(\*" \ ! r"|[01]?\d\d?|2[04]\d|25[0-5]),?)+" # IMAP seems to allow any character at all in a folder name, # but we want to use the comma as a delimiter for lists, so --- 845,851 ---- FILE = r"[\S]+" FILE_WITH_PATH = PATH ! IP_LIST = r"\*|localhost|((\*|[01]?\d\d?|2[0-4]\d|25[0-5])\.(\*|[01]?\d" \ ! r"\d?|2[0-4]\d|25[0-5])\.(\*|[01]?\d\d?|2[0-4]\d|25[0-5])\.(\*" \ ! r"|[01]?\d\d?|2[0-4]\d|25[0-5]),?)+" # IMAP seems to allow any character at all in a folder name, # but we want to use the comma as a delimiter for lists, so