From anadelonbrin at users.sourceforge.net Sat Jun 10 06:57:13 2006
From: anadelonbrin at users.sourceforge.net (Tony Meyer)
Date: Fri, 09 Jun 2006 21:57:13 -0700
Subject: [Spambayes-checkins] spambayes/Outlook2000 addin.py,1.152,1.153
Message-ID: <20060610045715.ACAD51E400C@bag.python.org>
Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv30085/Outlook2000
Modified Files:
addin.py
Log Message:
Add simple parts of [ 824651 ] Multibyte (CJK etc.) message support
(Lets extractmessages and scoremsg work with charsets other than us-ascii, and lets Outlook plug-in handle tokens that aren't in the right encoding).
Index: addin.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/addin.py,v
retrieving revision 1.152
retrieving revision 1.153
diff -C2 -d -r1.152 -r1.153
*** addin.py 22 Apr 2005 06:18:09 -0000 1.152
--- addin.py 10 Jun 2006 04:57:10 -0000 1.153
***************
*** 5,8 ****
--- 5,9 ----
import traceback
import _winreg
+ from types import UnicodeType
# *sigh* - this is for the binary installer, and for the sake of one line
***************
*** 522,526 ****
else:
nham = nspam = "-"
! word = repr(word)
push(escape(word) + " " * (35-len(word)))
push(format % (prob, nham, nspam))
--- 523,530 ----
else:
nham = nspam = "-"
! if isinstance(word, UnicodeType):
! word = word.encode('mbcs', 'replace')
! else:
! word = repr(word)
push(escape(word) + " " * (35-len(word)))
push(format % (prob, nham, nspam))
***************
*** 550,554 ****
# could use pprint, but not worth it.
for token in toks:
! push("" + repr(token) + "
\n")
# Put the body together, then the rest of the message.
--- 554,562 ----
# could use pprint, but not worth it.
for token in toks:
! if isinstance(token, UnicodeType):
! token = token.encode('mbcs', 'replace')
! else:
! token = repr(token)
! push("" + token + "
\n")
# Put the body together, then the rest of the message.
From anadelonbrin at users.sourceforge.net Sat Jun 10 06:57:13 2006
From: anadelonbrin at users.sourceforge.net (Tony Meyer)
Date: Fri, 09 Jun 2006 21:57:13 -0700
Subject: [Spambayes-checkins] spambayes/pspam scoremsg.py,1.4,1.5
Message-ID: <20060610045715.B1EB11E400D@bag.python.org>
Update of /cvsroot/spambayes/spambayes/pspam
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv30085/pspam
Modified Files:
scoremsg.py
Log Message:
Add simple parts of [ 824651 ] Multibyte (CJK etc.) message support
(Lets extractmessages and scoremsg work with charsets other than us-ascii, and lets Outlook plug-in handle tokens that aren't in the right encoding).
Index: scoremsg.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/pspam/scoremsg.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** scoremsg.py 18 Dec 2003 06:41:51 -0000 1.4
--- scoremsg.py 10 Jun 2006 04:57:11 -0000 1.5
***************
*** 4,7 ****
--- 4,9 ----
import sys
import email
+ import locale
+ from types import UnicodeType
import ZODB
***************
*** 20,23 ****
--- 22,29 ----
def main(fp):
+ charset = locale.getdefaultlocale()[1]
+ if not charset:
+ charset = 'us-ascii'
+
db = pspam.database.open()
r = db.open().root()
***************
*** 32,35 ****
--- 38,43 ----
print "-----"
for clue, prob in evidence:
+ if isinstance(clue, UnicodeType):
+ clue = clue.encode(charset, 'replace')
print clue, prob
## print
From anadelonbrin at users.sourceforge.net Sat Jun 10 06:57:13 2006
From: anadelonbrin at users.sourceforge.net (Tony Meyer)
Date: Fri, 09 Jun 2006 21:57:13 -0700
Subject: [Spambayes-checkins] spambayes/utilities extractmessages.py, 1.3,
1.4
Message-ID: <20060610045715.E37C51E400C@bag.python.org>
Update of /cvsroot/spambayes/spambayes/utilities
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv30085/utilities
Modified Files:
extractmessages.py
Log Message:
Add simple parts of [ 824651 ] Multibyte (CJK etc.) message support
(Lets extractmessages and scoremsg work with charsets other than us-ascii, and lets Outlook plug-in handle tokens that aren't in the right encoding).
Index: extractmessages.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/utilities/extractmessages.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** extractmessages.py 15 Jan 2004 03:05:22 -0000 1.3
--- extractmessages.py 10 Jun 2006 04:57:11 -0000 1.4
***************
*** 25,28 ****
--- 25,30 ----
import re
import cPickle as pickle
+ import locale
+ from email.Header import make_header, decode_header
from spambayes.mboxutils import getmbox
***************
*** 86,89 ****
--- 88,95 ----
return 1
+ charset = locale.getdefaultlocale()[1]
+ if not charset:
+ charset = 'us-ascii'
+
mapfile = spamfile = hamfile = None
features = set()
***************
*** 99,103 ****
spamfile = arg
elif opt in ("-f", "--feature"):
! features.add(arg)
if hamfile is None and spamfile is None:
--- 105,109 ----
spamfile = arg
elif opt in ("-f", "--feature"):
! features.add(unicode(arg, charset))
if hamfile is None and spamfile is None:
***************
*** 126,132 ****
evidence = msg.get("X-Spambayes-Evidence", "")
evidence = re.sub(r"\s+", " ", evidence)
! features = [e.rsplit(": ", 1)[0]
! for e in evidence.split("; ")[2:]]
! features = set([eval(f) for f in features])
if not features:
usage("No X-Spambayes-Evidence headers found")
--- 132,143 ----
evidence = msg.get("X-Spambayes-Evidence", "")
evidence = re.sub(r"\s+", " ", evidence)
! l = [e.rsplit(": ", 1)[0]
! for e in evidence.split("; ")[2:]]
! for s in l:
! try:
! s = make_header(decode_header(s)).__unicode__()
! except:
! s = unicode(s, 'us-ascii', 'replace')
! features.add(s)
if not features:
usage("No X-Spambayes-Evidence headers found")
From anadelonbrin at users.sourceforge.net Sat Jun 10 07:00:49 2006
From: anadelonbrin at users.sourceforge.net (Tony Meyer)
Date: Fri, 09 Jun 2006 22:00:49 -0700
Subject: [Spambayes-checkins] spambayes/spambayes Dibbler.py,1.17,1.18
Message-ID: <20060610050051.485F71E4014@bag.python.org>
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv31488/spambayes
Modified Files:
Dibbler.py
Log Message:
More of [ 824651 ] Multibyte (CJK etc.) message support
Specify character set in web interface.
Index: Dibbler.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Dibbler.py,v
retrieving revision 1.17
retrieving revision 1.18
diff -C2 -d -r1.17 -r1.18
*** Dibbler.py 15 Nov 2005 00:40:28 -0000 1.17
--- Dibbler.py 10 Jun 2006 05:00:44 -0000 1.18
***************
*** 534,538 ****
headers.append("HTTP/1.1 200 OK")
headers.append("Connection: close")
! headers.append("Content-Type: %s" % contentType)
headers.append("Date: %s" % httpNow)
for name, value in extraHeaders.items():
--- 534,538 ----
headers.append("HTTP/1.1 200 OK")
headers.append("Connection: close")
! headers.append('Content-Type: %s; charset="utf-8"' % contentType)
headers.append("Date: %s" % httpNow)
for name, value in extraHeaders.items():
***************
*** 550,554 ****
headers.append("HTTP/1.0 %d Error" % code)
headers.append("Connection: close")
! headers.append("Content-Type: text/html")
headers.append("")
headers.append("")
--- 550,554 ----
headers.append("HTTP/1.0 %d Error" % code)
headers.append("Connection: close")
! headers.append('Content-Type: text/html; charset="utf-8"')
headers.append("")
headers.append("")
***************
*** 584,588 ****
headers.append('WWW-Authenticate: ' + authString)
headers.append('Connection: close')
! headers.append('Content-Type: text/html')
headers.append('')
headers.append('')
--- 584,588 ----
headers.append('WWW-Authenticate: ' + authString)
headers.append('Connection: close')
! headers.append('Content-Type: text/html; charset="utf-8"')
headers.append('')
headers.append('')
From htrd at users.sourceforge.net Mon Jun 12 16:08:39 2006
From: htrd at users.sourceforge.net (Toby Dickenson)
Date: Mon, 12 Jun 2006 07:08:39 -0700
Subject: [Spambayes-checkins] spambayes/scripts sb_bnserver.py, 1.2.2.1,
1.2.2.2
Message-ID: <20060612140841.E99351E400C@bag.python.org>
Update of /cvsroot/spambayes/spambayes/scripts
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv31048
Modified Files:
Tag: bnfilter_in_c_branch
sb_bnserver.py
Log Message:
Previously sb_bnfilter was always adding a dummy From line if it did not exist. One user reported it adding a second From line. This makes it behave the same as sb_filter; emit the From line in the output only if it existed in the input. Thanks to Peter Barker.
Index: sb_bnserver.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/scripts/sb_bnserver.py,v
retrieving revision 1.2.2.1
retrieving revision 1.2.2.2
diff -C2 -d -r1.2.2.1 -r1.2.2.2
*** sb_bnserver.py 5 May 2004 21:58:44 -0000 1.2.2.1
--- sb_bnserver.py 12 Jun 2006 14:08:36 -0000 1.2.2.2
***************
*** 161,165 ****
for action in actions:
action(msg)
! return mboxutils.as_string(msg, 1)
--- 161,165 ----
for action in actions:
action(msg)
! return mboxutils.as_string(msg, msg.get_unixfrom() is not None)
From anadelonbrin at users.sourceforge.net Thu Jun 22 12:37:02 2006
From: anadelonbrin at users.sourceforge.net (Tony Meyer)
Date: Thu, 22 Jun 2006 03:37:02 -0700
Subject: [Spambayes-checkins] spambayes/spambayes OptionsClass.py, 1.31, 1.32
Message-ID: <20060622103707.8475E1E4007@bag.python.org>
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv32062/spambayes
Modified Files:
OptionsClass.py
Log Message:
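Fix bug in the IP_LIST regex that prevented valid IPs from matching (octets 210-239 were not covered, because the pattern used 2[04]\d instead of 2[0-4]\d).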
Index: OptionsClass.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/OptionsClass.py,v
retrieving revision 1.31
retrieving revision 1.32
diff -C2 -d -r1.31 -r1.32
*** OptionsClass.py 6 Jan 2006 08:38:52 -0000 1.31
--- OptionsClass.py 22 Jun 2006 10:36:58 -0000 1.32
***************
*** 845,851 ****
FILE = r"[\S]+"
FILE_WITH_PATH = PATH
! IP_LIST = r"\*|localhost|((\*|[01]?\d\d?|2[04]\d|25[0-5])\.(\*|[01]?\d" \
! r"\d?|2[04]\d|25[0-5])\.(\*|[01]?\d\d?|2[04]\d|25[0-5])\.(\*" \
! r"|[01]?\d\d?|2[04]\d|25[0-5]),?)+"
# IMAP seems to allow any character at all in a folder name,
# but we want to use the comma as a delimiter for lists, so
--- 845,851 ----
FILE = r"[\S]+"
FILE_WITH_PATH = PATH
! IP_LIST = r"\*|localhost|((\*|[01]?\d\d?|2[0-4]\d|25[0-5])\.(\*|[01]?\d" \
! r"\d?|2[0-4]\d|25[0-5])\.(\*|[01]?\d\d?|2[0-4]\d|25[0-5])\.(\*" \
! r"|[01]?\d\d?|2[0-4]\d|25[0-5]),?)+"
# IMAP seems to allow any character at all in a folder name,
# but we want to use the comma as a delimiter for lists, so