[Spambayes-checkins] spambayes/spambayes Options.py, 1.96, 1.97
TestToolsUI.py, 1.1, 1.2 classifier.py, 1.19, 1.20
oe_mailbox.py, 1.4, 1.5 tokenizer.py, 1.26, 1.27
Tim Peters
tim_one at users.sourceforge.net
Tue Dec 30 11:26:35 EST 2003
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv30644/spambayes
Modified Files:
Options.py TestToolsUI.py classifier.py oe_mailbox.py
tokenizer.py
Log Message:
Whitespace normalization.
Index: Options.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Options.py,v
retrieving revision 1.96
retrieving revision 1.97
diff -C2 -d -r1.96 -r1.97
*** Options.py 22 Dec 2003 04:19:56 -0000 1.96
--- Options.py 30 Dec 2003 16:26:33 -0000 1.97
***************
*** 186,190 ****
for messages that contain only a single URL and no other text.""",
BOOLEAN, RESTORE),
!
("x-cache_expiry_days", "Number of days to store URLs in cache", 7,
"""(EXPERIMENTAL) This is the number of days that local cached copies
--- 186,190 ----
for messages that contain only a single URL and no other text.""",
BOOLEAN, RESTORE),
!
("x-cache_expiry_days", "Number of days to store URLs in cache", 7,
"""(EXPERIMENTAL) This is the number of days that local cached copies
Index: TestToolsUI.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/TestToolsUI.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** TestToolsUI.py 22 Dec 2003 04:38:11 -0000 1.1
--- TestToolsUI.py 30 Dec 2003 16:26:33 -0000 1.2
***************
*** 91,95 ****
('TestToolsUI', 'source'),
('TestToolsUI', 'n'),)
!
option_choice = self._buildConfigPageBody(\
configTable, testtools_ini_map)
--- 91,95 ----
('TestToolsUI', 'source'),
('TestToolsUI', 'n'),)
!
option_choice = self._buildConfigPageBody(\
configTable, testtools_ini_map)
***************
*** 224,228 ****
["deleted items", "drafts", "folders",
"offline", "outbox", "pop3uidl",]:
! continue
elif dbx.lower().find("spam") == -1:
spamdirs.append(dbx)
--- 224,228 ----
["deleted items", "drafts", "folders",
"offline", "outbox", "pop3uidl",]:
! continue
elif dbx.lower().find("spam") == -1:
spamdirs.append(dbx)
***************
*** 293,297 ****
sys.stdout = StringIO.StringIO()
sys.stderr = StringIO.StringIO()
!
interesting = filter(lambda line: line.startswith('-> '), ifile)
ifile.close()
--- 293,297 ----
sys.stdout = StringIO.StringIO()
sys.stderr = StringIO.StringIO()
!
interesting = filter(lambda line: line.startswith('-> '), ifile)
ifile.close()
Index: classifier.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/classifier.py,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** classifier.py 17 Dec 2003 09:09:52 -0000 1.19
--- classifier.py 30 Dec 2003 16:26:33 -0000 1.20
***************
*** 211,215 ****
s_cut = options["Categorization", "spam_cutoff"]
! # Get the raw score.
prob, clues = self.chi2_spamprob(wordstream, True)
--- 211,215 ----
s_cut = options["Categorization", "spam_cutoff"]
! # Get the raw score.
prob, clues = self.chi2_spamprob(wordstream, True)
Index: oe_mailbox.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/oe_mailbox.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** oe_mailbox.py 22 Dec 2003 02:28:26 -0000 1.4
--- oe_mailbox.py 30 Dec 2003 16:26:33 -0000 1.5
***************
*** 441,449 ****
fh_entries = dbxFileHeader.FH_ENTRIES
fh_ptr = dbxFileHeader.FH_TREE_ROOT_NODE_PTR
!
info = dbxFileInfo(dbxStream, header.getEntry(file_info_len))
entries = header.getEntry(fh_entries)
address = header.getEntry(fh_ptr)
!
if address and entries:
tree = dbxTree(dbxStream, address, entries)
--- 441,449 ----
fh_entries = dbxFileHeader.FH_ENTRIES
fh_ptr = dbxFileHeader.FH_TREE_ROOT_NODE_PTR
!
info = dbxFileInfo(dbxStream, header.getEntry(file_info_len))
entries = header.getEntry(fh_entries)
address = header.getEntry(fh_ptr)
!
if address and entries:
tree = dbxTree(dbxStream, address, entries)
***************
*** 480,484 ****
# same format).
raise NotImplementedError
!
reg = win32api.RegOpenKeyEx(win32con.HKEY_USERS, "")
user_index = 0
--- 480,484 ----
# same format).
raise NotImplementedError
!
reg = win32api.RegOpenKeyEx(win32con.HKEY_USERS, "")
user_index = 0
***************
*** 506,510 ****
break
identity_index += 1
!
subkey_name = "%s\\%s\\%s" % (user_name, identity_name,
"Software\\Microsoft\\Outlook " \
--- 506,510 ----
break
identity_index += 1
!
subkey_name = "%s\\%s\\%s" % (user_name, identity_name,
"Software\\Microsoft\\Outlook " \
***************
*** 602,606 ****
sys.exit()
! MAILBOX_DIR = args[0]
files = [os.path.join(MAILBOX_DIR, file) for file in \
--- 602,606 ----
sys.exit()
! MAILBOX_DIR = args[0]
files = [os.path.join(MAILBOX_DIR, file) for file in \
Index: tokenizer.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/tokenizer.py,v
retrieving revision 1.26
retrieving revision 1.27
diff -C2 -d -r1.26 -r1.27
*** tokenizer.py 22 Dec 2003 16:55:05 -0000 1.26
--- tokenizer.py 30 Dec 2003 16:26:33 -0000 1.27
***************
*** 1037,1041 ****
self.setup_done = False
self.do_slurp = True
!
def setup(self):
# Can't import this at the top because it's circular.
--- 1037,1041 ----
self.setup_done = False
self.do_slurp = True
!
def setup(self):
# Can't import this at the top because it's circular.
***************
*** 1061,1065 ****
# Build a new opener without any proxy information.
opener = urllib2.build_opener(urllib2.HTTPHandler)
!
# Install it
urllib2.install_opener(opener)
--- 1061,1065 ----
# Build a new opener without any proxy information.
opener = urllib2.build_opener(urllib2.HTTPHandler)
!
# Install it
urllib2.install_opener(opener)
***************
*** 1142,1146 ****
self.do_slurp):
return tokens
!
# We don't want to do this recursively and check URLs
# on webpages, so we have this little cheat.
--- 1142,1146 ----
self.do_slurp):
return tokens
!
# We don't want to do this recursively and check URLs
# on webpages, so we have this little cheat.
***************
*** 1201,1205 ****
url_key = URL_KEY_RE.sub('_', url)
cached_message = self.urlCorpus.get(url_key)
!
if cached_message is None:
# We're going to ignore everything that isn't text/html,
--- 1201,1205 ----
url_key = URL_KEY_RE.sub('_', url)
cached_message = self.urlCorpus.get(url_key)
!
if cached_message is None:
# We're going to ignore everything that isn't text/html,
***************
*** 1210,1214 ****
self.bad_urls["url:non_html"] += (url,)
return ["url:non_html"]
!
try:
if options["globals", "verbose"]:
--- 1210,1214 ----
self.bad_urls["url:non_html"] += (url,)
return ["url:non_html"]
!
try:
if options["globals", "verbose"]:
***************
*** 1222,1226 ****
self.bad_urls["url:unknown_error"] += (url,)
return ["url:unknown_error"]
!
# Anything that isn't text/html is ignored
content_type = f.info().get('content-type')
--- 1222,1226 ----
self.bad_urls["url:unknown_error"] += (url,)
return ["url:unknown_error"]
!
# Anything that isn't text/html is ignored
content_type = f.info().get('content-type')
More information about the Spambayes-checkins
mailing list