[Spambayes-checkins] spambayes/spambayes classifier.py,1.28,1.29
Tony Meyer
anadelonbrin at users.sourceforge.net
Wed Nov 3 02:15:07 CET 2004
Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20461/spambayes
Modified Files:
classifier.py
Log Message:
Fix [ 922063 ] Intermittent sb_filter.py failure with URL pickle
This is still ugly experimental code, but it might as well be robust ugly experimental
code <wink>. If something goes wrong loading the URL pickles, start with fresh ones
(they are only caches, so that shouldn't hurt). When saving, save to a temp file
first.
Index: classifier.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/classifier.py,v
retrieving revision 1.28
retrieving revision 1.29
diff -C2 -d -r1.28 -r1.29
*** classifier.py 29 Oct 2004 00:14:42 -0000 1.28
--- classifier.py 3 Nov 2004 01:15:04 -0000 1.29
***************
*** 617,621 ****
if os.path.exists(self.bad_url_cache_name):
b_file = file(self.bad_url_cache_name, "r")
! self.bad_urls = pickle.load(b_file)
b_file.close()
else:
--- 617,630 ----
if os.path.exists(self.bad_url_cache_name):
b_file = file(self.bad_url_cache_name, "r")
! try:
! self.bad_urls = pickle.load(b_file)
! except IOError, ValueError:
! # Something went wrong loading it (bad pickle,
! # probably). Start afresh.
! if options["globals", "verbose"]:
! print >>sys.stderr, "Bad URL pickle, using new."
! self.bad_urls = {"url:non_resolving": (),
! "url:non_html": (),
! "url:unknown_error": ()}
b_file.close()
else:
***************
*** 627,631 ****
if os.path.exists(self.http_error_cache_name):
h_file = file(self.http_error_cache_name, "r")
! self.http_error_urls = pickle.load(h_file)
h_file.close()
else:
--- 636,647 ----
if os.path.exists(self.http_error_cache_name):
h_file = file(self.http_error_cache_name, "r")
! try:
! self.http_error_urls = pickle.load(h_file)
! except IOError, ValueError:
! # Something went wrong loading it (bad pickle,
! # probably). Start afresh.
! if options["globals", "verbose"]:
! print >>sys.stderr, "Bad HHTP error pickle, using new."
! self.http_error_urls = {}
h_file.close()
else:
***************
*** 636,645 ****
# XXX be a good thing long-term (if a previously invalid URL
# XXX becomes valid, for example).
! b_file = file(self.bad_url_cache_name, "w")
! pickle.dump(self.bad_urls, b_file)
! b_file.close()
! h_file = file(self.http_error_cache_name, "w")
! pickle.dump(self.http_error_urls, h_file)
! h_file.close()
def slurp(self, proto, url):
--- 652,668 ----
# XXX be a good thing long-term (if a previously invalid URL
# XXX becomes valid, for example).
! for name, data in [(self.bad_url_cache_name, self.bad_urls),
! (self.http_error_cache_name, self.http_error_urls),]:
! # Save to a temp file first, in case something goes wrong.
! cache = open(name + ".tmp", "w")
! pickle.dump(data, cache)
! cache.close()
! try:
! os.rename(name + ".tmp", name)
! except OSError:
! # Atomic replace isn't possible with win32, so just
! # remove and rename.
! os.remove(name)
! os.rename(name + ".tmp", name)
def slurp(self, proto, url):
More information about the Spambayes-checkins
mailing list