[Spambayes-checkins] spambayes/testtools urlslurper.py,1.1,1.2
Mark Hammond
mhammond at users.sourceforge.net
Thu May 1 05:46:53 EDT 2003
Update of /cvsroot/spambayes/spambayes/testtools
In directory sc8-pr-cvs1:/tmp/cvs-serv31445
Modified Files:
urlslurper.py
Log Message:
* Fix 'global' statements - they are only necessary when a name is not implicitly global (and 2.3 warns about them). A couple of these were fixed by putting the __main__ code into a main() function (thereby making 'global' necessary), and global dicts do not need 'global' as there is no assignment.
* Fixed import - testtools is not a package - at least my copy has no __init__.py
* socket.error is a possible exception
* Print slurp status to stderr, so redirection of stdout allows stat collection, but progress is still visible.
Index: urlslurper.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/testtools/urlslurper.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** urlslurper.py 30 Apr 2003 06:43:28 -0000 1.1
--- urlslurper.py 1 May 2003 11:46:51 -0000 1.2
***************
*** 84,88 ****
from __future__ import generators
! import urllib2
import sys
import re
--- 84,88 ----
from __future__ import generators
! import urllib2, socket
import sys
import re
***************
*** 99,109 ****
from spambayes.Options import options
from spambayes.classifier import Classifier, Bayes
- from testtools import timtest
import spambayes
- global cache_filename
cache_filename = "url.pck"
-
- global proxy_info
proxy_info = {}
# Fill in the details here (and uncomment these lines) if you connect via
--- 99,106 ----
from spambayes.Options import options
from spambayes.classifier import Classifier, Bayes
import spambayes
+ import timtest
cache_filename = "url.pck"
proxy_info = {}
# Fill in the details here (and uncomment these lines) if you connect via
***************
*** 116,122 ****
#}
- global only_slurp_base
only_slurp_base = False
- global url_dict
url_dict = {}
--- 113,117 ----
***************
*** 133,137 ****
def spamprob(self, wordstream, evidence=False, time_limit=None):
- global url_dict
start_time = time.time()
prob, clues = Classifier.spamprob(self, wordstream, True)
--- 128,131 ----
***************
*** 167,171 ****
else:
if options["globals", "verbose"]:
! print "Slurping:", url, "..."
try:
f = urllib2.urlopen(url)
--- 161,165 ----
else:
if options["globals", "verbose"]:
! print >> sys.stderr, "Slurping:", url, "..."
try:
f = urllib2.urlopen(url)
***************
*** 175,182 ****
page = headers + "\r\n" + page
if options["globals", "verbose"]:
! print "Slurped."
! except IOError:
url_dict[url] = 0.5
! print "Couldn't get", url
if not url_dict.has_key(url) or url_dict[url] != 0.5:
# Create a fake Message object since Tokenizer is
--- 169,176 ----
page = headers + "\r\n" + page
if options["globals", "verbose"]:
! print >> sys.stderr, "Slurped."
! except (IOError, socket.error):
url_dict[url] = 0.5
! print >> sys.stderr, "Couldn't get", url
if not url_dict.has_key(url) or url_dict[url] != 0.5:
# Create a fake Message object since Tokenizer is
***************
*** 231,236 ****
f.close()
!
! if __name__ == "__main__":
import getopt
from spambayes import msgs
--- 225,229 ----
f.close()
! def main():
import getopt
from spambayes import msgs
***************
*** 249,262 ****
sys.exit()
elif opt == '-u':
- global proxy_info
proxy_info["user"] = arg
elif opt == '-p':
- global proxy_info
proxy_info["pass"] = arg
elif opt == '-a':
- global proxy_info
proxy_info["host"] = arg
elif opt == '-o':
- global proxy_info
proxy_info["port"] = int(arg)
elif opt == '-f':
--- 242,251 ----
***************
*** 284,285 ****
--- 273,277 ----
msgs.setparms(hamkeep, spamkeep, seed=seed)
timtest.drive(nsets)
+
+ if __name__ == "__main__":
+ main()
More information about the Spambayes-checkins
mailing list