[Spambayes-checkins] spambayes/testtools urlslurper.py,1.1,1.2

Mark Hammond mhammond at users.sourceforge.net
Thu May 1 05:46:53 EDT 2003


Update of /cvsroot/spambayes/spambayes/testtools
In directory sc8-pr-cvs1:/tmp/cvs-serv31445

Modified Files:
	urlslurper.py 
Log Message:
* Fix global statements - they are only necessary when not implicitly global (and 2.3 warns about them).  A couple of these were fixed by putting __main__ code into a main() function (thereby making global necessary), and global dicts do not need 'global' as there is no assignment.
* Fixed import - testtools is not a package - at least my copy has no __init__.py.
* socket.error is a possible exception
* Print slurp status to stderr, so redirection of stdout allows stat collection, but progress is still visible.


Index: urlslurper.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/testtools/urlslurper.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** urlslurper.py	30 Apr 2003 06:43:28 -0000	1.1
--- urlslurper.py	1 May 2003 11:46:51 -0000	1.2
***************
*** 84,88 ****
  from __future__ import generators
  
! import urllib2
  import sys
  import re
--- 84,88 ----
  from __future__ import generators
  
! import urllib2, socket
  import sys
  import re
***************
*** 99,109 ****
  from spambayes.Options import options
  from spambayes.classifier import Classifier, Bayes
- from testtools import timtest
  import spambayes
  
- global cache_filename
  cache_filename = "url.pck"
- 
- global proxy_info
  proxy_info = {}
  # Fill in the details here (and uncomment these lines) if you connect via
--- 99,106 ----
  from spambayes.Options import options
  from spambayes.classifier import Classifier, Bayes
  import spambayes
+ import timtest
  
  cache_filename = "url.pck"
  proxy_info = {}
  # Fill in the details here (and uncomment these lines) if you connect via
***************
*** 116,122 ****
  #}
  
- global only_slurp_base
  only_slurp_base = False
- global url_dict
  url_dict = {}
  
--- 113,117 ----
***************
*** 133,137 ****
  
      def spamprob(self, wordstream, evidence=False, time_limit=None):
-         global url_dict
          start_time = time.time()
          prob, clues = Classifier.spamprob(self, wordstream, True)
--- 128,131 ----
***************
*** 167,171 ****
                  else:
                      if options["globals", "verbose"]:
!                         print "Slurping:", url, "..."
                      try:
                          f = urllib2.urlopen(url)
--- 161,165 ----
                  else:
                      if options["globals", "verbose"]:
!                         print >> sys.stderr, "Slurping:", url, "..."
                      try:
                          f = urllib2.urlopen(url)
***************
*** 175,182 ****
                          page = headers + "\r\n" + page
                          if options["globals", "verbose"]:
!                             print "Slurped."
!                     except IOError:
                          url_dict[url] = 0.5
!                         print "Couldn't get", url
                      if not url_dict.has_key(url) or url_dict[url] != 0.5:
                          # Create a fake Message object since Tokenizer is
--- 169,176 ----
                          page = headers + "\r\n" + page
                          if options["globals", "verbose"]:
!                             print >> sys.stderr, "Slurped."
!                     except (IOError, socket.error):
                          url_dict[url] = 0.5
!                         print >> sys.stderr, "Couldn't get", url
                      if not url_dict.has_key(url) or url_dict[url] != 0.5:
                          # Create a fake Message object since Tokenizer is
***************
*** 231,236 ****
          f.close()
  
!  
! if __name__ == "__main__":
      import getopt
      from spambayes import msgs
--- 225,229 ----
          f.close()
  
! def main():
      import getopt
      from spambayes import msgs
***************
*** 249,262 ****
              sys.exit()
          elif opt == '-u':
-             global proxy_info
              proxy_info["user"] = arg
          elif opt == '-p':
-             global proxy_info
              proxy_info["pass"] = arg
          elif opt == '-a':
-             global proxy_info
              proxy_info["host"] = arg
          elif opt == '-o':
-             global proxy_info
              proxy_info["port"] = int(arg)
          elif opt == '-f':
--- 242,251 ----
***************
*** 284,285 ****
--- 273,277 ----
      msgs.setparms(hamkeep, spamkeep, seed=seed)
      timtest.drive(nsets)
+ 
+ if __name__ == "__main__":
+     main()





More information about the Spambayes-checkins mailing list