[Spambayes-checkins] spambayes imapfilter.py, 1.31, 1.32 pop3proxy.py, 1.77, 1.78 smtpproxy.py, 1.5, 1.6

Tony Meyer anadelonbrin at users.sourceforge.net
Thu Apr 24 01:44:19 EDT 2003


Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv1065

Modified Files:
	imapfilter.py pop3proxy.py smtpproxy.py 
Log Message:
Fix for:
[ 725307 ] Outlook plugin won't load (anymore)
[ 725466 ] Include a proper locale fix in Options.py
[ 726255 ] Problem if bayescustomize.ini not there

Moved the line-endings (CRLF) fix from message.py to the IMAPMessage
class in imapfilter.py, which is a more appropriate place for it.

Removed all reliance on UpdateableConfigParser from Options.

Removed the ConfigParser object from Options.

Removed the all_options dict from Options.

Changed multiple-valued options to tuples (this required one very small
change in tokenizer).

Fixed the user interface for these changes, and for Tim's
change from True/False to Yes/No.

Removed some debugging statements that had been committed
by mistake at some point.

Fixed a potential problem with the imap ui trying to display a folder
list before a server/username had been entered.

Index: imapfilter.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/imapfilter.py,v
retrieving revision 1.31
retrieving revision 1.32
diff -C2 -d -r1.31 -r1.32
*** imapfilter.py	22 Apr 2003 22:47:13 -0000	1.31
--- imapfilter.py	24 Apr 2003 07:43:44 -0000	1.32
***************
*** 63,73 ****
        "OK", then the filter terminates.  Handling of these errors could be
        much nicer.
      o IMAP over SSL would be nice. (isbg has an example of how to do this)
!     o The flags should be copied along with the message (especially
!       the /Seen flag, but all of them, really).
!     o Should the imap options server and port be combined?  i.e. instead
!       of having server=imap.example.com and port=143, have
!       server=imap.example.com:143?  This would be more consistent with
!       pop3proxy and would get rid of one option at no real cost.
      o Suggestions?
  """
--- 63,72 ----
        "OK", then the filter terminates.  Handling of these errors could be
        much nicer.
+     o We should check if the spam and unsure folders exist before filtering.
      o IMAP over SSL would be nice. (isbg has an example of how to do this)
!     o Develop a test script, like testtools/pop3proxytest.py that runs
!       through some tests (perhaps with a *real* imap server, rather than
!       a dummy one).  This would make it easier to carry out the tests
!       against each server whenever a change is made.
      o Suggestions?
  """
***************
*** 95,98 ****
--- 94,98 ----
  import sys
  import getopt
+ import types
  from getpass import getpass
  import email.Parser
***************
*** 108,120 ****
  imap = None
  
! # global rfc822 fetch command
! rfc822_command = "(RFC822.PEEK)"
  
  class IMAPSession(imaplib.IMAP4):
      '''A class extending the IMAP4 class, with a few optimizations'''
      
!     def __init__(self, server, port, debug):
          imaplib.Debug = debug  # this is a global in the imaplib module
          imaplib.IMAP4.__init__(self, server, port)
          # For efficiency, we remember which folder we are currently
          # in, and only send a select command to the IMAP server if
--- 108,172 ----
  imap = None
  
! CRLF_RE = re.compile(r'\r\n|\r|\n')
! 
! # A flag can have any character in the ascii range 32-127
! # except for (){ %*"\
! FLAG_CHARS = ""
! for i in range(32, 127):
!     if not chr(i) in ['(', ')', '{', ' ', '%', '*', '"', '\\']:
!         FLAG_CHARS += chr(i)
! FLAG = r"\\?[" + re.escape(FLAG_CHARS) + r"]+"
! # The empty flag set "()" doesn't match, so that extract returns
! # data["FLAGS"] == None
! FLAGS_RE = re.compile(r"(FLAGS) (\((" + FLAG + r" )*(" + FLAG + r")\))")
! INTERNALDATE_RE = re.compile(r"(INTERNALDATE) (\"\d{1,2}\-[A-Za-z]{3,3}\-" +
!                              r"\d{2,4} \d{2,2}\:\d{2,2}\:\d{2,2} " +
!                              r"[\+\-]\d{4,4}\")")
! RFC822_RE = re.compile(r"(RFC822) (\{[\d]+\})")
! UID_RE = re.compile(r"(UID) ([\d]+)")
! FETCH_RESPONSE_RE = re.compile(r"([0-9]+) \(([" + \
!                                re.escape(FLAG_CHARS) + r"\"\{\}\(\)\\ ]*)\)?")
! LITERAL_RE = re.compile(r"^\{[\d]+\}$")
! 
! def _extract_fetch_data(response):
!     '''Extract data from the response given to an IMAP FETCH command.'''
!     # response might be a tuple containing literal data
!     if type(response) == types.TupleType:
!         literal = response[1]
!         response = response[0]
!     else:
!         literal = None
!     # the first item will always be the message number
!     mo = FETCH_RESPONSE_RE.match(response)
!     data = {}
!     if mo is None:
!         print """IMAP server gave strange fetch response.  Please
!         report this as a bug."""
!         print response
!     else:
!         data["message_number"] = mo.group(1)
!         response = mo.group(2)
!     # We support the following FETCH items:
!     #  FLAGS
!     #  INTERNALDATE
!     #  RFC822
!     #  UID
!     # All others are ignored.
!     for r in [FLAGS_RE, INTERNALDATE_RE, RFC822_RE, UID_RE]:
!         mo = r.search(response)
!         if mo is not None:
!             if LITERAL_RE.match(mo.group(2)):
!                 data[mo.group(1)] = literal
!             else:
!                 data[mo.group(1)] = mo.group(2)
!     return data
  
  class IMAPSession(imaplib.IMAP4):
      '''A class extending the IMAP4 class, with a few optimizations'''
      
!     def __init__(self, server, port, debug=0, do_expunge=False):
          imaplib.Debug = debug  # this is a global in the imaplib module
          imaplib.IMAP4.__init__(self, server, port)
+         # XXX We should check here to see if the server/port were valid
          # For efficiency, we remember which folder we are currently
          # in, and only send a select command to the IMAP server if
***************
*** 122,126 ****
          # both IMAPMessage and IMAPFolder.
          self.current_folder = None
!         self.current_folder_readonly = None
  
      def login(self, uid, pw):
--- 174,178 ----
          # both IMAPMessage and IMAPFolder.
          self.current_folder = None
!         self.do_expunge = do_expunge
  
      def login(self, uid, pw):
***************
*** 135,158 ****
                  raise
      
!     def logout(self, expunge):
          # sign off
!         if expunge:
              self.expunge()
          imaplib.IMAP4.logout(self)  # superclass logout
          
!     def SelectFolder(self, folder, readOnly=True, force=False):
          '''A method to point ensuing imap operations at a target folder'''
!         
!         if self.current_folder != folder or \
!            self.current_folder_readonly != readOnly or force:
!             # Occasionally, we need to force a command, because we
!             # are interested in the response.  Things would be much
!             # nicer if we cached this information somewhere.
!             response = self.select(folder, readOnly)
              if response[0] != "OK":
!                 print "Invalid response to %s:\n%s" % (command, response)
                  sys.exit(-1)
              self.current_folder = folder
-             self.current_folder_readonly = readOnly
              return response
  
--- 187,214 ----
                  raise
      
!     def logout(self):
          # sign off
!         if self.do_expunge:
              self.expunge()
          imaplib.IMAP4.logout(self)  # superclass logout
          
!     def SelectFolder(self, folder):
          '''A method to point ensuing imap operations at a target folder'''
!         if self.current_folder != folder:
!             if self.current_folder != None:
!                 if self.do_expunge:
!                     # It is faster to do close() than a single
!                     # expunge when we log out (because expunge returns
!                     # a list of all the deleted messages, that we don't do
!                     # anything with)
!                     imap.close()
!             # We *always* use SELECT and not EXAMINE, because this
!             # speeds things up considerably.
!             response = self.select(folder, False)
              if response[0] != "OK":
!                 print "Invalid response to select %s:\n%s" % (folder,
!                                                               response)
                  sys.exit(-1)
              self.current_folder = folder
              return response
  
***************
*** 160,165 ****
      def __init__(self, folder, id):
          message.Message.__init__(self)
- 
          self.id = id
          self.folder = folder
          self.previous_folder = None
--- 216,221 ----
      def __init__(self, folder, id):
          message.Message.__init__(self)
          self.id = id
+         message.msginfoDB._getState(self)
          self.folder = folder
          self.previous_folder = None
***************
*** 170,173 ****
--- 226,242 ----
              sys.exit(-1)
  
+     def _force_CRLF(self, data):
+         """Make sure data uses CRLF for line termination.
+         """
+         return CRLF_RE.sub('\r\n', data)
+ 
+     def as_string(self):
+         # The email package stores line endings in the "internal" Python
+         # format ('\n').  It is up to whoever transmits that information to
+         # convert to appropriate line endings (according to RFC822, that is
+         # \r\n *only*).  imaplib *should* take care of this for us (in the
+         # append function), but does not, so we do it here
+         return self._force_CRLF(message.SBHeaderMessage.as_string(self))
+         
      def extractTime(self):
          # When we create a new copy of a message, we need to specify
***************
*** 189,192 ****
--- 258,262 ----
              self.previous_folder = self.folder
              self.folder = dest
+         self.modified()
  
      def Save(self):
***************
*** 196,208 ****
          response = imap.uid("FETCH", self.id, "(FLAGS INTERNALDATE)")
          self._check(response, 'fetch (flags internaldate)')
!         response_pattern = r"[\d]+ \(UID [\w]+ FLAGS (\([\\\w]*\)) "
!         response_pattern += r'INTERNALDATE ["]?([\w\-\+: ]+)["]?\)'
!         mo = re.match(response_pattern, response[1][0])
!         if mo is None:
              msg_time = self.extractTime()
!             flags = ""
          else:
!             flags = mo.group(1)
!             msg_time = mo.group(2)
  
          # See searching for new uid comments below
--- 266,281 ----
          response = imap.uid("FETCH", self.id, "(FLAGS INTERNALDATE)")
          self._check(response, 'fetch (flags internaldate)')
!         data = _extract_fetch_data(response[1][0])
!         if data.has_key("INTERNALDATE"):
!             msg_time = data["INTERNALDATE"]
!         else:
              msg_time = self.extractTime()
!         if data.has_key("FLAGS"):
!             flags = data["FLAGS"]
!             # The \Recent flag can be fetched, but cannot be stored
!             # We must remove it from the list if it is there.
!             flags = re.sub(r"\\Recent ?|\\ ?Recent", "", flags)
          else:
!             flags = None
  
          # See searching for new uid comments below
***************
*** 210,221 ****
          self["X-Spambayes-IMAP-OldID"] = old_id
                      
!         response = imap.append(self.folder.name, None,
                                 msg_time, self.as_string())
          self._check(response, 'append')
  
          if self.previous_folder is None:
!             imap.SelectFolder(self.folder.name, False)
          else:
!             imap.SelectFolder(self.previous_folder.name, False)
              self.previous_folder = None
          response = imap.uid("STORE", old_id, "+FLAGS.SILENT", "(\\Deleted)")
--- 283,294 ----
          self["X-Spambayes-IMAP-OldID"] = old_id
                      
!         response = imap.append(self.folder.name, flags,
                                 msg_time, self.as_string())
          self._check(response, 'append')
  
          if self.previous_folder is None:
!             imap.SelectFolder(self.folder.name)
          else:
!             imap.SelectFolder(self.previous_folder.name)
              self.previous_folder = None
          response = imap.uid("STORE", old_id, "+FLAGS.SILENT", "(\\Deleted)")
***************
*** 227,232 ****
          # doesn't work reliably anyway.  We instead search for a special
          # header that we add for this explicit purpose.
!         imap.SelectFolder(self.folder.name, False)
!         response = imap.uid("SEARCH", "(HEADER)", "X-Spambayes-IMAP-OldID",
                              old_id)
          self._check(response, 'search')
--- 300,305 ----
          # doesn't work reliably anyway.  We instead search for a special
          # header that we add for this explicit purpose.
!         imap.SelectFolder(self.folder.name)
!         response = imap.uid("SEARCH", "HEADER", "X-Spambayes-IMAP-OldID",
                              old_id)
          self._check(response, 'search')
***************
*** 237,241 ****
          # message is the last one with a recent flag
          if new_id == "":
!             response = imap.uid("SEARCH", "(RECENT)")
              new_id = response[1][0]
              if new_id.find(' ') > -1:
--- 310,314 ----
          # message is the last one with a recent flag
          if new_id == "":
!             response = imap.uid("SEARCH", "RECENT")
              new_id = response[1][0]
              if new_id.find(' ') > -1:
***************
*** 243,251 ****
                  new_id = ids[-1]
  
-         # now that we know the new id, we need to correct the flags
-         if flags != "":
-             response = imap.uid("STORE", new_id, "+FLAGS.SILENT", flags)
-             self._check(response, "store flags")
- 
          #XXX This code to delete the old message id from the message
          #XXX info db and manipulate the message id, is a *serious* hack.
--- 316,319 ----
***************
*** 259,262 ****
--- 327,331 ----
      def __init__(self, folder_name, readOnly=True):
          self.name = folder_name
+         self.rfc822_command = "RFC822.PEEK"
  
      def __cmp__(self, obj):
***************
*** 281,341 ****
          are flagged as recent, but not flagged as deleted.'''
          imap.SelectFolder(self.name, True)
!         response = imap.uid("SEARCH", "(RECENT UNDELETED)")
!         self._check(response, "SEARCH (RECENT UNDELETED)")
          return response[1][0].split(' ')
  
      def keys(self):
!         '''Returns uids for all the messages in the folder'''
          # request message range
!         response = imap.SelectFolder(self.name, True, True)
!         total_messages = response[1][0]
!         if total_messages == '0':
              return []
!         response = imap.fetch("1:" + total_messages, "(UID FLAGS)")
!         r = re.compile(r"[0-9]+ \(UID ([0-9]+) FLAGS \(([\\\w]*)\)\)")
!         uids = []
!         for resp in response[1]:
!             data = self._extract_fetch_data(resp)
!             if data.has_key("FLAGS"):
!                 if data["FLAGS"].lower().find("\\deleted") == -1:
!                     # We are interested in messages not marked as deleted
!                     uids.append(data["UID"])
!             else:
!                 uids.append(data["UID"])
!         return uids
! 
!     def _extract_fetch_data(self, response):
!         '''Extract data from the response given to an IMAP FETCH command.'''
!         data = {}
!         # the first item will always be the message number
!         mo = re.match(r"([0-9]+) \(?([\w\\\(\) ]*)\)?", response)
!         if mo is None:
!             print """IMAP server gave strange fetch response.  Please
!             report this as a bug."""
!         else:
!             data["message_number"] = mo.group(1)
!             response = mo.group(2)
!         mo = re.findall(r"([\w]+) \(?([\w\\]+)\)?", response)
!         for key, val in mo:
!             data[key] = val
!         return data
!         
  
      def __getitem__(self, key):
          '''Return message matching the given uid'''
!         global rfc822_command
!         imap.SelectFolder(self.name, True)
          # We really want to use RFC822.PEEK here, as that doesn't effect
          # the status of the message.  Unfortunately, it appears that not
          # all IMAP servers support this, even though it is in RFC1730
!         response = imap.uid("FETCH", key, rfc822_command)
          if response[0] != "OK":
!             rfc822_command = "(RFC822)"
!             response = imap.uid("FETCH", key, rfc822_command)
          self._check(response, "uid fetch")
!         messageText = response[1][0][1]
          # we return an instance of *our* message class, not the
          # raw rfc822 message
- 
          msg = IMAPMessage(self, key)
          msg.setPayload(messageText)
--- 350,384 ----
          are flagged as recent, but not flagged as deleted.'''
          imap.SelectFolder(self.name, True)
!         response = imap.uid("SEARCH", "RECENT UNDELETED")
!         self._check(response, "SEARCH RECENT UNDELETED")
          return response[1][0].split(' ')
  
      def keys(self):
!         '''Returns uids for all the messages in the folder not
!         marked as deleted.'''
          # request message range
!         imap.SelectFolder(self.name)
!         response = imap.uid("SEARCH", "UNDELETED")
!         self._check(response, "SEARCH UNDELETED")
!         if response[1][0] == "":
              return []
!         return response[1][0].split(' ')
  
      def __getitem__(self, key):
          '''Return message matching the given uid'''
!         imap.SelectFolder(self.name)
          # We really want to use RFC822.PEEK here, as that doesn't effect
          # the status of the message.  Unfortunately, it appears that not
          # all IMAP servers support this, even though it is in RFC1730
!         response = imap.uid("FETCH", key, self.rfc822_command)
          if response[0] != "OK":
!             self.rfc822_command = "RFC822"
!             response = imap.uid("FETCH", key, self.rfc822_command)
          self._check(response, "uid fetch")
!         data = _extract_fetch_data(response[1][0])
!         messageText = data["RFC822"]
! 
          # we return an instance of *our* message class, not the
          # raw rfc822 message
          msg = IMAPMessage(self, key)
          msg.setPayload(messageText)
***************
*** 365,369 ****
          for msg in self:
              if msg.GetClassification() is None:
!                 (prob, clues) = classifier.spamprob(msg.asTokens(), evidence=True)
                  # add headers and remember classification
                  msg.addSBHeaders(prob, clues)
--- 408,413 ----
          for msg in self:
              if msg.GetClassification() is None:
!                 (prob, clues) = classifier.spamprob(msg.asTokens(),
!                                                     evidence=True)
                  # add headers and remember classification
                  msg.addSBHeaders(prob, clues)
***************
*** 395,400 ****
  
          if options["imap", "ham_train_folders"] != "":
!             ham_training_folders = \
!                                  options["imap", "ham_train_folders"].split()
              for fol in ham_training_folders:
                  if options['globals', 'verbose']:
--- 439,443 ----
  
          if options["imap", "ham_train_folders"] != "":
!             ham_training_folders = options["imap", "ham_train_folders"]
              for fol in ham_training_folders:
                  if options['globals', 'verbose']:
***************
*** 407,412 ****
  
          if options["imap", "spam_train_folders"] != "":
!             spam_training_folders = \
!                                   options["imap", "spam_train_folders"].split()
              for fol in spam_training_folders:
                  if options['globals', 'verbose']:
--- 450,454 ----
  
          if options["imap", "spam_train_folders"] != "":
!             spam_training_folders = options["imap", "spam_train_folders"]
              for fol in spam_training_folders:
                  if options['globals', 'verbose']:
***************
*** 429,433 ****
              t = time.time()
              
!         for filter_folder in options["imap", "filter_folders"].split():
              folder = IMAPFolder(filter_folder, False)
              folder.Filter(self.classifier, self.spam_folder,
--- 471,475 ----
              t = time.time()
              
!         for filter_folder in options["imap", "filter_folders"]:
              folder = IMAPFolder(filter_folder, False)
              folder.Filter(self.classifier, self.spam_folder,
***************
*** 455,458 ****
--- 497,502 ----
      promptForPass = False
      launchUI = False
+     server = ""
+     username = ""
  
      for opt, arg in opts:
***************
*** 497,511 ****
  or training will be performed."""
  
-     server = options["imap", "server"]
-     username = options["imap", "username"]
-     if server == "" or username == "":
-         print "You need to specify both a server and a username."
-         sys.exit()
- 
-     if promptForPass:
-         pwd = getpass()
-     else:
-         pwd = options["imap", "password"]
- 
      bdbname = os.path.expanduser(bdbname)
      
--- 541,544 ----
***************
*** 520,524 ****
      if options["globals", "verbose"]:
          print "Done."            
!                 
      if server.find(':') > -1:
          server, port = server.split(':', 1)
--- 553,572 ----
      if options["globals", "verbose"]:
          print "Done."            
! 
!     if options["imap", "server"]:
!         # The options class is ahead of us here:
!         #   it knows that imap:server will eventually be able to have
!         #   multiple values, but for the moment, we just use the first one
!         server = options["imap", "server"][0]
!         username = options["imap", "username"][0]
!         pwd = options["imap", "password"][0]
!     else:
!         if not launchUI:
!             print "You need to specify both a server and a username."
!             sys.exit()
! 
!     if promptForPass:
!         pwd = getpass()
! 
      if server.find(':') > -1:
          server, port = server.split(':', 1)
***************
*** 529,546 ****
          else:
              port = 143
-     imap = IMAPSession(server, port, imapDebug)
  
      imap_filter = IMAPFilter(classifier)
  
      # Web interface
-     # XXX If someone is running *both* pop3proxy and imapfilter
-     # XXX then there will be trouble since both interfaces are
-     # XXX using the same port by default.
      if launchUI:
!         imap.login(username, pwd)
          httpServer = UserInterfaceServer(options["html_ui", "port"])
!         httpServer.register(IMAPUserInterface(classifier, imap))
          Dibbler.run(launchBrowser=launchUI)
      else:
          while True:
              imap.login(username, pwd)
--- 577,592 ----
          else:
              port = 143
  
      imap_filter = IMAPFilter(classifier)
  
      # Web interface
      if launchUI:
!         if server != "":
!             imap = IMAPSession(server, port, imapDebug, doExpunge)
          httpServer = UserInterfaceServer(options["html_ui", "port"])
!         httpServer.register(IMAPUserInterface(classifier, imap, pwd))
          Dibbler.run(launchBrowser=launchUI)
      else:
+         imap = IMAPSession(server, port, imapDebug, doExpunge)
          while True:
              imap.login(username, pwd)
***************
*** 555,559 ****
                  imap_filter.Filter()
  
!             imap.logout(doExpunge)
              
              if sleepTime:
--- 601,605 ----
                  imap_filter.Filter()
  
!             imap.logout()
              
              if sleepTime:

Index: pop3proxy.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/pop3proxy.py,v
retrieving revision 1.77
retrieving revision 1.78
diff -C2 -d -r1.77 -r1.78
*** pop3proxy.py	21 Apr 2003 12:18:45 -0000	1.77
--- pop3proxy.py	24 Apr 2003 07:43:44 -0000	1.78
***************
*** 502,506 ****
          self.proxyPorts = []
          if options["pop3proxy", "servers"]:
!             for server in options["pop3proxy", "servers"].split(','):
                  server = server.strip()
                  if server.find(':') > -1:
--- 502,506 ----
          self.proxyPorts = []
          if options["pop3proxy", "servers"]:
!             for server in options["pop3proxy", "servers"]:
                  server = server.strip()
                  if server.find(':') > -1:
***************
*** 511,515 ****
  
          if options["pop3proxy", "ports"]:
!             splitPorts = options["pop3proxy", "ports"].split(',')
              self.proxyPorts = map(_addressAndPort, splitPorts)
  
--- 511,515 ----
  
          if options["pop3proxy", "ports"]:
!             splitPorts = options["pop3proxy", "ports"]
              self.proxyPorts = map(_addressAndPort, splitPorts)
  

Index: smtpproxy.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/smtpproxy.py,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** smtpproxy.py	18 Apr 2003 05:52:13 -0000	1.5
--- smtpproxy.py	24 Apr 2003 07:43:45 -0000	1.6
***************
*** 473,477 ****
      proxyPorts = []
      if options.smtpproxy_servers:
!         for server in options.smtpproxy_servers.split(','):
              server = server.strip()
              if server.find(':') > -1:
--- 473,477 ----
      proxyPorts = []
      if options.smtpproxy_servers:
!         for server in options.smtpproxy_servers:
              server = server.strip()
              if server.find(':') > -1:
***************
*** 481,485 ****
              servers.append((server, int(port)))
      if options.smtpproxy_ports:
!         splitPorts = options.smtpproxy_ports.split(',')
          proxyPorts = map(_addressAndPort, splitPorts)
      if len(servers) != len(proxyPorts):
--- 481,485 ----
              servers.append((server, int(port)))
      if options.smtpproxy_ports:
!         splitPorts = options.smtpproxy_ports
          proxyPorts = map(_addressAndPort, splitPorts)
      if len(servers) != len(proxyPorts):





More information about the Spambayes-checkins mailing list