[Spambayes-checkins] spambayes imapfilter.py,1.3,1.4

Tim Stone timstone4 at users.sourceforge.net
Tue Apr 8 09:28:06 EDT 2003


Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv25441

Modified Files:
	imapfilter.py 
Log Message:
Added logic to ensure that classification and training memory is preserved
when IMAP messages are altered (i.e. deleted and added with a new id),
and when messages are retrained.  Again... unable to test, so your turn,
Tony.  I'm gonna have to get an IMAP thingy if I'm going to do much work
on this <wink>.

Index: imapfilter.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/imapfilter.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** imapfilter.py	8 Apr 2003 07:37:28 -0000	1.3
--- imapfilter.py	8 Apr 2003 15:28:03 -0000	1.4
***************
*** 75,78 ****
--- 75,95 ----
          self.changeId(response[1][0])
  
+     def Delete(self):
+         self._selectFolder(self.folder_name, False)
+         response = imap.uid("STORE", self.getId(), "+FLAGS.SILENT",
+                             "(\\Deleted)")
+         self._check(response, "uid store")
+ 
+         # XXX there should actually be a delete from the msgid database here...
+         self.notTrained()
+         self.notClassified()
+ 
+     def Append(self):
+         response = imap.append(self.folder_name, None,
+                                self.getId(),
+                                self.get_payload())
+         self._check(response, "append")
+ 
+ 
  
  class IMAPFolder(object):
***************
*** 135,138 ****
--- 152,159 ----
          global imap
          imap = imaplib.IMAP4(options.imap_server, options.imap_port)
+         
+         self.spam_folder = IMAPFolder(options.imap_spam_folder)
+         self.unsure_folder = IMAPFolder(options.imap_unsure_folder)
+         
          if options.verbose:
              print "Loading database...",
***************
*** 163,175 ****
          folder = IMAPFolder(folder_name)
          for msg in folder:
!             if msg.isTrained():
!                 if msg.isTrndAs(isSpam):
!                     # already trained, nothing for us to do here
!                     # (we don't want to train the same message twice)
!                     continue
!                 if msg.isTrained():
!                     self.classifier.unlearn(msg.asTokens(), not isSpam)
!             self.classifier.learn(msg.asTokens(), isSpam)
!             msg.trndAs(isSpam)
  
      def Train(self):
--- 184,200 ----
          folder = IMAPFolder(folder_name)
          for msg in folder:
!             # XXX I've rewritten this logic.  It looks a bit strange,
!             # because of the msg.notTrained call immediately before the
!             # test for isTrained, but this is safer.  Once the message has
!             # been untrained, its training memory should reflect that
!             # on the off chance that for some reason the training breaks,
!             # which happens on occasion (the tokenizer is not yet perfect)
!             if msg.isTrndAs(not isSpam):
!                 self.classifier.unlearn(msg.asTokens(), not isSpam)
!                 msg.notTrained()
! 
!             if not msg.isTrained():
!                 self.classifier.learn(msg.asTokens(), isSpam)
!                 msg.trndAs(isSpam)
  
      def Train(self):
***************
*** 210,220 ****
          imap.logout()
  
-     def _extractTimeFromMessage(self, msg):
-         # When we create a new copy of a message, we need to specify
-         # a timestamp for the message.  Ideally, this would be the
-         # timestamp from the message itself, but for the moment, we
-         # just use the current time.
-         return imaplib.Time2Internaldate(time.time())
- 
      def _moveMessage(self, old_msg, dest):
          # The IMAP copy command makes an alias, not a whole new
--- 235,238 ----
***************
*** 222,241 ****
          # in the correct folder, and delete the old one
          # XXX (someone tell me if this is wrong)
!         response = imap.uid("FETCH", old_msg.getId(), "(RFC822)")
!         self._check(response, 'uid fetch')
!         msg = message.Message()
!         msg.setPayload(response[1][0][1])
          #response = imap.uid("SEARCH", "(TEXT)", msg.get_payload())
          #self._check(response, "search")
          #self.changeId(response[1][0])
  
!         response = imap.append(dest, None,
!                                self._extractTimeFromMessage(msg),
!                                msg.get_payload())
!         self._check(response, "append")
!         self._selectFolder(old_msg.folder_name, False)
!         response = imap.uid("STORE", old_msg.getId(), "+FLAGS.SILENT",
!                             "(\\Deleted)")
!         self._check(response, "uid store")
  
      def _filterMessage(self, msg):
--- 240,275 ----
          # in the correct folder, and delete the old one
          # XXX (someone tell me if this is wrong)
! 
!         # XXX I've redone this logic to use the IMAPMessage class.  It
!         # may be a bit of overkill, but it allows us to maintain the
!         # proper training and classification memory for the message
!         # as it's moved
!         
!         #response = imap.uid("FETCH", old_msg.getId(), "(RFC822)")
!         #self._check(response, 'uid fetch')
!         #msg = message.Message()
!         #msg.setPayload(response[1][0][1])
!         
!         msg = IMAPMessage(dest.uid, dest.folder_name, None)
!         msg.setId(msg.extractTime())  # this is kinda silly
!         msg.copy(old_msg)
!         
          #response = imap.uid("SEARCH", "(TEXT)", msg.get_payload())
          #self._check(response, "search")
          #self.changeId(response[1][0])
  
!         #response = imap.append(dest.folder_name, None,
!         #                       msg.getId(),
!         #                       msg.get_payload())
!         #self._check(response, "append")
! 
!         msg.Append()        
! 
!         #self._selectFolder(old_msg.folder_name, False)
!         #response = imap.uid("STORE", old_msg.getId(), "+FLAGS.SILENT",
!         #                    "(\\Deleted)")
!         #self._check(response, "uid store")
!         
!         old_msg.Delete()
  
      def _filterMessage(self, msg):
***************
*** 245,251 ****
              pass
          elif msg.isClsfdSpam():
!             self._moveMessage(msg, options.imap_spam_folder)
          else:
!             self._moveMessage(msg, options.imap_unsure_folder)
  
  if __name__ == '__main__':
--- 279,287 ----
              pass
          elif msg.isClsfdSpam():
!             #XXX I actually think move should be a method on IMAPMessage
!             #but I'm running out of time.
!             self._moveMessage(msg, self.spam_folder)
          else:
!             self._moveMessage(msg, self.unsure_folder)
  
  if __name__ == '__main__':





More information about the Spambayes-checkins mailing list