[Spambayes-checkins] spambayes/spambayes Corpus.py,1.24,1.25

Tony Meyer anadelonbrin at users.sourceforge.net
Tue Nov 15 01:01:56 CET 2005


Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2957/spambayes

Modified Files:
	Corpus.py 
Log Message:
Stop hiding KeyErrors.

Add __contains__ to corpus objects.

Enhance expiry so that it only iterates through the corpus if necessary.

Index: Corpus.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/Corpus.py,v
retrieving revision 1.24
retrieving revision 1.25
diff -C2 -d -r1.24 -r1.25
*** Corpus.py	9 May 2005 06:30:23 -0000	1.24
--- Corpus.py	15 Nov 2005 00:01:52 -0000	1.25
***************
*** 138,142 ****
          key = message.key()
          if options["globals", "verbose"]:
!             print 'removing message %s from corpus' % (key)
          self.unCacheMessage(key)
          del self.msgs[key]
--- 138,142 ----
          key = message.key()
          if options["globals", "verbose"]:
!             print 'removing message %s from corpus' % (key,)
          self.unCacheMessage(key)
          del self.msgs[key]
***************
*** 153,157 ****
  
          if options["globals", "verbose"]:
!             print 'placing %s in corpus cache' % (key)
  
          self.msgs[key] = message
--- 153,157 ----
  
          if options["globals", "verbose"]:
!             print 'placing %s in corpus cache' % (key,)
  
          self.msgs[key] = message
***************
*** 170,174 ****
  
          if options["globals", "verbose"]:
!             print 'Flushing %s from corpus cache' % (key)
  
          try:
--- 170,174 ----
  
          if options["globals", "verbose"]:
!             print 'Flushing %s from corpus cache' % (key,)
  
          try:
***************
*** 185,188 ****
--- 185,190 ----
          msg = fromcorpus[key]
          msg.load() # ensure that the substance has been loaded
+         # Remove needs to be first, because add changes the directory
+         # of the message, and so remove won't work then.
          fromcorpus.removeMessage(msg)
          self.addMessage(msg)
***************
*** 196,200 ****
      def __getitem__(self, key):
          '''Corpus is a dictionary'''
!         amsg = self.msgs.get(key)
  
          if amsg is None:
--- 198,205 ----
      def __getitem__(self, key):
          '''Corpus is a dictionary'''
!         amsg = self.msgs.get(key, "")
! 
!         if amsg == "":
!             raise KeyError(key)
  
          if amsg is None:
***************
*** 208,218 ****
          return self.msgs.keys()
  
      def __iter__(self):
          '''Corpus is iterable'''
          for key in self.keys():
!             try:
!                 yield self[key]
!             except KeyError:
!                 pass
  
      def __str__(self):
--- 213,223 ----
          return self.msgs.keys()
  
+     def __contains__(self, other):
+         return other in self.msgs
+ 
      def __iter__(self):
          '''Corpus is iterable'''
          for key in self.keys():
!             yield self[key]
  
      def __str__(self):
***************
*** 237,252 ****
  
      def __init__(self, expireBefore):
-         '''Constructor'''
          self.expireBefore = expireBefore
  
      def removeExpiredMessages(self):
          '''Kill expired messages'''
  
!         for msg in self:
!             if msg.createTimestamp() < time.time() - self.expireBefore:
                  if options["globals", "verbose"]:
!                     print 'message %s has expired' % (msg.key())
                  from spambayes.storage import NO_TRAINING_FLAG
                  self.removeMessage(msg, observer_flags=NO_TRAINING_FLAG)
  
  
--- 242,270 ----
  
      def __init__(self, expireBefore):
          self.expireBefore = expireBefore
+         # Only check for expiry after this time.
+         self.expiry_due = time.time()
  
      def removeExpiredMessages(self):
          '''Kill expired messages'''
+         
+         # Only check for expired messages after this time.  We set this to the
+         # closest-to-expiry message's expiry time, so that this method can be
+         # called very regularly, and most of the time it will just immediately
+         # return.
+         if time.time() < self.expiry_due:
+             return
  
!         self.expiry_due = time.time() + self.expireBefore
!         for key in self.keys()[:]:
!             msg = self[key]
!             timestamp = msg.createTimestamp()
!             if timestamp < time.time() - self.expireBefore:
                  if options["globals", "verbose"]:
!                     print 'message %s has expired' % (msg.key(),)
                  from spambayes.storage import NO_TRAINING_FLAG
                  self.removeMessage(msg, observer_flags=NO_TRAINING_FLAG)
+             elif timestamp + self.expireBefore < self.expiry_due:
+                 self.expiry_due = timestamp + self.expireBefore
  
  



More information about the Spambayes-checkins mailing list