[Spambayes-checkins] spambayes pop3proxy.py,1.81,1.82

Richie Hindle richiehindle at users.sourceforge.net
Tue Jul 1 14:19:11 EDT 2003


Update of /cvsroot/spambayes/spambayes
In directory sc8-pr-cvs1:/tmp/cvs-serv23790

Modified Files:
	pop3proxy.py 
Log Message:
Prevent the POP3 proxy from including the POP3 trailing dot in the
text it sends to the email parser.  Cope with the parser adding a boundary
marker but no trailing newline when it fixes broken messages.  Thanks
to Scott Schlesier for these edits.

When the email parser or the classifier raises an exception, add an
exception header to the email and recover.  This should stop broken
emails that the email parser can't handle properly, or bugs in the
classifier, from blocking delivery of the email.  The exception header
should help us track down the problem - here's an example:

X-Spambayes-Exception: exceptions.TypeError(string payload expected: <type 'list'>) in _handle_text() at C:\PYTHON23\lib\email\Generator.py line 199: raise TypeError, 'string payload expected: %s' % type(payload)


Index: pop3proxy.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/pop3proxy.py,v
retrieving revision 1.81
retrieving revision 1.82
diff -C2 -d -r1.81 -r1.82
*** pop3proxy.py	12 Jun 2003 07:25:03 -0000	1.81
--- pop3proxy.py	1 Jul 2003 20:19:09 -0000	1.82
***************
*** 90,94 ****
  """
  
! import os, sys, re, errno, getopt, time
  import socket
  from thread import start_new_thread
--- 90,94 ----
  """
  
! import os, sys, re, errno, getopt, time, traceback
  import socket
  from thread import start_new_thread
***************
*** 418,452 ****
          # broken emails that don't use the proper line separators.
          if re.search(r'\n\r?\n', response):
              # Break off the first line, which will be '+OK'.
              ok, messageText = response.split('\n', 1)
  
!             msg = spambayes.message.SBHeaderMessage()
!             msg.setPayload(messageText)
!             msg.setId(state.getNewMessageName())
!             # Now find the spam disposition and add the header.
!             (prob, clues) = state.bayes.spamprob(msg.asTokens(),\
!                              evidence=True)
  
!             msg.addSBHeaders(prob, clues)            
!             
!             if command == 'RETR':
!                 cls = msg.GetClassification()
!                 if cls == options["Hammie", "header_ham_string"]:
!                     state.numHams += 1
!                 elif cls == options["Hammie", "header_spam_string"]:
!                     state.numSpams += 1
!                 else:
!                     state.numUnsure += 1
  
!                 # Cache the message; don't pollute the cache with test messages.
!                 if not state.isTest \
!                     and options["pop3proxy", "cache_messages"]:
!                     # Write the message into the Unknown cache.
!                     message = state.unknownCorpus.makeMessage(msg.getId())
!                     message.setSubstance(msg.as_string())
!                     state.unknownCorpus.addMessage(message)
  
!             # Return the +OK and the message with the header added.
!             return ok + "\n" + msg.as_string()
  
          else:
--- 418,483 ----
          # broken emails that don't use the proper line separators.
          if re.search(r'\n\r?\n', response):
+             # Remove the trailing .\r\n before passing to the email parser.
+             if response[-3:] == '.\r\n':
+                 response = response[:-3]
+ 
              # Break off the first line, which will be '+OK'.
              ok, messageText = response.split('\n', 1)
  
!             try:
!                 msg = spambayes.message.SBHeaderMessage()
!                 msg.setPayload(messageText)
!                 msg.setId(state.getNewMessageName())
!                 # Now find the spam disposition and add the header.
!                 (prob, clues) = state.bayes.spamprob(msg.asTokens(),\
!                                  evidence=True)
  
!                 msg.addSBHeaders(prob, clues)
  
!                 if command == 'RETR':
!                     cls = msg.GetClassification()
!                     if cls == options["Hammie", "header_ham_string"]:
!                         state.numHams += 1
!                     elif cls == options["Hammie", "header_spam_string"]:
!                         state.numSpams += 1
!                     else:
!                         state.numUnsure += 1
  
!                     # Cache the message; don't pollute the cache with test messages.
!                     if not state.isTest \
!                         and options["pop3proxy", "cache_messages"]:
!                         # Write the message into the Unknown cache.
!                         message = state.unknownCorpus.makeMessage(msg.getId())
!                         message.setSubstance(msg.as_string())
!                         state.unknownCorpus.addMessage(message)
! 
!                 # We'll return the message with the header added.
!                 messageText = msg.as_string()
! 
!             except:
!                 # Something nasty happened while parsing or classifying -
!                 # report the exception in a hand-appended header and recover.
!                 # This is one case where an unqualified 'except' is OK, 'cos
!                 # anything's better than destroying people's email...
!                 eType, eValue, eTraceback = sys.exc_info()
!                 file, line, func, code = traceback.extract_tb(eTraceback)[-1]
!                 details = "%s(%s) in %s() at %s line %d: %s" % \
!                            (eType, eValue, func, file, line, code)
!                 exceptionHeader = 'X-Spambayes-Exception: %s\r\n' % details
!                 headers, body = re.split(r'\n\r?\n', messageText, 1)
!                 headers = headers + "\n" + exceptionHeader + "\r\n"
!                 messageText = headers + body
! 
!             # Restore the +OK and the full POP3 \r\n.\r\n terminator.  We
!             # need to make sure the first \r\n is there as well as the
!             # trailing .\r\n because the email parser can fix broken messages
!             # by adding a trailing boundary without a \r\n.  Thanks to Scott
!             # Schlesier for this fix.
!             retval = ok + "\n" + messageText
!             if retval[-2:] == '\r\n':
!                 retval += '.\r\n'
!             else:
!                 retval += '\r\n.\r\n'
!             return retval
  
          else:
***************
*** 713,717 ****
      CreateProxies(servers, proxyPorts, state)
      LoadServerInfo()
!     
      if 0 <= len(args) <= 2:
          # Normal usage, with optional server name and port number.
--- 744,748 ----
      CreateProxies(servers, proxyPorts, state)
      LoadServerInfo()
! 
      if 0 <= len(args) <= 2:
          # Normal usage, with optional server name and port number.





More information about the Spambayes-checkins mailing list