[Spambayes-checkins] spambayes/Outlook2000 msgstore.py,1.54,1.55

Mark Hammond mhammond at users.sourceforge.net
Wed Jul 23 00:20:42 EDT 2003


Update of /cvsroot/spambayes/spambayes/Outlook2000
In directory sc8-pr-cvs1:/tmp/cvs-serv32410

Modified Files:
	msgstore.py 
Log Message:
multipart/signed messages could still screw us - particularly when they
are multi-part inside the signed multi-part portion.  We now recurse
through all parts of the message, collecting all text/* portions and
combining them together.
(Irony: the mail this failed on was from a SpamBayes user [asking for
yet another feature])

Also fixed a bug in how attachment properties were fetched - if the body
was "large", we attempted to get the "large property" from the mail object
itself, rather than the attachment.  Thus, these utility functions have
been moved to global functions taking any MAPI object.


Index: msgstore.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/Outlook2000/msgstore.py,v
retrieving revision 1.54
retrieving revision 1.55
diff -C2 -d -r1.54 -r1.55
*** msgstore.py	23 Jul 2003 04:35:35 -0000	1.54
--- msgstore.py	23 Jul 2003 06:20:40 -0000	1.55
***************
*** 334,337 ****
--- 334,371 ----
  }
  
+ def GetPropFromStream(mapi_object, prop_id):
+     try:
+         stream = mapi_object.OpenProperty(prop_id,
+                                           pythoncom.IID_IStream,
+                                           0, 0)
+         chunks = []
+         while 1:
+             chunk = stream.Read(4096)
+             if not chunk:
+                 break
+             chunks.append(chunk)
+         return "".join(chunks)
+     except pythoncom.com_error, d:
+         print "Error getting property from stream", d
+         return ""
+ 
+ def GetPotentiallyLargeStringProp(mapi_object, prop_id, row):
+     got_tag, got_val = row
+     if PROP_TYPE(got_tag) == PT_ERROR:
+         ret = ""
+         if got_val == mapi.MAPI_E_NOT_FOUND:
+             pass # No property for this message.
+         elif got_val == mapi.MAPI_E_NOT_ENOUGH_MEMORY:
+             # Too big for simple properties - get via a stream
+             ret = GetPropFromStream(mapi_object, prop_id)
+         else:
+             tag_name = mapiutil.GetPropTagName(prop_id)
+             err_string = mapiutil.GetScodeString(got_val)
+             print "Warning - failed to get property %s: %s" % (tag_name,
+                                                                 err_string)
+     else:
+         ret = got_val
+     return ret
+ 
  class MAPIMsgStoreFolder(MsgStoreMsg):
      def __init__(self, msgstore, id, name, count):
***************
*** 522,558 ****
                 not self.is_unsent
  
-     def _GetPropFromStream(self, prop_id):
-         try:
-             stream = self.mapi_object.OpenProperty(prop_id,
-                                                    pythoncom.IID_IStream,
-                                                    0, 0)
-             chunks = []
-             while 1:
-                 chunk = stream.Read(4096)
-                 if not chunk:
-                     break
-                 chunks.append(chunk)
-             return "".join(chunks)
-         except pythoncom.com_error, d:
-             print "Error getting property from stream", d
-             return ""
- 
      def _GetPotentiallyLargeStringProp(self, prop_id, row):
!         got_tag, got_val = row
!         if PROP_TYPE(got_tag) == PT_ERROR:
!             ret = ""
!             if got_val == mapi.MAPI_E_NOT_FOUND:
!                 pass # No property for this message.
!             elif got_val == mapi.MAPI_E_NOT_ENOUGH_MEMORY:
!                 # Too big for simple properties - get via a stream
!                 ret = self._GetPropFromStream(prop_id)
!             else:
!                 tag_name = mapiutil.GetPropTagName(prop_id)
!                 err_string = mapiutil.GetScodeString(got_val)
!                 print "Warning - failed to get property %s: %s" % (tag_name,
!                                                                    err_string)
!         else:
!             ret = got_val
!         return ret
  
      def _GetMessageText(self):
--- 556,561 ----
                 not self.is_unsent
  
      def _GetPotentiallyLargeStringProp(self, prop_id, row):
!         return GetPotentiallyLargeStringProp(self.mapi_object, prop_id, row)
  
      def _GetMessageText(self):
***************
*** 625,630 ****
                  prop_ids = (PR_ATTACH_DATA_BIN,)
                  hr, data = attach.GetProps(prop_ids, 0)
!                 attach_body = self._GetPotentiallyLargeStringProp(
!                     prop_ids[0], data[0])
                  # What we seem to have here now is a *complete* multi-part
                  # mime message - that Outlook must have re-constituted on
--- 628,632 ----
                  prop_ids = (PR_ATTACH_DATA_BIN,)
                  hr, data = attach.GetProps(prop_ids, 0)
!                 attach_body = GetPotentiallyLargeStringProp(attach, prop_ids[0], data[0])
                  # What we seem to have here now is a *complete* multi-part
                  # mime message - that Outlook must have re-constituted on
***************
*** 635,641 ****
                  import email
                  msg = email.message_from_string(attach_body)
!                 assert msg.is_multipart()
!                 sub = msg.get_payload(0)
!                 body = sub.get_payload()
  
          return "%s\n%s\n%s" % (headers, html, body)
--- 637,657 ----
                  import email
                  msg = email.message_from_string(attach_body)
!                 assert msg.is_multipart(), "Should be multi-part: %r" % attach_body
!                 # reduce down all sub messages, collecting all text/ subtypes.
!                 # (we could make a distinction between text and html, but
!                 # it is all joined together by this method anyway.)
!                 def collect_text_parts(msg):
!                     collected = ''
!                     if msg.is_multipart():
!                         for sub in msg.get_payload():
!                             collected += collect_text_parts(sub)
!                     else:
!                         if msg.get_content_maintype()=='text':
!                             collected += msg.get_payload()
!                         else:
!                             #print "skipping content type", msg.get_content_type()
!                             pass
!                     return collected
!                 body = collect_text_parts(msg)
  
          return "%s\n%s\n%s" % (headers, html, body)





More information about the Spambayes-checkins mailing list