[Spambayes-checkins] spambayes tokenizer.py,1.61,1.62
Skip Montanaro
montanaro@users.sourceforge.net
Wed Nov 6 02:12:49 2002
Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv16612
Modified Files:
tokenizer.py
Log Message:
Move the implementation of Tokenizer.get_message() to mboxutils.py, where it
becomes the one true place to try to generate email.Message.Message objects.
Tokenizer.get_message() remains as a thin wrapper that delegates to it.
Index: tokenizer.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/tokenizer.py,v
retrieving revision 1.61
retrieving revision 1.62
diff -C2 -d -r1.61 -r1.62
*** tokenizer.py 4 Nov 2002 23:21:43 -0000 1.61
--- tokenizer.py 6 Nov 2002 02:12:47 -0000 1.62
***************
*** 14,17 ****
--- 14,19 ----
from Options import options
+ from mboxutils import get_message
+
# Patch encodings.aliases to recognize 'ansi_x3_4_1968'
from encodings.aliases import aliases # The aliases dictionary
***************
*** 985,1017 ****
def get_message(self, obj):
! """Return an email Message object.
!
! The argument may be a Message object already, in which case it's
! returned as-is.
!
! If the argument is a string or file-like object (supports read()),
! the email package is used to create a Message object from it. This
! can fail if the message is malformed. In that case, the headers
! (everything through the first blank line) are thrown out, and the
! rest of the text is wrapped in a bare email.Message.Message.
! """
!
! if isinstance(obj, email.Message.Message):
! return obj
! # Create an email Message object.
! if hasattr(obj, "read"):
! obj = obj.read()
! try:
! msg = email.message_from_string(obj)
! except email.Errors.MessageParseError:
! # Wrap the raw text in a bare Message object. Since the
! # headers are most likely damaged, we can't use the email
! # package to parse them, so just get rid of them first.
! i = obj.find('\n\n')
! if i >= 0:
! obj = obj[i+2:] # strip headers
! msg = email.Message.Message()
! msg.set_payload(obj)
! return msg
def tokenize(self, obj):
--- 987,991 ----
def get_message(self, obj):
! return get_message(obj)
def tokenize(self, obj):