More mail retr
Christian Tanzer
tanzer at swing.co.at
Thu Mar 9 10:40:10 EST 2000
Ulf Engström <ulf.engstrom at b2b-link.com> wrote:
> I receive my mail with a small Python-app and I want to get the
> text/plain only from a multipartmsg, how do I do that? Do I have to
> use re, string or something to find the right place in the msg? I'm
> currently using the rfc822-module. I get mails which contains:
>
> This is a multi-part message in MIME format.
>
> ------=_NextPart_000_0080_01BF89D8.387387E0 Content-Type: text/plain;
> charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable
>
> short msg
>
> ------=_NextPart_000_0080_01BF89D8.387387E0 Content-Type: text/html;
> <snip>
>
> And I want to get the 'short msg' as efficient as possible. Splitting
> the msg at newline of something isn't such a good idea since msgs
> differs a lot depending on mail client.
The Python library provides the `mimetools` module for this purpose, but
there is still some work left to do. I append some Python code I'm using
for this purpose.
Regards,
Christian
-------------------------------------------------------------------------------
#! /usr/bin/python
# Copyright (C) 2000 Mag. Christian Tanzer. All rights reserved
# Glasauergasse 32, A--1130 Wien, Austria. tanzer at swing.co.at
# ****************************************************************************
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the Free
# Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# ****************************************************************************
#
#++
# Name
# Mime_Mail
#
# Purpose
# Model a MIME mail
#
# Revision Dates
# 10-Jan-2000 (CT) Creation
# 11-Jan-2000 (CT) Creation continued
# 1-Feb-2000 (CT) `formatted': don't format parts with subtype `html'
#
#--
import mimetools
import quopri
import re
import string
from binascii import a2b_uu, a2b_base64, a2b_hqx
from mimify import *
from multifile import MultiFile
from cStringIO import StringIO
def a2b_qp (data) :
    """Return the quoted-printable encoded block `data' decoded to a string.

       `data' is wrapped into an in-memory file, run through
       `quopri.decode', and the decoded contents are returned.
    """
    source = StringIO (data)
    sink   = StringIO ()
    quopri.decode (source, sink)
    return sink.getvalue ()
# end def a2b_qp
class Mime_Mail (mimetools.Message) :
    """MIME mail: represents the headers and parts of a mime mail.

       After construction an instance provides the attributes

       - `parts'     : for a multipart mail the list of nested `Mime_Mail'
                       instances, otherwise a list holding the body text
                       (empty if the body is blank)
       - `multipart' : 1 for a multipart mail, 0 otherwise
       - `boundary'  : the multipart boundary ("" if there is none)
       - `_leader'   : text in front of the first boundary which does not
                       form a part of its own

       `__getattr__' additionally computes `attachement_name', `encoding',
       `receiver_address', `receiver_name', `sender_address', and
       `sender_name' on demand.
    """

    file_name            = ""

    # header names tried (in this order) by the header/address helpers
    date_header_name     = ("date", "delivery-date")
    sender_header_name   = ("from", "reply-to", "return-path")
    receiver_header_name = ("to", "cc")

    # verbose pattern: the blank inside the group is ignored due to `re.X'
    name_pat             = re.compile ('''name="(?P<name> [^"]+)"''', re.X)

    # NOTE(review): `binascii.a2b_uu' is documented to decode a single
    # line of uuencoded data; multi-line uuencoded bodies probably need
    # `uu.decode' instead -- confirm before relying on these entries
    a2b_converter        = { "base64"           : a2b_base64
                           , "binhex4"          : a2b_hqx
                           , "quoted-printable" : a2b_qp
                           , "uuencode"         : a2b_uu
                           , "x-uuencode"       : a2b_uu
                           }

    label_width          = 8
    ws_pat               = re.compile (r"\s+")

    def __init__ (self, file, seekable = 0) :
        """Create a new message instance from `file'.

           `file' can be the name of a file or any input object supporting
           the `readline' method (see documentation rfc822.Message).

           A file opened here (i.e., when a name was passed) is closed
           again before returning, even if parsing fails.
        """
        opened = None
        if type (file) == type ("") :
            self.file_name = file
            file = opened  = open (file, "r")
            seekable       = 1
        try :
            mimetools.Message.__init__ (self, file, seekable)
            self.parts     = []
            self._leader   = ""
            self.mfile     = mfile = MultiFile (file, seekable)
            boundary       = None
            if self.maintype == "multipart" :
                boundary   = self.getparam ("boundary")
            if boundary :
                self.multipart = 1
                self.boundary  = boundary
                mfile.push (boundary)
                ### the text in front of the first boundary is either a
                ### part of its own or just a leader to be kept verbatim
                head = mfile.read ()
                if self._add_part (head) :
                    self._leader = ""
                else :
                    self._leader = head
                mfile.next ()
                while not mfile.last :
                    self._add_part (mfile.read ())
                    mfile.next ()
            else :
                ### also reached for a multipart mail lacking a boundary
                ### parameter: pushing `None' onto the MultiFile would
                ### only fail later, so the body is kept as a single text
                self.multipart = 0
                self.boundary  = ""
                text           = mfile.read ()
                if string.strip (text) :
                    self.parts.append (text)
        finally :
            if opened is not None :
                opened.close ()
    # end def __init__

    def __getattr__ (self, name) :
        """Compute the virtual attributes of the mail.

           Raises `AttributeError' for every other `name'.
        """
        if name == "attachement_name" :
            return self._attachement_name ()
        elif name == "encoding" :
            return self.getencoding ()
        elif name == "receiver_address" :
            return self._address (self.receiver_header_name) [1]
        elif name == "receiver_name" :
            return self._decoded_name (self.receiver_header_name)
        elif name == "sender_address" :
            return self._address (self.sender_header_name) [1]
        elif name == "sender_name" :
            return self._decoded_name (self.sender_header_name)
        raise AttributeError (name)
    # end def __getattr__

    def _decoded_name (self, header_names) :
        """Return the mime-decoded real name found by `_address' for
           `header_names' or None if there is no such address.
        """
        name = self._address (header_names) [0]
        if name is None :
            ### `mime_decode_header' cannot cope with None
            return None
        return mime_decode_header (name)
    # end def _decoded_name

    def _add_part (self, text) :
        """Parse `text' into a `Mime_Mail' and append it to `self.parts'
           unless it is empty (no headers and no body).

           The body of a `message/rfc822' part is itself parsed into a
           nested `Mime_Mail'. Returns the (possibly empty) message.
        """
        msg = Mime_Mail (StringIO (text))
        if msg.type == "message/rfc822" and len (msg.parts) == 1 :
            msg.parts [0] = Mime_Mail (StringIO (msg.parts [0]))
            msg.multipart = 1
            msg.boundary  = msg.getparam ("boundary") or ""
            msg._leader   = ""
        if msg :
            self.parts.append (msg)
        return msg
    # end def _add_part

    def write (self, file, write_headers = 1, as_bindary = 0) :
        """Write message `self' to `file', which must be a writable object
           with file-semantic or a filename.

           The headers are written if `write_headers' is true. For a
           non-multipart mail, a true `as_bindary' writes the result of
           `as_binary' instead of the raw body text.

           A file opened here (i.e., when a name was passed) is closed
           again before returning.
        """
        opened = None
        if type (file) == type ("") :
            file = opened = open (file, "w")
        try :
            if write_headers and self.headers :
                file.write (string.join (self.headers, ""))
                file.write ("\n")
            if self.multipart :
                file.write (self._leader)
                for p in self.parts :
                    if not p : continue
                    if self.boundary :
                        file.write \
                            (self.mfile.section_divider (self.boundary))
                        file.write ("\n")
                    p.write (file)
                if self.boundary :
                    file.write (self.mfile.end_marker (self.boundary))
                    file.write ("\n")
            elif as_bindary :
                file.write (self.as_binary ())
            elif self.parts :
                ### a mail with a blank body has no parts at all
                file.write (self.parts [0])
        finally :
            if opened is not None :
                opened.close ()
    # end def write

    def _address (self, header_names) :
        """Return the `getaddr' result of the first header in
           `header_names' yielding a string as name; (None, None)
           if there is none.
        """
        for n in header_names :
            result = self.getaddr (n)
            if type (result [0]) == type ("") : return result
        return (None, None)
    # end def _address

    def _all_headers (self, header_names) :
        """Return a pair (name, lines) for the first header in
           `header_names' occurring in the mail; `lines' are all matching
           header lines with the leading `name:' removed.

           Returns ("", ()) if none of the headers occurs.
        """
        for n in header_names :
            result = self.getallmatchingheaders (n)
            if result :
                pat    = re.compile (r"^%s\s*:\s*" % re.escape (n), re.I)
                result = map (lambda h, p = pat : p.sub ("", h), result)
                return (n, result)
        return ("", ())
    # end def _all_headers

    def _first_header (self, header_names) :
        """Return a pair (name, value) for the first header in
           `header_names' having a non-empty value; ("", "") otherwise.
        """
        for n in header_names :
            result = self.getheader (n)
            if result : return (n, result)
        return ("", "")
    # end def _first_header

    def _attachement_name (self) :
        """Return the `name' parameter of the content type or, failing
           that, the first `name="..."' value found in any header line.
        """
        result = self.getparam ("name")
        if not result :
            for h in self.headers :
                match = self.name_pat.search (h)
                if match :
                    return match.group ("name")
        return result
    # end def _attachement_name

    def __nonzero__ (self) :
        """A mail is true if it has headers or a non-empty first part."""
        return len (self.headers) or (self.parts and len (self.parts [0])) or 0
    # end def __nonzero__

    def __str__ (self) :
        """Return a short description for a multipart mail, the body text
           (or "" for a blank body) otherwise.
        """
        if self.multipart :
            return "Multipart message from %s" % self.getheader ("From")
        elif self.parts :
            return self.parts [0]
        else :
            return ""
    # end def __str__

    def __repr__ (self) :
        return self.type
    # end def __repr__

    def as_binary (self) :
        """Returns all parts converted from mime encodings to binary strings.

           For a multipart mail the result is a list with one entry per
           part; otherwise it is a single string ("" for a blank body).
           Bodies with an encoding not listed in `a2b_converter' are
           returned unchanged.
        """
        if self.multipart :
            return map (lambda p : p.as_binary (), self.parts)
        else :
            if not self.parts : return ""
            converter = self.a2b_converter.get (self.encoding)
            if converter :
                return converter (self.parts [0])
            return self.parts [0]
    # end def as_binary

    def formatted (self, separator_length = 79, n = "") :
        """Returns a string containing the mail in a format suitable for
           printing.

           `separator_length' is the width of the separator lines, `n' is
           the number prefix used for labelling nested parts (e.g. "2"
           leads to "2.1", "2.2", ...).
        """
        result    = []
        separator = "\n" + ("-" * separator_length) + "\n"
        header    = self._formatted_headers ()
        if header :
            result.append (header)
        leader    = string.strip (self._leader)
        if leader :
            if header :
                result.append (separator)
            result.append (a2b_qp (leader))
        if self.multipart :
            i = 0
            for p in self.parts :
                i = i + 1
                if n :
                    pn = "%s.%s" % (n, i)
                else :
                    pn = i
                ### non-textual parts and html are not printable:
                ### show their headers only
                if (  p.maintype not in ("text", "message", "multipart")
                   or p.subtype in ("html", )
                   ) :
                    r = string.join (p.headers, "")
                else :
                    r = p.formatted (separator_length, pn)
                r = string.strip (r)
                if r :
                    ph = "\n%s part %s " % ("-" * 5, pn)
                    result.append ( "%s-%s"
                                  % (ph, "-" * (separator_length - len (ph)))
                                  )
                    result.append (r)
        else :
            if header or leader :
                result.append (separator)
            result.append (self.as_binary ())
        return string.join (filter (None, result), "\n")
    # end def formatted

    def _formatted_headers (self) :
        """Return the date, sender, receiver, and subject headers
           formatted one per (possibly continued) line.
        """
        date    = self._first_header (self.date_header_name)     [1]
        snd     = self._all_headers  (self.sender_header_name)   [1]
        rcv     = self._all_headers  (self.receiver_header_name) [1]
        subject = self.getheader     ("subject")
        result  = [ self._formatted_header  ("Date",    "", date)
                  , self._formatted_address ("From",    snd)
                  , self._formatted_address ("To",      rcv)
                  , self._formatted_header  ("Subject", "", subject)
                  ]
        return string.join (filter (None, result), "\n")
    # end def _formatted_headers

    def _formatted_header (self, label, continuation_tail, * lines) :
        """Return `lines' formatted as header `label', broken into lines
           of at most 79 characters. `continuation_tail' is appended to
           every line followed by a continuation line.

           Returns None if there is nothing to format or if `label' is
           longer than `label_width' (a message is printed in that case).
        """
        if not (lines and lines [0]) : return
        if len (label) > self.label_width :
            print ( "Label `%s' too long: %s > %s"
                  % (label [:40], len (label), self.label_width)
                  )
            return
        lines  = map (mime_decode_header, lines)
        head   = "%-*s: " % (self.label_width, label)
        tail   = self._break_lines \
            (79 - len (head) - len (continuation_tail), lines)
        ### continuation lines are indented to line up behind `head'
        joiner = "%s\n%*s" % (continuation_tail, len (head), " ")
        return "%s%s" % (head, string.join (tail or "", joiner))
    # end def _formatted_header

    def _formatted_address (self, label, lines) :
        """Return the comma separated addresses in the header `lines'
           formatted as header `label', one or more addresses per line.
        """
        line   = self.ws_pat.sub (" ", string.strip (string.join (lines, " ")))
        ### drop empty pieces: an empty first piece would otherwise
        ### suppress the whole header
        pieces = filter (None, map (string.strip, string.split (line, ",")))
        return apply (self._formatted_header, (label, ",") + tuple (pieces))
    # end def _formatted_address

    def _break_lines (self, space, lines) :
        """Return a list of the text of `lines' broken at blanks into
           pieces of less than `space' characters. A word longer than
           `space' is put on a line of its own.
        """
        result = []
        for l in lines :
            l = self.ws_pat.sub (" ", string.strip (l))
            while len (l) > space :
                i = string.rfind (l, " ", 0, space)
                if i < 0 :
                    ### first word is too long: break right behind it
                    ### instead of giving up on the rest of the line
                    i = string.find (l, " ", space)
                    if i < 0 : break
                result.append (l [:i])
                l = l [i + 1:]
            if l :
                result.append (l)
        return result
    # end def _break_lines

# end class Mime_Mail
-------------------------------------------------------------------------------
--
Christian Tanzer tanzer at swing.co.at
Glasauergasse 32 Tel: +43 1 876 62 36
A-1130 Vienna, Austria Fax: +43 1 877 66 92
More information about the Python-list
mailing list