More mail retr

Christian Tanzer tanzer at swing.co.at
Thu Mar 9 10:40:10 EST 2000


Ulf Engström <ulf.engstrom at b2b-link.com> wrote:

> I receive my mail with a small Python-app and I want to get the
> text/plain only from a multipartmsg, how do I do that? Do I have to
> use re, string or something to find the right place in the msg? I'm
> currently using the rfc822-module. I get mails which contains:
> 
> This is a multi-part message in MIME format.
> 
> ------=_NextPart_000_0080_01BF89D8.387387E0 Content-Type: text/plain;
> charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable
> 
> short msg
> 
> ------=_NextPart_000_0080_01BF89D8.387387E0 Content-Type: text/html;
> <snip>
> 
> And I want to get the 'short msg' as efficient as possible. Splitting
> the msg at newline of something isn't such a good idea since msgs
> differs a lot depending on mail client.

The Python library provides mimetools for this purpose, but there is
still some work to do. I append some Python code I'm using for this
purpose.

Regards,
Christian

-------------------------------------------------------------------------------
#! /usr/bin/python
# Copyright (C) 2000 Mag. Christian Tanzer. All rights reserved
# Glasauergasse 32, A--1130 Wien, Austria. tanzer at swing.co.at
# ****************************************************************************
# 
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
# 
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Library General Public License for more details.
# 
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the Free
# Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# ****************************************************************************
#
#++
# Name
#    Mime_Mail
#
# Purpose
#    Model a MIME mail
#
# Revision Dates
#    10-Jan-2000 (CT) Creation
#    11-Jan-2000 (CT) Creation continued
#     1-Feb-2000 (CT) `formatted': don't format parts with subtype `html'
#
#--

import mimetools
import quopri
import re
import string
from   binascii  import a2b_uu, a2b_base64, a2b_hqx
from   mimify    import *
from   multifile import MultiFile
from   cStringIO import StringIO

def a2b_qp (data) :
    """Decode the quoted-printable string `data' and return the result.

       `quopri.decode' only works on file-like objects, so `data' is
       wrapped into an in-memory file and the decoded text is collected
       in a second one.
    """
    encoded = StringIO (data)
    decoded = StringIO ()
    quopri.decode      (encoded, decoded)
    return decoded.getvalue ()
# end def a2b_qp

class Mime_Mail (mimetools.Message) :
    """MIME mail: represents the headers and parts of a mime mail.

       The headers are parsed by `mimetools.Message', the body is split by
       a `MultiFile' and stored in the attribute `parts':

       - for a multipart message, `parts' contains one `Mime_Mail'
         instance for each body part which has headers or a non-empty
         body; the text in front of the first boundary is kept in
         `_leader' unless it could be added as a part itself;
       - for all other messages, `parts' contains the raw body as a single
         string (or nothing, if the body contains only whitespace).

       The attributes `attachement_name', `encoding', `receiver_address',
       `receiver_name', `sender_address', and `sender_name' are computed
       on access by `__getattr__'.
    """

    # name of the file the mail was read from ("" if the mail was read
    # from a file-like object)
    file_name  = ""

    def __init__ (self, file, seekable = 0) :
        """Create a new message instance from `file'.

           `file' can be the name of a file or any input object supporting
           the `readline' method (see documentation rfc822.Message).

           If `file' is a name, the file is opened here, treated as
           seekable, and closed again before `__init__' returns -- even if
           parsing the mail raises an exception.
        """
        opened = 0
        if type (file) == type ("") :
            self.file_name  = file
            file            = open (file, "r")
            seekable        = 1
            opened          = 1
        try :
            mimetools.Message.__init__ (self, file, seekable)
            self.parts   = []
            self._leader = ""
            self.mfile   = mfile = MultiFile (file, seekable)
            if self.maintype == "multipart" :
                self.multipart = 1
                self.boundary  = boundary = self.getparam ("boundary")
                mfile.push               (boundary)
                # the text in front of the first boundary is either a
                # part of its own or just a leader to be kept verbatim
                head = mfile.read        ()
                if self._add_part (head) :
                    self._leader = ""
                else :
                    self._leader = head
                mfile.next ()
                while not mfile.last :
                    self._add_part (mfile.read ())
                    mfile.next     ()
            else :
                self.multipart = 0
                self.boundary  = ""
                text = mfile.read ()
                if string.strip (text) :
                    self.parts.append (text)
        finally :
            # only close what was opened here; file-like objects passed in
            # by the caller stay open
            if opened :
                file.close ()
    # end def __init__

    def __getattr__ (self, name) :
        """Compute the value of the virtual attribute `name'.

           Raises `AttributeError' for all names but `attachement_name',
           `encoding', `receiver_address', `receiver_name',
           `sender_address', and `sender_name'.
        """
        if   name == "attachement_name" :
            return self._attachement_name ()
        elif name == "encoding" :
            return self.getencoding ()
        elif name == "receiver_address" :
            return self._address (self.receiver_header_name) [1]
        elif name == "receiver_name" :
            return self._decoded_name (self.receiver_header_name)
        elif name == "sender_address" :
            return self._address (self.sender_header_name) [1]
        elif name == "sender_name" :
            return self._decoded_name (self.sender_header_name)
        else :
            raise AttributeError (name)
    # end def __getattr__

    def _add_part (self, text) :
        """Parse `text' as a mail of its own and append it to `parts' if it
           is not empty (i.e., if it has headers or a non-empty body).

           A part of type `message/rfc822' with exactly one body is
           converted into a multipart message whose only part is the
           embedded mail.

           Returns the parsed message (which tests false if it was not
           added).
        """
        msg = Mime_Mail (StringIO (text))
        if msg.type == "message/rfc822" and len (msg.parts) == 1:
            msg.parts [0] = Mime_Mail (StringIO (msg.parts [0]))
            msg.multipart = 1
            msg.boundary  = msg.getparam ("boundary") or ""
            msg._leader   = ""
        if msg :
            self.parts.append (msg)
        return msg
    # end def _add_part

    def write (self, file, write_headers = 1, as_bindary = 0) :
        """Write message `self' to `file', which must be a writable object
           with file-semantic or a filename.

           The headers are only written if `write_headers' is true. If
           `as_bindary' is true, the body of a non-multipart message is
           written as returned by `as_binary' instead of in its encoded
           form; the parts of a multipart message are always written with
           the default settings.

           If `file' is a filename, the file is opened for writing and
           closed again after the message was written.
        """
        opened = type (file) == type ("")
        if opened :
            file = open        (file, "w")
        try :
            if write_headers and self.headers :
                file.write         (string.join (self.headers, ""))
                file.write         ("\n")
            if self.multipart :
                file.write         (self._leader)
                for p in self.parts :
                    if not p : continue
                    if self.boundary :
                        file.write (self.mfile.section_divider (self.boundary))
                    file.write     ("\n")
                    p.write        (file)
                if self.boundary :
                    file.write     (self.mfile.end_marker (self.boundary))
                file.write         ("\n")
            else :
                if as_bindary :
                    file.write     (self.as_binary ())
                elif self.parts :
                    # `parts' is empty for a mail without body
                    file.write     (self.parts [0])
        finally :
            if opened :
                file.close ()
    # end def write

    # names of the headers looked at (in this order) to find the date, the
    # sender, and the receiver of the mail
    date_header_name     = ("date", "delivery-date")
    sender_header_name   = ("from", "reply-to", "return-path")
    receiver_header_name = ("to",   "cc")

    def _address (self, header_names) :
        """Return the pair `(name, address)' as returned by `getaddr' for
           the first of `header_names' for which `getaddr' delivers a
           string as name, `(None, None)' if there is none.
        """
        for n in header_names :
            result = self.getaddr (n)
            if type (result [0]) == type ("") : return result
        return (None, None)
    # end def _address

    def _decoded_name (self, header_names) :
        """Return the mime-decoded name part of the address found by
           `_address' for `header_names'.

           If there is no name, the (false) value delivered by `_address'
           is returned unchanged instead of passing it to the decoder.
        """
        result = self._address (header_names) [0]
        if result :
            result = mime_decode_header (result)
        return result
    # end def _decoded_name

    def _all_headers (self, header_names) :
        """Return the pair `(name, lines)' for the first of `header_names'
           for which `getallmatchingheaders' finds something; `lines' are
           the header lines found with the leading `name:' removed.

           Returns `("", ())' if none of the headers is found.
        """
        for n in header_names :
            result = self.getallmatchingheaders (n)
            if result :
                pat    = re.compile (r"^%s\s*:\s*" % re.escape (n), re.I)
                result = map (lambda h, p = pat : p.sub ("", h), result)
                return (n, result)
        return ("", ())
    # end def _all_headers

    def _first_header (self, header_names) :
        """Return the pair `(name, value)' for the first of `header_names'
           for which `getheader' delivers a true value, `("", "")' if there
           is none.
        """
        for n in header_names :
            result = self.getheader (n)
            if result : return (n, result)
        return ("", "")
    # end def _first_header

    # matches `name="..."' anywhere in a header line (the blank inside the
    # group is ignored due to `re.X')
    name_pat = re.compile ('''name="(?P<name> [^"]+)"''', re.X)

    def _attachement_name (self) :
        """Return the value of the parameter `name' of the content-type
           header; if there is none, return the first `name="..."' value
           found in any header line.

           If nothing is found, the (false) result of `getparam' is
           returned.
        """
        result = self.getparam ("name")
        if not result :
            for h in self.headers :
                match = self.name_pat.search (h)
                if match :
                    return match.group  ("name")
        return result
    # end def _attachement_name

    def __nonzero__ (self) :
        """A mail is true if it has headers or if its first part is not
           empty.
        """
        return len (self.headers) or (self.parts and len (self.parts [0])) or 0
    # end def __nonzero__

    def __str__ (self) :
        """Return a short description for a multipart mail, the raw body
           otherwise ("" if the mail has no body).
        """
        if self.multipart :
            return "Multipart message from %s" % self.getheader ("From")
        elif self.parts :
            return self.parts [0]
        else :
            return ""
    # end def __str__

    def __repr__ (self) :
        """Return the content type of the mail."""
        return self.type
    # end def __repr__

    # maps the content-transfer-encoding to the function decoding it;
    # bodies with encodings not listed here are returned as they are
    a2b_converter = { "base64"           : a2b_base64
                    , "binhex4"          : a2b_hqx
                    , "quoted-printable" : a2b_qp
                    , "uuencode"         : a2b_uu
                    , "x-uuencode"       : a2b_uu
                    }

    def as_binary (self) :
        """Returns all parts converted from mime encodings to binary strings.

           For a multipart mail, the result is a list with one element per
           part (which in turn is a list for a nested multipart); otherwise
           it is the decoded body ("" if there is no body). A body with an
           encoding not contained in `a2b_converter' is returned as is.
        """
        if self.multipart :
            return map (lambda p : p.as_binary (), self.parts)
        else :
            if not self.parts : return ""
            converter = self.a2b_converter.get (self.encoding)
            if converter :
                return converter (self.parts [0])
        return self.parts [0]
    # end def as_binary

    def formatted (self, separator_length = 79, n = "") :
        """Returns a string containing the mail in a format suitable for
           printing.

           `separator_length' is the width of the lines separating headers,
           leader, and parts. `n' is the number of `self' inside the
           enclosing mail; it is used as prefix for the numbers of the
           parts of `self' (`1.2' is the second part of the first part).

           Parts with a main type other than `text', `message', or
           `multipart' and parts of subtype `html' are not formatted --
           only their headers are shown.
        """
        result = []
        header = self._formatted_headers ()
        if header :
            result.append  (header)
        leader = string.strip (self._leader)
        if leader :
            if header :
                result.append  ("\n" + ("-" * separator_length) + "\n")
            result.append  (a2b_qp (leader))
        if self.multipart :
            i = 0
            for p in self.parts :
                i = i + 1
                if n :
                    pn = "%s.%s" % (n, i)
                else :
                    pn = i
                if (  p.maintype not in ("text", "message", "multipart")
                   or p.subtype      in ("html", )
                   ) :
                    r = string.join (p.headers, "")
                else :
                    r = p.formatted (separator_length, pn)
                r = string.strip (r)
                if r :
                    # the part separator is filled up with dashes to
                    # `separator_length'
                    ph = "\n%s part %s "   % ("-" * 5, pn)
                    result.append ("%s-%s"
                                  % (ph, "-" * (separator_length - len (ph)))
                                  )
                    result.append (r)
        else :
            if header or leader :
                result.append  ("\n" + ("-" * separator_length) + "\n")
            result.append  (self.as_binary ())
        return string.join (filter (None, result) or "", "\n")
    # end def formatted

    def _formatted_headers (self) :
        """Return a string containing the formatted headers `Date', `From',
           `To', and `Subject' (one per line); headers not contained in the
           mail are left out.
        """
        result  = []
        date    = self._first_header (self.date_header_name)     [1]
        snd     = self._all_headers  (self.sender_header_name)   [1]
        rcv     = self._all_headers  (self.receiver_header_name) [1]
        subject = self.getheader     ("subject")
        result.append (self._formatted_header  ("Date",    "", date))
        result.append (self._formatted_address ("From",    snd))
        result.append (self._formatted_address ("To",      rcv))
        result.append (self._formatted_header  ("Subject", "", subject))
        return string.join (filter (None, result) or "", "\n")
    # end def _formatted_headers

    # width of the column containing the header labels
    label_width = 8
    # matches any run of whitespace
    ws_pat      = re.compile (r"\s+")

    def _formatted_header (self, label, continuation_tail, * lines) :
        """Return a string containing `label' (left-aligned in a column of
           `label_width' characters) followed by the mime-decoded `lines',
           broken to fit into 79 columns.

           Each line but the last ends in `continuation_tail';
           continuation lines are indented to start below the first one.

           Returns None if there are no `lines' or the first one is empty,
           or if `label' is longer than `label_width' (in that case, a
           message is printed, too).
        """
        if not (lines and lines [0]) : return
        if len (label) > self.label_width :
            print ( "Label `%s' too long: %s > %s"
                  % (label [:40], len (label), self.label_width)
                  )
            return
        lines  = map (mime_decode_header, lines)
        head   = "%-*s: " % (self.label_width, label)
        tail   = self._break_lines ( 79 - len (head) - len (continuation_tail)
                                   , lines
                                   )
        result = "%s%s" % (head, string.join ( tail or ""
                                             , "%s\n%*s"
                                             % ( continuation_tail
                                               , len (head), " "
                                               )
                                             )
                          )
        return result
    # end def _formatted_header

    def _formatted_address (self, label, lines) :
        """Return the header `label' formatted with one line per address:
           `lines' are joined, normalized to single blanks, and split at
           each comma.
        """
        line  = self.ws_pat.sub (" ", string.strip (string.join (lines, " ")))
        lines = tuple           (string.split (line, ","))
        return apply (self._formatted_header, (label, ",") + lines)
    # end def _formatted_address

    def _break_lines (self, space, lines) :
        """Return a list of the non-empty `lines' with whitespace
           normalized to single blanks and lines longer than `space'
           characters broken at blanks.

           A single word longer than `space' cannot be broken; it is put
           on a line of its own.
        """
        result = []
        for l in lines :
            l = self.ws_pat.sub (" ", string.strip (l))
            if not l : continue
            while len (l) > space :
                i = string.rfind (l, " ", 0, space)
                if i < 0 :
                    # the first word does not fit: break right after it
                    # instead of letting the rest of the line overflow, too
                    i = string.find (l, " ", space)
                if i < 0 : i = len (l)
                result.append (l [:i])
                l = l [i + 1:]
            if l :
                result.append (l)
        return result
    # end def _break_lines

# end class Mime_Mail
-------------------------------------------------------------------------------

-- 
Christian Tanzer                                         tanzer at swing.co.at
Glasauergasse 32                                       Tel: +43 1 876 62 36
A-1130 Vienna, Austria                                 Fax: +43 1 877 66 92





More information about the Python-list mailing list