Extract zip file from email attachment

erikcw erikwickstrom at gmail.com
Thu Apr 5 21:03:41 EDT 2007


On Apr 5, 8:00 pm, hlubenow <hluben... at gmx.net> wrote:
> erikcw wrote:
> > Hi all,
>
> > I'm trying to extract zip file (containing an xml file) from an email
> > so I can process it.  But I'm running up against some brick walls.
> > I've been googling and reading all afternoon, and can't seem to figure
> > it out.
>
> > Here is what I have so far.
>
> > p = POP3("mail.server.com")
> > print p.getwelcome()
> > # authentication, etc.
> > print p.user("USER")
> > print p.pass_("PASS")
> > print "This mailbox has %d messages, totaling %d bytes." % p.stat()
> > msg_list = p.list()
> > print msg_list
> > if not msg_list[0].startswith('+OK'):
> >         # Handle error
> >         exit(1)
>
> > for msg in msg_list[1]:
> >         msg_num, _ = msg.split()
> >         resp = p.retr(msg_num)
> >         if resp[0].startswith('+OK'):
> >             #print resp, '=======================\n'
> >             #extract message body and attachment.
> >             parsed_msg = email.message_from_string('\n'.join(resp[1]))
> >             payload= parsed_msg.get_payload(decode=True)
> >             print payload  #doesn't seem to work
> >         else:
> >                 pass# Deal with error retrieving message.
>
> > How do I:
> > a) retrieve the body of the email into a string so I can do some
> > processing? (I can get at the header attributes without any trouble)
> > b) retrieve the zip file attachment, and unzip into a string for xml
> > processing?
>
> > Thanks so much for your help!
> > Erik
>
> Hi,
>
> some weeks ago I wrote some code to extract attachments from emails.
> It's not that long, so maybe it could be of help for you:
>
> -------------------------------------------
>
> #!/usr/bin/env python
>
> import poplib
> import email
> import os
> import sys
> import string
>
> #
> # attsave.py
> # Check emails at PROVIDER for attachments and save them to SAVEDIR.
> #
>
> PROVIDER = "pop.YourMailProvider.de"
> USER = "YourUserName"
> PASSWORD = "YourPassword"
>
> SAVEDIR = "/home/YourUserDirectory"
>
> def saveAttachment(mstring):
>
>     filenames = []
>     attachedcontents = []
>
>     msg = email.message_from_string(mstring)
>
>     for part in msg.walk():
>
>         fn = part.get_filename()
>
>         if fn <> None:
>             filenames.append(fn)
>             attachedcontents.append(part.get_payload())
>
>     for i in range(len(filenames)):
>         fp = file(SAVEDIR + "/" + filenames[i], "wb")
>         fp.write(attachedcontents[i])
>         print 'Found and saved attachment "' + filenames[i] + '".'
>         fp.close()
>
> try:
>     client = poplib.POP3(PROVIDER)
> except:
>     print "Error: Provider not found."
>     sys.exit(1)
>
> client.user(USER)
> client.pass_(PASSWORD)
>
> anzahl_mails = len(client.list()[1])
>
> for i in range(anzahl_mails):
>     lines = client.retr(i + 1)[1]
>     mailstring = string.join(lines, "\n")
>     saveAttachment(mailstring)
>
> client.quit()
>
> -------------------------------------------
>
> See you
>
> H.

Thanks H!

I'm now able to get the name of the zip file and its contents (although
I'm not sure whether the contents are still encoded).

I now need to be able to unzip the zip file into a string and get the
body of the email into a string.

Here is my updated code:
# Fetch every message from a POP3 mailbox, try to open each named attachment
# as a zip archive, and print the message payload.
# The names POP3, email, StringIO and zipfile are used below; their imports
# are not part of this excerpt.
p = POP3("mail.**********.com")
print p.getwelcome()
# authentication, etc.
print p.user("USER")
print p.pass_("PASS")
print "This mailbox has %d messages, totaling %d bytes." % p.stat()
# msg_list[0] is checked as the server status line; msg_list[1] is iterated
# below as one listing entry per message.
msg_list = p.list()
print msg_list
if not msg_list[0].startswith('+OK'):
        # Handle error in listings
        exit(1)

for msg in msg_list[1]:
        # Each listing entry splits into two fields; only the first (the
        # message number) is used.
        msg_num, _ = msg.split()
        resp = p.retr(msg_num)
        if resp[0].startswith('+OK'):
            #print resp, '=======================\n'
            # resp[1] holds the message as a list of lines; rejoin them so the
            # email parser receives a single string.
            parsed_msg = email.message_from_string('\n'.join(resp[1]))
            for part in parsed_msg.walk():
                fn = part.get_filename()
                # Only parts that carry a filename are treated as attachments.
                if fn <> None:
                    # In-memory file object handed to zipfile in place of a
                    # file on disk.
                    fileObj = StringIO.StringIO()
                    # NOTE(review): get_payload() is called here without
                    # decode=True (unlike the call further down). If the
                    # attachment uses a transfer encoding such as base64, the
                    # buffer presumably holds the encoded text rather than
                    # the raw zip bytes, which would explain a BadZipfile
                    # error -- confirm against the email package docs.
                    fileObj.write( part.get_payload() )
                    #attachment = zlib.decompress(part.get_payload())
                    #print zipfile.is_zipfile(fileObj)
                    attachment = zipfile.ZipFile(fileObj)
                    # Prints the attachment name and the ZipFile object
                    # itself, not the contents of the archive members.
                    print fn, '\n', attachment
            # NOTE(review): this asks the top-level message for its decoded
            # payload. For a multipart message (one with attachments) this
            # presumably yields None rather than the body text -- verify; the
            # body would then have to be taken from the matching text part
            # inside the walk() loop above.
            payload= parsed_msg.get_payload(decode=True)
            print payload

        else:
                pass# Deal with error retrieving message.
I get this error:
Traceback (most recent call last):
  File "wa.py", line 208, in <module>
    attachment = zipfile.ZipFile(fileObj)
  File "/usr/lib/python2.5/zipfile.py", line 346, in __init__
    self._GetContents()
  File "/usr/lib/python2.5/zipfile.py", line 366, in _GetContents
    self._RealGetContents()
  File "/usr/lib/python2.5/zipfile.py", line 378, in _RealGetContents
    raise BadZipfile, "File is not a zip file"
zipfile.BadZipfile: File is not a zip file

Is the zip file still encoded?  Or am I passing in the wrong arguments
to the zipfile module?

Thanks for your help!
Erik




More information about the Python-list mailing list