Zipfile module errors

jwesonga crazylunjay at gmail.com
Wed Jun 4 06:06:11 EDT 2008


Hi,

I have a python script that supposed to go through a folder, pick the
zipped files, unzip them and process the data inside. I'm not sure
where i'm going wrong with this script because it all seems correct:

#! /usr/bin/env python
import zipfile
import os
from elementtree import ElementTree as ET
import MySQLdb
import sys

sys.stdout = open("log",'w')
sys.stderr = open("log",'w')

username = 'xxx'
password = 'xxx'
host = 'xxx'
database = 'xxx'

infolder = "/home/username/received/"
outfolder = "/home/username/webapps/app1/public/processed/"
class RecursiveFileIterator:
    def __init__ (self, *rootDirs):
        self.dirQueue = list (rootDirs)
        self.includeDirs = None
        self.fileQueue = []

    def __getitem__ (self, index):
        while len (self.fileQueue) == 0:
            self.nextDir ()
        result = self.fileQueue [0]
        del self.fileQueue [0]
        return result

    def nextDir (self):
        dir = self.dirQueue [0]   # fails with IndexError, which is
fine
                                  # for iterator interface
        del self.dirQueue [0]
        list = os.listdir (dir)
        join = os.path.join
        isdir = os.path.isdir
        for basename in list:
            fullPath = join (dir, basename)
            if isdir (fullPath):
                self.dirQueue.append (fullPath)
                if self.includeDirs:
                    self.fileQueue.append (fullPath)
            else:
                self.fileQueue.append (fullPath)

def unzip(folder):
    filelist = RecursiveFileIterator(folder)
    for one in filelist:
        xmlname = one.replace(".zip",".xml")
	pngname = one.replace(".zip",".png")
	mp3name = one.replace(".zip",".mp3")
        zfile = zipfile.ZipFile(one,'r')
        for filename in zfile.namelist():
            data = zfile.read(filename)
            file = open(infolder+"/"+filename,'w')
            file.write(data)
            file.close()
        parse_xml(xmlname)
        os.rename(xmlname,outfolder+xmlname.strip(infolder+'/'))
        try:
            os.rename(pngname,outfolder+pngname.strip(infolder+'/'))
        except:
            print one+" has no picture file"
        try:
            os.rename(mp3name,outfolder+mp3name.strip(infolder+'/'))
        except:
            print one+" has no sound file"
        os.remove(one)

def parse_xml(filename):
    topic =[]
    f  = open(filename,'r')
    #print f
    content = f.read()
    f.close()
    #print content
    element = ET.XML(content)
    #Extract the elements from xml file
    for subelement in element:
        if subelement.tag=='datestamp':
            date = str(subelement.text)
        if subelement.tag=='properties':
            for each in subelement:
                if each.tag=='name':
                    name = str(each.text)
                elif each.tag=='age':
                    age = str(each.text)
                elif each.tag=='gender':
                    gender = str(each.text)
                elif each.tag=='email':
                    email = str(each.text)
                elif each.tag=='language':
                    language = str(each.text)
                elif each.tag=='otherlanguage':
                    otherlanguage = str(each.text)
                elif each.tag=='country':
                    country = str(each.text)
                elif each.tag=='city':
                    city = str(each.text)
                elif each.tag=='referral':
                    referral = str(each.text)
        if subelement.tag=='recording':
            for sub_subelement in subelement:
                if sub_subelement.tag=='duration':
                    duration = str(sub_subelement.text)
                if sub_subelement.tag=='file':
                    sound = str(sub_subelement.text)
                if sub_subelement.tag=='image':
                    picture = str(sub_subelement.text)
        if subelement.tag=='summary':
            summary = str(subelement.text)
        if subelement.tag=='categories':
            for sub_subelement in subelement:
                if sub_subelement.text == 'True':
                    topic.append(str(sub_subelement.tag))
                if sub_subelement.tag == 'othercategory':
                    topic.append(str(sub_subelement.text))
 
db=MySQLdb.connect(host=host,user=username,passwd=password,db=database)
    c = db.cursor()
    topic = str(topic)
    c.execute("""INSERT INTO
stories(name,age,gender,email,language,otherlanguage,country,city,referral,duration,audiofilename,picture,summary,topic)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""",
(name,age,gender,email,language,otherlanguage,country,city,referral,duration,sound,picture,summary,topic))

unzip(infolder)

The error I keep getting is:

Traceback (most recent call last):
  File "processor3.py", line 124, in ?
    unzip(infolder)
  File "processor3.py", line 53, in unzip
    zfile = zipfile.ZipFile(one,'r')
  File "/usr/lib/python2.4/zipfile.py", line 210, in __init__
    self._GetContents()
  File "/usr/lib/python2.4/zipfile.py", line 230, in _GetContents
    self._RealGetContents()
  File "/usr/lib/python2.4/zipfile.py", line 240, in _RealGetContents
    endrec = _EndRecData(fp)
  File "/usr/lib/python2.4/zipfile.py", line 83, in _EndRecData
    fpin.seek(-22, 2)               # Assume no archive comment.
IOError: [Errno 22] Invalid argument


I have confirmed that the folder contains the zipped files, and that
the zipped files contain the xml file. No reason why it shouldn't
work. The server is running Python 2.4. Please help me.




More information about the Python-list mailing list