An Archive::Tar for Python - libtar.py (0/1)

Tom Robinson tom at alkali.spamfree.org
Tue Aug 14 19:22:10 EDT 2001


I've hacked up the following Python workalike of the Perl CPAN module
Archive::Tar.  Currently it does most things the Perl version does, with
the notable exceptions of reading .tar files off disk and handling
reading/writing of gzipped tars - but I'm working on these.  The core
code is lifted straight out of tar.py from Zope.  I've developed this
on Python 2.0.1.  Comments very much appreciated; try it out and tell
me if (when) you find any bugs.  Some of the lines are over 72 chars,
so I've attached it as well.

#!/usr/bin/env python2

"""libtar.py

manipulation routines for the tape archive (.tar) file format

(c) 2001 Tom Robinson <tom at alkali.org>

originally based on tar.py from Zope:

    This product includes software developed by Digital Creations
    for use in the Z Object Publishing Environment
    (http://www.zope.org/).

File format:
http://www.gnu.org/manual/tar/html_node/tar_123.html

STILL TO DO:
   read in tar archives
   read/write gzipped files

"""

import string, os, stat, time, zlib

# helper functions

def oct8(i):
    """Format the integer i for an 8-byte numeric tar header field.

    Returns six zero-padded octal digits followed by a space and a NUL
    byte, so the result is always exactly 8 characters long.

    Raises ValueError if i is negative or needs more than six octal
    digits, because the field could then no longer be 8 bytes wide.
    """
    # '%o' yields bare octal digits on every Python version.  oct() adds
    # a '0o' prefix on Python 3, an 'L' suffix for Python 2 longs, and a
    # leading '0' on Python 2 that wastes one of the six digit slots.
    digits = '%06o' % i
    if i < 0 or len(digits) > 6:
        raise ValueError('value, %s, too wide for field (%d)' % (i, 8))
    return digits + ' \0'

def oct12(i):
    """Format the integer i for a 12-byte numeric tar header field.

    Returns eleven zero-padded octal digits followed by a space, so the
    result is always exactly 12 characters long.

    Raises ValueError if i is negative or needs more than eleven octal
    digits, because the field could then no longer be 12 bytes wide.
    """
    # '%o' avoids the version-dependent decorations that oct() adds
    # ('0o' prefix on Python 3, 'L' suffix for Python 2 longs).
    digits = '%011o' % i
    if i < 0 or len(digits) > 11:
        raise ValueError('value, %s, too wide for field (%d)' % (i, 12))
    return digits + ' '
    
def pad(s, l):
    """Right-pad the string s with NUL bytes to exactly l characters.

    Raises ValueError when s is l characters or longer: at least one
    trailing NUL is always required, so a value that exactly fills the
    field is rejected too.
    """
    ls = len(s)
    if ls >= l:
        # Call-form raise: same behaviour as the old
        # "raise ValueError, msg" statement but valid on every version.
        raise ValueError('value, %s, too wide for field (%d)' % (s, l))
    return s + '\0' * (l - ls)

#--------------------------------

class TarEntry:
    """A single member of a tar archive: header metadata plus contents.

    The constructor arguments are stored unchanged as attributes of the
    same name.  str(entry) yields the 512-byte header followed by the
    data, NUL-padded to a multiple of 512 bytes.
    """

    def __init__(self, path, data,
                 mode=int('644', 8), uid=0, gid=0, mtime=None,
                 typeflag='0', linkname='', uname='nobody',
                 gname='nogroup', prefix=''):
        """Initialize a Tar archive entry.

        path     -- member name (must be shorter than 100 characters)
        data     -- member contents as a string
        mode     -- permission bits; defaults to octal 644 (spelled with
                    int() so the default parses on any Python version)
        uid, gid -- numeric owner and group ids
        mtime    -- modification time in seconds; None means "now"
        typeflag, linkname, uname, gname, prefix
                 -- written verbatim into the matching header fields
        """
        if mtime is None:
            mtime = int(time.time())

        self.path = path
        self.data = data
        self.mode = mode
        self.uid = uid
        self.gid = gid
        self.mtime = mtime
        self.typeflag = typeflag
        self.linkname = linkname
        self.uname = uname
        self.gname = gname
        self.prefix = prefix

    def header(self):
        """Return the 512-byte header block for this entry.

        Raises ValueError if a field is too wide or the assembled header
        is not exactly 512 bytes long.
        """
        fields = [
            pad(self.path, 100),
            oct8(self.mode),
            oct8(self.uid),
            oct8(self.gid),
            oct12(len(self.data)),
            oct12(self.mtime),
            ' ' * 8,                # checksum placeholder, summed as spaces
            self.typeflag,
            pad(self.linkname, 100),
            'ustar\0',
            '00',
            pad(self.uname, 32),
            pad(self.gname, 32),
            '000000 \0',
            '000000 \0',
            pad(self.prefix, 155),
            '\0' * 12,
        ]
        header = ''.join(fields)

        if len(header) != 512:
            # A real exception class: string exceptions are rejected by
            # Python 2.6 and later.
            raise ValueError('Bad Header Length: %d' % len(header))

        # The checksum is the sum of every header byte, taken while the
        # checksum field itself (offsets 148-155) still holds spaces.
        checksum = 0
        for ch in header:
            checksum = checksum + ord(ch)

        header = header[:148] + oct8(checksum) + header[156:]

        # Re-check after splicing so a mis-sized checksum field can never
        # shift the fields that follow it.
        if len(header) != 512:
            raise ValueError('Bad Header Length: %d' % len(header))

        return header

    def __str__(self):
        """Return the header followed by the data padded to 512 bytes."""
        data = self.data
        remainder = len(data) % 512
        if remainder:
            data = data + '\0' * (512 - remainder)
        return self.header() + data

#--------------------------------

def _as_bytes(data):
    """Return data as the byte-string type that binary files accept.

    On Python 2 a str already is that type and is returned untouched.
    Otherwise the text is encoded as latin-1, which maps characters
    0-255 onto the same byte values.
    """
    bytes_type = type(''.encode('latin-1'))   # str on Python 2
    if isinstance(data, bytes_type):
        return data
    return data.encode('latin-1')


class Tar:
    """An in-memory tar archive.

    Entries are kept in insertion order in self._entries, with their
    names in the parallel list self._entrynames.  Lookups by name use
    list.index(), so they return the first entry with that name and
    raise ValueError when the name is unknown.
    """

    def __init__(self, name):
        """Create an empty archive whose default file name is name."""
        self._name = name
        self._entries = []
        self._entrynames = []

    def add_entry_data(self, *args, **kwargs):
        """Add an entry built from TarEntry(*args, **kwargs).

        The first positional argument (or the path keyword) is the
        member name and the second (or data) its contents.
        """
        entry = TarEntry(*args, **kwargs)
        self._entries.append(entry)
        self._entrynames.append(entry.path)

    def add_entry(self, filename):
        """Read filename from disk and add it to the in-memory archive.

        Permission bits, owner ids and modification time are taken from
        os.stat().  Nothing is added if the file cannot be read.
        """
        statresult = os.stat(filename)
        # Keep all permission bits, not only the owner's.
        mode = stat.S_IMODE(statresult[stat.ST_MODE])
        uid = statresult[stat.ST_UID]
        gid = statresult[stat.ST_GID]
        mtime = statresult[stat.ST_MTIME]

        # Binary mode: text mode would translate newlines on some
        # platforms and corrupt the contents.
        f = open(filename, 'rb')
        try:
            data = f.read()
        finally:
            f.close()
        if not isinstance(data, str):
            # A binary read may return a separate bytes type; map it 1:1
            # onto str, which the rest of the module works with.
            data = data.decode('latin-1')

        uname = 'nobody'
        gname = 'nogroup'
        if os.name == 'posix':
            import pwd
            import grp
            # Ids without a passwd/group entry raise KeyError; keep the
            # defaults in that case.
            try:
                uname = pwd.getpwuid(uid)[0]
            except KeyError:
                pass
            try:
                gname = grp.getgrgid(gid)[0]
            except KeyError:
                pass

        entry = TarEntry(path=filename, data=data,
                         mode=mode, uid=uid, gid=gid, mtime=mtime,
                         uname=uname, gname=gname)
        # Record entry and name together, and only after everything
        # above succeeded, so the two parallel lists cannot drift apart.
        self._entries.append(entry)
        self._entrynames.append(filename)

    def add_entries(self, *filenames):
        """Add each of the given files to the in-memory archive."""
        for x in filenames:
            self.add_entry(x)

    def list_files(self):
        """Return a tuple with the names of all entries in the archive."""
        return tuple(self._entrynames)

    def get_content(self, entryname):
        """Return the content of the named entry."""
        return self.object(entryname).data

    def replace_content(self, entryname, content):
        """Make the string content be the content of the named entry."""
        self.object(entryname).data = content

    def extract(self, *entrynames):
        """Write the named entries to disk under their own names.

        Missing parent directories are created as necessary.
        """
        for x in entrynames:
            ent = self.object(x)
            parent = os.path.dirname(x)
            if parent and not os.path.isdir(parent):
                os.makedirs(parent)
            f = open(x, 'wb')
            try:
                f.write(_as_bytes(ent.data))
            finally:
                f.close()

    def remove(self, *entrynames):
        """Remove the named entries from the in-memory archive."""
        for x in entrynames:
            idx = self._entrynames.index(x)
            del self._entrynames[idx]
            del self._entries[idx]

    def object(self, entryname):
        """Return the TarEntry object for the given entry name."""
        return self._entries[self._entrynames.index(entryname)]

    def write(self, name=None, compressed=0):
        """Write the in-memory archive to disk.

        name       -- output path; defaults to the name given to the
                      constructor, plus '.gz' when compressed
        compressed -- if true, write the archive through gzip
        """
        # Build the payload first so a failure cannot leave a truncated
        # file behind.
        payload = _as_bytes(self.data())

        if compressed:
            import gzip
            if name is None:
                name = self._name + '.gz'
            f = gzip.open(name, 'wb')
        else:
            if name is None:
                name = self._name
            f = open(name, 'wb')
        try:
            f.write(payload)
        finally:
            f.close()

    def __str__(self):
        """Return the data for the .tar file in memory."""
        data = []
        for x in self._entries:
            data.append(str(x))
        data.append('\0' * 1024)   # end-of-archive entry
        return ''.join(data)

    def __call__(self):
        """Return the data for the .tar file in memory."""
        return str(self)

    def data(self):
        """Return the data for the .tar file in memory."""
        return self()
    
#--------------------------------

def main():
    """Demo: build a two-entry archive in memory and write it to foo.tar.

    Tar.add_entry() and Tar.extract() can be exercised the same way with
    real file names.
    """
    t = Tar('foo.tar')
    t.add_entry_data('monkey.foo', 'hello')
    t.replace_content('monkey.foo', 'goodbye')
    t.add_entry_data('monkey.bar', 'where is the bar')

    # Single-argument print(...) prints the same text as the print
    # statement on Python 2 and also parses on later versions.
    print(t.list_files())
    print(t.get_content('monkey.foo'))

    t.write()


if __name__ == "__main__":
    main()


-- 
tom at alkali.spamfree.org
remove 'spamfree.' to respond



More information about the Python-list mailing list