3.2 can't extract tarfile produced by 2.7

Antoon Pardon antoon.pardon at rece.vub.ac.be
Wed Dec 26 10:56:15 EST 2012


I am converting some programs to python 3. These programs manipulate 
tarfiles. In order for the python3 programs to be really useful
they need to be able to process the tarfiles produced by python2 that 
however seems to be a problem.

This is testcode that produces a tarfile.

#! /usr/bin/python

compression = "bz2"
tarmode = "w|%s" % compression
rt = '.'

import os
import os.path
import errno

import tarfile as tar

def process():
     pj = os.path.join
     entries = os.listdir(rt)
     of = open("DUMP.tbz", "w")
     tf = tar.open(mode = tarmode, fileobj = of,
                   encoding = 'ascii', format = tar.PAX_FORMAT)
     for entry in entries:
         fqpn = pj(rt, entry)
         try:
             tf.add(fqpn, entry, recursive = False)
         except OSError as ErrInfo:
             print("%s: disappeared" % fqpn)
             if ErrInfo.errno != errno.ENOENT:
                 raise
     tf.close()
     of.close()

if __name__ == "__main__":
     process()

==============================================================================
This is testcode that checks a tarfile

#!/usr/bin/python

compression = "bz2"
tarmode = "r|%s" % compression

import os
import os.path
import stat

import tarfile as tar

def equalfile(fl1, fl2):
     bf1 = fl1.read(8192)
     bf2 = fl2.read(8192)
     while bf1 == bf2:
         if bf1 == "":
             return True
         bf1 = fl1.read(8192)
         bf2 = fl2.read(8192)
     return False

def process():
     gf = open("DUMP.tbz", "r")
     tf = tar.open(mode = tarmode, fileobj = gf,
                   encoding = 'ascii', format = tar.PAX_FORMAT)
     for tarinfo in tf:
         entry = tarinfo.name
         fileinfo = os.stat(entry)
         if stat.S_ISREG(fileinfo.st_mode) and tarinfo.isreg():
             bfl = tf.extractfile(tarinfo)
             ofl = open(entry)
             if not equalfile(bfl, ofl):
                 print("%s: does not match backup" % entry)
                 sync = False
     tf.close()
     gf.close()

if __name__ == "__main__":
     process()

=================================================================================

When I use python2.7 to produce and later check the tarfile everything 
works as expected. However when I use python3.2 to check the tarfile I
get the following traceback.

Traceback (most recent call last):
   File "tarchck", line 39, in <module>
     process()
   File "tarchck", line 25, in process
     encoding = 'ascii', format = tar.PAX_FORMAT)
   File "/usr/lib/python3.2/tarfile.py", line 1771, in open
     t = cls(name, filemode, stream, **kwargs)
   File "/usr/lib/python3.2/tarfile.py", line 1667, in __init__
     self.firstmember = self.next()
   File "/usr/lib/python3.2/tarfile.py", line 2418, in next
     tarinfo = self.tarinfo.fromtarfile(self)
   File "/usr/lib/python3.2/tarfile.py", line 1281, in fromtarfile
     buf = tarfile.fileobj.read(BLOCKSIZE)
   File "/usr/lib/python3.2/tarfile.py", line 573, in read
     buf = self._read(size)
   File "/usr/lib/python3.2/tarfile.py", line 585, in _read
     buf = self.__read(self.bufsize)
   File "/usr/lib/python3.2/tarfile.py", line 604, in __read
     buf = self.fileobj.read(self.bufsize)
   File "/usr/lib/python3.2/codecs.py", line 300, in decode
     (result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x9e in position 10: 
invalid start byte

I have been looking around but have no idea how I have to adapt this 
code in order to have it process the tarfile under python3.2. The 
original code didn't have the coding and format keywords on the tar.open 
statement and after reading the documentation I thought that
would make things work, but no such luck. Further reading didn't
provide anything usefull

-- 
Antoon Pardon



More information about the Python-list mailing list