[Python-checkins] r53153 - in python/trunk: Lib/tarfile.py Lib/test/test_tarfile.py Misc/NEWS

lars.gustaebel python-checkins at python.org
Sat Dec 23 17:40:14 CET 2006


Author: lars.gustaebel
Date: Sat Dec 23 17:40:13 2006
New Revision: 53153

Modified:
   python/trunk/Lib/tarfile.py
   python/trunk/Lib/test/test_tarfile.py
   python/trunk/Misc/NEWS
Log:
Patch #1230446: tarfile.py: fix ExFileObject so that read() and tell()
work correctly together with readline().

Will backport to 2.5.


Modified: python/trunk/Lib/tarfile.py
==============================================================================
--- python/trunk/Lib/tarfile.py	(original)
+++ python/trunk/Lib/tarfile.py	Sat Dec 23 17:40:13 2006
@@ -628,140 +628,194 @@
 #------------------------
 # Extraction file object
 #------------------------
-class ExFileObject(object):
-    """File-like object for reading an archive member.
-       Is returned by TarFile.extractfile(). Support for
-       sparse files included.
+class _FileInFile(object):
+    """A thin wrapper around an existing file object that
+       provides a part of its data as an individual file
+       object.
     """
 
-    def __init__(self, tarfile, tarinfo):
-        self.fileobj = tarfile.fileobj
-        self.name    = tarinfo.name
-        self.mode    = "r"
-        self.closed  = False
-        self.offset  = tarinfo.offset_data
-        self.size    = tarinfo.size
-        self.pos     = 0L
-        self.linebuffer = ""
-        if tarinfo.issparse():
-            self.sparse = tarinfo.sparse
-            self.read = self._readsparse
-        else:
-            self.read = self._readnormal
+    def __init__(self, fileobj, offset, size, sparse=None):
+        self.fileobj = fileobj
+        self.offset = offset
+        self.size = size
+        self.sparse = sparse
+        self.position = 0
 
-    def __read(self, size):
-        """Overloadable read method.
+    def tell(self):
+        """Return the current file position.
         """
-        return self.fileobj.read(size)
-
-    def readline(self, size=-1):
-        """Read a line with approx. size. If size is negative,
-           read a whole line. readline() and read() must not
-           be mixed up (!).
-        """
-        if size < 0:
-            size = sys.maxint
-
-        nl = self.linebuffer.find("\n")
-        if nl >= 0:
-            nl = min(nl, size)
-        else:
-            size -= len(self.linebuffer)
-            while (nl < 0 and size > 0):
-                buf = self.read(min(size, 100))
-                if not buf:
-                    break
-                self.linebuffer += buf
-                size -= len(buf)
-                nl = self.linebuffer.find("\n")
-            if nl == -1:
-                s = self.linebuffer
-                self.linebuffer = ""
-                return s
-        buf = self.linebuffer[:nl]
-        self.linebuffer = self.linebuffer[nl + 1:]
-        while buf[-1:] == "\r":
-            buf = buf[:-1]
-        return buf + "\n"
+        return self.position
 
-    def readlines(self):
-        """Return a list with all (following) lines.
+    def seek(self, position):
+        """Seek to a position in the file.
         """
-        result = []
-        while True:
-            line = self.readline()
-            if not line: break
-            result.append(line)
-        return result
+        self.position = position
 
-    def _readnormal(self, size=None):
-        """Read operation for regular files.
+    def read(self, size=None):
+        """Read data from the file.
         """
-        if self.closed:
-            raise ValueError("file is closed")
-        self.fileobj.seek(self.offset + self.pos)
-        bytesleft = self.size - self.pos
         if size is None:
-            bytestoread = bytesleft
+            size = self.size - self.position
         else:
-            bytestoread = min(size, bytesleft)
-        self.pos += bytestoread
-        return self.__read(bytestoread)
+            size = min(size, self.size - self.position)
 
-    def _readsparse(self, size=None):
-        """Read operation for sparse files.
-        """
-        if self.closed:
-            raise ValueError("file is closed")
+        if self.sparse is None:
+            return self.readnormal(size)
+        else:
+            return self.readsparse(size)
 
-        if size is None:
-            size = self.size - self.pos
+    def readnormal(self, size):
+        """Read operation for regular files.
+        """
+        self.fileobj.seek(self.offset + self.position)
+        self.position += size
+        return self.fileobj.read(size)
 
+    def readsparse(self, size):
+        """Read operation for sparse files.
+        """
         data = []
         while size > 0:
-            buf = self._readsparsesection(size)
+            buf = self.readsparsesection(size)
             if not buf:
                 break
             size -= len(buf)
             data.append(buf)
         return "".join(data)
 
-    def _readsparsesection(self, size):
+    def readsparsesection(self, size):
         """Read a single section of a sparse file.
         """
-        section = self.sparse.find(self.pos)
+        section = self.sparse.find(self.position)
 
         if section is None:
             return ""
 
-        toread = min(size, section.offset + section.size - self.pos)
+        size = min(size, section.offset + section.size - self.position)
+
         if isinstance(section, _data):
-            realpos = section.realpos + self.pos - section.offset
-            self.pos += toread
+            realpos = section.realpos + self.position - section.offset
             self.fileobj.seek(self.offset + realpos)
-            return self.__read(toread)
+            self.position += size
+            return self.fileobj.read(size)
         else:
-            self.pos += toread
-            return NUL * toread
+            self.position += size
+            return NUL * size
+#class _FileInFile
+
+
+class ExFileObject(object):
+    """File-like object for reading an archive member.
+       Is returned by TarFile.extractfile().
+    """
+    blocksize = 1024
+
+    def __init__(self, tarfile, tarinfo):
+        self.fileobj = _FileInFile(tarfile.fileobj,
+                                   tarinfo.offset_data,
+                                   tarinfo.size,
+                                   getattr(tarinfo, "sparse", None))
+        self.name = tarinfo.name
+        self.mode = "r"
+        self.closed = False
+        self.size = tarinfo.size
+
+        self.position = 0
+        self.buffer = ""
+
+    def read(self, size=None):
+        """Read at most size bytes from the file. If size is not
+           present or None, read all data until EOF is reached.
+        """
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+
+        buf = ""
+        if self.buffer:
+            if size is None:
+                buf = self.buffer
+                self.buffer = ""
+            else:
+                buf = self.buffer[:size]
+                self.buffer = self.buffer[size:]
+
+        if size is None:
+            buf += self.fileobj.read()
+        else:
+            buf += self.fileobj.read(size - len(buf))
+
+        self.position += len(buf)
+        return buf
+
+    def readline(self, size=-1):
+        """Read one entire line from the file. If size is present
+           and non-negative, return a string with at most that
+           size, which may be an incomplete line.
+        """
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+
+        if "\n" in self.buffer:
+            pos = self.buffer.find("\n") + 1
+        else:
+            buffers = [self.buffer]
+            while True:
+                buf = self.fileobj.read(self.blocksize)
+                buffers.append(buf)
+                if not buf or "\n" in buf:
+                    self.buffer = "".join(buffers)
+                    pos = self.buffer.find("\n") + 1
+                    if pos == 0:
+                        # no newline found.
+                        pos = len(self.buffer)
+                    break
+
+        if size != -1:
+            pos = min(size, pos)
+
+        buf = self.buffer[:pos]
+        self.buffer = self.buffer[pos:]
+        self.position += len(buf)
+        return buf
+
+    def readlines(self):
+        """Return a list with all remaining lines.
+        """
+        result = []
+        while True:
+            line = self.readline()
+            if not line: break
+            result.append(line)
+        return result
 
     def tell(self):
         """Return the current file position.
         """
-        return self.pos
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
 
-    def seek(self, pos, whence=0):
+        return self.position
+
+    def seek(self, pos, whence=os.SEEK_SET):
         """Seek to a position in the file.
         """
-        self.linebuffer = ""
-        if whence == 0:
-            self.pos = min(max(pos, 0), self.size)
-        if whence == 1:
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+
+        if whence == os.SEEK_SET:
+            self.position = min(max(pos, 0), self.size)
+        elif whence == os.SEEK_CUR:
             if pos < 0:
-                self.pos = max(self.pos + pos, 0)
+                self.position = max(self.position + pos, 0)
             else:
-                self.pos = min(self.pos + pos, self.size)
-        if whence == 2:
-            self.pos = max(min(self.size + pos, self.size), 0)
+                self.position = min(self.position + pos, self.size)
+        elif whence == os.SEEK_END:
+            self.position = max(min(self.size + pos, self.size), 0)
+        else:
+            raise ValueError("Invalid argument")
+
+        self.buffer = ""
+        self.fileobj.seek(self.position)
 
     def close(self):
         """Close the file object.
@@ -769,20 +823,13 @@
         self.closed = True
 
     def __iter__(self):
-        """Get an iterator over the file object.
+        """Get an iterator over the file's lines.
         """
-        if self.closed:
-            raise ValueError("I/O operation on closed file")
-        return self
-
-    def next(self):
-        """Get the next item from the file iterator.
-        """
-        result = self.readline()
-        if not result:
-            raise StopIteration
-        return result
-
+        while True:
+            line = self.readline()
+            if not line:
+                break
+            yield line
 #class ExFileObject
 
 #------------------

Modified: python/trunk/Lib/test/test_tarfile.py
==============================================================================
--- python/trunk/Lib/test/test_tarfile.py	(original)
+++ python/trunk/Lib/test/test_tarfile.py	Sat Dec 23 17:40:13 2006
@@ -110,7 +110,7 @@
         """Test seek() method of _FileObject, incl. random reading.
         """
         if self.sep != "|":
-            filename = "0-REGTYPE"
+            filename = "0-REGTYPE-TEXT"
             self.tar.extract(filename, dirname())
             f = open(os.path.join(dirname(), filename), "rb")
             data = f.read()
@@ -149,6 +149,16 @@
             s2 = fobj.readlines()
             self.assert_(s1 == s2,
                          "readlines() after seek failed")
+            fobj.seek(0)
+            self.assert_(len(fobj.readline()) == fobj.tell(),
+                         "tell() after readline() failed")
+            fobj.seek(512)
+            self.assert_(len(fobj.readline()) + 512 == fobj.tell(),
+                         "tell() after seek() and readline() failed")
+            fobj.seek(0)
+            line = fobj.readline()
+            self.assert_(fobj.read() == data[len(line):],
+                         "read() after readline() failed")
             fobj.close()
 
     def test_old_dirtype(self):

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Sat Dec 23 17:40:13 2006
@@ -103,6 +103,9 @@
 Library
 -------
 
+- Patch #1230446: tarfile.py: fix ExFileObject so that read() and tell()
+  work correctly together with readline().
+
 - Patch #1484695: The tarfile module now raises a HeaderError exception
   if a buffer given to frombuf() is invalid.
 


More information about the Python-checkins mailing list