[Python-checkins] cpython: Closes #13989: Add support for text modes to gzip.open().

nadeem.vawda python-checkins at python.org
Sun May 6 15:11:07 CEST 2012


http://hg.python.org/cpython/rev/55202ca694d7
changeset:   76789:55202ca694d7
user:        Nadeem Vawda <nadeem.vawda at gmail.com>
date:        Sun May 06 15:04:01 2012 +0200
summary:
  Closes #13989: Add support for text modes to gzip.open().

Also, add tests for gzip.open().

files:
  Doc/library/gzip.rst  |  46 +++++++++++----
  Lib/gzip.py           |  42 +++++++++++---
  Lib/test/test_gzip.py |  88 +++++++++++++++++++++++++++++++
  Misc/NEWS             |   2 +
  4 files changed, 157 insertions(+), 21 deletions(-)


diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst
--- a/Doc/library/gzip.rst
+++ b/Doc/library/gzip.rst
@@ -13,9 +13,11 @@
 
 The data compression is provided by the :mod:`zlib` module.
 
-The :mod:`gzip` module provides the :class:`GzipFile` class. The :class:`GzipFile`
-class reads and writes :program:`gzip`\ -format files, automatically compressing
-or decompressing the data so that it looks like an ordinary :term:`file object`.
+The :mod:`gzip` module provides the :class:`GzipFile` class, as well as the
+:func:`gzip.open`, :func:`compress` and :func:`decompress` convenience
+functions. The :class:`GzipFile` class reads and writes :program:`gzip`\ -format
+files, automatically compressing or decompressing the data so that it looks like
+an ordinary :term:`file object`.
 
 Note that additional file formats which can be decompressed by the
 :program:`gzip` and :program:`gunzip` programs, such  as those produced by
@@ -24,6 +26,32 @@
 The module defines the following items:
 
 
+.. function:: open(filename, mode='rb', compresslevel=9, encoding=None, errors=None, newline=None)
+
+   Open *filename* as a gzip-compressed file in binary or text mode.
+
+   Returns a :term:`file object`.
+
+   The *mode* argument can be any of ``'r'``, ``'rb'``, ``'a'``, ``'ab'``,
+   ``'w'``, or ``'wb'`` for binary mode, or ``'rt'``, ``'at'``, or ``'wt'`` for
+   text mode. The default is ``'rb'``.
+
+   The *compresslevel* argument is an integer from 1 to 9, as for the
+   :class:`GzipFile` constructor.
+
+   For binary mode, this function is equivalent to the :class:`GzipFile`
+   constructor: ``GzipFile(filename, mode, compresslevel)``. In this case, the
+   *encoding*, *errors* and *newline* arguments must not be provided.
+
+   For text mode, a :class:`GzipFile` object is created, and wrapped in an
+   :class:`io.TextIOWrapper` instance with the specified encoding, error
+   handling behavior, and line ending(s).
+
+   .. versionchanged:: 3.3
+      Support for text mode was added, along with the *encoding*, *errors* and
+      *newline* arguments.
+
+
 .. class:: GzipFile(filename=None, mode=None, compresslevel=9, fileobj=None, mtime=None)
 
    Constructor for the :class:`GzipFile` class, which simulates most of the
@@ -46,9 +74,9 @@
    or ``'wb'``, depending on whether the file will be read or written.  The default
    is the mode of *fileobj* if discernible; otherwise, the default is ``'rb'``.
 
-   Note that the file is always opened in binary mode; text mode is not
-   supported. If you need to read a compressed file in text mode, wrap your
-   :class:`GzipFile` with an :class:`io.TextIOWrapper`.
+   Note that the file is always opened in binary mode. To open a compressed file
+   in text mode, use :func:`gzip.open` (or wrap your :class:`GzipFile` with an
+   :class:`io.TextIOWrapper`).
 
    The *compresslevel* argument is an integer from ``1`` to ``9`` controlling the
    level of compression; ``1`` is fastest and produces the least compression, and
@@ -97,12 +125,6 @@
       The :meth:`io.BufferedIOBase.read1` method is now implemented.
 
 
-.. function:: open(filename, mode='rb', compresslevel=9)
-
-   This is a shorthand for ``GzipFile(filename,`` ``mode,`` ``compresslevel)``.
-   The *filename* argument is required; *mode* defaults to ``'rb'`` and
-   *compresslevel* defaults to ``9``.
-
 .. function:: compress(data, compresslevel=9)
 
    Compress the *data*, returning a :class:`bytes` object containing
diff --git a/Lib/gzip.py b/Lib/gzip.py
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -16,6 +16,39 @@
 
 READ, WRITE = 1, 2
 
+def open(filename, mode="rb", compresslevel=9,
+         encoding=None, errors=None, newline=None):
+    """Open a gzip-compressed file in binary or text mode.
+
+    The mode argument can be "r", "rb", "w", "wb", "a" or "ab" for binary mode,
+    or "rt", "wt" or "at" for text mode. The default mode is "rb", and the
+    default compresslevel is 9.
+
+    For binary mode, this function is equivalent to the GzipFile constructor:
+    GzipFile(filename, mode, compresslevel). In this case, the encoding, errors
+    and newline arguments must not be provided.
+
+    For text mode, a GzipFile object is created, and wrapped in an
+    io.TextIOWrapper instance with the specified encoding, error handling
+    behavior, and line ending(s).
+
+    """
+    if "t" in mode:
+        if "b" in mode:
+            raise ValueError("Invalid mode: %r" % (mode,))
+    else:
+        if encoding is not None:
+            raise ValueError("Argument 'encoding' not supported in binary mode")
+        if errors is not None:
+            raise ValueError("Argument 'errors' not supported in binary mode")
+        if newline is not None:
+            raise ValueError("Argument 'newline' not supported in binary mode")
+    binary_file = GzipFile(filename, mode.replace("t", ""), compresslevel)
+    if "t" in mode:
+        return io.TextIOWrapper(binary_file, encoding, errors, newline)
+    else:
+        return binary_file
+
 def write32u(output, value):
     # The L format writes the bit pattern correctly whether signed
     # or unsigned.
@@ -24,15 +57,6 @@
 def read32(input):
     return struct.unpack("<I", input.read(4))[0]
 
-def open(filename, mode="rb", compresslevel=9):
-    """Shorthand for GzipFile(filename, mode, compresslevel).
-
-    The filename argument is required; mode defaults to 'rb'
-    and compresslevel defaults to 9.
-
-    """
-    return GzipFile(filename, mode, compresslevel)
-
 class _PaddedFile:
     """Minimal read-only file object that prepends a string to the contents
     of an actual file. Shouldn't be used outside of gzip.py, as it lacks
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -374,6 +374,94 @@
             datac = gzip.compress(data)
             self.assertEqual(gzip.decompress(datac), data)
 
+    # Test the 'open' convenience function.
+
+    def test_open_binary(self):
+        # Test explicit binary modes.
+        uncompressed = data1 * 50
+        with gzip.open(self.filename, "wb") as f:
+            f.write(uncompressed)
+        with open(self.filename, "rb") as f:
+            file_data = gzip.decompress(f.read())
+            self.assertEqual(file_data, uncompressed)
+        with gzip.open(self.filename, "rb") as f:
+            self.assertEqual(f.read(), uncompressed)
+        with gzip.open(self.filename, "ab") as f:
+            f.write(uncompressed)
+        with open(self.filename, "rb") as f:
+            file_data = gzip.decompress(f.read())
+            self.assertEqual(file_data, uncompressed * 2)
+
+    def test_open_default_binary(self):
+        # Test implicit binary modes (no "b" or "t" in mode string).
+        uncompressed = data1 * 50
+        with gzip.open(self.filename, "w") as f:
+            f.write(uncompressed)
+        with open(self.filename, "rb") as f:
+            file_data = gzip.decompress(f.read())
+            self.assertEqual(file_data, uncompressed)
+        with gzip.open(self.filename, "r") as f:
+            self.assertEqual(f.read(), uncompressed)
+        with gzip.open(self.filename, "a") as f:
+            f.write(uncompressed)
+        with open(self.filename, "rb") as f:
+            file_data = gzip.decompress(f.read())
+            self.assertEqual(file_data, uncompressed * 2)
+
+    def test_open_text(self):
+        # Test text modes.
+        uncompressed = data1.decode("ascii") * 50
+        with gzip.open(self.filename, "wt") as f:
+            f.write(uncompressed)
+        with open(self.filename, "rb") as f:
+            file_data = gzip.decompress(f.read()).decode("ascii")
+            self.assertEqual(file_data, uncompressed)
+        with gzip.open(self.filename, "rt") as f:
+            self.assertEqual(f.read(), uncompressed)
+        with gzip.open(self.filename, "at") as f:
+            f.write(uncompressed)
+        with open(self.filename, "rb") as f:
+            file_data = gzip.decompress(f.read()).decode("ascii")
+            self.assertEqual(file_data, uncompressed * 2)
+
+    def test_open_bad_params(self):
+        # Test invalid parameter combinations.
+        with self.assertRaises(ValueError):
+            gzip.open(self.filename, "wbt")
+        with self.assertRaises(ValueError):
+            gzip.open(self.filename, "rb", encoding="utf-8")
+        with self.assertRaises(ValueError):
+            gzip.open(self.filename, "rb", errors="ignore")
+        with self.assertRaises(ValueError):
+            gzip.open(self.filename, "rb", newline="\n")
+
+    def test_open_with_encoding(self):
+        # Test non-default encoding.
+        uncompressed = data1.decode("ascii") * 50
+        with gzip.open(self.filename, "wt", encoding="utf-16") as f:
+            f.write(uncompressed)
+        with open(self.filename, "rb") as f:
+            file_data = gzip.decompress(f.read()).decode("utf-16")
+            self.assertEqual(file_data, uncompressed)
+        with gzip.open(self.filename, "rt", encoding="utf-16") as f:
+            self.assertEqual(f.read(), uncompressed)
+
+    def test_open_with_encoding_error_handler(self):
+        # Test with non-default encoding error handler.
+        with gzip.open(self.filename, "wb") as f:
+            f.write(b"foo\xffbar")
+        with gzip.open(self.filename, "rt", encoding="ascii", errors="ignore") \
+                as f:
+            self.assertEqual(f.read(), "foobar")
+
+    def test_open_with_newline(self):
+        # Test with explicit newline (universal newline mode disabled).
+        uncompressed = data1.decode("ascii") * 50
+        with gzip.open(self.filename, "wt") as f:
+            f.write(uncompressed)
+        with gzip.open(self.filename, "rt", newline="\r") as f:
+            self.assertEqual(f.readlines(), [uncompressed])
+
 def test_main(verbose=None):
     support.run_unittest(TestGzip)
 
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -17,6 +17,8 @@
 Library
 -------
 
+- Issue #13989: Add support for text mode to gzip.open().
+
 - Issue #14127: The os.stat() result object now provides three additional
   fields: st_ctime_ns, st_mtime_ns, and st_atime_ns, providing those times as an
   integer with nanosecond resolution.  The functions os.utime(), os.lutimes(),

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list