[Python-checkins] cpython: Add a function lzma.open(), to match gzip.open() and bz2.open().

nadeem.vawda python-checkins at python.org
Mon Jun 4 23:55:57 CEST 2012


http://hg.python.org/cpython/rev/3d82ced09043
changeset:   77358:3d82ced09043
user:        Nadeem Vawda <nadeem.vawda at gmail.com>
date:        Mon Jun 04 23:38:12 2012 +0200
summary:
  Add a function lzma.open(), to match gzip.open() and bz2.open().

files:
  Doc/library/lzma.rst  |   29 ++++++++
  Lib/lzma.py           |   50 ++++++++++++++-
  Lib/test/test_lzma.py |  101 ++++++++++++++++++++++++++++++
  Misc/NEWS             |    4 +-
  4 files changed, 180 insertions(+), 4 deletions(-)


diff --git a/Doc/library/lzma.rst b/Doc/library/lzma.rst
--- a/Doc/library/lzma.rst
+++ b/Doc/library/lzma.rst
@@ -29,6 +29,35 @@
 Reading and writing compressed files
 ------------------------------------
 
+.. function:: open(filename, mode="rb", \*, format=None, check=-1, preset=None, filters=None, encoding=None, errors=None, newline=None)
+
+   Open an LZMA-compressed file in binary or text mode, returning a :term:`file
+   object`.
+
+   The *filename* argument can be either an actual file name (given as a
+   :class:`str` or :class:`bytes` object), in which case the named file is
+   opened, or it can be an existing file object to read from or write to.
+
+   The *mode* argument can be any of ``"r"``, ``"rb"``, ``"w"``, ``"wb"``,
+   ``"a"`` or ``"ab"`` for binary mode, or ``"rt"``, ``"wt"``, or ``"at"`` for
+   text mode. The default is ``"rb"``.
+
+   When opening a file for reading, the *format* and *filters* arguments have
+   the same meanings as for :class:`LZMADecompressor`. In this case, the *check*
+   and *preset* arguments should not be used.
+
+   When opening a file for writing, the *format*, *check*, *preset* and
+   *filters* arguments have the same meanings as for :class:`LZMACompressor`.
+
+   For binary mode, this function is equivalent to the :class:`LZMAFile`
+   constructor: ``LZMAFile(filename, mode, ...)``. In this case, the *encoding*,
+   *errors* and *newline* arguments must not be provided.
+
+   For text mode, an :class:`LZMAFile` object is created, and wrapped in an
+   :class:`io.TextIOWrapper` instance with the specified encoding, error
+   handling behavior, and line ending(s).
+
+
 .. class:: LZMAFile(filename=None, mode="r", \*, format=None, check=-1, preset=None, filters=None)
 
    Open an LZMA-compressed file in binary mode.
diff --git a/Lib/lzma.py b/Lib/lzma.py
--- a/Lib/lzma.py
+++ b/Lib/lzma.py
@@ -18,10 +18,11 @@
     "MODE_FAST", "MODE_NORMAL", "PRESET_DEFAULT", "PRESET_EXTREME",
 
     "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError",
-    "compress", "decompress", "is_check_supported",
+    "open", "compress", "decompress", "is_check_supported",
     "encode_filter_properties", "decode_filter_properties",
 ]
 
+import builtins
 import io
 from _lzma import *
 
@@ -122,7 +123,7 @@
         if isinstance(filename, (str, bytes)):
             if "b" not in mode:
                 mode += "b"
-            self._fp = open(filename, mode)
+            self._fp = builtins.open(filename, mode)
             self._closefp = True
             self._mode = mode_code
         elif hasattr(filename, "read") or hasattr(filename, "write"):
@@ -370,6 +371,51 @@
         return self._pos
 
 
+def open(filename, mode="rb", *,
+         format=None, check=-1, preset=None, filters=None,
+         encoding=None, errors=None, newline=None):
+    """Open an LZMA-compressed file in binary or text mode.
+
+    filename can be either an actual file name (given as a str or bytes object),
+    in which case the named file is opened, or it can be an existing file object
+    to read from or write to.
+
+    The mode argument can be "r", "rb" (default), "w", "wb", "a", or "ab" for
+    binary mode, or "rt", "wt" or "at" for text mode.
+
+    The format, check, preset and filters arguments specify the compression
+    settings, as for LZMACompressor, LZMADecompressor and LZMAFile.
+
+    For binary mode, this function is equivalent to the LZMAFile constructor:
+    LZMAFile(filename, mode, ...). In this case, the encoding, errors and
+    newline arguments must not be provided.
+
+    For text mode, an LZMAFile object is created, and wrapped in an
+    io.TextIOWrapper instance with the specified encoding, error handling
+    behavior, and line ending(s).
+
+    """
+    if "t" in mode:
+        if "b" in mode:
+            raise ValueError("Invalid mode: %r" % (mode,))
+    else:
+        if encoding is not None:
+            raise ValueError("Argument 'encoding' not supported in binary mode")
+        if errors is not None:
+            raise ValueError("Argument 'errors' not supported in binary mode")
+        if newline is not None:
+            raise ValueError("Argument 'newline' not supported in binary mode")
+
+    lz_mode = mode.replace("t", "")
+    binary_file = LZMAFile(filename, lz_mode, format=format, check=check,
+                           preset=preset, filters=filters)
+
+    if "t" in mode:
+        return io.TextIOWrapper(binary_file, encoding, errors, newline)
+    else:
+        return binary_file
+
+
 def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None):
     """Compress a block of data.
 
diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py
--- a/Lib/test/test_lzma.py
+++ b/Lib/test/test_lzma.py
@@ -935,6 +935,106 @@
         self.assertRaises(ValueError, f.tell)
 
 
+class OpenTestCase(unittest.TestCase):
+
+    def test_binary_modes(self):
+        with lzma.open(BytesIO(COMPRESSED_XZ), "rb") as f:
+            self.assertEqual(f.read(), INPUT)
+        with BytesIO() as bio:
+            with lzma.open(bio, "wb") as f:
+                f.write(INPUT)
+            file_data = lzma.decompress(bio.getvalue())
+            self.assertEqual(file_data, INPUT)
+            with lzma.open(bio, "ab") as f:
+                f.write(INPUT)
+            file_data = lzma.decompress(bio.getvalue())
+            self.assertEqual(file_data, INPUT * 2)
+
+    def test_text_modes(self):
+        uncompressed = INPUT.decode("ascii")
+        uncompressed_raw = uncompressed.replace("\n", os.linesep)
+        with lzma.open(BytesIO(COMPRESSED_XZ), "rt") as f:
+            self.assertEqual(f.read(), uncompressed)
+        with BytesIO() as bio:
+            with lzma.open(bio, "wt") as f:
+                f.write(uncompressed)
+            file_data = lzma.decompress(bio.getvalue()).decode("ascii")
+            self.assertEqual(file_data, uncompressed_raw)
+            with lzma.open(bio, "at") as f:
+                f.write(uncompressed)
+            file_data = lzma.decompress(bio.getvalue()).decode("ascii")
+            self.assertEqual(file_data, uncompressed_raw * 2)
+
+    def test_filename(self):
+        with TempFile(TESTFN):
+            with lzma.open(TESTFN, "wb") as f:
+                f.write(INPUT)
+            with open(TESTFN, "rb") as f:
+                file_data = lzma.decompress(f.read())
+                self.assertEqual(file_data, INPUT)
+            with lzma.open(TESTFN, "rb") as f:
+                self.assertEqual(f.read(), INPUT)
+            with lzma.open(TESTFN, "ab") as f:
+                f.write(INPUT)
+            with lzma.open(TESTFN, "rb") as f:
+                self.assertEqual(f.read(), INPUT * 2)
+
+    def test_bad_params(self):
+        # Test invalid parameter combinations.
+        with self.assertRaises(ValueError):
+            lzma.open(TESTFN, "")
+        with self.assertRaises(ValueError):
+            lzma.open(TESTFN, "x")
+        with self.assertRaises(ValueError):
+            lzma.open(TESTFN, "rbt")
+        with self.assertRaises(ValueError):
+            lzma.open(TESTFN, "rb", encoding="utf-8")
+        with self.assertRaises(ValueError):
+            lzma.open(TESTFN, "rb", errors="ignore")
+        with self.assertRaises(ValueError):
+            lzma.open(TESTFN, "rb", newline="\n")
+
+    def test_format_and_filters(self):
+        # Test non-default format and filter chain.
+        options = {"format": lzma.FORMAT_RAW, "filters": FILTERS_RAW_1}
+        with lzma.open(BytesIO(COMPRESSED_RAW_1), "rb", **options) as f:
+            self.assertEqual(f.read(), INPUT)
+        with BytesIO() as bio:
+            with lzma.open(bio, "wb", **options) as f:
+                f.write(INPUT)
+            file_data = lzma.decompress(bio.getvalue(), **options)
+            self.assertEqual(file_data, INPUT)
+
+    def test_encoding(self):
+        # Test non-default encoding.
+        uncompressed = INPUT.decode("ascii")
+        uncompressed_raw = uncompressed.replace("\n", os.linesep)
+        with BytesIO() as bio:
+            with lzma.open(bio, "wt", encoding="utf-16-le") as f:
+                f.write(uncompressed)
+            file_data = lzma.decompress(bio.getvalue()).decode("utf-16-le")
+            self.assertEqual(file_data, uncompressed_raw)
+            bio.seek(0)
+            with lzma.open(bio, "rt", encoding="utf-16-le") as f:
+                self.assertEqual(f.read(), uncompressed)
+
+    def test_encoding_error_handler(self):
+        # Test with non-default encoding error handler.
+        with BytesIO(lzma.compress(b"foo\xffbar")) as bio:
+            with lzma.open(bio, "rt", encoding="ascii", errors="ignore") as f:
+                self.assertEqual(f.read(), "foobar")
+
+    def test_newline(self):
+        # Test with explicit newline (universal newline mode disabled).
+        text = INPUT.decode("ascii")
+        with BytesIO() as bio:
+            with lzma.open(bio, "wt", newline="\n") as f:
+                f.write(text)
+            bio.seek(0)
+            with lzma.open(bio, "rt", newline="\r") as f:
+                self.assertEqual(f.readlines(), [text])
+
+
 class MiscellaneousTestCase(unittest.TestCase):
 
     def test_is_check_supported(self):
@@ -1385,6 +1485,7 @@
         CompressorDecompressorTestCase,
         CompressDecompressFunctionTestCase,
         FileTestCase,
+        OpenTestCase,
         MiscellaneousTestCase,
     )
 
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -17,8 +17,8 @@
 
 - LZMAFile now accepts the modes "rb"/"wb"/"ab" as synonyms of "r"/"w"/"a".
 
-- The bz2 module now contains an open() function, allowing compressed files to
-  conveniently be opened in text mode as well as binary mode.
+- The bz2 and lzma modules now each contain an open() function, allowing
+  compressed files to readily be opened in text mode as well as binary mode.
 
 - BZ2File.__init__() and LZMAFile.__init__() now accept a file object as their
   first argument, rather than requiring a separate "fileobj" argument.

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list