[Python-checkins] cpython: Issue #23688: Added support for arbitrary bytes-like objects and avoided unnecessary copying of memoryview in gzip.GzipFile.write().

serhiy.storchaka python-checkins at python.org
Mon Mar 23 14:28:03 CET 2015


https://hg.python.org/cpython/rev/4dc69e5124f8
changeset:   95140:4dc69e5124f8
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Mon Mar 23 14:59:48 2015 +0200
summary:
  Issue #23688: Added support for arbitrary bytes-like objects and avoided
unnecessary copying of memoryview objects in gzip.GzipFile.write().
Original patch by Wolfgang Maier.

files:
  Doc/library/gzip.rst  |   4 +++
  Lib/gzip.py           |  19 +++++++++------
  Lib/test/test_gzip.py |  37 +++++++++++++++++++++++++++++++
  Misc/NEWS             |   4 +++
  4 files changed, 56 insertions(+), 8 deletions(-)


diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst
--- a/Doc/library/gzip.rst
+++ b/Doc/library/gzip.rst
@@ -137,6 +137,10 @@
    .. versionchanged:: 3.4
       Added support for the ``'x'`` and ``'xb'`` modes.
 
+   .. versionchanged:: 3.5
+      Added support for writing arbitrary
+      :term:`bytes-like objects <bytes-like object>`.
+
 
 .. function:: compress(data, compresslevel=9)
 
diff --git a/Lib/gzip.py b/Lib/gzip.py
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -334,17 +334,20 @@
         if self.fileobj is None:
             raise ValueError("write() on closed GzipFile object")
 
-        # Convert data type if called by io.BufferedWriter.
-        if isinstance(data, memoryview):
-            data = data.tobytes()
+        if isinstance(data, bytes):
+            length = len(data)
+        else:
+            # accept any data that supports the buffer protocol
+            data = memoryview(data)
+            length = data.nbytes
 
-        if len(data) > 0:
-            self.size = self.size + len(data)
+        if length > 0:
+            self.fileobj.write(self.compress.compress(data))
+            self.size += length
             self.crc = zlib.crc32(data, self.crc) & 0xffffffff
-            self.fileobj.write( self.compress.compress(data) )
-            self.offset += len(data)
+            self.offset += length
 
-        return len(data)
+        return length
 
     def read(self, size=-1):
         self._check_closed()
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -6,6 +6,7 @@
 import os
 import io
 import struct
+import array
 gzip = support.import_module('gzip')
 
 data1 = b"""  int length=DEFAULTALLOC, err = Z_OK;
@@ -43,6 +44,14 @@
 
 
 class TestGzip(BaseTest):
+    def write_and_read_back(self, data, mode='b'):
+        b_data = bytes(data)
+        with gzip.GzipFile(self.filename, 'w'+mode) as f:
+            l = f.write(data)
+        self.assertEqual(l, len(b_data))
+        with gzip.GzipFile(self.filename, 'r'+mode) as f:
+            self.assertEqual(f.read(), b_data)
+
     def test_write(self):
         with gzip.GzipFile(self.filename, 'wb') as f:
             f.write(data1 * 50)
@@ -57,6 +66,34 @@
         # Test multiple close() calls.
         f.close()
 
+    # The following test_write_xy methods test that write accepts
+    # the corresponding bytes-like object type as input
+    # and that the data written equals bytes(xy) in all cases.
+    def test_write_memoryview(self):
+        self.write_and_read_back(memoryview(data1 * 50))
+        m = memoryview(bytes(range(256)))
+        data = m.cast('B', shape=[8,8,4])
+        self.write_and_read_back(data)
+
+    def test_write_bytearray(self):
+        self.write_and_read_back(bytearray(data1 * 50))
+
+    def test_write_array(self):
+        self.write_and_read_back(array.array('I', data1 * 40))
+
+    def test_write_incompatible_type(self):
+        # Test that non-bytes-like types raise TypeError.
+        # Issue #21560: attempts to write incompatible types
+        # should not affect the state of the fileobject
+        with gzip.GzipFile(self.filename, 'wb') as f:
+            with self.assertRaises(TypeError):
+                f.write('')
+            with self.assertRaises(TypeError):
+                f.write([])
+            f.write(data1)
+        with gzip.GzipFile(self.filename, 'rb') as f:
+            self.assertEqual(f.read(), data1)
+
     def test_read(self):
         self.test_write()
         # Try reading.
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -23,6 +23,10 @@
 Library
 -------
 
+- Issue #23688: Added support of arbitrary bytes-like objects and avoided
+  unnecessary copying of memoryview in gzip.GzipFile.write().
+  Original patch by Wolfgang Maier.
+
 - Issue #23252:  Added support for writing ZIP files to unseekable streams.
 
 - Issue #21526: Tkinter now supports new boolean type in Tcl 8.5.

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list