[Python-checkins] cpython (2.7): Issue #13664: GzipFile now supports non-ascii Unicode filenames.

serhiy.storchaka python-checkins at python.org
Sun Oct 12 21:25:47 CEST 2014


https://hg.python.org/cpython/rev/272c78c9c47e
changeset:   93009:272c78c9c47e
branch:      2.7
parent:      93006:ff59b0f9e142
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Sun Oct 12 22:23:28 2014 +0300
summary:
  Issue #13664: GzipFile now supports non-ascii Unicode filenames.

files:
  Lib/gzip.py           |  13 ++++++++++---
  Lib/test/test_gzip.py |  11 +++++++++++
  Misc/NEWS             |   2 ++
  3 files changed, 23 insertions(+), 3 deletions(-)


diff --git a/Lib/gzip.py b/Lib/gzip.py
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -164,9 +164,16 @@
     def _write_gzip_header(self):
         self.fileobj.write('\037\213')             # magic header
         self.fileobj.write('\010')                 # compression method
-        fname = os.path.basename(self.name)
-        if fname.endswith(".gz"):
-            fname = fname[:-3]
+        try:
+            # RFC 1952 requires the FNAME field to be Latin-1. Do not
+            # include filenames that cannot be represented that way.
+            fname = os.path.basename(self.name)
+            if not isinstance(fname, str):
+                fname = fname.encode('latin-1')
+            if fname.endswith('.gz'):
+                fname = fname[:-3]
+        except UnicodeEncodeError:
+            fname = ''
         flags = 0
         if fname:
             flags = FNAME
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -30,6 +30,17 @@
     def tearDown(self):
         test_support.unlink(self.filename)
 
+    @test_support.requires_unicode
+    def test_unicode_filename(self):
+        unicode_filename = test_support.TESTFN_UNICODE
+        with gzip.GzipFile(unicode_filename, "wb") as f:
+            f.write(data1 * 50)
+        with gzip.GzipFile(unicode_filename, "rb") as f:
+            self.assertEqual(f.read(), data1 * 50)
+        # Sanity check that we are actually operating on the right file.
+        with open(unicode_filename, 'rb') as fobj, \
+             gzip.GzipFile(fileobj=fobj, mode="rb") as f:
+            self.assertEqual(f.read(), data1 * 50)
 
     def test_write(self):
         with gzip.GzipFile(self.filename, 'wb') as f:
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -37,6 +37,8 @@
 Library
 -------
 
+- Issue #13664: GzipFile now supports non-ascii Unicode filenames.
+
 - Issue #13096: Fixed segfault in CTypes POINTER handling of large
   values.
 

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list