[Python-checkins] cpython (2.7): Issue #9291 Do not attempt to re-encode mimetype data read from registry in ANSI mode

tim.golden python-checkins at python.org
Sun Apr 27 17:37:07 CEST 2014


http://hg.python.org/cpython/rev/18cfc2a42772
changeset:   90472:18cfc2a42772
branch:      2.7
user:        Tim Golden <mail at timgolden.me.uk>
date:        Sun Apr 27 16:36:47 2014 +0100
summary:
  Issue #9291: Do not attempt to re-encode mimetype data read from the registry in ANSI mode. Initial patches by Dmitry Jemerov & Vladimir Iofik.

files:
  Lib/mimetypes.py           |  38 +++++++------------
  Lib/test/test_mimetypes.py |  50 ++++++++++++++++++++++---
  2 files changed, 58 insertions(+), 30 deletions(-)


diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py
--- a/Lib/mimetypes.py
+++ b/Lib/mimetypes.py
@@ -242,38 +242,28 @@
             i = 0
             while True:
                 try:
-                    ctype = _winreg.EnumKey(mimedb, i)
+                    yield _winreg.EnumKey(mimedb, i)
                 except EnvironmentError:
                     break
-                try:
-                    ctype = ctype.encode(default_encoding) # omit in 3.x!
-                except UnicodeEncodeError:
-                    pass
-                else:
-                    yield ctype
                 i += 1
 
-        default_encoding = sys.getdefaultencoding()
         with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, '') as hkcr:
             for subkeyname in enum_types(hkcr):
-                try:
-                    with _winreg.OpenKey(hkcr, subkeyname) as subkey:
-                        # Only check file extensions
-                        if not subkeyname.startswith("."):
-                            continue
-                        # raises EnvironmentError if no 'Content Type' value
+                # Only check file extensions, not all possible classes
+                if not subkeyname.startswith("."):
+                    continue
+
+                with _winreg.OpenKey(hkcr, subkeyname) as subkey:
+                    # If there is no "Content Type" value, or if it is not
+                    # a simple string, simply skip
+                    try:
                         mimetype, datatype = _winreg.QueryValueEx(
                             subkey, 'Content Type')
-                        if datatype != _winreg.REG_SZ:
-                            continue
-                        try:
-                            mimetype = mimetype.encode(default_encoding)
-                            subkeyname = subkeyname.encode(default_encoding)
-                        except UnicodeEncodeError:
-                            continue
-                        self.add_type(mimetype, subkeyname, strict)
-                except EnvironmentError:
-                    continue
+                    except EnvironmentError:
+                        continue
+                    if datatype != _winreg.REG_SZ:
+                        continue
+                    self.add_type(mimetype, subkeyname, strict)
 
 def guess_type(url, strict=True):
     """Guess the type of a file based on its URL.
diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py
--- a/Lib/test/test_mimetypes.py
+++ b/Lib/test/test_mimetypes.py
@@ -71,8 +71,6 @@
         # ensure all entries actually come from the Windows registry
         self.original_types_map = mimetypes.types_map.copy()
         mimetypes.types_map.clear()
-        mimetypes.init()
-        self.db = mimetypes.MimeTypes()
 
     def tearDown(self):
         # restore default settings
@@ -84,14 +82,54 @@
         # Windows registry is undocumented AFAIK.
         # Use file types that should *always* exist:
         eq = self.assertEqual
-        eq(self.db.guess_type("foo.txt"), ("text/plain", None))
-        eq(self.db.guess_type("image.jpg"), ("image/jpeg", None))
-        eq(self.db.guess_type("image.png"), ("image/png", None))
+        mimetypes.init()
+        db = mimetypes.MimeTypes()
+        eq(db.guess_type("foo.txt"), ("text/plain", None))
+        eq(db.guess_type("image.jpg"), ("image/jpeg", None))
+        eq(db.guess_type("image.png"), ("image/png", None))
+
+    def test_non_latin_extension(self):
+        import _winreg
+
+        class MockWinreg(object):
+            def __getattr__(self, name):
+                if name == 'EnumKey':
+                    return lambda key, i: _winreg.EnumKey(key, i) + "\xa3"
+                elif name == "OpenKey":
+                    return lambda key, name: _winreg.OpenKey(key, name.rstrip("\xa3"))
+                elif name == 'QueryValueEx':
+                    return lambda subkey, label: (label + "\xa3", _winreg.REG_SZ)
+                return getattr(_winreg, name)
+
+        mimetypes._winreg = MockWinreg()
+        try:
+            # this used to throw an exception if registry contained non-Latin
+            # characters in extensions (issue #9291)
+            mimetypes.init()
+        finally:
+            mimetypes._winreg = _winreg
+
+    def test_non_latin_type(self):
+        import _winreg
+
+        class MockWinreg(object):
+            def __getattr__(self, name):
+                if name == 'QueryValueEx':
+                    return lambda subkey, label: (label + "\xa3", _winreg.REG_SZ)
+                return getattr(_winreg, name)
+
+        mimetypes._winreg = MockWinreg()
+        try:
+            # this used to throw an exception if registry contained non-Latin
+            # characters in content types (issue #9291)
+            mimetypes.init()
+        finally:
+            mimetypes._winreg = _winreg
 
 def test_main():
     test_support.run_unittest(MimeTypesTestCase,
         Win32MimeTypesTestCase
-        )
+    )
 
 
 if __name__ == "__main__":

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list