[Python-checkins] r79047 - in python/trunk/Lib: test/test_urllib.py urllib.py urlparse.py

senthil.kumaran python-checkins at python.org
Thu Mar 18 13:14:15 CET 2010


Author: senthil.kumaran
Date: Thu Mar 18 13:14:15 2010
New Revision: 79047

Log:
Fix for Issue8135 - urllib.unquote to support mixed percent escapes



Modified:
   python/trunk/Lib/test/test_urllib.py
   python/trunk/Lib/urllib.py
   python/trunk/Lib/urlparse.py

Modified: python/trunk/Lib/test/test_urllib.py
==============================================================================
--- python/trunk/Lib/test/test_urllib.py	(original)
+++ python/trunk/Lib/test/test_urllib.py	Thu Mar 18 13:14:15 2010
@@ -439,6 +439,32 @@
                          "using unquote(): not all characters escaped: "
                          "%s" % result)
 
+    def test_unquoting_badpercent(self):
+        # Test unquoting on bad percent-escapes
+        given = '%xab'
+        expect = given
+        result = urllib.unquote(given)
+        self.assertEqual(expect, result, "using unquote(): %r != %r"
+                         % (expect, result))
+        given = '%x'
+        expect = given
+        result = urllib.unquote(given)
+        self.assertEqual(expect, result, "using unquote(): %r != %r"
+                         % (expect, result))
+        given = '%'
+        expect = given
+        result = urllib.unquote(given)
+        self.assertEqual(expect, result, "using unquote(): %r != %r"
+                         % (expect, result))
+
+    def test_unquoting_mixed_case(self):
+        # Test unquoting on mixed-case hex digits in the percent-escapes
+        given = '%Ab%eA'
+        expect = '\xab\xea'
+        result = urllib.unquote(given)
+        self.assertEqual(expect, result, "using unquote(): %r != %r"
+                         % (expect, result))
+
     def test_unquoting_parts(self):
         # Make sure unquoting works when have non-quoted characters
         # interspersed

Modified: python/trunk/Lib/urllib.py
==============================================================================
--- python/trunk/Lib/urllib.py	(original)
+++ python/trunk/Lib/urllib.py	Thu Mar 18 13:14:15 2010
@@ -1158,8 +1158,8 @@
     if match: return match.group(1, 2)
     return attr, None
 
-_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
-_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
+_hexdig = '0123456789ABCDEFabcdef'
+_hextochr = dict((a+b, chr(int(a+b,16))) for a in _hexdig for b in _hexdig)
 
 def unquote(s):
     """unquote('abc%20def') -> 'abc def'."""

Modified: python/trunk/Lib/urlparse.py
==============================================================================
--- python/trunk/Lib/urlparse.py	(original)
+++ python/trunk/Lib/urlparse.py	Thu Mar 18 13:14:15 2010
@@ -272,8 +272,9 @@
 # Cannot use directly from urllib as it would create circular reference.
 # urllib uses urlparse methods ( urljoin)
 
-_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
-_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
+
+_hexdig = '0123456789ABCDEFabcdef'
+_hextochr = dict((a+b, chr(int(a+b,16))) for a in _hexdig for b in _hexdig)
 
 def unquote(s):
     """unquote('abc%20def') -> 'abc def'."""


More information about the Python-checkins mailing list