[Python-checkins] bpo-32498: urllib.parse.unquote also accepts bytes (GH-7768)
Tal Einat
webhook-mailer at python.org
Mon Oct 14 06:36:33 EDT 2019
https://github.com/python/cpython/commit/aad2ee01561f260c69af1951c0d6fcaf75c4d41b
commit: aad2ee01561f260c69af1951c0d6fcaf75c4d41b
branch: master
author: Stein Karlsen <karlsen.stein at gmail.com>
committer: Tal Einat <taleinat+github at gmail.com>
date: 2019-10-14T13:36:29+03:00
summary:
bpo-32498: urllib.parse.unquote also accepts bytes (GH-7768)
files:
A Misc/NEWS.d/next/Library/2018-06-17-21-02-25.bpo-32498.La3TZz.rst
M Doc/library/urllib.parse.rst
M Lib/test/test_urllib.py
M Lib/urllib/parse.py
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index 49276daa7ff43..84d289bc4415c 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -571,7 +571,7 @@ task isn't already covered by the URL parsing functions above.
percent-encoded sequences into Unicode characters, as accepted by the
:meth:`bytes.decode` method.
- *string* must be a :class:`str`.
+ *string* may be either a :class:`str` or a :class:`bytes`.
*encoding* defaults to ``'utf-8'``.
*errors* defaults to ``'replace'``, meaning invalid sequences are replaced
@@ -579,6 +579,11 @@ task isn't already covered by the URL parsing functions above.
Example: ``unquote('/El%20Ni%C3%B1o/')`` yields ``'/El Niño/'``.
+ .. versionchanged:: 3.9
+ The *string* parameter now supports both bytes and str objects (previously only str).
+
+
+
.. function:: unquote_plus(string, encoding='utf-8', errors='replace')
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 9a6b5f66b7a13..3f59c66084593 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -1049,8 +1049,6 @@ def test_unquoting(self):
"%s" % result)
self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
- with support.check_warnings(('', BytesWarning), quiet=True):
- self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
def test_unquoting_badpercent(self):
# Test unquoting on bad percent-escapes
@@ -1210,6 +1208,29 @@ def test_unquote_with_unicode(self):
self.assertEqual(expect, result,
"using unquote(): %r != %r" % (expect, result))
+ def test_unquoting_with_bytes_input(self):
+ # ASCII characters decoded to a string
+ given = b'blueberryjam'
+ expect = 'blueberryjam'
+ result = urllib.parse.unquote(given)
+ self.assertEqual(expect, result,
+ "using unquote(): %r != %r" % (expect, result))
+
+ # A mix of raw non-ASCII bytes (UTF-8 encoded, written as \x escapes) and ASCII characters
+ given = b'bl\xc3\xa5b\xc3\xa6rsyltet\xc3\xb8y'
+ expect = 'bl\u00e5b\u00e6rsyltet\u00f8y'
+ result = urllib.parse.unquote(given)
+ self.assertEqual(expect, result,
+ "using unquote(): %r != %r" % (expect, result))
+
+ # A mix of non-ASCII percent-encoded characters and ASCII characters
+ given = b'bl%c3%a5b%c3%a6rsyltet%c3%b8j'
+ expect = 'bl\u00e5b\u00e6rsyltet\u00f8j'
+ result = urllib.parse.unquote(given)
+ self.assertEqual(expect, result,
+ "using unquote(): %r != %r" % (expect, result))
+
+
class urlencode_Tests(unittest.TestCase):
"""Tests for urlencode()"""
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index b6608783a8947..3a38dc14c9047 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -631,6 +631,8 @@ def unquote(string, encoding='utf-8', errors='replace'):
unquote('abc%20def') -> 'abc def'.
"""
+ if isinstance(string, bytes):
+ return unquote_to_bytes(string).decode(encoding, errors)
if '%' not in string:
string.split
return string
diff --git a/Misc/NEWS.d/next/Library/2018-06-17-21-02-25.bpo-32498.La3TZz.rst b/Misc/NEWS.d/next/Library/2018-06-17-21-02-25.bpo-32498.La3TZz.rst
new file mode 100644
index 0000000000000..9df9e65e55b3e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-06-17-21-02-25.bpo-32498.La3TZz.rst
@@ -0,0 +1,2 @@
+Made :func:`urllib.parse.unquote()` accept bytes in addition to strings.
+Patch by Stein Karlsen.
More information about the Python-checkins mailing list