[Python-checkins] bpo-43882 Remove the newline, and tab early. From query and fragments. (GH-25936)
orsenthil
webhook-mailer at python.org
Wed May 5 19:04:47 EDT 2021
https://github.com/python/cpython/commit/24f1d1a8a2c4aa58a606b4b6d5fa4305a3b91705
commit: 24f1d1a8a2c4aa58a606b4b6d5fa4305a3b91705
branch: 3.10
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: orsenthil <skumaran at gatech.edu>
date: 2021-05-05T16:04:38-07:00
summary:
bpo-43882 Remove the newline, and tab early. From query and fragments. (GH-25936)
(cherry picked from commit 985ac016373403e8ad41f8d563c4355ffa8d49ff)
files:
M Lib/test/test_urlparse.py
M Lib/urllib/parse.py
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 67341fecef94cd..31943f357f49f3 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -614,32 +614,40 @@ def test_urlsplit_attributes(self):
def test_urlsplit_remove_unsafe_bytes(self):
# Remove ASCII tabs and newlines from input
- url = "http://www.python.org/java\nscript:\talert('msg\r\n')/#frag"
+ url = "http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "www.python.org")
self.assertEqual(p.path, "/javascript:alert('msg')/")
- self.assertEqual(p.query, "")
- self.assertEqual(p.fragment, "frag")
+ self.assertEqual(p.query, "query=something")
+ self.assertEqual(p.fragment, "fragment")
self.assertEqual(p.username, None)
self.assertEqual(p.password, None)
self.assertEqual(p.hostname, "www.python.org")
self.assertEqual(p.port, None)
- self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/#frag")
+ self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
# Remove ASCII tabs and newlines from input as bytes.
- url = b"http://www.python.org/java\nscript:\talert('msg\r\n')/#frag"
+ url = b"http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, b"http")
self.assertEqual(p.netloc, b"www.python.org")
self.assertEqual(p.path, b"/javascript:alert('msg')/")
- self.assertEqual(p.query, b"")
- self.assertEqual(p.fragment, b"frag")
+ self.assertEqual(p.query, b"query=something")
+ self.assertEqual(p.fragment, b"fragment")
self.assertEqual(p.username, None)
self.assertEqual(p.password, None)
self.assertEqual(p.hostname, b"www.python.org")
self.assertEqual(p.port, None)
- self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/#frag")
+ self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment")
+
+ # with scheme as cache-key
+ url = "http://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
+ scheme = "ht\ntp"
+ for _ in range(2):
+ p = urllib.parse.urlsplit(url, scheme=scheme)
+ self.assertEqual(p.scheme, "http")
+ self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
def test_attributes_bad_port(self):
"""Check handling of invalid ports."""
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 4249163f0edde7..b35997bc00ce16 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -456,6 +456,11 @@ def urlsplit(url, scheme='', allow_fragments=True):
"""
url, scheme, _coerce_result = _coerce_args(url, scheme)
+
+ for b in _UNSAFE_URL_BYTES_TO_REMOVE:
+ url = url.replace(b, "")
+ scheme = scheme.replace(b, "")
+
allow_fragments = bool(allow_fragments)
key = url, scheme, allow_fragments, type(url), type(scheme)
cached = _parse_cache.get(key, None)
@@ -472,9 +477,6 @@ def urlsplit(url, scheme='', allow_fragments=True):
else:
scheme, url = url[:i].lower(), url[i+1:]
- for b in _UNSAFE_URL_BYTES_TO_REMOVE:
- url = url.replace(b, "")
-
if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
if (('[' in netloc and ']' not in netloc) or
More information about the Python-checkins mailing list