|
| 1 | +From a0d225a161732f7d67333105e012d7722c521f54 Mon Sep 17 00:00:00 2001 |
| 2 | +From: JohnJamesUtley < [email protected]> |
| 3 | +Date: Tue, 25 Apr 2023 16:01:03 -0400 |
| 4 | +Subject: [PATCH 1/4] Adds checks to ensure that bracketed hosts found by |
| 5 | + urlsplit are of IPv6 or IPvFuture format |
| 6 | + |
| 7 | +Signed-off-by: ankita < [email protected]> |
| 8 | +--- |
| 9 | + Lib/test/test_urlparse.py | 23 +++++++++++++++++++ |
| 10 | + Lib/urllib/parse.py | 20 +++++++++++++--- |
| 11 | + ...-04-26-09-54-25.gh-issue-103848.aDSnpR.rst | 2 ++ |
| 12 | + 3 files changed, 42 insertions(+), 3 deletions(-) |
| 13 | + create mode 100644 Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst |
| 14 | + |
| 15 | +diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py |
| 16 | +index 574da5b..5d82358 100644 |
| 17 | +--- a/Lib/test/test_urlparse.py |
| 18 | ++++ b/Lib/test/test_urlparse.py |
| 19 | +@@ -1071,6 +1071,29 @@ class UrlParseTestCase(unittest.TestCase): |
| 20 | + self.assertEqual(p2.scheme, 'tel') |
| 21 | + self.assertEqual(p2.path, '+31641044153') |
| 22 | + |
| 23 | ++ def test_splitting_bracketed_hosts(self): |
| 24 | ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[192.0.2.146]/Path?Query') |
| 25 | ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[important.com:8000]/Path?Query') |
| 26 | ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123r.IP]/Path?Query') |
| 27 | ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v12ae]/Path?Query') |
| 28 | ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v.IP]/Path?Query') |
| 29 | ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123.]/Path?Query') |
| 30 | ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v]/Path?Query') |
| 31 | ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query') |
| 32 | ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query') |
| 33 | ++ p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query') |
| 34 | ++ self.assertEqual(p1.hostname, 'v6a.ip') |
| 35 | ++ self.assertEqual(p1.username, 'user') |
| 36 | ++ self.assertEqual(p1.path, '/path') |
| 37 | ++ p2 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7%test]/path?query') |
| 38 | ++ self.assertEqual(p2.hostname, '0439:23af:2309::fae7%test') |
| 39 | ++ self.assertEqual(p2.username, 'user') |
| 40 | ++ self.assertEqual(p2.path, '/path') |
| 41 | ++ p3 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7:1234:192.0.2.146%test]/path?query') |
| 42 | ++ self.assertEqual(p3.hostname, '0439:23af:2309::fae7:1234:192.0.2.146%test') |
| 43 | ++ self.assertEqual(p3.username, 'user') |
| 44 | ++ self.assertEqual(p3.path, '/path') |
| 45 | ++ |
| 46 | + def test_port_casting_failure_message(self): |
| 47 | + message = "Port could not be cast to integer value as 'oracle'" |
| 48 | + p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle') |
| 49 | +diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py |
| 50 | +index 5b7193f..5ab115b 100644 |
| 51 | +--- a/Lib/urllib/parse.py |
| 52 | ++++ b/Lib/urllib/parse.py |
| 53 | +@@ -36,6 +36,7 @@ import sys |
| 54 | + import types |
| 55 | + import collections |
| 56 | + import warnings |
| 57 | ++import ipaddress |
| 58 | + |
| 59 | + __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", |
| 60 | + "urlsplit", "urlunsplit", "urlencode", "parse_qs", |
| 61 | +@@ -212,7 +213,7 @@ class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr): |
| 62 | + _, _, hostinfo = netloc.rpartition('@') |
| 63 | + _, have_open_br, bracketed = hostinfo.partition('[') |
| 64 | + if have_open_br: |
| 65 | +- hostname, _, port = bracketed.partition(']') |
| 66 | ++ hostname, _, port = bracketed.rpartition(']') |
| 67 | + _, _, port = port.partition(':') |
| 68 | + else: |
| 69 | + hostname, _, port = hostinfo.partition(':') |
| 70 | +@@ -242,7 +243,7 @@ class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes): |
| 71 | + _, _, hostinfo = netloc.rpartition(b'@') |
| 72 | + _, have_open_br, bracketed = hostinfo.partition(b'[') |
| 73 | + if have_open_br: |
| 74 | +- hostname, _, port = bracketed.partition(b']') |
| 75 | ++ hostname, _, port = bracketed.rpartition(b']') |
| 76 | + _, _, port = port.partition(b':') |
| 77 | + else: |
| 78 | + hostname, _, port = hostinfo.partition(b':') |
| 79 | +@@ -442,6 +443,17 @@ def _checknetloc(netloc): |
| 80 | + raise ValueError("netloc '" + netloc + "' contains invalid " + |
| 81 | + "characters under NFKC normalization") |
| 82 | + |
| 83 | ++# Valid bracketed hosts are defined in |
| 84 | ++# https://www.rfc-editor.org/rfc/rfc3986#page-49 and https://url.spec.whatwg.org/ |
| 85 | ++def _check_bracketed_host(hostname): |
| 86 | ++ if hostname.startswith('v'): |
| 87 | ++ if not re.match(r"\Av[a-fA-F0-9]+\..+\Z", hostname): |
| 88 | ++ raise ValueError(f"IPvFuture address is invalid") |
| 89 | ++ else: |
| 90 | + ip = ipaddress.ip_address(hostname) # Raises ValueError if not a valid IPv6 or IPv4 address |
| 91 | ++ if isinstance(ip, ipaddress.IPv4Address): |
| 92 | ++ raise ValueError(f"An IPv4 address cannot be in brackets") |
| 93 | ++ |
| 94 | + def urlsplit(url, scheme='', allow_fragments=True): |
| 95 | + """Parse a URL into 5 components: |
| 96 | + <scheme>://<netloc>/<path>?<query>#<fragment> |
| 97 | +@@ -488,12 +500,14 @@ def urlsplit(url, scheme='', allow_fragments=True): |
| 98 | + break |
| 99 | + else: |
| 100 | + scheme, url = url[:i].lower(), url[i+1:] |
| 101 | +- |
| 102 | + if url[:2] == '//': |
| 103 | + netloc, url = _splitnetloc(url, 2) |
| 104 | + if (('[' in netloc and ']' not in netloc) or |
| 105 | + (']' in netloc and '[' not in netloc)): |
| 106 | + raise ValueError("Invalid IPv6 URL") |
| 107 | ++ if '[' in netloc and ']' in netloc: |
| 108 | ++ bracketed_host = netloc.partition('[')[2].rpartition(']')[0] |
| 109 | ++ _check_bracketed_host(bracketed_host) |
| 110 | + if allow_fragments and '#' in url: |
| 111 | + url, fragment = url.split('#', 1) |
| 112 | + if '?' in url: |
| 113 | +diff --git a/Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst b/Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst |
| 114 | +new file mode 100644 |
| 115 | +index 0000000..4ba1759 |
| 116 | +--- /dev/null |
| 117 | ++++ b/Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst |
| 118 | +@@ -0,0 +1,2 @@ |
| 119 | ++Add checks to ensure that bracketed hosts found by urlsplit are of IPv6 or |
| 120 | ++IPvFuture format |
| 121 | +-- |
| 122 | +2.34.1 |
| 123 | + |
| 124 | + |
| 125 | +From eea60813b908105536e0c759909217b011ba226b Mon Sep 17 00:00:00 2001 |
| 126 | +From: "Gregory P. Smith" < [email protected]> |
| 127 | +Date: Tue, 9 May 2023 08:41:46 -0700 |
| 128 | +Subject: [PATCH 2/4] ReSTify NEWS. |
| 129 | + |
| 130 | +Signed-off-by: ankita < [email protected]> |
| 131 | +--- |
| 132 | + .../Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst | 4 ++-- |
| 133 | + 1 file changed, 2 insertions(+), 2 deletions(-) |
| 134 | + |
| 135 | +diff --git a/Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst b/Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst |
| 136 | +index 4ba1759..81e5904 100644 |
| 137 | +--- a/Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst |
| 138 | ++++ b/Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst |
| 139 | +@@ -1,2 +1,2 @@ |
| 140 | +-Add checks to ensure that bracketed hosts found by urlsplit are of IPv6 or |
| 141 | +-IPvFuture format |
| 142 | ++Add checks to ensure that ``[`` bracketed ``]`` hosts found by |
| 143 | ++:func:`urllib.parse.urlsplit` are of IPv6 or IPvFuture format. |
| 144 | +-- |
| 145 | +2.34.1 |
| 146 | + |
| 147 | + |
| 148 | +From 3f8dcc1a85c173308d2a3ef2f0f52267304a59bc Mon Sep 17 00:00:00 2001 |
| 149 | +From: JohnJamesUtley < [email protected]> |
| 150 | +Date: Tue, 9 May 2023 16:21:02 -0400 |
| 151 | +Subject: [PATCH 3/4] Splits bracketed host tests in two, replaces rpartition |
| 152 | + for host brackets, adds comments, and a new test |
| 153 | + |
| 154 | +Signed-off-by: ankita < [email protected]> |
| 155 | +--- |
| 156 | + Lib/test/test_urlparse.py | 5 ++++- |
| 157 | + Lib/urllib/parse.py | 6 +++--- |
| 158 | + 2 files changed, 7 insertions(+), 4 deletions(-) |
| 159 | + |
| 160 | +diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py |
| 161 | +index 5d82358..4488589 100644 |
| 162 | +--- a/Lib/test/test_urlparse.py |
| 163 | ++++ b/Lib/test/test_urlparse.py |
| 164 | +@@ -1071,7 +1071,7 @@ class UrlParseTestCase(unittest.TestCase): |
| 165 | + self.assertEqual(p2.scheme, 'tel') |
| 166 | + self.assertEqual(p2.path, '+31641044153') |
| 167 | + |
| 168 | +- def test_splitting_bracketed_hosts(self): |
| 169 | ++ def test_invalid_bracketed_hosts(self): |
| 170 | + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[192.0.2.146]/Path?Query') |
| 171 | + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[important.com:8000]/Path?Query') |
| 172 | + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123r.IP]/Path?Query') |
| 173 | +@@ -1081,6 +1081,9 @@ class UrlParseTestCase(unittest.TestCase): |
| 174 | + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v]/Path?Query') |
| 175 | + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query') |
| 176 | + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query') |
| 177 | ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path') |
| 178 | ++ |
| 179 | ++ def test_splitting_bracketed_hosts(self): |
| 180 | + p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query') |
| 181 | + self.assertEqual(p1.hostname, 'v6a.ip') |
| 182 | + self.assertEqual(p1.username, 'user') |
| 183 | +diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py |
| 184 | +index 5ab115b..2eb3448 100644 |
| 185 | +--- a/Lib/urllib/parse.py |
| 186 | ++++ b/Lib/urllib/parse.py |
| 187 | +@@ -213,7 +213,7 @@ class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr): |
| 188 | + _, _, hostinfo = netloc.rpartition('@') |
| 189 | + _, have_open_br, bracketed = hostinfo.partition('[') |
| 190 | + if have_open_br: |
| 191 | +- hostname, _, port = bracketed.rpartition(']') |
| 192 | ++ hostname, _, port = bracketed.partition(']') |
| 193 | + _, _, port = port.partition(':') |
| 194 | + else: |
| 195 | + hostname, _, port = hostinfo.partition(':') |
| 196 | +@@ -243,7 +243,7 @@ class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes): |
| 197 | + _, _, hostinfo = netloc.rpartition(b'@') |
| 198 | + _, have_open_br, bracketed = hostinfo.partition(b'[') |
| 199 | + if have_open_br: |
| 200 | +- hostname, _, port = bracketed.rpartition(b']') |
| 201 | ++ hostname, _, port = bracketed.partition(b']') |
| 202 | + _, _, port = port.partition(b':') |
| 203 | + else: |
| 204 | + hostname, _, port = hostinfo.partition(b':') |
| 205 | +@@ -506,7 +506,7 @@ def urlsplit(url, scheme='', allow_fragments=True): |
| 206 | + (']' in netloc and '[' not in netloc)): |
| 207 | + raise ValueError("Invalid IPv6 URL") |
| 208 | + if '[' in netloc and ']' in netloc: |
| 209 | +- bracketed_host = netloc.partition('[')[2].rpartition(']')[0] |
| 210 | ++ bracketed_host = netloc.partition('[')[2].partition(']')[0] |
| 211 | + _check_bracketed_host(bracketed_host) |
| 212 | + if allow_fragments and '#' in url: |
| 213 | + url, fragment = url.split('#', 1) |
| 214 | +-- |
| 215 | +2.34.1 |
| 216 | + |
| 217 | + |
| 218 | +From 307ac68e88e93789e82eb002b7ce52d46d415f9a Mon Sep 17 00:00:00 2001 |
| 219 | +From: "Gregory P. Smith" < [email protected]> |
| 220 | +Date: Tue, 9 May 2023 16:53:54 -0700 |
| 221 | +Subject: [PATCH 4/4] remove trailing spaces |
| 222 | + |
| 223 | +Signed-off-by: ankita < [email protected]> |
| 224 | +--- |
| 225 | + Lib/test/test_urlparse.py | 2 +- |
| 226 | + 1 file changed, 1 insertion(+), 1 deletion(-) |
| 227 | + |
| 228 | +diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py |
| 229 | +index 4488589..c84df23 100644 |
| 230 | +--- a/Lib/test/test_urlparse.py |
| 231 | ++++ b/Lib/test/test_urlparse.py |
| 232 | +@@ -1082,7 +1082,7 @@ class UrlParseTestCase(unittest.TestCase): |
| 233 | + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query') |
| 234 | + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query') |
| 235 | + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path') |
| 236 | +- |
| 237 | ++ |
| 238 | + def test_splitting_bracketed_hosts(self): |
| 239 | + p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query') |
| 240 | + self.assertEqual(p1.hostname, 'v6a.ip') |
| 241 | +-- |
| 242 | +2.34.1 |
| 243 | + |
0 commit comments