From 4f76c449f6bc97c827a11ea1cc5dc406bd1ed0a2 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 16 Jun 2022 01:57:04 -0700 Subject: [PATCH 01/17] wip: alternative fix for gh-87389 still needs test for urlunsplit() change --- Lib/http/server.py | 18 +++++++++++++----- Lib/test/test_httpservers.py | 24 ++++++++++++++++++++++-- Lib/urllib/parse.py | 10 ++++++++-- 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/Lib/http/server.py b/Lib/http/server.py index d115dfc162bb14..3794b15f8cfd27 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -678,13 +678,10 @@ def send_head(self): path = self.translate_path(self.path) f = None if os.path.isdir(path): - parts = urllib.parse.urlsplit(self.path) - if not parts.path.endswith('/'): + new_url = _add_trailing_slash(self.path) + if new_url: # redirect browser - doing basically what apache does self.send_response(HTTPStatus.MOVED_PERMANENTLY) - new_parts = (parts[0], parts[1], parts[2] + '/', - parts[3], parts[4]) - new_url = urllib.parse.urlunsplit(new_parts) self.send_header("Location", new_url) self.send_header("Content-Length", "0") self.end_headers() @@ -881,6 +878,17 @@ def guess_type(self, path): return 'application/octet-stream' +def _add_trailing_slash(path): + """Returns URL with trailing slash on path, if required. If not required, + returns None. + """ + path, _, fragment = path.partition('#') + path, _, query = path.partition('?') + if path.endswith('/'): + return None # already has slash, no redirect needed + return urllib.parse.urlunsplit(('', '', path + '/', query, fragment)) + + # Utilities for CGIHTTPRequestHandler def _url_collapse_path(path): diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 1f041aa121f974..4fc2d6a3145f7a 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -334,7 +334,7 @@ class request_handler(NoLogRequestHandler, SimpleHTTPRequestHandler): pass def setUp(self): - BaseTestCase.setUp(self) + super().setUp() self.cwd = os.getcwd() basetempdir = tempfile.gettempdir() os.chdir(basetempdir) @@ -362,7 +362,7 @@ def tearDown(self): except: pass finally: - BaseTestCase.tearDown(self) + super().tearDown() def check_status_and_reason(self, response, status, data=None): def close_conn(): @@ -418,6 +418,26 @@ def test_undecodable_filename(self): self.check_status_and_reason(response, HTTPStatus.OK, data=os_helper.TESTFN_UNDECODABLE) + def test_get_dir_redirect_location_domain_injection_bug(self): + """Ensure //evil.co/..%2f../../X does not put //evil.co/ in Location. + + //domain/ in a Location header is a redirect to a new domain name. + https://github.com/python/cpython/issues/87389 + + This checks that a path resolving to a directory on our server cannot + resolve into a redirect to another server telling it that the + directory in question exists on the Referrer server. + """ + os.mkdir(os.path.join(self.tempdir, 'existing_directory')) + attack_url = f'//python.org/..%2f..%2f..%2f..%2f..%2f../%0a%0d/../{self.tempdir_name}/existing_directory' + response = self.request(attack_url) + self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + location = response.getheader('Location') + self.assertFalse(location.startswith('//'), msg=location) + self.assertEqual(location, attack_url[1:] + '/', + msg='Expected Location: to start with a single / and ' + 'end with a / as this is a directory redirect.') + def test_get(self): #constructs the path relative to the root directory of the HTTPServer response = self.request(self.base_url + '/test') diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index fd6d9f44c6268c..1c6e811f24786b 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -497,9 +497,15 @@ def urlunsplit(components): This may result in a slightly different, but equivalent URL, if the URL that was parsed originally had unnecessary delimiters (for example, a ? with an empty query; the RFC states that these are equivalent).""" - scheme, netloc, url, query, fragment, _coerce_result = ( + scheme, netloc, path, query, fragment, _coerce_result = ( _coerce_args(*components)) - if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): + if scheme in uses_netloc and path.startswith('//'): + # gh-87389: avoid confusing a path with multiple leading slashes + # as a URI relative reference. + url = '/' + path.lstrip('/') + else: + url = path + if netloc or (scheme and scheme in uses_netloc): if url and url[:1] != '/': url = '/' + url url = '//' + (netloc or '') + url if scheme: From 06b38796c9b553fc3c69573a8bc71ecf4f149862 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 16 Jun 2022 10:52:11 -0700 Subject: [PATCH 02/17] Don't strip slashes if not building relative URL. --- Lib/urllib/parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 1c6e811f24786b..1377421943997e 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -499,7 +499,7 @@ def urlunsplit(components): empty query; the RFC states that these are equivalent).""" scheme, netloc, path, query, fragment, _coerce_result = ( _coerce_args(*components)) - if scheme in uses_netloc and path.startswith('//'): + if not scheme and not netloc and path.startswith('//'): # gh-87389: avoid confusing a path with multiple leading slashes # as a URI relative reference. url = '/' + path.lstrip('/') From 00a3a92a8c37d8fadf9317e28afe89c1139c41e2 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 16 Jun 2022 10:52:34 -0700 Subject: [PATCH 03/17] Add unit test for urlunsplit relative URL case. --- Lib/test/test_urlparse.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 2f629c72ae784e..bf02961d4a0397 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -1101,6 +1101,22 @@ def test_urlsplit_normalization(self): with self.assertRaises(ValueError): urllib.parse.urlsplit(url) + def test_urlunsplit_relative(self): + cases = [ + # expected result is a scheme/netloc relative URL + (('', 'a', '', '', ''), '//a'), + # extra leading slashes need to be stripped to avoid confusion + # with a relative URL + (('', '', '//a', '', ''), '/a'), + (('', '', '///a', '', ''), '/a'), + # not relative so extra leading slashes don't need stripping since + # they don't cause confusion + (('http', 'x.y', '//a', '', ''), 'http://x.y//a') + ] + for parts, result in cases: + self.assertEqual(urllib.parse.urlunsplit(parts), result) + + class Utility_Tests(unittest.TestCase): """Testcase to test the various utility functions in the urllib.""" # In Python 2 this test class was in test_urllib. From a8e1cc256d4ee7928f45486b625d02adc7d867d0 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 16 Jun 2022 12:06:45 -0700 Subject: [PATCH 04/17] sanitize paths that can be confused as scheme --- Lib/test/test_urlparse.py | 6 ++++-- Lib/urllib/parse.py | 22 +++++++++++++++++----- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index bf02961d4a0397..a1d934f08d3212 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -1103,7 +1103,7 @@ def test_urlsplit_normalization(self): def test_urlunsplit_relative(self): cases = [ - # expected result is a scheme/netloc relative URL + # expected result is a relative URL without netloc and scheme (('', 'a', '', '', ''), '//a'), # extra leading slashes need to be stripped to avoid confusion # with a relative URL @@ -1111,7 +1111,9 @@ def test_urlunsplit_relative(self): (('', '', '///a', '', ''), '/a'), # not relative so extra leading slashes don't need stripping since # they don't cause confusion - (('http', 'x.y', '//a', '', ''), 'http://x.y//a') + (('http', 'x.y', '//a', '', ''), 'http://x.y//a'), + # avoid confusion with path containing colon + (('', '', 'a:b', '', ''), './a:b'), ] for parts, result in cases: self.assertEqual(urllib.parse.urlunsplit(parts), result) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 1377421943997e..089358a19792b6 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -491,6 +491,10 @@ def urlunparse(components): url = "%s;%s" % (url, params) return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment))) +# Returns true if path can confused with a scheme. I.e. a relative path +# without leading dot that includes a colon in the first component. +_is_scheme_like = re.compile(r'[^/.][^/]*:').match + def urlunsplit(components): """Combine the elements of a tuple as returned by urlsplit() into a complete URL as a string. The data argument can be any five-item iterable. @@ -499,13 +503,21 @@ def urlunsplit(components): empty query; the RFC states that these are equivalent).""" scheme, netloc, path, query, fragment, _coerce_result = ( _coerce_args(*components)) - if not scheme and not netloc and path.startswith('//'): - # gh-87389: avoid confusing a path with multiple leading slashes - # as a URI relative reference. - url = '/' + path.lstrip('/') + if not scheme and not netloc: + # Building a relative URI. Need to be careful that path is not + # confused with scheme or netloc. + if path.startswith('//'): + # gh-87389: don't treat first component of path as netloc + url = '/' + path.lstrip('/') + elif _is_scheme_like(path): + # first component has colon, ensure it will not be parsed as the + # scheme + url = './' + path + else: + url = path else: url = path - if netloc or (scheme and scheme in uses_netloc): + if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): if url and url[:1] != '/': url = '/' + url url = '//' + (netloc or '') + url if scheme: From 83c8332f23db8e139e8cf9beecfb791fc0bba54e Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 16 Jun 2022 12:14:00 -0700 Subject: [PATCH 05/17] Add blurb. --- .../2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst diff --git a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst new file mode 100644 index 00000000000000..6d1e13b1c31a6f --- /dev/null +++ b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst @@ -0,0 +1,15 @@ +:mod:`http.server`: Fix an open redirection vulnerability in the HTTP server +when an URI path starts with ``//``. Vulnerability discovered, and initial +fix proposed, by Hamza Avvan. Change `urllib.parse.urlunsplit()` to +sanitize `path` argument in order to avoid confusing the first component of +the path as a net location or scheme. + +Co-authored-by: Gregory P. Smith #.. section: Core and +Builtins #.. section: Library #.. section: Documentation #.. section: Tests +#.. section: Build #.. section: Windows #.. section: macOS #.. section: IDLE +#.. section: Tools/Demos #.. section: C API + +# Write your Misc/NEWS entry below. It should be a simple ReST paragraph. # +Don't start with "- Issue #: " or "- gh-issue-: " or that sort of +stuff. +########################################################################### From f99e80b935613e802ade6f9d85e8e8c3595456a5 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 16 Jun 2022 12:22:21 -0700 Subject: [PATCH 06/17] Improve name and comment for _get_redirect_url(). --- Lib/http/server.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Lib/http/server.py b/Lib/http/server.py index 3794b15f8cfd27..834505024322f5 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -678,7 +678,7 @@ def send_head(self): path = self.translate_path(self.path) f = None if os.path.isdir(path): - new_url = _add_trailing_slash(self.path) + new_url = _get_redirect_url(self.path) if new_url: # redirect browser - doing basically what apache does self.send_response(HTTPStatus.MOVED_PERMANENTLY) @@ -878,10 +878,16 @@ def guess_type(self, path): return 'application/octet-stream' -def _add_trailing_slash(path): +def _get_redirect_url(path): """Returns URL with trailing slash on path, if required. If not required, returns None. """ + # It would be simpler to use urllib.parse.urlsplit() here. However, the + # 'path' is not truly a URI in that it can't have a scheme or netloc. We + # need to avoid parsing it incorrectly. For example, as reported in + # gh-87389, a path starting with a double slash should not be treated as a + # relative URI. Also, a path with a colon in the first component could + # also be parsed wrongly. path, _, fragment = path.partition('#') path, _, query = path.partition('?') if path.endswith('/'): From e542578a84c40fd4cecdc4929471692e2d49ef3e Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 16 Jun 2022 12:49:35 -0700 Subject: [PATCH 07/17] Add urllib.parse.pathsplit() function. --- Doc/library/urllib.parse.rst | 12 ++++++++++++ Lib/http/server.py | 8 ++++---- Lib/urllib/parse.py | 25 ++++++++++++++++++++++++- 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index 1478b34bc95514..d460ef77b35480 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -339,6 +339,18 @@ or on combining URL components into a URL string. .. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser +.. function:: pathsplit(abs_path) + + Parse an absolute path that includes an optional query and fragment. Like + :func:`urlsplit`, this function returns a 5-item :term:`named tuple`:: + + (addressing scheme, network location, path, query, fragment identifier). + + The scheme and network location components will always be empty. + + .. versionadded:: 3.11 + + .. function:: urlunsplit(parts) Combine the elements of a tuple as returned by :func:`urlsplit` into a diff --git a/Lib/http/server.py b/Lib/http/server.py index 834505024322f5..4cf856240a41bb 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -888,11 +888,11 @@ def _get_redirect_url(path): # gh-87389, a path starting with a double slash should not be treated as a # relative URI. Also, a path with a colon in the first component could # also be parsed wrongly. - path, _, fragment = path.partition('#') - path, _, query = path.partition('?') - if path.endswith('/'): + parts = urllib.parse.pathsplit(path) + if parts.path.endswith('/'): return None # already has slash, no redirect needed - return urllib.parse.urlunsplit(('', '', path + '/', query, fragment)) + return urllib.parse.urlunsplit(('', '', parts.path + '/', parts.query, + parts.fragment)) # Utilities for CGIHTTPRequestHandler diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 089358a19792b6..f088307c5a0608 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -36,7 +36,7 @@ __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", "urlsplit", "urlunsplit", "urlencode", "parse_qs", "parse_qsl", "quote", "quote_plus", "quote_from_bytes", - "unquote", "unquote_plus", "unquote_to_bytes", + "unquote", "unquote_plus", "unquote_to_bytes", "pathsplit", "DefragResult", "ParseResult", "SplitResult", "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"] @@ -480,6 +480,29 @@ def urlsplit(url, scheme='', allow_fragments=True): v = SplitResult(scheme, netloc, url, query, fragment) return _coerce_result(v) +# typed=True avoids BytesWarnings being emitted during cache key +# comparison since this API supports both bytes and str input. +@functools.lru_cache(typed=True) +def pathsplit(abs_path): + """Parse an absolute path that includes an optional query and fragment. + The full syntax is: + + ?# + + The result is a named 5-tuple with fields set corresponding to the above. + It is either a SplitResult or SplitResultBytes object, depending on the + type of the url parameter. + + Note that % escapes are not expanded. + """ + abs_path, _coerce_result = _coerce_args(abs_path) + for b in _UNSAFE_URL_BYTES_TO_REMOVE: + abs_path = abs_path.replace(b, "") + abs_path, _, fragment = abs_path.partition('#') + abs_path, _, query = abs_path.partition('?') + v = SplitResult('', '', abs_path, query, fragment) + return _coerce_result(v) + def urlunparse(components): """Put a parsed URL back together again. This may result in a slightly different, but equivalent URL, if the URL that was parsed From b7b0b15ce510151c0ee6d024d2654fbf7c303e33 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 16 Jun 2022 12:58:39 -0700 Subject: [PATCH 08/17] Fix blurb formating, add note about pathsplit(). --- .../2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst index 6d1e13b1c31a6f..92ed9a645b839e 100644 --- a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst +++ b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst @@ -2,14 +2,6 @@ when an URI path starts with ``//``. Vulnerability discovered, and initial fix proposed, by Hamza Avvan. Change `urllib.parse.urlunsplit()` to sanitize `path` argument in order to avoid confusing the first component of -the path as a net location or scheme. +the path as a net location or scheme. Add `urllib.parse.pathsplit()` function. -Co-authored-by: Gregory P. Smith #.. section: Core and -Builtins #.. section: Library #.. section: Documentation #.. section: Tests -#.. section: Build #.. section: Windows #.. section: macOS #.. section: IDLE -#.. section: Tools/Demos #.. section: C API - -# Write your Misc/NEWS entry below. It should be a simple ReST paragraph. # -Don't start with "- Issue #: " or "- gh-issue-: " or that sort of -stuff. -########################################################################### +Co-authored-by: Gregory P. Smith From 6915331310c67474a0934475e3da8bf49f85c591 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 16 Jun 2022 13:04:24 -0700 Subject: [PATCH 09/17] Improve comments and docs. The pathsplit() function will work correctly on relative paths too so don't say "absolute paths". Improve comment for _get_redirect_url(). --- Doc/library/urllib.parse.rst | 4 ++-- Lib/http/server.py | 12 ++++++------ Lib/urllib/parse.py | 14 +++++++------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index d460ef77b35480..cc38e3c0b2f15c 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -339,9 +339,9 @@ or on combining URL components into a URL string. .. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser -.. function:: pathsplit(abs_path) +.. function:: pathsplit(path) - Parse an absolute path that includes an optional query and fragment. Like + Parse a path that includes an optional query and fragment. Like :func:`urlsplit`, this function returns a 5-item :term:`named tuple`:: (addressing scheme, network location, path, query, fragment identifier). diff --git a/Lib/http/server.py b/Lib/http/server.py index 4cf856240a41bb..35d28f62bcb545 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -882,12 +882,12 @@ def _get_redirect_url(path): """Returns URL with trailing slash on path, if required. If not required, returns None. """ - # It would be simpler to use urllib.parse.urlsplit() here. However, the - # 'path' is not truly a URI in that it can't have a scheme or netloc. We - # need to avoid parsing it incorrectly. For example, as reported in - # gh-87389, a path starting with a double slash should not be treated as a - # relative URI. Also, a path with a colon in the first component could - # also be parsed wrongly. + # Previous versions of this module used urllib.parse.urlsplit() here. + # However, the 'path' is not truly a URI in that it can't have a scheme or + # netloc. We need to avoid parsing it incorrectly. For example, as + # reported in gh-87389, a path starting with a double slash should not be + # treated as a relative URI. Also, a path with a colon in the first + # component could also be parsed wrongly. parts = urllib.parse.pathsplit(path) if parts.path.endswith('/'): return None # already has slash, no redirect needed diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index f088307c5a0608..32d9cdd5e2f444 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -483,8 +483,8 @@ def urlsplit(url, scheme='', allow_fragments=True): # typed=True avoids BytesWarnings being emitted during cache key # comparison since this API supports both bytes and str input. @functools.lru_cache(typed=True) -def pathsplit(abs_path): - """Parse an absolute path that includes an optional query and fragment. +def pathsplit(path): + """Parse a path that includes an optional query and fragment. The full syntax is: ?# @@ -495,12 +495,12 @@ def pathsplit(abs_path): Note that % escapes are not expanded. """ - abs_path, _coerce_result = _coerce_args(abs_path) + path, _coerce_result = _coerce_args(path) for b in _UNSAFE_URL_BYTES_TO_REMOVE: - abs_path = abs_path.replace(b, "") - abs_path, _, fragment = abs_path.partition('#') - abs_path, _, query = abs_path.partition('?') - v = SplitResult('', '', abs_path, query, fragment) + path = path.replace(b, "") + path, _, fragment = path.partition('#') + path, _, query = path.partition('?') + v = SplitResult('', '', path, query, fragment) return _coerce_result(v) def urlunparse(components): From 915451cd21f48929f275da140aaee07762e7449c Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 16 Jun 2022 13:12:08 -0700 Subject: [PATCH 10/17] Add basic unit test for pathsplit(). --- Lib/test/test_urlparse.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index a1d934f08d3212..398497d2a2e7f6 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -1118,6 +1118,17 @@ def test_urlunsplit_relative(self): for parts, result in cases: self.assertEqual(urllib.parse.urlunsplit(parts), result) + def test_pathsplit(self): + cases = [ + ('//a', ('', '', '//a', '', '')), + ('a:b', ('', '', 'a:b', '', '')), + ('/a/b?x#y', ('', '', '/a/b', 'x', 'y')), + ('/a/b#y', ('', '', '/a/b', '', 'y')), + ('/a/b?x', ('', '', '/a/b', 'x', '')), + ] + for uri, result in cases: + self.assertEqual(urllib.parse.pathsplit(uri), result) + class Utility_Tests(unittest.TestCase): """Testcase to test the various utility functions in the urllib.""" From 952a0f46d19d0163fed3a3e4178243b07750feb4 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 16 Jun 2022 13:14:41 -0700 Subject: [PATCH 11/17] Fix markup in blurb. --- .../Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst index 92ed9a645b839e..a4f57f8d27b037 100644 --- a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst +++ b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst @@ -1,7 +1,8 @@ :mod:`http.server`: Fix an open redirection vulnerability in the HTTP server when an URI path starts with ``//``. Vulnerability discovered, and initial -fix proposed, by Hamza Avvan. Change `urllib.parse.urlunsplit()` to -sanitize `path` argument in order to avoid confusing the first component of -the path as a net location or scheme. Add `urllib.parse.pathsplit()` function. +fix proposed, by Hamza Avvan. Change :func:`urllib.parse.urlunsplit` to +sanitize ``path`` argument in order to avoid confusing the first component of +the path as a net location or scheme. Add :func:`urllib.parse.pathsplit` +function. Co-authored-by: Gregory P. Smith From 899f512ee3c6618e881c17f4ef8b6173528d1403 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 16 Jun 2022 14:11:38 -0700 Subject: [PATCH 12/17] Use pathsplit() in translate_path(). This avoids "manual parsing" of the Request-URI part of the request and matches what _get_redirect_url() does. --- Lib/http/server.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Lib/http/server.py b/Lib/http/server.py index 35d28f62bcb545..14561d9423579b 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -814,9 +814,8 @@ def translate_path(self, path): probably be diagnosed.) """ - # abandon query parameters - path = path.split('?',1)[0] - path = path.split('#',1)[0] + # extract only path, abandon query parameters and fragment + path = urllib.parse.pathsplit(path).path # Don't forget explicit trailing slash when normalizing. Issue17324 trailing_slash = path.rstrip().endswith('/') try: From a00656c572c121e131808c18c16ae8f05f2b0be4 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 16 Jun 2022 14:13:16 -0700 Subject: [PATCH 13/17] Improved unit tests from gps. --- Lib/test/test_httpservers.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 4fc2d6a3145f7a..cec060ffcccb8c 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -421,23 +421,38 @@ def test_undecodable_filename(self): def test_get_dir_redirect_location_domain_injection_bug(self): """Ensure //evil.co/..%2f../../X does not put //evil.co/ in Location. - //domain/ in a Location header is a redirect to a new domain name. + //netloc/ in a Location header is a redirect to a new host. https://github.com/python/cpython/issues/87389 This checks that a path resolving to a directory on our server cannot - resolve into a redirect to another server telling it that the - directory in question exists on the Referrer server. + resolve into a redirect to another server. """ os.mkdir(os.path.join(self.tempdir, 'existing_directory')) - attack_url = f'//python.org/..%2f..%2f..%2f..%2f..%2f../%0a%0d/../{self.tempdir_name}/existing_directory' + url = f'/python.org/..%2f..%2f..%2f..%2f..%2f../%0a%0d/../{self.tempdir_name}/existing_directory' + expected_location = f'{url}/' # /python.org.../ single slash single prefix, trailing slash + # Canonicalizes to /tmp/tempdir_name/existing_directory which does + # exist and is a dir, triggering the 301 redirect logic. + response = self.request(url) + self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + location = response.getheader('Location') + self.assertEqual(location, expected_location, msg='non-attack failed!') + + # //python.org... multi-slash prefix, no trailing slash + attack_url = f'/{url}' response = self.request(attack_url) self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) location = response.getheader('Location') self.assertFalse(location.startswith('//'), msg=location) - self.assertEqual(location, attack_url[1:] + '/', - msg='Expected Location: to start with a single / and ' + self.assertEqual(location, expected_location, + msg='Expected Location header to start with a single / and ' 'end with a / as this is a directory redirect.') + # ///python.org... triple-slash prefix, no trailing slash + attack3_url = f'//{url}' + response = self.request(attack3_url) + self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + self.assertEqual(response.getheader('Location'), expected_location) + def test_get(self): #constructs the path relative to the root directory of the HTTPServer response = self.request(self.base_url + '/test') From 89858530bc6b4efeba195578f92e943d92e4b046 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 16 Jun 2022 14:13:39 -0700 Subject: [PATCH 14/17] Add test for Request-URI containing scheme. --- Lib/test/test_httpservers.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index cec060ffcccb8c..dac2f80f6a0df1 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -453,6 +453,21 @@ def test_get_dir_redirect_location_domain_injection_bug(self): self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) self.assertEqual(response.getheader('Location'), expected_location) + # If the second word in the http request (Request-URI for the http + # method) has a scheme and netloc, it still gets treated as an + # absolute path by the server. In that case, the redirect is + # constructed so it is parsed as a path. The './' part of the path + # is added by urlunsplit() so that the 'https:' part of what is being + # treated as a path is not treated as a scheme in the redirect + # location. http.server is not a proxy and doesn't handle Request-URI + # being an absolute URI with a scheme and or netloc. + attack_scheme_netloc_2slash_url = f'https://pypi.org/{url}' + expected_location = f'./{attack_scheme_netloc_2slash_url}/' + response = self.request(attack_scheme_netloc_2slash_url) + self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + location = response.getheader('Location') + self.assertEqual(location, expected_location) + def test_get(self): #constructs the path relative to the root directory of the HTTPServer response = self.request(self.base_url + '/test') From f1f94aea419921860153853a7b568ee07659ea7e Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 16 Jun 2022 14:24:04 -0700 Subject: [PATCH 15/17] Make _get_redirect_url() into a method. Possible that someone could override this so a method is nicer. --- Lib/http/server.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/Lib/http/server.py b/Lib/http/server.py index 14561d9423579b..6fe16fc4df355e 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -664,6 +664,23 @@ def do_HEAD(self): if f: f.close() + def _get_redirect_url_for_dir(self): + """Returns URL with trailing slash on path, if required. If not + required, returns None. + """ + # Previous versions of this class used urllib.parse.urlsplit() here. + # However, the 'path' is being treated as a local filesystem path and + # it can't have a scheme or netloc. We need to avoid parsing it + # incorrectly. For example, as reported in gh-87389, a path starting + # with a double slash should not be treated as a relative URI. Also, a + # path with a colon in the first component could also be parsed + # wrongly. + parts = urllib.parse.pathsplit(self.path) + if parts.path.endswith('/'): + return None # already has slash, no redirect needed + return urllib.parse.urlunsplit(('', '', parts.path + '/', parts.query, + parts.fragment)) + def send_head(self): """Common code for GET and HEAD commands. @@ -678,7 +695,7 @@ def send_head(self): path = self.translate_path(self.path) f = None if os.path.isdir(path): - new_url = _get_redirect_url(self.path) + new_url = self._get_redirect_url_for_dir() if new_url: # redirect browser - doing basically what apache does self.send_response(HTTPStatus.MOVED_PERMANENTLY) @@ -877,23 +894,6 @@ def guess_type(self, path): return 'application/octet-stream' -def _get_redirect_url(path): - """Returns URL with trailing slash on path, if required. If not required, - returns None. - """ - # Previous versions of this module used urllib.parse.urlsplit() here. - # However, the 'path' is not truly a URI in that it can't have a scheme or - # netloc. We need to avoid parsing it incorrectly. For example, as - # reported in gh-87389, a path starting with a double slash should not be - # treated as a relative URI. Also, a path with a colon in the first - # component could also be parsed wrongly. - parts = urllib.parse.pathsplit(path) - if parts.path.endswith('/'): - return None # already has slash, no redirect needed - return urllib.parse.urlunsplit(('', '', parts.path + '/', parts.query, - parts.fragment)) - - # Utilities for CGIHTTPRequestHandler def _url_collapse_path(path): From 8a34cd002fef6139157aed390061d4450f4cb636 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 16 Jun 2022 15:40:39 -0700 Subject: [PATCH 16/17] Futher cleanups, remove urllib.parse.pathsplit(). Since pathsplit() doesn't seem like a generally useful public API, remove it. Instead, add a _request_path_split() method. This ensures that the redirect logic and the translate_path() method use the same path parsing. --- Doc/library/urllib.parse.rst | 12 --------- Lib/http/server.py | 14 +++++++++-- Lib/test/test_urlparse.py | 14 ++--------- Lib/urllib/parse.py | 25 +------------------ ...2-06-16-12-13-55.gh-issue-87389.MS9wAR.rst | 5 ++-- 5 files changed, 17 insertions(+), 53 deletions(-) diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index cc38e3c0b2f15c..1478b34bc95514 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -339,18 +339,6 @@ or on combining URL components into a URL string. .. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser -.. function:: pathsplit(path) - - Parse a path that includes an optional query and fragment. Like - :func:`urlsplit`, this function returns a 5-item :term:`named tuple`:: - - (addressing scheme, network location, path, query, fragment identifier). - - The scheme and network location components will always be empty. - - .. versionadded:: 3.11 - - .. function:: urlunsplit(parts) Combine the elements of a tuple as returned by :func:`urlsplit` into a diff --git a/Lib/http/server.py b/Lib/http/server.py index 6fe16fc4df355e..81295efc42bb15 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -664,6 +664,16 @@ def do_HEAD(self): if f: f.close() + def _request_path_split(self, path): + """Parse a path that can include an optional query and fragment. + """ + # We only handle the 'abs_path' case for the Request-URI part of the + # request line (the second word). We don't handle the case of a URL + # containing a scheme or netloc. + path, _, query = path.partition('?') + path, _, fragment = path.partition('#') + return urllib.parse.SplitResult('', '', path, query, fragment) + def _get_redirect_url_for_dir(self): """Returns URL with trailing slash on path, if required. If not required, returns None. @@ -675,7 +685,7 @@ def _get_redirect_url_for_dir(self): # with a double slash should not be treated as a relative URI. Also, a # path with a colon in the first component could also be parsed # wrongly. - parts = urllib.parse.pathsplit(self.path) + parts = self._request_path_split(self.path) if parts.path.endswith('/'): return None # already has slash, no redirect needed return urllib.parse.urlunsplit(('', '', parts.path + '/', parts.query, @@ -832,7 +842,7 @@ def translate_path(self, path): """ # extract only path, abandon query parameters and fragment - path = urllib.parse.pathsplit(path).path + path = self._request_path_split(path).path # Don't forget explicit trailing slash when normalizing. Issue17324 trailing_slash = path.rstrip().endswith('/') try: diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 398497d2a2e7f6..3782d4cdd800e4 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -1116,18 +1116,8 @@ def test_urlunsplit_relative(self): (('', '', 'a:b', '', ''), './a:b'), ] for parts, result in cases: - self.assertEqual(urllib.parse.urlunsplit(parts), result) - - def test_pathsplit(self): - cases = [ - ('//a', ('', '', '//a', '', '')), - ('a:b', ('', '', 'a:b', '', '')), - ('/a/b?x#y', ('', '', '/a/b', 'x', 'y')), - ('/a/b#y', ('', '', '/a/b', '', 'y')), - ('/a/b?x', ('', '', '/a/b', 'x', '')), - ] - for uri, result in cases: - self.assertEqual(urllib.parse.pathsplit(uri), result) + self.assertEqual(urllib.parse.urlunsplit(parts), result, + msg=f'{parts=}') class Utility_Tests(unittest.TestCase): diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 32d9cdd5e2f444..089358a19792b6 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -36,7 +36,7 @@ __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", "urlsplit", "urlunsplit", "urlencode", "parse_qs", "parse_qsl", "quote", "quote_plus", "quote_from_bytes", - "unquote", "unquote_plus", "unquote_to_bytes", "pathsplit", + "unquote", "unquote_plus", "unquote_to_bytes", "DefragResult", "ParseResult", "SplitResult", "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"] @@ -480,29 +480,6 @@ def urlsplit(url, scheme='', allow_fragments=True): v = SplitResult(scheme, netloc, url, query, fragment) return _coerce_result(v) -# typed=True avoids BytesWarnings being emitted during cache key -# comparison since this API supports both bytes and str input. -@functools.lru_cache(typed=True) -def pathsplit(path): - """Parse a path that includes an optional query and fragment. - The full syntax is: - - ?# - - The result is a named 5-tuple with fields set corresponding to the above. - It is either a SplitResult or SplitResultBytes object, depending on the - type of the url parameter. - - Note that % escapes are not expanded. - """ - path, _coerce_result = _coerce_args(path) - for b in _UNSAFE_URL_BYTES_TO_REMOVE: - path = path.replace(b, "") - path, _, fragment = path.partition('#') - path, _, query = path.partition('?') - v = SplitResult('', '', path, query, fragment) - return _coerce_result(v) - def urlunparse(components): """Put a parsed URL back together again. This may result in a slightly different, but equivalent URL, if the URL that was parsed diff --git a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst index a4f57f8d27b037..15117f362792fe 100644 --- a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst +++ b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst @@ -2,7 +2,6 @@ when an URI path starts with ``//``. Vulnerability discovered, and initial fix proposed, by Hamza Avvan. Change :func:`urllib.parse.urlunsplit` to sanitize ``path`` argument in order to avoid confusing the first component of -the path as a net location or scheme. Add :func:`urllib.parse.pathsplit` -function. +the path as a net location or scheme. -Co-authored-by: Gregory P. Smith +Co-authored-by: Gregory P. Smith [Google LLC] From 23d4b563fb6903650de053195093819ce829ab03 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 3 Jul 2022 12:16:34 -0700 Subject: [PATCH 17/17] reword news --- .../2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst index 15117f362792fe..9dbf777bf23534 100644 --- a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst +++ b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst @@ -1,7 +1,5 @@ -:mod:`http.server`: Fix an open redirection vulnerability in the HTTP server -when an URI path starts with ``//``. Vulnerability discovered, and initial -fix proposed, by Hamza Avvan. Change :func:`urllib.parse.urlunsplit` to -sanitize ``path`` argument in order to avoid confusing the first component of -the path as a net location or scheme. +Change :func:`urllib.parse.urlunsplit` to sanitize ``path`` argument in order +to avoid confusing the first component of the path as a net location or +scheme. -Co-authored-by: Gregory P. Smith [Google LLC] +Co-authored-by: Gregory P. Smith [Google]