From 4f76c449f6bc97c827a11ea1cc5dc406bd1ed0a2 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Thu, 16 Jun 2022 01:57:04 -0700
Subject: [PATCH 01/17] wip: alternative fix for gh-87389

still needs test for urlunsplit() change
---
 Lib/http/server.py           | 18 +++++++++++++-----
 Lib/test/test_httpservers.py | 24 ++++++++++++++++++++++--
 Lib/urllib/parse.py          | 10 ++++++++--
 3 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/Lib/http/server.py b/Lib/http/server.py
index d115dfc162bb14..3794b15f8cfd27 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -678,13 +678,10 @@ def send_head(self):
         path = self.translate_path(self.path)
         f = None
         if os.path.isdir(path):
-            parts = urllib.parse.urlsplit(self.path)
-            if not parts.path.endswith('/'):
+            new_url = _add_trailing_slash(self.path)
+            if new_url:
                 # redirect browser - doing basically what apache does
                 self.send_response(HTTPStatus.MOVED_PERMANENTLY)
-                new_parts = (parts[0], parts[1], parts[2] + '/',
-                             parts[3], parts[4])
-                new_url = urllib.parse.urlunsplit(new_parts)
                 self.send_header("Location", new_url)
                 self.send_header("Content-Length", "0")
                 self.end_headers()
@@ -881,6 +878,17 @@ def guess_type(self, path):
         return 'application/octet-stream'
 
 
+def _add_trailing_slash(path):
+    """Returns URL with trailing slash on path, if required.  If not required,
+    returns None.
+    """
+    path, _, fragment = path.partition('#')
+    path, _, query = path.partition('?')
+    if path.endswith('/'):
+        return None  # already has slash, no redirect needed
+    return urllib.parse.urlunsplit(('', '', path + '/', query, fragment))
+
+
 # Utilities for CGIHTTPRequestHandler
 
 def _url_collapse_path(path):
diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py
index 1f041aa121f974..4fc2d6a3145f7a 100644
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@@ -334,7 +334,7 @@ class request_handler(NoLogRequestHandler, SimpleHTTPRequestHandler):
         pass
 
     def setUp(self):
-        BaseTestCase.setUp(self)
+        super().setUp()
         self.cwd = os.getcwd()
         basetempdir = tempfile.gettempdir()
         os.chdir(basetempdir)
@@ -362,7 +362,7 @@ def tearDown(self):
             except:
                 pass
         finally:
-            BaseTestCase.tearDown(self)
+            super().tearDown()
 
     def check_status_and_reason(self, response, status, data=None):
         def close_conn():
@@ -418,6 +418,26 @@ def test_undecodable_filename(self):
         self.check_status_and_reason(response, HTTPStatus.OK,
                                      data=os_helper.TESTFN_UNDECODABLE)
 
+    def test_get_dir_redirect_location_domain_injection_bug(self):
+        """Ensure //evil.co/..%2f../../X does not put //evil.co/ in Location.
+
+        //domain/ in a Location header is a redirect to a new domain name.
+        https://github.com/python/cpython/issues/87389
+
+        This checks that a path resolving to a directory on our server cannot
+        resolve into a redirect to another server telling it that the
+        directory in question exists on the Referrer server.
+        """
+        os.mkdir(os.path.join(self.tempdir, 'existing_directory'))
+        attack_url = f'//python.org/..%2f..%2f..%2f..%2f..%2f../%0a%0d/../{self.tempdir_name}/existing_directory'
+        response = self.request(attack_url)
+        self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY)
+        location = response.getheader('Location')
+        self.assertFalse(location.startswith('//'), msg=location)
+        self.assertEqual(location, attack_url[1:] + '/',
+                msg='Expected Location: to start with a single / and '
+                'end with a / as this is a directory redirect.')
+
     def test_get(self):
         #constructs the path relative to the root directory of the HTTPServer
         response = self.request(self.base_url + '/test')
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index fd6d9f44c6268c..1c6e811f24786b 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -497,9 +497,15 @@ def urlunsplit(components):
     This may result in a slightly different, but equivalent URL, if the URL that
     was parsed originally had unnecessary delimiters (for example, a ? with an
     empty query; the RFC states that these are equivalent)."""
-    scheme, netloc, url, query, fragment, _coerce_result = (
+    scheme, netloc, path, query, fragment, _coerce_result = (
                                           _coerce_args(*components))
-    if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
+    if scheme in uses_netloc and path.startswith('//'):
+        # gh-87389: avoid confusing a path with multiple leading slashes
+        # as a URI relative reference.
+        url = '/' + path.lstrip('/')
+    else:
+        url = path
+    if netloc or (scheme and scheme in uses_netloc):
         if url and url[:1] != '/': url = '/' + url
         url = '//' + (netloc or '') + url
     if scheme:

From 06b38796c9b553fc3c69573a8bc71ecf4f149862 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Thu, 16 Jun 2022 10:52:11 -0700
Subject: [PATCH 02/17] Don't strip slashes if not building relative URL.

---
 Lib/urllib/parse.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 1c6e811f24786b..1377421943997e 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -499,7 +499,7 @@ def urlunsplit(components):
     empty query; the RFC states that these are equivalent)."""
     scheme, netloc, path, query, fragment, _coerce_result = (
                                           _coerce_args(*components))
-    if scheme in uses_netloc and path.startswith('//'):
+    if not scheme and not netloc and path.startswith('//'):
         # gh-87389: avoid confusing a path with multiple leading slashes
         # as a URI relative reference.
         url = '/' + path.lstrip('/')

From 00a3a92a8c37d8fadf9317e28afe89c1139c41e2 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Thu, 16 Jun 2022 10:52:34 -0700
Subject: [PATCH 03/17] Add unit test for urlunsplit relative URL case.

---
 Lib/test/test_urlparse.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 2f629c72ae784e..bf02961d4a0397 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -1101,6 +1101,22 @@ def test_urlsplit_normalization(self):
                         with self.assertRaises(ValueError):
                             urllib.parse.urlsplit(url)
 
+    def test_urlunsplit_relative(self):
+        cases = [
+            # expected result is a scheme/netloc relative URL
+            (('', 'a', '', '', ''), '//a'),
+            # extra leading slashes need to be stripped to avoid confusion
+            # with a relative URL
+            (('', '', '//a', '', ''), '/a'),
+            (('', '', '///a', '', ''), '/a'),
+            # not relative so extra leading slashes don't need stripping since
+            # they don't cause confusion
+            (('http', 'x.y', '//a', '', ''), 'http://x.y//a')
+        ]
+        for parts, result in cases:
+            self.assertEqual(urllib.parse.urlunsplit(parts), result)
+
+
 class Utility_Tests(unittest.TestCase):
     """Testcase to test the various utility functions in the urllib."""
     # In Python 2 this test class was in test_urllib.

From a8e1cc256d4ee7928f45486b625d02adc7d867d0 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Thu, 16 Jun 2022 12:06:45 -0700
Subject: [PATCH 04/17] sanitize paths that can be confused as scheme

---
 Lib/test/test_urlparse.py |  6 ++++--
 Lib/urllib/parse.py       | 22 +++++++++++++++++-----
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index bf02961d4a0397..a1d934f08d3212 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -1103,7 +1103,7 @@ def test_urlsplit_normalization(self):
 
     def test_urlunsplit_relative(self):
         cases = [
-            # expected result is a scheme/netloc relative URL
+            # expected result is a relative URL without netloc and scheme
             (('', 'a', '', '', ''), '//a'),
             # extra leading slashes need to be stripped to avoid confusion
             # with a relative URL
@@ -1111,7 +1111,9 @@ def test_urlunsplit_relative(self):
             (('', '', '///a', '', ''), '/a'),
             # not relative so extra leading slashes don't need stripping since
             # they don't cause confusion
-            (('http', 'x.y', '//a', '', ''), 'http://x.y//a')
+            (('http', 'x.y', '//a', '', ''), 'http://x.y//a'),
+            # avoid confusion with path containing colon
+            (('', '', 'a:b', '', ''), './a:b'),
         ]
         for parts, result in cases:
             self.assertEqual(urllib.parse.urlunsplit(parts), result)
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 1377421943997e..089358a19792b6 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -491,6 +491,10 @@ def urlunparse(components):
         url = "%s;%s" % (url, params)
     return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment)))
 
+# Returns true if path can confused with a scheme.  I.e. a relative path
+# without leading dot that includes a colon in the first component.
+_is_scheme_like = re.compile(r'[^/.][^/]*:').match
+
 def urlunsplit(components):
     """Combine the elements of a tuple as returned by urlsplit() into a
     complete URL as a string. The data argument can be any five-item iterable.
@@ -499,13 +503,21 @@ def urlunsplit(components):
     empty query; the RFC states that these are equivalent)."""
     scheme, netloc, path, query, fragment, _coerce_result = (
                                           _coerce_args(*components))
-    if not scheme and not netloc and path.startswith('//'):
-        # gh-87389: avoid confusing a path with multiple leading slashes
-        # as a URI relative reference.
-        url = '/' + path.lstrip('/')
+    if not scheme and not netloc:
+        # Building a relative URI.  Need to be careful that path is not
+        # confused with scheme or netloc.
+        if path.startswith('//'):
+            # gh-87389: don't treat first component of path as netloc
+            url = '/' + path.lstrip('/')
+        elif _is_scheme_like(path):
+            # first component has colon, ensure it will not be parsed as the
+            # scheme
+            url = './' + path
+        else:
+            url = path
     else:
         url = path
-    if netloc or (scheme and scheme in uses_netloc):
+    if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
         if url and url[:1] != '/': url = '/' + url
         url = '//' + (netloc or '') + url
     if scheme:

From 83c8332f23db8e139e8cf9beecfb791fc0bba54e Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Thu, 16 Jun 2022 12:14:00 -0700
Subject: [PATCH 05/17] Add blurb.

---
 .../2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst

diff --git a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst
new file mode 100644
index 00000000000000..6d1e13b1c31a6f
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst
@@ -0,0 +1,15 @@
+:mod:`http.server`: Fix an open redirection vulnerability in the HTTP server
+when an URI path starts with ``//``.  Vulnerability discovered, and initial
+fix proposed, by Hamza Avvan.  Change `urllib.parse.urlunsplit()` to
+sanitize `path` argument in order to avoid confusing the first component of
+the path as a net location or scheme.
+
+Co-authored-by: Gregory P. Smith <gps@google.com> #.. section: Core and
+Builtins #.. section: Library #.. section: Documentation #.. section: Tests
+#.. section: Build #.. section: Windows #.. section: macOS #.. section: IDLE
+#.. section: Tools/Demos #.. section: C API
+
+# Write your Misc/NEWS entry below.  It should be a simple ReST paragraph. #
+Don't start with "- Issue #<n>: " or "- gh-issue-<n>: " or that sort of
+stuff.
+###########################################################################

From f99e80b935613e802ade6f9d85e8e8c3595456a5 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Thu, 16 Jun 2022 12:22:21 -0700
Subject: [PATCH 06/17] Improve name and comment for _get_redirect_url().

---
 Lib/http/server.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/Lib/http/server.py b/Lib/http/server.py
index 3794b15f8cfd27..834505024322f5 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -678,7 +678,7 @@ def send_head(self):
         path = self.translate_path(self.path)
         f = None
         if os.path.isdir(path):
-            new_url = _add_trailing_slash(self.path)
+            new_url = _get_redirect_url(self.path)
             if new_url:
                 # redirect browser - doing basically what apache does
                 self.send_response(HTTPStatus.MOVED_PERMANENTLY)
@@ -878,10 +878,16 @@ def guess_type(self, path):
         return 'application/octet-stream'
 
 
-def _add_trailing_slash(path):
+def _get_redirect_url(path):
     """Returns URL with trailing slash on path, if required.  If not required,
     returns None.
     """
+    # It would be simpler to use urllib.parse.urlsplit() here.  However, the
+    # 'path' is not truly a URI in that it can't have a scheme or netloc.  We
+    # need to avoid parsing it incorrectly.  For example, as reported in
+    # gh-87389, a path starting with a double slash should not be treated as a
+    # relative URI.  Also, a path with a colon in the first component could
+    # also be parsed wrongly.
     path, _, fragment = path.partition('#')
     path, _, query = path.partition('?')
     if path.endswith('/'):

From e542578a84c40fd4cecdc4929471692e2d49ef3e Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Thu, 16 Jun 2022 12:49:35 -0700
Subject: [PATCH 07/17] Add urllib.parse.pathsplit() function.

---
 Doc/library/urllib.parse.rst | 12 ++++++++++++
 Lib/http/server.py           |  8 ++++----
 Lib/urllib/parse.py          | 25 ++++++++++++++++++++++++-
 3 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index 1478b34bc95514..d460ef77b35480 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -339,6 +339,18 @@ or on combining URL components into a URL string.
 
 .. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser
 
+.. function:: pathsplit(abs_path)
+
+   Parse an absolute path that includes an optional query and fragment. Like
+   :func:`urlsplit`, this function returns a 5-item :term:`named tuple`::
+
+      (addressing scheme, network location, path, query, fragment identifier).
+
+   The scheme and network location components will always be empty.
+
+   .. versionadded:: 3.11
+
+
 .. function:: urlunsplit(parts)
 
    Combine the elements of a tuple as returned by :func:`urlsplit` into a
diff --git a/Lib/http/server.py b/Lib/http/server.py
index 834505024322f5..4cf856240a41bb 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -888,11 +888,11 @@ def _get_redirect_url(path):
     # gh-87389, a path starting with a double slash should not be treated as a
     # relative URI.  Also, a path with a colon in the first component could
     # also be parsed wrongly.
-    path, _, fragment = path.partition('#')
-    path, _, query = path.partition('?')
-    if path.endswith('/'):
+    parts = urllib.parse.pathsplit(path)
+    if parts.path.endswith('/'):
         return None  # already has slash, no redirect needed
-    return urllib.parse.urlunsplit(('', '', path + '/', query, fragment))
+    return urllib.parse.urlunsplit(('', '', parts.path + '/', parts.query,
+                                    parts.fragment))
 
 
 # Utilities for CGIHTTPRequestHandler
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 089358a19792b6..f088307c5a0608 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -36,7 +36,7 @@
 __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
            "urlsplit", "urlunsplit", "urlencode", "parse_qs",
            "parse_qsl", "quote", "quote_plus", "quote_from_bytes",
-           "unquote", "unquote_plus", "unquote_to_bytes",
+           "unquote", "unquote_plus", "unquote_to_bytes", "pathsplit",
            "DefragResult", "ParseResult", "SplitResult",
            "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"]
 
@@ -480,6 +480,29 @@ def urlsplit(url, scheme='', allow_fragments=True):
     v = SplitResult(scheme, netloc, url, query, fragment)
     return _coerce_result(v)
 
+# typed=True avoids BytesWarnings being emitted during cache key
+# comparison since this API supports both bytes and str input.
+@functools.lru_cache(typed=True)
+def pathsplit(abs_path):
+    """Parse an absolute path that includes an optional query and fragment.
+    The full syntax is:
+
+    <path>?<query>#<fragment>
+
+    The result is a named 5-tuple with fields set corresponding to the above.
+    It is either a SplitResult or SplitResultBytes object, depending on the
+    type of the url parameter.
+
+    Note that % escapes are not expanded.
+    """
+    abs_path, _coerce_result = _coerce_args(abs_path)
+    for b in _UNSAFE_URL_BYTES_TO_REMOVE:
+        abs_path = abs_path.replace(b, "")
+    abs_path, _, fragment = abs_path.partition('#')
+    abs_path, _, query = abs_path.partition('?')
+    v = SplitResult('', '', abs_path, query, fragment)
+    return _coerce_result(v)
+
 def urlunparse(components):
     """Put a parsed URL back together again.  This may result in a
     slightly different, but equivalent URL, if the URL that was parsed

From b7b0b15ce510151c0ee6d024d2654fbf7c303e33 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Thu, 16 Jun 2022 12:58:39 -0700
Subject: [PATCH 08/17] Fix blurb formating, add note about pathsplit().

---
 .../2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst    | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst
index 6d1e13b1c31a6f..92ed9a645b839e 100644
--- a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst
+++ b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst
@@ -2,14 +2,6 @@
 when an URI path starts with ``//``.  Vulnerability discovered, and initial
 fix proposed, by Hamza Avvan.  Change `urllib.parse.urlunsplit()` to
 sanitize `path` argument in order to avoid confusing the first component of
-the path as a net location or scheme.
+the path as a net location or scheme.  Add `urllib.parse.pathsplit()` function.
 
-Co-authored-by: Gregory P. Smith <gps@google.com> #.. section: Core and
-Builtins #.. section: Library #.. section: Documentation #.. section: Tests
-#.. section: Build #.. section: Windows #.. section: macOS #.. section: IDLE
-#.. section: Tools/Demos #.. section: C API
-
-# Write your Misc/NEWS entry below.  It should be a simple ReST paragraph. #
-Don't start with "- Issue #<n>: " or "- gh-issue-<n>: " or that sort of
-stuff.
-###########################################################################
+Co-authored-by: Gregory P. Smith <gps@google.com>

From 6915331310c67474a0934475e3da8bf49f85c591 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Thu, 16 Jun 2022 13:04:24 -0700
Subject: [PATCH 09/17] Improve comments and docs.

The pathsplit() function will work correctly on relative paths too so
don't say "absolute paths".  Improve comment for _get_redirect_url().
---
 Doc/library/urllib.parse.rst |  4 ++--
 Lib/http/server.py           | 12 ++++++------
 Lib/urllib/parse.py          | 14 +++++++-------
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index d460ef77b35480..cc38e3c0b2f15c 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -339,9 +339,9 @@ or on combining URL components into a URL string.
 
 .. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser
 
-.. function:: pathsplit(abs_path)
+.. function:: pathsplit(path)
 
-   Parse an absolute path that includes an optional query and fragment. Like
+   Parse a path that includes an optional query and fragment. Like
    :func:`urlsplit`, this function returns a 5-item :term:`named tuple`::
 
       (addressing scheme, network location, path, query, fragment identifier).
diff --git a/Lib/http/server.py b/Lib/http/server.py
index 4cf856240a41bb..35d28f62bcb545 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -882,12 +882,12 @@ def _get_redirect_url(path):
     """Returns URL with trailing slash on path, if required.  If not required,
     returns None.
     """
-    # It would be simpler to use urllib.parse.urlsplit() here.  However, the
-    # 'path' is not truly a URI in that it can't have a scheme or netloc.  We
-    # need to avoid parsing it incorrectly.  For example, as reported in
-    # gh-87389, a path starting with a double slash should not be treated as a
-    # relative URI.  Also, a path with a colon in the first component could
-    # also be parsed wrongly.
+    # Previous versions of this module used urllib.parse.urlsplit() here.
+    # However, the 'path' is not truly a URI in that it can't have a scheme or
+    # netloc.  We need to avoid parsing it incorrectly.  For example, as
+    # reported in gh-87389, a path starting with a double slash should not be
+    # treated as a relative URI.  Also, a path with a colon in the first
+    # component could also be parsed wrongly.
     parts = urllib.parse.pathsplit(path)
     if parts.path.endswith('/'):
         return None  # already has slash, no redirect needed
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index f088307c5a0608..32d9cdd5e2f444 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -483,8 +483,8 @@ def urlsplit(url, scheme='', allow_fragments=True):
 # typed=True avoids BytesWarnings being emitted during cache key
 # comparison since this API supports both bytes and str input.
 @functools.lru_cache(typed=True)
-def pathsplit(abs_path):
-    """Parse an absolute path that includes an optional query and fragment.
+def pathsplit(path):
+    """Parse a path that includes an optional query and fragment.
     The full syntax is:
 
     <path>?<query>#<fragment>
@@ -495,12 +495,12 @@ def pathsplit(abs_path):
 
     Note that % escapes are not expanded.
     """
-    abs_path, _coerce_result = _coerce_args(abs_path)
+    path, _coerce_result = _coerce_args(path)
     for b in _UNSAFE_URL_BYTES_TO_REMOVE:
-        abs_path = abs_path.replace(b, "")
-    abs_path, _, fragment = abs_path.partition('#')
-    abs_path, _, query = abs_path.partition('?')
-    v = SplitResult('', '', abs_path, query, fragment)
+        path = path.replace(b, "")
+    path, _, fragment = path.partition('#')
+    path, _, query = path.partition('?')
+    v = SplitResult('', '', path, query, fragment)
     return _coerce_result(v)
 
 def urlunparse(components):

From 915451cd21f48929f275da140aaee07762e7449c Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Thu, 16 Jun 2022 13:12:08 -0700
Subject: [PATCH 10/17] Add basic unit test for pathsplit().

---
 Lib/test/test_urlparse.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index a1d934f08d3212..398497d2a2e7f6 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -1118,6 +1118,17 @@ def test_urlunsplit_relative(self):
         for parts, result in cases:
             self.assertEqual(urllib.parse.urlunsplit(parts), result)
 
+    def test_pathsplit(self):
+        cases = [
+            ('//a', ('', '', '//a', '', '')),
+            ('a:b', ('', '', 'a:b', '', '')),
+            ('/a/b?x#y', ('', '', '/a/b', 'x', 'y')),
+            ('/a/b#y', ('', '', '/a/b', '', 'y')),
+            ('/a/b?x', ('', '', '/a/b', 'x', '')),
+        ]
+        for uri, result in cases:
+            self.assertEqual(urllib.parse.pathsplit(uri), result)
+
 
 class Utility_Tests(unittest.TestCase):
     """Testcase to test the various utility functions in the urllib."""

From 952a0f46d19d0163fed3a3e4178243b07750feb4 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Thu, 16 Jun 2022 13:14:41 -0700
Subject: [PATCH 11/17] Fix markup in blurb.

---
 .../Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst
index 92ed9a645b839e..a4f57f8d27b037 100644
--- a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst
+++ b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst
@@ -1,7 +1,8 @@
 :mod:`http.server`: Fix an open redirection vulnerability in the HTTP server
 when an URI path starts with ``//``.  Vulnerability discovered, and initial
-fix proposed, by Hamza Avvan.  Change `urllib.parse.urlunsplit()` to
-sanitize `path` argument in order to avoid confusing the first component of
-the path as a net location or scheme.  Add `urllib.parse.pathsplit()` function.
+fix proposed, by Hamza Avvan.  Change :func:`urllib.parse.urlunsplit` to
+sanitize ``path`` argument in order to avoid confusing the first component of
+the path as a net location or scheme.  Add :func:`urllib.parse.pathsplit`
+function.
 
 Co-authored-by: Gregory P. Smith <gps@google.com>

From 899f512ee3c6618e881c17f4ef8b6173528d1403 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Thu, 16 Jun 2022 14:11:38 -0700
Subject: [PATCH 12/17] Use pathsplit() in translate_path().

This avoids "manual parsing" of the Request-URI part of the request and
matches what _get_redirect_url() does.
---
 Lib/http/server.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Lib/http/server.py b/Lib/http/server.py
index 35d28f62bcb545..14561d9423579b 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -814,9 +814,8 @@ def translate_path(self, path):
         probably be diagnosed.)
 
         """
-        # abandon query parameters
-        path = path.split('?',1)[0]
-        path = path.split('#',1)[0]
+        # extract only path, abandon query parameters and fragment
+        path = urllib.parse.pathsplit(path).path
         # Don't forget explicit trailing slash when normalizing. Issue17324
         trailing_slash = path.rstrip().endswith('/')
         try:

From a00656c572c121e131808c18c16ae8f05f2b0be4 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Thu, 16 Jun 2022 14:13:16 -0700
Subject: [PATCH 13/17] Improved unit tests from gps.

---
 Lib/test/test_httpservers.py | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py
index 4fc2d6a3145f7a..cec060ffcccb8c 100644
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@@ -421,23 +421,38 @@ def test_undecodable_filename(self):
     def test_get_dir_redirect_location_domain_injection_bug(self):
         """Ensure //evil.co/..%2f../../X does not put //evil.co/ in Location.
 
-        //domain/ in a Location header is a redirect to a new domain name.
+        //netloc/ in a Location header is a redirect to a new host.
         https://github.com/python/cpython/issues/87389
 
         This checks that a path resolving to a directory on our server cannot
-        resolve into a redirect to another server telling it that the
-        directory in question exists on the Referrer server.
+        resolve into a redirect to another server.
         """
         os.mkdir(os.path.join(self.tempdir, 'existing_directory'))
-        attack_url = f'//python.org/..%2f..%2f..%2f..%2f..%2f../%0a%0d/../{self.tempdir_name}/existing_directory'
+        url = f'/python.org/..%2f..%2f..%2f..%2f..%2f../%0a%0d/../{self.tempdir_name}/existing_directory'
+        expected_location = f'{url}/'  # /python.org.../ single slash single prefix, trailing slash
+        # Canonicalizes to /tmp/tempdir_name/existing_directory which does
+        # exist and is a dir, triggering the 301 redirect logic.
+        response = self.request(url)
+        self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY)
+        location = response.getheader('Location')
+        self.assertEqual(location, expected_location, msg='non-attack failed!')
+
+        # //python.org... multi-slash prefix, no trailing slash
+        attack_url = f'/{url}'
         response = self.request(attack_url)
         self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY)
         location = response.getheader('Location')
         self.assertFalse(location.startswith('//'), msg=location)
-        self.assertEqual(location, attack_url[1:] + '/',
-                msg='Expected Location: to start with a single / and '
+        self.assertEqual(location, expected_location,
+                msg='Expected Location header to start with a single / and '
                 'end with a / as this is a directory redirect.')
 
+        # ///python.org... triple-slash prefix, no trailing slash
+        attack3_url = f'//{url}'
+        response = self.request(attack3_url)
+        self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY)
+        self.assertEqual(response.getheader('Location'), expected_location)
+
     def test_get(self):
         #constructs the path relative to the root directory of the HTTPServer
         response = self.request(self.base_url + '/test')

From 89858530bc6b4efeba195578f92e943d92e4b046 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Thu, 16 Jun 2022 14:13:39 -0700
Subject: [PATCH 14/17] Add test for Request-URI containing scheme.

---
 Lib/test/test_httpservers.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py
index cec060ffcccb8c..dac2f80f6a0df1 100644
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@@ -453,6 +453,21 @@ def test_get_dir_redirect_location_domain_injection_bug(self):
         self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY)
         self.assertEqual(response.getheader('Location'), expected_location)
 
+        # If the second word in the http request (Request-URI for the http
+        # method) has a scheme and netloc, it still gets treated as an
+        # absolute path by the server.  In that case, the redirect is
+        # constructed so it is parsed as a path.  The './' part of the path
+        # is added by urlunsplit() so that the 'https:' part of what is being
+        # treated as a path is not treated as a scheme in the redirect
+        # location.  http.server is not a proxy and doesn't handle Request-URI
+        # being an absolute URI with a scheme and or netloc.
+        attack_scheme_netloc_2slash_url = f'https://pypi.org/{url}'
+        expected_location = f'./{attack_scheme_netloc_2slash_url}/'
+        response = self.request(attack_scheme_netloc_2slash_url)
+        self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY)
+        location = response.getheader('Location')
+        self.assertEqual(location, expected_location)
+
     def test_get(self):
         #constructs the path relative to the root directory of the HTTPServer
         response = self.request(self.base_url + '/test')

From f1f94aea419921860153853a7b568ee07659ea7e Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Thu, 16 Jun 2022 14:24:04 -0700
Subject: [PATCH 15/17] Make _get_redirect_url() into a method.

Possible that someone could override this so a method is nicer.
---
 Lib/http/server.py | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/Lib/http/server.py b/Lib/http/server.py
index 14561d9423579b..6fe16fc4df355e 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -664,6 +664,23 @@ def do_HEAD(self):
         if f:
             f.close()
 
+    def _get_redirect_url_for_dir(self):
+        """Returns URL with trailing slash on path, if required.  If not
+        required, returns None.
+        """
+        # Previous versions of this class used urllib.parse.urlsplit() here.
+        # However, the 'path' is being treated as a local filesystem path and
+        # it can't have a scheme or netloc.  We need to avoid parsing it
+        # incorrectly.  For example, as reported in gh-87389, a path starting
+        # with a double slash should not be treated as a relative URI.  Also, a
+        # path with a colon in the first component could also be parsed
+        # wrongly.
+        parts = urllib.parse.pathsplit(self.path)
+        if parts.path.endswith('/'):
+            return None  # already has slash, no redirect needed
+        return urllib.parse.urlunsplit(('', '', parts.path + '/', parts.query,
+                                        parts.fragment))
+
     def send_head(self):
         """Common code for GET and HEAD commands.
 
@@ -678,7 +695,7 @@ def send_head(self):
         path = self.translate_path(self.path)
         f = None
         if os.path.isdir(path):
-            new_url = _get_redirect_url(self.path)
+            new_url = self._get_redirect_url_for_dir()
             if new_url:
                 # redirect browser - doing basically what apache does
                 self.send_response(HTTPStatus.MOVED_PERMANENTLY)
@@ -877,23 +894,6 @@ def guess_type(self, path):
         return 'application/octet-stream'
 
 
-def _get_redirect_url(path):
-    """Returns URL with trailing slash on path, if required.  If not required,
-    returns None.
-    """
-    # Previous versions of this module used urllib.parse.urlsplit() here.
-    # However, the 'path' is not truly a URI in that it can't have a scheme or
-    # netloc.  We need to avoid parsing it incorrectly.  For example, as
-    # reported in gh-87389, a path starting with a double slash should not be
-    # treated as a relative URI.  Also, a path with a colon in the first
-    # component could also be parsed wrongly.
-    parts = urllib.parse.pathsplit(path)
-    if parts.path.endswith('/'):
-        return None  # already has slash, no redirect needed
-    return urllib.parse.urlunsplit(('', '', parts.path + '/', parts.query,
-                                    parts.fragment))
-
-
 # Utilities for CGIHTTPRequestHandler
 
 def _url_collapse_path(path):

From 8a34cd002fef6139157aed390061d4450f4cb636 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Thu, 16 Jun 2022 15:40:39 -0700
Subject: [PATCH 16/17] Futher cleanups, remove urllib.parse.pathsplit().

Since pathsplit() doesn't seem like a generally useful public API,
remove it.  Instead, add a _request_path_split() method.  This ensures
that the redirect logic and the translate_path() method use the same
path parsing.
---
 Doc/library/urllib.parse.rst                  | 12 ---------
 Lib/http/server.py                            | 14 +++++++++--
 Lib/test/test_urlparse.py                     | 14 ++---------
 Lib/urllib/parse.py                           | 25 +------------------
 ...2-06-16-12-13-55.gh-issue-87389.MS9wAR.rst |  5 ++--
 5 files changed, 17 insertions(+), 53 deletions(-)

diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index cc38e3c0b2f15c..1478b34bc95514 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -339,18 +339,6 @@ or on combining URL components into a URL string.
 
 .. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser
 
-.. function:: pathsplit(path)
-
-   Parse a path that includes an optional query and fragment. Like
-   :func:`urlsplit`, this function returns a 5-item :term:`named tuple`::
-
-      (addressing scheme, network location, path, query, fragment identifier).
-
-   The scheme and network location components will always be empty.
-
-   .. versionadded:: 3.11
-
-
 .. function:: urlunsplit(parts)
 
    Combine the elements of a tuple as returned by :func:`urlsplit` into a
diff --git a/Lib/http/server.py b/Lib/http/server.py
index 6fe16fc4df355e..81295efc42bb15 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -664,6 +664,16 @@ def do_HEAD(self):
         if f:
             f.close()
 
+    def _request_path_split(self, path):
+        """Parse a path that can include an optional query and fragment.
+        """
+        # We only handle the 'abs_path' case for the Request-URI part of the
+        # request line (the second word).  We don't handle the case of a URL
+        # containing a scheme or netloc.
+        path, _, query = path.partition('?')
+        path, _, fragment = path.partition('#')
+        return urllib.parse.SplitResult('', '', path, query, fragment)
+
     def _get_redirect_url_for_dir(self):
         """Returns URL with trailing slash on path, if required.  If not
         required, returns None.
@@ -675,7 +685,7 @@ def _get_redirect_url_for_dir(self):
         # with a double slash should not be treated as a relative URI.  Also, a
         # path with a colon in the first component could also be parsed
         # wrongly.
-        parts = urllib.parse.pathsplit(self.path)
+        parts = self._request_path_split(self.path)
         if parts.path.endswith('/'):
             return None  # already has slash, no redirect needed
         return urllib.parse.urlunsplit(('', '', parts.path + '/', parts.query,
@@ -832,7 +842,7 @@ def translate_path(self, path):
 
         """
         # extract only path, abandon query parameters and fragment
-        path = urllib.parse.pathsplit(path).path
+        path = self._request_path_split(path).path
         # Don't forget explicit trailing slash when normalizing. Issue17324
         trailing_slash = path.rstrip().endswith('/')
         try:
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 398497d2a2e7f6..3782d4cdd800e4 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -1116,18 +1116,8 @@ def test_urlunsplit_relative(self):
             (('', '', 'a:b', '', ''), './a:b'),
         ]
         for parts, result in cases:
-            self.assertEqual(urllib.parse.urlunsplit(parts), result)
-
-    def test_pathsplit(self):
-        cases = [
-            ('//a', ('', '', '//a', '', '')),
-            ('a:b', ('', '', 'a:b', '', '')),
-            ('/a/b?x#y', ('', '', '/a/b', 'x', 'y')),
-            ('/a/b#y', ('', '', '/a/b', '', 'y')),
-            ('/a/b?x', ('', '', '/a/b', 'x', '')),
-        ]
-        for uri, result in cases:
-            self.assertEqual(urllib.parse.pathsplit(uri), result)
+            self.assertEqual(urllib.parse.urlunsplit(parts), result,
+                             msg=f'{parts=}')
 
 
 class Utility_Tests(unittest.TestCase):
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 32d9cdd5e2f444..089358a19792b6 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -36,7 +36,7 @@
 __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
            "urlsplit", "urlunsplit", "urlencode", "parse_qs",
            "parse_qsl", "quote", "quote_plus", "quote_from_bytes",
-           "unquote", "unquote_plus", "unquote_to_bytes", "pathsplit",
+           "unquote", "unquote_plus", "unquote_to_bytes",
            "DefragResult", "ParseResult", "SplitResult",
            "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"]
 
@@ -480,29 +480,6 @@ def urlsplit(url, scheme='', allow_fragments=True):
     v = SplitResult(scheme, netloc, url, query, fragment)
     return _coerce_result(v)
 
-# typed=True avoids BytesWarnings being emitted during cache key
-# comparison since this API supports both bytes and str input.
-@functools.lru_cache(typed=True)
-def pathsplit(path):
-    """Parse a path that includes an optional query and fragment.
-    The full syntax is:
-
-    <path>?<query>#<fragment>
-
-    The result is a named 5-tuple with fields set corresponding to the above.
-    It is either a SplitResult or SplitResultBytes object, depending on the
-    type of the url parameter.
-
-    Note that % escapes are not expanded.
-    """
-    path, _coerce_result = _coerce_args(path)
-    for b in _UNSAFE_URL_BYTES_TO_REMOVE:
-        path = path.replace(b, "")
-    path, _, fragment = path.partition('#')
-    path, _, query = path.partition('?')
-    v = SplitResult('', '', path, query, fragment)
-    return _coerce_result(v)
-
 def urlunparse(components):
     """Put a parsed URL back together again.  This may result in a
     slightly different, but equivalent URL, if the URL that was parsed
diff --git a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst
index a4f57f8d27b037..15117f362792fe 100644
--- a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst
+++ b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst
@@ -2,7 +2,6 @@
 when an URI path starts with ``//``.  Vulnerability discovered, and initial
 fix proposed, by Hamza Avvan.  Change :func:`urllib.parse.urlunsplit` to
 sanitize ``path`` argument in order to avoid confusing the first component of
-the path as a net location or scheme.  Add :func:`urllib.parse.pathsplit`
-function.
+the path as a net location or scheme.
 
-Co-authored-by: Gregory P. Smith <gps@google.com>
+Co-authored-by: Gregory P. Smith [Google LLC] <gps@google.com>

From 23d4b563fb6903650de053195093819ce829ab03 Mon Sep 17 00:00:00 2001
From: "Gregory P. Smith" <greg@krypto.org>
Date: Sun, 3 Jul 2022 12:16:34 -0700
Subject: [PATCH 17/17] reword news

---
 .../2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst      | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst
index 15117f362792fe..9dbf777bf23534 100644
--- a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst
+++ b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst
@@ -1,7 +1,5 @@
-:mod:`http.server`: Fix an open redirection vulnerability in the HTTP server
-when an URI path starts with ``//``.  Vulnerability discovered, and initial
-fix proposed, by Hamza Avvan.  Change :func:`urllib.parse.urlunsplit` to
-sanitize ``path`` argument in order to avoid confusing the first component of
-the path as a net location or scheme.
+Change :func:`urllib.parse.urlunsplit` to sanitize ``path`` argument in order
+to avoid confusing the first component of the path as a net location or
+scheme.
 
-Co-authored-by: Gregory P. Smith [Google LLC] <gps@google.com>
+Co-authored-by: Gregory P. Smith <greg@krypto.org> [Google]