From 0a32659bc852ce8d4113d1cf8440933759dd0066 Mon Sep 17 00:00:00 2001
From: Ryan Lovett
Date: Mon, 3 Nov 2025 12:00:38 -0800
Subject: [PATCH 1/3] Rewrite Location header in redirect responses.

Preserve the proxy prefix in the path info when the proxied server sends
the client to a location without a scheme or host.
---
 jupyter_server_proxy/handlers.py         | 45 +++++++++++++++++
 tests/resources/jupyter_server_config.py |  3 ++
 tests/resources/redirectserver.py        | 62 ++++++++++++++++++++++++
 tests/test_proxies.py                    | 30 ++++++++++++
 4 files changed, 140 insertions(+)
 create mode 100644 tests/resources/redirectserver.py

diff --git a/jupyter_server_proxy/handlers.py b/jupyter_server_proxy/handlers.py
index a5987fc8..a051d997 100644
--- a/jupyter_server_proxy/handlers.py
+++ b/jupyter_server_proxy/handlers.py
@@ -257,6 +257,42 @@ def _get_context_path(self, host, port):
         else:
             return url_path_join(self.base_url, "proxy", host_and_port)
 
+    def _rewrite_location_header(self, location, host, port, proxied_path):
+        """
+        Rewrite Location header in redirect responses to preserve the proxy prefix.
+
+        When a backend server issues a redirect, the Location header typically contains
+        a path relative to the backend's root. We need to prepend the proxy prefix so
+        the browser navigates to the correct proxied URL.
+
+        For example:
+        - Original Location: /subdir/
+        - Proxy context: /user/{username}/proxy/9000
+        - Rewritten Location: /user/{username}/proxy/9000/subdir/
+        """
+        # Parse the location header
+        parsed = urlparse(location)
+
+        # Only rewrite if the location is a relative path (no scheme or host)
+        if parsed.scheme or parsed.netloc:
+            # Absolute URL - leave as is
+            self.log.debug(f"Not rewriting absolute Location header: {location}")
+            return location
+
+        # Get the proxy context path
+        context_path = self._get_context_path(host, port)
+
+        # Rewrite the path to include the proxy prefix
+        new_path = url_path_join(context_path, parsed.path)
+
+        # Reconstruct the location with the rewritten path
+        rewritten = parsed._replace(path=new_path)
+
+        rewritten_location = urlunparse(rewritten)
+        self.log.info(f"Rewrote Location header: {location} -> {rewritten_location}")
+
+        return rewritten_location
+
     def get_client_uri(self, protocol, host, port, proxied_path):
         if self.absolute_url:
             context_path = self._get_context_path(host, port)
@@ -542,6 +578,15 @@ def rewrite_pe(rewritable_response: RewritableResponse):
             self._headers = httputil.HTTPHeaders()
             for header, v in rewritten_response.headers.get_all():
                 if header not in ("Content-Length", "Transfer-Encoding", "Connection"):
+                    # Rewrite Location header in redirects to preserve proxy prefix
+                    if header == "Location" and rewritten_response.code in (
+                        301,
+                        302,
+                        303,
+                        307,
+                        308,
+                    ):
+                        v = self._rewrite_location_header(v, host, port, proxied_path)
                     # some header appear multiple times, eg 'Set-Cookie'
                     self.add_header(header, v)
 
diff --git a/tests/resources/jupyter_server_config.py b/tests/resources/jupyter_server_config.py
index ac1e0dfe..9620e93e 100644
--- a/tests/resources/jupyter_server_config.py
+++ b/tests/resources/jupyter_server_config.py
@@ -143,6 +143,9 @@ def my_env():
         "unix_socket": True,
         "raw_socket_proxy": True,
     },
+    "python-redirect": {
+        "command": [sys.executable, _get_path("redirectserver.py"), "--port={port}"],
+    },
 }
 
 c.ServerProxy.non_service_rewrite_response = hello_to_foo
diff --git a/tests/resources/redirectserver.py b/tests/resources/redirectserver.py
new file mode 100644
index 00000000..64dafc9f
--- /dev/null
+++ b/tests/resources/redirectserver.py
@@ -0,0 +1,62 @@
+"""
+Simple webserver that returns 301 redirects to test Location header rewriting.
+"""
+
+import argparse
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from urllib.parse import urlparse, urlunparse
+
+
+class RedirectHandler(BaseHTTPRequestHandler):
+    """Handler that returns 301 redirects with relative Location headers."""
+    
+    def do_GET(self):
+        """
+        Handle GET requests:
+        - Requests without trailing slash: 301 redirect to path with trailing slash
+        - Requests with trailing slash: 200 OK
+        - /redirect-to/target: 301 redirect to /target
+        """
+        # Parse the path to separate path and query string
+        parsed = urlparse(self.path)
+        path = parsed.path
+        query = parsed.query
+        
+        if path.startswith("/redirect-to/"):
+            # Extract the target path (remove /redirect-to prefix)
+            target = path[len("/redirect-to"):]
+            # Preserve query string if present
+            if query:
+                target = f"{target}?{query}"
+            self.send_response(301)
+            self.send_header("Location", target)
+            self.send_header("Content-type", "text/plain")
+            self.end_headers()
+            self.wfile.write(b"Redirecting...\n")
+        elif not path.endswith("/"):
+            # Add trailing slash, preserve query string
+            new_path = path + "/"
+            if query:
+                new_location = f"{new_path}?{query}"
+            else:
+                new_location = new_path
+            self.send_response(301)
+            self.send_header("Location", new_location)
+            self.send_header("Content-type", "text/plain")
+            self.end_headers()
+            self.wfile.write(b"Redirecting...\n")
+        else:
+            # Normal response
+            self.send_response(200)
+            self.send_header("Content-type", "text/plain")
+            self.end_headers()
+            self.wfile.write(f"Success: {self.path}\n".encode())
+
+
+if __name__ == "__main__":
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--port", type=int, required=True)
+    args = ap.parse_args()
+    
+    httpd = HTTPServer(("127.0.0.1", args.port), RedirectHandler)
+    httpd.serve_forever()
diff --git a/tests/test_proxies.py b/tests/test_proxies.py
index 45f6418f..2517f892 100644
--- a/tests/test_proxies.py
+++ b/tests/test_proxies.py
@@ -528,3 +528,33 @@ async def test_server_proxy_rawsocket(
         await conn.write_message(msg)
         res = await conn.read_message()
         assert res == msg.swapcase()
+
+
+def test_server_proxy_redirect_location_header_rewrite(
+    a_server_port_and_token: Tuple[int, str],
+) -> None:
+    """
+    Test that Location headers in redirect responses are rewritten to include
+    the proxy prefix.
+
+    This can happen when servers like python's http.server issue 301
+    redirects with relative Location headers (e.g., /subdir/) that don't
+    include the proxy prefix, causing 404 errors.
+    """
+    PORT, TOKEN = a_server_port_and_token
+
+    # Test 1: Named server proxy - redirect without trailing slash
+    r = request_get(PORT, "/python-redirect/mydir", TOKEN)
+    assert r.code == 301
+    location = r.headers.get("Location")
+    # Should be rewritten to include the proxy prefix
+    # The token query parameter should be preserved in the redirect
+    assert location == f"/python-redirect/mydir/?token={TOKEN}"
+
+    # Test 2: Named server proxy - explicit redirect-to endpoint
+    r = request_get(PORT, "/python-redirect/redirect-to/target/path", TOKEN)
+    assert r.code == 301
+    location = r.headers.get("Location")
+    # Should be rewritten to include the proxy prefix
+    # The token query parameter should be preserved in the redirect
+    assert location == f"/python-redirect/target/path?token={TOKEN}"

From af200fb7a3e5a71e24542e0f6445f255470de717 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 3 Nov 2025 21:39:42 +0000
Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/resources/redirectserver.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/resources/redirectserver.py b/tests/resources/redirectserver.py
index 64dafc9f..00a829ab 100644
--- a/tests/resources/redirectserver.py
+++ b/tests/resources/redirectserver.py
@@ -4,12 +4,12 @@
 
 import argparse
 from http.server import BaseHTTPRequestHandler, HTTPServer
-from urllib.parse import urlparse, urlunparse
+from urllib.parse import urlparse
 
 
 class RedirectHandler(BaseHTTPRequestHandler):
     """Handler that returns 301 redirects with relative Location headers."""
-    
+
     def do_GET(self):
         """
         Handle GET requests:
@@ -21,10 +21,10 @@ def do_GET(self):
         parsed = urlparse(self.path)
         path = parsed.path
         query = parsed.query
-        
+
         if path.startswith("/redirect-to/"):
             # Extract the target path (remove /redirect-to prefix)
-            target = path[len("/redirect-to"):]
+            target = path[len("/redirect-to") :]
             # Preserve query string if present
             if query:
                 target = f"{target}?{query}"
@@ -57,6 +57,6 @@ def do_GET(self):
     ap = argparse.ArgumentParser()
     ap.add_argument("--port", type=int, required=True)
     args = ap.parse_args()
-    
+
     httpd = HTTPServer(("127.0.0.1", args.port), RedirectHandler)
     httpd.serve_forever()

From 5fae67601ffbfb678118abf733ea4a30d2ef4d7d Mon Sep 17 00:00:00 2001
From: Ryan Lovett
Date: Mon, 3 Nov 2025 21:51:06 -0800
Subject: [PATCH 3/3] Don't rewrite Location when absolute_url is True.

---
 jupyter_server_proxy/handlers.py         | 14 +++++-----
 tests/resources/jupyter_server_config.py |  4 +++
 tests/test_proxies.py                    | 35 ++++++++++++++++++++++++
 3 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/jupyter_server_proxy/handlers.py b/jupyter_server_proxy/handlers.py
index a051d997..4a3f4e56 100644
--- a/jupyter_server_proxy/handlers.py
+++ b/jupyter_server_proxy/handlers.py
@@ -578,13 +578,13 @@ def rewrite_pe(rewritable_response: RewritableResponse):
             self._headers = httputil.HTTPHeaders()
             for header, v in rewritten_response.headers.get_all():
                 if header not in ("Content-Length", "Transfer-Encoding", "Connection"):
-                    # Rewrite Location header in redirects to preserve proxy prefix
-                    if header == "Location" and rewritten_response.code in (
-                        301,
-                        302,
-                        303,
-                        307,
-                        308,
+                    # Rewrite Location header in redirects to preserve proxy prefix.
+                    # If absolute_url is True, the backend already sees the
+                    # full path and handles redirects appropriately.
+                    if (
+                        header == "Location"
+                        and not self.absolute_url
+                        and rewritten_response.code in (301, 302, 303, 307, 308)
                     ):
                         v = self._rewrite_location_header(v, host, port, proxied_path)
                     # some header appear multiple times, eg 'Set-Cookie'
diff --git a/tests/resources/jupyter_server_config.py b/tests/resources/jupyter_server_config.py
index 9620e93e..c1c197c0 100644
--- a/tests/resources/jupyter_server_config.py
+++ b/tests/resources/jupyter_server_config.py
@@ -146,6 +146,10 @@ def my_env():
     "python-redirect": {
         "command": [sys.executable, _get_path("redirectserver.py"), "--port={port}"],
     },
+    "python-redirect-abs": {
+        "command": [sys.executable, _get_path("redirectserver.py"), "--port={port}"],
+        "absolute_url": True,
+    },
 }
 
 c.ServerProxy.non_service_rewrite_response = hello_to_foo
diff --git a/tests/test_proxies.py b/tests/test_proxies.py
index 2517f892..5053e4b9 100644
--- a/tests/test_proxies.py
+++ b/tests/test_proxies.py
@@ -558,3 +558,38 @@ def test_server_proxy_redirect_location_header_rewrite(
     # Should be rewritten to include the proxy prefix
     # The token query parameter should be preserved in the redirect
     assert location == f"/python-redirect/target/path?token={TOKEN}"
+
+
+@pytest.mark.parametrize("a_server", ["notebook", "lab"], indirect=True)
+def test_server_proxy_redirect_location_header_absolute_url(
+    a_server_port_and_token: Tuple[int, str],
+) -> None:
+    """
+    Test that Location headers in redirect responses are not rewritten when
+    absolute_url=True is configured.
+
+    When absolute_url=True, the backend server receives the full proxy path
+    (e.g., /python-redirect-abs/mydir instead of just /mydir). The proxy does
+    not rewrite Location headers, passing them through as-is from the backend.
+
+    This means the backend must be aware of the proxy prefix to generate
+    correct redirects, which is the intended behavior of absolute_url=True.
+    """
+    PORT, TOKEN = a_server_port_and_token
+
+    # Test 1: Named server proxy with absolute_url=True, redirect without trailing slash
+    r = request_get(PORT, "/python-redirect-abs/mydir", TOKEN)
+    assert r.code == 301
+    location = r.headers.get("Location")
+    # Location header is not rewritten by proxy, passed through as-is from backend
+    # Backend sees /python-redirect-abs/mydir and adds trailing slash: /python-redirect-abs/mydir/
+    assert location == f"/python-redirect-abs/mydir/?token={TOKEN}"
+
+    # Test 2: Named server proxy with absolute_url=True, verify no rewriting occurs
+    # Request to /python-redirect-abs/abc (without trailing slash)
+    r = request_get(PORT, "/python-redirect-abs/abc", TOKEN)
+    assert r.code == 301
+    location = r.headers.get("Location")
+    # Backend returns whatever it wants, proxy doesn't rewrite it
+    # In this case, backend adds trailing slash to the full path it received
+    assert location == f"/python-redirect-abs/abc/?token={TOKEN}"