diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst
index ff903fa4f6c..3a184550da8 100644
--- a/doc/usage/configuration.rst
+++ b/doc/usage/configuration.rst
@@ -3813,6 +3813,36 @@ and the number of workers to use.
 
    .. versionadded:: 7.3
 
+.. confval:: linkcheck_case_insensitive
+   :type: :code-py:`bool`
+   :default: :code-py:`False`
+
+   When :code-py:`True`, the *linkcheck* builder compares the requested URL
+   and the final response URL case-insensitively when checking for redirects.
+   This is useful for checking links on case-insensitive servers
+   (for example, GitHub, Windows-based servers, or certain hosting platforms)
+   that may return URLs with different case than the original link.
+
+   When enabled, URLs such as ``https://example.org/Path`` and
+   ``https://example.org/path`` are considered equivalent, preventing
+   false-positive redirect warnings on case-insensitive servers.
+   The comparison covers the whole URL, not only its path component.
+
+   .. note::
+
+      This option only affects the URL comparison used for redirect detection.
+      HTML anchor checking remains case-sensitive to match browser behavior,
+      where fragment identifiers (``#anchor``) are case-sensitive per the
+      HTML specification.
+
+   Example:
+
+   .. code-block:: python
+
+      linkcheck_case_insensitive = True
+
+   .. versionadded:: 8.2
+
 .. 
confval:: linkcheck_rate_limit_timeout
    :type: :code-py:`int`
    :default: :code-py:`300`
diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py
index d3ce638fea4..ac803a5cac4 100644
--- a/sphinx/builders/linkcheck.py
+++ b/sphinx/builders/linkcheck.py
@@ -409,6 +409,7 @@ def __init__(
         self.user_agent = config.user_agent
         self.tls_verify = config.tls_verify
         self.tls_cacerts = config.tls_cacerts
+        self.case_insensitive = config.linkcheck_case_insensitive
 
         self._session = requests._Session(
             _ignored_redirects=tuple(map(re.compile, config.linkcheck_ignore))
@@ -629,8 +630,19 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties:
         netloc = urlsplit(req_url).netloc
         self.rate_limits.pop(netloc, None)
 
+        # Compare URLs, optionally case-insensitively
+        def _normalise_url(url: str) -> str:
+            """Reduces a URL to a normal/equality-comparable form."""
+            normalised_url = url.rstrip('/')
+            if self.case_insensitive:
+                normalised_url = normalised_url.casefold()
+            return normalised_url
+
+        normalised_request_url = _normalise_url(req_url)
+        normalised_response_url = _normalise_url(response_url)
+
         if (
-            (response_url.rstrip('/') == req_url.rstrip('/'))
+            normalised_request_url == normalised_response_url
             or _allowed_redirect(req_url, response_url, self.allowed_redirects)
         ):  # fmt: skip
             return _Status.WORKING, '', 0
@@ -816,6 +828,9 @@ def setup(app: Sphinx) -> ExtensionMetadata:
     app.add_config_value(
         'linkcheck_report_timeouts_as_broken', False, '', types=frozenset({bool})
     )
+    app.add_config_value(
+        'linkcheck_case_insensitive', False, '', types=frozenset({bool})
+    )
 
     app.add_event('linkcheck-process-uri')
 
diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py
index a09a4a42216..9ea282f0a63 100644
--- a/tests/test_builders/test_build_linkcheck.py
+++ b/tests/test_builders/test_build_linkcheck.py
@@ -1439,3 +1439,113 @@ def test_linkcheck_exclude_documents(app: SphinxTestApp) -> None:
         'uri': 'https://www.sphinx-doc.org/this-is-another-broken-link',
         'info': 'br0ken_link matched br[0-9]ken_link from linkcheck_exclude_documents',
     } in content
+
+
+class CaseSensitiveHandler(BaseHTTPRequestHandler):
+    """Serve ``/path`` and ``/anchor.html``; answer 404 for anything else.
+
+    The handler never redirects and never alters a URL itself: the tests
+    rewrite ``response.url`` afterwards to imitate a case-insensitive server.
+    """
+
+    protocol_version = 'HTTP/1.1'
+
+    def do_HEAD(self) -> None:
+        # Known and unknown paths differ only in the status line;
+        # no body is sent for HEAD, hence the zero Content-Length.
+        if self.path in {'/path', '/anchor.html'}:
+            self.send_response(200, 'OK')
+        else:
+            self.send_response(404, 'Not Found')
+        self.send_header('Content-Length', '0')
+        self.end_headers()
+
+    def do_GET(self) -> None:
+        if self.path == '/path':
+            content = b'ok\n\n'
+        elif self.path == '/anchor.html':
+            # HTML with anchor in mixed case.
+            # NOTE(review): the original markup was lost from this patch;
+            # this minimal document is a stand-in -- confirm with the author.
+            content = b'<!DOCTYPE html><html><body><a id="MyAnchor"></a></body></html>'
+        else:
+            self.send_response(404, 'Not Found')
+            self.send_header('Content-Length', '0')
+            self.end_headers()
+            return
+
+        self.send_response(200, 'OK')
+        self.send_header('Content-Length', str(len(content)))
+        self.end_headers()
+        self.wfile.write(content)
+
+
+def _build_with_uppercased_response_path(app: SphinxTestApp) -> None:
+    """Build *app* while ``/path`` in every response URL reads ``/PATH``.
+
+    ``requests._Session.request`` is wrapped for the duration of the build so
+    that ``response.url`` differs from the requested URL only by case, which
+    is how a case-insensitive server is simulated in these tests.
+    """
+    from unittest.mock import patch
+
+    original_request = requests._Session.request
+
+    def mock_request(self, method, url, **kwargs):
+        response = original_request(self, method, url, **kwargs)
+        if '/path' in str(response.url).lower():
+            response.url = str(response.url).replace('/path', '/PATH')
+        return response
+
+    with patch.object(requests._Session, 'request', mock_request):
+        app.build()
+
+
+@pytest.mark.sphinx(
+    'linkcheck',
+    testroot='linkcheck-localserver',
+    freshenv=True,
+    confoverrides={'linkcheck_case_insensitive': False},
+)
+def test_linkcheck_case_sensitive(app: SphinxTestApp) -> None:
+    """Test that case-sensitive checking is the default behavior."""
+    with serve_application(app, CaseSensitiveHandler) as address:
+        _build_with_uppercased_response_path(app)
+
+    content = (app.outdir / 'output.json').read_text(encoding='utf8')
+    rows = [json.loads(x) for x in content.splitlines()]
+    rowsby = {row['uri']: row for row in rows}
+
+    # With case-sensitive checking, a response URL that differs only in case
+    # must be reported as a redirect.
+    # NOTE(review): assumes the test root links to ``/path`` -- confirm.
+    # Membership is asserted rather than used as an ``if`` guard, so a
+    # missing link fails the test instead of silently skipping the check.
+    lowercase_uri = f'http://{address}/path'
+    assert lowercase_uri in rowsby
+    assert rowsby[lowercase_uri]['status'] == 'redirected'
+
+
+@pytest.mark.sphinx(
+    'linkcheck',
+    testroot='linkcheck-localserver',
+    freshenv=True,
+    confoverrides={'linkcheck_case_insensitive': True},
+)
+def test_linkcheck_case_insensitive(app: SphinxTestApp) -> None:
+    """Test that linkcheck_case_insensitive=True ignores case differences in URLs."""
+    with serve_application(app, CaseSensitiveHandler) as address:
+        _build_with_uppercased_response_path(app)
+
+    content = (app.outdir / 'output.json').read_text(encoding='utf8')
+    rows = [json.loads(x) for x in content.splitlines()]
+    rowsby = {row['uri']: row for row in rows}
+
+    # With case-insensitive checking, a response URL that differs only in
+    # case must be reported as working.
+    # NOTE(review): assumes the test root links to ``/path`` -- confirm.
+    # Membership is asserted rather than used as an ``if`` guard, so a
+    # missing link fails the test instead of silently skipping the check.
+    lowercase_uri = f'http://{address}/path'
+    assert lowercase_uri in rowsby
+    assert rowsby[lowercase_uri]['status'] == 'working'