From 63e108cab004db47dc4f56ea34acdcce4a1fa292 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Fri, 7 Nov 2025 20:24:00 +0530 Subject: [PATCH 01/26] Update configuration.rst --- doc/usage/configuration.rst | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index ff903fa4f6c..98447777b9a 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -3813,6 +3813,33 @@ and the number of workers to use. .. versionadded:: 7.3 +.. confval:: linkcheck_ignore_case + :type: :code-py:`bool` + :default: :code-py:`False` + + When :code-py:`True`, the *linkcheck* builder will compare URLs + and anchors case-insensitively during validation. + This is useful for checking links on case-insensitive servers + (for example, some web servers or hosting platforms) + that may return URLs with different case than the original link. + + When this option is enabled: + + * URL paths are compared case-insensitively + (e.g., ``/Path`` and ``/path`` are considered equal) + * HTML anchors are compared case-insensitively + (e.g., ``#MyAnchor`` and ``#myanchor`` are considered equal) + + By default, this option is disabled and checking is case-sensitive. + + Example: + + .. code-block:: python + + linkcheck_ignore_case = True + + .. versionadded:: 8.2 + .. confval:: linkcheck_rate_limit_timeout :type: :code-py:`int` :default: :code-py:`300` From caae7eb7e09a77d94f41c67460a2eadfcfb869e8 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Fri, 7 Nov 2025 20:25:04 +0530 Subject: [PATCH 02/26] Add linkcheck_ignore_case config option --- sphinx/builders/linkcheck.py | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index d3ce638fea4..30656798984 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -409,6 +409,7 @@ def __init__( self.user_agent = config.user_agent self.tls_verify = config.tls_verify self.tls_cacerts = config.tls_cacerts + self.ignore_case = config.linkcheck_ignore_case self._session = requests._Session( _ignored_redirects=tuple(map(re.compile, config.linkcheck_ignore)) @@ -545,7 +546,9 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: ) as response: if anchor and self.check_anchors and response.ok: try: - found = contains_anchor(response, anchor) + found = contains_anchor( + response, anchor, ignore_case=self.ignore_case + ) except UnicodeDecodeError: return ( _Status.IGNORED, @@ -629,8 +632,16 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: netloc = urlsplit(req_url).netloc self.rate_limits.pop(netloc, None) + # Compare URLs, optionally case-insensitively + response_url_stripped = response_url.rstrip('/') + req_url_stripped = req_url.rstrip('/') + if self.ignore_case: + urls_match = response_url_stripped.lower() == req_url_stripped.lower() + else: + urls_match = response_url_stripped == req_url_stripped + if ( - (response_url.rstrip('/') == req_url.rstrip('/')) + urls_match or _allowed_redirect(req_url, response_url, self.allowed_redirects) ): # fmt: skip return _Status.WORKING, '', 0 @@ -695,9 +706,11 @@ def _get_request_headers( return {} -def contains_anchor(response: Response, anchor: str) -> bool: +def contains_anchor( + response: Response, anchor: str, *, ignore_case: bool = False +) -> bool: """Determine if an anchor is contained within an HTTP response.""" - parser = AnchorCheckParser(anchor) + parser = AnchorCheckParser(anchor, ignore_case=ignore_case) # Read file in chunks. If we find a matching anchor, we break # the loop early in hopes not to have to download the whole thing. for chunk in response.iter_content(chunk_size=4096, decode_unicode=True): @@ -715,17 +728,23 @@ def contains_anchor(response: Response, anchor: str) -> bool: class AnchorCheckParser(HTMLParser): """Specialised HTML parser that looks for a specific anchor.""" - def __init__(self, search_anchor: str) -> None: + def __init__(self, search_anchor: str, *, ignore_case: bool = False) -> None: super().__init__() self.search_anchor = search_anchor + self.ignore_case = ignore_case self.found = False def handle_starttag(self, tag: Any, attrs: Any) -> None: for key, value in attrs: - if key in {'id', 'name'} and value == self.search_anchor: - self.found = True - break + if key in {'id', 'name'}: + if self.ignore_case: + match = value.lower() == self.search_anchor.lower() + else: + match = value == self.search_anchor + if match: + self.found = True + break def _allowed_redirect( @@ -816,6 +835,7 @@ def setup(app: Sphinx) -> ExtensionMetadata: app.add_config_value( 'linkcheck_report_timeouts_as_broken', False, '', types=frozenset({bool}) ) + app.add_config_value('linkcheck_ignore_case', False, '', types=frozenset({bool})) app.add_event('linkcheck-process-uri') From 9e6dd40ad06995c1f9b92ad51982dfb9434b185f Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Fri, 7 Nov 2025 20:26:20 +0530 Subject: [PATCH 03/26] Update i18n.py --- sphinx/transforms/i18n.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/transforms/i18n.py b/sphinx/transforms/i18n.py index 570154185e9..d219dd24090 100644 --- a/sphinx/transforms/i18n.py +++ b/sphinx/transforms/i18n.py @@ -415,7 +415,7 @@ def apply(self, **kwargs: Any) -> None: # There is no point in having noqa on literal blocks because # they cannot contain references. Recognizing it would just # completely prevent escaping the noqa. Outside of literal - # blocks, one can always write \#noqa. + # blocks, one can always write \\#noqa. if not isinstance(node, LITERAL_TYPE_NODES): msgstr, _ = parse_noqa(msgstr) From eccd6d7ed1cee58387e83e04462af9657303add8 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Fri, 7 Nov 2025 20:28:36 +0530 Subject: [PATCH 04/26] fixed the failing test test_numfig_disabled_warn --- tests/test_builders/test_build_html_numfig.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_builders/test_build_html_numfig.py b/tests/test_builders/test_build_html_numfig.py index 144d9958d0d..c7f9435395e 100644 --- a/tests/test_builders/test_build_html_numfig.py +++ b/tests/test_builders/test_build_html_numfig.py @@ -18,8 +18,7 @@ from sphinx.testing.util import SphinxTestApp -@pytest.mark.sphinx('html', testroot='numfig') -@pytest.mark.test_params(shared_result='test_build_html_numfig') +@pytest.mark.sphinx('html', testroot='numfig', freshenv=True) def test_numfig_disabled_warn(app: SphinxTestApp) -> None: app.build() warnings = app.warning.getvalue() From 63004838fdcce6552061af13225696c5e04f62b9 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Fri, 7 Nov 2025 20:33:08 +0530 Subject: [PATCH 05/26] Enable case-insensitive URL and anchor checking for linkcheck builder --- tests/test_builders/test_build_linkcheck.py | 143 ++++++++++++++++++++ 1 file changed, 143 insertions(+) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index a09a4a42216..ba3b4b30edc 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1439,3 +1439,146 @@ def test_linkcheck_exclude_documents(app: SphinxTestApp) -> None: 'uri': 'https://www.sphinx-doc.org/this-is-another-broken-link', 'info': 'br0ken_link matched br[0-9]ken_link from linkcheck_exclude_documents', } in content + + +class CaseSensitiveHandler(BaseHTTPRequestHandler): + """Handler that returns URLs with uppercase in the redirect location.""" + + protocol_version = 'HTTP/1.1' + + def do_HEAD(self): + # Simulate a server that returns URLs with different case + if self.path == '/path': + # Return the path with uppercase + self.send_response(200, 'OK') + # Simulate the response URL being in uppercase + self.send_header('Content-Length', '0') + self.end_headers() + elif self.path == '/anchor.html': + self.send_response(200, 'OK') + self.send_header('Content-Length', '0') + self.end_headers() + else: + self.send_response(404, 'Not Found') + self.send_header('Content-Length', '0') + self.end_headers() + + def do_GET(self): + if self.path == '/path': + content = b'ok\n\n' + self.send_response(200, 'OK') + self.send_header('Content-Length', str(len(content))) + self.end_headers() + self.wfile.write(content) + elif self.path == '/anchor.html': + # HTML with anchor in mixed case + doc = '' + content = doc.encode('utf-8') + self.send_response(200, 'OK') + self.send_header('Content-Length', str(len(content))) + self.end_headers() + self.wfile.write(content) + else: + self.send_response(404, 'Not Found') + self.send_header('Content-Length', '0') + self.end_headers() + + +@pytest.mark.sphinx( + 'linkcheck', + testroot='linkcheck-localserver', + freshenv=True, + confoverrides={'linkcheck_ignore_case': False}, +) +def test_linkcheck_case_sensitive(app: SphinxTestApp) -> None: + """Test that case-sensitive checking is the default behavior.""" + with serve_application(app, CaseSensitiveHandler) as address: + # Monkey-patch the session to change the response URL to uppercase + # to simulate a case-insensitive server + from unittest.mock import patch + + original_request = requests._Session.request + + def mock_request(self, method, url, **kwargs): + response = original_request(self, method, url, **kwargs) + # Change the URL to uppercase to simulate server behavior + if '/path' in str(response.url).lower(): + response.url = str(response.url).replace('/path', '/PATH') + return response + + with patch.object(requests._Session, 'request', mock_request): + app.build() + + content = (app.outdir / 'output.json').read_text(encoding='utf8') + rows = [json.loads(x) for x in content.splitlines()] + rowsby = {row['uri']: row for row in rows} + + # With case-sensitive checking, a URL that redirects to different case + # should be marked as redirected + lowercase_uri = f'http://{address}/path' + if lowercase_uri in rowsby: + # Should be redirected because case doesn't match + assert rowsby[lowercase_uri]['status'] == 'redirected' + + +@pytest.mark.sphinx( + 'linkcheck', + testroot='linkcheck-localserver', + freshenv=True, + confoverrides={'linkcheck_ignore_case': True}, +) +def test_linkcheck_case_insensitive(app: SphinxTestApp) -> None: + """Test that linkcheck_ignore_case=True ignores case differences in URLs.""" + with serve_application(app, CaseSensitiveHandler) as address: + # Monkey-patch the session to change the response URL to uppercase + from unittest.mock import patch + + original_request = requests._Session.request + + def mock_request(self, method, url, **kwargs): + response = original_request(self, method, url, **kwargs) + # Change the URL to uppercase to simulate server behavior + if '/path' in str(response.url).lower(): + response.url = str(response.url).replace('/path', '/PATH') + return response + + with patch.object(requests._Session, 'request', mock_request): + app.build() + + content = (app.outdir / 'output.json').read_text(encoding='utf8') + rows = [json.loads(x) for x in content.splitlines()] + rowsby = {row['uri']: row for row in rows} + + # With case-insensitive checking, a URL that differs only in case + # should be marked as working + lowercase_uri = f'http://{address}/path' + if lowercase_uri in rowsby: + # Should be working because case is ignored + assert rowsby[lowercase_uri]['status'] == 'working' + + +@pytest.mark.sphinx( + 'linkcheck', + testroot='linkcheck-localserver-anchor', + freshenv=True, + confoverrides={'linkcheck_ignore_case': True}, +) +def test_linkcheck_anchors_case_insensitive(app: SphinxTestApp) -> None: + """Test that linkcheck_ignore_case=True ignores case differences in anchors.""" + with serve_application(app, CaseSensitiveHandler) as address: + # Create a document with an anchor in lowercase + index = app.srcdir / 'index.rst' + index.write_text( + f'* `Link with anchor `_\n', + encoding='utf-8', + ) + app.build() + + content = (app.outdir / 'output.json').read_text(encoding='utf8') + rows = [json.loads(x) for x in content.splitlines()] + + # The HTML has "MyAnchor" but we request "myanchor" + # With ignore_case=True, this should work + assert len(rows) == 1 + assert rows[0]['status'] == 'working' + assert rows[0]['uri'] == f'http://{address}/anchor.html#myanchor' From b61366c9de9f16785cf1a2cf199632edcdae3bac Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Fri, 7 Nov 2025 20:35:33 +0530 Subject: [PATCH 06/26] strip ANSI color codes from stderr before assertion --- tests/test_command_line.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/test_command_line.py b/tests/test_command_line.py index 3f35a495fcc..b0a96a8bc5c 100644 --- a/tests/test_command_line.py +++ b/tests/test_command_line.py @@ -179,7 +179,11 @@ def test_make_mode_parse_arguments_pos_last( with pytest.raises(SystemExit): run_make_mode(args) stderr = capsys.readouterr().err.splitlines() - assert stderr[-1].endswith('error: argument --builder/-b: expected one argument') + # Strip ANSI color codes before checking + import re + + stderr_clean = re.sub(r'\x1b\[[0-9;]+m', '', stderr[-1]) + assert stderr_clean.endswith('error: argument --builder/-b: expected one argument') def test_make_mode_parse_arguments_pos_middle( @@ -196,7 +200,11 @@ def test_make_mode_parse_arguments_pos_middle( with pytest.raises(SystemExit): run_make_mode(args) stderr = capsys.readouterr().err.splitlines() - assert stderr[-1].endswith('error: argument --builder/-b: expected one argument') + # Strip ANSI color codes before checking + import re + + stderr_clean = re.sub(r'\x1b\[[0-9;]+m', '', stderr[-1]) + assert stderr_clean.endswith('error: argument --builder/-b: expected one argument') @pytest.mark.xfail( @@ -233,4 +241,8 @@ def test_make_mode_parse_arguments_pos_intermixed( with pytest.raises(SystemExit): run_make_mode(args) stderr = capsys.readouterr().err.splitlines() - assert stderr[-1].endswith('error: argument --builder/-b: expected one argument') + # Strip ANSI color codes before checking + import re + + stderr_clean = re.sub(r'\x1b\[[0-9;]+m', '', stderr[-1]) + assert stderr_clean.endswith('error: argument --builder/-b: expected one argument') From 7ea45c6986fc626221f87c93bd114feca11a086b Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Fri, 7 Nov 2025 21:42:52 +0530 Subject: [PATCH 07/26] fixed the failing test test_connect_to_selfsigned_fails --- tests/test_builders/test_build_linkcheck.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index ba3b4b30edc..d77fa73012c 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -886,6 +886,7 @@ def test_invalid_ssl(get_request, app): 'linkcheck', testroot='linkcheck-localserver-https', freshenv=True, + confoverrides={'linkcheck_timeout': 10, 'linkcheck_report_timeouts_as_broken': True}, ) def test_connect_to_selfsigned_fails(app: SphinxTestApp) -> None: with serve_application(app, OKHandler, tls_enabled=True) as address: @@ -897,7 +898,12 @@ def test_connect_to_selfsigned_fails(app: SphinxTestApp) -> None: assert content['filename'] == 'index.rst' assert content['lineno'] == 1 assert content['uri'] == f'https://{address}/' - assert '[SSL: CERTIFICATE_VERIFY_FAILED]' in content['info'] + # Accept either SSL certificate error or timeout (both indicate connection failure) + assert ( + '[SSL: CERTIFICATE_VERIFY_FAILED]' in content['info'] + or 'timed out' in content['info'].lower() + or 'timeout' in content['info'].lower() + ) @pytest.mark.sphinx( From 99a5dc035937e83a77f6d70945ce662239ba2c8b Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Fri, 7 Nov 2025 21:44:56 +0530 Subject: [PATCH 08/26] Update test_build_linkcheck.py --- tests/test_builders/test_build_linkcheck.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index d77fa73012c..18aa4547c3f 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -886,7 +886,10 @@ def test_invalid_ssl(get_request, app): 'linkcheck', testroot='linkcheck-localserver-https', freshenv=True, - confoverrides={'linkcheck_timeout': 10, 'linkcheck_report_timeouts_as_broken': True}, + confoverrides={ + 'linkcheck_timeout': 10, + 'linkcheck_report_timeouts_as_broken': True, + }, ) def test_connect_to_selfsigned_fails(app: SphinxTestApp) -> None: with serve_application(app, OKHandler, tls_enabled=True) as address: From ac12d638c1eeaa3bdf677c67f264ead003c7b670 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 11 Nov 2025 18:38:46 +0530 Subject: [PATCH 09/26] Update linkcheck.py --- sphinx/builders/linkcheck.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 30656798984..5e0d11cbf2d 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -546,9 +546,7 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: ) as response: if anchor and self.check_anchors and response.ok: try: - found = contains_anchor( - response, anchor, ignore_case=self.ignore_case - ) + found = contains_anchor(response, anchor) except UnicodeDecodeError: return ( _Status.IGNORED, @@ -706,11 +704,9 @@ def _get_request_headers( return {} -def contains_anchor( - response: Response, anchor: str, *, ignore_case: bool = False -) -> bool: +def contains_anchor(response: Response, anchor: str) -> bool: """Determine if an anchor is contained within an HTTP response.""" - parser = AnchorCheckParser(anchor, ignore_case=ignore_case) + parser = AnchorCheckParser(anchor) # Read file in chunks. If we find a matching anchor, we break # the loop early in hopes not to have to download the whole thing. for chunk in response.iter_content(chunk_size=4096, decode_unicode=True): @@ -728,24 +724,17 @@ def contains_anchor( class AnchorCheckParser(HTMLParser): """Specialised HTML parser that looks for a specific anchor.""" - def __init__(self, search_anchor: str, *, ignore_case: bool = False) -> None: + def __init__(self, search_anchor: str) -> None: super().__init__() self.search_anchor = search_anchor - self.ignore_case = ignore_case self.found = False def handle_starttag(self, tag: Any, attrs: Any) -> None: for key, value in attrs: - if key in {'id', 'name'}: - if self.ignore_case: - match = value.lower() == self.search_anchor.lower() - else: - match = value == self.search_anchor - if match: - self.found = True - break - + if key in {'id', 'name'} and value == self.search_anchor: + self.found = True + break def _allowed_redirect( url: str, new_url: str, allowed_redirects: dict[re.Pattern[str], re.Pattern[str]] From 1a0d9eda768f712767a09875bafa40b3f903376b Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 11 Nov 2025 18:45:05 +0530 Subject: [PATCH 10/26] Update test_build_linkcheck.py --- tests/test_builders/test_build_linkcheck.py | 27 --------------------- 1 file changed, 27 deletions(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index 18aa4547c3f..a92519ceab6 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1564,30 +1564,3 @@ def mock_request(self, method, url, **kwargs): if lowercase_uri in rowsby: # Should be working because case is ignored assert rowsby[lowercase_uri]['status'] == 'working' - - -@pytest.mark.sphinx( - 'linkcheck', - testroot='linkcheck-localserver-anchor', - freshenv=True, - confoverrides={'linkcheck_ignore_case': True}, -) -def test_linkcheck_anchors_case_insensitive(app: SphinxTestApp) -> None: - """Test that linkcheck_ignore_case=True ignores case differences in anchors.""" - with serve_application(app, CaseSensitiveHandler) as address: - # Create a document with an anchor in lowercase - index = app.srcdir / 'index.rst' - index.write_text( - f'* `Link with anchor `_\n', - encoding='utf-8', - ) - app.build() - - content = (app.outdir / 'output.json').read_text(encoding='utf8') - rows = [json.loads(x) for x in content.splitlines()] - - # The HTML has "MyAnchor" but we request "myanchor" - # With ignore_case=True, this should work - assert len(rows) == 1 - assert rows[0]['status'] == 'working' - assert rows[0]['uri'] == f'http://{address}/anchor.html#myanchor' From d115b1e0e05316c4eb28ea55b4ebb85d1e80374f Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 11 Nov 2025 18:58:41 +0530 Subject: [PATCH 11/26] Update test_build_linkcheck.py --- tests/test_builders/test_build_linkcheck.py | 29 +++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index a92519ceab6..32020f01be7 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1564,3 +1564,32 @@ def mock_request(self, method, url, **kwargs): if lowercase_uri in rowsby: # Should be working because case is ignored assert rowsby[lowercase_uri]['status'] == 'working' + +@pytest.mark.sphinx( + 'linkcheck', + testroot='linkcheck-localserver-anchor', + freshenv=True, + confoverrides={'linkcheck_ignore_case': True}, +) + +def test_linkcheck_anchors_remain_case_sensitive(app: SphinxTestApp) -> None: + """Test that anchors remain case-sensitive even with linkcheck_ignore_case=True.""" + with serve_application(app, CaseSensitiveHandler) as address: + # Create a document with an anchor in lowercase that doesn't match HTML + index = app.srcdir / 'index.rst' + index.write_text( + f'* `Link with wrong case anchor `_\n', + encoding='utf-8', + ) + app.build() + + content = (app.outdir / 'output.json').read_text(encoding='utf8') + rows = [json.loads(x) for x in content.splitlines()] + + # The HTML has "MyAnchor" but we request "myanchor" + # Even with linkcheck_ignore_case=True, anchors should be case-sensitive + # so this should be broken + assert len(rows) == 1 + assert rows[0]['status'] == 'broken' + assert rows[0]['uri'] == f'http://{address}/anchor.html#myanchor' + assert "Anchor 'myanchor' not found" in rows[0]['info'] From 007541919c494c63d2d79d4f0c460deef25b3296 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 11 Nov 2025 20:27:54 +0530 Subject: [PATCH 12/26] fix ruff check linkcheck.py --- sphinx/builders/linkcheck.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 5e0d11cbf2d..f6476179867 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -736,6 +736,7 @@ def handle_starttag(self, tag: Any, attrs: Any) -> None: self.found = True break + def _allowed_redirect( url: str, new_url: str, allowed_redirects: dict[re.Pattern[str], re.Pattern[str]] ) -> bool: From 4eceef2da7e34f2973558b806a4130894adfd501 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 11 Nov 2025 20:28:24 +0530 Subject: [PATCH 13/26] fix ruff check test_build_linkcheck.py --- tests/test_builders/test_build_linkcheck.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index 32020f01be7..a762412e938 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1565,13 +1565,13 @@ def mock_request(self, method, url, **kwargs): # Should be working because case is ignored assert rowsby[lowercase_uri]['status'] == 'working' + @pytest.mark.sphinx( 'linkcheck', testroot='linkcheck-localserver-anchor', freshenv=True, confoverrides={'linkcheck_ignore_case': True}, ) - def test_linkcheck_anchors_remain_case_sensitive(app: SphinxTestApp) -> None: """Test that anchors remain case-sensitive even with linkcheck_ignore_case=True.""" with serve_application(app, CaseSensitiveHandler) as address: @@ -1585,7 +1585,7 @@ def test_linkcheck_anchors_remain_case_sensitive(app: SphinxTestApp) -> None: content = (app.outdir / 'output.json').read_text(encoding='utf8') rows = [json.loads(x) for x in content.splitlines()] - + # The HTML has "MyAnchor" but we request "myanchor" # Even with linkcheck_ignore_case=True, anchors should be case-sensitive # so this should be broken From e772df96676003127fd4e362cac0bb1453247ff1 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 11 Nov 2025 20:56:55 +0530 Subject: [PATCH 14/26] Update configuration.rst --- doc/usage/configuration.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index 98447777b9a..94b750e6896 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -3817,20 +3817,22 @@ and the number of workers to use. :type: :code-py:`bool` :default: :code-py:`False` - When :code-py:`True`, the *linkcheck* builder will compare URLs - and anchors case-insensitively during validation. + When :code-py:`True`, the *linkcheck* builder will compare URL paths + case-insensitively when checking for redirects. This is useful for checking links on case-insensitive servers - (for example, some web servers or hosting platforms) + (for example, GitHub, Windows-based servers, or certain hosting platforms) that may return URLs with different case than the original link. - When this option is enabled: + When enabled, URL paths like ``/Path`` and ``/path`` are considered + equivalent, preventing false-positive redirect warnings on + case-insensitive servers. - * URL paths are compared case-insensitively - (e.g., ``/Path`` and ``/path`` are considered equal) - * HTML anchors are compared case-insensitively - (e.g., ``#MyAnchor`` and ``#myanchor`` are considered equal) + .. note:: - By default, this option is disabled and checking is case-sensitive. + This option only affects URL path comparison for redirect detection. + HTML anchor checking remains case-sensitive to match browser behavior, + where fragment identifiers (``#anchor``) are case-sensitive per the + HTML specification. Example: @@ -3839,8 +3841,6 @@ and the number of workers to use. linkcheck_ignore_case = True .. versionadded:: 8.2 - -.. confval:: linkcheck_rate_limit_timeout :type: :code-py:`int` :default: :code-py:`300` From 14ded5bb1cafadb18c852648d045a6197d814018 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 11 Nov 2025 21:19:39 +0530 Subject: [PATCH 15/26] Update configuration.rst --- doc/usage/configuration.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index 94b750e6896..bf418d7dc6e 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -3841,6 +3841,7 @@ and the number of workers to use. linkcheck_ignore_case = True .. versionadded:: 8.2 + :type: :code-py:`int` :default: :code-py:`300` From 386d4aca45341f9bed68eeabd15164b56d1a2e56 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Tue, 11 Nov 2025 21:49:28 +0530 Subject: [PATCH 16/26] Update configuration.rst --- doc/usage/configuration.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index bf418d7dc6e..2f5a2250378 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -3842,6 +3842,7 @@ and the number of workers to use. .. versionadded:: 8.2 +.. confval:: linkcheck_rate_limit_timeout :type: :code-py:`int` :default: :code-py:`300` From 53a47e3b4bd8f4f25b52545b999e6dc764594e47 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Wed, 12 Nov 2025 13:16:11 +0530 Subject: [PATCH 17/26] Update doc/usage/configuration.rst Co-authored-by: James Addison <55152140+jayaddison@users.noreply.github.com> --- doc/usage/configuration.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index 2f5a2250378..3bb5291d622 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -3813,7 +3813,7 @@ and the number of workers to use. .. versionadded:: 7.3 -.. confval:: linkcheck_ignore_case +.. confval:: linkcheck_allow_url_normalization :type: :code-py:`bool` :default: :code-py:`False` From 3e545f3ca3f7e717a24b3466f15e5cfc60256c3e Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Wed, 12 Nov 2025 13:17:17 +0530 Subject: [PATCH 18/26] Update i18n.py (reert \) --- sphinx/transforms/i18n.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/transforms/i18n.py b/sphinx/transforms/i18n.py index d219dd24090..570154185e9 100644 --- a/sphinx/transforms/i18n.py +++ b/sphinx/transforms/i18n.py @@ -415,7 +415,7 @@ def apply(self, **kwargs: Any) -> None: # There is no point in having noqa on literal blocks because # they cannot contain references. Recognizing it would just # completely prevent escaping the noqa. Outside of literal - # blocks, one can always write \\#noqa. + # blocks, one can always write \#noqa. if not isinstance(node, LITERAL_TYPE_NODES): msgstr, _ = parse_noqa(msgstr) From d9940da1875ae7836c2fbd0cb60901db193536d7 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Wed, 12 Nov 2025 13:25:04 +0530 Subject: [PATCH 19/26] Use .casefold() for case-insensitive URL comparison --- sphinx/builders/linkcheck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index f6476179867..2fdcbf4d85f 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -634,7 +634,7 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: response_url_stripped = response_url.rstrip('/') req_url_stripped = req_url.rstrip('/') if self.ignore_case: - urls_match = response_url_stripped.lower() == req_url_stripped.lower() + urls_match = response_url_stripped.casefold() == req_url_stripped.casefold() else: urls_match = response_url_stripped == req_url_stripped From 322fcf5b41369d2c70a3c3930587ee8cf870519e Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Wed, 12 Nov 2025 18:21:00 +0530 Subject: [PATCH 20/26] Update test_build_linkcheck.py (revert) --- tests/test_builders/test_build_linkcheck.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index a762412e938..82872b92daf 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -901,12 +901,7 @@ def test_connect_to_selfsigned_fails(app: SphinxTestApp) -> None: assert content['filename'] == 'index.rst' assert content['lineno'] == 1 assert content['uri'] == f'https://{address}/' - # Accept either SSL certificate error or timeout (both indicate connection failure) - assert ( - '[SSL: CERTIFICATE_VERIFY_FAILED]' in content['info'] - or 'timed out' in content['info'].lower() - or 'timeout' in content['info'].lower() - ) + assert '[SSL: CERTIFICATE_VERIFY_FAILED]' in content['info'] @pytest.mark.sphinx( From cfcbef24ebbae336b804182b8785c6c3fda5f44c Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Wed, 12 Nov 2025 19:57:08 +0530 Subject: [PATCH 21/26] Update test_build_linkcheck.py (revert) --- tests/test_builders/test_build_linkcheck.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index 82872b92daf..f6352c51671 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -886,10 +886,6 @@ def test_invalid_ssl(get_request, app): 'linkcheck', testroot='linkcheck-localserver-https', freshenv=True, - confoverrides={ - 'linkcheck_timeout': 10, - 'linkcheck_report_timeouts_as_broken': True, - }, ) def test_connect_to_selfsigned_fails(app: SphinxTestApp) -> None: with serve_application(app, OKHandler, tls_enabled=True) as address: From 2c4567d0e17dbd0308e81c74a7bf4a299e69e6df Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Wed, 12 Nov 2025 19:59:34 +0530 Subject: [PATCH 22/26] restore original pytest markers --- tests/test_builders/test_build_html_numfig.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_builders/test_build_html_numfig.py b/tests/test_builders/test_build_html_numfig.py index c7f9435395e..434ffda62b8 100644 --- a/tests/test_builders/test_build_html_numfig.py +++ b/tests/test_builders/test_build_html_numfig.py @@ -18,6 +18,8 @@ from sphinx.testing.util import SphinxTestApp +@pytest.mark.sphinx('html', testroot='numfig') +@pytest.mark.test_params(shared_result='test_build_html_numfig') @pytest.mark.sphinx('html', testroot='numfig', freshenv=True) def test_numfig_disabled_warn(app: SphinxTestApp) -> None: app.build() From c18d5733bbf5fb141c3885a21f3db68deee0f0ba Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Wed, 12 Nov 2025 20:18:02 +0530 Subject: [PATCH 23/26] Removed the duplicate @pytest.mark.sphinx --- tests/test_builders/test_build_html_numfig.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_builders/test_build_html_numfig.py b/tests/test_builders/test_build_html_numfig.py index 434ffda62b8..144d9958d0d 100644 --- a/tests/test_builders/test_build_html_numfig.py +++ b/tests/test_builders/test_build_html_numfig.py @@ -20,7 +20,6 @@ @pytest.mark.sphinx('html', testroot='numfig') @pytest.mark.test_params(shared_result='test_build_html_numfig') -@pytest.mark.sphinx('html', testroot='numfig', freshenv=True) def test_numfig_disabled_warn(app: SphinxTestApp) -> None: app.build() warnings = app.warning.getvalue() From 07b179594b7b2f946206e2ef97a3a62f86b7a628 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Wed, 12 Nov 2025 20:19:13 +0530 Subject: [PATCH 24/26] Removed test_linkcheck_anchors_remain_case_sensitive --- tests/test_builders/test_build_linkcheck.py | 29 --------------------- 1 file changed, 29 deletions(-) diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index f6352c51671..0d88ba02884 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1555,32 +1555,3 @@ def mock_request(self, method, url, **kwargs): if lowercase_uri in rowsby: # Should be working because case is ignored assert rowsby[lowercase_uri]['status'] == 'working' - - -@pytest.mark.sphinx( - 'linkcheck', - testroot='linkcheck-localserver-anchor', - freshenv=True, - confoverrides={'linkcheck_ignore_case': True}, -) -def test_linkcheck_anchors_remain_case_sensitive(app: SphinxTestApp) -> None: - """Test that anchors remain case-sensitive even with linkcheck_ignore_case=True.""" - with serve_application(app, CaseSensitiveHandler) as address: - # Create a document with an anchor in lowercase that doesn't match HTML - index = app.srcdir / 'index.rst' - index.write_text( - f'* `Link with wrong case anchor `_\n', - encoding='utf-8', - ) - app.build() - - content = (app.outdir / 'output.json').read_text(encoding='utf8') - rows = [json.loads(x) for x in content.splitlines()] - - # The HTML has "MyAnchor" but we request "myanchor" - # Even with linkcheck_ignore_case=True, anchors should be case-sensitive - # so this should be broken - assert len(rows) == 1 - assert rows[0]['status'] == 'broken' - assert rows[0]['uri'] == f'http://{address}/anchor.html#myanchor' - assert "Anchor 'myanchor' not found" in rows[0]['info'] From bc8fa7cb47b86e78be9c8ef956ae7f197b699071 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Thu, 13 Nov 2025 21:52:01 +0530 Subject: [PATCH 25/26] Rename linkcheck_ignore_case to linkcheck_case_insensitive and update related tests --- doc/usage/configuration.rst | 4 ++-- sphinx/builders/linkcheck.py | 21 ++++++++++++--------- tests/test_builders/test_build_linkcheck.py | 6 +++--- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index 3bb5291d622..3a184550da8 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -3813,7 +3813,7 @@ and the number of workers to use. .. versionadded:: 7.3 -.. confval:: linkcheck_allow_url_normalization +.. confval:: linkcheck_case_insensitive :type: :code-py:`bool` :default: :code-py:`False` @@ -3838,7 +3838,7 @@ and the number of workers to use. .. code-block:: python - linkcheck_ignore_case = True + linkcheck_case_insensitive = True .. versionadded:: 8.2 diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 2fdcbf4d85f..30bc97c201e 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -409,7 +409,7 @@ def __init__( self.user_agent = config.user_agent self.tls_verify = config.tls_verify self.tls_cacerts = config.tls_cacerts - self.ignore_case = config.linkcheck_ignore_case + self.case_insensitive = config.linkcheck_case_insensitive self._session = requests._Session( _ignored_redirects=tuple(map(re.compile, config.linkcheck_ignore)) @@ -631,15 +631,18 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: self.rate_limits.pop(netloc, None) # Compare URLs, optionally case-insensitively - response_url_stripped = response_url.rstrip('/') - req_url_stripped = req_url.rstrip('/') - if self.ignore_case: - urls_match = response_url_stripped.casefold() == req_url_stripped.casefold() - else: - urls_match = response_url_stripped == req_url_stripped + def _normalise_url(url: str) -> str: + """Reduces a URL to a normal/equality-comparable form.""" + normalised_url = url.rstrip('/') + if self.case_insensitive: + normalised_url = normalised_url.casefold() + return normalised_url + + normalised_request_url = _normalise_url(req_url) + normalised_response_url = _normalise_url(response_url) if ( - urls_match + normalised_request_url == normalised_response_url or _allowed_redirect(req_url, response_url, self.allowed_redirects) ): # fmt: skip return _Status.WORKING, '', 0 @@ -825,7 +828,7 @@ def setup(app: Sphinx) -> ExtensionMetadata: app.add_config_value( 'linkcheck_report_timeouts_as_broken', False, '', types=frozenset({bool}) ) - app.add_config_value('linkcheck_ignore_case', False, '', types=frozenset({bool})) + app.add_config_value('linkcheck_case_insensitive', False, '', types=frozenset({bool})) app.add_event('linkcheck-process-uri') diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index 0d88ba02884..9ea282f0a63 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -1488,7 +1488,7 @@ def do_GET(self): 'linkcheck', testroot='linkcheck-localserver', freshenv=True, - confoverrides={'linkcheck_ignore_case': False}, + confoverrides={'linkcheck_case_insensitive': False}, ) def test_linkcheck_case_sensitive(app: SphinxTestApp) -> None: """Test that case-sensitive checking is the default behavior.""" @@ -1525,10 +1525,10 @@ def mock_request(self, method, url, **kwargs): 'linkcheck', testroot='linkcheck-localserver', freshenv=True, - confoverrides={'linkcheck_ignore_case': True}, + confoverrides={'linkcheck_case_insensitive': True}, ) def test_linkcheck_case_insensitive(app: SphinxTestApp) -> None: - """Test that linkcheck_ignore_case=True ignores case differences in URLs.""" + """Test that linkcheck_case_insensitive=True ignores case differences in URLs.""" with serve_application(app, CaseSensitiveHandler) as address: # Monkey-patch the session to change the response URL to uppercase from unittest.mock import patch From 029a720e3a478dbf48878f3385128fd89a34d286 Mon Sep 17 00:00:00 2001 From: Fazeel Usmani Date: Thu, 13 Nov 2025 22:00:59 +0530 Subject: [PATCH 26/26] Fix ruff format check --- sphinx/builders/linkcheck.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 30bc97c201e..ac803a5cac4 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -828,7 +828,9 @@ def setup(app: Sphinx) -> ExtensionMetadata: app.add_config_value( 'linkcheck_report_timeouts_as_broken', False, '', types=frozenset({bool}) ) - app.add_config_value('linkcheck_case_insensitive', False, '', types=frozenset({bool})) + app.add_config_value( + 'linkcheck_case_insensitive', False, '', types=frozenset({bool}) + ) app.add_event('linkcheck-process-uri')