From 0cbd6a36579a080bba4e62068821e0469fdefa1f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Oct 2025 06:58:46 +0000 Subject: [PATCH 01/10] build(deps): bump torch from 2.8.0 to 2.9.0 in /requirements Bumps [torch](https://github.com/pytorch/pytorch) from 2.8.0 to 2.9.0. - [Release notes](https://github.com/pytorch/pytorch/releases) - [Changelog](https://github.com/pytorch/pytorch/blob/main/RELEASE.md) - [Commits](https://github.com/pytorch/pytorch/compare/v2.8.0...v2.9.0) --- updated-dependencies: - dependency-name: torch dependency-version: 2.9.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements/fabric/base.txt | 2 +- requirements/pytorch/base.txt | 2 +- requirements/typing.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements/fabric/base.txt b/requirements/fabric/base.txt index 0a7629151f6c1..ad36bc25fcc13 100644 --- a/requirements/fabric/base.txt +++ b/requirements/fabric/base.txt @@ -1,7 +1,7 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -torch >=2.1.0, <2.9.0 +torch >=2.1.0, <2.10.0 fsspec[http] >=2022.5.0, <2025.10.0 packaging >=20.0, <=25.0 typing-extensions >4.5.0, <4.16.0 diff --git a/requirements/pytorch/base.txt b/requirements/pytorch/base.txt index 9c7a60bd49f0e..1dbf53134cb31 100644 --- a/requirements/pytorch/base.txt +++ b/requirements/pytorch/base.txt @@ -1,7 +1,7 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -torch >=2.1.0, <2.9.0 +torch >=2.1.0, <2.10.0 tqdm >=4.57.0, 
<4.68.0 PyYAML >5.4, <6.1.0 fsspec[http] >=2022.5.0, <2025.10.0 diff --git a/requirements/typing.txt b/requirements/typing.txt index dc848c55e583d..8c5ad38fb7825 100644 --- a/requirements/typing.txt +++ b/requirements/typing.txt @@ -1,5 +1,5 @@ mypy==1.18.2 -torch==2.8.0 +torch==2.9.0 types-Markdown types-PyYAML From c5bde432ce0f551e11f122fe052d6bb7ce4efc1a Mon Sep 17 00:00:00 2001 From: bhimrazy Date: Mon, 20 Oct 2025 13:27:28 +0545 Subject: [PATCH 02/10] build(deps): update torchvision version constraint to <0.25.0 in examples.txt --- requirements/fabric/examples.txt | 2 +- requirements/pytorch/examples.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/fabric/examples.txt b/requirements/fabric/examples.txt index ab6ffb8b137df..72f13a4128e56 100644 --- a/requirements/fabric/examples.txt +++ b/requirements/fabric/examples.txt @@ -1,5 +1,5 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -torchvision >=0.16.0, <0.24.0 +torchvision >=0.16.0, <0.25.0 torchmetrics >=0.10.0, <1.9.0 diff --git a/requirements/pytorch/examples.txt b/requirements/pytorch/examples.txt index b64ed5ee47c67..b17e755cd18e2 100644 --- a/requirements/pytorch/examples.txt +++ b/requirements/pytorch/examples.txt @@ -2,6 +2,6 @@ # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment requests <2.33.0 -torchvision >=0.16.0, <0.24.0 +torchvision >=0.16.0, <0.25.0 ipython[all] >=8.0.0, <10.0.0 torchmetrics >=0.10.0, <1.9.0 From 3c6c84aa6628e444f178b3fb69c2cec4ba29d240 Mon Sep 17 00:00:00 2001 From: bhimrazy Date: Mon, 20 Oct 2025 14:39:03 +0545 Subject: [PATCH 03/10] add ignore --- src/lightning/fabric/utilities/spike.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/src/lightning/fabric/utilities/spike.py b/src/lightning/fabric/utilities/spike.py index 9c1b0a2a00572..13309458ff57e 100644 --- a/src/lightning/fabric/utilities/spike.py +++ b/src/lightning/fabric/utilities/spike.py @@ -126,16 +126,16 @@ def _handle_spike(self, fabric: "Fabric", batch_idx: int) -> None: raise TrainingSpikeException(batch_idx=batch_idx) def _check_atol(self, val_a: Union[float, torch.Tensor], val_b: Union[float, torch.Tensor]) -> bool: - return (self.atol is None) or bool(abs(val_a - val_b) >= abs(self.atol)) + return (self.atol is None) or bool(abs(val_a - val_b) >= abs(self.atol)) # type: ignore def _check_rtol(self, val_a: Union[float, torch.Tensor], val_b: Union[float, torch.Tensor]) -> bool: - return (self.rtol is None) or bool(abs(val_a - val_b) >= abs(self.rtol * val_b)) + return (self.rtol is None) or bool(abs(val_a - val_b) >= abs(self.rtol * val_b)) # type: ignore def _is_better(self, diff_val: torch.Tensor) -> bool: if self.mode == "min": - return bool((diff_val <= 0.0).all()) + return bool((diff_val <= 0.0).all()) # type: ignore[operator] if self.mode == "max": - return bool((diff_val >= 0).all()) + return bool((diff_val >= 0).all()) # type: ignore[operator] raise ValueError(f"Invalid mode. 
Has to be min or max, found {self.mode}") From e51148358ed476f0c82f668d39997534def1e498 Mon Sep 17 00:00:00 2001 From: bhimrazy Date: Mon, 20 Oct 2025 14:44:19 +0545 Subject: [PATCH 04/10] remove unused ignore --- src/lightning/fabric/utilities/spike.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lightning/fabric/utilities/spike.py b/src/lightning/fabric/utilities/spike.py index 13309458ff57e..cd2e05309e087 100644 --- a/src/lightning/fabric/utilities/spike.py +++ b/src/lightning/fabric/utilities/spike.py @@ -133,9 +133,9 @@ def _check_rtol(self, val_a: Union[float, torch.Tensor], val_b: Union[float, tor def _is_better(self, diff_val: torch.Tensor) -> bool: if self.mode == "min": - return bool((diff_val <= 0.0).all()) # type: ignore[operator] + return bool((diff_val <= 0.0).all()) if self.mode == "max": - return bool((diff_val >= 0).all()) # type: ignore[operator] + return bool((diff_val >= 0).all()) raise ValueError(f"Invalid mode. Has to be min or max, found {self.mode}") From 4e500b1c9346dd1dd97904f199a57174c4411750 Mon Sep 17 00:00:00 2001 From: bhimrazy Date: Tue, 21 Oct 2025 13:58:34 +0545 Subject: [PATCH 05/10] Empty Commit From f26f1d1dd5ff79ec09b36a5073858e52a7346c70 Mon Sep 17 00:00:00 2001 From: bhimrazy Date: Mon, 10 Nov 2025 10:52:21 +0545 Subject: [PATCH 06/10] fix: add handling for InductorSubproc thread in thread police function --- tests/tests_fabric/conftest.py | 1 + tests/tests_pytorch/conftest.py | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/tests_fabric/conftest.py b/tests/tests_fabric/conftest.py index 9d4a0b9462f2e..a11a9f93e569d 100644 --- a/tests/tests_fabric/conftest.py +++ b/tests/tests_fabric/conftest.py @@ -111,6 +111,7 @@ def thread_police_duuu_daaa_duuu_daaa(): sys.version_info >= (3, 9) and isinstance(thread, _ExecutorManagerThread) or "ThreadPoolExecutor-" in thread.name + or thread.name == "InductorSubproc" # torch.compile ): # probably `torch.compile`, can't narrow it down further continue 
diff --git a/tests/tests_pytorch/conftest.py b/tests/tests_pytorch/conftest.py index 878298c6bfd94..da48878c7f670 100644 --- a/tests/tests_pytorch/conftest.py +++ b/tests/tests_pytorch/conftest.py @@ -170,6 +170,7 @@ def thread_police_duuu_daaa_duuu_daaa(): sys.version_info >= (3, 9) and isinstance(thread, _ExecutorManagerThread) or "ThreadPoolExecutor-" in thread.name + or thread.name == "InductorSubproc" # torch.compile ): # probably `torch.compile`, can't narrow it down further continue From 16fa9239a893f3abda88eebe397e0e8730b849b3 Mon Sep 17 00:00:00 2001 From: bhimrazy Date: Mon, 10 Nov 2025 11:36:39 +0545 Subject: [PATCH 07/10] let's try with extra index url --- .lightning/workflows/pytorch.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.lightning/workflows/pytorch.yml b/.lightning/workflows/pytorch.yml index 15dfc4a1f9064..f15c7c9f46511 100644 --- a/.lightning/workflows/pytorch.yml +++ b/.lightning/workflows/pytorch.yml @@ -121,7 +121,7 @@ run: | echo "Install package" extra=$(python -c "print({'lightning': 'pytorch-'}.get('${PACKAGE_NAME}', ''))") - uv pip install -e ".[${extra}dev]" --upgrade + uv pip install -e ".[${extra}dev]" --upgrade --extra-index-url https://download.pytorch.org/whl/cu${CUDA_VERSION_MM} echo "Ensure only a single package is installed" if [ "${PACKAGE_NAME}" == "pytorch" ]; then From 8748445dc66732325fa400c186b528d3d405ed39 Mon Sep 17 00:00:00 2001 From: bhimrazy Date: Mon, 10 Nov 2025 12:02:17 +0545 Subject: [PATCH 08/10] fix: prefer CUDA-specific packages from PyTorch index using find-links --- .lightning/workflows/pytorch.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.lightning/workflows/pytorch.yml b/.lightning/workflows/pytorch.yml index f15c7c9f46511..7783c91b596d0 100644 --- a/.lightning/workflows/pytorch.yml +++ b/.lightning/workflows/pytorch.yml @@ -121,7 +121,10 @@ run: | echo "Install package" extra=$(python -c "print({'lightning': 'pytorch-'}.get('${PACKAGE_NAME}', ''))") 
- uv pip install -e ".[${extra}dev]" --upgrade --extra-index-url https://download.pytorch.org/whl/cu${CUDA_VERSION_MM} + + # Use find-links to prefer CUDA-specific packages from PyTorch index + uv pip install -e ".[${extra}dev]" --upgrade \ + --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM} echo "Ensure only a single package is installed" if [ "${PACKAGE_NAME}" == "pytorch" ]; then From 98a70c97ce8c8880f1cbdcb3c0f51a2639007fc5 Mon Sep 17 00:00:00 2001 From: bhimrazy Date: Mon, 10 Nov 2025 12:25:36 +0545 Subject: [PATCH 09/10] update --- .lightning/workflows/pytorch.yml | 3 +-- requirements/pytorch/test.txt | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.lightning/workflows/pytorch.yml b/.lightning/workflows/pytorch.yml index 7783c91b596d0..2abbf09c3666a 100644 --- a/.lightning/workflows/pytorch.yml +++ b/.lightning/workflows/pytorch.yml @@ -123,8 +123,7 @@ run: | extra=$(python -c "print({'lightning': 'pytorch-'}.get('${PACKAGE_NAME}', ''))") # Use find-links to prefer CUDA-specific packages from PyTorch index - uv pip install -e ".[${extra}dev]" --upgrade \ - --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM} + uv pip install -e ".[${extra}dev]" --upgrade echo "Ensure only a single package is installed" if [ "${PACKAGE_NAME}" == "pytorch" ]; then diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt index 9a315c25bfa21..b22b2a3679946 100644 --- a/requirements/pytorch/test.txt +++ b/requirements/pytorch/test.txt @@ -21,5 +21,6 @@ uvicorn # for `ServableModuleValidator` # not setting version as re-defined in tensorboard >=2.11, <2.21.0 # for `TensorBoardLogger` -torch-tensorrt; platform_system == "Linux" and python_version >= "3.12" +# TODO: resolve GPU test failures for TensorRT due to defaulting to cu13 installations +torch-tensorrt<2.9.0; platform_system == "Linux" and python_version >= "3.12" huggingface-hub From 4674012102b69d83b4c30122fe4bb9bf5d9a4e93 Mon Sep 17 00:00:00 2001 From: 
bhimrazy Date: Mon, 10 Nov 2025 13:09:19 +0545 Subject: [PATCH 10/10] add find link --- .lightning/workflows/pytorch.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.lightning/workflows/pytorch.yml b/.lightning/workflows/pytorch.yml index 2abbf09c3666a..e6db4eeb33cd4 100644 --- a/.lightning/workflows/pytorch.yml +++ b/.lightning/workflows/pytorch.yml @@ -123,7 +123,9 @@ run: | extra=$(python -c "print({'lightning': 'pytorch-'}.get('${PACKAGE_NAME}', ''))") # Use find-links to prefer CUDA-specific packages from PyTorch index - uv pip install -e ".[${extra}dev]" --upgrade + uv pip install -e ".[${extra}dev]" --upgrade \ + --find-links="https://download.pytorch.org/whl/${UV_TORCH_BACKEND}" + uv pip list echo "Ensure only a single package is installed" if [ "${PACKAGE_NAME}" == "pytorch" ]; then