From a7d9f57422dbddb0d5d142f5a2cca94f29dc3f55 Mon Sep 17 00:00:00 2001
From: dimidagd <46669905+dimidagd@users.noreply.github.com>
Date: Wed, 5 Nov 2025 14:54:06 +0100
Subject: [PATCH 1/3] Update input type for pad function to include Image

---
 torchvision/transforms/v2/functional/_geometry.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py
index 4fcb7fabe0d..5711f413ca6 100644
--- a/torchvision/transforms/v2/functional/_geometry.py
+++ b/torchvision/transforms/v2/functional/_geometry.py
@@ -1509,7 +1509,7 @@ def rotate_video(
 
 
 def pad(
-    inpt: torch.Tensor,
+    inpt: Union[Image.Image, torch.Tensor],
     padding: list[int],
     fill: Optional[Union[int, float, list[float]]] = None,
     padding_mode: str = "constant",

From 63aa0ebd168e3f1039f5030e072f824932cba276 Mon Sep 17 00:00:00 2001
From: dimidagd <46669905+dimidagd@users.noreply.github.com>
Date: Wed, 5 Nov 2025 15:05:14 +0100
Subject: [PATCH 2/3] Update img parameter type to support PIL images

---
 torchvision/transforms/functional.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py
index 7b950b0c45b..c261de570ae 100644
--- a/torchvision/transforms/functional.py
+++ b/torchvision/transforms/functional.py
@@ -385,7 +385,7 @@ def _compute_resized_output_size(
 
 
 def resize(
-    img: Tensor,
+    img: Union[PIL.Image.Image, Tensor],
     size: list[int],
     interpolation: InterpolationMode = InterpolationMode.BILINEAR,
     max_size: Optional[int] = None,
@@ -479,7 +479,7 @@ def resize(
     return F_t.resize(img, size=output_size, interpolation=interpolation.value, antialias=antialias)
 
 
-def pad(img: Tensor, padding: list[int], fill: Union[int, float] = 0, padding_mode: str = "constant") -> Tensor:
+def pad(img: Union[PIL.Image.Image, Tensor], padding: list[int], fill: Union[int, float] = 0, padding_mode: str = "constant") -> Tensor:
     r"""Pad the given image on all sides with the given "pad" value.
     If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means at most 2 leading
     dimensions for mode reflect and symmetric,

From 99454687edf217a686d29aa1dafeb5f75905a044 Mon Sep 17 00:00:00 2001
From: dimidagd <46669905+dimidagd@users.noreply.github.com>
Date: Wed, 5 Nov 2025 15:06:04 +0100
Subject: [PATCH 3/3] Update input type annotations for resize and pad
 functions

---
 torchvision/transforms/v2/functional/_geometry.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py
index 5711f413ca6..38976ecad48 100644
--- a/torchvision/transforms/v2/functional/_geometry.py
+++ b/torchvision/transforms/v2/functional/_geometry.py
@@ -236,7 +236,7 @@ def _compute_resized_output_size(
 
 
 def resize(
-    inpt: torch.Tensor,
+    inpt: Union[PIL.Image.Image, torch.Tensor],
     size: Optional[list[int]],
     interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
     max_size: Optional[int] = None,
@@ -1509,7 +1509,7 @@ def rotate_video(
 
 
 def pad(
-    inpt: Union[Image.Image, torch.Tensor],
+    inpt: Union[PIL.Image.Image, torch.Tensor],
     padding: list[int],
     fill: Optional[Union[int, float, list[float]]] = None,
     padding_mode: str = "constant",