From e35962b0decb95aec8de751da7768ee2d0aabaa4 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 3 Nov 2025 18:42:47 +0530 Subject: [PATCH 01/11] add tests for qwenimage modular. --- .../qwenimage/before_denoise.py | 13 +-- .../modular_pipelines/qwenimage/decoders.py | 3 +- .../modular_pipelines/qwenimage/encoders.py | 2 + .../qwenimage/modular_pipeline.py | 5 +- tests/modular_pipelines/qwen/__init__.py | 0 .../qwen/test_modular_pipeline_qwenimage.py | 85 +++++++++++++++++++ 6 files changed, 97 insertions(+), 11 deletions(-) create mode 100644 tests/modular_pipelines/qwen/__init__.py create mode 100644 tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py diff --git a/src/diffusers/modular_pipelines/qwenimage/before_denoise.py b/src/diffusers/modular_pipelines/qwenimage/before_denoise.py index fdec95dc506e..f10200503141 100644 --- a/src/diffusers/modular_pipelines/qwenimage/before_denoise.py +++ b/src/diffusers/modular_pipelines/qwenimage/before_denoise.py @@ -132,6 +132,7 @@ def expected_components(self) -> List[ComponentSpec]: @property def inputs(self) -> List[InputParam]: return [ + InputParam("latents"), InputParam(name="height"), InputParam(name="width"), InputParam(name="num_images_per_prompt", default=1), @@ -196,11 +197,11 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState) - f"You have passed a list of generators of length {len(block_state.generator)}, but requested an effective batch" f" size of {batch_size}. Make sure the batch size matches the length of the generators." ) - - block_state.latents = randn_tensor( - shape, generator=block_state.generator, device=device, dtype=block_state.dtype - ) - block_state.latents = components.pachifier.pack_latents(block_state.latents) + if block_state.latents is None: + block_state.latents = randn_tensor( + shape, generator=block_state.generator, device=device, dtype=block_state.dtype + ) + block_state.latents = components.pachifier.pack_latents(block_state.latents) self.set_block_state(state, block_state) return components, state @@ -549,7 +550,7 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState) - block_state.width // components.vae_scale_factor // 2, ) ] - * block_state.batch_size + for _ in range(block_state.batch_size) ] block_state.txt_seq_lens = ( block_state.prompt_embeds_mask.sum(dim=1).tolist() if block_state.prompt_embeds_mask is not None else None diff --git a/src/diffusers/modular_pipelines/qwenimage/decoders.py b/src/diffusers/modular_pipelines/qwenimage/decoders.py index 6c82fe989e55..aedb0e4018f3 100644 --- a/src/diffusers/modular_pipelines/qwenimage/decoders.py +++ b/src/diffusers/modular_pipelines/qwenimage/decoders.py @@ -74,8 +74,9 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState) - block_state = self.get_block_state(state) # YiYi Notes: remove support for output_type = "latents', we can just skip decode/encode step in modular + vae_scale_factor = 2 ** len(components.vae.temperal_downsample) block_state.latents = components.pachifier.unpack_latents( - block_state.latents, block_state.height, block_state.width + block_state.latents, block_state.height, block_state.width, vae_scale_factor=vae_scale_factor ) block_state.latents = block_state.latents.to(components.vae.dtype) diff --git a/src/diffusers/modular_pipelines/qwenimage/encoders.py b/src/diffusers/modular_pipelines/qwenimage/encoders.py index 04fb3fdc947b..b025c2dc5071 100644 --- a/src/diffusers/modular_pipelines/qwenimage/encoders.py +++ 
b/src/diffusers/modular_pipelines/qwenimage/encoders.py @@ -503,6 +503,8 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState): block_state.prompt_embeds = block_state.prompt_embeds[:, : block_state.max_sequence_length] block_state.prompt_embeds_mask = block_state.prompt_embeds_mask[:, : block_state.max_sequence_length] + block_state.negative_prompt_embeds = None + block_state.negative_prompt_embeds_mask = None if components.requires_unconditional_embeds: negative_prompt = block_state.negative_prompt or "" block_state.negative_prompt_embeds, block_state.negative_prompt_embeds_mask = get_qwen_prompt_embeds( diff --git a/src/diffusers/modular_pipelines/qwenimage/modular_pipeline.py b/src/diffusers/modular_pipelines/qwenimage/modular_pipeline.py index d9e30864f660..59e1a13a5db2 100644 --- a/src/diffusers/modular_pipelines/qwenimage/modular_pipeline.py +++ b/src/diffusers/modular_pipelines/qwenimage/modular_pipeline.py @@ -26,10 +26,7 @@ class QwenImagePachifier(ConfigMixin): config_name = "config.json" @register_to_config - def __init__( - self, - patch_size: int = 2, - ): + def __init__(self, patch_size: int = 2): super().__init__() def pack_latents(self, latents): diff --git a/tests/modular_pipelines/qwen/__init__.py b/tests/modular_pipelines/qwen/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py new file mode 100644 index 000000000000..024edf5f34b0 --- /dev/null +++ b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py @@ -0,0 +1,85 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np +import torch + +from diffusers import ClassifierFreeGuidance +from diffusers.modular_pipelines import QwenImageAutoBlocks, QwenImageModularPipeline + +from ...testing_utils import torch_device +from ..test_modular_pipelines_common import ModularPipelineTesterMixin + + +class QwenImagexModularTests: + pipeline_class = QwenImageModularPipeline + pipeline_blocks_class = QwenImageAutoBlocks + repo = "hf-internal-testing/tiny-qwenimage-modular" + + params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image", "mask_image"]) + batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"]) + + def get_pipeline(self, components_manager=None, torch_dtype=torch.float32): + pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager) + pipeline.load_components(torch_dtype=torch_dtype) + pipeline.set_progress_bar_config(disable=None) + return pipeline + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "dance monkey", + "negative_prompt": "bad quality", + "generator": generator, + "num_inference_steps": 2, + "height": 32, + "width": 32, + "max_sequence_length": 16, + "output_type": "np", + } + return inputs + + +class QwenImageModularGuiderTests: + def test_guider_cfg(self): + pipe = self.get_pipeline() + pipe = pipe.to(torch_device) + + guider = ClassifierFreeGuidance(guidance_scale=1.0) + pipe.update_components(guider=guider) + + inputs = self.get_dummy_inputs(torch_device) + out_no_cfg = pipe(**inputs, output="images") + + guider = ClassifierFreeGuidance(guidance_scale=7.5) + pipe.update_components(guider=guider) + inputs = self.get_dummy_inputs(torch_device) + out_cfg = pipe(**inputs, output="images") + + assert out_cfg.shape == out_no_cfg.shape + max_diff = np.abs(out_cfg - out_no_cfg).max() + assert max_diff > 1e-2, "Output with CFG must be different from normal inference" + + +class QwenImageModularPipelineFastTests( + QwenImagexModularTests, QwenImageModularGuiderTests, ModularPipelineTesterMixin, unittest.TestCase +): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) From 94fa2029a43d7e6e07d208ec4e9ef6df25bcba65 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 4 Nov 2025 08:36:23 +0530 Subject: [PATCH 02/11] qwenimage edit. 
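
Note: the Edit tests below reuse the text-to-image dummy inputs and only
add a PIL image, which mirrors how the modular pipeline is driven
directly. A rough usage sketch (illustrative only, not part of the diff;
the tiny repo id is the test fixture these tests load):

    import PIL.Image
    import torch

    from diffusers.modular_pipelines import QwenImageEditAutoBlocks

    # Build the pipeline from its block graph, then load the (tiny) components.
    pipe = QwenImageEditAutoBlocks().init_pipeline("hf-internal-testing/tiny-qwenimage-edit-modular")
    pipe.load_components(torch_dtype=torch.float32)

    images = pipe(
        prompt="dance monkey",
        image=PIL.Image.new("RGB", (32, 32), 0),
        num_inference_steps=2,
        generator=torch.Generator("cpu").manual_seed(0),
        output="images",
    )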
--- .../qwen/test_modular_pipeline_qwenimage.py | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py index 024edf5f34b0..adcd08e80ca5 100644 --- a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py +++ b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py @@ -16,16 +16,22 @@ import unittest import numpy as np +import PIL import torch from diffusers import ClassifierFreeGuidance -from diffusers.modular_pipelines import QwenImageAutoBlocks, QwenImageModularPipeline +from diffusers.modular_pipelines import ( + QwenImageAutoBlocks, + QwenImageEditAutoBlocks, + QwenImageEditModularPipeline, + QwenImageModularPipeline, +) from ...testing_utils import torch_device from ..test_modular_pipelines_common import ModularPipelineTesterMixin -class QwenImagexModularTests: +class QwenImageModularTests: pipeline_class = QwenImageModularPipeline pipeline_blocks_class = QwenImageAutoBlocks repo = "hf-internal-testing/tiny-qwenimage-modular" @@ -79,7 +85,20 @@ def test_guider_cfg(self): class QwenImageModularPipelineFastTests( - QwenImagexModularTests, QwenImageModularGuiderTests, ModularPipelineTesterMixin, unittest.TestCase + QwenImageModularTests, QwenImageModularGuiderTests, ModularPipelineTesterMixin, unittest.TestCase ): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + + +class QwenImageEditModularPipelineFastTests( + QwenImageModularTests, QwenImageModularGuiderTests, ModularPipelineTesterMixin, unittest.TestCase +): + pipeline_class = QwenImageEditModularPipeline + pipeline_blocks_class = QwenImageEditAutoBlocks + repo = "hf-internal-testing/tiny-qwenimage-edit-modular" + + def get_dummy_inputs(self, device, seed=0): + inputs = super().get_dummy_inputs(device, seed) + inputs["image"] = PIL.Image.new("RGB", (32, 32), 0) + return inputs From 7d3c250722e0d9bc900e91e85217d8667204293a Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 4 Nov 2025 10:05:34 +0530 Subject: [PATCH 03/11] qwenimage edit plus. 
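
Note: unlike Edit, Edit Plus is exercised here with a *list* of reference
images (`inputs["image"] = [image]`), and batching multiple images per
prompt is not settled yet, hence the strict xfails below. A sketch of the
input shape these tests exercise (illustrative only):

    import PIL.Image

    # A single prompt with a list of reference images; the xfail-marked
    # batch tests cover the cases that still need to be revisited.
    inputs = {
        "prompt": "dance monkey",
        "image": [PIL.Image.new("RGB", (32, 32), 0)],
    }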
--- .../modular_pipelines/qwenimage/encoders.py | 4 ++ .../qwen/test_modular_pipeline_qwenimage.py | 45 ++++++++++++++++++- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/diffusers/modular_pipelines/qwenimage/encoders.py b/src/diffusers/modular_pipelines/qwenimage/encoders.py index b025c2dc5071..3b56981e5290 100644 --- a/src/diffusers/modular_pipelines/qwenimage/encoders.py +++ b/src/diffusers/modular_pipelines/qwenimage/encoders.py @@ -629,6 +629,8 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState): device=device, ) + block_state.negative_prompt_embeds = None + block_state.negative_prompt_embeds_mask = None if components.requires_unconditional_embeds: negative_prompt = block_state.negative_prompt or " " block_state.negative_prompt_embeds, block_state.negative_prompt_embeds_mask = get_qwen_prompt_embeds_edit( @@ -681,6 +683,8 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState): device=device, ) + block_state.negative_prompt_embeds = None + block_state.negative_prompt_embeds_mask = None if components.requires_unconditional_embeds: negative_prompt = block_state.negative_prompt or " " block_state.negative_prompt_embeds, block_state.negative_prompt_embeds_mask = ( diff --git a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py index adcd08e80ca5..1a49fc222532 100644 --- a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py +++ b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py @@ -17,6 +17,7 @@ import numpy as np import PIL +import pytest import torch from diffusers import ClassifierFreeGuidance @@ -24,6 +25,8 @@ QwenImageAutoBlocks, QwenImageEditAutoBlocks, QwenImageEditModularPipeline, + QwenImageEditPlusAutoBlocks, + QwenImageEditPlusModularPipeline, QwenImageModularPipeline, ) @@ -64,7 +67,7 @@ def get_dummy_inputs(self, device, seed=0): class QwenImageModularGuiderTests: - def test_guider_cfg(self): + def test_guider_cfg(self, tol=1e-2): pipe = self.get_pipeline() pipe = pipe.to(torch_device) @@ -81,7 +84,7 @@ def test_guider_cfg(self): assert out_cfg.shape == out_no_cfg.shape max_diff = np.abs(out_cfg - out_no_cfg).max() - assert max_diff > 1e-2, "Output with CFG must be different from normal inference" + assert max_diff > tol, "Output with CFG must be different from normal inference" class QwenImageModularPipelineFastTests( @@ -100,5 +103,43 @@ class QwenImageEditModularPipelineFastTests( def get_dummy_inputs(self, device, seed=0): inputs = super().get_dummy_inputs(device, seed) + inputs.pop("max_sequence_length") inputs["image"] = PIL.Image.new("RGB", (32, 32), 0) return inputs + + def test_guider_cfg(self): + super().test_guider_cfg(7e-5) + + +class QwenImageEditPlusModularPipelineFastTests( + QwenImageModularTests, QwenImageModularGuiderTests, ModularPipelineTesterMixin, unittest.TestCase +): + pipeline_class = QwenImageEditPlusModularPipeline + pipeline_blocks_class = QwenImageEditPlusAutoBlocks + repo = "hf-internal-testing/tiny-qwenimage-edit-plus-modular" + + # No `mask_image` yet. 
+ params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image"]) + batch_params = frozenset(["prompt", "negative_prompt", "image"]) + + def get_dummy_inputs(self, device, seed=0): + inputs = super().get_dummy_inputs(device, seed) + inputs.pop("max_sequence_length") + image = PIL.Image.new("RGB", (32, 32), 0) + inputs["image"] = [image] + return inputs + + @pytest.mark.xfail(condition=True, reason="Batch of multiple images needs to be revisited", strict=True) + def test_num_images_per_prompt(self): + super().test_num_images_per_prompt() + + @pytest.mark.xfail(condition=True, reason="Batch of multiple images needs to be revisited", strict=True) + def test_inference_batch_consistent(): + super().test_inference_batch_consistent() + + @pytest.mark.xfail(condition=True, reason="Batch of multiple images needs to be revisited", strict=True) + def test_inference_batch_single_identical(): + super().test_inference_batch_single_identical() + + def test_guider_cfg(self): + super().test_guider_cfg(1e-3) From 7ad48f0c2755dd6f347912627cd0369affb81d64 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 4 Nov 2025 12:08:38 +0530 Subject: [PATCH 04/11] empty From 27a1c259a6aebbcadefa2a8f4d2a543f85621df0 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 10 Nov 2025 09:13:23 +0530 Subject: [PATCH 05/11] align with the latest structure --- .../qwen/test_modular_pipeline_qwenimage.py | 102 +++++++++--------- .../test_modular_pipelines_common.py | 15 +-- 2 files changed, 51 insertions(+), 66 deletions(-) diff --git a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py index 1a49fc222532..03479d4f32f7 100644 --- a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py +++ b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py @@ -13,12 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest import numpy as np import PIL import pytest -import torch from diffusers import ClassifierFreeGuidance from diffusers.modular_pipelines import ( @@ -34,38 +32,6 @@ from ..test_modular_pipelines_common import ModularPipelineTesterMixin -class QwenImageModularTests: - pipeline_class = QwenImageModularPipeline - pipeline_blocks_class = QwenImageAutoBlocks - repo = "hf-internal-testing/tiny-qwenimage-modular" - - params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image", "mask_image"]) - batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"]) - - def get_pipeline(self, components_manager=None, torch_dtype=torch.float32): - pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager) - pipeline.load_components(torch_dtype=torch_dtype) - pipeline.set_progress_bar_config(disable=None) - return pipeline - - def get_dummy_inputs(self, device, seed=0): - if str(device).startswith("mps"): - generator = torch.manual_seed(seed) - else: - generator = torch.Generator(device=device).manual_seed(seed) - inputs = { - "prompt": "dance monkey", - "negative_prompt": "bad quality", - "generator": generator, - "num_inference_steps": 2, - "height": 32, - "width": 32, - "max_sequence_length": 16, - "output_type": "np", - } - return inputs - - class QwenImageModularGuiderTests: def test_guider_cfg(self, tol=1e-2): pipe = self.get_pipeline() @@ -87,23 +53,48 @@ def test_guider_cfg(self, tol=1e-2): assert max_diff > tol, "Output with CFG must be different from normal inference" -class QwenImageModularPipelineFastTests( - QwenImageModularTests, QwenImageModularGuiderTests, ModularPipelineTesterMixin, unittest.TestCase -): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) +class TestQwenImageModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderTests): + pipeline_class = QwenImageModularPipeline + pipeline_blocks_class = QwenImageAutoBlocks + repo = "hf-internal-testing/tiny-qwenimage-modular" + + params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image", "mask_image"]) + batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"]) + + def get_dummy_inputs(self): + generator = self.get_generator() + inputs = { + "prompt": "dance monkey", + "negative_prompt": "bad quality", + "generator": generator, + "num_inference_steps": 2, + "height": 32, + "width": 32, + "max_sequence_length": 16, + "output_type": "np", + } + return inputs -class QwenImageEditModularPipelineFastTests( - QwenImageModularTests, QwenImageModularGuiderTests, ModularPipelineTesterMixin, unittest.TestCase -): +class TestQwenImageEditModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderTests): pipeline_class = QwenImageEditModularPipeline pipeline_blocks_class = QwenImageEditAutoBlocks repo = "hf-internal-testing/tiny-qwenimage-edit-modular" - def get_dummy_inputs(self, device, seed=0): - inputs = super().get_dummy_inputs(device, seed) - inputs.pop("max_sequence_length") + params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image", "mask_image"]) + batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"]) + + def get_dummy_inputs(self): + generator = self.get_generator() + inputs = { + "prompt": "dance monkey", + "negative_prompt": "bad quality", + "generator": generator, + "num_inference_steps": 2, + "height": 32, + "width": 32, + "output_type": "np", + } 
inputs["image"] = PIL.Image.new("RGB", (32, 32), 0) return inputs @@ -111,9 +102,7 @@ def test_guider_cfg(self): super().test_guider_cfg(7e-5) -class QwenImageEditPlusModularPipelineFastTests( - QwenImageModularTests, QwenImageModularGuiderTests, ModularPipelineTesterMixin, unittest.TestCase -): +class QwenImageEditPlusModularPipelineFastTests(ModularPipelineTesterMixin, QwenImageModularGuiderTests): pipeline_class = QwenImageEditPlusModularPipeline pipeline_blocks_class = QwenImageEditPlusAutoBlocks repo = "hf-internal-testing/tiny-qwenimage-edit-plus-modular" @@ -122,11 +111,18 @@ class QwenImageEditPlusModularPipelineFastTests( params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image"]) batch_params = frozenset(["prompt", "negative_prompt", "image"]) - def get_dummy_inputs(self, device, seed=0): - inputs = super().get_dummy_inputs(device, seed) - inputs.pop("max_sequence_length") - image = PIL.Image.new("RGB", (32, 32), 0) - inputs["image"] = [image] + def get_dummy_inputs(self): + generator = self.get_generator() + inputs = { + "prompt": "dance monkey", + "negative_prompt": "bad quality", + "generator": generator, + "num_inference_steps": 2, + "height": 32, + "width": 32, + "output_type": "np", + } + inputs["image"] = PIL.Image.new("RGB", (32, 32), 0) return inputs @pytest.mark.xfail(condition=True, reason="Batch of multiple images needs to be revisited", strict=True) diff --git a/tests/modular_pipelines/test_modular_pipelines_common.py b/tests/modular_pipelines/test_modular_pipelines_common.py index 1325e5c1de3c..c4a2ec13a24e 100644 --- a/tests/modular_pipelines/test_modular_pipelines_common.py +++ b/tests/modular_pipelines/test_modular_pipelines_common.py @@ -32,20 +32,9 @@ class ModularPipelineTesterMixin: # Canonical parameters that are passed to `__call__` regardless # of the type of pipeline. They are always optional and have common # sense default values. 
- optional_params = frozenset( - [ - "num_inference_steps", - "num_images_per_prompt", - "latents", - "output_type", - ] - ) + optional_params = frozenset(["num_inference_steps", "num_images_per_prompt", "latents", "output_type"]) # this is modular specific: generator needs to be a intermediate input because it's mutable - intermediate_params = frozenset( - [ - "generator", - ] - ) + intermediate_params = frozenset(["generator"]) def get_generator(self, seed=0): generator = torch.Generator("cpu").manual_seed(seed) From 5d5237779064502d216f8144d8344a77c53034d6 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 10 Nov 2025 09:23:40 +0530 Subject: [PATCH 06/11] up --- .../qwen/test_modular_pipeline_qwenimage.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py index 03479d4f32f7..975c7009a694 100644 --- a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py +++ b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py @@ -32,7 +32,7 @@ from ..test_modular_pipelines_common import ModularPipelineTesterMixin -class QwenImageModularGuiderTests: +class QwenImageModularGuiderMixin: def test_guider_cfg(self, tol=1e-2): pipe = self.get_pipeline() pipe = pipe.to(torch_device) @@ -40,12 +40,12 @@ def test_guider_cfg(self, tol=1e-2): guider = ClassifierFreeGuidance(guidance_scale=1.0) pipe.update_components(guider=guider) - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() out_no_cfg = pipe(**inputs, output="images") guider = ClassifierFreeGuidance(guidance_scale=7.5) pipe.update_components(guider=guider) - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() out_cfg = pipe(**inputs, output="images") assert out_cfg.shape == out_no_cfg.shape @@ -53,7 +53,7 @@ def test_guider_cfg(self, tol=1e-2): assert max_diff > tol, "Output with CFG must be different from normal inference" -class TestQwenImageModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderTests): +class TestQwenImageModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderMixin): pipeline_class = QwenImageModularPipeline pipeline_blocks_class = QwenImageAutoBlocks repo = "hf-internal-testing/tiny-qwenimage-modular" @@ -71,12 +71,12 @@ def get_dummy_inputs(self): "height": 32, "width": 32, "max_sequence_length": 16, - "output_type": "np", + "output_type": "pt", } return inputs -class TestQwenImageEditModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderTests): +class TestQwenImageEditModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderMixin): pipeline_class = QwenImageEditModularPipeline pipeline_blocks_class = QwenImageEditAutoBlocks repo = "hf-internal-testing/tiny-qwenimage-edit-modular" @@ -93,7 +93,7 @@ def get_dummy_inputs(self): "num_inference_steps": 2, "height": 32, "width": 32, - "output_type": "np", + "output_type": "pt", } inputs["image"] = PIL.Image.new("RGB", (32, 32), 0) return inputs @@ -102,7 +102,7 @@ def test_guider_cfg(self): super().test_guider_cfg(7e-5) -class QwenImageEditPlusModularPipelineFastTests(ModularPipelineTesterMixin, QwenImageModularGuiderTests): +class TestQwenImageEditPlusModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderMixin): pipeline_class = QwenImageEditPlusModularPipeline pipeline_blocks_class = QwenImageEditPlusAutoBlocks repo = "hf-internal-testing/tiny-qwenimage-edit-plus-modular" 
@@ -120,7 +120,7 @@ def get_dummy_inputs(self): "num_inference_steps": 2, "height": 32, "width": 32, - "output_type": "np", + "output_type": "pt", } inputs["image"] = PIL.Image.new("RGB", (32, 32), 0) return inputs From 237846162696ba109c25830ee1758c17a57a4825 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 10 Nov 2025 09:42:31 +0530 Subject: [PATCH 07/11] up --- .../test_modular_pipelines_common.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/tests/modular_pipelines/test_modular_pipelines_common.py b/tests/modular_pipelines/test_modular_pipelines_common.py index c4a2ec13a24e..386311555143 100644 --- a/tests/modular_pipelines/test_modular_pipelines_common.py +++ b/tests/modular_pipelines/test_modular_pipelines_common.py @@ -2,22 +2,16 @@ import tempfile from typing import Callable, Union +import pytest import torch import diffusers from diffusers import ComponentsManager, ModularPipeline, ModularPipelineBlocks from diffusers.utils import logging -from ..testing_utils import ( - backend_empty_cache, - numpy_cosine_similarity_distance, - require_accelerator, - require_torch, - torch_device, -) +from ..testing_utils import backend_empty_cache, numpy_cosine_similarity_distance, torch_device -@require_torch class ModularPipelineTesterMixin: """ It provides a set of common tests for each modular pipeline, @@ -204,7 +198,7 @@ def test_inference_batch_single_identical( max_diff = torch.abs(output_batch[0] - output[0]).max() assert max_diff < expected_max_diff, "Batch inference results different from single inference results" - @require_accelerator + @pytest.mark.skipif(torch_device != "cpu", "Test needs an accelerator.") def test_float16_inference(self, expected_max_diff=5e-2): pipe = self.get_pipeline() pipe.to(torch_device, torch.float32) @@ -233,7 +227,7 @@ def test_float16_inference(self, expected_max_diff=5e-2): max_diff = numpy_cosine_similarity_distance(output.flatten(), output_fp16.flatten()) assert max_diff < expected_max_diff, "FP16 inference is different from FP32 inference" - @require_accelerator + @pytest.mark.skipif(torch_device != "cpu", "Test needs an accelerator.") def test_to_device(self): pipe = self.get_pipeline() pipe.set_progress_bar_config(disable=None) @@ -260,7 +254,7 @@ def test_inference_is_not_nan_cpu(self): output = pipe(**self.get_dummy_inputs(), output="images") assert torch.isnan(output).sum() == 0, "CPU Inference returns NaN" - @require_accelerator + @pytest.mark.skipif(torch_device != "cpu", "Test needs an accelerator.") def test_inference_is_not_nan(self): pipe = self.get_pipeline() pipe.set_progress_bar_config(disable=None) @@ -293,7 +287,7 @@ def test_num_images_per_prompt(self): assert images.shape[0] == batch_size * num_images_per_prompt - @require_accelerator + @pytest.mark.skipif(torch_device != "cpu", "Test needs an accelerator.") def test_components_auto_cpu_offload_inference_consistent(self): base_pipe = self.get_pipeline().to(torch_device) From 0e9d1a916653b65355d54c22b56a5d4929b73a04 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 10 Nov 2025 09:56:07 +0530 Subject: [PATCH 08/11] reason --- tests/modular_pipelines/test_modular_pipelines_common.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/modular_pipelines/test_modular_pipelines_common.py b/tests/modular_pipelines/test_modular_pipelines_common.py index 386311555143..ecb9ce12ec98 100644 --- a/tests/modular_pipelines/test_modular_pipelines_common.py +++ b/tests/modular_pipelines/test_modular_pipelines_common.py @@ 
-198,7 +198,7 @@ def test_inference_batch_single_identical( max_diff = torch.abs(output_batch[0] - output[0]).max() assert max_diff < expected_max_diff, "Batch inference results different from single inference results" - @pytest.mark.skipif(torch_device != "cpu", "Test needs an accelerator.") + @pytest.mark.skipif(torch_device != "cpu", reason="Test needs an accelerator.") def test_float16_inference(self, expected_max_diff=5e-2): pipe = self.get_pipeline() pipe.to(torch_device, torch.float32) @@ -227,7 +227,7 @@ def test_float16_inference(self, expected_max_diff=5e-2): max_diff = numpy_cosine_similarity_distance(output.flatten(), output_fp16.flatten()) assert max_diff < expected_max_diff, "FP16 inference is different from FP32 inference" - @pytest.mark.skipif(torch_device != "cpu", "Test needs an accelerator.") + @pytest.mark.skipif(torch_device != "cpu", reason="Test needs an accelerator.") def test_to_device(self): pipe = self.get_pipeline() pipe.set_progress_bar_config(disable=None) @@ -254,7 +254,7 @@ def test_inference_is_not_nan_cpu(self): output = pipe(**self.get_dummy_inputs(), output="images") assert torch.isnan(output).sum() == 0, "CPU Inference returns NaN" - @pytest.mark.skipif(torch_device != "cpu", "Test needs an accelerator.") + @pytest.mark.skipif(torch_device != "cpu", reason="Test needs an accelerator.") def test_inference_is_not_nan(self): pipe = self.get_pipeline() pipe.set_progress_bar_config(disable=None) @@ -287,7 +287,7 @@ def test_num_images_per_prompt(self): assert images.shape[0] == batch_size * num_images_per_prompt - @pytest.mark.skipif(torch_device != "cpu", "Test needs an accelerator.") + @pytest.mark.skipif(torch_device != "cpu", reason="Test needs an accelerator.") def test_components_auto_cpu_offload_inference_consistent(self): base_pipe = self.get_pipeline().to(torch_device) From 8144a0e34beca510d774368108fd238c2282fefb Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 10 Nov 2025 10:12:02 +0530 Subject: [PATCH 09/11] up --- tests/modular_pipelines/test_modular_pipelines_common.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/modular_pipelines/test_modular_pipelines_common.py b/tests/modular_pipelines/test_modular_pipelines_common.py index ecb9ce12ec98..1edc7abdd613 100644 --- a/tests/modular_pipelines/test_modular_pipelines_common.py +++ b/tests/modular_pipelines/test_modular_pipelines_common.py @@ -198,7 +198,7 @@ def test_inference_batch_single_identical( max_diff = torch.abs(output_batch[0] - output[0]).max() assert max_diff < expected_max_diff, "Batch inference results different from single inference results" - @pytest.mark.skipif(torch_device != "cpu", reason="Test needs an accelerator.") + @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.") def test_float16_inference(self, expected_max_diff=5e-2): pipe = self.get_pipeline() pipe.to(torch_device, torch.float32) @@ -227,7 +227,7 @@ def test_float16_inference(self, expected_max_diff=5e-2): max_diff = numpy_cosine_similarity_distance(output.flatten(), output_fp16.flatten()) assert max_diff < expected_max_diff, "FP16 inference is different from FP32 inference" - @pytest.mark.skipif(torch_device != "cpu", reason="Test needs an accelerator.") + @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.") def test_to_device(self): pipe = self.get_pipeline() pipe.set_progress_bar_config(disable=None) @@ -254,7 +254,7 @@ def test_inference_is_not_nan_cpu(self): output = pipe(**self.get_dummy_inputs(), output="images") assert 
torch.isnan(output).sum() == 0, "CPU Inference returns NaN" - @pytest.mark.skipif(torch_device != "cpu", reason="Test needs an accelerator.") + @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.") def test_inference_is_not_nan(self): pipe = self.get_pipeline() pipe.set_progress_bar_config(disable=None) @@ -287,7 +287,7 @@ def test_num_images_per_prompt(self): assert images.shape[0] == batch_size * num_images_per_prompt - @pytest.mark.skipif(torch_device != "cpu", reason="Test needs an accelerator.") + @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.") def test_components_auto_cpu_offload_inference_consistent(self): base_pipe = self.get_pipeline().to(torch_device) From 650424e63353f440998d4c41196612330613527e Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 10 Nov 2025 11:30:18 +0530 Subject: [PATCH 10/11] fix multiple issues. --- .../qwen/test_modular_pipeline_qwenimage.py | 32 +------- ...st_modular_pipeline_stable_diffusion_xl.py | 76 +++---------------- .../test_modular_pipelines_common.py | 53 ++++++++----- 3 files changed, 47 insertions(+), 114 deletions(-) diff --git a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py index 975c7009a694..1228d0347af4 100644 --- a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py +++ b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py @@ -14,11 +14,9 @@ # limitations under the License. -import numpy as np import PIL import pytest -from diffusers import ClassifierFreeGuidance from diffusers.modular_pipelines import ( QwenImageAutoBlocks, QwenImageEditAutoBlocks, @@ -28,32 +26,10 @@ QwenImageModularPipeline, ) -from ...testing_utils import torch_device -from ..test_modular_pipelines_common import ModularPipelineTesterMixin +from ..test_modular_pipelines_common import ModularGuiderTesterMixin, ModularPipelineTesterMixin -class QwenImageModularGuiderMixin: - def test_guider_cfg(self, tol=1e-2): - pipe = self.get_pipeline() - pipe = pipe.to(torch_device) - - guider = ClassifierFreeGuidance(guidance_scale=1.0) - pipe.update_components(guider=guider) - - inputs = self.get_dummy_inputs() - out_no_cfg = pipe(**inputs, output="images") - - guider = ClassifierFreeGuidance(guidance_scale=7.5) - pipe.update_components(guider=guider) - inputs = self.get_dummy_inputs() - out_cfg = pipe(**inputs, output="images") - - assert out_cfg.shape == out_no_cfg.shape - max_diff = np.abs(out_cfg - out_no_cfg).max() - assert max_diff > tol, "Output with CFG must be different from normal inference" - - -class TestQwenImageModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderMixin): +class TestQwenImageModularPipelineFast(ModularPipelineTesterMixin, ModularGuiderTesterMixin): pipeline_class = QwenImageModularPipeline pipeline_blocks_class = QwenImageAutoBlocks repo = "hf-internal-testing/tiny-qwenimage-modular" @@ -76,7 +52,7 @@ def get_dummy_inputs(self): return inputs -class TestQwenImageEditModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderMixin): +class TestQwenImageEditModularPipelineFast(ModularPipelineTesterMixin, ModularGuiderTesterMixin): pipeline_class = QwenImageEditModularPipeline pipeline_blocks_class = QwenImageEditAutoBlocks repo = "hf-internal-testing/tiny-qwenimage-edit-modular" @@ -102,7 +78,7 @@ def test_guider_cfg(self): super().test_guider_cfg(7e-5) -class TestQwenImageEditPlusModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderMixin): +class 
TestQwenImageEditPlusModularPipelineFast(ModularPipelineTesterMixin, ModularGuiderTesterMixin): pipeline_class = QwenImageEditPlusModularPipeline pipeline_blocks_class = QwenImageEditPlusAutoBlocks repo = "hf-internal-testing/tiny-qwenimage-edit-plus-modular" diff --git a/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py b/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py index ea54b2bdff47..9fc16f09c8f0 100644 --- a/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py +++ b/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py @@ -25,7 +25,7 @@ from ...models.unets.test_models_unet_2d_condition import create_ip_adapter_state_dict from ...testing_utils import enable_full_determinism, floats_tensor, torch_device -from ..test_modular_pipelines_common import ModularPipelineTesterMixin +from ..test_modular_pipelines_common import ModularGuiderTesterMixin, ModularPipelineTesterMixin enable_full_determinism() @@ -37,13 +37,11 @@ class SDXLModularTesterMixin: """ def _test_stable_diffusion_xl_euler(self, expected_image_shape, expected_slice, expected_max_diff=1e-2): - sd_pipe = self.get_pipeline() - sd_pipe = sd_pipe.to(torch_device) - sd_pipe.set_progress_bar_config(disable=None) + sd_pipe = self.get_pipeline().to(torch_device) inputs = self.get_dummy_inputs() image = sd_pipe(**inputs, output="images") - image_slice = image[0, -3:, -3:, -1] + image_slice = image[0, -3:, -3:, -1].cpu() assert image.shape == expected_image_shape max_diff = torch.abs(image_slice.flatten() - expected_slice).max() @@ -110,7 +108,7 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N pipe = blocks.init_pipeline(self.repo) pipe.load_components(torch_dtype=torch.float32) pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) + cross_attention_dim = pipe.unet.config.get("cross_attention_dim") # forward pass without ip adapter @@ -219,9 +217,7 @@ def test_controlnet(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N # compare against static slices and that can be shaky (with a VVVV low probability). 
expected_max_diff = 9e-4 if torch_device == "cpu" else expected_max_diff - pipe = self.get_pipeline() - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) + pipe = self.get_pipeline().to(torch_device) # forward pass without controlnet inputs = self.get_dummy_inputs() @@ -251,9 +247,7 @@ def test_controlnet(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N assert max_diff_with_controlnet_scale > 1e-2, "Output with controlnet must be different from normal inference" def test_controlnet_cfg(self): - pipe = self.get_pipeline() - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) + pipe = self.get_pipeline().to(torch_device) # forward pass with CFG not applied guider = ClassifierFreeGuidance(guidance_scale=1.0) @@ -273,35 +267,11 @@ def test_controlnet_cfg(self): assert max_diff > 1e-2, "Output with CFG must be different from normal inference" -class SDXLModularGuiderTesterMixin: - def test_guider_cfg(self): - pipe = self.get_pipeline() - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - - # forward pass with CFG not applied - guider = ClassifierFreeGuidance(guidance_scale=1.0) - pipe.update_components(guider=guider) - - inputs = self.get_dummy_inputs() - out_no_cfg = pipe(**inputs, output="images") - - # forward pass with CFG applied - guider = ClassifierFreeGuidance(guidance_scale=7.5) - pipe.update_components(guider=guider) - inputs = self.get_dummy_inputs() - out_cfg = pipe(**inputs, output="images") - - assert out_cfg.shape == out_no_cfg.shape - max_diff = np.abs(out_cfg - out_no_cfg).max() - assert max_diff > 1e-2, "Output with CFG must be different from normal inference" - - class TestSDXLModularPipelineFast( SDXLModularTesterMixin, SDXLModularIPAdapterTesterMixin, SDXLModularControlNetTesterMixin, - SDXLModularGuiderTesterMixin, + ModularGuiderTesterMixin, ModularPipelineTesterMixin, ): """Test cases for Stable Diffusion XL modular pipeline fast tests.""" @@ -335,18 +305,7 @@ def test_stable_diffusion_xl_euler(self): self._test_stable_diffusion_xl_euler( expected_image_shape=self.expected_image_output_shape, expected_slice=torch.tensor( - [ - 0.5966781, - 0.62939394, - 0.48465094, - 0.51573336, - 0.57593524, - 0.47035995, - 0.53410417, - 0.51436996, - 0.47313565, - ], - device=torch_device, + [0.3886, 0.4685, 0.4953, 0.4217, 0.4317, 0.3945, 0.4847, 0.4704, 0.4731], ), expected_max_diff=1e-2, ) @@ -359,7 +318,7 @@ class TestSDXLImg2ImgModularPipelineFast( SDXLModularTesterMixin, SDXLModularIPAdapterTesterMixin, SDXLModularControlNetTesterMixin, - SDXLModularGuiderTesterMixin, + ModularGuiderTesterMixin, ModularPipelineTesterMixin, ): """Test cases for Stable Diffusion XL image-to-image modular pipeline fast tests.""" @@ -400,20 +359,7 @@ def get_dummy_inputs(self, seed=0): def test_stable_diffusion_xl_euler(self): self._test_stable_diffusion_xl_euler( expected_image_shape=self.expected_image_output_shape, - expected_slice=torch.tensor( - [ - 0.56943184, - 0.4702148, - 0.48048905, - 0.6235963, - 0.551138, - 0.49629188, - 0.60031277, - 0.5688907, - 0.43996853, - ], - device=torch_device, - ), + expected_slice=torch.tensor([0.5246, 0.4466, 0.444, 0.3246, 0.4443, 0.5108, 0.5225, 0.559, 0.5147]), expected_max_diff=1e-2, ) @@ -425,7 +371,7 @@ class SDXLInpaintingModularPipelineFastTests( SDXLModularTesterMixin, SDXLModularIPAdapterTesterMixin, SDXLModularControlNetTesterMixin, - SDXLModularGuiderTesterMixin, + ModularGuiderTesterMixin, ModularPipelineTesterMixin, ): """Test cases for Stable Diffusion XL 
inpainting modular pipeline fast tests.""" diff --git a/tests/modular_pipelines/test_modular_pipelines_common.py b/tests/modular_pipelines/test_modular_pipelines_common.py index 1edc7abdd613..5c556ca9850a 100644 --- a/tests/modular_pipelines/test_modular_pipelines_common.py +++ b/tests/modular_pipelines/test_modular_pipelines_common.py @@ -7,6 +7,7 @@ import diffusers from diffusers import ComponentsManager, ModularPipeline, ModularPipelineBlocks +from diffusers.guiders import ClassifierFreeGuidance from diffusers.utils import logging from ..testing_utils import backend_empty_cache, numpy_cosine_similarity_distance, torch_device @@ -104,6 +105,7 @@ def teardown_method(self): def get_pipeline(self, components_manager=None, torch_dtype=torch.float32): pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager) pipeline.load_components(torch_dtype=torch_dtype) + pipeline.set_progress_bar_config(disable=None) return pipeline def test_pipeline_call_signature(self): @@ -121,9 +123,7 @@ def _check_for_parameters(parameters, expected_parameters, param_type): _check_for_parameters(self.optional_params, optional_parameters, "optional") def test_inference_batch_consistent(self, batch_sizes=[2], batch_generator=True): - pipe = self.get_pipeline() - pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) + pipe = self.get_pipeline().to(torch_device) inputs = self.get_dummy_inputs() inputs["generator"] = self.get_generator(0) @@ -162,9 +162,8 @@ def test_inference_batch_single_identical( batch_size=2, expected_max_diff=1e-4, ): - pipe = self.get_pipeline() - pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) + pipe = self.get_pipeline().to(torch_device) + inputs = self.get_dummy_inputs() # Reset generator in case it is has been used in self.get_dummy_inputs @@ -202,7 +201,6 @@ def test_inference_batch_single_identical( def test_float16_inference(self, expected_max_diff=5e-2): pipe = self.get_pipeline() pipe.to(torch_device, torch.float32) - pipe.set_progress_bar_config(disable=None) pipe_fp16 = self.get_pipeline() pipe_fp16.to(torch_device, torch.float16) @@ -229,10 +227,8 @@ def test_float16_inference(self, expected_max_diff=5e-2): @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.") def test_to_device(self): - pipe = self.get_pipeline() - pipe.set_progress_bar_config(disable=None) + pipe = self.get_pipeline().to("cpu") - pipe.to("cpu") model_devices = [ component.device.type for component in pipe.components.values() if hasattr(component, "device") ] @@ -247,30 +243,23 @@ def test_to_device(self): ) def test_inference_is_not_nan_cpu(self): - pipe = self.get_pipeline() - pipe.set_progress_bar_config(disable=None) - pipe.to("cpu") + pipe = self.get_pipeline().to("cpu") output = pipe(**self.get_dummy_inputs(), output="images") assert torch.isnan(output).sum() == 0, "CPU Inference returns NaN" @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.") def test_inference_is_not_nan(self): - pipe = self.get_pipeline() - pipe.set_progress_bar_config(disable=None) - pipe.to(torch_device) + pipe = self.get_pipeline().to(torch_device) output = pipe(**self.get_dummy_inputs(), output="images") assert torch.isnan(output).sum() == 0, "Accelerator Inference returns NaN" def test_num_images_per_prompt(self): - pipe = self.get_pipeline() + pipe = self.get_pipeline().to(torch_device) if "num_images_per_prompt" not in pipe.blocks.input_names: - return - - pipe = pipe.to(torch_device) - 
pipe.set_progress_bar_config(disable=None)
+            pytest.skip("Skipping test as `num_images_per_prompt` is not present in input names.")
 
         batch_sizes = [1, 2]
         num_images_per_prompts = [1, 2]
@@ -325,3 +314,25 @@ def test_save_from_pretrained(self):
         image_slices.append(image[0, -3:, -3:, -1].flatten())
 
         assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3
+
+
+class ModularGuiderTesterMixin:
+    def test_guider_cfg(self, expected_max_diff=1e-2):
+        pipe = self.get_pipeline().to(torch_device)
+
+        # forward pass with CFG not applied
+        guider = ClassifierFreeGuidance(guidance_scale=1.0)
+        pipe.update_components(guider=guider)
+
+        inputs = self.get_dummy_inputs()
+        out_no_cfg = pipe(**inputs, output="images")
+
+        # forward pass with CFG applied
+        guider = ClassifierFreeGuidance(guidance_scale=7.5)
+        pipe.update_components(guider=guider)
+        inputs = self.get_dummy_inputs()
+        out_cfg = pipe(**inputs, output="images")
+
+        assert out_cfg.shape == out_no_cfg.shape
+        max_diff = torch.abs(out_cfg - out_no_cfg).max()
+        assert max_diff > expected_max_diff, "Output with CFG must be different from normal inference"

From b75096bd7a29195a11433a3b037c0bc2723fe331 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Mon, 10 Nov 2025 13:26:03 +0530
Subject: [PATCH 11/11] up

---
 .../flux/test_modular_pipeline_flux.py        | 12 ++++++++++++
 .../qwen/test_modular_pipeline_qwenimage.py   |  3 +++
 .../test_modular_pipelines_common.py          |  6 ++----
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/tests/modular_pipelines/flux/test_modular_pipeline_flux.py b/tests/modular_pipelines/flux/test_modular_pipeline_flux.py
index a29fd436149d..805790ca4ede 100644
--- a/tests/modular_pipelines/flux/test_modular_pipeline_flux.py
+++ b/tests/modular_pipelines/flux/test_modular_pipeline_flux.py
@@ -55,6 +55,10 @@ def get_dummy_inputs(self, seed=0):
         }
         return inputs
 
+    # @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.")
+    def test_float16_inference(self):
+        super().test_float16_inference(9e-2)
+
 
 class TestFluxImg2ImgModularPipelineFast(ModularPipelineTesterMixin):
     pipeline_class = FluxModularPipeline
@@ -118,6 +122,10 @@ def test_save_from_pretrained(self):
 
         assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3
 
+    # @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.")
+    def test_float16_inference(self):
+        super().test_float16_inference(8e-2)
+
 
 class TestFluxKontextModularPipelineFast(ModularPipelineTesterMixin):
     pipeline_class = FluxKontextModularPipeline
@@ -170,3 +178,7 @@ def test_save_from_pretrained(self):
         image_slices.append(image[0, -3:, -3:, -1].flatten())
 
         assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3
+
+    # @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.")
+    def test_float16_inference(self):
+        super().test_float16_inference(9e-2)
diff --git a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py
index 1228d0347af4..772fa19927fb 100644
--- a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py
+++ b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py
@@ -51,6 +51,9 @@ def get_dummy_inputs(self):
         }
         return inputs
 
+    def test_inference_batch_single_identical(self):
+        super().test_inference_batch_single_identical(expected_max_diff=5e-4)
+
 
 class TestQwenImageEditModularPipelineFast(ModularPipelineTesterMixin, ModularGuiderTesterMixin):
     pipeline_class = QwenImageEditModularPipeline
diff --git
a/tests/modular_pipelines/test_modular_pipelines_common.py b/tests/modular_pipelines/test_modular_pipelines_common.py index 5c556ca9850a..8d1adc4ad187 100644 --- a/tests/modular_pipelines/test_modular_pipelines_common.py +++ b/tests/modular_pipelines/test_modular_pipelines_common.py @@ -204,7 +204,6 @@ def test_float16_inference(self, expected_max_diff=5e-2): pipe_fp16 = self.get_pipeline() pipe_fp16.to(torch_device, torch.float16) - pipe_fp16.set_progress_bar_config(disable=None) inputs = self.get_dummy_inputs() # Reset generator in case it is used inside dummy inputs @@ -218,9 +217,8 @@ def test_float16_inference(self, expected_max_diff=5e-2): fp16_inputs["generator"] = self.get_generator(0) output_fp16 = pipe_fp16(**fp16_inputs, output="images") - if isinstance(output, torch.Tensor): - output = output.cpu() - output_fp16 = output_fp16.cpu() + output = output.cpu() + output_fp16 = output_fp16.cpu() max_diff = numpy_cosine_similarity_distance(output.flatten(), output_fp16.flatten()) assert max_diff < expected_max_diff, "FP16 inference is different from FP32 inference"
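
For reference, after this series a pipeline's fast tests are just the two
mixins plus class attributes and dummy inputs. A minimal sketch of a new
test class (hypothetical pipeline and repo names):

    from diffusers.modular_pipelines import MyAutoBlocks, MyModularPipeline  # hypothetical

    from ..test_modular_pipelines_common import ModularGuiderTesterMixin, ModularPipelineTesterMixin


    class TestMyModularPipelineFast(ModularPipelineTesterMixin, ModularGuiderTesterMixin):
        pipeline_class = MyModularPipeline
        pipeline_blocks_class = MyAutoBlocks
        repo = "hf-internal-testing/tiny-my-modular"  # hypothetical fixture repo

        params = frozenset(["prompt", "height", "width", "negative_prompt"])
        batch_params = frozenset(["prompt", "negative_prompt"])

        def get_dummy_inputs(self):
            # get_generator() is provided by ModularPipelineTesterMixin (CPU generator, fixed seed).
            return {
                "prompt": "dance monkey",
                "negative_prompt": "bad quality",
                "generator": self.get_generator(),
                "num_inference_steps": 2,
                "height": 32,
                "width": 32,
                "output_type": "pt",
            }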