From e35962b0decb95aec8de751da7768ee2d0aabaa4 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 3 Nov 2025 18:42:47 +0530 Subject: [PATCH 01/11] add tests for qwenimage modular. --- .../qwenimage/before_denoise.py | 13 +-- .../modular_pipelines/qwenimage/decoders.py | 3 +- .../modular_pipelines/qwenimage/encoders.py | 2 + .../qwenimage/modular_pipeline.py | 5 +- tests/modular_pipelines/qwen/__init__.py | 0 .../qwen/test_modular_pipeline_qwenimage.py | 85 +++++++++++++++++++ 6 files changed, 97 insertions(+), 11 deletions(-) create mode 100644 tests/modular_pipelines/qwen/__init__.py create mode 100644 tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py diff --git a/src/diffusers/modular_pipelines/qwenimage/before_denoise.py b/src/diffusers/modular_pipelines/qwenimage/before_denoise.py index fdec95dc506e..f10200503141 100644 --- a/src/diffusers/modular_pipelines/qwenimage/before_denoise.py +++ b/src/diffusers/modular_pipelines/qwenimage/before_denoise.py @@ -132,6 +132,7 @@ def expected_components(self) -> List[ComponentSpec]: @property def inputs(self) -> List[InputParam]: return [ + InputParam("latents"), InputParam(name="height"), InputParam(name="width"), InputParam(name="num_images_per_prompt", default=1), @@ -196,11 +197,11 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState) - f"You have passed a list of generators of length {len(block_state.generator)}, but requested an effective batch" f" size of {batch_size}. Make sure the batch size matches the length of the generators." ) - - block_state.latents = randn_tensor( - shape, generator=block_state.generator, device=device, dtype=block_state.dtype - ) - block_state.latents = components.pachifier.pack_latents(block_state.latents) + if block_state.latents is None: + block_state.latents = randn_tensor( + shape, generator=block_state.generator, device=device, dtype=block_state.dtype + ) + block_state.latents = components.pachifier.pack_latents(block_state.latents) self.set_block_state(state, block_state) return components, state @@ -549,7 +550,7 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState) - block_state.width // components.vae_scale_factor // 2, ) ] - * block_state.batch_size + for _ in range(block_state.batch_size) ] block_state.txt_seq_lens = ( block_state.prompt_embeds_mask.sum(dim=1).tolist() if block_state.prompt_embeds_mask is not None else None diff --git a/src/diffusers/modular_pipelines/qwenimage/decoders.py b/src/diffusers/modular_pipelines/qwenimage/decoders.py index 6c82fe989e55..aedb0e4018f3 100644 --- a/src/diffusers/modular_pipelines/qwenimage/decoders.py +++ b/src/diffusers/modular_pipelines/qwenimage/decoders.py @@ -74,8 +74,9 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState) - block_state = self.get_block_state(state) # YiYi Notes: remove support for output_type = "latents', we can just skip decode/encode step in modular + vae_scale_factor = 2 ** len(components.vae.temperal_downsample) block_state.latents = components.pachifier.unpack_latents( - block_state.latents, block_state.height, block_state.width + block_state.latents, block_state.height, block_state.width, vae_scale_factor=vae_scale_factor ) block_state.latents = block_state.latents.to(components.vae.dtype) diff --git a/src/diffusers/modular_pipelines/qwenimage/encoders.py b/src/diffusers/modular_pipelines/qwenimage/encoders.py index 04fb3fdc947b..b025c2dc5071 100644 --- a/src/diffusers/modular_pipelines/qwenimage/encoders.py +++ 
b/src/diffusers/modular_pipelines/qwenimage/encoders.py @@ -503,6 +503,8 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState): block_state.prompt_embeds = block_state.prompt_embeds[:, : block_state.max_sequence_length] block_state.prompt_embeds_mask = block_state.prompt_embeds_mask[:, : block_state.max_sequence_length] + block_state.negative_prompt_embeds = None + block_state.negative_prompt_embeds_mask = None if components.requires_unconditional_embeds: negative_prompt = block_state.negative_prompt or "" block_state.negative_prompt_embeds, block_state.negative_prompt_embeds_mask = get_qwen_prompt_embeds( diff --git a/src/diffusers/modular_pipelines/qwenimage/modular_pipeline.py b/src/diffusers/modular_pipelines/qwenimage/modular_pipeline.py index d9e30864f660..59e1a13a5db2 100644 --- a/src/diffusers/modular_pipelines/qwenimage/modular_pipeline.py +++ b/src/diffusers/modular_pipelines/qwenimage/modular_pipeline.py @@ -26,10 +26,7 @@ class QwenImagePachifier(ConfigMixin): config_name = "config.json" @register_to_config - def __init__( - self, - patch_size: int = 2, - ): + def __init__(self, patch_size: int = 2): super().__init__() def pack_latents(self, latents): diff --git a/tests/modular_pipelines/qwen/__init__.py b/tests/modular_pipelines/qwen/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py new file mode 100644 index 000000000000..024edf5f34b0 --- /dev/null +++ b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py @@ -0,0 +1,85 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np +import torch + +from diffusers import ClassifierFreeGuidance +from diffusers.modular_pipelines import QwenImageAutoBlocks, QwenImageModularPipeline + +from ...testing_utils import torch_device +from ..test_modular_pipelines_common import ModularPipelineTesterMixin + + +class QwenImagexModularTests: + pipeline_class = QwenImageModularPipeline + pipeline_blocks_class = QwenImageAutoBlocks + repo = "hf-internal-testing/tiny-qwenimage-modular" + + params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image", "mask_image"]) + batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"]) + + def get_pipeline(self, components_manager=None, torch_dtype=torch.float32): + pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager) + pipeline.load_components(torch_dtype=torch_dtype) + pipeline.set_progress_bar_config(disable=None) + return pipeline + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + inputs = { + "prompt": "dance monkey", + "negative_prompt": "bad quality", + "generator": generator, + "num_inference_steps": 2, + "height": 32, + "width": 32, + "max_sequence_length": 16, + "output_type": "np", + } + return inputs + + +class QwenImageModularGuiderTests: + def test_guider_cfg(self): + pipe = self.get_pipeline() + pipe = pipe.to(torch_device) + + guider = ClassifierFreeGuidance(guidance_scale=1.0) + pipe.update_components(guider=guider) + + inputs = self.get_dummy_inputs(torch_device) + out_no_cfg = pipe(**inputs, output="images") + + guider = ClassifierFreeGuidance(guidance_scale=7.5) + pipe.update_components(guider=guider) + inputs = self.get_dummy_inputs(torch_device) + out_cfg = pipe(**inputs, output="images") + + assert out_cfg.shape == out_no_cfg.shape + max_diff = np.abs(out_cfg - out_no_cfg).max() + assert max_diff > 1e-2, "Output with CFG must be different from normal inference" + + +class QwenImageModularPipelineFastTests( + QwenImagexModularTests, QwenImageModularGuiderTests, ModularPipelineTesterMixin, unittest.TestCase +): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) From 94fa2029a43d7e6e07d208ec4e9ef6df25bcba65 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 4 Nov 2025 08:36:23 +0530 Subject: [PATCH 02/11] qwenimage edit. 
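
Note: the Edit tests below reuse the text-to-image dummy inputs and only
add a PIL image, which mirrors how the modular pipeline is driven
directly. A rough usage sketch (illustrative only, not part of the diff;
the tiny repo id is the test fixture these tests load):

    import PIL.Image
    import torch

    from diffusers.modular_pipelines import QwenImageEditAutoBlocks

    # Build the pipeline from its block graph, then load the (tiny) components.
    pipe = QwenImageEditAutoBlocks().init_pipeline("hf-internal-testing/tiny-qwenimage-edit-modular")
    pipe.load_components(torch_dtype=torch.float32)

    images = pipe(
        prompt="dance monkey",
        image=PIL.Image.new("RGB", (32, 32), 0),
        num_inference_steps=2,
        generator=torch.Generator("cpu").manual_seed(0),
        output="images",
    )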
--- .../qwen/test_modular_pipeline_qwenimage.py | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py index 024edf5f34b0..adcd08e80ca5 100644 --- a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py +++ b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py @@ -16,16 +16,22 @@ import unittest import numpy as np +import PIL import torch from diffusers import ClassifierFreeGuidance -from diffusers.modular_pipelines import QwenImageAutoBlocks, QwenImageModularPipeline +from diffusers.modular_pipelines import ( + QwenImageAutoBlocks, + QwenImageEditAutoBlocks, + QwenImageEditModularPipeline, + QwenImageModularPipeline, +) from ...testing_utils import torch_device from ..test_modular_pipelines_common import ModularPipelineTesterMixin -class QwenImagexModularTests: +class QwenImageModularTests: pipeline_class = QwenImageModularPipeline pipeline_blocks_class = QwenImageAutoBlocks repo = "hf-internal-testing/tiny-qwenimage-modular" @@ -79,7 +85,20 @@ def test_guider_cfg(self): class QwenImageModularPipelineFastTests( - QwenImagexModularTests, QwenImageModularGuiderTests, ModularPipelineTesterMixin, unittest.TestCase + QwenImageModularTests, QwenImageModularGuiderTests, ModularPipelineTesterMixin, unittest.TestCase ): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + + +class QwenImageEditModularPipelineFastTests( + QwenImageModularTests, QwenImageModularGuiderTests, ModularPipelineTesterMixin, unittest.TestCase +): + pipeline_class = QwenImageEditModularPipeline + pipeline_blocks_class = QwenImageEditAutoBlocks + repo = "hf-internal-testing/tiny-qwenimage-edit-modular" + + def get_dummy_inputs(self, device, seed=0): + inputs = super().get_dummy_inputs(device, seed) + inputs["image"] = PIL.Image.new("RGB", (32, 32), 0) + return inputs From 7d3c250722e0d9bc900e91e85217d8667204293a Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 4 Nov 2025 10:05:34 +0530 Subject: [PATCH 03/11] qwenimage edit plus. 
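
Note: unlike Edit, Edit Plus is exercised here with a *list* of reference
images (`inputs["image"] = [image]`), and batching multiple images per
prompt is not settled yet, hence the strict xfails below. A sketch of the
input shape these tests exercise (illustrative only):

    import PIL.Image

    # A single prompt with a list of reference images; the xfail-marked
    # batch tests cover the cases that still need to be revisited.
    inputs = {
        "prompt": "dance monkey",
        "image": [PIL.Image.new("RGB", (32, 32), 0)],
    }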
--- .../modular_pipelines/qwenimage/encoders.py | 4 ++ .../qwen/test_modular_pipeline_qwenimage.py | 45 ++++++++++++++++++- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/diffusers/modular_pipelines/qwenimage/encoders.py b/src/diffusers/modular_pipelines/qwenimage/encoders.py index b025c2dc5071..3b56981e5290 100644 --- a/src/diffusers/modular_pipelines/qwenimage/encoders.py +++ b/src/diffusers/modular_pipelines/qwenimage/encoders.py @@ -629,6 +629,8 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState): device=device, ) + block_state.negative_prompt_embeds = None + block_state.negative_prompt_embeds_mask = None if components.requires_unconditional_embeds: negative_prompt = block_state.negative_prompt or " " block_state.negative_prompt_embeds, block_state.negative_prompt_embeds_mask = get_qwen_prompt_embeds_edit( @@ -681,6 +683,8 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState): device=device, ) + block_state.negative_prompt_embeds = None + block_state.negative_prompt_embeds_mask = None if components.requires_unconditional_embeds: negative_prompt = block_state.negative_prompt or " " block_state.negative_prompt_embeds, block_state.negative_prompt_embeds_mask = ( diff --git a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py index adcd08e80ca5..1a49fc222532 100644 --- a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py +++ b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py @@ -17,6 +17,7 @@ import numpy as np import PIL +import pytest import torch from diffusers import ClassifierFreeGuidance @@ -24,6 +25,8 @@ QwenImageAutoBlocks, QwenImageEditAutoBlocks, QwenImageEditModularPipeline, + QwenImageEditPlusAutoBlocks, + QwenImageEditPlusModularPipeline, QwenImageModularPipeline, ) @@ -64,7 +67,7 @@ def get_dummy_inputs(self, device, seed=0): class QwenImageModularGuiderTests: - def test_guider_cfg(self): + def test_guider_cfg(self, tol=1e-2): pipe = self.get_pipeline() pipe = pipe.to(torch_device) @@ -81,7 +84,7 @@ def test_guider_cfg(self): assert out_cfg.shape == out_no_cfg.shape max_diff = np.abs(out_cfg - out_no_cfg).max() - assert max_diff > 1e-2, "Output with CFG must be different from normal inference" + assert max_diff > tol, "Output with CFG must be different from normal inference" class QwenImageModularPipelineFastTests( @@ -100,5 +103,43 @@ class QwenImageEditModularPipelineFastTests( def get_dummy_inputs(self, device, seed=0): inputs = super().get_dummy_inputs(device, seed) + inputs.pop("max_sequence_length") inputs["image"] = PIL.Image.new("RGB", (32, 32), 0) return inputs + + def test_guider_cfg(self): + super().test_guider_cfg(7e-5) + + +class QwenImageEditPlusModularPipelineFastTests( + QwenImageModularTests, QwenImageModularGuiderTests, ModularPipelineTesterMixin, unittest.TestCase +): + pipeline_class = QwenImageEditPlusModularPipeline + pipeline_blocks_class = QwenImageEditPlusAutoBlocks + repo = "hf-internal-testing/tiny-qwenimage-edit-plus-modular" + + # No `mask_image` yet. 
+ params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image"]) + batch_params = frozenset(["prompt", "negative_prompt", "image"]) + + def get_dummy_inputs(self, device, seed=0): + inputs = super().get_dummy_inputs(device, seed) + inputs.pop("max_sequence_length") + image = PIL.Image.new("RGB", (32, 32), 0) + inputs["image"] = [image] + return inputs + + @pytest.mark.xfail(condition=True, reason="Batch of multiple images needs to be revisited", strict=True) + def test_num_images_per_prompt(self): + super().test_num_images_per_prompt() + + @pytest.mark.xfail(condition=True, reason="Batch of multiple images needs to be revisited", strict=True) + def test_inference_batch_consistent(): + super().test_inference_batch_consistent() + + @pytest.mark.xfail(condition=True, reason="Batch of multiple images needs to be revisited", strict=True) + def test_inference_batch_single_identical(): + super().test_inference_batch_single_identical() + + def test_guider_cfg(self): + super().test_guider_cfg(1e-3) From 7ad48f0c2755dd6f347912627cd0369affb81d64 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 4 Nov 2025 12:08:38 +0530 Subject: [PATCH 04/11] empty From 27a1c259a6aebbcadefa2a8f4d2a543f85621df0 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 10 Nov 2025 09:13:23 +0530 Subject: [PATCH 05/11] align with the latest structure --- .../qwen/test_modular_pipeline_qwenimage.py | 102 +++++++++--------- .../test_modular_pipelines_common.py | 15 +-- 2 files changed, 51 insertions(+), 66 deletions(-) diff --git a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py index 1a49fc222532..03479d4f32f7 100644 --- a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py +++ b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py @@ -13,12 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest import numpy as np import PIL import pytest -import torch from diffusers import ClassifierFreeGuidance from diffusers.modular_pipelines import ( @@ -34,38 +32,6 @@ from ..test_modular_pipelines_common import ModularPipelineTesterMixin -class QwenImageModularTests: - pipeline_class = QwenImageModularPipeline - pipeline_blocks_class = QwenImageAutoBlocks - repo = "hf-internal-testing/tiny-qwenimage-modular" - - params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image", "mask_image"]) - batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"]) - - def get_pipeline(self, components_manager=None, torch_dtype=torch.float32): - pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager) - pipeline.load_components(torch_dtype=torch_dtype) - pipeline.set_progress_bar_config(disable=None) - return pipeline - - def get_dummy_inputs(self, device, seed=0): - if str(device).startswith("mps"): - generator = torch.manual_seed(seed) - else: - generator = torch.Generator(device=device).manual_seed(seed) - inputs = { - "prompt": "dance monkey", - "negative_prompt": "bad quality", - "generator": generator, - "num_inference_steps": 2, - "height": 32, - "width": 32, - "max_sequence_length": 16, - "output_type": "np", - } - return inputs - - class QwenImageModularGuiderTests: def test_guider_cfg(self, tol=1e-2): pipe = self.get_pipeline() @@ -87,23 +53,48 @@ def test_guider_cfg(self, tol=1e-2): assert max_diff > tol, "Output with CFG must be different from normal inference" -class QwenImageModularPipelineFastTests( - QwenImageModularTests, QwenImageModularGuiderTests, ModularPipelineTesterMixin, unittest.TestCase -): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) +class TestQwenImageModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderTests): + pipeline_class = QwenImageModularPipeline + pipeline_blocks_class = QwenImageAutoBlocks + repo = "hf-internal-testing/tiny-qwenimage-modular" + + params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image", "mask_image"]) + batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"]) + + def get_dummy_inputs(self): + generator = self.get_generator() + inputs = { + "prompt": "dance monkey", + "negative_prompt": "bad quality", + "generator": generator, + "num_inference_steps": 2, + "height": 32, + "width": 32, + "max_sequence_length": 16, + "output_type": "np", + } + return inputs -class QwenImageEditModularPipelineFastTests( - QwenImageModularTests, QwenImageModularGuiderTests, ModularPipelineTesterMixin, unittest.TestCase -): +class TestQwenImageEditModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderTests): pipeline_class = QwenImageEditModularPipeline pipeline_blocks_class = QwenImageEditAutoBlocks repo = "hf-internal-testing/tiny-qwenimage-edit-modular" - def get_dummy_inputs(self, device, seed=0): - inputs = super().get_dummy_inputs(device, seed) - inputs.pop("max_sequence_length") + params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image", "mask_image"]) + batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"]) + + def get_dummy_inputs(self): + generator = self.get_generator() + inputs = { + "prompt": "dance monkey", + "negative_prompt": "bad quality", + "generator": generator, + "num_inference_steps": 2, + "height": 32, + "width": 32, + "output_type": "np", + } 
inputs["image"] = PIL.Image.new("RGB", (32, 32), 0) return inputs @@ -111,9 +102,7 @@ def test_guider_cfg(self): super().test_guider_cfg(7e-5) -class QwenImageEditPlusModularPipelineFastTests( - QwenImageModularTests, QwenImageModularGuiderTests, ModularPipelineTesterMixin, unittest.TestCase -): +class QwenImageEditPlusModularPipelineFastTests(ModularPipelineTesterMixin, QwenImageModularGuiderTests): pipeline_class = QwenImageEditPlusModularPipeline pipeline_blocks_class = QwenImageEditPlusAutoBlocks repo = "hf-internal-testing/tiny-qwenimage-edit-plus-modular" @@ -122,11 +111,18 @@ class QwenImageEditPlusModularPipelineFastTests( params = frozenset(["prompt", "height", "width", "negative_prompt", "attention_kwargs", "image"]) batch_params = frozenset(["prompt", "negative_prompt", "image"]) - def get_dummy_inputs(self, device, seed=0): - inputs = super().get_dummy_inputs(device, seed) - inputs.pop("max_sequence_length") - image = PIL.Image.new("RGB", (32, 32), 0) - inputs["image"] = [image] + def get_dummy_inputs(self): + generator = self.get_generator() + inputs = { + "prompt": "dance monkey", + "negative_prompt": "bad quality", + "generator": generator, + "num_inference_steps": 2, + "height": 32, + "width": 32, + "output_type": "np", + } + inputs["image"] = PIL.Image.new("RGB", (32, 32), 0) return inputs @pytest.mark.xfail(condition=True, reason="Batch of multiple images needs to be revisited", strict=True) diff --git a/tests/modular_pipelines/test_modular_pipelines_common.py b/tests/modular_pipelines/test_modular_pipelines_common.py index 1325e5c1de3c..c4a2ec13a24e 100644 --- a/tests/modular_pipelines/test_modular_pipelines_common.py +++ b/tests/modular_pipelines/test_modular_pipelines_common.py @@ -32,20 +32,9 @@ class ModularPipelineTesterMixin: # Canonical parameters that are passed to `__call__` regardless # of the type of pipeline. They are always optional and have common # sense default values. 
- optional_params = frozenset( - [ - "num_inference_steps", - "num_images_per_prompt", - "latents", - "output_type", - ] - ) + optional_params = frozenset(["num_inference_steps", "num_images_per_prompt", "latents", "output_type"]) # this is modular specific: generator needs to be a intermediate input because it's mutable - intermediate_params = frozenset( - [ - "generator", - ] - ) + intermediate_params = frozenset(["generator"]) def get_generator(self, seed=0): generator = torch.Generator("cpu").manual_seed(seed) From 5d5237779064502d216f8144d8344a77c53034d6 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 10 Nov 2025 09:23:40 +0530 Subject: [PATCH 06/11] up --- .../qwen/test_modular_pipeline_qwenimage.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py index 03479d4f32f7..975c7009a694 100644 --- a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py +++ b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py @@ -32,7 +32,7 @@ from ..test_modular_pipelines_common import ModularPipelineTesterMixin -class QwenImageModularGuiderTests: +class QwenImageModularGuiderMixin: def test_guider_cfg(self, tol=1e-2): pipe = self.get_pipeline() pipe = pipe.to(torch_device) @@ -40,12 +40,12 @@ def test_guider_cfg(self, tol=1e-2): guider = ClassifierFreeGuidance(guidance_scale=1.0) pipe.update_components(guider=guider) - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() out_no_cfg = pipe(**inputs, output="images") guider = ClassifierFreeGuidance(guidance_scale=7.5) pipe.update_components(guider=guider) - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() out_cfg = pipe(**inputs, output="images") assert out_cfg.shape == out_no_cfg.shape @@ -53,7 +53,7 @@ def test_guider_cfg(self, tol=1e-2): assert max_diff > tol, "Output with CFG must be different from normal inference" -class TestQwenImageModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderTests): +class TestQwenImageModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderMixin): pipeline_class = QwenImageModularPipeline pipeline_blocks_class = QwenImageAutoBlocks repo = "hf-internal-testing/tiny-qwenimage-modular" @@ -71,12 +71,12 @@ def get_dummy_inputs(self): "height": 32, "width": 32, "max_sequence_length": 16, - "output_type": "np", + "output_type": "pt", } return inputs -class TestQwenImageEditModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderTests): +class TestQwenImageEditModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderMixin): pipeline_class = QwenImageEditModularPipeline pipeline_blocks_class = QwenImageEditAutoBlocks repo = "hf-internal-testing/tiny-qwenimage-edit-modular" @@ -93,7 +93,7 @@ def get_dummy_inputs(self): "num_inference_steps": 2, "height": 32, "width": 32, - "output_type": "np", + "output_type": "pt", } inputs["image"] = PIL.Image.new("RGB", (32, 32), 0) return inputs @@ -102,7 +102,7 @@ def test_guider_cfg(self): super().test_guider_cfg(7e-5) -class QwenImageEditPlusModularPipelineFastTests(ModularPipelineTesterMixin, QwenImageModularGuiderTests): +class TestQwenImageEditPlusModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderMixin): pipeline_class = QwenImageEditPlusModularPipeline pipeline_blocks_class = QwenImageEditPlusAutoBlocks repo = "hf-internal-testing/tiny-qwenimage-edit-plus-modular" 
@@ -120,7 +120,7 @@ def get_dummy_inputs(self): "num_inference_steps": 2, "height": 32, "width": 32, - "output_type": "np", + "output_type": "pt", } inputs["image"] = PIL.Image.new("RGB", (32, 32), 0) return inputs From 237846162696ba109c25830ee1758c17a57a4825 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 10 Nov 2025 09:42:31 +0530 Subject: [PATCH 07/11] up --- .../test_modular_pipelines_common.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/tests/modular_pipelines/test_modular_pipelines_common.py b/tests/modular_pipelines/test_modular_pipelines_common.py index c4a2ec13a24e..386311555143 100644 --- a/tests/modular_pipelines/test_modular_pipelines_common.py +++ b/tests/modular_pipelines/test_modular_pipelines_common.py @@ -2,22 +2,16 @@ import tempfile from typing import Callable, Union +import pytest import torch import diffusers from diffusers import ComponentsManager, ModularPipeline, ModularPipelineBlocks from diffusers.utils import logging -from ..testing_utils import ( - backend_empty_cache, - numpy_cosine_similarity_distance, - require_accelerator, - require_torch, - torch_device, -) +from ..testing_utils import backend_empty_cache, numpy_cosine_similarity_distance, torch_device -@require_torch class ModularPipelineTesterMixin: """ It provides a set of common tests for each modular pipeline, @@ -204,7 +198,7 @@ def test_inference_batch_single_identical( max_diff = torch.abs(output_batch[0] - output[0]).max() assert max_diff < expected_max_diff, "Batch inference results different from single inference results" - @require_accelerator + @pytest.mark.skipif(torch_device != "cpu", "Test needs an accelerator.") def test_float16_inference(self, expected_max_diff=5e-2): pipe = self.get_pipeline() pipe.to(torch_device, torch.float32) @@ -233,7 +227,7 @@ def test_float16_inference(self, expected_max_diff=5e-2): max_diff = numpy_cosine_similarity_distance(output.flatten(), output_fp16.flatten()) assert max_diff < expected_max_diff, "FP16 inference is different from FP32 inference" - @require_accelerator + @pytest.mark.skipif(torch_device != "cpu", "Test needs an accelerator.") def test_to_device(self): pipe = self.get_pipeline() pipe.set_progress_bar_config(disable=None) @@ -260,7 +254,7 @@ def test_inference_is_not_nan_cpu(self): output = pipe(**self.get_dummy_inputs(), output="images") assert torch.isnan(output).sum() == 0, "CPU Inference returns NaN" - @require_accelerator + @pytest.mark.skipif(torch_device != "cpu", "Test needs an accelerator.") def test_inference_is_not_nan(self): pipe = self.get_pipeline() pipe.set_progress_bar_config(disable=None) @@ -293,7 +287,7 @@ def test_num_images_per_prompt(self): assert images.shape[0] == batch_size * num_images_per_prompt - @require_accelerator + @pytest.mark.skipif(torch_device != "cpu", "Test needs an accelerator.") def test_components_auto_cpu_offload_inference_consistent(self): base_pipe = self.get_pipeline().to(torch_device) From 0e9d1a916653b65355d54c22b56a5d4929b73a04 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 10 Nov 2025 09:56:07 +0530 Subject: [PATCH 08/11] reason --- tests/modular_pipelines/test_modular_pipelines_common.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/modular_pipelines/test_modular_pipelines_common.py b/tests/modular_pipelines/test_modular_pipelines_common.py index 386311555143..ecb9ce12ec98 100644 --- a/tests/modular_pipelines/test_modular_pipelines_common.py +++ b/tests/modular_pipelines/test_modular_pipelines_common.py @@ 
-198,7 +198,7 @@ def test_inference_batch_single_identical( max_diff = torch.abs(output_batch[0] - output[0]).max() assert max_diff < expected_max_diff, "Batch inference results different from single inference results" - @pytest.mark.skipif(torch_device != "cpu", "Test needs an accelerator.") + @pytest.mark.skipif(torch_device != "cpu", reason="Test needs an accelerator.") def test_float16_inference(self, expected_max_diff=5e-2): pipe = self.get_pipeline() pipe.to(torch_device, torch.float32) @@ -227,7 +227,7 @@ def test_float16_inference(self, expected_max_diff=5e-2): max_diff = numpy_cosine_similarity_distance(output.flatten(), output_fp16.flatten()) assert max_diff < expected_max_diff, "FP16 inference is different from FP32 inference" - @pytest.mark.skipif(torch_device != "cpu", "Test needs an accelerator.") + @pytest.mark.skipif(torch_device != "cpu", reason="Test needs an accelerator.") def test_to_device(self): pipe = self.get_pipeline() pipe.set_progress_bar_config(disable=None) @@ -254,7 +254,7 @@ def test_inference_is_not_nan_cpu(self): output = pipe(**self.get_dummy_inputs(), output="images") assert torch.isnan(output).sum() == 0, "CPU Inference returns NaN" - @pytest.mark.skipif(torch_device != "cpu", "Test needs an accelerator.") + @pytest.mark.skipif(torch_device != "cpu", reason="Test needs an accelerator.") def test_inference_is_not_nan(self): pipe = self.get_pipeline() pipe.set_progress_bar_config(disable=None) @@ -287,7 +287,7 @@ def test_num_images_per_prompt(self): assert images.shape[0] == batch_size * num_images_per_prompt - @pytest.mark.skipif(torch_device != "cpu", "Test needs an accelerator.") + @pytest.mark.skipif(torch_device != "cpu", reason="Test needs an accelerator.") def test_components_auto_cpu_offload_inference_consistent(self): base_pipe = self.get_pipeline().to(torch_device) From 8144a0e34beca510d774368108fd238c2282fefb Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 10 Nov 2025 10:12:02 +0530 Subject: [PATCH 09/11] up --- tests/modular_pipelines/test_modular_pipelines_common.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/modular_pipelines/test_modular_pipelines_common.py b/tests/modular_pipelines/test_modular_pipelines_common.py index ecb9ce12ec98..1edc7abdd613 100644 --- a/tests/modular_pipelines/test_modular_pipelines_common.py +++ b/tests/modular_pipelines/test_modular_pipelines_common.py @@ -198,7 +198,7 @@ def test_inference_batch_single_identical( max_diff = torch.abs(output_batch[0] - output[0]).max() assert max_diff < expected_max_diff, "Batch inference results different from single inference results" - @pytest.mark.skipif(torch_device != "cpu", reason="Test needs an accelerator.") + @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.") def test_float16_inference(self, expected_max_diff=5e-2): pipe = self.get_pipeline() pipe.to(torch_device, torch.float32) @@ -227,7 +227,7 @@ def test_float16_inference(self, expected_max_diff=5e-2): max_diff = numpy_cosine_similarity_distance(output.flatten(), output_fp16.flatten()) assert max_diff < expected_max_diff, "FP16 inference is different from FP32 inference" - @pytest.mark.skipif(torch_device != "cpu", reason="Test needs an accelerator.") + @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.") def test_to_device(self): pipe = self.get_pipeline() pipe.set_progress_bar_config(disable=None) @@ -254,7 +254,7 @@ def test_inference_is_not_nan_cpu(self): output = pipe(**self.get_dummy_inputs(), output="images") assert 
torch.isnan(output).sum() == 0, "CPU Inference returns NaN" - @pytest.mark.skipif(torch_device != "cpu", reason="Test needs an accelerator.") + @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.") def test_inference_is_not_nan(self): pipe = self.get_pipeline() pipe.set_progress_bar_config(disable=None) @@ -287,7 +287,7 @@ def test_num_images_per_prompt(self): assert images.shape[0] == batch_size * num_images_per_prompt - @pytest.mark.skipif(torch_device != "cpu", reason="Test needs an accelerator.") + @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.") def test_components_auto_cpu_offload_inference_consistent(self): base_pipe = self.get_pipeline().to(torch_device) From 650424e63353f440998d4c41196612330613527e Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 10 Nov 2025 11:30:18 +0530 Subject: [PATCH 10/11] fix multiple issues. --- .../qwen/test_modular_pipeline_qwenimage.py | 32 +------- ...st_modular_pipeline_stable_diffusion_xl.py | 76 +++---------------- .../test_modular_pipelines_common.py | 53 ++++++++----- 3 files changed, 47 insertions(+), 114 deletions(-) diff --git a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py index 975c7009a694..1228d0347af4 100644 --- a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py +++ b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py @@ -14,11 +14,9 @@ # limitations under the License. -import numpy as np import PIL import pytest -from diffusers import ClassifierFreeGuidance from diffusers.modular_pipelines import ( QwenImageAutoBlocks, QwenImageEditAutoBlocks, @@ -28,32 +26,10 @@ QwenImageModularPipeline, ) -from ...testing_utils import torch_device -from ..test_modular_pipelines_common import ModularPipelineTesterMixin +from ..test_modular_pipelines_common import ModularGuiderTesterMixin, ModularPipelineTesterMixin -class QwenImageModularGuiderMixin: - def test_guider_cfg(self, tol=1e-2): - pipe = self.get_pipeline() - pipe = pipe.to(torch_device) - - guider = ClassifierFreeGuidance(guidance_scale=1.0) - pipe.update_components(guider=guider) - - inputs = self.get_dummy_inputs() - out_no_cfg = pipe(**inputs, output="images") - - guider = ClassifierFreeGuidance(guidance_scale=7.5) - pipe.update_components(guider=guider) - inputs = self.get_dummy_inputs() - out_cfg = pipe(**inputs, output="images") - - assert out_cfg.shape == out_no_cfg.shape - max_diff = np.abs(out_cfg - out_no_cfg).max() - assert max_diff > tol, "Output with CFG must be different from normal inference" - - -class TestQwenImageModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderMixin): +class TestQwenImageModularPipelineFast(ModularPipelineTesterMixin, ModularGuiderTesterMixin): pipeline_class = QwenImageModularPipeline pipeline_blocks_class = QwenImageAutoBlocks repo = "hf-internal-testing/tiny-qwenimage-modular" @@ -76,7 +52,7 @@ def get_dummy_inputs(self): return inputs -class TestQwenImageEditModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderMixin): +class TestQwenImageEditModularPipelineFast(ModularPipelineTesterMixin, ModularGuiderTesterMixin): pipeline_class = QwenImageEditModularPipeline pipeline_blocks_class = QwenImageEditAutoBlocks repo = "hf-internal-testing/tiny-qwenimage-edit-modular" @@ -102,7 +78,7 @@ def test_guider_cfg(self): super().test_guider_cfg(7e-5) -class TestQwenImageEditPlusModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderMixin): +class 
TestQwenImageEditPlusModularPipelineFast(ModularPipelineTesterMixin, ModularGuiderTesterMixin): pipeline_class = QwenImageEditPlusModularPipeline pipeline_blocks_class = QwenImageEditPlusAutoBlocks repo = "hf-internal-testing/tiny-qwenimage-edit-plus-modular" diff --git a/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py b/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py index ea54b2bdff47..9fc16f09c8f0 100644 --- a/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py +++ b/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py @@ -25,7 +25,7 @@ from ...models.unets.test_models_unet_2d_condition import create_ip_adapter_state_dict from ...testing_utils import enable_full_determinism, floats_tensor, torch_device -from ..test_modular_pipelines_common import ModularPipelineTesterMixin +from ..test_modular_pipelines_common import ModularGuiderTesterMixin, ModularPipelineTesterMixin enable_full_determinism() @@ -37,13 +37,11 @@ class SDXLModularTesterMixin: """ def _test_stable_diffusion_xl_euler(self, expected_image_shape, expected_slice, expected_max_diff=1e-2): - sd_pipe = self.get_pipeline() - sd_pipe = sd_pipe.to(torch_device) - sd_pipe.set_progress_bar_config(disable=None) + sd_pipe = self.get_pipeline().to(torch_device) inputs = self.get_dummy_inputs() image = sd_pipe(**inputs, output="images") - image_slice = image[0, -3:, -3:, -1] + image_slice = image[0, -3:, -3:, -1].cpu() assert image.shape == expected_image_shape max_diff = torch.abs(image_slice.flatten() - expected_slice).max() @@ -110,7 +108,7 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N pipe = blocks.init_pipeline(self.repo) pipe.load_components(torch_dtype=torch.float32) pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) + cross_attention_dim = pipe.unet.config.get("cross_attention_dim") # forward pass without ip adapter @@ -219,9 +217,7 @@ def test_controlnet(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N # compare against static slices and that can be shaky (with a VVVV low probability). 
expected_max_diff = 9e-4 if torch_device == "cpu" else expected_max_diff - pipe = self.get_pipeline() - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) + pipe = self.get_pipeline().to(torch_device) # forward pass without controlnet inputs = self.get_dummy_inputs() @@ -251,9 +247,7 @@ def test_controlnet(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N assert max_diff_with_controlnet_scale > 1e-2, "Output with controlnet must be different from normal inference" def test_controlnet_cfg(self): - pipe = self.get_pipeline() - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) + pipe = self.get_pipeline().to(torch_device) # forward pass with CFG not applied guider = ClassifierFreeGuidance(guidance_scale=1.0) @@ -273,35 +267,11 @@ def test_controlnet_cfg(self): assert max_diff > 1e-2, "Output with CFG must be different from normal inference" -class SDXLModularGuiderTesterMixin: - def test_guider_cfg(self): - pipe = self.get_pipeline() - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - - # forward pass with CFG not applied - guider = ClassifierFreeGuidance(guidance_scale=1.0) - pipe.update_components(guider=guider) - - inputs = self.get_dummy_inputs() - out_no_cfg = pipe(**inputs, output="images") - - # forward pass with CFG applied - guider = ClassifierFreeGuidance(guidance_scale=7.5) - pipe.update_components(guider=guider) - inputs = self.get_dummy_inputs() - out_cfg = pipe(**inputs, output="images") - - assert out_cfg.shape == out_no_cfg.shape - max_diff = np.abs(out_cfg - out_no_cfg).max() - assert max_diff > 1e-2, "Output with CFG must be different from normal inference" - - class TestSDXLModularPipelineFast( SDXLModularTesterMixin, SDXLModularIPAdapterTesterMixin, SDXLModularControlNetTesterMixin, - SDXLModularGuiderTesterMixin, + ModularGuiderTesterMixin, ModularPipelineTesterMixin, ): """Test cases for Stable Diffusion XL modular pipeline fast tests.""" @@ -335,18 +305,7 @@ def test_stable_diffusion_xl_euler(self): self._test_stable_diffusion_xl_euler( expected_image_shape=self.expected_image_output_shape, expected_slice=torch.tensor( - [ - 0.5966781, - 0.62939394, - 0.48465094, - 0.51573336, - 0.57593524, - 0.47035995, - 0.53410417, - 0.51436996, - 0.47313565, - ], - device=torch_device, + [0.3886, 0.4685, 0.4953, 0.4217, 0.4317, 0.3945, 0.4847, 0.4704, 0.4731], ), expected_max_diff=1e-2, ) @@ -359,7 +318,7 @@ class TestSDXLImg2ImgModularPipelineFast( SDXLModularTesterMixin, SDXLModularIPAdapterTesterMixin, SDXLModularControlNetTesterMixin, - SDXLModularGuiderTesterMixin, + ModularGuiderTesterMixin, ModularPipelineTesterMixin, ): """Test cases for Stable Diffusion XL image-to-image modular pipeline fast tests.""" @@ -400,20 +359,7 @@ def get_dummy_inputs(self, seed=0): def test_stable_diffusion_xl_euler(self): self._test_stable_diffusion_xl_euler( expected_image_shape=self.expected_image_output_shape, - expected_slice=torch.tensor( - [ - 0.56943184, - 0.4702148, - 0.48048905, - 0.6235963, - 0.551138, - 0.49629188, - 0.60031277, - 0.5688907, - 0.43996853, - ], - device=torch_device, - ), + expected_slice=torch.tensor([0.5246, 0.4466, 0.444, 0.3246, 0.4443, 0.5108, 0.5225, 0.559, 0.5147]), expected_max_diff=1e-2, ) @@ -425,7 +371,7 @@ class SDXLInpaintingModularPipelineFastTests( SDXLModularTesterMixin, SDXLModularIPAdapterTesterMixin, SDXLModularControlNetTesterMixin, - SDXLModularGuiderTesterMixin, + ModularGuiderTesterMixin, ModularPipelineTesterMixin, ): """Test cases for Stable Diffusion XL 
inpainting modular pipeline fast tests.""" diff --git a/tests/modular_pipelines/test_modular_pipelines_common.py b/tests/modular_pipelines/test_modular_pipelines_common.py index 1edc7abdd613..5c556ca9850a 100644 --- a/tests/modular_pipelines/test_modular_pipelines_common.py +++ b/tests/modular_pipelines/test_modular_pipelines_common.py @@ -7,6 +7,7 @@ import diffusers from diffusers import ComponentsManager, ModularPipeline, ModularPipelineBlocks +from diffusers.guiders import ClassifierFreeGuidance from diffusers.utils import logging from ..testing_utils import backend_empty_cache, numpy_cosine_similarity_distance, torch_device @@ -104,6 +105,7 @@ def teardown_method(self): def get_pipeline(self, components_manager=None, torch_dtype=torch.float32): pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager) pipeline.load_components(torch_dtype=torch_dtype) + pipeline.set_progress_bar_config(disable=None) return pipeline def test_pipeline_call_signature(self): @@ -121,9 +123,7 @@ def _check_for_parameters(parameters, expected_parameters, param_type): _check_for_parameters(self.optional_params, optional_parameters, "optional") def test_inference_batch_consistent(self, batch_sizes=[2], batch_generator=True): - pipe = self.get_pipeline() - pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) + pipe = self.get_pipeline().to(torch_device) inputs = self.get_dummy_inputs() inputs["generator"] = self.get_generator(0) @@ -162,9 +162,8 @@ def test_inference_batch_single_identical( batch_size=2, expected_max_diff=1e-4, ): - pipe = self.get_pipeline() - pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) + pipe = self.get_pipeline().to(torch_device) + inputs = self.get_dummy_inputs() # Reset generator in case it is has been used in self.get_dummy_inputs @@ -202,7 +201,6 @@ def test_inference_batch_single_identical( def test_float16_inference(self, expected_max_diff=5e-2): pipe = self.get_pipeline() pipe.to(torch_device, torch.float32) - pipe.set_progress_bar_config(disable=None) pipe_fp16 = self.get_pipeline() pipe_fp16.to(torch_device, torch.float16) @@ -229,10 +227,8 @@ def test_float16_inference(self, expected_max_diff=5e-2): @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.") def test_to_device(self): - pipe = self.get_pipeline() - pipe.set_progress_bar_config(disable=None) + pipe = self.get_pipeline().to("cpu") - pipe.to("cpu") model_devices = [ component.device.type for component in pipe.components.values() if hasattr(component, "device") ] @@ -247,30 +243,23 @@ def test_to_device(self): ) def test_inference_is_not_nan_cpu(self): - pipe = self.get_pipeline() - pipe.set_progress_bar_config(disable=None) - pipe.to("cpu") + pipe = self.get_pipeline().to("cpu") output = pipe(**self.get_dummy_inputs(), output="images") assert torch.isnan(output).sum() == 0, "CPU Inference returns NaN" @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.") def test_inference_is_not_nan(self): - pipe = self.get_pipeline() - pipe.set_progress_bar_config(disable=None) - pipe.to(torch_device) + pipe = self.get_pipeline().to(torch_device) output = pipe(**self.get_dummy_inputs(), output="images") assert torch.isnan(output).sum() == 0, "Accelerator Inference returns NaN" def test_num_images_per_prompt(self): - pipe = self.get_pipeline() + pipe = self.get_pipeline().to(torch_device) if "num_images_per_prompt" not in pipe.blocks.input_names: - return - - pipe = pipe.to(torch_device) - 
pipe.set_progress_bar_config(disable=None)
+            pytest.skip("Skipping test as `num_images_per_prompt` is not present in input names.")
 
         batch_sizes = [1, 2]
         num_images_per_prompts = [1, 2]
@@ -325,3 +314,25 @@ def test_save_from_pretrained(self):
         image_slices.append(image[0, -3:, -3:, -1].flatten())
 
         assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3
+
+
+class ModularGuiderTesterMixin:
+    def test_guider_cfg(self, expected_max_diff=1e-2):
+        pipe = self.get_pipeline().to(torch_device)
+
+        # forward pass with CFG not applied
+        guider = ClassifierFreeGuidance(guidance_scale=1.0)
+        pipe.update_components(guider=guider)
+
+        inputs = self.get_dummy_inputs()
+        out_no_cfg = pipe(**inputs, output="images")
+
+        # forward pass with CFG applied
+        guider = ClassifierFreeGuidance(guidance_scale=7.5)
+        pipe.update_components(guider=guider)
+        inputs = self.get_dummy_inputs()
+        out_cfg = pipe(**inputs, output="images")
+
+        assert out_cfg.shape == out_no_cfg.shape
+        max_diff = torch.abs(out_cfg - out_no_cfg).max()
+        assert max_diff > expected_max_diff, "Output with CFG must be different from normal inference"

From b75096bd7a29195a11433a3b037c0bc2723fe331 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Mon, 10 Nov 2025 13:26:03 +0530
Subject: [PATCH 11/11] up

---
 .../flux/test_modular_pipeline_flux.py        | 12 ++++++++++++
 .../qwen/test_modular_pipeline_qwenimage.py   |  3 +++
 .../test_modular_pipelines_common.py          |  6 ++----
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/tests/modular_pipelines/flux/test_modular_pipeline_flux.py b/tests/modular_pipelines/flux/test_modular_pipeline_flux.py
index a29fd436149d..805790ca4ede 100644
--- a/tests/modular_pipelines/flux/test_modular_pipeline_flux.py
+++ b/tests/modular_pipelines/flux/test_modular_pipeline_flux.py
@@ -55,6 +55,10 @@ def get_dummy_inputs(self, seed=0):
         }
         return inputs
 
+    # @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.")
+    def test_float16_inference(self):
+        super().test_float16_inference(9e-2)
+
 
 class TestFluxImg2ImgModularPipelineFast(ModularPipelineTesterMixin):
     pipeline_class = FluxModularPipeline
@@ -118,6 +122,10 @@ def test_save_from_pretrained(self):
 
         assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3
 
+    # @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.")
+    def test_float16_inference(self):
+        super().test_float16_inference(8e-2)
+
 
 class TestFluxKontextModularPipelineFast(ModularPipelineTesterMixin):
     pipeline_class = FluxKontextModularPipeline
@@ -170,3 +178,7 @@ def test_save_from_pretrained(self):
         image_slices.append(image[0, -3:, -3:, -1].flatten())
 
         assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3
+
+    # @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.")
+    def test_float16_inference(self):
+        super().test_float16_inference(9e-2)
diff --git a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py
index 1228d0347af4..772fa19927fb 100644
--- a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py
+++ b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py
@@ -51,6 +51,9 @@ def get_dummy_inputs(self):
         }
         return inputs
 
+    def test_inference_batch_single_identical(self):
+        super().test_inference_batch_single_identical(expected_max_diff=5e-4)
+
 
 class TestQwenImageEditModularPipelineFast(ModularPipelineTesterMixin, ModularGuiderTesterMixin):
     pipeline_class = QwenImageEditModularPipeline
diff --git
a/tests/modular_pipelines/test_modular_pipelines_common.py b/tests/modular_pipelines/test_modular_pipelines_common.py index 5c556ca9850a..8d1adc4ad187 100644 --- a/tests/modular_pipelines/test_modular_pipelines_common.py +++ b/tests/modular_pipelines/test_modular_pipelines_common.py @@ -204,7 +204,6 @@ def test_float16_inference(self, expected_max_diff=5e-2): pipe_fp16 = self.get_pipeline() pipe_fp16.to(torch_device, torch.float16) - pipe_fp16.set_progress_bar_config(disable=None) inputs = self.get_dummy_inputs() # Reset generator in case it is used inside dummy inputs @@ -218,9 +217,8 @@ def test_float16_inference(self, expected_max_diff=5e-2): fp16_inputs["generator"] = self.get_generator(0) output_fp16 = pipe_fp16(**fp16_inputs, output="images") - if isinstance(output, torch.Tensor): - output = output.cpu() - output_fp16 = output_fp16.cpu() + output = output.cpu() + output_fp16 = output_fp16.cpu() max_diff = numpy_cosine_similarity_distance(output.flatten(), output_fp16.flatten()) assert max_diff < expected_max_diff, "FP16 inference is different from FP32 inference"
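
For reference, after this series a pipeline's fast tests are just the two
mixins plus class attributes and dummy inputs. A minimal sketch of a new
test class (hypothetical pipeline and repo names):

    from diffusers.modular_pipelines import MyAutoBlocks, MyModularPipeline  # hypothetical

    from ..test_modular_pipelines_common import ModularGuiderTesterMixin, ModularPipelineTesterMixin


    class TestMyModularPipelineFast(ModularPipelineTesterMixin, ModularGuiderTesterMixin):
        pipeline_class = MyModularPipeline
        pipeline_blocks_class = MyAutoBlocks
        repo = "hf-internal-testing/tiny-my-modular"  # hypothetical fixture repo

        params = frozenset(["prompt", "height", "width", "negative_prompt"])
        batch_params = frozenset(["prompt", "negative_prompt"])

        def get_dummy_inputs(self):
            # get_generator() is provided by ModularPipelineTesterMixin (CPU generator, fixed seed).
            return {
                "prompt": "dance monkey",
                "negative_prompt": "bad quality",
                "generator": self.get_generator(),
                "num_inference_steps": 2,
                "height": 32,
                "width": 32,
                "output_type": "pt",
            }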