Skip to content

Commit 5ffe280

Browse files
author
Amit Raj
committed
Added support for output dataclass
Signed-off-by: Amit Raj <amitraj@qti.qualcomm.com>
1 parent f6abfeb commit 5ffe280

File tree

8 files changed

+531
-461
lines changed

8 files changed

+531
-461
lines changed

QEfficient/diffusers/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
* Optimized for Qualcomm Cloud AI 100*
1313

14-
<img src="../../docs/image/git_laughing_flux.png" alt="Sample Output" width="400">
14+
<img src="../../docs/image/girl_laughing.png" alt="Sample Output" width="400">
1515

1616
**Generated with**: `black-forest-labs/FLUX.1-schnell``"A girl laughing"` • 4 steps • 0.0 guidance scale • ⚡
1717

QEfficient/diffusers/pipelines/config_manager.py

Lines changed: 0 additions & 47 deletions
This file was deleted.

QEfficient/diffusers/pipelines/flux/pipeline_flux.py

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,14 @@
1414
import torch
1515
from diffusers import FluxPipeline
1616
from diffusers.image_processor import VaeImageProcessor
17-
from diffusers.pipelines.flux.pipeline_output import FluxPipelineOutput
1817
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import retrieve_timesteps # TODO
1918

20-
from QEfficient.diffusers.pipelines.config_manager import config_manager, set_module_device_ids
21-
from QEfficient.diffusers.pipelines.pipeline_utils import (
19+
from QEfficient.diffusers.pipelines.pipeline_module import (
2220
QEffFluxTransformerModel,
2321
QEffTextEncoder,
2422
QEffVAE,
2523
)
24+
from QEfficient.diffusers.pipelines.pipeline_utils import QEffPipelineOutput, config_manager, set_module_device_ids
2625
from QEfficient.generation.cloud_infer import QAICInferenceSession
2726

2827

@@ -259,10 +258,10 @@ def _get_t5_prompt_embeds(
259258
aic_text_input = {"input_ids": text_input_ids.numpy().astype(np.int64)}
260259
import time
261260

262-
start_time = time.time()
261+
start_t5_time = time.time()
263262
prompt_embeds = torch.tensor(self.text_encoder_2.qpc_session.run(aic_text_input)["last_hidden_state"])
264-
end_time = time.time()
265-
print(f"T5 Text encoder inference time: {end_time - start_time:.4f} seconds")
263+
end_t5_time = time.time()
264+
self.text_encoder_2.inference_time = end_t5_time - start_t5_time
266265

267266
_, seq_len, _ = prompt_embeds.shape
268267
# duplicate text embeddings and attention mask for each generation per prompt, using mps friendly method
@@ -325,10 +324,11 @@ def _get_clip_prompt_embeds(
325324

326325
import time
327326

328-
start_time = time.time()
327+
global start_text_encoder_time
328+
start_text_encoder_time = time.time()
329329
aic_embeddings = self.text_encoder.qpc_session.run(aic_text_input)
330-
end_time = time.time()
331-
print(f"CLIP Text encoder inference time: {end_time - start_time:.4f} seconds")
330+
end_text_encoder_time = time.time()
331+
self.text_encoder.inference_time = end_text_encoder_time - start_text_encoder_time
332332
prompt_embeds = torch.tensor(aic_embeddings["pooler_output"])
333333

334334
# duplicate text embeddings for each generation per prompt, using mps friendly method
@@ -595,7 +595,7 @@ def __call__(
595595
}
596596

597597
self.transformer.qpc_session.set_buffers(output_buffer)
598-
598+
self.transformer.inference_time = []
599599
self.scheduler.set_begin_index(0)
600600
with self.progress_bar(total=num_inference_steps) as progress_bar:
601601
for i, t in enumerate(timesteps):
@@ -650,10 +650,10 @@ def __call__(
650650
"adaln_out": adaln_out.detach().numpy(),
651651
}
652652

653-
start_time = time.time()
653+
start_transformer_step_time = time.time()
654654
outputs = self.transformer.qpc_session.run(inputs_aic)
655-
end_time = time.time()
656-
print(f"Transformers inference time : {end_time - start_time:.2f} seconds")
655+
end_transfromer_step_time = time.time()
656+
self.transformer.inference_time.append(end_transfromer_step_time - start_transformer_step_time)
657657

658658
noise_pred = torch.from_numpy(outputs["output"])
659659

@@ -701,14 +701,17 @@ def __call__(
701701
self.vae_decode.qpc_session.set_buffers(output_buffer)
702702

703703
inputs = {"latent_sample": latents.numpy()}
704-
start_time = time.time()
704+
start_decode_time = time.time()
705705
image = self.vae_decode.qpc_session.run(inputs)
706-
end_time = time.time()
707-
print(f"Decoder Text encoder inference time: {end_time - start_time:.4f} seconds")
706+
end_decode_time = time.time()
707+
self.vae_decode.inference_time = end_decode_time - start_decode_time
708708
image_tensor = torch.from_numpy(image["sample"])
709709
image = self.image_processor.postprocess(image_tensor, output_type=output_type)
710710

711-
if not return_dict:
712-
return (image,)
711+
total_time_taken = end_decode_time - start_text_encoder_time
713712

714-
return FluxPipelineOutput(images=image)
713+
return QEffPipelineOutput(
714+
pipeline=self,
715+
images=image,
716+
E2E_time=total_time_taken,
717+
)

0 commit comments

Comments
 (0)