Skip to content
This repository was archived by the owner on Nov 27, 2024. It is now read-only.

Commit 8c76a77

Browse files
committed
StableCascade image_encoder
1 parent 48c18a4 commit 8c76a77

File tree

3 files changed

+143
-3
lines changed

3 files changed

+143
-3
lines changed
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
{
2+
"input_model": {
3+
"type": "PyTorchModel",
4+
"config": {
5+
"model_path": "stabilityai/stable-cascade",
6+
"model_loader": "image_encoder_load",
7+
"model_script": "models.py",
8+
"io_config": {
9+
"input_names": [ "sample"],
10+
"output_names": [ "latent_sample" ],
11+
"dynamic_axes": { "sample": { "0": "batch", "1": "channels", "2": "height", "3": "width" } }
12+
},
13+
"dummy_inputs_func": "image_encoder_conversion_inputs"
14+
}
15+
},
16+
"systems": {
17+
"local_system": {
18+
"type": "LocalSystem",
19+
"config": {
20+
"accelerators": [
21+
{
22+
"device": "gpu",
23+
"execution_providers": [
24+
"DmlExecutionProvider"
25+
]
26+
}
27+
]
28+
}
29+
}
30+
},
31+
"evaluators": {
32+
"common_evaluator": {
33+
"metrics": [
34+
{
35+
"name": "latency",
36+
"type": "latency",
37+
"sub_types": [{"name": "avg"}],
38+
"user_config": {
39+
"user_script": "models.py",
40+
"dataloader_func": "image_encoder_data_loader",
41+
"batch_size": 1
42+
}
43+
}
44+
]
45+
}
46+
},
47+
"passes": {
48+
"convert": {
49+
"type": "OnnxConversion",
50+
"config": {
51+
"target_opset": 16
52+
}
53+
},
54+
"optimize": {
55+
"type": "OrtTransformersOptimization",
56+
"config": {
57+
"model_type": "clip",
58+
"opt_level": 0,
59+
"float16": true,
60+
"use_gpu": true,
61+
"keep_io_types": true,
62+
"optimization_options": {
63+
"enable_gelu": true,
64+
"enable_layer_norm": true,
65+
"enable_attention": true,
66+
"use_multi_head_attention": true,
67+
"enable_skip_layer_norm": false,
68+
"enable_embed_layer_norm": true,
69+
"enable_bias_skip_layer_norm": false,
70+
"enable_bias_gelu": true,
71+
"enable_gelu_approximation": false,
72+
"enable_qordered_matmul": false,
73+
"enable_shape_inference": true,
74+
"enable_gemm_fast_gelu": false,
75+
"enable_nhwc_conv": false,
76+
"enable_group_norm": true,
77+
"enable_bias_splitgelu": false,
78+
"enable_packed_qkv": true,
79+
"enable_packed_kv": true,
80+
"enable_bias_add": false,
81+
"group_norm_channels_last": false
82+
},
83+
"force_fp32_ops": ["RandomNormalLike"],
84+
"force_fp16_inputs": {
85+
"GroupNorm": [0, 1, 2]
86+
}
87+
}
88+
},
89+
"optimize_cuda": {
90+
"type": "OrtTransformersOptimization",
91+
"config": {
92+
"model_type": "clip",
93+
"opt_level": 0,
94+
"float16": true,
95+
"use_gpu": true,
96+
"keep_io_types": false
97+
}
98+
}
99+
},
100+
"pass_flows": [
101+
["convert", "optimize"]
102+
],
103+
"engine": {
104+
"log_severity_level": 0,
105+
"evaluator": "common_evaluator",
106+
"evaluate_input_model": false,
107+
"host": "local_system",
108+
"target": "local_system",
109+
"cache_dir": "cache",
110+
"output_name": "image_encoder",
111+
"output_dir": "footprints"
112+
}
113+
}

OnnxStack.Converter/stable_cascade/convert.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def optimize(
121121

122122
model_info = {}
123123

124-
submodel_names = [ "text_encoder", "decoder", "prior"]
124+
submodel_names = [ "text_encoder", "decoder", "prior", "image_encoder"]
125125

126126
has_safety_checker = getattr(pipeline, "safety_checker", None) is not None
127127

OnnxStack.Converter/stable_cascade/models.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import torch
77
from typing import Union, Optional, Tuple
88
from diffusers import AutoencoderKL, StableCascadeUNet
9-
from transformers.models.clip.modeling_clip import CLIPTextModelWithProjection
9+
from transformers.models.clip.modeling_clip import CLIPTextModelWithProjection, CLIPVisionModelWithProjection
1010
from dataclasses import dataclass
1111

1212
# Helper latency-only dataloader that creates random tensors with no label
@@ -111,4 +111,31 @@ def prior_conversion_inputs(model=None):
111111

112112

113113
def prior_data_loader(data_dir, batchsize, *args, **kwargs):
114-
return RandomDataLoader(prior_inputs, batchsize, torch.float16)
114+
return RandomDataLoader(prior_inputs, batchsize, torch.float16)
115+
116+
117+
118+
119+
120+
# -----------------------------------------------------------------------------
121+
# image_encoder
122+
# -----------------------------------------------------------------------------
123+
124+
def image_encoder_inputs(batchsize, torch_dtype, is_conversion_inputs=False):
    """Build random pixel-space inputs for the CLIP vision encoder.

    Returns a dict with a single "sample" tensor of shape
    (batchsize, 3, 224, 224) — the standard CLIP image resolution.
    `is_conversion_inputs` is accepted only for signature parity with the
    other *_inputs helpers in this module; it is unused here.
    """
    sample = torch.rand((batchsize, 3, 224, 224), dtype=torch_dtype)
    return {"sample": sample}
129+
130+
131+
def image_encoder_load(model_name):
    """Load the CLIP vision tower (with projection head) for Stable Cascade.

    Pulls the weights from the `image_encoder` subfolder of the given
    HuggingFace repo id / local path (e.g. "stabilityai/stable-cascade").
    """
    return CLIPVisionModelWithProjection.from_pretrained(
        model_name, subfolder="image_encoder"
    )
134+
135+
136+
def image_encoder_conversion_inputs(model=None):
    """Return example inputs for ONNX export as a positional tuple.

    Uses a batch-1 fp32 sample; `model` is unused but required by the
    Olive `dummy_inputs_func` calling convention.
    """
    named_inputs = image_encoder_inputs(1, torch.float32, True)
    return tuple(named_inputs.values())
138+
139+
140+
def image_encoder_data_loader(data_dir, batchsize, *args, **kwargs):
    """Latency-evaluation dataloader: yields random fp16 image batches.

    `data_dir` and the extra args are ignored — the loader fabricates
    random tensors via `image_encoder_inputs`, matching the other
    *_data_loader helpers in this module.
    """
    return RandomDataLoader(image_encoder_inputs, batchsize, torch.float16)

0 commit comments

Comments
 (0)