Skip to content
This repository was archived by the owner on Nov 27, 2024. It is now read-only.

Commit 78d1593

Browse files
committed
SDXL converter
1 parent 7868c0b commit 78d1593

File tree

12 files changed

+1260
-0
lines changed

12 files changed

+1260
-0
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*/*
2+
/result_*.png
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# OnnxStack.Converter
2+
3+
## Requirements
4+
```bash
5+
pip install onnxruntime-directml
6+
pip install olive-ai[directml]
7+
python -m pip install -r requirements.txt
8+
```
9+
10+
## Usage
11+
```bash
12+
python convert.py --model_input "D:\Models\stable-diffusion-xl-base-1.0" --controlnet
13+
```
14+
15+
`--model_input` - Safetensors model to convert
16+
17+
`--model_output` - Output for converted ONNX model
18+
19+
`--controlnet` - Create a ControlNet enabled Unet model
20+
21+
`--clean` - Clear convert/optimize model cache
22+
23+
`--tempDir` - Directory for temp Olive files
24+
25+
26+
## Extra Requirements
27+
To successfully optimize SDXL models, you will need the patched `vae` from the repository below; otherwise, you may get black image results.
28+
29+
https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
30+
31+
Replace `diffusion_pytorch_model.safetensors` in the SDXL `vae` folder with the one from the `sdxl-vae-fp16-fix` repo.
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# -------------------------------------------------------------------------
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
# Licensed under the MIT License.
4+
# --------------------------------------------------------------------------
5+
6+
vae_sample_size = 1024
7+
unet_sample_size = 128
8+
cross_attention_dim = 2048
9+
time_ids_size = 6
10+
text_embeds_size = 1280
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
{
2+
"input_model": {
3+
"type": "PyTorchModel",
4+
"config": {
5+
"model_path": "stabilityai/stable-diffusion-xl-base-1.0",
6+
"model_loader": "controlnet_unet_load",
7+
"model_script": "models.py",
8+
"io_config": {
9+
"input_names": [ "sample", "timestep", "encoder_hidden_states", "text_embeds", "time_ids", "down_block_0_additional_residual", "down_block_1_additional_residual", "down_block_2_additional_residual", "down_block_3_additional_residual", "down_block_4_additional_residual", "down_block_5_additional_residual", "down_block_6_additional_residual", "down_block_7_additional_residual", "down_block_8_additional_residual", "mid_block_additional_residual" ],
10+
"output_names": [ "out_sample" ],
11+
"dynamic_axes": {
12+
"sample": {"0": "unet_sample_batch", "1": "unet_sample_channels", "2": "unet_sample_height", "3": "unet_sample_width"},
13+
"timestep": {"0": "unet_time_batch"},
14+
"encoder_hidden_states": {"0": "unet_hidden_batch", "1": "unet_hidden_sequence"},
15+
"text_embeds": {"0": "unet_text_embeds_batch", "1": "unet_text_embeds_size"},
16+
"time_ids": {"0": "unet_time_ids_batch", "1": "unet_time_ids_size"},
17+
"down_block_0_additional_residual": {"0": "cnet_db0_batch", "1": "cnet_db0_channels", "2": "cnet_db0_height", "3": "cnet_db0_width"},
18+
"down_block_1_additional_residual": {"0": "cnet_db1_batch", "1": "cnet_db1_channels", "2": "cnet_db1_height", "3": "cnet_db1_width"},
19+
"down_block_2_additional_residual": {"0": "cnet_db2_batch", "1": "cnet_db2_channels", "2": "cnet_db2_height", "3": "cnet_db2_width"},
20+
"down_block_3_additional_residual": {"0": "cnet_db3_batch", "1": "cnet_db3_channels", "2": "cnet_db3_height2", "3": "cnet_db3_width2"},
21+
"down_block_4_additional_residual": {"0": "cnet_db4_batch", "1": "cnet_db4_channels", "2": "cnet_db4_height2", "3": "cnet_db4_width2"},
22+
"down_block_5_additional_residual": {"0": "cnet_db5_batch", "1": "cnet_db5_channels", "2": "cnet_db5_height2", "3": "cnet_db5_width2"},
23+
"down_block_6_additional_residual": {"0": "cnet_db6_batch", "1": "cnet_db6_channels", "2": "cnet_db6_height4", "3": "cnet_db6_width4"},
24+
"down_block_7_additional_residual": {"0": "cnet_db7_batch", "1": "cnet_db7_channels", "2": "cnet_db7_height4", "3": "cnet_db7_width4"},
25+
"down_block_8_additional_residual": {"0": "cnet_db8_batch", "1": "cnet_db8_channels", "2": "cnet_db8_height4", "3": "cnet_db8_width4"},
26+
"mid_block_additional_residual": {"0": "cnet_mbar_batch", "1": "cnet_mbar_channels", "2": "cnet_mbar_height8", "3": "cnet_mbar_width8"}
27+
}
28+
},
29+
"dummy_inputs_func": "controlnet_unet_conversion_inputs"
30+
}
31+
},
32+
"systems": {
33+
"local_system": {
34+
"type": "LocalSystem",
35+
"config": {
36+
"accelerators": [
37+
{
38+
"device": "gpu",
39+
"execution_providers": [
40+
"DmlExecutionProvider"
41+
]
42+
}
43+
]
44+
}
45+
}
46+
},
47+
"evaluators": {
48+
"common_evaluator": {
49+
"metrics": [
50+
{
51+
"name": "latency",
52+
"type": "latency",
53+
"sub_types": [{"name": "avg"}],
54+
"user_config": {
55+
"user_script": "models.py",
56+
"dataloader_func": "controlnet_unet_data_loader",
57+
"batch_size": 2
58+
}
59+
}
60+
]
61+
}
62+
},
63+
"passes": {
64+
"convert": {
65+
"type": "OnnxConversion",
66+
"config": {
67+
"target_opset": 16,
68+
"save_as_external_data": true,
69+
"all_tensors_to_one_file": true
70+
}
71+
},
72+
"optimize": {
73+
"type": "OrtTransformersOptimization",
74+
"config": {
75+
"model_type": "unet",
76+
"opt_level": 0,
77+
"float16": true,
78+
"use_gpu": true,
79+
"keep_io_types": false,
80+
"optimization_options": {
81+
"enable_gelu": true,
82+
"enable_layer_norm": true,
83+
"enable_attention": true,
84+
"use_multi_head_attention": true,
85+
"enable_skip_layer_norm": false,
86+
"enable_embed_layer_norm": true,
87+
"enable_bias_skip_layer_norm": false,
88+
"enable_bias_gelu": true,
89+
"enable_gelu_approximation": false,
90+
"enable_qordered_matmul": false,
91+
"enable_shape_inference": true,
92+
"enable_gemm_fast_gelu": false,
93+
"enable_nhwc_conv": false,
94+
"enable_group_norm": true,
95+
"enable_bias_splitgelu": false,
96+
"enable_packed_qkv": true,
97+
"enable_packed_kv": true,
98+
"enable_bias_add": false,
99+
"group_norm_channels_last": false
100+
},
101+
"force_fp32_ops": ["RandomNormalLike"],
102+
"force_fp16_inputs": {
103+
"GroupNorm": [0, 1, 2]
104+
}
105+
}
106+
}
107+
},
108+
"pass_flows": [
109+
["convert", "optimize"]
110+
],
111+
"engine": {
112+
"log_severity_level": 0,
113+
"evaluator": "common_evaluator",
114+
"evaluate_input_model": false,
115+
"host": "local_system",
116+
"target": "local_system",
117+
"cache_dir": "cache",
118+
"output_name": "controlnet",
119+
"output_dir": "footprints"
120+
}
121+
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
{
2+
"input_model": {
3+
"type": "PyTorchModel",
4+
"config": {
5+
"model_path": "stabilityai/stable-diffusion-xl-base-1.0",
6+
"model_loader": "text_encoder_load",
7+
"model_script": "models.py",
8+
"io_config": {
9+
"input_names": [ "input_ids" ],
10+
"output_names": [ "last_hidden_state", "pooler_output" ],
11+
"dynamic_axes": { "input_ids": { "0": "batch", "1": "sequence" } }
12+
},
13+
"dummy_inputs_func": "text_encoder_conversion_inputs"
14+
}
15+
},
16+
"systems": {
17+
"local_system": {
18+
"type": "LocalSystem",
19+
"config": {
20+
"accelerators": [
21+
{
22+
"device": "gpu",
23+
"execution_providers": [
24+
"DmlExecutionProvider"
25+
]
26+
}
27+
]
28+
}
29+
}
30+
},
31+
"evaluators": {
32+
"common_evaluator": {
33+
"metrics": [
34+
{
35+
"name": "latency",
36+
"type": "latency",
37+
"sub_types": [{"name": "avg"}],
38+
"user_config": {
39+
"user_script": "models.py",
40+
"dataloader_func": "text_encoder_data_loader",
41+
"batch_size": 1
42+
}
43+
}
44+
]
45+
}
46+
},
47+
"passes": {
48+
"convert": {
49+
"type": "OnnxConversion",
50+
"config": {
51+
"target_opset": 16
52+
}
53+
},
54+
"optimize": {
55+
"type": "OrtTransformersOptimization",
56+
"config": {
57+
"model_type": "clip",
58+
"opt_level": 0,
59+
"float16": true,
60+
"use_gpu": true,
61+
"keep_io_types": false,
62+
"optimization_options": {
63+
"enable_gelu": true,
64+
"enable_layer_norm": true,
65+
"enable_attention": true,
66+
"use_multi_head_attention": true,
67+
"enable_skip_layer_norm": false,
68+
"enable_embed_layer_norm": true,
69+
"enable_bias_skip_layer_norm": false,
70+
"enable_bias_gelu": true,
71+
"enable_gelu_approximation": false,
72+
"enable_qordered_matmul": false,
73+
"enable_shape_inference": true,
74+
"enable_gemm_fast_gelu": false,
75+
"enable_nhwc_conv": false,
76+
"enable_group_norm": true,
77+
"enable_bias_splitgelu": false,
78+
"enable_packed_qkv": true,
79+
"enable_packed_kv": true,
80+
"enable_bias_add": false,
81+
"group_norm_channels_last": false
82+
},
83+
"force_fp32_ops": ["RandomNormalLike"],
84+
"force_fp16_inputs": {
85+
"GroupNorm": [0, 1, 2]
86+
}
87+
}
88+
}
89+
},
90+
"pass_flows": [
91+
["convert", "optimize"]
92+
],
93+
"engine": {
94+
"log_severity_level": 0,
95+
"evaluator": "common_evaluator",
96+
"evaluate_input_model": false,
97+
"host": "local_system",
98+
"target": "local_system",
99+
"cache_dir": "cache",
100+
"output_name": "text_encoder",
101+
"output_dir": "footprints"
102+
}
103+
}

0 commit comments

Comments
 (0)