TensorStack-AI
diff --git a/‎OnnxStack.Converter/stable_diffusion_xl/.gitignore‎
Lines changed: 2 additions & 0 deletions b/‎OnnxStack.Converter/stable_diffusion_xl/.gitignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎OnnxStack.Converter/stable_diffusion_xl/README.md‎
Lines changed: 31 additions & 0 deletions b/‎OnnxStack.Converter/stable_diffusion_xl/README.md‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎OnnxStack.Converter/stable_diffusion_xl/config.py‎
Lines changed: 10 additions & 0 deletions b/‎OnnxStack.Converter/stable_diffusion_xl/config.py‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎OnnxStack.Converter/stable_diffusion_xl/config_controlnet.json‎
Lines changed: 121 additions & 0 deletions b/‎OnnxStack.Converter/stable_diffusion_xl/config_controlnet.json‎
Lines changed: 121 additions & 0 deletions
diff --git a/‎OnnxStack.Converter/stable_diffusion_xl/config_text_encoder.json‎
Lines changed: 103 additions & 0 deletions b/‎OnnxStack.Converter/stable_diffusion_xl/config_text_encoder.json‎
Lines changed: 103 additions & 0 deletions
@@ -0,0 +1,2 @@
+*/*
+/result_*.png
@@ -0,0 +1,31 @@
+# OnnxStack.Converter
+
+## Requirements
+```bash
+pip install onnxruntime-directml
+pip install olive-ai[directml]
+python -m pip install -r requirements.txt
+```
+
+## Usage
+```bash
+python convert.py --model_input "D:\Models\stable-diffusion-xl-base-1.0" --controlnet
+```
+
+`--model_input`  - Safetensor model to convert
+
+`--model_output`  - Output for converted ONNX model
+
+`--controlnet`  - Create a ControlNet-enabled UNet model
+
+`--clean`  - Clear convert/optimize model cache
+
+`--tempDir`  - Directory for temp Olive files
+
+
+## Extra Requirements
+To successfully optimize SDXL models, you will need the patched `vae` from the repository below; otherwise, you may get black image results.
+
+https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
+
+Replace `diffusion_pytorch_model.safetensors` in the SDXL `vae` folder with the one from the `sdxl-vae-fp16-fix` repo.
@@ -0,0 +1,10 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+vae_sample_size = 1024
+unet_sample_size = 128
+cross_attention_dim = 2048
+time_ids_size = 6
+text_embeds_size = 1280
@@ -0,0 +1,121 @@
+{
+  "input_model": {
+      "type": "PyTorchModel",
+      "config": {
+          "model_path": "stabilityai/stable-diffusion-xl-base-1.0",
+          "model_loader": "controlnet_unet_load",
+          "model_script": "models.py",
+          "io_config": {
+              "input_names": [ "sample", "timestep", "encoder_hidden_states", "text_embeds", "time_ids", "down_block_0_additional_residual", "down_block_1_additional_residual", "down_block_2_additional_residual", "down_block_3_additional_residual", "down_block_4_additional_residual", "down_block_5_additional_residual", "down_block_6_additional_residual", "down_block_7_additional_residual", "down_block_8_additional_residual", "mid_block_additional_residual" ],
+              "output_names": [ "out_sample" ],
+              "dynamic_axes": {
+                  "sample": {"0": "unet_sample_batch", "1": "unet_sample_channels", "2": "unet_sample_height", "3": "unet_sample_width"},
+                  "timestep": {"0": "unet_time_batch"},
+                  "encoder_hidden_states": {"0": "unet_hidden_batch", "1": "unet_hidden_sequence"},
+                  "text_embeds": {"0": "unet_text_embeds_batch", "1": "unet_text_embeds_size"},
+                  "time_ids": {"0": "unet_time_ids_batch", "1": "unet_time_ids_size"},
+                  "down_block_0_additional_residual": {"0": "cnet_db0_batch", "1": "cnet_db0_channels", "2": "cnet_db0_height", "3": "cnet_db0_width"},
+                  "down_block_1_additional_residual": {"0": "cnet_db1_batch", "1": "cnet_db1_channels", "2": "cnet_db1_height", "3": "cnet_db1_width"},
+                  "down_block_2_additional_residual": {"0": "cnet_db2_batch", "1": "cnet_db2_channels", "2": "cnet_db2_height", "3": "cnet_db2_width"},
+                  "down_block_3_additional_residual": {"0": "cnet_db3_batch", "1": "cnet_db3_channels", "2": "cnet_db3_height2", "3": "cnet_db3_width2"},
+                  "down_block_4_additional_residual": {"0": "cnet_db4_batch", "1": "cnet_db4_channels", "2": "cnet_db4_height2", "3": "cnet_db4_width2"},
+                  "down_block_5_additional_residual": {"0": "cnet_db5_batch", "1": "cnet_db5_channels", "2": "cnet_db5_height2", "3": "cnet_db5_width2"},
+                  "down_block_6_additional_residual": {"0": "cnet_db6_batch", "1": "cnet_db6_channels", "2": "cnet_db6_height4", "3": "cnet_db6_width4"},
+                  "down_block_7_additional_residual": {"0": "cnet_db7_batch", "1": "cnet_db7_channels", "2": "cnet_db7_height4", "3": "cnet_db7_width4"},
+                  "down_block_8_additional_residual": {"0": "cnet_db8_batch", "1": "cnet_db8_channels", "2": "cnet_db8_height4", "3": "cnet_db8_width4"},
+                  "mid_block_additional_residual": {"0": "cnet_mbar_batch", "1": "cnet_mbar_channels", "2": "cnet_mbar_height8", "3": "cnet_mbar_width8"}
+              }
+          },
+          "dummy_inputs_func": "controlnet_unet_conversion_inputs"
+      }
+  },
+  "systems": {
+      "local_system": {
+          "type": "LocalSystem",
+          "config": {
+                "accelerators": [
+                    {
+                        "device": "gpu",
+                        "execution_providers": [
+                            "DmlExecutionProvider"
+                        ]
+                    }
+                ]
+          }
+      }
+  },
+  "evaluators": {
+      "common_evaluator": {
+          "metrics": [
+              {
+                  "name": "latency",
+                  "type": "latency",
+                  "sub_types": [{"name": "avg"}],
+                  "user_config": {
+                      "user_script": "models.py",
+                      "dataloader_func": "controlnet_unet_data_loader",
+                      "batch_size": 2
+                  }
+              }
+          ]
+      }
+  },
+  "passes": {
+      "convert": {
+          "type": "OnnxConversion",
+          "config": {
+              "target_opset": 16,
+              "save_as_external_data": true,
+              "all_tensors_to_one_file": true
+          }
+      },
+      "optimize": {
+            "type": "OrtTransformersOptimization",
+            "config": {
+                "model_type": "unet",
+                "opt_level": 0,
+                "float16": true,
+                "use_gpu": true,
+                "keep_io_types": false,
+                "optimization_options": {
+                    "enable_gelu": true,
+                    "enable_layer_norm": true,
+                    "enable_attention": true,
+                    "use_multi_head_attention": true,
+                    "enable_skip_layer_norm": false,
+                    "enable_embed_layer_norm": true,
+                    "enable_bias_skip_layer_norm": false,
+                    "enable_bias_gelu": true,
+                    "enable_gelu_approximation": false,
+                    "enable_qordered_matmul": false,
+                    "enable_shape_inference": true,
+                    "enable_gemm_fast_gelu": false,
+                    "enable_nhwc_conv": false,
+                    "enable_group_norm": true,
+                    "enable_bias_splitgelu": false,
+                    "enable_packed_qkv": true,
+                    "enable_packed_kv": true,
+                    "enable_bias_add": false,
+                    "group_norm_channels_last": false
+                },
+                "force_fp32_ops": ["RandomNormalLike"],
+                "force_fp16_inputs": {
+                    "GroupNorm": [0, 1, 2]
+                }
+            }
+      }
+    },
+    "pass_flows": [
+        ["convert", "optimize"]
+    ],
+    "engine": {
+        "log_severity_level": 0,
+        "evaluator": "common_evaluator",
+        "evaluate_input_model": false,
+        "host": "local_system",
+        "target": "local_system",
+        "cache_dir": "cache",
+        "output_name": "controlnet",
+        "output_dir": "footprints"
+    }
+}
@@ -0,0 +1,103 @@
+{
+    "input_model": {
+        "type": "PyTorchModel",
+        "config": {
+            "model_path": "stabilityai/stable-diffusion-xl-base-1.0",
+            "model_loader": "text_encoder_load",
+            "model_script": "models.py",
+            "io_config": {
+                "input_names": [ "input_ids" ],
+                "output_names": [ "last_hidden_state", "pooler_output" ],
+                "dynamic_axes": { "input_ids": { "0": "batch", "1": "sequence" } }
+            },
+            "dummy_inputs_func": "text_encoder_conversion_inputs"
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "config": {
+                "accelerators": [
+                    {
+                        "device": "gpu",
+                        "execution_providers": [
+                            "DmlExecutionProvider"
+                        ]
+                    }
+                ]
+            }
+        }
+    },
+    "evaluators": {
+        "common_evaluator": {
+            "metrics": [
+                {
+                    "name": "latency",
+                    "type": "latency",
+                    "sub_types": [{"name": "avg"}],
+                    "user_config": {
+                        "user_script": "models.py",
+                        "dataloader_func": "text_encoder_data_loader",
+                        "batch_size": 1
+                    }
+                }
+            ]
+        }
+    },
+    "passes": {
+        "convert": {
+            "type": "OnnxConversion",
+            "config": {
+                "target_opset": 16
+            }
+        },
+        "optimize": {
+            "type": "OrtTransformersOptimization",
+            "config": {
+                "model_type": "clip",
+                "opt_level": 0,
+                "float16": true,
+                "use_gpu": true,
+                "keep_io_types": false,
+                "optimization_options": {
+                    "enable_gelu": true,
+                    "enable_layer_norm": true,
+                    "enable_attention": true,
+                    "use_multi_head_attention": true,
+                    "enable_skip_layer_norm": false,
+                    "enable_embed_layer_norm": true,
+                    "enable_bias_skip_layer_norm": false,
+                    "enable_bias_gelu": true,
+                    "enable_gelu_approximation": false,
+                    "enable_qordered_matmul": false,
+                    "enable_shape_inference": true,
+                    "enable_gemm_fast_gelu": false,
+                    "enable_nhwc_conv": false,
+                    "enable_group_norm": true,
+                    "enable_bias_splitgelu": false,
+                    "enable_packed_qkv": true,
+                    "enable_packed_kv": true,
+                    "enable_bias_add": false,
+                    "group_norm_channels_last": false
+                },
+                "force_fp32_ops": ["RandomNormalLike"],
+                "force_fp16_inputs": {
+                    "GroupNorm": [0, 1, 2]
+                }
+            }
+        }
+    },
+    "pass_flows": [
+        ["convert", "optimize"]
+    ],
+    "engine": {
+        "log_severity_level": 0,
+        "evaluator": "common_evaluator",
+        "evaluate_input_model": false,
+        "host": "local_system",
+        "target": "local_system",
+        "cache_dir": "cache",
+        "output_name": "text_encoder",
+        "output_dir": "footprints"
+    }
+}