This repository was archived by the owner on Nov 27, 2024. It is now read-only.

Commit 06e19de

copy models, remove obsolete commands
1 parent 7979acf commit 06e19de

File tree

8 files changed: +64 -152 lines

OnnxStack.Converter/README.md

Lines changed: 1 addition & 1 deletion

@@ -15,6 +15,6 @@ convert.py --optimize --model_input '..\stable-diffusion-v1-5' --model_output '.
 
 `--model_input` - Safetensor model to convert
 
-`--model_output` - Output for converted ONNX model
+`--model_output` - Output for converted ONNX model (NOTE: This folder is deleted before each run)
 
 `--controlnet` - Create a ControlNet enabled Unet model
Lines changed: 20 additions & 0 deletions (new file)

@@ -0,0 +1,20 @@
+# OnnxStack.Converter
+
+## Requirements
+```bash
+pip install onnxruntime-directml
+pip install olive-ai[directml]
+python -m pip install -r requirements.txt
+```
+
+## Usage
+```bash
+convert.py --optimize --model_input '..\stable-cascade' --model_output '..\converted'
+```
+`--optimize` - Run the model optimization
+
+`--model_input` - Safetensor model to convert
+
+`--model_output` - Output for converted ONNX model (NOTE: This folder is deleted before each run)
+
+`--image_encoder` - Convert the optional image encoder

OnnxStack.Converter/stable_cascade/config_decoder.json

Lines changed: 1 addition & 2 deletions

@@ -55,8 +55,7 @@
       "config": {
         "target_opset": 16,
         "save_as_external_data": true,
-        "all_tensors_to_one_file": true,
-        "external_data_name": "weights.pb"
+        "all_tensors_to_one_file": true
       }
     },
     "optimize": {

OnnxStack.Converter/stable_cascade/config_image_encoder.json

Lines changed: 1 addition & 1 deletion

@@ -7,7 +7,7 @@
     "model_script": "models.py",
     "io_config": {
         "input_names": [ "sample"],
-        "output_names": [ "latent_sample" ],
+        "output_names": [ "image_embeds", "last_hidden_state"],
         "dynamic_axes": { "sample": { "0": "batch", "1": "channels", "2": "height", "3": "width" } }
     },
     "dummy_inputs_func": "image_encoder_conversion_inputs"

OnnxStack.Converter/stable_cascade/config_prior.json

Lines changed: 1 addition & 2 deletions

@@ -56,8 +56,7 @@
       "config": {
         "target_opset": 16,
         "save_as_external_data": true,
-        "all_tensors_to_one_file": true,
-        "external_data_name": "weights.pb"
+        "all_tensors_to_one_file": true
      }
     },
     "optimize": {

OnnxStack.Converter/stable_cascade/convert.py

Lines changed: 10 additions & 72 deletions

@@ -93,7 +93,7 @@ def optimize(
     model_input: str,
     model_output: Path,
     provider: str,
-    controlnet: bool
+    image_encoder: bool
 ):
     from google.protobuf import __version__ as protobuf_version

@@ -109,7 +109,6 @@ def optimize(
     shutil.rmtree(script_dir / "footprints", ignore_errors=True)
     shutil.rmtree(model_output, ignore_errors=True)

-
     # Load the entire PyTorch pipeline to ensure all models and their configurations are downloaded and cached.
     # This avoids an issue where the non-ONNX components (tokenizer, scheduler, and feature extractor) are not
     # automatically cached correctly if individual models are fetched one at a time.
@@ -121,15 +120,10 @@ def optimize(

     model_info = {}

-    submodel_names = [ "text_encoder", "decoder", "prior", "image_encoder"]
-
-    has_safety_checker = getattr(pipeline, "safety_checker", None) is not None
-
-    if has_safety_checker:
-        submodel_names.append("safety_checker")
+    submodel_names = [ "text_encoder", "decoder", "prior", "vqgan"]

-    if controlnet:
-        submodel_names.append("controlnet")
+    if image_encoder:
+        submodel_names.append("image_encoder")

     for submodel_name in submodel_names:
         print(f"\nOptimizing {submodel_name}")
@@ -138,14 +132,7 @@
         with (script_dir / f"config_{submodel_name}.json").open() as fin:
             olive_config = json.load(fin)
         olive_config = update_config_with_provider(olive_config, provider)
-
-        if submodel_name in ("unet", "controlnet", "text_encoder"):
-            olive_config["input_model"]["config"]["model_path"] = model_dir
-        else:
-            # Only the unet & text encoder are affected by LoRA, so it's better to use the base model ID for
-            # other models: the Olive cache is based on the JSON config, and two LoRA variants with the same
-            # base model ID should be able to reuse previously optimized copies.
-            olive_config["input_model"]["config"]["model_path"] = model_dir
+        olive_config["input_model"]["config"]["model_path"] = model_dir

         run_res = olive_run(olive_config)

@@ -156,52 +143,22 @@
     from sd_utils.ort import save_onnx_pipeline

     save_onnx_pipeline(
-        has_safety_checker, model_info, model_output, pipeline, submodel_names
+        model_info, model_output, pipeline, submodel_names
     )

     return model_info


 def parse_common_args(raw_args):
     parser = argparse.ArgumentParser("Common arguments")
-
     parser.add_argument("--model_input", default="stable-diffusion-v1-5", type=str)
     parser.add_argument("--model_output", default="stable-diffusion-v1-5", type=Path)
-    parser.add_argument("--controlnet",action="store_true", help="Create ControlNet Unet Model")
-    parser.add_argument(
-        "--provider", default="dml", type=str, choices=["dml", "cuda"], help="Execution provider to use"
-    )
+    parser.add_argument("--image_encoder",action="store_true", help="Create image encoder model")
+    parser.add_argument("--provider", default="dml", type=str, choices=["dml", "cuda"], help="Execution provider to use")
     parser.add_argument("--optimize", action="store_true", help="Runs the optimization step")
     parser.add_argument("--clean_cache", action="store_true", help="Deletes the Olive cache")
     parser.add_argument("--test_unoptimized", action="store_true", help="Use unoptimized model for inference")
-    parser.add_argument("--batch_size", default=1, type=int, help="Number of images to generate per batch")
-    parser.add_argument(
-        "--prompt",
-        default=(
-            "castle surrounded by water and nature, village, volumetric lighting, photorealistic, "
-            "detailed and intricate, fantasy, epic cinematic shot, mountains, 8k ultra hd"
-        ),
-        type=str,
-    )
-    parser.add_argument(
-        "--guidance_scale",
-        default=7.5,
-        type=float,
-        help="Guidance scale as defined in Classifier-Free Diffusion Guidance",
-    )
-    parser.add_argument("--num_images", default=1, type=int, help="Number of images to generate")
-    parser.add_argument("--num_inference_steps", default=50, type=int, help="Number of steps in diffusion process")
     parser.add_argument("--tempdir", default=None, type=str, help="Root directory for tempfile directories and files")
-    parser.add_argument(
-        "--strength",
-        default=1.0,
-        type=float,
-        help="Value between 0.0 and 1.0, that controls the amount of noise that is added to the input image. "
-        "Values that approach 1.0 enable lots of variations but will also produce images "
-        "that are not semantically consistent with the input.",
-    )
-    parser.add_argument("--image_size", default=512, type=int, help="Width and height of the images to generate")
-
     return parser.parse_known_args(raw_args)


@@ -231,8 +188,6 @@ def main(raw_args=None):
     if common_args.clean_cache:
         shutil.rmtree(script_dir / "cache", ignore_errors=True)

-    guidance_scale = common_args.guidance_scale
-
     ort_args = None, None
     ort_args, extra_args = parse_ort_args(extra_args)

@@ -246,27 +201,10 @@
     from sd_utils.ort import validate_args

     validate_args(ort_args, common_args.provider)
-    optimize(common_args.model_input, common_args.model_output, common_args.provider, common_args.controlnet)
+    optimize(common_args.model_input, common_args.model_output, common_args.provider, common_args.image_encoder)

     if not common_args.optimize:
-        model_dir = model_output / "F32" if common_args.test_unoptimized else model_output / "F16"
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-
-            from sd_utils.ort import get_ort_pipeline
-
-            pipeline = get_ort_pipeline(model_dir, common_args, ort_args, guidance_scale)
-            run_inference_loop(
-                pipeline,
-                common_args.prompt,
-                common_args.num_images,
-                common_args.batch_size,
-                common_args.image_size,
-                common_args.num_inference_steps,
-                guidance_scale,
-                common_args.strength,
-                provider=provider,
-            )
+        print("TODO: Create OnnxStableCascadePipeline")


 if __name__ == "__main__":
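
For reference, the reworked `optimize` entry point can also be driven without the CLI; a minimal sketch, assuming convert.py is importable as a module (argument values are placeholders mirroring the README example):

```python
# Hypothetical programmatic call of the new optimize() signature (normally
# driven by the CLI flags defined in parse_common_args above).
from pathlib import Path

from convert import optimize  # assumption: convert.py is on the import path

model_info = optimize(
    model_input="../stable-cascade",    # --model_input
    model_output=Path("../converted"),  # --model_output (deleted before each run)
    provider="dml",                     # --provider: "dml" or "cuda"
    image_encoder=False,                # --image_encoder: also convert the optional encoder
)
```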

OnnxStack.Converter/stable_cascade/models.py

Lines changed: 9 additions & 7 deletions

@@ -27,7 +27,6 @@ def __getitem__(self, idx):
 # TEXT ENCODER
 # -----------------------------------------------------------------------------

-
 def text_encoder_inputs(batchsize, torch_dtype):
     return torch.zeros((batchsize, 77), dtype=torch_dtype)

@@ -45,11 +44,12 @@ def text_encoder_data_loader(data_dir, batchsize, *args, **kwargs):
     return RandomDataLoader(text_encoder_inputs, batchsize, torch.int32)


+
+
 # -----------------------------------------------------------------------------
-# decoder
+# DECODER UNET
 # -----------------------------------------------------------------------------

-
 def decoder_inputs(batchsize, torch_dtype, is_conversion_inputs=False):
     # TODO(jstoecker): Rename onnx::Concat_4 to text_embeds and onnx::Shape_5 to time_ids
     inputs = {
@@ -81,8 +81,9 @@ def decoder_data_loader(data_dir, batchsize, *args, **kwargs):



+
 # -----------------------------------------------------------------------------
-# prior
+# PRIOR UNET
 # -----------------------------------------------------------------------------

 def prior_inputs(batchsize, torch_dtype, is_conversion_inputs=False):
@@ -116,10 +117,9 @@ def prior_data_loader(data_dir, batchsize, *args, **kwargs):



-

 # -----------------------------------------------------------------------------
-# image_encoder
+# IMAGE ENCODER
 # -----------------------------------------------------------------------------

 def image_encoder_inputs(batchsize, torch_dtype, is_conversion_inputs=False):
@@ -142,8 +142,10 @@ def image_encoder_data_loader(data_dir, batchsize, *args, **kwargs):
     return RandomDataLoader(image_encoder_inputs, batchsize, torch.float16)


+
+
 # -----------------------------------------------------------------------------
-# vqgan
+# VQGAN
 # -----------------------------------------------------------------------------

 def vqgan_inputs(batchsize, torch_dtype, is_conversion_inputs=False):
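
Each section of models.py pairs a `*_inputs` factory with a `*_data_loader`: Olive traces the model with the former and feeds random batches from the latter during optimization. A quick sketch of the text encoder pair, assuming `RandomDataLoader.__getitem__` returns an `(inputs, label)` tuple as in the Olive Stable Diffusion examples (its body is not shown in this diff):

```python
# Sketch of exercising the dummy-input helpers (assumption: RandomDataLoader
# yields (inputs, label) pairs via __getitem__).
import torch

dummy = text_encoder_inputs(batchsize=1, torch_dtype=torch.int32)
print(dummy.shape)  # torch.Size([1, 77]) - one sequence of 77 token ids

loader = text_encoder_data_loader(data_dir=None, batchsize=1)
inputs, label = loader[0]  # a random batch for the optimization passes
```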

OnnxStack.Converter/stable_cascade/sd_utils/ort.py

Lines changed: 21 additions & 67 deletions

@@ -10,7 +10,7 @@
 from typing import Dict

 import onnxruntime as ort
-from diffusers import OnnxRuntimeModel, OnnxStableDiffusionPipeline
+from diffusers import OnnxRuntimeModel, StableCascadePriorPipeline
 from onnxruntime import __version__ as OrtVersion
 from packaging import version

@@ -77,9 +77,11 @@ def save_optimized_onnx_submodel(submodel_name, provider, model_info):
     model_info[submodel_name] = {
         "unoptimized": {
             "path": Path(unoptimized_olive_model.model_path),
+            "data": Path(unoptimized_olive_model.model_path + ".data"),
         },
         "optimized": {
             "path": Path(optimized_olive_model.model_path),
+            "data": Path(optimized_olive_model.model_path + ".data"),
         },
     }

@@ -88,76 +90,28 @@ def save_optimized_onnx_submodel(submodel_name, provider, model_info):


 def save_onnx_pipeline(
-    has_safety_checker, model_info, model_output, pipeline, submodel_names
+    model_info, model_output, pipeline, submodel_names
 ):
     # Save the unoptimized models in a directory structure that the diffusers library can load and run.
     # This is optional, and the optimized models can be used directly in a custom pipeline if desired.
-    print("\nCreating ONNX pipeline...")
-
-    optimized_model_dir = model_output / "Optimized"
-    unoptimized_model_dir = model_output / "Default"
-    has_controlnet = 'controlnet' in submodel_names
-    if has_safety_checker:
-        safety_checker = OnnxRuntimeModel.from_pretrained(model_info["safety_checker"]["unoptimized"]["path"].parent)
-    else:
-        safety_checker = None
-
-    text_encoder=OnnxRuntimeModel.from_pretrained(model_info["text_encoder"]["unoptimized"]["path"].parent)
-    decoder=OnnxRuntimeModel.from_pretrained(model_info["text_encoder"]["unoptimized"]["path"].parent)
-    prior=OnnxRuntimeModel.from_pretrained(model_info["text_encoder"]["unoptimized"]["path"].parent)
-
+    # print("\nCreating ONNX pipeline...")

+    # TODO: Create OnnxStableCascadePipeline

-    print("Saving unoptimized models...")
-    text_encoder.save_pretrained(unoptimized_model_dir / "text_encoder")
-    decoder.save_pretrained(unoptimized_model_dir/ "decoder")
-    prior.save_pretrained(unoptimized_model_dir/ "prior")
-
     # Create a copy of the unoptimized model directory, then overwrite with optimized models from the olive cache.
     print("Copying optimized models...")
-    shutil.copytree(unoptimized_model_dir, optimized_model_dir, ignore=shutil.ignore_patterns("weights.pb"))
-    for submodel_name in submodel_names:
-        src_path = model_info[submodel_name]["optimized"]["path"]
-        dst_path = optimized_model_dir / submodel_name / "model.onnx"
-        exists = os.path.exists(dst_path)
-        if not exists:
-            os.mkdir(optimized_model_dir / submodel_name)
-        shutil.copyfile(src_path, dst_path)
-
-    print(f"The default pipeline is located here: {unoptimized_model_dir}")
-    print(f"The optimized pipeline is located here: {optimized_model_dir}")
-
-
-def get_ort_pipeline(model_dir, common_args, ort_args, guidance_scale):
-    ort.set_default_logger_severity(3)
-
-    print("Loading models into ORT session...")
-    sess_options = ort.SessionOptions()
-    sess_options.enable_mem_pattern = False
-
-    static_dims = not ort_args.dynamic_dims
-    batch_size = common_args.batch_size
-    image_size = common_args.image_size
-    provider = common_args.provider
-
-    if static_dims:
-        hidden_batch_size = batch_size if (guidance_scale == 0.0) else batch_size * 2
-        # Not necessary, but helps DML EP further optimize runtime performance.
-        # batch_size is doubled for sample & hidden state because of classifier free guidance:
-        # https://github.com/huggingface/diffusers/blob/46c52f9b9607e6ecb29c782c052aea313e6487b7/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py#L672
-        sess_options.add_free_dimension_override_by_name("unet_sample_batch", hidden_batch_size)
-        sess_options.add_free_dimension_override_by_name("unet_sample_channels", 4)
-        sess_options.add_free_dimension_override_by_name("unet_sample_height", image_size // 8)
-        sess_options.add_free_dimension_override_by_name("unet_sample_width", image_size // 8)
-        sess_options.add_free_dimension_override_by_name("unet_time_batch", 1)
-        sess_options.add_free_dimension_override_by_name("unet_hidden_batch", hidden_batch_size)
-        sess_options.add_free_dimension_override_by_name("unet_hidden_sequence", 77)
-
-    provider_map = {
-        "dml": "DmlExecutionProvider",
-        "cuda": "CUDAExecutionProvider",
-    }
-    assert provider in provider_map, f"Unsupported provider: {provider}"
-    return OnnxStableDiffusionPipeline.from_pretrained(
-        model_dir, provider=provider_map[provider], sess_options=sess_options
-    )
+    for passType in ["optimized", "unoptimized"]:
+        model_dir = model_output / passType
+        for submodel_name in submodel_names:
+            src_path = model_info[submodel_name][passType]["path"]       # model.onnx
+            src_data_path = model_info[submodel_name][passType]["data"]  # model.onnx.data
+
+            dst_path = model_dir / submodel_name
+            if not os.path.exists(dst_path):
+                os.makedirs(dst_path, exist_ok=True)
+
+            shutil.copyfile(src_path, dst_path / "model.onnx")
+            if os.path.exists(src_data_path):
+                shutil.copyfile(src_data_path, dst_path / "model.onnx.data")
+
+    print(f"The converted model is located here: {model_output}")
