Add HF Auth mixin to Stable Diffusion (#1763)

msaroufim · facebook-github-bot · commit 411e3881f419 · 2023-07-18T15:14:39.000-07:00
Summary: Right now stable diffusion and lit-llama are not actually running in CI because they get rate limited by huggingface. Since we've now added an auth token as a GitHub secret, we can move stable diffusion out of canary and do things like include it in the blueberries dashboard. We also added some nice errors so people running in torchbench locally know they will need to have a token to run these models. Anyways, auth is a mixin, which seems like the right abstraction. # Some relevant details about the model Torchbench has a function `get_module()` that has the intent of testing a `nn.Module` on an actual `torch.Tensor`. Unfortunately, a `StableDiffusionPipeline` is not an `nn.Module`; it's a composition of a tokenizer and 3 separate `nn.Modules`: an encoder, vae and unet. ## text_encoder ```python def get_module(self): batch_size = 1 sequence_length = 10 vocab_size = 32000 # Generate random indices within the valid range input_tensor = torch.randint(low=0, high=vocab_size, size=(batch_size, sequence_length)) # Make sure the tensor has the correct data type input_tensor = input_tensor.long() print(self.pipe.text_encoder(input_tensor)) return self.pipe.text_encoder, input_tensor ``` Text encoder outputs a `BaseModelOutputWithPooling` which has multiple nn modules https://gist.github.com/msaroufim/51f0038863c5cce4cc3045e4d9f9c399 ``` ====================================================================== FAIL: test_stable_diffusion_example_cuda (__main__.TestBenchmark) ---------------------------------------------------------------------- components._impl.workers.subprocess_rpc.ChildTraceException: Traceback (most recent call last): File "/home/ubuntu/benchmark/components/_impl/workers/subprocess_rpc.py", line 482, in _run_block exec( # noqa: P204 File "<subprocess-worker>", line 35, in <module> File "<subprocess-worker>", line 12, in _run_in_worker_f File "/home/ubuntu/benchmark/torchbenchmark/util/model.py", line 26, in __call__ obj.__post__init__() File 
"/home/ubuntu/benchmark/torchbenchmark/util/model.py", line 126, in __post__init__ self.accuracy = check_accuracy(self) File "/home/ubuntu/benchmark/torchbenchmark/util/env_check.py", line 469, in check_accuracy model, example_inputs = maybe_cast(tbmodel, model, example_inputs) File "/home/ubuntu/benchmark/torchbenchmark/util/env_check.py", line 424, in maybe_cast example_inputs = clone_inputs(example_inputs) File "/home/ubuntu/benchmark/torchbenchmark/util/env_check.py", line 297, in clone_inputs assert isinstance(value, torch.Tensor) AssertionError ``` ## vae ```python def get_module(self): print(self.pipe.vae(torch.randn(9,3,9,9))) ``` Same problem for vae https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/vae.py#L27 ## unet ```python def get_module(self): # This will only benchmark the unet since that's the biggest layer # Stable diffusion is a composition of a text encoder, unet and vae encoder_hidden_states = torch.randn(320, 1024) sample = torch.randn(4, 4, 4, 32) timestep = 5 inputs_to_pipe = {'timestep': timestep, 'encoder_hidden_states': encoder_hidden_states, 'sample': sample} result = self.pipe.unet(**inputs_to_pipe) return self.pipe, inputs_to_pipe ``` Unet unfortunately does not have a tensor input For VAE and encoder the test failure is particularly helpful ``` (sam) ubuntu@ip-172-31-9-217:~/benchmark$ python test.py -k "test_stable_diffusion_example_cuda" F ====================================================================== FAIL: test_stable_diffusion_example_cuda (__main__.TestBenchmark) ---------------------------------------------------------------------- Traceback (most recent call last): File "/home/ubuntu/benchmark/test.py", line 75, in example_fn assert accuracy == "pass" or accuracy == "eager_1st_run_OOM", f"Expected accuracy pass, get {accuracy}" AssertionError: Expected accuracy pass, get eager_1st_run_fail ---------------------------------------------------------------------- Ran 1 test in 7.402s FAILED 
(failures=1) ``` Pull Request resolved: #1763 Reviewed By: xuzhao9 Differential Revision: D47565523 Pulled By: msaroufim fbshipit-source-id: c949ce8a31c0a4706658937fc6603a22a4bc3ec6
diff --git a/.github/workflows/pr-a10g.yml b/.github/workflows/pr-a10g.yml
@@ -10,6 +10,8 @@ env:
   CONDA_ENV: "torchbench"
   DOCKER_IMAGE: "ghcr.io/pytorch/torchbench:latest"
   SETUP_SCRIPT: "/workspace/setup_instance.sh"
+  HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+
 
 jobs:
   pr-test:
@@ -36,8 +38,9 @@ jobs:
       - name: Install and Test TorchBench
         run: |
           container_name=$(docker run \
-            -e CONDA_ENV \
-            -e SETUP_SCRIPT \
+            -e CONDA_ENV="${CONDA_ENV}" \
+            -e SETUP_SCRIPT="${SETUP_SCRIPT}" \
+            -e HUGGING_FACE_HUB_TOKEN="${HUGGING_FACE_HUB_TOKEN}" \
             --tty \
             --detach \
             --shm-size=32gb \
diff --git a/.github/workflows/pr-gha-runner.yml b/.github/workflows/pr-gha-runner.yml
@@ -10,6 +10,7 @@ env:
   BASE_CONDA_ENV: "torchbench"
   CONDA_ENV: "pr-ci-a100"
   SETUP_SCRIPT: "/workspace/setup_instance.sh"
+  HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
 
 jobs:
   pr-test:
diff --git a/torchbenchmark/canary_models/stable_diffusion/install.py b/torchbenchmark/canary_models/stable_diffusion/install.py
diff --git a/torchbenchmark/models/stable_diffusion/__init__.py b/torchbenchmark/models/stable_diffusion/__init__.py
@@ -5,12 +5,13 @@
 """
 from torchbenchmark.tasks import COMPUTER_VISION
 from torchbenchmark.util.model import BenchmarkModel
+from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceAuthMixin
 
 import torch
 from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
 
 
-class Model(BenchmarkModel):
+class Model(BenchmarkModel, HuggingFaceAuthMixin):
     task = COMPUTER_VISION.GENERATION
 
     DEFAULT_TRAIN_BSIZE = 1
@@ -19,22 +20,32 @@ class Model(BenchmarkModel):
     # Default eval precision on CUDA device is fp16
     DEFAULT_EVAL_CUDA_PRECISION = "fp16"
 
-
     def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
+        HuggingFaceAuthMixin.__init__(self)
         super().__init__(test=test, device=device, jit=jit,
                          batch_size=batch_size, extra_args=extra_args)
-        assert self.dargs.precision == "fp16", f"Stable Diffusion model only supports fp16 precision."
         model_id = "stabilityai/stable-diffusion-2"
         scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
-        self.pipe = StableDiffusionPipeline.from_pretrained(model_id, scheduler=scheduler, torch_dtype=torch.float16)
+        self.pipe = StableDiffusionPipeline.from_pretrained(model_id, scheduler=scheduler)
         self.pipe.to(self.device)
         self.example_inputs = "a photo of an astronaut riding a horse on mars"
 
     def enable_fp16_half(self):
         pass
 
+    
     def get_module(self):
-        return self.model, self.example_inputs
+        batch_size = 1
+        sequence_length = 10
+        vocab_size = 32000
+
+        # Generate random indices within the valid range
+        input_tensor = torch.randint(low=0, high=vocab_size, size=(batch_size, sequence_length))
+
+        # Make sure the tensor has the correct data type
+        input_tensor = input_tensor.long().to(self.device)
+        return self.pipe.text_encoder, [input_tensor]
+
 
     def train(self):
         raise NotImplementedError("Train test is not implemented for the stable diffusion model.")
diff --git a/torchbenchmark/models/stable_diffusion/install.py b/torchbenchmark/models/stable_diffusion/install.py
@@ -0,0 +1,17 @@
+from torchbenchmark.util.framework.diffusers import install_diffusers
+from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceAuthMixin
+import torch
+import os
+import warnings
+MODEL_NAME = "stabilityai/stable-diffusion-2"
+
+def load_model_checkpoint():
+    from diffusers import StableDiffusionPipeline
+    StableDiffusionPipeline.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, safety_checker=None)
+
+if __name__ == "__main__":
+    if 'HUGGING_FACE_HUB_TOKEN' not in os.environ:  # no token: only warn, skip install and checkpoint download
+        warnings.warn("Make sure to set `HUGGING_FACE_HUB_TOKEN` so you can download weights")
+    else:
+        install_diffusers()
+        load_model_checkpoint()
diff --git a/torchbenchmark/models/stable_diffusion/metadata.yaml b/torchbenchmark/models/stable_diffusion/metadata.yaml
@@ -0,0 +1,10 @@
+devices:
+  NVIDIA A100-SXM4-40GB:
+    eval_batch_size: 32
+eval_benchmark: false
+eval_deterministic: false
+eval_nograd: true
+train_benchmark: false
+train_deterministic: false
+not_implemented:
+- device: cpu
diff --git a/torchbenchmark/util/framework/huggingface/model_factory.py b/torchbenchmark/util/framework/huggingface/model_factory.py
@@ -159,6 +159,11 @@ def eval(self) -> Tuple[torch.Tensor]:
         else:
             return (out["logits"], )
 
+class HuggingFaceAuthMixin:
+    def __init__(self):
+        if not 'HUGGING_FACE_HUB_TOKEN' in os.environ:
+            raise NotImplementedError("Make sure to set `HUGGING_FACE_HUB_TOKEN` so you can download weights")
+
 
 class HuggingFaceGenerationModel(HuggingFaceModel):
     task = NLP.GENERATION
diff --git a/torchbenchmark/util/metadata_utils.py b/torchbenchmark/util/metadata_utils.py
@@ -20,4 +20,4 @@ def skip_by_metadata(test: str, device:str, jit: bool, extra_args: List[str], me
                 match_item("extra_args", extra_args, skip_item)
         if match:
             return True
-    return False
+    return False