Skip to content

Commit e145bda

Browse files
authored
Merge pull request #76 from vkuzo/add_ruff_linting_hf_torchao_vllm
Add ruff linting configuration to hf_torchao_vllm directory
2 parents e44cafd + 2c50e3c commit e145bda

9 files changed

+481
-243
lines changed

hf_torchao_vllm/.ruff.toml

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Ruff configuration for hf_torchao_vllm directory
2+
# See: https://docs.astral.sh/ruff/configuration/
3+
4+
# Target Python version
5+
target-version = "py312"
6+
7+
# Line length for formatting
8+
line-length = 80
9+
10+
# Enable specific rule categories
11+
lint.select = [
12+
"E", # pycodestyle errors
13+
"W", # pycodestyle warnings
14+
"F", # pyflakes
15+
"I", # isort (import sorting)
16+
"B", # flake8-bugbear (common bugs and design problems)
17+
"C4", # flake8-comprehensions (list/dict/set comprehensions)
18+
"UP", # pyupgrade (modernize Python code)
19+
"N", # pep8-naming (naming conventions)
20+
"SIM", # flake8-simplify (code simplification)
21+
]
22+
23+
# Ignore specific rules
24+
lint.ignore = [
25+
"E501", # line too long (handled by formatter)
26+
"E731", # do not assign a lambda expression, use a def
27+
"B008", # do not perform function calls in argument defaults
28+
"SIM108", # use ternary operator instead of if-else
29+
"N806", # variable name should be lowercase (for ML variables like N, K, etc.)
30+
"F841", # unused vars, TODO fix this
31+
"SIM117", # nested context, TODO fix this
32+
"SIM118", # key in dict, TODO fix this
33+
"E721", # type comparison, TODO fix this
34+
"B007", # loop control var unused, TODO fix this
35+
]
36+
37+
# Exclude files and directories
38+
exclude = [
39+
"__pycache__",
40+
"*.pyc",
41+
".git",
42+
"build",
43+
"dist",
44+
"*.egg-info",
45+
"data",
46+
"sparse_logs",
47+
]
48+
49+
[lint.per-file-ignores]
50+
# Allow unused imports in __init__.py files
51+
"__init__.py" = ["F401"]
52+
53+
[lint.isort]
54+
# Sort imports
55+
known-first-party = ["torchao", "transformers", "torch"]
56+
force-single-line = false
57+
split-on-trailing-comma = true
58+
59+
[format]
60+
# Formatting options
61+
quote-style = "double"
62+
indent-style = "space"
63+
line-ending = "auto"

hf_torchao_vllm/README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# HF -> torchao -> vLLM convenience scripts
22

3-
Example
3+
## Usage Example
44

55
```bash
66
# save a quantized model to data/nvfp4-Qwen1.5-MoE-A2.7B
@@ -9,3 +9,10 @@ python quantize_hf_model_with_torchao.py --model_name "Qwen/Qwen1.5-MoE-A2.7B" -
99
# run the model from above in vLLM
1010
python run_quantized_model_in_vllm.py --model_name "data/torchao/nvfp4-Qwen1.5-MoE-A2.7B" --compile False
1111
```
12+
13+
## Code Quality & Linting
14+
15+
```bash
16+
ruff format .
17+
ruff check . --fix
18+
```
Lines changed: 86 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,39 @@
1-
import copy
21
import filecmp
32
import json
43
import os
54
import pathlib
65
import shutil
76
import subprocess
8-
from typing import Dict, Any
97

108
import fire
11-
12-
import torch
13-
from torchao.core.config import AOBaseConfig, config_from_dict
14-
from torchao.quantization import Float8DynamicActivationFloat8WeightConfig, PerRow
15-
from torchao.quantization.quantize_.workflows.float8.float8_tensor import Float8Tensor
16-
179
from safetensors import safe_open
18-
from safetensors.torch import save_file
1910

11+
import torch
12+
from torchao.core.config import config_from_dict
2013
from utils import (
21-
convert_pt_statedict_to_safetensors,
22-
convert_pt_multifile_index_to_safetensors,
2314
ao_config_to_compressed_tensors_config,
15+
convert_pt_multifile_index_to_safetensors,
16+
convert_pt_statedict_to_safetensors,
2417
)
2518

2619

2720
def run(
2821
# original torchao checkpoint
29-
dir_source: str = 'data/torchao/fp8-opt-125m',
22+
dir_source: str = "data/torchao/fp8-opt-125m",
3023
# new compressed-tensors checkpoint
31-
dir_target: str = 'data/torchao_compressed_tensors/fp8-opt-125m',
24+
dir_target: str = "data/torchao_compressed_tensors/fp8-opt-125m",
3225
# existing compressed-tensors checkpoint to validate against
33-
dir_validation: str = 'data/llmcompressor/fp8-opt-125m',
26+
dir_validation: str = "data/llmcompressor/fp8-opt-125m",
3427
skip_conversion: bool = False,
3528
):
36-
dir_source = dir_source.rstrip('/')
37-
dir_target = dir_target.rstrip('/')
38-
dir_validation = dir_validation.rstrip('/')
29+
dir_source = dir_source.rstrip("/")
30+
dir_target = dir_target.rstrip("/")
31+
dir_validation = dir_validation.rstrip("/")
3932

4033
config_name_source = f"{dir_source}/config.json"
4134
config_name_target = f"{dir_target}/config.json"
4235
config_name_validation = f"{dir_validation}/config.json"
43-
weights_name_source = f"{dir_source}/pytorch_model.bin"
36+
weights_name_source = f"{dir_source}/pytorch_model.bin"
4437
weights_name_target = f"{dir_target}/model.safetensors"
4538
weights_name_validation = f"{dir_validation}/model.safetensors"
4639

@@ -54,7 +47,7 @@ def run(
5447
# convert config.json
5548
#
5649

57-
with open(config_name_source, 'r') as f:
50+
with open(config_name_source) as f:
5851
config_source = json.load(f)
5952
print(json.dumps(config_source, indent=2))
6053

@@ -63,13 +56,18 @@ def run(
6356
# we need to translate it to compressed-tensors format
6457
# example: https://www.internalfb.com/phabricator/paste/view/P1975642629
6558
old_hf_quantization_config = config_source["quantization_config"]
66-
fqn_to_serialized_aobaseconfig = old_hf_quantization_config["quant_type"]
59+
fqn_to_serialized_aobaseconfig = old_hf_quantization_config[
60+
"quant_type"
61+
]
6762
assert len(fqn_to_serialized_aobaseconfig) == 1, "unsupported"
6863

69-
if fqn_to_serialized_aobaseconfig['default']['_type'] == 'ModuleFqnToConfig':
70-
fqn_to_serialized_aobaseconfig = \
71-
fqn_to_serialized_aobaseconfig['default']['_data']['module_fqn_to_config']
72-
64+
if (
65+
fqn_to_serialized_aobaseconfig["default"]["_type"]
66+
== "ModuleFqnToConfig"
67+
):
68+
fqn_to_serialized_aobaseconfig = fqn_to_serialized_aobaseconfig[
69+
"default"
70+
]["_data"]["module_fqn_to_config"]
7371

7472
new_hf_quantization_config = {
7573
"config_groups": {},
@@ -82,9 +80,12 @@ def run(
8280
"version": "torchao_hack",
8381
}
8482

85-
for fqn, serialized_aobaseconfig in fqn_to_serialized_aobaseconfig.items():
83+
for (
84+
fqn,
85+
serialized_aobaseconfig,
86+
) in fqn_to_serialized_aobaseconfig.items():
8687
if serialized_aobaseconfig is None:
87-
new_hf_quantization_config['ignore'].append(fqn)
88+
new_hf_quantization_config["ignore"].append(fqn)
8889
continue
8990

9091
aobaseconfig = config_from_dict(serialized_aobaseconfig)
@@ -97,7 +98,7 @@ def run(
9798
config_source["quantization_config"] = new_hf_quantization_config
9899

99100
# save to new location
100-
with open(config_name_target, 'w') as f:
101+
with open(config_name_target, "w") as f:
101102
json.dump(config_source, f, indent=2)
102103

103104
source_converted_filenames.add(config_name_source)
@@ -109,35 +110,50 @@ def run(
109110
# not sure why I still need this
110111
torch.serialization.add_safe_globals([getattr])
111112

112-
is_single_chunk = os.path.isfile(f'{dir_source}/pytorch_model.bin')
113+
is_single_chunk = os.path.isfile(f"{dir_source}/pytorch_model.bin")
113114
if is_single_chunk:
114-
convert_pt_statedict_to_safetensors(weights_name_source, weights_name_target)
115+
convert_pt_statedict_to_safetensors(
116+
weights_name_source, weights_name_target
117+
)
115118
source_converted_filenames.add(weights_name_source)
116119
else:
117-
# convert each model state_dict file
120+
# convert each model state_dict file
118121
model_part_filenames = []
119122
for file_path in pathlib.Path(dir_source).iterdir():
120123
if not file_path.is_file():
121124
continue
122-
if not (('pytorch_model') in str(file_path) and str(file_path).endswith('bin')):
125+
if not (
126+
("pytorch_model") in str(file_path)
127+
and str(file_path).endswith("bin")
128+
):
123129
continue
124130
pt_sd_filename = str(file_path)
125131
# dir_source/pytorch_model-00001-of-00004.bin -> dir_target/model-00001-of-00004.safetensors
126-
safetensors_sd_filename = pt_sd_filename.replace(dir_source, dir_target)
127-
safetensors_sd_filename = safetensors_sd_filename.replace('pytorch_model', 'model')
128-
safetensors_sd_filename = safetensors_sd_filename.replace('.bin', '.safetensors')
132+
safetensors_sd_filename = pt_sd_filename.replace(
133+
dir_source, dir_target
134+
)
135+
safetensors_sd_filename = safetensors_sd_filename.replace(
136+
"pytorch_model", "model"
137+
)
138+
safetensors_sd_filename = safetensors_sd_filename.replace(
139+
".bin", ".safetensors"
140+
)
129141
model_part_filenames.append(safetensors_sd_filename)
130142
print(pt_sd_filename, safetensors_sd_filename)
131-
convert_pt_statedict_to_safetensors(pt_sd_filename, safetensors_sd_filename)
143+
convert_pt_statedict_to_safetensors(
144+
pt_sd_filename, safetensors_sd_filename
145+
)
132146
source_converted_filenames.add(pt_sd_filename)
133147

134148
# convert pytorch_model.bin.index.json
135149
convert_pt_multifile_index_to_safetensors(
136-
f'{dir_source}/pytorch_model.bin.index.json',
137-
f'{dir_target}/model.safetensors.index.json',
150+
f"{dir_source}/pytorch_model.bin.index.json",
151+
f"{dir_target}/model.safetensors.index.json",
138152
model_part_filenames,
139153
)
140-
source_converted_filenames.add(f'{dir_source}/pytorch_model.bin.index.json')
154+
source_converted_filenames.add(
155+
f"{dir_source}/pytorch_model.bin.index.json"
156+
)
141157

142158
print(source_converted_filenames)
143159

@@ -151,7 +167,7 @@ def run(
151167
# if we got here, we just need to copy the file over without any changes
152168
file_path = dir_and_file_path.parts[-1]
153169
target_file_path = f"{dir_target}/{str(file_path)}"
154-
print(f'copying {dir_and_file_path} to {target_file_path}')
170+
print(f"copying {dir_and_file_path} to {target_file_path}")
155171
shutil.copyfile(dir_and_file_path, target_file_path)
156172

157173
# validate target_dir vs validation_dir
@@ -160,36 +176,49 @@ def run(
160176
continue
161177
file_path_target = dir_and_file_path.parts[-1]
162178
print("\nvalidating", file_path_target)
163-
dir_and_file_path_validation = f"{dir_validation}/{str(file_path_target)}"
179+
dir_and_file_path_validation = (
180+
f"{dir_validation}/{str(file_path_target)}"
181+
)
164182

165-
if file_path_target == 'config.json':
183+
if file_path_target == "config.json":
166184
# for now just diff and print the output to stdout
167-
command = f'diff {dir_and_file_path} {dir_and_file_path_validation}'
185+
command = f"diff {dir_and_file_path} {dir_and_file_path_validation}"
168186
try:
169-
result = subprocess.run(command, capture_output=False, text=True, shell=True, check=True)
187+
result = subprocess.run(
188+
command,
189+
capture_output=False,
190+
text=True,
191+
shell=True,
192+
check=True,
193+
)
170194
except subprocess.CalledProcessError as e:
171195
# this will always fail, for now, as we are not perfectly matching
172-
print(e.stderr)
196+
print(e.stderr)
173197

174198
# TODO(future, as needed): also validate the other files, they are unlikely to match
175199
# exactly for any model with >1 chunk of state dict files since we are not
176200
# trying to enforce that the same tensors live in the same chunks.
177201

178-
elif file_path_target == 'model.safetensors':
179-
180-
with safe_open(dir_and_file_path, framework='pt') as f_target:
181-
with safe_open(dir_and_file_path_validation, framework='pt') as f_validation:
202+
elif file_path_target == "model.safetensors":
203+
with safe_open(dir_and_file_path, framework="pt") as f_target:
204+
with safe_open(
205+
dir_and_file_path_validation, framework="pt"
206+
) as f_validation:
182207
k_target_seen = set()
183208
for k_target in f_target.keys():
184209
v_target = f_target.get_tensor(k_target)
185210
v_validation = f_validation.get_tensor(k_target)
186211

187212
# ensure metadata matches
188213
if v_target.shape != v_validation.shape:
189-
print(f"shape mismatch: {k_target=}, {v_target.shape=}, {v_validation.shape=}")
214+
print(
215+
f"shape mismatch: {k_target=}, {v_target.shape=}, {v_validation.shape=}"
216+
)
190217

191-
if v_target.dtype != v_validation.dtype:
192-
print(f"dtype mismatch: {k_target=}, {v_target.dtype=}, {v_validation.dtype=}")
218+
if v_target.dtype != v_validation.dtype:
219+
print(
220+
f"dtype mismatch: {k_target=}, {v_target.dtype=}, {v_validation.dtype=}"
221+
)
193222

194223
# for now, no numerical checks
195224

@@ -202,8 +231,11 @@ def run(
202231
else:
203232
# approx check, currently fails because modification timestamp is not the
204233
# same. Since we copy these files ourselves, low-pri to make this better.
205-
is_equal = filecmp.cmp(dir_and_file_path, dir_and_file_path_validation, shallow=False)
206-
print('filecmp equal', is_equal)
234+
is_equal = filecmp.cmp(
235+
dir_and_file_path, dir_and_file_path_validation, shallow=False
236+
)
237+
print("filecmp equal", is_equal)
238+
207239

208-
if __name__ == '__main__':
240+
if __name__ == "__main__":
209241
fire.Fire(run)
Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,25 @@
11
# inspects the output of a model created with llm-compressor
22
# via the `run_llm_compressor.py` script
33

4-
import safetensors
54
import json
5+
66
import fire
77

88
from utils import inspect_model_state_dict
99

10+
1011
def run(
11-
dir_name: str = 'data/llmcompressor/fp8-opt-125m',
12+
dir_name: str = "data/llmcompressor/fp8-opt-125m",
1213
):
13-
json_config_name = f'{dir_name}/config.json'
14-
with open(json_config_name, 'r') as f:
14+
json_config_name = f"{dir_name}/config.json"
15+
with open(json_config_name) as f:
1516
data = json.load(f)
1617
# TODO: pretty print
1718
print(json.dumps(data, indent=2))
1819

19-
model_name, model_extension = 'model', 'safetensors'
20+
model_name, model_extension = "model", "safetensors"
2021
inspect_model_state_dict(dir_name, model_name, model_extension)
2122

22-
if __name__ == '__main__':
23+
24+
if __name__ == "__main__":
2325
fire.Fire(run)

0 commit comments

Comments
 (0)