From 01bdcd7d2fd26888b591d2b2ae45d4b1bdda1de9 Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Tue, 7 Oct 2025 17:03:43 -0500
Subject: [PATCH 01/11] initial commit

Signed-off-by: Brian Dellabetta
---
 examples/llm_compress_eval_example.py | 145 ++++++++++++++++++++++++++
 1 file changed, 145 insertions(+)
 create mode 100644 examples/llm_compress_eval_example.py

diff --git a/examples/llm_compress_eval_example.py b/examples/llm_compress_eval_example.py
new file mode 100644
index 00000000..deeb8197
--- /dev/null
+++ b/examples/llm_compress_eval_example.py
@@ -0,0 +1,145 @@
+from automation.pipelines import Pipeline
+from automation.tasks import LMEvalTask, LLMCompressorTask
+
+
+def get_quip_modifier(transform_block_size: int | None):
+    from llmcompressor.modifiers.transform import QuIPModifier
+
+    return QuIPModifier(
+        transform_type="hadamard", transform_block_size=transform_block_size
+    )
+
+
+def get_w4a16_scheme(group_size: int = 128):
+    from compressed_tensors.quantization import (
+        QuantizationScheme,
+        QuantizationStrategy,
+        QuantizationType,
+        QuantizationArgs,
+    )
+
+    return QuantizationScheme(
+        targets=["Linear"],
+        weights=QuantizationArgs(
+            num_bits=4,
+            type=QuantizationType.INT,
+            strategy=QuantizationStrategy.GROUP,
+            group_size=group_size,
+            symmetric=True,
+            dynamic=False,
+        ),
+    )
+
+
+def get_rtn_modifier(group_size: int = 128):
+    from llmcompressor.modifiers.quantization import (
+        QuantizationModifier,
+    )
+
+    # TODO: issue in llm-compressor when loading QuantizationModifiers from generated
+    # yaml --> Please specify either `targets` or `config_groups`
+    # manually delete for now
+    modifier = QuantizationModifier(
+        config_groups={"group_0": get_w4a16_scheme(group_size)}, ignore=["lm_head"]
+    )
+    modifier.targets = None
+    return modifier
+
+
+def get_gptq_modifier(group_size: int = 128):
+    from llmcompressor.modifiers.quantization import (
+        GPTQModifier,
+    )
+
+    modifier = GPTQModifier(
+        config_groups={"group_0": get_w4a16_scheme(group_size)}, ignore=["lm_head"]
+    )
+    modifier.targets = None
+    return modifier
+
+
+recipes = {
+    "RTN_W4A16G128": get_rtn_modifier(128),
+    "GPTQ_W4A16G128": get_gptq_modifier(128),
+    "QUIP_B128_RTN_W4A16G128": [get_quip_modifier(128), get_rtn_modifier(128)],
+    "QUIP_B128_GPTQ_W4A16G128": [get_quip_modifier(128), get_gptq_modifier(128)],
+    "QUIP_B64_RTN_W4A16G64": [get_quip_modifier(64), get_rtn_modifier(64)],
+    "QUIP_B64_GPTQ_W4A16G64": [get_quip_modifier(64), get_gptq_modifier(64)],
+}
+
+
+def average_scores(task):
+    gsm8k_score = task.get_reported_scalars()["gsm8k"]["exact_match,strict-match"]["y"][
+        0
+    ]
+    winogrande_score = task.get_reported_scalars()["winogrande"]["acc,none"]["y"][0]
+    average_score = (gsm8k_score + winogrande_score) / 2.0
+    task.get_logger().report_scalar(
+        title="score", series="average", iteration=0, value=average_score
+    )
+
+
+if __name__ == "__main__":
+    from llmcompressor.recipe import Recipe
+
+    pipeline = Pipeline(
+        project_name="brian_transforms",
+        pipeline_name="transforms_benchmark",
+        job_end_callback=average_scores,
+    )
+
+    for model_id in [
+        "meta-llama/Llama-3.2-3B-Instruct",
+        # "meta-llama/Llama-3.1-8B-Instruct",
+    ]:
+        for recipe_id, recipe_modifiers in recipes.items():
+            # NOTE: passing recipe in as a list of modifiers results in parsing
+            # errors. Use `Recipe.from_modifiers(recipe).model_dump_json()` instead
+            recipe = Recipe.from_modifiers(recipe_modifiers)
+            compress_step_name = f"compress-{recipe_id}"
+            compress_step = LLMCompressorTask(
+                project_name="brian_transforms",
+                task_name=compress_step_name,
+                model_id=model_id,
+                text_samples=512,
+                recipe=recipe.yaml(),
+            )
+            compress_step.create_task()
+
+            eval_step = LMEvalTask(
+                project_name="brian_transforms",
+                task_name=f"eval-{recipe_id}",
+                model_id="dummy", # overridden
+                clearml_model=True,
+                tasks=["gsm8k", "winogrande"],
+                num_fewshot=5,
+                # limit=10,
+            )
+            eval_step.create_task()
+
+            pipeline.add_step(
+                name=compress_step_name,
+                base_task_id=compress_step.id,
+                execution_queue="oneshot-a100x1",
+                monitor_models=[
+                    compress_step.get_arguments()["Args"]["save_directory"]
+                ],
+                monitor_artifacts=["recipe"],
+            )
+
+            pipeline.add_step(
+                name=f"eval-{recipe_id}",
+                base_task_id=eval_step.id,
+                parents=[compress_step_name],
+                execution_queue="oneshot-a100x1",
+                parameter_override={
+                    "Args/model_id": "${" + compress_step_name + ".models.output.-1.id}"
+                },
+                monitor_metrics=[
+                    ("gsm8k", "exact_match,strict-match"),
+                    ("winogrande", "acc,none"),
+                ],
+            )
+
+    pipeline.start()
+    # pipeline.execute_locally()

From 0b60c6850041e1cf2ea8ce8307ba847aa8bff17a Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Wed, 8 Oct 2025 15:18:31 -0500
Subject: [PATCH 02/11] p2

Signed-off-by: Brian Dellabetta
---
 examples/llm_compress_eval_example.py | 46 ++++++++++++++++++++-------
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/examples/llm_compress_eval_example.py b/examples/llm_compress_eval_example.py
index deeb8197..d22a0a05 100644
--- a/examples/llm_compress_eval_example.py
+++ b/examples/llm_compress_eval_example.py
@@ -1,12 +1,17 @@
+from typing import Literal
 from automation.pipelines import Pipeline
 from automation.tasks import LMEvalTask, LLMCompressorTask
 
 
-def get_quip_modifier(transform_block_size: int | None):
+def get_quip_modifier(
+    transform_block_size: int | None, rotations: list[Literal["u", "v"]] = ["u", "v"]
+):
     from llmcompressor.modifiers.transform import QuIPModifier
 
     return QuIPModifier(
-        transform_type="hadamard", transform_block_size=transform_block_size
+        transform_type="hadamard",
+        transform_block_size=transform_block_size,
+        rotations=rotations,
     )
 
 
@@ -61,10 +66,21 @@ def get_gptq_modifier(group_size: int = 128):
 
 
 recipes = {
     "RTN_W4A16G128": get_rtn_modifier(128),
     "GPTQ_W4A16G128": get_gptq_modifier(128),
-    "QUIP_B128_RTN_W4A16G128": [get_quip_modifier(128), get_rtn_modifier(128)],
-    "QUIP_B128_GPTQ_W4A16G128": [get_quip_modifier(128), get_gptq_modifier(128)],
-    "QUIP_B64_RTN_W4A16G64": [get_quip_modifier(64), get_rtn_modifier(64)],
-    "QUIP_B64_GPTQ_W4A16G64": [get_quip_modifier(64), get_gptq_modifier(64)],
+    "QUIPv_B128_RTN_W4A16G128": [get_quip_modifier(128, ["v"]), get_rtn_modifier(128)],
+    "QUIPv_B128_GPTQ_W4A16G128": [
+        get_quip_modifier(128, ["v"]),
+        get_gptq_modifier(128),
+    ],
+    "QUIPv_B64_RTN_W4A16G64": [get_quip_modifier(64, ["v"]), get_rtn_modifier(64)],
+    "QUIPv_B64_GPTQ_W4A16G64": [
+        get_quip_modifier(64, ["v"]),
+        get_gptq_modifier(64),
+    ],
+    # TODO: Quip U rotations broken in vllm only in clearml env, cannot reproduce locally
+    # "QUIPu_B128_RTN_W4A16G128": [get_quip_modifier(128, ["u"]), get_rtn_modifier(128)],
+    # "QUIPu_B128_GPTQ_W4A16G128": [get_quip_modifier(128, ["u"]), get_gptq_modifier(128)],
+    # "QUIP_B64_RTN_W4A16G64": [get_quip_modifier(64), get_rtn_modifier(64)],
+    # "QUIP_B64_GPTQ_W4A16G64": [get_quip_modifier(64), get_gptq_modifier(64)],
 }
 
 
@@ -90,13 +106,14 @@ def average_scores(task):
 
     for model_id in [
         "meta-llama/Llama-3.2-3B-Instruct",
-        # "meta-llama/Llama-3.1-8B-Instruct",
+        "meta-llama/Llama-3.1-8B-Instruct",
     ]:
+        model_name = model_id.split("/")[-1].replace(".", "")
         for recipe_id, recipe_modifiers in recipes.items():
             # NOTE: passing recipe in as a list of modifiers results in parsing
             # errors. Use `Recipe.from_modifiers(recipe).model_dump_json()` instead
             recipe = Recipe.from_modifiers(recipe_modifiers)
-            compress_step_name = f"compress-{recipe_id}"
+            compress_step_name = f"compress-{model_name}-{recipe_id}"
             compress_step = LLMCompressorTask(
                 project_name="brian_transforms",
                 task_name=compress_step_name,
@@ -108,12 +125,17 @@ def average_scores(task):
 
             eval_step = LMEvalTask(
                 project_name="brian_transforms",
-                task_name=f"eval-{recipe_id}",
+                task_name=f"eval-{model_name}-{recipe_id}",
                 model_id="dummy", # overridden
                 clearml_model=True,
-                tasks=["gsm8k", "winogrande"],
+                tasks=[
+                    "gsm8k",
+                    "winogrande",
+                    # TODO: PPL based metrics broken in lm_eval+vllm
+                    # https://github.com/EleutherAI/lm-evaluation-harness/issues/3134
+                    # "wikitext"
+                ],
                 num_fewshot=5,
-                # limit=10,
             )
             eval_step.create_task()
 
@@ -128,7 +150,7 @@ def average_scores(task):
             )
 
             pipeline.add_step(
-                name=f"eval-{recipe_id}",
+                name=f"eval-{model_name}-{recipe_id}",
                 base_task_id=eval_step.id,
                 parents=[compress_step_name],
                 execution_queue="oneshot-a100x1",

From ad0e36eb347604cb4c7147948ae2182421615a67 Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Wed, 8 Oct 2025 15:20:19 -0500
Subject: [PATCH 03/11] llmcompressor task updates

Signed-off-by: Brian Dellabetta
---
 src/automation/tasks/llmcompressor.py | 62 +++++++++++++++------------
 1 file changed, 34 insertions(+), 28 deletions(-)

diff --git a/src/automation/tasks/llmcompressor.py b/src/automation/tasks/llmcompressor.py
index b1253d96..95f77181 100644
--- a/src/automation/tasks/llmcompressor.py
+++ b/src/automation/tasks/llmcompressor.py
@@ -5,10 +5,13 @@
 import os
 import yaml
 
+
 class LLMCompressorTask(BaseTask):
     task_packages = [
         "git+https://github.com/vllm-project/llm-compressor.git",
-        "torchvision",
+        "torch==2.8.0",
+        "torchvision==0.23.0",
+        "huggingface-hub>=0.34.0,<1.0",
         "hf_xet",
     ]
 
     def __init__(
         self,
         project_name: str,
         task_name: str,
         model_id: str,
-        recipe: Optional[Any]=None,
-        recipe_args: Optional[dict]=None,
-        docker_image: str=DEFAULT_DOCKER_IMAGE,
-        packages: Optional[Sequence[str]]=None,
-        model_class: str="AutoModelForCausalLM",
-        dataset_name: Optional[str]="calibration",
-        dataset_loader: Optional[Callable]=None,
-        data_collator: Optional[Callable]=None,
-        clearml_model: bool=False,
-        force_download: bool=False,
-        save_directory: str="output",
-        text_samples: Optional[int]=None,
-        vision_samples: Optional[int]=None,
-        max_seq_len: int=8192,
-        trust_remote_code: bool=False,
+        recipe: Optional[Any] = None,
+        recipe_args: Optional[dict] = None,
+        docker_image: str = DEFAULT_DOCKER_IMAGE,
+        packages: Optional[Sequence[str]] = None,
+        model_class: str = "AutoModelForCausalLM",
+        dataset_name: Optional[str] = "calibration",
+        dataset_loader: Optional[Callable] = None,
+        data_collator: Optional[Callable] = None,
+        clearml_model: bool = False,
+        force_download: bool = False,
+        save_directory: str = "output",
+        text_samples: Optional[int] = None,
+        vision_samples: Optional[int] = None,
+        max_seq_len: int = 8192,
+        trust_remote_code: bool = False,
         skip_sparsity_compression_stats=True,
-        tags: Union[str, List[str]]=None,
-        task_type: str="training",
-        config: Optional[str]=None,
+        tags: Union[str, List[str]] = None,
+        task_type: str = "training",
+        config: Optional[str] = None,
     ):
 
         # Process config
@@ -62,8 +65,10 @@ def __init__(
         # Store class attributes that may be part of config
         if "recipe" in config_kwargs and recipe is not None:
-            raise ValueError("Recipe is already provided in config. It can't be provided in task instantiation.")
-
+            raise ValueError(
+                "Recipe is already provided in config. It can't be provided in task instantiation."
+            )
+
         recipe = config_kwargs.pop("recipe", recipe)
         if recipe is None:
             raise ValueError("Recipe must be provided.")
@@ -72,6 +77,7 @@ def __init__(
             recipe = yaml.dump(recipe, default_flow_style=False, sort_keys=False)
         elif not isinstance(recipe, str):
             from llmcompressor.recipe import Recipe
+
             recipe = Recipe.from_modifiers(recipe).yaml()
 
         self.recipe = recipe
@@ -87,7 +93,9 @@ def __init__(
         self.text_samples = config_kwargs.pop("text_samples", text_samples)
         self.vision_samples = config_kwargs.pop("vision_samples", vision_samples)
         self.max_seq_len = config_kwargs.pop("max_seq_len", max_seq_len)
-        self.trust_remote_code = config_kwargs.pop("trust_remote_code", trust_remote_code)
+        self.trust_remote_code = config_kwargs.pop(
+            "trust_remote_code", trust_remote_code
+        )
         self.model_class = model_class
         self.dataset_loader = dataset_loader
         self.data_collator = data_collator
@@ -104,13 +112,14 @@ def __init__(
         self.clearml_model = clearml_model
         self.force_download = force_download
         self.save_directory = save_directory
-        self.script_path = os.path.join(".", "src", "automation", "tasks", "scripts", "llmcompressor_script.py")
-
+        self.script_path = os.path.join(
+            ".", "src", "automation", "tasks", "scripts", "llmcompressor_script.py"
+        )
 
     def script(self, configurations, args):
         from automation.tasks.scripts.llmcompressor_script import main
+
         main(configurations, args)
-
     def get_configurations(self):
         configs = {}
@@ -120,7 +129,6 @@ def get_configurations(self):
         configs["data collator"] = serialize_callable(self.data_collator)
         return configs
-
     def get_arguments(self):
         return {
             "Args": {
@@ -140,5 +148,3 @@ def get_arguments(self):
                 "tags": self.tags,
             },
         }
-
-

From 2e369f7706297845edcaa910957cbb7359195955 Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Fri, 10 Oct 2025 14:30:41 -0500
Subject: [PATCH 04/11] transforms benchmark v1

Signed-off-by: Brian Dellabetta
---
 examples/llm_compress_eval_example.py | 62 ++++++++++++++++++---------
 1 file changed, 42 insertions(+), 20 deletions(-)

diff --git a/examples/llm_compress_eval_example.py b/examples/llm_compress_eval_example.py
index d22a0a05..c5d53e87 100644
--- a/examples/llm_compress_eval_example.py
+++ b/examples/llm_compress_eval_example.py
@@ -1,7 +1,13 @@
 from typing import Literal
+from clearml import Task
+
+# TODO: cannot use PipelineController, fails to clone github.com:neuralmagic/research
+# from clearml import PipelineController
 from automation.pipelines import Pipeline
 from automation.tasks import LMEvalTask, LLMCompressorTask
 
+PROJECT_NAME = "brian_transforms_v1"
+
 
 def get_quip_modifier(
     transform_block_size: int | None, rotations: list[Literal["u", "v"]] = ["u", "v"]
@@ -71,11 +77,20 @@ def get_gptq_modifier(group_size: int = 128):
         get_quip_modifier(128, ["v"]),
         get_gptq_modifier(128),
     ],
+    "RTN_W4A16G64": get_rtn_modifier(64),
+    "GPTQ_W4A16G64": get_gptq_modifier(64),
     "QUIPv_B64_RTN_W4A16G64": [get_quip_modifier(64, ["v"]), get_rtn_modifier(64)],
     "QUIPv_B64_GPTQ_W4A16G64": [
         get_quip_modifier(64, ["v"]),
         get_gptq_modifier(64),
     ],
+    "RTN_W4A16G32": get_rtn_modifier(32),
+    "GPTQ_W4A16G32": get_gptq_modifier(32),
+    "QUIPv_B32_RTN_W4A16G32": [get_quip_modifier(32, ["v"]), get_rtn_modifier(32)],
+    "QUIPv_B32_GPTQ_W4A16G32": [
+        get_quip_modifier(32, ["v"]),
+        get_gptq_modifier(32),
+    ],
     # TODO: Quip U rotations broken in vllm only in clearml env, cannot reproduce locally
     # "QUIPu_B128_RTN_W4A16G128": [get_quip_modifier(128, ["u"]), get_rtn_modifier(128)],
     # "QUIPu_B128_GPTQ_W4A16G128": [get_quip_modifier(128, ["u"]), get_gptq_modifier(128)],
@@ -84,38 +99,26 @@ def get_gptq_modifier(group_size: int = 128):
 }
 
 
-def average_scores(task):
-    gsm8k_score = task.get_reported_scalars()["gsm8k"]["exact_match,strict-match"]["y"][
-        0
-    ]
-    winogrande_score = task.get_reported_scalars()["winogrande"]["acc,none"]["y"][0]
-    average_score = (gsm8k_score + winogrande_score) / 2.0
-    task.get_logger().report_scalar(
-        title="score", series="average", iteration=0, value=average_score
-    )
-
 if __name__ == "__main__":
     from llmcompressor.recipe import Recipe
 
     pipeline = Pipeline(
-        project_name="brian_transforms",
-        pipeline_name="transforms_benchmark",
-        job_end_callback=average_scores,
+        project_name=PROJECT_NAME,
+        pipeline_name=f"{PROJECT_NAME}_pipeline",
     )
 
     for model_id in [
         "meta-llama/Llama-3.2-3B-Instruct",
         "meta-llama/Llama-3.1-8B-Instruct",
     ]:
-        model_name = model_id.split("/")[-1].replace(".", "")
+        model_name = model_id.split("/")[-1].replace(".", "_").replace("-", "_")
         for recipe_id, recipe_modifiers in recipes.items():
             # NOTE: passing recipe in as a list of modifiers results in parsing
             # errors. Use `Recipe.from_modifiers(recipe).model_dump_json()` instead
             recipe = Recipe.from_modifiers(recipe_modifiers)
-            compress_step_name = f"compress-{model_name}-{recipe_id}"
+            compress_step_name = f"compress--{model_name}--{recipe_id}"
             compress_step = LLMCompressorTask(
-                project_name="brian_transforms",
+                project_name=PROJECT_NAME,
                 task_name=compress_step_name,
                 model_id=model_id,
                 text_samples=512,
             )
             compress_step.create_task()
 
+            # NOTE: lm_eval settings set to match those found in
+            # src/automation/standards/evaluations/openllm.yaml
+            # apply_chat_template set to False
+            # anmarques: "We notice that apply_chat_template tends to mess up
+            # loglikelihood-based evals, which are most of the openllm benchmarks
+            # (the model tends to blab before predicting the answer)"
             eval_step = LMEvalTask(
-                project_name="brian_transforms",
-                task_name=f"eval-{model_name}-{recipe_id}",
+                project_name=PROJECT_NAME,
+                task_name=f"eval--{model_name}--{recipe_id}",
                 model_id="dummy", # overridden
                 clearml_model=True,
                 tasks=[
+                    # openllm tasks + llama variants
+                    "arc_challenge",
                     "gsm8k",
+                    "hellaswag",
+                    "mmlu",
                     "winogrande",
+                    "truthfulqa_mc2",
+                    "arc_challenge_llama",
+                    "gsm8k_llama",
                     # TODO: PPL based metrics broken in lm_eval+vllm
                     # https://github.com/EleutherAI/lm-evaluation-harness/issues/3134
                     # "wikitext"
                 ],
                 num_fewshot=5,
+                apply_chat_template=False,
+                model_args=(
+                    "gpu_memory_utilization=0.4,dtype=auto,max_model_len=4096,"
+                    "add_bos_token=True,enable_chunked_prefill=True"
+                ),
+                batch_size="auto",
             )
             eval_step.create_task()
 
-    pipeline.start()
+    pipeline.execute_remotely()
     # pipeline.execute_locally()

From c0bea4e24e7615bef322d32472ad3bdc17e82 Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Fri, 10 Oct 2025 16:46:48 -0500
Subject: [PATCH 05/11] hf hub dep version

Signed-off-by: Brian Dellabetta
---
 src/automation/tasks/lmeval.py | 42 +++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/src/automation/tasks/lmeval.py b/src/automation/tasks/lmeval.py
index 708535ca..3b8ea501 100644
--- a/src/automation/tasks/lmeval.py
+++ b/src/automation/tasks/lmeval.py
@@ -4,12 +4,14 @@
 from typing import Optional, Sequence
 import os
 
+
 class LMEvalTask(BaseTask):
     task_packages = [
         "vllm",
         "git+https://github.com/EleutherAI/lm-evaluation-harness.git",
         "numpy==2.1",
+        "huggingface-hub>=0.34.0,<1.0",
         "hf_xet",
         "rouge-score",
         "bert-score",
     ]
 
     def __init__(
         self,
         project_name: str,
         task_name: str,
         model_id: str,
-        docker_image: str=DEFAULT_DOCKER_IMAGE,
-        packages: Optional[Sequence[str]]=None,
-        clearml_model: bool=False,
-        task_type: str="training",
-        force_download: bool=False,
-        config: Optional[str]=None,
-        model: str="vllm",
+        docker_image: str = DEFAULT_DOCKER_IMAGE,
+        packages: Optional[Sequence[str]] = None,
+        clearml_model: bool = False,
+        task_type: str = "training",
+        force_download: bool = False,
+        config: Optional[str] = None,
+        model: str = "vllm",
         **kwargs,
     ):
 
             if "vllm" in package:
                 self.task_packages.pop("vllm")
             if "lm-evaluation-harness" in package:
-                self.task_packages.pop("git+https://github.com/EleutherAI/lm-evaluation-harness.git")
+                self.task_packages.pop(
+                    "git+https://github.com/EleutherAI/lm-evaluation-harness.git"
+                )
             packages = list(set(packages + self.task_packages))
         else:
             packages = self.task_packages
 
                 continue
 
             if key in kwargs:
-                raise ValueError(f"{key} already defined in config's model_args. It can't be defined again in task instantiation.")
+                raise ValueError(
+                    f"{key} already defined in config's model_args. It can't be defined again in task instantiation."
+                )
 
             elif key == "model":
                 model = config_kwargs.pop(key)
 
         # in both the config and in the constructor, assuming
         # the keys used in model_args are complementary
         if "model_args" in kwargs:
-            model_args = dict(item.split("=") for item in kwargs.pop("model_args").split(","))
+            model_args = dict(
+                item.split("=") for item in kwargs.pop("model_args").split(",")
+            )
         else:
             model_args = {}
 
         if "model_args" in config_kwargs:
-            config_model_args = dict(item.split("=") for item in config_kwargs.pop("model_args").split(","))
+            config_model_args = dict(
+                item.split("=") for item in config_kwargs.pop("model_args").split(",")
+            )
             model_args = merge_dicts(model_args, config_model_args)
 
         # Set default dtype and enable_chunked_prefill
             model_args["enforce_eager"] = True
 
         kwargs["model_args"] = ",".join(f"{k}={v}" for k, v in model_args.items())
-
+
         kwargs.update(config_kwargs)
         kwargs["model"] = model
 
         self.clearml_model = clearml_model
         self.lm_eval = kwargs
         self.force_download = force_download
-        self.script_path = os.path.join(".", "src", "automation", "tasks", "scripts", "lmeval_script.py")
-
+        self.script_path = os.path.join(
+            ".", "src", "automation", "tasks", "scripts", "lmeval_script.py"
+        )
 
     def script(self, configurations, args):
         from automation.tasks.scripts.lmeval_script import main
-        main(configurations, args)
+        main(configurations, args)
 
     def get_configurations(self):
         return {
             "lm_eval": self.lm_eval,
         }
-
     def get_arguments(self):
         return {
             "Args": {

From 49d1133b815b76918ed1e05bae57262cc60de924 Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Fri, 17 Oct 2025 13:05:45 -0500
Subject: [PATCH 06/11] set VLLM_USE_PRECOMPILED

Signed-off-by: Brian Dellabetta
---
 src/automation/tasks/base_task.py | 5 +++++
 src/automation/tasks/lmeval.py    | 4 +++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py
index 456cf1ff..15e7ced0 100644
--- a/src/automation/tasks/base_task.py
+++ b/src/automation/tasks/base_task.py
@@ -112,6 +112,11 @@ def create_task(self):
             repo="https://github.com/neuralmagic/research.git",
             branch=self.branch,
         )
+        # To avoid precompiling VLLM when installing from main, add env var
+        self.task.set_base_docker(
+            docker_image=self.docker_image,
+            docker_arugments="-e VLLM_USE_PRECOMPILED=1",
+        )
         self.task.output_uri = DEFAULT_OUTPUT_URI
         self.set_arguments()
         self.set_configurations()
diff --git a/src/automation/tasks/lmeval.py b/src/automation/tasks/lmeval.py
index 3b8ea501..62d5ac69 100644
--- a/src/automation/tasks/lmeval.py
+++ b/src/automation/tasks/lmeval.py
@@ -8,7 +8,9 @@
 
 class LMEvalTask(BaseTask):
     task_packages = [
-        "vllm",
+        # Use latest vllm release or install from main
+        # "vllm",
+        "git+https://github.com/vllm-project/vllm.git",
         "git+https://github.com/EleutherAI/lm-evaluation-harness.git",
         "numpy==2.1",
         "huggingface-hub>=0.34.0,<1.0",

From 9cbc2be534571d1fac180493cc0475e263192020 Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Fri, 17 Oct 2025 13:13:04 -0500
Subject: [PATCH 07/11] typo

Signed-off-by: Brian Dellabetta
---
 src/automation/tasks/base_task.py | 58 ++++++++++++++-----------------
 1 file changed, 27 insertions(+), 31 deletions(-)

diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py
index 15e7ced0..54543a4e 100644
--- a/src/automation/tasks/base_task.py
+++ b/src/automation/tasks/base_task.py
@@ -6,12 +6,14 @@
 
 try:
     from clearml import Task
+
     clearml_available = True
 except ImportError:
     print("ClearML not available. Will run tasks locally and not report to ClearML.")
     clearml_available = False
 
-class BaseTask():
+
+class BaseTask:
 
     def __init__(
         self,
         project_name: str,
         task_name: str,
         docker_image: str,
         branch: Optional[str] = DEFAULT_RESEARCH_BRANCH,
-        packages: Optional[Sequence[str]]=None,
-        task_type: str="training",
+        packages: Optional[Sequence[str]] = None,
+        task_type: str = "training",
     ):
 
         branch_name = branch or DEFAULT_RESEARCH_BRANCH
-        base_packages = [f"git+https://github.com/neuralmagic/research.git@{branch_name}"]
-
+        base_packages = [
+            f"git+https://github.com/neuralmagic/research.git@{branch_name}"
+        ]
+
         if packages is not None:
             packages = list(set(packages + base_packages))
         else:
             packages = self.task_packages
 
         self.branch = branch
         self.script_path = None
         self.callable_artifacts = None
-
     @property
     def id(self):
         return self.task.id
 
     @property
     def name(self):
         return self.task_name
-
     def process_config(self, config):
         if config is None:
             return {}
-
         if config in STANDARD_CONFIGS:
             return yaml.safe_load(open(STANDARD_CONFIGS[config], "r"))
         elif os.path.exists(config):
         else:
             return yaml.safe_load(config)
-
     def get_arguments(self):
         return {}
-
     def set_arguments(self):
         args = self.get_arguments()
 
         if clearml_available:
             for args_name, args_dict in args.items():
                 self.task.connect(args_dict, args_name)
-
-        return args
+        return args
 
     def get_configurations(self):
         return {}
-
     def set_configurations(self):
         configurations = self.get_configurations()
 
         if clearml_available:
             for name, config in configurations.items():
                 self.task.connect_configuration(config, name=name)
-
-        return configurations
+        return configurations
 
     def script(self, configurations, args):
         raise NotImplementedError
-
     def create_task(self):
-        self.task = Task.create(
-            project_name=self.project_name,
-            task_name=self.task_name,
-            task_type=self.task_type,
-            docker=self.docker_image,
-            packages=self.packages,
+        self.task: Task = Task.create(
+            project_name=self.project_name,
+            task_name=self.task_name,
+            task_type=self.task_type,
+            docker=self.docker_image,
+            packages=self.packages,
             add_task_init_call=True,
             script=self.script_path,
             repo="https://github.com/neuralmagic/research.git",
             branch=self.branch,
         )
         # To avoid precompiling VLLM when installing from main, add env var
         self.task.set_base_docker(
             docker_image=self.docker_image,
-            docker_arugments="-e VLLM_USE_PRECOMPILED=1",
+            docker_arguments="-e VLLM_USE_PRECOMPILED=1",
         )
         self.task.output_uri = DEFAULT_OUTPUT_URI
         self.set_arguments()
         self.set_configurations()
-
     def get_task_id(self):
         if self.task is not None:
             return self.task.id
         else:
-            raise ValueError("Task ID not available since ClearML task not yet created. Try task.create_task() firts.")
+            raise ValueError(
+                "Task ID not available since ClearML task not yet created. Try task.create_task() first."
+            )
 
     def execute_remotely(self, queue_name):
         if self.task is None:
             self.create_task()
 
-        self.task.execute_remotely(queue_name=queue_name, clone=False, exit_process=True)
-
+        self.task.execute_remotely(
+            queue_name=queue_name, clone=False, exit_process=True
+        )
 
     def execute_locally(self):
         if clearml_available:
             if self.task is not None:
                 raise Exception("Can only execute locally if task is not yet created.")
 
             self.task = Task.init(
-                project_name=self.project_name,
-                task_name=self.task_name,
+                project_name=self.project_name,
+                task_name=self.task_name,
                 task_type=self.task_type,
                 auto_connect_arg_parser=False,
             )
 
         args = self.set_arguments()
         configurations = self.set_configurations()
         self.script(configurations, args)
-
\ No newline at end of file

From ad921a01a524db23636a4583c584e14e351e5dd1 Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Fri, 17 Oct 2025 13:19:39 -0500
Subject: [PATCH 08/11] networkx pin

Signed-off-by: Brian Dellabetta
---
 src/automation/tasks/llmcompressor.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/automation/tasks/llmcompressor.py b/src/automation/tasks/llmcompressor.py
index 95f77181..b4414506 100644
--- a/src/automation/tasks/llmcompressor.py
+++ b/src/automation/tasks/llmcompressor.py
@@ -13,6 +13,8 @@ class LLMCompressorTask(BaseTask):
         "torchvision==0.23.0",
         "huggingface-hub>=0.34.0,<1.0",
         "hf_xet",
+        # Will error out for networkx v3.5+, which has python-requires>=3.11
+        "networkx~=3.4.2",
     ]

From 697675c95ef5f5951671b78c7ec130416eaaf84d Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Fri, 17 Oct 2025 14:25:35 -0500
Subject: [PATCH 09/11] quip uv

Signed-off-by: Brian Dellabetta
---
 examples/llm_compress_eval_example.py | 65 +++++++++++++++------------
 1 file changed, 36 insertions(+), 29 deletions(-)

diff --git a/examples/llm_compress_eval_example.py b/examples/llm_compress_eval_example.py
index c5d53e87..9560bbab 100644
--- a/examples/llm_compress_eval_example.py
+++ b/examples/llm_compress_eval_example.py
@@ -47,14 +47,9 @@ def get_rtn_modifier(group_size: int = 128):
         QuantizationModifier,
     )
 
-    # TODO: issue in llm-compressor when loading QuantizationModifiers from generated
-    # yaml --> Please specify either `targets` or `config_groups`
-    # manually delete for now
-    modifier = QuantizationModifier(
+    return QuantizationModifier(
         config_groups={"group_0": get_w4a16_scheme(group_size)}, ignore=["lm_head"]
     )
-    modifier.targets = None
-    return modifier
 
 
 def get_gptq_modifier(group_size: int = 128):
@@ -62,40 +57,52 @@ def get_gptq_modifier(group_size: int = 128):
         GPTQModifier,
     )
 
-    modifier = GPTQModifier(
+    return GPTQModifier(
         config_groups={"group_0": get_w4a16_scheme(group_size)}, ignore=["lm_head"]
     )
-    modifier.targets = None
-    return modifier
 
 
 recipes = {
-    "RTN_W4A16G128": get_rtn_modifier(128),
-    "GPTQ_W4A16G128": get_gptq_modifier(128),
-    "QUIPv_B128_RTN_W4A16G128": [get_quip_modifier(128, ["v"]), get_rtn_modifier(128)],
-    "QUIPv_B128_GPTQ_W4A16G128": [
-        get_quip_modifier(128, ["v"]),
-        get_gptq_modifier(128),
-    ],
+    "DENSE": [],
+    # "RTN_W4A16G128": get_rtn_modifier(128),
+    # "GPTQ_W4A16G128": get_gptq_modifier(128),
+    # "QUIPv_B128_RTN_W4A16G128": [get_quip_modifier(128, ["v"]), get_rtn_modifier(128)],
+    # "QUIPv_B128_GPTQ_W4A16G128": [
+    #     get_quip_modifier(128, ["v"]),
+    #     get_gptq_modifier(128),
+    # ],
+    # "QUIPuv_B128_RTN_W4A16G128": [
+    #     get_quip_modifier(128, ["u", "v"]),
+    #     get_rtn_modifier(128),
+    # ],
+    "QUIPuv_B128_GPTQ_W4A16G128": [
+        get_quip_modifier(128, ["u", "v"]),
+        get_gptq_modifier(128),
+    ],
-    "RTN_W4A16G64": get_rtn_modifier(64),
-    "GPTQ_W4A16G64": get_gptq_modifier(64),
-    "QUIPv_B64_RTN_W4A16G64": [get_quip_modifier(64, ["v"]), get_rtn_modifier(64)],
-    "QUIPv_B64_GPTQ_W4A16G64": [
-        get_quip_modifier(64, ["v"]),
-        get_gptq_modifier(64),
-    ],
+    # "RTN_W4A16G64": get_rtn_modifier(64),
+    # "GPTQ_W4A16G64": get_gptq_modifier(64),
+    # "QUIPv_B64_RTN_W4A16G64": [get_quip_modifier(64, ["v"]), get_rtn_modifier(64)],
+    # "QUIPv_B64_GPTQ_W4A16G64": [
+    #     get_quip_modifier(64, ["v"]),
+    #     get_gptq_modifier(64),
+    # ],
+    # "QUIPuv_B64_RTN_W4A16G64": [get_quip_modifier(64, ["u", "v"]), get_rtn_modifier(64)],
+    "QUIPuv_B64_GPTQ_W4A16G64": [
+        get_quip_modifier(64, ["u", "v"]),
+        get_gptq_modifier(64),
+    ],
-    "RTN_W4A16G32": get_rtn_modifier(32),
-    "GPTQ_W4A16G32": get_gptq_modifier(32),
-    "QUIPv_B32_RTN_W4A16G32": [get_quip_modifier(32, ["v"]), get_rtn_modifier(32)],
-    "QUIPv_B32_GPTQ_W4A16G32": [
-        get_quip_modifier(32, ["v"]),
-        get_gptq_modifier(32),
-    ],
+    # "RTN_W4A16G32": get_rtn_modifier(32),
+    # "GPTQ_W4A16G32": get_gptq_modifier(32),
+    # "QUIPv_B32_RTN_W4A16G32": [get_quip_modifier(32, ["v"]), get_rtn_modifier(32)],
+    # "QUIPv_B32_GPTQ_W4A16G32": [
+    #     get_quip_modifier(32, ["v"]),
+    #     get_gptq_modifier(32),
+    # ],
+    # "QUIPuv_B32_RTN_W4A16G32": [get_quip_modifier(32, ["u", "v"]), get_rtn_modifier(32)],
+    "QUIPuv_B32_GPTQ_W4A16G32": [
+        get_quip_modifier(32, ["u", "v"]),
+        get_gptq_modifier(32),
+    ],
-    # TODO: Quip U rotations broken in vllm only in clearml env, cannot reproduce locally
-    # "QUIPu_B128_RTN_W4A16G128": [get_quip_modifier(128, ["u"]), get_rtn_modifier(128)],
-    # "QUIPu_B128_GPTQ_W4A16G128": [get_quip_modifier(128, ["u"]), get_gptq_modifier(128)],
-    # "QUIP_B64_RTN_W4A16G64": [get_quip_modifier(64), get_rtn_modifier(64)],
-    # "QUIP_B64_GPTQ_W4A16G64": [get_quip_modifier(64), get_gptq_modifier(64)],
 }

From 62f2ceb8810c7fd28ca63f3f3eed1384400360ae Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Wed, 22 Oct 2025 15:46:04 -0500
Subject: [PATCH 10/11] spinquant experiments

Signed-off-by: Brian Dellabetta
---
 examples/llm_compress_eval_example.py | 52 ++++++++++++++++++++++-----
 1 file changed, 43 insertions(+), 9 deletions(-)

diff --git a/examples/llm_compress_eval_example.py b/examples/llm_compress_eval_example.py
index 9560bbab..cd800e18 100644
--- a/examples/llm_compress_eval_example.py
+++ b/examples/llm_compress_eval_example.py
@@ -1,14 +1,25 @@
 from typing import Literal
 from clearml import Task
 
-# TODO: cannot use PipelineController, fails to clone github.com:neuralmagic/research
-# from clearml import PipelineController
 from automation.pipelines import Pipeline
 from automation.tasks import LMEvalTask, LLMCompressorTask
 
 PROJECT_NAME = "brian_transforms_v1"
 
 
+def get_spinquant_modifier(
+    transform_block_size: int | None,
+    rotations: list[Literal["R1", "R2", "R4"]] = ["R1", "R2"],
+):
+    from llmcompressor.modifiers.transform import SpinQuantModifier
+
+    return SpinQuantModifier(
+        transform_type="hadamard",
+        transform_block_size=transform_block_size,
+        rotations=rotations,
+    )
+
+
 def get_quip_modifier(
     transform_block_size: int | None, rotations: list[Literal["u", "v"]] = ["u", "v"]
 ):
 
     # "QUIPuv_B128_RTN_W4A16G128": [
     #     get_quip_modifier(128, ["u", "v"]),
     #     get_rtn_modifier(128),
     # ],
-    "QUIPuv_B128_GPTQ_W4A16G128": [
-        get_quip_modifier(128, ["u", "v"]),
-        get_gptq_modifier(128),
-    ],
+    # "QUIPuv_B128_GPTQ_W4A16G128": [
+    #     get_quip_modifier(128, ["u", "v"]),
+    #     get_gptq_modifier(128),
+    # ],
+    "SpinQuantR1R2_B128_GPTQ_W4A16G128": [
+        get_spinquant_modifier(128, ["R1", "R2"]),
+        get_gptq_modifier(128),
+    ],
+    "SpinQuantR1R2R4_B128_GPTQ_W4A16G128": [
+        get_spinquant_modifier(128, ["R1", "R2", "R4"]),
+        get_gptq_modifier(128),
+    ],
     # "RTN_W4A16G64": get_rtn_modifier(64),
     # "GPTQ_W4A16G64": get_gptq_modifier(64),
     # "QUIPv_B64_RTN_W4A16G64": [get_quip_modifier(64, ["v"]), get_rtn_modifier(64)],
     # "QUIPv_B64_GPTQ_W4A16G64": [
     #     get_quip_modifier(64, ["v"]),
     #     get_gptq_modifier(64),
     # ],
     # "QUIPuv_B64_RTN_W4A16G64": [get_quip_modifier(64, ["u", "v"]), get_rtn_modifier(64)],
-    "QUIPuv_B64_GPTQ_W4A16G64": [
-        get_quip_modifier(64, ["u", "v"]),
-        get_gptq_modifier(64),
-    ],
+    # "QUIPuv_B64_GPTQ_W4A16G64": [
+    #     get_quip_modifier(64, ["u", "v"]),
+    #     get_gptq_modifier(64),
+    # ],
+    "SpinQuantR1R2_B64_GPTQ_W4A16G64": [
+        get_spinquant_modifier(64, ["R1", "R2"]),
+        get_gptq_modifier(64),
+    ],
+    "SpinQuantR1R2R4_B64_GPTQ_W4A16G64": [
+        get_spinquant_modifier(64, ["R1", "R2", "R4"]),
+        get_gptq_modifier(64),
+    ],
     # "RTN_W4A16G32": get_rtn_modifier(32),
     # "GPTQ_W4A16G32": get_gptq_modifier(32),
     # "QUIPv_B32_RTN_W4A16G32": [get_quip_modifier(32, ["v"]), get_rtn_modifier(32)],
     # "QUIPv_B32_GPTQ_W4A16G32": [
     #     get_quip_modifier(32, ["v"]),
     #     get_gptq_modifier(32),
     # ],
     # "QUIPuv_B32_RTN_W4A16G32": [get_quip_modifier(32, ["u", "v"]), get_rtn_modifier(32)],
-    "QUIPuv_B32_GPTQ_W4A16G32": [
-        get_quip_modifier(32, ["u", "v"]),
-        get_gptq_modifier(32),
-    ],
+    # "QUIPuv_B32_GPTQ_W4A16G32": [
+    #     get_quip_modifier(32, ["u", "v"]),
+    #     get_gptq_modifier(32),
+    # ],
+    "SpinQuantR1R2_B32_GPTQ_W4A16G32": [
+        get_spinquant_modifier(32, ["R1", "R2"]),
+        get_gptq_modifier(32),
+    ],
+    "SpinQuantR1R2R4_B32_GPTQ_W4A16G32": [
+        get_spinquant_modifier(32, ["R1", "R2", "R4"]),
+        get_gptq_modifier(32),
+    ],
 }
 
     pipeline.execute_remotely()
-    # pipeline.execute_locally()

From e94c2b572e1f804a2bd17a49cc0decd5b348a20 Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Wed, 22 Oct 2025 16:31:00 -0500
Subject: [PATCH 11/11] spinquant experiments p2

Signed-off-by: Brian Dellabetta
---
 examples/llm_compress_eval_example.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/llm_compress_eval_example.py b/examples/llm_compress_eval_example.py
index cd800e18..bd746e58 100644
--- a/examples/llm_compress_eval_example.py
+++ b/examples/llm_compress_eval_example.py
@@ -74,7 +74,7 @@ def get_gptq_modifier(group_size: int = 128):
 
 
 recipes = {
-    "DENSE": [],
+    # "DENSE": [],
     # "RTN_W4A16G128": get_rtn_modifier(128),
     # "GPTQ_W4A16G128": get_gptq_modifier(128),
     # "QUIPv_B128_RTN_W4A16G128": [get_quip_modifier(128, ["v"]), get_rtn_modifier(128)],
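
Below is a minimal, self-contained sketch of the recipe-construction pattern this series converges on, for readers who want to reproduce a single configuration outside the ClearML pipeline. It is an illustration, not part of the patches: it assumes llmcompressor and compressed-tensors are installed, reuses only names that appear in the diffs above (SpinQuantModifier, GPTQModifier, QuantizationScheme, Recipe.from_modifiers), and mirrors the "SpinQuantR1R2_B64_GPTQ_W4A16G64" recipe.

from llmcompressor.recipe import Recipe
from llmcompressor.modifiers.transform import SpinQuantModifier
from llmcompressor.modifiers.quantization import GPTQModifier
from compressed_tensors.quantization import (
    QuantizationScheme,
    QuantizationStrategy,
    QuantizationType,
    QuantizationArgs,
)

# W4A16 group-quantization scheme, mirroring get_w4a16_scheme() in the example
scheme = QuantizationScheme(
    targets=["Linear"],
    weights=QuantizationArgs(
        num_bits=4,
        type=QuantizationType.INT,
        strategy=QuantizationStrategy.GROUP,
        group_size=64,
        symmetric=True,
        dynamic=False,
    ),
)

# SpinQuant R1/R2 rotations (block size 64) followed by GPTQ, ignoring lm_head
modifiers = [
    SpinQuantModifier(
        transform_type="hadamard",
        transform_block_size=64,
        rotations=["R1", "R2"],
    ),
    GPTQModifier(config_groups={"group_0": scheme}, ignore=["lm_head"]),
]

# Passing the raw modifier list to LLMCompressorTask can trip the recipe parser
# (see the NOTE in patch 01); serializing through Recipe gives a stable YAML
# string instead.
recipe_yaml = Recipe.from_modifiers(modifiers).yaml()

Serializing through Recipe.from_modifiers(...).yaml() is the path the example script itself settled on: despite the patch-01 comment mentioning model_dump_json(), the code passes recipe.yaml() as the recipe argument to LLMCompressorTask.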