From 757f02b148bfcd32ac375bf4aea651e22f6af52d Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 15 Oct 2025 10:46:34 +0000 Subject: [PATCH 01/96] base semantic gen --- src/automation/tasks/__init__.py | 4 +- .../semantic_similarity_generate_script.py | 171 ++++++++++++++++++ .../tasks/semantic_similarity_generate.py | 122 +++++++++++++ 3 files changed, 296 insertions(+), 1 deletion(-) create mode 100644 src/automation/tasks/scripts/semantic_similarity_generate_script.py create mode 100644 src/automation/tasks/semantic_similarity_generate.py diff --git a/src/automation/tasks/__init__.py b/src/automation/tasks/__init__.py index 62e70841..8f1a496d 100644 --- a/src/automation/tasks/__init__.py +++ b/src/automation/tasks/__init__.py @@ -1,4 +1,5 @@ from automation.tasks.base_task import BaseTask +from automation.tasks.semantic_similarity_generate import SemanticSimilarityGenerateTask from automation.tasks.llmcompressor import LLMCompressorTask from automation.tasks.lmeval import LMEvalTask from automation.tasks.lighteval import LightEvalTask @@ -7,9 +8,10 @@ __all__ = [ "BaseTask", + "SemanticSimilarityGenerateTask", "LLMCompressorTask", "LMEvalTask", "LightEvalTask", "GuideLLMTask", "DebugTask", -] \ No newline at end of file +] diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py new file mode 100644 index 00000000..79371d79 --- /dev/null +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -0,0 +1,171 @@ +import json +import os +from tqdm import tqdm +from datasets import load_dataset +from vllm import LLM, SamplingParams +from transformers import AutoTokenizer + +#from automation.utils import resolve_reference_model_id, parse_argument, load_callable_configuration + +try: + from clearml import OutputModel, Task + clearml_available = True +except ImportError: + clearml_available = False + +""" +def llmcompressor_main( + reference_model_id, + model_class, + trust_remote_code, + recipe, + dataset_args, + dataset_loader, + dataset_name, + max_model_len, + num_samples, + max_new_tokens, + skip_sparsity_compression_stats, + save_directory, + data_collator, +): + dtype = "auto" + device_map = "auto" + + # Load model + model_class = getattr(transformers, model_class) + + model = model_class.from_pretrained( + reference_model_id, + torch_dtype=dtype, + device_map=device_map, + trust_remote_code=trust_remote_code, + ) + + # Load recipe + if isinstance(recipe, str) and os.path.isfile(recipe): + with open(recipe, "r", encoding="utf-8") as file: + recipe = file.read() + + if dataset_args is not None: + if "smoothquant_mappings" in dataset_args and dataset_args["smoothquant_mappings"] in MAPPINGS_PER_MODEL_CONFIG: + dataset_args["smoothquant_mappings"] = MAPPINGS_PER_MODEL_CONFIG[dataset_args["smoothquant_mappings"]] + + for key, value in dataset_args.items(): + recipe = recipe.replace(f"${key}", str(value)) + + # Load dataset + processor = AutoProcessor.from_pretrained( + reference_model_id, + trust_remote_code=trust_remote_code, + ) + + if dataset_loader is None: + if dataset_name is None: + dataset = None + elif dataset_name in SUPPORTED_DATASETS: + dataset = SUPPORTED_DATASETS[dataset_name]( + num_samples=num_samples, + max_new_tokens=max_new_tokens, + max_model_len=max_model_len, + processor=processor, + ) + else: + dataset = dataset_loader( + dataset_name, + num_samples=num_samples, + max_new_tokens=max_new_tokens, + max_model_len=max_model_len, + processor=processor, + ) + + 
num_calibration_samples = 0 + if num_samples is not None: + num_calibration_samples += num_samples + + if max_new_tokens is not None: + num_calibration_samples += max_new_tokens + + kwargs = {} + if data_collator is not None: + kwargs["data_collator"] = data_collator + + # Apply recipe to the model + oneshot( + model=model, + dataset=dataset, + recipe=recipe, + max_model_length=max_model_len, + num_calibration_samples=num_calibration_samples, + **kwargs, + ) + + # Save model compressed + model.save_pretrained(save_directory, save_compressed=True, skip_sparsity_compression_stats=skip_sparsity_compression_stats) + processor.save_pretrained(save_directory) + + return recipe + +""" + +def main(configurations=None, args=None): + if clearml_available: + task = Task.current_task() + args = task.get_parameters_as_dict(cast=True)["Args"] + else: + args = args["Args"] + + # Parse arguments + clearml_model = parse_argument(args["clearml_model"], bool) + force_download = parse_argument(args["force_download"], bool) + trust_remote_code = parse_argument(args["trust_remote_code"], bool) + reference_model_id = parse_argument(args["reference_model_id"], str) + candidate_model_id= parse_argument(args["candidate_model_id"], str) + dataset_name = parse_argument(args["dataset_name"], str) + save_directory = parse_argument(args["save_directory"], str) + max_model_len = parse_argument(args["max_model_len"], int) + num_samples = parse_argument(args["num_samples"], int) + max_new_tokens = parse_argument(args["max_new_tokens"], int) + dataset_args = args.get("dataset_args", None) + tags = args.get("tags", None) + + """ + + dataset_loader_fn = load_callable_configuration("dataset loader", configurations) + data_collator_fn = load_callable_configuration("data collator", configurations) + + # Resolve reference_model_id + reference_model_id = resolve_reference_model_id(reference_model_id, clearml_model, force_download, model_class) + + recipe = llmcompressor_main( + reference_model_id, + model_class, + trust_remote_code, + recipe, + dataset_args, + dataset_loader_fn, + dataset_name, + max_model_len, + num_samples, + max_new_tokens, + skip_sparsity_compression_stats, + save_directory, + data_collator_fn, + ) + + + if clearml_available: + task.upload_artifact("recipe", recipe) + + # Upload model to ClearML + clearml_model_object = OutputModel( + task=task, + name=task.name, + framework="PyTorch", + tags=[tags] if isinstance(tags, str) else tags or [] + ) + clearml_model_object.update_weights(weights_filename=save_directory, auto_delete_file=False) + """ + +if __name__ == '__main__': + main() diff --git a/src/automation/tasks/semantic_similarity_generate.py b/src/automation/tasks/semantic_similarity_generate.py new file mode 100644 index 00000000..47bf6019 --- /dev/null +++ b/src/automation/tasks/semantic_similarity_generate.py @@ -0,0 +1,122 @@ +from automation.tasks.base_task import BaseTask +from automation.configs import DEFAULT_DOCKER_IMAGE +#from automation.utils import serialize_callable +from typing import Union, List, Optional, Sequence, Any, Callable +import os +import yaml + +class SemanticSimilarityGenerateTask(BaseTask): + task_packages = [ + "vllm==0.10.1.1", + "datasets==4.2.0", + "rouge_score==0.1.2", + "bert-score==0.3.13", + "sentence-transformers==5.1.1", + "pyzmq==27.1.0", + ] + + def __init__( + self, + project_name: str, + task_name: str, + reference_model_id: str, + branch: str, + candidate_model_id: str, + sts_model_id: str, + dataset_args: Optional[dict]=None, + docker_image: 
str=DEFAULT_DOCKER_IMAGE, + packages: Optional[Sequence[str]]=None, + clearml_model: bool=False, + force_download: bool=False, + save_directory: str="output", + num_samples: Optional[int]=330, + max_new_tokens: int=1024, + max_model_len: int=4096, + trust_remote_code: bool=False, + tags: Union[str, List[str]]=None, + task_type: str="training", + config: Optional[str]=None, + ): + + # Process config + config_kwargs = self.process_config(config) + + # Set packages, taking into account default packages + # for the LMEvalTask and packages set in the config + if packages is not None: + packages = list(set(packages + self.task_packages)) + else: + packages = self.task_packages + + if "packages" in config_kwargs: + packages = list(set(packages + config_kwargs.pop("packages"))) + + # Initialize base parameters + super().__init__( + project_name=project_name, + task_name=task_name, + branch=branch, + docker_image=docker_image, + packages=packages, + task_type=task_type, + ) + + + if dataset_args is None: + self.dataset_args = config_kwargs.pop("dataset_args", None) + else: + config_dataset_args = config_kwargs.pop("dataset_args", {}) + config_dataset_args.update(dataset_args) + self.dataset_args = config_dataset_args + + self.num_samples = config_kwargs.pop("num_samples", num_samples) + self.max_new_tokens = config_kwargs.pop("max_new_tokens", max_new_tokens) + self.max_model_len = config_kwargs.pop("max_model_len", max_model_len) + self.trust_remote_code = config_kwargs.pop("trust_remote_code", trust_remote_code) + self.sts_model_id = sts_model_id + + if tags is not None: + tags = list(set(config_kwargs.pop("tags", []).extend(tags))) + else: + tags = config_kwargs.pop("tags", None) + self.tags = tags + + # Store class attributes + self.reference_model_id = reference_model_id + self.candidate_model_id = candidate_model_id + self.clearml_model = clearml_model + self.force_download = force_download + self.save_directory = save_directory + self.script_path = os.path.join(".", "src", "automation", "tasks", "scripts", "semantic_similarity_generate_script.py") + + + def script(self, configurations, args): + from automation.tasks.scripts.semantic_similarity_generate_script import main + main(configurations, args) + + + def get_configurations(self): + configs = {} + return configs + + + def get_arguments(self): + return { + "Args": { + "reference_model_id": self.reference_model_id, + "candidate_model_id": self.candidate_model_id, + "dataset_args": self.dataset_args, + "sts_model_id": self.sts_model_id, + "clearml_model": self.clearml_model, + "force_download": self.force_download, + "save_directory": self.save_directory, + "num_samples": self.num_samples, + "max_new_tokens": self.max_new_tokens, + "max_model_len": self.max_model_len, + "trust_remote_code": self.trust_remote_code, + "skip_sparsity_compression_stats": self.skip_sparsity_compression_stats, + "tags": self.tags, + }, + } + + From 95d93d2c6bc573c5cb37e5d2dc19963593172ef5 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 15 Oct 2025 11:01:28 +0000 Subject: [PATCH 02/96] base requirements --- .../tasks/semantic_similarity_generate.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/automation/tasks/semantic_similarity_generate.py b/src/automation/tasks/semantic_similarity_generate.py index 47bf6019..71db49e3 100644 --- a/src/automation/tasks/semantic_similarity_generate.py +++ b/src/automation/tasks/semantic_similarity_generate.py @@ -8,11 +8,18 @@ class SemanticSimilarityGenerateTask(BaseTask): 
task_packages = [ "vllm==0.10.1.1", - "datasets==4.2.0", - "rouge_score==0.1.2", - "bert-score==0.3.13", - "sentence-transformers==5.1.1", - "pyzmq==27.1.0", + "datasets", + "rouge_score", + "bert-score", + "sentence-transformers", + "pyzmq", + "hf_xet", + #"vllm==0.10.1.1", + #"datasets==4.2.0", + #"rouge_score==0.1.2", + #"bert-score==0.3.13", + #"sentence-transformers==5.1.1", + #"pyzmq==27.1.0", ] def __init__( @@ -114,7 +121,6 @@ def get_arguments(self): "max_new_tokens": self.max_new_tokens, "max_model_len": self.max_model_len, "trust_remote_code": self.trust_remote_code, - "skip_sparsity_compression_stats": self.skip_sparsity_compression_stats, "tags": self.tags, }, } From 8b7bf3c78119ec8c4d59ba8f7c29eb96e7b0109e Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 15 Oct 2025 11:08:25 +0000 Subject: [PATCH 03/96] simple package --- src/automation/tasks/semantic_similarity_generate.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/automation/tasks/semantic_similarity_generate.py b/src/automation/tasks/semantic_similarity_generate.py index 71db49e3..c1d98683 100644 --- a/src/automation/tasks/semantic_similarity_generate.py +++ b/src/automation/tasks/semantic_similarity_generate.py @@ -8,12 +8,12 @@ class SemanticSimilarityGenerateTask(BaseTask): task_packages = [ "vllm==0.10.1.1", - "datasets", - "rouge_score", - "bert-score", - "sentence-transformers", - "pyzmq", "hf_xet", + "pyzmq", + #"datasets", + #"rouge_score", + #"bert-score", + #"sentence-transformers", #"vllm==0.10.1.1", #"datasets==4.2.0", #"rouge_score==0.1.2", From 5e5276c8fd8a6854a5cc48dc3e3005031dae2273 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 15 Oct 2025 17:16:32 +0000 Subject: [PATCH 04/96] base method added --- .../semantic_similarity_generate_script.py | 200 +++++++++--------- 1 file changed, 102 insertions(+), 98 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 79371d79..712ee32d 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -5,7 +5,7 @@ from vllm import LLM, SamplingParams from transformers import AutoTokenizer -#from automation.utils import resolve_reference_model_id, parse_argument, load_callable_configuration +from automation.utils import resolve_reference_model_id, parse_argument, load_callable_configuration try: from clearml import OutputModel, Task @@ -13,100 +13,105 @@ except ImportError: clearml_available = False -""" -def llmcompressor_main( + +OUTPUT_DIR = os.path.join(os.getcwd(), "outputs") + +def parse_argument(a,b): + return a + + +def make_alpaca_platypus_prompt(sample): + print("Using Alpaca / Platypus style prompt") + instruction = sample["instruction"].strip() + input_text = sample.get("input", "").strip() + prompt = ( + f"### Instruction:\n{instruction}\n\n" + f"### Input:\n{input_text if input_text else 'N/A'}\n\n" + f"### Response:\n" + ) + + return prompt + + +def make_tulu_prompt(sample): + print("Using Tulu / OASST style prompt") + msgs = [] + for m in sample["messages"]: + role = m.get("role", "user") + content = m.get("content", "").strip() + msgs.append(f"{role.upper()}: {content}") + joined = "\n".join(msgs) + prompt = f"### Conversation:\n{joined}\n\n### Response:\n" + + return prompt + + +def make_default_prompt(sample): + print("Using default prompt") + prompt = f"### Input:\n{json.dumps(sample)}\n\n### 
Response:\n" + return prompt + + +def semantic_similarity_generate_main( reference_model_id, - model_class, trust_remote_code, - recipe, dataset_args, - dataset_loader, - dataset_name, max_model_len, - num_samples, max_new_tokens, - skip_sparsity_compression_stats, + num_samples, save_directory, - data_collator, ): dtype = "auto" device_map = "auto" - # Load model - model_class = getattr(transformers, model_class) - - model = model_class.from_pretrained( - reference_model_id, - torch_dtype=dtype, - device_map=device_map, - trust_remote_code=trust_remote_code, + from collections import defaultdict + all_prompts = [] + all_samples_dict = defaultdict(list) + + print(">>> Loading dataset...") + for dataset_name,dataset_path in dataset_args.items(): + print(f">>> Loading dataset {dataset_name}...") + dataset = load_dataset(dataset_path, split=f"train[:{num_samples}]") + all_samples_dict[dataset_name].extend(dataset) + + for dataset_name,dataset_samples in all_samples_dict.items(): + print(f">>> Loading values for {dataset_name}...") + for sample in dataset_samples: + if dataset_name == "alpaca" or (dataset_name == "openplatypus"): + prompt = make_alpaca_platypus_prompt(sample) + elif dataset_name == "tulu": + prompt = make_tulu_prompt(sample) + else: + print("Using default prompt") + prompt = make_default_prompt(sample) + all_prompts.append(prompt) + + TEMPERATURE = 0.0 + + print(">>> Loading tokenizer...") + tokenizer = AutoTokenizer.from_pretrained(reference_model_id, trust_remote_code=True) + + print(">>> Initializing vLLM...") + llm = LLM( + model=reference_model_id, + dtype="auto", + trust_remote_code=True, + tensor_parallel_size=8, + enforce_eager=True, + enable_chunked_prefill=True, + max_model_len=max_model_len ) - # Load recipe - if isinstance(recipe, str) and os.path.isfile(recipe): - with open(recipe, "r", encoding="utf-8") as file: - recipe = file.read() - - if dataset_args is not None: - if "smoothquant_mappings" in dataset_args and dataset_args["smoothquant_mappings"] in MAPPINGS_PER_MODEL_CONFIG: - dataset_args["smoothquant_mappings"] = MAPPINGS_PER_MODEL_CONFIG[dataset_args["smoothquant_mappings"]] - - for key, value in dataset_args.items(): - recipe = recipe.replace(f"${key}", str(value)) - - # Load dataset - processor = AutoProcessor.from_pretrained( - reference_model_id, - trust_remote_code=trust_remote_code, + sampling_params = SamplingParams( + temperature=TEMPERATURE, + max_tokens=max_new_tokens, + stop=["### Instruction:", "### Input:", "### Response:"], ) - if dataset_loader is None: - if dataset_name is None: - dataset = None - elif dataset_name in SUPPORTED_DATASETS: - dataset = SUPPORTED_DATASETS[dataset_name]( - num_samples=num_samples, - max_new_tokens=max_new_tokens, - max_model_len=max_model_len, - processor=processor, - ) - else: - dataset = dataset_loader( - dataset_name, - num_samples=num_samples, - max_new_tokens=max_new_tokens, - max_model_len=max_model_len, - processor=processor, - ) - - num_calibration_samples = 0 - if num_samples is not None: - num_calibration_samples += num_samples - - if max_new_tokens is not None: - num_calibration_samples += max_new_tokens - - kwargs = {} - if data_collator is not None: - kwargs["data_collator"] = data_collator - - # Apply recipe to the model - oneshot( - model=model, - dataset=dataset, - recipe=recipe, - max_model_length=max_model_len, - num_calibration_samples=num_calibration_samples, - **kwargs, - ) - - # Save model compressed - model.save_pretrained(save_directory, save_compressed=True, 
skip_sparsity_compression_stats=skip_sparsity_compression_stats)
-    processor.save_pretrained(save_directory)
-
-    return recipe
+    print(">>> Running vLLM generation...")
+    outputs = llm.generate(all_prompts, sampling_params)
 
-"""
+    return all_prompts, outputs
 
 def main(configurations=None, args=None):
     if clearml_available:
@@ -129,34 +134,33 @@ def main(configurations=None, args=None):
     dataset_args = args.get("dataset_args", None)
     tags = args.get("tags", None)
 
-    """
-
-    dataset_loader_fn = load_callable_configuration("dataset loader", configurations)
-    data_collator_fn = load_callable_configuration("data collator", configurations)
-
-    # Resolve reference_model_id
-    reference_model_id = resolve_reference_model_id(reference_model_id, clearml_model, force_download, model_class)
-
-    recipe = llmcompressor_main(
+    all_prompts, outputs = semantic_similarity_generate_main(
         reference_model_id,
-        model_class,
         trust_remote_code,
-        recipe,
         dataset_args,
-        dataset_loader_fn,
-        dataset_name,
         max_model_len,
-        num_samples,
         max_new_tokens,
-        skip_sparsity_compression_stats,
+        num_samples,
         save_directory,
-        data_collator_fn,
     )
 
+    OUTPUT_FILE = os.path.join(OUTPUT_DIR,f"{reference_model_id.replace('/', '_')}.jsonl")
+    print(">>> Writing outputs to file...")
+    with open(OUTPUT_FILE, "w") as fout:
+        for idx, (prompt, output) in enumerate(zip(all_prompts, outputs)):
+            response = output.outputs[0].text.strip()
+            fout.write(json.dumps({
+                "index": idx,
+                "prompt": prompt,
+                "response": response
+            }) + "\n")
+
+    print(f">>> Completed. Saved {len(outputs)} outputs to {OUTPUT_FILE}")
 
     if clearml_available:
-        task.upload_artifact("recipe", recipe)
+        task.upload_artifact("jsonl_output", OUTPUT_FILE)
 
+    """
     # Upload model to ClearML
     clearml_model_object = OutputModel(
         task=task,
         name=task.name,
         framework="PyTorch",
         tags=[tags] if isinstance(tags, str) else tags or []
     )
     clearml_model_object.update_weights(weights_filename=save_directory, auto_delete_file=False)
-    """
+    """
 
 if __name__ == '__main__':
     main()

From b8ec69689cf06574c75e9e30b1eb36ed3747b399 Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Wed, 15 Oct 2025 17:21:31 +0000
Subject: [PATCH 05/96] remove missing library imports

---
 .../tasks/scripts/semantic_similarity_generate_script.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index 712ee32d..d8b8b2f9 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -5,7 +5,7 @@
 from vllm import LLM, SamplingParams
 from transformers import AutoTokenizer
 
-from automation.utils import resolve_reference_model_id, parse_argument, load_callable_configuration
+from automation.utils import parse_argument
 
 try:
     from clearml import OutputModel, Task
@@ -16,10 +16,6 @@
 
 OUTPUT_DIR = os.path.join(os.getcwd(), "outputs")
 
-def parse_argument(a,b):
-    return a
-
-
 def make_alpaca_platypus_prompt(sample):
     instruction = sample["instruction"].strip()

From 113f0df4f74bc87b69f182f1fbc60628cf746bfa Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Wed, 15 Oct 2025 17:26:40 +0000
Subject: [PATCH 06/96] clean up variables

---
 .../scripts/semantic_similarity_generate_script.py | 14 ++++++-------
 .../tasks/semantic_similarity_generate.py          | 13 +++----------
 2 files changed, 9 insertions(+), 18 deletions(-)

diff --git 
a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index d8b8b2f9..c7bb3e13 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -49,7 +49,7 @@ def make_default_prompt(sample): def semantic_similarity_generate_main( - reference_model_id, + model_id, trust_remote_code, dataset_args, max_model_len, @@ -85,11 +85,11 @@ def semantic_similarity_generate_main( TEMPERATURE = 0.0 print(">>> Loading tokenizer...") - tokenizer = AutoTokenizer.from_pretrained(reference_model_id, trust_remote_code=True) + tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) print(">>> Initializing vLLM...") llm = LLM( - model=reference_model_id, + model=model_id, dtype="auto", trust_remote_code=True, tensor_parallel_size=8, @@ -120,9 +120,7 @@ def main(configurations=None, args=None): clearml_model = parse_argument(args["clearml_model"], bool) force_download = parse_argument(args["force_download"], bool) trust_remote_code = parse_argument(args["trust_remote_code"], bool) - reference_model_id = parse_argument(args["reference_model_id"], str) - candidate_model_id= parse_argument(args["candidate_model_id"], str) - dataset_name = parse_argument(args["dataset_name"], str) + model_id = parse_argument(args["model_id"], str) save_directory = parse_argument(args["save_directory"], str) max_model_len = parse_argument(args["max_model_len"], int) num_samples = parse_argument(args["num_samples"], int) @@ -131,7 +129,7 @@ def main(configurations=None, args=None): tags = args.get("tags", None) all_prompts, outputs = semantic_similarity_generate_main( - reference_model_id, + model_id, trust_remote_code, dataset_args, max_model_len, @@ -140,7 +138,7 @@ def main(configurations=None, args=None): save_directory, ) - OUTPUT_FILE = os.path.join(OUTPUT_DIR,f"{reference_model_id.replace('/', '_')}.jsonl") + OUTPUT_FILE = os.path.join(OUTPUT_DIR,f"{model_id.replace('/', '_')}.jsonl") print(">>> Writing outputs to file...") with open(OUTPUT_FILE, "w") as fout: for idx, (prompt, output) in enumerate(zip(all_prompts, outputs)): diff --git a/src/automation/tasks/semantic_similarity_generate.py b/src/automation/tasks/semantic_similarity_generate.py index c1d98683..fb68600d 100644 --- a/src/automation/tasks/semantic_similarity_generate.py +++ b/src/automation/tasks/semantic_similarity_generate.py @@ -1,6 +1,5 @@ from automation.tasks.base_task import BaseTask from automation.configs import DEFAULT_DOCKER_IMAGE -#from automation.utils import serialize_callable from typing import Union, List, Optional, Sequence, Any, Callable import os import yaml @@ -26,10 +25,8 @@ def __init__( self, project_name: str, task_name: str, - reference_model_id: str, + model_id: str, branch: str, - candidate_model_id: str, - sts_model_id: str, dataset_args: Optional[dict]=None, docker_image: str=DEFAULT_DOCKER_IMAGE, packages: Optional[Sequence[str]]=None, @@ -80,7 +77,6 @@ def __init__( self.max_new_tokens = config_kwargs.pop("max_new_tokens", max_new_tokens) self.max_model_len = config_kwargs.pop("max_model_len", max_model_len) self.trust_remote_code = config_kwargs.pop("trust_remote_code", trust_remote_code) - self.sts_model_id = sts_model_id if tags is not None: tags = list(set(config_kwargs.pop("tags", []).extend(tags))) @@ -89,8 +85,7 @@ def __init__( self.tags = tags # Store class attributes - self.reference_model_id = reference_model_id - 
self.candidate_model_id = candidate_model_id + self.model_id = model_id self.clearml_model = clearml_model self.force_download = force_download self.save_directory = save_directory @@ -110,10 +105,8 @@ def get_configurations(self): def get_arguments(self): return { "Args": { - "reference_model_id": self.reference_model_id, - "candidate_model_id": self.candidate_model_id, + "model_id": self.model_id, "dataset_args": self.dataset_args, - "sts_model_id": self.sts_model_id, "clearml_model": self.clearml_model, "force_download": self.force_download, "save_directory": self.save_directory, From aa2de2947e441d8c508088322ec39ba98b783826 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 15 Oct 2025 17:33:16 +0000 Subject: [PATCH 07/96] clean up input variables --- src/automation/tasks/semantic_similarity_generate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/automation/tasks/semantic_similarity_generate.py b/src/automation/tasks/semantic_similarity_generate.py index fb68600d..74692c21 100644 --- a/src/automation/tasks/semantic_similarity_generate.py +++ b/src/automation/tasks/semantic_similarity_generate.py @@ -27,15 +27,15 @@ def __init__( task_name: str, model_id: str, branch: str, + max_new_tokens: int, + max_model_len: int, + num_samples: Optional[int], dataset_args: Optional[dict]=None, docker_image: str=DEFAULT_DOCKER_IMAGE, packages: Optional[Sequence[str]]=None, clearml_model: bool=False, force_download: bool=False, save_directory: str="output", - num_samples: Optional[int]=330, - max_new_tokens: int=1024, - max_model_len: int=4096, trust_remote_code: bool=False, tags: Union[str, List[str]]=None, task_type: str="training", From 02d6087519fa2f1e0407147a0d8b38e93081326b Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 15 Oct 2025 17:48:19 +0000 Subject: [PATCH 08/96] clean up prompt logs --- .../tasks/scripts/semantic_similarity_generate_script.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index c7bb3e13..db0dccf6 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -17,7 +17,6 @@ OUTPUT_DIR = os.path.join(os.getcwd(), "outputs") def make_alpaca_platypus_prompt(sample): - print("Using Alpaca / Platypus style prompt") instruction = sample["instruction"].strip() input_text = sample.get("input", "").strip() prompt = ( @@ -30,7 +29,6 @@ def make_alpaca_platypus_prompt(sample): def make_tulu_prompt(sample): - print("Using Tulu / OASST style prompt") msgs = [] for m in sample["messages"]: role = m.get("role", "user") @@ -43,7 +41,6 @@ def make_tulu_prompt(sample): def make_default_prompt(sample): - print("Using default prompt") prompt = f"### Input:\n{json.dumps(sample)}\n\n### Response:\n" return prompt @@ -59,6 +56,7 @@ def semantic_similarity_generate_main( ): dtype = "auto" device_map = "auto" + TEMPERATURE = 0.0 from collections import defaultdict all_prompts = [] @@ -82,16 +80,15 @@ def semantic_similarity_generate_main( prompt = make_default_prompt(sample) all_prompts.append(prompt) - TEMPERATURE = 0.0 print(">>> Loading tokenizer...") - tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) + tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code= trust_remote_code) print(">>> Initializing vLLM...") llm = LLM( model=model_id, dtype="auto", - 
trust_remote_code=True, + trust_remote_code=trust_remote_code, tensor_parallel_size=8, enforce_eager=True, enable_chunked_prefill=True, From 9faf81dffcda9981e6fd9602eff8272c72003045 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 15 Oct 2025 21:39:04 +0000 Subject: [PATCH 09/96] fix device count --- .../tasks/scripts/semantic_similarity_generate_script.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index db0dccf6..9d149b3d 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -1,5 +1,6 @@ import json import os +from torch.cuda import device_count from tqdm import tqdm from datasets import load_dataset from vllm import LLM, SamplingParams @@ -89,7 +90,7 @@ def semantic_similarity_generate_main( model=model_id, dtype="auto", trust_remote_code=trust_remote_code, - tensor_parallel_size=8, + tensor_parallel_size=device_count(), enforce_eager=True, enable_chunked_prefill=True, max_model_len=max_model_len From 8e407a02c4c093fb2edefe0e33b2e7bff4a8d282 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 15 Oct 2025 23:37:14 +0000 Subject: [PATCH 10/96] add semantic_similarity_args --- .../semantic_similarity_generate_script.py | 18 ++++++------------ .../tasks/semantic_similarity_generate.py | 9 +++++++++ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 9d149b3d..71d8f2ee 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -88,7 +88,7 @@ def semantic_similarity_generate_main( print(">>> Initializing vLLM...") llm = LLM( model=model_id, - dtype="auto", + dtype=dtype, trust_remote_code=trust_remote_code, tensor_parallel_size=device_count(), enforce_eager=True, @@ -124,8 +124,12 @@ def main(configurations=None, args=None): num_samples = parse_argument(args["num_samples"], int) max_new_tokens = parse_argument(args["max_new_tokens"], int) dataset_args = args.get("dataset_args", None) + semantic_similarity_args= args.get("semantic_similarity_args", None) tags = args.get("tags", None) + print(semantic_similarity_args) + """ + all_prompts, outputs = semantic_similarity_generate_main( model_id, trust_remote_code, @@ -151,17 +155,7 @@ def main(configurations=None, args=None): if clearml_available: task.upload_artifact("jsonl_output", OUTPUT_FILE) - - """ - # Upload model to ClearML - clearml_model_object = OutputModel( - task=task, - name=task.name, - framework="PyTorch", - tags=[tags] if isinstance(tags, str) else tags or [] - ) - clearml_model_object.update_weights(weights_filename=save_directory, auto_delete_file=False) - """ + """ if __name__ == '__main__': main() diff --git a/src/automation/tasks/semantic_similarity_generate.py b/src/automation/tasks/semantic_similarity_generate.py index 74692c21..fe77ae5f 100644 --- a/src/automation/tasks/semantic_similarity_generate.py +++ b/src/automation/tasks/semantic_similarity_generate.py @@ -31,6 +31,7 @@ def __init__( max_model_len: int, num_samples: Optional[int], dataset_args: Optional[dict]=None, + semantic_similarity_args: Optional[dict]=None, docker_image: str=DEFAULT_DOCKER_IMAGE, packages: Optional[Sequence[str]]=None, 
clearml_model: bool=False, @@ -73,6 +74,13 @@ def __init__( config_dataset_args.update(dataset_args) self.dataset_args = config_dataset_args + if semantic_similarity_args is None: + self.semantic_similarity_args = config_kwargs.pop("semantic_similarity_args", None) + else: + config_semantic_similarity_args = config_kwargs.pop("semantic_similarity_args", {}) + config_semantic_similarity_args.update(semantic_similarity_args) + self.semantic_similarity_args = config_semantic_similarity_args + self.num_samples = config_kwargs.pop("num_samples", num_samples) self.max_new_tokens = config_kwargs.pop("max_new_tokens", max_new_tokens) self.max_model_len = config_kwargs.pop("max_model_len", max_model_len) @@ -107,6 +115,7 @@ def get_arguments(self): "Args": { "model_id": self.model_id, "dataset_args": self.dataset_args, + "semantic_similarity_args": self.semantic_similarity_args, "clearml_model": self.clearml_model, "force_download": self.force_download, "save_directory": self.save_directory, From 82eaa9a27f64d0550fa23d62c71982bb19d2aac8 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 16 Oct 2025 00:09:15 +0000 Subject: [PATCH 11/96] update model input vars --- .../semantic_similarity_generate_script.py | 25 ++++++++----------- .../tasks/semantic_similarity_generate.py | 6 ++--- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 71d8f2ee..6792d616 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -50,15 +50,12 @@ def semantic_similarity_generate_main( model_id, trust_remote_code, dataset_args, + semantic_similarity_args, max_model_len, max_new_tokens, - num_samples, + num_samples_per_dataset, save_directory, ): - dtype = "auto" - device_map = "auto" - TEMPERATURE = 0.0 - from collections import defaultdict all_prompts = [] all_samples_dict = defaultdict(list) @@ -66,7 +63,7 @@ def semantic_similarity_generate_main( print(">>> Loading dataset...") for dataset_name,dataset_path in dataset_args.items(): print(f">>> Loading dataset {dataset_name}...") - dataset = load_dataset(dataset_path, split=f"train[:{num_samples}]") + dataset = load_dataset(dataset_path, split=f"train[:{num_samples_per_dataset}]") all_samples_dict[dataset_name].extend(dataset) for dataset_name,dataset_samples in all_samples_dict.items(): @@ -88,16 +85,16 @@ def semantic_similarity_generate_main( print(">>> Initializing vLLM...") llm = LLM( model=model_id, - dtype=dtype, + dtype=semantic_similarity_args.get("dtype", "auto"), trust_remote_code=trust_remote_code, tensor_parallel_size=device_count(), - enforce_eager=True, - enable_chunked_prefill=True, + enforce_eager=semantic_similarity_args.get("enforce_eager", True), + enable_chunked_prefill=semantic_similarity_args.get("enable_chunked_prefill", True), max_model_len=max_model_len ) sampling_params = SamplingParams( - temperature=TEMPERATURE, + temperature=semantic_similarity_args.get("temperature", 0.0), max_tokens=max_new_tokens, stop=["### Instruction:", "### Input:", "### Response:"], ) @@ -121,22 +118,21 @@ def main(configurations=None, args=None): model_id = parse_argument(args["model_id"], str) save_directory = parse_argument(args["save_directory"], str) max_model_len = parse_argument(args["max_model_len"], int) - num_samples = parse_argument(args["num_samples"], int) + num_samples_per_dataset = 
parse_argument(args["num_samples_per_dataset"], int)
     max_new_tokens = parse_argument(args["max_new_tokens"], int)
     dataset_args = args.get("dataset_args", None)
     semantic_similarity_args= args.get("semantic_similarity_args", None)
     tags = args.get("tags", None)
 
     print(semantic_similarity_args)
-    """
-
     all_prompts, outputs = semantic_similarity_generate_main(
         model_id,
         trust_remote_code,
         dataset_args,
+        semantic_similarity_args,
         max_model_len,
         max_new_tokens,
-        num_samples,
+        num_samples_per_dataset,
         save_directory,
     )
 
@@ -151,7 +155,6 @@ def main(configurations=None, args=None):
 
     if clearml_available:
         task.upload_artifact("jsonl_output", OUTPUT_FILE)
-    """
 
 if __name__ == '__main__':
     main()
diff --git a/src/automation/tasks/semantic_similarity_generate.py b/src/automation/tasks/semantic_similarity_generate.py
index fe77ae5f..f6d6345f 100644
--- a/src/automation/tasks/semantic_similarity_generate.py
+++ b/src/automation/tasks/semantic_similarity_generate.py
@@ -29,7 +29,7 @@ def __init__(
     branch: str,
     max_new_tokens: int,
     max_model_len: int,
-    num_samples: Optional[int],
+    num_samples_per_dataset: Optional[int],
     dataset_args: Optional[dict]=None,
     semantic_similarity_args: Optional[dict]=None,
     docker_image: str=DEFAULT_DOCKER_IMAGE,
@@ -81,7 +81,7 @@ def __init__(
         config_semantic_similarity_args.update(semantic_similarity_args)
         self.semantic_similarity_args = config_semantic_similarity_args
 
-        self.num_samples = config_kwargs.pop("num_samples", num_samples)
+        self.num_samples_per_dataset = config_kwargs.pop("num_samples_per_dataset", num_samples_per_dataset)
         self.max_new_tokens = config_kwargs.pop("max_new_tokens", max_new_tokens)
         self.max_model_len = config_kwargs.pop("max_model_len", max_model_len)
         self.trust_remote_code = config_kwargs.pop("trust_remote_code", trust_remote_code)
@@ -119,7 +119,7 @@ def get_arguments(self):
             "clearml_model": self.clearml_model,
             "force_download": self.force_download,
             "save_directory": self.save_directory,
-            "num_samples": self.num_samples,
+            "num_samples_per_dataset": self.num_samples_per_dataset,
             "max_new_tokens": self.max_new_tokens,
             "max_model_len": self.max_model_len,
             "trust_remote_code": self.trust_remote_code,

From 70d0ad48d51b0b7f9780e485c223aebc0a194862 Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Thu, 16 Oct 2025 00:26:20 +0000
Subject: [PATCH 12/96] added more logging

---
 .../tasks/scripts/semantic_similarity_generate_script.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index 6792d616..ddb4f582 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -3,7 +3,7 @@ import os
 from torch.cuda import device_count
 from tqdm import tqdm
 from datasets import load_dataset
-from vllm import LLM, SamplingParams
+from vllm import LLM, SamplingParams, logs
 from transformers import AutoTokenizer
 
@@ -78,6 +78,7 @@ def semantic_similarity_generate_main(
             prompt = make_default_prompt(sample)
         all_prompts.append(prompt)
 
+    logs.set_level("INFO")
 
     print(">>> Loading tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code= trust_remote_code)
 
@@ -93,11 +94,14 @@ def semantic_similarity_generate_main(
         max_model_len=max_model_len
     )
 
+    print("Completed the model initialization ")
+
     sampling_params = SamplingParams(
         temperature=semantic_similarity_args.get("temperature", 0.0),
max_tokens=max_new_tokens, stop=["### Instruction:", "### Input:", "### Response:"], ) + print("Define sampling parameters") print(">>> Running vLLM generation...") outputs = llm.generate(all_prompts, sampling_params) From bf7d8176beaa2e512c24f5c704c24761fc6ba41a Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 16 Oct 2025 01:04:44 +0000 Subject: [PATCH 13/96] initialize vllm --- .../scripts/semantic_similarity_generate_script.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index ddb4f582..91b58b72 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -3,7 +3,7 @@ from torch.cuda import device_count from tqdm import tqdm from datasets import load_dataset -from vllm import LLM, SamplingParams, logs +from vllm import LLM, SamplingParams from transformers import AutoTokenizer from automation.utils import parse_argument @@ -78,7 +78,6 @@ def semantic_similarity_generate_main( prompt = make_default_prompt(sample) all_prompts.append(prompt) - logs.set_level("INFO") print(">>> Loading tokenizer...") tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code= trust_remote_code) @@ -86,12 +85,12 @@ def semantic_similarity_generate_main( print(">>> Initializing vLLM...") llm = LLM( model=model_id, - dtype=semantic_similarity_args.get("dtype", "auto"), - trust_remote_code=trust_remote_code, + #dtype=semantic_similarity_args.get("dtype", "auto"), + #trust_remote_code=trust_remote_code, tensor_parallel_size=device_count(), - enforce_eager=semantic_similarity_args.get("enforce_eager", True), - enable_chunked_prefill=semantic_similarity_args.get("enable_chunked_prefill", True), - max_model_len=max_model_len + #enforce_eager=semantic_similarity_args.get("enforce_eager", True), + #enable_chunked_prefill=semantic_similarity_args.get("enable_chunked_prefill", True), + #max_model_len=max_model_len ) print("Completed the model initialization ") From 1d85fa65fe226a10324f96cb21c78ab6f8283621 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 16 Oct 2025 01:11:03 +0000 Subject: [PATCH 14/96] download model beforehand --- .../tasks/scripts/semantic_similarity_generate_script.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 91b58b72..33987089 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -82,6 +82,9 @@ def semantic_similarity_generate_main( print(">>> Loading tokenizer...") tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code= trust_remote_code) + from huggingface_hub import snapshot_download + snapshot_download(repo_id=model_id) + print(">>> Initializing vLLM...") llm = LLM( model=model_id, From 6be969df5baaaea1d1d6d30c780f0096610ca02a Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 16 Oct 2025 10:00:27 +0000 Subject: [PATCH 15/96] template score --- src/automation/tasks/__init__.py | 2 + .../semantic_similarity_score_script.py | 149 ++++++++++++++++++ .../tasks/semantic_similarity_score.py | 122 ++++++++++++++ 3 files changed, 273 insertions(+) create mode 100644 src/automation/tasks/scripts/semantic_similarity_score_script.py create mode 100644 
src/automation/tasks/semantic_similarity_score.py diff --git a/src/automation/tasks/__init__.py b/src/automation/tasks/__init__.py index 8f1a496d..baae3580 100644 --- a/src/automation/tasks/__init__.py +++ b/src/automation/tasks/__init__.py @@ -1,5 +1,6 @@ from automation.tasks.base_task import BaseTask from automation.tasks.semantic_similarity_generate import SemanticSimilarityGenerateTask +from automation.tasks.semantic_similarity_score import SemanticSimilarityScoreTask from automation.tasks.llmcompressor import LLMCompressorTask from automation.tasks.lmeval import LMEvalTask from automation.tasks.lighteval import LightEvalTask @@ -9,6 +10,7 @@ __all__ = [ "BaseTask", "SemanticSimilarityGenerateTask", + "SemanticSimilarityScoreTask", "LLMCompressorTask", "LMEvalTask", "LightEvalTask", diff --git a/src/automation/tasks/scripts/semantic_similarity_score_script.py b/src/automation/tasks/scripts/semantic_similarity_score_script.py new file mode 100644 index 00000000..08cd8030 --- /dev/null +++ b/src/automation/tasks/scripts/semantic_similarity_score_script.py @@ -0,0 +1,149 @@ +import json +from tqdm import tqdm +import os +from bert_score import score +from rouge_score import rouge_scorer +from sentence_transformers import SentenceTransformer, util + +#from automation.utils import parse_argument + +try: + from clearml import OutputModel, Task + clearml_available = True +except ImportError: + clearml_available = False + +OUTPUT_DIR = os.path.join(os.getcwd(), "outputs") + + +def parse_argument( + a, + b, +): + return a + + +def semantic_similarity_score_main( + trust_remote_code, + sts_model_id, + rouge_scores, + save_directory, +): + from collections import defaultdict + all_prompts = [] + all_samples_dict = defaultdict(list) + + print(">>> Loading dataset...") + for dataset_name,dataset_path in sts_model_id.items(): + print(f">>> Loading dataset {dataset_name}...") + dataset = load_dataset(dataset_path, split=f"train[:{candidate_model_task_name}]") + all_samples_dict[dataset_name].extend(dataset) + + for dataset_name,dataset_samples in all_samples_dict.items(): + print(f">>> Loading values for {dataset_name}...") + for sample in dataset_samples: + if dataset_name == "alpaca" or (dataset_name == "openplatypus"): + prompt = make_alpaca_platypus_prompt(sample) + elif dataset_name == "tulu": + prompt = make_tulu_prompt(sample) + else: + print("Using default prompt") + prompt = make_default_prompt(sample) + all_prompts.append(prompt) + + + print(">>> Loading tokenizer...") + tokenizer = AutoTokenizer.from_pretrained(reference_model_project_name, trust_remote_code= trust_remote_code) + + from huggingface_hub import snapshot_download + snapshot_download(repo_id=reference_model_project_name) + + print(">>> Initializing vLLM...") + llm = LLM( + model=reference_model_project_name, + #dtype=rouge_scores.get("dtype", "auto"), + #trust_remote_code=trust_remote_code, + tensor_parallel_size=device_count(), + #enforce_eager=rouge_scores.get("enforce_eager", True), + #enable_chunked_prefill=rouge_scores.get("enable_chunked_prefill", True), + #candidate_model_project_name=candidate_model_project_name + ) + + print("Completed the model initialization ") + + sampling_params = SamplingParams( + temperature=rouge_scores.get("temperature", 0.0), + max_tokens=reference_model_task_name, + stop=["### Instruction:", "### Input:", "### Response:"], + ) + print("Define sampling parameters") + + print(">>> Running vLLM generation...") + outputs = llm.generate(all_prompts, sampling_params) + + return all_prompts, 
outputs + +def main(configurations=None, args=None): + if clearml_available: + task = Task.current_task() + args = task.get_parameters_as_dict(cast=True)["Args"] + else: + args = args["Args"] + + # Parse arguments + clearml_model = parse_argument(args["clearml_model"], bool) + force_download = parse_argument(args["force_download"], bool) + trust_remote_code = parse_argument(args["trust_remote_code"], bool) + reference_model_project_name = parse_argument(args["reference_model_project_name"], str) + candidate_model_project_name = parse_argument(args["candidate_model_project_name"], int) + candidate_model_task_name = parse_argument(args["candidate_model_task_name"], int) + reference_model_task_name = parse_argument(args["reference_model_task_name"], int) + sts_model_id = args.get("sts_model_id", str) + rouge_scores= args.get("rouge_scores", list) + save_directory = parse_argument(args["save_directory"], str) + tags = args.get("tags", None) + + + print(args) + + """ + + if clearml_available: + reference_task = Task.query_tasks(project_name=reference_model_project_name,task_name= reference_model_task_name, task_filter={'order_by': ['-last_update'], 'status': ['completed'] }) + reference_task = Task.get_task(reference_task[0]) + reference_artifact_obj = reference_task.artifacts['jsonl model'].get_local_copy() + + candidate_task = Task.query_tasks(project_name=candidate_model_project_name,task_name= candidate_model_task_name, task_filter={'order_by': ['-last_update'], 'status': ['completed'] }) + candidate_task = Task.get_task(candidate_task[0]) + candidate_artifact_obj = candidate_task.artifacts['jsonl model'].get_local_copy() + + else: + reference_artifact_obj = None + candidate_artifact_obj = None + + all_prompts, outputs = semantic_similarity_score_main( + sts_model_id, + rouge_scores, + trust_remote_code, + save_directory, + ) + + OUTPUT_FILE = os.path.join(OUTPUT_DIR,f"{reference_model_project_name.replace('/', '_')}.jsonl") + print(">>> Writing outputs to file...") + with open(OUTPUT_FILE, "w") as fout: + for idx, (prompt, output) in enumerate(zip(all_prompts, outputs)): + response = output.outputs[0].text.strip() + fout.write(json.dumps({ + "index": idx, + "prompt": prompt, + "response": response + }) + "\n") + + print(f">>> Completed. 
Saved {len(outputs)} outputs to {OUTPUT_FILE}")
+
+    if clearml_available:
+        task.upload_artifact("jsonl_output", OUTPUT_FILE)
+    """
+
+if __name__ == '__main__':
+    main()
diff --git a/src/automation/tasks/semantic_similarity_score.py b/src/automation/tasks/semantic_similarity_score.py
new file mode 100644
index 00000000..d855d70b
--- /dev/null
+++ b/src/automation/tasks/semantic_similarity_score.py
@@ -0,0 +1,122 @@
+from automation.tasks.base_task import BaseTask
+from automation.configs import DEFAULT_DOCKER_IMAGE
+from typing import Union, List, Optional, Sequence, Any, Callable
+import os
+import yaml
+
+class SemanticSimilarityGenerateTask(BaseTask):
+    task_packages = [
+        "hf_xet",
+        "pyzmq",
+    ]
+
+    def __init__(
+        self,
+        project_name: str,
+        task_name: str,
+        reference_model_project_name: str,
+        candidate_model_project_name: str,
+        reference_model_task_name: str,
+        candidate_model_task_name: str,
+        sts_model_id: str,
+        branch: str,
+        rouge_scores: Optional[list]=None,
+        scoring_args: Optional[dict]=None,
+        docker_image: str=DEFAULT_DOCKER_IMAGE,
+        packages: Optional[Sequence[str]]=None,
+        clearml_model: bool=False,
+        force_download: bool=False,
+        save_directory: str="output",
+        trust_remote_code: bool=False,
+        tags: Union[str, List[str]]=None,
+        task_type: str="training",
+        config: Optional[str]=None,
+    ):
+
+        # Process config
+        config_kwargs = self.process_config(config)
+
+        # Set packages, taking into account the default packages
+        # for this task and packages set in the config
+        if packages is not None:
+            packages = list(set(packages + self.task_packages))
+        else:
+            packages = self.task_packages
+
+        if "packages" in config_kwargs:
+            packages = list(set(packages + config_kwargs.pop("packages")))
+
+        # Initialize base parameters
+        super().__init__(
+            project_name=project_name,
+            task_name=task_name,
+            branch=branch,
+            docker_image=docker_image,
+            packages=packages,
+            task_type=task_type,
+        )
+
+
+        if rouge_scores is None:
+            self.rouge_scores = config_kwargs.pop("rouge_scores", None)
+        else:
+            config_rouge_scores = config_kwargs.pop("rouge_scores", {})
+            config_rouge_scores.update(rouge_scores)
+            self.rouge_scores = config_rouge_scores
+
+        if scoring_args is None:
+            self.scoring_args = config_kwargs.pop("scoring_args", None)
+        else:
+            config_scoring_args = config_kwargs.pop("scoring_args", {})
+            config_scoring_args.update(scoring_args)
+            self.scoring_args = config_scoring_args
+
+        self.trust_remote_code = config_kwargs.pop("trust_remote_code", trust_remote_code)
+
+        if tags is not None:
+            config_tags = config_kwargs.pop("tags", [])
+            tags = list(set(config_tags + ([tags] if isinstance(tags, str) else list(tags))))
+        else:
+            tags = config_kwargs.pop("tags", None)
+        self.tags = tags
+
+        # Store class attributes
+        self.reference_model_project_name = reference_model_project_name
+        self.candidate_model_project_name = candidate_model_project_name
+        self.reference_model_task_name = reference_model_task_name
+        self.candidate_model_task_name = candidate_model_task_name
+        self.sts_model_id = sts_model_id
+        self.clearml_model = clearml_model
+        self.force_download = force_download
+        self.save_directory = save_directory
+        self.script_path = os.path.join(".", "src", "automation", "tasks", "scripts", "semantic_similarity_score_script.py")
+
+
+    def script(self, configurations, args):
+        from automation.tasks.scripts.semantic_similarity_score_script import main
+        main(configurations, args)
+
+
+    def get_configurations(self):
+        configs = {}
+        return configs
+
+
+    def get_arguments(self):
+        return {
+            "Args": {
+                "reference_model_project_name": 
self.reference_model_project_name, + "candidate_model_project_name": self.candidate_model_project_name, + "reference_model_task_name": self.reference_model_task_name, + "candidate_model_task_name": self.candidate_model_task_name, + "sts_model_id": self.sts_model_id, + "rouge_scores": self.rouge_scores, + "scoring_args": self.scoring_args, + "clearml_model": self.clearml_model, + "force_download": self.force_download, + "save_directory": self.save_directory, + "trust_remote_code": self.trust_remote_code, + "tags": self.tags, + }, + } + + From 0798f615df4bc527b260c157577c2399baf871b5 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 16 Oct 2025 10:02:27 +0000 Subject: [PATCH 16/96] update task name --- src/automation/tasks/semantic_similarity_score.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/semantic_similarity_score.py b/src/automation/tasks/semantic_similarity_score.py index d855d70b..acc54aca 100644 --- a/src/automation/tasks/semantic_similarity_score.py +++ b/src/automation/tasks/semantic_similarity_score.py @@ -4,7 +4,7 @@ import os import yaml -class SemanticSimilarityGenerateTask(BaseTask): +class SemanticSimilarityScoreTask(BaseTask): task_packages = [ "hf_xet", "pyzmq", From ca9ff840575627aa1b4539911d5bffcab2661d8b Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 16 Oct 2025 10:05:21 +0000 Subject: [PATCH 17/96] rouge score array --- src/automation/tasks/semantic_similarity_score.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/semantic_similarity_score.py b/src/automation/tasks/semantic_similarity_score.py index acc54aca..8a79e40a 100644 --- a/src/automation/tasks/semantic_similarity_score.py +++ b/src/automation/tasks/semantic_similarity_score.py @@ -60,8 +60,8 @@ def __init__( if rouge_scores is None: self.rouge_scores = config_kwargs.pop("rouge_scores", None) else: - config_rouge_scores = config_kwargs.pop("rouge_scores", {}) - config_rouge_scores.update(rouge_scores) + config_rouge_scores = config_kwargs.pop("rouge_scores", []) + config_rouge_scores+= rouge_scores self.rouge_scores = config_rouge_scores if scoring_args is None: From 21f54d098ff62ed6caa2cf7b998521ba4227c80a Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 16 Oct 2025 15:33:45 +0000 Subject: [PATCH 18/96] base scoring script --- .../semantic_similarity_score_script.py | 180 ++++++++---------- 1 file changed, 81 insertions(+), 99 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_score_script.py b/src/automation/tasks/scripts/semantic_similarity_score_script.py index 08cd8030..619315f9 100644 --- a/src/automation/tasks/scripts/semantic_similarity_score_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_score_script.py @@ -1,11 +1,9 @@ import json -from tqdm import tqdm -import os from bert_score import score from rouge_score import rouge_scorer from sentence_transformers import SentenceTransformer, util - -#from automation.utils import parse_argument +import os +from automation.utils import parse_argument try: from clearml import OutputModel, Task @@ -13,75 +11,62 @@ except ImportError: clearml_available = False -OUTPUT_DIR = os.path.join(os.getcwd(), "outputs") - - -def parse_argument( - a, - b, -): - return a - +SCORING_DIR = os.path.join(os.getcwd(), "outputs") def semantic_similarity_score_main( - trust_remote_code, + reference_file, + candidate_file, sts_model_id, rouge_scores, - save_directory, ): - from collections import defaultdict - all_prompts = [] - all_samples_dict = 
defaultdict(list)
-
-    print(">>> Loading dataset...")
-    for dataset_name,dataset_path in sts_model_id.items():
-        print(f">>> Loading dataset {dataset_name}...")
-        dataset = load_dataset(dataset_path, split=f"train[:{candidate_model_task_name}]")
-        all_samples_dict[dataset_name].extend(dataset)
-
-    for dataset_name,dataset_samples in all_samples_dict.items():
-        print(f">>> Loading values for {dataset_name}...")
-        for sample in dataset_samples:
-            if dataset_name == "alpaca" or (dataset_name == "openplatypus"):
-                prompt = make_alpaca_platypus_prompt(sample)
-            elif dataset_name == "tulu":
-                prompt = make_tulu_prompt(sample)
-            else:
-                print("Using default prompt")
-                prompt = make_default_prompt(sample)
-            all_prompts.append(prompt)
-
-
-    print(">>> Loading tokenizer...")
-    tokenizer = AutoTokenizer.from_pretrained(reference_model_project_name, trust_remote_code= trust_remote_code)
-
-    from huggingface_hub import snapshot_download
-    snapshot_download(repo_id=reference_model_project_name)
-
-    print(">>> Initializing vLLM...")
-    llm = LLM(
-        model=reference_model_project_name,
-        #dtype=rouge_scores.get("dtype", "auto"),
-        #trust_remote_code=trust_remote_code,
-        tensor_parallel_size=device_count(),
-        #enforce_eager=rouge_scores.get("enforce_eager", True),
-        #enable_chunked_prefill=rouge_scores.get("enable_chunked_prefill", True),
-        #candidate_model_project_name=candidate_model_project_name
-    )
-
-    print("Completed the model initialization ")
+    # Load reference and candidate data
+    with open(reference_file, "r") as f_ref, open(candidate_file, "r") as f_cand:
+        reference_data = [json.loads(line) for line in f_ref]
+        candidate_data = [json.loads(line) for line in f_cand]
+
+    assert len(reference_data) == len(candidate_data), "Mismatched number of entries!"
+
+    # Extract answers
+    references = [ref.get("output") or ref["response"] for ref in reference_data]
+    candidates = [cand["response"] for cand in candidate_data]
+
+    # Load models
+    sts_model = SentenceTransformer(sts_model_id)
+    rouge = rouge_scorer.RougeScorer(rouge_scores, use_stemmer=True)
+
+    # Compute BERTScore F1 for every reference/candidate pair
+    _, _, f1_scores = score(candidates, references, lang="en", verbose=False)
+    all_bert_f1 = [f1.item() for f1 in f1_scores]
+
+    # Evaluate STS and ROUGE per pair
+    all_rouge1_f1, all_rougeL_f1, all_sts = [], [], []
+
+    for i, (ref, cand) in enumerate(zip(references, candidates)):
+        emb_ref = sts_model.encode(ref, convert_to_tensor=True)
+        emb_cand = sts_model.encode(cand, convert_to_tensor=True)
+        raw_sts = util.cos_sim(emb_cand, emb_ref).item()
+        sts = (raw_sts + 1) / 2  # Normalize to [0, 1]
+        all_sts.append(sts)
+
+        pair_scores = rouge.score(ref, cand)
+
+        rouge1 = pair_scores["rouge1"].fmeasure
+        rougeL = pair_scores["rougeL"].fmeasure
+        all_rouge1_f1.append(rouge1)
+        all_rougeL_f1.append(rougeL)
+
+    # Compute averages
+    n = len(references)
+    avg_bert = sum(all_bert_f1) / n
+    avg_rouge1 = sum(all_rouge1_f1) / n
+    avg_rougeL = sum(all_rougeL_f1) / n
+    avg_sts = sum(all_sts) / n
+    return avg_bert, avg_rouge1, avg_rougeL, avg_sts
 
-    sampling_params = SamplingParams(
-        temperature=rouge_scores.get("temperature", 0.0),
-        max_tokens=reference_model_task_name,
-        stop=["### Instruction:", "### Input:", "### Response:"],
-    )
-    print("Define sampling parameters")
 
-    print(">>> Running vLLM generation...")
-    outputs = llm.generate(all_prompts, sampling_params)
 
-    return all_prompts, outputs
 
 def main(configurations=None, args=None):
     if clearml_available:
         task = Task.current_task()
         args = task.get_parameters_as_dict(cast=True)["Args"]
     else:
         args = args["Args"]
 
     # Parse arguments
clearml_model = parse_argument(args["clearml_model"], bool) force_download = parse_argument(args["force_download"], bool) trust_remote_code = parse_argument(args["trust_remote_code"], bool) - reference_model_project_name = parse_argument(args["reference_model_project_name"], str) - candidate_model_project_name = parse_argument(args["candidate_model_project_name"], int) - candidate_model_task_name = parse_argument(args["candidate_model_task_name"], int) - reference_model_task_name = parse_argument(args["reference_model_task_name"], int) sts_model_id = args.get("sts_model_id", str) rouge_scores= args.get("rouge_scores", list) - save_directory = parse_argument(args["save_directory"], str) + #save_directory = parse_argument(args["save_directory"], str) tags = args.get("tags", None) - print(args) - - """ - if clearml_available: + reference_model_project_name = parse_argument(args["reference_model_project_name"], str) + candidate_model_project_name = parse_argument(args["candidate_model_project_name"], str) + candidate_model_task_name = parse_argument(args["candidate_model_task_name"], str) + reference_model_task_name = parse_argument(args["reference_model_task_name"], str) reference_task = Task.query_tasks(project_name=reference_model_project_name,task_name= reference_model_task_name, task_filter={'order_by': ['-last_update'], 'status': ['completed'] }) reference_task = Task.get_task(reference_task[0]) - reference_artifact_obj = reference_task.artifacts['jsonl model'].get_local_copy() + reference_file = reference_task.artifacts['jsonl model'].get_local_copy() candidate_task = Task.query_tasks(project_name=candidate_model_project_name,task_name= candidate_model_task_name, task_filter={'order_by': ['-last_update'], 'status': ['completed'] }) candidate_task = Task.get_task(candidate_task[0]) - candidate_artifact_obj = candidate_task.artifacts['jsonl model'].get_local_copy() - + candidate_file = candidate_task.artifacts['jsonl model'].get_local_copy() + # add task query to get jsonl else: - reference_artifact_obj = None - candidate_artifact_obj = None - - all_prompts, outputs = semantic_similarity_score_main( + ref_model_json = "Qwen_Qwen3-0.6B.jsonl" + cand_model_json = "RedHatAI_Qwen3-0.6B-quantized.w4a16.jsonl" + reference_file = os.path.join(SCORING_DIR, ref_model_json) + candidate_file = os.path.join(SCORING_DIR, cand_model_json) + + avg_bert, avg_rouge1, avg_rougeL, avg_sts = semantic_similarity_score_main( + reference_file, + candidate_file, sts_model_id, rouge_scores, - trust_remote_code, - save_directory, ) - - OUTPUT_FILE = os.path.join(OUTPUT_DIR,f"{reference_model_project_name.replace('/', '_')}.jsonl") - print(">>> Writing outputs to file...") - with open(OUTPUT_FILE, "w") as fout: - for idx, (prompt, output) in enumerate(zip(all_prompts, outputs)): - response = output.outputs[0].text.strip() - fout.write(json.dumps({ - "index": idx, - "prompt": prompt, - "response": response - }) + "\n") - - print(f">>> Completed. 
Saved {len(outputs)} outputs to {OUTPUT_FILE}")
-
+    # Print summary
+    print("\n=== Averages (for Google Sheets) ===")
+    print("BERTScore F1 | ROUGE-1 F1 | ROUGE-L F1 | STS CosSim")
+    print(f"{avg_bert:.3f} | {avg_rouge1:.3f} | {avg_rougeL:.3f} | {avg_sts:.3f}")
+
+    out_filename = f"scores_{ref_model_json.lower()}__vs__{cand_model_json.lower()}.txt"
+    out_filename = os.path.join(SCORING_DIR,out_filename)
+    # Save results
+    with open(out_filename, "w") as f_out:
+        f_out.write("BERTScore F1 | ROUGE-1 F1 | ROUGE-L F1 | STS CosSim\n")
+        f_out.write(f"{avg_bert:.3f} | {avg_rouge1:.3f} | {avg_rougeL:.3f} | {avg_sts:.3f}\n\n")
+
+    print(f"\nSaved results to {out_filename}")
 
     if clearml_available:
-        task.upload_artifact("jsonl_output", OUTPUT_FILE)
-    """
+        task.upload_artifact("scores", out_filename)
+        print("Pushing clearml artifact")
 
 if __name__ == '__main__':
     main()

From 37b82d9ae97c11c055cd8a25334267986342071f Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Thu, 16 Oct 2025 15:47:56 +0000
Subject: [PATCH 19/96] remove snapshot download

---
 .../scripts/semantic_similarity_generate_script.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index 33987089..39c06756 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -82,18 +82,18 @@ def semantic_similarity_generate_main(
     print(">>> Loading tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code= trust_remote_code)
 
-    from huggingface_hub import snapshot_download
-    snapshot_download(repo_id=model_id)
+    #from huggingface_hub import snapshot_download
+    #snapshot_download(repo_id=model_id)
 
     print(">>> Initializing vLLM...")
     llm = LLM(
         model=model_id,
-        #dtype=semantic_similarity_args.get("dtype", "auto"),
-        #trust_remote_code=trust_remote_code,
+        dtype=semantic_similarity_args.get("dtype", "auto"),
+        trust_remote_code=trust_remote_code,
         tensor_parallel_size=device_count(),
-        #enforce_eager=semantic_similarity_args.get("enforce_eager", True),
-        #enable_chunked_prefill=semantic_similarity_args.get("enable_chunked_prefill", True),
-        #max_model_len=max_model_len
+        enforce_eager=semantic_similarity_args.get("enforce_eager", True),
+        enable_chunked_prefill=semantic_similarity_args.get("enable_chunked_prefill", True),
+        max_model_len=max_model_len
     )
 
     print("Completed the model initialization ")

From 7e1385356ef149dbd64315baca7857c44517c7f9 Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Thu, 16 Oct 2025 21:39:40 +0000
Subject: [PATCH 20/96] test vllm server

---
 .../scripts/semantic_similarity_generate_script.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index 39c06756..be0ab93b 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -82,6 +82,16 @@ def semantic_similarity_generate_main(
     print(">>> Loading tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code= trust_remote_code)
 
+    # Start vLLM server
+    vllm_server = VLLMServer(
+        {},
+        model_id,
+        "http://localhost:8000/v1",
+        60,
+    )
+    vllm_server.start()
+
+    """
     #from huggingface_hub import snapshot_download
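The VLLMServer wrapper started above follows the usual pattern of launching `vllm serve` as a subprocess and polling the OpenAI-compatible endpoint until it answers. A minimal sketch of that readiness loop, with the model id, port, and timings as assumed placeholders:

    import subprocess
    import time

    import requests

    # Hypothetical standalone readiness check against a vLLM OpenAI server
    proc = subprocess.Popen(["vllm", "serve", "Qwen/Qwen3-0.6B", "--port", "8000"])
    target = "http://localhost:8000/v1"
    for _ in range(60 // 5):  # wait up to 60 seconds in 5-second steps
        try:
            if requests.get(target + "/models", timeout=5).status_code == 200:
                print("Server initialized")
                break
        except requests.exceptions.RequestException:
            pass  # server not up yet; retry
        time.sleep(5)
    # (in the task itself, the process is killed via kill_process_tree on failure)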
#snapshot_download(repo_id=model_id) @@ -107,6 +117,7 @@ def semantic_similarity_generate_main( print(">>> Running vLLM generation...") outputs = llm.generate(all_prompts, sampling_params) + """ return all_prompts, outputs From 1f301504d2505eea4e9251f8da5a1c6fe3aa097a Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 16 Oct 2025 21:48:57 +0000 Subject: [PATCH 21/96] add requests query --- .../semantic_similarity_generate_script.py | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index be0ab93b..4adbd157 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -1,5 +1,6 @@ import json import os +import requests from torch.cuda import device_count from tqdm import tqdm from datasets import load_dataset @@ -79,8 +80,9 @@ def semantic_similarity_generate_main( all_prompts.append(prompt) - print(">>> Loading tokenizer...") - tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code= trust_remote_code) + #print(">>> Loading tokenizer...") + #tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code= trust_remote_code) + # Start vLLM server vllm_server = VLLMServer( @@ -91,6 +93,21 @@ def semantic_similarity_generate_main( ) vllm_server.start() + + url = "http://localhost:8000/v1/completions" + headers = { + "Content-Type": "application/json", + } + + data = { + "model": model_id, + "prompt": all_prompts[0], + "max_tokens": max_new_tokens + } + + outputs = requests.post(url, headers=headers, json=data) + print(outputs.json()) + """ #from huggingface_hub import snapshot_download #snapshot_download(repo_id=model_id) From a153996804242a358fd39e6c69f6fef7636fa001 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 16 Oct 2025 21:56:06 +0000 Subject: [PATCH 22/96] clean libs --- .../tasks/scripts/semantic_similarity_score_script.py | 3 ++- src/automation/tasks/semantic_similarity_generate.py | 11 +---------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_score_script.py b/src/automation/tasks/scripts/semantic_similarity_score_script.py index 619315f9..98433a39 100644 --- a/src/automation/tasks/scripts/semantic_similarity_score_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_score_script.py @@ -11,7 +11,8 @@ except ImportError: clearml_available = False -SCORING_DIR = os.path.join(os.getcwd(), "outputs") +SCORING_DIR = os.path.join(os.getcwd(), "scoresdirectory") +os.makedirs(SCORING_DIR, exist_ok=True) def semantic_similarity_score_main( reference_file, diff --git a/src/automation/tasks/semantic_similarity_generate.py b/src/automation/tasks/semantic_similarity_generate.py index f6d6345f..38501241 100644 --- a/src/automation/tasks/semantic_similarity_generate.py +++ b/src/automation/tasks/semantic_similarity_generate.py @@ -6,19 +6,10 @@ class SemanticSimilarityGenerateTask(BaseTask): task_packages = [ - "vllm==0.10.1.1", + "vllm", "hf_xet", "pyzmq", - #"datasets", - #"rouge_score", - #"bert-score", - #"sentence-transformers", #"vllm==0.10.1.1", - #"datasets==4.2.0", - #"rouge_score==0.1.2", - #"bert-score==0.3.13", - #"sentence-transformers==5.1.1", - #"pyzmq==27.1.0", ] def __init__( From 92fe4c937d491a07544e6afc2097fc641a060240 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 16 Oct 2025 21:58:01 +0000 Subject: [PATCH 
23/96] fix parse issue --- .../tasks/scripts/semantic_similarity_generate_script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 4adbd157..b6cfa688 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -152,7 +152,7 @@ def main(configurations=None, args=None): model_id = parse_argument(args["model_id"], str) save_directory = parse_argument(args["save_directory"], str) max_model_len = parse_argument(args["max_model_len"], int) - num_samples_per_dataset = parse_argument(args["num_samples"], int) + num_samples_per_dataset = parse_argument(args["num_samples_per_dataset"], int) max_new_tokens = parse_argument(args["max_new_tokens"], int) dataset_args = args.get("dataset_args", None) semantic_similarity_args= args.get("semantic_similarity_args", None) From a8751c23aba5435c63147fab615f140fa6f87258 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 16 Oct 2025 22:07:01 +0000 Subject: [PATCH 24/96] use start_vllm_server --- .../scripts/semantic_similarity_generate_script.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index b6cfa688..85aa4619 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -7,6 +7,7 @@ from vllm import LLM, SamplingParams from transformers import AutoTokenizer +from automation.vllm import start_vllm_server from automation.utils import parse_argument try: @@ -80,19 +81,18 @@ def semantic_similarity_generate_main( all_prompts.append(prompt) - #print(">>> Loading tokenizer...") - #tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code= trust_remote_code) - - # Start vLLM server - vllm_server = VLLMServer( + server_process, server_initialized, server_log = start_vllm_server( {}, model_id, "http://localhost:8000/v1", 60, ) - vllm_server.start() + if not server_initialized: + kill_process_tree(server_process.pid) + task.upload_artifact(name="vLLM server log", artifact_object=server_log) + raise AssertionError("Server failed to initialize") url = "http://localhost:8000/v1/completions" headers = { @@ -111,6 +111,8 @@ def semantic_similarity_generate_main( """ #from huggingface_hub import snapshot_download #snapshot_download(repo_id=model_id) + print(">>> Loading tokenizer...") + tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code= trust_remote_code) print(">>> Initializing vLLM...") llm = LLM( From 5b9539e0f5cc8c3bb8f4c3d603451dae5fec6204 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 16 Oct 2025 22:10:07 +0000 Subject: [PATCH 25/96] test llm generate --- .../tasks/scripts/semantic_similarity_generate_script.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 85aa4619..f829e00c 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -80,8 +80,7 @@ def semantic_similarity_generate_main( prompt = make_default_prompt(sample) all_prompts.append(prompt) 
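The make_*_prompt helpers dispatched on above are defined elsewhere in the repository and are not shown in these diffs; an Alpaca-style builder consistent with the "### Instruction:"/"### Input:"/"### Response:" stop strings used by the sampling parameters would look roughly like this (field names assumed from the standard Alpaca/Open-Platypus schema):

    def make_alpaca_platypus_prompt(sample):
        # "instruction" and "input" are the usual Alpaca-format fields (assumed here)
        instruction = sample["instruction"].strip()
        context = sample.get("input", "").strip()
        if context:
            return (f"### Instruction:\n{instruction}\n\n"
                    f"### Input:\n{context}\n\n### Response:\n")
        return f"### Instruction:\n{instruction}\n\n### Response:\n"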
- - + """ server_process, server_initialized, server_log = start_vllm_server( {}, model_id, @@ -109,6 +108,7 @@ def semantic_similarity_generate_main( print(outputs.json()) """ + #from huggingface_hub import snapshot_download #snapshot_download(repo_id=model_id) print(">>> Loading tokenizer...") @@ -136,7 +136,6 @@ def semantic_similarity_generate_main( print(">>> Running vLLM generation...") outputs = llm.generate(all_prompts, sampling_params) - """ return all_prompts, outputs From 2a737016be576bd7be988cd4c79c6a21389ce42c Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 16 Oct 2025 22:13:18 +0000 Subject: [PATCH 26/96] updated vllm server --- src/automation/vllm/__init__.py | 2 +- src/automation/vllm/server.py | 113 ++++++++++++++------------------ 2 files changed, 51 insertions(+), 64 deletions(-) diff --git a/src/automation/vllm/__init__.py b/src/automation/vllm/__init__.py index 0ee78843..41c14105 100644 --- a/src/automation/vllm/__init__.py +++ b/src/automation/vllm/__init__.py @@ -1 +1 @@ -from automation.vllm.server import VLLMServer \ No newline at end of file +from automation.vllm.server import start_vllm_server \ No newline at end of file diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py index 0e152c92..303088ae 100644 --- a/src/automation/vllm/server.py +++ b/src/automation/vllm/server.py @@ -5,80 +5,67 @@ import os import torch from urllib.parse import urlparse -from automation.utils import kill_process_tree -from datetime import datetime -import random +from clearml import Task SERVER_LOG_PREFIX = "vllm_server_log" -class VLLMServer: - def __init__(self, vllm_args, model_id, target, server_wait_time): - self.vllm_args = vllm_args - self.model_id = model_id - self.target = target - self.server_wait_time = server_wait_time - - def start(self): - executable_path = os.path.dirname(sys.executable) - vllm_path = os.path.join(executable_path, "vllm") - num_gpus = torch.cuda.device_count() +def start_vllm_server( + vllm_args, + model_id, + target, + server_wait_time, +): + task = Task.current_task() - parsed_target = urlparse(self.target) + executable_path = os.path.dirname(sys.executable) + vllm_path = os.path.join(executable_path, "vllm") - server_command = [ - f"{vllm_path}", "serve", - self.model_id, - "--host", parsed_target.hostname, - "--port", str(parsed_target.port), - "--tensor-parallel-size", str(num_gpus) - ] + num_gpus = torch.cuda.device_count() - subprocess_env = os.environ.copy() + parsed_target = urlparse(target) - for k, v in self.vllm_args.items(): - if k.startswith("VLLM_"): - subprocess_env[k] = str(v) + server_command = [ + f"{vllm_path}", "serve", + model_id, + "--host", parsed_target.hostname, + "--port", str(parsed_target.port), + "--tensor-parallel-size", str(num_gpus) + ] + + subprocess_env = os.environ.copy() + + for k, v in vllm_args.items(): + if k.startswith("VLLM_"): + subprocess_env[k] = str(v) + else: + if v == True or v == "True": + server_command.append(f"--{k}") else: - if v == True or v == "True": - server_command.append(f"--{k}") - else: - server_command.extend([f"--{k}", str(v)]) - + server_command.extend([f"--{k}", str(v)]) + - random_integer = random.randint(1, 9999) - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + server_log_file_name = f"{SERVER_LOG_PREFIX}_{task.id}.txt" + server_log_file = open(server_log_file_name, "w") - self.server_log_file_name = f"{SERVER_LOG_PREFIX}_{timestamp}_{random_integer:04d}.txt" - self.server_log_file = open(self.server_log_file_name, "w") - self.server_process = 
subprocess.Popen(server_command, stdout=self.server_log_file, stderr=self.server_log_file, shell=False, env=subprocess_env) + #server_process = subprocess.Popen(server_command, shell=False, env=subprocess_env) + server_process = subprocess.Popen(server_command, stdout=server_log_file, stderr=server_log_file, shell=False, env=subprocess_env) - delay = 5 - self.server_initialized = False - for _ in range(self.server_wait_time // delay): - try: - response = requests.get(self.target + "/models") - if response.status_code == 200 and response.json().get("data"): - print("Server initialized") - self.server_initialized = True - break # Exit the loop if the request is successful - except requests.exceptions.RequestException as e: - pass + delay = 5 + server_initialized = False + for _ in range(server_wait_time // delay): + try: + response = requests.get(target + "/models") + if response.status_code == 200: + print("Server initialized") + server_initialized = True + break # Exit the loop if the request is successful + except requests.exceptions.RequestException as e: + pass - time.sleep(delay) - - def stop(self): - kill_process_tree(self.server_process.pid) - self.server_log_file.close() - - def is_initialized(self): - return self.server_initialized - - def get_log_file_name(self): - return self.server_log_file_name - - def get_log_file(self): - return self.server_log_file - + time.sleep(delay) - + if server_initialized: + return server_process, True, server_log_file_name + else: + return server_process, False, server_log_file_name From a8547f5a715dcec9dbe66de65baa491fa4005719 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 16 Oct 2025 22:21:46 +0000 Subject: [PATCH 27/96] add debug logging level --- .../tasks/scripts/semantic_similarity_generate_script.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index f829e00c..e8afdc31 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -115,6 +115,7 @@ def semantic_similarity_generate_main( tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code= trust_remote_code) print(">>> Initializing vLLM...") + os.environ["VLLM_LOGGING_LEVEL"]="DEBUG" llm = LLM( model=model_id, dtype=semantic_similarity_args.get("dtype", "auto"), From 274948b1126bed27c7b592dc48f5398e134a9c18 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 16 Oct 2025 22:32:02 +0000 Subject: [PATCH 28/96] base LLM --- .../scripts/semantic_similarity_generate_script.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index e8afdc31..b3548966 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -111,11 +111,17 @@ def semantic_similarity_generate_main( #from huggingface_hub import snapshot_download #snapshot_download(repo_id=model_id) - print(">>> Loading tokenizer...") - tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code= trust_remote_code) + + #print(">>> Loading tokenizer...") + #tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code= trust_remote_code) print(">>> Initializing vLLM...") os.environ["VLLM_LOGGING_LEVEL"]="DEBUG" + llm = LLM( 
+        model=model_id,
+    )
+
+    """
     llm = LLM(
         model=model_id,
         dtype=semantic_similarity_args.get("dtype", "auto"),
@@ -125,6 +131,7 @@ def semantic_similarity_generate_main(
         enable_chunked_prefill=semantic_similarity_args.get("enable_chunked_prefill", True),
         max_model_len=max_model_len
     )
+    """
 
     print("Completed the model initialization ")

From 051afaeb5fbdbeef084a4d3fa8ecd421b35d7d5d Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Thu, 16 Oct 2025 22:38:59 +0000
Subject: [PATCH 29/96] try except for vllm

---
 .../semantic_similarity_generate_script.py    | 31 +++++++++++--------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index b3548966..3175ad63 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -112,15 +112,22 @@ def semantic_similarity_generate_main(
     #from huggingface_hub import snapshot_download
     #snapshot_download(repo_id=model_id)
 
-    #print(">>> Loading tokenizer...")
-    #tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code= trust_remote_code)
-
-    print(">>> Initializing vLLM...")
-    os.environ["VLLM_LOGGING_LEVEL"]="DEBUG"
-    llm = LLM(
-        model=model_id,
+    print("Define sampling parameters")
+    sampling_params = SamplingParams(
+        temperature=semantic_similarity_args.get("temperature", 0.0),
+        max_tokens=max_new_tokens,
+        stop=["### Instruction:", "### Input:", "### Response:"],
     )
+    try:
+        print(">>> Initializing vLLM...")
+        os.environ["VLLM_LOGGING_LEVEL"]="DEBUG"
+        llm = LLM(
+            model=model_id,
+        )
+    except Exception as e:
+        print(f"Error initializing LLM: {e}")

From d51d3cfe8d15bde6245ec6a3cbef43af1bfe0aee Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Thu, 16 Oct 2025 22:41:16 +0000
Subject: [PATCH 30/96] use vLLM server

---
 .../tasks/scripts/semantic_similarity_generate_script.py | 2 --
 1 file changed, 2 deletions(-)

diff --git
a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 90ec711c..995ca01a 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -80,6 +80,10 @@ def semantic_similarity_generate_main( prompt = make_default_prompt(sample) all_prompts.append(prompt) + from huggingface_hub import snapshot_download + snapshot_download(repo_id=model_id) + + """ server_process, server_initialized, server_log = start_vllm_server( {}, model_id, @@ -106,11 +110,6 @@ def semantic_similarity_generate_main( outputs = requests.post(url, headers=headers, json=data) print(outputs.json()) - """ - - #from huggingface_hub import snapshot_download - #snapshot_download(repo_id=model_id) - print("Define sampling parameters") sampling_params = SamplingParams( temperature=semantic_similarity_args.get("temperature", 0.0), From ab7fe4a5e4a00a404cf23298148e65eead8afcfe Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 17 Oct 2025 00:24:54 +0000 Subject: [PATCH 32/96] snapshot down --- .../tasks/scripts/semantic_similarity_generate_script.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 995ca01a..57fd75c9 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -81,7 +81,8 @@ def semantic_similarity_generate_main( all_prompts.append(prompt) from huggingface_hub import snapshot_download - snapshot_download(repo_id=model_id) + #snapshot_download(repo_id=model_id) + snapshot_download(repo_id=model_id, local_dir="/model") """ server_process, server_initialized, server_log = start_vllm_server( From bc2897b07297e3facb02d1cec7dd490f3c76e6c0 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 17 Oct 2025 00:29:54 +0000 Subject: [PATCH 33/96] snapshot with download_dir --- .../tasks/scripts/semantic_similarity_generate_script.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 57fd75c9..9dd62257 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -82,7 +82,8 @@ def semantic_similarity_generate_main( from huggingface_hub import snapshot_download #snapshot_download(repo_id=model_id) - snapshot_download(repo_id=model_id, local_dir="/model") + #snapshot_download(repo_id=model_id, local_dir="/model") + snapshot_download(repo_id=model_id, download_dir="/model") """ server_process, server_initialized, server_log = start_vllm_server( From 78b005adb14b7ef35dc24eeb71bd9cae3d511f2c Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 17 Oct 2025 00:40:39 +0000 Subject: [PATCH 34/96] add model dir --- .../scripts/semantic_similarity_generate_script.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 9dd62257..208734c2 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ 
b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -80,10 +80,19 @@ def semantic_similarity_generate_main( prompt = make_default_prompt(sample) all_prompts.append(prompt) - from huggingface_hub import snapshot_download + try: + print(">>> Initializing vLLM...") + os.environ["VLLM_LOGGING_LEVEL"]="DEBUG" + llm = LLM( + model=model_id, + download_dir="/model", + ) + except Exception as e: + print(f"Error initializing LLM: {e}") + #from huggingface_hub import snapshot_download #snapshot_download(repo_id=model_id) #snapshot_download(repo_id=model_id, local_dir="/model") - snapshot_download(repo_id=model_id, download_dir="/model") + #snapshot_download(repo_id=model_id, download_dir="/model") """ server_process, server_initialized, server_log = start_vllm_server( From aac6d694614606ab022bdfd2a14db219c06350e1 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 17 Oct 2025 00:41:41 +0000 Subject: [PATCH 35/96] add dtype --- .../tasks/scripts/semantic_similarity_generate_script.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 208734c2..ab690d1e 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -85,7 +85,8 @@ def semantic_similarity_generate_main( os.environ["VLLM_LOGGING_LEVEL"]="DEBUG" llm = LLM( model=model_id, - download_dir="/model", + dtype="auto", + #download_dir="/model", ) except Exception as e: print(f"Error initializing LLM: {e}") From 73abff71bf600feeabe530bcf8fbff9d6bdaaf70 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 17 Oct 2025 00:47:43 +0000 Subject: [PATCH 36/96] model dir --- .../tasks/scripts/semantic_similarity_generate_script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index ab690d1e..491e1300 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -86,7 +86,7 @@ def semantic_similarity_generate_main( llm = LLM( model=model_id, dtype="auto", - #download_dir="/model", + download_dir="/model", ) except Exception as e: print(f"Error initializing LLM: {e}") From 41bc34af6d7b4ba355a70278b890ac379821e547 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 17 Oct 2025 00:48:39 +0000 Subject: [PATCH 37/96] add trust remote code --- .../tasks/scripts/semantic_similarity_generate_script.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 491e1300..88f41e7f 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -87,6 +87,7 @@ def semantic_similarity_generate_main( model=model_id, dtype="auto", download_dir="/model", + trust_remote_code=True ) except Exception as e: print(f"Error initializing LLM: {e}") From d37541a419004e9fcac378550baf6e03670006cb Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 17 Oct 2025 00:54:39 +0000 Subject: [PATCH 38/96] download safetensors --- .../tasks/scripts/semantic_similarity_generate_script.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 88f41e7f..df9a8b7b 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -92,6 +92,7 @@ def semantic_similarity_generate_main( except Exception as e: print(f"Error initializing LLM: {e}") #from huggingface_hub import snapshot_download + hf_hub_download(model_id, "model.safetensors", local_dir="./models") #snapshot_download(repo_id=model_id) #snapshot_download(repo_id=model_id, local_dir="/model") #snapshot_download(repo_id=model_id, download_dir="/model") From 67fee2c1bbfb2fca7216d7cf9ec00aab09e4413f Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 17 Oct 2025 00:57:35 +0000 Subject: [PATCH 39/96] move vllm server up --- .../tasks/scripts/semantic_similarity_generate_script.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index df9a8b7b..a6532a8c 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -80,6 +80,8 @@ def semantic_similarity_generate_main( prompt = make_default_prompt(sample) all_prompts.append(prompt) + from huggingface_hub import snapshot_download, hf_hub_download + hf_hub_download(model_id, "model.safetensors", local_dir="./models") try: print(">>> Initializing vLLM...") os.environ["VLLM_LOGGING_LEVEL"]="DEBUG" @@ -92,7 +94,7 @@ def semantic_similarity_generate_main( except Exception as e: print(f"Error initializing LLM: {e}") #from huggingface_hub import snapshot_download - hf_hub_download(model_id, "model.safetensors", local_dir="./models") + #hf_hub_download(model_id, "model.safetensors", local_dir="./models") #snapshot_download(repo_id=model_id) #snapshot_download(repo_id=model_id, local_dir="/model") #snapshot_download(repo_id=model_id, download_dir="/model") From e59e1dfc741ad04181686189331503157aa03743 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 17 Oct 2025 01:15:08 +0000 Subject: [PATCH 40/96] use the same dir --- .../tasks/scripts/semantic_similarity_generate_script.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index a6532a8c..fd770654 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -81,14 +81,14 @@ def semantic_similarity_generate_main( all_prompts.append(prompt) from huggingface_hub import snapshot_download, hf_hub_download - hf_hub_download(model_id, "model.safetensors", local_dir="./models") + hf_hub_download(model_id, "model.safetensors", local_dir="/models") try: print(">>> Initializing vLLM...") os.environ["VLLM_LOGGING_LEVEL"]="DEBUG" llm = LLM( model=model_id, dtype="auto", - download_dir="/model", + download_dir="/models", trust_remote_code=True ) except Exception as e: From bef036e7188ea0e976309e25517f01094ead2140 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 17 Oct 2025 01:21:05 +0000 Subject: [PATCH 41/96] redo snapshot download --- .../semantic_similarity_generate_script.py | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git 
a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index fd770654..397f8639 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -80,8 +80,17 @@ def semantic_similarity_generate_main( prompt = make_default_prompt(sample) all_prompts.append(prompt) + print("Define sampling parameters") + sampling_params = SamplingParams( + temperature=semantic_similarity_args.get("temperature", 0.0), + max_tokens=max_new_tokens, + stop=["### Instruction:", "### Input:", "### Response:"], + ) + from huggingface_hub import snapshot_download, hf_hub_download - hf_hub_download(model_id, "model.safetensors", local_dir="/models") + #hf_hub_download(model_id, "model.safetensors", local_dir="/models") + snapshot_download(repo_id=model_id, local_dir="/models") + try: print(">>> Initializing vLLM...") os.environ["VLLM_LOGGING_LEVEL"]="DEBUG" @@ -91,6 +100,9 @@ def semantic_similarity_generate_main( download_dir="/models", trust_remote_code=True ) + print("Completed the model initialization ") + print(">>> Running vLLM generation...") + outputs = llm.generate(all_prompts, sampling_params) except Exception as e: print(f"Error initializing LLM: {e}") #from huggingface_hub import snapshot_download @@ -154,12 +166,6 @@ def semantic_similarity_generate_main( """ print("Completed the model initialization ") - - - - - - print(">>> Running vLLM generation...") outputs = llm.generate(all_prompts, sampling_params) From 044cef7c29638c63d861b425df4e71cba08eeea8 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 17 Oct 2025 01:32:58 +0000 Subject: [PATCH 42/96] trigger --- .../tasks/scripts/semantic_similarity_generate_script.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 397f8639..16f98423 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -87,9 +87,11 @@ def semantic_similarity_generate_main( stop=["### Instruction:", "### Input:", "### Response:"], ) + print(">>> Downloading snapshot ...") from huggingface_hub import snapshot_download, hf_hub_download #hf_hub_download(model_id, "model.safetensors", local_dir="/models") snapshot_download(repo_id=model_id, local_dir="/models") + print(">>> trigger...") try: print(">>> Initializing vLLM...") From 77bc96d53a68d52bfabbfe983d5ddcae5eff8120 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 17 Oct 2025 01:45:11 +0000 Subject: [PATCH 43/96] combined --- .../tasks/scripts/semantic_similarity_generate_script.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 16f98423..9539f086 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -89,8 +89,8 @@ def semantic_similarity_generate_main( print(">>> Downloading snapshot ...") from huggingface_hub import snapshot_download, hf_hub_download - #hf_hub_download(model_id, "model.safetensors", local_dir="/models") snapshot_download(repo_id=model_id, local_dir="/models") + print(">>> trigger...") try: @@ -100,7 +100,6 @@ def 
semantic_similarity_generate_main( model=model_id, dtype="auto", download_dir="/models", - trust_remote_code=True ) print("Completed the model initialization ") print(">>> Running vLLM generation...") From 67fca569e75f9190b09a7f94f9b405debb7ea22b Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 17 Oct 2025 09:47:17 +0000 Subject: [PATCH 44/96] use vllm server --- .../scripts/semantic_similarity_generate_script.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 9539f086..e0e2a1bc 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -80,6 +80,7 @@ def semantic_similarity_generate_main( prompt = make_default_prompt(sample) all_prompts.append(prompt) + """ print("Define sampling parameters") sampling_params = SamplingParams( temperature=semantic_similarity_args.get("temperature", 0.0), @@ -106,11 +107,6 @@ def semantic_similarity_generate_main( outputs = llm.generate(all_prompts, sampling_params) except Exception as e: print(f"Error initializing LLM: {e}") - #from huggingface_hub import snapshot_download - #hf_hub_download(model_id, "model.safetensors", local_dir="./models") - #snapshot_download(repo_id=model_id) - #snapshot_download(repo_id=model_id, local_dir="/model") - #snapshot_download(repo_id=model_id, download_dir="/model") """ server_process, server_initialized, server_log = start_vllm_server( @@ -139,6 +135,8 @@ def semantic_similarity_generate_main( outputs = requests.post(url, headers=headers, json=data) print(outputs.json()) + """ + print("Define sampling parameters") sampling_params = SamplingParams( temperature=semantic_similarity_args.get("temperature", 0.0), @@ -164,11 +162,11 @@ def semantic_similarity_generate_main( enable_chunked_prefill=semantic_similarity_args.get("enable_chunked_prefill", True), max_model_len=max_model_len ) - """ print("Completed the model initialization ") print(">>> Running vLLM generation...") outputs = llm.generate(all_prompts, sampling_params) + """ return all_prompts, outputs From bf80fd4e6b8ce044e2573b41d653120c1e7c445b Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 17 Oct 2025 10:08:09 +0000 Subject: [PATCH 45/96] add process tree import --- .../tasks/scripts/semantic_similarity_generate_script.py | 2 +- src/automation/vllm/server.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index e0e2a1bc..8f60b3a1 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -8,7 +8,7 @@ from transformers import AutoTokenizer from automation.vllm import start_vllm_server -from automation.utils import parse_argument +from automation.utils import kill_process_tree, parse_argument try: from clearml import OutputModel, Task diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py index 303088ae..5634c965 100644 --- a/src/automation/vllm/server.py +++ b/src/automation/vllm/server.py @@ -48,8 +48,8 @@ def start_vllm_server( server_log_file_name = f"{SERVER_LOG_PREFIX}_{task.id}.txt" server_log_file = open(server_log_file_name, "w") - #server_process = subprocess.Popen(server_command, shell=False, 
env=subprocess_env) - server_process = subprocess.Popen(server_command, stdout=server_log_file, stderr=server_log_file, shell=False, env=subprocess_env) + server_process = subprocess.Popen(server_command, shell=False, env=subprocess_env) + #server_process = subprocess.Popen(server_command, stdout=server_log_file, stderr=server_log_file, shell=False, env=subprocess_env) delay = 5 server_initialized = False From f5a21f517a6b4e3e355bddb5362d3c7c83232e3d Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 17 Oct 2025 10:28:38 +0000 Subject: [PATCH 46/96] add clearml conditional --- .../tasks/scripts/semantic_similarity_generate_script.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 8f60b3a1..817956b0 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -56,7 +56,7 @@ def semantic_similarity_generate_main( max_model_len, max_new_tokens, num_samples_per_dataset, - save_directory, + clearml_available, ): from collections import defaultdict all_prompts = [] @@ -118,7 +118,8 @@ def semantic_similarity_generate_main( if not server_initialized: kill_process_tree(server_process.pid) - task.upload_artifact(name="vLLM server log", artifact_object=server_log) + if clearml_available: + task.upload_artifact(name="vLLM server log", artifact_object=server_log) raise AssertionError("Server failed to initialize") url = "http://localhost:8000/v1/completions" @@ -182,7 +183,6 @@ def main(configurations=None, args=None): force_download = parse_argument(args["force_download"], bool) trust_remote_code = parse_argument(args["trust_remote_code"], bool) model_id = parse_argument(args["model_id"], str) - save_directory = parse_argument(args["save_directory"], str) max_model_len = parse_argument(args["max_model_len"], int) num_samples_per_dataset = parse_argument(args["num_samples_per_dataset"], int) max_new_tokens = parse_argument(args["max_new_tokens"], int) @@ -199,7 +199,7 @@ def main(configurations=None, args=None): max_model_len, max_new_tokens, num_samples_per_dataset, - save_directory, + clearml_available, ) OUTPUT_FILE = os.path.join(OUTPUT_DIR,f"{model_id.replace('/', '_')}.jsonl") From b471178c76089aa96e218604a5bcbc77e3de7434 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 17 Oct 2025 10:38:34 +0000 Subject: [PATCH 47/96] add task import --- .../tasks/scripts/semantic_similarity_generate_script.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 817956b0..a90ace7b 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -119,6 +119,7 @@ def semantic_similarity_generate_main( if not server_initialized: kill_process_tree(server_process.pid) if clearml_available: + from clearml import Task task.upload_artifact(name="vLLM server log", artifact_object=server_log) raise AssertionError("Server failed to initialize") From 08914e501a286178a201a959f9c35a898b1fe456 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 17 Oct 2025 11:05:33 +0000 Subject: [PATCH 48/96] retrieve current task --- .../tasks/scripts/semantic_similarity_generate_script.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index a90ace7b..5cdad35c 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -120,6 +120,7 @@ def semantic_similarity_generate_main(
         kill_process_tree(server_process.pid)
         if clearml_available:
             from clearml import Task
+            task = Task.current_task()
             task.upload_artifact(name="vLLM server log", artifact_object=server_log)
         raise AssertionError("Server failed to initialize")

From 2c3a2995a3d74fd7179bdec028f1de4adb39d9a2 Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Fri, 17 Oct 2025 11:35:01 +0000
Subject: [PATCH 49/96] output server logs

---
 src/automation/vllm/server.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py
index 5634c965..303088ae 100644
--- a/src/automation/vllm/server.py
+++ b/src/automation/vllm/server.py
@@ -48,8 +48,8 @@ def start_vllm_server(
     server_log_file_name = f"{SERVER_LOG_PREFIX}_{task.id}.txt"
     server_log_file = open(server_log_file_name, "w")
 
-    server_process = subprocess.Popen(server_command, shell=False, env=subprocess_env)
-    #server_process = subprocess.Popen(server_command, stdout=server_log_file, stderr=server_log_file, shell=False, env=subprocess_env)
+    #server_process = subprocess.Popen(server_command, shell=False, env=subprocess_env)
+    server_process = subprocess.Popen(server_command, stdout=server_log_file, stderr=server_log_file, shell=False, env=subprocess_env)
 
     delay = 5
     server_initialized = False

From c1a0b3c6bec42dbfdec6728695ff91be821af6be Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Fri, 17 Oct 2025 12:00:30 +0000
Subject: [PATCH 50/96] print vllm command

---
 src/automation/vllm/server.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py
index 303088ae..fc71a9c1 100644
--- a/src/automation/vllm/server.py
+++ b/src/automation/vllm/server.py
@@ -48,6 +48,8 @@ def start_vllm_server(
     server_log_file_name = f"{SERVER_LOG_PREFIX}_{task.id}.txt"
     server_log_file = open(server_log_file_name, "w")
 
+    print(f"Running: {server_command}")
+
     #server_process = subprocess.Popen(server_command, shell=False, env=subprocess_env)
     server_process = subprocess.Popen(server_command, stdout=server_log_file, stderr=server_log_file, shell=False, env=subprocess_env)

From f83b044b63fd624080f32706aae24d6a2da891f0 Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Mon, 20 Oct 2025 19:35:06 +0000
Subject: [PATCH 51/96] output as json

---
 .../scripts/semantic_similarity_score_script.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/automation/tasks/scripts/semantic_similarity_score_script.py b/src/automation/tasks/scripts/semantic_similarity_score_script.py
index 98433a39..791e0452 100644
--- a/src/automation/tasks/scripts/semantic_similarity_score_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_score_script.py
@@ -116,13 +116,20 @@ def main(configurations=None, args=None):
     print("BERTScore F1 | ROUGE-1 F1 | ROUGE-L F1 | STS CosSim")
     print(f"{avg_bert:.3f} | {avg_rouge1:.3f} | {avg_rougeL:.3f} | {avg_sts:.3f}")
 
+    data = {
+        "BERTScore F1": f"{avg_bert:.3f}",
+        "ROUGE-1 F1": f"{avg_rouge1:.3f}",
+        "ROUGE-L F1": f"{avg_rougeL:.3f}",
+        "STS CosSim": f"{avg_sts:.3f}",
+    }
+
     out_filename =
f"scores_{ref_model_json.lower()}__vs__{cand_model_json.lower()}.txt" out_filename = os.path.join(SCORING_DIR,out_filename) - # Save results - with open(out_filename, "w") as f_out: - f_out.write("BERTScore F1 | ROUGE-1 F1 | ROUGE-L F1 | STS CosSim\n") - f_out.write(f"{avg_bert:.3f} | {avg_rouge1:.3f} | {avg_rougeL:.3f} | {avg_sts:.3f}\n\n") + # Save results + with open(out_filename, "w") as file: + json.dump(data, file, indent=4) + print(f"\nSaved results to {out_filename}") if clearml_available: task.upload_artifact("scores", out_filename) From d9b447acc1a6e3161e9518c94012db5e49f5db00 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 20 Oct 2025 21:03:11 +0000 Subject: [PATCH 52/96] output artifact --- .../tasks/scripts/semantic_similarity_score_script.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_score_script.py b/src/automation/tasks/scripts/semantic_similarity_score_script.py index 791e0452..39aeebc1 100644 --- a/src/automation/tasks/scripts/semantic_similarity_score_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_score_script.py @@ -116,6 +116,7 @@ def main(configurations=None, args=None): print("BERTScore F1 | ROUGE-1 F1 | ROUGE-L F1 | STS CosSim") print(f"{avg_bert:.3f} | {avg_rouge1:.3f} | {avg_rougeL:.3f} | {avg_sts:.3f}") + data = { "BERTScore F1": f"{avg_bert:.3f}", "ROUGE-1 F1": f"{avg_rouge1:.3f}", @@ -132,7 +133,8 @@ def main(configurations=None, args=None): print(f"\nSaved results to {out_filename}") if clearml_available: - task.upload_artifact("scores", out_filename) + task.upload_artifact("scores", data) + task.upload_artifact("outscores", out_filename) print("Pushing clearml artifact") if __name__ == '__main__': From 8ebd724c50278f8cfdd675834b53a8f592728ef9 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 22 Oct 2025 14:52:57 +0000 Subject: [PATCH 53/96] retry with python llm interface --- .../semantic_similarity_generate_script.py | 34 ++----------------- 1 file changed, 2 insertions(+), 32 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 5cdad35c..c2a6571a 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -80,7 +80,6 @@ def semantic_similarity_generate_main( prompt = make_default_prompt(sample) all_prompts.append(prompt) - """ print("Define sampling parameters") sampling_params = SamplingParams( temperature=semantic_similarity_args.get("temperature", 0.0), @@ -90,7 +89,7 @@ def semantic_similarity_generate_main( print(">>> Downloading snapshot ...") from huggingface_hub import snapshot_download, hf_hub_download - snapshot_download(repo_id=model_id, local_dir="/models") + snapshot_download(repo_id=model_id, local_dir="/home") print(">>> trigger...") @@ -100,7 +99,7 @@ def semantic_similarity_generate_main( llm = LLM( model=model_id, dtype="auto", - download_dir="/models", + download_dir="/home", ) print("Completed the model initialization ") print(">>> Running vLLM generation...") @@ -140,36 +139,7 @@ def semantic_similarity_generate_main( """ - print("Define sampling parameters") - sampling_params = SamplingParams( - temperature=semantic_similarity_args.get("temperature", 0.0), - max_tokens=max_new_tokens, - stop=["### Instruction:", "### Input:", "### Response:"], - ) - try: - print(">>> Initializing vLLM...") - 
os.environ["VLLM_LOGGING_LEVEL"]="DEBUG" - llm = LLM( - model=model_id, - ) - except Exception as e: - print(f"Error initializing LLM: {e}") - - llm = LLM( - model=model_id, - dtype=semantic_similarity_args.get("dtype", "auto"), - trust_remote_code=trust_remote_code, - tensor_parallel_size=device_count(), - enforce_eager=semantic_similarity_args.get("enforce_eager", True), - enable_chunked_prefill=semantic_similarity_args.get("enable_chunked_prefill", True), - max_model_len=max_model_len - ) - - print("Completed the model initialization ") - print(">>> Running vLLM generation...") - outputs = llm.generate(all_prompts, sampling_params) - """ return all_prompts, outputs From b9ae4c188c9a37239eaed1d312eb01d27b64f0ec Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 23 Oct 2025 10:39:33 +0000 Subject: [PATCH 54/96] reference the downloaded model --- .../tasks/scripts/semantic_similarity_generate_script.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index c2a6571a..5bc3d730 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -97,9 +97,8 @@ def semantic_similarity_generate_main( print(">>> Initializing vLLM...") os.environ["VLLM_LOGGING_LEVEL"]="DEBUG" llm = LLM( - model=model_id, + model="/home", dtype="auto", - download_dir="/home", ) print("Completed the model initialization ") print(">>> Running vLLM generation...") From ecf9f4b2c8a6b1efbbedd98dae4013c820969dfc Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 23 Oct 2025 11:04:54 +0000 Subject: [PATCH 55/96] add results directory creation --- .../semantic_similarity_generate_script.py | 49 ++++--------------- 1 file changed, 10 insertions(+), 39 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 5bc3d730..733bb2ab 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -16,8 +16,8 @@ except ImportError: clearml_available = False - -OUTPUT_DIR = os.path.join(os.getcwd(), "outputs") +RESULTS_DIR = os.path.join(os.getcwd(), "results") +os.makedirs(RESULTS_DIR, exist_ok=False) def make_alpaca_platypus_prompt(sample): instruction = sample["instruction"].strip() @@ -88,7 +88,7 @@ def semantic_similarity_generate_main( ) print(">>> Downloading snapshot ...") - from huggingface_hub import snapshot_download, hf_hub_download + from huggingface_hub import snapshot_download snapshot_download(repo_id=model_id, local_dir="/home") print(">>> trigger...") @@ -98,7 +98,12 @@ def semantic_similarity_generate_main( os.environ["VLLM_LOGGING_LEVEL"]="DEBUG" llm = LLM( model="/home", - dtype="auto", + dtype=semantic_similarity_args.get("dtype", "auto"), + trust_remote_code=trust_remote_code, + tensor_parallel_size=device_count(), + enforce_eager=semantic_similarity_args.get("enforce_eager", True), + enable_chunked_prefill=semantic_similarity_args.get("enable_chunked_prefill", True), + max_model_len=max_model_len ) print("Completed the model initialization ") print(">>> Running vLLM generation...") @@ -106,40 +111,6 @@ def semantic_similarity_generate_main( except Exception as e: print(f"Error initializing LLM: {e}") - """ - server_process, server_initialized, server_log = 
start_vllm_server( - {}, - model_id, - "http://localhost:8000/v1", - 60, - ) - - if not server_initialized: - kill_process_tree(server_process.pid) - if clearml_available: - from clearml import Task - task = Task.current_task() - task.upload_artifact(name="vLLM server log", artifact_object=server_log) - raise AssertionError("Server failed to initialize") - - url = "http://localhost:8000/v1/completions" - headers = { - "Content-Type": "application/json", - } - - data = { - "model": model_id, - "prompt": all_prompts[0], - "max_tokens": max_new_tokens - } - - outputs = requests.post(url, headers=headers, json=data) - print(outputs.json()) - - """ - - - return all_prompts, outputs def main(configurations=None, args=None): @@ -173,7 +144,7 @@ def main(configurations=None, args=None): clearml_available, ) - OUTPUT_FILE = os.path.join(OUTPUT_DIR,f"{model_id.replace('/', '_')}.jsonl") + OUTPUT_FILE = os.path.join(RESULTS_DIR,f"{model_id.replace('/', '_')}.jsonl") print(">>> Writing outputs to file...") with open(OUTPUT_FILE, "w") as fout: for idx, (prompt, output) in enumerate(zip(all_prompts, outputs)): From 05b8f0f6ce54d77cee8f9c68fb6fecdf7033141a Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 23 Oct 2025 11:07:05 +0000 Subject: [PATCH 56/96] fix download and read --- .../tasks/scripts/semantic_similarity_generate_script.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 733bb2ab..7b10e5b7 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -17,6 +17,7 @@ clearml_available = False RESULTS_DIR = os.path.join(os.getcwd(), "results") +HUGGINGFACE_DIR = "/home" os.makedirs(RESULTS_DIR, exist_ok=False) def make_alpaca_platypus_prompt(sample): @@ -89,7 +90,7 @@ def semantic_similarity_generate_main( print(">>> Downloading snapshot ...") from huggingface_hub import snapshot_download - snapshot_download(repo_id=model_id, local_dir="/home") + snapshot_download(repo_id=model_id, local_dir=HUGGINGFACE_DIR) print(">>> trigger...") @@ -97,7 +98,7 @@ def semantic_similarity_generate_main( print(">>> Initializing vLLM...") os.environ["VLLM_LOGGING_LEVEL"]="DEBUG" llm = LLM( - model="/home", + model=HUGGINGFACE_DIR, dtype=semantic_similarity_args.get("dtype", "auto"), trust_remote_code=trust_remote_code, tensor_parallel_size=device_count(), From 389a5d84a4e2343e72b129a1d5125687cb6d9e46 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 23 Oct 2025 11:13:50 +0000 Subject: [PATCH 57/96] clean up repo --- .../semantic_similarity_generate_script.py | 1 - .../semantic_similarity_score_script.py | 2 +- src/automation/vllm/__init__.py | 2 +- src/automation/vllm/server.py | 115 ++++++++++-------- 4 files changed, 65 insertions(+), 55 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 7b10e5b7..415de711 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -7,7 +7,6 @@ from vllm import LLM, SamplingParams from transformers import AutoTokenizer -from automation.vllm import start_vllm_server from automation.utils import kill_process_tree, parse_argument try: diff --git 
a/src/automation/tasks/scripts/semantic_similarity_score_script.py b/src/automation/tasks/scripts/semantic_similarity_score_script.py index 39aeebc1..1cf8905b 100644 --- a/src/automation/tasks/scripts/semantic_similarity_score_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_score_script.py @@ -12,7 +12,7 @@ clearml_available = False SCORING_DIR = os.path.join(os.getcwd(), "scoresdirectory") -os.makedirs(SCORING_DIR, exist_ok=True) +os.makedirs(SCORING_DIR, exist_ok=False) def semantic_similarity_score_main( reference_file, diff --git a/src/automation/vllm/__init__.py b/src/automation/vllm/__init__.py index 41c14105..0ee78843 100644 --- a/src/automation/vllm/__init__.py +++ b/src/automation/vllm/__init__.py @@ -1 +1 @@ -from automation.vllm.server import start_vllm_server \ No newline at end of file +from automation.vllm.server import VLLMServer \ No newline at end of file diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py index fc71a9c1..0e152c92 100644 --- a/src/automation/vllm/server.py +++ b/src/automation/vllm/server.py @@ -5,69 +5,80 @@ import os import torch from urllib.parse import urlparse -from clearml import Task +from automation.utils import kill_process_tree +from datetime import datetime +import random SERVER_LOG_PREFIX = "vllm_server_log" +class VLLMServer: + def __init__(self, vllm_args, model_id, target, server_wait_time): + self.vllm_args = vllm_args + self.model_id = model_id + self.target = target + self.server_wait_time = server_wait_time + + def start(self): + executable_path = os.path.dirname(sys.executable) + vllm_path = os.path.join(executable_path, "vllm") -def start_vllm_server( - vllm_args, - model_id, - target, - server_wait_time, -): - task = Task.current_task() + num_gpus = torch.cuda.device_count() - executable_path = os.path.dirname(sys.executable) - vllm_path = os.path.join(executable_path, "vllm") + parsed_target = urlparse(self.target) - num_gpus = torch.cuda.device_count() + server_command = [ + f"{vllm_path}", "serve", + self.model_id, + "--host", parsed_target.hostname, + "--port", str(parsed_target.port), + "--tensor-parallel-size", str(num_gpus) + ] - parsed_target = urlparse(target) + subprocess_env = os.environ.copy() - server_command = [ - f"{vllm_path}", "serve", - model_id, - "--host", parsed_target.hostname, - "--port", str(parsed_target.port), - "--tensor-parallel-size", str(num_gpus) - ] - - subprocess_env = os.environ.copy() - - for k, v in vllm_args.items(): - if k.startswith("VLLM_"): - subprocess_env[k] = str(v) - else: - if v == True or v == "True": - server_command.append(f"--{k}") + for k, v in self.vllm_args.items(): + if k.startswith("VLLM_"): + subprocess_env[k] = str(v) else: - server_command.extend([f"--{k}", str(v)]) - - - server_log_file_name = f"{SERVER_LOG_PREFIX}_{task.id}.txt" - server_log_file = open(server_log_file_name, "w") + if v == True or v == "True": + server_command.append(f"--{k}") + else: + server_command.extend([f"--{k}", str(v)]) + - print(f"Running: {server_command}") + random_integer = random.randint(1, 9999) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - #server_process = subprocess.Popen(server_command, shell=False, env=subprocess_env) - server_process = subprocess.Popen(server_command, stdout=server_log_file, stderr=server_log_file, shell=False, env=subprocess_env) + self.server_log_file_name = f"{SERVER_LOG_PREFIX}_{timestamp}_{random_integer:04d}.txt" + self.server_log_file = open(self.server_log_file_name, "w") + self.server_process = 
subprocess.Popen(server_command, stdout=self.server_log_file, stderr=self.server_log_file, shell=False, env=subprocess_env) - delay = 5 - server_initialized = False - for _ in range(server_wait_time // delay): - try: - response = requests.get(target + "/models") - if response.status_code == 200: - print("Server initialized") - server_initialized = True - break # Exit the loop if the request is successful - except requests.exceptions.RequestException as e: - pass + delay = 5 + self.server_initialized = False + for _ in range(self.server_wait_time // delay): + try: + response = requests.get(self.target + "/models") + if response.status_code == 200 and response.json().get("data"): + print("Server initialized") + self.server_initialized = True + break # Exit the loop if the request is successful + except requests.exceptions.RequestException as e: + pass - time.sleep(delay) + time.sleep(delay) + + def stop(self): + kill_process_tree(self.server_process.pid) + self.server_log_file.close() + + def is_initialized(self): + return self.server_initialized + + def get_log_file_name(self): + return self.server_log_file_name + + def get_log_file(self): + return self.server_log_file + - if server_initialized: - return server_process, True, server_log_file_name - else: - return server_process, False, server_log_file_name + From 5e8411597e143e17b983b71e9c7e15f1250a67aa Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 23 Oct 2025 11:51:52 +0000 Subject: [PATCH 58/96] clean up scoring and remove hardcoding --- .../scripts/semantic_similarity_score_script.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_score_script.py b/src/automation/tasks/scripts/semantic_similarity_score_script.py index 1cf8905b..056a644a 100644 --- a/src/automation/tasks/scripts/semantic_similarity_score_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_score_script.py @@ -82,7 +82,6 @@ def main(configurations=None, args=None): trust_remote_code = parse_argument(args["trust_remote_code"], bool) sts_model_id = args.get("sts_model_id", str) rouge_scores= args.get("rouge_scores", list) - #save_directory = parse_argument(args["save_directory"], str) tags = args.get("tags", None) print(args) @@ -93,17 +92,16 @@ def main(configurations=None, args=None): reference_model_task_name = parse_argument(args["reference_model_task_name"], str) reference_task = Task.query_tasks(project_name=reference_model_project_name,task_name= reference_model_task_name, task_filter={'order_by': ['-last_update'], 'status': ['completed'] }) reference_task = Task.get_task(reference_task[0]) - reference_file = reference_task.artifacts['jsonl model'].get_local_copy() + reference_file = reference_task.artifacts['jsonl_output'].get_local_copy() candidate_task = Task.query_tasks(project_name=candidate_model_project_name,task_name= candidate_model_task_name, task_filter={'order_by': ['-last_update'], 'status': ['completed'] }) candidate_task = Task.get_task(candidate_task[0]) - candidate_file = candidate_task.artifacts['jsonl model'].get_local_copy() - # add task query to get jsonl + candidate_file = candidate_task.artifacts['jsonl_output'].get_local_copy() else: - ref_model_json = "Qwen_Qwen3-0.6B.jsonl" - cand_model_json = "RedHatAI_Qwen3-0.6B-quantized.w4a16.jsonl" - reference_file = os.path.join(SCORING_DIR, ref_model_json) - candidate_file = os.path.join(SCORING_DIR, cand_model_json) + ref_model_jsonl = args.get("ref_model_jsonl", str) + cand_model_jsonl = 
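
# The VLLMServer class above replaces the old start_vllm_server() helper with an
# object that owns the subprocess, its log file, and the readiness poll against
# GET <target>/models. A minimal usage sketch, assuming only the constructor and
# methods shown in this diff (the model id and URL are placeholders):

server = VLLMServer(
    vllm_args={"max-model-len": 4096, "VLLM_LOGGING_LEVEL": "INFO"},
    model_id="Qwen/Qwen3-0.6B",
    target="http://localhost:8000/v1",
    server_wait_time=600,
)
server.start()
try:
    if not server.is_initialized():
        raise AssertionError(f"vLLM server failed to start; see {server.get_log_file_name()}")
    # ... send requests to http://localhost:8000/v1/completions ...
finally:
    server.stop()  # kill_process_tree() on the subprocess, then close the log file
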
args.get("cand_model_jsonl", str) + reference_file = os.path.join(SCORING_DIR, ref_model_jsonl) + candidate_file = os.path.join(SCORING_DIR, cand_model_jsonl) avg_bert, avg_rouge1, avg_rougeL, avg_sts = semantic_similarity_score_main( reference_file, From 64c5369c57f65b89cef6b0a1046494a651c99130 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 23 Oct 2025 12:30:39 +0000 Subject: [PATCH 59/96] add low score indices --- .../semantic_similarity_score_script.py | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_score_script.py b/src/automation/tasks/scripts/semantic_similarity_score_script.py index 056a644a..6c7daff7 100644 --- a/src/automation/tasks/scripts/semantic_similarity_score_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_score_script.py @@ -37,13 +37,13 @@ def semantic_similarity_score_main( # Compute BERTScore _, _, f1_scores = score(candidates, references, lang="en", verbose=False) - all_bert_f1 = [ f1.item() for f1 in f1_scores ] + #all_bert_f1 = [ f1.item() for f1 in f1_scores ] # Evaluate metrics all_rouge1_f1, all_rougeL_f1, all_sts, all_bert_f1 = [], [], [], [] low_score_indices = [] - for i, (ref, cand) in enumerate(zip(references, candidates)): + for i, (ref, cand) in enumerate(zip(references, candidates, f1_scores)): emb_ref = sts_model.encode(ref, convert_to_tensor=True) emb_cand = sts_model.encode(cand, convert_to_tensor=True) raw_sts = util.cos_sim(emb_cand, emb_ref).item() @@ -51,23 +51,23 @@ def semantic_similarity_score_main( all_sts.append(sts) rouge_scores = rouge.score(ref, cand) - rouge1 = rouge_scores["rouge1"].fmeasure rougeL = rouge_scores["rougeL"].fmeasure all_rouge1_f1.append(rouge1) all_rougeL_f1.append(rougeL) - - - # Compute averages - n = len(references) - avg_bert = sum(all_bert_f1) / n - avg_rouge1 = sum(all_rouge1_f1) / n - avg_rougeL = sum(all_rougeL_f1) / n - avg_sts = sum(all_sts) / n - return avg_bert, avg_rouge1, avg_rougeL, avg_sts + all_bert_f1.append(f1.item()) + if f1 < 0.85 or rouge1 < 0.5 or sts < 0.85: + low_score_indices.append(i) + # Compute averages + num_samples = len(references) + avg_bert = sum(all_bert_f1) / num_samples + avg_rouge1 = sum(all_rouge1_f1) / num_samples + avg_rougeL = sum(all_rougeL_f1) / num_samples + avg_sts = sum(all_sts) / num_samples + return avg_bert, avg_rouge1, avg_rougeL, avg_sts, low_score_indices def main(configurations=None, args=None): if clearml_available: @@ -103,7 +103,7 @@ def main(configurations=None, args=None): reference_file = os.path.join(SCORING_DIR, ref_model_jsonl) candidate_file = os.path.join(SCORING_DIR, cand_model_jsonl) - avg_bert, avg_rouge1, avg_rougeL, avg_sts = semantic_similarity_score_main( + avg_bert, avg_rouge1, avg_rougeL, avg_sts, low_score_indices = semantic_similarity_score_main( reference_file, candidate_file, sts_model_id, @@ -114,6 +114,8 @@ def main(configurations=None, args=None): print("BERTScore F1 | ROUGE-1 F1 | ROUGE-L F1 | STS CosSim") print(f"{avg_bert:.3f} | {avg_rouge1:.3f} | {avg_rougeL:.3f} | {avg_sts:.3f}") + print("\n=== Low-score indices (BERT < 0.85, ROUGE-1 < 0.5, STS < 0.85) ===") + print(low_score_indices) data = { "BERTScore F1": f"{avg_bert:.3f}", @@ -122,7 +124,7 @@ def main(configurations=None, args=None): "STS CosSim": f"{avg_sts:.3f}", } - out_filename = f"scores_{ref_model_json.lower()}__vs__{cand_model_json.lower()}.txt" + out_filename = f"scores_{ref_model_jsonl.lower()}__vs__{cand_model_jsonl.lower()}.txt" out_filename = 
os.path.join(SCORING_DIR,out_filename) # Save results From bed49919710057d69212eab474dbaafbfc73d6f6 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 23 Oct 2025 12:38:10 +0000 Subject: [PATCH 60/96] add f1 score to enum --- .../tasks/scripts/semantic_similarity_score_script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_score_script.py b/src/automation/tasks/scripts/semantic_similarity_score_script.py index 6c7daff7..894dfdd8 100644 --- a/src/automation/tasks/scripts/semantic_similarity_score_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_score_script.py @@ -43,7 +43,7 @@ def semantic_similarity_score_main( all_rouge1_f1, all_rougeL_f1, all_sts, all_bert_f1 = [], [], [], [] low_score_indices = [] - for i, (ref, cand) in enumerate(zip(references, candidates, f1_scores)): + for i, (ref, cand, f1) in enumerate(zip(references, candidates, f1_scores)): emb_ref = sts_model.encode(ref, convert_to_tensor=True) emb_cand = sts_model.encode(cand, convert_to_tensor=True) raw_sts = util.cos_sim(emb_cand, emb_ref).item() From 29b650cd3e4aa1943c3e13706ca6e559fcf808ce Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 23 Oct 2025 12:51:05 +0000 Subject: [PATCH 61/96] simplify output path --- .../tasks/scripts/semantic_similarity_score_script.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_score_script.py b/src/automation/tasks/scripts/semantic_similarity_score_script.py index 894dfdd8..325baa89 100644 --- a/src/automation/tasks/scripts/semantic_similarity_score_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_score_script.py @@ -124,7 +124,11 @@ def main(configurations=None, args=None): "STS CosSim": f"{avg_sts:.3f}", } - out_filename = f"scores_{ref_model_jsonl.lower()}__vs__{cand_model_jsonl.lower()}.txt" + from pathlib import Path + + reference_file = Path(reference_file).stem.lower() + candidate_file = Path(candidate_file).stem.lower() + out_filename = f"scores_{reference_file}__vs__{candidate_file}.txt" out_filename = os.path.join(SCORING_DIR,out_filename) # Save results From b84a1024b37d662f482f587fc840570d00941b98 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 23 Oct 2025 12:54:34 +0000 Subject: [PATCH 62/96] add examples and clean up --- examples/semantic_similarity_generate.py | 19 +++++++++++++++++++ examples/semantic_similarity_score.py | 17 +++++++++++++++++ .../semantic_similarity_generate_script.py | 6 ++---- 3 files changed, 38 insertions(+), 4 deletions(-) create mode 100644 examples/semantic_similarity_generate.py create mode 100644 examples/semantic_similarity_score.py diff --git a/examples/semantic_similarity_generate.py b/examples/semantic_similarity_generate.py new file mode 100644 index 00000000..34671e36 --- /dev/null +++ b/examples/semantic_similarity_generate.py @@ -0,0 +1,19 @@ +from automation.tasks import SemanticSimilarityGenerateTask + +task = SemanticSimilarityGenerateTask( + project_name="semantic_similarity_debug", + task_name="semantic_generation_qwen3_14b_base", + #task_name="semantic_generation_qwen3_14b_w4a16", + branch="semantic_similarity", + packages = ["huggingface-hub==0.34.3", "triton==3.3.1", "vllm==0.10.1.1"], + dataset_args = {"alpaca": "tatsu-lab/alpaca", "openplatypus": "garage-bAInd/Open-Platypus", "tulu": "allenai/tulu-3-sft-mixture"}, + model_id="Qwen/Qwen3-14B", + #model_id="RedHatAI/Qwen3-14B-quantized.w4a16", + num_samples_per_dataset=330, + #num_samples_per_dataset=10, 
+ max_new_tokens=1024, + max_model_len=4096, + semantic_similarity_args={"enable-chunked-prefill": True, "enforce_eager": True, "dtype" :"auto", "device_map": "auto", "temperature": 0.0}, +) + +task.execute_remotely("oneshot-a100x1") diff --git a/examples/semantic_similarity_score.py b/examples/semantic_similarity_score.py new file mode 100644 index 00000000..c3d77034 --- /dev/null +++ b/examples/semantic_similarity_score.py @@ -0,0 +1,17 @@ +from automation.tasks import SemanticSimilarityScoreTask + +task = SemanticSimilarityScoreTask( + project_name="semantic_similarity_debug", + task_name="semantic_scoring_4b", + branch="semantic_similarity", + packages = ["huggingface-hub==0.34.3", "networkx==3.4.2", "datasets==4.2.0", "rouge_score==0.1.2", "bert-score==0.3.13", "sentence-transformers==5.1.1", "matplotlib"], + reference_model_project_name="semantic_similarity_debug", + candidate_model_project_name="semantic_similarity_debug", + reference_model_task_name="semantic_generation_qwen3_4b_base", + candidate_model_task_name="semantic_generation_qwen3_4b_w4a16", + sts_model_id="all-MiniLM-L6-v2", + rouge_scores=["rouge1", "rougeL"], + scoring_args={}, +) + +task.execute_remotely("oneshot-a100x1") diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 415de711..97ae142c 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -59,6 +59,8 @@ def semantic_similarity_generate_main( clearml_available, ): from collections import defaultdict + from huggingface_hub import snapshot_download + all_prompts = [] all_samples_dict = defaultdict(list) @@ -88,14 +90,10 @@ def semantic_similarity_generate_main( ) print(">>> Downloading snapshot ...") - from huggingface_hub import snapshot_download snapshot_download(repo_id=model_id, local_dir=HUGGINGFACE_DIR) - print(">>> trigger...") - try: print(">>> Initializing vLLM...") - os.environ["VLLM_LOGGING_LEVEL"]="DEBUG" llm = LLM( model=HUGGINGFACE_DIR, dtype=semantic_similarity_args.get("dtype", "auto"), From c4e1aeabf99181fdeaf2d1beb32d99548906dc51 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 23 Oct 2025 12:58:08 +0000 Subject: [PATCH 63/96] clean up example --- examples/semantic_similarity_generate.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/examples/semantic_similarity_generate.py b/examples/semantic_similarity_generate.py index 34671e36..75681297 100644 --- a/examples/semantic_similarity_generate.py +++ b/examples/semantic_similarity_generate.py @@ -2,15 +2,12 @@ task = SemanticSimilarityGenerateTask( project_name="semantic_similarity_debug", - task_name="semantic_generation_qwen3_14b_base", - #task_name="semantic_generation_qwen3_14b_w4a16", + task_name="semantic_generation_qwen3_14b_w4a16", branch="semantic_similarity", packages = ["huggingface-hub==0.34.3", "triton==3.3.1", "vllm==0.10.1.1"], dataset_args = {"alpaca": "tatsu-lab/alpaca", "openplatypus": "garage-bAInd/Open-Platypus", "tulu": "allenai/tulu-3-sft-mixture"}, - model_id="Qwen/Qwen3-14B", - #model_id="RedHatAI/Qwen3-14B-quantized.w4a16", + model_id="RedHatAI/Qwen3-14B-quantized.w4a16", num_samples_per_dataset=330, - #num_samples_per_dataset=10, max_new_tokens=1024, max_model_len=4096, semantic_similarity_args={"enable-chunked-prefill": True, "enforce_eager": True, "dtype" :"auto", "device_map": "auto", "temperature": 0.0}, From 
7cd5a3af04222584ccc2be4f22ea9a1c1d962f56 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 23 Oct 2025 13:09:28 +0000 Subject: [PATCH 64/96] add scoring args dict --- .../tasks/scripts/semantic_similarity_score_script.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/automation/tasks/scripts/semantic_similarity_score_script.py b/src/automation/tasks/scripts/semantic_similarity_score_script.py index 325baa89..5929ec1c 100644 --- a/src/automation/tasks/scripts/semantic_similarity_score_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_score_script.py @@ -80,11 +80,14 @@ def main(configurations=None, args=None): clearml_model = parse_argument(args["clearml_model"], bool) force_download = parse_argument(args["force_download"], bool) trust_remote_code = parse_argument(args["trust_remote_code"], bool) + scoring_args = args.get("scoring_args", dict) sts_model_id = args.get("sts_model_id", str) rouge_scores= args.get("rouge_scores", list) tags = args.get("tags", None) print(args) + print(scoring_args) + if clearml_available: reference_model_project_name = parse_argument(args["reference_model_project_name"], str) candidate_model_project_name = parse_argument(args["candidate_model_project_name"], str) From d5e4210071bcaf382f706c2997df3fa4960375d9 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 23 Oct 2025 13:26:40 +0000 Subject: [PATCH 65/96] add support for variable score limits --- examples/semantic_similarity_score.py | 8 ++++---- .../scripts/semantic_similarity_score_script.py | 14 ++++++++++++-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/examples/semantic_similarity_score.py b/examples/semantic_similarity_score.py index c3d77034..41f6bbe7 100644 --- a/examples/semantic_similarity_score.py +++ b/examples/semantic_similarity_score.py @@ -2,16 +2,16 @@ task = SemanticSimilarityScoreTask( project_name="semantic_similarity_debug", - task_name="semantic_scoring_4b", + task_name="semantic_scoring_14b", branch="semantic_similarity", packages = ["huggingface-hub==0.34.3", "networkx==3.4.2", "datasets==4.2.0", "rouge_score==0.1.2", "bert-score==0.3.13", "sentence-transformers==5.1.1", "matplotlib"], reference_model_project_name="semantic_similarity_debug", candidate_model_project_name="semantic_similarity_debug", - reference_model_task_name="semantic_generation_qwen3_4b_base", - candidate_model_task_name="semantic_generation_qwen3_4b_w4a16", + reference_model_task_name="semantic_generation_qwen3_14b_base", + candidate_model_task_name="semantic_generation_qwen3_14b_w4a16", sts_model_id="all-MiniLM-L6-v2", rouge_scores=["rouge1", "rougeL"], - scoring_args={}, + scoring_args={"f1": 0.75, "rouge1": 0.5, "sts": 0.75}, ) task.execute_remotely("oneshot-a100x1") diff --git a/src/automation/tasks/scripts/semantic_similarity_score_script.py b/src/automation/tasks/scripts/semantic_similarity_score_script.py index 5929ec1c..9bc69073 100644 --- a/src/automation/tasks/scripts/semantic_similarity_score_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_score_script.py @@ -19,6 +19,9 @@ def semantic_similarity_score_main( candidate_file, sts_model_id, rouge_scores, + bert_score_limit, + rouge1_score_limit, + sts_score_limit, ): # Load reference and candidate data with open(reference_file, "r") as f_ref, open(candidate_file, "r") as f_cand: @@ -58,7 +61,7 @@ def semantic_similarity_score_main( all_bert_f1.append(f1.item()) - if f1 < 0.85 or rouge1 < 0.5 or sts < 0.85: + if f1 < bert_score_limit or rouge1 < rouge1_score_limit or sts < sts_score_limit: 
low_score_indices.append(i) # Compute averages @@ -106,18 +109,25 @@ def main(configurations=None, args=None): reference_file = os.path.join(SCORING_DIR, ref_model_jsonl) candidate_file = os.path.join(SCORING_DIR, cand_model_jsonl) + bert_score_limit = scoring_args.get("f1",0.75) + rouge1_score_limit = scoring_args.get("rouge1",0.6) + sts_score_limit = scoring_args.get("sts",0.75) + avg_bert, avg_rouge1, avg_rougeL, avg_sts, low_score_indices = semantic_similarity_score_main( reference_file, candidate_file, sts_model_id, rouge_scores, + bert_score_limit, + rouge1_score_limit, + sts_score_limit, ) # Print summary print("\n=== Averages (for Google Sheets) ===") print("BERTScore F1 | ROUGE-1 F1 | ROUGE-L F1 | STS CosSim") print(f"{avg_bert:.3f} | {avg_rouge1:.3f} | {avg_rougeL:.3f} | {avg_sts:.3f}") - print("\n=== Low-score indices (BERT < 0.85, ROUGE-1 < 0.5, STS < 0.85) ===") + print(f"\n=== Low-score indices (BERT < {bert_score_limit}, ROUGE-1 < {rouge1_score_limit}, STS < {sts_score_limit}) ===") print(low_score_indices) data = { From 9d349e840198dbff5da63dc85815aaeef7ba18c2 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 28 Oct 2025 18:28:08 +0000 Subject: [PATCH 66/96] clearml get model_id --- .../tasks/scripts/semantic_similarity_generate_script.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 97ae142c..8e39e31d 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -89,8 +89,11 @@ def semantic_similarity_generate_main( stop=["### Instruction:", "### Input:", "### Response:"], ) - print(">>> Downloading snapshot ...") - snapshot_download(repo_id=model_id, local_dir=HUGGINGFACE_DIR) + if clearml_available: + HUGGINGFACE_DIR = Model(model_id).get_local_copy() + else: + print(">>> Downloading snapshot ...") + snapshot_download(repo_id=model_id, local_dir=HUGGINGFACE_DIR) try: print(">>> Initializing vLLM...") From 98609a20e8c59483568771286f95ea434b224215 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 28 Oct 2025 20:16:24 +0000 Subject: [PATCH 67/96] add clearml model import --- .../tasks/scripts/semantic_similarity_generate_script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 8e39e31d..03e3eaa0 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -10,7 +10,7 @@ from automation.utils import kill_process_tree, parse_argument try: - from clearml import OutputModel, Task + from clearml import OutputModel, Task, Model clearml_available = True except ImportError: clearml_available = False From 391ecc5bde102578e9c379371aa6b039e30db765 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 29 Oct 2025 13:34:24 +0000 Subject: [PATCH 68/96] check for clearml model --- .../tasks/scripts/semantic_similarity_generate_script.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 03e3eaa0..28ba11b5 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ 
b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -56,7 +56,7 @@ def semantic_similarity_generate_main( max_model_len, max_new_tokens, num_samples_per_dataset, - clearml_available, + clearml_model, ): from collections import defaultdict from huggingface_hub import snapshot_download @@ -89,7 +89,7 @@ def semantic_similarity_generate_main( stop=["### Instruction:", "### Input:", "### Response:"], ) - if clearml_available: + if clearml_model: HUGGINGFACE_DIR = Model(model_id).get_local_copy() else: print(">>> Downloading snapshot ...") @@ -118,11 +118,12 @@ def main(configurations=None, args=None): if clearml_available: task = Task.current_task() args = task.get_parameters_as_dict(cast=True)["Args"] + clearml_model = parse_argument(args["clearml_model"], bool) else: args = args["Args"] + clearml_model = False # Parse arguments - clearml_model = parse_argument(args["clearml_model"], bool) force_download = parse_argument(args["force_download"], bool) trust_remote_code = parse_argument(args["trust_remote_code"], bool) model_id = parse_argument(args["model_id"], str) @@ -142,7 +143,7 @@ def main(configurations=None, args=None): max_model_len, max_new_tokens, num_samples_per_dataset, - clearml_available, + clearml_model, ) OUTPUT_FILE = os.path.join(RESULTS_DIR,f"{model_id.replace('/', '_')}.jsonl") From 23a5f95706bf92216e8d46a5b6cae890335186e9 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 29 Oct 2025 13:41:41 +0000 Subject: [PATCH 69/96] reference huggingface dir --- .../tasks/scripts/semantic_similarity_generate_script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 28ba11b5..bc9b8fd1 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -16,7 +16,6 @@ clearml_available = False RESULTS_DIR = os.path.join(os.getcwd(), "results") -HUGGINGFACE_DIR = "/home" os.makedirs(RESULTS_DIR, exist_ok=False) def make_alpaca_platypus_prompt(sample): @@ -89,6 +88,7 @@ def semantic_similarity_generate_main( stop=["### Instruction:", "### Input:", "### Response:"], ) + HUGGINGFACE_DIR = "/home" if clearml_model: HUGGINGFACE_DIR = Model(model_id).get_local_copy() else: From 5feeff79d07fa68bfcc86654834d0eec2adf2f37 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 31 Oct 2025 10:56:40 +0000 Subject: [PATCH 70/96] implement semantic feedback --- src/automation/datasets/utils.py | 28 ++++++++++++ .../semantic_similarity_generate_script.py | 43 +++---------------- .../semantic_similarity_score_script.py | 19 ++++---- .../tasks/semantic_similarity_generate.py | 3 -- .../tasks/semantic_similarity_score.py | 14 +++--- 5 files changed, 51 insertions(+), 56 deletions(-) diff --git a/src/automation/datasets/utils.py b/src/automation/datasets/utils.py index 8085a14e..d62324a6 100644 --- a/src/automation/datasets/utils.py +++ b/src/automation/datasets/utils.py @@ -141,3 +141,31 @@ def preprocess_sample(example): return message_processor(messages, processor) return dataset.map(preprocess_sample, remove_columns=ds.column_names) + +def make_alpaca_platypus_prompt(sample): + instruction = sample["instruction"].strip() + input_text = sample.get("input", "").strip() + prompt = ( + f"### Instruction:\n{instruction}\n\n" + f"### Input:\n{input_text if input_text else 'N/A'}\n\n" + f"### Response:\n" + ) + + return prompt + +def 
make_tulu_prompt(sample): + msgs = [] + for m in sample["messages"]: + role = m.get("role", "user") + content = m.get("content", "").strip() + msgs.append(f"{role.upper()}: {content}") + joined = "\n".join(msgs) + prompt = f"### Conversation:\n{joined}\n\n### Response:\n" + + return prompt + +def make_default_prompt(sample): + prompt = f"### Input:\n{json.dumps(sample)}\n\n### Response:\n" + + return prompt + diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index bc9b8fd1..108709a3 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -8,6 +8,7 @@ from transformers import AutoTokenizer from automation.utils import kill_process_tree, parse_argument +from automation.datasets.utils import make_alpaca_platypus_prompt, make_tulu_prompt, make_default_prompt try: from clearml import OutputModel, Task, Model @@ -18,35 +19,6 @@ RESULTS_DIR = os.path.join(os.getcwd(), "results") os.makedirs(RESULTS_DIR, exist_ok=False) -def make_alpaca_platypus_prompt(sample): - instruction = sample["instruction"].strip() - input_text = sample.get("input", "").strip() - prompt = ( - f"### Instruction:\n{instruction}\n\n" - f"### Input:\n{input_text if input_text else 'N/A'}\n\n" - f"### Response:\n" - ) - - return prompt - - -def make_tulu_prompt(sample): - msgs = [] - for m in sample["messages"]: - role = m.get("role", "user") - content = m.get("content", "").strip() - msgs.append(f"{role.upper()}: {content}") - joined = "\n".join(msgs) - prompt = f"### Conversation:\n{joined}\n\n### Response:\n" - - return prompt - - -def make_default_prompt(sample): - prompt = f"### Input:\n{json.dumps(sample)}\n\n### Response:\n" - return prompt - - def semantic_similarity_generate_main( model_id, trust_remote_code, @@ -54,7 +26,6 @@ def semantic_similarity_generate_main( semantic_similarity_args, max_model_len, max_new_tokens, - num_samples_per_dataset, clearml_model, ): from collections import defaultdict @@ -64,17 +35,18 @@ def semantic_similarity_generate_main( all_samples_dict = defaultdict(list) print(">>> Loading dataset...") - for dataset_name,dataset_path in dataset_args.items(): + for dataset_path, num_samples_per_dataset in dataset_args.items(): + dataset_name = dataset_path.split("/")[1].lower() print(f">>> Loading dataset {dataset_name}...") - dataset = load_dataset(dataset_path, split=f"train[:{num_samples_per_dataset}]") + dataset = load_dataset(dataset_path, split=f"train[:{int(num_samples_per_dataset)}]") all_samples_dict[dataset_name].extend(dataset) for dataset_name,dataset_samples in all_samples_dict.items(): print(f">>> Loading values for {dataset_name}...") for sample in dataset_samples: - if dataset_name == "alpaca" or (dataset_name == "openplatypus"): + if dataset_name == "alpaca" or (dataset_name == "open-platypus"): prompt = make_alpaca_platypus_prompt(sample) - elif dataset_name == "tulu": + elif dataset_name == "tulu-3-sft-mixture": prompt = make_tulu_prompt(sample) else: print("Using default prompt") @@ -128,13 +100,11 @@ def main(configurations=None, args=None): trust_remote_code = parse_argument(args["trust_remote_code"], bool) model_id = parse_argument(args["model_id"], str) max_model_len = parse_argument(args["max_model_len"], int) - num_samples_per_dataset = parse_argument(args["num_samples_per_dataset"], int) max_new_tokens = parse_argument(args["max_new_tokens"], int) dataset_args = 
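
# The reworked loading loop above leans on the datasets slice syntax: passing
# split=f"train[:{n}]" returns only the first n rows, which is how the
# per-dataset sample counts carried in dataset_args take effect. The same call
# in isolation (repo id taken from the examples):

from datasets import load_dataset

rows = load_dataset("tatsu-lab/alpaca", split="train[:300]")
assert len(rows) == 300  # the returned Dataset holds exactly the first 300 training rows
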
args.get("dataset_args", None) semantic_similarity_args= args.get("semantic_similarity_args", None) tags = args.get("tags", None) - print(semantic_similarity_args) all_prompts, outputs = semantic_similarity_generate_main( model_id, trust_remote_code, @@ -142,7 +112,6 @@ def main(configurations=None, args=None): semantic_similarity_args, max_model_len, max_new_tokens, - num_samples_per_dataset, clearml_model, ) diff --git a/src/automation/tasks/scripts/semantic_similarity_score_script.py b/src/automation/tasks/scripts/semantic_similarity_score_script.py index 9bc69073..1b447104 100644 --- a/src/automation/tasks/scripts/semantic_similarity_score_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_score_script.py @@ -1,7 +1,4 @@ import json -from bert_score import score -from rouge_score import rouge_scorer -from sentence_transformers import SentenceTransformer, util import os from automation.utils import parse_argument @@ -23,6 +20,10 @@ def semantic_similarity_score_main( rouge1_score_limit, sts_score_limit, ): + from bert_score import score + from rouge_score import rouge_scorer + from sentence_transformers import SentenceTransformer, util + # Load reference and candidate data with open(reference_file, "r") as f_ref, open(candidate_file, "r") as f_cand: reference_data = [json.loads(line) for line in f_ref] @@ -83,13 +84,13 @@ def main(configurations=None, args=None): clearml_model = parse_argument(args["clearml_model"], bool) force_download = parse_argument(args["force_download"], bool) trust_remote_code = parse_argument(args["trust_remote_code"], bool) - scoring_args = args.get("scoring_args", dict) + low_score_threshold_args = args.get("low_score_threshold_args", dict) sts_model_id = args.get("sts_model_id", str) rouge_scores= args.get("rouge_scores", list) tags = args.get("tags", None) print(args) - print(scoring_args) + print(low_score_threshold_args) if clearml_available: reference_model_project_name = parse_argument(args["reference_model_project_name"], str) @@ -109,9 +110,9 @@ def main(configurations=None, args=None): reference_file = os.path.join(SCORING_DIR, ref_model_jsonl) candidate_file = os.path.join(SCORING_DIR, cand_model_jsonl) - bert_score_limit = scoring_args.get("f1",0.75) - rouge1_score_limit = scoring_args.get("rouge1",0.6) - sts_score_limit = scoring_args.get("sts",0.75) + bert_score_limit = low_score_threshold_args.get("f1",0.75) + rouge1_score_limit = low_score_threshold_args.get("rouge1",0.6) + sts_score_limit = low_score_threshold_args.get("sts",0.75) avg_bert, avg_rouge1, avg_rougeL, avg_sts, low_score_indices = semantic_similarity_score_main( reference_file, @@ -133,7 +134,7 @@ def main(configurations=None, args=None): data = { "BERTScore F1": f"{avg_bert:.3f}", "ROUGE-1 F1": f"{avg_rouge1:.3f}", - "ROUGE-1 FL": f"{avg_rougeL:.3f}", + "ROUGE-L F1": f"{avg_rougeL:.3f}", "STS CosSim": f"{avg_sts:.3f}", } diff --git a/src/automation/tasks/semantic_similarity_generate.py b/src/automation/tasks/semantic_similarity_generate.py index 38501241..4385a479 100644 --- a/src/automation/tasks/semantic_similarity_generate.py +++ b/src/automation/tasks/semantic_similarity_generate.py @@ -20,7 +20,6 @@ def __init__( branch: str, max_new_tokens: int, max_model_len: int, - num_samples_per_dataset: Optional[int], dataset_args: Optional[dict]=None, semantic_similarity_args: Optional[dict]=None, docker_image: str=DEFAULT_DOCKER_IMAGE, @@ -72,7 +71,6 @@ def __init__( config_semantic_similarity_args.update(semantic_similarity_args) self.semantic_similarity_args = 
config_semantic_similarity_args - self.num_samples_per_dataset = config_kwargs.pop("num_samples_per_dataset", num_samples_per_dataset) self.max_new_tokens = config_kwargs.pop("max_new_tokens", max_new_tokens) self.max_model_len = config_kwargs.pop("max_model_len", max_model_len) self.trust_remote_code = config_kwargs.pop("trust_remote_code", trust_remote_code) @@ -110,7 +108,6 @@ def get_arguments(self): "clearml_model": self.clearml_model, "force_download": self.force_download, "save_directory": self.save_directory, - "num_samples_per_dataset": self.num_samples_per_dataset, "max_new_tokens": self.max_new_tokens, "max_model_len": self.max_model_len, "trust_remote_code": self.trust_remote_code, diff --git a/src/automation/tasks/semantic_similarity_score.py b/src/automation/tasks/semantic_similarity_score.py index 8a79e40a..0194b6b9 100644 --- a/src/automation/tasks/semantic_similarity_score.py +++ b/src/automation/tasks/semantic_similarity_score.py @@ -21,7 +21,7 @@ def __init__( sts_model_id: str, branch: str, rouge_scores: Optional[len]=None, - scoring_args: Optional[dict]=None, + low_score_threshold_args: Optional[dict]=None, docker_image: str=DEFAULT_DOCKER_IMAGE, packages: Optional[Sequence[str]]=None, clearml_model: bool=False, @@ -64,12 +64,12 @@ def __init__( config_rouge_scores+= rouge_scores self.rouge_scores = config_rouge_scores - if scoring_args is None: - self.scoring_args = config_kwargs.pop("scoring_args", None) + if low_score_threshold_args is None: + self.low_score_threshold_args = config_kwargs.pop("low_score_threshold_args", None) else: - config_scoring_args = config_kwargs.pop("scoring_args", {}) - config_scoring_args.update(scoring_args) - self.scoring_args = config_scoring_args + config_low_score_threshold_args = config_kwargs.pop("low_score_threshold_args", {}) + config_low_score_threshold_args.update(low_score_threshold_args) + self.low_score_threshold_args = config_low_score_threshold_args self.trust_remote_code = config_kwargs.pop("trust_remote_code", trust_remote_code) @@ -110,7 +110,7 @@ def get_arguments(self): "candidate_model_task_name": self.candidate_model_task_name, "sts_model_id": self.sts_model_id, "rouge_scores": self.rouge_scores, - "scoring_args": self.scoring_args, + "low_score_threshold_args": self.low_score_threshold_args, "clearml_model": self.clearml_model, "force_download": self.force_download, "save_directory": self.save_directory, From 4768cf82301ec35135b5cc2c34d68585216386a2 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 31 Oct 2025 11:04:46 +0000 Subject: [PATCH 71/96] add db path debug --- .../tasks/scripts/semantic_similarity_generate_script.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 108709a3..b1007110 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -36,6 +36,7 @@ def semantic_similarity_generate_main( print(">>> Loading dataset...") for dataset_path, num_samples_per_dataset in dataset_args.items(): + print(f"The dataset path is: {dataset_path}") dataset_name = dataset_path.split("/")[1].lower() print(f">>> Loading dataset {dataset_name}...") dataset = load_dataset(dataset_path, split=f"train[:{int(num_samples_per_dataset)}]") From 2b3a4f7b0e585d7e92f88c3b854ada085de29731 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 31 Oct 2025 11:11:11 +0000 Subject: [PATCH 72/96] more 
debug --- .../tasks/scripts/semantic_similarity_generate_script.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index b1007110..e4725712 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -36,6 +36,7 @@ def semantic_similarity_generate_main( print(">>> Loading dataset...") for dataset_path, num_samples_per_dataset in dataset_args.items(): + print(f"The dataset args: {dataset_args}") print(f"The dataset path is: {dataset_path}") dataset_name = dataset_path.split("/")[1].lower() print(f">>> Loading dataset {dataset_name}...") From 6dcfd74abead3515d20cc05b358f192b0523c5ed Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 31 Oct 2025 11:20:57 +0000 Subject: [PATCH 73/96] debug dataset_args --- .../tasks/scripts/semantic_similarity_generate_script.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index e4725712..75357ca1 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -107,6 +107,8 @@ def main(configurations=None, args=None): semantic_similarity_args= args.get("semantic_similarity_args", None) tags = args.get("tags", None) + print(f"Input dataset_args: {dataset_args}") + all_prompts, outputs = semantic_similarity_generate_main( model_id, trust_remote_code, From 11ba9fc7eb478e1670448d2cfc0890ea44e5b0ce Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 31 Oct 2025 11:34:01 +0000 Subject: [PATCH 74/96] hardcode dataset args --- .../tasks/scripts/semantic_similarity_generate_script.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 75357ca1..f2abb3fd 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -108,6 +108,9 @@ def main(configurations=None, args=None): tags = args.get("tags", None) print(f"Input dataset_args: {dataset_args}") + dataset_args = {"tatsu-lab/alpaca" : 300 , "garage-bAInd/Open-Platypus": "310", "allenai/tulu-3-sft-mixture": 320} + + print(f"Hardcode dataset_args: {dataset_args}") all_prompts, outputs = semantic_similarity_generate_main( model_id, From 0da978ee1b019fb5ee1a19bc7262005ee7ea9bad Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 31 Oct 2025 15:47:18 +0000 Subject: [PATCH 75/96] update examples --- examples/semantic_similarity_generate.py | 9 +++++---- examples/semantic_similarity_score.py | 11 +++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/examples/semantic_similarity_generate.py b/examples/semantic_similarity_generate.py index 75681297..22056365 100644 --- a/examples/semantic_similarity_generate.py +++ b/examples/semantic_similarity_generate.py @@ -2,15 +2,16 @@ task = SemanticSimilarityGenerateTask( project_name="semantic_similarity_debug", - task_name="semantic_generation_qwen3_14b_w4a16", + task_name="semantic_generation_qwen3_14b_w4a16_feedback", + #task_name="semantic_generation_qwen3_14b_feedback", branch="semantic_similarity", packages = ["huggingface-hub==0.34.3", "triton==3.3.1", 
"vllm==0.10.1.1"], - dataset_args = {"alpaca": "tatsu-lab/alpaca", "openplatypus": "garage-bAInd/Open-Platypus", "tulu": "allenai/tulu-3-sft-mixture"}, - model_id="RedHatAI/Qwen3-14B-quantized.w4a16", - num_samples_per_dataset=330, + dataset_args = {"tatsu-lab/alpaca" : 300 , "garage-bAInd/Open-Platypus": "310", "allenai/tulu-3-sft-mixture": 320}, + model_id="Qwen/Qwen3-14B", max_new_tokens=1024, max_model_len=4096, semantic_similarity_args={"enable-chunked-prefill": True, "enforce_eager": True, "dtype" :"auto", "device_map": "auto", "temperature": 0.0}, ) task.execute_remotely("oneshot-a100x1") + diff --git a/examples/semantic_similarity_score.py b/examples/semantic_similarity_score.py index 41f6bbe7..c00417ce 100644 --- a/examples/semantic_similarity_score.py +++ b/examples/semantic_similarity_score.py @@ -2,16 +2,19 @@ task = SemanticSimilarityScoreTask( project_name="semantic_similarity_debug", - task_name="semantic_scoring_14b", + #task_name="semantic_scoring_14b", + task_name="semantic_scoring_4b", branch="semantic_similarity", packages = ["huggingface-hub==0.34.3", "networkx==3.4.2", "datasets==4.2.0", "rouge_score==0.1.2", "bert-score==0.3.13", "sentence-transformers==5.1.1", "matplotlib"], reference_model_project_name="semantic_similarity_debug", candidate_model_project_name="semantic_similarity_debug", - reference_model_task_name="semantic_generation_qwen3_14b_base", - candidate_model_task_name="semantic_generation_qwen3_14b_w4a16", + reference_model_task_name="semantic_generation_qwen3_14b_feedback", + #reference_model_task_name="semantic_generation_qwen3_14b_base", + candidate_model_task_name="semantic_generation_qwen3_14b_w4a16_feedback", + #candidate_model_task_name="semantic_generation_qwen3_14b_w4a16", sts_model_id="all-MiniLM-L6-v2", rouge_scores=["rouge1", "rougeL"], - scoring_args={"f1": 0.75, "rouge1": 0.5, "sts": 0.75}, + low_score_threshold_args={"f1": 0.79, "rouge1": 0.65, "sts": 0.71}, ) task.execute_remotely("oneshot-a100x1") From 44f0c620c44f4d0c166d12af007aa93b7f901fbf Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 31 Oct 2025 18:08:13 +0000 Subject: [PATCH 76/96] moved from utils --- src/automation/datasets/__init__.py | 10 ++++++- src/automation/datasets/alpaca.py | 10 +++++++ src/automation/datasets/defaults.py | 5 ++++ src/automation/datasets/openplatypus.py | 10 +++++++ src/automation/datasets/tulu.py | 11 ++++++++ src/automation/datasets/utils.py | 28 ------------------- .../semantic_similarity_generate_script.py | 13 +++++---- .../tasks/semantic_similarity_generate.py | 3 ++ 8 files changed, 56 insertions(+), 34 deletions(-) create mode 100644 src/automation/datasets/alpaca.py create mode 100644 src/automation/datasets/defaults.py create mode 100644 src/automation/datasets/openplatypus.py create mode 100644 src/automation/datasets/tulu.py diff --git a/src/automation/datasets/__init__.py b/src/automation/datasets/__init__.py index 629001aa..4f7c8d28 100644 --- a/src/automation/datasets/__init__.py +++ b/src/automation/datasets/__init__.py @@ -4,6 +4,10 @@ from automation.datasets.openthoughts import DATASET_PATH as OPENTHOUGHTSDATASET from automation.datasets.utils import load_llm_messages, load_vlm_messages from automation.datasets.fleurs import load_fleurs_dataset +from automation.datasets.tulu import make_tulu_prompt +from automation.datasets.openplatypus import make_openplatypus_prompt +from automation.datasets.alpaca import make_alpaca_prompt +from automation.datasets.defaults import make_default_prompt SUPPORTED_DATASETS = { "calibration": 
load_calibration_dataset, @@ -17,6 +21,10 @@ "load_openthoughts_dataset", "load_llm_messages", "load_vlm_messages", + "make_tulu_prompt", + "make_openplatypus_prompt", + "make_alpaca_prompt", + "make_default_prompt", "load_fleurs_dataset", "SUPPORTED_DATASETS", -] \ No newline at end of file +] diff --git a/src/automation/datasets/alpaca.py b/src/automation/datasets/alpaca.py new file mode 100644 index 00000000..8589e1c6 --- /dev/null +++ b/src/automation/datasets/alpaca.py @@ -0,0 +1,10 @@ +def make_alpaca_prompt(sample): + instruction = sample["instruction"].strip() + input_text = sample.get("input", "").strip() + prompt = ( + f"### Instruction:\n{instruction}\n\n" + f"### Input:\n{input_text if input_text else 'N/A'}\n\n" + f"### Response:\n" + ) + + return prompt diff --git a/src/automation/datasets/defaults.py b/src/automation/datasets/defaults.py new file mode 100644 index 00000000..cbcc4572 --- /dev/null +++ b/src/automation/datasets/defaults.py @@ -0,0 +1,5 @@ +def make_default_prompt(sample): + prompt = f"### Input:\n{json.dumps(sample)}\n\n### Response:\n" + + return prompt + diff --git a/src/automation/datasets/openplatypus.py b/src/automation/datasets/openplatypus.py new file mode 100644 index 00000000..8604499d --- /dev/null +++ b/src/automation/datasets/openplatypus.py @@ -0,0 +1,10 @@ +def make_openplatypus_prompt(sample): + instruction = sample["instruction"].strip() + input_text = sample.get("input", "").strip() + prompt = ( + f"### Instruction:\n{instruction}\n\n" + f"### Input:\n{input_text if input_text else 'N/A'}\n\n" + f"### Response:\n" + ) + + return prompt diff --git a/src/automation/datasets/tulu.py b/src/automation/datasets/tulu.py new file mode 100644 index 00000000..82798b10 --- /dev/null +++ b/src/automation/datasets/tulu.py @@ -0,0 +1,11 @@ + +def make_tulu_prompt(sample): + msgs = [] + for m in sample["messages"]: + role = m.get("role", "user") + content = m.get("content", "").strip() + msgs.append(f"{role.upper()}: {content}") + joined = "\n".join(msgs) + prompt = f"### Conversation:\n{joined}\n\n### Response:\n" + + return prompt diff --git a/src/automation/datasets/utils.py b/src/automation/datasets/utils.py index d62324a6..8085a14e 100644 --- a/src/automation/datasets/utils.py +++ b/src/automation/datasets/utils.py @@ -141,31 +141,3 @@ def preprocess_sample(example): return message_processor(messages, processor) return dataset.map(preprocess_sample, remove_columns=ds.column_names) - -def make_alpaca_platypus_prompt(sample): - instruction = sample["instruction"].strip() - input_text = sample.get("input", "").strip() - prompt = ( - f"### Instruction:\n{instruction}\n\n" - f"### Input:\n{input_text if input_text else 'N/A'}\n\n" - f"### Response:\n" - ) - - return prompt - -def make_tulu_prompt(sample): - msgs = [] - for m in sample["messages"]: - role = m.get("role", "user") - content = m.get("content", "").strip() - msgs.append(f"{role.upper()}: {content}") - joined = "\n".join(msgs) - prompt = f"### Conversation:\n{joined}\n\n### Response:\n" - - return prompt - -def make_default_prompt(sample): - prompt = f"### Input:\n{json.dumps(sample)}\n\n### Response:\n" - - return prompt - diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index f2abb3fd..40b99954 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -8,7 +8,10 @@ from transformers import 
AutoTokenizer from automation.utils import kill_process_tree, parse_argument -from automation.datasets.utils import make_alpaca_platypus_prompt, make_tulu_prompt, make_default_prompt +from automation.datasets.tulu import make_tulu_prompt +from automation.datasets.openplatypus import make_openplatypus_prompt +from automation.datasets.alpaca import make_alpaca_prompt +from automation.datasets.defaults import make_default_prompt try: from clearml import OutputModel, Task, Model @@ -36,8 +39,6 @@ def semantic_similarity_generate_main( print(">>> Loading dataset...") for dataset_path, num_samples_per_dataset in dataset_args.items(): - print(f"The dataset args: {dataset_args}") - print(f"The dataset path is: {dataset_path}") dataset_name = dataset_path.split("/")[1].lower() print(f">>> Loading dataset {dataset_name}...") dataset = load_dataset(dataset_path, split=f"train[:{int(num_samples_per_dataset)}]") @@ -46,8 +47,10 @@ def semantic_similarity_generate_main( for dataset_name,dataset_samples in all_samples_dict.items(): print(f">>> Loading values for {dataset_name}...") for sample in dataset_samples: - if dataset_name == "alpaca" or (dataset_name == "open-platypus"): - prompt = make_alpaca_platypus_prompt(sample) + if dataset_name == "alpaca": + prompt = make_alpaca_prompt(sample) + elif dataset_name == "open-platypus": + prompt = make_openplatypus_prompt(sample) elif dataset_name == "tulu-3-sft-mixture": prompt = make_tulu_prompt(sample) else: diff --git a/src/automation/tasks/semantic_similarity_generate.py b/src/automation/tasks/semantic_similarity_generate.py index 4385a479..87ef543e 100644 --- a/src/automation/tasks/semantic_similarity_generate.py +++ b/src/automation/tasks/semantic_similarity_generate.py @@ -56,6 +56,9 @@ def __init__( task_type=task_type, ) + for key in config_kwargs: + if key in kwargs: + raise ValueError(f"{key} already defined in config's args. 
It can't be defined again in task instantiation.") if dataset_args is None: self.dataset_args = config_kwargs.pop("dataset_args", None) From 3b09a3f3d85c9d421e52965df57cedfd9b8b1ac6 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 31 Oct 2025 19:53:50 +0000 Subject: [PATCH 77/96] dataset args through parse --- .../tasks/scripts/semantic_similarity_generate_script.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 40b99954..638ea3b6 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -106,14 +106,15 @@ def main(configurations=None, args=None): model_id = parse_argument(args["model_id"], str) max_model_len = parse_argument(args["max_model_len"], int) max_new_tokens = parse_argument(args["max_new_tokens"], int) - dataset_args = args.get("dataset_args", None) + #dataset_args = args.get("dataset_args", None) + dataset_args = parse_argument(args["dataset_args"], dict) semantic_similarity_args= args.get("semantic_similarity_args", None) tags = args.get("tags", None) - print(f"Input dataset_args: {dataset_args}") - dataset_args = {"tatsu-lab/alpaca" : 300 , "garage-bAInd/Open-Platypus": "310", "allenai/tulu-3-sft-mixture": 320} + print(f"Input dataset_args post parse : {dataset_args}") + #dataset_args = {"tatsu-lab/alpaca" : 300 , "garage-bAInd/Open-Platypus": "310", "allenai/tulu-3-sft-mixture": 320} - print(f"Hardcode dataset_args: {dataset_args}") + #print(f"Hardcode dataset_args: {dataset_args}") all_prompts, outputs = semantic_similarity_generate_main( model_id, From 640ae0a668e4aa4cd244ce7a9c63aa9072630fe3 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 31 Oct 2025 20:00:41 +0000 Subject: [PATCH 78/96] add more dataset arg prints --- .../tasks/scripts/semantic_similarity_generate_script.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 638ea3b6..9f8bef53 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -100,6 +100,8 @@ def main(configurations=None, args=None): args = args["Args"] clearml_model = False + dataset_args = args["dataset_args"] + print(f"Input dataset_args pre parse : {dataset_args}") # Parse arguments force_download = parse_argument(args["force_download"], bool) trust_remote_code = parse_argument(args["trust_remote_code"], bool) From ae9d928bd7e122b0c41f5d400748a73e7f480b52 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 31 Oct 2025 20:49:58 +0000 Subject: [PATCH 79/96] add dict flattening --- .../semantic_similarity_generate_script.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 9f8bef53..9193ec83 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -91,6 +91,13 @@ def semantic_similarity_generate_main( return all_prompts, outputs +def flatten_nested_dict(nested_dataset_args): + flattened_dict = {} + for org, datasets in nested_dataset_args.items(): + for dataset, 
count in datasets.items(): + flattened_dict[f"{org}/{dataset}"] = count + return flattened_dict + def main(configurations=None, args=None): if clearml_available: task = Task.current_task() @@ -100,8 +107,11 @@ def main(configurations=None, args=None): args = args["Args"] clearml_model = False - dataset_args = args["dataset_args"] - print(f"Input dataset_args pre parse : {dataset_args}") + nested_dataset_args = parse_argument(args["dataset_args"], dict) + print(f"Input dataset_args : {nested_dataset_args}") + + dataset_args = flatten_nested_dict(nested_dataset_args) + print(f"Input dataset_args post parse : {dataset_args}") # Parse arguments force_download = parse_argument(args["force_download"], bool) trust_remote_code = parse_argument(args["trust_remote_code"], bool) @@ -109,11 +119,9 @@ def main(configurations=None, args=None): max_model_len = parse_argument(args["max_model_len"], int) max_new_tokens = parse_argument(args["max_new_tokens"], int) #dataset_args = args.get("dataset_args", None) - dataset_args = parse_argument(args["dataset_args"], dict) semantic_similarity_args= args.get("semantic_similarity_args", None) tags = args.get("tags", None) - print(f"Input dataset_args post parse : {dataset_args}") #dataset_args = {"tatsu-lab/alpaca" : 300 , "garage-bAInd/Open-Platypus": "310", "allenai/tulu-3-sft-mixture": 320} #print(f"Hardcode dataset_args: {dataset_args}") From e74e0d68908289f182c17c84a4c91ba4e8462af6 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 3 Nov 2025 09:49:24 +0000 Subject: [PATCH 80/96] added dictionary flattening --- .../semantic_similarity_generate_script.py | 19 ++----------------- src/automation/utils.py | 9 ++++++++- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py index 9193ec83..2684c5b0 100644 --- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py +++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py @@ -7,7 +7,7 @@ from vllm import LLM, SamplingParams from transformers import AutoTokenizer -from automation.utils import kill_process_tree, parse_argument +from automation.utils import kill_process_tree, parse_argument, flatten_nested_dict from automation.datasets.tulu import make_tulu_prompt from automation.datasets.openplatypus import make_openplatypus_prompt from automation.datasets.alpaca import make_alpaca_prompt @@ -91,12 +91,6 @@ def semantic_similarity_generate_main( return all_prompts, outputs -def flatten_nested_dict(nested_dataset_args): - flattened_dict = {} - for org, datasets in nested_dataset_args.items(): - for dataset, count in datasets.items(): - flattened_dict[f"{org}/{dataset}"] = count - return flattened_dict def main(configurations=None, args=None): if clearml_available: @@ -107,25 +101,16 @@ def main(configurations=None, args=None): args = args["Args"] clearml_model = False - nested_dataset_args = parse_argument(args["dataset_args"], dict) - print(f"Input dataset_args : {nested_dataset_args}") - - dataset_args = flatten_nested_dict(nested_dataset_args) - print(f"Input dataset_args post parse : {dataset_args}") # Parse arguments force_download = parse_argument(args["force_download"], bool) trust_remote_code = parse_argument(args["trust_remote_code"], bool) model_id = parse_argument(args["model_id"], str) max_model_len = parse_argument(args["max_model_len"], int) max_new_tokens = parse_argument(args["max_new_tokens"], int) - #dataset_args = 
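
# Why flatten_nested_dict() exists: dataset_args is keyed by Hugging Face repo
# ids such as "garage-bAInd/Open-Platypus", and the "/" in those keys appears to
# come back from ClearML's parameter casting as an extra level of nesting. The
# helper restores the flat repo-id keys:

nested = {"tatsu-lab": {"alpaca": 300}, "garage-bAInd": {"Open-Platypus": "310"}}
assert flatten_nested_dict(nested) == {
    "tatsu-lab/alpaca": 300,
    "garage-bAInd/Open-Platypus": "310",
}
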
args.get("dataset_args", None) + dataset_args = flatten_nested_dict(parse_argument(args["dataset_args"], dict)) semantic_similarity_args= args.get("semantic_similarity_args", None) tags = args.get("tags", None) - #dataset_args = {"tatsu-lab/alpaca" : 300 , "garage-bAInd/Open-Platypus": "310", "allenai/tulu-3-sft-mixture": 320} - - #print(f"Hardcode dataset_args: {dataset_args}") - all_prompts, outputs = semantic_similarity_generate_main( model_id, trust_remote_code, diff --git a/src/automation/utils.py b/src/automation/utils.py index 7b4263f9..c90d3ff9 100644 --- a/src/automation/utils.py +++ b/src/automation/utils.py @@ -177,4 +177,11 @@ def merge_dicts(d1, d2): else: raise ValueError(f"{key} already defined. It can't be defined again.") d1.update(d2) - return d1 \ No newline at end of file + return d1 + +def flatten_nested_dict(nested_dataset_args): + flattened_dict = {} + for org, datasets in nested_dataset_args.items(): + for dataset, count in datasets.items(): + flattened_dict[f"{org}/{dataset}"] = count + return flattened_dict From 0ee0288d29e8cb786012103ba407a7150ea8ea1e Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 11 Nov 2025 20:54:45 +0000 Subject: [PATCH 81/96] update prompt to chat --- src/automation/datasets/alpaca.py | 24 ++++++++++++++++++------ src/automation/datasets/openplatypus.py | 23 +++++++++++++++++------ src/automation/datasets/tulu.py | 6 ++++++ 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/src/automation/datasets/alpaca.py b/src/automation/datasets/alpaca.py index 8589e1c6..7d07be4d 100644 --- a/src/automation/datasets/alpaca.py +++ b/src/automation/datasets/alpaca.py @@ -1,10 +1,22 @@ def make_alpaca_prompt(sample): instruction = sample["instruction"].strip() input_text = sample.get("input", "").strip() - prompt = ( - f"### Instruction:\n{instruction}\n\n" - f"### Input:\n{input_text if input_text else 'N/A'}\n\n" - f"### Response:\n" - ) - return prompt + if input_text == "": + messages = [ + { + "role": "user", + "content": f"{instruction}", + } + ] + + + else: + messages = [ + { + "role": "user", + "content": f"{instruction}\n{input_text}", + } + ] + + return messages diff --git a/src/automation/datasets/openplatypus.py b/src/automation/datasets/openplatypus.py index 8604499d..6b6f2116 100644 --- a/src/automation/datasets/openplatypus.py +++ b/src/automation/datasets/openplatypus.py @@ -1,10 +1,21 @@ def make_openplatypus_prompt(sample): instruction = sample["instruction"].strip() input_text = sample.get("input", "").strip() - prompt = ( - f"### Instruction:\n{instruction}\n\n" - f"### Input:\n{input_text if input_text else 'N/A'}\n\n" - f"### Response:\n" - ) - return prompt + if input_text == "": + messages = [ + { + "role": "user", + "content": f"{instruction}", + } + ] + + else: + messages = [ + { + "role": "user", + "content": f"{instruction}\n{input_text}", + } + ] + + return messages diff --git a/src/automation/datasets/tulu.py b/src/automation/datasets/tulu.py index 82798b10..cc9fc75e 100644 --- a/src/automation/datasets/tulu.py +++ b/src/automation/datasets/tulu.py @@ -1,4 +1,9 @@ +def make_tulu_prompt(sample): + return sample["messages"] + + +""" def make_tulu_prompt(sample): msgs = [] for m in sample["messages"]: @@ -9,3 +14,4 @@ def make_tulu_prompt(sample): prompt = f"### Conversation:\n{joined}\n\n### Response:\n" return prompt +""" From 8c71fac6e4c19cb2aba1559d8ed6719c1b336c18 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 11 Nov 2025 22:38:21 +0000 Subject: [PATCH 82/96] string output prompts --- 
 src/automation/datasets/alpaca.py                        | 3 ++-
 src/automation/datasets/defaults.py                      | 9 ++++++++-
 src/automation/datasets/openplatypus.py                  | 3 ++-
 src/automation/datasets/tulu.py                          | 9 +--------
 .../tasks/scripts/semantic_similarity_generate_script.py | 3 +--
 5 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/src/automation/datasets/alpaca.py b/src/automation/datasets/alpaca.py
index 7d07be4d..67c4ccbf 100644
--- a/src/automation/datasets/alpaca.py
+++ b/src/automation/datasets/alpaca.py
@@ -19,4 +19,5 @@ def make_alpaca_prompt(sample):
             }
         ]
 
-    return messages
+    prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
+    return prompt

diff --git a/src/automation/datasets/defaults.py b/src/automation/datasets/defaults.py
index cbcc4572..36ab8953 100644
--- a/src/automation/datasets/defaults.py
+++ b/src/automation/datasets/defaults.py
@@ -1,5 +1,12 @@
 
 def make_default_prompt(sample):
-    prompt = f"### Input:\n{json.dumps(sample)}\n\n### Response:\n"
+    messages = [
+        {
+            "role": "user",
+            "content": f"{json.dumps(sample)}",
+        }
+    ]
+
+    prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
 
     return prompt

diff --git a/src/automation/datasets/openplatypus.py b/src/automation/datasets/openplatypus.py
index 6b6f2116..7761b3d9 100644
--- a/src/automation/datasets/openplatypus.py
+++ b/src/automation/datasets/openplatypus.py
@@ -18,4 +18,5 @@ def make_openplatypus_prompt(sample):
             }
         ]
 
-    return messages
+    prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
+    return prompt

diff --git a/src/automation/datasets/tulu.py b/src/automation/datasets/tulu.py
index cc9fc75e..537722db 100644
--- a/src/automation/datasets/tulu.py
+++ b/src/automation/datasets/tulu.py
@@ -1,17 +1,10 @@
 
-def make_tulu_prompt(sample):
-    return sample["messages"]
-
-
-"""
 def make_tulu_prompt(sample):
     msgs = []
     for m in sample["messages"]:
         role = m.get("role", "user")
         content = m.get("content", "").strip()
         msgs.append(f"{role.upper()}: {content}")
-    joined = "\n".join(msgs)
-    prompt = f"### Conversation:\n{joined}\n\n### Response:\n"
+    prompt = "\n".join(msgs)
 
     return prompt
-"""

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index 2684c5b0..ce0db09a 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -61,8 +61,7 @@ def semantic_similarity_generate_main(
     print("Define sampling parameters")
     sampling_params = SamplingParams(
         temperature=semantic_similarity_args.get("temperature", 0.0),
-        max_tokens=max_new_tokens,
-        stop=["### Instruction:", "### Input:", "### Response:"],
+        max_tokens=max_new_tokens
     )
 
     HUGGINGFACE_DIR = "/home"

From 1ae047a4db19bb1216d39679b84e634f435f0a6e Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Wed, 12 Nov 2025 03:20:20 +0000
Subject: [PATCH 83/96] moved from prompt to conversation

---
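Note: this switches generation from llm.generate(prompts, ...) to
llm.chat(...), letting vLLM apply the model's chat template to the
message lists built earlier. As committed, the call
llm.chat(messages=all_conversations , sampling_params) places a
positional argument after a keyword argument, which Python rejects; the
next commit rewrites the call. A sketch of the intended usage, with an
illustrative model id:

    from vllm import LLM, SamplingParams

    llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")  # any chat-tuned model
    params = SamplingParams(temperature=0.0, max_tokens=64)
    conversations = [[{"role": "user", "content": "Hello!"}]]
    outputs = llm.chat(messages=conversations, sampling_params=params)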
 .../tasks/scripts/semantic_similarity_generate_script.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index ce0db09a..5e8c7049 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -84,11 +84,14 @@ def semantic_similarity_generate_main(
         )
         print("Completed the model initialization ")
         print(">>> Running vLLM generation...")
-        outputs = llm.generate(all_prompts, sampling_params)
+        all_conversations = all_prompts.copy()
+        #outputs = llm.generate(all_prompts, sampling_params)
+        outputs = llm.chat(messages=all_conversations , sampling_params)
     except Exception as e:
         print(f"Error initializing LLM: {e}")
 
-    return all_prompts, outputs
+    return all_conversations, outputs
+    #return all_prompts, outputs
 
 
 def main(configurations=None, args=None):

From e382646537051bcef1340f708fd11d8166f77379 Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Wed, 12 Nov 2025 04:04:07 +0000
Subject: [PATCH 84/96] retry with tqdm

---
 .../tasks/scripts/semantic_similarity_generate_script.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index 5e8c7049..d51bebe0 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -86,7 +86,7 @@ def semantic_similarity_generate_main(
         print(">>> Running vLLM generation...")
         all_conversations = all_prompts.copy()
         #outputs = llm.generate(all_prompts, sampling_params)
-        outputs = llm.chat(messages=all_conversations , sampling_params)
+        outputs = llm.chat(messages=all_conversations , sampling_params=sampling_params, use_tqdm=True)
     except Exception as e:
         print(f"Error initializing LLM: {e}")

From a3ecdb58cbee38e6069748a812da6e755d02a0db Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Wed, 12 Nov 2025 04:22:47 +0000
Subject: [PATCH 85/96] re-add messages list

---
 src/automation/datasets/alpaca.py       | 5 +++--
 src/automation/datasets/openplatypus.py | 3 ++-
 src/automation/datasets/tulu.py         | 6 ++++++
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/automation/datasets/alpaca.py b/src/automation/datasets/alpaca.py
index 67c4ccbf..44e13209 100644
--- a/src/automation/datasets/alpaca.py
+++ b/src/automation/datasets/alpaca.py
@@ -19,5 +19,6 @@ def make_alpaca_prompt(sample):
             }
         ]
 
-    prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
-    return prompt
+    #prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
+    return messages
+    #return prompt

diff --git a/src/automation/datasets/openplatypus.py b/src/automation/datasets/openplatypus.py
index 7761b3d9..cfe7f334 100644
--- a/src/automation/datasets/openplatypus.py
+++ b/src/automation/datasets/openplatypus.py
@@ -19,4 +19,5 @@ def make_openplatypus_prompt(sample):
         ]
 
     prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
-    return prompt
+    return messages
+    #return prompt

diff --git a/src/automation/datasets/tulu.py b/src/automation/datasets/tulu.py
index 537722db..ec1d7caf 100644
--- a/src/automation/datasets/tulu.py
+++ b/src/automation/datasets/tulu.py
@@ -1,4 +1,9 @@
 
+def make_tulu_prompt(sample):
+    return sample["messages"]
+
+
+"""
 def make_tulu_prompt(sample):
     msgs = []
     for m in sample["messages"]:
@@ -8,3 +13,4 @@ def make_tulu_prompt(sample):
     prompt = "\n".join(msgs)
 
     return prompt
+"""

From 9f56b0beaef75b91f6b56c00de8fc4074658db68 Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Wed, 12 Nov 2025 04:51:58 +0000
Subject: [PATCH 86/96] clean up convos

---
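Note: besides deleting the dead string-prompt code paths, this commit
sorts the per-dataset sample dict before iterating, so conversations
are built in a deterministic dataset order no matter how dataset_args
was supplied. A sketch of the idea with toy data:

    from collections import defaultdict

    all_samples_dict = defaultdict(list)
    all_samples_dict["tulu"].append({"messages": []})
    all_samples_dict["alpaca"].append({"instruction": "hi", "input": ""})
    # dict(sorted(...)) yields "alpaca" before "tulu" on every run
    sorted_all_samples_dict = dict(sorted(all_samples_dict.items()))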
 src/automation/datasets/alpaca.py          |  2 --
 src/automation/datasets/openplatypus.py    |  2 --
 src/automation/datasets/tulu.py            | 13 -------------
 .../semantic_similarity_generate_script.py | 15 +++++++--------
 4 files changed, 7 insertions(+), 25 deletions(-)

diff --git a/src/automation/datasets/alpaca.py b/src/automation/datasets/alpaca.py
index 44e13209..7d07be4d 100644
--- a/src/automation/datasets/alpaca.py
+++ b/src/automation/datasets/alpaca.py
@@ -19,6 +19,4 @@ def make_alpaca_prompt(sample):
             }
         ]
 
-    #prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
     return messages
-    #return prompt

diff --git a/src/automation/datasets/openplatypus.py b/src/automation/datasets/openplatypus.py
index cfe7f334..6b6f2116 100644
--- a/src/automation/datasets/openplatypus.py
+++ b/src/automation/datasets/openplatypus.py
@@ -18,6 +18,4 @@ def make_openplatypus_prompt(sample):
             }
         ]
 
-    prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
     return messages
-    #return prompt

diff --git a/src/automation/datasets/tulu.py b/src/automation/datasets/tulu.py
index ec1d7caf..6dcf2ac8 100644
--- a/src/automation/datasets/tulu.py
+++ b/src/automation/datasets/tulu.py
@@ -1,16 +1,3 @@
 
 def make_tulu_prompt(sample):
     return sample["messages"]
-
-
-"""
-def make_tulu_prompt(sample):
-    msgs = []
-    for m in sample["messages"]:
-        role = m.get("role", "user")
-        content = m.get("content", "").strip()
-        msgs.append(f"{role.upper()}: {content}")
-    prompt = "\n".join(msgs)
-
-    return prompt
-"""

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index d51bebe0..f7475768 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -34,7 +34,7 @@ def semantic_similarity_generate_main(
     from collections import defaultdict
     from huggingface_hub import snapshot_download
 
-    all_prompts = []
+    all_conversations = []
    all_samples_dict = defaultdict(list)
 
     print(">>> Loading dataset...")
@@ -44,7 +44,9 @@ def semantic_similarity_generate_main(
         dataset = load_dataset(dataset_path, split=f"train[:{int(num_samples_per_dataset)}]")
         all_samples_dict[dataset_name].extend(dataset)
 
+    sorted_all_samples_dict = dict(sorted(all_samples_dict.items()))
+
-    for dataset_name,dataset_samples in all_samples_dict.items():
+    for dataset_name,dataset_samples in sorted_all_samples_dict.items():
         print(f">>> Loading values for {dataset_name}...")
         for sample in dataset_samples:
             if dataset_name == "alpaca":
@@ -56,7 +58,7 @@ def semantic_similarity_generate_main(
             else:
                 print("Using default prompt")
                 prompt = make_default_prompt(sample)
-            all_prompts.append(prompt)
+            all_conversations.append(prompt)
 
     print("Define sampling parameters")
     sampling_params = SamplingParams(
@@ -84,14 +86,11 @@ def semantic_similarity_generate_main(
         )
         print("Completed the model initialization ")
         print(">>> Running vLLM generation...")
-        all_conversations = all_prompts.copy()
-        #outputs = llm.generate(all_prompts, sampling_params)
         outputs = llm.chat(messages=all_conversations , sampling_params=sampling_params, use_tqdm=True)
     except Exception as e:
         print(f"Error initializing LLM: {e}")
 
     return all_conversations, outputs
-    #return all_prompts, outputs
 
 
 def main(configurations=None, args=None):
@@ -113,7 +112,7 @@ def main(configurations=None, args=None):
     semantic_similarity_args= args.get("semantic_similarity_args", None)
     tags = args.get("tags", None)
 
-    all_prompts, outputs = semantic_similarity_generate_main(
+    all_conversations, outputs = semantic_similarity_generate_main(
         model_id,
         trust_remote_code,
         dataset_args,
@@ -126,7 +125,7 @@ def main(configurations=None, args=None):
     OUTPUT_FILE = os.path.join(RESULTS_DIR,f"{model_id.replace('/', '_')}.jsonl")
     print(">>> Writing outputs to file...")
     with open(OUTPUT_FILE, "w") as fout:
-        for idx, (prompt, output) in enumerate(zip(all_prompts, outputs)):
+        for idx, (prompt, output) in enumerate(zip(all_conversations, outputs)):
             response = output.outputs[0].text.strip()
             fout.write(json.dumps({
                 "index": idx,

From 57a25960e68525ac0c6abfeedf580aaf26b58465 Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Thu, 13 Nov 2025 10:21:38 +0000
Subject: [PATCH 87/96] add debug to know which model is being initialised

---
 .../tasks/scripts/semantic_similarity_generate_script.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index f7475768..cea05784 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -74,7 +74,7 @@ def semantic_similarity_generate_main(
         snapshot_download(repo_id=model_id, local_dir=HUGGINGFACE_DIR)
 
     try:
-        print(">>> Initializing vLLM...")
+        print(f"Initializing vLLM: {model_id}...")
         llm = LLM(
             model=HUGGINGFACE_DIR,
             dtype=semantic_similarity_args.get("dtype", "auto"),

From b36b9988a511163bc2f0aa03089afe1712440fcb Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Thu, 13 Nov 2025 10:46:12 +0000
Subject: [PATCH 88/96] add mistral exception

---
 .../tasks/scripts/semantic_similarity_generate_script.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index cea05784..5c654037 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -71,12 +71,13 @@ def semantic_similarity_generate_main(
         HUGGINGFACE_DIR = Model(model_id).get_local_copy()
     else:
         print(">>> Downloading snapshot ...")
-        snapshot_download(repo_id=model_id, local_dir=HUGGINGFACE_DIR)
+        if "mistral" not in model_id :
+            snapshot_download(repo_id=model_id, local_dir=HUGGINGFACE_DIR)
 
     try:
         print(f"Initializing vLLM: {model_id}...")
         llm = LLM(
-            model=HUGGINGFACE_DIR,
+            model=model_id if "mistral" in model_id else HUGGINGFACE_DIR,
             dtype=semantic_similarity_args.get("dtype", "auto"),
             trust_remote_code=trust_remote_code,
             tensor_parallel_size=device_count(),

From 91009b93d40af0ae18326d7be9c97e5807c7f9d6 Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Thu, 13 Nov 2025 10:52:16 +0000
Subject: [PATCH 89/96] allow existing results dir

---
 .../tasks/scripts/semantic_similarity_generate_script.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index 5c654037..f588c785 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -20,7 +20,7 @@
 clearml_available = False
 
 RESULTS_DIR = os.path.join(os.getcwd(), "results")
-os.makedirs(RESULTS_DIR, exist_ok=False)
+os.makedirs(RESULTS_DIR, exist_ok=True)
 
 def semantic_similarity_generate_main(

From 3c400ecaa8c997ae113f64ef704c36fe3a3e968c Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Thu, 13 Nov 2025 10:58:55 +0000
Subject: [PATCH 90/96] snapshot download only

---
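Note: this reverts the mistral special case introduced in "add mistral
exception"; every model is snapshotted locally again and vLLM always
loads from the local directory. A sketch of the flow this restores;
the paths and repo id are illustrative:

    from huggingface_hub import snapshot_download
    from vllm import LLM

    local_dir = "/home"  # HUGGINGFACE_DIR in the script
    snapshot_download(repo_id="org/some-model", local_dir=local_dir)
    llm = LLM(model=local_dir, dtype="auto")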
 .../tasks/scripts/semantic_similarity_generate_script.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index f588c785..6f5d1d25 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -70,14 +70,12 @@ def semantic_similarity_generate_main(
     if clearml_model:
         HUGGINGFACE_DIR = Model(model_id).get_local_copy()
     else:
-        print(">>> Downloading snapshot ...")
-        if "mistral" not in model_id :
-            snapshot_download(repo_id=model_id, local_dir=HUGGINGFACE_DIR)
+        snapshot_download(repo_id=model_id, local_dir=HUGGINGFACE_DIR)
 
     try:
         print(f"Initializing vLLM: {model_id}...")
         llm = LLM(
-            model=model_id if "mistral" in model_id else HUGGINGFACE_DIR,
+            model=HUGGINGFACE_DIR,
             dtype=semantic_similarity_args.get("dtype", "auto"),
             trust_remote_code=trust_remote_code,
             tensor_parallel_size=device_count(),

From e21f100602930e4cb58bab9d13bbf5074eec0365 Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Thu, 13 Nov 2025 18:07:51 +0000
Subject: [PATCH 91/96] add tokenizer mode for mistral

---
 .../tasks/scripts/semantic_similarity_generate_script.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index 6f5d1d25..34b197b7 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -81,11 +81,12 @@ def semantic_similarity_generate_main(
             tensor_parallel_size=device_count(),
             enforce_eager=semantic_similarity_args.get("enforce_eager", True),
             enable_chunked_prefill=semantic_similarity_args.get("enable_chunked_prefill", True),
-            max_model_len=max_model_len
+            max_model_len=max_model_len,
+            tokenizer_mode="mistral" if "mistral" in model_id.lower() else "auto"
         )
         print("Completed the model initialization ")
         print(">>> Running vLLM generation...")
-        outputs = llm.chat(messages=all_conversations , sampling_params=sampling_params, use_tqdm=True)
+        outputs = llm.chat(messages=all_conversations, sampling_params=sampling_params)
     except Exception as e:
         print(f"Error initializing LLM: {e}")

From 96e33be7cb8ab21ca2fdf4c289a1d77138e27ca1 Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Thu, 13 Nov 2025 23:43:34 +0000
Subject: [PATCH 92/96] llm direct from model id

---
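Note: for mistral-family ids this commit hands the raw repo id to vLLM
instead of the local snapshot directory. Caution: commenting out
snapshot_download below leaves the else: branch holding only a comment,
which is a syntax error until the next commit inserts a print
statement. A sketch of the selection logic; the ids are illustrative:

    model_id = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
    HUGGINGFACE_DIR = "/home"
    model_source = model_id if "mistral" in model_id.lower() else HUGGINGFACE_DIR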
 .../tasks/scripts/semantic_similarity_generate_script.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index 34b197b7..f373c0d3 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -70,12 +70,12 @@ def semantic_similarity_generate_main(
     if clearml_model:
         HUGGINGFACE_DIR = Model(model_id).get_local_copy()
     else:
-        snapshot_download(repo_id=model_id, local_dir=HUGGINGFACE_DIR)
+        #snapshot_download(repo_id=model_id, local_dir=HUGGINGFACE_DIR)
 
     try:
         print(f"Initializing vLLM: {model_id}...")
         llm = LLM(
-            model=HUGGINGFACE_DIR,
+            model= model_id if "mistral" in model_id.lower() else HUGGINGFACE_DIR,
             dtype=semantic_similarity_args.get("dtype", "auto"),
             trust_remote_code=trust_remote_code,
             tensor_parallel_size=device_count(),

From ff351b9a391d13a121030961432b1203fc861af6 Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Thu, 13 Nov 2025 23:48:45 +0000
Subject: [PATCH 93/96] add print

---
 .../tasks/scripts/semantic_similarity_generate_script.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index f373c0d3..c9fe5585 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -70,6 +70,7 @@ def semantic_similarity_generate_main(
     if clearml_model:
         HUGGINGFACE_DIR = Model(model_id).get_local_copy()
     else:
+        print("Download snapshot")
         #snapshot_download(repo_id=model_id, local_dir=HUGGINGFACE_DIR)
 
     try:

From 48c0e6d6b53fc11484a56dc31a9b11cde429510b Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Fri, 14 Nov 2025 00:13:35 +0000
Subject: [PATCH 94/96] fix format

---
 .../tasks/scripts/semantic_similarity_generate_script.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index c9fe5585..7ee316c9 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -83,6 +83,8 @@ def semantic_similarity_generate_main(
             enforce_eager=semantic_similarity_args.get("enforce_eager", True),
             enable_chunked_prefill=semantic_similarity_args.get("enable_chunked_prefill", True),
             max_model_len=max_model_len,
+            load_format="mistral",
+            config_format="mistral",
             tokenizer_mode="mistral" if "mistral" in model_id.lower() else "auto"
         )
         print("Completed the model initialization ")
         print(">>> Running vLLM generation...")

From ce27ec419cda2db988763e875d15d55b9577eae3 Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Fri, 14 Nov 2025 00:23:26 +0000
Subject: [PATCH 95/96] add list dir

---
 .../tasks/scripts/semantic_similarity_generate_script.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index 7ee316c9..65d23c2c 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -71,12 +71,14 @@ def semantic_similarity_generate_main(
         HUGGINGFACE_DIR = Model(model_id).get_local_copy()
     else:
         print("Download snapshot")
-        #snapshot_download(repo_id=model_id, local_dir=HUGGINGFACE_DIR)
+        snapshot_download(repo_id=model_id, local_dir=HUGGINGFACE_DIR)
+        print(os.listdir(HUGGINGFACE_DIR))
 
     try:
         print(f"Initializing vLLM: {model_id}...")
         llm = LLM(
-            model= model_id if "mistral" in model_id.lower() else HUGGINGFACE_DIR,
+            model= HUGGINGFACE_DIR,
+            #model= model_id if "mistral" in model_id.lower() else HUGGINGFACE_DIR,
             dtype=semantic_similarity_args.get("dtype", "auto"),
             trust_remote_code=trust_remote_code,
             tensor_parallel_size=device_count(),

From aeaca8fdfc3e908b1a872786d8acbe9942a8051c Mon Sep 17 00:00:00 2001
From: chibu <>
Date: Fri, 14 Nov 2025 00:37:42 +0000
Subject: [PATCH 96/96] add params download

---
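Note: with config_format="mistral" (set two commits back), vLLM reads
the model configuration from params.json; this commit fetches that file
explicitly, presumably because the snapshot did not place it where
expected. The hardcoded repo id below is exactly what the patch ships,
so only that one model is covered; a parameterized variant would look
like this sketch (an assumption, not what the patch does):

    from huggingface_hub import hf_hub_download

    if "mistral" in model_id.lower():
        # Fetch params.json from the model actually being evaluated.
        hf_hub_download(repo_id=model_id, filename="params.json",
                        local_dir=HUGGINGFACE_DIR)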
 .../tasks/scripts/semantic_similarity_generate_script.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/automation/tasks/scripts/semantic_similarity_generate_script.py b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
index 65d23c2c..be1e2630 100644
--- a/src/automation/tasks/scripts/semantic_similarity_generate_script.py
+++ b/src/automation/tasks/scripts/semantic_similarity_generate_script.py
@@ -73,6 +73,9 @@ def semantic_similarity_generate_main(
         print("Download snapshot")
         snapshot_download(repo_id=model_id, local_dir=HUGGINGFACE_DIR)
         print(os.listdir(HUGGINGFACE_DIR))
+        if "mistral" in model_id.lower():
+            from huggingface_hub import hf_hub_download
+            hf_hub_download(repo_id="mistralai/Mistral-Small-3.1-24B-Instruct-2503", filename="params.json", local_dir=HUGGINGFACE_DIR)
 
     try:
         print(f"Initializing vLLM: {model_id}...")