 
 from aiu_fms_testing_utils.utils.aiu_setup import dprint, rank, world_size
 from transformers.tokenization_utils_base import PreTrainedTokenizerBase
+from aiu_fms_testing_utils.testing.utils import format_kwargs_to_string
 
 from fms.utils.generation import pad_input_ids
 import torch
@@ -85,7 +86,7 @@ def warmup_model(
         **extra_kwargs,
     )
 
-    extra_kwargs = {**_extra_kwargs, "only_last_token": "paged" not in attn_name}
+    extra_kwargs = {**_extra_kwargs, "last_n_tokens": 64 if "paged" in attn_name else 1}
 
     with stagger_region(stagger_update_lazyhandle):
        with torch_sendnn.warmup_mode():
@@ -421,8 +422,11 @@ def __sample_requests(
                 prompt_token_ids = tokenizer.encode(
                     prompt, add_special_tokens=False
                 )
+                # If we don't set clean_up_tokenization_spaces=False, encoding then decoding text might result in different lengths, which would break expected results from the sampler
                 truncated_prompt = tokenizer.decode(
-                    prompt_token_ids[:truncate_to_size], skip_special_tokens=True
+                    prompt_token_ids[:truncate_to_size],
+                    skip_special_tokens=True,
+                    clean_up_tokenization_spaces=False,
                 )
                 enforced_dataset.append((truncated_prompt, truncate_to_size))
                 enforce_sizes_with_truncation.remove(truncation_found)
@@ -479,6 +483,7 @@ def sample_rag_factoid_requests(
     enforce_sizes: List[int] = [],
     truncation: bool = False,
     pad_multiple: int = 64,
+    return_key: bool = False,
 ) -> List[Tuple[str, int]]:
     if not os.path.exists(dataset_path):
         print("error dataset does not exist")
@@ -489,7 +494,7 @@ def sample_rag_factoid_requests(
         for line in f:
             dataset.append(line)
 
-    return __sample_requests(
+    sample_request = __sample_requests(
         dataset,
         num_requests,
         tokenizer,
@@ -503,6 +508,24 @@ def sample_rag_factoid_requests(
         _cached_dataset_key=dataset_path,
     )
 
+    if return_key:
+        sample_key: str = format_kwargs_to_string(
+            dataset="rag_factoid",
+            num_requests=num_requests,
+            tokenizer=tokenizer.name_or_path.replace("/", "--"),
+            prompt_length_min=prompt_length_min,
+            prompt_length_max=prompt_length_max,
+            seed=seed,
+            enforce_heterogeneous=enforce_heterogeneous,
+            enforce_sizes=enforce_sizes,
+            truncate=truncation,
+            pad_multiple=pad_multiple,
+        )
+
+        return sample_request, sample_key
+    else:
+        return sample_request
+
 
 def sample_sharegpt_requests(
     dataset_path: str,
@@ -515,6 +538,7 @@ def sample_sharegpt_requests(
     enforce_sizes: List[int] | None = None,
     truncation: bool = False,
     pad_multiple: int = 64,
+    return_key: bool = False,
 ) -> List[Tuple[str, int]]:
     if not os.path.exists(dataset_path):
         print("downloading share-gpt dataset as it does not exist")
@@ -540,7 +564,7 @@ def sample_sharegpt_requests(
     dataset = [data for data in dataset if len(data["conversations"]) >= 2]
     dataset: List[str] = [data["conversations"][0]["value"] for data in dataset]
 
-    return __sample_requests(
+    sample_request = __sample_requests(
         dataset,
         num_requests,
         tokenizer,
@@ -554,6 +578,23 @@ def sample_sharegpt_requests(
         _cached_dataset_key=dataset_path,
     )
 
+    if return_key:
+        sample_key: str = format_kwargs_to_string(
+            dataset="sharegpt",
+            num_requests=num_requests,
+            tokenizer=tokenizer.name_or_path.replace("/", "--"),
+            prompt_length_min=prompt_length_min,
+            prompt_length_max=prompt_length_max,
+            seed=seed,
+            enforce_heterogeneous=enforce_heterogeneous,
+            enforce_sizes=enforce_sizes,
+            truncate=truncation,
+            pad_multiple=pad_multiple,
+        )
+        return sample_request, sample_key
+    else:
+        return sample_request
+
 
 def sample_squad_v2_qa_requests(
     dataset_path: str,
@@ -566,6 +607,7 @@ def sample_squad_v2_qa_requests(
     enforce_sizes: List[int] | None = None,
     truncation: bool = False,
     pad_multiple: int = 64,
+    return_key: bool = False,
 ) -> List[Tuple[str, int]]:
     from datasets import load_dataset
 
@@ -579,7 +621,7 @@ def sample_squad_v2_qa_requests(
 
     ds = [f"{data['context']}\n{data['question']}" for data in ds]
 
-    return __sample_requests(
+    sample_request = __sample_requests(
         ds,
         num_requests,
         tokenizer,
@@ -592,6 +634,23 @@ def sample_squad_v2_qa_requests(
         pad_multiple,
     )
 
+    if return_key:
+        sample_key: str = format_kwargs_to_string(
+            dataset="squad_v2",
+            num_requests=num_requests,
+            tokenizer=tokenizer.name_or_path.replace("/", "--"),
+            prompt_length_min=prompt_length_min,
+            prompt_length_max=prompt_length_max,
+            seed=seed,
+            enforce_heterogeneous=enforce_heterogeneous,
+            enforce_sizes=enforce_sizes,
+            truncate=truncation,
+            pad_multiple=pad_multiple,
+        )
+        return sample_request, sample_key
+    else:
+        return sample_request
+
 
 def prepare_inputs(
     batch_size, seq_length, tokenizer, ds_path, seed=0, ds_type="sharegpt"
0 commit comments