From 7b55ce5c56e8014f695f918fa409d1f77c1bfd66 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 25 Sep 2025 15:21:20 +0200 Subject: [PATCH 01/59] first commit --- .../extractive_question_answering/bert_large/run_mlperf.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 57130f6c..25146a4d 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -34,6 +34,8 @@ def parse_args(): type=str, default="tf", choices=["tf", "pytorch"], help="specify the framework in which a model should be run") + parser.add_argument("--fixed-input", action='store_true', + help="truncate input to fixed shape") parser.add_argument("--timeout", type=float, default=60.0, help="timeout in seconds") @@ -102,6 +104,9 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disabl def run_single_pass(pytorch_runner, squad): input_tensor = squad.get_input_arrays() + print(input_tensor) + print(type(input_tensor)) + quit() output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor)) for i in range(batch_size): @@ -117,7 +122,7 @@ def run_single_pass(pytorch_runner, squad): padding=True, truncation=True, model_max_length=512) def tokenize(question, text): - return tokenizer(question, text, padding=True, truncation=True, return_tensors="pt") + return tokenizer(question, text, padding="max_length", truncation=True, max_length=512, return_tensors="pt") def detokenize(answer): return tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer)) From 1ede310c3d1c83b3120ddfffb693a1b8f97a0cce Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 25 Sep 2025 16:55:09 +0200 Subject: [PATCH 02/59] wip --- .../extractive_question_answering/bert_large/run_mlperf.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 25146a4d..4e9aefb2 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -106,6 +106,10 @@ def run_single_pass(pytorch_runner, squad): input_tensor = squad.get_input_arrays() print(input_tensor) print(type(input_tensor)) + print(input_tensor["input_ids"].size()[1]) + print(input_tensor["input_ids"].shape) + print(input_tensor["attention_mask"].shape) + print(input_tensor["token_type_ids"].shape) quit() output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor)) From a31e5e1aaf445f98fa3eee83ba70e88c837c0af4 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 25 Sep 2025 16:57:56 +0200 Subject: [PATCH 03/59] wip --- .../bert_large/run_mlperf.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 4e9aefb2..bcaed825 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -103,13 +103,14 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disabl from utils.pytorch import PyTorchRunner def run_single_pass(pytorch_runner, squad): - input_tensor = squad.get_input_arrays() - print(input_tensor) - print(type(input_tensor)) - print(input_tensor["input_ids"].size()[1]) - print(input_tensor["input_ids"].shape) - print(input_tensor["attention_mask"].shape) - print(input_tensor["token_type_ids"].shape) + for _ in range(10): + input_tensor = squad.get_input_arrays() + print(type(input_tensor)) + print(input_tensor["input_ids"].size()[1]) + print(input_tensor["input_ids"].shape) + print(input_tensor["attention_mask"].shape) + print(input_tensor["token_type_ids"].shape) + print("-------") quit() output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor)) From 6d5003ddffe484d079ca64d6e5a176f5641c8785 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 26 Sep 2025 10:44:18 +0200 Subject: [PATCH 04/59] wip --- .../bert_large/run_mlperf.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index bcaed825..a4b2553d 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -34,8 +34,6 @@ def parse_args(): type=str, default="tf", choices=["tf", "pytorch"], help="specify the framework in which a model should be run") - parser.add_argument("--fixed-input", action='store_true', - help="truncate input to fixed shape") parser.add_argument("--timeout", type=float, default=60.0, help="timeout in seconds") @@ -45,6 +43,8 @@ def parse_args(): parser.add_argument("--squad_path", type=str, help="path to directory with ImageNet validation images") + parser.add_argument("--fixed_input", action='store_true', + help="truncate input to fixed shape") parser.add_argument("--disable_jit_freeze", action='store_true', help="if true model will be run not in jit freeze mode") return parser.parse_args() @@ -95,7 +95,7 @@ def run_tf_fp16(model_path, batch_size, num_runs, timeout, squad_path, **kwargs) return run_tf_fp(model_path, batch_size, num_runs, timeout, squad_path) -def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze=False): +def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze=False, fixed_input=False): from utils.benchmark import run_model from utils.nlp.squad import Squad_v1_1 from transformers import AutoTokenizer, BertConfig, BertForQuestionAnswering @@ -127,7 +127,10 @@ def run_single_pass(pytorch_runner, squad): padding=True, truncation=True, model_max_length=512) def tokenize(question, text): - return tokenizer(question, text, padding="max_length", truncation=True, max_length=512, return_tensors="pt") + if fixed_input: + return tokenizer(question, text, padding="max_length", truncation=True, max_length=512, return_tensors="pt") + else: + return tokenizer(question, text, padding=True, truncation=True, return_tensors="pt") def detokenize(answer): return tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer)) @@ -209,8 +212,8 @@ def detokenize(answer): return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) -def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, **kwargs): - return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze) +def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input, **kwargs): + return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input) def main(): From 776e70d252c0d04c5aebc2df4885e553b163e177 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 26 Sep 2025 10:54:29 +0200 Subject: [PATCH 05/59] wip --- .../bert_large/run_mlperf.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index a4b2553d..3e0a5a10 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -103,15 +103,13 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disabl from utils.pytorch import PyTorchRunner def run_single_pass(pytorch_runner, squad): - for _ in range(10): - input_tensor = squad.get_input_arrays() - print(type(input_tensor)) - print(input_tensor["input_ids"].size()[1]) - print(input_tensor["input_ids"].shape) - print(input_tensor["attention_mask"].shape) - print(input_tensor["token_type_ids"].shape) - print("-------") - quit() + input_tensor = squad.get_input_arrays() + print(type(input_tensor)) + print(input_tensor["input_ids"].size()[1]) + print(input_tensor["input_ids"].shape) + print(input_tensor["attention_mask"].shape) + print(input_tensor["token_type_ids"].shape) + print("-------") output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor)) for i in range(batch_size): From b3c4a0c3a0606ce1f13f886212c13d0b6268a368 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 26 Sep 2025 10:59:27 +0200 Subject: [PATCH 06/59] wip --- .../extractive_question_answering/bert_large/run_mlperf.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 3e0a5a10..de874c5a 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -104,12 +104,6 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disabl def run_single_pass(pytorch_runner, squad): input_tensor = squad.get_input_arrays() - print(type(input_tensor)) - print(input_tensor["input_ids"].size()[1]) - print(input_tensor["input_ids"].shape) - print(input_tensor["attention_mask"].shape) - print(input_tensor["token_type_ids"].shape) - print("-------") output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor)) for i in range(batch_size): From f8c8b06cb7792c0802e72c03ed02882e84df8d4c Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 26 Sep 2025 13:16:10 +0200 Subject: [PATCH 07/59] wip --- computer_vision/object_detection/yolo_v5/run.py | 2 +- computer_vision/object_detection/yolo_v8/run.py | 2 +- .../extractive_question_answering/bert_large/run_mlperf.py | 2 +- recommendation/dlrm/run.py | 2 +- tests/test_pytorch_models.py | 3 ++- utils/cv/pre_processing.py | 2 +- 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/computer_vision/object_detection/yolo_v5/run.py b/computer_vision/object_detection/yolo_v5/run.py index 945727fd..dd8d1828 100644 --- a/computer_vision/object_detection/yolo_v5/run.py +++ b/computer_vision/object_detection/yolo_v5/run.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2024, Ampere Computing LLC +# Copyright (c) 2025, Ampere Computing LLC try: from utils import misc # noqa except ModuleNotFoundError: diff --git a/computer_vision/object_detection/yolo_v8/run.py b/computer_vision/object_detection/yolo_v8/run.py index 7df1d629..4dc6c67a 100644 --- a/computer_vision/object_detection/yolo_v8/run.py +++ b/computer_vision/object_detection/yolo_v8/run.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2024, Ampere Computing LLC +# Copyright (c) 2025, Ampere Computing LLC try: from utils import misc # noqa except ModuleNotFoundError: diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index de874c5a..a5605993 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2024, Ampere Computing LLC +# Copyright (c) 2025, Ampere Computing LLC try: from utils import misc # noqa except ModuleNotFoundError: diff --git a/recommendation/dlrm/run.py b/recommendation/dlrm/run.py index 97ce3a19..5997e085 100644 --- a/recommendation/dlrm/run.py +++ b/recommendation/dlrm/run.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2024, Ampere Computing LLC +# Copyright (c) 2025, Ampere Computing LLC try: from utils import misc # noqa except ModuleNotFoundError: diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index 60b99472..43c546ab 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -222,7 +222,8 @@ def wrapper(**kwargs): exact_match_ref, f1_ref = 0.750, 0.817 acc = run_process(wrapper, {"model_path": self.model_path, "squad_path": self.dataset_path, - "batch_size": 1, "num_runs": 24, "timeout": None, "disable_jit_freeze": False}) + "batch_size": 1, "num_runs": 24, "timeout": None, + "disable_jit_freeze": False, "fixed_input": False}) self.assertTrue(acc["exact_match"] / exact_match_ref > 0.95) self.assertTrue(acc["f1"] / f1_ref > 0.95) diff --git a/utils/cv/pre_processing.py b/utils/cv/pre_processing.py index 7d452069..ae17a4b1 100644 --- a/utils/cv/pre_processing.py +++ b/utils/cv/pre_processing.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2024, Ampere Computing LLC +# Copyright (c) 2025, Ampere Computing LLC import numpy as np import utils.misc as utils From df07b65ab3eca70e8bd4151245be20f2521f8815 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 26 Sep 2025 14:13:20 +0200 Subject: [PATCH 08/59] wip --- requirements.txt | 1 + tests/test_pytorch_models.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 25e13945..a0666590 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,3 +36,4 @@ open-clip-torch<2.26.1 diffusers accelerate boto3==1.29.0; python_version>='3.12' +torchcodec \ No newline at end of file diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index 43c546ab..403dc211 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2024, Ampere Computing LLC +# Copyright (c) 2025, Ampere Computing LLC import os import signal import time From 6626f910b61e42933817c3880af3b513c070dd15 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 26 Sep 2025 16:00:32 +0200 Subject: [PATCH 09/59] wip --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 8580f840..42a38322 100644 --- a/LICENSE +++ b/LICENSE @@ -187,7 +187,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright (c) 2024, Ampere Computing LLC + Copyright (c) 2025, Ampere Computing LLC Copyright (c) 2022 Andrej Karpathy Copyright (c) 2022 OpenAI Copyright (c) 2022 Stability AI From 01cb4def14450343736d491a87e50fe81378bc8c Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 30 Sep 2025 14:47:05 +0200 Subject: [PATCH 10/59] wip --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index a0666590..9cc6c4c6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,5 +35,4 @@ kornia open-clip-torch<2.26.1 diffusers accelerate -boto3==1.29.0; python_version>='3.12' -torchcodec \ No newline at end of file +boto3==1.29.0; python_version>='3.12' \ No newline at end of file From bc405d651b50e01246a9e967da191c0e9d588a73 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 30 Sep 2025 16:15:49 +0200 Subject: [PATCH 11/59] wip --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 9cc6c4c6..f8921397 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,6 +17,7 @@ tiktoken ultralytics evaluate datasets +datasets[audio] soundfile librosa numba From ddf25ed597b25c5511e764ec6ffe8a11b8933df7 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 1 Oct 2025 10:17:02 +0200 Subject: [PATCH 12/59] wip --- computer_vision/object_detection/yolo_v8/run.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/computer_vision/object_detection/yolo_v8/run.py b/computer_vision/object_detection/yolo_v8/run.py index 4dc6c67a..bc35d293 100644 --- a/computer_vision/object_detection/yolo_v8/run.py +++ b/computer_vision/object_detection/yolo_v8/run.py @@ -61,7 +61,7 @@ def run_ort_fp32(model_path, batch_size, num_runs, timeout, images_path, anno_pa # Ultralytics sets it to True by default. This way we suppress the logging by default while still allowing the user # to set it to True if needed from utils.ort import OrtRunner - from ultralytics.yolo.utils import ops + from ultralytics.utils import nms def run_single_pass(ort_runner, coco): shape = (640, 640) @@ -69,7 +69,7 @@ def run_single_pass(ort_runner, coco): output = ort_runner.run(batch_size) output = torch.from_numpy(output[0]) - output = ops.non_max_suppression(output) + output = nms.non_max_suppression(output) for i in range(batch_size): for d in range(output[i].shape[0]): @@ -97,11 +97,11 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, images_path, anno_ # Ultralytics sets it to True by default. This way we suppress the logging by default while still allowing the user # to set it to True if needed from utils.pytorch import PyTorchRunner - from ultralytics.yolo.utils import ops + from ultralytics.utils import nms def run_single_pass(pytorch_runner, coco): output = pytorch_runner.run(batch_size, coco.get_input_array((640, 640))) - output = ops.non_max_suppression(output) + output = nms.non_max_suppression(output) for i in range(batch_size): for d in range(output[i].shape[0]): @@ -122,6 +122,7 @@ def run_single_pass(pytorch_runner, coco): runner = PyTorchRunner(torch.jit.load(torchscript_model), disable_jit_freeze=disable_jit_freeze, example_inputs=torch.stack(dataset.get_input_array((640, 640)))) + #example_inputs=torch.stack((dataset.get_input_array((640, 640)),))) return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) From c4e81b0a7ff1a9e714e9aa844686a09ee43b325e Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 1 Oct 2025 10:18:35 +0200 Subject: [PATCH 13/59] wip --- computer_vision/object_detection/yolo_v8/run.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/computer_vision/object_detection/yolo_v8/run.py b/computer_vision/object_detection/yolo_v8/run.py index bc35d293..bbd51c24 100644 --- a/computer_vision/object_detection/yolo_v8/run.py +++ b/computer_vision/object_detection/yolo_v8/run.py @@ -121,8 +121,7 @@ def run_single_pass(pytorch_runner, coco): runner = PyTorchRunner(torch.jit.load(torchscript_model), disable_jit_freeze=disable_jit_freeze, - example_inputs=torch.stack(dataset.get_input_array((640, 640)))) - #example_inputs=torch.stack((dataset.get_input_array((640, 640)),))) + example_inputs=torch.stack((dataset.get_input_array((640, 640)),))) return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) From c7764d4f6e724dc692901d660abfe37a306e2774 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 2 Oct 2025 13:00:31 +0200 Subject: [PATCH 14/59] wip --- .github/workflows/test.yml | 2 ++ tests/test_pytorch_models.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2921a548..0a3cf245 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -84,6 +84,8 @@ jobs: wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1 tar -xf aio_objdet_dataset.tar.gz > /dev/null + apt-get update && apt-get install ffmpeg + wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60 diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index 403dc211..4ed6d07e 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -368,6 +368,8 @@ def setUp(self): def test_yolo_v8_s(self): from computer_vision.object_detection.yolo_v8.run import run_pytorch_fp32 + from utils.benchmark import set_global_intra_op_parallelism_threads + set_global_intra_op_parallelism_threads(int(os.environ.get("AIO_NUM_THREADS"))) def wrapper(**kwargs): kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) From 146e3b039d7473834b5f4fbf806ca53bd24d068f Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 2 Oct 2025 13:27:29 +0200 Subject: [PATCH 15/59] wip --- tests/test_pytorch_models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index 4ed6d07e..8d27c83c 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -369,7 +369,8 @@ def setUp(self): def test_yolo_v8_s(self): from computer_vision.object_detection.yolo_v8.run import run_pytorch_fp32 from utils.benchmark import set_global_intra_op_parallelism_threads - set_global_intra_op_parallelism_threads(int(os.environ.get("AIO_NUM_THREADS"))) + #set_global_intra_op_parallelism_threads(int(os.environ.get("AIO_NUM_THREADS"))) + set_global_intra_op_parallelism_threads(32) def wrapper(**kwargs): kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) From 83d284e1417fa3ae137d8a135a666aafc8ac2957 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 2 Oct 2025 15:56:07 +0200 Subject: [PATCH 16/59] wip --- .github/workflows/test.yml | 28 ++++++++++++++-------------- tests/test_pytorch_models.py | 1 - 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0a3cf245..8c54a461 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -84,7 +84,7 @@ jobs: wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1 tar -xf aio_objdet_dataset.tar.gz > /dev/null - apt-get update && apt-get install ffmpeg + apt-get update && apt-get install -y ffmpeg wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60 @@ -151,24 +151,24 @@ jobs: tar -xf aio_objdet_dataset.tar.gz > /dev/null wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1 - IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60 + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60 - IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60 + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60 wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt > /dev/null 2>&1 - IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/yolo_v8/run.py -m yolov8n.pt -f pytorch -p fp32 --timeout=60 + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/yolo_v8/run.py -m yolov8n.pt -f pytorch -p fp32 --timeout=60 python3 speech_recognition/whisper/run.py -m small.en wget $S3_URL_SSD_INCEPTION_V2_TF_FP32 > /dev/null 2>&1 - IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/ssd_inception_v2/run.py -m ssd_inception_v2_tf_fp32.pb -p fp32 --timeout=60 + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/ssd_inception_v2/run.py -m ssd_inception_v2_tf_fp32.pb -p fp32 --timeout=60 wget https://zenodo.org/records/4735647/files/resnet50_v1.onnx > /dev/null 2>&1 - IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v1/run.py -m resnet50_v1.onnx -p fp32 -f ort + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v1/run.py -m resnet50_v1.onnx -p fp32 -f ort wget https://s3.amazonaws.com/onnx-model-zoo/vgg/vgg16/vgg16.tar.gz > /dev/null 2>&1 tar -xf vgg16.tar.gz > /dev/null - IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/vgg_16/run.py -m vgg16/vgg16.onnx -p fp32 -f ort + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/vgg_16/run.py -m vgg16/vgg16.onnx -p fp32 -f ort test_pytorch_arm64_sh: if: false @@ -259,21 +259,21 @@ jobs: tar -xf aio_objdet_dataset.tar.gz > /dev/null wget https://github.com/tloen/alpaca-lora/raw/main/alpaca_data.json > /dev/null 2>&1 - AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 natural_language_processing/text_generation/llama2/run.py -m meta-llama/Llama-2-7b-chat-hf --dataset_path=alpaca_data.json + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 natural_language_processing/text_generation/llama2/run.py -m meta-llama/Llama-2-7b-chat-hf --dataset_path=alpaca_data.json - AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 recommendation/dlrm_torchbench/run.py -p fp32 + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 recommendation/dlrm_torchbench/run.py -p fp32 - IGNORE_DATASET_LIMITS=1 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 computer_vision/classification/resnet_50_v15/run.py -m resnet50 -p fp32 -b 16 -f pytorch + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 computer_vision/classification/resnet_50_v15/run.py -m resnet50 -p fp32 -b 16 -f pytorch - AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 speech_recognition/whisper/run.py -m tiny.en + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 speech_recognition/whisper/run.py -m tiny.en - IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60 + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60 wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt > /dev/null 2>&1 - IGNORE_DATASET_LIMITS=1 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 computer_vision/object_detection/yolo_v8/run.py -m yolov8l.pt -p fp32 -f pytorch + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 computer_vision/object_detection/yolo_v8/run.py -m yolov8l.pt -p fp32 -f pytorch wget -O bert_large_mlperf.pt https://zenodo.org/records/3733896/files/model.pytorch?download=1 > /dev/null 2>&1 - AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py -m bert_large_mlperf.pt -p fp32 -f pytorch + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py -m bert_large_mlperf.pt -p fp32 -f pytorch test_tensorflow_arm64: runs-on: self-hosted diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index 8d27c83c..916925f2 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -369,7 +369,6 @@ def setUp(self): def test_yolo_v8_s(self): from computer_vision.object_detection.yolo_v8.run import run_pytorch_fp32 from utils.benchmark import set_global_intra_op_parallelism_threads - #set_global_intra_op_parallelism_threads(int(os.environ.get("AIO_NUM_THREADS"))) set_global_intra_op_parallelism_threads(32) def wrapper(**kwargs): From 93ed7b4242a2c9b930c94c0b1909fb6d1a34b473 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 3 Oct 2025 10:36:15 +0200 Subject: [PATCH 17/59] wip --- .../extractive_question_answering/bert_large/run_mlperf.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index a5605993..8a9c081a 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -45,6 +45,8 @@ def parse_args(): help="path to directory with ImageNet validation images") parser.add_argument("--fixed_input", action='store_true', help="truncate input to fixed shape") + parser.add_argument("--input_size", type=int, default=384, + help='size of the input') parser.add_argument("--disable_jit_freeze", action='store_true', help="if true model will be run not in jit freeze mode") return parser.parse_args() @@ -104,6 +106,11 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disabl def run_single_pass(pytorch_runner, squad): input_tensor = squad.get_input_arrays() + print(input_tensor["input_ids"].shape) + print(input_tensor["attention_mask"].shape) + print(input_tensor["token_type_ids"].shape) + + quit() output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor)) for i in range(batch_size): From 07a1a34343afdfdc56429d7b3f13a3dbb58b1407 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 3 Oct 2025 11:23:39 +0200 Subject: [PATCH 18/59] wip --- .../bert_large/run_mlperf.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 8a9c081a..e6f3af16 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -97,7 +97,7 @@ def run_tf_fp16(model_path, batch_size, num_runs, timeout, squad_path, **kwargs) return run_tf_fp(model_path, batch_size, num_runs, timeout, squad_path) -def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze=False, fixed_input=False): +def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze=False, fixed_input=False): from utils.benchmark import run_model from utils.nlp.squad import Squad_v1_1 from transformers import AutoTokenizer, BertConfig, BertForQuestionAnswering @@ -127,7 +127,8 @@ def run_single_pass(pytorch_runner, squad): def tokenize(question, text): if fixed_input: - return tokenizer(question, text, padding="max_length", truncation=True, max_length=512, return_tensors="pt") + return tokenizer(question, text, padding="max_length", truncation=True, max_length=input_size, + return_tensors="pt") else: return tokenizer(question, text, padding=True, truncation=True, return_tensors="pt") @@ -211,8 +212,8 @@ def detokenize(answer): return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) -def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input, **kwargs): - return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input) +def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input, input_size, **kwargs): + return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input, input_size) def main(): From ccddf0a437422f7bf633965593dfb6c80205241e Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 7 Oct 2025 16:30:38 +0200 Subject: [PATCH 19/59] wip --- .../bert_large/run_mlperf.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index e6f3af16..39b83ef0 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -105,10 +105,11 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_ from utils.pytorch import PyTorchRunner def run_single_pass(pytorch_runner, squad): - input_tensor = squad.get_input_arrays() - print(input_tensor["input_ids"].shape) - print(input_tensor["attention_mask"].shape) - print(input_tensor["token_type_ids"].shape) + for _ in range(10): + input_tensor = squad.get_input_arrays() + print(input_tensor["input_ids"].shape) + print(input_tensor["attention_mask"].shape) + print(input_tensor["token_type_ids"].shape) quit() output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor)) From 2a518e7cf8bf519af25162c9c670df0ffb95613f Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 7 Oct 2025 16:31:29 +0200 Subject: [PATCH 20/59] wip --- .../extractive_question_answering/bert_large/run_mlperf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 39b83ef0..5b226bcf 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -110,6 +110,7 @@ def run_single_pass(pytorch_runner, squad): print(input_tensor["input_ids"].shape) print(input_tensor["attention_mask"].shape) print(input_tensor["token_type_ids"].shape) + print('---') quit() output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor)) From fe213de615ff666829abfd0e6f27f63c38b7ed37 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 7 Oct 2025 16:37:31 +0200 Subject: [PATCH 21/59] wip --- .../extractive_question_answering/bert_large/run_mlperf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 5b226bcf..80313b80 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -97,7 +97,7 @@ def run_tf_fp16(model_path, batch_size, num_runs, timeout, squad_path, **kwargs) return run_tf_fp(model_path, batch_size, num_runs, timeout, squad_path) -def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze=False, fixed_input=False): +def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, fixed_input, disable_jit_freeze=False): from utils.benchmark import run_model from utils.nlp.squad import Squad_v1_1 from transformers import AutoTokenizer, BertConfig, BertForQuestionAnswering From 02d4de6950a851f88f554394f1add212f5cfc7cf Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 7 Oct 2025 16:38:43 +0200 Subject: [PATCH 22/59] wip --- .../extractive_question_answering/bert_large/run_mlperf.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 80313b80..c02f9e3b 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -104,6 +104,9 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_ import torch from utils.pytorch import PyTorchRunner + print(input_size) + print(fixed_input) + def run_single_pass(pytorch_runner, squad): for _ in range(10): input_tensor = squad.get_input_arrays() @@ -129,6 +132,7 @@ def run_single_pass(pytorch_runner, squad): def tokenize(question, text): if fixed_input: + print('h1') return tokenizer(question, text, padding="max_length", truncation=True, max_length=input_size, return_tensors="pt") else: From d720b06443c6eb3374300f24390966bc8c35e6d1 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 7 Oct 2025 16:41:32 +0200 Subject: [PATCH 23/59] wip --- .../extractive_question_answering/bert_large/run_mlperf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index c02f9e3b..0e2b723f 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -97,7 +97,7 @@ def run_tf_fp16(model_path, batch_size, num_runs, timeout, squad_path, **kwargs) return run_tf_fp(model_path, batch_size, num_runs, timeout, squad_path) -def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, fixed_input, disable_jit_freeze=False): +def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze=False, fixed_input=False): from utils.benchmark import run_model from utils.nlp.squad import Squad_v1_1 from transformers import AutoTokenizer, BertConfig, BertForQuestionAnswering @@ -218,8 +218,8 @@ def detokenize(answer): return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) -def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input, input_size, **kwargs): - return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input, input_size) +def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze, fixed_input, **kwargs): + return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze, fixed_input) def main(): From c682f18849e1f137afebb03c82f1ad562cb10eba Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 7 Oct 2025 16:47:26 +0200 Subject: [PATCH 24/59] wip --- .../bert_large/run_mlperf.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 0e2b723f..9449859f 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -97,16 +97,14 @@ def run_tf_fp16(model_path, batch_size, num_runs, timeout, squad_path, **kwargs) return run_tf_fp(model_path, batch_size, num_runs, timeout, squad_path) -def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze=False, fixed_input=False): +def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, + input_size, disable_jit_freeze=False, fixed_input=False): from utils.benchmark import run_model from utils.nlp.squad import Squad_v1_1 from transformers import AutoTokenizer, BertConfig, BertForQuestionAnswering import torch from utils.pytorch import PyTorchRunner - print(input_size) - print(fixed_input) - def run_single_pass(pytorch_runner, squad): for _ in range(10): input_tensor = squad.get_input_arrays() @@ -132,9 +130,8 @@ def run_single_pass(pytorch_runner, squad): def tokenize(question, text): if fixed_input: - print('h1') - return tokenizer(question, text, padding="max_length", truncation=True, max_length=input_size, - return_tensors="pt") + return tokenizer(question, text, padding="max_length", truncation=True, + max_length=input_size, return_tensors="pt") else: return tokenizer(question, text, padding=True, truncation=True, return_tensors="pt") @@ -218,8 +215,10 @@ def detokenize(answer): return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) -def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze, fixed_input, **kwargs): - return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze, fixed_input) +def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, + input_size, disable_jit_freeze, fixed_input, **kwargs): + return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, + input_size, disable_jit_freeze, fixed_input) def main(): From 2b56ab0df9011ff2ac24e6f8ec8eeac8c876b106 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 8 Oct 2025 11:23:27 +0200 Subject: [PATCH 25/59] wip --- tests/test_pytorch_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index 916925f2..02de8df6 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -223,7 +223,7 @@ def wrapper(**kwargs): exact_match_ref, f1_ref = 0.750, 0.817 acc = run_process(wrapper, {"model_path": self.model_path, "squad_path": self.dataset_path, "batch_size": 1, "num_runs": 24, "timeout": None, - "disable_jit_freeze": False, "fixed_input": False}) + "input_size": 384, "disable_jit_freeze": False, "fixed_input": False}) self.assertTrue(acc["exact_match"] / exact_match_ref > 0.95) self.assertTrue(acc["f1"] / f1_ref > 0.95) From 23e4287433f3641986c2df697d942a89f04976e7 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 8 Oct 2025 12:48:50 +0200 Subject: [PATCH 26/59] wip --- .../bert_large/run_mlperf.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 9449859f..16413c74 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -106,14 +106,7 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, from utils.pytorch import PyTorchRunner def run_single_pass(pytorch_runner, squad): - for _ in range(10): - input_tensor = squad.get_input_arrays() - print(input_tensor["input_ids"].shape) - print(input_tensor["attention_mask"].shape) - print(input_tensor["token_type_ids"].shape) - print('---') - - quit() + input_tensor = squad.get_input_arrays() output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor)) for i in range(batch_size): From 871de3a6f91866a3f97d784f0c133eaa14e16138 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 16 Oct 2025 14:57:26 +0200 Subject: [PATCH 27/59] wip --- .../bert_large/run_mlperf.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 16413c74..84ed1311 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -43,9 +43,7 @@ def parse_args(): parser.add_argument("--squad_path", type=str, help="path to directory with ImageNet validation images") - parser.add_argument("--fixed_input", action='store_true', - help="truncate input to fixed shape") - parser.add_argument("--input_size", type=int, default=384, + parser.add_argument("--fixed_input_size", type=int, help='size of the input') parser.add_argument("--disable_jit_freeze", action='store_true', help="if true model will be run not in jit freeze mode") @@ -97,8 +95,7 @@ def run_tf_fp16(model_path, batch_size, num_runs, timeout, squad_path, **kwargs) return run_tf_fp(model_path, batch_size, num_runs, timeout, squad_path) -def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, - input_size, disable_jit_freeze=False, fixed_input=False): +def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, fixed_input_size, disable_jit_freeze=False): from utils.benchmark import run_model from utils.nlp.squad import Squad_v1_1 from transformers import AutoTokenizer, BertConfig, BertForQuestionAnswering @@ -120,13 +117,17 @@ def run_single_pass(pytorch_runner, squad): tokenizer = AutoTokenizer.from_pretrained( "bert-large-uncased-whole-word-masking-finetuned-squad", padding=True, truncation=True, model_max_length=512) - + print(fixed_input_size) + def tokenize(question, text): - if fixed_input: + if fixed_input_size is not None: + print('h1') return tokenizer(question, text, padding="max_length", truncation=True, - max_length=input_size, return_tensors="pt") + max_length=fixed_input_size, return_tensors="pt") else: + print('h2') return tokenizer(question, text, padding=True, truncation=True, return_tensors="pt") + quit() def detokenize(answer): return tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer)) @@ -208,10 +209,9 @@ def detokenize(answer): return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) -def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, - input_size, disable_jit_freeze, fixed_input, **kwargs): - return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, - input_size, disable_jit_freeze, fixed_input) +def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, fixed_input_size, disable_jit_freeze, + **kwargs): + return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, fixed_input_size, disable_jit_freeze) def main(): From 5ff448670e487397c604f41484e8b34754936b49 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 13:44:10 +0200 Subject: [PATCH 28/59] wip --- .../extractive_question_answering/bert_large/run_mlperf.py | 6 +----- tests/test_pytorch_models.py | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 84ed1311..4f555ab4 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -117,17 +117,13 @@ def run_single_pass(pytorch_runner, squad): tokenizer = AutoTokenizer.from_pretrained( "bert-large-uncased-whole-word-masking-finetuned-squad", padding=True, truncation=True, model_max_length=512) - print(fixed_input_size) - + def tokenize(question, text): if fixed_input_size is not None: - print('h1') return tokenizer(question, text, padding="max_length", truncation=True, max_length=fixed_input_size, return_tensors="pt") else: - print('h2') return tokenizer(question, text, padding=True, truncation=True, return_tensors="pt") - quit() def detokenize(answer): return tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer)) diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index 02de8df6..b7a2ecaa 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -223,7 +223,7 @@ def wrapper(**kwargs): exact_match_ref, f1_ref = 0.750, 0.817 acc = run_process(wrapper, {"model_path": self.model_path, "squad_path": self.dataset_path, "batch_size": 1, "num_runs": 24, "timeout": None, - "input_size": 384, "disable_jit_freeze": False, "fixed_input": False}) + "fixed_input_size": None, "disable_jit_freeze": False}) self.assertTrue(acc["exact_match"] / exact_match_ref > 0.95) self.assertTrue(acc["f1"] / f1_ref > 0.95) From ee2acbd09fc8e50119d1ecbacf5eb3e86fe05e3b Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 15:02:38 +0200 Subject: [PATCH 29/59] wip --- setup_deb.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index 2e6b4a63..34323add 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -53,6 +53,8 @@ fi if ! pip3 --version; then apt-get install -y python3-pip fi +pip install --upgrade pip + PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))') PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev") if [[ -n "$PYTHON_DEV_SEARCH" ]]; then @@ -81,7 +83,7 @@ pip3 install --break-system-packages -r "$(dirname "$0")/requirements.txt" || apt install -y autoconf autogen automake build-essential libasound2-dev \ libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \ - libmpg123-dev pkg-config + libmpg123-dev pkg-config ffmpeg apt remove -y libsndfile1 git clone -b 1.2.2 https://github.com/libsndfile/libsndfile.git && cd libsndfile/ && autoreconf -vif && ./configure --enable-werror && make -j && make install && ldconfig && cd .. && rm -rf libsndfile From 0b9196f72c99a466d62c0863f265e6193736aedc Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 15:13:15 +0200 Subject: [PATCH 30/59] wip --- setup_deb.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index 34323add..3820f857 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -11,6 +11,7 @@ log() { } ARCH=$(uname -m) +PIP_BREAK_SYSTEM_PACKAGES=1 if [ -z ${SCRIPT_DIR+x} ]; then SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) @@ -53,7 +54,6 @@ fi if ! pip3 --version; then apt-get install -y python3-pip fi -pip install --upgrade pip PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))') PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev") @@ -78,9 +78,12 @@ sleep 1 ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py # get almost all python deps +pip3 install --upgrade pip pip3 install --break-system-packages -r "$(dirname "$0")/requirements.txt" || pip3 install -r "$(dirname "$0")/requirements.txt" + + apt install -y autoconf autogen automake build-essential libasound2-dev \ libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \ libmpg123-dev pkg-config ffmpeg From 5e97ac496c7e5fa285e79de312b16e5638bd2c62 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 16:25:28 +0200 Subject: [PATCH 31/59] wip --- setup_deb.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/setup_deb.sh b/setup_deb.sh index 3820f857..8348d2da 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -11,7 +11,6 @@ log() { } ARCH=$(uname -m) -PIP_BREAK_SYSTEM_PACKAGES=1 if [ -z ${SCRIPT_DIR+x} ]; then SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) @@ -78,9 +77,9 @@ sleep 1 ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py # get almost all python deps -pip3 install --upgrade pip -pip3 install --break-system-packages -r "$(dirname "$0")/requirements.txt" || - pip3 install -r "$(dirname "$0")/requirements.txt" +python3 -m pip install --ignore-installed --break-system-packages --upgrade pip +python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" || + python3 -m pip3 install -r "$(dirname "$0")/requirements.txt" From 3ae639f1f41ec23fab0ca84c9ddb5d001a3ae422 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 17:06:05 +0200 Subject: [PATCH 32/59] wip --- setup_deb.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index 8348d2da..2c50c32c 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -77,10 +77,16 @@ sleep 1 ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py # get almost all python deps -python3 -m pip install --ignore-installed --break-system-packages --upgrade pip +echo here1 + +python3 -m pip install --ignore-installed --upgrade pip + +echo here2 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" || python3 -m pip3 install -r "$(dirname "$0")/requirements.txt" +echo here3 + apt install -y autoconf autogen automake build-essential libasound2-dev \ From 27af2ee5a103889bdd2746992c5f45e9f210ed06 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 17:11:46 +0200 Subject: [PATCH 33/59] wip --- setup_deb.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index 2c50c32c..3358f800 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -79,7 +79,7 @@ ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py # get almost all python deps echo here1 -python3 -m pip install --ignore-installed --upgrade pip +python3 -m pip install --break-system-packages --ignore-installed --upgrade pip echo here2 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" || From 84eaf81710397df030fd2f5f33165c401b09900e Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 17:16:26 +0200 Subject: [PATCH 34/59] wip --- setup_deb.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/setup_deb.sh b/setup_deb.sh index 3358f800..4e6a60ba 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -79,13 +79,16 @@ ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py # get almost all python deps echo here1 +python3 -m pip --version +python3 -m pip install --upgrade --ignore-installed pip +echo here2 python3 -m pip install --break-system-packages --ignore-installed --upgrade pip -echo here2 +echo here3 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" || python3 -m pip3 install -r "$(dirname "$0")/requirements.txt" -echo here3 +echo here4 From 7181c4350a83886f555e58d74bd8a57827aca33a Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 17:24:57 +0200 Subject: [PATCH 35/59] wip --- setup_deb.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index 4e6a60ba..51e943af 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -79,8 +79,11 @@ ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py # get almost all python deps echo here1 +export PIP_CACHE_DIR=${PIP_CACHE_DIR:-/tmp/pip-cache} +mkdir -p "$PIP_CACHE_DIR" || true + python3 -m pip --version -python3 -m pip install --upgrade --ignore-installed pip +#python3 -m pip install --upgrade --ignore-installed pip echo here2 python3 -m pip install --break-system-packages --ignore-installed --upgrade pip From 47e0779719372dfab8f935023fcc9bace83bdfd6 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 17:58:26 +0200 Subject: [PATCH 36/59] wip --- setup_deb.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/setup_deb.sh b/setup_deb.sh index 51e943af..f71a7392 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -48,10 +48,12 @@ sleep 1 apt-get update -y apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake if ! python3 -c ""; then + apt-get update -y apt-get install -y python3 python3-pip fi if ! pip3 --version; then - apt-get install -y python3-pip + apt-get install -y python3-pip || true + python3 -m ensurepip --upgrade || true fi PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))') @@ -79,9 +81,6 @@ ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py # get almost all python deps echo here1 -export PIP_CACHE_DIR=${PIP_CACHE_DIR:-/tmp/pip-cache} -mkdir -p "$PIP_CACHE_DIR" || true - python3 -m pip --version #python3 -m pip install --upgrade --ignore-installed pip echo here2 From 5ca52494702d584378bfe943f1dfe0320d73a9ef Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 18:01:57 +0200 Subject: [PATCH 37/59] wip --- setup_deb.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index f71a7392..42bd2d0f 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -49,7 +49,8 @@ apt-get update -y apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake if ! python3 -c ""; then apt-get update -y - apt-get install -y python3 python3-pip + apt-get install -y python3 python3-pip || true + python3 -m ensurepip --upgrade || true fi if ! pip3 --version; then apt-get install -y python3-pip || true From c2e309db9145cb1b0d46fc1328e9625952593be0 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Mon, 20 Oct 2025 12:39:51 +0200 Subject: [PATCH 38/59] wip --- setup_deb.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/setup_deb.sh b/setup_deb.sh index 42bd2d0f..a01aa490 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -49,14 +49,15 @@ apt-get update -y apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake if ! python3 -c ""; then apt-get update -y - apt-get install -y python3 python3-pip || true - python3 -m ensurepip --upgrade || true + apt-get install -y python3 python3-pip fi if ! pip3 --version; then - apt-get install -y python3-pip || true - python3 -m ensurepip --upgrade || true + apt-get install -y python3-pip fi +python3 -m venv /opt/venv +. /opt/venv/bin/activate + PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))') PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev") if [[ -n "$PYTHON_DEV_SEARCH" ]]; then From fba70f675775881672248dbf2507e152ee0e0e1e Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Mon, 20 Oct 2025 12:43:42 +0200 Subject: [PATCH 39/59] wip --- setup_deb.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index a01aa490..63519c34 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -49,7 +49,7 @@ apt-get update -y apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake if ! python3 -c ""; then apt-get update -y - apt-get install -y python3 python3-pip + apt-get install -y python3 python3-pip python3-venv fi if ! pip3 --version; then apt-get install -y python3-pip From 242ca724cd003a5727f8652efce1c3e675d21138 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Mon, 20 Oct 2025 12:49:33 +0200 Subject: [PATCH 40/59] wip --- setup_deb.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index 63519c34..ed1dd039 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -49,12 +49,13 @@ apt-get update -y apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake if ! python3 -c ""; then apt-get update -y - apt-get install -y python3 python3-pip python3-venv + apt-get install -y python3 python3-pip fi if ! pip3 --version; then apt-get install -y python3-pip fi +apt-get install -y python3-venv python3 -m venv /opt/venv . /opt/venv/bin/activate From e5f63abf825967879757647b1aaf8023f562dfe6 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Mon, 20 Oct 2025 12:52:39 +0200 Subject: [PATCH 41/59] wip --- setup_deb.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index ed1dd039..304b7866 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -87,7 +87,7 @@ echo here1 python3 -m pip --version #python3 -m pip install --upgrade --ignore-installed pip echo here2 -python3 -m pip install --break-system-packages --ignore-installed --upgrade pip +python3 -m pip install --ignore-installed --upgrade pip echo here3 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" || From 8628d9e72bb6a701fc08c79bd10a95968d3abf96 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Mon, 20 Oct 2025 14:38:20 +0200 Subject: [PATCH 42/59] wip --- setup_deb.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/setup_deb.sh b/setup_deb.sh index 304b7866..93325da7 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -55,9 +55,10 @@ if ! pip3 --version; then apt-get install -y python3-pip fi -apt-get install -y python3-venv -python3 -m venv /opt/venv -. /opt/venv/bin/activate +#apt-get install -y python3-venv +#python3 -m venv /opt/venv +#. /opt/venv/bin/activate +#python3 -m pip config set global.break-system-packages true PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))') PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev") From 572eb528a3b69a53e851b7372464a67944efddb9 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Mon, 20 Oct 2025 14:43:23 +0200 Subject: [PATCH 43/59] wip --- setup_deb.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index 93325da7..58313a98 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -88,7 +88,7 @@ echo here1 python3 -m pip --version #python3 -m pip install --upgrade --ignore-installed pip echo here2 -python3 -m pip install --ignore-installed --upgrade pip +PIP_BREAK_SYSTEM_PACKAGES=1 python3 -m pip install --ignore-installed --upgrade pip echo here3 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" || From 58b26c7475be9a36b7e71a3e4767eead0526bb3d Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 22 Oct 2025 13:01:13 +0200 Subject: [PATCH 44/59] wip --- setup_deb.sh | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/setup_deb.sh b/setup_deb.sh index 58313a98..81404dd5 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -55,11 +55,6 @@ if ! pip3 --version; then apt-get install -y python3-pip fi -#apt-get install -y python3-venv -#python3 -m venv /opt/venv -#. /opt/venv/bin/activate -#python3 -m pip config set global.break-system-packages true - PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))') PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev") if [[ -n "$PYTHON_DEV_SEARCH" ]]; then @@ -96,8 +91,6 @@ python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements. echo here4 - - apt install -y autoconf autogen automake build-essential libasound2-dev \ libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \ libmpg123-dev pkg-config ffmpeg @@ -123,3 +116,6 @@ else touch "$SCRIPT_DIR"/.setup_completed fi log "Setup completed. Please run: source $SCRIPT_DIR/set_env_variables.sh" + +echo HERe555 +exit 1 \ No newline at end of file From 6c47e2f35fdcf7dd7e0d24f43c7a339426addbce Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 22 Oct 2025 13:09:13 +0200 Subject: [PATCH 45/59] wip --- setup_deb.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/setup_deb.sh b/setup_deb.sh index 81404dd5..a6617573 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -89,7 +89,9 @@ echo here3 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" || python3 -m pip3 install -r "$(dirname "$0")/requirements.txt" -echo here4 +python3 -m pip --version +echo HERe555 +exit 1 apt install -y autoconf autogen automake build-essential libasound2-dev \ libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \ @@ -116,6 +118,3 @@ else touch "$SCRIPT_DIR"/.setup_completed fi log "Setup completed. Please run: source $SCRIPT_DIR/set_env_variables.sh" - -echo HERe555 -exit 1 \ No newline at end of file From 006ebb12cd8669d53d9e8bba7bdd3f118c599682 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 22 Oct 2025 13:18:05 +0200 Subject: [PATCH 46/59] wip --- .github/workflows/test.yml | 2 -- setup_deb.sh | 11 ----------- 2 files changed, 13 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8c54a461..9ba08463 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -84,8 +84,6 @@ jobs: wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1 tar -xf aio_objdet_dataset.tar.gz > /dev/null - apt-get update && apt-get install -y ffmpeg - wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60 diff --git a/setup_deb.sh b/setup_deb.sh index a6617573..c1d10385 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -78,21 +78,10 @@ sleep 1 ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py # get almost all python deps -echo here1 - -python3 -m pip --version -#python3 -m pip install --upgrade --ignore-installed pip -echo here2 PIP_BREAK_SYSTEM_PACKAGES=1 python3 -m pip install --ignore-installed --upgrade pip - -echo here3 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" || python3 -m pip3 install -r "$(dirname "$0")/requirements.txt" -python3 -m pip --version -echo HERe555 -exit 1 - apt install -y autoconf autogen automake build-essential libasound2-dev \ libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \ libmpg123-dev pkg-config ffmpeg From 13e9fd7b28ce69a6c7b627fef0431df9798269ab Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 22 Oct 2025 14:40:01 +0200 Subject: [PATCH 47/59] wip --- .github/workflows/test.yml | 7 +++++++ setup_deb.sh | 2 ++ 2 files changed, 9 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9ba08463..55a3c3da 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -84,6 +84,13 @@ jobs: wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1 tar -xf aio_objdet_dataset.tar.gz > /dev/null + which -a ffmpeg + ffmpeg -version + + ffmpeg -version | head -n1 + + exit 1 + wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60 diff --git a/setup_deb.sh b/setup_deb.sh index c1d10385..a8285c37 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -55,6 +55,8 @@ if ! pip3 --version; then apt-get install -y python3-pip fi +apt-get update && apt-get install -y --no-install-recommends ffmpeg && rm -rf /var/lib/apt/lists/* + PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))') PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev") if [[ -n "$PYTHON_DEV_SEARCH" ]]; then From 706fe6e837e0601cfa4e3d4c9ed096aef4ff33f6 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 22 Oct 2025 15:02:21 +0200 Subject: [PATCH 48/59] wip --- setup_deb.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/setup_deb.sh b/setup_deb.sh index a8285c37..c1d10385 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -55,8 +55,6 @@ if ! pip3 --version; then apt-get install -y python3-pip fi -apt-get update && apt-get install -y --no-install-recommends ffmpeg && rm -rf /var/lib/apt/lists/* - PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))') PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev") if [[ -n "$PYTHON_DEV_SEARCH" ]]; then From ad8cf9fe85068a0e999216ebad3aedcdc2b399ae Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 22 Oct 2025 15:57:40 +0200 Subject: [PATCH 49/59] wip --- .github/workflows/test.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 55a3c3da..ecba2ae5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -81,15 +81,15 @@ jobs: - name: End-user smoke test run: | - wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1 - tar -xf aio_objdet_dataset.tar.gz > /dev/null - which -a ffmpeg ffmpeg -version ffmpeg -version | head -n1 - exit 1 + echo here1 + + wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1 + tar -xf aio_objdet_dataset.tar.gz > /dev/null wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60 From 8983fae596acfdde740ebabe4bc1a8eea5ce9bdb Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 22 Oct 2025 17:58:51 +0200 Subject: [PATCH 50/59] wip --- .github/workflows/test.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ecba2ae5..bea349f5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -80,14 +80,8 @@ jobs: python3 -m unittest tests.test_pytorch_models - name: End-user smoke test - run: | - which -a ffmpeg + run: | ffmpeg -version - - ffmpeg -version | head -n1 - - echo here1 - wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1 tar -xf aio_objdet_dataset.tar.gz > /dev/null From b22bd269f6bd13113dfacf6d2131a8b60c786691 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 23 Oct 2025 11:30:56 +0200 Subject: [PATCH 51/59] wip --- setup_deb.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/setup_deb.sh b/setup_deb.sh index c1d10385..abb4c8fd 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -4,6 +4,9 @@ set -eo pipefail +ln -fs /usr/share/zoneinfo/Europe/Warsaw /etc/localtime +echo "Europe/Warsaw" | tee /etc/timezone >/dev/null + log() { COLOR_DEFAULT='\033[0m' COLOR_CYAN='\033[1;36m' @@ -46,7 +49,7 @@ fi log "Installing system dependencies ..." sleep 1 apt-get update -y -apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake +apt-get install -y build-essential libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake if ! python3 -c ""; then apt-get update -y apt-get install -y python3 python3-pip @@ -84,7 +87,7 @@ python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements. apt install -y autoconf autogen automake build-essential libasound2-dev \ libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \ - libmpg123-dev pkg-config ffmpeg + libmpg123-dev pkg-config apt remove -y libsndfile1 git clone -b 1.2.2 https://github.com/libsndfile/libsndfile.git && cd libsndfile/ && autoreconf -vif && ./configure --enable-werror && make -j && make install && ldconfig && cd .. && rm -rf libsndfile @@ -101,6 +104,9 @@ if [ "$(python3 -c 'import torch; print(torch.cuda.is_available())')" == "True" fi log "done.\n" +apt-get update -y +apt-get install -y ffmpeg + if [ -f "/etc/machine-id" ]; then cat /etc/machine-id >"$SCRIPT_DIR"/.setup_completed else From 51f3b94b2eff0dc464eff1c6352fa49f1aa4c7d8 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 23 Oct 2025 13:32:44 +0200 Subject: [PATCH 52/59] wip --- .github/workflows/test.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bea349f5..ef0be073 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -116,6 +116,7 @@ jobs: COCO_IMG_PATH: aio_objdet_dataset COCO_ANNO_PATH: aio_objdet_dataset/annotations.json OMP_NUM_THREADS: 32 + AIO_NUM_THREADS: 32 S3_URL_CRITEO_DATASET: ${{ secrets.S3_URL_CRITEO_DATASET }} S3_URL_RESNET_50_V15_TF_FP32: ${{ secrets.S3_URL_RESNET_50_V15_TF_FP32 }} S3_URL_SSD_INCEPTION_V2_TF_FP32: ${{ secrets.S3_URL_SSD_INCEPTION_V2_TF_FP32 }} @@ -150,24 +151,24 @@ jobs: tar -xf aio_objdet_dataset.tar.gz > /dev/null wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1 - OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60 + IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60 - OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60 + IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60 wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt > /dev/null 2>&1 - OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/yolo_v8/run.py -m yolov8n.pt -f pytorch -p fp32 --timeout=60 + IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/yolo_v8/run.py -m yolov8n.pt -f pytorch -p fp32 --timeout=60 python3 speech_recognition/whisper/run.py -m small.en wget $S3_URL_SSD_INCEPTION_V2_TF_FP32 > /dev/null 2>&1 - OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/ssd_inception_v2/run.py -m ssd_inception_v2_tf_fp32.pb -p fp32 --timeout=60 + IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/ssd_inception_v2/run.py -m ssd_inception_v2_tf_fp32.pb -p fp32 --timeout=60 wget https://zenodo.org/records/4735647/files/resnet50_v1.onnx > /dev/null 2>&1 - OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v1/run.py -m resnet50_v1.onnx -p fp32 -f ort + IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v1/run.py -m resnet50_v1.onnx -p fp32 -f ort wget https://s3.amazonaws.com/onnx-model-zoo/vgg/vgg16/vgg16.tar.gz > /dev/null 2>&1 tar -xf vgg16.tar.gz > /dev/null - OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/vgg_16/run.py -m vgg16/vgg16.onnx -p fp32 -f ort + IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/vgg_16/run.py -m vgg16/vgg16.onnx -p fp32 -f ort test_pytorch_arm64_sh: if: false From 3f70599cba3dfad97a5c7ca517127b5a04cf169b Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 24 Oct 2025 15:50:45 +0200 Subject: [PATCH 53/59] wip --- tests/test_pytorch_models.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index b7a2ecaa..b38dba04 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -366,19 +366,19 @@ def setUp(self): # "timeout": None, "disable_jit_freeze": False}) # self.assertTrue(acc["coco_map"] / coco_map_ref > 0.95) - def test_yolo_v8_s(self): - from computer_vision.object_detection.yolo_v8.run import run_pytorch_fp32 - from utils.benchmark import set_global_intra_op_parallelism_threads - set_global_intra_op_parallelism_threads(32) - - def wrapper(**kwargs): - kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) - - coco_map_ref = 0.353 - acc = run_process(wrapper, {"model_path": self.yolo_v8_s_path, "images_path": self.dataset_path, - "anno_path": self.annotations_path, "batch_size": 1, "num_runs": 465, - "timeout": None, "disable_jit_freeze": False}) - self.assertTrue(acc["coco_map"] / coco_map_ref > 0.95) + # def test_yolo_v8_s(self): + # from computer_vision.object_detection.yolo_v8.run import run_pytorch_fp32 + # from utils.benchmark import set_global_intra_op_parallelism_threads + # set_global_intra_op_parallelism_threads(32) + # + # def wrapper(**kwargs): + # kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) + # + # coco_map_ref = 0.353 + # acc = run_process(wrapper, {"model_path": self.yolo_v8_s_path, "images_path": self.dataset_path, + # "anno_path": self.annotations_path, "batch_size": 1, "num_runs": 465, + # "timeout": None, "disable_jit_freeze": False}) + # self.assertTrue(acc["coco_map"] / coco_map_ref > 0.95) if __name__ == "__main__": From 4d8ec7223f854e7156d7598a38bc2ea3b825b635 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 30 Oct 2025 16:04:00 +0100 Subject: [PATCH 54/59] wip --- tests/test_pytorch_models.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index b38dba04..afe84cb3 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -305,17 +305,17 @@ class VGG(unittest.TestCase): def setUp(self): self.dataset_path, self.labels_path = download_imagenet_maybe() - def test_vgg16(self): - from computer_vision.classification.vgg_16.run import run_pytorch_fp32 - - def wrapper(**kwargs): - kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) - - top_1_ref, top_5_ref = 0.661, 0.896 - acc = run_process(wrapper, {"model_name": "vgg16", "images_path": self.dataset_path, - "labels_path": self.labels_path, "batch_size": 32, "num_runs": 10, "timeout": None}) - self.assertTrue(acc["top_1_acc"] / top_1_ref > 0.95) - self.assertTrue(acc["top_5_acc"] / top_5_ref > 0.95) + # def test_vgg16(self): + # from computer_vision.classification.vgg_16.run import run_pytorch_fp32 + # + # def wrapper(**kwargs): + # kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) + # + # top_1_ref, top_5_ref = 0.661, 0.896 + # acc = run_process(wrapper, {"model_name": "vgg16", "images_path": self.dataset_path, + # "labels_path": self.labels_path, "batch_size": 32, "num_runs": 10, "timeout": None}) + # self.assertTrue(acc["top_1_acc"] / top_1_ref > 0.95) + # self.assertTrue(acc["top_5_acc"] / top_5_ref > 0.95) def download_coco_maybe(): From cfb802a1e2b13e6c71894767610157a44c2ff705 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 30 Oct 2025 16:07:12 +0100 Subject: [PATCH 55/59] wip --- tests/test_pytorch_models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index afe84cb3..1c183fc5 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -313,7 +313,8 @@ def setUp(self): # # top_1_ref, top_5_ref = 0.661, 0.896 # acc = run_process(wrapper, {"model_name": "vgg16", "images_path": self.dataset_path, - # "labels_path": self.labels_path, "batch_size": 32, "num_runs": 10, "timeout": None}) + # "labels_path": self.labels_path, "batch_size": 32, + # "num_runs": 10, "timeout": None}) # self.assertTrue(acc["top_1_acc"] / top_1_ref > 0.95) # self.assertTrue(acc["top_5_acc"] / top_5_ref > 0.95) From 0face5a73f60c1f9202404a4fdd0d6a080470608 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 31 Oct 2025 09:55:35 +0100 Subject: [PATCH 56/59] wip --- .github/workflows/test.yml | 1 + tests/test_pytorch_models.py | 74 ++++++++++++++++++------------------ 2 files changed, 39 insertions(+), 36 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ef0be073..8386918f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -246,6 +246,7 @@ jobs: - name: Unittest run: | AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 -m unittest tests.test_pytorch_models + echo HERE1 - name: benchmark.py test run: | diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index 1c183fc5..5acbaf7d 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -214,18 +214,18 @@ def setUp(self): f"{'https://zenodo.org/records/3733896/files/model.pytorch?download=1'}".split(), check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - def test_bert_large_mlperf(self): - from natural_language_processing.extractive_question_answering.bert_large.run_mlperf import run_pytorch_fp32 - - def wrapper(**kwargs): - kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) - - exact_match_ref, f1_ref = 0.750, 0.817 - acc = run_process(wrapper, {"model_path": self.model_path, "squad_path": self.dataset_path, - "batch_size": 1, "num_runs": 24, "timeout": None, - "fixed_input_size": None, "disable_jit_freeze": False}) - self.assertTrue(acc["exact_match"] / exact_match_ref > 0.95) - self.assertTrue(acc["f1"] / f1_ref > 0.95) + # def test_bert_large_mlperf(self): + # from natural_language_processing.extractive_question_answering.bert_large.run_mlperf import run_pytorch_fp32 + # + # def wrapper(**kwargs): + # kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) + # + # exact_match_ref, f1_ref = 0.750, 0.817 + # acc = run_process(wrapper, {"model_path": self.model_path, "squad_path": self.dataset_path, + # "batch_size": 1, "num_runs": 24, "timeout": None, + # "fixed_input_size": None, "disable_jit_freeze": False}) + # self.assertTrue(acc["exact_match"] / exact_match_ref > 0.95) + # self.assertTrue(acc["f1"] / f1_ref > 0.95) def download_imagenet_maybe(): @@ -252,36 +252,36 @@ class DenseNet(unittest.TestCase): def setUp(self): self.dataset_path, self.labels_path = download_imagenet_maybe() - def test_densenet_121(self): - from computer_vision.classification.densenet_121.run import run_pytorch_fp32 - - def wrapper(**kwargs): - kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) - - top_1_ref, top_5_ref = 0.717, 0.905 - acc = run_process(wrapper, {"model_name": "densenet121", "images_path": self.dataset_path, - "labels_path": self.labels_path, "batch_size": 32, "num_runs": 10, "timeout": None, - "disable_jit_freeze": False}) - self.assertTrue(acc["top_1_acc"] / top_1_ref > 0.95) - self.assertTrue(acc["top_5_acc"] / top_5_ref > 0.95) + # def test_densenet_121(self): + # from computer_vision.classification.densenet_121.run import run_pytorch_fp32 + # + # def wrapper(**kwargs): + # kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) + # + # top_1_ref, top_5_ref = 0.717, 0.905 + # acc = run_process(wrapper, {"model_name": "densenet121", "images_path": self.dataset_path, + # "labels_path": self.labels_path, "batch_size": 32, "num_runs": 10, "timeout": None, + # "disable_jit_freeze": False}) + # self.assertTrue(acc["top_1_acc"] / top_1_ref > 0.95) + # self.assertTrue(acc["top_5_acc"] / top_5_ref > 0.95) class Inception(unittest.TestCase): def setUp(self): self.dataset_path, self.labels_path = download_imagenet_maybe() - def test_inception_v3(self): - from computer_vision.classification.inception_v3.run import run_pytorch_fp32 - - def wrapper(**kwargs): - kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) - - top_1_ref, top_5_ref = 0.765, 0.932 - acc = run_process(wrapper, {"model_name": "inception_v3", "images_path": self.dataset_path, - "labels_path": self.labels_path, "batch_size": 32, "num_runs": 10, "timeout": None, - "disable_jit_freeze": False}) - self.assertTrue(acc["top_1_acc"] / top_1_ref > 0.95) - self.assertTrue(acc["top_5_acc"] / top_5_ref > 0.95) + # def test_inception_v3(self): + # from computer_vision.classification.inception_v3.run import run_pytorch_fp32 + # + # def wrapper(**kwargs): + # kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) + # + # top_1_ref, top_5_ref = 0.765, 0.932 + # acc = run_process(wrapper, {"model_name": "inception_v3", "images_path": self.dataset_path, + # "labels_path": self.labels_path, "batch_size": 32, "num_runs": 10, "timeout": None, + # "disable_jit_freeze": False}) + # self.assertTrue(acc["top_1_acc"] / top_1_ref > 0.95) + # self.assertTrue(acc["top_5_acc"] / top_5_ref > 0.95) class ResNet(unittest.TestCase): @@ -383,4 +383,6 @@ def setUp(self): if __name__ == "__main__": + print('here2') unittest.main() + print('here3') From 14f0ab3fb7861be71c5d828dab41d54b837a7f72 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 31 Oct 2025 11:40:40 +0100 Subject: [PATCH 57/59] wip --- tests/test_pytorch_models.py | 96 ++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index 5acbaf7d..ef74ecdc 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -48,23 +48,23 @@ def wrapper(**kwargs): self.wrapper = wrapper - @unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 100, "too little memory") - @unittest.skipUnless('_aio_profiler_print' in dir(torch._C), "Ampere optimized PyTorch required") - def test_llama2_7b(self): - f1_ref = 0.330 - acc = run_process(self.wrapper, - {"model_name": "meta-llama/Llama-2-7b-chat-hf", "batch_size": 1, "num_runs": 50, - "timeout": None, "dataset_path": self.dataset_path}) - self.assertTrue(acc["f1"] / f1_ref > 0.95) - - @unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 200, "too little memory") - @unittest.skipUnless('_aio_profiler_print' in dir(torch._C), "Ampere optimized PyTorch required") - def test_llama2_13b(self): - f1_ref = 0.261 - acc = run_process(self.wrapper, - {"model_name": "meta-llama/Llama-2-13b-chat-hf", "batch_size": 1, "num_runs": 50, - "timeout": None, "dataset_path": self.dataset_path}) - self.assertTrue(acc["f1"] / f1_ref > 0.95) + # @unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 100, "too little memory") + # @unittest.skipUnless('_aio_profiler_print' in dir(torch._C), "Ampere optimized PyTorch required") + # def test_llama2_7b(self): + # f1_ref = 0.330 + # acc = run_process(self.wrapper, + # {"model_name": "meta-llama/Llama-2-7b-chat-hf", "batch_size": 1, "num_runs": 50, + # "timeout": None, "dataset_path": self.dataset_path}) + # self.assertTrue(acc["f1"] / f1_ref > 0.95) + # + # @unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 200, "too little memory") + # @unittest.skipUnless('_aio_profiler_print' in dir(torch._C), "Ampere optimized PyTorch required") + # def test_llama2_13b(self): + # f1_ref = 0.261 + # acc = run_process(self.wrapper, + # {"model_name": "meta-llama/Llama-2-13b-chat-hf", "batch_size": 1, "num_runs": 50, + # "timeout": None, "dataset_path": self.dataset_path}) + # self.assertTrue(acc["f1"] / f1_ref > 0.95) class Alpaca(unittest.TestCase): @@ -85,19 +85,19 @@ def setUp(self): subprocess.run("rm /tmp/alpaca_recovered.tar.gz".split(), check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - @unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 100, "too little memory") - @unittest.skipUnless('_aio_profiler_print' in dir(torch._C), "Ampere optimized PyTorch required") - def test_alpaca(self): - from natural_language_processing.text_generation.alpaca.run import run_pytorch_fp32 - - def wrapper(**kwargs): - kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) - - exact_match_ref, f1_ref = 0.220, 0.547 - acc = run_process(wrapper, {"model_path": self.model_path, "batch_size": 1, "num_runs": 50, - "timeout": None, "dataset_path": self.dataset_path}) - self.assertTrue(acc["exact_match"] / exact_match_ref > 0.95) - self.assertTrue(acc["f1"] / f1_ref > 0.95) + # @unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 100, "too little memory") + # @unittest.skipUnless('_aio_profiler_print' in dir(torch._C), "Ampere optimized PyTorch required") + # def test_alpaca(self): + # from natural_language_processing.text_generation.alpaca.run import run_pytorch_fp32 + # + # def wrapper(**kwargs): + # kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) + # + # exact_match_ref, f1_ref = 0.220, 0.547 + # acc = run_process(wrapper, {"model_path": self.model_path, "batch_size": 1, "num_runs": 50, + # "timeout": None, "dataset_path": self.dataset_path}) + # self.assertTrue(acc["exact_match"] / exact_match_ref > 0.95) + # self.assertTrue(acc["f1"] / f1_ref > 0.95) class Whisper(unittest.TestCase): @@ -156,13 +156,13 @@ def wrapper(**kwargs): self.wrapper = wrapper - @unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 100, "too little memory") - @unittest.skipUnless('_aio_profiler_print' in dir(torch._C), "too slow to run with native") - def test_whisper_translate_medium(self): - wer_ref = 0.475 - acc = run_process(self.wrapper, {"model_name": "large", "num_runs": 30, "timeout": None, - "dataset_path": self.dataset_path}) - self.assertTrue(wer_ref / acc["bleu_score"] > 0.95) + # @unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 100, "too little memory") + # @unittest.skipUnless('_aio_profiler_print' in dir(torch._C), "too slow to run with native") + # def test_whisper_translate_medium(self): + # wer_ref = 0.475 + # acc = run_process(self.wrapper, {"model_name": "large", "num_runs": 30, "timeout": None, + # "dataset_path": self.dataset_path}) + # self.assertTrue(wer_ref / acc["bleu_score"] > 0.95) class DLRM(unittest.TestCase): @@ -184,17 +184,17 @@ def setUp(self): f"{'https://dlrm.s3-us-west-1.amazonaws.com/models/tb0875_10M.pt'}".split(), check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - @unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 100, "too little memory") - def test_dlrm_debug(self): - from recommendation.dlrm.run import run_pytorch_fp32 - - def wrapper(**kwargs): - kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) - - auc_ref = 0.583 - acc = run_process(wrapper, {"model_path": self.model_path, "dataset_path": self.dataset_path, - "batch_size": 2048, "num_runs": 30, "timeout": None, "debug": True}) - self.assertTrue(acc["auc"] / auc_ref > 0.95) + # @unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 100, "too little memory") + # def test_dlrm_debug(self): + # from recommendation.dlrm.run import run_pytorch_fp32 + # + # def wrapper(**kwargs): + # kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) + # + # auc_ref = 0.583 + # acc = run_process(wrapper, {"model_path": self.model_path, "dataset_path": self.dataset_path, + # "batch_size": 2048, "num_runs": 30, "timeout": None, "debug": True}) + # self.assertTrue(acc["auc"] / auc_ref > 0.95) class BERT(unittest.TestCase): From 8351e914362e06502026c3c8b4496e67c58f86aa Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 31 Oct 2025 14:10:44 +0100 Subject: [PATCH 58/59] wip --- tests/test_pytorch_models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index ef74ecdc..9a47ac94 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -300,6 +300,8 @@ def wrapper(**kwargs): self.assertTrue(acc["top_1_acc"] / top_1_ref > 0.95) self.assertTrue(acc["top_5_acc"] / top_5_ref > 0.95) + print('here-resnet') + class VGG(unittest.TestCase): def setUp(self): From 0f8b11a99e12d4117a5ba7e926539cdc8e175d0c Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 31 Oct 2025 15:02:09 +0100 Subject: [PATCH 59/59] wip --- .github/workflows/test.yml | 1 - tests/test_pytorch_models.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8386918f..57d4df50 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -81,7 +81,6 @@ jobs: - name: End-user smoke test run: | - ffmpeg -version wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1 tar -xf aio_objdet_dataset.tar.gz > /dev/null diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index 9a47ac94..9a50a177 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -301,6 +301,7 @@ def wrapper(**kwargs): self.assertTrue(acc["top_5_acc"] / top_5_ref > 0.95) print('here-resnet') + print('here-resnet1') class VGG(unittest.TestCase): @@ -385,6 +386,4 @@ def setUp(self): if __name__ == "__main__": - print('here2') unittest.main() - print('here3')