From 7b55ce5c56e8014f695f918fa409d1f77c1bfd66 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 25 Sep 2025 15:21:20 +0200 Subject: [PATCH 01/55] first commit --- .../extractive_question_answering/bert_large/run_mlperf.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 57130f6c..25146a4d 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -34,6 +34,8 @@ def parse_args(): type=str, default="tf", choices=["tf", "pytorch"], help="specify the framework in which a model should be run") + parser.add_argument("--fixed-input", action='store_true', + help="truncate input to fixed shape") parser.add_argument("--timeout", type=float, default=60.0, help="timeout in seconds") @@ -102,6 +104,9 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disabl def run_single_pass(pytorch_runner, squad): input_tensor = squad.get_input_arrays() + print(input_tensor) + print(type(input_tensor)) + quit() output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor)) for i in range(batch_size): @@ -117,7 +122,7 @@ def run_single_pass(pytorch_runner, squad): padding=True, truncation=True, model_max_length=512) def tokenize(question, text): - return tokenizer(question, text, padding=True, truncation=True, return_tensors="pt") + return tokenizer(question, text, padding="max_length", truncation=True, max_length=512, return_tensors="pt") def detokenize(answer): return tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer)) From 1ede310c3d1c83b3120ddfffb693a1b8f97a0cce Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 25 Sep 2025 16:55:09 +0200 Subject: [PATCH 02/55] wip --- .../extractive_question_answering/bert_large/run_mlperf.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 25146a4d..4e9aefb2 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -106,6 +106,10 @@ def run_single_pass(pytorch_runner, squad): input_tensor = squad.get_input_arrays() print(input_tensor) print(type(input_tensor)) + print(input_tensor["input_ids"].size()[1]) + print(input_tensor["input_ids"].shape) + print(input_tensor["attention_mask"].shape) + print(input_tensor["token_type_ids"].shape) quit() output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor)) From a31e5e1aaf445f98fa3eee83ba70e88c837c0af4 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 25 Sep 2025 16:57:56 +0200 Subject: [PATCH 03/55] wip --- .../bert_large/run_mlperf.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 4e9aefb2..bcaed825 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -103,13 +103,14 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disabl from utils.pytorch import PyTorchRunner def run_single_pass(pytorch_runner, squad): - input_tensor = squad.get_input_arrays() - print(input_tensor) - print(type(input_tensor)) - print(input_tensor["input_ids"].size()[1]) - print(input_tensor["input_ids"].shape) - print(input_tensor["attention_mask"].shape) - print(input_tensor["token_type_ids"].shape) + for _ in range(10): + input_tensor = squad.get_input_arrays() + print(type(input_tensor)) + print(input_tensor["input_ids"].size()[1]) + print(input_tensor["input_ids"].shape) + print(input_tensor["attention_mask"].shape) + print(input_tensor["token_type_ids"].shape) + print("-------") quit() output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor)) From 6d5003ddffe484d079ca64d6e5a176f5641c8785 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 26 Sep 2025 10:44:18 +0200 Subject: [PATCH 04/55] wip --- .../bert_large/run_mlperf.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index bcaed825..a4b2553d 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -34,8 +34,6 @@ def parse_args(): type=str, default="tf", choices=["tf", "pytorch"], help="specify the framework in which a model should be run") - parser.add_argument("--fixed-input", action='store_true', - help="truncate input to fixed shape") parser.add_argument("--timeout", type=float, default=60.0, help="timeout in seconds") @@ -45,6 +43,8 @@ def parse_args(): parser.add_argument("--squad_path", type=str, help="path to directory with ImageNet validation images") + parser.add_argument("--fixed_input", action='store_true', + help="truncate input to fixed shape") parser.add_argument("--disable_jit_freeze", action='store_true', help="if true model will be run not in jit freeze mode") return parser.parse_args() @@ -95,7 +95,7 @@ def run_tf_fp16(model_path, batch_size, num_runs, timeout, squad_path, **kwargs) return run_tf_fp(model_path, batch_size, num_runs, timeout, squad_path) -def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze=False): +def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze=False, fixed_input=False): from utils.benchmark import run_model from utils.nlp.squad import Squad_v1_1 from transformers import AutoTokenizer, BertConfig, BertForQuestionAnswering @@ -127,7 +127,10 @@ def run_single_pass(pytorch_runner, squad): padding=True, truncation=True, model_max_length=512) def tokenize(question, text): - return tokenizer(question, text, padding="max_length", truncation=True, max_length=512, return_tensors="pt") + if fixed_input: + return tokenizer(question, text, padding="max_length", truncation=True, max_length=512, return_tensors="pt") + else: + return tokenizer(question, text, padding=True, truncation=True, return_tensors="pt") def detokenize(answer): return tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer)) @@ -209,8 +212,8 @@ def detokenize(answer): return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) -def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, **kwargs): - return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze) +def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input, **kwargs): + return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input) def main(): From 776e70d252c0d04c5aebc2df4885e553b163e177 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 26 Sep 2025 10:54:29 +0200 Subject: [PATCH 05/55] wip --- .../bert_large/run_mlperf.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index a4b2553d..3e0a5a10 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -103,15 +103,13 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disabl from utils.pytorch import PyTorchRunner def run_single_pass(pytorch_runner, squad): - for _ in range(10): - input_tensor = squad.get_input_arrays() - print(type(input_tensor)) - print(input_tensor["input_ids"].size()[1]) - print(input_tensor["input_ids"].shape) - print(input_tensor["attention_mask"].shape) - print(input_tensor["token_type_ids"].shape) - print("-------") - quit() + input_tensor = squad.get_input_arrays() + print(type(input_tensor)) + print(input_tensor["input_ids"].size()[1]) + print(input_tensor["input_ids"].shape) + print(input_tensor["attention_mask"].shape) + print(input_tensor["token_type_ids"].shape) + print("-------") output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor)) for i in range(batch_size): From b3c4a0c3a0606ce1f13f886212c13d0b6268a368 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 26 Sep 2025 10:59:27 +0200 Subject: [PATCH 06/55] wip --- .../extractive_question_answering/bert_large/run_mlperf.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 3e0a5a10..de874c5a 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -104,12 +104,6 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disabl def run_single_pass(pytorch_runner, squad): input_tensor = squad.get_input_arrays() - print(type(input_tensor)) - print(input_tensor["input_ids"].size()[1]) - print(input_tensor["input_ids"].shape) - print(input_tensor["attention_mask"].shape) - print(input_tensor["token_type_ids"].shape) - print("-------") output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor)) for i in range(batch_size): From f8c8b06cb7792c0802e72c03ed02882e84df8d4c Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 26 Sep 2025 13:16:10 +0200 Subject: [PATCH 07/55] wip --- computer_vision/object_detection/yolo_v5/run.py | 2 +- computer_vision/object_detection/yolo_v8/run.py | 2 +- .../extractive_question_answering/bert_large/run_mlperf.py | 2 +- recommendation/dlrm/run.py | 2 +- tests/test_pytorch_models.py | 3 ++- utils/cv/pre_processing.py | 2 +- 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/computer_vision/object_detection/yolo_v5/run.py b/computer_vision/object_detection/yolo_v5/run.py index 945727fd..dd8d1828 100644 --- a/computer_vision/object_detection/yolo_v5/run.py +++ b/computer_vision/object_detection/yolo_v5/run.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2024, Ampere Computing LLC +# Copyright (c) 2025, Ampere Computing LLC try: from utils import misc # noqa except ModuleNotFoundError: diff --git a/computer_vision/object_detection/yolo_v8/run.py b/computer_vision/object_detection/yolo_v8/run.py index 7df1d629..4dc6c67a 100644 --- a/computer_vision/object_detection/yolo_v8/run.py +++ b/computer_vision/object_detection/yolo_v8/run.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2024, Ampere Computing LLC +# Copyright (c) 2025, Ampere Computing LLC try: from utils import misc # noqa except ModuleNotFoundError: diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index de874c5a..a5605993 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2024, Ampere Computing LLC +# Copyright (c) 2025, Ampere Computing LLC try: from utils import misc # noqa except ModuleNotFoundError: diff --git a/recommendation/dlrm/run.py b/recommendation/dlrm/run.py index 97ce3a19..5997e085 100644 --- a/recommendation/dlrm/run.py +++ b/recommendation/dlrm/run.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2024, Ampere Computing LLC +# Copyright (c) 2025, Ampere Computing LLC try: from utils import misc # noqa except ModuleNotFoundError: diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index 60b99472..43c546ab 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -222,7 +222,8 @@ def wrapper(**kwargs): exact_match_ref, f1_ref = 0.750, 0.817 acc = run_process(wrapper, {"model_path": self.model_path, "squad_path": self.dataset_path, - "batch_size": 1, "num_runs": 24, "timeout": None, "disable_jit_freeze": False}) + "batch_size": 1, "num_runs": 24, "timeout": None, + "disable_jit_freeze": False, "fixed_input": False}) self.assertTrue(acc["exact_match"] / exact_match_ref > 0.95) self.assertTrue(acc["f1"] / f1_ref > 0.95) diff --git a/utils/cv/pre_processing.py b/utils/cv/pre_processing.py index 7d452069..ae17a4b1 100644 --- a/utils/cv/pre_processing.py +++ b/utils/cv/pre_processing.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2024, Ampere Computing LLC +# Copyright (c) 2025, Ampere Computing LLC import numpy as np import utils.misc as utils From df07b65ab3eca70e8bd4151245be20f2521f8815 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 26 Sep 2025 14:13:20 +0200 Subject: [PATCH 08/55] wip --- requirements.txt | 1 + tests/test_pytorch_models.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 25e13945..a0666590 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,3 +36,4 @@ open-clip-torch<2.26.1 diffusers accelerate boto3==1.29.0; python_version>='3.12' +torchcodec \ No newline at end of file diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index 43c546ab..403dc211 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2024, Ampere Computing LLC +# Copyright (c) 2025, Ampere Computing LLC import os import signal import time From 6626f910b61e42933817c3880af3b513c070dd15 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 26 Sep 2025 16:00:32 +0200 Subject: [PATCH 09/55] wip --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 8580f840..42a38322 100644 --- a/LICENSE +++ b/LICENSE @@ -187,7 +187,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright (c) 2024, Ampere Computing LLC + Copyright (c) 2025, Ampere Computing LLC Copyright (c) 2022 Andrej Karpathy Copyright (c) 2022 OpenAI Copyright (c) 2022 Stability AI From 01cb4def14450343736d491a87e50fe81378bc8c Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 30 Sep 2025 14:47:05 +0200 Subject: [PATCH 10/55] wip --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index a0666590..9cc6c4c6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,5 +35,4 @@ kornia open-clip-torch<2.26.1 diffusers accelerate -boto3==1.29.0; python_version>='3.12' -torchcodec \ No newline at end of file +boto3==1.29.0; python_version>='3.12' \ No newline at end of file From bc405d651b50e01246a9e967da191c0e9d588a73 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 30 Sep 2025 16:15:49 +0200 Subject: [PATCH 11/55] wip --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 9cc6c4c6..f8921397 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,6 +17,7 @@ tiktoken ultralytics evaluate datasets +datasets[audio] soundfile librosa numba From ddf25ed597b25c5511e764ec6ffe8a11b8933df7 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 1 Oct 2025 10:17:02 +0200 Subject: [PATCH 12/55] wip --- computer_vision/object_detection/yolo_v8/run.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/computer_vision/object_detection/yolo_v8/run.py b/computer_vision/object_detection/yolo_v8/run.py index 4dc6c67a..bc35d293 100644 --- a/computer_vision/object_detection/yolo_v8/run.py +++ b/computer_vision/object_detection/yolo_v8/run.py @@ -61,7 +61,7 @@ def run_ort_fp32(model_path, batch_size, num_runs, timeout, images_path, anno_pa # Ultralytics sets it to True by default. This way we suppress the logging by default while still allowing the user # to set it to True if needed from utils.ort import OrtRunner - from ultralytics.yolo.utils import ops + from ultralytics.utils import nms def run_single_pass(ort_runner, coco): shape = (640, 640) @@ -69,7 +69,7 @@ def run_single_pass(ort_runner, coco): output = ort_runner.run(batch_size) output = torch.from_numpy(output[0]) - output = ops.non_max_suppression(output) + output = nms.non_max_suppression(output) for i in range(batch_size): for d in range(output[i].shape[0]): @@ -97,11 +97,11 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, images_path, anno_ # Ultralytics sets it to True by default. This way we suppress the logging by default while still allowing the user # to set it to True if needed from utils.pytorch import PyTorchRunner - from ultralytics.yolo.utils import ops + from ultralytics.utils import nms def run_single_pass(pytorch_runner, coco): output = pytorch_runner.run(batch_size, coco.get_input_array((640, 640))) - output = ops.non_max_suppression(output) + output = nms.non_max_suppression(output) for i in range(batch_size): for d in range(output[i].shape[0]): @@ -122,6 +122,7 @@ def run_single_pass(pytorch_runner, coco): runner = PyTorchRunner(torch.jit.load(torchscript_model), disable_jit_freeze=disable_jit_freeze, example_inputs=torch.stack(dataset.get_input_array((640, 640)))) + #example_inputs=torch.stack((dataset.get_input_array((640, 640)),))) return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) From c4e81b0a7ff1a9e714e9aa844686a09ee43b325e Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 1 Oct 2025 10:18:35 +0200 Subject: [PATCH 13/55] wip --- computer_vision/object_detection/yolo_v8/run.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/computer_vision/object_detection/yolo_v8/run.py b/computer_vision/object_detection/yolo_v8/run.py index bc35d293..bbd51c24 100644 --- a/computer_vision/object_detection/yolo_v8/run.py +++ b/computer_vision/object_detection/yolo_v8/run.py @@ -121,8 +121,7 @@ def run_single_pass(pytorch_runner, coco): runner = PyTorchRunner(torch.jit.load(torchscript_model), disable_jit_freeze=disable_jit_freeze, - example_inputs=torch.stack(dataset.get_input_array((640, 640)))) - #example_inputs=torch.stack((dataset.get_input_array((640, 640)),))) + example_inputs=torch.stack((dataset.get_input_array((640, 640)),))) return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) From c7764d4f6e724dc692901d660abfe37a306e2774 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 2 Oct 2025 13:00:31 +0200 Subject: [PATCH 14/55] wip --- .github/workflows/test.yml | 2 ++ tests/test_pytorch_models.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2921a548..0a3cf245 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -84,6 +84,8 @@ jobs: wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1 tar -xf aio_objdet_dataset.tar.gz > /dev/null + apt-get update && apt-get install ffmpeg + wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60 diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index 403dc211..4ed6d07e 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -368,6 +368,8 @@ def setUp(self): def test_yolo_v8_s(self): from computer_vision.object_detection.yolo_v8.run import run_pytorch_fp32 + from utils.benchmark import set_global_intra_op_parallelism_threads + set_global_intra_op_parallelism_threads(int(os.environ.get("AIO_NUM_THREADS"))) def wrapper(**kwargs): kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) From 146e3b039d7473834b5f4fbf806ca53bd24d068f Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 2 Oct 2025 13:27:29 +0200 Subject: [PATCH 15/55] wip --- tests/test_pytorch_models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index 4ed6d07e..8d27c83c 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -369,7 +369,8 @@ def setUp(self): def test_yolo_v8_s(self): from computer_vision.object_detection.yolo_v8.run import run_pytorch_fp32 from utils.benchmark import set_global_intra_op_parallelism_threads - set_global_intra_op_parallelism_threads(int(os.environ.get("AIO_NUM_THREADS"))) + #set_global_intra_op_parallelism_threads(int(os.environ.get("AIO_NUM_THREADS"))) + set_global_intra_op_parallelism_threads(32) def wrapper(**kwargs): kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) From 83d284e1417fa3ae137d8a135a666aafc8ac2957 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 2 Oct 2025 15:56:07 +0200 Subject: [PATCH 16/55] wip --- .github/workflows/test.yml | 28 ++++++++++++++-------------- tests/test_pytorch_models.py | 1 - 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0a3cf245..8c54a461 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -84,7 +84,7 @@ jobs: wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1 tar -xf aio_objdet_dataset.tar.gz > /dev/null - apt-get update && apt-get install ffmpeg + apt-get update && apt-get install -y ffmpeg wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60 @@ -151,24 +151,24 @@ jobs: tar -xf aio_objdet_dataset.tar.gz > /dev/null wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1 - IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60 + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60 - IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60 + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60 wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt > /dev/null 2>&1 - IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/yolo_v8/run.py -m yolov8n.pt -f pytorch -p fp32 --timeout=60 + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/yolo_v8/run.py -m yolov8n.pt -f pytorch -p fp32 --timeout=60 python3 speech_recognition/whisper/run.py -m small.en wget $S3_URL_SSD_INCEPTION_V2_TF_FP32 > /dev/null 2>&1 - IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/ssd_inception_v2/run.py -m ssd_inception_v2_tf_fp32.pb -p fp32 --timeout=60 + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/ssd_inception_v2/run.py -m ssd_inception_v2_tf_fp32.pb -p fp32 --timeout=60 wget https://zenodo.org/records/4735647/files/resnet50_v1.onnx > /dev/null 2>&1 - IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v1/run.py -m resnet50_v1.onnx -p fp32 -f ort + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v1/run.py -m resnet50_v1.onnx -p fp32 -f ort wget https://s3.amazonaws.com/onnx-model-zoo/vgg/vgg16/vgg16.tar.gz > /dev/null 2>&1 tar -xf vgg16.tar.gz > /dev/null - IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/vgg_16/run.py -m vgg16/vgg16.onnx -p fp32 -f ort + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/vgg_16/run.py -m vgg16/vgg16.onnx -p fp32 -f ort test_pytorch_arm64_sh: if: false @@ -259,21 +259,21 @@ jobs: tar -xf aio_objdet_dataset.tar.gz > /dev/null wget https://github.com/tloen/alpaca-lora/raw/main/alpaca_data.json > /dev/null 2>&1 - AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 natural_language_processing/text_generation/llama2/run.py -m meta-llama/Llama-2-7b-chat-hf --dataset_path=alpaca_data.json + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 natural_language_processing/text_generation/llama2/run.py -m meta-llama/Llama-2-7b-chat-hf --dataset_path=alpaca_data.json - AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 recommendation/dlrm_torchbench/run.py -p fp32 + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 recommendation/dlrm_torchbench/run.py -p fp32 - IGNORE_DATASET_LIMITS=1 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 computer_vision/classification/resnet_50_v15/run.py -m resnet50 -p fp32 -b 16 -f pytorch + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 computer_vision/classification/resnet_50_v15/run.py -m resnet50 -p fp32 -b 16 -f pytorch - AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 speech_recognition/whisper/run.py -m tiny.en + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 speech_recognition/whisper/run.py -m tiny.en - IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60 + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60 wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt > /dev/null 2>&1 - IGNORE_DATASET_LIMITS=1 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 computer_vision/object_detection/yolo_v8/run.py -m yolov8l.pt -p fp32 -f pytorch + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 computer_vision/object_detection/yolo_v8/run.py -m yolov8l.pt -p fp32 -f pytorch wget -O bert_large_mlperf.pt https://zenodo.org/records/3733896/files/model.pytorch?download=1 > /dev/null 2>&1 - AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py -m bert_large_mlperf.pt -p fp32 -f pytorch + OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py -m bert_large_mlperf.pt -p fp32 -f pytorch test_tensorflow_arm64: runs-on: self-hosted diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index 8d27c83c..916925f2 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -369,7 +369,6 @@ def setUp(self): def test_yolo_v8_s(self): from computer_vision.object_detection.yolo_v8.run import run_pytorch_fp32 from utils.benchmark import set_global_intra_op_parallelism_threads - #set_global_intra_op_parallelism_threads(int(os.environ.get("AIO_NUM_THREADS"))) set_global_intra_op_parallelism_threads(32) def wrapper(**kwargs): From 93ed7b4242a2c9b930c94c0b1909fb6d1a34b473 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 3 Oct 2025 10:36:15 +0200 Subject: [PATCH 17/55] wip --- .../extractive_question_answering/bert_large/run_mlperf.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index a5605993..8a9c081a 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -45,6 +45,8 @@ def parse_args(): help="path to directory with ImageNet validation images") parser.add_argument("--fixed_input", action='store_true', help="truncate input to fixed shape") + parser.add_argument("--input_size", type=int, default=384, + help='size of the input') parser.add_argument("--disable_jit_freeze", action='store_true', help="if true model will be run not in jit freeze mode") return parser.parse_args() @@ -104,6 +106,11 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disabl def run_single_pass(pytorch_runner, squad): input_tensor = squad.get_input_arrays() + print(input_tensor["input_ids"].shape) + print(input_tensor["attention_mask"].shape) + print(input_tensor["token_type_ids"].shape) + + quit() output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor)) for i in range(batch_size): From 07a1a34343afdfdc56429d7b3f13a3dbb58b1407 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 3 Oct 2025 11:23:39 +0200 Subject: [PATCH 18/55] wip --- .../bert_large/run_mlperf.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 8a9c081a..e6f3af16 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -97,7 +97,7 @@ def run_tf_fp16(model_path, batch_size, num_runs, timeout, squad_path, **kwargs) return run_tf_fp(model_path, batch_size, num_runs, timeout, squad_path) -def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze=False, fixed_input=False): +def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze=False, fixed_input=False): from utils.benchmark import run_model from utils.nlp.squad import Squad_v1_1 from transformers import AutoTokenizer, BertConfig, BertForQuestionAnswering @@ -127,7 +127,8 @@ def run_single_pass(pytorch_runner, squad): def tokenize(question, text): if fixed_input: - return tokenizer(question, text, padding="max_length", truncation=True, max_length=512, return_tensors="pt") + return tokenizer(question, text, padding="max_length", truncation=True, max_length=input_size, + return_tensors="pt") else: return tokenizer(question, text, padding=True, truncation=True, return_tensors="pt") @@ -211,8 +212,8 @@ def detokenize(answer): return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) -def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input, **kwargs): - return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input) +def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input, input_size, **kwargs): + return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input, input_size) def main(): From ccddf0a437422f7bf633965593dfb6c80205241e Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 7 Oct 2025 16:30:38 +0200 Subject: [PATCH 19/55] wip --- .../bert_large/run_mlperf.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index e6f3af16..39b83ef0 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -105,10 +105,11 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_ from utils.pytorch import PyTorchRunner def run_single_pass(pytorch_runner, squad): - input_tensor = squad.get_input_arrays() - print(input_tensor["input_ids"].shape) - print(input_tensor["attention_mask"].shape) - print(input_tensor["token_type_ids"].shape) + for _ in range(10): + input_tensor = squad.get_input_arrays() + print(input_tensor["input_ids"].shape) + print(input_tensor["attention_mask"].shape) + print(input_tensor["token_type_ids"].shape) quit() output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor)) From 2a518e7cf8bf519af25162c9c670df0ffb95613f Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 7 Oct 2025 16:31:29 +0200 Subject: [PATCH 20/55] wip --- .../extractive_question_answering/bert_large/run_mlperf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 39b83ef0..5b226bcf 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -110,6 +110,7 @@ def run_single_pass(pytorch_runner, squad): print(input_tensor["input_ids"].shape) print(input_tensor["attention_mask"].shape) print(input_tensor["token_type_ids"].shape) + print('---') quit() output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor)) From fe213de615ff666829abfd0e6f27f63c38b7ed37 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 7 Oct 2025 16:37:31 +0200 Subject: [PATCH 21/55] wip --- .../extractive_question_answering/bert_large/run_mlperf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 5b226bcf..80313b80 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -97,7 +97,7 @@ def run_tf_fp16(model_path, batch_size, num_runs, timeout, squad_path, **kwargs) return run_tf_fp(model_path, batch_size, num_runs, timeout, squad_path) -def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze=False, fixed_input=False): +def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, fixed_input, disable_jit_freeze=False): from utils.benchmark import run_model from utils.nlp.squad import Squad_v1_1 from transformers import AutoTokenizer, BertConfig, BertForQuestionAnswering From 02d4de6950a851f88f554394f1add212f5cfc7cf Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 7 Oct 2025 16:38:43 +0200 Subject: [PATCH 22/55] wip --- .../extractive_question_answering/bert_large/run_mlperf.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 80313b80..c02f9e3b 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -104,6 +104,9 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_ import torch from utils.pytorch import PyTorchRunner + print(input_size) + print(fixed_input) + def run_single_pass(pytorch_runner, squad): for _ in range(10): input_tensor = squad.get_input_arrays() @@ -129,6 +132,7 @@ def run_single_pass(pytorch_runner, squad): def tokenize(question, text): if fixed_input: + print('h1') return tokenizer(question, text, padding="max_length", truncation=True, max_length=input_size, return_tensors="pt") else: From d720b06443c6eb3374300f24390966bc8c35e6d1 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 7 Oct 2025 16:41:32 +0200 Subject: [PATCH 23/55] wip --- .../extractive_question_answering/bert_large/run_mlperf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index c02f9e3b..0e2b723f 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -97,7 +97,7 @@ def run_tf_fp16(model_path, batch_size, num_runs, timeout, squad_path, **kwargs) return run_tf_fp(model_path, batch_size, num_runs, timeout, squad_path) -def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, fixed_input, disable_jit_freeze=False): +def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze=False, fixed_input=False): from utils.benchmark import run_model from utils.nlp.squad import Squad_v1_1 from transformers import AutoTokenizer, BertConfig, BertForQuestionAnswering @@ -218,8 +218,8 @@ def detokenize(answer): return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) -def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input, input_size, **kwargs): - return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input, input_size) +def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze, fixed_input, **kwargs): + return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze, fixed_input) def main(): From c682f18849e1f137afebb03c82f1ad562cb10eba Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 7 Oct 2025 16:47:26 +0200 Subject: [PATCH 24/55] wip --- .../bert_large/run_mlperf.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 0e2b723f..9449859f 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -97,16 +97,14 @@ def run_tf_fp16(model_path, batch_size, num_runs, timeout, squad_path, **kwargs) return run_tf_fp(model_path, batch_size, num_runs, timeout, squad_path) -def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze=False, fixed_input=False): +def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, + input_size, disable_jit_freeze=False, fixed_input=False): from utils.benchmark import run_model from utils.nlp.squad import Squad_v1_1 from transformers import AutoTokenizer, BertConfig, BertForQuestionAnswering import torch from utils.pytorch import PyTorchRunner - print(input_size) - print(fixed_input) - def run_single_pass(pytorch_runner, squad): for _ in range(10): input_tensor = squad.get_input_arrays() @@ -132,9 +130,8 @@ def run_single_pass(pytorch_runner, squad): def tokenize(question, text): if fixed_input: - print('h1') - return tokenizer(question, text, padding="max_length", truncation=True, max_length=input_size, - return_tensors="pt") + return tokenizer(question, text, padding="max_length", truncation=True, + max_length=input_size, return_tensors="pt") else: return tokenizer(question, text, padding=True, truncation=True, return_tensors="pt") @@ -218,8 +215,10 @@ def detokenize(answer): return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) -def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze, fixed_input, **kwargs): - return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze, fixed_input) +def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, + input_size, disable_jit_freeze, fixed_input, **kwargs): + return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, + input_size, disable_jit_freeze, fixed_input) def main(): From 2b56ab0df9011ff2ac24e6f8ec8eeac8c876b106 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 8 Oct 2025 11:23:27 +0200 Subject: [PATCH 25/55] wip --- tests/test_pytorch_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index 916925f2..02de8df6 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -223,7 +223,7 @@ def wrapper(**kwargs): exact_match_ref, f1_ref = 0.750, 0.817 acc = run_process(wrapper, {"model_path": self.model_path, "squad_path": self.dataset_path, "batch_size": 1, "num_runs": 24, "timeout": None, - "disable_jit_freeze": False, "fixed_input": False}) + "input_size": 384, "disable_jit_freeze": False, "fixed_input": False}) self.assertTrue(acc["exact_match"] / exact_match_ref > 0.95) self.assertTrue(acc["f1"] / f1_ref > 0.95) From 23e4287433f3641986c2df697d942a89f04976e7 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 8 Oct 2025 12:48:50 +0200 Subject: [PATCH 26/55] wip --- .../bert_large/run_mlperf.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 9449859f..16413c74 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -106,14 +106,7 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, from utils.pytorch import PyTorchRunner def run_single_pass(pytorch_runner, squad): - for _ in range(10): - input_tensor = squad.get_input_arrays() - print(input_tensor["input_ids"].shape) - print(input_tensor["attention_mask"].shape) - print(input_tensor["token_type_ids"].shape) - print('---') - - quit() + input_tensor = squad.get_input_arrays() output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor)) for i in range(batch_size): From 871de3a6f91866a3f97d784f0c133eaa14e16138 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 16 Oct 2025 14:57:26 +0200 Subject: [PATCH 27/55] wip --- .../bert_large/run_mlperf.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 16413c74..84ed1311 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -43,9 +43,7 @@ def parse_args(): parser.add_argument("--squad_path", type=str, help="path to directory with ImageNet validation images") - parser.add_argument("--fixed_input", action='store_true', - help="truncate input to fixed shape") - parser.add_argument("--input_size", type=int, default=384, + parser.add_argument("--fixed_input_size", type=int, help='size of the input') parser.add_argument("--disable_jit_freeze", action='store_true', help="if true model will be run not in jit freeze mode") @@ -97,8 +95,7 @@ def run_tf_fp16(model_path, batch_size, num_runs, timeout, squad_path, **kwargs) return run_tf_fp(model_path, batch_size, num_runs, timeout, squad_path) -def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, - input_size, disable_jit_freeze=False, fixed_input=False): +def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, fixed_input_size, disable_jit_freeze=False): from utils.benchmark import run_model from utils.nlp.squad import Squad_v1_1 from transformers import AutoTokenizer, BertConfig, BertForQuestionAnswering @@ -120,13 +117,17 @@ def run_single_pass(pytorch_runner, squad): tokenizer = AutoTokenizer.from_pretrained( "bert-large-uncased-whole-word-masking-finetuned-squad", padding=True, truncation=True, model_max_length=512) - + print(fixed_input_size) + def tokenize(question, text): - if fixed_input: + if fixed_input_size is not None: + print('h1') return tokenizer(question, text, padding="max_length", truncation=True, - max_length=input_size, return_tensors="pt") + max_length=fixed_input_size, return_tensors="pt") else: + print('h2') return tokenizer(question, text, padding=True, truncation=True, return_tensors="pt") + quit() def detokenize(answer): return tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer)) @@ -208,10 +209,9 @@ def detokenize(answer): return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) -def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, - input_size, disable_jit_freeze, fixed_input, **kwargs): - return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, - input_size, disable_jit_freeze, fixed_input) +def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, fixed_input_size, disable_jit_freeze, + **kwargs): + return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, fixed_input_size, disable_jit_freeze) def main(): From 5ff448670e487397c604f41484e8b34754936b49 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 13:44:10 +0200 Subject: [PATCH 28/55] wip --- .../extractive_question_answering/bert_large/run_mlperf.py | 6 +----- tests/test_pytorch_models.py | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py index 84ed1311..4f555ab4 100644 --- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py +++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py @@ -117,17 +117,13 @@ def run_single_pass(pytorch_runner, squad): tokenizer = AutoTokenizer.from_pretrained( "bert-large-uncased-whole-word-masking-finetuned-squad", padding=True, truncation=True, model_max_length=512) - print(fixed_input_size) - + def tokenize(question, text): if fixed_input_size is not None: - print('h1') return tokenizer(question, text, padding="max_length", truncation=True, max_length=fixed_input_size, return_tensors="pt") else: - print('h2') return tokenizer(question, text, padding=True, truncation=True, return_tensors="pt") - quit() def detokenize(answer): return tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer)) diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index 02de8df6..b7a2ecaa 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -223,7 +223,7 @@ def wrapper(**kwargs): exact_match_ref, f1_ref = 0.750, 0.817 acc = run_process(wrapper, {"model_path": self.model_path, "squad_path": self.dataset_path, "batch_size": 1, "num_runs": 24, "timeout": None, - "input_size": 384, "disable_jit_freeze": False, "fixed_input": False}) + "fixed_input_size": None, "disable_jit_freeze": False}) self.assertTrue(acc["exact_match"] / exact_match_ref > 0.95) self.assertTrue(acc["f1"] / f1_ref > 0.95) From ee2acbd09fc8e50119d1ecbacf5eb3e86fe05e3b Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 15:02:38 +0200 Subject: [PATCH 29/55] wip --- setup_deb.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index 2e6b4a63..34323add 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -53,6 +53,8 @@ fi if ! pip3 --version; then apt-get install -y python3-pip fi +pip install --upgrade pip + PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))') PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev") if [[ -n "$PYTHON_DEV_SEARCH" ]]; then @@ -81,7 +83,7 @@ pip3 install --break-system-packages -r "$(dirname "$0")/requirements.txt" || apt install -y autoconf autogen automake build-essential libasound2-dev \ libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \ - libmpg123-dev pkg-config + libmpg123-dev pkg-config ffmpeg apt remove -y libsndfile1 git clone -b 1.2.2 https://github.com/libsndfile/libsndfile.git && cd libsndfile/ && autoreconf -vif && ./configure --enable-werror && make -j && make install && ldconfig && cd .. && rm -rf libsndfile From 0b9196f72c99a466d62c0863f265e6193736aedc Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 15:13:15 +0200 Subject: [PATCH 30/55] wip --- setup_deb.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index 34323add..3820f857 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -11,6 +11,7 @@ log() { } ARCH=$(uname -m) +PIP_BREAK_SYSTEM_PACKAGES=1 if [ -z ${SCRIPT_DIR+x} ]; then SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) @@ -53,7 +54,6 @@ fi if ! pip3 --version; then apt-get install -y python3-pip fi -pip install --upgrade pip PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))') PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev") @@ -78,9 +78,12 @@ sleep 1 ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py # get almost all python deps +pip3 install --upgrade pip pip3 install --break-system-packages -r "$(dirname "$0")/requirements.txt" || pip3 install -r "$(dirname "$0")/requirements.txt" + + apt install -y autoconf autogen automake build-essential libasound2-dev \ libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \ libmpg123-dev pkg-config ffmpeg From 5e97ac496c7e5fa285e79de312b16e5638bd2c62 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 16:25:28 +0200 Subject: [PATCH 31/55] wip --- setup_deb.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/setup_deb.sh b/setup_deb.sh index 3820f857..8348d2da 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -11,7 +11,6 @@ log() { } ARCH=$(uname -m) -PIP_BREAK_SYSTEM_PACKAGES=1 if [ -z ${SCRIPT_DIR+x} ]; then SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) @@ -78,9 +77,9 @@ sleep 1 ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py # get almost all python deps -pip3 install --upgrade pip -pip3 install --break-system-packages -r "$(dirname "$0")/requirements.txt" || - pip3 install -r "$(dirname "$0")/requirements.txt" +python3 -m pip install --ignore-installed --break-system-packages --upgrade pip +python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" || + python3 -m pip3 install -r "$(dirname "$0")/requirements.txt" From 3ae639f1f41ec23fab0ca84c9ddb5d001a3ae422 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 17:06:05 +0200 Subject: [PATCH 32/55] wip --- setup_deb.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index 8348d2da..2c50c32c 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -77,10 +77,16 @@ sleep 1 ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py # get almost all python deps -python3 -m pip install --ignore-installed --break-system-packages --upgrade pip +echo here1 + +python3 -m pip install --ignore-installed --upgrade pip + +echo here2 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" || python3 -m pip3 install -r "$(dirname "$0")/requirements.txt" +echo here3 + apt install -y autoconf autogen automake build-essential libasound2-dev \ From 27af2ee5a103889bdd2746992c5f45e9f210ed06 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 17:11:46 +0200 Subject: [PATCH 33/55] wip --- setup_deb.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index 2c50c32c..3358f800 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -79,7 +79,7 @@ ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py # get almost all python deps echo here1 -python3 -m pip install --ignore-installed --upgrade pip +python3 -m pip install --break-system-packages --ignore-installed --upgrade pip echo here2 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" || From 84eaf81710397df030fd2f5f33165c401b09900e Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 17:16:26 +0200 Subject: [PATCH 34/55] wip --- setup_deb.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/setup_deb.sh b/setup_deb.sh index 3358f800..4e6a60ba 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -79,13 +79,16 @@ ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py # get almost all python deps echo here1 +python3 -m pip --version +python3 -m pip install --upgrade --ignore-installed pip +echo here2 python3 -m pip install --break-system-packages --ignore-installed --upgrade pip -echo here2 +echo here3 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" || python3 -m pip3 install -r "$(dirname "$0")/requirements.txt" -echo here3 +echo here4 From 7181c4350a83886f555e58d74bd8a57827aca33a Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 17:24:57 +0200 Subject: [PATCH 35/55] wip --- setup_deb.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index 4e6a60ba..51e943af 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -79,8 +79,11 @@ ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py # get almost all python deps echo here1 +export PIP_CACHE_DIR=${PIP_CACHE_DIR:-/tmp/pip-cache} +mkdir -p "$PIP_CACHE_DIR" || true + python3 -m pip --version -python3 -m pip install --upgrade --ignore-installed pip +#python3 -m pip install --upgrade --ignore-installed pip echo here2 python3 -m pip install --break-system-packages --ignore-installed --upgrade pip From 47e0779719372dfab8f935023fcc9bace83bdfd6 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 17:58:26 +0200 Subject: [PATCH 36/55] wip --- setup_deb.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/setup_deb.sh b/setup_deb.sh index 51e943af..f71a7392 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -48,10 +48,12 @@ sleep 1 apt-get update -y apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake if ! python3 -c ""; then + apt-get update -y apt-get install -y python3 python3-pip fi if ! pip3 --version; then - apt-get install -y python3-pip + apt-get install -y python3-pip || true + python3 -m ensurepip --upgrade || true fi PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))') @@ -79,9 +81,6 @@ ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py # get almost all python deps echo here1 -export PIP_CACHE_DIR=${PIP_CACHE_DIR:-/tmp/pip-cache} -mkdir -p "$PIP_CACHE_DIR" || true - python3 -m pip --version #python3 -m pip install --upgrade --ignore-installed pip echo here2 From 5ca52494702d584378bfe943f1dfe0320d73a9ef Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 17 Oct 2025 18:01:57 +0200 Subject: [PATCH 37/55] wip --- setup_deb.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index f71a7392..42bd2d0f 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -49,7 +49,8 @@ apt-get update -y apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake if ! python3 -c ""; then apt-get update -y - apt-get install -y python3 python3-pip + apt-get install -y python3 python3-pip || true + python3 -m ensurepip --upgrade || true fi if ! pip3 --version; then apt-get install -y python3-pip || true From c2e309db9145cb1b0d46fc1328e9625952593be0 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Mon, 20 Oct 2025 12:39:51 +0200 Subject: [PATCH 38/55] wip --- setup_deb.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/setup_deb.sh b/setup_deb.sh index 42bd2d0f..a01aa490 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -49,14 +49,15 @@ apt-get update -y apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake if ! python3 -c ""; then apt-get update -y - apt-get install -y python3 python3-pip || true - python3 -m ensurepip --upgrade || true + apt-get install -y python3 python3-pip fi if ! pip3 --version; then - apt-get install -y python3-pip || true - python3 -m ensurepip --upgrade || true + apt-get install -y python3-pip fi +python3 -m venv /opt/venv +. /opt/venv/bin/activate + PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))') PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev") if [[ -n "$PYTHON_DEV_SEARCH" ]]; then From fba70f675775881672248dbf2507e152ee0e0e1e Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Mon, 20 Oct 2025 12:43:42 +0200 Subject: [PATCH 39/55] wip --- setup_deb.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index a01aa490..63519c34 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -49,7 +49,7 @@ apt-get update -y apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake if ! python3 -c ""; then apt-get update -y - apt-get install -y python3 python3-pip + apt-get install -y python3 python3-pip python3-venv fi if ! pip3 --version; then apt-get install -y python3-pip From 242ca724cd003a5727f8652efce1c3e675d21138 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Mon, 20 Oct 2025 12:49:33 +0200 Subject: [PATCH 40/55] wip --- setup_deb.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index 63519c34..ed1dd039 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -49,12 +49,13 @@ apt-get update -y apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake if ! python3 -c ""; then apt-get update -y - apt-get install -y python3 python3-pip python3-venv + apt-get install -y python3 python3-pip fi if ! pip3 --version; then apt-get install -y python3-pip fi +apt-get install -y python3-venv python3 -m venv /opt/venv . /opt/venv/bin/activate From e5f63abf825967879757647b1aaf8023f562dfe6 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Mon, 20 Oct 2025 12:52:39 +0200 Subject: [PATCH 41/55] wip --- setup_deb.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index ed1dd039..304b7866 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -87,7 +87,7 @@ echo here1 python3 -m pip --version #python3 -m pip install --upgrade --ignore-installed pip echo here2 -python3 -m pip install --break-system-packages --ignore-installed --upgrade pip +python3 -m pip install --ignore-installed --upgrade pip echo here3 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" || From 8628d9e72bb6a701fc08c79bd10a95968d3abf96 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Mon, 20 Oct 2025 14:38:20 +0200 Subject: [PATCH 42/55] wip --- setup_deb.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/setup_deb.sh b/setup_deb.sh index 304b7866..93325da7 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -55,9 +55,10 @@ if ! pip3 --version; then apt-get install -y python3-pip fi -apt-get install -y python3-venv -python3 -m venv /opt/venv -. /opt/venv/bin/activate +#apt-get install -y python3-venv +#python3 -m venv /opt/venv +#. /opt/venv/bin/activate +#python3 -m pip config set global.break-system-packages true PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))') PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev") From 572eb528a3b69a53e851b7372464a67944efddb9 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Mon, 20 Oct 2025 14:43:23 +0200 Subject: [PATCH 43/55] wip --- setup_deb.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup_deb.sh b/setup_deb.sh index 93325da7..58313a98 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -88,7 +88,7 @@ echo here1 python3 -m pip --version #python3 -m pip install --upgrade --ignore-installed pip echo here2 -python3 -m pip install --ignore-installed --upgrade pip +PIP_BREAK_SYSTEM_PACKAGES=1 python3 -m pip install --ignore-installed --upgrade pip echo here3 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" || From 58b26c7475be9a36b7e71a3e4767eead0526bb3d Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 22 Oct 2025 13:01:13 +0200 Subject: [PATCH 44/55] wip --- setup_deb.sh | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/setup_deb.sh b/setup_deb.sh index 58313a98..81404dd5 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -55,11 +55,6 @@ if ! pip3 --version; then apt-get install -y python3-pip fi -#apt-get install -y python3-venv -#python3 -m venv /opt/venv -#. /opt/venv/bin/activate -#python3 -m pip config set global.break-system-packages true - PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))') PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev") if [[ -n "$PYTHON_DEV_SEARCH" ]]; then @@ -96,8 +91,6 @@ python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements. echo here4 - - apt install -y autoconf autogen automake build-essential libasound2-dev \ libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \ libmpg123-dev pkg-config ffmpeg @@ -123,3 +116,6 @@ else touch "$SCRIPT_DIR"/.setup_completed fi log "Setup completed. Please run: source $SCRIPT_DIR/set_env_variables.sh" + +echo HERe555 +exit 1 \ No newline at end of file From 6c47e2f35fdcf7dd7e0d24f43c7a339426addbce Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 22 Oct 2025 13:09:13 +0200 Subject: [PATCH 45/55] wip --- setup_deb.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/setup_deb.sh b/setup_deb.sh index 81404dd5..a6617573 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -89,7 +89,9 @@ echo here3 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" || python3 -m pip3 install -r "$(dirname "$0")/requirements.txt" -echo here4 +python3 -m pip --version +echo HERe555 +exit 1 apt install -y autoconf autogen automake build-essential libasound2-dev \ libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \ @@ -116,6 +118,3 @@ else touch "$SCRIPT_DIR"/.setup_completed fi log "Setup completed. Please run: source $SCRIPT_DIR/set_env_variables.sh" - -echo HERe555 -exit 1 \ No newline at end of file From 006ebb12cd8669d53d9e8bba7bdd3f118c599682 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 22 Oct 2025 13:18:05 +0200 Subject: [PATCH 46/55] wip --- .github/workflows/test.yml | 2 -- setup_deb.sh | 11 ----------- 2 files changed, 13 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8c54a461..9ba08463 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -84,8 +84,6 @@ jobs: wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1 tar -xf aio_objdet_dataset.tar.gz > /dev/null - apt-get update && apt-get install -y ffmpeg - wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60 diff --git a/setup_deb.sh b/setup_deb.sh index a6617573..c1d10385 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -78,21 +78,10 @@ sleep 1 ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py # get almost all python deps -echo here1 - -python3 -m pip --version -#python3 -m pip install --upgrade --ignore-installed pip -echo here2 PIP_BREAK_SYSTEM_PACKAGES=1 python3 -m pip install --ignore-installed --upgrade pip - -echo here3 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" || python3 -m pip3 install -r "$(dirname "$0")/requirements.txt" -python3 -m pip --version -echo HERe555 -exit 1 - apt install -y autoconf autogen automake build-essential libasound2-dev \ libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \ libmpg123-dev pkg-config ffmpeg From 13e9fd7b28ce69a6c7b627fef0431df9798269ab Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 22 Oct 2025 14:40:01 +0200 Subject: [PATCH 47/55] wip --- .github/workflows/test.yml | 7 +++++++ setup_deb.sh | 2 ++ 2 files changed, 9 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9ba08463..55a3c3da 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -84,6 +84,13 @@ jobs: wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1 tar -xf aio_objdet_dataset.tar.gz > /dev/null + which -a ffmpeg + ffmpeg -version + + ffmpeg -version | head -n1 + + exit 1 + wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60 diff --git a/setup_deb.sh b/setup_deb.sh index c1d10385..a8285c37 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -55,6 +55,8 @@ if ! pip3 --version; then apt-get install -y python3-pip fi +apt-get update && apt-get install -y --no-install-recommends ffmpeg && rm -rf /var/lib/apt/lists/* + PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))') PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev") if [[ -n "$PYTHON_DEV_SEARCH" ]]; then From 706fe6e837e0601cfa4e3d4c9ed096aef4ff33f6 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 22 Oct 2025 15:02:21 +0200 Subject: [PATCH 48/55] wip --- setup_deb.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/setup_deb.sh b/setup_deb.sh index a8285c37..c1d10385 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -55,8 +55,6 @@ if ! pip3 --version; then apt-get install -y python3-pip fi -apt-get update && apt-get install -y --no-install-recommends ffmpeg && rm -rf /var/lib/apt/lists/* - PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))') PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev") if [[ -n "$PYTHON_DEV_SEARCH" ]]; then From ad8cf9fe85068a0e999216ebad3aedcdc2b399ae Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 22 Oct 2025 15:57:40 +0200 Subject: [PATCH 49/55] wip --- .github/workflows/test.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 55a3c3da..ecba2ae5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -81,15 +81,15 @@ jobs: - name: End-user smoke test run: | - wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1 - tar -xf aio_objdet_dataset.tar.gz > /dev/null - which -a ffmpeg ffmpeg -version ffmpeg -version | head -n1 - exit 1 + echo here1 + + wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1 + tar -xf aio_objdet_dataset.tar.gz > /dev/null wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60 From 8983fae596acfdde740ebabe4bc1a8eea5ce9bdb Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 22 Oct 2025 17:58:51 +0200 Subject: [PATCH 50/55] wip --- .github/workflows/test.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ecba2ae5..bea349f5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -80,14 +80,8 @@ jobs: python3 -m unittest tests.test_pytorch_models - name: End-user smoke test - run: | - which -a ffmpeg + run: | ffmpeg -version - - ffmpeg -version | head -n1 - - echo here1 - wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1 tar -xf aio_objdet_dataset.tar.gz > /dev/null From b22bd269f6bd13113dfacf6d2131a8b60c786691 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 23 Oct 2025 11:30:56 +0200 Subject: [PATCH 51/55] wip --- setup_deb.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/setup_deb.sh b/setup_deb.sh index c1d10385..abb4c8fd 100644 --- a/setup_deb.sh +++ b/setup_deb.sh @@ -4,6 +4,9 @@ set -eo pipefail +ln -fs /usr/share/zoneinfo/Europe/Warsaw /etc/localtime +echo "Europe/Warsaw" | tee /etc/timezone >/dev/null + log() { COLOR_DEFAULT='\033[0m' COLOR_CYAN='\033[1;36m' @@ -46,7 +49,7 @@ fi log "Installing system dependencies ..." sleep 1 apt-get update -y -apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake +apt-get install -y build-essential libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake if ! python3 -c ""; then apt-get update -y apt-get install -y python3 python3-pip @@ -84,7 +87,7 @@ python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements. apt install -y autoconf autogen automake build-essential libasound2-dev \ libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \ - libmpg123-dev pkg-config ffmpeg + libmpg123-dev pkg-config apt remove -y libsndfile1 git clone -b 1.2.2 https://github.com/libsndfile/libsndfile.git && cd libsndfile/ && autoreconf -vif && ./configure --enable-werror && make -j && make install && ldconfig && cd .. && rm -rf libsndfile @@ -101,6 +104,9 @@ if [ "$(python3 -c 'import torch; print(torch.cuda.is_available())')" == "True" fi log "done.\n" +apt-get update -y +apt-get install -y ffmpeg + if [ -f "/etc/machine-id" ]; then cat /etc/machine-id >"$SCRIPT_DIR"/.setup_completed else From 51f3b94b2eff0dc464eff1c6352fa49f1aa4c7d8 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 23 Oct 2025 13:32:44 +0200 Subject: [PATCH 52/55] wip --- .github/workflows/test.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bea349f5..ef0be073 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -116,6 +116,7 @@ jobs: COCO_IMG_PATH: aio_objdet_dataset COCO_ANNO_PATH: aio_objdet_dataset/annotations.json OMP_NUM_THREADS: 32 + AIO_NUM_THREADS: 32 S3_URL_CRITEO_DATASET: ${{ secrets.S3_URL_CRITEO_DATASET }} S3_URL_RESNET_50_V15_TF_FP32: ${{ secrets.S3_URL_RESNET_50_V15_TF_FP32 }} S3_URL_SSD_INCEPTION_V2_TF_FP32: ${{ secrets.S3_URL_SSD_INCEPTION_V2_TF_FP32 }} @@ -150,24 +151,24 @@ jobs: tar -xf aio_objdet_dataset.tar.gz > /dev/null wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1 - OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60 + IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60 - OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60 + IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60 wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt > /dev/null 2>&1 - OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/yolo_v8/run.py -m yolov8n.pt -f pytorch -p fp32 --timeout=60 + IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/yolo_v8/run.py -m yolov8n.pt -f pytorch -p fp32 --timeout=60 python3 speech_recognition/whisper/run.py -m small.en wget $S3_URL_SSD_INCEPTION_V2_TF_FP32 > /dev/null 2>&1 - OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/ssd_inception_v2/run.py -m ssd_inception_v2_tf_fp32.pb -p fp32 --timeout=60 + IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/ssd_inception_v2/run.py -m ssd_inception_v2_tf_fp32.pb -p fp32 --timeout=60 wget https://zenodo.org/records/4735647/files/resnet50_v1.onnx > /dev/null 2>&1 - OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v1/run.py -m resnet50_v1.onnx -p fp32 -f ort + IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v1/run.py -m resnet50_v1.onnx -p fp32 -f ort wget https://s3.amazonaws.com/onnx-model-zoo/vgg/vgg16/vgg16.tar.gz > /dev/null 2>&1 tar -xf vgg16.tar.gz > /dev/null - OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/vgg_16/run.py -m vgg16/vgg16.onnx -p fp32 -f ort + IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/vgg_16/run.py -m vgg16/vgg16.onnx -p fp32 -f ort test_pytorch_arm64_sh: if: false From 3f70599cba3dfad97a5c7ca517127b5a04cf169b Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Fri, 24 Oct 2025 15:50:45 +0200 Subject: [PATCH 53/55] wip --- tests/test_pytorch_models.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index b7a2ecaa..b38dba04 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -366,19 +366,19 @@ def setUp(self): # "timeout": None, "disable_jit_freeze": False}) # self.assertTrue(acc["coco_map"] / coco_map_ref > 0.95) - def test_yolo_v8_s(self): - from computer_vision.object_detection.yolo_v8.run import run_pytorch_fp32 - from utils.benchmark import set_global_intra_op_parallelism_threads - set_global_intra_op_parallelism_threads(32) - - def wrapper(**kwargs): - kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) - - coco_map_ref = 0.353 - acc = run_process(wrapper, {"model_path": self.yolo_v8_s_path, "images_path": self.dataset_path, - "anno_path": self.annotations_path, "batch_size": 1, "num_runs": 465, - "timeout": None, "disable_jit_freeze": False}) - self.assertTrue(acc["coco_map"] / coco_map_ref > 0.95) + # def test_yolo_v8_s(self): + # from computer_vision.object_detection.yolo_v8.run import run_pytorch_fp32 + # from utils.benchmark import set_global_intra_op_parallelism_threads + # set_global_intra_op_parallelism_threads(32) + # + # def wrapper(**kwargs): + # kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) + # + # coco_map_ref = 0.353 + # acc = run_process(wrapper, {"model_path": self.yolo_v8_s_path, "images_path": self.dataset_path, + # "anno_path": self.annotations_path, "batch_size": 1, "num_runs": 465, + # "timeout": None, "disable_jit_freeze": False}) + # self.assertTrue(acc["coco_map"] / coco_map_ref > 0.95) if __name__ == "__main__": From d918720f97389ded793719d07747d6317d3d0d30 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 28 Oct 2025 13:28:57 +0100 Subject: [PATCH 54/55] wip --- .../object_detection/yolo_v11/README.md | 118 +++++++++++++++++ .../object_detection/yolo_v11/run.py | 124 ++++++++++++++++++ 2 files changed, 242 insertions(+) create mode 100644 computer_vision/object_detection/yolo_v11/README.md create mode 100644 computer_vision/object_detection/yolo_v11/run.py diff --git a/computer_vision/object_detection/yolo_v11/README.md b/computer_vision/object_detection/yolo_v11/README.md new file mode 100644 index 00000000..5f72cf7d --- /dev/null +++ b/computer_vision/object_detection/yolo_v11/README.md @@ -0,0 +1,118 @@ +# YOLO v8 + +This folder contains the script to run YOLO v8 on COCO object detection task. + +Variants supplied below for PyTorch and ONNX Runtime in fp32 precision accept input of shape 640x640. + +The original documentation of the model is available here: https://docs.ultralytics.com/#ultralytics-yolov8 + + +### Metrics + +Based on 1000 images from COCO Dataset for YOLOv8n model in PyTorch framework in fp32 precision + +| Metric | IoU | Area | maxDets |Score | +|:---: |:---: |:---: |:---: |:---: | +| Average Precision (AP) |0.50:0.95 | all | 100 | 0.338 | +| Average Precision (AP) |0.50 | all | 100 | 0.452 | +| Average Precision (AP) |0.75 | all | 100 | 0.370 | +| Average Precision (AP) |0.50:0.95 | small | 100 | 0.122 | +| Average Precision (AP) |0.50:0.95 | medium | 100 | 0.351 | +| Average Precision (AP) |0.50:0.95 | large | 100 | 0.504 | +| Average Recall (AR) |0.50:0.95 | all | 1 | 0.265 | +| Average Recall (AR) |0.50:0.95 | all | 10 | 0.375 | +| Average Recall (AR) |0.50:0.95 | all | 100 | 0.381 | +| Average Recall (AR) |0.50:0.95 | small | 100 | 0.133 | +| Average Recall (AR) |0.50:0.95 | medium | 100 | 0.385 | +| Average Recall (AR) |0.50:0.95 | large | 100 | 0.569 | + +Based on 1000 images from COCO Dataset for YOLOv8n model in ONNX Runtime framework in fp32 precision + +| Metric | IoU | Area | maxDets |Score | +|:---: |:---: |:---: |:---: |:---: | +| Average Precision (AP) |0.50:0.95 | all | 100 | 0.338| +| Average Precision (AP) |0.50 | all | 100 | 0.452| +| Average Precision (AP) |0.75 | all | 100 | 0.370| +| Average Precision (AP) |0.50:0.95 | small | 100 | 0.122| +| Average Precision (AP) |0.50:0.95 | medium | 100 | 0.351| +| Average Precision (AP) |0.50:0.95 | large | 100 | 0.504| +| Average Recall (AR) |0.50:0.95 | all | 1 | 0.265| +| Average Recall (AR) |0.50:0.95 | all | 10 | 0.375| +| Average Recall (AR) |0.50:0.95 | all | 100 | 0.381| +| Average Recall (AR) |0.50:0.95 | small | 100 | 0.133| +| Average Recall (AR) |0.50:0.95 | medium | 100 | 0.385| +| Average Recall (AR) |0.50:0.95 | large | 100 | 0.569| + +Based on 1000 images from COCO Dataset for YOLOv8x model in ONNX Runtime framework in fp32 precision + +| Metric | IoU | Area | maxDets |Score | +|:---: |:---: |:---: |:---: |:---: | +| Average Precision (AP) |0.50:0.95 | all | 100 | 0.575| +| Average Precision (AP) |0.50 | all | 100 | 0.714| +| Average Precision (AP) |0.75 | all | 100 | 0.639| +| Average Precision (AP) |0.50:0.95 | small | 100 | 0.336| +| Average Precision (AP) |0.50:0.95 | medium | 100 | 0.633| +| Average Precision (AP) |0.50:0.95 | large | 100 | 0.812| +| Average Recall (AR) |0.50:0.95 | all | 1 | 0.409| +| Average Recall (AR) |0.50:0.95 | all | 10 | 0.611| +| Average Recall (AR) |0.50:0.95 | all | 100 | 0.620| +| Average Recall (AR) |0.50:0.95 | small | 100 | 0.361| +| Average Recall (AR) |0.50:0.95 | medium | 100 | 0.676| +| Average Recall (AR) |0.50:0.95 | large | 100 | 0.849| + + +### Dataset and model + +Dataset can be downloaded from here: https://cocodataset.org/#download + +PyTorch models in fp32 precision can be downloaded here: +``` +wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt +wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt +wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt +wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt +wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt +``` + +You can export a PyTorch model to ONNX Runtime model using the following Python code: + +```python +from ultralytics import YOLO +model = YOLO('/path/to/yolov8n.pt') +model.export(format='onnx') +``` + +### Running instructions + +Before running any code you should first export the PYTHONPATH variable with path pointing to the model zoo directory, +as well as AIO_NUM_THREADS specifying the number of threads to be used. + +``` +export PYTHONPATH=/path/to/model_zoo +export AIO_NUM_THREADS=1 +``` + +For the best experience we also recommend setting environment variables as specified below. + +``` +export COCO_IMG_PATH=/path/to/images +export COCO_ANNO_PATH=/path/to/annotations +``` + +Now you are able to run the run.py script. + +To get detailed information on the script's recognized arguments run it with -h flag for help. + +The path to model (with a flag "-m") as well as its precision (with a flag "-p") have to be specified. + +Please note that the default batch size is 1 and if not specified otherwise the script will run for 1 minute. + +Example command: + +``` +python3 run.py -m /path/to/model.onnx -p fp32 --framework ort +``` + +``` +python3 run.py -m /path/to/model.pt -p fp32 --framework pytorch +``` \ No newline at end of file diff --git a/computer_vision/object_detection/yolo_v11/run.py b/computer_vision/object_detection/yolo_v11/run.py new file mode 100644 index 00000000..ec782733 --- /dev/null +++ b/computer_vision/object_detection/yolo_v11/run.py @@ -0,0 +1,124 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2025, Ampere Computing LLC +try: + from utils import misc # noqa +except ModuleNotFoundError: + import os + import sys + filename = "set_env_variables.sh" + directory = os.path.realpath(__file__).split("/")[:-1] + for idx in range(1, len(directory) - 1): + subdir = "/".join(directory[:-idx]) + if filename in os.listdir(subdir): + print(f"\nPlease run \033[91m'source {os.path.join(subdir, filename)}'\033[0m first.") + break + else: + print(f"\n\033[91mFAIL: Couldn't find {filename}, are you running this script as part of Ampere Model Library?" + f"\033[0m") + sys.exit(1) + + +def parse_args(): + import argparse + parser = argparse.ArgumentParser(description="Run YOLOv11 model.") + parser.add_argument("-m", "--model_path", + type=str, required=True, + help="path to the model") + parser.add_argument("-p", "--precision", + type=str, choices=["fp32"], default="fp32", + help="precision of the model provided") + parser.add_argument("-b", "--batch_size", + type=int, default=1, + help="batch size to feed the model with") + parser.add_argument("-f", "--framework", + type=str, + choices=["pytorch"], required=True, + help="specify the framework in which a model should be run") + parser.add_argument("--timeout", + type=float, default=60.0, + help="timeout in seconds") + parser.add_argument("--num_runs", + type=int, + help="number of passes through network to execute") + parser.add_argument("--images_path", + type=str, + help="path to directory with COCO validation images") + parser.add_argument("--anno_path", + type=str, + help="path to file with validation annotations") + parser.add_argument("--disable_jit_freeze", action='store_true', + help="if true model will be run not in jit freeze mode") + return parser.parse_args() + + +def run_pytorch_fp(model_path, batch_size, num_runs, timeout, images_path, anno_path, disable_jit_freeze=False): + import torch + import os + from utils.cv.coco import COCODataset + from utils.benchmark import run_model + + os.environ["YOLO_VERBOSE"] = os.getenv("YOLO_VERBOSE", "False") + # Ultralytics sets it to True by default. This way we suppress the logging by default while still allowing the user + # to set it to True if needed + from utils.pytorch import PyTorchRunner + from ultralytics.yolo.utils import ops + + def run_single_pass(pytorch_runner, coco): + output = pytorch_runner.run(batch_size, coco.get_input_array((640, 640))) + output = ops.non_max_suppression(output) + + for i in range(batch_size): + for d in range(output[i].shape[0]): + coco.submit_bbox_prediction( + i, + coco.convert_bbox_to_coco_order(output[i][d][:4].tolist()), + output[i][d][4].item(), + coco.translate_cat_id_to_coco(output[i][d][5].item()) + ) + + dataset = COCODataset(batch_size, "RGB", "COCO_val2014_000000000000", images_path, + anno_path, pre_processing="PyTorch_objdet", sort_ascending=True, order="NCHW") + + from ultralytics import YOLO + model = YOLO(model_path) + torchscript_model = model.export(format="torchscript") + + runner = PyTorchRunner(torch.jit.load(torchscript_model), + disable_jit_freeze=disable_jit_freeze, + example_inputs=torch.stack(dataset.get_input_array((640, 640)))) + + return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) + + +def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, images_path, anno_path, disable_jit_freeze, **kwargs): + return run_pytorch_fp(model_path, batch_size, num_runs, timeout, images_path, anno_path, disable_jit_freeze) + + +def main(): + from utils.misc import print_goodbye_message_and_die + args = parse_args() + + if args.framework == "pytorch": + import torch + if torch.cuda.is_available(): + run_pytorch_cuda(**vars(args)) + elif args.precision == "fp32": + run_pytorch_fp32(**vars(args)) + else: + print_goodbye_message_and_die( + "this model seems to be unsupported in a specified precision: " + args.precision) + elif args.framework == "ort": + if args.precision == "fp32": + if args.batch_size != 1: + raise ValueError("Batch size must be 1 for this model.") + run_ort_fp32(**vars(args)) + else: + print_goodbye_message_and_die( + "this model seems to be unsupported in a specified precision: " + args.precision) + else: + print_goodbye_message_and_die( + "this model seems to be unsupported in a specified framework: " + args.framework) + + +if __name__ == "__main__": + main() From 2b55c04741edb42181a070e4a979a12d1fde6e62 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 28 Oct 2025 13:50:18 +0100 Subject: [PATCH 55/55] wip --- computer_vision/object_detection/yolo_v11/run.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/computer_vision/object_detection/yolo_v11/run.py b/computer_vision/object_detection/yolo_v11/run.py index ec782733..af254d7d 100644 --- a/computer_vision/object_detection/yolo_v11/run.py +++ b/computer_vision/object_detection/yolo_v11/run.py @@ -61,11 +61,11 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, images_path, anno_ # Ultralytics sets it to True by default. This way we suppress the logging by default while still allowing the user # to set it to True if needed from utils.pytorch import PyTorchRunner - from ultralytics.yolo.utils import ops + from ultralytics.utils import nms def run_single_pass(pytorch_runner, coco): output = pytorch_runner.run(batch_size, coco.get_input_array((640, 640))) - output = ops.non_max_suppression(output) + output = nms.non_max_suppression(output) for i in range(batch_size): for d in range(output[i].shape[0]): @@ -85,7 +85,7 @@ def run_single_pass(pytorch_runner, coco): runner = PyTorchRunner(torch.jit.load(torchscript_model), disable_jit_freeze=disable_jit_freeze, - example_inputs=torch.stack(dataset.get_input_array((640, 640)))) + example_inputs=torch.stack((dataset.get_input_array((640, 640)),))) return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) @@ -107,14 +107,6 @@ def main(): else: print_goodbye_message_and_die( "this model seems to be unsupported in a specified precision: " + args.precision) - elif args.framework == "ort": - if args.precision == "fp32": - if args.batch_size != 1: - raise ValueError("Batch size must be 1 for this model.") - run_ort_fp32(**vars(args)) - else: - print_goodbye_message_and_die( - "this model seems to be unsupported in a specified precision: " + args.precision) else: print_goodbye_message_and_die( "this model seems to be unsupported in a specified framework: " + args.framework)