From 7b55ce5c56e8014f695f918fa409d1f77c1bfd66 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Thu, 25 Sep 2025 15:21:20 +0200
Subject: [PATCH 01/55] bert_large: add --fixed-input arg, pad tokens to 512, debug prints

---
 .../extractive_question_answering/bert_large/run_mlperf.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
index 57130f6c..25146a4d 100644
--- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
+++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
@@ -34,6 +34,8 @@ def parse_args():
                         type=str, default="tf",
                         choices=["tf", "pytorch"],
                         help="specify the framework in which a model should be run")
+    parser.add_argument("--fixed-input", action='store_true',
+                        help="truncate input to fixed shape")
     parser.add_argument("--timeout",
                         type=float, default=60.0,
                         help="timeout in seconds")
@@ -102,6 +104,9 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disabl
 
     def run_single_pass(pytorch_runner, squad):
         input_tensor = squad.get_input_arrays()
+        print(input_tensor)
+        print(type(input_tensor))
+        quit()
         output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor))
 
         for i in range(batch_size):
@@ -117,7 +122,7 @@ def run_single_pass(pytorch_runner, squad):
         padding=True, truncation=True, model_max_length=512)
 
     def tokenize(question, text):
-        return tokenizer(question, text, padding=True, truncation=True, return_tensors="pt")
+        return tokenizer(question, text, padding="max_length", truncation=True, max_length=512, return_tensors="pt")
 
     def detokenize(answer):
         return tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer))

From 1ede310c3d1c83b3120ddfffb693a1b8f97a0cce Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Thu, 25 Sep 2025 16:55:09 +0200
Subject: [PATCH 02/55] bert_large: print input tensor shapes (debug)

---
 .../extractive_question_answering/bert_large/run_mlperf.py    | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
index 25146a4d..4e9aefb2 100644
--- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
+++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
@@ -106,6 +106,10 @@ def run_single_pass(pytorch_runner, squad):
         input_tensor = squad.get_input_arrays()
         print(input_tensor)
         print(type(input_tensor))
+        print(input_tensor["input_ids"].size()[1])
+        print(input_tensor["input_ids"].shape)
+        print(input_tensor["attention_mask"].shape)
+        print(input_tensor["token_type_ids"].shape)
         quit()
         output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor))
 

From a31e5e1aaf445f98fa3eee83ba70e88c837c0af4 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Thu, 25 Sep 2025 16:57:56 +0200
Subject: [PATCH 03/55] bert_large: print shapes of 10 inputs (debug)

---
 .../bert_large/run_mlperf.py                      | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
index 4e9aefb2..bcaed825 100644
--- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
+++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
@@ -103,13 +103,14 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disabl
     from utils.pytorch import PyTorchRunner
 
     def run_single_pass(pytorch_runner, squad):
-        input_tensor = squad.get_input_arrays()
-        print(input_tensor)
-        print(type(input_tensor))
-        print(input_tensor["input_ids"].size()[1])
-        print(input_tensor["input_ids"].shape)
-        print(input_tensor["attention_mask"].shape)
-        print(input_tensor["token_type_ids"].shape)
+        for _ in range(10):
+            input_tensor = squad.get_input_arrays()
+            print(type(input_tensor))
+            print(input_tensor["input_ids"].size()[1])
+            print(input_tensor["input_ids"].shape)
+            print(input_tensor["attention_mask"].shape)
+            print(input_tensor["token_type_ids"].shape)
+            print("-------")
         quit()
         output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor))
 

From 6d5003ddffe484d079ca64d6e5a176f5641c8785 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 26 Sep 2025 10:44:18 +0200
Subject: [PATCH 04/55] bert_large: rename flag to --fixed_input and apply it in tokenize

---
 .../bert_large/run_mlperf.py                      | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
index bcaed825..a4b2553d 100644
--- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
+++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
@@ -34,8 +34,6 @@ def parse_args():
                         type=str, default="tf",
                         choices=["tf", "pytorch"],
                         help="specify the framework in which a model should be run")
-    parser.add_argument("--fixed-input", action='store_true',
-                        help="truncate input to fixed shape")
     parser.add_argument("--timeout",
                         type=float, default=60.0,
                         help="timeout in seconds")
@@ -45,6 +43,8 @@ def parse_args():
     parser.add_argument("--squad_path",
                         type=str,
                         help="path to directory with ImageNet validation images")
+    parser.add_argument("--fixed_input", action='store_true',
+                        help="truncate input to fixed shape")
     parser.add_argument("--disable_jit_freeze", action='store_true',
                         help="if true model will be run not in jit freeze mode")
     return parser.parse_args()
@@ -95,7 +95,7 @@ def run_tf_fp16(model_path, batch_size, num_runs, timeout, squad_path, **kwargs)
     return run_tf_fp(model_path, batch_size, num_runs, timeout, squad_path)
 
 
-def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze=False):
+def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze=False, fixed_input=False):
     from utils.benchmark import run_model
     from utils.nlp.squad import Squad_v1_1
     from transformers import AutoTokenizer, BertConfig, BertForQuestionAnswering
@@ -127,7 +127,10 @@ def run_single_pass(pytorch_runner, squad):
         padding=True, truncation=True, model_max_length=512)
 
     def tokenize(question, text):
-        return tokenizer(question, text, padding="max_length", truncation=True, max_length=512, return_tensors="pt")
+        if fixed_input:
+            return tokenizer(question, text, padding="max_length", truncation=True, max_length=512, return_tensors="pt")
+        else:
+            return tokenizer(question, text, padding=True, truncation=True, return_tensors="pt")
 
     def detokenize(answer):
         return tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer))
@@ -209,8 +212,8 @@ def detokenize(answer):
     return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)
 
 
-def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, **kwargs):
-    return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze)
+def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input, **kwargs):
+    return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input)
 
 
 def main():

From 776e70d252c0d04c5aebc2df4885e553b163e177 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 26 Sep 2025 10:54:29 +0200
Subject: [PATCH 05/55] bert_large: drop debug loop and quit(), keep shape prints

---
 .../bert_large/run_mlperf.py                     | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
index a4b2553d..3e0a5a10 100644
--- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
+++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
@@ -103,15 +103,13 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disabl
     from utils.pytorch import PyTorchRunner
 
     def run_single_pass(pytorch_runner, squad):
-        for _ in range(10):
-            input_tensor = squad.get_input_arrays()
-            print(type(input_tensor))
-            print(input_tensor["input_ids"].size()[1])
-            print(input_tensor["input_ids"].shape)
-            print(input_tensor["attention_mask"].shape)
-            print(input_tensor["token_type_ids"].shape)
-            print("-------")
-        quit()
+        input_tensor = squad.get_input_arrays()
+        print(type(input_tensor))
+        print(input_tensor["input_ids"].size()[1])
+        print(input_tensor["input_ids"].shape)
+        print(input_tensor["attention_mask"].shape)
+        print(input_tensor["token_type_ids"].shape)
+        print("-------")
         output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor))
 
         for i in range(batch_size):

From b3c4a0c3a0606ce1f13f886212c13d0b6268a368 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 26 Sep 2025 10:59:27 +0200
Subject: [PATCH 06/55] bert_large: remove debug shape prints

---
 .../extractive_question_answering/bert_large/run_mlperf.py  | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
index 3e0a5a10..de874c5a 100644
--- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
+++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
@@ -104,12 +104,6 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disabl
 
     def run_single_pass(pytorch_runner, squad):
         input_tensor = squad.get_input_arrays()
-        print(type(input_tensor))
-        print(input_tensor["input_ids"].size()[1])
-        print(input_tensor["input_ids"].shape)
-        print(input_tensor["attention_mask"].shape)
-        print(input_tensor["token_type_ids"].shape)
-        print("-------")
         output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor))
 
         for i in range(batch_size):

From f8c8b06cb7792c0802e72c03ed02882e84df8d4c Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 26 Sep 2025 13:16:10 +0200
Subject: [PATCH 07/55] bump copyright year to 2025; pass fixed_input in bert_large test

---
 computer_vision/object_detection/yolo_v5/run.py                | 2 +-
 computer_vision/object_detection/yolo_v8/run.py                | 2 +-
 .../extractive_question_answering/bert_large/run_mlperf.py     | 2 +-
 recommendation/dlrm/run.py                                     | 2 +-
 tests/test_pytorch_models.py                                   | 3 ++-
 utils/cv/pre_processing.py                                     | 2 +-
 6 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/computer_vision/object_detection/yolo_v5/run.py b/computer_vision/object_detection/yolo_v5/run.py
index 945727fd..dd8d1828 100644
--- a/computer_vision/object_detection/yolo_v5/run.py
+++ b/computer_vision/object_detection/yolo_v5/run.py
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
-# Copyright (c) 2024, Ampere Computing LLC
+# Copyright (c) 2025, Ampere Computing LLC
 try:
     from utils import misc  # noqa
 except ModuleNotFoundError:
diff --git a/computer_vision/object_detection/yolo_v8/run.py b/computer_vision/object_detection/yolo_v8/run.py
index 7df1d629..4dc6c67a 100644
--- a/computer_vision/object_detection/yolo_v8/run.py
+++ b/computer_vision/object_detection/yolo_v8/run.py
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
-# Copyright (c) 2024, Ampere Computing LLC
+# Copyright (c) 2025, Ampere Computing LLC
 try:
     from utils import misc  # noqa
 except ModuleNotFoundError:
diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
index de874c5a..a5605993 100644
--- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
+++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
-# Copyright (c) 2024, Ampere Computing LLC
+# Copyright (c) 2025, Ampere Computing LLC
 try:
     from utils import misc  # noqa
 except ModuleNotFoundError:
diff --git a/recommendation/dlrm/run.py b/recommendation/dlrm/run.py
index 97ce3a19..5997e085 100644
--- a/recommendation/dlrm/run.py
+++ b/recommendation/dlrm/run.py
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
-# Copyright (c) 2024, Ampere Computing LLC
+# Copyright (c) 2025, Ampere Computing LLC
 try:
     from utils import misc  # noqa
 except ModuleNotFoundError:
diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py
index 60b99472..43c546ab 100644
--- a/tests/test_pytorch_models.py
+++ b/tests/test_pytorch_models.py
@@ -222,7 +222,8 @@ def wrapper(**kwargs):
 
         exact_match_ref, f1_ref = 0.750, 0.817
         acc = run_process(wrapper, {"model_path": self.model_path, "squad_path": self.dataset_path,
-                                    "batch_size": 1, "num_runs": 24, "timeout": None, "disable_jit_freeze": False})
+                                    "batch_size": 1, "num_runs": 24, "timeout": None,
+                                    "disable_jit_freeze": False, "fixed_input": False})
         self.assertTrue(acc["exact_match"] / exact_match_ref > 0.95)
         self.assertTrue(acc["f1"] / f1_ref > 0.95)
 
diff --git a/utils/cv/pre_processing.py b/utils/cv/pre_processing.py
index 7d452069..ae17a4b1 100644
--- a/utils/cv/pre_processing.py
+++ b/utils/cv/pre_processing.py
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
-# Copyright (c) 2024, Ampere Computing LLC
+# Copyright (c) 2025, Ampere Computing LLC
 import numpy as np
 import utils.misc as utils
 

From df07b65ab3eca70e8bd4151245be20f2521f8815 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 26 Sep 2025 14:13:20 +0200
Subject: [PATCH 08/55] requirements: add torchcodec; bump copyright year in tests

---
 requirements.txt             | 1 +
 tests/test_pytorch_models.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 25e13945..a0666590 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -36,3 +36,4 @@ open-clip-torch<2.26.1
 diffusers
 accelerate
 boto3==1.29.0; python_version>='3.12'
+torchcodec
\ No newline at end of file
diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py
index 43c546ab..403dc211 100644
--- a/tests/test_pytorch_models.py
+++ b/tests/test_pytorch_models.py
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
-# Copyright (c) 2024, Ampere Computing LLC
+# Copyright (c) 2025, Ampere Computing LLC
 import os
 import signal
 import time

From 6626f910b61e42933817c3880af3b513c070dd15 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 26 Sep 2025 16:00:32 +0200
Subject: [PATCH 09/55] LICENSE: bump Ampere copyright year to 2025

---
 LICENSE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LICENSE b/LICENSE
index 8580f840..42a38322 100644
--- a/LICENSE
+++ b/LICENSE
@@ -187,7 +187,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright (c) 2024, Ampere Computing LLC
+   Copyright (c) 2025, Ampere Computing LLC
    Copyright (c) 2022 Andrej Karpathy
    Copyright (c) 2022 OpenAI
    Copyright (c) 2022 Stability AI

From 01cb4def14450343736d491a87e50fe81378bc8c Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Tue, 30 Sep 2025 14:47:05 +0200
Subject: [PATCH 10/55] requirements: remove torchcodec

---
 requirements.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index a0666590..9cc6c4c6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -35,5 +35,4 @@ kornia
 open-clip-torch<2.26.1
 diffusers
 accelerate
-boto3==1.29.0; python_version>='3.12'
-torchcodec
\ No newline at end of file
+boto3==1.29.0; python_version>='3.12'
\ No newline at end of file

From bc405d651b50e01246a9e967da191c0e9d588a73 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Tue, 30 Sep 2025 16:15:49 +0200
Subject: [PATCH 11/55] requirements: add datasets[audio]

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 9cc6c4c6..f8921397 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,6 +17,7 @@ tiktoken
 ultralytics
 evaluate
 datasets
+datasets[audio]
 soundfile
 librosa
 numba

From ddf25ed597b25c5511e764ec6ffe8a11b8933df7 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Wed, 1 Oct 2025 10:17:02 +0200
Subject: [PATCH 12/55] yolo_v8: import non_max_suppression from ultralytics.utils.nms

---
 computer_vision/object_detection/yolo_v8/run.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/computer_vision/object_detection/yolo_v8/run.py b/computer_vision/object_detection/yolo_v8/run.py
index 4dc6c67a..bc35d293 100644
--- a/computer_vision/object_detection/yolo_v8/run.py
+++ b/computer_vision/object_detection/yolo_v8/run.py
@@ -61,7 +61,7 @@ def run_ort_fp32(model_path, batch_size, num_runs, timeout, images_path, anno_pa
     # Ultralytics sets it to True by default. This way we suppress the logging by default while still allowing the user
     # to set it to True if needed
     from utils.ort import OrtRunner
-    from ultralytics.yolo.utils import ops
+    from ultralytics.utils import nms
 
     def run_single_pass(ort_runner, coco):
         shape = (640, 640)
@@ -69,7 +69,7 @@ def run_single_pass(ort_runner, coco):
         output = ort_runner.run(batch_size)
 
         output = torch.from_numpy(output[0])
-        output = ops.non_max_suppression(output)
+        output = nms.non_max_suppression(output)
 
         for i in range(batch_size):
             for d in range(output[i].shape[0]):
@@ -97,11 +97,11 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, images_path, anno_
     # Ultralytics sets it to True by default. This way we suppress the logging by default while still allowing the user
     # to set it to True if needed
     from utils.pytorch import PyTorchRunner
-    from ultralytics.yolo.utils import ops
+    from ultralytics.utils import nms
 
     def run_single_pass(pytorch_runner, coco):
         output = pytorch_runner.run(batch_size, coco.get_input_array((640, 640)))
-        output = ops.non_max_suppression(output)
+        output = nms.non_max_suppression(output)
 
         for i in range(batch_size):
             for d in range(output[i].shape[0]):
@@ -122,6 +122,7 @@ def run_single_pass(pytorch_runner, coco):
     runner = PyTorchRunner(torch.jit.load(torchscript_model),
                            disable_jit_freeze=disable_jit_freeze,
                            example_inputs=torch.stack(dataset.get_input_array((640, 640))))
+                           #example_inputs=torch.stack((dataset.get_input_array((640, 640)),)))
 
     return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)
 

From c4e81b0a7ff1a9e714e9aa844686a09ee43b325e Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Wed, 1 Oct 2025 10:18:35 +0200
Subject: [PATCH 13/55] yolo_v8: wrap example input in a tuple for torch.stack

---
 computer_vision/object_detection/yolo_v8/run.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/computer_vision/object_detection/yolo_v8/run.py b/computer_vision/object_detection/yolo_v8/run.py
index bc35d293..bbd51c24 100644
--- a/computer_vision/object_detection/yolo_v8/run.py
+++ b/computer_vision/object_detection/yolo_v8/run.py
@@ -121,8 +121,7 @@ def run_single_pass(pytorch_runner, coco):
 
     runner = PyTorchRunner(torch.jit.load(torchscript_model),
                            disable_jit_freeze=disable_jit_freeze,
-                           example_inputs=torch.stack(dataset.get_input_array((640, 640))))
-                           #example_inputs=torch.stack((dataset.get_input_array((640, 640)),)))
+                           example_inputs=torch.stack((dataset.get_input_array((640, 640)),)))
 
     return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)
 

From c7764d4f6e724dc692901d660abfe37a306e2774 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Thu, 2 Oct 2025 13:00:31 +0200
Subject: [PATCH 14/55] ci: install ffmpeg; set intra-op threads in yolo_v8 test

---
 .github/workflows/test.yml   | 2 ++
 tests/test_pytorch_models.py | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 2921a548..0a3cf245 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -84,6 +84,8 @@ jobs:
           wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1
           tar -xf aio_objdet_dataset.tar.gz > /dev/null
           
+          apt-get update && apt-get install ffmpeg
+          
           wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1
           IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60
           
diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py
index 403dc211..4ed6d07e 100644
--- a/tests/test_pytorch_models.py
+++ b/tests/test_pytorch_models.py
@@ -368,6 +368,8 @@ def setUp(self):
 
     def test_yolo_v8_s(self):
         from computer_vision.object_detection.yolo_v8.run import run_pytorch_fp32
+        from utils.benchmark import set_global_intra_op_parallelism_threads
+        set_global_intra_op_parallelism_threads(int(os.environ.get("AIO_NUM_THREADS")))
 
         def wrapper(**kwargs):
             kwargs["q"].put(run_pytorch_fp32(**kwargs)[0])

From 146e3b039d7473834b5f4fbf806ca53bd24d068f Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Thu, 2 Oct 2025 13:27:29 +0200
Subject: [PATCH 15/55] tests: hard-code 32 intra-op threads in yolo_v8 test

---
 tests/test_pytorch_models.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py
index 4ed6d07e..8d27c83c 100644
--- a/tests/test_pytorch_models.py
+++ b/tests/test_pytorch_models.py
@@ -369,7 +369,8 @@ def setUp(self):
     def test_yolo_v8_s(self):
         from computer_vision.object_detection.yolo_v8.run import run_pytorch_fp32
         from utils.benchmark import set_global_intra_op_parallelism_threads
-        set_global_intra_op_parallelism_threads(int(os.environ.get("AIO_NUM_THREADS")))
+        #set_global_intra_op_parallelism_threads(int(os.environ.get("AIO_NUM_THREADS")))
+        set_global_intra_op_parallelism_threads(32)
 
         def wrapper(**kwargs):
             kwargs["q"].put(run_pytorch_fp32(**kwargs)[0])

From 83d284e1417fa3ae137d8a135a666aafc8ac2957 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Thu, 2 Oct 2025 15:56:07 +0200
Subject: [PATCH 16/55] ci: set OMP/AIO_NUM_THREADS=32, apt-get -y; drop commented code

---
 .github/workflows/test.yml   | 28 ++++++++++++++--------------
 tests/test_pytorch_models.py |  1 -
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 0a3cf245..8c54a461 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -84,7 +84,7 @@ jobs:
           wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1
           tar -xf aio_objdet_dataset.tar.gz > /dev/null
           
-          apt-get update && apt-get install ffmpeg
+          apt-get update && apt-get install -y ffmpeg
           
           wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1
           IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60
@@ -151,24 +151,24 @@ jobs:
           tar -xf aio_objdet_dataset.tar.gz > /dev/null
           
           wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1
-          IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60
+          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60
           
-          IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60
+          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60
           
           wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt > /dev/null 2>&1
-          IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/yolo_v8/run.py -m yolov8n.pt -f pytorch -p fp32 --timeout=60
+          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/yolo_v8/run.py -m yolov8n.pt -f pytorch -p fp32 --timeout=60
           
           python3 speech_recognition/whisper/run.py -m small.en
           
           wget $S3_URL_SSD_INCEPTION_V2_TF_FP32 > /dev/null 2>&1
-          IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/ssd_inception_v2/run.py -m ssd_inception_v2_tf_fp32.pb -p fp32 --timeout=60
+          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/ssd_inception_v2/run.py -m ssd_inception_v2_tf_fp32.pb -p fp32 --timeout=60
           
           wget https://zenodo.org/records/4735647/files/resnet50_v1.onnx > /dev/null 2>&1
-          IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v1/run.py -m resnet50_v1.onnx -p fp32 -f ort
+          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v1/run.py -m resnet50_v1.onnx -p fp32 -f ort
           
           wget https://s3.amazonaws.com/onnx-model-zoo/vgg/vgg16/vgg16.tar.gz > /dev/null 2>&1
           tar -xf vgg16.tar.gz > /dev/null
-          IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/vgg_16/run.py -m vgg16/vgg16.onnx -p fp32 -f ort
+          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/vgg_16/run.py -m vgg16/vgg16.onnx -p fp32 -f ort
 
   test_pytorch_arm64_sh:
     if: false
@@ -259,21 +259,21 @@ jobs:
           tar -xf aio_objdet_dataset.tar.gz > /dev/null
           
           wget https://github.com/tloen/alpaca-lora/raw/main/alpaca_data.json > /dev/null 2>&1
-          AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 natural_language_processing/text_generation/llama2/run.py -m meta-llama/Llama-2-7b-chat-hf --dataset_path=alpaca_data.json
+          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 natural_language_processing/text_generation/llama2/run.py -m meta-llama/Llama-2-7b-chat-hf --dataset_path=alpaca_data.json
           
-          AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 recommendation/dlrm_torchbench/run.py -p fp32
+          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 recommendation/dlrm_torchbench/run.py -p fp32
           
-          IGNORE_DATASET_LIMITS=1 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 computer_vision/classification/resnet_50_v15/run.py -m resnet50 -p fp32 -b 16 -f pytorch
+          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 computer_vision/classification/resnet_50_v15/run.py -m resnet50 -p fp32 -b 16 -f pytorch
           
-          AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 speech_recognition/whisper/run.py -m tiny.en 
+          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 speech_recognition/whisper/run.py -m tiny.en 
           
-          IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60
+          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60
           
           wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt > /dev/null 2>&1
-          IGNORE_DATASET_LIMITS=1 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 computer_vision/object_detection/yolo_v8/run.py -m yolov8l.pt -p fp32 -f pytorch              
+          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 computer_vision/object_detection/yolo_v8/run.py -m yolov8l.pt -p fp32 -f pytorch              
           
           wget -O bert_large_mlperf.pt https://zenodo.org/records/3733896/files/model.pytorch?download=1 > /dev/null 2>&1
-          AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py -m bert_large_mlperf.pt -p fp32 -f pytorch
+          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py -m bert_large_mlperf.pt -p fp32 -f pytorch
 
   test_tensorflow_arm64:
     runs-on: self-hosted
diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py
index 8d27c83c..916925f2 100644
--- a/tests/test_pytorch_models.py
+++ b/tests/test_pytorch_models.py
@@ -369,7 +369,6 @@ def setUp(self):
     def test_yolo_v8_s(self):
         from computer_vision.object_detection.yolo_v8.run import run_pytorch_fp32
         from utils.benchmark import set_global_intra_op_parallelism_threads
-        #set_global_intra_op_parallelism_threads(int(os.environ.get("AIO_NUM_THREADS")))
         set_global_intra_op_parallelism_threads(32)
 
         def wrapper(**kwargs):

From 93ed7b4242a2c9b930c94c0b1909fb6d1a34b473 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 3 Oct 2025 10:36:15 +0200
Subject: [PATCH 17/55] bert_large: add --input_size arg; debug shape prints and quit()

---
 .../extractive_question_answering/bert_large/run_mlperf.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
index a5605993..8a9c081a 100644
--- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
+++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
@@ -45,6 +45,8 @@ def parse_args():
                         help="path to directory with ImageNet validation images")
     parser.add_argument("--fixed_input", action='store_true',
                         help="truncate input to fixed shape")
+    parser.add_argument("--input_size", type=int, default=384,
+                        help='size of the input')
     parser.add_argument("--disable_jit_freeze", action='store_true',
                         help="if true model will be run not in jit freeze mode")
     return parser.parse_args()
@@ -104,6 +106,11 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disabl
 
     def run_single_pass(pytorch_runner, squad):
         input_tensor = squad.get_input_arrays()
+        print(input_tensor["input_ids"].shape)
+        print(input_tensor["attention_mask"].shape)
+        print(input_tensor["token_type_ids"].shape)
+
+        quit()
         output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor))
 
         for i in range(batch_size):

From 07a1a34343afdfdc56429d7b3f13a3dbb58b1407 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 3 Oct 2025 11:23:39 +0200
Subject: [PATCH 18/55] wip: pass input_size to run_pytorch_fp and use it as tokenizer max_length

---
 .../bert_large/run_mlperf.py                             | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
index 8a9c081a..e6f3af16 100644
--- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
+++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
@@ -97,7 +97,7 @@ def run_tf_fp16(model_path, batch_size, num_runs, timeout, squad_path, **kwargs)
     return run_tf_fp(model_path, batch_size, num_runs, timeout, squad_path)
 
 
-def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze=False, fixed_input=False):
+def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze=False, fixed_input=False):
     from utils.benchmark import run_model
     from utils.nlp.squad import Squad_v1_1
     from transformers import AutoTokenizer, BertConfig, BertForQuestionAnswering
@@ -127,7 +127,8 @@ def run_single_pass(pytorch_runner, squad):
 
     def tokenize(question, text):
         if fixed_input:
-            return tokenizer(question, text, padding="max_length", truncation=True, max_length=512, return_tensors="pt")
+            return tokenizer(question, text, padding="max_length", truncation=True, max_length=input_size,
+                             return_tensors="pt")
         else:
             return tokenizer(question, text, padding=True, truncation=True, return_tensors="pt")
 
@@ -211,8 +212,8 @@ def detokenize(answer):
     return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)
 
 
-def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input, **kwargs):
-    return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input)
+def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input, input_size, **kwargs):
+    return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input, input_size)
 
 
 def main():

From ccddf0a437422f7bf633965593dfb6c80205241e Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Tue, 7 Oct 2025 16:30:38 +0200
Subject: [PATCH 19/55] wip: print input shapes for 10 consecutive batches (debug)

---
 .../bert_large/run_mlperf.py                             | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
index e6f3af16..39b83ef0 100644
--- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
+++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
@@ -105,10 +105,11 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_
     from utils.pytorch import PyTorchRunner
 
     def run_single_pass(pytorch_runner, squad):
-        input_tensor = squad.get_input_arrays()
-        print(input_tensor["input_ids"].shape)
-        print(input_tensor["attention_mask"].shape)
-        print(input_tensor["token_type_ids"].shape)
+        for _ in range(10):
+            input_tensor = squad.get_input_arrays()
+            print(input_tensor["input_ids"].shape)
+            print(input_tensor["attention_mask"].shape)
+            print(input_tensor["token_type_ids"].shape)
 
         quit()
         output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor))

From 2a518e7cf8bf519af25162c9c670df0ffb95613f Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Tue, 7 Oct 2025 16:31:29 +0200
Subject: [PATCH 20/55] wip: print '---' separator between debug shape dumps

---
 .../extractive_question_answering/bert_large/run_mlperf.py       | 1 +
 1 file changed, 1 insertion(+)

diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
index 39b83ef0..5b226bcf 100644
--- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
+++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
@@ -110,6 +110,7 @@ def run_single_pass(pytorch_runner, squad):
             print(input_tensor["input_ids"].shape)
             print(input_tensor["attention_mask"].shape)
             print(input_tensor["token_type_ids"].shape)
+            print('---')
 
         quit()
         output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor))

From fe213de615ff666829abfd0e6f27f63c38b7ed37 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Tue, 7 Oct 2025 16:37:31 +0200
Subject: [PATCH 21/55] wip: reorder run_pytorch_fp parameters (fixed_input before disable_jit_freeze)

---
 .../extractive_question_answering/bert_large/run_mlperf.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
index 5b226bcf..80313b80 100644
--- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
+++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
@@ -97,7 +97,7 @@ def run_tf_fp16(model_path, batch_size, num_runs, timeout, squad_path, **kwargs)
     return run_tf_fp(model_path, batch_size, num_runs, timeout, squad_path)
 
 
-def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze=False, fixed_input=False):
+def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, fixed_input, disable_jit_freeze=False):
     from utils.benchmark import run_model
     from utils.nlp.squad import Squad_v1_1
     from transformers import AutoTokenizer, BertConfig, BertForQuestionAnswering

From 02d4de6950a851f88f554394f1add212f5cfc7cf Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Tue, 7 Oct 2025 16:38:43 +0200
Subject: [PATCH 22/55] wip: add debug prints for input_size, fixed_input and the fixed-input tokenize branch

---
 .../extractive_question_answering/bert_large/run_mlperf.py    | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
index 80313b80..c02f9e3b 100644
--- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
+++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
@@ -104,6 +104,9 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_
     import torch
     from utils.pytorch import PyTorchRunner
 
+    print(input_size)
+    print(fixed_input)
+
     def run_single_pass(pytorch_runner, squad):
         for _ in range(10):
             input_tensor = squad.get_input_arrays()
@@ -129,6 +132,7 @@ def run_single_pass(pytorch_runner, squad):
 
     def tokenize(question, text):
         if fixed_input:
+            print('h1')
             return tokenizer(question, text, padding="max_length", truncation=True, max_length=input_size,
                              return_tensors="pt")
         else:

From d720b06443c6eb3374300f24390966bc8c35e6d1 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Tue, 7 Oct 2025 16:41:32 +0200
Subject: [PATCH 23/55] wip: align run_pytorch_fp32 argument order with run_pytorch_fp

---
 .../extractive_question_answering/bert_large/run_mlperf.py  | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
index c02f9e3b..0e2b723f 100644
--- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
+++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
@@ -97,7 +97,7 @@ def run_tf_fp16(model_path, batch_size, num_runs, timeout, squad_path, **kwargs)
     return run_tf_fp(model_path, batch_size, num_runs, timeout, squad_path)
 
 
-def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, fixed_input, disable_jit_freeze=False):
+def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze=False, fixed_input=False):
     from utils.benchmark import run_model
     from utils.nlp.squad import Squad_v1_1
     from transformers import AutoTokenizer, BertConfig, BertForQuestionAnswering
@@ -218,8 +218,8 @@ def detokenize(answer):
     return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)
 
 
-def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input, input_size, **kwargs):
-    return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, disable_jit_freeze, fixed_input, input_size)
+def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze, fixed_input, **kwargs):
+    return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze, fixed_input)
 
 
 def main():

From c682f18849e1f137afebb03c82f1ad562cb10eba Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Tue, 7 Oct 2025 16:47:26 +0200
Subject: [PATCH 24/55] wip: drop input_size/fixed_input/'h1' debug prints and wrap long lines

---
 .../bert_large/run_mlperf.py                    | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
index 0e2b723f..9449859f 100644
--- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
+++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
@@ -97,16 +97,14 @@ def run_tf_fp16(model_path, batch_size, num_runs, timeout, squad_path, **kwargs)
     return run_tf_fp(model_path, batch_size, num_runs, timeout, squad_path)
 
 
-def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze=False, fixed_input=False):
+def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path,
+                   input_size, disable_jit_freeze=False, fixed_input=False):
     from utils.benchmark import run_model
     from utils.nlp.squad import Squad_v1_1
     from transformers import AutoTokenizer, BertConfig, BertForQuestionAnswering
     import torch
     from utils.pytorch import PyTorchRunner
 
-    print(input_size)
-    print(fixed_input)
-
     def run_single_pass(pytorch_runner, squad):
         for _ in range(10):
             input_tensor = squad.get_input_arrays()
@@ -132,9 +130,8 @@ def run_single_pass(pytorch_runner, squad):
 
     def tokenize(question, text):
         if fixed_input:
-            print('h1')
-            return tokenizer(question, text, padding="max_length", truncation=True, max_length=input_size,
-                             return_tensors="pt")
+            return tokenizer(question, text, padding="max_length", truncation=True,
+                             max_length=input_size, return_tensors="pt")
         else:
             return tokenizer(question, text, padding=True, truncation=True, return_tensors="pt")
 
@@ -218,8 +215,10 @@ def detokenize(answer):
     return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)
 
 
-def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze, fixed_input, **kwargs):
-    return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, input_size, disable_jit_freeze, fixed_input)
+def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path,
+                     input_size, disable_jit_freeze, fixed_input, **kwargs):
+    return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path,
+                          input_size, disable_jit_freeze, fixed_input)
 
 
 def main():

From 2b56ab0df9011ff2ac24e6f8ec8eeac8c876b106 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Wed, 8 Oct 2025 11:23:27 +0200
Subject: [PATCH 25/55] wip: add input_size=384 to run_process kwargs in tests/test_pytorch_models.py

---
 tests/test_pytorch_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py
index 916925f2..02de8df6 100644
--- a/tests/test_pytorch_models.py
+++ b/tests/test_pytorch_models.py
@@ -223,7 +223,7 @@ def wrapper(**kwargs):
         exact_match_ref, f1_ref = 0.750, 0.817
         acc = run_process(wrapper, {"model_path": self.model_path, "squad_path": self.dataset_path,
                                     "batch_size": 1, "num_runs": 24, "timeout": None,
-                                    "disable_jit_freeze": False, "fixed_input": False})
+                                    "input_size": 384, "disable_jit_freeze": False, "fixed_input": False})
         self.assertTrue(acc["exact_match"] / exact_match_ref > 0.95)
         self.assertTrue(acc["f1"] / f1_ref > 0.95)
 

From 23e4287433f3641986c2df697d942a89f04976e7 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Wed, 8 Oct 2025 12:48:50 +0200
Subject: [PATCH 26/55] wip: remove debug shape loop and quit() from run_single_pass

---
 .../bert_large/run_mlperf.py                             | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
index 9449859f..16413c74 100644
--- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
+++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
@@ -106,14 +106,7 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path,
     from utils.pytorch import PyTorchRunner
 
     def run_single_pass(pytorch_runner, squad):
-        for _ in range(10):
-            input_tensor = squad.get_input_arrays()
-            print(input_tensor["input_ids"].shape)
-            print(input_tensor["attention_mask"].shape)
-            print(input_tensor["token_type_ids"].shape)
-            print('---')
-
-        quit()
+        input_tensor = squad.get_input_arrays()
         output = pytorch_runner.run(batch_size * input_tensor["input_ids"].size()[1], **dict(input_tensor))
 
         for i in range(batch_size):

From 871de3a6f91866a3f97d784f0c133eaa14e16138 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Thu, 16 Oct 2025 14:57:26 +0200
Subject: [PATCH 27/55] wip: replace --fixed_input and --input_size with --fixed_input_size (debug prints left in)

---
 .../bert_large/run_mlperf.py                  | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
index 16413c74..84ed1311 100644
--- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
+++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
@@ -43,9 +43,7 @@ def parse_args():
     parser.add_argument("--squad_path",
                         type=str,
                         help="path to directory with ImageNet validation images")
-    parser.add_argument("--fixed_input", action='store_true',
-                        help="truncate input to fixed shape")
-    parser.add_argument("--input_size", type=int, default=384,
+    parser.add_argument("--fixed_input_size", type=int,
                         help='size of the input')
     parser.add_argument("--disable_jit_freeze", action='store_true',
                         help="if true model will be run not in jit freeze mode")
@@ -97,8 +95,7 @@ def run_tf_fp16(model_path, batch_size, num_runs, timeout, squad_path, **kwargs)
     return run_tf_fp(model_path, batch_size, num_runs, timeout, squad_path)
 
 
-def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path,
-                   input_size, disable_jit_freeze=False, fixed_input=False):
+def run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, fixed_input_size, disable_jit_freeze=False):
     from utils.benchmark import run_model
     from utils.nlp.squad import Squad_v1_1
     from transformers import AutoTokenizer, BertConfig, BertForQuestionAnswering
@@ -120,13 +117,17 @@ def run_single_pass(pytorch_runner, squad):
     tokenizer = AutoTokenizer.from_pretrained(
         "bert-large-uncased-whole-word-masking-finetuned-squad",
         padding=True, truncation=True, model_max_length=512)
-
+    print(fixed_input_size)
+    
     def tokenize(question, text):
-        if fixed_input:
+        if fixed_input_size is not None:
+            print('h1')
             return tokenizer(question, text, padding="max_length", truncation=True,
-                             max_length=input_size, return_tensors="pt")
+                             max_length=fixed_input_size, return_tensors="pt")
         else:
+            print('h2')
             return tokenizer(question, text, padding=True, truncation=True, return_tensors="pt")
+    quit()
 
     def detokenize(answer):
         return tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer))
@@ -208,10 +209,9 @@ def detokenize(answer):
     return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)
 
 
-def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path,
-                     input_size, disable_jit_freeze, fixed_input, **kwargs):
-    return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path,
-                          input_size, disable_jit_freeze, fixed_input)
+def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, squad_path, fixed_input_size, disable_jit_freeze,
+                     **kwargs):
+    return run_pytorch_fp(model_path, batch_size, num_runs, timeout, squad_path, fixed_input_size, disable_jit_freeze)
 
 
 def main():

From 5ff448670e487397c604f41484e8b34754936b49 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 17 Oct 2025 13:44:10 +0200
Subject: [PATCH 28/55] wip: remove tokenize debug prints and quit(); pass fixed_input_size=None in test

---
 .../extractive_question_answering/bert_large/run_mlperf.py  | 6 +-----
 tests/test_pytorch_models.py                                | 2 +-
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
index 84ed1311..4f555ab4 100644
--- a/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
+++ b/natural_language_processing/extractive_question_answering/bert_large/run_mlperf.py
@@ -117,17 +117,13 @@ def run_single_pass(pytorch_runner, squad):
     tokenizer = AutoTokenizer.from_pretrained(
         "bert-large-uncased-whole-word-masking-finetuned-squad",
         padding=True, truncation=True, model_max_length=512)
-    print(fixed_input_size)
-    
+
     def tokenize(question, text):
         if fixed_input_size is not None:
-            print('h1')
             return tokenizer(question, text, padding="max_length", truncation=True,
                              max_length=fixed_input_size, return_tensors="pt")
         else:
-            print('h2')
             return tokenizer(question, text, padding=True, truncation=True, return_tensors="pt")
-    quit()
 
     def detokenize(answer):
         return tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer))
diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py
index 02de8df6..b7a2ecaa 100644
--- a/tests/test_pytorch_models.py
+++ b/tests/test_pytorch_models.py
@@ -223,7 +223,7 @@ def wrapper(**kwargs):
         exact_match_ref, f1_ref = 0.750, 0.817
         acc = run_process(wrapper, {"model_path": self.model_path, "squad_path": self.dataset_path,
                                     "batch_size": 1, "num_runs": 24, "timeout": None,
-                                    "input_size": 384, "disable_jit_freeze": False, "fixed_input": False})
+                                    "fixed_input_size": None, "disable_jit_freeze": False})
         self.assertTrue(acc["exact_match"] / exact_match_ref > 0.95)
         self.assertTrue(acc["f1"] / f1_ref > 0.95)
 

From ee2acbd09fc8e50119d1ecbacf5eb3e86fe05e3b Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 17 Oct 2025 15:02:38 +0200
Subject: [PATCH 29/55] wip: setup_deb.sh: upgrade pip and add ffmpeg to apt packages

---
 setup_deb.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index 2e6b4a63..34323add 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -53,6 +53,8 @@ fi
 if ! pip3 --version; then
     apt-get install -y python3-pip
 fi
+pip install --upgrade pip
+
 PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))')
 PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev")
 if [[ -n "$PYTHON_DEV_SEARCH" ]]; then
@@ -81,7 +83,7 @@ pip3 install --break-system-packages -r "$(dirname "$0")/requirements.txt" ||
 
 apt install -y autoconf autogen automake build-essential libasound2-dev \
     libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \
-    libmpg123-dev pkg-config
+    libmpg123-dev pkg-config ffmpeg
 apt remove -y libsndfile1
 git clone -b 1.2.2 https://github.com/libsndfile/libsndfile.git && cd libsndfile/ && autoreconf -vif && ./configure --enable-werror && make -j && make install && ldconfig && cd .. && rm -rf libsndfile
 

From 0b9196f72c99a466d62c0863f265e6193736aedc Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 17 Oct 2025 15:13:15 +0200
Subject: [PATCH 30/55] wip: setup_deb.sh: set PIP_BREAK_SYSTEM_PACKAGES=1, move pip upgrade before requirements install

---
 setup_deb.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index 34323add..3820f857 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -11,6 +11,7 @@ log() {
 }
 
 ARCH=$(uname -m)
+PIP_BREAK_SYSTEM_PACKAGES=1
 
 if [ -z ${SCRIPT_DIR+x} ]; then
     SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
@@ -53,7 +54,6 @@ fi
 if ! pip3 --version; then
     apt-get install -y python3-pip
 fi
-pip install --upgrade pip
 
 PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))')
 PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev")
@@ -78,9 +78,12 @@ sleep 1
 ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py
 
 # get almost all python deps
+pip3 install --upgrade pip
 pip3 install --break-system-packages -r "$(dirname "$0")/requirements.txt" ||
     pip3 install -r "$(dirname "$0")/requirements.txt"
 
+
+
 apt install -y autoconf autogen automake build-essential libasound2-dev \
     libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \
     libmpg123-dev pkg-config ffmpeg

From 5e97ac496c7e5fa285e79de312b16e5638bd2c62 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 17 Oct 2025 16:25:28 +0200
Subject: [PATCH 31/55] wip: setup_deb.sh: drop PIP_BREAK_SYSTEM_PACKAGES, call pip via python3 -m pip

---
 setup_deb.sh | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index 3820f857..8348d2da 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -11,7 +11,6 @@ log() {
 }
 
 ARCH=$(uname -m)
-PIP_BREAK_SYSTEM_PACKAGES=1
 
 if [ -z ${SCRIPT_DIR+x} ]; then
     SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
@@ -78,9 +77,9 @@ sleep 1
 ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py
 
 # get almost all python deps
-pip3 install --upgrade pip
-pip3 install --break-system-packages -r "$(dirname "$0")/requirements.txt" ||
-    pip3 install -r "$(dirname "$0")/requirements.txt"
+python3 -m pip install --ignore-installed --break-system-packages --upgrade pip
+python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" ||
+    python3 -m pip3 install -r "$(dirname "$0")/requirements.txt"
 
 
 

From 3ae639f1f41ec23fab0ca84c9ddb5d001a3ae422 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 17 Oct 2025 17:06:05 +0200
Subject: [PATCH 32/55] wip: setup_deb.sh: add echo markers; drop --break-system-packages from pip upgrade

---
 setup_deb.sh | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index 8348d2da..2c50c32c 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -77,10 +77,16 @@ sleep 1
 ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py
 
 # get almost all python deps
-python3 -m pip install --ignore-installed --break-system-packages --upgrade pip
+echo here1
+
+python3 -m pip install --ignore-installed --upgrade pip
+
+echo here2
 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" ||
     python3 -m pip3 install -r "$(dirname "$0")/requirements.txt"
 
+echo here3
+
 
 
 apt install -y autoconf autogen automake build-essential libasound2-dev \

From 27af2ee5a103889bdd2746992c5f45e9f210ed06 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 17 Oct 2025 17:11:46 +0200
Subject: [PATCH 33/55] wip: setup_deb.sh: restore --break-system-packages on pip upgrade

---
 setup_deb.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index 2c50c32c..3358f800 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -79,7 +79,7 @@ ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py
 # get almost all python deps
 echo here1
 
-python3 -m pip install --ignore-installed --upgrade pip
+python3 -m pip install --break-system-packages --ignore-installed --upgrade pip
 
 echo here2
 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" ||

From 84eaf81710397df030fd2f5f33165c401b09900e Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 17 Oct 2025 17:16:26 +0200
Subject: [PATCH 34/55] wip: setup_deb.sh: print pip version, add pip upgrade without --break-system-packages

---
 setup_deb.sh | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index 3358f800..4e6a60ba 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -79,13 +79,16 @@ ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py
 # get almost all python deps
 echo here1
 
+python3 -m pip --version
+python3 -m pip install --upgrade --ignore-installed pip
+echo here2
 python3 -m pip install --break-system-packages --ignore-installed --upgrade pip
 
-echo here2
+echo here3
 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" ||
     python3 -m pip3 install -r "$(dirname "$0")/requirements.txt"
 
-echo here3
+echo here4
 
 
 

From 7181c4350a83886f555e58d74bd8a57827aca33a Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 17 Oct 2025 17:24:57 +0200
Subject: [PATCH 35/55] wip: setup_deb.sh: set PIP_CACHE_DIR default, comment out first pip upgrade

---
 setup_deb.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index 4e6a60ba..51e943af 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -79,8 +79,11 @@ ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py
 # get almost all python deps
 echo here1
 
+export PIP_CACHE_DIR=${PIP_CACHE_DIR:-/tmp/pip-cache}
+mkdir -p "$PIP_CACHE_DIR" || true
+
 python3 -m pip --version
-python3 -m pip install --upgrade --ignore-installed pip
+#python3 -m pip install --upgrade --ignore-installed pip
 echo here2
 python3 -m pip install --break-system-packages --ignore-installed --upgrade pip
 

From 47e0779719372dfab8f935023fcc9bace83bdfd6 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 17 Oct 2025 17:58:26 +0200
Subject: [PATCH 36/55] wip: setup_deb.sh: add ensurepip fallback for missing pip3, remove PIP_CACHE_DIR

---
 setup_deb.sh | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index 51e943af..f71a7392 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -48,10 +48,12 @@ sleep 1
 apt-get update -y
 apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake
 if ! python3 -c ""; then
+    apt-get update -y
     apt-get install -y python3 python3-pip
 fi
 if ! pip3 --version; then
-    apt-get install -y python3-pip
+    apt-get install -y python3-pip || true
+    python3 -m ensurepip --upgrade || true
 fi
 
 PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))')
@@ -79,9 +81,6 @@ ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py
 # get almost all python deps
 echo here1
 
-export PIP_CACHE_DIR=${PIP_CACHE_DIR:-/tmp/pip-cache}
-mkdir -p "$PIP_CACHE_DIR" || true
-
 python3 -m pip --version
 #python3 -m pip install --upgrade --ignore-installed pip
 echo here2

From 5ca52494702d584378bfe943f1dfe0320d73a9ef Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 17 Oct 2025 18:01:57 +0200
Subject: [PATCH 37/55] wip: setup_deb.sh: add ensurepip fallback when installing python3

---
 setup_deb.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index f71a7392..42bd2d0f 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -49,7 +49,8 @@ apt-get update -y
 apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake
 if ! python3 -c ""; then
     apt-get update -y
-    apt-get install -y python3 python3-pip
+    apt-get install -y python3 python3-pip || true
+    python3 -m ensurepip --upgrade || true
 fi
 if ! pip3 --version; then
     apt-get install -y python3-pip || true

From c2e309db9145cb1b0d46fc1328e9625952593be0 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Mon, 20 Oct 2025 12:39:51 +0200
Subject: [PATCH 38/55] wip: setup_deb.sh: remove ensurepip fallbacks, create and activate /opt/venv

---
 setup_deb.sh | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index 42bd2d0f..a01aa490 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -49,14 +49,15 @@ apt-get update -y
 apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake
 if ! python3 -c ""; then
     apt-get update -y
-    apt-get install -y python3 python3-pip || true
-    python3 -m ensurepip --upgrade || true
+    apt-get install -y python3 python3-pip
 fi
 if ! pip3 --version; then
-    apt-get install -y python3-pip || true
-    python3 -m ensurepip --upgrade || true
+    apt-get install -y python3-pip
 fi
 
+python3 -m venv /opt/venv
+. /opt/venv/bin/activate
+
 PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))')
 PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev")
 if [[ -n "$PYTHON_DEV_SEARCH" ]]; then

From fba70f675775881672248dbf2507e152ee0e0e1e Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Mon, 20 Oct 2025 12:43:42 +0200
Subject: [PATCH 39/55] wip

---
 setup_deb.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index a01aa490..63519c34 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -49,7 +49,7 @@ apt-get update -y
 apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake
 if ! python3 -c ""; then
     apt-get update -y
-    apt-get install -y python3 python3-pip
+    apt-get install -y python3 python3-pip python3-venv
 fi
 if ! pip3 --version; then
     apt-get install -y python3-pip

From 242ca724cd003a5727f8652efce1c3e675d21138 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Mon, 20 Oct 2025 12:49:33 +0200
Subject: [PATCH 40/55] wip

---
 setup_deb.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index 63519c34..ed1dd039 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -49,12 +49,13 @@ apt-get update -y
 apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake
 if ! python3 -c ""; then
     apt-get update -y
-    apt-get install -y python3 python3-pip python3-venv
+    apt-get install -y python3 python3-pip
 fi
 if ! pip3 --version; then
     apt-get install -y python3-pip
 fi
 
+apt-get install -y python3-venv
 python3 -m venv /opt/venv
 . /opt/venv/bin/activate
 

From e5f63abf825967879757647b1aaf8023f562dfe6 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Mon, 20 Oct 2025 12:52:39 +0200
Subject: [PATCH 41/55] wip

---
 setup_deb.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index ed1dd039..304b7866 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -87,7 +87,7 @@ echo here1
 python3 -m pip --version
 #python3 -m pip install --upgrade --ignore-installed pip
 echo here2
-python3 -m pip install --break-system-packages --ignore-installed --upgrade pip
+python3 -m pip install --ignore-installed --upgrade pip
 
 echo here3
 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" ||

From 8628d9e72bb6a701fc08c79bd10a95968d3abf96 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Mon, 20 Oct 2025 14:38:20 +0200
Subject: [PATCH 42/55] wip

---
 setup_deb.sh | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index 304b7866..93325da7 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -55,9 +55,10 @@ if ! pip3 --version; then
     apt-get install -y python3-pip
 fi
 
-apt-get install -y python3-venv
-python3 -m venv /opt/venv
-. /opt/venv/bin/activate
+#apt-get install -y python3-venv
+#python3 -m venv /opt/venv
+#. /opt/venv/bin/activate
+#python3 -m pip config set global.break-system-packages true
 
 PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))')
 PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev")

From 572eb528a3b69a53e851b7372464a67944efddb9 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Mon, 20 Oct 2025 14:43:23 +0200
Subject: [PATCH 43/55] wip

---
 setup_deb.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index 93325da7..58313a98 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -88,7 +88,7 @@ echo here1
 python3 -m pip --version
 #python3 -m pip install --upgrade --ignore-installed pip
 echo here2
-python3 -m pip install --ignore-installed --upgrade pip
+PIP_BREAK_SYSTEM_PACKAGES=1 python3 -m pip install --ignore-installed --upgrade pip
 
 echo here3
 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" ||

From 58b26c7475be9a36b7e71a3e4767eead0526bb3d Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Wed, 22 Oct 2025 13:01:13 +0200
Subject: [PATCH 44/55] wip

---
 setup_deb.sh | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index 58313a98..81404dd5 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -55,11 +55,6 @@ if ! pip3 --version; then
     apt-get install -y python3-pip
 fi
 
-#apt-get install -y python3-venv
-#python3 -m venv /opt/venv
-#. /opt/venv/bin/activate
-#python3 -m pip config set global.break-system-packages true
-
 PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))')
 PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev")
 if [[ -n "$PYTHON_DEV_SEARCH" ]]; then
@@ -96,8 +91,6 @@ python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.
 
 echo here4
 
-
-
 apt install -y autoconf autogen automake build-essential libasound2-dev \
     libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \
     libmpg123-dev pkg-config ffmpeg
@@ -123,3 +116,6 @@ else
     touch "$SCRIPT_DIR"/.setup_completed
 fi
 log "Setup completed. Please run: source $SCRIPT_DIR/set_env_variables.sh"
+
+echo HERe555
+exit 1
\ No newline at end of file

From 6c47e2f35fdcf7dd7e0d24f43c7a339426addbce Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Wed, 22 Oct 2025 13:09:13 +0200
Subject: [PATCH 45/55] wip

---
 setup_deb.sh | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index 81404dd5..a6617573 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -89,7 +89,9 @@ echo here3
 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" ||
     python3 -m pip3 install -r "$(dirname "$0")/requirements.txt"
 
-echo here4
+python3 -m pip --version
+echo HERe555
+exit 1
 
 apt install -y autoconf autogen automake build-essential libasound2-dev \
     libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \
@@ -116,6 +118,3 @@ else
     touch "$SCRIPT_DIR"/.setup_completed
 fi
 log "Setup completed. Please run: source $SCRIPT_DIR/set_env_variables.sh"
-
-echo HERe555
-exit 1
\ No newline at end of file

From 006ebb12cd8669d53d9e8bba7bdd3f118c599682 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Wed, 22 Oct 2025 13:18:05 +0200
Subject: [PATCH 46/55] wip

---
 .github/workflows/test.yml |  2 --
 setup_deb.sh               | 11 -----------
 2 files changed, 13 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 8c54a461..9ba08463 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -84,8 +84,6 @@ jobs:
           wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1
           tar -xf aio_objdet_dataset.tar.gz > /dev/null
           
-          apt-get update && apt-get install -y ffmpeg
-          
           wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1
           IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60
           
diff --git a/setup_deb.sh b/setup_deb.sh
index a6617573..c1d10385 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -78,21 +78,10 @@ sleep 1
 ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py
 
 # get almost all python deps
-echo here1
-
-python3 -m pip --version
-#python3 -m pip install --upgrade --ignore-installed pip
-echo here2
 PIP_BREAK_SYSTEM_PACKAGES=1 python3 -m pip install --ignore-installed --upgrade pip
-
-echo here3
 python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" ||
     python3 -m pip3 install -r "$(dirname "$0")/requirements.txt"
 
-python3 -m pip --version
-echo HERe555
-exit 1
-
 apt install -y autoconf autogen automake build-essential libasound2-dev \
     libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \
     libmpg123-dev pkg-config ffmpeg

From 13e9fd7b28ce69a6c7b627fef0431df9798269ab Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Wed, 22 Oct 2025 14:40:01 +0200
Subject: [PATCH 47/55] wip

---
 .github/workflows/test.yml | 7 +++++++
 setup_deb.sh               | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 9ba08463..55a3c3da 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -84,6 +84,13 @@ jobs:
           wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1
           tar -xf aio_objdet_dataset.tar.gz > /dev/null
           
+          which -a ffmpeg
+          ffmpeg -version
+          
+          ffmpeg -version | head -n1
+          
+          exit 1
+          
           wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1
           IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60
           
diff --git a/setup_deb.sh b/setup_deb.sh
index c1d10385..a8285c37 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -55,6 +55,8 @@ if ! pip3 --version; then
     apt-get install -y python3-pip
 fi
 
+apt-get update && apt-get install -y --no-install-recommends ffmpeg && rm -rf /var/lib/apt/lists/*
+
 PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))')
 PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev")
 if [[ -n "$PYTHON_DEV_SEARCH" ]]; then

From 706fe6e837e0601cfa4e3d4c9ed096aef4ff33f6 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Wed, 22 Oct 2025 15:02:21 +0200
Subject: [PATCH 48/55] wip

---
 setup_deb.sh | 2 --
 1 file changed, 2 deletions(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index a8285c37..c1d10385 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -55,8 +55,6 @@ if ! pip3 --version; then
     apt-get install -y python3-pip
 fi
 
-apt-get update && apt-get install -y --no-install-recommends ffmpeg && rm -rf /var/lib/apt/lists/*
-
 PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))')
 PYTHON_DEV_SEARCH=$(apt-cache search --names-only "python${PYTHON_VERSION}-dev")
 if [[ -n "$PYTHON_DEV_SEARCH" ]]; then

From ad8cf9fe85068a0e999216ebad3aedcdc2b399ae Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Wed, 22 Oct 2025 15:57:40 +0200
Subject: [PATCH 49/55] wip

---
 .github/workflows/test.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 55a3c3da..ecba2ae5 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -81,15 +81,15 @@ jobs:
       
       - name: End-user smoke test
         run: |            
-          wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1
-          tar -xf aio_objdet_dataset.tar.gz > /dev/null
-          
           which -a ffmpeg
           ffmpeg -version
           
           ffmpeg -version | head -n1
           
-          exit 1
+          echo here1
+          
+          wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1
+          tar -xf aio_objdet_dataset.tar.gz > /dev/null
           
           wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1
           IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60

From 8983fae596acfdde740ebabe4bc1a8eea5ce9bdb Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Wed, 22 Oct 2025 17:58:51 +0200
Subject: [PATCH 50/55] wip

---
 .github/workflows/test.yml | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ecba2ae5..bea349f5 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -80,14 +80,8 @@ jobs:
           python3 -m unittest tests.test_pytorch_models
       
       - name: End-user smoke test
-        run: |            
-          which -a ffmpeg
+        run: |                      
           ffmpeg -version
-          
-          ffmpeg -version | head -n1
-          
-          echo here1
-          
           wget https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/aio_objdet_dataset.tar.gz > /dev/null 2>&1
           tar -xf aio_objdet_dataset.tar.gz > /dev/null
           

From b22bd269f6bd13113dfacf6d2131a8b60c786691 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Thu, 23 Oct 2025 11:30:56 +0200
Subject: [PATCH 51/55] wip

---
 setup_deb.sh | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index c1d10385..abb4c8fd 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -4,6 +4,9 @@
 
 set -eo pipefail
 
+ln -fs /usr/share/zoneinfo/Europe/Warsaw /etc/localtime
+echo "Europe/Warsaw" | tee /etc/timezone >/dev/null
+
 log() {
     COLOR_DEFAULT='\033[0m'
     COLOR_CYAN='\033[1;36m'
@@ -46,7 +49,7 @@ fi
 log "Installing system dependencies ..."
 sleep 1
 apt-get update -y
-apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake
+apt-get install -y build-essential libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake
 if ! python3 -c ""; then
     apt-get update -y
     apt-get install -y python3 python3-pip
@@ -84,7 +87,7 @@ python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.
 
 apt install -y autoconf autogen automake build-essential libasound2-dev \
     libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \
-    libmpg123-dev pkg-config ffmpeg
+    libmpg123-dev pkg-config
 apt remove -y libsndfile1
 git clone -b 1.2.2 https://github.com/libsndfile/libsndfile.git && cd libsndfile/ && autoreconf -vif && ./configure --enable-werror && make -j && make install && ldconfig && cd .. && rm -rf libsndfile
 
@@ -101,6 +104,9 @@ if [ "$(python3 -c 'import torch; print(torch.cuda.is_available())')" == "True"
 fi
 log "done.\n"
 
+apt-get update -y
+apt-get install -y ffmpeg
+
 if [ -f "/etc/machine-id" ]; then
     cat /etc/machine-id >"$SCRIPT_DIR"/.setup_completed
 else

From 51f3b94b2eff0dc464eff1c6352fa49f1aa4c7d8 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Thu, 23 Oct 2025 13:32:44 +0200
Subject: [PATCH 52/55] wip

---
 .github/workflows/test.yml | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index bea349f5..ef0be073 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -116,6 +116,7 @@ jobs:
       COCO_IMG_PATH: aio_objdet_dataset
       COCO_ANNO_PATH: aio_objdet_dataset/annotations.json
       OMP_NUM_THREADS: 32
+      AIO_NUM_THREADS: 32
       S3_URL_CRITEO_DATASET: ${{ secrets.S3_URL_CRITEO_DATASET }}
       S3_URL_RESNET_50_V15_TF_FP32: ${{ secrets.S3_URL_RESNET_50_V15_TF_FP32 }}
       S3_URL_SSD_INCEPTION_V2_TF_FP32: ${{ secrets.S3_URL_SSD_INCEPTION_V2_TF_FP32 }}
@@ -150,24 +151,24 @@ jobs:
           tar -xf aio_objdet_dataset.tar.gz > /dev/null
           
           wget $S3_URL_RESNET_50_V15_TF_FP32 > /dev/null 2>&1
-          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60
+          IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v15/run.py -m resnet_50_v15_tf_fp32.pb -p fp32 -f tf --timeout=60
           
-          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60
+          IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60
           
           wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt > /dev/null 2>&1
-          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/yolo_v8/run.py -m yolov8n.pt -f pytorch -p fp32 --timeout=60
+          IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/yolo_v8/run.py -m yolov8n.pt -f pytorch -p fp32 --timeout=60
           
           python3 speech_recognition/whisper/run.py -m small.en
           
           wget $S3_URL_SSD_INCEPTION_V2_TF_FP32 > /dev/null 2>&1
-          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/ssd_inception_v2/run.py -m ssd_inception_v2_tf_fp32.pb -p fp32 --timeout=60
+          IGNORE_DATASET_LIMITS=1 python3 computer_vision/object_detection/ssd_inception_v2/run.py -m ssd_inception_v2_tf_fp32.pb -p fp32 --timeout=60
           
           wget https://zenodo.org/records/4735647/files/resnet50_v1.onnx > /dev/null 2>&1
-          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v1/run.py -m resnet50_v1.onnx -p fp32 -f ort
+          IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/resnet_50_v1/run.py -m resnet50_v1.onnx -p fp32 -f ort
           
           wget https://s3.amazonaws.com/onnx-model-zoo/vgg/vgg16/vgg16.tar.gz > /dev/null 2>&1
           tar -xf vgg16.tar.gz > /dev/null
-          OMP_NUM_THREADS=32 AIO_NUM_THREADS=32 IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/vgg_16/run.py -m vgg16/vgg16.onnx -p fp32 -f ort
+          IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/vgg_16/run.py -m vgg16/vgg16.onnx -p fp32 -f ort
 
   test_pytorch_arm64_sh:
     if: false

From 3f70599cba3dfad97a5c7ca517127b5a04cf169b Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Fri, 24 Oct 2025 15:50:45 +0200
Subject: [PATCH 53/55] wip

---
 tests/test_pytorch_models.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py
index b7a2ecaa..b38dba04 100644
--- a/tests/test_pytorch_models.py
+++ b/tests/test_pytorch_models.py
@@ -366,19 +366,19 @@ def setUp(self):
     #                                 "timeout": None, "disable_jit_freeze": False})
     #     self.assertTrue(acc["coco_map"] / coco_map_ref > 0.95)
 
-    def test_yolo_v8_s(self):
-        from computer_vision.object_detection.yolo_v8.run import run_pytorch_fp32
-        from utils.benchmark import set_global_intra_op_parallelism_threads
-        set_global_intra_op_parallelism_threads(32)
-
-        def wrapper(**kwargs):
-            kwargs["q"].put(run_pytorch_fp32(**kwargs)[0])
-
-        coco_map_ref = 0.353
-        acc = run_process(wrapper, {"model_path": self.yolo_v8_s_path, "images_path": self.dataset_path,
-                                    "anno_path": self.annotations_path, "batch_size": 1, "num_runs": 465,
-                                    "timeout": None, "disable_jit_freeze": False})
-        self.assertTrue(acc["coco_map"] / coco_map_ref > 0.95)
+    # def test_yolo_v8_s(self):
+    #     from computer_vision.object_detection.yolo_v8.run import run_pytorch_fp32
+    #     from utils.benchmark import set_global_intra_op_parallelism_threads
+    #     set_global_intra_op_parallelism_threads(32)
+    #
+    #     def wrapper(**kwargs):
+    #         kwargs["q"].put(run_pytorch_fp32(**kwargs)[0])
+    #
+    #     coco_map_ref = 0.353
+    #     acc = run_process(wrapper, {"model_path": self.yolo_v8_s_path, "images_path": self.dataset_path,
+    #                                 "anno_path": self.annotations_path, "batch_size": 1, "num_runs": 465,
+    #                                 "timeout": None, "disable_jit_freeze": False})
+    #     self.assertTrue(acc["coco_map"] / coco_map_ref > 0.95)
 
 
 if __name__ == "__main__":

From d918720f97389ded793719d07747d6317d3d0d30 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Tue, 28 Oct 2025 13:28:57 +0100
Subject: [PATCH 54/55] wip

---
 .../object_detection/yolo_v11/README.md       | 118 +++++++++++++++++
 .../object_detection/yolo_v11/run.py          | 124 ++++++++++++++++++
 2 files changed, 242 insertions(+)
 create mode 100644 computer_vision/object_detection/yolo_v11/README.md
 create mode 100644 computer_vision/object_detection/yolo_v11/run.py

diff --git a/computer_vision/object_detection/yolo_v11/README.md b/computer_vision/object_detection/yolo_v11/README.md
new file mode 100644
index 00000000..5f72cf7d
--- /dev/null
+++ b/computer_vision/object_detection/yolo_v11/README.md
@@ -0,0 +1,118 @@
+# YOLO v11
+
+This folder contains the script to run YOLO v11 on the COCO object detection task.
+
+Variants supplied below for PyTorch and ONNX Runtime in fp32 precision accept input of shape 640x640.
+
+The original documentation of the model is available here: https://docs.ultralytics.com/#ultralytics-yolov8
+
+
+### Metrics
+
+Based on 1000 images from COCO Dataset for YOLOv8n model in PyTorch framework in fp32 precision
+
+| Metric                  | IoU       | Area   | maxDets |Score  |
+|:---:                    |:---:      |:---:   |:---:    |:---:  |
+| Average Precision  (AP) |0.50:0.95 |    all | 100 | 0.338 |
+| Average Precision  (AP) |0.50      |    all | 100 | 0.452 |
+| Average Precision  (AP) |0.75      |    all | 100 | 0.370 |
+| Average Precision  (AP) |0.50:0.95 |  small | 100 | 0.122 |
+| Average Precision  (AP) |0.50:0.95 | medium | 100 | 0.351 |
+| Average Precision  (AP) |0.50:0.95 |  large | 100 | 0.504 |
+| Average Recall     (AR) |0.50:0.95 |    all |   1 | 0.265 |
+| Average Recall     (AR) |0.50:0.95 |    all |  10 | 0.375 |
+| Average Recall     (AR) |0.50:0.95 |    all | 100 | 0.381 |
+| Average Recall     (AR) |0.50:0.95 |  small | 100 | 0.133 |
+| Average Recall     (AR) |0.50:0.95 | medium | 100 | 0.385 |
+| Average Recall     (AR) |0.50:0.95 |  large | 100 | 0.569 |
+
+Based on 1000 images from COCO Dataset for YOLOv8n model in ONNX Runtime framework in fp32 precision
+
+| Metric                  | IoU       | Area   | maxDets |Score  |
+|:---:                    |:---:      |:---:   |:---:    |:---:  |
+| Average Precision  (AP) |0.50:0.95 |    all | 100 | 0.338|
+| Average Precision  (AP) |0.50      |    all | 100 | 0.452|
+| Average Precision  (AP) |0.75      |    all | 100 | 0.370|
+| Average Precision  (AP) |0.50:0.95 |  small | 100 | 0.122|
+| Average Precision  (AP) |0.50:0.95 | medium | 100 | 0.351|
+| Average Precision  (AP) |0.50:0.95 |  large | 100 | 0.504|
+| Average Recall     (AR) |0.50:0.95 |    all |   1 | 0.265|
+| Average Recall     (AR) |0.50:0.95 |    all |  10 | 0.375|
+| Average Recall     (AR) |0.50:0.95 |    all | 100 | 0.381|
+| Average Recall     (AR) |0.50:0.95 |  small | 100 | 0.133|
+| Average Recall     (AR) |0.50:0.95 | medium | 100 | 0.385|
+| Average Recall     (AR) |0.50:0.95 |  large | 100 | 0.569|
+
+Based on 1000 images from COCO Dataset for YOLOv8x model in ONNX Runtime framework in fp32 precision
+
+| Metric                  | IoU       | Area   | maxDets |Score  |
+|:---:                    |:---:      |:---:   |:---:    |:---:  |
+| Average Precision  (AP) |0.50:0.95 |    all | 100 | 0.575|
+| Average Precision  (AP) |0.50      |    all | 100 | 0.714|
+| Average Precision  (AP) |0.75      |    all | 100 | 0.639|
+| Average Precision  (AP) |0.50:0.95 |  small | 100 | 0.336|
+| Average Precision  (AP) |0.50:0.95 | medium | 100 | 0.633|
+| Average Precision  (AP) |0.50:0.95 |  large | 100 | 0.812|
+| Average Recall     (AR) |0.50:0.95 |    all |   1 | 0.409|
+| Average Recall     (AR) |0.50:0.95 |    all |  10 | 0.611|
+| Average Recall     (AR) |0.50:0.95 |    all | 100 | 0.620|
+| Average Recall     (AR) |0.50:0.95 |  small | 100 | 0.361|
+| Average Recall     (AR) |0.50:0.95 | medium | 100 | 0.676|
+| Average Recall     (AR) |0.50:0.95 |  large | 100 | 0.849|
+
+
+### Dataset and model
+
+Dataset can be downloaded from here: https://cocodataset.org/#download
+
+PyTorch models in fp32 precision can be downloaded here:
+```
+wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt
+wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt
+wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt
+wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt
+wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt
+```
+
+You can export a PyTorch model to ONNX Runtime model using the following Python code:
+
+```python
+from ultralytics import YOLO
+model = YOLO('/path/to/yolov8n.pt')
+model.export(format='onnx')
+```
+
+### Running instructions
+
+Before running any code, you should first export the PYTHONPATH variable with the path pointing to the model zoo directory,
+as well as AIO_NUM_THREADS specifying the number of threads to be used.
+
+```
+export PYTHONPATH=/path/to/model_zoo
+export AIO_NUM_THREADS=1
+```
+
+For the best experience we also recommend setting environment variables as specified below.
+
+```
+export COCO_IMG_PATH=/path/to/images
+export COCO_ANNO_PATH=/path/to/annotations
+```
+
+Now you are able to run the run.py script. 
+
+To get detailed information on the script's recognized arguments run it with -h flag for help.
+
+The path to the model (flag "-m") and the framework (flag "-f") have to be specified; the precision (flag "-p") defaults to fp32.
+
+Please note that the default batch size is 1 and if not specified otherwise the script will run for 1 minute.
+
+Example command: 
+
+```
+python3 run.py -m /path/to/model.onnx -p fp32 --framework ort
+```
+
+```
+python3 run.py -m /path/to/model.pt -p fp32 --framework pytorch
+```
\ No newline at end of file
diff --git a/computer_vision/object_detection/yolo_v11/run.py b/computer_vision/object_detection/yolo_v11/run.py
new file mode 100644
index 00000000..ec782733
--- /dev/null
+++ b/computer_vision/object_detection/yolo_v11/run.py
@@ -0,0 +1,124 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (c) 2025, Ampere Computing LLC
+try:
+    from utils import misc  # noqa
+except ModuleNotFoundError:
+    import os
+    import sys
+    filename = "set_env_variables.sh"
+    directory = os.path.realpath(__file__).split("/")[:-1]
+    for idx in range(1, len(directory) - 1):
+        subdir = "/".join(directory[:-idx])
+        if filename in os.listdir(subdir):
+            print(f"\nPlease run \033[91m'source {os.path.join(subdir, filename)}'\033[0m first.")
+            break
+    else:
+        print(f"\n\033[91mFAIL: Couldn't find {filename}, are you running this script as part of Ampere Model Library?"
+              f"\033[0m")
+    sys.exit(1)
+
+
+def parse_args():
+    import argparse
+    parser = argparse.ArgumentParser(description="Run YOLOv11 model.")
+    parser.add_argument("-m", "--model_path",
+                        type=str, required=True,
+                        help="path to the model")
+    parser.add_argument("-p", "--precision",
+                        type=str, choices=["fp32"], default="fp32",
+                        help="precision of the model provided")
+    parser.add_argument("-b", "--batch_size",
+                        type=int, default=1,
+                        help="batch size to feed the model with")
+    parser.add_argument("-f", "--framework",
+                        type=str,
+                        choices=["pytorch"], required=True,
+                        help="specify the framework in which a model should be run")
+    parser.add_argument("--timeout",
+                        type=float, default=60.0,
+                        help="timeout in seconds")
+    parser.add_argument("--num_runs",
+                        type=int,
+                        help="number of passes through network to execute")
+    parser.add_argument("--images_path",
+                        type=str,
+                        help="path to directory with COCO validation images")
+    parser.add_argument("--anno_path",
+                        type=str,
+                        help="path to file with validation annotations")
+    parser.add_argument("--disable_jit_freeze", action='store_true',
+                        help="if true model will be run not in jit freeze mode")
+    return parser.parse_args()
+
+
+def run_pytorch_fp(model_path, batch_size, num_runs, timeout, images_path, anno_path, disable_jit_freeze=False):
+    import torch
+    import os
+    from utils.cv.coco import COCODataset
+    from utils.benchmark import run_model
+
+    os.environ["YOLO_VERBOSE"] = os.getenv("YOLO_VERBOSE", "False")
+    # Ultralytics sets it to True by default. This way we suppress the logging by default while still allowing the user
+    # to set it to True if needed
+    from utils.pytorch import PyTorchRunner
+    from ultralytics.yolo.utils import ops
+
+    def run_single_pass(pytorch_runner, coco):
+        output = pytorch_runner.run(batch_size, coco.get_input_array((640, 640)))
+        output = ops.non_max_suppression(output)
+
+        for i in range(batch_size):
+            for d in range(output[i].shape[0]):
+                coco.submit_bbox_prediction(
+                    i,
+                    coco.convert_bbox_to_coco_order(output[i][d][:4].tolist()),
+                    output[i][d][4].item(),
+                    coco.translate_cat_id_to_coco(output[i][d][5].item())
+                )
+
+    dataset = COCODataset(batch_size, "RGB", "COCO_val2014_000000000000", images_path,
+                          anno_path, pre_processing="PyTorch_objdet", sort_ascending=True, order="NCHW")
+
+    from ultralytics import YOLO
+    model = YOLO(model_path)
+    torchscript_model = model.export(format="torchscript")
+
+    runner = PyTorchRunner(torch.jit.load(torchscript_model),
+                           disable_jit_freeze=disable_jit_freeze,
+                           example_inputs=torch.stack(dataset.get_input_array((640, 640))))
+
+    return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)
+
+
+def run_pytorch_fp32(model_path, batch_size, num_runs, timeout, images_path, anno_path, disable_jit_freeze, **kwargs):
+    return run_pytorch_fp(model_path, batch_size, num_runs, timeout, images_path, anno_path, disable_jit_freeze)
+
+
+def main():
+    from utils.misc import print_goodbye_message_and_die
+    args = parse_args()
+
+    if args.framework == "pytorch":
+        import torch
+        if torch.cuda.is_available():
+            run_pytorch_cuda(**vars(args))
+        elif args.precision == "fp32":
+            run_pytorch_fp32(**vars(args))
+        else:
+            print_goodbye_message_and_die(
+                "this model seems to be unsupported in a specified precision: " + args.precision)
+    elif args.framework == "ort":
+        if args.precision == "fp32":
+            if args.batch_size != 1:
+                raise ValueError("Batch size must be 1 for this model.")
+            run_ort_fp32(**vars(args))
+        else:
+            print_goodbye_message_and_die(
+                "this model seems to be unsupported in a specified precision: " + args.precision)
+    else:
+        print_goodbye_message_and_die(
+            "this model seems to be unsupported in a specified framework: " + args.framework)
+
+
+if __name__ == "__main__":
+    main()

From 2b55c04741edb42181a070e4a979a12d1fde6e62 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki <marcel.wilnicki@gmail.com>
Date: Tue, 28 Oct 2025 13:50:18 +0100
Subject: [PATCH 55/55] wip

---
 computer_vision/object_detection/yolo_v11/run.py | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/computer_vision/object_detection/yolo_v11/run.py b/computer_vision/object_detection/yolo_v11/run.py
index ec782733..af254d7d 100644
--- a/computer_vision/object_detection/yolo_v11/run.py
+++ b/computer_vision/object_detection/yolo_v11/run.py
@@ -61,11 +61,11 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, images_path, anno_
     # Ultralytics sets it to True by default. This way we suppress the logging by default while still allowing the user
     # to set it to True if needed
     from utils.pytorch import PyTorchRunner
-    from ultralytics.yolo.utils import ops
+    from ultralytics.utils import nms
 
     def run_single_pass(pytorch_runner, coco):
         output = pytorch_runner.run(batch_size, coco.get_input_array((640, 640)))
-        output = ops.non_max_suppression(output)
+        output = nms.non_max_suppression(output)
 
         for i in range(batch_size):
             for d in range(output[i].shape[0]):
@@ -85,7 +85,7 @@ def run_single_pass(pytorch_runner, coco):
 
     runner = PyTorchRunner(torch.jit.load(torchscript_model),
                            disable_jit_freeze=disable_jit_freeze,
-                           example_inputs=torch.stack(dataset.get_input_array((640, 640))))
+                           example_inputs=torch.stack((dataset.get_input_array((640, 640)),)))
 
     return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)
 
@@ -107,14 +107,6 @@ def main():
         else:
             print_goodbye_message_and_die(
                 "this model seems to be unsupported in a specified precision: " + args.precision)
-    elif args.framework == "ort":
-        if args.precision == "fp32":
-            if args.batch_size != 1:
-                raise ValueError("Batch size must be 1 for this model.")
-            run_ort_fp32(**vars(args))
-        else:
-            print_goodbye_message_and_die(
-                "this model seems to be unsupported in a specified precision: " + args.precision)
     else:
         print_goodbye_message_and_die(
             "this model seems to be unsupported in a specified framework: " + args.framework)