Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ jobs:
COCO_IMG_PATH: aio_objdet_dataset
COCO_ANNO_PATH: aio_objdet_dataset/annotations.json
OMP_NUM_THREADS: 32
AIO_NUM_THREADS: 32
S3_URL_CRITEO_DATASET: ${{ secrets.S3_URL_CRITEO_DATASET }}
S3_URL_RESNET_50_V15_TF_FP32: ${{ secrets.S3_URL_RESNET_50_V15_TF_FP32 }}
S3_URL_SSD_INCEPTION_V2_TF_FP32: ${{ secrets.S3_URL_SSD_INCEPTION_V2_TF_FP32 }}
Expand Down Expand Up @@ -220,6 +221,7 @@ jobs:
PYTHONPATH: ./
COCO_IMG_PATH: aio_objdet_dataset
COCO_ANNO_PATH: aio_objdet_dataset/annotations.json
OMP_NUM_THREADS: 32
AIO_NUM_THREADS: 32
AIO_DEBUG_MODE: 0
S3_URL_CRITEO_DATASET: ${{ secrets.S3_URL_CRITEO_DATASET }}
Expand Down Expand Up @@ -263,7 +265,7 @@ jobs:
IGNORE_DATASET_LIMITS=1 AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 computer_vision/classification/resnet_50_v15/run.py -m resnet50 -p fp32 -b 16 -f pytorch
AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 speech_recognition/whisper/run.py -m tiny.en
# AIO_IMPLICIT_FP16_TRANSFORM_FILTER=".*" python3 speech_recognition/whisper/run.py -m tiny.en
IGNORE_DATASET_LIMITS=1 python3 computer_vision/classification/mobilenet_v2/run.py -p fp32 -f pytorch --timeout=60
Expand Down
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright (c) 2024, Ampere Computing LLC
Copyright (c) 2025, Ampere Computing LLC
Copyright (c) 2022 Andrej Karpathy
Copyright (c) 2022 OpenAI
Copyright (c) 2022 Stability AI
Expand Down
10 changes: 6 additions & 4 deletions benchmark.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2024, Ampere Computing LLC
# Copyright (c) 2025, Ampere Computing LLC

import os
import sys
import json
Expand All @@ -15,8 +16,8 @@
"ResNet-50 v1.5": "https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/lookups_aml/q80_30%40ampere_pytorch_1.10.0%40resnet_50_v1.5.json", # noqa
"YOLO v8s": "https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/lookups_aml/q80_30%40ampere_pytorch_1.10.0%40yolo_v8_s.json", # noqa
"BERT large": "https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/lookups_aml/q80_30%40ampere_pytorch_1.10.0%40bert_large_mlperf_squad.json", # noqa
"DLRM": "https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/lookups_aml/q80_30%40ampere_pytorch_1.10.0%40dlrm_torchbench.json", # noqa
"Whisper medium EN": "https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/lookups_aml/q80_30%40ampere_pytorch_1.10.0%40whisper_medium.en.json" # noqa
"DLRM": "https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/lookups_aml/q80_30%40ampere_pytorch_1.10.0%40dlrm_torchbench.json" # noqa
# "Whisper medium EN": "https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/lookups_aml/q80_30%40ampere_pytorch_1.10.0%40whisper_medium.en.json" # noqa
},
"Altra Max": {
"ResNet-50 v1.5": "https://ampereaimodelzoo.s3.eu-central-1.amazonaws.com/lookups_aml/m128_30%40ampere_pytorch_1.10.0%40resnet_50_v1.5.json", # noqa
Expand Down Expand Up @@ -676,7 +677,8 @@ def convert_name(text):


def main():
models = [ResNet50, YOLO, BERT, DLRM, Whisper]
# models = [ResNet50, YOLO, BERT, DLRM, Whisper]
models = [ResNet50, YOLO, BERT, DLRM]
parser = argparse.ArgumentParser(prog="AML benchmarking tool")
parser.add_argument("--no-interactive", action="store_true", help="don't ask for user input")
parser.add_argument("--model", type=str, choices=[convert_name(model.model_name) for model in models],
Expand Down
2 changes: 1 addition & 1 deletion computer_vision/object_detection/yolo_v5/run.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2024, Ampere Computing LLC
# Copyright (c) 2025, Ampere Computing LLC
try:
from utils import misc # noqa
except ModuleNotFoundError:
Expand Down
12 changes: 6 additions & 6 deletions computer_vision/object_detection/yolo_v8/run.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2024, Ampere Computing LLC
# Copyright (c) 2025, Ampere Computing LLC
try:
from utils import misc # noqa
except ModuleNotFoundError:
Expand Down Expand Up @@ -61,15 +61,15 @@ def run_ort_fp32(model_path, batch_size, num_runs, timeout, images_path, anno_pa
# Ultralytics sets it to True by default. This way we suppress the logging by default while still allowing the user
# to set it to True if needed
from utils.ort import OrtRunner
from ultralytics.yolo.utils import ops
from ultralytics.utils import nms

def run_single_pass(ort_runner, coco):
shape = (640, 640)
ort_runner.set_input_tensor("images", coco.get_input_array(shape).astype("float32"))
output = ort_runner.run(batch_size)

output = torch.from_numpy(output[0])
output = ops.non_max_suppression(output)
output = nms.non_max_suppression(output)

for i in range(batch_size):
for d in range(output[i].shape[0]):
Expand Down Expand Up @@ -97,11 +97,11 @@ def run_pytorch_fp(model_path, batch_size, num_runs, timeout, images_path, anno_
# Ultralytics sets it to True by default. This way we suppress the logging by default while still allowing the user
# to set it to True if needed
from utils.pytorch import PyTorchRunner
from ultralytics.yolo.utils import ops
from ultralytics.utils import nms

def run_single_pass(pytorch_runner, coco):
output = pytorch_runner.run(batch_size, coco.get_input_array((640, 640)))
output = ops.non_max_suppression(output)
output = nms.non_max_suppression(output)

for i in range(batch_size):
for d in range(output[i].shape[0]):
Expand All @@ -121,7 +121,7 @@ def run_single_pass(pytorch_runner, coco):

runner = PyTorchRunner(torch.jit.load(torchscript_model),
disable_jit_freeze=disable_jit_freeze,
example_inputs=torch.stack(dataset.get_input_array((640, 640))))
example_inputs=torch.stack((dataset.get_input_array((640, 640)),)))

return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2024, Ampere Computing LLC
# Copyright (c) 2025, Ampere Computing LLC
try:
from utils import misc # noqa
except ModuleNotFoundError:
Expand Down
2 changes: 1 addition & 1 deletion recommendation/dlrm/run.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2024, Ampere Computing LLC
# Copyright (c) 2025, Ampere Computing LLC
try:
from utils import misc # noqa
except ModuleNotFoundError:
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ sentencepiece
tiktoken
ultralytics
evaluate
datasets
datasets>=2.19
datasets[audio]
soundfile
librosa
numba
Expand Down
10 changes: 7 additions & 3 deletions setup_deb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ fi
log "Installing system dependencies ..."
sleep 1
apt-get update -y
apt-get install -y build-essential ffmpeg libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake
apt-get install -y build-essential libsm6 libxext6 wget git unzip numactl libhdf5-dev cmake
if ! python3 -c ""; then
apt-get install -y python3 python3-pip
fi
Expand Down Expand Up @@ -76,8 +76,9 @@ sleep 1
ARCH=$ARCH python3 "$SCRIPT_DIR"/utils/setup/install_frameworks.py

# get almost all python deps
pip3 install --break-system-packages -r "$(dirname "$0")/requirements.txt" ||
pip3 install -r "$(dirname "$0")/requirements.txt"
PIP_BREAK_SYSTEM_PACKAGES=1 python3 -m pip install --ignore-installed --upgrade pip
# Install the Python requirements. The first attempt passes
# --break-system-packages; if that attempt fails (e.g. a pip that does not
# recognize the flag), retry without it. The module is named "pip" -- there
# is no importable "pip3" module, so the fallback must also use `-m pip`.
python3 -m pip install --break-system-packages -r "$(dirname "$0")/requirements.txt" ||
    python3 -m pip install -r "$(dirname "$0")/requirements.txt"

apt install -y autoconf autogen automake build-essential libasound2-dev \
libflac-dev libogg-dev libtool libvorbis-dev libopus-dev libmp3lame-dev \
Expand All @@ -98,6 +99,9 @@ if [ "$(python3 -c 'import torch; print(torch.cuda.is_available())')" == "True"
fi
log "done.\n"

apt-get update -y
apt-get install -y ffmpeg

if [ -f "/etc/machine-id" ]; then
cat /etc/machine-id >"$SCRIPT_DIR"/.setup_completed
else
Expand Down
59 changes: 31 additions & 28 deletions tests/test_pytorch_models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2024, Ampere Computing LLC
# Copyright (c) 2025, Ampere Computing LLC
import os
import signal
import time
Expand Down Expand Up @@ -113,25 +113,25 @@ def wrapper_hf(**kwargs):
self.wrapper_openai = wrapper_openai
self.wrapper_hf = wrapper_hf

@unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 50, "too little memory")
def test_whisper_tiny_en(self):
wer_ref = 0.155
acc = run_process(self.wrapper_openai, {"model_name": "tiny.en", "num_runs": 30, "timeout": None})
self.assertTrue(wer_ref / acc["wer_score"] > 0.95)
# @unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 50, "too little memory")
# def test_whisper_tiny_en(self):
# wer_ref = 0.155
# acc = run_process(self.wrapper_openai, {"model_name": "tiny.en", "num_runs": 30, "timeout": None})
# self.assertTrue(wer_ref / acc["wer_score"] > 0.95)

@unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 50, "too little memory")
def test_whisper_hf_tiny_en(self):
wer_ref = 0.111
acc = run_process(self.wrapper_hf, {"model_name": "openai/whisper-tiny.en", "num_runs": 18,
"batch_size": 4, "timeout": None})
self.assertTrue(wer_ref / acc["wer_score"] > 0.95)
# @unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 50, "too little memory")
# def test_whisper_hf_tiny_en(self):
# wer_ref = 0.111
# acc = run_process(self.wrapper_hf, {"model_name": "openai/whisper-tiny.en", "num_runs": 18,
# "batch_size": 4, "timeout": None})
# self.assertTrue(wer_ref / acc["wer_score"] > 0.95)

@unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 100, "too little memory")
@unittest.skipUnless('_aio_profiler_print' in dir(torch._C), "too slow to run with native")
def test_whisper_large(self):
wer_ref = 0.124
acc = run_process(self.wrapper_openai, {"model_name": "large", "num_runs": 30, "timeout": None})
self.assertTrue(wer_ref / acc["wer_score"] > 0.95)
# @unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 100, "too little memory")
# @unittest.skipUnless('_aio_profiler_print' in dir(torch._C), "too slow to run with native")
# def test_whisper_large(self):
# wer_ref = 0.124
# acc = run_process(self.wrapper_openai, {"model_name": "large", "num_runs": 30, "timeout": None})
# self.assertTrue(wer_ref / acc["wer_score"] > 0.95)


class WhisperTranslate(unittest.TestCase):
Expand All @@ -156,13 +156,13 @@ def wrapper(**kwargs):

self.wrapper = wrapper

@unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 100, "too little memory")
@unittest.skipUnless('_aio_profiler_print' in dir(torch._C), "too slow to run with native")
def test_whisper_translate_medium(self):
wer_ref = 0.475
acc = run_process(self.wrapper, {"model_name": "large", "num_runs": 30, "timeout": None,
"dataset_path": self.dataset_path})
self.assertTrue(wer_ref / acc["bleu_score"] > 0.95)
# @unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 100, "too little memory")
# @unittest.skipUnless('_aio_profiler_print' in dir(torch._C), "too slow to run with native")
# def test_whisper_translate_medium(self):
# wer_ref = 0.475
# acc = run_process(self.wrapper, {"model_name": "large", "num_runs": 30, "timeout": None,
# "dataset_path": self.dataset_path})
# self.assertTrue(wer_ref / acc["bleu_score"] > 0.95)


class DLRM(unittest.TestCase):
Expand Down Expand Up @@ -259,7 +259,8 @@ def wrapper(**kwargs):

top_1_ref, top_5_ref = 0.717, 0.905
acc = run_process(wrapper, {"model_name": "densenet121", "images_path": self.dataset_path,
"labels_path": self.labels_path, "batch_size": 32, "num_runs": 10, "timeout": None,
"labels_path": self.labels_path, "batch_size": 32, "num_runs": 10,
"timeout": None,
"disable_jit_freeze": False})
self.assertTrue(acc["top_1_acc"] / top_1_ref > 0.95)
self.assertTrue(acc["top_5_acc"] / top_5_ref > 0.95)
Expand All @@ -277,7 +278,8 @@ def wrapper(**kwargs):

top_1_ref, top_5_ref = 0.765, 0.932
acc = run_process(wrapper, {"model_name": "inception_v3", "images_path": self.dataset_path,
"labels_path": self.labels_path, "batch_size": 32, "num_runs": 10, "timeout": None,
"labels_path": self.labels_path, "batch_size": 32, "num_runs": 10,
"timeout": None,
"disable_jit_freeze": False})
self.assertTrue(acc["top_1_acc"] / top_1_ref > 0.95)
self.assertTrue(acc["top_5_acc"] / top_5_ref > 0.95)
Expand Down Expand Up @@ -312,7 +314,8 @@ def wrapper(**kwargs):

top_1_ref, top_5_ref = 0.661, 0.896
acc = run_process(wrapper, {"model_name": "vgg16", "images_path": self.dataset_path,
"labels_path": self.labels_path, "batch_size": 32, "num_runs": 10, "timeout": None})
"labels_path": self.labels_path, "batch_size": 32, "num_runs": 10,
"timeout": None})
self.assertTrue(acc["top_1_acc"] / top_1_ref > 0.95)
self.assertTrue(acc["top_5_acc"] / top_5_ref > 0.95)

Expand Down
2 changes: 1 addition & 1 deletion utils/cv/pre_processing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2024, Ampere Computing LLC
# Copyright (c) 2025, Ampere Computing LLC
import numpy as np
import utils.misc as utils

Expand Down