From d6db1d255c1200db65226683867f060533621370 Mon Sep 17 00:00:00 2001 From: Egor Krivov Date: Mon, 17 Nov 2025 14:50:46 +0000 Subject: [PATCH 1/3] Add options to print SW efficiency --- .../benchmark_testing.py | 34 ++++++++++++++++--- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/benchmarks/triton_kernels_benchmark/benchmark_testing.py b/benchmarks/triton_kernels_benchmark/benchmark_testing.py index 3d3f1d154a..0e1a314ee9 100644 --- a/benchmarks/triton_kernels_benchmark/benchmark_testing.py +++ b/benchmarks/triton_kernels_benchmark/benchmark_testing.py @@ -352,6 +352,8 @@ class MarkArgs: reports: str = "" n_runs: int = 1 brief: bool = False + hw_gbps: float = None + hw_tflops: float = None @staticmethod def load_cli_args() -> MarkArgs: @@ -375,8 +377,32 @@ def load_cli_args() -> MarkArgs: action="store_true", help="Print only mean values without min, max, CV.", ) + parser.add_argument( + "--hw_gbps", + type=float, + help="Hardware bandwidth in GB/s to calculate efficiency.", + ) + parser.add_argument( + "--hw_tflops", + type=float, + help="Hardware peak performance in TFLOPS to calculate efficiency.", + ) args = parser.parse_args() - return MarkArgs(args.reports, args.n_runs, args.brief) + return MarkArgs(args.reports, args.n_runs, args.brief, args.hw_gbps, args.hw_tflops) + + +def enhance_df(df, mark_args: MarkArgs): + df = df.copy() + if mark_args.brief: + df = df[[c for c in df.columns if not any(map(c.endswith, ("min", "max", "CV")))]] + + for col in df.columns: + if col.lower().replace("/", "p").endswith("gbps") and mark_args.hw_gbps: + df[col + "-eff"] = (df[col] / mark_args.hw_gbps).apply(lambda x: f"{x:.1%}") + elif col.lower().endswith("tflops") and mark_args.hw_tflops: + df[col + "-eff"] = (df[col] / mark_args.hw_tflops).apply(lambda x: f"{x:.1%}") + + return df class Mark: @@ -462,12 +488,10 @@ def _run(self, bench: Benchmark, save_path: str, show_plots: bool, print_data: b col0, col1 = df.columns.tolist() df["Diff"] = df[col1] - 
df[col0] + df = enhance_df(df, mark_args) if print_data: print(bench.plot_name + ":") - if mark_args.brief: - print(df[[c for c in df.columns if not any(map(c.endswith, ("min", "max", "CV")))]].to_string()) - else: - print(df.to_string()) + print(df.to_string()) if save_path: df.to_csv(os.path.join(save_path, f"{bench.plot_name}.csv"), float_format=f"%.{save_precision}f", index=False) From 89ef2d39a45218a1d30a41e1680108159d8bdfd1 Mon Sep 17 00:00:00 2001 From: Egor Krivov Date: Mon, 17 Nov 2025 15:01:28 +0000 Subject: [PATCH 2/3] Removed noise --- benchmarks/triton_kernels_benchmark/benchmark_testing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/benchmarks/triton_kernels_benchmark/benchmark_testing.py b/benchmarks/triton_kernels_benchmark/benchmark_testing.py index 0e1a314ee9..19c035578b 100644 --- a/benchmarks/triton_kernels_benchmark/benchmark_testing.py +++ b/benchmarks/triton_kernels_benchmark/benchmark_testing.py @@ -155,7 +155,6 @@ def do_bench_upstream_pytorch_profiler(fn, n_warmup=25, n_repeat=100, grad_to_no fn() synchronize() i += 1 - print(f"Stopped warmup after {i} iterations") else: for _ in range(n_warmup): fn() From 8fbc416281cf87311ae75c55f6b02bc9a88f4707 Mon Sep 17 00:00:00 2001 From: Egor Krivov Date: Wed, 19 Nov 2025 15:22:01 +0000 Subject: [PATCH 3/3] Add json file with HW capability --- benchmarks/gpu_info.json | 19 +++++ .../benchmark_testing.py | 75 ++++++++++++++----- 2 files changed, 77 insertions(+), 17 deletions(-) create mode 100644 benchmarks/gpu_info.json diff --git a/benchmarks/gpu_info.json b/benchmarks/gpu_info.json new file mode 100644 index 0000000000..fc473ae44c --- /dev/null +++ b/benchmarks/gpu_info.json @@ -0,0 +1,19 @@ +{ + "_comment": "GPU -> [BF16/FP16 DPAS TFLOPs , Memory bandwidth GB/s]", + "Intel(R) Data Center GPU Max 1100": [ + 355.53, + 1228.80 + ], + "Intel(R) Data Center GPU Max 1550": [ + 419.43, + 3276.8 + ], + "Intel(R) Arc(TM) B580 Graphics": [ + 116.74, + 456.0 + ], + "Intel(R) Arc(TM) B570 Graphics": [
103.22, + 380.0 + ] +} diff --git a/benchmarks/triton_kernels_benchmark/benchmark_testing.py b/benchmarks/triton_kernels_benchmark/benchmark_testing.py index 19c035578b..9873a91668 100644 --- a/benchmarks/triton_kernels_benchmark/benchmark_testing.py +++ b/benchmarks/triton_kernels_benchmark/benchmark_testing.py @@ -1,6 +1,7 @@ from __future__ import annotations from abc import ABC, abstractmethod +import re from typing import Callable, ClassVar, Dict, Optional, List, Tuple, Union, Set from collections.abc import Iterable from enum import Enum @@ -13,8 +14,10 @@ import datetime import os import time +from pathlib import Path import scipy.stats +import numpy as np import pandas as pd import matplotlib.pyplot as plt @@ -335,6 +338,30 @@ def filter_providers( return supported_providers +def get_gpu_info(): + device_name = torch.xpu.is_available() and torch.xpu.get_device_name() + if device_name is None: + print("Couldn't read device name.") + return None, None + + # benchmarks/triton_kernels_benchmark/benchmark_testing.py -> benchmarks/gpu_info.json + current_dir = Path(__file__).parent.resolve() + gpu_info_path = current_dir.parent / "gpu_info.json" + + if not gpu_info_path.exists(): + print(f"Warning: '{gpu_info_path}' not found.") + return None, None + + with open(gpu_info_path, "r", encoding="utf-8") as f: + gpu_info = json.load(f) + + if device_name not in gpu_info: + print(f"Warning: Device '{device_name}' not found in {gpu_info_path}") + return None, None + + return gpu_info[device_name] + + def perf_report(benchmarks): """ Mark a function for benchmarking. The benchmark can then be executed by using the :code:`.run` method on the return value. 
@@ -351,8 +378,7 @@ class MarkArgs: reports: str = "" n_runs: int = 1 brief: bool = False - hw_gbps: float = None - hw_tflops: float = None + eff: bool = False @staticmethod def load_cli_args() -> MarkArgs: @@ -377,29 +403,44 @@ def load_cli_args() -> MarkArgs: help="Print only mean values without min, max, CV.", ) parser.add_argument( - "--hw_gbps", - type=float, - help="Hardware bandwidth in GB/s to calculate efficiency.", - ) - parser.add_argument( - "--hw_tflops", - type=float, - help="Hardware peak performance in TFLOPS to calculate efficiency.", + "--eff", + "-e", + action="store_true", + help="Print HW utilization, will use internal database from 'gpu_info.json'.", ) args = parser.parse_args() - return MarkArgs(args.reports, args.n_runs, args.brief, args.hw_gbps, args.hw_tflops) + return MarkArgs(args.reports, args.n_runs, args.brief, args.eff) -def enhance_df(df, mark_args: MarkArgs): +def enhance_df(df, bench, mark_args: MarkArgs): + hw_tflops, hw_gbps = None, None + if mark_args.eff: + hw_tflops, hw_gbps = get_gpu_info() + df = df.copy() if mark_args.brief: df = df[[c for c in df.columns if not any(map(c.endswith, ("min", "max", "CV")))]] + # Find and write down HW efficiency columns + tflops_labels = [l for l in bench.ylabel if l.lower().endswith("tflops")] + tflops_pattern = "-(" + "|".join(tflops_labels) + ")(-min|-max)?$" + + gbps_labels = [l for l in bench.ylabel if l.lower().replace("/", "p").endswith("gbps")] + gbps_pattern = "-(" + "|".join(gbps_labels) + ")(-min|-max)?$" + for col in df.columns: - if col.lower().replace("/", "p").endswith("gbps") and mark_args.hw_gbps: - df[col + "-eff"] = (df[col] / mark_args.hw_gbps).apply(lambda x: f"{x:.1%}") - elif col.lower().endswith("tflops") and mark_args.hw_tflops: - df[col + "-eff"] = (df[col] / mark_args.hw_tflops).apply(lambda x: f"{x:.1%}") + if re.search(tflops_pattern, col) and hw_tflops: + df[re.sub(tflops_pattern, "-ceff", col)] = df[col] / hw_tflops + if re.search(gbps_pattern, col) and 
hw_gbps: + df[re.sub(gbps_pattern, "-meff", col)] = df[col] / hw_gbps + # df[re.sub(gbps_pattern, "-meff", col)] = (df[col] / mark_args.hw_gbps).apply(lambda x: f"{x:.1%}") + # We will only keep resulting efficiency column, we are either compute or memory bound. + for provider in bench.line_names: + if f"{provider}-ceff" in df.columns and f"{provider}-meff" in df.columns: + df[f"{provider}-eff"] = np.maximum(df[f"{provider}-ceff"], + df[f"{provider}-meff"]).apply(lambda x: f"{x:.2%}") + del df[f"{provider}-ceff"] + del df[f"{provider}-meff"] return df @@ -487,7 +528,7 @@ def _run(self, bench: Benchmark, save_path: str, show_plots: bool, print_data: b col0, col1 = df.columns.tolist() df["Diff"] = df[col1] - df[col0] - df = enhance_df(df, mark_args) + df = enhance_df(df, bench, mark_args) if print_data: print(bench.plot_name + ":") print(df.to_string())