Commit 49a986e

[Benchmark] multi_turn: Report warmup-inclusive runtime (#28937)
Signed-off-by: Ido Segev <idos@pliops.com>
Parent: f6aa122

2 files changed: +53 additions, -10 deletions

benchmarks/multi_turn/README.md

Lines changed: 4 additions & 0 deletions
@@ -55,6 +55,10 @@ output_num_chunks 166.0 99.01 11.80 79.00 90.00 98.00 108.75
 ----------------------------------------------------------------------------------------------------
 ```
 
+If you run with `--warmup-step`, the summary will also include `warmup_runtime_sec`
+and `total_runtime_incl_warmup_sec` (while `runtime_sec` continues to reflect the
+benchmark-only runtime so the reported throughput stays comparable).
+
 ### JSON configuration file for synthetic conversations generation
 
 The input flag `--input-file` is used to determine the input conversations for the benchmark.<br/>
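
To make the relationship between the three summary fields concrete, here is a minimal sketch with made-up numbers; the field names come from this change, while the values and `num_requests` are purely illustrative:

```python
# Hypothetical values, chosen only to show how the reported fields fit together.
num_requests = 240            # completed requests in the benchmark phase
runtime_sec = 60.0            # benchmark-only runtime (meaning unchanged)
warmup_runtime_sec = 2.5      # reported only when --warmup-step is used

requests_per_sec = num_requests / runtime_sec                      # 4.0, unaffected by warmup
total_runtime_incl_warmup_sec = runtime_sec + warmup_runtime_sec   # 62.5

print(f"{requests_per_sec=:.3f}")
print(f"{total_runtime_incl_warmup_sec=:.3f}")
```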

benchmarks/multi_turn/benchmark_serving_multi_turn.py

Lines changed: 49 additions & 10 deletions
@@ -1076,6 +1076,7 @@ def process_statistics(
     verbose: bool,
     gen_conv_args: GenConvArgs | None = None,
     excel_output: bool = False,
+    warmup_runtime_sec: float | None = None,
 ) -> None:
     if len(client_metrics) == 0:
         logger.info("No samples to process")
@@ -1169,8 +1170,13 @@ def process_statistics(
     # Convert milliseconds to seconds
     runtime_sec = runtime_sec / 1000.0
     requests_per_sec = float(len(df)) / runtime_sec
-
-    params = {"runtime_sec": runtime_sec, "requests_per_sec": requests_per_sec}
+    params = {
+        "runtime_sec": runtime_sec,
+        "requests_per_sec": requests_per_sec,
+    }
+    if warmup_runtime_sec is not None:
+        params["warmup_runtime_sec"] = warmup_runtime_sec
+        params["total_runtime_incl_warmup_sec"] = runtime_sec + warmup_runtime_sec
 
     # Generate a summary of relevant metrics (and drop irrelevant data)
     df = df.drop(columns=exclude).describe(percentiles=percentiles).transpose()
@@ -1552,6 +1558,8 @@ async def main() -> None:
         url=args.url, num_clients=args.num_clients, early_stop=not args.no_early_stop
     )
 
+    warmup_runtime_sec: float | None = None
+
     # Warm-up step
     if args.warmup_step:
         # Only send a single user prompt from every conversation.
@@ -1566,26 +1574,56 @@ async def main() -> None:
         # all clients should finish their work before exiting
         warmup_bench_args = bench_args._replace(early_stop=False)
 
-        logger.info(f"{Color.PURPLE}Warmup start{Color.RESET}")
+        logger.info("%sWarmup start%s", Color.PURPLE, Color.RESET)
+        warmup_start_ns = time.perf_counter_ns()
         conversations, _ = await main_mp(
             warmup_client_args, req_args, warmup_bench_args, tokenizer, conversations
         )
-        logger.info(f"{Color.PURPLE}Warmup done{Color.RESET}")
+        warmup_runtime_sec = nanosec_to_sec(time.perf_counter_ns() - warmup_start_ns)
+        logger.info(
+            "%sWarmup runtime: %.3f sec (%.3f ms)%s",
+            Color.PURPLE,
+            warmup_runtime_sec,
+            warmup_runtime_sec * 1000,
+            Color.RESET,
+        )
+        logger.info("%sWarmup done%s", Color.PURPLE, Color.RESET)
 
     # Run the benchmark
-    start_time = time.perf_counter_ns()
+    benchmark_start_ns = time.perf_counter_ns()
     client_convs, client_metrics = await main_mp(
         client_args, req_args, bench_args, tokenizer, conversations
     )
-    total_runtime_ms = nanosec_to_millisec(time.perf_counter_ns() - start_time)
+    benchmark_runtime_sec = nanosec_to_sec(time.perf_counter_ns() - benchmark_start_ns)
 
     # Calculate requests per second
-    total_runtime_sec = total_runtime_ms / 1000.0
-    rps = len(client_metrics) / total_runtime_sec
+    requests_per_sec = len(client_metrics) / benchmark_runtime_sec
+    benchmark_runtime_ms = benchmark_runtime_sec * 1000.0
     logger.info(
-        f"{Color.GREEN}All clients finished, total runtime: {total_runtime_sec:.3f} sec"
-        f" ({total_runtime_ms:.3f} ms), requests per second: {rps:.3f}{Color.RESET}"
+        "%sAll clients finished, benchmark runtime: %.3f sec (%.3f ms), "
+        "requests per second: %.3f%s",
+        Color.GREEN,
+        benchmark_runtime_sec,
+        benchmark_runtime_ms,
+        requests_per_sec,
+        Color.RESET,
     )
+    if warmup_runtime_sec is not None:
+        total_runtime_sec = benchmark_runtime_sec + warmup_runtime_sec
+        logger.info(
+            "%sWarmup runtime: %.3f sec (%.3f ms)%s",
+            Color.GREEN,
+            warmup_runtime_sec,
+            warmup_runtime_sec * 1000,
+            Color.RESET,
+        )
+        logger.info(
+            "%sTotal runtime (including warmup): %.3f sec (%.3f ms)%s",
+            Color.GREEN,
+            total_runtime_sec,
+            total_runtime_sec * 1000,
+            Color.RESET,
+        )
 
     # Benchmark parameters
     params = {
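
Besides the timing changes, this hunk also moves the log calls from f-strings to printf-style arguments, which lets the logging module defer formatting until a record is actually emitted. A small illustrative sketch of the two styles using the standard logging module; the `Color` class here is a hypothetical stand-in for the script's color constants:

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("bench")


class Color:
    # Hypothetical stand-in for the script's ANSI color constants.
    PURPLE = "\033[95m"
    RESET = "\033[0m"


warmup_runtime_sec = 2.5  # hypothetical value, in seconds

# f-string: the message is built eagerly, even if the level would filter it out.
logger.info(f"{Color.PURPLE}Warmup runtime: {warmup_runtime_sec:.3f} sec{Color.RESET}")

# printf-style: the arguments are only formatted if the record is emitted.
logger.info("%sWarmup runtime: %.3f sec%s", Color.PURPLE, warmup_runtime_sec, Color.RESET)
```
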
@@ -1610,6 +1648,7 @@ async def main() -> None:
         verbose=args.verbose,
         gen_conv_args=gen_conv_args,
         excel_output=args.excel_output,
+        warmup_runtime_sec=warmup_runtime_sec,
     )
 
     if args.output_file is not None:

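Taken together, the measurement pattern in this change is: time the warmup and the benchmark phases separately with `time.perf_counter_ns()`, convert each to seconds, and only add the two for the warmup-inclusive figure. A self-contained sketch of that flow, with `sleep` calls standing in for the real request traffic, `run_warmup` playing the role of `--warmup-step`, and `nanosec_to_sec` reimplemented locally for the example:

```python
import time


def nanosec_to_sec(ns: int) -> float:
    # Plain reimplementation of the conversion helper used by the benchmark script.
    return ns / 1_000_000_000


warmup_runtime_sec: float | None = None
run_warmup = True  # stands in for args.warmup_step

# Optional warmup phase, timed separately from the benchmark itself.
if run_warmup:
    warmup_start_ns = time.perf_counter_ns()
    time.sleep(0.1)  # stand-in for the warmup requests
    warmup_runtime_sec = nanosec_to_sec(time.perf_counter_ns() - warmup_start_ns)

# Benchmark phase: only this runtime feeds requests_per_sec.
benchmark_start_ns = time.perf_counter_ns()
time.sleep(0.2)  # stand-in for the benchmark requests
benchmark_runtime_sec = nanosec_to_sec(time.perf_counter_ns() - benchmark_start_ns)

print(f"benchmark runtime: {benchmark_runtime_sec:.3f} sec")
if warmup_runtime_sec is not None:
    total_runtime_sec = benchmark_runtime_sec + warmup_runtime_sec
    print(f"warmup runtime: {warmup_runtime_sec:.3f} sec")
    print(f"total runtime (including warmup): {total_runtime_sec:.3f} sec")
```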