
Commit c3d122b

Author: davidz-ampere (committed)
adjust for 2P run.
1 parent 9d153ec commit c3d122b

File tree

3 files changed: +37 -8 lines changed


benchmarks/run.py

Lines changed: 8 additions & 4 deletions
@@ -11,18 +11,18 @@ def get_file_dir():
     return os.path.dirname(os.path.realpath(__file__))


-def docker_init():
+def docker_init(node):
     tag = "amperecomputingai/llama.cpp:2.0.0"
     if subprocess.run(
             ["docker", "pull", tag]).returncode != 0:
         print("Docker pull process failed!")
         sys.exit(1)
-    container_name = "llama_benchmark"
+    container_name = f"llama_benchmark_n{node}"
     subprocess.run(["docker", "rm", "-f", container_name])
     memory = (psutil.virtual_memory().total >> 30) - 30 # leave 30GB for OS
     assert memory > 10, "less than 10GB of memory available on the system for llama.cpp"
     if subprocess.run(
-            ["docker", "run", "--privileged=true", "--name", container_name, "-d", "-m", f"{str(memory)}g", "-v",
+            ["docker", "run", "--privileged=true", "--cpuset-mems", f"{str(node)}", "--name", container_name, "-d", "-m", f"{str(memory)}g", "-v",
              f"{get_file_dir()}:/runner", "--entrypoint", "/bin/bash", "-it", tag]).returncode != 0:
         print("Docker run process failed!")
         sys.exit(1)
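
For context, `--cpuset-mems <node>` restricts the container's memory allocations to that NUMA node, and the per-node container name lets one container run per socket. A rough sketch of the argument list `docker_init(1)` would assemble (the host path and memory figure are made up for illustration; on a 256 GB host the computed cap would be 226 GB):

# Illustrative only: roughly the list docker_init(node=1) passes to subprocess.run()
# on a host with 256 GB RAM, i.e. memory = (total >> 30) - 30 = 226.
node = 1
memory = 226
container_name = f"llama_benchmark_n{node}"               # -> "llama_benchmark_n1"
cmd = ["docker", "run", "--privileged=true",
       "--cpuset-mems", str(node),                         # allocate memory from NUMA node 1 only
       "--name", container_name, "-d",
       "-m", f"{memory}g",                                 # hard memory cap for the container
       "-v", "/path/to/benchmarks:/runner",                # stands in for get_file_dir()
       "--entrypoint", "/bin/bash", "-it",
       "amperecomputingai/llama.cpp:2.0.0"]
print(" ".join(cmd))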
@@ -106,12 +106,16 @@ def parse_args():
     parser.add_argument("--timeout",
                         type=float, default=900,
                         help="timeout to apply per single benchmark case")
+    parser.add_argument("-n", "--numa",
+                        type=int, default=0,
+                        help="numa mode of the docker container")
+
     return parser.parse_args()


 def main():
     args = parse_args()
-    benchmark(docker_init(), args)
+    benchmark(docker_init(args.numa), args)


 if __name__ == "__main__":
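
Note that `--numa` defaults to 0, so existing single-socket invocations behave as before; only the second socket's run needs `-n 1`. A minimal sketch of the new flag in isolation, mirroring the diff above:

# Minimal sketch of the new flag (same definition as in the diff above).
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-n", "--numa",
                    type=int, default=0,
                    help="numa mode of the docker container")

print(parser.parse_args([]).numa)            # 0 -> llama_benchmark_n0, memory bound to node 0
print(parser.parse_args(["-n", "1"]).numa)   # 1 -> llama_benchmark_n1, memory bound to node 1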

benchmarks/run_2p.sh

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+set -e
+
+sync
+echo 3 | sudo tee /proc/sys/vm/drop_caches
+echo 1 | sudo tee /proc/sys/vm/swappiness
+echo 8 | sudo tee /proc/sys/vm/dirty_ratio
+echo 1 | sudo tee /proc/sys/vm/zone_reclaim_mode
+echo 0 | sudo tee /proc/sys/kernel/numa_balancing
+
+VAR_PAGESIZE=$(getconf PAGESIZE)
+if [ $VAR_PAGESIZE = 4096 ]; then
+    echo always | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
+elif [ $VAR_PAGESIZE = 65536 ]; then
+    echo madvise | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
+fi
+
+# Warm up
+python3 run.py -m DeepSeek-R1-Distill-Qwen-7B-Q8R16_n0.gguf -t 80 -b 1 -p 512 -r 0-79 -n 0
+python3 run.py -m DeepSeek-R1-Distill-Qwen-7B-Q8R16_n1.gguf -t 80 -b 1 -p 512 -r 80-159 -n 1
+
+# Run
+python3 run.py -m DeepSeek-R1-Distill-Qwen-7B-Q8R16_n0.gguf -t 80 64 48 40 32 24 20 16 12 10 8 -b 1 2 4 8 -p 512 -r 0-79 -n 0 &
+python3 run.py -m DeepSeek-R1-Distill-Qwen-7B-Q8R16_n1.gguf -t 80 64 48 40 32 24 20 16 12 10 8 -b 1 2 4 8 -p 512 -r 80-159 -n 1 &
+wait
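
The new script drops the page cache, discourages swapping and reclaim, disables automatic NUMA balancing, picks a transparent-hugepage policy by page size, then runs one benchmark per socket: cores 0-79 with memory on node 0, and cores 80-159 with memory on node 1. Those ranges are hard-coded for a 2-socket, 80-core-per-socket machine; on another topology they could be read from sysfs with a helper like the one below (hypothetical, not part of the commit):

# Hypothetical helper (not in the repo): read each NUMA node's CPU range from sysfs.
# On a 2 x 80-core system this yields the 0-79 / 80-159 ranges run_2p.sh hard-codes.
import glob
import os

def node_cpulists():
    ranges = {}
    for path in sorted(glob.glob("/sys/devices/system/node/node[0-9]*/cpulist")):
        node = int(os.path.basename(os.path.dirname(path))[len("node"):])
        with open(path) as f:
            ranges[node] = f.read().strip()   # e.g. "0-79"
    return ranges

print(node_cpulists())   # e.g. {0: '0-79', 1: '80-159'}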

benchmarks/utils/benchmark.py

Lines changed: 5 additions & 4 deletions
@@ -93,8 +93,9 @@ def summarize_results(logs_dir, args, start, finish):
             ["n_proc", "n_threads", "batch_size", "prompt_size", "output_tokens", "pp_throughput_tps",
              "pp_avg_latency_sec", "tg_throughput_tps", "tg_avg_latency_sec", "pp+tg_throughput_tps", "concurrency", "start", "finish"])
         writer.writerow(
-            [args.num_processes, args.num_threads, args.batch_size, args.prompt_size, TOKENS, pp_throughput,
-             avg_pp_latency, tg_throughput, avg_tg_latency, avg_total_speed, args.batch_size * args.num_processes, start, finish])
+            [args.num_processes, args.num_threads, args.batch_size, args.prompt_size, TOKENS, f"{pp_throughput:.3f}",
+             f"{avg_pp_latency:.3f}", f"{tg_throughput:.3f}", f"{avg_tg_latency:.3f}", f"{avg_total_speed:.3f}", args.batch_size * args.num_processes, f"{start:.3f}", f"{finish:.3f}"])
+
     print(f"Result saved in {results_filename}")


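
The `:.3f` format specs only change presentation: each metric is written to the CSV rounded to three decimal places instead of the full float repr, e.g.:

# Formatting example with a made-up value.
pp_throughput = 118.34567890123
print(f"{pp_throughput:.3f}")   # "118.346" rather than "118.34567890123"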

@@ -118,12 +119,12 @@ def main():
         logfile = f"{logs_dir}/log_{n}"
         if os.path.exists("/llm/batched-bench"):
             # command-line for v1
-            cmd = ["numactl", f"--physcpubind={gen_threads_config(args.num_threads, n)}",
+            cmd = ["numactl", f"--physcpubind={gen_threads_config(args.num_threads, n)}", "--localalloc",
                    "/llm/batched-bench", args.model, str(args.kv_cache), "2048", "512", "0", "0", "0", str(args.prompt_size), str(TOKENS),
                    str(args.batch_size), str(args.num_threads)]
         elif os.path.exists("/llm/llama-batched-bench"):
             # command-line for v2
-            cmd = ["numactl", f"--physcpubind={gen_threads_config(args.num_threads, n)}",
+            cmd = ["numactl", f"--physcpubind={gen_threads_config(args.num_threads, n)}", "--localalloc",
                    "/llm/llama-batched-bench", "-m", args.model, "-c", str(args.kv_cache), "-b", "2048", "-ub", "512", "-npp", str(args.prompt_size), "-ntg", str(TOKENS),
                    "-npl", str(args.batch_size), "-t", str(args.num_threads), "-tb", str(args.num_threads), "-td", str(args.num_threads)]
         else:
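
`numactl --localalloc` makes each benchmark process allocate memory on the node where its CPUs are bound by `--physcpubind`, keeping model weights and KV cache local to the socket doing the work. A rough sketch of the v2 command with the new flag (core range and parameter values are made up; `gen_threads_config` in the repo produces the real CPU list):

# Illustrative v2 command with the new "--localalloc" flag; values are placeholders.
cmd = ["numactl", "--physcpubind=0-79", "--localalloc",   # bind CPUs, allocate on their local node
       "/llm/llama-batched-bench", "-m", "model.gguf", "-c", "2048",
       "-b", "2048", "-ub", "512", "-npp", "512", "-ntg", "256",
       "-npl", "1", "-t", "80", "-tb", "80", "-td", "80"]
print(" ".join(cmd))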
