@@ -35,6 +35,9 @@ def parse_args():
3535 parser .add_argument ("-t" , "--num_threads" ,
3636 type = int , default = 1 ,
3737 help = "number of threads to use per process" )
38+ parser .add_argument ("--mp" ,
39+ type = str , default = "local" ,
40+ help = "memory placement policy, 'local','interleave' or 'none'" )
3841 return parser .parse_args ()
3942
4043
@@ -93,8 +96,9 @@ def summarize_results(logs_dir, args, start, finish):
9396 ["n_proc" , "n_threads" , "batch_size" , "prompt_size" , "output_tokens" , "pp_throughput_tps" ,
9497 "pp_avg_latency_sec" , "tg_throughput_tps" , "tg_avg_latency_sec" , "pp+tg_throughput_tps" , "concurrency" , "start" , "finish" ])
9598 writer .writerow (
96- [args .num_processes , args .num_threads , args .batch_size , args .prompt_size , TOKENS , pp_throughput ,
97- avg_pp_latency , tg_throughput , avg_tg_latency , avg_total_speed , args .batch_size * args .num_processes , start , finish ])
99+ [args .num_processes , args .num_threads , args .batch_size , args .prompt_size , TOKENS , f"{ pp_throughput :.3f} " ,
100+ f"{ avg_pp_latency :.3f} " , f"{ tg_throughput :.3f} " , f"{ avg_tg_latency :.3f} " , f"{ avg_total_speed :.3f} " , args .batch_size * args .num_processes , f"{ start :.3f} " , f"{ finish :.3f} " ])
101+
98102 print (f"Result saved in { results_filename } " )
99103
100104
@@ -114,21 +118,40 @@ def main():
114118 logs_dir = os .path .join ("/tmp" , str (uuid .uuid4 ()))
115119 os .mkdir (logs_dir )
116120 current_subprocesses = list ()
121+ if args .mp == "local" :
122+ mem_place = "--localalloc"
123+ elif args .mp == "interleave" :
124+ mem_place = "--interleave=all"
125+ else :
126+ mem_place = "none"
127+
117128 for n in range (args .num_processes ):
118129 logfile = f"{ logs_dir } /log_{ n } "
119130 if os .path .exists ("/llm/batched-bench" ):
120131 # command-line for v1
121- cmd = ["numactl" , f"--physcpubind={ gen_threads_config (args .num_threads , n )} " ,
122- "/llm/batched-bench" , args .model , str (args .kv_cache ), "2048" , "512" , "0" , "0" , "0" , str (args .prompt_size ), str (TOKENS ),
123- str (args .batch_size ), str (args .num_threads )]
132+ if mem_place == "none" :
133+ cmd = ["numactl" , f"--physcpubind={ gen_threads_config (args .num_threads , n )} " ,
134+ "/llm/batched-bench" , args .model , str (args .kv_cache ), "2048" , "512" , "0" , "0" , "0" , str (args .prompt_size ), str (TOKENS ),
135+ str (args .batch_size ), str (args .num_threads )]
136+ else :
137+ cmd = ["numactl" , f"--physcpubind={ gen_threads_config (args .num_threads , n )} " , str (mem_place ),
138+ "/llm/batched-bench" , args .model , str (args .kv_cache ), "2048" , "512" , "0" , "0" , "0" , str (args .prompt_size ), str (TOKENS ),
139+ str (args .batch_size ), str (args .num_threads )]
124140 elif os .path .exists ("/llm/llama-batched-bench" ):
125141 # command-line for v2
126- cmd = ["numactl" , f"--physcpubind={ gen_threads_config (args .num_threads , n )} " ,
127- "/llm/llama-batched-bench" , "-m" , args .model , "-c" , str (args .kv_cache ), "-b" , "2048" , "-ub" , "512" , "-npp" , str (args .prompt_size ), "-ntg" , str (TOKENS ),
128- "-npl" , str (args .batch_size ), "-t" , str (args .num_threads ), "-tb" , str (args .num_threads ), "-td" , str (args .num_threads )]
142+ if mem_place == "none" :
143+ cmd = ["numactl" , f"--physcpubind={ gen_threads_config (args .num_threads , n )} " ,
144+ "/llm/llama-batched-bench" , "-m" , args .model , "-c" , str (args .kv_cache ), "-b" , "2048" , "-ub" , "512" , "-npp" , str (args .prompt_size ), "-ntg" , str (TOKENS ),
145+ "-npl" , str (args .batch_size ), "-t" , str (args .num_threads ), "-tb" , str (args .num_threads ), "-td" , str (args .num_threads )]
146+ else :
147+ cmd = ["numactl" , f"--physcpubind={ gen_threads_config (args .num_threads , n )} " ,str (mem_place ),
148+ "/llm/llama-batched-bench" , "-m" , args .model , "-c" , str (args .kv_cache ), "-b" , "2048" , "-ub" , "512" , "-npp" , str (args .prompt_size ), "-ntg" , str (TOKENS ),
149+ "-npl" , str (args .batch_size ), "-t" , str (args .num_threads ), "-tb" , str (args .num_threads ), "-td" , str (args .num_threads )]
150+
129151 else :
130152 print ("FAIL: batched-bench not found!" )
131153 sys .exit (1 )
154+
132155 current_subprocesses .append (
133156 subprocess .Popen (cmd , stdout = open (logfile , 'wb' ), stderr = open (logfile , 'wb' )))
134157 start = time .time ()
0 commit comments