This repository was archived by the owner on Oct 9, 2024. It is now read-only.
3 files changed: +8 -8 lines changed

File 1 of 3:

@@ -149,11 +149,11 @@ def generate():
         generated = generate()
         total_new_tokens_generated += sum(new_tokens for _, _, new_tokens in generated)
     torch.cuda.synchronize()
-    througput = (time.time() - t0) / (total_new_tokens_generated)
+    throughput = (time.time() - t0) / (total_new_tokens_generated)
     print_rank0(
         f"""
 *** Performance stats:
-Throughput per token including tokenize: {througput*1000:.2f} msecs
+Throughput per token including tokenize: {throughput*1000:.2f} msecs
 Start to ready to generate: {t_ready - t_start:.3f} secs
 Tokenize and generate {total_new_tokens_generated} (bs={args.batch_size}) tokens: {t_generate_span:.3f} secs
 Start to finish: {t_ready - t_start + t_generate_span:.3f} secs

File 2 of 3:

@@ -282,11 +282,11 @@ def generate():
         generated = generate()
         total_new_tokens_generated += sum(new_tokens for _, _, new_tokens in generated)
     torch.cuda.synchronize()
-    througput = (time.time() - t0) / (total_new_tokens_generated)
+    throughput = (time.time() - t0) / (total_new_tokens_generated)
     print_rank0(
         f"""
 *** Performance stats:
-Throughput per token including tokenize: {througput*1000:.2f} msecs
+Throughput per token including tokenize: {throughput*1000:.2f} msecs
 Start to ready to generate: {t_ready - t_start:.3f} secs
 Tokenize and generate {total_new_tokens_generated} (bs={args.batch_size}) tokens: {t_generate_span:.3f} secs
 Start to finish: {t_ready - t_start + t_generate_span:.3f} secs

File 3 of 3:

 # usage:
-# deepspeed --num_gpus 8 bloom-ds-inference.py --name bigscience/bloom
+# deepspeed --num_gpus 8 bloom-ds-zero-inference.py --name bigscience/bloom
 #
 # to run benchmarks:
-# deepspeed --num_gpus 8 bloom-ds-inference.py --name bigscience/bloom --benchmark
+# deepspeed --num_gpus 8 bloom-ds-zero-inference.py --name bigscience/bloom --benchmark
 #


@@ -212,11 +212,11 @@ def generate():
     torch.cuda.synchronize()
     # note that we actually generate world_size unique streams (though the benchmark feeds the same inputs)
     total_new_tokens_generated *= world_size
-    througput = (time.time() - t0) / (total_new_tokens_generated)
+    throughput = (time.time() - t0) / (total_new_tokens_generated)
     print_rank0(
         f"""
 *** Performance stats:
-Throughput per token including tokenize: {througput*1000:.2f} msecs
+Throughput per token including tokenize: {throughput*1000:.2f} msecs
 Start to ready to generate: {t_ready - t_start:.3f} secs
 Tokenize and generate {total_new_tokens_generated} (bs={args.batch_size}) tokens: {t_generate_span:.3f} secs
 Start to finish: {t_ready - t_start + t_generate_span:.3f} secs
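
For context, the lines touched by this change implement a simple wall-clock measurement: elapsed time divided by the number of newly generated tokens, so the printed "throughput" figure is really milliseconds per token. Below is a minimal, self-contained sketch of that pattern; benchmark, cycles and the toy generate stand-in are illustrative names, not identifiers taken from these scripts.

import time

def benchmark(generate, cycles=5, world_size=1):
    # sketch of the benchmark loop used in the inference scripts (assumed shape)
    total_new_tokens_generated = 0
    t0 = time.time()
    for _ in range(cycles):
        generated = generate()
        # each item is assumed to be (input_tokens, output_text, new_tokens),
        # matching the tuples unpacked in the diff above
        total_new_tokens_generated += sum(new_tokens for _, _, new_tokens in generated)
    # the real scripts call torch.cuda.synchronize() here so all GPU work has
    # finished before the clock is read
    # in the ZeRO script every rank produces its own stream, so the token count
    # is scaled by world_size before dividing
    total_new_tokens_generated *= world_size
    throughput = (time.time() - t0) / total_new_tokens_generated  # seconds per token
    print(f"Throughput per token including tokenize: {throughput * 1000:.2f} msecs")
    return throughput

# toy usage: a fake generate() that "produces" 3 sequences of 100 new tokens each
if __name__ == "__main__":
    benchmark(lambda: [(None, "", 100)] * 3, cycles=5)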