Skip to content

Commit c2562fc

Browse files
authored
[https://nvbugs/5687820][fix] Remove self.abort() in DetokenizedGenerationResult (#9449)
Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com>
1 parent 1c9158f commit c2562fc

File tree

5 files changed

+11
-3
lines changed

5 files changed

+11
-3
lines changed

examples/disaggregated/slurm/benchmark/config.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ worker_config:
76   76      - 2048
77   77      - 256
78   78      print_iter_log: true
     79  +   trust_remote_code: true
79   80      kv_cache_config:
80   81        enable_block_reuse: false
81   82        free_gpu_memory_fraction: 0.8
@@ -102,6 +103,7 @@ worker_config:
102  103     enable_attention_dp: true
103  104     pipeline_parallel_size: 1
104  105     print_iter_log: true
     106  +  trust_remote_code: true
105  107     cuda_graph_config: null
106  108     disable_overlap_scheduler: true
107  109     kv_cache_config:

examples/disaggregated/slurm/benchmark/run_benchmark.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ for concurrency in ${concurrency_list}; do
52   52      --dataset-path ${dataset_file} \
53   53      --num-prompts ${num_prompts} \
54   54      --max-concurrency ${concurrency} \
     55  +   --trust-remote-code \
55   56      --ignore-eos \
56   57      --no-test-input \
57   58      --save-result \

examples/disaggregated/slurm/benchmark/run_benchmark_nv_sa.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ for concurrency in ${concurrency_list}; do
75   75      --dataset-name random \
76   76      --num-prompts "${num_prompts}" \
77   77      --max-concurrency "${concurrency}" \
     78  +   --trust-remote-code \
78   79      --ignore-eos \
79   80      --random-input-len "${input_seq_len}" \
80   81      --random-output-len "${output_seq_len}" \

examples/disaggregated/slurm/benchmark/submit.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,13 @@ def submit_job(config):
105  105     log_base = os.path.join(env_config['work_dir'], f"{isl}-{osl}")
106  106
107  107     # Get eplb num_slots for gen worker
108       -  eplb_num_slots = (config['worker_config']['gen'].get('moe_config', {}).get(
109       -      'load_balancer', {}).get('num_slots', 0))
     108  +  load_balancer_config = config['worker_config']['gen'].get(
     109  +      'moe_config', {}).get('load_balancer', {})
     110  +  if isinstance(load_balancer_config, str):
     111  +      with open(load_balancer_config, 'r') as f:
     112  +          load_balancer_config = yaml.safe_load(f)
     113  +  eplb_num_slots = load_balancer_config.get('num_slots', 0)
     114  +
110  115     # Determine directory suffix based on attention_dp
111  116     if gen_enable_attention_dp:
112  117         dir_suffix = f"ctx{ctx_num}_gen{gen_num}_dep{gen_tp_size}_batch{gen_batch_size}_eplb{eplb_num_slots}_mtp{mtp_size}"

tensorrt_llm/executor/result.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -761,7 +761,6 @@ def _handle_response(self, response: "GenerationExecutor.Response"):
761  761
762  762                     beam_output.finish_reason = 'stop'
763  763                     beam_output.stop_reason = stop_reason
764       -                 self.abort()
765  764                     self._done = True
766  765                     break
767  766
0 commit comments

Comments (0)