Skip to content

Commit 6078657

Browse files
authored
[None][fix] Mitigate test timeout issues (#9445)
Signed-off-by: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com>
1 parent a2d9e62 commit 6078657

File tree

1 file changed

+3
-4
lines changed

1 file changed

+3
-4
lines changed

tests/integration/defs/accuracy/test_disaggregated_serving.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -146,6 +146,7 @@ def launch_disaggregated_llm(
146146

147147
for i, port in enumerate(ctx_ports):
148148
env_ctx = os.environ.copy()
149+
env_ctx["TRTLLM_USE_UCX_KVCACHE"] = "1"
149150
gpu_range = range(current_gpu_offset,
150151
current_gpu_offset + ctx_total_gpus)
151152
env_ctx["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_range))
@@ -166,6 +167,7 @@ def launch_disaggregated_llm(
166167

167168
for i, port in enumerate(gen_ports):
168169
env_gen = os.environ.copy()
170+
env_gen["TRTLLM_USE_UCX_KVCACHE"] = "1"
169171
gpu_range = range(current_gpu_offset,
170172
current_gpu_offset + gen_total_gpus)
171173
env_gen["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_range))
@@ -1103,15 +1105,12 @@ def test_chunked_prefill(self):
11031105
},
11041106
"enable_chunked_prefill": True,
11051107
"max_num_tokens": 256,
1106-
"max_batch_size":
1107-
1, # max_batch_size=1 will stabilize the accuracy test result at a cost of speed
11081108
}
11091109
gen_server_config = {
11101110
"cuda_graph_config": None,
11111111
"cache_transceiver_config": {
11121112
"backend": "DEFAULT"
1113-
},
1114-
"max_batch_size": 1,
1113+
}
11151114
}
11161115
disaggregated_server_config = {
11171116
"hostname": "localhost",

0 commit comments

Comments
 (0)