@@ -146,6 +146,7 @@ def launch_disaggregated_llm(
146146
147147 for i , port in enumerate (ctx_ports ):
148148 env_ctx = os .environ .copy ()
149+ env_ctx ["TRTLLM_USE_UCX_KVCACHE" ] = "1"
149150 gpu_range = range (current_gpu_offset ,
150151 current_gpu_offset + ctx_total_gpus )
151152 env_ctx ["CUDA_VISIBLE_DEVICES" ] = "," .join (map (str , gpu_range ))
@@ -166,6 +167,7 @@ def launch_disaggregated_llm(
166167
167168 for i , port in enumerate (gen_ports ):
168169 env_gen = os .environ .copy ()
170+ env_gen ["TRTLLM_USE_UCX_KVCACHE" ] = "1"
169171 gpu_range = range (current_gpu_offset ,
170172 current_gpu_offset + gen_total_gpus )
171173 env_gen ["CUDA_VISIBLE_DEVICES" ] = "," .join (map (str , gpu_range ))
@@ -1103,15 +1105,12 @@ def test_chunked_prefill(self):
11031105 },
11041106 "enable_chunked_prefill" : True ,
11051107 "max_num_tokens" : 256 ,
1106- "max_batch_size" :
1107- 1 , # max_batch_size=1 will stabilize the accuracy test result at a cost of speed
11081108 }
11091109 gen_server_config = {
11101110 "cuda_graph_config" : None ,
11111111 "cache_transceiver_config" : {
11121112 "backend" : "DEFAULT"
1113- },
1114- "max_batch_size" : 1 ,
1113+ }
11151114 }
11161115 disaggregated_server_config = {
11171116 "hostname" : "localhost" ,
0 commit comments