From a431dc4dd95a476f57083a53bc4e0842dc38e00a Mon Sep 17 00:00:00 2001 From: Yuanjing Xue <197832395+yuanjingx87@users.noreply.github.com> Date: Thu, 13 Nov 2025 02:39:18 -0800 Subject: [PATCH 1/2] test b200 Signed-off-by: Yuanjing Xue <197832395+yuanjingx87@users.noreply.github.com> --- jenkins/L0_Test.groovy | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy index d71d510e36c..0e97bf2a7a4 100644 --- a/jenkins/L0_Test.groovy +++ b/jenkins/L0_Test.groovy @@ -1,4 +1,4 @@ -@Library(['bloom-jenkins-shared-lib@main', 'trtllm-jenkins-shared-lib@main']) _ +@Library(['bloom-jenkins-shared-lib@dev-yuanjingx-add_custom_ssh_option', 'trtllm-jenkins-shared-lib@main']) _ import java.lang.InterruptedException import groovy.transform.Field @@ -1000,7 +1000,8 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG timeout: false, script: Utils.sshUserCmd( remote, - scriptExecPathNode + scriptExecPathNode, + "-o ConnectTimeout=600" ) ) } From c898bb636ac009cc47091479fc33b18217e74cc3 Mon Sep 17 00:00:00 2001 From: Yuanjing Xue <197832395+yuanjingx87@users.noreply.github.com> Date: Thu, 13 Nov 2025 15:10:06 -0800 Subject: [PATCH 2/2] debug Signed-off-by: Yuanjing Xue <197832395+yuanjingx87@users.noreply.github.com> --- jenkins/L0_Test.groovy | 4 ++-- jenkins/scripts/slurm_run_test.sh | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) create mode 100755 jenkins/scripts/slurm_run_test.sh diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy index 0e97bf2a7a4..77335ac3415 100644 --- a/jenkins/L0_Test.groovy +++ b/jenkins/L0_Test.groovy @@ -846,7 +846,7 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG sh "cd ${llmPath} && tar -zxf ${BUILD_CONFIGS[config][TARNAME]}" // Upload slurm_run_sh to Frontend node - def scriptRunLocalPath = "${llmSrcLocal}/jenkins/scripts/slurm_run.sh" + def scriptRunLocalPath = "${llmSrcLocal}/jenkins/scripts/slurm_run_test.sh" Utils.exec(pipeline, script: "echo \"Script to trigger slurm job: \" && cat ${scriptRunLocalPath}") Utils.copyFileToRemoteHost( @@ -997,7 +997,7 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG stage("[${stageName}] Run Pytest") { Utils.exec( pipeline, - timeout: false, + timeout: 200, script: Utils.sshUserCmd( remote, scriptExecPathNode, diff --git a/jenkins/scripts/slurm_run_test.sh b/jenkins/scripts/slurm_run_test.sh new file mode 100755 index 00000000000..cae084337e9 --- /dev/null +++ b/jenkins/scripts/slurm_run_test.sh @@ -0,0 +1,4 @@ +#!/bin/bash +nvidia-smi +sleep 1000 +echo "Done"