“docs”

qw86972190 · qw86972190 · commit cfca9a1e9482 · 2025-11-28T00:05:15.000+08:00
diff --git a/scripts/run_ci_xpu.sh b/scripts/run_ci_xpu.sh
@@ -319,17 +319,252 @@ export XSHMEM_MODE=1
 export XSHMEM_QP_NUM_PER_RANK=32
 export BKCL_RDMA_VERBS=1
 
+wget -q https://paddle-qa.bj.bcebos.com/xpu_third_party/xDeepEP.tar.gz
+tar -xzf xDeepEP.tar.gz
+cd xDeepEP
+bash build.sh
+cd -
+
+export port_num=$((8188 + XPU_ID * 100))
+# 启动服务
+python -m fastdeploy.entrypoints.openai.api_server \
+    --model ${MODEL_PATH}/ERNIE-4.5-300B-A47B-Paddle \
+    --port $port_num \
+    --tensor-parallel-size 4 \
+    --enable-expert-parallel \
+    --data-parallel-size 1 \
+    --max-model-len 32768 \
+    --max-num-seqs 64 \
+    --quantization "wint4" \
+    --engine-worker-queue-port $((port_num + 10)) \
+    --metrics-port $((port_num + 2)) \
+    --cache-queue-port $((port_num + 47873)) \
+    --disable-sequence-parallel-moe \
+    --gpu-memory-utilization 0.9 \
+    --load-choices "default" > server.log 2>&1 &
+
+sleep 60
+# 探活
+TIMEOUT=$((15 * 60))
+INTERVAL=10
+ENDPOINT="http://0.0.0.0:${port_num}/health"
+START_TIME=$(date +%s)
+echo "开始服务健康检查，最长等待时间：${TIMEOUT}秒"
+while true; do
+    CURRENT_TIME=$(date +%s)
+    ELAPSED=$((CURRENT_TIME - START_TIME))
+    if [ $ELAPSED -ge $TIMEOUT ]; then
+        echo -e "\n服务启动超时：经过 $((TIMEOUT/60)) 分钟服务仍未启动！"
+        stop_processes
+        cat server.log
+        echo "log/workerlog.0"
+        cat log/workerlog.0
+        exit 1
+    fi
+    HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -m 2 "$ENDPOINT" || true)
+    echo -e "\r服务健康检查中... 已等待 ${ELAPSED} 秒，当前状态码：${HTTP_CODE}"
+    if [ "$HTTP_CODE" = "200" ]; then
+        echo -e "\n服务启动成功！耗时 ${ELAPSED} 秒"
+        break
+    else
+        sleep $INTERVAL
+    fi
+done
+
+
+# 执行在线推理验证脚本
+python -m pytest -s tests/ci_use/XPU_45T/run_ep_online.py
+ep_online_exit_code=$?
+echo ep_online_exit_code is ${ep_online_exit_code}
+
+unset BKCL_ENABLE_XDR
+unset BKCL_RDMA_NICS
+unset BKCL_TRACE_TOPO
+unset BKCL_PCIE_RING
+unset XSHMEM_MODE
+unset XSHMEM_QP_NUM_PER_RANK
+unset BKCL_RDMA_VERBS
+stop_processes >kill.log 2>&1
+
+if [ ${ep_online_exit_code} -ne 0 ]; then
+    echo "server.log"
+    cat server.log
+    cat log/workerlog.0
+    echo "EP4TP4 在线服务相关测试失败，请检查pr代码"
+    exit 1
+fi
+
+echo "============================开始 EP4TP1 在线服务测试!============================"
+sleep 5
+rm -rf log/*
+rm -f core*
+# pkill -9 python #流水线不执行这个
+ipcrm --all=msg
+xpu-smi
+if [[ "$XPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+export BKCL_ENABLE_XDR=1
+export BKCL_RDMA_NICS=xgbe1,xgbe2,xgbe3,xgbe4
+export BKCL_TRACE_TOPO=1
+export BKCL_PCIE_RING=1
+export XSHMEM_MODE=1
+export XSHMEM_QP_NUM_PER_RANK=32
+export BKCL_RDMA_VERBS=1
+
+export port_num=$((8188 + XPU_ID * 100))
+# 启动服务
+python -m fastdeploy.entrypoints.openai.api_server \
+    --model ${MODEL_PATH}/ERNIE-4.5-300B-A47B-Paddle \
+    --port $port_num \
+    --tensor-parallel-size 1 \
+    --enable-expert-parallel \
+    --data-parallel-size 4 \
+    --max-model-len 32768 \
+    --max-num-seqs 64 \
+    --quantization "wint4" \
+    --engine-worker-queue-port "$((port_num + 10)),$((port_num + 20)),$((port_num + 30)),$((port_num + 40))" \
+    --metrics-port $((port_num + 2)) \
+    --cache-queue-port $((port_num + 47873)) \
+    --gpu-memory-utilization 0.9 \
+    --load-choices "default" > server.log 2>&1 &
+
+sleep 60
+# 探活（同上）
+TIMEOUT=$((15 * 60))
+INTERVAL=10
+ENDPOINT="http://0.0.0.0:${port_num}/health"
+START_TIME=$(date +%s)
+while true; do
+    CURRENT_TIME=$(date +%s)
+    ELAPSED=$((CURRENT_TIME - START_TIME))
+    if [ $ELAPSED -ge $TIMEOUT ]; then
+        echo -e "\n服务启动超时：经过 $((TIMEOUT/60)) 分钟服务仍未启动！"
+        stop_processes
+        cat server.log
+        cat log/workerlog.0
+        exit 1
+    fi
+    HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -m 2 "$ENDPOINT" || true)
+    echo -e "\r服务健康检查中... 已等待 ${ELAPSED} 秒，当前状态码：${HTTP_CODE}"
+    if [ "$HTTP_CODE" = "200" ]; then
+        echo -e "\n服务启动成功！耗时 ${ELAPSED} 秒"
+        break
+    else
+        sleep $INTERVAL
+    fi
+done
+
+
+# 执行在线推理验证脚本
+python -m pytest -s tests/ci_use/XPU_45T/run_ep_online.py
+ep_online_exit_code=$?
+echo ep_online_exit_code is ${ep_online_exit_code}
+
+unset BKCL_ENABLE_XDR
+unset BKCL_RDMA_NICS
+unset BKCL_TRACE_TOPO
+unset BKCL_PCIE_RING
+unset XSHMEM_MODE
+unset XSHMEM_QP_NUM_PER_RANK
+unset BKCL_RDMA_VERBS
 stop_processes >kill.log 2>&1
 
-export PYTHONPATH=/work/wq/qq/FastDeploy
-export XPU_VISIBLE_DEVICES="0"
+if [ ${ep_online_exit_code} -ne 0 ]; then
+    echo "server.log"
+    cat server.log
+    cat log/workerlog.0
+    echo "EP4TP1 在线服务相关测试失败，请检查pr代码"
+    exit 1
+fi
+
+echo "============================开始 EP4TP4 all2all 测试!============================"
+sleep 5
+rm -rf log/*
+rm -f core*
+# pkill -9 python #流水线不执行这个
+ipcrm --all=msg
+xpu-smi
+if [[ "$XPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+
+export BKCL_ENABLE_XDR=1
+export BKCL_RDMA_NICS=xgbe1,xgbe2,xgbe3,xgbe4
+export BKCL_TRACE_TOPO=1
+export BKCL_PCIE_RING=1
+export XSHMEM_MODE=1
+export XSHMEM_QP_NUM_PER_RANK=32
+export BKCL_RDMA_VERBS=1
+
+export port_num=$((8188 + XPU_ID * 100))
+# 启动服务
 python -m fastdeploy.entrypoints.openai.api_server \
---model ../../../models/ERNIE-4.5-0.3B-Paddle \
---port 8188 \
---tensor-parallel-size 1 \
---max-model-len 32768 \
---max-num-seqs 128 \
---quantization "wint8" \
---gpu-memory-utilization 0.9 \
---enable-logprob \
---max-logprobs 5
+    --model ${MODEL_PATH}/ERNIE-4.5-300B-A47B-Paddle \
+    --port $port_num \
+    --tensor-parallel-size 4 \
+    --enable-expert-parallel \
+    --data-parallel-size 1 \
+    --max-model-len 32768 \
+    --max-num-seqs 64 \
+    --quantization "wint4" \
+    --engine-worker-queue-port $((port_num + 10)) \
+    --metrics-port $((port_num + 2)) \
+    --cache-queue-port $((port_num + 47873)) \
+    --gpu-memory-utilization 0.9 \
+    --load-choices "default" > server.log 2>&1 &
+
+sleep 60
+# 探活
+TIMEOUT=$((15 * 60))
+INTERVAL=10
+ENDPOINT="http://0.0.0.0:${port_num}/health"
+START_TIME=$(date +%s)
+echo "开始服务健康检查，最长等待时间：${TIMEOUT}秒"
+while true; do
+    CURRENT_TIME=$(date +%s)
+    ELAPSED=$((CURRENT_TIME - START_TIME))
+    if [ $ELAPSED -ge $TIMEOUT ]; then
+        echo -e "\n服务启动超时：经过 $((TIMEOUT/60)) 分钟服务仍未启动！"
+        stop_processes
+        cat server.log
+        echo "log/workerlog.0"
+        cat log/workerlog.0
+        exit 1
+    fi
+    HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -m 2 "$ENDPOINT" || true)
+    echo -e "\r服务健康检查中... 已等待 ${ELAPSED} 秒，当前状态码：${HTTP_CODE}"
+    if [ "$HTTP_CODE" = "200" ]; then
+        echo -e "\n服务启动成功！耗时 ${ELAPSED} 秒"
+        break
+    else
+        sleep $INTERVAL
+    fi
+done
+
+
+# 执行在线推理验证脚本
+python -m pytest -s tests/ci_use/XPU_45T/run_ep_online.py
+ep_online_exit_code=$?
+echo ep_online_exit_code is ${ep_online_exit_code}
+
+unset BKCL_ENABLE_XDR
+unset BKCL_RDMA_NICS
+unset BKCL_TRACE_TOPO
+unset BKCL_PCIE_RING
+unset XSHMEM_MODE
+unset XSHMEM_QP_NUM_PER_RANK
+unset BKCL_RDMA_VERBS
+stop_processes >kill.log 2>&1
+
+if [ ${ep_online_exit_code} -ne 0 ]; then
+    echo "server.log"
+    cat server.log
+    cat log/workerlog.0
+    echo "EP4TP4 all2all 在线服务相关测试失败，请检查pr代码"
+    exit 1
+fi