@@ -319,17 +319,252 @@ export XSHMEM_MODE=1
319319export XSHMEM_QP_NUM_PER_RANK=32
320320export BKCL_RDMA_VERBS=1
321321
322+ wget -q https://paddle-qa.bj.bcebos.com/xpu_third_party/xDeepEP.tar.gz  # NOTE(review): download/extract/build have no explicit status check here - confirm the script runs under set -e
323+ tar -xzf xDeepEP.tar.gz
324+ cd xDeepEP
325+ bash build.sh
326+ cd -  # return to the previous working directory
327+
328+ export port_num=$(( 8188 + XPU_ID * 100 ))  # base port: 8188 plus a 100-port offset per XPU_ID
329+ # Launch the fastdeploy OpenAI api_server in the background (tensor-parallel 4, data-parallel 1, expert parallel on); stdout/stderr go to server.log
330+ python -m fastdeploy.entrypoints.openai.api_server \
331+ --model ${MODEL_PATH} /ERNIE-4.5-300B-A47B-Paddle \
332+ --port $port_num \
333+ --tensor-parallel-size 4 \
334+ --enable-expert-parallel \
335+ --data-parallel-size 1 \
336+ --max-model-len 32768 \
337+ --max-num-seqs 64 \
338+ --quantization " wint4" \
339+ --engine-worker-queue-port $(( port_num + 10 )) \
340+ --metrics-port $(( port_num + 2 )) \
341+ --cache-queue-port $(( port_num + 47873 )) \
342+ --disable-sequence-parallel-moe \
343+ --gpu-memory-utilization 0.9 \
344+ --load-choices " default" > server.log 2>&1 &
345+
346+ sleep 60  # fixed grace period before the first health probe
347+ # Health probe: poll ENDPOINT until it answers HTTP 200 or TIMEOUT seconds have elapsed
348+ TIMEOUT=$(( 15 * 60 ))  # startup budget in seconds (15 minutes)
349+ INTERVAL=10  # seconds to sleep between probes
350+ ENDPOINT=" http://0.0.0.0:${port_num} /health"
351+ START_TIME=$( date +%s)
352+ echo " 开始服务健康检查,最长等待时间:${TIMEOUT} 秒"
353+ while true ; do
354+ CURRENT_TIME=$( date +%s)
355+ ELAPSED=$(( CURRENT_TIME - START_TIME))
356+ if [ $ELAPSED -ge $TIMEOUT ]; then  # timed out: stop the server, dump its logs, and fail
357+ echo -e " \n服务启动超时:经过 $(( TIMEOUT/ 60 )) 分钟服务仍未启动!"
358+ stop_processes
359+ cat server.log
360+ echo " log/workerlog.0"
361+ cat log/workerlog.0
362+ exit 1
363+ fi
364+ HTTP_CODE=$( curl -s -o /dev/null -w " %{http_code}" -m 2 " $ENDPOINT " || true)  # -m 2 caps each probe at 2 s; || true keeps a failed probe from failing the assignment
365+ echo -e " \r服务健康检查中... 已等待 ${ELAPSED} 秒,当前状态码:${HTTP_CODE} "
366+ if [ " $HTTP_CODE " = " 200" ]; then
367+ echo -e " \n服务启动成功!耗时 ${ELAPSED} 秒"
368+ break
369+ else
370+ sleep $INTERVAL
371+ fi
372+ done
373+
374+
375+ # Run the online-inference validation tests against the running server
376+ python -m pytest -s tests/ci_use/XPU_45T/run_ep_online.py
377+ ep_online_exit_code=$?  # capture pytest's status before another command overwrites $?
378+ echo ep_online_exit_code is ${ep_online_exit_code}
379+
380+ unset BKCL_ENABLE_XDR  # drop the BKCL/XSHMEM environment settings once this scenario is done
381+ unset BKCL_RDMA_NICS
382+ unset BKCL_TRACE_TOPO
383+ unset BKCL_PCIE_RING
384+ unset XSHMEM_MODE
385+ unset XSHMEM_QP_NUM_PER_RANK
386+ unset BKCL_RDMA_VERBS
387+ stop_processes > kill.log 2>&1  # stop_processes is defined outside this chunk; its output is kept in kill.log
388+
389+ if [ ${ep_online_exit_code} -ne 0 ]; then  # on test failure, print the server and worker logs, then abort
390+ echo " server.log"
391+ cat server.log
392+ cat log/workerlog.0
393+ echo " EP4TP4 在线服务相关测试失败,请检查pr代码"
394+ exit 1
395+ fi
396+
397+ echo " ============================开始 EP4TP1 在线服务测试!============================"
398+ sleep 5
399+ rm -rf log/*
400+ rm -f core*
401+ # pkill -9 python  # deliberately left disabled: the CI pipeline does not run this
402+ ipcrm --all=msg  # remove all System V message queues
403+ xpu-smi
404+ if [[ " $XPU_ID " == " 0" ]]; then
405+ export XPU_VISIBLE_DEVICES=" 0,1,2,3"
406+ else
407+ export XPU_VISIBLE_DEVICES=" 4,5,6,7"
408+ fi
409+ export BKCL_ENABLE_XDR=1
410+ export BKCL_RDMA_NICS=xgbe1,xgbe2,xgbe3,xgbe4
411+ export BKCL_TRACE_TOPO=1
412+ export BKCL_PCIE_RING=1
413+ export XSHMEM_MODE=1
414+ export XSHMEM_QP_NUM_PER_RANK=32
415+ export BKCL_RDMA_VERBS=1
416+
417+ export port_num=$(( 8188 + XPU_ID * 100 ))  # base port: 8188 plus a 100-port offset per XPU_ID
418+ # Launch the fastdeploy OpenAI api_server in the background (tensor-parallel 1, data-parallel 4, four engine-worker-queue ports); stdout/stderr go to server.log
419+ python -m fastdeploy.entrypoints.openai.api_server \
420+ --model ${MODEL_PATH} /ERNIE-4.5-300B-A47B-Paddle \
421+ --port $port_num \
422+ --tensor-parallel-size 1 \
423+ --enable-expert-parallel \
424+ --data-parallel-size 4 \
425+ --max-model-len 32768 \
426+ --max-num-seqs 64 \
427+ --quantization " wint4" \
428+ --engine-worker-queue-port " $(( port_num + 10 )) ,$(( port_num + 20 )) ,$(( port_num + 30 )) ,$(( port_num + 40 )) " \
429+ --metrics-port $(( port_num + 2 )) \
430+ --cache-queue-port $(( port_num + 47873 )) \
431+ --gpu-memory-utilization 0.9 \
432+ --load-choices " default" > server.log 2>&1 &
433+
434+ sleep 60  # fixed grace period before the first health probe
435+ # Health probe (same procedure as the previous scenario): poll ENDPOINT until HTTP 200 or TIMEOUT seconds have elapsed
436+ TIMEOUT=$(( 15 * 60 ))  # startup budget in seconds (15 minutes)
437+ INTERVAL=10  # seconds to sleep between probes
438+ ENDPOINT=" http://0.0.0.0:${port_num} /health"
439+ START_TIME=$( date +%s)
440+ while true ; do
441+ CURRENT_TIME=$( date +%s)
442+ ELAPSED=$(( CURRENT_TIME - START_TIME))
443+ if [ $ELAPSED -ge $TIMEOUT ]; then  # timed out: stop the server, dump its logs, and fail
444+ echo -e " \n服务启动超时:经过 $(( TIMEOUT/ 60 )) 分钟服务仍未启动!"
445+ stop_processes
446+ cat server.log
447+ cat log/workerlog.0
448+ exit 1
449+ fi
450+ HTTP_CODE=$( curl -s -o /dev/null -w " %{http_code}" -m 2 " $ENDPOINT " || true)  # -m 2 caps each probe at 2 s; || true keeps a failed probe from failing the assignment
451+ echo -e " \r服务健康检查中... 已等待 ${ELAPSED} 秒,当前状态码:${HTTP_CODE} "
452+ if [ " $HTTP_CODE " = " 200" ]; then
453+ echo -e " \n服务启动成功!耗时 ${ELAPSED} 秒"
454+ break
455+ else
456+ sleep $INTERVAL
457+ fi
458+ done
459+
460+
461+ # Run the online-inference validation tests against the running server
462+ python -m pytest -s tests/ci_use/XPU_45T/run_ep_online.py
463+ ep_online_exit_code=$?  # capture pytest's status before another command overwrites $?
464+ echo ep_online_exit_code is ${ep_online_exit_code}
465+
466+ unset BKCL_ENABLE_XDR  # drop the BKCL/XSHMEM environment settings exported for this scenario
467+ unset BKCL_RDMA_NICS
468+ unset BKCL_TRACE_TOPO
469+ unset BKCL_PCIE_RING
470+ unset XSHMEM_MODE
471+ unset XSHMEM_QP_NUM_PER_RANK
472+ unset BKCL_RDMA_VERBS
322473stop_processes > kill.log 2>&1
323474
324- export PYTHONPATH=/work/wq/qq/FastDeploy
325- export XPU_VISIBLE_DEVICES=" 0"
475+ if [ ${ep_online_exit_code} -ne 0 ]; then  # on test failure, print the server and worker logs, then abort
476+ echo " server.log"
477+ cat server.log
478+ cat log/workerlog.0
479+ echo " EP4TP1 在线服务相关测试失败,请检查pr代码"
480+ exit 1
481+ fi
482+
483+ echo " ============================开始 EP4TP4 all2all 测试!============================"
484+ sleep 5
485+ rm -rf log/*
486+ rm -f core*
487+ # pkill -9 python  # deliberately left disabled: the CI pipeline does not run this
488+ ipcrm --all=msg  # remove all System V message queues
489+ xpu-smi
490+ if [[ " $XPU_ID " == " 0" ]]; then
491+ export XPU_VISIBLE_DEVICES=" 0,1,2,3"
492+ else
493+ export XPU_VISIBLE_DEVICES=" 4,5,6,7"
494+ fi
495+
496+ export BKCL_ENABLE_XDR=1
497+ export BKCL_RDMA_NICS=xgbe1,xgbe2,xgbe3,xgbe4
498+ export BKCL_TRACE_TOPO=1
499+ export BKCL_PCIE_RING=1
500+ export XSHMEM_MODE=1
501+ export XSHMEM_QP_NUM_PER_RANK=32
502+ export BKCL_RDMA_VERBS=1
503+
504+ export port_num=$(( 8188 + XPU_ID * 100 ))  # base port: 8188 plus a 100-port offset per XPU_ID
505+ # Launch the fastdeploy OpenAI api_server in the background (tensor-parallel 4, data-parallel 1); unlike the first EP4TP4 launch, --disable-sequence-parallel-moe is not passed; stdout/stderr go to server.log
326506python -m fastdeploy.entrypoints.openai.api_server \
327- --model ../../../models/ERNIE-4.5-0.3B-Paddle \
328- --port 8188 \
329- --tensor-parallel-size 1 \
330- --max-model-len 32768 \
331- --max-num-seqs 128 \
332- --quantization " wint8" \
333- --gpu-memory-utilization 0.9 \
334- --enable-logprob \
335- --max-logprobs 5
507+ --model ${MODEL_PATH} /ERNIE-4.5-300B-A47B-Paddle \
508+ --port $port_num \
509+ --tensor-parallel-size 4 \
510+ --enable-expert-parallel \
511+ --data-parallel-size 1 \
512+ --max-model-len 32768 \
513+ --max-num-seqs 64 \
514+ --quantization " wint4" \
515+ --engine-worker-queue-port $(( port_num + 10 )) \
516+ --metrics-port $(( port_num + 2 )) \
517+ --cache-queue-port $(( port_num + 47873 )) \
518+ --gpu-memory-utilization 0.9 \
519+ --load-choices " default" > server.log 2>&1 &
520+
521+ sleep 60  # fixed grace period before the first health probe
522+ # Health probe: poll ENDPOINT until it answers HTTP 200 or TIMEOUT seconds have elapsed
523+ TIMEOUT=$(( 15 * 60 ))  # startup budget in seconds (15 minutes)
524+ INTERVAL=10  # seconds to sleep between probes
525+ ENDPOINT=" http://0.0.0.0:${port_num} /health"
526+ START_TIME=$( date +%s)
527+ echo " 开始服务健康检查,最长等待时间:${TIMEOUT} 秒"
528+ while true ; do
529+ CURRENT_TIME=$( date +%s)
530+ ELAPSED=$(( CURRENT_TIME - START_TIME))
531+ if [ $ELAPSED -ge $TIMEOUT ]; then  # timed out: stop the server, dump its logs, and fail
532+ echo -e " \n服务启动超时:经过 $(( TIMEOUT/ 60 )) 分钟服务仍未启动!"
533+ stop_processes
534+ cat server.log
535+ echo " log/workerlog.0"
536+ cat log/workerlog.0
537+ exit 1
538+ fi
539+ HTTP_CODE=$( curl -s -o /dev/null -w " %{http_code}" -m 2 " $ENDPOINT " || true)  # -m 2 caps each probe at 2 s; || true keeps a failed probe from failing the assignment
540+ echo -e " \r服务健康检查中... 已等待 ${ELAPSED} 秒,当前状态码:${HTTP_CODE} "
541+ if [ " $HTTP_CODE " = " 200" ]; then
542+ echo -e " \n服务启动成功!耗时 ${ELAPSED} 秒"
543+ break
544+ else
545+ sleep $INTERVAL
546+ fi
547+ done
548+
549+
550+ # Run the online-inference validation tests against the running server
551+ python -m pytest -s tests/ci_use/XPU_45T/run_ep_online.py
552+ ep_online_exit_code=$?  # capture pytest's status before another command overwrites $?
553+ echo ep_online_exit_code is ${ep_online_exit_code}
554+
555+ unset BKCL_ENABLE_XDR  # drop the BKCL/XSHMEM environment settings exported for this scenario
556+ unset BKCL_RDMA_NICS
557+ unset BKCL_TRACE_TOPO
558+ unset BKCL_PCIE_RING
559+ unset XSHMEM_MODE
560+ unset XSHMEM_QP_NUM_PER_RANK
561+ unset BKCL_RDMA_VERBS
562+ stop_processes > kill.log 2>&1  # stop_processes is defined outside this chunk; its output is kept in kill.log
563+
564+ if [ ${ep_online_exit_code} -ne 0 ]; then  # on test failure, print the server and worker logs, then abort
565+ echo " server.log"
566+ cat server.log
567+ cat log/workerlog.0
568+ echo " EP4TP4 all2all 在线服务相关测试失败,请检查pr代码"
569+ exit 1
570+ fi
0 commit comments