From a8b06f4363402faf23bae2b72cfe9c6e96ff50b3 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Wed, 5 Nov 2025 14:47:54 -0800 Subject: [PATCH 01/38] inital commit --- .../build_artifacts/sagemaker_entrypoint.sh | 6 ++ sglang/buildspec-sm.yaml | 53 +++++++++++++ sglang/x86_64/gpu/Dockerfile | 75 +++++++++++++++++++ vllm/buildspec-sm.yml | 14 ++-- 4 files changed, 141 insertions(+), 7 deletions(-) create mode 100644 sglang/build_artifacts/sagemaker_entrypoint.sh create mode 100644 sglang/buildspec-sm.yaml create mode 100644 sglang/x86_64/gpu/Dockerfile diff --git a/sglang/build_artifacts/sagemaker_entrypoint.sh b/sglang/build_artifacts/sagemaker_entrypoint.sh new file mode 100644 index 000000000000..e47f1dd40d87 --- /dev/null +++ b/sglang/build_artifacts/sagemaker_entrypoint.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Check if telemetry file exists before executing +# Execute telemetry script if it exists, suppress errors +bash /usr/local/bin/bash_telemetry.sh >/dev/null 2>&1 || true + +eval "$@" diff --git a/sglang/buildspec-sm.yaml b/sglang/buildspec-sm.yaml new file mode 100644 index 000000000000..929f5193f0f0 --- /dev/null +++ b/sglang/buildspec-sm.yaml @@ -0,0 +1,53 @@ +account_id: &ACCOUNT_ID +prod_account_id: &PROD_ACCOUNT_ID 763104351884 +region: ®ION +framework: &FRAMEWORK sglang +version: &VERSION "0.5.4" +short_version: &SHORT_VERSION "0.5" +arch_type: &ARCH_TYPE x86_64 +autopatch_build: "False" + +repository_info: + build_repository: &BUILD_REPOSITORY + image_type: &IMAGE_TYPE gpu + root: . 
+ repository_name: &REPOSITORY_NAME !join [ pr, "-", *FRAMEWORK ] + repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ] + release_repository_name: &RELEASE_REPOSITORY_NAME !join [ *FRAMEWORK ] + release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ] + +context: + build_context: &BUILD_CONTEXT + deep_learning_container: + source: src/deep_learning_container.py + target: deep_learning_container.py + install_efa: + source: scripts/install_efa.sh + target: install_efa.sh + sagemaker_entrypoint: + source: sglang/build_artifacts/sagemaker_entrypoint.sh + target: sagemaker_entrypoint.sh + +images: + sglang_sm: + <<: *BUILD_REPOSITORY + context: + <<: *BUILD_CONTEXT + image_size_baseline: 26000 + device_type: &DEVICE_TYPE gpu + cuda_version: &CUDA_VERSION cu129 + python_version: &DOCKER_PYTHON_VERSION py3 + tag_python_version: &TAG_PYTHON_VERSION py312 + os_version: &OS_VERSION ubuntu22.04 + tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ] + latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ] + docker_file: !join [ *FRAMEWORK, /, *ARCH_TYPE, /, *DEVICE_TYPE, /Dockerfile ] + target: sglang-sagemaker + build: true + enable_common_stage_build: false + test_configs: + test_platforms: + - sanity + - security + - sagemaker + - eks diff --git a/sglang/x86_64/gpu/Dockerfile b/sglang/x86_64/gpu/Dockerfile new file mode 100644 index 000000000000..fcdecde2e835 --- /dev/null +++ b/sglang/x86_64/gpu/Dockerfile @@ -0,0 +1,75 @@ +FROM lmsysorg/sglang:v0.5.4-cu129-amd64 + +ARG PYTHON="python3" +ARG EFA_VERSION="1.43.3" + +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" + +ENV DEBIAN_FRONTEND=noninteractive \ + LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 \ + DLC_CONTAINER_TYPE=base \ + # Python won’t try to write 
.pyc or .pyo files on the import of source modules + # Force stdin, stdout and stderr to be totally unbuffered. Good for logging + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONIOENCODING=UTF-8 \ + LD_LIBRARY_PATH="/usr/local/lib:/opt/amazon/ofi-nccl/lib/x86_64-linux-gnu:/opt/amazon/openmpi/lib:/opt/amazon/efa/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" \ + PATH="/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/cuda/bin:${PATH}" + +WORKDIR / + +COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py +COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh +COPY install_efa.sh install_efa.sh + +RUN chmod +x /usr/local/bin/deep_learning_container.py && \ + chmod +x /usr/local/bin/bash_telemetry.sh && \ + echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc && \ + # OSS compliance - use Python zipfile instead of unzip + HOME_DIR=/root && \ + curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ + python3 -c "import zipfile, os; zipfile.ZipFile('/root/oss_compliance.zip').extractall('/root/'); os.remove('/root/oss_compliance.zip')" && \ + cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ + chmod +x /usr/local/bin/testOSSCompliance && \ + chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ + ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} && \ + # create symlink for python + ln -s /usr/bin/python3 /usr/bin/python && \ + # clean up + rm -rf ${HOME_DIR}/oss_compliance* && \ + rm -rf /tmp/tmp* && \ + rm -rf /tmp/uv* && \ + rm -rf /var/lib/apt/lists/* && \ + rm -rf /root/.cache | true + + +RUN bash install_efa.sh ${EFA_VERSION} && \ + rm install_efa.sh && \ + mkdir -p /tmp/nvjpeg \ + && cd /tmp/nvjpeg \ + && wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ + && tar -xvf 
libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ + && rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \ + && rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h \ + && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/lib/libnvjpeg* /usr/local/cuda/targets/x86_64-linux/lib/ \ + && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/include/* /usr/local/cuda/targets/x86_64-linux/include/ \ + && rm -rf /tmp/nvjpeg \ + # remove cuobjdump and nvdisasm + && rm -rf /usr/local/cuda/bin/cuobjdump* \ + && rm -rf /usr/local/cuda/bin/nvdisasm* + + +# ====================== sagemaker ========================================= +FROM base AS vllm-sagemaker + +RUN dpkg -l | grep -E "cuda|nvidia|libnv" | awk '{print $2}' | xargs apt-mark hold && \ + apt-get update && \ + apt-get upgrade -y && \ + apt-get clean + +COPY sagemaker_entrypoint.sh /usr/local/bin/sagemaker_entrypoint.sh +RUN chmod +x /usr/local/bin/sagemaker_entrypoint.sh + +ENTRYPOINT ["/usr/local/bin/sagemaker_entrypoint.sh"] diff --git a/vllm/buildspec-sm.yml b/vllm/buildspec-sm.yml index 3cec8812c0f0..59086de3464c 100644 --- a/vllm/buildspec-sm.yml +++ b/vllm/buildspec-sm.yml @@ -25,7 +25,7 @@ context: source: scripts/install_efa.sh target: install_efa.sh sagemaker_entrypoint: - source: vllm/build_artifacts/sagemaker_entrypoint.sh + source: sglang/build_artifacts/sagemaker_entrypoint.sh target: sagemaker_entrypoint.sh images: @@ -45,9 +45,9 @@ images: target: vllm-sagemaker build: true enable_common_stage_build: false - test_configs: - test_platforms: - - sanity - - security - - sagemaker - - eks + # test_configs: + # test_platforms: + # - sanity + # - security + # - sagemaker + # - eks From 2ea4c546ec8e3888325852390c17e4febdc74ca9 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Wed, 5 Nov 2025 23:35:07 +0000 Subject: [PATCH 02/38] update sglang container and entrypoint --- .../build_artifacts/sagemaker_entrypoint.sh | 30 +++++++- sglang/x86_64/gpu/Dockerfile | 77 ++++++++++--------- 2 files changed, 68 
insertions(+), 39 deletions(-) diff --git a/sglang/build_artifacts/sagemaker_entrypoint.sh b/sglang/build_artifacts/sagemaker_entrypoint.sh index e47f1dd40d87..b8ed15686348 100644 --- a/sglang/build_artifacts/sagemaker_entrypoint.sh +++ b/sglang/build_artifacts/sagemaker_entrypoint.sh @@ -3,4 +3,32 @@ # Execute telemetry script if it exists, suppress errors bash /usr/local/bin/bash_telemetry.sh >/dev/null 2>&1 || true -eval "$@" +echo "Starting server" + +SERVER_ARGS="--host 0.0.0.0 --port 8080" + +if [ -n "$TENSOR_PARALLEL_DEGREE" ]; then + SERVER_ARGS="${SERVER_ARGS} --tp-size ${TENSOR_PARALLEL_DEGREE}" +fi + +if [ -n "$DATA_PARALLEL_DEGREE" ]; then + SERVER_ARGS="${SERVER_ARGS} --dp-size ${DATA_PARALLEL_DEGREE}" +fi + +if [ -n "$EXPERT_PARALLEL_DEGREE" ]; then + SERVER_ARGS="${SERVER_ARGS} --ep-size ${EXPERT_PARALLEL_DEGREE}" +fi + +if [ -n "$MEM_FRACTION_STATIC" ]; then + SERVER_ARGS="${SERVER_ARGS} --mem-fraction-static ${MEM_FRACTION_STATIC}" +fi + +if [ -n "$QUANTIZATION" ]; then + SERVER_ARGS="${SERVER_ARGS} --quantization ${QUANTIZATION}" +fi + +if [ -n "$CHUNKED_PREFILL_SIZE" ]; then + SERVER_ARGS="${SERVER_ARGS} --chunked-prefill-size ${CHUNKED_PREFILL_SIZE}" +fi + +python3 -m sglang.launch_server --model-path /opt/ml/model $SERVER_ARGS diff --git a/sglang/x86_64/gpu/Dockerfile b/sglang/x86_64/gpu/Dockerfile index fcdecde2e835..da48233f8d19 100644 --- a/sglang/x86_64/gpu/Dockerfile +++ b/sglang/x86_64/gpu/Dockerfile @@ -1,4 +1,4 @@ -FROM lmsysorg/sglang:v0.5.4-cu129-amd64 +FROM lmsysorg/sglang:v0.5.4-cu129-amd64 AS base ARG PYTHON="python3" ARG EFA_VERSION="1.43.3" @@ -24,50 +24,51 @@ COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh COPY install_efa.sh install_efa.sh -RUN chmod +x /usr/local/bin/deep_learning_container.py && \ - chmod +x /usr/local/bin/bash_telemetry.sh && \ - echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc && \ - # OSS compliance - use 
Python zipfile instead of unzip - HOME_DIR=/root && \ - curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ - python3 -c "import zipfile, os; zipfile.ZipFile('/root/oss_compliance.zip').extractall('/root/'); os.remove('/root/oss_compliance.zip')" && \ - cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ - chmod +x /usr/local/bin/testOSSCompliance && \ - chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ - ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} && \ - # create symlink for python - ln -s /usr/bin/python3 /usr/bin/python && \ - # clean up - rm -rf ${HOME_DIR}/oss_compliance* && \ - rm -rf /tmp/tmp* && \ - rm -rf /tmp/uv* && \ - rm -rf /var/lib/apt/lists/* && \ - rm -rf /root/.cache | true +RUN chmod +x /usr/local/bin/deep_learning_container.py \ + && chmod +x /usr/local/bin/bash_telemetry.sh \ + && echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc \ + # OSS compliance - use Python zipfile instead of unzip + && HOME_DIR=/root \ + && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ + && python3 -c "import zipfile, os; zipfile.ZipFile('/root/oss_compliance.zip').extractall('/root/'); os.remove('/root/oss_compliance.zip')" \ + && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \ + && chmod +x /usr/local/bin/testOSSCompliance \ + && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ + && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \ + # create symlink for python + && rm -rf /usr/bin/python \ + && ln -s /usr/bin/python3 /usr/bin/python \ + # clean up + && rm -rf ${HOME_DIR}/oss_compliance* \ + && rm -rf /tmp/tmp* \ + && rm -rf /tmp/uv* \ + && rm -rf /var/lib/apt/lists/* \ + && rm -rf /root/.cache | true -RUN bash install_efa.sh ${EFA_VERSION} && \ - rm install_efa.sh 
&& \ - mkdir -p /tmp/nvjpeg \ - && cd /tmp/nvjpeg \ - && wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ - && tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ - && rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \ - && rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h \ - && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/lib/libnvjpeg* /usr/local/cuda/targets/x86_64-linux/lib/ \ - && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/include/* /usr/local/cuda/targets/x86_64-linux/include/ \ - && rm -rf /tmp/nvjpeg \ - # remove cuobjdump and nvdisasm - && rm -rf /usr/local/cuda/bin/cuobjdump* \ - && rm -rf /usr/local/cuda/bin/nvdisasm* +RUN bash install_efa.sh ${EFA_VERSION} \ + && rm install_efa.sh \ + && mkdir -p /tmp/nvjpeg \ + && cd /tmp/nvjpeg \ + && wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ + && tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ + && rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \ + && rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h \ + && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/lib/libnvjpeg* /usr/local/cuda/targets/x86_64-linux/lib/ \ + && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/include/* /usr/local/cuda/targets/x86_64-linux/include/ \ + && rm -rf /tmp/nvjpeg \ + # remove cuobjdump and nvdisasm + && rm -rf /usr/local/cuda/bin/cuobjdump* \ + && rm -rf /usr/local/cuda/bin/nvdisasm* # ====================== sagemaker ========================================= FROM base AS vllm-sagemaker -RUN dpkg -l | grep -E "cuda|nvidia|libnv" | awk '{print $2}' | xargs apt-mark hold && \ - apt-get update && \ - apt-get upgrade -y && \ - apt-get clean +RUN dpkg -l | grep -E "cuda|nvidia|libnv" | awk '{print $2}' | xargs apt-mark hold \ + && apt-get update \ + && apt-get upgrade -y \ + && apt-get clean COPY 
sagemaker_entrypoint.sh /usr/local/bin/sagemaker_entrypoint.sh RUN chmod +x /usr/local/bin/sagemaker_entrypoint.sh From 2b624d32fcd414919b7dd9c2cbda4fc3615aa1b3 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Wed, 5 Nov 2025 15:40:52 -0800 Subject: [PATCH 03/38] add buildspec.yaml --- sglang/buildspec.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 sglang/buildspec.yml diff --git a/sglang/buildspec.yml b/sglang/buildspec.yml new file mode 100644 index 000000000000..f91a3b188954 --- /dev/null +++ b/sglang/buildspec.yml @@ -0,0 +1 @@ +buildspec_pointer: buildspec-sm.yml From d81a34360691238f36b63b62913916c56d901634 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Thu, 6 Nov 2025 00:08:49 +0000 Subject: [PATCH 04/38] tmp test qwen --- sglang/build_artifacts/sagemaker_entrypoint.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sglang/build_artifacts/sagemaker_entrypoint.sh b/sglang/build_artifacts/sagemaker_entrypoint.sh index b8ed15686348..6f2ff6e990cc 100644 --- a/sglang/build_artifacts/sagemaker_entrypoint.sh +++ b/sglang/build_artifacts/sagemaker_entrypoint.sh @@ -31,4 +31,5 @@ if [ -n "$CHUNKED_PREFILL_SIZE" ]; then SERVER_ARGS="${SERVER_ARGS} --chunked-prefill-size ${CHUNKED_PREFILL_SIZE}" fi -python3 -m sglang.launch_server --model-path /opt/ml/model $SERVER_ARGS +# python3 -m sglang.launch_server --model-path /opt/ml/model $SERVER_ARGS +python3 -m sgland.launch_server --model-path Qwen/Qwen3-0.6B --reasoning-parser qwen3 From 5ea61327234bda57d36b0d760ff86efd4f472c73 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Thu, 6 Nov 2025 00:34:02 +0000 Subject: [PATCH 05/38] revert vllm --- sglang/{buildspec-sm.yaml => buildspec-sm.yml} | 12 ++++++------ sglang/buildspec.yaml | 0 vllm/buildspec-sm.yml | 14 +++++++------- 3 files changed, 13 insertions(+), 13 deletions(-) rename sglang/{buildspec-sm.yaml => buildspec-sm.yml} (93%) delete mode 100644 sglang/buildspec.yaml diff --git a/sglang/buildspec-sm.yaml b/sglang/buildspec-sm.yml 
similarity index 93% rename from sglang/buildspec-sm.yaml rename to sglang/buildspec-sm.yml index 929f5193f0f0..9242e40e1909 100644 --- a/sglang/buildspec-sm.yaml +++ b/sglang/buildspec-sm.yml @@ -45,9 +45,9 @@ images: target: sglang-sagemaker build: true enable_common_stage_build: false - test_configs: - test_platforms: - - sanity - - security - - sagemaker - - eks + # test_configs: + # test_platforms: + # - sanity + # - security + # - sagemaker + # - eks diff --git a/sglang/buildspec.yaml b/sglang/buildspec.yaml deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/vllm/buildspec-sm.yml b/vllm/buildspec-sm.yml index 59086de3464c..3cec8812c0f0 100644 --- a/vllm/buildspec-sm.yml +++ b/vllm/buildspec-sm.yml @@ -25,7 +25,7 @@ context: source: scripts/install_efa.sh target: install_efa.sh sagemaker_entrypoint: - source: sglang/build_artifacts/sagemaker_entrypoint.sh + source: vllm/build_artifacts/sagemaker_entrypoint.sh target: sagemaker_entrypoint.sh images: @@ -45,9 +45,9 @@ images: target: vllm-sagemaker build: true enable_common_stage_build: false - # test_configs: - # test_platforms: - # - sanity - # - security - # - sagemaker - # - eks + test_configs: + test_platforms: + - sanity + - security + - sagemaker + - eks From 91cf705cf3a343ce5e8c451023647242a0effed8 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Thu, 6 Nov 2025 19:32:29 +0000 Subject: [PATCH 06/38] fix sm path --- sglang/build_artifacts/sagemaker_entrypoint.sh | 3 +-- sglang/x86_64/gpu/Dockerfile | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/sglang/build_artifacts/sagemaker_entrypoint.sh b/sglang/build_artifacts/sagemaker_entrypoint.sh index 6f2ff6e990cc..b8ed15686348 100644 --- a/sglang/build_artifacts/sagemaker_entrypoint.sh +++ b/sglang/build_artifacts/sagemaker_entrypoint.sh @@ -31,5 +31,4 @@ if [ -n "$CHUNKED_PREFILL_SIZE" ]; then SERVER_ARGS="${SERVER_ARGS} --chunked-prefill-size ${CHUNKED_PREFILL_SIZE}" fi -# python3 -m sglang.launch_server 
--model-path /opt/ml/model $SERVER_ARGS -python3 -m sgland.launch_server --model-path Qwen/Qwen3-0.6B --reasoning-parser qwen3 +python3 -m sglang.launch_server --model-path /opt/ml/model $SERVER_ARGS diff --git a/sglang/x86_64/gpu/Dockerfile b/sglang/x86_64/gpu/Dockerfile index da48233f8d19..7fca9140c931 100644 --- a/sglang/x86_64/gpu/Dockerfile +++ b/sglang/x86_64/gpu/Dockerfile @@ -35,9 +35,6 @@ RUN chmod +x /usr/local/bin/deep_learning_container.py \ && chmod +x /usr/local/bin/testOSSCompliance \ && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \ - # create symlink for python - && rm -rf /usr/bin/python \ - && ln -s /usr/bin/python3 /usr/bin/python \ # clean up && rm -rf ${HOME_DIR}/oss_compliance* \ && rm -rf /tmp/tmp* \ @@ -50,6 +47,7 @@ RUN bash install_efa.sh ${EFA_VERSION} \ && rm install_efa.sh \ && mkdir -p /tmp/nvjpeg \ && cd /tmp/nvjpeg \ + # latest cu12 libnvjpeg available is cu124 && wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ && tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ && rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \ From ef528b46cb603e7d3bcdcaf847411ade4f2e5df8 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Thu, 6 Nov 2025 23:11:42 +0000 Subject: [PATCH 07/38] fix sglang entrpoint --- .../build_artifacts/sagemaker_entrypoint.sh | 38 ++++++++++--------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/sglang/build_artifacts/sagemaker_entrypoint.sh b/sglang/build_artifacts/sagemaker_entrypoint.sh index b8ed15686348..e32f15542d53 100644 --- a/sglang/build_artifacts/sagemaker_entrypoint.sh +++ b/sglang/build_artifacts/sagemaker_entrypoint.sh @@ -5,30 +5,34 @@ bash /usr/local/bin/bash_telemetry.sh >/dev/null 2>&1 || true echo "Starting server" -SERVER_ARGS="--host 0.0.0.0 --port 8080" +PREFIX="SM_SGLANG_" 
+ARG_PREFIX="--" -if [ -n "$TENSOR_PARALLEL_DEGREE" ]; then - SERVER_ARGS="${SERVER_ARGS} --tp-size ${TENSOR_PARALLEL_DEGREE}" -fi +ARGS=() -if [ -n "$DATA_PARALLEL_DEGREE" ]; then - SERVER_ARGS="${SERVER_ARGS} --dp-size ${DATA_PARALLEL_DEGREE}" -fi +while IFS='=' read -r key value; do + arg_name=$(echo "${key#"${PREFIX}"}" | tr '[:upper:]' '[:lower:]' | tr '_' '-') -if [ -n "$EXPERT_PARALLEL_DEGREE" ]; then - SERVER_ARGS="${SERVER_ARGS} --ep-size ${EXPERT_PARALLEL_DEGREE}" -fi + ARGS+=("${ARG_PREFIX}${arg_name}") + if [ -n "$value" ]; then + ARGS+=("$value") + fi +done < <(env | grep "^${PREFIX}") -if [ -n "$MEM_FRACTION_STATIC" ]; then - SERVER_ARGS="${SERVER_ARGS} --mem-fraction-static ${MEM_FRACTION_STATIC}" +# Add default port only if not already set +if ! [[ " ${ARGS[@]} " =~ " --port " ]]; then + ARGS+=(--port "${SM_SGLANG_PORT:-8080}") fi -if [ -n "$QUANTIZATION" ]; then - SERVER_ARGS="${SERVER_ARGS} --quantization ${QUANTIZATION}" +# Add default host only if not already set +if ! [[ " ${ARGS[@]} " =~ " --host " ]]; then + ARGS+=(--host "${SM_SGLANG_HOST:-0.0.0.0}") fi -if [ -n "$CHUNKED_PREFILL_SIZE" ]; then - SERVER_ARGS="${SERVER_ARGS} --chunked-prefill-size ${CHUNKED_PREFILL_SIZE}" +# Add default model-path only if not already set +if ! 
[[ " ${ARGS[@]} " =~ " --model-path " ]]; then + ARGS+=(--model-path "${SM_SGLANG_MODEL_PATH:-/opt/ml/model}") fi -python3 -m sglang.launch_server --model-path /opt/ml/model $SERVER_ARGS +echo "Running command: exec python3 -m sglang.launch_server ${ARGS[@]}" +exec python3 -m sglang.launch_server "${ARGS[@]}" From 62eaf279bf1359684ab21288e20dd332970c3df6 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 10 Nov 2025 20:20:35 +0000 Subject: [PATCH 08/38] finalize dockerfile --- .../build_artifacts/sagemaker_entrypoint.sh | 5 ++ sglang/build_artifacts/start_cuda_compat.sh | 25 ++++++++ sglang/buildspec-sm.yml | 3 + sglang/x86_64/gpu/Dockerfile | 60 +++++++++++++------ 4 files changed, 76 insertions(+), 17 deletions(-) create mode 100644 sglang/build_artifacts/start_cuda_compat.sh diff --git a/sglang/build_artifacts/sagemaker_entrypoint.sh b/sglang/build_artifacts/sagemaker_entrypoint.sh index e32f15542d53..aa5b2bba8089 100644 --- a/sglang/build_artifacts/sagemaker_entrypoint.sh +++ b/sglang/build_artifacts/sagemaker_entrypoint.sh @@ -3,6 +3,11 @@ # Execute telemetry script if it exists, suppress errors bash /usr/local/bin/bash_telemetry.sh >/dev/null 2>&1 || true +CUDA_AVAILABLE=$(python3 -c "import torch; print(torch.cuda.is_available())") +if [ "$CUDA_AVAILABLE" = "True" ]; then + bash /usr/local/bin/start_cuda_compat.sh +fi + echo "Starting server" PREFIX="SM_SGLANG_" diff --git a/sglang/build_artifacts/start_cuda_compat.sh b/sglang/build_artifacts/start_cuda_compat.sh new file mode 100644 index 000000000000..d09eac2dec8f --- /dev/null +++ b/sglang/build_artifacts/start_cuda_compat.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +verlte() { + [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] +} + +COMPAT_FILE=/usr/local/cuda/compat/lib.real/libcuda.so.1 +if [ -f $COMPAT_FILE ]; then + CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink $COMPAT_FILE | cut -d'.' 
-f 3-) + echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}" + NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true) + if [ -z "$NVIDIA_DRIVER_VERSION" ]; then + NVIDIA_DRIVER_VERSION=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader --id=0 2>/dev/null || true) + fi + echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}" + if verlte $NVIDIA_DRIVER_VERSION $CUDA_COMPAT_MAX_DRIVER_VERSION; then + echo "Adding CUDA compat to LD_LIBRARY_PATH" + export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH + echo $LD_LIBRARY_PATH + else + echo "Skipping CUDA compat setup as newer NVIDIA driver is installed" + fi +else + echo "Skipping CUDA compat setup as package not found" +fi diff --git a/sglang/buildspec-sm.yml b/sglang/buildspec-sm.yml index 9242e40e1909..3fd07df7ea61 100644 --- a/sglang/buildspec-sm.yml +++ b/sglang/buildspec-sm.yml @@ -24,6 +24,9 @@ context: install_efa: source: scripts/install_efa.sh target: install_efa.sh + start_cuda_compat: + source: sglang/build_artifacts/start_cuda_compat.sh + target: start_cuda_compat.sh sagemaker_entrypoint: source: sglang/build_artifacts/sagemaker_entrypoint.sh target: sagemaker_entrypoint.sh diff --git a/sglang/x86_64/gpu/Dockerfile b/sglang/x86_64/gpu/Dockerfile index 7fca9140c931..0a9fb9e5d268 100644 --- a/sglang/x86_64/gpu/Dockerfile +++ b/sglang/x86_64/gpu/Dockerfile @@ -1,4 +1,8 @@ -FROM lmsysorg/sglang:v0.5.4-cu129-amd64 AS base +FROM lmsysorg/sglang:v0.5.5-cu129-amd64 AS base + +# ==================================================== +# ====================== common ====================== +# ==================================================== ARG PYTHON="python3" ARG EFA_VERSION="1.43.3" @@ -20,29 +24,28 @@ ENV DEBIAN_FRONTEND=noninteractive \ WORKDIR / +# Copy artifacts +# =============== COPY deep_learning_container.py 
/usr/local/bin/deep_learning_container.py COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh COPY install_efa.sh install_efa.sh +COPY start_cuda_compat.sh /usr/local/bin/start_cuda_compat.sh RUN chmod +x /usr/local/bin/deep_learning_container.py \ && chmod +x /usr/local/bin/bash_telemetry.sh \ - && echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc \ - # OSS compliance - use Python zipfile instead of unzip - && HOME_DIR=/root \ - && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ - && python3 -c "import zipfile, os; zipfile.ZipFile('/root/oss_compliance.zip').extractall('/root/'); os.remove('/root/oss_compliance.zip')" \ - && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \ - && chmod +x /usr/local/bin/testOSSCompliance \ - && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ - && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \ - # clean up - && rm -rf ${HOME_DIR}/oss_compliance* \ - && rm -rf /tmp/tmp* \ - && rm -rf /tmp/uv* \ - && rm -rf /var/lib/apt/lists/* \ - && rm -rf /root/.cache | true + && chmod +x /usr/local/bin/start_cuda_compat.sh +# Install cuda compat +# ==================== +# RUN apt-get update \ +# && apt-get -y upgrade --only-upgrade systemd \ +# && apt-get install -y --allow-change-held-packages --no-install-recommends \ +# cuda-compat-12-9 \ +# && rm -rf /var/lib/apt/lists/* \ +# && apt-get clean +# Install EFA and remove vulnerable nvjpeg +# ========================================= RUN bash install_efa.sh ${EFA_VERSION} \ && rm install_efa.sh \ && mkdir -p /tmp/nvjpeg \ @@ -55,12 +58,35 @@ RUN bash install_efa.sh ${EFA_VERSION} \ && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/lib/libnvjpeg* /usr/local/cuda/targets/x86_64-linux/lib/ \ && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/include/* /usr/local/cuda/targets/x86_64-linux/include/ \ && rm -rf /tmp/nvjpeg \ + # create symlink for 
python + && rm -rf /usr/bin/python \ + && ln -s /usr/bin/python3 /usr/bin/python \ # remove cuobjdump and nvdisasm && rm -rf /usr/local/cuda/bin/cuobjdump* \ && rm -rf /usr/local/cuda/bin/nvdisasm* +# Run OSS compliance script +# ========================== +RUN echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc \ + # OSS compliance - use Python zipfile instead of unzip + && HOME_DIR=/root \ + && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ + && python3 -c "import zipfile, os; zipfile.ZipFile('/root/oss_compliance.zip').extractall('/root/'); os.remove('/root/oss_compliance.zip')" \ + && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \ + && chmod +x /usr/local/bin/testOSSCompliance \ + && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ + && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \ + # clean up + && rm -rf ${HOME_DIR}/oss_compliance* \ + && rm -rf /tmp/tmp* \ + && rm -rf /tmp/uv* \ + && rm -rf /var/lib/apt/lists/* \ + && rm -rf /root/.cache | true + +# ======================================================= +# ====================== sagemaker ====================== +# ======================================================= -# ====================== sagemaker ========================================= FROM base AS vllm-sagemaker RUN dpkg -l | grep -E "cuda|nvidia|libnv" | awk '{print $2}' | xargs apt-mark hold \ From 2031b398d03a69f1c026e129abff4eb1098a3a8d Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 10 Nov 2025 22:56:06 +0000 Subject: [PATCH 09/38] add toml file --- dlc_developer_config.toml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index ce4cad98d4e8..54d62e91dd5e 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -36,8 +36,8 @@ deep_canary_mode = false [build] # Add in 
frameworks you would like to build. By default, builds are disabled unless you specify building an image. -# available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] -build_frameworks = [] +# available frameworks - ["base", "vllm", "sglang" "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] +build_frameworks = ["sglang"] # By default we build both training and inference containers. Set true/false values to determine which to build. @@ -58,7 +58,7 @@ notify_test_failures = false [test] # Set to true to use the new test structure path for frameworks # Off by default (set to false) -use_new_test_structure = false +use_new_test_structure = true ### On by default sanity_tests = true @@ -90,7 +90,7 @@ enable_ipv6 = false ### b. Configure the default security group to allow SSH traffic using IPv4 ### ### 3. Create an EFA-enabled security group: -### a. Follow 'Step 1: Prepare an EFA-enabled security group' in: +### a. Follow 'Step 1: Prepare an EFA-enabled security group' in: ### https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa-start.html#efa-start-security ### b. 
Configure this security group to also allow SSH traffic via IPv4 ipv6_vpc_name = "" @@ -185,3 +185,6 @@ dlc-pr-tensorflow-2-eia-inference = "" # vllm dlc-pr-vllm = "" + +# sglang +dlc-pr-sglang = "" From 1352c62686df4829fa8ea3baf5484f0145437638 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 10 Nov 2025 23:19:15 +0000 Subject: [PATCH 10/38] add get job type func --- src/image_builder.py | 45 +++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/src/image_builder.py b/src/image_builder.py index 62357442338f..bbe3bf47705c 100644 --- a/src/image_builder.py +++ b/src/image_builder.py @@ -15,28 +15,25 @@ import concurrent.futures import datetime +import itertools import os import re import tempfile - from copy import deepcopy import constants -import utils -import itertools import patch_helper - -from codebuild_environment import get_codebuild_project_name, get_cloned_folder_path -from config import is_build_enabled, is_autopatch_build_enabled +import utils +from buildspec import Buildspec +from codebuild_environment import get_cloned_folder_path, get_codebuild_project_name +from common_stage_image import CommonStageImage +from config import is_autopatch_build_enabled, is_build_enabled from context import Context -from metrics import Metrics from image import DockerImage -from common_stage_image import CommonStageImage -from buildspec import Buildspec +from metrics import Metrics from output import OutputFormatter from utils import get_dummy_boto_client - FORMATTER = OutputFormatter(constants.PADDING) build_context = os.getenv("BUILD_CONTEXT") @@ -241,17 +238,7 @@ def image_builder(buildspec, image_types=[], device_types=[]): ) # Determine job_type (inference, training, or base) based on the image repository URI. # This is used to set the job_type label on the container image. 
- if "training" in image_repo_uri: - label_job_type = "training" - elif "inference" in image_repo_uri: - label_job_type = "inference" - elif "base" in image_repo_uri or "vllm" in image_repo_uri: - label_job_type = "general" - else: - raise RuntimeError( - f"Cannot find inference, training or base job type in {image_repo_uri}. " - f"This is required to set job_type label." - ) + label_job_type = get_job_type(image_repo_uri) bash_template_file = os.path.join( os.sep, get_cloned_folder_path(), "miscellaneous_scripts", "bash_telemetry.sh" @@ -690,3 +677,19 @@ def modify_repository_name_for_context(image_repo_uri, build_context): constants.PR_REPO_PREFIX, constants.NIGHTLY_REPO_PREFIX ) return "/".join(repo_uri_values) + + +def get_job_type(uri): + general_types = {"base", "vllm"} + + if "training" in uri: + return "training" + if "inference" in uri: + return "inference" + if any(t in uri for t in general_types): + return "general" + + raise RuntimeError( + f"Cannot determine job type from {uri}. 
" + f"Expected training, inference, or one of: {', '.join(general_types)}" + ) From f803c15fc98ef84b4910d9df8c06e4adb70875cd Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 10 Nov 2025 23:32:39 +0000 Subject: [PATCH 11/38] use dict job type --- src/image_builder.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/image_builder.py b/src/image_builder.py index bbe3bf47705c..433519588ccb 100644 --- a/src/image_builder.py +++ b/src/image_builder.py @@ -679,17 +679,20 @@ def modify_repository_name_for_context(image_repo_uri, build_context): return "/".join(repo_uri_values) -def get_job_type(uri): - general_types = {"base", "vllm"} - - if "training" in uri: - return "training" - if "inference" in uri: - return "inference" - if any(t in uri for t in general_types): - return "general" +def get_job_type(image_repo_uri): + job_type_mapping = { + "training": "training", + "inference": "inference", + "base": "general", + "vllm": "general", + # "sglang": "general", + } + + for key, job_type in job_type_mapping.items(): + if key in image_repo_uri: + return job_type raise RuntimeError( - f"Cannot determine job type from {uri}. " - f"Expected training, inference, or one of: {', '.join(general_types)}" + f"Cannot determine job type from {image_repo_uri}. 
" + f"Expected one of: {', '.join(job_type_mapping.keys())}" ) From b6716a2003ff154e37e7219e6a6fab69dd269ee0 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 10 Nov 2025 23:35:11 +0000 Subject: [PATCH 12/38] add sglang --- src/image_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/image_builder.py b/src/image_builder.py index 433519588ccb..f101f33fa7fb 100644 --- a/src/image_builder.py +++ b/src/image_builder.py @@ -685,7 +685,7 @@ def get_job_type(image_repo_uri): "inference": "inference", "base": "general", "vllm": "general", - # "sglang": "general", + "sglang": "general", } for key, job_type in job_type_mapping.items(): From ca48eb43b0508b24b1fcc05da9be892dfb5e9569 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 10 Nov 2025 23:41:45 +0000 Subject: [PATCH 13/38] fix target name --- sglang/buildspec-sm.yml | 2 +- sglang/x86_64/gpu/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sglang/buildspec-sm.yml b/sglang/buildspec-sm.yml index 3fd07df7ea61..72afd0962c1c 100644 --- a/sglang/buildspec-sm.yml +++ b/sglang/buildspec-sm.yml @@ -2,7 +2,7 @@ account_id: &ACCOUNT_ID prod_account_id: &PROD_ACCOUNT_ID 763104351884 region: ®ION framework: &FRAMEWORK sglang -version: &VERSION "0.5.4" +version: &VERSION "0.5.5" short_version: &SHORT_VERSION "0.5" arch_type: &ARCH_TYPE x86_64 autopatch_build: "False" diff --git a/sglang/x86_64/gpu/Dockerfile b/sglang/x86_64/gpu/Dockerfile index 0a9fb9e5d268..e08606eeaf6e 100644 --- a/sglang/x86_64/gpu/Dockerfile +++ b/sglang/x86_64/gpu/Dockerfile @@ -87,7 +87,7 @@ RUN echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc \ # ====================== sagemaker ====================== # ======================================================= -FROM base AS vllm-sagemaker +FROM base AS sglang-sagemaker RUN dpkg -l | grep -E "cuda|nvidia|libnv" | awk '{print $2}' | xargs apt-mark hold \ && apt-get update \ From c6927adc8bb5dcb6a43d51117fcaf8890380b63a Mon Sep 
17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 00:05:10 +0000 Subject: [PATCH 14/38] add tests to buildspec --- sglang/buildspec-sm.yml | 11 +- test/sglang/sagemaker/test_sm_endpoint.py | 225 ++++++++++++++++++++++ 2 files changed, 230 insertions(+), 6 deletions(-) create mode 100644 test/sglang/sagemaker/test_sm_endpoint.py diff --git a/sglang/buildspec-sm.yml b/sglang/buildspec-sm.yml index 72afd0962c1c..4bfc8466365f 100644 --- a/sglang/buildspec-sm.yml +++ b/sglang/buildspec-sm.yml @@ -48,9 +48,8 @@ images: target: sglang-sagemaker build: true enable_common_stage_build: false - # test_configs: - # test_platforms: - # - sanity - # - security - # - sagemaker - # - eks + test_configs: + test_platforms: + - sanity + - security + - sagemaker diff --git a/test/sglang/sagemaker/test_sm_endpoint.py b/test/sglang/sagemaker/test_sm_endpoint.py new file mode 100644 index 000000000000..afa1bae4ac49 --- /dev/null +++ b/test/sglang/sagemaker/test_sm_endpoint.py @@ -0,0 +1,225 @@ +import json +import time + +import boto3 +import sagemaker +from sagemaker import serializers +from sagemaker.config.config_schema import MODEL +from sagemaker.model import Model +from sagemaker.predictor import Predictor + +# Fixed parameters +MODEL_ID = "Qwen/Qwen3-0.6B" +AWS_REGION = "us-west-2" +INSTANCE_TYPE = "ml.g5.12xlarge" +ROLE = "SageMakerRole" + + +def get_secret_hf_token(): + print("Retrieving HuggingFace token from AWS Secrets Manager...") + secret_name = "test/hf_token" + region_name = "us-west-2" + + session = boto3.session.Session() + client = session.client(service_name="secretsmanager", region_name=region_name) + try: + get_secret_value_response = client.get_secret_value(SecretId=secret_name) + print("Successfully retrieved HuggingFace token") + except ClientError as e: + print(f"Failed to retrieve HuggingFace token: {e}") + raise e + + response = json.loads(get_secret_value_response["SecretString"]) + return response + + +def deploy_endpoint(name, image_uri, role, 
instance_type): + try: + print(f"Starting deployment of endpoint: {name}") + print(f"Using image: {image_uri}") + print(f"Instance type: {instance_type}") + + response = get_secret_hf_token() + hf_token = response.get("HF_TOKEN") + print("Creating SageMaker model...") + + model = Model( + name=name, + image_uri=image_uri, + role=role, + env={ + "SM_SGLANG_MODEL_PATH": MODEL_ID, + "SM_SGLANG_REASONING_PARSER": "qwen3", + "HF_TOKEN": hf_token, + }, + ) + print("Model created successfully") + print("Starting endpoint deployment (this may take 10-15 minutes)...") + + endpoint_config = model.deploy( + instance_type=instance_type, + initial_instance_count=1, + endpoint_name=name, + inference_ami_version="al2-ami-sagemaker-inference-gpu-3-1", + wait=True, + ) + print("Endpoint deployment completed successfully") + return True + except Exception as e: + print(f"Deployment failed: {str(e)}") + return False + + +def invoke_endpoint(endpoint_name, prompt, max_tokens=2400, temperature=0.01): + try: + print(f"Creating predictor for endpoint: {endpoint_name}") + predictor = Predictor( + endpoint_name=endpoint_name, + serializer=serializers.JSONSerializer(), + ) + + payload = { + "model": MODEL_ID, + "messages": [{"role": "user", "content": prompt}], + "max_tokens": max_tokens, + "temperature": temperature, + "top_p": 0.9, + "top_k": 50, + } + print(f"Sending inference request with prompt: '{prompt[:50]}...'") + print(f"Request parameters: max_tokens={max_tokens}, temperature={temperature}") + + response = predictor.predict(payload) + print("Inference request completed successfully") + + if isinstance(response, bytes): + response = response.decode("utf-8") + + if isinstance(response, str): + try: + response = json.loads(response) + except json.JSONDecodeError: + print("Warning: Response is not valid JSON. 
Returning as string.") + + return response + except Exception as e: + print(f"Inference failed: {str(e)}") + return None + + +def delete_endpoint(endpoint_name): + try: + sagemaker_client = boto3.client("sagemaker", region_name=AWS_REGION) + + print(f"Deleting endpoint: {endpoint_name}") + sagemaker_client.delete_endpoint(EndpointName=endpoint_name) + + print(f"Deleting endpoint configuration: {endpoint_name}") + sagemaker_client.delete_endpoint_config(EndpointConfigName=endpoint_name) + + print(f"Deleting model: {endpoint_name}") + sagemaker_client.delete_model(ModelName=endpoint_name) + + print("Successfully deleted all resources") + return True + except Exception as e: + print(f"Error during deletion: {str(e)}") + return False + + +def wait_for_endpoint(endpoint_name, timeout=1800): + sagemaker_client = boto3.client("sagemaker", region_name=AWS_REGION) + start_time = time.time() + + while time.time() - start_time < timeout: + try: + response = sagemaker_client.describe_endpoint(EndpointName=endpoint_name) + status = response["EndpointStatus"] + + if status == "InService": + return True + elif status in ["Failed", "OutOfService"]: + print(f"Endpoint creation failed with status: {status}") + return False + + print(f"Endpoint status: {status}. 
Waiting...") + time.sleep(30) + except Exception as e: + print(f"Error checking endpoint status: {str(e)}") + return False + + print("Timeout waiting for endpoint to be ready") + return False + + +def test_sglang_on_sagemaker(image_uri, endpoint_name): + print("\n" + "=" * 80) + print("STARTING SGLang SAGEMAKER ENDPOINT TEST".center(80)) + print("=" * 80) + print(f"Test Configuration:") + print(f" Image URI: {image_uri}") + print(f" Endpoint name: {endpoint_name}") + print(f" Region: {AWS_REGION}") + print(f" Instance type: {INSTANCE_TYPE}") + print("\n" + "-" * 80) + print("PHASE 1: ENDPOINT DEPLOYMENT".center(80)) + print("-" * 80) + + if not deploy_endpoint(endpoint_name, image_uri, ROLE, INSTANCE_TYPE): + print("\n" + "=" * 80) + print("DEPLOYMENT FAILED - CLEANING UP".center(80)) + print("=" * 80) + # Cleanup any partially created resources + delete_endpoint(endpoint_name) + raise Exception("SageMaker endpoint deployment failed") + + print("\n" + "-" * 80) + print("PHASE 2: WAITING FOR ENDPOINT READINESS".center(80)) + print("-" * 80) + if not wait_for_endpoint(endpoint_name): + print("\nEndpoint failed to become ready. 
Initiating cleanup...") + delete_endpoint(endpoint_name) + print("\n" + "=" * 80) + print("ENDPOINT READINESS FAILED".center(80)) + print("=" * 80) + raise Exception("SageMaker endpoint failed to become ready") + + print("\nEndpoint is ready for inference!") + print("\n" + "-" * 80) + print("PHASE 3: TESTING INFERENCE".center(80)) + print("-" * 80) + test_prompt = "Write a python script to calculate square of n" + + response = invoke_endpoint( + endpoint_name=endpoint_name, prompt=test_prompt, max_tokens=2400, temperature=0.01 + ) + + if response: + print("\n Inference test successful!") + print("\n Response from endpoint:") + print("-" * 40) + if isinstance(response, (dict, list)): + print(json.dumps(response, indent=2)) + else: + print(response) + print("-" * 40) + + print("\n" + "-" * 80) + print(" PHASE 4: CLEANUP".center(80)) + print("-" * 80) + if delete_endpoint(endpoint_name): + print("\n" + "=" * 80) + print(" TEST COMPLETED SUCCESSFULLY! ".center(80)) + print("=" * 80) + else: + print("\n Cleanup failed") + else: + print("\n No response received from the endpoint.") + print("\n" + "-" * 80) + print(" PHASE 4: CLEANUP (FAILED INFERENCE)".center(80)) + print("-" * 80) + delete_endpoint(endpoint_name) + print("\n" + "=" * 80) + print(" TEST FAILED ".center(80)) + print("=" * 80) + raise Exception("SageMaker endpoint inference test failed") From dd97fc1e7dfb24b4e6d157711e7187b940ca2919 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 00:36:22 +0000 Subject: [PATCH 15/38] fix test runner and get framework func --- test/test_utils/__init__.py | 1 + test/testrunner.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/test/test_utils/__init__.py b/test/test_utils/__init__.py index 6390fbde74a6..7e2bd04f287a 100644 --- a/test/test_utils/__init__.py +++ b/test/test_utils/__init__.py @@ -1940,6 +1940,7 @@ def get_framework_from_image_uri(image_uri): "autogluon": "autogluon", "base": "base", "vllm": "vllm", + "sglang": "sglang", } for 
image_pattern, framework in framework_map.items(): diff --git a/test/testrunner.py b/test/testrunner.py index dcab2f3b3b24..ee5196e1d84a 100644 --- a/test/testrunner.py +++ b/test/testrunner.py @@ -391,6 +391,10 @@ def main(): run_vllm_tests(f"{specific_test_type}", all_image_list, new_test_structure_enabled) return + if framework == "sglang": + run_new_tests() + return + eks_cluster_name = f"dlc-{framework}-{build_context}" eks_utils.eks_setup() if eks_utils.is_eks_cluster_active(eks_cluster_name): @@ -494,6 +498,10 @@ def main(): run_vllm_tests("sagemaker", all_image_list, new_test_structure_enabled) return + if "sglang" in dlc_images: + run_new_tests() + return + if "habana" in dlc_images: LOGGER.info(f"Skipping SM tests for Habana. Images: {dlc_images}") # Creating an empty file for because codebuild job fails without it From e24c955fc2f27e9bcc809aa763ff145cf3b5fd80 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 00:54:27 +0000 Subject: [PATCH 16/38] add job type --- test/test_utils/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_utils/__init__.py b/test/test_utils/__init__.py index 7e2bd04f287a..c9d058b9c42f 100644 --- a/test/test_utils/__init__.py +++ b/test/test_utils/__init__.py @@ -2068,7 +2068,7 @@ def get_job_type_from_image(image_uri): :return: Job Type """ tested_job_type = None - allowed_job_types = ("training", "inference", "base", "vllm") + allowed_job_types = ("training", "inference", "base", "vllm", "sglang") for job_type in allowed_job_types: if job_type in image_uri: tested_job_type = job_type From b4444a97419f75e78d0a14cd3ae3b434e7e97a46 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 02:32:14 +0000 Subject: [PATCH 17/38] fix sanity and security tests --- sglang/x86_64/gpu/Dockerfile | 2 + test/dlc_tests/sanity/test_anaconda.py | 2 +- .../test_boottime_container_security.py | 6 +- test/dlc_tests/sanity/test_dlc_labels.py | 6 +- test/dlc_tests/sanity/test_ecr_scan.py | 6 
+- test/dlc_tests/sanity/test_pre_release.py | 99 ++++++++++--------- .../sanity/test_safety_report_file.py | 7 +- 7 files changed, 70 insertions(+), 58 deletions(-) diff --git a/sglang/x86_64/gpu/Dockerfile b/sglang/x86_64/gpu/Dockerfile index e08606eeaf6e..6191298b88c3 100644 --- a/sglang/x86_64/gpu/Dockerfile +++ b/sglang/x86_64/gpu/Dockerfile @@ -94,6 +94,8 @@ RUN dpkg -l | grep -E "cuda|nvidia|libnv" | awk '{print $2}' | xargs apt-mark ho && apt-get upgrade -y \ && apt-get clean +RUN rm -rf /tmp/* + COPY sagemaker_entrypoint.sh /usr/local/bin/sagemaker_entrypoint.sh RUN chmod +x /usr/local/bin/sagemaker_entrypoint.sh diff --git a/test/dlc_tests/sanity/test_anaconda.py b/test/dlc_tests/sanity/test_anaconda.py index 50ee479ef756..520a68d3ae7d 100644 --- a/test/dlc_tests/sanity/test_anaconda.py +++ b/test/dlc_tests/sanity/test_anaconda.py @@ -15,7 +15,7 @@ def test_repo_anaconda_not_present(image): # First check to see if image has conda installed, if not, skip test since no packages installed from conda present conda_present = test_utils.run_cmd_on_container( - container_name, ctx, 'find . -name conda -not -path "**/.github/*" -ignore_readdir_race' + container_name, ctx, 'find . 
-name conda -not -path "**/.github/*" -not -path "**/.oh-my-zsh/*" -ignore_readdir_race' ).stdout.strip() if not conda_present: pytest.skip(f"Image {image} does not have conda installed, skipping test.") diff --git a/test/dlc_tests/sanity/test_boottime_container_security.py b/test/dlc_tests/sanity/test_boottime_container_security.py index 3be83c12b88c..ded6c61b0e3d 100644 --- a/test/dlc_tests/sanity/test_boottime_container_security.py +++ b/test/dlc_tests/sanity/test_boottime_container_security.py @@ -1,5 +1,4 @@ import pytest - from invoke import run @@ -7,9 +6,10 @@ @pytest.mark.model("N/A") @pytest.mark.canary("Run security test regularly on production images") def test_security(image): - if "vllm" in image: + upstream_types = ["vllm"] + if any(t in image for t in upstream_types): pytest.skip( - "vLLM images do not require pip check as they are managed by vLLM devs. Skipping test." + f"{', '.join(upstream_types)} images do not require boot time security check as they are managed by upstream devs. Skipping test." ) repo_name, image_tag = image.split("/")[-1].split(":") container_name = f"{repo_name}-{image_tag}-security" diff --git a/test/dlc_tests/sanity/test_dlc_labels.py b/test/dlc_tests/sanity/test_dlc_labels.py index 6d1d10e05fb9..87f68922eb9d 100644 --- a/test/dlc_tests/sanity/test_dlc_labels.py +++ b/test/dlc_tests/sanity/test_dlc_labels.py @@ -31,10 +31,12 @@ def test_dlc_major_version_label(image, region): @pytest.mark.integration("dlc_labels") @pytest.mark.model("N/A") def test_dlc_standard_labels(image, region): - if "vllm" in image: + upstream_types = ["vllm", "sglang"] + if any(t in image for t in upstream_types): pytest.skip( - "vLLM images do not require test_dlc_standard_labels check as they are managed by vLLM devs. Skipping test." + f"{', '.join(upstream_types)} images do not require test_dlc_standard_labels check as they are managed by upstream devs. Skipping test." 
) + customer_type_label_prefix = "ec2" if test_utils.is_ec2_image(image) else "sagemaker" framework, fw_version = test_utils.get_framework_and_version_from_tag(image) diff --git a/test/dlc_tests/sanity/test_ecr_scan.py b/test/dlc_tests/sanity/test_ecr_scan.py index 3a92c66415ed..2d81ff3958eb 100644 --- a/test/dlc_tests/sanity/test_ecr_scan.py +++ b/test/dlc_tests/sanity/test_ecr_scan.py @@ -435,10 +435,12 @@ def test_ecr_enhanced_scan(image, ecr_client, sts_client, region): :param sts_client: boto3 Client for STS :param region: str Name of region where test is executed """ - if "vllm" in image: + upstream_types = ["vllm", "sglang"] + if any(t in image for t in upstream_types): pytest.skip( - "vLLM images do not require test_ecr_enhanced_scan check as they are managed by vLLM devs. Skipping test." + f"{', '.join(upstream_types)} images do not require test_ecr_enhanced_scan check as they are managed by upstream devs. Skipping test." ) + LOGGER.info(f"Running test_ecr_enhanced_scan for image {image}") image = conduct_preprocessing_of_images_before_running_ecr_scans( image, ecr_client, sts_client, region diff --git a/test/dlc_tests/sanity/test_pre_release.py b/test/dlc_tests/sanity/test_pre_release.py index 048bebd7e299..793405a12020 100644 --- a/test/dlc_tests/sanity/test_pre_release.py +++ b/test/dlc_tests/sanity/test_pre_release.py @@ -1,58 +1,55 @@ +import filecmp +import json import os import re import subprocess -import botocore -import boto3 -import json import time - -from packaging.version import Version -from packaging.specifiers import SpecifierSet - -import pytest -import requests -import filecmp - -from urllib3.util.retry import Retry -from invoke.context import Context -from botocore.exceptions import ClientError - -from src.buildspec import Buildspec -import src.utils as src_utils from test.test_utils import ( - LOGGER, + AL2023_BASE_DLAMI_ARM64_US_WEST_2, CONTAINER_TESTS_PREFIX, + LOGGER, + DockerImagePullException, ec2, + execute_env_variables_test, + 
get_account_id_from_image_uri, + get_all_the_tags_of_an_image_from_ecr, + get_buildspec_path, get_container_name, + get_cuda_version_from_tag, get_framework_and_version_from_tag, - get_neuron_sdk_version_from_tag, + get_image_spec_from_buildspec, + get_installed_python_packages_using_image_uri, + get_installed_python_packages_with_version, + get_labels_from_ecr_image, get_neuron_release_manifest, + get_neuron_sdk_version_from_tag, + get_python_version_from_image_uri, + get_pytorch_version_from_autogluon_image, + get_region_from_image_uri, + get_repository_and_tag_from_image_uri, + get_repository_local_path, is_canary_context, is_dlc_cicd_context, + is_nightly_context, + login_to_ecr_registry, run_cmd_on_container, start_container, stop_and_remove_container, - get_repository_local_path, - get_repository_and_tag_from_image_uri, - get_python_version_from_image_uri, - get_pytorch_version_from_autogluon_image, - get_cuda_version_from_tag, - get_labels_from_ecr_image, - get_buildspec_path, - get_all_the_tags_of_an_image_from_ecr, - is_nightly_context, - execute_env_variables_test, - AL2023_BASE_DLAMI_ARM64_US_WEST_2, - get_installed_python_packages_with_version, - login_to_ecr_registry, - get_account_id_from_image_uri, - get_region_from_image_uri, - DockerImagePullException, - get_installed_python_packages_with_version, - get_installed_python_packages_using_image_uri, - get_image_spec_from_buildspec, ) +import boto3 +import botocore +import pytest +import requests +from botocore.exceptions import ClientError +from invoke.context import Context +from packaging.specifiers import SpecifierSet +from packaging.version import Version +from urllib3.util.retry import Retry + +import src.utils as src_utils +from src.buildspec import Buildspec + def tail_n_lines(fname, n): try: @@ -112,9 +109,10 @@ def test_stray_files(image): :param image: ECR image URI """ - if "vllm" in image: + upstream_types = ["vllm", "sglang"] + if any(t in image for t in upstream_types): pytest.skip( - 
"vLLM images do not require pip check as they are managed by vLLM devs. Skipping test." + f"{', '.join(upstream_types)} images do not require pip check as they are managed by upstream devs. Skipping test." ) ctx = Context() @@ -585,9 +583,10 @@ def test_pip_check(image): :param image: ECR image URI """ - if "vllm" in image: + upstream_types = ["vllm", "sglang"] + if any(t in image for t in upstream_types): pytest.skip( - "vLLM images do not require pip check as they are managed by vLLM devs. Skipping test." + f"{', '.join(upstream_types)} images do not require pip check as they are managed by upstream devs. Skipping test." ) allowed_exceptions = [] @@ -733,9 +732,10 @@ def test_cuda_paths(gpu): :param gpu: gpu image uris """ image = gpu - if "base" in image or "vllm" in image: + general_types = ["base", "vllm", "sglang"] + if any(t in image for t in general_types): pytest.skip( - "Base/vLLM DLC doesn't have the same directory structure and buildspec as other images" + f"{', '.join(general_types)} DLC doesn't have the same directory structure and buildspec as other images" ) if "example" in image: pytest.skip("Skipping Example Dockerfiles which are not explicitly tied to a cuda version") @@ -1070,8 +1070,9 @@ def test_license_file(image): """ Check that license file within the container is readable and valid """ - if "base" in image or "vllm" in image: - pytest.skip("Base DLC has doesn't embed license.txt. Skipping test.") + general_types = ["base", "vllm", "sglang"] + if any(t in image for t in general_types): + pytest.skip(f"{', '.join(general_types)} DLC doesn't embed license.txt. Skipping test.") framework, version = get_framework_and_version_from_tag(image) @@ -1194,8 +1195,10 @@ def test_core_package_version(image): In this test, we ensure that if a core_packages.json file exists for an image, the packages installed in the image satisfy the version constraints specified in the core_packages.json file. 
""" - if "base" in image or "vllm" in image: - pytest.skip("Base/vLLM images do not have core packages. Skipping test.") + general_types = ["base", "vllm", "sglang"] + if any(t in image for t in general_types): + pytest.skip(f"{', '.join(general_types)} images do not have core packages. Skipping test.") + core_packages_path = src_utils.get_core_packages_path(image) if not os.path.exists(core_packages_path): pytest.skip(f"Core packages file {core_packages_path} does not exist for {image}") diff --git a/test/dlc_tests/sanity/test_safety_report_file.py b/test/dlc_tests/sanity/test_safety_report_file.py index df78a42e2e30..f8860d53784a 100644 --- a/test/dlc_tests/sanity/test_safety_report_file.py +++ b/test/dlc_tests/sanity/test_safety_report_file.py @@ -73,10 +73,13 @@ def test_safety_file_exists_and_is_valid(image): pytest.skip( "Base images do not require safety file as there isn't much python libs in it. Skipping test." ) - if "vllm" in image: + + upstream_types = ["vllm", "sglang"] + if any(t in image for t in upstream_types): pytest.skip( - "vllm images do not require safety file as they are managed by vLLM devs. Skipping test." + f"{', '.join(upstream_types)} images do not require safety file as they are managed by upstream devs. Skipping test." 
) + repo_name, image_tag = image.split("/")[-1].split(":") # Make sure this container name doesn't conflict with the safety check test container name container_name = f"{repo_name}-{image_tag}-safety-file" From d9bf7c1fb8db45d4336f2dcac43021125a3bb4c7 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 02:50:50 +0000 Subject: [PATCH 18/38] revert run new tests --- test/testrunner.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/testrunner.py b/test/testrunner.py index ee5196e1d84a..6bc39a27e34e 100644 --- a/test/testrunner.py +++ b/test/testrunner.py @@ -391,9 +391,9 @@ def main(): run_vllm_tests(f"{specific_test_type}", all_image_list, new_test_structure_enabled) return - if framework == "sglang": - run_new_tests() - return + # if framework == "sglang": + # run_new_tests() + # return eks_cluster_name = f"dlc-{framework}-{build_context}" eks_utils.eks_setup() @@ -498,9 +498,9 @@ def main(): run_vllm_tests("sagemaker", all_image_list, new_test_structure_enabled) return - if "sglang" in dlc_images: - run_new_tests() - return + # if "sglang" in dlc_images: + # run_new_tests() + # return if "habana" in dlc_images: LOGGER.info(f"Skipping SM tests for Habana. Images: {dlc_images}") From 71b11825da366124ded819e4ffdfddf02fe73a34 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 03:01:04 +0000 Subject: [PATCH 19/38] formatting --- test/dlc_tests/sanity/test_anaconda.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/dlc_tests/sanity/test_anaconda.py b/test/dlc_tests/sanity/test_anaconda.py index 520a68d3ae7d..d0609daf17b8 100644 --- a/test/dlc_tests/sanity/test_anaconda.py +++ b/test/dlc_tests/sanity/test_anaconda.py @@ -15,7 +15,9 @@ def test_repo_anaconda_not_present(image): # First check to see if image has conda installed, if not, skip test since no packages installed from conda present conda_present = test_utils.run_cmd_on_container( - container_name, ctx, 'find . 
-name conda -not -path "**/.github/*" -not -path "**/.oh-my-zsh/*" -ignore_readdir_race' + container_name, + ctx, + 'find . -name conda -not -path "**/.github/*" -not -path "**/.oh-my-zsh/*" -ignore_readdir_race', ).stdout.strip() if not conda_present: pytest.skip(f"Image {image} does not have conda installed, skipping test.") From 2f86d52846c154f907d21ce2c2794187d25554df Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 03:11:22 +0000 Subject: [PATCH 20/38] fix jobtype func and add sglang general integration sagemaker dir --- test/sagemaker_tests/sglang/__init__.py | 13 +++++++++++++ .../sagemaker_tests/sglang/general/__init__.py | 13 +++++++++++++ .../sglang/general/integration/__init__.py | 13 +++++++++++++ .../general/integration/sagemaker/__init__.py | 0 .../integration}/sagemaker/test_sm_endpoint.py | 0 test/test_utils/__init__.py | 18 ++++++++++++------ 6 files changed, 51 insertions(+), 6 deletions(-) create mode 100644 test/sagemaker_tests/sglang/__init__.py create mode 100644 test/sagemaker_tests/sglang/general/__init__.py create mode 100644 test/sagemaker_tests/sglang/general/integration/__init__.py create mode 100644 test/sagemaker_tests/sglang/general/integration/sagemaker/__init__.py rename test/{sglang => sagemaker_tests/sglang/general/integration}/sagemaker/test_sm_endpoint.py (100%) diff --git a/test/sagemaker_tests/sglang/__init__.py b/test/sagemaker_tests/sglang/__init__.py new file mode 100644 index 000000000000..79cb9cdfaf79 --- /dev/null +++ b/test/sagemaker_tests/sglang/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. 
This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import diff --git a/test/sagemaker_tests/sglang/general/__init__.py b/test/sagemaker_tests/sglang/general/__init__.py new file mode 100644 index 000000000000..79cb9cdfaf79 --- /dev/null +++ b/test/sagemaker_tests/sglang/general/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import diff --git a/test/sagemaker_tests/sglang/general/integration/__init__.py b/test/sagemaker_tests/sglang/general/integration/__init__.py new file mode 100644 index 000000000000..79cb9cdfaf79 --- /dev/null +++ b/test/sagemaker_tests/sglang/general/integration/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+from __future__ import absolute_import diff --git a/test/sagemaker_tests/sglang/general/integration/sagemaker/__init__.py b/test/sagemaker_tests/sglang/general/integration/sagemaker/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/test/sglang/sagemaker/test_sm_endpoint.py b/test/sagemaker_tests/sglang/general/integration/sagemaker/test_sm_endpoint.py similarity index 100% rename from test/sglang/sagemaker/test_sm_endpoint.py rename to test/sagemaker_tests/sglang/general/integration/sagemaker/test_sm_endpoint.py diff --git a/test/test_utils/__init__.py b/test/test_utils/__init__.py index c9d058b9c42f..53421e25ffe4 100644 --- a/test/test_utils/__init__.py +++ b/test/test_utils/__init__.py @@ -2068,19 +2068,25 @@ def get_job_type_from_image(image_uri): :return: Job Type """ tested_job_type = None - allowed_job_types = ("training", "inference", "base", "vllm", "sglang") - for job_type in allowed_job_types: - if job_type in image_uri: + job_type_mapping = { + "training": "training", + "inference": "inference", + "base": "general", + "vllm": "general", + "sglang": "general", + } + + for key, job_type in job_type_mapping.items(): + if key in image_uri: tested_job_type = job_type - break if not tested_job_type and "eia" in image_uri: tested_job_type = "inference" if not tested_job_type: raise RuntimeError( - f"Cannot find Job Type in image uri {image_uri} " - f"from allowed frameworks {allowed_job_types}" + f"Cannot determine job type from {image_uri}. 
" f"Expected one of: {', '.join(job_type_mapping.keys())}" ) return tested_job_type From 456bdc6afc62f380d58ecc03c9c0fdbd2870b620 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 12:27:01 -0800 Subject: [PATCH 21/38] add sglang and vllm to frameworks --- src/constants.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/constants.py b/src/constants.py index 7f961003d2fb..73f07931c2be 100644 --- a/src/constants.py +++ b/src/constants.py @@ -25,6 +25,8 @@ "autogluon", "stabilityai_pytorch", "base", + "vllm", + "sglang", } DEVICE_TYPES = {"cpu", "gpu", "hpu", "eia", "inf", "neuron", "neuronx"} IMAGE_TYPES = {"training", "inference"} From 7309d6761fd354f4efa1ad9bcd9b0d9c3774925a Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 12:54:51 -0800 Subject: [PATCH 22/38] add skip general types --- test/dlc_tests/sanity/test_pre_release.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/test/dlc_tests/sanity/test_pre_release.py b/test/dlc_tests/sanity/test_pre_release.py index 793405a12020..b8379478a2c6 100644 --- a/test/dlc_tests/sanity/test_pre_release.py +++ b/test/dlc_tests/sanity/test_pre_release.py @@ -875,9 +875,10 @@ def _test_framework_and_cuda_version(gpu, ec2_connection): :param ec2_connection: fixture to establish connection with an ec2 instance """ image = gpu - if "base" in image or "vllm" in image: + general_types = ["base", "vllm", "sglang"] + if any(t in image for t in general_types): pytest.skip( - "Base/vLLM DLC has doesn't follow the assumptions made by inference/training. Skipping test." + f"{', '.join(general_types)} images do not follow the assumptions made by inference/training. Skipping test." ) tested_framework, tag_framework_version = get_framework_and_version_from_tag(image) @@ -1247,10 +1248,10 @@ def test_package_version_regression_in_image(image): keys in the buildspec - as these keys are used to extract the released image uri. 
Additionally, if the image is not already released, this test would be skipped. """ - if "base" in image or "vllm" in image: - pytest.skip( - "Base/vLLM images don't have python packages that needs to be checked. Skipping test." - ) + general_types = ["base", "vllm", "sglang"] + if any(t in image for t in general_types): + pytest.skip(f"{', '.join(general_types)} images don't have python packages that needs to be checked. Skipping test.") + dlc_path = os.getcwd().split("/test/")[0] corresponding_image_spec = get_image_spec_from_buildspec( image_uri=image, dlc_folder_path=dlc_path From 2ed025f91d53d8a645b16941561e2443d8dadbf4 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 21:03:28 +0000 Subject: [PATCH 23/38] fix cuda compat and entrypoint --- sglang/build_artifacts/sagemaker_entrypoint.sh | 3 +-- sglang/build_artifacts/start_cuda_compat.sh | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sglang/build_artifacts/sagemaker_entrypoint.sh b/sglang/build_artifacts/sagemaker_entrypoint.sh index aa5b2bba8089..0f13cf5cb002 100644 --- a/sglang/build_artifacts/sagemaker_entrypoint.sh +++ b/sglang/build_artifacts/sagemaker_entrypoint.sh @@ -3,8 +3,7 @@ # Execute telemetry script if it exists, suppress errors bash /usr/local/bin/bash_telemetry.sh >/dev/null 2>&1 || true -CUDA_AVAILABLE=$(python3 -c "import torch; print(torch.cuda.is_available())") -if [ "$CUDA_AVAILABLE" = "True" ]; then +if command -v nvidia-smi >/dev/null 2>&1 && command -v nvcc >/dev/null 2>&1; then bash /usr/local/bin/start_cuda_compat.sh fi diff --git a/sglang/build_artifacts/start_cuda_compat.sh b/sglang/build_artifacts/start_cuda_compat.sh index d09eac2dec8f..791d355c5abe 100644 --- a/sglang/build_artifacts/start_cuda_compat.sh +++ b/sglang/build_artifacts/start_cuda_compat.sh @@ -4,7 +4,7 @@ verlte() { [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] } -COMPAT_FILE=/usr/local/cuda/compat/lib.real/libcuda.so.1 
+COMPAT_FILE=/usr/local/cuda/compat/libcuda.so.1 if [ -f $COMPAT_FILE ]; then CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink $COMPAT_FILE | cut -d'.' -f 3-) echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}" From 563709507a23e35b7424e7514e30a278f5f6d2f0 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 13:12:50 -0800 Subject: [PATCH 24/38] fix dlc container type --- sglang/x86_64/gpu/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sglang/x86_64/gpu/Dockerfile b/sglang/x86_64/gpu/Dockerfile index 6191298b88c3..346daba748e5 100644 --- a/sglang/x86_64/gpu/Dockerfile +++ b/sglang/x86_64/gpu/Dockerfile @@ -13,7 +13,7 @@ LABEL dlc_major_version="1" ENV DEBIAN_FRONTEND=noninteractive \ LANG=C.UTF-8 \ LC_ALL=C.UTF-8 \ - DLC_CONTAINER_TYPE=base \ + DLC_CONTAINER_TYPE=general \ # Python won’t try to write .pyc or .pyo files on the import of source modules # Force stdin, stdout and stderr to be totally unbuffered. 
Good for logging PYTHONDONTWRITEBYTECODE=1 \ From cce1e879f9cf6a7a5b5c6e427d8cbd3332db4263 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 13:22:55 -0800 Subject: [PATCH 25/38] install boto3 --- sglang/x86_64/gpu/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sglang/x86_64/gpu/Dockerfile b/sglang/x86_64/gpu/Dockerfile index 346daba748e5..7c2100878941 100644 --- a/sglang/x86_64/gpu/Dockerfile +++ b/sglang/x86_64/gpu/Dockerfile @@ -94,6 +94,9 @@ RUN dpkg -l | grep -E "cuda|nvidia|libnv" | awk '{print $2}' | xargs apt-mark ho && apt-get upgrade -y \ && apt-get clean +RUN pip install --no-cache-dir -U \ + boto3 + RUN rm -rf /tmp/* COPY sagemaker_entrypoint.sh /usr/local/bin/sagemaker_entrypoint.sh From 1927956f7f9db449914a48a3f5f4db7ac9391061 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 13:23:40 -0800 Subject: [PATCH 26/38] add sglang to types --- src/deep_learning_container.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/deep_learning_container.py b/src/deep_learning_container.py index 35e730d745d6..2b9f66dd56db 100644 --- a/src/deep_learning_container.py +++ b/src/deep_learning_container.py @@ -239,7 +239,7 @@ def parse_args(): parser = argparse.ArgumentParser() parser.add_argument( "--framework", - choices=["tensorflow", "mxnet", "pytorch", "base", "vllm"], + choices=["tensorflow", "mxnet", "pytorch", "base", "vllm", "sglang"], help="framework of container image.", required=True, ) From 8aa5c9c74f904e18f341829e5c2cca5c435cb00d Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 13:45:36 -0800 Subject: [PATCH 27/38] sgl fix bug --- test/dlc_tests/sanity/test_pre_release.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/dlc_tests/sanity/test_pre_release.py b/test/dlc_tests/sanity/test_pre_release.py index b8379478a2c6..6f12565fed2e 100644 --- a/test/dlc_tests/sanity/test_pre_release.py +++ b/test/dlc_tests/sanity/test_pre_release.py @@ -878,7 +878,7 
@@ def _test_framework_and_cuda_version(gpu, ec2_connection): general_types = ["base", "vllm", "sglang"] if any(t in image for t in general_types): pytest.skip( - f"{', '.join(upstream_t_typesypes)} images do not follow the assumptions made by inference/training. Skipping test." + f"{', '.join(general_types)} images do not follow the assumptions made by inference/training. Skipping test." ) tested_framework, tag_framework_version = get_framework_and_version_from_tag(image) From a95e10c0ec760260da88d781a5d87cfcdd618ad1 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 14:57:16 -0800 Subject: [PATCH 28/38] add pytest --- test/dlc_tests/sanity/test_pre_release.py | 4 +++- .../integration/sagemaker/test_sm_endpoint.py | 18 ++++++++++++++++++ .../sglang/general/requirements.txt | 5 +++++ 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 test/sagemaker_tests/sglang/general/requirements.txt diff --git a/test/dlc_tests/sanity/test_pre_release.py b/test/dlc_tests/sanity/test_pre_release.py index 6f12565fed2e..b2959cfcdca7 100644 --- a/test/dlc_tests/sanity/test_pre_release.py +++ b/test/dlc_tests/sanity/test_pre_release.py @@ -1250,7 +1250,9 @@ def test_package_version_regression_in_image(image): """ general_types = ["base", "vllm", "sglang"] if any(t in image for t in general_types): - pytest.skip(f"{', '.join(general_types)} images don't have python packages that needs to be checked. Skipping test.") + pytest.skip( + f"{', '.join(general_types)} images don't have python packages that needs to be checked. Skipping test." 
+ ) dlc_path = os.getcwd().split("/test/")[0] corresponding_image_spec = get_image_spec_from_buildspec( diff --git a/test/sagemaker_tests/sglang/general/integration/sagemaker/test_sm_endpoint.py b/test/sagemaker_tests/sglang/general/integration/sagemaker/test_sm_endpoint.py index afa1bae4ac49..9ada6c9298ef 100644 --- a/test/sagemaker_tests/sglang/general/integration/sagemaker/test_sm_endpoint.py +++ b/test/sagemaker_tests/sglang/general/integration/sagemaker/test_sm_endpoint.py @@ -1,7 +1,23 @@ +# Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+from __future__ import absolute_import + import json +import os import time import boto3 +import pytest import sagemaker from sagemaker import serializers from sagemaker.config.config_schema import MODEL @@ -152,6 +168,8 @@ def wait_for_endpoint(endpoint_name, timeout=1800): return False +@pytest.mark.processor("gpu") +@pytest.mark.team("conda") def test_sglang_on_sagemaker(image_uri, endpoint_name): print("\n" + "=" * 80) print("STARTING SGLang SAGEMAKER ENDPOINT TEST".center(80)) diff --git a/test/sagemaker_tests/sglang/general/requirements.txt b/test/sagemaker_tests/sglang/general/requirements.txt new file mode 100644 index 000000000000..86f537941142 --- /dev/null +++ b/test/sagemaker_tests/sglang/general/requirements.txt @@ -0,0 +1,5 @@ +botocore +boto3 +awscli +protobuf +sagemaker From ad5e24da388c001fdd81727efe4f37d4a4ca6140 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 15:14:52 -0800 Subject: [PATCH 29/38] add print debug --- test/test_utils/sagemaker.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_utils/sagemaker.py b/test/test_utils/sagemaker.py index ad428cd47ff9..31707479bca1 100644 --- a/test/test_utils/sagemaker.py +++ b/test/test_utils/sagemaker.py @@ -307,7 +307,7 @@ def execute_local_tests(image, pytest_cache_params): image, SAGEMAKER_LOCAL_TEST_TYPE ) pytest_command += " --last-failed --last-failed-no-failures all " - print(pytest_command) + print(f"Running local sm test with command: {pytest_command}") framework, _ = get_framework_and_version_from_tag(image) framework = framework.replace("_trcomp", "") random.seed(f"{datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}") @@ -435,6 +435,7 @@ def execute_sagemaker_remote_tests(process_index, image, global_pytest_cache, py pytest_command, path, tag, job_type = generate_sagemaker_pytest_cmd( image, SAGEMAKER_REMOTE_TEST_TYPE ) + print(f"Running remote sm test with command: {pytest_command}") context = Context() with context.cd(path): 
context.run(f"virtualenv {tag}") From c89a8f5f824f4f82c92374c091997472792df7cb Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 15:28:38 -0800 Subject: [PATCH 30/38] add conftest --- .../sglang/general/conftest.py | 83 +++++++++++++++++++ .../general/integration/sagemaker/__init__.py | 13 +++ 2 files changed, 96 insertions(+) create mode 100644 test/sagemaker_tests/sglang/general/conftest.py diff --git a/test/sagemaker_tests/sglang/general/conftest.py b/test/sagemaker_tests/sglang/general/conftest.py new file mode 100644 index 000000000000..eca73546e530 --- /dev/null +++ b/test/sagemaker_tests/sglang/general/conftest.py @@ -0,0 +1,83 @@ +# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+from __future__ import absolute_import + +import logging +import os +import sys + +import pytest + +logger = logging.getLogger(__name__) + + +dir_path = os.path.dirname(os.path.realpath(__file__)) + + +def pytest_addoption(parser): + parser.addoption("--build-image", "-D", action="store_true") + parser.addoption("--build-base-image", "-B", action="store_true") + parser.addoption("--aws-id") + parser.addoption("--instance-type") + parser.addoption("--docker-base-name", default="pytorch") + parser.addoption("--region", default="us-west-2") + parser.addoption("--framework-version", default="") + parser.addoption( + "--py-version", + choices=["312"], + default=str(sys.version_info.major), + ) + parser.addoption("--processor", choices=["gpu"], default="gpu") + # If not specified, will default to {framework-version}-{processor}-py{py-version} + parser.addoption("--tag", default=None) + parser.addoption( + "--generate-coverage-doc", + default=False, + action="store_true", + help="use this option to generate test coverage doc", + ) + parser.addoption( + "--efa", + action="store_true", + default=False, + help="Run only efa tests", + ) + parser.addoption("--sagemaker-regions", default="us-west-2") + + +def pytest_configure(config): + config.addinivalue_line("markers", "efa(): explicitly mark to run efa tests") + + +def pytest_runtest_setup(item): + efa_tests = [mark for mark in item.iter_markers(name="efa")] + if item.config.getoption("--efa") and not efa_tests: + pytest.skip("Skipping non-efa tests due to --efa flag") + elif not item.config.getoption("--efa") and efa_tests: + pytest.skip("Skipping efa tests because --efa flag is missing") + + +def pytest_collection_modifyitems(session, config, items): + for item in items: + print(f"item {item}") + for marker in item.iter_markers(name="team"): + print(f"item {marker}") + team_name = marker.args[0] + item.user_properties.append(("team_marker", team_name)) + print(f"item.user_properties {item.user_properties}") + + if 
config.getoption("--generate-coverage-doc"): + from test.test_utils.test_reporting import TestReportGenerator + + report_generator = TestReportGenerator(items, is_sagemaker=True) + report_generator.generate_coverage_doc(framework="pytorch", job_type="training") diff --git a/test/sagemaker_tests/sglang/general/integration/sagemaker/__init__.py b/test/sagemaker_tests/sglang/general/integration/sagemaker/__init__.py index e69de29bb2d1..79cb9cdfaf79 100644 --- a/test/sagemaker_tests/sglang/general/integration/sagemaker/__init__.py +++ b/test/sagemaker_tests/sglang/general/integration/sagemaker/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import From eb524f7c0737b007cf06d4fd36f67de246cc8d8f Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 15:44:18 -0800 Subject: [PATCH 31/38] fix conftest --- .../sglang/general/__init__.py | 38 ++++++++ .../sglang/general/conftest.py | 87 +++++++++++++++++++ 2 files changed, 125 insertions(+) diff --git a/test/sagemaker_tests/sglang/general/__init__.py b/test/sagemaker_tests/sglang/general/__init__.py index 79cb9cdfaf79..ccd50a97c4a0 100644 --- a/test/sagemaker_tests/sglang/general/__init__.py +++ b/test/sagemaker_tests/sglang/general/__init__.py @@ -11,3 +11,41 @@ # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
from __future__ import absolute_import + +import os +from enum import Enum + +import botocore + + +def _botocore_resolver(): + """ + Get the DNS suffix for the given region. + :return: endpoint object + """ + loader = botocore.loaders.create_loader() + return botocore.regions.EndpointResolver(loader.load_data("endpoints")) + + +def get_ecr_registry(account, region): + """ + Get prefix of ECR image URI + :param account: Account ID + :param region: region where ECR repo exists + :return: AWS ECR registry + """ + endpoint_data = _botocore_resolver().construct_endpoint("ecr", region) + return "{}.dkr.{}".format(account, endpoint_data["hostname"]) + +def get_efa_test_instance_type(default: list): + """ + Get the instance type to be used for EFA tests from the environment, or default to a given value if the type + isn't specified in the environment. + + :param default: list of instance type to be used for tests + :return: list of instance types to be parametrized for a test + """ + configured_instance_type = os.getenv("SM_EFA_TEST_INSTANCE_TYPE") + if configured_instance_type: + return [configured_instance_type] + return default diff --git a/test/sagemaker_tests/sglang/general/conftest.py b/test/sagemaker_tests/sglang/general/conftest.py index eca73546e530..f7b8953a1174 100644 --- a/test/sagemaker_tests/sglang/general/conftest.py +++ b/test/sagemaker_tests/sglang/general/conftest.py @@ -16,7 +16,11 @@ import os import sys +import boto3 import pytest +from sagemaker import LocalSession, Session + +from . 
import get_ecr_registry, get_efa_test_instance_type logger = logging.getLogger(__name__) @@ -81,3 +85,86 @@ def pytest_collection_modifyitems(session, config, items): report_generator = TestReportGenerator(items, is_sagemaker=True) report_generator.generate_coverage_doc(framework="pytorch", job_type="training") + + +@pytest.fixture(scope="session", name="docker_base_name") +def fixture_docker_base_name(request): + return request.config.getoption("--docker-base-name") + + +@pytest.fixture(scope="session", name="region") +def fixture_region(request): + return request.config.getoption("--region") + + +@pytest.fixture(scope="session", name="framework_version") +def fixture_framework_version(request): + return request.config.getoption("--framework-version") + + +@pytest.fixture(scope="session", name="py_version") +def fixture_py_version(request): + return "py{}".format(int(request.config.getoption("--py-version"))) + + +@pytest.fixture(scope="session", name="processor") +def fixture_processor(request): + return request.config.getoption("--processor") + + +@pytest.fixture(scope="session", name="sagemaker_regions") +def fixture_sagemaker_regions(request): + sagemaker_regions = request.config.getoption("--sagemaker-regions") + return sagemaker_regions.split(",") + + +@pytest.fixture(scope="session", name="tag") +def fixture_tag(request, framework_version, processor, py_version): + provided_tag = request.config.getoption("--tag") + default_tag = "{}-{}-{}".format(framework_version, processor, py_version) + return provided_tag if provided_tag else default_tag + + +@pytest.fixture(scope="session", name="docker_image") +def fixture_docker_image(docker_base_name, tag): + return "{}:{}".format(docker_base_name, tag) + + +@pytest.fixture(scope="session", name="sagemaker_session") +def fixture_sagemaker_session(region): + return Session(boto_session=boto3.Session(region_name=region)) + + +@pytest.fixture(name="efa_instance_type") +def fixture_efa_instance_type(request): + try: + 
return request.param + except AttributeError: + return get_efa_test_instance_type(default=["ml.p4d.24xlarge"])[0] + + +@pytest.fixture(scope="session", name="sagemaker_local_session") +def fixture_sagemaker_local_session(region): + return LocalSession(boto_session=boto3.Session(region_name=region)) + + +@pytest.fixture(name="aws_id", scope="session") +def fixture_aws_id(request): + return request.config.getoption("--aws-id") + + +@pytest.fixture(name="instance_type", scope="session") +def fixture_instance_type(request, processor): + provided_instance_type = request.config.getoption("--instance-type") + default_instance_type = "local" if processor == "cpu" else "local_gpu" + return provided_instance_type or default_instance_type + + +@pytest.fixture(name="docker_registry", scope="session") +def fixture_docker_registry(aws_id, region): + return get_ecr_registry(aws_id, region) + + +@pytest.fixture(name="ecr_image", scope="session") +def fixture_ecr_image(docker_registry, docker_base_name, tag): + return "{}/{}:{}".format(docker_registry, docker_base_name, tag) From 1c13adb41453251ce3ac6e89823c015a78abba06 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 16:01:46 -0800 Subject: [PATCH 32/38] fix fixtures --- test/sagemaker_tests/sglang/general/__init__.py | 1 + .../general/integration/sagemaker/test_sm_endpoint.py | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/test/sagemaker_tests/sglang/general/__init__.py b/test/sagemaker_tests/sglang/general/__init__.py index ccd50a97c4a0..f766e7c1aa7b 100644 --- a/test/sagemaker_tests/sglang/general/__init__.py +++ b/test/sagemaker_tests/sglang/general/__init__.py @@ -37,6 +37,7 @@ def get_ecr_registry(account, region): endpoint_data = _botocore_resolver().construct_endpoint("ecr", region) return "{}.dkr.{}".format(account, endpoint_data["hostname"]) + def get_efa_test_instance_type(default: list): """ Get the instance type to be used for EFA tests from the environment, or default to a 
given value if the type diff --git a/test/sagemaker_tests/sglang/general/integration/sagemaker/test_sm_endpoint.py b/test/sagemaker_tests/sglang/general/integration/sagemaker/test_sm_endpoint.py index 9ada6c9298ef..f6de53ac0349 100644 --- a/test/sagemaker_tests/sglang/general/integration/sagemaker/test_sm_endpoint.py +++ b/test/sagemaker_tests/sglang/general/integration/sagemaker/test_sm_endpoint.py @@ -170,12 +170,14 @@ def wait_for_endpoint(endpoint_name, timeout=1800): @pytest.mark.processor("gpu") @pytest.mark.team("conda") -def test_sglang_on_sagemaker(image_uri, endpoint_name): +def test_sglang_on_sagemaker(ecr_image): + endpoint_name = f"test-{ecr_image}-{MODEL_ID.replace('/', '-')}-sglang-{INSTANCE_TYPE.replace('.', '-')}" + print("\n" + "=" * 80) print("STARTING SGLang SAGEMAKER ENDPOINT TEST".center(80)) print("=" * 80) print(f"Test Configuration:") - print(f" Image URI: {image_uri}") + print(f" Image URI: {ecr_image}") print(f" Endpoint name: {endpoint_name}") print(f" Region: {AWS_REGION}") print(f" Instance type: {INSTANCE_TYPE}") @@ -183,7 +185,7 @@ def test_sglang_on_sagemaker(image_uri, endpoint_name): print("PHASE 1: ENDPOINT DEPLOYMENT".center(80)) print("-" * 80) - if not deploy_endpoint(endpoint_name, image_uri, ROLE, INSTANCE_TYPE): + if not deploy_endpoint(endpoint_name, ecr_image, ROLE, INSTANCE_TYPE): print("\n" + "=" * 80) print("DEPLOYMENT FAILED - CLEANING UP".center(80)) print("=" * 80) From cd8a500ebce0310ad1bdc14c47c1febffad30acb Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 16:26:36 -0800 Subject: [PATCH 33/38] printing responses --- .../integration/sagemaker/test_sm_endpoint.py | 121 ++++++++++-------- 1 file changed, 66 insertions(+), 55 deletions(-) diff --git a/test/sagemaker_tests/sglang/general/integration/sagemaker/test_sm_endpoint.py b/test/sagemaker_tests/sglang/general/integration/sagemaker/test_sm_endpoint.py index f6de53ac0349..2271a0fe36e7 100644 --- 
a/test/sagemaker_tests/sglang/general/integration/sagemaker/test_sm_endpoint.py +++ b/test/sagemaker_tests/sglang/general/integration/sagemaker/test_sm_endpoint.py @@ -31,6 +31,38 @@ ROLE = "SageMakerRole" +def print_section(title, char="=", width=80): + """Print a section header with specified character and width.""" + print(f"\n{char * width}") + print(f"{title:^{width}}") + print(f"{char * width}") + + +def print_phase(phase_name, width=80): + """Print a phase header.""" + print(f"\n{'-' * width}") + print(f"{phase_name:^{width}}") + print(f"{'-' * width}") + + +def print_config(config_dict): + """Print configuration details.""" + print("Test Configuration:") + for key, value in config_dict.items(): + print(f" {key}: {value}") + + +def print_response(response): + """Print formatted response.""" + print("\n Response from endpoint:") + print("-" * 40) + if isinstance(response, (dict, list)): + print(json.dumps(response, indent=2)) + else: + print(response) + print("-" * 40) + + def get_secret_hf_token(): print("Retrieving HuggingFace token from AWS Secrets Manager...") secret_name = "test/hf_token" @@ -171,75 +203,54 @@ def wait_for_endpoint(endpoint_name, timeout=1800): @pytest.mark.processor("gpu") @pytest.mark.team("conda") def test_sglang_on_sagemaker(ecr_image): - endpoint_name = f"test-{ecr_image}-{MODEL_ID.replace('/', '-')}-sglang-{INSTANCE_TYPE.replace('.', '-')}" - - print("\n" + "=" * 80) - print("STARTING SGLang SAGEMAKER ENDPOINT TEST".center(80)) - print("=" * 80) - print(f"Test Configuration:") - print(f" Image URI: {ecr_image}") - print(f" Endpoint name: {endpoint_name}") - print(f" Region: {AWS_REGION}") - print(f" Instance type: {INSTANCE_TYPE}") - print("\n" + "-" * 80) - print("PHASE 1: ENDPOINT DEPLOYMENT".center(80)) - print("-" * 80) - + endpoint_name = f"test-sglang-{MODEL_ID.replace('/', '-')}-{INSTANCE_TYPE.replace('.', '-')}" + + print_section("STARTING SGLang SAGEMAKER ENDPOINT TEST") + config = { + "Image URI": ecr_image, + "Endpoint 
name": endpoint_name, + "Region": AWS_REGION, + "Instance type": INSTANCE_TYPE, + } + print_config(config) + + # Phase 1: Deployment + print_phase("PHASE 1: ENDPOINT DEPLOYMENT") if not deploy_endpoint(endpoint_name, ecr_image, ROLE, INSTANCE_TYPE): - print("\n" + "=" * 80) - print("DEPLOYMENT FAILED - CLEANING UP".center(80)) - print("=" * 80) - # Cleanup any partially created resources + print_section("DEPLOYMENT FAILED - CLEANING UP") delete_endpoint(endpoint_name) raise Exception("SageMaker endpoint deployment failed") - print("\n" + "-" * 80) - print("PHASE 2: WAITING FOR ENDPOINT READINESS".center(80)) - print("-" * 80) + # Phase 2: Endpoint Readiness + print_phase("PHASE 2: WAITING FOR ENDPOINT READINESS") if not wait_for_endpoint(endpoint_name): print("\nEndpoint failed to become ready. Initiating cleanup...") delete_endpoint(endpoint_name) - print("\n" + "=" * 80) - print("ENDPOINT READINESS FAILED".center(80)) - print("=" * 80) + print_section("ENDPOINT READINESS FAILED") raise Exception("SageMaker endpoint failed to become ready") print("\nEndpoint is ready for inference!") - print("\n" + "-" * 80) - print("PHASE 3: TESTING INFERENCE".center(80)) - print("-" * 80) - test_prompt = "Write a python script to calculate square of n" + # Phase 3: Testing Inference + print_phase("PHASE 3: TESTING INFERENCE") + test_prompt = "Write a python script to calculate square of n" response = invoke_endpoint( endpoint_name=endpoint_name, prompt=test_prompt, max_tokens=2400, temperature=0.01 ) - if response: - print("\n Inference test successful!") - print("\n Response from endpoint:") - print("-" * 40) - if isinstance(response, (dict, list)): - print(json.dumps(response, indent=2)) - else: - print(response) - print("-" * 40) - - print("\n" + "-" * 80) - print(" PHASE 4: CLEANUP".center(80)) - print("-" * 80) - if delete_endpoint(endpoint_name): - print("\n" + "=" * 80) - print(" TEST COMPLETED SUCCESSFULLY! 
".center(80)) - print("=" * 80) - else: - print("\n Cleanup failed") - else: - print("\n No response received from the endpoint.") - print("\n" + "-" * 80) - print(" PHASE 4: CLEANUP (FAILED INFERENCE)".center(80)) - print("-" * 80) + if not response: + print("\nNo response received from the endpoint.") + print_phase("PHASE 4: CLEANUP (FAILED INFERENCE)") delete_endpoint(endpoint_name) - print("\n" + "=" * 80) - print(" TEST FAILED ".center(80)) - print("=" * 80) + print_section("TEST FAILED") raise Exception("SageMaker endpoint inference test failed") + + print("\nInference test successful!") + print_response(response) + + # Phase 4: Cleanup + print_phase("PHASE 4: CLEANUP") + if delete_endpoint(endpoint_name): + print_section("TEST COMPLETED SUCCESSFULLY!") + else: + print("\nCleanup failed") From d7e0f05c2b145ae74629cf440e974cc0bcb4a4a4 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 16:39:56 -0800 Subject: [PATCH 34/38] fix endpoint name --- .../sglang/general/integration/sagemaker/test_sm_endpoint.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/sagemaker_tests/sglang/general/integration/sagemaker/test_sm_endpoint.py b/test/sagemaker_tests/sglang/general/integration/sagemaker/test_sm_endpoint.py index 2271a0fe36e7..988abf67203e 100644 --- a/test/sagemaker_tests/sglang/general/integration/sagemaker/test_sm_endpoint.py +++ b/test/sagemaker_tests/sglang/general/integration/sagemaker/test_sm_endpoint.py @@ -15,6 +15,7 @@ import json import os import time +from datetime import datetime import boto3 import pytest @@ -203,7 +204,8 @@ def wait_for_endpoint(endpoint_name, timeout=1800): @pytest.mark.processor("gpu") @pytest.mark.team("conda") def test_sglang_on_sagemaker(ecr_image): - endpoint_name = f"test-sglang-{MODEL_ID.replace('/', '-')}-{INSTANCE_TYPE.replace('.', '-')}" + timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") + endpoint_name = f"test-sglang-{MODEL_ID.replace('/', 
'-').replace('.','')}-{INSTANCE_TYPE.replace('.', '-')}-{timestamp}" print_section("STARTING SGLang SAGEMAKER ENDPOINT TEST") config = { From 481fa34cbed2abfbae735239a9a08aed0caa3b17 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 17:03:35 -0800 Subject: [PATCH 35/38] remove sm local --- test/testrunner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/testrunner.py b/test/testrunner.py index 6bc39a27e34e..4f3dcaa73d9c 100644 --- a/test/testrunner.py +++ b/test/testrunner.py @@ -557,6 +557,7 @@ def main(): "neuron": "Skipping - there are no local mode tests for Neuron", "huggingface-tensorflow-training": "Skipping - there are no local mode tests for HF TF training", "vllm": "Skipping - there are no local mode tests for VLLM", + "sglang": "Skipping - there are no local mode tests for sglang", } for skip_condition, reason in sm_local_to_skip.items(): From f2a1eb0898e74a5ca1ee69ddd59177d76b268d14 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 18:26:15 -0800 Subject: [PATCH 36/38] revert sglang --- dlc_developer_config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 54d62e91dd5e..18f86df6c8ce 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -37,7 +37,7 @@ deep_canary_mode = false [build] # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. # available frameworks - ["base", "vllm", "sglang" "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] -build_frameworks = ["sglang"] +build_frameworks = [] # By default we build both training and inference containers. Set true/false values to determine which to build.
From 3dfcb320faefabc1fef18cd0cad6bdef3cb2e2a9 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 18:30:21 -0800 Subject: [PATCH 37/38] revert new test structure --- dlc_developer_config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 18f86df6c8ce..12c7996725e3 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -58,7 +58,7 @@ notify_test_failures = false [test] # Set to true to use the new test structure path for frameworks # Off by default (set to false) -use_new_test_structure = true +use_new_test_structure = false ### On by default sanity_tests = true From 5d33f6eaeda833fe4b48aa8e236d86e1a47bb5fb Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Tue, 11 Nov 2025 18:40:27 -0800 Subject: [PATCH 38/38] fix syntax --- dlc_developer_config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 12c7996725e3..389bd905ccf0 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -36,7 +36,7 @@ deep_canary_mode = false [build] # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. -# available frameworks - ["base", "vllm", "sglang" "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] +# available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] build_frameworks = []