From 8ea932b73ac294ba918cf553ab506936e11f2d76 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 14:54:16 -0800 Subject: [PATCH 01/63] try build Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 11 + .gitignore | 1 + .pre-commit-config.yaml | 28 +- DEVELOPMENT.md | 7 + docker/vllm/Dockerfile.rayserve | 68 ++++ scripts/dockerd_entrypoint.sh | 6 + scripts/install_efa.sh | 102 +++++ scripts/setup_oss_compliance.sh | 34 ++ scripts/telemetry/bash_telemetry.sh | 11 + scripts/telemetry/deep_learning_container.py | 395 +++++++++++++++++++ 10 files changed, 650 insertions(+), 13 deletions(-) create mode 100644 docker/vllm/Dockerfile.rayserve create mode 100755 scripts/dockerd_entrypoint.sh create mode 100755 scripts/install_efa.sh create mode 100755 scripts/setup_oss_compliance.sh create mode 100755 scripts/telemetry/bash_telemetry.sh create mode 100755 scripts/telemetry/deep_learning_container.py diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index e69804295944..81b28112ea14 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -41,6 +41,17 @@ jobs: - uses: actions/checkout@v5 - run: .github/scripts/runner_setup.sh - run: .github/scripts/buildkitd.sh + - name: build vllm-rayserve-ec2 image + shell: bash + run: | + DATE=$(date +"%Y-%m-%d") + COMMIT_REF=$(git rev-parse --short HEAD) + DOCKER_BUILDKIT=1 docker build --progress plain \ + --build-arg CACHE_REFRESH=${DATE} \ + --tag vllm:0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-${COMMIT_REF} \ + --target vllm-rayserve-ec2 \ + -f docker/vllm/Dockerfile.rayserve . 
+ docker image ls example-on-g6xl-runner-1: needs: [example-on-build-runner] diff --git a/.gitignore b/.gitignore index 18b67f20119c..126c4416f381 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ __pycache__ .idea *.pyc .venv +.ruff_cache \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3ec28eba9d20..63f60f47f387 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,23 +15,14 @@ repos: # optional: add additional arguments here - --indent=2 - --write - stages: [manual] # run in CI - - repo: https://github.com/rhysd/actionlint - rev: v1.7.7 - hooks: - - id: actionlint - stages: [manual] # run in CI + stages: [pre-commit] - repo: https://github.com/scop/pre-commit-shfmt rev: v3.12.0-2 # Use the latest stable revision hooks: - id: shfmt # Optional: Add arguments to shfmt if needed, e.g., to enable "simplify" mode args: ["-s"] - - repo: https://github.com/crate-ci/typos - rev: v1.38.1 - hooks: - - id: typos - args: [--force-exclude] + stages: [pre-commit] - repo: https://github.com/hukkin/mdformat rev: 1.0.0 # Use the ref you want to point at hooks: @@ -40,17 +31,28 @@ repos: additional_dependencies: - mdformat-gfm - mdformat-black + stages: [pre-commit] - repo: https://github.com/igorshubovych/markdownlint-cli rev: v0.45.0 hooks: - id: markdownlint args: [--fix] + stages: [pre-commit] - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.14.3 hooks: - - id: ruff-check - args: [ --fix, --output-format=github ] - id: ruff-format + stages: [pre-commit] + - id: ruff-check + - repo: https://github.com/rhysd/actionlint + rev: v1.7.7 + hooks: + - id: actionlint + - repo: https://github.com/crate-ci/typos + rev: v1.38.1 + hooks: + - id: typos + args: [--force-exclude] - repo: local hooks: - id: signoff-commit diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 97ec98b254b1..55fefbd3a911 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -28,6 +28,13 @@ uv pip install pre-commit pre-commit 
install ``` +Install go using [homebrew](https://brew.sh/), below example assume on Mac. + +```bash +brew install go +go env -w GOPROXY=direct +``` + To manually run all linters: ```bash diff --git a/docker/vllm/Dockerfile.rayserve b/docker/vllm/Dockerfile.rayserve new file mode 100644 index 000000000000..3eeff6528797 --- /dev/null +++ b/docker/vllm/Dockerfile.rayserve @@ -0,0 +1,68 @@ +FROM docker.io/vllm/vllm-openai:v0.10.2 AS base +ARG PYTHON="python3" +LABEL maintainer="Amazon AI" +ARG EFA_VERSION="1.43.3" +LABEL dlc_major_version="1" +ENV DEBIAN_FRONTEND=noninteractive \ + LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 \ + DLC_CONTAINER_TYPE=base \ + # Python won’t try to write .pyc or .pyo files on the import of source modules + # Force stdin, stdout and stderr to be totally unbuffered. Good for logging + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONIOENCODING=UTF-8 \ + LD_LIBRARY_PATH="/usr/local/lib:/opt/amazon/ofi-nccl/lib/x86_64-linux-gnu:/opt/amazon/openmpi/lib:/opt/amazon/efa/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" \ + PATH="/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/cuda/bin:${PATH}" + +WORKDIR / + +COPY ./scripts/telemetry/deep_learning_container.py /usr/local/bin/deep_learning_container.py +COPY ./scripts/telemetry/bash_telemetry.sh /usr/local/bin/bash_telemetry.sh +COPY ./scripts/setup_oss_compliance.sh setup_oss_compliance.sh + +RUN chmod +x /usr/local/bin/deep_learning_container.py \ + && chmod +x /usr/local/bin/bash_telemetry.sh \ + && echo 'source /usr/local/bin/bash_telemetry.sh' >>/etc/bash.bashrc \ + && bash setup_oss_compliance.sh ${PYTHON} && rm setup_oss_compliance.sh \ + # create symlink for python + && ln -s /usr/bin/python3 /usr/bin/python \ + # clean up + && rm -rf ${HOME_DIR}/oss_compliance* \ + && rm -rf /tmp/tmp* \ + && rm -rf /tmp/uv* \ + && rm -rf /var/lib/apt/lists/* \ + && rm -rf /root/.cache | true + +COPY ./scripts/install_efa.sh install_efa.sh +RUN bash install_efa.sh ${EFA_VERSION} \ + && rm install_efa.sh \ 
+ && mkdir -p /tmp/nvjpeg \ + && cd /tmp/nvjpeg \ + && wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ + && tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ + && rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \ + && rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h \ + && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/lib/libnvjpeg* /usr/local/cuda/lib64/ \ + && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/include/* /usr/local/cuda/include/ \ + && rm -rf /tmp/nvjpeg \ + # remove cuobjdump and nvdisasm + && rm -rf /usr/local/cuda/bin/cuobjdump* \ + && rm -rf /usr/local/cuda/bin/nvdisasm* + +# ====================== ray serve ========================================= +FROM base AS vllm-rayserve-ec2 + +RUN uv pip install --system ray[serve]==2.49.0 \ + && uv cache clean + +ARG CACHE_REFRESH=0 +RUN dpkg -l | grep -E "cuda|nvidia|libnv" | awk '{print $2}' | xargs apt-mark hold \ + && apt-get update \ + && apt-get upgrade -y \ + && apt-get clean + +COPY ./scripts/dockerd_entrypoint.sh /usr/local/bin/dockerd_entrypoint.sh +RUN chmod +x /usr/local/bin/dockerd_entrypoint.sh + +ENTRYPOINT ["/usr/local/bin/dockerd_entrypoint.sh"] \ No newline at end of file diff --git a/scripts/dockerd_entrypoint.sh b/scripts/dockerd_entrypoint.sh new file mode 100755 index 000000000000..c05dab13dfa1 --- /dev/null +++ b/scripts/dockerd_entrypoint.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +# Check if telemetry file exists before executing +# Execute telemetry script if it exists, suppress errors +bash /usr/local/bin/bash_telemetry.sh >/dev/null 2>&1 || true + +python3 -m vllm.entrypoints.openai.api_server "$@" \ No newline at end of file diff --git a/scripts/install_efa.sh b/scripts/install_efa.sh new file mode 100755 index 000000000000..75cbc6e93116 --- /dev/null +++ b/scripts/install_efa.sh @@ -0,0 +1,102 @@ +#!/bin/bash + +set -ex + +ARCH=$(uname -m) +case $ARCH in + x86_64) + 
ARCH_DIR="x86_64-linux-gnu" + ;; + aarch64) + ARCH_DIR="aarch64-linux-gnu" + ;; + *) + echo "Unsupported architecture: $ARCH" + exit 1 + ;; +esac + +function check_libnccl_net_so { + OFI_LIB_DIR="/opt/amazon/ofi-nccl/lib/${ARCH_DIR}" + NCCL_NET_SO="$OFI_LIB_DIR/libnccl-net.so" + + # Check if file exists + if [ ! -f "$NCCL_NET_SO" ]; then + echo "ERROR: $NCCL_NET_SO does not exist" + return 1 + fi +} + +function install_efa { + EFA_VERSION=$1 + OPEN_MPI_PATH="/opt/amazon/openmpi" + + # Install build time tools + apt-get update + apt-get install -y --allow-change-held-packages --no-install-recommends \ + curl \ + build-essential \ + cmake \ + git + + # Install EFA + mkdir /tmp/efa + cd /tmp/efa + curl -O https://s3-us-west-2.amazonaws.com/aws-efa-installer/aws-efa-installer-${EFA_VERSION}.tar.gz + tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz + cd aws-efa-installer + ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify + rm -rf /tmp/efa + # Configure Open MPI and configure NCCL parameters + mv ${OPEN_MPI_PATH}/bin/mpirun ${OPEN_MPI_PATH}/bin/mpirun.real + echo '#!/bin/bash' > ${OPEN_MPI_PATH}/bin/mpirun + echo "${OPEN_MPI_PATH}/bin/mpirun.real --allow-run-as-root \"\$@\"" >> ${OPEN_MPI_PATH}/bin/mpirun + chmod a+x ${OPEN_MPI_PATH}/bin/mpirun + echo "hwloc_base_binding_policy = none" >> ${OPEN_MPI_PATH}/etc/openmpi-mca-params.conf + echo "rmaps_base_mapping_policy = slot" >> ${OPEN_MPI_PATH}/etc/openmpi-mca-params.conf + echo NCCL_DEBUG=INFO >> /etc/nccl.conf + echo NCCL_SOCKET_IFNAME=^docker0,lo >> /etc/nccl.conf + + # Install OpenSSH for MPI to communicate between containers, allow OpenSSH to talk to containers without asking for confirmation + apt-get install -y --no-install-recommends \ + openssh-client \ + openssh-server + mkdir -p /var/run/sshd + cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + # Configure 
OpenSSH so that nodes can communicate with each other + mkdir -p /var/run/sshd + sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd + rm -rf /root/.ssh/ + mkdir -p /root/.ssh/ + ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa + cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys + printf "Host *\n StrictHostKeyChecking no\n" >> /root/.ssh/config + + # Remove build time tools + # apt-get remove -y + # curl + # build-essential + # cmake + # git + + # Cleanup + apt-get clean + apt-get autoremove -y + rm -rf /var/lib/apt/lists/* + ldconfig + check_libnccl_net_so +} + +# idiomatic parameter and option handling in sh +while test $# -gt 0 +do + case "$1" in + [0-9].[0-9]*.[0-9]*) install_efa $1; + ;; + *) echo "bad argument $1"; exit 1 + ;; + esac + shift +done diff --git a/scripts/setup_oss_compliance.sh b/scripts/setup_oss_compliance.sh new file mode 100755 index 000000000000..426f8fb52f63 --- /dev/null +++ b/scripts/setup_oss_compliance.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +set -ex + +function install_oss_compliance { + HOME_DIR="/root" + PYTHON=$1 + + if [ -z "$PYTHON" ]; then + echo "Python version not specified. Using default Python." 
+ PYTHON="python3" + fi + curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip + ${PYTHON} -c "import zipfile, os; zipfile.ZipFile('/root/oss_compliance.zip').extractall('/root/'); os.remove('/root/oss_compliance.zip')" + cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance + chmod +x /usr/local/bin/testOSSCompliance + chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh + ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} + rm -rf ${HOME_DIR}/oss_compliance* + rm -rf /tmp/tmp* + # Removing the cache as it is needed for security verification + rm -rf /root/.cache | true +} + +while test $# -gt 0 +do + case "$1" in + python*) install_oss_compliance $1; + ;; + *) echo "bad argument $1"; exit 1 + ;; + esac + shift +done \ No newline at end of file diff --git a/scripts/telemetry/bash_telemetry.sh b/scripts/telemetry/bash_telemetry.sh new file mode 100755 index 000000000000..390000bacfca --- /dev/null +++ b/scripts/telemetry/bash_telemetry.sh @@ -0,0 +1,11 @@ +# telemetry.sh +#!/bin/bash +if [ -f /usr/local/bin/deep_learning_container.py ] && [[ -z "${OPT_OUT_TRACKING}" || "${OPT_OUT_TRACKING,,}" != "true" ]]; then + ( + python /usr/local/bin/deep_learning_container.py \ + --framework "${FRAMEWORK}" \ + --framework-version "${FRAMEWORK_VERSION}" \ + --container-type "${CONTAINER_TYPE}" \ + &>/dev/null & + ) +fi diff --git a/scripts/telemetry/deep_learning_container.py b/scripts/telemetry/deep_learning_container.py new file mode 100755 index 000000000000..35e730d745d6 --- /dev/null +++ b/scripts/telemetry/deep_learning_container.py @@ -0,0 +1,395 @@ +# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. 
A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +import argparse +import json +import logging +import multiprocessing +import os +import re +import signal +import sys + +import botocore.session +import requests + +TIMEOUT_SECS = 5 +REGION_MAPPING = { + "ap-northeast-1": "ddce303c", + "ap-northeast-2": "528c8d92", + "ap-southeast-1": "c35f9f00", + "ap-southeast-2": "d2add9c0", + "ap-south-1": "9deb4123", + "ca-central-1": "b95e2bf4", + "eu-central-1": "bfec3957", + "eu-north-1": "b453c092", + "eu-west-1": "d763c260", + "eu-west-2": "ea20d193", + "eu-west-3": "1894043c", + "sa-east-1": "030b4357", + "us-east-1": "487d6534", + "us-east-2": "72252b46", + "us-west-1": "d02c1125", + "us-west-2": "d8c0d063", + "af-south-1": "08ea8dc5", + "eu-south-1": "29566eac", + "me-south-1": "7ea07793", + "ap-southeast-7": "1699f14f", + "ap-southeast-3": "be0a3174", + "me-central-1": "6e06aaeb", + "ap-east-1": "5e1fbf92", + "ap-south-2": "50209442", + "ap-northeast-3": "fa298003", + "ap-southeast-5": "5852cd87", + "us-northeast-1": "bbf9e961", + "ap-southeast-4": "dc6f76ce", + "mx-central-1": "ed0da79c", + "il-central-1": "2fb2448e", + "ap-east-2": "8947749e", + "ca-west-1": "ea83ea06", + "eu-south-2": "df2c9d70", + "eu-central-2": "aa7aabcc", +} + + +def requests_helper(url, headers=None, timeout=0.1): + """ + Requests to get instance metadata using imdsv1 and imdsv2 + :param url: str, url to get the request + :param headers: str, headers needed to make a request + :param timeout: float, timeout value for a request + """ + response = None + try: + if headers: + response = requests.get(url, headers=headers, timeout=timeout) + else: + response = requests.get(url, timeout=timeout) 
+ + except requests.exceptions.RequestException as e: + logging.error("Request exception: {}".format(e)) + + return response + + +def requests_helper_imds(url, token=None): + """ + Requests to get instance metadata using imdsv1 and imdsv2 + :param url: str, url to get the request + :param token: str, token is needed to use imdsv2 + """ + response_text = None + response = None + headers = None + if token: + headers = {"X-aws-ec2-metadata-token": token} + timeout = 1 + try: + while timeout <= 3: + if headers: + response = requests.get(url, headers=headers, timeout=timeout) + else: + response = requests.get(url, timeout=timeout) + if response: + break + timeout += 1 + + except requests.exceptions.RequestException as e: + logging.error("Request exception: {}".format(e)) + + if response is not None and not (400 <= response.status_code < 600): + response_text = response.text + + return response_text + + +def get_imdsv2_token(): + """ + Retrieve token using imdsv2 service + """ + response = None + token = None + headers = {"X-aws-ec2-metadata-token-ttl-seconds": "600"} + url = "http://169.254.169.254/latest/api/token" + timeout = 1 + + try: + while timeout <= 3: + response = requests.put(url, headers=headers, timeout=timeout) + if response: + break + timeout += 1 + except requests.exceptions.RequestException as e: + logging.error("Request exception: {}".format(e)) + + if response is not None and not (400 <= response.status_code < 600): + token = response.text + + return token + + +def _validate_instance_id(instance_id): + """ + Validate instance ID + """ + instance_id_regex = r"^(i-\S{17})" + compiled_regex = re.compile(instance_id_regex) + match = compiled_regex.match(instance_id) + + if not match: + return None + + return match.group(1) + + +def _retrieve_instance_id(token=None): + """ + Retrieve instance ID from instance metadata service + """ + instance_id = None + instance_url = "http://169.254.169.254/latest/meta-data/instance-id" + + if token: + instance_id = 
requests_helper_imds(instance_url, token) + else: + instance_id = requests_helper_imds(instance_url) + + if instance_id: + instance_id = _validate_instance_id(instance_id) + + return instance_id + + +def _retrieve_instance_region(token=None): + """ + Retrieve instance region from instance metadata service + """ + region = None + response_json = None + + region_url = "http://169.254.169.254/latest/dynamic/instance-identity/document" + + if token: + response_text = requests_helper_imds(region_url, token) + else: + response_text = requests_helper_imds(region_url) + + if response_text: + response_json = json.loads(response_text) + + if response_json["region"] in REGION_MAPPING: + region = response_json["region"] + + return region + + +def _retrieve_device(): + return ( + "gpu" + if os.path.isdir("/usr/local/cuda") + else ( + "eia" + if os.path.isdir("/opt/ei_tools") + else ( + "neuron" + if os.path.exists("/usr/local/bin/tensorflow_model_server_neuron") + else "cpu" + ) + ) + ) + + +def _retrieve_cuda(): + cuda_version = "" + try: + cuda_path = os.path.basename(os.readlink("/usr/local/cuda")) + cuda_version_search = re.search(r"\d+\.\d+", cuda_path) + cuda_version = "" if not cuda_version_search else cuda_version_search.group() + except Exception as e: + logging.error(f"Failed to get cuda path: {e}") + return cuda_version + + +def _retrieve_os(): + version = "" + name = "" + with open("/etc/os-release", "r") as f: + for line in f.readlines(): + if re.match(r"^ID=\w+$", line): + name = re.search(r"^ID=(\w+)$", line).group(1) + if re.match(r'^VERSION_ID="\d+\.\d+"$', line): + version = re.search(r'^VERSION_ID="(\d+\.\d+)"$', line).group(1) + return name + version + + +def parse_args(): + """ + Parsing function to parse input arguments. + Return: args, which containers parsed input arguments. 
+ """ + parser = argparse.ArgumentParser() + parser.add_argument( + "--framework", + choices=["tensorflow", "mxnet", "pytorch", "base", "vllm"], + help="framework of container image.", + required=True, + ) + parser.add_argument( + "--framework-version", help="framework version of container image.", required=True + ) + parser.add_argument( + "--container-type", + choices=["training", "inference", "general"], + help="What kind of jobs you want to run on container. Either training or inference.", + required=True, + ) + + args, _unknown = parser.parse_known_args() + + fw_version_pattern = r"\d+(\.\d+){1,2}(-rc\d)?" + + # PT 1.10 and above has +cpu or +cu113 string, so handle accordingly + if args.framework == "pytorch": + pt_fw_version_pattern = r"(\d+(\.\d+){1,2}(-rc\d)?)((\+cpu)|(\+cu\d{3})|(a0\+git\w{7}))" + pt_fw_version_match = re.fullmatch(pt_fw_version_pattern, args.framework_version) + if pt_fw_version_match: + args.framework_version = pt_fw_version_match.group(1) + assert re.fullmatch(fw_version_pattern, args.framework_version), ( + f"args.framework_version = {args.framework_version} does not match {fw_version_pattern}\n" + f"Please specify framework version as X.Y.Z or X.Y." + ) + # TFS 2.12.1 still uses TF 2.12.0 and breaks the telemetry check as it is checking TF version + # instead of TFS version. WE are forcing the version we want. 
+ if ( + args.framework == "tensorflow" + and args.container_type == "inference" + and args.framework_version == "2.12.0" + ): + args.framework_version = "2.12.1" + + return args + + +def query_bucket(instance_id, region): + """ + GET request on an empty object from an Amazon S3 bucket + """ + + response = None + args = parse_args() + framework, framework_version, container_type = ( + args.framework, + args.framework_version, + args.container_type, + ) + + py_version = sys.version.split(" ")[0] + + if instance_id is not None and region is not None: + url = ( + "https://aws-deep-learning-containers-{0}.s3.{1}.amazonaws.com" + "/dlc-containers-{2}.txt?x-instance-id={2}&x-framework={3}&x-framework_version={4}&x-py_version={5}&x-container_type={6}".format( + REGION_MAPPING[region], + region, + instance_id, + framework, + framework_version, + py_version, + container_type, + ) + ) + response = requests_helper(url, timeout=0.2) + if os.environ.get("TEST_MODE") == str(1): + with open(os.path.join(os.sep, "tmp", "test_request.txt"), "w+") as rf: + rf.write(url) + + logging.debug("Query bucket finished: {}".format(response)) + + return response + + +def tag_instance(instance_id, region): + """ + Apply instance tag on the instance that is running the container using botocore + """ + args = parse_args() + framework, framework_version, container_type = ( + args.framework, + args.framework_version, + args.container_type, + ) + py_version = sys.version.split(" ")[0] + device = _retrieve_device() + cuda_version = f"_cuda{_retrieve_cuda()}" if device == "gpu" else "" + os_version = _retrieve_os() + + tag = f"{framework}_{container_type}_{framework_version}_python{py_version}_{device}{cuda_version}_{os_version}" + tag_struct = {"Key": "aws-dlc-autogenerated-tag-do-not-delete", "Value": tag} + + request_status = None + if instance_id and region: + try: + session = botocore.session.get_session() + ec2_client = session.create_client("ec2", region_name=region) + response = 
ec2_client.create_tags(Resources=[instance_id], Tags=[tag_struct]) + request_status = response.get("ResponseMetadata").get("HTTPStatusCode") + if os.environ.get("TEST_MODE") == str(1): + with open(os.path.join(os.sep, "tmp", "test_tag_request.txt"), "w+") as rf: + rf.write(json.dumps(tag_struct, indent=4)) + except Exception as e: + logging.error(f"Error. {e}") + logging.debug("Instance tagged successfully: {}".format(request_status)) + else: + logging.error("Failed to retrieve instance_id or region") + + return request_status + + +def main(): + """ + Invoke bucket query + """ + # Logs are not necessary for normal run. Remove this line while debugging. + logging.getLogger().disabled = True + + logging.basicConfig(level=logging.ERROR) + + token = None + instance_id = None + region = None + token = get_imdsv2_token() + if token: + instance_id = _retrieve_instance_id(token) + region = _retrieve_instance_region(token) + else: + instance_id = _retrieve_instance_id() + region = _retrieve_instance_region() + + bucket_process = multiprocessing.Process(target=query_bucket, args=(instance_id, region)) + tag_process = multiprocessing.Process(target=tag_instance, args=(instance_id, region)) + + bucket_process.start() + tag_process.start() + + tag_process.join(TIMEOUT_SECS) + bucket_process.join(TIMEOUT_SECS) + + if tag_process.is_alive(): + os.kill(tag_process.pid, signal.SIGKILL) + tag_process.join() + if bucket_process.is_alive(): + os.kill(bucket_process.pid, signal.SIGKILL) + bucket_process.join() + + +if __name__ == "__main__": + main() From 50e9793f741219a39747c08b41a0231653b098c1 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 15:06:13 -0800 Subject: [PATCH 02/63] fix action Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 81b28112ea14..949b8b3f734f 100644 --- a/.github/workflows/pr-example.yml +++ 
b/.github/workflows/pr-example.yml @@ -47,8 +47,8 @@ jobs: DATE=$(date +"%Y-%m-%d") COMMIT_REF=$(git rev-parse --short HEAD) DOCKER_BUILDKIT=1 docker build --progress plain \ - --build-arg CACHE_REFRESH=${DATE} \ - --tag vllm:0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-${COMMIT_REF} \ + --build-arg CACHE_REFRESH="${DATE}" \ + --tag vllm:0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-"${COMMIT_REF}" \ --target vllm-rayserve-ec2 \ -f docker/vllm/Dockerfile.rayserve . docker image ls From 47e7bf6a28d4acfba3d48ae6326e5f1feee5a3bc Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 15:15:36 -0800 Subject: [PATCH 03/63] using long commit ref Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 949b8b3f734f..e67ef20351b2 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -45,7 +45,7 @@ jobs: shell: bash run: | DATE=$(date +"%Y-%m-%d") - COMMIT_REF=$(git rev-parse --short HEAD) + COMMIT_REF=$(git rev-parse HEAD) DOCKER_BUILDKIT=1 docker build --progress plain \ --build-arg CACHE_REFRESH="${DATE}" \ --tag vllm:0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-"${COMMIT_REF}" \ From f3e7416dc5a7f049ef6b2f536ceeff82e56b091c Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 15:57:17 -0800 Subject: [PATCH 04/63] install/update uv only if not already installed Signed-off-by: Junpu Fan --- .github/scripts/runner_setup.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/scripts/runner_setup.sh b/.github/scripts/runner_setup.sh index bb1b7d0976b4..e6eb3aecd88e 100755 --- a/.github/scripts/runner_setup.sh +++ b/.github/scripts/runner_setup.sh @@ -1,6 +1,8 @@ #!/bin/bash set -e -curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR="/usr/local/bin" sh -uv self update +if ! 
command -v uv &> /dev/null; then + curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR="/usr/local/bin" sh + uv self update +fi docker --version From 96d976b59a312ea0f532ad09be0c344d8ccdf12e Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 16:16:12 -0800 Subject: [PATCH 05/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index e67ef20351b2..32d33daf2328 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -46,12 +46,17 @@ jobs: run: | DATE=$(date +"%Y-%m-%d") COMMIT_REF=$(git rev-parse HEAD) + REPO=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci + TAG=vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }}-"${COMMIT_REF}" + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com DOCKER_BUILDKIT=1 docker build --progress plain \ --build-arg CACHE_REFRESH="${DATE}" \ - --tag vllm:0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-"${COMMIT_REF}" \ + --tag "${REPO}":"${TAG}" \ --target vllm-rayserve-ec2 \ -f docker/vllm/Dockerfile.rayserve . 
- docker image ls + docker push "${REPO}":"${TAG}" + docker rmi "${REPO}":"${TAG}" + yes | docker system prune example-on-g6xl-runner-1: needs: [example-on-build-runner] From e21334c31d3df4400784d990edf709a6c577ae16 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 16:56:45 -0800 Subject: [PATCH 06/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 32d33daf2328..f9d3fbb6c313 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -44,20 +44,16 @@ jobs: - name: build vllm-rayserve-ec2 image shell: bash run: | - DATE=$(date +"%Y-%m-%d") - COMMIT_REF=$(git rev-parse HEAD) - REPO=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci - TAG=vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }}-"${COMMIT_REF}" aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + IMAGE_TAG=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }} DOCKER_BUILDKIT=1 docker build --progress plain \ - --build-arg CACHE_REFRESH="${DATE}" \ - --tag "${REPO}":"${TAG}" \ + --build-arg CACHE_REFRESH=$(date +"%Y-%m-%d_%H") \ + --tag "$IMAGE_TAG" \ --target vllm-rayserve-ec2 \ -f docker/vllm/Dockerfile.rayserve . 
- docker push "${REPO}":"${TAG}" - docker rmi "${REPO}":"${TAG}" - yes | docker system prune - + docker push "$IMAGE_TAG" + docker rmi "$IMAGE_TAG" + example-on-g6xl-runner-1: needs: [example-on-build-runner] runs-on: From 082f67c609362b7121bb1a120e1ef68efe40d749 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 17:01:48 -0800 Subject: [PATCH 07/63] fix actionlint Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index f9d3fbb6c313..35e2d6359fdb 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -47,7 +47,7 @@ jobs: aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com IMAGE_TAG=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }} DOCKER_BUILDKIT=1 docker build --progress plain \ - --build-arg CACHE_REFRESH=$(date +"%Y-%m-%d_%H") \ + --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d_%H")" \ --tag "$IMAGE_TAG" \ --target vllm-rayserve-ec2 \ -f docker/vllm/Dockerfile.rayserve . 
From a82924d2ee29162deca033e88687ba6372380c40 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 17:28:37 -0800 Subject: [PATCH 08/63] try inline cache Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 35e2d6359fdb..a0692b324d74 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -47,7 +47,10 @@ jobs: aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com IMAGE_TAG=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }} DOCKER_BUILDKIT=1 docker build --progress plain \ + --build-arg BUILDKIT_INLINE_CACHE=1 \ --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d_%H")" \ + --cache-to=type=inline \ + --cache-from=type=registry,ref="$IMAGE_TAG" \ --tag "$IMAGE_TAG" \ --target vllm-rayserve-ec2 \ -f docker/vllm/Dockerfile.rayserve . 
From d82b4a1a1bc34fa2b804a05e8de7785b4b0728c6 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 17:33:25 -0800 Subject: [PATCH 09/63] fix Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index a0692b324d74..24092d131525 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -47,7 +47,6 @@ jobs: aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com IMAGE_TAG=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }} DOCKER_BUILDKIT=1 docker build --progress plain \ - --build-arg BUILDKIT_INLINE_CACHE=1 \ --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d_%H")" \ --cache-to=type=inline \ --cache-from=type=registry,ref="$IMAGE_TAG" \ From c7d65bc18542d7142d219564466da00aceebeda5 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 17:41:05 -0800 Subject: [PATCH 10/63] use buildx Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 24092d131525..00287d427eb7 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -46,7 +46,7 @@ jobs: run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com IMAGE_TAG=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }} - DOCKER_BUILDKIT=1 docker build --progress plain \ + docker 
buildx build --progress plain \ --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d_%H")" \ --cache-to=type=inline \ --cache-from=type=registry,ref="$IMAGE_TAG" \ From 09bfc6319cddf46181b12fe8df13ccebbbe35c3d Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 17:51:02 -0800 Subject: [PATCH 11/63] per day cache refresh Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 00287d427eb7..f9f7637197aa 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -47,7 +47,7 @@ jobs: aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com IMAGE_TAG=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }} docker buildx build --progress plain \ - --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d_%H")" \ + --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \ --cache-to=type=inline \ --cache-from=type=registry,ref="$IMAGE_TAG" \ --tag "$IMAGE_TAG" \ From 8a2108768d72788cf7c24f7d2a76cd76aaa204f9 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 08:30:42 -0800 Subject: [PATCH 12/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 60 ++++++++++++++++---------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index f9f7637197aa..1f4c393af66d 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -12,9 +12,11 @@ concurrency: group: pr-${{ github.event.pull_request.number }} cancel-in-progress: true -jobs: - pre-commit: +jobs: + check-changes: runs-on: ubuntu-latest + outputs: + vllm-rayserve-ec2: ${{ 
steps.changes.outputs.vllm-rayserve-ec2 }} steps: - uses: actions/checkout@v5 - uses: actions/setup-python@v6 @@ -23,25 +25,28 @@ jobs: - uses: pre-commit/action@v3.0.1 with: extra_args: --all-files + - name: Detect file changes + id: changes + uses: dorny/paths-filter@v3 + with: + filters: | + vllm-rayserve-ec2: + - "docker/vllm/Dockerfile.rayserve" - example-on-default-runner: - needs: [pre-commit] - runs-on: - - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} - steps: - - uses: actions/checkout@v5 - - run: .github/scripts/runner_setup.sh - - example-on-build-runner: - needs: [example-on-default-runner] + build-vllm-rayserve-ec2-image: + needs: [check-changes] + if: needs.check-changes.outputs.vllm-rayserve-ec2 == 'true' runs-on: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-build-runner + outputs: + image_uri: ${{ steps.build.outputs.image_uri }} steps: - uses: actions/checkout@v5 - run: .github/scripts/runner_setup.sh - run: .github/scripts/buildkitd.sh - - name: build vllm-rayserve-ec2 image + - name: Build vllm-rayserve-ec2 image + id: build shell: bash run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com @@ -55,25 +60,20 @@ jobs: -f docker/vllm/Dockerfile.rayserve . 
docker push "$IMAGE_TAG" docker rmi "$IMAGE_TAG" - - example-on-g6xl-runner-1: - needs: [example-on-build-runner] - runs-on: - - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} - fleet:x86-g6xl-runner - steps: - - uses: actions/checkout@v5 - - run: .github/scripts/runner_setup.sh - - run: | - nvidia-smi - - example-on-g6xl-runner-2: - needs: [example-on-build-runner] + echo "image_uri="$IMAGE_TAG"" >> $GITHUB_OUTPUT + + test-vllm-rayserve-ec2-image: + needs: [build-vllm-rayserve-ec2-image] + if: needs.build-vllm-rayserve-ec2-image.result == 'success' runs-on: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-g6xl-runner steps: - uses: actions/checkout@v5 - - run: .github/scripts/runner_setup.sh - - run: | - nvidia-smi + - name: Use built image + run: | + IMAGE_URI=${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }} + echo "Testing image: $IMAGE_URI" + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + docker pull "$IMAGE_URI" + docker run --rm "$IMAGE_URI" python -c "import torch; import ray; import vllm; print('Test OK')" From 031a0e80fcf93f27f8e9113f941eb937bf799427 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 08:32:58 -0800 Subject: [PATCH 13/63] fix Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 1f4c393af66d..f77cd7423f51 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -60,7 +60,7 @@ jobs: -f docker/vllm/Dockerfile.rayserve . 
docker push "$IMAGE_TAG" docker rmi "$IMAGE_TAG" - echo "image_uri="$IMAGE_TAG"" >> $GITHUB_OUTPUT + echo "image_uri=$IMAGE_TAG" >> "$GITHUB_OUTPUT" test-vllm-rayserve-ec2-image: needs: [build-vllm-rayserve-ec2-image] From df2d590cd30b143ffff663d4ed7ebd01c14ea941 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 09:44:48 -0800 Subject: [PATCH 14/63] test Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 64 ++++++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 12 deletions(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index f77cd7423f51..33d36f206d7d 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -40,7 +40,7 @@ jobs: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-build-runner outputs: - image_uri: ${{ steps.build.outputs.image_uri }} + image_uri: ${{ steps.export.outputs.image_uri }} steps: - uses: actions/checkout@v5 - run: .github/scripts/runner_setup.sh @@ -60,20 +60,60 @@ jobs: -f docker/vllm/Dockerfile.rayserve . 
docker push "$IMAGE_TAG" docker rmi "$IMAGE_TAG" - echo "image_uri=$IMAGE_TAG" >> "$GITHUB_OUTPUT" + echo "IMAGE_TAG=$IMAGE_TAG" >> $GITHUB_ENV + - name: Export output + id: export + run: echo "image_uri=$IMAGE_TAG" >> $GITHUB_OUTPUT + - name: Debug local + run: | + echo "Local GITHUB_OUTPUT contents:" + cat $GITHUB_OUTPUT || echo "No output file found" + echo "Local step output: ${{ steps.export.outputs.image_uri }}" - test-vllm-rayserve-ec2-image: + test-job1: needs: [build-vllm-rayserve-ec2-image] - if: needs.build-vllm-rayserve-ec2-image.result == 'success' - runs-on: + runs-on: + - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} + fleet:x86-build-runner + if: always() + steps: + - name: Check received output + run: | + echo "Received job output: ${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }}" + if [ -z "${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }}" ]; then + echo "❌ Output is missing!" + else + echo "✅ Output received successfully." + fi + + test-job2: + needs: [build-vllm-rayserve-ec2-image] + runs-on: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-g6xl-runner + if: always() steps: - - uses: actions/checkout@v5 - - name: Use built image + - name: Check received output run: | - IMAGE_URI=${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }} - echo "Testing image: $IMAGE_URI" - aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - docker pull "$IMAGE_URI" - docker run --rm "$IMAGE_URI" python -c "import torch; import ray; import vllm; print('Test OK')" + echo "Received job output: ${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }}" + if [ -z "${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }}" ]; then + echo "❌ Output is missing!" + else + echo "✅ Output received successfully." 
+ fi + + # test-vllm-rayserve-ec2-image: + # needs: [build-vllm-rayserve-ec2-image] + # if: needs.build-vllm-rayserve-ec2-image.result == 'success' + # runs-on: + # - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} + # fleet:x86-g6xl-runner + # steps: + # - uses: actions/checkout@v5 + # - name: Use built image + # run: | + # IMAGE_URI=${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }} + # echo "Testing image: $IMAGE_URI" + # aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + # docker pull "$IMAGE_URI" + # docker run --rm "$IMAGE_URI" python -c "import torch; import ray; import vllm; print('Test OK')" From 2d594069e7da50f88a912617a5fe88aa1fdffd2e Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 09:53:19 -0800 Subject: [PATCH 15/63] fix Signed-off-by: Junpu Fan --- .pre-commit-config.yaml | 1 + .shellcheckrc | 1 + DEVELOPMENT.md | 1 + 3 files changed, 3 insertions(+) create mode 100644 .shellcheckrc diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 63f60f47f387..e343734a2aaf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -48,6 +48,7 @@ repos: rev: v1.7.7 hooks: - id: actionlint + args: ["-shellcheck=enable=all"] - repo: https://github.com/crate-ci/typos rev: v1.38.1 hooks: diff --git a/.shellcheckrc b/.shellcheckrc new file mode 100644 index 000000000000..cf179afb4534 --- /dev/null +++ b/.shellcheckrc @@ -0,0 +1 @@ +disable=SC2086 \ No newline at end of file diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 55fefbd3a911..95ac91415950 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -33,6 +33,7 @@ Install go using [homebrew](https://brew.sh/), below example assume on Mac. 
```bash brew install go go env -w GOPROXY=direct +brew install shellcheck ``` To manually run all linters: From 75a8f1ad45d827177a7461df1f562f7660dff128 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 10:24:57 -0800 Subject: [PATCH 16/63] try artifact Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 77 ++++++++++---------------------- 1 file changed, 24 insertions(+), 53 deletions(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 33d36f206d7d..1dc056d83119 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -39,8 +39,6 @@ jobs: runs-on: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-build-runner - outputs: - image_uri: ${{ steps.export.outputs.image_uri }} steps: - uses: actions/checkout@v5 - run: .github/scripts/runner_setup.sh @@ -60,60 +58,33 @@ jobs: -f docker/vllm/Dockerfile.rayserve . docker push "$IMAGE_TAG" docker rmi "$IMAGE_TAG" - echo "IMAGE_TAG=$IMAGE_TAG" >> $GITHUB_ENV - - name: Export output - id: export - run: echo "image_uri=$IMAGE_TAG" >> $GITHUB_OUTPUT - - name: Debug local - run: | - echo "Local GITHUB_OUTPUT contents:" - cat $GITHUB_OUTPUT || echo "No output file found" - echo "Local step output: ${{ steps.export.outputs.image_uri }}" - - test-job1: - needs: [build-vllm-rayserve-ec2-image] - runs-on: - - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} - fleet:x86-build-runner - if: always() - steps: - - name: Check received output - run: | - echo "Received job output: ${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }}" - if [ -z "${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }}" ]; then - echo "❌ Output is missing!" - else - echo "✅ Output received successfully." 
- fi + echo "$IMAGE_TAG" > image_uri.txt + - name: Upload image URI + uses: actions/upload-artifact@v4 + with: + name: vllm-rayserve-ec2-image-uri + path: image_uri.txt - test-job2: + test-vllm-rayserve-ec2-image: needs: [build-vllm-rayserve-ec2-image] - runs-on: + if: needs.build-vllm-rayserve-ec2-image.result == 'success' + runs-on: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-g6xl-runner - if: always() steps: - - name: Check received output + - uses: actions/checkout@v5 + - name: Download image URI + uses: actions/download-artifact@v4 + with: + name: vllm-rayserve-ec2-image-uri + - name: Read image URI + id: read run: | - echo "Received job output: ${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }}" - if [ -z "${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }}" ]; then - echo "❌ Output is missing!" - else - echo "✅ Output received successfully." - fi - - # test-vllm-rayserve-ec2-image: - # needs: [build-vllm-rayserve-ec2-image] - # if: needs.build-vllm-rayserve-ec2-image.result == 'success' - # runs-on: - # - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} - # fleet:x86-g6xl-runner - # steps: - # - uses: actions/checkout@v5 - # - name: Use built image - # run: | - # IMAGE_URI=${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }} - # echo "Testing image: $IMAGE_URI" - # aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - # docker pull "$IMAGE_URI" - # docker run --rm "$IMAGE_URI" python -c "import torch; import ray; import vllm; print('Test OK')" + IMAGE_URI=$(cat image_uri.txt) + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + echo "Resolved image URI: $IMAGE_URI" + - name: Test image + run: | + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ 
secrets.AWS_REGION }}.amazonaws.com + docker pull "$IMAGE_URI" + docker run --rm "$IMAGE_URI" python -c "import torch; import ray; import vllm; print('Test OK')" From 65975f71e98db0839e6158543b82d925f6f100da Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 10:32:40 -0800 Subject: [PATCH 17/63] update docker command Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 1dc056d83119..9e311dfb3812 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -87,4 +87,4 @@ jobs: run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com docker pull "$IMAGE_URI" - docker run --rm "$IMAGE_URI" python -c "import torch; import ray; import vllm; print('Test OK')" + docker run --rm "$IMAGE_URI" --entrypoint python -c "import torch; import ray; import vllm; print('Test OK')" From ff4725e0879a5e1829f8c065496c3db44dd1fc62 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 10:37:09 -0800 Subject: [PATCH 18/63] fix command Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 9e311dfb3812..6b722eaa86c2 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -87,4 +87,4 @@ jobs: run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com docker pull "$IMAGE_URI" - docker run --rm "$IMAGE_URI" --entrypoint python -c "import torch; import ray; import vllm; print('Test OK')" + docker run --rm --gpus=all --entrypoint /bin/bash "$IMAGE_URI" 
--entrypoint python -c "import torch; import ray; import vllm; print('Test OK')" From 3dd1a99d9c1f010454f36a2d2af9fd5a0158d131 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 10:42:34 -0800 Subject: [PATCH 19/63] fix command Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 6b722eaa86c2..1cc4bd574fea 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -87,4 +87,5 @@ jobs: run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com docker pull "$IMAGE_URI" - docker run --rm --gpus=all --entrypoint /bin/bash "$IMAGE_URI" --entrypoint python -c "import torch; import ray; import vllm; print('Test OK')" + docker run --rm --gpus=all $IMAGE_URI --entrypoint /bin/bash \ + -c "python -c 'import vllm; print(vllm.__version__)'" From 872029d121225a48dab1e9c37eecb9e1afe64004 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 10:47:44 -0800 Subject: [PATCH 20/63] fix entrypoint Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 1cc4bd574fea..b3db25e8985a 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -87,5 +87,7 @@ jobs: run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com docker pull "$IMAGE_URI" - docker run --rm --gpus=all $IMAGE_URI --entrypoint /bin/bash \ - -c "python -c 'import vllm; print(vllm.__version__)'" + docker run --rm --gpus=all \ + --entrypoint /bin/bash \ + "$IMAGE_URI" \ + -c 
"python -c 'import vllm; print(vllm.__version__)'" From fadf714b26227dfb4350f44467308a30c590d57d Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 11:17:16 -0800 Subject: [PATCH 21/63] update test Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 40 ++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index b3db25e8985a..773961c2c915 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -85,9 +85,41 @@ jobs: echo "Resolved image URI: $IMAGE_URI" - name: Test image run: | + # Download ShareGPT dataset if it doesn't exist + mkdir -p ${HOME}/dataset + if [ ! -f ${HOME}/dataset/ShareGPT_V3_unfiltered_cleaned_split.json ]; then + echo "Downloading ShareGPT dataset..." + wget -P ${HOME}/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json + else + echo "ShareGPT dataset already exists. Skipping download." 
+ fi + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com docker pull "$IMAGE_URI" - docker run --rm --gpus=all \ - --entrypoint /bin/bash \ - "$IMAGE_URI" \ - -c "python -c 'import vllm; print(vllm.__version__)'" + CONTAINER_NAME=vllm-rayserve + docker stop ${CONTAINER_NAME} || true + docker rm -f ${CONTAINER_NAME} || true + docker run --name ${CONTAINER_NAME} \ + -d --gpus=all --entrypoint /bin/bash \ + -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ + -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ${HOME}/dataset:/dataset \ + -e "HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ + ${IMAGE_URI} \ + -c "vllm serve Qwen/Qwen3-0.6B --reasoning-parser qwen3" + sleep 60 + docker logs ${CONTAINER_NAME} + + # run serving benchmark + echo "start running serving benchmark workflow..." + docker exec ${CONTAINER_NAME} vllm bench serve \ + --backend vllm \ + --model Qwen/Qwen3-0.6B \ + --dataset-name sharegpt \ + --dataset-path /dataset/ShareGPT_V3_unfiltered_cleaned_split.json \ + --num-prompts 1000 + + # cleanup container + docker stop ${CONTAINER_NAME} + docker rm -f ${CONTAINER_NAME} + From 557e649628da03d4d4bf987137f32e1ca6795e05 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 11:22:25 -0800 Subject: [PATCH 22/63] fix command Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 773961c2c915..650ef0535c59 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -89,8 +89,7 @@ jobs: mkdir -p ${HOME}/dataset if [ ! -f ${HOME}/dataset/ShareGPT_V3_unfiltered_cleaned_split.json ]; then echo "Downloading ShareGPT dataset..." 
- wget -P ${HOME}/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json - else + wget -q -P ${HOME}/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json else echo "ShareGPT dataset already exists. Skipping download." fi @@ -104,7 +103,7 @@ jobs: -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ -v ${HOME}/dataset:/dataset \ - -e "HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ + -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ ${IMAGE_URI} \ -c "vllm serve Qwen/Qwen3-0.6B --reasoning-parser qwen3" sleep 60 From 58aa567e32660bd1520c1d93343a60bfad8d0d5f Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 13:27:43 -0800 Subject: [PATCH 23/63] checkout vllm Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 650ef0535c59..02dbc4d335bd 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -121,4 +121,18 @@ jobs: # cleanup container docker stop ${CONTAINER_NAME} docker rm -f ${CONTAINER_NAME} - + + - name: Checkout vLLM v0.10.2 + uses: actions/checkout@v5 + with: + repository: vllm-project/vllm + ref: v0.10.2 + path: vllm + + - name: Verify vLLM repository is cloned + run: | + echo "Checking vLLM working directory..." + ls -la vllm + test -d vllm || (echo "❌ vllm directory not found!" && exit 1) + test -f vllm/pyproject.toml || (echo "❌ Expected file pyproject.toml missing — clone may have failed." && exit 1) + echo "✅ vLLM repository cloned successfully." 
From b071a75dd6a082dfd21b38a889faf19039d274af Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 13:50:53 -0800 Subject: [PATCH 24/63] update workflow Signed-off-by: Junpu Fan --- .../{pr-example.yml => pr-vllm-rayserve.yml} | 65 ++++++++++++------- 1 file changed, 40 insertions(+), 25 deletions(-) rename .github/workflows/{pr-example.yml => pr-vllm-rayserve.yml} (82%) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-vllm-rayserve.yml similarity index 82% rename from .github/workflows/pr-example.yml rename to .github/workflows/pr-vllm-rayserve.yml index 02dbc4d335bd..e217ad0f17a6 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -1,9 +1,11 @@ -name: Example Workflow +name: PR - vLLM RayServe on: pull_request: branches: - main + paths: + - "docker/**" permissions: contents: read @@ -33,7 +35,7 @@ jobs: vllm-rayserve-ec2: - "docker/vllm/Dockerfile.rayserve" - build-vllm-rayserve-ec2-image: + build-vllm-rayserve-ec2: needs: [check-changes] if: needs.check-changes.outputs.vllm-rayserve-ec2 == 'true' runs-on: @@ -65,9 +67,9 @@ jobs: name: vllm-rayserve-ec2-image-uri path: image_uri.txt - test-vllm-rayserve-ec2-image: - needs: [build-vllm-rayserve-ec2-image] - if: needs.build-vllm-rayserve-ec2-image.result == 'success' + test-vllm-rayserve-ec2: + needs: [build-vllm-rayserve-ec2] + if: needs.build-vllm-rayserve-ec2.result == 'success' runs-on: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-g6xl-runner @@ -77,13 +79,44 @@ jobs: uses: actions/download-artifact@v4 with: name: vllm-rayserve-ec2-image-uri - - name: Read image URI + + - name: Pull image URI id: read run: | IMAGE_URI=$(cat image_uri.txt) echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV echo "Resolved image URI: $IMAGE_URI" - - name: Test image + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION 
}}.amazonaws.com + docker pull "$IMAGE_URI" + + - name: Checkout vLLM v0.10.2 + uses: actions/checkout@v5 + with: + repository: vllm-project/vllm + ref: v0.10.2 + path: vllm + + - name: Run vLLM Tests + run: | + CONTAINER_NAME=vllm-rayserve-test + docker stop ${CONTAINER_NAME} || true + docker rm -f ${CONTAINER_NAME} || true + + docker run --name ${CONTAINER_NAME} \ + -d --rm --gpus=all --entrypoint /bin/bash \ + -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ + -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + ${IMAGE_URI} \ + -c "nvidia-smi" + + docker exec ${CONTAINER_NAME} pytest -v -s v1/e2e + docker exec ${CONTAINER_NAME} pytest -v -s v1/engine + + # cleanup container + docker stop ${CONTAINER_NAME} + docker rm -f ${CONTAINER_NAME} + + - name: Run qwen3 benchmark run: | # Download ShareGPT dataset if it doesn't exist mkdir -p ${HOME}/dataset @@ -92,9 +125,6 @@ jobs: wget -q -P ${HOME}/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json else echo "ShareGPT dataset already exists. Skipping download." fi - - aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - docker pull "$IMAGE_URI" CONTAINER_NAME=vllm-rayserve docker stop ${CONTAINER_NAME} || true docker rm -f ${CONTAINER_NAME} || true @@ -121,18 +151,3 @@ jobs: # cleanup container docker stop ${CONTAINER_NAME} docker rm -f ${CONTAINER_NAME} - - - name: Checkout vLLM v0.10.2 - uses: actions/checkout@v5 - with: - repository: vllm-project/vllm - ref: v0.10.2 - path: vllm - - - name: Verify vLLM repository is cloned - run: | - echo "Checking vLLM working directory..." - ls -la vllm - test -d vllm || (echo "❌ vllm directory not found!" && exit 1) - test -f vllm/pyproject.toml || (echo "❌ Expected file pyproject.toml missing — clone may have failed." 
&& exit 1) - echo "✅ vLLM repository cloned successfully." From e362483157f78dc319fb608560f464d15bcdb533 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 14:40:48 -0800 Subject: [PATCH 25/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index e217ad0f17a6..9d2385061f82 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -103,12 +103,13 @@ jobs: docker rm -f ${CONTAINER_NAME} || true docker run --name ${CONTAINER_NAME} \ - -d --rm --gpus=all --entrypoint /bin/bash \ + -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ ${IMAGE_URI} \ -c "nvidia-smi" + docker exec ${CONTAINER_NAME} uv pip list docker exec ${CONTAINER_NAME} pytest -v -s v1/e2e docker exec ${CONTAINER_NAME} pytest -v -s v1/engine From 369551b019d129d9c40982f7fe29e7577afaafb1 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 14:45:16 -0800 Subject: [PATCH 26/63] fix Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 9d2385061f82..f64cfb2ecbd3 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -101,14 +101,15 @@ jobs: CONTAINER_NAME=vllm-rayserve-test docker stop ${CONTAINER_NAME} || true docker rm -f ${CONTAINER_NAME} || true + echo "${IMAGE_URI}" docker run --name ${CONTAINER_NAME} \ -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ - ${IMAGE_URI} \ - -c "nvidia-smi" + ${IMAGE_URI} + docker exec ${CONTAINER_NAME} nvidia-smi docker exec 
${CONTAINER_NAME} uv pip list docker exec ${CONTAINER_NAME} pytest -v -s v1/e2e docker exec ${CONTAINER_NAME} pytest -v -s v1/engine From aeebfe84014437435d1162cabd83b1055817aa9a Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 15:04:27 -0800 Subject: [PATCH 27/63] try test Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index f64cfb2ecbd3..5ae8a56f25cd 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -107,10 +107,12 @@ jobs: -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ./vllm:/workdir --workdir /workdir \ ${IMAGE_URI} docker exec ${CONTAINER_NAME} nvidia-smi docker exec ${CONTAINER_NAME} uv pip list + docker exec uv pip install --system -r requirements/dev.txt docker exec ${CONTAINER_NAME} pytest -v -s v1/e2e docker exec ${CONTAINER_NAME} pytest -v -s v1/engine From 18f2b64f63a87a7fcc61e3c9c80f0501c90f9704 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 15:06:35 -0800 Subject: [PATCH 28/63] fix typo Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 5ae8a56f25cd..180d802fc00a 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -112,7 +112,7 @@ jobs: docker exec ${CONTAINER_NAME} nvidia-smi docker exec ${CONTAINER_NAME} uv pip list - docker exec uv pip install --system -r requirements/dev.txt + docker exec ${CONTAINER_NAME} uv pip install --system -r requirements/dev.txt docker exec ${CONTAINER_NAME} pytest -v -s v1/e2e docker exec ${CONTAINER_NAME} pytest -v -s v1/engine From 9c3bc51ee39564be13e64c8b1784b30be46cf90d 
Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 15:37:54 -0800 Subject: [PATCH 29/63] run basic terst Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 180d802fc00a..1fe809db9213 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -89,11 +89,11 @@ jobs: aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com docker pull "$IMAGE_URI" - - name: Checkout vLLM v0.10.2 + - name: Checkout vLLM uses: actions/checkout@v5 with: repository: vllm-project/vllm - ref: v0.10.2 + ref: v0.11.1rc6 path: vllm - name: Run vLLM Tests @@ -111,10 +111,12 @@ jobs: ${IMAGE_URI} docker exec ${CONTAINER_NAME} nvidia-smi - docker exec ${CONTAINER_NAME} uv pip list - docker exec ${CONTAINER_NAME} uv pip install --system -r requirements/dev.txt - docker exec ${CONTAINER_NAME} pytest -v -s v1/e2e - docker exec ${CONTAINER_NAME} pytest -v -s v1/engine + docker exec ${CONTAINER_NAME} uv pip install -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + docker exec ${CONTAINER_NAME} uv pip install pytest pytest-asyncio + docker exec ${CONTAINER_NAME} pytest -s -v tests/test_logger.py + + # docker exec ${CONTAINER_NAME} pytest -v -s v1/e2e + # docker exec ${CONTAINER_NAME} pytest -v -s v1/engine # cleanup container docker stop ${CONTAINER_NAME} From 4e43405e9b87380227d41124b3ca7782575b6419 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 15:42:39 -0800 Subject: [PATCH 30/63] test Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml 
b/.github/workflows/pr-vllm-rayserve.yml index 1fe809db9213..ff315d1ea0b0 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -111,13 +111,10 @@ jobs: ${IMAGE_URI} docker exec ${CONTAINER_NAME} nvidia-smi - docker exec ${CONTAINER_NAME} uv pip install -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto - docker exec ${CONTAINER_NAME} uv pip install pytest pytest-asyncio + docker exec ${CONTAINER_NAME} uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + docker exec ${CONTAINER_NAME} uv pip install --system pytest pytest-asyncio docker exec ${CONTAINER_NAME} pytest -s -v tests/test_logger.py - # docker exec ${CONTAINER_NAME} pytest -v -s v1/e2e - # docker exec ${CONTAINER_NAME} pytest -v -s v1/engine - # cleanup container docker stop ${CONTAINER_NAME} docker rm -f ${CONTAINER_NAME} From 94e16b6d1291ff22e4509a8b1a4a1362b8f0b164 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 15:45:06 -0800 Subject: [PATCH 31/63] use older version Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index ff315d1ea0b0..f6c1ceaf96ff 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -93,7 +93,7 @@ jobs: uses: actions/checkout@v5 with: repository: vllm-project/vllm - ref: v0.11.1rc6 + ref: v0.10.2 path: vllm - name: Run vLLM Tests From d5d1ff3f6563689b8c563ff19ebc82f9a4fb8ea1 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 16:29:09 -0800 Subject: [PATCH 32/63] check path Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index f6c1ceaf96ff..f7b9f4d449c8 100644 --- 
a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -110,7 +110,8 @@ jobs: -v ./vllm:/workdir --workdir /workdir \ ${IMAGE_URI} - docker exec ${CONTAINER_NAME} nvidia-smi + docker exec ${CONTAINER_NAME} nvidia-smi && ls -l + docker exec ${CONTAINER_NAME} uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto docker exec ${CONTAINER_NAME} uv pip install --system pytest pytest-asyncio docker exec ${CONTAINER_NAME} pytest -s -v tests/test_logger.py From b137dea0a9e6898c060b23c365150d7c69db4a4b Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 16:57:58 -0800 Subject: [PATCH 33/63] partial clone Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index f7b9f4d449c8..b980a6038fe5 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -94,7 +94,10 @@ jobs: with: repository: vllm-project/vllm ref: v0.10.2 - path: vllm + sparse-checkout: | + requirements + tests + path: vllm_tests - name: Run vLLM Tests run: | @@ -107,14 +110,13 @@ jobs: -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ - -v ./vllm:/workdir --workdir /workdir \ + -v vllm_tests:/workdir --workdir /workdir \ ${IMAGE_URI} - docker exec ${CONTAINER_NAME} nvidia-smi && ls -l - - docker exec ${CONTAINER_NAME} uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto - docker exec ${CONTAINER_NAME} uv pip install --system pytest pytest-asyncio - docker exec ${CONTAINER_NAME} pytest -s -v tests/test_logger.py + docker exec ${CONTAINER_NAME} nvidia-smi + docker exec ${CONTAINER_NAME} cd vllm_tests && uv pip install --system -r requirements/common.txt -r 
requirements/dev.txt --torch-backend=auto + docker exec ${CONTAINER_NAME} cd vllm_tests && uv pip install --system pytest pytest-asyncio + docker exec ${CONTAINER_NAME} cd vllm_tests && pytest -s -v tests/test_logger.py # cleanup container docker stop ${CONTAINER_NAME} From 0d8b5a57264109e1edd58ca55338d7c0b75a3959 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 17:15:51 -0800 Subject: [PATCH 34/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 83 ++++++++++++-------------- .shellcheckrc | 1 - 2 files changed, 38 insertions(+), 46 deletions(-) delete mode 100644 .shellcheckrc diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index b980a6038fe5..9fcb1e671c78 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -54,13 +54,13 @@ jobs: docker buildx build --progress plain \ --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \ --cache-to=type=inline \ - --cache-from=type=registry,ref="$IMAGE_TAG" \ - --tag "$IMAGE_TAG" \ + --cache-from=type=registry,ref=$IMAGE_TAG \ + --tag $IMAGE_TAG \ --target vllm-rayserve-ec2 \ -f docker/vllm/Dockerfile.rayserve . 
- docker push "$IMAGE_TAG" - docker rmi "$IMAGE_TAG" - echo "$IMAGE_TAG" > image_uri.txt + docker push $IMAGE_TAG + docker rmi $IMAGE_TAG + echo $IMAGE_TAG > image_uri.txt - name: Upload image URI uses: actions/upload-artifact@v4 with: @@ -80,15 +80,6 @@ jobs: with: name: vllm-rayserve-ec2-image-uri - - name: Pull image URI - id: read - run: | - IMAGE_URI=$(cat image_uri.txt) - echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV - echo "Resolved image URI: $IMAGE_URI" - aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - docker pull "$IMAGE_URI" - - name: Checkout vLLM uses: actions/checkout@v5 with: @@ -99,29 +90,39 @@ jobs: tests path: vllm_tests - - name: Run vLLM Tests + - name: Pull image URI + id: read + run: | + IMAGE_URI=$(cat image_uri.txt) + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + echo "Resolved image URI: $IMAGE_URI" + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + docker pull "$IMAGE_URI" + + - name: Start container + id: start run: | CONTAINER_NAME=vllm-rayserve-test - docker stop ${CONTAINER_NAME} || true - docker rm -f ${CONTAINER_NAME} || true - echo "${IMAGE_URI}" - + echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV docker run --name ${CONTAINER_NAME} \ -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ -v vllm_tests:/workdir --workdir /workdir \ ${IMAGE_URI} - - docker exec ${CONTAINER_NAME} nvidia-smi - docker exec ${CONTAINER_NAME} cd vllm_tests && uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto - docker exec ${CONTAINER_NAME} cd vllm_tests && uv pip install --system pytest pytest-asyncio - docker exec ${CONTAINER_NAME} cd 
vllm_tests && pytest -s -v tests/test_logger.py - - # cleanup container - docker stop ${CONTAINER_NAME} - docker rm -f ${CONTAINER_NAME} - + + - name: Run vLLM Tests + run: | + docker exec ${CONTAINER_NAME} sh -c ' + set -eux + nvidia-smi + ls -la + cd vllm_tests + uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + uv pip install --system pytest pytest-asyncio + pytest -s -v tests/test_logger.py + ' + - name: Run qwen3 benchmark run: | # Download ShareGPT dataset if it doesn't exist @@ -131,19 +132,9 @@ jobs: wget -q -P ${HOME}/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json else echo "ShareGPT dataset already exists. Skipping download." fi - CONTAINER_NAME=vllm-rayserve - docker stop ${CONTAINER_NAME} || true - docker rm -f ${CONTAINER_NAME} || true - docker run --name ${CONTAINER_NAME} \ - -d --gpus=all --entrypoint /bin/bash \ - -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ - -v ${HOME}/.cache/vllm:/root/.cache/vllm \ - -v ${HOME}/dataset:/dataset \ - -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ - ${IMAGE_URI} \ - -c "vllm serve Qwen/Qwen3-0.6B --reasoning-parser qwen3" + sleep 60 - docker logs ${CONTAINER_NAME} + # docker logs ${CONTAINER_NAME} # run serving benchmark echo "start running serving benchmark workflow..." 
@@ -153,7 +144,9 @@ jobs: --dataset-name sharegpt \ --dataset-path /dataset/ShareGPT_V3_unfiltered_cleaned_split.json \ --num-prompts 1000 - - # cleanup container - docker stop ${CONTAINER_NAME} - docker rm -f ${CONTAINER_NAME} + + - name: Cleanup container + if: always() + run: | + docker stop ${CONTAINER_NAME} || true + docker rm -f ${CONTAINER_NAME} || true diff --git a/.shellcheckrc b/.shellcheckrc deleted file mode 100644 index cf179afb4534..000000000000 --- a/.shellcheckrc +++ /dev/null @@ -1 +0,0 @@ -disable=SC2086 \ No newline at end of file From f75fa3713000a2c6b843df811bc35278518919b6 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 17:24:02 -0800 Subject: [PATCH 35/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 29 +++++++++++++------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 9fcb1e671c78..9d1a9adbd4c6 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -74,13 +74,9 @@ jobs: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-g6xl-runner steps: - - uses: actions/checkout@v5 - - name: Download image URI - uses: actions/download-artifact@v4 - with: - name: vllm-rayserve-ec2-image-uri - - - name: Checkout vLLM + - name: Checkout DLC Source + uses: actions/checkout@v5 + - name: Checkout vLLM Tests uses: actions/checkout@v5 with: repository: vllm-project/vllm @@ -90,18 +86,19 @@ jobs: tests path: vllm_tests - - name: Pull image URI - id: read + - name: Download image URI + uses: actions/download-artifact@v4 + with: + name: vllm-rayserve-ec2-image-uri + + - name: Start container + id: start run: | IMAGE_URI=$(cat image_uri.txt) echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV echo "Resolved image URI: $IMAGE_URI" aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ 
secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com docker pull "$IMAGE_URI" - - - name: Start container - id: start - run: | CONTAINER_NAME=vllm-rayserve-test echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV docker run --name ${CONTAINER_NAME} \ @@ -116,6 +113,8 @@ jobs: docker exec ${CONTAINER_NAME} sh -c ' set -eux nvidia-smi + pwd + cd /workdir ls -la cd vllm_tests uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto @@ -145,8 +144,10 @@ jobs: --dataset-path /dataset/ShareGPT_V3_unfiltered_cleaned_split.json \ --num-prompts 1000 - - name: Cleanup container + - name: Cleanup container and image if: always() run: | docker stop ${CONTAINER_NAME} || true docker rm -f ${CONTAINER_NAME} || true + docker rmi ${IMAGE_URI} || true + docker image ls || true From 1ad77b4b16bfb5be6d1fc2b7892a73ab203bd2ab Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 17:33:04 -0800 Subject: [PATCH 36/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 9d1a9adbd4c6..8f4a1bc55201 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -76,29 +76,36 @@ jobs: steps: - name: Checkout DLC Source uses: actions/checkout@v5 + - name: Checkout vLLM Tests uses: actions/checkout@v5 with: repository: vllm-project/vllm ref: v0.10.2 + path: vllm_tests sparse-checkout: | requirements tests - path: vllm_tests + sparse-checkout-cone-mode: false - name: Download image URI uses: actions/download-artifact@v4 with: name: vllm-rayserve-ec2-image-uri - - - name: Start container - id: start + + - name: Resolve image URI run: | IMAGE_URI=$(cat image_uri.txt) - echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV echo "Resolved image URI: $IMAGE_URI" + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + + - 
name: Pull image + run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - docker pull "$IMAGE_URI" + docker pull $IMAGE_URI + + - name: Start container + run: | CONTAINER_NAME=vllm-rayserve-test echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV docker run --name ${CONTAINER_NAME} \ From a98f01c458a6deb6fd9d2d847624c90e60eab410 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 17:37:17 -0800 Subject: [PATCH 37/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 8f4a1bc55201..df0a613078ae 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -106,13 +106,15 @@ jobs: - name: Start container run: | + pwd + ls -la CONTAINER_NAME=vllm-rayserve-test echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV docker run --name ${CONTAINER_NAME} \ -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ - -v vllm_tests:/workdir --workdir /workdir \ + -v ./vllm_tests:/workdir --workdir /workdir \ ${IMAGE_URI} - name: Run vLLM Tests @@ -121,9 +123,7 @@ jobs: set -eux nvidia-smi pwd - cd /workdir ls -la - cd vllm_tests uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto uv pip install --system pytest pytest-asyncio pytest -s -v tests/test_logger.py From 13a065dc84b1cdbd97efb64e1ccb707ddcfd7765 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 17:47:53 -0800 Subject: [PATCH 38/63] refactor Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 39 ++++++++++++-------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git 
a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index df0a613078ae..77b7785d4b31 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -74,11 +74,8 @@ jobs: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-g6xl-runner steps: - - name: Checkout DLC Source - uses: actions/checkout@v5 - - - name: Checkout vLLM Tests - uses: actions/checkout@v5 + - uses: actions/checkout@v5 + - uses: actions/checkout@v5 with: repository: vllm-project/vllm ref: v0.10.2 @@ -106,26 +103,26 @@ jobs: - name: Start container run: | - pwd - ls -la - CONTAINER_NAME=vllm-rayserve-test - echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV - docker run --name ${CONTAINER_NAME} \ - -d -it --rm --gpus=all --entrypoint /bin/bash \ + CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ -v ./vllm_tests:/workdir --workdir /workdir \ - ${IMAGE_URI} + ${IMAGE_URI}) + echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV + - name: Install Test dependencies + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + uv pip install --system pytest pytest-asyncio + ' + - name: Run vLLM Tests run: | - docker exec ${CONTAINER_NAME} sh -c ' + docker exec ${CONTAINER_ID} sh -c ' set -eux nvidia-smi - pwd - ls -la - uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto - uv pip install --system pytest pytest-asyncio pytest -s -v tests/test_logger.py ' @@ -140,11 +137,10 @@ jobs: fi sleep 60 - # docker logs ${CONTAINER_NAME} # run serving benchmark echo "start running serving benchmark workflow..." 
- docker exec ${CONTAINER_NAME} vllm bench serve \ + docker exec ${CONTAINER_ID} vllm bench serve \ --backend vllm \ --model Qwen/Qwen3-0.6B \ --dataset-name sharegpt \ @@ -154,7 +150,8 @@ jobs: - name: Cleanup container and image if: always() run: | - docker stop ${CONTAINER_NAME} || true - docker rm -f ${CONTAINER_NAME} || true + docker stop ${CONTAINER_ID} || true + docker rm -f ${CONTAINER_ID} || true docker rmi ${IMAGE_URI} || true docker image ls || true + docker system prune -af From b75b9246d9cdbad6f97ea238c799fc66b3bc2a55 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 17:53:55 -0800 Subject: [PATCH 39/63] add dataset path Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 77b7785d4b31..fd26f02db89a 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -106,6 +106,7 @@ jobs: CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ${HOME}/dataset:/root/dataset \ -v ./vllm_tests:/workdir --workdir /workdir \ ${IMAGE_URI}) echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV @@ -153,5 +154,3 @@ jobs: docker stop ${CONTAINER_ID} || true docker rm -f ${CONTAINER_ID} || true docker rmi ${IMAGE_URI} || true - docker image ls || true - docker system prune -af From ff6bba4b2d3f11ad7bc993bb4eef24ec05d69d76 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 18:05:58 -0800 Subject: [PATCH 40/63] try smart cleanup Signed-off-by: Junpu Fan --- .github/scripts/cleanup_old_image.sh | 14 +++++++++++ .github/workflows/pr-vllm-rayserve.yml | 33 +++++++------------------- 2 files changed, 22 insertions(+), 25 deletions(-) create mode 100755 .github/scripts/cleanup_old_image.sh diff --git 
a/.github/scripts/cleanup_old_image.sh b/.github/scripts/cleanup_old_image.sh new file mode 100755 index 000000000000..b615f0393c05 --- /dev/null +++ b/.github/scripts/cleanup_old_image.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Delete images older than 1 day (24h) +cutoff=$(date -d '1 day ago' +%s) + +docker images --format '{{.ID}} {{.Repository}}:{{.Tag}} {{.CreatedAt}}' \ + | while read -r id name created_at _; do + created_ts=$(date -d "$created_at" +%s 2>/dev/null || echo 0) + if (( created_ts < cutoff )); then + echo "Deleting old image: $name ($id, created $created_at)" + docker rmi -f "$id" || true + fi + done diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index fd26f02db89a..e1c2cb2b0714 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -106,7 +106,6 @@ jobs: CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ - -v ${HOME}/dataset:/root/dataset \ -v ./vllm_tests:/workdir --workdir /workdir \ ${IMAGE_URI}) echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV @@ -123,34 +122,18 @@ jobs: run: | docker exec ${CONTAINER_ID} sh -c ' set -eux - nvidia-smi + nvidia-smi pytest -s -v tests/test_logger.py + # Entrypoints Integration Test (LLM) # 30min + # export VLLM_WORKER_MULTIPROC_METHOD=spawn + # pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py + # pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process + # pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests ' - - - name: Run qwen3 benchmark - run: | - # Download ShareGPT dataset if it doesn't exist - mkdir -p ${HOME}/dataset - if [ ! -f ${HOME}/dataset/ShareGPT_V3_unfiltered_cleaned_split.json ]; then - echo "Downloading ShareGPT dataset..." 
- wget -q -P ${HOME}/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json else - echo "ShareGPT dataset already exists. Skipping download." - fi - - sleep 60 - - # run serving benchmark - echo "start running serving benchmark workflow..." - docker exec ${CONTAINER_ID} vllm bench serve \ - --backend vllm \ - --model Qwen/Qwen3-0.6B \ - --dataset-name sharegpt \ - --dataset-path /dataset/ShareGPT_V3_unfiltered_cleaned_split.json \ - --num-prompts 1000 - + - name: Cleanup container and image if: always() run: | docker stop ${CONTAINER_ID} || true docker rm -f ${CONTAINER_ID} || true - docker rmi ${IMAGE_URI} || true + - run: .github/scripts/cleanup_old_image.sh From 43570430a5db4e793b3fbd2a95a2a07d1c39b763 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 18:12:55 -0800 Subject: [PATCH 41/63] cleanup Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index e1c2cb2b0714..f1cdf3123b4a 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -134,6 +134,5 @@ jobs: - name: Cleanup container and image if: always() run: | - docker stop ${CONTAINER_ID} || true docker rm -f ${CONTAINER_ID} || true - - run: .github/scripts/cleanup_old_image.sh + docker system prune -af From 85cffdf61f5c9f74eeadcbd3aade5feb77de0c5f Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 18:19:57 -0800 Subject: [PATCH 42/63] update Signed-off-by: Junpu Fan --- .github/scripts/cleanup_old_image.sh | 32 ++++++++++++++++++++++---- .github/workflows/pr-vllm-rayserve.yml | 7 ++++-- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/.github/scripts/cleanup_old_image.sh b/.github/scripts/cleanup_old_image.sh index b615f0393c05..379334cbf6ac 100755 --- 
a/.github/scripts/cleanup_old_image.sh +++ b/.github/scripts/cleanup_old_image.sh @@ -1,14 +1,36 @@ #!/usr/bin/env bash set -euo pipefail -# Delete images older than 1 day (24h) -cutoff=$(date -d '1 day ago' +%s) +# Configurable cutoff age (default 1 day) +CUTOFF_HOURS=${CUTOFF_HOURS:-24} +CUTOFF_TS=$(date -d "${CUTOFF_HOURS} hours ago" +%s) + +echo "=== Docker disk usage before cleanup ===" +docker system df -v || true +echo + +echo "=== Checking images older than ${CUTOFF_HOURS}h ===" +deleted=0 +kept=0 docker images --format '{{.ID}} {{.Repository}}:{{.Tag}} {{.CreatedAt}}' \ | while read -r id name created_at _; do + # skip dangling images (no repo:tag) + [ "$name" = ":" ] && continue created_ts=$(date -d "$created_at" +%s 2>/dev/null || echo 0) - if (( created_ts < cutoff )); then - echo "Deleting old image: $name ($id, created $created_at)" - docker rmi -f "$id" || true + if (( created_ts < CUTOFF_TS )); then + echo "🗑️ Removing old image: $name (created $created_at)" + docker rmi -f "$id" >/dev/null 2>&1 && ((deleted++)) || true + else + ((kept++)) fi done + +echo +echo "=== Cleanup summary ===" +echo "Images kept: $kept" +echo "Images deleted: $deleted" +echo + +echo "=== Docker disk usage after cleanup ===" +docker system df -v || true diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index f1cdf3123b4a..be54a42cb62b 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -131,8 +131,11 @@ jobs: # pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests ' - - name: Cleanup container and image + - name: Cleanup container if: always() run: | docker rm -f ${CONTAINER_ID} || true - docker system prune -af + + - name: Cleanup old images + if: always() + run: cleanup_old_image.sh From 12e2dc1f929ceb95c06f11f4d88908718401124e Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 18:26:43 -0800 Subject: [PATCH 43/63] fix Signed-off-by: Junpu 
Fan --- .github/workflows/pr-vllm-rayserve.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index be54a42cb62b..2280030de82f 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -138,4 +138,4 @@ jobs: - name: Cleanup old images if: always() - run: cleanup_old_image.sh + run: .github/scripts/cleanup_old_image.sh From ccb5a733799246ce2c0b7c97f356700f4f0a3381 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 18:31:44 -0800 Subject: [PATCH 44/63] update script Signed-off-by: Junpu Fan --- .github/scripts/cleanup_old_image.sh | 47 ++++++++++++++++---------- .github/workflows/pr-vllm-rayserve.yml | 6 ++-- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/.github/scripts/cleanup_old_image.sh b/.github/scripts/cleanup_old_image.sh index 379334cbf6ac..4291139d1c9d 100755 --- a/.github/scripts/cleanup_old_image.sh +++ b/.github/scripts/cleanup_old_image.sh @@ -1,36 +1,47 @@ #!/usr/bin/env bash -set -euo pipefail +set -u # only unset vars cause failure, not command errors -# Configurable cutoff age (default 1 day) +# Configurable cutoff age (default 24h) CUTOFF_HOURS=${CUTOFF_HOURS:-24} -CUTOFF_TS=$(date -d "${CUTOFF_HOURS} hours ago" +%s) +CUTOFF_TS=$(date -d "${CUTOFF_HOURS} hours ago" +%s 2>/dev/null || date -v-"${CUTOFF_HOURS}"H +%s) echo "=== Docker disk usage before cleanup ===" -docker system df -v || true +docker system df -v || echo "(warning: docker system df failed)" echo echo "=== Checking images older than ${CUTOFF_HOURS}h ===" + deleted=0 kept=0 -docker images --format '{{.ID}} {{.Repository}}:{{.Tag}} {{.CreatedAt}}' \ - | while read -r id name created_at _; do - # skip dangling images (no repo:tag) - [ "$name" = ":" ] && continue - created_ts=$(date -d "$created_at" +%s 2>/dev/null || echo 0) - if (( created_ts < CUTOFF_TS )); then - echo "🗑️ Removing old image: $name (created 
$created_at)" - docker rmi -f "$id" >/dev/null 2>&1 && ((deleted++)) || true - else - ((kept++)) - fi - done +# Use a safer loop (no pipe subshell, avoid 'set -e' inside) +while IFS= read -r line; do + id=$(awk '{print $1}' <<<"$line") + name=$(awk '{print $2}' <<<"$line") + created_at=$(awk '{$1=$2=""; print substr($0,3)}' <<<"$line") + + # Skip empty or malformed lines + [ -z "$id" ] && continue + [ "$name" = ":" ] && continue + + created_ts=$(date -d "$created_at" +%s 2>/dev/null || echo 0) + if [ "$created_ts" -lt "$CUTOFF_TS" ]; then + echo "🗑️ Removing old image: $name (created $created_at)" + if docker rmi -f "$id" >/dev/null 2>&1; then + deleted=$((deleted+1)) + else + echo "(warning: failed to remove $name)" + fi + else + kept=$((kept+1)) + fi +done < <(docker images --format '{{.ID}} {{.Repository}}:{{.Tag}} {{.CreatedAt}}' 2>/dev/null || true) echo echo "=== Cleanup summary ===" -echo "Images kept: $kept" +echo "Images kept: $kept" echo "Images deleted: $deleted" echo echo "=== Docker disk usage after cleanup ===" -docker system df -v || true +docker system df -v || echo "(warning: docker system df failed)" diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 2280030de82f..21cbd4943547 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -74,8 +74,10 @@ jobs: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-g6xl-runner steps: - - uses: actions/checkout@v5 - - uses: actions/checkout@v5 + - name: Checkout DLC source + uses: actions/checkout@v5 + - name: Checkout vLLM Tests + uses: actions/checkout@v5 with: repository: vllm-project/vllm ref: v0.10.2 From 43c22322328a38ab6860d0426806ace50072e636 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 19:01:12 -0800 Subject: [PATCH 45/63] enable Entrypoints Integration Test (LLM) Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 21cbd4943547..6423f1c27fc7 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -127,10 +127,10 @@ jobs: nvidia-smi pytest -s -v tests/test_logger.py # Entrypoints Integration Test (LLM) # 30min - # export VLLM_WORKER_MULTIPROC_METHOD=spawn - # pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py - # pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process - # pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests + export VLLM_WORKER_MULTIPROC_METHOD=spawn + pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py + pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process + pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests ' - name: Cleanup container From 15f0c89055f0bf4f261e232f5b43ea102697acf1 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 20:37:58 -0800 Subject: [PATCH 46/63] update Signed-off-by: Junpu Fan --- .github/scripts/cleanup_old_image.sh | 21 +++++++++++++-------- .github/workflows/pr-vllm-rayserve.yml | 1 + 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.github/scripts/cleanup_old_image.sh b/.github/scripts/cleanup_old_image.sh index 4291139d1c9d..106a0ad64b01 100755 --- a/.github/scripts/cleanup_old_image.sh +++ b/.github/scripts/cleanup_old_image.sh @@ -1,32 +1,33 @@ #!/usr/bin/env bash -set -u # only unset vars cause failure, not command errors +set -u # only fail on unset vars -# Configurable cutoff age (default 24h) CUTOFF_HOURS=${CUTOFF_HOURS:-24} -CUTOFF_TS=$(date -d "${CUTOFF_HOURS} hours ago" +%s 2>/dev/null || date -v-"${CUTOFF_HOURS}"H +%s) +# always compute cutoff in UTC for comparison +CUTOFF_TS=$(date -u -d 
"${CUTOFF_HOURS} hours ago" +%s 2>/dev/null || \ + date -u -v-"${CUTOFF_HOURS}"H +%s) echo "=== Docker disk usage before cleanup ===" docker system df -v || echo "(warning: docker system df failed)" echo -echo "=== Checking images older than ${CUTOFF_HOURS}h ===" +echo "=== Checking images older than ${CUTOFF_HOURS}h (UTC-based) ===" deleted=0 kept=0 -# Use a safer loop (no pipe subshell, avoid 'set -e' inside) while IFS= read -r line; do id=$(awk '{print $1}' <<<"$line") name=$(awk '{print $2}' <<<"$line") created_at=$(awk '{$1=$2=""; print substr($0,3)}' <<<"$line") - # Skip empty or malformed lines [ -z "$id" ] && continue [ "$name" = ":" ] && continue - created_ts=$(date -d "$created_at" +%s 2>/dev/null || echo 0) + # parse docker UTC timestamp safely + created_ts=$(date -u -d "$created_at" +%s 2>/dev/null || echo 0) + if [ "$created_ts" -lt "$CUTOFF_TS" ]; then - echo "🗑️ Removing old image: $name (created $created_at)" + echo "🗑️ Removing old image: $name (created $created_at UTC)" if docker rmi -f "$id" >/dev/null 2>&1; then deleted=$((deleted+1)) else @@ -45,3 +46,7 @@ echo echo "=== Docker disk usage after cleanup ===" docker system df -v || echo "(warning: docker system df failed)" + +echo +echo "=== Host disk space (for /var/lib/docker) ===" +df -h /var/lib/docker 2>/dev/null || df -h / diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 6423f1c27fc7..5c46fcde3075 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -127,6 +127,7 @@ jobs: nvidia-smi pytest -s -v tests/test_logger.py # Entrypoints Integration Test (LLM) # 30min + cd tests export VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process From e9fa11cba6c347ecf2c3263b5c39c664600990fb Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: 
Mon, 10 Nov 2025 21:06:09 -0800 Subject: [PATCH 47/63] update Signed-off-by: Junpu Fan --- .github/scripts/cleanup_old_image.sh | 44 +++++++++++++++----------- .github/workflows/pr-vllm-rayserve.yml | 3 ++ 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/.github/scripts/cleanup_old_image.sh b/.github/scripts/cleanup_old_image.sh index 106a0ad64b01..b5e2ac33a591 100755 --- a/.github/scripts/cleanup_old_image.sh +++ b/.github/scripts/cleanup_old_image.sh @@ -1,33 +1,41 @@ #!/usr/bin/env bash -set -u # only fail on unset vars +set -euo pipefail +# configurable cutoff CUTOFF_HOURS=${CUTOFF_HOURS:-24} -# always compute cutoff in UTC for comparison -CUTOFF_TS=$(date -u -d "${CUTOFF_HOURS} hours ago" +%s 2>/dev/null || \ - date -u -v-"${CUTOFF_HOURS}"H +%s) +CUTOFF_TS=$(date -u -d "${CUTOFF_HOURS} hours ago" +%s 2>/dev/null || date -u -v-"${CUTOFF_HOURS}"H +%s) echo "=== Docker disk usage before cleanup ===" -docker system df -v || echo "(warning: docker system df failed)" +docker system df -v || true echo -echo "=== Checking images older than ${CUTOFF_HOURS}h (UTC-based) ===" - deleted=0 kept=0 -while IFS= read -r line; do - id=$(awk '{print $1}' <<<"$line") - name=$(awk '{print $2}' <<<"$line") - created_at=$(awk '{$1=$2=""; print substr($0,3)}' <<<"$line") +echo "=== Checking images older than ${CUTOFF_HOURS}h (UTC) ===" + +docker images --format '{{json .}}' | while read -r json; do + id=$(jq -r '.ID' <<<"$json") + repo=$(jq -r '.Repository' <<<"$json") + tag=$(jq -r '.Tag' <<<"$json") + created_at=$(jq -r '.CreatedAt' <<<"$json") + # Skip empty or invalid [ -z "$id" ] && continue - [ "$name" = ":" ] && continue - # parse docker UTC timestamp safely - created_ts=$(date -u -d "$created_at" +%s 2>/dev/null || echo 0) + # Normalize name + name="${repo}:${tag}" + + # Convert CreatedAt → epoch (cross-platform) + if date --version >/dev/null 2>&1; then + created_ts=$(date -u -d "$created_at" +%s) + else + created_ts=$(date -u -j -f "%Y-%m-%d %H:%M:%S %z %Z" 
"$created_at" +%s) + fi + # Compare if [ "$created_ts" -lt "$CUTOFF_TS" ]; then - echo "🗑️ Removing old image: $name (created $created_at UTC)" + echo "🗑️ Removing old image: $name (created $created_at)" if docker rmi -f "$id" >/dev/null 2>&1; then deleted=$((deleted+1)) else @@ -36,7 +44,7 @@ while IFS= read -r line; do else kept=$((kept+1)) fi -done < <(docker images --format '{{.ID}} {{.Repository}}:{{.Tag}} {{.CreatedAt}}' 2>/dev/null || true) +done echo echo "=== Cleanup summary ===" @@ -45,8 +53,8 @@ echo "Images deleted: $deleted" echo echo "=== Docker disk usage after cleanup ===" -docker system df -v || echo "(warning: docker system df failed)" +docker system df -v || true echo -echo "=== Host disk space (for /var/lib/docker) ===" +echo "=== Disk space for /var/lib/docker ===" df -h /var/lib/docker 2>/dev/null || df -h / diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 5c46fcde3075..f20aff876e87 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -118,12 +118,15 @@ jobs: set -eux uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto uv pip install --system pytest pytest-asyncio + uv pip install --system -e tests/vllm_test_utils + uv pip install --system hf_transfer ' - name: Run vLLM Tests run: | docker exec ${CONTAINER_ID} sh -c ' set -eux + HF_HUB_ENABLE_HF_TRANSFER=1 nvidia-smi pytest -s -v tests/test_logger.py # Entrypoints Integration Test (LLM) # 30min From 60fd04f0fe5fd69f774dba570f0c64d5e81c06ba Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 21:39:13 -0800 Subject: [PATCH 48/63] update test Signed-off-by: Junpu Fan --- .github/scripts/cleanup_old_image.sh | 60 -------------------------- .github/workflows/pr-vllm-rayserve.yml | 21 +++++---- 2 files changed, 10 insertions(+), 71 deletions(-) delete mode 100755 .github/scripts/cleanup_old_image.sh diff --git a/.github/scripts/cleanup_old_image.sh 
b/.github/scripts/cleanup_old_image.sh deleted file mode 100755 index b5e2ac33a591..000000000000 --- a/.github/scripts/cleanup_old_image.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# configurable cutoff -CUTOFF_HOURS=${CUTOFF_HOURS:-24} -CUTOFF_TS=$(date -u -d "${CUTOFF_HOURS} hours ago" +%s 2>/dev/null || date -u -v-"${CUTOFF_HOURS}"H +%s) - -echo "=== Docker disk usage before cleanup ===" -docker system df -v || true -echo - -deleted=0 -kept=0 - -echo "=== Checking images older than ${CUTOFF_HOURS}h (UTC) ===" - -docker images --format '{{json .}}' | while read -r json; do - id=$(jq -r '.ID' <<<"$json") - repo=$(jq -r '.Repository' <<<"$json") - tag=$(jq -r '.Tag' <<<"$json") - created_at=$(jq -r '.CreatedAt' <<<"$json") - - # Skip empty or invalid - [ -z "$id" ] && continue - - # Normalize name - name="${repo}:${tag}" - - # Convert CreatedAt → epoch (cross-platform) - if date --version >/dev/null 2>&1; then - created_ts=$(date -u -d "$created_at" +%s) - else - created_ts=$(date -u -j -f "%Y-%m-%d %H:%M:%S %z %Z" "$created_at" +%s) - fi - - # Compare - if [ "$created_ts" -lt "$CUTOFF_TS" ]; then - echo "🗑️ Removing old image: $name (created $created_at)" - if docker rmi -f "$id" >/dev/null 2>&1; then - deleted=$((deleted+1)) - else - echo "(warning: failed to remove $name)" - fi - else - kept=$((kept+1)) - fi -done - -echo -echo "=== Cleanup summary ===" -echo "Images kept: $kept" -echo "Images deleted: $deleted" -echo - -echo "=== Docker disk usage after cleanup ===" -docker system df -v || true - -echo -echo "=== Disk space for /var/lib/docker ===" -df -h /var/lib/docker 2>/dev/null || df -h / diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index f20aff876e87..34da2de603ac 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -126,22 +126,21 @@ jobs: run: | docker exec ${CONTAINER_ID} sh -c ' set -eux - HF_HUB_ENABLE_HF_TRANSFER=1 - 
nvidia-smi pytest -s -v tests/test_logger.py - # Entrypoints Integration Test (LLM) # 30min + nvidia-smi + + # Core Test # 22min + pytest -v -s core + + # Entrypoints Unit Tests # 5min cd tests - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process - pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests + HF_HUB_ENABLE_HF_TRANSFER=1 + pytest -v -s entrypoints/openai/tool_parsers + pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling + ' - name: Cleanup container if: always() run: | docker rm -f ${CONTAINER_ID} || true - - - name: Cleanup old images - if: always() - run: .github/scripts/cleanup_old_image.sh From 56d85c1d06aca06f56b93117d2c82d432ea7afd7 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 07:44:13 -0800 Subject: [PATCH 49/63] add cleanup Signed-off-by: Junpu Fan --- .github/scripts/image_cleanup.py | 19 +++++++++++++++++++ .github/workflows/pr-vllm-rayserve.yml | 22 +++++++++++++--------- 2 files changed, 32 insertions(+), 9 deletions(-) create mode 100644 .github/scripts/image_cleanup.py diff --git a/.github/scripts/image_cleanup.py b/.github/scripts/image_cleanup.py new file mode 100644 index 000000000000..0a146567756a --- /dev/null +++ b/.github/scripts/image_cleanup.py @@ -0,0 +1,19 @@ +import docker +import datetime + +client = docker.from_env() +cutoff = datetime.datetime.utcnow() - datetime.timedelta(hours=24) + +for img in client.images.list(): + # 'Created' is epoch seconds + created = datetime.datetime.utcfromtimestamp(img.attrs["Created"]) + if created < cutoff: + name = img.tags[0] if img.tags else "" + print(f"🗑️ Removing {name} (created {created.isoformat()}Z)") + try: + 
client.images.remove(img.id, force=True) + except docker.errors.APIError as e: + print(f"(warning: failed to remove {name}: {e.explanation})") + +print("\n=== Disk usage summary ===") +print(client.df()) # structured info like `docker system df` diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 34da2de603ac..e78f2a102464 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -86,6 +86,10 @@ jobs: requirements tests sparse-checkout-cone-mode: false + - uses: actions/setup-python@v6 + with: + python-version: "3.12" + - run: python -m pip install docker - name: Download image URI uses: actions/download-artifact@v4 @@ -126,21 +130,21 @@ jobs: run: | docker exec ${CONTAINER_ID} sh -c ' set -eux - pytest -s -v tests/test_logger.py - nvidia-smi + nvidia-smi + cd tests + pytest -s -v test_logger.py # Core Test # 22min - pytest -v -s core + # pytest -v -s core # Entrypoints Unit Tests # 5min - cd tests - HF_HUB_ENABLE_HF_TRANSFER=1 - pytest -v -s entrypoints/openai/tool_parsers - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling - + # HF_HUB_ENABLE_HF_TRANSFER=1 + # pytest -v -s entrypoints/openai/tool_parsers + # pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling ' - - name: Cleanup container + - name: Cleanup container and images if: always() run: | docker rm -f ${CONTAINER_ID} || true + python .github/scripts/image_cleanup.py From 432917dc0e32e0a8da7647682e953b96114293f7 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 07:48:10 -0800 Subject: [PATCH 50/63] fix Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index e78f2a102464..f0a19d86fff2 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -88,7 +88,7 @@ jobs: sparse-checkout-cone-mode: false - uses: actions/setup-python@v6 with: - python-version: "3.12" + python-version: "3.12.12" - run: python -m pip install docker - name: Download image URI From e5ad9e6acccc9f93cd929b4a4ad630f3ad0d3e7f Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 07:54:21 -0800 Subject: [PATCH 51/63] update Signed-off-by: Junpu Fan --- .github/scripts/runner_setup.sh | 2 ++ .github/workflows/pr-vllm-rayserve.yml | 5 +---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/scripts/runner_setup.sh b/.github/scripts/runner_setup.sh index e6eb3aecd88e..2c6f733ccab8 100755 --- a/.github/scripts/runner_setup.sh +++ b/.github/scripts/runner_setup.sh @@ -5,4 +5,6 @@ if ! command -v uv &> /dev/null; then curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR="/usr/local/bin" sh uv self update fi +uv python install 3.12 +uv python list docker --version diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index f0a19d86fff2..9bc7c1c04883 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -86,10 +86,7 @@ jobs: requirements tests sparse-checkout-cone-mode: false - - uses: actions/setup-python@v6 - with: - python-version: "3.12.12" - - run: python -m pip install docker + - run: .github/scripts/runner_setup.sh - name: Download image URI uses: actions/download-artifact@v4 From 8e7a408dd586ef8df0f8dbf58743a4d2202b1838 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 08:56:35 -0800 Subject: [PATCH 52/63] update Signed-off-by: Junpu Fan --- .github/scripts/image_cleanup.py | 19 ------------------- .github/scripts/runner_setup.sh | 2 -- .github/workflows/pr-vllm-rayserve.yml | 3 +-- 3 files 
changed, 1 insertion(+), 23 deletions(-) delete mode 100644 .github/scripts/image_cleanup.py diff --git a/.github/scripts/image_cleanup.py b/.github/scripts/image_cleanup.py deleted file mode 100644 index 0a146567756a..000000000000 --- a/.github/scripts/image_cleanup.py +++ /dev/null @@ -1,19 +0,0 @@ -import docker -import datetime - -client = docker.from_env() -cutoff = datetime.datetime.utcnow() - datetime.timedelta(hours=24) - -for img in client.images.list(): - # 'Created' is epoch seconds - created = datetime.datetime.utcfromtimestamp(img.attrs["Created"]) - if created < cutoff: - name = img.tags[0] if img.tags else "" - print(f"🗑️ Removing {name} (created {created.isoformat()}Z)") - try: - client.images.remove(img.id, force=True) - except docker.errors.APIError as e: - print(f"(warning: failed to remove {name}: {e.explanation})") - -print("\n=== Disk usage summary ===") -print(client.df()) # structured info like `docker system df` diff --git a/.github/scripts/runner_setup.sh b/.github/scripts/runner_setup.sh index 2c6f733ccab8..e6eb3aecd88e 100755 --- a/.github/scripts/runner_setup.sh +++ b/.github/scripts/runner_setup.sh @@ -5,6 +5,4 @@ if ! 
command -v uv &> /dev/null; then curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR="/usr/local/bin" sh uv self update fi -uv python install 3.12 -uv python list docker --version diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 9bc7c1c04883..1127c8cd8bfd 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -86,7 +86,6 @@ jobs: requirements tests sparse-checkout-cone-mode: false - - run: .github/scripts/runner_setup.sh - name: Download image URI uses: actions/download-artifact@v4 @@ -144,4 +143,4 @@ jobs: if: always() run: | docker rm -f ${CONTAINER_ID} || true - python .github/scripts/image_cleanup.py + docker image prune -a --force --filter "until=24h" From f5e61e3102beebd2fcd24c1a6eed49b907935e12 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 09:05:33 -0800 Subject: [PATCH 53/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 1127c8cd8bfd..c0ab14cb6edb 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -35,7 +35,7 @@ jobs: vllm-rayserve-ec2: - "docker/vllm/Dockerfile.rayserve" - build-vllm-rayserve-ec2: + build: needs: [check-changes] if: needs.check-changes.outputs.vllm-rayserve-ec2 == 'true' runs-on: @@ -67,9 +67,9 @@ jobs: name: vllm-rayserve-ec2-image-uri path: image_uri.txt - test-vllm-rayserve-ec2: - needs: [build-vllm-rayserve-ec2] - if: needs.build-vllm-rayserve-ec2.result == 'success' + test: + needs: [build] + if: needs.build.result == 'success' runs-on: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-g6xl-runner @@ -121,7 +121,7 @@ jobs: uv pip install --system -e tests/vllm_test_utils uv pip install --system hf_transfer ' - + - name: Run vLLM Tests run: | docker 
exec ${CONTAINER_ID} sh -c ' @@ -138,9 +138,10 @@ jobs: # pytest -v -s entrypoints/openai/tool_parsers # pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling ' - + - name: Cleanup container and images if: always() run: | docker rm -f ${CONTAINER_ID} || true docker image prune -a --force --filter "until=24h" + docker system df -v From 16d5f1eb116aabece5d78942640568f681b1deaf Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 09:19:35 -0800 Subject: [PATCH 54/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 43 +++++++++++++++++--------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index c0ab14cb6edb..282c306577bd 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -35,7 +35,7 @@ jobs: vllm-rayserve-ec2: - "docker/vllm/Dockerfile.rayserve" - build: + build-image: needs: [check-changes] if: needs.check-changes.outputs.vllm-rayserve-ec2 == 'true' runs-on: @@ -45,31 +45,41 @@ jobs: - uses: actions/checkout@v5 - run: .github/scripts/runner_setup.sh - run: .github/scripts/buildkitd.sh - - name: Build vllm-rayserve-ec2 image - id: build - shell: bash + - name: ECR login run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - IMAGE_TAG=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }} + + - name: Resolve image URI for build + run: | + IMAGE_URI=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ 
github.event.pull_request.number }} + echo "Image URI to build: $IMAGE_URI" + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + + - name: Build image + run: | docker buildx build --progress plain \ --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \ --cache-to=type=inline \ - --cache-from=type=registry,ref=$IMAGE_TAG \ - --tag $IMAGE_TAG \ + --cache-from=type=registry,ref=$IMAGE_URI \ + --tag $IMAGE_URI \ --target vllm-rayserve-ec2 \ -f docker/vllm/Dockerfile.rayserve . - docker push $IMAGE_TAG - docker rmi $IMAGE_TAG - echo $IMAGE_TAG > image_uri.txt + + - name: Docker Push and save IMAGE_URI + run: | + docker push $IMAGE_URI + docker rmi $IMAGE_URI + echo $IMAGE_URI > image_uri.txt + - name: Upload image URI uses: actions/upload-artifact@v4 with: name: vllm-rayserve-ec2-image-uri path: image_uri.txt - test: - needs: [build] - if: needs.build.result == 'success' + test-image: + needs: [build-image] + if: needs.build-image.result == 'success' runs-on: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-g6xl-runner @@ -98,9 +108,12 @@ jobs: echo "Resolved image URI: $IMAGE_URI" echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV - - name: Pull image + - name: ECR login run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + + - name: Pull image + run: | docker pull $IMAGE_URI - name: Start container @@ -144,4 +157,4 @@ jobs: run: | docker rm -f ${CONTAINER_ID} || true docker image prune -a --force --filter "until=24h" - docker system df -v + docker system df From c0a8c8512dc3f4c728f5daf2e6ad98d496a5d30e Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 09:23:25 -0800 Subject: [PATCH 55/63] update workflow Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml 
b/.github/workflows/pr-vllm-rayserve.yml index 282c306577bd..429cb27af73e 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -65,13 +65,13 @@ jobs: --target vllm-rayserve-ec2 \ -f docker/vllm/Dockerfile.rayserve . - - name: Docker Push and save IMAGE_URI + - name: Docker Push and save image URI artifact run: | docker push $IMAGE_URI docker rmi $IMAGE_URI echo $IMAGE_URI > image_uri.txt - - name: Upload image URI + - name: Upload image URI artifact uses: actions/upload-artifact@v4 with: name: vllm-rayserve-ec2-image-uri @@ -86,6 +86,7 @@ jobs: steps: - name: Checkout DLC source uses: actions/checkout@v5 + - name: Checkout vLLM Tests uses: actions/checkout@v5 with: @@ -97,21 +98,21 @@ jobs: tests sparse-checkout-cone-mode: false - - name: Download image URI + - name: ECR login + run: | + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + + - name: Download image URI artifact uses: actions/download-artifact@v4 with: name: vllm-rayserve-ec2-image-uri - - name: Resolve image URI + - name: Resolve image URI for test run: | IMAGE_URI=$(cat image_uri.txt) echo "Resolved image URI: $IMAGE_URI" echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV - - name: ECR login - run: | - aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - - name: Pull image run: | docker pull $IMAGE_URI From af227b45ff62fc68130b13563b5430c5cf5f0750 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 09:26:52 -0800 Subject: [PATCH 56/63] enable more test Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 
429cb27af73e..f21ab6f4dfe5 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -145,12 +145,12 @@ jobs: pytest -s -v test_logger.py # Core Test # 22min - # pytest -v -s core + pytest -v -s core # Entrypoints Unit Tests # 5min - # HF_HUB_ENABLE_HF_TRANSFER=1 - # pytest -v -s entrypoints/openai/tool_parsers - # pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling + HF_HUB_ENABLE_HF_TRANSFER=1 + pytest -v -s entrypoints/openai/tool_parsers + pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling ' - name: Cleanup container and images From 6ba7e450dae9556a62de996ebd0282e2b88bf585 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 09:56:41 -0800 Subject: [PATCH 57/63] update tests Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 74 ++++++++++++++++++-------- 1 file changed, 52 insertions(+), 22 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index f21ab6f4dfe5..8ebd4552d753 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -87,17 +87,6 @@ jobs: - name: Checkout DLC source uses: actions/checkout@v5 - - name: Checkout vLLM Tests - uses: actions/checkout@v5 - with: - repository: vllm-project/vllm - ref: v0.10.2 - path: vllm_tests - sparse-checkout: | - requirements - tests - sparse-checkout-cone-mode: false - - name: ECR login run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com @@ -117,16 +106,23 @@ jobs: run: | docker pull $IMAGE_URI + - name: Checkout vLLM Tests + uses: actions/checkout@v5 + 
with: + repository: vllm-project/vllm + ref: v0.10.2 + path: vllm_source + - name: Start container run: | CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ - -v ./vllm_tests:/workdir --workdir /workdir \ + -v ./vllm_source:/workdir --workdir /workdir \ ${IMAGE_URI}) echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV - - name: Install Test dependencies + - name: Setup for vLLM Test run: | docker exec ${CONTAINER_ID} sh -c ' set -eux @@ -134,6 +130,8 @@ jobs: uv pip install --system pytest pytest-asyncio uv pip install --system -e tests/vllm_test_utils uv pip install --system hf_transfer + mkdir src + mv vllm src/vllm ' - name: Run vLLM Tests @@ -141,16 +139,48 @@ jobs: docker exec ${CONTAINER_ID} sh -c ' set -eux nvidia-smi - cd tests - pytest -s -v test_logger.py - # Core Test # 22min - pytest -v -s core - - # Entrypoints Unit Tests # 5min - HF_HUB_ENABLE_HF_TRANSFER=1 - pytest -v -s entrypoints/openai/tool_parsers - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling + # Regression Test # 7min + cd /workdir/tests + uv pip install --system modelscope + pytest -v -s test_regression.py + + # Engine Test # 25min + cd /workdir/tests + pytest -v -s engine test_sequence.py test_config.py test_logger.py test_vllm_port.py + pytest -v -s tokenization + + # Examples Test # 30min + cd /workdir/examples + pip install tensorizer # for tensorizer test + python3 offline_inference/basic/generate.py --model facebook/opt-125m + python3 offline_inference/basic/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10 + python3 offline_inference/basic/chat.py + python3 offline_inference/prefix_caching.py + python3 offline_inference/llm_engine_example.py + python3 offline_inference/audio_language.py --seed 0 + python3 
offline_inference/vision_language.py --seed 0 + python3 offline_inference/vision_language_pooling.py --seed 0 + python3 offline_inference/vision_language_multi_image.py --seed 0 + VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors + python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0 + python3 offline_inference/basic/classify.py + python3 offline_inference/basic/embed.py + python3 offline_inference/basic/score.py + VLLM_USE_V1=0 python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2 + + # Platform Tests (CUDA) # 4min + cd /workdir/tests + pytest -v -s cuda/test_cuda_context.py + + # Encoder Decoder tests # 12min + cd /workdir/tests + pytest -v -s encoder_decoder + + # OpenAI-Compatible Tool Use # 23min + cd /workdir/tests + pytest -v -s tool_use + pytest -v -s mistral_tool_use ' - name: Cleanup container and images From c3cc99c6c1190a692190566a6f1d4ee4871226fa Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 10:24:27 -0800 Subject: [PATCH 58/63] parallel tests Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 357 ++++++++++++++++++++++++- 1 file changed, 356 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 8ebd4552d753..dc4f31e30c9d 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -77,7 +77,7 @@ jobs: name: vllm-rayserve-ec2-image-uri path: image_uri.txt - test-image: + regression-test: needs: [build-image] if: needs.build-image.result == 'success' runs-on: @@ -144,11 +144,153 @@ jobs: cd /workdir/tests uv pip install --system modelscope pytest -v -s test_regression.py + ' + + - name: Cleanup container and 
if: always() + run: | + docker rm -f ${CONTAINER_ID} || true + docker image prune -a --force --filter "until=24h" + docker system df + + engine-test: + needs: [build-image] + if: needs.build-image.result == 'success' + runs-on: + - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} + fleet:x86-g6xl-runner + steps: + - name: Checkout DLC source + uses: actions/checkout@v5 + + - name: ECR login + run: | + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + + - name: Download image URI artifact + uses: actions/download-artifact@v4 + with: + name: vllm-rayserve-ec2-image-uri + + - name: Resolve image URI for test + run: | + IMAGE_URI=$(cat image_uri.txt) + echo "Resolved image URI: $IMAGE_URI" + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + + - name: Pull image + run: | + docker pull $IMAGE_URI + + - name: Checkout vLLM Tests + uses: actions/checkout@v5 + with: + repository: vllm-project/vllm + ref: v0.10.2 + path: vllm_source + + - name: Start container + run: | + CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ + -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ + -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ./vllm_source:/workdir --workdir /workdir \ + ${IMAGE_URI}) + echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV + + - name: Setup for vLLM Test + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + uv pip install --system pytest pytest-asyncio + uv pip install --system -e tests/vllm_test_utils + uv pip install --system hf_transfer + mkdir src + mv vllm src/vllm + ' + + - name: Run vLLM Tests + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + nvidia-smi # Engine Test # 25min cd /workdir/tests pytest -v -s engine test_sequence.py test_config.py test_logger.py 
test_vllm_port.py pytest -v -s tokenization + ' + + - name: Cleanup container and images + if: always() + run: | + docker rm -f ${CONTAINER_ID} || true + docker image prune -a --force --filter "until=24h" + docker system df + + example-test: + needs: [build-image] + if: needs.build-image.result == 'success' + runs-on: + - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} + fleet:x86-g6xl-runner + steps: + - name: Checkout DLC source + uses: actions/checkout@v5 + + - name: ECR login + run: | + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + + - name: Download image URI artifact + uses: actions/download-artifact@v4 + with: + name: vllm-rayserve-ec2-image-uri + + - name: Resolve image URI for test + run: | + IMAGE_URI=$(cat image_uri.txt) + echo "Resolved image URI: $IMAGE_URI" + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + + - name: Pull image + run: | + docker pull $IMAGE_URI + + - name: Checkout vLLM Tests + uses: actions/checkout@v5 + with: + repository: vllm-project/vllm + ref: v0.10.2 + path: vllm_source + + - name: Start container + run: | + CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ + -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ + -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ./vllm_source:/workdir --workdir /workdir \ + ${IMAGE_URI}) + echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV + + - name: Setup for vLLM Test + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + uv pip install --system pytest pytest-asyncio + uv pip install --system -e tests/vllm_test_utils + uv pip install --system hf_transfer + mkdir src + mv vllm src/vllm + ' + + - name: Run vLLM Tests + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + nvidia-smi # Examples Test # 30min cd 
/workdir/examples @@ -168,14 +310,227 @@ jobs: python3 offline_inference/basic/embed.py python3 offline_inference/basic/score.py VLLM_USE_V1=0 python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2 + ' + + - name: Cleanup container and images + if: always() + run: | + docker rm -f ${CONTAINER_ID} || true + docker image prune -a --force --filter "until=24h" + docker system df + + cuda-test: + needs: [build-image] + if: needs.build-image.result == 'success' + runs-on: + - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} + fleet:x86-g6xl-runner + steps: + - name: Checkout DLC source + uses: actions/checkout@v5 + + - name: ECR login + run: | + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + + - name: Download image URI artifact + uses: actions/download-artifact@v4 + with: + name: vllm-rayserve-ec2-image-uri + + - name: Resolve image URI for test + run: | + IMAGE_URI=$(cat image_uri.txt) + echo "Resolved image URI: $IMAGE_URI" + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + + - name: Pull image + run: | + docker pull $IMAGE_URI + + - name: Checkout vLLM Tests + uses: actions/checkout@v5 + with: + repository: vllm-project/vllm + ref: v0.10.2 + path: vllm_source + + - name: Start container + run: | + CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ + -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ + -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ./vllm_source:/workdir --workdir /workdir \ + ${IMAGE_URI}) + echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV + + - name: Setup for vLLM Test + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + uv pip install --system pytest pytest-asyncio + uv pip install --system -e tests/vllm_test_utils + uv pip 
install --system hf_transfer + mkdir src + mv vllm src/vllm + ' + + - name: Run vLLM Tests + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + nvidia-smi # Platform Tests (CUDA) # 4min cd /workdir/tests pytest -v -s cuda/test_cuda_context.py + ' + + - name: Cleanup container and images + if: always() + run: | + docker rm -f ${CONTAINER_ID} || true + docker image prune -a --force --filter "until=24h" + docker system df + + encoder-decoder-test: + needs: [build-image] + if: needs.build-image.result == 'success' + runs-on: + - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} + fleet:x86-g6xl-runner + steps: + - name: Checkout DLC source + uses: actions/checkout@v5 + + - name: ECR login + run: | + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + + - name: Download image URI artifact + uses: actions/download-artifact@v4 + with: + name: vllm-rayserve-ec2-image-uri + + - name: Resolve image URI for test + run: | + IMAGE_URI=$(cat image_uri.txt) + echo "Resolved image URI: $IMAGE_URI" + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + + - name: Pull image + run: | + docker pull $IMAGE_URI + + - name: Checkout vLLM Tests + uses: actions/checkout@v5 + with: + repository: vllm-project/vllm + ref: v0.10.2 + path: vllm_source + + - name: Start container + run: | + CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ + -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ + -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ./vllm_source:/workdir --workdir /workdir \ + ${IMAGE_URI}) + echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV + + - name: Setup for vLLM Test + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + uv pip install --system pytest pytest-asyncio + uv pip install --system -e 
tests/vllm_test_utils + uv pip install --system hf_transfer + mkdir src + mv vllm src/vllm + ' + + - name: Run vLLM Tests + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + nvidia-smi # Encoder Decoder tests # 12min cd /workdir/tests pytest -v -s encoder_decoder + ' + + - name: Cleanup container and images + if: always() + run: | + docker rm -f ${CONTAINER_ID} || true + docker image prune -a --force --filter "until=24h" + docker system df + + openai-compatible-tool-use-test: + needs: [build-image] + if: needs.build-image.result == 'success' + runs-on: + - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} + fleet:x86-g6xl-runner + steps: + - name: Checkout DLC source + uses: actions/checkout@v5 + + - name: ECR login + run: | + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + + - name: Download image URI artifact + uses: actions/download-artifact@v4 + with: + name: vllm-rayserve-ec2-image-uri + + - name: Resolve image URI for test + run: | + IMAGE_URI=$(cat image_uri.txt) + echo "Resolved image URI: $IMAGE_URI" + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + + - name: Pull image + run: | + docker pull $IMAGE_URI + + - name: Checkout vLLM Tests + uses: actions/checkout@v5 + with: + repository: vllm-project/vllm + ref: v0.10.2 + path: vllm_source + + - name: Start container + run: | + CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ + -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ + -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ./vllm_source:/workdir --workdir /workdir \ + ${IMAGE_URI}) + echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV + + - name: Setup for vLLM Test + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + uv pip install --system pytest pytest-asyncio + uv 
pip install --system -e tests/vllm_test_utils + uv pip install --system hf_transfer + mkdir src + mv vllm src/vllm + ' + + - name: Run vLLM Tests + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + nvidia-smi # OpenAI-Compatible Tool Use # 23min cd /workdir/tests From dcb93027ab18c7cb431e8cf6b678559b746daecc Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 11:09:13 -0800 Subject: [PATCH 59/63] remove encoder decoder test Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 75 -------------------------- 1 file changed, 75 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index dc4f31e30c9d..2bc3eecbd101 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -394,81 +394,6 @@ jobs: docker image prune -a --force --filter "until=24h" docker system df - encoder-decoder-test: - needs: [build-image] - if: needs.build-image.result == 'success' - runs-on: - - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} - fleet:x86-g6xl-runner - steps: - - name: Checkout DLC source - uses: actions/checkout@v5 - - - name: ECR login - run: | - aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - - - name: Download image URI artifact - uses: actions/download-artifact@v4 - with: - name: vllm-rayserve-ec2-image-uri - - - name: Resolve image URI for test - run: | - IMAGE_URI=$(cat image_uri.txt) - echo "Resolved image URI: $IMAGE_URI" - echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV - - - name: Pull image - run: | - docker pull $IMAGE_URI - - - name: Checkout vLLM Tests - uses: actions/checkout@v5 - with: - repository: vllm-project/vllm - ref: v0.10.2 - path: vllm_source - - - name: Start container - run: | - CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ - -v 
${HOME}/.cache/huggingface:/root/.cache/huggingface \ - -v ${HOME}/.cache/vllm:/root/.cache/vllm \ - -v ./vllm_source:/workdir --workdir /workdir \ - ${IMAGE_URI}) - echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV - - - name: Setup for vLLM Test - run: | - docker exec ${CONTAINER_ID} sh -c ' - set -eux - uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto - uv pip install --system pytest pytest-asyncio - uv pip install --system -e tests/vllm_test_utils - uv pip install --system hf_transfer - mkdir src - mv vllm src/vllm - ' - - - name: Run vLLM Tests - run: | - docker exec ${CONTAINER_ID} sh -c ' - set -eux - nvidia-smi - - # Encoder Decoder tests # 12min - cd /workdir/tests - pytest -v -s encoder_decoder - ' - - - name: Cleanup container and images - if: always() - run: | - docker rm -f ${CONTAINER_ID} || true - docker image prune -a --force --filter "until=24h" - docker system df - openai-compatible-tool-use-test: needs: [build-image] if: needs.build-image.result == 'success' From c7b284bf295ebd23487aba3e6b06f942402e6d65 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 13:33:36 -0800 Subject: [PATCH 60/63] add hf token Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 2bc3eecbd101..86ebf40f8ecc 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -119,6 +119,7 @@ jobs: -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ -v ./vllm_source:/workdir --workdir /workdir \ + -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ ${IMAGE_URI}) echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV @@ -195,6 +196,7 @@ jobs: -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ -v ./vllm_source:/workdir --workdir 
/workdir \ + -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ ${IMAGE_URI}) echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV @@ -271,6 +273,7 @@ jobs: -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ -v ./vllm_source:/workdir --workdir /workdir \ + -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ ${IMAGE_URI}) echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV @@ -361,6 +364,7 @@ jobs: -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ -v ./vllm_source:/workdir --workdir /workdir \ + -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ ${IMAGE_URI}) echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV @@ -436,6 +440,7 @@ jobs: -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ -v ./vllm_source:/workdir --workdir /workdir \ + -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ ${IMAGE_URI}) echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV From 98079271b9b89b09541ab522698b208de2cf100a Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 14:38:50 -0800 Subject: [PATCH 61/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 162 +------------------------ 1 file changed, 4 insertions(+), 158 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 86ebf40f8ecc..b279b89fdba1 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -154,7 +154,7 @@ jobs: docker image prune -a --force --filter "until=24h" docker system df - engine-test: + cuda-test: needs: [build-image] if: needs.build-image.result == 'success' runs-on: @@ -218,10 +218,9 @@ jobs: set -eux nvidia-smi - # Engine Test # 25min + # Platform Tests (CUDA) # 4min cd /workdir/tests - pytest -v -s engine test_sequence.py test_config.py test_logger.py test_vllm_port.py - pytest -v -s 
tokenization + pytest -v -s cuda/test_cuda_context.py ' - name: Cleanup container and images @@ -299,7 +298,7 @@ jobs: cd /workdir/examples pip install tensorizer # for tensorizer test python3 offline_inference/basic/generate.py --model facebook/opt-125m - python3 offline_inference/basic/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10 + # python3 offline_inference/basic/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10 python3 offline_inference/basic/chat.py python3 offline_inference/prefix_caching.py python3 offline_inference/llm_engine_example.py @@ -321,156 +320,3 @@ jobs: docker rm -f ${CONTAINER_ID} || true docker image prune -a --force --filter "until=24h" docker system df - - cuda-test: - needs: [build-image] - if: needs.build-image.result == 'success' - runs-on: - - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} - fleet:x86-g6xl-runner - steps: - - name: Checkout DLC source - uses: actions/checkout@v5 - - - name: ECR login - run: | - aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - - - name: Download image URI artifact - uses: actions/download-artifact@v4 - with: - name: vllm-rayserve-ec2-image-uri - - - name: Resolve image URI for test - run: | - IMAGE_URI=$(cat image_uri.txt) - echo "Resolved image URI: $IMAGE_URI" - echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV - - - name: Pull image - run: | - docker pull $IMAGE_URI - - - name: Checkout vLLM Tests - uses: actions/checkout@v5 - with: - repository: vllm-project/vllm - ref: v0.10.2 - path: vllm_source - - - name: Start container - run: | - CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ - -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ - -v ${HOME}/.cache/vllm:/root/.cache/vllm \ - -v ./vllm_source:/workdir --workdir /workdir \ - -e HUGGING_FACE_HUB_TOKEN=${{ 
secrets.HUGGING_FACE_HUB_TOKEN }} \ - ${IMAGE_URI}) - echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV - - - name: Setup for vLLM Test - run: | - docker exec ${CONTAINER_ID} sh -c ' - set -eux - uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto - uv pip install --system pytest pytest-asyncio - uv pip install --system -e tests/vllm_test_utils - uv pip install --system hf_transfer - mkdir src - mv vllm src/vllm - ' - - - name: Run vLLM Tests - run: | - docker exec ${CONTAINER_ID} sh -c ' - set -eux - nvidia-smi - - # Platform Tests (CUDA) # 4min - cd /workdir/tests - pytest -v -s cuda/test_cuda_context.py - ' - - - name: Cleanup container and images - if: always() - run: | - docker rm -f ${CONTAINER_ID} || true - docker image prune -a --force --filter "until=24h" - docker system df - - openai-compatible-tool-use-test: - needs: [build-image] - if: needs.build-image.result == 'success' - runs-on: - - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} - fleet:x86-g6xl-runner - steps: - - name: Checkout DLC source - uses: actions/checkout@v5 - - - name: ECR login - run: | - aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - - - name: Download image URI artifact - uses: actions/download-artifact@v4 - with: - name: vllm-rayserve-ec2-image-uri - - - name: Resolve image URI for test - run: | - IMAGE_URI=$(cat image_uri.txt) - echo "Resolved image URI: $IMAGE_URI" - echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV - - - name: Pull image - run: | - docker pull $IMAGE_URI - - - name: Checkout vLLM Tests - uses: actions/checkout@v5 - with: - repository: vllm-project/vllm - ref: v0.10.2 - path: vllm_source - - - name: Start container - run: | - CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ - -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ - -v 
${HOME}/.cache/vllm:/root/.cache/vllm \ - -v ./vllm_source:/workdir --workdir /workdir \ - -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ - ${IMAGE_URI}) - echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV - - - name: Setup for vLLM Test - run: | - docker exec ${CONTAINER_ID} sh -c ' - set -eux - uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto - uv pip install --system pytest pytest-asyncio - uv pip install --system -e tests/vllm_test_utils - uv pip install --system hf_transfer - mkdir src - mv vllm src/vllm - ' - - - name: Run vLLM Tests - run: | - docker exec ${CONTAINER_ID} sh -c ' - set -eux - nvidia-smi - - # OpenAI-Compatible Tool Use # 23min - cd /workdir/tests - pytest -v -s tool_use - pytest -v -s mistral_tool_u - ' - - - name: Cleanup container and images - if: always() - run: | - docker rm -f ${CONTAINER_ID} || true - docker image prune -a --force --filter "until=24h" - docker system df From 11ead3bbbe9e00bad73b25954d236f2b6376fab0 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 15:52:07 -0800 Subject: [PATCH 62/63] remove push on main Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index b279b89fdba1..718ee6960baa 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -2,8 +2,6 @@ name: PR - vLLM RayServe on: pull_request: - branches: - - main paths: - "docker/**" From 92f77d9de642a1870afb48a85fabe0cf184350a1 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 15:52:50 -0800 Subject: [PATCH 63/63] revert Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 718ee6960baa..b279b89fdba1 100644 --- 
a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -2,6 +2,8 @@ name: PR - vLLM RayServe on: pull_request: + branches: + - main paths: - "docker/**"