From 8ea932b73ac294ba918cf553ab506936e11f2d76 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 14:54:16 -0800 Subject: [PATCH 01/63] try build Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 11 + .gitignore | 1 + .pre-commit-config.yaml | 28 +- DEVELOPMENT.md | 7 + docker/vllm/Dockerfile.rayserve | 68 ++++ scripts/dockerd_entrypoint.sh | 6 + scripts/install_efa.sh | 102 +++++ scripts/setup_oss_compliance.sh | 34 ++ scripts/telemetry/bash_telemetry.sh | 11 + scripts/telemetry/deep_learning_container.py | 395 +++++++++++++++++++ 10 files changed, 650 insertions(+), 13 deletions(-) create mode 100644 docker/vllm/Dockerfile.rayserve create mode 100755 scripts/dockerd_entrypoint.sh create mode 100755 scripts/install_efa.sh create mode 100755 scripts/setup_oss_compliance.sh create mode 100755 scripts/telemetry/bash_telemetry.sh create mode 100755 scripts/telemetry/deep_learning_container.py diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index e69804295944..81b28112ea14 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -41,6 +41,17 @@ jobs: - uses: actions/checkout@v5 - run: .github/scripts/runner_setup.sh - run: .github/scripts/buildkitd.sh + - name: build vllm-rayserve-ec2 image + shell: bash + run: | + DATE=$(date +"%Y-%m-%d") + COMMIT_REF=$(git rev-parse --short HEAD) + DOCKER_BUILDKIT=1 docker build --progress plain \ + --build-arg CACHE_REFRESH=${DATE} \ + --tag vllm:0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-${COMMIT_REF} \ + --target vllm-rayserve-ec2 \ + -f docker/vllm/Dockerfile.rayserve . 
+ docker image ls example-on-g6xl-runner-1: needs: [example-on-build-runner] diff --git a/.gitignore b/.gitignore index 18b67f20119c..126c4416f381 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ __pycache__ .idea *.pyc .venv +.ruff_cache \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3ec28eba9d20..63f60f47f387 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,23 +15,14 @@ repos: # optional: add additional arguments here - --indent=2 - --write - stages: [manual] # run in CI - - repo: https://github.com/rhysd/actionlint - rev: v1.7.7 - hooks: - - id: actionlint - stages: [manual] # run in CI + stages: [pre-commit] - repo: https://github.com/scop/pre-commit-shfmt rev: v3.12.0-2 # Use the latest stable revision hooks: - id: shfmt # Optional: Add arguments to shfmt if needed, e.g., to enable "simplify" mode args: ["-s"] - - repo: https://github.com/crate-ci/typos - rev: v1.38.1 - hooks: - - id: typos - args: [--force-exclude] + stages: [pre-commit] - repo: https://github.com/hukkin/mdformat rev: 1.0.0 # Use the ref you want to point at hooks: @@ -40,17 +31,28 @@ repos: additional_dependencies: - mdformat-gfm - mdformat-black + stages: [pre-commit] - repo: https://github.com/igorshubovych/markdownlint-cli rev: v0.45.0 hooks: - id: markdownlint args: [--fix] + stages: [pre-commit] - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.14.3 hooks: - - id: ruff-check - args: [ --fix, --output-format=github ] - id: ruff-format + stages: [pre-commit] + - id: ruff-check + - repo: https://github.com/rhysd/actionlint + rev: v1.7.7 + hooks: + - id: actionlint + - repo: https://github.com/crate-ci/typos + rev: v1.38.1 + hooks: + - id: typos + args: [--force-exclude] - repo: local hooks: - id: signoff-commit diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 97ec98b254b1..55fefbd3a911 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -28,6 +28,13 @@ uv pip install pre-commit pre-commit 
install ``` +Install go using [homebrew](https://brew.sh/), below example assume on Mac. + +```bash +brew install go +go env -w GOPROXY=direct +``` + To manually run all linters: ```bash diff --git a/docker/vllm/Dockerfile.rayserve b/docker/vllm/Dockerfile.rayserve new file mode 100644 index 000000000000..3eeff6528797 --- /dev/null +++ b/docker/vllm/Dockerfile.rayserve @@ -0,0 +1,68 @@ +FROM docker.io/vllm/vllm-openai:v0.10.2 AS base +ARG PYTHON="python3" +LABEL maintainer="Amazon AI" +ARG EFA_VERSION="1.43.3" +LABEL dlc_major_version="1" +ENV DEBIAN_FRONTEND=noninteractive \ + LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 \ + DLC_CONTAINER_TYPE=base \ + # Python won’t try to write .pyc or .pyo files on the import of source modules + # Force stdin, stdout and stderr to be totally unbuffered. Good for logging + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONIOENCODING=UTF-8 \ + LD_LIBRARY_PATH="/usr/local/lib:/opt/amazon/ofi-nccl/lib/x86_64-linux-gnu:/opt/amazon/openmpi/lib:/opt/amazon/efa/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" \ + PATH="/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/cuda/bin:${PATH}" + +WORKDIR / + +COPY ./scripts/telemetry/deep_learning_container.py /usr/local/bin/deep_learning_container.py +COPY ./scripts/telemetry/bash_telemetry.sh /usr/local/bin/bash_telemetry.sh +COPY ./scripts/setup_oss_compliance.sh setup_oss_compliance.sh + +RUN chmod +x /usr/local/bin/deep_learning_container.py \ + && chmod +x /usr/local/bin/bash_telemetry.sh \ + && echo 'source /usr/local/bin/bash_telemetry.sh' >>/etc/bash.bashrc \ + && bash setup_oss_compliance.sh ${PYTHON} && rm setup_oss_compliance.sh \ + # create symlink for python + && ln -s /usr/bin/python3 /usr/bin/python \ + # clean up + && rm -rf ${HOME_DIR}/oss_compliance* \ + && rm -rf /tmp/tmp* \ + && rm -rf /tmp/uv* \ + && rm -rf /var/lib/apt/lists/* \ + && rm -rf /root/.cache | true + +COPY ./scripts/install_efa.sh install_efa.sh +RUN bash install_efa.sh ${EFA_VERSION} \ + && rm install_efa.sh \ 
+ && mkdir -p /tmp/nvjpeg \ + && cd /tmp/nvjpeg \ + && wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ + && tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ + && rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \ + && rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h \ + && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/lib/libnvjpeg* /usr/local/cuda/lib64/ \ + && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/include/* /usr/local/cuda/include/ \ + && rm -rf /tmp/nvjpeg \ + # remove cuobjdump and nvdisasm + && rm -rf /usr/local/cuda/bin/cuobjdump* \ + && rm -rf /usr/local/cuda/bin/nvdisasm* + +# ====================== ray serve ========================================= +FROM base AS vllm-rayserve-ec2 + +RUN uv pip install --system ray[serve]==2.49.0 \ + && uv cache clean + +ARG CACHE_REFRESH=0 +RUN dpkg -l | grep -E "cuda|nvidia|libnv" | awk '{print $2}' | xargs apt-mark hold \ + && apt-get update \ + && apt-get upgrade -y \ + && apt-get clean + +COPY ./scripts/dockerd_entrypoint.sh /usr/local/bin/dockerd_entrypoint.sh +RUN chmod +x /usr/local/bin/dockerd_entrypoint.sh + +ENTRYPOINT ["/usr/local/bin/dockerd_entrypoint.sh"] \ No newline at end of file diff --git a/scripts/dockerd_entrypoint.sh b/scripts/dockerd_entrypoint.sh new file mode 100755 index 000000000000..c05dab13dfa1 --- /dev/null +++ b/scripts/dockerd_entrypoint.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +# Check if telemetry file exists before executing +# Execute telemetry script if it exists, suppress errors +bash /usr/local/bin/bash_telemetry.sh >/dev/null 2>&1 || true + +python3 -m vllm.entrypoints.openai.api_server "$@" \ No newline at end of file diff --git a/scripts/install_efa.sh b/scripts/install_efa.sh new file mode 100755 index 000000000000..75cbc6e93116 --- /dev/null +++ b/scripts/install_efa.sh @@ -0,0 +1,102 @@ +#!/bin/bash + +set -ex + +ARCH=$(uname -m) +case $ARCH in + x86_64) + 
ARCH_DIR="x86_64-linux-gnu" + ;; + aarch64) + ARCH_DIR="aarch64-linux-gnu" + ;; + *) + echo "Unsupported architecture: $ARCH" + exit 1 + ;; +esac + +function check_libnccl_net_so { + OFI_LIB_DIR="/opt/amazon/ofi-nccl/lib/${ARCH_DIR}" + NCCL_NET_SO="$OFI_LIB_DIR/libnccl-net.so" + + # Check if file exists + if [ ! -f "$NCCL_NET_SO" ]; then + echo "ERROR: $NCCL_NET_SO does not exist" + return 1 + fi +} + +function install_efa { + EFA_VERSION=$1 + OPEN_MPI_PATH="/opt/amazon/openmpi" + + # Install build time tools + apt-get update + apt-get install -y --allow-change-held-packages --no-install-recommends \ + curl \ + build-essential \ + cmake \ + git + + # Install EFA + mkdir /tmp/efa + cd /tmp/efa + curl -O https://s3-us-west-2.amazonaws.com/aws-efa-installer/aws-efa-installer-${EFA_VERSION}.tar.gz + tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz + cd aws-efa-installer + ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify + rm -rf /tmp/efa + # Configure Open MPI and configure NCCL parameters + mv ${OPEN_MPI_PATH}/bin/mpirun ${OPEN_MPI_PATH}/bin/mpirun.real + echo '#!/bin/bash' > ${OPEN_MPI_PATH}/bin/mpirun + echo "${OPEN_MPI_PATH}/bin/mpirun.real --allow-run-as-root \"\$@\"" >> ${OPEN_MPI_PATH}/bin/mpirun + chmod a+x ${OPEN_MPI_PATH}/bin/mpirun + echo "hwloc_base_binding_policy = none" >> ${OPEN_MPI_PATH}/etc/openmpi-mca-params.conf + echo "rmaps_base_mapping_policy = slot" >> ${OPEN_MPI_PATH}/etc/openmpi-mca-params.conf + echo NCCL_DEBUG=INFO >> /etc/nccl.conf + echo NCCL_SOCKET_IFNAME=^docker0,lo >> /etc/nccl.conf + + # Install OpenSSH for MPI to communicate between containers, allow OpenSSH to talk to containers without asking for confirmation + apt-get install -y --no-install-recommends \ + openssh-client \ + openssh-server + mkdir -p /var/run/sshd + cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + # Configure 
OpenSSH so that nodes can communicate with each other + mkdir -p /var/run/sshd + sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd + rm -rf /root/.ssh/ + mkdir -p /root/.ssh/ + ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa + cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys + printf "Host *\n StrictHostKeyChecking no\n" >> /root/.ssh/config + + # Remove build time tools + # apt-get remove -y + # curl + # build-essential + # cmake + # git + + # Cleanup + apt-get clean + apt-get autoremove -y + rm -rf /var/lib/apt/lists/* + ldconfig + check_libnccl_net_so +} + +# idiomatic parameter and option handling in sh +while test $# -gt 0 +do + case "$1" in + [0-9].[0-9]*.[0-9]*) install_efa $1; + ;; + *) echo "bad argument $1"; exit 1 + ;; + esac + shift +done diff --git a/scripts/setup_oss_compliance.sh b/scripts/setup_oss_compliance.sh new file mode 100755 index 000000000000..426f8fb52f63 --- /dev/null +++ b/scripts/setup_oss_compliance.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +set -ex + +function install_oss_compliance { + HOME_DIR="/root" + PYTHON=$1 + + if [ -z "$PYTHON" ]; then + echo "Python version not specified. Using default Python." 
+ PYTHON="python3" + fi + curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip + ${PYTHON} -c "import zipfile, os; zipfile.ZipFile('/root/oss_compliance.zip').extractall('/root/'); os.remove('/root/oss_compliance.zip')" + cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance + chmod +x /usr/local/bin/testOSSCompliance + chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh + ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} + rm -rf ${HOME_DIR}/oss_compliance* + rm -rf /tmp/tmp* + # Removing the cache as it is needed for security verification + rm -rf /root/.cache | true +} + +while test $# -gt 0 +do + case "$1" in + python*) install_oss_compliance $1; + ;; + *) echo "bad argument $1"; exit 1 + ;; + esac + shift +done \ No newline at end of file diff --git a/scripts/telemetry/bash_telemetry.sh b/scripts/telemetry/bash_telemetry.sh new file mode 100755 index 000000000000..390000bacfca --- /dev/null +++ b/scripts/telemetry/bash_telemetry.sh @@ -0,0 +1,11 @@ +# telemetry.sh +#!/bin/bash +if [ -f /usr/local/bin/deep_learning_container.py ] && [[ -z "${OPT_OUT_TRACKING}" || "${OPT_OUT_TRACKING,,}" != "true" ]]; then + ( + python /usr/local/bin/deep_learning_container.py \ + --framework "${FRAMEWORK}" \ + --framework-version "${FRAMEWORK_VERSION}" \ + --container-type "${CONTAINER_TYPE}" \ + &>/dev/null & + ) +fi diff --git a/scripts/telemetry/deep_learning_container.py b/scripts/telemetry/deep_learning_container.py new file mode 100755 index 000000000000..35e730d745d6 --- /dev/null +++ b/scripts/telemetry/deep_learning_container.py @@ -0,0 +1,395 @@ +# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. 
A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +import argparse +import json +import logging +import multiprocessing +import os +import re +import signal +import sys + +import botocore.session +import requests + +TIMEOUT_SECS = 5 +REGION_MAPPING = { + "ap-northeast-1": "ddce303c", + "ap-northeast-2": "528c8d92", + "ap-southeast-1": "c35f9f00", + "ap-southeast-2": "d2add9c0", + "ap-south-1": "9deb4123", + "ca-central-1": "b95e2bf4", + "eu-central-1": "bfec3957", + "eu-north-1": "b453c092", + "eu-west-1": "d763c260", + "eu-west-2": "ea20d193", + "eu-west-3": "1894043c", + "sa-east-1": "030b4357", + "us-east-1": "487d6534", + "us-east-2": "72252b46", + "us-west-1": "d02c1125", + "us-west-2": "d8c0d063", + "af-south-1": "08ea8dc5", + "eu-south-1": "29566eac", + "me-south-1": "7ea07793", + "ap-southeast-7": "1699f14f", + "ap-southeast-3": "be0a3174", + "me-central-1": "6e06aaeb", + "ap-east-1": "5e1fbf92", + "ap-south-2": "50209442", + "ap-northeast-3": "fa298003", + "ap-southeast-5": "5852cd87", + "us-northeast-1": "bbf9e961", + "ap-southeast-4": "dc6f76ce", + "mx-central-1": "ed0da79c", + "il-central-1": "2fb2448e", + "ap-east-2": "8947749e", + "ca-west-1": "ea83ea06", + "eu-south-2": "df2c9d70", + "eu-central-2": "aa7aabcc", +} + + +def requests_helper(url, headers=None, timeout=0.1): + """ + Requests to get instance metadata using imdsv1 and imdsv2 + :param url: str, url to get the request + :param headers: str, headers needed to make a request + :param timeout: float, timeout value for a request + """ + response = None + try: + if headers: + response = requests.get(url, headers=headers, timeout=timeout) + else: + response = requests.get(url, timeout=timeout) 
+ + except requests.exceptions.RequestException as e: + logging.error("Request exception: {}".format(e)) + + return response + + +def requests_helper_imds(url, token=None): + """ + Requests to get instance metadata using imdsv1 and imdsv2 + :param url: str, url to get the request + :param token: str, token is needed to use imdsv2 + """ + response_text = None + response = None + headers = None + if token: + headers = {"X-aws-ec2-metadata-token": token} + timeout = 1 + try: + while timeout <= 3: + if headers: + response = requests.get(url, headers=headers, timeout=timeout) + else: + response = requests.get(url, timeout=timeout) + if response: + break + timeout += 1 + + except requests.exceptions.RequestException as e: + logging.error("Request exception: {}".format(e)) + + if response is not None and not (400 <= response.status_code < 600): + response_text = response.text + + return response_text + + +def get_imdsv2_token(): + """ + Retrieve token using imdsv2 service + """ + response = None + token = None + headers = {"X-aws-ec2-metadata-token-ttl-seconds": "600"} + url = "http://169.254.169.254/latest/api/token" + timeout = 1 + + try: + while timeout <= 3: + response = requests.put(url, headers=headers, timeout=timeout) + if response: + break + timeout += 1 + except requests.exceptions.RequestException as e: + logging.error("Request exception: {}".format(e)) + + if response is not None and not (400 <= response.status_code < 600): + token = response.text + + return token + + +def _validate_instance_id(instance_id): + """ + Validate instance ID + """ + instance_id_regex = r"^(i-\S{17})" + compiled_regex = re.compile(instance_id_regex) + match = compiled_regex.match(instance_id) + + if not match: + return None + + return match.group(1) + + +def _retrieve_instance_id(token=None): + """ + Retrieve instance ID from instance metadata service + """ + instance_id = None + instance_url = "http://169.254.169.254/latest/meta-data/instance-id" + + if token: + instance_id = 
requests_helper_imds(instance_url, token) + else: + instance_id = requests_helper_imds(instance_url) + + if instance_id: + instance_id = _validate_instance_id(instance_id) + + return instance_id + + +def _retrieve_instance_region(token=None): + """ + Retrieve instance region from instance metadata service + """ + region = None + response_json = None + + region_url = "http://169.254.169.254/latest/dynamic/instance-identity/document" + + if token: + response_text = requests_helper_imds(region_url, token) + else: + response_text = requests_helper_imds(region_url) + + if response_text: + response_json = json.loads(response_text) + + if response_json["region"] in REGION_MAPPING: + region = response_json["region"] + + return region + + +def _retrieve_device(): + return ( + "gpu" + if os.path.isdir("/usr/local/cuda") + else ( + "eia" + if os.path.isdir("/opt/ei_tools") + else ( + "neuron" + if os.path.exists("/usr/local/bin/tensorflow_model_server_neuron") + else "cpu" + ) + ) + ) + + +def _retrieve_cuda(): + cuda_version = "" + try: + cuda_path = os.path.basename(os.readlink("/usr/local/cuda")) + cuda_version_search = re.search(r"\d+\.\d+", cuda_path) + cuda_version = "" if not cuda_version_search else cuda_version_search.group() + except Exception as e: + logging.error(f"Failed to get cuda path: {e}") + return cuda_version + + +def _retrieve_os(): + version = "" + name = "" + with open("/etc/os-release", "r") as f: + for line in f.readlines(): + if re.match(r"^ID=\w+$", line): + name = re.search(r"^ID=(\w+)$", line).group(1) + if re.match(r'^VERSION_ID="\d+\.\d+"$', line): + version = re.search(r'^VERSION_ID="(\d+\.\d+)"$', line).group(1) + return name + version + + +def parse_args(): + """ + Parsing function to parse input arguments. + Return: args, which containers parsed input arguments. 
+ """ + parser = argparse.ArgumentParser() + parser.add_argument( + "--framework", + choices=["tensorflow", "mxnet", "pytorch", "base", "vllm"], + help="framework of container image.", + required=True, + ) + parser.add_argument( + "--framework-version", help="framework version of container image.", required=True + ) + parser.add_argument( + "--container-type", + choices=["training", "inference", "general"], + help="What kind of jobs you want to run on container. Either training or inference.", + required=True, + ) + + args, _unknown = parser.parse_known_args() + + fw_version_pattern = r"\d+(\.\d+){1,2}(-rc\d)?" + + # PT 1.10 and above has +cpu or +cu113 string, so handle accordingly + if args.framework == "pytorch": + pt_fw_version_pattern = r"(\d+(\.\d+){1,2}(-rc\d)?)((\+cpu)|(\+cu\d{3})|(a0\+git\w{7}))" + pt_fw_version_match = re.fullmatch(pt_fw_version_pattern, args.framework_version) + if pt_fw_version_match: + args.framework_version = pt_fw_version_match.group(1) + assert re.fullmatch(fw_version_pattern, args.framework_version), ( + f"args.framework_version = {args.framework_version} does not match {fw_version_pattern}\n" + f"Please specify framework version as X.Y.Z or X.Y." + ) + # TFS 2.12.1 still uses TF 2.12.0 and breaks the telemetry check as it is checking TF version + # instead of TFS version. WE are forcing the version we want. 
+ if ( + args.framework == "tensorflow" + and args.container_type == "inference" + and args.framework_version == "2.12.0" + ): + args.framework_version = "2.12.1" + + return args + + +def query_bucket(instance_id, region): + """ + GET request on an empty object from an Amazon S3 bucket + """ + + response = None + args = parse_args() + framework, framework_version, container_type = ( + args.framework, + args.framework_version, + args.container_type, + ) + + py_version = sys.version.split(" ")[0] + + if instance_id is not None and region is not None: + url = ( + "https://aws-deep-learning-containers-{0}.s3.{1}.amazonaws.com" + "/dlc-containers-{2}.txt?x-instance-id={2}&x-framework={3}&x-framework_version={4}&x-py_version={5}&x-container_type={6}".format( + REGION_MAPPING[region], + region, + instance_id, + framework, + framework_version, + py_version, + container_type, + ) + ) + response = requests_helper(url, timeout=0.2) + if os.environ.get("TEST_MODE") == str(1): + with open(os.path.join(os.sep, "tmp", "test_request.txt"), "w+") as rf: + rf.write(url) + + logging.debug("Query bucket finished: {}".format(response)) + + return response + + +def tag_instance(instance_id, region): + """ + Apply instance tag on the instance that is running the container using botocore + """ + args = parse_args() + framework, framework_version, container_type = ( + args.framework, + args.framework_version, + args.container_type, + ) + py_version = sys.version.split(" ")[0] + device = _retrieve_device() + cuda_version = f"_cuda{_retrieve_cuda()}" if device == "gpu" else "" + os_version = _retrieve_os() + + tag = f"{framework}_{container_type}_{framework_version}_python{py_version}_{device}{cuda_version}_{os_version}" + tag_struct = {"Key": "aws-dlc-autogenerated-tag-do-not-delete", "Value": tag} + + request_status = None + if instance_id and region: + try: + session = botocore.session.get_session() + ec2_client = session.create_client("ec2", region_name=region) + response = 
ec2_client.create_tags(Resources=[instance_id], Tags=[tag_struct]) + request_status = response.get("ResponseMetadata").get("HTTPStatusCode") + if os.environ.get("TEST_MODE") == str(1): + with open(os.path.join(os.sep, "tmp", "test_tag_request.txt"), "w+") as rf: + rf.write(json.dumps(tag_struct, indent=4)) + except Exception as e: + logging.error(f"Error. {e}") + logging.debug("Instance tagged successfully: {}".format(request_status)) + else: + logging.error("Failed to retrieve instance_id or region") + + return request_status + + +def main(): + """ + Invoke bucket query + """ + # Logs are not necessary for normal run. Remove this line while debugging. + logging.getLogger().disabled = True + + logging.basicConfig(level=logging.ERROR) + + token = None + instance_id = None + region = None + token = get_imdsv2_token() + if token: + instance_id = _retrieve_instance_id(token) + region = _retrieve_instance_region(token) + else: + instance_id = _retrieve_instance_id() + region = _retrieve_instance_region() + + bucket_process = multiprocessing.Process(target=query_bucket, args=(instance_id, region)) + tag_process = multiprocessing.Process(target=tag_instance, args=(instance_id, region)) + + bucket_process.start() + tag_process.start() + + tag_process.join(TIMEOUT_SECS) + bucket_process.join(TIMEOUT_SECS) + + if tag_process.is_alive(): + os.kill(tag_process.pid, signal.SIGKILL) + tag_process.join() + if bucket_process.is_alive(): + os.kill(bucket_process.pid, signal.SIGKILL) + bucket_process.join() + + +if __name__ == "__main__": + main() From 50e9793f741219a39747c08b41a0231653b098c1 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 15:06:13 -0800 Subject: [PATCH 02/63] fix action Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 81b28112ea14..949b8b3f734f 100644 --- a/.github/workflows/pr-example.yml +++ 
b/.github/workflows/pr-example.yml @@ -47,8 +47,8 @@ jobs: DATE=$(date +"%Y-%m-%d") COMMIT_REF=$(git rev-parse --short HEAD) DOCKER_BUILDKIT=1 docker build --progress plain \ - --build-arg CACHE_REFRESH=${DATE} \ - --tag vllm:0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-${COMMIT_REF} \ + --build-arg CACHE_REFRESH="${DATE}" \ + --tag vllm:0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-"${COMMIT_REF}" \ --target vllm-rayserve-ec2 \ -f docker/vllm/Dockerfile.rayserve . docker image ls From 47e7bf6a28d4acfba3d48ae6326e5f1feee5a3bc Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 15:15:36 -0800 Subject: [PATCH 03/63] using long commit ref Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 949b8b3f734f..e67ef20351b2 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -45,7 +45,7 @@ jobs: shell: bash run: | DATE=$(date +"%Y-%m-%d") - COMMIT_REF=$(git rev-parse --short HEAD) + COMMIT_REF=$(git rev-parse HEAD) DOCKER_BUILDKIT=1 docker build --progress plain \ --build-arg CACHE_REFRESH="${DATE}" \ --tag vllm:0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-"${COMMIT_REF}" \ From f3e7416dc5a7f049ef6b2f536ceeff82e56b091c Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 15:57:17 -0800 Subject: [PATCH 04/63] install/update uv only if not already installed Signed-off-by: Junpu Fan --- .github/scripts/runner_setup.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/scripts/runner_setup.sh b/.github/scripts/runner_setup.sh index bb1b7d0976b4..e6eb3aecd88e 100755 --- a/.github/scripts/runner_setup.sh +++ b/.github/scripts/runner_setup.sh @@ -1,6 +1,8 @@ #!/bin/bash set -e -curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR="/usr/local/bin" sh -uv self update +if ! 
command -v uv &> /dev/null; then + curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR="/usr/local/bin" sh + uv self update +fi docker --version From 96d976b59a312ea0f532ad09be0c344d8ccdf12e Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 16:16:12 -0800 Subject: [PATCH 05/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index e67ef20351b2..32d33daf2328 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -46,12 +46,17 @@ jobs: run: | DATE=$(date +"%Y-%m-%d") COMMIT_REF=$(git rev-parse HEAD) + REPO=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci + TAG=vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }}-"${COMMIT_REF}" + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com DOCKER_BUILDKIT=1 docker build --progress plain \ --build-arg CACHE_REFRESH="${DATE}" \ - --tag vllm:0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-"${COMMIT_REF}" \ + --tag "${REPO}":"${TAG}" \ --target vllm-rayserve-ec2 \ -f docker/vllm/Dockerfile.rayserve . 
- docker image ls + docker push "${REPO}":"${TAG}" + docker rmi "${REPO}":"${TAG}" + yes | docker system prune example-on-g6xl-runner-1: needs: [example-on-build-runner] From e21334c31d3df4400784d990edf709a6c577ae16 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 16:56:45 -0800 Subject: [PATCH 06/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 32d33daf2328..f9d3fbb6c313 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -44,20 +44,16 @@ jobs: - name: build vllm-rayserve-ec2 image shell: bash run: | - DATE=$(date +"%Y-%m-%d") - COMMIT_REF=$(git rev-parse HEAD) - REPO=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci - TAG=vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }}-"${COMMIT_REF}" aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + IMAGE_TAG=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }} DOCKER_BUILDKIT=1 docker build --progress plain \ - --build-arg CACHE_REFRESH="${DATE}" \ - --tag "${REPO}":"${TAG}" \ + --build-arg CACHE_REFRESH=$(date +"%Y-%m-%d_%H") \ + --tag "$IMAGE_TAG" \ --target vllm-rayserve-ec2 \ -f docker/vllm/Dockerfile.rayserve . 
- docker push "${REPO}":"${TAG}" - docker rmi "${REPO}":"${TAG}" - yes | docker system prune - + docker push "$IMAGE_TAG" + docker rmi "$IMAGE_TAG" + example-on-g6xl-runner-1: needs: [example-on-build-runner] runs-on: From 082f67c609362b7121bb1a120e1ef68efe40d749 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 17:01:48 -0800 Subject: [PATCH 07/63] fix actionlint Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index f9d3fbb6c313..35e2d6359fdb 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -47,7 +47,7 @@ jobs: aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com IMAGE_TAG=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }} DOCKER_BUILDKIT=1 docker build --progress plain \ - --build-arg CACHE_REFRESH=$(date +"%Y-%m-%d_%H") \ + --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d_%H")" \ --tag "$IMAGE_TAG" \ --target vllm-rayserve-ec2 \ -f docker/vllm/Dockerfile.rayserve . 
From a82924d2ee29162deca033e88687ba6372380c40 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 17:28:37 -0800 Subject: [PATCH 08/63] try inline cache Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 35e2d6359fdb..a0692b324d74 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -47,7 +47,10 @@ jobs: aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com IMAGE_TAG=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }} DOCKER_BUILDKIT=1 docker build --progress plain \ + --build-arg BUILDKIT_INLINE_CACHE=1 \ --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d_%H")" \ + --cache-to=type=inline \ + --cache-from=type=registry,ref="$IMAGE_TAG" \ --tag "$IMAGE_TAG" \ --target vllm-rayserve-ec2 \ -f docker/vllm/Dockerfile.rayserve . 
From d82b4a1a1bc34fa2b804a05e8de7785b4b0728c6 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 17:33:25 -0800 Subject: [PATCH 09/63] fix Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index a0692b324d74..24092d131525 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -47,7 +47,6 @@ jobs: aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com IMAGE_TAG=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }} DOCKER_BUILDKIT=1 docker build --progress plain \ - --build-arg BUILDKIT_INLINE_CACHE=1 \ --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d_%H")" \ --cache-to=type=inline \ --cache-from=type=registry,ref="$IMAGE_TAG" \ From c7d65bc18542d7142d219564466da00aceebeda5 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 17:41:05 -0800 Subject: [PATCH 10/63] use buildx Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 24092d131525..00287d427eb7 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -46,7 +46,7 @@ jobs: run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com IMAGE_TAG=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }} - DOCKER_BUILDKIT=1 docker build --progress plain \ + docker 
buildx build --progress plain \ --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d_%H")" \ --cache-to=type=inline \ --cache-from=type=registry,ref="$IMAGE_TAG" \ From 09bfc6319cddf46181b12fe8df13ccebbbe35c3d Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Fri, 7 Nov 2025 17:51:02 -0800 Subject: [PATCH 11/63] per day cache refresh Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 00287d427eb7..f9f7637197aa 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -47,7 +47,7 @@ jobs: aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com IMAGE_TAG=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }} docker buildx build --progress plain \ - --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d_%H")" \ + --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \ --cache-to=type=inline \ --cache-from=type=registry,ref="$IMAGE_TAG" \ --tag "$IMAGE_TAG" \ From 8a2108768d72788cf7c24f7d2a76cd76aaa204f9 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 08:30:42 -0800 Subject: [PATCH 12/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 60 ++++++++++++++++---------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index f9f7637197aa..1f4c393af66d 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -12,9 +12,11 @@ concurrency: group: pr-${{ github.event.pull_request.number }} cancel-in-progress: true -jobs: - pre-commit: +jobs: + check-changes: runs-on: ubuntu-latest + outputs: + vllm-rayserve-ec2: ${{ 
steps.changes.outputs.vllm-rayserve-ec2 }} steps: - uses: actions/checkout@v5 - uses: actions/setup-python@v6 @@ -23,25 +25,28 @@ jobs: - uses: pre-commit/action@v3.0.1 with: extra_args: --all-files + - name: Detect file changes + id: changes + uses: dorny/paths-filter@v3 + with: + filters: | + vllm-rayserve-ec2: + - "docker/vllm/Dockerfile.rayserve" - example-on-default-runner: - needs: [pre-commit] - runs-on: - - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} - steps: - - uses: actions/checkout@v5 - - run: .github/scripts/runner_setup.sh - - example-on-build-runner: - needs: [example-on-default-runner] + build-vllm-rayserve-ec2-image: + needs: [check-changes] + if: needs.check-changes.outputs.vllm-rayserve-ec2 == 'true' runs-on: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-build-runner + outputs: + image_uri: ${{ steps.build.outputs.image_uri }} steps: - uses: actions/checkout@v5 - run: .github/scripts/runner_setup.sh - run: .github/scripts/buildkitd.sh - - name: build vllm-rayserve-ec2 image + - name: Build vllm-rayserve-ec2 image + id: build shell: bash run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com @@ -55,25 +60,20 @@ jobs: -f docker/vllm/Dockerfile.rayserve . 
docker push "$IMAGE_TAG" docker rmi "$IMAGE_TAG" - - example-on-g6xl-runner-1: - needs: [example-on-build-runner] - runs-on: - - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} - fleet:x86-g6xl-runner - steps: - - uses: actions/checkout@v5 - - run: .github/scripts/runner_setup.sh - - run: | - nvidia-smi - - example-on-g6xl-runner-2: - needs: [example-on-build-runner] + echo "image_uri="$IMAGE_TAG"" >> $GITHUB_OUTPUT + + test-vllm-rayserve-ec2-image: + needs: [build-vllm-rayserve-ec2-image] + if: needs.build-vllm-rayserve-ec2-image.result == 'success' runs-on: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-g6xl-runner steps: - uses: actions/checkout@v5 - - run: .github/scripts/runner_setup.sh - - run: | - nvidia-smi + - name: Use built image + run: | + IMAGE_URI=${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }} + echo "Testing image: $IMAGE_URI" + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + docker pull "$IMAGE_URI" + docker run --rm "$IMAGE_URI" python -c "import torch; import ray; import vllm; print('Test OK')" From 031a0e80fcf93f27f8e9113f941eb937bf799427 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 08:32:58 -0800 Subject: [PATCH 13/63] fix Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 1f4c393af66d..f77cd7423f51 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -60,7 +60,7 @@ jobs: -f docker/vllm/Dockerfile.rayserve . 
docker push "$IMAGE_TAG" docker rmi "$IMAGE_TAG" - echo "image_uri="$IMAGE_TAG"" >> $GITHUB_OUTPUT + echo "image_uri=$IMAGE_TAG" >> "$GITHUB_OUTPUT" test-vllm-rayserve-ec2-image: needs: [build-vllm-rayserve-ec2-image] From df2d590cd30b143ffff663d4ed7ebd01c14ea941 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 09:44:48 -0800 Subject: [PATCH 14/63] test Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 64 ++++++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 12 deletions(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index f77cd7423f51..33d36f206d7d 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -40,7 +40,7 @@ jobs: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-build-runner outputs: - image_uri: ${{ steps.build.outputs.image_uri }} + image_uri: ${{ steps.export.outputs.image_uri }} steps: - uses: actions/checkout@v5 - run: .github/scripts/runner_setup.sh @@ -60,20 +60,60 @@ jobs: -f docker/vllm/Dockerfile.rayserve . 
docker push "$IMAGE_TAG" docker rmi "$IMAGE_TAG" - echo "image_uri=$IMAGE_TAG" >> "$GITHUB_OUTPUT" + echo "IMAGE_TAG=$IMAGE_TAG" >> $GITHUB_ENV + - name: Export output + id: export + run: echo "image_uri=$IMAGE_TAG" >> $GITHUB_OUTPUT + - name: Debug local + run: | + echo "Local GITHUB_OUTPUT contents:" + cat $GITHUB_OUTPUT || echo "No output file found" + echo "Local step output: ${{ steps.export.outputs.image_uri }}" - test-vllm-rayserve-ec2-image: + test-job1: needs: [build-vllm-rayserve-ec2-image] - if: needs.build-vllm-rayserve-ec2-image.result == 'success' - runs-on: + runs-on: + - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} + fleet:x86-build-runner + if: always() + steps: + - name: Check received output + run: | + echo "Received job output: ${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }}" + if [ -z "${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }}" ]; then + echo "❌ Output is missing!" + else + echo "✅ Output received successfully." + fi + + test-job2: + needs: [build-vllm-rayserve-ec2-image] + runs-on: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-g6xl-runner + if: always() steps: - - uses: actions/checkout@v5 - - name: Use built image + - name: Check received output run: | - IMAGE_URI=${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }} - echo "Testing image: $IMAGE_URI" - aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - docker pull "$IMAGE_URI" - docker run --rm "$IMAGE_URI" python -c "import torch; import ray; import vllm; print('Test OK')" + echo "Received job output: ${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }}" + if [ -z "${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }}" ]; then + echo "❌ Output is missing!" + else + echo "✅ Output received successfully." 
+ fi + + # test-vllm-rayserve-ec2-image: + # needs: [build-vllm-rayserve-ec2-image] + # if: needs.build-vllm-rayserve-ec2-image.result == 'success' + # runs-on: + # - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} + # fleet:x86-g6xl-runner + # steps: + # - uses: actions/checkout@v5 + # - name: Use built image + # run: | + # IMAGE_URI=${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }} + # echo "Testing image: $IMAGE_URI" + # aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + # docker pull "$IMAGE_URI" + # docker run --rm "$IMAGE_URI" python -c "import torch; import ray; import vllm; print('Test OK')" From 2d594069e7da50f88a912617a5fe88aa1fdffd2e Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 09:53:19 -0800 Subject: [PATCH 15/63] fix Signed-off-by: Junpu Fan --- .pre-commit-config.yaml | 1 + .shellcheckrc | 1 + DEVELOPMENT.md | 1 + 3 files changed, 3 insertions(+) create mode 100644 .shellcheckrc diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 63f60f47f387..e343734a2aaf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -48,6 +48,7 @@ repos: rev: v1.7.7 hooks: - id: actionlint + args: ["-shellcheck=enable=all"] - repo: https://github.com/crate-ci/typos rev: v1.38.1 hooks: diff --git a/.shellcheckrc b/.shellcheckrc new file mode 100644 index 000000000000..cf179afb4534 --- /dev/null +++ b/.shellcheckrc @@ -0,0 +1 @@ +disable=SC2086 \ No newline at end of file diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 55fefbd3a911..95ac91415950 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -33,6 +33,7 @@ Install go using [homebrew](https://brew.sh/), below example assume on Mac. 
```bash brew install go go env -w GOPROXY=direct +brew install shellcheck ``` To manually run all linters: From 75a8f1ad45d827177a7461df1f562f7660dff128 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 10:24:57 -0800 Subject: [PATCH 16/63] try artifact Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 77 ++++++++++---------------------- 1 file changed, 24 insertions(+), 53 deletions(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 33d36f206d7d..1dc056d83119 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -39,8 +39,6 @@ jobs: runs-on: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-build-runner - outputs: - image_uri: ${{ steps.export.outputs.image_uri }} steps: - uses: actions/checkout@v5 - run: .github/scripts/runner_setup.sh @@ -60,60 +58,33 @@ jobs: -f docker/vllm/Dockerfile.rayserve . docker push "$IMAGE_TAG" docker rmi "$IMAGE_TAG" - echo "IMAGE_TAG=$IMAGE_TAG" >> $GITHUB_ENV - - name: Export output - id: export - run: echo "image_uri=$IMAGE_TAG" >> $GITHUB_OUTPUT - - name: Debug local - run: | - echo "Local GITHUB_OUTPUT contents:" - cat $GITHUB_OUTPUT || echo "No output file found" - echo "Local step output: ${{ steps.export.outputs.image_uri }}" - - test-job1: - needs: [build-vllm-rayserve-ec2-image] - runs-on: - - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} - fleet:x86-build-runner - if: always() - steps: - - name: Check received output - run: | - echo "Received job output: ${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }}" - if [ -z "${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }}" ]; then - echo "❌ Output is missing!" - else - echo "✅ Output received successfully." 
- fi + echo "$IMAGE_TAG" > image_uri.txt + - name: Upload image URI + uses: actions/upload-artifact@v4 + with: + name: vllm-rayserve-ec2-image-uri + path: image_uri.txt - test-job2: + test-vllm-rayserve-ec2-image: needs: [build-vllm-rayserve-ec2-image] - runs-on: + if: needs.build-vllm-rayserve-ec2-image.result == 'success' + runs-on: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-g6xl-runner - if: always() steps: - - name: Check received output + - uses: actions/checkout@v5 + - name: Download image URI + uses: actions/download-artifact@v4 + with: + name: vllm-rayserve-ec2-image-uri + - name: Read image URI + id: read run: | - echo "Received job output: ${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }}" - if [ -z "${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }}" ]; then - echo "❌ Output is missing!" - else - echo "✅ Output received successfully." - fi - - # test-vllm-rayserve-ec2-image: - # needs: [build-vllm-rayserve-ec2-image] - # if: needs.build-vllm-rayserve-ec2-image.result == 'success' - # runs-on: - # - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} - # fleet:x86-g6xl-runner - # steps: - # - uses: actions/checkout@v5 - # - name: Use built image - # run: | - # IMAGE_URI=${{ needs.build-vllm-rayserve-ec2-image.outputs.image_uri }} - # echo "Testing image: $IMAGE_URI" - # aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - # docker pull "$IMAGE_URI" - # docker run --rm "$IMAGE_URI" python -c "import torch; import ray; import vllm; print('Test OK')" + IMAGE_URI=$(cat image_uri.txt) + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + echo "Resolved image URI: $IMAGE_URI" + - name: Test image + run: | + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ 
secrets.AWS_REGION }}.amazonaws.com + docker pull "$IMAGE_URI" + docker run --rm "$IMAGE_URI" python -c "import torch; import ray; import vllm; print('Test OK')" From 65975f71e98db0839e6158543b82d925f6f100da Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 10:32:40 -0800 Subject: [PATCH 17/63] update docker command Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 1dc056d83119..9e311dfb3812 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -87,4 +87,4 @@ jobs: run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com docker pull "$IMAGE_URI" - docker run --rm "$IMAGE_URI" python -c "import torch; import ray; import vllm; print('Test OK')" + docker run --rm "$IMAGE_URI" --entrypoint python -c "import torch; import ray; import vllm; print('Test OK')" From ff4725e0879a5e1829f8c065496c3db44dd1fc62 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 10:37:09 -0800 Subject: [PATCH 18/63] fix command Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 9e311dfb3812..6b722eaa86c2 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -87,4 +87,4 @@ jobs: run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com docker pull "$IMAGE_URI" - docker run --rm "$IMAGE_URI" --entrypoint python -c "import torch; import ray; import vllm; print('Test OK')" + docker run --rm --gpus=all --entrypoint /bin/bash "$IMAGE_URI" 
--entrypoint python -c "import torch; import ray; import vllm; print('Test OK')" From 3dd1a99d9c1f010454f36a2d2af9fd5a0158d131 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 10:42:34 -0800 Subject: [PATCH 19/63] fix command Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 6b722eaa86c2..1cc4bd574fea 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -87,4 +87,5 @@ jobs: run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com docker pull "$IMAGE_URI" - docker run --rm --gpus=all --entrypoint /bin/bash "$IMAGE_URI" --entrypoint python -c "import torch; import ray; import vllm; print('Test OK')" + docker run --rm --gpus=all $IMAGE_URI --entrypoint /bin/bash \ + -c "python -c 'import vllm; print(vllm.__version__)'" From 872029d121225a48dab1e9c37eecb9e1afe64004 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 10:47:44 -0800 Subject: [PATCH 20/63] fix entrypoint Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 1cc4bd574fea..b3db25e8985a 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -87,5 +87,7 @@ jobs: run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com docker pull "$IMAGE_URI" - docker run --rm --gpus=all $IMAGE_URI --entrypoint /bin/bash \ - -c "python -c 'import vllm; print(vllm.__version__)'" + docker run --rm --gpus=all \ + --entrypoint /bin/bash \ + "$IMAGE_URI" \ + -c 
"python -c 'import vllm; print(vllm.__version__)'" From fadf714b26227dfb4350f44467308a30c590d57d Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 11:17:16 -0800 Subject: [PATCH 21/63] update test Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 40 ++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index b3db25e8985a..773961c2c915 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -85,9 +85,41 @@ jobs: echo "Resolved image URI: $IMAGE_URI" - name: Test image run: | + # Download ShareGPT dataset if it doesn't exist + mkdir -p ${HOME}/dataset + if [ ! -f ${HOME}/dataset/ShareGPT_V3_unfiltered_cleaned_split.json ]; then + echo "Downloading ShareGPT dataset..." + wget -P ${HOME}/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json + else + echo "ShareGPT dataset already exists. Skipping download." 
+ fi + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com docker pull "$IMAGE_URI" - docker run --rm --gpus=all \ - --entrypoint /bin/bash \ - "$IMAGE_URI" \ - -c "python -c 'import vllm; print(vllm.__version__)'" + CONTAINER_NAME=vllm-rayserve + docker stop ${CONTAINER_NAME} || true + docker rm -f ${CONTAINER_NAME} || true + docker run --name ${CONTAINER_NAME} \ + -d --gpus=all --entrypoint /bin/bash \ + -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ + -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ${HOME}/dataset:/dataset \ + -e "HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ + ${IMAGE_URI} \ + -c "vllm serve Qwen/Qwen3-0.6B --reasoning-parser qwen3" + sleep 60 + docker logs ${CONTAINER_NAME} + + # run serving benchmark + echo "start running serving benchmark workflow..." + docker exec ${CONTAINER_NAME} vllm bench serve \ + --backend vllm \ + --model Qwen/Qwen3-0.6B \ + --dataset-name sharegpt \ + --dataset-path /dataset/ShareGPT_V3_unfiltered_cleaned_split.json \ + --num-prompts 1000 + + # cleanup container + docker stop ${CONTAINER_NAME} + docker rm -f ${CONTAINER_NAME} + From 557e649628da03d4d4bf987137f32e1ca6795e05 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 11:22:25 -0800 Subject: [PATCH 22/63] fix command Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 773961c2c915..650ef0535c59 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -89,8 +89,7 @@ jobs: mkdir -p ${HOME}/dataset if [ ! -f ${HOME}/dataset/ShareGPT_V3_unfiltered_cleaned_split.json ]; then echo "Downloading ShareGPT dataset..." 
- wget -P ${HOME}/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json - else + wget -q -P ${HOME}/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json else echo "ShareGPT dataset already exists. Skipping download." fi @@ -104,7 +103,7 @@ jobs: -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ -v ${HOME}/dataset:/dataset \ - -e "HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ + -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ ${IMAGE_URI} \ -c "vllm serve Qwen/Qwen3-0.6B --reasoning-parser qwen3" sleep 60 From 58aa567e32660bd1520c1d93343a60bfad8d0d5f Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 13:27:43 -0800 Subject: [PATCH 23/63] checkout vllm Signed-off-by: Junpu Fan --- .github/workflows/pr-example.yml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-example.yml index 650ef0535c59..02dbc4d335bd 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-example.yml @@ -121,4 +121,18 @@ jobs: # cleanup container docker stop ${CONTAINER_NAME} docker rm -f ${CONTAINER_NAME} - + + - name: Checkout vLLM v0.10.2 + uses: actions/checkout@v5 + with: + repository: vllm-project/vllm + ref: v0.10.2 + path: vllm + + - name: Verify vLLM repository is cloned + run: | + echo "Checking vLLM working directory..." + ls -la vllm + test -d vllm || (echo "❌ vllm directory not found!" && exit 1) + test -f vllm/pyproject.toml || (echo "❌ Expected file pyproject.toml missing — clone may have failed." && exit 1) + echo "✅ vLLM repository cloned successfully." 
From b071a75dd6a082dfd21b38a889faf19039d274af Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 13:50:53 -0800 Subject: [PATCH 24/63] update workflow Signed-off-by: Junpu Fan --- .../{pr-example.yml => pr-vllm-rayserve.yml} | 65 ++++++++++++------- 1 file changed, 40 insertions(+), 25 deletions(-) rename .github/workflows/{pr-example.yml => pr-vllm-rayserve.yml} (82%) diff --git a/.github/workflows/pr-example.yml b/.github/workflows/pr-vllm-rayserve.yml similarity index 82% rename from .github/workflows/pr-example.yml rename to .github/workflows/pr-vllm-rayserve.yml index 02dbc4d335bd..e217ad0f17a6 100644 --- a/.github/workflows/pr-example.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -1,9 +1,11 @@ -name: Example Workflow +name: PR - vLLM RayServe on: pull_request: branches: - main + paths: + - "docker/**" permissions: contents: read @@ -33,7 +35,7 @@ jobs: vllm-rayserve-ec2: - "docker/vllm/Dockerfile.rayserve" - build-vllm-rayserve-ec2-image: + build-vllm-rayserve-ec2: needs: [check-changes] if: needs.check-changes.outputs.vllm-rayserve-ec2 == 'true' runs-on: @@ -65,9 +67,9 @@ jobs: name: vllm-rayserve-ec2-image-uri path: image_uri.txt - test-vllm-rayserve-ec2-image: - needs: [build-vllm-rayserve-ec2-image] - if: needs.build-vllm-rayserve-ec2-image.result == 'success' + test-vllm-rayserve-ec2: + needs: [build-vllm-rayserve-ec2] + if: needs.build-vllm-rayserve-ec2.result == 'success' runs-on: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-g6xl-runner @@ -77,13 +79,44 @@ jobs: uses: actions/download-artifact@v4 with: name: vllm-rayserve-ec2-image-uri - - name: Read image URI + + - name: Pull image URI id: read run: | IMAGE_URI=$(cat image_uri.txt) echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV echo "Resolved image URI: $IMAGE_URI" - - name: Test image + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION 
}}.amazonaws.com + docker pull "$IMAGE_URI" + + - name: Checkout vLLM v0.10.2 + uses: actions/checkout@v5 + with: + repository: vllm-project/vllm + ref: v0.10.2 + path: vllm + + - name: Run vLLM Tests + run: | + CONTAINER_NAME=vllm-rayserve-test + docker stop ${CONTAINER_NAME} || true + docker rm -f ${CONTAINER_NAME} || true + + docker run --name ${CONTAINER_NAME} \ + -d --rm --gpus=all --entrypoint /bin/bash \ + -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ + -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + ${IMAGE_URI} \ + -c "nvidia-smi" + + docker exec ${CONTAINER_NAME} pytest -v -s v1/e2e + docker exec ${CONTAINER_NAME} pytest -v -s v1/engine + + # cleanup container + docker stop ${CONTAINER_NAME} + docker rm -f ${CONTAINER_NAME} + + - name: Run qwen3 benchmark run: | # Download ShareGPT dataset if it doesn't exist mkdir -p ${HOME}/dataset @@ -92,9 +125,6 @@ jobs: wget -q -P ${HOME}/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json else echo "ShareGPT dataset already exists. Skipping download." fi - - aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - docker pull "$IMAGE_URI" CONTAINER_NAME=vllm-rayserve docker stop ${CONTAINER_NAME} || true docker rm -f ${CONTAINER_NAME} || true @@ -121,18 +151,3 @@ jobs: # cleanup container docker stop ${CONTAINER_NAME} docker rm -f ${CONTAINER_NAME} - - - name: Checkout vLLM v0.10.2 - uses: actions/checkout@v5 - with: - repository: vllm-project/vllm - ref: v0.10.2 - path: vllm - - - name: Verify vLLM repository is cloned - run: | - echo "Checking vLLM working directory..." - ls -la vllm - test -d vllm || (echo "❌ vllm directory not found!" && exit 1) - test -f vllm/pyproject.toml || (echo "❌ Expected file pyproject.toml missing — clone may have failed." 
&& exit 1) - echo "✅ vLLM repository cloned successfully." From e362483157f78dc319fb608560f464d15bcdb533 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 14:40:48 -0800 Subject: [PATCH 25/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index e217ad0f17a6..9d2385061f82 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -103,12 +103,13 @@ jobs: docker rm -f ${CONTAINER_NAME} || true docker run --name ${CONTAINER_NAME} \ - -d --rm --gpus=all --entrypoint /bin/bash \ + -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ ${IMAGE_URI} \ -c "nvidia-smi" + docker exec ${CONTAINER_NAME} uv pip list docker exec ${CONTAINER_NAME} pytest -v -s v1/e2e docker exec ${CONTAINER_NAME} pytest -v -s v1/engine From 369551b019d129d9c40982f7fe29e7577afaafb1 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 14:45:16 -0800 Subject: [PATCH 26/63] fix Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 9d2385061f82..f64cfb2ecbd3 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -101,14 +101,15 @@ jobs: CONTAINER_NAME=vllm-rayserve-test docker stop ${CONTAINER_NAME} || true docker rm -f ${CONTAINER_NAME} || true + echo "${IMAGE_URI}" docker run --name ${CONTAINER_NAME} \ -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ - ${IMAGE_URI} \ - -c "nvidia-smi" + ${IMAGE_URI} + docker exec ${CONTAINER_NAME} nvidia-smi docker exec 
${CONTAINER_NAME} uv pip list docker exec ${CONTAINER_NAME} pytest -v -s v1/e2e docker exec ${CONTAINER_NAME} pytest -v -s v1/engine From aeebfe84014437435d1162cabd83b1055817aa9a Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 15:04:27 -0800 Subject: [PATCH 27/63] try test Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index f64cfb2ecbd3..5ae8a56f25cd 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -107,10 +107,12 @@ jobs: -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ./vllm:/workdir --workdir /workdir \ ${IMAGE_URI} docker exec ${CONTAINER_NAME} nvidia-smi docker exec ${CONTAINER_NAME} uv pip list + docker exec uv pip install --system -r requirements/dev.txt docker exec ${CONTAINER_NAME} pytest -v -s v1/e2e docker exec ${CONTAINER_NAME} pytest -v -s v1/engine From 18f2b64f63a87a7fcc61e3c9c80f0501c90f9704 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 15:06:35 -0800 Subject: [PATCH 28/63] fix typo Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 5ae8a56f25cd..180d802fc00a 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -112,7 +112,7 @@ jobs: docker exec ${CONTAINER_NAME} nvidia-smi docker exec ${CONTAINER_NAME} uv pip list - docker exec uv pip install --system -r requirements/dev.txt + docker exec ${CONTAINER_NAME} uv pip install --system -r requirements/dev.txt docker exec ${CONTAINER_NAME} pytest -v -s v1/e2e docker exec ${CONTAINER_NAME} pytest -v -s v1/engine From 9c3bc51ee39564be13e64c8b1784b30be46cf90d 
Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 15:37:54 -0800 Subject: [PATCH 29/63] run basic terst Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 180d802fc00a..1fe809db9213 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -89,11 +89,11 @@ jobs: aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com docker pull "$IMAGE_URI" - - name: Checkout vLLM v0.10.2 + - name: Checkout vLLM uses: actions/checkout@v5 with: repository: vllm-project/vllm - ref: v0.10.2 + ref: v0.11.1rc6 path: vllm - name: Run vLLM Tests @@ -111,10 +111,12 @@ jobs: ${IMAGE_URI} docker exec ${CONTAINER_NAME} nvidia-smi - docker exec ${CONTAINER_NAME} uv pip list - docker exec ${CONTAINER_NAME} uv pip install --system -r requirements/dev.txt - docker exec ${CONTAINER_NAME} pytest -v -s v1/e2e - docker exec ${CONTAINER_NAME} pytest -v -s v1/engine + docker exec ${CONTAINER_NAME} uv pip install -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + docker exec ${CONTAINER_NAME} uv pip install pytest pytest-asyncio + docker exec ${CONTAINER_NAME} pytest -s -v tests/test_logger.py + + # docker exec ${CONTAINER_NAME} pytest -v -s v1/e2e + # docker exec ${CONTAINER_NAME} pytest -v -s v1/engine # cleanup container docker stop ${CONTAINER_NAME} From 4e43405e9b87380227d41124b3ca7782575b6419 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 15:42:39 -0800 Subject: [PATCH 30/63] test Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml 
b/.github/workflows/pr-vllm-rayserve.yml index 1fe809db9213..ff315d1ea0b0 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -111,13 +111,10 @@ jobs: ${IMAGE_URI} docker exec ${CONTAINER_NAME} nvidia-smi - docker exec ${CONTAINER_NAME} uv pip install -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto - docker exec ${CONTAINER_NAME} uv pip install pytest pytest-asyncio + docker exec ${CONTAINER_NAME} uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + docker exec ${CONTAINER_NAME} uv pip install --system pytest pytest-asyncio docker exec ${CONTAINER_NAME} pytest -s -v tests/test_logger.py - # docker exec ${CONTAINER_NAME} pytest -v -s v1/e2e - # docker exec ${CONTAINER_NAME} pytest -v -s v1/engine - # cleanup container docker stop ${CONTAINER_NAME} docker rm -f ${CONTAINER_NAME} From 94e16b6d1291ff22e4509a8b1a4a1362b8f0b164 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 15:45:06 -0800 Subject: [PATCH 31/63] use older version Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index ff315d1ea0b0..f6c1ceaf96ff 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -93,7 +93,7 @@ jobs: uses: actions/checkout@v5 with: repository: vllm-project/vllm - ref: v0.11.1rc6 + ref: v0.10.2 path: vllm - name: Run vLLM Tests From d5d1ff3f6563689b8c563ff19ebc82f9a4fb8ea1 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 16:29:09 -0800 Subject: [PATCH 32/63] check path Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index f6c1ceaf96ff..f7b9f4d449c8 100644 --- 
a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -110,7 +110,8 @@ jobs: -v ./vllm:/workdir --workdir /workdir \ ${IMAGE_URI} - docker exec ${CONTAINER_NAME} nvidia-smi + docker exec ${CONTAINER_NAME} nvidia-smi && ls -l + docker exec ${CONTAINER_NAME} uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto docker exec ${CONTAINER_NAME} uv pip install --system pytest pytest-asyncio docker exec ${CONTAINER_NAME} pytest -s -v tests/test_logger.py From b137dea0a9e6898c060b23c365150d7c69db4a4b Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 16:57:58 -0800 Subject: [PATCH 33/63] partial clone Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index f7b9f4d449c8..b980a6038fe5 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -94,7 +94,10 @@ jobs: with: repository: vllm-project/vllm ref: v0.10.2 - path: vllm + sparse-checkout: | + requirements + tests + path: vllm_tests - name: Run vLLM Tests run: | @@ -107,14 +110,13 @@ jobs: -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ - -v ./vllm:/workdir --workdir /workdir \ + -v vllm_tests:/workdir --workdir /workdir \ ${IMAGE_URI} - docker exec ${CONTAINER_NAME} nvidia-smi && ls -l - - docker exec ${CONTAINER_NAME} uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto - docker exec ${CONTAINER_NAME} uv pip install --system pytest pytest-asyncio - docker exec ${CONTAINER_NAME} pytest -s -v tests/test_logger.py + docker exec ${CONTAINER_NAME} nvidia-smi + docker exec ${CONTAINER_NAME} cd vllm_tests && uv pip install --system -r requirements/common.txt -r 
requirements/dev.txt --torch-backend=auto + docker exec ${CONTAINER_NAME} cd vllm_tests && uv pip install --system pytest pytest-asyncio + docker exec ${CONTAINER_NAME} cd vllm_tests && pytest -s -v tests/test_logger.py # cleanup container docker stop ${CONTAINER_NAME} From 0d8b5a57264109e1edd58ca55338d7c0b75a3959 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 17:15:51 -0800 Subject: [PATCH 34/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 83 ++++++++++++-------------- .shellcheckrc | 1 - 2 files changed, 38 insertions(+), 46 deletions(-) delete mode 100644 .shellcheckrc diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index b980a6038fe5..9fcb1e671c78 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -54,13 +54,13 @@ jobs: docker buildx build --progress plain \ --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \ --cache-to=type=inline \ - --cache-from=type=registry,ref="$IMAGE_TAG" \ - --tag "$IMAGE_TAG" \ + --cache-from=type=registry,ref=$IMAGE_TAG \ + --tag $IMAGE_TAG \ --target vllm-rayserve-ec2 \ -f docker/vllm/Dockerfile.rayserve . 
- docker push "$IMAGE_TAG" - docker rmi "$IMAGE_TAG" - echo "$IMAGE_TAG" > image_uri.txt + docker push $IMAGE_TAG + docker rmi $IMAGE_TAG + echo $IMAGE_TAG > image_uri.txt - name: Upload image URI uses: actions/upload-artifact@v4 with: @@ -80,15 +80,6 @@ jobs: with: name: vllm-rayserve-ec2-image-uri - - name: Pull image URI - id: read - run: | - IMAGE_URI=$(cat image_uri.txt) - echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV - echo "Resolved image URI: $IMAGE_URI" - aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - docker pull "$IMAGE_URI" - - name: Checkout vLLM uses: actions/checkout@v5 with: @@ -99,29 +90,39 @@ jobs: tests path: vllm_tests - - name: Run vLLM Tests + - name: Pull image URI + id: read + run: | + IMAGE_URI=$(cat image_uri.txt) + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + echo "Resolved image URI: $IMAGE_URI" + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + docker pull "$IMAGE_URI" + + - name: Start container + id: start run: | CONTAINER_NAME=vllm-rayserve-test - docker stop ${CONTAINER_NAME} || true - docker rm -f ${CONTAINER_NAME} || true - echo "${IMAGE_URI}" - + echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV docker run --name ${CONTAINER_NAME} \ -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ -v vllm_tests:/workdir --workdir /workdir \ ${IMAGE_URI} - - docker exec ${CONTAINER_NAME} nvidia-smi - docker exec ${CONTAINER_NAME} cd vllm_tests && uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto - docker exec ${CONTAINER_NAME} cd vllm_tests && uv pip install --system pytest pytest-asyncio - docker exec ${CONTAINER_NAME} cd 
vllm_tests && pytest -s -v tests/test_logger.py - - # cleanup container - docker stop ${CONTAINER_NAME} - docker rm -f ${CONTAINER_NAME} - + + - name: Run vLLM Tests + run: | + docker exec ${CONTAINER_NAME} sh -c ' + set -eux + nvidia-smi + ls -la + cd vllm_tests + uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + uv pip install --system pytest pytest-asyncio + pytest -s -v tests/test_logger.py + ' + - name: Run qwen3 benchmark run: | # Download ShareGPT dataset if it doesn't exist @@ -131,19 +132,9 @@ jobs: wget -q -P ${HOME}/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json else echo "ShareGPT dataset already exists. Skipping download." fi - CONTAINER_NAME=vllm-rayserve - docker stop ${CONTAINER_NAME} || true - docker rm -f ${CONTAINER_NAME} || true - docker run --name ${CONTAINER_NAME} \ - -d --gpus=all --entrypoint /bin/bash \ - -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ - -v ${HOME}/.cache/vllm:/root/.cache/vllm \ - -v ${HOME}/dataset:/dataset \ - -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ - ${IMAGE_URI} \ - -c "vllm serve Qwen/Qwen3-0.6B --reasoning-parser qwen3" + sleep 60 - docker logs ${CONTAINER_NAME} + # docker logs ${CONTAINER_NAME} # run serving benchmark echo "start running serving benchmark workflow..." 
@@ -153,7 +144,9 @@ jobs: --dataset-name sharegpt \ --dataset-path /dataset/ShareGPT_V3_unfiltered_cleaned_split.json \ --num-prompts 1000 - - # cleanup container - docker stop ${CONTAINER_NAME} - docker rm -f ${CONTAINER_NAME} + + - name: Cleanup container + if: always() + run: | + docker stop ${CONTAINER_NAME} || true + docker rm -f ${CONTAINER_NAME} || true diff --git a/.shellcheckrc b/.shellcheckrc deleted file mode 100644 index cf179afb4534..000000000000 --- a/.shellcheckrc +++ /dev/null @@ -1 +0,0 @@ -disable=SC2086 \ No newline at end of file From f75fa3713000a2c6b843df811bc35278518919b6 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 17:24:02 -0800 Subject: [PATCH 35/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 29 +++++++++++++------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 9fcb1e671c78..9d1a9adbd4c6 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -74,13 +74,9 @@ jobs: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-g6xl-runner steps: - - uses: actions/checkout@v5 - - name: Download image URI - uses: actions/download-artifact@v4 - with: - name: vllm-rayserve-ec2-image-uri - - - name: Checkout vLLM + - name: Checkout DLC Source + uses: actions/checkout@v5 + - name: Checkout vLLM Tests uses: actions/checkout@v5 with: repository: vllm-project/vllm @@ -90,18 +86,19 @@ jobs: tests path: vllm_tests - - name: Pull image URI - id: read + - name: Download image URI + uses: actions/download-artifact@v4 + with: + name: vllm-rayserve-ec2-image-uri + + - name: Start container + id: start run: | IMAGE_URI=$(cat image_uri.txt) echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV echo "Resolved image URI: $IMAGE_URI" aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ 
secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com docker pull "$IMAGE_URI" - - - name: Start container - id: start - run: | CONTAINER_NAME=vllm-rayserve-test echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV docker run --name ${CONTAINER_NAME} \ @@ -116,6 +113,8 @@ jobs: docker exec ${CONTAINER_NAME} sh -c ' set -eux nvidia-smi + pwd + cd /workdir ls -la cd vllm_tests uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto @@ -145,8 +144,10 @@ jobs: --dataset-path /dataset/ShareGPT_V3_unfiltered_cleaned_split.json \ --num-prompts 1000 - - name: Cleanup container + - name: Cleanup container and image if: always() run: | docker stop ${CONTAINER_NAME} || true docker rm -f ${CONTAINER_NAME} || true + docker rmi ${IMAGE_URI} || true + docker image ls || true From 1ad77b4b16bfb5be6d1fc2b7892a73ab203bd2ab Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 17:33:04 -0800 Subject: [PATCH 36/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 9d1a9adbd4c6..8f4a1bc55201 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -76,29 +76,36 @@ jobs: steps: - name: Checkout DLC Source uses: actions/checkout@v5 + - name: Checkout vLLM Tests uses: actions/checkout@v5 with: repository: vllm-project/vllm ref: v0.10.2 + path: vllm_tests sparse-checkout: | requirements tests - path: vllm_tests + sparse-checkout-cone-mode: false - name: Download image URI uses: actions/download-artifact@v4 with: name: vllm-rayserve-ec2-image-uri - - - name: Start container - id: start + + - name: Resolve image URI run: | IMAGE_URI=$(cat image_uri.txt) - echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV echo "Resolved image URI: $IMAGE_URI" + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + + - 
name: Pull image + run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - docker pull "$IMAGE_URI" + docker pull $IMAGE_URI + + - name: Start container + run: | CONTAINER_NAME=vllm-rayserve-test echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV docker run --name ${CONTAINER_NAME} \ From a98f01c458a6deb6fd9d2d847624c90e60eab410 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 17:37:17 -0800 Subject: [PATCH 37/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 8f4a1bc55201..df0a613078ae 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -106,13 +106,15 @@ jobs: - name: Start container run: | + pwd + ls -la CONTAINER_NAME=vllm-rayserve-test echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV docker run --name ${CONTAINER_NAME} \ -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ - -v vllm_tests:/workdir --workdir /workdir \ + -v ./vllm_tests:/workdir --workdir /workdir \ ${IMAGE_URI} - name: Run vLLM Tests @@ -121,9 +123,7 @@ jobs: set -eux nvidia-smi pwd - cd /workdir ls -la - cd vllm_tests uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto uv pip install --system pytest pytest-asyncio pytest -s -v tests/test_logger.py From 13a065dc84b1cdbd97efb64e1ccb707ddcfd7765 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 17:47:53 -0800 Subject: [PATCH 38/63] refactor Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 39 ++++++++++++-------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git 
a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index df0a613078ae..77b7785d4b31 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -74,11 +74,8 @@ jobs: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-g6xl-runner steps: - - name: Checkout DLC Source - uses: actions/checkout@v5 - - - name: Checkout vLLM Tests - uses: actions/checkout@v5 + - uses: actions/checkout@v5 + - uses: actions/checkout@v5 with: repository: vllm-project/vllm ref: v0.10.2 @@ -106,26 +103,26 @@ jobs: - name: Start container run: | - pwd - ls -la - CONTAINER_NAME=vllm-rayserve-test - echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV - docker run --name ${CONTAINER_NAME} \ - -d -it --rm --gpus=all --entrypoint /bin/bash \ + CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ -v ./vllm_tests:/workdir --workdir /workdir \ - ${IMAGE_URI} + ${IMAGE_URI}) + echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV + - name: Install Test dependencies + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + uv pip install --system pytest pytest-asyncio + ' + - name: Run vLLM Tests run: | - docker exec ${CONTAINER_NAME} sh -c ' + docker exec ${CONTAINER_ID} sh -c ' set -eux nvidia-smi - pwd - ls -la - uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto - uv pip install --system pytest pytest-asyncio pytest -s -v tests/test_logger.py ' @@ -140,11 +137,10 @@ jobs: fi sleep 60 - # docker logs ${CONTAINER_NAME} # run serving benchmark echo "start running serving benchmark workflow..." 
- docker exec ${CONTAINER_NAME} vllm bench serve \ + docker exec ${CONTAINER_ID} vllm bench serve \ --backend vllm \ --model Qwen/Qwen3-0.6B \ --dataset-name sharegpt \ @@ -154,7 +150,8 @@ jobs: - name: Cleanup container and image if: always() run: | - docker stop ${CONTAINER_NAME} || true - docker rm -f ${CONTAINER_NAME} || true + docker stop ${CONTAINER_ID} || true + docker rm -f ${CONTAINER_ID} || true docker rmi ${IMAGE_URI} || true docker image ls || true + docker system prune -af From b75b9246d9cdbad6f97ea238c799fc66b3bc2a55 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 17:53:55 -0800 Subject: [PATCH 39/63] add dataset path Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 77b7785d4b31..fd26f02db89a 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -106,6 +106,7 @@ jobs: CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ${HOME}/dataset:/root/dataset \ -v ./vllm_tests:/workdir --workdir /workdir \ ${IMAGE_URI}) echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV @@ -153,5 +154,3 @@ jobs: docker stop ${CONTAINER_ID} || true docker rm -f ${CONTAINER_ID} || true docker rmi ${IMAGE_URI} || true - docker image ls || true - docker system prune -af From ff6bba4b2d3f11ad7bc993bb4eef24ec05d69d76 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 18:05:58 -0800 Subject: [PATCH 40/63] try smart cleanup Signed-off-by: Junpu Fan --- .github/scripts/cleanup_old_image.sh | 14 +++++++++++ .github/workflows/pr-vllm-rayserve.yml | 33 +++++++------------------- 2 files changed, 22 insertions(+), 25 deletions(-) create mode 100755 .github/scripts/cleanup_old_image.sh diff --git 
a/.github/scripts/cleanup_old_image.sh b/.github/scripts/cleanup_old_image.sh new file mode 100755 index 000000000000..b615f0393c05 --- /dev/null +++ b/.github/scripts/cleanup_old_image.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Delete images older than 1 day (24h) +cutoff=$(date -d '1 day ago' +%s) + +docker images --format '{{.ID}} {{.Repository}}:{{.Tag}} {{.CreatedAt}}' \ + | while read -r id name created_at _; do + created_ts=$(date -d "$created_at" +%s 2>/dev/null || echo 0) + if (( created_ts < cutoff )); then + echo "Deleting old image: $name ($id, created $created_at)" + docker rmi -f "$id" || true + fi + done diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index fd26f02db89a..e1c2cb2b0714 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -106,7 +106,6 @@ jobs: CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ - -v ${HOME}/dataset:/root/dataset \ -v ./vllm_tests:/workdir --workdir /workdir \ ${IMAGE_URI}) echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV @@ -123,34 +122,18 @@ jobs: run: | docker exec ${CONTAINER_ID} sh -c ' set -eux - nvidia-smi + nvidia-smi pytest -s -v tests/test_logger.py + # Entrypoints Integration Test (LLM) # 30min + # export VLLM_WORKER_MULTIPROC_METHOD=spawn + # pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py + # pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process + # pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests ' - - - name: Run qwen3 benchmark - run: | - # Download ShareGPT dataset if it doesn't exist - mkdir -p ${HOME}/dataset - if [ ! -f ${HOME}/dataset/ShareGPT_V3_unfiltered_cleaned_split.json ]; then - echo "Downloading ShareGPT dataset..." 
- wget -q -P ${HOME}/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json else - echo "ShareGPT dataset already exists. Skipping download." - fi - - sleep 60 - - # run serving benchmark - echo "start running serving benchmark workflow..." - docker exec ${CONTAINER_ID} vllm bench serve \ - --backend vllm \ - --model Qwen/Qwen3-0.6B \ - --dataset-name sharegpt \ - --dataset-path /dataset/ShareGPT_V3_unfiltered_cleaned_split.json \ - --num-prompts 1000 - + - name: Cleanup container and image if: always() run: | docker stop ${CONTAINER_ID} || true docker rm -f ${CONTAINER_ID} || true - docker rmi ${IMAGE_URI} || true + - run: .github/scripts/cleanup_old_image.sh From 43570430a5db4e793b3fbd2a95a2a07d1c39b763 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 18:12:55 -0800 Subject: [PATCH 41/63] cleanup Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index e1c2cb2b0714..f1cdf3123b4a 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -134,6 +134,5 @@ jobs: - name: Cleanup container and image if: always() run: | - docker stop ${CONTAINER_ID} || true docker rm -f ${CONTAINER_ID} || true - - run: .github/scripts/cleanup_old_image.sh + docker system prune -af From 85cffdf61f5c9f74eeadcbd3aade5feb77de0c5f Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 18:19:57 -0800 Subject: [PATCH 42/63] update Signed-off-by: Junpu Fan --- .github/scripts/cleanup_old_image.sh | 32 ++++++++++++++++++++++---- .github/workflows/pr-vllm-rayserve.yml | 7 ++++-- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/.github/scripts/cleanup_old_image.sh b/.github/scripts/cleanup_old_image.sh index b615f0393c05..379334cbf6ac 100755 --- 
a/.github/scripts/cleanup_old_image.sh +++ b/.github/scripts/cleanup_old_image.sh @@ -1,14 +1,36 @@ #!/usr/bin/env bash set -euo pipefail -# Delete images older than 1 day (24h) -cutoff=$(date -d '1 day ago' +%s) +# Configurable cutoff age (default 1 day) +CUTOFF_HOURS=${CUTOFF_HOURS:-24} +CUTOFF_TS=$(date -d "${CUTOFF_HOURS} hours ago" +%s) + +echo "=== Docker disk usage before cleanup ===" +docker system df -v || true +echo + +echo "=== Checking images older than ${CUTOFF_HOURS}h ===" +deleted=0 +kept=0 docker images --format '{{.ID}} {{.Repository}}:{{.Tag}} {{.CreatedAt}}' \ | while read -r id name created_at _; do + # skip dangling images (no repo:tag) + [ "$name" = ":" ] && continue created_ts=$(date -d "$created_at" +%s 2>/dev/null || echo 0) - if (( created_ts < cutoff )); then - echo "Deleting old image: $name ($id, created $created_at)" - docker rmi -f "$id" || true + if (( created_ts < CUTOFF_TS )); then + echo "🗑️ Removing old image: $name (created $created_at)" + docker rmi -f "$id" >/dev/null 2>&1 && ((deleted++)) || true + else + ((kept++)) fi done + +echo +echo "=== Cleanup summary ===" +echo "Images kept: $kept" +echo "Images deleted: $deleted" +echo + +echo "=== Docker disk usage after cleanup ===" +docker system df -v || true diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index f1cdf3123b4a..be54a42cb62b 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -131,8 +131,11 @@ jobs: # pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests ' - - name: Cleanup container and image + - name: Cleanup container if: always() run: | docker rm -f ${CONTAINER_ID} || true - docker system prune -af + + - name: Cleanup old images + if: always() + run: cleanup_old_image.sh From 12e2dc1f929ceb95c06f11f4d88908718401124e Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 18:26:43 -0800 Subject: [PATCH 43/63] fix Signed-off-by: Junpu 
Fan --- .github/workflows/pr-vllm-rayserve.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index be54a42cb62b..2280030de82f 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -138,4 +138,4 @@ jobs: - name: Cleanup old images if: always() - run: cleanup_old_image.sh + run: .github/scripts/cleanup_old_image.sh From ccb5a733799246ce2c0b7c97f356700f4f0a3381 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 18:31:44 -0800 Subject: [PATCH 44/63] update script Signed-off-by: Junpu Fan --- .github/scripts/cleanup_old_image.sh | 47 ++++++++++++++++---------- .github/workflows/pr-vllm-rayserve.yml | 6 ++-- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/.github/scripts/cleanup_old_image.sh b/.github/scripts/cleanup_old_image.sh index 379334cbf6ac..4291139d1c9d 100755 --- a/.github/scripts/cleanup_old_image.sh +++ b/.github/scripts/cleanup_old_image.sh @@ -1,36 +1,47 @@ #!/usr/bin/env bash -set -euo pipefail +set -u # only unset vars cause failure, not command errors -# Configurable cutoff age (default 1 day) +# Configurable cutoff age (default 24h) CUTOFF_HOURS=${CUTOFF_HOURS:-24} -CUTOFF_TS=$(date -d "${CUTOFF_HOURS} hours ago" +%s) +CUTOFF_TS=$(date -d "${CUTOFF_HOURS} hours ago" +%s 2>/dev/null || date -v-"${CUTOFF_HOURS}"H +%s) echo "=== Docker disk usage before cleanup ===" -docker system df -v || true +docker system df -v || echo "(warning: docker system df failed)" echo echo "=== Checking images older than ${CUTOFF_HOURS}h ===" + deleted=0 kept=0 -docker images --format '{{.ID}} {{.Repository}}:{{.Tag}} {{.CreatedAt}}' \ - | while read -r id name created_at _; do - # skip dangling images (no repo:tag) - [ "$name" = ":" ] && continue - created_ts=$(date -d "$created_at" +%s 2>/dev/null || echo 0) - if (( created_ts < CUTOFF_TS )); then - echo "🗑️ Removing old image: $name (created 
$created_at)" - docker rmi -f "$id" >/dev/null 2>&1 && ((deleted++)) || true - else - ((kept++)) - fi - done +# Use a safer loop (no pipe subshell, avoid 'set -e' inside) +while IFS= read -r line; do + id=$(awk '{print $1}' <<<"$line") + name=$(awk '{print $2}' <<<"$line") + created_at=$(awk '{$1=$2=""; print substr($0,3)}' <<<"$line") + + # Skip empty or malformed lines + [ -z "$id" ] && continue + [ "$name" = ":" ] && continue + + created_ts=$(date -d "$created_at" +%s 2>/dev/null || echo 0) + if [ "$created_ts" -lt "$CUTOFF_TS" ]; then + echo "🗑️ Removing old image: $name (created $created_at)" + if docker rmi -f "$id" >/dev/null 2>&1; then + deleted=$((deleted+1)) + else + echo "(warning: failed to remove $name)" + fi + else + kept=$((kept+1)) + fi +done < <(docker images --format '{{.ID}} {{.Repository}}:{{.Tag}} {{.CreatedAt}}' 2>/dev/null || true) echo echo "=== Cleanup summary ===" -echo "Images kept: $kept" +echo "Images kept: $kept" echo "Images deleted: $deleted" echo echo "=== Docker disk usage after cleanup ===" -docker system df -v || true +docker system df -v || echo "(warning: docker system df failed)" diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 2280030de82f..21cbd4943547 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -74,8 +74,10 @@ jobs: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-g6xl-runner steps: - - uses: actions/checkout@v5 - - uses: actions/checkout@v5 + - name: Checkout DLC source + uses: actions/checkout@v5 + - name: Checkout vLLM Tests + uses: actions/checkout@v5 with: repository: vllm-project/vllm ref: v0.10.2 From 43c22322328a38ab6860d0426806ace50072e636 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 19:01:12 -0800 Subject: [PATCH 45/63] enable Entrypoints Integration Test (LLM) Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 21cbd4943547..6423f1c27fc7 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -127,10 +127,10 @@ jobs: nvidia-smi pytest -s -v tests/test_logger.py # Entrypoints Integration Test (LLM) # 30min - # export VLLM_WORKER_MULTIPROC_METHOD=spawn - # pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py - # pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process - # pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests + export VLLM_WORKER_MULTIPROC_METHOD=spawn + pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py + pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process + pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests ' - name: Cleanup container From 15f0c89055f0bf4f261e232f5b43ea102697acf1 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 20:37:58 -0800 Subject: [PATCH 46/63] update Signed-off-by: Junpu Fan --- .github/scripts/cleanup_old_image.sh | 21 +++++++++++++-------- .github/workflows/pr-vllm-rayserve.yml | 1 + 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.github/scripts/cleanup_old_image.sh b/.github/scripts/cleanup_old_image.sh index 4291139d1c9d..106a0ad64b01 100755 --- a/.github/scripts/cleanup_old_image.sh +++ b/.github/scripts/cleanup_old_image.sh @@ -1,32 +1,33 @@ #!/usr/bin/env bash -set -u # only unset vars cause failure, not command errors +set -u # only fail on unset vars -# Configurable cutoff age (default 24h) CUTOFF_HOURS=${CUTOFF_HOURS:-24} -CUTOFF_TS=$(date -d "${CUTOFF_HOURS} hours ago" +%s 2>/dev/null || date -v-"${CUTOFF_HOURS}"H +%s) +# always compute cutoff in UTC for comparison +CUTOFF_TS=$(date -u -d 
"${CUTOFF_HOURS} hours ago" +%s 2>/dev/null || \ + date -u -v-"${CUTOFF_HOURS}"H +%s) echo "=== Docker disk usage before cleanup ===" docker system df -v || echo "(warning: docker system df failed)" echo -echo "=== Checking images older than ${CUTOFF_HOURS}h ===" +echo "=== Checking images older than ${CUTOFF_HOURS}h (UTC-based) ===" deleted=0 kept=0 -# Use a safer loop (no pipe subshell, avoid 'set -e' inside) while IFS= read -r line; do id=$(awk '{print $1}' <<<"$line") name=$(awk '{print $2}' <<<"$line") created_at=$(awk '{$1=$2=""; print substr($0,3)}' <<<"$line") - # Skip empty or malformed lines [ -z "$id" ] && continue [ "$name" = ":" ] && continue - created_ts=$(date -d "$created_at" +%s 2>/dev/null || echo 0) + # parse docker UTC timestamp safely + created_ts=$(date -u -d "$created_at" +%s 2>/dev/null || echo 0) + if [ "$created_ts" -lt "$CUTOFF_TS" ]; then - echo "🗑️ Removing old image: $name (created $created_at)" + echo "🗑️ Removing old image: $name (created $created_at UTC)" if docker rmi -f "$id" >/dev/null 2>&1; then deleted=$((deleted+1)) else @@ -45,3 +46,7 @@ echo echo "=== Docker disk usage after cleanup ===" docker system df -v || echo "(warning: docker system df failed)" + +echo +echo "=== Host disk space (for /var/lib/docker) ===" +df -h /var/lib/docker 2>/dev/null || df -h / diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 6423f1c27fc7..5c46fcde3075 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -127,6 +127,7 @@ jobs: nvidia-smi pytest -s -v tests/test_logger.py # Entrypoints Integration Test (LLM) # 30min + cd tests export VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process From e9fa11cba6c347ecf2c3263b5c39c664600990fb Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: 
Mon, 10 Nov 2025 21:06:09 -0800 Subject: [PATCH 47/63] update Signed-off-by: Junpu Fan --- .github/scripts/cleanup_old_image.sh | 44 +++++++++++++++----------- .github/workflows/pr-vllm-rayserve.yml | 3 ++ 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/.github/scripts/cleanup_old_image.sh b/.github/scripts/cleanup_old_image.sh index 106a0ad64b01..b5e2ac33a591 100755 --- a/.github/scripts/cleanup_old_image.sh +++ b/.github/scripts/cleanup_old_image.sh @@ -1,33 +1,41 @@ #!/usr/bin/env bash -set -u # only fail on unset vars +set -euo pipefail +# configurable cutoff CUTOFF_HOURS=${CUTOFF_HOURS:-24} -# always compute cutoff in UTC for comparison -CUTOFF_TS=$(date -u -d "${CUTOFF_HOURS} hours ago" +%s 2>/dev/null || \ - date -u -v-"${CUTOFF_HOURS}"H +%s) +CUTOFF_TS=$(date -u -d "${CUTOFF_HOURS} hours ago" +%s 2>/dev/null || date -u -v-"${CUTOFF_HOURS}"H +%s) echo "=== Docker disk usage before cleanup ===" -docker system df -v || echo "(warning: docker system df failed)" +docker system df -v || true echo -echo "=== Checking images older than ${CUTOFF_HOURS}h (UTC-based) ===" - deleted=0 kept=0 -while IFS= read -r line; do - id=$(awk '{print $1}' <<<"$line") - name=$(awk '{print $2}' <<<"$line") - created_at=$(awk '{$1=$2=""; print substr($0,3)}' <<<"$line") +echo "=== Checking images older than ${CUTOFF_HOURS}h (UTC) ===" + +docker images --format '{{json .}}' | while read -r json; do + id=$(jq -r '.ID' <<<"$json") + repo=$(jq -r '.Repository' <<<"$json") + tag=$(jq -r '.Tag' <<<"$json") + created_at=$(jq -r '.CreatedAt' <<<"$json") + # Skip empty or invalid [ -z "$id" ] && continue - [ "$name" = ":" ] && continue - # parse docker UTC timestamp safely - created_ts=$(date -u -d "$created_at" +%s 2>/dev/null || echo 0) + # Normalize name + name="${repo}:${tag}" + + # Convert CreatedAt → epoch (cross-platform) + if date --version >/dev/null 2>&1; then + created_ts=$(date -u -d "$created_at" +%s) + else + created_ts=$(date -u -j -f "%Y-%m-%d %H:%M:%S %z %Z" 
"$created_at" +%s) + fi + # Compare if [ "$created_ts" -lt "$CUTOFF_TS" ]; then - echo "🗑️ Removing old image: $name (created $created_at UTC)" + echo "🗑️ Removing old image: $name (created $created_at)" if docker rmi -f "$id" >/dev/null 2>&1; then deleted=$((deleted+1)) else @@ -36,7 +44,7 @@ while IFS= read -r line; do else kept=$((kept+1)) fi -done < <(docker images --format '{{.ID}} {{.Repository}}:{{.Tag}} {{.CreatedAt}}' 2>/dev/null || true) +done echo echo "=== Cleanup summary ===" @@ -45,8 +53,8 @@ echo "Images deleted: $deleted" echo echo "=== Docker disk usage after cleanup ===" -docker system df -v || echo "(warning: docker system df failed)" +docker system df -v || true echo -echo "=== Host disk space (for /var/lib/docker) ===" +echo "=== Disk space for /var/lib/docker ===" df -h /var/lib/docker 2>/dev/null || df -h / diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 5c46fcde3075..f20aff876e87 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -118,12 +118,15 @@ jobs: set -eux uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto uv pip install --system pytest pytest-asyncio + uv pip install --system -e tests/vllm_test_utils + uv pip install --system hf_transfer ' - name: Run vLLM Tests run: | docker exec ${CONTAINER_ID} sh -c ' set -eux + HF_HUB_ENABLE_HF_TRANSFER=1 nvidia-smi pytest -s -v tests/test_logger.py # Entrypoints Integration Test (LLM) # 30min From 60fd04f0fe5fd69f774dba570f0c64d5e81c06ba Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Mon, 10 Nov 2025 21:39:13 -0800 Subject: [PATCH 48/63] update test Signed-off-by: Junpu Fan --- .github/scripts/cleanup_old_image.sh | 60 -------------------------- .github/workflows/pr-vllm-rayserve.yml | 21 +++++---- 2 files changed, 10 insertions(+), 71 deletions(-) delete mode 100755 .github/scripts/cleanup_old_image.sh diff --git a/.github/scripts/cleanup_old_image.sh 
b/.github/scripts/cleanup_old_image.sh deleted file mode 100755 index b5e2ac33a591..000000000000 --- a/.github/scripts/cleanup_old_image.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# configurable cutoff -CUTOFF_HOURS=${CUTOFF_HOURS:-24} -CUTOFF_TS=$(date -u -d "${CUTOFF_HOURS} hours ago" +%s 2>/dev/null || date -u -v-"${CUTOFF_HOURS}"H +%s) - -echo "=== Docker disk usage before cleanup ===" -docker system df -v || true -echo - -deleted=0 -kept=0 - -echo "=== Checking images older than ${CUTOFF_HOURS}h (UTC) ===" - -docker images --format '{{json .}}' | while read -r json; do - id=$(jq -r '.ID' <<<"$json") - repo=$(jq -r '.Repository' <<<"$json") - tag=$(jq -r '.Tag' <<<"$json") - created_at=$(jq -r '.CreatedAt' <<<"$json") - - # Skip empty or invalid - [ -z "$id" ] && continue - - # Normalize name - name="${repo}:${tag}" - - # Convert CreatedAt → epoch (cross-platform) - if date --version >/dev/null 2>&1; then - created_ts=$(date -u -d "$created_at" +%s) - else - created_ts=$(date -u -j -f "%Y-%m-%d %H:%M:%S %z %Z" "$created_at" +%s) - fi - - # Compare - if [ "$created_ts" -lt "$CUTOFF_TS" ]; then - echo "🗑️ Removing old image: $name (created $created_at)" - if docker rmi -f "$id" >/dev/null 2>&1; then - deleted=$((deleted+1)) - else - echo "(warning: failed to remove $name)" - fi - else - kept=$((kept+1)) - fi -done - -echo -echo "=== Cleanup summary ===" -echo "Images kept: $kept" -echo "Images deleted: $deleted" -echo - -echo "=== Docker disk usage after cleanup ===" -docker system df -v || true - -echo -echo "=== Disk space for /var/lib/docker ===" -df -h /var/lib/docker 2>/dev/null || df -h / diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index f20aff876e87..34da2de603ac 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -126,22 +126,21 @@ jobs: run: | docker exec ${CONTAINER_ID} sh -c ' set -eux - HF_HUB_ENABLE_HF_TRANSFER=1 - 
nvidia-smi pytest -s -v tests/test_logger.py - # Entrypoints Integration Test (LLM) # 30min + nvidia-smi + + # Core Test # 22min + pytest -v -s core + + # Entrypoints Unit Tests # 5min cd tests - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process - pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests + HF_HUB_ENABLE_HF_TRANSFER=1 + pytest -v -s entrypoints/openai/tool_parsers + pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling + ' - name: Cleanup container if: always() run: | docker rm -f ${CONTAINER_ID} || true - - - name: Cleanup old images - if: always() - run: .github/scripts/cleanup_old_image.sh From 56d85c1d06aca06f56b93117d2c82d432ea7afd7 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 07:44:13 -0800 Subject: [PATCH 49/63] add cleanup Signed-off-by: Junpu Fan --- .github/scripts/image_cleanup.py | 19 +++++++++++++++++++ .github/workflows/pr-vllm-rayserve.yml | 22 +++++++++++++--------- 2 files changed, 32 insertions(+), 9 deletions(-) create mode 100644 .github/scripts/image_cleanup.py diff --git a/.github/scripts/image_cleanup.py b/.github/scripts/image_cleanup.py new file mode 100644 index 000000000000..0a146567756a --- /dev/null +++ b/.github/scripts/image_cleanup.py @@ -0,0 +1,19 @@ +import docker +import datetime + +client = docker.from_env() +cutoff = datetime.datetime.utcnow() - datetime.timedelta(hours=24) + +for img in client.images.list(): + # 'Created' is epoch seconds + created = datetime.datetime.utcfromtimestamp(img.attrs["Created"]) + if created < cutoff: + name = img.tags[0] if img.tags else "" + print(f"🗑️ Removing {name} (created {created.isoformat()}Z)") + try: + 
client.images.remove(img.id, force=True) + except docker.errors.APIError as e: + print(f"(warning: failed to remove {name}: {e.explanation})") + +print("\n=== Disk usage summary ===") +print(client.df()) # structured info like `docker system df` diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 34da2de603ac..e78f2a102464 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -86,6 +86,10 @@ jobs: requirements tests sparse-checkout-cone-mode: false + - uses: actions/setup-python@v6 + with: + python-version: "3.12" + - run: python -m pip install docker - name: Download image URI uses: actions/download-artifact@v4 @@ -126,21 +130,21 @@ jobs: run: | docker exec ${CONTAINER_ID} sh -c ' set -eux - pytest -s -v tests/test_logger.py - nvidia-smi + nvidia-smi + cd tests + pytest -s -v test_logger.py # Core Test # 22min - pytest -v -s core + # pytest -v -s core # Entrypoints Unit Tests # 5min - cd tests - HF_HUB_ENABLE_HF_TRANSFER=1 - pytest -v -s entrypoints/openai/tool_parsers - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling - + # HF_HUB_ENABLE_HF_TRANSFER=1 + # pytest -v -s entrypoints/openai/tool_parsers + # pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling ' - - name: Cleanup container + - name: Cleanup container and images if: always() run: | docker rm -f ${CONTAINER_ID} || true + python .github/scripts/image_cleanup.py From 432917dc0e32e0a8da7647682e953b96114293f7 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 07:48:10 -0800 Subject: [PATCH 50/63] fix Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index e78f2a102464..f0a19d86fff2 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -88,7 +88,7 @@ jobs: sparse-checkout-cone-mode: false - uses: actions/setup-python@v6 with: - python-version: "3.12" + python-version: "3.12.12" - run: python -m pip install docker - name: Download image URI From e5ad9e6acccc9f93cd929b4a4ad630f3ad0d3e7f Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 07:54:21 -0800 Subject: [PATCH 51/63] update Signed-off-by: Junpu Fan --- .github/scripts/runner_setup.sh | 2 ++ .github/workflows/pr-vllm-rayserve.yml | 5 +---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/scripts/runner_setup.sh b/.github/scripts/runner_setup.sh index e6eb3aecd88e..2c6f733ccab8 100755 --- a/.github/scripts/runner_setup.sh +++ b/.github/scripts/runner_setup.sh @@ -5,4 +5,6 @@ if ! command -v uv &> /dev/null; then curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR="/usr/local/bin" sh uv self update fi +uv python install 3.12 +uv python list docker --version diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index f0a19d86fff2..9bc7c1c04883 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -86,10 +86,7 @@ jobs: requirements tests sparse-checkout-cone-mode: false - - uses: actions/setup-python@v6 - with: - python-version: "3.12.12" - - run: python -m pip install docker + - run: .github/scripts/runner_setup.sh - name: Download image URI uses: actions/download-artifact@v4 From 8e7a408dd586ef8df0f8dbf58743a4d2202b1838 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 08:56:35 -0800 Subject: [PATCH 52/63] update Signed-off-by: Junpu Fan --- .github/scripts/image_cleanup.py | 19 ------------------- .github/scripts/runner_setup.sh | 2 -- .github/workflows/pr-vllm-rayserve.yml | 3 +-- 3 files 
changed, 1 insertion(+), 23 deletions(-) delete mode 100644 .github/scripts/image_cleanup.py diff --git a/.github/scripts/image_cleanup.py b/.github/scripts/image_cleanup.py deleted file mode 100644 index 0a146567756a..000000000000 --- a/.github/scripts/image_cleanup.py +++ /dev/null @@ -1,19 +0,0 @@ -import docker -import datetime - -client = docker.from_env() -cutoff = datetime.datetime.utcnow() - datetime.timedelta(hours=24) - -for img in client.images.list(): - # 'Created' is epoch seconds - created = datetime.datetime.utcfromtimestamp(img.attrs["Created"]) - if created < cutoff: - name = img.tags[0] if img.tags else "" - print(f"🗑️ Removing {name} (created {created.isoformat()}Z)") - try: - client.images.remove(img.id, force=True) - except docker.errors.APIError as e: - print(f"(warning: failed to remove {name}: {e.explanation})") - -print("\n=== Disk usage summary ===") -print(client.df()) # structured info like `docker system df` diff --git a/.github/scripts/runner_setup.sh b/.github/scripts/runner_setup.sh index 2c6f733ccab8..e6eb3aecd88e 100755 --- a/.github/scripts/runner_setup.sh +++ b/.github/scripts/runner_setup.sh @@ -5,6 +5,4 @@ if ! 
command -v uv &> /dev/null; then curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR="/usr/local/bin" sh uv self update fi -uv python install 3.12 -uv python list docker --version diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 9bc7c1c04883..1127c8cd8bfd 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -86,7 +86,6 @@ jobs: requirements tests sparse-checkout-cone-mode: false - - run: .github/scripts/runner_setup.sh - name: Download image URI uses: actions/download-artifact@v4 @@ -144,4 +143,4 @@ jobs: if: always() run: | docker rm -f ${CONTAINER_ID} || true - python .github/scripts/image_cleanup.py + docker image prune -a --force --filter "until=24h" From f5e61e3102beebd2fcd24c1a6eed49b907935e12 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 09:05:33 -0800 Subject: [PATCH 53/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 1127c8cd8bfd..c0ab14cb6edb 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -35,7 +35,7 @@ jobs: vllm-rayserve-ec2: - "docker/vllm/Dockerfile.rayserve" - build-vllm-rayserve-ec2: + build: needs: [check-changes] if: needs.check-changes.outputs.vllm-rayserve-ec2 == 'true' runs-on: @@ -67,9 +67,9 @@ jobs: name: vllm-rayserve-ec2-image-uri path: image_uri.txt - test-vllm-rayserve-ec2: - needs: [build-vllm-rayserve-ec2] - if: needs.build-vllm-rayserve-ec2.result == 'success' + test: + needs: [build] + if: needs.build.result == 'success' runs-on: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-g6xl-runner @@ -121,7 +121,7 @@ jobs: uv pip install --system -e tests/vllm_test_utils uv pip install --system hf_transfer ' - + - name: Run vLLM Tests run: | docker 
exec ${CONTAINER_ID} sh -c ' @@ -138,9 +138,10 @@ jobs: # pytest -v -s entrypoints/openai/tool_parsers # pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling ' - + - name: Cleanup container and images if: always() run: | docker rm -f ${CONTAINER_ID} || true docker image prune -a --force --filter "until=24h" + docker system df -v From 16d5f1eb116aabece5d78942640568f681b1deaf Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 09:19:35 -0800 Subject: [PATCH 54/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 43 +++++++++++++++++--------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index c0ab14cb6edb..282c306577bd 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -35,7 +35,7 @@ jobs: vllm-rayserve-ec2: - "docker/vllm/Dockerfile.rayserve" - build: + build-image: needs: [check-changes] if: needs.check-changes.outputs.vllm-rayserve-ec2 == 'true' runs-on: @@ -45,31 +45,41 @@ jobs: - uses: actions/checkout@v5 - run: .github/scripts/runner_setup.sh - run: .github/scripts/buildkitd.sh - - name: Build vllm-rayserve-ec2 image - id: build - shell: bash + - name: ECR login run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - IMAGE_TAG=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }} + + - name: Resolve image URI for build + run: | + IMAGE_URI=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ 
github.event.pull_request.number }} + echo "Image URI to build: $IMAGE_URI" + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + + - name: Build image + run: | docker buildx build --progress plain \ --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \ --cache-to=type=inline \ - --cache-from=type=registry,ref=$IMAGE_TAG \ - --tag $IMAGE_TAG \ + --cache-from=type=registry,ref=$IMAGE_URI \ + --tag $IMAGE_URI \ --target vllm-rayserve-ec2 \ -f docker/vllm/Dockerfile.rayserve . - docker push $IMAGE_TAG - docker rmi $IMAGE_TAG - echo $IMAGE_TAG > image_uri.txt + + - name: Docker Push and save IMAGE_URI + run: | + docker push $IMAGE_URI + docker rmi $IMAGE_URI + echo $IMAGE_URI > image_uri.txt + - name: Upload image URI uses: actions/upload-artifact@v4 with: name: vllm-rayserve-ec2-image-uri path: image_uri.txt - test: - needs: [build] - if: needs.build.result == 'success' + test-image: + needs: [build-image] + if: needs.build-image.result == 'success' runs-on: - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} fleet:x86-g6xl-runner @@ -98,9 +108,12 @@ jobs: echo "Resolved image URI: $IMAGE_URI" echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV - - name: Pull image + - name: ECR login run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + + - name: Pull image + run: | docker pull $IMAGE_URI - name: Start container @@ -144,4 +157,4 @@ jobs: run: | docker rm -f ${CONTAINER_ID} || true docker image prune -a --force --filter "until=24h" - docker system df -v + docker system df From c0a8c8512dc3f4c728f5daf2e6ad98d496a5d30e Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 09:23:25 -0800 Subject: [PATCH 55/63] update workflow Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml 
b/.github/workflows/pr-vllm-rayserve.yml index 282c306577bd..429cb27af73e 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -65,13 +65,13 @@ jobs: --target vllm-rayserve-ec2 \ -f docker/vllm/Dockerfile.rayserve . - - name: Docker Push and save IMAGE_URI + - name: Docker Push and save image URI artifact run: | docker push $IMAGE_URI docker rmi $IMAGE_URI echo $IMAGE_URI > image_uri.txt - - name: Upload image URI + - name: Upload image URI artifact uses: actions/upload-artifact@v4 with: name: vllm-rayserve-ec2-image-uri @@ -86,6 +86,7 @@ jobs: steps: - name: Checkout DLC source uses: actions/checkout@v5 + - name: Checkout vLLM Tests uses: actions/checkout@v5 with: @@ -97,21 +98,21 @@ jobs: tests sparse-checkout-cone-mode: false - - name: Download image URI + - name: ECR login + run: | + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + + - name: Download image URI artifact uses: actions/download-artifact@v4 with: name: vllm-rayserve-ec2-image-uri - - name: Resolve image URI + - name: Resolve image URI for test run: | IMAGE_URI=$(cat image_uri.txt) echo "Resolved image URI: $IMAGE_URI" echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV - - name: ECR login - run: | - aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - - name: Pull image run: | docker pull $IMAGE_URI From af227b45ff62fc68130b13563b5430c5cf5f0750 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 09:26:52 -0800 Subject: [PATCH 56/63] enable more test Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 
429cb27af73e..f21ab6f4dfe5 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -145,12 +145,12 @@ jobs: pytest -s -v test_logger.py # Core Test # 22min - # pytest -v -s core + pytest -v -s core # Entrypoints Unit Tests # 5min - # HF_HUB_ENABLE_HF_TRANSFER=1 - # pytest -v -s entrypoints/openai/tool_parsers - # pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling + HF_HUB_ENABLE_HF_TRANSFER=1 + pytest -v -s entrypoints/openai/tool_parsers + pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling ' - name: Cleanup container and images From 6ba7e450dae9556a62de996ebd0282e2b88bf585 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 09:56:41 -0800 Subject: [PATCH 57/63] update tests Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 74 ++++++++++++++++++-------- 1 file changed, 52 insertions(+), 22 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index f21ab6f4dfe5..8ebd4552d753 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -87,17 +87,6 @@ jobs: - name: Checkout DLC source uses: actions/checkout@v5 - - name: Checkout vLLM Tests - uses: actions/checkout@v5 - with: - repository: vllm-project/vllm - ref: v0.10.2 - path: vllm_tests - sparse-checkout: | - requirements - tests - sparse-checkout-cone-mode: false - - name: ECR login run: | aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com @@ -117,16 +106,23 @@ jobs: run: | docker pull $IMAGE_URI + - name: Checkout vLLM Tests + uses: actions/checkout@v5 + 
with: + repository: vllm-project/vllm + ref: v0.10.2 + path: vllm_source + - name: Start container run: | CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ - -v ./vllm_tests:/workdir --workdir /workdir \ + -v ./vllm_source:/workdir --workdir /workdir \ ${IMAGE_URI}) echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV - - name: Install Test dependencies + - name: Setup for vLLM Test run: | docker exec ${CONTAINER_ID} sh -c ' set -eux @@ -134,6 +130,8 @@ jobs: uv pip install --system pytest pytest-asyncio uv pip install --system -e tests/vllm_test_utils uv pip install --system hf_transfer + mkdir src + mv vllm src/vllm ' - name: Run vLLM Tests @@ -141,16 +139,48 @@ jobs: docker exec ${CONTAINER_ID} sh -c ' set -eux nvidia-smi - cd tests - pytest -s -v test_logger.py - # Core Test # 22min - pytest -v -s core - - # Entrypoints Unit Tests # 5min - HF_HUB_ENABLE_HF_TRANSFER=1 - pytest -v -s entrypoints/openai/tool_parsers - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling + # Regression Test # 7min + cd /workdir/tests + uv pip install --system modelscope + pytest -v -s test_regression.py + + # Engine Test # 25min + cd /workdir/tests + pytest -v -s engine test_sequence.py test_config.py test_logger.py test_vllm_port.py + pytest -v -s tokenization + + # Examples Test # 30min + cd /workdir/examples + pip install tensorizer # for tensorizer test + python3 offline_inference/basic/generate.py --model facebook/opt-125m + python3 offline_inference/basic/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10 + python3 offline_inference/basic/chat.py + python3 offline_inference/prefix_caching.py + python3 offline_inference/llm_engine_example.py + python3 offline_inference/audio_language.py --seed 0 + python3 
offline_inference/vision_language.py --seed 0 + python3 offline_inference/vision_language_pooling.py --seed 0 + python3 offline_inference/vision_language_multi_image.py --seed 0 + VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors + python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0 + python3 offline_inference/basic/classify.py + python3 offline_inference/basic/embed.py + python3 offline_inference/basic/score.py + VLLM_USE_V1=0 python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2 + + # Platform Tests (CUDA) # 4min + cd /workdir/tests + pytest -v -s cuda/test_cuda_context.py + + # Encoder Decoder tests # 12min + cd /workdir/tests + pytest -v -s encoder_decoder + + # OpenAI-Compatible Tool Use # 23min + cd /workdir/tests + pytest -v -s tool_use + pytest -v -s mistral_tool_use ' - name: Cleanup container and images From c3cc99c6c1190a692190566a6f1d4ee4871226fa Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 10:24:27 -0800 Subject: [PATCH 58/63] parallel tests Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 357 ++++++++++++++++++++++++- 1 file changed, 356 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 8ebd4552d753..dc4f31e30c9d 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -77,7 +77,7 @@ jobs: name: vllm-rayserve-ec2-image-uri path: image_uri.txt - test-image: + regression-test: needs: [build-image] if: needs.build-image.result == 'success' runs-on: @@ -144,11 +144,153 @@ jobs: cd /workdir/tests uv pip install --system modelscope pytest -v -s test_regression.py + ' + + - name: Cleanup container and 
if: always() + run: | + docker rm -f ${CONTAINER_ID} || true + docker image prune -a --force --filter "until=24h" + docker system df + + engine-test: + needs: [build-image] + if: needs.build-image.result == 'success' + runs-on: + - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} + fleet:x86-g6xl-runner + steps: + - name: Checkout DLC source + uses: actions/checkout@v5 + + - name: ECR login + run: | + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + + - name: Download image URI artifact + uses: actions/download-artifact@v4 + with: + name: vllm-rayserve-ec2-image-uri + + - name: Resolve image URI for test + run: | + IMAGE_URI=$(cat image_uri.txt) + echo "Resolved image URI: $IMAGE_URI" + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + + - name: Pull image + run: | + docker pull $IMAGE_URI + + - name: Checkout vLLM Tests + uses: actions/checkout@v5 + with: + repository: vllm-project/vllm + ref: v0.10.2 + path: vllm_source + + - name: Start container + run: | + CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ + -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ + -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ./vllm_source:/workdir --workdir /workdir \ + ${IMAGE_URI}) + echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV + + - name: Setup for vLLM Test + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + uv pip install --system pytest pytest-asyncio + uv pip install --system -e tests/vllm_test_utils + uv pip install --system hf_transfer + mkdir src + mv vllm src/vllm + ' + + - name: Run vLLM Tests + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + nvidia-smi # Engine Test # 25min cd /workdir/tests pytest -v -s engine test_sequence.py test_config.py test_logger.py 
test_vllm_port.py pytest -v -s tokenization + ' + + - name: Cleanup container and images + if: always() + run: | + docker rm -f ${CONTAINER_ID} || true + docker image prune -a --force --filter "until=24h" + docker system df + + example-test: + needs: [build-image] + if: needs.build-image.result == 'success' + runs-on: + - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} + fleet:x86-g6xl-runner + steps: + - name: Checkout DLC source + uses: actions/checkout@v5 + + - name: ECR login + run: | + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + + - name: Download image URI artifact + uses: actions/download-artifact@v4 + with: + name: vllm-rayserve-ec2-image-uri + + - name: Resolve image URI for test + run: | + IMAGE_URI=$(cat image_uri.txt) + echo "Resolved image URI: $IMAGE_URI" + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + + - name: Pull image + run: | + docker pull $IMAGE_URI + + - name: Checkout vLLM Tests + uses: actions/checkout@v5 + with: + repository: vllm-project/vllm + ref: v0.10.2 + path: vllm_source + + - name: Start container + run: | + CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ + -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ + -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ./vllm_source:/workdir --workdir /workdir \ + ${IMAGE_URI}) + echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV + + - name: Setup for vLLM Test + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + uv pip install --system pytest pytest-asyncio + uv pip install --system -e tests/vllm_test_utils + uv pip install --system hf_transfer + mkdir src + mv vllm src/vllm + ' + + - name: Run vLLM Tests + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + nvidia-smi # Examples Test # 30min cd 
/workdir/examples @@ -168,14 +310,227 @@ jobs: python3 offline_inference/basic/embed.py python3 offline_inference/basic/score.py VLLM_USE_V1=0 python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2 + ' + + - name: Cleanup container and images + if: always() + run: | + docker rm -f ${CONTAINER_ID} || true + docker image prune -a --force --filter "until=24h" + docker system df + + cuda-test: + needs: [build-image] + if: needs.build-image.result == 'success' + runs-on: + - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} + fleet:x86-g6xl-runner + steps: + - name: Checkout DLC source + uses: actions/checkout@v5 + + - name: ECR login + run: | + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + + - name: Download image URI artifact + uses: actions/download-artifact@v4 + with: + name: vllm-rayserve-ec2-image-uri + + - name: Resolve image URI for test + run: | + IMAGE_URI=$(cat image_uri.txt) + echo "Resolved image URI: $IMAGE_URI" + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + + - name: Pull image + run: | + docker pull $IMAGE_URI + + - name: Checkout vLLM Tests + uses: actions/checkout@v5 + with: + repository: vllm-project/vllm + ref: v0.10.2 + path: vllm_source + + - name: Start container + run: | + CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ + -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ + -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ./vllm_source:/workdir --workdir /workdir \ + ${IMAGE_URI}) + echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV + + - name: Setup for vLLM Test + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + uv pip install --system pytest pytest-asyncio + uv pip install --system -e tests/vllm_test_utils + uv pip 
install --system hf_transfer + mkdir src + mv vllm src/vllm + ' + + - name: Run vLLM Tests + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + nvidia-smi # Platform Tests (CUDA) # 4min cd /workdir/tests pytest -v -s cuda/test_cuda_context.py + ' + + - name: Cleanup container and images + if: always() + run: | + docker rm -f ${CONTAINER_ID} || true + docker image prune -a --force --filter "until=24h" + docker system df + + encoder-decoder-test: + needs: [build-image] + if: needs.build-image.result == 'success' + runs-on: + - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} + fleet:x86-g6xl-runner + steps: + - name: Checkout DLC source + uses: actions/checkout@v5 + + - name: ECR login + run: | + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + + - name: Download image URI artifact + uses: actions/download-artifact@v4 + with: + name: vllm-rayserve-ec2-image-uri + + - name: Resolve image URI for test + run: | + IMAGE_URI=$(cat image_uri.txt) + echo "Resolved image URI: $IMAGE_URI" + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + + - name: Pull image + run: | + docker pull $IMAGE_URI + + - name: Checkout vLLM Tests + uses: actions/checkout@v5 + with: + repository: vllm-project/vllm + ref: v0.10.2 + path: vllm_source + + - name: Start container + run: | + CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ + -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ + -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ./vllm_source:/workdir --workdir /workdir \ + ${IMAGE_URI}) + echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV + + - name: Setup for vLLM Test + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + uv pip install --system pytest pytest-asyncio + uv pip install --system -e 
tests/vllm_test_utils + uv pip install --system hf_transfer + mkdir src + mv vllm src/vllm + ' + + - name: Run vLLM Tests + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + nvidia-smi # Encoder Decoder tests # 12min cd /workdir/tests pytest -v -s encoder_decoder + ' + + - name: Cleanup container and images + if: always() + run: | + docker rm -f ${CONTAINER_ID} || true + docker image prune -a --force --filter "until=24h" + docker system df + + openai-compatible-tool-use-test: + needs: [build-image] + if: needs.build-image.result == 'success' + runs-on: + - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} + fleet:x86-g6xl-runner + steps: + - name: Checkout DLC source + uses: actions/checkout@v5 + + - name: ECR login + run: | + aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com + + - name: Download image URI artifact + uses: actions/download-artifact@v4 + with: + name: vllm-rayserve-ec2-image-uri + + - name: Resolve image URI for test + run: | + IMAGE_URI=$(cat image_uri.txt) + echo "Resolved image URI: $IMAGE_URI" + echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV + + - name: Pull image + run: | + docker pull $IMAGE_URI + + - name: Checkout vLLM Tests + uses: actions/checkout@v5 + with: + repository: vllm-project/vllm + ref: v0.10.2 + path: vllm_source + + - name: Start container + run: | + CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ + -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ + -v ${HOME}/.cache/vllm:/root/.cache/vllm \ + -v ./vllm_source:/workdir --workdir /workdir \ + ${IMAGE_URI}) + echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV + + - name: Setup for vLLM Test + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + uv pip install --system pytest pytest-asyncio + uv 
pip install --system -e tests/vllm_test_utils + uv pip install --system hf_transfer + mkdir src + mv vllm src/vllm + ' + + - name: Run vLLM Tests + run: | + docker exec ${CONTAINER_ID} sh -c ' + set -eux + nvidia-smi # OpenAI-Compatible Tool Use # 23min cd /workdir/tests From dcb93027ab18c7cb431e8cf6b678559b746daecc Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 11:09:13 -0800 Subject: [PATCH 59/63] remove encoder decoder test Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 75 -------------------------- 1 file changed, 75 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index dc4f31e30c9d..2bc3eecbd101 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -394,81 +394,6 @@ jobs: docker image prune -a --force --filter "until=24h" docker system df - encoder-decoder-test: - needs: [build-image] - if: needs.build-image.result == 'success' - runs-on: - - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} - fleet:x86-g6xl-runner - steps: - - name: Checkout DLC source - uses: actions/checkout@v5 - - - name: ECR login - run: | - aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - - - name: Download image URI artifact - uses: actions/download-artifact@v4 - with: - name: vllm-rayserve-ec2-image-uri - - - name: Resolve image URI for test - run: | - IMAGE_URI=$(cat image_uri.txt) - echo "Resolved image URI: $IMAGE_URI" - echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV - - - name: Pull image - run: | - docker pull $IMAGE_URI - - - name: Checkout vLLM Tests - uses: actions/checkout@v5 - with: - repository: vllm-project/vllm - ref: v0.10.2 - path: vllm_source - - - name: Start container - run: | - CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ - -v 
${HOME}/.cache/huggingface:/root/.cache/huggingface \ - -v ${HOME}/.cache/vllm:/root/.cache/vllm \ - -v ./vllm_source:/workdir --workdir /workdir \ - ${IMAGE_URI}) - echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV - - - name: Setup for vLLM Test - run: | - docker exec ${CONTAINER_ID} sh -c ' - set -eux - uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto - uv pip install --system pytest pytest-asyncio - uv pip install --system -e tests/vllm_test_utils - uv pip install --system hf_transfer - mkdir src - mv vllm src/vllm - ' - - - name: Run vLLM Tests - run: | - docker exec ${CONTAINER_ID} sh -c ' - set -eux - nvidia-smi - - # Encoder Decoder tests # 12min - cd /workdir/tests - pytest -v -s encoder_decoder - ' - - - name: Cleanup container and images - if: always() - run: | - docker rm -f ${CONTAINER_ID} || true - docker image prune -a --force --filter "until=24h" - docker system df - openai-compatible-tool-use-test: needs: [build-image] if: needs.build-image.result == 'success' From c7b284bf295ebd23487aba3e6b06f942402e6d65 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 13:33:36 -0800 Subject: [PATCH 60/63] add hf token Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 2bc3eecbd101..86ebf40f8ecc 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -119,6 +119,7 @@ jobs: -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ -v ./vllm_source:/workdir --workdir /workdir \ + -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ ${IMAGE_URI}) echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV @@ -195,6 +196,7 @@ jobs: -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ -v ./vllm_source:/workdir --workdir 
/workdir \ + -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ ${IMAGE_URI}) echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV @@ -271,6 +273,7 @@ jobs: -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ -v ./vllm_source:/workdir --workdir /workdir \ + -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ ${IMAGE_URI}) echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV @@ -361,6 +364,7 @@ jobs: -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ -v ./vllm_source:/workdir --workdir /workdir \ + -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ ${IMAGE_URI}) echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV @@ -436,6 +440,7 @@ jobs: -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ -v ${HOME}/.cache/vllm:/root/.cache/vllm \ -v ./vllm_source:/workdir --workdir /workdir \ + -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ ${IMAGE_URI}) echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV From 98079271b9b89b09541ab522698b208de2cf100a Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 14:38:50 -0800 Subject: [PATCH 61/63] update Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 162 +------------------------ 1 file changed, 4 insertions(+), 158 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 86ebf40f8ecc..b279b89fdba1 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -154,7 +154,7 @@ jobs: docker image prune -a --force --filter "until=24h" docker system df - engine-test: + cuda-test: needs: [build-image] if: needs.build-image.result == 'success' runs-on: @@ -218,10 +218,9 @@ jobs: set -eux nvidia-smi - # Engine Test # 25min + # Platform Tests (CUDA) # 4min cd /workdir/tests - pytest -v -s engine test_sequence.py test_config.py test_logger.py test_vllm_port.py - pytest -v -s 
tokenization + pytest -v -s cuda/test_cuda_context.py ' - name: Cleanup container and images @@ -299,7 +298,7 @@ jobs: cd /workdir/examples pip install tensorizer # for tensorizer test python3 offline_inference/basic/generate.py --model facebook/opt-125m - python3 offline_inference/basic/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10 + # python3 offline_inference/basic/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10 python3 offline_inference/basic/chat.py python3 offline_inference/prefix_caching.py python3 offline_inference/llm_engine_example.py @@ -321,156 +320,3 @@ jobs: docker rm -f ${CONTAINER_ID} || true docker image prune -a --force --filter "until=24h" docker system df - - cuda-test: - needs: [build-image] - if: needs.build-image.result == 'success' - runs-on: - - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} - fleet:x86-g6xl-runner - steps: - - name: Checkout DLC source - uses: actions/checkout@v5 - - - name: ECR login - run: | - aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - - - name: Download image URI artifact - uses: actions/download-artifact@v4 - with: - name: vllm-rayserve-ec2-image-uri - - - name: Resolve image URI for test - run: | - IMAGE_URI=$(cat image_uri.txt) - echo "Resolved image URI: $IMAGE_URI" - echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV - - - name: Pull image - run: | - docker pull $IMAGE_URI - - - name: Checkout vLLM Tests - uses: actions/checkout@v5 - with: - repository: vllm-project/vllm - ref: v0.10.2 - path: vllm_source - - - name: Start container - run: | - CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ - -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ - -v ${HOME}/.cache/vllm:/root/.cache/vllm \ - -v ./vllm_source:/workdir --workdir /workdir \ - -e HUGGING_FACE_HUB_TOKEN=${{ 
secrets.HUGGING_FACE_HUB_TOKEN }} \ - ${IMAGE_URI}) - echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV - - - name: Setup for vLLM Test - run: | - docker exec ${CONTAINER_ID} sh -c ' - set -eux - uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto - uv pip install --system pytest pytest-asyncio - uv pip install --system -e tests/vllm_test_utils - uv pip install --system hf_transfer - mkdir src - mv vllm src/vllm - ' - - - name: Run vLLM Tests - run: | - docker exec ${CONTAINER_ID} sh -c ' - set -eux - nvidia-smi - - # Platform Tests (CUDA) # 4min - cd /workdir/tests - pytest -v -s cuda/test_cuda_context.py - ' - - - name: Cleanup container and images - if: always() - run: | - docker rm -f ${CONTAINER_ID} || true - docker image prune -a --force --filter "until=24h" - docker system df - - openai-compatible-tool-use-test: - needs: [build-image] - if: needs.build-image.result == 'success' - runs-on: - - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }} - fleet:x86-g6xl-runner - steps: - - name: Checkout DLC source - uses: actions/checkout@v5 - - - name: ECR login - run: | - aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com - - - name: Download image URI artifact - uses: actions/download-artifact@v4 - with: - name: vllm-rayserve-ec2-image-uri - - - name: Resolve image URI for test - run: | - IMAGE_URI=$(cat image_uri.txt) - echo "Resolved image URI: $IMAGE_URI" - echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV - - - name: Pull image - run: | - docker pull $IMAGE_URI - - - name: Checkout vLLM Tests - uses: actions/checkout@v5 - with: - repository: vllm-project/vllm - ref: v0.10.2 - path: vllm_source - - - name: Start container - run: | - CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \ - -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ - -v 
${HOME}/.cache/vllm:/root/.cache/vllm \ - -v ./vllm_source:/workdir --workdir /workdir \ - -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \ - ${IMAGE_URI}) - echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV - - - name: Setup for vLLM Test - run: | - docker exec ${CONTAINER_ID} sh -c ' - set -eux - uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto - uv pip install --system pytest pytest-asyncio - uv pip install --system -e tests/vllm_test_utils - uv pip install --system hf_transfer - mkdir src - mv vllm src/vllm - ' - - - name: Run vLLM Tests - run: | - docker exec ${CONTAINER_ID} sh -c ' - set -eux - nvidia-smi - - # OpenAI-Compatible Tool Use # 23min - cd /workdir/tests - pytest -v -s tool_use - pytest -v -s mistral_tool_u - ' - - - name: Cleanup container and images - if: always() - run: | - docker rm -f ${CONTAINER_ID} || true - docker image prune -a --force --filter "until=24h" - docker system df From 11ead3bbbe9e00bad73b25954d236f2b6376fab0 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 15:52:07 -0800 Subject: [PATCH 62/63] remove push on main Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index b279b89fdba1..718ee6960baa 100644 --- a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -2,8 +2,6 @@ name: PR - vLLM RayServe on: pull_request: - branches: - - main paths: - "docker/**" From 92f77d9de642a1870afb48a85fabe0cf184350a1 Mon Sep 17 00:00:00 2001 From: Junpu Fan Date: Tue, 11 Nov 2025 15:52:50 -0800 Subject: [PATCH 63/63] revert Signed-off-by: Junpu Fan --- .github/workflows/pr-vllm-rayserve.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml index 718ee6960baa..b279b89fdba1 100644 --- 
a/.github/workflows/pr-vllm-rayserve.yml +++ b/.github/workflows/pr-vllm-rayserve.yml @@ -2,6 +2,8 @@ name: PR - vLLM RayServe on: pull_request: + branches: + - main paths: - "docker/**"