Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
63 commits
Select commit Hold shift + click to select a range
8ea932b
try build
junpuf Nov 7, 2025
50e9793
fix action
junpuf Nov 7, 2025
47e7bf6
using long commit ref
junpuf Nov 7, 2025
f3e7416
install/update uv only if not already installed
junpuf Nov 7, 2025
96d976b
update
junpuf Nov 8, 2025
e21334c
update
junpuf Nov 8, 2025
082f67c
fix actionlint
junpuf Nov 8, 2025
a82924d
try inline cache
junpuf Nov 8, 2025
d82b4a1
fix
junpuf Nov 8, 2025
c7d65bc
use buildx
junpuf Nov 8, 2025
09bfc63
per day cache refresh
junpuf Nov 8, 2025
8a21087
update
junpuf Nov 10, 2025
031a0e8
fix
junpuf Nov 10, 2025
df2d590
test
junpuf Nov 10, 2025
2d59406
fix
junpuf Nov 10, 2025
75a8f1a
try artifact
junpuf Nov 10, 2025
65975f7
update docker command
junpuf Nov 10, 2025
ff4725e
fix command
junpuf Nov 10, 2025
3dd1a99
fix command
junpuf Nov 10, 2025
872029d
fix entrypoint
junpuf Nov 10, 2025
fadf714
update test
junpuf Nov 10, 2025
557e649
fix command
junpuf Nov 10, 2025
58aa567
checkout vllm
junpuf Nov 10, 2025
b071a75
update workflow
junpuf Nov 10, 2025
e362483
update
junpuf Nov 10, 2025
369551b
fix
junpuf Nov 10, 2025
aeebfe8
try test
junpuf Nov 10, 2025
18f2b64
fix typo
junpuf Nov 10, 2025
9c3bc51
run basic terst
junpuf Nov 10, 2025
4e43405
test
junpuf Nov 10, 2025
94e16b6
use older version
junpuf Nov 10, 2025
d5d1ff3
check path
junpuf Nov 11, 2025
b137dea
partial clone
junpuf Nov 11, 2025
0d8b5a5
update
junpuf Nov 11, 2025
f75fa37
update
junpuf Nov 11, 2025
1ad77b4
update
junpuf Nov 11, 2025
a98f01c
update
junpuf Nov 11, 2025
13a065d
refactor
junpuf Nov 11, 2025
b75b924
add dataset path
junpuf Nov 11, 2025
ff6bba4
try smart cleanup
junpuf Nov 11, 2025
4357043
cleanup
junpuf Nov 11, 2025
85cffdf
update
junpuf Nov 11, 2025
12e2dc1
fix
junpuf Nov 11, 2025
ccb5a73
update script
junpuf Nov 11, 2025
43c2232
enable Entrypoints Integration Test (LLM)
junpuf Nov 11, 2025
15f0c89
update
junpuf Nov 11, 2025
e9fa11c
update
junpuf Nov 11, 2025
60fd04f
update test
junpuf Nov 11, 2025
56d85c1
add cleanup
junpuf Nov 11, 2025
432917d
fix
junpuf Nov 11, 2025
e5ad9e6
update
junpuf Nov 11, 2025
8e7a408
update
junpuf Nov 11, 2025
f5e61e3
update
junpuf Nov 11, 2025
16d5f1e
update
junpuf Nov 11, 2025
c0a8c85
update workflow
junpuf Nov 11, 2025
af227b4
enable more test
junpuf Nov 11, 2025
6ba7e45
update tests
junpuf Nov 11, 2025
c3cc99c
parallel tests
junpuf Nov 11, 2025
dcb9302
remove encoder decoder test
junpuf Nov 11, 2025
c7b284b
add hf token
junpuf Nov 11, 2025
9807927
update
junpuf Nov 11, 2025
11ead3b
remove push on main
junpuf Nov 11, 2025
92f77d9
revert
junpuf Nov 11, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .github/scripts/runner_setup.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#!/bin/bash
# Runner bootstrap: make sure `uv` and Docker are usable before any job step.
set -e

# Install uv only when it is not already on PATH — pre-provisioned runners
# keep their copy and skip the network round-trip entirely.
if ! command -v uv &> /dev/null; then
    curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR="/usr/local/bin" sh
    uv self update
fi

# Fail the job early (set -e) if the Docker CLI/daemon is unavailable.
docker --version
65 changes: 0 additions & 65 deletions .github/workflows/pr-example.yml

This file was deleted.

322 changes: 322 additions & 0 deletions .github/workflows/pr-vllm-rayserve.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,322 @@
name: PR - vLLM RayServe

on:
  pull_request:
    branches:
      - main
    paths:
      - "docker/**"

permissions:
  contents: read

concurrency:
  # Scope the group to this workflow: a bare `pr-<number>` group would collide
  # with any other workflow on the same PR that uses the same pattern, letting
  # them cancel each other's in-flight runs.
  group: ${{ github.workflow }}-pr-${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
  # Gate job: runs pre-commit on the whole tree and publishes a flag telling
  # downstream jobs whether the RayServe Dockerfile was touched.
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      vllm-rayserve-ec2: ${{ steps.changes.outputs.vllm-rayserve-ec2 }}
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          # Quoted so YAML does not read 3.12 as a float.
          python-version: "3.12"
      - uses: pre-commit/action@v3.0.1
        with:
          extra_args: --all-files
      - name: Detect file changes
        id: changes
        uses: dorny/paths-filter@v3
        with:
          filters: |
            vllm-rayserve-ec2:
              - "docker/vllm/Dockerfile.rayserve"

build-image:
needs: [check-changes]
if: needs.check-changes.outputs.vllm-rayserve-ec2 == 'true'
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:x86-build-runner
steps:
- uses: actions/checkout@v5
- run: .github/scripts/runner_setup.sh
- run: .github/scripts/buildkitd.sh
- name: ECR login
run: |
aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com

- name: Resolve image URI for build
run: |
IMAGE_URI=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }}
echo "Image URI to build: $IMAGE_URI"
echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV

- name: Build image
run: |
docker buildx build --progress plain \
--build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
--cache-to=type=inline \
--cache-from=type=registry,ref=$IMAGE_URI \
--tag $IMAGE_URI \
--target vllm-rayserve-ec2 \
-f docker/vllm/Dockerfile.rayserve .

- name: Docker Push and save image URI artifact
run: |
docker push $IMAGE_URI
docker rmi $IMAGE_URI
echo $IMAGE_URI > image_uri.txt

- name: Upload image URI artifact
uses: actions/upload-artifact@v4
with:
name: vllm-rayserve-ec2-image-uri
path: image_uri.txt

regression-test:
needs: [build-image]
if: needs.build-image.result == 'success'
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:x86-g6xl-runner
steps:
- name: Checkout DLC source
uses: actions/checkout@v5

- name: ECR login
run: |
aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com

- name: Download image URI artifact
uses: actions/download-artifact@v4
with:
name: vllm-rayserve-ec2-image-uri

- name: Resolve image URI for test
run: |
IMAGE_URI=$(cat image_uri.txt)
echo "Resolved image URI: $IMAGE_URI"
echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV

- name: Pull image
run: |
docker pull $IMAGE_URI

- name: Checkout vLLM Tests
uses: actions/checkout@v5
with:
repository: vllm-project/vllm
ref: v0.10.2
path: vllm_source
Comment on lines +94 to +114
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if there's a way to DRY these steps. These are going to be used repeatedly across multiple stages

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

later we can refactor common patterns into callable workflows or other things


- name: Start container
run: |
CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-v ${HOME}/.cache/vllm:/root/.cache/vllm \
-v ./vllm_source:/workdir --workdir /workdir \
-e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
${IMAGE_URI})
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV

- name: Setup for vLLM Test
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
uv pip install --system pytest pytest-asyncio
uv pip install --system -e tests/vllm_test_utils
uv pip install --system hf_transfer
mkdir src
mv vllm src/vllm
'

- name: Run vLLM Tests
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
nvidia-smi

# Regression Test # 7min
cd /workdir/tests
uv pip install --system modelscope
pytest -v -s test_regression.py
'

- name: Cleanup container and images
if: always()
run: |
docker rm -f ${CONTAINER_ID} || true
docker image prune -a --force --filter "until=24h"
docker system df

cuda-test:
needs: [build-image]
if: needs.build-image.result == 'success'
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:x86-g6xl-runner
steps:
- name: Checkout DLC source
uses: actions/checkout@v5

- name: ECR login
run: |
aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com

- name: Download image URI artifact
uses: actions/download-artifact@v4
with:
name: vllm-rayserve-ec2-image-uri

- name: Resolve image URI for test
run: |
IMAGE_URI=$(cat image_uri.txt)
echo "Resolved image URI: $IMAGE_URI"
echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV

- name: Pull image
run: |
docker pull $IMAGE_URI

- name: Checkout vLLM Tests
uses: actions/checkout@v5
with:
repository: vllm-project/vllm
ref: v0.10.2
path: vllm_source

- name: Start container
run: |
CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-v ${HOME}/.cache/vllm:/root/.cache/vllm \
-v ./vllm_source:/workdir --workdir /workdir \
-e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
${IMAGE_URI})
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV

- name: Setup for vLLM Test
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
uv pip install --system pytest pytest-asyncio
uv pip install --system -e tests/vllm_test_utils
uv pip install --system hf_transfer
mkdir src
mv vllm src/vllm
'

- name: Run vLLM Tests
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
nvidia-smi

# Platform Tests (CUDA) # 4min
cd /workdir/tests
pytest -v -s cuda/test_cuda_context.py
'

- name: Cleanup container and images
if: always()
run: |
docker rm -f ${CONTAINER_ID} || true
docker image prune -a --force --filter "until=24h"
docker system df

example-test:
needs: [build-image]
if: needs.build-image.result == 'success'
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:x86-g6xl-runner
steps:
- name: Checkout DLC source
uses: actions/checkout@v5

- name: ECR login
run: |
aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com

- name: Download image URI artifact
uses: actions/download-artifact@v4
with:
name: vllm-rayserve-ec2-image-uri

- name: Resolve image URI for test
run: |
IMAGE_URI=$(cat image_uri.txt)
echo "Resolved image URI: $IMAGE_URI"
echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV

- name: Pull image
run: |
docker pull $IMAGE_URI

- name: Checkout vLLM Tests
uses: actions/checkout@v5
with:
repository: vllm-project/vllm
ref: v0.10.2
path: vllm_source

- name: Start container
run: |
CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-v ${HOME}/.cache/vllm:/root/.cache/vllm \
-v ./vllm_source:/workdir --workdir /workdir \
-e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
${IMAGE_URI})
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV

- name: Setup for vLLM Test
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
uv pip install --system pytest pytest-asyncio
uv pip install --system -e tests/vllm_test_utils
uv pip install --system hf_transfer
mkdir src
mv vllm src/vllm
'

- name: Run vLLM Tests
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
nvidia-smi

# Examples Test # 30min
cd /workdir/examples
pip install tensorizer # for tensorizer test
python3 offline_inference/basic/generate.py --model facebook/opt-125m
# python3 offline_inference/basic/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
python3 offline_inference/basic/chat.py
python3 offline_inference/prefix_caching.py
python3 offline_inference/llm_engine_example.py
python3 offline_inference/audio_language.py --seed 0
python3 offline_inference/vision_language.py --seed 0
python3 offline_inference/vision_language_pooling.py --seed 0
python3 offline_inference/vision_language_multi_image.py --seed 0
VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
python3 offline_inference/basic/classify.py
python3 offline_inference/basic/embed.py
python3 offline_inference/basic/score.py
VLLM_USE_V1=0 python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2
'

- name: Cleanup container and images
if: always()
run: |
docker rm -f ${CONTAINER_ID} || true
docker image prune -a --force --filter "until=24h"
docker system df
Loading