11steps :
2- # aarch64 + CUDA builds
3- - label : " Build arm64 wheel - CUDA 12.8 "
4- id : build-wheel-arm64-cuda-12-8
2+ # aarch64 + CUDA builds. PyTorch 2.8 aarch64 + CUDA wheel is only available on CUDA 12.9
3+ - label : " Build arm64 wheel - CUDA 12.9 "
4+ id : build-wheel-arm64-cuda-12-9
55 agents :
66 queue : arm64_cpu_queue_postmerge
77 commands :
88 # #NOTE: torch_cuda_arch_list is derived from upstream PyTorch build files here:
99 # https://github.com/pytorch/pytorch/blob/main/.ci/aarch64_linux/aarch64_ci_build.sh#L7
10- - " DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.8.1 --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
10+ - " DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
1111 - " mkdir artifacts"
1212 - " docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
1313 - " bash .buildkite/scripts/upload-wheels.sh"
1414 env :
1515 DOCKER_BUILDKIT : " 1"
1616
17- # x86 + CUDA builds
17+ - block : " Build CUDA 12.8 wheel"
18+ key : block-build-cu128-wheel
19+
1820 - label : " Build wheel - CUDA 12.8"
21+ depends_on : block-build-cu128-wheel
1922 id : build-wheel-cuda-12-8
2023 agents :
2124 queue : cpu_queue_postmerge
@@ -44,18 +47,14 @@ steps:
4447 env :
4548 DOCKER_BUILDKIT : " 1"
4649
47- # Note(simon): We can always build CUDA 11.8 wheel to ensure the build is working.
48- # However, this block can be uncommented to save some compute hours.
49- # - block: "Build CUDA 11.8 wheel"
50- # key: block-build-cu118-wheel
51-
52- - label : " Build wheel - CUDA 11.8"
53- # depends_on: block-build-cu118-wheel
54- id : build-wheel-cuda-11-8
50+ # x86 + CUDA builds
51+ - label : " Build wheel - CUDA 12.9"
52+ depends_on : ~
53+ id : build-wheel-cuda-12-9
5554 agents :
5655 queue : cpu_queue_postmerge
5756 commands :
58- - " DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=11.8.0 --build-arg torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0+PTX' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
57+ - " DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0+PTX' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
5958 - " mkdir artifacts"
6059 - " docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
6160 - " bash .buildkite/scripts/upload-wheels.sh"
@@ -75,14 +74,15 @@ steps:
7574 - " docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"
7675 - " docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"
7776
77+ # PyTorch 2.8 aarch64 + CUDA wheel is only available on CUDA 12.9
7878 - label : " Build release image (arm64)"
7979 depends_on : ~
8080 id : build-release-image-arm64
8181 agents :
8282 queue : arm64_cpu_queue_postmerge
8383 commands :
8484 - " aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
85- - " DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.8.1 --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ."
85+ - " DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ."
8686 - " docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)"
8787
8888 # Add job to create multi-arch manifest
@@ -103,7 +103,7 @@ steps:
103103 - create-multi-arch-manifest
104104 - build-wheel-cuda-12-8
105105 - build-wheel-cuda-12-6
106- - build-wheel-cuda-11-8
106+ - build-wheel-cuda-12-9
107107 id : annotate-release-workflow
108108 agents :
109109 queue : cpu_queue_postmerge
0 commit comments