From 95f7861e08baa6e315840945ff2ed81f0fdb2c7c Mon Sep 17 00:00:00 2001
From: Reza Barazesh
Date: Fri, 14 Nov 2025 06:54:44 -0800
Subject: [PATCH] Switch to runtime image

Signed-off-by: Reza Barazesh
---
 docker/Dockerfile | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 1b937bbc1225..14b08788f87e 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -20,8 +20,8 @@ ARG PYTHON_VERSION=3.12
 # glibc version is baked into the distro, and binaries built with one glibc
 # version are not backwards compatible with OSes that use an earlier version.
 ARG BUILD_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
-# TODO: Restore to base image after FlashInfer AOT wheel fixed
-ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
+# Using runtime image with minimal build tools for JIT compilation (FlashInfer, DeepGEMM, EP kernels)
+ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04
 
 # By parameterizing the Deadsnakes repository URL, we allow third-party to use
 # their own mirror. When doing so, we don't benefit from the transparent
@@ -328,6 +328,28 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
     && curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION} \
     && python3 --version && python3 -m pip --version
 
+# Install CUDA development tools and build essentials for runtime JIT compilation
+# (FlashInfer, DeepGEMM, EP kernels all require compilation at runtime).
+# CUDA_VERSION must be non-empty: otherwise the package names below would end in
+# "-", which apt-get parses as a removal request instead of an install.
+RUN : "${CUDA_VERSION:?CUDA_VERSION must be set}" && \
+    CUDA_VERSION_DASH=$(echo "${CUDA_VERSION}" | cut -d. -f1,2 | tr '.' '-') && \
+    apt-get update -y && \
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        gcc-10 \
+        g++-10 \
+        cuda-nvcc-${CUDA_VERSION_DASH} \
+        cuda-cudart-dev-${CUDA_VERSION_DASH} \
+        cuda-nvrtc-dev-${CUDA_VERSION_DASH} \
+        cuda-nvml-dev-${CUDA_VERSION_DASH} \
+        libcublas-dev-${CUDA_VERSION_DASH} \
+        libcusparse-dev-${CUDA_VERSION_DASH} \
+        libcusolver-dev-${CUDA_VERSION_DASH} && \
+    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 110 --slave /usr/bin/g++ g++ /usr/bin/g++-10 && \
+    rm -rf /var/lib/apt/lists/* && \
+    gcc --version
+
 ARG PIP_INDEX_URL UV_INDEX_URL
 ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
 ARG PYTORCH_CUDA_INDEX_BASE_URL