Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 34 additions & 38 deletions docker/Dockerfile.multi
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ LABEL com.nvidia.eula="https://www.nvidia.com/en-us/agreements/enterprise-softwa
LABEL com.nvidia.ai-terms="https://www.nvidia.com/en-us/agreements/enterprise-software/product-specific-terms-for-ai-products/"

# https://www.gnu.org/software/bash/manual/html_node/Bash-Startup-Files.html
# The default values come from `nvcr.io/nvidia/pytorch`
ENV BASH_ENV=${BASH_ENV:-/etc/bash.bashrc}
ENV ENV=${ENV:-/etc/shinit_v2}
ARG SH_ENV="/etc/shinit_v2"
ENV ENV=${SH_ENV}
ARG BASH_ENV="/etc/bash.bashrc"
ENV BASH_ENV=${BASH_ENV}

ARG GITHUB_MIRROR=""
RUN echo "Using GitHub mirror: $GITHUB_MIRROR"
Expand Down Expand Up @@ -44,48 +45,41 @@ COPY docker/common/install.sh \
docker/common/install_ucx.sh \
docker/common/install_nixl.sh \
docker/common/install_etcd.sh \
docker/common/install_mooncake.sh \
./

RUN GITHUB_MIRROR=${GITHUB_MIRROR} \
PYTHON_VERSION=${PYTHON_VERSION} \
bash ./install.sh --base && rm install_base.sh

RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install.sh --cmake && rm install_cmake.sh

RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install.sh --ccache && rm install_ccache.sh

RUN bash ./install.sh --cuda_toolkit && rm install_cuda_toolkit.sh

ARG TRT_VER
ARG CUDA_VER
ARG CUDNN_VER
ARG NCCL_VER
ARG CUBLAS_VER
RUN TRT_VER=${TRT_VER} \
ARG TORCH_INSTALL_TYPE="skip"
RUN GITHUB_MIRROR=${GITHUB_MIRROR} \
PYTHON_VERSION=${PYTHON_VERSION} \
TRT_VER=${TRT_VER} \
CUDA_VER=${CUDA_VER} \
CUDNN_VER=${CUDNN_VER} \
NCCL_VER=${NCCL_VER} \
CUBLAS_VER=${CUBLAS_VER} \
bash ./install.sh --tensorrt && rm install_tensorrt.sh

RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install.sh --polygraphy && rm install_polygraphy.sh

RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install.sh --mpi4py && rm install_mpi4py.sh

ARG TORCH_INSTALL_TYPE="skip"
RUN TORCH_INSTALL_TYPE=${TORCH_INSTALL_TYPE} bash ./install.sh --pytorch && rm install_pytorch.sh

RUN bash ./install.sh --opencv && rm install.sh

# Install UCX first
RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_ucx.sh && rm install_ucx.sh

# Install NIXL
RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_nixl.sh && rm install_nixl.sh

# Install etcd
RUN bash ./install_etcd.sh && rm install_etcd.sh
TORCH_INSTALL_TYPE=${TORCH_INSTALL_TYPE} \
bash ./install.sh --base --cmake --ccache --cuda_toolkit --tensorrt --polygraphy --mpi4py --pytorch --opencv && \
rm install_base.sh && \
rm install_cmake.sh && \
rm install_ccache.sh && \
rm install_cuda_toolkit.sh && \
rm install_tensorrt.sh && \
rm install_polygraphy.sh && \
rm install_mpi4py.sh && \
rm install_pytorch.sh && \
rm install.sh

# Install UCX, NIXL, etcd
# TODO: Combine these into the main install.sh script
RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_ucx.sh && \
GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_nixl.sh && \
bash ./install_etcd.sh && \
rm install_ucx.sh && \
rm install_nixl.sh && \
rm install_etcd.sh

FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton

Expand All @@ -100,16 +94,18 @@ COPY --from=triton /opt/tritonserver/caches /opt/tritonserver/caches

# Copy all installation scripts at once to reduce layers
COPY docker/common/install_triton.sh \
docker/common/install_mooncake.sh \
./

RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_triton.sh && rm install_triton.sh

# Install Mooncake after Triton, which handles the Boost requirement
RUN if [ -f /etc/redhat-release ]; then \
RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_triton.sh && \
if [ -f /etc/redhat-release ]; then \
echo "Rocky8 detected, skipping mooncake installation"; \
else \
bash ./install_mooncake.sh; \
fi && rm install_mooncake.sh
fi && \
rm install_triton.sh && \
rm install_mooncake.sh

FROM ${DEVEL_IMAGE} AS wheel
WORKDIR /src/tensorrt_llm
Expand Down
12 changes: 11 additions & 1 deletion docker/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,16 @@ define rewrite_tag
$(shell echo $(IMAGE_WITH_TAG) | sed "s/\/tensorrt-llm:/\/tensorrt-llm-staging:/g")
endef

# Pull $(BASE_IMAGE):$(BASE_TAG) into the local image store.
# Declared phony because this is a command, not a file: without the
# declaration, a file named `base_pull` in this directory would make the
# target look up to date and the pull would be silently skipped.
.PHONY: base_pull
base_pull:
	@echo "Pulling base image: $(BASE_IMAGE):$(BASE_TAG)"
	docker pull $(BASE_IMAGE):$(BASE_TAG)

%_build: DEVEL_IMAGE = $(if $(findstring 1,$(JENKINS_DEVEL)),$(shell . ../jenkins/current_image_tags.properties && echo $$LLM_DOCKER_IMAGE))
%_build:
# Read the ENV and BASH_ENV settings recorded in the base image's config so
# they can be forwarded to the build as --build-arg values.
#
# * These stay recursive (`=`) on purpose: the value is expanded when the
#   recipe is expanded, i.e. after the target's prerequisites have run.
#   With `:=` the `docker inspect` would run while the Makefile is parsed.
# * stderr is discarded on `docker inspect` itself (the stage that can fail
#   when the image is missing); a failed lookup then yields an empty value.
# * `sed -n 's/^NAME=//p'` prints only the matching entry with its `NAME=`
#   prefix removed, replacing the former `grep | sed` pair.
%_build: SH_ENV = $(shell docker inspect --format='{{range .Config.Env}}{{println .}}{{end}}' $(BASE_IMAGE):$(BASE_TAG) 2>/dev/null \
    | sed -n 's/^ENV=//p')
%_build: BASH_ENV = $(shell docker inspect --format='{{range .Config.Env}}{{println .}}{{end}}' $(BASE_IMAGE):$(BASE_TAG) 2>/dev/null \
    | sed -n 's/^BASH_ENV=//p')
%_build: base_pull
@echo "Building docker image: $(IMAGE_WITH_TAG)"
docker buildx build $(DOCKER_BUILD_OPTS) $(DOCKER_BUILD_ARGS) \
--progress $(DOCKER_PROGRESS) \
Expand All @@ -97,6 +105,8 @@ endef
$(if $(GIT_COMMIT), --build-arg GIT_COMMIT="$(GIT_COMMIT)") \
$(if $(GITHUB_MIRROR), --build-arg GITHUB_MIRROR="$(GITHUB_MIRROR)") \
$(if $(PYTHON_VERSION), --build-arg PYTHON_VERSION="$(PYTHON_VERSION)") \
$(if $(SH_ENV), --build-arg SH_ENV="$(SH_ENV)") \
$(if $(BASH_ENV), --build-arg BASH_ENV="$(BASH_ENV)") \
$(if $(STAGE), --target $(STAGE)) \
--file Dockerfile.multi \
--tag $(IMAGE_WITH_TAG) \
Expand Down
6 changes: 3 additions & 3 deletions docker/common/install.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
set -Eeo pipefail
shopt -s nullglob
trap 'echo "[install.sh] Error on line $LINENO" >&2' ERR
Expand Down Expand Up @@ -125,7 +125,7 @@ fi

if [ $opencv -eq 1 ]; then
echo "Installing OpenCV..."
pip3 uninstall -y opencv
bash -c "pip3 uninstall -y opencv"
rm -rf /usr/local/lib/python3*/dist-packages/cv2/
pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
bash -c "pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir"
fi
8 changes: 4 additions & 4 deletions jenkins/current_image_tags.properties
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead.
IMAGE_NAME=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm

LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511110140-8447
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511110140-8447
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202511110140-8447
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202511110140-8447
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511141224-9077
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511141224-9077
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202511141224-9077
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202511141224-9077