diff --git a/.github/workflows/example_tests.yml b/.github/workflows/example_tests.yml index b84a1c485..7517d7a98 100644 --- a/.github/workflows/example_tests.yml +++ b/.github/workflows/example_tests.yml @@ -65,7 +65,7 @@ jobs: matrix: EXAMPLE: [llm_ptq] container: &example_container - image: nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2 + image: nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc0.post1 env: PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages HF_TOKEN: ${{ secrets.HF_TOKEN }} diff --git a/.gitlab/tests.yml b/.gitlab/tests.yml index 1012bcb7f..a2a1b640f 100644 --- a/.gitlab/tests.yml +++ b/.gitlab/tests.yml @@ -62,7 +62,7 @@ example-torch: example-trtllm: extends: example-torch timeout: 60m - image: nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2 + image: nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc0.post1 tags: [docker, linux, 2-gpu, sm>=89] parallel: matrix: diff --git a/CHANGELOG.rst b/CHANGELOG.rst index cfa40d75f..8b90731f0 100755 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -23,6 +23,7 @@ Model Optimizer Changelog (Linux) **Misc** +- Bump TensorRT-LLM docker to 1.2.0rc0.post1. - Bump minimum recommended transformers version to 4.53. 0.39 (2025-11-11) diff --git a/docs/source/getting_started/_installation_for_Linux.rst b/docs/source/getting_started/_installation_for_Linux.rst index f6e60ade7..c487d93fd 100644 --- a/docs/source/getting_started/_installation_for_Linux.rst +++ b/docs/source/getting_started/_installation_for_Linux.rst @@ -18,7 +18,7 @@ Latest Model Optimizer (``nvidia-modelopt``) currently has the following system +-------------------------+-----------------------------+ | PyTorch | >=2.6 | +-------------------------+-----------------------------+ -| TensorRT-LLM (Optional) | 1.1.0rc2.post2 | +| TensorRT-LLM (Optional) | 1.2.0rc0.post1 | +-------------------------+-----------------------------+ | ONNX Runtime (Optional) | 1.22 | +-------------------------+-----------------------------+ diff --git a/examples/llm_ptq/README.md b/examples/llm_ptq/README.md index 4009fbd6d..ef9fe6461 100755 --- a/examples/llm_ptq/README.md +++ b/examples/llm_ptq/README.md @@ -27,7 +27,7 @@ This section focuses on Post-training quantization, a technique that reduces mod ### Docker -For Hugging Face models, please use the TensorRT-LLM docker image (e.g., `nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2`). +For Hugging Face models, please use the TensorRT-LLM docker image (e.g., `nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc0.post1`). For NeMo models, use the NeMo container (e.g., `nvcr.io/nvidia/nemo:25.09`). Visit our [installation docs](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) for more information.