Skip to content

Commit f607c51

Browse files
LeoZDong authored and james-p-xu committed
TensorRT 10.9 OSS Release. (#4381)
Signed-off-by: Leo Dong <leod@nvidia.com>
1 parent de23577 commit f607c51

File tree

474 files changed

+12214
-6837
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

474 files changed

+12214
-6837
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ build/
33
/demo/BERT/engines
44
/demo/BERT/squad/*.json
55
/docker/jetpack_files/*
6-
*.nvmk
76
*.sln
87
*.vcxproj
98
externals/

CHANGELOG.md

Lines changed: 320 additions & 187 deletions
Large diffs are not rendered by default.

CMakeLists.txt

Lines changed: 11 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#
2-
# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
33
# SPDX-License-Identifier: Apache-2.0
44
#
55
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -176,43 +176,29 @@ set(CUDA_LIBRARIES ${CUDART_LIB})
176176
if (DEFINED GPU_ARCHS)
177177
message(STATUS "GPU_ARCHS defined as ${GPU_ARCHS}. Generating CUDA code for SM ${GPU_ARCHS}")
178178
separate_arguments(GPU_ARCHS)
179+
foreach(SM IN LISTS GPU_ARCHS)
180+
list(APPEND CMAKE_CUDA_ARCHITECTURES ${SM})
181+
endforeach()
179182
else()
180-
list(APPEND GPU_ARCHS
181-
75
182-
)
183-
184-
find_file(IS_L4T_NATIVE nv_tegra_release PATHS /env/)
185-
set (IS_L4T_CROSS "False")
186-
if (DEFINED ENV{IS_L4T_CROSS})
187-
set(IS_L4T_CROSS $ENV{IS_L4T_CROSS})
183+
list(APPEND CMAKE_CUDA_ARCHITECTURES 72 75 80 86 87 89 90)
184+
185+
if(CUDA_VERSION VERSION_GREATER_EQUAL 12.8)
186+
list(APPEND CMAKE_CUDA_ARCHITECTURES 100 120)
188187
endif()
189188

190-
if (IS_L4T_NATIVE OR ${IS_L4T_CROSS} STREQUAL "True")
191-
# Only Orin (SM87) supported
192-
list(APPEND GPU_ARCHS 87)
193-
endif()
194-
195-
if (CUDA_VERSION VERSION_GREATER_EQUAL 11.0)
196-
# Ampere GPU (SM80) support is only available in CUDA versions > 11.0
197-
list(APPEND GPU_ARCHS 80)
198-
endif()
199-
if (CUDA_VERSION VERSION_GREATER_EQUAL 11.1)
200-
list(APPEND GPU_ARCHS 86)
201-
endif()
202-
203-
message(STATUS "GPU_ARCHS is not defined. Generating CUDA code for default SMs: ${GPU_ARCHS}")
189+
message(STATUS "GPU_ARCHS is not defined. Generating CUDA code for default SMs: ${CMAKE_CUDA_ARCHITECTURES}")
204190
endif()
205191
set(BERT_GENCODES)
206192
# Generate SASS for each architecture
207-
foreach(arch ${GPU_ARCHS})
193+
foreach(arch ${CMAKE_CUDA_ARCHITECTURES})
208194
if (${arch} GREATER_EQUAL 75)
209195
set(BERT_GENCODES "${BERT_GENCODES} -gencode arch=compute_${arch},code=sm_${arch}")
210196
endif()
211197
set(GENCODES "${GENCODES} -gencode arch=compute_${arch},code=sm_${arch}")
212198
endforeach()
213199

214200
# Generate PTX for the last architecture in the list.
215-
list(GET GPU_ARCHS -1 LATEST_SM)
201+
list(GET CMAKE_CUDA_ARCHITECTURES -1 LATEST_SM)
216202
set(GENCODES "${GENCODES} -gencode arch=compute_${LATEST_SM},code=compute_${LATEST_SM}")
217203
if (${LATEST_SM} GREATER_EQUAL 75)
218204
set(BERT_GENCODES "${BERT_GENCODES} -gencode arch=compute_${LATEST_SM},code=compute_${LATEST_SM}")

README.md

Lines changed: 168 additions & 147 deletions
Large diffs are not rendered by default.

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
10.8.0.43
1+
10.9.0.34

cmake/toolchains/cmake_aarch64_cross.toolchain

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,5 @@ set(CMAKE_CUDA_COMPILER_FORCED TRUE)
5353
set(CUDA_LIBS -L${CUDA_ROOT}/lib)
5454

5555
set(ADDITIONAL_PLATFORM_LIB_FLAGS ${CUDA_LIBS} -lcublas -lcudart -lstdc++ -lm)
56+
57+
link_directories(${CUDA_ROOT}/lib)

demo/BERT/README.md

Lines changed: 345 additions & 313 deletions
Large diffs are not rendered by default.

demo/DeBERTa/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ Note that the performance gap between BERT's self-attention and DeBERTa's disent
7575
## Environment Setup
7676
It is recommended to use docker for reproducing the following steps. Follow the setup steps in TensorRT OSS [README](https://github.com/NVIDIA/TensorRT#setting-up-the-build-environment) to build and launch the container and build OSS:
7777

78-
**Example: Ubuntu 20.04 on x86-64 with cuda-12.5 (default)**
78+
**Example: Ubuntu 20.04 on x86-64 with cuda-12.8 (default)**
7979
```bash
8080
# Download this TensorRT OSS repo
8181
git clone -b main https://github.com/nvidia/TensorRT TensorRT
@@ -84,10 +84,10 @@ git submodule update --init --recursive
8484

8585
## at root of TensorRT OSS
8686
# build container
87-
./docker/build.sh --file docker/ubuntu-20.04.Dockerfile --tag tensorrt-ubuntu20.04-cuda12.5
87+
./docker/build.sh --file docker/ubuntu-20.04.Dockerfile --tag tensorrt-ubuntu20.04-cuda12.8
8888

8989
# launch container
90-
./docker/launch.sh --tag tensorrt-ubuntu20.04-cuda12.5 --gpus all
90+
./docker/launch.sh --tag tensorrt-ubuntu20.04-cuda12.8 --gpus all
9191

9292
## now inside container
9393
# build OSS (only required for pre-8.4.3 TensorRT versions)

demo/Diffusion/.gitignore

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
__pycache__/
2-
onnx/*.onnx
3-
engine/*.plan
4-
output/*.png
2+
onnx/
3+
engine/
4+
output/
5+
pytorch_model/
6+
artifacts_cache/

demo/Diffusion/README.md

Lines changed: 40 additions & 53 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)