Skip to content

Commit 033ca86

Browse files
authored
[NV RTX EP] Set Compute Capability only on Turing architecture (microsoft#25446)
### Description
<!-- Describe your changes. -->
Set the compute capability only on the Turing architecture.

### Motivation and Context
<!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. -->
Setting the native compute capability was causing a performance regression.

@gaugarg-nv @ishwar-raut1 @ankan-ban
1 parent 9d11ae2 commit 033ca86

File tree

1 file changed

+10
-5
lines changed

1 file changed

+10
-5
lines changed

onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2281,11 +2281,16 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& gr
22812281
if (max_shared_mem_size_ > 0) {
22822282
trt_config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kTACTIC_SHARED_MEMORY, max_shared_mem_size_);
22832283
}
2284-
// Only set default compute capabilities if user hasn't explicitly configured them
2285-
constexpr int kDefaultNumComputeCapabilities = 1; // Default number of compute capabilities for Turing support
2286-
if (trt_config->getNbComputeCapabilities() == 0) {
2287-
trt_config->setNbComputeCapabilities(kDefaultNumComputeCapabilities);
2288-
trt_config->setComputeCapability(nvinfer1::ComputeCapability::kCURRENT, 0);
2284+
2285+
// Only set compute capability for Turing
2286+
const std::string kTuringComputeCapability{"75"};
2287+
2288+
if (compute_capability_ == kTuringComputeCapability) {
2289+
constexpr int kDefaultNumComputeCapabilities = 1;
2290+
if (trt_config->getNbComputeCapabilities() == 0) {
2291+
trt_config->setNbComputeCapabilities(kDefaultNumComputeCapabilities);
2292+
trt_config->setComputeCapability(nvinfer1::ComputeCapability::kSM75, 0);
2293+
}
22892294
}
22902295

22912296
int num_inputs = trt_network->getNbInputs();

0 commit comments

Comments
 (0)