Skip to content

Commit 6baaaf5

Browse files
preetha-intel, jatinwadhwa921, ankitm3k
authored
OVEP options to disable CPU fallback at compile time (microsoft#21166)
### Description Provide user-level options to control fallback to the CPU for models that are not supported on Intel's NPU hardware. ### Motivation and Context - The current OVEP workflow allows a safe fallback from OV NPU to OV CPU on compilation failures. It also supports MLAS CPU fallback in the presence of unsupported custom ops. - This PR provides a build-time option to disable fallback from OV NPU to OV CPU. - The session option "kOrtSessionOptionsDisableCPUEPFallback" disables both OV CPU and MLAS CPU fallback. - It also includes a bug fix for proto creation. --------- Co-authored-by: jatinwadhwa921 <jatin.wadhwa@intel.com> Co-authored-by: ankitm3k <ankit.maheshkar@intel.com>
1 parent 21ad004 commit 6baaaf5

18 files changed

+247
-165
lines changed

cmake/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,6 +1341,10 @@ if (onnxruntime_USE_OPENVINO)
13411341

13421342
add_definitions(-DUSE_OPENVINO=1)
13431343

1344+
if(onnxruntime_NPU_NO_FALLBACK)
1345+
add_definitions(-DOPENVINO_DISABLE_NPU_FALLBACK=1)
1346+
endif()
1347+
13441348
if (onnxruntime_USE_OPENVINO_GPU)
13451349
add_definitions(-DOPENVINO_CONFIG_GPU=1)
13461350
endif()

onnxruntime/core/framework/config_options.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,11 @@ std::string ConfigOptions::GetConfigOrDefault(const std::string& config_key,
3030
}
3131

3232
Status ConfigOptions::AddConfigEntry(const char* config_key, const char* config_value) noexcept {
33-
std::string key(config_key);
33+
std::string key = config_key;
3434
if (key.empty() || key.length() > 128)
3535
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Config key is empty or longer than maximum length 128");
3636

37-
std::string val(config_value);
37+
std::string val = config_value;
3838
if (val.length() > onnxruntime::kMaxStrLen)
3939
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
4040
"Config value is longer than maximum length: ",

onnxruntime/core/graph/graph_proto_serializer.cc

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,21 @@ void GraphViewerToProto(const GraphViewer& graph_view,
2121
*(graph_proto.mutable_output()->Add()) = output_arg->ToProto();
2222
}
2323

24-
for (const auto* value_info : graph_view.GetValueInfo()) {
24+
std::unordered_set<const onnxruntime::NodeArg*> value_info_ = graph_view.GetValueInfo();
25+
26+
// Reserve memory for the vector to avoid reallocations
27+
std::vector<const NodeArg*> value_info_sorted;
28+
value_info_sorted.reserve(value_info_.size());
29+
30+
value_info_sorted.assign(value_info_.begin(), value_info_.end());
31+
auto sort_predicate = [](const NodeArg* v1, const NodeArg* v2) {
32+
return v1->Name() < v2->Name();
33+
};
34+
35+
// This ensures consistent ordering of value_info entries in the output graph
36+
std::sort(value_info_sorted.begin(), value_info_sorted.end(), sort_predicate);
37+
38+
for (const auto* value_info : value_info_sorted) {
2539
*(graph_proto.mutable_value_info()->Add()) = value_info->ToProto();
2640
}
2741

onnxruntime/core/providers/openvino/backend_manager.cc

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,11 @@ BackendManager::BackendManager(const GlobalContext& global_context,
105105
subgraph_context_,
106106
ep_ctx_handle_);
107107
} catch (const OnnxRuntimeException& ex) {
108-
if (device_type.find("NPU") != std::string::npos) {
108+
#if defined(OPENVINO_DISABLE_NPU_FALLBACK)
109+
ORT_THROW(ex.what());
110+
#else
111+
if (device_type.find("NPU") != std::string::npos &&
112+
!GetGlobalContext().disable_cpu_fallback) {
109113
LOGS_DEFAULT(WARNING) << ex.what();
110114
LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU."
111115
<< "Falling back to OV CPU for execution";
@@ -122,6 +126,7 @@ BackendManager::BackendManager(const GlobalContext& global_context,
122126
} else {
123127
ORT_THROW(ex.what());
124128
}
129+
#endif
125130
}
126131
}
127132
}
@@ -419,7 +424,13 @@ void BackendManager::Compute(OrtKernelContext* context) {
419424
subgraph_context_,
420425
ep_ctx_handle_);
421426
} catch (const OnnxRuntimeException& ex) {
422-
if (GetGlobalContext().device_type.find("NPU") != std::string::npos) {
427+
// Build option disables fallback to CPU on compilation failures with NPU.
428+
#if defined(OPENVINO_DISABLE_NPU_FALLBACK)
429+
LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU.";
430+
ORT_THROW(ex.what());
431+
#else
432+
if (GetGlobalContext().device_type.find("NPU") != std::string::npos &&
433+
!GetGlobalContext().disable_cpu_fallback) {
423434
LOGS_DEFAULT(WARNING) << ex.what();
424435
LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU."
425436
<< "Falling back to OV CPU for execution";
@@ -434,7 +445,10 @@ void BackendManager::Compute(OrtKernelContext* context) {
434445
} catch (std::string const& msg) {
435446
ORT_THROW(msg);
436447
}
448+
} else {
449+
ORT_THROW(ex.what());
437450
}
451+
#endif
438452
}
439453
backend_map_.insert({key, dynamic_backend});
440454
} else {

onnxruntime/core/providers/openvino/backends/basic_backend.cc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -545,14 +545,19 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
545545
std::cout << "Inference successful" << std::endl;
546546
}
547547

548+
// Keep a second shared pointer (infer_request_) to the inference request in the current local scope.
549+
// infer_request is moved back into the idle pool in the next step, so this copy holds its own reference
550+
// and keeps a valid handle for the debug-mode printPerformanceCounts call below, avoiding a dangling ptr and seg faults.
551+
OVInferRequestPtr infer_request_ = infer_request;
552+
548553
// Once the inference is completed, the infer_request becomes free and is placed back into pool of infer_requests_
549554
inferRequestsQueue_->putIdleRequest(std::move(infer_request));
550555
#ifndef NDEBUG
551556
#ifndef IO_BUFFER_ENABLED // Printing performance counts is disabled when IO_BUFFER_ENABLED
552557
if (openvino_ep::backend_utils::IsDebugEnabled()) {
553558
inferRequestsQueue_->printstatus(); // Printing the elements of infer_requests_ vector pool only in debug mode
554559
std::string& hw_target = global_context_.device_type;
555-
printPerformanceCounts(infer_request, std::cout, hw_target);
560+
printPerformanceCounts(std::move(infer_request_), std::cout, hw_target);
556561
}
557562
#endif
558563
#endif

onnxruntime/core/providers/openvino/contexts.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ struct GlobalContext {
2121
bool ep_context_embed_mode = true;
2222
bool export_ep_ctx_blob = false;
2323
bool enable_qdq_optimizer = false;
24+
bool disable_cpu_fallback = false;
2425
size_t num_of_threads;
2526
std::string device_type;
2627
std::string precision_str;

onnxruntime/core/providers/openvino/openvino_execution_provider.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
3333
global_context_->OpenVINO_Version = {OPENVINO_VERSION_MAJOR, OPENVINO_VERSION_MINOR};
3434
global_context_->export_ep_ctx_blob = info.export_ep_ctx_blob_;
3535
global_context_->enable_qdq_optimizer = info.enable_qdq_optimizer_;
36+
global_context_->disable_cpu_fallback = info.disable_cpu_fallback_;
3637

3738
// to check if target device is available
3839
// using ie_core capability GetAvailableDevices to fetch list of devices plugged in

onnxruntime/core/providers/openvino/openvino_execution_provider.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,14 +74,15 @@ struct OpenVINOExecutionProviderInfo {
7474
bool disable_dynamic_shapes_{false};
7575
bool export_ep_ctx_blob_{false};
7676
bool enable_qdq_optimizer_{false};
77+
bool disable_cpu_fallback_{false};
7778

7879
OpenVINOExecutionProviderInfo() = delete;
7980

8081
explicit OpenVINOExecutionProviderInfo(std::string dev_type, std::string precision, bool enable_npu_fast_compile,
8182
size_t num_of_threads, std::string cache_dir, std::string model_priority,
8283
int num_streams, void* context, bool enable_opencl_throttling,
8384
bool disable_dynamic_shapes, bool export_ep_ctx_blob,
84-
bool enable_qdq_optimizer)
85+
bool enable_qdq_optimizer, bool disable_cpu_fallback)
8586
: precision_(precision),
8687
enable_npu_fast_compile_(enable_npu_fast_compile),
8788
num_of_threads_(num_of_threads),
@@ -92,7 +93,8 @@ struct OpenVINOExecutionProviderInfo {
9293
enable_opencl_throttling_(enable_opencl_throttling),
9394
disable_dynamic_shapes_(disable_dynamic_shapes),
9495
export_ep_ctx_blob_(export_ep_ctx_blob),
95-
enable_qdq_optimizer_(enable_qdq_optimizer) {
96+
enable_qdq_optimizer_(enable_qdq_optimizer),
97+
disable_cpu_fallback_(disable_cpu_fallback) {
9698
std::set<std::string> ov_supported_device_types = {"CPU", "GPU",
9799
"GPU.0", "GPU.1", "NPU"};
98100
if (dev_type == "") {

onnxruntime/core/providers/openvino/openvino_provider_factory.cc

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
1313
const char* cache_dir, const char* model_priority,
1414
int num_streams, void* context,
1515
bool enable_opencl_throttling, bool disable_dynamic_shapes,
16-
bool export_ep_ctx_blob, bool enable_qdq_optimizer)
16+
bool export_ep_ctx_blob, bool enable_qdq_optimizer,
17+
bool disable_cpu_fallback)
1718
: precision_(precision),
1819
enable_npu_fast_compile_(enable_npu_fast_compile),
1920
num_of_threads_(num_of_threads),
@@ -23,7 +24,8 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
2324
enable_opencl_throttling_(enable_opencl_throttling),
2425
disable_dynamic_shapes_(disable_dynamic_shapes),
2526
export_ep_ctx_blob_(export_ep_ctx_blob),
26-
enable_qdq_optimizer_(enable_qdq_optimizer) {
27+
enable_qdq_optimizer_(enable_qdq_optimizer),
28+
disable_cpu_fallback_(disable_cpu_fallback) {
2729
device_type_ = (device_type == nullptr) ? "" : device_type;
2830
cache_dir_ = (cache_dir == nullptr) ? "" : cache_dir;
2931
}
@@ -45,12 +47,14 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
4547
bool disable_dynamic_shapes_;
4648
bool export_ep_ctx_blob_;
4749
bool enable_qdq_optimizer_;
50+
bool disable_cpu_fallback_;
4851
};
4952

5053
std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
5154
OpenVINOExecutionProviderInfo info(device_type_, precision_, enable_npu_fast_compile_, num_of_threads_,
5255
cache_dir_, model_priority_, num_streams_, context_, enable_opencl_throttling_,
53-
disable_dynamic_shapes_, export_ep_ctx_blob_, enable_qdq_optimizer_);
56+
disable_dynamic_shapes_, export_ep_ctx_blob_, enable_qdq_optimizer_,
57+
disable_cpu_fallback_);
5458
return std::make_unique<OpenVINOExecutionProvider>(info);
5559
}
5660

@@ -99,6 +103,8 @@ struct OpenVINO_Provider : Provider {
99103

100104
bool enable_qdq_optimizer = false;
101105

106+
bool disable_cpu_fallback = false;
107+
102108
if (provider_options_map.find("device_type") != provider_options_map.end()) {
103109
device_type = provider_options_map.at("device_type").c_str();
104110

@@ -256,6 +262,15 @@ struct OpenVINO_Provider : Provider {
256262
export_ep_ctx_blob = false;
257263
bool_flag = "";
258264
}
265+
266+
if (provider_options_map.find("disable_cpu_fallback") != provider_options_map.end()) {
267+
bool_flag = provider_options_map.at("disable_cpu_fallback");
268+
if (bool_flag == "true" || bool_flag == "True")
269+
disable_cpu_fallback = true;
270+
else if (bool_flag == "false" || bool_flag == "False")
271+
disable_cpu_fallback = false;
272+
bool_flag = "";
273+
}
259274
return std::make_shared<OpenVINOProviderFactory>(const_cast<char*>(device_type.c_str()),
260275
const_cast<char*>(precision.c_str()),
261276
enable_npu_fast_compile,
@@ -267,7 +282,8 @@ struct OpenVINO_Provider : Provider {
267282
enable_opencl_throttling,
268283
disable_dynamic_shapes,
269284
export_ep_ctx_blob,
270-
enable_qdq_optimizer);
285+
enable_qdq_optimizer,
286+
disable_cpu_fallback);
271287
}
272288

273289
void Initialize() override {

onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,11 @@
1111
struct OrtOpenVINOProviderOptions;
1212

1313
namespace onnxruntime {
14+
struct SessionOptions;
1415
// defined in provider_bridge_ort.cc
1516
struct OpenVINOProviderFactoryCreator {
16-
static std::shared_ptr<IExecutionProviderFactory> Create(const ProviderOptions* provider_options_map);
17+
static std::shared_ptr<IExecutionProviderFactory> Create(ProviderOptions* provider_options_map,
18+
const SessionOptions* session_options);
1719
static std::shared_ptr<IExecutionProviderFactory> Create(const OrtOpenVINOProviderOptions* provider_options);
1820
};
1921
} // namespace onnxruntime

0 commit comments

Comments
 (0)