Skip to content

Commit 59f22e1

Browse files
authored
[OVEP] fixed NPU exception message when CPU fallback is disabled (#832)
* fixed NPU exception message when CPU fallback is disabled
* fixed lint issues
1 parent b685871 commit 59f22e1

File tree

1 file changed

+31
-6
lines changed

1 file changed

+31
-6
lines changed

onnxruntime/core/providers/openvino/backend_manager.cc

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <fstream>
88
#include <regex>
99
#include <sstream>
10+
#include <string>
1011
#include <unordered_map>
1112
#include <unordered_set>
1213
#include <utility>
@@ -170,7 +171,10 @@ BackendManager::BackendManager(SessionContext& session_context,
170171
exception_str.find("intel_npu") != std::string::npos) {
171172
// Handle NPU device related errors
172173
#ifndef NDEBUG
173-
ORT_THROW(exception_str + "\nModel needs to be recompiled\n");
174+
std::string suffix = session_context_.so_disable_cpu_ep_fallback ?
175+
"\nModel failed to compile on NPU. Enable CPU fallback or try another device.\n" :
176+
"\nModel needs to be recompiled\n";
177+
ORT_THROW(exception_str + suffix);
174178
#else
175179
std::string error_message = "UNKNOWN NPU ERROR";
176180
std::string error_code = "code 0x0";
@@ -183,7 +187,10 @@ BackendManager::BackendManager(SessionContext& session_context,
183187
if (std::regex_search(exception_str, matches, error_code_pattern)) {
184188
error_code = matches[0];
185189
}
186-
throw std::runtime_error(error_message + ", " + error_code + "\nModel needs to be recompiled\n");
190+
std::string suffix = session_context_.so_disable_cpu_ep_fallback ?
191+
"\nModel failed to compile on NPU. Enable CPU fallback or try another device.\n" :
192+
"\nModel needs to be recompiled\n";
193+
throw std::runtime_error(error_message + ", " + error_code + suffix);
187194
#endif
188195
} else {
189196
ORT_THROW(exception_str);
@@ -631,8 +638,8 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
631638
// proto is limited to 2GB, but let's use 32MB as threshold to be conservative and still gain some memory reductions.
632639
#if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 3)) || (OPENVINO_VERSION_MAJOR > 2025))
633640
constexpr size_t MAX_EMBEDDED_INITIALIZER_SIZE = 1024 * 1024 * 32;
634-
const bool include_initializer_data_in_proto = !(session_context_.has_external_weights &&
635-
external_initializers_offset_and_length.size() > 1 &&
641+
const bool include_initializer_data_in_proto = !(session_context_.has_external_weights &&
642+
external_initializers_offset_and_length.size() > 1 &&
636643
extInitializerTotalSize >= MAX_EMBEDDED_INITIALIZER_SIZE);
637644
#else
638645
const bool include_initializer_data_in_proto = true;
@@ -642,7 +649,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
642649
auto model = subgraph.CreateModel(logger);
643650
auto model_proto = model->ToProto();
644651
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
645-
subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true,
652+
subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true,
646653
/*include_outer_scope_args*/true, /*execution_order*/0, /*include_initializer_data*/include_initializer_data_in_proto);
647654

648655
print_model_proto_duration();
@@ -881,7 +888,25 @@ void BackendManager::Compute(OrtKernelContext* context) {
881888
ORT_THROW(msg);
882889
}
883890
} else {
884-
ORT_THROW(ex.what());
891+
std::string exception_str = ex.what();
892+
if (session_context_.so_disable_cpu_ep_fallback){
893+
std::string error_message = "UNKNOWN NPU ERROR";
894+
std::string error_code = "code 0x0";
895+
std::regex error_message_pattern(R"(\bZE_\w*\b)");
896+
std::regex error_code_pattern("code 0x[0-9a-fA-F]+");
897+
std::smatch matches;
898+
if (std::regex_search(exception_str, matches, error_message_pattern)) {
899+
error_message = matches[0];
900+
}
901+
if (std::regex_search(exception_str, matches, error_code_pattern)) {
902+
error_code = matches[0];
903+
}
904+
std::string suffix = "\nModel failed to compile on NPU. Enable CPU fallback or try another device.\n" ;
905+
throw std::runtime_error(error_message + ", " + error_code + suffix);
906+
}
907+
else{
908+
ORT_THROW(exception_str);
909+
}
885910
}
886911
#endif
887912
}

0 commit comments

Comments
 (0)