77#include < fstream>
88#include < regex>
99#include < sstream>
10+ #include < string>
1011#include < unordered_map>
1112#include < unordered_set>
1213#include < utility>
@@ -170,7 +171,10 @@ BackendManager::BackendManager(SessionContext& session_context,
170171 exception_str.find (" intel_npu" ) != std::string::npos) {
171172 // Handle NPU device related errors
172173#ifndef NDEBUG
173- ORT_THROW (exception_str + " \n Model needs to be recompiled\n " );
174+ std::string suffix = session_context_.so_disable_cpu_ep_fallback ?
175+ " \n Model failed to compile on NPU. Enable CPU fallback or try another device.\n " :
176+ " \n Model needs to be recompiled\n " ;
177+ ORT_THROW (exception_str + suffix);
174178#else
175179 std::string error_message = " UNKNOWN NPU ERROR" ;
176180 std::string error_code = " code 0x0" ;
@@ -183,7 +187,10 @@ BackendManager::BackendManager(SessionContext& session_context,
183187 if (std::regex_search (exception_str, matches, error_code_pattern)) {
184188 error_code = matches[0 ];
185189 }
186- throw std::runtime_error (error_message + " , " + error_code + " \n Model needs to be recompiled\n " );
190+ std::string suffix = session_context_.so_disable_cpu_ep_fallback ?
191+ " \n Model failed to compile on NPU. Enable CPU fallback or try another device.\n " :
192+ " \n Model needs to be recompiled\n " ;
193+ throw std::runtime_error (error_message + " , " + error_code + suffix);
187194#endif
188195 } else {
189196 ORT_THROW (exception_str);
@@ -631,8 +638,8 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
631638 // proto is limited to 2GB, but let's use 32MB as threshold to be conservative and still gain some memory reductions.
632639#if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 3)) || (OPENVINO_VERSION_MAJOR > 2025))
633640 constexpr size_t MAX_EMBEDDED_INITIALIZER_SIZE = 1024 * 1024 * 32 ;
634- const bool include_initializer_data_in_proto = !(session_context_.has_external_weights &&
635- external_initializers_offset_and_length.size () > 1 &&
641+ const bool include_initializer_data_in_proto = !(session_context_.has_external_weights &&
642+ external_initializers_offset_and_length.size () > 1 &&
636643 extInitializerTotalSize >= MAX_EMBEDDED_INITIALIZER_SIZE);
637644#else
638645 const bool include_initializer_data_in_proto = true ;
@@ -642,7 +649,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
642649 auto model = subgraph.CreateModel (logger);
643650 auto model_proto = model->ToProto ();
644651 model_proto->set_ir_version (ONNX_NAMESPACE::Version::IR_VERSION);
645- subgraph.ToProto (*model_proto->mutable_graph (), /* include_initializers*/ true ,
652+ subgraph.ToProto (*model_proto->mutable_graph (), /* include_initializers*/ true ,
646653 /* include_outer_scope_args*/ true , /* execution_order*/ 0 , /* include_initializer_data*/ include_initializer_data_in_proto);
647654
648655 print_model_proto_duration ();
@@ -881,7 +888,25 @@ void BackendManager::Compute(OrtKernelContext* context) {
881888 ORT_THROW (msg);
882889 }
883890 } else {
884- ORT_THROW (ex.what ());
891+ std::string exception_str = ex.what ();
892+ if (session_context_.so_disable_cpu_ep_fallback ){
893+ std::string error_message = " UNKNOWN NPU ERROR" ;
894+ std::string error_code = " code 0x0" ;
895+ std::regex error_message_pattern (R"( \bZE_\w*\b)" );
896+ std::regex error_code_pattern (" code 0x[0-9a-fA-F]+" );
897+ std::smatch matches;
898+ if (std::regex_search (exception_str, matches, error_message_pattern)) {
899+ error_message = matches[0 ];
900+ }
901+ if (std::regex_search (exception_str, matches, error_code_pattern)) {
902+ error_code = matches[0 ];
903+ }
904+ std::string suffix = " \n Model failed to compile on NPU. Enable CPU fallback or try another device.\n " ;
905+ throw std::runtime_error (error_message + " , " + error_code + suffix);
906+ }
907+ else {
908+ ORT_THROW (exception_str);
909+ }
885910 }
886911#endif
887912 }
0 commit comments