intel
diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
Lines changed: 24 additions & 24 deletions
diff --git a/onnxruntime/core/providers/openvino/exceptions.h b/onnxruntime/core/providers/openvino/exceptions.h
Lines changed: 82 additions & 0 deletions
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
Lines changed: 111 additions & 106 deletions
@@ -21,6 +21,7 @@
 #include "core/providers/openvino/ov_interface.h"
 #include "core/providers/openvino/ov_versions/capability.h"
 #include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
+#include "core/providers/openvino/exceptions.h"
 #include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h"
 #include "../../framework/tensorprotoutils.h"
 
@@ -164,32 +165,31 @@ BackendManager::BackendManager(SessionContext& session_context,
                                                       subgraph_context_,
                                                       shared_context_,
                                                       model_stream);
-    } catch (const OnnxRuntimeException& ex) {
-      std::string exception_str = ex.what();
-
-      if (session_context_.device_type.find("NPU") != std::string::npos &&
-          exception_str.find("intel_npu") != std::string::npos) {
-        // Handle NPU device related errors
-#ifndef NDEBUG
-        std::string suffix = session_context_.so_disable_cpu_ep_fallback ? "\nModel failed to compile on NPU. Enable CPU fallback or try another device.\n" : "\nModel needs to be recompiled\n";
-        ORT_THROW(exception_str + suffix);
-#else
-        std::string error_message = "UNKNOWN NPU ERROR";
-        std::string error_code = "code 0x0";
-        std::regex error_message_pattern(R"(\bZE_\w*\b)");
-        std::regex error_code_pattern("code 0x[0-9a-fA-F]+");
-        std::smatch matches;
-        if (std::regex_search(exception_str, matches, error_message_pattern)) {
-          error_message = matches[0];
-        }
-        if (std::regex_search(exception_str, matches, error_code_pattern)) {
-          error_code = matches[0];
+    } catch (const ovep_exception& ex) {
+#ifndef OPENVINO_DISABLE_NPU_FALLBACK
+      bool eligible_for_cpu_fallback = session_context_.device_type.find("NPU") != std::string::npos &&
+                                       !session_context_.so_disable_cpu_ep_fallback &&
+                                       !subgraph_context_.is_ep_ctx_graph;
+      if (eligible_for_cpu_fallback) {
+        std::string exception_str = ex.what();
+        LOGS_DEFAULT(VERBOSE) << exception_str;
+        LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU."
+                              << "Falling back to OV CPU for execution";
+        session_context_.device_type = "CPU";
+        session_context_.precision = "FP32";
+        try {
+          concrete_backend_ = BackendFactory::MakeBackend(model_proto,
+                                                          session_context_,
+                                                          subgraph_context_,
+                                                          shared_context_,
+                                                          model_stream);
+        } catch (std::string const& msg) {
+          ORT_THROW(msg);
         }
-        std::string suffix = session_context_.so_disable_cpu_ep_fallback ? "\nModel failed to compile on NPU. Enable CPU fallback or try another device.\n" : "\nModel needs to be recompiled\n";
-        throw std::runtime_error(error_message + ", " + error_code + suffix);
+      } else
 #endif
-      } else {
-        ORT_THROW(exception_str);
+      {
+        throw ex;
       }
     }
   }
 
@@ -0,0 +1,113 @@
+// Copyright (C) Intel Corporation
+// Licensed under the MIT License
+
+#pragma once
+
+#include <charconv>
+#include <cstdint>
+#include <exception>
+#include <regex>
+#include <string>
+
+#include "core/common/status.h"
+
+namespace onnxruntime {
+namespace openvino_ep {
+
+// Exception thrown by the OpenVINO EP. Besides the message it records which
+// operation failed and the ZE_* result name and "code 0x..." number found in
+// the message text (parsed once, at construction).
+struct ovep_exception : public std::exception {
+  // Operation that was in progress when the failure occurred.
+  enum class type {
+    compile_model,
+    import_model,
+    query_prop,
+    read_model,
+    unknown,
+  };
+
+  // message: text returned by what(); also scanned for the result code and name.
+  // type:    operation that failed. Spelled `enum type` because the parameter name
+  //          hides the enum, and `enum class` is not valid in an elaborated specifier.
+  ovep_exception(const std::string& message,
+                 enum type type) : message_{message},
+                                   type_{type},
+                                   error_code_{ze_result_code_from_string(message)},
+                                   error_name_{ze_result_name_from_string(message)} {}
+
+  // Returns the message passed to the constructor.
+  const char* what() const noexcept override {
+    return message_.c_str();
+  }
+
+  // Returns the number parsed after "code 0x" in the message, or 0 if none was parsed.
+  uint32_t get_code() const { return error_code_; }
+
+  // Converts the exception into an ONNXRUNTIME-category Status:
+  //  - type::unknown -> FAIL carrying the original message;
+  //  - type::import_model with ZE_RESULT_ERROR_INVALID_NATIVE_BINARY -> INVALID_GRAPH
+  //    with the result name, the code in hexadecimal and a recompile hint;
+  //  - anything else -> FAIL naming the numeric type (original message not included).
+  operator common::Status() const {
+    common::StatusCategory category_ort{common::ONNXRUNTIME};
+
+    if (type_ == type::unknown) {
+      return {category_ort, common::FAIL, message_};
+    }
+
+    // Newer drivers
+    if ((type_ == type::import_model) &&
+        (error_code_ == 0x7800000f /* ZE_RESULT_ERROR_INVALID_NATIVE_BINARY */)) {
+      // The "0x" prefix requires hexadecimal digits; std::to_string would emit decimal.
+      std::string message{error_name_ + ", code 0x" + to_hex(error_code_) + "\nModel needs to be recompiled\n"};
+      return {category_ort, common::INVALID_GRAPH, message};
+    }
+
+    std::string error_message = "Unhandled exception type: " + std::to_string(static_cast<int>(type_));
+    return {category_ort, common::FAIL, error_message};
+  }
+
+ protected:
+  std::string message_;
+  type type_{type::unknown};
+  uint32_t error_code_{0};
+  std::string error_name_;
+
+ private:
+  // Formats value as lower-case hexadecimal digits without a "0x" prefix.
+  static std::string to_hex(uint32_t value) {
+    char buffer[8];  // a 32-bit value has at most 8 hexadecimal digits
+    const auto result = std::to_chars(buffer, buffer + sizeof(buffer), value, 16);
+    return std::string(buffer, result.ptr);
+  }
+
+  // Returns the hexadecimal number following the first "code 0x" in the message,
+  // or 0 when there is no such text or the number does not fit in 32 bits.
+  static uint32_t ze_result_code_from_string(const std::string& ov_exception_string) {
+    uint32_t error_code{0};
+    const std::regex error_code_pattern("code 0x([0-9a-fA-F]+)");
+    std::smatch matches;
+    if (std::regex_search(ov_exception_string, matches, error_code_pattern)) {
+      // Address the digits through the string's buffer: the sub-match end iterator may
+      // equal end(), which must not be dereferenced.
+      const char* first = ov_exception_string.data() + matches.position(1);
+      std::from_chars(first, first + matches.length(1), error_code, 16);
+    }
+    return error_code;
+  }
+
+  // Returns the first ZE_* token in the message, or "UNKNOWN NPU ERROR" when there is none.
+  static std::string ze_result_name_from_string(const std::string& ov_exception_string) {
+    std::string error_message = "UNKNOWN NPU ERROR";
+    const std::regex error_message_pattern(R"(\bZE_\w*\b)");
+    std::smatch matches;
+    if (std::regex_search(ov_exception_string, matches, error_message_pattern)) {
+      error_message = matches[0];
+    }
+    return error_message;
+  }
+};
+
+}  // namespace openvino_ep
+}  // namespace onnxruntime
@@ -12,6 +12,7 @@
 #include "core/providers/openvino/onnx_ctx_model_helper.h"
 #include "core/providers/openvino/ov_versions/capability.h"
 #include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
+#include "core/providers/openvino/exceptions.h"
 #include "core/session/onnxruntime_session_options_config_keys.h"
 #include "openvino/core/version.hpp"
 #ifdef USE_OVEP_NPU_MEMORY
@@ -102,124 +103,128 @@ common::Status OpenVINOExecutionProvider::Compile(
   auto& logger = *GetLogger();
   Status status = Status::OK();
 
-  bool is_epctx_model = false;
-  if (!fused_nodes.empty()) {
-    // Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext
-    const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get();
-    session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string();
-    session_context_.onnx_opset_version =
-        graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain);
-
-    // OVIR wrapped in epctx should be treated as source but this code does not
-    // This corner case is not in use and will be addressed in a future commit
-    is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0);
-  }
-
-  // The block below is executed during EP context model inference
-  auto& metadata = shared_context_->shared_weights.metadata;  // Metadata object in memory
-  if (session_context_.so_share_ep_contexts &&
-      is_epctx_model &&
-      metadata.empty()) {
-    fs::path context_model_file_path = session_context_.so_context_file_path;
-    if (context_model_file_path.empty()) {
-      // If ep.context_file_path is not set the input model path is used
-      context_model_file_path = session_context_.onnx_model_path_name;
+  try {
+    bool is_epctx_model = false;
+    if (!fused_nodes.empty()) {
+      // Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext
+      const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get();
+      session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string();
+      session_context_.onnx_opset_version =
+          graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain);
+
+      // OVIR wrapped in epctx should be treated as source but this code does not
+      // This corner case is not in use and will be addressed in a future commit
+      is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0);
     }
 
-    // Metadata is always read from model location, this could be a source or epctx model
-    fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin";
-    fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename;
-    std::ifstream file(metadata_file_path, std::ios::binary);
-    ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string());
-    shared_context_->shared_weights.metadata_filepath = std::move(metadata_file_path);
-    file >> metadata;
-  }
-
-  struct OpenVINOEPFunctionState {
-    AllocateFunc allocate_func = nullptr;
-    DestroyFunc destroy_func = nullptr;
-    AllocatorHandle allocator_handle = nullptr;
-    BackendManager& backend_manager;
-  };
-
-  for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) {
-    const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph;
-    const Node& fused_node = fused_node_graph.fused_node;
-
-    NodeComputeInfo compute_info;
-
-    // During backend creation, we check if user wants to use precompiled blob onnx model or the original model
-    // For precompiled blob, directly load the model instead of compiling the model
-    // For original model, check if the user wants to export a model with pre-compiled blob
-
-    auto& backend_manager = backend_managers_.emplace_back(session_context_,
-                                                           *shared_context_,
-                                                           fused_node,
-                                                           graph_body_viewer,
-                                                           logger,
-                                                           ep_ctx_handle_);
-
-    compute_info.create_state_func =
-        [&backend_manager](ComputeContext* context, FunctionState* state) {
-          OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{
-              .allocate_func = context->allocate_func,
-              .destroy_func = context->release_func,
-              .allocator_handle = context->allocator_handle,
-              .backend_manager = backend_manager};
-          *state = static_cast<FunctionState>(p);
-          return 0;
-        };
-
-    compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) {
-      auto function_state = static_cast<OpenVINOEPFunctionState*>(state);
-      try {
-        function_state->backend_manager.Compute(context);
-      } catch (const std::exception& ex) {
-        return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what());
+    // The block below is executed during EP context model inference
+    auto& metadata = shared_context_->shared_weights.metadata;  // Metadata object in memory
+    if (session_context_.so_share_ep_contexts &&
+        is_epctx_model &&
+        metadata.empty()) {
+      fs::path context_model_file_path = session_context_.so_context_file_path;
+      if (context_model_file_path.empty()) {
+        // If ep.context_file_path is not set the input model path is used
+        context_model_file_path = session_context_.onnx_model_path_name;
       }
-      return Status::OK();
+
+      // Metadata is always read from model location, this could be a source or epctx model
+      fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin";
+      fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename;
+      std::ifstream file(metadata_file_path, std::ios::binary);
+      ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string());
+      shared_context_->shared_weights.metadata_filepath = std::move(metadata_file_path);
+      file >> metadata;
+    }
+
+    struct OpenVINOEPFunctionState {
+      AllocateFunc allocate_func = nullptr;
+      DestroyFunc destroy_func = nullptr;
+      AllocatorHandle allocator_handle = nullptr;
+      BackendManager& backend_manager;
     };
 
-    compute_info.release_state_func =
-        [](FunctionState state) {
-          if (state) {
-            OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state);
-            delete function_state;
-          }
-        };
+    for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) {
+      const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph;
+      const Node& fused_node = fused_node_graph.fused_node;
+
+      NodeComputeInfo compute_info;
+
+      // During backend creation, we check if user wants to use precompiled blob onnx model or the original model
+      // For precompiled blob, directly load the model instead of compiling the model
+      // For original model, check if the user wants to export a model with pre-compiled blob
+
+      auto& backend_manager = backend_managers_.emplace_back(session_context_,
+                                                             *shared_context_,
+                                                             fused_node,
+                                                             graph_body_viewer,
+                                                             logger,
+                                                             ep_ctx_handle_);
+
+      compute_info.create_state_func =
+          [&backend_manager](ComputeContext* context, FunctionState* state) {
+            OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{
+                .allocate_func = context->allocate_func,
+                .destroy_func = context->release_func,
+                .allocator_handle = context->allocator_handle,
+                .backend_manager = backend_manager};
+            *state = static_cast<FunctionState>(p);
+            return 0;
+          };
+
+      compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) {
+        auto function_state = static_cast<OpenVINOEPFunctionState*>(state);
+        try {
+          function_state->backend_manager.Compute(context);
+        } catch (const std::exception& ex) {
+          return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what());
+        }
+        return Status::OK();
+      };
 
-    node_compute_funcs.push_back(std::move(compute_info));
+      compute_info.release_state_func =
+          [](FunctionState state) {
+            if (state) {
+              OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state);
+              delete function_state;
+            }
+          };
 
-    if (!status.IsOK()) {
-      break;
+      node_compute_funcs.push_back(std::move(compute_info));
+
+      if (!status.IsOK()) {
+        break;
+      }
     }
-  }
 
-  // The block below is executed during EP context model generation
-  if (session_context_.so_context_enable &&
-      session_context_.so_share_ep_contexts &&
-      !metadata.empty()) {
-    // For models after the first the metadata name comes from the shared context
-    fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath;
-    if (metadata_file_path.empty()) {
-      metadata_file_path = session_context_.so_context_file_path;
-      std::string name_append{"_metadata.bin"};
+    // The block below is executed during EP context model generation
+    if (session_context_.so_context_enable &&
+        session_context_.so_share_ep_contexts &&
+        !metadata.empty()) {
+      // For models after the first the metadata name comes from the shared context
+      fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath;
       if (metadata_file_path.empty()) {
-        metadata_file_path = session_context_.onnx_model_path_name;
-        name_append = "_ctx" + name_append;
+        metadata_file_path = session_context_.so_context_file_path;
+        std::string name_append{"_metadata.bin"};
+        if (metadata_file_path.empty()) {
+          metadata_file_path = session_context_.onnx_model_path_name;
+          name_append = "_ctx" + name_append;
+        }
+        auto metadata_filename = metadata_file_path.stem().string() + name_append;
+        metadata_file_path.replace_filename(metadata_filename);
+        shared_context_->shared_weights.metadata_filepath = metadata_file_path;
       }
-      auto metadata_filename = metadata_file_path.stem().string() + name_append;
-      metadata_file_path.replace_filename(metadata_filename);
-      shared_context_->shared_weights.metadata_filepath = metadata_file_path;
-    }
 
-    // Metadata is generated only for shared contexts
-    // If saving metadata then save it to the provided path or use the original model path
-    // Multiple calls to Compile() will update the metadata and for the last call
-    //   the resulting file will contain the aggregated content
-    std::ofstream file{metadata_file_path, std::ios::binary};
-    ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path);
-    file << metadata;
+      // Metadata is generated only for shared contexts
+      // If saving metadata then save it to the provided path or use the original model path
+      // Multiple calls to Compile() will update the metadata and for the last call
+      //   the resulting file will contain the aggregated content
+      std::ofstream file{metadata_file_path, std::ios::binary};
+      ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path);
+      file << metadata;
+    }
+  } catch (const ovep_exception& ex) {
+    status = ex;
   }
 
   if (session_context_.so_stop_share_ep_contexts) {