Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 6 additions & 34 deletions onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "core/providers/openvino/ov_interface.h"
#include "core/providers/openvino/ov_versions/capability.h"
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
#include "core/providers/openvino/exceptions.h"
#include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h"
#include "../../framework/tensorprotoutils.h"

Expand Down Expand Up @@ -157,40 +158,11 @@ BackendManager::BackendManager(SessionContext& session_context,
subgraph_context_.has_dynamic_input_shape = false;

// OV NPU plugin is supported with fallback to OV CPU upon compilation failures.
try {
concrete_backend_ = BackendFactory::MakeBackend(model_proto,
session_context_,
subgraph_context_,
*shared_context_,
model_stream);
} catch (const OnnxRuntimeException& ex) {
std::string exception_str = ex.what();

if (session_context_.device_type.find("NPU") != std::string::npos &&
exception_str.find("intel_npu") != std::string::npos) {
// Handle NPU device related errors
#ifndef NDEBUG
std::string suffix = session_context_.so_disable_cpu_ep_fallback ? "\nModel failed to compile on NPU. Enable CPU fallback or try another device.\n" : "\nModel needs to be recompiled\n";
ORT_THROW(exception_str + suffix);
#else
std::string error_message = "UNKNOWN NPU ERROR";
std::string error_code = "code 0x0";
std::regex error_message_pattern(R"(\bZE_\w*\b)");
std::regex error_code_pattern("code 0x[0-9a-fA-F]+");
std::smatch matches;
if (std::regex_search(exception_str, matches, error_message_pattern)) {
error_message = matches[0];
}
if (std::regex_search(exception_str, matches, error_code_pattern)) {
error_code = matches[0];
}
std::string suffix = session_context_.so_disable_cpu_ep_fallback ? "\nModel failed to compile on NPU. Enable CPU fallback or try another device.\n" : "\nModel needs to be recompiled\n";
throw std::runtime_error(error_message + ", " + error_code + suffix);
#endif
} else {
ORT_THROW(exception_str);
}
}
concrete_backend_ = BackendFactory::MakeBackend(model_proto,
session_context_,
subgraph_context_,
*shared_context_,
model_stream);
}

if (ShouldExportEpContext(session_context_, subgraph_context_)) {
Expand Down
88 changes: 88 additions & 0 deletions onnxruntime/core/providers/openvino/exceptions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Copyright (C) Intel Corporation
// Licensed under the MIT License

#pragma once

#include <charconv>
#include <cstdint>
#include <exception>
#include <regex>
#include <string>

#include "core/common/status.h"

namespace onnxruntime {
namespace openvino_ep {

/// Exception carrying an OpenVINO failure together with the operation that
/// failed and any Level Zero ("ZE_...") result name / hex result code that
/// could be parsed out of the underlying message.
///
/// The object converts implicitly to `common::Status`, so a catch site can
/// simply write `status = ex;`.
struct ovep_exception : public std::exception {
  /// Operation that was in progress when the failure was raised.
  enum class type {
    compile_model,
    import_model,
    query_prop,
    read_model,
    unknown,
  };

  /// Wraps an existing exception, reusing its `what()` text as the message.
  /// @param ex             Original exception.
  /// @param exception_type Operation that failed.
  ovep_exception(const std::exception& ex, type exception_type)
      : message_{ex.what()},
        type_{exception_type},
        // message_ is declared (and therefore initialized) before these two.
        error_code_{ze_result_code_from_string(message_)},
        error_name_{ze_result_name_from_string(message_)} {}

  /// Builds the exception from a raw message string.
  /// @param message        Error text, scanned for "code 0x..." and "ZE_..." tokens.
  /// @param exception_type Operation that failed.
  ovep_exception(const std::string& message, type exception_type)
      : message_{message},
        type_{exception_type},
        error_code_{ze_result_code_from_string(message)},
        error_name_{ze_result_name_from_string(message)} {}

  /// @return The full, unmodified error message.
  const char* what() const noexcept override {
    return message_.c_str();
  }

  /// @return The numeric result code parsed from the message, or 0 if none was found.
  uint32_t get_code() const { return error_code_; }

  /// Maps the exception onto an ONNX Runtime status.
  ///  - `type::unknown`                       -> FAIL with the original message.
  ///  - `type::import_model` + code 0x7800000f -> INVALID_GRAPH asking for a recompile.
  ///  - anything else                          -> EP_FAIL naming the numeric exception type.
  operator common::Status() const {
    common::StatusCategory category_ort{common::ONNXRUNTIME};

    if (type_ == type::unknown) {
      return {category_ort, common::FAIL, message_};
    }

    // Newer drivers
    if ((type_ == type::import_model) &&
        (error_code_ == 0x7800000f /* ZE_RESULT_ERROR_INVALID_NATIVE_BINARY */)) {
      // The code follows a literal "0x", so it has to be rendered in hexadecimal
      // (std::to_string would print the decimal value).
      std::string message{error_name_ + ", code 0x" + to_hex(error_code_) + "\nModel needs to be recompiled\n"};
      return {category_ort, common::INVALID_GRAPH, message};
    }

    std::string error_message = "Unhandled exception type: " + std::to_string(static_cast<int>(type_));
    return {category_ort, common::EP_FAIL, error_message};
  }

 protected:
  std::string message_;
  type type_{type::unknown};
  uint32_t error_code_{0};
  std::string error_name_;

 private:
  /// Formats `value` as lowercase hexadecimal digits without a "0x" prefix.
  static std::string to_hex(uint32_t value) {
    char buffer[8];  // a 32-bit value needs at most 8 hex digits
    const auto result = std::to_chars(buffer, buffer + sizeof(buffer), value, 16);
    return std::string(buffer, result.ptr);
  }

  /// Extracts the hexadecimal value following "code 0x" in the message.
  /// @return The parsed value, or 0 when the pattern is absent or the value
  ///         does not fit in 32 bits.
  static uint32_t ze_result_code_from_string(const std::string& ov_exception_string) {
    uint32_t error_code{0};
    std::regex error_code_pattern("code 0x([0-9a-fA-F]+)");
    std::smatch matches;
    if (std::regex_search(ov_exception_string, matches, error_code_pattern)) {
      // Build the pointer range from offsets into the string's buffer; dereferencing
      // the sub-match's end iterator is invalid when the match ends the string.
      const char* first = ov_exception_string.data() + matches.position(1);
      const char* last = first + matches.length(1);
      // On failure from_chars leaves error_code untouched, i.e. 0.
      (void)std::from_chars(first, last, error_code, 16);
    }
    return error_code;
  }

  /// Extracts the first "ZE_..." identifier from the message.
  /// @return The identifier, or "UNKNOWN NPU ERROR" when none is present.
  static std::string ze_result_name_from_string(const std::string& ov_exception_string) {
    std::string error_message = "UNKNOWN NPU ERROR";
    std::regex error_message_pattern(R"(\bZE_\w*\b)");
    std::smatch matches;
    if (std::regex_search(ov_exception_string, matches, error_message_pattern)) {
      error_message = matches[0];
    }
    return error_message;
  }
};

} // namespace openvino_ep
} // namespace onnxruntime
181 changes: 93 additions & 88 deletions onnxruntime/core/providers/openvino/openvino_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "core/providers/openvino/onnx_ctx_model_helper.h"
#include "core/providers/openvino/ov_versions/capability.h"
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
#include "core/providers/openvino/exceptions.h"
#include "core/session/onnxruntime_session_options_config_keys.h"
#include "openvino/core/version.hpp"
#ifdef USE_OVEP_NPU_MEMORY
Expand Down Expand Up @@ -103,107 +104,111 @@ common::Status OpenVINOExecutionProvider::Compile(
auto& logger = *GetLogger();
Status status = Status::OK();

if (session_context_.so_context_enable && session_context_.so_context_embed_mode && session_context_.so_share_ep_contexts) {
return Status(common::StatusCategory::ONNXRUNTIME, common::EP_FAIL,
std::string("Invalid EP context configuration: ") + kOrtSessionOptionEpContextEmbedMode + " must be 0 if " + kOrtSessionOptionShareEpContexts + " is 1.");
}
try {
if (session_context_.so_context_enable && session_context_.so_context_embed_mode && session_context_.so_share_ep_contexts) {
return Status(common::StatusCategory::ONNXRUNTIME, common::EP_FAIL,
std::string("Invalid EP context configuration: ") + kOrtSessionOptionEpContextEmbedMode + " must be 0 if " + kOrtSessionOptionShareEpContexts + " is 1.");
}

bool is_epctx_model = false;
if (!fused_nodes.empty()) {
// Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext
const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get();
session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string();
session_context_.onnx_opset_version =
graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain);

// OVIR wrapped in epctx should be treated as source but this code does not
// This corner case is not in use and will be addressed in a future commit
is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0);
}
bool is_epctx_model = false;
if (!fused_nodes.empty()) {
// Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext
const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get();
session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string();
session_context_.onnx_opset_version =
graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain);

// OVIR wrapped in epctx should be treated as source but this code does not
// This corner case is not in use and will be addressed in a future commit
is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0);
}

if (is_epctx_model) {
ep_ctx_handle_.Initialize(fused_nodes, session_context_.GetOutputBinPath().parent_path());
}
if (is_epctx_model) {
ep_ctx_handle_.Initialize(fused_nodes, session_context_.GetOutputBinPath().parent_path());
}

struct OpenVINOEPFunctionState {
AllocateFunc allocate_func = nullptr;
DestroyFunc destroy_func = nullptr;
AllocatorHandle allocator_handle = nullptr;
BackendManager& backend_manager;
};

for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) {
const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph;
const Node& fused_node = fused_node_graph.fused_node;

NodeComputeInfo compute_info;

// During backend creation, we check if user wants to use precompiled blob onnx model or the original model
// For precompiled blob, directly load the model instead of compiling the model
// For original model, check if the user wants to export a model with pre-compiled blob

auto& backend_manager = backend_managers_.emplace_back(session_context_,
*shared_context_manager_,
fused_node,
graph_body_viewer,
logger,
ep_ctx_handle_);
compute_info.create_state_func =
[&backend_manager](ComputeContext* context, FunctionState* state) {
OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{
.allocate_func = context->allocate_func,
.destroy_func = context->release_func,
.allocator_handle = context->allocator_handle,
.backend_manager = backend_manager};
*state = static_cast<FunctionState>(p);
return 0;
};

compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) {
auto function_state = static_cast<OpenVINOEPFunctionState*>(state);
try {
function_state->backend_manager.Compute(context);
} catch (const std::exception& ex) {
return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what());
}
return Status::OK();
struct OpenVINOEPFunctionState {
AllocateFunc allocate_func = nullptr;
DestroyFunc destroy_func = nullptr;
AllocatorHandle allocator_handle = nullptr;
BackendManager& backend_manager;
};

compute_info.release_state_func =
[](FunctionState state) {
if (state) {
OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state);
delete function_state;
}
};
for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) {
const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph;
const Node& fused_node = fused_node_graph.fused_node;

NodeComputeInfo compute_info;

// During backend creation, we check if user wants to use precompiled blob onnx model or the original model
// For precompiled blob, directly load the model instead of compiling the model
// For original model, check if the user wants to export a model with pre-compiled blob

auto& backend_manager = backend_managers_.emplace_back(session_context_,
*shared_context_manager_,
fused_node,
graph_body_viewer,
logger,
ep_ctx_handle_);
compute_info.create_state_func =
[&backend_manager](ComputeContext* context, FunctionState* state) {
OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{
.allocate_func = context->allocate_func,
.destroy_func = context->release_func,
.allocator_handle = context->allocator_handle,
.backend_manager = backend_manager};
*state = static_cast<FunctionState>(p);
return 0;
};

compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) {
auto function_state = static_cast<OpenVINOEPFunctionState*>(state);
try {
function_state->backend_manager.Compute(context);
} catch (const std::exception& ex) {
return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what());
}
return Status::OK();
};

node_compute_funcs.push_back(std::move(compute_info));
}
compute_info.release_state_func =
[](FunctionState state) {
if (state) {
OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state);
delete function_state;
}
};

// Export compiled blobs as EPContext nodes if context enable is set
if (session_context_.so_context_enable) {
auto backend_it = backend_managers_.begin();
bool is_first = true;
node_compute_funcs.push_back(std::move(compute_info));
}

for (const auto& fused_node_graph : fused_nodes) {
const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph;
// Export compiled blobs as EPContext nodes if context enable is set
if (session_context_.so_context_enable) {
auto backend_it = backend_managers_.begin();
bool is_first = true;

// Set include_embed_data to true only for the first backend manager
backend_it->TryExportCompiledBlobAsEPCtxNode(graph_body_viewer, is_first);
for (const auto& fused_node_graph : fused_nodes) {
const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph;

is_first = false;
++backend_it;
}
// Set include_embed_data to true only for the first backend manager
backend_it->TryExportCompiledBlobAsEPCtxNode(graph_body_viewer, is_first);

is_first = false;
++backend_it;
}

// bit clunky ideally we should try to fold this into ep context handler
if (!session_context_.so_context_embed_mode) {
auto shared_context = shared_context_manager_->GetOrCreateActiveSharedContext(session_context_.GetOutputBinPath());
shared_context->Serialize();
if (session_context_.so_stop_share_ep_contexts) {
shared_context_manager_->ClearActiveSharedContext();
shared_context->Clear();
// bit clunky ideally we should try to fold this into ep context handler
if (!session_context_.so_context_embed_mode) {
auto shared_context = shared_context_manager_->GetOrCreateActiveSharedContext(session_context_.GetOutputBinPath());
shared_context->Serialize();
if (session_context_.so_stop_share_ep_contexts) {
shared_context_manager_->ClearActiveSharedContext();
shared_context->Clear();
}
}
}
} catch (const ovep_exception& ex) {
status = ex;
}

return status;
Expand Down
Loading
Loading