Skip to content

Commit 49fe713

Browse files
javier-intel authored and ankitm3k committed
Catch model import failure and report the appropriate error
1 parent 70acefe commit 49fe713

File tree

5 files changed

+570
-471
lines changed

5 files changed

+570
-471
lines changed

onnxruntime/core/providers/openvino/backend_manager.cc

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "core/providers/openvino/ov_interface.h"
2222
#include "core/providers/openvino/ov_versions/capability.h"
2323
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
24+
#include "core/providers/openvino/exceptions.h"
2425
#include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h"
2526
#include "../../framework/tensorprotoutils.h"
2627

@@ -164,32 +165,31 @@ BackendManager::BackendManager(SessionContext& session_context,
164165
subgraph_context_,
165166
shared_context_,
166167
model_stream);
167-
} catch (const OnnxRuntimeException& ex) {
168-
std::string exception_str = ex.what();
169-
170-
if (session_context_.device_type.find("NPU") != std::string::npos &&
171-
exception_str.find("intel_npu") != std::string::npos) {
172-
// Handle NPU device related errors
173-
#ifndef NDEBUG
174-
std::string suffix = session_context_.so_disable_cpu_ep_fallback ? "\nModel failed to compile on NPU. Enable CPU fallback or try another device.\n" : "\nModel needs to be recompiled\n";
175-
ORT_THROW(exception_str + suffix);
176-
#else
177-
std::string error_message = "UNKNOWN NPU ERROR";
178-
std::string error_code = "code 0x0";
179-
std::regex error_message_pattern(R"(\bZE_\w*\b)");
180-
std::regex error_code_pattern("code 0x[0-9a-fA-F]+");
181-
std::smatch matches;
182-
if (std::regex_search(exception_str, matches, error_message_pattern)) {
183-
error_message = matches[0];
184-
}
185-
if (std::regex_search(exception_str, matches, error_code_pattern)) {
186-
error_code = matches[0];
168+
} catch (const ovep_exception& ex) {
169+
#ifndef OPENVINO_DISABLE_NPU_FALLBACK
170+
bool eligible_for_cpu_fallback = session_context_.device_type.find("NPU") != std::string::npos &&
171+
!session_context_.so_disable_cpu_ep_fallback &&
172+
!subgraph_context_.is_ep_ctx_graph;
173+
if (eligible_for_cpu_fallback) {
174+
std::string exception_str = ex.what();
175+
LOGS_DEFAULT(VERBOSE) << exception_str;
176+
LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU."
177+
<< "Falling back to OV CPU for execution";
178+
session_context_.device_type = "CPU";
179+
session_context_.precision = "FP32";
180+
try {
181+
concrete_backend_ = BackendFactory::MakeBackend(model_proto,
182+
session_context_,
183+
subgraph_context_,
184+
shared_context_,
185+
model_stream);
186+
} catch (std::string const& msg) {
187+
ORT_THROW(msg);
187188
}
188-
std::string suffix = session_context_.so_disable_cpu_ep_fallback ? "\nModel failed to compile on NPU. Enable CPU fallback or try another device.\n" : "\nModel needs to be recompiled\n";
189-
throw std::runtime_error(error_message + ", " + error_code + suffix);
189+
} else
190190
#endif
191-
} else {
192-
ORT_THROW(exception_str);
191+
{
192+
throw ex;
193193
}
194194
}
195195
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// Copyright (C) Intel Corporation
2+
// Licensed under the MIT License
3+
4+
#pragma once
5+
6+
#include <charconv>
#include <cstdint>
#include <exception>
7+
#include <regex>
8+
#include <string>
9+
10+
#include "core/common/status.h"
11+
12+
namespace onnxruntime {
13+
namespace openvino_ep {
14+
15+
struct ovep_exception : public std::exception {
16+
enum class type {
17+
compile_model,
18+
import_model,
19+
query_prop,
20+
read_model,
21+
unknown,
22+
};
23+
24+
ovep_exception(const std::string& message,
25+
enum class type type) : message_{message},
26+
type_{type},
27+
error_code_{ze_result_code_from_string(message)},
28+
error_name_{ze_result_name_from_string(message)} {}
29+
30+
const char* what() const noexcept override {
31+
return message_.data();
32+
}
33+
34+
uint32_t get_code() const { return error_code_; }
35+
36+
operator common::Status() const {
37+
common::StatusCategory category_ort{common::ONNXRUNTIME};
38+
39+
if (type_ == type::unknown) {
40+
return {category_ort, common::FAIL, message_};
41+
}
42+
43+
// Newer drivers
44+
if ((type_ == type::import_model) &&
45+
(error_code_ == 0x7800000f /* ZE_RESULT_ERROR_INVALID_NATIVE_BINARY */)) {
46+
std::string message{error_name_ + ", code 0x" + std::to_string(error_code_) + "\nModel needs to be recompiled\n"};
47+
return {category_ort, common::INVALID_GRAPH, message};
48+
}
49+
50+
std::string error_message = "Unhandled exception type: " + std::to_string(static_cast<int>(type_));
51+
return {category_ort, common::FAIL, error_message};
52+
}
53+
54+
protected:
55+
std::string message_;
56+
type type_{type::unknown};
57+
uint32_t error_code_{0};
58+
std::string error_name_;
59+
60+
private:
61+
uint32_t ze_result_code_from_string(const std::string& ov_exception_string) {
62+
uint32_t error_code{0};
63+
std::regex error_code_pattern("code 0x([0-9a-fA-F]+)");
64+
std::smatch matches;
65+
if (std::regex_search(ov_exception_string, matches, error_code_pattern)) {
66+
std::from_chars(&(*matches[1].first), &(*matches[1].second), error_code, 16);
67+
}
68+
return error_code;
69+
}
70+
std::string ze_result_name_from_string(const std::string& ov_exception_string) {
71+
std::string error_message = "UNKNOWN NPU ERROR";
72+
std::regex error_message_pattern(R"(\bZE_\w*\b)");
73+
std::smatch matches;
74+
if (std::regex_search(ov_exception_string, matches, error_message_pattern)) {
75+
error_message = matches[0];
76+
}
77+
return error_message;
78+
}
79+
};
80+
81+
} // namespace openvino_ep
82+
} // namespace onnxruntime

onnxruntime/core/providers/openvino/openvino_execution_provider.cc

Lines changed: 111 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "core/providers/openvino/onnx_ctx_model_helper.h"
1313
#include "core/providers/openvino/ov_versions/capability.h"
1414
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
15+
#include "core/providers/openvino/exceptions.h"
1516
#include "core/session/onnxruntime_session_options_config_keys.h"
1617
#include "openvino/core/version.hpp"
1718
#ifdef USE_OVEP_NPU_MEMORY
@@ -102,124 +103,128 @@ common::Status OpenVINOExecutionProvider::Compile(
102103
auto& logger = *GetLogger();
103104
Status status = Status::OK();
104105

105-
bool is_epctx_model = false;
106-
if (!fused_nodes.empty()) {
107-
// Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext
108-
const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get();
109-
session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string();
110-
session_context_.onnx_opset_version =
111-
graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain);
112-
113-
// OVIR wrapped in epctx should be treated as source but this code does not
114-
// This corner case is not in use and will be addressed in a future commit
115-
is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0);
116-
}
117-
118-
// The block below is executed during EP context model inference
119-
auto& metadata = shared_context_->shared_weights.metadata; // Metadata object in memory
120-
if (session_context_.so_share_ep_contexts &&
121-
is_epctx_model &&
122-
metadata.empty()) {
123-
fs::path context_model_file_path = session_context_.so_context_file_path;
124-
if (context_model_file_path.empty()) {
125-
// If ep.context_file_path is not set the input model path is used
126-
context_model_file_path = session_context_.onnx_model_path_name;
106+
try {
107+
bool is_epctx_model = false;
108+
if (!fused_nodes.empty()) {
109+
// Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext
110+
const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get();
111+
session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string();
112+
session_context_.onnx_opset_version =
113+
graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain);
114+
115+
// OVIR wrapped in epctx should be treated as source but this code does not
116+
// This corner case is not in use and will be addressed in a future commit
117+
is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0);
127118
}
128119

129-
// Metadata is always read from model location, this could be a source or epctx model
130-
fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin";
131-
fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename;
132-
std::ifstream file(metadata_file_path, std::ios::binary);
133-
ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string());
134-
shared_context_->shared_weights.metadata_filepath = std::move(metadata_file_path);
135-
file >> metadata;
136-
}
137-
138-
struct OpenVINOEPFunctionState {
139-
AllocateFunc allocate_func = nullptr;
140-
DestroyFunc destroy_func = nullptr;
141-
AllocatorHandle allocator_handle = nullptr;
142-
BackendManager& backend_manager;
143-
};
144-
145-
for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) {
146-
const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph;
147-
const Node& fused_node = fused_node_graph.fused_node;
148-
149-
NodeComputeInfo compute_info;
150-
151-
// During backend creation, we check if user wants to use precompiled blob onnx model or the original model
152-
// For precompiled blob, directly load the model instead of compiling the model
153-
// For original model, check if the user wants to export a model with pre-compiled blob
154-
155-
auto& backend_manager = backend_managers_.emplace_back(session_context_,
156-
*shared_context_,
157-
fused_node,
158-
graph_body_viewer,
159-
logger,
160-
ep_ctx_handle_);
161-
162-
compute_info.create_state_func =
163-
[&backend_manager](ComputeContext* context, FunctionState* state) {
164-
OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{
165-
.allocate_func = context->allocate_func,
166-
.destroy_func = context->release_func,
167-
.allocator_handle = context->allocator_handle,
168-
.backend_manager = backend_manager};
169-
*state = static_cast<FunctionState>(p);
170-
return 0;
171-
};
172-
173-
compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) {
174-
auto function_state = static_cast<OpenVINOEPFunctionState*>(state);
175-
try {
176-
function_state->backend_manager.Compute(context);
177-
} catch (const std::exception& ex) {
178-
return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what());
120+
// The block below is executed during EP context model inference
121+
auto& metadata = shared_context_->shared_weights.metadata; // Metadata object in memory
122+
if (session_context_.so_share_ep_contexts &&
123+
is_epctx_model &&
124+
metadata.empty()) {
125+
fs::path context_model_file_path = session_context_.so_context_file_path;
126+
if (context_model_file_path.empty()) {
127+
// If ep.context_file_path is not set the input model path is used
128+
context_model_file_path = session_context_.onnx_model_path_name;
179129
}
180-
return Status::OK();
130+
131+
// Metadata is always read from model location, this could be a source or epctx model
132+
fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin";
133+
fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename;
134+
std::ifstream file(metadata_file_path, std::ios::binary);
135+
ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string());
136+
shared_context_->shared_weights.metadata_filepath = std::move(metadata_file_path);
137+
file >> metadata;
138+
}
139+
140+
struct OpenVINOEPFunctionState {
141+
AllocateFunc allocate_func = nullptr;
142+
DestroyFunc destroy_func = nullptr;
143+
AllocatorHandle allocator_handle = nullptr;
144+
BackendManager& backend_manager;
181145
};
182146

183-
compute_info.release_state_func =
184-
[](FunctionState state) {
185-
if (state) {
186-
OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state);
187-
delete function_state;
188-
}
189-
};
147+
for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) {
148+
const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph;
149+
const Node& fused_node = fused_node_graph.fused_node;
150+
151+
NodeComputeInfo compute_info;
152+
153+
// During backend creation, we check if user wants to use precompiled blob onnx model or the original model
154+
// For precompiled blob, directly load the model instead of compiling the model
155+
// For original model, check if the user wants to export a model with pre-compiled blob
156+
157+
auto& backend_manager = backend_managers_.emplace_back(session_context_,
158+
*shared_context_,
159+
fused_node,
160+
graph_body_viewer,
161+
logger,
162+
ep_ctx_handle_);
163+
164+
compute_info.create_state_func =
165+
[&backend_manager](ComputeContext* context, FunctionState* state) {
166+
OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{
167+
.allocate_func = context->allocate_func,
168+
.destroy_func = context->release_func,
169+
.allocator_handle = context->allocator_handle,
170+
.backend_manager = backend_manager};
171+
*state = static_cast<FunctionState>(p);
172+
return 0;
173+
};
174+
175+
compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) {
176+
auto function_state = static_cast<OpenVINOEPFunctionState*>(state);
177+
try {
178+
function_state->backend_manager.Compute(context);
179+
} catch (const std::exception& ex) {
180+
return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what());
181+
}
182+
return Status::OK();
183+
};
190184

191-
node_compute_funcs.push_back(std::move(compute_info));
185+
compute_info.release_state_func =
186+
[](FunctionState state) {
187+
if (state) {
188+
OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state);
189+
delete function_state;
190+
}
191+
};
192192

193-
if (!status.IsOK()) {
194-
break;
193+
node_compute_funcs.push_back(std::move(compute_info));
194+
195+
if (!status.IsOK()) {
196+
break;
197+
}
195198
}
196-
}
197199

198-
// The block below is executed during EP context model generation
199-
if (session_context_.so_context_enable &&
200-
session_context_.so_share_ep_contexts &&
201-
!metadata.empty()) {
202-
// For models after the first the metadata name comes from the shared context
203-
fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath;
204-
if (metadata_file_path.empty()) {
205-
metadata_file_path = session_context_.so_context_file_path;
206-
std::string name_append{"_metadata.bin"};
200+
// The block below is executed during EP context model generation
201+
if (session_context_.so_context_enable &&
202+
session_context_.so_share_ep_contexts &&
203+
!metadata.empty()) {
204+
// For models after the first the metadata name comes from the shared context
205+
fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath;
207206
if (metadata_file_path.empty()) {
208-
metadata_file_path = session_context_.onnx_model_path_name;
209-
name_append = "_ctx" + name_append;
207+
metadata_file_path = session_context_.so_context_file_path;
208+
std::string name_append{"_metadata.bin"};
209+
if (metadata_file_path.empty()) {
210+
metadata_file_path = session_context_.onnx_model_path_name;
211+
name_append = "_ctx" + name_append;
212+
}
213+
auto metadata_filename = metadata_file_path.stem().string() + name_append;
214+
metadata_file_path.replace_filename(metadata_filename);
215+
shared_context_->shared_weights.metadata_filepath = metadata_file_path;
210216
}
211-
auto metadata_filename = metadata_file_path.stem().string() + name_append;
212-
metadata_file_path.replace_filename(metadata_filename);
213-
shared_context_->shared_weights.metadata_filepath = metadata_file_path;
214-
}
215217

216-
// Metadata is generated only for shared contexts
217-
// If saving metadata then save it to the provided path or use the original model path
218-
// Multiple calls to Compile() will update the metadata and for the last call
219-
// the resulting file will contain the aggregated content
220-
std::ofstream file{metadata_file_path, std::ios::binary};
221-
ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path);
222-
file << metadata;
218+
// Metadata is generated only for shared contexts
219+
// If saving metadata then save it to the provided path or use the original model path
220+
// Multiple calls to Compile() will update the metadata and for the last call
221+
// the resulting file will contain the aggregated content
222+
std::ofstream file{metadata_file_path, std::ios::binary};
223+
ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path);
224+
file << metadata;
225+
}
226+
} catch (const ovep_exception& ex) {
227+
status = ex;
223228
}
224229

225230
if (session_context_.so_stop_share_ep_contexts) {

0 commit comments

Comments
 (0)