|
12 | 12 | #include "core/providers/openvino/onnx_ctx_model_helper.h" |
13 | 13 | #include "core/providers/openvino/ov_versions/capability.h" |
14 | 14 | #include "core/providers/openvino/qdq_transformations/qdq_stripping.h" |
| 15 | +#include "core/providers/openvino/exceptions.h" |
15 | 16 | #include "core/session/onnxruntime_session_options_config_keys.h" |
16 | 17 | #include "openvino/core/version.hpp" |
17 | 18 | #ifdef USE_OVEP_NPU_MEMORY |
@@ -102,124 +103,128 @@ common::Status OpenVINOExecutionProvider::Compile( |
102 | 103 | auto& logger = *GetLogger(); |
103 | 104 | Status status = Status::OK(); |
104 | 105 |
|
105 | | - bool is_epctx_model = false; |
106 | | - if (!fused_nodes.empty()) { |
107 | | - // Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext |
108 | | - const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get(); |
109 | | - session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string(); |
110 | | - session_context_.onnx_opset_version = |
111 | | - graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain); |
112 | | - |
113 | | - // OVIR wrapped in epctx should be treated as source but this code does not |
114 | | - // This corner case is not in use and will be addressed in a future commit |
115 | | - is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0); |
116 | | - } |
117 | | - |
118 | | - // The block below is executed during EP context model inference |
119 | | - auto& metadata = shared_context_->shared_weights.metadata; // Metadata object in memory |
120 | | - if (session_context_.so_share_ep_contexts && |
121 | | - is_epctx_model && |
122 | | - metadata.empty()) { |
123 | | - fs::path context_model_file_path = session_context_.so_context_file_path; |
124 | | - if (context_model_file_path.empty()) { |
125 | | - // If ep.context_file_path is not set the input model path is used |
126 | | - context_model_file_path = session_context_.onnx_model_path_name; |
| 106 | + try { |
| 107 | + bool is_epctx_model = false; |
| 108 | + if (!fused_nodes.empty()) { |
| 109 | + // Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext |
| 110 | + const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get(); |
| 111 | + session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string(); |
| 112 | + session_context_.onnx_opset_version = |
| 113 | + graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain); |
| 114 | + |
| 115 | + // OVIR wrapped in epctx should be treated as source but this code does not |
| 116 | + // This corner case is not in use and will be addressed in a future commit |
| 117 | + is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0); |
127 | 118 | } |
128 | 119 |
|
129 | | - // Metadata is always read from model location, this could be a source or epctx model |
130 | | - fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin"; |
131 | | - fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename; |
132 | | - std::ifstream file(metadata_file_path, std::ios::binary); |
133 | | - ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string()); |
134 | | - shared_context_->shared_weights.metadata_filepath = std::move(metadata_file_path); |
135 | | - file >> metadata; |
136 | | - } |
137 | | - |
138 | | - struct OpenVINOEPFunctionState { |
139 | | - AllocateFunc allocate_func = nullptr; |
140 | | - DestroyFunc destroy_func = nullptr; |
141 | | - AllocatorHandle allocator_handle = nullptr; |
142 | | - BackendManager& backend_manager; |
143 | | - }; |
144 | | - |
145 | | - for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) { |
146 | | - const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph; |
147 | | - const Node& fused_node = fused_node_graph.fused_node; |
148 | | - |
149 | | - NodeComputeInfo compute_info; |
150 | | - |
151 | | - // During backend creation, we check if user wants to use precompiled blob onnx model or the original model |
152 | | - // For precompiled blob, directly load the model instead of compiling the model |
153 | | - // For original model, check if the user wants to export a model with pre-compiled blob |
154 | | - |
155 | | - auto& backend_manager = backend_managers_.emplace_back(session_context_, |
156 | | - *shared_context_, |
157 | | - fused_node, |
158 | | - graph_body_viewer, |
159 | | - logger, |
160 | | - ep_ctx_handle_); |
161 | | - |
162 | | - compute_info.create_state_func = |
163 | | - [&backend_manager](ComputeContext* context, FunctionState* state) { |
164 | | - OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{ |
165 | | - .allocate_func = context->allocate_func, |
166 | | - .destroy_func = context->release_func, |
167 | | - .allocator_handle = context->allocator_handle, |
168 | | - .backend_manager = backend_manager}; |
169 | | - *state = static_cast<FunctionState>(p); |
170 | | - return 0; |
171 | | - }; |
172 | | - |
173 | | - compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) { |
174 | | - auto function_state = static_cast<OpenVINOEPFunctionState*>(state); |
175 | | - try { |
176 | | - function_state->backend_manager.Compute(context); |
177 | | - } catch (const std::exception& ex) { |
178 | | - return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what()); |
| 120 | + // The block below is executed during EP context model inference |
| 121 | + auto& metadata = shared_context_->shared_weights.metadata; // Metadata object in memory |
| 122 | + if (session_context_.so_share_ep_contexts && |
| 123 | + is_epctx_model && |
| 124 | + metadata.empty()) { |
| 125 | + fs::path context_model_file_path = session_context_.so_context_file_path; |
| 126 | + if (context_model_file_path.empty()) { |
| 127 | + // If ep.context_file_path is not set the input model path is used |
| 128 | + context_model_file_path = session_context_.onnx_model_path_name; |
179 | 129 | } |
180 | | - return Status::OK(); |
| 130 | + |
| 131 | + // Metadata is always read from model location, this could be a source or epctx model |
| 132 | + fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin"; |
| 133 | + fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename; |
| 134 | + std::ifstream file(metadata_file_path, std::ios::binary); |
| 135 | + ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string()); |
| 136 | + shared_context_->shared_weights.metadata_filepath = std::move(metadata_file_path); |
| 137 | + file >> metadata; |
| 138 | + } |
| 139 | + |
| 140 | + struct OpenVINOEPFunctionState { |
| 141 | + AllocateFunc allocate_func = nullptr; |
| 142 | + DestroyFunc destroy_func = nullptr; |
| 143 | + AllocatorHandle allocator_handle = nullptr; |
| 144 | + BackendManager& backend_manager; |
181 | 145 | }; |
182 | 146 |
|
183 | | - compute_info.release_state_func = |
184 | | - [](FunctionState state) { |
185 | | - if (state) { |
186 | | - OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state); |
187 | | - delete function_state; |
188 | | - } |
189 | | - }; |
| 147 | + for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) { |
| 148 | + const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph; |
| 149 | + const Node& fused_node = fused_node_graph.fused_node; |
| 150 | + |
| 151 | + NodeComputeInfo compute_info; |
| 152 | + |
| 153 | + // During backend creation, we check if user wants to use precompiled blob onnx model or the original model |
| 154 | + // For precompiled blob, directly load the model instead of compiling the model |
| 155 | + // For original model, check if the user wants to export a model with pre-compiled blob |
| 156 | + |
| 157 | + auto& backend_manager = backend_managers_.emplace_back(session_context_, |
| 158 | + *shared_context_, |
| 159 | + fused_node, |
| 160 | + graph_body_viewer, |
| 161 | + logger, |
| 162 | + ep_ctx_handle_); |
| 163 | + |
| 164 | + compute_info.create_state_func = |
| 165 | + [&backend_manager](ComputeContext* context, FunctionState* state) { |
| 166 | + OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{ |
| 167 | + .allocate_func = context->allocate_func, |
| 168 | + .destroy_func = context->release_func, |
| 169 | + .allocator_handle = context->allocator_handle, |
| 170 | + .backend_manager = backend_manager}; |
| 171 | + *state = static_cast<FunctionState>(p); |
| 172 | + return 0; |
| 173 | + }; |
| 174 | + |
| 175 | + compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) { |
| 176 | + auto function_state = static_cast<OpenVINOEPFunctionState*>(state); |
| 177 | + try { |
| 178 | + function_state->backend_manager.Compute(context); |
| 179 | + } catch (const std::exception& ex) { |
| 180 | + return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what()); |
| 181 | + } |
| 182 | + return Status::OK(); |
| 183 | + }; |
190 | 184 |
|
191 | | - node_compute_funcs.push_back(std::move(compute_info)); |
| 185 | + compute_info.release_state_func = |
| 186 | + [](FunctionState state) { |
| 187 | + if (state) { |
| 188 | + OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state); |
| 189 | + delete function_state; |
| 190 | + } |
| 191 | + }; |
192 | 192 |
|
193 | | - if (!status.IsOK()) { |
194 | | - break; |
| 193 | + node_compute_funcs.push_back(std::move(compute_info)); |
| 194 | + |
| 195 | + if (!status.IsOK()) { |
| 196 | + break; |
| 197 | + } |
195 | 198 | } |
196 | | - } |
197 | 199 |
|
198 | | - // The block below is executed during EP context model generation |
199 | | - if (session_context_.so_context_enable && |
200 | | - session_context_.so_share_ep_contexts && |
201 | | - !metadata.empty()) { |
202 | | - // For models after the first the metadata name comes from the shared context |
203 | | - fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath; |
204 | | - if (metadata_file_path.empty()) { |
205 | | - metadata_file_path = session_context_.so_context_file_path; |
206 | | - std::string name_append{"_metadata.bin"}; |
| 200 | + // The block below is executed during EP context model generation |
| 201 | + if (session_context_.so_context_enable && |
| 202 | + session_context_.so_share_ep_contexts && |
| 203 | + !metadata.empty()) { |
| 204 | + // For models after the first the metadata name comes from the shared context |
| 205 | + fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath; |
207 | 206 | if (metadata_file_path.empty()) { |
208 | | - metadata_file_path = session_context_.onnx_model_path_name; |
209 | | - name_append = "_ctx" + name_append; |
| 207 | + metadata_file_path = session_context_.so_context_file_path; |
| 208 | + std::string name_append{"_metadata.bin"}; |
| 209 | + if (metadata_file_path.empty()) { |
| 210 | + metadata_file_path = session_context_.onnx_model_path_name; |
| 211 | + name_append = "_ctx" + name_append; |
| 212 | + } |
| 213 | + auto metadata_filename = metadata_file_path.stem().string() + name_append; |
| 214 | + metadata_file_path.replace_filename(metadata_filename); |
| 215 | + shared_context_->shared_weights.metadata_filepath = metadata_file_path; |
210 | 216 | } |
211 | | - auto metadata_filename = metadata_file_path.stem().string() + name_append; |
212 | | - metadata_file_path.replace_filename(metadata_filename); |
213 | | - shared_context_->shared_weights.metadata_filepath = metadata_file_path; |
214 | | - } |
215 | 217 |
|
216 | | - // Metadata is generated only for shared contexts |
217 | | - // If saving metadata then save it to the provided path or use the original model path |
218 | | - // Multiple calls to Compile() will update the metadata and for the last call |
219 | | - // the resulting file will contain the aggregated content |
220 | | - std::ofstream file{metadata_file_path, std::ios::binary}; |
221 | | - ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path); |
222 | | - file << metadata; |
| 218 | + // Metadata is generated only for shared contexts |
| 219 | + // If saving metadata then save it to the provided path or use the original model path |
| 220 | + // Multiple calls to Compile() will update the metadata and for the last call |
| 221 | + // the resulting file will contain the aggregated content |
| 222 | + std::ofstream file{metadata_file_path, std::ios::binary}; |
| 223 | + ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path); |
| 224 | + file << metadata; |
| 225 | + } |
| 226 | + } catch (const ovep_exception& ex) { |
| 227 | + status = ex; |
223 | 228 | } |
224 | 229 |
|
225 | 230 | if (session_context_.so_stop_share_ep_contexts) { |
|
0 commit comments