
Commit be8f8be

sfatimar, TejalKhade28, and preetha-intel authored

Sahar/tdr failure (#698)

* Catch exception with TDR
* Handle exceptions during parallel execution with OVEP
* Remove IO Buffer Implementation

---------

Co-authored-by: TejalKhade28 <tejal.khade@intel.com>
Co-authored-by: Preetha Veeramalai <preetha.veeramalai@intel.com>
1 parent be8fded commit be8f8be

File tree: 6 files changed, +64 −245 lines changed

cmake/onnxruntime_providers_openvino.cmake

Lines changed: 0 additions & 5 deletions

@@ -30,11 +30,6 @@
   endif()
 
   list(APPEND OPENVINO_LIB_LIST openvino::frontend::onnx openvino::runtime ${PYTHON_LIBRARIES})
-  if ((DEFINED ENV{OPENCL_LIBS}) AND (DEFINED ENV{OPENCL_INCS}) AND onnxruntime_USE_OPENVINO_GPU)
-    add_definitions(-DIO_BUFFER_ENABLED=1)
-    list(APPEND OPENVINO_LIB_LIST $ENV{OPENCL_LIBS})
-  endif()
-
   source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_openvino_cc_srcs})
   onnxruntime_add_shared_library_module(onnxruntime_providers_openvino ${onnxruntime_providers_openvino_cc_srcs} "${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc")
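For reference, the `add_definitions(-DIO_BUFFER_ENABLED=1)` removed above is what activated the IO_BUFFER_ENABLED preprocessor guards in the sources changed below; without it, those guarded blocks were dead code whenever OpenCL was not configured. A minimal sketch of how such a CMake-injected compile definition gates a code path (the function and messages here are illustrative, not the provider's actual code):

    // Sketch only: a CMake-supplied compile definition selecting a code path at build time.
    // The macro name matches the one removed in this commit; RunInference() is a placeholder.
    #include <iostream>

    void RunInference() {
    #if defined(IO_BUFFER_ENABLED)
      // Only compiled when the build passed -DIO_BUFFER_ENABLED=1 (removed by this commit).
      std::cout << "GPU IO buffering path\n";
    #else
      std::cout << "default compile/infer path\n";
    #endif
    }

    int main() {
      RunInference();
      return 0;
    }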

onnxruntime/core/providers/openvino/backends/basic_backend.cc

Lines changed: 37 additions & 174 deletions

@@ -62,25 +62,6 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
   try {
     // IO_BUFFER is enabled on GPU HW.
     // Pre-requisite is provider_option "context" must be set
-#if defined(IO_BUFFER_ENABLED)
-    cl_context ctx = static_cast<cl_context>(session_context_.context);
-    remote_context_ = new ov::intel_gpu::ocl::ClContext(OVCore::Get()->core, ctx);
-    if (subgraph_context_.is_ep_ctx_graph) {
-      exe_network_ = OVCore::Get()->ImportModel(*model_stream,
-                                                remote_context_,
-                                                subgraph_context_.subgraph_name);
-      model_stream.reset();  // Delete stream after it is no longer needed
-    } else {
-      std::string model = model_proto->SerializeAsString();
-      if (!subgraph_context.has_dynamic_input_shape) {
-        model_proto.reset()
-      }
-      auto ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
-      LOGS_DEFAULT(INFO) << log_tag << "IO Buffering Enabled";
-      exe_network_ = OVCore::Get()->CompileModel(
-          ov_model, remote_context_, subgraph_context_.subgraph_name);
-    }
-#else  // !IO_BUFFER_ENABLED
     auto auto_unified_compile = ((hw_target.find("AUTO") == std::string::npos) ||
                                  (session_context_.OpenVINO_Version.at(0) >= 2024 &&
                                   session_context_.OpenVINO_Version.at(1) > 2));
@@ -117,7 +98,6 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
       exe_network_ = OVCore::Get()->CompileModel(
           ov_model, hw_target, device_config, subgraph_context_.subgraph_name);
     }
-#endif
     LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
   } catch (const char* msg) {
     ORT_THROW(msg);
@@ -459,150 +439,46 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
   }
 }
 
-#ifdef IO_BUFFER_ENABLED
-// Wait for Remote Aynchronous inference completion
-void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
-  try {
-    auto graph_input_info = exe_network_.Get().inputs();
-    int input_idx = 0;
-    for (auto input_info_iter = graph_input_info.begin();
-         input_info_iter != graph_input_info.end(); ++input_info_iter) {
-      auto input_names = input_info_iter->get_names();
-      std::string onnx_input_name;
-      std::string input_name;
-      // use names retrieved from original ONNX model to assign the right onnx input name for the graph
-      for (auto it = subgraph_context_.input_names.begin(); it != subgraph_context_.input_names.end(); ++it) {
-        if (it->second == input_idx) {
-          onnx_input_name = it->first;
-          break;
-        }
-      }
-      // using the input name retrieved from ONNX original to match with the input names returned by OV tensors
-      if (input_names.find(onnx_input_name) != input_names.end()) {
-        input_name = onnx_input_name;
-      } else {
-        ORT_THROW(log_tag +
-                  "Input names mismatch between OpenVINO and ONNX. " +
-                  onnx_input_name +
-                  " doesn't exist in the list of OpenVINO input tensor names");
-      }
-      input_idx++;
-      // Kernel Context Input Buffer
-      const auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));
-      // If the ORTValue wraps a device pointer
-      auto mem_info = tensor.GetTensorMemoryInfo();
-      if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
-        // Get the shared buffer pointer
-        const void* tensor_data = tensor.GetTensorRawData();
-        const cl::Buffer* shared_buffer_const = static_cast<const cl::Buffer*>(tensor_data);
-        // Create an Input Remote Blob
-        auto input = graph_input_info.at(0);
-        auto remote_blob = remote_context_->create_tensor(
-            input.get_element_type(), input.get_shape(), *shared_buffer_const);
-        ov::Tensor tensor_remote = static_cast<ov::Tensor>(remote_blob);
-        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor_remote);
-        infer_request->SetTensor(input_name, tensor_ptr);
-      } else {
-        OVTensorPtr graph_input_blob;
-        graph_input_blob = infer_request->GetTensor(input_name);
-        size_t batch_slice_idx = 0;
-        FillInputBlob(graph_input_blob, batch_slice_idx, input_name, context, subgraph_context_);
-      }
-    }
-
-    // Set the output blob as remote blob
-    auto graph_output_info = exe_network_.Get().outputs();
-    for (auto output_info_iter = graph_output_info.begin();
-         output_info_iter != graph_output_info.end(); ++output_info_iter) {
-      auto output_names = output_info_iter->get_names();
-      std::string onnx_output_name;
-      std::string output_name;
-      bool output_name_found = false;
-      // using the output name retrieved from ONNX original to match with the output names returned by OV tensors
-      for (auto it = subgraph_context_.output_names.begin(); it != subgraph_context_.output_names.end(); ++it) {
-        onnx_output_name = it->first;
-        if (output_names.find(onnx_output_name) != output_names.end()) {
-          // Assigning the output_name
-          output_name = it->first;
-          output_name_found = true;
-          break;
-        }
-      }
-      if (!output_name_found) {
-        ORT_THROW(
-            log_tag +
-            "Output names mismatch between OpenVINO and ONNX. [ONNX Output: ] " +
-            onnx_output_name + " doesn't exist in the list of OpenVINO output tensor names");
-      }
-
-      size_t batch_size = 1;
-      Ort::UnownedValue tensor = GetOutputTensor(context,
-                                                 batch_size,
-                                                 infer_request,
-                                                 output_name,
-                                                 subgraph_context_.output_names);
-      auto mem_info = tensor.GetTensorMemoryInfo();
-      // Check if ORT Value wraps a device pointer
-      if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
-        const void* tensor_data = tensor.GetTensorRawData();
-        const cl::Buffer* shared_buffer_const = static_cast<const cl::Buffer*>(tensor_data);
-        // Create a shared Blob, set the Infer Request Output Blob
-        auto output = graph_output_info.at(0);
-        auto remote_tensor =
-            remote_context_->create_tensor(output.get_element_type(), output.get_shape(), *shared_buffer_const);
-        ov::Tensor tensor_t = static_cast<ov::Tensor>(remote_tensor);
-        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor_t);
-        try {
-          infer_request->SetTensor(output_name, tensor_ptr);
-        } catch (const char* msg) {
-          ORT_THROW(msg);
-        }
-      }
-    }
-
-    // Start Async inference
-    infer_request->StartAsync();
-  } catch (const char* msg) {
-    ORT_THROW(msg);
-  }
-}
-#endif
-
 // Wait for asynchronous inference completion on an Infer Request object indexed by infer_req_idx
 // and copy the results into a slice location within the batched output buffer indexed by batch_slice_idx
 void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
   // Wait for Async inference completion
   try {
-    bool cpu_or_gpu = session_context_.device_type.find("CPU") != std::string::npos ||
-                      session_context_.device_type.find("GPU") != std::string::npos;
-
     infer_request->WaitRequest();
+  } catch (const std::runtime_error& e) {
+    infer_request->CancelRequest();
+    inferRequestsQueue_->deleteRequest();
+    ORT_THROW(log_tag + e.what());
+  }
 
-  if (cpu_or_gpu) {
-    for (const auto& output_info : bindings_->network_outputs_) {
-      OVTensorPtr graph_output_blob;
-      try {
-        graph_output_blob = infer_request->GetTensor(output_info.name);
-      } catch (const char* msg) {
-        ORT_THROW(msg);
-      }
-      size_t batch_size = 1;
-      Ort::UnownedValue output_tensor =
-          GetOutputTensor(context, batch_size, infer_request, output_info.name, subgraph_context_.output_names);
-      auto mem_info = output_tensor.GetTensorMemoryInfo();
-      if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
+  bool cpu_or_gpu = session_context_.device_type.find("CPU") != std::string::npos ||
+                    session_context_.device_type.find("GPU") != std::string::npos;
+  if (cpu_or_gpu) {
+    for (const auto& output_info : bindings_->network_outputs_) {
+      OVTensorPtr graph_output_blob;
+      try {
+        graph_output_blob = infer_request->GetTensor(output_info.name);
+      } catch (const char* msg) {
+        ORT_THROW(msg);
+      }
+      size_t batch_size = 1;
+      Ort::UnownedValue output_tensor =
+          GetOutputTensor(context, batch_size, infer_request, output_info.name, subgraph_context_.output_names);
+      auto mem_info = output_tensor.GetTensorMemoryInfo();
+      if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
         return;
-      } else {
-        size_t batch_slice = 0;
-        FillOutputBlob(std::move(graph_output_blob), output_tensor, batch_slice);
-      }
+      } else {
+        size_t batch_slice = 0;
+        FillOutputBlob(std::move(graph_output_blob), output_tensor, batch_slice);
       }
     }
+  }
 
-  if (!const_outputs_map_.empty()) {
-    for (const auto& item : const_outputs_map_) {
-      const auto& out_name = item.first;
-      auto node = item.second;
+  if (!const_outputs_map_.empty()) {
+    for (const auto& item : const_outputs_map_) {
+      const auto& out_name = item.first;
+      auto node = item.second;
+      try {
         Ort::UnownedValue output_tensor = GetOutputTensor(context,
                                                           out_name,
                                                           subgraph_context_.output_names,
@@ -613,10 +489,10 @@ void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRe
         } else {
           FillOutputsWithConstantData(std::move(node), output_tensor);
         }
+      } catch (std::string const& msg) {
+        ORT_THROW(msg);
       }
     }
-  } catch (const char* msg) {
-    ORT_THROW(msg);
   }
 }
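Taken together, the two hunks above restructure CompleteAsyncInference so that only WaitRequest() is guarded for device-loss style failures (e.g. a GPU TDR surfacing as std::runtime_error); on that path the request is cancelled and retired from the pool via deleteRequest() before the error is rethrown. A condensed, self-contained sketch of that control flow, using placeholder types in place of OVInferRequest and the ORT kernel context:

    // Condensed sketch of the new CompleteAsyncInference error path (not the provider's real classes).
    #include <iostream>
    #include <stdexcept>
    #include <string>

    struct FakeInferRequest {                 // stand-in for OVInferRequest
      bool fail = false;
      void WaitRequest() {
        if (fail) throw std::runtime_error("device removed (TDR)");
      }
      void CancelRequest() { std::cout << "request cancelled\n"; }
    };

    struct FakeRequestPool {                  // stand-in for InferRequestsQueue
      int live_requests = 4;
      void deleteRequest() { --live_requests; }
    };

    void CompleteAsyncInference(FakeInferRequest& req, FakeRequestPool& pool) {
      try {
        req.WaitRequest();                    // only the wait is guarded now
      } catch (const std::runtime_error& e) {
        req.CancelRequest();                  // drop the broken request...
        pool.deleteRequest();                 // ...and shrink the usable pool
        throw std::runtime_error(std::string("[OpenVINO-EP] ") + e.what());
      }
      // output copy / constant-output handling would follow here
    }

    int main() {
      FakeRequestPool pool;
      FakeInferRequest ok, bad;
      bad.fail = true;
      CompleteAsyncInference(ok, pool);
      try {
        CompleteAsyncInference(bad, pool);
      } catch (const std::exception& e) {
        std::cout << "propagated: " << e.what() << " (live requests now " << pool.live_requests << ")\n";
      }
      return 0;
    }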

@@ -650,31 +526,20 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
     }
 
   } else {
-    // Requesting for an idle infer_request from a pool of infer_requests_
     OVInferRequestPtr infer_request;
     infer_request = inferRequestsQueue_->getIdleRequest();
-#ifdef IO_BUFFER_ENABLED
-    if ((session_context_.device_type.find("GPU") != std::string::npos) &&
-        (session_context_.context != nullptr) && session_context_.is_wholly_supported_graph) {
-      try {
-        StartRemoteAsyncInference(context, infer_request);
-      } catch (std::string const& msg) {
-        ORT_THROW(msg);
-      }
-    } else {
-      try {
-        StartAsyncInference(context, infer_request);
-      } catch (std::string const& msg) {
-        ORT_THROW(msg);
-      }
+    if(infer_request == nullptr) {
+      ORT_THROW("OpenVINO Execution Provider :: There are no inference requests");
+      LOGS_DEFAULT(FATAL) << log_tag << "Create Infer Requests do not exist";
+      return;
     }
-#else
+
+    LOGS_DEFAULT(INFO) << log_tag << "Get Idle Request";
     try {
       StartAsyncInference(context, infer_request);
     } catch (const std::runtime_error& e) {
      ORT_THROW(log_tag + " Exception at StartAsyncInference: " + e.what());
     }
-#endif
     try {
       CompleteAsyncInference(context, infer_request);
     } catch (const std::runtime_error& e) {
@@ -696,13 +561,11 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
     // Once the inference is completed, the infer_request becomes free and is placed back into pool of infer_requests_
     inferRequestsQueue_->putIdleRequest(std::move(infer_request));
 #ifndef NDEBUG
-#ifndef IO_BUFFER_ENABLED  // Printing performance counts is disabled when IO_BUFFER_ENABLED
     if (openvino_ep::backend_utils::IsDebugEnabled()) {
       inferRequestsQueue_->printstatus();  // Printing the elements of infer_requests_ vector pool only in debug mode
       std::string& hw_target = session_context_.device_type;
       printPerformanceCounts(std::move(infer_request_), std::cout, hw_target);
     }
-#endif
 #endif
   }
 }
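The Infer() changes above add the matching caller-side guard: getIdleRequest() may now return nullptr once every request has been retired after failures, and Infer() refuses to proceed in that case. A small sketch of the guard, with hypothetical stand-ins for the ORT/OVEP types:

    // Sketch of the caller-side null check added in Infer(); GetIdleRequest is a placeholder
    // mirroring the queue's behaviour, not the provider's actual API.
    #include <iostream>
    #include <memory>

    struct Request {};  // stand-in for OVInferRequest

    std::shared_ptr<Request> GetIdleRequest(int live_threads) {
      // mirrors getIdleRequest(): returns nullptr once every request has been retired
      if (live_threads == 0) return nullptr;
      return std::make_shared<Request>();
    }

    int main() {
      auto infer_request = GetIdleRequest(/*live_threads=*/0);
      if (infer_request == nullptr) {
        // in the provider this is where ORT_THROW is raised
        std::cout << "OpenVINO Execution Provider :: There are no inference requests\n";
        return 1;
      }
      return 0;
    }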

onnxruntime/core/providers/openvino/backends/basic_backend.h

Lines changed: 13 additions & 9 deletions

@@ -94,11 +94,6 @@ class BasicBackend : public IBackend {
   void EnableStreams();
   void SetNumThreads(ov::AnyMap& device_config);
   void StartAsyncInference(Ort::KernelContext& context, std::shared_ptr<OVInferRequest> infer_request);
-
-#ifdef IO_BUFFER_ENABLED
-  void StartRemoteAsyncInference(Ort::KernelContext& context, std::shared_ptr<OVInferRequest> infer_request);
-#endif
-
   void CompleteAsyncInference(Ort::KernelContext& context, std::shared_ptr<OVInferRequest> infer_request);
 
   SessionContext& session_context_;
@@ -108,10 +103,6 @@ class BasicBackend : public IBackend {
   OVExeNetwork exe_network_;
   std::map<std::string, std::shared_ptr<ov::Node>> const_outputs_map_;
   std::unique_ptr<InferRequestsQueue> inferRequestsQueue_;
-#if defined IO_BUFFER_ENABLED
-  OVRemoteContextPtr remote_context_;
-#endif
-
   using ort_tensor_key_t = const std::string;
   std::map<ort_tensor_key_t, ov_tensor_data_t> ort_ov_tensor_map;
   std::unique_ptr<OnnxToOvNetworkBindings> bindings_;
@@ -121,6 +112,7 @@ class InferRequestsQueue {
  public:
   InferRequestsQueue(OVExeNetwork& net, size_t nireq, std::function<void(OVInferRequestPtr)> initializer) {
     OVInferRequestPtr infer_request;
+    live_threads=nireq;
     for (size_t id = 0; id < nireq; id++) {
       infer_request = std::make_shared<OVInferRequest>(net.CreateInferRequest());
       initializer(infer_request);
@@ -152,16 +144,28 @@
 
   OVInferRequestPtr getIdleRequest() {
     std::unique_lock<std::mutex> lock(_mutex);
+    std::cout << "get Idle Request" << live_threads << "\n";
+    if(live_threads==0) {
+      return nullptr;
+    }
+
     _cv.wait(lock, [this] { return infer_requests_.size() > 0; });
     auto request = infer_requests_.at(0);
     infer_requests_.erase(infer_requests_.begin());
     return request;
   }
 
+  void deleteRequest() {
+    std::unique_lock<std::mutex> lock(_mutex);
+    live_threads=live_threads-1;
+    std::cout << "delete Request" << live_threads << "\n";
+  }
+
  private:
   std::mutex _mutex;
   std::condition_variable _cv;
   std::vector<OVInferRequestPtr> infer_requests_;
+  int live_threads;
 };
 
 } // namespace openvino_ep
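Putting the header changes together: the queue now tracks how many requests remain usable (live_threads), getIdleRequest() returns nullptr once that count reaches zero, and deleteRequest() is the hook CompleteAsyncInference calls after cancelling a request hit by a TDR. A simplified, self-contained model of that queue under those assumptions (Request stands in for OVInferRequest; putIdleRequest is only sketched, since its body is not part of this diff):

    // Simplified model of the modified InferRequestsQueue; not the provider's actual header.
    #include <condition_variable>
    #include <memory>
    #include <mutex>
    #include <vector>

    struct Request {};
    using RequestPtr = std::shared_ptr<Request>;

    class InferRequestsQueue {
     public:
      explicit InferRequestsQueue(size_t nireq) {
        live_threads_ = static_cast<int>(nireq);   // mirrors live_threads=nireq in the constructor
        for (size_t i = 0; i < nireq; ++i) infer_requests_.push_back(std::make_shared<Request>());
      }

      RequestPtr getIdleRequest() {
        std::unique_lock<std::mutex> lock(mutex_);
        if (live_threads_ == 0) return nullptr;    // pool exhausted after failed requests were retired
        cv_.wait(lock, [this] { return !infer_requests_.empty(); });
        auto request = infer_requests_.front();
        infer_requests_.erase(infer_requests_.begin());
        return request;
      }

      void putIdleRequest(RequestPtr request) {    // sketched; return a request to the pool
        std::unique_lock<std::mutex> lock(mutex_);
        infer_requests_.push_back(std::move(request));
        cv_.notify_one();
      }

      void deleteRequest() {                       // called after a request is cancelled (e.g. TDR)
        std::unique_lock<std::mutex> lock(mutex_);
        --live_threads_;
      }

     private:
      std::mutex mutex_;
      std::condition_variable cv_;
      std::vector<RequestPtr> infer_requests_;
      int live_threads_ = 0;
    };

    int main() {
      InferRequestsQueue queue(2);
      auto r = queue.getIdleRequest();    // hand one request out
      queue.deleteRequest();              // simulate retiring failed requests
      queue.deleteRequest();
      auto none = queue.getIdleRequest(); // live_threads_ is now 0 -> nullptr
      (void)r;
      (void)none;
      return 0;
    }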
