@@ -140,6 +140,7 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
     };
   }
   inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, num_infer_req, std::move(initializer)));
+  bindings_ = std::make_unique<OnnxToOvNetworkBindings>(exe_network_, subgraph_context_);
 }
 
 bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
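
Note: the OnnxToOvNetworkBindings type constructed above is defined outside this hunk. As a minimal sketch only, here is the shape the call sites below appear to rely on; the field, type, and constructor names are inferred from how bindings_ is used in this diff (and assume the surrounding backend headers), not copied from the actual header:

struct OnnxToOvNetworkBindings {
  struct ParameterInfo {
    std::string name;                 // tensor name shared by ONNX and OpenVINO
    size_t onnx_index;                // index into the ORT KernelContext inputs/outputs
    ov::element::Type type;           // OpenVINO element type
    ov::PartialShape ov_shape;        // OpenVINO shape; ov_shape.get_shape() is used below
    std::vector<int64_t> onnx_shape;  // shape handed to context.GetOutput()
  };
  std::vector<ParameterInfo> network_inputs_;
  std::vector<ParameterInfo> network_outputs_;

  // Resolve the ONNX <-> OpenVINO name/index/type/shape pairing once at backend
  // construction, so the per-inference loops no longer have to search by name.
  OnnxToOvNetworkBindings(OVExeNetwork& exe_network, SubGraphContext& subgraph_context) {
    auto populate = [&](auto& bindings, const auto& onnx_names, const auto& ov_params) {
      for (const auto& [onnx_name, onnx_index] : onnx_names) {
        auto it = std::find_if(ov_params.begin(), ov_params.end(),
                               [&](const auto& p) { return p.get_names().contains(onnx_name); });
        ORT_ENFORCE(it != ov_params.end(), "Name mismatch between OpenVINO and ONNX: ", onnx_name);
        ParameterInfo info{onnx_name, static_cast<size_t>(onnx_index), it->get_element_type(),
                           it->get_partial_shape(), {}};
        if (info.ov_shape.is_static()) {
          const auto& s = info.ov_shape.get_shape();
          info.onnx_shape.assign(s.begin(), s.end());
        }
        bindings.push_back(std::move(info));
      }
    };
    populate(network_inputs_, subgraph_context.input_names, exe_network.Get().inputs());
    populate(network_outputs_, subgraph_context.output_names, exe_network.Get().outputs());
  }
};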
@@ -362,29 +363,16 @@ void BasicBackend::SetNumThreads(ov::AnyMap& device_config) {
 // an Infer Request indexed by infer_req_idx
 void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
   try {
-    auto ov_input_info = exe_network_.Get().inputs();
-
-    // Loop over subgraph original input names to find the correspondent OV input name
-    for (const auto& [onnx_input_name, onnx_input_index] : subgraph_context_.input_names) {
-      std::string input_name{};
-      uint32_t input_idx = 0;
-      for (uint32_t index = 0; const auto& ov_input : ov_input_info) {
-        if (ov_input.get_names().contains(onnx_input_name)) {
-          input_name = onnx_input_name;
-          input_idx = index;
-          break;
-        }
-        index++;
-      }
-      ORT_ENFORCE(!input_name.empty(), log_tag,
-                  "Input names mismatch between OpenVINO and ONNX. ", onnx_input_name,
-                  " doesn't exist in the list of OpenVINO input tensor names");
+    bool cpu_or_gpu = (session_context_.device_type.find("CPU") != std::string::npos ||
+                       session_context_.device_type.find("GPU") != std::string::npos);
+    bool npu = (session_context_.device_type.find("NPU") != std::string::npos);
+
+    for (const auto& input_info : bindings_->network_inputs_) {
       size_t batch_slice_idx = 0;
       if (subgraph_context_.has_dynamic_input_shape &&
           !session_context_.disable_dynamic_shapes &&
-          (session_context_.device_type.find("CPU") != std::string::npos ||
-           session_context_.device_type.find("GPU") != std::string::npos)) {
-        auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));
+          cpu_or_gpu) {
+        auto tensor = context.GetInput(input_info.onnx_index);
         auto tensor_info = tensor.GetTensorTypeAndShapeInfo();
         auto tensor_shape = tensor_info.GetShape();
         auto tensor_size = tensor_shape.size();
@@ -395,98 +383,72 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
           input_tensor_shape[tensor_iter] = *i;
           tensor_iter += 1;
         }
-        const auto& input = ov_input_info.at(input_idx);
         OVTensorPtr tensor_ptr;
         // avoid input copies on the CPU device
         if (session_context_.device_type.find("CPU") != std::string::npos) {
-          tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input_tensor_shape,
+          tensor_ptr = std::make_shared<ov::Tensor>(input_info.type, input_tensor_shape,
                                                     (void*)tensor_data);
         } else {
-          tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input_tensor_shape);
-          FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
+          tensor_ptr = std::make_shared<ov::Tensor>(input_info.type, input_tensor_shape);
+          FillInputBlob(tensor_ptr, batch_slice_idx, input_info.name, context, subgraph_context_);
         }
 
         try {
-          infer_request->SetTensor(std::move(input_name), tensor_ptr);
+          infer_request->SetTensor(input_info.name, tensor_ptr);
         } catch (const char* msg) {
           ORT_THROW(msg);
         }
       } else {
-        if ((session_context_.device_type.find("CPU") != std::string::npos ||
-             session_context_.device_type.find("GPU") != std::string::npos)) {
+        if (cpu_or_gpu) {
           OVTensorPtr graph_input_blob;
           try {
-            graph_input_blob = infer_request->GetTensor(input_name);
+            graph_input_blob = infer_request->GetTensor(input_info.name);
           } catch (const char* msg) {
             ORT_THROW(msg);
           }
-          FillInputBlob(std::move(graph_input_blob), batch_slice_idx, std::move(input_name), context, subgraph_context_);
+          FillInputBlob(std::move(graph_input_blob), batch_slice_idx, input_info.name, context, subgraph_context_);
         } else {
-          auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));
-          ort_tensor_key_t ort_tensor_key{input_name};
+          auto tensor = context.GetInput(input_info.onnx_index);
+          ort_tensor_key_t ort_tensor_key{input_info.name};
           auto it = ort_ov_tensor_map.find(ort_tensor_key);
-          if ((it == ort_ov_tensor_map.end()) ||
-              (it != ort_ov_tensor_map.end() && (it->second.ort_ptr != tensor.GetTensorRawData()))) {
+          if ((it == ort_ov_tensor_map.end()) || it->second.ort_ptr != tensor.GetTensorRawData()) {
             ov_tensor_data_t ov_tensor_data;
-            const auto& input = ov_input_info.at(input_idx);
-            ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input.get_shape(),
+            ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input_info.type, input_info.ov_shape.get_shape(),
                                                                      const_cast<void*>(tensor.GetTensorRawData()));
 
             ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
             ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;
 
             try {
-              infer_request->SetTensor(std::move(input_name), ov_tensor_data.tensor_ptr);
+              infer_request->SetTensor(input_info.name, ov_tensor_data.tensor_ptr);
             } catch (const char* msg) {
               ORT_THROW(msg);
             }
           }
         }
       }
-    }  // Loop subgraph original input names
+    }  // Loop subgraph original input
 
-    if (session_context_.device_type.find("NPU") != std::string::npos) {
+    if (npu) {
       // Set the output blob as remote blob
-      auto graph_output_info = exe_network_.Get().outputs();
-      auto output_idx = 0;
-      for (auto output_info_iter = graph_output_info.begin();
-           output_info_iter != graph_output_info.end(); ++output_info_iter) {
-        auto output_names = output_info_iter->get_names();
-        std::string onnx_output_name;
-        std::string output_name;
-        // using the output name retrieved from ONNX original to match with the output names returned by OV tensors
-        for (auto it = subgraph_context_.output_names.begin(); it != subgraph_context_.output_names.end(); ++it) {
-          onnx_output_name = it->first;
-          if (output_names.find(onnx_output_name) != output_names.end()) {
-            // Assigning the output_name
-            output_name = it->first;
-            break;
-          }
-        }
-        size_t batch_size = 1;
-        Ort::UnownedValue tensor = GetOutputTensor(context,
-                                                   batch_size,
-                                                   infer_request,
-                                                   output_name,
-                                                   subgraph_context_.output_names);
-        ort_tensor_key_t ort_tensor_key{output_name};
+      for (const auto& output_info : bindings_->network_outputs_) {
+        Ort::UnownedValue tensor = context.GetOutput(output_info.onnx_index, output_info.onnx_shape);
+
+        ort_tensor_key_t ort_tensor_key{output_info.name};
         const auto& it = ort_ov_tensor_map.find(ort_tensor_key);
-        if ((it == ort_ov_tensor_map.end()) ||
-            (it != ort_ov_tensor_map.end() && (it->second.ort_ptr != tensor.GetTensorRawData()))) {
+        if ((it == ort_ov_tensor_map.end()) || (it->second.ort_ptr != tensor.GetTensorRawData())) {
           ov_tensor_data_t ov_tensor_data;
-          const auto& output = graph_output_info.at(output_idx);
           ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
-          ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output.get_element_type(), output.get_shape(),
+          ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output_info.type, output_info.ov_shape.get_shape(),
                                                                    const_cast<void*>(tensor.GetTensorRawData()));
           ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;
 
           try {
-            infer_request->SetTensor(std::move(output_name), ov_tensor_data.tensor_ptr);
+            infer_request->SetTensor(output_info.name, ov_tensor_data.tensor_ptr);
           } catch (const char* msg) {
             ORT_THROW(msg);
           }
         }
-        output_idx++;
       }
     }
 
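
Side note on the caching that both the input path and the NPU output path above rely on: ort_ov_tensor_map is keyed by tensor name and remembers the raw ORT data pointer each ov::Tensor wrapper was created over, so a new zero-copy wrapper is built only when ONNX Runtime hands back a different buffer. A rough sketch of those pre-existing helper types, with names taken from this diff (the real definitions live in the backend header and may differ):

using ort_tensor_key_t = std::string;  // keyed by ONNX tensor name
struct ov_tensor_data_t {
  OVTensorPtr tensor_ptr;  // ov::Tensor wrapping the ORT buffer without copying it
  const void* ort_ptr;     // raw ORT data pointer the wrapper was built over
};
std::unordered_map<ort_tensor_key_t, ov_tensor_data_t> ort_ov_tensor_map;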
@@ -611,44 +573,22 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
 void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
   // Wait for Async inference completion
   try {
+    bool cpu_or_gpu = session_context_.device_type.find("CPU") != std::string::npos ||
+                      session_context_.device_type.find("GPU") != std::string::npos;
+
     infer_request->WaitRequest();
-    auto graph_output_info = exe_network_.Get().outputs();
-    for (auto output_info_iter = graph_output_info.begin();
-         output_info_iter != graph_output_info.end(); ++output_info_iter) {
-      OVTensorPtr graph_output_blob;
-      auto output_names = output_info_iter->get_names();
-      std::string onnx_output_name;
-      std::string output_name;
-      bool output_name_found = false;
-      // using the output name retrieved from ONNX original to match with the output names returned by OV tensors
-      for (auto it = subgraph_context_.output_names.begin(); it != subgraph_context_.output_names.end(); ++it) {
-        onnx_output_name = it->first;
-        if (output_names.find(onnx_output_name) != output_names.end()) {
-          // Assigning the output_name
-          output_name = it->first;
-          output_name_found = true;
-          break;
-        }
-      }
-      if (!output_name_found) {
-        ORT_THROW(
-            log_tag +
-            "Output names mismatch between OpenVINO and ONNX. "
-            "[ONNX Output: ] " +
-            onnx_output_name +
-            " doesn't exist in the "
-            "list of OpenVINO output tensor names");
-      }
-      if ((session_context_.device_type.find("CPU") != std::string::npos ||
-           session_context_.device_type.find("GPU") != std::string::npos)) {
+
+    if (cpu_or_gpu) {
+      for (const auto& output_info : bindings_->network_outputs_) {
+        OVTensorPtr graph_output_blob;
         try {
-          graph_output_blob = infer_request->GetTensor(output_name);
+          graph_output_blob = infer_request->GetTensor(output_info.name);
         } catch (const char* msg) {
           ORT_THROW(msg);
         }
         size_t batch_size = 1;
         Ort::UnownedValue output_tensor =
-            GetOutputTensor(context, batch_size, infer_request, std::move(output_name), subgraph_context_.output_names);
+            GetOutputTensor(context, batch_size, infer_request, output_info.name, subgraph_context_.output_names);
         auto mem_info = output_tensor.GetTensorMemoryInfo();
         if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
           return;