|
21 | 21 | #include "core/providers/openvino/ov_versions/capability.h" |
22 | 22 | #include "core/providers/openvino/qdq_transformations/qdq_stripping.h" |
23 | 23 | #include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h" |
| 24 | +#include "../../framework/tensorprotoutils.h" |
24 | 25 |
|
25 | 26 | namespace onnxruntime { |
26 | 27 | namespace openvino_ep { |
@@ -453,6 +454,80 @@ static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& on |
453 | 454 | #endif |
454 | 455 | } |
455 | 456 |
|
| 457 | +// this is a helper function to set the data fields, it duplicates ExternalDataInfo::SetExternalLocationToProto |
| 458 | +// but we cannot use that function as it is not part of public provider api. |
| 459 | +static void SetExternalDataFields(ONNX_NAMESPACE::TensorProto* proto_init, const void* data_ptr, int64_t data_size) { |
| 460 | + static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = "*/_ORT_MEM_ADDR_/*"; |
| 461 | + auto* external_data = proto_init->mutable_external_data(); |
| 462 | + bool found_location = false, found_offset = false, found_length = false; |
| 463 | + const int ext_data_size = external_data->size(); |
| 464 | + proto_init->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL); |
| 465 | + |
| 466 | + for (int j = 0; j < ext_data_size; ++j) { |
| 467 | + auto& ext_entry = external_data->at(j); |
| 468 | + auto& key = *ext_entry.mutable_key(); |
| 469 | + if (key == "location") { |
| 470 | + *ext_entry.mutable_value() = ORT_INTERNAL_MEM_INITIALIZER; |
| 471 | + found_location = true; |
| 472 | + } else if (key == "offset") { |
| 473 | + *ext_entry.mutable_value() = std::to_string(reinterpret_cast<uintptr_t>(data_ptr)); |
| 474 | + found_offset = true; |
| 475 | + } else if (key == "length") { |
| 476 | + *ext_entry.mutable_value() = std::to_string(data_size); |
| 477 | + found_length = true; |
| 478 | + } |
| 479 | + } |
| 480 | + |
| 481 | + if (!found_location) { |
| 482 | + auto* new_entry = external_data->Add(); |
| 483 | + *new_entry->mutable_key() = "location"; |
| 484 | + *new_entry->mutable_value() = ORT_INTERNAL_MEM_INITIALIZER; |
| 485 | + } |
| 486 | + if (!found_offset) { |
| 487 | + auto* new_entry = external_data->Add(); |
| 488 | + *new_entry->mutable_key() = "offset"; |
| 489 | + *new_entry->mutable_value() = std::to_string(reinterpret_cast<uintptr_t>(data_ptr)); |
| 490 | + } |
| 491 | + if (!found_length) { |
| 492 | + auto* new_entry = external_data->Add(); |
| 493 | + *new_entry->mutable_key() = "length"; |
| 494 | + *new_entry->mutable_value() = std::to_string(data_size); |
| 495 | + } |
| 496 | +} |
| 497 | + |
| 498 | +static void ReadExternalDataFields(const ONNX_NAMESPACE::TensorProto* src_init, std::string& location, size_t& offset, size_t& length) { |
| 499 | + // Remove constness as we need to use mutable_external_data() to get the entries to read. |
| 500 | + // The entries themselves are not modified... |
| 501 | + auto& mutable_proto = *const_cast<ONNX_NAMESPACE::TensorProto*>(src_init); |
| 502 | + auto* entry_protos = mutable_proto.mutable_external_data(); |
| 503 | + for (int i = 0; i < entry_protos->size(); i++) { |
| 504 | + auto& string_entry_proto{entry_protos->at(i)}; |
| 505 | + const auto& pb_key{*(string_entry_proto.mutable_key())}; |
| 506 | + const auto& pb_value{*(string_entry_proto.mutable_value())}; |
| 507 | + if (pb_key == "location") { |
| 508 | + location = pb_value; |
| 509 | + } else if (pb_key == "offset") { |
| 510 | + const auto res = std::from_chars(pb_value.data(), pb_value.data() + pb_value.size(), offset); |
| 511 | + if (res.ec != std::errc()) { |
| 512 | + std::ostringstream err_msg; |
| 513 | + err_msg << "External data in memory has invalid offset field: " |
| 514 | + << src_init->name() << "], location: " << location |
| 515 | + << ", offset: " << pb_value; |
| 516 | + ORT_THROW(err_msg.str()); |
| 517 | + } |
| 518 | + } else if (pb_key == "length") { |
| 519 | + const auto res = std::from_chars(pb_value.data(), pb_value.data() + pb_value.size(), length); |
| 520 | + if (res.ec != std::errc()) { |
| 521 | + std::ostringstream err_msg; |
| 522 | + err_msg << "External data in memory has invalid length field: " |
| 523 | + << src_init->name() << "], location: " << location |
| 524 | + << ", length: " << pb_value; |
| 525 | + ORT_THROW(err_msg.str()); |
| 526 | + } |
| 527 | + } |
| 528 | + } |
| 529 | +} |
| 530 | + |
456 | 531 | std::unique_ptr<ONNX_NAMESPACE::ModelProto> |
457 | 532 | BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, |
458 | 533 | const onnxruntime::GraphViewer& subgraph, |
@@ -529,12 +604,98 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, |
529 | 604 | return model_proto; |
530 | 605 | } else { |
531 | 606 | LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled"; |
| 607 | + |
| 608 | + // scan ext initializers: |
| 609 | + std::unordered_map<std::string, std::pair<size_t, size_t>> external_initializers_offset_and_length; |
| 610 | + std::string tempLocation; |
| 611 | + size_t extInitializerTotalSize = 0; |
| 612 | + if (session_context_.has_external_weights) { |
| 613 | + auto allInitializers = subgraph.GetAllInitializedTensors(); |
| 614 | + for (auto& [name, tp] : allInitializers) { |
| 615 | + if (utils::HasExternalDataInMemory(*tp)) { |
| 616 | + size_t offset = 0; |
| 617 | + size_t length = 0; |
| 618 | + ReadExternalDataFields(tp, tempLocation, offset, length); |
| 619 | + extInitializerTotalSize += length; |
| 620 | + external_initializers_offset_and_length[name] = {offset, length}; |
| 621 | + } |
| 622 | + } |
| 623 | + } |
| 624 | + |
| 625 | + // when we have external weights in memory, the model proto will actually embed those |
| 626 | + // and bloat the serialized string. We can avoid that by not including the data in the proto |
| 627 | + // but then we have to update those initializers and set the external_data fields to mem_addr tag... |
| 628 | + // proto is limited to 2GB, but let's use 32MB as threshold to be conservative and still gain some memory reductions. |
| 629 | +#if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 3)) || (OPENVINO_VERSION_MAJOR > 2025)) |
| 630 | + constexpr size_t MAX_EMBEDDED_INITIALIZER_SIZE = 1024 * 1024 * 32; |
| 631 | + const bool include_initializer_data_in_proto = !(session_context_.has_external_weights && |
| 632 | + external_initializers_offset_and_length.size() > 1 && |
| 633 | + extInitializerTotalSize >= MAX_EMBEDDED_INITIALIZER_SIZE); |
| 634 | +#else |
| 635 | + const bool include_initializer_data_in_proto = true; |
| 636 | +#endif |
| 637 | + |
| 638 | + |
532 | 639 | auto model = subgraph.CreateModel(logger); |
533 | 640 | auto model_proto = model->ToProto(); |
534 | 641 | model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); |
535 | | - subgraph.ToProto(*model_proto->mutable_graph(), true, true); |
| 642 | + subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true, |
| 643 | + /*include_outer_scope_args*/true, /*execution_order*/0, /*include_initializer_data*/include_initializer_data_in_proto); |
| 644 | + |
536 | 645 | print_model_proto_duration(); |
| 646 | + |
| 647 | + if (!include_initializer_data_in_proto) { |
| 648 | + LOGS(logger, INFO) << "Initializer data is not included in the model proto. Updating metadata..., total size " << extInitializerTotalSize / (1024 * 1024) << " MB in " << external_initializers_offset_and_length.size() << " initializers"; |
| 649 | + auto* graph_proto = model_proto->mutable_graph(); |
| 650 | + auto* proto_initializers = graph_proto->mutable_initializer(); |
| 651 | + |
| 652 | + std::unordered_map<std::string, ONNX_NAMESPACE::TensorProto*> proto_initializer_map; |
| 653 | + for (int i = 0, n = proto_initializers->size(); i < n; ++i) { |
| 654 | + auto& proto_init = proto_initializers->at(i); |
| 655 | + proto_initializer_map[proto_init.name()] = &proto_init; |
| 656 | + } |
| 657 | + |
| 658 | + for (const auto& [name, src_init] : subgraph.GetAllInitializedTensors()) { |
| 659 | + auto it = proto_initializer_map.find(name); |
| 660 | + if (it == proto_initializer_map.end()) |
| 661 | + continue; |
| 662 | + |
| 663 | + auto* proto_init = it->second; |
| 664 | + |
| 665 | + // If the proto initializer is missing data, fill it in |
| 666 | + if (!proto_init->has_raw_data() && src_init->has_raw_data()) { |
| 667 | + *proto_init->mutable_raw_data() = src_init->raw_data(); |
| 668 | + } |
| 669 | + |
| 670 | + // Only set in-memory external_data fields if the data is in memory |
| 671 | + if (src_init->has_raw_data()) { |
| 672 | + LOGS(logger, VERBOSE) << "In-memory initializer RAW: " |
| 673 | + << src_init->name() |
| 674 | + << ", data_type: " << src_init->data_type() |
| 675 | + << ", raw_data size: " << src_init->raw_data().size(); |
| 676 | + |
| 677 | + SetExternalDataFields(proto_init, src_init->raw_data().data(), src_init->raw_data().size()); |
| 678 | + } else if (onnxruntime::utils::HasExternalDataInMemory(*src_init)) { |
| 679 | + auto it_ext = external_initializers_offset_and_length.find(name); |
| 680 | + if (it_ext == external_initializers_offset_and_length.end()) { |
| 681 | + std::ostringstream err_msg; |
| 682 | + err_msg << "Initializer marked as external in memory but missing offset/length info: " << src_init->name(); |
| 683 | + ORT_THROW(err_msg.str()); |
| 684 | + } |
| 685 | + const size_t offset = it_ext->second.first; |
| 686 | + const size_t length = it_ext->second.second; |
| 687 | + |
| 688 | + LOGS(logger, VERBOSE) << "In-memory initializer EXT: " << src_init->name() << ", size: " << length; |
| 689 | + |
| 690 | + SetExternalDataFields(proto_init, (const void*)offset, length); |
| 691 | + } else { |
| 692 | + LOGS(logger, VERBOSE) << "File-based initializer: " << src_init->name() << ", data_type: " << src_init->data_type(); |
| 693 | + } |
| 694 | + } |
| 695 | + } |
| 696 | + |
537 | 697 | DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node); |
| 698 | + |
538 | 699 | return model_proto; |
539 | 700 | } |
540 | 701 | } |
|
0 commit comments