Skip to content

Commit f8b0904

Browse files
intbfMayureshV1Copilot
authored
Don't embed external initializers into the proto to avoid 2GB limit (#817)
* early version, it doesn't embed initializers into the proto, but then restores the metadata so OV can read them back
  Signed-off-by: bfilipek <bartlomiej.filipek@intel.com>
* improve code, refactor into smaller functions, run the logic when there are external initializers in memory (more than one)
  Signed-off-by: bfilipek <bartlomiej.filipek@intel.com>
* revert the wrongly merged code
  Signed-off-by: bfilipek <bartlomiej.filipek@intel.com>
* Updated the condition for the new logic based on the total size of ext initializers, comments, refactoring
  Signed-off-by: bfilipek <bartlomiej.filipek@intel.com>
* Update onnxruntime/core/providers/openvino/backend_manager.cc
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* make the condition less strict - 32MB threshold, move debug dump after the logic is executed, check for OV version
  Signed-off-by: bfilipek <bartlomiej.filipek@intel.com>
* unit test that uses ext initializers, early version
  Signed-off-by: bfilipek <bartlomiej.filipek@intel.com>
* used kOrtSessionOptionsDisableCPUEPFallback, cleanups, model is now over 2GB to show the proto limit (when the new logic for ext initializers is enabled, then the test passes)
  Signed-off-by: bfilipek <bartlomiej.filipek@intel.com>
* address code review comments
  Signed-off-by: bfilipek <bartlomiej.filipek@intel.com>
* Update onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* fix the Linux CI build, use PathString rather than wstring
  Signed-off-by: bfilipek <bartlomiej.filipek@intel.com>
* As agreed, disable the test as it requires OV 2025.4, while the current CI version is only 2025.2
  Signed-off-by: bfilipek <bartlomiej.filipek@intel.com>
* add missing comment
  Signed-off-by: bfilipek <bartlomiej.filipek@intel.com>
---------
Signed-off-by: bfilipek <bartlomiej.filipek@intel.com>
Co-authored-by: MayureshV1 <47039074+MayureshV1@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 2d82713 commit f8b0904

File tree

2 files changed

+377
-1
lines changed

2 files changed

+377
-1
lines changed

onnxruntime/core/providers/openvino/backend_manager.cc

Lines changed: 162 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "core/providers/openvino/ov_versions/capability.h"
2222
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
2323
#include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h"
24+
#include "../../framework/tensorprotoutils.h"
2425

2526
namespace onnxruntime {
2627
namespace openvino_ep {
@@ -453,6 +454,80 @@ static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& on
453454
#endif
454455
}
455456

457+
// this is a helper function to set the data fields, it duplicates ExternalDataInfo::SetExternalLocationToProto
458+
// but we cannot use that function as it is not part of public provider api.
459+
static void SetExternalDataFields(ONNX_NAMESPACE::TensorProto* proto_init, const void* data_ptr, int64_t data_size) {
460+
static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = "*/_ORT_MEM_ADDR_/*";
461+
auto* external_data = proto_init->mutable_external_data();
462+
bool found_location = false, found_offset = false, found_length = false;
463+
const int ext_data_size = external_data->size();
464+
proto_init->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL);
465+
466+
for (int j = 0; j < ext_data_size; ++j) {
467+
auto& ext_entry = external_data->at(j);
468+
auto& key = *ext_entry.mutable_key();
469+
if (key == "location") {
470+
*ext_entry.mutable_value() = ORT_INTERNAL_MEM_INITIALIZER;
471+
found_location = true;
472+
} else if (key == "offset") {
473+
*ext_entry.mutable_value() = std::to_string(reinterpret_cast<uintptr_t>(data_ptr));
474+
found_offset = true;
475+
} else if (key == "length") {
476+
*ext_entry.mutable_value() = std::to_string(data_size);
477+
found_length = true;
478+
}
479+
}
480+
481+
if (!found_location) {
482+
auto* new_entry = external_data->Add();
483+
*new_entry->mutable_key() = "location";
484+
*new_entry->mutable_value() = ORT_INTERNAL_MEM_INITIALIZER;
485+
}
486+
if (!found_offset) {
487+
auto* new_entry = external_data->Add();
488+
*new_entry->mutable_key() = "offset";
489+
*new_entry->mutable_value() = std::to_string(reinterpret_cast<uintptr_t>(data_ptr));
490+
}
491+
if (!found_length) {
492+
auto* new_entry = external_data->Add();
493+
*new_entry->mutable_key() = "length";
494+
*new_entry->mutable_value() = std::to_string(data_size);
495+
}
496+
}
497+
498+
// Reads the "location", "offset" and "length" entries of `src_init`'s external_data metadata
// into the corresponding output arguments.
//
// @param src_init  tensor whose external_data entries are scanned; it is not modified.
// @param location  receives the value of the "location" entry, if present.
// @param offset    receives the parsed value of the "offset" entry, if present.
// @param length    receives the parsed value of the "length" entry, if present.
//
// Output arguments whose key does not occur are left untouched, and entries with any other key
// are ignored. Throws (via ORT_THROW) when an "offset" or "length" value is not a plain
// unsigned decimal integer that fits in size_t; values with trailing characters are rejected
// rather than silently truncated.
static void ReadExternalDataFields(const ONNX_NAMESPACE::TensorProto* src_init, std::string& location, size_t& offset, size_t& length) {
  // Remove constness as we need to use mutable_external_data() to get the entries to read.
  // The entries themselves are not modified...
  auto& mutable_proto = *const_cast<ONNX_NAMESPACE::TensorProto*>(src_init);
  auto* entry_protos = mutable_proto.mutable_external_data();

  // Parses `text` as size_t into `out`; `out` is only written on success.
  // `location` is reported as parsed so far, so it may be empty if its entry comes later.
  const auto parse_size_field = [&](const std::string& text, const char* field_name, size_t& out) {
    const char* const first = text.data();
    const char* const last = first + text.size();
    size_t parsed = 0;
    const auto res = std::from_chars(first, last, parsed);
    // res.ptr != last means only a prefix was numeric (e.g. "123abc"): treat that as invalid too.
    if (res.ec != std::errc() || res.ptr != last) {
      std::ostringstream err_msg;
      err_msg << "External data in memory has invalid " << field_name << " field: ["
              << src_init->name() << "], location: " << location
              << ", " << field_name << ": " << text;
      ORT_THROW(err_msg.str());
    }
    out = parsed;
  };

  for (int i = 0, n = entry_protos->size(); i < n; ++i) {
    auto& string_entry_proto = entry_protos->at(i);
    const auto& pb_key = *string_entry_proto.mutable_key();
    const auto& pb_value = *string_entry_proto.mutable_value();
    if (pb_key == "location") {
      location = pb_value;
    } else if (pb_key == "offset") {
      parse_size_field(pb_value, "offset", offset);
    } else if (pb_key == "length") {
      parse_size_field(pb_value, "length", length);
    }
  }
}
530+
456531
std::unique_ptr<ONNX_NAMESPACE::ModelProto>
457532
BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
458533
const onnxruntime::GraphViewer& subgraph,
@@ -529,12 +604,98 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
529604
return model_proto;
530605
} else {
531606
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled";
607+
608+
// scan ext initializers:
609+
std::unordered_map<std::string, std::pair<size_t, size_t>> external_initializers_offset_and_length;
610+
std::string tempLocation;
611+
size_t extInitializerTotalSize = 0;
612+
if (session_context_.has_external_weights) {
613+
auto allInitializers = subgraph.GetAllInitializedTensors();
614+
for (auto& [name, tp] : allInitializers) {
615+
if (utils::HasExternalDataInMemory(*tp)) {
616+
size_t offset = 0;
617+
size_t length = 0;
618+
ReadExternalDataFields(tp, tempLocation, offset, length);
619+
extInitializerTotalSize += length;
620+
external_initializers_offset_and_length[name] = {offset, length};
621+
}
622+
}
623+
}
624+
625+
// when we have external weights in memory, the model proto will actually embed those
626+
// and bloat the serialized string. We can avoid that by not including the data in the proto
627+
// but then we have to update those initializers and set the external_data fields to mem_addr tag...
628+
// proto is limited to 2GB, but let's use 32MB as threshold to be conservative and still gain some memory reductions.
629+
#if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 3)) || (OPENVINO_VERSION_MAJOR > 2025))
630+
constexpr size_t MAX_EMBEDDED_INITIALIZER_SIZE = 1024 * 1024 * 32;
631+
const bool include_initializer_data_in_proto = !(session_context_.has_external_weights &&
632+
external_initializers_offset_and_length.size() > 1 &&
633+
extInitializerTotalSize >= MAX_EMBEDDED_INITIALIZER_SIZE);
634+
#else
635+
const bool include_initializer_data_in_proto = true;
636+
#endif
637+
638+
532639
auto model = subgraph.CreateModel(logger);
533640
auto model_proto = model->ToProto();
534641
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
535-
subgraph.ToProto(*model_proto->mutable_graph(), true, true);
642+
subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true,
643+
/*include_outer_scope_args*/true, /*execution_order*/0, /*include_initializer_data*/include_initializer_data_in_proto);
644+
536645
print_model_proto_duration();
646+
647+
if (!include_initializer_data_in_proto) {
648+
LOGS(logger, INFO) << "Initializer data is not included in the model proto. Updating metadata..., total size " << extInitializerTotalSize / (1024 * 1024) << " MB in " << external_initializers_offset_and_length.size() << " initializers";
649+
auto* graph_proto = model_proto->mutable_graph();
650+
auto* proto_initializers = graph_proto->mutable_initializer();
651+
652+
std::unordered_map<std::string, ONNX_NAMESPACE::TensorProto*> proto_initializer_map;
653+
for (int i = 0, n = proto_initializers->size(); i < n; ++i) {
654+
auto& proto_init = proto_initializers->at(i);
655+
proto_initializer_map[proto_init.name()] = &proto_init;
656+
}
657+
658+
for (const auto& [name, src_init] : subgraph.GetAllInitializedTensors()) {
659+
auto it = proto_initializer_map.find(name);
660+
if (it == proto_initializer_map.end())
661+
continue;
662+
663+
auto* proto_init = it->second;
664+
665+
// If the proto initializer is missing data, fill it in
666+
if (!proto_init->has_raw_data() && src_init->has_raw_data()) {
667+
*proto_init->mutable_raw_data() = src_init->raw_data();
668+
}
669+
670+
// Only set in-memory external_data fields if the data is in memory
671+
if (src_init->has_raw_data()) {
672+
LOGS(logger, VERBOSE) << "In-memory initializer RAW: "
673+
<< src_init->name()
674+
<< ", data_type: " << src_init->data_type()
675+
<< ", raw_data size: " << src_init->raw_data().size();
676+
677+
SetExternalDataFields(proto_init, src_init->raw_data().data(), src_init->raw_data().size());
678+
} else if (onnxruntime::utils::HasExternalDataInMemory(*src_init)) {
679+
auto it_ext = external_initializers_offset_and_length.find(name);
680+
if (it_ext == external_initializers_offset_and_length.end()) {
681+
std::ostringstream err_msg;
682+
err_msg << "Initializer marked as external in memory but missing offset/length info: " << src_init->name();
683+
ORT_THROW(err_msg.str());
684+
}
685+
const size_t offset = it_ext->second.first;
686+
const size_t length = it_ext->second.second;
687+
688+
LOGS(logger, VERBOSE) << "In-memory initializer EXT: " << src_init->name() << ", size: " << length;
689+
690+
SetExternalDataFields(proto_init, (const void*)offset, length);
691+
} else {
692+
LOGS(logger, VERBOSE) << "File-based initializer: " << src_init->name() << ", data_type: " << src_init->data_type();
693+
}
694+
}
695+
}
696+
537697
DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
698+
538699
return model_proto;
539700
}
540701
}

0 commit comments

Comments
 (0)