
Commit c0c1ed7

Merge pull request #772 from intel/sync_msft_08082025
Sync with Microsoft ONNX Runtime - 08/08/2025
2 parents 055300f + 7f7091e commit c0c1ed7

79 files changed: 671 additions, 708 deletions


include/onnxruntime/core/graph/graph.h

Lines changed: 39 additions & 10 deletions
@@ -1220,7 +1220,10 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
 #endif
 
 #if !defined(ORT_MINIMAL_BUILD)
-  /** Gets the GraphProto representation of this Graph only. */
+  /** Gets the GraphProto representation of this Graph only.
+   * This does not remove in-memory tags for graph initializers.
+   * Use ToGraphProto() const to get a GraphProto that can be serialized externally.
+   */
   const ONNX_NAMESPACE::GraphProto& ToGraphProto();
 
   /// <summary>
@@ -1439,6 +1442,27 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
     return Resolve(default_options);
   }
 
+  /// <summary>
+  /// This function converts all the graph TensorProto initializers into OrtValues
+  /// and creates an in-memory external data reference for each OrtValue.
+  /// </summary>
+  /// <returns></returns>
+  Status ConvertInitializersIntoOrtValues();
+
+  /**
+   * @brief Converts a subset of graph TensorProto initializers into OrtValues and updates the graph proto.
+   *
+   * This function converts specified TensorProto initializers in the graph into OrtValues and
+   * creates in-memory external data references for each OrtValue. It then updates the provided
+   * GraphProto with the modified initializers.
+   *
+   * @param iterators Span of iterators pointing to the initializers, in the order in which they should be processed
+   * @param output_graph_proto The GraphProto to be updated with the modified initializers
+   * @return Status Returns a Status object indicating success or any errors that occurred during conversion
+   */
+  Status RegenerateInitializersAndReplaceInMemory(gsl::span<const InitializedTensorSet::const_iterator> iterators,
+                                                  ONNX_NAMESPACE::GraphProto& output_graph_proto) const;
+
   const std::unordered_set<std::string>& GetOuterScopeNodeArgNames() const noexcept {
     return outer_scope_node_arg_names_;
   }
@@ -1595,20 +1619,25 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
   /// This function is used by ToGraphProto() to ensure in-memory external data references
   /// don't leak externally since they are non-standard.
   ///
-  /// It handles two scenarios:
-  /// - When GraphSynchronizationNeeded() is false: GraphProto is simply copied
+  /// It is used when GraphSynchronizationNeeded() is false: GraphProto is simply copied
   /// from graph_proto_ by ToGraphProto(). This copy includes both main graph
   /// and subgraph initializers. This function examines all initializers
   /// and inlines any in-memory data references.
-  /// - When GraphSynchronizationNeeded() is true: ToGraphProto() generates a new GraphProto
-  /// using ToGraphProtoInternal(). This doesn't transfer main graph initializers, which are
-  /// copied and inlined by ToGraphProto() itself. This function processes only the subgraph initializers
-  /// as needed.
   /// </summary>
   /// <param name="output_graph_proto">The GraphProto to process</param>
-  /// <param name="process_main">Whether to process the main graph initializers</param>
-  /// <returns>Status indicating success or failure</returns> ///
-  Status ProcessSubgraphsInMemoryData(ONNX_NAMESPACE::GraphProto& output_graph_proto, bool process_main) const;
+  /// <returns>Status indicating success or failure</returns>
+  Status ProcessSubgraphsInMemoryData(ONNX_NAMESPACE::GraphProto& output_graph_proto) const;
+
+  /// <summary>
+  /// This function replaces all of the initializers within output_graph_proto
+  /// from this Graph instance. All in-memory initializers are regenerated and inlined.
+  /// This is necessary even if the graph_proto_ is already up to date, because initializers() may
+  /// contain obsolete initializers that are no longer in use due to optimizations and contain obsolete
+  /// references to OrtValues that may no longer be around (since we like appending rather than replacing).
+  /// </summary>
+  /// <param name="output_graph_proto">Destination GraphProto to receive the updated initializers.</param>
+  /// <returns>Status indicating success or failure.</returns>
+  Status RegenerateInitializersAndReplaceInMemory(ONNX_NAMESPACE::GraphProto& output_graph_proto) const;
 
   /// <summary>
   /// This function traverses the graph bottom up and externalizes
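The distinction the new header comment draws is between the two overloads: the non-const ToGraphProto() keeps the in-memory external-data tags, while ToGraphProto() const (reworked in the graph.cc changes below) inlines them. A hedged sketch of caller code under that contract; the helper names are hypothetical and not part of the commit:

#include <string>
#include "core/graph/graph.h"

// Safe to write to disk or send over the wire: the const overload inlines
// any OrtValue-backed (in-memory external data) initializers first.
std::string SerializeGraph(const onnxruntime::Graph& graph) {
  ONNX_NAMESPACE::GraphProto proto = graph.ToGraphProto();
  return proto.SerializeAsString();  // standard protobuf serialization
}

// Not for external serialization: per the new doc comment, the non-const
// overload leaves the in-memory tags in place.
const ONNX_NAMESPACE::GraphProto& ViewGraphProto(onnxruntime::Graph& graph) {
  return graph.ToGraphProto();
}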

onnxruntime/core/framework/debug_node_inputs_outputs_utils.cc

Lines changed: 7 additions & 0 deletions
@@ -667,6 +667,13 @@ void DumpNodeOutputs(
     const bool is_shape_set = (dump_options.dump_flags & NodeDumpOptions::DumpFlags::Shape) != 0;
     PrintIf(is_shape_set, MakeString(" Shape: ", shape, "\n"));
 
+    // For MemcpyToHost, the memory copy has not been synchronized, so the data is not ready to read yet.
+    // We skip it here since it is just a copy of the input tensor (or the output of the previous node), which has already been dumped.
+    if (node.OpType() == "MemcpyToHost") {
+      std::cout << " is same as input.\n";
+      continue;
+    }
+
     if ((dump_options.dump_flags & NodeDumpOptions::DumpFlags::OutputData) != 0 || check_half_overflow) {
       tensor_metadata.name = output_defs[i]->Name();
       tensor_metadata.step = dump_context.iteration;

onnxruntime/core/graph/graph.cc

Lines changed: 78 additions & 104 deletions
@@ -666,12 +666,16 @@ void Node::ToProto(NodeProto& proto, bool update_subgraphs) const {
 
   // Set attributes.
   proto.clear_attribute();
-  for (const auto& attribute : attributes_) {
+  for (const auto& [name, attribute] : attributes_) {
     const gsl::not_null<AttributeProto*> attr{proto.add_attribute()};
-    *attr = attribute.second;  // copy
-    if (update_subgraphs && attr->has_g()) {
+    *attr = attribute;  // copy
+    if (update_subgraphs && utils::HasGraph(*attr)) {
+      auto find_hit = attr_to_subgraph_map_.find(name);
+      // Force ToGraphProto() const to be called so
+      // that any in-memory TensorProto initializers go back to being inlined
+      const Graph& subgraph = *find_hit->second;
       attr->clear_g();
-      *attr->mutable_g() = attr_to_subgraph_map_.find(attribute.first)->second->ToGraphProto();
+      *attr->mutable_g() = subgraph.ToGraphProto();
     }
   }
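The `const Graph& subgraph` binding is what makes the comment above work: overload resolution picks ToGraphProto() const only when the call goes through a const access path. A minimal standalone sketch of that dispatch rule (hypothetical Widget type, not ORT code):

#include <iostream>

struct Widget {
  const char* ToProto() { return "non-const overload: keeps in-memory tags"; }
  const char* ToProto() const { return "const overload: inlines in-memory data"; }
};

int main() {
  Widget w;
  const Widget& cref = w;               // same trick as `const Graph& subgraph = *find_hit->second;`
  std::cout << w.ToProto() << "\n";     // non-const overload selected
  std::cout << cref.ToProto() << "\n";  // const overload selected through the const reference
  return 0;
}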
@@ -3381,7 +3385,12 @@ Status Graph::Resolve(const ResolveOptions& options) {
 
     return Status::OK(); };
 
-  ORT_RETURN_IF_ERROR(ForThisAndAllSubgraphs(all_subgraphs, finalize_func));
+  return ForThisAndAllSubgraphs(all_subgraphs, finalize_func);
+}
+
+Status Graph::ConvertInitializersIntoOrtValues() {
+  std::vector<Graph*> all_subgraphs;
+  FindAllSubgraphs(all_subgraphs);
 
   auto put_weights_maybe_in_memory_func = [&](Graph& graph) -> Status {
     // if we have any initializers that are not in memory, put them there.
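For context, a hedged usage sketch of the new public entry point; the call site below is hypothetical and not part of the commit. Per this hunk it walks the graph plus all subgraphs found via FindAllSubgraphs and moves TensorProto initializers into OrtValue-backed, in-memory external-data references.

#include "core/graph/graph.h"

// Hypothetical helper: convert a resolved graph's initializers in place.
onnxruntime::common::Status MoveInitializersToOrtValues(onnxruntime::Graph& graph) {
  // Declared in include/onnxruntime/core/graph/graph.h by this commit.
  return graph.ConvertInitializersIntoOrtValues();
}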
@@ -4308,11 +4317,47 @@ Status InlineOrCopyInitializer(const Graph& src_graph, const ONNX_NAMESPACE::Ten
   }
   return Status::OK();
 }
-
 } // namespace
 
-Status Graph::ProcessSubgraphsInMemoryData(ONNX_NAMESPACE::GraphProto& output_graph_proto,
-                                           bool process_main) const {
+Status Graph::RegenerateInitializersAndReplaceInMemory(gsl::span<const InitializedTensorSet::const_iterator> iterators,
+                                                       ONNX_NAMESPACE::GraphProto& output_graph_proto) const {
+  auto& mutable_initializers = *output_graph_proto.mutable_initializer();
+
+#if !defined(DISABLE_SPARSE_TENSORS)
+  output_graph_proto.clear_sparse_initializer();
+
+  const auto& model_path = ModelPath();
+  const bool has_sparse_initializers = !sparse_tensor_names_.empty();
+  const auto sparse_end = sparse_tensor_names_.end();
+
+  for (const auto& iter : iterators) {
+    const auto& [name, tensor_proto] = *iter;
+    const auto& initializer = *tensor_proto;
+    if (!has_sparse_initializers || sparse_end == sparse_tensor_names_.find(name)) {
+      ORT_RETURN_IF_ERROR(InlineOrCopyInitializer(*this, initializer,
+                                                  *mutable_initializers.Add()));
+    } else {
+      auto& sparse_initializer = *output_graph_proto.add_sparse_initializer();
+      if (utils::HasExternalDataInMemory(initializer)) {
+        ONNX_NAMESPACE::TensorProto tensor_proto_inlined;
+        ORT_RETURN_IF_ERROR(InlineOrCopyInitializer(*this, initializer,
+                                                    tensor_proto_inlined));
+        ORT_RETURN_IF_ERROR(utils::DenseTensorToSparseTensorProto(tensor_proto_inlined, model_path, sparse_initializer));
+      } else {
+        ORT_RETURN_IF_ERROR(utils::DenseTensorToSparseTensorProto(initializer, model_path, sparse_initializer));
+      }
+    }
+  }
+#else
+  for (const auto& iter : iterators) {
+    const auto& [name, tensor_proto] = *iter;
+    ORT_RETURN_IF_ERROR(InlineOrCopyInitializer(*this, *tensor_proto, *mutable_initializers.Add()));
+  }
+#endif
+  return Status::OK();
+}
+
+Status Graph::ProcessSubgraphsInMemoryData(ONNX_NAMESPACE::GraphProto& output_graph_proto) const {
   for (const auto& node : Nodes()) {
     if (node.ContainsSubgraph()) {
       // Let's find this node in the output_graph_proto
@@ -4343,103 +4388,48 @@ Status Graph::ProcessSubgraphsInMemoryData(ONNX_NAMESPACE::GraphProto& output_gr
                           "Subgraph ", name, " is referred to in GetAttributeNameToSubgraphMap, but not found in node ",
                           node.Name(), " while attempting to recurse into it.");
         auto& result_subgraph = *sub_hit->mutable_g();
-        ORT_RETURN_IF_ERROR(subgraph->ProcessSubgraphsInMemoryData(result_subgraph, process_main));
+        ORT_RETURN_IF_ERROR(subgraph->ProcessSubgraphsInMemoryData(result_subgraph));
       }
     }
   }
 
-  // When graph_proto is copied from graph_proto, initializers already present in the main graph
-  if (parent_graph_ != nullptr || process_main) {
-#if !defined(DISABLE_SPARSE_TENSORS)
-    auto* mutable_initializers = output_graph_proto.mutable_initializer();
-    const auto& model_path = ModelPath();
-    const bool has_sparse_initializers = !sparse_tensor_names_.empty();
-    const auto sparse_end = sparse_tensor_names_.end();
-
-    // We want to make sure that sparse initializers do not appear
-    // as dense duplicates within the initializers list.
-    std::optional<InlinedHashSet<std::string>> initializer_to_remove;
-    if (has_sparse_initializers) {
-      // We need to remove the dense initializers that are sparse tensors
-      initializer_to_remove.emplace();
-    }
-
-    for (auto first = mutable_initializers->begin(), end = mutable_initializers->end(); first != end; ++first) {
-      auto& initializer = *first;
-      if (utils::HasExternalDataInMemory(initializer)) {
-        // If the initializer has external data in memory, we need to inline it.
-        ORT_RETURN_IF_ERROR(InlineOrCopyInitializer(*this, initializer, initializer));
-      }
-      if (has_sparse_initializers && sparse_end != sparse_tensor_names_.find(initializer.name())) {
-        auto& sparse_initializer = *output_graph_proto.add_sparse_initializer();
-        ORT_RETURN_IF_ERROR(utils::DenseTensorToSparseTensorProto(initializer, model_path, sparse_initializer));
-        initializer_to_remove->insert(initializer.name());
-      }
-    }
-
-    // erase/remove dense initializers that are sparse tensors so no duplicates are present
-    if (initializer_to_remove && !initializer_to_remove->empty()) {
-      mutable_initializers->erase(std::remove_if(
-                                      mutable_initializers->begin(), mutable_initializers->end(),
-                                      [&initializer_to_remove](const ONNX_NAMESPACE::TensorProto& initializer) {
-                                        return initializer_to_remove->count(initializer.name()) > 0;
-                                      }),
-                                  mutable_initializers->end());
-    }
-#else
-    for (auto& initializer : *output_graph_proto.mutable_initializer()) {
-      if (utils::HasExternalDataInMemory(initializer)) {
-        // If the initializer has external data in memory, we need to inline it.
-        ORT_RETURN_IF_ERROR(InlineOrCopyInitializer(*this, initializer, initializer));
-      }
+  // Filter in iterators for weights that are present in the name_to_initial_tensor_ map
+  // and preserve the order. This is needed for tests.
+  InlinedVector<InitializedTensorSet::const_iterator> initializers_to_process;
+  initializers_to_process.reserve(name_to_initial_tensor_.size());
+  for (const auto& tensor_proto : output_graph_proto.initializer()) {
+    auto hit = name_to_initial_tensor_.find(tensor_proto.name());
+    if (hit != name_to_initial_tensor_.end()) {
+      initializers_to_process.push_back(hit);
     }
-#endif
   }
-  return Status::OK();
+
+  output_graph_proto.clear_initializer();
+  return RegenerateInitializersAndReplaceInMemory(initializers_to_process, output_graph_proto);
 }
 
 ONNX_NAMESPACE::GraphProto Graph::ToGraphProto() const {
   GraphProto result;
   if (!GraphProtoSyncNeeded()) {
     result = *graph_proto_;
-    ORT_THROW_IF_ERROR(ProcessSubgraphsInMemoryData(result, /*process_main*/ true));
+    ORT_THROW_IF_ERROR(ProcessSubgraphsInMemoryData(result));
   } else {
+    // Recursion is handled via Node::ToProto() const -> Graph::ToGraphProto() const (this method)
+    // so below we handle this graph only.
     ToGraphProtoInternal(result);
 
-    ORT_THROW_IF_ERROR(ProcessSubgraphsInMemoryData(result, /*process_main*/ false));
-
-    // Add initializers to parent graph by copy converting them from graph_proto_
-    // ToGraphProtoInternal() does not copy initializers for the main graph
-    auto* mutable_initializers = result.mutable_initializer();
-
-#if !defined(DISABLE_SPARSE_TENSORS)
-    const auto& model_path = ModelPath();
-    const bool has_sparse_initializers = !sparse_tensor_names_.empty();
-    const auto sparse_end = sparse_tensor_names_.end();
-
-    for (const auto& initializer : graph_proto_->initializer()) {
-      if (!has_sparse_initializers || sparse_end == sparse_tensor_names_.find(initializer.name())) {
-        ORT_THROW_IF_ERROR(InlineOrCopyInitializer(*this, initializer,
-                                                   *mutable_initializers->Add()));
-      } else {
-        auto& sparse_initializer = *result.add_sparse_initializer();
-        if (utils::HasExternalDataInMemory(initializer)) {
-          ONNX_NAMESPACE::TensorProto tensor_proto;
-          ORT_THROW_IF_ERROR(InlineOrCopyInitializer(*this, initializer,
-                                                     tensor_proto));
-          ORT_THROW_IF_ERROR(utils::DenseTensorToSparseTensorProto(tensor_proto, model_path, sparse_initializer));
-        } else {
-          ORT_THROW_IF_ERROR(utils::DenseTensorToSparseTensorProto(initializer, model_path, sparse_initializer));
-        }
+    InlinedVector<InitializedTensorSet::const_iterator> initializers_to_process;
+    initializers_to_process.reserve(name_to_initial_tensor_.size());
+    for (const auto& tensor_proto : graph_proto_->initializer()) {
+      auto hit = name_to_initial_tensor_.find(tensor_proto.name());
+      if (hit != name_to_initial_tensor_.end()) {
+        initializers_to_process.push_back(hit);
       }
     }
-#else
-    for (const auto& initializer : graph_proto_->initializer()) {
-      ORT_THROW_IF_ERROR(InlineOrCopyInitializer(*this, initializer, *mutable_initializers->Add()));
-    }
-#endif
-  }
 
+    ORT_THROW_IF_ERROR(RegenerateInitializersAndReplaceInMemory(initializers_to_process,
+                                                                result));
+  }
   return result;
 }
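Both branches of ToGraphProto() const now build initializers_to_process by walking the proto's initializer list and looking each name up in name_to_initial_tensor_, so processing follows the proto's order and silently drops entries the graph no longer tracks. A small self-contained illustration of that idiom (hypothetical data, not ORT code):

#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
  std::unordered_map<std::string, int> name_to_initializer{{"b", 2}, {"a", 1}, {"c", 3}};
  std::vector<std::string> proto_order{"a", "b", "stale", "c"};  // "stale" is no longer tracked

  // Collect map iterators in proto order, skipping obsolete names,
  // mirroring the initializers_to_process loop above.
  std::vector<std::unordered_map<std::string, int>::const_iterator> to_process;
  to_process.reserve(name_to_initializer.size());
  for (const auto& name : proto_order) {
    auto hit = name_to_initializer.find(name);
    if (hit != name_to_initializer.end()) {
      to_process.push_back(hit);
    }
  }
  for (auto it : to_process) {
    std::cout << it->first << " -> " << it->second << "\n";  // prints a, b, c in proto order
  }
  return 0;
}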
@@ -5235,23 +5225,7 @@ Status Graph::AddConstantProtoAsInitializer(const ONNX_NAMESPACE::NodeProto& nod
     tensor_proto.set_name(std::string(new_name.value()));
   }
 
-  // In the constant node, we won't have symbolic dims.
-  const auto tensor_shape = utils::GetTensorShapeFromTensorProto(tensor_proto);
-  auto ml_data = DataTypeImpl::TensorTypeFromONNXEnum(tensor_proto.data_type())->GetElementType();
-  const size_t size_in_bytes = Tensor::CalculateTensorStorageSize(ml_data, tensor_shape);
-
-  if (size_in_bytes > utils::kSmallTensorExternalDataThreshold) {
-    OrtValue ort_value;
-    ORT_RETURN_IF_ERROR(utils::TensorProtoToOrtValue(Env::Default(), ModelPath(), tensor_proto,
-                                                     CPUAllocator::DefaultInstance(), ort_value));
-
-    constexpr const bool use_tensor_buffer_true = true;
-    auto tensor_proto_to_add = utils::TensorToTensorProto(ort_value.Get<Tensor>(), tensor_proto.name(),
-                                                          use_tensor_buffer_true);
-    ORT_RETURN_IF_ERROR(AddInitializedOrtValue(tensor_proto_to_add, ort_value));
-  } else {
-    AddInitializedTensor(tensor_proto);
-  }
+  AddInitializedTensor(tensor_proto);
 
   if (GetNodeArg(tensor_proto.name()) == nullptr) {
     TypeProto t{utils::TypeProtoFromTensorProto(tensor_proto)};

onnxruntime/core/optimizer/attention_fusion.cc

Lines changed: 1 addition & 1 deletion
@@ -111,7 +111,7 @@ static NodeArg& MergeQkvWeights(Graph& graph, int64_t hidden_size,
     utils::SetRawDataInTensorProto(initializer, result.data(), gsl::narrow<size_t>(element_count) * sizeof(MLFloat16));
   }
 
-  return graph_utils::AddInitializerWithExternalData(graph, initializer);
+  return graph_utils::AddInitializer(graph, initializer);
 }
 
 static NodeArg* ConvertMaskToInt32(Graph& graph, NodeArg* mask_input, ProviderType provider_type,

onnxruntime/core/optimizer/compute_optimizer/shared_utils.cc

Lines changed: 1 addition & 1 deletion
@@ -189,7 +189,7 @@ NodeArg* CreateInitializerFromVector(Graph& graph,
                     "total_count: ", total_count, " values.size(): ", values.size());
 
   utils::SetRawDataInTensorProto(const_tensor, values.data(), values.size() * sizeof(int64_t));
-  return &graph_utils::AddInitializerWithExternalData(graph, const_tensor);
+  return &graph_utils::AddInitializer(graph, const_tensor);
 }
 
 NodeArg* InsertNodesForValidIndices(Graph& graph,

onnxruntime/core/optimizer/constant_folding.cc

Lines changed: 4 additions & 9 deletions
@@ -95,7 +95,7 @@ static bool ConstantFoldShapeNode(Graph& graph, Node& node) {
     ONNX_NAMESPACE::TensorShapeProto result_shape;
     result_shape.add_dim()->set_dim_value(clamped_slice_length);
     constant_arg_out->SetShape(result_shape);
-    graph_utils::AddInitializerWithExternalData(graph, shape_constant);
+    graph_utils::AddInitializer(graph, shape_constant);
   }
 
   return is_concrete_shape;  // convert to constant if this is true
@@ -317,24 +317,19 @@ Status ConstantFolding::ApplyImpl(Graph& graph, bool& modified, int graph_level,
           // Build the TensorProto that corresponds to the computed OrtValue and add it as initializer to the graph.
           auto* constant_arg_out = node->MutableOutputDefs()[fetch_idx];
           const Tensor& out_tensor = ort_value.Get<Tensor>();
-          constexpr const bool use_tensor_buffer_true = true;
+          constexpr const bool use_tensor_buffer_false = false;
          ONNX_NAMESPACE::TensorProto out_tensorproto = utils::TensorToTensorProto(
               out_tensor,
               constant_arg_out->Name(),
-              use_tensor_buffer_true);
+              use_tensor_buffer_false);
 
           ONNX_NAMESPACE::TensorShapeProto result_shape;
           for (auto& dim : out_tensor.Shape().GetDims()) {
             result_shape.add_dim()->set_dim_value(dim);
           }
 
           constant_arg_out->SetShape(result_shape);
-          // The data is too small and has been inlined.
-          if (!utils::HasExternalData(out_tensorproto)) {
-            ORT_THROW_IF_ERROR(graph.AddInitializedOrtValue(out_tensorproto, OrtValue()));
-          } else {
-            ORT_THROW_IF_ERROR(graph.AddInitializedOrtValue(out_tensorproto, ort_value));
-          }
+          graph.AddInitializedTensor(out_tensorproto);
         }
       }
     }
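The flipped flag is the crux of this hunk. As used here and in the removed AddConstantProtoAsInitializer path above, TensorToTensorProto with use_tensor_buffer == false copies the tensor bytes into the proto (self-contained, so a plain AddInitializedTensor suffices), whereas true makes the proto reference the live tensor buffer as in-memory external data that must stay paired with its OrtValue via AddInitializedOrtValue. A hedged sketch of the self-contained path, reusing only the calls visible in the diff; the helper name is hypothetical and the include is assumed to be the usual tensorprotoutils.h:

#include <string>
#include "core/framework/tensorprotoutils.h"  // assumed home of utils::TensorToTensorProto

// Hypothetical helper: build a self-contained TensorProto for a constant-folded output.
ONNX_NAMESPACE::TensorProto MakeSelfContainedInitializer(const onnxruntime::Tensor& out_tensor,
                                                         const std::string& name) {
  constexpr bool use_tensor_buffer_false = false;  // embed data in the proto, no in-memory reference
  return onnxruntime::utils::TensorToTensorProto(out_tensor, name, use_tensor_buffer_false);
}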
