Skip to content

Commit 307b34a

Browse files
authored
[NNAPI EP] Track skipped initializer usage (microsoft#21286)
Track skipped initializer usage in NNAPI EP to account for usage by other nodes.
1 parent 1ab162f commit 307b34a

File tree

5 files changed

+99
-16
lines changed

5 files changed

+99
-16
lines changed

onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,13 @@ DEFINE_ADD_OPERAND_FROM_SCALAR(float, FLOAT32);
5656
#undef DEFINE_ADD_OPERAND_FROM_SCALAR
5757

5858
void ModelBuilder::AddInitializerToSkip(const std::string& tensor_name) {
59-
skipped_initializers_.insert(tensor_name);
59+
// decrement usage count if this is a known initializer.
60+
// For simplicity the OpBuilder::AddInitializersToSkip implementations may call this for arbitrary input names
61+
// without first checking if the value is an initializer.
62+
auto entry = initializer_usage_.find(tensor_name);
63+
if (entry != initializer_usage_.end()) {
64+
entry->second -= 1;
65+
}
6066
}
6167

6268
Status ModelBuilder::Prepare() {
@@ -87,7 +93,16 @@ static size_t GetPaddedByteSize(size_t size) {
8793
}
8894

8995
void ModelBuilder::PreprocessInitializers() {
96+
const auto& initializers = GetInitializerTensors();
97+
9098
for (const auto& node_unit : node_unit_holder_) {
99+
// find all initializers consumed. AddInitializersToSkip will potentially decrement the usage count.
100+
for (const auto& input : node_unit->Inputs()) {
101+
if (input.node_arg.Exists() && Contains(initializers, input.node_arg.Name())) {
102+
initializer_usage_[input.node_arg.Name()]++;
103+
}
104+
}
105+
91106
if (const auto* op_builder = GetOpBuilder(*node_unit)) {
92107
op_builder->AddInitializersToSkip(*this, *node_unit);
93108
}
@@ -208,11 +223,16 @@ Status ModelBuilder::RegisterInitializers() {
208223
std::vector<std::tuple<uint32_t, size_t, size_t>> initializers(initializer_size);
209224
size_t sizeAll = 0;
210225

226+
const auto should_skip_initializer = [this](const std::string& name) -> bool {
227+
const auto it = initializer_usage_.find(name);
228+
return it == initializer_usage_.end() || it->second == 0;
229+
};
230+
211231
int i = 0;
212232
for (const auto& pair : initializer_tensors) {
213233
const auto& tensor = *pair.second;
214234
const auto& name = tensor.name();
215-
if (Contains(skipped_initializers_, name))
235+
if (should_skip_initializer(name))
216236
continue;
217237

218238
Shape shape;
@@ -249,7 +269,7 @@ Status ModelBuilder::RegisterInitializers() {
249269
size_t offset = 0;
250270
for (const auto& pair : initializer_tensors) {
251271
const auto& tensor = *pair.second;
252-
if (Contains(skipped_initializers_, tensor.name()))
272+
if (should_skip_initializer(tensor.name()))
253273
continue;
254274

255275
auto [index, size, padded_size] = initializers[i++];
@@ -439,10 +459,11 @@ Status ModelBuilder::AddOperandFromPersistMemoryBuffer(
439459
Status ModelBuilder::AddOperations() {
440460
const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder();
441461
for (const auto node_idx : node_indices) {
442-
LOGS_DEFAULT(VERBOSE) << "Adding node [" << node_idx << "]";
443462
const auto* node(graph_viewer_.GetNode(node_idx));
444463
const NodeUnit& node_unit = GetNodeUnit(node);
445464

465+
LOGS_DEFAULT(VERBOSE) << "Adding node [" << node_unit.Name() << "] at index [" << node_unit.Index() << "]";
466+
446467
// Since we may have NodeUnit with multiple nodes, insert NodeUnit with the first occurrence of
447468
// its node(s) in topological order may cause the incorrect topological order while inserting
448469
// NodeUnits, for example,

onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ class ModelBuilder {
134134
std::unordered_set<std::string> operands_;
135135
std::unordered_set<std::string> fused_activations_;
136136

137-
std::unordered_set<std::string> skipped_initializers_;
137+
std::unordered_map<std::string, int> initializer_usage_;
138138

139139
// All activation nodes (Relu, Relu1, Relu6) as a map <const NodeUnit*, activation_code>
140140
std::unordered_map<const NodeUnit*, int32_t> activation_node_units_;

onnxruntime/test/providers/nnapi/nnapi_basic_test.cc

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "test/common/tensor_op_test_utils.h"
1515
#include "test/framework/test_utils.h"
1616
#include "test/util/include/asserts.h"
17+
#include "test/util/include/current_test_name.h"
1718
#include "test/util/include/default_providers.h"
1819
#include "test/util/include/inference_session_wrapper.h"
1920
#include "test/util/include/test/test_environment.h"
@@ -36,10 +37,6 @@ using namespace ::onnxruntime::logging;
3637
namespace onnxruntime {
3738
namespace test {
3839

39-
#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
40-
41-
#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
42-
4340
#if !defined(ORT_MINIMAL_BUILD)
4441

4542
// Since NNAPI EP handles Reshape and Flatten differently,
@@ -65,7 +62,8 @@ TEST(NnapiExecutionProviderTest, ReshapeFlattenTest) {
6562
feeds.insert(std::make_pair("X", ml_value_x));
6663
feeds.insert(std::make_pair("Y", ml_value_y));
6764

68-
RunAndVerifyOutputsWithEP(model_file_name, "NnapiExecutionProviderTest.ReshapeFlattenTest",
65+
RunAndVerifyOutputsWithEP(model_file_name,
66+
CurrentTestName(),
6967
std::make_unique<NnapiExecutionProvider>(0),
7068
feeds);
7169
#else
@@ -88,7 +86,8 @@ TEST(NnapiExecutionProviderTest, SigmoidSupportedInputRankTest) {
8886
NameMLValMap feeds;
8987
feeds.insert(std::make_pair("X", ml_value_x));
9088

91-
RunAndVerifyOutputsWithEP(model_file_name, "NnapiExecutionProviderTest.SigmoidSupportedInputRankTest",
89+
RunAndVerifyOutputsWithEP(model_file_name,
90+
CurrentTestName(),
9291
std::make_unique<NnapiExecutionProvider>(0),
9392
feeds, {ExpectedEPNodeAssignment::None} /* params */);
9493
#else
@@ -115,7 +114,8 @@ TEST(NnapiExecutionProviderTest, DynamicGraphInputTest) {
115114
NameMLValMap feeds;
116115
feeds.insert(std::make_pair("X", ml_value_x));
117116

118-
RunAndVerifyOutputsWithEP(model_file_name, "NnapiExecutionProviderTest.DynamicGraphInputTest",
117+
RunAndVerifyOutputsWithEP(model_file_name,
118+
CurrentTestName(),
119119
std::make_unique<NnapiExecutionProvider>(0),
120120
feeds);
121121
#else
@@ -144,7 +144,8 @@ TEST(NnapiExecutionProviderTest, InternalUint8SupportTest) {
144144
NameMLValMap feeds;
145145
feeds.insert(std::make_pair("X", ml_value_x));
146146

147-
RunAndVerifyOutputsWithEP(model_file_name, "NnapiExecutionProviderTest.InternalUint8SupportTest",
147+
RunAndVerifyOutputsWithEP(model_file_name,
148+
CurrentTestName(),
148149
std::make_unique<NnapiExecutionProvider>(0),
149150
feeds);
150151
#else
@@ -208,7 +209,8 @@ TEST(NnapiExecutionProviderTest, FunctionTest) {
208209
feeds.insert(std::make_pair("Y", ml_value_y));
209210
feeds.insert(std::make_pair("Z", ml_value_z));
210211

211-
RunAndVerifyOutputsWithEP(model_file_name, "NnapiExecutionProviderTest.FunctionTest",
212+
RunAndVerifyOutputsWithEP(model_file_name,
213+
CurrentTestName(),
212214
std::make_unique<NnapiExecutionProvider>(0),
213215
feeds);
214216
#else
@@ -273,7 +275,8 @@ static void RunQDQModelTest(
273275
const auto model_data_span = AsByteSpan(model_data.data(), model_data.size());
274276

275277
#if defined(__ANDROID__)
276-
RunAndVerifyOutputsWithEP(model_data_span, "NnapiExecutionProviderTest.TestQDQModel",
278+
RunAndVerifyOutputsWithEP(model_data_span,
279+
CurrentTestName(),
277280
std::make_unique<NnapiExecutionProvider>(0),
278281
helper.feeds_, params);
279282
#else
@@ -513,6 +516,31 @@ TEST(NnapiExecutionProviderTest, TestGather) {
513516
{ExpectedEPNodeAssignment::All});
514517
}
515518

519+
// Regression test for initializers that are shared between a node that marks them as skipped
// and another node that still consumes them.
//
// NNAPI EP's Clip op builder will mark the max initializer as skipped but it is also used by the Div op.
// Test that the shared initializer is still present in the NNAPI model for the Div op.
TEST(NnapiExecutionProviderTest, SharedInitializersDoNotGetSkipped) {
  constexpr auto* model_file_name = ORT_TSTR("testdata/clip_div_shared_initializer.onnx");

#if defined(__ANDROID__)
  AllocatorPtr cpu_allocator = std::make_shared<CPUAllocator>();

  std::vector<int64_t> x_dims{3, 2};
  // std::vector's fill constructor takes (count, value): 3 * 2 elements, each 3.0f, so that the
  // number of values matches the element count implied by x_dims.
  std::vector<float> x_values(3 * 2, 3.0f);
  OrtValue ml_value_x;
  CreateMLValue<float>(cpu_allocator, x_dims, x_values, &ml_value_x);

  NameMLValMap feeds{{"input_0", ml_value_x}};

  // On Android the model is actually executed and the outputs are verified.
  RunAndVerifyOutputsWithEP(model_file_name,
                            CurrentTestName(),
                            std::make_unique<NnapiExecutionProvider>(0),
                            feeds,
                            {ExpectedEPNodeAssignment::All});
#else
  // Elsewhere only load the model and check that all nodes are assigned to the NNAPI EP.
  TestModelLoad(model_file_name, std::make_unique<NnapiExecutionProvider>(0), ExpectedEPNodeAssignment::All);
#endif
}
543+
516544
#endif // !(ORT_MINIMAL_BUILD)
517545

518546
TEST(NnapiExecutionProviderTest, NNAPIFlagsTest) {
@@ -541,7 +569,8 @@ TEST(NnapiExecutionProviderTest, TestOrtFormatModel) {
541569
NameMLValMap feeds;
542570
feeds.insert(std::make_pair("Input3", ml_value));
543571

544-
RunAndVerifyOutputsWithEP(model_file_name, "NnapiExecutionProviderTest.TestOrtFormatModel",
572+
RunAndVerifyOutputsWithEP(model_file_name,
573+
CurrentTestName(),
545574
std::make_unique<NnapiExecutionProvider>(0),
546575
feeds);
547576
#else
242 Bytes
Binary file not shown.
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from onnx import TensorProto, checker, helper, save

# Generates clip_div_shared_initializer.onnx: a Clip node followed by a Div node, where the
# Clip 'max' input and the Div divisor are the same initializer ("initializer_1").

# Clip(input_0, min=initializer_0, max=initializer_1) -> clip_output
clip_node = helper.make_node(
    "Clip",
    inputs=["input_0", "initializer_0", "initializer_1"],
    outputs=["clip_output"],
    name="clip",
)

# Div(clip_output, initializer_1) -> output_0; re-uses the initializer that Clip consumes as 'max'.
div_node = helper.make_node(
    "Div",
    inputs=["clip_output", "initializer_1"],
    outputs=["output_0"],
    name="div",
)

graph_inputs = [helper.make_tensor_value_info("input_0", TensorProto.FLOAT, [3, 2])]
graph_outputs = [helper.make_tensor_value_info("output_0", TensorProto.FLOAT, [3, 2])]

# Scalar (rank 0) float initializers.
graph_initializers = [
    helper.make_tensor("initializer_0", TensorProto.FLOAT, [], [0.0]),
    helper.make_tensor("initializer_1", TensorProto.FLOAT, [], [6.0]),
]

graph_proto = helper.make_graph(
    [clip_node, div_node],
    "Main_graph",
    graph_inputs,
    graph_outputs,
    graph_initializers,
)

model = helper.make_model(graph_proto)
checker.check_model(model, full_check=True)
save(model, "clip_div_shared_initializer.onnx")

0 commit comments

Comments
 (0)