Skip to content

Commit e2ec2b3

Browse files
authored
Add QDQ scale propagation pass (#713)
* Add pass to perform QDQ stripping and propagate scales * Fix disconnected output node * Fixes to support session.disable_quant_qdq output, remove dangling nodes and duplicate DQ nodes * Fix lack of scales updates and remove stray QDQ nodes in certain models * Address issues with Linux CI * Fix for double QDQ issue
1 parent 80daa9b commit e2ec2b3

File tree

7 files changed

+1014
-3
lines changed

7 files changed

+1014
-3
lines changed

cmake/onnxruntime_providers_openvino.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
endif()
5050
add_dependencies(onnxruntime_providers_openvino onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
5151
target_include_directories(onnxruntime_providers_openvino SYSTEM PUBLIC ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${OpenVINO_INCLUDE_DIR} ${OPENVINO_INCLUDE_DIR_LIST} ${PYTHON_INCLUDE_DIRS} $ENV{OPENCL_INCS} $ENV{OPENCL_INCS}/../../cl_headers/)
52-
target_link_libraries(onnxruntime_providers_openvino ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 ${OPENVINO_LIB_LIST} ${ABSEIL_LIBS} Eigen3::Eigen)
52+
target_link_libraries(onnxruntime_providers_openvino ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 ${OPENVINO_LIB_LIST} ${ABSEIL_LIBS} Eigen3::Eigen onnx_proto)
5353

5454
target_compile_definitions(onnxruntime_providers_openvino PRIVATE FILE_NAME=\"onnxruntime_providers_openvino.dll\")
5555

onnxruntime/core/optimizer/double_qdq_pairs_remover.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ static void ApplyNewInputValue(Graph& graph, Node& node, QDQ::InputIndex index,
5252
input_init.ToProto(new_input_tensor);
5353
auto new_name = graph.GenerateNodeArgName("DoubleQDQRemoved_" + node.InputDefs()[index]->Name());
5454
new_input_tensor.set_name(new_name);
55+
new_input_tensor.add_dims(1);
5556
NodeArg& new_input = graph_utils::AddInitializerWithExternalData(graph, new_input_tensor);
5657
graph_utils::ReplaceNodeInput(node, index, new_input);
5758
}

onnxruntime/core/providers/openvino/backend_manager.cc

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "core/providers/openvino/ov_interface.h"
2121
#include "core/providers/openvino/ov_versions/capability.h"
2222
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
23+
#include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h"
2324

2425
namespace onnxruntime {
2526
namespace openvino_ep {
@@ -429,8 +430,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
429430

430431
const auto& onnx_model_path_name = subgraph.ModelPath();
431432
// QDQ stripping enabled only for the NPU and experimentally on the GPU
432-
if ((session_context_.device_type.find("NPU") != std::string::npos ||
433-
session_context_.device_type.find("GPU") != std::string::npos) &&
433+
if ((session_context_.device_type.find("NPU") != std::string::npos) &&
434434
(enable_ovep_qdq_optimizer || session_context_.so_share_ep_contexts)) {
435435
std::unique_ptr<onnxruntime::Model> model;
436436
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, enable_ovep_qdq_optimizer, model, shared_context_.shared_weights);
@@ -440,6 +440,17 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
440440
DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
441441
ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
442442
return model_proto;
443+
} else if ((session_context_.device_type.find("GPU") != std::string::npos) &&
444+
enable_ovep_qdq_optimizer) {
445+
// Create a copy of the model
446+
std::unique_ptr<onnxruntime::Model> model;
447+
Status status = qdq_scales_fix::Transform(subgraph, logger, model);
448+
auto model_proto = model->ToProto();
449+
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
450+
print_model_proto_duration();
451+
DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
452+
ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
453+
return model_proto;
443454
} else {
444455
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled";
445456
auto model = subgraph.CreateModel(logger);
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Copyright (C) Intel Corporation
2+
// Licensed under the MIT License
3+
4+
#include "ov_protobuf_utils.h"
5+
6+
#include "core/graph/onnx_protobuf.h"
7+
#include "core/common/common.h"
8+
9+
namespace onnxruntime {
10+
namespace openvino_ep {
11+
// Reads the first float element of a tensor initializer.
//
// `initializer` is an opaque pointer that is reinterpreted as a
// `const ONNX_NAMESPACE::TensorProto*`; the caller must pass a pointer to such an object.
// Enforces (via ORT_ENFORCE) that the tensor has a data type and that it is FLOAT.
// Returns element 0 of the tensor's `float_data` field.
float get_float_initializer_data(const void* initializer) {
12+
const auto* tp = reinterpret_cast<const ONNX_NAMESPACE::TensorProto*>(initializer);
13+
ORT_ENFORCE((tp->has_data_type() && (tp->data_type() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT)));
14+
// ORT_ENFORCE(initializer.dims_size() == 1);
15+
// NOTE(review): nothing here checks that float_data has at least one element; a tensor
// whose payload is stored elsewhere (e.g. raw_data) would presumably have an empty
// float_data -- confirm callers guarantee a populated float_data.
return tp->float_data(0);
16+
}
17+
// Overwrites the first float element of a tensor initializer with `data`.
//
// `initializer` is an opaque pointer that is cast to `ONNX_NAMESPACE::TensorProto*`.
// Enforces (via ORT_ENFORCE) that the tensor has a data type and that it is FLOAT,
// then writes `data` into element 0 of the tensor's `float_data` field.
void set_float_initializer_data(const void* initializer, float data) {
18+
// NOTE(review): the C-style cast removes the const qualifier from the parameter, and the
// tensor is then mutated below; the pointed-to object must actually be non-const.
auto* tp = (ONNX_NAMESPACE::TensorProto*)(initializer);
19+
ORT_ENFORCE((tp->has_data_type() && (tp->data_type() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT)));
20+
// ORT_ENFORCE(initializer.dims_size() == 1);
21+
// NOTE(review): writes index 0 without checking that float_data already holds an
// element -- confirm callers guarantee a populated float_data.
tp->set_float_data(0, data);
22+
}
23+
} // namespace openvino_ep
24+
} // namespace onnxruntime
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// Copyright (C) Intel Corporation
2+
// Licensed under the MIT License
3+
4+
#pragma once
5+
// Helpers for reading and writing a float value on a tensor initializer that is
// passed as an opaque pointer. This header includes no protobuf headers; presumably
// the void* parameters exist to keep it that way -- confirm.
namespace onnxruntime {
6+
namespace openvino_ep {
7+
// Returns a float read from the tensor initializer that `initializer` points to.
float get_float_initializer_data(const void* initializer);
8+
// Stores `data` into the tensor initializer that `initializer` points to.
// NOTE(review): the parameter is const-qualified although the name implies mutation.
void set_float_initializer_data(const void* initializer, float data);
9+
}
10+
} // namespace onnxruntime

0 commit comments

Comments
 (0)