Skip to content

Commit c533007

Browse files
authored
Merge pull request #766 from intel/niharika/mild_weight_sharing
Implemented mild weight-as-input handling so that quantization parameters are kept as initializers for QDQ nodes
2 parents ed9e425 + 47a231a commit c533007

File tree

1 file changed

+43
-10
lines changed

1 file changed

+43
-10
lines changed

onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.cc

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -677,6 +677,27 @@ static void AddInitializerAsInput(onnxruntime::Graph& dst_graph,
677677
}
678678
}
679679

680+
// To check if the input parameters of a DQ or Q node are quantization parameters
681+
// Scale and Zero point parameters are quantization parameters
682+
static bool IsQuantizationParameter(const std::string& initializer_name,
683+
const onnxruntime::GraphViewer& src_graph) {
684+
// Check if this initializer is used as scale or zero_point in any DQ/Q node
685+
for (auto& node_idx : src_graph.GetNodesInTopologicalOrder()) {
686+
const auto* node = src_graph.GetNode(node_idx);
687+
if (node->OpType() == "DequantizeLinear" || node->OpType() == "QuantizeLinear") {
688+
const auto& input_defs = node->InputDefs();
689+
// Check if this initializer is used as scale (input 1) or zero_point (input 2)
690+
if (input_defs.size() >= 2 && input_defs[1]->Name() == initializer_name) {
691+
return true; // This is a scale parameter
692+
}
693+
if (input_defs.size() >= 3 && input_defs[2]->Name() == initializer_name) {
694+
return true; // This is a zero_point parameter
695+
}
696+
}
697+
}
698+
return false;
699+
}
700+
680701
// Creates a new model without the DQ/Q operators in the src graph.
681702
Status CreateModelWithStrippedQDQNodes(const GraphViewer& src_graph,
682703
const logging::Logger& logger,
@@ -845,19 +866,31 @@ Status CreateModelWithStrippedQDQNodes(const GraphViewer& src_graph,
845866
if (!init_with_data &&
846867
utils::HasExternalData(initializer_tensor) &&
847868
enable_ovep_weight_sharing) {
848-
insert_metadata(initializer_tensor);
849869

850-
// Add initializer with external data as input
851-
AddInitializerAsInput(dst_graph, accumulated_inputs, src_graph, name);
852-
} else {
853-
// Add as an initialized tensor if it does not have external data
854-
if (initializers_to_keep.count(name) > 0) {
855-
if (init_with_data) {
856-
dst_graph.AddInitializedTensor(*init_with_data);
870+
// Only convert to input if it's not a quantization parameter
871+
bool is_quant_param = IsQuantizationParameter(name, src_graph);
872+
873+
if (!is_quant_param) {
874+
// This is actual weight data - convert it to an input for weight sharing
875+
insert_metadata(initializer_tensor);
876+
AddInitializerAsInput(dst_graph, accumulated_inputs, src_graph, name);
857877
} else {
858-
dst_graph.AddInitializedTensor(initializer_tensor);
878+
// This is a quantization parameter - keep as initializer even if external
879+
880+
if (initializers_to_keep.count(name) > 0) {
881+
882+
dst_graph.AddInitializedTensor(initializer_tensor);
883+
}
884+
}
885+
} else {
886+
// Add as an initialized tensor if it does not have external data
887+
if (initializers_to_keep.count(name) > 0) {
888+
if (init_with_data) {
889+
dst_graph.AddInitializedTensor(*init_with_data);
890+
} else {
891+
dst_graph.AddInitializedTensor(initializer_tensor);
892+
}
859893
}
860-
}
861894
}
862895

863896
current_scope_initializer_set.insert(name);

0 commit comments

Comments
 (0)