@@ -677,6 +677,27 @@ static void AddInitializerAsInput(onnxruntime::Graph& dst_graph,
677677 }
678678}
679679
680+ // To check if the input parameters of a DQ or Q node are quantization parameters
681+ // Scale and Zero point parameters are quantization parameters
682+ static bool IsQuantizationParameter (const std::string& initializer_name,
683+ const onnxruntime::GraphViewer& src_graph) {
684+ // Check if this initializer is used as scale or zero_point in any DQ/Q node
685+ for (auto & node_idx : src_graph.GetNodesInTopologicalOrder ()) {
686+ const auto * node = src_graph.GetNode (node_idx);
687+ if (node->OpType () == " DequantizeLinear" || node->OpType () == " QuantizeLinear" ) {
688+ const auto & input_defs = node->InputDefs ();
689+ // Check if this initializer is used as scale (input 1) or zero_point (input 2)
690+ if (input_defs.size () >= 2 && input_defs[1 ]->Name () == initializer_name) {
691+ return true ; // This is a scale parameter
692+ }
693+ if (input_defs.size () >= 3 && input_defs[2 ]->Name () == initializer_name) {
694+ return true ; // This is a zero_point parameter
695+ }
696+ }
697+ }
698+ return false ;
699+ }
700+
680701// Creates a new model without the DQ/Q operators in the src graph.
681702Status CreateModelWithStrippedQDQNodes (const GraphViewer& src_graph,
682703 const logging::Logger& logger,
@@ -845,19 +866,31 @@ Status CreateModelWithStrippedQDQNodes(const GraphViewer& src_graph,
845866 if (!init_with_data &&
846867 utils::HasExternalData (initializer_tensor) &&
847868 enable_ovep_weight_sharing) {
848- insert_metadata (initializer_tensor);
849869
850- // Add initializer with external data as input
851- AddInitializerAsInput (dst_graph, accumulated_inputs, src_graph, name);
852- } else {
853- // Add as an initialized tensor if it does not have external data
854- if (initializers_to_keep. count (name) > 0 ) {
855- if (init_with_data) {
856- dst_graph. AddInitializedTensor (*init_with_data );
870+ // Only convert to input if it's not a quantization parameter
871+ bool is_quant_param = IsQuantizationParameter ( name, src_graph );
872+
873+ if (!is_quant_param) {
874+ // This is actual weight data - so to convert to input for weight sharing
875+ insert_metadata (initializer_tensor);
876+ AddInitializerAsInput (dst_graph, accumulated_inputs, src_graph, name );
857877 } else {
858- dst_graph.AddInitializedTensor (initializer_tensor);
878+ // This is a quantization parameter - keep as initializer even if external
879+
880+ if (initializers_to_keep.count (name) > 0 ) {
881+
882+ dst_graph.AddInitializedTensor (initializer_tensor);
883+ }
884+ }
885+ } else {
886+ // Add as an initialized tensor if it does not have external data
887+ if (initializers_to_keep.count (name) > 0 ) {
888+ if (init_with_data) {
889+ dst_graph.AddInitializedTensor (*init_with_data);
890+ } else {
891+ dst_graph.AddInitializedTensor (initializer_tensor);
892+ }
859893 }
860- }
861894 }
862895
863896 current_scope_initializer_set.insert (name);
0 commit comments