Commit 79414da

Incorporated all subfunction changes
Signed-off-by: Abhishek Kumar Singh <sabhis@qti.qualcomm.com>
1 parent cb7da87 commit 79414da

File tree: 6 files changed (+195, -6 lines)

QEfficient/__init__.py

Lines changed: 6 additions & 0 deletions
@@ -11,6 +11,7 @@
import QEfficient.utils.model_registery  # noqa: F401
from QEfficient.utils import custom_format_warning
from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.patches import apply_torch_patches, is_patched

# For faster downloads via hf_transfer
# This code is put above import statements as this needs to be executed before
@@ -22,6 +23,9 @@
# custom warning for the better logging experience
warnings.formatwarning = custom_format_warning

+# Apply patches
+# TODO: Find a better way to do this; this is a temporary fix.
+apply_torch_patches()

def check_qaic_sdk():
    """Check if QAIC SDK is installed"""
@@ -70,6 +74,8 @@ def check_qaic_sdk():
    "QEFFAutoModelForImageTextToText",
    "QEFFAutoModelForSpeechSeq2Seq",
    "QEFFCommonLoader",
+    "apply_torch_patches",
+    "is_patched",
]

else:
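
Since the patch is applied at import time, a quick sanity check (a minimal sketch, assuming a standard install of this package) is:

import QEfficient  # noqa: F401 - importing the package runs apply_torch_patches()

from QEfficient import is_patched

assert is_patched()  # the patched _setup_trace_module_map is now active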

QEfficient/base/modeling_qeff.py

Lines changed: 15 additions & 3 deletions
@@ -18,10 +18,13 @@
import onnx
import torch

-from QEfficient.base.onnx_transforms import OnnxTransform
+from QEfficient.base.onnx_transforms import CustomOpTransform, OnnxTransform, rename_function_outputs
from QEfficient.base.pytorch_transforms import PytorchTransform
from QEfficient.compile.qnn_compiler import compile as qnn_compile
+from QEfficient.customop.ctx_scatter_gather import CtxGather, CtxGatherFunc, CtxScatter, CtxScatterFunc
+from QEfficient.customop.rms_norm import CustomRMSNorm, CustomRMSNormFunc
from QEfficient.generation.cloud_infer import QAICInferenceSession
+from QEfficient.transformers.models.pytorch_transforms import get_decoder_layer_classes_for_export
from QEfficient.utils import (
    constants,
    create_json,
@@ -243,22 +246,31 @@ def _export(
                input_names.append(param)

        try:
+            CustomOpTransform.register_custom_op("CustomRMSNormFunc", CustomRMSNormFunc, CustomRMSNorm)
+            CustomOpTransform.register_custom_op("CtxScatterFunc", CtxScatterFunc, CtxScatter)
+            CustomOpTransform.register_custom_op("CtxGatherFunc", CtxGatherFunc, CtxGather)
+            decoder_layer_classes = get_decoder_layer_classes_for_export(self.model)
            export_kwargs = {} if export_kwargs is None else export_kwargs
+
            torch.onnx.export(
                self.model,
                (example_inputs,),
                str(tmp_onnx_path),
                input_names=input_names,
                output_names=output_names,
                dynamic_axes=dynamic_axes,
-                opset_version=constants.ONNX_EXPORT_OPSET,
+                opset_version=17,
+                export_modules_as_functions=decoder_layer_classes,
+                do_constant_folding=True,
+                verbose=True,
                **export_kwargs,
            )
            logger.info("PyTorch export successful")

            _ = self._offload_model_weights(offload_pt_weights)
-
            model = onnx.load(tmp_onnx_path, load_external_data=False)
+            model, transformed = rename_function_outputs(model)
+
            transform_kwargs = {
                "onnx_base_dir": str(tmp_onnx_dir),
                "model_name": self.model_name,

QEfficient/base/onnx_transforms.py

Lines changed: 26 additions & 0 deletions
@@ -99,3 +99,29 @@ def apply(
            current_file_size = tsize
            external_data_helper.set_external_data(tensor, f"{model_name}_{file_num}.onnx.data")
        return model, transformed
+
+def rename_function_outputs(model):
+    graph = model.graph
+    op_type_to_func_map = {func.name: func for func in model.functions}
+    decoder_layer_patterns = ["DecoderLayer", "Block", "Layer"]
+    transformed = False
+    model_graph_outputs = [val.name for val in model.graph.output]
+    node_count = 0
+    for node in graph.node:
+        if any(pattern in node.name or pattern in node.op_type for pattern in decoder_layer_patterns):
+            func = op_type_to_func_map[node.op_type]
+            for i, out_name in enumerate(func.output):
+                if "_InternalRetainedState" in out_name:
+                    transformed = True
+                    tmp = node.output[i]
+                    if "key" in func.output[i]:
+                        new_name = f"past_key.{node_count}_RetainedState"
+                    elif "value" in func.output[i]:
+                        new_name = f"past_value.{node_count}_RetainedState"
+                    else:
+                        raise NotImplementedError()
+                    print(f"renaming {node.output[i]} to {new_name}")
+                    node.output[i] = new_name
+                    model.graph.output[model_graph_outputs.index(tmp)].name = new_name
+            node_count += 1
+    return model, transformed
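
A hedged usage sketch for the new helper (the paths are placeholders; it assumes the model was exported with export_modules_as_functions, so decoder layers appear as function-call nodes whose outputs carry the _InternalRetainedState sentinel):

import onnx

from QEfficient.base.onnx_transforms import rename_function_outputs

model = onnx.load("model.onnx", load_external_data=False)  # placeholder path
model, transformed = rename_function_outputs(model)
if transformed:
    onnx.save(model, "model.renamed.onnx")  # external tensor data is left untouched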

QEfficient/transformers/models/modeling_auto.py

Lines changed: 3 additions & 3 deletions
@@ -347,7 +347,7 @@ def export(self, export_dir: Optional[str] = None) -> str:
            dynamic_axes,
            export_dir=export_dir,
        )
-
+
    def compile(
        self,
        onnx_path: Optional[str] = None,
@@ -2285,14 +2285,14 @@ def export(self, export_dir: Optional[str] = None) -> str:
                for kv in ["key", "value"]:
                    example_inputs["past_key_values"][i].append(torch.zeros(pkv_cache[0][0].shape, dtype=torch.float32))
                    dynamic_axes[f"past_{kv}.{i}"] = pkv_dynamic_axes
-                    output_names.append(f"past_{kv}.{i}_RetainedState")
+                    output_names.append(f"past_{kv}.{i}_InternalRetainedState")

        else:
            for i in range(self.num_layers):
                for kv in ["key", "value"]:
                    example_inputs["past_key_values"][i].append(torch.zeros(kv_cache_shape, dtype=torch.float32))
                    dynamic_axes[f"past_{kv}.{i}"] = pkv_dynamic_axes
-                    output_names.append(f"past_{kv}.{i}_RetainedState")
+                    output_names.append(f"past_{kv}.{i}_InternalRetainedState")

        if self.continuous_batching:
            example_inputs["batch_index"] = torch.arange(bs).view(bs, 1)

QEfficient/transformers/models/pytorch_transforms.py

Lines changed: 25 additions & 0 deletions
@@ -788,3 +788,28 @@ def apply(cls, model: nn.Module, pooling: Union[str, Callable]) -> Tuple[nn.Modu
        model = PooledModel(model, pooling_method)
        warnings.warn("Pooling is applied to the model.")
        return model, transformed
+
+def get_decoder_layer_classes_for_export(model: nn.Module) -> set:
+    """
+    Dynamically determine which DecoderLayer classes should be exported as functions,
+    based on the model's architecture, using the existing KVCacheTransform mapping.
+    """
+    # Patterns that identify decoder layer classes
+    DECODER_LAYER_PATTERNS = ["DecoderLayer", "Block", "Layer"]
+
+    # Get all QEff classes from the existing mapping that are decoder layers
+    decoder_layer_classes = set()
+
+    for original_class, qeff_class in KVCacheTransform._module_mapping.items():
+        # Check if the QEff class name contains a decoder layer pattern
+        qeff_class_name = qeff_class.__name__
+        if any(pattern in qeff_class_name for pattern in DECODER_LAYER_PATTERNS):
+            decoder_layer_classes.add(qeff_class)
+
+    # Filter to only the classes actually used in the current model
+    model_decoder_classes = set()
+    for module in model.modules():
+        if module.__class__ in decoder_layer_classes:
+            model_decoder_classes.add(module.__class__)
+
+    return model_decoder_classes
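
A hedged usage sketch (the checkpoint and the resulting class are illustrative; it assumes KVCacheTransform follows the repo's usual apply(cls, model) -> (model, transformed) signature and has already swapped the decoder layers for their QEff counterparts):

from transformers import AutoModelForCausalLM

from QEfficient.transformers.models.pytorch_transforms import (
    KVCacheTransform,
    get_decoder_layer_classes_for_export,
)

model = AutoModelForCausalLM.from_pretrained("gpt2")
model, _ = KVCacheTransform.apply(model)
decoder_classes = get_decoder_layer_classes_for_export(model)
print(decoder_classes)  # e.g. {<class '...QEffGPT2Block'>}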

QEfficient/utils/patches.py

Lines changed: 120 additions & 0 deletions
@@ -0,0 +1,120 @@
+# -----------------------------------------------------------------------------
+#
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# -----------------------------------------------------------------------------
+
+"""Monkey patches for torch.onnx.utils to fix ONNX export issues."""
+
+from typing import Collection, Set, Type, Union
+
+import torch
+import torch.onnx.utils as onnx_utils
+from torch import _C
+
+
+def _setup_trace_module_map_patched(
+    model: Union[torch.nn.Module, torch.jit.ScriptModule],
+    export_modules_as_functions: Union[bool, Collection[Type[torch.nn.Module]]],
+) -> Set[str]:
+    """Patched version of _setup_trace_module_map that fixes an onnx_attrs type mismatch."""
+
+    def __register_attribute_hook():
+        attr_name = "_onnx_attrs"
+
+        def _track_module_attributes_forward_pre_hook(module, input):
+            setattr(module, attr_name, _get_module_attributes(module))
+
+        def _track_module_attributes_forward_hook(module, input, output):
+            tracing_state = _C._get_tracing_state()
+            if not tracing_state:
+                return
+            graph = tracing_state.graph()
+            onnx_attrs = {}
+            if hasattr(module, attr_name):
+                onnx_attrs = getattr(module, attr_name)
+                delattr(module, attr_name)
+            # FIX: pass an empty dict to avoid a type mismatch in
+            # _jit_pass_onnx_track_scope_attributes, observed with transformers v4.55 and above.
+            onnx_attrs = {}
+            _C._jit_pass_onnx_track_scope_attributes(graph, onnx_attrs)
+
+        for m in model.modules():
+            m.register_forward_hook(_track_module_attributes_forward_hook)
+            m.register_forward_pre_hook(_track_module_attributes_forward_pre_hook)
+
+    def _unqualified_variable_name(qualified_name: str) -> str:
+        """
+        Parse a qualified variable name and return the unqualified version.
+        Pure numeric atoms are considered inadequate, so this function looks past them,
+        starting from the first non-numeric atom.
+        """
+        name_atoms = qualified_name.split(".")
+        for i, atom in reversed(list(enumerate(name_atoms))):
+            if not atom.isnumeric():
+                return ".".join(name_atoms[i:])
+        return qualified_name
+
+    trace_module_map = {
+        _m: torch._C._jit_onnx_create_full_scope_name(torch.typename(type(_m)), _unqualified_variable_name(_n))
+        for _n, _m in model.named_modules()
+    }
+    torch.jit._trace._trace_module_map = trace_module_map
+
+    if isinstance(export_modules_as_functions, bool) and export_modules_as_functions:
+        module_typenames = {torch.typename(type(module)) for module in trace_module_map}
+    elif isinstance(export_modules_as_functions, set) and export_modules_as_functions:
+
+        def _find_typename(v):
+            if isinstance(v, type):
+                return torch.typename(v)
+            else:
+                raise RuntimeError(
+                    "Only type of the `nn.Module` should be "
+                    "passed in the set for argument `export_modules_as_functions`. "
+                    f"Got `{type(v).__name__}`."
+                )
+
+        module_typenames = {_find_typename(v) for v in export_modules_as_functions}
+    else:
+        module_typenames = set()
+
+    if module_typenames:
+        __register_attribute_hook()
+
+    return module_typenames
+
+
+def _get_module_attributes(module):
+    """Helper function to get module attributes safely."""
+    import typing
+
+    annotations = typing.get_type_hints(type(module))
+    base_m_annotations = typing.get_type_hints(torch.nn.Module)
+    [annotations.pop(k, None) for k in base_m_annotations]
+
+    attrs = {}
+    for k in annotations:
+        try:
+            attrs[k] = getattr(module, k)
+        except AttributeError:
+            _C._jit_onnx_log(f"Skipping module attribute '{k}'")
+            continue
+    return attrs
+
+
+def apply_torch_patches():
+    """Apply all necessary torch patches for ONNX export."""
+    # Monkey-patch the internal setup function
+    onnx_utils._setup_trace_module_map = _setup_trace_module_map_patched
+
+    if hasattr(onnx_utils, "_get_module_attributes"):
+        onnx_utils._get_module_attributes = _get_module_attributes
+
+    print("Applied torch ONNX export patches for export_modules_as_functions compatibility")
+
+
+def is_patched():
+    """Check if patches have been applied."""
+    return onnx_utils._setup_trace_module_map == _setup_trace_module_map_patched
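
For intuition about the scope-name helper: _unqualified_variable_name keeps trailing numeric atoms attached to their first non-numeric parent. The inner function is copied out below so its behavior can be checked standalone:

def _unqualified_variable_name(qualified_name: str) -> str:
    # Copy of the inner helper above, extracted for demonstration.
    name_atoms = qualified_name.split(".")
    for i, atom in reversed(list(enumerate(name_atoms))):
        if not atom.isnumeric():
            return ".".join(name_atoms[i:])
    return qualified_name


print(_unqualified_variable_name("model.layers.0"))    # layers.0
print(_unqualified_variable_name("transformer.h.11"))  # h.11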
