@@ -1050,46 +1050,79 @@ def get_type(value: ir.Value) -> onnx.TypeProto | None:
                 e,
             )
 
-    def new_initializer(self, node: ir.Node, array) -> ir.Value | None:
-        original_value = node.outputs[0]
-        if not isinstance(array, np.ndarray):
-            # ONNX does not have a way to represent non-tensor constants, eg. a sequence.
-            # So, a constant-value of type sequence is not folded, but it can be used
-            # to optimize subsequent operations when possible.
+    def _prepare_folded_tensor(
+        self, node: ir.Node, output_name: str, output_array: np.ndarray | Any
+    ) -> ir.Tensor | None:
+        """
+        Shared helper for constant/initializer creation:
+        - Validates the folded Python value is a numpy ndarray.
+        - Wraps it in an ir.Tensor and names it.
+        - Applies output_size_limit logic with input-usage compensation.
+        Returns the ir.Tensor or None if it should be skipped.
+        """
+        if not isinstance(output_array, np.ndarray):
             logger.info(
                 "Skip storing constant folded value %s due to unsupported type %s.",
-                original_value.name,
-                type(array),
+                output_name,
+                type(output_array),
             )
             return None
 
-        tensor = ir.tensor(array)
-        tensor.name = original_value.name
-        initializer = ir.Value(
-            name=original_value.name,
-            type=ir.TensorType(ir.DataType(tensor.dtype)),
-            shape=tensor.shape,  # type: ignore[arg-type]
-            const_value=tensor,
-        )
+        tensor = ir.tensor(output_array)
+        tensor.name = output_name
 
-        if array.size > self.output_size_limit:
-            # Handle examples like Transpose(weight) to be folded even if the size is large,
-            # as long as weight has no other uses. This won't increase model size.
+        # Size gating (shared logic)
+        if output_array.size > self.output_size_limit:
             removed_input_size = 0
-            for input in node.inputs:
-                if (input is not None) and (len(input.uses()) == 1):
-                    array = _get_numpy_value(input)
-                    if array is not None:
-                        removed_input_size += array.size
-            increased_size = array.size - removed_input_size
+            for input_val in node.inputs:
+                if (input_val is not None) and (len(input_val.uses()) == 1):
+                    input_array = _get_numpy_value(input_val)
+                    if input_array is not None:
+                        removed_input_size += input_array.size
+            increased_size = output_array.size - removed_input_size
             if increased_size > 0:
                 logger.info(
-                    "Skip storing constant folded nvalue %s due to large size %s.",
-                    original_value.name,
-                    array.size,
+                    "Skip storing constant folded array %s due to large size %s.",
+                    output_name,
+                    output_array.size,
                 )
                 return None
 
+        return tensor
+
+    def new_constant(self, node: ir.Node, array: np.ndarray | Any) -> ir.Node | None:
+        """Create a new Constant node with the given array as its value."""
+        original_value = node.outputs[0]
+
+        tensor = self._prepare_folded_tensor(node, original_value.name, array)
+        if tensor is None:
+            return None
+
+        logger.debug(
+            "New constant for value %s dtype: %s shape: %s",
+            original_value.name,
+            array.dtype,
+            array.shape,
+        )
+
+        node = ir.Node("", "Constant", inputs=[], attributes=(ir.AttrTensor("value", tensor),))
+        return node
+
+    def new_initializer(self, node: ir.Node, array: np.ndarray | Any) -> ir.Value | None:
+        """Create a new initializer value with the given array as its value."""
+        original_value = node.outputs[0]
+
+        tensor = self._prepare_folded_tensor(node, original_value.name, array)
+        if tensor is None:
+            return None
+
+        initializer = ir.Value(
+            name=original_value.name,
+            type=ir.TensorType(ir.DataType(tensor.dtype)),
+            shape=tensor.shape,  # type: ignore[arg-type]
+            const_value=tensor,
+        )
+
         logger.debug(
             "New Initializer for value %s dtype: %s shape: %s",
             original_value.name,
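
The size gate in `_prepare_folded_tensor` is easiest to see with concrete numbers: a folded output larger than `output_size_limit` is still accepted when it merely replaces inputs that have no other uses (e.g. folding `Transpose(weight)` when `weight` feeds nothing else), because deleting those inputs offsets the growth. Below is a minimal standalone sketch of that accounting, using plain numpy arrays and a made-up limit in place of the real `ir.Value` machinery:

```python
import numpy as np

OUTPUT_SIZE_LIMIT = 1024  # stand-in for self.output_size_limit


def would_store(output: np.ndarray, single_use_inputs: list[np.ndarray]) -> bool:
    """Mirrors the gate: accept a large folded output only when the
    single-use inputs it eliminates are at least as big."""
    if output.size <= OUTPUT_SIZE_LIMIT:
        return True
    removed_input_size = sum(a.size for a in single_use_inputs)
    return output.size - removed_input_size <= 0


weight = np.zeros((64, 64), dtype=np.float32)  # 4096 elements, single use
folded = np.ascontiguousarray(weight.T)        # Transpose(weight): also 4096 elements

print(would_store(folded, [weight]))  # True: net model size is unchanged
print(would_store(folded, []))        # False: would add 4096 new elements
```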
@@ -1099,7 +1132,7 @@ def new_initializer(self, node: ir.Node, array) -> ir.Value | None:
 
         return initializer
 
-    def process_node(self, node: ir.Node) -> Replacement | None:
+    def process_node(self, node: ir.Node, is_function: bool) -> Replacement | None:
         """Process a node and return a Replacement if the node can be replaced."""
         for i, value in enumerate(node.inputs):
             sym_value = self._state.get_sym_value(value)
@@ -1252,6 +1285,12 @@ def convert(av):
         if outputs is None:
             return None
         if len(node.outputs) == 1 and not isinstance(outputs, (tuple, list)):
+            # We don't support initializers in functions, so we need to create Constant nodes
+            if is_function:
+                replacement = self.new_constant(node, outputs)
+                if replacement is None:
+                    return None
+                return Replacement(replacement.outputs, [replacement])
             new_initializer_value = self.new_initializer(node, outputs)
             if new_initializer_value is None:
                 return None
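
The `is_function` branch exists because, as the added comment notes, initializers are not supported inside function bodies, so a folded value there has to be materialized as a `Constant` node instead. A small illustration with the stock `onnx` helper API (the tensor and output names here are made up for the example) of what such a node looks like at the proto level:

```python
import numpy as np
from onnx import helper, numpy_helper

# Inside a function body a folded value must travel as a Constant node,
# since there is no initializer list to attach it to.
folded = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
const_node = helper.make_node(
    "Constant",
    inputs=[],
    outputs=["folded_value"],
    value=numpy_helper.from_array(folded, name="folded_value"),
)
print(const_node)
```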
@@ -1301,7 +1340,8 @@ def visit_attribute(self, attr: ir.Attr) -> None:
         self.visit_graph(graph)
 
     def visit_node(self, node: ir.Node, root: ir.Graph | ir.Function) -> None:
-        replacement = self.process_node(node)
+        is_function = isinstance(root, ir.Function)
+        replacement = self.process_node(node, is_function=is_function)
         if replacement is None:
             # No change. Process attributes.
             for attr in node.attributes.values():
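
`visit_node` is where the new flag originates: the traversal already knows whether `root` is a graph or a function, and simply threads that fact down to `process_node`. A toy sketch of the same dispatch, with stand-in classes instead of the real `ir` module:

```python
class Graph: ...
class Function(Graph): ...


def process_node(node: str, is_function: bool) -> None:
    kind = "Constant node" if is_function else "initializer"
    print(f"would fold {node!r} into a {kind}")


def visit_node(node: str, root: Graph) -> None:
    # Same dispatch as the diff: the folding strategy depends on the container.
    process_node(node, is_function=isinstance(root, Function))


visit_node("Add_0", Function())  # would fold 'Add_0' into a Constant node
visit_node("Add_0", Graph())     # would fold 'Add_0' into an initializer
```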