Commit 8845fb2

Avoid initializer name collision in _fuse_batchnorm.py (#2680)
Fixes pytorch/pytorch#166797. The original naming collides when there are multiple matched patterns sharing the same parent node. This PR changes the naming to depend on each pattern's own Conv weight name, which should be a non-duplicated identifier.

~~NOTE: I don't know if my understanding is correct. It seems `x` is an input of the pattern, so `x.name + "_bias"` collides with the `max_pool` bias (see the picture in the original issue)? If we check the output model after _fuse_batchnorm.py, the bias is correct but has the name `val_17` (the name may have collided and been reassigned by NameAuthority?). However, when the following rule _remove_optional_bias tries to fetch the bias, it sees all zeros for some reason.~~
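To illustrate the collision: when two Conv+BatchNormalization matches share the same parent value, both matches see the same pattern input `x`, so the old scheme derives the same bias name twice. A minimal sketch, using the value names from the test added below (the literal names are illustrative):

```python
# Two fusion matches share the parent value X1 (the MaxPool output):
#   X2 = Conv(X1, W1); Y1 = BatchNormalization(X2, ...)
#   X3 = Conv(X1, W2); Y2 = BatchNormalization(X3, ...)

x_name = "X1"                  # shared pattern input for both matches

# Old scheme: both fused biases get the same initializer name.
old_bias_1 = x_name + "_bias"  # "X1_bias"
old_bias_2 = x_name + "_bias"  # "X1_bias" -> collision on initializer creation

# New scheme: derive the name from each Conv's own weight input,
# which is unique per Conv.
new_bias_1 = "W1" + "_bias"    # "W1_bias"
new_bias_2 = "W2" + "_bias"    # "W2_bias" -> no collision
```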
1 parent: d80575d

File tree

2 files changed: +62 −1 lines


onnxscript/rewriter/rules/common/_fuse_batchnorm.py

Lines changed: 4 additions & 1 deletion
```diff
@@ -68,7 +68,10 @@ def rewrite(self, op, x: ir.Value, inbound_out: ir.Value, batchnorm_out: ir.Valu
             bias_name = inbound_node.inputs[2].name
         else:
             original_bias = np.zeros_like(input_mean)
-            bias_name = x.name + "_bias"
+            # Use inbound input 1 (should be weight) to derive a name for the bias
+            # to avoid name collision on initializer creation when there are multiple patterns
+            # sharing the same parent nodes.
+            bias_name = inbound_node.inputs[1].name + "_bias"
         fused_bias = ir.tensor((original_bias - input_mean) * scale_factor + beta)
 
         return op.op(
```
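For context, the branch being patched handles a Conv without a bias: the rewrite synthesizes a zero bias and folds the BatchNormalization statistics into it. A minimal numeric sketch of that folding, assuming the standard BatchNormalization definition (`scale_factor = gamma / sqrt(var + eps)` is computed earlier in the rewrite):

```python
import numpy as np

C = 64  # output channels (illustrative)
gamma, beta = np.random.randn(C), np.random.randn(C)
mean, var = np.random.randn(C), np.abs(np.random.randn(C))
eps = 1e-5

# BatchNorm at inference: y = gamma * (conv_out - mean) / sqrt(var + eps) + beta
scale_factor = gamma / np.sqrt(var + eps)

# With no original Conv bias (the `else` branch above), start from zeros,
# matching the fused_bias line in the diff.
original_bias = np.zeros_like(mean)
fused_bias = (original_bias - mean) * scale_factor + beta
```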

onnxscript/rewriter/rules/common/_fuse_batchnorm_test.py

Lines changed: 58 additions & 0 deletions
```diff
@@ -253,6 +253,64 @@ def test_fuse_batchnorm_graph_inputs(self):
         # No changes were applied as W is a graph input
         self.assertEqual(count, 0)
 
+    def test_fuse_batchnorm_does_not_collide_names_with_same_parent_node(self):
+        model_proto = onnx.parser.parse_model("""
+            < ir_version: 7, opset_import: ["" : 17] >
+            test_model (float[N, 32, 14, 16] X) => (float [N, ?, ?, ?] Y1, float [N, ?, ?, ?] Y2)
+            {
+                X1 = MaxPool<kernel_shape=[3,3]>(X)
+                X2 = Conv(X1, W1)
+                Y1 = BatchNormalization(X2, gamma_64, beta_64, input_mean_64, input_var_64)
+                X3 = Conv(X1, W2)
+                Y2 = BatchNormalization(X3, gamma_256, beta_256, input_mean_256, input_var_256)
+            }
+        """)
+        initializers = [
+            onnx.numpy_helper.from_array(
+                np.random.randn(64, 32, 3, 3).astype(np.float32), name="W1"
+            ),
+            onnx.numpy_helper.from_array(
+                np.random.randn(64).astype(np.float32), name="gamma_64"
+            ),
+            onnx.numpy_helper.from_array(
+                np.random.randn(64).astype(np.float32), name="beta_64"
+            ),
+            onnx.numpy_helper.from_array(
+                np.random.randn(64).astype(np.float32), name="input_mean_64"
+            ),
+            onnx.numpy_helper.from_array(
+                np.abs(np.random.randn(64)).astype(np.float32), name="input_var_64"
+            ),
+            onnx.numpy_helper.from_array(
+                np.random.randn(256, 32, 3, 3).astype(np.float32), name="W2"
+            ),
+            onnx.numpy_helper.from_array(
+                np.random.randn(256).astype(np.float32), name="gamma_256"
+            ),
+            onnx.numpy_helper.from_array(
+                np.random.randn(256).astype(np.float32), name="beta_256"
+            ),
+            onnx.numpy_helper.from_array(
+                np.random.randn(256).astype(np.float32), name="input_mean_256"
+            ),
+            onnx.numpy_helper.from_array(
+                np.abs(np.random.randn(256)).astype(np.float32), name="input_var_256"
+            ),
+        ]
+        model_proto.graph.initializer.extend(initializers)
+        onnx.checker.check_model(model_proto, True)
+        model = ir.serde.deserialize_model(model_proto)
+        count = _fuse_batchnorm.rules.apply_to_model(model)
+
+        # Applied twice, once for each BatchNorm
+        self.assertEqual(count, 2)
+        # it should have different bias names for the two fused Conv nodes
+        conv_nodes = [node for node in model.graph if node.op_type == "Conv"]
+        self.assertEqual(len(conv_nodes), 2)
+        bias_names_1 = conv_nodes[0].inputs[2].name
+        bias_names_2 = conv_nodes[1].inputs[2].name
+        self.assertNotEqual(bias_names_1, bias_names_2)
+
 
 if __name__ == "__main__":
     unittest.main()
```
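For reference, applying the rule set outside the test harness could look like the following sketch; the model path is hypothetical, and the calls mirror those used in the test above (`_fuse_batchnorm` is a private module, so the exact public entry point may differ in practice):

```python
import onnx
from onnxscript import ir
from onnxscript.rewriter.rules.common import _fuse_batchnorm

# Load a model (hypothetical path) and deserialize it into the onnxscript IR.
model_proto = onnx.load("model.onnx")
model = ir.serde.deserialize_model(model_proto)

# Apply the Conv+BatchNormalization fusion rules; returns the number of matches.
count = _fuse_batchnorm.rules.apply_to_model(model)
print(f"Fused {count} BatchNormalization node(s)")

# Serialize back and save the fused model.
onnx.save(ir.serde.serialize_model(model), "model_fused.onnx")
```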
