More fixes: handle ISTFT inputs without a channel dimension

Alejandro Gaston Alvarez Franceschi · Alejandro Gaston Alvarez Franceschi · commit 0d3238aa8563 · 2024-01-09T13:42:51.000+01:00
diff --git a/coremltools/converters/mil/frontend/torch/test/test_torch_ops.py b/coremltools/converters/mil/frontend/torch/test/test_torch_ops.py
@@ -9589,11 +9589,13 @@ def forward(self, x):
 
     @pytest.mark.slow
     @pytest.mark.parametrize(
-        "compute_unit, backend, input_shape, hop_length, win_length, window, center, normalized, onesided, length, return_complex",
+        "compute_unit, backend, channels, n_fft, num_frames, hop_length, win_length, window, center, normalized, onesided, length, return_complex",
         itertools.product(
             compute_units,
             backends,
-            [(1, 32, 9), (32, 9), (3, 32, 9)], # input shape
+            [None, 1, 3], # channels
+            [16, 32], # n_fft
+            [5, 9], # num_frames
             [None, 4, 5], # hop_length
             [None, 16, 9], # win_length
             [None, torch.hann_window], # window
@@ -9604,11 +9606,12 @@ def forward(self, x):
             [False, True], # return_complex
         )
     )
-    def test_istft(self, compute_unit, backend, input_shape, hop_length, win_length, window, center, normalized, onesided, length, return_complex):
+    def test_istft(self, compute_unit, backend, channels, n_fft, num_frames, hop_length, win_length, window, center, normalized, onesided, length, return_complex):
         if return_complex and onesided:
             pytest.skip("Complex output is incompatible with onesided")
 
-        n_fft = input_shape[1]
+        freq = n_fft*2+1 if onesided else n_fft
+        input_shape = (channels, freq, num_frames) if channels else (freq, num_frames)
 
         class ISTFTModel(torch.nn.Module):
             def forward(self, x):
diff --git a/coremltools/converters/mil/mil/ops/defs/complex_dialect_ops.py b/coremltools/converters/mil/mil/ops/defs/complex_dialect_ops.py
@@ -938,5 +938,4 @@ def type_inference(self):
             hop_length = self.hop_length.val if self.hop_length else self.n_fft.val // 4
             output_shape += [self.n_fft.val + hop_length * (n_frames - 1)]
 
-
         return types.tensor(output_type, tuple(output_shape))
diff --git a/coremltools/converters/mil/mil/passes/defs/lower_complex_dialect_ops.py b/coremltools/converters/mil/mil/passes/defs/lower_complex_dialect_ops.py
@@ -419,9 +419,12 @@ def _istft(
     win_length = win_length or n_fft
 
     input_shape = mb.shape(x=input_real, before_op=before_op)
-    channels = input_shape.val[0]
-    fft_size = input_shape.val[1]
-    n_frames = input_shape.val[2]
+    if input_shape.rank == 3:
+        channels, fft_size, n_frames = input_shape.val
+    else:
+        channels = None
+        fft_size, n_frames = input_shape.val
+
     expected_output_signal_len = n_fft.val + hop_length.val * (n_frames - 1)
 
     is_onesided = onesided.val if onesided else fft_size != n_fft
@@ -482,12 +485,16 @@ def _istft(
     # We need to adapt last dimension
     if length is not None:
         if length.val > expected_output_signal_len:
-            right_pad = mb.fill(shape=(channels, expected_output_signal_len - length), value=0., before_op=before_op)
+            if channels:
+                right_pad = mb.fill(shape=(channels, expected_output_signal_len - length), value=0., before_op=before_op)
+            else:
+                right_pad = mb.fill(shape=(expected_output_signal_len - length,), value=0., before_op=before_op)
+
             real_result = mb.stack(x=(real_result, right_pad), axis=1, before_op=before_op)
             imag_result = mb.stack(x=(imag_result, right_pad), axis=1, before_op=before_op)
         elif length.val < expected_output_signal_len:
-            real_result = mb.slice_by_size(x=real_result, begin=[0], size=[length], before_op=before_op)
-            imag_result = mb.slice_by_size(x=imag_result, begin=[0], size=[length], before_op=before_op)
+            real_result = mb.slice_by_size(x=real_result, begin=[0], size=[length.val], before_op=before_op)
+            imag_result = mb.slice_by_size(x=imag_result, begin=[0], size=[length.val], before_op=before_op)
 
     return real_result, imag_result
 
@@ -498,14 +505,18 @@ def _overlap_add(
     before_op: Operation,
 ) -> Var:
     """
-    The input has shape (channels, fft_size, n_frames)
+    The input has shape (channels, n_frames, fft_size), or (n_frames, fft_size) when there is no channel dimension
     """
     input_shape = mb.shape(x=x, before_op=before_op)
-    channels = input_shape.val[0]
-    n_frames = input_shape.val[1]
 
     # Create empty output with final shape
-    output = mb.fill(shape=(channels, int(n_fft.val + hop_length.val * (n_frames - 1))), value=0., before_op=before_op)
+    if input_shape.rank == 3:
+        channels, n_frames = input_shape.val
+        output = mb.fill(shape=(channels, int(n_fft.val + hop_length.val * (n_frames - 1))), value=0., before_op=before_op)
+    else:
+        channels = None
+        n_frames= input_shape.val
+        output = mb.fill(shape=(int(n_fft.val + hop_length.val * (n_frames - 1)),), value=0., before_op=before_op)
 
     # Create an index used later on overlap add
     n_fft = mb.cast(x=n_fft, dtype="int32", before_op=before_op)
@@ -519,7 +530,8 @@ def _overlap_add(
 
         # Create index to align data frames
         global_idx = mb.add(x=local_idx , y=frame_num*hop_length.val, before_op=before_op)
-        global_idx = mb.stack(values=[global_idx] * channels, axis=0, before_op=before_op)
+        if channels:
+            global_idx = mb.stack(values=[global_idx] * channels, axis=0, before_op=before_op)
 
         # Add data frame
         output = mb.scatter_along_axis(data=output, indices=global_idx, updates=frame, axis=1, mode="add", before_op=before_op)