
Commit 481a91c

Regenerate MLIR Bindings (#1862)
Co-authored-by: enzyme-ci-bot[bot] <78882869+enzyme-ci-bot[bot]@users.noreply.github.com>
1 parent 9d45b84 commit 481a91c

File tree: 3 files changed, +323 -96 lines changed


src/mlir/Dialects/MemRef.jl

Lines changed: 37 additions & 35 deletions
@@ -461,9 +461,10 @@ region. To return a value, one should use `memref.alloca_scope.return`
 operation:

 ```mlir
-%result = memref.alloca_scope {
+%result = memref.alloca_scope -> f32 {
+  %value = arith.constant 1.0 : f32
   ...
-  memref.alloca_scope.return %value
+  memref.alloca_scope.return %value : f32
 }
 ```
@@ -498,7 +499,7 @@ the return operation may be omitted. Otherwise, it has to be present
 to indicate which values are going to be returned. For example:

 ```mlir
-memref.alloca_scope.return %value
+memref.alloca_scope.return %value : f32
 ```
 """
 function alloca_scope_return(results::Vector{Value}; location=Location())
@@ -563,11 +564,11 @@ address space.
 # Example

 ```mlir
-Cast to concrete shape.
-%4 = memref.cast %1 : memref<*xf32> to memref<4x?xf32>
+// Cast to concrete shape.
+%4 = memref.cast %1 : memref<*xf32> to memref<4x?xf32>

-Erase rank information.
-%5 = memref.cast %1 : memref<4x?xf32> to memref<*xf32>
+// Erase rank information.
+%5 = memref.cast %1 : memref<4x?xf32> to memref<*xf32>
 ```
 """
 function cast(source::Value; dest::IR.Type, location=Location())
@@ -662,8 +663,8 @@ alloc\'d memref (e.g. memrefs returned by `view` operations).
 # Example

 ```mlir
-%0 = memref.alloc() : memref<8x64xf32, affine_map<(d0, d1) -> (d0, d1), 1>>
-memref.dealloc %0 : memref<8x64xf32, affine_map<(d0, d1) -> (d0, d1), 1>>
+%0 = memref.alloc() : memref<8x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1>
+memref.dealloc %0 : memref<8x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1>
 ```
 """
 function dealloc(memref::Value; location=Location())
@@ -766,22 +767,22 @@ For example, a DmaStartOp operation that transfers 256 elements of a memref
 space 1 at indices [%k, %l], would be specified as follows:

 ```mlir
-%num_elements = arith.constant 256
+%num_elements = arith.constant 256 : index
 %idx = arith.constant 0 : index
-%tag = memref.alloc() : memref<1 x i32, affine_map<(d0) -> (d0)>, 4>
-dma_start %src[%i, %j], %dst[%k, %l], %num_elements, %tag[%idx] :
-  memref<40 x 128 x f32>, affine_map<(d0) -> (d0)>, 0>,
-  memref<2 x 1024 x f32>, affine_map<(d0) -> (d0)>, 1>,
-  memref<1 x i32>, affine_map<(d0) -> (d0)>, 2>
+%tag = memref.alloc() : memref<1 x i32, affine_map<(d0) -> (d0)>, 2>
+memref.dma_start %src[%i, %j], %dst[%k, %l], %num_elements, %tag[%idx] :
+  memref<40 x 128 x f32, affine_map<(d0, d1) -> (d0, d1)>, 0>,
+  memref<2 x 1024 x f32, affine_map<(d0, d1) -> (d0, d1)>, 1>,
+  memref<1 x i32, affine_map<(d0) -> (d0)>, 2>
 ```

 If %stride and %num_elt_per_stride are specified, the DMA is expected to
 transfer %num_elt_per_stride elements every %stride elements apart from
 memory space 0 until %num_elements are transferred.

 ```mlir
-dma_start %src[%i, %j], %dst[%k, %l], %num_elements, %tag[%idx], %stride,
-          %num_elt_per_stride :
+memref.dma_start %src[%i, %j], %dst[%k, %l], %num_elements, %tag[%idx], %stride,
+                 %num_elt_per_stride :
 ```

 * TODO: add additional operands to allow source and destination striding, and
@@ -818,10 +819,10 @@ number of elements associated with the DMA operation.
 # Example

 ```mlir
-dma_start %src[%i, %j], %dst[%k, %l], %num_elements, %tag[%index] :
-  memref<2048 x f32>, affine_map<(d0) -> (d0)>, 0>,
-  memref<256 x f32>, affine_map<(d0) -> (d0)>, 1>
-  memref<1 x i32>, affine_map<(d0) -> (d0)>, 2>
+memref.dma_start %src[%i, %j], %dst[%k, %l], %num_elements, %tag[%index] :
+  memref<2048 x f32, affine_map<(d0) -> (d0)>, 0>,
+  memref<256 x f32, affine_map<(d0) -> (d0)>, 1>,
+  memref<1 x i32, affine_map<(d0) -> (d0)>, 2>
 ...
 ...
 dma_wait %tag[%index], %num_elements : memref<1 x i32, affine_map<(d0) -> (d0)>, 2>
@@ -998,16 +999,16 @@ This makes lowering more progressive and brings the following benefits:

 ```mlir
 %base, %offset, %sizes:2, %strides:2 =
-  memref.extract_strided_metadata %memref :
-  memref<10x?xf32>, index, index, index, index, index
+  memref.extract_strided_metadata %memref : memref<10x?xf32>
+  -> memref<f32>, index, index, index, index, index

 // After folding, the type of %m2 can be memref<10x?xf32> and further
 // folded to %memref.
 %m2 = memref.reinterpret_cast %base to
   offset: [%offset],
   sizes: [%sizes#0, %sizes#1],
   strides: [%strides#0, %strides#1]
-  : memref<f32> to memref<?x?xf32, offset: ?, strides: [?, ?]>
+  : memref<f32> to memref<?x?xf32, strided<[?, ?], offset: ?>>
 ```
 """
 function extract_strided_metadata(
@@ -1096,10 +1097,10 @@ given global variable will always return the same memref descriptor).

 ```mlir
 // Private variable with an initial value.
-memref.global \"private\" @x : memref<2xf32> = dense<0.0,2.0>
+memref.global \"private\" @x : memref<2xf32> = dense<[0.0, 2.0]>

 // Private variable with an initial value and an alignment (power of 2).
-memref.global \"private\" @x : memref<2xf32> = dense<0.0,2.0> {alignment = 64}
+memref.global \"private\" @x : memref<2xf32> = dense<[0.0, 2.0]> {alignment = 64}

 // Declaration of an external variable.
 memref.global \"private\" @y : memref<4xi32>
@@ -1108,7 +1109,7 @@ memref.global \"private\" @y : memref<4xi32>
 memref.global @z : memref<3xf16> = uninitialized

 // Externally visible constant variable.
-memref.global constant @c : memref<2xi32> = dense<1, 4>
+memref.global constant @c : memref<2xi32> = dense<[1, 4]>
 ```
 """
 function global_(;
@@ -1328,8 +1329,8 @@ behavior.

 ```mlir
 %new = memref.realloc %old : memref<64xf32> to memref<124xf32>
-%4 = memref.load %new[%index] // ok
-%5 = memref.load %old[%index] // undefined behavior
+%4 = memref.load %new[%index] : memref<124xf32> // ok
+%5 = memref.load %old[%index] : memref<64xf32> // undefined behavior
 ```
 """
 function realloc(
@@ -1458,7 +1459,8 @@ In other words:
 %dst = memref.reinterpret_cast %src to
   offset: [%offset],
   sizes: [%sizes],
-  strides: [%strides]
+  strides: [%strides] :
+  memref<*xf32> to memref<?x?xf32, strided<[?, ?], offset: ?>>
 ```
 means that `%dst`\'s descriptor will be:
 ```mlir
@@ -1521,12 +1523,12 @@ Result type is ranked.
 ```mlir
 // Reshape statically-shaped memref.
 %dst = memref.reshape %src(%shape)
-    : (memref<4x1xf32>, memref<1xi32>) to memref<4xf32>
+    : (memref<4x1xf32>, memref<1xi32>) -> memref<4xf32>
 %dst0 = memref.reshape %src(%shape0)
-    : (memref<4x1xf32>, memref<2xi32>) to memref<2x2xf32>
+    : (memref<4x1xf32>, memref<2xi32>) -> memref<2x2xf32>
 // Flatten unranked memref.
 %dst = memref.reshape %src(%shape)
-    : (memref<*xf32>, memref<1xi32>) to memref<?xf32>
+    : (memref<*xf32>, memref<1xi32>) -> memref<?xf32>
 ```

 b. Source type is ranked or unranked. Shape argument has dynamic size.
@@ -1535,10 +1537,10 @@ Result type is unranked.
 ```mlir
 // Reshape dynamically-shaped 1D memref.
 %dst = memref.reshape %src(%shape)
-    : (memref<?xf32>, memref<?xi32>) to memref<*xf32>
+    : (memref<?xf32>, memref<?xi32>) -> memref<*xf32>
 // Reshape unranked memref.
 %dst = memref.reshape %src(%shape)
-    : (memref<*xf32>, memref<?xi32>) to memref<*xf32>
+    : (memref<*xf32>, memref<?xi32>) -> memref<*xf32>
 ```
 """
 function reshape(source::Value, shape::Value; result::IR.Type, location=Location())
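
A minimal usage sketch for the regenerated MemRef bindings (not part of this commit). Only the signature `reshape(source::Value, shape::Value; result::IR.Type, location=Location())` is taken from the diff above; the module path `Reactant.MLIR.Dialects.memref`, the lowercase module name, and the `emit_reshape` helper are assumptions based on how these generated files are normally consumed.

```julia
# Sketch only: module paths below are assumptions, not taken from this commit.
using Reactant.MLIR: IR
using Reactant.MLIR.Dialects: memref  # assumed lowercase dialect module name

# Emit `memref.reshape %src(%shape) : (...) -> memref<4xf32>`, mirroring the
# corrected docstring example. `src` and `shape` must be SSA values already
# inserted in a block under an active MLIR context.
function emit_reshape(src::IR.Value, shape::IR.Value, result_ty::IR.Type)
    return memref.reshape(src, shape; result=result_ty)
end
```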

src/mlir/Dialects/MosaicGPU.jl

Lines changed: 87 additions & 0 deletions
@@ -322,6 +322,25 @@ function custom_primitive(
     )
 end

+function debug_print(value::Value; format, location=Location())
+    op_ty_results = IR.Type[]
+    operands = Value[value,]
+    owned_regions = Region[]
+    successors = Block[]
+    attributes = NamedAttribute[namedattribute("format", format),]
+
+    return create_operation(
+        "mosaic_gpu.debug_print",
+        location;
+        operands,
+        owned_regions,
+        successors,
+        attributes,
+        results=op_ty_results,
+        result_inference=false,
+    )
+end
+
 """
 `initialize_barrier`

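
The new `debug_print` wrapper takes a single operand and a required `format` attribute. A hedged usage sketch, assuming the lowercase `mosaic_gpu` module name and that `namedattribute` converts a plain Julia `String` into a string attribute (both conventions of these generated bindings, not facts stated by this commit):

```julia
using Reactant.MLIR: IR
using Reactant.MLIR.Dialects: mosaic_gpu  # assumed module name

# Emit `mosaic_gpu.debug_print` for one SSA value. The printf-style format
# string is illustrative; which conversions the op accepts is defined by the
# dialect, not by this binding.
emit_debug(val::IR.Value) = mosaic_gpu.debug_print(val; format="value: %d\n")
```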
@@ -669,6 +688,74 @@ function tmem_relinquish_alloc_permit(; collective=nothing, location=Location())
     )
 end

+"""
+`vector_load`
+
+Similar to `vector.load` (vector dialect) but supports loading from
+non-contiguous memory.
+
+If `optimized` is true, raises an error if we cannot generate an optimised
+transfer. If unset, fall back to a non-optimized transfer if unable to
+generate an optimized transfer.
+"""
+function vector_load(
+    source::Value;
+    result_0=nothing::Union{Nothing,IR.Type},
+    optimized=nothing,
+    location=Location(),
+)
+    op_ty_results = IR.Type[]
+    operands = Value[source,]
+    owned_regions = Region[]
+    successors = Block[]
+    attributes = NamedAttribute[]
+    !isnothing(result_0) && push!(op_ty_results, result_0)
+    !isnothing(optimized) && push!(attributes, namedattribute("optimized", optimized))
+
+    return create_operation(
+        "mosaic_gpu.vector_load",
+        location;
+        operands,
+        owned_regions,
+        successors,
+        attributes,
+        results=(length(op_ty_results) == 0 ? nothing : op_ty_results),
+        result_inference=(length(op_ty_results) == 0 ? true : false),
+    )
+end
+
+"""
+`vector_store`
+
+Similar to `vector.store` (vector dialect) but supports storing to
+non-contiguous memory.
+
+If `optimized` is true, raises an error if we cannot generate an optimised
+transfer. If unset, fall back to a non-optimized transfer if unable to
+generate an optimized transfer.
+"""
+function vector_store(
+    valueToStore::Value, destination::Value; optimized=nothing, location=Location()
+)
+    op_ty_results = IR.Type[]
+    operands = Value[valueToStore, destination]
+    owned_regions = Region[]
+    successors = Block[]
+    attributes = NamedAttribute[]
+    !isnothing(optimized) && push!(attributes, namedattribute("optimized", optimized))
+
+    return create_operation(
+        "mosaic_gpu.vector_store",
+        location;
+        operands,
+        owned_regions,
+        successors,
+        attributes,
+        results=op_ty_results,
+        result_inference=false,
+    )
+end
+
 """
 `wgmma`

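
A sketch of how the tri-state `optimized` flag flows through these wrappers: passing `true` or `false` attaches the attribute, while leaving it as `nothing` omits it so the lowering may fall back to a non-optimized transfer. The helper name, module path, and `IR.result` call are assumptions; the attribute plumbing is read directly from the generated code above.

```julia
using Reactant.MLIR: IR
using Reactant.MLIR.Dialects: mosaic_gpu  # assumed module name

function emit_transfers(src::IR.Value, dst::IR.Value, vec_ty::IR.Type)
    # optimized=true: the lowering should error if it cannot generate an
    # optimized transfer for this load.
    load_op = mosaic_gpu.vector_load(src; result_0=vec_ty, optimized=true)
    # Omitting `optimized` leaves the attribute unset, allowing a silent
    # fallback to a non-optimized transfer for the store.
    mosaic_gpu.vector_store(IR.result(load_op, 1), dst)
end
```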
