Fix up the default order and make the operand selection more restrictive

alexbaden · alexbaden · commit 0563ba648c43 · 2024-12-06T02:40:02.000Z
diff --git a/third_party/intel/lib/TritonIntelGPUTransforms/AccelerateMatmul.cpp b/third_party/intel/lib/TritonIntelGPUTransforms/AccelerateMatmul.cpp
@@ -80,7 +80,6 @@ getWarpsPerTile(tt::DotOp dotOp,
       ret[colDim] *= 2;
     }
   } while (true);
-
   return ret;
 }
 
@@ -120,27 +119,18 @@ class BlockedToDPAS : public OpRewritePattern<tt::DotOp> {
     unsigned opsPerChan =
         ttg::intel::DpasEncodingAttr::getOpsPerChannel(elemType);
 
-    SmallVector<unsigned> order = {1, 0}; // TODO: acceptable default arg?
-    // llvm::errs() << "a: " << a << "\n";
+    SmallVector<unsigned> order = {0, 1}; 
     Operation *aOp = a.getDefiningOp();
-    if (aOp) {
-      // llvm::errs() << "Processing a op: " << *aOp << "\n";
+    if (isa<ttg::ConvertLayoutOp>(aOp)) {
+        auto valueToConvert = aOp->getOperand(0);
+        aOp = valueToConvert.getDefiningOp();
+    }
+    if (aOp && isa<tt::LoadOp>(aOp)) {
       Attribute layout;
-      if (isa<ttg::ConvertLayoutOp>(aOp)) {
-        // TODO: convertlayoutop converts the order to match dpas, so we need to
-        // "look through" the conversion. is there a way to prevent the
-        // conversion in the first place?
-        assert(aOp->getNumOperands() == 1);
-        layout =
-            cast<RankedTensorType>(aOp->getOperand(0).getType()).getEncoding();
-      } else {
         assert(aOp->getNumResults() == 1);
         layout =
             cast<RankedTensorType>(aOp->getResult(0).getType()).getEncoding();
-      }
       order = triton::gpu::getOrder(layout);
-    } else {
-      // llvm::errs() << "no A op for A: " << a << "\n";
     }
     llvm::errs() << "order: " << order[0] << ", " << order[1] << "\n";