fixups again after rebase

alexbaden · alexbaden · commit ce3f3c96647d · 2024-12-04T03:04:22.000Z
diff --git a/third_party/intel/lib/TritonIntelGPUTransforms/AccelerateMatmul.cpp b/third_party/intel/lib/TritonIntelGPUTransforms/AccelerateMatmul.cpp
@@ -120,15 +120,28 @@ class BlockedToDPAS : public OpRewritePattern<tt::DotOp> {
         ttg::intel::DpasEncodingAttr::getOpsPerChannel(elemType);
 
     SmallVector<unsigned> order = {1, 0}; // TODO: acceptable default arg?
-    llvm::errs() << "a: " << a << "\n";
+    // llvm::errs() << "a: " << a << "\n";
     Operation *aOp = a.getDefiningOp();
     if (aOp) {
-      llvm::errs() << "Processing a op: " << *aOp << "\n";
-      assert(aOp->getNumResults() == 1);
-      auto ret = aOp->getResult(0);
+      // llvm::errs() << "Processing a op: " << *aOp << "\n";
+      Attribute layout;
+      if (isa<ttg::ConvertLayoutOp>(aOp)) {
+        // TODO: ConvertLayoutOp converts the order to match DPAS, so we need to
+        // "look through" the conversion. Is there a way to prevent the
+        // conversion in the first place?
+        assert(aOp->getNumOperands() == 1);
+        layout =
+            cast<RankedTensorType>(aOp->getOperand(0).getType()).getEncoding();
+      } else {
+        assert(aOp->getNumResults() == 1);
+        layout =
+            cast<RankedTensorType>(aOp->getResult(0).getType()).getEncoding();
+      }
+      order = triton::gpu::getOrder(layout);
     } else {
-      llvm::errs() << "no A op for A: " << a << "\n";
+      // llvm::errs() << "no A op for A: " << a << "\n";
     }
+    // llvm::errs() << "order: " << order[0] << ", " << order[1] << "\n";
 
     SmallVector<unsigned> warpsPerTile =
         getWarpsPerTile(dotOp, dpasCap, retShape, numWarps, order);