Pick a default order to avoid problems when the order cannot be obtained from the defining operation

alexbaden · alexbaden · commit b42d9799daf7 · 2024-12-04T03:04:22.000Z
diff --git a/third_party/intel/lib/TritonIntelGPUTransforms/AccelerateMatmul.cpp b/third_party/intel/lib/TritonIntelGPUTransforms/AccelerateMatmul.cpp
@@ -123,19 +123,72 @@ class BlockedToDPAS : public OpRewritePattern<tt::DotOp> {
         oldAType.getElementType().isFloat8E4M3FN())
       dpasElemBitWidths = 2 * dpasElemBitWidths;
 
-    SmallVector<unsigned> order;
-    Operation *aOp = a.getDefiningOp();
-    if (isa<ttg::ConvertLayoutOp>(aOp)) {
-      assert(aOp->getNumOperands() == 1);
-      auto aLoad = aOp->getOperand(0);
-      order = triton::gpu::getOrder(
-          cast<RankedTensorType>(aLoad.getType()).getEncoding());
+    SmallVector<unsigned> order = {1, 0}; // TODO: acceptable default arg? 
+    llvm::errs() << "a: " << a << "\n";
+    Operation* aOp = a.getDefiningOp();
+    if (aOp) {
+      llvm::errs() << "Processing a op: " << *aOp << "\n";
+#if 0
+    Operation *aOp{nullptr};
+    if (auto arg = dyn_cast<BlockArgument>(a)) {
+      unsigned argNum = arg.getArgNumber();
+      Operation *argOwner = a.getParentBlock()->getParentOp();
+
+      if (auto forOp = dyn_cast<scf::ForOp>(argOwner)) {
+        auto operand = forOp.getOperand(argNum + forOp.getNumControlOperands() - 1);
+        aOp = operand.getDefiningOp();
+      } else if (auto funcOp = dyn_cast<FunctionOpInterface>(argOwner)) {
+#if 1
+      llvm::errs() << "func arg: " << funcOp.getArgument(argNum) << "\n";
+        aOp = funcOp.getArgument(argNum).getDefiningOp();
+#else
+        llvm::errs() << "funcOp num args: " << funcOp.getNumArguments() << "\n";
+        llvm::errs() << "arg number: " << argNum << "\n";
+        llvm::errs() << "func op at arg num: " << funcOp.getArgument(argNum) << "\n";
+        llvm::errs() << "func op at arg num - 1: " << funcOp.getArgument(argNum -1) << "\n";
+        llvm::errs() << "func op at arg num - 2: " << funcOp.getArgument(argNum -2) << "\n";
+        
+        llvm::errs() << "funcOp: " << funcOp << "\n";
+        assert(false && "funcOp!");
+#endif
+      } else {
+        llvm_unreachable("Unable to parse dpas op argument");
+      }
+      assert(aOp && "failed to get defining operation for DPAS A value");
+#if 0
+      llvm::errs() << "arg: " << arg << "\n";
+      // TODO
+      aOp = arg.getDefiningOp();
+      if (aOp) {
+      llvm::errs() << "a op from arg: " << *aOp << "\n";
+      } else {
+        assert(false && "no aOp!");
+      }
+#endif
     } else {
-      assert(isa<tt::LoadOp>(aOp) && "expecting load input to DPAS");
+      aOp = a.getDefiningOp();
+    }
+    llvm::errs() << "Broke on aOP: " << *aOp << "\n";
+#endif 
+#if 1 
       assert(aOp->getNumResults() == 1);
       auto ret = aOp->getResult(0);
-      order = triton::gpu::getOrder(
-          cast<RankedTensorType>(ret.getType()).getEncoding());
+#else
+      if (isa<ttg::ConvertLayoutOp>(aOp)) {
+        assert(aOp->getNumOperands() == 1);
+        auto aLoad = aOp->getOperand(0);
+        order = triton::gpu::getOrder(
+            cast<RankedTensorType>(aLoad.getType()).getEncoding());
+      } else {
+        assert(isa<tt::LoadOp>(aOp) && "expecting load input to DPAS");
+        assert(aOp->getNumResults() == 1);
+        auto ret = aOp->getResult(0);
+        order = triton::gpu::getOrder(
+            cast<RankedTensorType>(ret.getType()).getEncoding());
+      }
+#endif 
+    } else {
+      llvm::errs() << "no A op for A: " << a << "\n";
     }
 
     SmallVector<unsigned> warpsPerTile =