@@ -123,31 +123,20 @@ class BlockedToDPAS : public OpRewritePattern<tt::DotOp> {
123123 oldAType.getElementType ().isFloat8E4M3FN ())
124124 dpasElemBitWidths = 2 * dpasElemBitWidths;
125125
126- // now we can get the order from the a defining op
127-
128- llvm::errs () << " oldAType: " << oldAType << " \n " ;
129- llvm::errs () << " oldBType: " << oldBType << " \n " ;
130-
131- llvm::errs () << " a: " << a << " \n " ;
132- llvm::errs () << " a defining op: " << *a.getDefiningOp () << " \n " ;
133-
134126 SmallVector<unsigned > order;
135- Operation* aOp = a.getDefiningOp ();
127+ Operation * aOp = a.getDefiningOp ();
136128 if (isa<ttg::ConvertLayoutOp>(aOp)) {
137129 assert (aOp->getNumOperands () == 1 );
138130 auto aLoad = aOp->getOperand (0 );
139- order = triton::gpu::getOrder (cast<RankedTensorType>(aLoad.getType ()).getEncoding ());
131+ order = triton::gpu::getOrder (
132+ cast<RankedTensorType>(aLoad.getType ()).getEncoding ());
140133 } else {
141134 assert (isa<tt::LoadOp>(aOp) && " expecting load input to DPAS" );
142135 assert (aOp->getNumResults () == 1 );
143136 auto ret = aOp->getResult (0 );
144- order = triton::gpu::getOrder (cast<RankedTensorType>(ret.getType ()).getEncoding ());
137+ order = triton::gpu::getOrder (
138+ cast<RankedTensorType>(ret.getType ()).getEncoding ());
145139 }
146- llvm::errs () << " a load order: " << order[0 ] << " , " << order[1 ] << " \n " ;
147- #if 0
148- const bool aIsTransposed = order.size() == 2 && order[0] == 0 && order[1] == 1;
149- llvm::errs() << "Transposed? " << aIsTransposed << "\n";
150- #endif
151140
152141 SmallVector<unsigned > warpsPerTile =
153142 getWarpsPerTile (dotOp, dpasCap, retShape, numWarps, order);
0 commit comments