@@ -528,32 +528,6 @@ Value emitPadding(Location loc, RewriterBase &rewriter,
528528 triton::gpu::PaddedSharedEncodingAttr layout,
529529 unsigned bitwidth, Value smemOffset, bool offsetInBytes);
530530
531- // Emits IR to load data from shared memory into registers, or to store data
532- // from registers into shared memory.
533- //
534- // You supply perVectorCallback, which is called once per group of register
535- // elements to transfer. You can use this callback to emit IR to load or store
536- // data from or to shared memory.
537- //
538- // elemLlvmTy should be dstTy's element type converted to an LLVM-dialect type.
539- //
540- // If maxVecElems is provided, we won't vectorize more than this many elements.
541- //
542- // Returns true on success.
543- [[nodiscard]] bool emitTransferBetweenRegistersAndShared (
544- RankedTensorType registerTy, triton::gpu::MemDescType sharedTy,
545- Type elemLlvmTy, std::optional<int32_t > maxVecElems,
546- const SharedMemoryObject &smemObj, Location loc, RewriterBase &rewriter,
547- const TargetInfoBase &target,
548- std::function<void (VectorType, Value /* shmemAddr*/ )> perVectorCallback);
549-
550- [[nodiscard]] bool emitTransferBetweenRegistersAndShared (
551- LinearLayout &regLayout, triton::gpu::MemDescType sharedTy, Type elemLlvmTy,
552- std::optional<int32_t > maxVecElems, const SharedMemoryObject &smemObj,
553- Location loc, RewriterBase &rewriter, const TargetInfoBase &target,
554- Value laneId, Value warpId,
555- std::function<void (VectorType, Value /* shmemAddr*/ )> perVectorCallback);
556-
557531// Close cousin of lowerLdStMatrix in MemoryOpToLLVM.cpp
558532// We might want to merge them at some point, but having to support
559533// ldmatrix.trans makes the code in lowerLdStMatrix a bit specific
0 commit comments