@@ -493,8 +493,8 @@ Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B,
493493 if (!ST->isWave32 ()) {
494494 // Combine lane 31 into lanes 32..63.
495495 V = B.CreateBitCast (V, IntNTy);
496- Value *const Lane31 = B.CreateIntrinsic (Intrinsic::amdgcn_readlane, {},
497- {V, B.getInt32 (31 )});
496+ Value *const Lane31 = B.CreateIntrinsic (
497+ V-> getType (), Intrinsic::amdgcn_readlane, {V, B.getInt32 (31 )});
498498
499499 Value *UpdateDPPCall = B.CreateCall (
500500 UpdateDPP, {Identity, Lane31, B.getInt32 (DPP::QUAD_PERM_ID),
@@ -598,16 +598,16 @@ std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(
598598
599599 // Get the value required for atomic operation
600600 V = B.CreateBitCast (V, IntNTy);
601- Value *LaneValue =
602- B. CreateIntrinsic (Intrinsic::amdgcn_readlane, {}, {V, LaneIdxInt});
601+ Value *LaneValue = B. CreateIntrinsic (V-> getType (), Intrinsic::amdgcn_readlane,
602+ {V, LaneIdxInt});
603603 LaneValue = B.CreateBitCast (LaneValue, Ty);
604604
605605 // Perform writelane if intermediate scan results are required later in the
606606 // kernel computations
607607 Value *OldValue = nullptr ;
608608 if (NeedResult) {
609609 OldValue =
610- B.CreateIntrinsic (Intrinsic::amdgcn_writelane, {} ,
610+ B.CreateIntrinsic (IntNTy, Intrinsic::amdgcn_writelane,
611611 {B.CreateBitCast (Accumulator, IntNTy), LaneIdxInt,
612612 B.CreateBitCast (OldValuePhi, IntNTy)});
613613 OldValue = B.CreateBitCast (OldValue, Ty);
@@ -789,7 +789,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
789789 Value *const LastLaneIdx = B.getInt32 (ST->getWavefrontSize () - 1 );
790790 assert (TyBitWidth == 32 );
791791 NewV = B.CreateBitCast (NewV, IntNTy);
792- NewV = B.CreateIntrinsic (Intrinsic::amdgcn_readlane, {} ,
792+ NewV = B.CreateIntrinsic (IntNTy, Intrinsic::amdgcn_readlane,
793793 {NewV, LastLaneIdx});
794794 NewV = B.CreateBitCast (NewV, Ty);
795795 }
@@ -936,10 +936,10 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
936936 Value *const ExtractLo = B.CreateTrunc (CastedPhi, Int32Ty);
937937 Value *const ExtractHi =
938938 B.CreateTrunc (B.CreateLShr (CastedPhi, 32 ), Int32Ty);
939- CallInst *const ReadFirstLaneLo =
940- B. CreateIntrinsic ( Intrinsic::amdgcn_readfirstlane, {} , ExtractLo);
941- CallInst *const ReadFirstLaneHi =
942- B. CreateIntrinsic ( Intrinsic::amdgcn_readfirstlane, {} , ExtractHi);
939+ CallInst *const ReadFirstLaneLo = B. CreateIntrinsic (
940+ Int32Ty, Intrinsic::amdgcn_readfirstlane, ExtractLo);
941+ CallInst *const ReadFirstLaneHi = B. CreateIntrinsic (
942+ Int32Ty, Intrinsic::amdgcn_readfirstlane, ExtractHi);
943943 Value *const PartialInsert = B.CreateInsertElement (
944944 PoisonValue::get (VecTy), ReadFirstLaneLo, B.getInt32 (0 ));
945945 Value *const Insert =
@@ -948,7 +948,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
948948 } else if (TyBitWidth == 32 ) {
949949 Value *CastedPhi = B.CreateBitCast (PHI, IntNTy);
950950 BroadcastI =
951- B.CreateIntrinsic (Intrinsic::amdgcn_readfirstlane, {} , CastedPhi);
951+ B.CreateIntrinsic (IntNTy, Intrinsic::amdgcn_readfirstlane, CastedPhi);
952952 BroadcastI = B.CreateBitCast (BroadcastI, Ty);
953953
954954 } else {
0 commit comments