@@ -1600,14 +1600,35 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
16001600
16011601 TrackedWaitcntSet.clear ();
16021602 BlockInfos.clear ();
1603+ bool Modified = false ;
1604+
1605+ if (!MFI->isEntryFunction ()) {
1606+ // Wait for any outstanding memory operations that the input registers may
1607+ // depend on. We can't track them and it's better to do the wait after the
1608+ // costly call sequence.
1609+
1610+ // TODO: Could insert earlier and schedule more liberally with operations
1611+ // that only use caller preserved registers.
1612+ MachineBasicBlock &EntryBB = MF.front ();
1613+ MachineBasicBlock::iterator I = EntryBB.begin ();
1614+ for (MachineBasicBlock::iterator E = EntryBB.end ();
1615+ I != E && (I->isPHI () || I->isMetaInstruction ()); ++I)
1616+ ;
1617+ BuildMI (EntryBB, I, DebugLoc (), TII->get (AMDGPU::S_WAITCNT)).addImm (0 );
1618+ if (ST->hasVscnt ())
1619+ BuildMI (EntryBB, I, DebugLoc (), TII->get (AMDGPU::S_WAITCNT_VSCNT))
1620+ .addReg (AMDGPU::SGPR_NULL, RegState::Undef)
1621+ .addImm (0 );
1622+
1623+ Modified = true ;
1624+ }
16031625
16041626 // Keep iterating over the blocks in reverse post order, inserting and
16051627 // updating s_waitcnt where needed, until a fix point is reached.
16061628 for (auto *MBB : ReversePostOrderTraversal<MachineFunction *>(&MF))
16071629 BlockInfos.insert ({MBB, BlockInfo (MBB)});
16081630
16091631 std::unique_ptr<WaitcntBrackets> Brackets;
1610- bool Modified = false ;
16111632 bool Repeat;
16121633 do {
16131634 Repeat = false ;
@@ -1707,26 +1728,5 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
17071728 }
17081729 }
17091730
1710- if (!MFI->isEntryFunction ()) {
1711- // Wait for any outstanding memory operations that the input registers may
1712- // depend on. We can't track them and it's better to the wait after the
1713- // costly call sequence.
1714-
1715- // TODO: Could insert earlier and schedule more liberally with operations
1716- // that only use caller preserved registers.
1717- MachineBasicBlock &EntryBB = MF.front ();
1718- MachineBasicBlock::iterator I = EntryBB.begin ();
1719- for (MachineBasicBlock::iterator E = EntryBB.end ();
1720- I != E && (I->isPHI () || I->isMetaInstruction ()); ++I)
1721- ;
1722- BuildMI (EntryBB, I, DebugLoc (), TII->get (AMDGPU::S_WAITCNT)).addImm (0 );
1723- if (ST->hasVscnt ())
1724- BuildMI (EntryBB, I, DebugLoc (), TII->get (AMDGPU::S_WAITCNT_VSCNT))
1725- .addReg (AMDGPU::SGPR_NULL, RegState::Undef)
1726- .addImm (0 );
1727-
1728- Modified = true ;
1729- }
1730-
17311731 return Modified;
17321732}
0 commit comments