1010#include " GCNSubtarget.h"
1111#include " MCTargetDesc/AMDGPUMCTargetDesc.h"
1212#include " SIRegisterInfo.h"
13+ #include " llvm/ADT/SmallVector.h"
1314#include " llvm/CodeGen/LivePhysRegs.h"
1415#include " llvm/CodeGen/MachineFunctionPass.h"
1516#include " llvm/CodeGen/MachineOperand.h"
@@ -32,6 +33,7 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
3233
3334 DenseMap<MachineInstr *, MachineInstr *> SaveExecVCmpMapping;
3435 SmallVector<std::pair<MachineInstr *, MachineInstr *>, 1 > OrXors;
36+ SmallVector<MachineOperand *, 1 > KillFlagCandidates;
3537
3638 Register isCopyFromExec (const MachineInstr &MI) const ;
3739 Register isCopyToExec (const MachineInstr &MI) const ;
@@ -41,15 +43,16 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
4143 MachineBasicBlock::reverse_iterator
4244 findExecCopy (MachineBasicBlock &MBB,
4345 MachineBasicBlock::reverse_iterator I) const ;
44-
4546 bool isRegisterInUseBetween (MachineInstr &Stop, MachineInstr &Start,
4647 MCRegister Reg, bool UseLiveOuts = false ,
4748 bool IgnoreStart = false ) const ;
4849 bool isRegisterInUseAfter (MachineInstr &Stop, MCRegister Reg) const ;
49- MachineInstr *findInstrBackwards (MachineInstr &Origin,
50- std::function<bool (MachineInstr *)> Pred,
51- ArrayRef<MCRegister> NonModifiableRegs,
52- unsigned MaxInstructions = 20) const ;
50+ MachineInstr *findInstrBackwards (
51+ MachineInstr &Origin, std::function<bool (MachineInstr *)> Pred,
52+ ArrayRef<MCRegister> NonModifiableRegs,
53+ MachineInstr *Terminator = nullptr,
54+ SmallVectorImpl<MachineOperand *> *KillFlagCandidates = nullptr,
55+ unsigned MaxInstructions = 20) const ;
5356 bool optimizeExecSequence ();
5457 void tryRecordVCmpxAndSaveexecSequence (MachineInstr &MI);
5558 bool optimizeVCMPSaveExecSequence (MachineInstr &SaveExecInstr,
@@ -325,11 +328,13 @@ static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) {
325328// Backwards-iterate from Origin (for n=MaxInstructions iterations) until either
326329// the beginning of the BB is reached or Pred evaluates to true - which can be
327330// an arbitrary condition based on the current MachineInstr, for instance a
328- // target instruction. Breaks prematurely by returning nullptr if one of the
331+ // target instruction. Breaks prematurely by returning nullptr if one of the
329332// registers given in NonModifiableRegs is modified by the current instruction.
330333MachineInstr *SIOptimizeExecMasking::findInstrBackwards (
331334 MachineInstr &Origin, std::function<bool (MachineInstr *)> Pred,
332- ArrayRef<MCRegister> NonModifiableRegs, unsigned MaxInstructions) const {
335+ ArrayRef<MCRegister> NonModifiableRegs, MachineInstr *Terminator,
336+ SmallVectorImpl<MachineOperand *> *KillFlagCandidates,
337+ unsigned MaxInstructions) const {
333338 MachineBasicBlock::reverse_iterator A = Origin.getReverseIterator (),
334339 E = Origin.getParent ()->rend ();
335340 unsigned CurrentIteration = 0 ;
@@ -344,6 +349,21 @@ MachineInstr *SIOptimizeExecMasking::findInstrBackwards(
344349 for (MCRegister Reg : NonModifiableRegs) {
345350 if (A->modifiesRegister (Reg, TRI))
346351 return nullptr ;
352+
353+ // Record kill flags on registers overlapping Reg that appear after the
354+ // terminator instruction. clearKillFlags would not detect these, and
355+ // leaving them in place would make the register dead at a later point,
356+ // causing the verifier to fail. The candidates are cleared later.
357+ if (Terminator && KillFlagCandidates && A != Terminator &&
358+ A->killsRegister (Reg, TRI)) {
359+ for (MachineOperand &MO : A->operands ()) {
360+ if (MO.isReg () && MO.isKill ()) {
361+ Register Candidate = MO.getReg ();
362+ if (Candidate != Reg && TRI->regsOverlap (Candidate, Reg))
363+ KillFlagCandidates->push_back (&MO);
364+ }
365+ }
366+ }
347367 }
348368
349369 ++CurrentIteration;
@@ -599,6 +619,9 @@ bool SIOptimizeExecMasking::optimizeVCMPSaveExecSequence(
599619 if (Src1->isReg ())
600620 MRI->clearKillFlags (Src1->getReg ());
601621
622+ for (MachineOperand *MO : KillFlagCandidates)
623+ MO->setIsKill (false );
624+
602625 SaveExecInstr.eraseFromParent ();
603626 VCmp.eraseFromParent ();
604627
@@ -690,7 +713,8 @@ void SIOptimizeExecMasking::tryRecordVCmpxAndSaveexecSequence(
690713 NonDefRegs.push_back (Src1->getReg ());
691714
692715 if (!findInstrBackwards (
693- MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs))
716+ MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs,
717+ VCmp, &KillFlagCandidates))
694718 return ;
695719
696720 if (VCmp)
@@ -777,6 +801,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
777801
778802 OrXors.clear ();
779803 SaveExecVCmpMapping.clear ();
804+ KillFlagCandidates.clear ();
780805 static unsigned SearchWindow = 10 ;
781806 for (MachineBasicBlock &MBB : MF) {
782807 unsigned SearchCount = 0 ;
0 commit comments