@@ -7265,35 +7265,35 @@ void SIInstrInfo::lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
72657265 MachineOperand &Src1 = Inst.getOperand (2 );
72667266 MachineOperand &Cond = Inst.getOperand (3 );
72677267
7268- Register SCCSource = Cond.getReg ();
7269- bool IsSCC = (SCCSource == AMDGPU::SCC);
7268+ Register CondReg = Cond.getReg ();
7269+ bool IsSCC = (CondReg == AMDGPU::SCC);
72707270
72717271 // If this is a trivial select where the condition is effectively not SCC
7272- // (SCCSource is a source of copy to SCC), then the select is semantically
7273- // equivalent to copying SCCSource . Hence, there is no need to create
7272+ // (CondReg is a source of copy to SCC), then the select is semantically
7273+ // equivalent to copying CondReg . Hence, there is no need to create
72747274 // V_CNDMASK, we can just use that and bail out.
72757275 if (!IsSCC && Src0.isImm () && (Src0.getImm () == -1 ) && Src1.isImm () &&
72767276 (Src1.getImm () == 0 )) {
7277- MRI.replaceRegWith (Dest.getReg (), SCCSource );
7277+ MRI.replaceRegWith (Dest.getReg (), CondReg );
72787278 return ;
72797279 }
72807280
7281- const TargetRegisterClass *TC =
7282- RI.getRegClass (AMDGPU::SReg_1_XEXECRegClassID);
7283-
7284- Register CopySCC = MRI.createVirtualRegister (TC);
7285-
7281+ Register NewCondReg = CondReg;
72867282 if (IsSCC) {
7283+ const TargetRegisterClass *TC =
7284+ RI.getRegClass (AMDGPU::SReg_1_XEXECRegClassID);
7285+ NewCondReg = MRI.createVirtualRegister (TC);
7286+
72877287 // Now look for the closest SCC def if it is a copy
7288- // replacing the SCCSource with the COPY source register
7288+ // replacing the CondReg with the COPY source register
72897289 bool CopyFound = false ;
72907290 for (MachineInstr &CandI :
72917291 make_range (std::next (MachineBasicBlock::reverse_iterator (Inst)),
72927292 Inst.getParent ()->rend ())) {
72937293 if (CandI.findRegisterDefOperandIdx (AMDGPU::SCC, false , false , &RI) !=
72947294 -1 ) {
72957295 if (CandI.isCopy () && CandI.getOperand (0 ).getReg () == AMDGPU::SCC) {
7296- BuildMI (MBB, MII, DL, get (AMDGPU::COPY), CopySCC )
7296+ BuildMI (MBB, MII, DL, get (AMDGPU::COPY), NewCondReg )
72977297 .addReg (CandI.getOperand (1 ).getReg ());
72987298 CopyFound = true ;
72997299 }
@@ -7308,24 +7308,31 @@ void SIInstrInfo::lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
73087308 unsigned Opcode = (ST.getWavefrontSize () == 64 ) ? AMDGPU::S_CSELECT_B64
73097309 : AMDGPU::S_CSELECT_B32;
73107310 auto NewSelect =
7311- BuildMI (MBB, MII, DL, get (Opcode), CopySCC ).addImm (-1 ).addImm (0 );
7311+ BuildMI (MBB, MII, DL, get (Opcode), NewCondReg ).addImm (-1 ).addImm (0 );
73127312 NewSelect->getOperand (3 ).setIsUndef (Cond.isUndef ());
73137313 }
73147314 }
73157315
7316- Register ResultReg = MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
7317-
7318- auto UpdatedInst =
7319- BuildMI (MBB, MII, DL, get (AMDGPU::V_CNDMASK_B32_e64), ResultReg)
7320- .addImm (0 )
7321- .add (Src1) // False
7322- .addImm (0 )
7323- .add (Src0) // True
7324- .addReg (IsSCC ? CopySCC : SCCSource);
7325-
7326- MRI.replaceRegWith (Dest.getReg (), ResultReg);
7327- legalizeOperands (*UpdatedInst, MDT);
7328- addUsersToMoveToVALUWorklist (ResultReg, MRI, Worklist);
7316+ Register NewDestReg = MRI.createVirtualRegister (
7317+ RI.getEquivalentVGPRClass (MRI.getRegClass (Dest.getReg ())));
7318+ MachineInstr *NewInst;
7319+ if (Inst.getOpcode () == AMDGPU::S_CSELECT_B32) {
7320+ NewInst = BuildMI (MBB, MII, DL, get (AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
7321+ .addImm (0 )
7322+ .add (Src1) // False
7323+ .addImm (0 )
7324+ .add (Src0) // True
7325+ .addReg (NewCondReg);
7326+ } else {
7327+ NewInst =
7328+ BuildMI (MBB, MII, DL, get (AMDGPU::V_CNDMASK_B64_PSEUDO), NewDestReg)
7329+ .add (Src1) // False
7330+ .add (Src0) // True
7331+ .addReg (NewCondReg);
7332+ }
7333+ MRI.replaceRegWith (Dest.getReg (), NewDestReg);
7334+ legalizeOperands (*NewInst, MDT);
7335+ addUsersToMoveToVALUWorklist (NewDestReg, MRI, Worklist);
73297336}
73307337
73317338void SIInstrInfo::lowerScalarAbs (SIInstrWorklist &Worklist,
0 commit comments