@@ -7265,35 +7265,35 @@ void SIInstrInfo::lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
7265
7265
MachineOperand &Src1 = Inst.getOperand (2 );
7266
7266
MachineOperand &Cond = Inst.getOperand (3 );
7267
7267
7268
- Register SCCSource = Cond.getReg ();
7269
- bool IsSCC = (SCCSource == AMDGPU::SCC);
7268
+ Register CondReg = Cond.getReg ();
7269
+ bool IsSCC = (CondReg == AMDGPU::SCC);
7270
7270
7271
7271
// If this is a trivial select where the condition is effectively not SCC
7272
- // (SCCSource is a source of copy to SCC), then the select is semantically
7273
- // equivalent to copying SCCSource . Hence, there is no need to create
7272
+ // (CondReg is a source of copy to SCC), then the select is semantically
7273
+ // equivalent to copying CondReg. Hence, there is no need to create
7274
7274
// V_CNDMASK, we can just use that and bail out.
7275
7275
if (!IsSCC && Src0.isImm () && (Src0.getImm () == -1 ) && Src1.isImm () &&
7276
7276
(Src1.getImm () == 0 )) {
7277
- MRI.replaceRegWith (Dest.getReg (), SCCSource );
7277
+ MRI.replaceRegWith (Dest.getReg (), CondReg );
7278
7278
return ;
7279
7279
}
7280
7280
7281
- const TargetRegisterClass *TC =
7282
- RI.getRegClass (AMDGPU::SReg_1_XEXECRegClassID);
7283
-
7284
- Register CopySCC = MRI.createVirtualRegister (TC);
7285
-
7281
+ Register NewCondReg = CondReg;
7286
7282
if (IsSCC) {
7283
+ const TargetRegisterClass *TC =
7284
+ RI.getRegClass (AMDGPU::SReg_1_XEXECRegClassID);
7285
+ NewCondReg = MRI.createVirtualRegister (TC);
7286
+
7287
7287
// Now look for the closest SCC def if it is a copy
7288
- // replacing the SCCSource with the COPY source register
7288
+ // replacing the CondReg with the COPY source register
7289
7289
bool CopyFound = false ;
7290
7290
for (MachineInstr &CandI :
7291
7291
make_range (std::next (MachineBasicBlock::reverse_iterator (Inst)),
7292
7292
Inst.getParent ()->rend ())) {
7293
7293
if (CandI.findRegisterDefOperandIdx (AMDGPU::SCC, false , false , &RI) !=
7294
7294
-1 ) {
7295
7295
if (CandI.isCopy () && CandI.getOperand (0 ).getReg () == AMDGPU::SCC) {
7296
- BuildMI (MBB, MII, DL, get (AMDGPU::COPY), CopySCC )
7296
+ BuildMI (MBB, MII, DL, get (AMDGPU::COPY), NewCondReg )
7297
7297
.addReg (CandI.getOperand (1 ).getReg ());
7298
7298
CopyFound = true ;
7299
7299
}
@@ -7308,24 +7308,31 @@ void SIInstrInfo::lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
7308
7308
unsigned Opcode = (ST.getWavefrontSize () == 64 ) ? AMDGPU::S_CSELECT_B64
7309
7309
: AMDGPU::S_CSELECT_B32;
7310
7310
auto NewSelect =
7311
- BuildMI (MBB, MII, DL, get (Opcode), CopySCC ).addImm (-1 ).addImm (0 );
7311
+ BuildMI (MBB, MII, DL, get (Opcode), NewCondReg ).addImm (-1 ).addImm (0 );
7312
7312
NewSelect->getOperand (3 ).setIsUndef (Cond.isUndef ());
7313
7313
}
7314
7314
}
7315
7315
7316
- Register ResultReg = MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
7317
-
7318
- auto UpdatedInst =
7319
- BuildMI (MBB, MII, DL, get (AMDGPU::V_CNDMASK_B32_e64), ResultReg)
7320
- .addImm (0 )
7321
- .add (Src1) // False
7322
- .addImm (0 )
7323
- .add (Src0) // True
7324
- .addReg (IsSCC ? CopySCC : SCCSource);
7325
-
7326
- MRI.replaceRegWith (Dest.getReg (), ResultReg);
7327
- legalizeOperands (*UpdatedInst, MDT);
7328
- addUsersToMoveToVALUWorklist (ResultReg, MRI, Worklist);
7316
+ Register NewDestReg = MRI.createVirtualRegister (
7317
+ RI.getEquivalentVGPRClass (MRI.getRegClass (Dest.getReg ())));
7318
+ MachineInstr *NewInst;
7319
+ if (Inst.getOpcode () == AMDGPU::S_CSELECT_B32) {
7320
+ NewInst = BuildMI (MBB, MII, DL, get (AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
7321
+ .addImm (0 )
7322
+ .add (Src1) // False
7323
+ .addImm (0 )
7324
+ .add (Src0) // True
7325
+ .addReg (NewCondReg);
7326
+ } else {
7327
+ NewInst =
7328
+ BuildMI (MBB, MII, DL, get (AMDGPU::V_CNDMASK_B64_PSEUDO), NewDestReg)
7329
+ .add (Src1) // False
7330
+ .add (Src0) // True
7331
+ .addReg (NewCondReg);
7332
+ }
7333
+ MRI.replaceRegWith (Dest.getReg (), NewDestReg);
7334
+ legalizeOperands (*NewInst, MDT);
7335
+ addUsersToMoveToVALUWorklist (NewDestReg, MRI, Worklist);
7329
7336
}
7330
7337
7331
7338
void SIInstrInfo::lowerScalarAbs (SIInstrWorklist &Worklist,
0 commit comments