Skip to content

Commit 7caff73

Browse files
authored
[AMDGPU] Assert that we can find subregs in copyPhysReg. NFC. (#70332)
This helped to catch a codegen failure caused by #69703. MachineVerifier did not complain about this malformed COPY either before regalloc:

  %9:vreg_64 = COPY %17:vgpr_32

Or after regalloc:

  renamable $vgpr0_vgpr1 = COPY renamable $vgpr2, implicit $exec

But we can at least catch the problem when copyPhysReg tries to expand it into 32-bit register moves and fails to find suitable source registers:

  $vgpr0 = V_MOV_B32_e32 $noreg, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr2
  $vgpr1 = V_MOV_B32_e32 $noreg, implicit $exec, implicit $vgpr2, implicit $exec
1 parent 4afe550 commit 7caff73

File tree

1 file changed

+27
-24
lines changed

1 file changed

+27
-24
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 27 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -742,23 +742,27 @@ static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB,
742742

743743
for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
744744
int16_t SubIdx = BaseIndices[Idx];
745-
Register Reg = RI.getSubReg(DestReg, SubIdx);
745+
Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
746+
Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
747+
assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
746748
unsigned Opcode = AMDGPU::S_MOV_B32;
747749

748750
// Is SGPR aligned? If so try to combine with next.
749-
Register Src = RI.getSubReg(SrcReg, SubIdx);
750-
bool AlignedDest = ((Reg - AMDGPU::SGPR0) % 2) == 0;
751-
bool AlignedSrc = ((Src - AMDGPU::SGPR0) % 2) == 0;
751+
bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
752+
bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
752753
if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
753754
// Can use SGPR64 copy
754755
unsigned Channel = RI.getChannelFromSubReg(SubIdx);
755756
SubIdx = RI.getSubRegFromChannel(Channel, 2);
757+
DestSubReg = RI.getSubReg(DestReg, SubIdx);
758+
SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
759+
assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
756760
Opcode = AMDGPU::S_MOV_B64;
757761
Idx++;
758762
}
759763

760-
LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), RI.getSubReg(DestReg, SubIdx))
761-
.addReg(RI.getSubReg(SrcReg, SubIdx))
764+
LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), DestSubReg)
765+
.addReg(SrcSubReg)
762766
.addReg(SrcReg, RegState::Implicit);
763767

764768
if (!FirstMI)
@@ -1098,37 +1102,36 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
10981102
SubIdx = SubIndices[Idx];
10991103
else
11001104
SubIdx = SubIndices[SubIndices.size() - Idx - 1];
1105+
Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
1106+
Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
1107+
assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
11011108

11021109
bool IsFirstSubreg = Idx == 0;
11031110
bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;
11041111

11051112
if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
11061113
Register ImpDefSuper = IsFirstSubreg ? Register(DestReg) : Register();
11071114
Register ImpUseSuper = SrcReg;
1108-
indirectCopyToAGPR(*this, MBB, MI, DL, RI.getSubReg(DestReg, SubIdx),
1109-
RI.getSubReg(SrcReg, SubIdx), UseKill, *RS, Overlap,
1110-
ImpDefSuper, ImpUseSuper);
1115+
indirectCopyToAGPR(*this, MBB, MI, DL, DestSubReg, SrcSubReg, UseKill,
1116+
*RS, Overlap, ImpDefSuper, ImpUseSuper);
11111117
} else if (Opcode == AMDGPU::V_PK_MOV_B32) {
1112-
Register DstSubReg = RI.getSubReg(DestReg, SubIdx);
1113-
Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
11141118
MachineInstrBuilder MIB =
1115-
BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DstSubReg)
1116-
.addImm(SISrcMods::OP_SEL_1)
1117-
.addReg(SrcSubReg)
1118-
.addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1)
1119-
.addReg(SrcSubReg)
1120-
.addImm(0) // op_sel_lo
1121-
.addImm(0) // op_sel_hi
1122-
.addImm(0) // neg_lo
1123-
.addImm(0) // neg_hi
1124-
.addImm(0) // clamp
1125-
.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
1119+
BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestSubReg)
1120+
.addImm(SISrcMods::OP_SEL_1)
1121+
.addReg(SrcSubReg)
1122+
.addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1)
1123+
.addReg(SrcSubReg)
1124+
.addImm(0) // op_sel_lo
1125+
.addImm(0) // op_sel_hi
1126+
.addImm(0) // neg_lo
1127+
.addImm(0) // neg_hi
1128+
.addImm(0) // clamp
1129+
.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
11261130
if (IsFirstSubreg)
11271131
MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
11281132
} else {
11291133
MachineInstrBuilder Builder =
1130-
BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DestReg, SubIdx))
1131-
.addReg(RI.getSubReg(SrcReg, SubIdx));
1134+
BuildMI(MBB, MI, DL, get(Opcode), DestSubReg).addReg(SrcSubReg);
11321135
if (IsFirstSubreg)
11331136
Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
11341137

0 commit comments

Comments
 (0)