-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[AMDGPU] Assert that we can find subregs in copyPhysReg. NFC. #70332
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This helped to catch a codegen failure caused by #69703. MachineVerifier did not complain about this malformed COPY either before regalloc: %9:vreg_64 = COPY %17:vgpr_32 Or after regalloc: renamable $vgpr0_vgpr1 = COPY renamable $vgpr2, implicit $exec But we can at least catch the problem when copyPhysReg tries to expand it into 32-bit register moves and fails to find suitable source registers: $vgpr0 = V_MOV_B32_e32 $noreg, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr2 $vgpr1 = V_MOV_B32_e32 $noreg, implicit $exec, implicit $vgpr2, implicit $exec
@llvm/pr-subscribers-backend-amdgpu Author: Jay Foad (jayfoad) ChangesThis helped to catch a codegen failure caused by #69703. MachineVerifier %9:vreg_64 = COPY %17:vgpr_32 Or after regalloc: renamable $vgpr0_vgpr1 = COPY renamable $vgpr2, implicit $exec But we can at least catch the problem when copyPhysReg tries to expand $vgpr0 = V_MOV_B32_e32 $noreg, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr2 Full diff: https://github.com/llvm/llvm-project/pull/70332.diff 1 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ffcd415a66648df..62f5a17635cee1a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -742,23 +742,27 @@ static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB,
for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
int16_t SubIdx = BaseIndices[Idx];
- Register Reg = RI.getSubReg(DestReg, SubIdx);
+ Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
+ Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
+ assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
unsigned Opcode = AMDGPU::S_MOV_B32;
// Is SGPR aligned? If so try to combine with next.
- Register Src = RI.getSubReg(SrcReg, SubIdx);
- bool AlignedDest = ((Reg - AMDGPU::SGPR0) % 2) == 0;
- bool AlignedSrc = ((Src - AMDGPU::SGPR0) % 2) == 0;
+ bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
+ bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
// Can use SGPR64 copy
unsigned Channel = RI.getChannelFromSubReg(SubIdx);
SubIdx = RI.getSubRegFromChannel(Channel, 2);
+ DestSubReg = RI.getSubReg(DestReg, SubIdx);
+ SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
+ assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
Opcode = AMDGPU::S_MOV_B64;
Idx++;
}
- LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), RI.getSubReg(DestReg, SubIdx))
- .addReg(RI.getSubReg(SrcReg, SubIdx))
+ LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), DestSubReg)
+ .addReg(SrcSubReg)
.addReg(SrcReg, RegState::Implicit);
if (!FirstMI)
@@ -1098,6 +1102,9 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
SubIdx = SubIndices[Idx];
else
SubIdx = SubIndices[SubIndices.size() - Idx - 1];
+ Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
+ Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
+ assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
bool IsFirstSubreg = Idx == 0;
bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;
@@ -1105,30 +1112,26 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
Register ImpDefSuper = IsFirstSubreg ? Register(DestReg) : Register();
Register ImpUseSuper = SrcReg;
- indirectCopyToAGPR(*this, MBB, MI, DL, RI.getSubReg(DestReg, SubIdx),
- RI.getSubReg(SrcReg, SubIdx), UseKill, *RS, Overlap,
- ImpDefSuper, ImpUseSuper);
+ indirectCopyToAGPR(*this, MBB, MI, DL, DestSubReg, SrcSubReg, UseKill,
+ *RS, Overlap, ImpDefSuper, ImpUseSuper);
} else if (Opcode == AMDGPU::V_PK_MOV_B32) {
- Register DstSubReg = RI.getSubReg(DestReg, SubIdx);
- Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
MachineInstrBuilder MIB =
- BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DstSubReg)
- .addImm(SISrcMods::OP_SEL_1)
- .addReg(SrcSubReg)
- .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1)
- .addReg(SrcSubReg)
- .addImm(0) // op_sel_lo
- .addImm(0) // op_sel_hi
- .addImm(0) // neg_lo
- .addImm(0) // neg_hi
- .addImm(0) // clamp
- .addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestSubReg)
+ .addImm(SISrcMods::OP_SEL_1)
+ .addReg(SrcSubReg)
+ .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1)
+ .addReg(SrcSubReg)
+ .addImm(0) // op_sel_lo
+ .addImm(0) // op_sel_hi
+ .addImm(0) // neg_lo
+ .addImm(0) // neg_hi
+ .addImm(0) // clamp
+ .addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
if (IsFirstSubreg)
MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
} else {
MachineInstrBuilder Builder =
- BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DestReg, SubIdx))
- .addReg(RI.getSubReg(SrcReg, SubIdx));
+ BuildMI(MBB, MI, DL, get(Opcode), DestSubReg).addReg(SrcSubReg);
if (IsFirstSubreg)
Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
|
Register DestSubReg = RI.getSubReg(DestReg, SubIdx); | ||
Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx); | ||
assert(DestSubReg && SrcSubReg && "Failed to find subregs!"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did wonder about adding a new variant of getSubReg(MCRegister, unsigned)
that does the assert for you, but I couldn't think of a good name for it.
This helped to catch a codegen failure caused by #69703. MachineVerifier
did not complain about this malformed COPY either before regalloc:
%9:vreg_64 = COPY %17:vgpr_32
Or after regalloc:
renamable $vgpr0_vgpr1 = COPY renamable $vgpr2, implicit $exec
But we can at least catch the problem when copyPhysReg tries to expand
it into 32-bit register moves and fails to find suitable source
registers:
$vgpr0 = V_MOV_B32_e32 $noreg, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr2
$vgpr1 = V_MOV_B32_e32 $noreg, implicit $exec, implicit $vgpr2, implicit $exec