Skip to content

[AMDGPU] Assert that we can find subregs in copyPhysReg. NFC. #70332

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 26, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 27 additions & 24 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -742,23 +742,27 @@ static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB,

for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
int16_t SubIdx = BaseIndices[Idx];
Register Reg = RI.getSubReg(DestReg, SubIdx);
Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
Comment on lines +745 to +747
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did wonder about adding a new variant of getSubReg(MCRegister, unsigned) that does the assert for you, but I couldn't think of a good name for it.

unsigned Opcode = AMDGPU::S_MOV_B32;

// Is SGPR aligned? If so try to combine with next.
Register Src = RI.getSubReg(SrcReg, SubIdx);
bool AlignedDest = ((Reg - AMDGPU::SGPR0) % 2) == 0;
bool AlignedSrc = ((Src - AMDGPU::SGPR0) % 2) == 0;
bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
// Can use SGPR64 copy
unsigned Channel = RI.getChannelFromSubReg(SubIdx);
SubIdx = RI.getSubRegFromChannel(Channel, 2);
DestSubReg = RI.getSubReg(DestReg, SubIdx);
SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
Opcode = AMDGPU::S_MOV_B64;
Idx++;
}

LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), RI.getSubReg(DestReg, SubIdx))
.addReg(RI.getSubReg(SrcReg, SubIdx))
LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), DestSubReg)
.addReg(SrcSubReg)
.addReg(SrcReg, RegState::Implicit);

if (!FirstMI)
Expand Down Expand Up @@ -1098,37 +1102,36 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
SubIdx = SubIndices[Idx];
else
SubIdx = SubIndices[SubIndices.size() - Idx - 1];
Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

bool IsFirstSubreg = Idx == 0;
bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
Register ImpDefSuper = IsFirstSubreg ? Register(DestReg) : Register();
Register ImpUseSuper = SrcReg;
indirectCopyToAGPR(*this, MBB, MI, DL, RI.getSubReg(DestReg, SubIdx),
RI.getSubReg(SrcReg, SubIdx), UseKill, *RS, Overlap,
ImpDefSuper, ImpUseSuper);
indirectCopyToAGPR(*this, MBB, MI, DL, DestSubReg, SrcSubReg, UseKill,
*RS, Overlap, ImpDefSuper, ImpUseSuper);
} else if (Opcode == AMDGPU::V_PK_MOV_B32) {
Register DstSubReg = RI.getSubReg(DestReg, SubIdx);
Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
MachineInstrBuilder MIB =
BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DstSubReg)
.addImm(SISrcMods::OP_SEL_1)
.addReg(SrcSubReg)
.addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1)
.addReg(SrcSubReg)
.addImm(0) // op_sel_lo
.addImm(0) // op_sel_hi
.addImm(0) // neg_lo
.addImm(0) // neg_hi
.addImm(0) // clamp
.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestSubReg)
.addImm(SISrcMods::OP_SEL_1)
.addReg(SrcSubReg)
.addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1)
.addReg(SrcSubReg)
.addImm(0) // op_sel_lo
.addImm(0) // op_sel_hi
.addImm(0) // neg_lo
.addImm(0) // neg_hi
.addImm(0) // clamp
.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
if (IsFirstSubreg)
MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
} else {
MachineInstrBuilder Builder =
BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DestReg, SubIdx))
.addReg(RI.getSubReg(SrcReg, SubIdx));
BuildMI(MBB, MI, DL, get(Opcode), DestSubReg).addReg(SrcSubReg);
if (IsFirstSubreg)
Builder.addReg(DestReg, RegState::Define | RegState::Implicit);

Expand Down