-
Notifications
You must be signed in to change notification settings - Fork 13.6k
AMDGPU: Extract lambda used in foldImmediate into a helper function #127484
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Extract lambda used in foldImmediate into a helper function #127484
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesIt was also too permissive for a more general utilty, only return Full diff: https://github.com/llvm/llvm-project/pull/127484.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4cb07b1df04ce..b5f36f67a37ac 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3437,6 +3437,30 @@ void SIInstrInfo::removeModOperands(MachineInstr &MI) const {
}
}
+std::optional<int64_t> SIInstrInfo::extractSubregFromImm(int64_t Imm,
+ unsigned SubRegIndex) {
+ switch (SubRegIndex) {
+ case AMDGPU::NoSubRegister:
+ return Imm;
+ case AMDGPU::sub0:
+ return Lo_32(Imm);
+ case AMDGPU::sub1:
+ return Hi_32(Imm);
+ case AMDGPU::lo16:
+ return SignExtend64<16>(Imm);
+ case AMDGPU::hi16:
+ return SignExtend64<16>(Imm >> 16);
+ case AMDGPU::sub1_lo16:
+ return SignExtend64<16>(Imm >> 32);
+ case AMDGPU::sub1_hi16:
+ return SignExtend64<16>(Imm >> 48);
+ default:
+ return std::nullopt;
+ }
+
+ llvm_unreachable("covered subregister switch");
+}
+
bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Register Reg, MachineRegisterInfo *MRI) const {
if (!MRI->hasOneNonDBGUse(Reg))
@@ -3446,25 +3470,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (!getConstValDefinedInReg(DefMI, Reg, Imm))
return false;
- auto getImmFor = [=](const MachineOperand &UseOp) -> int64_t {
- switch (UseOp.getSubReg()) {
- default:
- return Imm;
- case AMDGPU::sub0:
- return Lo_32(Imm);
- case AMDGPU::sub1:
- return Hi_32(Imm);
- case AMDGPU::lo16:
- return SignExtend64<16>(Imm);
- case AMDGPU::hi16:
- return SignExtend64<16>(Imm >> 16);
- case AMDGPU::sub1_lo16:
- return SignExtend64<16>(Imm >> 32);
- case AMDGPU::sub1_hi16:
- return SignExtend64<16>(Imm >> 48);
- }
- };
-
assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");
unsigned Opc = UseMI.getOpcode();
@@ -3480,7 +3485,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
: AMDGPU::V_MOV_B32_e32
: Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
: AMDGPU::S_MOV_B32;
- APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)),
+
+ std::optional<int64_t> SubRegImm =
+ extractSubregFromImm(Imm, UseMI.getOperand(1).getSubReg());
+
+ APInt Imm(Is64Bit ? 64 : 32, *SubRegImm,
/*isSigned=*/true, /*implicitTrunc=*/true);
if (RI.isAGPR(*MRI, DstReg)) {
@@ -3591,7 +3600,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (NewOpc == AMDGPU::V_FMAMK_F16_fake16)
return false;
- const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
+ const std::optional<int64_t> SubRegImm = extractSubregFromImm(
+ Imm, RegSrc == Src1 ? Src0->getSubReg() : Src1->getSubReg());
// FIXME: This would be a lot easier if we could return a new instruction
// instead of having to modify in place.
@@ -3608,7 +3618,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
- Src1->ChangeToImmediate(Imm);
+ Src1->ChangeToImmediate(*SubRegImm);
removeModOperands(UseMI);
UseMI.setDesc(get(NewOpc));
@@ -3679,8 +3689,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
+ const std::optional<int64_t> SubRegImm =
+ extractSubregFromImm(Imm, Src2->getSubReg());
+
// ChangingToImmediate adds Src2 back to the instruction.
- Src2->ChangeToImmediate(getImmFor(*Src2));
+ Src2->ChangeToImmediate(*SubRegImm);
// These come before src2.
removeModOperands(UseMI);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index ddd15e1766f70..06dbdf65e458f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -401,6 +401,15 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
void removeModOperands(MachineInstr &MI) const;
+ /// Return the extracted immediate value in a subregister use from a constant
+ /// materialized in a super register.
+ ///
+ /// e.g. %imm = S_MOV_B64 K[0:63]
+ /// USE %imm.sub1
+ /// This will return k[32:63]
+ static std::optional<int64_t> extractSubregFromImm(int64_t ImmVal,
+ unsigned SubRegIndex);
+
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
MachineRegisterInfo *MRI) const final;
|
return std::nullopt; | ||
} | ||
|
||
llvm_unreachable("covered subregister switch"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do we really need this to avoid compiler warning?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, this should be the version that makes msvc, gcc, and clang happy
@@ -3446,25 +3470,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, | |||
if (!getConstValDefinedInReg(DefMI, Reg, Imm)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[Re: line +3469]
nit: I'd still prefer to initialize it even though if getConstValDefinedInReg
returns true
, it will be initialized.
See this comment inline on Graphite.
4fb69a7
to
aa2d8fa
Compare
b6bc148
to
5eada95
Compare
aa2d8fa
to
62a2f1b
Compare
5eada95
to
5d9454b
Compare
It was also too permissive for a more general utilty, only return the original immediate if there is no subregister.
Co-authored-by: Shilei Tian <[email protected]>
5d9454b
to
d58106e
Compare
…lvm#127484) It was also too permissive for a more general utilty, only return the original immediate if there is no subregister.
It was also too permissive for a more general utilty, only return
the original immediate if there is no subregister.