-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[X86] X86LowerTileCopy: Find dead register to use to prevent save-reload of tile register #83628
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 Author: AtariDreams (AtariDreams) ChangesFull diff: https://github.com/llvm/llvm-project/pull/83628.diff 1 Files Affected:
diff --git a/llvm/lib/Target/X86/X86LowerTileCopy.cpp b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
index e7afc49240e547..3d82360942ddbf 100644
--- a/llvm/lib/Target/X86/X86LowerTileCopy.cpp
+++ b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
@@ -20,6 +20,7 @@
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -73,6 +74,7 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
const X86InstrInfo *TII = ST.getInstrInfo();
bool Changed = false;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
@@ -90,22 +92,54 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
unsigned Size = TRI->getSpillSize(X86::TILERegClass);
Align Alignment = TRI->getSpillAlign(X86::TILERegClass);
int TileSS = MF.getFrameInfo().CreateSpillStackObject(Size, Alignment);
- // Allocate stack slot for stride register
- Size = TRI->getSpillSize(X86::GR64RegClass);
- Alignment = TRI->getSpillAlign(X86::GR64RegClass);
- int StrideSS = MF.getFrameInfo().CreateSpillStackObject(Size, Alignment);
+ int StrideSS = 0;
- // TODO: Pick a killed regiter to avoid save/reload. There is problem
+ LivePhysRegs UsedRegs(*TRI);
+ UsedRegs.addLiveOuts(MBB);
+
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ UsedRegs.addReg(CSRegs[i]);
+
+ auto InstUpToMI = MBB.end();
+ while (InstUpToMI != MI)
+ // The pre-decrement is on purpose here.
+ // We want to have the liveness right before I.
+ UsedRegs.stepBackward(*--InstUpToMI);
+
+ // Look for a temporary register to use.
+ BitVector GR64Regs =
+ TRI->getAllocatableSet(MF, TRI->getRegClass(X86::GR64RegClassID));
+ // Pick a killed regiter to avoid save/reload. There is problem
// to get live interval in this stage.
Register GR64Cand = X86::RAX;
+ // Find the first available-register that is available
+ bool found = false;
+ for (auto RegT : GR64Regs.set_bits()) {
+ if (UsedRegs.available(MRI, RegT)) {
+ GR64Cand = RegT;
+ break;
+ }
+ }
+
const DebugLoc &DL = MI.getDebugLoc();
- // mov %rax (%sp)
- BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), GR64Cand);
- addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV64mr)), StrideSS)
- .addReg(GR64Cand);
- // mov 64 %rax
- BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), GR64Cand).addImm(64);
+ if (found) {
+ // mov 64 %reg
+ BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), GR64Cand).addImm(64);
+ } else {
+ // Allocate stack slot for stride register
+ Size = TRI->getSpillSize(X86::GR64RegClass);
+ Alignment = TRI->getSpillAlign(X86::GR64RegClass);
+ StrideSS = MF.getFrameInfo().CreateSpillStackObject(Size, Alignment);
+ // mov %rax (%sp)
+ BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), GR64Cand);
+ addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV64mr)),
+ StrideSS)
+ .addReg(GR64Cand);
+ // mov 64 %rax
+ BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), GR64Cand).addImm(64);
+ }
// tilestored %tmm, (%sp, %idx)
#define GET_EGPR_IF_ENABLED(OPC) (ST.hasEGPR() ? OPC##_EVEX : OPC)
unsigned Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED);
@@ -120,10 +154,12 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
#undef GET_EGPR_IF_ENABLED
NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc), DstReg),
TileSS);
- // restore %rax
- // mov (%sp) %rax
- addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm), GR64Cand),
- StrideSS);
+ if (!found) {
+ // restore %rax
+ // mov (%sp) %rax
+ addFrameReference(
+ BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm), GR64Cand), StrideSS);
+ }
MI.eraseFromParent();
Changed = true;
}
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
test coverage?
10f95c2
to
edef42d
Compare
@AtariDreams Reverse ping~ I met a problem cased by #86973 which was confused by |
…oad of tile register
Thank you! I just made a few changes, and I think this is ready now! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
I don't have commit perms, just an FYI. |
Reverted due to large compile-time regressions, especially for |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks @nikic!
I didn't think UsedRegs.stepBackward(MI);
is such expensive. I think we can early out with below code. How can we check it before commit?
bool Changed = false; | ||
|
||
for (MachineBasicBlock &MBB : MF) { | ||
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { | ||
LiveRegUnits UsedRegs(*TRI); | ||
UsedRegs.addLiveOuts(MBB); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can early out the loop by checking tile registers, e.g.,
BitVector TILERegs =
TRI->getAllocatableSet(MF, TRI->getRegClass(X86::TILERegClassID));
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
LiveRegUnits UsedRegs(*TRI);
UsedRegs.addLiveOuts(MBB);
// There won't be a tile copy if no tile register live out.
if (!TILERegs.anyCommon(UsedRegs.getBitVector())
continue;
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we add if (!ST.hasAMXTILE()) return false
at line 75?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, that's a good idea.
…oad of tile register (llvm#83628)
…save-reload of tile register (llvm#83628)" This reverts commit 34acbb3. This change causes major compile-time regressions.
…save-reload of tile register (#83628)" Fixes compile time regression in previous commit.
We need to check if `GR64Cand` a valid register before using it. Test is not needed since it's covered in llvm-test-suite. Fixes llvm#90954
Another bug fix for llvm#83628.
No description provided.