
Commit e32c4df

Revert "[X86] X86LowerTileCopy: Find dead register to use to prevent save-reload of tile register (#83628)"
This reverts commit 34acbb3. This change causes major compile-time regressions.
1 parent fa01d04 commit e32c4df
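
For reference, here is a minimal C++ sketch of the dead-register search being reverted, reconstructed from the deleted lines in the diff below. The reverted patch walked each basic block backwards with LiveRegUnits and picked the first allocatable GR64 register that was still free at the tile copy, so the stride constant 64 could be materialized without saving and reloading RAX; that per-copy liveness bookkeeping is presumably where the compile-time cost comes from. The helper name findFreeGR64At is hypothetical, and the sketch recomputes the backward walk per call for clarity, whereas the actual patch did one walk per block.

    // Sketch only: mirrors the reverted logic, not the code as committed.
    #include "llvm/ADT/BitVector.h"
    #include "llvm/ADT/STLExtras.h"
    #include "llvm/CodeGen/LiveRegUnits.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/TargetRegisterInfo.h"
    using namespace llvm;

    // Return the first allocatable GR64 register that is not live at TileCopy,
    // or an invalid Register if every candidate is in use.
    static Register findFreeGR64At(const MachineBasicBlock &MBB,
                                   const MachineInstr &TileCopy,
                                   const BitVector &GR64Regs,
                                   const TargetRegisterInfo &TRI) {
      LiveRegUnits UsedRegs(TRI);
      UsedRegs.addLiveOuts(MBB);       // seed with the block's live-outs
      for (const MachineInstr &MI : reverse(MBB)) {
        UsedRegs.stepBackward(MI);     // remove defs, add uses of MI
        if (&MI == &TileCopy)
          break;                       // liveness now reflects the tile copy
      }
      for (unsigned Reg : GR64Regs.set_bits())
        if (UsedRegs.available(Reg))   // no live register unit overlaps Reg
          return Register(Reg);
      return Register();               // nothing free: caller must save/reload
    }

With the revert applied, the pass goes back to always using RAX and spilling/reloading it around the tile copy, as the restored code below shows.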

2 files changed, 29 insertions(+), 42 deletions(-)


llvm/lib/Target/X86/X86LowerTileCopy.cpp

Lines changed: 19 additions & 42 deletions
@@ -20,7 +20,6 @@
 #include "X86InstrBuilder.h"
 #include "X86InstrInfo.h"
 #include "X86Subtarget.h"
-#include "llvm/CodeGen/LiveRegUnits.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -73,16 +72,10 @@ FunctionPass *llvm::createX86LowerTileCopyPass() {
 bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
   const X86InstrInfo *TII = ST.getInstrInfo();
-  const TargetRegisterInfo *TRI = ST.getRegisterInfo();
-  BitVector GR64Regs =
-      TRI->getAllocatableSet(MF, TRI->getRegClass(X86::GR64RegClassID));
   bool Changed = false;
 
   for (MachineBasicBlock &MBB : MF) {
-    LiveRegUnits UsedRegs(*TRI);
-    UsedRegs.addLiveOuts(MBB);
-    for (MachineInstr &MI : llvm::make_early_inc_range(reverse(MBB))) {
-      UsedRegs.stepBackward(MI);
+    for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
       if (!MI.isCopy())
         continue;
       MachineOperand &DstMO = MI.getOperand(0);
@@ -92,41 +85,27 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
       if (!X86::TILERegClass.contains(DstReg, SrcReg))
         continue;
 
+      const TargetRegisterInfo *TRI = ST.getRegisterInfo();
       // Allocate stack slot for tile register
       unsigned Size = TRI->getSpillSize(X86::TILERegClass);
       Align Alignment = TRI->getSpillAlign(X86::TILERegClass);
       int TileSS = MF.getFrameInfo().CreateSpillStackObject(Size, Alignment);
+      // Allocate stack slot for stride register
+      Size = TRI->getSpillSize(X86::GR64RegClass);
+      Alignment = TRI->getSpillAlign(X86::GR64RegClass);
+      int StrideSS = MF.getFrameInfo().CreateSpillStackObject(Size, Alignment);
 
-      int StrideSS = 0;
-
-      // Pick a killed register to avoid a save/reload.
-      Register GR64Cand = X86::NoRegister;
-      for (auto RegT : GR64Regs.set_bits()) {
-        if (UsedRegs.available(RegT)) {
-          GR64Cand = RegT;
-          break;
-        }
-      }
+      // TODO: Pick a killed regiter to avoid save/reload. There is problem
+      // to get live interval in this stage.
+      Register GR64Cand = X86::RAX;
 
       const DebugLoc &DL = MI.getDebugLoc();
-      if (GR64Cand) {
-        // mov 64 %reg
-        BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), GR64Cand).addImm(64);
-      } else {
-        // No available register? Save RAX and reload it after use.
-
-        // Allocate stack slot for stride register
-        Size = TRI->getSpillSize(X86::GR64RegClass);
-        Alignment = TRI->getSpillAlign(X86::GR64RegClass);
-        StrideSS = MF.getFrameInfo().CreateSpillStackObject(Size, Alignment);
-
-        // mov %reg (%sp)
-        addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV64mr)),
-                          StrideSS)
-            .addReg(X86::RAX);
-        // mov 64 %reg
-        BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), X86::RAX).addImm(64);
-      }
+      // mov %rax (%sp)
+      BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), GR64Cand);
+      addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV64mr)), StrideSS)
+          .addReg(GR64Cand);
+      // mov 64 %rax
+      BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), GR64Cand).addImm(64);
      // tilestored %tmm, (%sp, %idx)
 #define GET_EGPR_IF_ENABLED(OPC) (ST.hasEGPR() ? OPC##_EVEX : OPC)
       unsigned Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED);
@@ -141,12 +120,10 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
 #undef GET_EGPR_IF_ENABLED
       NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc), DstReg),
                                 TileSS);
-      if (!GR64Cand) {
-        // restore %rax
-        // mov (%sp) %rax
-        addFrameReference(
-            BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm), GR64Cand), StrideSS);
-      }
+      // restore %rax
+      // mov (%sp) %rax
+      addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm), GR64Cand),
+                        StrideSS);
       MI.eraseFromParent();
       Changed = true;
     }

llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll

Lines changed: 10 additions & 0 deletions
@@ -44,8 +44,12 @@ define dso_local void @test1(ptr%buf) nounwind {
 ; CHECK-NEXT:    tileloadd 3024(%rsp,%rax), %tmm3 # 1024-byte Folded Reload
 ; CHECK-NEXT:    tileloadd (%rbx,%r15), %tmm0
 ; CHECK-NEXT:    tileloadd (%rbx,%r15), %tmm1
+; CHECK-NEXT:    # implicit-def: $rax
+; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movabsq $64, %rax
 ; CHECK-NEXT:    tilestored %tmm3, 1024(%rsp,%rax) # 1024-byte Folded Spill
 ; CHECK-NEXT:    tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm2 # 1024-byte Folded Reload
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; CHECK-NEXT:    tdpbssd %tmm1, %tmm0, %tmm2
 ; CHECK-NEXT:    tilestored %tmm2, (%rbx,%r15)
 ; CHECK-NEXT:    incl %r14d
@@ -107,10 +111,16 @@ define dso_local void @test1(ptr%buf) nounwind {
 ; EGPR-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x9c,0x04,0xd0,0x0b,0x00,0x00]
 ; EGPR-NEXT:    tileloadd (%rbx,%r15), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x04,0x3b]
 ; EGPR-NEXT:    tileloadd (%rbx,%r15), %tmm1 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x0c,0x3b]
+; EGPR-NEXT:    # implicit-def: $rax
+; EGPR-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NEXT:    # encoding: [0x48,0x89,0x84,0x24,0xb8,0x03,0x00,0x00]
+; EGPR-NEXT:    movabsq $64, %rax # encoding: [0x48,0xb8,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 ; EGPR-NEXT:    tilestored %tmm3, 1024(%rsp,%rax) # 1024-byte Folded Spill
 ; EGPR-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x9c,0x04,0x00,0x04,0x00,0x00]
 ; EGPR-NEXT:    tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm2 # 1024-byte Folded Reload
 ; EGPR-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x94,0x24,0x00,0x04,0x00,0x00]
+; EGPR-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; EGPR-NEXT:    # encoding: [0x48,0x8b,0x84,0x24,0xb8,0x03,0x00,0x00]
 ; EGPR-NEXT:    tdpbssd %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x5e,0xd0]
 ; EGPR-NEXT:    tilestored %tmm2, (%rbx,%r15) # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7a,0x4b,0x14,0x3b]
 ; EGPR-NEXT:    incl %r14d # encoding: [0x41,0xff,0xc6]
