20
20
#include " X86InstrBuilder.h"
21
21
#include " X86InstrInfo.h"
22
22
#include " X86Subtarget.h"
23
+ #include " llvm/CodeGen/LiveRegUnits.h"
23
24
#include " llvm/CodeGen/MachineBasicBlock.h"
24
25
#include " llvm/CodeGen/MachineFrameInfo.h"
25
26
#include " llvm/CodeGen/MachineFunction.h"
@@ -72,10 +73,16 @@ FunctionPass *llvm::createX86LowerTileCopyPass() {
72
73
bool X86LowerTileCopy::runOnMachineFunction (MachineFunction &MF) {
73
74
const X86Subtarget &ST = MF.getSubtarget <X86Subtarget>();
74
75
const X86InstrInfo *TII = ST.getInstrInfo ();
76
+ const TargetRegisterInfo *TRI = ST.getRegisterInfo ();
77
+ BitVector GR64Regs =
78
+ TRI->getAllocatableSet (MF, TRI->getRegClass (X86::GR64RegClassID));
75
79
bool Changed = false ;
76
80
77
81
for (MachineBasicBlock &MBB : MF) {
78
- for (MachineInstr &MI : llvm::make_early_inc_range (MBB)) {
82
+ LiveRegUnits UsedRegs (*TRI);
83
+ UsedRegs.addLiveOuts (MBB);
84
+ for (MachineInstr &MI : llvm::make_early_inc_range (reverse (MBB))) {
85
+ UsedRegs.stepBackward (MI);
79
86
if (!MI.isCopy ())
80
87
continue ;
81
88
MachineOperand &DstMO = MI.getOperand (0 );
@@ -85,27 +92,41 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
85
92
if (!X86::TILERegClass.contains (DstReg, SrcReg))
86
93
continue ;
87
94
88
- const TargetRegisterInfo *TRI = ST.getRegisterInfo ();
89
95
// Allocate stack slot for tile register
90
96
unsigned Size = TRI->getSpillSize (X86::TILERegClass);
91
97
Align Alignment = TRI->getSpillAlign (X86::TILERegClass);
92
98
int TileSS = MF.getFrameInfo ().CreateSpillStackObject (Size , Alignment);
93
- // Allocate stack slot for stride register
94
- Size = TRI->getSpillSize (X86::GR64RegClass);
95
- Alignment = TRI->getSpillAlign (X86::GR64RegClass);
96
- int StrideSS = MF.getFrameInfo ().CreateSpillStackObject (Size , Alignment);
97
99
98
- // TODO: Pick a killed register to avoid save/reload. There is problem
99
- // to get live interval in this stage.
100
- Register GR64Cand = X86::RAX;
100
+ int StrideSS = 0 ;
101
+
102
+ // Pick a killed register to avoid a save/reload.
103
+ Register GR64Cand = X86::NoRegister;
104
+ for (auto RegT : GR64Regs.set_bits ()) {
105
+ if (UsedRegs.available (RegT)) {
106
+ GR64Cand = RegT;
107
+ break ;
108
+ }
109
+ }
101
110
102
111
const DebugLoc &DL = MI.getDebugLoc ();
103
- // mov %rax (%sp)
104
- BuildMI (MBB, MI, DL, TII->get (X86::IMPLICIT_DEF), GR64Cand);
105
- addFrameReference (BuildMI (MBB, MI, DL, TII->get (X86::MOV64mr)), StrideSS)
106
- .addReg (GR64Cand);
107
- // mov 64 %rax
108
- BuildMI (MBB, MI, DL, TII->get (X86::MOV64ri), GR64Cand).addImm (64 );
112
+ if (GR64Cand) {
113
+ // mov 64 %reg
114
+ BuildMI (MBB, MI, DL, TII->get (X86::MOV64ri), GR64Cand).addImm (64 );
115
+ } else {
116
+ // No available register? Save RAX and reload it after use.
117
+
118
+ // Allocate stack slot for stride register
119
+ Size = TRI->getSpillSize (X86::GR64RegClass);
120
+ Alignment = TRI->getSpillAlign (X86::GR64RegClass);
121
+ StrideSS = MF.getFrameInfo ().CreateSpillStackObject (Size , Alignment);
122
+
123
+ // mov %rax (%sp)
124
+ addFrameReference (BuildMI (MBB, MI, DL, TII->get (X86::MOV64mr)),
125
+ StrideSS)
126
+ .addReg (X86::RAX);
127
+ // mov 64 %rax
128
+ BuildMI (MBB, MI, DL, TII->get (X86::MOV64ri), X86::RAX).addImm (64 );
129
+ }
109
130
// tilestored %tmm, (%sp, %idx)
110
131
#define GET_EGPR_IF_ENABLED (OPC ) (ST.hasEGPR() ? OPC##_EVEX : OPC)
111
132
unsigned Opc = GET_EGPR_IF_ENABLED (X86::TILESTORED);
@@ -120,10 +141,12 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
120
141
#undef GET_EGPR_IF_ENABLED
121
142
NewMI = addFrameReference (BuildMI (MBB, MI, DL, TII->get (Opc), DstReg),
122
143
TileSS);
123
- // restore %rax
124
- // mov (%sp) %rax
125
- addFrameReference (BuildMI (MBB, MI, DL, TII->get (X86::MOV64rm), GR64Cand),
126
- StrideSS);
144
+ if (!GR64Cand) {
145
+ // restore %rax
146
+ // mov (%sp) %rax
147
+ addFrameReference (
148
+ BuildMI (MBB, MI, DL, TII->get (X86::MOV64rm), GR64Cand), StrideSS);
149
+ }
127
150
MI.eraseFromParent ();
128
151
Changed = true ;
129
152
}
0 commit comments