Skip to content

Commit e5c93ed

Browse files
authored
[X86][AMX] Checking AMXProgModel in X86LowerTileCopy (#94358)
This fixes a compile-time regression after #93692.
1 parent e49f902 commit e5c93ed

File tree

5 files changed

+37
-22
lines changed

5 files changed

+37
-22
lines changed

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5120,6 +5120,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
51205120
case Intrinsic::x86_tileloaddt164_internal: {
51215121
if (!Subtarget->hasAMXTILE())
51225122
break;
5123+
auto *MFI =
5124+
CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
5125+
MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
51235126
unsigned Opc = IntNo == Intrinsic::x86_tileloadd64_internal
51245127
? X86::PTILELOADDV
51255128
: X86::PTILELOADDT1V;
@@ -5201,6 +5204,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
52015204
break;
52025205
}
52035206
case Intrinsic::x86_tilestored64_internal: {
5207+
auto *MFI =
5208+
CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
5209+
MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
52045210
unsigned Opc = X86::PTILESTOREDV;
52055211
// _tile_stored_internal(row, col, buf, STRIDE, c)
52065212
SDValue Base = Node->getOperand(4);
@@ -5228,6 +5234,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
52285234
case Intrinsic::x86_tilestored64: {
52295235
if (!Subtarget->hasAMXTILE())
52305236
break;
5237+
auto *MFI =
5238+
CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
5239+
MFI->setAMXProgModel(AMXProgModelEnum::DirectReg);
52315240
unsigned Opc;
52325241
switch (IntNo) {
52335242
default: llvm_unreachable("Unexpected intrinsic!");

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26790,7 +26790,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
2679026790
case Intrinsic::swift_async_context_addr: {
2679126791
SDLoc dl(Op);
2679226792
auto &MF = DAG.getMachineFunction();
26793-
auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
26793+
auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2679426794
if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF)) {
2679526795
MF.getFrameInfo().setFrameAddressIsTaken(true);
2679626796
X86FI->setHasSwiftAsyncContext(true);
@@ -36795,7 +36795,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3679536795
}
3679636796
case TargetOpcode::PREALLOCATED_SETUP: {
3679736797
assert(Subtarget.is32Bit() && "preallocated only used in 32-bit");
36798-
auto MFI = MF->getInfo<X86MachineFunctionInfo>();
36798+
auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
3679936799
MFI->setHasPreallocatedCall(true);
3680036800
int64_t PreallocatedId = MI.getOperand(0).getImm();
3680136801
size_t StackAdjustment = MFI->getPreallocatedStackSize(PreallocatedId);
@@ -36812,7 +36812,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3681236812
assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit");
3681336813
int64_t PreallocatedId = MI.getOperand(1).getImm();
3681436814
int64_t ArgIdx = MI.getOperand(2).getImm();
36815-
auto MFI = MF->getInfo<X86MachineFunctionInfo>();
36815+
auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
3681636816
size_t ArgOffset = MFI->getPreallocatedArgOffsets(PreallocatedId)[ArgIdx];
3681736817
LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx
3681836818
<< ", arg offset " << ArgOffset << "\n");
@@ -36855,6 +36855,13 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3685536855
unsigned Imm = MI.getOperand(0).getImm();
3685636856
BuildMI(*BB, MI, MIMD, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm));
3685736857
MI.eraseFromParent(); // The pseudo is gone now.
36858+
auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
36859+
MFI->setAMXProgModel(AMXProgModelEnum::DirectReg);
36860+
return BB;
36861+
}
36862+
case X86::PTILEZEROV: {
36863+
auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
36864+
MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
3685836865
return BB;
3685936866
}
3686036867
case X86::PTILELOADD:

llvm/lib/Target/X86/X86InstrAMX.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ let SchedRW = [WriteSystem] in {
7474
GR16:$src2, opaquemem:$src3,
7575
TILE:$src4), []>;
7676
let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1,
77-
canFoldAsLoad = 1 in
77+
canFoldAsLoad = 1, usesCustomInserter = 1 in
7878
def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2),
7979
[(set TILE:$dst, (int_x86_tilezero_internal
8080
GR16:$src1, GR16:$src2))]>;

llvm/lib/Target/X86/X86LowerTileCopy.cpp

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "X86.h"
2020
#include "X86InstrBuilder.h"
2121
#include "X86InstrInfo.h"
22+
#include "X86MachineFunctionInfo.h"
2223
#include "X86Subtarget.h"
2324
#include "llvm/CodeGen/LiveRegUnits.h"
2425
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -71,6 +72,10 @@ FunctionPass *llvm::createX86LowerTileCopyPass() {
7172
}
7273

7374
bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
75+
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
76+
if (FuncInfo->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
77+
return false;
78+
7479
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
7580
const X86InstrInfo *TII = ST.getInstrInfo();
7681
const TargetRegisterInfo *TRI = ST.getRegisterInfo();
@@ -81,26 +86,8 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
8186
bool Changed = false;
8287

8388
for (MachineBasicBlock &MBB : MF) {
84-
// There won't be a tile copy if no tile register is live in or live out.
85-
bool HasTileCopy = false;
86-
for (const auto &LI : MBB.liveins()) {
87-
if (TILERegs.test(LI.PhysReg)) {
88-
HasTileCopy = true;
89-
break;
90-
}
91-
}
9289
LiveRegUnits UsedRegs(*TRI);
9390
UsedRegs.addLiveOuts(MBB);
94-
if (!HasTileCopy) {
95-
for (auto RegT : TILERegs.set_bits()) {
96-
if (UsedRegs.available(RegT)) {
97-
HasTileCopy = true;
98-
break;
99-
}
100-
}
101-
}
102-
if (!HasTileCopy)
103-
continue;
10491
for (MachineInstr &MI : llvm::make_early_inc_range(reverse(MBB))) {
10592
UsedRegs.stepBackward(MI);
10693
if (!MI.isCopy())

llvm/lib/Target/X86/X86MachineFunctionInfo.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121

2222
namespace llvm {
2323

24+
/// Identifies which AMX programming model a machine function uses.
///
/// - None:      default value; no model has been recorded for the function.
/// - DirectReg: recorded for the non-"_internal" AMX intrinsics and for
///              instructions emitted with an explicitly chosen tile register
///              (e.g. TILEZERO built from an immediate tile number).
/// - ManagedRA: recorded for the "*_internal" AMX intrinsics and the
///              PTILEZEROV pseudo; X86LowerTileCopy only runs for functions
///              in this model.
///
/// X86MachineFunctionInfo::setAMXProgModel asserts that a function does not
/// switch from one non-None model to a different one.
enum AMXProgModelEnum { None = 0, DirectReg = 1, ManagedRA = 2 };
25+
2426
/// X86MachineFunctionInfo - This class is derived from MachineFunction and
2527
/// contains private X86 target-specific information for each MachineFunction.
2628
class X86MachineFunctionInfo : public MachineFunctionInfo {
@@ -96,6 +98,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
9698
/// used to address arguments in a function using a base pointer.
9799
int SEHFramePtrSaveIndex = 0;
98100

101+
/// The AMX programming model used in the function.
102+
AMXProgModelEnum AMXProgModel = AMXProgModelEnum::None;
103+
99104
/// True if this function has a subset of CSRs that is handled explicitly via
100105
/// copies.
101106
bool IsSplitCSR = false;
@@ -219,6 +224,13 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
219224
int getSEHFramePtrSaveIndex() const { return SEHFramePtrSaveIndex; }
220225
void setSEHFramePtrSaveIndex(int Index) { SEHFramePtrSaveIndex = Index; }
221226

227+
AMXProgModelEnum getAMXProgModel() const { return AMXProgModel; }
228+
void setAMXProgModel(AMXProgModelEnum Model) {
229+
assert((AMXProgModel == AMXProgModelEnum::None || AMXProgModel == Model) &&
230+
"mixed model is not supported");
231+
AMXProgModel = Model;
232+
}
233+
222234
SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() {
223235
return ForwardedMustTailRegParms;
224236
}

0 commit comments

Comments
 (0)