Skip to content

[X86][AMX] Checking AMXProgModel in X86LowerTileCopy #94358

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5120,6 +5120,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::x86_tileloaddt164_internal: {
if (!Subtarget->hasAMXTILE())
break;
auto *MFI =
CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
unsigned Opc = IntNo == Intrinsic::x86_tileloadd64_internal
? X86::PTILELOADDV
: X86::PTILELOADDT1V;
Expand Down Expand Up @@ -5201,6 +5204,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
break;
}
case Intrinsic::x86_tilestored64_internal: {
auto *MFI =
CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
unsigned Opc = X86::PTILESTOREDV;
// _tile_stored_internal(row, col, buf, STRIDE, c)
SDValue Base = Node->getOperand(4);
Expand Down Expand Up @@ -5228,6 +5234,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::x86_tilestored64: {
if (!Subtarget->hasAMXTILE())
break;
auto *MFI =
CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
MFI->setAMXProgModel(AMXProgModelEnum::DirectReg);
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Unexpected intrinsic!");
Expand Down
13 changes: 10 additions & 3 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26776,7 +26776,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
case Intrinsic::swift_async_context_addr: {
SDLoc dl(Op);
auto &MF = DAG.getMachineFunction();
auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF)) {
MF.getFrameInfo().setFrameAddressIsTaken(true);
X86FI->setHasSwiftAsyncContext(true);
Expand Down Expand Up @@ -36781,7 +36781,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
case TargetOpcode::PREALLOCATED_SETUP: {
assert(Subtarget.is32Bit() && "preallocated only used in 32-bit");
auto MFI = MF->getInfo<X86MachineFunctionInfo>();
auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
MFI->setHasPreallocatedCall(true);
int64_t PreallocatedId = MI.getOperand(0).getImm();
size_t StackAdjustment = MFI->getPreallocatedStackSize(PreallocatedId);
Expand All @@ -36798,7 +36798,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit");
int64_t PreallocatedId = MI.getOperand(1).getImm();
int64_t ArgIdx = MI.getOperand(2).getImm();
auto MFI = MF->getInfo<X86MachineFunctionInfo>();
auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
size_t ArgOffset = MFI->getPreallocatedArgOffsets(PreallocatedId)[ArgIdx];
LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx
<< ", arg offset " << ArgOffset << "\n");
Expand Down Expand Up @@ -36841,6 +36841,13 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
unsigned Imm = MI.getOperand(0).getImm();
BuildMI(*BB, MI, MIMD, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm));
MI.eraseFromParent(); // The pseudo is gone now.
auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
MFI->setAMXProgModel(AMXProgModelEnum::DirectReg);
return BB;
}
case X86::PTILEZEROV: {
auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
return BB;
}
case X86::PTILELOADD:
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86InstrAMX.td
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ let SchedRW = [WriteSystem] in {
GR16:$src2, opaquemem:$src3,
TILE:$src4), []>;
let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1,
canFoldAsLoad = 1 in
canFoldAsLoad = 1, usesCustomInserter = 1 in
def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2),
[(set TILE:$dst, (int_x86_tilezero_internal
GR16:$src1, GR16:$src2))]>;
Expand Down
23 changes: 5 additions & 18 deletions llvm/lib/Target/X86/X86LowerTileCopy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
Expand Down Expand Up @@ -71,6 +72,10 @@ FunctionPass *llvm::createX86LowerTileCopyPass() {
}

bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
if (FuncInfo->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
return false;

const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
const X86InstrInfo *TII = ST.getInstrInfo();
const TargetRegisterInfo *TRI = ST.getRegisterInfo();
Expand All @@ -81,26 +86,8 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;

for (MachineBasicBlock &MBB : MF) {
// There won't be a tile copy if neither tile register live in nor live out.
bool HasTileCopy = false;
for (const auto &LI : MBB.liveins()) {
if (TILERegs.test(LI.PhysReg)) {
HasTileCopy = true;
break;
}
}
LiveRegUnits UsedRegs(*TRI);
UsedRegs.addLiveOuts(MBB);
if (!HasTileCopy) {
for (auto RegT : TILERegs.set_bits()) {
if (UsedRegs.available(RegT)) {
HasTileCopy = true;
break;
}
}
}
if (!HasTileCopy)
continue;
for (MachineInstr &MI : llvm::make_early_inc_range(reverse(MBB))) {
UsedRegs.stepBackward(MI);
if (!MI.isCopy())
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/X86/X86MachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

namespace llvm {

/// Identifies the AMX programming model recorded for a machine function.
///
/// The numeric values are spelled out explicitly and must stay stable.
enum AMXProgModelEnum {
  /// No AMX programming model has been recorded yet (the default state).
  None = 0,
  /// NOTE(review): appears to be recorded when selecting the AMX forms that
  /// name tile registers directly (e.g. PTILEZERO with an immediate tile
  /// index) - confirm against the instruction selection code.
  DirectReg = 1,
  /// NOTE(review): appears to be recorded for the `*_internal` intrinsics and
  /// the `V`-suffixed pseudos whose tile registers are picked by the register
  /// allocator - confirm against the instruction selection code.
  ManagedRA = 2
};

/// X86MachineFunctionInfo - This class is derived from MachineFunction and
/// contains private X86 target-specific information for each MachineFunction.
class X86MachineFunctionInfo : public MachineFunctionInfo {
Expand Down Expand Up @@ -96,6 +98,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// used to address arguments in a function using a base pointer.
int SEHFramePtrSaveIndex = 0;

/// The AMX programming model used in the function.
AMXProgModelEnum AMXProgModel = AMXProgModelEnum::None;

/// True if this function has a subset of CSRs that is handled explicitly via
/// copies.
bool IsSplitCSR = false;
Expand Down Expand Up @@ -219,6 +224,13 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
int getSEHFramePtrSaveIndex() const { return SEHFramePtrSaveIndex; }
void setSEHFramePtrSaveIndex(int Index) { SEHFramePtrSaveIndex = Index; }

/// Returns the AMX programming model currently recorded for this function
/// (AMXProgModelEnum::None until setAMXProgModel has been called).
AMXProgModelEnum getAMXProgModel() const {
  return AMXProgModel;
}
/// Records \p Model as the AMX programming model of this function.
///
/// In assertion-enabled builds this checks that the function either has no
/// model recorded yet or is being set to the model it already has; switching
/// from one recorded model to a different one is rejected.
void setAMXProgModel(AMXProgModelEnum Model) {
  // Both comparisons are side-effect free, so their order does not matter.
  assert((AMXProgModel == Model || AMXProgModel == AMXProgModelEnum::None) &&
         "mixed model is not supported");
  AMXProgModel = Model;
}

SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() {
return ForwardedMustTailRegParms;
}
Expand Down
Loading