[AArch64][GlobalISel] Add support for post-indexed loads/stores. #69532

Merged: 3 commits, Oct 24, 2023
6 changes: 5 additions & 1 deletion llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -58,6 +58,8 @@ struct IndexedLoadStoreMatchInfo {
Register Addr;
Register Base;
Register Offset;
bool RematOffset; // True if Offset is a constant that needs to be
// rematerialized before the new load/store.
bool IsPre;
};

@@ -814,12 +816,14 @@ class CombinerHelper {
void applyCommuteBinOpOperands(MachineInstr &MI);

private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a post-indexing operation.
///
/// \returns true if a candidate is found.
bool findPostIndexCandidate(GLoadStore &MI, Register &Addr, Register &Base,
Register &Offset);
Register &Offset, bool &RematOffset);

/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a pre-indexing operation.
llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -97,7 +97,7 @@ class GIndexedLoad : public GMemOperation {
/// Get the offset register of the pointer value.
Register getOffsetReg() const { return getOperand(3).getReg(); }

bool isPre() const { return getOperand(5).getImm() == 1; }
bool isPre() const { return getOperand(4).getImm() == 1; }
bool isPost() const { return !isPre(); }

static bool classof(const MachineInstr *MI) {
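
The one-line change above fixes which operand isPre() reads. GIndexedLoad's accessors place the loaded value, the written-back address, the base pointer, and the offset at operands 0 through 3, so the pre/post selector is the immediate at operand 4, not 5. A rough sketch of a post-indexed generic load under that layout (MIR syntax and register names are illustrative, not taken from a test in this patch):

  ; %val = loaded value, %wb = base + offset written back by the instruction
  ; the trailing 0 is the operand-4 immediate: 0 = post-indexed, 1 = pre-indexed
  %val:_(s64), %wb:_(p0) = G_INDEXED_LOAD %base(p0), %off(s64), 0 :: (load (s64))
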
2 changes: 1 addition & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1248,7 +1248,7 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,

def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
extract_vec_elt_combines, combines_for_extload,
combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
undef_combines, identity_combines, phi_combines,
simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
reassocs, ptr_add_immed_chain,
shl_ashr_to_sext_inreg, sext_inreg_of_load,
206 changes: 158 additions & 48 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -945,42 +945,164 @@ void CombinerHelper::applySextInRegOfLoad(
MI.eraseFromParent();
}

static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
if (Ty.isVector())
return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
Ty.getNumElements());
return IntegerType::get(C, Ty.getSizeInBits());
}

/// Return true if 'MI' is a load or a store that may fold its address
/// operand into the load / store addressing mode.
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
MachineRegisterInfo &MRI) {
TargetLowering::AddrMode AM;
auto *MF = MI->getMF();
auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
if (!Addr)
return false;

AM.HasBaseReg = true;
if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
else
AM.Scale = 1; // [reg +/- reg]

return TLI.isLegalAddressingMode(
MF->getDataLayout(), AM,
getTypeForLLT(MI->getMMO().getMemoryType(),
MF->getFunction().getContext()),
MI->getMMO().getAddrSpace());
}

static unsigned getIndexedOpc(unsigned LdStOpc) {
switch (LdStOpc) {
case TargetOpcode::G_LOAD:
return TargetOpcode::G_INDEXED_LOAD;
case TargetOpcode::G_STORE:
return TargetOpcode::G_INDEXED_STORE;
case TargetOpcode::G_ZEXTLOAD:
return TargetOpcode::G_INDEXED_ZEXTLOAD;
case TargetOpcode::G_SEXTLOAD:
return TargetOpcode::G_INDEXED_SEXTLOAD;
default:
llvm_unreachable("Unexpected opcode");
}
}

bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
// Check for legality.
LLT PtrTy = MRI.getType(LdSt.getPointerReg());
LLT Ty = MRI.getType(LdSt.getReg(0));
LLT MemTy = LdSt.getMMO().getMemoryType();
SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
{{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
SmallVector<LLT> OpTys;
if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
OpTys = {PtrTy, Ty, Ty};
else
OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD

LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
return isLegal(Q);
}

static cl::opt<unsigned> PostIndexUseThreshold(
"post-index-use-threshold", cl::Hidden, cl::init(32),
cl::desc("Number of uses of a base pointer to check before it is no longer "
"considered for post-indexing."));

bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
Register &Base, Register &Offset) {
auto &MF = *LdSt.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
Register &Base, Register &Offset,
bool &RematOffset) {
// We're looking for the following pattern, for either load or store:
// %baseptr:_(p0) = ...
// G_STORE %val(s64), %baseptr(p0)
// %offset:_(s64) = G_CONSTANT i64 -256
// %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
const auto &TLI = getTargetLowering();

Register Ptr = LdSt.getPointerReg();
// If the store is the only use, don't bother.
if (MRI.hasOneNonDBGUse(Ptr))
return false;

Base = LdSt.getPointerReg();
if (!isIndexedLoadStoreLegal(LdSt))
return false;

if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Base, MRI))
if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
return false;

// FIXME: The following use traversal needs a bail out for pathological cases.
for (auto &Use : MRI.use_nodbg_instructions(Base)) {
MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
auto *PtrDef = MRI.getVRegDef(Ptr);

unsigned NumUsesChecked = 0;
for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
if (++NumUsesChecked > PostIndexUseThreshold)
return false; // Try to avoid exploding compile time.

auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);

Review comment:

Feel free to ignore. Is there a lookThroughCopy<GPtrAdd>(&Use)?

Contributor Author:

No, there isn't.

if (!PtrAdd)
// The use itself might be dead. This can happen during combines if DCE
// hasn't had a chance to run yet. Don't allow it to form an indexed op.
if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
continue;

// Check the user of this isn't the store, otherwise we'd be generating an
// indexed store defining its own use.
if (StoredValDef == &Use)
continue;

Offset = PtrAdd->getOffsetReg();
if (!ForceLegalIndexing &&
!TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ false, MRI))
!TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
/*IsPre*/ false, MRI))
continue;

// Make sure the offset calculation is before the potentially indexed op.
MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
if (!dominates(*OffsetDef, LdSt))
continue;
RematOffset = false;
if (!dominates(*OffsetDef, LdSt)) {
// If the offset, however, is just a G_CONSTANT, we can always
// rematerialize it where we need it.
if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
continue;
RematOffset = true;
}

// FIXME: check whether all uses of Base are load/store with foldable
// addressing modes. If so, using the normal addr-modes is better than
// forming an indexed one.
if (any_of(MRI.use_nodbg_instructions(PtrAdd->getReg(0)),
[&](MachineInstr &PtrAddUse) {
return !dominates(LdSt, PtrAddUse);
}))
continue;
for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
if (&BasePtrUse == PtrDef)
continue;

// If the user is a later load/store that can be post-indexed, then don't
// combine this one.
auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
dominates(LdSt, *BasePtrLdSt) &&
isIndexedLoadStoreLegal(*BasePtrLdSt))
return false;

// Now we're looking for the key G_PTR_ADD instruction, which contains
// the offset add that we want to fold.
if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
Register PtrAddDefReg = BasePtrUseDef->getReg(0);
for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
// If the use is in a different block, then we may produce worse code
// due to the extra register pressure.
if (BaseUseUse.getParent() != LdSt.getParent())
return false;

if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
return false;
}
if (!dominates(LdSt, BasePtrUse))
return false; // All uses must be dominated by the load/store.
}
}

Addr = PtrAdd->getReg(0);
Base = PtrAdd->getBaseReg();
return true;
}

@@ -1001,6 +1123,9 @@ bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
!TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
return false;

if (!isIndexedLoadStoreLegal(LdSt))
return false;

MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
return false;
@@ -1027,16 +1152,14 @@ bool CombinerHelper::matchCombineIndexedLoadStore(
MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
auto &LdSt = cast<GLoadStore>(MI);

// For now, no targets actually support these opcodes so don't waste time
// running these unless we're forced to for testing.
if (!ForceLegalIndexing)
if (LdSt.isAtomic())
return false;

MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
MatchInfo.Offset);
if (!MatchInfo.IsPre &&
!findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
MatchInfo.Offset))
MatchInfo.Offset, MatchInfo.RematOffset))
return false;

return true;
@@ -1045,28 +1168,21 @@ void CombinerHelper::applyCombineIndexedLoadStore(
void CombinerHelper::applyCombineIndexedLoadStore(
MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
MachineIRBuilder MIRBuilder(MI);
Builder.setInstrAndDebugLoc(MI);
unsigned Opcode = MI.getOpcode();
bool IsStore = Opcode == TargetOpcode::G_STORE;
unsigned NewOpcode;
switch (Opcode) {
case TargetOpcode::G_LOAD:
NewOpcode = TargetOpcode::G_INDEXED_LOAD;
break;
case TargetOpcode::G_SEXTLOAD:
NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
break;
case TargetOpcode::G_ZEXTLOAD:
NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
break;
case TargetOpcode::G_STORE:
NewOpcode = TargetOpcode::G_INDEXED_STORE;
break;
default:
llvm_unreachable("Unknown load/store opcode");
unsigned NewOpcode = getIndexedOpc(Opcode);

// If the offset constant didn't happen to dominate the load/store, we can
// just clone it as needed.
if (MatchInfo.RematOffset) {
auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
*OldCst->getOperand(1).getCImm());
MatchInfo.Offset = NewCst.getReg(0);
}

auto MIB = MIRBuilder.buildInstr(NewOpcode);
auto MIB = Builder.buildInstr(NewOpcode);
if (IsStore) {
MIB.addDef(MatchInfo.Addr);
MIB.addUse(MI.getOperand(0).getReg());
@@ -1245,13 +1361,7 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
Observer.changedInstr(*BrCond);
}

static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
if (Ty.isVector())
return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
Ty.getNumElements());
return IntegerType::get(C, Ty.getSizeInBits());
}


bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
MachineIRBuilder HelperBuilder(MI);
GISelObserverWrapper DummyObserver;
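
Taken together, findPostIndexCandidate and applyCombineIndexedLoadStore rewrite the shape described in the comment near the top of findPostIndexCandidate. A before/after sketch for a store (MIR syntax and the G_INDEXED_STORE operand order are approximated from the builder calls above; register names are invented):

  ; before: the store and the pointer bump are separate instructions
  %off:_(s64) = G_CONSTANT i64 -256
  G_STORE %val(s64), %base(p0) :: (store (s64))
  %newaddr:_(p0) = G_PTR_ADD %base, %off(s64)

  ; after: one post-indexed store that also defines the incremented pointer
  %newaddr:_(p0) = G_INDEXED_STORE %val(s64), %base(p0), %off(s64), 0 :: (store (s64))

The trailing 0 is the pre/post flag (post-indexed here), and when the G_CONSTANT offset does not already dominate the store, MatchInfo.RematOffset tells the apply step to clone the constant next to the new instruction instead of bailing out.
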
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/AArch64Combine.td
@@ -246,6 +246,7 @@ def AArch64PostLegalizerLowering
def AArch64PostLegalizerCombiner
: GICombiner<"AArch64PostLegalizerCombinerImpl",
[copy_prop, combines_for_extload,
combine_indexed_load_store,
sext_trunc_sextload, mutate_anyext_to_zext,
hoist_logic_op_with_same_opcode_hands,
redundant_and, xor_of_and_with_same_reg,
19 changes: 19 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -37,6 +37,8 @@
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -23615,6 +23617,23 @@ bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}

bool AArch64TargetLowering::isIndexingLegal(MachineInstr &MI, Register Base,
Register Offset, bool IsPre,
MachineRegisterInfo &MRI) const {
// HACK
if (IsPre)
return false; // Until pre-indexing is implemented.

auto CstOffset = getIConstantVRegVal(Offset, MRI);
if (!CstOffset || CstOffset->isZero())
return false;

// All of the indexed addressing mode instructions take a signed 9 bit
// immediate offset. Our CstOffset is a G_PTR_ADD offset so it already
// encodes the sign/indexing direction.
return isInt<9>(CstOffset->getSExtValue());
}

bool AArch64TargetLowering::getIndexedAddressParts(SDNode *N, SDNode *Op,
SDValue &Base,
SDValue &Offset,
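
The isInt<9> check in isIndexingLegal above limits candidate offsets to the signed 9-bit range [-256, 255], which matches the immediate field of AArch64's pre/post-indexed load/store forms. So, for example, a -256 pointer bump can fold into a post-indexed store along the lines of the following (illustrative assembly, not taken from a test in this patch):

  str x0, [x1], #-256   // store x0 to [x1], then x1 = x1 - 256

whereas an offset of 256 fails the check and the store and the G_PTR_ADD are left as separate instructions.
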
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1201,6 +1201,8 @@ class AArch64TargetLowering : public TargetLowering {
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
SDValue &Offset, ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
bool IsPre, MachineRegisterInfo &MRI) const override;

void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;