[AArch64][GlobalISel] Add support for post-indexed loads/stores. #69532
Conversation
Gives small code size improvements across the board at -Os CTMark. Much of the work is porting the existing heuristics in the DAGCombiner.
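For context, a rough, hypothetical illustration (not taken from the patch or its tests) of the kind of code that benefits: a store followed by a pointer increment, which post-indexed addressing lets the backend fold into a single writeback memory instruction instead of a store plus a separate add.

// Hypothetical C/C++ example: each iteration stores through p and then bumps
// the pointer. The G_STORE + G_PTR_ADD pair this produces is the pattern the
// new combine rewrites into an indexed store with writeback.
void fill(long *p, long v, long n) {
  for (long i = 0; i < n; ++i) {
    *p = v; // store to the current address
    ++p;    // pointer increment that can be folded into the store
  }
}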
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-globalisel

Author: Amara Emerson (aemerson)

Changes

Gives small code size improvements across the board at -Os CTMark. Much of the work is porting the existing heuristics in the DAGCombiner.

This is part of a stack of changes: #69533

Patch is 100.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69532.diff

16 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index d64b414f2747621..65299e852574bd1 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -58,6 +58,8 @@ struct IndexedLoadStoreMatchInfo {
Register Addr;
Register Base;
Register Offset;
+ bool RematOffset; // True if Offset is a constant that needs to be
+ // rematerialized before the new load/store.
bool IsPre;
};
@@ -814,12 +816,14 @@ class CombinerHelper {
void applyCommuteBinOpOperands(MachineInstr &MI);
private:
+ /// Checks for legality of an indexed variant of \p LdSt.
+ bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a post-indexing operation.
///
/// \returns true if a candidate is found.
bool findPostIndexCandidate(GLoadStore &MI, Register &Addr, Register &Base,
- Register &Offset);
+ Register &Offset, bool &RematOffset);
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a pre-indexing operation.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 6c36b1bbcf8649b..b34b90fd24eb602 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -97,7 +97,7 @@ class GIndexedLoad : public GMemOperation {
/// Get the offset register of the pointer value.
Register getOffsetReg() const { return getOperand(3).getReg(); }
- bool isPre() const { return getOperand(5).getImm() == 1; }
+ bool isPre() const { return getOperand(4).getImm() == 1; }
bool isPost() const { return !isPre(); }
static bool classof(const MachineInstr *MI) {
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 7e0691e1ee95048..bb8223ba3486a8d 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1248,7 +1248,7 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
extract_vec_elt_combines, combines_for_extload,
- combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
+ undef_combines, identity_combines, phi_combines,
simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
reassocs, ptr_add_immed_chain,
shl_ashr_to_sext_inreg, sext_inreg_of_load,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 9efb70f28fee3ee..a8425db6584f61c 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -945,42 +945,171 @@ void CombinerHelper::applySextInRegOfLoad(
MI.eraseFromParent();
}
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
+ if (Ty.isVector())
+ return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+ Ty.getNumElements());
+ return IntegerType::get(C, Ty.getSizeInBits());
+}
+
+/// Return true if 'MI' is a load or a store whose address operand may be
+/// folded into the load / store addressing mode.
+static bool canFoldInAddressingMode(GLoadStore *MI,
+ const TargetLowering &TLI,
+ MachineRegisterInfo &MRI) {
+ TargetLowering::AddrMode AM;
+ auto *MF = MI->getMF();
+ auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
+ if (!Addr)
+ return false;
+
+ AM.HasBaseReg = true;
+ auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI);
+ if (CstOff)
+ AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
+ else
+ AM.Scale = 1; // [reg +/- reg]
+
+ return TLI.isLegalAddressingMode(
+ MF->getDataLayout(), AM,
+ getTypeForLLT(MI->getMMO().getMemoryType(),
+ MF->getFunction().getContext()),
+ MI->getMMO().getAddrSpace());
+}
+
+namespace {
+unsigned getIndexedOpc(unsigned LdStOpc) {
+ switch (LdStOpc) {
+ case TargetOpcode::G_LOAD:
+ return TargetOpcode::G_INDEXED_LOAD;
+ case TargetOpcode::G_STORE:
+ return TargetOpcode::G_INDEXED_STORE;
+ case TargetOpcode::G_ZEXTLOAD:
+ return TargetOpcode::G_INDEXED_ZEXTLOAD;
+ case TargetOpcode::G_SEXTLOAD:
+ return TargetOpcode::G_INDEXED_SEXTLOAD;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+}
+} // namespace
+
+bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
+ // Check for legality.
+ LLT PtrTy = MRI.getType(LdSt.getPointerReg());
+ LLT Ty = MRI.getType(LdSt.getReg(0));
+ LLT MemTy = LdSt.getMMO().getMemoryType();
+ SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
+ {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
+ unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
+ SmallVector<LLT> OpTys;
+ if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
+ OpTys = {PtrTy, Ty, Ty};
+ else
+ OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
+
+ LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
+ return isLegal(Q);
+}
+
+static cl::opt<unsigned> PostIndexUseThreshold(
+ "post-index-use-threshold", cl::Hidden, cl::init(32),
+ cl::desc("Number of uses of a base pointer to check before it is no longer "
+ "considered for post-indexing."));
+
bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
- Register &Base, Register &Offset) {
+ Register &Base, Register &Offset,
+ bool &RematOffset) {
+ // We're looking for the following pattern, for either load or store:
+ // %baseptr:_(p0) = ...
+ // G_STORE %val(s64), %baseptr(p0)
+ // %offset:_(s64) = G_CONSTANT i64 -256
+ // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
auto &MF = *LdSt.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
- Base = LdSt.getPointerReg();
+ Register Ptr = LdSt.getPointerReg();
+ // If the store is the only use, don't bother.
+ if (MRI.hasOneNonDBGUse(Ptr))
+ return false;
+
+ if (!isIndexedLoadStoreLegal(LdSt))
+ return false;
- if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Base, MRI))
+ if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
return false;
- // FIXME: The following use traversal needs a bail out for patholigical cases.
- for (auto &Use : MRI.use_nodbg_instructions(Base)) {
+ MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
+ auto *PtrDef = MRI.getVRegDef(Ptr);
+
+ unsigned NumUsesChecked = 0;
+ for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
+ if (++NumUsesChecked > PostIndexUseThreshold)
+ return false; // Try to avoid exploding compile time.
+
auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
- if (!PtrAdd)
+ // The use itself might be dead. This can happen during combines if DCE
+ // hasn't had a chance to run yet. Don't allow it to form an indexed op.
+ if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
+ continue;
+
+ // Check that the user of this isn't the store itself; otherwise we'd be
+ // generating an indexed store that defines its own use.
+ if (StoredValDef == &Use)
continue;
Offset = PtrAdd->getOffsetReg();
if (!ForceLegalIndexing &&
- !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ false, MRI))
+ !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
+ /*IsPre*/ false, MRI))
continue;
// Make sure the offset calculation is before the potentially indexed op.
MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
- if (!dominates(*OffsetDef, LdSt))
- continue;
+ if (!dominates(*OffsetDef, LdSt)) {
+ // If the offset however is just a G_CONSTANT, we can always just
+ // rematerialize it where we need it.
+ if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
+ continue;
+ RematOffset = true;
+ }
- // FIXME: check whether all uses of Base are load/store with foldable
- // addressing modes. If so, using the normal addr-modes is better than
- // forming an indexed one.
- if (any_of(MRI.use_nodbg_instructions(PtrAdd->getReg(0)),
- [&](MachineInstr &PtrAddUse) {
- return !dominates(LdSt, PtrAddUse);
- }))
- continue;
+ for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
+ if (&BasePtrUse == PtrDef)
+ continue;
+
+ // If the user is a later load/store that can be post-indexed, then don't
+ // combine this one.
+ auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
+ if (BasePtrLdSt && BasePtrLdSt != &LdSt) {
+ if (dominates(LdSt, *BasePtrLdSt)) {
+ if (isIndexedLoadStoreLegal(*BasePtrLdSt))
+ return false;
+ }
+ }
+
+ // Now we're looking for the key G_PTR_ADD instruction, which contains
+ // the offset add that we want to fold.
+ if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
+ Register PtrAddDefReg = BasePtrUseDef->getReg(0);
+ for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
+ // If the use is in a different block, then we may produce worse code
+ // due to the extra register pressure.
+ if (BaseUseUse.getParent() != LdSt.getParent())
+ return false;
+
+ if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse)) {
+ if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
+ return false;
+ }
+ }
+ if (!dominates(LdSt, BasePtrUse))
+ return false; // All uses must be dominated by the load/store.
+ }
+ }
Addr = PtrAdd->getReg(0);
+ Base = PtrAdd->getBaseReg();
return true;
}
@@ -1001,6 +1130,9 @@ bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
!TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
return false;
+ if (!isIndexedLoadStoreLegal(LdSt))
+ return false;
+
MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
return false;
@@ -1027,16 +1159,14 @@ bool CombinerHelper::matchCombineIndexedLoadStore(
MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
auto &LdSt = cast<GLoadStore>(MI);
- // For now, no targets actually support these opcodes so don't waste time
- // running these unless we're forced to for testing.
- if (!ForceLegalIndexing)
+ if (LdSt.isAtomic())
return false;
MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
MatchInfo.Offset);
if (!MatchInfo.IsPre &&
!findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
- MatchInfo.Offset))
+ MatchInfo.Offset, MatchInfo.RematOffset))
return false;
return true;
@@ -1045,28 +1175,21 @@ bool CombinerHelper::matchCombineIndexedLoadStore(
void CombinerHelper::applyCombineIndexedLoadStore(
MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
- MachineIRBuilder MIRBuilder(MI);
+ Builder.setInstrAndDebugLoc(MI);
unsigned Opcode = MI.getOpcode();
bool IsStore = Opcode == TargetOpcode::G_STORE;
- unsigned NewOpcode;
- switch (Opcode) {
- case TargetOpcode::G_LOAD:
- NewOpcode = TargetOpcode::G_INDEXED_LOAD;
- break;
- case TargetOpcode::G_SEXTLOAD:
- NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
- break;
- case TargetOpcode::G_ZEXTLOAD:
- NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
- break;
- case TargetOpcode::G_STORE:
- NewOpcode = TargetOpcode::G_INDEXED_STORE;
- break;
- default:
- llvm_unreachable("Unknown load/store opcode");
+ unsigned NewOpcode = getIndexedOpc(Opcode);
+
+ // If the offset constant didn't happen to dominate the load/store, we can
+ // just clone it as needed.
+ if (MatchInfo.RematOffset) {
+ auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
+ auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
+ *OldCst->getOperand(1).getCImm());
+ MatchInfo.Offset = NewCst.getReg(0);
}
- auto MIB = MIRBuilder.buildInstr(NewOpcode);
+ auto MIB = Builder.buildInstr(NewOpcode);
if (IsStore) {
MIB.addDef(MatchInfo.Addr);
MIB.addUse(MI.getOperand(0).getReg());
@@ -1245,13 +1368,7 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
Observer.changedInstr(*BrCond);
}
-static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
- if (Ty.isVector())
- return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
- Ty.getNumElements());
- return IntegerType::get(C, Ty.getSizeInBits());
-}
-
+
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
MachineIRBuilder HelperBuilder(MI);
GISelObserverWrapper DummyObserver;
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index f7b55cad4269944..017c4523c23a184 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -246,6 +246,7 @@ def AArch64PostLegalizerLowering
def AArch64PostLegalizerCombiner
: GICombiner<"AArch64PostLegalizerCombinerImpl",
[copy_prop, combines_for_extload,
+ combine_indexed_load_store,
sext_trunc_sextload, mutate_anyext_to_zext,
hoist_logic_op_with_same_opcode_hands,
redundant_and, xor_of_and_with_same_reg,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a16a102e472e709..b20c5823371c226 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -37,6 +37,8 @@
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -23615,6 +23617,23 @@ bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}
+bool AArch64TargetLowering::isIndexingLegal(MachineInstr &MI, Register Base,
+ Register Offset, bool IsPre,
+ MachineRegisterInfo &MRI) const {
+ // HACK
+ if (IsPre)
+ return false; // Until we implement.
+
+ auto CstOffset = getIConstantVRegVal(Offset, MRI);
+ if (!CstOffset || CstOffset->isZero())
+ return false;
+
+ // All of the indexed addressing mode instructions take a signed 9 bit
+ // immediate offset. Our CstOffset is a G_PTR_ADD offset so it already
+ // encodes the sign/indexing direction.
+ return isInt<9>(CstOffset->getSExtValue());
+}
+
bool AArch64TargetLowering::getIndexedAddressParts(SDNode *N, SDNode *Op,
SDValue &Base,
SDValue &Offset,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 9dcfba3a229cccd..52e519cd8a0c93c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1201,6 +1201,8 @@ class AArch64TargetLowering : public TargetLowering {
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
SDValue &Offset, ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
+ bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
+ bool IsPre, MachineRegisterInfo &MRI) const override;
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 1c7a09696e853e2..152a6bfab21faf2 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
@@ -224,6 +225,9 @@ class AArch64InstructionSelector : public InstructionSelector {
bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
+
unsigned emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const;
MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
@@ -3038,6 +3042,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
}
+ case TargetOpcode::G_INDEXED_LOAD:
+ return selectIndexedLoad(I, MRI);
+ case TargetOpcode::G_INDEXED_STORE:
+ return selectIndexedStore(cast<GIndexedStore>(I), MRI);
+
case TargetOpcode::G_SMULH:
case TargetOpcode::G_UMULH: {
// Reject the various things we don't support yet.
@@ -5621,6 +5630,82 @@ MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
return &*Mov;
}
+bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
+ MachineRegisterInfo &MRI) {
+ // TODO: extending loads.
+ if (isa<GIndexedExtLoad>(MI))
+ return false;
+
+ auto &Ld = cast<GIndexedLoad>(MI);
+ Register Dst = Ld.getDstReg();
+ Register WriteBack = Ld.getWritebackReg();
+ Register Base = Ld.getBaseReg();
+ Register Offset = Ld.getOffsetReg();
+
+ if (Ld.isPre())
+ return false; // TODO: add pre-inc support
+
+ unsigned Opc = 0;
+ static constexpr unsigned GPROpcodes[] = {
+ AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
+ AArch64::LDRXpost};
+ static constexpr unsigned FPROpcodes[] = {
+ AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
+ AArch64::LDRDpost, AArch64::LDRQpost};
+
+ unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
+ if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
+ Opc = FPROpcodes[Log2_32(MemSize)];
+ else
+ Opc = GPROpcodes[Log2_32(MemSize)];
+
+ auto Cst = getIConstantVRegVal(Offset, MRI);
+ if (!Cst)
+ return false; // Shouldn't happen, but just in case.
+ auto LdMI =
+ MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
+ LdMI.cloneMemRefs(Ld);
+ constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
+ MachineRegisterInfo &MRI) {
+ Register Dst = I.getWritebackReg();
+ Register Val = I.getValueReg();
+ Register Base = I.getBaseReg();
+ Register Offset = I.getOffsetReg();
+ LLT ValTy = MRI.getType(Val);
+
+ if (I.isPre())
+ return false; // TODO: add pre-inc support
+
+ unsigned Opc = 0;
+ static constexpr unsigned GPROpcodes[] = {
+ AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
+ AArch64::STRXpost};
+ static constexpr unsigned FPROpcodes[] = {
+ AA...
[truncated]
You can test this locally with the following command:

git-clang-format --diff 3745e7080746b73377a479b6ceba2dbf25f245e2 1b3a6f263d3fdbbc60819290a02f012346f58a47 -- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.h llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h

View the diff from clang-format here.

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 1cccddfd9722..6c10b723ee7b 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1361,7 +1361,6 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
Observer.changedInstr(*BrCond);
}
-
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
MachineIRBuilder HelperBuilder(MI);
GISelObserverWrapper DummyObserver;
}

namespace {
unsigned getIndexedOpc(unsigned LdStOpc) {
nit: static please
// %baseptr:_(p0) = ...
// G_STORE %val(s64), %baseptr(p0)
// %offset:_(s64) = G_CONSTANT i64 -256
// %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
auto &MF = *LdSt.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
there is getTargetLowering() above.
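A minimal sketch of the simplification being suggested, assuming CombinerHelper's own getTargetLowering() accessor is usable at this point (an assumption, not verified against the final patch):

// Instead of re-deriving TLI from the MachineFunction's subtarget:
const auto &TLI = getTargetLowering();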
// combine this one.
auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
if (BasePtrLdSt && BasePtrLdSt != &LdSt) {
  if (dominates(LdSt, *BasePtrLdSt)) {
This brace should be removed. There are more violations.
I prefer returning
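One possible reading of this suggestion (a sketch only, not necessarily the exact form the reviewer intended) folds the nested conditions into a single early return:

// Equivalent to the quoted nest above, with the extra braces and nesting removed.
auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
if (BasePtrLdSt && BasePtrLdSt != &LdSt && dominates(LdSt, *BasePtrLdSt) &&
    isIndexedLoadStoreLegal(*BasePtrLdSt))
  return false;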
for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
  if (++NumUsesChecked > PostIndexUseThreshold)
    return false; // Try to avoid exploding compile time.

  auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
Feel free to ignore. Is there a lookThroughCopy<GPtrAdd>(&Use)?
No, there isn't.
ping
LGTM