[AArch64][GlobalISel] Add support for post-indexed loads/stores. #69532
Conversation
Gives small code size improvements across the board at -Os CTMark. Much of the work is porting the existing heuristics in the DAGCombiner.
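For context, a rough, hypothetical illustration (not taken from the patch or its tests) of the kind of code that benefits: a store followed by a pointer increment, which post-indexed addressing lets the backend fold into a single writeback memory instruction instead of a store plus a separate add.

// Hypothetical C/C++ example: each iteration stores through p and then bumps
// the pointer. The G_STORE + G_PTR_ADD pair this produces is the pattern the
// new combine rewrites into an indexed store with writeback.
void fill(long *p, long v, long n) {
  for (long i = 0; i < n; ++i) {
    *p = v; // store to the current address
    ++p;    // pointer increment that can be folded into the store
  }
}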
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-globalisel

Author: Amara Emerson (aemerson)

Changes

Gives small code size improvements across the board at -Os CTMark. Much of the work is porting the existing heuristics in the DAGCombiner.

This is part of a stack of changes: #69533

Patch is 100.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69532.diff

16 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index d64b414f2747621..65299e852574bd1 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -58,6 +58,8 @@ struct IndexedLoadStoreMatchInfo {
Register Addr;
Register Base;
Register Offset;
+ bool RematOffset; // True if Offset is a constant that needs to be
+ // rematerialized before the new load/store.
bool IsPre;
};
@@ -814,12 +816,14 @@ class CombinerHelper {
void applyCommuteBinOpOperands(MachineInstr &MI);
private:
+ /// Checks for legality of an indexed variant of \p LdSt.
+ bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a post-indexing operation.
///
/// \returns true if a candidate is found.
bool findPostIndexCandidate(GLoadStore &MI, Register &Addr, Register &Base,
- Register &Offset);
+ Register &Offset, bool &RematOffset);
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a pre-indexing operation.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 6c36b1bbcf8649b..b34b90fd24eb602 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -97,7 +97,7 @@ class GIndexedLoad : public GMemOperation {
/// Get the offset register of the pointer value.
Register getOffsetReg() const { return getOperand(3).getReg(); }
- bool isPre() const { return getOperand(5).getImm() == 1; }
+ bool isPre() const { return getOperand(4).getImm() == 1; }
bool isPost() const { return !isPre(); }
static bool classof(const MachineInstr *MI) {
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 7e0691e1ee95048..bb8223ba3486a8d 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1248,7 +1248,7 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
extract_vec_elt_combines, combines_for_extload,
- combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
+ undef_combines, identity_combines, phi_combines,
simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
reassocs, ptr_add_immed_chain,
shl_ashr_to_sext_inreg, sext_inreg_of_load,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 9efb70f28fee3ee..a8425db6584f61c 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -945,42 +945,171 @@ void CombinerHelper::applySextInRegOfLoad(
MI.eraseFromParent();
}
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
+ if (Ty.isVector())
+ return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+ Ty.getNumElements());
+ return IntegerType::get(C, Ty.getSizeInBits());
+}
+
+/// Return true if 'MI' is a load or a store whose address operand may be
+/// folded into the load / store addressing mode.
+static bool canFoldInAddressingMode(GLoadStore *MI,
+ const TargetLowering &TLI,
+ MachineRegisterInfo &MRI) {
+ TargetLowering::AddrMode AM;
+ auto *MF = MI->getMF();
+ auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
+ if (!Addr)
+ return false;
+
+ AM.HasBaseReg = true;
+ auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI);
+ if (CstOff)
+ AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
+ else
+ AM.Scale = 1; // [reg +/- reg]
+
+ return TLI.isLegalAddressingMode(
+ MF->getDataLayout(), AM,
+ getTypeForLLT(MI->getMMO().getMemoryType(),
+ MF->getFunction().getContext()),
+ MI->getMMO().getAddrSpace());
+}
+
+namespace {
+unsigned getIndexedOpc(unsigned LdStOpc) {
+ switch (LdStOpc) {
+ case TargetOpcode::G_LOAD:
+ return TargetOpcode::G_INDEXED_LOAD;
+ case TargetOpcode::G_STORE:
+ return TargetOpcode::G_INDEXED_STORE;
+ case TargetOpcode::G_ZEXTLOAD:
+ return TargetOpcode::G_INDEXED_ZEXTLOAD;
+ case TargetOpcode::G_SEXTLOAD:
+ return TargetOpcode::G_INDEXED_SEXTLOAD;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+}
+} // namespace
+
+bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
+ // Check for legality.
+ LLT PtrTy = MRI.getType(LdSt.getPointerReg());
+ LLT Ty = MRI.getType(LdSt.getReg(0));
+ LLT MemTy = LdSt.getMMO().getMemoryType();
+ SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
+ {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
+ unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
+ SmallVector<LLT> OpTys;
+ if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
+ OpTys = {PtrTy, Ty, Ty};
+ else
+ OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
+
+ LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
+ return isLegal(Q);
+}
+
+static cl::opt<unsigned> PostIndexUseThreshold(
+ "post-index-use-threshold", cl::Hidden, cl::init(32),
+ cl::desc("Number of uses of a base pointer to check before it is no longer "
+ "considered for post-indexing."));
+
bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
- Register &Base, Register &Offset) {
+ Register &Base, Register &Offset,
+ bool &RematOffset) {
+ // We're looking for the following pattern, for either load or store:
+ // %baseptr:_(p0) = ...
+ // G_STORE %val(s64), %baseptr(p0)
+ // %offset:_(s64) = G_CONSTANT i64 -256
+ // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
auto &MF = *LdSt.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
- Base = LdSt.getPointerReg();
+ Register Ptr = LdSt.getPointerReg();
+ // If the store is the only use, don't bother.
+ if (MRI.hasOneNonDBGUse(Ptr))
+ return false;
+
+ if (!isIndexedLoadStoreLegal(LdSt))
+ return false;
- if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Base, MRI))
+ if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
return false;
- // FIXME: The following use traversal needs a bail out for patholigical cases.
- for (auto &Use : MRI.use_nodbg_instructions(Base)) {
+ MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
+ auto *PtrDef = MRI.getVRegDef(Ptr);
+
+ unsigned NumUsesChecked = 0;
+ for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
+ if (++NumUsesChecked > PostIndexUseThreshold)
+ return false; // Try to avoid exploding compile time.
+
auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
- if (!PtrAdd)
+ // The use itself might be dead. This can happen during combines if DCE
+ // hasn't had a chance to run yet. Don't allow it to form an indexed op.
+ if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
+ continue;
+
+ // Check that the user of this isn't the store itself; otherwise we'd be
+ // generating an indexed store that defines its own use.
+ if (StoredValDef == &Use)
continue;
Offset = PtrAdd->getOffsetReg();
if (!ForceLegalIndexing &&
- !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ false, MRI))
+ !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
+ /*IsPre*/ false, MRI))
continue;
// Make sure the offset calculation is before the potentially indexed op.
MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
- if (!dominates(*OffsetDef, LdSt))
- continue;
+ if (!dominates(*OffsetDef, LdSt)) {
+ // If the offset however is just a G_CONSTANT, we can always just
+ // rematerialize it where we need it.
+ if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
+ continue;
+ RematOffset = true;
+ }
- // FIXME: check whether all uses of Base are load/store with foldable
- // addressing modes. If so, using the normal addr-modes is better than
- // forming an indexed one.
- if (any_of(MRI.use_nodbg_instructions(PtrAdd->getReg(0)),
- [&](MachineInstr &PtrAddUse) {
- return !dominates(LdSt, PtrAddUse);
- }))
- continue;
+ for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
+ if (&BasePtrUse == PtrDef)
+ continue;
+
+ // If the user is a later load/store that can be post-indexed, then don't
+ // combine this one.
+ auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
+ if (BasePtrLdSt && BasePtrLdSt != &LdSt) {
+ if (dominates(LdSt, *BasePtrLdSt)) {
+ if (isIndexedLoadStoreLegal(*BasePtrLdSt))
+ return false;
+ }
+ }
+
+ // Now we're looking for the key G_PTR_ADD instruction, which contains
+ // the offset add that we want to fold.
+ if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
+ Register PtrAddDefReg = BasePtrUseDef->getReg(0);
+ for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
+ // If the use is in a different block, then we may produce worse code
+ // due to the extra register pressure.
+ if (BaseUseUse.getParent() != LdSt.getParent())
+ return false;
+
+ if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse)) {
+ if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
+ return false;
+ }
+ }
+ if (!dominates(LdSt, BasePtrUse))
+ return false; // All uses must be dominated by the load/store.
+ }
+ }
Addr = PtrAdd->getReg(0);
+ Base = PtrAdd->getBaseReg();
return true;
}
@@ -1001,6 +1130,9 @@ bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
!TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
return false;
+ if (!isIndexedLoadStoreLegal(LdSt))
+ return false;
+
MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
return false;
@@ -1027,16 +1159,14 @@ bool CombinerHelper::matchCombineIndexedLoadStore(
MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
auto &LdSt = cast<GLoadStore>(MI);
- // For now, no targets actually support these opcodes so don't waste time
- // running these unless we're forced to for testing.
- if (!ForceLegalIndexing)
+ if (LdSt.isAtomic())
return false;
MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
MatchInfo.Offset);
if (!MatchInfo.IsPre &&
!findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
- MatchInfo.Offset))
+ MatchInfo.Offset, MatchInfo.RematOffset))
return false;
return true;
@@ -1045,28 +1175,21 @@ bool CombinerHelper::matchCombineIndexedLoadStore(
void CombinerHelper::applyCombineIndexedLoadStore(
MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
- MachineIRBuilder MIRBuilder(MI);
+ Builder.setInstrAndDebugLoc(MI);
unsigned Opcode = MI.getOpcode();
bool IsStore = Opcode == TargetOpcode::G_STORE;
- unsigned NewOpcode;
- switch (Opcode) {
- case TargetOpcode::G_LOAD:
- NewOpcode = TargetOpcode::G_INDEXED_LOAD;
- break;
- case TargetOpcode::G_SEXTLOAD:
- NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
- break;
- case TargetOpcode::G_ZEXTLOAD:
- NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
- break;
- case TargetOpcode::G_STORE:
- NewOpcode = TargetOpcode::G_INDEXED_STORE;
- break;
- default:
- llvm_unreachable("Unknown load/store opcode");
+ unsigned NewOpcode = getIndexedOpc(Opcode);
+
+ // If the offset constant didn't happen to dominate the load/store, we can
+ // just clone it as needed.
+ if (MatchInfo.RematOffset) {
+ auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
+ auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
+ *OldCst->getOperand(1).getCImm());
+ MatchInfo.Offset = NewCst.getReg(0);
}
- auto MIB = MIRBuilder.buildInstr(NewOpcode);
+ auto MIB = Builder.buildInstr(NewOpcode);
if (IsStore) {
MIB.addDef(MatchInfo.Addr);
MIB.addUse(MI.getOperand(0).getReg());
@@ -1245,13 +1368,7 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
Observer.changedInstr(*BrCond);
}
-static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
- if (Ty.isVector())
- return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
- Ty.getNumElements());
- return IntegerType::get(C, Ty.getSizeInBits());
-}
-
+
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
MachineIRBuilder HelperBuilder(MI);
GISelObserverWrapper DummyObserver;
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index f7b55cad4269944..017c4523c23a184 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -246,6 +246,7 @@ def AArch64PostLegalizerLowering
def AArch64PostLegalizerCombiner
: GICombiner<"AArch64PostLegalizerCombinerImpl",
[copy_prop, combines_for_extload,
+ combine_indexed_load_store,
sext_trunc_sextload, mutate_anyext_to_zext,
hoist_logic_op_with_same_opcode_hands,
redundant_and, xor_of_and_with_same_reg,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a16a102e472e709..b20c5823371c226 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -37,6 +37,8 @@
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -23615,6 +23617,23 @@ bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}
+bool AArch64TargetLowering::isIndexingLegal(MachineInstr &MI, Register Base,
+ Register Offset, bool IsPre,
+ MachineRegisterInfo &MRI) const {
+ // HACK
+ if (IsPre)
+ return false; // Until we implement.
+
+ auto CstOffset = getIConstantVRegVal(Offset, MRI);
+ if (!CstOffset || CstOffset->isZero())
+ return false;
+
+ // All of the indexed addressing mode instructions take a signed 9 bit
+ // immediate offset. Our CstOffset is a G_PTR_ADD offset so it already
+ // encodes the sign/indexing direction.
+ return isInt<9>(CstOffset->getSExtValue());
+}
+
bool AArch64TargetLowering::getIndexedAddressParts(SDNode *N, SDNode *Op,
SDValue &Base,
SDValue &Offset,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 9dcfba3a229cccd..52e519cd8a0c93c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1201,6 +1201,8 @@ class AArch64TargetLowering : public TargetLowering {
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
SDValue &Offset, ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
+ bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
+ bool IsPre, MachineRegisterInfo &MRI) const override;
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 1c7a09696e853e2..152a6bfab21faf2 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
@@ -224,6 +225,9 @@ class AArch64InstructionSelector : public InstructionSelector {
bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
+
unsigned emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const;
MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
@@ -3038,6 +3042,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
}
+ case TargetOpcode::G_INDEXED_LOAD:
+ return selectIndexedLoad(I, MRI);
+ case TargetOpcode::G_INDEXED_STORE:
+ return selectIndexedStore(cast<GIndexedStore>(I), MRI);
+
case TargetOpcode::G_SMULH:
case TargetOpcode::G_UMULH: {
// Reject the various things we don't support yet.
@@ -5621,6 +5630,82 @@ MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
return &*Mov;
}
+bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
+ MachineRegisterInfo &MRI) {
+ // TODO: extending loads.
+ if (isa<GIndexedExtLoad>(MI))
+ return false;
+
+ auto &Ld = cast<GIndexedLoad>(MI);
+ Register Dst = Ld.getDstReg();
+ Register WriteBack = Ld.getWritebackReg();
+ Register Base = Ld.getBaseReg();
+ Register Offset = Ld.getOffsetReg();
+
+ if (Ld.isPre())
+ return false; // TODO: add pre-inc support
+
+ unsigned Opc = 0;
+ static constexpr unsigned GPROpcodes[] = {
+ AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
+ AArch64::LDRXpost};
+ static constexpr unsigned FPROpcodes[] = {
+ AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
+ AArch64::LDRDpost, AArch64::LDRQpost};
+
+ unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
+ if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
+ Opc = FPROpcodes[Log2_32(MemSize)];
+ else
+ Opc = GPROpcodes[Log2_32(MemSize)];
+
+ auto Cst = getIConstantVRegVal(Offset, MRI);
+ if (!Cst)
+ return false; // Shouldn't happen, but just in case.
+ auto LdMI =
+ MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
+ LdMI.cloneMemRefs(Ld);
+ constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
+ MachineRegisterInfo &MRI) {
+ Register Dst = I.getWritebackReg();
+ Register Val = I.getValueReg();
+ Register Base = I.getBaseReg();
+ Register Offset = I.getOffsetReg();
+ LLT ValTy = MRI.getType(Val);
+
+ if (I.isPre())
+ return false; // TODO: add pre-inc support
+
+ unsigned Opc = 0;
+ static constexpr unsigned GPROpcodes[] = {
+ AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
+ AArch64::STRXpost};
+ static constexpr unsigned FPROpcodes[] = {
+ AA...
[truncated]
You can test this locally with the following command:

git-clang-format --diff 3745e7080746b73377a479b6ceba2dbf25f245e2 1b3a6f263d3fdbbc60819290a02f012346f58a47 -- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.h llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h

View the diff from clang-format here.

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 1cccddfd9722..6c10b723ee7b 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1361,7 +1361,6 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
Observer.changedInstr(*BrCond);
}
-
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
MachineIRBuilder HelperBuilder(MI);
GISelObserverWrapper DummyObserver;
}

namespace {
unsigned getIndexedOpc(unsigned LdStOpc) {
nit: static please
// %baseptr:_(p0) = ...
// G_STORE %val(s64), %baseptr(p0)
// %offset:_(s64) = G_CONSTANT i64 -256
// %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
auto &MF = *LdSt.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
there is getTargetLowering() above.
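A minimal sketch of the simplification being suggested, assuming CombinerHelper's own getTargetLowering() accessor is usable at this point (an assumption, not verified against the final patch):

// Instead of re-deriving TLI from the MachineFunction's subtarget:
const auto &TLI = getTargetLowering();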
// combine this one.
auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
if (BasePtrLdSt && BasePtrLdSt != &LdSt) {
  if (dominates(LdSt, *BasePtrLdSt)) {
This brace should be removed. There are more violations.
I prefer returning
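One possible reading of this suggestion (a sketch only, not necessarily the exact form the reviewer intended) folds the nested conditions into a single early return:

// Equivalent to the quoted nest above, with the extra braces and nesting removed.
auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
if (BasePtrLdSt && BasePtrLdSt != &LdSt && dominates(LdSt, *BasePtrLdSt) &&
    isIndexedLoadStoreLegal(*BasePtrLdSt))
  return false;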
for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
  if (++NumUsesChecked > PostIndexUseThreshold)
    return false; // Try to avoid exploding compile time.

  auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
Feel free to ignore. Is there a lookThroughCopy<GPtrAdd>(&Use)?
No, there isn't.
ping
LGTM