Skip to content

Commit 9365994

Browse files
authored
[AArch64][GlobalISel] Add support for pre-indexed loads/stores. (#70185)
The pre-index matcher just needs some small heuristics to make sure it doesn't cause regressions. Apart from that, it's a simple change, since the only difference from the post-indexed form is an immediate operand of '1' vs '0' in the instruction.
1 parent bf92eba commit 9365994

File tree

7 files changed

+287
-580
lines changed

7 files changed

+287
-580
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1141,11 +1141,28 @@ bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
11411141
return false;
11421142
}
11431143

1144+
// Avoid increasing cross-block register pressure.
1145+
for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1146+
if (AddrUse.getParent() != LdSt.getParent())
1147+
return false;
1148+
11441149
// FIXME: check whether all uses of the base pointer are constant PtrAdds.
11451150
// That might allow us to end base's liveness here by adjusting the constant.
1146-
1147-
return all_of(MRI.use_nodbg_instructions(Addr),
1148-
[&](MachineInstr &UseMI) { return dominates(LdSt, UseMI); });
1151+
bool RealUse = false;
1152+
for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1153+
if (!dominates(LdSt, AddrUse))
1154+
return false; // All uses must be dominated by the load/store.
1155+
1156+
// If Ptr may be folded into the addressing mode of another use, then it's
1157+
// not profitable to do this transformation.
1158+
if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1159+
if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1160+
RealUse = true;
1161+
} else {
1162+
RealUse = true;
1163+
}
1164+
}
1165+
return RealUse;
11491166
}
11501167

11511168
bool CombinerHelper::matchCombineIndexedLoadStore(

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23718,10 +23718,6 @@ bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2371823718
bool AArch64TargetLowering::isIndexingLegal(MachineInstr &MI, Register Base,
2371923719
Register Offset, bool IsPre,
2372023720
MachineRegisterInfo &MRI) const {
23721-
// HACK
23722-
if (IsPre)
23723-
return false; // Until we implement.
23724-
2372523721
auto CstOffset = getIConstantVRegVal(Offset, MRI);
2372623722
if (!CstOffset || CstOffset->isZero())
2372723723
return false;

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 52 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -5659,24 +5659,34 @@ bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
56595659
Register WriteBack = Ld.getWritebackReg();
56605660
Register Base = Ld.getBaseReg();
56615661
Register Offset = Ld.getOffsetReg();
5662-
5663-
if (Ld.isPre())
5664-
return false; // TODO: add pre-inc support
5665-
5666-
unsigned Opc = 0;
5667-
static constexpr unsigned GPROpcodes[] = {
5668-
AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5669-
AArch64::LDRXpost};
5670-
static constexpr unsigned FPROpcodes[] = {
5671-
AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5672-
AArch64::LDRDpost, AArch64::LDRQpost};
5673-
5662+
LLT Ty = MRI.getType(Dst);
5663+
assert(Ty.getSizeInBits() <= 128 && "Unexpected type for indexed load");
56745664
unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5675-
if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5676-
Opc = FPROpcodes[Log2_32(MemSize)];
5677-
else
5678-
Opc = GPROpcodes[Log2_32(MemSize)];
56795665

5666+
unsigned Opc = 0;
5667+
if (Ld.isPre()) {
5668+
static constexpr unsigned GPROpcodes[] = {
5669+
AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5670+
AArch64::LDRXpre};
5671+
static constexpr unsigned FPROpcodes[] = {
5672+
AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5673+
AArch64::LDRQpre};
5674+
if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5675+
Opc = FPROpcodes[Log2_32(MemSize)];
5676+
else
5677+
Opc = GPROpcodes[Log2_32(MemSize)];
5678+
} else {
5679+
static constexpr unsigned GPROpcodes[] = {
5680+
AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5681+
AArch64::LDRXpost};
5682+
static constexpr unsigned FPROpcodes[] = {
5683+
AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5684+
AArch64::LDRDpost, AArch64::LDRQpost};
5685+
if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5686+
Opc = FPROpcodes[Log2_32(MemSize)];
5687+
else
5688+
Opc = GPROpcodes[Log2_32(MemSize)];
5689+
}
56805690
auto Cst = getIConstantVRegVal(Offset, MRI);
56815691
if (!Cst)
56825692
return false; // Shouldn't happen, but just in case.
@@ -5695,23 +5705,34 @@ bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
56955705
Register Base = I.getBaseReg();
56965706
Register Offset = I.getOffsetReg();
56975707
LLT ValTy = MRI.getType(Val);
5698-
5699-
if (I.isPre())
5700-
return false; // TODO: add pre-inc support
5708+
assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
57015709

57025710
unsigned Opc = 0;
5703-
static constexpr unsigned GPROpcodes[] = {
5704-
AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5705-
AArch64::STRXpost};
5706-
static constexpr unsigned FPROpcodes[] = {
5707-
AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5708-
AArch64::STRDpost, AArch64::STRQpost};
5709-
5710-
assert(ValTy.getSizeInBits() <= 128);
5711-
if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5712-
Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5713-
else
5714-
Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5711+
if (I.isPre()) {
5712+
static constexpr unsigned GPROpcodes[] = {
5713+
AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5714+
AArch64::STRXpre};
5715+
static constexpr unsigned FPROpcodes[] = {
5716+
AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5717+
AArch64::STRQpre};
5718+
5719+
if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5720+
Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5721+
else
5722+
Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5723+
} else {
5724+
static constexpr unsigned GPROpcodes[] = {
5725+
AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5726+
AArch64::STRXpost};
5727+
static constexpr unsigned FPROpcodes[] = {
5728+
AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5729+
AArch64::STRDpost, AArch64::STRQpost};
5730+
5731+
if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5732+
Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5733+
else
5734+
Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5735+
}
57155736

57165737
auto Cst = getIConstantVRegVal(Offset, MRI);
57175738
if (!Cst)

llvm/test/CodeGen/AArch64/GlobalISel/legalize-indexed-load-stores.mir

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,3 +87,25 @@ body: |
8787
$q0 = COPY %dst
8888
RET_ReallyLR implicit $x0, implicit $q0
8989
...
90+
---
91+
name: pre_store_s64
92+
body: |
93+
bb.0:
94+
liveins: $x0, $x1
95+
96+
; CHECK-LABEL: name: pre_store_s64
97+
; CHECK: liveins: $x0, $x1
98+
; CHECK-NEXT: {{ $}}
99+
; CHECK-NEXT: %ptr:_(p0) = COPY $x0
100+
; CHECK-NEXT: %val:_(s64) = COPY $x1
101+
; CHECK-NEXT: %offset:_(s64) = G_CONSTANT i64 8
102+
; CHECK-NEXT: %writeback:_(p0) = G_INDEXED_STORE %val(s64), %ptr, %offset(s64), 1 :: (store (s64))
103+
; CHECK-NEXT: $x0 = COPY %writeback(p0)
104+
; CHECK-NEXT: RET_ReallyLR implicit $x0
105+
%ptr:_(p0) = COPY $x0
106+
%val:_(s64) = COPY $x1
107+
%offset:_(s64) = G_CONSTANT i64 8
108+
%writeback:_(p0) = G_INDEXED_STORE %val, %ptr, %offset, 1 :: (store (s64), align 8)
109+
$x0 = COPY %writeback
110+
RET_ReallyLR implicit $x0
111+
...

llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ define void @test_simple_vector(ptr %ptr) {
8383
; CHECK-NEXT: mov w8, #5 ; =0x5
8484
; CHECK-NEXT: strh w9, [x0, #2]
8585
; CHECK-NEXT: mov w9, #8 ; =0x8
86-
; CHECK-NEXT: strh w8, [x0, #4]
87-
; CHECK-NEXT: strh w9, [x0, #6]
86+
; CHECK-NEXT: strh w8, [x0, #4]!
87+
; CHECK-NEXT: strh w9, [x0, #2]
8888
; CHECK-NEXT: ret
8989
store <2 x i16> <i16 4, i16 7>, ptr %ptr
9090
%addr2 = getelementptr <2 x i16>, ptr %ptr, i64 1

0 commit comments

Comments
 (0)