[AArch64][GlobalISel] Add support for pre-indexed loads/stores. #70185
Conversation
The pre-index matcher just needs some small heuristics to make sure it doesn't cause regressions. Apart from that it's a simple change, since the only difference is an immediate operand of '1' vs '0' in the instruction.
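For context, the semantic difference between the two modes, as a minimal C++ sketch (the function names are illustrative, not part of the patch; the assembly in the comments is the AArch64 form the selector emits):

#include <cstdint>
#include <cstring>

// Post-indexed (G_INDEXED_STORE's pre/post immediate == 0):
// store at the old address, then write Base + Offset back to the register.
char *postIndexedStore(char *Base, uint64_t Val, int64_t Offset) {
  std::memcpy(Base, &Val, sizeof(Val)); // str x1, [x0], #Offset
  return Base + Offset;                 // writeback value
}

// Pre-indexed (immediate == 1):
// advance the base first, then store at the new address.
char *preIndexedStore(char *Base, uint64_t Val, int64_t Offset) {
  Base += Offset;                       // str x1, [x0, #Offset]!
  std::memcpy(Base, &Val, sizeof(Val));
  return Base;                          // same writeback value
}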
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-aarch64

Author: Amara Emerson (aemerson)

Part of a stack: #69533

Patch is 41.14 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/70185.diff

8 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 1cccddfd972221c..a404bd0da19e172 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1141,11 +1141,29 @@ bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
return false;
}
+ // Avoid increasing cross-block register pressure.
+ for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
+ if (AddrUse.getParent() != LdSt.getParent())
+ return false;
+ }
+
// FIXME: check whether all uses of the base pointer are constant PtrAdds.
// That might allow us to end base's liveness here by adjusting the constant.
-
- return all_of(MRI.use_nodbg_instructions(Addr),
- [&](MachineInstr &UseMI) { return dominates(LdSt, UseMI); });
+ bool RealUse = false;
+ for (auto &PtrUse : MRI.use_nodbg_instructions(Addr)) {
+ if (!dominates(LdSt, PtrUse))
+ return false; // All uses must be dominated by the load/store.
+
+ // If Ptr may be folded into the addressing mode of another use, then it's
+ // not profitable to do this transformation.
+ if (auto *UseLdSt = dyn_cast<GLoadStore>(&PtrUse)) {
+ if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
+ RealUse = true;
+ } else {
+ RealUse = true;
+ }
+ }
+ return RealUse;
}
bool CombinerHelper::matchCombineIndexedLoadStore(
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 017c4523c23a184..9ae1dd99f20f45d 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -245,7 +245,7 @@ def AArch64PostLegalizerLowering
// Post-legalization combines which are primarily optimizations.
def AArch64PostLegalizerCombiner
: GICombiner<"AArch64PostLegalizerCombinerImpl",
- [copy_prop, combines_for_extload,
+ [copy_prop, combines_for_extload, reassocs,
combine_indexed_load_store,
sext_trunc_sextload, mutate_anyext_to_zext,
hoist_logic_op_with_same_opcode_hands,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7211607fee528a6..25919816747df4c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -23699,10 +23699,6 @@ bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
bool AArch64TargetLowering::isIndexingLegal(MachineInstr &MI, Register Base,
Register Offset, bool IsPre,
MachineRegisterInfo &MRI) const {
- // HACK
- if (IsPre)
- return false; // Until we implement.
-
auto CstOffset = getIConstantVRegVal(Offset, MRI);
if (!CstOffset || CstOffset->isZero())
return false;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 941607dae29bb90..942edbd78b6452b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5659,24 +5659,34 @@ bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
Register WriteBack = Ld.getWritebackReg();
Register Base = Ld.getBaseReg();
Register Offset = Ld.getOffsetReg();
-
- if (Ld.isPre())
- return false; // TODO: add pre-inc support
-
- unsigned Opc = 0;
- static constexpr unsigned GPROpcodes[] = {
- AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
- AArch64::LDRXpost};
- static constexpr unsigned FPROpcodes[] = {
- AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
- AArch64::LDRDpost, AArch64::LDRQpost};
-
+ LLT Ty = MRI.getType(Dst);
+ assert(Ty.getSizeInBits() <= 128);
unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
- if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
- Opc = FPROpcodes[Log2_32(MemSize)];
- else
- Opc = GPROpcodes[Log2_32(MemSize)];
+ unsigned Opc = 0;
+ if (Ld.isPre()) {
+ static constexpr unsigned GPROpcodes[] = {
+ AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
+ AArch64::LDRXpre};
+ static constexpr unsigned FPROpcodes[] = {
+ AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
+ AArch64::LDRQpre};
+ if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
+ Opc = FPROpcodes[Log2_32(MemSize)];
+ else
+ Opc = GPROpcodes[Log2_32(MemSize)];
+ } else {
+ static constexpr unsigned GPROpcodes[] = {
+ AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
+ AArch64::LDRXpost};
+ static constexpr unsigned FPROpcodes[] = {
+ AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
+ AArch64::LDRDpost, AArch64::LDRQpost};
+ if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
+ Opc = FPROpcodes[Log2_32(MemSize)];
+ else
+ Opc = GPROpcodes[Log2_32(MemSize)];
+ }
auto Cst = getIConstantVRegVal(Offset, MRI);
if (!Cst)
return false; // Shouldn't happen, but just in case.
@@ -5695,23 +5705,34 @@ bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
Register Base = I.getBaseReg();
Register Offset = I.getOffsetReg();
LLT ValTy = MRI.getType(Val);
-
- if (I.isPre())
- return false; // TODO: add pre-inc support
+ assert(ValTy.getSizeInBits() <= 128);
unsigned Opc = 0;
- static constexpr unsigned GPROpcodes[] = {
- AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
- AArch64::STRXpost};
- static constexpr unsigned FPROpcodes[] = {
- AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
- AArch64::STRDpost, AArch64::STRQpost};
-
- assert(ValTy.getSizeInBits() <= 128);
- if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
- Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
- else
- Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
+ if (I.isPre()) {
+ static constexpr unsigned GPROpcodes[] = {
+ AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
+ AArch64::STRXpre};
+ static constexpr unsigned FPROpcodes[] = {
+ AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
+ AArch64::STRQpre};
+
+ if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
+ Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
+ else
+ Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
+ } else {
+ static constexpr unsigned GPROpcodes[] = {
+ AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
+ AArch64::STRXpost};
+ static constexpr unsigned FPROpcodes[] = {
+ AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
+ AArch64::STRDpost, AArch64::STRQpost};
+
+ if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
+ Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
+ else
+ Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
+ }
auto Cst = getIConstantVRegVal(Offset, MRI);
if (!Cst)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-indexed-load-stores.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-indexed-load-stores.mir
index e82a0c219068fde..bd0317ec6a1360c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-indexed-load-stores.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-indexed-load-stores.mir
@@ -87,3 +87,25 @@ body: |
$q0 = COPY %dst
RET_ReallyLR implicit $x0, implicit $q0
...
+---
+name: pre_store_s64
+body: |
+ bb.0:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: pre_store_s64
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ptr:_(p0) = COPY $x0
+ ; CHECK-NEXT: %val:_(s64) = COPY $x1
+ ; CHECK-NEXT: %offset:_(s64) = G_CONSTANT i64 8
+ ; CHECK-NEXT: %writeback:_(p0) = G_INDEXED_STORE %val(s64), %ptr, %offset(s64), 1 :: (store (s64))
+ ; CHECK-NEXT: $x0 = COPY %writeback(p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %ptr:_(p0) = COPY $x0
+ %val:_(s64) = COPY $x1
+ %offset:_(s64) = G_CONSTANT i64 8
+ %writeback:_(p0) = G_INDEXED_STORE %val, %ptr, %offset, 1 :: (store (s64), align 8)
+ $x0 = COPY %writeback
+ RET_ReallyLR implicit $x0
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
index 23886d8bc4a7baa..07744dada4f1faa 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
@@ -83,8 +83,8 @@ define void @test_simple_vector(ptr %ptr) {
; CHECK-NEXT: mov w8, #5 ; =0x5
; CHECK-NEXT: strh w9, [x0, #2]
; CHECK-NEXT: mov w9, #8 ; =0x8
-; CHECK-NEXT: strh w8, [x0, #4]
-; CHECK-NEXT: strh w9, [x0, #6]
+; CHECK-NEXT: strh w8, [x0, #4]!
+; CHECK-NEXT: strh w9, [x0, #2]
; CHECK-NEXT: ret
store <2 x i16> <i16 4, i16 7>, ptr %ptr
%addr2 = getelementptr <2 x i16>, ptr %ptr, i64 1
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index 050c7e30ad2f934..c24192c6da59a40 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -197,88 +197,40 @@ define ptr @storef64(ptr %ptr, double %index, double %spacing) {
define ptr @pref64(ptr %ptr, double %spacing) {
-; CHECK64-LABEL: pref64:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: str d0, [x0, #32]!
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: pref64:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #32
-; GISEL-NEXT: str d0, [x8, #32]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: pref64:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: str d0, [x0, #32]!
-; CHECK32-NEXT: ret
+; CHECK-LABEL: pref64:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str d0, [x0, #32]!
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds double, ptr %ptr, i64 4
store double %spacing, ptr %incdec.ptr, align 4
ret ptr %incdec.ptr
}
define ptr @pref32(ptr %ptr, float %spacing) {
-; CHECK64-LABEL: pref32:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: str s0, [x0, #12]!
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: pref32:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #12
-; GISEL-NEXT: str s0, [x8, #12]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: pref32:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: str s0, [x0, #12]!
-; CHECK32-NEXT: ret
+; CHECK-LABEL: pref32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str s0, [x0, #12]!
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds float, ptr %ptr, i64 3
store float %spacing, ptr %incdec.ptr, align 4
ret ptr %incdec.ptr
}
define ptr @pref16(ptr %ptr, half %spacing) nounwind {
-; CHECK64-LABEL: pref16:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: str h0, [x0, #6]!
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: pref16:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #6
-; GISEL-NEXT: str h0, [x8, #6]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: pref16:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: str h0, [x0, #6]!
-; CHECK32-NEXT: ret
+; CHECK-LABEL: pref16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str h0, [x0, #6]!
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds half, ptr %ptr, i64 3
store half %spacing, ptr %incdec.ptr, align 2
ret ptr %incdec.ptr
}
define ptr @pre64(ptr %ptr, i64 %spacing) {
-; CHECK64-LABEL: pre64:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: str x1, [x0, #16]!
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: pre64:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #16
-; GISEL-NEXT: str x1, [x8, #16]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: pre64:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: str x1, [x0, #16]!
-; CHECK32-NEXT: ret
+; CHECK-LABEL: pre64:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str x1, [x0, #16]!
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i64, ptr %ptr, i64 2
store i64 %spacing, ptr %incdec.ptr, align 4
ret ptr %incdec.ptr
@@ -297,44 +249,20 @@ define ptr @pre64idxpos256(ptr %ptr, i64 %spacing) {
}
define ptr @pre64idxneg256(ptr %ptr, i64 %spacing) {
-; CHECK64-LABEL: pre64idxneg256:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: str x1, [x0, #-256]!
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: pre64idxneg256:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: sub x0, x0, #256
-; GISEL-NEXT: stur x1, [x8, #-256]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: pre64idxneg256:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: str x1, [x0, #-256]!
-; CHECK32-NEXT: ret
+; CHECK-LABEL: pre64idxneg256:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str x1, [x0, #-256]!
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i64, ptr %ptr, i64 -32
store i64 %spacing, ptr %incdec.ptr, align 4
ret ptr %incdec.ptr
}
define ptr @pre32(ptr %ptr, i32 %spacing) {
-; CHECK64-LABEL: pre32:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: str w1, [x0, #8]!
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: pre32:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #8
-; GISEL-NEXT: str w1, [x8, #8]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: pre32:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: str w1, [x0, #8]!
-; CHECK32-NEXT: ret
+; CHECK-LABEL: pre32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str w1, [x0, #8]!
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i32, ptr %ptr, i64 2
store i32 %spacing, ptr %incdec.ptr, align 4
ret ptr %incdec.ptr
@@ -353,44 +281,20 @@ define ptr @pre32idxpos256(ptr %ptr, i32 %spacing) {
}
define ptr @pre32idxneg256(ptr %ptr, i32 %spacing) {
-; CHECK64-LABEL: pre32idxneg256:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: str w1, [x0, #-256]!
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: pre32idxneg256:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: sub x0, x0, #256
-; GISEL-NEXT: stur w1, [x8, #-256]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: pre32idxneg256:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: str w1, [x0, #-256]!
-; CHECK32-NEXT: ret
+; CHECK-LABEL: pre32idxneg256:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str w1, [x0, #-256]!
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i32, ptr %ptr, i64 -64
store i32 %spacing, ptr %incdec.ptr, align 4
ret ptr %incdec.ptr
}
define ptr @pre16(ptr %ptr, i16 %spacing) {
-; CHECK64-LABEL: pre16:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: strh w1, [x0, #4]!
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: pre16:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #4
-; GISEL-NEXT: strh w1, [x8, #4]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: pre16:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: strh w1, [x0, #4]!
-; CHECK32-NEXT: ret
+; CHECK-LABEL: pre16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: strh w1, [x0, #4]!
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i16, ptr %ptr, i64 2
store i16 %spacing, ptr %incdec.ptr, align 4
ret ptr %incdec.ptr
@@ -409,44 +313,20 @@ define ptr @pre16idxpos256(ptr %ptr, i16 %spacing) {
}
define ptr @pre16idxneg256(ptr %ptr, i16 %spacing) {
-; CHECK64-LABEL: pre16idxneg256:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: strh w1, [x0, #-256]!
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: pre16idxneg256:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: sub x0, x0, #256
-; GISEL-NEXT: sturh w1, [x8, #-256]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: pre16idxneg256:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: strh w1, [x0, #-256]!
-; CHECK32-NEXT: ret
+; CHECK-LABEL: pre16idxneg256:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: strh w1, [x0, #-256]!
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i16, ptr %ptr, i64 -128
store i16 %spacing, ptr %incdec.ptr, align 4
ret ptr %incdec.ptr
}
define ptr @pre8(ptr %ptr, i8 %spacing) {
-; CHECK64-LABEL: pre8:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: strb w1, [x0, #2]!
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: pre8:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #2
-; GISEL-NEXT: strb w1, [x8, #2]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: pre8:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: strb w1, [x0, #2]!
-; CHECK32-NEXT: ret
+; CHECK-LABEL: pre8:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: strb w1, [x0, #2]!
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i8, ptr %ptr, i64 2
store i8 %spacing, ptr %incdec.ptr, align 4
ret ptr %incdec.ptr
@@ -465,44 +345,20 @@ define ptr @pre8idxpos256(ptr %ptr, i8 %spacing) {
}
define ptr @pre8idxneg256(ptr %ptr, i8 %spacing) {
-; CHECK64-LABEL: pre8idxneg256:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: strb w1, [x0, #-256]!
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: pre8idxneg256:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: sub x0, x0, #256
-; GISEL-NEXT: sturb w1, [x8, #-256]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: pre8idxneg256:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: strb w1, [x0, #-256]!
-; CHECK32-NEXT: ret
+; CHECK-LABEL: pre8idxneg256:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: strb w1, [x0, #-256]!
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i8, ptr %ptr, i64 -256
store i8 %spacing, ptr %incdec.ptr, align 4
ret ptr %incdec.ptr
}
define ptr @pretrunc64to32(ptr %ptr, i64 %spacing) {
-; CHECK64-LABEL: pretrunc64to32:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: str w1, [x0, #8]!
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: pretrunc64to32:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #8
-; GISEL-NEXT: str w1, [x8, #8]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: pretrunc64to32:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: str w1, [x0, #8]!
-; CHECK32-NEXT: ret
+; CHECK-LABEL: pretrunc64to32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: str w1, [x0, #8]!
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i32, ptr %ptr, i64 2
%trunc = trunc i64 %spacing to i32
store i32 %trunc, ptr %incdec.ptr, align 4
@@ -510,22 +366,10 @@ define ptr @pretrunc64to32(ptr %ptr, i64 %spacing) {
}
define ptr @pretrunc64to16(ptr %ptr, i64 %spacing) {
-; CHECK64-LABEL: pretrunc64to16:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: strh w1, [x0, #4]!
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: pretrunc64to16:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #4
-; GISEL-NEXT: strh w1, [x8, #4]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: pretrunc64to16:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: strh w1, [x0, #4]!
-; CHECK32-NEXT: ret
+; CHECK-LABEL: pretrunc64to16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: strh w1, [x0, #4]!
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i16, ptr %ptr, i64 2
%trunc = trunc i64 %spacing to i16
store i16 %trunc, ptr %incdec.ptr, align 4
@@ -533,22 +377,10 @@ define ptr @pretrunc64to16(ptr %ptr, i64 %spacing) {
}
define ptr @pretrunc64to8(ptr %ptr, i64 %spacing) {
-; CHECK64-LABEL: pretrunc64to8:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: strb w1, [x0, #2]!
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: pretrunc64to8:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x8, x0
-; GISEL-NEXT: add x0, x0, #2
-; GISEL-NEXT: strb w1, [x8, #2]
-; GISEL-NEXT: ret
-;
-; CHECK32-LABEL: pretrunc64to8:
-; CHECK32: ; %bb.0:
-; CHECK32-NEXT: strb w1, [x0, #2]!
-; CHECK32-NEXT: ret
+; CHECK-LABEL: pretrunc64to8:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: strb w1, [x0, #2]!
+; CHECK-NEXT: ret
%incdec.ptr = getelementptr inbounds i8, ptr %ptr, i64 2
%trunc = trunc i64 %spacing to i8
store i8 %trunc, ptr %incdec.ptr, align 4
@@ -583,24 +415,11 @@ define ptr @preidxf32(ptr %src, ptr %out) {
}
define ptr @preidxf16(ptr %src, ptr %out) {
-; CHECK64-LABEL: preidxf16:
-; CHECK64: ; %bb.0:
-; CHECK64-NEXT: ldr h0, [x0, #2]!
-; CHECK64-NEXT: str h0, [x1]
-; CHECK64-NEXT: ret
-;
-; GISEL-LABEL: preidxf16:
-; GISEL: ...
[truncated]
You added new asserts without the common && "something bad happened" message. I am wondering why they are not just…
return all_of(MRI.use_nodbg_instructions(Addr),
              [&](MachineInstr &UseMI) { return dominates(LdSt, UseMI); });
bool RealUse = false;
for (auto &PtrUse : MRI.use_nodbg_instructions(Addr)) {
10 lines up it is called AddrUse?!?
for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
  if (AddrUse.getParent() != LdSt.getParent())
    return false;
}
Are you sure about the braces?
It's something we're not that consistent with. In selection of instructions, where we should be confident in the completeness of the legalization rules, we can use asserts. However, if the legalizer rules may have gaps, it's probably better to be safe and fall back instead of asserting.
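To illustrate the two conventions, a sketch with a hypothetical selector helper (selectIndexedOp and the 128-bit bound are stand-ins, not the patch's code):

#include <cassert>

// Assert: appropriate when legalization is guaranteed to uphold the
// invariant, so a violation means a legalizer bug and should fail loudly.
bool selectIndexedOpAsserting(unsigned SizeInBits) {
  assert(SizeInBits <= 128 && "legalizer should have clamped this");
  // ... pick and emit the instruction ...
  return true;
}

// Bail out: appropriate when the legalizer rules may have gaps; returning
// false reports selection failure (and falls back) instead of crashing.
bool selectIndexedOpWithFallback(unsigned SizeInBits) {
  if (SizeInBits > 128)
    return false;
  // ... pick and emit the instruction ...
  return true;
}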
              [&](MachineInstr &UseMI) { return dominates(LdSt, UseMI); });
bool RealUse = false;
for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
  if (!dominates(LdSt, AddrUse))
Defer the dominance check until after you know the mode is foldable?
The foldable check isn't an early exit, so we still need to do this for every use.
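The control flow in question, reduced to a self-contained sketch (the Use struct stands in for the real MRI and dominance queries):

#include <vector>

struct Use {
  bool DominatedByLdSt;    // stand-in for dominates(LdSt, Use)
  bool FoldableInAddrMode; // stand-in for canFoldInAddressingMode(...)
};

bool preIndexCandidateShape(const std::vector<Use> &Uses) {
  bool RealUse = false;
  for (const Use &U : Uses) {
    if (!U.DominatedByLdSt)
      return false;   // the only early exit: dominance is checked per use
    if (!U.FoldableInAddrMode)
      RealUse = true; // accumulates and never breaks, so reordering the two
                      // checks would not avoid any dominance queries
  }
  return RealUse; // profitable only if some use can't fold the address anyway
}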
@@ -245,7 +245,7 @@ def AArch64PostLegalizerLowering
// Post-legalization combines which are primarily optimizations.
def AArch64PostLegalizerCombiner
    : GICombiner<"AArch64PostLegalizerCombinerImpl",
                 [copy_prop, combines_for_extload,
                 [copy_prop, combines_for_extload, reassocs,
Looks unrelated?
Yep, will remove.
    AArch64::LDRDpost, AArch64::LDRQpost};

LLT Ty = MRI.getType(Dst);
assert(Ty.getSizeInBits() <= 128 && "Unexpected type for indexed load");
unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
Dumb question: are we guaranteed that Ty.getSizeInBits() == MemSize, or could the size of the memory operand be wider than what's actually loaded?
Good question. We have separate opcodes for extending loads, G_INDEXED_ZEXTLOAD/G_INDEXED_SEXTLOAD. If the load's destination type is wider than the memory type here, it's an any-extending load, so we just use the memory type to decide which instruction to pick.
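A reduced sketch of that rule (the function name is mine; in the patch this is Log2_32(MemSize) indexing the opcode arrays):

#include <cassert>

// For a plain G_INDEXED_LOAD the destination may be wider than the memory
// type (an any-extending load); zext/sext get the dedicated
// G_INDEXED_ZEXTLOAD / G_INDEXED_SEXTLOAD opcodes. So the opcode table is
// indexed purely by the memory size.
unsigned opcodeIndexForMemSize(unsigned MemSizeInBytes) {
  assert(MemSizeInBytes && (MemSizeInBytes & (MemSizeInBytes - 1)) == 0 &&
         "indexed accesses here are power-of-two sized");
  unsigned Idx = 0; // equivalent to Log2_32(MemSizeInBytes)
  while ((1u << (Idx + 1)) <= MemSizeInBytes)
    ++Idx;
  return Idx; // e.g. 1 -> LDRBBpre, 2 -> LDRHHpre, 4 -> LDRWpre, 8 -> LDRXpre
}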
LGTM