Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit c8ef580

Browse files
author
Jun Bum Lim
committed
[AArch64]Merge narrow zero stores to a wider store
This change merges adjacent zero stores into a single wider store. For example:

  strh wzr, [x0]
  strh wzr, [x0, #2]

becomes:

  str wzr, [x0]

This fixes PR25410.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253711 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 3de08c6 commit c8ef580

File tree

2 files changed

+168
-16
lines changed

2 files changed

+168
-16
lines changed

lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

Lines changed: 80 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ STATISTIC(NumPreFolded, "Number of pre-index updates folded");
4242
STATISTIC(NumUnscaledPairCreated,
4343
"Number of load/store from unscaled generated");
4444
STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted");
45+
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
4546

4647
static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
4748
cl::init(20), cl::Hidden);
@@ -152,6 +153,8 @@ static bool isUnscaledLdSt(unsigned Opc) {
152153
case AArch64::STURSi:
153154
case AArch64::STURDi:
154155
case AArch64::STURQi:
156+
case AArch64::STURBBi:
157+
case AArch64::STURHHi:
155158
case AArch64::STURWi:
156159
case AArch64::STURXi:
157160
case AArch64::LDURSi:
@@ -189,6 +192,22 @@ static unsigned getBitExtrOpcode(MachineInstr *MI) {
189192
}
190193
}
191194

195+
static bool isNarrowStore(unsigned Opc) {
196+
switch (Opc) {
197+
default:
198+
return false;
199+
case AArch64::STRBBui:
200+
case AArch64::STURBBi:
201+
case AArch64::STRHHui:
202+
case AArch64::STURHHi:
203+
return true;
204+
}
205+
}
206+
207+
static bool isNarrowStore(MachineInstr *MI) {
208+
return isNarrowStore(MI->getOpcode());
209+
}
210+
192211
static bool isNarrowLoad(unsigned Opc) {
193212
switch (Opc) {
194213
default:
@@ -219,12 +238,14 @@ static int getMemScale(MachineInstr *MI) {
219238
case AArch64::LDRSBWui:
220239
case AArch64::LDURSBWi:
221240
case AArch64::STRBBui:
241+
case AArch64::STURBBi:
222242
return 1;
223243
case AArch64::LDRHHui:
224244
case AArch64::LDURHHi:
225245
case AArch64::LDRSHWui:
226246
case AArch64::LDURSHWi:
227247
case AArch64::STRHHui:
248+
case AArch64::STURHHi:
228249
return 2;
229250
case AArch64::LDRSui:
230251
case AArch64::LDURSi:
@@ -278,6 +299,10 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
278299
case AArch64::STURDi:
279300
case AArch64::STRQui:
280301
case AArch64::STURQi:
302+
case AArch64::STRBBui:
303+
case AArch64::STURBBi:
304+
case AArch64::STRHHui:
305+
case AArch64::STURHHi:
281306
case AArch64::STRWui:
282307
case AArch64::STURWi:
283308
case AArch64::STRXui:
@@ -327,6 +352,14 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
327352
case AArch64::STRQui:
328353
case AArch64::STURQi:
329354
return AArch64::STPQi;
355+
case AArch64::STRBBui:
356+
return AArch64::STRHHui;
357+
case AArch64::STRHHui:
358+
return AArch64::STRWui;
359+
case AArch64::STURBBi:
360+
return AArch64::STURHHi;
361+
case AArch64::STURHHi:
362+
return AArch64::STURWi;
330363
case AArch64::STRWui:
331364
case AArch64::STURWi:
332365
return AArch64::STPWi;
@@ -681,17 +714,33 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
681714
return NextI;
682715
}
683716

684-
// Handle Unscaled
685-
if (IsUnscaled)
686-
OffsetImm /= OffsetStride;
687-
688717
// Construct the new instruction.
689-
MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint,
690-
I->getDebugLoc(), TII->get(NewOpc))
691-
.addOperand(getLdStRegOp(RtMI))
692-
.addOperand(getLdStRegOp(Rt2MI))
693-
.addOperand(BaseRegOp)
694-
.addImm(OffsetImm);
718+
MachineInstrBuilder MIB;
719+
if (isNarrowStore(Opc)) {
720+
// Change the scaled offset from small to large type.
721+
if (!IsUnscaled) {
722+
assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
723+
OffsetImm /= 2;
724+
}
725+
MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
726+
TII->get(NewOpc))
727+
.addOperand(getLdStRegOp(I))
728+
.addOperand(BaseRegOp)
729+
.addImm(OffsetImm);
730+
// Copy MachineMemOperands from the original stores.
731+
concatenateMemOperands(MIB, I, Paired);
732+
} else {
733+
// Handle Unscaled
734+
if (IsUnscaled)
735+
OffsetImm /= OffsetStride;
736+
MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
737+
TII->get(NewOpc))
738+
.addOperand(getLdStRegOp(RtMI))
739+
.addOperand(getLdStRegOp(Rt2MI))
740+
.addOperand(BaseRegOp)
741+
.addImm(OffsetImm);
742+
}
743+
695744
(void)MIB;
696745

697746
// FIXME: Do we need/want to copy the mem operands from the source
@@ -830,6 +879,11 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
830879
unsigned Reg = getLdStRegOp(FirstMI).getReg();
831880
unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
832881
int Offset = getLdStOffsetOp(FirstMI).getImm();
882+
bool IsNarrowStore = isNarrowStore(Opc);
883+
884+
// For narrow stores, find only the case where the stored value is WZR.
885+
if (IsNarrowStore && Reg != AArch64::WZR)
886+
return E;
833887

834888
// Early exit if the first instruction modifies the base register.
835889
// e.g., ldr x0, [x0]
@@ -840,7 +894,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
840894
// range, plus allow an extra one in case we find a later insn that matches
841895
// with Offset-1)
842896
int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
843-
if (!isNarrowLoad(Opc) && !inBoundsForPair(IsUnscaled, Offset, OffsetStride))
897+
if (!(isNarrowLoad(Opc) || IsNarrowStore) &&
898+
!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
844899
return E;
845900

846901
// Track which registers have been modified and used between the first insn
@@ -907,9 +962,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
907962
continue;
908963
}
909964

910-
if (IsNarrowLoad) {
911-
// If the alignment requirements of the larger type scaled load
912-
// instruction can't express the scaled offset of the smaller type
965+
if (IsNarrowLoad || IsNarrowStore) {
966+
// If the alignment requirements of the scaled wide load/store
967+
// instruction can't express the offset of the scaled narrow
913968
// input, bail and keep looking.
914969
if (!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) {
915970
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
@@ -929,7 +984,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
929984
// If the destination register of the loads is the same register, bail
930985
// and keep looking. A load-pair instruction with both destination
931986
// registers the same is UNPREDICTABLE and will result in an exception.
932-
if (MayLoad && Reg == getLdStRegOp(MI).getReg()) {
987+
// For narrow stores, allow only when the stored value is the same
988+
// (i.e., WZR).
989+
if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) ||
990+
(IsNarrowStore && Reg != getLdStRegOp(MI).getReg())) {
933991
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
934992
MemInsns.push_back(MI);
935993
continue;
@@ -1228,6 +1286,8 @@ bool AArch64LoadStoreOpt::tryToMergeLdStInst(
12281286
if (Paired != E) {
12291287
if (isNarrowLoad(MI)) {
12301288
++NumNarrowLoadsPromoted;
1289+
} else if (isNarrowStore(MI)) {
1290+
++NumZeroStoresPromoted;
12311291
} else {
12321292
++NumPairCreated;
12331293
if (isUnscaledLdSt(MI))
@@ -1284,11 +1344,15 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
12841344
case AArch64::LDRHHui:
12851345
case AArch64::LDRSBWui:
12861346
case AArch64::LDRSHWui:
1347+
case AArch64::STRBBui:
1348+
case AArch64::STRHHui:
12871349
// Unscaled instructions.
12881350
case AArch64::LDURBBi:
12891351
case AArch64::LDURHHi:
12901352
case AArch64::LDURSBWi:
1291-
case AArch64::LDURSHWi: {
1353+
case AArch64::LDURSHWi:
1354+
case AArch64::STURBBi:
1355+
case AArch64::STURHHi: {
12921356
if (tryToMergeLdStInst(MBBI)) {
12931357
Modified = true;
12941358
break;

test/CodeGen/AArch64/arm64-ldr-merge.ll renamed to test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,3 +316,91 @@ define i32 @Ldursb_szext_merge(i8* %p) nounwind {
316316
ret i32 %add
317317
}
318318

319+
; CHECK-LABEL: Strh_zero
320+
; CHECK: str wzr
321+
define void @Strh_zero(i16* nocapture %P, i32 %n) {
322+
entry:
323+
%idxprom = sext i32 %n to i64
324+
%arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom
325+
store i16 0, i16* %arrayidx
326+
%add = add nsw i32 %n, 1
327+
%idxprom1 = sext i32 %add to i64
328+
%arrayidx2 = getelementptr inbounds i16, i16* %P, i64 %idxprom1
329+
store i16 0, i16* %arrayidx2
330+
ret void
331+
}
332+
333+
; CHECK-LABEL: Strh_zero_4
334+
; CHECK: stp wzr, wzr
335+
define void @Strh_zero_4(i16* nocapture %P, i32 %n) {
336+
entry:
337+
%idxprom = sext i32 %n to i64
338+
%arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom
339+
store i16 0, i16* %arrayidx
340+
%add = add nsw i32 %n, 1
341+
%idxprom1 = sext i32 %add to i64
342+
%arrayidx2 = getelementptr inbounds i16, i16* %P, i64 %idxprom1
343+
store i16 0, i16* %arrayidx2
344+
%add3 = add nsw i32 %n, 2
345+
%idxprom4 = sext i32 %add3 to i64
346+
%arrayidx5 = getelementptr inbounds i16, i16* %P, i64 %idxprom4
347+
store i16 0, i16* %arrayidx5
348+
%add6 = add nsw i32 %n, 3
349+
%idxprom7 = sext i32 %add6 to i64
350+
%arrayidx8 = getelementptr inbounds i16, i16* %P, i64 %idxprom7
351+
store i16 0, i16* %arrayidx8
352+
ret void
353+
}
354+
355+
; CHECK-LABEL: Sturb_zero
356+
; CHECK: sturh wzr
357+
define void @Sturb_zero(i8* nocapture %P, i32 %n) #0 {
358+
entry:
359+
%sub = add nsw i32 %n, -2
360+
%idxprom = sext i32 %sub to i64
361+
%arrayidx = getelementptr inbounds i8, i8* %P, i64 %idxprom
362+
store i8 0, i8* %arrayidx
363+
%sub2= add nsw i32 %n, -1
364+
%idxprom1 = sext i32 %sub2 to i64
365+
%arrayidx2 = getelementptr inbounds i8, i8* %P, i64 %idxprom1
366+
store i8 0, i8* %arrayidx2
367+
ret void
368+
}
369+
370+
; CHECK-LABEL: Sturh_zero
371+
; CHECK: stur wzr
372+
define void @Sturh_zero(i16* nocapture %P, i32 %n) {
373+
entry:
374+
%sub = add nsw i32 %n, -2
375+
%idxprom = sext i32 %sub to i64
376+
%arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom
377+
store i16 0, i16* %arrayidx
378+
%sub1 = add nsw i32 %n, -3
379+
%idxprom2 = sext i32 %sub1 to i64
380+
%arrayidx3 = getelementptr inbounds i16, i16* %P, i64 %idxprom2
381+
store i16 0, i16* %arrayidx3
382+
ret void
383+
}
384+
385+
; CHECK-LABEL: Sturh_zero_4
386+
; CHECK: stp wzr, wzr
387+
define void @Sturh_zero_4(i16* nocapture %P, i32 %n) {
388+
entry:
389+
%sub = add nsw i32 %n, -3
390+
%idxprom = sext i32 %sub to i64
391+
%arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom
392+
store i16 0, i16* %arrayidx
393+
%sub1 = add nsw i32 %n, -4
394+
%idxprom2 = sext i32 %sub1 to i64
395+
%arrayidx3 = getelementptr inbounds i16, i16* %P, i64 %idxprom2
396+
store i16 0, i16* %arrayidx3
397+
%sub4 = add nsw i32 %n, -2
398+
%idxprom5 = sext i32 %sub4 to i64
399+
%arrayidx6 = getelementptr inbounds i16, i16* %P, i64 %idxprom5
400+
store i16 0, i16* %arrayidx6
401+
%sub7 = add nsw i32 %n, -1
402+
%idxprom8 = sext i32 %sub7 to i64
403+
%arrayidx9 = getelementptr inbounds i16, i16* %P, i64 %idxprom8
404+
store i16 0, i16* %arrayidx9
405+
ret void
406+
}

0 commit comments

Comments
 (0)