@@ -42,6 +42,7 @@ STATISTIC(NumPreFolded, "Number of pre-index updates folded");
 STATISTIC(NumUnscaledPairCreated,
           "Number of load/store from unscaled generated");
 STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted");
+STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
 
 static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
                                    cl::init(20), cl::Hidden);
@@ -152,6 +153,8 @@ static bool isUnscaledLdSt(unsigned Opc) {
   case AArch64::STURSi:
   case AArch64::STURDi:
   case AArch64::STURQi:
+  case AArch64::STURBBi:
+  case AArch64::STURHHi:
   case AArch64::STURWi:
   case AArch64::STURXi:
   case AArch64::LDURSi:
@@ -189,6 +192,22 @@ static unsigned getBitExtrOpcode(MachineInstr *MI) {
   }
 }
 
+static bool isNarrowStore(unsigned Opc) {
+  switch (Opc) {
+  default:
+    return false;
+  case AArch64::STRBBui:
+  case AArch64::STURBBi:
+  case AArch64::STRHHui:
+  case AArch64::STURHHi:
+    return true;
+  }
+}
+
+static bool isNarrowStore(MachineInstr *MI) {
+  return isNarrowStore(MI->getOpcode());
+}
+
 static bool isNarrowLoad(unsigned Opc) {
   switch (Opc) {
   default:
@@ -219,12 +238,14 @@ static int getMemScale(MachineInstr *MI) {
   case AArch64::LDRSBWui:
   case AArch64::LDURSBWi:
   case AArch64::STRBBui:
+  case AArch64::STURBBi:
     return 1;
   case AArch64::LDRHHui:
   case AArch64::LDURHHi:
   case AArch64::LDRSHWui:
   case AArch64::LDURSHWi:
   case AArch64::STRHHui:
+  case AArch64::STURHHi:
     return 2;
   case AArch64::LDRSui:
   case AArch64::LDURSi:
@@ -278,6 +299,10 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
   case AArch64::STURDi:
   case AArch64::STRQui:
   case AArch64::STURQi:
+  case AArch64::STRBBui:
+  case AArch64::STURBBi:
+  case AArch64::STRHHui:
+  case AArch64::STURHHi:
   case AArch64::STRWui:
   case AArch64::STURWi:
   case AArch64::STRXui:
@@ -327,6 +352,14 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
   case AArch64::STRQui:
   case AArch64::STURQi:
     return AArch64::STPQi;
+  case AArch64::STRBBui:
+    return AArch64::STRHHui;
+  case AArch64::STRHHui:
+    return AArch64::STRWui;
+  case AArch64::STURBBi:
+    return AArch64::STURHHi;
+  case AArch64::STURHHi:
+    return AArch64::STURWi;
   case AArch64::STRWui:
   case AArch64::STURWi:
     return AArch64::STPWi;
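Note that for the narrow-store cases getMatchingPairOpcode does not return a pair (STP-style) opcode at all: it encodes a widening ladder, where two adjacent narrow zero stores become the single next-wider store, so subsequent iterations of the pass can widen the result again. A minimal standalone sketch of that ladder (the enum and helper below are ours, not LLVM's):

```cpp
#include <cstdio>

// Hypothetical stand-ins for the relevant AArch64 opcodes.
enum Opcode { STRBBui, STRHHui, STRWui, Unknown };

// Each merge of two adjacent zero stores yields the next wider store.
Opcode widenZeroStore(Opcode Opc) {
  switch (Opc) {
  case STRBBui:
    return STRHHui; // two byte stores -> one halfword store
  case STRHHui:
    return STRWui; // two halfword stores -> one word store
  default:
    return Unknown; // wider stores pair normally instead
  }
}

int main() {
  // Four adjacent "strb wzr" can end up as one "str wzr" in two rounds.
  Opcode O = widenZeroStore(widenZeroStore(STRBBui));
  std::printf("%s\n", O == STRWui ? "str wzr" : "?");
}
```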
@@ -681,17 +714,33 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
     return NextI;
   }
 
-  // Handle Unscaled
-  if (IsUnscaled)
-    OffsetImm /= OffsetStride;
-
   // Construct the new instruction.
-  MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint,
-                                    I->getDebugLoc(), TII->get(NewOpc))
-                                .addOperand(getLdStRegOp(RtMI))
-                                .addOperand(getLdStRegOp(Rt2MI))
-                                .addOperand(BaseRegOp)
-                                .addImm(OffsetImm);
+  MachineInstrBuilder MIB;
+  if (isNarrowStore(Opc)) {
+    // Change the scaled offset from small to large type.
+    if (!IsUnscaled) {
+      assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
+      OffsetImm /= 2;
+    }
+    MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+                  TII->get(NewOpc))
+              .addOperand(getLdStRegOp(I))
+              .addOperand(BaseRegOp)
+              .addImm(OffsetImm);
+    // Copy MachineMemOperands from the original stores.
+    concatenateMemOperands(MIB, I, Paired);
+  } else {
+    // Handle Unscaled
+    if (IsUnscaled)
+      OffsetImm /= OffsetStride;
+    MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+                  TII->get(NewOpc))
+              .addOperand(getLdStRegOp(RtMI))
+              .addOperand(getLdStRegOp(Rt2MI))
+              .addOperand(BaseRegOp)
+              .addImm(OffsetImm);
+  }
+
   (void)MIB;
 
   // FIXME: Do we need/want to copy the mem operands from the source
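The offset adjustment in the narrow-store branch above works because scaled (register-unsigned-immediate) offsets are counted in units of the access size: when the access width doubles, the encoded immediate halves, while unscaled STUR* immediates are plain byte offsets and pass through unchanged. A hedged sketch of just that arithmetic (function name and framing are ours):

```cpp
#include <cassert>

// Sketch: OffsetImm is the immediate of the lower-addressed narrow store.
int rescaleForWiderStore(int OffsetImm, bool IsUnscaled) {
  if (!IsUnscaled) {
    // Scaled immediates count in units of the access size, so the merged,
    // twice-as-wide store needs half the value. Odd immediates were already
    // rejected by the alignment check in findMatchingInsn.
    assert((OffsetImm & 1) == 0 && "Unexpected offset to merge");
    return OffsetImm / 2;
  }
  // Unscaled (STUR*) immediates are byte offsets; nothing to rescale.
  return OffsetImm;
}

int main() {
  // Two halfword zero stores at scaled immediates 4 and 5 (byte offsets 8
  // and 10) merge into a word zero store at scaled immediate 2 (byte 8).
  return rescaleForWiderStore(4, /*IsUnscaled=*/false) == 2 ? 0 : 1;
}
```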
@@ -830,6 +879,11 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
   unsigned Reg = getLdStRegOp(FirstMI).getReg();
   unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
   int Offset = getLdStOffsetOp(FirstMI).getImm();
+  bool IsNarrowStore = isNarrowStore(Opc);
+
+  // For narrow stores, find only the case where the stored value is WZR.
+  if (IsNarrowStore && Reg != AArch64::WZR)
+    return E;
 
   // Early exit if the first instruction modifies the base register.
   // e.g., ldr x0, [x0]
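Restricting narrow-store candidates to WZR is what makes the merge sound: unlike a load pair, the merged instruction must store one combined value, and zero is the only value available for free via the zero register. A source-level illustration (our example, not from the patch) of the pattern this targets:

```cpp
// Zeroing adjacent byte fields typically lowers to strb of wzr; after this
// patch the two byte stores below can merge into a single strh of wzr.
struct Flags {
  unsigned char A, B;
};

void clearFlags(Flags *F) {
  F->A = 0; // strb wzr, [x0]
  F->B = 0; // strb wzr, [x0, #1]  -> merged: strh wzr, [x0]
}
```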
@@ -840,7 +894,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
   // range, plus allow an extra one in case we find a later insn that matches
   // with Offset-1)
   int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
-  if (!isNarrowLoad(Opc) && !inBoundsForPair(IsUnscaled, Offset, OffsetStride))
+  if (!(isNarrowLoad(Opc) || IsNarrowStore) &&
+      !inBoundsForPair(IsUnscaled, Offset, OffsetStride))
     return E;
 
   // Track which registers have been modified and used between the first insn
// Track which registers have been modified and used between the first insn
@@ -907,9 +962,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
907
962
continue ;
908
963
}
909
964
910
- if (IsNarrowLoad) {
911
- // If the alignment requirements of the larger type scaled load
912
- // instruction can't express the scaled offset of the smaller type
965
+ if (IsNarrowLoad || IsNarrowStore ) {
966
+ // If the alignment requirements of the scaled wide load/store
967
+ // instruction can't express the offset of the scaled narrow
913
968
// input, bail and keep looking.
914
969
if (!IsUnscaled && alignTo (MinOffset, 2 ) != MinOffset) {
915
970
trackRegDefsUses (MI, ModifiedRegs, UsedRegs, TRI);
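This alignment check is the guard that keeps the later OffsetImm /= 2 in mergePairedInsns exact: a scaled narrow offset is only representable in the wider scaled encoding when it is even. A small sketch of the equivalent test (helper name is ours):

```cpp
#include <cstdio>

// For the non-negative scaled offsets used here, alignTo(MinOffset, 2) !=
// MinOffset is simply an oddness test: an odd narrow immediate maps to a
// byte address the wider scaled form cannot encode.
bool wideOffsetIsExpressible(int MinOffset) {
  return (MinOffset & 1) == 0;
}

int main() {
  std::printf("%d\n", wideOffsetIsExpressible(4)); // 1: halfword 4 -> word 2
  std::printf("%d\n", wideOffsetIsExpressible(3)); // 0: byte 6, bail
}
```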
@@ -929,7 +984,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
       // If the destination register of the loads is the same register, bail
       // and keep looking. A load-pair instruction with both destination
       // registers the same is UNPREDICTABLE and will result in an exception.
-      if (MayLoad && Reg == getLdStRegOp(MI).getReg()) {
+      // For narrow stores, allow only when the stored value is the same
+      // (i.e., WZR).
+      if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) ||
+          (IsNarrowStore && Reg != getLdStRegOp(MI).getReg())) {
         trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
         MemInsns.push_back(MI);
         continue;
@@ -1228,6 +1286,8 @@ bool AArch64LoadStoreOpt::tryToMergeLdStInst(
   if (Paired != E) {
     if (isNarrowLoad(MI)) {
       ++NumNarrowLoadsPromoted;
+    } else if (isNarrowStore(MI)) {
+      ++NumZeroStoresPromoted;
     } else {
       ++NumPairCreated;
       if (isUnscaledLdSt(MI))
@@ -1284,11 +1344,15 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
     case AArch64::LDRHHui:
     case AArch64::LDRSBWui:
     case AArch64::LDRSHWui:
+    case AArch64::STRBBui:
+    case AArch64::STRHHui:
     // Unscaled instructions.
     case AArch64::LDURBBi:
     case AArch64::LDURHHi:
     case AArch64::LDURSBWi:
-    case AArch64::LDURSHWi: {
+    case AArch64::LDURSHWi:
+    case AArch64::STURBBi:
+    case AArch64::STURHHi: {
       if (tryToMergeLdStInst(MBBI)) {
         Modified = true;
         break;