Skip to content

Commit 43ffe2e

Browse files
committed
[AArch64] Fold more load.x into load.i with large offset
The list of load.x opcodes is taken from canFoldIntoAddrMode in D152828. This change also adds support for LDRSroX, which was missed in canFoldIntoAddrMode.
1 parent 3319049 commit 43ffe2e

File tree

3 files changed

+96
-53
lines changed

3 files changed

+96
-53
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4521,7 +4521,20 @@ AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
45214521
switch (MI.getOpcode()) {
45224522
default:
45234523
llvm_unreachable("Unexpected opcode");
4524+
case AArch64::LDRBroX:
45244525
case AArch64::LDRBBroX:
4526+
case AArch64::LDRSBXroX:
4527+
case AArch64::LDRSBWroX:
4528+
case AArch64::LDRHroX:
4529+
case AArch64::LDRHHroX:
4530+
case AArch64::LDRSHXroX:
4531+
case AArch64::LDRSHWroX:
4532+
case AArch64::LDRWroX:
4533+
case AArch64::LDRSroX:
4534+
case AArch64::LDRSWroX:
4535+
case AArch64::LDRDroX:
4536+
case AArch64::LDRXroX:
4537+
case AArch64::LDRQroX:
45254538
return MI.getOperand(4);
45264539
}
45274540
}

llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -509,12 +509,38 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
509509
}
510510

511511
static unsigned getBaseAddressOpcode(unsigned Opc) {
512-
// TODO: Add more index address loads/stores.
512+
// TODO: Add more index address stores.
513513
switch (Opc) {
514514
default:
515515
llvm_unreachable("Opcode has no base address equivalent!");
516+
case AArch64::LDRBroX:
517+
return AArch64::LDRBui;
516518
case AArch64::LDRBBroX:
517519
return AArch64::LDRBBui;
520+
case AArch64::LDRSBXroX:
521+
return AArch64::LDRSBXui;
522+
case AArch64::LDRSBWroX:
523+
return AArch64::LDRSBWui;
524+
case AArch64::LDRHroX:
525+
return AArch64::LDRHui;
526+
case AArch64::LDRHHroX:
527+
return AArch64::LDRHHui;
528+
case AArch64::LDRSHXroX:
529+
return AArch64::LDRSHXui;
530+
case AArch64::LDRSHWroX:
531+
return AArch64::LDRSHWui;
532+
case AArch64::LDRWroX:
533+
return AArch64::LDRWui;
534+
case AArch64::LDRSroX:
535+
return AArch64::LDRSui;
536+
case AArch64::LDRSWroX:
537+
return AArch64::LDRSWui;
538+
case AArch64::LDRDroX:
539+
return AArch64::LDRDui;
540+
case AArch64::LDRXroX:
541+
return AArch64::LDRXui;
542+
case AArch64::LDRQroX:
543+
return AArch64::LDRQui;
518544
}
519545
}
520546

@@ -766,10 +792,31 @@ static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
766792
default:
767793
return false;
768794
// Scaled instructions.
769-
// TODO: Add more index address loads/stores.
795+
// TODO: Add more index address stores.
796+
case AArch64::LDRBroX:
770797
case AArch64::LDRBBroX:
798+
case AArch64::LDRSBXroX:
799+
case AArch64::LDRSBWroX:
771800
Scale = 1;
772801
return true;
802+
case AArch64::LDRHroX:
803+
case AArch64::LDRHHroX:
804+
case AArch64::LDRSHXroX:
805+
case AArch64::LDRSHWroX:
806+
Scale = 2;
807+
return true;
808+
case AArch64::LDRWroX:
809+
case AArch64::LDRSroX:
810+
case AArch64::LDRSWroX:
811+
Scale = 4;
812+
return true;
813+
case AArch64::LDRDroX:
814+
case AArch64::LDRXroX:
815+
Scale = 8;
816+
return true;
817+
case AArch64::LDRQroX:
818+
Scale = 16;
819+
return true;
773820
}
774821
}
775822

llvm/test/CodeGen/AArch64/arm64-addrmode.ll

Lines changed: 34 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -239,9 +239,8 @@ define i32 @LdOffset_i8_zext32(ptr %a) {
239239
define i32 @LdOffset_i8_sext32(ptr %a) {
240240
; CHECK-LABEL: LdOffset_i8_sext32:
241241
; CHECK: // %bb.0:
242-
; CHECK-NEXT: mov w8, #56952 // =0xde78
243-
; CHECK-NEXT: movk w8, #15, lsl #16
244-
; CHECK-NEXT: ldrsb w0, [x0, x8]
242+
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
243+
; CHECK-NEXT: ldrsb w0, [x8, #3704]
245244
; CHECK-NEXT: ret
246245
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
247246
%val = load i8, ptr %arrayidx, align 1
@@ -266,9 +265,8 @@ define i64 @LdOffset_i8_zext64(ptr %a) {
266265
define i64 @LdOffset_i8_sext64(ptr %a) {
267266
; CHECK-LABEL: LdOffset_i8_sext64:
268267
; CHECK: // %bb.0:
269-
; CHECK-NEXT: mov w8, #56952 // =0xde78
270-
; CHECK-NEXT: movk w8, #15, lsl #16
271-
; CHECK-NEXT: ldrsb x0, [x0, x8]
268+
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
269+
; CHECK-NEXT: ldrsb x0, [x8, #3704]
272270
; CHECK-NEXT: ret
273271
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
274272
%val = load i8, ptr %arrayidx, align 1
@@ -280,9 +278,8 @@ define i64 @LdOffset_i8_sext64(ptr %a) {
280278
define i16 @LdOffset_i16(ptr %a) {
281279
; CHECK-LABEL: LdOffset_i16:
282280
; CHECK: // %bb.0:
283-
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
284-
; CHECK-NEXT: movk w8, #31, lsl #16
285-
; CHECK-NEXT: ldrh w0, [x0, x8]
281+
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
282+
; CHECK-NEXT: ldrh w0, [x8, #7408]
286283
; CHECK-NEXT: ret
287284
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
288285
%val = load i16, ptr %arrayidx, align 2
@@ -293,9 +290,8 @@ define i16 @LdOffset_i16(ptr %a) {
293290
define i32 @LdOffset_i16_zext32(ptr %a) {
294291
; CHECK-LABEL: LdOffset_i16_zext32:
295292
; CHECK: // %bb.0:
296-
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
297-
; CHECK-NEXT: movk w8, #31, lsl #16
298-
; CHECK-NEXT: ldrh w0, [x0, x8]
293+
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
294+
; CHECK-NEXT: ldrh w0, [x8, #7408]
299295
; CHECK-NEXT: ret
300296
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
301297
%val = load i16, ptr %arrayidx, align 2
@@ -307,9 +303,8 @@ define i32 @LdOffset_i16_zext32(ptr %a) {
307303
define i32 @LdOffset_i16_sext32(ptr %a) {
308304
; CHECK-LABEL: LdOffset_i16_sext32:
309305
; CHECK: // %bb.0:
310-
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
311-
; CHECK-NEXT: movk w8, #31, lsl #16
312-
; CHECK-NEXT: ldrsh w0, [x0, x8]
306+
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
307+
; CHECK-NEXT: ldrsh w0, [x8, #7408]
313308
; CHECK-NEXT: ret
314309
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
315310
%val = load i16, ptr %arrayidx, align 2
@@ -321,9 +316,8 @@ define i32 @LdOffset_i16_sext32(ptr %a) {
321316
define i64 @LdOffset_i16_zext64(ptr %a) {
322317
; CHECK-LABEL: LdOffset_i16_zext64:
323318
; CHECK: // %bb.0:
324-
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
325-
; CHECK-NEXT: movk w8, #31, lsl #16
326-
; CHECK-NEXT: ldrh w0, [x0, x8]
319+
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
320+
; CHECK-NEXT: ldrh w0, [x8, #7408]
327321
; CHECK-NEXT: ret
328322
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
329323
%val = load i16, ptr %arrayidx, align 2
@@ -335,9 +329,8 @@ define i64 @LdOffset_i16_zext64(ptr %a) {
335329
define i64 @LdOffset_i16_sext64(ptr %a) {
336330
; CHECK-LABEL: LdOffset_i16_sext64:
337331
; CHECK: // %bb.0:
338-
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
339-
; CHECK-NEXT: movk w8, #31, lsl #16
340-
; CHECK-NEXT: ldrsh x0, [x0, x8]
332+
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
333+
; CHECK-NEXT: ldrsh x0, [x8, #7408]
341334
; CHECK-NEXT: ret
342335
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
343336
%val = load i16, ptr %arrayidx, align 2
@@ -349,9 +342,8 @@ define i64 @LdOffset_i16_sext64(ptr %a) {
349342
define i32 @LdOffset_i32(ptr %a) {
350343
; CHECK-LABEL: LdOffset_i32:
351344
; CHECK: // %bb.0:
352-
; CHECK-NEXT: mov w8, #31200 // =0x79e0
353-
; CHECK-NEXT: movk w8, #63, lsl #16
354-
; CHECK-NEXT: ldr w0, [x0, x8]
345+
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
346+
; CHECK-NEXT: ldr w0, [x8, #14816]
355347
; CHECK-NEXT: ret
356348
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
357349
%val = load i32, ptr %arrayidx, align 4
@@ -362,9 +354,8 @@ define i32 @LdOffset_i32(ptr %a) {
362354
define i64 @LdOffset_i32_zext64(ptr %a) {
363355
; CHECK-LABEL: LdOffset_i32_zext64:
364356
; CHECK: // %bb.0:
365-
; CHECK-NEXT: mov w8, #31200 // =0x79e0
366-
; CHECK-NEXT: movk w8, #63, lsl #16
367-
; CHECK-NEXT: ldr w0, [x0, x8]
357+
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
358+
; CHECK-NEXT: ldr w0, [x8, #14816]
368359
; CHECK-NEXT: ret
369360
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
370361
%val = load i32, ptr %arrayidx, align 2
@@ -376,9 +367,8 @@ define i64 @LdOffset_i32_zext64(ptr %a) {
376367
define i64 @LdOffset_i32_sext64(ptr %a) {
377368
; CHECK-LABEL: LdOffset_i32_sext64:
378369
; CHECK: // %bb.0:
379-
; CHECK-NEXT: mov w8, #31200 // =0x79e0
380-
; CHECK-NEXT: movk w8, #63, lsl #16
381-
; CHECK-NEXT: ldrsw x0, [x0, x8]
370+
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
371+
; CHECK-NEXT: ldrsw x0, [x8, #14816]
382372
; CHECK-NEXT: ret
383373
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
384374
%val = load i32, ptr %arrayidx, align 2
@@ -390,9 +380,8 @@ define i64 @LdOffset_i32_sext64(ptr %a) {
390380
define i64 @LdOffset_i64(ptr %a) {
391381
; CHECK-LABEL: LdOffset_i64:
392382
; CHECK: // %bb.0:
393-
; CHECK-NEXT: mov w8, #62400 // =0xf3c0
394-
; CHECK-NEXT: movk w8, #126, lsl #16
395-
; CHECK-NEXT: ldr x0, [x0, x8]
383+
; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
384+
; CHECK-NEXT: ldr x0, [x8, #29632]
396385
; CHECK-NEXT: ret
397386
%arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
398387
%val = load i64, ptr %arrayidx, align 4
@@ -403,9 +392,8 @@ define i64 @LdOffset_i64(ptr %a) {
403392
define <2 x i32> @LdOffset_v2i32(ptr %a) {
404393
; CHECK-LABEL: LdOffset_v2i32:
405394
; CHECK: // %bb.0:
406-
; CHECK-NEXT: mov w8, #62400 // =0xf3c0
407-
; CHECK-NEXT: movk w8, #126, lsl #16
408-
; CHECK-NEXT: ldr d0, [x0, x8]
395+
; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
396+
; CHECK-NEXT: ldr d0, [x8, #29632]
409397
; CHECK-NEXT: ret
410398
%arrayidx = getelementptr inbounds <2 x i32>, ptr %a, i64 1039992
411399
%val = load <2 x i32>, ptr %arrayidx, align 4
@@ -416,9 +404,8 @@ define <2 x i32> @LdOffset_v2i32(ptr %a) {
416404
define <2 x i64> @LdOffset_v2i64(ptr %a) {
417405
; CHECK-LABEL: LdOffset_v2i64:
418406
; CHECK: // %bb.0:
419-
; CHECK-NEXT: mov w8, #59264 // =0xe780
420-
; CHECK-NEXT: movk w8, #253, lsl #16
421-
; CHECK-NEXT: ldr q0, [x0, x8]
407+
; CHECK-NEXT: add x8, x0, #4048, lsl #12 // =16580608
408+
; CHECK-NEXT: ldr q0, [x8, #59264]
422409
; CHECK-NEXT: ret
423410
%arrayidx = getelementptr inbounds <2 x i64>, ptr %a, i64 1039992
424411
%val = load <2 x i64>, ptr %arrayidx, align 4
@@ -429,9 +416,8 @@ define <2 x i64> @LdOffset_v2i64(ptr %a) {
429416
define double @LdOffset_i8_f64(ptr %a) {
430417
; CHECK-LABEL: LdOffset_i8_f64:
431418
; CHECK: // %bb.0:
432-
; CHECK-NEXT: mov w8, #56952 // =0xde78
433-
; CHECK-NEXT: movk w8, #15, lsl #16
434-
; CHECK-NEXT: ldrsb w8, [x0, x8]
419+
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
420+
; CHECK-NEXT: ldrsb w8, [x8, #3704]
435421
; CHECK-NEXT: scvtf d0, w8
436422
; CHECK-NEXT: ret
437423
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
@@ -444,9 +430,8 @@ define double @LdOffset_i8_f64(ptr %a) {
444430
define double @LdOffset_i16_f64(ptr %a) {
445431
; CHECK-LABEL: LdOffset_i16_f64:
446432
; CHECK: // %bb.0:
447-
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
448-
; CHECK-NEXT: movk w8, #31, lsl #16
449-
; CHECK-NEXT: ldrsh w8, [x0, x8]
433+
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
434+
; CHECK-NEXT: ldrsh w8, [x8, #7408]
450435
; CHECK-NEXT: scvtf d0, w8
451436
; CHECK-NEXT: ret
452437
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
@@ -459,9 +444,8 @@ define double @LdOffset_i16_f64(ptr %a) {
459444
define double @LdOffset_i32_f64(ptr %a) {
460445
; CHECK-LABEL: LdOffset_i32_f64:
461446
; CHECK: // %bb.0:
462-
; CHECK-NEXT: mov w8, #31200 // =0x79e0
463-
; CHECK-NEXT: movk w8, #63, lsl #16
464-
; CHECK-NEXT: ldr s0, [x0, x8]
447+
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
448+
; CHECK-NEXT: ldr s0, [x8, #14816]
465449
; CHECK-NEXT: ucvtf d0, d0
466450
; CHECK-NEXT: ret
467451
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
@@ -474,9 +458,8 @@ define double @LdOffset_i32_f64(ptr %a) {
474458
define double @LdOffset_i64_f64(ptr %a) {
475459
; CHECK-LABEL: LdOffset_i64_f64:
476460
; CHECK: // %bb.0:
477-
; CHECK-NEXT: mov w8, #62400 // =0xf3c0
478-
; CHECK-NEXT: movk w8, #126, lsl #16
479-
; CHECK-NEXT: ldr d0, [x0, x8]
461+
; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
462+
; CHECK-NEXT: ldr d0, [x8, #29632]
480463
; CHECK-NEXT: scvtf d0, d0
481464
; CHECK-NEXT: ret
482465
%arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992

0 commit comments

Comments
 (0)