Skip to content

Commit ddb46ab

Browse files
committed
[LSR] Don't consider users of constant outside loop
In CollectLoopInvariantFixupsAndFormulae(), LSR looks at users outside the loop. E.g. if we have an addrec based on %base, and %base is also used outside the loop, then we have to keep it in a register anyway, which may make it more profitable to use %base + %idx style addressing.

This reasoning doesn't hold up when the base is a constant, because the constant can be rematerialized. The lsr-memcpy.ll test regressed when enabling opaque pointers, because inttoptr (i64 6442450944 to ptr) now also has a use outside the loop (previously it didn't due to a pointer type difference), and that extra "use" results in worse use of addressing modes in the loop. However, the use outside the loop actually gets rematerialized, so the alleged register saving does not occur.

The same reasoning also applies to other types of constants, such as global variable references.

Differential Revision: https://reviews.llvm.org/D155073
1 parent 1c4e4e0 commit ddb46ab

File tree

2 files changed

+16
-21
lines changed

2 files changed

+16
-21
lines changed

llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3507,8 +3507,8 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
35073507
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
35083508
// Look for instructions defined outside the loop.
35093509
if (L->contains(Inst)) continue;
3510-
} else if (isa<UndefValue>(V))
3511-
// Undef doesn't have a live range, so it doesn't matter.
3510+
} else if (isa<Constant>(V))
3511+
// Constants can be re-materialized.
35123512
continue;
35133513
for (const Use &U : V->uses()) {
35143514
const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());

llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,18 @@
77
; <rdar://problem/12702735> [ARM64][coalescer] need better register
88
; coalescing for simple unit tests.
99

10-
; FIXME: This regressed after enabling opaque pointers.
1110
define i32 @test_inttoptr() nounwind {
1211
; CHECK-LABEL: test_inttoptr:
1312
; CHECK: // %bb.0: // %entry
1413
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
15-
; CHECK-NEXT: mov x8, #0 // =0x0
16-
; CHECK-NEXT: mov w9, #1288 // =0x508
17-
; CHECK-NEXT: mov x10, #4294967296 // =0x100000000
18-
; CHECK-NEXT: mov x11, #6442450944 // =0x180000000
14+
; CHECK-NEXT: mov w8, #1288 // =0x508
15+
; CHECK-NEXT: mov x9, #4294967296 // =0x100000000
16+
; CHECK-NEXT: mov x10, #6442450944 // =0x180000000
1917
; CHECK-NEXT: .LBB0_1: // %while.body
2018
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
21-
; CHECK-NEXT: ldr x12, [x8, x10]
22-
; CHECK-NEXT: str x12, [x8, x11]
23-
; CHECK-NEXT: add x8, x8, #8
24-
; CHECK-NEXT: subs x9, x9, #8
19+
; CHECK-NEXT: ldr x11, [x9], #8
20+
; CHECK-NEXT: str x11, [x10], #8
21+
; CHECK-NEXT: subs x8, x8, #8
2522
; CHECK-NEXT: b.pl .LBB0_1
2623
; CHECK-NEXT: // %bb.2: // %while.end
2724
; CHECK-NEXT: mov x8, #6442450944 // =0x180000000
@@ -55,18 +52,16 @@ while.end: ; preds = %while.body
5552
define ptr @test_globals() nounwind {
5653
; CHECK-LABEL: test_globals:
5754
; CHECK: // %bb.0: // %entry
58-
; CHECK-NEXT: mov x8, #0 // =0x0
59-
; CHECK-NEXT: mov w9, #1288 // =0x508
60-
; CHECK-NEXT: adrp x10, g2
61-
; CHECK-NEXT: add x10, x10, :lo12:g2
62-
; CHECK-NEXT: adrp x11, g1
63-
; CHECK-NEXT: add x11, x11, :lo12:g1
55+
; CHECK-NEXT: mov w8, #1288 // =0x508
56+
; CHECK-NEXT: adrp x9, g2
57+
; CHECK-NEXT: add x9, x9, :lo12:g2
58+
; CHECK-NEXT: adrp x10, g1
59+
; CHECK-NEXT: add x10, x10, :lo12:g1
6460
; CHECK-NEXT: .LBB1_1: // %while.body
6561
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
66-
; CHECK-NEXT: ldr x12, [x10, x8]
67-
; CHECK-NEXT: str x12, [x11, x8]
68-
; CHECK-NEXT: add x8, x8, #8
69-
; CHECK-NEXT: subs x9, x9, #8
62+
; CHECK-NEXT: ldr x11, [x9], #8
63+
; CHECK-NEXT: str x11, [x10], #8
64+
; CHECK-NEXT: subs x8, x8, #8
7065
; CHECK-NEXT: b.pl .LBB1_1
7166
; CHECK-NEXT: // %bb.2: // %while.end
7267
; CHECK-NEXT: adrp x0, g1

0 commit comments

Comments
 (0)