Commit 4145032
Author: Max Kazantsev
Re-enable "[SCEV] Do not fold dominated SCEVUnknown into AddRecExpr start"
The patch rL303730 was reverted because the test lsr-expand-quadratic.ll failed on many non-X86 configs with this patch. The reason is that the patch makes a correct fix that changes the optimizer's behavior for this test. Without the change, LSR was making an overconfident simplification based on a wrong SCEV; apparently it did not need the IV analysis to do this. With the change, it chooses a different (less confident) way to simplify, and this way requires the IV analysis.

Now, following the right execution path, LSR tries to make a transformation relying on the IV Users analysis. This analysis is target-dependent due to this code:

  // LSR is not APInt clean, do not touch integers bigger than 64-bits.
  // Also avoid creating IVs of non-native types. For example, we don't want a
  // 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
  uint64_t Width = SE->getTypeSizeInBits(I->getType());
  if (Width > 64 || !DL.isLegalInteger(Width))
    return false;

To make the expected transformation in this test case, the type i32 needs to be legal for the specified data layout. When the test runs on a non-X86 configuration (e.g. pure ARM64), opt gets confused by the specified target triple, ignores it, and rejects the specified data layout as well. It then falls back to a default layout that does not treat i32 as a legal type (the layout used when none is specified currently has no legal types at all). As a result, the transformation we expect does not happen for this test.

This re-enabling patch contains no source code changes compared to the original patch rL303730. The only difference is that the failing test is moved to the X86 directory and now requires running on x86, to comply with the specified target triple and data layout.

Differential Revision: https://reviews.llvm.org/D33543

llvm-svn: 303971
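As an aside (not part of this commit), a minimal C++ sketch of the data-layout legality check the message above refers to. The layout strings and the program itself are illustrative assumptions: the first string is modeled on the test's "n8:16:32:64" native-integer spec, the second is empty to mimic a layout with no legal integer widths.

  // Standalone illustration of DataLayout::isLegalInteger; assumes LLVM headers
  // and libraries are available to build against.
  #include "llvm/IR/DataLayout.h"
  #include <cstdio>

  int main() {
    // Layout declaring native integer widths 8/16/32/64 (as in the x86 test).
    llvm::DataLayout WithNativeInts("e-n8:16:32:64-S128");
    // Empty layout string: no native integer widths, so no integer type is "legal".
    llvm::DataLayout NoNativeInts("");
    std::printf("i32 legal (n8:16:32:64): %d\n", (int)WithNativeInts.isLegalInteger(32)); // 1
    std::printf("i32 legal (default):     %d\n", (int)NoNativeInts.isLegalInteger(32));   // 0
    return 0;
  }

Under these assumptions, IV Users would consider i32 users on the first layout but skip them on the second, which is why the test must pin the x86 triple and data layout.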
Parent: e51c435

7 files changed (+189, -20 lines)

llvm/include/llvm/Analysis/ScalarEvolution.h

+6

@@ -1533,6 +1533,12 @@ class ScalarEvolution {
   /// specified loop.
   bool isLoopInvariant(const SCEV *S, const Loop *L);
 
+  /// Determine if the SCEV can be evaluated at loop's entry. It is true if it
+  /// doesn't depend on a SCEVUnknown of an instruction which is dominated by
+  /// the header of loop L.
+  bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L, DominatorTree &DT,
+                              LoopInfo &LI);
+
   /// Return true if the given SCEV changes value in a known way in the
   /// specified loop. This property being true implies that the value is
   /// variant in the loop AND that we can emit an expression to compute the

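A minimal caller-side sketch (an assumption for illustration, not code from this commit; SE, Op, AddRecLoop, LIOps, DT, and LI are placeholder names mirroring the getAddExpr caller below) of how the new query is intended to gate a fold that previously relied on isLoopInvariant alone:

  // Only fold Op into the AddRec's start if it can be evaluated at the loop's
  // entry; loop invariance alone is not enough when Op is defined after the loop.
  if (SE.isAvailableAtLoopEntry(Op, AddRecLoop, DT, LI))
    LIOps.push_back(Op); // safe: Op depends on nothing dominated by the loop header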
llvm/lib/Analysis/ScalarEvolution.cpp

+59 -2

@@ -2178,6 +2178,63 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
   return Flags;
 }
 
+bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L,
+                                             DominatorTree &DT, LoopInfo &LI) {
+  if (!isLoopInvariant(S, L))
+    return false;
+  // If a value depends on a SCEVUnknown which is defined after the loop, we
+  // conservatively assume that we cannot calculate it at the loop's entry.
+  struct FindDominatedSCEVUnknown {
+    bool Found = false;
+    const Loop *L;
+    DominatorTree &DT;
+    LoopInfo &LI;
+
+    FindDominatedSCEVUnknown(const Loop *L, DominatorTree &DT, LoopInfo &LI)
+        : L(L), DT(DT), LI(LI) {}
+
+    bool checkSCEVUnknown(const SCEVUnknown *SU) {
+      if (auto *I = dyn_cast<Instruction>(SU->getValue())) {
+        if (DT.dominates(L->getHeader(), I->getParent()))
+          Found = true;
+        else
+          assert(DT.dominates(I->getParent(), L->getHeader()) &&
+                 "No dominance relationship between SCEV and loop?");
+      }
+      return false;
+    }
+
+    bool follow(const SCEV *S) {
+      switch (static_cast<SCEVTypes>(S->getSCEVType())) {
+      case scConstant:
+        return false;
+      case scAddRecExpr:
+      case scTruncate:
+      case scZeroExtend:
+      case scSignExtend:
+      case scAddExpr:
+      case scMulExpr:
+      case scUMaxExpr:
+      case scSMaxExpr:
+      case scUDivExpr:
+        return true;
+      case scUnknown:
+        return checkSCEVUnknown(cast<SCEVUnknown>(S));
+      case scCouldNotCompute:
+        llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+      }
+      return false;
+    }
+
+    bool isDone() { return Found; }
+  };
+
+  FindDominatedSCEVUnknown FSU(L, DT, LI);
+  SCEVTraversal<FindDominatedSCEVUnknown> ST(FSU);
+  ST.visitAll(S);
+  return !FSU.Found;
+}
+
 /// Get a canonical add expression, or something simpler if possible.
 const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
                                         SCEV::NoWrapFlags Flags,
@@ -2459,7 +2516,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
     const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
     const Loop *AddRecLoop = AddRec->getLoop();
     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-      if (isLoopInvariant(Ops[i], AddRecLoop)) {
+      if (isAvailableAtLoopEntry(Ops[i], AddRecLoop, DT, LI)) {
        LIOps.push_back(Ops[i]);
        Ops.erase(Ops.begin()+i);
        --i; --e;
@@ -2734,7 +2791,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
     const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
     const Loop *AddRecLoop = AddRec->getLoop();
     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-      if (isLoopInvariant(Ops[i], AddRecLoop)) {
+      if (isAvailableAtLoopEntry(Ops[i], AddRecLoop, DT, LI)) {
        LIOps.push_back(Ops[i]);
        Ops.erase(Ops.begin()+i);
        --i; --e;

llvm/test/Analysis/IVUsers/quadradic-exit-value.ll

+35 -1

@@ -30,13 +30,47 @@ exit:
   ret i64 %r
 }
 
+; PR15470: LSR miscompile. The test1 function should return '1'.
+; It is valid to fold SCEVUnknown into the recurrence because it
+; was defined before the loop.
+;
+; SCEV does not know how to denormalize chained recurrences, so make
+; sure they aren't marked as post-inc users.
+;
+; CHECK-LABEL: IV Users for loop %test1.loop
+; CHECK-NO-LCSSA: %sext.us = {0,+,(16777216 + (-16777216 * %sub.us))<nuw><nsw>,+,33554432}<%test1.loop> (post-inc with loop %test1.loop) in %f = ashr i32 %sext.us, 24
+define i32 @test1(i1 %cond) {
+entry:
+  %sub.us = select i1 %cond, i32 0, i32 0
+  br label %test1.loop
+
+test1.loop:
+  %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test1.loop ]
+  %inc11.us = add nsw i32 %inc1115.us, 1
+  %cmp.us = icmp slt i32 %inc11.us, 2
+  br i1 %cmp.us, label %test1.loop, label %for.end
+
+for.end:
+  %tobool.us = icmp eq i32 %inc1115.us, 0
+  %mul.us = shl i32 %inc1115.us, 24
+  %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
+  %sext.us = mul i32 %mul.us, %sub.cond.us
+  %f = ashr i32 %sext.us, 24
+  br label %exit
+
+exit:
+  ret i32 %f
+}
+
 ; PR15470: LSR miscompile. The test2 function should return '1'.
+; It is illegal to fold SCEVUnknown (sext.us) into the recurrence
+; because it is defined after the loop where this recurrence belongs.
 ;
 ; SCEV does not know how to denormalize chained recurrences, so make
 ; sure they aren't marked as post-inc users.
 ;
 ; CHECK-LABEL: IV Users for loop %test2.loop
-; CHECK-NO-LCSSA: %sext.us = {0,+,(16777216 + (-16777216 * %sub.us))<nuw><nsw>,+,33554432}<%test2.loop> (post-inc with loop %test2.loop) in %f = ashr i32 %sext.us, 24
+; CHECK-NO-LCSSA: %sub.cond.us = ((-1 * %sub.us)<nsw> + {0,+,1}<nuw><nsw><%test2.loop>) (post-inc with loop %test2.loop) in %sext.us = mul i32 %mul.us, %sub.cond.us
 define i32 @test2() {
 entry:
   br label %test2.loop

llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll

+61 -3

@@ -220,17 +220,18 @@ exit:
 
 ; Mix of previous use cases that demonstrates %s3 can be incorrectly treated as
 ; a recurrence of loop1 because of operands order if we pick recurrencies in an
-; incorrect order.
+; incorrect order. It also shows that we cannot safely fold v1 (SCEVUnknown)
+; because we cannot prove for sure that it doesn't use Phis of loop 2.
 
 define void @test_03(i32 %a, i32 %b, i32 %c, i32* %p) {
 
 ; CHECK-LABEL: Classifying expressions for: @test_03
 ; CHECK: %v1 = load i32, i32* %p
 ; CHECK-NEXT: --> %v1
 ; CHECK: %s1 = add i32 %phi1, %v1
-; CHECK-NEXT: --> {(%a + %v1),+,1}<%loop1>
+; CHECK-NEXT: --> ({%a,+,1}<%loop1> + %v1)
 ; CHECK: %s2 = add i32 %s1, %b
-; CHECK-NEXT: --> {(%a + %b + %v1),+,1}<%loop1>
+; CHECK-NEXT: --> ({(%a + %b),+,1}<%loop1> + %v1)
 ; CHECK: %s3 = add i32 %s2, %phi2
 ; CHECK-NEXT: --> ({{{{}}((2 * %a) + %b),+,1}<%loop1>,+,2}<%loop2> + %v1)
 
@@ -452,3 +453,60 @@ exit:
   %s6 = add i32 %phi3, %phi2
   ret void
 }
+
+; Make sure that a complicated Phi does not get folded with rec's start value
+; of a loop which is above.
+define void @test_08() {
+
+; CHECK-LABEL: Classifying expressions for: @test_08
+; CHECK: %tmp11 = add i64 %iv.2.2, %iv.2.1
+; CHECK-NEXT: --> ({0,+,-1}<nsw><%loop_2> + %iv.2.1)
+; CHECK: %tmp12 = trunc i64 %tmp11 to i32
+; CHECK-NEXT: --> (trunc i64 ({0,+,-1}<nsw><%loop_2> + %iv.2.1) to i32)
+; CHECK: %tmp14 = mul i32 %tmp12, %tmp7
+; CHECK-NEXT: --> ((trunc i64 ({0,+,-1}<nsw><%loop_2> + %iv.2.1) to i32) * {-1,+,-1}<%loop_1>)
+; CHECK: %tmp16 = mul i64 %iv.2.1, %iv.1.1
+; CHECK-NEXT: --> ({2,+,1}<nuw><nsw><%loop_1> * %iv.2.1)
+
+entry:
+  br label %loop_1
+
+loop_1:
+  %iv.1.1 = phi i64 [ 2, %entry ], [ %iv.1.1.next, %loop_1_back_branch ]
+  %iv.1.2 = phi i32 [ -1, %entry ], [ %iv.1.2.next, %loop_1_back_branch ]
+  br label %loop_1_exit
+
+dead:
+  br label %loop_1_exit
+
+loop_1_exit:
+  %tmp5 = icmp sgt i64 %iv.1.1, 2
+  br i1 %tmp5, label %loop_2_preheader, label %loop_1_back_branch
+
+loop_1_back_branch:
+  %iv.1.1.next = add nuw nsw i64 %iv.1.1, 1
+  %iv.1.2.next = add nsw i32 %iv.1.2, 1
+  br label %loop_1
+
+loop_2_preheader:
+  %tmp6 = sub i64 1, %iv.1.1
+  %tmp7 = trunc i64 %tmp6 to i32
+  br label %loop_2
+
+loop_2:
+  %iv.2.1 = phi i64 [ 0, %loop_2_preheader ], [ %tmp16, %loop_2 ]
+  %iv.2.2 = phi i64 [ 0, %loop_2_preheader ], [ %iv.2.2.next, %loop_2 ]
+  %iv.2.3 = phi i64 [ 2, %loop_2_preheader ], [ %iv.2.3.next, %loop_2 ]
+  %tmp11 = add i64 %iv.2.2, %iv.2.1
+  %tmp12 = trunc i64 %tmp11 to i32
+  %tmp14 = mul i32 %tmp12, %tmp7
+  %tmp16 = mul i64 %iv.2.1, %iv.1.1
+  %iv.2.3.next = add nuw nsw i64 %iv.2.3, 1
+  %iv.2.2.next = add nsw i64 %iv.2.2, -1
+  %tmp17 = icmp slt i64 %iv.2.3.next, %iv.1.1
+  br i1 %tmp17, label %loop_2, label %exit
+
+exit:
+  %tmp10 = add i32 %iv.1.2, 3
+  ret void
+}

llvm/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll

+5 -7

@@ -25,7 +25,7 @@ L2: ; preds = %idxend.8
 if6: ; preds = %idxend.8
   %r2 = add i64 %0, -1
   %r3 = load i64, i64* %1, align 8
-; CHECK-NOT: %r2
+; CHECK: %r2 = add i64 %0, -1
 ; CHECK: %r3 = load i64
   br label %ib
 
@@ -36,13 +36,11 @@ ib: ; preds = %if6
   %r4 = mul i64 %r3, %r0
   %r5 = add i64 %r2, %r4
   %r6 = icmp ult i64 %r5, undef
-; CHECK: [[MUL1:%[0-9]+]] = mul i64 %lsr.iv, %r3
-; CHECK: [[ADD1:%[0-9]+]] = add i64 [[MUL1]], -1
-; CHECK: add i64 %{{.}}, [[ADD1]]
-; CHECK: %r6
+; CHECK: %r4 = mul i64 %r3, %lsr.iv
+; CHECK: %r5 = add i64 %r2, %r4
+; CHECK: %r6 = icmp ult i64 %r5, undef
+; CHECK: %r7 = getelementptr i64, i64* undef, i64 %r5
   %r7 = getelementptr i64, i64* undef, i64 %r5
   store i64 1, i64* %r7, align 8
-; CHECK: [[MUL2:%[0-9]+]] = mul i64 %lsr.iv, %r3
-; CHECK: [[ADD2:%[0-9]+]] = add i64 [[MUL2]], -1
   br label %L
 }

llvm/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll renamed to llvm/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll

+21 -5

@@ -1,22 +1,38 @@
+; REQUIRES: x86
 ; RUN: opt -loop-reduce -S < %s | FileCheck %s
 
+; Strength reduction analysis here relies on IV Users analysis, that
+; only finds users among instructions with types that are treated as
+; legal by the data layout. When running this test on pure non-x86
+; configs (for example, ARM 64), it gets confused with the target
+; triple and uses a default data layout instead. This default layout
+; does not have any legal types (even i32), so the transformation
+; does not happen.
+
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx"
 
 ; PR15470: LSR miscompile. The test2 function should return '1'.
 ;
 ; SCEV expander cannot expand quadratic recurrences outside of the
 ; loop. This recurrence depends on %sub.us, so can't be expanded.
+; We cannot fold SCEVUnknown (sub.us) with recurrences since it is
+; declared after the loop.
 ;
 ; CHECK-LABEL: @test2
 ; CHECK-LABEL: test2.loop:
-; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -16777216, %entry ]
-; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 16777216
+; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %test2.loop ], [ -16777216, %entry ]
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -1, %entry ]
+; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 1
+; CHECK: %lsr.iv.next2 = add nsw i32 %lsr.iv1, 16777216
 ;
 ; CHECK-LABEL: for.end:
-; CHECK: %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
-; CHECK: %sext.us = mul i32 %lsr.iv.next, %sub.cond.us
-; CHECK: %f = ashr i32 %sext.us, 24
+; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next2, 0
+; CHECK: %sub.us = select i1 %tobool.us, i32 0, i32 0
+; CHECK: %1 = sub i32 0, %sub.us
+; CHECK: %2 = add i32 %1, %lsr.iv.next
+; CHECK: %sext.us = mul i32 %lsr.iv.next2, %2
+; CHECK: %f = ashr i32 %sext.us, 24
 ; CHECK: ret i32 %f
 define i32 @test2() {
 entry:

llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll

+2 -2

@@ -25,6 +25,8 @@ define void @_Z15IntegerToStringjjR7Vector2(i32 %i, i32 %radix, %struct.Vector2*
 entry:
   %buffer = alloca [33 x i16], align 16
   %add.ptr = getelementptr inbounds [33 x i16], [33 x i16]* %buffer, i64 0, i64 33
+  %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64
+  %sub.ptr.rhs.cast = ptrtoint i16* %add.ptr to i64
   br label %do.body
 
 do.body: ; preds = %do.body, %entry
@@ -46,8 +48,6 @@ do.body: ; preds = %do.body, %entry
 do.end: ; preds = %do.body
   %xap.0 = inttoptr i64 %0 to i1*
   %cap.0 = ptrtoint i1* %xap.0 to i64
-  %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64
-  %sub.ptr.rhs.cast = ptrtoint i16* %incdec.ptr to i64
   %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
   %sub.ptr.div39 = lshr exact i64 %sub.ptr.sub, 1
   %conv11 = trunc i64 %sub.ptr.div39 to i32
