Closed as not planned
Description
The following example:
; @lver.check.unnecessary: one straight-line path and one loop, selected by %arg2.
;   %arg  - read as an i32 in entry; written as a double on the no-loop path.
;   %arg1 - base pointer for the double loads on both paths and for the
;           store inside the loop.
;   %arg2 - path selector: true -> noloop.exit, false -> loop.ph.
define void @lver.check.unnecessary(ptr %arg, ptr %arg1, i1 %arg2) {
entry:
; %load is the runtime value that later becomes the loop's access stride.
%load = load i32, ptr %arg, align 4
br i1 %arg2, label %noloop.exit, label %loop.ph
loop.ph: ; preds = %entry
; Loop preheader: widen the stride to i64 and form the load base %arg1 + 8 bytes.
%sext7 = sext i32 %load to i64
%gep8 = getelementptr i8, ptr %arg1, i64 8
br label %loop.body
loop.body: ; preds = %loop.body, %loop.ph
; %phi is the induction variable: starts at 0 and steps by 1 (%add).
%phi = phi i64 [ 0, %loop.ph ], [ %add, %loop.body ]
; Index = %phi * %sext7, so the loaded address advances by %sext7 doubles
; per iteration, i.e. a symbolic (runtime) stride.
%mul = mul i64 %phi, %sext7
%gep10 = getelementptr double, ptr %gep8, i64 %mul
%load11 = load double, ptr %gep10, align 8
; The store address %arg1 does not change across iterations.
store double %load11, ptr %arg1, align 8
%add = add i64 %phi, 1
; Exit condition is %phi == 0, which already holds on the first iteration
; (%phi enters as 0), so the back edge to %loop.body is never taken.
%icmp = icmp eq i64 %phi, 0
br i1 %icmp, label %loop.exit, label %loop.body
noloop.exit: ; preds = %entry
; No-loop path: load the double at %arg1[%load] and store it to %arg.
%sext = sext i32 %load to i64
%gep = getelementptr double, ptr %arg1, i64 %sext
%load5 = load double, ptr %gep, align 8
store double %load5, ptr %arg, align 8
ret void
loop.exit: ; preds = %loop.body
ret void
}
is left unchanged by the loop-versioning pass before #92119. After that patch, however, the pass versions the loop, and the following diff in its output is observed:
diff --git a/lver.before.ll b/lver.main.ll
index 9dce17d..fc81e12 100644
--- a/lver.before.ll
+++ b/lver.main.ll
@@ -4,22 +4,39 @@ source_filename = "hand-reduce.ll"
define void @lver.check.unnecessary(ptr %arg, ptr %arg1, i1 %arg2) {
entry:
%load = load i32, ptr %arg, align 4
- br i1 %arg2, label %noloop.exit, label %loop.ph
+ br i1 %arg2, label %noloop.exit, label %loop.body.lver.check
-loop.ph: ; preds = %entry
+loop.body.lver.check: ; preds = %entry
%sext7 = sext i32 %load to i64
%gep8 = getelementptr i8, ptr %arg1, i64 8
+ %ident.check = icmp ne i32 %load, 1
+ br i1 %ident.check, label %loop.body.ph.lver.orig, label %loop.body.ph
+
+loop.body.ph.lver.orig: ; preds = %loop.body.lver.check
+ br label %loop.body.lver.orig
+
+loop.body.lver.orig: ; preds = %loop.body.lver.orig, %loop.body.ph.lver.orig
+ %phi.lver.orig = phi i64 [ 0, %loop.body.ph.lver.orig ], [ %add.lver.orig, %loop.body.lver.orig ]
+ %mul.lver.orig = mul i64 %phi.lver.orig, %sext7
+ %gep10.lver.orig = getelementptr double, ptr %gep8, i64 %mul.lver.orig
+ %load11.lver.orig = load double, ptr %gep10.lver.orig, align 8
+ store double %load11.lver.orig, ptr %arg1, align 8
+ %add.lver.orig = add i64 %phi.lver.orig, 1
+ %icmp.lver.orig = icmp eq i64 %phi.lver.orig, 0
+ br i1 %icmp.lver.orig, label %loop.exit.loopexit, label %loop.body.lver.orig
+
+loop.body.ph: ; preds = %loop.body.lver.check
br label %loop.body
-loop.body: ; preds = %loop.body, %loop.ph
- %phi = phi i64 [ 0, %loop.ph ], [ %add, %loop.body ]
+loop.body: ; preds = %loop.body, %loop.body.ph
+ %phi = phi i64 [ 0, %loop.body.ph ], [ %add, %loop.body ]
%mul = mul i64 %phi, %sext7
%gep10 = getelementptr double, ptr %gep8, i64 %mul
%load11 = load double, ptr %gep10, align 8
store double %load11, ptr %arg1, align 8
%add = add i64 %phi, 1
%icmp = icmp eq i64 %phi, 0
- br i1 %icmp, label %loop.exit, label %loop.body
+ br i1 %icmp, label %loop.exit.loopexit1, label %loop.body
noloop.exit: ; preds = %entry
%sext = sext i32 %load to i64
@@ -28,6 +45,12 @@ noloop.exit: ; preds = %entry
store double %load5, ptr %arg, align 8
ret void
-loop.exit: ; preds = %loop.body
+loop.exit.loopexit: ; preds = %loop.body.lver.orig
+ br label %loop.exit
+
+loop.exit.loopexit1: ; preds = %loop.body
+ br label %loop.exit
+
+loop.exit: ; preds = %loop.exit.loopexit1, %loop.exit.loopexit
ret void
}
This is a regression.
The underlying issue is in LoopAccessAnalysis (LAA), which now produces a spurious "Equal predicate" SCEV assumption. The diff between the LAA output on the example before the patch and on main is:
diff --git a/laa.before b/laa.main
index 17b0a1b..1b3b1b0 100644
--- a/laa.before
+++ b/laa.main
@@ -7,5 +7,9 @@ Printing analysis 'Loop Access Analysis' for function 'lver.check.unnecessary':
Non vectorizable stores to invariant address were not found in loop.
SCEV assumptions:
+ Equal predicate: %load == 1
Expressions re-written:
+ [PSE] %gep10 = getelementptr double, ptr %gep8, i64 %mul:
+ {(8 + %arg1),+,(8 * (sext i32 %load to i64))<nsw>}<%loop.body>
+ --> {(8 + %arg1),+,8}<%loop.body>