Skip to content

LoopLoadElim: calling LoopVersioning with single-iteration loop #96656

Closed as not planned
@artagnon

Description

@artagnon

The following example:

; Reproducer: a loop whose body executes exactly once and whose load address
; depends on a value (%load) that is read from memory before the loop.
;   %arg  - pointer; read as i32 in entry, written as double in noloop.exit
;   %arg1 - pointer; base address for the double loads/stores
;   %arg2 - i1; when true the loop is skipped and noloop.exit is taken
define void @lver.check.unnecessary(ptr %arg, ptr %arg1, i1 %arg2) {
entry:
  ; %load is only known at run time; it later feeds the loop's address computation.
  %load = load i32, ptr %arg, align 4
  br i1 %arg2, label %noloop.exit, label %loop.ph

loop.ph:                                          ; preds = %entry
  ; Loop-invariant values: the i64 multiplier and the base pointer %arg1 + 8 bytes.
  %sext7 = sext i32 %load to i64
  %gep8 = getelementptr i8, ptr %arg1, i64 8
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %loop.ph
  ; %phi is the induction variable: 0 on entry from loop.ph, %add on the back edge.
  %phi = phi i64 [ 0, %loop.ph ], [ %add, %loop.body ]
  ; The element index is %phi * sext(%load), so the distance between the
  ; addresses of consecutive iterations depends on the run-time value of %load.
  %mul = mul i64 %phi, %sext7
  %gep10 = getelementptr double, ptr %gep8, i64 %mul
  %load11 = load double, ptr %gep10, align 8
  ; The store address (%arg1) does not change between iterations.
  store double %load11, ptr %arg1, align 8
  %add = add i64 %phi, 1
  ; %phi is 0 on the first iteration, so this compare is immediately true:
  ; the loop exits after one pass and the back edge is never taken.
  %icmp = icmp eq i64 %phi, 0
  br i1 %icmp, label %loop.exit, label %loop.body

noloop.exit:                                      ; preds = %entry
  ; Non-loop path: load the double at index sext(%load) from %arg1 and store it to %arg.
  %sext = sext i32 %load to i64
  %gep = getelementptr double, ptr %arg1, i64 %sext
  %load5 = load double, ptr %gep, align 8
  store double %load5, ptr %arg, align 8
  ret void

loop.exit:                                        ; preds = %loop.body
  ret void
}

is left unchanged by loop-versioning before #92119 (the output is identical to the input). After that patch, however, the output differs as follows:

diff --git a/lver.before.ll b/lver.main.ll
index 9dce17d..fc81e12 100644
--- a/lver.before.ll
+++ b/lver.main.ll
@@ -4,22 +4,39 @@ source_filename = "hand-reduce.ll"
 define void @lver.check.unnecessary(ptr %arg, ptr %arg1, i1 %arg2) {
 entry:
   %load = load i32, ptr %arg, align 4
-  br i1 %arg2, label %noloop.exit, label %loop.ph
+  br i1 %arg2, label %noloop.exit, label %loop.body.lver.check

-loop.ph:                                          ; preds = %entry
+loop.body.lver.check:                             ; preds = %entry
   %sext7 = sext i32 %load to i64
   %gep8 = getelementptr i8, ptr %arg1, i64 8
+  %ident.check = icmp ne i32 %load, 1
+  br i1 %ident.check, label %loop.body.ph.lver.orig, label %loop.body.ph
+
+loop.body.ph.lver.orig:                           ; preds = %loop.body.lver.check
+  br label %loop.body.lver.orig
+
+loop.body.lver.orig:                              ; preds = %loop.body.lver.orig, %loop.body.ph.lver.orig
+  %phi.lver.orig = phi i64 [ 0, %loop.body.ph.lver.orig ], [ %add.lver.orig, %loop.body.lver.orig ]
+  %mul.lver.orig = mul i64 %phi.lver.orig, %sext7
+  %gep10.lver.orig = getelementptr double, ptr %gep8, i64 %mul.lver.orig
+  %load11.lver.orig = load double, ptr %gep10.lver.orig, align 8
+  store double %load11.lver.orig, ptr %arg1, align 8
+  %add.lver.orig = add i64 %phi.lver.orig, 1
+  %icmp.lver.orig = icmp eq i64 %phi.lver.orig, 0
+  br i1 %icmp.lver.orig, label %loop.exit.loopexit, label %loop.body.lver.orig
+
+loop.body.ph:                                     ; preds = %loop.body.lver.check
   br label %loop.body

-loop.body:                                        ; preds = %loop.body, %loop.ph
-  %phi = phi i64 [ 0, %loop.ph ], [ %add, %loop.body ]
+loop.body:                                        ; preds = %loop.body, %loop.body.ph
+  %phi = phi i64 [ 0, %loop.body.ph ], [ %add, %loop.body ]
   %mul = mul i64 %phi, %sext7
   %gep10 = getelementptr double, ptr %gep8, i64 %mul
   %load11 = load double, ptr %gep10, align 8
   store double %load11, ptr %arg1, align 8
   %add = add i64 %phi, 1
   %icmp = icmp eq i64 %phi, 0
-  br i1 %icmp, label %loop.exit, label %loop.body
+  br i1 %icmp, label %loop.exit.loopexit1, label %loop.body

 noloop.exit:                                      ; preds = %entry
   %sext = sext i32 %load to i64
@@ -28,6 +45,12 @@ noloop.exit:                                      ; preds = %entry
   store double %load5, ptr %arg, align 8
   ret void

-loop.exit:                                        ; preds = %loop.body
+loop.exit.loopexit:                               ; preds = %loop.body.lver.orig
+  br label %loop.exit
+
+loop.exit.loopexit1:                              ; preds = %loop.body
+  br label %loop.exit
+
+loop.exit:                                        ; preds = %loop.exit.loopexit1, %loop.exit.loopexit
   ret void
 }

This is a regression.

The underlying issue is in LoopAccessAnalysis, which produces a spurious Equal predicate. The diff of the LAA output on the example, before the patch versus on main, is:

diff --git a/laa.before b/laa.main
index 17b0a1b..1b3b1b0 100644
--- a/laa.before
+++ b/laa.main
@@ -7,5 +7,9 @@ Printing analysis 'Loop Access Analysis' for function 'lver.check.unnecessary':

     Non vectorizable stores to invariant address were not found in loop.
     SCEV assumptions:
+    Equal predicate: %load == 1

     Expressions re-written:
+    [PSE]  %gep10 = getelementptr double, ptr %gep8, i64 %mul:
+      {(8 + %arg1),+,(8 * (sext i32 %load to i64))<nsw>}<%loop.body>
+      --> {(8 + %arg1),+,8}<%loop.body>

Metadata

Metadata

Assignees

No one assigned

    Labels

    llvm:analysis, loopoptim, question (A question, not bug report. Check out https://llvm.org/docs/GettingInvolved.html instead!), regression

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions