Description
Here is the reduced testcase, and how to get the assertion:
$ cat reduced.ll
; ModuleID = '<bc file>'
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @_ZN21ND_R1D_SegmentElementC2Ei(i32* %0, i32* %1, i64 %indvars.iv, i32* %2, i64 %indvars.iv76, i64 %indvars.iv93) {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv761 = phi i64 [ 0, %entry ], [ %indvars.iv.next77, %for.body ]
%indvars.iv4 = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%indvars.iv.next77 = add i64 %indvars.iv761, 1
%arrayidx.i.i50 = getelementptr i32, i32* %0, i64 %indvars.iv76
%indvars.iv.next = add i64 %indvars.iv4, 1
%exitcond.not = icmp eq i64 %indvars.iv4, %indvars.iv
br i1 %exitcond.not, label %for.body13.preheader, label %for.body
for.body13.preheader: ; preds = %for.body
br label %for.body13
for.body26.lr.ph: ; preds = %for.body13
%idxprom.i.i61 = and i64 %indvars.iv761, 1
%arrayidx.i.i62 = getelementptr i32, i32* %0, i64 %idxprom.i.i61
br label %for.body26
for.body13: ; preds = %for.body13, %for.body13.preheader
%indvars.iv846 = phi i64 [ %indvars.iv.next85, %for.body13 ], [ 0, %for.body13.preheader ]
%indvars.iv.next87 = add i64 0, 0
%arrayidx.i.i56 = getelementptr i32, i32* %0, i64 %indvars.iv761
%3 = load i32, i32* %arrayidx.i.i56, align 4
store i32 0, i32* %1, align 4
%indvars.iv.next85 = add i64 %indvars.iv846, 1
%exitcond92.not = icmp eq i64 %indvars.iv846, %indvars.iv
br i1 %exitcond92.not, label %for.body26.lr.ph, label %for.body13
for.cond.cleanup25: ; preds = %for.body26
ret void
for.body26: ; preds = %for.body26, %for.body26.lr.ph
%indvars.iv932 = phi i64 [ 0, %for.body26.lr.ph ], [ %indvars.iv.next94, %for.body26 ]
%4 = load i32, i32* %arrayidx.i.i62, align 4
%arrayidx.i.i653 = getelementptr i32, i32* %2, i64 %indvars.iv93
store i32 0, i32* %1, align 4
%indvars.iv.next94 = add i64 %indvars.iv932, 1
%exitcond97.not = icmp eq i64 %indvars.iv932, %indvars.iv
br i1 %exitcond97.not, label %for.cond.cleanup25, label %for.body26
}
$ opt -passes=loop-distribute,loop-vectorize -enable-loop-distribute -disable-output reduced.ll
opt: /home/users/saldivar/workspace/random/llvm-project.git/main/llvm/include/llvm/Support/Casting.h:578: decltype(auto) llvm::cast(From*) [with To = Instruction; From = Value]: Assertion `isa<To>(Val) && "cast<Ty>() argument of incompatible type!"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
This is an interesting issue because the assertion ONLY happens with loop-distribute enabled.
It's also worth noting that the loop-distribute pass itself fails and doesn't modify or optimize anything.
So, what exactly might the issue be? I think it has to do with the LoopAccessInfoManager object used by the passes.
Comparing the debug output of the loop-vectorize pass between runs, I noticed that the failing run was missing the LoopAccessInfoManager debug output.
With GDB I was able to track down that the data was already cached within the object, so no insertion was happening, and the data being returned had been computed during the loop-distribute pass.
I was able to stop the assertion with this change:
diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 434bee101ae7..49a4355fb4bc 100644
--- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -992,6 +992,7 @@ static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
// enabled/disabled, follow that. Otherwise use the global flag.
if (LDL.isForced().value_or(EnableLoopDistribute))
Changed |= LDL.processLoop();
+ LAIs.clear();
}
// Process each loop nest in the function.
This change clears all the cached information in the LoopAccessInfoManager after every loop.
I have seen a couple of other loop passes do this, so I thought it would apply here as well.