Skip to content

Commit 4a0bbbc

Browse files
committed
[SLP]Fix PR104637: do not create new nodes for fully overlapped non-schedulable nodes
If the scalars do not require scheduling and were already vectorized, but in a different order, the compiler still tries to create a new node. This may cause a compiler crash for the gathered operands. Instead, such nodes need to be treated as a full overlap, and the already vectorized node should just be reshuffled. Fixes #104637
1 parent 13779ec commit 4a0bbbc

File tree

2 files changed

+57
-0
lines changed

2 files changed

+57
-0
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7144,6 +7144,21 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
71447144
ReuseShuffleIndices);
71457145
return;
71467146
}
7147+
SmallPtrSet<const TreeEntry *, 4> Nodes;
7148+
Nodes.insert(getTreeEntry(S.OpValue));
7149+
for (const TreeEntry *E : MultiNodeScalars.lookup(S.OpValue))
7150+
Nodes.insert(E);
7151+
SmallPtrSet<Value *, 8> Values(VL.begin(), VL.end());
7152+
if (any_of(Nodes, [&](const TreeEntry *E) {
7153+
return all_of(E->Scalars,
7154+
[&](Value *V) { return Values.contains(V); });
7155+
})) {
7156+
LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
7157+
if (TryToFindDuplicates(S))
7158+
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
7159+
ReuseShuffleIndices);
7160+
return;
7161+
}
71477162
} else {
71487163
// Record the reuse of the tree node. FIXME, currently this is only used
71497164
// to properly draw the graph rather than for the actual vectorization.
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define void @test(double %v) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: double [[V:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*]]:
8+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[V]], i32 1
9+
; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> zeroinitializer, [[TMP0]]
10+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
11+
; CHECK-NEXT: br label %[[LOOP:.*]]
12+
; CHECK: [[LOOP]]:
13+
; CHECK-NEXT: [[T50_02:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP8:%.*]], %[[LOOP]] ]
14+
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x double> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP10:%.*]], %[[LOOP]] ]
15+
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP2]]
16+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
17+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
18+
; CHECK-NEXT: [[TMP7:%.*]] = fadd double [[TMP6]], [[TMP5]]
19+
; CHECK-NEXT: [[TMP8]] = fadd double [[TMP7]], [[V]]
20+
; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]]
21+
; CHECK-NEXT: [[TMP10]] = fadd <2 x double> zeroinitializer, [[TMP9]]
22+
; CHECK-NEXT: br label %[[LOOP]]
23+
;
24+
entry:
25+
%mul.3 = fmul double 0.000000e+00, %v
26+
%mul.4 = fmul double 0.000000e+00, 0.000000e+00
27+
br label %loop
28+
29+
loop:
30+
%t48.0 = phi double [ 0.000000e+00, %entry ], [ %5, %loop ]
31+
%t50.02 = phi double [ 0.000000e+00, %entry ], [ %3, %loop ]
32+
%t52.0 = phi double [ 0.000000e+00, %entry ], [ %7, %loop ]
33+
%0 = fmul double %t52.0, %mul.3
34+
%1 = fmul double %t48.0, %mul.4
35+
%2 = fadd double %1, %0
36+
%3 = fadd double %2, %v
37+
%4 = fmul double 0.000000e+00, %mul.3
38+
%5 = fadd double 0.000000e+00, %4
39+
%6 = fmul double 0.000000e+00, %mul.4
40+
%7 = fadd double 0.000000e+00, %6
41+
br label %loop
42+
}

0 commit comments

Comments
 (0)