Skip to content

Commit 2271f0b

Browse files
committed
[SLP]Check for perfect/shuffled match for the split node
If the potential split node is a perfect or shuffled match of an existing split node, skip creating another split node with the same scalars; it should be a buildvector instead. Fixes #135800
1 parent a1d52fc commit 2271f0b

File tree

3 files changed

+94
-1
lines changed

3 files changed

+94
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9575,6 +9575,24 @@ bool BoUpSLP::canBuildSplitNode(ArrayRef<Value *> VL,
95759575
!SplitAlternateInstructions)
95769576
return false;
95779577

9578+
// Check if this is a duplicate of another split entry.
9579+
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *LocalState.getMainOp()
9580+
<< ".\n");
9581+
for (TreeEntry *E : getSplitTreeEntries(LocalState.getMainOp())) {
9582+
if (E->isSame(VL)) {
9583+
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at "
9584+
<< *LocalState.getMainOp() << ".\n");
9585+
return false;
9586+
}
9587+
SmallPtrSet<Value *, 8> Values(llvm::from_range, E->Scalars);
9588+
if (all_of(VL, [&](Value *V) {
9589+
return isa<PoisonValue>(V) || Values.contains(V);
9590+
})) {
9591+
LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
9592+
return false;
9593+
}
9594+
}
9595+
95789596
ReorderIndices.assign(VL.size(), VL.size());
95799597
SmallBitVector Op1Indices(VL.size());
95809598
for (auto [Idx, V] : enumerate(VL)) {
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define void @test(double %0) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: double [[TMP0:%.*]]) {
7+
; CHECK-NEXT: [[_THREAD:.*:]]
8+
; CHECK-NEXT: [[TMP1:%.*]] = call double null(ptr null, ptr null, ptr null)
9+
; CHECK-NEXT: [[TMP2:%.*]] = call double null(ptr null, ptr null, ptr null)
10+
; CHECK-NEXT: br i1 false, label %[[BB3:.*]], label %[[BB7:.*]]
11+
; CHECK: [[BB3]]:
12+
; CHECK-NEXT: [[TMP4:%.*]] = call double null(ptr null, ptr null, ptr null)
13+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> <double poison, double 0.000000e+00, double poison, double 0.000000e+00>, double [[TMP0]], i32 2
14+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[TMP4]], i32 0
15+
; CHECK-NEXT: br label %[[BB7]]
16+
; CHECK: [[BB7]]:
17+
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x double> [ [[TMP6]], %[[BB3]] ], [ zeroinitializer, [[DOTTHREAD:%.*]] ]
18+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i32 0
19+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP9]], double [[TMP1]], i32 1
20+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison>
21+
; CHECK-NEXT: [[TMP12:%.*]] = call <6 x double> @llvm.vector.insert.v6f64.v2f64(<6 x double> [[TMP11]], <2 x double> [[TMP10]], i64 4)
22+
; CHECK-NEXT: br i1 false, label %[[DOTLR_PH272_PREHEADER:.*]], [[DOT_CRIT_EDGE:label %.*]]
23+
; CHECK: [[_LR_PH272_PREHEADER:.*:]]
24+
; CHECK-NEXT: br i1 false, [[DOT_CRIT_EDGE]], label %[[BB13:.*]]
25+
; CHECK: [[BB13]]:
26+
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <6 x double> [[TMP12]], <6 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
27+
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x double> [[TMP14]], <4 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison>
28+
; CHECK-NEXT: [[TMP16:%.*]] = call <6 x double> @llvm.vector.insert.v6f64.v2f64(<6 x double> [[TMP15]], <2 x double> splat (double 0x7FF8000000000000), i64 4)
29+
; CHECK-NEXT: br i1 false, label %[[BB17:.*]], [[DOT_CRIT_EDGE]]
30+
; CHECK: [[BB17]]:
31+
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <6 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double poison, double 0.000000e+00, double 0.000000e+00>, double [[TMP0]], i32 3
32+
; CHECK-NEXT: br [[DOT_CRIT_EDGE]]
33+
; CHECK: [[__CRIT_EDGE:.*:]]
34+
; CHECK-NEXT: [[TMP19:%.*]] = phi <6 x double> [ [[TMP12]], %[[BB7]] ], [ [[TMP18]], %[[BB17]] ], [ [[TMP16]], %[[BB13]] ], [ [[TMP12]], %[[DOTLR_PH272_PREHEADER]] ]
35+
; CHECK-NEXT: ret void
36+
;
37+
.thread:
38+
%1 = call double null(ptr null, ptr null, ptr null)
39+
%2 = call double null(ptr null, ptr null, ptr null)
40+
br i1 false, label %3, label %5
41+
42+
3:
43+
%4 = call double null(ptr null, ptr null, ptr null)
44+
br label %5
45+
46+
5:
47+
%.1226 = phi double [ %4, %3 ], [ 0.000000e+00, %.thread ]
48+
%.1222 = phi double [ 0.000000e+00, %3 ], [ 0.000000e+00, %.thread ]
49+
%.1218 = phi double [ %0, %3 ], [ 0.000000e+00, %.thread ]
50+
%.1216 = phi double [ 0.000000e+00, %3 ], [ 0.000000e+00, %.thread ]
51+
br i1 false, label %.lr.ph272.preheader, label %._crit_edge
52+
53+
.lr.ph272.preheader:
54+
br i1 false, label %._crit_edge, label %6
55+
56+
6:
57+
%7 = fdiv double 0.000000e+00, 0.000000e+00
58+
%8 = fsub double 0.000000e+00, %7
59+
%9 = fdiv double 0.000000e+00, 0.000000e+00
60+
%10 = fsub double 0.000000e+00, %9
61+
br i1 false, label %11, label %._crit_edge
62+
63+
11:
64+
br label %._crit_edge
65+
66+
._crit_edge:
67+
%.2227.lcssa = phi double [ %.1226, %5 ], [ 0.000000e+00, %11 ], [ %.1226, %6 ], [ %.1226, %.lr.ph272.preheader ]
68+
%.2223.lcssa = phi double [ %.1222, %5 ], [ 0.000000e+00, %11 ], [ %.1222, %6 ], [ %.1222, %.lr.ph272.preheader ]
69+
%.2219.lcssa = phi double [ %.1218, %5 ], [ 0.000000e+00, %11 ], [ %.1218, %6 ], [ %.1218, %.lr.ph272.preheader ]
70+
%.2.lcssa = phi double [ %.1216, %5 ], [ %0, %11 ], [ %.1216, %6 ], [ %.1216, %.lr.ph272.preheader ]
71+
%.0213.lcssa = phi double [ %2, %5 ], [ 0.000000e+00, %11 ], [ %10, %6 ], [ %2, %.lr.ph272.preheader ]
72+
%.0211.lcssa = phi double [ %1, %5 ], [ 0.000000e+00, %11 ], [ %8, %6 ], [ %1, %.lr.ph272.preheader ]
73+
ret void
74+
}

llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@ define i1 @test(ptr %0, ptr %1, <2 x float> %2, <2 x float> %3, <2 x float> %4)
1515
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 poison>
1616
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x float> [[TMP14]], float [[TMP9]], i32 7
1717
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x float> [[TMP13]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
18-
; CHECK-NEXT: [[TMP17:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP16]], <8 x float> [[TMP15]], i64 8)
1918
; CHECK-NEXT: [[TMP18:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP16]], <8 x float> [[TMP15]], i64 8)
19+
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP14]], <8 x float> [[TMP12]], <16 x i32> <i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 14, i32 14, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 poison>
20+
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <16 x float> [[TMP19]], float [[TMP9]], i32 15
2021
; CHECK-NEXT: [[TMP20:%.*]] = fmul <16 x float> [[TMP18]], [[TMP17]]
2122
; CHECK-NEXT: [[TMP21:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP20]])
2223
; CHECK-NEXT: [[TMP22:%.*]] = call float @foo(float [[TMP21]])

0 commit comments

Comments
 (0)