Skip to content

Commit 8237520

Browse files
committed
[SLP]Fix PR85082: PHI node has multiple entries.
Need to record the casted extractelement for the externally used scalar, not the original extract instruction.
1 parent 744a23f commit 8237520

File tree

2 files changed

+84
-7
lines changed

2 files changed

+84
-7
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12582,6 +12582,7 @@ Value *BoUpSLP::vectorizeTree(
1258212582
Ex = I;
1258312583
}
1258412584
}
12585+
Value *ExV = Ex;
1258512586
if (!Ex) {
1258612587
// "Reuse" the existing extract to improve final codegen.
1258712588
if (auto *ES = dyn_cast<ExtractElementInst>(Scalar)) {
@@ -12592,7 +12593,13 @@ Value *BoUpSLP::vectorizeTree(
1259212593
} else {
1259312594
Ex = Builder.CreateExtractElement(Vec, Lane);
1259412595
}
12595-
if (auto *I = dyn_cast<Instruction>(Ex))
12596+
// If necessary, sign-extend or zero-extend ScalarRoot
12597+
// to the larger type.
12598+
ExV = Ex;
12599+
if (Scalar->getType() != Ex->getType())
12600+
ExV = Builder.CreateIntCast(Ex, Scalar->getType(),
12601+
MinBWs.find(E)->second.second);
12602+
if (auto *I = dyn_cast<Instruction>(ExV))
1259612603
ScalarToEEs[Scalar].try_emplace(Builder.GetInsertBlock(), I);
1259712604
}
1259812605
// The then branch of the previous if may produce constants, since 0
@@ -12601,12 +12608,7 @@ Value *BoUpSLP::vectorizeTree(
1260112608
GatherShuffleExtractSeq.insert(ExI);
1260212609
CSEBlocks.insert(ExI->getParent());
1260312610
}
12604-
// If necessary, sign-extend or zero-extend ScalarRoot
12605-
// to the larger type.
12606-
if (Scalar->getType() != Ex->getType())
12607-
return Builder.CreateIntCast(Ex, Scalar->getType(),
12608-
MinBWs.find(E)->second.second);
12609-
return Ex;
12611+
return ExV;
1261012612
}
1261112613
assert(isa<FixedVectorType>(Scalar->getType()) &&
1261212614
isa<InsertElementInst>(Scalar) &&
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define void @test(i32 %arg) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: i32 [[ARG:%.*]]) {
7+
; CHECK-NEXT: bb:
8+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[ARG]], i32 0
9+
; CHECK-NEXT: br label [[BB2:%.*]]
10+
; CHECK: bb2:
11+
; CHECK-NEXT: switch i32 0, label [[BB10:%.*]] [
12+
; CHECK-NEXT: i32 0, label [[BB9:%.*]]
13+
; CHECK-NEXT: i32 11, label [[BB9]]
14+
; CHECK-NEXT: i32 1, label [[BB4:%.*]]
15+
; CHECK-NEXT: ]
16+
; CHECK: bb3:
17+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
18+
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
19+
; CHECK-NEXT: switch i32 0, label [[BB10]] [
20+
; CHECK-NEXT: i32 18, label [[BB7:%.*]]
21+
; CHECK-NEXT: i32 1, label [[BB7]]
22+
; CHECK-NEXT: i32 0, label [[BB10]]
23+
; CHECK-NEXT: ]
24+
; CHECK: bb4:
25+
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ [[TMP0]], [[BB2]] ]
26+
; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
27+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
28+
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr null, i64 [[TMP5]]
29+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
30+
; CHECK-NEXT: [[GETELEMENTPTR6:%.*]] = getelementptr i32, ptr null, i64 [[TMP6]]
31+
; CHECK-NEXT: ret void
32+
; CHECK: bb7:
33+
; CHECK-NEXT: [[PHI8:%.*]] = phi i64 [ [[TMP2]], [[BB3:%.*]] ], [ [[TMP2]], [[BB3]] ]
34+
; CHECK-NEXT: br label [[BB9]]
35+
; CHECK: bb9:
36+
; CHECK-NEXT: ret void
37+
; CHECK: bb10:
38+
; CHECK-NEXT: ret void
39+
;
40+
bb:
41+
%zext = zext i32 %arg to i64
42+
%zext1 = zext i32 0 to i64
43+
br label %bb2
44+
45+
bb2:
46+
switch i32 0, label %bb10 [
47+
i32 0, label %bb9
48+
i32 11, label %bb9
49+
i32 1, label %bb4
50+
]
51+
52+
bb3:
53+
switch i32 0, label %bb10 [
54+
i32 18, label %bb7
55+
i32 1, label %bb7
56+
i32 0, label %bb10
57+
]
58+
59+
bb4:
60+
%phi = phi i64 [ %zext, %bb2 ]
61+
%phi5 = phi i64 [ %zext1, %bb2 ]
62+
%getelementptr = getelementptr i32, ptr null, i64 %phi
63+
%getelementptr6 = getelementptr i32, ptr null, i64 %phi5
64+
ret void
65+
66+
bb7:
67+
%phi8 = phi i64 [ %zext, %bb3 ], [ %zext, %bb3 ]
68+
br label %bb9
69+
70+
bb9:
71+
ret void
72+
73+
bb10:
74+
ret void
75+
}

0 commit comments

Comments
 (0)