Skip to content

Commit 58a94b1

Browse files
committed
[SLP] Fix PR91467: Look through scalar casts when trying to cast to another type.
When reducing the width of gathered scalars after minbitwidth analysis, we need to look through SExt/ZExt scalars to their operands; otherwise the vectorizer keeps attempting to revectorize such gathered instructions indefinitely.
1 parent 6eb9e21 commit 58a94b1

File tree

5 files changed

+77
-17
lines changed

5 files changed

+77
-17
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11419,8 +11419,16 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy) {
1141911419
if (Scalar->getType() != Ty) {
1142011420
assert(Scalar->getType()->isIntegerTy() && Ty->isIntegerTy() &&
1142111421
"Expected integer types only.");
11422+
Value *V = Scalar;
11423+
if (auto *CI = dyn_cast<CastInst>(Scalar);
11424+
isa_and_nonnull<SExtInst, ZExtInst>(CI)) {
11425+
Value *Op = CI->getOperand(0);
11426+
if (auto *IOp = dyn_cast<Instruction>(Op);
11427+
!IOp || !(isDeleted(IOp) || getTreeEntry(IOp)))
11428+
V = Op;
11429+
}
1142211430
Scalar = Builder.CreateIntCast(
11423-
Scalar, Ty, !isKnownNonNegative(Scalar, SimplifyQuery(*DL)));
11431+
V, Ty, !isKnownNonNegative(Scalar, SimplifyQuery(*DL)));
1142411432
}
1142511433

1142611434
Vec = Builder.CreateInsertElement(Vec, Scalar, Builder.getInt32(Pos));

llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -5,14 +5,7 @@ define void @h() {
55
; CHECK-LABEL: define void @h() {
66
; CHECK-NEXT: entry:
77
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr null, i64 16
8-
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 0 to i1
9-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 poison, i1 false, i1 false, i1 false>, i1 [[TMP6]], i32 4
10-
; CHECK-NEXT: [[TMP1:%.*]] = sub <8 x i1> [[TMP0]], zeroinitializer
11-
; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i1> [[TMP0]], zeroinitializer
12-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
13-
; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i1> [[TMP3]], zeroinitializer
14-
; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i1> [[TMP5]] to <8 x i16>
15-
; CHECK-NEXT: store <8 x i16> [[TMP4]], ptr [[ARRAYIDX2]], align 2
8+
; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr [[ARRAYIDX2]], align 2
169
; CHECK-NEXT: ret void
1710
;
1811
entry:

llvm/test/Transforms/SLPVectorizer/AArch64/user-node-not-in-bitwidths.ll

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,12 +5,7 @@ define void @h() {
55
; CHECK-LABEL: define void @h() {
66
; CHECK-NEXT: entry:
77
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr null, i64 16
8-
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 0 to i1
9-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 poison, i1 false, i1 false, i1 false>, i1 [[TMP0]], i32 4
10-
; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i1> zeroinitializer, [[TMP1]]
11-
; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i1> zeroinitializer, [[TMP2]]
12-
; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i1> [[TMP3]] to <8 x i16>
13-
; CHECK-NEXT: store <8 x i16> [[TMP4]], ptr [[ARRAYIDX2]], align 2
8+
; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr [[ARRAYIDX2]], align 2
149
; CHECK-NEXT: ret void
1510
;
1611
entry:

llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,9 @@
44
define void @test(ptr %a, i8 %0, i16 %b.promoted.i) {
55
; CHECK-LABEL: define void @test(
66
; CHECK-SAME: ptr [[A:%.*]], i8 [[TMP0:%.*]], i16 [[B_PROMOTED_I:%.*]]) #[[ATTR0:[0-9]+]] {
7-
; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i128
87
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[B_PROMOTED_I]], i32 0
98
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
10-
; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP2]] to i16
9+
; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[TMP0]] to i16
1110
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i16> poison, i16 [[TMP5]], i32 0
1211
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> poison, <4 x i32> zeroinitializer
1312
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i16> [[TMP4]], [[TMP7]]
Lines changed: 65 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,65 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux < %s | FileCheck %s
3+
4+
define void @test(ptr %top) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: ptr [[TOP:%.*]]) {
7+
; CHECK-NEXT: entry:
8+
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[TOP]], align 1
9+
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[TMP0]], zeroinitializer
10+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i8> [[TMP0]], i32 2
11+
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
12+
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8
13+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i8> <i8 0, i8 0, i8 0, i8 poison>, i8 [[TMP4]], i32 3
14+
; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i8> [[TMP1]], [[TMP5]]
15+
; CHECK-NEXT: [[TMP7:%.*]] = or <4 x i8> [[TMP6]], zeroinitializer
16+
; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i8> [[TMP7]], <i8 2, i8 2, i8 2, i8 2>
17+
; CHECK-NEXT: br label [[FOR_COND_I:%.*]]
18+
; CHECK: for.cond.i:
19+
; CHECK-NEXT: store <4 x i8> [[TMP8]], ptr null, align 1
20+
; CHECK-NEXT: br label [[FOR_COND_I]]
21+
;
22+
entry:
23+
%0 = load i8, ptr %top, align 1
24+
%conv2.i = zext i8 %0 to i32
25+
%mul.i = mul i32 %conv2.i, 0
26+
%add.i = or i32 %mul.i, 0
27+
%arrayidx3.i = getelementptr i8, ptr %top, i64 1
28+
%1 = load i8, ptr %arrayidx3.i, align 1
29+
%conv4.i = zext i8 %1 to i32
30+
%add5.i = or i32 %add.i, 0
31+
%shr.i = lshr i32 %add5.i, 2
32+
%conv7.i = trunc i32 %shr.i to i8
33+
%mul12.i = mul i32 %conv4.i, 0
34+
%arrayidx14.i = getelementptr i8, ptr %top, i64 2
35+
%2 = load i8, ptr %arrayidx14.i, align 1
36+
%conv15.i = zext i8 %2 to i32
37+
%add16.i = or i32 %mul12.i, 0
38+
%add17.i = or i32 %add16.i, 0
39+
%shr18.i = lshr i32 %add17.i, 2
40+
%conv19.i = trunc i32 %shr18.i to i8
41+
%mul25.i = mul i32 %conv15.i, 0
42+
%arrayidx27.i = getelementptr i8, ptr %top, i64 3
43+
%3 = load i8, ptr %arrayidx27.i, align 1
44+
%conv28.i = zext i8 %3 to i32
45+
%add29.i = or i32 %mul25.i, 0
46+
%add30.i = or i32 %add29.i, 0
47+
%shr31.i = lshr i32 %add30.i, 2
48+
%conv32.i = trunc i32 %shr31.i to i8
49+
%mul38.i = mul i32 %conv28.i, 0
50+
%add39.i = or i32 %mul38.i, %conv15.i
51+
%add42.i = or i32 %add39.i, 0
52+
%shr44.i = lshr i32 %add42.i, 2
53+
%conv45.i = trunc i32 %shr44.i to i8
54+
br label %for.cond.i
55+
56+
for.cond.i:
57+
store i8 %conv7.i, ptr null, align 1
58+
%vals.sroa.5.0.add.ptr.sroa_idx.i = getelementptr i8, ptr null, i64 1
59+
store i8 %conv19.i, ptr %vals.sroa.5.0.add.ptr.sroa_idx.i, align 1
60+
%vals.sroa.7.0.add.ptr.sroa_idx.i = getelementptr i8, ptr null, i64 2
61+
store i8 %conv32.i, ptr %vals.sroa.7.0.add.ptr.sroa_idx.i, align 1
62+
%vals.sroa.9.0.add.ptr.sroa_idx.i = getelementptr i8, ptr null, i64 3
63+
store i8 %conv45.i, ptr %vals.sroa.9.0.add.ptr.sroa_idx.i, align 1
64+
br label %for.cond.i
65+
}

0 commit comments

Comments
 (0)