Skip to content

Commit 39b2104

Browse files
committed
[SLP]Fix a crash for reduced values with minbitwidth, which are reused.
If the reduced values are additionally affected by the minbitwidth analysis and are reused, they need to be cast to the proper type before any math is done on them.
1 parent aa2dc79 commit 39b2104

File tree

2 files changed

+43
-0
lines changed

2 files changed

+43
-0
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15214,6 +15214,19 @@ class HorizontalReduction {
1521415214
assert(IsSupportedHorRdxIdentityOp &&
1521515215
"The optimization of matched scalar identity horizontal reductions "
1521615216
"must be supported.");
15217+
auto *VTy = cast<FixedVectorType>(VectorizedValue->getType());
15218+
if (VTy->getElementType() != VL.front()->getType()) {
15219+
VectorizedValue = Builder.CreateIntCast(
15220+
VectorizedValue,
15221+
FixedVectorType::get(VL.front()->getType(), VTy->getNumElements()),
15222+
any_of(VL, [&](Value *R) {
15223+
KnownBits Known = computeKnownBits(
15224+
R, cast<Instruction>(ReductionOps.front().front())
15225+
->getModule()
15226+
->getDataLayout());
15227+
return !Known.isNonNegative();
15228+
}));
15229+
}
1521715230
switch (RdxKind) {
1521815231
case RecurKind::Add: {
1521915232
// root = mul prev_root, <1, 1, n, 1>
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mcpu=skylake < %s | FileCheck %s
3+
4+
; Regression test for an SLP vectorizer crash on a reused reduced value.
; The scalar body is an i32 add chain over four selects in which %add7.31
; appears twice (in %add19 and again in %add19.31). The CHECK lines expect the
; vectorized select to be truncated to <4 x i1>, zero-extended back to
; <4 x i32>, and only then multiplied by <2, 1, 1, 1> ahead of the
; llvm.vector.reduce.add call.
; NOTE(review): the 2 in lane 0 presumably is the use count of %add7.31 -
; confirm against the SLPVectorizer change this test accompanies.
define i1 @test(i1 %cmp5.not.31) {
5+
; CHECK-LABEL: define i1 @test(
6+
; CHECK-SAME: i1 [[CMP5_NOT_31:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: entry:
8+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i1> <i1 poison, i1 false, i1 false, i1 false>, i1 [[CMP5_NOT_31]], i32 0
9+
; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
10+
; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i1>
11+
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP2]] to <4 x i32>
12+
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP3]], <i32 2, i32 1, i32 1, i32 1>
13+
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
14+
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 0
15+
; CHECK-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq i32 [[TMP6]], 0
16+
; CHECK-NEXT: ret i1 [[CMP_NOT_I_I]]
17+
;
18+
entry:
19+
%add7.31 = select i1 %cmp5.not.31, i32 0, i32 0
20+
%add18 = select i1 false, i32 0, i32 0
21+
%add19 = add i32 %add18, %add7.31
22+
%add18.1 = select i1 false, i32 0, i32 0
23+
%add19.1 = add i32 %add18.1, %add19
24+
%add18.4 = select i1 false, i32 0, i32 0
25+
%add19.4 = add i32 %add18.4, %add19.1
26+
; Second use of %add7.31 in the same add chain: this is the reused reduced value.
%add19.31 = add i32 %add7.31, %add19.4
27+
%0 = and i32 %add19.31, 0
28+
%cmp.not.i.i = icmp eq i32 %0, 0
29+
ret i1 %cmp.not.i.i
30+
}

0 commit comments

Comments
 (0)