Skip to content

Commit a2d129b

Browse files
committed
[SLP]Fix a crash when trying to reduce in revec after minbitwidth analysis
Need to use the original scalar type, when building the reduction, and use the scalar type, when performing casting, to avoid compiler crash.
1 parent abe3b90 commit a2d129b

File tree

2 files changed

+39
-4
lines changed

2 files changed

+39
-4
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22141,8 +22141,8 @@ class HorizontalReduction {
2214122141
if (isa<FixedVectorType>(ScalarTy)) {
2214222142
assert(SLPReVec && "FixedVectorType is not expected.");
2214322143
unsigned ScalarTyNumElements = getNumElements(ScalarTy);
22144-
Value *ReducedSubTree = PoisonValue::get(getWidenedType(
22145-
VectorizedRoot->getType()->getScalarType(), ScalarTyNumElements));
22144+
Value *ReducedSubTree = PoisonValue::get(
22145+
getWidenedType(ScalarTy->getScalarType(), ScalarTyNumElements));
2214622146
for (unsigned I : seq<unsigned>(ScalarTyNumElements)) {
2214722147
// Do reduction for each lane.
2214822148
// e.g., do reduce add for
@@ -22359,7 +22359,7 @@ class HorizontalReduction {
2235922359
Type *DestTy) {
2236022360
Value *Rdx = emitReduction(Vec, Builder, &TTI, DestTy);
2236122361
if (Rdx->getType() != DestTy->getScalarType())
22362-
Rdx = Builder.CreateIntCast(Rdx, DestTy, IsSigned);
22362+
Rdx = Builder.CreateIntCast(Rdx, DestTy->getScalarType(), IsSigned);
2236322363
// Improved analysis for add/fadd/xor reductions with same scale
2236422364
// factor for all operands of reductions. We can emit scalar ops for
2236522365
// them instead.

llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -mtriple=s390x-unknown-linux-gnu -mcpu=z17 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
2+
; RUN: opt -mtriple=s390x-unknown-linux-gnu -mcpu=z17 -passes=slp-vectorizer -S -slp-revec < %s | FileCheck %s
3+
; RUN: opt -mtriple=s390x-unknown-linux-gnu -mcpu=z17 -passes=slp-vectorizer -S -slp-revec -slp-threshold=-1000 < %s | FileCheck %s --check-prefix=THRESH
34

45
define void @e(<4 x i16> %0) {
56
; CHECK-LABEL: @e(
@@ -22,6 +23,40 @@ define void @e(<4 x i16> %0) {
2223
; CHECK-NEXT: [[TMP12]] = or <4 x i32> [[TMP9]], [[TMP11]]
2324
; CHECK-NEXT: br label [[VECTOR_BODY]]
2425
;
26+
; THRESH-LABEL: @e(
27+
; THRESH-NEXT: entry:
28+
; THRESH-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> zeroinitializer, i64 0)
29+
; THRESH-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP1]], <4 x i16> zeroinitializer, i64 4)
30+
; THRESH-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> zeroinitializer, i64 4)
31+
; THRESH-NEXT: [[TMP4:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> zeroinitializer, i64 0)
32+
; THRESH-NEXT: [[TMP5:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP4]], <4 x i16> zeroinitializer, i64 4)
33+
; THRESH-NEXT: [[TMP6:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP5]], <4 x i16> zeroinitializer, i64 8)
34+
; THRESH-NEXT: [[TMP7:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP6]], <4 x i16> zeroinitializer, i64 12)
35+
; THRESH-NEXT: br label [[VECTOR_BODY:%.*]]
36+
; THRESH: vector.body:
37+
; THRESH-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[VECTOR_BODY]] ]
38+
; THRESH-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[ENTRY]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ]
39+
; THRESH-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP3]], <4 x i16> [[VEC_IND]], i64 0)
40+
; THRESH-NEXT: [[TMP9:%.*]] = add <8 x i16> [[TMP2]], [[TMP8]]
41+
; THRESH-NEXT: [[TMP10:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
42+
; THRESH-NEXT: [[TMP11:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP10]], <4 x i16> [[TMP0:%.*]], i64 4)
43+
; THRESH-NEXT: [[TMP12:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v8i16(<16 x i16> [[TMP11]], <8 x i16> [[TMP9]], i64 8)
44+
; THRESH-NEXT: [[TMP13:%.*]] = icmp sgt <16 x i16> [[TMP12]], [[TMP7]]
45+
; THRESH-NEXT: [[TMP14:%.*]] = shufflevector <16 x i1> [[TMP13]], <16 x i1> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
46+
; THRESH-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]])
47+
; THRESH-NEXT: [[TMP16:%.*]] = shufflevector <16 x i1> [[TMP13]], <16 x i1> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
48+
; THRESH-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP16]])
49+
; THRESH-NEXT: [[TMP18:%.*]] = shufflevector <16 x i1> [[TMP13]], <16 x i1> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
50+
; THRESH-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]])
51+
; THRESH-NEXT: [[TMP20:%.*]] = shufflevector <16 x i1> [[TMP13]], <16 x i1> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
52+
; THRESH-NEXT: [[TMP21:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP20]])
53+
; THRESH-NEXT: [[TMP22:%.*]] = insertelement <4 x i1> poison, i1 [[TMP15]], i32 0
54+
; THRESH-NEXT: [[TMP23:%.*]] = insertelement <4 x i1> [[TMP22]], i1 [[TMP17]], i32 1
55+
; THRESH-NEXT: [[TMP24:%.*]] = insertelement <4 x i1> [[TMP23]], i1 [[TMP19]], i32 2
56+
; THRESH-NEXT: [[TMP25:%.*]] = insertelement <4 x i1> [[TMP24]], i1 [[TMP21]], i32 3
57+
; THRESH-NEXT: [[TMP26]] = zext <4 x i1> [[TMP25]] to <4 x i32>
58+
; THRESH-NEXT: br label [[VECTOR_BODY]]
59+
;
2560
entry:
2661
br label %vector.body
2762

0 commit comments

Comments
 (0)