Skip to content

Commit 8a0bfe4

Browse files
committed
[SLP]Fix PR87630: wrong result for externally used vector value.
Before applying the reduced BitWidth, we need to check that the externally used value can be represented with it; otherwise, the wider type must be kept.
1 parent eeaaf33 commit 8a0bfe4

File tree

2 files changed

+14
-8
lines changed

2 files changed

+14
-8
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14141,6 +14141,16 @@ bool BoUpSLP::collectValuesToDemote(
1414114141
}))
1414214142
return FinalAnalysis();
1414314143

14144+
if (!all_of(I->users(),
14145+
[=](User *U) {
14146+
return getTreeEntry(U) ||
14147+
(UserIgnoreList && UserIgnoreList->contains(U)) ||
14148+
(U->getType()->isSized() &&
14149+
DL->getTypeSizeInBits(U->getType()) <= BitWidth);
14150+
}) &&
14151+
!IsPotentiallyTruncated(I, BitWidth))
14152+
return false;
14153+
1414414154
unsigned Start = 0;
1414514155
unsigned End = I->getNumOperands();
1414614156

llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,13 @@ define i8 @test() {
1414
; CHECK-NEXT: [[CONV1:%.*]] = zext i16 [[TMP1]] to i32
1515
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0
1616
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer
17-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i32> [[TMP3]] to <8 x i16>
18-
; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i16> [[TMP4]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -32767>
17+
; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 32769>
1918
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> poison, i32 [[CONV1]], i32 0
2019
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> zeroinitializer
21-
; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i16>
22-
; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i16> [[TMP5]], [[TMP8]]
23-
; CHECK-NEXT: [[TMP10:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP9]])
24-
; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP10]] to i32
20+
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <8 x i32> [[TMP4]], [[TMP7]]
21+
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP8]])
2522
; CHECK-NEXT: [[CONV4_30:%.*]] = trunc i32 [[TMP11]] to i8
26-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i16> [[TMP5]], i32 7
27-
; CHECK-NEXT: [[TMP13:%.*]] = sext i16 [[TMP12]] to i32
23+
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP4]], i32 7
2824
; CHECK-NEXT: [[XOR_31:%.*]] = and i32 [[TMP13]], -2
2925
; CHECK-NEXT: store i32 [[XOR_31]], ptr @d, align 4
3026
; CHECK-NEXT: ret i8 [[CONV4_30]]

0 commit comments

Comments
 (0)