Skip to content

Commit 102a811

Browse files
committed
[SLP]Fix a check for multi-users for icmp user.
The compiler should not take the type of the cmp instruction into account when checking users; otherwise, it may compute the size incorrectly, which can lead to incorrect codegen.
1 parent 19a625a commit 102a811

File tree

2 files changed

+8
-8
lines changed

2 files changed

+8
-8
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14530,7 +14530,8 @@ bool BoUpSLP::collectValuesToDemote(
1453014530
return !all_of(V->users(), [=](User *U) {
1453114531
return getTreeEntry(U) ||
1453214532
(UserIgnoreList && UserIgnoreList->contains(U)) ||
14533-
(U->getType()->isSized() && !U->getType()->isScalableTy() &&
14533+
(!isa<CmpInst>(U) && U->getType()->isSized() &&
14534+
!U->getType()->isScalableTy() &&
1453414535
DL->getTypeSizeInBits(U->getType()) <= BitWidth);
1453514536
}) && !IsPotentiallyTruncated(V, BitWidth);
1453614537
}))

llvm/test/Transforms/SLPVectorizer/AArch64/external-use-icmp.ll

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -7,23 +7,22 @@ define i16 @foo(i16 %in1, i16 %in2) {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> poison, i16 [[IN1]], i32 0
99
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> poison, <2 x i32> zeroinitializer
10+
; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i64>
1011
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[IN2]], i32 0
1112
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer
12-
; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i16> [[TMP3]], [[TMP1]]
13-
; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i16> [[TMP4]], <i16 -1, i16 -1>
14-
; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i16> [[TMP5]] to <2 x i64>
13+
; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i16> [[TMP3]] to <2 x i64>
14+
; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw <2 x i64> [[TMP5]], [[TMP4]]
15+
; CHECK-NEXT: [[TMP6:%.*]] = and <2 x i64> [[TMP9]], <i64 65535, i64 65535>
1516
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[TMP6]], <i64 65533, i64 65533>
1617
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
1718
; CHECK-NEXT: [[ZEXT3_1:%.*]] = zext i1 [[TMP8]] to i16
18-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1
19-
; CHECK-NEXT: [[TMP10:%.*]] = zext i16 [[TMP9]] to i64
19+
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
2020
; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ne i64 [[TMP10]], 196605
2121
; CHECK-NEXT: [[ZEXT4_1:%.*]] = zext i1 [[CMP2_1]] to i16
2222
; CHECK-NEXT: [[ADD1:%.*]] = add nuw nsw i16 [[ZEXT3_1]], [[ZEXT4_1]]
2323
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
2424
; CHECK-NEXT: [[ZEXT3_2:%.*]] = zext i1 [[TMP11]] to i16
25-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
26-
; CHECK-NEXT: [[TMP13:%.*]] = zext i16 [[TMP12]] to i64
25+
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
2726
; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ne i64 [[TMP13]], 196605
2827
; CHECK-NEXT: [[ZEXT4_2:%.*]] = zext i1 [[CMP2_2]] to i16
2928
; CHECK-NEXT: [[ADD2:%.*]] = add nuw nsw i16 [[ADD1]], [[ZEXT4_2]]

0 commit comments

Comments
 (0)