Skip to content

Commit f308f16

Browse files
committed
Merge main 6119461 into amd-staging.
Revert (breaks rocTHRUST build): c46b41a "LSV: forbid load-cycles when vectorizing; fix bug" (llvm#104815).
Change-Id: I4c0fc53575c4a7c16cfacd3d4aab20adc249e544
2 parents 888c4d1 + 6119461 commit f308f16

24 files changed

+5080
-686
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -196,12 +196,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
196196

197197
getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
198198
.lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
199+
.libcallFor({s128})
199200
.widenScalarOrEltToNextPow2(0)
200-
.clampScalarOrElt(0, s32, s64)
201+
.minScalarOrElt(0, s32)
201202
.clampNumElements(0, v2s32, v4s32)
202203
.clampNumElements(0, v2s64, v2s64)
203-
.moreElementsToNextPow2(0);
204-
204+
.scalarize(0);
205205

206206
getActionDefinitionsBuilder({G_SMULO, G_UMULO})
207207
.widenScalarToNextPow2(0, /*Min = */ 32)

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3236,14 +3236,14 @@ bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
32363236

32373237
bool X86TargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
32383238
// Speculate cttz only if we can directly use TZCNT or can promote to i32/i64.
3239-
return Subtarget.hasBMI() ||
3239+
return Subtarget.hasBMI() || Subtarget.canUseCMOV() ||
32403240
(!Ty->isVectorTy() &&
32413241
Ty->getScalarSizeInBits() < (Subtarget.is64Bit() ? 64u : 32u));
32423242
}
32433243

32443244
bool X86TargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
32453245
// Speculate ctlz only if we can directly use LZCNT.
3246-
return Subtarget.hasLZCNT();
3246+
return Subtarget.hasLZCNT() || Subtarget.canUseCMOV();
32473247
}
32483248

32493249
bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const {

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4210,9 +4210,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
42104210
{ ISD::ABS, MVT::i64, { 1, 2, 3, 3 } }, // SUB+CMOV
42114211
{ ISD::BITREVERSE, MVT::i64, { 10, 12, 20, 22 } },
42124212
{ ISD::BSWAP, MVT::i64, { 1, 2, 1, 2 } },
4213-
{ ISD::CTLZ, MVT::i64, { 3, 2, 6, 6 } }, // BSR+XOR or BSR+XOR+CMOV
4213+
{ ISD::CTLZ, MVT::i64, { 2, 2, 4, 5 } }, // BSR+XOR or BSR+XOR+CMOV
42144214
{ ISD::CTLZ_ZERO_UNDEF, MVT::i64,{ 1, 2, 2, 2 } }, // BSR+XOR
4215-
{ ISD::CTTZ, MVT::i64, { 2, 2, 5, 5 } }, // TEST+BSF+CMOV/BRANCH
4215+
{ ISD::CTTZ, MVT::i64, { 2, 2, 3, 4 } }, // TEST+BSF+CMOV/BRANCH
42164216
{ ISD::CTTZ_ZERO_UNDEF, MVT::i64,{ 1, 2, 1, 2 } }, // BSF
42174217
{ ISD::CTPOP, MVT::i64, { 10, 6, 19, 19 } },
42184218
{ ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
@@ -4241,9 +4241,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
42414241
{ ISD::BITREVERSE, MVT::i8, { 7, 9, 13, 14 } },
42424242
{ ISD::BSWAP, MVT::i32, { 1, 1, 1, 1 } },
42434243
{ ISD::BSWAP, MVT::i16, { 1, 2, 1, 2 } }, // ROL
4244-
{ ISD::CTLZ, MVT::i32, { 3, 2, 6, 6 } }, // BSR+XOR or BSR+XOR+CMOV
4245-
{ ISD::CTLZ, MVT::i16, { 3, 2, 6, 6 } }, // BSR+XOR or BSR+XOR+CMOV
4246-
{ ISD::CTLZ, MVT::i8, { 3, 2, 7, 7 } }, // BSR+XOR or BSR+XOR+CMOV
4244+
{ ISD::CTLZ, MVT::i32, { 2, 2, 4, 5 } }, // BSR+XOR or BSR+XOR+CMOV
4245+
{ ISD::CTLZ, MVT::i16, { 2, 2, 4, 5 } }, // BSR+XOR or BSR+XOR+CMOV
4246+
{ ISD::CTLZ, MVT::i8, { 2, 2, 5, 6 } }, // BSR+XOR or BSR+XOR+CMOV
42474247
{ ISD::CTLZ_ZERO_UNDEF, MVT::i32,{ 1, 2, 2, 2 } }, // BSR+XOR
42484248
{ ISD::CTLZ_ZERO_UNDEF, MVT::i16,{ 2, 2, 2, 2 } }, // BSR+XOR
42494249
{ ISD::CTLZ_ZERO_UNDEF, MVT::i8, { 2, 2, 3, 3 } }, // BSR+XOR

llvm/test/Analysis/CostModel/X86/ctlz-codesize.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ declare i8 @llvm.ctlz.i8(i8, i1)
1717

1818
define i64 @var_ctlz_i64(i64 %a) {
1919
; NOLZCNT-LABEL: 'var_ctlz_i64'
20-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
20+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
2121
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ctlz
2222
;
2323
; LZCNT-LABEL: 'var_ctlz_i64'
@@ -43,7 +43,7 @@ define i64 @var_ctlz_i64u(i64 %a) {
4343

4444
define i32 @var_ctlz_i32(i32 %a) {
4545
; NOLZCNT-LABEL: 'var_ctlz_i32'
46-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
46+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
4747
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %ctlz
4848
;
4949
; LZCNT-LABEL: 'var_ctlz_i32'
@@ -69,7 +69,7 @@ define i32 @var_ctlz_i32u(i32 %a) {
6969

7070
define i16 @var_ctlz_i16(i16 %a) {
7171
; NOLZCNT-LABEL: 'var_ctlz_i16'
72-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
72+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
7373
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %ctlz
7474
;
7575
; LZCNT-LABEL: 'var_ctlz_i16'
@@ -95,7 +95,7 @@ define i16 @var_ctlz_i16u(i16 %a) {
9595

9696
define i8 @var_ctlz_i8(i8 %a) {
9797
; NOLZCNT-LABEL: 'var_ctlz_i8'
98-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
98+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
9999
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %ctlz
100100
;
101101
; LZCNT-LABEL: 'var_ctlz_i8'

llvm/test/Analysis/CostModel/X86/ctlz-sizelatency.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ declare i8 @llvm.ctlz.i8(i8, i1)
1717

1818
define i64 @var_ctlz_i64(i64 %a) {
1919
; NOLZCNT-LABEL: 'var_ctlz_i64'
20-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
20+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
2121
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ctlz
2222
;
2323
; LZCNT-LABEL: 'var_ctlz_i64'
@@ -43,7 +43,7 @@ define i64 @var_ctlz_i64u(i64 %a) {
4343

4444
define i32 @var_ctlz_i32(i32 %a) {
4545
; NOLZCNT-LABEL: 'var_ctlz_i32'
46-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
46+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
4747
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %ctlz
4848
;
4949
; LZCNT-LABEL: 'var_ctlz_i32'
@@ -69,7 +69,7 @@ define i32 @var_ctlz_i32u(i32 %a) {
6969

7070
define i16 @var_ctlz_i16(i16 %a) {
7171
; NOLZCNT-LABEL: 'var_ctlz_i16'
72-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
72+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
7373
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %ctlz
7474
;
7575
; LZCNT-LABEL: 'var_ctlz_i16'
@@ -95,7 +95,7 @@ define i16 @var_ctlz_i16u(i16 %a) {
9595

9696
define i8 @var_ctlz_i8(i8 %a) {
9797
; NOLZCNT-LABEL: 'var_ctlz_i8'
98-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
98+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
9999
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %ctlz
100100
;
101101
; LZCNT-LABEL: 'var_ctlz_i8'

llvm/test/Analysis/CostModel/X86/ctlz.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ declare i8 @llvm.ctlz.i8(i8, i1)
1717

1818
define i64 @var_ctlz_i64(i64 %a) {
1919
; NOLZCNT-LABEL: 'var_ctlz_i64'
20-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
20+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
2121
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %ctlz
2222
;
2323
; LZCNT-LABEL: 'var_ctlz_i64'
@@ -43,7 +43,7 @@ define i64 @var_ctlz_i64u(i64 %a) {
4343

4444
define i32 @var_ctlz_i32(i32 %a) {
4545
; NOLZCNT-LABEL: 'var_ctlz_i32'
46-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
46+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
4747
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %ctlz
4848
;
4949
; LZCNT-LABEL: 'var_ctlz_i32'
@@ -69,7 +69,7 @@ define i32 @var_ctlz_i32u(i32 %a) {
6969

7070
define i16 @var_ctlz_i16(i16 %a) {
7171
; NOLZCNT-LABEL: 'var_ctlz_i16'
72-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
72+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
7373
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %ctlz
7474
;
7575
; LZCNT-LABEL: 'var_ctlz_i16'
@@ -95,7 +95,7 @@ define i16 @var_ctlz_i16u(i16 %a) {
9595

9696
define i8 @var_ctlz_i8(i8 %a) {
9797
; NOLZCNT-LABEL: 'var_ctlz_i8'
98-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
98+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
9999
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %ctlz
100100
;
101101
; LZCNT-LABEL: 'var_ctlz_i8'

llvm/test/Analysis/CostModel/X86/cttz-codesize.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ declare i8 @llvm.cttz.i8(i8, i1)
1818

1919
define i64 @var_cttz_i64(i64 %a) {
2020
; NOBMI-LABEL: 'var_cttz_i64'
21-
; NOBMI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
21+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
2222
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %cttz
2323
;
2424
; BMI-LABEL: 'var_cttz_i64'

llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ declare i8 @llvm.cttz.i8(i8, i1)
1818

1919
define i64 @var_cttz_i64(i64 %a) {
2020
; NOBMI-LABEL: 'var_cttz_i64'
21-
; NOBMI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
21+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
2222
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %cttz
2323
;
2424
; BMI-LABEL: 'var_cttz_i64'

0 commit comments

Comments
 (0)