Skip to content

Commit f2a0f97

Browse files
committed
[CostModel][X86] Improve vector mul overflow intrinsic costs
1 parent 42c79f7 commit f2a0f97

File tree

3 files changed

+292
-236
lines changed

3 files changed

+292
-236
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 57 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3596,6 +3596,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
35963596
{ ISD::SMAX, MVT::v64i8, { 1, 1, 1, 1 } },
35973597
{ ISD::SMIN, MVT::v32i16, { 1, 1, 1, 1 } },
35983598
{ ISD::SMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3599+
{ ISD::SMULO, MVT::v32i16, { 3, 6, 4, 4 } },
3600+
{ ISD::SMULO, MVT::v64i8, { 8, 21, 17, 18 } },
3601+
{ ISD::UMULO, MVT::v32i16, { 2, 5, 3, 3 } },
3602+
{ ISD::UMULO, MVT::v64i8, { 8, 15, 15, 16 } },
35993603
{ ISD::SSUBSAT, MVT::v32i16, { 1, 1, 1, 1 } },
36003604
{ ISD::SSUBSAT, MVT::v64i8, { 1, 1, 1, 1 } },
36013605
{ ISD::UADDSAT, MVT::v32i16, { 1, 1, 1, 1 } },
@@ -3674,8 +3678,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
36743678
{ ISD::SMIN, MVT::v64i8, { 3, 7, 5, 5 } },
36753679
{ ISD::SMIN, MVT::v4i64, { 1, 3, 1, 1 } },
36763680
{ ISD::SMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3681+
{ ISD::SMULO, MVT::v8i64, { 44, 44, 81, 93 } },
3682+
{ ISD::SMULO, MVT::v16i32, { 5, 12, 9, 11 } },
36773683
{ ISD::SSUBSAT, MVT::v2i64, { 2, 13, 9, 10 } },
3678-
{ ISD::SSUBSAT, MVT::v4i64, { 2, 15, 7, 8} },
3684+
{ ISD::SSUBSAT, MVT::v4i64, { 2, 15, 7, 8 } },
36793685
{ ISD::SSUBSAT, MVT::v8i64, { 2, 14, 7, 8 } },
36803686
{ ISD::SSUBSAT, MVT::v4i32, { 2, 14, 7, 8 } },
36813687
{ ISD::SSUBSAT, MVT::v8i32, { 2, 15, 7, 8 } },
@@ -3694,6 +3700,8 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
36943700
{ ISD::UMIN, MVT::v64i8, { 3, 7, 5, 5 } },
36953701
{ ISD::UMIN, MVT::v4i64, { 1, 3, 1, 1 } },
36963702
{ ISD::UMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3703+
{ ISD::UMULO, MVT::v8i64, { 52, 52, 95, 104} },
3704+
{ ISD::UMULO, MVT::v16i32, { 5, 12, 8, 10 } },
36973705
{ ISD::UADDSAT, MVT::v2i64, { 1, 4, 4, 4 } },
36983706
{ ISD::UADDSAT, MVT::v4i64, { 1, 4, 4, 4 } },
36993707
{ ISD::UADDSAT, MVT::v8i64, { 1, 4, 4, 4 } },
@@ -3828,6 +3836,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
38283836
{ ISD::SMIN, MVT::v8i32, { 1, 1, 1, 2 } },
38293837
{ ISD::SMIN, MVT::v16i16, { 1, 1, 1, 2 } },
38303838
{ ISD::SMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3839+
{ ISD::SMULO, MVT::v4i64, { 20, 20, 33, 37 } },
3840+
{ ISD::SMULO, MVT::v2i64, { 8, 8, 13, 15 } },
3841+
{ ISD::SMULO, MVT::v8i32, { 8, 20, 13, 24 } },
3842+
{ ISD::SMULO, MVT::v4i32, { 5, 15, 11, 12 } },
3843+
{ ISD::SMULO, MVT::v16i16, { 4, 14, 8, 14 } },
3844+
{ ISD::SMULO, MVT::v8i16, { 3, 9, 6, 6 } },
3845+
{ ISD::SMULO, MVT::v32i8, { 9, 15, 18, 35 } },
3846+
{ ISD::SMULO, MVT::v16i8, { 6, 22, 14, 21 } },
38313847
{ ISD::SSUBSAT, MVT::v2i64, { 4, 13, 9, 13 } },
38323848
{ ISD::SSUBSAT, MVT::v4i64, { 4, 15, 9, 13 } },
38333849
{ ISD::SSUBSAT, MVT::v4i32, { 3, 14, 9, 11 } },
@@ -3849,6 +3865,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
38493865
{ ISD::UMIN, MVT::v8i32, { 1, 1, 1, 2 } },
38503866
{ ISD::UMIN, MVT::v16i16, { 1, 1, 1, 2 } },
38513867
{ ISD::UMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3868+
{ ISD::UMULO, MVT::v4i64, { 24, 24, 39, 43 } },
3869+
{ ISD::UMULO, MVT::v2i64, { 10, 10, 15, 19 } },
3870+
{ ISD::UMULO, MVT::v8i32, { 8, 11, 13, 23 } },
3871+
{ ISD::UMULO, MVT::v4i32, { 5, 12, 11, 12 } },
3872+
{ ISD::UMULO, MVT::v16i16, { 4, 6, 8, 13 } },
3873+
{ ISD::UMULO, MVT::v8i16, { 2, 8, 6, 6 } },
3874+
{ ISD::UMULO, MVT::v32i8, { 9, 13, 17, 33 } },
3875+
{ ISD::UMULO, MVT::v16i8, { 6, 19, 13, 20 } },
38523876
{ ISD::USUBSAT, MVT::v2i64, { 2, 7, 6, 6 } },
38533877
{ ISD::USUBSAT, MVT::v4i64, { 3, 7, 6, 10 } },
38543878
{ ISD::USUBSAT, MVT::v8i32, { 2, 2, 2, 4 } },
@@ -3925,6 +3949,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
39253949
{ ISD::SMIN, MVT::v8i32, { 4, 6, 5, 6 } }, // 2 x 128-bit Op + extract/insert
39263950
{ ISD::SMIN, MVT::v16i16, { 4, 6, 5, 6 } }, // 2 x 128-bit Op + extract/insert
39273951
{ ISD::SMIN, MVT::v32i8, { 4, 6, 5, 6 } }, // 2 x 128-bit Op + extract/insert
3952+
{ ISD::SMULO, MVT::v4i64, { 20, 20, 33, 37 } },
3953+
{ ISD::SMULO, MVT::v2i64, { 9, 9, 13, 17 } },
3954+
{ ISD::SMULO, MVT::v8i32, { 15, 20, 24, 29 } },
3955+
{ ISD::SMULO, MVT::v4i32, { 7, 15, 11, 13 } },
3956+
{ ISD::SMULO, MVT::v16i16, { 8, 14, 14, 15 } },
3957+
{ ISD::SMULO, MVT::v8i16, { 3, 9, 6, 6 } },
3958+
{ ISD::SMULO, MVT::v32i8, { 20, 20, 37, 39 } },
3959+
{ ISD::SMULO, MVT::v16i8, { 9, 22, 18, 21 } },
39283960
{ ISD::SSUBSAT, MVT::v2i64, { 7, 13, 9, 13 } },
39293961
{ ISD::SSUBSAT, MVT::v4i64, { 15, 21, 18, 29 } }, // 2 x 128-bit Op + extract/insert
39303962
{ ISD::SSUBSAT, MVT::v8i32, { 15, 19, 18, 29 } }, // 2 x 128-bit Op + extract/insert
@@ -3945,6 +3977,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
39453977
{ ISD::UMIN, MVT::v8i32, { 4, 6, 5, 6 } }, // 2 x 128-bit Op + extract/insert
39463978
{ ISD::UMIN, MVT::v16i16, { 4, 6, 5, 6 } }, // 2 x 128-bit Op + extract/insert
39473979
{ ISD::UMIN, MVT::v32i8, { 4, 6, 5, 6 } }, // 2 x 128-bit Op + extract/insert
3980+
{ ISD::UMULO, MVT::v4i64, { 24, 26, 39, 45 } },
3981+
{ ISD::UMULO, MVT::v2i64, { 10, 12, 15, 20 } },
3982+
{ ISD::UMULO, MVT::v8i32, { 14, 15, 23, 28 } },
3983+
{ ISD::UMULO, MVT::v4i32, { 7, 12, 11, 13 } },
3984+
{ ISD::UMULO, MVT::v16i16, { 7, 11, 13, 14 } },
3985+
{ ISD::UMULO, MVT::v8i16, { 3, 8, 6, 6 } },
3986+
{ ISD::UMULO, MVT::v32i8, { 19, 19, 35, 37 } },
3987+
{ ISD::UMULO, MVT::v16i8, { 9, 19, 17, 20 } },
39483988
{ ISD::USUBSAT, MVT::v2i64, { 3, 7, 6, 6 } },
39493989
{ ISD::USUBSAT, MVT::v4i64, { 8, 10, 14, 15 } }, // 2 x 128-bit Op + extract/insert
39503990
{ ISD::USUBSAT, MVT::v8i32, { 4, 4, 7, 8 } }, // 2 x 128-bit Op + extract/insert
@@ -4020,6 +4060,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
40204060
{ ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
40214061
{ ISD::SMIN, MVT::v4i32, { 1, 1, 1, 1 } },
40224062
{ ISD::SMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4063+
{ ISD::SMULO, MVT::v2i64, { 9, 11, 13, 17 } },
4064+
{ ISD::SMULO, MVT::v4i32, { 20, 24, 13, 19 } },
4065+
{ ISD::SMULO, MVT::v8i16, { 5, 9, 8, 8 } },
4066+
{ ISD::SMULO, MVT::v16i8, { 13, 22, 24, 25 } },
40234067
{ ISD::UADDSAT, MVT::v2i64, { 6, 13, 14, 14 } },
40244068
{ ISD::UADDSAT, MVT::v4i32, { 2, 2, 4, 4 } },
40254069
{ ISD::USUBSAT, MVT::v2i64, { 6, 10, 14, 14 } },
@@ -4030,6 +4074,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
40304074
{ ISD::UMIN, MVT::v2i64, { 2, 11, 6, 7 } },
40314075
{ ISD::UMIN, MVT::v4i32, { 1, 1, 1, 1 } },
40324076
{ ISD::UMIN, MVT::v8i16, { 1, 1, 1, 1 } },
4077+
{ ISD::UMULO, MVT::v2i64, { 14, 20, 15, 20 } },
4078+
{ ISD::UMULO, MVT::v4i32, { 19, 22, 12, 18 } },
4079+
{ ISD::UMULO, MVT::v8i16, { 4, 9, 7, 7 } },
4080+
{ ISD::UMULO, MVT::v16i8, { 13, 19, 18, 20 } },
40334081
};
40344082
static const CostKindTblEntry SSSE3CostTbl[] = {
40354083
{ ISD::ABS, MVT::v4i32, { 1, 2, 1, 1 } },
@@ -4091,6 +4139,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
40914139
{ ISD::SMIN, MVT::v4i32, { 2, 4, 5, 5 } },
40924140
{ ISD::SMIN, MVT::v8i16, { 1, 1, 1, 1 } },
40934141
{ ISD::SMIN, MVT::v16i8, { 2, 4, 5, 5 } },
4142+
{ ISD::SMULO, MVT::v2i64, { 30, 33, 13, 23 } },
4143+
{ ISD::SMULO, MVT::v4i32, { 20, 24, 23, 23 } },
4144+
{ ISD::SMULO, MVT::v8i16, { 5, 10, 8, 8 } },
4145+
{ ISD::SMULO, MVT::v16i8, { 13, 23, 24, 25 } },
40944146
{ ISD::SSUBSAT, MVT::v2i64, { 16, 19, 31, 31 } },
40954147
{ ISD::SSUBSAT, MVT::v4i32, { 6, 14, 12, 13 } },
40964148
{ ISD::SSUBSAT, MVT::v8i16, { 1, 2, 1, 1 } },
@@ -4107,6 +4159,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
41074159
{ ISD::UMIN, MVT::v4i32, { 2, 5, 8, 8 } },
41084160
{ ISD::UMIN, MVT::v8i16, { 1, 3, 3, 3 } },
41094161
{ ISD::UMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4162+
{ ISD::UMULO, MVT::v2i64, { 30, 33, 15, 29 } },
4163+
{ ISD::UMULO, MVT::v4i32, { 19, 22, 14, 18 } },
4164+
{ ISD::UMULO, MVT::v8i16, { 4, 9, 7, 7 } },
4165+
{ ISD::UMULO, MVT::v16i8, { 13, 19, 20, 20 } },
41104166
{ ISD::USUBSAT, MVT::v2i64, { 7, 10, 14, 14 } },
41114167
{ ISD::USUBSAT, MVT::v4i32, { 4, 4, 7, 7 } },
41124168
{ ISD::USUBSAT, MVT::v8i16, { 1, 2, 1, 1 } },

0 commit comments

Comments
 (0)