@@ -3596,6 +3596,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
3596
3596
{ ISD::SMAX, MVT::v64i8, { 1 , 1 , 1 , 1 } },
3597
3597
{ ISD::SMIN, MVT::v32i16, { 1 , 1 , 1 , 1 } },
3598
3598
{ ISD::SMIN, MVT::v64i8, { 1 , 1 , 1 , 1 } },
3599
+ { ISD::SMULO, MVT::v32i16, { 3 , 6 , 4 , 4 } },
3600
+ { ISD::SMULO, MVT::v64i8, { 8 , 21 , 17 , 18 } },
3601
+ { ISD::UMULO, MVT::v32i16, { 2 , 5 , 3 , 3 } },
3602
+ { ISD::UMULO, MVT::v64i8, { 8 , 15 , 15 , 16 } },
3599
3603
{ ISD::SSUBSAT, MVT::v32i16, { 1 , 1 , 1 , 1 } },
3600
3604
{ ISD::SSUBSAT, MVT::v64i8, { 1 , 1 , 1 , 1 } },
3601
3605
{ ISD::UADDSAT, MVT::v32i16, { 1 , 1 , 1 , 1 } },
@@ -3674,8 +3678,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
3674
3678
{ ISD::SMIN, MVT::v64i8, { 3 , 7 , 5 , 5 } },
3675
3679
{ ISD::SMIN, MVT::v4i64, { 1 , 3 , 1 , 1 } },
3676
3680
{ ISD::SMIN, MVT::v2i64, { 1 , 3 , 1 , 1 } },
3681
+ { ISD::SMULO, MVT::v8i64, { 44 , 44 , 81 , 93 } },
3682
+ { ISD::SMULO, MVT::v16i32, { 5 , 12 , 9 , 11 } },
3677
3683
{ ISD::SSUBSAT, MVT::v2i64, { 2 , 13 , 9 , 10 } },
3678
- { ISD::SSUBSAT, MVT::v4i64, { 2 , 15 , 7 , 8 } },
3684
+ { ISD::SSUBSAT, MVT::v4i64, { 2 , 15 , 7 , 8 } },
3679
3685
{ ISD::SSUBSAT, MVT::v8i64, { 2 , 14 , 7 , 8 } },
3680
3686
{ ISD::SSUBSAT, MVT::v4i32, { 2 , 14 , 7 , 8 } },
3681
3687
{ ISD::SSUBSAT, MVT::v8i32, { 2 , 15 , 7 , 8 } },
@@ -3694,6 +3700,8 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
3694
3700
{ ISD::UMIN, MVT::v64i8, { 3 , 7 , 5 , 5 } },
3695
3701
{ ISD::UMIN, MVT::v4i64, { 1 , 3 , 1 , 1 } },
3696
3702
{ ISD::UMIN, MVT::v2i64, { 1 , 3 , 1 , 1 } },
3703
+ { ISD::UMULO, MVT::v8i64, { 52 , 52 , 95 , 104 } },
3704
+ { ISD::UMULO, MVT::v16i32, { 5 , 12 , 8 , 10 } },
3697
3705
{ ISD::UADDSAT, MVT::v2i64, { 1 , 4 , 4 , 4 } },
3698
3706
{ ISD::UADDSAT, MVT::v4i64, { 1 , 4 , 4 , 4 } },
3699
3707
{ ISD::UADDSAT, MVT::v8i64, { 1 , 4 , 4 , 4 } },
@@ -3828,6 +3836,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
3828
3836
{ ISD::SMIN, MVT::v8i32, { 1 , 1 , 1 , 2 } },
3829
3837
{ ISD::SMIN, MVT::v16i16, { 1 , 1 , 1 , 2 } },
3830
3838
{ ISD::SMIN, MVT::v32i8, { 1 , 1 , 1 , 2 } },
3839
+ { ISD::SMULO, MVT::v4i64, { 20 , 20 , 33 , 37 } },
3840
+ { ISD::SMULO, MVT::v2i64, { 8 , 8 , 13 , 15 } },
3841
+ { ISD::SMULO, MVT::v8i32, { 8 , 20 , 13 , 24 } },
3842
+ { ISD::SMULO, MVT::v4i32, { 5 , 15 , 11 , 12 } },
3843
+ { ISD::SMULO, MVT::v16i16, { 4 , 14 , 8 , 14 } },
3844
+ { ISD::SMULO, MVT::v8i16, { 3 , 9 , 6 , 6 } },
3845
+ { ISD::SMULO, MVT::v32i8, { 9 , 15 , 18 , 35 } },
3846
+ { ISD::SMULO, MVT::v16i8, { 6 , 22 , 14 , 21 } },
3831
3847
{ ISD::SSUBSAT, MVT::v2i64, { 4 , 13 , 9 , 13 } },
3832
3848
{ ISD::SSUBSAT, MVT::v4i64, { 4 , 15 , 9 , 13 } },
3833
3849
{ ISD::SSUBSAT, MVT::v4i32, { 3 , 14 , 9 , 11 } },
@@ -3849,6 +3865,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
3849
3865
{ ISD::UMIN, MVT::v8i32, { 1 , 1 , 1 , 2 } },
3850
3866
{ ISD::UMIN, MVT::v16i16, { 1 , 1 , 1 , 2 } },
3851
3867
{ ISD::UMIN, MVT::v32i8, { 1 , 1 , 1 , 2 } },
3868
+ { ISD::UMULO, MVT::v4i64, { 24 , 24 , 39 , 43 } },
3869
+ { ISD::UMULO, MVT::v2i64, { 10 , 10 , 15 , 19 } },
3870
+ { ISD::UMULO, MVT::v8i32, { 8 , 11 , 13 , 23 } },
3871
+ { ISD::UMULO, MVT::v4i32, { 5 , 12 , 11 , 12 } },
3872
+ { ISD::UMULO, MVT::v16i16, { 4 , 6 , 8 , 13 } },
3873
+ { ISD::UMULO, MVT::v8i16, { 2 , 8 , 6 , 6 } },
3874
+ { ISD::UMULO, MVT::v32i8, { 9 , 13 , 17 , 33 } },
3875
+ { ISD::UMULO, MVT::v16i8, { 6 , 19 , 13 , 20 } },
3852
3876
{ ISD::USUBSAT, MVT::v2i64, { 2 , 7 , 6 , 6 } },
3853
3877
{ ISD::USUBSAT, MVT::v4i64, { 3 , 7 , 6 , 10 } },
3854
3878
{ ISD::USUBSAT, MVT::v8i32, { 2 , 2 , 2 , 4 } },
@@ -3925,6 +3949,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
3925
3949
{ ISD::SMIN, MVT::v8i32, { 4 , 6 , 5 , 6 } }, // 2 x 128-bit Op + extract/insert
3926
3950
{ ISD::SMIN, MVT::v16i16, { 4 , 6 , 5 , 6 } }, // 2 x 128-bit Op + extract/insert
3927
3951
{ ISD::SMIN, MVT::v32i8, { 4 , 6 , 5 , 6 } }, // 2 x 128-bit Op + extract/insert
3952
+ { ISD::SMULO, MVT::v4i64, { 20 , 20 , 33 , 37 } },
3953
+ { ISD::SMULO, MVT::v2i64, { 9 , 9 , 13 , 17 } },
3954
+ { ISD::SMULO, MVT::v8i32, { 15 , 20 , 24 , 29 } },
3955
+ { ISD::SMULO, MVT::v4i32, { 7 , 15 , 11 , 13 } },
3956
+ { ISD::SMULO, MVT::v16i16, { 8 , 14 , 14 , 15 } },
3957
+ { ISD::SMULO, MVT::v8i16, { 3 , 9 , 6 , 6 } },
3958
+ { ISD::SMULO, MVT::v32i8, { 20 , 20 , 37 , 39 } },
3959
+ { ISD::SMULO, MVT::v16i8, { 9 , 22 , 18 , 21 } },
3928
3960
{ ISD::SSUBSAT, MVT::v2i64, { 7 , 13 , 9 , 13 } },
3929
3961
{ ISD::SSUBSAT, MVT::v4i64, { 15 , 21 , 18 , 29 } }, // 2 x 128-bit Op + extract/insert
3930
3962
{ ISD::SSUBSAT, MVT::v8i32, { 15 , 19 , 18 , 29 } }, // 2 x 128-bit Op + extract/insert
@@ -3945,6 +3977,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
3945
3977
{ ISD::UMIN, MVT::v8i32, { 4 , 6 , 5 , 6 } }, // 2 x 128-bit Op + extract/insert
3946
3978
{ ISD::UMIN, MVT::v16i16, { 4 , 6 , 5 , 6 } }, // 2 x 128-bit Op + extract/insert
3947
3979
{ ISD::UMIN, MVT::v32i8, { 4 , 6 , 5 , 6 } }, // 2 x 128-bit Op + extract/insert
3980
+ { ISD::UMULO, MVT::v4i64, { 24 , 26 , 39 , 45 } },
3981
+ { ISD::UMULO, MVT::v2i64, { 10 , 12 , 15 , 20 } },
3982
+ { ISD::UMULO, MVT::v8i32, { 14 , 15 , 23 , 28 } },
3983
+ { ISD::UMULO, MVT::v4i32, { 7 , 12 , 11 , 13 } },
3984
+ { ISD::UMULO, MVT::v16i16, { 7 , 11 , 13 , 14 } },
3985
+ { ISD::UMULO, MVT::v8i16, { 3 , 8 , 6 , 6 } },
3986
+ { ISD::UMULO, MVT::v32i8, { 19 , 19 , 35 , 37 } },
3987
+ { ISD::UMULO, MVT::v16i8, { 9 , 19 , 17 , 20 } },
3948
3988
{ ISD::USUBSAT, MVT::v2i64, { 3 , 7 , 6 , 6 } },
3949
3989
{ ISD::USUBSAT, MVT::v4i64, { 8 , 10 , 14 , 15 } }, // 2 x 128-bit Op + extract/insert
3950
3990
{ ISD::USUBSAT, MVT::v8i32, { 4 , 4 , 7 , 8 } }, // 2 x 128-bit Op + extract/insert
@@ -4020,6 +4060,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
4020
4060
{ ISD::SMIN, MVT::v2i64, { 3 , 7 , 2 , 3 } },
4021
4061
{ ISD::SMIN, MVT::v4i32, { 1 , 1 , 1 , 1 } },
4022
4062
{ ISD::SMIN, MVT::v16i8, { 1 , 1 , 1 , 1 } },
4063
+ { ISD::SMULO, MVT::v2i64, { 9 , 11 , 13 , 17 } },
4064
+ { ISD::SMULO, MVT::v4i32, { 20 , 24 , 13 , 19 } },
4065
+ { ISD::SMULO, MVT::v8i16, { 5 , 9 , 8 , 8 } },
4066
+ { ISD::SMULO, MVT::v16i8, { 13 , 22 , 24 , 25 } },
4023
4067
{ ISD::UADDSAT, MVT::v2i64, { 6 , 13 , 14 , 14 } },
4024
4068
{ ISD::UADDSAT, MVT::v4i32, { 2 , 2 , 4 , 4 } },
4025
4069
{ ISD::USUBSAT, MVT::v2i64, { 6 , 10 , 14 , 14 } },
@@ -4030,6 +4074,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
4030
4074
{ ISD::UMIN, MVT::v2i64, { 2 , 11 , 6 , 7 } },
4031
4075
{ ISD::UMIN, MVT::v4i32, { 1 , 1 , 1 , 1 } },
4032
4076
{ ISD::UMIN, MVT::v8i16, { 1 , 1 , 1 , 1 } },
4077
+ { ISD::UMULO, MVT::v2i64, { 14 , 20 , 15 , 20 } },
4078
+ { ISD::UMULO, MVT::v4i32, { 19 , 22 , 12 , 18 } },
4079
+ { ISD::UMULO, MVT::v8i16, { 4 , 9 , 7 , 7 } },
4080
+ { ISD::UMULO, MVT::v16i8, { 13 , 19 , 18 , 20 } },
4033
4081
};
4034
4082
static const CostKindTblEntry SSSE3CostTbl[] = {
4035
4083
{ ISD::ABS, MVT::v4i32, { 1 , 2 , 1 , 1 } },
@@ -4091,6 +4139,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
4091
4139
{ ISD::SMIN, MVT::v4i32, { 2 , 4 , 5 , 5 } },
4092
4140
{ ISD::SMIN, MVT::v8i16, { 1 , 1 , 1 , 1 } },
4093
4141
{ ISD::SMIN, MVT::v16i8, { 2 , 4 , 5 , 5 } },
4142
+ { ISD::SMULO, MVT::v2i64, { 30 , 33 , 13 , 23 } },
4143
+ { ISD::SMULO, MVT::v4i32, { 20 , 24 , 23 , 23 } },
4144
+ { ISD::SMULO, MVT::v8i16, { 5 , 10 , 8 , 8 } },
4145
+ { ISD::SMULO, MVT::v16i8, { 13 , 23 , 24 , 25 } },
4094
4146
{ ISD::SSUBSAT, MVT::v2i64, { 16 , 19 , 31 , 31 } },
4095
4147
{ ISD::SSUBSAT, MVT::v4i32, { 6 , 14 , 12 , 13 } },
4096
4148
{ ISD::SSUBSAT, MVT::v8i16, { 1 , 2 , 1 , 1 } },
@@ -4107,6 +4159,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
4107
4159
{ ISD::UMIN, MVT::v4i32, { 2 , 5 , 8 , 8 } },
4108
4160
{ ISD::UMIN, MVT::v8i16, { 1 , 3 , 3 , 3 } },
4109
4161
{ ISD::UMIN, MVT::v16i8, { 1 , 1 , 1 , 1 } },
4162
+ { ISD::UMULO, MVT::v2i64, { 30 , 33 , 15 , 29 } },
4163
+ { ISD::UMULO, MVT::v4i32, { 19 , 22 , 14 , 18 } },
4164
+ { ISD::UMULO, MVT::v8i16, { 4 , 9 , 7 , 7 } },
4165
+ { ISD::UMULO, MVT::v16i8, { 13 , 19 , 20 , 20 } },
4110
4166
{ ISD::USUBSAT, MVT::v2i64, { 7 , 10 , 14 , 14 } },
4111
4167
{ ISD::USUBSAT, MVT::v4i32, { 4 , 4 , 7 , 7 } },
4112
4168
{ ISD::USUBSAT, MVT::v8i16, { 1 , 2 , 1 , 1 } },
0 commit comments