Skip to content

Commit 3e9d046

Browse files
committed
[CostModel][X86] Improve i16 and vXi16 MUL costs
Use a modified version of the D103695 script to determine more accurate throughput/latency/codesize/size-latency cost estimates.
1 parent 70d8c01 commit 3e9d046

13 files changed

+186
-181
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1087,7 +1087,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
10871087

10881088
{ ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } }, // extend/pmullw/pack
10891089
{ ISD::MUL, MVT::v32i8, { 6, 11,10,19 } }, // unpack/pmullw
1090-
{ ISD::MUL, MVT::v16i16, { 2, 5, 1, 1 } }, // pmullw
1090+
{ ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } }, // pmullw
10911091
{ ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } }, // pmulld
10921092
{ ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } }, // pmulld
10931093
{ ISD::MUL, MVT::v4i64, { 6, 10, 8,13 } }, // 3*pmuludq/3*shift/2*add
@@ -1395,6 +1395,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
13951395
{ ISD::SUB, MVT::i32, { 1 } }, // Pentium III from http://www.agner.org/
13961396

13971397
{ ISD::MUL, MVT::i8, { 3, 4, 1, 1 } },
1398+
{ ISD::MUL, MVT::i16, { 2, 4, 1, 1 } },
13981399

13991400
{ ISD::FNEG, MVT::f64, { 2, 2, 1, 3 } }, // (x87)
14001401
{ ISD::FADD, MVT::f64, { 2, 3, 1, 1 } }, // (x87)

llvm/test/Analysis/CostModel/X86/arith-fix.ll

Lines changed: 20 additions & 20 deletions
Large diffs are not rendered by default.

llvm/test/Analysis/CostModel/X86/arith-int-latency.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -627,7 +627,7 @@ define i32 @mul(i32 %arg) {
627627
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = mul <4 x i32> undef, undef
628628
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = mul <8 x i32> undef, undef
629629
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = mul <16 x i32> undef, undef
630-
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
630+
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, undef
631631
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = mul <8 x i16> undef, undef
632632
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = mul <16 x i16> undef, undef
633633
; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = mul <32 x i16> undef, undef
@@ -649,7 +649,7 @@ define i32 @mul(i32 %arg) {
649649
; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = mul <4 x i32> undef, undef
650650
; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = mul <8 x i32> undef, undef
651651
; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = mul <16 x i32> undef, undef
652-
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
652+
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, undef
653653
; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = mul <8 x i16> undef, undef
654654
; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = mul <16 x i16> undef, undef
655655
; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = mul <32 x i16> undef, undef
@@ -671,7 +671,7 @@ define i32 @mul(i32 %arg) {
671671
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = mul <4 x i32> undef, undef
672672
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = mul <8 x i32> undef, undef
673673
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = mul <16 x i32> undef, undef
674-
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
674+
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, undef
675675
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = mul <8 x i16> undef, undef
676676
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = mul <16 x i16> undef, undef
677677
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = mul <32 x i16> undef, undef
@@ -693,7 +693,7 @@ define i32 @mul(i32 %arg) {
693693
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = mul <4 x i32> undef, undef
694694
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = mul <8 x i32> undef, undef
695695
; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = mul <16 x i32> undef, undef
696-
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
696+
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, undef
697697
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = mul <8 x i16> undef, undef
698698
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = mul <16 x i16> undef, undef
699699
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = mul <32 x i16> undef, undef
@@ -715,7 +715,7 @@ define i32 @mul(i32 %arg) {
715715
; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = mul <4 x i32> undef, undef
716716
; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = mul <8 x i32> undef, undef
717717
; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = mul <16 x i32> undef, undef
718-
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
718+
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, undef
719719
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = mul <8 x i16> undef, undef
720720
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = mul <16 x i16> undef, undef
721721
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = mul <32 x i16> undef, undef
@@ -737,7 +737,7 @@ define i32 @mul(i32 %arg) {
737737
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = mul <4 x i32> undef, undef
738738
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = mul <8 x i32> undef, undef
739739
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = mul <16 x i32> undef, undef
740-
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
740+
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, undef
741741
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = mul <8 x i16> undef, undef
742742
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = mul <16 x i16> undef, undef
743743
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = mul <32 x i16> undef, undef
@@ -759,7 +759,7 @@ define i32 @mul(i32 %arg) {
759759
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = mul <4 x i32> undef, undef
760760
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = mul <8 x i32> undef, undef
761761
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = mul <16 x i32> undef, undef
762-
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
762+
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, undef
763763
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = mul <8 x i16> undef, undef
764764
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = mul <16 x i16> undef, undef
765765
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = mul <32 x i16> undef, undef
@@ -781,7 +781,7 @@ define i32 @mul(i32 %arg) {
781781
; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = mul <4 x i32> undef, undef
782782
; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = mul <8 x i32> undef, undef
783783
; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = mul <16 x i32> undef, undef
784-
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
784+
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, undef
785785
; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = mul <8 x i16> undef, undef
786786
; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = mul <16 x i16> undef, undef
787787
; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = mul <32 x i16> undef, undef

llvm/test/Analysis/CostModel/X86/arith-int-sizelatency.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -695,8 +695,8 @@ define i32 @mul(i32 %arg) {
695695
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = mul <16 x i32> undef, undef
696696
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
697697
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef
698-
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = mul <16 x i16> undef, undef
699-
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = mul <32 x i16> undef, undef
698+
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = mul <16 x i16> undef, undef
699+
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = mul <32 x i16> undef, undef
700700
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef
701701
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = mul <2 x i8> undef, undef
702702
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
@@ -717,7 +717,7 @@ define i32 @mul(i32 %arg) {
717717
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = mul <16 x i32> undef, undef
718718
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
719719
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef
720-
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = mul <16 x i16> undef, undef
720+
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = mul <16 x i16> undef, undef
721721
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = mul <32 x i16> undef, undef
722722
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef
723723
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = mul <2 x i8> undef, undef
@@ -739,7 +739,7 @@ define i32 @mul(i32 %arg) {
739739
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = mul <16 x i32> undef, undef
740740
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
741741
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef
742-
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = mul <16 x i16> undef, undef
742+
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = mul <16 x i16> undef, undef
743743
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = mul <32 x i16> undef, undef
744744
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef
745745
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = mul <2 x i8> undef, undef
@@ -761,7 +761,7 @@ define i32 @mul(i32 %arg) {
761761
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = mul <16 x i32> undef, undef
762762
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
763763
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef
764-
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = mul <16 x i16> undef, undef
764+
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = mul <16 x i16> undef, undef
765765
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = mul <32 x i16> undef, undef
766766
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef
767767
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = mul <2 x i8> undef, undef

0 commit comments

Comments
 (0)