Skip to content

Commit 80f3bb4

Browse files
author
Chen Zheng
authored
[PowerPC] adjust cost for vector insert/extract with non const index (llvm#79092)
P9 has vxform `Vector Extract Element Instructions` like `vextuwrx`, and P10 has vxform `Vector Insert Element Instructions` like `vinsd`. Update the instruction cost to reflect these instructions. Fixes llvm#50249
1 parent bb91b43 commit 80f3bb4

File tree

2 files changed

+55
-43
lines changed

2 files changed

+55
-43
lines changed

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

Lines changed: 37 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -697,39 +697,51 @@ InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
697697

698698
return Cost;
699699

700-
} else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
700+
} else if (Val->getScalarType()->isIntegerTy()) {
701701
unsigned EltSize = Val->getScalarSizeInBits();
702702
// Computing on 1 bit values requires extra mask or compare operations.
703-
unsigned MaskCost = VecMaskCost && EltSize == 1 ? 1 : 0;
703+
unsigned MaskCostForOneBitSize = (VecMaskCost && EltSize == 1) ? 1 : 0;
704+
// Computing on a non-constant index requires extra mask or compare operations.
705+
unsigned MaskCostForIdx = (Index != -1U) ? 0 : 1;
704706
if (ST->hasP9Altivec()) {
705-
if (ISD == ISD::INSERT_VECTOR_ELT)
706-
// A move-to VSR and a permute/insert. Assume vector operation cost
707-
// for both (cost will be 2x on P9).
708-
return 2 * CostFactor;
709-
710-
// It's an extract. Maybe we can do a cheap move-from VSR.
711-
unsigned EltSize = Val->getScalarSizeInBits();
712-
if (EltSize == 64) {
713-
unsigned MfvsrdIndex = ST->isLittleEndian() ? 1 : 0;
714-
if (Index == MfvsrdIndex)
715-
return 1;
716-
} else if (EltSize == 32) {
717-
unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1;
718-
if (Index == MfvsrwzIndex)
719-
return 1;
720-
}
721-
722-
// We need a vector extract (or mfvsrld). Assume vector operation cost.
723-
// The cost of the load constant for a vector extract is disregarded
724-
// (invariant, easily schedulable).
725-
return CostFactor + MaskCost;
707+
// P10 has vxform insert which can handle non const index. The
708+
// MaskCostForIdx is for masking the index.
709+
// P9 has insert for const index. A move-to VSR and a permute/insert.
710+
// Assume vector operation cost for both (cost will be 2x on P9).
711+
if (ISD == ISD::INSERT_VECTOR_ELT) {
712+
if (ST->hasP10Vector())
713+
return CostFactor + MaskCostForIdx;
714+
else if (Index != -1U)
715+
return 2 * CostFactor;
716+
} else if (ISD == ISD::EXTRACT_VECTOR_ELT) {
717+
// It's an extract. Maybe we can do a cheap move-from VSR.
718+
unsigned EltSize = Val->getScalarSizeInBits();
719+
if (EltSize == 64) {
720+
// FIXME: no need to worry about endian, P9 has both mfvsrd/mfvsrld.
721+
unsigned MfvsrdIndex = ST->isLittleEndian() ? 1 : 0;
722+
if (Index == MfvsrdIndex)
723+
return 1;
724+
} else if (EltSize == 32) {
725+
unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1;
726+
if (Index == MfvsrwzIndex)
727+
return 1;
728+
729+
// For other indices, such as a non-constant index, P9 has vxform extract. The
730+
// MaskCostForIdx is for masking the index.
731+
return CostFactor + MaskCostForIdx;
732+
}
726733

727-
} else if (ST->hasDirectMove()) {
734+
// We need a vector extract (or mfvsrld). Assume vector operation cost.
735+
// The cost of the load constant for a vector extract is disregarded
736+
// (invariant, easily schedulable).
737+
return CostFactor + MaskCostForOneBitSize + MaskCostForIdx;
738+
}
739+
} else if (ST->hasDirectMove() && Index != -1U) {
728740
// Assume permute has standard cost.
729741
// Assume move-to/move-from VSR have 2x standard cost.
730742
if (ISD == ISD::INSERT_VECTOR_ELT)
731743
return 3;
732-
return 3 + MaskCost;
744+
return 3 + MaskCostForOneBitSize;
733745
}
734746
}
735747

llvm/test/Analysis/CostModel/PowerPC/insert_extract.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ define i32 @insert(i32 %arg) {
2727
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
2828
;
2929
; CHECK-P10-LABEL: 'insert'
30-
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x = insertelement <4 x i32> undef, i32 %arg, i32 0
30+
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x = insertelement <4 x i32> undef, i32 %arg, i32 0
3131
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
3232
;
3333
%x = insertelement <4 x i32> undef, i32 %arg, i32 0
@@ -109,7 +109,7 @@ define void @test4xi32(<4 x i32> %v1, i32 %x1) {
109109
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
110110
;
111111
; CHECK-P10-LABEL: 'test4xi32'
112-
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2
112+
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2
113113
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
114114
;
115115
%v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2
@@ -239,7 +239,7 @@ define <2 x i64> @insert_i64_x(<2 x i64> %dest, i64 %arg, i32 %idx) {
239239
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %x
240240
;
241241
; CHECK-P10-LABEL: 'insert_i64_x'
242-
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %x = insertelement <2 x i64> %dest, i64 %arg, i32 %idx
242+
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x = insertelement <2 x i64> %dest, i64 %arg, i32 %idx
243243
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %x
244244
;
245245
%x = insertelement <2 x i64> %dest, i64 %arg, i32 %idx
@@ -264,7 +264,7 @@ define <4 x i32> @insert_i32_x(<4 x i32> %dest, i32 %arg, i32 %idx) {
264264
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %x
265265
;
266266
; CHECK-P10-LABEL: 'insert_i32_x'
267-
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %x = insertelement <4 x i32> %dest, i32 %arg, i32 %idx
267+
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x = insertelement <4 x i32> %dest, i32 %arg, i32 %idx
268268
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %x
269269
;
270270
%x = insertelement <4 x i32> %dest, i32 %arg, i32 %idx
@@ -289,7 +289,7 @@ define <8 x i16> @insert_i16_x(<8 x i16> %dest, i16 %arg, i32 %idx) {
289289
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %x
290290
;
291291
; CHECK-P10-LABEL: 'insert_i16_x'
292-
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %x = insertelement <8 x i16> %dest, i16 %arg, i32 %idx
292+
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x = insertelement <8 x i16> %dest, i16 %arg, i32 %idx
293293
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %x
294294
;
295295
%x = insertelement <8 x i16> %dest, i16 %arg, i32 %idx
@@ -314,7 +314,7 @@ define <16 x i8> @insert_i8_x(<16 x i8> %dest, i8 %arg, i32 %idx) {
314314
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %x
315315
;
316316
; CHECK-P10-LABEL: 'insert_i8_x'
317-
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %x = insertelement <16 x i8> %dest, i8 %arg, i32 %idx
317+
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x = insertelement <16 x i8> %dest, i8 %arg, i32 %idx
318318
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %x
319319
;
320320
%x = insertelement <16 x i8> %dest, i8 %arg, i32 %idx
@@ -331,15 +331,15 @@ define i64 @extract_i64_x(<2 x i64> %arg, i32 %idx) {
331331
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %x
332332
;
333333
; CHECK-P9BE-LABEL: 'extract_i64_x'
334-
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x = extractelement <2 x i64> %arg, i32 %idx
334+
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <2 x i64> %arg, i32 %idx
335335
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %x
336336
;
337337
; CHECK-P9LE-LABEL: 'extract_i64_x'
338-
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x = extractelement <2 x i64> %arg, i32 %idx
338+
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <2 x i64> %arg, i32 %idx
339339
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %x
340340
;
341341
; CHECK-P10-LABEL: 'extract_i64_x'
342-
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <2 x i64> %arg, i32 %idx
342+
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <2 x i64> %arg, i32 %idx
343343
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %x
344344
;
345345
%x = extractelement <2 x i64> %arg, i32 %idx
@@ -356,15 +356,15 @@ define i32 @extract_i32_x(<4 x i32> %arg, i32 %idx) {
356356
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x
357357
;
358358
; CHECK-P9BE-LABEL: 'extract_i32_x'
359-
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x = extractelement <4 x i32> %arg, i32 %idx
359+
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <4 x i32> %arg, i32 %idx
360360
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x
361361
;
362362
; CHECK-P9LE-LABEL: 'extract_i32_x'
363-
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x = extractelement <4 x i32> %arg, i32 %idx
363+
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <4 x i32> %arg, i32 %idx
364364
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x
365365
;
366366
; CHECK-P10-LABEL: 'extract_i32_x'
367-
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <4 x i32> %arg, i32 %idx
367+
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <4 x i32> %arg, i32 %idx
368368
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x
369369
;
370370
%x = extractelement <4 x i32> %arg, i32 %idx
@@ -381,15 +381,15 @@ define i16 @extract_i16_x(<8 x i16> %arg, i32 %idx) {
381381
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %x
382382
;
383383
; CHECK-P9BE-LABEL: 'extract_i16_x'
384-
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x = extractelement <8 x i16> %arg, i32 %idx
384+
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <8 x i16> %arg, i32 %idx
385385
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %x
386386
;
387387
; CHECK-P9LE-LABEL: 'extract_i16_x'
388-
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x = extractelement <8 x i16> %arg, i32 %idx
388+
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <8 x i16> %arg, i32 %idx
389389
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %x
390390
;
391391
; CHECK-P10-LABEL: 'extract_i16_x'
392-
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <8 x i16> %arg, i32 %idx
392+
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <8 x i16> %arg, i32 %idx
393393
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %x
394394
;
395395
%x = extractelement <8 x i16> %arg, i32 %idx
@@ -406,15 +406,15 @@ define i8 @extract_i8_x(<16 x i8> %arg, i32 %idx) {
406406
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %x
407407
;
408408
; CHECK-P9BE-LABEL: 'extract_i8_x'
409-
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x = extractelement <16 x i8> %arg, i32 %idx
409+
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <16 x i8> %arg, i32 %idx
410410
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %x
411411
;
412412
; CHECK-P9LE-LABEL: 'extract_i8_x'
413-
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x = extractelement <16 x i8> %arg, i32 %idx
413+
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <16 x i8> %arg, i32 %idx
414414
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %x
415415
;
416416
; CHECK-P10-LABEL: 'extract_i8_x'
417-
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <16 x i8> %arg, i32 %idx
417+
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <16 x i8> %arg, i32 %idx
418418
; CHECK-P10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %x
419419
;
420420
%x = extractelement <16 x i8> %arg, i32 %idx

0 commit comments

Comments
 (0)