Skip to content

[AArch64] Add getVectorInstrCost Codesize costs handling. #130946

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 14 additions & 9 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3435,8 +3435,8 @@ InstructionCost AArch64TTIImpl::getCFInstrCost(unsigned Opcode,
}

InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(
unsigned Opcode, Type *Val, unsigned Index, bool HasRealUse,
const Instruction *I, Value *Scalar,
unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
bool HasRealUse, const Instruction *I, Value *Scalar,
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {
assert(Val->isVectorTy() && "This must be a vector type");

Expand Down Expand Up @@ -3469,12 +3469,16 @@ InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(
// and its second operand is a load, then we will generate a LD1, which
// is an expensive instruction.
if (I && dyn_cast<LoadInst>(I->getOperand(1)))
return ST->getVectorInsertExtractBaseCost() + 1;
return CostKind == TTI::TCK_CodeSize
? 0
: ST->getVectorInsertExtractBaseCost() + 1;

// i1 inserts and extracts will include an extra cset or cmp of the vector
// value. Increase the cost by 1 to account for it.
if (Val->getScalarSizeInBits() == 1)
return ST->getVectorInsertExtractBaseCost() + 1;
return CostKind == TTI::TCK_CodeSize
? 2
: ST->getVectorInsertExtractBaseCost() + 1;

// FIXME:
// If the extract-element and insert-element instructions could be
Expand Down Expand Up @@ -3598,7 +3602,8 @@ InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(
return 0;

// All other insert/extracts cost this much.
return ST->getVectorInsertExtractBaseCost();
return CostKind == TTI::TCK_CodeSize ? 1
: ST->getVectorInsertExtractBaseCost();
}

InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
Expand All @@ -3607,22 +3612,22 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
Value *Op1) {
bool HasRealUse =
Opcode == Instruction::InsertElement && Op0 && !isa<UndefValue>(Op0);
return getVectorInstrCostHelper(Opcode, Val, Index, HasRealUse);
return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, HasRealUse);
}

InstructionCost AArch64TTIImpl::getVectorInstrCost(
unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
Value *Scalar,
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {
return getVectorInstrCostHelper(Opcode, Val, Index, false, nullptr, Scalar,
ScalarUserAndIdx);
return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, false, nullptr,
Scalar, ScalarUserAndIdx);
}

InstructionCost AArch64TTIImpl::getVectorInstrCost(const Instruction &I,
Type *Val,
TTI::TargetCostKind CostKind,
unsigned Index) {
return getVectorInstrCostHelper(I.getOpcode(), Val, Index,
return getVectorInstrCostHelper(I.getOpcode(), Val, CostKind, Index,
true /* HasRealUse */, &I);
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
/// of the extract(nullptr if user is not known before vectorization) and
/// 'Idx' being the extract lane.
InstructionCost getVectorInstrCostHelper(
unsigned Opcode, Type *Val, unsigned Index, bool HasRealUse,
const Instruction *I = nullptr, Value *Scalar = nullptr,
unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
bool HasRealUse, const Instruction *I = nullptr, Value *Scalar = nullptr,
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx = {});

public:
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -536,9 +536,9 @@ define void @fsqrt() {
define void @fsqrt_fp16() {
; CHECK-BASE-LABEL: 'fsqrt_fp16'
; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %F16 = call half @llvm.sqrt.f16(half undef)
; CHECK-BASE-NEXT: Cost Model: Found costs of 10 for: %V4F16 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
; CHECK-BASE-NEXT: Cost Model: Found costs of 22 for: %V8F16 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
; CHECK-BASE-NEXT: Cost Model: Found costs of 44 for: %V16F16 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V4F16 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %V8F16 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %V16F16 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-FP16-LABEL: 'fsqrt_fp16'
Expand Down Expand Up @@ -679,9 +679,9 @@ define void @fma() {
define void @fma_fp16() {
; CHECK-BASE-LABEL: 'fma_fp16'
; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %F16 = call half @llvm.fma.f16(half undef, half undef, half undef)
; CHECK-BASE-NEXT: Cost Model: Found costs of 10 for: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
; CHECK-BASE-NEXT: Cost Model: Found costs of 22 for: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
; CHECK-BASE-NEXT: Cost Model: Found costs of 44 for: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-FP16-LABEL: 'fma_fp16'
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Analysis/CostModel/AArch64/cast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -972,9 +972,9 @@ define i32 @store_truncs() {

define void @extend_extract() {
; CHECK-LABEL: 'extend_extract'
; CHECK-NEXT: Cost Model: Found costs of 2 for: %e8 = extractelement <8 x i8> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of 2 for: %e16 = extractelement <8 x i16> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of 2 for: %e32 = extractelement <8 x i32> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %e8 = extractelement <8 x i8> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %e16 = extractelement <8 x i16> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %e32 = extractelement <8 x i32> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of 1 for: %s8_16 = sext i8 %e8 to i16
; CHECK-NEXT: Cost Model: Found costs of 1 for: %z8_16 = zext i8 %e8 to i16
; CHECK-NEXT: Cost Model: Found costs of 1 for: %s8_32 = sext i8 %e8 to i32
Expand Down
60 changes: 30 additions & 30 deletions llvm/test/Analysis/CostModel/AArch64/insert-extract.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,38 +11,38 @@ target triple = "aarch64--linux-gnu"

define void @vectorInstrCost() {
; CHECK-LABEL: 'vectorInstrCost'
; CHECK-NEXT: Cost Model: Found costs of 3 for: %ta0 = extractelement <8 x i1> undef, i32 0
; CHECK-NEXT: Cost Model: Found costs of 3 for: %ta1 = extractelement <8 x i1> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t1 = extractelement <8 x i8> undef, i32 0
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t2 = extractelement <8 x i8> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t3 = extractelement <4 x i16> undef, i32 0
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t4 = extractelement <4 x i16> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t5 = extractelement <2 x i32> undef, i32 0
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t6 = extractelement <2 x i32> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t7 = extractelement <2 x i64> undef, i32 0
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t8 = extractelement <2 x i64> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:2 Lat:3 SizeLat:3 for: %ta0 = extractelement <8 x i1> undef, i32 0
; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:2 Lat:3 SizeLat:3 for: %ta1 = extractelement <8 x i1> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t1 = extractelement <8 x i8> undef, i32 0
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t2 = extractelement <8 x i8> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t3 = extractelement <4 x i16> undef, i32 0
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t4 = extractelement <4 x i16> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t5 = extractelement <2 x i32> undef, i32 0
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t6 = extractelement <2 x i32> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t7 = extractelement <2 x i64> undef, i32 0
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t8 = extractelement <2 x i64> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of 0 for: %t9 = extractelement <4 x half> undef, i32 0
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t10 = extractelement <4 x half> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t10 = extractelement <4 x half> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of 0 for: %t11 = extractelement <2 x float> undef, i32 0
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t12 = extractelement <2 x float> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t12 = extractelement <2 x float> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of 0 for: %t13 = extractelement <2 x double> undef, i32 0
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t14 = extractelement <2 x double> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of 3 for: %t31 = insertelement <8 x i1> undef, i1 false, i32 0
; CHECK-NEXT: Cost Model: Found costs of 3 for: %t41 = insertelement <8 x i1> undef, i1 true, i32 1
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t30 = insertelement <8 x i8> undef, i8 0, i32 0
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t40 = insertelement <8 x i8> undef, i8 1, i32 1
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t50 = insertelement <4 x i16> undef, i16 2, i32 0
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t60 = insertelement <4 x i16> undef, i16 3, i32 1
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t70 = insertelement <2 x i32> undef, i32 4, i32 0
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t80 = insertelement <2 x i32> undef, i32 5, i32 1
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t90 = insertelement <2 x i64> undef, i64 6, i32 0
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t100 = insertelement <2 x i64> undef, i64 7, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t14 = extractelement <2 x double> undef, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:2 Lat:3 SizeLat:3 for: %t31 = insertelement <8 x i1> undef, i1 false, i32 0
; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:2 Lat:3 SizeLat:3 for: %t41 = insertelement <8 x i1> undef, i1 true, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t30 = insertelement <8 x i8> undef, i8 0, i32 0
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t40 = insertelement <8 x i8> undef, i8 1, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t50 = insertelement <4 x i16> undef, i16 2, i32 0
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t60 = insertelement <4 x i16> undef, i16 3, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t70 = insertelement <2 x i32> undef, i32 4, i32 0
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t80 = insertelement <2 x i32> undef, i32 5, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t90 = insertelement <2 x i64> undef, i64 6, i32 0
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t100 = insertelement <2 x i64> undef, i64 7, i32 1
; CHECK-NEXT: Cost Model: Found costs of 0 for: %t110 = insertelement <4 x half> zeroinitializer, half 0xH0000, i64 0
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t120 = insertelement <4 x half> zeroinitializer, half 0xH0000, i64 1
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t120 = insertelement <4 x half> zeroinitializer, half 0xH0000, i64 1
; CHECK-NEXT: Cost Model: Found costs of 0 for: %t130 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t140 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 1
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t140 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 1
; CHECK-NEXT: Cost Model: Found costs of 0 for: %t150 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 0
; CHECK-NEXT: Cost Model: Found costs of 2 for: %t160 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 1
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %t160 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 1
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%ta0 = extractelement <8 x i1> undef, i32 0
Expand Down Expand Up @@ -86,7 +86,7 @@ define void @vectorInstrCost() {
define <8 x i8> @LD1_B(<8 x i8> %vec, ptr noundef %i) {
; CHECK-LABEL: 'LD1_B'
; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i8, ptr %i, align 1
; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:0 Lat:3 SizeLat:3 for: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i8> %v2
;
entry:
Expand All @@ -98,7 +98,7 @@ entry:
define <4 x i16> @LD1_H(<4 x i16> %vec, ptr noundef %i) {
; CHECK-LABEL: 'LD1_H'
; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i16, ptr %i, align 2
; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:0 Lat:3 SizeLat:3 for: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i16> %v2
;
entry:
Expand All @@ -110,7 +110,7 @@ entry:
define <4 x i32> @LD1_W(<4 x i32> %vec, ptr noundef %i) {
; CHECK-LABEL: 'LD1_W'
; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i32, ptr %i, align 4
; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:0 Lat:3 SizeLat:3 for: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %v2
;
entry:
Expand All @@ -122,7 +122,7 @@ entry:
define <2 x i64> @LD1_X(<2 x i64> %vec, ptr noundef %i) {
; CHECK-LABEL: 'LD1_X'
; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i64, ptr %i, align 8
; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:0 Lat:3 SizeLat:3 for: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %v2
;
entry:
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@ define void @fixed() {
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i32> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:22 Lat:22 SizeLat:22 for: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:38 CodeSize:46 Lat:46 SizeLat:46 for: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:22 Lat:22 SizeLat:22 for: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:9 Lat:10 SizeLat:10 for: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:38 CodeSize:39 Lat:46 SizeLat:46 for: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:9 Lat:10 SizeLat:10 for: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:9 Lat:10 SizeLat:10 for: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:152 CodeSize:184 Lat:184 SizeLat:184 for: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:152 CodeSize:156 Lat:184 SizeLat:184 for: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
entry:
Expand Down
Loading