Skip to content

Commit bf78ffe

Browse files
preames authored and agozillon committed
[TTI] Add costing for vp.strided.load and vp.strided.store (llvm#80360)
The primary motivation of this patch is to add testing infrastructure atop the recently landed 8ad14b6, so that we can separate the costing aspects of strided memory operations from the SLP implementation details.

I want to be clear that I am *not* proposing that we use the vp.strided.* forms as our canonical IR representation. I'm merely using them as a testing vehicle to exercise the costing machinery. The canonical IR form remains a masked.gather or masked.scatter. I do want to explore adding a non-vp strided load/store intrinsic, but that's a separate line of work.

There is one costing change included in this. As I wrote my test, I discovered that the default implementation was scalarized (if invoked via generic routines such as getInstructionCost), and when adding the call into the strided-specific costing, I discovered that we hadn't modeled the fallback to scalarization properly in the initial patch. After fixing that, there is a minor difference in scalarization cost reported for the unaligned case, but I believe that to be uninteresting.

For the record, I did confirm that vp.strided.store is lowered to a strided store on RISCV. :)
1 parent 53bef86 commit bf78ffe

File tree

3 files changed

+68
-36
lines changed

3 files changed

+68
-36
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1375,6 +1375,18 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
13751375
true, CostKind);
13761376
}
13771377

1378+
// Returns the estimated cost of a strided memory operation (Opcode selects
// the load or store form) on DataTy. This implementation does not model
// strided accesses itself: it forwards all of its arguments, unchanged and
// in the same order, to getGatherScatterOpCost via thisT().
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
1379+
const Value *Ptr, bool VariableMask,
1380+
Align Alignment,
1381+
TTI::TargetCostKind CostKind,
1382+
const Instruction *I) {
1383+
// For a target without strided memory operations (or for an illegal
1384+
// operation type on one which does), assume we lower to a gather/scatter
1385+
// operation. (Which may in turn be scalarized.)
1386+
return thisT()->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
1387+
Alignment, CostKind, I);
1388+
}
1389+
13781390
InstructionCost getInterleavedMemoryOpCost(
13791391
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
13801392
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
@@ -1595,6 +1607,26 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
15951607
return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
15961608
VarMask, Alignment, CostKind, I);
15971609
}
1610+
case Intrinsic::experimental_vp_strided_store: {
1611+
const Value *Data = Args[0];
1612+
const Value *Ptr = Args[1];
1613+
const Value *Mask = Args[3];
1614+
const Value *EVL = Args[4];
1615+
bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL);
1616+
Align Alignment = I->getParamAlign(1).valueOrOne();
1617+
return thisT()->getStridedMemoryOpCost(Instruction::Store,
1618+
Data->getType(), Ptr, VarMask,
1619+
Alignment, CostKind, I);
1620+
}
1621+
case Intrinsic::experimental_vp_strided_load: {
1622+
const Value *Ptr = Args[0];
1623+
const Value *Mask = Args[2];
1624+
const Value *EVL = Args[3];
1625+
bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL);
1626+
Align Alignment = I->getParamAlign(0).valueOrOne();
1627+
return thisT()->getStridedMemoryOpCost(Instruction::Load, RetTy, Ptr,
1628+
VarMask, Alignment, CostKind, I);
1629+
}
15981630
case Intrinsic::experimental_stepvector: {
15991631
if (isa<ScalableVectorType>(RetTy))
16001632
return BaseT::getIntrinsicInstrCost(ICA, CostKind);

llvm/test/Analysis/CostModel/RISCV/gep.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ define void @non_foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
272272
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = getelementptr i8, ptr %base, i32 42
273273
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
274274
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = getelementptr i8, ptr %base, i32 42
275-
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x6 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %6, i64 undef, <2 x i1> undef, i32 undef)
275+
; RVI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x6 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %6, i64 undef, <2 x i1> undef, i32 undef)
276276
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = getelementptr i8, ptr %base, i32 42
277277
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store volatile <2 x i8> undef, ptr %7, align 2
278278
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = getelementptr i8, ptr %base, i32 42
@@ -284,7 +284,7 @@ define void @non_foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
284284
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = getelementptr i8, ptr %base, i32 42
285285
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
286286
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = getelementptr i8, ptr %base, i32 42
287-
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> undef, ptr %12, i64 undef, <2 x i1> undef, i32 undef)
287+
; RVI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> undef, ptr %12, i64 undef, <2 x i1> undef, i32 undef)
288288
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
289289
;
290290
%1 = getelementptr i8, ptr %base, i32 42
@@ -342,7 +342,7 @@ define void @foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
342342
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %5 = getelementptr i8, ptr %base, i32 0
343343
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
344344
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %6 = getelementptr i8, ptr %base, i32 0
345-
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x6 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %6, i64 undef, <2 x i1> undef, i32 undef)
345+
; RVI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x6 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %6, i64 undef, <2 x i1> undef, i32 undef)
346346
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %7 = getelementptr i8, ptr %base, i32 0
347347
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store volatile <2 x i8> undef, ptr %7, align 2
348348
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %8 = getelementptr i8, ptr %base, i32 0
@@ -354,7 +354,7 @@ define void @foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
354354
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %11 = getelementptr i8, ptr %base, i32 0
355355
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
356356
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %12 = getelementptr i8, ptr %base, i32 0
357-
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> undef, ptr %12, i64 undef, <2 x i1> undef, i32 undef)
357+
; RVI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> undef, ptr %12, i64 undef, <2 x i1> undef, i32 undef)
358358
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
359359
;
360360
%1 = getelementptr i8, ptr %base, i32 0

0 commit comments

Comments
 (0)