Skip to content

Commit 7d82c99

Browse files
committed
[RISCV][TTI] Account for constant materialization cost when costing arithmetic operations
At the IR level, we generally assume that constants are free to materialize. However, for RISCV, due to some quirks of the ISA, materializing arbitrary constants can be rather expensive. We frequently fall back to constant pool loads. We've been slowly moving in the direction of modeling the cost of the remat as part of the instruction cost. This has the effect of disincentivizing vectorization - mostly SLP - when we'd have to materialize an expensive constant. We need better modeling of which constants are expensive and which are not, but for the moment let's be consistent with how we model arithmetic and memory instructions. The difference between the two is that arithmetic can sometimes fold a splat operation, which stores cannot. Differential Revision: https://reviews.llvm.org/D138941
1 parent 3558da3 commit 7d82c99

File tree

4 files changed

+82
-48
lines changed

4 files changed

+82
-48
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 43 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1283,11 +1283,8 @@ bool RISCVTargetLowering::
12831283
return !XC;
12841284
}
12851285

1286-
bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
1287-
if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1288-
return false;
1289-
1290-
switch (I->getOpcode()) {
1286+
bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1287+
switch (Opcode) {
12911288
case Instruction::Add:
12921289
case Instruction::Sub:
12931290
case Instruction::Mul:
@@ -1309,38 +1306,48 @@ bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
13091306
case Instruction::URem:
13101307
case Instruction::SRem:
13111308
return Operand == 1;
1312-
case Instruction::Call:
1313-
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
1314-
switch (II->getIntrinsicID()) {
1315-
case Intrinsic::fma:
1316-
case Intrinsic::vp_fma:
1317-
return Operand == 0 || Operand == 1;
1318-
case Intrinsic::vp_shl:
1319-
case Intrinsic::vp_lshr:
1320-
case Intrinsic::vp_ashr:
1321-
case Intrinsic::vp_udiv:
1322-
case Intrinsic::vp_sdiv:
1323-
case Intrinsic::vp_urem:
1324-
case Intrinsic::vp_srem:
1325-
return Operand == 1;
1326-
// These intrinsics are commutative.
1327-
case Intrinsic::vp_add:
1328-
case Intrinsic::vp_mul:
1329-
case Intrinsic::vp_and:
1330-
case Intrinsic::vp_or:
1331-
case Intrinsic::vp_xor:
1332-
case Intrinsic::vp_fadd:
1333-
case Intrinsic::vp_fmul:
1334-
// These intrinsics have 'vr' versions.
1335-
case Intrinsic::vp_sub:
1336-
case Intrinsic::vp_fsub:
1337-
case Intrinsic::vp_fdiv:
1338-
return Operand == 0 || Operand == 1;
1339-
default:
1340-
return false;
1341-
}
1342-
}
1309+
default:
13431310
return false;
1311+
}
1312+
}
1313+
1314+
1315+
bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
1316+
if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1317+
return false;
1318+
1319+
if (canSplatOperand(I->getOpcode(), Operand))
1320+
return true;
1321+
1322+
auto *II = dyn_cast<IntrinsicInst>(I);
1323+
if (!II)
1324+
return false;
1325+
1326+
switch (II->getIntrinsicID()) {
1327+
case Intrinsic::fma:
1328+
case Intrinsic::vp_fma:
1329+
return Operand == 0 || Operand == 1;
1330+
case Intrinsic::vp_shl:
1331+
case Intrinsic::vp_lshr:
1332+
case Intrinsic::vp_ashr:
1333+
case Intrinsic::vp_udiv:
1334+
case Intrinsic::vp_sdiv:
1335+
case Intrinsic::vp_urem:
1336+
case Intrinsic::vp_srem:
1337+
return Operand == 1;
1338+
// These intrinsics are commutative.
1339+
case Intrinsic::vp_add:
1340+
case Intrinsic::vp_mul:
1341+
case Intrinsic::vp_and:
1342+
case Intrinsic::vp_or:
1343+
case Intrinsic::vp_xor:
1344+
case Intrinsic::vp_fadd:
1345+
case Intrinsic::vp_fmul:
1346+
// These intrinsics have 'vr' versions.
1347+
case Intrinsic::vp_sub:
1348+
case Intrinsic::vp_fsub:
1349+
case Intrinsic::vp_fdiv:
1350+
return Operand == 0 || Operand == 1;
13441351
default:
13451352
return false;
13461353
}

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,9 @@ class RISCVTargetLowering : public TargetLowering {
365365
/// Return true if the (vector) instruction I will be lowered to an instruction
366366
/// with a scalar splat operand for the given Operand number.
367367
bool canSplatOperand(Instruction *I, int Operand) const;
368+
/// Return true if a vector instruction will lower to a target instruction
369+
/// able to splat the given operand.
370+
bool canSplatOperand(unsigned Opcode, int Operand) const;
368371
bool shouldSinkOperands(Instruction *I,
369372
SmallVectorImpl<Use *> &Ops) const override;
370373
bool shouldScalarizeBinop(SDValue VecOp) const override;

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1079,6 +1079,31 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
10791079
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
10801080
Args, CxtI);
10811081

1082+
1083+
auto getConstantMatCost =
1084+
[&](unsigned Operand, TTI::OperandValueInfo OpInfo) -> InstructionCost {
1085+
if (OpInfo.isUniform() && TLI->canSplatOperand(Opcode, Operand))
1086+
// Two sub-cases:
1087+
// * Has a 5 bit immediate operand which can be splatted.
1088+
// * Has a larger immediate which must be materialized in scalar register
1089+
// We return 0 for both as we currently ignore the cost of materializing
1090+
// scalar constants in GPRs.
1091+
return 0;
1092+
1093+
// Add a cost of address generation + the cost of the vector load. The
1094+
// address is expected to be a PC relative offset to a constant pool entry
1095+
// using auipc/addi.
1096+
return 2 + getMemoryOpCost(Instruction::Load, Ty, DL.getABITypeAlign(Ty),
1097+
/*AddressSpace=*/0, CostKind);
1098+
};
1099+
1100+
// Add the cost of materializing any constant vectors required.
1101+
InstructionCost ConstantMatCost = 0;
1102+
if (Op1Info.isConstant())
1103+
ConstantMatCost += getConstantMatCost(0, Op1Info);
1104+
if (Op2Info.isConstant())
1105+
ConstantMatCost += getConstantMatCost(1, Op2Info);
1106+
10821107
switch (TLI->InstructionOpcodeToISD(Opcode)) {
10831108
case ISD::ADD:
10841109
case ISD::SUB:
@@ -1095,13 +1120,12 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
10951120
case ISD::FSUB:
10961121
case ISD::FMUL:
10971122
case ISD::FNEG: {
1098-
// TODO: Add the cost of materializing any constant vectors required since
1099-
// we otherwise treat constants as no-cost.
11001123
// TODO: We should be accounting for LMUL and scaling costs for LMUL > 1.
1101-
return LT.first * 1;
1124+
return ConstantMatCost + LT.first * 1;
11021125
}
11031126
default:
1104-
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
1127+
return ConstantMatCost +
1128+
BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
11051129
Args, CxtI);
11061130
}
11071131
}

llvm/test/Analysis/CostModel/RISCV/arith-int.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -815,14 +815,14 @@ define void @add_of_constant() {
815815
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, undef
816816
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = add <2 x i64> <i64 1, i64 1>, undef
817817
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = add <4 x i32> <i32 4096, i32 4096, i32 4096, i32 4096>, undef
818-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = add <4 x i32> <i32 1, i32 1, i32 2, i32 1>, undef
819-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = add <4 x i32> <i32 2, i32 1, i32 1, i32 1>, undef
820-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = add <4 x i32> <i32 0, i32 1, i32 2, i32 3>, undef
821-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = add <4 x i32> <i32 1, i32 2, i32 3, i32 4>, undef
822-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = add <4 x i32> <i32 -1, i32 -2, i32 -3, i32 -4>, undef
823-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = add <4 x i32> <i32 2, i32 4, i32 6, i32 8>, undef
824-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = add <4 x i32> <i32 -1, i32 0, i32 2, i32 1>, undef
825-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = add <4 x i32> <i32 256, i32 4096, i32 57, i32 1>, undef
818+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %8 = add <4 x i32> <i32 1, i32 1, i32 2, i32 1>, undef
819+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = add <4 x i32> <i32 2, i32 1, i32 1, i32 1>, undef
820+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = add <4 x i32> <i32 0, i32 1, i32 2, i32 3>, undef
821+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = add <4 x i32> <i32 1, i32 2, i32 3, i32 4>, undef
822+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = add <4 x i32> <i32 -1, i32 -2, i32 -3, i32 -4>, undef
823+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %13 = add <4 x i32> <i32 2, i32 4, i32 6, i32 8>, undef
824+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = add <4 x i32> <i32 -1, i32 0, i32 2, i32 1>, undef
825+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %15 = add <4 x i32> <i32 256, i32 4096, i32 57, i32 1>, undef
826826
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
827827
;
828828

0 commit comments

Comments
 (0)