
Commit 6fbbe15

[VPlan] Introduce VPWidenIntrinsicRecipe to separate from libcall. (#110486)
This patch splits off intrinsic handling into a new VPWidenIntrinsicRecipe. VPWidenIntrinsicRecipes only need access to the intrinsic ID to widen and to the scalar result type (in case the intrinsic is overloaded on the result type); they do not need access to an underlying IR call instruction or function. This means a VPWidenIntrinsicRecipe can be created easily without access to underlying IR.
1 parent 29d6f8a commit 6fbbe15
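
As a quick orientation before the per-file diffs, here is a condensed sketch of the creation site after this change. It is abridged from the tryToWidenCall hunks in LoopVectorize.cpp below (CI, ID, Ops, Operands, Variant, and ShouldUseVectorIntrinsic are the locals of that function), not a verbatim excerpt:

    // Intrinsic path: the new recipe only needs the intrinsic ID and the
    // scalar result type, so it no longer has to carry a called Function.
    if (ShouldUseVectorIntrinsic)
      return new VPWidenIntrinsicRecipe(*CI, ID, Ops, CI->getType(),
                                        CI->getDebugLoc());

    // Library-call path: VPWidenCallRecipe now only carries the chosen vector
    // variant; the called function remains the last operand.
    Ops.push_back(Operands.back());
    return new VPWidenCallRecipe(CI, Variant, Ops, CI->getDebugLoc());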

File tree: 12 files changed, +216 -103 lines


llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 20 additions & 15 deletions
@@ -4365,7 +4365,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
                 [](const auto *R) { return Instruction::Store; })
             .Case<VPWidenLoadRecipe>(
                 [](const auto *R) { return Instruction::Load; })
-            .Case<VPWidenCallRecipe>(
+            .Case<VPWidenCallRecipe, VPWidenIntrinsicRecipe>(
                 [](const auto *R) { return Instruction::Call; })
             .Case<VPInstruction, VPWidenRecipe, VPReplicateRecipe,
                   VPWidenCastRecipe>(
@@ -4389,12 +4389,18 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
       OS << (Pair.second == Subset.front().second ? "" : ", ") << Pair.second;
     OS << "):";
     if (Opcode == Instruction::Call) {
-      auto *WidenCall = dyn_cast<VPWidenCallRecipe>(R);
-      Function *CalledFn =
-          WidenCall ? WidenCall->getCalledScalarFunction()
-                    : cast<Function>(R->getOperand(R->getNumOperands() - 1)
-                                         ->getLiveInIRValue());
-      OS << " call to " << CalledFn->getName();
+      StringRef Name = "";
+      if (auto *Int = dyn_cast<VPWidenIntrinsicRecipe>(R)) {
+        Name = Int->getIntrinsicName();
+      } else {
+        auto *WidenCall = dyn_cast<VPWidenCallRecipe>(R);
+        Function *CalledFn =
+            WidenCall ? WidenCall->getCalledScalarFunction()
+                      : cast<Function>(R->getOperand(R->getNumOperands() - 1)
+                                           ->getLiveInIRValue());
+        Name = CalledFn->getName();
+      }
+      OS << " call to " << Name;
     } else
       OS << " " << Instruction::getOpcodeName(Opcode);
     reportVectorizationInfo(OutString, "InvalidCost", ORE, OrigLoop, nullptr,
@@ -4445,6 +4451,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
     case VPDef::VPWidenCanonicalIVSC:
     case VPDef::VPWidenCastSC:
     case VPDef::VPWidenGEPSC:
+    case VPDef::VPWidenIntrinsicSC:
     case VPDef::VPWidenSC:
     case VPDef::VPWidenSelectSC:
     case VPDef::VPBlendSC:
@@ -8294,7 +8301,7 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
   return new VPBlendRecipe(Phi, OperandsWithMask);
 }
 
-VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
+VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
                                                    ArrayRef<VPValue *> Operands,
                                                    VFRange &Range) {
   bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
@@ -8314,7 +8321,6 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
     return nullptr;
 
   SmallVector<VPValue *, 4> Ops(Operands.take_front(CI->arg_size()));
-  Ops.push_back(Operands.back());
 
   // Is it beneficial to perform intrinsic call compared to lib call?
   bool ShouldUseVectorIntrinsic =
@@ -8325,8 +8331,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
       },
       Range);
   if (ShouldUseVectorIntrinsic)
-    return new VPWidenCallRecipe(CI, make_range(Ops.begin(), Ops.end()), ID,
-                                 CI->getDebugLoc());
+    return new VPWidenIntrinsicRecipe(*CI, ID, Ops, CI->getType(),
+                                      CI->getDebugLoc());
 
   Function *Variant = nullptr;
   std::optional<unsigned> MaskPos;
@@ -8378,9 +8384,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
      Ops.insert(Ops.begin() + *MaskPos, Mask);
    }
 
-    return new VPWidenCallRecipe(CI, make_range(Ops.begin(), Ops.end()),
-                                 Intrinsic::not_intrinsic, CI->getDebugLoc(),
-                                 Variant);
+    Ops.push_back(Operands.back());
+    return new VPWidenCallRecipe(CI, Variant, Ops, CI->getDebugLoc());
   }
 
   return nullptr;
@@ -9253,7 +9258,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
                RecurrenceDescriptor::isFMulAddIntrinsic(CurrentLinkI) &&
            "Expected instruction to be a call to the llvm.fmuladd intrinsic");
     assert(((MinVF.isScalar() && isa<VPReplicateRecipe>(CurrentLink)) ||
-            isa<VPWidenCallRecipe>(CurrentLink)) &&
+            isa<VPWidenIntrinsicRecipe>(CurrentLink)) &&
               CurrentLink->getOperand(2) == PreviousLink &&
           "expected a call where the previous link is the added operand");
 

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 3 additions & 3 deletions
@@ -93,9 +93,9 @@ class VPRecipeBuilder {
   VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands);
 
   /// Handle call instructions. If \p CI can be widened for \p Range.Start,
-  /// return a new VPWidenCallRecipe. Range.End may be decreased to ensure same
-  /// decision from \p Range.Start to \p Range.End.
-  VPWidenCallRecipe *tryToWidenCall(CallInst *CI, ArrayRef<VPValue *> Operands,
+  /// return a new VPWidenCallRecipe or VPWidenIntrinsicRecipe. Range.End may be
+  /// decreased to ensure same decision from \p Range.Start to \p Range.End.
+  VPSingleDefRecipe *tryToWidenCall(CallInst *CI, ArrayRef<VPValue *> Operands,
                                     VFRange &Range);
 
   /// Check if \p I has an opcode that can be widened and return a VPWidenRecipe

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 76 additions & 15 deletions
@@ -886,6 +886,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
     case VPRecipeBase::VPWidenCanonicalIVSC:
     case VPRecipeBase::VPWidenCastSC:
     case VPRecipeBase::VPWidenGEPSC:
+    case VPRecipeBase::VPWidenIntrinsicSC:
     case VPRecipeBase::VPWidenSC:
     case VPRecipeBase::VPWidenEVLSC:
     case VPRecipeBase::VPWidenSelectSC:
@@ -1613,25 +1614,85 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {
   }
 };
 
-/// A recipe for widening Call instructions.
-class VPWidenCallRecipe : public VPRecipeWithIRFlags {
-  /// ID of the vector intrinsic to call when widening the call. If set the
-  /// Intrinsic::not_intrinsic, a library call will be used instead.
+/// A recipe for widening vector intrinsics.
+class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
+  /// ID of the vector intrinsic to widen.
   Intrinsic::ID VectorIntrinsicID;
-  /// If this recipe represents a library call, Variant stores a pointer to
-  /// the chosen function. There is a 1:1 mapping between a given VF and the
-  /// chosen vectorized variant, so there will be a different vplan for each
-  /// VF with a valid variant.
+
+  /// Scalar return type of the intrinsic.
+  Type *ResultTy;
+
+  /// True if the intrinsic may read from memory.
+  bool MayReadFromMemory;
+
+  /// True if the intrinsic may write to memory.
+  bool MayWriteToMemory;
+
+  /// True if the intrinsic may have side-effects.
+  bool MayHaveSideEffects;
+
+public:
+  VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
+                         ArrayRef<VPValue *> CallArguments, Type *Ty,
+                         DebugLoc DL = {})
+      : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
+        VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
+        MayReadFromMemory(CI.mayReadFromMemory()),
+        MayWriteToMemory(CI.mayWriteToMemory()),
+        MayHaveSideEffects(CI.mayHaveSideEffects()) {}
+
+  ~VPWidenIntrinsicRecipe() override = default;
+
+  VPWidenIntrinsicRecipe *clone() override {
+    return new VPWidenIntrinsicRecipe(*cast<CallInst>(getUnderlyingValue()),
+                                      VectorIntrinsicID, {op_begin(), op_end()},
+                                      ResultTy, getDebugLoc());
+  }
+
+  VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
+
+  /// Produce a widened version of the vector intrinsic.
+  void execute(VPTransformState &State) override;
+
+  /// Return the cost of this vector intrinsic.
+  InstructionCost computeCost(ElementCount VF,
+                              VPCostContext &Ctx) const override;
+
+  /// Return the scalar return type of the intrinsic.
+  Type *getResultType() const { return ResultTy; }
+
+  /// Return the name of the intrinsic as a string.
+  StringRef getIntrinsicName() const;
+
+  /// Returns true if the intrinsic may read from memory.
+  bool mayReadFromMemory() const { return MayReadFromMemory; }
+
+  /// Returns true if the intrinsic may write to memory.
+  bool mayWriteToMemory() const { return MayWriteToMemory; }
+
+  /// Returns true if the intrinsic may have side-effects.
+  bool mayHaveSideEffects() const { return MayHaveSideEffects; }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
+/// A recipe for widening Call instructions using library calls.
+class VPWidenCallRecipe : public VPRecipeWithIRFlags {
+  /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
+  /// between a given VF and the chosen vectorized variant, so there will be a
+  /// different VPlan for each VF with a valid variant.
   Function *Variant;
 
 public:
-  template <typename IterT>
-  VPWidenCallRecipe(Value *UV, iterator_range<IterT> CallArguments,
-                    Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},
-                    Function *Variant = nullptr)
+  VPWidenCallRecipe(Value *UV, Function *Variant,
+                    ArrayRef<VPValue *> CallArguments, DebugLoc DL = {})
       : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
                             *cast<Instruction>(UV)),
-        VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {
+        Variant(Variant) {
     assert(
         isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
         "last operand must be the called function");
@@ -1640,8 +1701,8 @@ class VPWidenCallRecipe : public VPRecipeWithIRFlags {
   ~VPWidenCallRecipe() override = default;
 
   VPWidenCallRecipe *clone() override {
-    return new VPWidenCallRecipe(getUnderlyingValue(), operands(),
-                                 VectorIntrinsicID, getDebugLoc(), Variant);
+    return new VPWidenCallRecipe(getUnderlyingValue(), Variant,
+                                 {op_begin(), op_end()}, getDebugLoc());
   }
 
   VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 3 additions & 0 deletions
@@ -268,6 +268,9 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
                 VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
                 VPWidenSelectRecipe>(
               [this](const auto *R) { return inferScalarTypeForRecipe(R); })
+          .Case<VPWidenIntrinsicRecipe>([](const VPWidenIntrinsicRecipe *R) {
+            return R->getResultType();
+          })
           .Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
             // TODO: Use info from interleave group.
             return V->getUnderlyingValue()->getType();
