Skip to content

Commit c0645f1

Browse files
fhahn authored and var-const committed
[VPlan] Introduce VPInstructionWithType, use instead of VPScalarCast (NFC) (llvm#129706)
There are some opcodes that currently require specialized recipes, due to their result type not being implied by their operands, including casts. This leads to duplication from defining multiple full recipes.

This patch introduces a new VPInstructionWithType subclass that also stores the result type. The general idea is to have opcodes that need to specify a result type use this general recipe. The current patch replaces VPScalarCastRecipe with VPInstructionWithType; a similar patch for VPWidenCastRecipe will follow soon.

There are a few proposed opcodes that should also benefit, without the need for workarounds:
* llvm#129508
* llvm#119284

PR: llvm#129706
1 parent 2e9d953 commit c0645f1

16 files changed

+132
-127
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -249,10 +249,10 @@ class VPBuilder {
249249
new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
250250
}
251251

252-
VPScalarCastRecipe *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
253-
Type *ResultTy, DebugLoc DL) {
252+
VPInstruction *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
253+
Type *ResultTy, DebugLoc DL) {
254254
return tryInsertInstruction(
255-
new VPScalarCastRecipe(Opcode, Op, ResultTy, DL));
255+
new VPInstructionWithType(Opcode, Op, ResultTy, DL));
256256
}
257257

258258
VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4462,7 +4462,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
44624462
switch (R.getVPDefID()) {
44634463
case VPDef::VPDerivedIVSC:
44644464
case VPDef::VPScalarIVStepsSC:
4465-
case VPDef::VPScalarCastSC:
44664465
case VPDef::VPReplicateSC:
44674466
case VPDef::VPInstructionSC:
44684467
case VPDef::VPCanonicalIVPHISC:
@@ -10679,8 +10678,8 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
1067910678
assert(all_of(IV->users(),
1068010679
[](const VPUser *U) {
1068110680
return isa<VPScalarIVStepsRecipe>(U) ||
10682-
isa<VPScalarCastRecipe>(U) ||
1068310681
isa<VPDerivedIVRecipe>(U) ||
10682+
cast<VPRecipeBase>(U)->isScalarCast() ||
1068410683
cast<VPInstruction>(U)->getOpcode() ==
1068510684
Instruction::Add;
1068610685
}) &&

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 53 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,9 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
457457
/// Returns the debug location of the recipe.
458458
DebugLoc getDebugLoc() const { return DL; }
459459

460+
/// Return true if the recipe is a scalar cast.
461+
bool isScalarCast() const;
462+
460463
protected:
461464
/// Compute the cost of this recipe either using a recipe's specialized
462465
/// implementation or using the legacy cost model and the underlying
@@ -531,7 +534,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
531534
case VPRecipeBase::VPWidenIntOrFpInductionSC:
532535
case VPRecipeBase::VPWidenPointerInductionSC:
533536
case VPRecipeBase::VPReductionPHISC:
534-
case VPRecipeBase::VPScalarCastSC:
535537
case VPRecipeBase::VPPartialReductionSC:
536538
return true;
537539
case VPRecipeBase::VPBranchOnMaskSC:
@@ -1025,6 +1027,56 @@ class VPInstruction : public VPRecipeWithIRFlags,
10251027
StringRef getName() const { return Name; }
10261028
};
10271029

1030+
/// A specialization of VPInstruction augmenting it with a dedicated result
1031+
/// type, to be used when the opcode and operands of the VPInstruction don't
1032+
/// directly determine the result type. Note that there is no separate VPDef ID
1033+
/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1034+
/// distinguished purely by the opcode.
1035+
class VPInstructionWithType : public VPInstruction {
1036+
/// Scalar result type produced by the recipe.
1037+
Type *ResultTy;
1038+
1039+
public:
1040+
VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
1041+
Type *ResultTy, DebugLoc DL, const Twine &Name = "")
1042+
: VPInstruction(Opcode, Operands, DL, Name), ResultTy(ResultTy) {}
1043+
1044+
static inline bool classof(const VPRecipeBase *R) {
1045+
// VPInstructionWithType are VPInstructions with specific opcodes requiring
1046+
// type information.
1047+
return R->isScalarCast();
1048+
}
1049+
1050+
static inline bool classof(const VPUser *R) {
1051+
return isa<VPInstructionWithType>(cast<VPRecipeBase>(R));
1052+
}
1053+
1054+
VPInstruction *clone() override {
1055+
SmallVector<VPValue *, 2> Operands(operands());
1056+
auto *New = new VPInstructionWithType(
1057+
getOpcode(), Operands, getResultType(), getDebugLoc(), getName());
1058+
New->setUnderlyingValue(getUnderlyingValue());
1059+
return New;
1060+
}
1061+
1062+
void execute(VPTransformState &State) override;
1063+
1064+
/// Return the cost of this VPInstruction.
1065+
InstructionCost computeCost(ElementCount VF,
1066+
VPCostContext &Ctx) const override {
1067+
// TODO: Compute accurate cost after retiring the legacy cost model.
1068+
return 0;
1069+
}
1070+
1071+
Type *getResultType() const { return ResultTy; }
1072+
1073+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1074+
/// Print the recipe.
1075+
void print(raw_ostream &O, const Twine &Indent,
1076+
VPSlotTracker &SlotTracker) const override;
1077+
#endif
1078+
};
1079+
10281080
/// A recipe to wrap an original IR instruction not to be modified during
10291081
/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
10301082
/// Except for PHIs, VPIRInstructions cannot have any operands.
@@ -1211,54 +1263,6 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
12111263
Type *getResultType() const { return ResultTy; }
12121264
};
12131265

1214-
/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
1215-
class VPScalarCastRecipe : public VPSingleDefRecipe {
1216-
Instruction::CastOps Opcode;
1217-
1218-
Type *ResultTy;
1219-
1220-
Value *generate(VPTransformState &State);
1221-
1222-
public:
1223-
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
1224-
DebugLoc DL)
1225-
: VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}, DL), Opcode(Opcode),
1226-
ResultTy(ResultTy) {}
1227-
1228-
~VPScalarCastRecipe() override = default;
1229-
1230-
VPScalarCastRecipe *clone() override {
1231-
return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy,
1232-
getDebugLoc());
1233-
}
1234-
1235-
VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
1236-
1237-
void execute(VPTransformState &State) override;
1238-
1239-
/// Return the cost of this VPScalarCastRecipe.
1240-
InstructionCost computeCost(ElementCount VF,
1241-
VPCostContext &Ctx) const override {
1242-
// TODO: Compute accurate cost after retiring the legacy cost model.
1243-
return 0;
1244-
}
1245-
1246-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1247-
void print(raw_ostream &O, const Twine &Indent,
1248-
VPSlotTracker &SlotTracker) const override;
1249-
#endif
1250-
1251-
/// Returns the result type of the cast.
1252-
Type *getResultType() const { return ResultTy; }
1253-
1254-
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1255-
// At the moment, only uniform codegen is implemented.
1256-
assert(is_contained(operands(), Op) &&
1257-
"Op must be an operand of the recipe");
1258-
return true;
1259-
}
1260-
};
1261-
12621266
/// A recipe for widening vector intrinsics.
12631267
class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
12641268
/// ID of the vector intrinsic to widen.

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -261,20 +261,18 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
261261
VPPartialReductionRecipe>([this](const VPRecipeBase *R) {
262262
return inferScalarType(R->getOperand(0));
263263
})
264+
// VPInstructionWithType must be handled before VPInstruction.
265+
.Case<VPInstructionWithType, VPWidenIntrinsicRecipe>(
266+
[](const auto *R) { return R->getResultType(); })
264267
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
265268
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
266269
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
267-
.Case<VPWidenIntrinsicRecipe>([](const VPWidenIntrinsicRecipe *R) {
268-
return R->getResultType();
269-
})
270270
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
271271
// TODO: Use info from interleave group.
272272
return V->getUnderlyingValue()->getType();
273273
})
274274
.Case<VPWidenCastRecipe>(
275275
[](const VPWidenCastRecipe *R) { return R->getResultType(); })
276-
.Case<VPScalarCastRecipe>(
277-
[](const VPScalarCastRecipe *R) { return R->getResultType(); })
278276
.Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
279277
return R->getSCEV()->getType();
280278
})

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 37 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
142142
switch (getVPDefID()) {
143143
case VPDerivedIVSC:
144144
case VPPredInstPHISC:
145-
case VPScalarCastSC:
146145
case VPVectorEndPointerSC:
147146
return false;
148147
case VPInstructionSC:
@@ -278,6 +277,11 @@ bool VPRecipeBase::isPhi() const {
278277
cast<VPInstruction>(this)->getOpcode() == Instruction::PHI);
279278
}
280279

280+
bool VPRecipeBase::isScalarCast() const {
281+
auto *VPI = dyn_cast<VPInstruction>(this);
282+
return VPI && Instruction::isCast(VPI->getOpcode());
283+
}
284+
281285
InstructionCost
282286
VPPartialReductionRecipe::computeCost(ElementCount VF,
283287
VPCostContext &Ctx) const {
@@ -417,7 +421,7 @@ bool VPInstruction::doesGeneratePerAllLanes() const {
417421
}
418422

419423
bool VPInstruction::canGenerateScalarForFirstLane() const {
420-
if (Instruction::isBinaryOp(getOpcode()))
424+
if (Instruction::isBinaryOp(getOpcode()) || Instruction::isCast(getOpcode()))
421425
return true;
422426
if (isSingleScalar() || isVectorToScalar())
423427
return true;
@@ -908,7 +912,7 @@ void VPInstruction::execute(VPTransformState &State) {
908912
}
909913

910914
bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
911-
if (Instruction::isBinaryOp(getOpcode()))
915+
if (Instruction::isBinaryOp(getOpcode()) || Instruction::isCast(getOpcode()))
912916
return false;
913917
switch (getOpcode()) {
914918
case Instruction::ExtractElement:
@@ -932,7 +936,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
932936

933937
bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
934938
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
935-
if (Instruction::isBinaryOp(getOpcode()))
939+
if (Instruction::isBinaryOp(getOpcode()) || Instruction::isCast(getOpcode()))
936940
return vputils::onlyFirstLaneUsed(this);
937941

938942
switch (getOpcode()) {
@@ -1070,6 +1074,35 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
10701074
}
10711075
#endif
10721076

1077+
void VPInstructionWithType::execute(VPTransformState &State) {
1078+
State.setDebugLocFrom(getDebugLoc());
1079+
assert(vputils::onlyFirstLaneUsed(this) &&
1080+
"Codegen only implemented for first lane.");
1081+
switch (getOpcode()) {
1082+
case Instruction::ZExt:
1083+
case Instruction::Trunc: {
1084+
Value *Op = State.get(getOperand(0), VPLane(0));
1085+
Value *Cast = State.Builder.CreateCast(Instruction::CastOps(getOpcode()),
1086+
Op, ResultTy);
1087+
State.set(this, Cast, VPLane(0));
1088+
break;
1089+
}
1090+
default:
1091+
llvm_unreachable("opcode not implemented yet");
1092+
}
1093+
}
1094+
1095+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1096+
void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
1097+
VPSlotTracker &SlotTracker) const {
1098+
O << Indent << "EMIT ";
1099+
printAsOperand(O, SlotTracker);
1100+
O << " = " << Instruction::getOpcodeName(getOpcode()) << " ";
1101+
printOperands(O, SlotTracker);
1102+
O << " to " << *ResultTy;
1103+
}
1104+
#endif
1105+
10731106
VPIRInstruction *VPIRInstruction ::create(Instruction &I) {
10741107
if (auto *Phi = dyn_cast<PHINode>(&I))
10751108
return new VPIRPhi(*Phi);
@@ -2551,37 +2584,6 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
25512584
}
25522585
#endif
25532586

2554-
Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
2555-
assert(vputils::onlyFirstLaneUsed(this) &&
2556-
"Codegen only implemented for first lane.");
2557-
switch (Opcode) {
2558-
case Instruction::SExt:
2559-
case Instruction::ZExt:
2560-
case Instruction::Trunc: {
2561-
// Note: SExt/ZExt not used yet.
2562-
Value *Op = State.get(getOperand(0), VPLane(0));
2563-
return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
2564-
}
2565-
default:
2566-
llvm_unreachable("opcode not implemented yet");
2567-
}
2568-
}
2569-
2570-
void VPScalarCastRecipe ::execute(VPTransformState &State) {
2571-
State.set(this, generate(State), VPLane(0));
2572-
}
2573-
2574-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2575-
void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
2576-
VPSlotTracker &SlotTracker) const {
2577-
O << Indent << "SCALAR-CAST ";
2578-
printAsOperand(O, SlotTracker);
2579-
O << " = " << Instruction::getOpcodeName(Opcode) << " ";
2580-
printOperands(O, SlotTracker);
2581-
O << " to " << *ResultTy;
2582-
}
2583-
#endif
2584-
25852587
void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
25862588
assert(State.Lane && "Branch on Mask works only on single instance.");
25872589

llvm/lib/Transforms/Vectorize/VPlanUtils.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,11 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
111111
(isa<LoadInst, StoreInst>(R->getUnderlyingValue())) &&
112112
all_of(R->operands(), isUniformAcrossVFsAndUFs);
113113
})
114-
.Case<VPScalarCastRecipe, VPWidenCastRecipe>([](const auto *R) {
114+
.Case<VPInstruction>([](const auto *VPI) {
115+
return VPI->isScalarCast() &&
116+
isUniformAcrossVFsAndUFs(VPI->getOperand(0));
117+
})
118+
.Case<VPWidenCastRecipe>([](const auto *R) {
115119
// A cast is uniform according to its operand.
116120
return isUniformAcrossVFsAndUFs(R->getOperand(0));
117121
})

llvm/lib/Transforms/Vectorize/VPlanUtils.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,7 @@ inline bool isUniformAfterVectorization(const VPValue *VPV) {
4545
return true;
4646
if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV))
4747
return Rep->isUniform();
48-
if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPScalarCastRecipe,
49-
VPBlendRecipe>(VPV))
48+
if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe>(VPV))
5049
return all_of(VPV->getDefiningRecipe()->operands(),
5150
isUniformAfterVectorization);
5251
if (auto *VPI = dyn_cast<VPInstruction>(VPV))

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,6 @@ class VPDef {
335335
VPReductionSC,
336336
VPPartialReductionSC,
337337
VPReplicateSC,
338-
VPScalarCastSC,
339338
VPScalarIVStepsSC,
340339
VPVectorPointerSC,
341340
VPVectorEndPointerSC,

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
147147
[&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); })
148148
.Case<VPWidenLoadEVLRecipe, VPVectorEndPointerRecipe>(
149149
[&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
150-
.Case<VPScalarCastRecipe>(
151-
[&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); })
150+
.Case<VPInstructionWithType>(
151+
[&](const VPInstructionWithType *S) { return VerifyEVLUse(*S, 0); })
152152
.Case<VPInstruction>([&](const VPInstruction *I) {
153153
if (I->getOpcode() == Instruction::PHI)
154154
return VerifyEVLUse(*I, 1);

0 commit comments

Comments
 (0)