@@ -895,6 +895,13 @@ class InstructionsState {
            is_contained(AddSub, getAltOpcode());
   }
 
+  /// Checks if main/alt instructions are cmp operations.
+  bool isCmpOp() const {
+    return (getOpcode() == Instruction::ICmp ||
+            getOpcode() == Instruction::FCmp) &&
+           getAltOpcode() == getOpcode();
+  }
+
   /// Checks if the current state is valid, i.e. has non-null MainOp
   bool valid() const { return MainOp && AltOp; }
 
@@ -9277,22 +9284,23 @@ bool BoUpSLP::canBuildSplitNode(ArrayRef<Value *> VL,
   // as alternate ops.
   if (NumParts >= VL.size())
     return false;
+  constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
+  InstructionCost InsertCost = ::getShuffleCost(
+      *TTI, TTI::SK_InsertSubvector, VecTy, {}, Kind, Op1.size(), Op2VecTy);
+  FixedVectorType *SubVecTy =
+      getWidenedType(ScalarTy, std::max(Op1.size(), Op2.size()));
+  InstructionCost NewShuffleCost =
+      ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, SubVecTy, Mask, Kind);
+  if (!LocalState.isCmpOp() && NumParts <= 1 &&
+      (Mask.empty() || InsertCost >= NewShuffleCost))
+    return false;
   if ((LocalState.getMainOp()->isBinaryOp() &&
        LocalState.getAltOp()->isBinaryOp() &&
        (LocalState.isShiftOp() || LocalState.isBitwiseLogicOp() ||
         LocalState.isAddSubLikeOp() || LocalState.isMulDivLikeOp())) ||
       (LocalState.getMainOp()->isCast() && LocalState.getAltOp()->isCast()) ||
       (LocalState.getMainOp()->isUnaryOp() &&
        LocalState.getAltOp()->isUnaryOp())) {
-    constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
-    InstructionCost InsertCost = ::getShuffleCost(
-        *TTI, TTI::SK_InsertSubvector, VecTy, {}, Kind, Op1.size(), Op2VecTy);
-    FixedVectorType *SubVecTy =
-        getWidenedType(ScalarTy, std::max(Op1.size(), Op2.size()));
-    InstructionCost NewShuffleCost =
-        ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, SubVecTy, Mask, Kind);
-    if (NumParts <= 1 && (Mask.empty() || InsertCost >= NewShuffleCost))
-      return false;
     InstructionCost OriginalVecOpsCost =
         TTI->getArithmeticInstrCost(Opcode0, VecTy, Kind) +
         TTI->getArithmeticInstrCost(Opcode1, VecTy, Kind);
@@ -9429,18 +9437,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     if (!canBuildSplitNode(VL, LocalState, Op1, Op2, ReorderIndices))
       return false;
 
-    // Any value is used in split node already - just gather.
-    if (any_of(VL, [&](Value *V) {
-          return ScalarsInSplitNodes.contains(V) || isVectorized(V);
-        })) {
-      if (TryToFindDuplicates(S)) {
-        auto Invalid = ScheduleBundle::invalid();
-        newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
-                     ReuseShuffleIndices);
-      }
-      return true;
-    }
-
     SmallVector<Value *> NewVL(VL.size());
     copy(Op1, NewVL.begin());
     copy(Op2, std::next(NewVL.begin(), Op1.size()));
@@ -9616,9 +9612,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
             ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, VecTy, {}, Kind) +
             ::getScalarizationOverhead(*TTI, ScalarTy, VecTy, Extracted,
                                        /*Insert=*/false, /*Extract=*/true, Kind);
-        InstructionCost ScalarizeCostEstimate =
-            ::getScalarizationOverhead(*TTI, ScalarTy, VecTy, Vectorized,
-                                       /*Insert=*/true, /*Extract=*/false, Kind);
+        InstructionCost ScalarizeCostEstimate = ::getScalarizationOverhead(
+            *TTI, ScalarTy, VecTy, Vectorized,
+            /*Insert=*/true, /*Extract=*/false, Kind, /*ForPoisonSrc=*/false);
         PreferScalarize = VectorizeCostEstimate > ScalarizeCostEstimate;
       }
       if (PreferScalarize) {
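
As an aside for readers tracing the cost-model change: the sketch below restates the hoisted early exit in canBuildSplitNode outside the LLVM APIs. The isCmpOp check mirrors the new InstructionsState helper from the first hunk; everything else (the names InstructionsStateSketch and keepSplitNode, the plain integer costs, the sample values in main) is a hypothetical stand-in used only to illustrate that compare-based split nodes now bypass the single-register profitability bail-out, not the actual implementation.

// Minimal self-contained sketch of the new split-node gating logic.
// None of these names exist in LLVM; they only mirror the diff's control flow.
#include <cstdint>
#include <iostream>
#include <vector>

enum class Opcode { ICmp, FCmp, Add, Sub };

struct InstructionsStateSketch {
  Opcode Main;
  Opcode Alt;
  // Mirrors the patch's InstructionsState::isCmpOp(): main and alternate
  // opcodes must be the same compare instruction (ICmp or FCmp).
  bool isCmpOp() const {
    return (Main == Opcode::ICmp || Main == Opcode::FCmp) && Alt == Main;
  }
};

// Mirrors the hoisted early exit: a single-register node whose subvector
// insert is no cheaper than a two-source shuffle is rejected, unless the
// node is a compare node, which now always proceeds to the cost comparison.
bool keepSplitNode(const InstructionsStateSketch &S, unsigned NumParts,
                   const std::vector<int> &Mask, int64_t InsertCost,
                   int64_t NewShuffleCost) {
  if (!S.isCmpOp() && NumParts <= 1 &&
      (Mask.empty() || InsertCost >= NewShuffleCost))
    return false;
  return true;
}

int main() {
  std::vector<int> Mask{0, 2, 1, 3}; // arbitrary sample permutation
  InstructionsStateSketch Cmp{Opcode::ICmp, Opcode::ICmp};
  InstructionsStateSketch AddSub{Opcode::Add, Opcode::Sub};
  // With identical sample costs, the compare node survives the check while
  // the add/sub node is rejected, matching the intent of the patch.
  std::cout << keepSplitNode(Cmp, 1, Mask, 4, 4) << ' '
            << keepSplitNode(AddSub, 1, Mask, 4, 4) << '\n'; // prints: 1 0
  return 0;
}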