Skip to content

Commit 7fc0b30

Browse files
committed
[VPlan] Switch to checking sinking legality for recurrences in VPlan.
Building on D142885 and D142589, retire the SinkAfter map from the recurrence handling code. It is replaced by checking whether it is possible to sink all users of a recurrence directly in VPlan. This results in simpler code overall and allows to handle additional cases (see the improvements in @test_crash). Depends on D142885. Depends on D142589. Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D142886
1 parent e1e9726 commit 7fc0b30

File tree

9 files changed

+110
-121
lines changed

9 files changed

+110
-121
lines changed

llvm/include/llvm/Analysis/IVDescriptors.h

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -186,14 +186,11 @@ class RecurrenceDescriptor {
186186
/// previous iteration (e.g. if the value is defined in the previous
187187
/// iteration, we refer to it as first-order recurrence, if it is defined in
188188
/// the iteration before the previous, we refer to it as second-order
189-
/// recurrence and so on). \p SinkAfter includes pairs of instructions where
190-
/// the first will be rescheduled to appear after the second if/when the loop
191-
/// is vectorized. It may be augmented with additional pairs if needed in
192-
/// order to handle Phi as a first-order recurrence.
193-
static bool
194-
isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
195-
MapVector<Instruction *, Instruction *> &SinkAfter,
196-
DominatorTree *DT);
189+
/// recurrence and so on). Note that this function optimistically assumes that
190+
/// uses of the recurrence can be re-ordered if necessary and users need to
191+
/// check and perform the re-ordering.
192+
static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
193+
DominatorTree *DT);
197194

198195
RecurKind getRecurrenceKind() const { return Kind; }
199196

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -516,10 +516,6 @@ class LoopVectorizationLegality {
516516
/// Holds the phi nodes that are fixed-order recurrences.
517517
RecurrenceSet FixedOrderRecurrences;
518518

519-
/// Holds instructions that need to sink past other instructions to handle
520-
/// fixed-order recurrences.
521-
MapVector<Instruction *, Instruction *> SinkAfter;
522-
523519
/// Holds the widest induction type encountered.
524520
Type *WidestIndTy = nullptr;
525521

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 3 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -927,9 +927,8 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
927927
return false;
928928
}
929929

930-
bool RecurrenceDescriptor::isFixedOrderRecurrence(
931-
PHINode *Phi, Loop *TheLoop,
932-
MapVector<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
930+
bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
931+
DominatorTree *DT) {
933932

934933
// Ensure the phi node is in the loop header and has two incoming values.
935934
if (Phi->getParent() != TheLoop->getHeader() ||
@@ -965,8 +964,7 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(
965964
Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));
966965
}
967966

968-
if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
969-
SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
967+
if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
970968
return false;
971969

972970
// Ensure every user of the phi node (recursively) is dominated by the
@@ -975,23 +973,9 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(
975973
// loop.
976974
// TODO: Consider extending this sinking to handle memory instructions.
977975

978-
// We optimistically assume we can sink all users after Previous. Keep a set
979-
// of instructions to sink after Previous ordered by dominance in the common
980-
// basic block. It will be applied to SinkAfter if all users can be sunk.
981-
auto CompareByComesBefore = [](const Instruction *A, const Instruction *B) {
982-
return A->comesBefore(B);
983-
};
984-
std::set<Instruction *, decltype(CompareByComesBefore)> InstrsToSink(
985-
CompareByComesBefore);
986-
987976
BasicBlock *PhiBB = Phi->getParent();
988977
SmallVector<Instruction *, 8> WorkList;
989978
auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
990-
// Already sunk SinkCandidate.
991-
if (SinkCandidate->getParent() == PhiBB &&
992-
InstrsToSink.find(SinkCandidate) != InstrsToSink.end())
993-
return true;
994-
995979
// Cyclic dependence.
996980
if (Previous == SinkCandidate)
997981
return false;
@@ -1005,55 +989,12 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(
1005989
SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
1006990
return false;
1007991

1008-
// Avoid sinking an instruction multiple times (if multiple operands are
1009-
// fixed order recurrences) by sinking once - after the latest 'previous'
1010-
// instruction.
1011-
auto It = SinkAfter.find(SinkCandidate);
1012-
if (It != SinkAfter.end()) {
1013-
auto *OtherPrev = It->second;
1014-
// Find the earliest entry in the 'sink-after' chain. The last entry in
1015-
// the chain is the original 'Previous' for a recurrence handled earlier.
1016-
auto EarlierIt = SinkAfter.find(OtherPrev);
1017-
while (EarlierIt != SinkAfter.end()) {
1018-
Instruction *EarlierInst = EarlierIt->second;
1019-
EarlierIt = SinkAfter.find(EarlierInst);
1020-
// Bail out if order has not been preserved.
1021-
if (EarlierIt != SinkAfter.end() &&
1022-
!DT->dominates(EarlierInst, OtherPrev))
1023-
return false;
1024-
OtherPrev = EarlierInst;
1025-
}
1026-
// Bail out if order has not been preserved.
1027-
if (OtherPrev != It->second && !DT->dominates(It->second, OtherPrev))
1028-
return false;
1029-
1030-
// SinkCandidate is already being sunk after an instruction after
1031-
// Previous. Nothing left to do.
1032-
if (DT->dominates(Previous, OtherPrev) || Previous == OtherPrev)
1033-
return true;
1034-
1035-
// If there are other instructions to be sunk after SinkCandidate, remove
1036-
// and re-insert SinkCandidate can break those instructions. Bail out for
1037-
// simplicity.
1038-
if (any_of(SinkAfter,
1039-
[SinkCandidate](const std::pair<Instruction *, Instruction *> &P) {
1040-
return P.second == SinkCandidate;
1041-
}))
1042-
return false;
1043-
1044-
// Otherwise, Previous comes after OtherPrev and SinkCandidate needs to be
1045-
// re-sunk to Previous, instead of sinking to OtherPrev. Remove
1046-
// SinkCandidate from SinkAfter to ensure it's insert position is updated.
1047-
SinkAfter.erase(SinkCandidate);
1048-
}
1049-
1050992
// If we reach a PHI node that is not dominated by Previous, we reached a
1051993
// header PHI. No need for sinking.
1052994
if (isa<PHINode>(SinkCandidate))
1053995
return true;
1054996

1055997
// Sink User tentatively and check its users
1056-
InstrsToSink.insert(SinkCandidate);
1057998
WorkList.push_back(SinkCandidate);
1058999
return true;
10591000
};
@@ -1068,11 +1009,6 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(
10681009
}
10691010
}
10701011

1071-
// We can sink all users of Phi. Update the mapping.
1072-
for (Instruction *I : InstrsToSink) {
1073-
SinkAfter[I] = Previous;
1074-
Previous = I;
1075-
}
10761012
return true;
10771013
}
10781014

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -743,8 +743,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
743743
continue;
744744
}
745745

746-
if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop,
747-
SinkAfter, DT)) {
746+
if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
748747
AllowedExit.insert(Phi);
749748
FixedOrderRecurrences.insert(Phi);
750749
continue;
@@ -917,18 +916,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
917916
}
918917
}
919918

920-
// For fixed order recurrences, we use the previous value (incoming value from
921-
// the latch) to check if it dominates all users of the recurrence. Bail out
922-
// if we have to sink such an instruction for another recurrence, as the
923-
// dominance requirement may not hold after sinking.
924-
BasicBlock *LoopLatch = TheLoop->getLoopLatch();
925-
if (any_of(FixedOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
926-
Instruction *V =
927-
cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
928-
return SinkAfter.contains(V);
929-
}))
930-
return false;
931-
932919
// Now we know the widest induction type, check if our found induction
933920
// is the same size. If it's not, unset it here and InnerLoopVectorizer
934921
// will create another.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9067,7 +9067,8 @@ std::optional<VPlanPtr> LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
90679067

90689068
// Sink users of fixed-order recurrence past the recipe defining the previous
90699069
// value and introduce FirstOrderRecurrenceSplice VPInstructions.
9070-
VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder);
9070+
if (!VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder))
9071+
return std::nullopt;
90719072

90729073
// Interleave memory: for each Interleave Group we marked earlier as relevant
90739074
// for this VPlan, replace the Recipes widening its memory instructions with a

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -652,9 +652,10 @@ static bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B,
652652
return VPDT.properlyDominates(ParentA, ParentB);
653653
}
654654

655-
// Sink users of \p FOR after the recipe defining the previous value \p Previous
656-
// of the recurrence.
657-
static void
655+
/// Sink users of \p FOR after the recipe defining the previous value \p
656+
/// Previous of the recurrence. \returns true if all users of \p FOR could be
657+
/// re-arranged as needed or false if it is not possible.
658+
static bool
658659
sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
659660
VPRecipeBase *Previous,
660661
VPDominatorTree &VPDT) {
@@ -663,15 +664,18 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
663664
SmallPtrSet<VPRecipeBase *, 8> Seen;
664665
Seen.insert(Previous);
665666
auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
666-
assert(
667-
SinkCandidate != Previous &&
668-
"The previous value cannot depend on the users of the recurrence phi.");
667+
// The previous value must not depend on the users of the recurrence phi. In
668+
// that case, FOR is not a fixed order recurrence.
669+
if (SinkCandidate == Previous)
670+
return false;
671+
669672
if (isa<VPHeaderPHIRecipe>(SinkCandidate) ||
670673
!Seen.insert(SinkCandidate).second ||
671674
properlyDominates(Previous, SinkCandidate, VPDT))
672-
return;
675+
return true;
673676

674677
WorkList.push_back(SinkCandidate);
678+
return true;
675679
};
676680

677681
// Recursively sink users of FOR after Previous.
@@ -682,7 +686,8 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
682686
"only recipes with a single defined value expected");
683687
for (VPUser *User : Current->getVPSingleValue()->users()) {
684688
if (auto *R = dyn_cast<VPRecipeBase>(User))
685-
TryToPushSinkCandidate(R);
689+
if (!TryToPushSinkCandidate(R))
690+
return false;
686691
}
687692
}
688693

@@ -699,9 +704,10 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
699704
SinkCandidate->moveAfter(Previous);
700705
Previous = SinkCandidate;
701706
}
707+
return true;
702708
}
703709

704-
void VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
710+
bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
705711
VPBuilder &Builder) {
706712
VPDominatorTree VPDT;
707713
VPDT.recalculate(Plan);
@@ -724,7 +730,8 @@ void VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
724730
Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
725731
}
726732

727-
sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT);
733+
if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT))
734+
return false;
728735

729736
// Introduce a recipe to combine the incoming and previous values of a
730737
// fixed-order recurrence.
@@ -743,4 +750,5 @@ void VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
743750
// all users.
744751
RecurSplice->setOperand(0, FOR);
745752
}
753+
return true;
746754
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,10 @@ struct VPlanTransforms {
7777
/// to combine the value from the recurrence phis and previous values. The
7878
/// current implementation assumes all users can be sunk after the previous
7979
/// value, which is enforced by earlier legality checks.
80-
static void adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
80+
/// \returns true if all users of fixed-order recurrences could be re-arranged
81+
/// as needed or false if it is not possible. In the latter case, \p Plan is
82+
/// not valid.
83+
static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
8184

8285
/// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
8386
/// resulting plan to \p BestVF and \p BestUF.

llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -647,12 +647,40 @@ exit:
647647
ret ptr %for.1
648648
}
649649

650-
; Make sure LLVM doesn't generate wrong data in SinkAfter, and causes crash in
651-
; loop vectorizer.
652-
define double @test_crash(ptr %p, ptr noalias %a, ptr noalias %b) {
653-
; CHECK-LABEL: @test_crash
654-
; CHECK-NOT: vector.body:
655-
; CHECK: ret
650+
; In this test case, %USE_2_FORS uses 2 different fixed-order recurrences and
651+
; it needs to be sunk past the previous value for both recurrences.
652+
define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) {
653+
; CHECK-LABEL: @test_resinking_required(
654+
; CHECK: vector.body:
655+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
656+
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 0.000000e+00>, %vector.ph ], [ [[BROADCAST_SPLAT:%.*]], %vector.body ]
657+
; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 0.000000e+00>, %vector.ph ], [ [[BROADCAST_SPLAT4:%.*]], %vector.body ]
658+
; CHECK-NEXT: [[VECTOR_RECUR2:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 0.000000e+00>, %vector.ph ], [ [[TMP4:%.*]], %vector.body ]
659+
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr %a, align 8
660+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[TMP0]], i64 0
661+
; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
662+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
663+
; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> zeroinitializer, [[TMP1]]
664+
; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr %b, align 8
665+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x double> poison, double [[TMP3]], i64 0
666+
; CHECK-NEXT: [[BROADCAST_SPLAT4]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT3]], <4 x double> poison, <4 x i32> zeroinitializer
667+
; CHECK-NEXT: [[TMP4]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[BROADCAST_SPLAT4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
668+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR2]], <4 x double> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
669+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP2]], i32 3
670+
; CHECK-NEXT: store double [[TMP6]], ptr [[P:%.*]], align 8
671+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
672+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
673+
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
674+
; CHECK: middle.block:
675+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0
676+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 3
677+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 2
678+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 3
679+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 2
680+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <4 x double> [[TMP4]], i32 3
681+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI10:%.*]] = extractelement <4 x double> [[TMP4]], i32 2
682+
; CHECK-NEXT: br i1 [[CMP_N]], label %End, label %scalar.ph
683+
;
656684
Entry:
657685
br label %Loop
658686

@@ -661,7 +689,7 @@ Loop:
661689
%for.2 = phi double [ %l2, %Loop ], [ 0.000000e+00, %Entry ]
662690
%for.3 = phi double [ %for.2, %Loop ], [ 0.000000e+00, %Entry ]
663691
%iv = phi i64 [ %iv.next, %Loop ], [ 0, %Entry ]
664-
%USE_2_INDVARS = fdiv double %for.3, %for.1
692+
%USE_2_FORS = fdiv double %for.3, %for.1
665693
%div = fdiv double 0.000000e+00, %for.1
666694
%l1 = load double, ptr %a, align 8
667695
%iv.next= add nuw nsw i64 %iv, 1

0 commit comments

Comments
 (0)