Skip to content

Commit e1850cb

Browse files
fhahn authored and IanWood1 committed
[VPlan] Remove ILV::sinkScalarOperands. (llvm#136023)
Remove the legacy ILV sinkScalarOperands, which is superseded by the sinkScalarOperands VPlan transform. A few cases are not handled by VPlan's sinkScalarOperands because the recipes involved don't support replication: pointer inductions and blends. We could probably improve this further by allowing replication for more recipes, but I don't think the extra complexity is warranted. Depends on llvm#136021. PR: llvm#136023
1 parent 84b3c29 commit e1850cb

File tree

6 files changed

+68
-148
lines changed

6 files changed

+68
-148
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -540,10 +540,6 @@ class InnerLoopVectorizer {
540540
protected:
541541
friend class LoopVectorizationPlanner;
542542

543-
/// Iteratively sink the scalarized operands of a predicated instruction into
544-
/// the block that was created for it.
545-
void sinkScalarOperands(Instruction *PredInst);
546-
547543
/// Returns (and creates if needed) the trip count of the widened loop.
548544
Value *getOrCreateVectorTripCount(BasicBlock *InsertBlock);
549545

@@ -628,9 +624,6 @@ class InnerLoopVectorizer {
628624
/// A list of all bypass blocks. The first block is the entry of the loop.
629625
SmallVector<BasicBlock *, 4> LoopBypassBlocks;
630626

631-
/// Store instructions that were predicated.
632-
SmallVector<Instruction *, 4> PredicatedInstructions;
633-
634627
/// Trip count of the original loop.
635628
Value *TripCount = nullptr;
636629

@@ -2382,17 +2375,13 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
23822375
if (auto *II = dyn_cast<AssumeInst>(Cloned))
23832376
AC->registerAssumption(II);
23842377

2385-
// End if-block.
2386-
VPRegionBlock *Parent = RepRecipe->getParent()->getParent();
2387-
bool IfPredicateInstr = Parent ? Parent->isReplicator() : false;
23882378
assert(
2389-
(Parent || !RepRecipe->getParent()->getPlan()->getVectorLoopRegion() ||
2379+
(RepRecipe->getParent()->getParent() ||
2380+
!RepRecipe->getParent()->getPlan()->getVectorLoopRegion() ||
23902381
all_of(RepRecipe->operands(),
23912382
[](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) &&
23922383
"Expected a recipe is either within a region or all of its operands "
23932384
"are defined outside the vectorized region.");
2394-
if (IfPredicateInstr)
2395-
PredicatedInstructions.push_back(Cloned);
23962385
}
23972386

23982387
Value *
@@ -2866,9 +2855,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
28662855
if (!State.Plan->getVectorLoopRegion())
28672856
return;
28682857

2869-
for (Instruction *PI : PredicatedInstructions)
2870-
sinkScalarOperands(&*PI);
2871-
28722858
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
28732859
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
28742860
BasicBlock *HeaderBB = State.CFG.VPBB2IRBB[HeaderVPBB];
@@ -2894,82 +2880,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
28942880
VF.getKnownMinValue() * UF);
28952881
}
28962882

2897-
void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
2898-
// The basic block and loop containing the predicated instruction.
2899-
auto *PredBB = PredInst->getParent();
2900-
auto *VectorLoop = LI->getLoopFor(PredBB);
2901-
2902-
// Initialize a worklist with the operands of the predicated instruction.
2903-
SetVector<Value *> Worklist(PredInst->op_begin(), PredInst->op_end());
2904-
2905-
// Holds instructions that we need to analyze again. An instruction may be
2906-
// reanalyzed if we don't yet know if we can sink it or not.
2907-
SmallVector<Instruction *, 8> InstsToReanalyze;
2908-
2909-
// Returns true if a given use occurs in the predicated block. Phi nodes use
2910-
// their operands in their corresponding predecessor blocks.
2911-
auto IsBlockOfUsePredicated = [&](Use &U) -> bool {
2912-
auto *I = cast<Instruction>(U.getUser());
2913-
BasicBlock *BB = I->getParent();
2914-
if (auto *Phi = dyn_cast<PHINode>(I))
2915-
BB = Phi->getIncomingBlock(
2916-
PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
2917-
return BB == PredBB;
2918-
};
2919-
2920-
// Iteratively sink the scalarized operands of the predicated instruction
2921-
// into the block we created for it. When an instruction is sunk, it's
2922-
// operands are then added to the worklist. The algorithm ends after one pass
2923-
// through the worklist doesn't sink a single instruction.
2924-
bool Changed;
2925-
do {
2926-
// Add the instructions that need to be reanalyzed to the worklist, and
2927-
// reset the changed indicator.
2928-
Worklist.insert_range(InstsToReanalyze);
2929-
InstsToReanalyze.clear();
2930-
Changed = false;
2931-
2932-
while (!Worklist.empty()) {
2933-
auto *I = dyn_cast<Instruction>(Worklist.pop_back_val());
2934-
2935-
// We can't sink an instruction if it is a phi node, is not in the loop,
2936-
// may have side effects or may read from memory.
2937-
// TODO: Could do more granular checking to allow sinking
2938-
// a load past non-store instructions.
2939-
if (!I || isa<PHINode>(I) || !VectorLoop->contains(I) ||
2940-
I->mayHaveSideEffects() || I->mayReadFromMemory())
2941-
continue;
2942-
2943-
// If the instruction is already in PredBB, check if we can sink its
2944-
// operands. In that case, VPlan's sinkScalarOperands() succeeded in
2945-
// sinking the scalar instruction I, hence it appears in PredBB; but it
2946-
// may have failed to sink I's operands (recursively), which we try
2947-
// (again) here.
2948-
if (I->getParent() == PredBB) {
2949-
Worklist.insert_range(I->operands());
2950-
continue;
2951-
}
2952-
2953-
// It's legal to sink the instruction if all its uses occur in the
2954-
// predicated block. Otherwise, there's nothing to do yet, and we may
2955-
// need to reanalyze the instruction.
2956-
if (!llvm::all_of(I->uses(), IsBlockOfUsePredicated)) {
2957-
InstsToReanalyze.push_back(I);
2958-
continue;
2959-
}
2960-
2961-
// Move the instruction to the beginning of the predicated block, and add
2962-
// it's operands to the worklist.
2963-
I->moveBefore(PredBB->getFirstInsertionPt());
2964-
Worklist.insert_range(I->operands());
2965-
2966-
// The sinking may have enabled other instructions to be sunk, so we will
2967-
// need to iterate.
2968-
Changed = true;
2969-
}
2970-
} while (Changed);
2971-
}
2972-
29732883
void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
29742884
auto Iter = vp_depth_first_deep(Plan.getEntry());
29752885
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {

llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1000,22 +1000,25 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
10001000
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE5]] ]
10011001
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[PRED_STORE_CONTINUE5]] ]
10021002
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2]], align 8
1003-
; TFA_INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
1004-
; TFA_INTERLEAVE: [[PRED_STORE_IF]]:
10051003
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7:[0-9]+]]
1006-
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = fcmp ogt double [[TMP5]], 0.000000e+00
1007-
; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true
1008-
; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = select i1 [[TMP7]], double 1.000000e+00, double 0.000000e+00
1009-
; TFA_INTERLEAVE-NEXT: store double [[TMP24]], ptr [[P]], align 8
1010-
; TFA_INTERLEAVE-NEXT: br label %[[PRED_STORE_CONTINUE]]
1011-
; TFA_INTERLEAVE: [[PRED_STORE_CONTINUE]]:
1012-
; TFA_INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK2]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5]]
1013-
; TFA_INTERLEAVE: [[PRED_STORE_IF4]]:
10141004
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7]]
1005+
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = fcmp ogt double [[TMP5]], 0.000000e+00
10151006
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = fcmp ogt double [[TMP8]], 0.000000e+00
1016-
; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = xor i1 [[TMP9]], true
1007+
; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = xor i1 [[TMP6]], true
1008+
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor i1 [[TMP9]], true
1009+
; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = select i1 [[ACTIVE_LANE_MASK]], i1 [[TMP18]], i1 false
1010+
; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], i1 [[TMP20]], i1 false
10171011
; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = select i1 [[TMP10]], double 1.000000e+00, double 0.000000e+00
1018-
; TFA_INTERLEAVE-NEXT: store double [[TMP26]], ptr [[P]], align 8
1012+
; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select i1 [[TMP21]], double 1.000000e+00, double 0.000000e+00
1013+
; TFA_INTERLEAVE-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], double [[PREDPHI3]], double [[TMP26]]
1014+
; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = xor i1 [[ACTIVE_LANE_MASK]], true
1015+
; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = xor i1 [[ACTIVE_LANE_MASK2]], true
1016+
; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = xor i1 [[TMP13]], true
1017+
; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = xor i1 [[TMP14]], true
1018+
; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
1019+
; TFA_INTERLEAVE-NEXT: br i1 [[TMP17]], label %[[BB18:.*]], label %[[PRED_STORE_CONTINUE5]]
1020+
; TFA_INTERLEAVE: [[BB18]]:
1021+
; TFA_INTERLEAVE-NEXT: store double [[SPEC_SELECT]], ptr [[P]], align 8
10191022
; TFA_INTERLEAVE-NEXT: br label %[[PRED_STORE_CONTINUE5]]
10201023
; TFA_INTERLEAVE: [[PRED_STORE_CONTINUE5]]:
10211024
; TFA_INTERLEAVE-NEXT: [[TMP27]] = add i64 [[INDEX]], 2

llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) {
88
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE2]] ]
99
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1010
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
11-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP1]]
11+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP0]]
12+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]]
1213
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer
1314
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
1415
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
1516
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1617
; CHECK: pred.load.if:
17-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP0]]
1818
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
1919
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
2020
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]

0 commit comments

Comments
 (0)