Skip to content

Commit 3ff1d01

Browse files
committed
Recommit "[VPlan] Try to narrow wide and replicating recipes to uniform recipes."
This reverts commit 0ebb3ac. Re-applies commit with typos fixed.
1 parent 0ebb3ac commit 3ff1d01

File tree

4 files changed

+35
-18
lines changed

4 files changed

+35
-18
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -596,11 +596,36 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
596596
if (!PhiR)
597597
continue;
598598

599+
// Try to narrow wide and replicating recipes to uniform recipes, based on
600+
// VPlan analysis.
601+
// TODO: Apply to all recipes in the future, to replace legacy uniformity
602+
// analysis.
603+
auto Users = collectUsersRecursively(PhiR);
604+
for (VPUser *U : reverse(Users)) {
605+
auto *Def = dyn_cast<VPSingleDefRecipe>(U);
606+
auto *RepR = dyn_cast<VPReplicateRecipe>(U);
607+
// Skip recipes that shouldn't be narrowed.
608+
if (!Def || !isa<VPReplicateRecipe, VPWidenRecipe>(Def) ||
609+
Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
610+
(RepR && (RepR->isUniform() || RepR->isPredicated())))
611+
continue;
612+
613+
// Skip recipes for which lanes other than the first may be used.
614+
if (!vputils::isUniformAfterVectorization(Def) &&
615+
!vputils::onlyFirstLaneUsed(Def))
616+
continue;
617+
618+
auto *Clone = new VPReplicateRecipe(Def->getUnderlyingInstr(),
619+
Def->operands(), /*IsUniform*/ true);
620+
Clone->insertAfter(Def);
621+
Def->replaceAllUsesWith(Clone);
622+
}
623+
599624
// Check if any uniform VPReplicateRecipes using the phi recipe are used by
600625
// ExtractFromEnd. Those must be replaced by a regular VPReplicateRecipe to
601626
// ensure the final value is available.
602627
// TODO: Remove once uniformity analysis is done on VPlan.
603-
for (VPUser *U : collectUsersRecursively(PhiR)) {
628+
for (VPUser *U : Users) {
604629
auto *ExitIRI = dyn_cast<VPIRInstruction>(U);
605630
VPValue *Op;
606631
if (!ExitIRI || !match(ExitIRI->getOperand(0),

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,6 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
132132
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
133133
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
134134
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
135-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[CONV6]], i64 0
136-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
137135
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
138136
; CHECK: [[VECTOR_BODY]]:
139137
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -142,9 +140,9 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
142140
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0
143141
; CHECK-NEXT: [[TMP22:%.*]] = icmp ule <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
144142
; CHECK-NEXT: [[TMP23:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP22]], <vscale x 2 x i1> zeroinitializer
145-
; CHECK-NEXT: [[TMP24:%.*]] = select <vscale x 2 x i1> [[TMP23]], <vscale x 2 x i64> [[BROADCAST_SPLAT2]], <vscale x 2 x i64> splat (i64 1)
146-
; CHECK-NEXT: [[TMP25:%.*]] = sdiv <vscale x 2 x i64> [[BROADCAST_SPLAT]], [[TMP24]]
147-
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <vscale x 2 x i64> [[TMP25]], i32 0
143+
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <vscale x 2 x i1> [[TMP23]], i32 0
144+
; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[CONV6]], i64 1
145+
; CHECK-NEXT: [[TMP26:%.*]] = sdiv i64 [[M]], [[TMP25]]
148146
; CHECK-NEXT: [[TMP27:%.*]] = trunc i64 [[TMP26]] to i32
149147
; CHECK-NEXT: [[TMP28:%.*]] = mul i64 [[TMP26]], [[CONV61]]
150148
; CHECK-NEXT: [[TMP29:%.*]] = sub i64 [[TMP21]], [[TMP28]]

llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,12 @@ define void @gep_use_in_dead_block(ptr noalias %dst, ptr %src) {
1212
; CHECK: [[VECTOR_BODY]]:
1313
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1414
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
15-
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
16-
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
17-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
1815
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP0]]
1916
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i32 0
2017
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2
2118
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], splat (i16 10)
2219
; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
2320
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP0]]
24-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP1]]
25-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP2]]
26-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP3]]
2721
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[TMP8]], i32 0
2822
; CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> zeroinitializer, ptr [[TMP12]], i32 2, <4 x i1> [[TMP7]])
2923
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4

llvm/test/Transforms/LoopVectorize/scalable-assume.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33
define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
44
; CHECK-LABEL: @test1(
55
; CHECK: vector.body:
6-
; CHECK: [[FCMP1:%.*]] = fcmp ogt <vscale x 2 x float>
7-
; CHECK-NEXT: [[FCMP2:%.*]] = fcmp ogt <vscale x 2 x float>
8-
; CHECK-NEXT: [[FCMP1L0:%.*]] = extractelement <vscale x 2 x i1> [[FCMP1]], i32 0
9-
; CHECK-NEXT: tail call void @llvm.assume(i1 [[FCMP1L0]])
10-
; CHECK-NEXT: [[FCMP2L0:%.*]] = extractelement <vscale x 2 x i1> [[FCMP2]], i32 0
11-
; CHECK-NEXT: tail call void @llvm.assume(i1 [[FCMP2L0]])
6+
; CHECK: [[E1:%.*]] = extractelement <vscale x 2 x float> {{.+}}, i32 0
7+
; CHECK-NEXT: [[FCMP1:%.*]] = fcmp ogt float [[E1]]
8+
; CHECK-NEXT: [[E2:%.*]] = extractelement <vscale x 2 x float> {{.+}}, i32 0
9+
; CHECK-NEXT: [[FCMP2:%.*]] = fcmp ogt float [[E2]]
10+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[FCMP1]])
11+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[FCMP2]])
1212
entry:
1313
br label %for.body
1414

0 commit comments

Comments
 (0)