Commit 632c5d2
[VPlan] Support VPReverseVectorPointer in DataWithEVL vectorization (#113667)
VPReverseVectorPointer takes the runtime VF as its second operand, but under DataWithEVL tail-folding the EVL (which can be smaller than VF at runtime) should be used instead. This patch updates the EVL transform to walk the users of VF and replace the second operand of each VPReverseVectorPointer user with EVL.
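At the IR level, the change is visible in the reversed-access address computation: the mul/sub offsets that were previously derived from the runtime VF are now derived from the zero-extended EVL of the current iteration, as the updated tests below show. A minimal before/after sketch of the pattern (value names are illustrative, not taken from the tests):

  ; Before: reverse-access offsets scaled by the runtime VF.
  ;   %off.start = mul i64 0, %vf
  ;   %off.end   = sub i64 1, %vf
  ;
  ; After: offsets scaled by this iteration's EVL, zero-extended
  ; from i32 to i64 to match the pointer index type.
  %evl.wide  = zext i32 %evl to i64
  %off.start = mul i64 0, %evl.wide
  %off.end   = sub i64 1, %evl.wide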
Parent: 157d847

4 files changed (+31, -21 lines)
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 6 additions & 0 deletions
@@ -1445,6 +1445,12 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   VPTypeAnalysis TypeInfo(CanonicalIVType);
   LLVMContext &Ctx = CanonicalIVType->getContext();
   SmallVector<VPValue *> HeaderMasks = collectAllHeaderMasks(Plan);
+
+  for (VPUser *U : Plan.getVF().users()) {
+    if (auto *R = dyn_cast<VPReverseVectorPointerRecipe>(U))
+      R->setOperand(1, &EVL);
+  }
+
   for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
     for (VPUser *U : collectUsersRecursively(HeaderMask)) {
       auto *CurRecipe = cast<VPRecipeBase>(U);

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 2 additions & 3 deletions
@@ -143,9 +143,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
       .Case<VPWidenStoreEVLRecipe>([&](const VPWidenStoreEVLRecipe *S) {
         return VerifyEVLUse(*S, 2);
       })
-      .Case<VPWidenLoadEVLRecipe>([&](const VPWidenLoadEVLRecipe *L) {
-        return VerifyEVLUse(*L, 1);
-      })
+      .Case<VPWidenLoadEVLRecipe, VPReverseVectorPointerRecipe>(
+          [&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
       .Case<VPWidenEVLRecipe>([&](const VPWidenEVLRecipe *W) {
         return VerifyEVLUse(
             *W, Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2);

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll

Lines changed: 20 additions & 16 deletions
@@ -34,24 +34,26 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
 ; IF-EVL-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; IF-EVL-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], -1
 ; IF-EVL-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP7]]
-; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 0, [[TMP4]]
-; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 1, [[TMP4]]
+; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP5]] to i64
+; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 0, [[TMP18]]
+; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 1, [[TMP18]]
 ; IF-EVL-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[TMP9]]
 ; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 [[TMP10]]
 ; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]]
-; IF-EVL-NEXT:    [[TMP14:%.*]] = mul i64 0, [[TMP4]]
-; IF-EVL-NEXT:    [[TMP15:%.*]] = sub i64 1, [[TMP4]]
+; IF-EVL-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP5]] to i64
+; IF-EVL-NEXT:    [[TMP14:%.*]] = mul i64 0, [[TMP19]]
+; IF-EVL-NEXT:    [[TMP15:%.*]] = sub i64 1, [[TMP19]]
 ; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP14]]
 ; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP15]]
 ; IF-EVL-NEXT:    [[VP_REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
-; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP5]] to i64
-; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT:    [[TMP20:%.*]] = zext i32 [[TMP5]] to i64
+; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
-; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; IF-EVL:       middle.block:
 ; IF-EVL-NEXT:    br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]]
 ; IF-EVL:       scalar.ph:
@@ -146,26 +148,28 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
 ; IF-EVL-NEXT:    [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 100)
 ; IF-EVL-NEXT:    [[TMP15:%.*]] = select <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer
 ; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP11]]
-; IF-EVL-NEXT:    [[TMP17:%.*]] = mul i64 0, [[TMP4]]
-; IF-EVL-NEXT:    [[TMP18:%.*]] = sub i64 1, [[TMP4]]
+; IF-EVL-NEXT:    [[TMP26:%.*]] = zext i32 [[TMP5]] to i64
+; IF-EVL-NEXT:    [[TMP17:%.*]] = mul i64 0, [[TMP26]]
+; IF-EVL-NEXT:    [[TMP18:%.*]] = sub i64 1, [[TMP26]]
 ; IF-EVL-NEXT:    [[TMP19:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP17]]
 ; IF-EVL-NEXT:    [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i64 [[TMP18]]
 ; IF-EVL-NEXT:    [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD4]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[TMP21:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]]
-; IF-EVL-NEXT:    [[TMP22:%.*]] = mul i64 0, [[TMP4]]
-; IF-EVL-NEXT:    [[TMP23:%.*]] = sub i64 1, [[TMP4]]
+; IF-EVL-NEXT:    [[TMP27:%.*]] = zext i32 [[TMP5]] to i64
+; IF-EVL-NEXT:    [[TMP22:%.*]] = mul i64 0, [[TMP27]]
+; IF-EVL-NEXT:    [[TMP23:%.*]] = sub i64 1, [[TMP27]]
 ; IF-EVL-NEXT:    [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP22]]
 ; IF-EVL-NEXT:    [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP23]]
 ; IF-EVL-NEXT:    [[VP_REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP25]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP5]])
-; IF-EVL-NEXT:    [[TMP26:%.*]] = zext i32 [[TMP5]] to i64
-; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP26]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT:    [[TMP28:%.*]] = zext i32 [[TMP5]] to i64
+; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP28]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
-; IF-EVL-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IF-EVL-NEXT:    [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT:    br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; IF-EVL:       middle.block:
 ; IF-EVL-NEXT:    br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]]
 ; IF-EVL:       scalar.ph:

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll

Lines changed: 3 additions & 2 deletions
@@ -38,8 +38,9 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) {
 ; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP13:%.*]] = sub nuw nsw i64 1, [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP17:%.*]] = mul i64 0, [[TMP9]]
-; CHECK-NEXT:    [[TMP18:%.*]] = sub i64 1, [[TMP9]]
+; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP11]] to i64
+; CHECK-NEXT:    [[TMP17:%.*]] = mul i64 0, [[TMP15]]
+; CHECK-NEXT:    [[TMP18:%.*]] = sub i64 1, [[TMP15]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i64, ptr [[TMP14]], i64 [[TMP17]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i64, ptr [[TMP19]], i64 [[TMP18]]
 ; CHECK-NEXT:    [[VP_REVERSE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
