Skip to content

Commit fc4c52e

Browse files
committed
[VPlan] Support VPReverseVectorPointer in DataWithEVL vectorization
1 parent 266ff98 commit fc4c52e

File tree

4 files changed

+48
-32
lines changed

4 files changed

+48
-32
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1462,15 +1462,22 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
14621462
assert(OrigMask && "Unmasked recipe when folding tail");
14631463
return HeaderMask == OrigMask ? nullptr : OrigMask;
14641464
};
1465+
auto SetEVLForReversePointer = [&EVL](VPValue *V) -> void {
1466+
if (auto R =
1467+
dyn_cast<VPReverseVectorPointerRecipe>(V->getDefiningRecipe()))
1468+
R->setOperand(1, &EVL);
1469+
};
14651470

14661471
VPRecipeBase *NewRecipe =
14671472
TypeSwitch<VPRecipeBase *, VPRecipeBase *>(CurRecipe)
14681473
.Case<VPWidenLoadRecipe>([&](VPWidenLoadRecipe *L) {
14691474
VPValue *NewMask = GetNewMask(L->getMask());
1475+
SetEVLForReversePointer(L->getOperand(0));
14701476
return new VPWidenLoadEVLRecipe(*L, EVL, NewMask);
14711477
})
14721478
.Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
14731479
VPValue *NewMask = GetNewMask(S->getMask());
1480+
SetEVLForReversePointer(S->getOperand(0));
14741481
return new VPWidenStoreEVLRecipe(*S, EVL, NewMask);
14751482
})
14761483
.Case<VPWidenRecipe>([&](VPWidenRecipe *W) -> VPRecipeBase * {

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,10 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
157157
})
158158
.Case<VPScalarCastRecipe>(
159159
[&](const VPScalarCastRecipe *S) { return true; })
160+
.Case<VPReverseVectorPointerRecipe>(
161+
[&](const VPReverseVectorPointerRecipe *R) {
162+
return VerifyEVLUse(*R, 1);
163+
})
160164
.Case<VPInstruction>([&](const VPInstruction *I) {
161165
if (I->getOpcode() != Instruction::Add) {
162166
errs()

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll

Lines changed: 34 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -34,24 +34,26 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
3434
; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
3535
; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], -1
3636
; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP7]]
37-
; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 0, [[TMP4]]
38-
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 1, [[TMP4]]
39-
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[TMP9]]
40-
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 [[TMP10]]
41-
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
37+
; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP5]] to i64
38+
; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP9]]
39+
; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 1, [[TMP9]]
40+
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[TMP10]]
41+
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[TMP11]]
42+
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
4243
; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
43-
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]]
44-
; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 0, [[TMP4]]
45-
; IF-EVL-NEXT: [[TMP15:%.*]] = sub i64 1, [[TMP4]]
46-
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP14]]
47-
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP15]]
44+
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]]
45+
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP5]] to i64
46+
; IF-EVL-NEXT: [[TMP16:%.*]] = mul i64 0, [[TMP15]]
47+
; IF-EVL-NEXT: [[TMP17:%.*]] = sub i64 1, [[TMP15]]
48+
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP16]]
49+
; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 [[TMP17]]
4850
; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
49-
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP17]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
50-
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP5]] to i64
51-
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
51+
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP19]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
52+
; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64
53+
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
5254
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
53-
; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
54-
; IF-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
55+
; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
56+
; IF-EVL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
5557
; IF-EVL: middle.block:
5658
; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]]
5759
; IF-EVL: scalar.ph:
@@ -146,26 +148,28 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
146148
; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 100, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
147149
; IF-EVL-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer
148150
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP11]]
149-
; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP4]]
150-
; IF-EVL-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP4]]
151-
; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP17]]
152-
; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i64 [[TMP18]]
151+
; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP5]] to i64
152+
; IF-EVL-NEXT: [[TMP18:%.*]] = mul i64 0, [[TMP17]]
153+
; IF-EVL-NEXT: [[TMP19:%.*]] = sub i64 1, [[TMP17]]
154+
; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP18]]
155+
; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP20]], i64 [[TMP19]]
153156
; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
154-
; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP5]])
157+
; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP21]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP5]])
155158
; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD4]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
156-
; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]]
157-
; IF-EVL-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP4]]
158-
; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 1, [[TMP4]]
159-
; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP22]]
160-
; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP23]]
159+
; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]]
160+
; IF-EVL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP5]] to i64
161+
; IF-EVL-NEXT: [[TMP24:%.*]] = mul i64 0, [[TMP23]]
162+
; IF-EVL-NEXT: [[TMP25:%.*]] = sub i64 1, [[TMP23]]
163+
; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP24]]
164+
; IF-EVL-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP26]], i64 [[TMP25]]
161165
; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
162166
; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
163-
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP25]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP5]])
164-
; IF-EVL-NEXT: [[TMP26:%.*]] = zext i32 [[TMP5]] to i64
165-
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP26]], [[EVL_BASED_IV]]
167+
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP27]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP5]])
168+
; IF-EVL-NEXT: [[TMP28:%.*]] = zext i32 [[TMP5]] to i64
169+
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP28]], [[EVL_BASED_IV]]
166170
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
167-
; IF-EVL-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
168-
; IF-EVL-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
171+
; IF-EVL-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
172+
; IF-EVL-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
169173
; IF-EVL: middle.block:
170174
; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]]
171175
; IF-EVL: scalar.ph:

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,9 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) {
3838
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0
3939
; CHECK-NEXT: [[TMP13:%.*]] = sub nuw nsw i64 1, [[TMP12]]
4040
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP13]]
41-
; CHECK-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP9]]
42-
; CHECK-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP9]]
41+
; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP11]] to i64
42+
; CHECK-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP15]]
43+
; CHECK-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP15]]
4344
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[TMP14]], i64 [[TMP17]]
4445
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i64, ptr [[TMP19]], i64 [[TMP18]]
4546
; CHECK-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP11]])

0 commit comments

Comments
 (0)