Skip to content

Commit 90c7f53

Browse files
committed
[VPlan] Compute induction end values in VPlan.
Use createDerivedIV to compute IV end values directly in VPlan, instead of creating them up-front. This allows updating IV users outside the loop as follow-up. Depends on #110004 and #109975.
1 parent f593722 commit 90c7f53

33 files changed

+265
-178
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,8 @@ class VPBuilder {
233233

234234
VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind,
235235
FPMathOperator *FPBinOp, VPValue *Start,
236-
VPCanonicalIVPHIRecipe *CanonicalIV,
237-
VPValue *Step, const Twine &Name = "") {
236+
VPValue *CanonicalIV, VPValue *Step,
237+
const Twine &Name = "") {
238238
return tryInsertInstruction(
239239
new VPDerivedIVRecipe(Kind, FPBinOp, Start, CanonicalIV, Step, Name));
240240
}

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 152 additions & 64 deletions
Large diffs are not rendered by default.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
6565
case VPInstruction::FirstOrderRecurrenceSplice:
6666
case VPInstruction::LogicalAnd:
6767
case VPInstruction::PtrAdd:
68+
case VPInstruction::ResumePhi:
6869
return false;
6970
default:
7071
return true;

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
1313
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP4]]
1414
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
1515
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
16-
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
1716
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
1817
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
18+
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
1919
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 8)
2020
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
2121
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i64> [[TMP7]], zeroinitializer
@@ -102,9 +102,9 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
102102
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP4]]
103103
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
104104
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
105-
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
106105
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
107106
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
107+
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
108108
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
109109
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
110110
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i64> [[TMP7]], zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -785,11 +785,11 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
785785
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 257, [[TMP2]]
786786
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
787787
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
788+
; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
789+
; PRED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
788790
; PRED-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 8
789791
; PRED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
790792
; PRED-NEXT: [[IND_END1:%.*]] = mul i64 [[N_VEC]], 2
791-
; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
792-
; PRED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
793793
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
794794
; PRED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 2
795795
; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]]

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -863,8 +863,8 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
863863
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX1]], 1
864864
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
865865
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
866-
; PRED-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32
867866
; PRED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX1]], 1
867+
; PRED-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32
868868
; PRED-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
869869
; PRED-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT3]], <2 x i64> poison, <2 x i32> zeroinitializer
870870
; PRED-NEXT: br label [[LOOP:%.*]]

llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,9 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
101101
; CHECK-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 4
102102
; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[TMP3]], [[TMP35]]
103103
; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF3]]
104-
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC4]]
105104
; CHECK-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64()
106105
; CHECK-NEXT: [[TMP37:%.*]] = mul i64 [[TMP36]], 4
106+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC4]]
107107
; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[CONV]], i64 0
108108
; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <vscale x 4 x i8> [[BROADCAST_SPLATINSERT8]], <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
109109
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
@@ -316,9 +316,9 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef %
316316
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP1]], [[TMP4]]
317317
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
318318
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
319-
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC]]
320319
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
321320
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16
321+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC]]
322322
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[TMP1]])
323323
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[CONV]], i64 0
324324
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
2323
; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 2
2424
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], [[TMP5]]
2525
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
26-
; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]]
2726
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
2827
; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2
28+
; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]]
2929
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[IDX]], i64 0
3030
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
3131
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()

llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -512,9 +512,9 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
512512
; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
513513
; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]]
514514
; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
515-
; CHECK-UNORDERED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
516515
; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
517516
; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
517+
; CHECK-UNORDERED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
518518
; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float [[A2]], i32 0
519519
; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = insertelement <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float [[A1]], i32 0
520520
; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -583,9 +583,9 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
583583
; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
584584
; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]]
585585
; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
586-
; CHECK-ORDERED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
587586
; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
588587
; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
588+
; CHECK-ORDERED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
589589
; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
590590
; CHECK-ORDERED: vector.body:
591591
; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -649,9 +649,9 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
649649
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP2]], [[TMP5]]
650650
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
651651
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
652-
; CHECK-ORDERED-TF-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
653652
; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
654653
; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
654+
; CHECK-ORDERED-TF-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
655655
; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
656656
; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4
657657
; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]]

llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,9 +380,9 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
380380
; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 8
381381
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 10000, [[TMP24]]
382382
; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 10000, [[N_MOD_VF2]]
383-
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC3]]
384383
; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
385384
; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 8
385+
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC3]]
386386
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
387387
; CHECK: vec.epilog.vector.body:
388388
; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]

llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@ define void @induction_i7(ptr %dst) #0 {
1616
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
1717
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]]
1818
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
19-
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i7
2019
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
2120
; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP4]], 2
2221
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2
22+
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i7
2323
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP40]], i64 0
2424
; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
2525
; CHECK-NEXT: [[DOTSPLAT:%.*]] = trunc <vscale x 2 x i64> [[DOTSPLAT_]] to <vscale x 2 x i7>
@@ -83,10 +83,10 @@ define void @induction_i3_zext(ptr %dst) #0 {
8383
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
8484
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]]
8585
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
86-
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i3
8786
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
8887
; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP4]], 2
8988
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2
89+
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i3
9090
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP40]], i64 0
9191
; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
9292
; CHECK-NEXT: [[DOTSPLAT:%.*]] = trunc <vscale x 2 x i64> [[DOTSPLAT_]] to <vscale x 2 x i3>

llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -467,9 +467,9 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias
467467
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
468468
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
469469
; CHECK-NEXT: [[N_VEC:%.*]] = sub nuw nsw i64 512, [[TMP1]]
470-
; CHECK-NEXT: [[IND_END:%.*]] = shl nuw nsw i64 [[N_VEC]], 1
471470
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
472471
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
472+
; CHECK-NEXT: [[IND_END:%.*]] = shl nuw nsw i64 [[N_VEC]], 1
473473
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
474474
; CHECK: vector.body:
475475
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -553,9 +553,9 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias
553553
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
554554
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP6]], i64 [[N_MOD_VF]]
555555
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[TMP9]]
556-
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
557556
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
558557
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2
558+
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
559559
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
560560
; CHECK: vector.body:
561561
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1248,9 +1248,9 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
12481248
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
12491249
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP5]], -4
12501250
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], [[DOTNEG]]
1251-
; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
12521251
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
12531252
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2
1253+
; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
12541254
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
12551255
; CHECK-NEXT: [[TMP9:%.*]] = shl <vscale x 4 x i64> [[TMP8]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
12561256
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3
@@ -1339,10 +1339,10 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
13391339
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
13401340
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP5]], -4
13411341
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], [[DOTNEG]]
1342-
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[N_VEC]], 1
1343-
; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i64 [[TMP6]], 3
13441342
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
13451343
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
1344+
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[N_VEC]], 1
1345+
; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i64 [[TMP11]], 3
13461346
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
13471347
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i64> [[TMP9]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
13481348
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> [[TMP10]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
@@ -1449,9 +1449,9 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
14491449
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
14501450
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP8]], -4
14511451
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], [[DOTNEG]]
1452-
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
14531452
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
14541453
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2
1454+
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
14551455
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
14561456
; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i32 [[TMP11]], 2
14571457
; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], -1

llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
1919
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
2020
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP7]]
2121
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
22+
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
23+
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
2224
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[N_VEC]], 8
2325
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP8]]
2426
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[N_VEC]], 8
2527
; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP9]]
26-
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
27-
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
2828
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2929
; CHECK: vector.body:
3030
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,9 +215,9 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
215215
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP2]], [[TMP5]]
216216
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
217217
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
218-
; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 4
219218
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
220219
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
220+
; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 4
221221
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
222222
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4
223223
; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]]

0 commit comments

Comments
 (0)