Skip to content

Commit 8282125

Browse files
authored
[LV] Use IVUpdateMayOverflow to set HasNUW. (#111758)
If IVUpdateMayOverflow is false, we proved that the induction increment cannot overflow in the vector loop. This allows setting NUW in some cases when folding the tail. PR: #111758
1 parent 831469f commit 8282125

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+1095
-322
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9056,9 +9056,11 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
90569056

90579057
DebugLoc DL = getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
90589058
TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow);
9059-
// When not folding the tail, we know that the induction increment will not
9060-
// overflow.
9061-
bool HasNUW = Style == TailFoldingStyle::None;
9059+
// Use NUW for the induction increment if we proved that it won't overflow in
9060+
// the vector loop or when not folding the tail. In the later case, we know
9061+
// that the canonical induction increment will not overflow as the vector trip
9062+
// count is >= increment and a multiple of the increment.
9063+
bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
90629064
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL);
90639065

90649066
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder);

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,7 @@ define void @latch_branch_cost(ptr %dst) {
455455
; PRED-NEXT: store i8 0, ptr [[TMP24]], align 1
456456
; PRED-NEXT: br label [[PRED_STORE_CONTINUE6]]
457457
; PRED: pred.store.continue14:
458-
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
458+
; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
459459
; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
460460
; PRED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 104
461461
; PRED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -943,7 +943,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
943943
; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE14]]
944944
; DEFAULT: pred.store.continue14:
945945
; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
946-
; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
946+
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
947947
; DEFAULT-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
948948
; DEFAULT: middle.block:
949949
; DEFAULT-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -1045,7 +1045,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
10451045
; PRED-NEXT: br label [[PRED_STORE_CONTINUE14]]
10461046
; PRED: pred.store.continue14:
10471047
; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
1048-
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
1048+
; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
10491049
; PRED-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
10501050
; PRED: middle.block:
10511051
; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -1593,7 +1593,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize {
15931593
; DEFAULT-NEXT: store i32 [[TMP10]], ptr [[DST]], align 4
15941594
; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE6]]
15951595
; DEFAULT: pred.store.continue6:
1596-
; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
1596+
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
15971597
; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
15981598
; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
15991599
; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
@@ -1656,7 +1656,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize {
16561656
; PRED-NEXT: store i32 [[TMP10]], ptr [[DST]], align 4
16571657
; PRED-NEXT: br label [[PRED_STORE_CONTINUE6]]
16581658
; PRED: pred.store.continue6:
1659-
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
1659+
; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
16601660
; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
16611661
; PRED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
16621662
; PRED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]

llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) {
1818
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> zeroinitializer)
1919
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_MASKED_LOAD]])
2020
; CHECK-NEXT: [[TMP2]] = add i32 [[TMP1]], [[VEC_PHI]]
21-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
21+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
2222
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
2323
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
2424
; CHECK: middle.block:
@@ -71,7 +71,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
7171
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], [[TMP4]]
7272
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_MASKED_LOAD1]])
7373
; CHECK-NEXT: [[TMP8]] = add i32 [[TMP7]], [[TMP6]]
74-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
74+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
7575
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
7676
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
7777
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -124,7 +124,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
124124
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD]]
125125
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[WIDE_MASKED_LOAD1]]
126126
; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]]
127-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
127+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
128128
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
129129
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
130130
; CHECK: middle.block:
@@ -176,7 +176,7 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
176176
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]]
177177
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> splat (i32 -1)
178178
; CHECK-NEXT: [[TMP4]] = and <4 x i32> [[VEC_PHI]], [[TMP3]]
179-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
179+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
180180
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
181181
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
182182
; CHECK: middle.block:
@@ -228,7 +228,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
228228
; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]]
229229
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> zeroinitializer
230230
; CHECK-NEXT: [[TMP4]] = or <4 x i32> [[VEC_PHI]], [[TMP3]]
231-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
231+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
232232
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
233233
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
234234
; CHECK: middle.block:
@@ -280,7 +280,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
280280
; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]]
281281
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> zeroinitializer
282282
; CHECK-NEXT: [[TMP4]] = xor <4 x i32> [[VEC_PHI]], [[TMP3]]
283-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
283+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
284284
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
285285
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
286286
; CHECK: middle.block:
@@ -332,7 +332,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
332332
; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_MASKED_LOAD]]
333333
; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], [[WIDE_MASKED_LOAD1]]
334334
; CHECK-NEXT: [[TMP4]] = select fast <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP3]], <4 x float> [[VEC_PHI]]
335-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
335+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
336336
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
337337
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
338338
; CHECK: middle.block:
@@ -384,7 +384,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
384384
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[WIDE_MASKED_LOAD]]
385385
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[WIDE_MASKED_LOAD1]]
386386
; CHECK-NEXT: [[TMP4]] = select fast <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP3]], <4 x float> [[VEC_PHI]]
387-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
387+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
388388
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
389389
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
390390
; CHECK: middle.block:

llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ define i32 @mla_i32(ptr noalias nocapture readonly %A, ptr noalias nocapture rea
3333
; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer
3434
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP8]])
3535
; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[VEC_PHI]]
36-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16
36+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
3737
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3838
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3939
; CHECK: middle.block:
@@ -116,7 +116,7 @@ define i32 @mla_i8(ptr noalias nocapture readonly %A, ptr noalias nocapture read
116116
; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer
117117
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP8]])
118118
; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[VEC_PHI]]
119-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16
119+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
120120
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
121121
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
122122
; CHECK: middle.block:
@@ -193,7 +193,7 @@ define i32 @add_i32(ptr nocapture readonly %x, i32 %n) #0 {
193193
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
194194
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]])
195195
; CHECK-NEXT: [[TMP5]] = add i32 [[TMP4]], [[VEC_PHI]]
196-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
196+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
197197
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
198198
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
199199
; CHECK: middle.block:
@@ -259,7 +259,7 @@ define i32 @mul_i32(ptr nocapture readonly %x, i32 %n) #0 {
259259
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
260260
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
261261
; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]]
262-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
262+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
263263
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
264264
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
265265
; CHECK: middle.block:
@@ -326,7 +326,7 @@ define i32 @and_i32(ptr nocapture readonly %x, i32 %n) #0 {
326326
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
327327
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
328328
; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]]
329-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
329+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
330330
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
331331
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
332332
; CHECK: middle.block:
@@ -393,7 +393,7 @@ define i32 @or_i32(ptr nocapture readonly %x, i32 %n) #0 {
393393
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
394394
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
395395
; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]]
396-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
396+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
397397
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
398398
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
399399
; CHECK: middle.block:
@@ -460,7 +460,7 @@ define i32 @xor_i32(ptr nocapture readonly %x, i32 %n) #0 {
460460
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
461461
; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
462462
; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]]
463-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
463+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
464464
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
465465
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
466466
; CHECK: middle.block:
@@ -527,7 +527,7 @@ define float @fadd_f32(ptr nocapture readonly %x, i32 %n) #0 {
527527
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison)
528528
; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
529529
; CHECK-NEXT: [[TMP4]] = select fast <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP3]], <4 x float> [[VEC_PHI]]
530-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
530+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
531531
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
532532
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
533533
; CHECK: middle.block:
@@ -594,7 +594,7 @@ define float @fmul_f32(ptr nocapture readonly %x, i32 %n) #0 {
594594
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison)
595595
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
596596
; CHECK-NEXT: [[TMP4]] = select fast <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP3]], <4 x float> [[VEC_PHI]]
597-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
597+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
598598
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
599599
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
600600
; CHECK: middle.block:

0 commit comments

Comments
 (0)