@@ -33,7 +33,7 @@ define i32 @mla_i32(ptr noalias nocapture readonly %A, ptr noalias nocapture rea
33
33
; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer
34
34
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP8]])
35
35
; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[VEC_PHI]]
36
- ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16
36
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
37
37
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
38
38
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
39
39
; CHECK: middle.block:
@@ -116,7 +116,7 @@ define i32 @mla_i8(ptr noalias nocapture readonly %A, ptr noalias nocapture read
116
116
; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer
117
117
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP8]])
118
118
; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[VEC_PHI]]
119
- ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16
119
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
120
120
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
121
121
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
122
122
; CHECK: middle.block:
@@ -193,7 +193,7 @@ define i32 @add_i32(ptr nocapture readonly %x, i32 %n) #0 {
193
193
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
194
194
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]])
195
195
; CHECK-NEXT: [[TMP5]] = add i32 [[TMP4]], [[VEC_PHI]]
196
- ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
196
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
197
197
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
198
198
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
199
199
; CHECK: middle.block:
@@ -259,7 +259,7 @@ define i32 @mul_i32(ptr nocapture readonly %x, i32 %n) #0 {
259
259
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
260
260
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
261
261
; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]]
262
- ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
262
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
263
263
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
264
264
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
265
265
; CHECK: middle.block:
@@ -326,7 +326,7 @@ define i32 @and_i32(ptr nocapture readonly %x, i32 %n) #0 {
326
326
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
327
327
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
328
328
; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]]
329
- ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
329
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
330
330
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
331
331
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
332
332
; CHECK: middle.block:
@@ -393,7 +393,7 @@ define i32 @or_i32(ptr nocapture readonly %x, i32 %n) #0 {
393
393
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
394
394
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
395
395
; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]]
396
- ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
396
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
397
397
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
398
398
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
399
399
; CHECK: middle.block:
@@ -460,7 +460,7 @@ define i32 @xor_i32(ptr nocapture readonly %x, i32 %n) #0 {
460
460
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
461
461
; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
462
462
; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]]
463
- ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
463
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
464
464
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
465
465
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
466
466
; CHECK: middle.block:
@@ -527,7 +527,7 @@ define float @fadd_f32(ptr nocapture readonly %x, i32 %n) #0 {
527
527
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison)
528
528
; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
529
529
; CHECK-NEXT: [[TMP4]] = select fast <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP3]], <4 x float> [[VEC_PHI]]
530
- ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
530
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
531
531
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
532
532
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
533
533
; CHECK: middle.block:
@@ -594,7 +594,7 @@ define float @fmul_f32(ptr nocapture readonly %x, i32 %n) #0 {
594
594
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison)
595
595
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
596
596
; CHECK-NEXT: [[TMP4]] = select fast <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP3]], <4 x float> [[VEC_PHI]]
597
- ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
597
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
598
598
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
599
599
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
600
600
; CHECK: middle.block:
0 commit comments