@@ -58,30 +58,30 @@ define void @test_pr25490(i32 %n, ptr noalias nocapture %a, ptr noalias nocaptur
58
58
; CHECK-NEXT: [[N_VEC5:%.*]] = and i64 [[TMP0]], 4294967288
59
59
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
60
60
; CHECK: vec.epilog.vector.body:
61
- ; CHECK-NEXT: [[INDEX7 :%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11 :%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
62
- ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[INDEX7 ]]
63
- ; CHECK-NEXT: [[WIDE_LOAD8 :%.*]] = load <8 x i8>, ptr [[TMP14]], align 1
64
- ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX7 ]]
65
- ; CHECK-NEXT: [[WIDE_LOAD9 :%.*]] = load <8 x i8>, ptr [[TMP15]], align 1
66
- ; CHECK-NEXT: [[TMP16:%.*]] = zext <8 x i8> [[WIDE_LOAD9 ]] to <8 x i16>
67
- ; CHECK-NEXT: [[TMP17:%.*]] = zext <8 x i8> [[WIDE_LOAD8 ]] to <8 x i16>
61
+ ; CHECK-NEXT: [[INDEX6 :%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10 :%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
62
+ ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[INDEX6 ]]
63
+ ; CHECK-NEXT: [[WIDE_LOAD7 :%.*]] = load <8 x i8>, ptr [[TMP14]], align 1
64
+ ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX6 ]]
65
+ ; CHECK-NEXT: [[WIDE_LOAD8 :%.*]] = load <8 x i8>, ptr [[TMP15]], align 1
66
+ ; CHECK-NEXT: [[TMP16:%.*]] = zext <8 x i8> [[WIDE_LOAD8 ]] to <8 x i16>
67
+ ; CHECK-NEXT: [[TMP17:%.*]] = zext <8 x i8> [[WIDE_LOAD7 ]] to <8 x i16>
68
68
; CHECK-NEXT: [[TMP18:%.*]] = mul nuw <8 x i16> [[TMP16]], [[TMP17]]
69
69
; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
70
70
; CHECK-NEXT: [[TMP20:%.*]] = trunc nuw <8 x i16> [[TMP19]] to <8 x i8>
71
71
; CHECK-NEXT: store <8 x i8> [[TMP20]], ptr [[TMP15]], align 1
72
- ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX7 ]]
73
- ; CHECK-NEXT: [[WIDE_LOAD10 :%.*]] = load <8 x i8>, ptr [[TMP21]], align 1
74
- ; CHECK-NEXT: [[TMP22:%.*]] = zext <8 x i8> [[WIDE_LOAD10 ]] to <8 x i16>
72
+ ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX6 ]]
73
+ ; CHECK-NEXT: [[WIDE_LOAD9 :%.*]] = load <8 x i8>, ptr [[TMP21]], align 1
74
+ ; CHECK-NEXT: [[TMP22:%.*]] = zext <8 x i8> [[WIDE_LOAD9 ]] to <8 x i16>
75
75
; CHECK-NEXT: [[TMP23:%.*]] = mul nuw <8 x i16> [[TMP22]], [[TMP17]]
76
76
; CHECK-NEXT: [[TMP24:%.*]] = lshr <8 x i16> [[TMP23]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
77
77
; CHECK-NEXT: [[TMP25:%.*]] = trunc nuw <8 x i16> [[TMP24]] to <8 x i8>
78
78
; CHECK-NEXT: store <8 x i8> [[TMP25]], ptr [[TMP21]], align 1
79
- ; CHECK-NEXT: [[INDEX_NEXT11 ]] = add nuw i64 [[INDEX7 ]], 8
80
- ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT11 ]], [[N_VEC5]]
79
+ ; CHECK-NEXT: [[INDEX_NEXT10 ]] = add nuw i64 [[INDEX6 ]], 8
80
+ ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT10 ]], [[N_VEC5]]
81
81
; CHECK-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
82
82
; CHECK: vec.epilog.middle.block:
83
- ; CHECK-NEXT: [[CMP_N6 :%.*]] = icmp eq i64 [[N_VEC5]], [[TMP0]]
84
- ; CHECK-NEXT: br i1 [[CMP_N6 ]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
83
+ ; CHECK-NEXT: [[CMP_N11 :%.*]] = icmp eq i64 [[N_VEC5]], [[TMP0]]
84
+ ; CHECK-NEXT: br i1 [[CMP_N11 ]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
85
85
; CHECK: vec.epilog.scalar.ph:
86
86
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
87
87
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
@@ -165,15 +165,15 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i
165
165
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> poison, <16 x i32> zeroinitializer
166
166
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i16> poison, i16 [[B]], i64 0
167
167
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <16 x i32> zeroinitializer
168
- ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
169
- ; CHECK: vector.body:
170
- ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
171
168
; CHECK-NEXT: [[TMP5:%.*]] = mul <16 x i16> [[TMP2]], [[TMP4]]
172
169
; CHECK-NEXT: [[TMP6:%.*]] = mul <16 x i16> [[TMP2]], [[TMP4]]
173
170
; CHECK-NEXT: [[TMP7:%.*]] = lshr <16 x i16> [[TMP5]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
174
171
; CHECK-NEXT: [[TMP8:%.*]] = lshr <16 x i16> [[TMP6]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
175
172
; CHECK-NEXT: [[TMP9:%.*]] = trunc nuw <16 x i16> [[TMP7]] to <16 x i8>
176
173
; CHECK-NEXT: [[TMP10:%.*]] = trunc nuw <16 x i16> [[TMP8]] to <16 x i8>
174
+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
175
+ ; CHECK: vector.body:
176
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
177
177
; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[INDEX]] to i64
178
178
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP11]]
179
179
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 16
@@ -190,13 +190,13 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i
190
190
; CHECK-NEXT: [[TMP15:%.*]] = trunc i32 [[A]] to i16
191
191
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0
192
192
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x i16> poison, i16 [[B]], i64 0
193
- ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
194
- ; CHECK: vec.epilog.vector.body:
195
- ; CHECK-NEXT: [[INDEX7:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
196
193
; CHECK-NEXT: [[TMP18:%.*]] = mul <8 x i16> [[TMP16]], [[TMP17]]
197
194
; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], <i16 8, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
198
195
; CHECK-NEXT: [[TMP20:%.*]] = trunc <8 x i16> [[TMP19]] to <8 x i8>
199
196
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> poison, <8 x i32> zeroinitializer
197
+ ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
198
+ ; CHECK: vec.epilog.vector.body:
199
+ ; CHECK-NEXT: [[INDEX7:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
200
200
; CHECK-NEXT: [[TMP22:%.*]] = sext i32 [[INDEX7]] to i64
201
201
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP22]]
202
202
; CHECK-NEXT: store <8 x i8> [[TMP21]], ptr [[TMP23]], align 1
@@ -244,41 +244,47 @@ define void @test_shrink_select(ptr noalias %src, ptr noalias %dst, i32 %A, i1 %
244
244
; CHECK: vector.ph:
245
245
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[A]] to i16
246
246
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
247
+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> poison, <16 x i32> zeroinitializer
248
+ ; CHECK-NEXT: [[TMP3:%.*]] = mul <16 x i16> [[TMP2]], <i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99>
249
+ ; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i16> [[TMP2]], <i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99>
250
+ ; CHECK-NEXT: [[TMP5:%.*]] = lshr <16 x i16> [[TMP3]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
251
+ ; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i16> [[TMP4]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
252
+ ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[C]], <16 x i16> [[TMP5]], <16 x i16> [[TMP3]]
253
+ ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[C]], <16 x i16> [[TMP6]], <16 x i16> [[TMP4]]
254
+ ; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i16> [[TMP7]] to <16 x i8>
255
+ ; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i16> [[TMP8]] to <16 x i8>
247
256
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
248
257
; CHECK: vector.body:
249
258
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
250
- ; CHECK-NEXT: [[TMP2:%.*]] = mul <16 x i16> [[TMP1]], <i16 99, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
251
- ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> poison, <16 x i32> zeroinitializer
252
- ; CHECK-NEXT: [[TMP4:%.*]] = lshr <16 x i16> [[TMP3]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
253
- ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[C]], <16 x i16> [[TMP4]], <16 x i16> [[TMP3]]
254
- ; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i16> [[TMP5]] to <16 x i8>
255
- ; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[INDEX]] to i64
256
- ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP7]]
257
- ; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1
258
- ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
259
- ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
260
- ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
259
+ ; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[INDEX]] to i64
260
+ ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP11]]
261
+ ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 16
262
+ ; CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP12]], align 1
263
+ ; CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP13]], align 1
264
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
265
+ ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
266
+ ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
261
267
; CHECK: middle.block:
262
268
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
263
269
; CHECK: vec.epilog.iter.check:
264
270
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
265
271
; CHECK: vec.epilog.ph:
266
- ; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[A]] to i16
267
- ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i16> poison, i16 [[TMP10]], i64 0
272
+ ; CHECK-NEXT: [[TMP15:%.*]] = trunc i32 [[A]] to i16
273
+ ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0
274
+ ; CHECK-NEXT: [[TMP17:%.*]] = mul <8 x i16> [[TMP16]], <i16 99, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
275
+ ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i16> [[TMP17]], <8 x i16> poison, <8 x i32> zeroinitializer
276
+ ; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
277
+ ; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[C]], <8 x i16> [[TMP19]], <8 x i16> [[TMP18]]
278
+ ; CHECK-NEXT: [[TMP21:%.*]] = trunc <8 x i16> [[TMP20]] to <8 x i8>
268
279
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
269
280
; CHECK: vec.epilog.vector.body:
270
281
; CHECK-NEXT: [[INDEX3:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
271
- ; CHECK-NEXT: [[TMP12:%.*]] = mul <8 x i16> [[TMP11]], <i16 99, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
272
- ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i16> [[TMP12]], <8 x i16> poison, <8 x i32> zeroinitializer
273
- ; CHECK-NEXT: [[TMP14:%.*]] = lshr <8 x i16> [[TMP13]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
274
- ; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[C]], <8 x i16> [[TMP14]], <8 x i16> [[TMP13]]
275
- ; CHECK-NEXT: [[TMP16:%.*]] = trunc <8 x i16> [[TMP15]] to <8 x i8>
276
- ; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[INDEX3]] to i64
277
- ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP17]]
278
- ; CHECK-NEXT: store <8 x i8> [[TMP16]], ptr [[TMP18]], align 1
282
+ ; CHECK-NEXT: [[TMP22:%.*]] = sext i32 [[INDEX3]] to i64
283
+ ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP22]]
284
+ ; CHECK-NEXT: store <8 x i8> [[TMP21]], ptr [[TMP23]], align 1
279
285
; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i32 [[INDEX3]], 8
280
- ; CHECK-NEXT: [[TMP19 :%.*]] = icmp eq i32 [[INDEX_NEXT4]], 1000
281
- ; CHECK-NEXT: br i1 [[TMP19 ]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
286
+ ; CHECK-NEXT: [[TMP24 :%.*]] = icmp eq i32 [[INDEX_NEXT4]], 1000
287
+ ; CHECK-NEXT: br i1 [[TMP24 ]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
282
288
; CHECK: vec.epilog.middle.block:
283
289
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
284
290
; CHECK: vec.epilog.scalar.ph:
0 commit comments