@@ -114,44 +114,8 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr
114
114
; CHECK-SAME: ptr [[ARG:%.*]], i64 [[ARG1:%.*]], ptr [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] {
115
115
; CHECK-NEXT: [[ENTRY:.*]]:
116
116
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[ARG1]], 1
117
- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 54
118
- ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
119
- ; CHECK: [[VECTOR_SCEVCHECK]]:
120
- ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ARG2]], i64 8
121
- ; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
122
- ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
123
- ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
124
- ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]]
125
- ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
126
- ; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]]
127
- ; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
128
- ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[ARG2]], i64 12
129
- ; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
130
- ; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
131
- ; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
132
- ; CHECK-NEXT: [[TMP5:%.*]] = sub i64 0, [[MUL_RESULT3]]
133
- ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
134
- ; CHECK-NEXT: [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP1]]
135
- ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW4]]
136
- ; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[ARG2]], i64 4
137
- ; CHECK-NEXT: [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
138
- ; CHECK-NEXT: [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0
139
- ; CHECK-NEXT: [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1
140
- ; CHECK-NEXT: [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT7]]
141
- ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]]
142
- ; CHECK-NEXT: [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP5]]
143
- ; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW8]]
144
- ; CHECK-NEXT: [[MUL9:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
145
- ; CHECK-NEXT: [[MUL_RESULT10:%.*]] = extractvalue { i64, i1 } [[MUL9]], 0
146
- ; CHECK-NEXT: [[MUL_OVERFLOW11:%.*]] = extractvalue { i64, i1 } [[MUL9]], 1
147
- ; CHECK-NEXT: [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT10]]
148
- ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[MUL_RESULT10]]
149
- ; CHECK-NEXT: [[TMP15:%.*]] = icmp ult ptr [[TMP14]], [[ARG2]]
150
- ; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW11]]
151
- ; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP4]], [[TMP8]]
152
- ; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[TMP12]]
153
- ; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[TMP16]]
154
- ; CHECK-NEXT: br i1 [[TMP19]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
117
+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
118
+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
155
119
; CHECK: [[VECTOR_MEMCHECK]]:
156
120
; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[ARG1]], 4
157
121
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 16
@@ -171,9 +135,9 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr
171
135
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
172
136
; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[INDEX]], 0
173
137
; CHECK-NEXT: [[TMP25:%.*]] = shl i64 [[TMP24]], 5
174
- ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[TMP25]]
138
+ ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 [[TMP25]]
175
139
; CHECK-NEXT: [[TMP27:%.*]] = shl i64 [[TMP24]], 4
176
- ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[TMP27]]
140
+ ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 [[TMP27]]
177
141
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP26]], align 4
178
142
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 0, i32 8>
179
143
; CHECK-NEXT: [[STRIDED_VEC14:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 1, i32 9>
@@ -203,44 +167,44 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr
203
167
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
204
168
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
205
169
; CHECK: [[SCALAR_PH]]:
206
- ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
170
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
207
171
; CHECK-NEXT: br label %[[LOOP:.*]]
208
172
; CHECK: [[LOOP]]:
209
173
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
210
174
; CHECK-NEXT: [[SHL_IV_5:%.*]] = shl i64 [[IV]], 5
211
- ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[SHL_IV_5]]
175
+ ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 [[SHL_IV_5]]
212
176
; CHECK-NEXT: [[ADD_5:%.*]] = or disjoint i64 [[SHL_IV_5]], 16
213
177
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[ADD_5]]
214
178
; CHECK-NEXT: [[SHL_IV_4:%.*]] = shl i64 [[IV]], 4
215
- ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[SHL_IV_4]]
179
+ ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 [[SHL_IV_4]]
216
180
; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_1]], align 4
217
181
; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_2]], align 4
218
182
; CHECK-NEXT: [[ADD_1:%.*]] = fadd float [[L_1]], [[L_2]]
219
183
; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[ADD_1]], 0.000000e+00
220
184
; CHECK-NEXT: store float [[MUL_1]], ptr [[GEP_3]], align 4
221
- ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr i8, ptr [[GEP_1]], i64 4
185
+ ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i8, ptr [[GEP_1]], i64 4
222
186
; CHECK-NEXT: [[L_3:%.*]] = load float, ptr [[GEP_4]], align 4
223
- ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr i8, ptr [[GEP_2]], i64 4
187
+ ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i8, ptr [[GEP_2]], i64 4
224
188
; CHECK-NEXT: [[L_4:%.*]] = load float, ptr [[GEP_5]], align 4
225
189
; CHECK-NEXT: [[ADD_2:%.*]] = fadd float [[L_3]], [[L_4]]
226
190
; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[ADD_2]], 0.000000e+00
227
- ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr i8, ptr [[GEP_3]], i64 4
191
+ ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i8, ptr [[GEP_3]], i64 4
228
192
; CHECK-NEXT: store float [[MUL_2]], ptr [[GEP_6]], align 4
229
- ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr i8, ptr [[GEP_1]], i64 8
193
+ ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i8, ptr [[GEP_1]], i64 8
230
194
; CHECK-NEXT: [[L_5:%.*]] = load float, ptr [[GEP_7]], align 4
231
- ; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr i8, ptr [[GEP_2]], i64 8
195
+ ; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i8, ptr [[GEP_2]], i64 8
232
196
; CHECK-NEXT: [[L_6:%.*]] = load float, ptr [[GEP_8]], align 4
233
197
; CHECK-NEXT: [[ADD_3:%.*]] = fadd float [[L_5]], [[L_6]]
234
198
; CHECK-NEXT: [[MUL_3:%.*]] = fmul float [[ADD_3]], 0.000000e+00
235
- ; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr i8, ptr [[GEP_3]], i64 8
199
+ ; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i8, ptr [[GEP_3]], i64 8
236
200
; CHECK-NEXT: store float [[MUL_3]], ptr [[GEP_9]], align 4
237
- ; CHECK-NEXT: [[I27:%.*]] = getelementptr i8, ptr [[GEP_1]], i64 12
201
+ ; CHECK-NEXT: [[I27:%.*]] = getelementptr inbounds i8, ptr [[GEP_1]], i64 12
238
202
; CHECK-NEXT: [[L_7:%.*]] = load float, ptr [[I27]], align 4
239
- ; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr i8, ptr [[GEP_2]], i64 12
203
+ ; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i8, ptr [[GEP_2]], i64 12
240
204
; CHECK-NEXT: [[L_8:%.*]] = load float, ptr [[GEP_10]], align 4
241
205
; CHECK-NEXT: [[ADD_4:%.*]] = fadd float [[L_7]], [[L_8]]
242
206
; CHECK-NEXT: [[MUL_4:%.*]] = fmul float [[ADD_4]], 0.000000e+00
243
- ; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr i8, ptr [[GEP_3]], i64 12
207
+ ; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr inbounds i8, ptr [[GEP_3]], i64 12
244
208
; CHECK-NEXT: store float [[MUL_4]], ptr [[GEP_11]], align 4
245
209
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
246
210
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[ARG1]]
@@ -254,39 +218,39 @@ entry:
254
218
loop:
255
219
%iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
256
220
%shl.iv.5 = shl i64 %iv , 5
257
- %gep.1 = getelementptr i8 , ptr %arg , i64 %shl.iv.5
221
+ %gep.1 = getelementptr inbounds i8 , ptr %arg , i64 %shl.iv.5
258
222
%add.5 = or disjoint i64 %shl.iv.5 , 16
259
223
%gep.2 = getelementptr i8 , ptr %arg , i64 %add.5
260
224
%shl.iv.4 = shl i64 %iv , 4
261
- %gep.3 = getelementptr i8 , ptr %arg2 , i64 %shl.iv.4
225
+ %gep.3 = getelementptr inbounds i8 , ptr %arg2 , i64 %shl.iv.4
262
226
%l.1 = load float , ptr %gep.1 , align 4
263
227
%l.2 = load float , ptr %gep.2 , align 4
264
228
%add.1 = fadd float %l.1 , %l.2
265
229
%mul.1 = fmul float %add.1 , 0.000000e+00
266
230
store float %mul.1 , ptr %gep.3 , align 4
267
- %gep.4 = getelementptr i8 , ptr %gep.1 , i64 4
231
+ %gep.4 = getelementptr inbounds i8 , ptr %gep.1 , i64 4
268
232
%l.3 = load float , ptr %gep.4 , align 4
269
- %gep.5 = getelementptr i8 , ptr %gep.2 , i64 4
233
+ %gep.5 = getelementptr inbounds i8 , ptr %gep.2 , i64 4
270
234
%l.4 = load float , ptr %gep.5 , align 4
271
235
%add.2 = fadd float %l.3 , %l.4
272
236
%mul.2 = fmul float %add.2 , 0.000000e+00
273
- %gep.6 = getelementptr i8 , ptr %gep.3 , i64 4
237
+ %gep.6 = getelementptr inbounds i8 , ptr %gep.3 , i64 4
274
238
store float %mul.2 , ptr %gep.6 , align 4
275
- %gep.7 = getelementptr i8 , ptr %gep.1 , i64 8
239
+ %gep.7 = getelementptr inbounds i8 , ptr %gep.1 , i64 8
276
240
%l.5 = load float , ptr %gep.7 , align 4
277
- %gep.8 = getelementptr i8 , ptr %gep.2 , i64 8
241
+ %gep.8 = getelementptr inbounds i8 , ptr %gep.2 , i64 8
278
242
%l.6 = load float , ptr %gep.8 , align 4
279
243
%add.3 = fadd float %l.5 , %l.6
280
244
%mul.3 = fmul float %add.3 , 0.000000e+00
281
- %gep.9 = getelementptr i8 , ptr %gep.3 , i64 8
245
+ %gep.9 = getelementptr inbounds i8 , ptr %gep.3 , i64 8
282
246
store float %mul.3 , ptr %gep.9 , align 4
283
- %i27 = getelementptr i8 , ptr %gep.1 , i64 12
247
+ %i27 = getelementptr inbounds i8 , ptr %gep.1 , i64 12
284
248
%l.7 = load float , ptr %i27 , align 4
285
- %gep.10 = getelementptr i8 , ptr %gep.2 , i64 12
249
+ %gep.10 = getelementptr inbounds i8 , ptr %gep.2 , i64 12
286
250
%l.8 = load float , ptr %gep.10 , align 4
287
251
%add.4 = fadd float %l.7 , %l.8
288
252
%mul.4 = fmul float %add.4 , 0.000000e+00
289
- %gep.11 = getelementptr i8 , ptr %gep.3 , i64 12
253
+ %gep.11 = getelementptr inbounds i8 , ptr %gep.3 , i64 12
290
254
store float %mul.4 , ptr %gep.11 , align 4
291
255
%iv.next = add i64 %iv , 1
292
256
%ec = icmp eq i64 %iv , %arg1
0 commit comments