@@ -122,7 +122,7 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 {
122
122
; CHECK-SAME: i64 [[IDX_NEG:%.*]], i8 [[A:%.*]]) #[[ATTR1:[0-9]+]] {
123
123
; CHECK-NEXT: iter.check:
124
124
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 1, [[IDX_NEG]]
125
- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
125
+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
126
126
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[ENTRY:%.*]]
127
127
; CHECK: vector.main.loop.iter.check:
128
128
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 16
@@ -132,78 +132,78 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 {
132
132
; CHECK-NEXT: [[IV_NEXT:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
133
133
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A]], i64 0
134
134
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
135
- ; CHECK-NEXT: [[TMP1:%.*]] = sext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i64 >
135
+ ; CHECK-NEXT: [[TMP1:%.*]] = sext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i32 >
136
136
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
137
137
; CHECK: vector.body:
138
138
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
139
- ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64 > [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
139
+ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32 > [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
140
140
; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr null, align 1
141
141
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i8> poison, i8 [[TMP2]], i64 0
142
142
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT2]], <16 x i8> poison, <16 x i32> zeroinitializer
143
- ; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[BROADCAST_SPLAT3]] to <16 x i64 >
144
- ; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i64 > [[TMP3]], [[TMP1]]
145
- ; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <2 x i64 > @llvm.experimental.vector.partial.reduce.add.v2i64.v16i64(<2 x i64 > [[VEC_PHI]], <16 x i64 > [[TMP4]])
143
+ ; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[BROADCAST_SPLAT3]] to <16 x i32 >
144
+ ; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32 > [[TMP3]], [[TMP1]]
145
+ ; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32 > @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32 > [[VEC_PHI]], <16 x i32 > [[TMP4]])
146
146
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
147
147
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IV_NEXT]]
148
148
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
149
149
; CHECK: middle.block:
150
- ; CHECK-NEXT: [[ADD :%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64 > [[PARTIAL_REDUCE]])
150
+ ; CHECK-NEXT: [[TMP6 :%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32 > [[PARTIAL_REDUCE]])
151
151
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[IV_NEXT]]
152
152
; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY:%.*]]
153
153
; CHECK: vec.epilog.iter.check:
154
154
; CHECK-NEXT: [[IND_END6:%.*]] = add i64 [[IDX_NEG]], [[IV_NEXT]]
155
155
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[IV_NEXT]]
156
- ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
156
+ ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
157
157
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
158
158
; CHECK: vec.epilog.ph:
159
159
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT]], [[WHILE_BODY]] ], [ 0, [[ENTRY]] ]
160
- ; CHECK-NEXT: [[ACCUM :%.*]] = phi i64 [ [[ADD ]], [[WHILE_BODY]] ], [ 0, [[ENTRY]] ]
161
- ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], 8
160
+ ; CHECK-NEXT: [[BC_MERGE_RDX :%.*]] = phi i32 [ [[TMP6 ]], [[WHILE_BODY]] ], [ 0, [[ENTRY]] ]
161
+ ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], 4
162
162
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]]
163
163
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IDX_NEG]], [[N_VEC5]]
164
- ; CHECK-NEXT: [[BROADCAST_SPLATINSERT7 :%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0
165
- ; CHECK-NEXT: [[BROADCAST_SPLAT8 :%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT7 ]], <8 x i8> poison, <8 x i32> zeroinitializer
166
- ; CHECK-NEXT: [[TMP7 :%.*]] = sext <8 x i8> [[BROADCAST_SPLAT8 ]] to <8 x i64 >
167
- ; CHECK-NEXT: [[TMP8 :%.*]] = insertelement <1 x i64 > zeroinitializer, i64 [[ACCUM ]], i32 0
164
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT6 :%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
165
+ ; CHECK-NEXT: [[BROADCAST_SPLAT7 :%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6 ]], <4 x i8> poison, <4 x i32> zeroinitializer
166
+ ; CHECK-NEXT: [[TMP8 :%.*]] = sext <4 x i8> [[BROADCAST_SPLAT7 ]] to <4 x i32 >
167
+ ; CHECK-NEXT: [[TMP10 :%.*]] = insertelement <4 x i32 > zeroinitializer, i32 [[BC_MERGE_RDX ]], i32 0
168
168
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
169
169
; CHECK: vec.epilog.vector.body:
170
170
; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[IV]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
171
- ; CHECK-NEXT: [[VEC_PHI10 :%.*]] = phi <1 x i64 > [ [[TMP8 ]], [[VEC_EPILOG_PH]] ], [ [[PARTIAL_REDUCE13 :%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
171
+ ; CHECK-NEXT: [[VEC_PHI9 :%.*]] = phi <4 x i32 > [ [[TMP10 ]], [[VEC_EPILOG_PH]] ], [ [[TMP13 :%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
172
172
; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr null, align 1
173
- ; CHECK-NEXT: [[BROADCAST_SPLATINSERT11 :%.*]] = insertelement <8 x i8> poison, i8 [[TMP9]], i64 0
174
- ; CHECK-NEXT: [[BROADCAST_SPLAT12 :%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT11 ]], <8 x i8> poison, <8 x i32> zeroinitializer
175
- ; CHECK-NEXT: [[TMP10 :%.*]] = sext <8 x i8> [[BROADCAST_SPLAT12 ]] to <8 x i64 >
176
- ; CHECK-NEXT: [[TMP11 :%.*]] = mul <8 x i64 > [[TMP10 ]], [[TMP7 ]]
177
- ; CHECK-NEXT: [[PARTIAL_REDUCE13 ]] = call <1 x i64> @llvm.experimental.vector.partial.reduce.add.v1i64.v8i64(<1 x i64> [[VEC_PHI10 ]], <8 x i64> [[TMP11]])
178
- ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX9]], 8
173
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT10 :%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i64 0
174
+ ; CHECK-NEXT: [[BROADCAST_SPLAT11 :%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT10 ]], <4 x i8> poison, <4 x i32> zeroinitializer
175
+ ; CHECK-NEXT: [[TMP11 :%.*]] = sext <4 x i8> [[BROADCAST_SPLAT11 ]] to <4 x i32 >
176
+ ; CHECK-NEXT: [[TMP14 :%.*]] = mul <4 x i32 > [[TMP11 ]], [[TMP8 ]]
177
+ ; CHECK-NEXT: [[TMP13 ]] = add <4 x i32> [[TMP14 ]], [[VEC_PHI9]]
178
+ ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX9]], 4
179
179
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC5]]
180
180
; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
181
181
; CHECK: vec.epilog.middle.block:
182
- ; CHECK-NEXT: [[TMP13 :%.*]] = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64 > [[PARTIAL_REDUCE13 ]])
182
+ ; CHECK-NEXT: [[TMP15 :%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32 > [[TMP13 ]])
183
183
; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]]
184
184
; CHECK-NEXT: br i1 [[CMP_N15]], label [[WHILE_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
185
185
; CHECK: vec.epilog.scalar.ph:
186
186
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[WHILE_BODY]] ], [ [[IDX_NEG]], [[ITER_CHECK:%.*]] ]
187
- ; CHECK-NEXT: [[BC_RESUME_VAL16 :%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IV_NEXT]], [[WHILE_BODY]] ], [ 0, [[ITER_CHECK]] ]
188
- ; CHECK-NEXT: [[BC_MERGE_RDX17 :%.*]] = phi i64 [ [[TMP13 ]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[ADD ]], [[WHILE_BODY]] ], [ 0, [[ITER_CHECK]] ]
187
+ ; CHECK-NEXT: [[BC_MERGE_RDX17 :%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IV_NEXT]], [[WHILE_BODY]] ], [ 0, [[ITER_CHECK]] ]
188
+ ; CHECK-NEXT: [[BC_MERGE_RDX15 :%.*]] = phi i32 [ [[TMP15 ]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP6 ]], [[WHILE_BODY]] ], [ 0, [[ITER_CHECK]] ]
189
189
; CHECK-NEXT: br label [[WHILE_BODY1:%.*]]
190
190
; CHECK: while.body:
191
191
; CHECK-NEXT: [[IV_NEG:%.*]] = phi i64 [ [[IV_NEG_NEXT:%.*]], [[WHILE_BODY1]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
192
- ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT1:%.*]], [[WHILE_BODY1]] ], [ [[BC_RESUME_VAL16]], [[VEC_EPILOG_SCALAR_PH]] ]
193
192
; CHECK-NEXT: [[ACCUM1:%.*]] = phi i64 [ [[ADD1:%.*]], [[WHILE_BODY1]] ], [ [[BC_MERGE_RDX17]], [[VEC_EPILOG_SCALAR_PH]] ]
193
+ ; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[ADD:%.*]], [[WHILE_BODY1]] ], [ [[BC_MERGE_RDX15]], [[VEC_EPILOG_SCALAR_PH]] ]
194
194
; CHECK-NEXT: [[IV_NEG_NEXT]] = add i64 [[IV_NEG]], 1
195
- ; CHECK-NEXT: [[EXT_A:%.*]] = sext i8 [[A]] to i64
196
- ; CHECK-NEXT: [[IV_NEXT1 ]] = add i64 [[IV1 ]], 1
195
+ ; CHECK-NEXT: [[EXT_A:%.*]] = sext i8 [[A]] to i32
196
+ ; CHECK-NEXT: [[ADD1 ]] = add i64 [[ACCUM1 ]], 1
197
197
; CHECK-NEXT: [[B:%.*]] = load i8, ptr null, align 1
198
- ; CHECK-NEXT: [[EXT_B:%.*]] = sext i8 [[B]] to i64
199
- ; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[EXT_B]], [[EXT_A]]
200
- ; CHECK-NEXT: [[ADD1 ]] = add i64 [[MUL]], [[ACCUM1 ]]
198
+ ; CHECK-NEXT: [[EXT_B:%.*]] = sext i8 [[B]] to i32
199
+ ; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
200
+ ; CHECK-NEXT: [[ADD ]] = add i32 [[MUL]], [[ACCUM ]]
201
201
; CHECK-NEXT: [[CMP_IV_NEG:%.*]] = icmp ugt i64 [[IV_NEG]], 0
202
- ; CHECK-NEXT: [[CMP_IV:%.*]] = icmp ne i64 [[IV1 ]], -1
202
+ ; CHECK-NEXT: [[CMP_IV:%.*]] = icmp ne i64 [[ACCUM1 ]], -1
203
203
; CHECK-NEXT: [[EXITCOND:%.*]] = and i1 [[CMP_IV_NEG]], [[CMP_IV]]
204
204
; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_BODY1]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]]
205
205
; CHECK: while.end.loopexit:
206
- ; CHECK-NEXT: [[RESULT:%.*]] = phi i64 [ [[ADD1 ]], [[WHILE_BODY1]] ], [ [[ADD ]], [[MIDDLE_BLOCK]] ], [ [[TMP13 ]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
206
+ ; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[ADD ]], [[WHILE_BODY1]] ], [ [[TMP6 ]], [[MIDDLE_BLOCK]] ], [ [[TMP15 ]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
207
207
; CHECK-NEXT: ret void
208
208
;
209
209
entry:
@@ -212,21 +212,21 @@ entry:
212
212
while.body: ; preds = %while.body, %entry
213
213
%iv.neg = phi i64 [ %iv.neg.next , %while.body ], [ %idx.neg , %entry ]
214
214
%iv = phi i64 [ %iv.next , %while.body ], [ 0 , %entry ]
215
- %accum = phi i64 [ %add , %while.body ], [ 0 , %entry ]
215
+ %accum = phi i32 [ %add , %while.body ], [ 0 , %entry ]
216
216
%iv.neg.next = add i64 %iv.neg , 1
217
- %ext.a = sext i8 %a to i64
217
+ %ext.a = sext i8 %a to i32
218
218
%iv.next = add i64 %iv , 1
219
219
%b = load i8 , ptr null , align 1
220
- %ext.b = sext i8 %b to i64
221
- %mul = mul i64 %ext.b , %ext.a
222
- %add = add i64 %mul , %accum
220
+ %ext.b = sext i8 %b to i32
221
+ %mul = mul i32 %ext.b , %ext.a
222
+ %add = add i32 %mul , %accum
223
223
%cmp.iv.neg = icmp ugt i64 %iv.neg , 0
224
224
%cmp.iv = icmp ne i64 %iv , -1
225
225
%exitcond = and i1 %cmp.iv.neg , %cmp.iv
226
226
br i1 %exitcond , label %while.body , label %while.end.loopexit
227
227
228
228
while.end.loopexit: ; preds = %while.body
229
- %result = phi i64 [ %add , %while.body ]
229
+ %result = phi i32 [ %add , %while.body ]
230
230
ret void
231
231
}
232
232
0 commit comments