@@ -96,6 +96,16 @@ define <8 x i8> @strided_vpload_v8i8(ptr %ptr, i32 signext %stride, <8 x i1> %m,
   ret <8 x i8> %load
 }
 
+define <8 x i8> @strided_vpload_v8i8_unit_stride(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v8i8_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i32(ptr %ptr, i32 1, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %load
+}
+
 declare <2 x i16> @llvm.experimental.vp.strided.load.v2i16.p0.i32(ptr, i32, <2 x i1>, i32)
 
 define <2 x i16> @strided_vpload_v2i16(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
@@ -132,6 +142,16 @@ define <8 x i16> @strided_vpload_v8i16(ptr %ptr, i32 signext %stride, <8 x i1> %
   ret <8 x i16> %load
 }
 
+define <8 x i16> @strided_vpload_v8i16_unit_stride(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v8i16_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <8 x i16> @llvm.experimental.vp.strided.load.v8i16.p0.i32(ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %load
+}
+
 define <8 x i16> @strided_vpload_v8i16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
 ; CHECK-LABEL: strided_vpload_v8i16_allones_mask:
 ; CHECK:       # %bb.0:
@@ -168,6 +188,16 @@ define <4 x i32> @strided_vpload_v4i32(ptr %ptr, i32 signext %stride, <4 x i1> %
   ret <4 x i32> %load
 }
 
+define <4 x i32> @strided_vpload_v4i32_unit_stride(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v4i32_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i32(ptr %ptr, i32 4, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %load
+}
+
 declare <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i32(ptr, i32, <8 x i1>, i32)
 
 define <8 x i32> @strided_vpload_v8i32(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
@@ -204,6 +234,16 @@ define <2 x i64> @strided_vpload_v2i64(ptr %ptr, i32 signext %stride, <2 x i1> %
   ret <2 x i64> %load
 }
 
+define <2 x i64> @strided_vpload_v2i64_unit_stride(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v2i64_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i32(ptr %ptr, i32 8, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %load
+}
+
 declare <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr, i32, <4 x i1>, i32)
 
 define <4 x i64> @strided_vpload_v4i64(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
@@ -288,6 +328,16 @@ define <8 x half> @strided_vpload_v8f16(ptr %ptr, i32 signext %stride, <8 x i1>
   ret <8 x half> %load
 }
 
+define <8 x half> @strided_vpload_v8f16_unit_stride(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v8f16_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i32(ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
+  ret <8 x half> %load
+}
+
 declare <2 x float> @llvm.experimental.vp.strided.load.v2f32.p0.i32(ptr, i32, <2 x i1>, i32)
 
 define <2 x float> @strided_vpload_v2f32(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
@@ -312,6 +362,16 @@ define <4 x float> @strided_vpload_v4f32(ptr %ptr, i32 signext %stride, <4 x i1>
   ret <4 x float> %load
 }
 
+define <4 x float> @strided_vpload_v4f32_unit_stride(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v4f32_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <4 x float> @llvm.experimental.vp.strided.load.v4f32.p0.i32(ptr %ptr, i32 4, <4 x i1> %m, i32 %evl)
+  ret <4 x float> %load
+}
+
 declare <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i32(ptr, i32, <8 x i1>, i32)
 
 define <8 x float> @strided_vpload_v8f32(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
@@ -348,6 +408,17 @@ define <2 x double> @strided_vpload_v2f64(ptr %ptr, i32 signext %stride, <2 x i1
   ret <2 x double> %load
 }
 
+define <2 x double> @strided_vpload_v2f64_unit_stride(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v2f64_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <2 x double> @llvm.experimental.vp.strided.load.v2f64.p0.i32(ptr %ptr, i32 8, <2 x i1> %m, i32 %evl)
+  ret <2 x double> %load
+}
+
+
 declare <4 x double> @llvm.experimental.vp.strided.load.v4f64.p0.i32(ptr, i32, <4 x i1>, i32)
 
 define <4 x double> @strided_vpload_v4f64(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
@@ -416,10 +487,10 @@ define <32 x double> @strided_vpload_v32f64(ptr %ptr, i32 signext %stride, <32 x
 ; CHECK-NEXT:    li a4, 16
 ; CHECK-NEXT:    vmv1r.v v9, v0
 ; CHECK-NEXT:    mv a3, a2
-; CHECK-NEXT:    bltu a2, a4, .LBB33_2
+; CHECK-NEXT:    bltu a2, a4, .LBB40_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 16
-; CHECK-NEXT:  .LBB33_2:
+; CHECK-NEXT:  .LBB40_2:
 ; CHECK-NEXT:    mul a4, a3, a1
 ; CHECK-NEXT:    add a4, a0, a4
 ; CHECK-NEXT:    addi a5, a2, -16
@@ -444,10 +515,10 @@ define <32 x double> @strided_vpload_v32f64_allones_mask(ptr %ptr, i32 signext %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a4, 16
 ; CHECK-NEXT:    mv a3, a2
-; CHECK-NEXT:    bltu a2, a4, .LBB34_2
+; CHECK-NEXT:    bltu a2, a4, .LBB41_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 16
-; CHECK-NEXT:  .LBB34_2:
+; CHECK-NEXT:  .LBB41_2:
 ; CHECK-NEXT:    mul a4, a3, a1
 ; CHECK-NEXT:    add a4, a0, a4
 ; CHECK-NEXT:    addi a5, a2, -16
@@ -474,21 +545,21 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ; CHECK-RV32-NEXT:    li a5, 32
 ; CHECK-RV32-NEXT:    vmv1r.v v8, v0
 ; CHECK-RV32-NEXT:    mv a3, a4
-; CHECK-RV32-NEXT:    bltu a4, a5, .LBB35_2
+; CHECK-RV32-NEXT:    bltu a4, a5, .LBB42_2
 ; CHECK-RV32-NEXT:  # %bb.1:
 ; CHECK-RV32-NEXT:    li a3, 32
-; CHECK-RV32-NEXT:  .LBB35_2:
+; CHECK-RV32-NEXT:  .LBB42_2:
 ; CHECK-RV32-NEXT:    mul a5, a3, a2
 ; CHECK-RV32-NEXT:    addi a6, a4, -32
 ; CHECK-RV32-NEXT:    sltu a4, a4, a6
 ; CHECK-RV32-NEXT:    addi a4, a4, -1
 ; CHECK-RV32-NEXT:    and a6, a4, a6
 ; CHECK-RV32-NEXT:    li a4, 16
 ; CHECK-RV32-NEXT:    add a5, a1, a5
-; CHECK-RV32-NEXT:    bltu a6, a4, .LBB35_4
+; CHECK-RV32-NEXT:    bltu a6, a4, .LBB42_4
 ; CHECK-RV32-NEXT:  # %bb.3:
 ; CHECK-RV32-NEXT:    li a6, 16
-; CHECK-RV32-NEXT:  .LBB35_4:
+; CHECK-RV32-NEXT:  .LBB42_4:
 ; CHECK-RV32-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-RV32-NEXT:    vslidedown.vi v0, v8, 4
 ; CHECK-RV32-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
@@ -497,10 +568,10 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ; CHECK-RV32-NEXT:    sltu a6, a3, a5
 ; CHECK-RV32-NEXT:    addi a6, a6, -1
 ; CHECK-RV32-NEXT:    and a5, a6, a5
-; CHECK-RV32-NEXT:    bltu a3, a4, .LBB35_6
+; CHECK-RV32-NEXT:    bltu a3, a4, .LBB42_6
 ; CHECK-RV32-NEXT:  # %bb.5:
 ; CHECK-RV32-NEXT:    li a3, 16
-; CHECK-RV32-NEXT:  .LBB35_6:
+; CHECK-RV32-NEXT:  .LBB42_6:
 ; CHECK-RV32-NEXT:    mul a4, a3, a2
 ; CHECK-RV32-NEXT:    add a4, a1, a4
 ; CHECK-RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
@@ -524,21 +595,21 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ; CHECK-RV64-NEXT:    li a5, 32
 ; CHECK-RV64-NEXT:    vmv1r.v v8, v0
 ; CHECK-RV64-NEXT:    mv a4, a3
-; CHECK-RV64-NEXT:    bltu a3, a5, .LBB35_2
+; CHECK-RV64-NEXT:    bltu a3, a5, .LBB42_2
 ; CHECK-RV64-NEXT:  # %bb.1:
 ; CHECK-RV64-NEXT:    li a4, 32
-; CHECK-RV64-NEXT:  .LBB35_2:
+; CHECK-RV64-NEXT:  .LBB42_2:
 ; CHECK-RV64-NEXT:    mul a5, a4, a2
 ; CHECK-RV64-NEXT:    addi a6, a3, -32
 ; CHECK-RV64-NEXT:    sltu a3, a3, a6
 ; CHECK-RV64-NEXT:    addi a3, a3, -1
 ; CHECK-RV64-NEXT:    and a6, a3, a6
 ; CHECK-RV64-NEXT:    li a3, 16
 ; CHECK-RV64-NEXT:    add a5, a1, a5
-; CHECK-RV64-NEXT:    bltu a6, a3, .LBB35_4
+; CHECK-RV64-NEXT:    bltu a6, a3, .LBB42_4
 ; CHECK-RV64-NEXT:  # %bb.3:
 ; CHECK-RV64-NEXT:    li a6, 16
-; CHECK-RV64-NEXT:  .LBB35_4:
+; CHECK-RV64-NEXT:  .LBB42_4:
 ; CHECK-RV64-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-RV64-NEXT:    vslidedown.vi v0, v8, 4
 ; CHECK-RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
@@ -547,10 +618,10 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ; CHECK-RV64-NEXT:    sltu a6, a4, a5
 ; CHECK-RV64-NEXT:    addi a6, a6, -1
 ; CHECK-RV64-NEXT:    and a5, a6, a5
-; CHECK-RV64-NEXT:    bltu a4, a3, .LBB35_6
+; CHECK-RV64-NEXT:    bltu a4, a3, .LBB42_6
 ; CHECK-RV64-NEXT:  # %bb.5:
 ; CHECK-RV64-NEXT:    li a4, 16
-; CHECK-RV64-NEXT:  .LBB35_6:
+; CHECK-RV64-NEXT:  .LBB42_6:
 ; CHECK-RV64-NEXT:    mul a3, a4, a2
 ; CHECK-RV64-NEXT:    add a3, a1, a3
 ; CHECK-RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma