Commit 5ad500c — "[RISCV] Coverage for a few missed vector idioms" (parent commit: 054c23d)

File tree: 3 files changed (+175 additions, −20 deletions)

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll

Lines changed: 77 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,33 @@ define void @buildvec_dominant0_v8i16(ptr %x) {
428428
ret void
429429
}
430430

431+
define void @buildvec_dominant0_v8i16_with_end_element(ptr %x) {
432+
; CHECK-LABEL: buildvec_dominant0_v8i16_with_end_element:
433+
; CHECK: # %bb.0:
434+
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
435+
; CHECK-NEXT: vmv.v.i v8, 8
436+
; CHECK-NEXT: li a1, 3
437+
; CHECK-NEXT: vslide1down.vx v8, v8, a1
438+
; CHECK-NEXT: vse16.v v8, (a0)
439+
; CHECK-NEXT: ret
440+
store <8 x i16> <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 3>, ptr %x
441+
ret void
442+
}
443+
444+
define void @buildvec_dominant0_v8i16_with_tail(ptr %x) {
445+
; CHECK-LABEL: buildvec_dominant0_v8i16_with_tail:
446+
; CHECK: # %bb.0:
447+
; CHECK-NEXT: lui a1, %hi(.LCPI35_0)
448+
; CHECK-NEXT: addi a1, a1, %lo(.LCPI35_0)
449+
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
450+
; CHECK-NEXT: vle16.v v8, (a1)
451+
; CHECK-NEXT: vse16.v v8, (a0)
452+
; CHECK-NEXT: ret
453+
store <8 x i16> <i16 8, i16 8, i16 8, i16 8, i16 8, i16 undef, i16 2, i16 3>, ptr %x
454+
ret void
455+
}
456+
457+
431458
define void @buildvec_dominant1_v8i16(ptr %x) {
432459
; CHECK-LABEL: buildvec_dominant1_v8i16:
433460
; CHECK: # %bb.0:
@@ -494,17 +521,17 @@ define <2 x i8> @buildvec_dominant2_v2i8() {
494521
define void @buildvec_dominant0_v2i32(ptr %x) {
495522
; RV32-LABEL: buildvec_dominant0_v2i32:
496523
; RV32: # %bb.0:
497-
; RV32-NEXT: lui a1, %hi(.LCPI38_0)
498-
; RV32-NEXT: addi a1, a1, %lo(.LCPI38_0)
524+
; RV32-NEXT: lui a1, %hi(.LCPI40_0)
525+
; RV32-NEXT: addi a1, a1, %lo(.LCPI40_0)
499526
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
500527
; RV32-NEXT: vle32.v v8, (a1)
501528
; RV32-NEXT: vse32.v v8, (a0)
502529
; RV32-NEXT: ret
503530
;
504531
; RV64V-LABEL: buildvec_dominant0_v2i32:
505532
; RV64V: # %bb.0:
506-
; RV64V-NEXT: lui a1, %hi(.LCPI38_0)
507-
; RV64V-NEXT: ld a1, %lo(.LCPI38_0)(a1)
533+
; RV64V-NEXT: lui a1, %hi(.LCPI40_0)
534+
; RV64V-NEXT: ld a1, %lo(.LCPI40_0)(a1)
508535
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
509536
; RV64V-NEXT: vmv.v.i v8, -1
510537
; RV64V-NEXT: vsetvli zero, zero, e64, m1, tu, ma
@@ -514,8 +541,8 @@ define void @buildvec_dominant0_v2i32(ptr %x) {
514541
;
515542
; RV64ZVE32-LABEL: buildvec_dominant0_v2i32:
516543
; RV64ZVE32: # %bb.0:
517-
; RV64ZVE32-NEXT: lui a1, %hi(.LCPI38_0)
518-
; RV64ZVE32-NEXT: ld a1, %lo(.LCPI38_0)(a1)
544+
; RV64ZVE32-NEXT: lui a1, %hi(.LCPI40_0)
545+
; RV64ZVE32-NEXT: ld a1, %lo(.LCPI40_0)(a1)
519546
; RV64ZVE32-NEXT: li a2, -1
520547
; RV64ZVE32-NEXT: sd a1, 0(a0)
521548
; RV64ZVE32-NEXT: sd a2, 8(a0)
@@ -527,26 +554,26 @@ define void @buildvec_dominant0_v2i32(ptr %x) {
527554
define void @buildvec_dominant1_optsize_v2i32(ptr %x) optsize {
528555
; RV32-LABEL: buildvec_dominant1_optsize_v2i32:
529556
; RV32: # %bb.0:
530-
; RV32-NEXT: lui a1, %hi(.LCPI39_0)
531-
; RV32-NEXT: addi a1, a1, %lo(.LCPI39_0)
557+
; RV32-NEXT: lui a1, %hi(.LCPI41_0)
558+
; RV32-NEXT: addi a1, a1, %lo(.LCPI41_0)
532559
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
533560
; RV32-NEXT: vle32.v v8, (a1)
534561
; RV32-NEXT: vse32.v v8, (a0)
535562
; RV32-NEXT: ret
536563
;
537564
; RV64V-LABEL: buildvec_dominant1_optsize_v2i32:
538565
; RV64V: # %bb.0:
539-
; RV64V-NEXT: lui a1, %hi(.LCPI39_0)
540-
; RV64V-NEXT: addi a1, a1, %lo(.LCPI39_0)
566+
; RV64V-NEXT: lui a1, %hi(.LCPI41_0)
567+
; RV64V-NEXT: addi a1, a1, %lo(.LCPI41_0)
541568
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
542569
; RV64V-NEXT: vle64.v v8, (a1)
543570
; RV64V-NEXT: vse64.v v8, (a0)
544571
; RV64V-NEXT: ret
545572
;
546573
; RV64ZVE32-LABEL: buildvec_dominant1_optsize_v2i32:
547574
; RV64ZVE32: # %bb.0:
548-
; RV64ZVE32-NEXT: lui a1, %hi(.LCPI39_0)
549-
; RV64ZVE32-NEXT: ld a1, %lo(.LCPI39_0)(a1)
575+
; RV64ZVE32-NEXT: lui a1, %hi(.LCPI41_0)
576+
; RV64ZVE32-NEXT: ld a1, %lo(.LCPI41_0)(a1)
550577
; RV64ZVE32-NEXT: li a2, -1
551578
; RV64ZVE32-NEXT: sd a1, 0(a0)
552579
; RV64ZVE32-NEXT: sd a2, 8(a0)
@@ -604,17 +631,17 @@ define void @buildvec_seq_v8i8_v2i32(ptr %x) {
604631
define void @buildvec_seq_v16i8_v2i64(ptr %x) {
605632
; RV32-LABEL: buildvec_seq_v16i8_v2i64:
606633
; RV32: # %bb.0:
607-
; RV32-NEXT: lui a1, %hi(.LCPI42_0)
608-
; RV32-NEXT: addi a1, a1, %lo(.LCPI42_0)
634+
; RV32-NEXT: lui a1, %hi(.LCPI44_0)
635+
; RV32-NEXT: addi a1, a1, %lo(.LCPI44_0)
609636
; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
610637
; RV32-NEXT: vle8.v v8, (a1)
611638
; RV32-NEXT: vse8.v v8, (a0)
612639
; RV32-NEXT: ret
613640
;
614641
; RV64V-LABEL: buildvec_seq_v16i8_v2i64:
615642
; RV64V: # %bb.0:
616-
; RV64V-NEXT: lui a1, %hi(.LCPI42_0)
617-
; RV64V-NEXT: ld a1, %lo(.LCPI42_0)(a1)
643+
; RV64V-NEXT: lui a1, %hi(.LCPI44_0)
644+
; RV64V-NEXT: ld a1, %lo(.LCPI44_0)(a1)
618645
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
619646
; RV64V-NEXT: vmv.v.x v8, a1
620647
; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma
@@ -623,8 +650,8 @@ define void @buildvec_seq_v16i8_v2i64(ptr %x) {
623650
;
624651
; RV64ZVE32-LABEL: buildvec_seq_v16i8_v2i64:
625652
; RV64ZVE32: # %bb.0:
626-
; RV64ZVE32-NEXT: lui a1, %hi(.LCPI42_0)
627-
; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI42_0)
653+
; RV64ZVE32-NEXT: lui a1, %hi(.LCPI44_0)
654+
; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI44_0)
628655
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
629656
; RV64ZVE32-NEXT: vle8.v v8, (a1)
630657
; RV64ZVE32-NEXT: vse8.v v8, (a0)
@@ -656,8 +683,8 @@ define void @buildvec_seq2_v16i8_v2i64(ptr %x) {
656683
;
657684
; RV64ZVE32-LABEL: buildvec_seq2_v16i8_v2i64:
658685
; RV64ZVE32: # %bb.0:
659-
; RV64ZVE32-NEXT: lui a1, %hi(.LCPI43_0)
660-
; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI43_0)
686+
; RV64ZVE32-NEXT: lui a1, %hi(.LCPI45_0)
687+
; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI45_0)
661688
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
662689
; RV64ZVE32-NEXT: vle8.v v8, (a1)
663690
; RV64ZVE32-NEXT: vse8.v v8, (a0)
@@ -3384,3 +3411,33 @@ define <1 x i32> @buildvec_v1i32_pack(i32 %e1) {
33843411
ret <1 x i32> %v1
33853412
}
33863413

3414+
define <4 x i32> @buildvec_vslide1up(i32 %e1, i32 %e2) {
3415+
; CHECK-LABEL: buildvec_vslide1up:
3416+
; CHECK: # %bb.0:
3417+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3418+
; CHECK-NEXT: vmv.v.x v8, a0
3419+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
3420+
; CHECK-NEXT: vmv.s.x v8, a1
3421+
; CHECK-NEXT: ret
3422+
%v1 = insertelement <4 x i32> poison, i32 %e2, i32 0
3423+
%v2 = insertelement <4 x i32> %v1, i32 %e1, i32 1
3424+
%v3 = insertelement <4 x i32> %v2, i32 %e1, i32 2
3425+
%v4 = insertelement <4 x i32> %v3, i32 %e1, i32 3
3426+
ret <4 x i32> %v4
3427+
}
3428+
3429+
define <4 x i1> @buildvec_i1_splat(i1 %e1) {
3430+
; CHECK-LABEL: buildvec_i1_splat:
3431+
; CHECK: # %bb.0:
3432+
; CHECK-NEXT: andi a0, a0, 1
3433+
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
3434+
; CHECK-NEXT: vmv.v.x v8, a0
3435+
; CHECK-NEXT: vmsne.vi v0, v8, 0
3436+
; CHECK-NEXT: ret
3437+
%v1 = insertelement <4 x i1> poison, i1 %e1, i32 0
3438+
%v2 = insertelement <4 x i1> %v1, i1 %e1, i32 1
3439+
%v3 = insertelement <4 x i1> %v2, i1 %e1, i32 2
3440+
%v4 = insertelement <4 x i1> %v3, i1 %e1, i32 3
3441+
ret <4 x i1> %v4
3442+
}
3443+

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -566,3 +566,26 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize {
566566
; ZVE32F-NEXT: ret
567567
ret <128 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1>
568568
}
569+
570+
define <4 x i1> @buildvec_mask_splat(i1 %e1) {
571+
; CHECK-LABEL: buildvec_mask_splat:
572+
; CHECK: # %bb.0:
573+
; CHECK-NEXT: andi a0, a0, 1
574+
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
575+
; CHECK-NEXT: vmv.v.x v8, a0
576+
; CHECK-NEXT: vmsne.vi v0, v8, 0
577+
; CHECK-NEXT: ret
578+
;
579+
; ZVE32F-LABEL: buildvec_mask_splat:
580+
; ZVE32F: # %bb.0:
581+
; ZVE32F-NEXT: andi a0, a0, 1
582+
; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
583+
; ZVE32F-NEXT: vmv.v.x v8, a0
584+
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
585+
; ZVE32F-NEXT: ret
586+
%v1 = insertelement <4 x i1> poison, i1 %e1, i32 0
587+
%v2 = insertelement <4 x i1> %v1, i1 %e1, i32 1
588+
%v3 = insertelement <4 x i1> %v2, i1 %e1, i32 2
589+
%v4 = insertelement <4 x i1> %v3, i1 %e1, i32 3
590+
ret <4 x i1> %v4
591+
}

llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,3 +366,78 @@ entry:
366366
ret void
367367
}
368368
declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
369+
370+
define i64 @op_then_reduce(<4 x i64> %v, <4 x i64> %v2) {
371+
; CHECK-LABEL: op_then_reduce:
372+
; CHECK: # %bb.0: # %entry
373+
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
374+
; CHECK-NEXT: vadd.vv v8, v8, v10
375+
; CHECK-NEXT: vmv.s.x v10, zero
376+
; CHECK-NEXT: vredsum.vs v8, v8, v10
377+
; CHECK-NEXT: vmv.x.s a0, v8
378+
; CHECK-NEXT: ret
379+
entry:
380+
%rdx1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
381+
%rdx2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v2)
382+
%res = add i64 %rdx1, %rdx2
383+
ret i64 %res
384+
}
385+
386+
387+
define i64 @two_reduce_scalar_bypass(<4 x i64> %v, <4 x i64> %v2) {
388+
; CHECK-LABEL: two_reduce_scalar_bypass:
389+
; CHECK: # %bb.0: # %entry
390+
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
391+
; CHECK-NEXT: vmv.s.x v12, zero
392+
; CHECK-NEXT: vredxor.vs v8, v8, v12
393+
; CHECK-NEXT: vredsum.vs v8, v10, v8
394+
; CHECK-NEXT: vmv.x.s a0, v8
395+
; CHECK-NEXT: ret
396+
entry:
397+
%rdx1 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
398+
%rdx2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v2)
399+
%res = add i64 %rdx1, %rdx2
400+
ret i64 %res
401+
}
402+
403+
define i64 @two_reduce_scalar_bypass_zext(<4 x i64> %v, <4 x i32> %v2) {
404+
; CHECK-LABEL: two_reduce_scalar_bypass_zext:
405+
; CHECK: # %bb.0: # %entry
406+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
407+
; CHECK-NEXT: vmv.s.x v11, zero
408+
; CHECK-NEXT: vredsum.vs v10, v10, v11
409+
; CHECK-NEXT: vmv.x.s a0, v10
410+
; CHECK-NEXT: slli a0, a0, 32
411+
; CHECK-NEXT: srli a0, a0, 32
412+
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
413+
; CHECK-NEXT: vmv.s.x v10, a0
414+
; CHECK-NEXT: vredsum.vs v8, v8, v10
415+
; CHECK-NEXT: vmv.x.s a0, v8
416+
; CHECK-NEXT: ret
417+
entry:
418+
%rdx1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
419+
%rdx2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v2)
420+
%rdx2.zext = zext i32 %rdx2 to i64
421+
%res = add i64 %rdx1, %rdx2.zext
422+
ret i64 %res
423+
}
424+
425+
define i64 @two_reduce_scalar_bypass_sext(<4 x i64> %v, <4 x i32> %v2) {
426+
; CHECK-LABEL: two_reduce_scalar_bypass_sext:
427+
; CHECK: # %bb.0: # %entry
428+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
429+
; CHECK-NEXT: vmv.s.x v11, zero
430+
; CHECK-NEXT: vredsum.vs v10, v10, v11
431+
; CHECK-NEXT: vmv.x.s a0, v10
432+
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
433+
; CHECK-NEXT: vmv.s.x v10, a0
434+
; CHECK-NEXT: vredsum.vs v8, v8, v10
435+
; CHECK-NEXT: vmv.x.s a0, v8
436+
; CHECK-NEXT: ret
437+
entry:
438+
%rdx1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
439+
%rdx2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v2)
440+
%rdx2.zext = sext i32 %rdx2 to i64
441+
%res = add i64 %rdx1, %rdx2.zext
442+
ret i64 %res
443+
}

Commit comments: 0