
Commit b4a99f1

[RISCV] Lower constant build_vectors with few non-sign bits via vsext (#65648)
If we have a build_vector such as [i64 0, i64 3, i64 1, i64 2], we instead lower this as vsext([i8 0, i8 3, i8 1, i8 2]). For vectors with 4 or fewer elements, the resulting narrow vector can be generated via scalar materialization.

For shuffles which get lowered to vrgathers, constant build_vectors of small constants are idiomatic. As such, this change covers all shuffles with an output type of 4 or fewer elements.

I deliberately started narrow here. I think it makes sense to expand this to longer vectors, but we need a more robust profit model for the recursive expansion. It's questionable whether we want to do the vsext if we're going to generate a constant pool load for the narrower type anyway.

One possibility for future exploration is to allow the narrower VT to be less than 8 bits. We can't use vsext for that, but we could use something analogous to our widening interleave lowering with some extra shifts and ands.
1 parent 9208065 commit b4a99f1

12 files changed: +349 / -196 lines
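To make the packing concrete: the four narrow constants fit in a single 32-bit scalar, and a per-lane sign extension recovers the original i64 values. Below is a minimal standalone sketch in plain C++ (not LLVM APIs; the array name Want and the byte packing order are illustrative assumptions) mirroring what the vmv.s.x + vsext sequence does for the [0, 3, 1, 2] example from the commit message.

#include <cstdint>
#include <cstdio>

int main() {
  // Desired build_vector constants from the commit message: [i64 0, i64 3, i64 1, i64 2].
  const int64_t Want[4] = {0, 3, 1, 2};

  // Truncate each element to i8 and pack the four bytes into one 32-bit
  // scalar -- the kind of value a single lui/addi pair can materialize.
  uint32_t Packed = 0;
  for (int i = 0; i < 4; ++i)
    Packed |= (uint32_t)(uint8_t)Want[i] << (8 * i);

  // Sign-extend each packed byte back to i64, which is what vsext does
  // lane-wise once the scalar has been moved into a vector register.
  for (int i = 0; i < 4; ++i) {
    int64_t Lane = (int8_t)(Packed >> (8 * i));
    std::printf("lane %d: %lld (expected %lld)\n", i, (long long)Lane,
                (long long)Want[i]);
  }
  return 0;
}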

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 21 additions & 0 deletions
@@ -3456,6 +3456,27 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
   if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
     return Res;
 
+  // If the number of signbits allows, see if we can lower as a <N x i8>.
+  // We restrict this to N <= 4 to ensure the resulting narrow vector is
+  // 32 bits or smaller and can thus be materialized cheaply from scalar.
+  // The main motivation for this is the constant index vector required
+  // by vrgather.vv. This covers all index vectors up to size 4.
+  // TODO: We really should be costing the smaller vector. There are
+  // profitable cases this misses.
+  const unsigned ScalarSize =
+      Op.getSimpleValueType().getScalarSizeInBits();
+  if (ScalarSize > 8 && NumElts <= 4) {
+    unsigned SignBits = DAG.ComputeNumSignBits(Op);
+    if (ScalarSize - SignBits < 8) {
+      SDValue Source =
+          DAG.getNode(ISD::TRUNCATE, DL, VT.changeVectorElementType(MVT::i8), Op);
+      Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
+                                       Source, DAG, Subtarget);
+      SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
+      return convertFromScalableVector(VT, Res, DAG, Subtarget);
+    }
+  }
+
   // For constant vectors, use generic constant pool lowering. Otherwise,
   // we'd have to materialize constants in GPRs just to move them into the
   // vector.

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll

Lines changed: 14 additions & 12 deletions
@@ -806,18 +806,19 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
 ; RV32NOM: # %bb.0:
 ; RV32NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; RV32NOM-NEXT: vmv.v.i v9, 0
-; RV32NOM-NEXT: li a0, -1
-; RV32NOM-NEXT: vslide1down.vx v9, v9, a0
 ; RV32NOM-NEXT: lui a0, %hi(.LCPI42_0)
 ; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI42_0)
 ; RV32NOM-NEXT: vle32.v v10, (a0)
-; RV32NOM-NEXT: lui a0, %hi(.LCPI42_1)
-; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI42_1)
-; RV32NOM-NEXT: vle32.v v11, (a0)
+; RV32NOM-NEXT: li a0, -1
+; RV32NOM-NEXT: vslide1down.vx v9, v9, a0
 ; RV32NOM-NEXT: vand.vv v9, v8, v9
 ; RV32NOM-NEXT: vmulh.vv v8, v8, v10
 ; RV32NOM-NEXT: vadd.vv v8, v8, v9
-; RV32NOM-NEXT: vsra.vv v9, v8, v11
+; RV32NOM-NEXT: lui a0, 12320
+; RV32NOM-NEXT: addi a0, a0, 257
+; RV32NOM-NEXT: vmv.s.x v9, a0
+; RV32NOM-NEXT: vsext.vf4 v10, v9
+; RV32NOM-NEXT: vsra.vv v9, v8, v10
 ; RV32NOM-NEXT: vsrl.vi v8, v8, 31
 ; RV32NOM-NEXT: vadd.vv v8, v9, v8
 ; RV32NOM-NEXT: vslidedown.vi v8, v8, 2
@@ -841,18 +842,19 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
 ; RV64NOM: # %bb.0:
 ; RV64NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; RV64NOM-NEXT: vmv.v.i v9, 0
-; RV64NOM-NEXT: li a0, -1
-; RV64NOM-NEXT: vslide1down.vx v9, v9, a0
 ; RV64NOM-NEXT: lui a0, %hi(.LCPI42_0)
 ; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI42_0)
 ; RV64NOM-NEXT: vle32.v v10, (a0)
-; RV64NOM-NEXT: lui a0, %hi(.LCPI42_1)
-; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI42_1)
-; RV64NOM-NEXT: vle32.v v11, (a0)
+; RV64NOM-NEXT: li a0, -1
+; RV64NOM-NEXT: vslide1down.vx v9, v9, a0
 ; RV64NOM-NEXT: vand.vv v9, v8, v9
 ; RV64NOM-NEXT: vmulh.vv v8, v8, v10
 ; RV64NOM-NEXT: vadd.vv v8, v8, v9
-; RV64NOM-NEXT: vsra.vv v8, v8, v11
+; RV64NOM-NEXT: lui a0, 12320
+; RV64NOM-NEXT: addiw a0, a0, 257
+; RV64NOM-NEXT: vmv.s.x v9, a0
+; RV64NOM-NEXT: vsext.vf4 v10, v9
+; RV64NOM-NEXT: vsra.vv v8, v8, v10
 ; RV64NOM-NEXT: vsrl.vi v9, v8, 31
 ; RV64NOM-NEXT: vadd.vv v8, v8, v9
 ; RV64NOM-NEXT: vslidedown.vi v8, v8, 2
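As a sanity check on the new sequence above, the lui/addi immediate should decode back to the same shift amounts previously loaded from .LCPI42_1. A small sketch in plain C++ (not LLVM code; it assumes the usual little-endian byte order of the packed scalar):

#include <cstdint>
#include <cstdio>

int main() {
  // lui a0, 12320 ; addi a0, a0, 257  =>  (12320 << 12) + 257 = 0x03020101.
  uint32_t Imm = (12320u << 12) + 257;
  // vmv.s.x writes Imm into the vector register; viewed as i8 elements its
  // four bytes are the narrow shift vector, which vsext.vf4 widens to i32
  // before feeding vsra.vv.
  for (int i = 0; i < 4; ++i)
    std::printf("shift[%d] = %d\n", i, (int)(int8_t)(Imm >> (8 * i)));
  // Prints 1, 1, 2, 3.
  return 0;
}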

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll

Lines changed: 22 additions & 14 deletions
@@ -435,40 +435,48 @@ define <4 x float> @unary_interleave_v4f32(<4 x float> %x) {
 define <4 x double> @unary_interleave_v4f64(<4 x double> %x) {
 ; RV32-V128-LABEL: unary_interleave_v4f64:
 ; RV32-V128: # %bb.0:
-; RV32-V128-NEXT: lui a0, %hi(.LCPI13_0)
-; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI13_0)
-; RV32-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-V128-NEXT: vle16.v v12, (a0)
+; RV32-V128-NEXT: lui a0, 12304
+; RV32-V128-NEXT: addi a0, a0, 512
+; RV32-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-V128-NEXT: vmv.s.x v10, a0
+; RV32-V128-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-V128-NEXT: vsext.vf2 v12, v10
+; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v12
 ; RV32-V128-NEXT: vmv.v.v v8, v10
 ; RV32-V128-NEXT: ret
 ;
 ; RV64-V128-LABEL: unary_interleave_v4f64:
 ; RV64-V128: # %bb.0:
-; RV64-V128-NEXT: lui a0, %hi(.LCPI13_0)
-; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI13_0)
+; RV64-V128-NEXT: lui a0, 12304
+; RV64-V128-NEXT: addiw a0, a0, 512
 ; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-V128-NEXT: vle64.v v12, (a0)
+; RV64-V128-NEXT: vmv.s.x v10, a0
+; RV64-V128-NEXT: vsext.vf8 v12, v10
 ; RV64-V128-NEXT: vrgather.vv v10, v8, v12
 ; RV64-V128-NEXT: vmv.v.v v8, v10
 ; RV64-V128-NEXT: ret
 ;
 ; RV32-V512-LABEL: unary_interleave_v4f64:
 ; RV32-V512: # %bb.0:
-; RV32-V512-NEXT: lui a0, %hi(.LCPI13_0)
-; RV32-V512-NEXT: addi a0, a0, %lo(.LCPI13_0)
-; RV32-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; RV32-V512-NEXT: vle16.v v10, (a0)
+; RV32-V512-NEXT: lui a0, 12304
+; RV32-V512-NEXT: addi a0, a0, 512
+; RV32-V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; RV32-V512-NEXT: vmv.s.x v9, a0
+; RV32-V512-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; RV32-V512-NEXT: vsext.vf2 v10, v9
+; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, ma
 ; RV32-V512-NEXT: vrgatherei16.vv v9, v8, v10
 ; RV32-V512-NEXT: vmv.v.v v8, v9
 ; RV32-V512-NEXT: ret
 ;
 ; RV64-V512-LABEL: unary_interleave_v4f64:
 ; RV64-V512: # %bb.0:
-; RV64-V512-NEXT: lui a0, %hi(.LCPI13_0)
-; RV64-V512-NEXT: addi a0, a0, %lo(.LCPI13_0)
+; RV64-V512-NEXT: lui a0, 12304
+; RV64-V512-NEXT: addiw a0, a0, 512
 ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; RV64-V512-NEXT: vle64.v v10, (a0)
+; RV64-V512-NEXT: vmv.s.x v9, a0
+; RV64-V512-NEXT: vsext.vf8 v10, v9
 ; RV64-V512-NEXT: vrgather.vv v9, v8, v10
 ; RV64-V512-NEXT: vmv.v.v v8, v9
 ; RV64-V512-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll

Lines changed: 22 additions & 14 deletions
@@ -57,20 +57,24 @@ define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) {
 define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) {
 ; RV32-LABEL: vrgather_permute_shuffle_vu_v4f64:
 ; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI4_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI4_0)
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vle16.v v12, (a0)
+; RV32-NEXT: lui a0, 4096
+; RV32-NEXT: addi a0, a0, 513
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.s.x v10, a0
+; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-NEXT: vsext.vf2 v12, v10
+; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-NEXT: vrgatherei16.vv v10, v8, v12
 ; RV32-NEXT: vmv.v.v v8, v10
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vrgather_permute_shuffle_vu_v4f64:
 ; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI4_0)
-; RV64-NEXT: addi a0, a0, %lo(.LCPI4_0)
+; RV64-NEXT: lui a0, 4096
+; RV64-NEXT: addiw a0, a0, 513
 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vle64.v v12, (a0)
+; RV64-NEXT: vmv.s.x v10, a0
+; RV64-NEXT: vsext.vf8 v12, v10
 ; RV64-NEXT: vrgather.vv v10, v8, v12
 ; RV64-NEXT: vmv.v.v v8, v10
 ; RV64-NEXT: ret
@@ -81,20 +85,24 @@ define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) {
 define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) {
 ; RV32-LABEL: vrgather_permute_shuffle_uv_v4f64:
 ; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI5_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI5_0)
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vle16.v v12, (a0)
+; RV32-NEXT: lui a0, 4096
+; RV32-NEXT: addi a0, a0, 513
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.s.x v10, a0
+; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-NEXT: vsext.vf2 v12, v10
+; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-NEXT: vrgatherei16.vv v10, v8, v12
 ; RV32-NEXT: vmv.v.v v8, v10
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vrgather_permute_shuffle_uv_v4f64:
 ; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI5_0)
-; RV64-NEXT: addi a0, a0, %lo(.LCPI5_0)
+; RV64-NEXT: lui a0, 4096
+; RV64-NEXT: addiw a0, a0, 513
 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vle64.v v12, (a0)
+; RV64-NEXT: vmv.s.x v10, a0
+; RV64-NEXT: vsext.vf8 v12, v10
 ; RV64-NEXT: vrgather.vv v10, v8, v12
 ; RV64-NEXT: vmv.v.v v8, v10
 ; RV64-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll

Lines changed: 22 additions & 14 deletions
@@ -668,40 +668,48 @@ define <4 x i32> @unary_interleave_v4i32(<4 x i32> %x) {
 define <4 x i64> @unary_interleave_v4i64(<4 x i64> %x) {
 ; RV32-V128-LABEL: unary_interleave_v4i64:
 ; RV32-V128: # %bb.0:
-; RV32-V128-NEXT: lui a0, %hi(.LCPI22_0)
-; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI22_0)
-; RV32-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-V128-NEXT: vle16.v v12, (a0)
+; RV32-V128-NEXT: lui a0, 12304
+; RV32-V128-NEXT: addi a0, a0, 512
+; RV32-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-V128-NEXT: vmv.s.x v10, a0
+; RV32-V128-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-V128-NEXT: vsext.vf2 v12, v10
+; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v12
 ; RV32-V128-NEXT: vmv.v.v v8, v10
 ; RV32-V128-NEXT: ret
 ;
 ; RV64-V128-LABEL: unary_interleave_v4i64:
 ; RV64-V128: # %bb.0:
-; RV64-V128-NEXT: lui a0, %hi(.LCPI22_0)
-; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI22_0)
+; RV64-V128-NEXT: lui a0, 12304
+; RV64-V128-NEXT: addiw a0, a0, 512
 ; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-V128-NEXT: vle64.v v12, (a0)
+; RV64-V128-NEXT: vmv.s.x v10, a0
+; RV64-V128-NEXT: vsext.vf8 v12, v10
 ; RV64-V128-NEXT: vrgather.vv v10, v8, v12
 ; RV64-V128-NEXT: vmv.v.v v8, v10
 ; RV64-V128-NEXT: ret
 ;
 ; RV32-V512-LABEL: unary_interleave_v4i64:
 ; RV32-V512: # %bb.0:
-; RV32-V512-NEXT: lui a0, %hi(.LCPI22_0)
-; RV32-V512-NEXT: addi a0, a0, %lo(.LCPI22_0)
-; RV32-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; RV32-V512-NEXT: vle16.v v10, (a0)
+; RV32-V512-NEXT: lui a0, 12304
+; RV32-V512-NEXT: addi a0, a0, 512
+; RV32-V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; RV32-V512-NEXT: vmv.s.x v9, a0
+; RV32-V512-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; RV32-V512-NEXT: vsext.vf2 v10, v9
+; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, ma
 ; RV32-V512-NEXT: vrgatherei16.vv v9, v8, v10
 ; RV32-V512-NEXT: vmv.v.v v8, v9
 ; RV32-V512-NEXT: ret
 ;
 ; RV64-V512-LABEL: unary_interleave_v4i64:
 ; RV64-V512: # %bb.0:
-; RV64-V512-NEXT: lui a0, %hi(.LCPI22_0)
-; RV64-V512-NEXT: addi a0, a0, %lo(.LCPI22_0)
+; RV64-V512-NEXT: lui a0, 12304
+; RV64-V512-NEXT: addiw a0, a0, 512
 ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; RV64-V512-NEXT: vle64.v v10, (a0)
+; RV64-V512-NEXT: vmv.s.x v9, a0
+; RV64-V512-NEXT: vsext.vf8 v10, v9
 ; RV64-V512-NEXT: vrgather.vv v9, v8, v10
 ; RV64-V512-NEXT: vmv.v.v v8, v9
 ; RV64-V512-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll

Lines changed: 46 additions & 18 deletions
@@ -51,29 +51,57 @@ define <4 x i16> @shuffle_vx_v4i16(<4 x i16> %x) {
 }
 
 define <4 x i16> @vrgather_permute_shuffle_vu_v4i16(<4 x i16> %x) {
-; CHECK-LABEL: vrgather_permute_shuffle_vu_v4i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI4_0)
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vle16.v v10, (a0)
-; CHECK-NEXT: vrgather.vv v9, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
+; RV32-LABEL: vrgather_permute_shuffle_vu_v4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, 4096
+; RV32-NEXT: addi a0, a0, 513
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.s.x v9, a0
+; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-NEXT: vsext.vf2 v10, v9
+; RV32-NEXT: vrgather.vv v9, v8, v10
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrgather_permute_shuffle_vu_v4i16:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, 4096
+; RV64-NEXT: addiw a0, a0, 513
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, a0
+; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64-NEXT: vsext.vf2 v10, v9
+; RV64-NEXT: vrgather.vv v9, v8, v10
+; RV64-NEXT: vmv1r.v v8, v9
+; RV64-NEXT: ret
   %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
   ret <4 x i16> %s
 }
 
 define <4 x i16> @vrgather_permute_shuffle_uv_v4i16(<4 x i16> %x) {
-; CHECK-LABEL: vrgather_permute_shuffle_uv_v4i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0)
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vle16.v v10, (a0)
-; CHECK-NEXT: vrgather.vv v9, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
+; RV32-LABEL: vrgather_permute_shuffle_uv_v4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, 4096
+; RV32-NEXT: addi a0, a0, 513
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.s.x v9, a0
+; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-NEXT: vsext.vf2 v10, v9
+; RV32-NEXT: vrgather.vv v9, v8, v10
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrgather_permute_shuffle_uv_v4i16:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, 4096
+; RV64-NEXT: addiw a0, a0, 513
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, a0
+; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64-NEXT: vsext.vf2 v10, v9
+; RV64-NEXT: vrgather.vv v9, v8, v10
+; RV64-NEXT: vmv1r.v v8, v9
+; RV64-NEXT: ret
   %s = shufflevector <4 x i16> poison, <4 x i16> %x, <4 x i32> <i32 5, i32 6, i32 4, i32 5>
   ret <4 x i16> %s
 }
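The same decoding applies to the vrgather index vectors in these shuffle and interleave tests. A short plain-C++ sketch (the helper name decode is purely illustrative, and little-endian byte order within the packed scalar is assumed): the first pair reproduces the <i32 1, i32 2, i32 0, i32 1> mask used above, and the second the index vector [0, 2, 1, 3] materialized in the interleave tests.

#include <cstdint>
#include <cstdio>

// Decode a lui/addi(w) pair into the four i8 indices that vsext.vf2/vf8
// recreates for vrgather.vv / vrgatherei16.vv.
static void decode(const char *Name, uint32_t Hi20, uint32_t Lo12) {
  uint32_t Imm = (Hi20 << 12) + Lo12;
  std::printf("%-22s 0x%08x ->", Name, Imm);
  for (int i = 0; i < 4; ++i)
    std::printf(" %d", (int)(int8_t)(Imm >> (8 * i)));
  std::printf("\n");
}

int main() {
  decode("lui 4096,  addi 513", 4096, 513);   // 1 2 0 1
  decode("lui 12304, addi 512", 12304, 512);  // 0 2 1 3
  return 0;
}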
