Skip to content

Commit c1afcaf

Browse files
authored
[RISCV] Match deinterleave(4,8) shuffles to SRL/TRUNC when legal (#118509)
We can extend the existing SRL+TRUNC lowering used for deinterleave2 to deinterleave4 and deinterleave8 when the result types are small enough for the widened shift to be legal. The largest usable factor is ELEN divided by the result element width; on RV64, this means i8 and i16 results for deinterleave4 and i8 results for deinterleave8.
1 parent a93b77c commit c1afcaf

7 files changed

+131
-151
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 44 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -4446,34 +4446,9 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
44464446
VL);
44474447
}
44484448

4449-
// Is this a shuffle extracts either the even or odd elements of a vector?
4450-
// That is, specifically, either (a) or (b) in the options below.
4451-
// Single operand shuffle is easy:
4452-
// a) t35: v8i8 = vector_shuffle<0,2,4,6,u,u,u,u> t34, undef
4453-
// b) t35: v8i8 = vector_shuffle<1,3,5,7,u,u,u,u> t34, undef
4454-
// Double operand shuffle:
4455-
// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4456-
// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4457-
// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4458-
// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4459-
static SDValue isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4460-
SDValue V2, ArrayRef<int> Mask,
4461-
const RISCVSubtarget &Subtarget) {
4462-
// Need to be able to widen the vector.
4463-
if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4464-
return SDValue();
4465-
4466-
// First index must be the first even or odd element from V1.
4467-
if (Mask[0] != 0 && Mask[0] != 1)
4468-
return SDValue();
4469-
4470-
// The others must increase by 2 each time.
4471-
for (unsigned i = 1; i != Mask.size(); ++i)
4472-
if (Mask[i] != -1 && Mask[i] != Mask[0] + (int)i * 2)
4473-
return SDValue();
4474-
4475-
if (1 == count_if(Mask, [](int Idx) { return Idx != -1; }))
4476-
return SDValue();
4449+
// Can this shuffle be performed on exactly one (possibly larger) input?
4450+
static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1,
4451+
SDValue V2) {
44774452

44784453
if (V2.isUndef() &&
44794454
RISCVTargetLowering::getLMUL(ContainerVT) != RISCVII::VLMUL::LMUL_8)
@@ -4490,12 +4465,13 @@ static SDValue isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
44904465
return SDValue();
44914466

44924467
// Src needs to have twice the number of elements.
4493-
if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4468+
unsigned NumElts = VT.getVectorNumElements();
4469+
if (Src.getValueType().getVectorNumElements() != (NumElts * 2))
44944470
return SDValue();
44954471

44964472
// The extracts must extract the two halves of the source.
44974473
if (V1.getConstantOperandVal(1) != 0 ||
4498-
V2.getConstantOperandVal(1) != Mask.size())
4474+
V2.getConstantOperandVal(1) != NumElts)
44994475
return SDValue();
45004476

45014477
return Src;
@@ -4612,36 +4588,29 @@ static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
46124588
return Rotation;
46134589
}
46144590

4615-
// Lower a deinterleave shuffle to vnsrl.
4616-
// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4617-
// -> [p, q, r, s] (EvenElts == false)
4618-
// VT is the type of the vector to return, <[vscale x ]n x ty>
4619-
// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
4620-
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4621-
bool EvenElts, SelectionDAG &DAG) {
4622-
// The result is a vector of type <m x n x ty>. The source is a vector of
4623-
// type <m x n*2 x ty> (For the single source case, the high half is undef)
4624-
if (Src.getValueType() == VT) {
4625-
EVT WideVT = VT.getDoubleNumVectorElementsVT();
4626-
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, DAG.getUNDEF(WideVT),
4627-
Src, DAG.getVectorIdxConstant(0, DL));
4628-
}
4629-
4630-
// Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4631-
// This also converts FP to int.
4591+
// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4592+
// 2, 4, or 8, and the integer type Factor-times larger than VT's
4593+
// element type must be a legal element type.
4594+
// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4595+
// -> [p, q, r, s] (Factor=2, Index=1)
4596+
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
4597+
SDValue Src, unsigned Factor,
4598+
unsigned Index, SelectionDAG &DAG) {
46324599
unsigned EltBits = VT.getScalarSizeInBits();
4633-
MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * 2),
4634-
VT.getVectorElementCount());
4600+
ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4601+
MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4602+
SrcEC.divideCoefficientBy(Factor));
4603+
MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4604+
SrcEC.divideCoefficientBy(Factor));
46354605
Src = DAG.getBitcast(WideSrcVT, Src);
46364606

4637-
MVT IntVT = VT.changeVectorElementTypeToInteger();
4638-
4639-
// If we want even elements, then the shift amount is 0. Otherwise, shift by
4640-
// the original element size.
4641-
unsigned Shift = EvenElts ? 0 : EltBits;
4607+
unsigned Shift = Index * EltBits;
46424608
SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
46434609
DAG.getConstant(Shift, DL, WideSrcVT));
4644-
Res = DAG.getNode(ISD::TRUNCATE, DL, IntVT, Res);
4610+
Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4611+
MVT IntVT = VT.changeVectorElementTypeToInteger();
4612+
Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, IntVT, DAG.getUNDEF(IntVT), Res,
4613+
DAG.getVectorIdxConstant(0, DL));
46454614
return DAG.getBitcast(VT, Res);
46464615
}
46474616

@@ -5332,11 +5301,24 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
53325301
if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
53335302
return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
53345303

5335-
// If this is a deinterleave and we can widen the vector, then we can use
5336-
// vnsrl to deinterleave.
5337-
if (SDValue Src =
5338-
isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget))
5339-
return getDeinterleaveViaVNSRL(DL, VT, Src, Mask[0] == 0, DAG);
5304+
// If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5305+
// use shift and truncate to perform the shuffle.
5306+
// TODO: For Factor=6, we can perform the first step of the deinterleave via
5307+
// shift-and-trunc reducing total cost for everything except an mf8 result.
5308+
// TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5309+
// to do the entire operation.
5310+
if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5311+
const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5312+
assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5313+
for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5314+
unsigned Index = 0;
5315+
if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5316+
1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5317+
if (SDValue Src = getSingleShuffleSrc(VT, ContainerVT, V1, V2))
5318+
return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5319+
}
5320+
}
5321+
}
53405322

53415323
if (SDValue V =
53425324
lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
@@ -10739,8 +10721,8 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
1073910721
// We can deinterleave through vnsrl.wi if the element type is smaller than
1074010722
// ELEN
1074110723
if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10742-
SDValue Even = getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, DAG);
10743-
SDValue Odd = getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, DAG);
10724+
SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
10725+
SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
1074410726
return DAG.getMergeValues({Even, Odd}, DL);
1074510727
}
1074610728

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,19 +24,20 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) {
2424
; CHECK-NEXT: vadd.vi v12, v11, -16
2525
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2626
; CHECK-NEXT: vslidedown.vi v0, v8, 2
27-
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
27+
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
2828
; CHECK-NEXT: vadd.vi v11, v11, -15
2929
; CHECK-NEXT: vmerge.vim v13, v10, 1, v0
3030
; CHECK-NEXT: vmv1r.v v0, v8
31-
; CHECK-NEXT: vmerge.vim v14, v10, 1, v0
32-
; CHECK-NEXT: vnsrl.wi v8, v14, 0
31+
; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
32+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
33+
; CHECK-NEXT: vnsrl.wi v10, v8, 0
34+
; CHECK-NEXT: vnsrl.wi v8, v8, 8
3335
; CHECK-NEXT: vmv1r.v v0, v9
34-
; CHECK-NEXT: vrgather.vv v8, v13, v12, v0.t
35-
; CHECK-NEXT: vnsrl.wi v12, v14, 8
36-
; CHECK-NEXT: vmsne.vi v10, v8, 0
37-
; CHECK-NEXT: vrgather.vv v12, v13, v11, v0.t
38-
; CHECK-NEXT: vmsne.vi v8, v12, 0
39-
; CHECK-NEXT: vmv.v.v v0, v10
36+
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
37+
; CHECK-NEXT: vrgather.vv v10, v13, v12, v0.t
38+
; CHECK-NEXT: vrgather.vv v8, v13, v11, v0.t
39+
; CHECK-NEXT: vmsne.vi v0, v10, 0
40+
; CHECK-NEXT: vmsne.vi v8, v8, 0
4041
; CHECK-NEXT: ret
4142
%vec = load <32 x i1>, ptr %p
4243
%retval = call {<16 x i1>, <16 x i1>} @llvm.vector.deinterleave2.v32i1(<32 x i1> %vec)

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -721,24 +721,12 @@ define <8 x i32> @shuffle_v8i32_2(<8 x i32> %x, <8 x i32> %y) {
721721
define <8 x i8> @shuffle_v64i8_v8i8(<64 x i8> %wide.vec) {
722722
; CHECK-LABEL: shuffle_v64i8_v8i8:
723723
; CHECK: # %bb.0:
724-
; CHECK-NEXT: lui a0, 4112
725-
; CHECK-NEXT: li a1, 240
726-
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
727-
; CHECK-NEXT: vmv.s.x v0, a1
728-
; CHECK-NEXT: li a1, 32
729-
; CHECK-NEXT: addi a0, a0, 257
730-
; CHECK-NEXT: vmv.s.x v14, a0
731-
; CHECK-NEXT: lui a0, 98561
732-
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
733-
; CHECK-NEXT: vcompress.vm v12, v8, v14
734-
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
735-
; CHECK-NEXT: vslidedown.vx v8, v8, a1
736-
; CHECK-NEXT: addi a0, a0, -2048
737724
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
738-
; CHECK-NEXT: vmv.v.x v10, a0
739-
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
740-
; CHECK-NEXT: vrgather.vv v12, v8, v10, v0.t
741-
; CHECK-NEXT: vmv1r.v v8, v12
725+
; CHECK-NEXT: vnsrl.wi v12, v8, 0
726+
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
727+
; CHECK-NEXT: vnsrl.wi v8, v12, 0
728+
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
729+
; CHECK-NEXT: vnsrl.wi v8, v8, 0
742730
; CHECK-NEXT: ret
743731
%s = shufflevector <64 x i8> %wide.vec, <64 x i8> poison, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 40, i32 48, i32 56>
744732
ret <8 x i8> %s

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ define <4 x i32> @v4i32_v16i32(<16 x i32>) {
104104
; RV32-NEXT: vmv.v.i v0, 10
105105
; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma
106106
; RV32-NEXT: vslideup.vi v14, v12, 1
107-
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
107+
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
108108
; RV32-NEXT: vnsrl.wx v12, v8, a0
109109
; RV32-NEXT: vsetivli zero, 8, e32, m4, ta, ma
110110
; RV32-NEXT: vslidedown.vi v8, v8, 8
@@ -116,9 +116,8 @@ define <4 x i32> @v4i32_v16i32(<16 x i32>) {
116116
; RV64-LABEL: v4i32_v16i32:
117117
; RV64: # %bb.0:
118118
; RV64-NEXT: li a0, 32
119-
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
119+
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
120120
; RV64-NEXT: vmv.v.i v0, 10
121-
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
122121
; RV64-NEXT: vnsrl.wx v12, v8, a0
123122
; RV64-NEXT: vsetivli zero, 8, e32, m4, ta, ma
124123
; RV64-NEXT: vslidedown.vi v8, v8, 8

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll

Lines changed: 23 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -67,22 +67,12 @@ define void @deinterleave4_0_i8(ptr %in, ptr %out) {
6767
; CHECK: # %bb.0: # %entry
6868
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
6969
; CHECK-NEXT: vle8.v v8, (a0)
70-
; CHECK-NEXT: li a0, -1
71-
; CHECK-NEXT: vmv.v.i v0, 12
72-
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
73-
; CHECK-NEXT: vslidedown.vi v9, v8, 4
74-
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
75-
; CHECK-NEXT: vwaddu.vv v10, v8, v9
76-
; CHECK-NEXT: vwmaccu.vx v10, a0, v9
70+
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
71+
; CHECK-NEXT: vnsrl.wi v8, v8, 0
72+
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
73+
; CHECK-NEXT: vnsrl.wi v8, v8, 0
7774
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
78-
; CHECK-NEXT: vid.v v9
79-
; CHECK-NEXT: vsll.vi v9, v9, 2
80-
; CHECK-NEXT: vadd.vi v9, v9, -8
81-
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
82-
; CHECK-NEXT: vslidedown.vi v8, v8, 8
83-
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
84-
; CHECK-NEXT: vrgather.vv v10, v8, v9, v0.t
85-
; CHECK-NEXT: vse8.v v10, (a1)
75+
; CHECK-NEXT: vse8.v v8, (a1)
8676
; CHECK-NEXT: ret
8777
entry:
8878
%0 = load <16 x i8>, ptr %in, align 1
@@ -96,20 +86,11 @@ define void @deinterleave4_8_i8(ptr %in, ptr %out) {
9686
; CHECK: # %bb.0: # %entry
9787
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
9888
; CHECK-NEXT: vle8.v v8, (a0)
99-
; CHECK-NEXT: li a0, -1
100-
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
101-
; CHECK-NEXT: vslidedown.vi v9, v8, 8
102-
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
103-
; CHECK-NEXT: vslidedown.vi v10, v9, 4
104-
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
105-
; CHECK-NEXT: vwaddu.vv v11, v9, v10
106-
; CHECK-NEXT: vwmaccu.vx v11, a0, v10
107-
; CHECK-NEXT: li a0, 34
108-
; CHECK-NEXT: vmv.v.i v0, 12
109-
; CHECK-NEXT: vmv.s.x v9, a0
89+
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
90+
; CHECK-NEXT: vnsrl.wi v8, v8, 8
91+
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
92+
; CHECK-NEXT: vnsrl.wi v8, v8, 0
11093
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
111-
; CHECK-NEXT: vcompress.vm v10, v8, v9
112-
; CHECK-NEXT: vmerge.vvm v8, v10, v11, v0
11394
; CHECK-NEXT: vse8.v v8, (a1)
11495
; CHECK-NEXT: ret
11596
entry:
@@ -268,10 +249,12 @@ define void @deinterleave8_0_i8(ptr %in, ptr %out) {
268249
; CHECK: # %bb.0: # %entry
269250
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
270251
; CHECK-NEXT: vle8.v v8, (a0)
271-
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
272-
; CHECK-NEXT: vslidedown.vi v9, v8, 8
273-
; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
274-
; CHECK-NEXT: vslideup.vi v8, v9, 1
252+
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
253+
; CHECK-NEXT: vnsrl.wi v8, v8, 0
254+
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
255+
; CHECK-NEXT: vnsrl.wi v8, v8, 0
256+
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
257+
; CHECK-NEXT: vnsrl.wi v8, v8, 0
275258
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
276259
; CHECK-NEXT: vse8.v v8, (a1)
277260
; CHECK-NEXT: ret
@@ -287,12 +270,14 @@ define void @deinterleave8_8_i8(ptr %in, ptr %out) {
287270
; CHECK: # %bb.0: # %entry
288271
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
289272
; CHECK-NEXT: vle8.v v8, (a0)
290-
; CHECK-NEXT: vmv.v.i v0, -3
291-
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
292-
; CHECK-NEXT: vslidedown.vi v9, v8, 8
293-
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
294-
; CHECK-NEXT: vrgather.vi v9, v8, 1, v0.t
295-
; CHECK-NEXT: vse8.v v9, (a1)
273+
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
274+
; CHECK-NEXT: vnsrl.wi v8, v8, 8
275+
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
276+
; CHECK-NEXT: vnsrl.wi v8, v8, 0
277+
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
278+
; CHECK-NEXT: vnsrl.wi v8, v8, 0
279+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
280+
; CHECK-NEXT: vse8.v v8, (a1)
296281
; CHECK-NEXT: ret
297282
entry:
298283
%0 = load <16 x i8>, ptr %in, align 1

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -441,13 +441,25 @@ entry:
441441
}
442442

443443
define void @vnsrl_0_i8_single_src(ptr %in, ptr %out) {
444-
; CHECK-LABEL: vnsrl_0_i8_single_src:
445-
; CHECK: # %bb.0: # %entry
446-
; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
447-
; CHECK-NEXT: vle8.v v8, (a0)
448-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
449-
; CHECK-NEXT: vse8.v v8, (a1)
450-
; CHECK-NEXT: ret
444+
; V-LABEL: vnsrl_0_i8_single_src:
445+
; V: # %bb.0: # %entry
446+
; V-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
447+
; V-NEXT: vle8.v v8, (a0)
448+
; V-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
449+
; V-NEXT: vnsrl.wi v8, v8, 0
450+
; V-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
451+
; V-NEXT: vse8.v v8, (a1)
452+
; V-NEXT: ret
453+
;
454+
; ZVE32F-LABEL: vnsrl_0_i8_single_src:
455+
; ZVE32F: # %bb.0: # %entry
456+
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
457+
; ZVE32F-NEXT: vle8.v v8, (a0)
458+
; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
459+
; ZVE32F-NEXT: vnsrl.wi v8, v8, 0
460+
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
461+
; ZVE32F-NEXT: vse8.v v8, (a1)
462+
; ZVE32F-NEXT: ret
451463
entry:
452464
%0 = load <8 x i8>, ptr %in, align 1
453465
%shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -456,13 +468,25 @@ entry:
456468
}
457469

458470
define void @vnsrl_0_i8_single_src2(ptr %in, ptr %out) {
459-
; CHECK-LABEL: vnsrl_0_i8_single_src2:
460-
; CHECK: # %bb.0: # %entry
461-
; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
462-
; CHECK-NEXT: vle8.v v8, (a0)
463-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
464-
; CHECK-NEXT: vse8.v v8, (a1)
465-
; CHECK-NEXT: ret
471+
; V-LABEL: vnsrl_0_i8_single_src2:
472+
; V: # %bb.0: # %entry
473+
; V-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
474+
; V-NEXT: vle8.v v8, (a0)
475+
; V-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
476+
; V-NEXT: vnsrl.wi v8, v8, 0
477+
; V-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
478+
; V-NEXT: vse8.v v8, (a1)
479+
; V-NEXT: ret
480+
;
481+
; ZVE32F-LABEL: vnsrl_0_i8_single_src2:
482+
; ZVE32F: # %bb.0: # %entry
483+
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
484+
; ZVE32F-NEXT: vle8.v v8, (a0)
485+
; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
486+
; ZVE32F-NEXT: vnsrl.wi v8, v8, 0
487+
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
488+
; ZVE32F-NEXT: vse8.v v8, (a1)
489+
; ZVE32F-NEXT: ret
466490
entry:
467491
%0 = load <8 x i8>, ptr %in, align 1
468492
%shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>

0 commit comments

Comments
 (0)