Skip to content

Commit c59d3ac

Browse files
committed
[RISCV] Allow non-power-of-2 vectors for VLS code generation
SLP supports non-power-of-2 vectors [1], so we should consider supporting this for RISC-V vector code generation. It is natural to support non-power-of-2 VLS vectors for the vector extension, as VL does not impose any constraints on vector length. In theory, we could support any length, but we want to prevent the number of MVTs from growing too quickly. Therefore, we only add the v3, v5, v7, and v15 vector types. [1] #77790
1 parent 08e9653 commit c59d3ac

30 files changed

+696
-1232
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2183,8 +2183,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
21832183
[[maybe_unused]] bool ExactlyVecRegSized =
21842184
Subtarget->expandVScale(SubVecVT.getSizeInBits())
21852185
.isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2186-
assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2187-
.getKnownMinValue()));
21882186
assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
21892187
}
21902188
MVT ContainerVT = VT;

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2644,9 +2644,14 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
26442644
if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
26452645
return false;
26462646

2647-
// TODO: Perhaps an artificial restriction, but worth having whilst getting
2648-
// the base fixed length RVV support in place.
2649-
if (!VT.isPow2VectorType())
2647+
// Only support non-power-of-2 fixed length vector types with lengths 3, 5, 7,
2648+
// or 15.
2649+
// In theory, we could support any length, but we want to prevent the
2650+
// number of MVTs from growing too quickly. Therefore, we only add these
2651+
// specific types.
2652+
unsigned NumElems = VT.getVectorNumElements();
2653+
if (!VT.isPow2VectorType() && NumElems != 3 && NumElems != 5 &&
2654+
NumElems != 7 && NumElems != 15)
26502655
return false;
26512656

26522657
return true;
@@ -2683,10 +2688,14 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
26832688
// We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
26842689
// narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
26852690
// each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2691+
unsigned NumVLSElts = VT.getVectorNumElements();
2692+
if (!isPowerOf2_32(NumVLSElts))
2693+
NumVLSElts = llvm::NextPowerOf2 (NumVLSElts);
2694+
26862695
unsigned NumElts =
2687-
(VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2696+
(NumVLSElts * RISCV::RVVBitsPerBlock) / MinVLen;
26882697
NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2689-
assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2698+
26902699
return MVT::getScalableVectorVT(EltVT, NumElts);
26912700
}
26922701
}
@@ -3628,6 +3637,8 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
36283637
// XLenVT if we're producing a v8i1. This results in more consistent
36293638
// codegen across RV32 and RV64.
36303639
unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3640+
if (!isPowerOf2_32(NumViaIntegerBits))
3641+
NumViaIntegerBits = llvm::NextPowerOf2 (NumViaIntegerBits);
36313642
NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
36323643
// If we have to use more than one INSERT_VECTOR_ELT then this
36333644
// optimization is likely to increase code size; avoid performing it in
@@ -3671,10 +3682,16 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
36713682
// If we're producing a smaller vector than our minimum legal integer
36723683
// type, bitcast to the equivalent (known-legal) mask type, and extract
36733684
// our final mask.
3674-
assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3675-
Vec = DAG.getBitcast(MVT::v8i1, Vec);
3676-
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3677-
DAG.getConstant(0, DL, XLenVT));
3685+
if (IntegerViaVecVT == MVT::v1i8){
3686+
assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3687+
Vec = DAG.getBitcast(MVT::v8i1, Vec);
3688+
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3689+
DAG.getConstant(0, DL, XLenVT));
3690+
} else if (IntegerViaVecVT == MVT::v1i16) {
3691+
Vec = DAG.getBitcast(MVT::v16i1, Vec);
3692+
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3693+
DAG.getConstant(0, DL, XLenVT));
3694+
}
36783695
} else {
36793696
// Else we must have produced an integer type with the same size as the
36803697
// mask type; bitcast for the final result.
@@ -4827,6 +4844,10 @@ static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
48274844

48284845
EVT VT = SVN->getValueType(0);
48294846
unsigned NumElts = VT.getVectorNumElements();
4847+
// We don't handle non-power-of-2 here.
4848+
if (!isPowerOf2_32(NumElts))
4849+
return false;
4850+
48304851
unsigned EltSizeInBits = VT.getScalarSizeInBits();
48314852
unsigned NumSubElts;
48324853
if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll

Lines changed: 89 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -1649,28 +1649,16 @@ declare <15 x i64> @llvm.vp.bitreverse.v15i64(<15 x i64>, <15 x i1>, i32)
16491649
define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
16501650
; RV32-LABEL: vp_bitreverse_v15i64:
16511651
; RV32: # %bb.0:
1652-
; RV32-NEXT: addi sp, sp, -48
1653-
; RV32-NEXT: .cfi_def_cfa_offset 48
1652+
; RV32-NEXT: addi sp, sp, -16
1653+
; RV32-NEXT: .cfi_def_cfa_offset 16
16541654
; RV32-NEXT: csrr a1, vlenb
16551655
; RV32-NEXT: li a2, 24
16561656
; RV32-NEXT: mul a1, a1, a2
16571657
; RV32-NEXT: sub sp, sp, a1
1658-
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
1659-
; RV32-NEXT: sw zero, 20(sp)
1658+
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
1659+
; RV32-NEXT: sw zero, 12(sp)
16601660
; RV32-NEXT: lui a1, 1044480
1661-
; RV32-NEXT: sw a1, 16(sp)
1662-
; RV32-NEXT: lui a1, 61681
1663-
; RV32-NEXT: addi a1, a1, -241
1664-
; RV32-NEXT: sw a1, 44(sp)
1665-
; RV32-NEXT: sw a1, 40(sp)
1666-
; RV32-NEXT: lui a1, 209715
1667-
; RV32-NEXT: addi a1, a1, 819
1668-
; RV32-NEXT: sw a1, 36(sp)
1669-
; RV32-NEXT: sw a1, 32(sp)
1670-
; RV32-NEXT: lui a1, 349525
1671-
; RV32-NEXT: addi a1, a1, 1365
1672-
; RV32-NEXT: sw a1, 28(sp)
1673-
; RV32-NEXT: sw a1, 24(sp)
1661+
; RV32-NEXT: sw a1, 8(sp)
16741662
; RV32-NEXT: li a1, 56
16751663
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
16761664
; RV32-NEXT: vsll.vx v16, v8, a1, v0.t
@@ -1683,21 +1671,21 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
16831671
; RV32-NEXT: csrr a4, vlenb
16841672
; RV32-NEXT: slli a4, a4, 4
16851673
; RV32-NEXT: add a4, sp, a4
1686-
; RV32-NEXT: addi a4, a4, 48
1674+
; RV32-NEXT: addi a4, a4, 16
16871675
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
1688-
; RV32-NEXT: addi a4, sp, 16
1689-
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1676+
; RV32-NEXT: addi a4, sp, 8
1677+
; RV32-NEXT: vsetivli zero, 15, e64, m8, ta, ma
16901678
; RV32-NEXT: vlse64.v v16, (a4), zero
16911679
; RV32-NEXT: csrr a4, vlenb
16921680
; RV32-NEXT: slli a4, a4, 3
16931681
; RV32-NEXT: add a4, sp, a4
1694-
; RV32-NEXT: addi a4, a4, 48
1682+
; RV32-NEXT: addi a4, a4, 16
16951683
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
16961684
; RV32-NEXT: lui a4, 4080
16971685
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
16981686
; RV32-NEXT: vand.vx v24, v8, a4, v0.t
16991687
; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
1700-
; RV32-NEXT: addi a5, sp, 48
1688+
; RV32-NEXT: addi a5, sp, 16
17011689
; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
17021690
; RV32-NEXT: vand.vv v24, v8, v16, v0.t
17031691
; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
@@ -1706,62 +1694,65 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
17061694
; RV32-NEXT: csrr a5, vlenb
17071695
; RV32-NEXT: slli a5, a5, 4
17081696
; RV32-NEXT: add a5, sp, a5
1709-
; RV32-NEXT: addi a5, a5, 48
1697+
; RV32-NEXT: addi a5, a5, 16
17101698
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
17111699
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
17121700
; RV32-NEXT: csrr a5, vlenb
17131701
; RV32-NEXT: slli a5, a5, 4
17141702
; RV32-NEXT: add a5, sp, a5
1715-
; RV32-NEXT: addi a5, a5, 48
1703+
; RV32-NEXT: addi a5, a5, 16
17161704
; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
17171705
; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
17181706
; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t
17191707
; RV32-NEXT: vand.vx v24, v24, a2, v0.t
17201708
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
1721-
; RV32-NEXT: addi a1, sp, 48
1709+
; RV32-NEXT: addi a1, sp, 16
17221710
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
17231711
; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
17241712
; RV32-NEXT: vand.vx v24, v24, a4, v0.t
17251713
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
17261714
; RV32-NEXT: csrr a1, vlenb
17271715
; RV32-NEXT: slli a1, a1, 3
17281716
; RV32-NEXT: add a1, sp, a1
1729-
; RV32-NEXT: addi a1, a1, 48
1717+
; RV32-NEXT: addi a1, a1, 16
17301718
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
17311719
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
17321720
; RV32-NEXT: vor.vv v8, v8, v24, v0.t
1733-
; RV32-NEXT: addi a1, sp, 40
1734-
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1735-
; RV32-NEXT: vlse64.v v24, (a1), zero
1736-
; RV32-NEXT: addi a1, sp, 48
1721+
; RV32-NEXT: addi a1, sp, 16
17371722
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
1738-
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
17391723
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
17401724
; RV32-NEXT: csrr a1, vlenb
17411725
; RV32-NEXT: slli a1, a1, 4
17421726
; RV32-NEXT: add a1, sp, a1
1743-
; RV32-NEXT: addi a1, a1, 48
1727+
; RV32-NEXT: addi a1, a1, 16
17441728
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
1745-
; RV32-NEXT: vor.vv v16, v16, v8, v0.t
1746-
; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t
1747-
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
1748-
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
1749-
; RV32-NEXT: addi a1, sp, 32
1750-
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1751-
; RV32-NEXT: vlse64.v v24, (a1), zero
1729+
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
1730+
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
1731+
; RV32-NEXT: lui a1, 61681
1732+
; RV32-NEXT: addi a1, a1, -241
1733+
; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma
1734+
; RV32-NEXT: vmv.v.x v24, a1
17521735
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1753-
; RV32-NEXT: vsll.vi v16, v16, 4, v0.t
1754-
; RV32-NEXT: vor.vv v16, v8, v16, v0.t
1755-
; RV32-NEXT: vsrl.vi v8, v16, 2, v0.t
1756-
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
17571736
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
1758-
; RV32-NEXT: addi a1, sp, 24
1759-
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1760-
; RV32-NEXT: vlse64.v v24, (a1), zero
1737+
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
1738+
; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
1739+
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
1740+
; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
1741+
; RV32-NEXT: lui a1, 209715
1742+
; RV32-NEXT: addi a1, a1, 819
1743+
; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma
1744+
; RV32-NEXT: vmv.v.x v24, a1
17611745
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1762-
; RV32-NEXT: vsll.vi v16, v16, 2, v0.t
1763-
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
1746+
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
1747+
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
1748+
; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
1749+
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
17641750
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
1751+
; RV32-NEXT: lui a1, 349525
1752+
; RV32-NEXT: addi a1, a1, 1365
1753+
; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma
1754+
; RV32-NEXT: vmv.v.x v24, a1
1755+
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
17651756
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
17661757
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
17671758
; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
@@ -1770,7 +1761,7 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
17701761
; RV32-NEXT: li a1, 24
17711762
; RV32-NEXT: mul a0, a0, a1
17721763
; RV32-NEXT: add sp, sp, a0
1773-
; RV32-NEXT: addi sp, sp, 48
1764+
; RV32-NEXT: addi sp, sp, 16
17741765
; RV32-NEXT: ret
17751766
;
17761767
; RV64-LABEL: vp_bitreverse_v15i64:
@@ -1856,27 +1847,15 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
18561847
define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
18571848
; RV32-LABEL: vp_bitreverse_v15i64_unmasked:
18581849
; RV32: # %bb.0:
1859-
; RV32-NEXT: addi sp, sp, -48
1860-
; RV32-NEXT: .cfi_def_cfa_offset 48
1850+
; RV32-NEXT: addi sp, sp, -16
1851+
; RV32-NEXT: .cfi_def_cfa_offset 16
18611852
; RV32-NEXT: csrr a1, vlenb
18621853
; RV32-NEXT: slli a1, a1, 3
18631854
; RV32-NEXT: sub sp, sp, a1
1864-
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb
1865-
; RV32-NEXT: sw zero, 20(sp)
1855+
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
1856+
; RV32-NEXT: sw zero, 12(sp)
18661857
; RV32-NEXT: lui a1, 1044480
1867-
; RV32-NEXT: sw a1, 16(sp)
1868-
; RV32-NEXT: lui a1, 61681
1869-
; RV32-NEXT: addi a1, a1, -241
1870-
; RV32-NEXT: sw a1, 44(sp)
1871-
; RV32-NEXT: sw a1, 40(sp)
1872-
; RV32-NEXT: lui a1, 209715
1873-
; RV32-NEXT: addi a1, a1, 819
1874-
; RV32-NEXT: sw a1, 36(sp)
1875-
; RV32-NEXT: sw a1, 32(sp)
1876-
; RV32-NEXT: lui a1, 349525
1877-
; RV32-NEXT: addi a1, a1, 1365
1878-
; RV32-NEXT: sw a1, 28(sp)
1879-
; RV32-NEXT: sw a1, 24(sp)
1858+
; RV32-NEXT: sw a1, 8(sp)
18801859
; RV32-NEXT: li a1, 56
18811860
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
18821861
; RV32-NEXT: vsll.vx v16, v8, a1
@@ -1886,66 +1865,69 @@ define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %ev
18861865
; RV32-NEXT: li a3, 40
18871866
; RV32-NEXT: vsll.vx v24, v24, a3
18881867
; RV32-NEXT: vor.vv v16, v16, v24
1889-
; RV32-NEXT: addi a4, sp, 48
1890-
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
18911868
; RV32-NEXT: addi a4, sp, 16
1892-
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1893-
; RV32-NEXT: vlse64.v v24, (a4), zero
1869+
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
1870+
; RV32-NEXT: addi a4, sp, 8
1871+
; RV32-NEXT: vsetivli zero, 15, e64, m8, ta, ma
1872+
; RV32-NEXT: vlse64.v v16, (a4), zero
18941873
; RV32-NEXT: lui a4, 4080
18951874
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
18961875
; RV32-NEXT: vand.vx v0, v8, a4
18971876
; RV32-NEXT: vsll.vi v0, v0, 24
1898-
; RV32-NEXT: vand.vv v16, v8, v24
1899-
; RV32-NEXT: vsll.vi v16, v16, 8
1900-
; RV32-NEXT: vor.vv v16, v0, v16
1901-
; RV32-NEXT: addi a5, sp, 48
1877+
; RV32-NEXT: vand.vv v24, v8, v16
1878+
; RV32-NEXT: vsll.vi v24, v24, 8
1879+
; RV32-NEXT: vor.vv v24, v0, v24
1880+
; RV32-NEXT: addi a5, sp, 16
19021881
; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload
1903-
; RV32-NEXT: vor.vv v16, v0, v16
1904-
; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
1882+
; RV32-NEXT: vor.vv v24, v0, v24
1883+
; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
19051884
; RV32-NEXT: vsrl.vx v0, v8, a3
19061885
; RV32-NEXT: vand.vx v0, v0, a2
1907-
; RV32-NEXT: vsrl.vx v16, v8, a1
1908-
; RV32-NEXT: vor.vv v0, v0, v16
1909-
; RV32-NEXT: vsrl.vi v16, v8, 8
1910-
; RV32-NEXT: vand.vv v16, v16, v24
1886+
; RV32-NEXT: vsrl.vx v24, v8, a1
1887+
; RV32-NEXT: vor.vv v24, v0, v24
1888+
; RV32-NEXT: vsrl.vi v0, v8, 8
1889+
; RV32-NEXT: vand.vv v16, v0, v16
19111890
; RV32-NEXT: vsrl.vi v8, v8, 24
19121891
; RV32-NEXT: vand.vx v8, v8, a4
19131892
; RV32-NEXT: vor.vv v8, v16, v8
1914-
; RV32-NEXT: addi a1, sp, 40
1915-
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1916-
; RV32-NEXT: vlse64.v v16, (a1), zero
1917-
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1918-
; RV32-NEXT: vor.vv v8, v8, v0
1919-
; RV32-NEXT: addi a1, sp, 48
1920-
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
1921-
; RV32-NEXT: vor.vv v8, v24, v8
1922-
; RV32-NEXT: vsrl.vi v24, v8, 4
1923-
; RV32-NEXT: vand.vv v24, v24, v16
1924-
; RV32-NEXT: vand.vv v8, v8, v16
1925-
; RV32-NEXT: addi a1, sp, 32
1926-
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1927-
; RV32-NEXT: vlse64.v v16, (a1), zero
1893+
; RV32-NEXT: vor.vv v8, v8, v24
1894+
; RV32-NEXT: addi a1, sp, 16
1895+
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
1896+
; RV32-NEXT: vor.vv v8, v16, v8
1897+
; RV32-NEXT: vsrl.vi v16, v8, 4
1898+
; RV32-NEXT: lui a1, 61681
1899+
; RV32-NEXT: addi a1, a1, -241
1900+
; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma
1901+
; RV32-NEXT: vmv.v.x v24, a1
19281902
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1903+
; RV32-NEXT: vand.vv v16, v16, v24
1904+
; RV32-NEXT: vand.vv v8, v8, v24
19291905
; RV32-NEXT: vsll.vi v8, v8, 4
1930-
; RV32-NEXT: vor.vv v8, v24, v8
1931-
; RV32-NEXT: vsrl.vi v24, v8, 2
1932-
; RV32-NEXT: vand.vv v24, v24, v16
1933-
; RV32-NEXT: vand.vv v8, v8, v16
1934-
; RV32-NEXT: addi a1, sp, 24
1935-
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
1936-
; RV32-NEXT: vlse64.v v16, (a1), zero
1906+
; RV32-NEXT: vor.vv v8, v16, v8
1907+
; RV32-NEXT: vsrl.vi v16, v8, 2
1908+
; RV32-NEXT: lui a1, 209715
1909+
; RV32-NEXT: addi a1, a1, 819
1910+
; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma
1911+
; RV32-NEXT: vmv.v.x v24, a1
19371912
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1913+
; RV32-NEXT: vand.vv v16, v16, v24
1914+
; RV32-NEXT: vand.vv v8, v8, v24
19381915
; RV32-NEXT: vsll.vi v8, v8, 2
1939-
; RV32-NEXT: vor.vv v8, v24, v8
1940-
; RV32-NEXT: vsrl.vi v24, v8, 1
1941-
; RV32-NEXT: vand.vv v24, v24, v16
1942-
; RV32-NEXT: vand.vv v8, v8, v16
1916+
; RV32-NEXT: vor.vv v8, v16, v8
1917+
; RV32-NEXT: vsrl.vi v16, v8, 1
1918+
; RV32-NEXT: lui a1, 349525
1919+
; RV32-NEXT: addi a1, a1, 1365
1920+
; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma
1921+
; RV32-NEXT: vmv.v.x v24, a1
1922+
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1923+
; RV32-NEXT: vand.vv v16, v16, v24
1924+
; RV32-NEXT: vand.vv v8, v8, v24
19431925
; RV32-NEXT: vadd.vv v8, v8, v8
1944-
; RV32-NEXT: vor.vv v8, v24, v8
1926+
; RV32-NEXT: vor.vv v8, v16, v8
19451927
; RV32-NEXT: csrr a0, vlenb
19461928
; RV32-NEXT: slli a0, a0, 3
19471929
; RV32-NEXT: add sp, sp, a0
1948-
; RV32-NEXT: addi sp, sp, 48
1930+
; RV32-NEXT: addi sp, sp, 16
19491931
; RV32-NEXT: ret
19501932
;
19511933
; RV64-LABEL: vp_bitreverse_v15i64_unmasked:

0 commit comments

Comments
 (0)