Commit cdd3b10

[RISCV] Allow non-power-of-2 vectors for VLS code generation
SLP supports non-power-of-2 vectors [1], so we should support them for RISC-V vector code generation as well. Non-power-of-2 VLS vectors are a natural fit for the vector extension, since VL places no power-of-2 constraint on the element count. In theory we could support any length, but to keep the number of MVTs from growing too quickly we only add v3, v5, v7, and v15.

[1] #77790
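For context, the kind of source this targets looks like the sketch below. It is illustrative only: whether SLP actually forms a <3 x i32> operation here depends on the cost model and target features, and the function name is made up.

// Illustrative only: a 3-wide straight-line pattern that SLP may now
// vectorize to a <3 x i32> VLS operation instead of scalarizing or widening.
void add3(int *a, const int *b) {
  a[0] += b[0];
  a[1] += b[1];
  a[2] += b[2];
}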
1 parent 7a3ce57

30 files changed: +710 additions, -1251 deletions

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 0 additions & 2 deletions
@@ -2259,8 +2259,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       [[maybe_unused]] bool ExactlyVecRegSized =
           Subtarget->expandVScale(SubVecVT.getSizeInBits())
               .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
-      assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
-                               .getKnownMinValue()));
       assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
     }
     MVT ContainerVT = VT;

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 30 additions & 9 deletions
@@ -2584,9 +2584,14 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
   if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
     return false;
 
-  // TODO: Perhaps an artificial restriction, but worth having whilst getting
-  // the base fixed length RVV support in place.
-  if (!VT.isPow2VectorType())
+  // Only support non-power-of-2 fixed-length vector types with 3, 5, 7, or
+  // 15 elements.
+  // In theory, we could support any length, but we want to prevent the
+  // number of MVTs from growing too quickly. Therefore, we only add these
+  // specific types.
+  unsigned NumElems = VT.getVectorNumElements();
+  if (!VT.isPow2VectorType() && NumElems != 3 && NumElems != 5 &&
+      NumElems != 7 && NumElems != 15)
     return false;
 
   return true;
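A minimal standalone sketch of the element-count rule adopted above. This is not the LLVM API; isPow2 and vlsElementCountOK are hypothetical helpers for illustration, and the real function applies further constraints (e.g. the LMUL bound) before this check:

#include <cassert>

static bool isPow2(unsigned N) { return N != 0 && (N & (N - 1)) == 0; }

// Mirrors the new check in useRVVForFixedLengthVectorVT: powers of two are
// still accepted, and 3, 5, 7, and 15 elements are now accepted as well.
static bool vlsElementCountOK(unsigned NumElems) {
  return isPow2(NumElems) || NumElems == 3 || NumElems == 5 ||
         NumElems == 7 || NumElems == 15;
}

int main() {
  assert(vlsElementCountOK(4) && vlsElementCountOK(15));
  assert(!vlsElementCountOK(6) && !vlsElementCountOK(11)); // still rejected
}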
@@ -2623,10 +2628,14 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
     // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
     // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
     // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
+    unsigned NumVLSElts = VT.getVectorNumElements();
+    if (!isPowerOf2_32(NumVLSElts))
+      NumVLSElts = llvm::NextPowerOf2(NumVLSElts);
+
     unsigned NumElts =
-        (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
+        (NumVLSElts * RISCV::RVVBitsPerBlock) / MinVLen;
     NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
-    assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
+
     return MVT::getScalableVectorVT(EltVT, NumElts);
   }
 }
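A worked example of the container computation above, as a standalone sketch. The constant mirrors RISCV::RVVBitsPerBlock = 64; MinVLen = 128 and MaxELen = 64 are assumed, and nextPow2 stands in for llvm::NextPowerOf2:

#include <cstdio>

static unsigned nextPow2(unsigned N) {
  unsigned P = 1;
  while (P < N)
    P <<= 1;
  return P;
}

int main() {
  const unsigned RVVBitsPerBlock = 64, MinVLen = 128, MaxELen = 64;
  unsigned NumVLSElts = 3; // e.g. a v3i64 fixed-length vector
  if ((NumVLSElts & (NumVLSElts - 1)) != 0)
    NumVLSElts = nextPow2(NumVLSElts);                         // 3 -> 4
  unsigned NumElts = (NumVLSElts * RVVBitsPerBlock) / MinVLen; // 4*64/128 = 2
  unsigned Floor = RVVBitsPerBlock / MaxELen; // smallest supported count
  if (NumElts < Floor)
    NumElts = Floor;
  printf("v3i64 -> container nxv%ui64\n", NumElts); // nxv2i64
}

So under these assumptions a v3i64 is rounded up to the v4i64 footprint and shares the nxv2i64 container; only the VL differs.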
@@ -3573,6 +3582,8 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
     // XLenVT if we're producing a v8i1. This results in more consistent
     // codegen across RV32 and RV64.
     unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
+    if (!isPowerOf2_32(NumViaIntegerBits))
+      NumViaIntegerBits = llvm::NextPowerOf2(NumViaIntegerBits);
     NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
     // If we have to use more than one INSERT_VECTOR_ELT then this
     // optimization is likely to increase code size; avoid peforming it in
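A standalone sketch of the bit-width selection above for a v15i1 build_vector, assuming XLen = 32 and ELen = 64 (illustrative values; nextPow2 again stands in for llvm::NextPowerOf2):

#include <algorithm>
#include <cstdio>

static unsigned nextPow2(unsigned N) {
  unsigned P = 1;
  while (P < N)
    P <<= 1;
  return P;
}

int main() {
  const unsigned XLen = 32, ELen = 64;
  unsigned NumElts = 15; // v15i1 mask
  unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, XLen); // 15
  if ((NumViaIntegerBits & (NumViaIntegerBits - 1)) != 0)
    NumViaIntegerBits = nextPow2(NumViaIntegerBits);          // 15 -> 16
  NumViaIntegerBits = std::min(NumViaIntegerBits, ELen);      // still 16
  printf("build v15i1 via an i%u scalar\n", NumViaIntegerBits);
}

The rounded 16-bit width is why the next hunk adds a v1i16 path beside the existing v1i8 one.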
@@ -3616,10 +3627,16 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
       // If we're producing a smaller vector than our minimum legal integer
       // type, bitcast to the equivalent (known-legal) mask type, and extract
       // our final mask.
-      assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
-      Vec = DAG.getBitcast(MVT::v8i1, Vec);
-      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
-                        DAG.getConstant(0, DL, XLenVT));
+      if (IntegerViaVecVT == MVT::v1i8) {
+        assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
+        Vec = DAG.getBitcast(MVT::v8i1, Vec);
+        Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
+                          DAG.getConstant(0, DL, XLenVT));
+      } else if (IntegerViaVecVT == MVT::v1i16) {
+        Vec = DAG.getBitcast(MVT::v16i1, Vec);
+        Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
+                          DAG.getConstant(0, DL, XLenVT));
+      }
     } else {
       // Else we must have produced an integer type with the same size as the
       // mask type; bitcast for the final result.
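A plain C++ analogy for the new v1i16 path, conceptual only: the 15 mask bits are materialized in one 16-bit scalar, reinterpreted as 16 one-bit lanes, and the low 15 lanes are kept (the EXTRACT_SUBVECTOR at index 0). The mask value is made up:

#include <bitset>
#include <cstdint>
#include <cstdio>

int main() {
  uint16_t Packed = 0x5A5A;      // stand-in for the v1i16 built above
  std::bitset<16> Lanes(Packed); // "bitcast" v1i16 -> v16i1
  for (int I = 0; I < 15; ++I)   // "extract" v15i1 starting at lane 0
    printf("%d", Lanes[I] ? 1 : 0);
  printf("\n");
}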
@@ -4873,6 +4890,10 @@ static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
 
   EVT VT = SVN->getValueType(0);
   unsigned NumElts = VT.getVectorNumElements();
+  // We don't handle non-power-of-2 element counts here.
+  if (!isPowerOf2_32(NumElts))
+    return false;
+
   unsigned EltSizeInBits = VT.getScalarSizeInBits();
   unsigned NumSubElts;
   if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll

Lines changed: 89 additions & 107 deletions
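The updated checks below show two effects of treating v15i64 as a native VLS type: the vector length is set with vsetivli zero, 15 where the old code used 16, and the 64-bit splat constants for the bit-swap masks are now materialized with vmv.v.x at e32 (VL = 30, i.e. two 32-bit halves per element) instead of being stored to the stack and reloaded with strided loads, which also shrinks the frame from 48 to 16 bytes.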
@@ -1649,28 +1649,16 @@ declare <15 x i64> @llvm.vp.bitreverse.v15i64(<15 x i64>, <15 x i1>, i32)
 define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_bitreverse_v15i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -48
-; RV32-NEXT:    .cfi_def_cfa_offset 48
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    li a2, 24
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
-; RV32-NEXT:    sw zero, 20(sp)
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; RV32-NEXT:    sw zero, 12(sp)
 ; RV32-NEXT:    lui a1, 1044480
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 44(sp)
-; RV32-NEXT:    sw a1, 40(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 36(sp)
-; RV32-NEXT:    sw a1, 32(sp)
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
+; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    li a1, 56
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vsll.vx v16, v8, a1, v0.t
@@ -1683,21 +1671,21 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
 ; RV32-NEXT:    csrr a4, vlenb
 ; RV32-NEXT:    slli a4, a4, 4
 ; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 48
+; RV32-NEXT:    addi a4, a4, 16
 ; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    addi a4, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    addi a4, sp, 8
+; RV32-NEXT:    vsetivli zero, 15, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a4), zero
 ; RV32-NEXT:    csrr a4, vlenb
 ; RV32-NEXT:    slli a4, a4, 3
 ; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 48
+; RV32-NEXT:    addi a4, a4, 16
 ; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a4, 4080
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vand.vx v24, v8, a4, v0.t
 ; RV32-NEXT:    vsll.vi v24, v24, 24, v0.t
-; RV32-NEXT:    addi a5, sp, 48
+; RV32-NEXT:    addi a5, sp, 16
 ; RV32-NEXT:    vs8r.v v24, (a5) # Unknown-size Folded Spill
 ; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
 ; RV32-NEXT:    vsll.vi v16, v24, 8, v0.t
@@ -1706,62 +1694,65 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
 ; RV32-NEXT:    csrr a5, vlenb
 ; RV32-NEXT:    slli a5, a5, 4
 ; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 48
+; RV32-NEXT:    addi a5, a5, 16
 ; RV32-NEXT:    vl8r.v v24, (a5) # Unknown-size Folded Reload
 ; RV32-NEXT:    vor.vv v16, v24, v16, v0.t
 ; RV32-NEXT:    csrr a5, vlenb
 ; RV32-NEXT:    slli a5, a5, 4
 ; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 48
+; RV32-NEXT:    addi a5, a5, 16
 ; RV32-NEXT:    vs8r.v v16, (a5) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
 ; RV32-NEXT:    vsrl.vx v24, v8, a3, v0.t
 ; RV32-NEXT:    vand.vx v24, v24, a2, v0.t
 ; RV32-NEXT:    vor.vv v16, v24, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 48
+; RV32-NEXT:    addi a1, sp, 16
 ; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsrl.vi v24, v8, 24, v0.t
 ; RV32-NEXT:    vand.vx v24, v24, a4, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    slli a1, a1, 3
 ; RV32-NEXT:    add a1, sp, a1
-; RV32-NEXT:    addi a1, a1, 48
+; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v24, v0.t
-; RV32-NEXT:    addi a1, sp, 40
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    addi a1, sp, 48
+; RV32-NEXT:    addi a1, sp, 16
 ; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    slli a1, a1, 4
 ; RV32-NEXT:    add a1, sp, a1
-; RV32-NEXT:    addi a1, a1, 48
+; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vor.vv v16, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v8, v16, 4, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
-; RV32-NEXT:    addi a1, sp, 32
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
+; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetivli zero, 30, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.x v24, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsll.vi v16, v16, 4, v0.t
-; RV32-NEXT:    vor.vv v16, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v8, v16, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
 ; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
+; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
+; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetivli zero, 30, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.x v24, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsll.vi v16, v16, 2, v0.t
-; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetivli zero, 30, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.x v24, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
 ; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
@@ -1770,7 +1761,7 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
 ; RV32-NEXT:    li a1, 24
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 48
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_bitreverse_v15i64:
@@ -1856,27 +1847,15 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
 define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_bitreverse_v15i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -48
-; RV32-NEXT:    .cfi_def_cfa_offset 48
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    slli a1, a1, 3
 ; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb
-; RV32-NEXT:    sw zero, 20(sp)
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT:    sw zero, 12(sp)
 ; RV32-NEXT:    lui a1, 1044480
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 44(sp)
-; RV32-NEXT:    sw a1, 40(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 36(sp)
-; RV32-NEXT:    sw a1, 32(sp)
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
+; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    li a1, 56
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vsll.vx v16, v8, a1
@@ -1886,66 +1865,69 @@ define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %ev
 ; RV32-NEXT:    li a3, 40
 ; RV32-NEXT:    vsll.vx v24, v24, a3
 ; RV32-NEXT:    vor.vv v16, v16, v24
-; RV32-NEXT:    addi a4, sp, 48
-; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a4, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a4), zero
+; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a4, sp, 8
+; RV32-NEXT:    vsetivli zero, 15, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a4), zero
 ; RV32-NEXT:    lui a4, 4080
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vand.vx v0, v8, a4
 ; RV32-NEXT:    vsll.vi v0, v0, 24
-; RV32-NEXT:    vand.vv v16, v8, v24
-; RV32-NEXT:    vsll.vi v16, v16, 8
-; RV32-NEXT:    vor.vv v16, v0, v16
-; RV32-NEXT:    addi a5, sp, 48
+; RV32-NEXT:    vand.vv v24, v8, v16
+; RV32-NEXT:    vsll.vi v24, v24, 8
+; RV32-NEXT:    vor.vv v24, v0, v24
+; RV32-NEXT:    addi a5, sp, 16
 ; RV32-NEXT:    vl8r.v v0, (a5) # Unknown-size Folded Reload
-; RV32-NEXT:    vor.vv v16, v0, v16
-; RV32-NEXT:    vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT:    vor.vv v24, v0, v24
+; RV32-NEXT:    vs8r.v v24, (a5) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsrl.vx v0, v8, a3
 ; RV32-NEXT:    vand.vx v0, v0, a2
-; RV32-NEXT:    vsrl.vx v16, v8, a1
-; RV32-NEXT:    vor.vv v0, v0, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    vand.vv v16, v16, v24
+; RV32-NEXT:    vsrl.vx v24, v8, a1
+; RV32-NEXT:    vor.vv v24, v0, v24
+; RV32-NEXT:    vsrl.vi v0, v8, 8
+; RV32-NEXT:    vand.vv v16, v0, v16
 ; RV32-NEXT:    vsrl.vi v8, v8, 24
 ; RV32-NEXT:    vand.vx v8, v8, a4
 ; RV32-NEXT:    vor.vv v8, v16, v8
-; RV32-NEXT:    addi a1, sp, 40
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vor.vv v8, v8, v0
-; RV32-NEXT:    addi a1, sp, 48
-; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vor.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 4
-; RV32-NEXT:    vand.vv v24, v24, v16
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    addi a1, sp, 32
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vor.vv v8, v8, v24
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetivli zero, 30, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.x v24, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v16, v16, v24
+; RV32-NEXT:    vand.vv v8, v8, v24
 ; RV32-NEXT:    vsll.vi v8, v8, 4
-; RV32-NEXT:    vor.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 2
-; RV32-NEXT:    vand.vv v24, v24, v16
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 2
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    vsetivli zero, 30, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.x v24, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v16, v16, v24
+; RV32-NEXT:    vand.vv v8, v8, v24
 ; RV32-NEXT:    vsll.vi v8, v8, 2
-; RV32-NEXT:    vor.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 1
-; RV32-NEXT:    vand.vv v24, v24, v16
-; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vor.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    vsetivli zero, 30, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.x v24, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v16, v16, v24
+; RV32-NEXT:    vand.vv v8, v8, v24
 ; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vor.vv v8, v24, v8
+; RV32-NEXT:    vor.vv v8, v16, v8
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 48
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_bitreverse_v15i64_unmasked:
