[RISCV] Lower shuffle which splats a single span (without exact VLEN) #127108

preames · 2025-02-13T18:50:56Z

If we have a shuffle which repeats the same pattern of elements, all of which come from the first register in the source register group, we can lower this to a single vrgather at m1 to perform the element rearrangement, and reuse that for each register in the result vector register group.

llvmbot · 2025-02-13T18:51:32Z

@llvm/pr-subscribers-backend-risc-v

Author: Philip Reames (preames)

Changes

If we have a shuffle which repeats the same pattern of elements, all of which come from the first register in the source register group, we can lower this to a single vrgather at m1 to perform the element rearrangement, and reuse that for each register in the result vector register group.

Full diff: https://github.com/llvm/llvm-project/pull/127108.diff

2 Files Affected:

(modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+48)
(modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll (+23-14)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 829eef2e4d9d9..1156fd2e67fed 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5360,6 +5360,23 @@ static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
                 [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
 }
 
+/// Return true for a mask which performs an arbitrary shuffle within the first
+/// span, and then repeats that same result across all remaining spans.  Undef
+/// (-1) elements are skipped, but each defined element must equal the first
+/// span's element at that position (an undef there does not match).  Note
+/// that this doesn't check if all the inputs come from a single span!
+static bool isSpanSplatShuffle(ArrayRef<int> Mask, int Span) {
+  for (auto [I, M] : enumerate(Mask)) {
+    if (M == -1)
+      continue;
+    // Compare against the first span itself: the lowering reuses the first
+    // span's gather for every span, so an undef there can't be filled later.
+    if (Mask[I % Span] != M)
+      return false;
+  }
+  return true;
+}
+
 /// Try to widen element type to get a new mask value for a better permutation
 /// sequence.  This doesn't try to inspect the widened mask for profitability;
 /// we speculate the widened form is equal or better.  This has the effect of
@@ -5775,6 +5792,37 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
         Gather = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Gather,
                              SubVec, SubIdx);
       }
+    } else if (NumElts > MinVLMAX && isLowSourceShuffle(Mask, MinVLMAX) &&
+               isSpanSplatShuffle(Mask, MinVLMAX)) {
+      // If we have a shuffle which only uses the first register in our source
+      // register group, and repeats the same index across all spans, we can
+      // use a single vrgather (and possibly some register moves).
+      // TODO: This can be generalized for m2 or m4, or for any shuffle for
+      // which we can do a linear number of shuffles to form an m1 which
+      // contains all the output elements.
+      const MVT M1VT = getLMUL1VT(ContainerVT);
+      EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
+      auto [InnerTrueMask, InnerVL] =
+          getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
+      int N = ContainerVT.getVectorMinNumElements() /
+              M1VT.getVectorMinNumElements();
+      assert(isPowerOf2_32(N) && N <= 8);
+      SDValue SubV1 =
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, V1,
+                      DAG.getVectorIdxConstant(0, DL));
+      SDValue SubIndex =
+        DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubIndexVT, LHSIndices,
+                    DAG.getVectorIdxConstant(0, DL));
+      SDValue SubVec =
+        DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
+                    DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
+      Gather = DAG.getUNDEF(ContainerVT);
+      for (int i = 0; i < N; i++) {
+        SDValue SubIdx =
+            DAG.getVectorIdxConstant(M1VT.getVectorMinNumElements() * i, DL);
+        Gather = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Gather,
+                             SubVec, SubIdx);
+      }
     } else if (NumElts > MinVLMAX && isLowSourceShuffle(Mask, MinVLMAX)) {
       // If we have a shuffle which only uses the first register in our
       // source register group, we can do a linear number of m1 vrgathers
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index d7120b4a16938..3e31c9de61657 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -1311,22 +1311,14 @@ define void @shuffle_i128_splat(ptr %p) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    lui a2, 16
-; CHECK-NEXT:    srli a1, a1, 3
+; CHECK-NEXT:    lui a1, 16
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a2
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vx v10, v9, a1
-; CHECK-NEXT:    vslidedown.vx v11, v10, a1
-; CHECK-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vrgatherei16.vv v13, v8, v10
-; CHECK-NEXT:    vrgatherei16.vv v12, v8, v9
-; CHECK-NEXT:    vrgatherei16.vv v14, v8, v11
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vx v9, v11, a1
+; CHECK-NEXT:    vmv.v.x v9, a1
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vrgatherei16.vv v15, v8, v9
+; CHECK-NEXT:    vrgatherei16.vv v12, v8, v9
+; CHECK-NEXT:    vmv.v.v v13, v12
+; CHECK-NEXT:    vmv.v.v v14, v12
+; CHECK-NEXT:    vmv.v.v v15, v12
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vse64.v v12, (a0)
 ; CHECK-NEXT:    ret
@@ -1435,3 +1427,20 @@ define <4 x i16> @vmerge_3(<4 x i16> %x) {
    %s = shufflevector <4 x i16> %x, <4 x i16> <i16 poison, i16 5, i16 poison, i16 poison>, <4 x i32> <i32 0, i32 5, i32 5, i32 3>
    ret <4 x i16> %s
 }
+
+; Mask repeats <1, 0> using only source elements 0 and 1: expect one m1 vrgather plus register copies.
+define <8 x i64> @shuffle_v8i164_span_splat(<8 x i64> %a) nounwind {
+; CHECK-LABEL: shuffle_v8i164_span_splat:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v9, 1
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v12, v8, v9
+; CHECK-NEXT:    vmv.v.v v13, v12
+; CHECK-NEXT:    vmv.v.v v14, v12
+; CHECK-NEXT:    vmv.v.v v15, v12
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %res = shufflevector <8 x i64> %a, <8 x i64> poison, <8 x i32> <i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0>
+  ret <8 x i64> %res
+}

github-actions · 2025-02-13T18:54:20Z

✅ With the latest revision this PR passed the C/C++ code formatter.

topperc

LGTM

[RISCV] Lower shuffle which splats a single span (without exact VLEN) (llvm#127108): If we have a shuffle which repeats the same pattern of elements, all of which come from the first register in the source register group, we can lower this to a single vrgather at m1 to perform the element rearrangement, and reuse that for each register in the result vector register group.

preames requested review from lukel97 and topperc February 13, 2025 18:50

llvmbot added the backend:RISC-V label Feb 13, 2025

clang-format

7ec5b85

topperc approved these changes Feb 14, 2025

View reviewed changes

preames merged commit 25e4333 into llvm:main Feb 15, 2025
8 checks passed

preames deleted the pr-riscv-span-splat-shuffle-vla branch February 15, 2025 01:22

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[RISCV] Lower shuffle which splats a single span (without exact VLEN) #127108

[RISCV] Lower shuffle which splats a single span (without exact VLEN) #127108

Uh oh!

preames commented Feb 13, 2025

Uh oh!

llvmbot commented Feb 13, 2025

Uh oh!

github-actions bot commented Feb 13, 2025 •

edited

Loading

Uh oh!

topperc left a comment

Uh oh!

Uh oh!

Uh oh!

[RISCV] Lower shuffle which splats a single span (without exact VLEN) #127108

[RISCV] Lower shuffle which splats a single span (without exact VLEN) #127108

Uh oh!

Conversation

preames commented Feb 13, 2025

Uh oh!

llvmbot commented Feb 13, 2025

Uh oh!

github-actions bot commented Feb 13, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

topperc left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

github-actions bot commented Feb 13, 2025 •

edited

Loading