Skip to content

Commit ff8f6ab

Browse files
committed
Reapply "[RISCV] Allow undef prefix for local repeating VLA shuffle lowering (#126097)"
(With a fix to the recently added code.) Implement the first TODO from #125735, and do some minor cleanup of the code using the same style as the recently landed strict-prefix case.
1 parent 2dd2608 commit ff8f6ab

File tree

2 files changed

+56
-19
lines changed

2 files changed

+56
-19
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5338,13 +5338,17 @@ static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
53385338
/// Is this mask local (i.e. elements only move within their local span), and
53395339
/// repeating (that is, the same rearrangement is being done within each span)?
53405340
static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
5341-
// TODO: Could improve the case where undef elements exist in the first span.
5341+
SmallVector<int> LowSpan(Span, -1);
53425342
for (auto [I, M] : enumerate(Mask)) {
53435343
if (M == -1)
53445344
continue;
5345-
int ChunkLo = I - (I % Span);
5346-
int ChunkHi = ChunkLo + Span;
5347-
if (M < ChunkLo || M >= ChunkHi || M - ChunkLo != Mask[I % Span])
5345+
if ((M / Span) != (int)(I / Span))
5346+
return false;
5347+
int SpanIdx = I % Span;
5348+
int Expected = M % Span;
5349+
if (LowSpan[SpanIdx] == -1)
5350+
LowSpan[SpanIdx] = Expected;
5351+
if (LowSpan[SpanIdx] != Expected)
53485352
return false;
53495353
}
53505354
return true;
@@ -5745,12 +5749,11 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
57455749
convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
57465750

57475751
SDValue Gather;
5748-
// If we have a locally repeating mask, then we can reuse the first register
5749-
// in the index register group for all registers within the source register
5750-
// group. TODO: This generalizes to m2, and m4.
5751-
const MVT M1VT = getLMUL1VT(ContainerVT);
5752-
auto VLMAX = RISCVTargetLowering::computeVLMAXBounds(M1VT, Subtarget).first;
5753-
if (ContainerVT.bitsGT(M1VT) && isLocalRepeatingShuffle(Mask, VLMAX)) {
5752+
if (NumElts > MinVLMAX && isLocalRepeatingShuffle(Mask, MinVLMAX)) {
5753+
// If we have a locally repeating mask, then we can reuse the first
5754+
// register in the index register group for all registers within the
5755+
// source register group. TODO: This generalizes to m2, and m4.
5756+
const MVT M1VT = getLMUL1VT(ContainerVT);
57545757
EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
57555758
SDValue SubIndex =
57565759
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubIndexVT, LHSIndices,
@@ -5772,12 +5775,13 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
57725775
Gather = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Gather,
57735776
SubVec, SubIdx);
57745777
}
5775-
} else if (ContainerVT.bitsGT(M1VT) && isLowSourceShuffle(Mask, VLMAX)) {
5778+
} else if (NumElts > MinVLMAX && isLowSourceShuffle(Mask, MinVLMAX)) {
57765779
// If we have a shuffle which only uses the first register in our
57775780
// source register group, we can do a linear number of m1 vrgathers
57785781
// reusing the same source register (but with different indices)
57795782
// TODO: This can be generalized for m2 or m4, or for any shuffle
57805783
// for which we can do a vslidedown followed by this expansion.
5784+
const MVT M1VT = getLMUL1VT(ContainerVT);
57815785
EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
57825786
auto [InnerTrueMask, InnerVL] =
57835787
getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -969,11 +969,44 @@ define <8 x i64> @shuffle_v8i64_as_i128(<8 x i64> %v) {
969969
ret <8 x i64> %shuffle
970970
}
971971

972-
define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) {
973-
; CHECK-LABEL: shuffle_v8i64_as_i256:
972+
; Test case where first span has undefs
973+
define <8 x i64> @shuffle_v8i64_as_i128_2(<8 x i64> %v) {
974+
; CHECK-LABEL: shuffle_v8i64_as_i128_2:
974975
; CHECK: # %bb.0:
975976
; CHECK-NEXT: lui a0, %hi(.LCPI30_0)
976977
; CHECK-NEXT: addi a0, a0, %lo(.LCPI30_0)
978+
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
979+
; CHECK-NEXT: vle16.v v16, (a0)
980+
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
981+
; CHECK-NEXT: vrgatherei16.vv v13, v9, v16
982+
; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
983+
; CHECK-NEXT: vrgatherei16.vv v14, v10, v16
984+
; CHECK-NEXT: vrgatherei16.vv v15, v11, v16
985+
; CHECK-NEXT: vmv4r.v v8, v12
986+
; CHECK-NEXT: ret
987+
;
988+
; ZVKB-V-LABEL: shuffle_v8i64_as_i128_2:
989+
; ZVKB-V: # %bb.0:
990+
; ZVKB-V-NEXT: lui a0, %hi(.LCPI30_0)
991+
; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI30_0)
992+
; ZVKB-V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
993+
; ZVKB-V-NEXT: vle16.v v16, (a0)
994+
; ZVKB-V-NEXT: vsetvli a0, zero, e64, m1, ta, ma
995+
; ZVKB-V-NEXT: vrgatherei16.vv v13, v9, v16
996+
; ZVKB-V-NEXT: vrgatherei16.vv v12, v8, v16
997+
; ZVKB-V-NEXT: vrgatherei16.vv v14, v10, v16
998+
; ZVKB-V-NEXT: vrgatherei16.vv v15, v11, v16
999+
; ZVKB-V-NEXT: vmv4r.v v8, v12
1000+
; ZVKB-V-NEXT: ret
1001+
%shuffle = shufflevector <8 x i64> %v, <8 x i64> poison, <8 x i32> <i32 undef, i32 undef, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
1002+
ret <8 x i64> %shuffle
1003+
}
1004+
1005+
define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) {
1006+
; CHECK-LABEL: shuffle_v8i64_as_i256:
1007+
; CHECK: # %bb.0:
1008+
; CHECK-NEXT: lui a0, %hi(.LCPI31_0)
1009+
; CHECK-NEXT: addi a0, a0, %lo(.LCPI31_0)
9771010
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
9781011
; CHECK-NEXT: vle16.v v16, (a0)
9791012
; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
@@ -982,8 +1015,8 @@ define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) {
9821015
;
9831016
; ZVKB-V-LABEL: shuffle_v8i64_as_i256:
9841017
; ZVKB-V: # %bb.0:
985-
; ZVKB-V-NEXT: lui a0, %hi(.LCPI30_0)
986-
; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI30_0)
1018+
; ZVKB-V-NEXT: lui a0, %hi(.LCPI31_0)
1019+
; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI31_0)
9871020
; ZVKB-V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
9881021
; ZVKB-V-NEXT: vle16.v v16, (a0)
9891022
; ZVKB-V-NEXT: vrgatherei16.vv v12, v8, v16
@@ -996,8 +1029,8 @@ define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) {
9961029
define <8 x i64> @shuffle_v8i64_as_i256_zvl256b(<8 x i64> %v) vscale_range(4,0) {
9971030
; CHECK-LABEL: shuffle_v8i64_as_i256_zvl256b:
9981031
; CHECK: # %bb.0:
999-
; CHECK-NEXT: lui a0, %hi(.LCPI31_0)
1000-
; CHECK-NEXT: addi a0, a0, %lo(.LCPI31_0)
1032+
; CHECK-NEXT: lui a0, %hi(.LCPI32_0)
1033+
; CHECK-NEXT: addi a0, a0, %lo(.LCPI32_0)
10011034
; CHECK-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
10021035
; CHECK-NEXT: vle16.v v12, (a0)
10031036
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
@@ -1008,8 +1041,8 @@ define <8 x i64> @shuffle_v8i64_as_i256_zvl256b(<8 x i64> %v) vscale_range(4,0)
10081041
;
10091042
; ZVKB-V-LABEL: shuffle_v8i64_as_i256_zvl256b:
10101043
; ZVKB-V: # %bb.0:
1011-
; ZVKB-V-NEXT: lui a0, %hi(.LCPI31_0)
1012-
; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI31_0)
1044+
; ZVKB-V-NEXT: lui a0, %hi(.LCPI32_0)
1045+
; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI32_0)
10131046
; ZVKB-V-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
10141047
; ZVKB-V-NEXT: vle16.v v12, (a0)
10151048
; ZVKB-V-NEXT: vsetvli a0, zero, e64, m1, ta, ma

0 commit comments

Comments
 (0)