Skip to content

Commit d34437e

Browse files
authored
[RISCV] Recognize a zipeven/zipodd requiring larger SEW (#134923)
This is a follow up to f8ee58a, and improves code generation for the XRivosVizip extension. If we have a slide pair which could be a zipeven or zipodd if the shuffle was widened, widen the shuffle and then match the zipeven or zipodd. This is basically working around an order of matching issue; we match the slide pair variants before trying widening. I considered whether we should just widen slide pairs without any consideration of the zip idioms, but the resulting codegen changes look mostly like churn, and have no clear evidence of profitability.
1 parent d9cfd90 commit d34437e

File tree

2 files changed

+46
-27
lines changed

2 files changed

+46
-27
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4654,7 +4654,8 @@ static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
46544654
}
46554655

46564656
static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
4657-
ArrayRef<int> Mask, bool RequiredPolarity) {
4657+
ArrayRef<int> Mask, unsigned Factor,
4658+
bool RequiredPolarity) {
46584659
int NumElts = Mask.size();
46594660
for (int i = 0; i != NumElts; ++i) {
46604661
int M = Mask[i];
@@ -4665,7 +4666,7 @@ static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
46654666
bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
46664667
assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
46674668
"Must match exactly one of the two slides");
4668-
if (RequiredPolarity != (C == i % 2))
4669+
if (RequiredPolarity != (C == (i / Factor) % 2))
46694670
return false;
46704671
}
46714672
return true;
@@ -4677,9 +4678,11 @@ static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
46774678
/// vs1: b0 b1 b2 b3
46784679
/// vd: a0 b0 a2 b2
46794680
static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
4680-
ArrayRef<int> Mask) {
4681-
return SrcInfo[0].second == 0 && SrcInfo[1].second == 1 &&
4682-
isAlternating(SrcInfo, Mask, true);
4681+
ArrayRef<int> Mask, unsigned &Factor) {
4682+
Factor = SrcInfo[1].second;
4683+
return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4684+
Mask.size() % Factor == 0 &&
4685+
isAlternating(SrcInfo, Mask, Factor, true);
46834686
}
46844687

46854688
/// Given a shuffle which can be represented as a pair of two slides,
@@ -4690,9 +4693,11 @@ static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
46904693
/// Note that the operand order is swapped due to the way we canonicalize
46914694
/// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2.
46924695
static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
4693-
ArrayRef<int> Mask) {
4694-
return SrcInfo[0].second == 0 && SrcInfo[1].second == -1 &&
4695-
isAlternating(SrcInfo, Mask, false);
4696+
ArrayRef<int> Mask, unsigned &Factor) {
4697+
Factor = -SrcInfo[1].second;
4698+
return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4699+
Mask.size() % Factor == 0 &&
4700+
isAlternating(SrcInfo, Mask, Factor, false);
46964701
}
46974702

46984703
// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
@@ -5779,16 +5784,33 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
57795784
return convertFromScalableVector(VT, Res, DAG, Subtarget);
57805785
}
57815786

5782-
if (Subtarget.hasVendorXRivosVizip() && isZipEven(SrcInfo, Mask)) {
5783-
SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
5784-
SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
5785-
return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
5786-
Subtarget);
5787-
}
5788-
if (Subtarget.hasVendorXRivosVizip() && isZipOdd(SrcInfo, Mask)) {
5789-
SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
5790-
SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
5791-
return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG, Subtarget);
5787+
if (Subtarget.hasVendorXRivosVizip()) {
5788+
bool TryWiden = false;
5789+
unsigned Factor;
5790+
if (isZipEven(SrcInfo, Mask, Factor)) {
5791+
if (Factor == 1) {
5792+
SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
5793+
SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
5794+
return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
5795+
Subtarget);
5796+
}
5797+
TryWiden = true;
5798+
}
5799+
if (isZipOdd(SrcInfo, Mask, Factor)) {
5800+
if (Factor == 1) {
5801+
SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
5802+
SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
5803+
return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG,
5804+
Subtarget);
5805+
}
5806+
TryWiden = true;
5807+
}
5808+
// If we found a widening opportunity which would let us form a
5809+
// zipeven or zipodd, use the generic code to widen the shuffle
5810+
// and recurse through this logic.
5811+
if (TryWiden)
5812+
if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5813+
return V;
57925814
}
57935815

57945816
// Build the mask. Note that vslideup unconditionally preserves elements

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -365,10 +365,9 @@ define <8 x i32> @zipeven_v8i32_as_v4i64(<8 x i32> %v1, <8 x i32> %v2) {
365365
;
366366
; ZIP-LABEL: zipeven_v8i32_as_v4i64:
367367
; ZIP: # %bb.0:
368-
; ZIP-NEXT: li a0, 204
369-
; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, mu
370-
; ZIP-NEXT: vmv.s.x v0, a0
371-
; ZIP-NEXT: vslideup.vi v8, v10, 2, v0.t
368+
; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
369+
; ZIP-NEXT: ri.vzipeven.vv v12, v8, v10
370+
; ZIP-NEXT: vmv.v.v v8, v12
372371
; ZIP-NEXT: ret
373372
%out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 12, i32 13>
374373
ret <8 x i32> %out
@@ -386,11 +385,9 @@ define <8 x i32> @zipodd_v8i32_as_v4i64(<8 x i32> %v1, <8 x i32> %v2) {
386385
;
387386
; ZIP-LABEL: zipodd_v8i32_as_v4i64:
388387
; ZIP: # %bb.0:
389-
; ZIP-NEXT: li a0, 51
390-
; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, mu
391-
; ZIP-NEXT: vmv.s.x v0, a0
392-
; ZIP-NEXT: vslidedown.vi v10, v8, 2, v0.t
393-
; ZIP-NEXT: vmv.v.v v8, v10
388+
; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
389+
; ZIP-NEXT: ri.vzipodd.vv v12, v8, v10
390+
; ZIP-NEXT: vmv.v.v v8, v12
394391
; ZIP-NEXT: ret
395392
%out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> <i32 2, i32 3, i32 10, i32 11, i32 6, i32 7, i32 14, i32 15>
396393
ret <8 x i32> %out

0 commit comments

Comments
 (0)