Skip to content

Commit c1e95b2

Browse files
authored
[RISCV] Fix matching bug in VLA shuffle lowering (llvm#134750)
Fix llvm#134126. The matching code was previous written as if we were mutating the indices to replace undef elements with preferred values, but the actual lowering code just took a prefix of the index vector. This resulted in us using undef indices for lanes which should have been defined, resulting in incorrect codegen. Longer term, we probably should rewrite the mask, but this seemed like an easier tactical fix.
1 parent 8b11c39 commit c1e95b2

File tree

2 files changed

+26
-19
lines changed

2 files changed

+26
-19
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5399,17 +5399,16 @@ static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
53995399
/// Is this mask local (i.e. elements only move within their local span), and
54005400
/// repeating (that is, the same rearrangement is being done within each span)?
54015401
static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
5402-
SmallVector<int> LowSpan(Span, -1);
5402+
// Require a prefix from the original mask until the consumer code
5403+
// is adjusted to rewrite the mask instead of just taking a prefix.
54035404
for (auto [I, M] : enumerate(Mask)) {
54045405
if (M == -1)
54055406
continue;
54065407
if ((M / Span) != (int)(I / Span))
54075408
return false;
54085409
int SpanIdx = I % Span;
54095410
int Expected = M % Span;
5410-
if (LowSpan[SpanIdx] == -1)
5411-
LowSpan[SpanIdx] = Expected;
5412-
if (LowSpan[SpanIdx] != Expected)
5411+
if (Mask[SpanIdx] != Expected)
54135412
return false;
54145413
}
54155414
return true;
@@ -5424,14 +5423,13 @@ static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
54245423
/// span, and then repeats that same result across all remaining spans. Note
54255424
/// that this doesn't check if all the inputs come from a single span!
54265425
static bool isSpanSplatShuffle(ArrayRef<int> Mask, int Span) {
5427-
SmallVector<int> LowSpan(Span, -1);
5426+
// Require a prefix from the original mask until the consumer code
5427+
// is adjusted to rewrite the mask instead of just taking a prefix.
54285428
for (auto [I, M] : enumerate(Mask)) {
54295429
if (M == -1)
54305430
continue;
54315431
int SpanIdx = I % Span;
5432-
if (LowSpan[SpanIdx] == -1)
5433-
LowSpan[SpanIdx] = M;
5434-
if (LowSpan[SpanIdx] != M)
5432+
if (Mask[SpanIdx] != M)
54355433
return false;
54365434
}
54375435
return true;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1372,36 +1372,45 @@ define <8 x i64> @shuffle_v8i164_span_splat(<8 x i64> %a) nounwind {
13721372
ret <8 x i64> %res
13731373
}
13741374

1375-
; FIXME: Doing this as a span spat requires rewriting the undef elements in
1376-
; the mask not just using a prefix of the mask.
1375+
; Doing this as a span splat requires rewriting the undef elements in the mask
1376+
; not just using a prefix of the mask.
13771377
define <8 x i64> @shuffle_v8i64_span_splat_neg(<8 x i64> %a) nounwind {
13781378
; CHECK-LABEL: shuffle_v8i64_span_splat_neg:
13791379
; CHECK: # %bb.0:
1380+
; CHECK-NEXT: csrr a0, vlenb
13801381
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
13811382
; CHECK-NEXT: vmv.v.i v9, 1
1382-
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1383+
; CHECK-NEXT: srli a0, a0, 3
1384+
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1385+
; CHECK-NEXT: vslidedown.vx v10, v9, a0
1386+
; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
1387+
; CHECK-NEXT: vrgatherei16.vv v13, v8, v10
1388+
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1389+
; CHECK-NEXT: vslidedown.vx v10, v10, a0
1390+
; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
13831391
; CHECK-NEXT: vrgatherei16.vv v12, v8, v9
1384-
; CHECK-NEXT: vmv.v.v v13, v12
1385-
; CHECK-NEXT: vmv.v.v v14, v12
1386-
; CHECK-NEXT: vmv.v.v v15, v12
1392+
; CHECK-NEXT: vrgatherei16.vv v14, v8, v10
1393+
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1394+
; CHECK-NEXT: vslidedown.vx v9, v10, a0
1395+
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1396+
; CHECK-NEXT: vrgatherei16.vv v15, v8, v9
13871397
; CHECK-NEXT: vmv4r.v v8, v12
13881398
; CHECK-NEXT: ret
13891399
%res = shufflevector <8 x i64> %a, <8 x i64> poison, <8 x i32> <i32 poison, i32 poison, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0>
13901400
ret <8 x i64> %res
13911401
}
13921402

1393-
; FIXME: A locally repeating shuffle needs to use a mask prefix
1403+
; Doing this as a locally repeating shuffle requires rewriting the undef
1404+
; elements in the mask not just using a prefix of the mask.
13941405
define <8 x i32> @shuffle_v8i32_locally_repeating_neg(<8 x i32> %a) {
13951406
; CHECK-LABEL: shuffle_v8i32_locally_repeating_neg:
13961407
; CHECK: # %bb.0:
13971408
; CHECK-NEXT: lui a0, %hi(.LCPI87_0)
13981409
; CHECK-NEXT: addi a0, a0, %lo(.LCPI87_0)
1399-
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1410+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
14001411
; CHECK-NEXT: vle16.v v12, (a0)
1401-
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
1402-
; CHECK-NEXT: vrgatherei16.vv v11, v9, v12
14031412
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
1404-
; CHECK-NEXT: vmv2r.v v8, v10
1413+
; CHECK-NEXT: vmv.v.v v8, v10
14051414
; CHECK-NEXT: ret
14061415
%res = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 1, i32 0, i32 poison, i32 poison, i32 5, i32 4, i32 6, i32 6>
14071416
ret <8 x i32> %res

0 commit comments

Comments
 (0)