Skip to content

Commit d0cd6f3

Browse files
authored
[AArch64] Fix tryToConvertShuffleOfTbl2ToTbl4 with non-buildvector input operands. (#135961)
It looks like this code only considers BUILD_VECTOR inputs and expects them to have at least 16 operands. This adds a check to make sure the inputs really are BUILD_VECTOR nodes before their operands are accessed. Fixes #135950.
1 parent ae47f25 commit d0cd6f3

File tree

2 files changed

+43
-11
lines changed

2 files changed

+43
-11
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13872,25 +13872,27 @@ static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op,
1387213872
DAG.getTargetConstant(Intrinsic::aarch64_neon_tbl2, dl, MVT::i64);
1387313873

1387413874
EVT VT = Op.getValueType();
13875-
if (Tbl1->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13876-
Tbl1->getOperand(0) != Tbl2ID ||
13877-
Tbl2->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13878-
Tbl2->getOperand(0) != Tbl2ID)
13875+
if (Tbl1.getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13876+
Tbl1.getOperand(0) != Tbl2ID ||
13877+
Tbl2.getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13878+
Tbl2.getOperand(0) != Tbl2ID)
1387913879
return SDValue();
1388013880

13881-
if (Tbl1->getValueType(0) != MVT::v16i8 ||
13882-
Tbl2->getValueType(0) != MVT::v16i8)
13881+
if (Tbl1.getValueType() != MVT::v16i8 || Tbl2.getValueType() != MVT::v16i8)
13882+
return SDValue();
13883+
13884+
SDValue Mask1 = Tbl1.getOperand(3);
13885+
SDValue Mask2 = Tbl2.getOperand(3);
13886+
if (Mask1.getOpcode() != ISD::BUILD_VECTOR ||
13887+
Mask2.getOpcode() != ISD::BUILD_VECTOR)
1388313888
return SDValue();
1388413889

13885-
SDValue Mask1 = Tbl1->getOperand(3);
13886-
SDValue Mask2 = Tbl2->getOperand(3);
1388713890
SmallVector<SDValue, 16> TBLMaskParts(16, SDValue());
1388813891
for (unsigned I = 0; I < 16; I++) {
1388913892
if (ShuffleMask[I] < 16)
13890-
TBLMaskParts[I] = Mask1->getOperand(ShuffleMask[I]);
13893+
TBLMaskParts[I] = Mask1.getOperand(ShuffleMask[I]);
1389113894
else {
13892-
auto *C =
13893-
dyn_cast<ConstantSDNode>(Mask2->getOperand(ShuffleMask[I] - 16));
13895+
auto *C = dyn_cast<ConstantSDNode>(Mask2.getOperand(ShuffleMask[I] - 16));
1389413896
if (!C)
1389513897
return SDValue();
1389613898
TBLMaskParts[I] = DAG.getConstant(C->getSExtValue() + 32, dl, MVT::i32);

llvm/test/CodeGen/AArch64/arm64-tbl.ll

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1254,6 +1254,36 @@ define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %
12541254
ret <16 x i8> %tmp3
12551255
}
12561256

1257+
define <16 x i8> @pr135950(<16 x i8> %A, <16 x i8> %B, <16 x i8> %M) {
1258+
; CHECK-SD-LABEL: pr135950:
1259+
; CHECK-SD: // %bb.0:
1260+
; CHECK-SD-NEXT: mov.16b v3, v1
1261+
; CHECK-SD-NEXT: movi.2d v1, #0000000000000000
1262+
; CHECK-SD-NEXT: mov.16b v4, v0
1263+
; CHECK-SD-NEXT: mov.16b v5, v3
1264+
; CHECK-SD-NEXT: tbl.16b v1, { v3, v4 }, v1
1265+
; CHECK-SD-NEXT: tbl.16b v0, { v4, v5 }, v2
1266+
; CHECK-SD-NEXT: zip1.16b v0, v0, v1
1267+
; CHECK-SD-NEXT: ret
1268+
;
1269+
; CHECK-GI-LABEL: pr135950:
1270+
; CHECK-GI: // %bb.0:
1271+
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
1272+
; CHECK-GI-NEXT: mov.16b v3, v2
1273+
; CHECK-GI-NEXT: movi.2d v4, #0000000000000000
1274+
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
1275+
; CHECK-GI-NEXT: tbl.16b v3, { v0, v1 }, v3
1276+
; CHECK-GI-NEXT: mov.16b v2, v0
1277+
; CHECK-GI-NEXT: tbl.16b v0, { v1, v2 }, v4
1278+
; CHECK-GI-NEXT: zip1.16b v0, v3, v0
1279+
; CHECK-GI-NEXT: ret
1280+
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %M)
1281+
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %B, <16 x i8> %A, <16 x i8> zeroinitializer)
1282+
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1283+
ret <16 x i8> %s
1284+
}
1285+
1286+
12571287
declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
12581288
declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
12591289
declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone

0 commit comments

Comments
 (0)