Skip to content

Commit 7bd9888

Browse files
committed
[AArch64] Fix tryToConvertShuffleOfTbl2ToTbl4 with non-buildvector input operands.
It looks like this code is only considering buildvector inputs, expecting the inputs to have at least 16 operands. This adds a check to make sure that is true. Fixes #135950
1 parent d3153ad commit 7bd9888

File tree

2 files changed

+43
-11
lines changed

2 files changed

+43
-11
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13871,25 +13871,27 @@ static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op,
1387113871
DAG.getTargetConstant(Intrinsic::aarch64_neon_tbl2, dl, MVT::i64);
1387213872

1387313873
EVT VT = Op.getValueType();
13874-
if (Tbl1->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13875-
Tbl1->getOperand(0) != Tbl2ID ||
13876-
Tbl2->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13877-
Tbl2->getOperand(0) != Tbl2ID)
13874+
if (Tbl1.getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13875+
Tbl1.getOperand(0) != Tbl2ID ||
13876+
Tbl2.getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13877+
Tbl2.getOperand(0) != Tbl2ID)
1387813878
return SDValue();
1387913879

13880-
if (Tbl1->getValueType(0) != MVT::v16i8 ||
13881-
Tbl2->getValueType(0) != MVT::v16i8)
13880+
if (Tbl1.getValueType() != MVT::v16i8 || Tbl2.getValueType() != MVT::v16i8)
13881+
return SDValue();
13882+
13883+
SDValue Mask1 = Tbl1.getOperand(3);
13884+
SDValue Mask2 = Tbl2.getOperand(3);
13885+
if (Mask1.getOpcode() != ISD::BUILD_VECTOR ||
13886+
Mask2.getOpcode() != ISD::BUILD_VECTOR)
1388213887
return SDValue();
1388313888

13884-
SDValue Mask1 = Tbl1->getOperand(3);
13885-
SDValue Mask2 = Tbl2->getOperand(3);
1388613889
SmallVector<SDValue, 16> TBLMaskParts(16, SDValue());
1388713890
for (unsigned I = 0; I < 16; I++) {
1388813891
if (ShuffleMask[I] < 16)
13889-
TBLMaskParts[I] = Mask1->getOperand(ShuffleMask[I]);
13892+
TBLMaskParts[I] = Mask1.getOperand(ShuffleMask[I]);
1389013893
else {
13891-
auto *C =
13892-
dyn_cast<ConstantSDNode>(Mask2->getOperand(ShuffleMask[I] - 16));
13894+
auto *C = dyn_cast<ConstantSDNode>(Mask2.getOperand(ShuffleMask[I] - 16));
1389313895
if (!C)
1389413896
return SDValue();
1389513897
TBLMaskParts[I] = DAG.getConstant(C->getSExtValue() + 32, dl, MVT::i32);

llvm/test/CodeGen/AArch64/arm64-tbl.ll

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1254,6 +1254,36 @@ define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %
12541254
ret <16 x i8> %tmp3
12551255
}
12561256

1257+
; Regression test for issue #135950: a shufflevector interleaving the results
; of two tbl2 calls, where the first tbl2's mask (%M) is a function argument
; rather than a constant vector and the second mask is zeroinitializer.
define <16 x i8> @pr135950(<16 x i8> %A, <16 x i8> %B, <16 x i8> %M) {
1258+
; CHECK-SD-LABEL: pr135950:
1259+
; CHECK-SD: // %bb.0:
1260+
; CHECK-SD-NEXT: mov.16b v3, v1
1261+
; CHECK-SD-NEXT: movi.2d v1, #0000000000000000
1262+
; CHECK-SD-NEXT: mov.16b v4, v0
1263+
; CHECK-SD-NEXT: mov.16b v5, v3
1264+
; CHECK-SD-NEXT: tbl.16b v1, { v3, v4 }, v1
1265+
; CHECK-SD-NEXT: tbl.16b v0, { v4, v5 }, v2
1266+
; CHECK-SD-NEXT: zip1.16b v0, v0, v1
1267+
; CHECK-SD-NEXT: ret
1268+
;
1269+
; CHECK-GI-LABEL: pr135950:
1270+
; CHECK-GI: // %bb.0:
1271+
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
1272+
; CHECK-GI-NEXT: mov.16b v3, v2
1273+
; CHECK-GI-NEXT: movi.2d v4, #0000000000000000
1274+
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
1275+
; CHECK-GI-NEXT: tbl.16b v3, { v0, v1 }, v3
1276+
; CHECK-GI-NEXT: mov.16b v2, v0
1277+
; CHECK-GI-NEXT: tbl.16b v0, { v1, v2 }, v4
1278+
; CHECK-GI-NEXT: zip1.16b v0, v3, v0
1279+
; CHECK-GI-NEXT: ret
1280+
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %M)
1281+
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %B, <16 x i8> %A, <16 x i8> zeroinitializer)
1282+
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1283+
ret <16 x i8> %s
1284+
}
1285+
1286+
12571287
declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
12581288
declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
12591289
declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone

0 commit comments

Comments
 (0)