Skip to content

Commit e01bdc1

Browse files
[LLVM][SelectionDAG] Simplify SplitVecOp_VSETCC. (llvm#139295)
Preserving the original result element type when splitting vector setcc operations removes redundant extensions that are awkward to optimise after the fact.
1 parent 9570bf9 commit e01bdc1

File tree

4 files changed

+54
-147
lines changed

4 files changed

+54
-147
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

+3 -10
Original file line number | Diff line number | Diff line change
@@ -4341,11 +4341,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
43414341
GetSplitVector(N->getOperand(isStrict ? 1 : 0), Lo0, Hi0);
43424342
GetSplitVector(N->getOperand(isStrict ? 2 : 1), Lo1, Hi1);
43434343

4344-
auto PartEltCnt = Lo0.getValueType().getVectorElementCount();
4345-
4346-
LLVMContext &Context = *DAG.getContext();
4347-
EVT PartResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt);
4348-
EVT WideResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt*2);
4344+
EVT VT = N->getValueType(0);
4345+
EVT PartResVT = Lo0.getValueType().changeElementType(VT.getScalarType());
43494346

43504347
if (Opc == ISD::SETCC) {
43514348
LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
@@ -4369,12 +4366,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
43694366
HiRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Hi0, Hi1,
43704367
N->getOperand(2), MaskHi, EVLHi);
43714368
}
4372-
SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes);
43734369

4374-
EVT OpVT = N->getOperand(0).getValueType();
4375-
ISD::NodeType ExtendCode =
4376-
TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
4377-
return DAG.getNode(ExtendCode, DL, N->getValueType(0), Con);
4370+
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoRes, HiRes);
43784371
}
43794372

43804373

llvm/test/CodeGen/AArch64/bf16-v8-instructions.ll

-28
Original file line number | Diff line number | Diff line change
@@ -890,8 +890,6 @@ define <8 x i1> @test_fcmp_une(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
890890
; CHECK-NEXT: fcmeq v0.4s, v0.4s, v1.4s
891891
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
892892
; CHECK-NEXT: mvn v0.16b, v0.16b
893-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
894-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
895893
; CHECK-NEXT: xtn v0.8b, v0.8h
896894
; CHECK-NEXT: ret
897895
%1 = fcmp une <8 x bfloat> %a, %b
@@ -913,8 +911,6 @@ define <8 x i1> @test_fcmp_ueq(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
913911
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
914912
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
915913
; CHECK-NEXT: mvn v0.16b, v0.16b
916-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
917-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
918914
; CHECK-NEXT: xtn v0.8b, v0.8h
919915
; CHECK-NEXT: ret
920916
%1 = fcmp ueq <8 x bfloat> %a, %b
@@ -932,8 +928,6 @@ define <8 x i1> @test_fcmp_ugt(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
932928
; CHECK-NEXT: fcmge v0.4s, v1.4s, v0.4s
933929
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
934930
; CHECK-NEXT: mvn v0.16b, v0.16b
935-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
936-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
937931
; CHECK-NEXT: xtn v0.8b, v0.8h
938932
; CHECK-NEXT: ret
939933
%1 = fcmp ugt <8 x bfloat> %a, %b
@@ -951,8 +945,6 @@ define <8 x i1> @test_fcmp_uge(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
951945
; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s
952946
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
953947
; CHECK-NEXT: mvn v0.16b, v0.16b
954-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
955-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
956948
; CHECK-NEXT: xtn v0.8b, v0.8h
957949
; CHECK-NEXT: ret
958950
%1 = fcmp uge <8 x bfloat> %a, %b
@@ -970,8 +962,6 @@ define <8 x i1> @test_fcmp_ult(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
970962
; CHECK-NEXT: fcmge v0.4s, v0.4s, v1.4s
971963
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
972964
; CHECK-NEXT: mvn v0.16b, v0.16b
973-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
974-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
975965
; CHECK-NEXT: xtn v0.8b, v0.8h
976966
; CHECK-NEXT: ret
977967
%1 = fcmp ult <8 x bfloat> %a, %b
@@ -989,8 +979,6 @@ define <8 x i1> @test_fcmp_ule(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
989979
; CHECK-NEXT: fcmgt v0.4s, v0.4s, v1.4s
990980
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
991981
; CHECK-NEXT: mvn v0.16b, v0.16b
992-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
993-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
994982
; CHECK-NEXT: xtn v0.8b, v0.8h
995983
; CHECK-NEXT: ret
996984
%1 = fcmp ule <8 x bfloat> %a, %b
@@ -1012,8 +1000,6 @@ define <8 x i1> @test_fcmp_uno(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
10121000
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
10131001
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
10141002
; CHECK-NEXT: mvn v0.16b, v0.16b
1015-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
1016-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
10171003
; CHECK-NEXT: xtn v0.8b, v0.8h
10181004
; CHECK-NEXT: ret
10191005
%1 = fcmp uno <8 x bfloat> %a, %b
@@ -1034,8 +1020,6 @@ define <8 x i1> @test_fcmp_one(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
10341020
; CHECK-NEXT: orr v1.16b, v2.16b, v4.16b
10351021
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
10361022
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
1037-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
1038-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
10391023
; CHECK-NEXT: xtn v0.8b, v0.8h
10401024
; CHECK-NEXT: ret
10411025
%1 = fcmp one <8 x bfloat> %a, %b
@@ -1052,8 +1036,6 @@ define <8 x i1> @test_fcmp_oeq(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
10521036
; CHECK-NEXT: fcmeq v2.4s, v3.4s, v2.4s
10531037
; CHECK-NEXT: fcmeq v0.4s, v0.4s, v1.4s
10541038
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
1055-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
1056-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
10571039
; CHECK-NEXT: xtn v0.8b, v0.8h
10581040
; CHECK-NEXT: ret
10591041
%1 = fcmp oeq <8 x bfloat> %a, %b
@@ -1070,8 +1052,6 @@ define <8 x i1> @test_fcmp_ogt(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
10701052
; CHECK-NEXT: fcmgt v2.4s, v3.4s, v2.4s
10711053
; CHECK-NEXT: fcmgt v0.4s, v0.4s, v1.4s
10721054
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
1073-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
1074-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
10751055
; CHECK-NEXT: xtn v0.8b, v0.8h
10761056
; CHECK-NEXT: ret
10771057
%1 = fcmp ogt <8 x bfloat> %a, %b
@@ -1088,8 +1068,6 @@ define <8 x i1> @test_fcmp_oge(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
10881068
; CHECK-NEXT: fcmge v2.4s, v3.4s, v2.4s
10891069
; CHECK-NEXT: fcmge v0.4s, v0.4s, v1.4s
10901070
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
1091-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
1092-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
10931071
; CHECK-NEXT: xtn v0.8b, v0.8h
10941072
; CHECK-NEXT: ret
10951073
%1 = fcmp oge <8 x bfloat> %a, %b
@@ -1106,8 +1084,6 @@ define <8 x i1> @test_fcmp_olt(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
11061084
; CHECK-NEXT: fcmgt v2.4s, v3.4s, v2.4s
11071085
; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s
11081086
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
1109-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
1110-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
11111087
; CHECK-NEXT: xtn v0.8b, v0.8h
11121088
; CHECK-NEXT: ret
11131089
%1 = fcmp olt <8 x bfloat> %a, %b
@@ -1124,8 +1100,6 @@ define <8 x i1> @test_fcmp_ole(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
11241100
; CHECK-NEXT: fcmge v2.4s, v3.4s, v2.4s
11251101
; CHECK-NEXT: fcmge v0.4s, v1.4s, v0.4s
11261102
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
1127-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
1128-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
11291103
; CHECK-NEXT: xtn v0.8b, v0.8h
11301104
; CHECK-NEXT: ret
11311105
%1 = fcmp ole <8 x bfloat> %a, %b
@@ -1146,8 +1120,6 @@ define <8 x i1> @test_fcmp_ord(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
11461120
; CHECK-NEXT: orr v1.16b, v2.16b, v4.16b
11471121
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
11481122
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
1149-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
1150-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
11511123
; CHECK-NEXT: xtn v0.8b, v0.8h
11521124
; CHECK-NEXT: ret
11531125
%1 = fcmp ord <8 x bfloat> %a, %b

llvm/test/CodeGen/AArch64/fcmp.ll

+51 -81
Original file line number | Diff line number | Diff line change
@@ -1145,8 +1145,6 @@ define <7 x half> @v7f16_half(<7 x half> %a, <7 x half> %b, <7 x half> %d, <7 x
11451145
; CHECK-SD-NOFP16-NEXT: fcmgt v4.4s, v5.4s, v4.4s
11461146
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
11471147
; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v4.8h
1148-
; CHECK-SD-NOFP16-NEXT: shl v0.8h, v0.8h, #15
1149-
; CHECK-SD-NOFP16-NEXT: cmlt v0.8h, v0.8h, #0
11501148
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v2.16b, v3.16b
11511149
; CHECK-SD-NOFP16-NEXT: ret
11521150
;
@@ -1275,8 +1273,6 @@ define <8 x half> @v8f16_half(<8 x half> %a, <8 x half> %b, <8 x half> %d, <8 x
12751273
; CHECK-SD-NOFP16-NEXT: fcmgt v4.4s, v5.4s, v4.4s
12761274
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
12771275
; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v4.8h
1278-
; CHECK-SD-NOFP16-NEXT: shl v0.8h, v0.8h, #15
1279-
; CHECK-SD-NOFP16-NEXT: cmlt v0.8h, v0.8h, #0
12801276
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v2.16b, v3.16b
12811277
; CHECK-SD-NOFP16-NEXT: ret
12821278
;
@@ -1328,10 +1324,6 @@ define <16 x half> @v16f16_half(<16 x half> %a, <16 x half> %b, <16 x half> %d,
13281324
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v2.4s, v0.4s
13291325
; CHECK-SD-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v16.8h
13301326
; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v3.8h
1331-
; CHECK-SD-NOFP16-NEXT: shl v1.8h, v1.8h, #15
1332-
; CHECK-SD-NOFP16-NEXT: shl v0.8h, v0.8h, #15
1333-
; CHECK-SD-NOFP16-NEXT: cmlt v1.8h, v1.8h, #0
1334-
; CHECK-SD-NOFP16-NEXT: cmlt v0.8h, v0.8h, #0
13351327
; CHECK-SD-NOFP16-NEXT: bsl v1.16b, v5.16b, v7.16b
13361328
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v4.16b, v6.16b
13371329
; CHECK-SD-NOFP16-NEXT: ret
@@ -1384,45 +1376,41 @@ entry:
13841376
define <7 x i32> @v7f16_i32(<7 x half> %a, <7 x half> %b, <7 x i32> %d, <7 x i32> %e) {
13851377
; CHECK-SD-NOFP16-LABEL: v7f16_i32:
13861378
; CHECK-SD-NOFP16: // %bb.0: // %entry
1387-
; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v0.8h
1388-
; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v1.8h
1379+
; CHECK-SD-NOFP16-NEXT: fmov s2, w0
1380+
; CHECK-SD-NOFP16-NEXT: fmov s4, w7
13891381
; CHECK-SD-NOFP16-NEXT: mov x8, sp
1390-
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1391-
; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1392-
; CHECK-SD-NOFP16-NEXT: ldr s4, [sp, #24]
1393-
; CHECK-SD-NOFP16-NEXT: add x9, sp, #32
1394-
; CHECK-SD-NOFP16-NEXT: ld1 { v4.s }[1], [x9]
1395-
; CHECK-SD-NOFP16-NEXT: add x9, sp, #16
1396-
; CHECK-SD-NOFP16-NEXT: fcmgt v2.4s, v3.4s, v2.4s
1397-
; CHECK-SD-NOFP16-NEXT: fmov s3, w4
1382+
; CHECK-SD-NOFP16-NEXT: fmov s5, w4
1383+
; CHECK-SD-NOFP16-NEXT: fcvtl v6.4s, v0.4h
1384+
; CHECK-SD-NOFP16-NEXT: ldr s3, [sp, #24]
1385+
; CHECK-SD-NOFP16-NEXT: fcvtl v7.4s, v1.4h
1386+
; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
1387+
; CHECK-SD-NOFP16-NEXT: add x9, sp, #8
1388+
; CHECK-SD-NOFP16-NEXT: mov v2.s[1], w1
1389+
; CHECK-SD-NOFP16-NEXT: ld1 { v4.s }[1], [x8]
1390+
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
1391+
; CHECK-SD-NOFP16-NEXT: mov v5.s[1], w5
1392+
; CHECK-SD-NOFP16-NEXT: add x8, sp, #32
1393+
; CHECK-SD-NOFP16-NEXT: ld1 { v3.s }[1], [x8]
1394+
; CHECK-SD-NOFP16-NEXT: add x8, sp, #16
1395+
; CHECK-SD-NOFP16-NEXT: fcmgt v6.4s, v7.4s, v6.4s
1396+
; CHECK-SD-NOFP16-NEXT: ld1 { v4.s }[2], [x9]
1397+
; CHECK-SD-NOFP16-NEXT: add x9, sp, #40
1398+
; CHECK-SD-NOFP16-NEXT: mov v2.s[2], w2
13981399
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
1399-
; CHECK-SD-NOFP16-NEXT: fmov s1, w0
1400-
; CHECK-SD-NOFP16-NEXT: mov v3.s[1], w5
1401-
; CHECK-SD-NOFP16-NEXT: mov v1.s[1], w1
1402-
; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v2.8h
1403-
; CHECK-SD-NOFP16-NEXT: fmov s2, w7
1404-
; CHECK-SD-NOFP16-NEXT: mov v3.s[2], w6
1405-
; CHECK-SD-NOFP16-NEXT: ld1 { v2.s }[1], [x8]
1406-
; CHECK-SD-NOFP16-NEXT: mov v1.s[2], w2
1407-
; CHECK-SD-NOFP16-NEXT: add x8, sp, #8
1408-
; CHECK-SD-NOFP16-NEXT: shl v0.8h, v0.8h, #15
1409-
; CHECK-SD-NOFP16-NEXT: ld1 { v2.s }[2], [x8]
1410-
; CHECK-SD-NOFP16-NEXT: add x8, sp, #40
1411-
; CHECK-SD-NOFP16-NEXT: cmlt v0.8h, v0.8h, #0
1412-
; CHECK-SD-NOFP16-NEXT: mov v1.s[3], w3
1413-
; CHECK-SD-NOFP16-NEXT: ld1 { v4.s }[2], [x8]
1414-
; CHECK-SD-NOFP16-NEXT: ld1 { v2.s }[3], [x9]
1415-
; CHECK-SD-NOFP16-NEXT: sshll v5.4s, v0.4h, #0
1416-
; CHECK-SD-NOFP16-NEXT: sshll2 v0.4s, v0.8h, #0
1417-
; CHECK-SD-NOFP16-NEXT: bif v1.16b, v2.16b, v5.16b
1418-
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v3.16b, v4.16b
1400+
; CHECK-SD-NOFP16-NEXT: mov v5.s[2], w6
1401+
; CHECK-SD-NOFP16-NEXT: ld1 { v3.s }[2], [x9]
1402+
; CHECK-SD-NOFP16-NEXT: ld1 { v4.s }[3], [x8]
1403+
; CHECK-SD-NOFP16-NEXT: mov v1.16b, v6.16b
1404+
; CHECK-SD-NOFP16-NEXT: mov v2.s[3], w3
1405+
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v5.16b, v3.16b
1406+
; CHECK-SD-NOFP16-NEXT: bsl v1.16b, v2.16b, v4.16b
1407+
; CHECK-SD-NOFP16-NEXT: mov w5, v0.s[1]
1408+
; CHECK-SD-NOFP16-NEXT: mov w6, v0.s[2]
1409+
; CHECK-SD-NOFP16-NEXT: fmov w4, s0
14191410
; CHECK-SD-NOFP16-NEXT: mov w1, v1.s[1]
14201411
; CHECK-SD-NOFP16-NEXT: mov w2, v1.s[2]
14211412
; CHECK-SD-NOFP16-NEXT: mov w3, v1.s[3]
1422-
; CHECK-SD-NOFP16-NEXT: mov w5, v0.s[1]
1423-
; CHECK-SD-NOFP16-NEXT: mov w6, v0.s[2]
14241413
; CHECK-SD-NOFP16-NEXT: fmov w0, s1
1425-
; CHECK-SD-NOFP16-NEXT: fmov w4, s0
14261414
; CHECK-SD-NOFP16-NEXT: ret
14271415
;
14281416
; CHECK-SD-FP16-LABEL: v7f16_i32:
@@ -1630,17 +1618,12 @@ entry:
16301618
define <8 x i32> @v8f16_i32(<8 x half> %a, <8 x half> %b, <8 x i32> %d, <8 x i32> %e) {
16311619
; CHECK-SD-NOFP16-LABEL: v8f16_i32:
16321620
; CHECK-SD-NOFP16: // %bb.0: // %entry
1633-
; CHECK-SD-NOFP16-NEXT: fcvtl2 v6.4s, v0.8h
1634-
; CHECK-SD-NOFP16-NEXT: fcvtl2 v7.4s, v1.8h
1635-
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1636-
; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1621+
; CHECK-SD-NOFP16-NEXT: fcvtl v6.4s, v0.4h
1622+
; CHECK-SD-NOFP16-NEXT: fcvtl v7.4s, v1.4h
1623+
; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
1624+
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
16371625
; CHECK-SD-NOFP16-NEXT: fcmgt v6.4s, v7.4s, v6.4s
16381626
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
1639-
; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v6.8h
1640-
; CHECK-SD-NOFP16-NEXT: shl v0.8h, v0.8h, #15
1641-
; CHECK-SD-NOFP16-NEXT: cmlt v0.8h, v0.8h, #0
1642-
; CHECK-SD-NOFP16-NEXT: sshll v6.4s, v0.4h, #0
1643-
; CHECK-SD-NOFP16-NEXT: sshll2 v0.4s, v0.8h, #0
16441627
; CHECK-SD-NOFP16-NEXT: mov v1.16b, v0.16b
16451628
; CHECK-SD-NOFP16-NEXT: mov v0.16b, v6.16b
16461629
; CHECK-SD-NOFP16-NEXT: bsl v1.16b, v3.16b, v5.16b
@@ -1694,37 +1677,24 @@ entry:
16941677
define <16 x i32> @v16f16_i32(<16 x half> %a, <16 x half> %b, <16 x i32> %d, <16 x i32> %e) {
16951678
; CHECK-SD-NOFP16-LABEL: v16f16_i32:
16961679
; CHECK-SD-NOFP16: // %bb.0: // %entry
1697-
; CHECK-SD-NOFP16-NEXT: fcvtl2 v17.4s, v0.8h
1698-
; CHECK-SD-NOFP16-NEXT: fcvtl2 v18.4s, v2.8h
1699-
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1700-
; CHECK-SD-NOFP16-NEXT: fcvtl v2.4s, v2.4h
1701-
; CHECK-SD-NOFP16-NEXT: fcvtl2 v16.4s, v1.8h
1702-
; CHECK-SD-NOFP16-NEXT: fcvtl2 v19.4s, v3.8h
1703-
; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1704-
; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v3.4h
1705-
; CHECK-SD-NOFP16-NEXT: fcmgt v17.4s, v18.4s, v17.4s
1706-
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v2.4s, v0.4s
1707-
; CHECK-SD-NOFP16-NEXT: fcmgt v2.4s, v19.4s, v16.4s
1708-
; CHECK-SD-NOFP16-NEXT: fcmgt v1.4s, v3.4s, v1.4s
1709-
; CHECK-SD-NOFP16-NEXT: ldp q18, q19, [sp, #32]
1710-
; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v17.8h
1711-
; CHECK-SD-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v2.8h
1712-
; CHECK-SD-NOFP16-NEXT: ldp q2, q20, [sp]
1713-
; CHECK-SD-NOFP16-NEXT: shl v0.8h, v0.8h, #15
1714-
; CHECK-SD-NOFP16-NEXT: shl v1.8h, v1.8h, #15
1715-
; CHECK-SD-NOFP16-NEXT: cmlt v0.8h, v0.8h, #0
1716-
; CHECK-SD-NOFP16-NEXT: cmlt v1.8h, v1.8h, #0
1717-
; CHECK-SD-NOFP16-NEXT: sshll v3.4s, v0.4h, #0
1718-
; CHECK-SD-NOFP16-NEXT: sshll v16.4s, v1.4h, #0
1719-
; CHECK-SD-NOFP16-NEXT: sshll2 v17.4s, v1.8h, #0
1720-
; CHECK-SD-NOFP16-NEXT: sshll2 v1.4s, v0.8h, #0
1721-
; CHECK-SD-NOFP16-NEXT: mov v0.16b, v3.16b
1722-
; CHECK-SD-NOFP16-NEXT: mov v3.16b, v17.16b
1723-
; CHECK-SD-NOFP16-NEXT: bsl v1.16b, v5.16b, v20.16b
1724-
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v4.16b, v2.16b
1725-
; CHECK-SD-NOFP16-NEXT: mov v2.16b, v16.16b
1726-
; CHECK-SD-NOFP16-NEXT: bsl v3.16b, v7.16b, v19.16b
1727-
; CHECK-SD-NOFP16-NEXT: bsl v2.16b, v6.16b, v18.16b
1680+
; CHECK-SD-NOFP16-NEXT: fcvtl v16.4s, v1.4h
1681+
; CHECK-SD-NOFP16-NEXT: fcvtl v17.4s, v3.4h
1682+
; CHECK-SD-NOFP16-NEXT: fcvtl v18.4s, v0.4h
1683+
; CHECK-SD-NOFP16-NEXT: fcvtl v19.4s, v2.4h
1684+
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
1685+
; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
1686+
; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
1687+
; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
1688+
; CHECK-SD-NOFP16-NEXT: fcmgt v16.4s, v17.4s, v16.4s
1689+
; CHECK-SD-NOFP16-NEXT: fcmgt v18.4s, v19.4s, v18.4s
1690+
; CHECK-SD-NOFP16-NEXT: fcmgt v3.4s, v3.4s, v1.4s
1691+
; CHECK-SD-NOFP16-NEXT: fcmgt v1.4s, v2.4s, v0.4s
1692+
; CHECK-SD-NOFP16-NEXT: ldp q0, q19, [sp]
1693+
; CHECK-SD-NOFP16-NEXT: ldp q2, q17, [sp, #32]
1694+
; CHECK-SD-NOFP16-NEXT: bit v0.16b, v4.16b, v18.16b
1695+
; CHECK-SD-NOFP16-NEXT: bsl v1.16b, v5.16b, v19.16b
1696+
; CHECK-SD-NOFP16-NEXT: bsl v3.16b, v7.16b, v17.16b
1697+
; CHECK-SD-NOFP16-NEXT: bit v2.16b, v6.16b, v16.16b
17281698
; CHECK-SD-NOFP16-NEXT: ret
17291699
;
17301700
; CHECK-SD-FP16-LABEL: v16f16_i32:

0 commit comments

Comments
 (0)