@@ -14908,10 +14908,11 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
14908
14908
// NOP cast operands to the largest legal vector of the same element count.
14909
14909
if (VT.isFloatingPoint()) {
14910
14910
Vec0 = getSVESafeBitCast(NarrowVT, Vec0, DAG);
14911
- Vec1 = getSVESafeBitCast(WideVT , Vec1, DAG);
14911
+ Vec1 = getSVESafeBitCast(NarrowVT , Vec1, DAG);
14912
14912
} else {
14913
14913
// Legal integer vectors are already their largest so Vec0 is fine as is.
14914
14914
Vec1 = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
14915
+ Vec1 = DAG.getNode(AArch64ISD::NVCAST, DL, NarrowVT, Vec1);
14915
14916
}
14916
14917
14917
14918
// To replace the top/bottom half of vector V with vector SubV we widen the
@@ -14920,11 +14921,13 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
14920
14921
SDValue Narrow;
14921
14922
if (Idx == 0) {
14922
14923
SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
14924
+ HiVec0 = DAG.getNode(AArch64ISD::NVCAST, DL, NarrowVT, HiVec0);
14923
14925
Narrow = DAG.getNode(AArch64ISD::UZP1, DL, NarrowVT, Vec1, HiVec0);
14924
14926
} else {
14925
14927
assert(Idx == InVT.getVectorMinNumElements() &&
14926
14928
"Invalid subvector index!");
14927
14929
SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0);
14930
+ LoVec0 = DAG.getNode(AArch64ISD::NVCAST, DL, NarrowVT, LoVec0);
14928
14931
Narrow = DAG.getNode(AArch64ISD::UZP1, DL, NarrowVT, LoVec0, Vec1);
14929
14932
}
14930
14933
@@ -15024,7 +15027,9 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
15024
15027
SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));
15025
15028
SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
15026
15029
SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
15027
- return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
15030
+ SDValue ResultLoCast = DAG.getNode(AArch64ISD::NVCAST, dl, VT, ResultLo);
15031
+ SDValue ResultHiCast = DAG.getNode(AArch64ISD::NVCAST, dl, VT, ResultHi);
15032
+ return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLoCast, ResultHiCast);
15028
15033
}
15029
15034
15030
15035
bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles(
@@ -22739,7 +22744,19 @@ static SDValue trySimplifySrlAddToRshrnb(SDValue Srl, SelectionDAG &DAG,
22739
22744
SDValue Rshrnb = DAG.getNode(
22740
22745
AArch64ISD::RSHRNB_I, DL, ResVT,
22741
22746
{RShOperand, DAG.getTargetConstant(ShiftValue, DL, MVT::i32)});
22742
- return DAG.getNode(ISD::BITCAST, DL, VT, Rshrnb);
22747
+ return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Rshrnb);
22748
+ }
22749
+
22750
+ static SDValue isNVCastToHalfWidthElements(SDValue V) { // Looks through V when it is an NVCAST that doubles the element count.
22751
+ if (V.getOpcode() != AArch64ISD::NVCAST)
22752
+ return SDValue(); // Not an NVCAST: report "no match" with an empty SDValue.
22753
+
22754
+ SDValue Op = V.getOperand(0); // The value feeding the cast.
22755
+ if (V.getValueType().getVectorElementCount() !=
22756
+ Op.getValueType().getVectorElementCount() * 2) // Cast result must have exactly twice as many elements as its input.
22757
+ return SDValue(); // Element counts do not have the 2:1 relation: no match.
22758
+
22759
+ return Op; // Match: return the pre-cast value. NOTE(review): presumably NVCAST keeps the total bit size, making the result elements half as wide - confirm.
22743
22760
}
22744
22761
22745
22762
static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG,
@@ -22802,25 +22819,37 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG,
22802
22819
if (SDValue Urshr = tryCombineExtendRShTrunc(N, DAG))
22803
22820
return Urshr;
22804
22821
22805
- if (SDValue Rshrnb = trySimplifySrlAddToRshrnb(Op0, DAG, Subtarget))
22806
- return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Rshrnb, Op1);
22822
+ if (SDValue PreCast = isNVCastToHalfWidthElements(Op0)) {
22823
+ if (SDValue Rshrnb = trySimplifySrlAddToRshrnb(PreCast, DAG, Subtarget)) {
22824
+ Rshrnb = DAG.getNode(AArch64ISD::NVCAST, DL, ResVT, Rshrnb);
22825
+ return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Rshrnb, Op1);
22826
+ }
22827
+ }
22807
22828
22808
- if (SDValue Rshrnb = trySimplifySrlAddToRshrnb(Op1, DAG, Subtarget))
22809
- return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Rshrnb);
22829
+ if (SDValue PreCast = isNVCastToHalfWidthElements(Op1)) {
22830
+ if (SDValue Rshrnb = trySimplifySrlAddToRshrnb(PreCast, DAG, Subtarget)) {
22831
+ Rshrnb = DAG.getNode(AArch64ISD::NVCAST, DL, ResVT, Rshrnb);
22832
+ return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Rshrnb);
22833
+ }
22834
+ }
22810
22835
22811
- // uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
22812
- if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
22813
- if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
22814
- SDValue X = Op0.getOperand(0).getOperand(0);
22815
- return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);
22836
+ // uzp1<ty>(nvcast(unpklo(uzp1<ty>(x, y))), z) => uzp1<ty>(x, z)
22837
+ if (SDValue PreCast = isNVCastToHalfWidthElements(Op0)) {
22838
+ if (PreCast.getOpcode() == AArch64ISD::UUNPKLO) {
22839
+ if (PreCast.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
22840
+ SDValue X = PreCast.getOperand(0).getOperand(0);
22841
+ return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);
22842
+ }
22816
22843
}
22817
22844
}
22818
22845
22819
- // uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z)
22820
- if (Op1.getOpcode() == AArch64ISD::UUNPKHI) {
22821
- if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
22822
- SDValue Z = Op1.getOperand(0).getOperand(1);
22823
- return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
22846
+ // uzp1<ty>(x, nvcast(unpkhi(uzp1<ty>(y, z)))) => uzp1<ty>(x, z)
22847
+ if (SDValue PreCast = isNVCastToHalfWidthElements(Op1)) {
22848
+ if (PreCast.getOpcode() == AArch64ISD::UUNPKHI) {
22849
+ if (PreCast.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
22850
+ SDValue Z = PreCast.getOperand(0).getOperand(1);
22851
+ return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
22852
+ }
22824
22853
}
22825
22854
}
22826
22855
@@ -29415,9 +29444,6 @@ void AArch64TargetLowering::verifyTargetSDNode(const SDNode *N) const {
29415
29444
VT.isInteger() && "Expected integer vectors!");
29416
29445
assert(OpVT.getSizeInBits() == VT.getSizeInBits() &&
29417
29446
"Expected vectors of equal size!");
29418
- // TODO: Enable assert once bogus creations have been fixed.
29419
- if (VT.isScalableVector())
29420
- break;
29421
29447
assert(OpVT.getVectorElementCount() == VT.getVectorElementCount() * 2 &&
29422
29448
"Expected result vector with half the lanes of its input!");
29423
29449
break;
@@ -29435,12 +29461,27 @@ void AArch64TargetLowering::verifyTargetSDNode(const SDNode *N) const {
29435
29461
EVT Op1VT = N->getOperand(1).getValueType();
29436
29462
assert(VT.isVector() && Op0VT.isVector() && Op1VT.isVector() &&
29437
29463
"Expected vectors!");
29438
- // TODO: Enable assert once bogus creations have been fixed.
29439
- if (VT.isScalableVector())
29440
- break;
29441
29464
assert(VT == Op0VT && VT == Op1VT && "Expected matching vectors!");
29442
29465
break;
29443
29466
}
29467
+ case AArch64ISD::RSHRNB_I: {
29468
+ assert(N->getNumValues() == 1 && "Expected one result!");
29469
+ assert(N->getNumOperands() == 2 && "Expected two operands!");
29470
+ EVT VT = N->getValueType(0);
29471
+ EVT Op0VT = N->getOperand(0).getValueType();
29472
+ EVT Op1VT = N->getOperand(1).getValueType();
29473
+ assert(VT.isVector() && VT.isInteger() &&
29474
+ "Expected integer vector result type!");
29475
+ assert(Op0VT.isVector() && Op0VT.isInteger() &&
29476
+ "Expected first operand to be an integer vector!");
29477
+ assert(VT.getSizeInBits() == Op0VT.getSizeInBits() &&
29478
+ "Expected vectors of equal size!");
29479
+ assert(VT.getVectorElementCount() == Op0VT.getVectorElementCount() * 2 &&
29480
+ "Expected input vector with half the lanes of its result!");
29481
+ assert(Op1VT == MVT::i32 && isa<ConstantSDNode>(N->getOperand(1)) &&
29482
+ "Expected second operand to be a constant i32!");
29483
+ break;
29484
+ }
29444
29485
}
29445
29486
}
29446
29487
#endif
0 commit comments