@@ -20937,6 +20937,53 @@ static SDValue combineBoolVectorAndTruncateStore(SelectionDAG &DAG,
20937
20937
Store->getMemOperand());
20938
20938
}
20939
20939
20940
+ // Combine store (trunc X to <3 x i8>) to sequence of ST1.b.
20941
+ static SDValue combineI8TruncStore(StoreSDNode *ST, SelectionDAG &DAG,
20942
+ const AArch64Subtarget *Subtarget) {
20943
+ SDValue Value = ST->getValue();
20944
+ EVT ValueVT = Value.getValueType();
20945
+
20946
+ if (ST->isVolatile() || !Subtarget->isLittleEndian() ||
20947
+ Value.getOpcode() != ISD::TRUNCATE ||
20948
+ ValueVT != EVT::getVectorVT(*DAG.getContext(), MVT::i8, 3))
20949
+ return SDValue();
20950
+
20951
+ assert(ST->getOffset().isUndef() && "undef offset expected");
20952
+ SDLoc DL(ST);
20953
+ auto WideVT = EVT::getVectorVT(
20954
+ *DAG.getContext(),
20955
+ Value->getOperand(0).getValueType().getVectorElementType(), 4);
20956
+ SDValue UndefVector = DAG.getUNDEF(WideVT);
20957
+ SDValue WideTrunc = DAG.getNode(
20958
+ ISD::INSERT_SUBVECTOR, DL, WideVT,
20959
+ {UndefVector, Value->getOperand(0), DAG.getVectorIdxConstant(0, DL)});
20960
+ SDValue Cast = DAG.getNode(
20961
+ ISD::BITCAST, DL, WideVT.getSizeInBits() == 64 ? MVT::v8i8 : MVT::v16i8,
20962
+ WideTrunc);
20963
+
20964
+ MachineFunction &MF = DAG.getMachineFunction();
20965
+ SDValue Chain = ST->getChain();
20966
+ MachineMemOperand *MMO = ST->getMemOperand();
20967
+ unsigned IdxScale = WideVT.getScalarSizeInBits() / 8;
20968
+ SDValue E2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, Cast,
20969
+ DAG.getConstant(2 * IdxScale, DL, MVT::i64));
20970
+ TypeSize Offset2 = TypeSize::getFixed(2);
20971
+ SDValue Ptr2 = DAG.getMemBasePlusOffset(ST->getBasePtr(), Offset2, DL);
20972
+ Chain = DAG.getStore(Chain, DL, E2, Ptr2, MF.getMachineMemOperand(MMO, 2, 1));
20973
+
20974
+ SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, Cast,
20975
+ DAG.getConstant(1 * IdxScale, DL, MVT::i64));
20976
+ TypeSize Offset1 = TypeSize::getFixed(1);
20977
+ SDValue Ptr1 = DAG.getMemBasePlusOffset(ST->getBasePtr(), Offset1, DL);
20978
+ Chain = DAG.getStore(Chain, DL, E1, Ptr1, MF.getMachineMemOperand(MMO, 1, 1));
20979
+
20980
+ SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, Cast,
20981
+ DAG.getConstant(0, DL, MVT::i64));
20982
+ Chain = DAG.getStore(Chain, DL, E0, ST->getBasePtr(),
20983
+ MF.getMachineMemOperand(MMO, 0, 1));
20984
+ return Chain;
20985
+ }
20986
+
20940
20987
static SDValue performSTORECombine(SDNode *N,
20941
20988
TargetLowering::DAGCombinerInfo &DCI,
20942
20989
SelectionDAG &DAG,
@@ -20952,6 +20999,9 @@ static SDValue performSTORECombine(SDNode *N,
20952
20999
return EltVT == MVT::f32 || EltVT == MVT::f64;
20953
21000
};
20954
21001
21002
+ if (SDValue Res = combineI8TruncStore(ST, DAG, Subtarget))
21003
+ return Res;
21004
+
20955
21005
// If this is an FP_ROUND followed by a store, fold this into a truncating
20956
21006
// store. We can do this even if this is already a truncstore.
20957
21007
// We purposefully don't care about legality of the nodes here as we know
0 commit comments