Skip to content

Commit b0f6f4f

Browse files
authored
Merge pull request #8052 from fhahn/vec3-trunc-store
[AArch64] Combine store (trunc X to <3 x i8>) to sequence of ST1.b
2 parents 4301aa1 + 25f139e commit b0f6f4f

File tree

2 files changed

+851
-0
lines changed

2 files changed

+851
-0
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20937,6 +20937,53 @@ static SDValue combineBoolVectorAndTruncateStore(SelectionDAG &DAG,
2093720937
Store->getMemOperand());
2093820938
}
2093920939

20940+
// Combine store (trunc X to <3 x i8>) to sequence of ST1.b.
20941+
static SDValue combineI8TruncStore(StoreSDNode *ST, SelectionDAG &DAG,
20942+
const AArch64Subtarget *Subtarget) {
20943+
SDValue Value = ST->getValue();
20944+
EVT ValueVT = Value.getValueType();
20945+
20946+
if (ST->isVolatile() || !Subtarget->isLittleEndian() ||
20947+
Value.getOpcode() != ISD::TRUNCATE ||
20948+
ValueVT != EVT::getVectorVT(*DAG.getContext(), MVT::i8, 3))
20949+
return SDValue();
20950+
20951+
assert(ST->getOffset().isUndef() && "undef offset expected");
20952+
SDLoc DL(ST);
20953+
auto WideVT = EVT::getVectorVT(
20954+
*DAG.getContext(),
20955+
Value->getOperand(0).getValueType().getVectorElementType(), 4);
20956+
SDValue UndefVector = DAG.getUNDEF(WideVT);
20957+
SDValue WideTrunc = DAG.getNode(
20958+
ISD::INSERT_SUBVECTOR, DL, WideVT,
20959+
{UndefVector, Value->getOperand(0), DAG.getVectorIdxConstant(0, DL)});
20960+
SDValue Cast = DAG.getNode(
20961+
ISD::BITCAST, DL, WideVT.getSizeInBits() == 64 ? MVT::v8i8 : MVT::v16i8,
20962+
WideTrunc);
20963+
20964+
MachineFunction &MF = DAG.getMachineFunction();
20965+
SDValue Chain = ST->getChain();
20966+
MachineMemOperand *MMO = ST->getMemOperand();
20967+
unsigned IdxScale = WideVT.getScalarSizeInBits() / 8;
20968+
SDValue E2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, Cast,
20969+
DAG.getConstant(2 * IdxScale, DL, MVT::i64));
20970+
TypeSize Offset2 = TypeSize::getFixed(2);
20971+
SDValue Ptr2 = DAG.getMemBasePlusOffset(ST->getBasePtr(), Offset2, DL);
20972+
Chain = DAG.getStore(Chain, DL, E2, Ptr2, MF.getMachineMemOperand(MMO, 2, 1));
20973+
20974+
SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, Cast,
20975+
DAG.getConstant(1 * IdxScale, DL, MVT::i64));
20976+
TypeSize Offset1 = TypeSize::getFixed(1);
20977+
SDValue Ptr1 = DAG.getMemBasePlusOffset(ST->getBasePtr(), Offset1, DL);
20978+
Chain = DAG.getStore(Chain, DL, E1, Ptr1, MF.getMachineMemOperand(MMO, 1, 1));
20979+
20980+
SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, Cast,
20981+
DAG.getConstant(0, DL, MVT::i64));
20982+
Chain = DAG.getStore(Chain, DL, E0, ST->getBasePtr(),
20983+
MF.getMachineMemOperand(MMO, 0, 1));
20984+
return Chain;
20985+
}
20986+
2094020987
static SDValue performSTORECombine(SDNode *N,
2094120988
TargetLowering::DAGCombinerInfo &DCI,
2094220989
SelectionDAG &DAG,
@@ -20952,6 +20999,9 @@ static SDValue performSTORECombine(SDNode *N,
2095220999
return EltVT == MVT::f32 || EltVT == MVT::f64;
2095321000
};
2095421001

21002+
if (SDValue Res = combineI8TruncStore(ST, DAG, Subtarget))
21003+
return Res;
21004+
2095521005
// If this is an FP_ROUND followed by a store, fold this into a truncating
2095621006
// store. We can do this even if this is already a truncstore.
2095721007
// We purposefully don't care about legality of the nodes here as we know

0 commit comments

Comments
 (0)