@@ -59284,36 +59284,34 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
59284
59284
!(Vec.isUndef() || ISD::isBuildVectorAllZeros(Vec.getNode())))) {
59285
59285
SDValue ExtSrc = SubVec.getOperand(0);
59286
59286
int ExtIdxVal = SubVec.getConstantOperandVal(1);
59287
- if (ExtIdxVal != 0) {
59288
- SmallVector<int, 64> Mask(VecNumElts);
59289
- // First create an identity shuffle mask.
59290
- for (int i = 0; i != VecNumElts; ++i)
59291
- Mask[i] = i;
59292
- // Now insert the extracted portion.
59293
- for (int i = 0; i != SubVecNumElts; ++i)
59294
- Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts;
59287
+ // Create a shuffle mask matching the extraction and insertion.
59288
+ SmallVector<int, 64> Mask(VecNumElts);
59289
+ std::iota(Mask.begin(), Mask.end(), 0);
59290
+ std::iota(Mask.begin() + IdxVal, Mask.begin() + IdxVal + SubVecNumElts,
59291
+ ExtIdxVal + VecNumElts);
59292
+ if (ExtIdxVal != 0)
59295
59293
return DAG.getVectorShuffle(OpVT, dl, Vec, ExtSrc, Mask);
59296
- }
59297
- // If we're broadcasting, see if we can use a blend instead of
59298
- // extract/insert pair. Ensure that the subvector is aligned with the
59299
- // insertion/extractions.
59300
- if ((ExtIdxVal % SubVecNumElts) == 0 && ( IdxVal % SubVecNumElts) == 0 &&
59301
- ( ExtSrc.getOpcode() == X86ISD::VBROADCAST ||
59302
- ExtSrc.getOpcode() == X86ISD::VBROADCAST_LOAD ||
59303
- (ExtSrc.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
59304
- cast<MemIntrinsicSDNode>(ExtSrc)->getMemoryVT() == SubVecVT))) {
59294
+ // See if we can use a blend instead of extract/insert pair.
59295
+ SmallVector<int, 64> BlendMask(VecNumElts);
59296
+ std::iota(BlendMask.begin(), BlendMask.end(), 0);
59297
+ std::iota(BlendMask.begin() + IdxVal,
59298
+ BlendMask.begin() + IdxVal + SubVecNumElts, VecNumElts + IdxVal);
59299
+ if (isShuffleEquivalent(Mask, BlendMask, Vec, ExtSrc) &&
59300
+ VecNumElts == (2 * SubVecNumElts)) {
59301
+ assert((IdxVal == 0 || IdxVal == SubVecNumElts) &&
59302
+ "Unaligned subvector insertion");
59305
59303
if (OpVT.is256BitVector() && SubVecVT.is128BitVector()) {
59306
- uint64_t BlendMask = IdxVal == 0 ? 0x0F : 0xF0;
59307
59304
SDValue Blend = DAG.getNode(
59308
59305
X86ISD::BLENDI, dl, MVT::v8f32, DAG.getBitcast(MVT::v8f32, Vec),
59309
59306
DAG.getBitcast(MVT::v8f32, ExtSrc),
59310
- DAG.getTargetConstant(BlendMask , dl, MVT::i8));
59307
+ DAG.getTargetConstant(IdxVal == 0 ? 0x0F : 0xF0 , dl, MVT::i8));
59311
59308
return DAG.getBitcast(OpVT, Blend);
59312
59309
} else if (OpVT.is512BitVector() && SubVecVT.is256BitVector()) {
59313
- SDValue Lo = DAG.getBitcast(MVT::v8f64, IdxVal == 0 ? ExtSrc : Vec);
59314
- SDValue Hi = DAG.getBitcast(MVT::v8f64, IdxVal == 0 ? Vec : ExtSrc);
59310
+ MVT ShufVT = OpVT.isInteger() ? MVT::v8i64 : MVT::v8f64;
59311
+ SDValue Lo = DAG.getBitcast(ShufVT, IdxVal == 0 ? ExtSrc : Vec);
59312
+ SDValue Hi = DAG.getBitcast(ShufVT, IdxVal == 0 ? Vec : ExtSrc);
59315
59313
SDValue Shuffle =
59316
- DAG.getNode(X86ISD::SHUF128, dl, MVT::v8f64 , Lo, Hi,
59314
+ DAG.getNode(X86ISD::SHUF128, dl, ShufVT , Lo, Hi,
59317
59315
getV4X86ShuffleImm8ForMask({0, 1, 2, 3}, dl, DAG));
59318
59316
return DAG.getBitcast(OpVT, Shuffle);
59319
59317
}
0 commit comments