@@ -10096,7 +10096,9 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
10096
10096
if (Size != (int)ExpectedMask.size())
10097
10097
return false;
10098
10098
assert(llvm::all_of(ExpectedMask,
10099
- [Size](int M) { return isInRange(M, 0, 2 * Size); }) &&
10099
+ [Size](int M) {
10100
+ return M == SM_SentinelZero || isInRange(M, 0, 2 * Size);
10101
+ }) &&
10100
10102
"Illegal target shuffle mask");
10101
10103
10102
10104
// Check for out-of-range target shuffle mask indices.
@@ -10119,6 +10121,9 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
10119
10121
int ExpectedIdx = ExpectedMask[i];
10120
10122
if (MaskIdx == SM_SentinelUndef || MaskIdx == ExpectedIdx)
10121
10123
continue;
10124
+ // If we failed to match an expected SM_SentinelZero then early out.
10125
+ if (ExpectedIdx < 0)
10126
+ return false;
10122
10127
if (MaskIdx == SM_SentinelZero) {
10123
10128
// If we need this expected index to be a zero element, then update the
10124
10129
// relevant zero mask and perform the known bits at the end to minimize
@@ -39594,18 +39599,46 @@ static bool matchBinaryPermuteShuffle(
39594
39599
((MaskVT.is128BitVector() && Subtarget.hasVLX()) ||
39595
39600
(MaskVT.is256BitVector() && Subtarget.hasVLX()) ||
39596
39601
(MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
39602
+ MVT AlignVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits),
39603
+ MaskVT.getSizeInBits() / EltSizeInBits);
39597
39604
if (!isAnyZero(Mask)) {
39598
39605
int Rotation = matchShuffleAsElementRotate(V1, V2, Mask);
39599
39606
if (0 < Rotation) {
39600
39607
Shuffle = X86ISD::VALIGN;
39601
- if (EltSizeInBits == 64)
39602
- ShuffleVT = MVT::getVectorVT(MVT::i64, MaskVT.getSizeInBits() / 64);
39603
- else
39604
- ShuffleVT = MVT::getVectorVT(MVT::i32, MaskVT.getSizeInBits() / 32);
39608
+ ShuffleVT = AlignVT;
39605
39609
PermuteImm = Rotation;
39606
39610
return true;
39607
39611
}
39608
39612
}
39613
+ // See if we can use VALIGN as a cross-lane version of VSHLDQ/VSRLDQ.
39614
+ unsigned ZeroLo = Zeroable.countr_one();
39615
+ unsigned ZeroHi = Zeroable.countl_one();
39616
+ assert((ZeroLo + ZeroHi) < NumMaskElts && "Zeroable shuffle detected");
39617
+ if (ZeroLo) {
39618
+ SmallVector<int, 16> ShiftMask(NumMaskElts, SM_SentinelZero);
39619
+ std::iota(ShiftMask.begin() + ZeroLo, ShiftMask.end(), 0);
39620
+ if (isTargetShuffleEquivalent(MaskVT, Mask, ShiftMask, DAG, V1)) {
39621
+ V1 = V1;
39622
+ V2 = getZeroVector(AlignVT, Subtarget, DAG, DL);
39623
+ Shuffle = X86ISD::VALIGN;
39624
+ ShuffleVT = AlignVT;
39625
+ PermuteImm = NumMaskElts - ZeroLo;
39626
+ return true;
39627
+ }
39628
+ }
39629
+ if (ZeroHi) {
39630
+ SmallVector<int, 16> ShiftMask(NumMaskElts, SM_SentinelZero);
39631
+ std::iota(ShiftMask.begin(), ShiftMask.begin() + NumMaskElts - ZeroHi,
39632
+ ZeroHi);
39633
+ if (isTargetShuffleEquivalent(MaskVT, Mask, ShiftMask, DAG, V1)) {
39634
+ V2 = V1;
39635
+ V1 = getZeroVector(AlignVT, Subtarget, DAG, DL);
39636
+ Shuffle = X86ISD::VALIGN;
39637
+ ShuffleVT = AlignVT;
39638
+ PermuteImm = ZeroHi;
39639
+ return true;
39640
+ }
39641
+ }
39609
39642
}
39610
39643
39611
39644
// Attempt to match against PALIGNR byte rotate.
0 commit comments