@@ -10096,7 +10096,10 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
10096
10096
if (Size != (int)ExpectedMask.size())
10097
10097
return false;
10098
10098
assert(llvm::all_of(ExpectedMask,
10099
- [Size](int M) { return isInRange(M, 0, 2 * Size); }) &&
10099
+ [Size](int M) {
10100
+ return M == SM_SentinelZero ||
10101
+ isInRange(M, 0, 2 * Size);
10102
+ }) &&
10100
10103
"Illegal target shuffle mask");
10101
10104
10102
10105
// Check for out-of-range target shuffle mask indices.
@@ -10119,6 +10122,9 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
10119
10122
int ExpectedIdx = ExpectedMask[i];
10120
10123
if (MaskIdx == SM_SentinelUndef || MaskIdx == ExpectedIdx)
10121
10124
continue;
10125
+ // If we failed to match an expected SM_SentinelZero then early out.
10126
+ if (ExpectedIdx < 0)
10127
+ return false;
10122
10128
if (MaskIdx == SM_SentinelZero) {
10123
10129
// If we need this expected index to be a zero element, then update the
10124
10130
// relevant zero mask and perform the known bits at the end to minimize
@@ -39594,18 +39600,46 @@ static bool matchBinaryPermuteShuffle(
39594
39600
((MaskVT.is128BitVector() && Subtarget.hasVLX()) ||
39595
39601
(MaskVT.is256BitVector() && Subtarget.hasVLX()) ||
39596
39602
(MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
39603
+ MVT AlignVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits),
39604
+ MaskVT.getSizeInBits() / EltSizeInBits);
39597
39605
if (!isAnyZero(Mask)) {
39598
39606
int Rotation = matchShuffleAsElementRotate(V1, V2, Mask);
39599
39607
if (0 < Rotation) {
39600
39608
Shuffle = X86ISD::VALIGN;
39601
- if (EltSizeInBits == 64)
39602
- ShuffleVT = MVT::getVectorVT(MVT::i64, MaskVT.getSizeInBits() / 64);
39603
- else
39604
- ShuffleVT = MVT::getVectorVT(MVT::i32, MaskVT.getSizeInBits() / 32);
39609
+ ShuffleVT = AlignVT;
39605
39610
PermuteImm = Rotation;
39606
39611
return true;
39607
39612
}
39608
39613
}
39614
+ // See if we can use VALIGN as a cross-lane version of VSHLDQ/VSRLDQ.
39615
+ unsigned ZeroLo = Zeroable.countr_one();
39616
+ unsigned ZeroHi = Zeroable.countl_one();
39617
+ assert((ZeroLo + ZeroHi) < NumMaskElts && "Zeroable shuffle detected");
39618
+ if (ZeroLo) {
39619
+ SmallVector<int, 16> ShiftMask(NumMaskElts, SM_SentinelZero);
39620
+ std::iota(ShiftMask.begin() + ZeroLo, ShiftMask.end(), 0);
39621
+ if (isTargetShuffleEquivalent(MaskVT, Mask, ShiftMask, DAG, V1)) {
39622
+ V1 = V1;
39623
+ V2 = getZeroVector(AlignVT, Subtarget, DAG, DL);
39624
+ Shuffle = X86ISD::VALIGN;
39625
+ ShuffleVT = AlignVT;
39626
+ PermuteImm = NumMaskElts - ZeroLo;
39627
+ return true;
39628
+ }
39629
+ }
39630
+ if (ZeroHi) {
39631
+ SmallVector<int, 16> ShiftMask(NumMaskElts, SM_SentinelZero);
39632
+ std::iota(ShiftMask.begin(), ShiftMask.begin() + NumMaskElts - ZeroHi,
39633
+ ZeroHi);
39634
+ if (isTargetShuffleEquivalent(MaskVT, Mask, ShiftMask, DAG, V1)) {
39635
+ V2 = V1;
39636
+ V1 = getZeroVector(AlignVT, Subtarget, DAG, DL);
39637
+ Shuffle = X86ISD::VALIGN;
39638
+ ShuffleVT = AlignVT;
39639
+ PermuteImm = ZeroHi;
39640
+ return true;
39641
+ }
39642
+ }
39609
39643
}
39610
39644
39611
39645
// Attempt to match against PALIGNR byte rotate.
0 commit comments