@@ -1669,20 +1669,20 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
1669
1669
return true ;
1670
1670
}
1671
1671
1672
- using InstLane = std::pair<Value *, int >;
1672
+ using InstLane = std::pair<Use *, int >;
1673
1673
1674
- static InstLane lookThroughShuffles (Value *V, int Lane) {
1675
- while (auto *SV = dyn_cast<ShuffleVectorInst>(V)) {
1674
+ static InstLane lookThroughShuffles (Use *V, int Lane) {
1675
+ while (auto *SV = dyn_cast<ShuffleVectorInst>(V-> get () )) {
1676
1676
unsigned NumElts =
1677
1677
cast<FixedVectorType>(SV->getOperand (0 )->getType ())->getNumElements ();
1678
1678
int M = SV->getMaskValue (Lane);
1679
1679
if (M < 0 )
1680
1680
return {nullptr , PoisonMaskElem};
1681
1681
if (static_cast <unsigned >(M) < NumElts) {
1682
- V = SV->getOperand (0 );
1682
+ V = & SV->getOperandUse (0 );
1683
1683
Lane = M;
1684
1684
} else {
1685
- V = SV->getOperand (1 );
1685
+ V = & SV->getOperandUse (1 );
1686
1686
Lane = M - NumElts;
1687
1687
}
1688
1688
}
@@ -1695,37 +1695,83 @@ generateInstLaneVectorFromOperand(ArrayRef<InstLane> Item, int Op) {
1695
1695
for (InstLane IL : Item) {
1696
1696
auto [V, Lane] = IL;
1697
1697
InstLane OpLane =
1698
- V ? lookThroughShuffles (cast<Instruction>(V)->getOperand (Op), Lane)
1698
+ V ? lookThroughShuffles (&cast<Instruction>(V->get ())->getOperandUse (Op),
1699
+ Lane)
1699
1700
: InstLane{nullptr , PoisonMaskElem};
1700
1701
NItem.emplace_back (OpLane);
1701
1702
}
1702
1703
return NItem;
1703
1704
}
1704
1705
1706
+ /// Detect a concat of multiple whole vectors of one type into a wider vector, accepted only when TTI prices the two-source concat shuffle at zero.
1707
+ static bool isFreeConcat (ArrayRef<InstLane> Item,
1708
+ const TargetTransformInfo &TTI) {
1709
+ auto *Ty = cast<FixedVectorType>(Item.front ().first ->get ()->getType ());
1710
+ unsigned NumElts = Ty->getNumElements ();
1711
+ if (Item.size () == NumElts || NumElts == 1 || Item.size () % NumElts != 0 ) // Not a concat: one source wide, 1-element source, or a partial slice.
1712
+ return false ;
1713
+
1714
+ // Check that the concat is free, usually meaning that the type will be split
1715
+ // during legalization. The mask queried is the identity over 2 * NumElts lanes.
1716
+ SmallVector<int , 16 > ConcatMask (Ty->getNumElements () * 2 );
1717
+ std::iota (ConcatMask.begin (), ConcatMask.end (), 0 );
1718
+ if (TTI.getShuffleCost (TTI::SK_PermuteTwoSrc, Ty, ConcatMask,
1719
+ TTI::TCK_RecipThroughput) != 0 )
1720
+ return false ;
1721
+
1722
+ unsigned NumSlices = Item.size () / NumElts;
1723
+ // Currently we generate a tree of shuffles for the concats, which limits us
1724
+ // to a power-of-two number of slices.
1725
+ if (!isPowerOf2_32 (NumSlices))
1726
+ return false ;
1727
+ for (unsigned Slice = 0 ; Slice < NumSlices; ++Slice) {
1728
+ Use *SliceV = Item[Slice * NumElts].first ;
1729
+ if (!SliceV || SliceV->get ()->getType () != Ty)
1730
+ return false ;
1731
+ for (unsigned Elt = 0 ; Elt < NumElts; ++Elt) {
1732
+ auto [V, Lane] = Item[Slice * NumElts + Elt];
1733
+ if (Lane != static_cast <int >(Elt) || SliceV->get () != V->get ()) // NOTE(review): V is not null-checked; presumably null entries always carry PoisonMaskElem so the Lane test short-circuits first - confirm.
1734
+ return false ;
1735
+ }
1736
+ }
1737
+ return true ;
1738
+ }
1739
+
1705
1740
static Value *generateNewInstTree (ArrayRef<InstLane> Item, FixedVectorType *Ty,
1706
- const SmallPtrSet<Value *, 4 > &IdentityLeafs,
1707
- const SmallPtrSet<Value *, 4 > &SplatLeafs,
1741
+ const SmallPtrSet<Use *, 4 > &IdentityLeafs,
1742
+ const SmallPtrSet<Use *, 4 > &SplatLeafs,
1743
+ const SmallPtrSet<Use *, 4 > &ConcatLeafs,
1708
1744
IRBuilder<> &Builder) {
1709
1745
auto [FrontV, FrontLane] = Item.front ();
1710
1746
1711
- if (IdentityLeafs.contains (FrontV) &&
1712
- all_of (drop_begin (enumerate(Item)), [Item](const auto &E) {
1713
- Value *FrontV = Item.front ().first ;
1714
- auto [V, Lane] = E.value ();
1715
- return !V || (V == FrontV && Lane == (int )E.index ());
1716
- })) {
1717
- return FrontV;
1747
+ if (IdentityLeafs.contains (FrontV)) {
1748
+ return FrontV->get ();
1718
1749
}
1719
1750
if (SplatLeafs.contains (FrontV)) {
1720
- if (auto *ILI = dyn_cast<Instruction>(FrontV))
1721
- Builder.SetInsertPoint (*ILI->getInsertionPointAfterDef ());
1722
- else if (auto *Arg = dyn_cast<Argument>(FrontV))
1723
- Builder.SetInsertPointPastAllocas (Arg->getParent ());
1724
1751
SmallVector<int , 16 > Mask (Ty->getNumElements (), FrontLane);
1725
- return Builder.CreateShuffleVector (FrontV, Mask);
1752
+ return Builder.CreateShuffleVector (FrontV->get (), Mask);
1753
+ }
1754
+ if (ConcatLeafs.contains (FrontV)) {
1755
+ unsigned NumElts =
1756
+ cast<FixedVectorType>(FrontV->get ()->getType ())->getNumElements ();
1757
+ SmallVector<Value *> Values (Item.size () / NumElts, nullptr );
1758
+ for (unsigned S = 0 ; S < Values.size (); ++S)
1759
+ Values[S] = Item[S * NumElts].first ->get ();
1760
+
1761
+ while (Values.size () > 1 ) {
1762
+ NumElts *= 2 ;
1763
+ SmallVector<int , 16 > Mask (NumElts, 0 );
1764
+ std::iota (Mask.begin (), Mask.end (), 0 );
1765
+ SmallVector<Value *> NewValues (Values.size () / 2 , nullptr );
1766
+ for (unsigned S = 0 ; S < NewValues.size (); ++S)
1767
+ NewValues[S] =
1768
+ Builder.CreateShuffleVector (Values[S * 2 ], Values[S * 2 + 1 ], Mask);
1769
+ Values = NewValues;
1770
+ }
1771
+ return Values[0 ];
1726
1772
}
1727
1773
1728
- auto *I = cast<Instruction>(FrontV);
1774
+ auto *I = cast<Instruction>(FrontV-> get () );
1729
1775
auto *II = dyn_cast<IntrinsicInst>(I);
1730
1776
unsigned NumOps = I->getNumOperands () - (II ? 1 : 0 );
1731
1777
SmallVector<Value *> Ops (NumOps);
@@ -1734,16 +1780,16 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
1734
1780
Ops[Idx] = II->getOperand (Idx);
1735
1781
continue ;
1736
1782
}
1737
- Ops[Idx] = generateNewInstTree (generateInstLaneVectorFromOperand (Item, Idx),
1738
- Ty, IdentityLeafs, SplatLeafs, Builder);
1783
+ Ops[Idx] =
1784
+ generateNewInstTree (generateInstLaneVectorFromOperand (Item, Idx), Ty,
1785
+ IdentityLeafs, SplatLeafs, ConcatLeafs, Builder);
1739
1786
}
1740
1787
1741
1788
SmallVector<Value *, 8 > ValueList;
1742
1789
for (const auto &Lane : Item)
1743
1790
if (Lane.first )
1744
- ValueList.push_back (Lane.first );
1791
+ ValueList.push_back (Lane.first -> get () );
1745
1792
1746
- Builder.SetInsertPoint (I);
1747
1793
Type *DstTy =
1748
1794
FixedVectorType::get (I->getType ()->getScalarType (), Ty->getNumElements ());
1749
1795
if (auto *BI = dyn_cast<BinaryOperator>(I)) {
@@ -1785,16 +1831,16 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
1785
1831
// do so.
1786
1832
bool VectorCombine::foldShuffleToIdentity (Instruction &I) {
1787
1833
auto *Ty = dyn_cast<FixedVectorType>(I.getType ());
1788
- if (!Ty)
1834
+ if (!Ty || I. use_empty () )
1789
1835
return false ;
1790
1836
1791
1837
SmallVector<InstLane> Start (Ty->getNumElements ());
1792
1838
for (unsigned M = 0 , E = Ty->getNumElements (); M < E; ++M)
1793
- Start[M] = lookThroughShuffles (&I , M);
1839
+ Start[M] = lookThroughShuffles (&*I. use_begin () , M);
1794
1840
1795
1841
SmallVector<SmallVector<InstLane>> Worklist;
1796
1842
Worklist.push_back (Start);
1797
- SmallPtrSet<Value *, 4 > IdentityLeafs, SplatLeafs;
1843
+ SmallPtrSet<Use *, 4 > IdentityLeafs, SplatLeafs, ConcatLeafs ;
1798
1844
unsigned NumVisited = 0 ;
1799
1845
1800
1846
while (!Worklist.empty ()) {
@@ -1809,12 +1855,12 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
1809
1855
return false ;
1810
1856
1811
1857
// Look for an identity value.
1812
- if (! FrontLane &&
1813
- cast<FixedVectorType>(FrontV->getType ())->getNumElements () ==
1858
+ if (FrontLane == 0 &&
1859
+ cast<FixedVectorType>(FrontV->get ()-> getType ())->getNumElements () ==
1814
1860
Ty->getNumElements () &&
1815
1861
all_of (drop_begin (enumerate(Item)), [Item](const auto &E) {
1816
- Value *FrontV = Item.front ().first ;
1817
- return !E.value ().first || (E.value ().first == FrontV &&
1862
+ Value *FrontV = Item.front ().first -> get () ;
1863
+ return !E.value ().first || (E.value ().first -> get () == FrontV &&
1818
1864
E.value ().second == (int )E.index ());
1819
1865
})) {
1820
1866
IdentityLeafs.insert (FrontV);
@@ -1824,9 +1870,9 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
1824
1870
if (auto *C = dyn_cast<Constant>(FrontV);
1825
1871
C && C->getSplatValue () &&
1826
1872
all_of (drop_begin (Item), [Item](InstLane &IL) {
1827
- Value *FrontV = Item.front ().first ;
1828
- Value *V = IL.first ;
1829
- return !V || V == FrontV;
1873
+ Value *FrontV = Item.front ().first -> get () ;
1874
+ Use *V = IL.first ;
1875
+ return !V || V-> get () == FrontV;
1830
1876
})) {
1831
1877
SplatLeafs.insert (FrontV);
1832
1878
continue ;
@@ -1835,19 +1881,19 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
1835
1881
if (all_of (drop_begin (Item), [Item](InstLane &IL) {
1836
1882
auto [FrontV, FrontLane] = Item.front ();
1837
1883
auto [V, Lane] = IL;
1838
- return !V || (V == FrontV && Lane == FrontLane);
1884
+ return !V || (V-> get () == FrontV-> get () && Lane == FrontLane);
1839
1885
})) {
1840
1886
SplatLeafs.insert (FrontV);
1841
1887
continue ;
1842
1888
}
1843
1889
1844
1890
// We need each element to be the same type of value, and check that each
1845
1891
// element has a single use.
1846
- if (!all_of (drop_begin (Item), [Item](InstLane IL) {
1847
- Value *FrontV = Item.front ().first ;
1848
- Value *V = IL.first ;
1849
- if (!V)
1892
+ if (all_of (drop_begin (Item), [Item](InstLane IL) {
1893
+ Value *FrontV = Item.front ().first ->get ();
1894
+ if (!IL.first )
1850
1895
return true ;
1896
+ Value *V = IL.first ->get ();
1851
1897
if (auto *I = dyn_cast<Instruction>(V); I && !I->hasOneUse ())
1852
1898
return false ;
1853
1899
if (V->getValueID () != FrontV->getValueID ())
@@ -1864,48 +1910,59 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
1864
1910
return !II || (isa<IntrinsicInst>(FrontV) &&
1865
1911
II->getIntrinsicID () ==
1866
1912
cast<IntrinsicInst>(FrontV)->getIntrinsicID ());
1867
- }))
1868
- return false ;
1869
-
1870
- // Check the operator is one that we support. We exclude div/rem in case
1871
- // they hit UB from poison lanes.
1872
- if ((isa<BinaryOperator>(FrontV) &&
1873
- !cast<BinaryOperator>(FrontV)->isIntDivRem ()) ||
1874
- isa<CmpInst>(FrontV)) {
1875
- Worklist.push_back (generateInstLaneVectorFromOperand (Item, 0 ));
1876
- Worklist.push_back (generateInstLaneVectorFromOperand (Item, 1 ));
1877
- } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst>(FrontV)) {
1878
- Worklist.push_back (generateInstLaneVectorFromOperand (Item, 0 ));
1879
- } else if (isa<SelectInst>(FrontV)) {
1880
- Worklist.push_back (generateInstLaneVectorFromOperand (Item, 0 ));
1881
- Worklist.push_back (generateInstLaneVectorFromOperand (Item, 1 ));
1882
- Worklist.push_back (generateInstLaneVectorFromOperand (Item, 2 ));
1883
- } else if (auto *II = dyn_cast<IntrinsicInst>(FrontV);
1884
- II && isTriviallyVectorizable (II->getIntrinsicID ())) {
1885
- for (unsigned Op = 0 , E = II->getNumOperands () - 1 ; Op < E; Op++) {
1886
- if (isVectorIntrinsicWithScalarOpAtArg (II->getIntrinsicID (), Op)) {
1887
- if (!all_of (drop_begin (Item), [Item, Op](InstLane &IL) {
1888
- Value *FrontV = Item.front ().first ;
1889
- Value *V = IL.first ;
1890
- return !V || (cast<Instruction>(V)->getOperand (Op) ==
1891
- cast<Instruction>(FrontV)->getOperand (Op));
1892
- }))
1893
- return false ;
1894
- continue ;
1913
+ })) {
1914
+ // Check the operator is one that we support.
1915
+ if (isa<BinaryOperator, CmpInst>(FrontV)) {
1916
+ // We exclude div/rem in case they hit UB from poison lanes.
1917
+ if (auto *BO = dyn_cast<BinaryOperator>(FrontV);
1918
+ BO && BO->isIntDivRem ())
1919
+ return false ;
1920
+ Worklist.push_back (generateInstLaneVectorFromOperand (Item, 0 ));
1921
+ Worklist.push_back (generateInstLaneVectorFromOperand (Item, 1 ));
1922
+ continue ;
1923
+ } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst>(FrontV)) {
1924
+ Worklist.push_back (generateInstLaneVectorFromOperand (Item, 0 ));
1925
+ continue ;
1926
+ } else if (isa<SelectInst>(FrontV)) {
1927
+ Worklist.push_back (generateInstLaneVectorFromOperand (Item, 0 ));
1928
+ Worklist.push_back (generateInstLaneVectorFromOperand (Item, 1 ));
1929
+ Worklist.push_back (generateInstLaneVectorFromOperand (Item, 2 ));
1930
+ continue ;
1931
+ } else if (auto *II = dyn_cast<IntrinsicInst>(FrontV);
1932
+ II && isTriviallyVectorizable (II->getIntrinsicID ())) {
1933
+ for (unsigned Op = 0 , E = II->getNumOperands () - 1 ; Op < E; Op++) {
1934
+ if (isVectorIntrinsicWithScalarOpAtArg (II->getIntrinsicID (), Op)) {
1935
+ if (!all_of (drop_begin (Item), [Item, Op](InstLane &IL) {
1936
+ Value *FrontV = Item.front ().first ->get ();
1937
+ Value *V = IL.first ->get ();
1938
+ return !V || (cast<Instruction>(V)->getOperand (Op) ==
1939
+ cast<Instruction>(FrontV)->getOperand (Op));
1940
+ }))
1941
+ return false ;
1942
+ continue ;
1943
+ }
1944
+ Worklist.push_back (generateInstLaneVectorFromOperand (Item, Op));
1895
1945
}
1896
- Worklist. push_back ( generateInstLaneVectorFromOperand (Item, Op)) ;
1946
+ continue ;
1897
1947
}
1898
- } else {
1899
- return false ;
1900
1948
}
1949
+
1950
+ if (isFreeConcat (Item, TTI)) {
1951
+ ConcatLeafs.insert (FrontV);
1952
+ continue ;
1953
+ }
1954
+
1955
+ return false ;
1901
1956
}
1902
1957
1903
1958
if (NumVisited <= 1 )
1904
1959
return false ;
1905
1960
1906
1961
// If we got this far, we know the shuffles are superfluous and can be
1907
1962
// removed. Scan through again and generate the new tree of instructions.
1908
- Value *V = generateNewInstTree (Start, Ty, IdentityLeafs, SplatLeafs, Builder);
1963
+ Builder.SetInsertPoint (&I);
1964
+ Value *V = generateNewInstTree (Start, Ty, IdentityLeafs, SplatLeafs,
1965
+ ConcatLeafs, Builder);
1909
1966
replaceValue (I, *V);
1910
1967
return true ;
1911
1968
}
0 commit comments