Skip to content

Commit 37dda86

Browse files
committed
[VectorCombine] Add foldShuffleToIdentity
This patch adds a basic version of a combine that attempts to fold away shuffles that, when combined, simplify away to an identity shuffle. For example:
  %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %at = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
  %abt = fneg <4 x half> %at
  %abb = fneg <4 x half> %ab
  %r = shufflevector <4 x half> %abt, <4 x half> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
By looking through the shuffles, it can be simplified to:
  %r = fneg <8 x half> %a
The code tracks each lane starting from the original shuffle, keeping track of a vector of {src, idx}. As we propagate up through the instructions we will either look through intermediate instructions (binops and unops) or see a collection of lanes that all have the same src and incrementing idx (an identity). We can also see a single value with identical lanes, which we can treat like a splat. Only the basic version is added here, handling identities, splats, binops and unops. In follow-up patches other instructions can be added such as constants, intrinsics, cmp/sel and zext/sext/trunc.
1 parent 302d0f3 commit 37dda86

File tree

4 files changed

+165
-118
lines changed

4 files changed

+165
-118
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ class VectorCombine {
113113
bool scalarizeLoadExtract(Instruction &I);
114114
bool foldShuffleOfBinops(Instruction &I);
115115
bool foldShuffleOfCastops(Instruction &I);
116+
bool foldShuffleToIdentity(Instruction &I);
116117
bool foldShuffleFromReductions(Instruction &I);
117118
bool foldTruncFromReductions(Instruction &I);
118119
bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
@@ -1547,6 +1548,145 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
15471548
return true;
15481549
}
15491550

1551+
// Starting from a shuffle, look up through operands tracking the shuffled index
1552+
// of each lane. If we can simplify away the shuffles to identities then
1553+
// do so.
1554+
bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
1555+
FixedVectorType *Ty = dyn_cast<FixedVectorType>(I.getType());
1556+
if (!Ty || !isa<Instruction>(I.getOperand(0)) ||
1557+
!isa<Instruction>(I.getOperand(1)))
1558+
return false;
1559+
1560+
using InstLane = std::pair<Value *, int>;
1561+
1562+
auto LookThroughShuffles = [](Value *V, int Lane) -> InstLane {
1563+
while (auto *SV = dyn_cast<ShuffleVectorInst>(V)) {
1564+
unsigned NumElts =
1565+
cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
1566+
int M = SV->getMaskValue(Lane);
1567+
if (M < 0)
1568+
return {nullptr, -1};
1569+
else if (M < (int)NumElts) {
1570+
V = SV->getOperand(0);
1571+
Lane = M;
1572+
} else {
1573+
V = SV->getOperand(1);
1574+
Lane = M - NumElts;
1575+
}
1576+
}
1577+
return InstLane{V, Lane};
1578+
};
1579+
1580+
auto GenerateInstLaneVectorFromOperand =
1581+
[&LookThroughShuffles](const SmallVector<InstLane> &Item, int Op) {
1582+
SmallVector<InstLane> NItem;
1583+
for (InstLane V : Item) {
1584+
NItem.emplace_back(
1585+
!V.first
1586+
? InstLane{nullptr, -1}
1587+
: LookThroughShuffles(
1588+
cast<Instruction>(V.first)->getOperand(Op), V.second));
1589+
}
1590+
return NItem;
1591+
};
1592+
1593+
SmallVector<InstLane> Start;
1594+
for (unsigned M = 0; M < Ty->getNumElements(); ++M)
1595+
Start.push_back(LookThroughShuffles(&I, M));
1596+
1597+
SmallVector<SmallVector<InstLane>> Worklist;
1598+
Worklist.push_back(Start);
1599+
SmallPtrSet<Value *, 4> IdentityLeafs, SplatLeafs;
1600+
1601+
while (!Worklist.empty()) {
1602+
SmallVector<InstLane> Item = Worklist.pop_back_val();
1603+
1604+
// If we found an undef first lane then bail out to keep things simple.
1605+
if (!Item[0].first)
1606+
return false;
1607+
1608+
// Look for an identity value.
1609+
if (Item[0].second == 0 && Item[0].first->getType() == Ty &&
1610+
all_of(drop_begin(enumerate(Item)), [&](const auto &E) {
1611+
return !E.value().first || (E.value().first == Item[0].first &&
1612+
E.value().second == (int)E.index());
1613+
})) {
1614+
IdentityLeafs.insert(Item[0].first);
1615+
continue;
1616+
}
1617+
// Look for a splat value.
1618+
if (all_of(drop_begin(Item), [&](InstLane &IL) {
1619+
return !IL.first ||
1620+
(IL.first == Item[0].first && IL.second == Item[0].second);
1621+
})) {
1622+
SplatLeafs.insert(Item[0].first);
1623+
continue;
1624+
}
1625+
1626+
// We need each element to be the same type of value, and check that each
1627+
// element has a single use.
1628+
if (!all_of(drop_begin(Item), [&](InstLane IL) {
1629+
if (!IL.first)
1630+
return true;
1631+
if (isa<Instruction>(IL.first) &&
1632+
!cast<Instruction>(IL.first)->hasOneUse())
1633+
return false;
1634+
return IL.first->getValueID() == Item[0].first->getValueID() &&
1635+
(!isa<IntrinsicInst>(IL.first) ||
1636+
cast<IntrinsicInst>(IL.first)->getIntrinsicID() ==
1637+
cast<IntrinsicInst>(Item[0].first)->getIntrinsicID());
1638+
}))
1639+
return false;
1640+
1641+
// Check the operator is one that we support.
1642+
if (isa<BinaryOperator>(Item[0].first)) {
1643+
Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
1644+
Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 1));
1645+
} else if (isa<UnaryOperator>(Item[0].first)) {
1646+
Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
1647+
} else {
1648+
return false;
1649+
}
1650+
}
1651+
1652+
// If we got this far, we know the shuffles are superfluous and can be
1653+
// removed. Scan through again and generate the new tree of instructions.
1654+
std::function<Value *(const SmallVector<InstLane> &)> generate =
1655+
[&](const SmallVector<InstLane> &Item) -> Value * {
1656+
if (IdentityLeafs.contains(Item[0].first) &&
1657+
all_of(drop_begin(enumerate(Item)), [&](const auto &E) {
1658+
return !E.value().first || (E.value().first == Item[0].first &&
1659+
E.value().second == (int)E.index());
1660+
})) {
1661+
return Item[0].first;
1662+
} else if (SplatLeafs.contains(Item[0].first)) {
1663+
if (auto ILI = dyn_cast<Instruction>(Item[0].first))
1664+
Builder.SetInsertPoint(*ILI->getInsertionPointAfterDef());
1665+
else if (isa<Argument>(Item[0].first))
1666+
Builder.SetInsertPointPastAllocas(I.getParent()->getParent());
1667+
SmallVector<int, 16> Mask(Ty->getNumElements(), Item[0].second);
1668+
return Builder.CreateShuffleVector(Item[0].first, Mask);
1669+
}
1670+
1671+
auto *I = cast<Instruction>(Item[0].first);
1672+
SmallVector<Value *> Ops;
1673+
unsigned E = I->getNumOperands();
1674+
for (unsigned Idx = 0; Idx < E; Idx++)
1675+
Ops.push_back(generate(GenerateInstLaneVectorFromOperand(Item, Idx)));
1676+
Builder.SetInsertPoint(I);
1677+
if (auto BI = dyn_cast<BinaryOperator>(I))
1678+
return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(),
1679+
Ops[0], Ops[1]);
1680+
if (auto UI = dyn_cast<UnaryOperator>(I))
1681+
return Builder.CreateUnOp((Instruction::UnaryOps)UI->getOpcode(), Ops[0]);
1682+
llvm_unreachable("Unhandled instruction in generate");
1683+
};
1684+
1685+
Value *V = generate(Start);
1686+
replaceValue(I, *V);
1687+
return true;
1688+
}
1689+
15501690
/// Given a commutative reduction, the order of the input lanes does not alter
15511691
/// the results. We can use this to remove certain shuffles feeding the
15521692
/// reduction, removing the need to shuffle at all.
@@ -2103,6 +2243,7 @@ bool VectorCombine::run() {
21032243
MadeChange |= foldShuffleOfBinops(I);
21042244
MadeChange |= foldShuffleOfCastops(I);
21052245
MadeChange |= foldSelectShuffle(I);
2246+
MadeChange |= foldShuffleToIdentity(I);
21062247
break;
21072248
case Instruction::BitCast:
21082249
MadeChange |= foldBitcastShuffle(I);

0 commit comments

Comments
 (0)