-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[SLPVectorizer] Use accurate cost for external users of resize shuffles #137419
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14344,6 +14344,7 @@ static T *performExtractsShuffleAction( | |
[[maybe_unused]] auto *V = ValueSelect::get<T *>(Base); | ||
assert((!V || GetVF(V) == Mask.size()) && | ||
"Expected base vector of VF number of elements."); | ||
|
||
Prev = Action(Mask, {nullptr, Res.first}); | ||
} else if (ShuffleMask.size() == 1) { | ||
// Base is undef and only 1 vector is shuffled - perform the action only for | ||
|
@@ -14802,7 +14803,39 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals, | |
<< " for final shuffle of insertelement external users.\n"; | ||
TE->dump(); dbgs() << "SLP: Current total cost = " << Cost << "\n"); | ||
Cost += C; | ||
return std::make_pair(TE, true); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the patch! What about something like this, which is more close to the codegen:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks -- I'll try to adopt it, but it's causing some concerning looking diffs in the lit tests that I need to work through. |
||
bool HasLargeIndex = | ||
any_of(Mask, [VF](int Idx) { return Idx >= static_cast<int>(VF); }); | ||
|
||
// If the resize source is just an identity vector, then will produce an | ||
// insert subvector shufflevector | ||
bool NeedsResizeExtract = true; | ||
if ((VecVF < VF) && !HasLargeIndex) { | ||
NeedsResizeExtract = false; | ||
SmallVector<int> ResizeMask(VF, PoisonMaskElem); | ||
for (unsigned I = 0; I < VF; ++I) { | ||
if (Mask[I] != PoisonMaskElem) { | ||
assert((size_t)Mask[I] < ResizeMask.size()); | ||
ResizeMask[Mask[I]] = Mask[I]; | ||
} | ||
} | ||
|
||
unsigned MinVF = std::min(VF, VecVF); | ||
// Check if our mask is a a padded identity mask with non poision | ||
jrbyrnes marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// intermediaries. This implies that our resize shuffle is just a | ||
// contiguous insertsubvector | ||
for (auto [Position, Mask] : enumerate(ResizeMask)) { | ||
if (Position >= MinVF) | ||
break; | ||
|
||
if ((int)Position != Mask) { | ||
NeedsResizeExtract = true; | ||
break; | ||
} | ||
} | ||
} | ||
|
||
return std::make_pair(TE, NeedsResizeExtract); | ||
} | ||
return std::make_pair(TE, false); | ||
}; | ||
|
Uh oh!
There was an error while loading. Please reload this page.