Skip to content

Commit 0032c15

Browse files
committed
[SROA] Optimize reloaded values in allocas that escape into readonly nocapture calls. (llvm#116645)
Given an alloca that potentially has many uses in large, complex code and that escapes into a call that is readonly+nocapture, we cannot easily split up the alloca. Several optimizations attempt to take a stored value and a later reload and replace the load with the originally stored value: InstCombine has some simple heuristics, GVN can sometimes do it, and so can CSE in limited situations. They all suffer from the same issue with complex code - they start from a load/store and need to prove no-alias for all of the code in between, which in complex cases might be a lot to look through, especially if the pointer is an alloca with so many uses that it exceeds the normal escape/capture analysis limits. The pass that does do well with allocas is SROA, as it has a complete view of all of the uses. This patch adds a case to SROA where it detects allocas that are passed into calls that are nocapture and readonly. It can then replace the values reloaded from an alloca slice with the values stored to it; because the alloca does not otherwise escape, this is valid no matter where the loads and stores are located.
1 parent 331c2dd commit 0032c15

File tree

6 files changed

+226
-82
lines changed

6 files changed

+226
-82
lines changed

llvm/include/llvm/Analysis/PtrUseVisitor.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ class PtrUseVisitorBase {
6464
/// Is the pointer escaped at some point?
6565
bool isEscaped() const { return EscapedInfo != nullptr; }
6666

67+
/// Is the pointer escaped into a read-only nocapture call at some point?
68+
bool isEscapedReadOnly() const { return EscapedReadOnly != nullptr; }
69+
6770
/// Get the instruction causing the visit to abort.
6871
/// \returns a pointer to the instruction causing the abort if one is
6972
/// available; otherwise returns null.
@@ -74,6 +77,10 @@ class PtrUseVisitorBase {
7477
/// is available; otherwise returns null.
7578
Instruction *getEscapingInst() const { return EscapedInfo; }
7679

80+
/// Get the instruction causing the pointer to escape which is a read-only
81+
/// nocapture call.
82+
Instruction *getEscapedReadOnlyInst() const { return EscapedReadOnly; }
83+
7784
/// Mark the visit as aborted. Intended for use in a void return.
7885
/// \param I The instruction which caused the visit to abort, if available.
7986
void setAborted(Instruction *I) {
@@ -88,6 +95,12 @@ class PtrUseVisitorBase {
8895
EscapedInfo = I;
8996
}
9097

98+
/// Mark the pointer as escaped into a readonly-nocapture call.
99+
void setEscapedReadOnly(Instruction *I) {
100+
assert(I && "Expected a valid pointer in setEscapedReadOnly");
101+
EscapedReadOnly = I;
102+
}
103+
91104
/// Mark the pointer as escaped, and the visit as aborted. Intended
92105
/// for use in a void return.
93106
/// \param I The instruction which both escapes the pointer and aborts the
@@ -100,6 +113,7 @@ class PtrUseVisitorBase {
100113
private:
101114
Instruction *AbortedInfo = nullptr;
102115
Instruction *EscapedInfo = nullptr;
116+
Instruction *EscapedReadOnly = nullptr;
103117
};
104118

105119
protected:

llvm/include/llvm/Transforms/Utils/SSAUpdater.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,13 @@ class LoadAndStorePromoter {
188188
/// Return false if a sub-class wants to keep one of the loads/stores
189189
/// after the SSA construction.
190190
virtual bool shouldDelete(Instruction *I) const { return true; }
191+
192+
/// Return the value to use for the point in the code that the alloca is
193+
/// positioned. This will only be used if an Alloca is included in Insts,
194+
/// otherwise the value of a uninitialized load will be assumed to be poison.
195+
virtual Value *getValueToUseForAlloca(Instruction *AI) const {
196+
return nullptr;
197+
}
191198
};
192199

193200
} // end namespace llvm

llvm/lib/Transforms/Scalar/SROA.cpp

Lines changed: 109 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include "llvm/Analysis/GlobalsModRef.h"
4444
#include "llvm/Analysis/Loads.h"
4545
#include "llvm/Analysis/PtrUseVisitor.h"
46+
#include "llvm/Analysis/ValueTracking.h"
4647
#include "llvm/Config/llvm-config.h"
4748
#include "llvm/IR/BasicBlock.h"
4849
#include "llvm/IR/Constant.h"
@@ -83,6 +84,7 @@
8384
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
8485
#include "llvm/Transforms/Utils/Local.h"
8586
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
87+
#include "llvm/Transforms/Utils/SSAUpdater.h"
8688
#include <algorithm>
8789
#include <cassert>
8890
#include <cstddef>
@@ -246,6 +248,7 @@ class SROA {
246248
bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
247249
AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
248250
bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
251+
bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
249252
std::pair<bool /*Changed*/, bool /*CFGChanged*/> runOnAlloca(AllocaInst &AI);
250253
void clobberUse(Use &U);
251254
bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
@@ -598,6 +601,7 @@ class AllocaSlices {
598601
/// If this is true, the slices are never fully built and should be
599602
/// ignored.
600603
bool isEscaped() const { return PointerEscapingInstr; }
604+
/// Test whether an instruction through which the pointer escapes into a
/// read-only call was recorded while building the slices.
bool isEscapedReadOnly() const {
  return PointerEscapingInstrReadOnly != nullptr;
}
601605

602606
/// Support for iterating over the slices.
603607
/// @{
@@ -680,6 +684,7 @@ class AllocaSlices {
680684
/// store a pointer to that here and abort trying to form slices of the
681685
/// alloca. This will be null if the alloca slices are analyzed successfully.
682686
Instruction *PointerEscapingInstr;
687+
Instruction *PointerEscapingInstrReadOnly;
683688

684689
/// The slices of the alloca.
685690
///
@@ -1390,14 +1395,26 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
13901395

13911396
/// Disable SROA entirely if there are unhandled users of the alloca.
13921397
void visitInstruction(Instruction &I) { PI.setAborted(&I); }
1398+
1399+
/// Visit a call that uses the pointer currently being walked.
///
/// When the pointer is passed as a data operand that the call neither
/// captures nor writes through, the call is recorded as a read-only escape
/// and the visit continues. Every other call is forwarded to the base
/// visitor.
void visitCallBase(CallBase &CB) {
  // Per-operand attributes may only be queried for data operands (call
  // arguments and operand-bundle operands). The use can also be the callee
  // operand, whose operand number is out of range for those queries, so
  // filter it out first.
  if (CB.isDataOperand(U)) {
    const unsigned OpNo = U->getOperandNo();
    // If the call operand is NoCapture ReadOnly, then we mark it as
    // EscapedReadOnly.
    if (CB.doesNotCapture(OpNo) && CB.onlyReadsMemory(OpNo)) {
      PI.setEscapedReadOnly(&CB);
      return;
    }
  }

  Base::visitCallBase(CB);
}
13931410
};
13941411

13951412
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
13961413
:
13971414
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
13981415
AI(AI),
13991416
#endif
1400-
PointerEscapingInstr(nullptr) {
1417+
PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
14011418
SliceBuilder PB(DL, AI, *this);
14021419
SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
14031420
if (PtrI.isEscaped() || PtrI.isAborted()) {
@@ -1408,6 +1425,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
14081425
assert(PointerEscapingInstr && "Did not track a bad instruction");
14091426
return;
14101427
}
1428+
PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();
14111429

14121430
llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
14131431

@@ -1445,6 +1463,9 @@ void AllocaSlices::print(raw_ostream &OS) const {
14451463
return;
14461464
}
14471465

1466+
if (PointerEscapingInstrReadOnly)
1467+
OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";
1468+
14481469
OS << "Slices of alloca: " << AI << "\n";
14491470
for (const_iterator I = begin(), E = end(); I != E; ++I)
14501471
print(OS, I);
@@ -5454,6 +5475,88 @@ void SROA::clobberUse(Use &U) {
54545475
}
54555476
}
54565477

5478+
/// A basic LoadAndStorePromoter that does not remove store nodes.
5479+
class BasicLoadAndStorePromoter : public LoadAndStorePromoter {
5480+
public:
5481+
BasicLoadAndStorePromoter(ArrayRef<const Instruction *> Insts, SSAUpdater &S,
5482+
Type *ZeroType)
5483+
: LoadAndStorePromoter(Insts, S), ZeroType(ZeroType) {}
5484+
bool shouldDelete(Instruction *I) const override {
5485+
return !isa<StoreInst>(I) && !isa<AllocaInst>(I);
5486+
}
5487+
5488+
Value *getValueToUseForAlloca(Instruction *I) const override {
5489+
return UndefValue::get(ZeroType);
5490+
}
5491+
5492+
private:
5493+
Type *ZeroType;
5494+
};
5495+
5496+
/// Forward stored values to loads of an alloca that escapes only into
/// read-only, nocapture calls.
///
/// The slices of \p AS are walked in order and grouped into runs of
/// overlapping slices. A run is rewritten only if every slice in it covers
/// exactly the same [begin, end) range and every user is either a simple load
/// or simple store of one common type, or an assume-like intrinsic (which is
/// tolerated and left untouched). For such a run the loads, the stores and
/// \p AI itself are handed to a BasicLoadAndStorePromoter, which replaces the
/// loads through an SSAUpdater while keeping the stores and the alloca.
///
/// \param AI The alloca being processed.
/// \param AS The slices built for \p AI.
/// \returns true if at least one run containing a load was rewritten.
bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
  bool Changed = false;

  // Look through each "partition", looking for slices with the same start/end
  // that do not overlap with any before them. The slices are sorted by
  // increasing beginOffset. We don't use AS.partitions(), as it will use a more
  // sophisticated algorithm that takes splittable slices into account.
  auto PartitionBegin = AS.begin();
  while (PartitionBegin != AS.end()) {
    // Only read the offsets once the iterator is known to be valid: the slice
    // list can be empty (e.g. when the read-only call is the alloca's only
    // live use), in which case begin() must not be dereferenced.
    const uint64_t BeginOffset = PartitionBegin->beginOffset();
    uint64_t EndOffset = PartitionBegin->endOffset();

    bool AllSameAndValid = true;
    // Whether a load was collected; only loads are actually rewritten.
    bool SawLoad = false;
    SmallVector<Instruction *> Insts;
    Type *PartitionType = nullptr;

    // The second clause admits a slice that ends at or before EndOffset even
    // when it does not start before it, so the first slice of the partition
    // is always consumed and the outer loop makes progress.
    auto PartitionEnd = PartitionBegin;
    while (PartitionEnd != AS.end() &&
           (PartitionEnd->beginOffset() < EndOffset ||
            PartitionEnd->endOffset() <= EndOffset)) {
      if (AllSameAndValid) {
        AllSameAndValid &= PartitionEnd->beginOffset() == BeginOffset &&
                           PartitionEnd->endOffset() == EndOffset;
        Instruction *User =
            cast<Instruction>(PartitionEnd->getUse()->getUser());
        if (auto *LI = dyn_cast<LoadInst>(User)) {
          Type *UserTy = LI->getType();
          // LoadAndStorePromoter requires all the types to be the same.
          if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
            AllSameAndValid = false;
          PartitionType = UserTy;
          Insts.push_back(User);
          SawLoad = true;
        } else if (auto *SI = dyn_cast<StoreInst>(User)) {
          Type *UserTy = SI->getValueOperand()->getType();
          if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
            AllSameAndValid = false;
          PartitionType = UserTy;
          Insts.push_back(User);
        } else if (!isAssumeLikeIntrinsic(User)) {
          AllSameAndValid = false;
        }
      }
      EndOffset = std::max(EndOffset, PartitionEnd->endOffset());
      ++PartitionEnd;
    }

    // So long as all the slices start and end offsets matched, update loads to
    // the values stored in the partition.
    if (AllSameAndValid && !Insts.empty()) {
      LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
                        << EndOffset << ")\n");
      SmallVector<PHINode *, 4> NewPHIs;
      SSAUpdater SSA(&NewPHIs);
      // The alloca stands in for the initial definition of the memory.
      Insts.push_back(&AI);
      BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
      Promoter.run(Insts);
      // Stores and the alloca are kept by the promoter, so the IR is only
      // modified when there was a load to replace.
      Changed |= SawLoad;
    }

    // Step on to the next partition.
    PartitionBegin = PartitionEnd;
  }
  return Changed;
}
5559+
54575560
/// Analyze an alloca for SROA.
54585561
///
54595562
/// This analyzes the alloca to ensure we can reason about it, builds
@@ -5494,6 +5597,11 @@ SROA::runOnAlloca(AllocaInst &AI) {
54945597
if (AS.isEscaped())
54955598
return {Changed, CFGChanged};
54965599

5600+
if (AS.isEscapedReadOnly()) {
5601+
Changed |= propagateStoredValuesToLoads(AI, AS);
5602+
return {Changed, CFGChanged};
5603+
}
5604+
54975605
// Delete all the dead users of this alloca before splitting and rewriting it.
54985606
for (Instruction *DeadUser : AS.getDeadUsers()) {
54995607
// Free up everything used by this instruction.

llvm/lib/Transforms/Utils/SSAUpdater.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -412,17 +412,21 @@ void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) {
412412
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
413413
updateDebugInfo(SI);
414414
SSA.AddAvailableValue(BB, SI->getOperand(0));
415-
} else
415+
} else if (auto *AI = dyn_cast<AllocaInst>(User)) {
416+
// We treat AllocaInst as a store of the getValueToUseForAlloca value.
417+
SSA.AddAvailableValue(BB, getValueToUseForAlloca(AI));
418+
} else {
416419
// Otherwise it is a load, queue it to rewrite as a live-in load.
417420
LiveInLoads.push_back(cast<LoadInst>(User));
421+
}
418422
BlockUses.clear();
419423
continue;
420424
}
421425

422426
// Otherwise, check to see if this block is all loads.
423427
bool HasStore = false;
424428
for (Instruction *I : BlockUses) {
425-
if (isa<StoreInst>(I)) {
429+
if (isa<StoreInst>(I) || isa<AllocaInst>(I)) {
426430
HasStore = true;
427431
break;
428432
}
@@ -468,6 +472,12 @@ void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) {
468472

469473
// Remember that this is the active value in the block.
470474
StoredValue = SI->getOperand(0);
475+
} else if (auto *AI = dyn_cast<AllocaInst>(&I)) {
476+
// Check if this is an alloca, in which case we treat it as a store of
477+
// getValueToUseForAlloca.
478+
if (!isInstInList(AI, Insts))
479+
continue;
480+
StoredValue = getValueToUseForAlloca(AI);
471481
}
472482
}
473483

0 commit comments

Comments
 (0)