Skip to content

Commit 403da6a

Browse files
committed
Reapply [LICM] Make promotion faster
Relative to the previous implementation, this always uses aliasesUnknownInst() instead of aliasesPointer() to correctly handle atomics. The added test case was previously miscompiled. ----- Even when MemorySSA-based LICM is used, an AST is still populated for scalar promotion. As the AST has quadratic complexity, a lot of time is spent in this step despite the existing access count limit. This patch optimizes the identification of promotable stores. The idea here is pretty simple: We're only interested in must-alias mod sets of loop invariant pointers. As such, only populate the AST with loop-invariant loads and stores (anything else is definitely not promotable) and then discard any sets which alias with any of the remaining, definitely non-promotable accesses. If we promoted something, check whether this has made some other accesses loop invariant and thus possible promotion candidates. This is much faster in practice, because we need to perform AA queries for O(NumPromotable^2 + NumPromotable*NumNonPromotable) instead of O(NumTotal^2), and NumPromotable tends to be small. Additionally, promotable accesses have loop invariant pointers, for which AA is cheaper. This has a signicant positive compile-time impact. We save ~1.8% geomean on CTMark at O3, with 6% on lencod in particular and 25% on individual files. Conceptually, this change is NFC, but may not be so in practice, because the AST is only an approximation, and can produce different results depending on the order in which accesses are added. However, there is at least no impact on the number of promotions (licm.NumPromoted) in test-suite O3 configuration with this change. Differential Revision: https://reviews.llvm.org/D89264
1 parent 3d47f1f commit 403da6a

File tree

4 files changed

+147
-54
lines changed

4 files changed

+147
-54
lines changed

llvm/include/llvm/Analysis/AliasSetTracker.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,6 @@ class AAResults;
3838
class AliasSetTracker;
3939
class BasicBlock;
4040
class LoadInst;
41-
class Loop;
42-
class MemorySSA;
4341
class AnyMemSetInst;
4442
class AnyMemTransferInst;
4543
class raw_ostream;
@@ -343,8 +341,6 @@ class AliasSetTracker {
343341
struct ASTCallbackVHDenseMapInfo : public DenseMapInfo<Value *> {};
344342

345343
AAResults &AA;
346-
MemorySSA *MSSA = nullptr;
347-
Loop *L = nullptr;
348344
ilist<AliasSet> AliasSets;
349345

350346
using PointerMapType = DenseMap<ASTCallbackVH, AliasSet::PointerRec *,
@@ -357,8 +353,6 @@ class AliasSetTracker {
357353
/// Create an empty collection of AliasSets, and use the specified alias
358354
/// analysis object to disambiguate load and store addresses.
359355
explicit AliasSetTracker(AAResults &AA) : AA(AA) {}
360-
explicit AliasSetTracker(AAResults &AA, MemorySSA *MSSA, Loop *L)
361-
: AA(AA), MSSA(MSSA), L(L) {}
362356
~AliasSetTracker() { clear(); }
363357

364358
/// These methods are used to add different types of instructions to the alias
@@ -383,7 +377,6 @@ class AliasSetTracker {
383377
void add(BasicBlock &BB); // Add all instructions in basic block
384378
void add(const AliasSetTracker &AST); // Add alias relations from another AST
385379
void addUnknown(Instruction *I);
386-
void addAllInstructionsInLoopUsingMSSA();
387380

388381
void clear();
389382

llvm/lib/Analysis/AliasSetTracker.cpp

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "llvm/Analysis/AliasSetTracker.h"
14+
#include "llvm/Analysis/AliasAnalysis.h"
1415
#include "llvm/Analysis/GuardUtils.h"
1516
#include "llvm/Analysis/LoopInfo.h"
1617
#include "llvm/Analysis/MemoryLocation.h"
17-
#include "llvm/Analysis/MemorySSA.h"
1818
#include "llvm/Config/llvm-config.h"
1919
#include "llvm/IR/Constants.h"
2020
#include "llvm/IR/DataLayout.h"
@@ -535,15 +535,6 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
535535
}
536536
}
537537

538-
void AliasSetTracker::addAllInstructionsInLoopUsingMSSA() {
539-
assert(MSSA && L && "MSSA and L must be available");
540-
for (const BasicBlock *BB : L->blocks())
541-
if (auto *Accesses = MSSA->getBlockAccesses(BB))
542-
for (auto &Access : *Accesses)
543-
if (auto *MUD = dyn_cast<MemoryUseOrDef>(&Access))
544-
add(MUD->getMemoryInst());
545-
}
546-
547538
// deleteValue method - This method is used to remove a pointer value from the
548539
// AliasSetTracker entirely. It should be used when an instruction is deleted
549540
// from the program to update the AST. If you don't use this, you would have

llvm/lib/Transforms/Scalar/LICM.cpp

Lines changed: 112 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,12 @@ static void moveInstructionBefore(Instruction &I, Instruction &Dest,
185185
ICFLoopSafetyInfo &SafetyInfo,
186186
MemorySSAUpdater *MSSAU, ScalarEvolution *SE);
187187

188+
static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
189+
function_ref<void(Instruction *)> Fn);
190+
static SmallVector<SmallSetVector<Value *, 8>, 0>
191+
collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L,
192+
SmallVectorImpl<Instruction *> &MaybePromotable);
193+
188194
namespace {
189195
struct LoopInvariantCodeMotion {
190196
bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
@@ -203,9 +209,6 @@ struct LoopInvariantCodeMotion {
203209

204210
std::unique_ptr<AliasSetTracker>
205211
collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AAResults *AA);
206-
std::unique_ptr<AliasSetTracker>
207-
collectAliasInfoForLoopWithMSSA(Loop *L, AAResults *AA,
208-
MemorySSAUpdater *MSSAU);
209212
};
210213

211214
struct LegacyLICMPass : public LoopPass {
@@ -446,31 +449,48 @@ bool LoopInvariantCodeMotion::runOnLoop(
446449
PredIteratorCache PIC;
447450

448451
bool Promoted = false;
449-
450-
// Build an AST using MSSA.
451-
if (!CurAST.get())
452-
CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get());
453-
454-
// Loop over all of the alias sets in the tracker object.
455-
for (AliasSet &AS : *CurAST) {
456-
// We can promote this alias set if it has a store, if it is a "Must"
457-
// alias set, if the pointer is loop invariant, and if we are not
458-
// eliminating any volatile loads or stores.
459-
if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
460-
!L->isLoopInvariant(AS.begin()->getValue()))
461-
continue;
462-
463-
assert(
464-
!AS.empty() &&
465-
"Must alias set should have at least one pointer element in it!");
466-
467-
SmallSetVector<Value *, 8> PointerMustAliases;
468-
for (const auto &ASI : AS)
469-
PointerMustAliases.insert(ASI.getValue());
470-
471-
Promoted |= promoteLoopAccessesToScalars(
472-
PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
473-
DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
452+
if (CurAST.get()) {
453+
// Loop over all of the alias sets in the tracker object.
454+
for (AliasSet &AS : *CurAST) {
455+
// We can promote this alias set if it has a store, if it is a "Must"
456+
// alias set, if the pointer is loop invariant, and if we are not
457+
// eliminating any volatile loads or stores.
458+
if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
459+
!L->isLoopInvariant(AS.begin()->getValue()))
460+
continue;
461+
462+
assert(
463+
!AS.empty() &&
464+
"Must alias set should have at least one pointer element in it!");
465+
466+
SmallSetVector<Value *, 8> PointerMustAliases;
467+
for (const auto &ASI : AS)
468+
PointerMustAliases.insert(ASI.getValue());
469+
470+
Promoted |= promoteLoopAccessesToScalars(
471+
PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
472+
DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
473+
}
474+
} else {
475+
SmallVector<Instruction *, 16> MaybePromotable;
476+
foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
477+
MaybePromotable.push_back(I);
478+
});
479+
480+
// Promoting one set of accesses may make the pointers for another set
481+
// loop invariant, so run this in a loop (with the MaybePromotable set
482+
// decreasing in size over time).
483+
bool LocalPromoted;
484+
do {
485+
LocalPromoted = false;
486+
for (const SmallSetVector<Value *, 8> &PointerMustAliases :
487+
collectPromotionCandidates(MSSA, AA, L, MaybePromotable)) {
488+
LocalPromoted |= promoteLoopAccessesToScalars(
489+
PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC,
490+
LI, DT, TLI, L, /*AST*/nullptr, MSSAU.get(), &SafetyInfo, ORE);
491+
}
492+
Promoted |= LocalPromoted;
493+
} while (LocalPromoted);
474494
}
475495

476496
// Once we have promoted values across the loop body we have to
@@ -2233,6 +2253,70 @@ bool llvm::promoteLoopAccessesToScalars(
22332253
return true;
22342254
}
22352255

2256+
static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
2257+
function_ref<void(Instruction *)> Fn) {
2258+
for (const BasicBlock *BB : L->blocks())
2259+
if (const auto *Accesses = MSSA->getBlockAccesses(BB))
2260+
for (const auto &Access : *Accesses)
2261+
if (const auto *MUD = dyn_cast<MemoryUseOrDef>(&Access))
2262+
Fn(MUD->getMemoryInst());
2263+
}
2264+
2265+
static SmallVector<SmallSetVector<Value *, 8>, 0>
2266+
collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L,
2267+
SmallVectorImpl<Instruction *> &MaybePromotable) {
2268+
AliasSetTracker AST(*AA);
2269+
2270+
auto IsPotentiallyPromotable = [L](const Instruction *I) {
2271+
if (const auto *SI = dyn_cast<StoreInst>(I))
2272+
return L->isLoopInvariant(SI->getPointerOperand());
2273+
if (const auto *LI = dyn_cast<LoadInst>(I))
2274+
return L->isLoopInvariant(LI->getPointerOperand());
2275+
return false;
2276+
};
2277+
2278+
// Populate AST with potentially promotable accesses and remove them from
2279+
// MaybePromotable, so they will not be checked again on the next iteration.
2280+
SmallPtrSet<Value *, 16> AttemptingPromotion;
2281+
llvm::erase_if(MaybePromotable, [&](Instruction *I) {
2282+
if (IsPotentiallyPromotable(I)) {
2283+
AttemptingPromotion.insert(I);
2284+
AST.add(I);
2285+
return true;
2286+
}
2287+
return false;
2288+
});
2289+
2290+
// We're only interested in must-alias sets that contain a mod.
2291+
SmallVector<const AliasSet *, 8> Sets;
2292+
for (AliasSet &AS : AST)
2293+
if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
2294+
Sets.push_back(&AS);
2295+
2296+
if (Sets.empty())
2297+
return {}; // Nothing to promote...
2298+
2299+
// Discard any sets for which there is an aliasing non-promotable access.
2300+
foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2301+
if (AttemptingPromotion.contains(I))
2302+
return;
2303+
2304+
llvm::erase_if(Sets, [&](const AliasSet *AS) {
2305+
return AS->aliasesUnknownInst(I, *AA);
2306+
});
2307+
});
2308+
2309+
SmallVector<SmallSetVector<Value *, 8>, 0> Result;
2310+
for (const AliasSet *Set : Sets) {
2311+
SmallSetVector<Value *, 8> PointerMustAliases;
2312+
for (const auto &ASI : *Set)
2313+
PointerMustAliases.insert(ASI.getValue());
2314+
Result.push_back(std::move(PointerMustAliases));
2315+
}
2316+
2317+
return Result;
2318+
}
2319+
22362320
/// Returns an owning pointer to an alias set which incorporates aliasing info
22372321
/// from L and all subloops of L.
22382322
std::unique_ptr<AliasSetTracker>
@@ -2253,15 +2337,6 @@ LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
22532337
return CurAST;
22542338
}
22552339

2256-
std::unique_ptr<AliasSetTracker>
2257-
LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA(
2258-
Loop *L, AAResults *AA, MemorySSAUpdater *MSSAU) {
2259-
auto *MSSA = MSSAU->getMemorySSA();
2260-
auto CurAST = std::make_unique<AliasSetTracker>(*AA, MSSA, L);
2261-
CurAST->addAllInstructionsInLoopUsingMSSA();
2262-
return CurAST;
2263-
}
2264-
22652340
static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
22662341
AliasSetTracker *CurAST, Loop *CurLoop,
22672342
AAResults *AA) {
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -S -licm < %s | FileCheck %s
3+
4+
%class.LiveThread = type { i64, %class.LiveThread* }
5+
6+
@globallive = external dso_local global i64, align 8
7+
8+
; The store should not be sunk (via scalar promotion) past the cmpxchg.
9+
10+
define void @test(%class.LiveThread* %live_thread) {
11+
; CHECK-LABEL: @test(
12+
; CHECK-NEXT: [[NEXT_UNPROCESSED_:%.*]] = getelementptr inbounds [[CLASS_LIVETHREAD:%.*]], %class.LiveThread* [[LIVE_THREAD:%.*]], i64 0, i32 1
13+
; CHECK-NEXT: br label [[LOOP:%.*]]
14+
; CHECK: loop:
15+
; CHECK-NEXT: store %class.LiveThread* undef, %class.LiveThread** [[NEXT_UNPROCESSED_]], align 8
16+
; CHECK-NEXT: [[XCHG:%.*]] = cmpxchg weak i64* @globallive, i64 undef, i64 undef release monotonic, align 8
17+
; CHECK-NEXT: [[DONE:%.*]] = extractvalue { i64, i1 } [[XCHG]], 1
18+
; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
19+
; CHECK: exit:
20+
; CHECK-NEXT: ret void
21+
;
22+
%next_unprocessed_ = getelementptr inbounds %class.LiveThread, %class.LiveThread* %live_thread, i64 0, i32 1
23+
br label %loop
24+
25+
loop:
26+
store %class.LiveThread* undef, %class.LiveThread** %next_unprocessed_, align 8
27+
%xchg = cmpxchg weak i64* @globallive, i64 undef, i64 undef release monotonic, align 8
28+
%done = extractvalue { i64, i1 } %xchg, 1
29+
br i1 %done, label %exit, label %loop
30+
31+
exit:
32+
ret void
33+
}
34+

0 commit comments

Comments
 (0)