Skip to content

Commit 661c593

Browse files
authored
[FunctionAttrs] Add the "initializes" attribute inference (#97373)
Add the "initializes" attribute inference. This change is expected to cause a ~0.09% compile-time regression, which seems acceptable for interprocedural DSE. https://llvm-compile-time-tracker.com/compare.php?from=9f10252c4ad7cffbbcf692fa9c953698f82ac4f5&to=56345c1cee4375eb5c28b8e7abf4803d20216b3b&stat=instructions%3Au
1 parent ff9509e commit 661c593

File tree

15 files changed

+972
-77
lines changed

15 files changed

+972
-77
lines changed

llvm/lib/Transforms/IPO/FunctionAttrs.cpp

Lines changed: 326 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "llvm/Transforms/IPO/FunctionAttrs.h"
1616
#include "llvm/ADT/ArrayRef.h"
1717
#include "llvm/ADT/DenseMap.h"
18+
#include "llvm/ADT/PostOrderIterator.h"
1819
#include "llvm/ADT/SCCIterator.h"
1920
#include "llvm/ADT/STLExtras.h"
2021
#include "llvm/ADT/SetVector.h"
@@ -36,6 +37,7 @@
3637
#include "llvm/IR/Attributes.h"
3738
#include "llvm/IR/BasicBlock.h"
3839
#include "llvm/IR/Constant.h"
40+
#include "llvm/IR/ConstantRangeList.h"
3941
#include "llvm/IR/Constants.h"
4042
#include "llvm/IR/Function.h"
4143
#include "llvm/IR/InstIterator.h"
@@ -581,6 +583,200 @@ struct ArgumentUsesTracker : public CaptureTracker {
581583
const SCCNodeSet &SCCNodes;
582584
};
583585

586+
/// One use of an argument, reached either directly or through a chain of
587+
/// GEPs, together with the offset from the argument accumulated along that
588+
/// chain. Offset is std::nullopt once any GEP on the chain has a non-constant
/// index, i.e. the accessed location is no longer statically known.
589+
struct ArgumentUse {
590+
// The use being tracked; its user is the instruction that gets classified.
Use *U;
591+
// Constant offset of this use relative to the argument, if known.
std::optional<int64_t> Offset;
592+
};
593+
594+
/// How one instruction accesses the argument, plus the ranges (offsets
595+
/// relative to the argument) that the access covers. "Unknown" covers
596+
/// unrecognized instructions, instructions that have more than one use of the
597+
/// argument, and volatile memory accesses. "WriteWithSideEffect" is used for
/// call-site arguments that are not known to be both write-only and
/// nocapture, i.e. the call may do more with the pointer than just write it.
598+
struct ArgumentAccessInfo {
599+
enum class AccessType : uint8_t { Write, WriteWithSideEffect, Read, Unknown };
600+
// Classification of the access.
AccessType ArgAccessType;
601+
// Ranges touched by the access; may be empty when the range is not a known
// constant.
ConstantRangeList AccessRanges;
602+
};
603+
604+
/// The argument uses found in a single basic block.
605+
struct UsesPerBlockInfo {
606+
// Access info for each instruction in the block that uses the argument.
SmallDenseMap<Instruction *, ArgumentAccessInfo, 4> Insts;
607+
// True if some instruction in the block is a Write or WriteWithSideEffect
// with a non-empty access range.
bool HasWrites = false;
608+
// True if some instruction in the block has an Unknown access.
bool HasUnknownAccess = false;
609+
};
610+
611+
/// Function-wide summary of how one argument is used.
612+
struct ArgumentUsesSummary {
613+
// True if any block contains a write with a non-empty access range.
bool HasAnyWrite = false;
614+
// True if such a write occurs in a block other than the entry block.
bool HasWriteOutsideEntryBB = false;
615+
// Per-block use information, keyed by the block containing the uses.
SmallDenseMap<const BasicBlock *, UsesPerBlockInfo, 16> UsesPerBlock;
616+
};
617+
618+
/// Classify how instruction \p I accesses the argument through the single use
/// \p ArgUse and compute the ranges, relative to the argument, that it touches.
///
///  - A simple store to the pointer, a non-volatile memset, and the
///    destination of a non-volatile memcpy/memmove are Writes. When the
///    written range is not a known constant the result is still a Write, but
///    with empty AccessRanges.
///  - A simple load and the source of a non-volatile memcpy/memmove are Reads
///    when their range is a known constant.
///  - A call argument that carries an "initializes" attribute (and whose
///    offset is known) is a Write if the call site is write-only and nocapture
///    for that argument, otherwise a WriteWithSideEffect. The callee's ranges
///    are shifted by the use's offset.
///  - Anything else is Unknown with empty ranges.
///
/// A range is only materialized when it is non-empty and its bounds fit in a
/// signed 64-bit integer. Zero-sized types, zero or negative constant lengths,
/// and offset arithmetic that overflows would otherwise produce a range whose
/// lower bound is not strictly below its upper bound, which ConstantRange and
/// ConstantRangeList do not accept.
ArgumentAccessInfo getArgmentAccessInfo(const Instruction *I,
                                        const ArgumentUse &ArgUse,
                                        const DataLayout &DL) {
  // Build [Offset, Offset + Size). Returns nullopt if the offset is unknown,
  // the size is not positive, or the upper bound overflows int64_t.
  auto MakeAccessRange =
      [](std::optional<int64_t> Offset,
         int64_t Size) -> std::optional<ConstantRange> {
    if (!Offset || Size <= 0)
      return std::nullopt;
    APInt Low(64, *Offset, /*isSigned=*/true);
    bool Overflow = false;
    APInt High = Low.sadd_ov(APInt(64, Size, /*isSigned=*/true), Overflow);
    if (Overflow)
      return std::nullopt;
    return ConstantRange(std::move(Low), std::move(High));
  };
  auto GetTypeAccessRange =
      [&DL, &MakeAccessRange](
          Type *Ty,
          std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
    auto TypeSize = DL.getTypeStoreSize(Ty);
    if (TypeSize.isScalable())
      return std::nullopt;
    // A size that does not fit in int64_t becomes non-positive here and is
    // rejected by MakeAccessRange.
    return MakeAccessRange(Offset,
                           static_cast<int64_t>(TypeSize.getFixedValue()));
  };
  auto GetConstantIntRange =
      [&MakeAccessRange](
          Value *Length,
          std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
    auto *ConstantLength = dyn_cast<ConstantInt>(Length);
    if (!ConstantLength)
      return std::nullopt;
    return MakeAccessRange(Offset, ConstantLength->getSExtValue());
  };

  if (auto *SI = dyn_cast<StoreInst>(I)) {
    if (SI->isSimple() && &SI->getOperandUse(1) == ArgUse.U) {
      // Get the fixed type size of "SI". Since the access range of a write
      // will be unioned, if "SI" doesn't have a usable fixed type size, we
      // just leave the access range empty.
      ConstantRangeList AccessRanges;
      if (auto TypeAccessRange =
              GetTypeAccessRange(SI->getAccessType(), ArgUse.Offset))
        AccessRanges.insert(*TypeAccessRange);
      return {ArgumentAccessInfo::AccessType::Write, std::move(AccessRanges)};
    }
  } else if (auto *LI = dyn_cast<LoadInst>(I)) {
    if (LI->isSimple()) {
      assert(&LI->getOperandUse(0) == ArgUse.U);
      // Get the fixed type size of "LI". Different from Write, if "LI"
      // doesn't have a usable fixed type size, we conservatively fall through
      // and report the access as Unknown.
      if (auto TypeAccessRange =
              GetTypeAccessRange(LI->getAccessType(), ArgUse.Offset))
        return {ArgumentAccessInfo::AccessType::Read, {*TypeAccessRange}};
    }
  } else if (auto *MemSet = dyn_cast<MemSetInst>(I)) {
    if (!MemSet->isVolatile()) {
      ConstantRangeList AccessRanges;
      if (auto AccessRange =
              GetConstantIntRange(MemSet->getLength(), ArgUse.Offset))
        AccessRanges.insert(*AccessRange);
      return {ArgumentAccessInfo::AccessType::Write, AccessRanges};
    }
  } else if (auto *MTI = dyn_cast<MemTransferInst>(I)) {
    if (!MTI->isVolatile()) {
      if (&MTI->getOperandUse(0) == ArgUse.U) {
        // The argument is the destination of the transfer.
        ConstantRangeList AccessRanges;
        if (auto AccessRange =
                GetConstantIntRange(MTI->getLength(), ArgUse.Offset))
          AccessRanges.insert(*AccessRange);
        return {ArgumentAccessInfo::AccessType::Write, AccessRanges};
      } else if (&MTI->getOperandUse(1) == ArgUse.U) {
        // The argument is the source of the transfer.
        if (auto AccessRange =
                GetConstantIntRange(MTI->getLength(), ArgUse.Offset))
          return {ArgumentAccessInfo::AccessType::Read, {*AccessRange}};
      }
    }
  } else if (auto *CB = dyn_cast<CallBase>(I)) {
    if (CB->isArgOperand(ArgUse.U)) {
      unsigned ArgNo = CB->getArgOperandNo(ArgUse.U);
      bool IsInitialize = CB->paramHasAttr(ArgNo, Attribute::Initializes);
      // Argument is a Write when parameter is writeonly/readnone
      // and nocapture. Otherwise, it's a WriteWithSideEffect.
      auto Access = CB->onlyWritesMemory(ArgNo) &&
                            CB->paramHasAttr(ArgNo, Attribute::NoCapture)
                        ? ArgumentAccessInfo::AccessType::Write
                        : ArgumentAccessInfo::AccessType::WriteWithSideEffect;
      ConstantRangeList AccessRanges;
      if (IsInitialize && ArgUse.Offset) {
        Attribute Attr = CB->getParamAttr(ArgNo, Attribute::Initializes);
        ConstantRangeList CBCRL = Attr.getValueAsConstantRangeList();
        const APInt Shift(64, *ArgUse.Offset, /*isSigned=*/true);
        for (const ConstantRange &CR : CBCRL) {
          // Shift the callee's range by our offset. A range whose shifted
          // bounds overflow is dropped; recording fewer written ranges is
          // conservative.
          bool LowOverflow = false;
          bool HighOverflow = false;
          APInt Low = CR.getLower().sadd_ov(Shift, LowOverflow);
          APInt High = CR.getUpper().sadd_ov(Shift, HighOverflow);
          if (LowOverflow || HighOverflow)
            continue;
          AccessRanges.insert(ConstantRange(std::move(Low), std::move(High)));
        }
        return {Access, AccessRanges};
      }
    }
  }
  // Other unrecognized instructions are considered as unknown.
  return {ArgumentAccessInfo::AccessType::Unknown, {}};
}
709+
710+
// Collect the uses of argument "A" in "F".
711+
ArgumentUsesSummary collectArgumentUsesPerBlock(Argument &A, Function &F) {
712+
auto &DL = F.getParent()->getDataLayout();
713+
unsigned PointerSize =
714+
DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace());
715+
ArgumentUsesSummary Result;
716+
717+
BasicBlock &EntryBB = F.getEntryBlock();
718+
SmallVector<ArgumentUse, 4> Worklist;
719+
for (Use &U : A.uses())
720+
Worklist.push_back({&U, 0});
721+
722+
// Update "UsesPerBlock" with the block of "I" as key and "Info" as value.
723+
// Return true if the block of "I" has write accesses after updating.
724+
auto UpdateUseInfo = [&Result](Instruction *I, ArgumentAccessInfo Info) {
725+
auto *BB = I->getParent();
726+
auto &BBInfo = Result.UsesPerBlock[BB];
727+
bool AlreadyVisitedInst = BBInfo.Insts.contains(I);
728+
auto &IInfo = BBInfo.Insts[I];
729+
730+
// Instructions that have more than one use of the argument are considered
731+
// as clobbers.
732+
if (AlreadyVisitedInst) {
733+
IInfo = {ArgumentAccessInfo::AccessType::Unknown, {}};
734+
BBInfo.HasUnknownAccess = true;
735+
return false;
736+
}
737+
738+
IInfo = std::move(Info);
739+
BBInfo.HasUnknownAccess |=
740+
IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown;
741+
bool InfoHasWrites =
742+
(IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write ||
743+
IInfo.ArgAccessType ==
744+
ArgumentAccessInfo::AccessType::WriteWithSideEffect) &&
745+
!IInfo.AccessRanges.empty();
746+
BBInfo.HasWrites |= InfoHasWrites;
747+
return InfoHasWrites;
748+
};
749+
750+
// No need for a visited set because we don't look through phis, so there are
751+
// no cycles.
752+
while (!Worklist.empty()) {
753+
ArgumentUse ArgUse = Worklist.pop_back_val();
754+
User *U = ArgUse.U->getUser();
755+
// Add GEP uses to worklist.
756+
// If the GEP is not a constant GEP, set the ArgumentUse::Offset to nullopt.
757+
if (auto *GEP = dyn_cast<GEPOperator>(U)) {
758+
std::optional<int64_t> NewOffset = std::nullopt;
759+
if (ArgUse.Offset) {
760+
APInt Offset(PointerSize, 0);
761+
if (GEP->accumulateConstantOffset(DL, Offset))
762+
NewOffset = *ArgUse.Offset + Offset.getSExtValue();
763+
}
764+
for (Use &U : GEP->uses())
765+
Worklist.push_back({&U, NewOffset});
766+
continue;
767+
}
768+
769+
auto *I = cast<Instruction>(U);
770+
bool HasWrite = UpdateUseInfo(I, getArgmentAccessInfo(I, ArgUse, DL));
771+
772+
Result.HasAnyWrite |= HasWrite;
773+
774+
if (HasWrite && I->getParent() != &EntryBB)
775+
Result.HasWriteOutsideEntryBB = true;
776+
}
777+
return Result;
778+
}
779+
584780
} // end anonymous namespace
585781

586782
namespace llvm {
@@ -867,9 +1063,129 @@ static bool addAccessAttr(Argument *A, Attribute::AttrKind R) {
8671063
return true;
8681064
}
8691065

1066+
/// Try to infer an "initializes" attribute for argument \p A of function \p F.
///
/// The uses of \p A are collected per basic block. For each block, the set of
/// initialized ranges is computed backwards: it starts from the intersection
/// of the successors' ranges, then walks the block's uses from last to first,
/// clearing the set at Unknown / WriteWithSideEffect accesses, adding written
/// ranges and removing read ranges. The ranges computed for the entry block,
/// unioned with any existing "initializes" attribute, become the new attribute.
///
/// \returns true if the attribute on \p A was added or changed.
static bool inferInitializes(Argument &A, Function &F) {
  auto ArgumentUses = collectArgumentUsesPerBlock(A, F);
  // No write anywhere in the function, bail.
  if (!ArgumentUses.HasAnyWrite)
    return false;

  auto &UsesPerBlock = ArgumentUses.UsesPerBlock;
  BasicBlock &EntryBB = F.getEntryBlock();
  // A map to store the argument ranges initialized by a BasicBlock (including
  // its successors).
  DenseMap<const BasicBlock *, ConstantRangeList> Initialized;
  // Visit the successors of "BB" block and the instructions in BB (post-order)
  // to get the argument ranges initialized by "BB" (including its successors).
  // The caller is responsible for caching the result in "Initialized".
  auto VisitBlock = [&](const BasicBlock *BB) -> ConstantRangeList {
    auto UPB = UsesPerBlock.find(BB);
    ConstantRangeList CRL;

    // Start with intersection of successors.
    // If this block has any clobbering use, we're going to clear out the
    // ranges at some point in this block anyway, so don't bother looking at
    // successors.
    if (UPB == UsesPerBlock.end() || !UPB->second.HasUnknownAccess) {
      bool HasAddedSuccessor = false;
      for (auto *Succ : successors(BB)) {
        if (auto SuccI = Initialized.find(Succ); SuccI != Initialized.end()) {
          if (HasAddedSuccessor) {
            CRL = CRL.intersectWith(SuccI->second);
          } else {
            CRL = SuccI->second;
            HasAddedSuccessor = true;
          }
        } else {
          // A successor with no recorded ranges initializes nothing, so the
          // intersection is empty.
          CRL = ConstantRangeList();
          break;
        }
      }
    }

    if (UPB != UsesPerBlock.end()) {
      // Sort uses in this block by instruction order.
      SmallVector<std::pair<Instruction *, ArgumentAccessInfo>, 2> Insts;
      append_range(Insts, UPB->second.Insts);
      sort(Insts, [](const std::pair<Instruction *, ArgumentAccessInfo> &LHS,
                     const std::pair<Instruction *, ArgumentAccessInfo> &RHS) {
        return LHS.first->comesBefore(RHS.first);
      });

      // From the end of the block to the beginning of the block, set
      // initializes ranges.
      for (auto &[_, Info] : reverse(Insts)) {
        if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown ||
            Info.ArgAccessType ==
                ArgumentAccessInfo::AccessType::WriteWithSideEffect)
          CRL = ConstantRangeList();
        if (!Info.AccessRanges.empty()) {
          if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write ||
              Info.ArgAccessType ==
                  ArgumentAccessInfo::AccessType::WriteWithSideEffect) {
            CRL = CRL.unionWith(Info.AccessRanges);
          } else {
            assert(Info.ArgAccessType == ArgumentAccessInfo::AccessType::Read);
            for (const auto &ReadRange : Info.AccessRanges)
              CRL.subtract(ReadRange);
          }
        }
      }
    }
    return CRL;
  };

  ConstantRangeList EntryCRL;
  // If all write instructions are in the EntryBB, or if the EntryBB has
  // a clobbering use, we only need to look at EntryBB.
  bool OnlyScanEntryBlock = !ArgumentUses.HasWriteOutsideEntryBB;
  if (!OnlyScanEntryBlock)
    if (auto EntryUPB = UsesPerBlock.find(&EntryBB);
        EntryUPB != UsesPerBlock.end())
      OnlyScanEntryBlock = EntryUPB->second.HasUnknownAccess;
  if (OnlyScanEntryBlock) {
    EntryCRL = VisitBlock(&EntryBB);
    if (EntryCRL.empty())
      return false;
  } else {
    // Now we have to go through CFG to get the initialized argument ranges
    // across blocks. With dominance and post-dominance, the initialized ranges
    // by a block include both accesses inside this block and accesses in its
    // (transitive) successors. So visit successors before predecessors with a
    // post-order walk of the blocks and memorize the results in "Initialized".
    for (const BasicBlock *BB : post_order(&F)) {
      ConstantRangeList CRL = VisitBlock(BB);
      if (!CRL.empty())
        Initialized[BB] = std::move(CRL);
    }

    auto EntryCRLI = Initialized.find(&EntryBB);
    if (EntryCRLI == Initialized.end())
      return false;

    EntryCRL = EntryCRLI->second;
  }

  assert(!EntryCRL.empty() &&
         "should have bailed already if EntryCRL is empty");

  if (A.hasAttribute(Attribute::Initializes)) {
    ConstantRangeList PreviousCRL =
        A.getAttribute(Attribute::Initializes).getValueAsConstantRangeList();
    // Merge with the existing attribute first, then compare: if everything we
    // inferred is already covered, the attribute would not change and we must
    // not report a modification.
    EntryCRL = EntryCRL.unionWith(PreviousCRL);
    if (PreviousCRL == EntryCRL)
      return false;
  }

  A.addAttr(Attribute::get(A.getContext(), Attribute::Initializes,
                           EntryCRL.rangesRef()));

  return true;
}
1184+
8701185
/// Deduce nocapture attributes for the SCC.
8711186
static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
872-
SmallSet<Function *, 8> &Changed) {
1187+
SmallSet<Function *, 8> &Changed,
1188+
bool SkipInitializes) {
8731189
ArgumentGraph AG;
8741190

8751191
// Check each function in turn, determining which pointer arguments are not
@@ -937,6 +1253,10 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
9371253
if (addAccessAttr(&A, R))
9381254
Changed.insert(F);
9391255
}
1256+
if (!SkipInitializes && !A.onlyReadsMemory()) {
1257+
if (inferInitializes(A, *F))
1258+
Changed.insert(F);
1259+
}
9401260
}
9411261
}
9421262

@@ -1910,13 +2230,16 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
19102230

19112231
SmallSet<Function *, 8> Changed;
19122232
if (ArgAttrsOnly) {
1913-
addArgumentAttrs(Nodes.SCCNodes, Changed);
2233+
// ArgAttrsOnly means to only infer attributes that may aid optimizations
2234+
// on the *current* function. "initializes" attribute is to aid
2235+
// optimizations (like DSE) on the callers, so skip "initializes" here.
2236+
addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true);
19142237
return Changed;
19152238
}
19162239

19172240
addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
19182241
addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
1919-
addArgumentAttrs(Nodes.SCCNodes, Changed);
2242+
addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/false);
19202243
inferConvergent(Nodes.SCCNodes, Changed);
19212244
addNoReturnAttrs(Nodes.SCCNodes, Changed);
19222245
addColdAttrs(Nodes.SCCNodes, Changed);

llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ define void @test0_yes(ptr %p) nounwind {
1515
ret void
1616
}
1717

18-
; CHECK: define void @test0_no(ptr nocapture writeonly %p) #1 {
18+
; CHECK: define void @test0_no(ptr nocapture writeonly initializes((0, 4)) %p) #1 {
1919
define void @test0_no(ptr %p) nounwind {
2020
store i32 0, ptr %p, !tbaa !2
2121
ret void

llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
; Should have call to sincos declarations, not calls to the asm pseudo-libcalls
1111
define protected amdgpu_kernel void @swdev456865(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, float noundef %x) #0 {
1212
; CHECK-LABEL: define protected amdgpu_kernel void @swdev456865(
13-
; CHECK-SAME: ptr addrspace(1) nocapture writeonly [[OUT0:%.*]], ptr addrspace(1) nocapture writeonly [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
13+
; CHECK-SAME: ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT0:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
1414
; CHECK-NEXT: entry:
1515
; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5)
1616
; CHECK-NEXT: [[I_I:%.*]] = call float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) #[[ATTR1:[0-9]+]]

0 commit comments

Comments
 (0)