Skip to content

Commit 2c5f470

Browse files
authored
[AMDGPU] Move LDS utilities from amdgpu-lower-module-lds pass to AMDGPUMemoryUtils (#88002)
This moves some of the utility methods from the amdgpu-lower-module-lds pass to AMDGPUMemoryUtils.
1 parent 21be818 commit 2c5f470

File tree

3 files changed

+245
-187
lines changed

3 files changed

+245
-187
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

Lines changed: 1 addition & 185 deletions
Original file line number | Diff line number | Diff line change
@@ -212,6 +212,7 @@
212212
#define DEBUG_TYPE "amdgpu-lower-module-lds"
213213

214214
using namespace llvm;
215+
using namespace AMDGPU;
215216

216217
namespace {
217218

@@ -234,17 +235,6 @@ cl::opt<LoweringKind> LoweringKindLoc(
234235
clEnumValN(LoweringKind::hybrid, "hybrid",
235236
"Lower via mixture of above strategies")));
236237

237-
bool isKernelLDS(const Function *F) {
238-
// Some weirdness here. AMDGPU::isKernelCC does not call into
239-
// AMDGPU::isKernel with the calling conv, it instead calls into
240-
// isModuleEntryFunction which returns true for more calling conventions
241-
// than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.
242-
// There's also a test that checks that the LDS lowering does not hit on
243-
// a graphics shader, denoted amdgpu_ps, so stay with the limited case.
244-
// Putting LDS in the name of the function to draw attention to this.
245-
return AMDGPU::isKernel(F->getCallingConv());
246-
}
247-
248238
template <typename T> std::vector<T> sortByName(std::vector<T> &&V) {
249239
llvm::sort(V.begin(), V.end(), [](const auto *L, const auto *R) {
250240
return L->getName() < R->getName();
@@ -305,183 +295,9 @@ class AMDGPULowerModuleLDS {
305295
Decl, {}, {OperandBundleDefT<Value *>("ExplicitUse", UseInstance)});
306296
}
307297

308-
static bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
309-
// Constants are uniqued within LLVM. A ConstantExpr referring to a LDS
310-
// global may have uses from multiple different functions as a result.
311-
// This pass specialises LDS variables with respect to the kernel that
312-
// allocates them.
313-
314-
// This is semantically equivalent to (the unimplemented as slow):
315-
// for (auto &F : M.functions())
316-
// for (auto &BB : F)
317-
// for (auto &I : BB)
318-
// for (Use &Op : I.operands())
319-
// if (constantExprUsesLDS(Op))
320-
// replaceConstantExprInFunction(I, Op);
321-
322-
SmallVector<Constant *> LDSGlobals;
323-
for (auto &GV : M.globals())
324-
if (AMDGPU::isLDSVariableToLower(GV))
325-
LDSGlobals.push_back(&GV);
326-
327-
return convertUsersOfConstantsToInstructions(LDSGlobals);
328-
}
329-
330298
public:
331299
AMDGPULowerModuleLDS(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
332300

333-
using FunctionVariableMap = DenseMap<Function *, DenseSet<GlobalVariable *>>;
334-
335-
using VariableFunctionMap = DenseMap<GlobalVariable *, DenseSet<Function *>>;
336-
337-
static void getUsesOfLDSByFunction(CallGraph const &CG, Module &M,
338-
FunctionVariableMap &kernels,
339-
FunctionVariableMap &functions) {
340-
341-
// Get uses from the current function, excluding uses by called functions
342-
// Two output variables to avoid walking the globals list twice
343-
for (auto &GV : M.globals()) {
344-
if (!AMDGPU::isLDSVariableToLower(GV)) {
345-
continue;
346-
}
347-
348-
for (User *V : GV.users()) {
349-
if (auto *I = dyn_cast<Instruction>(V)) {
350-
Function *F = I->getFunction();
351-
if (isKernelLDS(F)) {
352-
kernels[F].insert(&GV);
353-
} else {
354-
functions[F].insert(&GV);
355-
}
356-
}
357-
}
358-
}
359-
}
360-
361-
struct LDSUsesInfoTy {
362-
FunctionVariableMap direct_access;
363-
FunctionVariableMap indirect_access;
364-
};
365-
366-
static LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
367-
368-
FunctionVariableMap direct_map_kernel;
369-
FunctionVariableMap direct_map_function;
370-
getUsesOfLDSByFunction(CG, M, direct_map_kernel, direct_map_function);
371-
372-
// Collect variables that are used by functions whose address has escaped
373-
DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
374-
for (Function &F : M.functions()) {
375-
if (!isKernelLDS(&F))
376-
if (F.hasAddressTaken(nullptr,
377-
/* IgnoreCallbackUses */ false,
378-
/* IgnoreAssumeLikeCalls */ false,
379-
/* IgnoreLLVMUsed */ true,
380-
/* IgnoreArcAttachedCall */ false)) {
381-
set_union(VariablesReachableThroughFunctionPointer,
382-
direct_map_function[&F]);
383-
}
384-
}
385-
386-
auto functionMakesUnknownCall = [&](const Function *F) -> bool {
387-
assert(!F->isDeclaration());
388-
for (const CallGraphNode::CallRecord &R : *CG[F]) {
389-
if (!R.second->getFunction()) {
390-
return true;
391-
}
392-
}
393-
return false;
394-
};
395-
396-
// Work out which variables are reachable through function calls
397-
FunctionVariableMap transitive_map_function = direct_map_function;
398-
399-
// If the function makes any unknown call, assume the worst case that it can
400-
// access all variables accessed by functions whose address escaped
401-
for (Function &F : M.functions()) {
402-
if (!F.isDeclaration() && functionMakesUnknownCall(&F)) {
403-
if (!isKernelLDS(&F)) {
404-
set_union(transitive_map_function[&F],
405-
VariablesReachableThroughFunctionPointer);
406-
}
407-
}
408-
}
409-
410-
// Direct implementation of collecting all variables reachable from each
411-
// function
412-
for (Function &Func : M.functions()) {
413-
if (Func.isDeclaration() || isKernelLDS(&Func))
414-
continue;
415-
416-
DenseSet<Function *> seen; // catches cycles
417-
SmallVector<Function *, 4> wip{&Func};
418-
419-
while (!wip.empty()) {
420-
Function *F = wip.pop_back_val();
421-
422-
// Can accelerate this by referring to transitive map for functions that
423-
// have already been computed, with more care than this
424-
set_union(transitive_map_function[&Func], direct_map_function[F]);
425-
426-
for (const CallGraphNode::CallRecord &R : *CG[F]) {
427-
Function *ith = R.second->getFunction();
428-
if (ith) {
429-
if (!seen.contains(ith)) {
430-
seen.insert(ith);
431-
wip.push_back(ith);
432-
}
433-
}
434-
}
435-
}
436-
}
437-
438-
// direct_map_kernel lists which variables are used by the kernel
439-
// find the variables which are used through a function call
440-
FunctionVariableMap indirect_map_kernel;
441-
442-
for (Function &Func : M.functions()) {
443-
if (Func.isDeclaration() || !isKernelLDS(&Func))
444-
continue;
445-
446-
for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
447-
Function *ith = R.second->getFunction();
448-
if (ith) {
449-
set_union(indirect_map_kernel[&Func], transitive_map_function[ith]);
450-
} else {
451-
set_union(indirect_map_kernel[&Func],
452-
VariablesReachableThroughFunctionPointer);
453-
}
454-
}
455-
}
456-
457-
// Verify that we fall into one of 2 cases:
458-
// - All variables are absolute: this is a re-run of the pass
459-
// so we don't have anything to do.
460-
// - No variables are absolute.
461-
std::optional<bool> HasAbsoluteGVs;
462-
for (auto &Map : {direct_map_kernel, indirect_map_kernel}) {
463-
for (auto &[Fn, GVs] : Map) {
464-
for (auto *GV : GVs) {
465-
bool IsAbsolute = GV->isAbsoluteSymbolRef();
466-
if (HasAbsoluteGVs.has_value()) {
467-
if (*HasAbsoluteGVs != IsAbsolute) {
468-
report_fatal_error(
469-
"Module cannot mix absolute and non-absolute LDS GVs");
470-
}
471-
} else
472-
HasAbsoluteGVs = IsAbsolute;
473-
}
474-
}
475-
}
476-
477-
// If we only had absolute GVs, we have nothing to do, return an empty
478-
// result.
479-
if (HasAbsoluteGVs && *HasAbsoluteGVs)
480-
return {FunctionVariableMap(), FunctionVariableMap()};
481-
482-
return {std::move(direct_map_kernel), std::move(indirect_map_kernel)};
483-
}
484-
485301
struct LDSVariableReplacement {
486302
GlobalVariable *SGV = nullptr;
487303
DenseMap<GlobalVariable *, Constant *> LDSVarsToConstantGEP;

0 commit comments

Comments (0)