Commit 08b9cae

Revert "[AMDGPU] Move LDS utilities from amdgpu-lower-module-lds pass to AMDGPUMemoryUtils (llvm#88002)"
This reverts commit 2c5f470.
1 parent a1d43c1 commit 08b9cae

3 files changed: 187 additions (+), 245 deletions (-)

llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

Lines changed: 185 additions & 1 deletion
@@ -212,7 +212,6 @@
 #define DEBUG_TYPE "amdgpu-lower-module-lds"
 
 using namespace llvm;
-using namespace AMDGPU;
 
 namespace {
 
@@ -235,6 +234,17 @@ cl::opt<LoweringKind> LoweringKindLoc(
                clEnumValN(LoweringKind::hybrid, "hybrid",
                           "Lower via mixture of above strategies")));
 
+bool isKernelLDS(const Function *F) {
+  // Some weirdness here. AMDGPU::isKernelCC does not call into
+  // AMDGPU::isKernel with the calling conv, it instead calls into
+  // isModuleEntryFunction which returns true for more calling conventions
+  // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.
+  // There's also a test that checks that the LDS lowering does not hit on
+  // a graphics shader, denoted amdgpu_ps, so stay with the limited case.
+  // Putting LDS in the name of the function to draw attention to this.
+  return AMDGPU::isKernel(F->getCallingConv());
+}
+
 template <typename T> std::vector<T> sortByName(std::vector<T> &&V) {
   llvm::sort(V.begin(), V.end(), [](const auto *L, const auto *R) {
     return L->getName() < R->getName();
@@ -295,9 +305,183 @@ class AMDGPULowerModuleLDS {
         Decl, {}, {OperandBundleDefT<Value *>("ExplicitUse", UseInstance)});
   }
 
+  static bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
+    // Constants are uniqued within LLVM. A ConstantExpr referring to a LDS
+    // global may have uses from multiple different functions as a result.
+    // This pass specialises LDS variables with respect to the kernel that
+    // allocates them.
+
+    // This is semantically equivalent to (the unimplemented as slow):
+    // for (auto &F : M.functions())
+    //   for (auto &BB : F)
+    //     for (auto &I : BB)
+    //       for (Use &Op : I.operands())
+    //         if (constantExprUsesLDS(Op))
+    //           replaceConstantExprInFunction(I, Op);
+
+    SmallVector<Constant *> LDSGlobals;
+    for (auto &GV : M.globals())
+      if (AMDGPU::isLDSVariableToLower(GV))
+        LDSGlobals.push_back(&GV);
+
+    return convertUsersOfConstantsToInstructions(LDSGlobals);
+  }
+
 public:
   AMDGPULowerModuleLDS(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
 
+  using FunctionVariableMap = DenseMap<Function *, DenseSet<GlobalVariable *>>;
+
+  using VariableFunctionMap = DenseMap<GlobalVariable *, DenseSet<Function *>>;
+
+  static void getUsesOfLDSByFunction(CallGraph const &CG, Module &M,
+                                     FunctionVariableMap &kernels,
+                                     FunctionVariableMap &functions) {
+
+    // Get uses from the current function, excluding uses by called functions
+    // Two output variables to avoid walking the globals list twice
+    for (auto &GV : M.globals()) {
+      if (!AMDGPU::isLDSVariableToLower(GV)) {
+        continue;
+      }
+
+      for (User *V : GV.users()) {
+        if (auto *I = dyn_cast<Instruction>(V)) {
+          Function *F = I->getFunction();
+          if (isKernelLDS(F)) {
+            kernels[F].insert(&GV);
+          } else {
+            functions[F].insert(&GV);
+          }
+        }
+      }
+    }
+  }
+
+  struct LDSUsesInfoTy {
+    FunctionVariableMap direct_access;
+    FunctionVariableMap indirect_access;
+  };
+
+  static LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
+
+    FunctionVariableMap direct_map_kernel;
+    FunctionVariableMap direct_map_function;
+    getUsesOfLDSByFunction(CG, M, direct_map_kernel, direct_map_function);
+
+    // Collect variables that are used by functions whose address has escaped
+    DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
+    for (Function &F : M.functions()) {
+      if (!isKernelLDS(&F))
+        if (F.hasAddressTaken(nullptr,
+                              /* IgnoreCallbackUses */ false,
+                              /* IgnoreAssumeLikeCalls */ false,
+                              /* IgnoreLLVMUsed */ true,
+                              /* IgnoreArcAttachedCall */ false)) {
+          set_union(VariablesReachableThroughFunctionPointer,
+                    direct_map_function[&F]);
+        }
+    }
+
+    auto functionMakesUnknownCall = [&](const Function *F) -> bool {
+      assert(!F->isDeclaration());
+      for (const CallGraphNode::CallRecord &R : *CG[F]) {
+        if (!R.second->getFunction()) {
+          return true;
+        }
+      }
+      return false;
+    };
+
+    // Work out which variables are reachable through function calls
+    FunctionVariableMap transitive_map_function = direct_map_function;
+
+    // If the function makes any unknown call, assume the worst case that it can
+    // access all variables accessed by functions whose address escaped
+    for (Function &F : M.functions()) {
+      if (!F.isDeclaration() && functionMakesUnknownCall(&F)) {
+        if (!isKernelLDS(&F)) {
+          set_union(transitive_map_function[&F],
+                    VariablesReachableThroughFunctionPointer);
+        }
+      }
+    }
+
+    // Direct implementation of collecting all variables reachable from each
+    // function
+    for (Function &Func : M.functions()) {
+      if (Func.isDeclaration() || isKernelLDS(&Func))
+        continue;
+
+      DenseSet<Function *> seen; // catches cycles
+      SmallVector<Function *, 4> wip{&Func};
+
+      while (!wip.empty()) {
+        Function *F = wip.pop_back_val();
+
+        // Can accelerate this by referring to transitive map for functions that
+        // have already been computed, with more care than this
+        set_union(transitive_map_function[&Func], direct_map_function[F]);
+
+        for (const CallGraphNode::CallRecord &R : *CG[F]) {
+          Function *ith = R.second->getFunction();
+          if (ith) {
+            if (!seen.contains(ith)) {
+              seen.insert(ith);
+              wip.push_back(ith);
+            }
+          }
+        }
+      }
+    }
+
+    // direct_map_kernel lists which variables are used by the kernel
+    // find the variables which are used through a function call
+    FunctionVariableMap indirect_map_kernel;
+
+    for (Function &Func : M.functions()) {
+      if (Func.isDeclaration() || !isKernelLDS(&Func))
+        continue;
+
+      for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
+        Function *ith = R.second->getFunction();
+        if (ith) {
+          set_union(indirect_map_kernel[&Func], transitive_map_function[ith]);
+        } else {
+          set_union(indirect_map_kernel[&Func],
+                    VariablesReachableThroughFunctionPointer);
+        }
+      }
+    }
+
+    // Verify that we fall into one of 2 cases:
+    //    - All variables are absolute: this is a re-run of the pass
+    //      so we don't have anything to do.
+    //    - No variables are absolute.
+    std::optional<bool> HasAbsoluteGVs;
+    for (auto &Map : {direct_map_kernel, indirect_map_kernel}) {
+      for (auto &[Fn, GVs] : Map) {
+        for (auto *GV : GVs) {
+          bool IsAbsolute = GV->isAbsoluteSymbolRef();
+          if (HasAbsoluteGVs.has_value()) {
+            if (*HasAbsoluteGVs != IsAbsolute) {
+              report_fatal_error(
+                  "Module cannot mix absolute and non-absolute LDS GVs");
+            }
+          } else
+            HasAbsoluteGVs = IsAbsolute;
+        }
+      }
+    }
+
+    // If we only had absolute GVs, we have nothing to do, return an empty
+    // result.
+    if (HasAbsoluteGVs && *HasAbsoluteGVs)
+      return {FunctionVariableMap(), FunctionVariableMap()};
+
+    return {std::move(direct_map_kernel), std::move(indirect_map_kernel)};
+  }
+
   struct LDSVariableReplacement {
     GlobalVariable *SGV = nullptr;
     DenseMap<GlobalVariable *, Constant *> LDSVarsToConstantGEP;
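
For orientation, a sketch of how the two maps returned by the restored getTransitiveUsesOfLDS might be inspected while debugging. This is not part of this commit: dumpLDSUses is a hypothetical helper, and it is only meaningful inside the pass where LDSUsesInfoTy and the LLVM support headers are in scope.

// Hypothetical debugging helper (assumed, not existing code): print which LDS
// globals each kernel accesses directly and which it only reaches through the
// call graph, as recorded in the direct_access / indirect_access maps above.
static void dumpLDSUses(const LDSUsesInfoTy &Info) {
  for (const auto &[F, GVs] : Info.direct_access)
    for (const GlobalVariable *GV : GVs)
      errs() << F->getName() << " directly accesses " << GV->getName() << "\n";
  for (const auto &[F, GVs] : Info.indirect_access)
    for (const GlobalVariable *GV : GVs)
      errs() << F->getName() << " reaches " << GV->getName() << " via calls\n";
}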
