212
212
#define DEBUG_TYPE " amdgpu-lower-module-lds"
213
213
214
214
using namespace llvm ;
215
+ using namespace AMDGPU ;
215
216
216
217
namespace {
217
218
@@ -234,17 +235,6 @@ cl::opt<LoweringKind> LoweringKindLoc(
234
235
clEnumValN(LoweringKind::hybrid, " hybrid" ,
235
236
" Lower via mixture of above strategies" )));
236
237
237
- bool isKernelLDS (const Function *F) {
238
- // Some weirdness here. AMDGPU::isKernelCC does not call into
239
- // AMDGPU::isKernel with the calling conv, it instead calls into
240
- // isModuleEntryFunction which returns true for more calling conventions
241
- // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.
242
- // There's also a test that checks that the LDS lowering does not hit on
243
- // a graphics shader, denoted amdgpu_ps, so stay with the limited case.
244
- // Putting LDS in the name of the function to draw attention to this.
245
- return AMDGPU::isKernel (F->getCallingConv ());
246
- }
247
-
248
238
template <typename T> std::vector<T> sortByName (std::vector<T> &&V) {
249
239
llvm::sort (V.begin (), V.end (), [](const auto *L, const auto *R) {
250
240
return L->getName () < R->getName ();
@@ -305,183 +295,9 @@ class AMDGPULowerModuleLDS {
305
295
Decl, {}, {OperandBundleDefT<Value *>(" ExplicitUse" , UseInstance)});
306
296
}
307
297
308
- static bool eliminateConstantExprUsesOfLDSFromAllInstructions (Module &M) {
309
- // Constants are uniqued within LLVM. A ConstantExpr referring to a LDS
310
- // global may have uses from multiple different functions as a result.
311
- // This pass specialises LDS variables with respect to the kernel that
312
- // allocates them.
313
-
314
- // This is semantically equivalent to (the unimplemented as slow):
315
- // for (auto &F : M.functions())
316
- // for (auto &BB : F)
317
- // for (auto &I : BB)
318
- // for (Use &Op : I.operands())
319
- // if (constantExprUsesLDS(Op))
320
- // replaceConstantExprInFunction(I, Op);
321
-
322
- SmallVector<Constant *> LDSGlobals;
323
- for (auto &GV : M.globals ())
324
- if (AMDGPU::isLDSVariableToLower (GV))
325
- LDSGlobals.push_back (&GV);
326
-
327
- return convertUsersOfConstantsToInstructions (LDSGlobals);
328
- }
329
-
330
298
public:
331
299
AMDGPULowerModuleLDS (const AMDGPUTargetMachine &TM_) : TM(TM_) {}
332
300
333
- using FunctionVariableMap = DenseMap<Function *, DenseSet<GlobalVariable *>>;
334
-
335
- using VariableFunctionMap = DenseMap<GlobalVariable *, DenseSet<Function *>>;
336
-
337
- static void getUsesOfLDSByFunction (CallGraph const &CG, Module &M,
338
- FunctionVariableMap &kernels,
339
- FunctionVariableMap &functions) {
340
-
341
- // Get uses from the current function, excluding uses by called functions
342
- // Two output variables to avoid walking the globals list twice
343
- for (auto &GV : M.globals ()) {
344
- if (!AMDGPU::isLDSVariableToLower (GV)) {
345
- continue ;
346
- }
347
-
348
- for (User *V : GV.users ()) {
349
- if (auto *I = dyn_cast<Instruction>(V)) {
350
- Function *F = I->getFunction ();
351
- if (isKernelLDS (F)) {
352
- kernels[F].insert (&GV);
353
- } else {
354
- functions[F].insert (&GV);
355
- }
356
- }
357
- }
358
- }
359
- }
360
-
361
- struct LDSUsesInfoTy {
362
- FunctionVariableMap direct_access;
363
- FunctionVariableMap indirect_access;
364
- };
365
-
366
- static LDSUsesInfoTy getTransitiveUsesOfLDS (CallGraph const &CG, Module &M) {
367
-
368
- FunctionVariableMap direct_map_kernel;
369
- FunctionVariableMap direct_map_function;
370
- getUsesOfLDSByFunction (CG, M, direct_map_kernel, direct_map_function);
371
-
372
- // Collect variables that are used by functions whose address has escaped
373
- DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
374
- for (Function &F : M.functions ()) {
375
- if (!isKernelLDS (&F))
376
- if (F.hasAddressTaken (nullptr ,
377
- /* IgnoreCallbackUses */ false ,
378
- /* IgnoreAssumeLikeCalls */ false ,
379
- /* IgnoreLLVMUsed */ true ,
380
- /* IgnoreArcAttachedCall */ false )) {
381
- set_union (VariablesReachableThroughFunctionPointer,
382
- direct_map_function[&F]);
383
- }
384
- }
385
-
386
- auto functionMakesUnknownCall = [&](const Function *F) -> bool {
387
- assert (!F->isDeclaration ());
388
- for (const CallGraphNode::CallRecord &R : *CG[F]) {
389
- if (!R.second ->getFunction ()) {
390
- return true ;
391
- }
392
- }
393
- return false ;
394
- };
395
-
396
- // Work out which variables are reachable through function calls
397
- FunctionVariableMap transitive_map_function = direct_map_function;
398
-
399
- // If the function makes any unknown call, assume the worst case that it can
400
- // access all variables accessed by functions whose address escaped
401
- for (Function &F : M.functions ()) {
402
- if (!F.isDeclaration () && functionMakesUnknownCall (&F)) {
403
- if (!isKernelLDS (&F)) {
404
- set_union (transitive_map_function[&F],
405
- VariablesReachableThroughFunctionPointer);
406
- }
407
- }
408
- }
409
-
410
- // Direct implementation of collecting all variables reachable from each
411
- // function
412
- for (Function &Func : M.functions ()) {
413
- if (Func.isDeclaration () || isKernelLDS (&Func))
414
- continue ;
415
-
416
- DenseSet<Function *> seen; // catches cycles
417
- SmallVector<Function *, 4 > wip{&Func};
418
-
419
- while (!wip.empty ()) {
420
- Function *F = wip.pop_back_val ();
421
-
422
- // Can accelerate this by referring to transitive map for functions that
423
- // have already been computed, with more care than this
424
- set_union (transitive_map_function[&Func], direct_map_function[F]);
425
-
426
- for (const CallGraphNode::CallRecord &R : *CG[F]) {
427
- Function *ith = R.second ->getFunction ();
428
- if (ith) {
429
- if (!seen.contains (ith)) {
430
- seen.insert (ith);
431
- wip.push_back (ith);
432
- }
433
- }
434
- }
435
- }
436
- }
437
-
438
- // direct_map_kernel lists which variables are used by the kernel
439
- // find the variables which are used through a function call
440
- FunctionVariableMap indirect_map_kernel;
441
-
442
- for (Function &Func : M.functions ()) {
443
- if (Func.isDeclaration () || !isKernelLDS (&Func))
444
- continue ;
445
-
446
- for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
447
- Function *ith = R.second ->getFunction ();
448
- if (ith) {
449
- set_union (indirect_map_kernel[&Func], transitive_map_function[ith]);
450
- } else {
451
- set_union (indirect_map_kernel[&Func],
452
- VariablesReachableThroughFunctionPointer);
453
- }
454
- }
455
- }
456
-
457
- // Verify that we fall into one of 2 cases:
458
- // - All variables are absolute: this is a re-run of the pass
459
- // so we don't have anything to do.
460
- // - No variables are absolute.
461
- std::optional<bool > HasAbsoluteGVs;
462
- for (auto &Map : {direct_map_kernel, indirect_map_kernel}) {
463
- for (auto &[Fn, GVs] : Map) {
464
- for (auto *GV : GVs) {
465
- bool IsAbsolute = GV->isAbsoluteSymbolRef ();
466
- if (HasAbsoluteGVs.has_value ()) {
467
- if (*HasAbsoluteGVs != IsAbsolute) {
468
- report_fatal_error (
469
- " Module cannot mix absolute and non-absolute LDS GVs" );
470
- }
471
- } else
472
- HasAbsoluteGVs = IsAbsolute;
473
- }
474
- }
475
- }
476
-
477
- // If we only had absolute GVs, we have nothing to do, return an empty
478
- // result.
479
- if (HasAbsoluteGVs && *HasAbsoluteGVs)
480
- return {FunctionVariableMap (), FunctionVariableMap ()};
481
-
482
- return {std::move (direct_map_kernel), std::move (indirect_map_kernel)};
483
- }
484
-
485
301
struct LDSVariableReplacement {
486
302
GlobalVariable *SGV = nullptr ;
487
303
DenseMap<GlobalVariable *, Constant *> LDSVarsToConstantGEP;
0 commit comments