212
212
#define DEBUG_TYPE " amdgpu-lower-module-lds"
213
213
214
214
using namespace llvm ;
215
- using namespace AMDGPU ;
216
215
217
216
namespace {
218
217
@@ -235,6 +234,17 @@ cl::opt<LoweringKind> LoweringKindLoc(
235
234
clEnumValN(LoweringKind::hybrid, " hybrid" ,
236
235
" Lower via mixture of above strategies" )));
237
236
237
+ bool isKernelLDS (const Function *F) {
238
+ // Some weirdness here. AMDGPU::isKernelCC does not call into
239
+ // AMDGPU::isKernel with the calling conv, it instead calls into
240
+ // isModuleEntryFunction which returns true for more calling conventions
241
+ // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.
242
+ // There's also a test that checks that the LDS lowering does not hit on
243
+ // a graphics shader, denoted amdgpu_ps, so stay with the limited case.
244
+ // Putting LDS in the name of the function to draw attention to this.
245
+ return AMDGPU::isKernel (F->getCallingConv ());
246
+ }
247
+
238
248
template <typename T> std::vector<T> sortByName (std::vector<T> &&V) {
239
249
llvm::sort (V.begin (), V.end (), [](const auto *L, const auto *R) {
240
250
return L->getName () < R->getName ();
@@ -295,9 +305,183 @@ class AMDGPULowerModuleLDS {
295
305
Decl, {}, {OperandBundleDefT<Value *>(" ExplicitUse" , UseInstance)});
296
306
}
297
307
308
+ static bool eliminateConstantExprUsesOfLDSFromAllInstructions (Module &M) {
309
+ // Constants are uniqued within LLVM. A ConstantExpr referring to a LDS
310
+ // global may have uses from multiple different functions as a result.
311
+ // This pass specialises LDS variables with respect to the kernel that
312
+ // allocates them.
313
+
314
+ // This is semantically equivalent to (the unimplemented as slow):
315
+ // for (auto &F : M.functions())
316
+ // for (auto &BB : F)
317
+ // for (auto &I : BB)
318
+ // for (Use &Op : I.operands())
319
+ // if (constantExprUsesLDS(Op))
320
+ // replaceConstantExprInFunction(I, Op);
321
+
322
+ SmallVector<Constant *> LDSGlobals;
323
+ for (auto &GV : M.globals ())
324
+ if (AMDGPU::isLDSVariableToLower (GV))
325
+ LDSGlobals.push_back (&GV);
326
+
327
+ return convertUsersOfConstantsToInstructions (LDSGlobals);
328
+ }
329
+
298
330
public:
  // Stores the target machine so later lowering steps can query it.
  AMDGPULowerModuleLDS (const AMDGPUTargetMachine &TM_) : TM(TM_) {}
300
332
333
  // For each function, the set of LDS globals it accesses (used below for
  // both direct and transitive access records).
  using FunctionVariableMap = DenseMap<Function *, DenseSet<GlobalVariable *>>;

  // Inverse shape: for each LDS global, a set of functions. NOTE(review):
  // no use is visible in this chunk — presumably maps variables to the
  // functions that access them; confirm against later code.
  using VariableFunctionMap = DenseMap<GlobalVariable *, DenseSet<Function *>>;
336
+
337
+ static void getUsesOfLDSByFunction (CallGraph const &CG, Module &M,
338
+ FunctionVariableMap &kernels,
339
+ FunctionVariableMap &functions) {
340
+
341
+ // Get uses from the current function, excluding uses by called functions
342
+ // Two output variables to avoid walking the globals list twice
343
+ for (auto &GV : M.globals ()) {
344
+ if (!AMDGPU::isLDSVariableToLower (GV)) {
345
+ continue ;
346
+ }
347
+
348
+ for (User *V : GV.users ()) {
349
+ if (auto *I = dyn_cast<Instruction>(V)) {
350
+ Function *F = I->getFunction ();
351
+ if (isKernelLDS (F)) {
352
+ kernels[F].insert (&GV);
353
+ } else {
354
+ functions[F].insert (&GV);
355
+ }
356
+ }
357
+ }
358
+ }
359
+ }
360
+
361
  // Result of getTransitiveUsesOfLDS: which LDS globals each function touches
  // through its own instructions, and which it can reach via callees.
  struct LDSUsesInfoTy {
    // LDS globals referenced directly by the function's instructions.
    FunctionVariableMap direct_access;
    // LDS globals reachable through the function's call graph.
    FunctionVariableMap indirect_access;
  };
365
+
366
  // Compute, for every kernel, the LDS variables it accesses directly and
  // those it can reach transitively through the call graph. Calls the call
  // graph cannot resolve (indirect calls, external callees) are handled
  // conservatively via the set of variables used by address-taken functions.
  // Returns empty maps when all LDS variables are already absolute, i.e. the
  // pass has run on this module before.
  static LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {

    FunctionVariableMap direct_map_kernel;
    FunctionVariableMap direct_map_function;
    getUsesOfLDSByFunction(CG, M, direct_map_kernel, direct_map_function);

    // Collect variables that are used by functions whose address has escaped
    // (an indirect call could target any of these functions).
    DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
    for (Function &F : M.functions()) {
      if (!isKernelLDS(&F))
        if (F.hasAddressTaken(nullptr,
                              /* IgnoreCallbackUses */ false,
                              /* IgnoreAssumeLikeCalls */ false,
                              /* IgnoreLLVMUsed */ true,
                              /* IgnoreArcAttachedCall */ false)) {
          set_union(VariablesReachableThroughFunctionPointer,
                    direct_map_function[&F]);
        }
    }

    // True if F contains a call the call graph cannot resolve to a Function
    // (CallRecord with a null callee).
    auto functionMakesUnknownCall = [&](const Function *F) -> bool {
      assert(!F->isDeclaration());
      for (const CallGraphNode::CallRecord &R : *CG[F]) {
        if (!R.second->getFunction()) {
          return true;
        }
      }
      return false;
    };

    // Work out which variables are reachable through function calls.
    FunctionVariableMap transitive_map_function = direct_map_function;

    // If the function makes any unknown call, assume the worst case that it
    // can access all variables accessed by functions whose address escaped.
    for (Function &F : M.functions()) {
      if (!F.isDeclaration() && functionMakesUnknownCall(&F)) {
        if (!isKernelLDS(&F)) {
          set_union(transitive_map_function[&F],
                    VariablesReachableThroughFunctionPointer);
        }
      }
    }

    // Direct implementation of collecting all variables reachable from each
    // non-kernel function: depth-first walk over its known callees,
    // accumulating their direct uses.
    for (Function &Func : M.functions()) {
      if (Func.isDeclaration() || isKernelLDS(&Func))
        continue;

      DenseSet<Function *> seen; // catches cycles
      SmallVector<Function *, 4> wip{&Func};

      while (!wip.empty()) {
        Function *F = wip.pop_back_val();

        // Can accelerate this by referring to transitive map for functions
        // that have already been computed, with more care than this.
        set_union(transitive_map_function[&Func], direct_map_function[F]);

        for (const CallGraphNode::CallRecord &R : *CG[F]) {
          Function *ith = R.second->getFunction();
          if (ith) {
            if (!seen.contains(ith)) {
              seen.insert(ith);
              wip.push_back(ith);
            }
          }
        }
      }
    }

    // direct_map_kernel lists which variables are used by the kernel;
    // now find the variables which are used through a function call.
    FunctionVariableMap indirect_map_kernel;

    for (Function &Func : M.functions()) {
      if (Func.isDeclaration() || !isKernelLDS(&Func))
        continue;

      for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
        Function *ith = R.second->getFunction();
        if (ith) {
          set_union(indirect_map_kernel[&Func], transitive_map_function[ith]);
        } else {
          // Unknown callee from a kernel: conservatively assume it can reach
          // anything reachable through an escaped function pointer.
          set_union(indirect_map_kernel[&Func],
                    VariablesReachableThroughFunctionPointer);
        }
      }
    }

    // Verify that we fall into one of 2 cases:
    //    - All variables are absolute: this is a re-run of the pass
    //      so we don't have anything to do.
    //    - No variables are absolute.
    // A mix is rejected as a fatal error below.
    std::optional<bool> HasAbsoluteGVs;
    for (auto &Map : {direct_map_kernel, indirect_map_kernel}) {
      for (auto &[Fn, GVs] : Map) {
        for (auto *GV : GVs) {
          bool IsAbsolute = GV->isAbsoluteSymbolRef();
          if (HasAbsoluteGVs.has_value()) {
            if (*HasAbsoluteGVs != IsAbsolute) {
              report_fatal_error(
                  "Module cannot mix absolute and non-absolute LDS GVs");
            }
          } else
            HasAbsoluteGVs = IsAbsolute;
        }
      }
    }

    // If we only had absolute GVs, we have nothing to do; return an empty
    // result.
    if (HasAbsoluteGVs && *HasAbsoluteGVs)
      return {FunctionVariableMap(), FunctionVariableMap()};

    return {std::move(direct_map_kernel), std::move(indirect_map_kernel)};
  }
484
+
301
485
struct LDSVariableReplacement {
302
486
GlobalVariable *SGV = nullptr ;
303
487
DenseMap<GlobalVariable *, Constant *> LDSVarsToConstantGEP;
0 commit comments