95
95
#include < algorithm>
96
96
97
97
#define DEBUG_TYPE " amdgpu-sw-lower-lds"
98
+ #define COV5_HIDDEN_DYN_LDS_SIZE_ARG 15
98
99
99
100
using namespace llvm ;
100
101
using namespace AMDGPU ;
@@ -153,6 +154,10 @@ class AMDGPUSwLowerLDS {
153
154
void lowerNonKernelLDSAccesses (Function *Func,
154
155
SetVector<GlobalVariable *> &LDSGlobals,
155
156
NonKernelLDSParameters &NKLDSParams);
157
+ void
158
+ updateMallocSizeForDynamicLDS (Function *Func, Value *CurrMallocSize,
159
+ Value *HiddenDynLDSSize,
160
+ SetVector<GlobalVariable *> &DynamicLDSGlobals);
156
161
157
162
private:
158
163
Module &M;
@@ -195,7 +200,6 @@ SetVector<Function *> AMDGPUSwLowerLDS::getOrderedIndirectLDSAccessingKernels(
195
200
Function *Func = OrderedKernels[i];
196
201
Func->setMetadata (" llvm.amdgcn.lds.kernel.id" ,
197
202
MDNode::get (Ctx, AttrMDArgs));
198
- auto &LDSParams = KernelToLDSParametersMap[Func];
199
203
}
200
204
return std::move (OrderedKernels);
201
205
}
@@ -232,6 +236,9 @@ void AMDGPUSwLowerLDS::populateSwLDSGlobal(Function *Func) {
232
236
M, IRB.getPtrTy (), false , GlobalValue::InternalLinkage,
233
237
PoisonValue::get (IRB.getPtrTy ()), " llvm.amdgcn.sw.lds." + Func->getName (),
234
238
nullptr , GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS, false );
239
+ GlobalValue::SanitizerMetadata MD;
240
+ MD.NoAddress = true ;
241
+ LDSParams.SwLDS ->setSanitizerMetadata (MD);
235
242
return ;
236
243
}
237
244
@@ -265,7 +272,7 @@ void AMDGPUSwLowerLDS::populateSwMetadataGlobal(Function *Func) {
265
272
// {StartOffset, AlignedSizeInBytes}
266
273
SmallString<128 > MDItemStr;
267
274
raw_svector_ostream MDItemOS (MDItemStr);
268
- MDItemOS << " llvm.amdgcn.sw.lds." << Func->getName (). str () << " .md.item" ;
275
+ MDItemOS << " llvm.amdgcn.sw.lds." << Func->getName () << " .md.item" ;
269
276
270
277
StructType *LDSItemTy =
271
278
StructType::create (Ctx, {Int32Ty, Int32Ty, Int32Ty}, MDItemOS.str ());
@@ -296,13 +303,13 @@ void AMDGPUSwLowerLDS::populateSwMetadataGlobal(Function *Func) {
296
303
297
304
SmallString<128 > MDTypeStr;
298
305
raw_svector_ostream MDTypeOS (MDTypeStr);
299
- MDTypeOS << " llvm.amdgcn.sw.lds." << Func->getName (). str () << " .md.type" ;
306
+ MDTypeOS << " llvm.amdgcn.sw.lds." << Func->getName () << " .md.type" ;
300
307
301
308
StructType *MetadataStructType =
302
309
StructType::create (Ctx, Items, MDTypeOS.str ());
303
310
SmallString<128 > MDStr;
304
311
raw_svector_ostream MDOS (MDStr);
305
- MDOS << " llvm.amdgcn.sw.lds." << Func->getName (). str () << " .md" ;
312
+ MDOS << " llvm.amdgcn.sw.lds." << Func->getName () << " .md" ;
306
313
LDSParams.SwLDSMetadata = new GlobalVariable (
307
314
M, MetadataStructType, false , GlobalValue::InternalLinkage,
308
315
PoisonValue::get (MetadataStructType), MDOS.str (), nullptr ,
@@ -387,8 +394,7 @@ void AMDGPUSwLowerLDS::replaceKernelLDSAccesses(Function *Func) {
387
394
Value *Load = IRB.CreateLoad (Int32Ty, GEP);
388
395
Value *BasePlusOffset =
389
396
IRB.CreateInBoundsGEP (IRB.getInt8Ty (), SwLDS, {Load});
390
- LLVM_DEBUG (dbgs () << " Sw LDS Lowering, Replacing LDS "
391
- << GV->getName ().str ());
397
+ LLVM_DEBUG (dbgs () << " Sw LDS Lowering, Replacing LDS " << GV->getName ());
392
398
replacesUsesOfGlobalInFunction (Func, GV, BasePlusOffset);
393
399
}
394
400
};
@@ -398,10 +404,57 @@ void AMDGPUSwLowerLDS::replaceKernelLDSAccesses(Function *Func) {
398
404
ReplaceLDSGlobalUses (IndirectAccess.DynamicLDSGlobals );
399
405
}
400
406
407
+ void AMDGPUSwLowerLDS::updateMallocSizeForDynamicLDS (
408
+ Function *Func, Value *CurrMallocSize, Value *HiddenDynLDSSize,
409
+ SetVector<GlobalVariable *> &DynamicLDSGlobals) {
410
+ auto &LDSParams = KernelToLDSParametersMap[Func];
411
+ Type *Int32Ty = IRB.getInt32Ty ();
412
+
413
+ GlobalVariable *SwLDS = LDSParams.SwLDS ;
414
+ GlobalVariable *SwLDSMetadata = LDSParams.SwLDSMetadata ;
415
+ assert (SwLDS && SwLDSMetadata);
416
+ StructType *MetadataStructType =
417
+ cast<StructType>(SwLDSMetadata->getValueType ());
418
+ unsigned MaxAlignment = SwLDS->getAlignment ();
419
+ Value *MaxAlignValue = IRB.getInt32 (MaxAlignment);
420
+ Value *MaxAlignValueMinusOne = IRB.getInt32 (MaxAlignment - 1 );
421
+
422
+ for (GlobalVariable *DynGV : DynamicLDSGlobals) {
423
+ auto &Indices = LDSParams.LDSToReplacementIndicesMap [DynGV];
424
+ // Update the Offset metadata.
425
+ Constant *Index0 = ConstantInt::get (Int32Ty, 0 );
426
+ Constant *Index1 = ConstantInt::get (Int32Ty, Indices[1 ]);
427
+
428
+ Constant *Index2Offset = ConstantInt::get (Int32Ty, 0 );
429
+ auto *GEPForOffset = IRB.CreateInBoundsGEP (
430
+ MetadataStructType, SwLDSMetadata, {Index0, Index1, Index2Offset});
431
+
432
+ IRB.CreateStore (CurrMallocSize, GEPForOffset);
433
+ // Update the size and Aligned Size metadata.
434
+ Constant *Index2Size = ConstantInt::get (Int32Ty, 1 );
435
+ auto *GEPForSize = IRB.CreateInBoundsGEP (MetadataStructType, SwLDSMetadata,
436
+ {Index0, Index1, Index2Size});
437
+
438
+ Value *CurrDynLDSSize = IRB.CreateLoad (Int32Ty, HiddenDynLDSSize);
439
+ IRB.CreateStore (CurrDynLDSSize, GEPForSize);
440
+ Constant *Index2AlignedSize = ConstantInt::get (Int32Ty, 1 );
441
+ auto *GEPForAlignedSize = IRB.CreateInBoundsGEP (
442
+ MetadataStructType, SwLDSMetadata, {Index0, Index1, Index2AlignedSize});
443
+
444
+ Value *AlignedDynLDSSize =
445
+ IRB.CreateAdd (CurrDynLDSSize, MaxAlignValueMinusOne);
446
+ AlignedDynLDSSize = IRB.CreateUDiv (AlignedDynLDSSize, MaxAlignValue);
447
+ AlignedDynLDSSize = IRB.CreateMul (AlignedDynLDSSize, MaxAlignValue);
448
+ IRB.CreateStore (AlignedDynLDSSize, GEPForAlignedSize);
449
+
450
+ // Update the Current Malloc Size
451
+ CurrMallocSize = IRB.CreateAdd (CurrMallocSize, AlignedDynLDSSize);
452
+ }
453
+ }
454
+
401
455
void AMDGPUSwLowerLDS::lowerKernelLDSAccesses (Function *Func,
402
456
DomTreeUpdater &DTU) {
403
- LLVM_DEBUG (dbgs () << " Sw Lowering Kernel LDS for : "
404
- << Func->getName ().str ());
457
+ LLVM_DEBUG (dbgs () << " Sw Lowering Kernel LDS for : " << Func->getName ());
405
458
auto &LDSParams = KernelToLDSParametersMap[Func];
406
459
auto &Ctx = M.getContext ();
407
460
auto *PrevEntryBlock = &Func->getEntryBlock ();
@@ -423,12 +476,6 @@ void AMDGPUSwLowerLDS::lowerKernelLDSAccesses(Function *Func,
423
476
auto *const XYZOr = IRB.CreateOr (XYOr, WIdz);
424
477
auto *const WIdzCond = IRB.CreateICmpEQ (XYZOr, IRB.getInt32 (0 ));
425
478
426
- GlobalVariable *SwLDS = LDSParams.SwLDS ;
427
- GlobalVariable *SwLDSMetadata = LDSParams.SwLDSMetadata ;
428
- assert (SwLDS && SwLDSMetadata);
429
- StructType *MetadataStructType =
430
- cast<StructType>(SwLDSMetadata->getValueType ());
431
-
432
479
// All work items will branch to PrevEntryBlock except {0,0,0} index
433
480
// work item which will branch to malloc block.
434
481
IRB.CreateCondBr (WIdzCond, MallocBlock, PrevEntryBlock);
@@ -439,79 +486,56 @@ void AMDGPUSwLowerLDS::lowerKernelLDSAccesses(Function *Func,
439
486
// If Dynamic LDS globals are accessed by the kernel,
440
487
// Get the size of dyn lds from hidden dyn_lds_size kernel arg.
441
488
// Update the corresponding metadata global entries for this dyn lds global.
489
+ GlobalVariable *SwLDS = LDSParams.SwLDS ;
490
+ GlobalVariable *SwLDSMetadata = LDSParams.SwLDSMetadata ;
491
+ assert (SwLDS && SwLDSMetadata);
492
+ StructType *MetadataStructType =
493
+ cast<StructType>(SwLDSMetadata->getValueType ());
442
494
uint32_t MallocSize = 0 ;
443
495
Value *CurrMallocSize;
496
+ Type *Int32Ty = IRB.getInt32Ty ();
497
+ Type *Int64Ty = IRB.getInt64Ty ();
444
498
445
499
unsigned NumStaticLDS = LDSParams.DirectAccess .StaticLDSGlobals .size () +
446
500
LDSParams.IndirectAccess .StaticLDSGlobals .size ();
447
501
unsigned NumDynLDS = LDSParams.DirectAccess .DynamicLDSGlobals .size () +
448
502
LDSParams.IndirectAccess .DynamicLDSGlobals .size ();
449
503
450
504
if (NumStaticLDS) {
451
- auto *GEPForEndStaticLDSOffset = IRB.CreateInBoundsGEP (
452
- MetadataStructType, SwLDSMetadata,
453
- {IRB.getInt32 (0 ), IRB.getInt32 (NumStaticLDS - 1 ), IRB.getInt32 (0 )});
454
-
455
- auto *GEPForEndStaticLDSSize = IRB.CreateInBoundsGEP (
456
- MetadataStructType, SwLDSMetadata,
457
- {IRB.getInt32 (0 ), IRB.getInt32 (NumStaticLDS - 1 ), IRB.getInt32 (2 )});
505
+ auto *GEPForEndStaticLDSOffset =
506
+ IRB.CreateInBoundsGEP (MetadataStructType, SwLDSMetadata,
507
+ {ConstantInt::get (Int32Ty, 0 ),
508
+ ConstantInt::get (Int32Ty, NumStaticLDS - 1 ),
509
+ ConstantInt::get (Int32Ty, 0 )});
510
+
511
+ auto *GEPForEndStaticLDSSize =
512
+ IRB.CreateInBoundsGEP (MetadataStructType, SwLDSMetadata,
513
+ {ConstantInt::get (Int32Ty, 0 ),
514
+ ConstantInt::get (Int32Ty, NumStaticLDS - 1 ),
515
+ ConstantInt::get (Int32Ty, 2 )});
458
516
459
517
Value *EndStaticLDSOffset =
460
- IRB.CreateLoad (IRB.getInt64Ty (), GEPForEndStaticLDSOffset);
461
- Value *EndStaticLDSSize =
462
- IRB.CreateLoad (IRB.getInt64Ty (), GEPForEndStaticLDSSize);
518
+ IRB.CreateLoad (Int32Ty, GEPForEndStaticLDSOffset);
519
+ Value *EndStaticLDSSize = IRB.CreateLoad (Int32Ty, GEPForEndStaticLDSSize);
463
520
CurrMallocSize = IRB.CreateAdd (EndStaticLDSOffset, EndStaticLDSSize);
464
521
} else
465
- CurrMallocSize = IRB.getInt64 (MallocSize);
522
+ CurrMallocSize = IRB.getInt32 (MallocSize);
466
523
467
524
if (NumDynLDS) {
468
- unsigned MaxAlignment = SwLDS->getAlignment ();
469
- Value *MaxAlignValue = IRB.getInt64 (MaxAlignment);
470
- Value *MaxAlignValueMinusOne = IRB.getInt64 (MaxAlignment - 1 );
471
-
525
+ // Get size from hidden dyn_lds_size argument of kernel
472
526
Value *ImplicitArg =
473
527
IRB.CreateIntrinsic (Intrinsic::amdgcn_implicitarg_ptr, {}, {});
474
528
Value *HiddenDynLDSSize = IRB.CreateInBoundsGEP (
475
- ImplicitArg->getType (), ImplicitArg, {IRB.getInt32 (15 )});
476
-
477
- auto MallocSizeCalcLambda =
478
- [&](SetVector<GlobalVariable *> &DynamicLDSGlobals) {
479
- for (GlobalVariable *DynGV : DynamicLDSGlobals) {
480
- auto &Indices = LDSParams.LDSToReplacementIndicesMap [DynGV];
481
-
482
- // Update the Offset metadata.
483
- auto *GEPForOffset = IRB.CreateInBoundsGEP (
484
- MetadataStructType, SwLDSMetadata,
485
- {IRB.getInt32 (0 ), IRB.getInt32 (Indices[1 ]), IRB.getInt32 (0 )});
486
- IRB.CreateStore (CurrMallocSize, GEPForOffset);
487
-
488
- // Get size from hidden dyn_lds_size argument of kernel
489
- // Update the size and Aligned Size metadata.
490
- auto *GEPForSize = IRB.CreateInBoundsGEP (
491
- MetadataStructType, SwLDSMetadata,
492
- {IRB.getInt32 (0 ), IRB.getInt32 (Indices[1 ]), IRB.getInt32 (1 )});
493
- Value *CurrDynLDSSize =
494
- IRB.CreateLoad (IRB.getInt64Ty (), HiddenDynLDSSize);
495
- IRB.CreateStore (CurrDynLDSSize, GEPForSize);
496
-
497
- auto *GEPForAlignedSize = IRB.CreateInBoundsGEP (
498
- MetadataStructType, SwLDSMetadata,
499
- {IRB.getInt32 (0 ), IRB.getInt32 (Indices[1 ]), IRB.getInt32 (2 )});
500
- Value *AlignedDynLDSSize =
501
- IRB.CreateAdd (CurrDynLDSSize, MaxAlignValueMinusOne);
502
- AlignedDynLDSSize =
503
- IRB.CreateUDiv (AlignedDynLDSSize, MaxAlignValue);
504
- AlignedDynLDSSize = IRB.CreateMul (AlignedDynLDSSize, MaxAlignValue);
505
- IRB.CreateStore (AlignedDynLDSSize, GEPForAlignedSize);
506
-
507
- // Update the Current Malloc Size
508
- CurrMallocSize = IRB.CreateAdd (CurrMallocSize, AlignedDynLDSSize);
509
- }
510
- };
511
- MallocSizeCalcLambda (LDSParams.DirectAccess .DynamicLDSGlobals );
512
- MallocSizeCalcLambda (LDSParams.IndirectAccess .DynamicLDSGlobals );
529
+ ImplicitArg->getType (), ImplicitArg,
530
+ {ConstantInt::get (Int64Ty, COV5_HIDDEN_DYN_LDS_SIZE_ARG)});
531
+ updateMallocSizeForDynamicLDS (Func, CurrMallocSize, HiddenDynLDSSize,
532
+ LDSParams.DirectAccess .DynamicLDSGlobals );
533
+ updateMallocSizeForDynamicLDS (Func, CurrMallocSize, HiddenDynLDSSize,
534
+ LDSParams.IndirectAccess .DynamicLDSGlobals );
513
535
}
514
536
537
+ CurrMallocSize = IRB.CreateZExt (CurrMallocSize, Int64Ty);
538
+
515
539
// Create a call to malloc function which does device global memory allocation
516
540
// with size equals to all LDS global accesses size in this kernel.
517
541
FunctionCallee AMDGPUMallocFunc = M.getOrInsertFunction (
@@ -679,7 +703,7 @@ void AMDGPUSwLowerLDS::lowerNonKernelLDSAccesses(
679
703
// Replace LDS access in non-kernel with replacement queried from
680
704
// Base table and offset from offset table.
681
705
LLVM_DEBUG (dbgs () << " Sw LDS lowering, lower non-kernel access for : "
682
- << Func->getName (). str () );
706
+ << Func->getName ());
683
707
auto *EntryBlock = &Func->getEntryBlock ();
684
708
IRB.SetInsertPoint (EntryBlock, EntryBlock->begin ());
685
709
Function *Decl =
@@ -752,6 +776,8 @@ bool AMDGPUSwLowerLDS::run() {
752
776
for (auto &K : LDSAccesses) {
753
777
Function *F = K.first ;
754
778
assert (isKernelLDS (F));
779
+ if (!F->hasFnAttribute (Attribute::SanitizeAddress))
780
+ continue ;
755
781
756
782
if (!KernelToLDSParametersMap.contains (F)) {
757
783
KernelLDSParameters KernelLDSParams;
@@ -816,6 +842,8 @@ bool AMDGPUSwLowerLDS::run() {
816
842
buildNonKernelLDSOffsetTable (NKLDSParams);
817
843
for (auto &K : NonKernelToLDSAccessMap) {
818
844
Function *Func = K.first ;
845
+ if (!Func->hasFnAttribute (Attribute::SanitizeAddress))
846
+ continue ;
819
847
DenseSet<GlobalVariable *> &LDSGlobals = K.second ;
820
848
SetVector<GlobalVariable *> OrderedLDSGlobals = sortByName (
821
849
std::vector<GlobalVariable *>(LDSGlobals.begin (), LDSGlobals.end ()));
0 commit comments