Skip to content

Commit f2f4138

Browse files
committed
[AMDGPU] Update PR as per review comments:1
1 parent 8c0acc9 commit f2f4138

12 files changed

+398
-312
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp

Lines changed: 96 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
#include <algorithm>
9696

9797
#define DEBUG_TYPE "amdgpu-sw-lower-lds"
98+
#define COV5_HIDDEN_DYN_LDS_SIZE_ARG 15
9899

99100
using namespace llvm;
100101
using namespace AMDGPU;
@@ -153,6 +154,10 @@ class AMDGPUSwLowerLDS {
153154
void lowerNonKernelLDSAccesses(Function *Func,
154155
SetVector<GlobalVariable *> &LDSGlobals,
155156
NonKernelLDSParameters &NKLDSParams);
157+
void
158+
updateMallocSizeForDynamicLDS(Function *Func, Value *CurrMallocSize,
159+
Value *HiddenDynLDSSize,
160+
SetVector<GlobalVariable *> &DynamicLDSGlobals);
156161

157162
private:
158163
Module &M;
@@ -195,7 +200,6 @@ SetVector<Function *> AMDGPUSwLowerLDS::getOrderedIndirectLDSAccessingKernels(
195200
Function *Func = OrderedKernels[i];
196201
Func->setMetadata("llvm.amdgcn.lds.kernel.id",
197202
MDNode::get(Ctx, AttrMDArgs));
198-
auto &LDSParams = KernelToLDSParametersMap[Func];
199203
}
200204
return std::move(OrderedKernels);
201205
}
@@ -232,6 +236,9 @@ void AMDGPUSwLowerLDS::populateSwLDSGlobal(Function *Func) {
232236
M, IRB.getPtrTy(), false, GlobalValue::InternalLinkage,
233237
PoisonValue::get(IRB.getPtrTy()), "llvm.amdgcn.sw.lds." + Func->getName(),
234238
nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS, false);
239+
GlobalValue::SanitizerMetadata MD;
240+
MD.NoAddress = true;
241+
LDSParams.SwLDS->setSanitizerMetadata(MD);
235242
return;
236243
}
237244

@@ -265,7 +272,7 @@ void AMDGPUSwLowerLDS::populateSwMetadataGlobal(Function *Func) {
265272
// {StartOffset, SizeInBytes, AlignedSizeInBytes}
266273
SmallString<128> MDItemStr;
267274
raw_svector_ostream MDItemOS(MDItemStr);
268-
MDItemOS << "llvm.amdgcn.sw.lds." << Func->getName().str() << ".md.item";
275+
MDItemOS << "llvm.amdgcn.sw.lds." << Func->getName() << ".md.item";
269276

270277
StructType *LDSItemTy =
271278
StructType::create(Ctx, {Int32Ty, Int32Ty, Int32Ty}, MDItemOS.str());
@@ -296,13 +303,13 @@ void AMDGPUSwLowerLDS::populateSwMetadataGlobal(Function *Func) {
296303

297304
SmallString<128> MDTypeStr;
298305
raw_svector_ostream MDTypeOS(MDTypeStr);
299-
MDTypeOS << "llvm.amdgcn.sw.lds." << Func->getName().str() << ".md.type";
306+
MDTypeOS << "llvm.amdgcn.sw.lds." << Func->getName() << ".md.type";
300307

301308
StructType *MetadataStructType =
302309
StructType::create(Ctx, Items, MDTypeOS.str());
303310
SmallString<128> MDStr;
304311
raw_svector_ostream MDOS(MDStr);
305-
MDOS << "llvm.amdgcn.sw.lds." << Func->getName().str() << ".md";
312+
MDOS << "llvm.amdgcn.sw.lds." << Func->getName() << ".md";
306313
LDSParams.SwLDSMetadata = new GlobalVariable(
307314
M, MetadataStructType, false, GlobalValue::InternalLinkage,
308315
PoisonValue::get(MetadataStructType), MDOS.str(), nullptr,
@@ -387,8 +394,7 @@ void AMDGPUSwLowerLDS::replaceKernelLDSAccesses(Function *Func) {
387394
Value *Load = IRB.CreateLoad(Int32Ty, GEP);
388395
Value *BasePlusOffset =
389396
IRB.CreateInBoundsGEP(IRB.getInt8Ty(), SwLDS, {Load});
390-
LLVM_DEBUG(dbgs() << "Sw LDS Lowering, Replacing LDS "
391-
<< GV->getName().str());
397+
LLVM_DEBUG(dbgs() << "Sw LDS Lowering, Replacing LDS " << GV->getName());
392398
replacesUsesOfGlobalInFunction(Func, GV, BasePlusOffset);
393399
}
394400
};
@@ -398,10 +404,57 @@ void AMDGPUSwLowerLDS::replaceKernelLDSAccesses(Function *Func) {
398404
ReplaceLDSGlobalUses(IndirectAccess.DynamicLDSGlobals);
399405
}
400406

407+
// Emits IR, through IRB at its current insertion point, that fills in the
// sw LDS metadata entry of every dynamic LDS global in DynamicLDSGlobals.
//
// Each metadata entry is a struct of three i32 fields, addressed below as
//   field 0: start offset of the global inside the malloc'ed buffer,
//   field 1: size in bytes,
//   field 2: size in bytes rounded up to the alignment of the SwLDS global.
//
// Func              - kernel whose KernelToLDSParametersMap entry is used.
// CurrMallocSize    - i32 value holding the allocation size accumulated so
//                     far; it becomes the start offset of the first global.
// HiddenDynLDSSize  - pointer from which the dynamic LDS size is loaded.
// DynamicLDSGlobals - dynamic LDS globals whose entries are updated.
//
// NOTE(review): CurrMallocSize is taken by value, so the running total
// computed at the end of each loop iteration is not visible to the caller.
// The callers in lowerKernelLDSAccesses keep using their own (unchanged)
// value for the second call and for the malloc size. Making this a
// `Value *&` parameter requires changing the in-class declaration as well.
void AMDGPUSwLowerLDS::updateMallocSizeForDynamicLDS(
    Function *Func, Value *CurrMallocSize, Value *HiddenDynLDSSize,
    SetVector<GlobalVariable *> &DynamicLDSGlobals) {
  auto &LDSParams = KernelToLDSParametersMap[Func];
  Type *Int32Ty = IRB.getInt32Ty();

  GlobalVariable *SwLDS = LDSParams.SwLDS;
  GlobalVariable *SwLDSMetadata = LDSParams.SwLDSMetadata;
  assert(SwLDS && SwLDSMetadata);
  StructType *MetadataStructType =
      cast<StructType>(SwLDSMetadata->getValueType());
  // NOTE(review): presumably SwLDS always carries an explicit, non-zero
  // alignment by the time this runs; a zero value would make the rounding
  // below divide by zero - confirm against the code that creates SwLDS.
  unsigned MaxAlignment = SwLDS->getAlignment();
  Value *MaxAlignValue = IRB.getInt32(MaxAlignment);
  Value *MaxAlignValueMinusOne = IRB.getInt32(MaxAlignment - 1);

  // Constant GEP indices shared by every entry. ConstantInt values are
  // uniqued, so creating them once outside the loop emits the same IR.
  Constant *Index0 = ConstantInt::get(Int32Ty, 0);
  Constant *Index2Offset = ConstantInt::get(Int32Ty, 0);
  Constant *Index2Size = ConstantInt::get(Int32Ty, 1);
  // The aligned size lives in field 2. Using field 1 here would overwrite
  // the raw size stored just before and leave field 2 untouched.
  Constant *Index2AlignedSize = ConstantInt::get(Int32Ty, 2);

  for (GlobalVariable *DynGV : DynamicLDSGlobals) {
    auto &Indices = LDSParams.LDSToReplacementIndicesMap[DynGV];
    Constant *Index1 = ConstantInt::get(Int32Ty, Indices[1]);

    // Update the Offset metadata.
    auto *GEPForOffset = IRB.CreateInBoundsGEP(
        MetadataStructType, SwLDSMetadata, {Index0, Index1, Index2Offset});
    IRB.CreateStore(CurrMallocSize, GEPForOffset);

    // Update the Size metadata with the value loaded from HiddenDynLDSSize.
    auto *GEPForSize = IRB.CreateInBoundsGEP(MetadataStructType, SwLDSMetadata,
                                             {Index0, Index1, Index2Size});
    Value *CurrDynLDSSize = IRB.CreateLoad(Int32Ty, HiddenDynLDSSize);
    IRB.CreateStore(CurrDynLDSSize, GEPForSize);

    // Update the Aligned Size metadata:
    //   ((Size + Align - 1) / Align) * Align.
    auto *GEPForAlignedSize = IRB.CreateInBoundsGEP(
        MetadataStructType, SwLDSMetadata, {Index0, Index1, Index2AlignedSize});
    Value *AlignedDynLDSSize =
        IRB.CreateAdd(CurrDynLDSSize, MaxAlignValueMinusOne);
    AlignedDynLDSSize = IRB.CreateUDiv(AlignedDynLDSSize, MaxAlignValue);
    AlignedDynLDSSize = IRB.CreateMul(AlignedDynLDSSize, MaxAlignValue);
    IRB.CreateStore(AlignedDynLDSSize, GEPForAlignedSize);

    // Update the Current Malloc Size
    CurrMallocSize = IRB.CreateAdd(CurrMallocSize, AlignedDynLDSSize);
  }
}
454+
401455
void AMDGPUSwLowerLDS::lowerKernelLDSAccesses(Function *Func,
402456
DomTreeUpdater &DTU) {
403-
LLVM_DEBUG(dbgs() << "Sw Lowering Kernel LDS for : "
404-
<< Func->getName().str());
457+
LLVM_DEBUG(dbgs() << "Sw Lowering Kernel LDS for : " << Func->getName());
405458
auto &LDSParams = KernelToLDSParametersMap[Func];
406459
auto &Ctx = M.getContext();
407460
auto *PrevEntryBlock = &Func->getEntryBlock();
@@ -423,12 +476,6 @@ void AMDGPUSwLowerLDS::lowerKernelLDSAccesses(Function *Func,
423476
auto *const XYZOr = IRB.CreateOr(XYOr, WIdz);
424477
auto *const WIdzCond = IRB.CreateICmpEQ(XYZOr, IRB.getInt32(0));
425478

426-
GlobalVariable *SwLDS = LDSParams.SwLDS;
427-
GlobalVariable *SwLDSMetadata = LDSParams.SwLDSMetadata;
428-
assert(SwLDS && SwLDSMetadata);
429-
StructType *MetadataStructType =
430-
cast<StructType>(SwLDSMetadata->getValueType());
431-
432479
// All work items will branch to PrevEntryBlock except {0,0,0} index
433480
// work item which will branch to malloc block.
434481
IRB.CreateCondBr(WIdzCond, MallocBlock, PrevEntryBlock);
@@ -439,79 +486,56 @@ void AMDGPUSwLowerLDS::lowerKernelLDSAccesses(Function *Func,
439486
// If Dynamic LDS globals are accessed by the kernel,
440487
// Get the size of dyn lds from hidden dyn_lds_size kernel arg.
441488
// Update the corresponding metadata global entries for this dyn lds global.
489+
GlobalVariable *SwLDS = LDSParams.SwLDS;
490+
GlobalVariable *SwLDSMetadata = LDSParams.SwLDSMetadata;
491+
assert(SwLDS && SwLDSMetadata);
492+
StructType *MetadataStructType =
493+
cast<StructType>(SwLDSMetadata->getValueType());
442494
uint32_t MallocSize = 0;
443495
Value *CurrMallocSize;
496+
Type *Int32Ty = IRB.getInt32Ty();
497+
Type *Int64Ty = IRB.getInt64Ty();
444498

445499
unsigned NumStaticLDS = LDSParams.DirectAccess.StaticLDSGlobals.size() +
446500
LDSParams.IndirectAccess.StaticLDSGlobals.size();
447501
unsigned NumDynLDS = LDSParams.DirectAccess.DynamicLDSGlobals.size() +
448502
LDSParams.IndirectAccess.DynamicLDSGlobals.size();
449503

450504
if (NumStaticLDS) {
451-
auto *GEPForEndStaticLDSOffset = IRB.CreateInBoundsGEP(
452-
MetadataStructType, SwLDSMetadata,
453-
{IRB.getInt32(0), IRB.getInt32(NumStaticLDS - 1), IRB.getInt32(0)});
454-
455-
auto *GEPForEndStaticLDSSize = IRB.CreateInBoundsGEP(
456-
MetadataStructType, SwLDSMetadata,
457-
{IRB.getInt32(0), IRB.getInt32(NumStaticLDS - 1), IRB.getInt32(2)});
505+
auto *GEPForEndStaticLDSOffset =
506+
IRB.CreateInBoundsGEP(MetadataStructType, SwLDSMetadata,
507+
{ConstantInt::get(Int32Ty, 0),
508+
ConstantInt::get(Int32Ty, NumStaticLDS - 1),
509+
ConstantInt::get(Int32Ty, 0)});
510+
511+
auto *GEPForEndStaticLDSSize =
512+
IRB.CreateInBoundsGEP(MetadataStructType, SwLDSMetadata,
513+
{ConstantInt::get(Int32Ty, 0),
514+
ConstantInt::get(Int32Ty, NumStaticLDS - 1),
515+
ConstantInt::get(Int32Ty, 2)});
458516

459517
Value *EndStaticLDSOffset =
460-
IRB.CreateLoad(IRB.getInt64Ty(), GEPForEndStaticLDSOffset);
461-
Value *EndStaticLDSSize =
462-
IRB.CreateLoad(IRB.getInt64Ty(), GEPForEndStaticLDSSize);
518+
IRB.CreateLoad(Int32Ty, GEPForEndStaticLDSOffset);
519+
Value *EndStaticLDSSize = IRB.CreateLoad(Int32Ty, GEPForEndStaticLDSSize);
463520
CurrMallocSize = IRB.CreateAdd(EndStaticLDSOffset, EndStaticLDSSize);
464521
} else
465-
CurrMallocSize = IRB.getInt64(MallocSize);
522+
CurrMallocSize = IRB.getInt32(MallocSize);
466523

467524
if (NumDynLDS) {
468-
unsigned MaxAlignment = SwLDS->getAlignment();
469-
Value *MaxAlignValue = IRB.getInt64(MaxAlignment);
470-
Value *MaxAlignValueMinusOne = IRB.getInt64(MaxAlignment - 1);
471-
525+
// Get size from hidden dyn_lds_size argument of kernel
472526
Value *ImplicitArg =
473527
IRB.CreateIntrinsic(Intrinsic::amdgcn_implicitarg_ptr, {}, {});
474528
Value *HiddenDynLDSSize = IRB.CreateInBoundsGEP(
475-
ImplicitArg->getType(), ImplicitArg, {IRB.getInt32(15)});
476-
477-
auto MallocSizeCalcLambda =
478-
[&](SetVector<GlobalVariable *> &DynamicLDSGlobals) {
479-
for (GlobalVariable *DynGV : DynamicLDSGlobals) {
480-
auto &Indices = LDSParams.LDSToReplacementIndicesMap[DynGV];
481-
482-
// Update the Offset metadata.
483-
auto *GEPForOffset = IRB.CreateInBoundsGEP(
484-
MetadataStructType, SwLDSMetadata,
485-
{IRB.getInt32(0), IRB.getInt32(Indices[1]), IRB.getInt32(0)});
486-
IRB.CreateStore(CurrMallocSize, GEPForOffset);
487-
488-
// Get size from hidden dyn_lds_size argument of kernel
489-
// Update the size and Aligned Size metadata.
490-
auto *GEPForSize = IRB.CreateInBoundsGEP(
491-
MetadataStructType, SwLDSMetadata,
492-
{IRB.getInt32(0), IRB.getInt32(Indices[1]), IRB.getInt32(1)});
493-
Value *CurrDynLDSSize =
494-
IRB.CreateLoad(IRB.getInt64Ty(), HiddenDynLDSSize);
495-
IRB.CreateStore(CurrDynLDSSize, GEPForSize);
496-
497-
auto *GEPForAlignedSize = IRB.CreateInBoundsGEP(
498-
MetadataStructType, SwLDSMetadata,
499-
{IRB.getInt32(0), IRB.getInt32(Indices[1]), IRB.getInt32(2)});
500-
Value *AlignedDynLDSSize =
501-
IRB.CreateAdd(CurrDynLDSSize, MaxAlignValueMinusOne);
502-
AlignedDynLDSSize =
503-
IRB.CreateUDiv(AlignedDynLDSSize, MaxAlignValue);
504-
AlignedDynLDSSize = IRB.CreateMul(AlignedDynLDSSize, MaxAlignValue);
505-
IRB.CreateStore(AlignedDynLDSSize, GEPForAlignedSize);
506-
507-
// Update the Current Malloc Size
508-
CurrMallocSize = IRB.CreateAdd(CurrMallocSize, AlignedDynLDSSize);
509-
}
510-
};
511-
MallocSizeCalcLambda(LDSParams.DirectAccess.DynamicLDSGlobals);
512-
MallocSizeCalcLambda(LDSParams.IndirectAccess.DynamicLDSGlobals);
529+
ImplicitArg->getType(), ImplicitArg,
530+
{ConstantInt::get(Int64Ty, COV5_HIDDEN_DYN_LDS_SIZE_ARG)});
531+
updateMallocSizeForDynamicLDS(Func, CurrMallocSize, HiddenDynLDSSize,
532+
LDSParams.DirectAccess.DynamicLDSGlobals);
533+
updateMallocSizeForDynamicLDS(Func, CurrMallocSize, HiddenDynLDSSize,
534+
LDSParams.IndirectAccess.DynamicLDSGlobals);
513535
}
514536

537+
CurrMallocSize = IRB.CreateZExt(CurrMallocSize, Int64Ty);
538+
515539
// Create a call to malloc function which does device global memory allocation
516540
// with size equals to all LDS global accesses size in this kernel.
517541
FunctionCallee AMDGPUMallocFunc = M.getOrInsertFunction(
@@ -679,7 +703,7 @@ void AMDGPUSwLowerLDS::lowerNonKernelLDSAccesses(
679703
// Replace LDS access in non-kernel with replacement queried from
680704
// Base table and offset from offset table.
681705
LLVM_DEBUG(dbgs() << "Sw LDS lowering, lower non-kernel access for : "
682-
<< Func->getName().str());
706+
<< Func->getName());
683707
auto *EntryBlock = &Func->getEntryBlock();
684708
IRB.SetInsertPoint(EntryBlock, EntryBlock->begin());
685709
Function *Decl =
@@ -752,6 +776,8 @@ bool AMDGPUSwLowerLDS::run() {
752776
for (auto &K : LDSAccesses) {
753777
Function *F = K.first;
754778
assert(isKernelLDS(F));
779+
if (!F->hasFnAttribute(Attribute::SanitizeAddress))
780+
continue;
755781

756782
if (!KernelToLDSParametersMap.contains(F)) {
757783
KernelLDSParameters KernelLDSParams;
@@ -816,6 +842,8 @@ bool AMDGPUSwLowerLDS::run() {
816842
buildNonKernelLDSOffsetTable(NKLDSParams);
817843
for (auto &K : NonKernelToLDSAccessMap) {
818844
Function *Func = K.first;
845+
if (!Func->hasFnAttribute(Attribute::SanitizeAddress))
846+
continue;
819847
DenseSet<GlobalVariable *> &LDSGlobals = K.second;
820848
SetVector<GlobalVariable *> OrderedLDSGlobals = sortByName(
821849
std::vector<GlobalVariable *>(LDSGlobals.begin(), LDSGlobals.end()));

llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ namespace AMDGPU {
3131
using FunctionVariableMap = DenseMap<Function *, DenseSet<GlobalVariable *>>;
3232
using VariableFunctionMap = DenseMap<GlobalVariable *, DenseSet<Function *>>;
3333

34-
Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
34+
Align getAlign(const DataLayout &DL, const GlobalVariable *GV);
3535

3636
bool isDynamicLDS(const GlobalVariable &GV);
3737
bool isLDSVariableToLower(const GlobalVariable &GV);

0 commit comments

Comments
 (0)