Skip to content

Commit 774893d

Browse files
krzysz00 and qedawkins authored
[mlir][ROCDL] Plumb through AMDGPU memory access metadata (#110916)
The LLVM backend has moved away from function-wide attributes (such as "unsafe-fp-atomics") for making assurances about potentially unsafe atomic operations, and now uses metadata attached to individual atomic operations instead. This commit adds support for generating that per-operation metadata from MLIR. --------- Co-authored-by: Quinn Dawkins <[email protected]>
1 parent d905a3c commit 774893d

File tree

4 files changed

+57
-3
lines changed

4 files changed

+57
-3
lines changed

mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td

+1
Original file line numberDiff line numberDiff line change
@@ -1071,6 +1071,7 @@ def LLVM_ConstantRangeAttr : LLVM_Attr<"ConstantRange", "constant_range"> {
10711071
Syntax:
10721072
```
10731073
`<` `i`(width($lower)) $lower `,` $upper `>`
1074+
```
10741075
}];
10751076

10761077
let builders = [

mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td

+7-2
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,12 @@ def ROCDL_Dialect : Dialect {
5858
"::mlir::StringAttr":$flat_work_group_size,
5959
"::mlir::IntegerAttr":$max_flat_work_group_size,
6060
"::mlir::IntegerAttr":$waves_per_eu,
61-
"::mlir::BoolAttr":$unsafe_fp_atomics
61+
"::mlir::BoolAttr":$unsafe_fp_atomics,
62+
// Correspond to LLVM metadata of the same name
63+
"::mlir::UnitAttr":$last_use,
64+
"::mlir::UnitAttr":$no_remote_memory,
65+
"::mlir::UnitAttr":$no_fine_grained_memory,
66+
"::mlir::UnitAttr":$ignore_denormal_mode
6267
);
6368

6469
let useDefaultAttributePrinterParser = 1;
@@ -88,7 +93,7 @@ class ROCDL_IntrPure1Op<string mnemonic> :
8893

8994
class ROCDL_IntrOp<string mnemonic, list<int> overloadedResults,
9095
list<int> overloadedOperands, list<Trait> traits, int numResults,
91-
int requiresAccessGroup = 0, int requiresAliasAnalysis = 0, list<int> immArgPositions = [],
96+
int requiresAccessGroup = 0, int requiresAliasAnalysis = 0, list<int> immArgPositions = [],
9297
list<string> immArgAttrNames = []> :
9398
LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
9499
"amdgcn_" # !subst(".", "_", mnemonic), overloadedResults,

mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp

+26-1
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ class ROCDLDialectLLVMIRTranslationInterface
7777
NamedAttribute attribute,
7878
LLVM::ModuleTranslation &moduleTranslation) const final {
7979
auto *dialect = dyn_cast<ROCDL::ROCDLDialect>(attribute.getNameDialect());
80+
llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
8081
if (dialect->getKernelAttrHelper().getName() == attribute.getName()) {
8182
auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
8283
if (!func)
@@ -198,7 +199,6 @@ class ROCDLDialectLLVMIRTranslationInterface
198199
if (!value)
199200
return op->emitOpError(Twine(attribute.getName()) +
200201
" must be a dense i32 array attribute");
201-
llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
202202
SmallVector<llvm::Metadata *, 3> metadata;
203203
llvm::Type *i32 = llvm::IntegerType::get(llvmContext, 32);
204204
for (int32_t i : value.asArrayRef()) {
@@ -210,6 +210,31 @@ class ROCDLDialectLLVMIRTranslationInterface
210210
llvm::MDNode *node = llvm::MDNode::get(llvmContext, metadata);
211211
llvmFunc->setMetadata("reqd_work_group_size", node);
212212
}
213+
214+
// Atomic and nontemporal metadata
215+
if (dialect->getLastUseAttrHelper().getName() == attribute.getName()) {
216+
for (llvm::Instruction *i : instructions)
217+
i->setMetadata("amdgpu.last.use", llvm::MDNode::get(llvmContext, {}));
218+
}
219+
if (dialect->getNoRemoteMemoryAttrHelper().getName() ==
220+
attribute.getName()) {
221+
for (llvm::Instruction *i : instructions)
222+
i->setMetadata("amdgpu.no.remote.memory",
223+
llvm::MDNode::get(llvmContext, {}));
224+
}
225+
if (dialect->getNoFineGrainedMemoryAttrHelper().getName() ==
226+
attribute.getName()) {
227+
for (llvm::Instruction *i : instructions)
228+
i->setMetadata("amdgpu.no.fine.grained.memory",
229+
llvm::MDNode::get(llvmContext, {}));
230+
}
231+
if (dialect->getIgnoreDenormalModeAttrHelper().getName() ==
232+
attribute.getName()) {
233+
for (llvm::Instruction *i : instructions)
234+
i->setMetadata("amdgpu.ignore.denormal.mode",
235+
llvm::MDNode::get(llvmContext, {}));
236+
}
237+
213238
return success();
214239
}
215240
};

mlir/test/Target/LLVMIR/rocdl.mlir

+23
Original file line numberDiff line numberDiff line change
@@ -564,11 +564,34 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 {
564564
}
565565

566566
llvm.func @rocdl_16bit_packed_floats(%sourceA: f32, %sourceB: f32) -> vector<2xf16> {
567+
// CHECK-LABEL: @rocdl_16bit_packed_floats
567568
// CHECK: call <2 x half> @llvm.amdgcn.cvt.pkrtz(float {{.*}}, float {{.*}})
568569
%source = rocdl.cvt.pkrtz %sourceA, %sourceB : vector<2xf16>
569570
llvm.return %source : vector<2xf16>
570571
}
571572

573+
llvm.func @rocdl_atomic_attrs(%ptr: !llvm.ptr<1>, %data: f32) {
574+
// CHECK-LABEL: @rocdl_atomic_attrs
575+
// CHECK: atomicrmw
576+
// CHECK-SAME: !amdgpu.ignore.denormal.mode
577+
// CHECK-SAME: !amdgpu.no.fine.grained.memory
578+
// CHECK-SAME: !amdgpu.no.remote.memory
579+
llvm.atomicrmw fadd %ptr, %data monotonic {
580+
rocdl.ignore_denormal_mode,
581+
rocdl.no_fine_grained_memory,
582+
rocdl.no_remote_memory} : !llvm.ptr<1>, f32
583+
llvm.return
584+
}
585+
586+
llvm.func @rocdl_last_use(%ptr: !llvm.ptr<1>) -> i32 {
587+
// CHECK-LABEL: @rocdl_last_use
588+
// CHECK: %[[ret:.+]] = load
589+
// CHECK-SAME: !amdgpu.last.use
590+
// CHECK: ret i32 %[[ret]]
591+
%ret = llvm.load %ptr {rocdl.last_use} : !llvm.ptr<1> -> i32
592+
llvm.return %ret : i32
593+
}
594+
572595
// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "uniform-work-group-size"="true" }
573596
// CHECK-DAG: attributes #[[$KERNEL_WORKGROUP_ATTRS]] = { "amdgpu-flat-work-group-size"="1,1024"
574597
// CHECK-DAG: attributes #[[$KNOWN_BLOCK_SIZE_ATTRS]] = { "amdgpu-flat-work-group-size"="128,128"

0 commit comments

Comments
 (0)