Skip to content

Commit 1ba1fc7

Browse files
committed
Rebase and use LLVM offload utilities to get AMD metadata
1 parent 8370503 commit 1ba1fc7

File tree

2 files changed

+42
-185
lines changed

2 files changed

+42
-185
lines changed

mlir/lib/Target/LLVM/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,8 @@ add_mlir_dialect_library(MLIRROCDLTarget
115115
OBJECT
116116

117117
LINK_COMPONENTS
118+
FrontendOffloading
118119
MCParser
119-
Object
120120
${AMDGPU_LIBS}
121121

122122
LINK_LIBS PUBLIC

mlir/lib/Target/LLVM/ROCDL/Utils.cpp

Lines changed: 41 additions & 184 deletions
Original file line numberDiff line numberDiff line change
@@ -15,200 +15,57 @@
1515
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
1616

1717
#include "llvm/ADT/StringMap.h"
18-
#include "llvm/BinaryFormat/MsgPackDocument.h"
19-
#include "llvm/Object/ELFObjectFile.h"
20-
#include "llvm/Object/ObjectFile.h"
21-
#include "llvm/Support/AMDGPUMetadata.h"
18+
#include "llvm/Frontend/Offloading/Utility.h"
2219

2320
using namespace mlir;
2421
using namespace mlir::ROCDL;
2522

26-
/// Search the ELF object and return an object containing the `amdhsa.kernels`
27-
/// metadata note. Function adapted from:
28-
/// llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp Also see
29-
/// `amdhsa.kernels`:
30-
/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3-metadata
31-
template <typename ELFT>
32-
static std::unique_ptr<llvm::msgpack::Document>
33-
getAMDHSANote(llvm::object::ELFObjectFile<ELFT> &elfObj) {
34-
using namespace llvm;
35-
using namespace llvm::object;
36-
using namespace llvm::ELF;
37-
const ELFFile<ELFT> &elf = elfObj.getELFFile();
38-
Expected<typename ELFT::ShdrRange> secOrErr = elf.sections();
39-
if (!secOrErr) {
40-
consumeError(secOrErr.takeError());
41-
return nullptr;
42-
}
43-
ArrayRef<typename ELFT::Shdr> sections = *secOrErr;
44-
for (const typename ELFT::Shdr &section : sections) {
45-
if (section.sh_type != ELF::SHT_NOTE)
46-
continue;
47-
size_t align = std::max(static_cast<unsigned>(section.sh_addralign), 4u);
48-
Error err = Error::success();
49-
for (const typename ELFT::Note note : elf.notes(section, err)) {
50-
StringRef name = note.getName();
51-
if (name != "AMDGPU")
52-
continue;
53-
uint32_t type = note.getType();
54-
if (type != ELF::NT_AMDGPU_METADATA)
55-
continue;
56-
ArrayRef<uint8_t> desc = note.getDesc(align);
57-
StringRef msgPackString =
58-
StringRef(reinterpret_cast<const char *>(desc.data()), desc.size());
59-
std::unique_ptr<llvm::msgpack::Document> msgPackDoc(
60-
new llvm::msgpack::Document());
61-
if (!msgPackDoc->readFromBlob(msgPackString, /*Multi=*/false))
62-
return nullptr;
63-
if (msgPackDoc->getRoot().isScalar())
64-
return nullptr;
65-
return msgPackDoc;
66-
}
67-
}
68-
return nullptr;
69-
}
70-
71-
/// Return the `amdhsa.kernels` metadata in the ELF object or nullptr on
72-
/// failure. This is a helper function that casts a generic `ObjectFile` to the
73-
/// appropiate `ELFObjectFile`.
74-
static std::unique_ptr<llvm::msgpack::Document>
75-
getAMDHSANote(ArrayRef<char> elfData) {
76-
using namespace llvm;
77-
using namespace llvm::object;
78-
if (elfData.empty())
79-
return nullptr;
80-
MemoryBufferRef buffer(StringRef(elfData.data(), elfData.size()), "buffer");
81-
Expected<std::unique_ptr<ObjectFile>> objOrErr =
82-
ObjectFile::createELFObjectFile(buffer);
83-
if (!objOrErr || !objOrErr.get()) {
84-
// Drop the error.
85-
llvm::consumeError(objOrErr.takeError());
86-
return nullptr;
87-
}
88-
ObjectFile &elf = *(objOrErr.get());
89-
if (auto *obj = dyn_cast<ELF32LEObjectFile>(&elf))
90-
return getAMDHSANote(*obj);
91-
else if (auto *obj = dyn_cast<ELF32BEObjectFile>(&elf))
92-
return getAMDHSANote(*obj);
93-
else if (auto *obj = dyn_cast<ELF64LEObjectFile>(&elf))
94-
return getAMDHSANote(*obj);
95-
else if (auto *obj = dyn_cast<ELF64BEObjectFile>(&elf))
96-
return getAMDHSANote(*obj);
97-
return nullptr;
98-
}
99-
100-
/// Utility functions for converting `llvm::msgpack::DocNode` nodes.
101-
static Attribute convertNode(Builder &builder, llvm::msgpack::DocNode &node);
102-
static Attribute convertNode(Builder &builder,
103-
llvm::msgpack::MapDocNode &node) {
104-
NamedAttrList attrs;
105-
for (auto &[keyNode, valueNode] : node) {
106-
if (!keyNode.isString())
107-
continue;
108-
StringRef key = keyNode.getString();
109-
if (Attribute attr = convertNode(builder, valueNode)) {
110-
key.consume_front(".");
111-
key.consume_back(".");
112-
attrs.append(key, attr);
113-
}
114-
}
115-
if (attrs.empty())
116-
return nullptr;
117-
return builder.getDictionaryAttr(attrs);
118-
}
119-
120-
static Attribute convertNode(Builder &builder,
121-
llvm::msgpack::ArrayDocNode &node) {
122-
// Use `DenseIntAttr` if we know all the attrs are ints.
123-
if (llvm::all_of(node, [](llvm::msgpack::DocNode &n) {
124-
llvm::msgpack::Type kind = n.getKind();
125-
return kind == llvm::msgpack::Type::Int ||
126-
kind == llvm::msgpack::Type::UInt;
127-
})) {
128-
SmallVector<int64_t> values;
129-
for (llvm::msgpack::DocNode &n : node) {
130-
llvm::msgpack::Type kind = n.getKind();
131-
if (kind == llvm::msgpack::Type::Int)
132-
values.push_back(n.getInt());
133-
else if (kind == llvm::msgpack::Type::UInt)
134-
values.push_back(n.getUInt());
135-
}
136-
return builder.getDenseI64ArrayAttr(values);
137-
}
138-
// Convert the array.
139-
SmallVector<Attribute> attrs;
140-
for (llvm::msgpack::DocNode &n : node) {
141-
if (Attribute attr = convertNode(builder, n))
142-
attrs.push_back(attr);
143-
}
144-
if (attrs.empty())
145-
return nullptr;
146-
return builder.getArrayAttr(attrs);
147-
}
148-
149-
static Attribute convertNode(Builder &builder, llvm::msgpack::DocNode &node) {
150-
using namespace llvm::msgpack;
151-
switch (node.getKind()) {
152-
case llvm::msgpack::Type::Int:
153-
return builder.getI64IntegerAttr(node.getInt());
154-
case llvm::msgpack::Type::UInt:
155-
return builder.getI64IntegerAttr(node.getUInt());
156-
case llvm::msgpack::Type::Boolean:
157-
return builder.getI64IntegerAttr(node.getBool());
158-
case llvm::msgpack::Type::String:
159-
return builder.getStringAttr(node.getString());
160-
case llvm::msgpack::Type::Array:
161-
return convertNode(builder, node.getArray());
162-
case llvm::msgpack::Type::Map:
163-
return convertNode(builder, node.getMap());
164-
default:
165-
return nullptr;
166-
}
167-
}
168-
169-
/// The following function should succeed for Code object V3 and above.
17023
std::optional<DenseMap<StringAttr, NamedAttrList>>
17124
mlir::ROCDL::getAMDHSAKernelsELFMetadata(Builder &builder,
17225
ArrayRef<char> elfData) {
173-
using namespace llvm::msgpack;
174-
std::unique_ptr<llvm::msgpack::Document> metadata = getAMDHSANote(elfData);
175-
if (!metadata)
26+
uint16_t elfABIVersion;
27+
llvm::StringMap<llvm::offloading::amdgpu::AMDGPUKernelMetaData> kernels;
28+
llvm::MemoryBufferRef buffer(StringRef(elfData.data(), elfData.size()),
29+
"buffer");
30+
// Get the metadata.
31+
llvm::Error error = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
32+
buffer, kernels, elfABIVersion);
33+
// Return `nullopt` if the metadata couldn't be retrieved.
34+
if (!error) {
35+
llvm::consumeError(std::move(error));
17636
return std::nullopt;
37+
}
38+
// Helper lambda for converting values.
39+
auto getI32Array = [&builder](const uint32_t *array) {
40+
return builder.getDenseI32ArrayAttr({static_cast<int32_t>(array[0]),
41+
static_cast<int32_t>(array[1]),
42+
static_cast<int32_t>(array[2])});
43+
};
17744
DenseMap<StringAttr, NamedAttrList> kernelMD;
178-
DocNode &rootNode = (metadata)->getRoot();
179-
// Fail if `rootNode` is not a map -it should be for AMD Obj Ver 3.
180-
if (!rootNode.isMap())
181-
return std::nullopt;
182-
DocNode &kernels = rootNode.getMap()["amdhsa.kernels"];
183-
// Fail if `amdhsa.kernels` is not an array.
184-
if (!kernels.isArray())
185-
return std::nullopt;
186-
// Convert each of the kernels.
187-
for (DocNode &kernel : kernels.getArray()) {
188-
if (!kernel.isMap())
189-
continue;
190-
MapDocNode &kernelMap = kernel.getMap();
191-
DocNode &nameNode = kernelMap[".name"];
192-
if (!nameNode.isString())
193-
continue;
194-
StringRef name = nameNode.getString();
195-
NamedAttrList attrList;
196-
// Convert the kernel properties.
197-
for (auto &[keyNode, valueNode] : kernelMap) {
198-
if (!keyNode.isString())
199-
continue;
200-
StringRef key = keyNode.getString();
201-
key.consume_front(".");
202-
key.consume_back(".");
203-
if (key == "name")
204-
continue;
205-
if (Attribute attr = convertNode(builder, valueNode))
206-
attrList.append(key, attr);
207-
}
208-
if (!attrList.empty())
209-
kernelMD[builder.getStringAttr(name)] = std::move(attrList);
45+
for (const auto &[name, kernel] : kernels) {
46+
NamedAttrList attrs;
47+
// Add kernel metadata.
48+
attrs.append("agpr_count", builder.getI64IntegerAttr(kernel.AGPRCount));
49+
attrs.append("sgpr_count", builder.getI64IntegerAttr(kernel.SGPRCount));
50+
attrs.append("vgpr_count", builder.getI64IntegerAttr(kernel.VGPRCount));
51+
attrs.append("sgpr_spill_count",
52+
builder.getI64IntegerAttr(kernel.SGPRSpillCount));
53+
attrs.append("vgpr_spill_count",
54+
builder.getI64IntegerAttr(kernel.VGPRSpillCount));
55+
attrs.append("wavefront_size",
56+
builder.getI64IntegerAttr(kernel.WavefrontSize));
57+
attrs.append("max_flat_workgroup_size",
58+
builder.getI64IntegerAttr(kernel.MaxFlatWorkgroupSize));
59+
attrs.append("group_segment_fixed_size",
60+
builder.getI64IntegerAttr(kernel.GroupSegmentList));
61+
attrs.append("private_segment_fixed_size",
62+
builder.getI64IntegerAttr(kernel.PrivateSegmentSize));
63+
attrs.append("reqd_workgroup_size",
64+
getI32Array(kernel.RequestedWorkgroupSize));
65+
attrs.append("workgroup_size_hint", getI32Array(kernel.WorkgroupSizeHint));
66+
kernelMD[builder.getStringAttr(name)] = std::move(attrs);
21067
}
211-
return kernelMD;
68+
return std::move(kernelMD);
21269
}
21370

21471
gpu::KernelTableAttr mlir::ROCDL::getKernelMetadata(Operation *gpuModule,

0 commit comments

Comments
 (0)