|
15 | 15 | #include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
|
16 | 16 |
|
17 | 17 | #include "llvm/ADT/StringMap.h"
|
18 |
| -#include "llvm/BinaryFormat/MsgPackDocument.h" |
19 |
| -#include "llvm/Object/ELFObjectFile.h" |
20 |
| -#include "llvm/Object/ObjectFile.h" |
21 |
| -#include "llvm/Support/AMDGPUMetadata.h" |
| 18 | +#include "llvm/Frontend/Offloading/Utility.h" |
22 | 19 |
|
23 | 20 | using namespace mlir;
|
24 | 21 | using namespace mlir::ROCDL;
|
25 | 22 |
|
26 |
| -/// Search the ELF object and return an object containing the `amdhsa.kernels` |
27 |
| -/// metadata note. Function adapted from: |
28 |
| -/// llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp Also see |
29 |
| -/// `amdhsa.kernels`: |
30 |
| -/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3-metadata |
31 |
| -template <typename ELFT> |
32 |
| -static std::unique_ptr<llvm::msgpack::Document> |
33 |
| -getAMDHSANote(llvm::object::ELFObjectFile<ELFT> &elfObj) { |
34 |
| - using namespace llvm; |
35 |
| - using namespace llvm::object; |
36 |
| - using namespace llvm::ELF; |
37 |
| - const ELFFile<ELFT> &elf = elfObj.getELFFile(); |
38 |
| - Expected<typename ELFT::ShdrRange> secOrErr = elf.sections(); |
39 |
| - if (!secOrErr) { |
40 |
| - consumeError(secOrErr.takeError()); |
41 |
| - return nullptr; |
42 |
| - } |
43 |
| - ArrayRef<typename ELFT::Shdr> sections = *secOrErr; |
44 |
| - for (const typename ELFT::Shdr §ion : sections) { |
45 |
| - if (section.sh_type != ELF::SHT_NOTE) |
46 |
| - continue; |
47 |
| - size_t align = std::max(static_cast<unsigned>(section.sh_addralign), 4u); |
48 |
| - Error err = Error::success(); |
49 |
| - for (const typename ELFT::Note note : elf.notes(section, err)) { |
50 |
| - StringRef name = note.getName(); |
51 |
| - if (name != "AMDGPU") |
52 |
| - continue; |
53 |
| - uint32_t type = note.getType(); |
54 |
| - if (type != ELF::NT_AMDGPU_METADATA) |
55 |
| - continue; |
56 |
| - ArrayRef<uint8_t> desc = note.getDesc(align); |
57 |
| - StringRef msgPackString = |
58 |
| - StringRef(reinterpret_cast<const char *>(desc.data()), desc.size()); |
59 |
| - std::unique_ptr<llvm::msgpack::Document> msgPackDoc( |
60 |
| - new llvm::msgpack::Document()); |
61 |
| - if (!msgPackDoc->readFromBlob(msgPackString, /*Multi=*/false)) |
62 |
| - return nullptr; |
63 |
| - if (msgPackDoc->getRoot().isScalar()) |
64 |
| - return nullptr; |
65 |
| - return msgPackDoc; |
66 |
| - } |
67 |
| - } |
68 |
| - return nullptr; |
69 |
| -} |
70 |
| - |
71 |
| -/// Return the `amdhsa.kernels` metadata in the ELF object or nullptr on |
72 |
| -/// failure. This is a helper function that casts a generic `ObjectFile` to the |
73 |
| -/// appropiate `ELFObjectFile`. |
74 |
| -static std::unique_ptr<llvm::msgpack::Document> |
75 |
| -getAMDHSANote(ArrayRef<char> elfData) { |
76 |
| - using namespace llvm; |
77 |
| - using namespace llvm::object; |
78 |
| - if (elfData.empty()) |
79 |
| - return nullptr; |
80 |
| - MemoryBufferRef buffer(StringRef(elfData.data(), elfData.size()), "buffer"); |
81 |
| - Expected<std::unique_ptr<ObjectFile>> objOrErr = |
82 |
| - ObjectFile::createELFObjectFile(buffer); |
83 |
| - if (!objOrErr || !objOrErr.get()) { |
84 |
| - // Drop the error. |
85 |
| - llvm::consumeError(objOrErr.takeError()); |
86 |
| - return nullptr; |
87 |
| - } |
88 |
| - ObjectFile &elf = *(objOrErr.get()); |
89 |
| - if (auto *obj = dyn_cast<ELF32LEObjectFile>(&elf)) |
90 |
| - return getAMDHSANote(*obj); |
91 |
| - else if (auto *obj = dyn_cast<ELF32BEObjectFile>(&elf)) |
92 |
| - return getAMDHSANote(*obj); |
93 |
| - else if (auto *obj = dyn_cast<ELF64LEObjectFile>(&elf)) |
94 |
| - return getAMDHSANote(*obj); |
95 |
| - else if (auto *obj = dyn_cast<ELF64BEObjectFile>(&elf)) |
96 |
| - return getAMDHSANote(*obj); |
97 |
| - return nullptr; |
98 |
| -} |
99 |
| - |
100 |
| -/// Utility functions for converting `llvm::msgpack::DocNode` nodes. |
101 |
| -static Attribute convertNode(Builder &builder, llvm::msgpack::DocNode &node); |
102 |
| -static Attribute convertNode(Builder &builder, |
103 |
| - llvm::msgpack::MapDocNode &node) { |
104 |
| - NamedAttrList attrs; |
105 |
| - for (auto &[keyNode, valueNode] : node) { |
106 |
| - if (!keyNode.isString()) |
107 |
| - continue; |
108 |
| - StringRef key = keyNode.getString(); |
109 |
| - if (Attribute attr = convertNode(builder, valueNode)) { |
110 |
| - key.consume_front("."); |
111 |
| - key.consume_back("."); |
112 |
| - attrs.append(key, attr); |
113 |
| - } |
114 |
| - } |
115 |
| - if (attrs.empty()) |
116 |
| - return nullptr; |
117 |
| - return builder.getDictionaryAttr(attrs); |
118 |
| -} |
119 |
| - |
120 |
| -static Attribute convertNode(Builder &builder, |
121 |
| - llvm::msgpack::ArrayDocNode &node) { |
122 |
| - // Use `DenseIntAttr` if we know all the attrs are ints. |
123 |
| - if (llvm::all_of(node, [](llvm::msgpack::DocNode &n) { |
124 |
| - llvm::msgpack::Type kind = n.getKind(); |
125 |
| - return kind == llvm::msgpack::Type::Int || |
126 |
| - kind == llvm::msgpack::Type::UInt; |
127 |
| - })) { |
128 |
| - SmallVector<int64_t> values; |
129 |
| - for (llvm::msgpack::DocNode &n : node) { |
130 |
| - llvm::msgpack::Type kind = n.getKind(); |
131 |
| - if (kind == llvm::msgpack::Type::Int) |
132 |
| - values.push_back(n.getInt()); |
133 |
| - else if (kind == llvm::msgpack::Type::UInt) |
134 |
| - values.push_back(n.getUInt()); |
135 |
| - } |
136 |
| - return builder.getDenseI64ArrayAttr(values); |
137 |
| - } |
138 |
| - // Convert the array. |
139 |
| - SmallVector<Attribute> attrs; |
140 |
| - for (llvm::msgpack::DocNode &n : node) { |
141 |
| - if (Attribute attr = convertNode(builder, n)) |
142 |
| - attrs.push_back(attr); |
143 |
| - } |
144 |
| - if (attrs.empty()) |
145 |
| - return nullptr; |
146 |
| - return builder.getArrayAttr(attrs); |
147 |
| -} |
148 |
| - |
149 |
| -static Attribute convertNode(Builder &builder, llvm::msgpack::DocNode &node) { |
150 |
| - using namespace llvm::msgpack; |
151 |
| - switch (node.getKind()) { |
152 |
| - case llvm::msgpack::Type::Int: |
153 |
| - return builder.getI64IntegerAttr(node.getInt()); |
154 |
| - case llvm::msgpack::Type::UInt: |
155 |
| - return builder.getI64IntegerAttr(node.getUInt()); |
156 |
| - case llvm::msgpack::Type::Boolean: |
157 |
| - return builder.getI64IntegerAttr(node.getBool()); |
158 |
| - case llvm::msgpack::Type::String: |
159 |
| - return builder.getStringAttr(node.getString()); |
160 |
| - case llvm::msgpack::Type::Array: |
161 |
| - return convertNode(builder, node.getArray()); |
162 |
| - case llvm::msgpack::Type::Map: |
163 |
| - return convertNode(builder, node.getMap()); |
164 |
| - default: |
165 |
| - return nullptr; |
166 |
| - } |
167 |
| -} |
168 |
| - |
169 |
| -/// The following function should succeed for Code object V3 and above. |
170 | 23 | std::optional<DenseMap<StringAttr, NamedAttrList>>
|
171 | 24 | mlir::ROCDL::getAMDHSAKernelsELFMetadata(Builder &builder,
|
172 | 25 | ArrayRef<char> elfData) {
|
173 |
| - using namespace llvm::msgpack; |
174 |
| - std::unique_ptr<llvm::msgpack::Document> metadata = getAMDHSANote(elfData); |
175 |
| - if (!metadata) |
| 26 | + uint16_t elfABIVersion; |
| 27 | + llvm::StringMap<llvm::offloading::amdgpu::AMDGPUKernelMetaData> kernels; |
| 28 | + llvm::MemoryBufferRef buffer(StringRef(elfData.data(), elfData.size()), |
| 29 | + "buffer"); |
| 30 | + // Get the metadata. |
| 31 | + llvm::Error error = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage( |
| 32 | + buffer, kernels, elfABIVersion); |
| 33 | + // Return `nullopt` if the metadata couldn't be retrieved. |
| 34 | + if (!error) { |
| 35 | + llvm::consumeError(std::move(error)); |
176 | 36 | return std::nullopt;
|
| 37 | + } |
| 38 | + // Helper lambda for converting values. |
| 39 | + auto getI32Array = [&builder](const uint32_t *array) { |
| 40 | + return builder.getDenseI32ArrayAttr({static_cast<int32_t>(array[0]), |
| 41 | + static_cast<int32_t>(array[1]), |
| 42 | + static_cast<int32_t>(array[2])}); |
| 43 | + }; |
177 | 44 | DenseMap<StringAttr, NamedAttrList> kernelMD;
|
178 |
| - DocNode &rootNode = (metadata)->getRoot(); |
179 |
| - // Fail if `rootNode` is not a map -it should be for AMD Obj Ver 3. |
180 |
| - if (!rootNode.isMap()) |
181 |
| - return std::nullopt; |
182 |
| - DocNode &kernels = rootNode.getMap()["amdhsa.kernels"]; |
183 |
| - // Fail if `amdhsa.kernels` is not an array. |
184 |
| - if (!kernels.isArray()) |
185 |
| - return std::nullopt; |
186 |
| - // Convert each of the kernels. |
187 |
| - for (DocNode &kernel : kernels.getArray()) { |
188 |
| - if (!kernel.isMap()) |
189 |
| - continue; |
190 |
| - MapDocNode &kernelMap = kernel.getMap(); |
191 |
| - DocNode &nameNode = kernelMap[".name"]; |
192 |
| - if (!nameNode.isString()) |
193 |
| - continue; |
194 |
| - StringRef name = nameNode.getString(); |
195 |
| - NamedAttrList attrList; |
196 |
| - // Convert the kernel properties. |
197 |
| - for (auto &[keyNode, valueNode] : kernelMap) { |
198 |
| - if (!keyNode.isString()) |
199 |
| - continue; |
200 |
| - StringRef key = keyNode.getString(); |
201 |
| - key.consume_front("."); |
202 |
| - key.consume_back("."); |
203 |
| - if (key == "name") |
204 |
| - continue; |
205 |
| - if (Attribute attr = convertNode(builder, valueNode)) |
206 |
| - attrList.append(key, attr); |
207 |
| - } |
208 |
| - if (!attrList.empty()) |
209 |
| - kernelMD[builder.getStringAttr(name)] = std::move(attrList); |
| 45 | + for (const auto &[name, kernel] : kernels) { |
| 46 | + NamedAttrList attrs; |
| 47 | + // Add kernel metadata. |
| 48 | + attrs.append("agpr_count", builder.getI64IntegerAttr(kernel.AGPRCount)); |
| 49 | + attrs.append("sgpr_count", builder.getI64IntegerAttr(kernel.SGPRCount)); |
| 50 | + attrs.append("vgpr_count", builder.getI64IntegerAttr(kernel.VGPRCount)); |
| 51 | + attrs.append("sgpr_spill_count", |
| 52 | + builder.getI64IntegerAttr(kernel.SGPRSpillCount)); |
| 53 | + attrs.append("vgpr_spill_count", |
| 54 | + builder.getI64IntegerAttr(kernel.VGPRSpillCount)); |
| 55 | + attrs.append("wavefront_size", |
| 56 | + builder.getI64IntegerAttr(kernel.WavefrontSize)); |
| 57 | + attrs.append("max_flat_workgroup_size", |
| 58 | + builder.getI64IntegerAttr(kernel.MaxFlatWorkgroupSize)); |
| 59 | + attrs.append("group_segment_fixed_size", |
| 60 | + builder.getI64IntegerAttr(kernel.GroupSegmentList)); |
| 61 | + attrs.append("private_segment_fixed_size", |
| 62 | + builder.getI64IntegerAttr(kernel.PrivateSegmentSize)); |
| 63 | + attrs.append("reqd_workgroup_size", |
| 64 | + getI32Array(kernel.RequestedWorkgroupSize)); |
| 65 | + attrs.append("workgroup_size_hint", getI32Array(kernel.WorkgroupSizeHint)); |
| 66 | + kernelMD[builder.getStringAttr(name)] = std::move(attrs); |
210 | 67 | }
|
211 |
| - return kernelMD; |
| 68 | + return std::move(kernelMD); |
212 | 69 | }
|
213 | 70 |
|
214 | 71 | gpu::KernelTableAttr mlir::ROCDL::getKernelMetadata(Operation *gpuModule,
|
|
0 commit comments