-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[MLIR][GPU-LLVM] Convert gpu.func
to llvm.func
#101664
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
08332a6
098af95
36d5bf0
ed7b600
81bf21c
361c336
bf25aec
3981cc8
af5955a
512724f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
//===- AttrToLLVMConverter.h - SPIR-V attributes conversion to LLVM - C++ -===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
#ifndef MLIR_CONVERSION_SPIRVCOMMON_ATTRTOLLVMCONVERTER_H_ | ||
#define MLIR_CONVERSION_SPIRVCOMMON_ATTRTOLLVMCONVERTER_H_ | ||
|
||
#include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h" | ||
|
||
namespace mlir { | ||
unsigned storageClassToAddressSpace(spirv::ClientAPI clientAPI, | ||
spirv::StorageClass storageClass); | ||
} // namespace mlir | ||
|
||
#endif // MLIR_CONVERSION_SPIRVCOMMON_ATTRTOLLVMCONVERTER_H_ |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,29 +25,58 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, | |
Location loc = gpuFuncOp.getLoc(); | ||
|
||
SmallVector<LLVM::GlobalOp, 3> workgroupBuffers; | ||
workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions()); | ||
for (const auto [idx, attribution] : | ||
llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) { | ||
auto type = dyn_cast<MemRefType>(attribution.getType()); | ||
assert(type && type.hasStaticShape() && "unexpected type in attribution"); | ||
|
||
uint64_t numElements = type.getNumElements(); | ||
|
||
auto elementType = | ||
cast<Type>(typeConverter->convertType(type.getElementType())); | ||
auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements); | ||
std::string name = | ||
std::string(llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), idx)); | ||
uint64_t alignment = 0; | ||
if (auto alignAttr = | ||
dyn_cast_or_null<IntegerAttr>(gpuFuncOp.getWorkgroupAttributionAttr( | ||
idx, LLVM::LLVMDialect::getAlignAttrName()))) | ||
alignment = alignAttr.getInt(); | ||
auto globalOp = rewriter.create<LLVM::GlobalOp>( | ||
gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false, | ||
LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment, | ||
workgroupAddrSpace); | ||
workgroupBuffers.push_back(globalOp); | ||
if (encodeWorkgroupAttributionsAsArguments) { | ||
ArrayRef<BlockArgument> workgroupAttributions = | ||
gpuFuncOp.getWorkgroupAttributions(); | ||
std::size_t numAttributions = workgroupAttributions.size(); | ||
victor-eds marked this conversation as resolved.
Show resolved
Hide resolved
victor-eds marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// Insert all arguments at the end. | ||
unsigned index = gpuFuncOp.getNumArguments(); | ||
SmallVector<unsigned> argIndices(numAttributions, index); | ||
|
||
// New arguments will simply be `llvm.ptr` with the correct address space | ||
Type workgroupPtrType = | ||
rewriter.getType<LLVM::LLVMPointerType>(workgroupAddrSpace); | ||
SmallVector<Type> argTypes(numAttributions, workgroupPtrType); | ||
|
||
// No argument attributes will be added | ||
DictionaryAttr emptyDict = rewriter.getDictionaryAttr({}); | ||
SmallVector<DictionaryAttr> argAttrs(numAttributions, emptyDict); | ||
|
||
// Argument locations match the function's location | ||
SmallVector<Location> argLocs(numAttributions, gpuFuncOp.getLoc()); | ||
|
||
// Perform signature modification | ||
rewriter.modifyOpInPlace( | ||
gpuFuncOp, [gpuFuncOp, &argIndices, &argTypes, &argAttrs, &argLocs]() { | ||
static_cast<FunctionOpInterface>(gpuFuncOp).insertArguments( | ||
argIndices, argTypes, argAttrs, argLocs); | ||
}); | ||
} else { | ||
workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions()); | ||
for (const auto [idx, attribution] : | ||
llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) { | ||
auto type = dyn_cast<MemRefType>(attribution.getType()); | ||
assert(type && type.hasStaticShape() && "unexpected type in attribution"); | ||
|
||
uint64_t numElements = type.getNumElements(); | ||
|
||
auto elementType = | ||
cast<Type>(typeConverter->convertType(type.getElementType())); | ||
auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements); | ||
std::string name = | ||
std::string(llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), idx)); | ||
uint64_t alignment = 0; | ||
if (auto alignAttr = dyn_cast_or_null<IntegerAttr>( | ||
gpuFuncOp.getWorkgroupAttributionAttr( | ||
idx, LLVM::LLVMDialect::getAlignAttrName()))) | ||
alignment = alignAttr.getInt(); | ||
auto globalOp = rewriter.create<LLVM::GlobalOp>( | ||
gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false, | ||
LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment, | ||
workgroupAddrSpace); | ||
workgroupBuffers.push_back(globalOp); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Original code |
||
} | ||
|
||
// Remap proper input types. | ||
|
@@ -101,16 +130,20 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, | |
// attribute. The former is necessary for further translation while the | ||
// latter is expected by gpu.launch_func. | ||
if (gpuFuncOp.isKernel()) { | ||
attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr()); | ||
if (kernelAttributeName) | ||
attributes.emplace_back(*kernelAttributeName, rewriter.getUnitAttr()); | ||
// Set the dialect-specific block size attribute if there is one. | ||
if (kernelBlockSizeAttributeName.has_value() && knownBlockSize) { | ||
attributes.emplace_back(kernelBlockSizeAttributeName.value(), | ||
knownBlockSize); | ||
} | ||
} | ||
LLVM::CConv callingConvention = gpuFuncOp.isKernel() | ||
? kernelCallingConvention | ||
: nonKernelCallingConvention; | ||
auto llvmFuncOp = rewriter.create<LLVM::LLVMFuncOp>( | ||
gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType, | ||
LLVM::Linkage::External, /*dsoLocal=*/false, /*cconv=*/LLVM::CConv::C, | ||
LLVM::Linkage::External, /*dsoLocal=*/false, callingConvention, | ||
/*comdat=*/nullptr, attributes); | ||
|
||
{ | ||
|
@@ -125,24 +158,49 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, | |
rewriter.setInsertionPointToStart(&gpuFuncOp.front()); | ||
unsigned numProperArguments = gpuFuncOp.getNumArguments(); | ||
|
||
for (const auto [idx, global] : llvm::enumerate(workgroupBuffers)) { | ||
auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext(), | ||
global.getAddrSpace()); | ||
Value address = rewriter.create<LLVM::AddressOfOp>( | ||
loc, ptrType, global.getSymNameAttr()); | ||
Value memory = | ||
rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getType(), address, | ||
ArrayRef<LLVM::GEPArg>{0, 0}); | ||
|
||
// Build a memref descriptor pointing to the buffer to plug with the | ||
// existing memref infrastructure. This may use more registers than | ||
// otherwise necessary given that memref sizes are fixed, but we can try | ||
// and canonicalize that away later. | ||
Value attribution = gpuFuncOp.getWorkgroupAttributions()[idx]; | ||
auto type = cast<MemRefType>(attribution.getType()); | ||
auto descr = MemRefDescriptor::fromStaticShape( | ||
rewriter, loc, *getTypeConverter(), type, memory); | ||
signatureConversion.remapInput(numProperArguments + idx, descr); | ||
if (encodeWorkgroupAttributionsAsArguments) { | ||
victor-eds marked this conversation as resolved.
Show resolved
Hide resolved
|
||
unsigned numAttributions = gpuFuncOp.getNumWorkgroupAttributions(); | ||
assert(numProperArguments >= numAttributions && | ||
"Expecting attributions to be encoded as arguments already"); | ||
|
||
// Arguments encoding workgroup attributions will be in positions | ||
// [numProperArguments, numProperArguments+numAttributions) | ||
ArrayRef<BlockArgument> attributionArguments = | ||
gpuFuncOp.getArguments().slice(numProperArguments - numAttributions, | ||
numAttributions); | ||
for (auto [idx, vals] : llvm::enumerate(llvm::zip_equal( | ||
gpuFuncOp.getWorkgroupAttributions(), attributionArguments))) { | ||
auto [attribution, arg] = vals; | ||
auto type = cast<MemRefType>(attribution.getType()); | ||
|
||
// Arguments are of llvm.ptr type and attributions are of memref type: | ||
// we need to wrap them in memref descriptors. | ||
Value descr = MemRefDescriptor::fromStaticShape( | ||
rewriter, loc, *getTypeConverter(), type, arg); | ||
|
||
// And remap the arguments | ||
signatureConversion.remapInput(numProperArguments + idx, descr); | ||
} | ||
} else { | ||
for (const auto [idx, global] : llvm::enumerate(workgroupBuffers)) { | ||
victor-eds marked this conversation as resolved.
Show resolved
Hide resolved
|
||
auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext(), | ||
global.getAddrSpace()); | ||
Value address = rewriter.create<LLVM::AddressOfOp>( | ||
loc, ptrType, global.getSymNameAttr()); | ||
Value memory = | ||
rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getType(), | ||
address, ArrayRef<LLVM::GEPArg>{0, 0}); | ||
|
||
// Build a memref descriptor pointing to the buffer to plug with the | ||
// existing memref infrastructure. This may use more registers than | ||
// otherwise necessary given that memref sizes are fixed, but we can try | ||
// and canonicalize that away later. | ||
Value attribution = gpuFuncOp.getWorkgroupAttributions()[idx]; | ||
auto type = cast<MemRefType>(attribution.getType()); | ||
auto descr = MemRefDescriptor::fromStaticShape( | ||
rewriter, loc, *getTypeConverter(), type, memory); | ||
signatureConversion.remapInput(numProperArguments + idx, descr); | ||
} | ||
Comment on lines
+208
to
+226
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Original code |
||
} | ||
|
||
// Rewrite private memory attributions to alloca'ed buffers. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,16 +35,39 @@ struct GPUDynamicSharedMemoryOpLowering | |
unsigned alignmentBit; | ||
}; | ||
|
||
struct GPUFuncOpLoweringOptions { | ||
/// The address space to use for `alloca`s in private memory. | ||
unsigned allocaAddrSpace; | ||
/// The address space to use declaring workgroup memory. | ||
unsigned workgroupAddrSpace; | ||
victor-eds marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/// The attribute name to use instead of `gpu.kernel`. | ||
std::optional<StringAttr> kernelAttributeName = std::nullopt; | ||
/// The attribute name to use to set the block size | ||
std::optional<StringAttr> kernelBlockSizeAttributeName = std::nullopt; | ||
|
||
/// The calling convention to use for kernel functions | ||
LLVM::CConv kernelCallingConvention = LLVM::CConv::C; | ||
/// The calling convention to use for non-kernel functions | ||
LLVM::CConv nonKernelCallingConvention = LLVM::CConv::C; | ||
|
||
/// Whether to encode workgroup attributions as additional arguments instead | ||
/// of a global variable. | ||
bool encodeWorkgroupAttributionsAsArguments = false; | ||
}; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was getting out of hand. Cleaner this way. |
||
|
||
struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> { | ||
GPUFuncOpLowering( | ||
const LLVMTypeConverter &converter, unsigned allocaAddrSpace, | ||
unsigned workgroupAddrSpace, StringAttr kernelAttributeName, | ||
std::optional<StringAttr> kernelBlockSizeAttributeName = std::nullopt) | ||
GPUFuncOpLowering(const LLVMTypeConverter &converter, | ||
const GPUFuncOpLoweringOptions &options) | ||
: ConvertOpToLLVMPattern<gpu::GPUFuncOp>(converter), | ||
allocaAddrSpace(allocaAddrSpace), | ||
workgroupAddrSpace(workgroupAddrSpace), | ||
kernelAttributeName(kernelAttributeName), | ||
kernelBlockSizeAttributeName(kernelBlockSizeAttributeName) {} | ||
allocaAddrSpace(options.allocaAddrSpace), | ||
workgroupAddrSpace(options.workgroupAddrSpace), | ||
kernelAttributeName(options.kernelAttributeName), | ||
kernelBlockSizeAttributeName(options.kernelBlockSizeAttributeName), | ||
kernelCallingConvention(options.kernelCallingConvention), | ||
nonKernelCallingConvention(options.nonKernelCallingConvention), | ||
encodeWorkgroupAttributionsAsArguments( | ||
options.encodeWorkgroupAttributionsAsArguments) {} | ||
|
||
LogicalResult | ||
matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, | ||
|
@@ -57,10 +80,18 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> { | |
unsigned workgroupAddrSpace; | ||
|
||
/// The attribute name to use instead of `gpu.kernel`. | ||
StringAttr kernelAttributeName; | ||
|
||
std::optional<StringAttr> kernelAttributeName; | ||
victor-eds marked this conversation as resolved.
Show resolved
Hide resolved
|
||
/// The attribute name to use to set the block size | ||
std::optional<StringAttr> kernelBlockSizeAttributeName; | ||
|
||
/// The calling convention to use for kernel functions | ||
LLVM::CConv kernelCallingConvention; | ||
FMarno marked this conversation as resolved.
Show resolved
Hide resolved
|
||
/// The calling convention to use for non-kernel functions | ||
LLVM::CConv nonKernelCallingConvention; | ||
|
||
/// Whether to encode workgroup attributions as additional arguments instead | ||
/// of a global variable. | ||
bool encodeWorkgroupAttributionsAsArguments; | ||
}; | ||
|
||
/// The lowering of gpu.printf to a call to HIP hostcalls | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,15 +8,18 @@ | |
|
||
#include "mlir/Conversion/GPUToLLVMSPV/GPUToLLVMSPVPass.h" | ||
|
||
#include "../GPUCommon/GPUOpsLowering.h" | ||
victor-eds marked this conversation as resolved.
Show resolved
Hide resolved
|
||
#include "mlir/Conversion/LLVMCommon/ConversionTarget.h" | ||
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h" | ||
#include "mlir/Conversion/LLVMCommon/Pattern.h" | ||
#include "mlir/Conversion/LLVMCommon/TypeConverter.h" | ||
#include "mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h" | ||
#include "mlir/Dialect/GPU/IR/GPUDialect.h" | ||
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h" | ||
#include "mlir/Dialect/LLVMIR/LLVMDialect.h" | ||
#include "mlir/Dialect/LLVMIR/LLVMTypes.h" | ||
#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h" | ||
#include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h" | ||
#include "mlir/Dialect/SPIRV/IR/TargetAndABI.h" | ||
#include "mlir/IR/BuiltinTypes.h" | ||
#include "mlir/IR/Matchers.h" | ||
|
@@ -321,8 +324,8 @@ struct GPUToLLVMSPVConversionPass final | |
LLVMConversionTarget target(*context); | ||
|
||
target.addIllegalOp<gpu::BarrierOp, gpu::BlockDimOp, gpu::BlockIdOp, | ||
gpu::GlobalIdOp, gpu::GridDimOp, gpu::ShuffleOp, | ||
gpu::ThreadIdOp>(); | ||
gpu::GPUFuncOp, gpu::GlobalIdOp, gpu::GridDimOp, | ||
gpu::ReturnOp, gpu::ShuffleOp, gpu::ThreadIdOp>(); | ||
|
||
populateGpuToLLVMSPVConversionPatterns(converter, patterns); | ||
|
||
|
@@ -340,11 +343,27 @@ struct GPUToLLVMSPVConversionPass final | |
namespace mlir { | ||
void populateGpuToLLVMSPVConversionPatterns(LLVMTypeConverter &typeConverter, | ||
RewritePatternSet &patterns) { | ||
patterns.add<GPUBarrierConversion, GPUShuffleConversion, | ||
patterns.add<GPUBarrierConversion, GPUReturnOpLowering, GPUShuffleConversion, | ||
LaunchConfigOpConversion<gpu::BlockIdOp>, | ||
LaunchConfigOpConversion<gpu::GridDimOp>, | ||
LaunchConfigOpConversion<gpu::BlockDimOp>, | ||
LaunchConfigOpConversion<gpu::ThreadIdOp>, | ||
LaunchConfigOpConversion<gpu::GlobalIdOp>>(typeConverter); | ||
constexpr spirv::ClientAPI clientAPI = spirv::ClientAPI::OpenCL; | ||
MLIRContext *context = &typeConverter.getContext(); | ||
unsigned privateAddressSpace = | ||
storageClassToAddressSpace(clientAPI, spirv::StorageClass::Function); | ||
unsigned localAddressSpace = | ||
storageClassToAddressSpace(clientAPI, spirv::StorageClass::Workgroup); | ||
OperationName llvmFuncOpName(LLVM::LLVMFuncOp::getOperationName(), context); | ||
StringAttr kernelBlockSizeAttributeName = | ||
LLVM::LLVMFuncOp::getReqdWorkGroupSizeAttrName(llvmFuncOpName); | ||
Comment on lines
+358
to
+360
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've always thought this should be a static member... Is there a better way to do this? I didn't wanna add the static member function to the LLVM dialect, so I went with this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This cannot be static, as an attribute requires the context present in the operation. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know, I was wondering if at least the string name should be |
||
patterns.add<GPUFuncOpLowering>( | ||
typeConverter, | ||
GPUFuncOpLoweringOptions{ | ||
privateAddressSpace, localAddressSpace, | ||
/*kernelAttributeName=*/std::nullopt, kernelBlockSizeAttributeName, | ||
LLVM::CConv::SPIR_KERNEL, LLVM::CConv::SPIR_FUNC, | ||
/*encodeWorkgroupAttributionsAsArguments=*/true}); | ||
} | ||
} // namespace mlir |
Uh oh!
There was an error while loading. Please reload this page.