Skip to content

Commit 8eae773

Browse files
committed
Amend from conversion to dialect rewrite pattern
1 parent 2c292f8 commit 8eae773

File tree

11 files changed

+52
-89
lines changed

11 files changed

+52
-89
lines changed

mlir/include/mlir/Conversion/Passes.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@
7373
#include "mlir/Conversion/TosaToTensor/TosaToTensor.h"
7474
#include "mlir/Conversion/UBToLLVM/UBToLLVM.h"
7575
#include "mlir/Conversion/UBToSPIRV/UBToSPIRV.h"
76-
#include "mlir/Conversion/VectorToAMDGPU/VectorToAMDGPU.h"
7776
#include "mlir/Conversion/VectorToArmSME/VectorToArmSME.h"
7877
#include "mlir/Conversion/VectorToGPU/VectorToGPU.h"
7978
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h"

mlir/include/mlir/Conversion/Passes.td

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1333,16 +1333,6 @@ def ConvertVectorToArmSMEPass : Pass<"convert-vector-to-arm-sme"> {
13331333
let dependentDialects = ["arm_sme::ArmSMEDialect", "arm_sve::ArmSVEDialect"];
13341334
}
13351335

1336-
//===----------------------------------------------------------------------===//
1337-
// VectorToAMDGPU
1338-
//===----------------------------------------------------------------------===//
1339-
1340-
def ConvertVectorToAMDGPUPass : Pass<"convert-vector-to-amdgpu"> {
1341-
let summary = "Lower the operations from the vector dialect into the AMDGPU "
1342-
"dialect";
1343-
let dependentDialects = ["vector::VectorDialect"];
1344-
}
1345-
13461336
//===----------------------------------------------------------------------===//
13471337
// ArmSMEToSCF
13481338
//===----------------------------------------------------------------------===//

mlir/include/mlir/Conversion/VectorToAMDGPU/VectorToAMDGPU.h

Lines changed: 0 additions & 24 deletions
This file was deleted.

mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ namespace amdgpu {
2222

2323
#define GEN_PASS_DECL_AMDGPUEMULATEATOMICSPASS
2424
#define GEN_PASS_DECL_AMDGPURESOLVESTRIDEDMETADATAPASS
25+
#define GEN_PASS_DECL_AMDGPUTRANSFERREADTOLOADPASS
2526
#define GEN_PASS_REGISTRATION
2627
#include "mlir/Dialect/AMDGPU/Transforms/Passes.h.inc"
2728

@@ -30,6 +31,9 @@ void populateAmdgpuEmulateAtomicsPatterns(ConversionTarget &target,
3031
Chipset chipset);
3132

3233
void populateAmdgpuResolveStridedMetadataPatterns(RewritePatternSet &patterns);
34+
35+
void populateAmdgpuTransferReadToLoadPatterns(RewritePatternSet &patterns);
36+
3337
} // namespace amdgpu
3438
} // namespace mlir
3539

mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,4 +51,18 @@ def AmdgpuResolveStridedMetadataPass : Pass<"amdgpu-resolve-strided-metadata"> {
5151
];
5252
}
5353

54+
def AmdgpuTransferReadToLoadPass : Pass<"convert-transfer-read-to-load"> {
55+
let summary = "Lower the operations from the vector transfer_read to vector load";
56+
let description = [{
57+
This pass creates a transfer read op lowering. A vector transfer read op
58+
will be lowered to a combination of vector.load, arith.select and
59+
vector.broadcast.
60+
61+
This pattern will make it possible for masked transfer_read to be lowered
62+
towards buffer load with bounds check, allowing a more optimized global
63+
load accessing pattern compared with existing implementation of
64+
llvm.intr.masked.load on vectors.
65+
}];
66+
let dependentDialects = [];
67+
}
5468
#endif // MLIR_DIALECT_AMDGPU_TRANSFORMS_PASSES_TD_

mlir/lib/Conversion/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ add_subdirectory(TosaToSCF)
6666
add_subdirectory(TosaToTensor)
6767
add_subdirectory(UBToLLVM)
6868
add_subdirectory(UBToSPIRV)
69-
add_subdirectory(VectorToAMDGPU)
7069
add_subdirectory(VectorToArmSME)
7170
add_subdirectory(VectorToGPU)
7271
add_subdirectory(VectorToLLVM)

mlir/lib/Conversion/VectorToAMDGPU/CMakeLists.txt

Lines changed: 0 additions & 18 deletions
This file was deleted.

mlir/lib/Dialect/AMDGPU/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
add_mlir_dialect_library(MLIRAMDGPUTransforms
22
EmulateAtomics.cpp
33
ResolveStridedMetadata.cpp
4+
TransferReadToLoad.cpp
45

56
ADDITIONAL_HEADER_DIRS
67
{$MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/AMDGPU/Transforms

mlir/lib/Conversion/VectorToAMDGPU/VectorToAMDGPU.cpp renamed to mlir/lib/Dialect/AMDGPU/Transforms/TransferReadToLoad.cpp

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
//===- VectorToAMDGPU.cpp - Vector to AMDGPU dialect conversion ---------===//
1+
//===- TransferReadToLoad.cpp - Lowers masked transfer read to load -------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#include "mlir/Conversion/VectorToAMDGPU/VectorToAMDGPU.h"
9+
#include "mlir/Dialect/AMDGPU/Transforms/Passes.h"
1010

1111
#include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
1212
#include "mlir/Dialect/Vector/IR/VectorOps.h"
@@ -17,12 +17,13 @@
1717
#include "mlir/Support/LogicalResult.h"
1818
#include "mlir/Transforms/WalkPatternRewriteDriver.h"
1919

20-
namespace mlir {
21-
#define GEN_PASS_DEF_CONVERTVECTORTOAMDGPUPASS
22-
#include "mlir/Conversion/Passes.h.inc"
23-
} // namespace mlir
20+
namespace mlir::amdgpu {
21+
#define GEN_PASS_DEF_AMDGPUTRANSFERREADTOLOADPASS
22+
#include "mlir/Dialect/AMDGPU/Transforms/Passes.h.inc"
23+
} // namespace mlir::amdgpu
2424

2525
using namespace mlir;
26+
using namespace mlir::amdgpu;
2627

2728
/// This pattern supports lowering of:
2829
/// `vector.transfer_read` to a combination of `vector.load`, `arith.select` and
@@ -55,8 +56,11 @@ static LogicalResult transferPreconditions(
5556
return rewriter.notifyMatchFailure(xferOp, "not a memref source");
5657

5758
Attribute addrSpace = memRefType.getMemorySpace();
58-
if (!addrSpace || dyn_cast<amdgpu::AddressSpaceAttr>(addrSpace).getValue() !=
59-
amdgpu::AddressSpace::FatRawBuffer)
59+
if (!addrSpace || !dyn_cast<amdgpu::AddressSpaceAttr>(addrSpace))
60+
return rewriter.notifyMatchFailure(xferOp, "no address space");
61+
62+
if (dyn_cast<amdgpu::AddressSpaceAttr>(addrSpace).getValue() !=
63+
amdgpu::AddressSpace::FatRawBuffer)
6064
return rewriter.notifyMatchFailure(xferOp, "not in buffer address space");
6165

6266
// Non-unit strides are handled by VectorToSCF.
@@ -134,16 +138,17 @@ struct TransferReadLowering final : OpRewritePattern<vector::TransferReadOp> {
134138

135139
} // namespace
136140

137-
void mlir::populateVectorToAMDGPUConversionPatterns(
141+
void mlir::amdgpu::populateAmdgpuTransferReadToLoadPatterns(
138142
RewritePatternSet &patterns) {
139143
patterns.add<TransferReadLowering>(patterns.getContext());
140144
}
141145

142-
struct ConvertVectorToAMDGPUPass final
143-
: impl::ConvertVectorToAMDGPUPassBase<ConvertVectorToAMDGPUPass> {
146+
struct AmdgpuTransferReadToLoadPass final
147+
: amdgpu::impl::AmdgpuTransferReadToLoadPassBase<
148+
AmdgpuTransferReadToLoadPass> {
144149
void runOnOperation() override {
145150
RewritePatternSet patterns(&getContext());
146-
populateVectorToAMDGPUConversionPatterns(patterns);
151+
populateAmdgpuTransferReadToLoadPatterns(patterns);
147152
walkAndApplyPatterns(getOperation(), std::move(patterns));
148153
}
149154
};
Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: mlir-opt %s --convert-vector-to-amdgpu --split-input-file | FileCheck %s
1+
// RUN: mlir-opt %s --convert-transfer-read-to-load --split-input-file | FileCheck %s
22

33
// CHECK-LABEL: func @transfer_to_maskedload_fatrawbuffer(
44
// CHECK-SAME: %[[ARG0:.*]]: memref<8x8xf32, #amdgpu.address_space<fat_raw_buffer>>
@@ -32,6 +32,21 @@ func.func @transfer_to_maskedload_regular(%mem : memref<8x8xf32>, %idx : index,
3232

3333
// -----
3434

35+
// CHECK-LABEL: func @transfer_to_maskedload_addrspace(
36+
// CHECK-SAME: %[[ARG0:.*]]: memref<8x8xf32, #gpu.address_space<workgroup>>
37+
// CHECK-SAME: %[[ARG1:.*]]: index
38+
// CHECK-SAME: %[[ARG2:.*]]: vector<4xi1>
39+
func.func @transfer_to_maskedload_addrspace(%mem : memref<8x8xf32, #gpu.address_space<workgroup>>, %idx : index, %mask : vector<4xi1>) -> vector<4xf32> {
40+
%cf0 = arith.constant 0.0 : f32
41+
%res = vector.transfer_read %mem[%idx, %idx], %cf0, %mask {in_bounds = [true]} : memref<8x8xf32, #gpu.address_space<workgroup>>, vector<4xf32>
42+
return %res : vector<4xf32>
43+
}
44+
// CHECK: %[[CST:.*]] = arith.constant 0.0
45+
// CHECK: %[[RES:.*]] = vector.transfer_read %arg0[%arg1, %arg1], %[[CST]], %arg2 {in_bounds = [true]} : memref<8x8xf32, #gpu.address_space<workgroup>>, vector<4xf32>
46+
// CHECK: return %[[RES]] : vector<4xf32>
47+
48+
// -----
49+
3550
// CHECK-LABEL: func @transfer_broadcasting(
3651
// CHECK-SAME: %[[ARG0:.*]]: memref<8x8xf32, #amdgpu.address_space<fat_raw_buffer>>
3752
// CHECK-SAME: %[[ARG1:.*]]: index

utils/bazel/llvm-project-overlay/mlir/BUILD.bazel

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4466,7 +4466,6 @@ cc_library(
44664466
":TosaToTensor",
44674467
":UBToLLVM",
44684468
":UBToSPIRV",
4469-
":VectorToAMDGPU",
44704469
":VectorToArmSME",
44714470
":VectorToGPU",
44724471
":VectorToLLVM",
@@ -12190,27 +12189,6 @@ cc_library(
1219012189
],
1219112190
)
1219212191

12193-
cc_library(
12194-
name = "VectorToAMDGPU",
12195-
srcs = glob([
12196-
"lib/Conversion/VectorToAMDGPU/*.cpp",
12197-
]),
12198-
hdrs = glob([
12199-
"include/mlir/Conversion/VectorToAMDGPU/*.h",
12200-
]),
12201-
includes = ["include"],
12202-
deps = [
12203-
":AMDGPUDialect",
12204-
":VectorDialect",
12205-
":ConversionPassIncGen",
12206-
":IR",
12207-
":MemRefDialect",
12208-
":Pass",
12209-
":TransformUtils",
12210-
"//llvm:Support",
12211-
],
12212-
)
12213-
1221412192
cc_library(
1221512193
name = "VectorToArmSME",
1221612194
srcs = glob([

0 commit comments

Comments
 (0)