Skip to content

Commit 889f1a1

Browse files
committed
Fix the format
1 parent 1e6a448 commit 889f1a1

File tree

1 file changed

+17
-18
lines changed

1 file changed

+17
-18
lines changed

mlir/lib/Dialect/AMDGPU/Transforms/OptimizeSharedMemory.cpp

Lines changed: 17 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -50,12 +50,12 @@ constexpr int64_t kDefaultVectorSizeBits = 64;
5050
static Value permuteVectorOffset(OpBuilder &b, Location loc,
5151
ArrayRef<Value> indices, MemRefType memrefTy,
5252
int64_t srcDim, int64_t tgtDim) {
53-
// Adjust the src index to change how often the permutation changes
54-
// if necessary.
53+
/// Adjust the src index to change how often the permutation changes
54+
/// if necessary.
5555
Value src = indices[srcDim];
5656

57-
// We only want to permute every N iterations of the target dim where N is
58-
// ceil(sharedMemoryLineSizeBytes / dimSizeBytes(tgtDim)).
57+
/// We only want to permute every N iterations of the target dim where N is
58+
/// ceil(sharedMemoryLineSizeBytes / dimSizeBytes(tgtDim)).
5959
const int64_t permuteEveryN = std::max<int64_t>(
6060
1, kSharedMemoryLineSizeBytes / ((memrefTy.getDimSize(tgtDim) *
6161
memrefTy.getElementTypeBitWidth()) /
@@ -81,8 +81,8 @@ static Value permuteVectorOffset(OpBuilder &b, Location loc,
8181
Value srcBits = b.create<arith::ConstantIndexOp>(loc, mask);
8282
srcBits = b.create<arith::AndIOp>(loc, src, srcBits);
8383

84-
// Use the src bits to permute the target bits b[N:M] containing the
85-
// vector offset.
84+
/// Use the src bits to permute the target bits b[N:M] containing the
85+
/// vector offset.
8686
if (permuteEveryN > 1) {
8787
int64_t shlBits = n - llvm::Log2_64(permuteEveryN);
8888
if (shlBits > 0) {
@@ -131,8 +131,8 @@ getShmReadAndWriteOps(Operation *parentOp, Value shmMemRef,
131131
writeOps.push_back(op);
132132
});
133133

134-
// Restrict to a supported set of ops. We also require at least 2D access,
135-
// although this could be relaxed.
134+
/// Restrict to a supported set of ops. We also require at least 2D access,
135+
/// although this could be relaxed.
136136
if (llvm::any_of(readOps, [](Operation *op) {
137137
return !isa<memref::LoadOp, vector::LoadOp, vector::TransferReadOp>(
138138
op) ||
@@ -157,15 +157,15 @@ mlir::amdgpu::optimizeSharedMemoryReadsAndWrites(Operation *parentOp,
157157
!amdgpu::AMDGPUDialect::hasSharedMemoryAddressSpace(memRefType))
158158
return failure();
159159

160-
// Abort if the given value has any sub-views; we do not do any alias
161-
// analysis.
160+
/// Abort if the given value has any sub-views; we do not do any alias
161+
/// analysis.
162162
bool hasSubView = false;
163163
parentOp->walk([&](memref::SubViewOp subView) { hasSubView = true; });
164164
if (hasSubView)
165165
return failure();
166166

167-
// Check if this is necessary given the assumption of 128b accesses:
168-
// If dim[rank-1] is small enough to fit 8 rows in a 128B line.
167+
/// Check if this is necessary given the assumption of 128b accesses:
168+
/// If dim[rank-1] is small enough to fit 8 rows in a 128B line.
169169
const int64_t rowSize = memRefType.getDimSize(memRefType.getRank() - 1);
170170
const int64_t rowsPerLine =
171171
(8 * kSharedMemoryLineSizeBytes / memRefType.getElementTypeBitWidth()) /
@@ -175,8 +175,8 @@ mlir::amdgpu::optimizeSharedMemoryReadsAndWrites(Operation *parentOp,
175175
if (rowsPerLine >= threadGroupSize)
176176
return failure();
177177

178-
// Get sets of operations within the function that read/write to shared
179-
// memory.
178+
/// Get sets of operations within the function that read/write to shared
179+
/// memory.
180180
SmallVector<Operation *, 16> shmReadOps;
181181
SmallVector<Operation *, 16> shmWriteOps;
182182
if (failed(getShmReadAndWriteOps(parentOp, memrefValue, shmReadOps,
@@ -191,7 +191,7 @@ mlir::amdgpu::optimizeSharedMemoryReadsAndWrites(Operation *parentOp,
191191
int64_t tgtDim = memRefType.getRank() - 1;
192192
int64_t srcDim = memRefType.getRank() - 2;
193193

194-
// Transform indices for the ops writing to shared memory.
194+
/// Transform indices for the ops writing to shared memory.
195195
while (!shmWriteOps.empty()) {
196196
Operation *shmWriteOp = shmWriteOps.pop_back_val();
197197
builder.setInsertionPoint(shmWriteOp);
@@ -203,7 +203,7 @@ mlir::amdgpu::optimizeSharedMemoryReadsAndWrites(Operation *parentOp,
203203
amdgpu::setIndices(shmWriteOp, transformedIndices);
204204
}
205205

206-
// Transform indices for the ops reading from shared memory.
206+
/// Transform indices for the ops reading from shared memory.
207207
while (!shmReadOps.empty()) {
208208
Operation *shmReadOp = shmReadOps.pop_back_val();
209209
builder.setInsertionPoint(shmReadOp);
@@ -218,8 +218,7 @@ mlir::amdgpu::optimizeSharedMemoryReadsAndWrites(Operation *parentOp,
218218
return success();
219219
}
220220

221-
void amdgpu::optimizeSharedMemoryReadsAndWritesOp(
222-
func::FuncOp funcOp) {
221+
void amdgpu::optimizeSharedMemoryReadsAndWritesOp(func::FuncOp funcOp) {
223222
SmallVector<memref::AllocOp> shmAllocOps;
224223
funcOp.walk([&](memref::AllocOp allocOp) {
225224
if (!amdgpu::AMDGPUDialect::hasSharedMemoryAddressSpace(allocOp.getType()))

0 commit comments

Comments
 (0)