Skip to content

Commit 5c702d3

Browse files
committed
[MLIR][OpenMP] Extend omp.private materialization support: firstprivate
Extends current support for delayed privatization during translation to LLVM IR. This adds support for one-block `firstprivate` `omp.private` ops.
1 parent 9d56be0 commit 5c702d3

File tree

2 files changed

+144
-7
lines changed

2 files changed

+144
-7
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,17 +1176,38 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
11761176
}();
11771177

11781178
if (privVar) {
1179+
Region &allocRegion = privatizerClone.getAllocRegion();
1180+
1181+
// If this is a `firstprivate` clause, prepare the `omp.private` op by:
11791182
if (privatizerClone.getDataSharingType() ==
11801183
omp::DataSharingClauseType::FirstPrivate) {
1181-
privatizerClone.emitOpError(
1182-
"TODO: delayed privatization is not "
1183-
"supported for `firstprivate` clauses yet.");
1184-
bodyGenStatus = failure();
1185-
return codeGenIP;
1184+
auto oldAllocBackBlock = std::prev(allocRegion.end());
1185+
omp::YieldOp oldAllocYieldOp =
1186+
llvm::cast<omp::YieldOp>(oldAllocBackBlock->getTerminator());
1187+
1188+
Region &copyRegion = privatizerClone.getCopyRegion();
1189+
1190+
mlir::IRRewriter copyCloneBuilder(&moduleTranslation.getContext());
1191+
// 1. Cloning the `copy` region to the end of the `alloc` region.
1192+
copyCloneBuilder.cloneRegionBefore(copyRegion, allocRegion,
1193+
allocRegion.end());
1194+
1195+
auto newCopyRegionFrontBlock = std::next(oldAllocBackBlock);
1196+
// 2. Merging the last `alloc` block with the first block in the `copy`
1197+
// region clone.
1198+
// 3. Re-mapping the first argument of the `copy` region to be the
1199+
// argument of the `alloc` region and the second argument of the `copy`
1200+
// region to be the yielded value of the `alloc` region (this is the
1201+
// private clone of the privatized value).
1202+
copyCloneBuilder.mergeBlocks(
1203+
&*newCopyRegionFrontBlock, &*oldAllocBackBlock,
1204+
{allocRegion.getArgument(0), oldAllocYieldOp.getOperand(0)});
1205+
1206+
// 4. The old terminator of the `alloc` region is not needed anymore, so
1207+
// delete it.
1208+
oldAllocYieldOp.erase();
11861209
}
11871210

1188-
Region &allocRegion = privatizerClone.getAllocRegion();
1189-
11901211
// Replace the privatizer block argument with mlir value being privatized.
11911212
// This way, the body of the privatizer will be changed from using the
11921213
// region/block argument to the value being privatized.
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
// Test code-gen for `omp.parallel` ops with delayed privatizers (i.e. using
2+
// `omp.private` ops).
3+
4+
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
5+
6+
llvm.func @parallel_op_firstprivate(%arg0: !llvm.ptr) {
7+
omp.parallel private(@x.privatizer %arg0 -> %arg2 : !llvm.ptr) {
8+
%0 = llvm.load %arg2 : !llvm.ptr -> f32
9+
omp.terminator
10+
}
11+
llvm.return
12+
}
13+
14+
omp.private {type = firstprivate} @x.privatizer : !llvm.ptr alloc {
15+
^bb0(%arg0: !llvm.ptr):
16+
%c1 = llvm.mlir.constant(1 : i32) : i32
17+
%0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr
18+
omp.yield(%0 : !llvm.ptr)
19+
} copy {
20+
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
21+
%0 = llvm.load %arg0 : !llvm.ptr -> f32
22+
llvm.store %0, %arg1 : f32, !llvm.ptr
23+
omp.yield(%arg1 : !llvm.ptr)
24+
}
25+
26+
// CHECK-LABEL: @parallel_op_firstprivate
27+
// CHECK-SAME: (ptr %[[ORIG:.*]]) {
28+
// CHECK: %[[OMP_PAR_ARG:.*]] = alloca { ptr }, align 8
29+
// CHECK: %[[ORIG_GEP:.*]] = getelementptr { ptr }, ptr %[[OMP_PAR_ARG]], i32 0, i32 0
30+
// CHECK: store ptr %[[ORIG]], ptr %[[ORIG_GEP]], align 8
31+
// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @parallel_op_firstprivate..omp_par, ptr %[[OMP_PAR_ARG]])
32+
// CHECK: }
33+
34+
// CHECK-LABEL: void @parallel_op_firstprivate..omp_par
35+
// CHECK-SAME: (ptr noalias %{{.*}}, ptr noalias %{{.*}}, ptr %[[ARG:.*]])
36+
// CHECK: %[[ORIG_PTR_PTR:.*]] = getelementptr { ptr }, ptr %[[ARG]], i32 0, i32 0
37+
// CHECK: %[[ORIG_PTR:.*]] = load ptr, ptr %[[ORIG_PTR_PTR]], align 8
38+
39+
// Check that the privatizer alloc region was inlined properly.
40+
// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, align 4
41+
42+
// Check that the privatizer copy region was inlined properly.
43+
44+
// CHECK: %[[ORIG_VAL:.*]] = load float, ptr %[[ORIG_PTR]], align 4
45+
// CHECK: store float %[[ORIG_VAL]], ptr %[[PRIV_ALLOC]], align 4
46+
// CHECK-NEXT: br
47+
48+
// Check that the privatized value is used (rather than the original one).
49+
// CHECK: load float, ptr %[[PRIV_ALLOC]], align 4
50+
// CHECK: }
51+
52+
// -----
53+
54+
llvm.func @parallel_op_firstprivate_multi_block(%arg0: !llvm.ptr) {
55+
omp.parallel private(@multi_block.privatizer %arg0 -> %arg2 : !llvm.ptr) {
56+
%0 = llvm.load %arg2 : !llvm.ptr -> f32
57+
omp.terminator
58+
}
59+
llvm.return
60+
}
61+
62+
// CHECK-LABEL: define internal void @parallel_op_firstprivate_multi_block..omp_par
63+
// CHECK: omp.par.entry:
64+
// CHECK: %[[ORIG_PTR_PTR:.*]] = getelementptr { ptr }, ptr %{{.*}}, i32 0, i32 0
65+
// CHECK: %[[ORIG_PTR:.*]] = load ptr, ptr %[[ORIG_PTR_PTR]], align 8
66+
// CHECK: br label %[[PRIV_BB1:.*]]
67+
68+
// CHECK: [[PRIV_BB1]]:
69+
// The 1st `alloc` block directly branches to the 2nd `alloc` block since the
70+
// only instruction is `llvm.mlir.constant` which gets translated to compile-time
71+
// constant in LLVM IR.
72+
// CHECK-NEXT: br label %[[PRIV_BB2:.*]]
73+
74+
// CHECK: [[PRIV_BB2]]:
75+
// CHECK-NEXT: %[[C1:.*]] = phi i32 [ 1, %[[PRIV_BB1]] ]
76+
// CHECK-NEXT: %[[PRIV_ALLOC:.*]] = alloca float, i32 %[[C1]], align 4
77+
// The entry block of the `copy` region is merged into the exit block of the
78+
// `alloc` region. So check for that.
79+
// CHECK-NEXT: %[[ORIG_VAL:.*]] = load float, ptr %[[ORIG_PTR]], align 4
80+
// CHECK-NEXT: br label %[[PRIV_BB3:.*]]
81+
82+
// Check contents of the 2nd block in the `copy` region.
83+
// CHECK: [[PRIV_BB3]]:
84+
// CHECK-NEXT: %[[ORIG_VAL2:.*]] = phi float [ %[[ORIG_VAL]], %[[PRIV_BB2]] ]
85+
// CHECK-NEXT: %[[PRIV_ALLOC2:.*]] = phi ptr [ %[[PRIV_ALLOC]], %[[PRIV_BB2]] ]
86+
// CHECK-NEXT: store float %[[ORIG_VAL2]], ptr %[[PRIV_ALLOC2]], align 4
87+
// CHECK-NEXT: br label %[[PRIV_CONT:.*]]
88+
89+
// Check that the privatizer's continuation block yields the private clone's
90+
// address.
91+
// CHECK: [[PRIV_CONT]]:
92+
// CHECK-NEXT: %[[PRIV_ALLOC3:.*]] = phi ptr [ %[[PRIV_ALLOC2]], %[[PRIV_BB3]] ]
93+
// CHECK-NEXT: br label %[[PAR_REG:.*]]
94+
95+
// Check that the body of the parallel region loads from the private clone.
96+
// CHECK: [[PAR_REG]]:
97+
// CHECK: %{{.*}} = load float, ptr %[[PRIV_ALLOC3]], align 4
98+
99+
omp.private {type = firstprivate} @multi_block.privatizer : !llvm.ptr alloc {
100+
^bb0(%arg0: !llvm.ptr):
101+
%c1 = llvm.mlir.constant(1 : i32) : i32
102+
llvm.br ^bb1(%c1 : i32)
103+
104+
^bb1(%arg1: i32):
105+
%0 = llvm.alloca %arg1 x f32 : (i32) -> !llvm.ptr
106+
omp.yield(%0 : !llvm.ptr)
107+
108+
} copy {
109+
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
110+
%0 = llvm.load %arg0 : !llvm.ptr -> f32
111+
llvm.br ^bb1(%0, %arg1 : f32, !llvm.ptr)
112+
113+
^bb1(%arg2: f32, %arg3: !llvm.ptr):
114+
llvm.store %arg2, %arg3 : f32, !llvm.ptr
115+
omp.yield(%arg3 : !llvm.ptr)
116+
}

0 commit comments

Comments
 (0)