Skip to content

Commit e0ba399

Browse files
committed
[MLIR][OpenMP] Extend omp.private materialization support: firstprivate
Extends current support for delayed privatization during translation to LLVM IR. This adds support for one-block `firstprivate` `omp.private` ops.
1 parent af31311 commit e0ba399

File tree

2 files changed

+144
-7
lines changed

2 files changed

+144
-7
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1167,17 +1167,38 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
11671167
}();
11681168

11691169
if (privVar) {
1170+
Region &allocRegion = privatizerClone.getAllocRegion();
1171+
1172+
// If this is a `firstprivate` clause, prepare the `omp.private` op by:
11701173
if (privatizerClone.getDataSharingType() ==
11711174
omp::DataSharingClauseType::FirstPrivate) {
1172-
privatizerClone.emitOpError(
1173-
"TODO: delayed privatization is not "
1174-
"supported for `firstprivate` clauses yet.");
1175-
bodyGenStatus = failure();
1176-
return codeGenIP;
1175+
auto oldAllocBackBlock = std::prev(allocRegion.end());
1176+
omp::YieldOp oldAllockYieldOp =
1177+
llvm::cast<omp::YieldOp>(oldAllocBackBlock->getTerminator());
1178+
1179+
Region &copyRegion = privatizerClone.getCopyRegion();
1180+
1181+
mlir::IRRewriter copyCloneBuilder(&moduleTranslation.getContext());
1182+
// 1. Cloning the `copy` region to the end of the `alloc` region.
1183+
copyCloneBuilder.cloneRegionBefore(copyRegion, allocRegion,
1184+
allocRegion.end());
1185+
1186+
auto newCopyRegionFrontBlock = std::next(oldAllocBackBlock);
1187+
// 2. Merging the last `alloc` block with the first block in the `copy`
1188+
// region clone.
1189+
// 3. Re-mapping the first argument of the `copy` region to be the
1190+
// argument of the `alloc` region and the second argument of the `copy`
1191+
// region to be the yielded value of the `alloc` region (this is the
1192+
// private clone of the privatized value).
1193+
copyCloneBuilder.mergeBlocks(
1194+
&*newCopyRegionFrontBlock, &*oldAllocBackBlock,
1195+
{allocRegion.getArgument(0), oldAllockYieldOp.getOperand(0)});
1196+
1197+
// 4. The old terminator of the `alloc` region is not needed anymore, so
1198+
// delete it.
1199+
oldAllockYieldOp.erase();
11771200
}
11781201

1179-
Region &allocRegion = privatizerClone.getAllocRegion();
1180-
11811202
// Replace the privatizer block argument with mlir value being privatized.
11821203
// This way, the body of the privatizer will be changed from using the
11831204
// region/block argument to the value being privatized.
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
// Test code-gen for `omp.parallel` ops with delayed privatizers (i.e. using
2+
// `omp.private` ops).
3+
4+
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
5+
6+
llvm.func @parallel_op_firstprivate(%arg0: !llvm.ptr) {
7+
omp.parallel private(@x.privatizer %arg0 -> %arg2 : !llvm.ptr) {
8+
%0 = llvm.load %arg2 : !llvm.ptr -> f32
9+
omp.terminator
10+
}
11+
llvm.return
12+
}
13+
14+
omp.private {type = firstprivate} @x.privatizer : !llvm.ptr alloc {
15+
^bb0(%arg0: !llvm.ptr):
16+
%c1 = llvm.mlir.constant(1 : i32) : i32
17+
%0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr
18+
omp.yield(%0 : !llvm.ptr)
19+
} copy {
20+
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
21+
%0 = llvm.load %arg0 : !llvm.ptr -> f32
22+
llvm.store %0, %arg1 : f32, !llvm.ptr
23+
omp.yield(%arg1 : !llvm.ptr)
24+
}
25+
26+
// CHECK-LABEL: @parallel_op_firstprivate
27+
// CHECK-SAME: (ptr %[[ORIG:.*]]) {
28+
// CHECK: %[[OMP_PAR_ARG:.*]] = alloca { ptr }, align 8
29+
// CHECK: %[[ORIG_GEP:.*]] = getelementptr { ptr }, ptr %[[OMP_PAR_ARG]], i32 0, i32 0
30+
// CHECK: store ptr %[[ORIG]], ptr %[[ORIG_GEP]], align 8
31+
// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @parallel_op_firstprivate..omp_par, ptr %[[OMP_PAR_ARG]])
32+
// CHECK: }
33+
34+
// CHECK-LABEL: void @parallel_op_firstprivate..omp_par
35+
// CHECK-SAME: (ptr noalias %{{.*}}, ptr noalias %{{.*}}, ptr %[[ARG:.*]])
36+
// CHECK: %[[ORIG_PTR_PTR:.*]] = getelementptr { ptr }, ptr %[[ARG]], i32 0, i32 0
37+
// CHECK: %[[ORIG_PTR:.*]] = load ptr, ptr %[[ORIG_PTR_PTR]], align 8
38+
39+
// Check that the privatizer alloc region was inlined properly.
40+
// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, align 4
41+
42+
// Check that the privatizer copy region was inlined properly.
43+
44+
// CHECK: %[[ORIG_VAL:.*]] = load float, ptr %[[ORIG_PTR]], align 4
45+
// CHECK: store float %[[ORIG_VAL]], ptr %[[PRIV_ALLOC]], align 4
46+
// CHECK-NEXT: br
47+
48+
// Check that the privatized value is used (rather than the original one).
49+
// CHECK: load float, ptr %[[PRIV_ALLOC]], align 4
50+
// CHECK: }
51+
52+
// -----
53+
54+
llvm.func @parallel_op_private_multi_block(%arg0: !llvm.ptr) {
55+
omp.parallel private(@multi_block.privatizer %arg0 -> %arg2 : !llvm.ptr) {
56+
%0 = llvm.load %arg2 : !llvm.ptr -> f32
57+
omp.terminator
58+
}
59+
llvm.return
60+
}
61+
62+
// CHECK-LABEL: define internal void @parallel_op_private_multi_block..omp_par
63+
// CHECK: omp.par.entry:
64+
// CHECK: %[[ORIG_PTR_PTR:.*]] = getelementptr { ptr }, ptr %{{.*}}, i32 0, i32 0
65+
// CHECK: %[[ORIG_PTR:.*]] = load ptr, ptr %[[ORIG_PTR_PTR]], align 8
66+
// CHECK: br label %[[PRIV_BB1:.*]]
67+
68+
// CHECK: [[PRIV_BB1]]:
69+
// The 1st `alloc` block directly branches to the 2nd `alloc` block since the
70+
// only instruction is `llvm.mlir.constant` which gets translated to compile-time
71+
// constant in LLVM IR.
72+
// CHECK-NEXT: br label %[[PRIV_BB2:.*]]
73+
74+
// CHECK: [[PRIV_BB2]]:
75+
// CHECK-NEXT: %[[C1:.*]] = phi i32 [ 1, %[[PRIV_BB1]] ]
76+
// CHECK-NEXT: %[[PRIV_ALLOC:.*]] = alloca float, i32 %[[C1]], align 4
77+
// The entry block of the `copy` region is merged into the exit block of the
78+
// `alloc` region. So check for that.
79+
// CHECK-NEXT: %[[ORIG_VAL:.*]] = load float, ptr %[[ORIG_PTR]], align 4
80+
// CHECK-NEXT: br label %[[PRIV_BB3:.*]]
81+
82+
// Check contents of the 2nd block in the `copy` region.
83+
// CHECK: [[PRIV_BB3]]:
84+
// CHECK-NEXT: %[[ORIG_VAL2:.*]] = phi float [ %[[ORIG_VAL]], %[[PRIV_BB2]] ]
85+
// CHECK-NEXT: %[[PRIV_ALLOC2:.*]] = phi ptr [ %[[PRIV_ALLOC]], %[[PRIV_BB2]] ]
86+
// CHECK-NEXT: store float %[[ORIG_VAL2]], ptr %[[PRIV_ALLOC2]], align 4
87+
// CHECK-NEXT: br label %[[PRIV_CONT:.*]]
88+
89+
// Check that the privatizer's continuation block yields the private clone's
90+
// address.
91+
// CHECK: [[PRIV_CONT]]:
92+
// CHECK-NEXT: %[[PRIV_ALLOC3:.*]] = phi ptr [ %[[PRIV_ALLOC2]], %[[PRIV_BB3]] ]
93+
// CHECK-NEXT: br label %[[PAR_REG:.*]]
94+
95+
// Check that the body of the parallel region loads from the private clone.
96+
// CHECK: [[PAR_REG]]:
97+
// CHECK: %{{.*}} = load float, ptr %[[PRIV_ALLOC3]], align 4
98+
99+
omp.private {type = firstprivate} @multi_block.privatizer : !llvm.ptr alloc {
100+
^bb0(%arg0: !llvm.ptr):
101+
%c1 = llvm.mlir.constant(1 : i32) : i32
102+
llvm.br ^bb1(%c1 : i32)
103+
104+
^bb1(%arg1: i32):
105+
%0 = llvm.alloca %arg1 x f32 : (i32) -> !llvm.ptr
106+
omp.yield(%0 : !llvm.ptr)
107+
108+
} copy {
109+
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
110+
%0 = llvm.load %arg0 : !llvm.ptr -> f32
111+
llvm.br ^bb1(%0, %arg1 : f32, !llvm.ptr)
112+
113+
^bb1(%arg2: f32, %arg3: !llvm.ptr):
114+
llvm.store %arg2, %arg3 : f32, !llvm.ptr
115+
omp.yield(%arg3 : !llvm.ptr)
116+
}

0 commit comments

Comments
 (0)