[flang][OpenMP] Add alias analysis for omp private #113566
Conversation
Enable alias analysis for the OpenMP `private` clause for code like:

```fortran
program main
  integer :: arrayA(10,10)
  integer :: tmp(2)
  integer :: i,j
  !$omp target teams distribute parallel do private(tmp)
  do j = 1, 10
    do i = 1,10
      tmp = [i,j]
      arrayA = tmp(1)
    end do
  end do
end program main
```

Currently only the OpenMP `parallel` construct has support for the `private` clause in this analysis. In the future this patch should be extended to other OpenMP directives that can carry a `private` clause.
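For orientation, a rough sketch (not part of the patch) of how a client might consult FIR alias analysis once this change is in; `provablyIndependent`, `privateTmpDecl` and `arrayCtorTemp` are hypothetical names standing in for the two values the new regression test tags with `test.ptr`:

```cpp
#include "flang/Optimizer/Analysis/AliasAnalysis.h"

// Sketch only: ask FIR alias analysis about the two values the new test
// compares. With this patch, the privatized tmp and the array-constructor
// temporary are reported as NoAlias.
static bool provablyIndependent(mlir::Value privateTmpDecl,
                                mlir::Value arrayCtorTemp) {
  fir::AliasAnalysis aliasAnalysis;
  return aliasAnalysis.alias(privateTmpDecl, arrayCtorTemp).isNo();
}
```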
@llvm/pr-subscribers-flang-fir-hlfir

Author: Dominik Adamski (DominikAdamski)

Changes

Enable alias analysis for the OpenMP `private` clause for the code shown above.

At present, only the OpenMP `parallel` construct supports the `private` clause in this analysis. In the future, this patch should be extended to other OpenMP directives that can have a `private` clause.

Full diff: https://github.com/llvm/llvm-project/pull/113566.diff

2 Files Affected:
diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
index 8b7918744017cc..e7e9130d5e37b4 100644
--- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
+++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
@@ -372,6 +372,17 @@ getAttrsFromVariable(fir::FortranVariableOpInterface var) {
return attrs;
}
+static std::optional<omp::BlockArgOpenMPOpInterface>
+getOpenMPBlockArgInterface(Operation *op) {
+ std::optional<omp::BlockArgOpenMPOpInterface> blockArgOpenMPOpInterface;
+ if (!op)
+ return blockArgOpenMPOpInterface;
+ if (llvm::isa<omp::TargetOp>(op) || llvm::isa<omp::ParallelOp>(op)) {
+ blockArgOpenMPOpInterface = dyn_cast<omp::BlockArgOpenMPOpInterface>(op);
+ }
+ return blockArgOpenMPOpInterface;
+}
+
AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
bool getInstantiationPoint) {
auto *defOp = v.getDefiningOp();
@@ -470,20 +481,56 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
breakFromLoop = true;
})
.Case<hlfir::DeclareOp, fir::DeclareOp>([&](auto op) {
- // If declare operation is inside omp target region,
- // continue alias analysis outside the target region
- if (auto targetOp =
- llvm::dyn_cast<omp::TargetOp>(op->getParentOp())) {
- auto argIface = cast<omp::BlockArgOpenMPOpInterface>(*targetOp);
- for (auto [opArg, blockArg] : llvm::zip_equal(
- targetOp.getMapVars(), argIface.getMapBlockArgs())) {
- if (blockArg == op.getMemref()) {
- omp::MapInfoOp mapInfo =
- llvm::cast<omp::MapInfoOp>(opArg.getDefiningOp());
- v = mapInfo.getVarPtr();
- defOp = v.getDefiningOp();
- return;
- }
+ auto argIface = getOpenMPBlockArgInterface(op->getParentOp());
+ if (argIface) {
+ Value ompValArg;
+ llvm::TypeSwitch<Operation *>(op->getParentOp())
+ .template Case<omp::TargetOp>([&](auto targetOp) {
+ // If declare operation is inside omp target region,
+ // continue alias analysis outside the target region
+ for (auto [opArg, blockArg] :
+ llvm::zip_equal(targetOp.getMapVars(),
+ (*argIface).getMapBlockArgs())) {
+ if (blockArg == op.getMemref()) {
+ omp::MapInfoOp mapInfo =
+ llvm::cast<omp::MapInfoOp>(opArg.getDefiningOp());
+ ompValArg = mapInfo.getVarPtr();
+ break;
+ }
+ }
+ })
+ .template Case<omp::ParallelOp>([&](auto parallelOp) {
+ // TODO private clause can be part of multiple
+ // OpenMP directives (target, simd, etc.).
+ // We should extend alias analysis once Flang
+ // handles the private clause for directives
+ // other than parallel.
+ if (!parallelOp.getPrivateSyms().has_value())
+ return;
+ for (auto [opSym, blockArg] :
+ llvm::zip_equal(*parallelOp.getPrivateSyms(),
+ (*argIface).getPrivateBlockArgs())) {
+ if (blockArg == op.getMemref()) {
+ omp::PrivateClauseOp privateOp =
+ SymbolTable::lookupNearestSymbolFrom<
+ omp::PrivateClauseOp>(parallelOp,
+ cast<SymbolRefAttr>(opSym));
+ privateOp.walk([&](omp::YieldOp yieldOp) {
+ llvm::TypeSwitch<Operation *>(
+ yieldOp.getResults()[0].getDefiningOp())
+ .template Case<fir::DeclareOp, hlfir::DeclareOp>(
+ [&](auto declOp) {
+ ompValArg = declOp.getMemref();
+ });
+ });
+ return;
+ }
+ }
+ });
+ if (ompValArg) {
+ v = ompValArg;
+ defOp = ompValArg.getDefiningOp();
+ return;
}
}
auto varIf = llvm::cast<fir::FortranVariableOpInterface>(defOp);
diff --git a/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-teams-distribute-private.mlir b/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-teams-distribute-private.mlir
new file mode 100644
index 00000000000000..4668b2c215c8c3
--- /dev/null
+++ b/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-teams-distribute-private.mlir
@@ -0,0 +1,121 @@
+// Use --mlir-disable-threading so that the AA queries are serialized
+// as well as its diagnostic output.
+// RUN: fir-opt %s -pass-pipeline='builtin.module(func.func(test-fir-alias-analysis))' -split-input-file --mlir-disable-threading 2>&1 | FileCheck %s
+
+// Fortran code:
+//
+// program main
+// integer :: arrayA(10,10)
+// integer :: tmp(2)
+// integer :: i,j
+// !$omp teams distribute parallel do private(tmp)
+// do j = 1, 10
+// do i = 1,10
+// tmp = [i,j]
+// arrayA = tmp(1)
+// end do
+// end do
+// end program main
+
+// CHECK-LABEL: Testing : "_QQmain"
+// CHECK-DAG: tmp_private_array#0 <-> unnamed_array#0: NoAlias
+// CHECK-DAG: tmp_private_array#1 <-> unnamed_array#0: NoAlias
+
+omp.private {type = private} @_QFEi_private_ref_i32 : !fir.ref<i32> alloc {
+^bb0(%arg0: !fir.ref<i32>):
+ %0 = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFEi"}
+ %1:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ omp.yield(%1#0 : !fir.ref<i32>)
+}
+omp.private {type = private} @_QFEj_private_ref_i32 : !fir.ref<i32> alloc {
+^bb0(%arg0: !fir.ref<i32>):
+ %0 = fir.alloca i32 {bindc_name = "j", pinned, uniq_name = "_QFEj"}
+ %1:2 = hlfir.declare %0 {uniq_name = "_QFEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ omp.yield(%1#0 : !fir.ref<i32>)
+}
+omp.private {type = private} @_QFEtmp_private_ref_2xi32 : !fir.ref<!fir.array<2xi32>> alloc {
+^bb0(%arg0: !fir.ref<!fir.array<2xi32>>):
+ %c2 = arith.constant 2 : index
+ %0 = fir.alloca !fir.array<2xi32> {bindc_name = "tmp", pinned, uniq_name = "_QFEtmp"}
+ %1 = fir.shape %c2 : (index) -> !fir.shape<1>
+ %2:2 = hlfir.declare %0(%1) {uniq_name = "_QFEtmp"} : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<2xi32>>, !fir.ref<!fir.array<2xi32>>)
+ omp.yield(%2#0 : !fir.ref<!fir.array<2xi32>>)
+}
+func.func @_QQmain() attributes {fir.bindc_name = "main"} {
+ %0 = fir.address_of(@_QFEarraya) : !fir.ref<!fir.array<10x10xi32>>
+ %c10 = arith.constant 10 : index
+ %c10_0 = arith.constant 10 : index
+ %1 = fir.shape %c10, %c10_0 : (index, index) -> !fir.shape<2>
+ %2:2 = hlfir.declare %0(%1) {uniq_name = "_QFEarraya"} : (!fir.ref<!fir.array<10x10xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<10x10xi32>>, !fir.ref<!fir.array<10x10xi32>>)
+ %3 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"}
+ %4:2 = hlfir.declare %3 {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %5 = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFEj"}
+ %6:2 = hlfir.declare %5 {uniq_name = "_QFEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %c2 = arith.constant 2 : index
+ %7 = fir.alloca !fir.array<2xi32> {bindc_name = "tmp", uniq_name = "_QFEtmp"}
+ %8 = fir.shape %c2 : (index) -> !fir.shape<1>
+ %9:2 = hlfir.declare %7(%8) {uniq_name = "_QFEtmp"} : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<2xi32>>, !fir.ref<!fir.array<2xi32>>)
+ omp.teams {
+ omp.parallel private(@_QFEtmp_private_ref_2xi32 %9#0 -> %arg0, @_QFEj_private_ref_i32 %6#0 -> %arg1, @_QFEi_private_ref_i32 %4#0 -> %arg2 : !fir.ref<!fir.array<2xi32>>, !fir.ref<i32>, !fir.ref<i32>) {
+ %c2_1 = arith.constant 2 : index
+ %10 = fir.shape %c2_1 : (index) -> !fir.shape<1>
+ %11:2 = hlfir.declare %arg0(%10) {uniq_name = "_QFEtmp", test.ptr = "tmp_private_array"} : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<2xi32>>, !fir.ref<!fir.array<2xi32>>)
+ %12:2 = hlfir.declare %arg1 {uniq_name = "_QFEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %13:2 = hlfir.declare %arg2 {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %c1_i32 = arith.constant 1 : i32
+ %c10_i32 = arith.constant 10 : i32
+ %c1_i32_2 = arith.constant 1 : i32
+ omp.distribute {
+ omp.wsloop {
+ omp.loop_nest (%arg3) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32_2) {
+ fir.store %arg3 to %12#1 : !fir.ref<i32>
+ %c1_i32_3 = arith.constant 1 : i32
+ %14 = fir.convert %c1_i32_3 : (i32) -> index
+ %c10_i32_4 = arith.constant 10 : i32
+ %15 = fir.convert %c10_i32_4 : (i32) -> index
+ %c1 = arith.constant 1 : index
+ %16 = fir.convert %14 : (index) -> i32
+ %17:2 = fir.do_loop %arg4 = %14 to %15 step %c1 iter_args(%arg5 = %16) -> (index, i32) {
+ fir.store %arg5 to %13#1 : !fir.ref<i32>
+ %c2_5 = arith.constant 2 : index
+ %c1_6 = arith.constant 1 : index
+ %c1_7 = arith.constant 1 : index
+ %18 = fir.allocmem !fir.array<2xi32> {bindc_name = ".tmp.arrayctor", uniq_name = ""}
+ %19 = fir.shape %c2_5 : (index) -> !fir.shape<1>
+ %20:2 = hlfir.declare %18(%19) {uniq_name = ".tmp.arrayctor"} : (!fir.heap<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<2xi32>>, !fir.heap<!fir.array<2xi32>>)
+ %21 = fir.load %13#0 : !fir.ref<i32>
+ %22 = arith.addi %c1_6, %c1_7 : index
+ %23 = hlfir.designate %20#0 (%c1_6) : (!fir.heap<!fir.array<2xi32>>, index) -> !fir.ref<i32>
+ hlfir.assign %21 to %23 : i32, !fir.ref<i32>
+ %24 = fir.load %12#0 : !fir.ref<i32>
+ %25 = hlfir.designate %20#0 (%22) : (!fir.heap<!fir.array<2xi32>>, index) -> !fir.ref<i32>
+ hlfir.assign %24 to %25 : i32, !fir.ref<i32>
+ %true = arith.constant true
+ %26 = hlfir.as_expr %20#0 move %true {test.ptr = "unnamed_array"} : (!fir.heap<!fir.array<2xi32>>, i1) -> !hlfir.expr<2xi32>
+ hlfir.assign %26 to %11#0 : !hlfir.expr<2xi32>, !fir.ref<!fir.array<2xi32>>
+ hlfir.destroy %26 : !hlfir.expr<2xi32>
+ %c1_8 = arith.constant 1 : index
+ %27 = hlfir.designate %11#0 (%c1_8) : (!fir.ref<!fir.array<2xi32>>, index) -> !fir.ref<i32>
+ %28 = fir.load %27 : !fir.ref<i32>
+ hlfir.assign %28 to %2#0 : i32, !fir.ref<!fir.array<10x10xi32>>
+ %29 = arith.addi %arg4, %c1 : index
+ %30 = fir.convert %c1 : (index) -> i32
+ %31 = fir.load %13#1 : !fir.ref<i32>
+ %32 = arith.addi %31, %30 : i32
+ fir.result %29, %32 : index, i32
+ }
+ fir.store %17#1 to %13#1 : !fir.ref<i32>
+ omp.yield
+ }
+ } {omp.composite}
+ } {omp.composite}
+ omp.terminator
+ } {omp.composite}
+ omp.terminator
+ }
+ return
+}
+fir.global internal @_QFEarraya : !fir.array<10x10xi32> {
+ %0 = fir.zero_bits !fir.array<10x10xi32>
+ fir.has_value %0 : !fir.array<10x10xi32>
+}
}
})
.template Case<omp::ParallelOp>([&](auto parallelOp) {
// TODO private clause can be part of multiple
I imagine it should be possible to write this generically to work on any operation with an OpenMPBlockArgInterface. If the operation does not yet support delayed privatisation the private syms should be empty.
Reductions also work in the same way as delayed privatization (but don't feel that has to be done in this PR).
Fixed -> extended alias analysis support for the private clause to other operations, even though Flang does not yet fully support the private clause for all of them.
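For illustration, a sketch of the generic shape being discussed (not the code that landed in this PR): the helper name `followPrivateBlockArg` and its parameters are illustrative, and it reuses only calls the patch already makes for `omp.parallel` (`getPrivateSyms`, `getPrivateBlockArgs`, `SymbolTable::lookupNearestSymbolFrom`), assuming the same includes and usings as `AliasAnalysis.cpp`:

```cpp
// Sketch only. The concrete op type is a template parameter so that
// getPrivateSyms() stays reachable; an op that does not yet support delayed
// privatisation contributes empty lists and the loop is a no-op. Only
// hlfir.declare is matched here for brevity; the patch also handles
// fir.declare.
template <typename OpTy>
static Value followPrivateBlockArg(OpTy op,
                                   omp::BlockArgOpenMPOpInterface argIface,
                                   Value privateBlockArg) {
  Value hostValue;
  if (!op.getPrivateSyms().has_value())
    return hostValue;
  for (auto [sym, blockArg] : llvm::zip_equal(*op.getPrivateSyms(),
                                              argIface.getPrivateBlockArgs())) {
    if (blockArg != privateBlockArg)
      continue;
    omp::PrivateClauseOp privatizer =
        SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
            op, cast<SymbolRefAttr>(sym));
    // Follow the privatisation recipe's yielded value back to its declare,
    // as the patch does for omp.parallel.
    privatizer.walk([&](omp::YieldOp yieldOp) {
      if (auto declOp = llvm::dyn_cast_if_present<hlfir::DeclareOp>(
              yieldOp.getResults()[0].getDefiningOp()))
        hostValue = declOp.getMemref();
    });
    break;
  }
  return hostValue;
}
```

Keeping the op type as a template parameter mirrors how the patch reaches `getPrivateSyms()` through the concrete `omp.parallel` op rather than through the block-argument interface.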
std::optional<omp::BlockArgOpenMPOpInterface> blockArgOpenMPOpInterface;
if (!op)
return blockArgOpenMPOpInterface;
if (llvm::isa<omp::TargetOp>(op) || llvm::isa<omp::ParallelOp>(op)) { |
Why not return the block args interface for other operation types (where the dyn_cast succeeds)?
Fixed.
Thanks for the updates. This is much easier to extend now.
And thanks for looking at the alias analysis for OpenMP - is this for performance improvement? Do you have anything else planned? No worries if you can't say or don't want to commit to something - I'm just curious!
if (!op)
return {};
return dyn_cast<omp::BlockArgOpenMPOpInterface>(op); |
nit: this can be simpler (maybe we don't need the helper now?)
Suggested change:
-  if (!op)
-    return {};
-  return dyn_cast<omp::BlockArgOpenMPOpInterface>(op);
+  return dyn_cast_if_present<omp::BlockArgOpenMPOpInterface>(op);
https://llvm.org/docs/ProgrammersManual.html#the-isa-cast-and-dyn-cast-templates
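For illustration, a minimal reading of this suggestion at the call site (a sketch assuming the helper has no other callers, not the committed code):

```cpp
// Query the interface directly where it is needed; dyn_cast_if_present
// yields a null interface both when the parent op is null and when it does
// not implement BlockArgOpenMPOpInterface.
auto argIface = llvm::dyn_cast_if_present<omp::BlockArgOpenMPOpInterface>(
    op->getParentOp());
if (argIface) {
  // ... match op.getMemref() against the interface's block arguments ...
}
```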
@tblah
// CHECK-LABEL: Testing : "_QQmain"
// CHECK-DAG: tmp_private_array#0 <-> unnamed_array#0: NoAlias
// CHECK-DAG: tmp_private_array#1 <-> unnamed_array#0: NoAlias |
Can you add a positive case? I understand NoAlias is what matter from an optimization point of view, but MayAlias is the most important to prove from a correctness point of view, so I think it is always good to add both negative and positive test cases here.
Done
This reverts commit f3025c8.
Enable alias analysis for omp private clause for code:

```fortran
program main
  integer :: arrayA(10,10)
  integer :: tmp(2)
  integer :: i,j
  !$omp target teams distribute parallel do private(tmp)
  do j = 1, 10
    do i = 1,10
      tmp = [i,j]
      arrayA = tmp(1)
    end do
  end do
end program main
```
Enable alias analysis for omp private clause for code:

```fortran
program main
  integer :: arrayA(10,10)
  integer :: tmp(2)
  integer :: i,j
  !$omp target teams distribute parallel do private(tmp)
  do j = 1, 10
    do i = 1,10
      tmp = [i,j]
      arrayA = tmp(1)
    end do
  end do
end program main
```

This PR is based on #113566 and contains a fix for the Fujitsu test suite. The previous PR introduced a regression in the Fujitsu test suite: for some Fujitsu test cases the `omp.yield` operation points to a block argument. Alias analysis for such MLIR code will be added in a separate PR.