Skip to content

Commit 1013514

Browse files
[MLIR][OpenMP] Fix GPU codegen for omp teams distribute (#29)
The `omp target teams distribute` pragma should use the `__kmpc_distribute_static_loop` runtime function for loop worksharing.
1 parent 58ac649 commit 1013514

File tree

2 files changed

+18
-18
lines changed

2 files changed

+18
-18
lines changed

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3910,6 +3910,7 @@ static void createTargetLoopWorkshareCall(
39103910
RealArgs.push_back(TripCount);
39113911
if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
39123912
RealArgs.push_back(ConstantInt::get(TripCountTy, 0));
3913+
Builder.restoreIP({InsertBlock, std::prev(InsertBlock->end())});
39133914
Builder.CreateCall(RTLFn, RealArgs);
39143915
return;
39153916
}

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 17 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1001,24 +1001,23 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
10011001
std::optional<omp::ScheduleModifier> scheduleModifier =
10021002
loop.getScheduleModifier();
10031003
bool isSimd = loop.getSimdModifier();
1004-
// TODO: Handle distribute loop without parallel clause
1005-
bool distributeParallelCodeGen = opInst.getParentOfType<omp::DistributeOp>();
1006-
if (distributeParallelCodeGen) {
1007-
ompBuilder->applyWorkshareLoop(
1008-
ompLoc.DL, loopInfo, allocaIP, !loop.getNowait(),
1009-
convertToScheduleKind(schedule), chunk, isSimd,
1010-
scheduleModifier == omp::ScheduleModifier::monotonic,
1011-
scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered,
1012-
llvm::omp::WorksharingLoopType::DistributeForStaticLoop);
1013-
}
1014-
else {
1015-
ompBuilder->applyWorkshareLoop(
1016-
ompLoc.DL, loopInfo, allocaIP, !loop.getNowait(),
1017-
convertToScheduleKind(schedule), chunk, isSimd,
1018-
scheduleModifier == omp::ScheduleModifier::monotonic,
1019-
scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered,
1020-
llvm::omp::WorksharingLoopType::ForStaticLoop);
1004+
1005+
bool distributeCodeGen = opInst.getParentOfType<omp::DistributeOp>();
1006+
bool parallelCodeGen = opInst.getParentOfType<omp::ParallelOp>();
1007+
llvm::omp::WorksharingLoopType workshareLoopType;
1008+
if (distributeCodeGen && parallelCodeGen) {
1009+
workshareLoopType = llvm::omp::WorksharingLoopType::DistributeForStaticLoop;
1010+
} else if (distributeCodeGen) {
1011+
workshareLoopType = llvm::omp::WorksharingLoopType::DistributeStaticLoop;
1012+
} else {
1013+
workshareLoopType = llvm::omp::WorksharingLoopType::ForStaticLoop;
10211014
}
1015+
ompBuilder->applyWorkshareLoop(
1016+
ompLoc.DL, loopInfo, allocaIP, !loop.getNowait(),
1017+
convertToScheduleKind(schedule), chunk, isSimd,
1018+
scheduleModifier == omp::ScheduleModifier::monotonic,
1019+
scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered,
1020+
workshareLoopType);
10221021

10231022
// Continue building IR after the loop. Note that the LoopInfo returned by
10241023
// `collapseLoops` points inside the outermost loop and is intended for
@@ -1044,7 +1043,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
10441043
ompBuilder->createReductions(builder.saveIP(), allocaIP,
10451044
ompBuilder->RIManager.getReductionInfos(),
10461045
loop.getNowait(), /*IsTeamsReduction*/ false,
1047-
/*HasDistribute*/ distributeParallelCodeGen);
1046+
/*HasDistribute*/ distributeCodeGen);
10481047
if (!contInsertPoint.getBlock())
10491048
return loop->emitOpError() << "failed to convert reductions";
10501049
auto nextInsertionPoint =

0 commit comments

Comments
 (0)