Skip to content

Commit daa1820

Browse files
authored
[Flang][OpenMP] Fix copyin allocatable lowering to MLIR (#122097)
Fixes #113191 Issue: [flang][OpenMP] Runtime segfault when an allocatable variable is used with copyin Rootcause: The value of the threadprivate variable is not being copied from the primary thread to the other threads within a parallel region. As a result it tries to access a null pointer inside a parallel region which causes segfault. Fix: When allocatables used with copyin clause need to ensure that, on entry to any parallel region each thread’s copy of a variable will acquire the allocation status of the primary thread, before copying the value of a threadprivate variable of the primary thread to the threadprivate variable of each other member of the team.
1 parent 646f034 commit daa1820

File tree

2 files changed

+127
-12
lines changed

2 files changed

+127
-12
lines changed

flang/lib/Lower/Bridge.cpp

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,9 +1290,30 @@ class FirConverter : public Fortran::lower::AbstractConverter {
12901290
auto loadVal = builder->create<fir::LoadOp>(loc, rhs);
12911291
builder->create<fir::StoreOp>(loc, loadVal, lhs);
12921292
} else if (isAllocatable &&
1293-
(flags.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate) ||
1294-
flags.test(Fortran::semantics::Symbol::Flag::OmpCopyIn))) {
1295-
// For firstprivate and copyin allocatable variables, RHS must be copied
1293+
flags.test(Fortran::semantics::Symbol::Flag::OmpCopyIn)) {
1294+
// For copyin allocatable variables, RHS must be copied to lhs
1295+
// only when rhs is allocated.
1296+
hlfir::Entity temp =
1297+
hlfir::derefPointersAndAllocatables(loc, *builder, rhs);
1298+
mlir::Value addr = hlfir::genVariableRawAddress(loc, *builder, temp);
1299+
mlir::Value isAllocated = builder->genIsNotNullAddr(loc, addr);
1300+
builder->genIfThenElse(loc, isAllocated)
1301+
.genThen([&]() { copyData(lhs, rhs); })
1302+
.genElse([&]() {
1303+
fir::ExtendedValue hexv = symBoxToExtendedValue(dst);
1304+
hexv.match(
1305+
[&](const fir::MutableBoxValue &new_box) -> void {
1306+
// if the allocation status of original list item is
1307+
// unallocated, unallocate the copy if it is allocated, else
1308+
// do nothing.
1309+
Fortran::lower::genDeallocateIfAllocated(*this, new_box, loc);
1310+
},
1311+
[&](const auto &) -> void {});
1312+
})
1313+
.end();
1314+
} else if (isAllocatable &&
1315+
flags.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) {
1316+
// For firstprivate allocatable variables, RHS must be copied
12961317
// only when LHS is allocated.
12971318
hlfir::Entity temp =
12981319
hlfir::derefPointersAndAllocatables(loc, *builder, lhs);

flang/test/Lower/OpenMP/copyin.f90

Lines changed: 103 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -395,12 +395,34 @@ subroutine pointer()
395395
! CHECK: %[[VAL_4:.*]] = omp.threadprivate %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
396396
! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatableEp"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
397397
! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
398-
! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_5]]#0 realloc : !fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
398+
! CHECK: %[[VAL_7:.*]] = fir.box_addr %[[VAL_6]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
399+
! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (!fir.heap<!fir.array<?xi32>>) -> i64
400+
! CHECK: %[[C0_I64:.*]] = arith.constant 0 : i64
401+
! CHECK: %[[VAL_9:.*]] = arith.cmpi ne, %[[VAL_8]], %[[C0_I64]] : i64
402+
! CHECK: fir.if %[[VAL_9]] {
403+
! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
404+
! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_5]]#0 realloc : !fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
405+
! CHECK: } else {
406+
! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_5]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
407+
! CHECK: %[[VAL_11:.*]] = fir.box_addr %[[VAL_10]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
408+
! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (!fir.heap<!fir.array<?xi32>>) -> i64
409+
! CHECK: %[[C0_I64_0:.*]] = arith.constant 0 : i64
410+
! CHECK: %[[VAL_13:.*]] = arith.cmpi ne, %[[VAL_12]], %[[C0_I64_0]] : i64
411+
! CHECK: fir.if %[[VAL_13]] {
412+
! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_5]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
413+
! CHECK: %[[VAL_15:.*]] = fir.box_addr %[[VAL_14]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
414+
! CHECK: fir.freemem %[[VAL_15]] : !fir.heap<!fir.array<?xi32>>
415+
! CHECK: %[[VAL_16:.*]] = fir.zero_bits !fir.heap<!fir.array<?xi32>>
416+
! CHECK: %[[C0:.*]] = arith.constant 0 : index
417+
! CHECK: %[[VAL_17:.*]] = fir.shape %[[C0]] : (index) -> !fir.shape<1>
418+
! CHECK: %[[VAL_18:.*]] = fir.embox %[[VAL_16]](%[[VAL_17]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
419+
! CHECK: fir.store %[[VAL_18]] to %[[VAL_5]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
420+
! CHECK: }
399421
! CHECK: omp.barrier
400-
! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
401-
! CHECK: %[[VAL_8:.*]] = fir.box_addr %[[VAL_7]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
402-
! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
403-
! CHECK: fir.call @_QPsub8(%[[VAL_9]]) fastmath<contract> : (!fir.ref<!fir.array<?xi32>>) -> ()
422+
! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
423+
! CHECK: %[[VAL_20:.*]] = fir.box_addr %[[VAL_19]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
424+
! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
425+
! CHECK: fir.call @_QPsub8(%[[VAL_21]]) fastmath<contract> : (!fir.ref<!fir.array<?xi32>>) -> ()
404426
! CHECK: omp.terminator
405427
! CHECK: }
406428
! CHECK: return
@@ -422,7 +444,7 @@ subroutine allocatable()
422444
! CHECK: omp.parallel {
423445
! CHECK: %[[VAL_4:.*]] = omp.threadprivate %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>> -> !fir.ref<!fir.box<!fir.heap<i32>>>
424446
! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatable2Ea"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
425-
! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
447+
! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
426448
! CHECK: %[[VAL_7:.*]] = fir.box_addr %[[VAL_6]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
427449
! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (!fir.heap<i32>) -> i64
428450
! CHECK: %[[VAL_9:.*]] = arith.constant 0 : i64
@@ -432,10 +454,23 @@ subroutine allocatable()
432454
! CHECK: %[[VAL_12:.*]] = fir.box_addr %[[VAL_11]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
433455
! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]] : !fir.heap<i32>
434456
! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_5]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<i32>>>
435-
! CHECK: }
457+
! CHECK: } else {
458+
! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>>
459+
! CHECK: %[[VAL_15:.*]] = fir.box_addr %[[VAL_11]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
460+
! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (!fir.heap<i32>) -> i64
461+
! CHECK: %[[C0_I64_0:.*]] = arith.constant 0 : i64
462+
! CHECK: %[[VAL_17:.*]] = arith.cmpi ne, %[[VAL_16]], %[[C0_I64_0]] : i64
463+
! CHECK: fir.if %[[VAL_17]] {
464+
! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_5]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>>
465+
! CHECK: %[[VAL_19:.*]] = fir.box_addr %[[VAL_18]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
466+
! CHECK: fir.freemem %[[VAL_19]] : !fir.heap<i32>
467+
! CHECK: %[[VAL_20:.*]] = fir.zero_bits !fir.heap<i32>
468+
! CHECK: %[[VAL_21:.*]] = fir.embox %[[VAL_20]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
469+
! CHECK: fir.store %[[VAL_21]] to %[[VAL_5]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>>
470+
! CHECK: }
436471
! CHECK: omp.barrier
437-
! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32
438-
! CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_5]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<i32>>>
472+
! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32
473+
! CHECK: hlfir.assign %[[VAL_22]] to %[[VAL_5]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<i32>>>
439474
! CHECK: omp.terminator
440475
! CHECK: }
441476
! CHECK: return
@@ -448,3 +483,62 @@ subroutine allocatable2()
448483
a = 1
449484
!$omp end parallel
450485
end subroutine
486+
487+
! CHECK: func.func @_QPallocatable3() {
488+
! CHECK: %[[VAL_0:.*]] = fir.address_of(@_QFallocatable3Ea) : !fir.ref<!fir.box<!fir.heap<i32>>>
489+
! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatable3Ea"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
490+
! CHECK: %[[VAL_2:.*]] = omp.threadprivate %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>> -> !fir.ref<!fir.box<!fir.heap<i32>>>
491+
! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatable3Ea"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
492+
! CHECK: %[[VAL_4:.*]] = fir.allocmem i32 {fir.must_be_heap = true, uniq_name = "_QFallocatable3Ea.alloc"}
493+
! CHECK: %[[VAL_5:.*]] = fir.embox %[[VAL_4]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
494+
! CHECK: fir.store %[[VAL_5]] to %[[VAL_3]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>>
495+
! CHECK: %[[C10_I32:.*]] = arith.constant 10 : i32
496+
! CHECK: hlfir.assign %[[C10_I32]] to %[[VAL_3]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<i32>>>
497+
! CHECK: omp.parallel {
498+
! CHECK: %[[VAL_6:.*]] = omp.threadprivate %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>> -> !fir.ref<!fir.box<!fir.heap<i32>>>
499+
! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatable3Ea"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
500+
! CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
501+
! CHECK: %[[VAL_9:.*]] = fir.box_addr %[[VAL_8]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
502+
! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (!fir.heap<i32>) -> i64
503+
! CHECK: %[[C10_I64:.*]] = arith.constant 0 : i64
504+
! CHECK: %[[VAL_11:.*]] = arith.cmpi ne, %[[VAL_10]], %[[C10_I64]] : i64
505+
! CHECK: fir.if %[[VAL_11]] {
506+
! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
507+
! CHECK: %[[VAL_13:.*]] = fir.box_addr %[[VAL_12]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
508+
! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_13]] : !fir.heap<i32>
509+
! CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_7]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<i32>>>
510+
! CHECK: } else {
511+
! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_7]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>>
512+
! CHECK: %[[VAL_15:.*]] = fir.box_addr %[[VAL_12]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
513+
! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (!fir.heap<i32>) -> i64
514+
! CHECK: %[[C0_I64_0:.*]] = arith.constant 0 : i64
515+
! CHECK: %[[VAL_17:.*]] = arith.cmpi ne, %[[VAL_16]], %[[C0_I64_0]] : i64
516+
! CHECK: fir.if %[[VAL_17]] {
517+
! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_7]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>>
518+
! CHECK: %[[VAL_19:.*]] = fir.box_addr %[[VAL_18]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
519+
! CHECK: fir.freemem %[[VAL_19]] : !fir.heap<i32>
520+
! CHECK: %[[VAL_20:.*]] = fir.zero_bits !fir.heap<i32>
521+
! CHECK: %[[VAL_21:.*]] = fir.embox %[[VAL_20]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
522+
! CHECK: fir.store %[[VAL_21]] to %[[VAL_7]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>>
523+
! CHECK: }
524+
! CHECK: }
525+
! CHECK: omp.barrier
526+
! CHECK: %[[VAL_22:.*]] = fir.load %7#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
527+
! CHECK: %[[VAL_23:.*]] = fir.box_addr %[[VAL_22]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
528+
! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.heap<i32>
529+
! CHECK: %[[C1_I32:.*]] = arith.constant 1 : i32
530+
! CHECK: %[[VAL_25:.*]]= arith.addi %[[VAL_24]], %[[C1_I32]] : i32
531+
! CHECK: hlfir.assign %[[VAL_25]]to %[[VAL_7]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<i32>>>
532+
! CHECK: omp.terminator
533+
! CHECK: }
534+
! CHECK: return
535+
! CHECK: }
536+
subroutine allocatable3()
537+
integer, allocatable, save :: a
538+
!$omp threadprivate(a)
539+
allocate(a)
540+
a = 10
541+
!$omp parallel copyin(a)
542+
a = a + 1
543+
!$omp end parallel
544+
end subroutine

0 commit comments

Comments
 (0)