Support OpenACC 'declare create' with Fortran allocatable arrays, part II [PR106643, PR96668]
PR libgomp/106643 PR fortran/96668 libgomp/ * oacc-mem.c (goacc_enter_data_internal): Support OpenACC 'declare create' with Fortran allocatable arrays, part II. * testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-directive.f90: Adjust. * testsuite/libgomp.oacc-fortran/pr106643-1.f90: New.
This commit is contained in:
parent
da8e0e1191
commit
f6ce1e77bb
3 changed files with 160 additions and 28 deletions
|
@ -1166,7 +1166,10 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
|
|||
|
||||
struct target_mem_desc *tgt = n->tgt;
|
||||
|
||||
/* Arrange so that OpenACC 'declare' code à la PR106643
|
||||
/* Minimal OpenACC variant corresponding to PR96668
|
||||
"[OpenMP] Re-mapping allocated but previously unallocated
|
||||
allocatable does not work" 'libgomp/target.c' changes, so that
|
||||
OpenACC 'declare' code à la PR106643
|
||||
"[gfortran + OpenACC] Allocate in module causes refcount error"
|
||||
has a chance to work. */
|
||||
if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET
|
||||
|
@ -1181,6 +1184,16 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
|
|||
assert ((kinds[i + k] & 0xff) == GOMP_MAP_POINTER);
|
||||
}
|
||||
|
||||
/* Let 'goacc_map_vars' -> 'gomp_map_vars_internal' handle
|
||||
this. */
|
||||
gomp_mutex_unlock (&acc_dev->lock);
|
||||
struct target_mem_desc *tgt_
|
||||
= goacc_map_vars (acc_dev, aq, groupnum, &hostaddrs[i], NULL,
|
||||
&sizes[i], &kinds[i], true,
|
||||
GOMP_MAP_VARS_ENTER_DATA);
|
||||
assert (tgt_ == NULL);
|
||||
gomp_mutex_lock (&acc_dev->lock);
|
||||
|
||||
/* Given that 'goacc_exit_data_internal'/'goacc_exit_datum_1'
|
||||
will always see 'n->refcount == REFCOUNT_INFINITY',
|
||||
there's no need to adjust 'n->dynamic_refcount' here. */
|
||||
|
|
|
@ -105,27 +105,50 @@ program test
|
|||
!$acc enter data create (b)
|
||||
! This is now OpenACC "present":
|
||||
if (.not.acc_is_present (b)) error stop
|
||||
! This still has the initial array descriptor:
|
||||
! ..., and got the actual array descriptor installed:
|
||||
!$acc serial
|
||||
call verify_initial
|
||||
call verify_n1_allocated
|
||||
!$acc end serial
|
||||
|
||||
do i = n1_lb, n1_ub
|
||||
b(i) = i - 1
|
||||
end do
|
||||
|
||||
! Verify that host-to-device copy doesn't touch the device-side (still
|
||||
! initial) array descriptor (but it does copy the array data).
|
||||
! In 'declare-allocatable-array_descriptor-1-runtime.f90', this does "verify
|
||||
! that host-to-device copy doesn't touch the device-side (still initial)
|
||||
! array descriptor (but it does copy the array data"). This is here not
|
||||
! applicable anymore, as we've already gotten the actual array descriptor
|
||||
! installed. Thus now verify that it does copy the array data.
|
||||
call acc_update_device (b)
|
||||
!$acc serial
|
||||
call verify_initial
|
||||
call verify_n1_allocated
|
||||
!$acc end serial
|
||||
|
||||
b = 40
|
||||
|
||||
! Verify that device-to-host copy doesn't touch the host-side array
|
||||
! descriptor, doesn't copy out the device-side (still initial) array
|
||||
! descriptor (but it does copy the array data).
|
||||
!$acc parallel copyout (id1_1) ! No data clause for 'b' (explicit or implicit): no 'GOMP_MAP_TO_PSET'.
|
||||
call verify_n1_values (-1)
|
||||
id1_1 = 0
|
||||
!$acc end parallel
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc parallel map\(from:id1_1\)$} 1 original } }
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(from:id1_1 \[len: [0-9]+\]\)$} 1 gimple } }
|
||||
|
||||
!$acc parallel copy (b) copyout (id1_2)
|
||||
! As already present, 'copy (b)' doesn't copy; addend is still '-1'.
|
||||
call verify_n1_values (-1)
|
||||
id1_2 = 0
|
||||
!$acc end parallel
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc parallel map\(tofrom:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2\)$} 1 original } }
|
||||
!TODO ..., but without an actual use of 'b', the gimplifier removes the
|
||||
!TODO 'GOMP_MAP_TO_PSET':
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(tofrom:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2 \[len: [0-9]+\]\)$} 1 gimple } }
|
||||
|
||||
! In 'declare-allocatable-array_descriptor-1-runtime.f90', this does "verify
|
||||
! that device-to-host copy doesn't touch the host-side array descriptor,
|
||||
! doesn't copy out the device-side (still initial) array descriptor (but it
|
||||
! does copy the array data)". This is here not applicable anymore, as we've
|
||||
! already gotten the actual array descriptor installed. Thus now verify that
|
||||
! it does copy the array data.
|
||||
call acc_update_self (b)
|
||||
call verify_n1_allocated
|
||||
|
||||
|
@ -142,11 +165,19 @@ program test
|
|||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_update map\(force_to:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_1 \[len: [0-9]+\]\)$} 1 gimple } }
|
||||
! ..., but it's silently skipped in 'GOACC_update'.
|
||||
!$acc serial
|
||||
call verify_initial
|
||||
call verify_n1_allocated
|
||||
!$acc end serial
|
||||
|
||||
b = 41
|
||||
|
||||
!$acc parallel
|
||||
call verify_n1_values (1)
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel copy (b)
|
||||
call verify_n1_values (1)
|
||||
!$acc end parallel
|
||||
|
||||
!$acc update self (b) self (id1_2)
|
||||
! We do have 'GOMP_MAP_TO_PSET' here:
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc update map\(force_from:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_2\);$} 1 original } }
|
||||
|
@ -159,20 +190,9 @@ program test
|
|||
b(i) = b(i) + 2
|
||||
end do
|
||||
|
||||
! Now install the actual array descriptor, via a data clause for 'b'
|
||||
! (explicit or implicit): must get a 'GOMP_MAP_TO_PSET', which then in
|
||||
! 'gomp_map_vars_internal' is handled as 'declare target', and because of
|
||||
! '*(void **) hostaddrs[i] != NULL', we've got 'has_always_ptrset == true',
|
||||
! 'always_to_cnt == 1', and therefore 'gomp_map_vars_existing' does update
|
||||
! the 'GOMP_MAP_TO_PSET'.
|
||||
!$acc serial present (b) copyin (id1_1)
|
||||
call verify_initial
|
||||
id1_1 = 0
|
||||
!$acc end serial
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc serial map\(force_present:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(to:id1_1\)$} 1 original } }
|
||||
!TODO ..., but without an actual use of 'b', the gimplifier removes the
|
||||
!TODO 'GOMP_MAP_TO_PSET':
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_serial map\(force_present:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(to:id1_1 \[len: [0-9]+\]\)$} 1 gimple } }
|
||||
! Now test that (potentially re-)installing the actual array descriptor is a
|
||||
! no-op, via a data clause for 'b' (explicit or implicit): must get a
|
||||
! 'GOMP_MAP_TO_PSET'.
|
||||
!$acc serial present (b) copyin (id1_2)
|
||||
call verify_n1_allocated
|
||||
!TODO Use of 'b':
|
||||
|
@ -243,9 +263,9 @@ program test
|
|||
if (acc_is_present (b)) error stop
|
||||
!$acc enter data create (b)
|
||||
if (.not.acc_is_present (b)) error stop
|
||||
! This still has the previous (n1) array descriptor:
|
||||
! ..., and got the actual array descriptor installed:
|
||||
!$acc serial
|
||||
call verify_n1_deallocated (.true.)
|
||||
call verify_n2_allocated
|
||||
!$acc end serial
|
||||
|
||||
do i = n2_lb, n2_ub
|
||||
|
@ -254,11 +274,19 @@ program test
|
|||
|
||||
call acc_update_device (b)
|
||||
!$acc serial
|
||||
call verify_n1_deallocated (.true.)
|
||||
call verify_n2_allocated
|
||||
!$acc end serial
|
||||
|
||||
b = -40
|
||||
|
||||
!$acc parallel
|
||||
call verify_n2_values (20)
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel copy (b)
|
||||
call verify_n2_values (20)
|
||||
!$acc end parallel
|
||||
|
||||
call acc_update_self (b)
|
||||
call verify_n2_allocated
|
||||
|
||||
|
@ -269,11 +297,19 @@ program test
|
|||
|
||||
!$acc update device (b)
|
||||
!$acc serial
|
||||
call verify_n1_deallocated (.true.)
|
||||
call verify_n2_allocated
|
||||
!$acc end serial
|
||||
|
||||
b = -41
|
||||
|
||||
!$acc parallel
|
||||
call verify_n2_values (-20)
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel copy (b)
|
||||
call verify_n2_values (-20)
|
||||
!$acc end parallel
|
||||
|
||||
!$acc update self (b)
|
||||
call verify_n2_allocated
|
||||
|
||||
|
|
83
libgomp/testsuite/libgomp.oacc-fortran/pr106643-1.f90
Normal file
83
libgomp/testsuite/libgomp.oacc-fortran/pr106643-1.f90
Normal file
|
@ -0,0 +1,83 @@
|
|||
! { dg-do run }
|
||||
! { dg-additional-options -cpp }
|
||||
|
||||
|
||||
!TODO OpenACC 'serial' vs. GCC/nvptx:
|
||||
!TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} }
|
||||
|
||||
|
||||
module m_macron
|
||||
|
||||
implicit none
|
||||
|
||||
real(kind(0d0)), allocatable, dimension(:) :: valls
|
||||
!$acc declare create(valls)
|
||||
|
||||
contains
|
||||
|
||||
subroutine s_macron_compute(size)
|
||||
|
||||
integer :: size
|
||||
|
||||
!$acc routine seq
|
||||
|
||||
#if ACC_MEM_SHARED
|
||||
if (valls(size) /= 1) error stop
|
||||
#else
|
||||
if (valls(size) /= size - 2) error stop
|
||||
#endif
|
||||
|
||||
valls(size) = size + 2
|
||||
|
||||
end subroutine s_macron_compute
|
||||
|
||||
subroutine s_macron_init(size)
|
||||
|
||||
integer :: size
|
||||
|
||||
print*, "size=", size
|
||||
|
||||
print*, "allocate(valls(1:size))"
|
||||
allocate(valls(1:size))
|
||||
|
||||
print*, "acc enter data create(valls(1:size))"
|
||||
!$acc enter data create(valls(1:size))
|
||||
|
||||
print*, "!$acc update device(valls(1:size))"
|
||||
valls(size) = size - 2
|
||||
!$acc update device(valls(1:size))
|
||||
|
||||
valls(size) = 1
|
||||
|
||||
!$acc serial
|
||||
call s_macron_compute(size)
|
||||
!$acc end serial
|
||||
|
||||
valls(size) = -1
|
||||
|
||||
!$acc update host(valls(1:size))
|
||||
#if ACC_MEM_SHARED
|
||||
if (valls(size) /= -1) error stop
|
||||
#else
|
||||
if (valls(size) /= size + 2) error stop
|
||||
#endif
|
||||
|
||||
print*, valls(1:size)
|
||||
|
||||
print*, "acc exit data delete(valls)"
|
||||
!$acc exit data delete(valls)
|
||||
|
||||
end subroutine s_macron_init
|
||||
|
||||
end module m_macron
|
||||
|
||||
|
||||
program p_main
|
||||
|
||||
use m_macron
|
||||
|
||||
implicit none
|
||||
|
||||
call s_macron_init(10)
|
||||
|
||||
end program p_main
|
Loading…
Add table
Reference in a new issue