Support OpenACC 'declare create' with Fortran allocatable arrays, part I [PR106643]
PR libgomp/106643 libgomp/ * oacc-mem.c (goacc_enter_data_internal): Support OpenACC 'declare create' with Fortran allocatable arrays, part I. * testsuite/libgomp.oacc-fortran/declare-allocatable-1-directive.f90: New. * testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-directive.f90: New.
This commit is contained in:
parent
abeaf3735f
commit
da8e0e1191
3 changed files with 706 additions and 2 deletions
|
@ -1150,8 +1150,7 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
|
|||
}
|
||||
else if (n && groupnum > 1)
|
||||
{
|
||||
assert (n->refcount != REFCOUNT_INFINITY
|
||||
&& n->refcount != REFCOUNT_LINK);
|
||||
assert (n->refcount != REFCOUNT_LINK);
|
||||
|
||||
for (size_t j = i + 1; j <= group_last; j++)
|
||||
if ((kinds[j] & 0xff) == GOMP_MAP_ATTACH)
|
||||
|
@ -1166,6 +1165,31 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
|
|||
bool processed = false;
|
||||
|
||||
struct target_mem_desc *tgt = n->tgt;
|
||||
|
||||
/* Arrange so that OpenACC 'declare' code à la PR106643
|
||||
"[gfortran + OpenACC] Allocate in module causes refcount error"
|
||||
has a chance to work. */
|
||||
if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET
|
||||
&& tgt->list_count == 0)
|
||||
{
|
||||
/* 'declare target'. */
|
||||
assert (n->refcount == REFCOUNT_INFINITY);
|
||||
|
||||
for (size_t k = 1; k < groupnum; k++)
|
||||
{
|
||||
/* The only thing we expect to see here. */
|
||||
assert ((kinds[i + k] & 0xff) == GOMP_MAP_POINTER);
|
||||
}
|
||||
|
||||
/* Given that 'goacc_exit_data_internal'/'goacc_exit_datum_1'
|
||||
will always see 'n->refcount == REFCOUNT_INFINITY',
|
||||
there's no need to adjust 'n->dynamic_refcount' here. */
|
||||
|
||||
processed = true;
|
||||
}
|
||||
else
|
||||
assert (n->refcount != REFCOUNT_INFINITY);
|
||||
|
||||
for (size_t j = 0; j < tgt->list_count; j++)
|
||||
if (tgt->list[j].key == n)
|
||||
{
|
||||
|
|
|
@ -0,0 +1,278 @@
|
|||
! Test OpenACC 'declare create' with allocatable arrays.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
!TODO-OpenACC-declare-allocate
|
||||
! Missing support for OpenACC "Changes from Version 2.0 to 2.5":
|
||||
! "The 'declare create' directive with a Fortran 'allocatable' has new behavior".
|
||||
! Thus, after 'allocate'/before 'deallocate', do
|
||||
! '!$acc enter data create'/'!$acc exit data delete' manually.
|
||||
|
||||
!TODO { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'.
|
||||
|
||||
! { dg-additional-options -fopt-info-all-omp }
|
||||
! { dg-additional-options -foffload=-fopt-info-all-omp }
|
||||
|
||||
! { dg-additional-options --param=openacc-privatization=noisy }
|
||||
! { dg-additional-options -foffload=--param=openacc-privatization=noisy }
|
||||
! Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types):
|
||||
! { dg-prune-output {note: variable '[Di]\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} }
|
||||
|
||||
! { dg-additional-options -Wopenacc-parallelism }
|
||||
|
||||
! It's only with Tcl 8.5 (released in 2007) that "the variable 'varName'
|
||||
! passed to 'incr' may be unset, and in that case, it will be set to [...]",
|
||||
! so to maintain compatibility with earlier Tcl releases, we manually
|
||||
! initialize counter variables:
|
||||
! { dg-line l_dummy[variable c 0] }
|
||||
! { dg-message dummy {} { target iN-VAl-Id } l_dummy } to avoid
|
||||
! "WARNING: dg-line var l_dummy defined, but not used".
|
||||
|
||||
|
||||
module vars
|
||||
implicit none
|
||||
integer, parameter :: n = 100
|
||||
real*8, allocatable :: b(:)
|
||||
!$acc declare create (b)
|
||||
end module vars
|
||||
|
||||
program test
|
||||
use vars
|
||||
use openacc
|
||||
implicit none
|
||||
real*8 :: a
|
||||
integer :: i
|
||||
|
||||
interface
|
||||
subroutine sub1
|
||||
!$acc routine gang
|
||||
end subroutine sub1
|
||||
|
||||
subroutine sub2
|
||||
end subroutine sub2
|
||||
|
||||
real*8 function fun1 (ix)
|
||||
integer ix
|
||||
!$acc routine seq
|
||||
end function fun1
|
||||
|
||||
real*8 function fun2 (ix)
|
||||
integer ix
|
||||
!$acc routine seq
|
||||
end function fun2
|
||||
end interface
|
||||
|
||||
if (allocated (b)) error stop
|
||||
|
||||
! Test local usage of an allocated declared array.
|
||||
|
||||
allocate (b(n))
|
||||
!$acc enter data create (b)
|
||||
|
||||
if (.not.allocated (b)) error stop
|
||||
if (.not.acc_is_present (b)) error stop
|
||||
|
||||
a = 2.0
|
||||
|
||||
!$acc parallel loop ! { dg-line l[incr c] }
|
||||
! { dg-note {variable 'i' in 'private' clause is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l$c }
|
||||
! { dg-note {variable 'i' ought to be adjusted for OpenACC privatization level: 'vector'} {} { target *-*-* } l$c }
|
||||
! { dg-note {variable 'i' adjusted for OpenACC privatization level: 'vector'} {} { target { ! openacc_host_selected } } l$c }
|
||||
! { dg-note {variable 'i\.[0-9]+' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c }
|
||||
! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c }
|
||||
do i = 1, n
|
||||
b(i) = i * a
|
||||
end do
|
||||
|
||||
if (.not.acc_is_present (b)) error stop
|
||||
|
||||
!$acc update host(b)
|
||||
|
||||
do i = 1, n
|
||||
if (b(i) /= i*a) error stop
|
||||
end do
|
||||
|
||||
!$acc exit data delete (b)
|
||||
deallocate (b)
|
||||
|
||||
! Test the usage of an allocated declared array inside an acc
|
||||
! routine subroutine.
|
||||
|
||||
allocate (b(n))
|
||||
!$acc enter data create (b)
|
||||
|
||||
if (.not.allocated (b)) error stop
|
||||
if (.not.acc_is_present (b)) error stop
|
||||
|
||||
!$acc parallel
|
||||
call sub1 ! { dg-line l[incr c] }
|
||||
! { dg-optimized {assigned OpenACC gang worker vector loop parallelism} {} { target *-*-* } l$c }
|
||||
!$acc end parallel
|
||||
|
||||
if (.not.acc_is_present (b)) error stop
|
||||
|
||||
!$acc update host(b)
|
||||
|
||||
do i = 1, n
|
||||
if (b(i) /= i*2) error stop
|
||||
end do
|
||||
|
||||
!$acc exit data delete (b)
|
||||
deallocate (b)
|
||||
|
||||
! Test the usage of an allocated declared array inside a host
|
||||
! subroutine.
|
||||
|
||||
call sub2
|
||||
|
||||
if (.not.acc_is_present (b)) error stop
|
||||
|
||||
!$acc update host(b)
|
||||
|
||||
do i = 1, n
|
||||
if (b(i) /= 1.0) error stop
|
||||
end do
|
||||
|
||||
!$acc exit data delete (b)
|
||||
deallocate (b)
|
||||
|
||||
if (allocated (b)) error stop
|
||||
|
||||
! Test the usage of an allocated declared array inside an acc
|
||||
! routine function.
|
||||
|
||||
allocate (b(n))
|
||||
!$acc enter data create (b)
|
||||
|
||||
if (.not.allocated (b)) error stop
|
||||
if (.not.acc_is_present (b)) error stop
|
||||
|
||||
!$acc parallel loop ! { dg-line l[incr c] }
|
||||
! { dg-note {variable 'i' in 'private' clause is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l$c }
|
||||
! { dg-note {variable 'i' ought to be adjusted for OpenACC privatization level: 'vector'} {} { target *-*-* } l$c }
|
||||
! { dg-note {variable 'i' adjusted for OpenACC privatization level: 'vector'} {} { target { ! openacc_host_selected } } l$c }
|
||||
! { dg-note {variable 'i\.[0-9]+' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c }
|
||||
! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c }
|
||||
do i = 1, n
|
||||
b(i) = 1.0
|
||||
end do
|
||||
|
||||
!$acc parallel loop ! { dg-line l[incr c] }
|
||||
! { dg-note {variable 'i' in 'private' clause is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l$c }
|
||||
! { dg-note {variable 'i' ought to be adjusted for OpenACC privatization level: 'vector'} {} { target *-*-* } l$c }
|
||||
! { dg-note {variable 'i' adjusted for OpenACC privatization level: 'vector'} {} { target { ! openacc_host_selected } } l$c }
|
||||
! { dg-note {variable 'i\.[0-9]+' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c }
|
||||
! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c }
|
||||
do i = 1, n
|
||||
b(i) = fun1 (i) ! { dg-line l[incr c] }
|
||||
! { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l$c }
|
||||
end do
|
||||
|
||||
if (.not.acc_is_present (b)) error stop
|
||||
|
||||
!$acc update host(b)
|
||||
|
||||
do i = 1, n
|
||||
if (b(i) /= i) error stop
|
||||
end do
|
||||
|
||||
!$acc exit data delete (b)
|
||||
deallocate (b)
|
||||
|
||||
! Test the usage of an allocated declared array inside a host
|
||||
! function.
|
||||
|
||||
allocate (b(n))
|
||||
!$acc enter data create (b)
|
||||
|
||||
if (.not.allocated (b)) error stop
|
||||
if (.not.acc_is_present (b)) error stop
|
||||
|
||||
!$acc parallel loop ! { dg-line l[incr c] }
|
||||
! { dg-note {variable 'i' in 'private' clause is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l$c }
|
||||
! { dg-note {variable 'i' ought to be adjusted for OpenACC privatization level: 'vector'} {} { target *-*-* } l$c }
|
||||
! { dg-note {variable 'i' adjusted for OpenACC privatization level: 'vector'} {} { target { ! openacc_host_selected } } l$c }
|
||||
! { dg-note {variable 'i\.[0-9]+' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c }
|
||||
! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c }
|
||||
do i = 1, n
|
||||
b(i) = 1.0
|
||||
end do
|
||||
|
||||
!$acc update host(b)
|
||||
|
||||
do i = 1, n
|
||||
b(i) = fun2 (i)
|
||||
end do
|
||||
|
||||
if (.not.acc_is_present (b)) error stop
|
||||
|
||||
do i = 1, n
|
||||
if (b(i) /= i*i) error stop
|
||||
end do
|
||||
|
||||
!$acc exit data delete (b)
|
||||
deallocate (b)
|
||||
end program test ! { dg-line l[incr c] }
|
||||
! { dg-bogus {note: variable 'overflow\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {TODO n/a} { xfail *-*-* } l$c }
|
||||
! { dg-bogus {note: variable 'not_prev_allocated\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {TODO n/a} { xfail *-*-* } l$c }
|
||||
! { dg-bogus {note: variable 'parm\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: artificial} {TODO n/a} { xfail *-*-* } l$c }
|
||||
|
||||
! Set each element in array 'b' at index i to i*2.
|
||||
|
||||
subroutine sub1 ! { dg-line subroutine_sub1 }
|
||||
use vars
|
||||
implicit none
|
||||
integer i
|
||||
!$acc routine gang
|
||||
! { dg-bogus {[Ww]arning: region is worker partitioned but does not contain worker partitioned code} {TODO default 'gang' 'vector'} { xfail *-*-* } subroutine_sub1 }
|
||||
|
||||
!$acc loop ! { dg-line l[incr c] }
|
||||
! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c }
|
||||
! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c }
|
||||
do i = 1, n
|
||||
b(i) = i*2
|
||||
end do
|
||||
end subroutine sub1
|
||||
|
||||
! Allocate array 'b', and set it to all 1.0.
|
||||
|
||||
subroutine sub2
|
||||
use vars
|
||||
use openacc
|
||||
implicit none
|
||||
integer i
|
||||
|
||||
allocate (b(n))
|
||||
!$acc enter data create (b)
|
||||
|
||||
if (.not.allocated (b)) error stop
|
||||
if (.not.acc_is_present (b)) error stop
|
||||
|
||||
!$acc parallel loop ! { dg-line l[incr c] }
|
||||
! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l$c }
|
||||
! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l$c }
|
||||
do i = 1, n
|
||||
b(i) = 1.0
|
||||
end do
|
||||
end subroutine sub2
|
||||
|
||||
! Return b(i) * i;
|
||||
|
||||
real*8 function fun1 (i)
|
||||
use vars
|
||||
implicit none
|
||||
integer i
|
||||
!$acc routine seq
|
||||
|
||||
fun1 = b(i) * i
|
||||
end function fun1
|
||||
|
||||
! Return b(i) * i * i;
|
||||
|
||||
real*8 function fun2 (i)
|
||||
use vars
|
||||
implicit none
|
||||
integer i
|
||||
|
||||
fun2 = b(i) * i * i
|
||||
end function fun2
|
|
@ -0,0 +1,402 @@
|
|||
! Test OpenACC 'declare create' with allocatable arrays.
|
||||
|
||||
! { dg-do run }
|
||||
|
||||
! Note that we're not testing OpenACC semantics here, but rather documenting
|
||||
! current GCC behavior, specifically, behavior concerning updating of
|
||||
! host/device array descriptors.
|
||||
! { dg-skip-if n/a { *-*-* } { -DACC_MEM_SHARED=1 } }
|
||||
|
||||
!TODO-OpenACC-declare-allocate
|
||||
! Missing support for OpenACC "Changes from Version 2.0 to 2.5":
|
||||
! "The 'declare create' directive with a Fortran 'allocatable' has new behavior".
|
||||
! Thus, after 'allocate'/before 'deallocate', do
|
||||
! '!$acc enter data create'/'!$acc exit data delete' manually.
|
||||
|
||||
|
||||
!TODO { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'.
|
||||
|
||||
|
||||
!TODO OpenACC 'serial' vs. GCC/nvptx:
|
||||
!TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} }
|
||||
|
||||
|
||||
! { dg-additional-options -fdump-tree-original }
|
||||
! { dg-additional-options -fdump-tree-gimple }
|
||||
|
||||
|
||||
module vars
|
||||
implicit none
|
||||
integer, parameter :: n1_lb = -3
|
||||
integer, parameter :: n1_ub = 6
|
||||
integer, parameter :: n2_lb = -9999
|
||||
integer, parameter :: n2_ub = 22222
|
||||
|
||||
integer, allocatable :: b(:)
|
||||
!$acc declare create (b)
|
||||
|
||||
end module vars
|
||||
|
||||
program test
|
||||
use vars
|
||||
use openacc
|
||||
implicit none
|
||||
integer :: i
|
||||
|
||||
! Identifiers for purposes of reliable '-fdump-tree-[...]' scanning.
|
||||
integer :: id1_1, id1_2
|
||||
|
||||
interface
|
||||
|
||||
subroutine verify_initial
|
||||
implicit none
|
||||
!$acc routine seq
|
||||
end subroutine verify_initial
|
||||
|
||||
subroutine verify_n1_allocated
|
||||
implicit none
|
||||
!$acc routine seq
|
||||
end subroutine verify_n1_allocated
|
||||
|
||||
subroutine verify_n1_values (addend)
|
||||
implicit none
|
||||
!$acc routine gang
|
||||
integer, value :: addend
|
||||
end subroutine verify_n1_values
|
||||
|
||||
subroutine verify_n1_deallocated (expect_allocated)
|
||||
implicit none
|
||||
!$acc routine seq
|
||||
logical, value :: expect_allocated
|
||||
end subroutine verify_n1_deallocated
|
||||
|
||||
subroutine verify_n2_allocated
|
||||
implicit none
|
||||
!$acc routine seq
|
||||
end subroutine verify_n2_allocated
|
||||
|
||||
subroutine verify_n2_values (addend)
|
||||
implicit none
|
||||
!$acc routine gang
|
||||
integer, value :: addend
|
||||
end subroutine verify_n2_values
|
||||
|
||||
subroutine verify_n2_deallocated (expect_allocated)
|
||||
implicit none
|
||||
!$acc routine seq
|
||||
logical, value :: expect_allocated
|
||||
end subroutine verify_n2_deallocated
|
||||
|
||||
end interface
|
||||
|
||||
call acc_create (id1_1)
|
||||
call acc_create (id1_2)
|
||||
|
||||
call verify_initial
|
||||
! It is important here (and similarly, following) that there is no data
|
||||
! clause for 'b' (explicit or implicit): no 'GOMP_MAP_TO_PSET'.
|
||||
!$acc serial
|
||||
call verify_initial
|
||||
!$acc end serial
|
||||
|
||||
allocate (b(n1_lb:n1_ub))
|
||||
call verify_n1_allocated
|
||||
if (acc_is_present (b)) error stop
|
||||
!$acc enter data create (b)
|
||||
! This is now OpenACC "present":
|
||||
if (.not.acc_is_present (b)) error stop
|
||||
! This still has the initial array descriptor:
|
||||
!$acc serial
|
||||
call verify_initial
|
||||
!$acc end serial
|
||||
|
||||
do i = n1_lb, n1_ub
|
||||
b(i) = i - 1
|
||||
end do
|
||||
|
||||
! Verify that host-to-device copy doesn't touch the device-side (still
|
||||
! initial) array descriptor (but it does copy the array data).
|
||||
call acc_update_device (b)
|
||||
!$acc serial
|
||||
call verify_initial
|
||||
!$acc end serial
|
||||
|
||||
b = 40
|
||||
|
||||
! Verify that device-to-host copy doesn't touch the host-side array
|
||||
! descriptor, doesn't copy out the device-side (still initial) array
|
||||
! descriptor (but it does copy the array data).
|
||||
call acc_update_self (b)
|
||||
call verify_n1_allocated
|
||||
|
||||
do i = n1_lb, n1_ub
|
||||
if (b(i) /= i - 1) error stop
|
||||
b(i) = b(i) + 2
|
||||
end do
|
||||
|
||||
! The same using the OpenACC 'update' directive.
|
||||
|
||||
!$acc update device (b) self (id1_1)
|
||||
! We do have 'GOMP_MAP_TO_PSET' here:
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc update map\(force_to:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_1\);$} 1 original } }
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_update map\(force_to:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_1 \[len: [0-9]+\]\)$} 1 gimple } }
|
||||
! ..., but it's silently skipped in 'GOACC_update'.
|
||||
!$acc serial
|
||||
call verify_initial
|
||||
!$acc end serial
|
||||
|
||||
b = 41
|
||||
|
||||
!$acc update self (b) self (id1_2)
|
||||
! We do have 'GOMP_MAP_TO_PSET' here:
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc update map\(force_from:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_2\);$} 1 original } }
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_update map\(force_from:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_2 \[len: [0-9]+\]\)$} 1 gimple } }
|
||||
! ..., but it's silently skipped in 'GOACC_update'.
|
||||
call verify_n1_allocated
|
||||
|
||||
do i = n1_lb, n1_ub
|
||||
if (b(i) /= i + 1) error stop
|
||||
b(i) = b(i) + 2
|
||||
end do
|
||||
|
||||
! Now install the actual array descriptor, via a data clause for 'b'
|
||||
! (explicit or implicit): must get a 'GOMP_MAP_TO_PSET', which then in
|
||||
! 'gomp_map_vars_internal' is handled as 'declare target', and because of
|
||||
! '*(void **) hostaddrs[i] != NULL', we've got 'has_always_ptrset == true',
|
||||
! 'always_to_cnt == 1', and therefore 'gomp_map_vars_existing' does update
|
||||
! the 'GOMP_MAP_TO_PSET'.
|
||||
!$acc serial present (b) copyin (id1_1)
|
||||
call verify_initial
|
||||
id1_1 = 0
|
||||
!$acc end serial
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc serial map\(force_present:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(to:id1_1\)$} 1 original } }
|
||||
!TODO ..., but without an actual use of 'b', the gimplifier removes the
|
||||
!TODO 'GOMP_MAP_TO_PSET':
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_serial map\(force_present:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(to:id1_1 \[len: [0-9]+\]\)$} 1 gimple } }
|
||||
!$acc serial present (b) copyin (id1_2)
|
||||
call verify_n1_allocated
|
||||
!TODO Use of 'b':
|
||||
id1_2 = ubound (b, 1)
|
||||
!$acc end serial
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc serial map\(force_present:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(to:id1_2\)$} 1 original } }
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_serial map\(force_present:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(to:id1_2 \[len: [0-9]+\]\)$} 1 gimple } }
|
||||
|
||||
!$acc parallel copyin (id1_1) ! No data clause for 'b' (explicit or implicit): no 'GOMP_MAP_TO_PSET'.
|
||||
call verify_n1_values (1)
|
||||
id1_1 = 0
|
||||
!$acc end parallel
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc parallel map\(to:id1_1\)$} 1 original } }
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(to:id1_1 \[len: [0-9]+\]\)$} 1 gimple } }
|
||||
|
||||
!$acc parallel copy (b) copyin (id1_2)
|
||||
! As already present, 'copy (b)' doesn't copy; addend is still '1'.
|
||||
call verify_n1_values (1)
|
||||
id1_2 = 0
|
||||
!$acc end parallel
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc parallel map\(tofrom:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(to:id1_2\)$} 1 original } }
|
||||
!TODO ..., but without an actual use of 'b', the gimplifier removes the
|
||||
!TODO 'GOMP_MAP_TO_PSET':
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(tofrom:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(to:id1_2 \[len: [0-9]+\]\)$} 1 gimple } }
|
||||
|
||||
call verify_n1_allocated
|
||||
if (.not.acc_is_present (b)) error stop
|
||||
|
||||
!$acc exit data delete (b)
|
||||
if (.not.allocated (b)) error stop
|
||||
if (acc_is_present (b)) error stop
|
||||
! The device-side array descriptor doesn't get updated, so 'b' still appears
|
||||
! as "allocated":
|
||||
!$acc serial
|
||||
call verify_n1_allocated
|
||||
!$acc end serial
|
||||
|
||||
deallocate (b)
|
||||
call verify_n1_deallocated (.false.)
|
||||
! The device-side array descriptor doesn't get updated, so 'b' still appears
|
||||
! as "allocated":
|
||||
!$acc serial
|
||||
call verify_n1_allocated
|
||||
!$acc end serial
|
||||
|
||||
! Now try to install the actual array descriptor, via a data clause for 'b'
|
||||
! (explicit or implicit): must get a 'GOMP_MAP_TO_PSET', which then in
|
||||
! 'gomp_map_vars_internal' is handled as 'declare target', but because of
|
||||
! '*(void **) hostaddrs[i] == NULL', we've got 'has_always_ptrset == false',
|
||||
! 'always_to_cnt == 0', and therefore 'gomp_map_vars_existing' doesn't update
|
||||
! the 'GOMP_MAP_TO_PSET'.
|
||||
! The device-side array descriptor doesn't get updated, so 'b' still appears
|
||||
! as "allocated":
|
||||
!TODO Why does 'present (b)' still work here?
|
||||
!$acc serial present (b) copyout (id1_2)
|
||||
call verify_n1_deallocated (.true.)
|
||||
!TODO Use of 'b'.
|
||||
id1_2 = ubound (b, 1)
|
||||
!$acc end serial
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc serial map\(force_present:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2\)$} 1 original } }
|
||||
! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_serial map\(force_present:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2 \[len: [0-9]+\]\)$} 1 gimple } }
|
||||
|
||||
|
||||
! Restart the procedure, with different array dimensions.
|
||||
|
||||
allocate (b(n2_lb:n2_ub))
|
||||
call verify_n2_allocated
|
||||
if (acc_is_present (b)) error stop
|
||||
!$acc enter data create (b)
|
||||
if (.not.acc_is_present (b)) error stop
|
||||
! This still has the previous (n1) array descriptor:
|
||||
!$acc serial
|
||||
call verify_n1_deallocated (.true.)
|
||||
!$acc end serial
|
||||
|
||||
do i = n2_lb, n2_ub
|
||||
b(i) = i + 20
|
||||
end do
|
||||
|
||||
call acc_update_device (b)
|
||||
!$acc serial
|
||||
call verify_n1_deallocated (.true.)
|
||||
!$acc end serial
|
||||
|
||||
b = -40
|
||||
|
||||
call acc_update_self (b)
|
||||
call verify_n2_allocated
|
||||
|
||||
do i = n2_lb, n2_ub
|
||||
if (b(i) /= i + 20) error stop
|
||||
b(i) = b(i) - 40
|
||||
end do
|
||||
|
||||
!$acc update device (b)
|
||||
!$acc serial
|
||||
call verify_n1_deallocated (.true.)
|
||||
!$acc end serial
|
||||
|
||||
b = -41
|
||||
|
||||
!$acc update self (b)
|
||||
call verify_n2_allocated
|
||||
|
||||
do i = n2_lb, n2_ub
|
||||
if (b(i) /= i - 20) error stop
|
||||
b(i) = b(i) + 10
|
||||
end do
|
||||
|
||||
!$acc serial present (b) copy (id1_2)
|
||||
call verify_n2_allocated
|
||||
!TODO Use of 'b':
|
||||
id1_2 = ubound (b, 1)
|
||||
!$acc end serial
|
||||
|
||||
!$acc parallel
|
||||
call verify_n2_values (-20)
|
||||
!$acc end parallel
|
||||
|
||||
!$acc parallel copy (b)
|
||||
call verify_n2_values (-20)
|
||||
!$acc end parallel
|
||||
|
||||
call verify_n2_allocated
|
||||
if (.not.acc_is_present (b)) error stop
|
||||
|
||||
!$acc exit data delete (b)
|
||||
if (.not.allocated (b)) error stop
|
||||
if (acc_is_present (b)) error stop
|
||||
!$acc serial
|
||||
call verify_n2_allocated
|
||||
!$acc end serial
|
||||
|
||||
deallocate (b)
|
||||
call verify_n2_deallocated (.false.)
|
||||
!$acc serial
|
||||
call verify_n2_allocated
|
||||
!$acc end serial
|
||||
|
||||
!$acc serial present (b) copy (id1_2)
|
||||
call verify_n2_deallocated (.true.)
|
||||
!TODO Use of 'b':
|
||||
id1_2 = ubound (b, 1)
|
||||
!$acc end serial
|
||||
|
||||
end program test
|
||||
|
||||
|
||||
subroutine verify_initial
|
||||
use vars
|
||||
implicit none
|
||||
!$acc routine seq
|
||||
|
||||
if (allocated (b)) error stop "verify_initial allocated"
|
||||
if (any (lbound (b) /= [0])) error stop "verify_initial lbound"
|
||||
if (any (ubound (b) /= [0])) error stop "verify_initial ubound"
|
||||
end subroutine verify_initial
|
||||
|
||||
subroutine verify_n1_allocated
|
||||
use vars
|
||||
implicit none
|
||||
!$acc routine seq
|
||||
|
||||
if (.not.allocated (b)) error stop "verify_n1_allocated allocated"
|
||||
if (any (lbound (b) /= [n1_lb])) error stop "verify_n1_allocated lbound"
|
||||
if (any (ubound (b) /= [n1_ub])) error stop "verify_n1_allocated ubound"
|
||||
end subroutine verify_n1_allocated
|
||||
|
||||
subroutine verify_n1_values (addend)
|
||||
use vars
|
||||
implicit none
|
||||
!$acc routine gang
|
||||
integer, value :: addend
|
||||
integer :: i
|
||||
|
||||
!$acc loop
|
||||
do i = n1_lb, n1_ub
|
||||
if (b(i) /= i + addend) error stop
|
||||
end do
|
||||
end subroutine verify_n1_values
|
||||
|
||||
subroutine verify_n1_deallocated (expect_allocated)
|
||||
use vars
|
||||
implicit none
|
||||
!$acc routine seq
|
||||
logical, value :: expect_allocated
|
||||
|
||||
if (allocated(b) .neqv. expect_allocated) error stop "verify_n1_deallocated allocated"
|
||||
! Apparently 'deallocate'ing doesn't unset the bounds.
|
||||
if (any (lbound (b) /= [n1_lb])) error stop "verify_n1_deallocated lbound"
|
||||
if (any (ubound (b) /= [n1_ub])) error stop "verify_n1_deallocated ubound"
|
||||
end subroutine verify_n1_deallocated
|
||||
|
||||
subroutine verify_n2_allocated
|
||||
use vars
|
||||
implicit none
|
||||
!$acc routine seq
|
||||
|
||||
if (.not.allocated(b)) error stop "verify_n2_allocated allocated"
|
||||
if (any (lbound (b) /= [n2_lb])) error stop "verify_n2_allocated lbound"
|
||||
if (any (ubound (b) /= [n2_ub])) error stop "verify_n2_allocated ubound"
|
||||
end subroutine verify_n2_allocated
|
||||
|
||||
subroutine verify_n2_values (addend)
|
||||
use vars
|
||||
implicit none
|
||||
!$acc routine gang
|
||||
integer, value :: addend
|
||||
integer :: i
|
||||
|
||||
!$acc loop
|
||||
do i = n2_lb, n2_ub
|
||||
if (b(i) /= i + addend) error stop
|
||||
end do
|
||||
end subroutine verify_n2_values
|
||||
|
||||
subroutine verify_n2_deallocated (expect_allocated)
|
||||
use vars
|
||||
implicit none
|
||||
!$acc routine seq
|
||||
logical, value :: expect_allocated
|
||||
|
||||
if (allocated(b) .neqv. expect_allocated) error stop "verify_n2_deallocated allocated"
|
||||
! Apparently 'deallocate'ing doesn't unset the bounds.
|
||||
if (any (lbound (b) /= [n2_lb])) error stop "verify_n2_deallocated lbound"
|
||||
if (any (ubound (b) /= [n2_ub])) error stop "verify_n2_deallocated ubound"
|
||||
end subroutine verify_n2_deallocated
|
Loading…
Add table
Reference in a new issue