
Currently in OpenACC 'kernels' decomposition, there is special handling of
'GOMP_MAP_FORCE_TOFROM', documented to be done to avoid "internal compiler
errors in later passes". For performance reasons, the current repetitive
to/from device copying for every region is not ideal, compared to using
'present' clauses, as done for almost all other 'GOMP_MAP_*'.

Also, the current special handling (incomplete, evidently) is the reason for
the PR104892 misbehavior.

For PR100280 etc. we've resolved all such known ICEs -- removing the special
handling for 'GOMP_MAP_FORCE_TOFROM' now resolves PR104892.

    PR middle-end/100280
    PR middle-end/104892

    gcc/
    * omp-oacc-kernels-decompose.cc (omp_oacc_kernels_decompose_1): Remove
      special handling of 'GOMP_MAP_FORCE_TOFROM'.

    gcc/testsuite/
    * c-c++-common/goacc/kernels-decompose-2.c: Adjust.
    * c-c++-common/goacc/kernels-decompose-pr100400-1-1.c: Likewise.
    * c-c++-common/goacc/kernels-decompose-pr100400-1-2.c: Likewise.
    * c-c++-common/goacc/kernels-decompose-pr100400-1-3.c: Likewise.
    * c-c++-common/goacc/kernels-decompose-pr100400-1-4.c: Likewise.
    * c-c++-common/goacc/kernels-decompose-pr104061-1-1.c: Likewise.
    * c-c++-common/goacc/kernels-decompose-pr104061-1-2.c: Likewise.
    * c-c++-common/goacc/kernels-decompose-pr104061-1-3.c: Likewise.
    * c-c++-common/goacc/kernels-decompose-pr104061-1-4.c: Likewise.
    * c-c++-common/goacc/kernels-decompose-pr104132-1.c: Likewise.
    * c-c++-common/goacc/kernels-decompose-pr104133-1.c: Likewise.
    * c-c++-common/goacc/kernels-decompose-pr104774-1.c: Likewise.
    * gfortran.dg/goacc/classify-kernels.f95: Likewise.
    * gfortran.dg/goacc/kernels-decompose-2.f95: Likewise.

    libgomp/
    * testsuite/libgomp.oacc-c-c++-common/declare-vla.c: Adjust.
    * testsuite/libgomp.oacc-c-c++-common/default-1.c: Likewise.
    * testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c: Likewise.
    * testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c: Likewise.
    * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise.
    * testsuite/libgomp.oacc-fortran/asyncwait-1.f90: Likewise.
    * testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90: Likewise.
95 lines
3.6 KiB
C
95 lines
3.6 KiB
C
/* Verify OpenACC 'declare' with VLAs. */

/* { dg-additional-options "--param=openacc-kernels=decompose" } */

/* { dg-additional-options "-fopt-info-omp-all" }
   { dg-additional-options "-foffload=-fopt-info-all-omp" } */

/* { dg-additional-options "--param=openacc-privatization=noisy" }
   { dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
   Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types):
   { dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } */

/* It's only with Tcl 8.5 (released in 2007) that "the variable 'varName'
   passed to 'incr' may be unset, and in that case, it will be set to [...]",
   so to maintain compatibility with earlier Tcl releases, we manually
   initialize counter variables:
   { dg-line l_dummy[variable c_compute 0] }
   { dg-message "dummy" "" { target iN-VAl-Id } l_dummy } to avoid
   "WARNING: dg-line var l_dummy defined, but not used". */
|
|
|
|
|
|
#include <assert.h>
|
|
|
|
|
|
/* Exercise OpenACC 'declare copy' on a variable-length array.

   The host fills 'A' with '-i', an OpenACC 'kernels' region then stores 'i'
   into every element, and after 'update host' the host asserts that each
   element holds the value stored inside the 'kernels' region.

   The diagnostics directives below are keyed to the 'l_compute' line marker
   (or to a relative line), so they must stay attached to the constructs they
   follow or precede. */

void
f (void)
{
  int N = 1000;
  int i, A[N];
#pragma acc declare copy(A)

  /* Host-side initialization: for every 'i > 0' this differs from what the
     'kernels' region stores, presumably so that the final check cannot pass
     on stale host data. */
  for (i = 0; i < N; i++)
    A[i] = -i;

#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */
  /* { dg-note {OpenACC 'kernels' decomposition: variable 'i' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
     { dg-note {variable 'i' made addressable} {} { target *-*-* } l_compute$c_compute } */
  /* { dg-note {OpenACC 'kernels' decomposition: variable 'N' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
     { dg-note {variable 'N' made addressable} {} { target *-*-* } l_compute$c_compute } */
  /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { ! __OPTIMIZE__ } } l_compute$c_compute }
     { dg-optimized {assigned OpenACC gang loop parallelism} {} { target __OPTIMIZE__ } l_compute$c_compute } */
  /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
  for (i = 0; i < N; i++)
    A[i] = i;

#pragma acc update host(A)

  /* Check on the host that every element now holds the value written in the
     'kernels' region. */
  for (i = 0; i < N; i++)
    assert (A[i] == i);
}
|
|
|
|
|
|
/* The same as 'f' but everything contained in an OpenACC 'data' construct.

   Inside the 'data' region: the host fills the variable-length array 'A'
   with '-i', an OpenACC 'kernels' region stores 'i' into every element, and
   after 'update host' the host asserts that each element holds the value
   stored inside the 'kernels' region.

   The relative-line directive right after '#pragma acc data' refers to the
   pragma line itself, so nothing may be inserted between the two. */

void
f_data (void)
{
#pragma acc data
  /* { dg-bogus {note: variable [^\n\r]+ candidate for adjusting OpenACC privatization level} {TODO 'data'} { xfail *-*-* } .-1 } */
  {
    int N = 1000;
    int i, A[N];
# pragma acc declare copy(A)

    /* Host-side initialization: for every 'i > 0' this differs from what the
       'kernels' region stores, presumably so that the final check cannot
       pass on stale host data. */
    for (i = 0; i < N; i++)
      A[i] = -i;

# pragma acc kernels /* { dg-line l_compute[incr c_compute] } */
    /* { dg-note {OpenACC 'kernels' decomposition: variable 'i' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
       { dg-note {variable 'i' made addressable} {} { target *-*-* } l_compute$c_compute } */
    /* { dg-note {OpenACC 'kernels' decomposition: variable 'N' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
       { dg-note {variable 'N' made addressable} {} { target *-*-* } l_compute$c_compute } */
    /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { ! __OPTIMIZE__ } } l_compute$c_compute }
       { dg-optimized {assigned OpenACC gang loop parallelism} {} { target __OPTIMIZE__ } l_compute$c_compute } */
    /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
    for (i = 0; i < N; i++)
      A[i] = i;

# pragma acc update host(A)

    /* Check on the host that every element now holds the value written in
       the 'kernels' region. */
    for (i = 0; i < N; i++)
      assert (A[i] == i);
  }
}
|
|
|
|
|
|
/* Test driver: run the plain variant and the variant nested in an OpenACC
   'data' construct. Both report failure through 'assert', so reaching the
   'return' means every check passed. */

int
main (void)
{
  f ();

  f_data ();

  return 0;
}
|