
Currently in OpenACC 'kernels' decomposition, there is special handling of 'GOMP_MAP_FORCE_TOFROM', documented to be done to avoid "internal compiler errors in later passes". For performance reasons, the current repetitive to/from device copying for every region is not ideal, compared to using 'present' clauses, as done for almost all other 'GOMP_MAP_*'. Also, the current special handling (incomplete, evidently) is the reason for the PR104892 misbehavior. For PR100280 etc. we've resolved all such known ICEs -- removing the special handling for 'GOMP_MAP_FORCE_TOFROM' now resolves PR104892. PR middle-end/100280 PR middle-end/104892 gcc/ * omp-oacc-kernels-decompose.cc (omp_oacc_kernels_decompose_1): Remove special handling of 'GOMP_MAP_FORCE_TOFROM'. gcc/testsuite/ * c-c++-common/goacc/kernels-decompose-2.c: Adjust. * c-c++-common/goacc/kernels-decompose-pr100400-1-1.c: Likewise. * c-c++-common/goacc/kernels-decompose-pr100400-1-2.c: Likewise. * c-c++-common/goacc/kernels-decompose-pr100400-1-3.c: Likewise. * c-c++-common/goacc/kernels-decompose-pr100400-1-4.c: Likewise. * c-c++-common/goacc/kernels-decompose-pr104061-1-1.c: Likewise. * c-c++-common/goacc/kernels-decompose-pr104061-1-2.c: Likewise. * c-c++-common/goacc/kernels-decompose-pr104061-1-3.c: Likewise. * c-c++-common/goacc/kernels-decompose-pr104061-1-4.c: Likewise. * c-c++-common/goacc/kernels-decompose-pr104132-1.c: Likewise. * c-c++-common/goacc/kernels-decompose-pr104133-1.c: Likewise. * c-c++-common/goacc/kernels-decompose-pr104774-1.c: Likewise. * gfortran.dg/goacc/classify-kernels.f95: Likewise. * gfortran.dg/goacc/kernels-decompose-2.f95: Likewise. libgomp/ * testsuite/libgomp.oacc-c-c++-common/declare-vla.c: Adjust. * testsuite/libgomp.oacc-c-c++-common/default-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise. * testsuite/libgomp.oacc-fortran/asyncwait-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90: Likewise.
39 lines
1.5 KiB
C
39 lines
1.5 KiB
C
/* Verify that a simple, explicit acc loop reduction works inside
|
|
a kernels region. */
|
|
|
|
/* { dg-additional-options "--param=openacc-kernels=decompose" } */
|
|
|
|
/* { dg-additional-options "-fopt-info-all-omp" }
|
|
{ dg-additional-options "-foffload=-fopt-info-all-omp" } */
|
|
|
|
/* { dg-additional-options "--param=openacc-privatization=noisy" }
|
|
{ dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
|
|
Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types):
|
|
{ dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } */
|
|
|
|
#include <stdlib.h>
|
|
|
|
#define N 100
|
|
|
|
int
|
|
main ()
|
|
{
|
|
int i, red = 0;
|
|
|
|
#pragma acc kernels /* { dg-line l_compute1 } */
|
|
/* { dg-note {OpenACC 'kernels' decomposition: variable 'red' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute1 }
|
|
{ dg-note {variable 'red' made addressable} {} { target *-*-* } l_compute1 } */
|
|
{
|
|
#pragma acc loop reduction (+:red) /* { dg-line l_loop_i1 } */
|
|
/* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i1 } */
|
|
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i1 } */
|
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_loop_i1 } */
|
|
for (i = 0; i < N; i++)
|
|
red++;
|
|
}
|
|
|
|
if (red != N)
|
|
abort ();
|
|
|
|
return 0;
|
|
}
|