
Rather than rubber-stamping whatever requested vs. actual device kernel launch configuration happens, actually (again) verify the requested values (modulo expected variations).

This better highlights that "AMD GCN has an upper limit of 'num_workers(16)'", and the deficiency that "AMD GCN uses the autovectorizer for the vector dimension: the use of a function call in vector-partitioned code [...] is not currently supported".

This also removes several instances of race conditions, where variables are concurrently written to in OpenACC gang-redundant mode.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c: Strengthen.
	* testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-v-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-w-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-v-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-w-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c: Likewise.
81 lines
2.5 KiB
C
81 lines
2.5 KiB
C
/* { dg-additional-options "-fopt-info-note-omp" }
   { dg-additional-options "--param=openacc-privatization=noisy" }
   { dg-additional-options "-foffload=-fopt-info-note-omp" }
   { dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
   for testing/documenting aspects of that functionality. */

/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
   aspects of that functionality. */

#include <stdio.h>
#include <openacc.h>
#include <gomp-constants.h>

#define N (32*32*32+17)

/* Run a worker-partitioned loop inside a 'parallel' construct that requests
   NW workers and a vector length of VL.  Every iteration records the
   gang/worker/vector index it executed under; the host then compares each
   recorded value against the expected one.

   Returns 0 when every element matches, 1 otherwise (each mismatch is
   printed).  */
int main ()
{
  int ary[N];
  int ix;
  int exit = 0;
  int ondev = 0;
  int workersize;

  /* Fill the array with -1, a value the verification loop below never
     expects, so that elements left unwritten are reported.  */
  for (ix = 0; ix < N; ix++)
    ary[ix] = -1;

  /* The diagnostic-expectation comments following each pragma use relative
     line offsets that point back at the pragma line; do not insert lines
     between a pragma and the comments that follow it.  */
#define NW 32
#define VL 32
#pragma acc parallel num_workers(NW) vector_length(VL) \
	    copy(ary) copy(ondev)
  /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
  /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } .-3 } */
  {
#pragma acc loop worker
    /* { dg-note {variable 'ix' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */
    /* { dg-note {variable 'g' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
    /* { dg-note {variable 'w' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */
    /* { dg-note {variable 'v' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-4 } */
    for (unsigned ix = 0; ix < N; ix++)
      {
	if (acc_on_device (acc_device_not_host))
	  {
	    int g, w, v;

	    /* Pack the gang, worker and vector index of the executing
	       iteration into one value: gang in bits 16 and up, worker in
	       bits 8 and up, vector in the low bits.  */
	    g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
	    w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
	    v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
	    ary[ix] = (g << 16) | (w << 8) | v;
	    /* Tell the host which expectation to check; every iteration
	       stores the same value.  */
	    ondev = 1;
	  }
	else
	  /* Not on a non-host device: just record the iteration number.  */
	  ary[ix] = ix;
      }
  }
  workersize = NW;
#ifdef ACC_DEVICE_TYPE_radeon
  /* AMD GCN has an upper limit of 'num_workers(16)'. */
  if (workersize > 16)
    workersize = 16;
#endif

  /* Verify every element.  Without offloading each element holds its own
     index.  With offloading the test expects gang index 0, vector index 0,
     and a worker index of 'ix % workersize'.  */
  for (ix = 0; ix < N; ix++)
    {
      int expected = ix;
      if (ondev)
	{
	  int g = 0;
	  int w = ix % workersize;
	  int v = 0;

	  expected = (g << 16) | (w << 8) | v;
	}

      if (ary[ix] != expected)
	{
	  exit = 1;
	  printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected);
	}
    }

  return exit;
}