
When gcc is configured for nvptx offloading with --without-cuda-driver and full CUDA isn't installed, many libgomp.oacc-*/* tests fail, some of them because cuda.h header can't be found, others because the tests can't be linked against -lcuda, -lcudart or -lcublas. I usually only have akmod-nvidia and xorg-x11-drv-nvidia-cuda rpms installed, so libcuda.so.1 can be dlopened and the offloading works, but linking against those libraries isn't possible nor are the headers around (for the plugin itself there is the fallback libgomp/plugin/cuda/cuda.h). The following patch adds 3 new effective targets and uses them in tests that need them. 2021-05-27 Jakub Jelinek <jakub@redhat.com> * testsuite/lib/libgomp.exp (check_effective_target_openacc_cuda, check_effective_target_openacc_cublas, check_effective_target_openacc_cudart): New. * testsuite/libgomp.oacc-fortran/host_data-4.f90: Require effective target openacc_cublas. * testsuite/libgomp.oacc-fortran/host_data-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/host_data-3.f: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-91.c: Require effective target openacc_cuda. * testsuite/libgomp.oacc-c-c++-common/lib-70.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-90.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-75.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-69.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-74.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-81.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-72.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-85.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/pr87835.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-82.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-73.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-83.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-78.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-76.c: Likewise. 
* testsuite/libgomp.oacc-c-c++-common/lib-84.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/host_data-1.c: Require effective targets openacc_cublas and openacc_cudart. * testsuite/libgomp.oacc-c-c++-common/context-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/context-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/context-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/context-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_get_property-nvptx.c: Require effective target openacc_cudart. * testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c: Add -DUSE_CUDA_H for effective target openacc_cuda and add && defined USE_CUDA_H to preprocessor conditionals. Guard -lcuda also on openacc_cuda effective target.
106 lines
2 KiB
C
106 lines
2 KiB
C
/* { dg-do run { target openacc_nvidia_accel_selected } } */
|
|
/* { dg-additional-options "-lm -lcuda -lcublas -lcudart -Wall -Wextra" } */
|
|
/* { dg-require-effective-target openacc_cublas } */
|
|
/* { dg-require-effective-target openacc_cudart } */
|
|
|
|
#include <stdlib.h>
|
|
#include <math.h>
|
|
#include <openacc.h>
|
|
#include <cuda.h>
|
|
#include <cuda_runtime_api.h>
|
|
#include <cublas_v2.h>
|
|
|
|
/* Single-precision "a * x plus y": for each of the first N elements,
   add A times X[k] to Y[k] in place.  X is only read.  The 'acc routine'
   directive lets this function be called from OpenACC compute regions
   as well as from ordinary host code.  */
#pragma acc routine
void
saxpy (int n, float a, float *x, float *y)
{
  for (int k = 0; k < n; ++k)
    y[k] += a * x[k];
}
|
|
|
|
/* Compare the first N elements of A and B and call abort () on the first
   pair whose absolute difference is not within 1e-5.  Returns normally
   when every pair is within tolerance (including when N is 0).

   The check is written as "not within tolerance" rather than "greater
   than tolerance": every ordered comparison with NaN is false, so a
   plain '>' test would let a NaN result pass validation unnoticed.  */
void
validate_results (int n, float *a, float *b)
{
  int i;

  for (i = 0; i < n; i++)
    if (!(fabs (a[i] - b[i]) <= .00001))
      abort ();
}
|
|
|
|
int
|
|
main()
|
|
{
|
|
#define N 8
|
|
int i;
|
|
float x_ref[N], y_ref[N];
|
|
float x[N], y[N];
|
|
cublasHandle_t h;
|
|
float a = 2.0;
|
|
|
|
for (i = 0; i < N; i++)
|
|
{
|
|
x[i] = x_ref[i] = 4.0 + i;
|
|
y[i] = y_ref[i] = 3.0;
|
|
}
|
|
|
|
saxpy (N, a, x_ref, y_ref);
|
|
|
|
cublasCreate (&h);
|
|
|
|
#pragma acc data copyin (x[0:N]) copy (y[0:N])
|
|
{
|
|
#pragma acc host_data use_device (x, y)
|
|
{
|
|
cublasSaxpy (h, N, &a, x, 1, y, 1);
|
|
}
|
|
}
|
|
|
|
validate_results (N, y, y_ref);
|
|
|
|
#pragma acc data create (x[0:N]) copyout (y[0:N])
|
|
{
|
|
#pragma acc kernels
|
|
for (i = 0; i < N; i++)
|
|
y[i] = 3.0;
|
|
|
|
#pragma acc host_data use_device (x, y)
|
|
{
|
|
cublasSaxpy (h, N, &a, x, 1, y, 1);
|
|
}
|
|
}
|
|
|
|
cublasDestroy (h);
|
|
|
|
validate_results (N, y, y_ref);
|
|
|
|
for (i = 0; i < N; i++)
|
|
y[i] = 3.0;
|
|
|
|
/* There's no need to use host_data here. */
|
|
#pragma acc data copyin (x[0:N]) copyin (a) copy (y[0:N])
|
|
{
|
|
#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
|
|
saxpy (N, a, x, y);
|
|
}
|
|
|
|
validate_results (N, y, y_ref);
|
|
|
|
/* Exercise host_data with data transferred with acc enter data. */
|
|
|
|
for (i = 0; i < N; i++)
|
|
y[i] = 3.0;
|
|
|
|
#pragma acc enter data copyin (x, a, y)
|
|
#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
|
|
{
|
|
saxpy (N, a, x, y);
|
|
}
|
|
#pragma acc exit data delete (x, a) copyout (y)
|
|
|
|
validate_results (N, y, y_ref);
|
|
|
|
return 0;
|
|
}
|