libgomp: Add offload_device_gcn check, add requires-4a.c test
Duplicate libgomp.c-c++-common/requires-4.c (as ...-4a.c), but use heap-allocated instead of static memory for a variable. This change and the added offload_device_gcn check prepare for pseudo-USM, where the device hardware cannot access all host memory but only managed and pinned memory; for such devices, requires-4.c will fail, and the new check makes it possible to add target { ! { offload_device_nvptx || offload_device_gcn } } to requires-4.c. However, that has not been added yet, as pseudo-USM support is not yet on mainline. (Review is pending for the USM patches.) include/ChangeLog: * gomp-constants.h (GOMP_DEVICE_HSA): Comment out unused define. libgomp/ChangeLog: * testsuite/lib/libgomp.exp (check_effective_target_offload_device_gcn): New. * testsuite/libgomp.c-c++-common/on_device_arch.h (device_arch_gcn, on_device_arch_gcn): New. * testsuite/libgomp.c-c++-common/requires-4a.c: New test; copied from requires-4.c but using heap-allocated memory.
This commit is contained in:
parent
5362b5cc8d
commit
12d9f5afbd
4 changed files with 65 additions and 1 deletions
|
@ -230,7 +230,7 @@ enum gomp_map_kind
|
|||
#define GOMP_DEVICE_NOT_HOST 4
|
||||
#define GOMP_DEVICE_NVIDIA_PTX 5
|
||||
#define GOMP_DEVICE_INTEL_MIC 6
|
||||
#define GOMP_DEVICE_HSA 7
|
||||
/* #define GOMP_DEVICE_HSA 7 removed. */
|
||||
#define GOMP_DEVICE_GCN 8
|
||||
|
||||
/* We have a compatibility issue. OpenMP 5.2 introduced
|
||||
|
|
|
@ -415,6 +415,18 @@ proc check_effective_target_offload_device_nvptx { } {
|
|||
} ]
|
||||
}
|
||||
|
||||
# Effective-target check: return 1 if using a GCN offload device.
# The result is whatever check_runtime_nocache reports for a small probe
# program whose exit status is the negation of on_device_arch_gcn ().

proc check_effective_target_offload_device_gcn { } {
    set probe_source {
      #include <omp.h>
      #include "testsuite/libgomp.c-c++-common/on_device_arch.h"
      int main ()
	{
	  return !on_device_arch_gcn ();
	}
    }
    return [check_runtime_nocache offload_device_gcn $probe_source]
}
|
||||
|
||||
# Return 1 if at least one Nvidia GPU is accessible.
|
||||
|
||||
proc check_effective_target_openacc_nvidia_accel_present { } {
|
||||
|
|
|
@ -7,6 +7,12 @@ device_arch_nvptx (void)
|
|||
return GOMP_DEVICE_NVIDIA_PTX;
|
||||
}
|
||||
|
||||
/* static */ int
|
||||
device_arch_gcn (void)
|
||||
{
|
||||
return GOMP_DEVICE_GCN;
|
||||
}
|
||||
|
||||
/* static */ int
|
||||
device_arch_intel_mic (void)
|
||||
{
|
||||
|
@ -14,6 +20,7 @@ device_arch_intel_mic (void)
|
|||
}
|
||||
|
||||
#pragma omp declare variant (device_arch_nvptx) match(construct={target},device={arch(nvptx)})
|
||||
#pragma omp declare variant (device_arch_gcn) match(construct={target},device={arch(gcn)})
|
||||
#pragma omp declare variant (device_arch_intel_mic) match(construct={target},device={arch(intel_mic)})
|
||||
/* static */ int
|
||||
device_arch (void)
|
||||
|
@ -37,6 +44,12 @@ on_device_arch_nvptx ()
|
|||
return on_device_arch (GOMP_DEVICE_NVIDIA_PTX);
|
||||
}
|
||||
|
||||
int
|
||||
on_device_arch_gcn ()
|
||||
{
|
||||
return on_device_arch (GOMP_DEVICE_GCN);
|
||||
}
|
||||
|
||||
int
|
||||
on_device_arch_intel_mic ()
|
||||
{
|
||||
|
|
39
libgomp/testsuite/libgomp.c-c++-common/requires-4a.c
Normal file
39
libgomp/testsuite/libgomp.c-c++-common/requires-4a.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
/* { dg-additional-options "-flto" } */
|
||||
/* { dg-additional-options "-foffload-options=nvptx-none=-misa=sm_35" { target { offload_target_nvptx } } } */
|
||||
/* { dg-additional-sources requires-4-aux.c } */
|
||||
|
||||
/* Same as requires-4.c, but uses heap memory for 'a'. */
|
||||
|
||||
/* Check no diagnostic by device-compiler's or host compiler's lto1.
|
||||
Other file uses: 'requires reverse_offload', but that's inactive as
|
||||
there are no declare target directives, device constructs nor device routines */
|
||||
|
||||
/* Depending on offload device capabilities, it may print something like the
|
||||
following (only) if GOMP_DEBUG=1:
|
||||
"devices present but 'omp requires unified_address, unified_shared_memory, reverse_offload' cannot be fulfilled"
|
||||
and in that case does host-fallback execution.
|
||||
|
||||
No offload devices support USM at present, so we may verify host-fallback
|
||||
execution by presence of separate memory spaces. */
|
||||
|
||||
#pragma omp requires unified_address,unified_shared_memory
|
||||
|
||||
int *a;
|
||||
extern void foo (void);
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
a = (int *) __builtin_calloc (sizeof (int), 10);
|
||||
#pragma omp target map(to: a)
|
||||
for (int i = 0; i < 10; i++)
|
||||
a[i] = i;
|
||||
|
||||
for (int i = 0; i < 10; i++)
|
||||
if (a[i] != i)
|
||||
__builtin_abort ();
|
||||
|
||||
foo ();
|
||||
__builtin_free (a);
|
||||
return 0;
|
||||
}
|
Loading…
Add table
Reference in a new issue