gcc/libgomp/testsuite/lib/libgomp.exp
Thomas Schwinge 6c3b30ef9e Support parallel testing in libgomp, part II [PR66005]
..., and enable if 'flock' is available for serializing execution testing.

Regarding the default of 19 parallel slots, this turned out to be a local
minimum for wall time when testing this on:

    $ uname -srvi
    Linux 4.2.0-42-generic #49~14.04.1-Ubuntu SMP Wed Jun 29 20:22:11 UTC 2016 x86_64
    $ grep '^model name' < /proc/cpuinfo | uniq -c
         32 model name      : Intel(R) Xeon(R) CPU E5-2640 v3 @ 2.60GHz

... in two configurations: case (a) standard configuration, no offloading
configured, case (b) offloading for GCN and nvptx configured but no devices
available.  For both cases, default plus '-m32' variant.

    $ \time make check-target-libgomp RUNTESTFLAGS="--target_board=unix\{,-m32\}"

Case (a), baseline:

    6432.23user 332.38system 47:32.28elapsed 237%CPU (0avgtext+0avgdata 505044maxresident)k
    6382.43user 319.21system 47:06.04elapsed 237%CPU (0avgtext+0avgdata 505172maxresident)k

This is what people have been complaining about, rightly so, in
<https://gcc.gnu.org/PR66005> "libgomp make check time is excessive" and
elsewhere.

Case (a), parallelized:

    -j12 GCC_TEST_PARALLEL_SLOTS=10
    3088.49user 267.74system 6:43.82elapsed 831%CPU (0avgtext+0avgdata 505188maxresident)k
    -j15 GCC_TEST_PARALLEL_SLOTS=15
    3308.08user 294.79system 5:56.04elapsed 1011%CPU (0avgtext+0avgdata 505360maxresident)k
    -j17 GCC_TEST_PARALLEL_SLOTS=17
    3539.93user 298.99system 5:27.86elapsed 1170%CPU (0avgtext+0avgdata 505112maxresident)k
    -j18 GCC_TEST_PARALLEL_SLOTS=18
    3697.50user 317.18system 5:14.63elapsed 1275%CPU (0avgtext+0avgdata 505360maxresident)k
    -j19 GCC_TEST_PARALLEL_SLOTS=19
    3765.94user 324.27system 5:13.22elapsed 1305%CPU (0avgtext+0avgdata 505128maxresident)k
    -j20 GCC_TEST_PARALLEL_SLOTS=20
    3684.66user 312.32system 5:15.26elapsed 1267%CPU (0avgtext+0avgdata 505100maxresident)k
    -j23 GCC_TEST_PARALLEL_SLOTS=23
    4040.59user 347.10system 5:29.12elapsed 1333%CPU (0avgtext+0avgdata 505200maxresident)k
    -j26 GCC_TEST_PARALLEL_SLOTS=26
    3973.24user 377.96system 5:24.70elapsed 1340%CPU (0avgtext+0avgdata 505160maxresident)k
    -j32 GCC_TEST_PARALLEL_SLOTS=32
    4004.42user 346.10system 5:16.11elapsed 1376%CPU (0avgtext+0avgdata 505160maxresident)k

Yay!

Case (b), baseline; 2+ h:

    7227.58user 700.54system 2:14:33elapsed 98%CPU (0avgtext+0avgdata 994264maxresident)k

Case (b), parallelized:

    -j12 GCC_TEST_PARALLEL_SLOTS=10
    7377.46user 777.52system 16:06.63elapsed 843%CPU (0avgtext+0avgdata 994344maxresident)k
    -j15 GCC_TEST_PARALLEL_SLOTS=15
    8019.18user 721.42system 12:13.56elapsed 1191%CPU (0avgtext+0avgdata 994228maxresident)k
    -j17 GCC_TEST_PARALLEL_SLOTS=17
    8530.11user 716.95system 10:45.92elapsed 1431%CPU (0avgtext+0avgdata 994176maxresident)k
    -j18 GCC_TEST_PARALLEL_SLOTS=18
    8776.79user 645.89system 10:27.20elapsed 1502%CPU (0avgtext+0avgdata 994248maxresident)k
    -j19 GCC_TEST_PARALLEL_SLOTS=19
    9332.37user 641.76system 10:15.09elapsed 1621%CPU (0avgtext+0avgdata 994260maxresident)k
    -j20 GCC_TEST_PARALLEL_SLOTS=20
    9609.54user 789.88system 10:26.94elapsed 1658%CPU (0avgtext+0avgdata 994284maxresident)k
    -j23 GCC_TEST_PARALLEL_SLOTS=23
    10362.40user 911.14system 10:44.47elapsed 1749%CPU (0avgtext+0avgdata 994208maxresident)k
    -j26 GCC_TEST_PARALLEL_SLOTS=26
    11159.44user 850.99system 11:09.25elapsed 1794%CPU (0avgtext+0avgdata 994256maxresident)k
    -j32 GCC_TEST_PARALLEL_SLOTS=32
    11453.50user 939.52system 11:00.38elapsed 1876%CPU (0avgtext+0avgdata 994240maxresident)k

On my Dell Precision 7530 laptop:

    $ uname -srvi
    Linux 5.15.0-71-generic #78-Ubuntu SMP Tue Apr 18 09:00:29 UTC 2023 x86_64
    $ grep '^model name' < /proc/cpuinfo | uniq -c
         12 model name      : Intel(R) Core(TM) i7-8850H CPU @ 2.60GHz
    $ nvidia-smi -L
    GPU 0: Quadro P1000 (UUID: GPU-e043973b-b52a-d02b-c066-a8fdbf64e8ea)

... in two configurations: case (c) standard configuration, no offloading
configured, case (d) offloading for nvptx configured and device available.
For both cases, only default variant, no '-m32'.

    $ \time make check-target-libgomp

Case (c), baseline; roughly half of case (a) (just one variant):

    1180.98user 110.80system 19:36.40elapsed 109%CPU (0avgtext+0avgdata 505148maxresident)k
    1133.22user 111.08system 19:35.75elapsed 105%CPU (0avgtext+0avgdata 505212maxresident)k

Case (c), parallelized:

    -j12 GCC_TEST_PARALLEL_SLOTS=2
    1143.83user 110.76system 10:20.46elapsed 202%CPU (0avgtext+0avgdata 505216maxresident)k
    -j12 GCC_TEST_PARALLEL_SLOTS=6
    1737.08user 143.94system 4:59.48elapsed 628%CPU (0avgtext+0avgdata 505200maxresident)k
    1730.31user 143.02system 4:58.75elapsed 627%CPU (0avgtext+0avgdata 505152maxresident)k
    -j12 GCC_TEST_PARALLEL_SLOTS=8
    2192.63user 169.34system 4:52.96elapsed 806%CPU (0avgtext+0avgdata 505216maxresident)k
    2219.04user 167.67system 4:53.19elapsed 814%CPU (0avgtext+0avgdata 505152maxresident)k
    -j12 GCC_TEST_PARALLEL_SLOTS=10
    2463.93user 184.98system 4:48.39elapsed 918%CPU (0avgtext+0avgdata 505200maxresident)k
    2455.62user 183.68system 4:47.40elapsed 918%CPU (0avgtext+0avgdata 505216maxresident)k
    -j12 GCC_TEST_PARALLEL_SLOTS=12
    2591.04user 192.64system 4:44.98elapsed 976%CPU (0avgtext+0avgdata 505216maxresident)k
    2581.23user 195.21system 4:47.51elapsed 965%CPU (0avgtext+0avgdata 505212maxresident)k
    -j20 GCC_TEST_PARALLEL_SLOTS=20 [oversubscribe]
    2613.18user 199.51system 4:44.06elapsed 990%CPU (0avgtext+0avgdata 505216maxresident)k

Case (d), baseline (compared to case (b): only nvptx offloading compilation,
but also nvptx offloading execution); ~1 h:

    2841.93user 653.68system 1:02:26elapsed 93%CPU (0avgtext+0avgdata 909792maxresident)k
    2842.03user 654.39system 1:02:24elapsed 93%CPU (0avgtext+0avgdata 909880maxresident)k

Case (d), parallelized:

    -j12 GCC_TEST_PARALLEL_SLOTS=2
    2856.39user 606.87system 33:58.64elapsed 169%CPU (0avgtext+0avgdata 909948maxresident)k
    -j12 GCC_TEST_PARALLEL_SLOTS=6
    3444.90user 666.86system 18:37.57elapsed 367%CPU (0avgtext+0avgdata 909856maxresident)k
    3462.13user 667.13system 18:36.87elapsed 369%CPU (0avgtext+0avgdata 909872maxresident)k
    -j12 GCC_TEST_PARALLEL_SLOTS=8
    3929.74user 716.22system 18:02.36elapsed 429%CPU (0avgtext+0avgdata 909832maxresident)k
    -j12 GCC_TEST_PARALLEL_SLOTS=10
    4152.84user 736.16system 17:43.05elapsed 459%CPU (0avgtext+0avgdata 909872maxresident)k
    -j12 GCC_TEST_PARALLEL_SLOTS=12
    4209.60user 749.00system 17:35.20elapsed 469%CPU (0avgtext+0avgdata 909840maxresident)k
    -j20 GCC_TEST_PARALLEL_SLOTS=20 [oversubscribe]
    4255.54user 756.78system 17:29.06elapsed 477%CPU (0avgtext+0avgdata 909868maxresident)k

Worth noting is that with nvptx offloading, there is one execution test case
that times out ('libgomp.fortran/reverse-offload-5.f90').  This effectively
stalls progress for almost 5 min: quickly, other execution test cases queue up
on the lock for all parallel slots.  That's working as expected; just noting
this as it accordingly does skew the wall time numbers.

	PR testsuite/66005
	libgomp/
	* configure.ac: Look for 'flock'.
	* testsuite/Makefile.am (gcc_test_parallel_slots): Enable parallel testing.
	* testsuite/config/default.exp: Don't 'load_lib "standard.exp"' here...
	* testsuite/lib/libgomp.exp: ... but here, instead.
	(libgomp_load): Override for parallel testing.
	* testsuite/libgomp-site-extra.exp.in (FLOCK): Set.
	* configure: Regenerate.
	* Makefile.in: Regenerate.
	* testsuite/Makefile.in: Regenerate.
2023-05-15 12:11:18 +02:00

583 lines
17 KiB
Text

# DejaGnu's 'load_lib' has no usable library search path, so every file
# that gcc-dg.exp wants has to be loaded explicitly from the GCC
# testsuite library directory.
proc load_gcc_lib { filename } {
    global srcdir loaded_libs
    # Read the file straight out of the GCC testsuite 'lib' directory.
    set gcc_lib_dir $srcdir/../../gcc/testsuite/lib
    load_file $gcc_lib_dir/$filename
    # Record it as loaded so a subsequent 'load_lib' becomes a no-op.
    set loaded_libs($filename) ""
}
# Core DejaGnu libraries.
load_lib dg.exp
load_lib standard.exp
# Required to use gcc-dg.exp - however, the latter should NOT be
# loaded until ${tool}_target_compile is defined since it uses that
# to determine default LTO options.
load_gcc_lib multiline.exp
load_gcc_lib prune.exp
load_gcc_lib target-libpath.exp
load_gcc_lib wrapper.exp
load_gcc_lib target-supports.exp
load_gcc_lib target-utils.exp
load_gcc_lib gcc-defs.exp
load_gcc_lib timeout.exp
load_gcc_lib file-format.exp
load_gcc_lib target-supports-dg.exp
# Scan-dump helpers used by 'dg-final' directives in the test cases.
load_gcc_lib scanasm.exp
load_gcc_lib scandump.exp
load_gcc_lib scanlang.exp
load_gcc_lib scanrtl.exp
load_gcc_lib scansarif.exp
load_gcc_lib scantree.exp
load_gcc_lib scanltranstree.exp
load_gcc_lib scanoffload.exp
load_gcc_lib scanoffloadipa.exp
load_gcc_lib scanoffloadtree.exp
load_gcc_lib scanoffloadrtl.exp
load_gcc_lib scanipa.exp
load_gcc_lib scanwpaipa.exp
load_gcc_lib timeout-dg.exp
load_gcc_lib torture-options.exp
load_gcc_lib fortran-modules.exp
# Try to load a test support file, built during libgomp configuration.
# Search in '..' vs. '.' to support parallel vs. sequential testing:
# with GCC_RUNTEST_PARALLELIZE_DIR set, each parallel slot runs in its
# own subdirectory one level below the file.
if [info exists ::env(GCC_RUNTEST_PARALLELIZE_DIR)] {
load_file ../libgomp-test-support.exp
} else {
load_file libgomp-test-support.exp
}
# libgomp tests are execution tests by default: compile *and* run.
set dg-do-what-default run
# Extra compile flags accumulated from the '--additional_options' runtest
# option; see 'libgomp_option_proc' below.
set libgomp_compile_options ""
#
# libgomp_init
#
# Determine the multilib build directories, passing TOOL_OPTIONS along when
# the board file provided any.
if [info exists TOOL_OPTIONS] {
set multilibs [get_multilibs $TOOL_OPTIONS]
} else {
set multilibs [get_multilibs]
}
# Per-variant test setup: pin the locale, determine the compilers under
# test, compute LD_LIBRARY_PATH contents and the always-on compile flags
# (ALWAYS_CFLAGS) for building libgomp test cases against the build tree.
proc libgomp_init { args } {
global srcdir blddir objdir tool_root_dir
global libgomp_initialized
global tmpdir
global gluefile wrap_flags
global ALWAYS_CFLAGS
global CFLAGS
global TOOL_EXECUTABLE TOOL_OPTIONS
global GCC_UNDER_TEST GXX_UNDER_TEST GFORTRAN_UNDER_TEST
global TESTING_IN_BUILD_TREE
global target_triplet
global always_ld_library_path
# Locate the libgomp build directory for the current multilib.
set blddir [lookfor_file [get_multilibs] libgomp]
# We set LC_ALL and LANG to C so that we get the same error
# messages as expected.
setenv LC_ALL C
setenv LANG C
# Many hosts now default to a non-ASCII C locale, however, so
# they can set a charset encoding here if they need.
if { [ishost "*-*-cygwin*"] } {
setenv LC_ALL C.ASCII
setenv LANG C.ASCII
}
if { $blddir != "" } {
# Fix up '-funconfigured-libstdc++-v3' in 'GXX_UNDER_TEST' (see
# '../../configure.ac').
set flags_file "${blddir}/../libstdc++-v3/scripts/testsuite_flags"
if { [file exists $flags_file] } {
set flags [exec sh $flags_file --build-includes]
verbose -log "GXX_UNDER_TEST = $GXX_UNDER_TEST"
set GXX_UNDER_TEST [string map [list \
" -funconfigured-libstdc++-v3 " " $flags " \
] $GXX_UNDER_TEST]
verbose -log "GXX_UNDER_TEST = $GXX_UNDER_TEST"
}
}
# Guess the compilers under test unless the board file set them explicitly.
if ![info exists GCC_UNDER_TEST] then {
if [info exists TOOL_EXECUTABLE] {
set GCC_UNDER_TEST $TOOL_EXECUTABLE
} else {
set GCC_UNDER_TEST "[find_gcc]"
}
# Only if we're guessing 'GCC_UNDER_TEST', we're also going to guess
# 'GXX_UNDER_TEST', 'GFORTRAN_UNDER_TEST'.
if ![info exists GXX_UNDER_TEST] then {
if [info exists TOOL_EXECUTABLE] {
set GXX_UNDER_TEST $TOOL_EXECUTABLE
} else {
set GXX_UNDER_TEST "[find_g++]"
}
} else {
error "GXX_UNDER_TEST set but not GCC_UNDER_TEST"
}
if ![info exists GFORTRAN_UNDER_TEST] then {
if [info exists TOOL_EXECUTABLE] {
set GFORTRAN_UNDER_TEST $TOOL_EXECUTABLE
} else {
set GFORTRAN_UNDER_TEST "[find_gfortran]"
}
} else {
error "GFORTRAN_UNDER_TEST set but not GCC_UNDER_TEST"
}
}
if ![info exists tmpdir] {
set tmpdir "/tmp"
}
# Forget any status-wrapper glue file from a previous variant.
if [info exists gluefile] {
unset gluefile
}
if {![info exists CFLAGS]} {
set CFLAGS ""
}
# Locate libgcc.a so we don't need to account for different values of
# SHLIB_EXT on different platforms
set gccdir [lookfor_file $tool_root_dir gcc/libgcc.a]
if {$gccdir != ""} {
set gccdir [file dirname $gccdir]
}
# Compute what needs to be put into LD_LIBRARY_PATH
set always_ld_library_path "."
global offload_additional_lib_paths
if { $offload_additional_lib_paths != "" } {
append always_ld_library_path "${offload_additional_lib_paths}"
}
# Compute what needs to be added to the existing LD_LIBRARY_PATH.
if {$gccdir != ""} {
# Add AIX pthread directory first.
if { [llength [glob -nocomplain ${gccdir}/pthread/libgcc_s*.a]] >= 1 } {
append always_ld_library_path ":${gccdir}/pthread"
}
append always_ld_library_path ":${gccdir}"
set compiler [lindex $GCC_UNDER_TEST 0]
# Also add each multilib directory that contains a shared libgcc.
if { [is_remote host] == 0 && [which $compiler] != 0 } {
foreach i "[exec $compiler --print-multi-lib]" {
set mldir ""
regexp -- "\[a-z0-9=_/\.-\]*;" $i mldir
set mldir [string trimright $mldir "\;@"]
if { "$mldir" == "." } {
continue
}
if { [llength [glob -nocomplain ${gccdir}/${mldir}/libgcc_s*.so.*]] >= 1 } {
append always_ld_library_path ":${gccdir}/${mldir}"
}
}
}
}
set ALWAYS_CFLAGS ""
if { $blddir != "" } {
lappend ALWAYS_CFLAGS "additional_flags=-B${blddir}/"
# targets that use libgomp.a%s in their specs need a -B option
# for uninstalled testing.
lappend ALWAYS_CFLAGS "additional_flags=-B${blddir}/.libs"
lappend ALWAYS_CFLAGS "additional_flags=-I${blddir}"
lappend ALWAYS_CFLAGS "ldflags=-L${blddir}/.libs"
append always_ld_library_path ":${blddir}/.libs"
}
# The top-level include directory, for gomp-constants.h.
lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/../../include"
lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/.."
# For build-tree testing, also consider the library paths used for building.
# For installed testing, we assume all that to be provided in the sysroot.
if { $blddir != "" } {
# The `-fopenacc' and `-fopenmp' options imply `-pthread', and
# that implies `-latomic' on some hosts, so wire in libatomic
# build directories.
if [ishost "riscv*-*-linux*"] {
set shlib_ext [get_shlib_extension]
set atomic_library_path "${blddir}/../libatomic/.libs"
if { [file exists "${atomic_library_path}/libatomic.a"]
|| [file exists \
"${atomic_library_path}/libatomic.${shlib_ext}"] } {
lappend ALWAYS_CFLAGS \
"additional_flags=-L${atomic_library_path}"
append always_ld_library_path ":${atomic_library_path}"
}
}
}
# We use atomic operations in the testcases to validate results.
if { ([istarget i?86-*-*] || [istarget x86_64-*-*])
&& [check_effective_target_ia32]
&& ![check_effective_target_cas_char] } {
lappend ALWAYS_CFLAGS "additional_flags=-march=i486"
}
if [istarget *-*-darwin*] {
lappend ALWAYS_CFLAGS "additional_flags=-shared-libgcc"
}
if [istarget sparc*-*-*] {
lappend ALWAYS_CFLAGS "additional_flags=-mcpu=v9"
}
if [info exists TOOL_OPTIONS] {
lappend ALWAYS_CFLAGS "additional_flags=$TOOL_OPTIONS"
}
# Make sure that lines are not wrapped. That can confuse the
# error-message parsing machinery.
lappend ALWAYS_CFLAGS "additional_flags=-fmessage-length=0"
# Disable caret
lappend ALWAYS_CFLAGS "additional_flags=-fno-diagnostics-show-caret"
# Disable color diagnostics
lappend ALWAYS_CFLAGS "additional_flags=-fdiagnostics-color=never"
# Help GCC to find offload compilers' 'mkoffload'.
global offload_additional_options
if { $offload_additional_options != "" } {
lappend ALWAYS_CFLAGS "additional_flags=${offload_additional_options}"
}
# Tell warning from error diagnostics. This fits for C, C++, and Fortran.
global gcc_warning_prefix
set gcc_warning_prefix "\[Ww\]arning:"
global gcc_error_prefix
set gcc_error_prefix "(\[Ff\]atal )?\[Ee\]rror:"
}
#
# libgomp_target_compile -- compile SOURCE to DEST as TYPE, layering the
# libgomp-specific flags on top of the caller-supplied OPTIONS, and return
# the compiler output (empty on success).
#
proc libgomp_target_compile { source dest type options } {
    global blddir
    global libgomp_compile_options
    global gluefile wrap_flags
    global ALWAYS_CFLAGS
    global GCC_UNDER_TEST
    global lang_source_re lang_include_flags
    global lang_library_paths
    global lang_link_flags

    # Language-specific include paths apply only to matching source files.
    if { [info exists lang_include_flags] \
	 && [regexp ${lang_source_re} ${source}] } {
	lappend options "additional_flags=${lang_include_flags}"
    }

    if { [info exists lang_library_paths] } {
	foreach libdir $lang_library_paths {
	    # targets that use lib[...].a%s in their specs need a -B option
	    # for uninstalled testing.
	    lappend options "additional_flags=-B${blddir}/${libdir}"
	    lappend options "ldflags=-L${blddir}/${libdir}"
	}
    }
    if { [info exists lang_link_flags] } {
	lappend options "ldflags=${lang_link_flags}"
    }

    # Status-wrapper glue, where the target needs it.
    if { [target_info needs_status_wrapper] != "" \
	 && [info exists gluefile] } {
	lappend options "libs=${gluefile}"
	lappend options "ldflags=${wrap_flags}"
    }

    lappend options "additional_flags=[libio_include_flags]"
    lappend options "timeout=[timeout_value]"

    # Command-line extras go before the per-test options; the always-on
    # flags go in front of everything.
    set options [concat $libgomp_compile_options $options]
    if { [info exists ALWAYS_CFLAGS] } {
	set options [concat "$ALWAYS_CFLAGS" $options]
    }
    set options [dg-additional-files-options $options $source]

    return [target_compile $source $dest $type $options]
}
# Describe the libgomp-specific runtest option accepted by
# 'libgomp_option_proc' below.
proc libgomp_option_help { } {
send_user " --additional_options,OPTIONS\t\tUse OPTIONS to compile the testcase files. OPTIONS should be comma-separated.\n"
}
# Handle the '--additional_options,OPT1,OPT2,...' runtest option by adding
# each comma-separated flag to 'libgomp_compile_options'.  Return 1 if the
# option was consumed, 0 otherwise.
proc libgomp_option_proc { option } {
    if { ![regexp "^--additional_options," $option] } {
	return 0
    }
    global libgomp_compile_options
    # Strip the option prefix, then split the remainder at commas.
    regsub "--additional_options," $option "" option
    foreach flag [split $option ","] {
	lappend libgomp_compile_options "additional_flags=$flag"
    }
    return 1
}
# Serialize execution testing across parallel slots.  FLOCK is set by
# 'testsuite/libgomp-site-extra.exp.in' during configuration; it is empty
# when no usable 'flock' tool was found.
if ![info exists ::env(GCC_RUNTEST_PARALLELIZE_DIR)] {
# No parallel testing.
} elseif { $FLOCK == "" } {
# Using just one parallel slot.
} else {
# Using several parallel slots. Override DejaGnu
# 'standard.exp:${tool}_load'...
rename libgomp_load standard_libgomp_load
proc libgomp_load { program args } {
# ... in order to serialize execution testing via an exclusive lock.
# The lock file lives in the parent directory, shared by all slots
# (each slot runs in its own subdirectory).
set lock_file ../lock
set lock_kind --exclusive
set lock_fd [open $lock_file a+]
set lock_clock_begin [clock seconds]
global FLOCK
# 'flock' locks fd 0, which is redirected from our 'lock_fd'; the lock
# stays with the open file and is only released by 'close' below.
exec $FLOCK $lock_kind 0 <@ $lock_fd
set lock_clock_end [clock seconds]
# Log how long we waited for the lock, for diagnosing slot contention.
verbose -log "Got ${FLOCK}('$lock_file', '$lock_kind') at [clock format $lock_clock_end] after [expr $lock_clock_end - $lock_clock_begin] s" 2
set result [standard_libgomp_load $program $args]
# Unlock (implicit with 'close').
close $lock_fd
return $result
}
}
# Translate offload target to OpenACC device type.  Return 'host' for offload
# target 'disable'; raise an error for an unknown offload target.
proc offload_target_to_openacc_device_type { offload_target } {
    # Map each known offload target family onto its OpenACC device type;
    # 'disable' means no offloading, i.e. the 'host' device type.
    if { [string match "amdgcn*" $offload_target] } {
	return "radeon"
    }
    if { $offload_target eq "disable" } {
	return "host"
    }
    if { [string match "nvptx*" $offload_target] } {
	return "nvidia"
    }
    error "Unknown offload target: $offload_target"
}
# Return 1 if compiling for the specified offload target (an empty
# TARGET_NAME matches any configured offload target).
# Takes -foffload=... into account by checking OFFLOAD_TARGET_NAMES=
# in the -v compiler output.
proc libgomp_check_effective_target_offload_target { target_name } {
    # Consider all actual options, including the flags passed to
    # 'gcc-dg-runtest', or 'gfortran-dg-runtest' (see the 'libgomp.*/*.exp'
    # files; in particular, '-foffload', 'libgomp.oacc-*/*.exp'), which don't
    # get passed on to 'check_effective_target_*' functions.  (Not caching the
    # result due to that.)
    set extra_options [list "additional_flags=[concat "-v" [current_compiler_flags]]"]
    # Instead of inspecting command-line options, look what the compiler driver
    # decides.  This is somewhat modelled after
    # 'gcc/testsuite/lib/target-supports.exp:check_configured_with'.
    set driver_output [libgomp_target_compile "" "" "none" $extra_options]
    if { ![regexp "(?n)^OFFLOAD_TARGET_NAMES=(.*)" $driver_output dummy configured_targets] } {
	verbose "not compiling for $target_name offload target"
	return 0
    }
    verbose "compiling for offload targets: $configured_targets"
    return [string match "*:$target_name*:*" ":$configured_targets:"]
}
# Return 1 if compiling for any offload target.  (The empty string matches
# whenever OFFLOAD_TARGET_NAMES= lists anything at all.)
proc check_effective_target_offload_target_any { } {
return [libgomp_check_effective_target_offload_target ""]
}
# Return 1 if compiling for offload target nvptx.
proc check_effective_target_offload_target_nvptx { } {
return [libgomp_check_effective_target_offload_target "nvptx"]
}
# Return 1 if compiling for offload target amdgcn.
proc check_effective_target_offload_target_amdgcn { } {
return [libgomp_check_effective_target_offload_target "amdgcn"]
}
# Return 1 if offload device is available.
# The embedded C program exits 0 (success) only when the 'target' region
# runs on a non-initial device.  (Results are not cached —
# 'check_runtime_nocache' — since availability depends on current flags.)
proc check_effective_target_offload_device { } {
return [check_runtime_nocache offload_device_available_ {
#include <omp.h>
int main ()
{
int a;
#pragma omp target map(from: a)
a = omp_is_initial_device ();
return a;
}
} ]
}
# Return 1 if offload device is available and it has non-shared address space.
# 'map(to: a)' does not copy back, so the host copy stays 8 only when the
# device has its own address space; exit status is 0 in exactly that case.
proc check_effective_target_offload_device_nonshared_as { } {
return [check_runtime_nocache offload_device_nonshared_as {
int main ()
{
int a = 8;
#pragma omp target map(to: a)
a++;
return a != 8;
}
} ]
}
# Return 1 if offload device is available and it has shared address space.
# Inverse of the check above: the device-side increment is visible on the
# host only with a shared address space.
proc check_effective_target_offload_device_shared_as { } {
return [check_runtime_nocache offload_device_shared_as {
int main ()
{
int x = 10;
#pragma omp target map(to: x)
x++;
return x == 10;
}
} ]
}
# Return 1 if using nvptx offload device.
proc check_effective_target_offload_device_nvptx { } {
return [check_runtime_nocache offload_device_nvptx {
#include <omp.h>
#include "testsuite/libgomp.c-c++-common/on_device_arch.h"
int main ()
{
return !on_device_arch_nvptx ();
}
} ]
}
# Return 1 if using a GCN offload device.
proc check_effective_target_offload_device_gcn { } {
return [check_runtime_nocache offload_device_gcn {
#include <omp.h>
#include "testsuite/libgomp.c-c++-common/on_device_arch.h"
int main ()
{
return !on_device_arch_gcn ();
}
} ]
}
# Return 1 if at least one Nvidia GPU is accessible.
proc check_effective_target_openacc_nvidia_accel_present { } {
return [check_runtime openacc_nvidia_accel_present {
#include <openacc.h>
int main () {
return !(acc_get_num_devices (acc_device_nvidia) > 0);
}
} "" ]
}
# Return 1 if at least one Nvidia GPU is accessible, and the OpenACC 'nvidia'
# device type is selected.
proc check_effective_target_openacc_nvidia_accel_selected { } {
    global openacc_device_type
    # Probe the hardware first, then check the selected device type.
    if { [check_effective_target_openacc_nvidia_accel_present] } {
	return [string match "nvidia" $openacc_device_type]
    }
    return 0
}
# Return 1 if the OpenACC 'host' device type is selected.
proc check_effective_target_openacc_host_selected { } {
    global openacc_device_type
    return [expr { $openacc_device_type eq "host" }]
}
# Return 1 if at least one AMD GPU is accessible.
# The embedded C program exits 0 (success) only when 'acc_get_num_devices'
# reports at least one 'radeon' device.
proc check_effective_target_openacc_radeon_accel_present { } {
return [check_runtime openacc_radeon_accel_present {
#include <openacc.h>
int main () {
return !(acc_get_num_devices (acc_device_radeon) > 0);
}
} "" ]
}
# Return 1 if at least one AMD GPU is accessible, and the OpenACC 'radeon'
# device type is selected.
proc check_effective_target_openacc_radeon_accel_selected { } {
    global openacc_device_type
    # Probe the hardware first, then check the selected device type.
    if { [check_effective_target_openacc_radeon_accel_present] } {
	return [string match "radeon" $openacc_device_type]
    }
    return 0
}
# Return 1 if cuda.h and -lcuda are available.
# Unlike the checks above, this only requires the program to build and run
# without compiler messages ('check_no_compiler_messages').
proc check_effective_target_openacc_cuda { } {
return [check_no_compiler_messages openacc_cuda executable {
#include <cuda.h>
int main() {
CUdevice dev;
CUresult r = cuDeviceGet (&dev, 0);
if (r != CUDA_SUCCESS)
return 1;
return 0;
} } "-lcuda" ]
}
# Return 1 if cublas_v2.h and -lcublas are available.
proc check_effective_target_openacc_cublas { } {
return [check_no_compiler_messages openacc_cublas executable {
#include <cuda.h>
#include <cublas_v2.h>
int main() {
cublasStatus_t s;
cublasHandle_t h;
CUdevice dev;
CUresult r = cuDeviceGet (&dev, 0);
if (r != CUDA_SUCCESS)
return 1;
s = cublasCreate (&h);
if (s != CUBLAS_STATUS_SUCCESS)
return 1;
return 0;
} } "-lcuda -lcublas" ]
}
# Return 1 if cuda_runtime_api.h and -lcudart are available.
proc check_effective_target_openacc_cudart { } {
return [check_no_compiler_messages openacc_cudart executable {
#include <cuda.h>
#include <cuda_runtime_api.h>
int main() {
cudaError_t e;
int devn;
CUdevice dev;
CUresult r = cuDeviceGet (&dev, 0);
if (r != CUDA_SUCCESS)
return 1;
e = cudaGetDevice (&devn);
if (e != cudaSuccess)
return 1;
return 0;
} } "-lcuda -lcudart" ]
}