
..., and enable if 'flock' is available for serializing execution testing. Regarding the default of 19 parallel slots, this turned out to be a local minimum for wall time when testing this on: $ uname -srvi Linux 4.2.0-42-generic #49~14.04.1-Ubuntu SMP Wed Jun 29 20:22:11 UTC 2016 x86_64 $ grep '^model name' < /proc/cpuinfo | uniq -c 32 model name : Intel(R) Xeon(R) CPU E5-2640 v3 @ 2.60GHz ... in two configurations: case (a) standard configuration, no offloading configured, case (b) offloading for GCN and nvptx configured but no devices available. For both cases, default plus '-m32' variant. $ \time make check-target-libgomp RUNTESTFLAGS="--target_board=unix\{,-m32\}" Case (a), baseline: 6432.23user 332.38system 47:32.28elapsed 237%CPU (0avgtext+0avgdata 505044maxresident)k 6382.43user 319.21system 47:06.04elapsed 237%CPU (0avgtext+0avgdata 505172maxresident)k This is what people have been complaining about, rightly so, in <https://gcc.gnu.org/PR66005> "libgomp make check time is excessive" and elsewhere. 
Case (a), parallelized: -j12 GCC_TEST_PARALLEL_SLOTS=10 3088.49user 267.74system 6:43.82elapsed 831%CPU (0avgtext+0avgdata 505188maxresident)k -j15 GCC_TEST_PARALLEL_SLOTS=15 3308.08user 294.79system 5:56.04elapsed 1011%CPU (0avgtext+0avgdata 505360maxresident)k -j17 GCC_TEST_PARALLEL_SLOTS=17 3539.93user 298.99system 5:27.86elapsed 1170%CPU (0avgtext+0avgdata 505112maxresident)k -j18 GCC_TEST_PARALLEL_SLOTS=18 3697.50user 317.18system 5:14.63elapsed 1275%CPU (0avgtext+0avgdata 505360maxresident)k -j19 GCC_TEST_PARALLEL_SLOTS=19 3765.94user 324.27system 5:13.22elapsed 1305%CPU (0avgtext+0avgdata 505128maxresident)k -j20 GCC_TEST_PARALLEL_SLOTS=20 3684.66user 312.32system 5:15.26elapsed 1267%CPU (0avgtext+0avgdata 505100maxresident)k -j23 GCC_TEST_PARALLEL_SLOTS=23 4040.59user 347.10system 5:29.12elapsed 1333%CPU (0avgtext+0avgdata 505200maxresident)k -j26 GCC_TEST_PARALLEL_SLOTS=26 3973.24user 377.96system 5:24.70elapsed 1340%CPU (0avgtext+0avgdata 505160maxresident)k -j32 GCC_TEST_PARALLEL_SLOTS=32 4004.42user 346.10system 5:16.11elapsed 1376%CPU (0avgtext+0avgdata 505160maxresident)k Yay! 
Case (b), baseline; 2+ h: 7227.58user 700.54system 2:14:33elapsed 98%CPU (0avgtext+0avgdata 994264maxresident)k Case (b), parallelized: -j12 GCC_TEST_PARALLEL_SLOTS=10 7377.46user 777.52system 16:06.63elapsed 843%CPU (0avgtext+0avgdata 994344maxresident)k -j15 GCC_TEST_PARALLEL_SLOTS=15 8019.18user 721.42system 12:13.56elapsed 1191%CPU (0avgtext+0avgdata 994228maxresident)k -j17 GCC_TEST_PARALLEL_SLOTS=17 8530.11user 716.95system 10:45.92elapsed 1431%CPU (0avgtext+0avgdata 994176maxresident)k -j18 GCC_TEST_PARALLEL_SLOTS=18 8776.79user 645.89system 10:27.20elapsed 1502%CPU (0avgtext+0avgdata 994248maxresident)k -j19 GCC_TEST_PARALLEL_SLOTS=19 9332.37user 641.76system 10:15.09elapsed 1621%CPU (0avgtext+0avgdata 994260maxresident)k -j20 GCC_TEST_PARALLEL_SLOTS=20 9609.54user 789.88system 10:26.94elapsed 1658%CPU (0avgtext+0avgdata 994284maxresident)k -j23 GCC_TEST_PARALLEL_SLOTS=23 10362.40user 911.14system 10:44.47elapsed 1749%CPU (0avgtext+0avgdata 994208maxresident)k -j26 GCC_TEST_PARALLEL_SLOTS=26 11159.44user 850.99system 11:09.25elapsed 1794%CPU (0avgtext+0avgdata 994256maxresident)k -j32 GCC_TEST_PARALLEL_SLOTS=32 11453.50user 939.52system 11:00.38elapsed 1876%CPU (0avgtext+0avgdata 994240maxresident)k On my Dell Precision 7530 laptop: $ uname -srvi Linux 5.15.0-71-generic #78-Ubuntu SMP Tue Apr 18 09:00:29 UTC 2023 x86_64 $ grep '^model name' < /proc/cpuinfo | uniq -c 12 model name : Intel(R) Core(TM) i7-8850H CPU @ 2.60GHz $ nvidia-smi -L GPU 0: Quadro P1000 (UUID: GPU-e043973b-b52a-d02b-c066-a8fdbf64e8ea) ... in two configurations: case (c) standard configuration, no offloading configured, case (d) offloading for nvptx configured and device available. For both cases, only default variant, no '-m32'. 
$ \time make check-target-libgomp Case (c), baseline; roughly half of case (a) (just one variant): 1180.98user 110.80system 19:36.40elapsed 109%CPU (0avgtext+0avgdata 505148maxresident)k 1133.22user 111.08system 19:35.75elapsed 105%CPU (0avgtext+0avgdata 505212maxresident)k Case (c), parallelized: -j12 GCC_TEST_PARALLEL_SLOTS=2 1143.83user 110.76system 10:20.46elapsed 202%CPU (0avgtext+0avgdata 505216maxresident)k -j12 GCC_TEST_PARALLEL_SLOTS=6 1737.08user 143.94system 4:59.48elapsed 628%CPU (0avgtext+0avgdata 505200maxresident)k 1730.31user 143.02system 4:58.75elapsed 627%CPU (0avgtext+0avgdata 505152maxresident)k -j12 GCC_TEST_PARALLEL_SLOTS=8 2192.63user 169.34system 4:52.96elapsed 806%CPU (0avgtext+0avgdata 505216maxresident)k 2219.04user 167.67system 4:53.19elapsed 814%CPU (0avgtext+0avgdata 505152maxresident)k -j12 GCC_TEST_PARALLEL_SLOTS=10 2463.93user 184.98system 4:48.39elapsed 918%CPU (0avgtext+0avgdata 505200maxresident)k 2455.62user 183.68system 4:47.40elapsed 918%CPU (0avgtext+0avgdata 505216maxresident)k -j12 GCC_TEST_PARALLEL_SLOTS=12 2591.04user 192.64system 4:44.98elapsed 976%CPU (0avgtext+0avgdata 505216maxresident)k 2581.23user 195.21system 4:47.51elapsed 965%CPU (0avgtext+0avgdata 505212maxresident)k -j20 GCC_TEST_PARALLEL_SLOTS=20 [oversubscribe] 2613.18user 199.51system 4:44.06elapsed 990%CPU (0avgtext+0avgdata 505216maxresident)k Case (d), baseline (compared to case (b): only nvptx offloading compilation, but also nvptx offloading execution); ~1 h: 2841.93user 653.68system 1:02:26elapsed 93%CPU (0avgtext+0avgdata 909792maxresident)k 2842.03user 654.39system 1:02:24elapsed 93%CPU (0avgtext+0avgdata 909880maxresident)k Case (d), parallelized: -j12 GCC_TEST_PARALLEL_SLOTS=2 2856.39user 606.87system 33:58.64elapsed 169%CPU (0avgtext+0avgdata 909948maxresident)k -j12 GCC_TEST_PARALLEL_SLOTS=6 3444.90user 666.86system 18:37.57elapsed 367%CPU (0avgtext+0avgdata 909856maxresident)k 3462.13user 667.13system 18:36.87elapsed 369%CPU (0avgtext+0avgdata 
909872maxresident)k -j12 GCC_TEST_PARALLEL_SLOTS=8 3929.74user 716.22system 18:02.36elapsed 429%CPU (0avgtext+0avgdata 909832maxresident)k -j12 GCC_TEST_PARALLEL_SLOTS=10 4152.84user 736.16system 17:43.05elapsed 459%CPU (0avgtext+0avgdata 909872maxresident)k -j12 GCC_TEST_PARALLEL_SLOTS=12 4209.60user 749.00system 17:35.20elapsed 469%CPU (0avgtext+0avgdata 909840maxresident)k -j20 GCC_TEST_PARALLEL_SLOTS=20 [oversubscribe] 4255.54user 756.78system 17:29.06elapsed 477%CPU (0avgtext+0avgdata 909868maxresident)k Worth noting is that with nvptx offloading, there is one execution test case that times out ('libgomp.fortran/reverse-offload-5.f90'). This effectively stalls progress for almost 5 min: other execution test cases quickly queue up on the lock for all parallel slots. That's working as expected; just noting this as it accordingly does skew the wall time numbers. PR testsuite/66005 libgomp/ * configure.ac: Look for 'flock'. * testsuite/Makefile.am (gcc_test_parallel_slots): Enable parallel testing. * testsuite/config/default.exp: Don't 'load_lib "standard.exp"' here... * testsuite/lib/libgomp.exp: ... but here, instead. (libgomp_load): Override for parallel testing. * testsuite/libgomp-site-extra.exp.in (FLOCK): Set. * configure: Regenerate. * Makefile.in: Regenerate. * testsuite/Makefile.in: Regenerate.
583 lines
17 KiB
Text
583 lines
17 KiB
Text
# Damn dejagnu for not having proper library search paths for load_lib.
|
|
# We have to explicitly load everything that gcc-dg.exp wants to load.
|
|
|
|
# Load FILENAME from the GCC testsuite library directory, located relative
# to 'srcdir', and record it in the 'loaded_libs' array.
proc load_gcc_lib { filename } {
    global srcdir loaded_libs

    set gcc_lib_path "${srcdir}/../../gcc/testsuite/lib/${filename}"
    load_file $gcc_lib_path

    # Mark the library as loaded (the value itself is unused: empty string).
    set loaded_libs($filename) ""
}
|
|
|
|
load_lib dg.exp
load_lib standard.exp

# Required to use gcc-dg.exp - however, the latter should NOT be
# loaded until ${tool}_target_compile is defined since it uses that
# to determine default LTO options.
#
# The GCC testsuite support libraries are loaded in exactly this order.
foreach libgomp_gcc_lib_file {
    multiline.exp
    prune.exp
    target-libpath.exp
    wrapper.exp
    target-supports.exp
    target-utils.exp
    gcc-defs.exp
    timeout.exp
    file-format.exp
    target-supports-dg.exp
    scanasm.exp
    scandump.exp
    scanlang.exp
    scanrtl.exp
    scansarif.exp
    scantree.exp
    scanltranstree.exp
    scanoffload.exp
    scanoffloadipa.exp
    scanoffloadtree.exp
    scanoffloadrtl.exp
    scanipa.exp
    scanwpaipa.exp
    timeout-dg.exp
    torture-options.exp
    fortran-modules.exp
} {
    load_gcc_lib $libgomp_gcc_lib_file
}
# Don't leave the loop variable behind in the global namespace.
unset libgomp_gcc_lib_file
|
|
|
|
# Load the test support file that is generated during libgomp configuration.
# For parallel testing ('GCC_RUNTEST_PARALLELIZE_DIR' set in the environment)
# it is looked up in '..', for sequential testing in '.'.
if { ![info exists ::env(GCC_RUNTEST_PARALLELIZE_DIR)] } {
    load_file libgomp-test-support.exp
} else {
    load_file ../libgomp-test-support.exp
}

# Test cases are 'run' tests unless they say otherwise.
set dg-do-what-default run

# Extra compile options; filled in by 'libgomp_option_proc'.
set libgomp_compile_options ""
|
|
|
|
#
|
|
# libgomp_init
|
|
#
|
|
|
|
# Determine the multilib directory, taking 'TOOL_OPTIONS' into account
# when that variable is set.
if { ![info exists TOOL_OPTIONS] } {
    set multilibs [get_multilibs]
} else {
    set multilibs [get_multilibs $TOOL_OPTIONS]
}
|
|
|
|
# Initialize the libgomp testsuite: locate the build directory, pin the
# locale, determine the compilers under test, and compute the global
# 'ALWAYS_CFLAGS' and 'always_ld_library_path' used for every test.
# ARGS is accepted but not used.
proc libgomp_init { args } {
    global srcdir blddir objdir tool_root_dir
    global libgomp_initialized
    global tmpdir
    global gluefile wrap_flags
    global ALWAYS_CFLAGS
    global CFLAGS
    global TOOL_EXECUTABLE TOOL_OPTIONS
    global GCC_UNDER_TEST GXX_UNDER_TEST GFORTRAN_UNDER_TEST
    global TESTING_IN_BUILD_TREE
    global target_triplet
    global always_ld_library_path

    # Empty if no 'libgomp' build directory is found (installed testing).
    set blddir [lookfor_file [get_multilibs] libgomp]

    # We set LC_ALL and LANG to C so that we get the same error
    # messages as expected.
    setenv LC_ALL C
    setenv LANG C

    # Many hosts now default to a non-ASCII C locale, however, so
    # they can set a charset encoding here if they need.
    if { [ishost "*-*-cygwin*"] } {
	setenv LC_ALL C.ASCII
	setenv LANG C.ASCII
    }

    if { $blddir != "" } {
	# Fix up '-funconfigured-libstdc++-v3' in 'GXX_UNDER_TEST' (see
	# '../../configure.ac').
	set flags_file "${blddir}/../libstdc++-v3/scripts/testsuite_flags"
	# 'GXX_UNDER_TEST' may legitimately be unset at this point (it is
	# only guessed further below), in which case there is nothing to fix
	# up; reading it unconditionally would raise a Tcl error.
	if { [info exists GXX_UNDER_TEST] && [file exists $flags_file] } {
	    set flags [exec sh $flags_file --build-includes]
	    verbose -log "GXX_UNDER_TEST = $GXX_UNDER_TEST"
	    set GXX_UNDER_TEST [string map [list \
	      " -funconfigured-libstdc++-v3 " " $flags " \
	    ] $GXX_UNDER_TEST]
	    verbose -log "GXX_UNDER_TEST = $GXX_UNDER_TEST"
	}
    }

    if ![info exists GCC_UNDER_TEST] then {
	if [info exists TOOL_EXECUTABLE] {
	    set GCC_UNDER_TEST $TOOL_EXECUTABLE
	} else {
	    set GCC_UNDER_TEST "[find_gcc]"
	}
	# Only if we're guessing 'GCC_UNDER_TEST', we're also going to guess
	# 'GXX_UNDER_TEST', 'GFORTRAN_UNDER_TEST'.
	if ![info exists GXX_UNDER_TEST] then {
	    if [info exists TOOL_EXECUTABLE] {
		set GXX_UNDER_TEST $TOOL_EXECUTABLE
	    } else {
		set GXX_UNDER_TEST "[find_g++]"
	    }
	} else {
	    error "GXX_UNDER_TEST set but not GCC_UNDER_TEST"
	}
	if ![info exists GFORTRAN_UNDER_TEST] then {
	    if [info exists TOOL_EXECUTABLE] {
		set GFORTRAN_UNDER_TEST $TOOL_EXECUTABLE
	    } else {
		set GFORTRAN_UNDER_TEST "[find_gfortran]"
	    }
	} else {
	    error "GFORTRAN_UNDER_TEST set but not GCC_UNDER_TEST"
	}
    }

    if ![info exists tmpdir] {
	set tmpdir "/tmp"
    }

    if [info exists gluefile] {
	unset gluefile
    }

    if {![info exists CFLAGS]} {
	set CFLAGS ""
    }

    # Locate libgcc.a so we don't need to account for different values of
    # SHLIB_EXT on different platforms
    set gccdir [lookfor_file $tool_root_dir gcc/libgcc.a]
    if {$gccdir != ""} {
	set gccdir [file dirname $gccdir]
    }

    # Compute what needs to be put into LD_LIBRARY_PATH
    set always_ld_library_path "."

    # NOTE(review): appended without a separator, so the value presumably
    # already starts with ':' -- confirm against the generated
    # 'libgomp-test-support.exp'.
    global offload_additional_lib_paths
    if { $offload_additional_lib_paths != "" } {
	append always_ld_library_path "${offload_additional_lib_paths}"
    }

    # Compute what needs to be added to the existing LD_LIBRARY_PATH.
    if {$gccdir != ""} {
	# Add AIX pthread directory first.
	if { [llength [glob -nocomplain ${gccdir}/pthread/libgcc_s*.a]] >= 1 } {
	    append always_ld_library_path ":${gccdir}/pthread"
	}
	append always_ld_library_path ":${gccdir}"
	set compiler [lindex $GCC_UNDER_TEST 0]

	if { [is_remote host] == 0 && [which $compiler] != 0 } {
	    # Add every multilib directory (other than '.') that contains a
	    # shared libgcc.
	    foreach i "[exec $compiler --print-multi-lib]" {
		set mldir ""
		regexp -- "\[a-z0-9=_/\.-\]*;" $i mldir
		set mldir [string trimright $mldir "\;@"]
		if { "$mldir" == "." } {
		    continue
		}
		if { [llength [glob -nocomplain ${gccdir}/${mldir}/libgcc_s*.so.*]] >= 1 } {
		    append always_ld_library_path ":${gccdir}/${mldir}"
		}
	    }
	}
    }

    set ALWAYS_CFLAGS ""
    if { $blddir != "" } {
	lappend ALWAYS_CFLAGS "additional_flags=-B${blddir}/"
	# targets that use libgomp.a%s in their specs need a -B option
	# for uninstalled testing.
	lappend ALWAYS_CFLAGS "additional_flags=-B${blddir}/.libs"
	lappend ALWAYS_CFLAGS "additional_flags=-I${blddir}"
	lappend ALWAYS_CFLAGS "ldflags=-L${blddir}/.libs"

	append always_ld_library_path ":${blddir}/.libs"
    }
    # The top-level include directory, for gomp-constants.h.
    lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/../../include"
    lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/.."

    # For build-tree testing, also consider the library paths used for building.
    # For installed testing, we assume all that to be provided in the sysroot.
    if { $blddir != "" } {
	# The `-fopenacc' and `-fopenmp' options imply `-pthread', and
	# that implies `-latomic' on some hosts, so wire in libatomic
	# build directories.
	if [ishost "riscv*-*-linux*"] {
	    set shlib_ext [get_shlib_extension]
	    set atomic_library_path "${blddir}/../libatomic/.libs"
	    if { [file exists "${atomic_library_path}/libatomic.a"]
		 || [file exists \
		       "${atomic_library_path}/libatomic.${shlib_ext}"] } {
		lappend ALWAYS_CFLAGS \
		    "additional_flags=-L${atomic_library_path}"
		append always_ld_library_path ":${atomic_library_path}"
	    }
	}
    }

    # We use atomic operations in the testcases to validate results.
    if { ([istarget i?86-*-*] || [istarget x86_64-*-*])
	 && [check_effective_target_ia32]
	 && ![check_effective_target_cas_char] } {
	lappend ALWAYS_CFLAGS "additional_flags=-march=i486"
    }

    if [istarget *-*-darwin*] {
	lappend ALWAYS_CFLAGS "additional_flags=-shared-libgcc"
    }

    if [istarget sparc*-*-*] {
	lappend ALWAYS_CFLAGS "additional_flags=-mcpu=v9"
    }

    if [info exists TOOL_OPTIONS] {
	lappend ALWAYS_CFLAGS "additional_flags=$TOOL_OPTIONS"
    }

    # Make sure that lines are not wrapped.  That can confuse the
    # error-message parsing machinery.
    lappend ALWAYS_CFLAGS "additional_flags=-fmessage-length=0"

    # Disable caret
    lappend ALWAYS_CFLAGS "additional_flags=-fno-diagnostics-show-caret"

    # Disable color diagnostics
    lappend ALWAYS_CFLAGS "additional_flags=-fdiagnostics-color=never"

    # Help GCC to find offload compilers' 'mkoffload'.
    global offload_additional_options
    if { $offload_additional_options != "" } {
	lappend ALWAYS_CFLAGS "additional_flags=${offload_additional_options}"
    }

    # Tell warning from error diagnostics.  This fits for C, C++, and Fortran.
    global gcc_warning_prefix
    set gcc_warning_prefix "\[Ww\]arning:"
    global gcc_error_prefix
    set gcc_error_prefix "(\[Ff\]atal )?\[Ee\]rror:"
}
|
|
|
|
#
|
|
# libgomp_target_compile -- compile a source file
|
|
#
|
|
|
|
# Compile SOURCE into DEST as a TYPE object via 'target_compile', after
# extending OPTIONS with the language-specific, command-line-supplied and
# always-on flags.  Returns whatever 'target_compile' returns.
proc libgomp_target_compile { source dest type options } {
    global blddir
    global libgomp_compile_options
    global gluefile wrap_flags
    global ALWAYS_CFLAGS
    global GCC_UNDER_TEST
    global lang_source_re lang_include_flags
    global lang_library_paths
    global lang_link_flags

    # Language-specific include flags apply only to sources matching
    # 'lang_source_re'.
    if { [info exists lang_include_flags] } {
	if { [regexp ${lang_source_re} ${source}] } {
	    lappend options "additional_flags=${lang_include_flags}"
	}
    }

    if { [info exists lang_library_paths] } {
	foreach libdir $lang_library_paths {
	    # targets that use lib[...].a%s in their specs need a -B option
	    # for uninstalled testing.
	    lappend options "additional_flags=-B${blddir}/${libdir}"
	    lappend options "ldflags=-L${blddir}/${libdir}"
	}
    }

    if { [info exists lang_link_flags] } {
	lappend options "ldflags=${lang_link_flags}"
    }

    # Link in the status wrapper if the target needs one and it is available.
    if { [target_info needs_status_wrapper] != "" && [info exists gluefile] } {
	lappend options "libs=${gluefile}"
	lappend options "ldflags=${wrap_flags}"
    }

    lappend options "additional_flags=[libio_include_flags]"
    lappend options "timeout=[timeout_value]"

    # Prepend first the '--additional_options' flags, then the always-on
    # flags, so that 'ALWAYS_CFLAGS' end up at the very front.
    set options [concat $libgomp_compile_options $options]
    if { [info exists ALWAYS_CFLAGS] } {
	set options [concat $ALWAYS_CFLAGS $options]
    }

    set options [dg-additional-files-options $options $source]

    return [target_compile $source $dest $type $options]
}
|
|
|
|
# Print the help text for the libgomp-specific '--additional_options' option.
proc libgomp_option_help { } {
    set help_text " --additional_options,OPTIONS\t\tUse OPTIONS to compile the testcase files. OPTIONS should be comma-separated.\n"
    send_user $help_text
}
|
|
|
|
# Handle the libgomp-specific command-line option
# '--additional_options,OPT1,OPT2,...': each comma-separated OPT is appended
# to the global 'libgomp_compile_options' as 'additional_flags=OPT'.
# Returns 1 if OPTION was recognized and consumed, 0 otherwise.
proc libgomp_option_proc { option } {
    if { ![regexp -- "^--additional_options," $option] } {
	return 0
    }

    global libgomp_compile_options
    # The pattern starts with '-', so '--' is required to end switch
    # parsing; otherwise 'regsub' rejects the pattern as a bad option.
    # Anchor it so only the leading option name is stripped.
    regsub -- "^--additional_options," $option "" option
    foreach x [split $option ","] {
	lappend libgomp_compile_options "additional_flags=$x"
    }
    return 1
}
|
|
|
|
if ![info exists ::env(GCC_RUNTEST_PARALLELIZE_DIR)] {
    # No parallel testing.
} elseif { $FLOCK == "" } {
    # Using just one parallel slot.
} else {
    # Using several parallel slots.  Override DejaGnu
    # 'standard.exp:${tool}_load'...
    rename libgomp_load standard_libgomp_load

    # Run PROGRAM via the original 'libgomp_load' (forwarding any further
    # ARGS unchanged) while holding an exclusive lock on '../lock', so that
    # execution tests of all parallel slots are serialized.  Returns the
    # result of the original 'libgomp_load'.
    proc libgomp_load { program args } {
	global FLOCK

	# ... in order to serialize execution testing via an exclusive lock.
	set lock_file ../lock
	set lock_kind --exclusive
	set lock_fd [open $lock_file a+]
	set lock_clock_begin [clock seconds]

	# Make sure the lock file descriptor gets closed (and the lock thus
	# released) even if locking or the actual load raises an error.
	set status [catch {
	    exec $FLOCK $lock_kind 0 <@ $lock_fd
	    set lock_clock_end [clock seconds]
	    verbose -log "Got ${FLOCK}('$lock_file', '$lock_kind') at [clock format $lock_clock_end] after [expr $lock_clock_end - $lock_clock_begin] s" 2

	    # Expand ARGS into separate arguments rather than passing the
	    # whole list as one single argument.
	    eval [list standard_libgomp_load $program] $args
	} result]
	if { $status == 1 } {
	    # Save the error context before 'close' can clobber it.
	    set saved_error_info $::errorInfo
	    set saved_error_code $::errorCode
	}

	# Unlock (implicit with 'close').
	close $lock_fd

	if { $status == 1 } {
	    return -code error -errorinfo $saved_error_info \
		-errorcode $saved_error_code $result
	} elseif { $status != 0 } {
	    return -code $status $result
	}
	return $result
    }
}
|
|
|
|
# Translate offload target to OpenACC device type. Return the empty string if
|
|
# not supported, and 'host' for offload target 'disable'.
|
|
# Map OFFLOAD_TARGET to the name of the corresponding OpenACC device type:
# 'amdgcn*' -> 'radeon', 'disable' -> 'host', 'nvptx*' -> 'nvidia'.
# Any other offload target raises an error.
proc offload_target_to_openacc_device_type { offload_target } {
    if { [string match "amdgcn*" $offload_target] } {
	return "radeon"
    } elseif { [string match "disable" $offload_target] } {
	return "host"
    } elseif { [string match "nvptx*" $offload_target] } {
	return "nvidia"
    }
    error "Unknown offload target: $offload_target"
}
|
|
|
|
# Return 1 if compiling for the specified offload target
|
|
# Takes -foffload=... into account by checking OFFLOAD_TARGET_NAMES=
|
|
# in the -v compiler output.
|
|
# Return 1 if the compiler driver's '-v' output has an
# 'OFFLOAD_TARGET_NAMES=' line listing an offload target whose name starts
# with TARGET_NAME, and 0 otherwise.
proc libgomp_check_effective_target_offload_target { target_name } {
    # Consider all actual options, including the flags passed to
    # 'gcc-dg-runtest', or 'gfortran-dg-runtest' (see the 'libgomp.*/*.exp'
    # files; in particular, '-foffload', 'libgomp.oacc-*/*.exp'), which don't
    # get passed on to 'check_effective_target_*' functions.  (Not caching the
    # result due to that.)
    set compiler_flags [concat "-v" [current_compiler_flags]]
    set options [list "additional_flags=$compiler_flags"]

    # Instead of inspecting command-line options, look what the compiler driver
    # decides.  This is somewhat modelled after
    # 'gcc/testsuite/lib/target-supports.exp:check_configured_with'.
    set gcc_output [libgomp_target_compile "" "" "none" $options]

    if { ![regexp "(?n)^OFFLOAD_TARGET_NAMES=(.*)" $gcc_output dummy gcc_offload_targets] } {
	verbose "not compiling for $target_name offload target"
	return 0
    }

    verbose "compiling for offload targets: $gcc_offload_targets"
    # Wrap the list in ':' so that every entry is delimited on both sides.
    return [string match "*:$target_name*:*" ":$gcc_offload_targets:"]
}
|
|
|
|
# Return 1 if compiling for any offload target.
|
|
# Effective-target check: compiling for any offload target at all.
proc check_effective_target_offload_target_any { } {
    # The empty name is a prefix of every offload target name.
    set any_target ""
    return [libgomp_check_effective_target_offload_target $any_target]
}
|
|
|
|
# Return 1 if compiling for offload target nvptx.
|
|
# Effective-target check: compiling for an 'nvptx' offload target.
proc check_effective_target_offload_target_nvptx { } {
    set wanted_target "nvptx"
    return [libgomp_check_effective_target_offload_target $wanted_target]
}
|
|
|
|
# Return 1 if compiling for offload target amdgcn
|
|
# Effective-target check: compiling for an 'amdgcn' offload target.
proc check_effective_target_offload_target_amdgcn { } {
    set wanted_target "amdgcn"
    return [libgomp_check_effective_target_offload_target $wanted_target]
}
|
|
|
|
# Return 1 if offload device is available.
|
|
# Effective-target check: an offload device is available.  The probe
# program exits with the value of 'omp_is_initial_device ()' as observed
# inside a 'target' region, that is, 0 if the region did not run on the
# initial device.  The result is not cached.
proc check_effective_target_offload_device { } {
    set probe_src {
      #include <omp.h>
      int main ()
	{
	  int a;
	  #pragma omp target map(from: a)
	    a = omp_is_initial_device ();
	  return a;
	}
    }
    return [check_runtime_nocache offload_device_available_ $probe_src]
}
|
|
|
|
# Return 1 if offload device is available and it has non-shared address space.
|
|
# Effective-target check: an offload device is available and it has a
# non-shared address space.  The probe program increments a 'map(to:)'
# variable inside a 'target' region and exits with 0 only if the host copy
# is still unchanged afterwards.  The result is not cached.
proc check_effective_target_offload_device_nonshared_as { } {
    set probe_src {
      int main ()
	{
	  int a = 8;
	  #pragma omp target map(to: a)
	    a++;
	  return a != 8;
	}
    }
    return [check_runtime_nocache offload_device_nonshared_as $probe_src]
}
|
|
|
|
# Return 1 if offload device is available and it has shared address space.
|
|
# Effective-target check: an offload device is available and it has a
# shared address space.  The probe program increments a 'map(to:)' variable
# inside a 'target' region and exits with 0 only if the host copy has
# changed afterwards.  The result is not cached.
proc check_effective_target_offload_device_shared_as { } {
    set probe_src {
      int main ()
	{
	  int x = 10;
	  #pragma omp target map(to: x)
	    x++;
	  return x == 10;
	}
    }
    return [check_runtime_nocache offload_device_shared_as $probe_src]
}
|
|
|
|
# Return 1 if using nvptx offload device.
|
|
# Effective-target check: an nvptx offload device is in use.  The probe
# program exits with 0 if 'on_device_arch_nvptx ()' (from
# 'on_device_arch.h') returns nonzero.  The result is not cached.
proc check_effective_target_offload_device_nvptx { } {
    set probe_src {
      #include <omp.h>
      #include "testsuite/libgomp.c-c++-common/on_device_arch.h"
      int main ()
	{
	  return !on_device_arch_nvptx ();
	}
    }
    return [check_runtime_nocache offload_device_nvptx $probe_src]
}
|
|
|
|
# Return 1 if using a GCN offload device.
|
|
# Effective-target check: a GCN offload device is in use.  The probe
# program exits with 0 if 'on_device_arch_gcn ()' (from
# 'on_device_arch.h') returns nonzero.  The result is not cached.
proc check_effective_target_offload_device_gcn { } {
    set probe_src {
      #include <omp.h>
      #include "testsuite/libgomp.c-c++-common/on_device_arch.h"
      int main ()
	{
	  return !on_device_arch_gcn ();
	}
    }
    return [check_runtime_nocache offload_device_gcn $probe_src]
}
|
|
|
|
# Return 1 if at least one Nvidia GPU is accessible.
|
|
|
|
# Effective-target check: at least one Nvidia GPU is accessible.  The probe
# program exits with 0 if 'acc_get_num_devices (acc_device_nvidia)' is
# greater than zero.  No extra compiler flags are passed.
proc check_effective_target_openacc_nvidia_accel_present { } {
    set probe_src {
	#include <openacc.h>
	int main () {
	return !(acc_get_num_devices (acc_device_nvidia) > 0);
	}
    }
    set extra_flags ""
    return [check_runtime openacc_nvidia_accel_present $probe_src $extra_flags]
}
|
|
|
|
# Return 1 if at least one Nvidia GPU is accessible, and the OpenACC 'nvidia'
|
|
# device type is selected.
|
|
|
|
# Effective-target check: an Nvidia GPU is accessible and the global
# 'openacc_device_type' is 'nvidia'.
proc check_effective_target_openacc_nvidia_accel_selected { } {
    global openacc_device_type

    # 'openacc_device_type' is consulted only if a device is present.
    if { [check_effective_target_openacc_nvidia_accel_present] } {
	return [string equal $openacc_device_type "nvidia"]
    }
    return 0
}
|
|
|
|
# Return 1 if the OpenACC 'host' device type is selected.
|
|
|
|
# Effective-target check: the global 'openacc_device_type' is 'host'.
proc check_effective_target_openacc_host_selected { } {
    global openacc_device_type

    set selected_type $openacc_device_type
    return [string equal $selected_type "host"]
}
|
|
|
|
# Return 1 if at least one AMD GPU is accessible.
|
|
|
|
# Effective-target check: at least one AMD GPU is accessible.  The probe
# program exits with 0 if 'acc_get_num_devices (acc_device_radeon)' is
# greater than zero.  No extra compiler flags are passed.
proc check_effective_target_openacc_radeon_accel_present { } {
    set probe_src {
	#include <openacc.h>
	int main () {
	return !(acc_get_num_devices (acc_device_radeon) > 0);
	}
    }
    set extra_flags ""
    return [check_runtime openacc_radeon_accel_present $probe_src $extra_flags]
}
|
|
|
|
# Return 1 if at least one AMD GPU is accessible, and the OpenACC 'radeon'
|
|
# device type is selected.
|
|
|
|
# Effective-target check: an AMD GPU is accessible and the global
# 'openacc_device_type' is 'radeon'.
proc check_effective_target_openacc_radeon_accel_selected { } {
    global openacc_device_type

    # 'openacc_device_type' is consulted only if a device is present.
    if { [check_effective_target_openacc_radeon_accel_present] } {
	return [string equal $openacc_device_type "radeon"]
    }
    return 0
}
|
|
|
|
# Return 1 if cuda.h and -lcuda are available.
|
|
|
|
# Effective-target check: 'cuda.h' and '-lcuda' are available, that is, a
# small program calling 'cuDeviceGet' builds into an executable without
# compiler messages.
proc check_effective_target_openacc_cuda { } {
    set probe_src {
#include <cuda.h>
int main() {
    CUdevice dev;
    CUresult r = cuDeviceGet (&dev, 0);
    if (r != CUDA_SUCCESS)
	return 1;
    return 0;
} }
    set link_flags "-lcuda"
    return [check_no_compiler_messages openacc_cuda executable $probe_src $link_flags]
}
|
|
|
|
# Return 1 if cublas_v2.h and -lcublas are available.
|
|
|
|
# Effective-target check: 'cublas_v2.h' and '-lcublas' are available, that
# is, a small program calling 'cuDeviceGet' and 'cublasCreate' builds into
# an executable without compiler messages.
proc check_effective_target_openacc_cublas { } {
    set probe_src {
#include <cuda.h>
#include <cublas_v2.h>
int main() {
    cublasStatus_t s;
    cublasHandle_t h;
    CUdevice dev;
    CUresult r = cuDeviceGet (&dev, 0);
    if (r != CUDA_SUCCESS)
	return 1;
    s = cublasCreate (&h);
    if (s != CUBLAS_STATUS_SUCCESS)
	return 1;
    return 0;
} }
    set link_flags "-lcuda -lcublas"
    return [check_no_compiler_messages openacc_cublas executable $probe_src $link_flags]
}
|
|
|
|
# Return 1 if cuda_runtime_api.h and -lcudart are available.
|
|
|
|
# Effective-target check: 'cuda_runtime_api.h' and '-lcudart' are
# available, that is, a small program calling 'cuDeviceGet' and
# 'cudaGetDevice' builds into an executable without compiler messages.
proc check_effective_target_openacc_cudart { } {
    set probe_src {
#include <cuda.h>
#include <cuda_runtime_api.h>
int main() {
    cudaError_t e;
    int devn;
    CUdevice dev;
    CUresult r = cuDeviceGet (&dev, 0);
    if (r != CUDA_SUCCESS)
	return 1;
    e = cudaGetDevice (&devn);
    if (e != cudaSuccess)
	return 1;
    return 0;
} }
    set link_flags "-lcuda -lcudart"
    return [check_no_compiler_messages openacc_cudart executable $probe_src $link_flags]
}
|