GCN libgomp port
2019-11-13 Andrew Stubbs <ams@codesourcery.com> Kwok Cheung Yeung <kcy@codesourcery.com> Julian Brown <julian@codesourcery.com> Tom de Vries <tom@codesourcery.com> include/ * gomp-constants.h (GOMP_DEVICE_GCN): Define. (GOMP_VERSION_GCN): Define. libgomp/ * Makefile.am (libgomp_la_SOURCES): Add oacc-target.c. * Makefile.in: Regenerate. * config.h.in (PLUGIN_GCN): Add new undef. * config/accel/openacc.f90 (acc_device_gcn): New parameter. * config/gcn/affinity-fmt.c: New file. * config/gcn/bar.c: New file. * config/gcn/bar.h: New file. * config/gcn/doacross.h: New file. * config/gcn/icv-device.c: New file. * config/gcn/oacc-target.c: New file. * config/gcn/simple-bar.h: New file. * config/gcn/target.c: New file. * config/gcn/task.c: New file. * config/gcn/team.c: New file. * config/gcn/time.c: New file. * configure.ac: Add amdgcn*-*-*. * configure: Regenerate. * configure.tgt: Add amdgcn*-*-*. * libgomp-plugin.h (offload_target_type): Add OFFLOAD_TARGET_TYPE_GCN. * libgomp.h (gcn_thrs): Add amdgcn variant. (set_gcn_thrs): Likewise. (gomp_thread): Likewise. * oacc-int.h (goacc_thread): Likewise. * oacc-target.c: New file. * openacc.f90 (acc_device_gcn): New parameter. * openacc.h (acc_device_t): Add acc_device_gcn. * team.c (gomp_free_pool_helper): Add amdgcn support. Co-Authored-By: Julian Brown <julian@codesourcery.com> Co-Authored-By: Kwok Cheung Yeung <kcy@codesourcery.com> Co-Authored-By: Tom de Vries <tom@codesourcery.com> From-SVN: r278135
This commit is contained in:
parent
d2903ce05b
commit
fa4999953d
28 changed files with 1125 additions and 7 deletions
|
@ -1,3 +1,11 @@
|
|||
2019-11-13 Andrew Stubbs <ams@codesourcery.com>
|
||||
Kwok Cheung Yeung <kcy@codesourcery.com>
|
||||
Julian Brown <julian@codesourcery.com>
|
||||
Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
* gomp-constants.h (GOMP_DEVICE_GCN): Define.
|
||||
(GOMP_VERSION_GCN): Define.
|
||||
|
||||
2019-08-08 Martin Liska <mliska@suse.cz>
|
||||
|
||||
PR bootstrap/91352
|
||||
|
|
|
@ -174,6 +174,7 @@ enum gomp_map_kind
|
|||
#define GOMP_DEVICE_NVIDIA_PTX 5
|
||||
#define GOMP_DEVICE_INTEL_MIC 6
|
||||
#define GOMP_DEVICE_HSA 7
|
||||
#define GOMP_DEVICE_GCN 8
|
||||
|
||||
#define GOMP_DEVICE_ICV -1
|
||||
#define GOMP_DEVICE_HOST_FALLBACK -2
|
||||
|
@ -215,6 +216,7 @@ enum gomp_map_kind
|
|||
#define GOMP_VERSION_NVIDIA_PTX 1
|
||||
#define GOMP_VERSION_INTEL_MIC 0
|
||||
#define GOMP_VERSION_HSA 0
|
||||
#define GOMP_VERSION_GCN 1
|
||||
|
||||
#define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV))
|
||||
#define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff)
|
||||
|
|
|
@ -1,3 +1,36 @@
|
|||
2019-11-13 Andrew Stubbs <ams@codesourcery.com>
|
||||
Kwok Cheung Yeung <kcy@codesourcery.com>
|
||||
Julian Brown <julian@codesourcery.com>
|
||||
Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
* Makefile.am (libgomp_la_SOURCES): Add oacc-target.c.
|
||||
* Makefile.in: Regenerate.
|
||||
* config.h.in (PLUGIN_GCN): Add new undef.
|
||||
* config/accel/openacc.f90 (acc_device_gcn): New parameter.
|
||||
* config/gcn/affinity-fmt.c: New file.
|
||||
* config/gcn/bar.c: New file.
|
||||
* config/gcn/bar.h: New file.
|
||||
* config/gcn/doacross.h: New file.
|
||||
* config/gcn/icv-device.c: New file.
|
||||
* config/gcn/oacc-target.c: New file.
|
||||
* config/gcn/simple-bar.h: New file.
|
||||
* config/gcn/target.c: New file.
|
||||
* config/gcn/task.c: New file.
|
||||
* config/gcn/team.c: New file.
|
||||
* config/gcn/time.c: New file.
|
||||
* configure.ac: Add amdgcn*-*-*.
|
||||
* configure: Regenerate.
|
||||
* configure.tgt: Add amdgcn*-*-*.
|
||||
* libgomp-plugin.h (offload_target_type): Add OFFLOAD_TARGET_TYPE_GCN.
|
||||
* libgomp.h (gcn_thrs): Add amdgcn variant.
|
||||
(set_gcn_thrs): Likewise.
|
||||
(gomp_thread): Likewise.
|
||||
* oacc-int.h (goacc_thread): Likewise.
|
||||
* oacc-target.c: New file.
|
||||
* openacc.f90 (acc_device_gcn): New parameter.
|
||||
* openacc.h (acc_device_t): Add acc_device_gcn.
|
||||
* team.c (gomp_free_pool_helper): Add amdgcn support.
|
||||
|
||||
2019-11-13 Andrew Stubbs <ams@codesourcery.com>
|
||||
Julian Brown <julian@codesourcery.com>
|
||||
|
||||
|
|
|
@ -65,7 +65,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
|
|||
proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \
|
||||
splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \
|
||||
oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
|
||||
affinity-fmt.c teams.c oacc-profiling.c
|
||||
affinity-fmt.c teams.c oacc-profiling.c oacc-target.c
|
||||
|
||||
include $(top_srcdir)/plugin/Makefrag.am
|
||||
|
||||
|
|
|
@ -217,7 +217,7 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
|
|||
target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \
|
||||
oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \
|
||||
oacc-plugin.lo oacc-cuda.lo priority_queue.lo affinity-fmt.lo \
|
||||
teams.lo oacc-profiling.lo $(am__objects_1)
|
||||
teams.lo oacc-profiling.lo oacc-target.lo $(am__objects_1)
|
||||
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
|
||||
AM_V_P = $(am__v_P_@AM_V@)
|
||||
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
|
||||
|
@ -552,7 +552,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
|
|||
affinity.c target.c splay-tree.c libgomp-plugin.c \
|
||||
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
|
||||
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
|
||||
affinity-fmt.c teams.c oacc-profiling.c $(am__append_3)
|
||||
affinity-fmt.c teams.c oacc-profiling.c oacc-target.c \
|
||||
$(am__append_3)
|
||||
|
||||
# Nvidia PTX OpenACC plugin.
|
||||
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
|
||||
|
@ -755,6 +756,7 @@ distclean-compile:
|
|||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-target.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@
|
||||
|
|
|
@ -170,6 +170,9 @@
|
|||
/* Define to the version of this package. */
|
||||
#undef PACKAGE_VERSION
|
||||
|
||||
/* Define to 1 if the GCN plugin is built, 0 if not. */
|
||||
#undef PLUGIN_GCN
|
||||
|
||||
/* Define to 1 if the HSA plugin is built, 0 if not. */
|
||||
#undef PLUGIN_HSA
|
||||
|
||||
|
|
|
@ -51,6 +51,7 @@ module openacc_kinds
|
|||
! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
|
||||
integer (acc_device_kind), parameter :: acc_device_not_host = 4
|
||||
integer (acc_device_kind), parameter :: acc_device_nvidia = 5
|
||||
integer (acc_device_kind), parameter :: acc_device_gcn = 8
|
||||
|
||||
end module
|
||||
|
||||
|
|
51
libgomp/config/gcn/affinity-fmt.c
Normal file
51
libgomp/config/gcn/affinity-fmt.c
Normal file
|
@ -0,0 +1,51 @@
|
|||
/* Copyright (C) 2018-2019 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU Offloading and Multi Processing Library
|
||||
(libgomp).
|
||||
|
||||
Libgomp is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "libgomp.h"
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_INTTYPES_H
|
||||
# include <inttypes.h> /* For PRIx64. */
|
||||
#endif
|
||||
#ifdef HAVE_UNAME
|
||||
#include <sys/utsname.h>
|
||||
#endif
|
||||
|
||||
/* The HAVE_GETPID and HAVE_GETHOSTNAME configure tests may pass for this
|
||||
target even though its newlib implementation does not support those
|
||||
functions (this override was inherited from the nvptx port, which has the
same problem). Override the configure test results here. */
|
||||
#undef HAVE_GETPID
|
||||
#undef HAVE_GETHOSTNAME
|
||||
|
||||
/* The GCN newlib implementation does not support fwrite, but it does support
   write. Map fwrite to write.

   Note that the STREAM argument is discarded by this mapping: output always
   goes to file descriptor 1, whichever stream the caller named.  The
   expression also evaluates to the result of write (a byte count, or -1 on
   error) rather than fwrite's count of complete items.  */
|
||||
#undef fwrite
|
||||
#define fwrite(ptr, size, nmemb, stream) write (1, (ptr), (nmemb) * (size))
|
||||
|
||||
#include "../../affinity-fmt.c"
|
||||
|
232
libgomp/config/gcn/bar.c
Normal file
232
libgomp/config/gcn/bar.c
Normal file
|
@ -0,0 +1,232 @@
|
|||
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
|
||||
Contributed by Mentor Embedded.
|
||||
|
||||
This file is part of the GNU Offloading and Multi Processing Library
|
||||
(libgomp).
|
||||
|
||||
Libgomp is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* This is an AMD GCN specific implementation of a barrier synchronization
|
||||
mechanism for libgomp. This type is private to the library. This
|
||||
implementation uses atomic instructions and s_barrier instruction. It
|
||||
uses MEMMODEL_RELAXED here because barriers are within workgroups and
|
||||
therefore don't need to flush caches. */
|
||||
|
||||
#include <limits.h>
|
||||
#include "libgomp.h"
|
||||
|
||||
|
||||
void
|
||||
gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
|
||||
{
|
||||
if (__builtin_expect (state & BAR_WAS_LAST, 0))
|
||||
{
|
||||
/* Next time we'll be awaiting TOTAL threads again. */
|
||||
bar->awaited = bar->total;
|
||||
__atomic_store_n (&bar->generation, bar->generation + BAR_INCR,
|
||||
MEMMODEL_RELAXED);
|
||||
}
|
||||
asm ("s_barrier" ::: "memory");
|
||||
}
|
||||
|
||||
void
|
||||
gomp_barrier_wait (gomp_barrier_t *bar)
|
||||
{
|
||||
gomp_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
|
||||
}
|
||||
|
||||
/* Like gomp_barrier_wait, except that if the encountering thread
|
||||
is not the last one to hit the barrier, it returns immediately.
|
||||
The intended usage is that a thread which intends to gomp_barrier_destroy
|
||||
this barrier calls gomp_barrier_wait, while all other threads
|
||||
call gomp_barrier_wait_last. When gomp_barrier_wait returns,
|
||||
the barrier can be safely destroyed. */
|
||||
|
||||
void
|
||||
gomp_barrier_wait_last (gomp_barrier_t *bar)
|
||||
{
|
||||
/* Deferring to gomp_barrier_wait does not use the optimization opportunity
|
||||
allowed by the interface contract for all-but-last participants. The
|
||||
original implementation in config/linux/bar.c handles this better. */
|
||||
gomp_barrier_wait (bar);
|
||||
}
|
||||
|
||||
void
|
||||
gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
|
||||
{
|
||||
asm ("s_barrier" ::: "memory");
|
||||
}
|
||||
|
||||
void
|
||||
gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
|
||||
{
|
||||
unsigned int generation, gen;
|
||||
|
||||
if (__builtin_expect (state & BAR_WAS_LAST, 0))
|
||||
{
|
||||
/* Next time we'll be awaiting TOTAL threads again. */
|
||||
struct gomp_thread *thr = gomp_thread ();
|
||||
struct gomp_team *team = thr->ts.team;
|
||||
|
||||
bar->awaited = bar->total;
|
||||
team->work_share_cancelled = 0;
|
||||
if (__builtin_expect (team->task_count, 0))
|
||||
{
|
||||
gomp_barrier_handle_tasks (state);
|
||||
state &= ~BAR_WAS_LAST;
|
||||
}
|
||||
else
|
||||
{
|
||||
state &= ~BAR_CANCELLED;
|
||||
state += BAR_INCR - BAR_WAS_LAST;
|
||||
__atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
|
||||
asm ("s_barrier" ::: "memory");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
generation = state;
|
||||
state &= ~BAR_CANCELLED;
|
||||
int retry = 100;
|
||||
do
|
||||
{
|
||||
if (retry-- == 0)
|
||||
{
|
||||
/* It really shouldn't happen that barriers get out of sync, but
|
||||
if they do then this will loop until they realign, so we need
|
||||
to avoid an infinite loop where the thread just isn't there. */
|
||||
const char msg[] = ("Barrier sync failed (another thread died?);"
|
||||
" aborting.");
|
||||
write (2, msg, sizeof (msg)-1);
|
||||
abort();
|
||||
}
|
||||
|
||||
asm ("s_barrier" ::: "memory");
|
||||
gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
|
||||
if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
|
||||
{
|
||||
gomp_barrier_handle_tasks (state);
|
||||
gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
|
||||
}
|
||||
generation |= gen & BAR_WAITING_FOR_TASK;
|
||||
}
|
||||
while (gen != state + BAR_INCR);
|
||||
}
|
||||
|
||||
void
|
||||
gomp_team_barrier_wait (gomp_barrier_t *bar)
|
||||
{
|
||||
gomp_team_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
|
||||
}
|
||||
|
||||
void
|
||||
gomp_team_barrier_wait_final (gomp_barrier_t *bar)
|
||||
{
|
||||
gomp_barrier_state_t state = gomp_barrier_wait_final_start (bar);
|
||||
if (__builtin_expect (state & BAR_WAS_LAST, 0))
|
||||
bar->awaited_final = bar->total;
|
||||
gomp_team_barrier_wait_end (bar, state);
|
||||
}
|
||||
|
||||
bool
|
||||
gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
|
||||
gomp_barrier_state_t state)
|
||||
{
|
||||
unsigned int generation, gen;
|
||||
|
||||
if (__builtin_expect (state & BAR_WAS_LAST, 0))
|
||||
{
|
||||
/* Next time we'll be awaiting TOTAL threads again. */
|
||||
/* BAR_CANCELLED should never be set in state here, because
|
||||
cancellation means that at least one of the threads has been
|
||||
cancelled, thus on a cancellable barrier we should never see
|
||||
all threads to arrive. */
|
||||
struct gomp_thread *thr = gomp_thread ();
|
||||
struct gomp_team *team = thr->ts.team;
|
||||
|
||||
bar->awaited = bar->total;
|
||||
team->work_share_cancelled = 0;
|
||||
if (__builtin_expect (team->task_count, 0))
|
||||
{
|
||||
gomp_barrier_handle_tasks (state);
|
||||
state &= ~BAR_WAS_LAST;
|
||||
}
|
||||
else
|
||||
{
|
||||
state += BAR_INCR - BAR_WAS_LAST;
|
||||
__atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
|
||||
asm ("s_barrier" ::: "memory");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (__builtin_expect (state & BAR_CANCELLED, 0))
|
||||
return true;
|
||||
|
||||
generation = state;
|
||||
int retry = 100;
|
||||
do
|
||||
{
|
||||
if (retry-- == 0)
|
||||
{
|
||||
/* It really shouldn't happen that barriers get out of sync, but
|
||||
if they do then this will loop until they realign, so we need
|
||||
to avoid an infinite loop where the thread just isn't there. */
|
||||
const char msg[] = ("Barrier sync failed (another thread died?);"
|
||||
" aborting.");
|
||||
write (2, msg, sizeof (msg)-1);
|
||||
abort();
|
||||
}
|
||||
|
||||
asm ("s_barrier" ::: "memory");
|
||||
gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
|
||||
if (__builtin_expect (gen & BAR_CANCELLED, 0))
|
||||
return true;
|
||||
if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
|
||||
{
|
||||
gomp_barrier_handle_tasks (state);
|
||||
gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
|
||||
}
|
||||
generation |= gen & BAR_WAITING_FOR_TASK;
|
||||
}
|
||||
while (gen != state + BAR_INCR);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
|
||||
{
|
||||
return gomp_team_barrier_wait_cancel_end (bar, gomp_barrier_wait_start (bar));
|
||||
}
|
||||
|
||||
void
|
||||
gomp_team_barrier_cancel (struct gomp_team *team)
|
||||
{
|
||||
gomp_mutex_lock (&team->task_lock);
|
||||
if (team->barrier.generation & BAR_CANCELLED)
|
||||
{
|
||||
gomp_mutex_unlock (&team->task_lock);
|
||||
return;
|
||||
}
|
||||
team->barrier.generation |= BAR_CANCELLED;
|
||||
gomp_mutex_unlock (&team->task_lock);
|
||||
gomp_team_barrier_wake (&team->barrier, INT_MAX);
|
||||
}
|
168
libgomp/config/gcn/bar.h
Normal file
168
libgomp/config/gcn/bar.h
Normal file
|
@ -0,0 +1,168 @@
|
|||
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
|
||||
Contributed by Mentor Embedded.
|
||||
|
||||
This file is part of the GNU Offloading and Multi Processing Library
|
||||
(libgomp).
|
||||
|
||||
Libgomp is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* This is an AMD GCN specific implementation of a barrier synchronization
|
||||
mechanism for libgomp. This type is private to the library. This
|
||||
implementation uses atomic instructions and s_barrier instruction. It
|
||||
uses MEMMODEL_RELAXED here because barriers are within workgroups and
|
||||
therefore don't need to flush caches. */
|
||||
|
||||
#ifndef GOMP_BARRIER_H
|
||||
#define GOMP_BARRIER_H 1
|
||||
|
||||
#include "mutex.h"
|
||||
|
||||
/* State of one barrier, shared by all threads that use it.  */
typedef struct
|
||||
{
|
||||
/* Number of participating threads; set by gomp_barrier_init and
   gomp_barrier_reinit, and used to re-arm the counters below.  */
unsigned total;
|
||||
/* Generation counter in the high bits plus the BAR_* flag bits in the
   low bits.  */
unsigned generation;
|
||||
/* Arrivals still outstanding; decremented by gomp_barrier_wait_start.  */
unsigned awaited;
|
||||
/* Like AWAITED, but decremented by gomp_barrier_wait_final_start.  */
unsigned awaited_final;
|
||||
} gomp_barrier_t;
|
||||
|
||||
typedef unsigned int gomp_barrier_state_t;
|
||||
|
||||
/* The generation field contains a counter in the high bits, with a few
|
||||
low bits dedicated to flags. Note that TASK_PENDING and WAS_LAST can
|
||||
share space because WAS_LAST is never stored back to generation. */
|
||||
#define BAR_TASK_PENDING 1
|
||||
#define BAR_WAS_LAST 1
|
||||
#define BAR_WAITING_FOR_TASK 2
|
||||
#define BAR_CANCELLED 4
|
||||
#define BAR_INCR 8
|
||||
|
||||
/* Initialize BAR for COUNT participating threads, starting at
   generation zero with no flags set.  */
static inline void
gomp_barrier_init (gomp_barrier_t *bar, unsigned count)
{
  bar->generation = 0;
  bar->total = count;
  /* Both arrival counters start out expecting every participant.  */
  bar->awaited = count;
  bar->awaited_final = count;
}
|
||||
|
||||
/* Change the number of participants of BAR to COUNT, adjusting the
   outstanding-arrivals counter by the difference.  */
static inline void
gomp_barrier_reinit (gomp_barrier_t *bar, unsigned count)
{
  /* Unsigned arithmetic: a shrinking team wraps, and adding the wrapped
     value subtracts as intended.  */
  unsigned delta = count - bar->total;

  __atomic_add_fetch (&bar->awaited, delta, MEMMODEL_RELAXED);
  bar->total = count;
}
|
||||
|
||||
/* Destroy BAR.  This is a no-op in this implementation.  */
static inline void
gomp_barrier_destroy (gomp_barrier_t *bar)
{
  (void) bar;
}
|
||||
|
||||
extern void gomp_barrier_wait (gomp_barrier_t *);
|
||||
extern void gomp_barrier_wait_last (gomp_barrier_t *);
|
||||
extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t);
|
||||
extern void gomp_team_barrier_wait (gomp_barrier_t *);
|
||||
extern void gomp_team_barrier_wait_final (gomp_barrier_t *);
|
||||
extern void gomp_team_barrier_wait_end (gomp_barrier_t *,
|
||||
gomp_barrier_state_t);
|
||||
extern bool gomp_team_barrier_wait_cancel (gomp_barrier_t *);
|
||||
extern bool gomp_team_barrier_wait_cancel_end (gomp_barrier_t *,
|
||||
gomp_barrier_state_t);
|
||||
extern void gomp_team_barrier_wake (gomp_barrier_t *, int);
|
||||
struct gomp_team;
|
||||
extern void gomp_team_barrier_cancel (struct gomp_team *);
|
||||
|
||||
/* Register the calling thread's arrival at BAR.  Returns the current
   generation counter together with the BAR_CANCELLED flag (other flag bits
   masked off), with BAR_WAS_LAST or'ed in if this arrival brought
   BAR->awaited down to zero.  */
static inline gomp_barrier_state_t
|
||||
gomp_barrier_wait_start (gomp_barrier_t *bar)
|
||||
{
|
||||
unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
|
||||
ret &= -BAR_INCR | BAR_CANCELLED;
|
||||
/* OpenMP API version 3.1 section 2.8.6 (flush Construct) says there is an
   implicit flush during a barrier region, so a memory barrier is needed
   before exiting from the various forms of gomp_barrier_wait.

   NOTE(review): the comment previously here said MEMMODEL_ACQ_REL was
   used on the decrement below to provide that barrier, but the code uses
   MEMMODEL_RELAXED, in line with the file-header rationale that barriers
   are within workgroups and need not flush caches.  Confirm that the
   s_barrier executed by the wait_end routines is sufficient.  */
|
||||
if (__atomic_add_fetch (&bar->awaited, -1, MEMMODEL_RELAXED) == 0)
|
||||
ret |= BAR_WAS_LAST;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline gomp_barrier_state_t
|
||||
gomp_barrier_wait_cancel_start (gomp_barrier_t *bar)
|
||||
{
|
||||
return gomp_barrier_wait_start (bar);
|
||||
}
|
||||
|
||||
/* This is like gomp_barrier_wait_start, except it decrements
|
||||
bar->awaited_final rather than bar->awaited and should be used
|
||||
for the gomp_team_end barrier only. */
|
||||
static inline gomp_barrier_state_t
|
||||
gomp_barrier_wait_final_start (gomp_barrier_t *bar)
|
||||
{
|
||||
unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
|
||||
ret &= -BAR_INCR | BAR_CANCELLED;
|
||||
/* See above gomp_barrier_wait_start comment. */
|
||||
if (__atomic_add_fetch (&bar->awaited_final, -1, MEMMODEL_RELAXED) == 0)
|
||||
ret |= BAR_WAS_LAST;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
gomp_barrier_last_thread (gomp_barrier_state_t state)
|
||||
{
|
||||
return state & BAR_WAS_LAST;
|
||||
}
|
||||
|
||||
/* All the inlines below must be called with team->task_lock
|
||||
held. */
|
||||
|
||||
static inline void
|
||||
gomp_team_barrier_set_task_pending (gomp_barrier_t *bar)
|
||||
{
|
||||
bar->generation |= BAR_TASK_PENDING;
|
||||
}
|
||||
|
||||
static inline void
|
||||
gomp_team_barrier_clear_task_pending (gomp_barrier_t *bar)
|
||||
{
|
||||
bar->generation &= ~BAR_TASK_PENDING;
|
||||
}
|
||||
|
||||
static inline void
|
||||
gomp_team_barrier_set_waiting_for_tasks (gomp_barrier_t *bar)
|
||||
{
|
||||
bar->generation |= BAR_WAITING_FOR_TASK;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
gomp_team_barrier_waiting_for_tasks (gomp_barrier_t *bar)
|
||||
{
|
||||
return (bar->generation & BAR_WAITING_FOR_TASK) != 0;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
gomp_team_barrier_cancelled (gomp_barrier_t *bar)
|
||||
{
|
||||
return __builtin_expect ((bar->generation & BAR_CANCELLED) != 0, 0);
|
||||
}
|
||||
|
||||
static inline void
|
||||
gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
|
||||
{
|
||||
bar->generation = (state & -BAR_INCR) + BAR_INCR;
|
||||
}
|
||||
|
||||
#endif /* GOMP_BARRIER_H */
|
58
libgomp/config/gcn/doacross.h
Normal file
58
libgomp/config/gcn/doacross.h
Normal file
|
@ -0,0 +1,58 @@
|
|||
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
|
||||
Contributed by Mentor Embedded.
|
||||
|
||||
This file is part of the GNU Offloading and Multi Processing Library
|
||||
(libgomp).
|
||||
|
||||
Libgomp is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* This is the AMD GCN implementation of doacross spinning. */
|
||||
|
||||
#ifndef GOMP_DOACROSS_H
|
||||
#define GOMP_DOACROSS_H 1
|
||||
|
||||
#include "libgomp.h"
|
||||
|
||||
/* Pause briefly inside a spin loop.  Always returns 0.  */
static inline int
cpu_relax (void)
{
  /* A plain memory barrier would do, but a sleep seems like it should
     allow the wavefront to yield (maybe?).  The operand 1 requests the
     shortest possible sleep time, 1*64 cycles.  */
  __asm__ __volatile__ ("s_sleep\t1" ::: "memory");

  return 0;
}
|
||||
|
||||
static inline void doacross_spin (unsigned long *addr, unsigned long expected,
|
||||
unsigned long cur)
|
||||
{
|
||||
/* Prevent compiler from optimizing based on bounds of containing object. */
|
||||
asm ("" : "+r" (addr));
|
||||
do
|
||||
{
|
||||
/* An alternative implementation might use s_setprio to lower the
|
||||
priority temporarily, and then restore it after. */
|
||||
int i = cpu_relax ();
|
||||
cur = addr[i];
|
||||
}
|
||||
while (cur <= expected);
|
||||
}
|
||||
|
||||
#endif /* GOMP_DOACROSS_H */
|
72
libgomp/config/gcn/icv-device.c
Normal file
72
libgomp/config/gcn/icv-device.c
Normal file
|
@ -0,0 +1,72 @@
|
|||
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
|
||||
Contributed by Mentor Embedded.
|
||||
|
||||
This file is part of the GNU Offloading and Multi Processing Library
|
||||
(libgomp).
|
||||
|
||||
Libgomp is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* This file defines OpenMP API entry points that accelerator targets are
|
||||
expected to replace. */
|
||||
|
||||
#include "libgomp.h"
|
||||
|
||||
/* Setting the default device is a no-op in this implementation;
   DEVICE_NUM is ignored.  */

void
omp_set_default_device (int device_num __attribute__((unused)))
{
  (void) device_num;
}
|
||||
|
||||
/* The default device number is always reported as 0.  */

int
omp_get_default_device (void)
{
  enum { DEFAULT_DEVICE = 0 };

  return DEFAULT_DEVICE;
}
|
||||
|
||||
/* The number of devices is always reported as 0.  */

int
omp_get_num_devices (void)
{
  enum { NUM_DEVICES = 0 };

  return NUM_DEVICES;
}
|
||||
|
||||
int
|
||||
omp_get_num_teams (void)
|
||||
{
|
||||
return gomp_num_teams_var + 1;
|
||||
}
|
||||
|
||||
/* Report the team number as the value of __builtin_gcn_dim_pos for
   dimension 0.  The function is always compiled at -O2.  */

int __attribute__ ((__optimize__ ("O2")))
omp_get_team_num (void)
{
  int team_index = __builtin_gcn_dim_pos (0);

  return team_index;
}
|
||||
|
||||
/* AMD GCN is an accelerator-only target, so this always returns 0.  */

int
omp_is_initial_device (void)
{
  const int on_initial_device = 0;

  return on_initial_device;
}
|
||||
|
||||
ialias (omp_set_default_device)
|
||||
ialias (omp_get_default_device)
|
||||
ialias (omp_get_num_devices)
|
||||
ialias (omp_get_num_teams)
|
||||
ialias (omp_get_team_num)
|
||||
ialias (omp_is_initial_device)
|
31
libgomp/config/gcn/oacc-target.c
Normal file
31
libgomp/config/gcn/oacc-target.c
Normal file
|
@ -0,0 +1,31 @@
|
|||
/* Oversized reductions lock variable
|
||||
Copyright (C) 2017-2019 Free Software Foundation, Inc.
|
||||
Contributed by Mentor Graphics.
|
||||
|
||||
This file is part of the GNU Offloading and Multi Processing Library
|
||||
(libgomp).
|
||||
|
||||
Libgomp is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* We use a global lock variable for reductions on objects larger than
|
||||
64 bits. Until and unless proven that lock contention for
|
||||
different reductions is a problem, a single lock will suffice. */
|
||||
|
||||
unsigned volatile __reduction_lock = 0;
|
61
libgomp/config/gcn/simple-bar.h
Normal file
61
libgomp/config/gcn/simple-bar.h
Normal file
|
@ -0,0 +1,61 @@
|
|||
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
|
||||
Contributed by Mentor Embedded.
|
||||
|
||||
This file is part of the GNU Offloading and Multi Processing Library
|
||||
(libgomp).
|
||||
|
||||
Libgomp is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* This is a simplified barrier that is suitable for thread pool
|
||||
synchronization. Only a subset of full barrier API (bar.h) is exposed.
|
||||
Here in the AMD GCN-specific implementation, we expect that thread pool
|
||||
corresponds to the wavefronts within a work group. */
|
||||
|
||||
#ifndef GOMP_SIMPLE_BARRIER_H
|
||||
#define GOMP_SIMPLE_BARRIER_H 1
|
||||
|
||||
/* AMD GCN has no use for this type. */
|
||||
typedef int gomp_simple_barrier_t;
|
||||
|
||||
/* GCN barriers block all wavefronts, so the count is not interesting. */
|
||||
static inline void
|
||||
gomp_simple_barrier_init (gomp_simple_barrier_t *bar, unsigned count)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void
|
||||
gomp_simple_barrier_destroy (gomp_simple_barrier_t *bar)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void
|
||||
gomp_simple_barrier_wait (gomp_simple_barrier_t *bar)
|
||||
{
|
||||
asm volatile ("s_barrier" ::: "memory");
|
||||
}
|
||||
|
||||
static inline void
|
||||
gomp_simple_barrier_wait_last (gomp_simple_barrier_t *bar)
|
||||
{
|
||||
/* GCN has no way to signal a barrier without waiting. */
|
||||
asm volatile ("s_barrier" ::: "memory");
|
||||
}
|
||||
|
||||
#endif /* GOMP_SIMPLE_BARRIER_H */
|
67
libgomp/config/gcn/target.c
Normal file
67
libgomp/config/gcn/target.c
Normal file
|
@ -0,0 +1,67 @@
|
|||
/* Copyright (C) 2017-2019 Free Software Foundation, Inc.
|
||||
Contributed by Mentor Embedded.
|
||||
|
||||
This file is part of the GNU Offloading and Multi Processing Library
|
||||
(libgomp).
|
||||
|
||||
Libgomp is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "libgomp.h"
|
||||
#include <limits.h>
|
||||
|
||||
void
|
||||
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
|
||||
{
|
||||
if (thread_limit)
|
||||
{
|
||||
struct gomp_task_icv *icv = gomp_icv (true);
|
||||
icv->thread_limit_var
|
||||
= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
|
||||
}
|
||||
unsigned int num_workgroups, workgroup_id;
|
||||
num_workgroups = __builtin_gcn_dim_size (0);
|
||||
workgroup_id = __builtin_gcn_dim_pos (0);
|
||||
if (!num_teams || num_teams >= num_workgroups)
|
||||
num_teams = num_workgroups;
|
||||
else if (workgroup_id >= num_teams)
|
||||
{
|
||||
gomp_free_thread (gcn_thrs ());
|
||||
exit (0);
|
||||
}
|
||||
gomp_num_teams_var = num_teams - 1;
|
||||
}
|
||||
|
||||
int
|
||||
omp_pause_resource (omp_pause_resource_t kind, int device_num)
|
||||
{
|
||||
(void) kind;
|
||||
(void) device_num;
|
||||
return -1;
|
||||
}
|
||||
|
||||
int
|
||||
omp_pause_resource_all (omp_pause_resource_t kind)
|
||||
{
|
||||
(void) kind;
|
||||
return -1;
|
||||
}
|
||||
|
||||
ialias (omp_pause_resource)
|
||||
ialias (omp_pause_resource_all)
|
39
libgomp/config/gcn/task.c
Normal file
39
libgomp/config/gcn/task.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
/* Copyright (C) 2017-2019 Free Software Foundation, Inc.
|
||||
Contributed by Mentor Embedded.
|
||||
|
||||
This file is part of the GNU Offloading and Multi Processing Library
|
||||
(libgomp).
|
||||
|
||||
Libgomp is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* This file handles the maintenance of tasks in response to task
|
||||
creation and termination. */
|
||||
|
||||
#include "libgomp.h"
|
||||
|
||||
/* AMD GCN is an accelerator-only target, so this should never be called. */
|
||||
|
||||
/* Placeholder needed by the generic task code.  DATA is ignored; the
   body tells the compiler that control never gets here.  */
bool
gomp_target_task_fn (void *data)
{
  (void) data;
  __builtin_unreachable ();
}
|
||||
|
||||
#include "../../task.c"
|
202
libgomp/config/gcn/team.c
Normal file
202
libgomp/config/gcn/team.c
Normal file
|
@ -0,0 +1,202 @@
|
|||
/* Copyright (C) 2017-2019 Free Software Foundation, Inc.
|
||||
Contributed by Mentor Embedded.
|
||||
|
||||
This file is part of the GNU Offloading and Multi Processing Library
|
||||
(libgomp).
|
||||
|
||||
Libgomp is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* This file handles maintenance of threads on AMD GCN. */
|
||||
|
||||
#include "libgomp.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
static void gomp_thread_start (struct gomp_thread_pool *);
|
||||
|
||||
/* This externally visible function handles target region entry. It
|
||||
sets up a per-team thread pool and transfers control by returning to
|
||||
the kernel in the master thread or gomp_thread_start in other threads.
|
||||
|
||||
The name of this function is part of the interface with the compiler: for
|
||||
each OpenMP kernel the compiler configures the stack, then calls here.
|
||||
|
||||
Likewise, gomp_gcn_exit_kernel is called during the kernel epilogue. */
|
||||
|
||||
void
|
||||
gomp_gcn_enter_kernel (void)
|
||||
{
|
||||
int threadid = __builtin_gcn_dim_pos (1);
|
||||
|
||||
if (threadid == 0)
|
||||
{
|
||||
int numthreads = __builtin_gcn_dim_size (1);
|
||||
int teamid = __builtin_gcn_dim_pos(0);
|
||||
|
||||
/* Set up the global state.
|
||||
Every team will do this, but that should be harmless. */
|
||||
gomp_global_icv.nthreads_var = 16;
|
||||
gomp_global_icv.thread_limit_var = numthreads;
|
||||
/* Starting additional threads is not supported. */
|
||||
gomp_global_icv.dyn_var = true;
|
||||
|
||||
/* Allocate and initialize the team-local-storage data. */
|
||||
struct gomp_thread *thrs = gomp_malloc_cleared (sizeof (*thrs)
|
||||
* numthreads);
|
||||
set_gcn_thrs (thrs);
|
||||
|
||||
/* Allocate and initailize a pool of threads in the team.
|
||||
The threads are already running, of course, we just need to manage
|
||||
the communication between them. */
|
||||
struct gomp_thread_pool *pool = gomp_malloc (sizeof (*pool));
|
||||
pool->threads = gomp_malloc (sizeof (void *) * numthreads);
|
||||
for (int tid = 0; tid < numthreads; tid++)
|
||||
pool->threads[tid] = &thrs[tid];
|
||||
pool->threads_size = numthreads;
|
||||
pool->threads_used = numthreads;
|
||||
pool->threads_busy = 1;
|
||||
pool->last_team = NULL;
|
||||
gomp_simple_barrier_init (&pool->threads_dock, numthreads);
|
||||
thrs->thread_pool = pool;
|
||||
|
||||
asm ("s_barrier" ::: "memory");
|
||||
return; /* Return to kernel. */
|
||||
}
|
||||
else
|
||||
{
|
||||
asm ("s_barrier" ::: "memory");
|
||||
gomp_thread_start (gcn_thrs ()[0].thread_pool);
|
||||
/* gomp_thread_start does not return. */
|
||||
}
|
||||
}
|
||||
|
||||
/* Kernel epilogue: release the libgomp state attached to the team's
   thread array, then free the array itself.  */
void
gomp_gcn_exit_kernel (void)
{
  struct gomp_thread *team_threads = gcn_thrs ();

  gomp_free_thread (team_threads);
  free (team_threads);
}
|
||||
|
||||
/* This function contains the idle loop in which a thread waits
|
||||
to be called up to become part of a team. */
|
||||
|
||||
static void
|
||||
gomp_thread_start (struct gomp_thread_pool *pool)
|
||||
{
|
||||
struct gomp_thread *thr = gomp_thread ();
|
||||
|
||||
gomp_sem_init (&thr->release, 0);
|
||||
thr->thread_pool = pool;
|
||||
|
||||
/* The loop exits only when "fn" is assigned "gomp_free_pool_helper",
|
||||
which contains "s_endpgm", or an infinite no-op loop is
|
||||
suspected (this happens when the thread master crashes). */
|
||||
int nul_limit = 99;
|
||||
do
|
||||
{
|
||||
gomp_simple_barrier_wait (&pool->threads_dock);
|
||||
if (!thr->fn)
|
||||
{
|
||||
if (nul_limit-- > 0)
|
||||
continue;
|
||||
else
|
||||
{
|
||||
const char msg[] = ("team master not responding;"
|
||||
" slave thread aborting");
|
||||
write (2, msg, sizeof (msg)-1);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
thr->fn (thr->data);
|
||||
thr->fn = NULL;
|
||||
|
||||
struct gomp_task *task = thr->task;
|
||||
gomp_team_barrier_wait_final (&thr->ts.team->barrier);
|
||||
gomp_finish_task (task);
|
||||
}
|
||||
while (1);
|
||||
}
|
||||
|
||||
/* Launch a team. */
|
||||
|
||||
void
|
||||
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
|
||||
unsigned flags, struct gomp_team *team,
|
||||
struct gomp_taskgroup *taskgroup)
|
||||
{
|
||||
struct gomp_thread *thr, *nthr;
|
||||
struct gomp_task *task;
|
||||
struct gomp_task_icv *icv;
|
||||
struct gomp_thread_pool *pool;
|
||||
unsigned long nthreads_var;
|
||||
|
||||
thr = gomp_thread ();
|
||||
pool = thr->thread_pool;
|
||||
task = thr->task;
|
||||
icv = task ? &task->icv : &gomp_global_icv;
|
||||
|
||||
/* Always save the previous state, even if this isn't a nested team.
|
||||
In particular, we should save any work share state from an outer
|
||||
orphaned work share construct. */
|
||||
team->prev_ts = thr->ts;
|
||||
|
||||
thr->ts.team = team;
|
||||
thr->ts.team_id = 0;
|
||||
++thr->ts.level;
|
||||
if (nthreads > 1)
|
||||
++thr->ts.active_level;
|
||||
thr->ts.work_share = &team->work_shares[0];
|
||||
thr->ts.last_work_share = NULL;
|
||||
thr->ts.single_count = 0;
|
||||
thr->ts.static_trip = 0;
|
||||
thr->task = &team->implicit_task[0];
|
||||
nthreads_var = icv->nthreads_var;
|
||||
gomp_init_task (thr->task, task, icv);
|
||||
team->implicit_task[0].icv.nthreads_var = nthreads_var;
|
||||
team->implicit_task[0].taskgroup = taskgroup;
|
||||
|
||||
if (nthreads == 1)
|
||||
return;
|
||||
|
||||
/* Release existing idle threads. */
|
||||
for (unsigned i = 1; i < nthreads; ++i)
|
||||
{
|
||||
nthr = pool->threads[i];
|
||||
nthr->ts.team = team;
|
||||
nthr->ts.work_share = &team->work_shares[0];
|
||||
nthr->ts.last_work_share = NULL;
|
||||
nthr->ts.team_id = i;
|
||||
nthr->ts.level = team->prev_ts.level + 1;
|
||||
nthr->ts.active_level = thr->ts.active_level;
|
||||
nthr->ts.single_count = 0;
|
||||
nthr->ts.static_trip = 0;
|
||||
nthr->task = &team->implicit_task[i];
|
||||
gomp_init_task (nthr->task, task, icv);
|
||||
team->implicit_task[i].icv.nthreads_var = nthreads_var;
|
||||
team->implicit_task[i].taskgroup = taskgroup;
|
||||
nthr->fn = fn;
|
||||
nthr->data = data;
|
||||
team->ordered_release[i] = &nthr->release;
|
||||
}
|
||||
|
||||
gomp_simple_barrier_wait (&pool->threads_dock);
|
||||
}
|
||||
|
||||
#include "../../team.c"
|
52
libgomp/config/gcn/time.c
Normal file
52
libgomp/config/gcn/time.c
Normal file
|
@ -0,0 +1,52 @@
|
|||
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
|
||||
Contributed by Mentor Embedded.
|
||||
|
||||
This file is part of the GNU Offloading and Multi Processing Library
|
||||
(libgomp).
|
||||
|
||||
Libgomp is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* This file implements timer routines for AMD GCN. */
|
||||
|
||||
#include "libgomp.h"
|
||||
|
||||
/* According to AMD:
|
||||
dGPU RTC is 27MHz
|
||||
AGPU RTC is 100MHz
|
||||
FIXME: DTRT on an APU. */
|
||||
#define RTC_TICKS (1.0 / 27000000.0) /* 27MHz */
|
||||
|
||||
double
|
||||
omp_get_wtime (void)
|
||||
{
|
||||
uint64_t clock;
|
||||
asm ("s_memrealtime %0\n\t"
|
||||
"s_waitcnt 0" : "=r" (clock));
|
||||
return clock * RTC_TICKS;
|
||||
}
|
||||
|
||||
double
|
||||
omp_get_wtick (void)
|
||||
{
|
||||
return RTC_TICKS;
|
||||
}
|
||||
|
||||
ialias (omp_get_wtime)
|
||||
ialias (omp_get_wtick)
|
2
libgomp/configure
vendored
2
libgomp/configure
vendored
|
@ -14921,7 +14921,7 @@ case "$host" in
|
|||
*-*-rtems*)
|
||||
# RTEMS supports Pthreads, but the library is not available at GCC build time.
|
||||
;;
|
||||
nvptx*-*-*)
|
||||
nvptx*-*-* | amdgcn*-*-*)
|
||||
# NVPTX does not support Pthreads, has its own code replacement.
|
||||
libgomp_use_pthreads=no
|
||||
# NVPTX is an accelerator-only target
|
||||
|
|
|
@ -176,7 +176,7 @@ case "$host" in
|
|||
*-*-rtems*)
|
||||
# RTEMS supports Pthreads, but the library is not available at GCC build time.
|
||||
;;
|
||||
nvptx*-*-*)
|
||||
nvptx*-*-* | amdgcn*-*-*)
|
||||
# NVPTX does not support Pthreads, has its own code replacement.
|
||||
libgomp_use_pthreads=no
|
||||
# NVPTX is an accelerator-only target
|
||||
|
|
|
@ -164,6 +164,10 @@ case "${target}" in
|
|||
fi
|
||||
;;
|
||||
|
||||
amdgcn*-*-*)
|
||||
config_path="gcn accel"
|
||||
;;
|
||||
|
||||
*)
|
||||
;;
|
||||
|
||||
|
|
|
@ -50,7 +50,8 @@ enum offload_target_type
|
|||
/* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed. */
|
||||
OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5,
|
||||
OFFLOAD_TARGET_TYPE_INTEL_MIC = 6,
|
||||
OFFLOAD_TARGET_TYPE_HSA = 7
|
||||
OFFLOAD_TARGET_TYPE_HSA = 7,
|
||||
OFFLOAD_TARGET_TYPE_GCN = 8
|
||||
};
|
||||
|
||||
/* Opaque type to represent plugin-dependent implementation of an
|
||||
|
|
|
@ -692,6 +692,24 @@ static inline struct gomp_thread *gomp_thread (void)
|
|||
asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
|
||||
return nvptx_thrs + tid;
|
||||
}
|
||||
#elif defined __AMDGCN__
|
||||
static inline struct gomp_thread *gcn_thrs (void)
|
||||
{
|
||||
/* The value is at the bottom of LDS. */
|
||||
struct gomp_thread * __lds *thrs = (struct gomp_thread * __lds *)4;
|
||||
return *thrs;
|
||||
}
|
||||
static inline void set_gcn_thrs (struct gomp_thread *val)
|
||||
{
|
||||
/* The value is at the bottom of LDS. */
|
||||
struct gomp_thread * __lds *thrs = (struct gomp_thread * __lds *)4;
|
||||
*thrs = val;
|
||||
}
|
||||
static inline struct gomp_thread *gomp_thread (void)
|
||||
{
|
||||
int tid = __builtin_gcn_dim_pos(1);
|
||||
return gcn_thrs () + tid;
|
||||
}
|
||||
#elif defined HAVE_TLS || defined USE_EMUTLS
|
||||
extern __thread struct gomp_thread gomp_tls_data;
|
||||
static inline struct gomp_thread *gomp_thread (void)
|
||||
|
|
|
@ -82,7 +82,14 @@ struct goacc_thread
|
|||
void *target_tls;
|
||||
};
|
||||
|
||||
#if defined HAVE_TLS || defined USE_EMUTLS
|
||||
#ifdef __AMDGCN__
|
||||
/* Unused in the offload libgomp for OpenACC; always yields a null
   (dummy) pointer.  */
static inline struct goacc_thread *
goacc_thread (void)
{
  struct goacc_thread *const dummy = 0;

  return dummy;
}
|
||||
#elif defined HAVE_TLS || defined USE_EMUTLS
|
||||
extern __thread struct goacc_thread *goacc_tls_data;
|
||||
static inline struct goacc_thread *
|
||||
goacc_thread (void)
|
||||
|
|
1
libgomp/oacc-target.c
Normal file
1
libgomp/oacc-target.c
Normal file
|
@ -0,0 +1 @@
|
|||
/* Nothing needed here. */
|
|
@ -46,6 +46,7 @@ module openacc_kinds
|
|||
! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
|
||||
integer (acc_device_kind), parameter :: acc_device_not_host = 4
|
||||
integer (acc_device_kind), parameter :: acc_device_nvidia = 5
|
||||
integer (acc_device_kind), parameter :: acc_device_gcn = 8
|
||||
|
||||
public :: acc_handle_kind
|
||||
|
||||
|
|
|
@ -55,6 +55,7 @@ typedef enum acc_device_t {
|
|||
/* acc_device_host_nonshm = 3 removed. */
|
||||
acc_device_not_host = 4,
|
||||
acc_device_nvidia = 5,
|
||||
acc_device_gcn = 8,
|
||||
_ACC_device_hwm,
|
||||
/* Ensure enumeration is layout compatible with int. */
|
||||
_ACC_highest = __INT_MAX__,
|
||||
|
|
|
@ -239,6 +239,9 @@ gomp_free_pool_helper (void *thread_pool)
|
|||
pthread_exit (NULL);
|
||||
#elif defined(__nvptx__)
|
||||
asm ("exit;");
|
||||
#elif defined(__AMDGCN__)
|
||||
asm ("s_dcache_wb\n\t"
|
||||
"s_endpgm");
|
||||
#else
|
||||
#error gomp_free_pool_helper must terminate the thread
|
||||
#endif
|
||||
|
|
Loading…
Add table
Reference in a new issue