[libgomp, nvptx] Remove hard-coded const in nvptx_open_device
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR is defined in CUDA driver API version 6.0 and higher. Currently nvptx_open_device uses a hard-coded constant instead. This patch fixes that by: - defining CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR to the hard-coded constant at top level, if not present in cuda.h, and - using CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR in nvptx_open_device. Built on x86_64 with nvptx accelerator and reg-tested libgomp. 2018-08-08 Tom de Vries <tdevries@suse.de> * plugin/plugin-nvptx.c (CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR): Define. (nvptx_open_device): Use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR. From-SVN: r263406
This commit is contained in:
parent
94767dacea
commit
b113af959c
2 changed files with 12 additions and 2 deletions
|
@ -1,3 +1,10 @@
|
|||
2018-08-08 Tom de Vries <tdevries@suse.de>
|
||||
|
||||
* plugin/plugin-nvptx.c
|
||||
(CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR): Define.
|
||||
(nvptx_open_device): Use
|
||||
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR.
|
||||
|
||||
2018-08-08 Tom de Vries <tdevries@suse.de>
|
||||
|
||||
* plugin/plugin-nvptx.c (cuda_error): Move declaration of cuGetErrorString ...
|
||||
|
|
|
@ -51,6 +51,7 @@
|
|||
|
||||
#if CUDA_VERSION < 6000
|
||||
extern CUresult cuGetErrorString (CUresult, const char **);
|
||||
#define CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR 82
|
||||
#endif
|
||||
|
||||
#define DO_PRAGMA(x) _Pragma (#x)
|
||||
|
@ -741,9 +742,11 @@ nvptx_open_device (int n)
|
|||
&pi, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, dev);
|
||||
ptx_dev->regs_per_block = pi;
|
||||
|
||||
/* CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82 is defined only
|
||||
/* CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR is defined only
|
||||
in CUDA 6.0 and newer. */
|
||||
r = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &pi, 82, dev);
|
||||
r = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &pi,
|
||||
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR,
|
||||
dev);
|
||||
/* Fallback: use limit of registers per block, which is usually equal. */
|
||||
if (r == CUDA_ERROR_INVALID_VALUE)
|
||||
pi = ptx_dev->regs_per_block;
|
||||
|
|
Loading…
Add table
Reference in a new issue