[nvptx] Force vl32 if calling vector-partitionable routines

With PTX_MAX_VECTOR_LENGTH set larger than PTX_WARP_SIZE, routines can be
called from offloading regions whose vector length is larger than the warp
size.  However, vector-partitionable routines assume a warp-sized vector length.

Detect if we're calling a vector-partitionable routine from an offloading
region, and if so, fall back to warp-sized vector length in that region.

2019-01-07  Tom de Vries  <tdevries@suse.de>

	PR target/85486
	* config/nvptx/nvptx.c (has_vector_partitionable_routine_calls_p): New
	function.
	(nvptx_goacc_validate_dims): Force vl32 if calling vector-partitionable
	routines.

From-SVN: r267640
This commit is contained in:
Tom de Vries 2019-01-07 10:01:49 +00:00 committed by Tom de Vries
parent d471bdec41
commit 6e723923df
2 changed files with 53 additions and 0 deletions

View file

@ -1,3 +1,11 @@
2019-01-07 Tom de Vries <tdevries@suse.de>
PR target/85486
* config/nvptx/nvptx.c (has_vector_partitionable_routine_calls_p): New
function.
(nvptx_goacc_validate_dims): Force vl32 if calling vector-partitionable
routines.
2019-01-07 Jakub Jelinek <jakub@redhat.com>
* config/i386/sse.md (vec_extract<mode><ssehalfvecmodelower>): Use

View file

@ -59,6 +59,7 @@
#include "builtins.h"
#include "omp-general.h"
#include "omp-low.h"
#include "omp-offload.h"
#include "gomp-constants.h"
#include "dumpfile.h"
#include "internal-fn.h"
@ -5496,6 +5497,40 @@ nvptx_apply_dim_limits (int dims[])
dims[GOMP_DIM_VECTOR] = PTX_WARP_SIZE;
}
/* Return true if FNDECL contains calls to vector-partitionable routines.

   Every non-debug statement of every basic block of FNDECL is inspected.
   A direct call counts when the callee carries an OpenACC function
   attribute whose partition level differs from GOMP_DIM_MAX (the level
   this function treats as a "seq" routine).  Calls without a known callee
   decl, and calls to functions without an OpenACC function attribute, are
   ignored.  Return false if FNDECL is NULL or no such call is found.  */
static bool
has_vector_partitionable_routine_calls_p (tree fndecl)
{
  if (!fndecl)
    return false;

  basic_block bb;
  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fndecl))
    for (gimple_stmt_iterator i = gsi_start_bb (bb); !gsi_end_p (i);
         gsi_next_nondebug (&i))
      {
        gimple *stmt = gsi_stmt (i);
        if (gimple_code (stmt) != GIMPLE_CALL)
          continue;

        /* No callee decl available for this call; nothing to inspect.  */
        tree callee = gimple_call_fndecl (stmt);
        if (!callee)
          continue;

        /* A callee without an OpenACC function attribute is not a
           partitionable routine, but that says nothing about the calls
           that follow it, so keep scanning rather than giving up.  */
        tree attrs = oacc_get_fn_attrib (callee);
        if (attrs == NULL_TREE)
          continue;

        int partition_level = oacc_fn_attrib_level (attrs);
        bool seq_routine_p = partition_level == GOMP_DIM_MAX;
        if (!seq_routine_p)
          return true;
      }

  return false;
}
/* As nvptx_goacc_validate_dims, but does not return bool to indicate whether
DIMS has changed. */
@ -5611,6 +5646,16 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int fn_level)
old_dims[i] = dims[i];
const char *vector_reason = NULL;
if (offload_region_p && has_vector_partitionable_routine_calls_p (decl))
{
if (dims[GOMP_DIM_VECTOR] > PTX_WARP_SIZE)
{
vector_reason = G_("using vector_length (%d) due to call to"
" vector-partitionable routine, ignoring %d");
dims[GOMP_DIM_VECTOR] = PTX_WARP_SIZE;
}
}
if (dims[GOMP_DIM_VECTOR] == 0)
{
vector_reason = G_("using vector_length (%d), ignoring runtime setting");