[nvptx] Force vl32 if calling vector-partitionable routines

With PTX_MAX_VECTOR_LENGTH set to a value larger than PTX_WARP_SIZE, routines can be called from offloading regions whose vector length is larger than the warp size. On the other hand, vector-partitionable routines assume a warp-sized vector length. Detect if we're calling a vector-partitionable routine from an offloading region, and if so, fall back to warp-sized vector length in that region. 2019-01-07 Tom de Vries <tdevries@suse.de> PR target/85486 * config/nvptx/nvptx.c (has_vector_partitionable_routine_calls_p): New function. (nvptx_goacc_validate_dims): Force vl32 if calling vector-partitionable routines. From-SVN: r267640
2019-01-07 10:01:49 +00:00 · 2019-01-07 10:01:49 +00:00 · 6e723923df
commit 6e723923df
parent d471bdec41
2 changed files with 53 additions and 0 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,11 @@
+2019-01-07  Tom de Vries  <tdevries@suse.de>
+
+	PR target/85486
+	* config/nvptx/nvptx.c (has_vector_partitionable_routine_calls_p): New
+	function.
+	(nvptx_goacc_validate_dims): Force vl32 if calling vector-partitionable
+	routines.
+
 2019-01-07  Jakub Jelinek  <jakub@redhat.com>

 	* config/i386/sse.md (vec_extract<mode><ssehalfvecmodelower>): Use
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@ -59,6 +59,7 @@
 #include "builtins.h"
 #include "omp-general.h"
 #include "omp-low.h"
+#include "omp-offload.h"
 #include "gomp-constants.h"
 #include "dumpfile.h"
 #include "internal-fn.h"
@ -5496,6 +5497,40 @@ nvptx_apply_dim_limits (int dims[])
    dims[GOMP_DIM_VECTOR] = PTX_WARP_SIZE;
 }

+/* Return true if FNDECL contains calls to vector-partitionable routines.
+   FNDECL may be NULL, in which case false is returned.  Only direct calls
+   with a known callee decl are considered.  A callee counts as
+   vector-partitionable if it carries an OpenACC function attribute (as
+   reported by oacc_get_fn_attrib) whose level is not GOMP_DIM_MAX.  */
+
+static bool
+has_vector_partitionable_routine_calls_p (tree fndecl)
+{
+  if (!fndecl)
+    return false;
+
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fndecl))
+    for (gimple_stmt_iterator i = gsi_start_bb (bb); !gsi_end_p (i);
+	 gsi_next_nondebug (&i))
+      {
+	gimple *stmt = gsi_stmt (i);
+	if (gimple_code (stmt) != GIMPLE_CALL)
+	  continue;
+
+	tree callee = gimple_call_fndecl (stmt);
+	if (!callee)
+	  continue;
+
+	/* A callee without an OpenACC function attribute is not itself
+	   vector-partitionable, but later calls still have to be checked, so
+	   skip it rather than ending the scan.  */
+	tree attrs = oacc_get_fn_attrib (callee);
+	if (attrs == NULL_TREE)
+	  continue;
+
+	/* Level GOMP_DIM_MAX is treated as a seq routine here.  */
+	int partition_level = oacc_fn_attrib_level (attrs);
+	bool seq_routine_p = partition_level == GOMP_DIM_MAX;
+	if (!seq_routine_p)
+	  return true;
+      }
+
+  return false;
+}
+
 /* As nvptx_goacc_validate_dims, but does not return bool to indicate whether
   DIMS has changed.  */

@ -5611,6 +5646,16 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int fn_level)
    old_dims[i] = dims[i];

  const char *vector_reason = NULL;
+  if (offload_region_p && has_vector_partitionable_routine_calls_p (decl))
+    {
+      if (dims[GOMP_DIM_VECTOR] > PTX_WARP_SIZE)
+	{
+	  vector_reason = G_("using vector_length (%d) due to call to"
+			     " vector-partitionable routine, ignoring %d");
+	  dims[GOMP_DIM_VECTOR] = PTX_WARP_SIZE;
+	}
+    }
+
  if (dims[GOMP_DIM_VECTOR] == 0)
    {
      vector_reason = G_("using vector_length (%d), ignoring runtime setting");