Add clobbers around IFN_LOAD/STORE_LANES

We build up the input to IFN_STORE_LANES one vector at a time.
In RTL, each of these vector assignments becomes a write to
subregs of the form (subreg:VEC (reg:AGGR R)), where R is the
eventual input to the store lanes instruction.  The problem is
that RTL isn't very good at tracking liveness when things are
initialised piecemeal by subregs, so R tends to end up being
live on all paths from the entry block to the store.  This in
turn leads to unnecessary spilling around calls, as well as to
excess register pressure in vector loops.

This patch adds gimple clobbers to indicate the liveness of the
IFN_STORE_LANES variable and makes sure that gimple clobbers are
expanded to rtl clobbers where useful.  For consistency it also
uses clobbers to mark the point at which an IFN_LOAD_LANES
variable is no longer needed.

2018-05-08  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* cfgexpand.c (expand_clobber): New function.
	(expand_gimple_stmt_1): Use it.
	* tree-vect-stmts.c (vect_clobber_variable): New function,
	split out from...
	(vectorizable_simd_clone_call): ...here.
	(vectorizable_store): Emit a clobber either side of an
	IFN_STORE_LANES sequence.
	(vectorizable_load): Emit a clobber after an IFN_LOAD_LANES sequence.

gcc/testsuite/
	* gcc.target/aarch64/store_lane_spill_1.c: New test.
	* gcc.target/aarch64/sve/store_lane_spill_1.c: Likewise.

From-SVN: r260073
This commit is contained in:
Richard Sandiford 2018-05-09 10:35:31 +00:00 committed by Richard Sandiford
parent 82191cbf3f
commit 3ba4ff4130
6 changed files with 105 additions and 17 deletions

View file

@@ -1,3 +1,14 @@
2018-05-09 Richard Sandiford <richard.sandiford@linaro.org>
* cfgexpand.c (expand_clobber): New function.
(expand_gimple_stmt_1): Use it.
* tree-vect-stmts.c (vect_clobber_variable): New function,
split out from...
(vectorizable_simd_clone_call): ...here.
(vectorizable_store): Emit a clobber either side of an
IFN_STORE_LANES sequence.
(vectorizable_load): Emit a clobber after an IFN_LOAD_LANES sequence.
2018-05-09 Tom de Vries <tom@codesourcery.com>
PR target/85626

View file

@@ -3582,6 +3582,26 @@ expand_return (tree retval, tree bounds)
}
}
/* Expand a gimple clobber of LHS.  If LHS lives in a register whose
   mode is wider than a natural register-sized chunk (i.e. a multi-part
   register), emit an rtl clobber so that the rtl optimizers know the
   value is no longer needed.  */

static void
expand_clobber (tree lhs)
{
  if (!DECL_P (lhs))
    return;

  rtx reg = DECL_RTL_IF_SET (lhs);
  if (reg == NULL_RTX || !REG_P (reg))
    return;

  /* Clobbers are only useful for values that span more than one
     natural-sized register; smaller values are tracked fine as-is.  */
  machine_mode mode = GET_MODE (reg);
  if (maybe_gt (GET_MODE_SIZE (mode), REGMODE_NATURAL_SIZE (mode)))
    emit_clobber (reg);
}
/* A subroutine of expand_gimple_stmt, expanding one gimple statement
STMT that doesn't require special handling for outgoing edges. That
is no tailcalls and no GIMPLE_COND. */
@@ -3687,7 +3707,7 @@ expand_gimple_stmt_1 (gimple *stmt)
if (TREE_CLOBBER_P (rhs))
/* This is a clobber to mark the going out of scope for
this LHS. */
;
expand_clobber (lhs);
else
expand_assignment (lhs, rhs,
gimple_assign_nontemporal_move_p (

View file

@@ -1,3 +1,8 @@
2018-05-09 Richard Sandiford <richard.sandiford@linaro.org>
* gcc.target/aarch64/store_lane_spill_1.c: New test.
* gcc.target/aarch64/sve/store_lane_spill_1.c: Likewise.
2018-05-08 Carl Love <cel@us.ibm.com>
* gcc.target/powerpc/builtins-8-p9-runnable.c: Add new test file.

View file

@@ -0,0 +1,21 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
#pragma GCC target "+nosve"
int cont (void);
/* The inner loop writes an interleaved group of 3 (x[i][0..2]), so the
   vectorizer can use a store-lanes sequence.  The outer do/while keeps
   the cont () call in the picture so that values are live across a
   call.  NOTE(review): per the commit message, the point is that the
   store-lanes aggregate must not be spilled around that call — the
   scan-assembler-not directive below checks no stray st1 appears.  */
void
f (int (*x)[3], int *a, int *b, int *c, int n)
{
do
for (int i = 0; i < n; ++i)
{
x[i][0] = a[i] + 1;
x[i][1] = b[i] + 2;
x[i][2] = c[i] + 3;
}
while (cont ());
}
/* { dg-final { scan-assembler-not {\tst1\t} } } */

View file

@@ -0,0 +1,19 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
int cont (void);
/* SVE variant of store_lane_spill_1.c: the x[i][0..2] interleaved
   group should be handled by a store-lanes sequence, with the cont ()
   call keeping values live across a call.  NOTE(review): the
   scan-assembler-not below checks that no SVE vector register is
   spilled to the stack (no "str z...") around that call.  */
void
f (int (*x)[3], int *a, int *b, int *c, int n)
{
do
for (int i = 0; i < n; ++i)
{
x[i][0] = a[i] + 1;
x[i][1] = b[i] + 2;
x[i][2] = c[i] + 3;
}
while (cont ());
}
/* { dg-final { scan-assembler-not {\tstr\tz[0-9]} } } */

View file

@@ -182,6 +182,17 @@ create_array_ref (tree type, tree ptr, tree alias_ptr_type)
return mem_ref;
}
/* Emit a clobber of variable VAR before *GSI, as part of the
   vectorization of STMT, to mark the point at which VAR's current
   contents are no longer needed.  */

static void
vect_clobber_variable (gimple *stmt, gimple_stmt_iterator *gsi, tree var)
{
  gimple *clobber_stmt
    = gimple_build_assign (var, build_clobber (TREE_TYPE (var)));
  vect_finish_stmt_generation (stmt, clobber_stmt, gsi);
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
/* Function vect_mark_relevant.
@@ -4128,12 +4139,7 @@ vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
}
if (ratype)
{
tree clobber = build_constructor (ratype, NULL);
TREE_THIS_VOLATILE (clobber) = 1;
new_stmt = gimple_build_assign (new_temp, clobber);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
vect_clobber_variable (stmt, gsi, new_temp);
continue;
}
else if (simd_clone_subparts (vectype) > nunits)
@@ -4156,10 +4162,7 @@ vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
gimple_assign_lhs (new_stmt));
}
tree clobber = build_constructor (ratype, NULL);
TREE_THIS_VOLATILE (clobber) = 1;
new_stmt = gimple_build_assign (new_temp, clobber);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
vect_clobber_variable (stmt, gsi, new_temp);
}
else
CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
@@ -4186,11 +4189,7 @@ vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
new_stmt
= gimple_build_assign (make_ssa_name (vec_dest), t);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
tree clobber = build_constructor (ratype, NULL);
TREE_THIS_VOLATILE (clobber) = 1;
vect_finish_stmt_generation (stmt,
gimple_build_assign (new_temp,
clobber), gsi);
vect_clobber_variable (stmt, gsi, new_temp);
}
}
@@ -6913,8 +6912,15 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
{
tree vec_array;
/* Combine all the vectors into an array. */
/* Get an array into which we can store the individual vectors. */
vec_array = create_vector_array (vectype, vec_num);
/* Invalidate the current contents of VEC_ARRAY. This should
become an RTL clobber too, which prevents the vector registers
from being upward-exposed. */
vect_clobber_variable (stmt, gsi, vec_array);
/* Store the individual vectors into the array. */
for (i = 0; i < vec_num; i++)
{
vec_oprnd = dr_chain[i];
@@ -6953,6 +6959,9 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
gimple_call_set_nothrow (call, true);
new_stmt = call;
vect_finish_stmt_generation (stmt, new_stmt, gsi);
/* Record that VEC_ARRAY is now dead. */
vect_clobber_variable (stmt, gsi, vec_array);
}
else
{
@@ -8105,6 +8114,9 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
/* Record the mapping between SSA_NAMEs and statements. */
vect_record_grouped_load_vectors (stmt, dr_chain);
/* Record that VEC_ARRAY is now dead. */
vect_clobber_variable (stmt, gsi, vec_array);
}
else
{