tree-optimization/97678 - fix SLP induction epilogue vectorization
This restores not tracking SLP nodes for induction initial values in a non-nested context, because tracking them interferes with peeling and epilogue vectorization. 2020-11-03 Richard Biener <rguenther@suse.de> PR tree-optimization/97678 * tree-vect-slp.c (vect_build_slp_tree_2): Do not track the initial values of inductions when not nested. * tree-vect-loop.c (vectorizable_induction): Look at PHI node initial values again for SLP and not nested inductions. Handle LOOP_VINFO_MASK_SKIP_NITERS and cost invariants. * gcc.dg/vect/pr97678.c: New testcase.
This commit is contained in:
parent
0caf400a86
commit
f53e9d40de
3 changed files with 79 additions and 7 deletions
29
gcc/testsuite/gcc.dg/vect/pr97678.c
Normal file
29
gcc/testsuite/gcc.dg/vect/pr97678.c
Normal file
|
@ -0,0 +1,29 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
/* { dg-additional-options "-mavx2" { target avx2_runtime } } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
/* Fill B with two interleaved sequences (i * 7 at even indices, i * 8 at
   odd indices), then re-read every pair and abort on the first mismatch.
   Returns 0 when all 158 pairs match.  */
int
|
||||
main ()
|
||||
{
|
||||
unsigned int i = 0;
|
||||
/* Two unsigned short slots per iteration of the 158-trip loops below.  */
unsigned short b[158 * 2];
|
||||
|
||||
/* check_vect is provided by tree-vect.h, which is not visible here;
   presumably it bails out when the host lacks the required vector
   support -- TODO confirm against tree-vect.h.  */
check_vect ();
|
||||
|
||||
/* Init loop: the unsigned int products are narrowed to unsigned short
   by the stores.  */
for (i = 0; i < 158; i++)
|
||||
{
|
||||
b[i * 2] = i * 7;
|
||||
b[i * 2 + 1] = i * 8;
|
||||
}
|
||||
|
||||
/* Verification loop: recompute each expected value with the same
   narrowing made explicit via a cast, and abort on any difference.  */
for (i = 0; i < 158; ++i)
|
||||
if (b[i*2] != (unsigned short)(i*7)
|
||||
|| b[i*2+1] != (unsigned short)(i*8))
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The init loop should be vectorized with SLP. */
|
||||
/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
|
|
@ -7800,6 +7800,10 @@ vectorizable_induction (loop_vec_info loop_vinfo,
|
|||
= record_stmt_cost (cost_vec,
|
||||
SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
|
||||
vector_stmt, stmt_info, 0, vect_body);
|
||||
/* prologue cost for vec_init (if not nested) and step. */
|
||||
prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop,
|
||||
scalar_to_vec,
|
||||
stmt_info, 0, vect_prologue);
|
||||
}
|
||||
else /* if (!slp_node) */
|
||||
{
|
||||
|
@ -7858,9 +7862,15 @@ vectorizable_induction (loop_vec_info loop_vinfo,
|
|||
cycles we have to reconstruct the step from SCEV data. */
|
||||
unsigned group_size = SLP_TREE_LANES (slp_node);
|
||||
tree *steps = XALLOCAVEC (tree, group_size);
|
||||
tree *inits = XALLOCAVEC (tree, group_size);
|
||||
stmt_vec_info phi_info;
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info)
|
||||
steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
|
||||
{
|
||||
steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
|
||||
if (!init_node)
|
||||
inits[i] = gimple_phi_arg_def (as_a<gphi *> (phi_info->stmt),
|
||||
pe->dest_idx);
|
||||
}
|
||||
|
||||
/* Now generate the IVs. */
|
||||
unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
|
||||
|
@ -7875,16 +7885,39 @@ vectorizable_induction (loop_vec_info loop_vinfo,
|
|||
? build_real_from_wide (stept, lup_mul,
|
||||
UNSIGNED)
|
||||
: build_int_cstu (stept, lup_mul));
|
||||
tree peel_mul = NULL_TREE;
|
||||
if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo))
|
||||
{
|
||||
stmts = NULL;
|
||||
if (SCALAR_FLOAT_TYPE_P (stept))
|
||||
peel_mul = gimple_build (&stmts, FLOAT_EXPR, stept,
|
||||
LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
|
||||
else
|
||||
peel_mul = gimple_convert (&stmts, stept,
|
||||
LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
|
||||
peel_mul = gimple_build_vector_from_val (&stmts, step_vectype, peel_mul);
|
||||
if (stmts)
|
||||
{
|
||||
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
|
||||
gcc_assert (!new_bb);
|
||||
}
|
||||
}
|
||||
unsigned ivn;
|
||||
auto_vec<tree> vec_steps;
|
||||
for (ivn = 0; ivn < nivs; ++ivn)
|
||||
{
|
||||
tree_vector_builder elts (step_vectype, const_nunits, 1);
|
||||
tree_vector_builder step_elts (step_vectype, const_nunits, 1);
|
||||
tree_vector_builder init_elts (vectype, const_nunits, 1);
|
||||
tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
|
||||
for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
|
||||
{
|
||||
tree elt = steps[(ivn*const_nunits + eltn) % group_size];
|
||||
elts.quick_push (elt);
|
||||
step_elts.quick_push (elt);
|
||||
if (!init_node)
|
||||
{
|
||||
elt = inits[(ivn*const_nunits + eltn) % group_size];
|
||||
init_elts.quick_push (elt);
|
||||
}
|
||||
unsigned mul_elt = (ivn*const_nunits + eltn) / group_size;
|
||||
mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept)
|
||||
? build_real_from_wide (stept,
|
||||
|
@ -7892,10 +7925,15 @@ vectorizable_induction (loop_vec_info loop_vinfo,
|
|||
: build_int_cstu (stept, mul_elt));
|
||||
}
|
||||
stmts = NULL;
|
||||
vec_step = gimple_build_vector (&stmts, &elts);
|
||||
vec_step = gimple_build_vector (&stmts, &step_elts);
|
||||
vec_step = gimple_convert (&stmts, step_vectype, vec_step);
|
||||
vec_steps.safe_push (vec_step);
|
||||
tree step_mul = gimple_build_vector (&stmts, &mul_elts);
|
||||
if (peel_mul)
|
||||
gimple_build (&stmts, PLUS_EXPR, step_vectype,
|
||||
step_mul, peel_mul);
|
||||
if (!init_node)
|
||||
vec_init = gimple_build_vector (&stmts, &init_elts);
|
||||
if (stmts)
|
||||
{
|
||||
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
|
||||
|
@ -7926,7 +7964,8 @@ vectorizable_induction (loop_vec_info loop_vinfo,
|
|||
add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
|
||||
UNKNOWN_LOCATION);
|
||||
|
||||
vec_init = vect_get_slp_vect_def (init_node, ivn);
|
||||
if (init_node)
|
||||
vec_init = vect_get_slp_vect_def (init_node, ivn);
|
||||
if (!integer_zerop (step_mul))
|
||||
{
|
||||
stmts = NULL;
|
||||
|
|
|
@ -1444,9 +1444,13 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
|
|||
if (def_type == vect_induction_def)
|
||||
{
|
||||
/* Induction PHIs are not cycles but walk the initial
|
||||
value. */
|
||||
value. Only for inner loops though, for outer loops
|
||||
we need to pick up the value from the actual PHIs
|
||||
to more easily support peeling and epilogue vectorization. */
|
||||
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
if (nested_in_vect_loop_p (loop, stmt_info))
|
||||
if (!nested_in_vect_loop_p (loop, stmt_info))
|
||||
skip_args[loop_preheader_edge (loop)->dest_idx] = true;
|
||||
else
|
||||
loop = loop->inner;
|
||||
skip_args[loop_latch_edge (loop)->dest_idx] = true;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue