tree-optimization/96920 - another ICE when vectorizing nested cycles

This refines the previous fix for PR96698 by re-doing how and where
we arrange for setting vectorized cycle PHI backedge values.
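
In the nested cycles at issue, a loop-header PHI's latch value can itself be defined by another header PHI of the same loop.  For the tem0/tem1 swap in the new C testcase the inner loop header looks roughly like this in SSA form after copy propagation (a hand-written sketch with made-up SSA names, not compiler output):

  # tem0_1 = PHI <tem0_init(preheader), tem1_2(latch)>
  # tem1_2 = PHI <tem1_init(preheader), tem0_1(latch)>

Recording latch defs while vectorizing the defining statement, as the PR96698 fix did, does not cope with such PHI-defined latch values, so the backedge arguments of the vectorized PHIs are now filled in from vect_transform_loop (and from vect_schedule_slp for SLP) once the defining PHIs or stmts have been vectorized; see the outline after the ChangeLog.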

2020-09-04  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/96698
	PR tree-optimization/96920
	* tree-vectorizer.h (loop_vec_info::reduc_latch_defs): Remove.
	(loop_vec_info::reduc_latch_slp_defs): Likewise.
	* tree-vect-stmts.c (vect_transform_stmt): Remove vectorized
	cycle PHI latch code.
	* tree-vect-loop.c (maybe_set_vectorized_backedge_value): New
	helper to set vectorized cycle PHI latch values.
	(vect_transform_loop): Walk over all PHIs again after
	vectorizing them, calling maybe_set_vectorized_backedge_value.
	Call maybe_set_vectorized_backedge_value for each vectorized
	stmt.  Remove delayed update code.
	* tree-vect-slp.c (vect_analyze_slp_instance): Initialize
	SLP instance reduc_phis member.
	(vect_schedule_slp): Set vectorized cycle PHI latch values.

	* gfortran.dg/vect/pr96920.f90: New testcase.
	* gcc.dg/vect/pr96920.c: Likewise.
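
In outline, the reworked flow (a condensed paraphrase of the tree-vect-loop.c hunks below, not the literal code):

  /* Per basic block in vect_transform_loop:
     1.  Vectorize all PHIs of the block.
     2.  Walk the PHIs a second time; only now do all vectorized PHIs
	 exist, so maybe_set_vectorized_backedge_value can fill in
	 latch values that are themselves defined by PHIs, the
	 nested-cycle case of PR96920.
     3.  Vectorize the remaining stmts, calling
	 maybe_set_vectorized_backedge_value right after each one for
	 any cycle PHI latch values the stmt defines.
     SLP reductions are instead handled in vect_schedule_slp.  */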

--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr96920.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+
+int a[1024];
+int b[2048];
+
+void foo (int x, int y)
+{
+  for (int i = 0; i < 1024; ++i)
+    {
+      int tem0 = b[2*i];
+      int tem1 = b[2*i+1];
+      for (int j = 0; j < 32; ++j)
+	{
+	  int tem = tem0;
+	  tem0 = tem1;
+	  tem1 = tem;
+	  a[i] += tem0;
+	}
+    }
+}

--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/vect/pr96920.f90
@@ -0,0 +1,37 @@
+! { dg-do compile }
+
+subroutine ice(npoint, nterm, x, g)
+  implicit none
+  integer norder
+  parameter (norder=10)
+  integer j
+  integer k
+  integer ii
+  integer nterm
+  integer npoint
+  real b(norder)
+  real c(norder)
+  real d(norder)
+  real x(npoint)
+  real g(npoint)
+  real gg
+  real prev
+  real prev2
+  j = 1
+100 continue
+  j = j+1
+  if (nterm == j) then
+     do ii=1,npoint
+        k = nterm
+        gg = d(k)
+        prev = 0.0
+        do k=k-1,1,-1
+           prev2 = prev
+           prev = gg
+           gg = d(k)+(x(ii)-b(k))*prev-c(k+1)*prev2
+        enddo
+        g(ii) = gg
+     enddo
+  endif
+  go to 100
+end

--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -8555,6 +8555,42 @@ scale_profile_for_vect_loop (class loop *loop, unsigned vf)
   scale_bbs_frequencies (&loop->latch, 1, exit_l->probability / prob);
 }
 
+/* For a vectorized stmt DEF_STMT_INFO adjust all vectorized PHI
+   latch edge values originally defined by it.  */
+
+static void
+maybe_set_vectorized_backedge_value (loop_vec_info loop_vinfo,
+				     stmt_vec_info def_stmt_info)
+{
+  tree def = gimple_get_lhs (vect_orig_stmt (def_stmt_info)->stmt);
+  if (!def || TREE_CODE (def) != SSA_NAME)
+    return;
+  stmt_vec_info phi_info;
+  imm_use_iterator iter;
+  use_operand_p use_p;
+  FOR_EACH_IMM_USE_FAST (use_p, iter, def)
+    if (gphi *phi = dyn_cast <gphi *> (USE_STMT (use_p)))
+      if (gimple_bb (phi)->loop_father->header == gimple_bb (phi)
+	  && (phi_info = loop_vinfo->lookup_stmt (phi))
+	  && VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (phi_info))
+	  && STMT_VINFO_REDUC_TYPE (phi_info) != FOLD_LEFT_REDUCTION
+	  && STMT_VINFO_REDUC_TYPE (phi_info) != EXTRACT_LAST_REDUCTION)
+	{
+	  loop_p loop = gimple_bb (phi)->loop_father;
+	  edge e = loop_latch_edge (loop);
+	  if (PHI_ARG_DEF_FROM_EDGE (phi, e) == def)
+	    {
+	      vec<gimple *> &phi_defs = STMT_VINFO_VEC_STMTS (phi_info);
+	      vec<gimple *> &latch_defs = STMT_VINFO_VEC_STMTS (def_stmt_info);
+	      gcc_assert (phi_defs.length () == latch_defs.length ());
+	      for (unsigned i = 0; i < phi_defs.length (); ++i)
+		add_phi_arg (as_a <gphi *> (phi_defs[i]),
+			     gimple_get_lhs (latch_defs[i]), e,
+			     gimple_phi_arg_location (phi, e->dest_idx));
+	    }
+	}
+}
+
 /* Vectorize STMT_INFO if relevant, inserting any new instructions before GSI.
    When vectorizing STMT_INFO as a store, set *SEEN_STORE to its
    stmt_vec_info.  */
@@ -8933,7 +8969,7 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
	    gsi_next (&si))
	 {
	   gphi *phi = si.phi ();
	   if (dump_enabled_p ())
	     dump_printf_loc (MSG_NOTE, vect_location,
@@ -8968,6 +9004,27 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
	     }
	 }
 
+      for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
+	   gsi_next (&si))
+	{
+	  gphi *phi = si.phi ();
+	  stmt_info = loop_vinfo->lookup_stmt (phi);
+	  if (!stmt_info)
+	    continue;
+
+	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
+	      && !STMT_VINFO_LIVE_P (stmt_info))
+	    continue;
+
+	  if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def
+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
+	      && ! PURE_SLP_STMT (stmt_info))
+	    maybe_set_vectorized_backedge_value (loop_vinfo, stmt_info);
+	}
+
       for (gimple_stmt_iterator si = gsi_start_bb (bb);
	    !gsi_end_p (si);)
	 {
@@ -9005,9 +9062,16 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
		     = STMT_VINFO_RELATED_STMT (stmt_info);
		   vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, &si,
					     &seen_store);
+		  maybe_set_vectorized_backedge_value (loop_vinfo,
+						       pat_stmt_info);
+		}
+	      else
+		{
+		  vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,
+					    &seen_store);
+		  maybe_set_vectorized_backedge_value (loop_vinfo,
+						       stmt_info);
		 }
-	      vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,
-					&seen_store);
	     }
	   gsi_next (&si);
	   if (seen_store)
@@ -9025,38 +9089,6 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
	 }
     }
 
-  /* Fill in backedge defs of reductions.  */
-  for (unsigned i = 0; i < loop_vinfo->reduc_latch_defs.length (); ++i)
-    {
-      stmt_vec_info stmt_info = loop_vinfo->reduc_latch_defs[i];
-      stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
-      vec<gimple *> &phi_info
-	= STMT_VINFO_VEC_STMTS (STMT_VINFO_REDUC_DEF (orig_stmt_info));
-      vec<gimple *> &vec_stmt
-	= STMT_VINFO_VEC_STMTS (stmt_info);
-      gcc_assert (phi_info.length () == vec_stmt.length ());
-      gphi *phi
-	= dyn_cast <gphi *> (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt);
-      edge e = loop_latch_edge (gimple_bb (phi_info[0])->loop_father);
-      for (unsigned j = 0; j < phi_info.length (); ++j)
-	add_phi_arg (as_a <gphi *> (phi_info[j]),
-		     gimple_get_lhs (vec_stmt[j]), e,
-		     gimple_phi_arg_location (phi, e->dest_idx));
-    }
-  for (unsigned i = 0; i < loop_vinfo->reduc_latch_slp_defs.length (); ++i)
-    {
-      slp_tree slp_node = loop_vinfo->reduc_latch_slp_defs[i].first;
-      slp_tree phi_node = loop_vinfo->reduc_latch_slp_defs[i].second;
-      gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
-      edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
-      gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
-		  == SLP_TREE_VEC_STMTS (slp_node).length ());
-      for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j)
-	add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[j]),
-		     vect_get_slp_vect_def (slp_node, j),
-		     e, gimple_phi_arg_location (phi, e->dest_idx));
-    }
-
   /* Stub out scalar statements that must not survive vectorization.
      Doing this here helps with grouped statements, or statements that
      are involved in patterns.  */

--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2244,6 +2244,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
       SLP_INSTANCE_LOADS (new_instance) = vNULL;
       SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
+      new_instance->reduc_phis = NULL;
 
       vect_gather_slp_loads (new_instance, node);
 
       if (dump_enabled_p ())
@@ -4565,6 +4566,25 @@ vect_schedule_slp (vec_info *vinfo)
       stmt_vec_info store_info;
       unsigned int j;
 
+      /* For reductions set the latch values of the vectorized PHIs.  */
+      if (instance->reduc_phis
+	  && STMT_VINFO_REDUC_TYPE (SLP_TREE_REPRESENTATIVE
+				      (instance->reduc_phis)) != FOLD_LEFT_REDUCTION
+	  && STMT_VINFO_REDUC_TYPE (SLP_TREE_REPRESENTATIVE
+				      (instance->reduc_phis)) != EXTRACT_LAST_REDUCTION)
+	{
+	  slp_tree slp_node = root;
+	  slp_tree phi_node = instance->reduc_phis;
+	  gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
+	  edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
+	  gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
+		      == SLP_TREE_VEC_STMTS (slp_node).length ());
+	  for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j)
+	    add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[j]),
+			 vect_get_slp_vect_def (slp_node, j),
+			 e, gimple_phi_arg_location (phi, e->dest_idx));
+	}
+
       /* Remove scalar call stmts.  Do not do this for basic-block
	  vectorization as not all uses may be vectorized.
	  ???  Why should this be necessary?  DCE should be able to

--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -10921,33 +10921,6 @@ vect_transform_stmt (vec_info *vinfo,
   if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
     return is_store;
 
-  /* If this stmt defines a value used on a backedge, record it so
-     we can update the vectorized PHIs later.  */
-  stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
-  stmt_vec_info reduc_info;
-  if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
-      && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
-      && (reduc_info = info_for_reduction (vinfo, orig_stmt_info))
-      && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
-      && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
-    {
-      gphi *phi;
-      edge e;
-      if (!slp_node
-	  && (phi = dyn_cast <gphi *>
-		      (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
-	  && dominated_by_p (CDI_DOMINATORS,
-			     gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
-	  && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
-	  && (PHI_ARG_DEF_FROM_EDGE (phi, e)
-	      == gimple_get_lhs (orig_stmt_info->stmt)))
-	as_a <loop_vec_info> (vinfo)->reduc_latch_defs.safe_push (stmt_info);
-      else if (slp_node
-	       && slp_node != slp_node_instance->reduc_phis)
-	as_a <loop_vec_info> (vinfo)->reduc_latch_slp_defs.safe_push
-	  (std::make_pair (slp_node, slp_node_instance->reduc_phis));
-    }
-
   /* Handle stmts whose DEF is used outside the loop-nest that is
      being vectorized.  */
   if (is_a <loop_vec_info> (vinfo))

--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -627,11 +627,6 @@ public:
      stmt in the chain.  */
   auto_vec<stmt_vec_info> reduction_chains;
 
-  /* The vectorized stmts defining the latch values of the reduction
-     they are involved with.  */
-  auto_vec<stmt_vec_info> reduc_latch_defs;
-  auto_vec<std::pair<slp_tree, slp_tree> > reduc_latch_slp_defs;
-
   /* Cost vector for a single scalar iteration.  */
   auto_vec<stmt_info_for_cost> scalar_cost_vec;