code generate live lanes in basic-block vectorization

The following adds the capability to code-generate live lanes in
basic-block vectorization using lane extracts from vector stmts
rather than keeping the original scalar code around for those.
This eventually makes previously unprofitable vectorizations
profitable (the live scalar code was appropriately costed, and so
are the lane extracts now).  Apart from the cost model, this patch
doesn't add or remove any basic-block vectorization capabilities.
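
As an illustration only (not part of the patch), the idea in GNU C
with vector extensions: a value like tem0 below is needed both inside
the vectorized store group and outside of it, so it is "live".
Previously the scalar computation of tem0 was kept around; with this
patch the value can instead be read back as a lane of the vector
result.

  /* Hedged sketch; the function name and the use of vector extensions
     are mine, and p is assumed suitably aligned.  The vectorizer itself
     emits GIMPLE with a BIT_FIELD_REF lane extract.  */
  typedef int v4si __attribute__ ((vector_size (16)));

  int
  live_lane_sketch (int *p, v4si addend)
  {
    v4si x = *(v4si *) p + addend;   /* four adds, vectorized */
    *(v4si *) p = x;                 /* vectorized store group */
    return x[0];                     /* live lane: extracted instead of
                                        recomputing the scalar */
  }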

The patch re/ab-uses STMT_VINFO_LIVE_P in basic-block vectorization
mode to tell whether a live lane is vectorized or whether it is
provided by means of keeping the scalar code live.

The patch is, however, a first step towards vectorizing sequences of
stmts that do not end up in stores or vector constructors.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

2020-09-04  Richard Biener  <rguenther@suse.de>

	* tree-vectorizer.h (vectorizable_live_operation): Adjust.
	* tree-vect-loop.c (vectorizable_live_operation): Vectorize
	live lanes out of basic-block vectorization nodes.
	* tree-vect-slp.c (vect_bb_slp_mark_live_stmts): New function.
	(vect_slp_analyze_operations): Analyze live lanes and their
	vectorization possibility after the whole SLP graph is final.
	(vect_bb_slp_scalar_cost): Adjust for vectorized live lanes.
	* tree-vect-stmts.c (can_vectorize_live_stmts): Adjust.
	(vect_transform_stmt): Call can_vectorize_live_stmts also for
	basic-block vectorization.

	* gcc.dg/vect/bb-slp-46.c: New testcase.
	* gcc.dg/vect/bb-slp-47.c: Likewise.
	* gcc.dg/vect/bb-slp-32.c: Adjust.
Author:  Richard Biener <rguenther@suse.de>
Date:    2020-09-04 15:33:19 +02:00
Commit:  095d42feed (parent d30869a8d4)
7 changed files with 338 additions and 125 deletions

gcc/testsuite/gcc.dg/vect/bb-slp-32.c

@@ -7,16 +7,21 @@ int foo (int *p, int a, int b)
{
int x[4];
int tem0, tem1, tem2, tem3;
int sum = 0;
tem0 = p[0] + 1 + a;
sum += tem0;
x[0] = tem0;
tem1 = p[1] + 2 + b;
sum += tem1;
x[1] = tem1;
tem2 = p[2] + 3 + b;
sum += tem2;
x[2] = tem2;
tem3 = p[3] + 4 + a;
sum += tem3;
x[3] = tem3;
bar (x);
return tem0 + tem1 + tem2 + tem3;
return sum;
}
/* { dg-final { scan-tree-dump "vectorization is not profitable" "slp2" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */

gcc/testsuite/gcc.dg/vect/bb-slp-46.c

@@ -0,0 +1,28 @@
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */
/* { dg-additional-options "-fdump-tree-optimized" } */
int a[4], b[4];
int foo ()
{
int tem0 = a[0] + b[0];
int temx = tem0 * 17; /* this fails without a real need */
int tem1 = a[1] + b[1];
int tem2 = a[2] + b[2];
int tem3 = a[3] + b[3];
int temy = tem3 * 13;
a[0] = tem0;
a[1] = tem1;
a[2] = tem2;
a[3] = tem3;
return temx + temy;
}
/* We should extract the live lane from the vectorized add rather than
keeping the original scalar add.
??? Because of a too conservative check we fail for temx here. */
/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" } } */
/* { dg-final { scan-tree-dump "extracting lane for live stmt" "slp2" } } */
/* { dg-final { scan-tree-dump-times "extracting lane for live stmt" 2 "slp2" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times " \\+ " 3 "optimized" } } */
/* { dg-final { scan-tree-dump-times " \\+ " 2 "optimized" { xfail *-*-* } } } */
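
Purely for illustration, what the intended end state for this testcase
looks like in GNU C with vector extensions (the real output is GIMPLE;
today temx is still computed from the kept scalar add, which is what
the xfailed scans above track; a and b are assumed suitably aligned):

  typedef int v4si __attribute__ ((vector_size (16)));
  extern int a[4], b[4];

  int
  foo_vectorized_sketch (void)
  {
    v4si vtem = *(v4si *) a + *(v4si *) b;  /* the single vectorized add */
    *(v4si *) a = vtem;                     /* vectorized store group */
    int temx = vtem[0] * 17;                /* live lane 0 (xfailed today) */
    int temy = vtem[3] * 13;                /* live lane 3, extracted */
    return temx + temy;
  }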

gcc/testsuite/gcc.dg/vect/bb-slp-47.c

@@ -0,0 +1,14 @@
/* { dg-do compile } */
int bar();
int foo (int *a, int b, int c)
{
int tem0 = bar ();
int tem1 = tem0 + b;
int tem3 = tem1 + c;
a[0] = tem3;
a[1] = tem3 + 1;
a[2] = tem3 + 2;
a[3] = tem3 + 3;
return tem1;
}

gcc/tree-vect-loop.c

@@ -8012,14 +8012,14 @@ vectorizable_induction (loop_vec_info loop_vinfo,
it can be supported. */
bool
vectorizable_live_operation (loop_vec_info loop_vinfo,
vectorizable_live_operation (vec_info *vinfo,
stmt_vec_info stmt_info,
gimple_stmt_iterator *gsi,
slp_tree slp_node, slp_instance slp_node_instance,
int slp_index, bool vec_stmt_p,
stmt_vector_for_cost *)
stmt_vector_for_cost *cost_vec)
{
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
imm_use_iterator imm_iter;
tree lhs, lhs_type, bitsize, vec_bitsize;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
@@ -8064,10 +8064,6 @@ vectorizable_live_operation (loop_vec_info loop_vinfo,
return true;
}
/* FORNOW. CHECKME. */
if (nested_in_vect_loop_p (loop, stmt_info))
return false;
/* If STMT is not relevant and it is a simple assignment and its inputs are
invariant then it can remain in place, unvectorized. The original last
scalar value that it computes will be used. */
@@ -8090,12 +8086,11 @@ vectorizable_live_operation (loop_vec_info loop_vinfo,
{
gcc_assert (slp_index >= 0);
int num_scalar = SLP_TREE_LANES (slp_node);
int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
/* Get the last occurrence of the scalar index from the concatenation of
all the slp vectors. Calculate which slp vector it is and the index
within. */
int num_scalar = SLP_TREE_LANES (slp_node);
int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
poly_uint64 pos = (num_vec * nunits) - num_scalar + slp_index;
/* Calculate which vector contains the result, and which lane of
@@ -8113,7 +8108,7 @@ vectorizable_live_operation (loop_vec_info loop_vinfo,
if (!vec_stmt_p)
{
/* No transformation required. */
if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
{
if (!direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
OPTIMIZE_FOR_SPEED))
@@ -8150,14 +8145,20 @@ vectorizable_live_operation (loop_vec_info loop_vinfo,
1, vectype, NULL);
}
}
/* ??? Enable for loop costing as well. */
if (!loop_vinfo)
record_stmt_cost (cost_vec, 1, vec_to_scalar, stmt_info, NULL_TREE,
0, vect_epilogue);
return true;
}
/* Use the lhs of the original scalar statement. */
gimple *stmt = vect_orig_stmt (stmt_info)->stmt;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "extracting lane for live "
"stmt %G", stmt);
lhs = (is_a <gphi *> (stmt)) ? gimple_phi_result (stmt)
: gimple_get_lhs (stmt);
lhs = gimple_get_lhs (stmt);
lhs_type = TREE_TYPE (lhs);
bitsize = vector_element_bits_tree (vectype);
@@ -8165,16 +8166,14 @@ vectorizable_live_operation (loop_vec_info loop_vinfo,
/* Get the vectorized lhs of STMT and the lane to use (counted in bits). */
tree vec_lhs, bitstart;
gimple *vec_stmt;
if (slp_node)
{
gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
gcc_assert (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
/* Get the correct slp vectorized stmt. */
gimple *vec_stmt = SLP_TREE_VEC_STMTS (slp_node)[vec_entry];
if (gphi *phi = dyn_cast <gphi *> (vec_stmt))
vec_lhs = gimple_phi_result (phi);
else
vec_lhs = gimple_get_lhs (vec_stmt);
vec_stmt = SLP_TREE_VEC_STMTS (slp_node)[vec_entry];
vec_lhs = gimple_get_lhs (vec_stmt);
/* Get entry to use. */
bitstart = bitsize_int (vec_index);
@@ -8183,102 +8182,158 @@ vectorizable_live_operation (loop_vec_info loop_vinfo,
else
{
/* For multiple copies, get the last copy. */
vec_lhs = gimple_get_lhs (STMT_VINFO_VEC_STMTS (stmt_info).last ());
vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info).last ();
vec_lhs = gimple_get_lhs (vec_stmt);
/* Get the last lane in the vector. */
bitstart = int_const_binop (MINUS_EXPR, vec_bitsize, bitsize);
}
/* Ensure the VEC_LHS for lane extraction stmts satisfy loop-closed PHI
requirement, insert one phi node for it. It looks like:
loop;
BB:
# lhs' = PHI <lhs>
==>
loop;
BB:
# vec_lhs' = PHI <vec_lhs>
new_tree = lane_extract <vec_lhs', ...>;
lhs' = new_tree; */
basic_block exit_bb = single_exit (loop)->dest;
gcc_assert (single_pred_p (exit_bb));
tree vec_lhs_phi = copy_ssa_name (vec_lhs);
gimple *phi = create_phi_node (vec_lhs_phi, exit_bb);
SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, vec_lhs);
gimple_seq stmts = NULL;
tree new_tree;
if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
if (loop_vinfo)
{
/* Emit:
/* Ensure the VEC_LHS for lane extraction stmts satisfy loop-closed PHI
requirement, insert one phi node for it. It looks like:
loop;
BB:
# lhs' = PHI <lhs>
==>
loop;
BB:
# vec_lhs' = PHI <vec_lhs>
new_tree = lane_extract <vec_lhs', ...>;
lhs' = new_tree; */
SCALAR_RES = EXTRACT_LAST <VEC_LHS, MASK>
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block exit_bb = single_exit (loop)->dest;
gcc_assert (single_pred_p (exit_bb));
where VEC_LHS is the vectorized live-out result and MASK is
the loop mask for the final iteration. */
gcc_assert (ncopies == 1 && !slp_node);
tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info));
tree mask = vect_get_loop_mask (gsi, &LOOP_VINFO_MASKS (loop_vinfo), 1,
vectype, 0);
tree scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST, scalar_type,
mask, vec_lhs_phi);
tree vec_lhs_phi = copy_ssa_name (vec_lhs);
gimple *phi = create_phi_node (vec_lhs_phi, exit_bb);
SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, vec_lhs);
/* Convert the extracted vector element to the required scalar type. */
new_tree = gimple_convert (&stmts, lhs_type, scalar_res);
}
else
{
tree bftype = TREE_TYPE (vectype);
if (VECTOR_BOOLEAN_TYPE_P (vectype))
bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1);
new_tree = build3 (BIT_FIELD_REF, bftype, vec_lhs_phi, bitsize, bitstart);
new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree),
&stmts, true, NULL_TREE);
}
if (stmts)
{
gimple_stmt_iterator exit_gsi = gsi_after_labels (exit_bb);
gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
/* Remove existing phi from lhs and create one copy from new_tree. */
tree lhs_phi = NULL_TREE;
gimple_stmt_iterator gsi;
for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
gimple_seq stmts = NULL;
tree new_tree;
if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
{
gimple *phi = gsi_stmt (gsi);
if ((gimple_phi_arg_def (phi, 0) == lhs))
{
remove_phi_node (&gsi, false);
lhs_phi = gimple_phi_result (phi);
gimple *copy = gimple_build_assign (lhs_phi, new_tree);
gsi_insert_before (&exit_gsi, copy, GSI_SAME_STMT);
break;
}
}
}
/* Emit:
/* Replace use of lhs with newly computed result. If the use stmt is a
single arg PHI, just replace all uses of PHI result. It's necessary
because lcssa PHI defining lhs may be before newly inserted stmt. */
use_operand_p use_p;
FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
&& !is_gimple_debug (use_stmt))
{
if (gimple_code (use_stmt) == GIMPLE_PHI
&& gimple_phi_num_args (use_stmt) == 1)
{
replace_uses_by (gimple_phi_result (use_stmt), new_tree);
SCALAR_RES = EXTRACT_LAST <VEC_LHS, MASK>
where VEC_LHS is the vectorized live-out result and MASK is
the loop mask for the final iteration. */
gcc_assert (ncopies == 1 && !slp_node);
tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info));
tree mask = vect_get_loop_mask (gsi, &LOOP_VINFO_MASKS (loop_vinfo),
1, vectype, 0);
tree scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST, scalar_type,
mask, vec_lhs_phi);
/* Convert the extracted vector element to the scalar type. */
new_tree = gimple_convert (&stmts, lhs_type, scalar_res);
}
else
{
FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
SET_USE (use_p, new_tree);
tree bftype = TREE_TYPE (vectype);
if (VECTOR_BOOLEAN_TYPE_P (vectype))
bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1);
new_tree = build3 (BIT_FIELD_REF, bftype,
vec_lhs_phi, bitsize, bitstart);
new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree),
&stmts, true, NULL_TREE);
}
update_stmt (use_stmt);
if (stmts)
{
gimple_stmt_iterator exit_gsi = gsi_after_labels (exit_bb);
gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
/* Remove existing phi from lhs and create one copy from new_tree. */
tree lhs_phi = NULL_TREE;
gimple_stmt_iterator gsi;
for (gsi = gsi_start_phis (exit_bb);
!gsi_end_p (gsi); gsi_next (&gsi))
{
gimple *phi = gsi_stmt (gsi);
if ((gimple_phi_arg_def (phi, 0) == lhs))
{
remove_phi_node (&gsi, false);
lhs_phi = gimple_phi_result (phi);
gimple *copy = gimple_build_assign (lhs_phi, new_tree);
gsi_insert_before (&exit_gsi, copy, GSI_SAME_STMT);
break;
}
}
}
/* Replace use of lhs with newly computed result. If the use stmt is a
single arg PHI, just replace all uses of PHI result. It's necessary
because lcssa PHI defining lhs may be before newly inserted stmt. */
use_operand_p use_p;
FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
&& !is_gimple_debug (use_stmt))
{
if (gimple_code (use_stmt) == GIMPLE_PHI
&& gimple_phi_num_args (use_stmt) == 1)
{
replace_uses_by (gimple_phi_result (use_stmt), new_tree);
}
else
{
FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
SET_USE (use_p, new_tree);
}
update_stmt (use_stmt);
}
}
else
{
/* For basic-block vectorization simply insert the lane-extraction. */
tree bftype = TREE_TYPE (vectype);
if (VECTOR_BOOLEAN_TYPE_P (vectype))
bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1);
tree new_tree = build3 (BIT_FIELD_REF, bftype,
vec_lhs, bitsize, bitstart);
gimple_seq stmts = NULL;
new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree),
&stmts, true, NULL_TREE);
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
/* Replace use of lhs with newly computed result. If the use stmt is a
single arg PHI, just replace all uses of PHI result. It's necessary
because lcssa PHI defining lhs may be before newly inserted stmt. */
use_operand_p use_p;
stmt_vec_info use_stmt_info;
FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
if (!is_gimple_debug (use_stmt)
&& (!(use_stmt_info = vinfo->lookup_stmt (use_stmt))
|| !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info))))
{
/* ??? This can happen when the live lane ends up being
used in a vector construction code-generated by an
external SLP node (and code-generation for that already
happened). See gcc.dg/vect/bb-slp-47.c.
Doing this is what would happen if that vector CTOR
were not code-generated yet so it is not too bad.
??? In fact we'd likely want to avoid this situation
in the first place. */
if (gimple_code (use_stmt) != GIMPLE_PHI
&& !vect_stmt_dominates_stmt_p (gsi_stmt (*gsi), use_stmt))
{
gcc_assert (is_gimple_assign (use_stmt)
&& gimple_assign_rhs_code (use_stmt) == CONSTRUCTOR);
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Using original scalar computation for "
"live lane because use preceeds vector "
"def\n");
continue;
}
FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
SET_USE (use_p, new_tree);
update_stmt (use_stmt);
}
}
return true;

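As context for the tree-vect-loop.c hunk above, the two code-generation
shapes differ: for loops the live value is taken from the last lane of
the vector leaving the loop (through a loop-closed PHI, or via
EXTRACT_LAST when fully masked), while for basic blocks the requested
lane is read out right before its scalar uses.  A hedged, source-level
illustration of the two extraction shapes in GNU C with vector
extensions (names are made up; the pass itself builds BIT_FIELD_REFs):

  typedef int v4si __attribute__ ((vector_size (16)));

  /* Loop case: the scalar live-out is the last lane of the final
     vector, roughly BIT_FIELD_REF <acc, 32, 96>.  Assumes n >= 1.  */
  int
  last_lane_after_loop (v4si *vecs, int n)
  {
    v4si acc = vecs[0];
    for (int i = 1; i < n; ++i)
      acc += vecs[i];
    return acc[3];
  }

  /* Basic-block case: read the requested lane right before the scalar
     use, roughly BIT_FIELD_REF <v, 32, lane * 32>.  */
  int
  lane_in_block (v4si v, int lane)
  {
    return v[lane];
  }
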
gcc/tree-vect-slp.c

@@ -2970,6 +2970,101 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
}
/* Mark lanes of NODE that are live outside of the basic-block vectorized
region and that can be vectorized using vectorizable_live_operation
with STMT_VINFO_LIVE_P. Not handled live operations will cause the
scalar code computing it to be retained. */
static void
vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
slp_instance instance,
stmt_vector_for_cost *cost_vec,
hash_set<stmt_vec_info> &svisited)
{
unsigned i;
stmt_vec_info stmt_info;
stmt_vec_info last_stmt = vect_find_last_scalar_stmt_in_slp (node);
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
{
stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
if (svisited.contains (orig_stmt_info))
continue;
bool mark_visited = true;
gimple *orig_stmt = orig_stmt_info->stmt;
ssa_op_iter op_iter;
def_operand_p def_p;
FOR_EACH_SSA_DEF_OPERAND (def_p, orig_stmt, op_iter, SSA_OP_DEF)
{
imm_use_iterator use_iter;
gimple *use_stmt;
stmt_vec_info use_stmt_info;
FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
if (!is_gimple_debug (use_stmt))
{
use_stmt_info = bb_vinfo->lookup_stmt (use_stmt);
if (!use_stmt_info
|| !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
{
STMT_VINFO_LIVE_P (stmt_info) = true;
if (vectorizable_live_operation (bb_vinfo, stmt_info,
NULL, node, instance, i,
false, cost_vec))
/* ??? So we know we can vectorize the live stmt
from one SLP node. If we cannot do so from all
or none consistently we'd have to record which
SLP node (and lane) we want to use for the live
operation. So make sure we can code-generate
from all nodes. */
mark_visited = false;
else
STMT_VINFO_LIVE_P (stmt_info) = false;
BREAK_FROM_IMM_USE_STMT (use_iter);
}
}
/* We have to verify whether we can insert the lane extract
before all uses. The following is a conservative approximation.
We cannot put this into vectorizable_live_operation because
iterating over all use stmts from inside a FOR_EACH_IMM_USE_STMT
doesn't work.
Note that while the fact that we emit code for loads at the
first load should make this a non-problem leafs we construct
from scalars are vectorized after the last scalar def.
??? If we'd actually compute the insert location during
analysis we could use sth less conservative than the last
scalar stmt in the node for the dominance check. */
/* ??? What remains is "live" uses in vector CTORs in the same
SLP graph which is where those uses can end up code-generated
right after their definition instead of close to their original
use. But that would restrict us to code-generate lane-extracts
from the latest stmt in a node. So we compensate for this
during code-generation, simply not replacing uses for those
hopefully rare cases. */
if (STMT_VINFO_LIVE_P (stmt_info))
FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
if (!is_gimple_debug (use_stmt)
&& (!(use_stmt_info = bb_vinfo->lookup_stmt (use_stmt))
|| !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
&& !vect_stmt_dominates_stmt_p (last_stmt->stmt, use_stmt))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Cannot determine insertion place for "
"lane extract\n");
STMT_VINFO_LIVE_P (stmt_info) = false;
mark_visited = true;
}
}
if (mark_visited)
svisited.add (orig_stmt_info);
}
slp_tree child;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
vect_bb_slp_mark_live_stmts (bb_vinfo, child, instance,
cost_vec, svisited);
}
/* Analyze statements in SLP instances of VINFO. Return true if the
operations are supported. */
@@ -3019,6 +3114,19 @@ vect_slp_analyze_operations (vec_info *vinfo)
}
}
/* Compute vectorizable live stmts. */
if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
{
hash_set<stmt_vec_info> svisited;
stmt_vector_for_cost cost_vec;
cost_vec.create (2);
for (i = 0; vinfo->slp_instances.iterate (i, &instance); ++i)
vect_bb_slp_mark_live_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance),
instance, &cost_vec, svisited);
add_stmt_costs (vinfo, vinfo->target_cost_data, &cost_vec);
cost_vec.release ();
}
return !vinfo->slp_instances.is_empty ();
}
@@ -3048,31 +3156,36 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
if ((*life)[i])
continue;
stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
gimple *orig_stmt = orig_stmt_info->stmt;
/* If there is a non-vectorized use of the defs then the scalar
stmt is kept live in which case we do not account it or any
required defs in the SLP children in the scalar cost. This
way we make the vectorization more costly when compared to
the scalar cost. */
stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
gimple *orig_stmt = orig_stmt_info->stmt;
FOR_EACH_SSA_DEF_OPERAND (def_p, orig_stmt, op_iter, SSA_OP_DEF)
if (!STMT_VINFO_LIVE_P (stmt_info))
{
imm_use_iterator use_iter;
gimple *use_stmt;
FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
if (!is_gimple_debug (use_stmt))
{
stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
if (!use_stmt_info
|| !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
FOR_EACH_SSA_DEF_OPERAND (def_p, orig_stmt, op_iter, SSA_OP_DEF)
{
imm_use_iterator use_iter;
gimple *use_stmt;
FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
if (!is_gimple_debug (use_stmt))
{
(*life)[i] = true;
BREAK_FROM_IMM_USE_STMT (use_iter);
stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
if (!use_stmt_info
|| !PURE_SLP_STMT
(vect_stmt_to_vectorize (use_stmt_info)))
{
(*life)[i] = true;
BREAK_FROM_IMM_USE_STMT (use_iter);
}
}
}
}
if ((*life)[i])
continue;
}
if ((*life)[i])
continue;
/* Count scalar stmts only once. */
if (gimple_visited_p (orig_stmt))

gcc/tree-vect-stmts.c

@@ -10532,7 +10532,7 @@ vectorizable_comparison (vec_info *vinfo,
GSI and VEC_STMT_P are as for vectorizable_live_operation. */
static bool
can_vectorize_live_stmts (loop_vec_info loop_vinfo,
can_vectorize_live_stmts (vec_info *vinfo,
stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
slp_tree slp_node, slp_instance slp_node_instance,
bool vec_stmt_p,
@@ -10545,7 +10545,7 @@ can_vectorize_live_stmts (loop_vec_info loop_vinfo,
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
{
if (STMT_VINFO_LIVE_P (slp_stmt_info)
&& !vectorizable_live_operation (loop_vinfo,
&& !vectorizable_live_operation (vinfo,
slp_stmt_info, gsi, slp_node,
slp_node_instance, i,
vec_stmt_p, cost_vec))
@@ -10553,7 +10553,7 @@ can_vectorize_live_stmts (loop_vec_info loop_vinfo,
}
}
else if (STMT_VINFO_LIVE_P (stmt_info)
&& !vectorizable_live_operation (loop_vinfo, stmt_info, gsi,
&& !vectorizable_live_operation (vinfo, stmt_info, gsi,
slp_node, slp_node_instance, -1,
vec_stmt_p, cost_vec))
return false;
@@ -10923,10 +10923,8 @@ vect_transform_stmt (vec_info *vinfo,
/* Handle stmts whose DEF is used outside the loop-nest that is
being vectorized. */
if (is_a <loop_vec_info> (vinfo))
done = can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
stmt_info, gsi, slp_node,
slp_node_instance, true, NULL);
done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
slp_node_instance, true, NULL);
gcc_assert (done);
return false;

gcc/tree-vectorizer.h

@@ -1990,7 +1990,7 @@ extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info);
extern class loop *vect_transform_loop (loop_vec_info, gimple *);
extern opt_loop_vec_info vect_analyze_loop_form (class loop *,
vec_info_shared *);
extern bool vectorizable_live_operation (loop_vec_info,
extern bool vectorizable_live_operation (vec_info *,
stmt_vec_info, gimple_stmt_iterator *,
slp_tree, slp_instance, int,
bool, stmt_vector_for_cost *);