tree-optimization/101668 - relax SLP of existing vectors
This relaxes the conditions on SLPing extracts from existing vectors, leveraging the relaxed VEC_PERM conditions on the input vs output vector type compatibility. It also handles lowpart extracts and concats without VEC_PERMs now.

2022-05-25 Richard Biener <rguenther@suse.de>

PR tree-optimization/101668
* tree-vect-slp.cc (vect_build_slp_tree_1): Allow BIT_FIELD_REFs for vector types with compatible lane types.
(vect_build_slp_tree_2): Deal with this.
(vect_add_slp_permutation): Adjust. Emit lowpart/concat special cases without VEC_PERM.
(vectorizable_slp_permutation): Select the operand vector type and relax requirements. Handle identity permutes with mismatching operand types.
* optabs-query.cc (can_vec_perm_const_p): Only allow variable permutes for op_mode == mode.

* gcc.target/i386/pr101668.c: New testcase.
* gcc.dg/vect/bb-slp-pr101668.c: Likewise.
This commit is contained in:
parent
4a6b8d9aad
commit
08afab6f86
4 changed files with 168 additions and 19 deletions
|
@ -426,7 +426,7 @@ can_vec_perm_const_p (machine_mode mode, machine_mode op_mode,
|
|||
return false;
|
||||
|
||||
/* It's probably cheaper to test for the variable case first. */
|
||||
if (allow_variable_p && selector_fits_mode_p (mode, sel))
|
||||
if (op_mode == mode && allow_variable_p && selector_fits_mode_p (mode, sel))
|
||||
{
|
||||
if (direct_optab_handler (vec_perm_optab, mode) != CODE_FOR_nothing)
|
||||
return true;
|
||||
|
|
59
gcc/testsuite/gcc.dg/vect/bb-slp-pr101668.c
Normal file
59
gcc/testsuite/gcc.dg/vect/bb-slp-pr101668.c
Normal file
|
@ -0,0 +1,59 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-additional-options "-w -Wno-psabi" } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
typedef int v4si __attribute__((vector_size(16)));
|
||||
typedef int v8si __attribute__((vector_size(32)));
|
||||
|
||||
/* Lowpart extract: copy lanes 0..3 of the 8-lane SRC into the four
   lanes of *DST, one scalar element assignment per lane.  */
void __attribute__((noipa)) test_lo (v4si *dst, v8si src)
|
||||
{
|
||||
(*dst)[0] = src[0];
|
||||
(*dst)[1] = src[1];
|
||||
(*dst)[2] = src[2];
|
||||
(*dst)[3] = src[3];
|
||||
}
|
||||
|
||||
/* Highpart extract: copy lanes 4..7 of the 8-lane SRC into the four
   lanes of *DST, one scalar element assignment per lane.  */
void __attribute__((noipa)) test_hi (v4si *dst, v8si src)
|
||||
{
|
||||
(*dst)[0] = src[4];
|
||||
(*dst)[1] = src[5];
|
||||
(*dst)[2] = src[6];
|
||||
(*dst)[3] = src[7];
|
||||
}
|
||||
|
||||
/* Even-lane extract: copy lanes 0, 2, 4 and 6 of SRC into lanes 0..3
   of *DST, in that order.  */
void __attribute__((noipa)) test_even (v4si *dst, v8si src)
|
||||
{
|
||||
(*dst)[0] = src[0];
|
||||
(*dst)[1] = src[2];
|
||||
(*dst)[2] = src[4];
|
||||
(*dst)[3] = src[6];
|
||||
}
|
||||
|
||||
/* Odd-lane extract: copy lanes 1, 3, 5 and 7 of SRC into lanes 0..3
   of *DST, in that order.  */
void __attribute__((noipa)) test_odd (v4si *dst, v8si src)
|
||||
{
|
||||
(*dst)[0] = src[1];
|
||||
(*dst)[1] = src[3];
|
||||
(*dst)[2] = src[5];
|
||||
(*dst)[3] = src[7];
|
||||
}
|
||||
|
||||
/* Run each extractor on the vector { 0, 1, ..., 7 }, where every lane
   holds its own index, and abort if any lane of the 4-lane result is
   not the expected source lane.  Returns 0 when all four checks pass.
   check_vect is provided by tree-vect.h; presumably it verifies that
   the target can run vector code -- confirm against that header.  */
int main()
|
||||
{
|
||||
check_vect ();
|
||||
v8si v = (v8si) { 0, 1, 2, 3, 4, 5, 6, 7 };
|
||||
v4si dst;
|
||||
/* Lanes 0..3.  */
test_lo (&dst, v);
|
||||
if (dst[0] != 0 || dst[1] != 1 || dst[2] != 2 || dst[3] != 3)
|
||||
abort ();
|
||||
/* Lanes 4..7.  */
test_hi (&dst, v);
|
||||
if (dst[0] != 4 || dst[1] != 5 || dst[2] != 6 || dst[3] != 7)
|
||||
abort ();
|
||||
/* Even lanes.  */
test_even (&dst, v);
|
||||
if (dst[0] != 0 || dst[1] != 2 || dst[2] != 4 || dst[3] != 6)
|
||||
abort ();
|
||||
/* Odd lanes.  */
test_odd (&dst, v);
|
||||
if (dst[0] != 1 || dst[1] != 3 || dst[2] != 5 || dst[3] != 7)
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
27
gcc/testsuite/gcc.target/i386/pr101668.c
Normal file
27
gcc/testsuite/gcc.target/i386/pr101668.c
Normal file
|
@ -0,0 +1,27 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
|
||||
|
||||
typedef int v16si __attribute__((vector_size (64)));
|
||||
typedef long long v8di __attribute__((vector_size (64)));
|
||||
|
||||
/* Widen the low eight int lanes of the 16-lane SRC to long long by
   assigning them one at a time into the scalar array TEM, then store
   TEM reinterpreted as a single v8di into dst[0].  */
void
|
||||
bar_s32_s64 (v8di * dst, v16si src)
|
||||
{
|
||||
long long tem[8];
|
||||
tem[0] = src[0];
|
||||
tem[1] = src[1];
|
||||
tem[2] = src[2];
|
||||
tem[3] = src[3];
|
||||
tem[4] = src[4];
|
||||
tem[5] = src[5];
|
||||
tem[6] = src[6];
|
||||
tem[7] = src[7];
|
||||
/* Reads the whole array back as one 64-byte vector.  */
dst[0] = *(v8di *) tem;
|
||||
}
|
||||
|
||||
/* We want to generate
|
||||
vpmovsxdq %ymm0, %zmm0
|
||||
vmovdqa64 %zmm0, (%rdi)
|
||||
ret
|
||||
*/
|
||||
/* { dg-final { scan-assembler "vpmovsxdq" } } */
|
|
@ -1086,8 +1086,13 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
|
|||
tree vec = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
|
||||
if (!is_a <bb_vec_info> (vinfo)
|
||||
|| TREE_CODE (vec) != SSA_NAME
|
||||
|| !operand_equal_p (TYPE_SIZE (vectype),
|
||||
TYPE_SIZE (TREE_TYPE (vec))))
|
||||
/* When the element types are not compatible we pun the
|
||||
source to the target vectype which requires equal size. */
|
||||
|| ((!VECTOR_TYPE_P (TREE_TYPE (vec))
|
||||
|| !types_compatible_p (TREE_TYPE (vectype),
|
||||
TREE_TYPE (TREE_TYPE (vec))))
|
||||
&& !operand_equal_p (TYPE_SIZE (vectype),
|
||||
TYPE_SIZE (TREE_TYPE (vec)))))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
|
@ -1796,11 +1801,21 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
|
|||
lperm.safe_push (std::make_pair (0, (unsigned)lane));
|
||||
}
|
||||
slp_tree vnode = vect_create_new_slp_node (vNULL);
|
||||
/* ??? We record vectype here but we hide eventually necessary
|
||||
punning and instead rely on code generation to materialize
|
||||
VIEW_CONVERT_EXPRs as necessary. We instead should make
|
||||
this explicit somehow. */
|
||||
SLP_TREE_VECTYPE (vnode) = vectype;
|
||||
if (operand_equal_p (TYPE_SIZE (vectype), TYPE_SIZE (TREE_TYPE (vec))))
|
||||
/* ??? We record vectype here but we hide eventually necessary
|
||||
punning and instead rely on code generation to materialize
|
||||
VIEW_CONVERT_EXPRs as necessary. We instead should make
|
||||
this explicit somehow. */
|
||||
SLP_TREE_VECTYPE (vnode) = vectype;
|
||||
else
|
||||
{
|
||||
/* For different size but compatible elements we can still
|
||||
use VEC_PERM_EXPR without punning. */
|
||||
gcc_assert (VECTOR_TYPE_P (TREE_TYPE (vec))
|
||||
&& types_compatible_p (TREE_TYPE (vectype),
|
||||
TREE_TYPE (TREE_TYPE (vec))));
|
||||
SLP_TREE_VECTYPE (vnode) = TREE_TYPE (vec);
|
||||
}
|
||||
SLP_TREE_VEC_DEFS (vnode).safe_push (vec);
|
||||
/* We are always building a permutation node even if it is an identity
|
||||
permute to shield the rest of the vectorizer from the odd node
|
||||
|
@ -6900,7 +6915,8 @@ vect_add_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
/* ??? We SLP match existing vector element extracts but
|
||||
allow punning which we need to re-instantiate at uses
|
||||
but have no good way of explicitly representing. */
|
||||
if (!types_compatible_p (TREE_TYPE (first_def), vectype))
|
||||
if (operand_equal_p (TYPE_SIZE (TREE_TYPE (first_def)), TYPE_SIZE (vectype))
|
||||
&& !types_compatible_p (TREE_TYPE (first_def), vectype))
|
||||
{
|
||||
gassign *conv_stmt
|
||||
= gimple_build_assign (make_ssa_name (vectype),
|
||||
|
@ -6912,7 +6928,9 @@ vect_add_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
tree perm_dest = make_ssa_name (vectype);
|
||||
if (mask_vec)
|
||||
{
|
||||
if (!types_compatible_p (TREE_TYPE (second_def), vectype))
|
||||
if (operand_equal_p (TYPE_SIZE (TREE_TYPE (first_def)),
|
||||
TYPE_SIZE (vectype))
|
||||
&& !types_compatible_p (TREE_TYPE (second_def), vectype))
|
||||
{
|
||||
gassign *conv_stmt
|
||||
= gimple_build_assign (make_ssa_name (vectype),
|
||||
|
@ -6925,9 +6943,34 @@ vect_add_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
first_def, second_def,
|
||||
mask_vec);
|
||||
}
|
||||
else if (!types_compatible_p (TREE_TYPE (first_def), vectype))
|
||||
{
|
||||
/* For identity permutes we still need to handle the case
|
||||
of lowpart extracts or concats. */
|
||||
unsigned HOST_WIDE_INT c;
|
||||
auto first_def_nunits
|
||||
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (first_def));
|
||||
if (known_le (TYPE_VECTOR_SUBPARTS (vectype), first_def_nunits))
|
||||
{
|
||||
tree lowpart = build3 (BIT_FIELD_REF, vectype, first_def,
|
||||
TYPE_SIZE (vectype), bitsize_zero_node);
|
||||
perm_stmt = gimple_build_assign (perm_dest, lowpart);
|
||||
}
|
||||
else if (constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
|
||||
first_def_nunits, &c) && c == 2)
|
||||
{
|
||||
tree ctor = build_constructor_va (vectype, 2, NULL_TREE, first_def,
|
||||
NULL_TREE, second_def);
|
||||
perm_stmt = gimple_build_assign (perm_dest, ctor);
|
||||
}
|
||||
else
|
||||
gcc_unreachable ();
|
||||
}
|
||||
else
|
||||
/* We need a copy here in case the def was external. */
|
||||
perm_stmt = gimple_build_assign (perm_dest, first_def);
|
||||
{
|
||||
/* We need a copy here in case the def was external. */
|
||||
perm_stmt = gimple_build_assign (perm_dest, first_def);
|
||||
}
|
||||
vect_finish_stmt_generation (vinfo, NULL, perm_stmt, gsi);
|
||||
/* Store the vector statement in NODE. */
|
||||
SLP_TREE_VEC_STMTS (node).quick_push (perm_stmt);
|
||||
|
@ -6950,21 +6993,32 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
{
|
||||
tree vectype = SLP_TREE_VECTYPE (node);
|
||||
|
||||
/* ??? We currently only support all same vector input and output types
|
||||
/* ??? We currently only support all same vector input types
|
||||
while the SLP IL should really do a concat + select and thus accept
|
||||
arbitrary mismatches. */
|
||||
slp_tree child;
|
||||
unsigned i;
|
||||
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
bool repeating_p = multiple_p (nunits, SLP_TREE_LANES (node));
|
||||
tree op_vectype = NULL_TREE;
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
||||
if (SLP_TREE_VECTYPE (child))
|
||||
{
|
||||
op_vectype = SLP_TREE_VECTYPE (child);
|
||||
break;
|
||||
}
|
||||
if (!op_vectype)
|
||||
op_vectype = vectype;
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
||||
{
|
||||
if (!vect_maybe_update_slp_op_vectype (child, vectype)
|
||||
|| !types_compatible_p (SLP_TREE_VECTYPE (child), vectype))
|
||||
if ((SLP_TREE_DEF_TYPE (child) != vect_internal_def
|
||||
&& !vect_maybe_update_slp_op_vectype (child, op_vectype))
|
||||
|| !types_compatible_p (SLP_TREE_VECTYPE (child), op_vectype)
|
||||
|| !types_compatible_p (TREE_TYPE (vectype), TREE_TYPE (op_vectype)))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"Unsupported lane permutation\n");
|
||||
"Unsupported vector types in lane permutation\n");
|
||||
return false;
|
||||
}
|
||||
if (SLP_TREE_LANES (child) != SLP_TREE_LANES (node))
|
||||
|
@ -7121,11 +7175,20 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
|
||||
if (index == count)
|
||||
{
|
||||
indices.new_vector (mask, second_vec.first == -1U ? 1 : 2, nunits);
|
||||
indices.new_vector (mask, second_vec.first == -1U ? 1 : 2,
|
||||
TYPE_VECTOR_SUBPARTS (op_vectype));
|
||||
bool identity_p = indices.series_p (0, 1, 0, 1);
|
||||
machine_mode vmode = TYPE_MODE (vectype);
|
||||
if (!identity_p
|
||||
&& !can_vec_perm_const_p (vmode, vmode, indices))
|
||||
machine_mode op_vmode = TYPE_MODE (op_vectype);
|
||||
unsigned HOST_WIDE_INT c;
|
||||
if ((!identity_p
|
||||
&& !can_vec_perm_const_p (vmode, op_vmode, indices))
|
||||
|| (identity_p
|
||||
&& !known_le (nunits,
|
||||
TYPE_VECTOR_SUBPARTS (op_vectype))
|
||||
&& (!constant_multiple_p (nunits,
|
||||
TYPE_VECTOR_SUBPARTS (op_vectype),
|
||||
&c) || c != 2)))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue