re PR tree-optimization/71488 (Wrong code for vector comparisons with ivybridge and westmere targets)
gcc/ PR middle-end/71488 * tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Support comparison of boolean vectors. * tree-vect-stmts.c (vectorizable_comparison): Vectorize comparison of boolean vectors using bitwise operations. gcc/testsuite/ PR middle-end/71488 * g++.dg/pr71488.C: New test. * gcc.dg/vect/vect-bool-cmp.c: New test. From-SVN: r237706
This commit is contained in:
parent
79c94ff796
commit
49e76ff114
6 changed files with 383 additions and 6 deletions
|
@ -1,3 +1,11 @@
|
|||
2016-06-22 Ilya Enkovich <ilya.enkovich@intel.com>
|
||||
|
||||
PR middle-end/71488
|
||||
* tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Support
|
||||
comparison of boolean vectors.
|
||||
* tree-vect-stmts.c (vectorizable_comparison): Vectorize comparison
|
||||
of boolean vectors using bitwise operations.
|
||||
|
||||
2016-06-22 Andreas Schwab <schwab@suse.de>
|
||||
|
||||
* config/aarch64/aarch64-protos.h (aarch64_elf_asm_named_section):
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
2016-06-22 Ilya Enkovich <ilya.enkovich@intel.com>
|
||||
|
||||
PR middle-end/71488
|
||||
* g++.dg/pr71488.C: New test.
|
||||
* gcc.dg/vect/vect-bool-cmp.c: New test.
|
||||
|
||||
2016-06-22 Eric Botcazou <ebotcazou@adacore.com>
|
||||
|
||||
* gcc.dg/guality/param-5.c: New test.
|
||||
|
|
24
gcc/testsuite/g++.dg/pr71488.C
Normal file
24
gcc/testsuite/g++.dg/pr71488.C
Normal file
|
@ -0,0 +1,24 @@
|
|||
// PR middle-end/71488
|
||||
// { dg-do run }
|
||||
// { dg-options "-O3 -std=c++11" }
|
||||
// { dg-additional-options "-march=westmere" { target i?86-*-* x86_64-*-* } }
|
||||
// { dg-require-effective-target c++11 }
|
||||
|
||||
#include <valarray>
|
||||
|
||||
// Operand of the comparison evaluated in main; it is nonzero, so
// (var_4 == 0) is false there.
int var_4 = 1;
|
||||
// Operand of the comparison evaluated in main; it is zero, so
// (var_9 == 0) is true there.
long long var_9 = 0;
|
||||
|
||||
int main() {
|
||||
|
||||
std::valarray<std::valarray<long long>> v10;
|
||||
|
||||
v10.resize(1);
|
||||
v10[0].resize(4);
|
||||
|
||||
for (int i = 0; i < 4; i++)
|
||||
v10[0][i] = ((var_9 == 0) > unsigned (var_4 == 0)) + (var_9 == 0);
|
||||
|
||||
if (v10[0][0] != 2)
|
||||
__builtin_abort ();
|
||||
}
|
252
gcc/testsuite/gcc.dg/vect/vect-bool-cmp.c
Normal file
252
gcc/testsuite/gcc.dg/vect/vect-bool-cmp.c
Normal file
|
@ -0,0 +1,252 @@
|
|||
/* PR71488 */
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
/* { dg-require-effective-target vect_pack_trunc } */
|
||||
/* { dg-additional-options "-msse4" { target { i?86-*-* x86_64-*-* } } } */
|
||||
|
||||
/* File-scope ints.  No function in this test reads or writes them: every
   later use of the names i1/i2 is a parameter or a local array that
   shadows these.  */
int i1, i2;
|
||||
|
||||
/* For each i in [0, SIZE), store
     ((P2[i] == 0) > (P3[i] == 0)) + (P2[i] == 0)
   in P1[i].  Both equality results are 0/1 values that are then compared
   with each other (GT), i.e. a comparison of booleans.  P2 and P3 both
   hold `int' elements.  noinline/noclone ask GCC not to inline or clone
   the function.  */
void __attribute__((noclone,noinline))
fn1 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
{
  int i;

  for (i = 0; i < size; i++)
    p1[i] = ((p2[i] == 0) > (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
|
||||
|
||||
/* For each i in [0, SIZE), store
     ((P2[i] == 0) > (P3[i] == 0)) + (P2[i] == 0)
   in P1[i] (GT comparison of two boolean results).  P3 holds `short'
   elements while P2 holds `int', so the two booleans come from operands
   of different element types.  noinline/noclone ask GCC not to inline or
   clone the function.  */
void __attribute__((noclone,noinline))
fn2 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
{
  int i;

  for (i = 0; i < size; i++)
    p1[i] = ((p2[i] == 0) > (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
|
||||
|
||||
/* For each i in [0, SIZE), store
     ((P2[i] == 0) > (P3[i] == 0)) + (P2[i] == 0)
   in P1[i] (GT comparison of two boolean results).  P3 holds `long long'
   elements while P2 holds `int', so the two booleans come from operands
   of different element types.  noinline/noclone ask GCC not to inline or
   clone the function.  */
void __attribute__((noclone,noinline))
fn3 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
{
  int i;

  for (i = 0; i < size; i++)
    p1[i] = ((p2[i] == 0) > (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
|
||||
|
||||
/* For each i in [0, SIZE), store
     ((P2[i] == 0) >= (P3[i] == 0)) + (P2[i] == 0)
   in P1[i].  Both equality results are 0/1 values that are then compared
   with each other (GE), i.e. a comparison of booleans.  P2 and P3 both
   hold `int' elements.  noinline/noclone ask GCC not to inline or clone
   the function.  */
void __attribute__((noclone,noinline))
fn4 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
{
  int i;

  for (i = 0; i < size; i++)
    p1[i] = ((p2[i] == 0) >= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
|
||||
|
||||
/* For each i in [0, SIZE), store
     ((P2[i] == 0) >= (P3[i] == 0)) + (P2[i] == 0)
   in P1[i] (GE comparison of two boolean results).  P3 holds `short'
   elements while P2 holds `int', so the two booleans come from operands
   of different element types.  noinline/noclone ask GCC not to inline or
   clone the function.  */
void __attribute__((noclone,noinline))
fn5 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
{
  int i;

  for (i = 0; i < size; i++)
    p1[i] = ((p2[i] == 0) >= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
|
||||
|
||||
/* For each i in [0, SIZE), store
     ((P2[i] == 0) >= (P3[i] == 0)) + (P2[i] == 0)
   in P1[i] (GE comparison of two boolean results).  P3 holds `long long'
   elements while P2 holds `int', so the two booleans come from operands
   of different element types.  noinline/noclone ask GCC not to inline or
   clone the function.  */
void __attribute__((noclone,noinline))
fn6 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
{
  int i;

  for (i = 0; i < size; i++)
    p1[i] = ((p2[i] == 0) >= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
|
||||
|
||||
/* For each i in [0, SIZE), store
     ((P2[i] == 0) < (P3[i] == 0)) + (P2[i] == 0)
   in P1[i].  Both equality results are 0/1 values that are then compared
   with each other (LT), i.e. a comparison of booleans.  P2 and P3 both
   hold `int' elements.  noinline/noclone ask GCC not to inline or clone
   the function.  */
void __attribute__((noclone,noinline))
fn7 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
{
  int i;

  for (i = 0; i < size; i++)
    p1[i] = ((p2[i] == 0) < (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
|
||||
|
||||
/* For each i in [0, SIZE), store
     ((P2[i] == 0) < (P3[i] == 0)) + (P2[i] == 0)
   in P1[i] (LT comparison of two boolean results).  P3 holds `short'
   elements while P2 holds `int', so the two booleans come from operands
   of different element types.  noinline/noclone ask GCC not to inline or
   clone the function.  */
void __attribute__((noclone,noinline))
fn8 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
{
  int i;

  for (i = 0; i < size; i++)
    p1[i] = ((p2[i] == 0) < (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
|
||||
|
||||
/* For each i in [0, SIZE), store
     ((P2[i] == 0) < (P3[i] == 0)) + (P2[i] == 0)
   in P1[i] (LT comparison of two boolean results).  P3 holds `long long'
   elements while P2 holds `int', so the two booleans come from operands
   of different element types.  noinline/noclone ask GCC not to inline or
   clone the function.  */
void __attribute__((noclone,noinline))
fn9 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
{
  int i;

  for (i = 0; i < size; i++)
    p1[i] = ((p2[i] == 0) < (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
|
||||
|
||||
/* For each i in [0, SIZE), store
     ((P2[i] == 0) <= (P3[i] == 0)) + (P2[i] == 0)
   in P1[i].  Both equality results are 0/1 values that are then compared
   with each other (LE), i.e. a comparison of booleans.  P2 and P3 both
   hold `int' elements.  noinline/noclone ask GCC not to inline or clone
   the function.  */
void __attribute__((noclone,noinline))
fn10 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
{
  int i;

  for (i = 0; i < size; i++)
    p1[i] = ((p2[i] == 0) <= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
|
||||
|
||||
/* For each i in [0, SIZE), store
     ((P2[i] == 0) <= (P3[i] == 0)) + (P2[i] == 0)
   in P1[i] (LE comparison of two boolean results).  P3 holds `short'
   elements while P2 holds `int', so the two booleans come from operands
   of different element types.  noinline/noclone ask GCC not to inline or
   clone the function.  */
void __attribute__((noclone,noinline))
fn11 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
{
  int i;

  for (i = 0; i < size; i++)
    p1[i] = ((p2[i] == 0) <= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
|
||||
|
||||
/* For each i in [0, SIZE), store
     ((P2[i] == 0) <= (P3[i] == 0)) + (P2[i] == 0)
   in P1[i] (LE comparison of two boolean results).  P3 holds `long long'
   elements while P2 holds `int', so the two booleans come from operands
   of different element types.  noinline/noclone ask GCC not to inline or
   clone the function.  */
void __attribute__((noclone,noinline))
fn12 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
{
  int i;

  for (i = 0; i < size; i++)
    p1[i] = ((p2[i] == 0) <= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
|
||||
|
||||
/* For each i in [0, SIZE), store
     ((P2[i] == 0) == (P3[i] == 0)) + (P2[i] == 0)
   in P1[i].  Both inner equality results are 0/1 values that are then
   compared with each other (EQ), i.e. a comparison of booleans.  P2 and
   P3 both hold `int' elements.  noinline/noclone ask GCC not to inline or
   clone the function.  */
void __attribute__((noclone,noinline))
fn13 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
{
  int i;

  for (i = 0; i < size; i++)
    p1[i] = ((p2[i] == 0) == (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
|
||||
|
||||
/* For each i in [0, SIZE), store
     ((P2[i] == 0) == (P3[i] == 0)) + (P2[i] == 0)
   in P1[i] (EQ comparison of two boolean results).  P3 holds `short'
   elements while P2 holds `int', so the two booleans come from operands
   of different element types.  noinline/noclone ask GCC not to inline or
   clone the function.  */
void __attribute__((noclone,noinline))
fn14 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
{
  int i;

  for (i = 0; i < size; i++)
    p1[i] = ((p2[i] == 0) == (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
|
||||
|
||||
/* For each i in [0, SIZE), store
     ((P2[i] == 0) == (P3[i] == 0)) + (P2[i] == 0)
   in P1[i] (EQ comparison of two boolean results).  P3 holds `long long'
   elements while P2 holds `int', so the two booleans come from operands
   of different element types.  noinline/noclone ask GCC not to inline or
   clone the function.  */
void __attribute__((noclone,noinline))
fn15 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
{
  int i;

  for (i = 0; i < size; i++)
    p1[i] = ((p2[i] == 0) == (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
|
||||
|
||||
/* For each i in [0, SIZE), store
     ((P2[i] == 0) != (P3[i] == 0)) + (P2[i] == 0)
   in P1[i].  Both equality results are 0/1 values that are then compared
   with each other (NE), i.e. a comparison of booleans.  P2 and P3 both
   hold `int' elements.  noinline/noclone ask GCC not to inline or clone
   the function.  */
void __attribute__((noclone,noinline))
fn16 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
{
  int i;

  for (i = 0; i < size; i++)
    p1[i] = ((p2[i] == 0) != (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
|
||||
|
||||
/* For each i in [0, SIZE), store
     ((P2[i] == 0) != (P3[i] == 0)) + (P2[i] == 0)
   in P1[i] (NE comparison of two boolean results).  P3 holds `short'
   elements while P2 holds `int', so the two booleans come from operands
   of different element types.  noinline/noclone ask GCC not to inline or
   clone the function.  */
void __attribute__((noclone,noinline))
fn17 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
{
  int i;

  for (i = 0; i < size; i++)
    p1[i] = ((p2[i] == 0) != (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
|
||||
|
||||
/* For each i in [0, SIZE), store
     ((P2[i] == 0) != (P3[i] == 0)) + (P2[i] == 0)
   in P1[i] (NE comparison of two boolean results).  P3 holds `long long'
   elements while P2 holds `int', so the two booleans come from operands
   of different element types.  noinline/noclone ask GCC not to inline or
   clone the function.  */
void __attribute__((noclone,noinline))
fn18 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
{
  int i;

  for (i = 0; i < size; i++)
    p1[i] = ((p2[i] == 0) != (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
|
||||
|
||||
/* Scalar reference for `==': yields 1 when I1 equals I2, otherwise 0.  */
int
eq (int i1, int i2)
{
  int result = (i1 == i2);
  return result;
}
|
||||
/* Scalar reference for `!=': yields 1 when I1 differs from I2, otherwise 0.  */
int
ne (int i1, int i2)
{
  int result = (i1 != i2);
  return result;
}
|
||||
/* Scalar reference for `<': yields 1 when I1 is less than I2, otherwise 0.  */
int
lt (int i1, int i2)
{
  int result = (i1 < i2);
  return result;
}
|
||||
/* Scalar reference for `<=': yields 1 when I1 is less than or equal to I2,
   otherwise 0.  */
int
le (int i1, int i2)
{
  int result = (i1 <= i2);
  return result;
}
|
||||
/* Scalar reference for `>': yields 1 when I1 is greater than I2,
   otherwise 0.  */
int
gt (int i1, int i2)
{
  int result = (i1 > i2);
  return result;
}
|
||||
/* Scalar reference for `>=': yields 1 when I1 is greater than or equal to
   I2, otherwise 0.  */
int
ge (int i1, int i2)
{
  int result = (i1 >= i2);
  return result;
}
|
||||
|
||||
/* Pointer to a function taking two ints and returning int.  The scalar
   predicates eq/ne/lt/le/gt/ge have this signature and are passed to
   `check' through it.  */
typedef int (*cmp_fn)(int, int);
|
||||
|
||||
void
|
||||
check (int *p, cmp_fn fn)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
int t1 = ((i % 4) > 1) == 0;
|
||||
int t2 = (i % 2) == 0;
|
||||
int res = fn (t1, t2) + t1;
|
||||
if (p[i] != res)
|
||||
__builtin_abort ();
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
int i1[32], i2[32], res[32];
|
||||
short s2[32];
|
||||
long long l2[32];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
l2[i] = i2[i] = s2[i] = i % 2;
|
||||
i1[i] = (i % 4) > 1;
|
||||
asm ("":::"memory");
|
||||
}
|
||||
|
||||
fn1 (res, i1, i2, 32);
|
||||
check (res, gt);
|
||||
fn2 (res, i1, s2, 32);
|
||||
check (res, gt);
|
||||
fn3 (res, i1, l2, 32);
|
||||
check (res, gt);
|
||||
|
||||
fn4 (res, i1, i2, 32);
|
||||
check (res, ge);
|
||||
fn5 (res, i1, s2, 32);
|
||||
check (res, ge);
|
||||
fn6 (res, i1, l2, 32);
|
||||
check (res, ge);
|
||||
|
||||
fn7 (res, i1, i2, 32);
|
||||
check (res, lt);
|
||||
fn8 (res, i1, s2, 32);
|
||||
check (res, lt);
|
||||
fn9 (res, i1, l2, 32);
|
||||
check (res, lt);
|
||||
|
||||
fn10 (res, i1, i2, 32);
|
||||
check (res, le);
|
||||
fn11 (res, i1, s2, 32);
|
||||
check (res, le);
|
||||
fn12 (res, i1, l2, 32);
|
||||
check (res, le);
|
||||
|
||||
fn13 (res, i1, i2, 32);
|
||||
check (res, eq);
|
||||
fn14 (res, i1, s2, 32);
|
||||
check (res, eq);
|
||||
fn15 (res, i1, l2, 32);
|
||||
check (res, eq);
|
||||
|
||||
fn16 (res, i1, i2, 32);
|
||||
check (res, ne);
|
||||
fn17 (res, i1, s2, 32);
|
||||
check (res, ne);
|
||||
fn18 (res, i1, l2, 32);
|
||||
check (res, ne);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "VECTORIZED" 18 "vect" { target { i?86-*-* x86_64-*-* } } } } */
|
|
@ -3763,7 +3763,8 @@ vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
|
|||
|
||||
if (rhs_code != BIT_IOR_EXPR
|
||||
&& rhs_code != BIT_XOR_EXPR
|
||||
&& rhs_code != BIT_AND_EXPR)
|
||||
&& rhs_code != BIT_AND_EXPR
|
||||
&& TREE_CODE_CLASS (rhs_code) != tcc_comparison)
|
||||
return NULL;
|
||||
|
||||
rhs2 = gimple_assign_rhs2 (last_stmt);
|
||||
|
|
|
@ -7756,7 +7756,7 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
|
|||
enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
|
||||
unsigned nunits;
|
||||
int ncopies;
|
||||
enum tree_code code;
|
||||
enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
|
||||
stmt_vec_info prev_stmt_info = NULL;
|
||||
int i, j;
|
||||
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
|
||||
|
@ -7829,11 +7829,74 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
|
|||
else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
|
||||
return false;
|
||||
|
||||
/* Can't compare mask and non-mask types. */
|
||||
if (vectype1 && vectype2
|
||||
&& (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
|
||||
return false;
|
||||
|
||||
/* Boolean values may have another representation in vectors
|
||||
and therefore we prefer bit operations over comparison for
|
||||
them (which also works for scalar masks). We store opcodes
|
||||
to use in bitop1 and bitop2. Statement is vectorized as
|
||||
BITOP2 (rhs1 BITOP1 rhs2) or
|
||||
rhs1 BITOP2 (BITOP1 rhs2)
|
||||
depending on bitop1 and bitop2 arity. */
|
||||
if (VECTOR_BOOLEAN_TYPE_P (vectype))
|
||||
{
|
||||
if (code == GT_EXPR)
|
||||
{
|
||||
bitop1 = BIT_NOT_EXPR;
|
||||
bitop2 = BIT_AND_EXPR;
|
||||
}
|
||||
else if (code == GE_EXPR)
|
||||
{
|
||||
bitop1 = BIT_NOT_EXPR;
|
||||
bitop2 = BIT_IOR_EXPR;
|
||||
}
|
||||
else if (code == LT_EXPR)
|
||||
{
|
||||
bitop1 = BIT_NOT_EXPR;
|
||||
bitop2 = BIT_AND_EXPR;
|
||||
std::swap (rhs1, rhs2);
|
||||
}
|
||||
else if (code == LE_EXPR)
|
||||
{
|
||||
bitop1 = BIT_NOT_EXPR;
|
||||
bitop2 = BIT_IOR_EXPR;
|
||||
std::swap (rhs1, rhs2);
|
||||
}
|
||||
else
|
||||
{
|
||||
bitop1 = BIT_XOR_EXPR;
|
||||
if (code == EQ_EXPR)
|
||||
bitop2 = BIT_NOT_EXPR;
|
||||
}
|
||||
}
|
||||
|
||||
if (!vec_stmt)
|
||||
{
|
||||
STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
|
||||
vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL);
|
||||
return expand_vec_cmp_expr_p (vectype, mask_type);
|
||||
vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
|
||||
dts, NULL, NULL);
|
||||
if (bitop1 == NOP_EXPR)
|
||||
return expand_vec_cmp_expr_p (vectype, mask_type);
|
||||
else
|
||||
{
|
||||
machine_mode mode = TYPE_MODE (vectype);
|
||||
optab optab;
|
||||
|
||||
optab = optab_for_tree_code (bitop1, vectype, optab_default);
|
||||
if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
|
||||
return false;
|
||||
|
||||
if (bitop2 != NOP_EXPR)
|
||||
{
|
||||
optab = optab_for_tree_code (bitop2, vectype, optab_default);
|
||||
if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Transform. */
|
||||
|
@ -7890,8 +7953,31 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
|
|||
vec_rhs2 = vec_oprnds1[i];
|
||||
|
||||
new_temp = make_ssa_name (mask);
|
||||
new_stmt = gimple_build_assign (new_temp, code, vec_rhs1, vec_rhs2);
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
if (bitop1 == NOP_EXPR)
|
||||
{
|
||||
new_stmt = gimple_build_assign (new_temp, code,
|
||||
vec_rhs1, vec_rhs2);
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (bitop1 == BIT_NOT_EXPR)
|
||||
new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
|
||||
else
|
||||
new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
|
||||
vec_rhs2);
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
if (bitop2 != NOP_EXPR)
|
||||
{
|
||||
tree res = make_ssa_name (mask);
|
||||
if (bitop2 == BIT_NOT_EXPR)
|
||||
new_stmt = gimple_build_assign (res, bitop2, new_temp);
|
||||
else
|
||||
new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
|
||||
new_temp);
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
}
|
||||
}
|
||||
if (slp_node)
|
||||
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue