re PR tree-optimization/71488 (Wrong code for vector comparisons with ivybridge and westmere targets)

gcc/

	PR middle-end/71488
	* tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Support
	comparison of boolean vectors.
	* tree-vect-stmts.c (vectorizable_comparison): Vectorize comparison
	of boolean vectors using bitwise operations.

gcc/testsuite/

	PR middle-end/71488
	* g++.dg/pr71488.C: New test.
	* gcc.dg/vect/vect-bool-cmp.c: New test.

From-SVN: r237706
This commit is contained in:
Ilya Enkovich 2016-06-22 14:05:55 +00:00 committed by Ilya Enkovich
parent 79c94ff796
commit 49e76ff114
6 changed files with 383 additions and 6 deletions

View file

@ -1,3 +1,11 @@
2016-06-22 Ilya Enkovich <ilya.enkovich@intel.com>
PR middle-end/71488
* tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Support
comparison of boolean vectors.
* tree-vect-stmts.c (vectorizable_comparison): Vectorize comparison
of boolean vectors using bitwise operations.
2016-06-22 Andreas Schwab <schwab@suse.de>
* config/aarch64/aarch64-protos.h (aarch64_elf_asm_named_section):

View file

@ -1,3 +1,9 @@
2016-06-22 Ilya Enkovich <ilya.enkovich@intel.com>
PR middle-end/71488
* g++.dg/pr71488.C: New test.
* gcc.dg/vect/vect-bool-cmp.c: New test.
2016-06-22 Eric Botcazou <ebotcazou@adacore.com>
* gcc.dg/guality/param-5.c: New test.

View file

@ -0,0 +1,24 @@
// PR middle-end/71488
// { dg-do run }
// { dg-options "-O3 -std=c++11" }
// { dg-additional-options "-march=westmere" { target i?86-*-* x86_64-*-* } }
// { dg-require-effective-target c++11 }
#include <valarray>
int var_4 = 1;
long long var_9 = 0;
int main() {
std::valarray<std::valarray<long long>> v10;
v10.resize(1);
v10[0].resize(4);
for (int i = 0; i < 4; i++)
v10[0][i] = ((var_9 == 0) > unsigned (var_4 == 0)) + (var_9 == 0);
if (v10[0][0] != 2)
__builtin_abort ();
}

View file

@ -0,0 +1,252 @@
/* PR71488 */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_pack_trunc } */
/* { dg-additional-options "-msse4" { target { i?86-*-* x86_64-*-* } } } */
int i1, i2;
void __attribute__((noclone,noinline))
fn1 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
{
int i;
for (i = 0; i < size; i++)
p1[i] = ((p2[i] == 0) > (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
void __attribute__((noclone,noinline))
fn2 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
{
int i;
for (i = 0; i < size; i++)
p1[i] = ((p2[i] == 0) > (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
void __attribute__((noclone,noinline))
fn3 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
{
int i;
for (i = 0; i < size; i++)
p1[i] = ((p2[i] == 0) > (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
void __attribute__((noclone,noinline))
fn4 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
{
int i;
for (i = 0; i < size; i++)
p1[i] = ((p2[i] == 0) >= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
void __attribute__((noclone,noinline))
fn5 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
{
int i;
for (i = 0; i < size; i++)
p1[i] = ((p2[i] == 0) >= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
void __attribute__((noclone,noinline))
fn6 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
{
int i;
for (i = 0; i < size; i++)
p1[i] = ((p2[i] == 0) >= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
void __attribute__((noclone,noinline))
fn7 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
{
int i;
for (i = 0; i < size; i++)
p1[i] = ((p2[i] == 0) < (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
void __attribute__((noclone,noinline))
fn8 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
{
int i;
for (i = 0; i < size; i++)
p1[i] = ((p2[i] == 0) < (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
void __attribute__((noclone,noinline))
fn9 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
{
int i;
for (i = 0; i < size; i++)
p1[i] = ((p2[i] == 0) < (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
void __attribute__((noclone,noinline))
fn10 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
{
int i;
for (i = 0; i < size; i++)
p1[i] = ((p2[i] == 0) <= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
void __attribute__((noclone,noinline))
fn11 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
{
int i;
for (i = 0; i < size; i++)
p1[i] = ((p2[i] == 0) <= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
void __attribute__((noclone,noinline))
fn12 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
{
int i;
for (i = 0; i < size; i++)
p1[i] = ((p2[i] == 0) <= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
void __attribute__((noclone,noinline))
fn13 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
{
int i;
for (i = 0; i < size; i++)
p1[i] = ((p2[i] == 0) == (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
void __attribute__((noclone,noinline))
fn14 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
{
int i;
for (i = 0; i < size; i++)
p1[i] = ((p2[i] == 0) == (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
void __attribute__((noclone,noinline))
fn15 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
{
int i;
for (i = 0; i < size; i++)
p1[i] = ((p2[i] == 0) == (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
void __attribute__((noclone,noinline))
fn16 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
{
int i;
for (i = 0; i < size; i++)
p1[i] = ((p2[i] == 0) != (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
void __attribute__((noclone,noinline))
fn17 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
{
int i;
for (i = 0; i < size; i++)
p1[i] = ((p2[i] == 0) != (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
void __attribute__((noclone,noinline))
fn18 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
{
int i;
for (i = 0; i < size; i++)
p1[i] = ((p2[i] == 0) != (unsigned)(p3[i] == 0)) + (p2[i] == 0);
}
int eq (int i1, int i2) { return i1 == i2; }
int ne (int i1, int i2) { return i1 != i2; }
int lt (int i1, int i2) { return i1 < i2; }
int le (int i1, int i2) { return i1 <= i2; }
int gt (int i1, int i2) { return i1 > i2; }
int ge (int i1, int i2) { return i1 >= i2; }
typedef int (*cmp_fn)(int, int);
void
check (int *p, cmp_fn fn)
{
int i;
for (i = 0; i < 32; i++)
{
int t1 = ((i % 4) > 1) == 0;
int t2 = (i % 2) == 0;
int res = fn (t1, t2) + t1;
if (p[i] != res)
__builtin_abort ();
}
}
int
main (int argc, char **argv)
{
int i1[32], i2[32], res[32];
short s2[32];
long long l2[32];
int i;
for (i = 0; i < 32; i++)
{
l2[i] = i2[i] = s2[i] = i % 2;
i1[i] = (i % 4) > 1;
asm ("":::"memory");
}
fn1 (res, i1, i2, 32);
check (res, gt);
fn2 (res, i1, s2, 32);
check (res, gt);
fn3 (res, i1, l2, 32);
check (res, gt);
fn4 (res, i1, i2, 32);
check (res, ge);
fn5 (res, i1, s2, 32);
check (res, ge);
fn6 (res, i1, l2, 32);
check (res, ge);
fn7 (res, i1, i2, 32);
check (res, lt);
fn8 (res, i1, s2, 32);
check (res, lt);
fn9 (res, i1, l2, 32);
check (res, lt);
fn10 (res, i1, i2, 32);
check (res, le);
fn11 (res, i1, s2, 32);
check (res, le);
fn12 (res, i1, l2, 32);
check (res, le);
fn13 (res, i1, i2, 32);
check (res, eq);
fn14 (res, i1, s2, 32);
check (res, eq);
fn15 (res, i1, l2, 32);
check (res, eq);
fn16 (res, i1, i2, 32);
check (res, ne);
fn17 (res, i1, s2, 32);
check (res, ne);
fn18 (res, i1, l2, 32);
check (res, ne);
}
/* { dg-final { scan-tree-dump-times "VECTORIZED" 18 "vect" { target { i?86-*-* x86_64-*-* } } } } */

View file

@ -3763,7 +3763,8 @@ vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
if (rhs_code != BIT_IOR_EXPR
&& rhs_code != BIT_XOR_EXPR
&& rhs_code != BIT_AND_EXPR)
&& rhs_code != BIT_AND_EXPR
&& TREE_CODE_CLASS (rhs_code) != tcc_comparison)
return NULL;
rhs2 = gimple_assign_rhs2 (last_stmt);

View file

@ -7756,7 +7756,7 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
unsigned nunits;
int ncopies;
enum tree_code code;
enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
stmt_vec_info prev_stmt_info = NULL;
int i, j;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
@ -7829,11 +7829,74 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
return false;
/* Can't compare mask and non-mask types. */
if (vectype1 && vectype2
&& (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
return false;
/* Boolean values may have another representation in vectors
and therefore we prefer bit operations over comparison for
them (which also works for scalar masks). We store opcodes
to use in bitop1 and bitop2. Statement is vectorized as
BITOP2 (rhs1 BITOP1 rhs2) or
rhs1 BITOP2 (BITOP1 rhs2)
depending on bitop1 and bitop2 arity. */
if (VECTOR_BOOLEAN_TYPE_P (vectype))
{
if (code == GT_EXPR)
{
bitop1 = BIT_NOT_EXPR;
bitop2 = BIT_AND_EXPR;
}
else if (code == GE_EXPR)
{
bitop1 = BIT_NOT_EXPR;
bitop2 = BIT_IOR_EXPR;
}
else if (code == LT_EXPR)
{
bitop1 = BIT_NOT_EXPR;
bitop2 = BIT_AND_EXPR;
std::swap (rhs1, rhs2);
}
else if (code == LE_EXPR)
{
bitop1 = BIT_NOT_EXPR;
bitop2 = BIT_IOR_EXPR;
std::swap (rhs1, rhs2);
}
else
{
bitop1 = BIT_XOR_EXPR;
if (code == EQ_EXPR)
bitop2 = BIT_NOT_EXPR;
}
}
if (!vec_stmt)
{
STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL);
return expand_vec_cmp_expr_p (vectype, mask_type);
vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
dts, NULL, NULL);
if (bitop1 == NOP_EXPR)
return expand_vec_cmp_expr_p (vectype, mask_type);
else
{
machine_mode mode = TYPE_MODE (vectype);
optab optab;
optab = optab_for_tree_code (bitop1, vectype, optab_default);
if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
return false;
if (bitop2 != NOP_EXPR)
{
optab = optab_for_tree_code (bitop2, vectype, optab_default);
if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
return false;
}
return true;
}
}
/* Transform. */
@ -7890,8 +7953,31 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
vec_rhs2 = vec_oprnds1[i];
new_temp = make_ssa_name (mask);
new_stmt = gimple_build_assign (new_temp, code, vec_rhs1, vec_rhs2);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
if (bitop1 == NOP_EXPR)
{
new_stmt = gimple_build_assign (new_temp, code,
vec_rhs1, vec_rhs2);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
else
{
if (bitop1 == BIT_NOT_EXPR)
new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
else
new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
vec_rhs2);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
if (bitop2 != NOP_EXPR)
{
tree res = make_ssa_name (mask);
if (bitop2 == BIT_NOT_EXPR)
new_stmt = gimple_build_assign (res, bitop2, new_temp);
else
new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
}
if (slp_node)
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
}