[2/2] PR88598: Optimise reduc (bit_and)
This patch folds certain reductions of X & CST to X[I] & CST[I] if I is the only nonzero element of CST. This includes the motivating case in which CST[I] is -1. We could do the same for REDUC_MAX on unsigned types, but I wasn't sure that that special case was worth it. 2019-01-07 Richard Sandiford <richard.sandiford@arm.com> gcc/ PR tree-optimization/88598 * tree.h (single_nonzero_element): Declare. * tree.c (single_nonzero_element): New function. * match.pd: Fold certain reductions of X & CST to X[I] & CST[I] if I is the only nonzero element of CST. gcc/testsuite/ PR tree-optimization/88598 * gcc.dg/vect/pr88598-1.c: New test. * gcc.dg/vect/pr88598-2.c: Likewise. * gcc.dg/vect/pr88598-3.c: Likewise. * gcc.dg/vect/pr88598-4.c: Likewise. * gcc.dg/vect/pr88598-5.c: Likewise. * gcc.dg/vect/pr88598-6.c: Likewise. From-SVN: r267646
This commit is contained in:
parent
46c66a46aa
commit
f4bf2aabe3
11 changed files with 386 additions and 0 deletions
|
@ -1,3 +1,11 @@
|
|||
2019-01-07 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
PR tree-optimization/88598
|
||||
* tree.h (single_nonzero_element): Declare.
|
||||
* tree.c (single_nonzero_element): New function.
|
||||
* match.pd: Fold certain reductions of X & CST to X[I] & CST[I]
|
||||
if I is the only nonzero element of CST.
|
||||
|
||||
2019-01-07 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
PR tree-optimization/88598
|
||||
|
|
16
gcc/match.pd
16
gcc/match.pd
|
@ -5251,3 +5251,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|||
{ wide_int_to_tree (sizetype, off); })
|
||||
{ swap_p ? @0 : @2; }))
|
||||
{ rhs_tree; })))))))))
|
||||
|
||||
/* Fold REDUC (@0 & @1) -> @0[I] & @1[I] if element I is the only nonzero
|
||||
element of @1. */
|
||||
(for reduc (IFN_REDUC_PLUS IFN_REDUC_IOR IFN_REDUC_XOR)
|
||||
(simplify (reduc (view_convert? (bit_and @0 VECTOR_CST@1)))
|
||||
(with { int i = single_nonzero_element (@1); }
|
||||
(if (i >= 0)
|
||||
(with { tree elt = vector_cst_elt (@1, i);
|
||||
tree elt_type = TREE_TYPE (elt);
|
||||
unsigned int elt_bits = tree_to_uhwi (TYPE_SIZE (elt_type));
|
||||
tree size = bitsize_int (elt_bits);
|
||||
tree pos = bitsize_int (elt_bits * i); }
|
||||
(view_convert
|
||||
(bit_and:elt_type
|
||||
(BIT_FIELD_REF:elt_type @0 { size; } { pos; })
|
||||
{ elt; })))))))
|
||||
|
|
|
@ -1,3 +1,13 @@
|
|||
2019-01-07 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
PR tree-optimization/88598
|
||||
* gcc.dg/vect/pr88598-1.c: New test.
|
||||
* gcc.dg/vect/pr88598-2.c: Likewise.
|
||||
* gcc.dg/vect/pr88598-3.c: Likewise.
|
||||
* gcc.dg/vect/pr88598-4.c: Likewise.
|
||||
* gcc.dg/vect/pr88598-5.c: Likewise.
|
||||
* gcc.dg/vect/pr88598-6.c: Likewise.
|
||||
|
||||
2019-01-07 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
PR tree-optimization/88598
|
||||
|
|
55
gcc/testsuite/gcc.dg/vect/pr88598-1.c
Normal file
55
gcc/testsuite/gcc.dg/vect/pr88598-1.c
Normal file
|
@ -0,0 +1,55 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 4
|
||||
|
||||
int a[N];
|
||||
|
||||
int __attribute__ ((noipa))
|
||||
f1 (void)
|
||||
{
|
||||
int b[N] = { 15, 0, 0, 0 }, res = 0;
|
||||
for (int i = 0; i < N; ++i)
|
||||
res += a[i] & b[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
int __attribute__ ((noipa))
|
||||
f2 (void)
|
||||
{
|
||||
int b[N] = { 0, 31, 0, 0 }, res = 0;
|
||||
for (int i = 0; i < N; ++i)
|
||||
res += a[i] & b[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
int __attribute__ ((noipa))
|
||||
f3 (void)
|
||||
{
|
||||
int b[N] = { 0, 0, 0, -1 }, res = 0;
|
||||
for (int i = 0; i < N; ++i)
|
||||
res += a[i] & b[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
for (int i = 0; i < N; ++i)
|
||||
a[i] = 0xe0 + i;
|
||||
|
||||
if (f1 () != (a[0] & 15)
|
||||
|| f2 () != (a[1] & 31)
|
||||
|| f3 () != a[N - 1])
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ??? We need more constant folding for this to work with fully-masked
|
||||
loops. */
|
||||
/* { dg-final { scan-tree-dump-not {REDUC_PLUS} "optimized" { xfail aarch64_sve } } } */
|
55
gcc/testsuite/gcc.dg/vect/pr88598-2.c
Normal file
55
gcc/testsuite/gcc.dg/vect/pr88598-2.c
Normal file
|
@ -0,0 +1,55 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 4
|
||||
|
||||
int a[N];
|
||||
|
||||
int __attribute__ ((noipa))
|
||||
f1 (void)
|
||||
{
|
||||
int b[N] = { 1, 0, 0, 0 }, res = 0;
|
||||
for (int i = 0; i < N; ++i)
|
||||
res += a[i] * b[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
int __attribute__ ((noipa))
|
||||
f2 (void)
|
||||
{
|
||||
int b[N] = { 0, 1, 0, 0 }, res = 0;
|
||||
for (int i = 0; i < N; ++i)
|
||||
res += a[i] * b[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
int __attribute__ ((noipa))
|
||||
f3 (void)
|
||||
{
|
||||
int b[N] = { 0, 0, 0, 1 }, res = 0;
|
||||
for (int i = 0; i < N; ++i)
|
||||
res += a[i] * b[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
for (int i = 0; i < N; ++i)
|
||||
a[i] = 0xe0 + i;
|
||||
|
||||
if (f1 () != a[0]
|
||||
|| f2 () != a[1]
|
||||
|| f3 () != a[N - 1])
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ??? We need more constant folding for this to work with fully-masked
|
||||
loops. */
|
||||
/* { dg-final { scan-tree-dump-not {REDUC_PLUS} "optimized" { xfail aarch64_sve } } } */
|
55
gcc/testsuite/gcc.dg/vect/pr88598-3.c
Normal file
55
gcc/testsuite/gcc.dg/vect/pr88598-3.c
Normal file
|
@ -0,0 +1,55 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-additional-options "-ffast-math -fdump-tree-optimized" } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 4
|
||||
|
||||
float a[N];
|
||||
|
||||
float __attribute__ ((noipa))
|
||||
f1 (void)
|
||||
{
|
||||
float b[N] = { 1, 0, 0, 0 }, res = 0;
|
||||
for (int i = 0; i < N; ++i)
|
||||
res += a[i] * b[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
float __attribute__ ((noipa))
|
||||
f2 (void)
|
||||
{
|
||||
float b[N] = { 0, 1, 0, 0 }, res = 0;
|
||||
for (int i = 0; i < N; ++i)
|
||||
res += a[i] * b[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
float __attribute__ ((noipa))
|
||||
f3 (void)
|
||||
{
|
||||
float b[N] = { 0, 0, 0, 1 }, res = 0;
|
||||
for (int i = 0; i < N; ++i)
|
||||
res += a[i] * b[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
for (int i = 0; i < N; ++i)
|
||||
a[i] = 0xe0 + i;
|
||||
|
||||
if (f1 () != a[0]
|
||||
|| f2 () != a[1]
|
||||
|| f3 () != a[N - 1])
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ??? We need more constant folding for this to work with fully-masked
|
||||
loops. */
|
||||
/* { dg-final { scan-tree-dump-not {REDUC_PLUS} "optimized" { xfail aarch64_sve } } } */
|
51
gcc/testsuite/gcc.dg/vect/pr88598-4.c
Normal file
51
gcc/testsuite/gcc.dg/vect/pr88598-4.c
Normal file
|
@ -0,0 +1,51 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 4
|
||||
|
||||
int a[N];
|
||||
|
||||
int __attribute__ ((noipa))
|
||||
f1 (void)
|
||||
{
|
||||
int b[N] = { 0, 15, 15, 15 }, res = 0;
|
||||
for (int i = 0; i < N; ++i)
|
||||
res += a[i] & b[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
int __attribute__ ((noipa))
|
||||
f2 (void)
|
||||
{
|
||||
int b[N] = { 0, 31, 0, 31 }, res = 0;
|
||||
for (int i = 0; i < N; ++i)
|
||||
res += a[i] & b[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
int __attribute__ ((noipa))
|
||||
f3 (void)
|
||||
{
|
||||
int b[N] = { -1, -1, 0, 0 }, res = 0;
|
||||
for (int i = 0; i < N; ++i)
|
||||
res += a[i] & b[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
for (int i = 0; i < N; ++i)
|
||||
a[i] = 0xe0 + i;
|
||||
|
||||
if (f1 () != (a[1] & 15) + (a[2] & 15) + (a[3] & 15)
|
||||
|| f2 () != (a[1] & 31) + (a[3] & 31)
|
||||
|| f3 () != a[0] + a[1])
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
51
gcc/testsuite/gcc.dg/vect/pr88598-5.c
Normal file
51
gcc/testsuite/gcc.dg/vect/pr88598-5.c
Normal file
|
@ -0,0 +1,51 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 4
|
||||
|
||||
int a[N];
|
||||
|
||||
int __attribute__ ((noipa))
|
||||
f1 (void)
|
||||
{
|
||||
int b[N] = { 0, 1, 1, 1 }, res = 0;
|
||||
for (int i = 0; i < N; ++i)
|
||||
res += a[i] * b[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
int __attribute__ ((noipa))
|
||||
f2 (void)
|
||||
{
|
||||
int b[N] = { 0, 1, 0, 1 }, res = 0;
|
||||
for (int i = 0; i < N; ++i)
|
||||
res += a[i] * b[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
int __attribute__ ((noipa))
|
||||
f3 (void)
|
||||
{
|
||||
int b[N] = { 1, 1, 0, 0 }, res = 0;
|
||||
for (int i = 0; i < N; ++i)
|
||||
res += a[i] * b[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
for (int i = 0; i < N; ++i)
|
||||
a[i] = 0xe0 + i;
|
||||
|
||||
if (f1 () != a[1] + a[2] + a[3]
|
||||
|| f2 () != a[1] + a[3]
|
||||
|| f3 () != a[0] + a[1])
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
51
gcc/testsuite/gcc.dg/vect/pr88598-6.c
Normal file
51
gcc/testsuite/gcc.dg/vect/pr88598-6.c
Normal file
|
@ -0,0 +1,51 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-additional-options "-ffast-math -fdump-tree-optimized" } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 4
|
||||
|
||||
float a[N];
|
||||
|
||||
float __attribute__ ((noipa))
|
||||
f1 (void)
|
||||
{
|
||||
float b[N] = { 0, 1, 1, 1 }, res = 0;
|
||||
for (int i = 0; i < N; ++i)
|
||||
res += a[i] * b[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
float __attribute__ ((noipa))
|
||||
f2 (void)
|
||||
{
|
||||
float b[N] = { 0, 1, 0, 1 }, res = 0;
|
||||
for (int i = 0; i < N; ++i)
|
||||
res += a[i] * b[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
float __attribute__ ((noipa))
|
||||
f3 (void)
|
||||
{
|
||||
float b[N] = { 1, 1, 0, 0 }, res = 0;
|
||||
for (int i = 0; i < N; ++i)
|
||||
res += a[i] * b[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
for (int i = 0; i < N; ++i)
|
||||
a[i] = 0xe0 + i;
|
||||
|
||||
if (f1 () != a[1] + a[2] + a[3]
|
||||
|| f2 () != a[1] + a[3]
|
||||
|| f3 () != a[0] + a[1])
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
32
gcc/tree.c
32
gcc/tree.c
|
@ -11340,6 +11340,38 @@ uniform_integer_cst_p (tree t)
|
|||
return NULL_TREE;
|
||||
}
|
||||
|
||||
/* If VECTOR_CST T has a single nonzero element, return the index of that
|
||||
element, otherwise return -1. */
|
||||
|
||||
int
|
||||
single_nonzero_element (const_tree t)
|
||||
{
|
||||
unsigned HOST_WIDE_INT nelts;
|
||||
unsigned int repeat_nelts;
|
||||
if (VECTOR_CST_NELTS (t).is_constant (&nelts))
|
||||
repeat_nelts = nelts;
|
||||
else if (VECTOR_CST_NELTS_PER_PATTERN (t) == 2)
|
||||
{
|
||||
nelts = vector_cst_encoded_nelts (t);
|
||||
repeat_nelts = VECTOR_CST_NPATTERNS (t);
|
||||
}
|
||||
else
|
||||
return -1;
|
||||
|
||||
int res = -1;
|
||||
for (unsigned int i = 0; i < nelts; ++i)
|
||||
{
|
||||
tree elt = vector_cst_elt (t, i);
|
||||
if (!integer_zerop (elt) && !real_zerop (elt))
|
||||
{
|
||||
if (res >= 0 || i >= repeat_nelts)
|
||||
return -1;
|
||||
res = i;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Build an empty statement at location LOC. */
|
||||
|
||||
tree
|
||||
|
|
|
@ -4522,6 +4522,8 @@ extern tree uniform_vector_p (const_tree);
|
|||
|
||||
extern tree uniform_integer_cst_p (tree);
|
||||
|
||||
extern int single_nonzero_element (const_tree);
|
||||
|
||||
/* Given a CONSTRUCTOR CTOR, return the element values as a vector. */
|
||||
|
||||
extern vec<tree, va_gc> *ctor_to_vec (tree);
|
||||
|
|
Loading…
Add table
Reference in a new issue