re PR middle-end/31699 (-march=opteron -ftree-vectorize generates wrong code)
PR tree-optimization/31699 * tree-vect-analyze.c (vect_update_misalignment_for_peel): Remove wrong code. (vect_enhance_data_refs_alignment): Compute peel amount using TYPE_VECTOR_SUBPARTS instead of vf. * tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Likewise. From-SVN: r124375
This commit is contained in:
parent
7b50cdeffb
commit
cb9ed5d79f
12 changed files with 196 additions and 45 deletions
|
@ -1,3 +1,12 @@
|
|||
2007-05-03 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
PR tree-optimization/31699
|
||||
* tree-vect-analyze.c (vect_update_misalignment_for_peel): Remove wrong
|
||||
code.
|
||||
(vect_enhance_data_refs_alignment): Compute peel amount using
|
||||
TYPE_VECTOR_SUBPARTS instead of vf.
|
||||
* tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Likewise.
|
||||
|
||||
2007-05-02 Brooks Moses <brooks.moses@codesourcery.com>
|
||||
|
||||
PR bootstrap/31776
|
||||
|
|
|
@ -1,3 +1,18 @@
|
|||
2007-05-03 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
PR tree-optimization/31699
|
||||
* lib/target-supports.exp (check_effective_target_vect_intfloat_cvt):
|
||||
New.
|
||||
(check_effective_target_vect_floatint_cvt): New.
|
||||
* gcc.dg/vect/vect-floatint-conversion-1.c: Use new keyword instead
|
||||
of specific targets.
|
||||
* gcc.dg/vect/vect-intfloat-conversion-1.c: Likewise.
|
||||
* gcc.dg/vect/vect-multitypes-1.c: One less loop gets vectorized.
|
||||
* gcc.dg/vect/vect-multitypes-4.c: Likewise.
|
||||
* gcc.dg/vect/vect-iv-4.c: Likewise.
|
||||
* gcc.dg/vect/vect-multitypes-11.c: New.
|
||||
* gcc.dg/vect/pr31699.c: New.
|
||||
|
||||
2007-05-02 Geoffrey Keating <geoffk@apple.com>
|
||||
|
||||
* gcc.c-torture/compile-limits-stringlit.c: Reduce size of string.
|
||||
|
@ -2399,7 +2414,7 @@
|
|||
Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
* gcc.dg/vect/vect-intfloat-conversion-1.c: New test.
|
||||
* gcc.dg/vect/vect-intfloat-conversion-1.c: New test.
|
||||
* gcc.dg/vect/vect-floatint-conversion-1.c: New test.
|
||||
* gcc.dg/vect/vect-93.c: Another loop gets vectorized on powerpc.
|
||||
* gcc.dg/vect/vect-113.c: Likewise.
|
||||
|
||||
|
|
35
gcc/testsuite/gcc.dg/vect/pr31699.c
Normal file
35
gcc/testsuite/gcc.dg/vect/pr31699.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/* { dg-require-effective-target vect_double } */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
float x[256];
|
||||
|
||||
/* Store x[i] + 1.0f for each of the 256 elements of the global float
   array x into a freshly malloc'ed array of 256 doubles, so the loop
   reads float elements and writes double elements.
   The malloc result is not checked, and the buffer is neither read back
   nor freed.
   NOTE(review): presumably deliberate -- the dg-final directives at the
   end of this testcase only scan the vectorizer dump; confirm before
   changing the shape of this loop.  */
void foo(void)
|
||||
{
|
||||
double *z = malloc (sizeof(double) * 256);
|
||||
|
||||
int i;
|
||||
for (i=0; i<256; ++i)
|
||||
z[i] = x[i] + 1.0f;
|
||||
}
|
||||
|
||||
|
||||
/* Test driver: call check_vect, fill x[0..255] with the values 0..255
   converted to float, then call foo.  Nothing foo computes is verified
   here; the function always returns 0.
   NOTE(review): check_vect is presumably provided by "tree-vect.h" --
   its definition is not visible in this testcase.  */
int main()
|
||||
{
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
/* int -> float conversion loop that initializes the input array.  */
for (i = 0; i < 256; i++)
|
||||
x[i] = (float) i;
|
||||
|
||||
foo();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_intfloat_cvt } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
|
@ -36,5 +36,5 @@ main (void)
|
|||
return main1 ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_floatint_cvt } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
|
@ -34,5 +34,5 @@ int main (void)
|
|||
return main1 ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target powerpc*-*-* i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_intfloat_cvt } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
|
@ -40,5 +40,5 @@ int main (void)
|
|||
return main1 ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
|
@ -14,10 +14,9 @@ int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
|
|||
|
||||
/* Current peeling-for-alignment scheme will consider the 'sa[i+7]'
|
||||
access for peeling, and therefore will examine the option of
|
||||
using a peeling factor = VF-7%VF. This will result in a peeling factor 1,
|
||||
using a peeling factor = V-7%V = 1,3 for V=8,4 respectively,
|
||||
which will also align the access to 'ia[i+3]', and the loop could be
|
||||
vectorized on all targets that support unaligned loads.
|
||||
*/
|
||||
vectorized on all targets that support unaligned loads. */
|
||||
|
||||
int main1 (int n)
|
||||
{
|
||||
|
@ -43,17 +42,16 @@ int main1 (int n)
|
|||
|
||||
/* Current peeling-for-alignment scheme will consider the 'ia[i+3]'
|
||||
access for peeling, and therefore will examine the option of
|
||||
using a peeling factor = VF-3%VF. This will result in a peeling factor
|
||||
5 if VF=8, or 1 if VF=4,2. In either case, this will also align the access
|
||||
to 'sa[i+3]', and the loop could be vectorized on targets that support
|
||||
unaligned loads. */
|
||||
using a peeling factor = (V-3)%V = 1 for V=2,4.
|
||||
This will not align the access 'sa[i+3]' (for which we need to
|
||||
peel 5 iterations), so the loop can not be vectorized. */
|
||||
|
||||
int main2 (int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* Multiple types with different sizes, used in independent
|
||||
computations. Vectorizable. */
|
||||
computations. */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
ia[i+3] = ib[i];
|
||||
|
@ -80,8 +78,11 @@ int main (void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
|
45
gcc/testsuite/gcc.dg/vect/vect-multitypes-11.c
Normal file
45
gcc/testsuite/gcc.dg/vect/vect-multitypes-11.c
Normal file
|
@ -0,0 +1,45 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64

/* Source array; explicitly aligned on a 16-byte boundary.  */
short x[N] __attribute__ ((__aligned__(16)));

/* Copy the first LEN elements of the global short array x into the int
   array Z, widening each element from short to int.

   LEN  number of elements to copy; the caller must keep it <= N and
        Z must have room for LEN ints.
   Z    destination array.

   Returns 0.  The loop itself is intentionally left exactly as in the
   original testcase.  */
int
foo (int len, int *z) {
  int i;

  for (i=0; i<len; i++) {
    z[i] = x[i];
  }

  /* The function is declared to return int; return a defined value
     instead of flowing off the end.  */
  return 0;
}
|
||||
|
||||
|
||||
/* Test driver: call check_vect, fill x[i] = i for i in [0, N), call
   foo to copy x into z starting at z[2], then abort if any copied
   element differs from its source.  Returns 0 on success.
   NOTE(review): check_vect and abort are presumably declared via
   "tree-vect.h"; only <stdarg.h> is included directly in this testcase.  */
int main (void)
|
||||
{
|
||||
short i;
|
||||
/* N+4 ints so that the N elements written from z+2 stay in bounds.  */
int z[N+4];
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
x[i] = i;
|
||||
}
|
||||
|
||||
/* NOTE(review): the destination is offset by two ints from the start
   of z, presumably so that its alignment differs from that of x --
   confirm against the expected "forced using peeling" dump scan.  */
foo (N,z+2);
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
if (z[i+2] != x[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_unpack } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! vect_unpack } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target { vect_no_align && vect_unpack } } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
@ -20,8 +20,7 @@ unsigned int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
|
|||
access for peeling, and therefore will examine the option of
|
||||
using a peeling factor = VF-7%VF. This will result in a peeling factor 1,
|
||||
which will also align the access to 'ia[i+3]', and the loop could be
|
||||
vectorized on all targets that support unaligned loads.
|
||||
*/
|
||||
vectorized on all targets that support unaligned loads. */
|
||||
|
||||
int main1 (int n)
|
||||
{
|
||||
|
@ -48,9 +47,9 @@ int main1 (int n)
|
|||
/* Current peeling-for-alignment scheme will consider the 'ia[i+3]'
|
||||
access for peeling, and therefore will examine the option of
|
||||
using a peeling factor = VF-3%VF. This will result in a peeling factor
|
||||
5 if VF=8, or 1 if VF=4,2. In either case, this will also align the access
|
||||
to 'sa[i+3]', and the loop could be vectorized on targets that support
|
||||
unaligned loads. */
|
||||
1 if VF=4,2. This will not align the access to 'sa[i+3]', for which we
|
||||
need to peel 5,1 iterations for VF=4,2 respectively, so the loop can not
|
||||
be vectorized. */
|
||||
|
||||
int main2 (int n)
|
||||
{
|
||||
|
@ -84,8 +83,11 @@ int main (void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 8 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 8 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
|
|
@ -1368,6 +1368,49 @@ proc check_effective_target_vect_int { } {
|
|||
return $et_vect_int_saved
|
||||
}
|
||||
|
||||
# Return 1 if the target supports int->float conversion, 0 otherwise.
#
# The answer is 1 when the target matches one of i?86-*-*, powerpc*-*-*
# or x86_64-*-*.  It is computed on the first call and cached in the
# global et_vect_intfloat_cvt_saved; later calls reuse the cached value.

proc check_effective_target_vect_intfloat_cvt { } {
    global et_vect_intfloat_cvt_saved

    if { ![info exists et_vect_intfloat_cvt_saved] } {
        # First query: assume unsupported, then look for a matching target.
        set et_vect_intfloat_cvt_saved 0
        foreach triplet { i?86-*-* powerpc*-*-* x86_64-*-* } {
            if { [istarget $triplet] } {
                set et_vect_intfloat_cvt_saved 1
                break
            }
        }
    } else {
        verbose "check_effective_target_vect_intfloat_cvt: using cached result" 2
    }

    verbose "check_effective_target_vect_intfloat_cvt: returning $et_vect_intfloat_cvt_saved" 2
    return $et_vect_intfloat_cvt_saved
}
|
||||
|
||||
|
||||
# Return 1 if the target supports float->int conversion, 0 otherwise.
#
# The answer is 1 when the target matches i?86-*-* or x86_64-*-*.  It is
# computed on the first call and cached in the global
# et_vect_floatint_cvt_saved; later calls reuse the cached value.

proc check_effective_target_vect_floatint_cvt { } {
    global et_vect_floatint_cvt_saved

    if { ![info exists et_vect_floatint_cvt_saved] } {
        # First query: assume unsupported, then look for a matching target.
        set et_vect_floatint_cvt_saved 0
        foreach triplet { i?86-*-* x86_64-*-* } {
            if { [istarget $triplet] } {
                set et_vect_floatint_cvt_saved 1
                break
            }
        }
    } else {
        verbose "check_effective_target_vect_floatint_cvt: using cached result" 2
    }

    verbose "check_effective_target_vect_floatint_cvt: returning $et_vect_floatint_cvt_saved" 2
    return $et_vect_floatint_cvt_saved
}
|
||||
|
||||
|
||||
# Return 1 if this is an arm target using 32-bit instructions
|
||||
proc check_effective_target_arm32 { } {
|
||||
global et_arm32_saved
|
||||
|
|
|
@ -1258,15 +1258,6 @@ vect_update_misalignment_for_peel (struct data_reference *dr,
|
|||
if (DR_GROUP_FIRST_DR (peel_stmt_info))
|
||||
dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
|
||||
|
||||
if (known_alignment_for_access_p (dr)
|
||||
&& known_alignment_for_access_p (dr_peel)
|
||||
&& (DR_MISALIGNMENT (dr) / dr_size ==
|
||||
DR_MISALIGNMENT (dr_peel) / dr_peel_size))
|
||||
{
|
||||
DR_MISALIGNMENT (dr) = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* It can be assumed that the data refs with the same alignment as dr_peel
|
||||
are aligned in the vector loop. */
|
||||
same_align_drs
|
||||
|
@ -1507,7 +1498,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
|||
the prolog loop ({VF - misalignment}), is a multiple of the
|
||||
number of the interleaved accesses. */
|
||||
int elem_size, mis_in_elements;
|
||||
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
||||
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
|
||||
int nelements = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
|
||||
/* FORNOW: handle only known alignment. */
|
||||
if (!known_alignment_for_access_p (dr))
|
||||
|
@ -1516,10 +1508,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
|||
break;
|
||||
}
|
||||
|
||||
elem_size = UNITS_PER_SIMD_WORD / vf;
|
||||
elem_size = UNITS_PER_SIMD_WORD / nelements;
|
||||
mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
|
||||
|
||||
if ((vf - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
|
||||
if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
|
||||
{
|
||||
do_peeling = false;
|
||||
break;
|
||||
|
@ -1541,6 +1533,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
|||
{
|
||||
int mis;
|
||||
int npeel = 0;
|
||||
tree stmt = DR_STMT (dr0);
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
|
||||
int nelements = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
|
||||
if (known_alignment_for_access_p (dr0))
|
||||
{
|
||||
|
@ -1550,7 +1546,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
|||
factor minus the misalignment as an element count. */
|
||||
mis = DR_MISALIGNMENT (dr0);
|
||||
mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
|
||||
npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
|
||||
npeel = nelements - mis;
|
||||
|
||||
/* For interleaved data access every iteration accesses all the
|
||||
members of the group, therefore we divide the number of iterations
|
||||
|
|
|
@ -4786,13 +4786,17 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
|
|||
prolog_niters = min ( LOOP_NITERS ,
|
||||
(VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
|
||||
where group_size is the size of the interleaved group.
|
||||
*/
|
||||
|
||||
The above formulas assume that VF == number of elements in the vector. This
|
||||
may not hold when there are multiple-types in the loop.
|
||||
In this case, for some data-references in the loop the VF does not represent
|
||||
the number of elements that fit in the vector. Therefore, instead of VF we
|
||||
use TYPE_VECTOR_SUBPARTS. */
|
||||
|
||||
static tree
|
||||
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
|
||||
{
|
||||
struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
|
||||
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
||||
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
tree var, stmt;
|
||||
tree iters, iters_name;
|
||||
|
@ -4805,6 +4809,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
|
|||
tree niters_type = TREE_TYPE (loop_niters);
|
||||
int group_size = 1;
|
||||
int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
|
||||
int nelements = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
|
||||
if (DR_GROUP_FIRST_DR (stmt_info))
|
||||
{
|
||||
|
@ -4825,7 +4830,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
|
|||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "known alignment = %d.", byte_misalign);
|
||||
iters = build_int_cst (niters_type,
|
||||
(vf - elem_misalign)&(vf/group_size-1));
|
||||
(nelements - elem_misalign)&(nelements/group_size-1));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -4837,9 +4842,9 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
|
|||
tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
|
||||
tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
|
||||
tree elem_size_log =
|
||||
build_int_cst (type, exact_log2 (vectype_align/vf));
|
||||
tree vf_minus_1 = build_int_cst (type, vf - 1);
|
||||
tree vf_tree = build_int_cst (type, vf);
|
||||
build_int_cst (type, exact_log2 (vectype_align/nelements));
|
||||
tree nelements_minus_1 = build_int_cst (type, nelements - 1);
|
||||
tree nelements_tree = build_int_cst (type, nelements);
|
||||
tree byte_misalign;
|
||||
tree elem_misalign;
|
||||
|
||||
|
@ -4854,9 +4859,9 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
|
|||
elem_misalign =
|
||||
fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
|
||||
|
||||
/* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
|
||||
iters = fold_build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
|
||||
iters = fold_build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
|
||||
/* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
|
||||
iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
|
||||
iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
|
||||
iters = fold_convert (niters_type, iters);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue