re PR middle-end/31699 (-march=opteron -ftree-vectorize generates wrong code)

PR tree-optimization/31699
        * tree-vect-analyze.c (vect_update_misalignment_for_peel): Remove wrong
        code.
        (vect_enhance_data_refs_alignment): Compute peel amount using
        TYPE_VECTOR_SUBPARTS instead of vf.
        * tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Likewise.

From-SVN: r124375
This commit is contained in:
Dorit Nuzman 2007-05-03 12:54:45 +00:00
parent 7b50cdeffb
commit cb9ed5d79f
12 changed files with 196 additions and 45 deletions

View file

@ -1,3 +1,12 @@
2007-05-03 Dorit Nuzman <dorit@il.ibm.com>
PR tree-optimization/31699
* tree-vect-analyze.c (vect_update_misalignment_for_peel): Remove wrong
code.
(vect_enhance_data_refs_alignment): Compute peel amount using
TYPE_VECTOR_SUBPARTS instead of vf.
* tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Likewise.
2007-05-02 Brooks Moses <brooks.moses@codesourcery.com>
PR bootstrap/31776

View file

@ -1,3 +1,18 @@
2007-05-03 Dorit Nuzman <dorit@il.ibm.com>
PR tree-optimization/31699
* lib/target-supports.exp (check_effective_target_vect_intfloat_cvt):
New.
(check_effective_target_vect_floatint_cvt): New.
* gcc.dg/vect/vect-floatint-conversion-1.c: Use new keyword instead
of specific targets.
* gcc.dg/vect/vect-intfloat-conversion-1.c: Likewise.
* gcc.dg/vect/vect-multitypes-1.c: One less loop gets vectorized.
* gcc.dg/vect/vect-multitypes-4.c: Likewise.
* gcc.dg/vect/vect-iv-4.c: Likewise.
* gcc.dg/vect/vect-multitypes-11.c: New.
* gcc.dg/vect/pr31699.c: New.
2007-05-02 Geoffrey Keating <geoffk@apple.com>
* gcc.c-torture/compile-limits-stringlit.c: Reduce size of string.
@ -2399,7 +2414,7 @@
Dorit Nuzman <dorit@il.ibm.com>
* gcc.dg/vect/vect-intfloat-conversion-1.c: New test.
* gcc.dg/vect/vect-intfloat-conversion-1.c: New test.
* gcc.dg/vect/vect-floatint-conversion-1.c: New test.
* gcc.dg/vect/vect-93.c: Another loop gets vectorized on powerpc.
* gcc.dg/vect/vect-113.c: Likewise.

View file

@ -0,0 +1,35 @@
/* { dg-require-effective-target vect_double } */
#include <stdlib.h>
#include <stdarg.h>
#include "tree-vect.h"
float x[256];
/* Fill a freshly malloc'ed array of 256 doubles with x[i] + 1.0f, where x
   is the global float array.  The buffer is neither returned nor freed,
   and nothing reads it back.
   NOTE(review): presumably kept in this minimal shape on purpose, since the
   dg-final directives in this file scan the vectorizer dump for this loop --
   confirm before restructuring.  */
void foo(void)
{
/* Room for 256 doubles; the malloc result is used without a NULL check.  */
double *z = malloc (sizeof(double) * 256);
int i;
/* Both operands of the addition are float; the result is converted to
   double by the store into z[i].  */
for (i=0; i<256; ++i)
z[i] = x[i] + 1.0f;
}
/* Test driver: calls check_vect (), sets x[i] to (float) i for i in 0..255,
   then runs foo.  Always returns 0; no result is verified here.  */
int main()
{
int i;
/* check_vect is not defined in this file.
   NOTE(review): presumably provided by "tree-vect.h" -- confirm.  */
check_vect ();
for (i = 0; i < 256; i++)
x[i] = (float) i;
foo();
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_intfloat_cvt } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View file

@ -36,5 +36,5 @@ main (void)
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_floatint_cvt } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View file

@ -34,5 +34,5 @@ int main (void)
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target powerpc*-*-* i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_intfloat_cvt } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View file

@ -40,5 +40,5 @@ int main (void)
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View file

@ -14,10 +14,9 @@ int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
/* Current peeling-for-alignment scheme will consider the 'sa[i+7]'
access for peeling, and therefore will examine the option of
using a peeling factor = VF-7%VF. This will result in a peeling factor 1,
using a peeling factor = V-7%V = 1,3 for V=8,4 respectively,
which will also align the access to 'ia[i+3]', and the loop could be
vectorized on all targets that support unaligned loads.
*/
vectorized on all targets that support unaligned loads. */
int main1 (int n)
{
@ -43,17 +42,16 @@ int main1 (int n)
/* Current peeling-for-alignment scheme will consider the 'ia[i+3]'
access for peeling, and therefore will examine the option of
using a peeling factor = VF-3%VF. This will result in a peeling factor
5 if VF=8, or 1 if VF=4,2. In either case, this will also align the access
to 'sa[i+3]', and the loop could be vectorized on targets that support
unaligned loads. */
using a peeling factor = (V-3)%V = 1 for V=2,4.
This will not align the access 'sa[i+3]' (for which we need to
peel 5 iterations), so the loop can not be vectorized. */
int main2 (int n)
{
int i;
/* Multiple types with different sizes, used in independent
copmutations. Vectorizable. */
computations. */
for (i = 0; i < n; i++)
{
ia[i+3] = ib[i];
@ -80,8 +78,11 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View file

@ -0,0 +1,45 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 64
/* Source data for foo; 16-byte alignment is requested explicitly.  */
short x[N] __attribute__ ((__aligned__(16)));
/* Copy the first LEN elements of the global array x into Z, widening each
   short to int.  The caller must ensure LEN <= N and that Z has room for
   LEN ints.  Returns 0.  */
int
foo (int len, int *z) {
  int i;
  for (i=0; i<len; i++) {
    z[i] = x[i];
  }
  /* The function is declared to return int; return a defined value instead
     of falling off the end of the body.  */
  return 0;
}
/* Test driver: fills x with 0..N-1, has foo copy those N values into z
   starting at z[2], then aborts if any copied element differs from its
   source.  Returns 0 when every element matches.  */
int main (void)
{
short i;
/* Four ints larger than N; only z[2]..z[N+1] are written and checked.  */
int z[N+4];
/* check_vect is not defined in this file.
   NOTE(review): presumably provided by "tree-vect.h" -- confirm.  */
check_vect ();
for (i=0; i<N; i++) {
x[i] = i;
}
/* NOTE(review): the +2 offset presumably gives the int stores a different
   alignment from the 16-byte-aligned x -- confirm.  */
foo (N,z+2);
for (i=0; i<N; i++) {
if (z[i+2] != x[i])
abort ();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_unpack } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! vect_unpack } } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target { vect_no_align && vect_unpack } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View file

@ -20,8 +20,7 @@ unsigned int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
access for peeling, and therefore will examine the option of
using a peeling factor = VF-7%VF. This will result in a peeling factor 1,
which will also align the access to 'ia[i+3]', and the loop could be
vectorized on all targets that support unaligned loads.
*/
vectorized on all targets that support unaligned loads. */
int main1 (int n)
{
@ -48,9 +47,9 @@ int main1 (int n)
/* Current peeling-for-alignment scheme will consider the 'ia[i+3]'
access for peeling, and therefore will examine the option of
using a peeling factor = VF-3%VF. This will result in a peeling factor
5 if VF=8, or 1 if VF=4,2. In either case, this will also align the access
to 'sa[i+3]', and the loop could be vectorized on targets that support
unaligned loads. */
1 if VF=4,2. This will not align the access to 'sa[i+3]', for which we
need to peel 5,1 iterations for VF=4,2 respectively, so the loop can not
be vectorized. */
int main2 (int n)
{
@ -84,8 +83,11 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 8 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 8 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail vect_no_align } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View file

@ -1368,6 +1368,49 @@ proc check_effective_target_vect_int { } {
return $et_vect_int_saved
}
# Return 1 if the target supports int->float conversion, 0 otherwise.
# The answer is 1 for i?86, powerpc and x86_64 targets.  It is computed on
# the first call and cached in the global et_vect_intfloat_cvt_saved.

proc check_effective_target_vect_intfloat_cvt { } {
    global et_vect_intfloat_cvt_saved

    if { ![info exists et_vect_intfloat_cvt_saved] } {
        # First query: assume "unsupported" and switch to 1 on the first
        # target pattern that matches.
        set et_vect_intfloat_cvt_saved 0
        foreach pattern { i?86-*-* powerpc*-*-* x86_64-*-* } {
            if { [istarget $pattern] } {
                set et_vect_intfloat_cvt_saved 1
                break
            }
        }
    } else {
        verbose "check_effective_target_vect_intfloat_cvt: using cached result" 2
    }

    verbose "check_effective_target_vect_intfloat_cvt: returning $et_vect_intfloat_cvt_saved" 2
    return $et_vect_intfloat_cvt_saved
}
# Return 1 if the target supports float->int conversion, 0 otherwise.
# The answer is 1 for i?86 and x86_64 targets.  It is computed on the
# first call and cached in the global et_vect_floatint_cvt_saved.

proc check_effective_target_vect_floatint_cvt { } {
    global et_vect_floatint_cvt_saved

    if { ![info exists et_vect_floatint_cvt_saved] } {
        # First query: assume "unsupported" and switch to 1 on the first
        # target pattern that matches.
        set et_vect_floatint_cvt_saved 0
        foreach pattern { i?86-*-* x86_64-*-* } {
            if { [istarget $pattern] } {
                set et_vect_floatint_cvt_saved 1
                break
            }
        }
    } else {
        verbose "check_effective_target_vect_floatint_cvt: using cached result" 2
    }

    verbose "check_effective_target_vect_floatint_cvt: returning $et_vect_floatint_cvt_saved" 2
    return $et_vect_floatint_cvt_saved
}
# Return 1 if this is an arm target using 32-bit instructions
proc check_effective_target_arm32 { } {
global et_arm32_saved

View file

@ -1258,15 +1258,6 @@ vect_update_misalignment_for_peel (struct data_reference *dr,
if (DR_GROUP_FIRST_DR (peel_stmt_info))
dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
if (known_alignment_for_access_p (dr)
&& known_alignment_for_access_p (dr_peel)
&& (DR_MISALIGNMENT (dr) / dr_size ==
DR_MISALIGNMENT (dr_peel) / dr_peel_size))
{
DR_MISALIGNMENT (dr) = 0;
return;
}
/* It can be assumed that the data refs with the same alignment as dr_peel
are aligned in the vector loop. */
same_align_drs
@ -1507,7 +1498,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
the prolog loop ({VF - misalignment}), is a multiple of the
number of the interleaved accesses. */
int elem_size, mis_in_elements;
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
int nelements = TYPE_VECTOR_SUBPARTS (vectype);
/* FORNOW: handle only known alignment. */
if (!known_alignment_for_access_p (dr))
@ -1516,10 +1508,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
break;
}
elem_size = UNITS_PER_SIMD_WORD / vf;
elem_size = UNITS_PER_SIMD_WORD / nelements;
mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
if ((vf - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
{
do_peeling = false;
break;
@ -1541,6 +1533,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
int mis;
int npeel = 0;
tree stmt = DR_STMT (dr0);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
int nelements = TYPE_VECTOR_SUBPARTS (vectype);
if (known_alignment_for_access_p (dr0))
{
@ -1550,7 +1546,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
factor minus the misalignment as an element count. */
mis = DR_MISALIGNMENT (dr0);
mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
npeel = nelements - mis;
/* For interleaved data access every iteration accesses all the
members of the group, therefore we divide the number of iterations

View file

@ -4786,13 +4786,17 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
prolog_niters = min ( LOOP_NITERS ,
(VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
where group_size is the size of the interleaved group.
*/
The above formulas assume that VF == number of elements in the vector. This
may not hold when there are multiple-types in the loop.
In this case, for some data-references in the loop the VF does not represent
the number of elements that fit in the vector. Therefore, instead of VF we
use TYPE_VECTOR_SUBPARTS. */
static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
{
struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree var, stmt;
tree iters, iters_name;
@ -4805,6 +4809,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
tree niters_type = TREE_TYPE (loop_niters);
int group_size = 1;
int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
int nelements = TYPE_VECTOR_SUBPARTS (vectype);
if (DR_GROUP_FIRST_DR (stmt_info))
{
@ -4825,7 +4830,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "known alignment = %d.", byte_misalign);
iters = build_int_cst (niters_type,
(vf - elem_misalign)&(vf/group_size-1));
(nelements - elem_misalign)&(nelements/group_size-1));
}
else
{
@ -4837,9 +4842,9 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
tree elem_size_log =
build_int_cst (type, exact_log2 (vectype_align/vf));
tree vf_minus_1 = build_int_cst (type, vf - 1);
tree vf_tree = build_int_cst (type, vf);
build_int_cst (type, exact_log2 (vectype_align/nelements));
tree nelements_minus_1 = build_int_cst (type, nelements - 1);
tree nelements_tree = build_int_cst (type, nelements);
tree byte_misalign;
tree elem_misalign;
@ -4854,9 +4859,9 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
elem_misalign =
fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
/* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
iters = fold_build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
iters = fold_build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
/* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
iters = fold_convert (niters_type, iters);
}