
This issue was observed in the rs6000-specific PR102658 as well.

I've looked into it a bit: it is caused by the "conditional store replacement", which was originally disabled when vectorization is off, as the code below shows.

  /* If either vectorization or if-conversion is disabled then do
     not sink any stores.  */
  if (param_max_stores_to_sink == 0
      || (!flag_tree_loop_vectorize && !flag_tree_slp_vectorize)
      || !flag_tree_loop_if_convert)
    return false;

The new change makes the innermost loop look like

  for (int c1 = 0; c1 <= 1499; c1 += 1) {
    if (c1 <= 500) {
      S_10(c0, c1);
    } else {
      S_9(c0, c1);
    }
    S_11(c0, c1);
  }

and it cannot be split into:

  for (int c1 = 0; c1 <= 500; c1 += 1)
    S_10(c0, c1);

  for (int c1 = 501; c1 <= 1499; c1 += 1)
    S_9(c0, c1);

So instead of disabling vectorization, could we just disable this conditional store replacement with the parameter "--param max-stores-to-sink=0"?

I tested this proposal on ppc64le; it should work as well.

2021-10-11  Kewen Lin  <linkw@linux.ibm.com>

libgomp/ChangeLog:

	* testsuite/libgomp.graphite/force-parallel-8.c: Add --param max-stores-to-sink=0.
50 lines
917 B
C
50 lines
917 B
C
/* "--param max-stores-to-sink=0" keeps the compiler from sinking the
   conditional stores in foo's innermost loop; with store sinking enabled
   that loop ends up in a form that can no longer be split at j == 500.  */
/* { dg-additional-options "-fdisable-tree-thread1 -fdisable-tree-vrp-thread1 --param max-stores-to-sink=0" } */

/* Extent of every array dimension and trip count of every loop below.  */
#define N 1500

/* Arrays written by foo.  They are file-scope objects, so they start out
   zero-initialized.  */
int x[N][N], y[N];

/* Declared by hand; this file includes no headers.  */
void abort (void);

/* Loop kernel exercised by this test.

   Fills y[i] = i and x[i][j] = i + j, then walks the full N x N iteration
   space once more: each outer iteration rewrites y[i], and the inner loop
   branches on j > 500 to either overwrite x[i][j] and y[j] or triple
   x[i][j] in place.

   Returns x[2][5] * y[8].  x[2][5] starts as 7 and is tripled to 21 (j = 5
   takes the else branch); y[8] stays 8 because the branch only overwrites
   y[j] for j > 500.  The result is therefore 168.

   NOTE: the dg-final scans at the end of this file count the loops the
   compiler reports for this function, so the loop structure here should
   not be rearranged.  */
int foo(void)
{
  int i, j;

  for (i = 0; i < N; i++)
    y[i] = i;

  for (i = 0; i < N; i++)
    for (j = 0; j < N; j++)
      x[i][j] = i + j;

  for (i = 0; i < N; i++)
    {
      y[i] = i;

      for (j = 0; j < N; j++)
        {
          if (j > 500)
            {
              x[i][j] = i + j + 3;
              y[j] = i*j + 10;
            }
          else
            x[i][j] = x[i][j]*3;
        }
    }

  return x[2][5]*y[8];
}
/* Test driver: run foo once and abort unless it returns the expected
   value 168; otherwise exit with status 0.  */
int main(void)
{
  if (168 != foo())
    abort ();

  return 0;
}

/* Check that the parallel code generation part produces the right answer.  */
/* { dg-final { scan-tree-dump-times "5 loops carried no dependency" 1 "graphite" } } */
/* { dg-final { scan-tree-dump-times "loopfn.0" 4 "optimized" } } */
/* { dg-final { scan-tree-dump-times "loopfn.1" 4 "optimized" } } */