tree-optimization/111294 - backwards threader PHI costing
This revives an earlier patch since the problematic code applying extra costs to PHIs in copied blocks we couldn't make any sense of prevents a required threading in this case. Instead of coming up with an artificial other costing the following simply removes the bits. As with all threading changes this requires a plethora of testsuite adjustments, but only the last three are unfortunate, as is the libgomp team.c adjustment which is required to avoid a bogus -Werror diagnostic during bootstrap. PR tree-optimization/111294 gcc/ * tree-ssa-threadbackward.cc (back_threader_profitability::m_name): Remove. (back_threader::find_paths_to_names): Adjust. (back_threader::maybe_thread_block): Likewise. (back_threader_profitability::possibly_profitable_path_p): Remove code applying extra costs to copied PHIs. libgomp/ * team.c (gomp_team_start): Assert alloca size to avoid false positive alloc-size diagnostic. gcc/testsuite/ * gcc.dg/tree-ssa/pr111294.c: New test. * gcc.dg/tree-ssa/phi_on_compare-4.c: Adjust. * gcc.dg/tree-ssa/pr59597.c: Likewise. * gcc.dg/tree-ssa/pr61839_2.c: Likewise. * gcc.dg/tree-ssa/ssa-sink-18.c: Likewise. * g++.dg/warn/Wstringop-overflow-4.C: XFAIL subtest on ilp32. * gcc.dg/uninit-pred-9_b.c: XFAIL subtest everywhere. * gcc.dg/vect/vect-117.c: Make scan for not Invalid sum conditional on lp64.
This commit is contained in:
parent
1f9bf6f372
commit
d45ddc2c04
10 changed files with 56 additions and 68 deletions
|
@ -151,7 +151,9 @@ void test_strcpy_new_int16_t (size_t n, const size_t vals[])
|
|||
as size_t as a result of threading. See PR 101688 comment #2. */
|
||||
T (S (1), new int16_t[r_0_imax]);
|
||||
|
||||
T (S (2), new int16_t[r_0_imax + 1]);
|
||||
/* Similar to PR 101688 the following can result in a bogus warning because
|
||||
of threading. */
|
||||
T (S (2), new int16_t[r_0_imax + 1]); // { dg-bogus "into a region of size" "" { xfail { ilp32 } } }
|
||||
T (S (9), new int16_t[r_0_imax * 2 + 1]);
|
||||
|
||||
int r_1_imax = SR (1, INT_MAX);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-Ofast -fdump-tree-dom2" } */
|
||||
/* { dg-options "-Ofast -fdump-tree-threadfull1-stats" } */
|
||||
|
||||
void g (int);
|
||||
void g1 (int);
|
||||
|
@ -37,4 +37,4 @@ f (long a, long b, long c, long d, int x)
|
|||
g (c + d);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "dom2" } } */
|
||||
/* { dg-final { scan-tree-dump "Jumps threaded: 2" "threadfull1" } } */
|
||||
|
|
32
gcc/testsuite/gcc.dg/tree-ssa/pr111294.c
Normal file
32
gcc/testsuite/gcc.dg/tree-ssa/pr111294.c
Normal file
|
@ -0,0 +1,32 @@
|
|||
/* PR tree-optimization/111294 — reduced testcase.  The unusual constructs
   (`?:` with omitted middle operand, bitwise `&` in the condition, the
   empty statement) are intentional creduce output; do not "clean up" the
   code, its exact shape is what reproduces the missed jump thread.  The
   __builtin_unreachable branch lets the threader prove the `else` path
   calling foo is dead, so the dg-final scan requires foo to be gone from
   the optimized dump.  */
/* { dg-do compile } */
/* { dg-options "-Os -fdump-tree-optimized" } */

void foo(void);
static short a;
static int b, c, d;
static int *e, *f = &d;
static int **g = &e;
static unsigned char h;
static short(i)(short j, int k) { return j > k ?: j; }
static char l() {
  if (a) return b;
  return c;
}
int main() {
  b = 0;
  for (; b < 5; ++b)
    ;
  h = l();
  if (a ^ 3 >= i(h, 11))
    a = 0;
  else {
    *g = f;
    if (e == &d & b) {
      __builtin_unreachable();
    } else
      foo();
    ;
  }
}

/* { dg-final { scan-tree-dump-not "foo" "optimized" } } */
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-Ofast -fdisable-tree-cunrolli -fdump-tree-threadfull1-details" } */
|
||||
/* { dg-options "-Ofast -fdump-tree-ethread-details" } */
|
||||
|
||||
typedef unsigned short u16;
|
||||
typedef unsigned char u8;
|
||||
|
@ -56,8 +56,4 @@ main (int argc, char argv[])
|
|||
return crc;
|
||||
}
|
||||
|
||||
/* We used to have no threads in vrp-thread1 because all the attempted
|
||||
ones would cross loops. Now we get 30+ threads before VRP because
|
||||
of loop unrolling. A better option is to disable unrolling and
|
||||
test for the original 4 threads that this test was testing. */
|
||||
/* { dg-final { scan-tree-dump-times "Registering jump thread" 4 "threadfull1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Registering jump thread" 2 "ethread" } } */
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
/* PR tree-optimization/61839. */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-evrp" } */
|
||||
/* Disable jump threading, we want to avoid separating the division/modulo
|
||||
by zero paths - we'd isolate those only later. */
|
||||
/* { dg-options "-O2 -fno-thread-jumps -fdump-tree-evrp" } */
|
||||
/* { dg-require-effective-target int32plus } */
|
||||
|
||||
__attribute__ ((noinline))
|
||||
|
|
|
@ -198,7 +198,9 @@ compute_on_bytes (uint8_t *in_data, int in_len, uint8_t *out_data, int out_len)
|
|||
exits after gimple loop optimizations, which generates instructions executed
|
||||
each iteration in loop, but the results are used outside of loop:
|
||||
With -m64,
|
||||
"Sinking _367 = (uint8_t *) _320;
|
||||
"Sinking op_230 = op_244 + 2;
|
||||
from bb 63 to bb 94
|
||||
Sinking _367 = (uint8_t *) _320;
|
||||
from bb 31 to bb 90
|
||||
Sinking _320 = _321 + ivtmp.25_326;
|
||||
from bb 31 to bb 90
|
||||
|
@ -213,4 +215,4 @@ compute_on_bytes (uint8_t *in_data, int in_len, uint8_t *out_data, int out_len)
|
|||
base+index addressing modes, so the ip[len] address computation can't be
|
||||
made from the IV computation above. */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "Sunk statements: 4" 1 "sink2" { target lp64 xfail { riscv64-*-* } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink2" { target lp64 xfail { riscv64-*-* } } } } */
|
||||
|
|
|
@ -17,7 +17,7 @@ int foo (int n, int l, int m, int r)
|
|||
|
||||
if (l > 100)
|
||||
if ( (n <= 9) && (m < 100) && (r < 19) )
|
||||
blah(v); /* { dg-bogus "uninitialized" "bogus warning" { xfail powerpc*-*-* cris-*-* riscv*-*-* } } */
|
||||
blah(v); /* { dg-bogus "uninitialized" "bogus warning" { xfail *-*-* } } */
|
||||
|
||||
if ( (n <= 8) && (m < 99) && (r < 19) )
|
||||
blah(v); /* { dg-bogus "uninitialized" "pr101674" { xfail mmix-*-* } } */
|
||||
|
|
|
@ -61,4 +61,4 @@ int main (void)
|
|||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 0 "vect" } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" { target { lp64 } } } } */
|
||||
|
|
|
@ -62,7 +62,7 @@ class back_threader_profitability
|
|||
{
|
||||
public:
|
||||
back_threader_profitability (bool speed_p, gimple *stmt);
|
||||
bool possibly_profitable_path_p (const vec<basic_block> &, tree, bool *);
|
||||
bool possibly_profitable_path_p (const vec<basic_block> &, bool *);
|
||||
bool profitable_path_p (const vec<basic_block> &,
|
||||
edge taken, bool *irreducible_loop);
|
||||
private:
|
||||
|
@ -126,9 +126,6 @@ private:
|
|||
auto_bitmap m_imports;
|
||||
// The last statement in the path.
|
||||
gimple *m_last_stmt;
|
||||
// This is a bit of a wart. It's used to pass the LHS SSA name to
|
||||
// the profitability engine.
|
||||
tree m_name;
|
||||
// Marker to differentiate unreachable edges.
|
||||
static const edge UNREACHABLE_EDGE;
|
||||
// Set to TRUE if unknown SSA names along a path should be resolved
|
||||
|
@ -366,7 +363,7 @@ back_threader::find_paths_to_names (basic_block bb, bitmap interesting,
|
|||
// on the way to the backedge could be worthwhile.
|
||||
bool large_non_fsm;
|
||||
if (m_path.length () > 1
|
||||
&& (!profit.possibly_profitable_path_p (m_path, m_name, &large_non_fsm)
|
||||
&& (!profit.possibly_profitable_path_p (m_path, &large_non_fsm)
|
||||
|| (!large_non_fsm
|
||||
&& maybe_register_path (profit))))
|
||||
;
|
||||
|
@ -517,23 +514,19 @@ back_threader::maybe_thread_block (basic_block bb)
|
|||
return;
|
||||
|
||||
enum gimple_code code = gimple_code (stmt);
|
||||
tree name;
|
||||
if (code == GIMPLE_SWITCH)
|
||||
name = gimple_switch_index (as_a <gswitch *> (stmt));
|
||||
else if (code == GIMPLE_COND)
|
||||
name = gimple_cond_lhs (stmt);
|
||||
else
|
||||
if (code != GIMPLE_SWITCH
|
||||
&& code != GIMPLE_COND)
|
||||
return;
|
||||
|
||||
m_last_stmt = stmt;
|
||||
m_visited_bbs.empty ();
|
||||
m_path.truncate (0);
|
||||
m_name = name;
|
||||
|
||||
// We compute imports of the path during discovery starting
|
||||
// just with names used in the conditional.
|
||||
bitmap_clear (m_imports);
|
||||
ssa_op_iter iter;
|
||||
tree name;
|
||||
FOR_EACH_SSA_TREE_OPERAND (name, stmt, iter, SSA_OP_USE)
|
||||
{
|
||||
if (!gimple_range_ssa_p (name))
|
||||
|
@ -588,15 +581,12 @@ back_threader::debug ()
|
|||
*LARGE_NON_FSM whether the thread is too large for a non-FSM thread
|
||||
but would be OK if we extend the path to cover the loop backedge.
|
||||
|
||||
NAME is the SSA_NAME of the variable we found to have a constant
|
||||
value on PATH. If unknown, SSA_NAME is NULL.
|
||||
|
||||
?? It seems we should be able to loosen some of the restrictions in
|
||||
this function after loop optimizations have run. */
|
||||
|
||||
bool
|
||||
back_threader_profitability::possibly_profitable_path_p
|
||||
(const vec<basic_block> &m_path, tree name,
|
||||
(const vec<basic_block> &m_path,
|
||||
bool *large_non_fsm)
|
||||
{
|
||||
gcc_checking_assert (!m_path.is_empty ());
|
||||
|
@ -645,44 +635,6 @@ back_threader_profitability::possibly_profitable_path_p
|
|||
if (j < m_path.length () - 1)
|
||||
{
|
||||
int orig_n_insns = m_n_insns;
|
||||
/* PHIs in the path will create degenerate PHIS in the
|
||||
copied path which will then get propagated away, so
|
||||
looking at just the duplicate path the PHIs would
|
||||
seem unimportant.
|
||||
|
||||
But those PHIs, because they're assignments to objects
|
||||
typically with lives that exist outside the thread path,
|
||||
will tend to generate PHIs (or at least new PHI arguments)
|
||||
at points where we leave the thread path and rejoin
|
||||
the original blocks. So we do want to account for them.
|
||||
|
||||
We ignore virtual PHIs. We also ignore cases where BB
|
||||
has a single incoming edge. That's the most common
|
||||
degenerate PHI we'll see here. Finally we ignore PHIs
|
||||
that are associated with the value we're tracking as
|
||||
that object likely dies. */
|
||||
if (EDGE_COUNT (bb->succs) > 1 && EDGE_COUNT (bb->preds) > 1)
|
||||
{
|
||||
for (gphi_iterator gsip = gsi_start_phis (bb);
|
||||
!gsi_end_p (gsip);
|
||||
gsi_next (&gsip))
|
||||
{
|
||||
gphi *phi = gsip.phi ();
|
||||
tree dst = gimple_phi_result (phi);
|
||||
|
||||
/* Note that if both NAME and DST are anonymous
|
||||
SSA_NAMEs, then we do not have enough information
|
||||
to consider them associated. */
|
||||
if (dst != name
|
||||
&& name
|
||||
&& TREE_CODE (name) == SSA_NAME
|
||||
&& (SSA_NAME_VAR (dst) != SSA_NAME_VAR (name)
|
||||
|| !SSA_NAME_VAR (dst))
|
||||
&& !virtual_operand_p (dst))
|
||||
++m_n_insns;
|
||||
}
|
||||
}
|
||||
|
||||
if (!m_contains_hot_bb && m_speed_p)
|
||||
m_contains_hot_bb |= optimize_bb_for_speed_p (bb);
|
||||
for (gsi = gsi_after_labels (bb);
|
||||
|
|
|
@ -756,6 +756,8 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
|
|||
attr = &thread_attr;
|
||||
}
|
||||
|
||||
if (i >= nthreads)
|
||||
__builtin_unreachable ();
|
||||
start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
|
||||
* (nthreads - i));
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue