re PR rtl-optimization/15242 (pessimization of "goto *")
PR optimization/15242 * params.def (PARAM_MAX_GOTO_DUPLICATION_INSNS): New param. * basic-block.h (duplicate_computed_gotos): Add prototype. * bb-reorder.c (duplicate_computed_gotos): New function to duplicate sufficiently small blocks ending in a computed jump. * passes.c (rest_of_compilation): Call duplicate_computed_gotos if not optimizing for size. * cfgcleanup.c (try_crossjump_bb): If not optimizing for size, never do tail merging for blocks ending in a computed jump. * doc/invoke.texi: Document the max-goto-duplication-insns param. From-SVN: r94531
This commit is contained in:
parent
e88334a68f
commit
bbcb0c056b
7 changed files with 140 additions and 2 deletions
|
@ -1,3 +1,16 @@
|
|||
2005-02-01 Steven Bosscher <stevenb@suse.de>
|
||||
|
||||
PR optimization/15242
|
||||
* params.def (PARAM_MAX_GOTO_DUPLICATION_INSNS): New param.
|
||||
* basic-block.h (duplicate_computed_gotos): Add prototype.
|
||||
* bb-reorder.c (duplicate_computed_gotos): New function to
|
||||
duplicate sufficiently small blocks ending in a computed jump.
|
||||
* passes.c (rest_of_compilation): Call duplicate_computed_gotos
|
||||
if not optimizing for size.
|
||||
* cfgcleanup.c (try_crossjump_bb): If not optimizing for size,
|
||||
never do tail merging for blocks ending in a computed jump.
|
||||
* doc/invoke.texi: Document the max-goto-duplication-insns param.
|
||||
|
||||
2005-02-01 Eric Botcazou <ebotcazou@libertysurf.fr>
|
||||
|
||||
Patch from Richard Sandiford <rsandifo@redhat.com>
|
||||
|
|
|
@ -812,6 +812,7 @@ extern bool control_flow_insn_p (rtx);
|
|||
|
||||
/* In bb-reorder.c */
|
||||
extern void reorder_basic_blocks (unsigned int);
|
||||
extern void duplicate_computed_gotos (void);
|
||||
extern void partition_hot_cold_basic_blocks (void);
|
||||
|
||||
/* In cfg.c */
|
||||
|
|
102
gcc/bb-reorder.c
102
gcc/bb-reorder.c
|
@ -81,6 +81,7 @@
|
|||
#include "tm_p.h"
|
||||
#include "obstack.h"
|
||||
#include "expr.h"
|
||||
#include "params.h"
|
||||
|
||||
/* The number of rounds. In most cases there will only be 4 rounds, but
|
||||
when partitioning hot and cold basic blocks into separate sections of
|
||||
|
@ -1189,8 +1190,7 @@ copy_bb_p (basic_block bb, int code_may_grow)
|
|||
if (code_may_grow && maybe_hot_bb_p (bb))
|
||||
max_size *= 8;
|
||||
|
||||
for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
|
||||
insn = NEXT_INSN (insn))
|
||||
FOR_BB_INSNS (bb, insn)
|
||||
{
|
||||
if (INSN_P (insn))
|
||||
size += get_attr_length (insn);
|
||||
|
@ -1985,6 +1985,104 @@ reorder_basic_blocks (unsigned int flags)
|
|||
timevar_pop (TV_REORDER_BLOCKS);
|
||||
}
|
||||
|
||||
/* Duplicate the blocks containing computed gotos. This basically unfactors
|
||||
computed gotos that were factored early on in the compilation process to
|
||||
speed up edge based data flow. We used to not unfactor them again,
|
||||
which can seriously pessimize code with many computed jumps in the source
|
||||
code, such as interpreters. See e.g. PR15242. */
|
||||
|
||||
void
|
||||
duplicate_computed_gotos (void)
|
||||
{
|
||||
basic_block bb, new_bb;
|
||||
bitmap candidates;
|
||||
int max_size;
|
||||
|
||||
if (n_basic_blocks <= 1)
|
||||
return;
|
||||
|
||||
if (targetm.cannot_modify_jumps_p ())
|
||||
return;
|
||||
|
||||
timevar_push (TV_REORDER_BLOCKS);
|
||||
|
||||
cfg_layout_initialize (0);
|
||||
|
||||
/* We are estimating the length of uncond jump insn only once
|
||||
since the code for getting the insn length always returns
|
||||
the minimal length now. */
|
||||
if (uncond_jump_length == 0)
|
||||
uncond_jump_length = get_uncond_jump_length ();
|
||||
|
||||
max_size = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
|
||||
candidates = BITMAP_XMALLOC ();
|
||||
|
||||
/* Build the reorder chain for the original order of blocks.
|
||||
Look for a computed jump while we are at it. */
|
||||
FOR_EACH_BB (bb)
|
||||
{
|
||||
if (bb->next_bb != EXIT_BLOCK_PTR)
|
||||
bb->rbi->next = bb->next_bb;
|
||||
|
||||
/* If the block ends in a computed jump and it is small enough,
|
||||
make it a candidate for duplication. */
|
||||
if (computed_jump_p (BB_END (bb)))
|
||||
{
|
||||
rtx insn;
|
||||
int size = 0;
|
||||
|
||||
FOR_BB_INSNS (bb, insn)
|
||||
{
|
||||
if (INSN_P (insn))
|
||||
size += get_attr_length (insn);
|
||||
if (size > max_size)
|
||||
break;
|
||||
}
|
||||
|
||||
if (size <= max_size)
|
||||
bitmap_set_bit (candidates, bb->index);
|
||||
}
|
||||
}
|
||||
|
||||
/* Nothing to do if there is no computed jump here. */
|
||||
if (bitmap_empty_p (candidates))
|
||||
goto done;
|
||||
|
||||
/* Duplicate computed gotos. */
|
||||
FOR_EACH_BB (bb)
|
||||
{
|
||||
if (bb->rbi->visited)
|
||||
continue;
|
||||
|
||||
bb->rbi->visited = 1;
|
||||
|
||||
/* BB must have one outgoing edge. That edge must not lead to
|
||||
the exit block or the next block.
|
||||
The destination must have more than one predecessor. */
|
||||
if (EDGE_COUNT(bb->succs) != 1
|
||||
|| EDGE_SUCC(bb,0)->dest == EXIT_BLOCK_PTR
|
||||
|| EDGE_SUCC(bb,0)->dest == bb->next_bb
|
||||
|| EDGE_COUNT(EDGE_SUCC(bb,0)->dest->preds) <= 1)
|
||||
continue;
|
||||
|
||||
/* The successor block has to be a duplication candidate. */
|
||||
if (!bitmap_bit_p (candidates, EDGE_SUCC(bb,0)->dest->index))
|
||||
continue;
|
||||
|
||||
new_bb = duplicate_block (EDGE_SUCC(bb,0)->dest, EDGE_SUCC(bb,0));
|
||||
new_bb->rbi->next = bb->rbi->next;
|
||||
bb->rbi->next = new_bb;
|
||||
new_bb->rbi->visited = 1;
|
||||
}
|
||||
|
||||
done:
|
||||
cfg_layout_finalize ();
|
||||
|
||||
BITMAP_XFREE (candidates);
|
||||
|
||||
timevar_pop (TV_REORDER_BLOCKS);
|
||||
}
|
||||
|
||||
/* This function is the main 'entrance' for the optimization that
|
||||
partitions hot and cold basic blocks into separate sections of the
|
||||
.o file (to improve performance and cache locality). Ideally it
|
||||
|
|
|
@ -1707,6 +1707,13 @@ try_crossjump_bb (int mode, basic_block bb)
|
|||
if (EDGE_COUNT (bb->preds) < 2)
|
||||
return false;
|
||||
|
||||
/* Don't crossjump if this block ends in a computed jump,
|
||||
unless we are optimizing for size. */
|
||||
if (!optimize_size
|
||||
&& bb != EXIT_BLOCK_PTR
|
||||
&& computed_jump_p (BB_END (bb)))
|
||||
return false;
|
||||
|
||||
/* If we are partitioning hot/cold basic blocks, we don't want to
|
||||
mess up unconditional or indirect jumps that cross between hot
|
||||
and cold sections.
|
||||
|
|
|
@ -5344,6 +5344,14 @@ of two blocks before crossjumping will be performed on them. This
|
|||
value is ignored in the case where all instructions in the block being
|
||||
crossjumped from are matched. The default value is 5.
|
||||
|
||||
@item max-goto-duplication-insns
|
||||
The maximum number of instructions to duplicate to a block that jumps
|
||||
to a computed goto. To avoid @math{O(N^2)} behavior in a number of
|
||||
passes, GCC factors computed gotos early in the compilation process,
|
||||
and unfactors them as late as possible. Only computed jumps at the
|
||||
end of a basic block with no more than max-goto-duplication-insns are
|
||||
unfactored. The default value is 8.
|
||||
|
||||
@item max-delay-slot-insn-search
|
||||
The maximum number of instructions to consider when looking for an
|
||||
instruction to fill a delay slot. If more than this arbitrary number of
|
||||
|
|
|
@ -317,6 +317,12 @@ DEFPARAM(PARAM_MIN_CROSSJUMP_INSNS,
|
|||
"The minimum number of matching instructions to consider for crossjumping",
|
||||
5, 0, 0)
|
||||
|
||||
/* The maximum number of insns to duplicate when unfactoring computed gotos. */
|
||||
DEFPARAM(PARAM_MAX_GOTO_DUPLICATION_INSNS,
|
||||
"max-goto-duplication-insns",
|
||||
"The maximum number of insns to duplicate when unfactoring computed gotos",
|
||||
8, 0, 0)
|
||||
|
||||
/* The maximum length of path considered in cse. */
|
||||
DEFPARAM(PARAM_MAX_CSE_PATH_LENGTH,
|
||||
"max-cse-path-length",
|
||||
|
|
|
@ -1713,6 +1713,11 @@ rest_of_compilation (void)
|
|||
|
||||
compute_alignments ();
|
||||
|
||||
/* Aggressively duplicate basic blocks ending in computed gotos to the
|
||||
tails of their predecessors, unless we are optimizing for size. */
|
||||
if (flag_expensive_optimizations && !optimize_size)
|
||||
duplicate_computed_gotos ();
|
||||
|
||||
if (flag_var_tracking)
|
||||
rest_of_handle_variable_tracking ();
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue