common.opt: Add description of the new -fgcse-after-reload flag.

2004-03-03  Mostafa Hagog  <mustafa@il.ibm.com>

	* common.opt: Add description of the new -fgcse-after-reload flag.
	* flags.h (flag_gcse_after_reload): Declaration of global variable.
	* gcse.c (reg_used_on_edge, reg_set_between_after_reload_p,
	reg_used_between_after_reload_p, get_avail_load_store_reg,
	is_jump_table_basic_block, bb_has_well_behaved_predecessors,
	get_bb_avail_insn, hash_scan_set_after_reload,
	compute_hash_table_after_reload, eliminate_partially_redundant_loads,
	gcse_after_reload): New functions to implement gcse-after-reload.
	(gcse_after_reload_main): New function, the main entry point to
	gcse-after-reload.
	* rtl.h (gcse_after_reload_main): Declaration of the new function.
	* opts.c (common_handle_option): Handle the -fgcse-after-reload flag.
	* toplev.c (flag_gcse_after_reload): Initialization.
	* passes.c (rest_of_handle_gcse2): Call gcse_after_reload_main.
	* params.def (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION,
	PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION): New parameters for tuning
	the gcse after reload optimization.
	* params.h (GCSE_AFTER_RELOAD_PARTIAL_FRACTION,
	GCSE_AFTER_RELOAD_CRITICAL_FRACTION): Two macros to access the tuning
	parameters.
	* doc/invoke.texi: Documentation for the new flag gcse-after-reload.

From-SVN: r78842
Parent: 6f6dedf5bd
Commit: f9957958fe
11 changed files with 750 additions and 6 deletions
gcc/ChangeLog
@@ -1,3 +1,37 @@
2004-03-03  Mostafa Hagog  <mustafa@il.ibm.com>

	* common.opt: Add description of the new -fgcse-after-reload flag.

	* flags.h (flag_gcse_after_reload): Declaration of global variable.

	* gcse.c (reg_used_on_edge, reg_set_between_after_reload_p,
	reg_used_between_after_reload_p, get_avail_load_store_reg,
	is_jump_table_basic_block, bb_has_well_behaved_predecessors,
	get_bb_avail_insn, hash_scan_set_after_reload,
	compute_hash_table_after_reload, eliminate_partially_redundant_loads,
	gcse_after_reload): New functions to implement gcse-after-reload.
	(gcse_after_reload_main): New function, the main entry point to
	gcse-after-reload.

	* rtl.h (gcse_after_reload_main): Declaration of the new function.

	* opts.c (common_handle_option): Handle the -fgcse-after-reload flag.

	* toplev.c (flag_gcse_after_reload): Initialization.

	* passes.c (rest_of_handle_gcse2): Call gcse_after_reload_main.

	* params.def (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION,
	PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION): New parameters for tuning
	the gcse after reload optimization.

	* params.h (GCSE_AFTER_RELOAD_PARTIAL_FRACTION,
	GCSE_AFTER_RELOAD_CRITICAL_FRACTION): Two macros to access the tuning
	parameters.

	* doc/invoke.texi: Documentation for the new flag gcse-after-reload.

2004-03-03  Nicolas Pitre <nico@cam.org>

	* config/arm/ieee754-df.S (muldf3, divdf3): Fix denormalization of
gcc/common.opt
@@ -371,7 +371,13 @@ Perform store motion after global common subexpression elimination

fgcse-las
Common
-Perform redundant load after store elimination in global common subexpression elimination
+Perform redundant load after store elimination in global common subexpression
+elimination

fgcse-after-reload
Common
Perform global common subexpression elimination after register allocation
has finished.

fguess-branch-probability
Common
gcc/doc/invoke.texi
@@ -270,8 +270,8 @@ in the following sections.
-fdelayed-branch -fdelete-null-pointer-checks @gol
-fexpensive-optimizations -ffast-math -ffloat-store @gol
-fforce-addr -fforce-mem -ffunction-sections @gol
--fgcse -fgcse-lm -fgcse-sm -fgcse-las -floop-optimize @gol
--fcrossjumping -fif-conversion -fif-conversion2 @gol
+-fgcse -fgcse-lm -fgcse-sm -fgcse-las -fgcse-after-reload @gol
+-floop-optimize -fcrossjumping -fif-conversion -fif-conversion2 @gol
-finline-functions -finline-limit=@var{n} -fkeep-inline-functions @gol
-fkeep-static-consts -fmerge-constants -fmerge-all-constants @gol
-fmove-all-movables -fnew-ra -fno-branch-count-reg @gol

@@ -3646,7 +3646,8 @@ invoking @option{-O2} on programs that use computed gotos.
@opindex O3
Optimize yet more.  @option{-O3} turns on all optimizations specified by
@option{-O2} and also turns on the @option{-finline-functions},
-@option{-fweb} and @option{-frename-registers} options.
+@option{-fweb}, @option{-frename-registers}
+and @option{-fgcse-after-reload} options.

@item -O0
@opindex O0

@@ -3957,6 +3958,12 @@ same memory location (both partial and full redundancies).

Enabled by default when gcse is enabled.

@item -fgcse-after-reload
@opindex fgcse-after-reload
When @option{-fgcse-after-reload} is enabled, a redundant load elimination
pass is performed after reload.  The purpose of this pass is to clean up
redundant spilling.

@item -floop-optimize
@opindex floop-optimize
Perform loop optimizations: move constant expressions out of loops, simplify
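As a rough illustration of the redundancy this pass targets, consider the following hypothetical C function (not part of the patch; the names and the spill decision are invented). Under heavy register pressure, reload may spill x to a stack slot and reload it separately on each path, leaving a third, redundant reload in the join block:

/* Hypothetical source; assume reload spilled `x' to a stack slot.  */
int
f (int a, int x)
{
  int y;
  if (a)
    y = x + 1;		/* Predecessor 1 reloads `x' from the slot.  */
  else
    y = x - 1;		/* Predecessor 2 reloads `x' as well.  */
  return y + x;		/* Join block: reload emits yet another load of
			   the same slot; with -fgcse-after-reload it can
			   be replaced by a register copy on each edge.  */
}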
gcc/flags.h
@@ -672,6 +672,11 @@ extern int flag_gcse_sm;

extern int flag_gcse_las;

/* Nonzero if we want to perform global redundancy elimination after
   register allocation.  */

extern int flag_gcse_after_reload;

/* Nonzero if value histograms should be used to optimize code.  */
extern int flag_value_profile_transformations;
gcc/gcse.c
@@ -1980,6 +1980,7 @@ insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
	  antic_occr->insn = insn;
	  antic_occr->next = NULL;
	  antic_occr->deleted_p = 0;
	}
    }

@@ -2016,6 +2017,7 @@ insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
	  avail_occr->insn = insn;
	  avail_occr->next = NULL;
	  avail_occr->deleted_p = 0;
	}
    }
}

@@ -2102,6 +2104,7 @@ insert_set_in_table (rtx x, rtx insn, struct hash_table *table)
	  cur_occr->insn = insn;
	  cur_occr->next = NULL;
	  cur_occr->deleted_p = 0;
	}
    }

@@ -8091,4 +8094,639 @@ is_too_expensive (const char *pass)
  return false;
}

/* The following code implements gcse after reload, the purpose of this
   pass is to clean up redundant loads generated by reload and other
   optimizations that come after gcse.  It searches for simple inter-block
   redundancies and tries to eliminate them by adding moves and loads
   in cold places.  */

/* The following structure holds the information about the occurrences of
   the redundant instructions.  */
struct unoccr
{
  struct unoccr *next;
  edge pred;
  rtx insn;
};

static bool reg_used_on_edge (rtx, edge);
static rtx reg_set_between_after_reload_p (rtx, rtx, rtx);
static rtx reg_used_between_after_reload_p (rtx, rtx, rtx);
static rtx get_avail_load_store_reg (rtx);
static bool is_jump_table_basic_block (basic_block);
static bool bb_has_well_behaved_predecessors (basic_block);
static struct occr *get_bb_avail_insn (basic_block, struct occr *);
static void hash_scan_set_after_reload (rtx, rtx, struct hash_table *);
static void compute_hash_table_after_reload (struct hash_table *);
static void eliminate_partially_redundant_loads (basic_block, rtx,
						 struct expr *);
static void gcse_after_reload (void);
void gcse_after_reload_main (rtx, FILE *);


/* Check if register REG is used in any insn waiting to be inserted on E.
   Assumes no such insn can be a CALL_INSN; if so call reg_used_between_p
   with PREV (insn), NEXT (insn) instead of calling
   reg_overlap_mentioned_p.  */

static bool
reg_used_on_edge (rtx reg, edge e)
{
  rtx insn;

  for (insn = e->insns; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn) && reg_overlap_mentioned_p (reg, PATTERN (insn)))
      return true;

  return false;
}

/* Return the insn that sets register REG or clobbers it in between
   FROM_INSN and TO_INSN (exclusive of those two).
   Just like reg_set_between but for hard registers and not pseudos.  */

static rtx
reg_set_between_after_reload_p (rtx reg, rtx from_insn, rtx to_insn)
{
  rtx insn;
  int regno;

  if (GET_CODE (reg) != REG)
    abort ();
  regno = REGNO (reg);

  /* We are called after register allocation.  */
  if (regno >= FIRST_PSEUDO_REGISTER)
    abort ();

  if (from_insn == to_insn)
    return NULL_RTX;

  for (insn = NEXT_INSN (from_insn);
       insn != to_insn;
       insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn))
	{
	  if (FIND_REG_INC_NOTE (insn, reg)
	      || (GET_CODE (insn) == CALL_INSN
		  && call_used_regs[regno])
	      || find_reg_fusage (insn, CLOBBER, reg))
	    return insn;
	}
      if (set_of (reg, insn) != NULL_RTX)
	return insn;
    }
  return NULL_RTX;
}

/* Return the insn that uses register REG in between FROM_INSN and TO_INSN
   (exclusive of those two).  Similar to reg_used_between but for hard
   registers and not pseudos.  */

static rtx
reg_used_between_after_reload_p (rtx reg, rtx from_insn, rtx to_insn)
{
  rtx insn;
  int regno;

  if (GET_CODE (reg) != REG)
    return to_insn;
  regno = REGNO (reg);

  /* We are called after register allocation.  */
  if (regno >= FIRST_PSEUDO_REGISTER)
    abort ();
  if (from_insn == to_insn)
    return NULL_RTX;

  for (insn = NEXT_INSN (from_insn);
       insn != to_insn;
       insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& (reg_overlap_mentioned_p (reg, PATTERN (insn))
	    || (GET_CODE (insn) == CALL_INSN
		&& call_used_regs[regno])
	    || find_reg_fusage (insn, USE, reg)
	    || find_reg_fusage (insn, CLOBBER, reg)))
      return insn;
  return NULL_RTX;
}

/* Return the loaded/stored register of a load/store instruction.  */

static rtx
get_avail_load_store_reg (rtx insn)
{
  if (GET_CODE (SET_DEST (PATTERN (insn))) == REG)  /* A load.  */
    return SET_DEST (PATTERN (insn));
  if (GET_CODE (SET_SRC (PATTERN (insn))) == REG)   /* A store.  */
    return SET_SRC (PATTERN (insn));
  abort ();
}

/* Don't handle ABNORMAL edges or jump tables.  */

static bool
is_jump_table_basic_block (basic_block bb)
{
  rtx insn = BB_END (bb);

  if (GET_CODE (insn) == JUMP_INSN
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return true;
  return false;
}

/* Return nonzero if the predecessors of BB are "well behaved".  */

static bool
bb_has_well_behaved_predecessors (basic_block bb)
{
  edge pred;

  if (! bb->pred)
    return false;
  for (pred = bb->pred; pred != NULL; pred = pred->pred_next)
    if (((pred->flags & EDGE_ABNORMAL) && EDGE_CRITICAL_P (pred))
	|| is_jump_table_basic_block (pred->src))
      return false;
  return true;
}


/* Search for the occurrences of expression in BB.  */

static struct occr *
get_bb_avail_insn (basic_block bb, struct occr *occr)
{
  for (; occr != NULL; occr = occr->next)
    if (BLOCK_FOR_INSN (occr->insn)->index == bb->index)
      return occr;
  return NULL;
}

/* Perform partial GCSE pass after reload, try to eliminate redundant loads
   created by the reload pass.  We try to look for loads that are fully or
   partially redundant with loads/stores in predecessor BBs, and try adding
   loads to make them fully redundant.  We also check if it's worth adding
   loads to be able to delete the redundant load.

   Algorithm:
   1. Build available expressions hash table:
       For each load/store instruction, if the loaded/stored memory didn't
       change until the end of the basic block add this memory expression to
       the hash table.
   2. Perform Redundancy elimination:
       For each load instruction do the following:
	 Perform partial redundancy elimination, check if it's worth adding
	 loads to make the load fully redundant.  If so add loads and
	 register copies and delete the load.

   Future enhancement:
     If the loaded register is used/defined between the load and some store,
     look for some other free register between the load and all its stores,
     and replace the load with a copy from this register to the loaded
     register.  */
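/* Illustration (hypothetical, not part of the patch): for a join block BB
   with two predecessors P1 and P2,

	P1: r3 <- [sp+8]	P2: [sp+8] <- r5
	      \			  /
	       BB: r4 <- [sp+8]

   step 1 records [sp+8] as available at the end of P1 (value in r3) and
   of P2 (value in r5); step 2 then finds the load in BB fully redundant,
   inserts the copies r4 <- r3 and r4 <- r5 on the incoming edges, and
   deletes the load.  */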
/* This handles the case where several stores feed a partially redundant
   load.  It checks if the redundancy elimination is possible and if it's
   worth it.  */

static void
eliminate_partially_redundant_loads (basic_block bb, rtx insn,
				     struct expr *expr)
{
  edge pred;
  rtx avail_insn = NULL_RTX;
  rtx avail_reg;
  rtx dest, pat;
  struct occr *a_occr;
  struct unoccr *occr, *avail_occrs = NULL;
  struct unoccr *unoccr, *unavail_occrs = NULL;
  int npred_ok = 0;
  gcov_type ok_count = 0;	 /* Redundant load execution count.  */
  gcov_type critical_count = 0;	 /* Execution count of critical edges.  */

  /* The execution count of the loads to be added to make the
     load fully redundant.  */
  gcov_type not_ok_count = 0;
  basic_block pred_bb;

  pat = PATTERN (insn);
  dest = SET_DEST (pat);
  /* Check that the loaded register is neither used nor killed from the
     beginning of the block.  */
  if (reg_used_between_after_reload_p (dest,
				       PREV_INSN (BB_HEAD (bb)), insn))
    return;

  /* Check potential for replacing load with copy for predecessors.  */
  for (pred = bb->pred; pred; pred = pred->pred_next)
    {
      rtx next_pred_bb_end;

      avail_insn = NULL_RTX;
      pred_bb = pred->src;
      next_pred_bb_end = NEXT_INSN (BB_END (pred_bb));
      for (a_occr = get_bb_avail_insn (pred_bb, expr->avail_occr); a_occr;
	   a_occr = get_bb_avail_insn (pred_bb, a_occr->next))
	{
	  /* Check if the loaded register is not used.  */
	  avail_insn = a_occr->insn;
	  if (! (avail_reg = get_avail_load_store_reg (avail_insn)))
	    abort ();
	  /* Make sure we can generate a move from register avail_reg to
	     dest.  */
	  extract_insn (gen_move_insn (copy_rtx (dest),
				       copy_rtx (avail_reg)));
	  if (! constrain_operands (1)
	      || reg_killed_on_edge (avail_reg, pred)
	      || reg_used_on_edge (dest, pred))
	    {
	      avail_insn = NULL;
	      continue;
	    }
	  if (! reg_set_between_after_reload_p (avail_reg, avail_insn,
						next_pred_bb_end))
	    /* AVAIL_INSN remains non-null.  */
	    break;
	  else
	    avail_insn = NULL;
	}
      if (avail_insn != NULL_RTX)
	{
	  npred_ok++;
	  ok_count += pred->count;
	  if (EDGE_CRITICAL_P (pred))
	    critical_count += pred->count;
	  occr = (struct unoccr *) gmalloc (sizeof (struct unoccr));
	  occr->insn = avail_insn;
	  occr->pred = pred;
	  occr->next = avail_occrs;
	  avail_occrs = occr;
	}
      else
	{
	  not_ok_count += pred->count;
	  if (EDGE_CRITICAL_P (pred))
	    critical_count += pred->count;
	  unoccr = (struct unoccr *) gmalloc (sizeof (struct unoccr));
	  unoccr->insn = NULL_RTX;
	  unoccr->pred = pred;
	  unoccr->next = unavail_occrs;
	  unavail_occrs = unoccr;
	}
    }

  if (npred_ok == 0    /* No load can be replaced by copy.  */
      || (optimize_size && npred_ok > 1)) /* Prevent exploding the code.  */
    return;

  /* Check if it's worth applying the partial redundancy elimination.  */
  if (ok_count < GCSE_AFTER_RELOAD_PARTIAL_FRACTION * not_ok_count)
    return;

  if (ok_count < GCSE_AFTER_RELOAD_CRITICAL_FRACTION * critical_count)
    return;

  /* Generate moves to the loaded register from where
     the memory is available.  */
  for (occr = avail_occrs; occr; occr = occr->next)
    {
      avail_insn = occr->insn;
      pred = occr->pred;
      /* Set avail_reg to be the register having the value of the
	 memory.  */
      avail_reg = get_avail_load_store_reg (avail_insn);
      if (! avail_reg)
	abort ();

      insert_insn_on_edge (gen_move_insn (copy_rtx (dest),
					  copy_rtx (avail_reg)),
			   pred);

      if (gcse_file)
	fprintf (gcse_file,
		 "GCSE AFTER reload generating move from %d to %d on \
edge from %d to %d\n",
		 REGNO (avail_reg),
		 REGNO (dest),
		 pred->src->index,
		 pred->dest->index);
    }

  /* Regenerate loads where the memory is unavailable.  */
  for (unoccr = unavail_occrs; unoccr; unoccr = unoccr->next)
    {
      pred = unoccr->pred;
      insert_insn_on_edge (copy_insn (PATTERN (insn)), pred);

      if (gcse_file)
	fprintf (gcse_file,
		 "GCSE AFTER reload: generating on edge from %d to %d\
 a copy of load:\n",
		 pred->src->index,
		 pred->dest->index);
    }

  /* Delete the insn if it is not available in this block and mark it
     for deletion if it is available.  If insn is available it may help
     discover additional redundancies, so mark it for later deletion.  */
  for (a_occr = get_bb_avail_insn (bb, expr->avail_occr);
       a_occr && (a_occr->insn != insn);
       a_occr = get_bb_avail_insn (bb, a_occr->next));

  if (!a_occr)
    delete_insn (insn);
  else
    a_occr->deleted_p = 1;
}

/* Perform the redundancy elimination as described above.  */

static void
gcse_after_reload (void)
{
  unsigned int i;
  rtx insn;
  basic_block bb;
  struct expr *expr;
  struct occr *occr;

  /* Note we start at block 1.  */

  if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR)
    return;

  FOR_BB_BETWEEN (bb,
		  ENTRY_BLOCK_PTR->next_bb->next_bb,
		  EXIT_BLOCK_PTR,
		  next_bb)
    {
      if (! bb_has_well_behaved_predecessors (bb))
	continue;

      /* Do not try this optimization on cold basic blocks.  */
      if (probably_cold_bb_p (bb))
	continue;

      reset_opr_set_tables ();

      for (insn = BB_HEAD (bb);
	   insn != NULL
	   && insn != NEXT_INSN (BB_END (bb));
	   insn = NEXT_INSN (insn))
	{
	  /* Is it a load - of the form (set (reg) (mem))?  */
	  if (GET_CODE (insn) == INSN
	      && GET_CODE (PATTERN (insn)) == SET
	      && GET_CODE (SET_DEST (PATTERN (insn))) == REG
	      && GET_CODE (SET_SRC (PATTERN (insn))) == MEM)
	    {
	      rtx pat = PATTERN (insn);
	      rtx src = SET_SRC (pat);
	      struct expr *expr;

	      if (general_operand (src, GET_MODE (src))
		  /* Is the expression recorded?  */
		  && (expr = lookup_expr (src, &expr_hash_table)) != NULL
		  /* Are the operands unchanged since the start of the
		     block?  */
		  && oprs_not_set_p (src, insn)
		  && ! MEM_VOLATILE_P (src)
		  && GET_MODE (src) != BLKmode
		  && !(flag_non_call_exceptions && may_trap_p (src))
		  && !side_effects_p (src))
		{
		  /* We now have a load (insn) and an available memory at
		     its BB start (expr).  Try to remove the load if it is
		     redundant.  */
		  eliminate_partially_redundant_loads (bb, insn, expr);
		}
	    }

	  /* Keep track of everything modified by this insn.  */
	  if (INSN_P (insn))
	    mark_oprs_set (insn);
	}
    }

  commit_edge_insertions ();

  /* Go over the expression hash table and delete insns that were
     marked for later deletion.  */
  for (i = 0; i < expr_hash_table.size; i++)
    {
      for (expr = expr_hash_table.table[i];
	   expr != NULL;
	   expr = expr->next_same_hash)
	for (occr = expr->avail_occr; occr; occr = occr->next)
	  if (occr->deleted_p)
	    delete_insn (occr->insn);
    }
}

/* Scan pattern PAT of INSN and add an entry to the hash TABLE.
   After reload we are interested in loads/stores only.  */

static void
hash_scan_set_after_reload (rtx pat, rtx insn, struct hash_table *table)
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);

  if (GET_CODE (src) != MEM && GET_CODE (dest) != MEM)
    return;

  if (GET_CODE (dest) == REG)
    {
      if (/* Don't GCSE something if we can't do a reg/reg copy.  */
	  can_copy_p (GET_MODE (dest))
	  /* GCSE commonly inserts instruction after the insn.  We can't
	     do that easily for EH_REGION notes so disable GCSE on these
	     for now.  */
	  && ! find_reg_note (insn, REG_EH_REGION, NULL_RTX)
	  /* Is SET_SRC something we want to gcse?  */
	  && general_operand (src, GET_MODE (src))
	  /* Don't CSE a nop.  */
	  && ! set_noop_p (pat)
	  && ! JUMP_P (insn))
	{
	  /* An expression is not available if its operands are
	     subsequently modified, including this insn.  */
	  if (oprs_available_p (src, insn))
	    insert_expr_in_table (src, GET_MODE (dest), insn, 0, 1, table);
	}
    }
  else if (GET_CODE (src) == REG)
    {
      /* A store of a hard register to memory.  */
      if (/* Don't GCSE something if we can't do a reg/reg copy.  */
	  can_copy_p (GET_MODE (src))
	  /* GCSE commonly inserts instruction after the insn.  We can't
	     do that easily for EH_REGION notes so disable GCSE on these
	     for now.  */
	  && ! find_reg_note (insn, REG_EH_REGION, NULL_RTX)
	  /* Is SET_DEST something we want to gcse?  */
	  && general_operand (dest, GET_MODE (dest))
	  /* Don't CSE a nop.  */
	  && ! set_noop_p (pat)
	  && ! JUMP_P (insn)
	  && ! (flag_float_store && FLOAT_MODE_P (GET_MODE (dest)))
	  /* Check if the memory expression is killed after insn.  */
	  && ! load_killed_in_block_p (BLOCK_FOR_INSN (insn),
				       INSN_CUID (insn) + 1,
				       dest,
				       1)
	  && oprs_unchanged_p (XEXP (dest, 0), insn, 1))
	{
	  insert_expr_in_table (dest, GET_MODE (dest), insn, 0, 1, table);
	}
    }
}


/* Create hash table of memory expressions available at end of basic
   blocks.  */

static void
compute_hash_table_after_reload (struct hash_table *table)
{
  unsigned int i;

  table->set_p = 0;

  /* Initialize count of number of entries in hash table.  */
  table->n_elems = 0;
  memset ((char *) table->table, 0,
	  table->size * sizeof (struct expr *));

  /* While we compute the hash table we also compute a bit array of which
     registers are set in which blocks.  */
  sbitmap_vector_zero (reg_set_in_block, last_basic_block);

  /* Re-cache any INSN_LIST nodes we have allocated.  */
  clear_modify_mem_tables ();

  /* Some working arrays used to track first and last set in each block.  */
  reg_avail_info = (struct reg_avail_info *)
    gmalloc (max_gcse_regno * sizeof (struct reg_avail_info));

  for (i = 0; i < max_gcse_regno; ++i)
    reg_avail_info[i].last_bb = NULL;

  FOR_EACH_BB (current_bb)
    {
      rtx insn;
      unsigned int regno;

      /* First pass over the instructions records information used to
	 determine when registers and memory are first and last set.  */
      for (insn = BB_HEAD (current_bb);
	   insn && insn != NEXT_INSN (BB_END (current_bb));
	   insn = NEXT_INSN (insn))
	{
	  if (! INSN_P (insn))
	    continue;

	  if (GET_CODE (insn) == CALL_INSN)
	    {
	      bool clobbers_all = false;

#ifdef NON_SAVING_SETJMP
	      if (NON_SAVING_SETJMP
		  && find_reg_note (insn, REG_SETJMP, NULL_RTX))
		clobbers_all = true;
#endif

	      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
		if (clobbers_all
		    || TEST_HARD_REG_BIT (regs_invalidated_by_call,
					  regno))
		  record_last_reg_set_info (insn, regno);

	      mark_call (insn);
	    }

	  note_stores (PATTERN (insn), record_last_set_info, insn);

	  if (GET_CODE (PATTERN (insn)) == SET)
	    {
	      rtx src, dest;

	      src = SET_SRC (PATTERN (insn));
	      dest = SET_DEST (PATTERN (insn));
	      if (GET_CODE (src) == MEM && auto_inc_p (XEXP (src, 0)))
		{
		  regno = REGNO (XEXP (XEXP (src, 0), 0));
		  record_last_reg_set_info (insn, regno);
		}
	      if (GET_CODE (dest) == MEM && auto_inc_p (XEXP (dest, 0)))
		{
		  regno = REGNO (XEXP (XEXP (dest, 0), 0));
		  record_last_reg_set_info (insn, regno);
		}
	    }
	}

      /* The next pass builds the hash table.  */
      for (insn = BB_HEAD (current_bb);
	   insn && insn != NEXT_INSN (BB_END (current_bb));
	   insn = NEXT_INSN (insn))
	if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET)
	  if (! find_reg_note (insn, REG_LIBCALL, NULL_RTX))
	    hash_scan_set_after_reload (PATTERN (insn), insn, table);
    }

  free (reg_avail_info);
  reg_avail_info = NULL;
}


/* Main entry point of the GCSE after reload - clean up some redundant
   loads due to spilling.  */

void
gcse_after_reload_main (rtx f, FILE *file)
{
  gcse_subst_count = 0;
  gcse_create_count = 0;

  gcse_file = file;

  gcc_obstack_init (&gcse_obstack);
  bytes_used = 0;

  /* We need alias analysis.  */
  init_alias_analysis ();

  max_gcse_regno = max_reg_num ();

  alloc_reg_set_mem (max_gcse_regno);
  alloc_gcse_mem (f);
  alloc_hash_table (max_cuid, &expr_hash_table, 0);
  compute_hash_table_after_reload (&expr_hash_table);

  if (gcse_file)
    dump_hash_table (gcse_file, "Expression", &expr_hash_table);

  if (expr_hash_table.n_elems > 0)
    gcse_after_reload ();

  free_hash_table (&expr_hash_table);

  free_gcse_mem ();
  free_reg_set_mem ();

  /* We are finished with alias analysis.  */
  end_alias_analysis ();

  obstack_free (&gcse_obstack, NULL);
}

#include "gt-gcse.h"
gcc/opts.c
@@ -574,6 +574,7 @@ decode_options (unsigned int argc, const char **argv)
      flag_rename_registers = 1;
      flag_unswitch_loops = 1;
      flag_web = 1;
      flag_gcse_after_reload = 1;
    }

  if (optimize < 2 || optimize_size)

@@ -1035,6 +1036,10 @@ common_handle_option (size_t scode, const char *arg,
      flag_gcse_sm = value;
      break;

    case OPT_fgcse_after_reload:
      flag_gcse_after_reload = value;
      break;

    case OPT_fgcse_las:
      flag_gcse_las = value;
      break;
gcc/params.def
@@ -131,7 +131,25 @@ DEFPARAM(PARAM_MAX_GCSE_PASSES,
	"max-gcse-passes",
	"The maximum number of passes to make when doing GCSE",
	1)

/* This is the threshold ratio for performing partial redundancy
   elimination after reload.  We perform partial redundancy elimination
   when the following holds:

     (Redundant load execution count)
     --------------------------------- >= GCSE_AFTER_RELOAD_PARTIAL_FRACTION
      (Added loads execution count)                                        */
DEFPARAM(PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION,
	"gcse-after-reload-partial-fraction",
	"The threshold ratio for performing partial redundancy elimination \
after reload.",
	3)
/* This is the threshold ratio of the critical edges execution count compared
   to the redundant loads execution count that permits performing the load
   redundancy elimination in gcse after reload.  */
DEFPARAM(PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION,
	"gcse-after-reload-critical-fraction",
	"The threshold ratio of critical edges execution count that permits \
performing redundancy elimination after reload.",
	10)
/* This parameter limits the number of insns in a loop that will be unrolled,
   and by how much the loop is unrolled.
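To make the two ratio tests concrete, here is a minimal self-contained sketch of the profitability check from eliminate_partially_redundant_loads, with the default parameter values (3 and 10) folded in; the counts are invented for illustration:

#include <stdbool.h>

typedef long long gcov_type;	/* Profile counts, as in GCC.  */

/* Sketch of the two threshold tests guarding the transformation.  */
static bool
worth_eliminating (gcov_type ok_count, gcov_type not_ok_count,
		   gcov_type critical_count)
{
  const int partial_fraction = 3;   /* gcse-after-reload-partial-fraction  */
  const int critical_fraction = 10; /* gcse-after-reload-critical-fraction */

  if (ok_count < partial_fraction * not_ok_count)
    return false;		/* Added loads would execute too often.  */
  if (ok_count < critical_fraction * critical_count)
    return false;		/* Too much weight on critical edges.  */
  return true;
}

/* For example, worth_eliminating (900, 200, 50) is true, since
   900 >= 3 * 200 and 900 >= 10 * 50; worth_eliminating (500, 200, 50)
   is false, since 500 < 3 * 200.  */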
gcc/params.h
@@ -104,6 +104,10 @@ typedef enum compiler_param
  ((size_t) PARAM_VALUE (PARAM_MAX_GCSE_MEMORY))
#define MAX_GCSE_PASSES \
  PARAM_VALUE (PARAM_MAX_GCSE_PASSES)
#define GCSE_AFTER_RELOAD_PARTIAL_FRACTION \
  PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION)
#define GCSE_AFTER_RELOAD_CRITICAL_FRACTION \
  PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION)
#define MAX_UNROLLED_INSNS \
  PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS)
#endif /* ! GCC_PARAMS_H */
gcc/passes.c
@@ -158,6 +158,7 @@ enum dump_file_index
  DFI_lreg,
  DFI_greg,
  DFI_postreload,
  DFI_gcse2,
  DFI_flow2,
  DFI_peephole2,
  DFI_ce3,

@@ -178,7 +179,7 @@ enum dump_file_index
   Remaining -d letters:

	" e m q "
-	" JK O Q WXY "
+	" K O Q WXY "
*/

static struct dump_file_info dump_file_tbl[DFI_MAX] =

@@ -210,6 +211,7 @@ static struct dump_file_info dump_file_tbl[DFI_MAX] =
  { "lreg",	'l', 1, 0, 0 },
  { "greg",	'g', 1, 0, 0 },
  { "postreload", 'o', 1, 0, 0 },
  { "gcse2",	'J', 0, 0, 0 },
  { "flow2",	'w', 1, 0, 0 },
  { "peephole2", 'z', 1, 0, 0 },
  { "ce3",	'E', 1, 0, 0 },

@@ -788,6 +790,23 @@ rest_of_handle_sched2 (tree decl, rtx insns)
}
#endif

static void
rest_of_handle_gcse2 (tree decl, rtx insns)
{
  open_dump_file (DFI_gcse2, decl);

  gcse_after_reload_main (insns, dump_file);
  rebuild_jump_labels (insns);
  delete_trivially_dead_insns (insns, max_reg_num ());
  close_dump_file (DFI_gcse2, print_rtl_with_bb, insns);

  ggc_collect ();

#ifdef ENABLE_CHECKING
  verify_flow_info ();
#endif
}

/* Register allocation pre-pass, to reduce number of moves necessary
   for two-address machines.  */
static void

@@ -1842,6 +1861,9 @@ rest_of_compilation (tree decl)

  close_dump_file (DFI_postreload, print_rtl_with_bb, insns);

  if (optimize > 0 && flag_gcse_after_reload)
    rest_of_handle_gcse2 (decl, insns);

  /* Re-create the death notes which were deleted during reload.  */
  timevar_push (TV_FLOW2);
  open_dump_file (DFI_flow2, decl);
gcc/rtl.h
@@ -2289,6 +2289,7 @@ extern rtx fis_get_condition (rtx);
#ifdef BUFSIZ
extern int gcse_main (rtx, FILE *);
extern int bypass_jumps (FILE *);
extern void gcse_after_reload_main (rtx, FILE *);
#endif

/* In global.c */
gcc/toplev.c
@@ -526,6 +526,9 @@ int flag_gcse_sm = 1;

int flag_gcse_las = 1;

/* Nonzero means perform global cse after register allocation.  */
int flag_gcse_after_reload = 0;

/* Perform target register optimization before prologue / epilogue
   threading.  */

@@ -915,6 +918,7 @@ static const lang_independent_options f_options[] =
  {"gcse-lm", &flag_gcse_lm, 1 },
  {"gcse-sm", &flag_gcse_sm, 1 },
  {"gcse-las", &flag_gcse_las, 1 },
  {"gcse-after-reload", &flag_gcse_after_reload, 1},
  {"branch-target-load-optimize", &flag_branch_target_load_optimize, 1 },
  {"branch-target-load-optimize2", &flag_branch_target_load_optimize2, 1 },
  {"btr-bb-exclusive", &flag_btr_bb_exclusive, 1 },