invoke.texi (-malign-double): Re-add lost warning.

* invoke.texi (-malign-double): Re-add lost warning.

	* i386-protos.h (x86_output_mi_thunk): Declare.
	* unix.h (ASM_OUTPUT_MI_THUNK): Move offline to ...
	* i386.c (x86_output_mi_thunk): ... here; handle 64bits.

	* dwarf2out.c (output_call_frame_info): Do not skip unwind info
	when flag_asynchronous_unwind_tables is set.

	* flags.h (flag_reorder_functions): Declare.
	* function.c (prepare_function_start): Initialize frequency.
	* params.def (HOT_BB_COUNT_FRACTION, HOT_BB_FREQUENCY_FRACTION): New parameters.
	* Makefile.in (predict.o): Add dependency on target.h and params.h
	* defaults.h (HOT_TEXT_SECTION_NAME,
	UNLIKELY_EXECUTED_TEXT_SECTION_NAME): New macros.
	* predict.c (choose_function_section): New function.
	(estimate_bb_frequencies): Use it.
	* toplev.c (flag_reorder_functions): New global variable.
	(lang_independent_options): New.
	(parse_options_and_default_flags): Set.
	* varasm.c (assemble_start_function): Bypass function alignment
	for never executed functions.
	* invoke.texi (-freorder-blocks, -freorder-functions): Document.
	(param hot-bb-count-fraction, hot-bb-frequency-fraction): New.
	* tm.texi (HOT_TEXT_SECTION_NAME, UNLIKELY_EXECUTED_TEXT_SECTION_NAME):
	Document.

	Thu Jan  3 21:52:09 CET 2002  Jan Hubicka  <jh@suse.cz>

	* predict.c: Include profile.h
	(MIN_COUNT): Rename to MIN_COUNT_FRACTION
	(maybe_hot_bb_p, probably_cold_bb_p, probably_never_executed_bb_p):
	Use the information about maximal counter in the program.

	Thu Dec 20 22:14:00 CET 2001  Jan Hubicka  <jh@suse.cz>

	* basic-block.h (maybe_hot_bb_p, probably_cold_bb_p,
	probably_never_executed_bb_p): New functions.
	* cfgcleanup.c (outgoing_edges_match): Use them.
	* predict.c (MIN_COUNT, MIN_FREQUENCY): New macros.
	(maybe_hot_bb_p, probably_cold_bb_p,
	probably_never_executed_bb_p): New functions.

	* function.h (function): Add new field function_frequency.
	* predict.c (compute_function_frequency): New function.
	(estimate_probability): Call it.

From-SVN: r53478
This commit is contained in:
Jan Hubicka 2002-05-15 11:00:30 +02:00 committed by Jan Hubicka
parent 61ad9a3472
commit 194734e9e5
18 changed files with 418 additions and 159 deletions

View file

@ -1,3 +1,52 @@
Wed May 15 10:38:27 CEST 2002 Jan Hubicka <jh@suse.cz>
* invoke.texi (-malign-double): Re-add lost warning.
* i386-protos.h (x86_output_mi_thunk): Declare.
* unix.h (ASM_OUTPUT_MI_THUNK): Move offline to ...
* i386.c (x86_output_mi_thunk): ... here; handle 64bits.
* dwarf2out.c (output_call_frame_info): Do not skip unwind info
when flag_asynchronous_unwind_tables is set.
* flags.h (flag_reorder_functions): Declare.
* function.c (prepare_function_start): Initialize frequency.
* params.def (HOT_BB_COUNT_FRACTION, HOT_BB_FREQUENCY_FRACTION): New parameters.
* Makefile.in (predict.o): Add dependency on target.h and params.h
* defaults.h (HOT_TEXT_SECTION_NAME,
UNLIKELY_EXECUTED_TEXT_SECTION_NAME): New macros.
* predict.c (choose_function_section): New function.
(estimate_bb_frequencies): Use it.
* toplev.c (flag_reorder_functions): New global variable.
(lang_independent_options): New.
(parse_options_and_default_flags): Set.
* varasm.c (assemble_start_function): Bypass function alignment
for never executed functions.
* invoke.texi (-freorder-blocks, -freorder-functions): Document.
(param hot-bb-count-fraction, hot-bb-frequency-fraction): New.
* tm.texi (HOT_TEXT_SECTION_NAME, UNLIKELY_EXECUTED_TEXT_SECTION_NAME):
Document.
Thu Jan 3 21:52:09 CET 2002 Jan Hubicka <jh@suse.cz>
* predict.c: Include profile.h
(MIN_COUNT): Rename to MIN_COUNT_FRACTION
(maybe_hot_bb_p, probably_cold_bb_p, probably_never_executed_bb_p):
Use the information about maximal counter in the program.
Thu Dec 20 22:14:00 CET 2001 Jan Hubicka <jh@suse.cz>
* basic-block.h (maybe_hot_bb_p, probably_cold_bb_p,
probably_never_executed_bb_p): New functions.
* cfgcleanup.c (outgoing_edges_match): Use them.
* predict.c (MIN_COUNT, MIN_FREQUENCY): New macros.
(maybe_hot_bb_p, probably_cold_bb_p,
probably_never_executed_bb_p): New functions.
* function.h (function): Add new field function_frequency.
* predict.c (compute_function_frequency): New function.
(estimate_probability): Call it.
2002-03-09 Jakub Jelinek <jakub@redhat.com>
PR optimization/5172, optimization/5200

View file

@ -1579,7 +1579,8 @@ reg-stack.o : reg-stack.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) $(RECOG_H)
varray.h function.h $(TM_P_H)
predict.o: predict.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) flags.h \
insn-config.h $(BASIC_BLOCK_H) $(REGS_H) hard-reg-set.h output.h toplev.h \
$(RECOG_H) function.h except.h $(EXPR_H) $(TM_P_H) $(PREDICT_H) real.h
$(RECOG_H) function.h except.h $(EXPR_H) $(TM_P_H) $(PREDICT_H) real.h \
$(PARAMS_H) $(TARGET_H)
lists.o: lists.c $(CONFIG_H) $(SYSTEM_H) toplev.h $(RTL_H) $(GGC_H)
bb-reorder.o : bb-reorder.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) \
flags.h $(BASIC_BLOCK_H) hard-reg-set.h output.h cfglayout.h $(TARGET_H)

View file

@ -628,6 +628,10 @@ extern rtx emit_block_insn_before PARAMS ((rtx, rtx, basic_block));
/* Branch prediction entry points.  */
extern void estimate_probability PARAMS ((struct loops *));
extern void note_prediction_to_br_prob PARAMS ((void));
extern void expected_value_to_br_prob PARAMS ((void));
/* Predicates classifying a basic block as possibly hot, probably cold,
   or probably never executed.  */
extern bool maybe_hot_bb_p PARAMS ((basic_block));
extern bool probably_cold_bb_p PARAMS ((basic_block));
extern bool probably_never_executed_bb_p PARAMS ((basic_block));
/* In flow.c */
extern void init_flow PARAMS ((void));

View file

@ -1211,8 +1211,8 @@ outgoing_edges_match (mode, bb1, bb2)
roughly similar. */
if (match
&& !optimize_size
&& bb1->frequency > BB_FREQ_MAX / 1000
&& bb2->frequency > BB_FREQ_MAX / 1000)
&& maybe_hot_bb_p (bb1)
&& maybe_hot_bb_p (bb2))
{
int prob2;

View file

@ -197,4 +197,5 @@ extern tree ix86_handle_shared_attribute PARAMS ((tree *, tree, tree, int, bool
extern unsigned int i386_pe_section_type_flags PARAMS ((tree, const char *,
int));
extern void i386_pe_asm_named_section PARAMS ((const char *, unsigned int));
extern void x86_output_mi_thunk PARAMS ((FILE *, int, tree));
#endif

View file

@ -13049,3 +13049,78 @@ x86_order_regs_for_local_alloc ()
while (pos < FIRST_PSEUDO_REGISTER)
reg_alloc_order [pos++] = 0;
}
/* Output to FILE code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
void
x86_output_mi_thunk (file, delta, function)
     FILE *file;
     int delta;
     tree function;
{
  rtx ops[3];
  tree arg = NULL_TREE;

  /* When register parameters are enabled, walk the argument type list
     looking for its terminating void entry; ARG stays non-null only if
     such an entry is found.  */
  if (ix86_regparm > 0)
    arg = TYPE_ARG_TYPES (TREE_TYPE (function));
  while (arg && TREE_VALUE (arg) != void_type_node)
    arg = TREE_CHAIN (arg);

  ops[0] = GEN_INT (delta);

  if (TARGET_64BIT)
    {
      /* Adjust the first integer parameter register, or the second one
	 when aggregate_value_p holds for the return type.  */
      int reg_index
	= aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;

      ops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[reg_index]);
      output_asm_insn ("add{q} {%0, %1|%1, %0}", ops);

      /* Jump through the GOT entry when generating PIC, directly
	 otherwise.  */
      fprintf (file, flag_pic ? "\tjmp *" : "\tjmp ");
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      fprintf (file, flag_pic ? "@GOTPCREL(%%rip)\n" : "\n");
      return;
    }

  /* 32-bit: the argument to adjust is hard register 0 when ARG is
     non-null, otherwise a stack slot at offset 4, or 8 when
     aggregate_value_p holds for the return type.  */
  if (arg)
    ops[1] = gen_rtx_REG (SImode, 0);
  else
    {
      int slot_offset
	= aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) ? 8 : 4;

      ops[1] = gen_rtx_MEM (SImode,
			    plus_constant (stack_pointer_rtx, slot_offset));
    }
  output_asm_insn ("add{l} {%0, %1|%1, %0}", ops);

  if (!flag_pic)
    {
      fprintf (file, "\tjmp ");
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      fprintf (file, "\n");
      return;
    }

  /* 32-bit PIC: save the PIC register, compute the GOT address with a
     call/pop pair, load FUNCTION's address from the GOT into %ecx,
     restore %ebx and jump through %ecx.  */
  ops[0] = pic_offset_table_rtx;
  ops[1] = gen_label_rtx ();
  ops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

  /* NOTE(review): presumably %ecx must be free to serve as the scratch
     register below, hence no more than two register parameters --
     confirm.  */
  if (ix86_regparm > 2)
    abort ();

  output_asm_insn ("push{l}\t%0", ops);
  output_asm_insn ("call\t%P1", ops);
  ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (ops[1]));
  output_asm_insn ("pop{l}\t%0", ops);
  output_asm_insn ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}",
		   ops);
  ops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
  output_asm_insn ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}",
		   ops);
  asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
  asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
}

View file

@ -79,57 +79,5 @@ Boston, MA 02111-1307, USA. */
/* Output code to add DELTA to the first argument, and then jump to FUNCTION.
Used for C++ multiple inheritance. */
#define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION) \
do { \
tree parm; \
rtx xops[3]; \
\
if (ix86_regparm > 0) \
parm = TYPE_ARG_TYPES (TREE_TYPE (function)); \
else \
parm = NULL_TREE; \
for (; parm; parm = TREE_CHAIN (parm)) \
if (TREE_VALUE (parm) == void_type_node) \
break; \
\
xops[0] = GEN_INT (DELTA); \
if (parm) \
xops[1] = gen_rtx_REG (SImode, 0); \
else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (FUNCTION)))) \
xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8)); \
else \
xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4)); \
output_asm_insn ("add{l} {%0, %1|%1, %0}", xops); \
\
if (flag_pic && !TARGET_64BIT) \
{ \
xops[0] = pic_offset_table_rtx; \
xops[1] = gen_label_rtx (); \
xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); \
\
if (ix86_regparm > 2) \
abort (); \
output_asm_insn ("push{l}\t%0", xops); \
output_asm_insn ("call\t%P1", xops); \
ASM_OUTPUT_INTERNAL_LABEL (FILE, "L", CODE_LABEL_NUMBER (xops[1])); \
output_asm_insn ("pop{l}\t%0", xops); \
output_asm_insn ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops); \
xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (FUNCTION), 0)); \
output_asm_insn ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}",\
xops); \
asm_fprintf (FILE, "\tpop{l\t%%ebx|\t%%ebx}\n"); \
asm_fprintf (FILE, "\tjmp\t{*%%ecx|%%ecx}\n"); \
} \
else if (flag_pic && TARGET_64BIT) \
{ \
fprintf (FILE, "\tjmp *"); \
assemble_name (FILE, XSTR (XEXP (DECL_RTL (FUNCTION), 0), 0)); \
fprintf (FILE, "@GOTPCREL(%%rip)\n"); \
} \
else \
{ \
fprintf (FILE, "\tjmp "); \
assemble_name (FILE, XSTR (XEXP (DECL_RTL (FUNCTION), 0), 0)); \
fprintf (FILE, "\n"); \
} \
} while (0)
/* Output code to add DELTA to the first argument, and then jump to FUNCTION.
   The work is done out of line by x86_output_mi_thunk; THUNK_FNDECL is
   not used.  The expansion deliberately has no trailing semicolon: the
   user of the macro supplies it, so the macro remains a single statement
   and is safe in an unbraced if/else.  */
#define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION) \
  x86_output_mi_thunk (FILE, DELTA, FUNCTION)

View file

@ -517,4 +517,12 @@ You Lose! You must define PREFERRED_DEBUGGING_TYPE!
&& !ROUND_TOWARDS_ZERO)
#endif
/* Default name of the section that receives the most frequently
   executed functions; a target may define its own name first.  */
#ifndef HOT_TEXT_SECTION_NAME
#define HOT_TEXT_SECTION_NAME "text.hot"
#endif
/* Default name of the section that receives functions that are unlikely
   to be executed; a target may define its own name first.  */
#ifndef UNLIKELY_EXECUTED_TEXT_SECTION_NAME
#define UNLIKELY_EXECUTED_TEXT_SECTION_NAME "text.unlikely"
#endif
#endif /* ! GCC_DEFAULTS_H */

View file

@ -278,6 +278,7 @@ in the following sections.
-fomit-frame-pointer -foptimize-register-move @gol
-foptimize-sibling-calls -fprefetch-loop-arrays @gol
-freduce-all-givs -fregmove -frename-registers @gol
-freorder-blocks -freorder-functions @gol
-frerun-cse-after-loop -frerun-loop-opt @gol
-fschedule-insns -fschedule-insns2 @gol
-fsingle-precision-constant -fssa -fssa-ccp -fssa-dce @gol
@ -3712,6 +3713,23 @@ non-determinism is of paramount import. This switch allows users to
reduce non-determinism, possibly at the expense of inferior
optimization.
@item -freorder-blocks
@opindex freorder-blocks
Reorder basic blocks in the compiled function in order to reduce the number of
taken branches and improve code locality.
@item -freorder-functions
@opindex freorder-functions
Reorder functions in the object file in order to improve code locality.
This is implemented by using special
subsections @code{text.hot} for most frequently executed functions and
@code{text.unlikely} for unlikely executed functions. Reordering is done by
the linker so object file format must support named sections and linker must
place them in a reasonable way.
Also profile feedback must be available to make this option effective. See
@option{-fprofile-arcs} for details.
@item -fstrict-aliasing
@opindex fstrict-aliasing
Allows the compiler to assume the strictest aliasing rules applicable to
@ -3900,6 +3918,13 @@ The maximum number of instructions that a loop should have if that loop
is unrolled, and if the loop is unrolled, it determines how many times
the loop code is unrolled.
@item hot-bb-count-fraction
Select the fraction of the maximal count of repetitions of a basic block in the
program that a given basic block needs to have to be considered hot.
@item hot-bb-frequency-fraction
Select the fraction of the maximal frequency of executions of a basic block in
a function that a given basic block needs to have to be considered hot.
@end table
@end table
@ -7389,6 +7414,10 @@ boundary. Aligning @code{double} variables on a two word boundary will
produce code that runs somewhat faster on a @samp{Pentium} at the
expense of more memory.
@strong{Warning:} if you use the @samp{-malign-double} switch,
structures containing the above types will be aligned differently than
the published application binary interface specifications for the 386.
@item -m128bit-long-double
@opindex m128bit-long-double
Control the size of @code{long double} type. i386 application binary interface

View file

@ -5651,6 +5651,17 @@ Normally this is not needed, as simply defining @code{TEXT_SECTION_ASM_OP}
is enough. The MIPS port uses this to sort all functions after all data
declarations.
@findex HOT_TEXT_SECTION_NAME
@item HOT_TEXT_SECTION_NAME
If defined, a C string constant for the name of the section containing most
frequently executed functions of the program. If not defined, GCC will provide
a default definition if the target supports named sections.
@findex UNLIKELY_EXECUTED_TEXT_SECTION_NAME
@item UNLIKELY_EXECUTED_TEXT_SECTION_NAME
If defined, a C string constant for the name of the section containing unlikely
executed functions in the program.
@findex DATA_SECTION_ASM_OP
@item DATA_SECTION_ASM_OP
A C expression whose value is a string, including spacing, containing the

View file

@ -1968,7 +1968,8 @@ output_call_frame_info (for_eh)
fde = &fde_table[i];
/* Don't emit EH unwind info for leaf functions that don't need it. */
if (for_eh && fde->nothrow && ! fde->uses_eh_lsda)
if (!flag_asynchronous_unwind_tables && for_eh && fde->nothrow
&& ! fde->uses_eh_lsda)
continue;
ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, FDE_LABEL, for_eh + i * 2);

View file

@ -204,6 +204,10 @@ extern int flag_branch_probabilities;
extern int flag_reorder_blocks;
/* Nonzero if functions should be reordered. */
extern int flag_reorder_functions;
/* Nonzero if registers should be renamed. */
extern int flag_rename_registers;

View file

@ -6320,6 +6320,10 @@ prepare_function_start ()
cfun->arc_profile = profile_arc_flag || flag_test_coverage;
cfun->arc_profile = profile_arc_flag || flag_test_coverage;
cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
(*lang_hooks.function.init) (cfun);
if (init_machine_status)
(*init_machine_status) (cfun);

View file

@ -481,6 +481,19 @@ struct function
/* Nonzero if code to initialize arg_pointer_save_area has been emitted. */
unsigned int arg_pointer_save_area_init : 1;
/* How commonly executed the function is. Initialized during branch
probabilities pass. */
enum function_frequency {
/* This function most likely won't be executed at all.
(set only when profile feedback is available). */
FUNCTION_FREQUENCY_UNLIKELY_EXECUTED,
/* The default value. */
FUNCTION_FREQUENCY_NORMAL,
/* Optimize this function hard
(set only when profile feedback is available). */
FUNCTION_FREQUENCY_HOT
} function_frequency;
};
/* The function currently being compiled. */

View file

@ -150,6 +150,15 @@ DEFPARAM(PARAM_MAX_UNROLLED_INSNS,
"max-unrolled-insns",
"The maximum number of instructions to consider to unroll in a loop",
100)
/* Divisor applied to the largest profile counter in the program: a
   basic block whose execution count is below the quotient is not
   considered hot.  The last argument (10000) is presumably the default
   value -- confirm against the DEFPARAM definition.  */
DEFPARAM(HOT_BB_COUNT_FRACTION,
"hot-bb-count-fraction",
"Select fraction of the maximal count of repetitions of basic block in program given basic block needs to have to be considered hot",
10000)
/* Divisor applied to the maximal basic block frequency: a basic block
   whose estimated frequency is below the quotient is not considered
   hot.  The last argument (1000) is presumably the default value --
   confirm against the DEFPARAM definition.  */
DEFPARAM(HOT_BB_FREQUENCY_FRACTION,
"hot-bb-frequency-fraction",
"Select fraction of the maximal frequency of executions of basic block in function given basic block needs to have to be considered hot",
1000)
/*
Local variables:
mode:c

View file

@ -45,7 +45,10 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "recog.h"
#include "expr.h"
#include "predict.h"
#include "profile.h"
#include "real.h"
#include "params.h"
#include "target.h"
/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, 0.5,
REAL_BB_FREQ_MAX. */
@ -75,6 +78,8 @@ static void process_note_predictions PARAMS ((basic_block, int *, int *,
static void process_note_prediction PARAMS ((basic_block, int *, int *,
sbitmap *, int, int));
static bool last_basic_block_p PARAMS ((basic_block));
static void compute_function_frequency PARAMS ((void));
static void choose_function_section PARAMS ((void));
/* Information we hold about each branch predictor.
Filled using information from predict.def. */
@ -103,6 +108,54 @@ static const struct predictor_info predictor_info[]= {
{NULL, 0, 0}
};
#undef DEF_PREDICTOR
/* Return true if BB may be CPU intensive and therefore should be
   optimized for maximal performance.  */
bool
maybe_hot_bb_p (bb)
     basic_block bb;
{
  /* When merged profile data is in use, BB cannot be hot if its
     execution count is below the HOT_BB_COUNT_FRACTION-th part of the
     largest counter seen in the program.  */
  if (profile_info.count_profiles_merged && flag_branch_probabilities)
    {
      if (bb->count < (profile_info.max_counter_in_program
		       / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
	return false;
    }

  /* Otherwise BB is hot unless its frequency is below the
     HOT_BB_FREQUENCY_FRACTION-th part of BB_FREQ_MAX.  */
  return (bb->frequency
	  >= BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION));
}
/* Return true if BB is cold and therefore should be optimized for
   size.  */
bool
probably_cold_bb_p (bb)
     basic_block bb;
{
  /* When merged profile data is in use, an execution count below the
     HOT_BB_COUNT_FRACTION-th part of the largest counter in the program
     already marks BB as cold.  */
  if (profile_info.count_profiles_merged && flag_branch_probabilities)
    {
      if (bb->count < (profile_info.max_counter_in_program
		       / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
	return true;
    }

  /* Otherwise decide on the frequency alone.  */
  return (bb->frequency
	  < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION));
}
/* Return true if BB is probably never executed.  Without merged profile
   data in use the answer is always false.  */
bool
probably_never_executed_bb_p (bb)
     basic_block bb;
{
  if (!profile_info.count_profiles_merged || !flag_branch_probabilities)
    return false;

  /* Divide the execution count by the number of merged profiles,
     rounding to nearest; BB counts as never executed when the result
     is zero.  */
  return ((bb->count + profile_info.count_profiles_merged / 2)
	  / profile_info.count_profiles_merged) == 0;
}
/* Return true if the one of outgoing edges is already predicted by
PREDICTOR. */
@ -1095,118 +1148,159 @@ estimate_bb_frequencies (loops)
REAL_VALUE_TYPE freq_max;
enum machine_mode double_mode = TYPE_MODE (double_type_node);
REAL_VALUE_FROM_INT (real_zero, 0, 0, double_mode);
REAL_VALUE_FROM_INT (real_one, 1, 0, double_mode);
REAL_VALUE_FROM_INT (real_br_prob_base, REG_BR_PROB_BASE, 0, double_mode);
REAL_VALUE_FROM_INT (real_bb_freq_max, BB_FREQ_MAX, 0, double_mode);
REAL_VALUE_FROM_INT (real_one_half, 2, 0, double_mode);
REAL_ARITHMETIC (real_one_half, RDIV_EXPR, real_one, real_one_half);
REAL_ARITHMETIC (real_almost_one, RDIV_EXPR, real_one, real_br_prob_base);
REAL_ARITHMETIC (real_almost_one, MINUS_EXPR, real_one, real_almost_one);
mark_dfs_back_edges ();
if (flag_branch_probabilities)
counts_to_freqs ();
else
{
counts_to_freqs ();
return;
}
REAL_VALUE_FROM_INT (real_zero, 0, 0, double_mode);
REAL_VALUE_FROM_INT (real_one, 1, 0, double_mode);
REAL_VALUE_FROM_INT (real_br_prob_base, REG_BR_PROB_BASE, 0, double_mode);
REAL_VALUE_FROM_INT (real_bb_freq_max, BB_FREQ_MAX, 0, double_mode);
REAL_VALUE_FROM_INT (real_one_half, 2, 0, double_mode);
/* Fill in the probability values in flowgraph based on the REG_BR_PROB
notes. */
for (i = 0; i < n_basic_blocks; i++)
{
rtx last_insn = BLOCK_END (i);
REAL_ARITHMETIC (real_one_half, RDIV_EXPR, real_one, real_one_half);
if (GET_CODE (last_insn) != JUMP_INSN || !any_condjump_p (last_insn)
/* Avoid handling of conditional jumps jumping to fallthru edge. */
|| BASIC_BLOCK (i)->succ->succ_next == NULL)
REAL_ARITHMETIC (real_almost_one, RDIV_EXPR, real_one, real_br_prob_base);
REAL_ARITHMETIC (real_almost_one, MINUS_EXPR, real_one, real_almost_one);
mark_dfs_back_edges ();
/* Fill in the probability values in flowgraph based on the REG_BR_PROB
notes. */
for (i = 0; i < n_basic_blocks; i++)
{
/* We can predict only conditional jumps at the moment.
Expect each edge to be equally probable.
?? In the future we want to make abnormal edges improbable. */
int nedges = 0;
edge e;
rtx last_insn = BLOCK_END (i);
for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
if (GET_CODE (last_insn) != JUMP_INSN || !any_condjump_p (last_insn)
/* Avoid handling of conditional jumps jumping to fallthru edge. */
|| BASIC_BLOCK (i)->succ->succ_next == NULL)
{
nedges++;
if (e->probability != 0)
break;
/* We can predict only conditional jumps at the moment.
Expect each edge to be equally probable.
?? In the future we want to make abnormal edges improbable. */
int nedges = 0;
edge e;
for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
{
nedges++;
if (e->probability != 0)
break;
}
if (!e)
for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
}
if (!e)
for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
}
}
ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;
ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;
/* Set up block info for each basic block. */
alloc_aux_for_blocks (sizeof (struct block_info_def));
alloc_aux_for_edges (sizeof (struct edge_info_def));
for (i = -2; i < n_basic_blocks; i++)
{
edge e;
basic_block bb;
if (i == -2)
bb = ENTRY_BLOCK_PTR;
else if (i == -1)
bb = EXIT_BLOCK_PTR;
else
bb = BASIC_BLOCK (i);
BLOCK_INFO (bb)->tovisit = 0;
for (e = bb->succ; e; e = e->succ_next)
/* Set up block info for each basic block. */
alloc_aux_for_blocks (sizeof (struct block_info_def));
alloc_aux_for_edges (sizeof (struct edge_info_def));
for (i = -2; i < n_basic_blocks; i++)
{
REAL_VALUE_FROM_INT (EDGE_INFO (e)->back_edge_prob,
e->probability, 0, double_mode);
REAL_ARITHMETIC (EDGE_INFO (e)->back_edge_prob,
RDIV_EXPR, EDGE_INFO (e)->back_edge_prob,
real_br_prob_base);
edge e;
basic_block bb;
if (i == -2)
bb = ENTRY_BLOCK_PTR;
else if (i == -1)
bb = EXIT_BLOCK_PTR;
else
bb = BASIC_BLOCK (i);
BLOCK_INFO (bb)->tovisit = 0;
for (e = bb->succ; e; e = e->succ_next)
{
REAL_VALUE_FROM_INT (EDGE_INFO (e)->back_edge_prob,
e->probability, 0, double_mode);
REAL_ARITHMETIC (EDGE_INFO (e)->back_edge_prob,
RDIV_EXPR, EDGE_INFO (e)->back_edge_prob,
real_br_prob_base);
}
}
/* First compute probabilities locally for each loop from innermost
to outermost to examine probabilities for back edges. */
estimate_loops_at_level (loops->tree_root);
/* Now fake loop around whole function to finalize probabilities. */
for (i = 0; i < n_basic_blocks; i++)
BLOCK_INFO (BASIC_BLOCK (i))->tovisit = 1;
BLOCK_INFO (ENTRY_BLOCK_PTR)->tovisit = 1;
BLOCK_INFO (EXIT_BLOCK_PTR)->tovisit = 1;
propagate_freq (ENTRY_BLOCK_PTR);
memcpy (&freq_max, &real_zero, sizeof (real_zero));
for (i = 0; i < n_basic_blocks; i++)
if (REAL_VALUES_LESS
(freq_max, BLOCK_INFO (BASIC_BLOCK (i))->frequency))
memcpy (&freq_max, &BLOCK_INFO (BASIC_BLOCK (i))->frequency,
sizeof (freq_max));
for (i = -2; i < n_basic_blocks; i++)
{
basic_block bb;
REAL_VALUE_TYPE tmp;
if (i == -2)
bb = ENTRY_BLOCK_PTR;
else if (i == -1)
bb = EXIT_BLOCK_PTR;
else
bb = BASIC_BLOCK (i);
REAL_ARITHMETIC (tmp, MULT_EXPR, BLOCK_INFO (bb)->frequency,
real_bb_freq_max);
REAL_ARITHMETIC (tmp, RDIV_EXPR, tmp, freq_max);
REAL_ARITHMETIC (tmp, PLUS_EXPR, tmp, real_one_half);
bb->frequency = REAL_VALUE_UNSIGNED_FIX (tmp);
}
free_aux_for_blocks ();
free_aux_for_edges ();
}
/* First compute probabilities locally for each loop from innermost
to outermost to examine probabilities for back edges. */
estimate_loops_at_level (loops->tree_root);
/* Now fake loop around whole function to finalize probabilities. */
for (i = 0; i < n_basic_blocks; i++)
BLOCK_INFO (BASIC_BLOCK (i))->tovisit = 1;
BLOCK_INFO (ENTRY_BLOCK_PTR)->tovisit = 1;
BLOCK_INFO (EXIT_BLOCK_PTR)->tovisit = 1;
propagate_freq (ENTRY_BLOCK_PTR);
memcpy (&freq_max, &real_zero, sizeof (real_zero));
for (i = 0; i < n_basic_blocks; i++)
if (REAL_VALUES_LESS (freq_max, BLOCK_INFO (BASIC_BLOCK (i))->frequency))
memcpy (&freq_max, &BLOCK_INFO (BASIC_BLOCK (i))->frequency,
sizeof (freq_max));
for (i = -2; i < n_basic_blocks; i++)
{
basic_block bb;
REAL_VALUE_TYPE tmp;
if (i == -2)
bb = ENTRY_BLOCK_PTR;
else if (i == -1)
bb = EXIT_BLOCK_PTR;
else
bb = BASIC_BLOCK (i);
REAL_ARITHMETIC (tmp, MULT_EXPR, BLOCK_INFO (bb)->frequency,
real_bb_freq_max);
REAL_ARITHMETIC (tmp, RDIV_EXPR, tmp, freq_max);
REAL_ARITHMETIC (tmp, PLUS_EXPR, tmp, real_one_half);
bb->frequency = REAL_VALUE_UNSIGNED_FIX (tmp);
}
free_aux_for_blocks ();
free_aux_for_edges ();
compute_function_frequency ();
if (flag_reorder_functions)
choose_function_section ();
}
/* Classify the current function as hot, normal or unlikely executed and
   record the result in cfun->function_frequency.  Nothing is changed
   unless merged profile data is in use.  */
static void
compute_function_frequency ()
{
  int index;

  if (!(profile_info.count_profiles_merged && flag_branch_probabilities))
    return;

  /* Start from the most pessimistic class and upgrade it while scanning
     the basic blocks.  */
  cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;

  for (index = 0; index < n_basic_blocks; index++)
    {
      basic_block block = BASIC_BLOCK (index);

      /* A single possibly hot block makes the whole function hot; no
	 need to look any further.  */
      if (maybe_hot_bb_p (block))
	{
	  cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
	  return;
	}

      /* Any block that may be executed makes the function at least
	 normal.  */
      if (!probably_never_executed_bb_p (block))
	cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
    }
}
/* Choose an appropriate section for the current function according to
   cfun->function_frequency.  A section name already set on the function
   is kept, and nothing is done when the target lacks named sections.  */
static void
choose_function_section ()
{
  const char *section_name = NULL;

  if (DECL_SECTION_NAME (current_function_decl))
    return;
  if (!targetm.have_named_sections)
    return;

  switch (cfun->function_frequency)
    {
    case FUNCTION_FREQUENCY_HOT:
      section_name = HOT_TEXT_SECTION_NAME;
      break;

    case FUNCTION_FREQUENCY_UNLIKELY_EXECUTED:
      section_name = UNLIKELY_EXECUTED_TEXT_SECTION_NAME;
      break;

    default:
      /* Functions of normal frequency keep the default section.  */
      break;
    }

  if (section_name)
    DECL_SECTION_NAME (current_function_decl)
      = build_string (strlen (section_name), section_name);
}

View file

@ -381,6 +381,10 @@ int flag_branch_probabilities = 0;
int flag_reorder_blocks = 0;
/* Nonzero if functions should be reordered. */
int flag_reorder_functions = 0;
/* Nonzero if registers should be renamed. */
int flag_rename_registers = 0;
@ -1076,6 +1080,8 @@ static const lang_independent_options f_options[] =
N_("Enable basic program profiling code") },
{"reorder-blocks", &flag_reorder_blocks, 1,
N_("Reorder basic blocks to improve code placement") },
{"reorder-functions", &flag_reorder_functions, 1,
N_("Reorder functions to improve code placement") },
{"rename-registers", &flag_rename_registers, 1,
N_("Do the register renaming optimization pass") },
{"cprop-registers", &flag_cprop_registers, 1,
@ -4657,6 +4663,7 @@ parse_options_and_default_flags (argc, argv)
flag_strict_aliasing = 1;
flag_delete_null_pointer_checks = 1;
flag_reorder_blocks = 1;
flag_reorder_functions = 1;
}
if (optimize >= 3)

View file

@ -1197,7 +1197,8 @@ assemble_start_function (decl, fnname)
/* Handle a user-specified function alignment.
Note that we still need to align to FUNCTION_BOUNDARY, as above,
because ASM_OUTPUT_MAX_SKIP_ALIGN might not do any alignment at all. */
if (align_functions_log > align)
if (align_functions_log > align
&& cfun->function_frequency != FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
{
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file,