invoke.texi (-malign-double): Re-add lost warning.
* invoke.texi (-malign-double): Re-add lost warning. * i386-protos.h (x86_output_mi_thunk): Declare. * unix.h (ASM_OUTPUT_MI_THUNK): Move offline to ... * i386.c (x86_output_mi_thunk): ... here; handle 64bits. * dwarf2out.c (output_call_frame_info): Do not skip unwind info when flag_asynchronous_unwind_tables is set. * flags.h (flag_reorder_functions): Declare. * function.c (prepare_function_start): Initialize frequency. * params.def (HOT_BB_COUNT_FRACTION, HOT_BB_FREQUENCY_FRACTION): New parameters. * Makefile.in (predict.o): Add dependency on target.h and params.h * defaults.h (HOT_TEXT_SECTION_NAME, UNLIKELY_EXECUTED_TEXT_SECTION_NAME): New macros. * predict.c (choose_function_section): New function. (estimate_bb_frequencies): Use it. * toplev.c (flag_reorder_functions): New global variable. (lang_independent_options): New. (parse_options_and_default_flags): Set. * varasm.c (assemble_start_function): Bypass function alignment for never executed functions. * invoke.texi (-freorder-blocks, -freorder-functions): Document. (param hot-bb-count-fraction, hot-bb-frequency-fraction): New. * tm.texi (HOT_TEXT_SECTION_NAME, UNLIKELY_EXECUTED_TEXT_SECTION_NAME): Document. Thu Jan 3 21:52:09 CET 2002 Jan Hubicka <jh@suse.cz> * predict.c: Include profile.h (MIN_COUNT): Rename to MIN_COUNT_FRACTION (maybe_hot_bb_p, probably_cold_bb_p, probably_never_executed_bb_p): Use the information about maximal counter in the program. Thu Dec 20 22:14:00 CET 2001 Jan Hubicka <jh@suse.cz> * basic-block.h (maybe_hot_bb_p, probably_cold_bb_p, probably_never_executed_bb_p): New functions. * cfgcleanup.c (outgoing_edges_match): Use them. * predict.c (MIN_COUNT, MIN_FREQUENCY): New macros. (maybe_hot_bb_p, probably_cold_bb_p, probably_never_executed_bb_p): New functions. * function.h (function): Add new field function_frequency. * predict.c (compute_function_frequency): New function. (estimate_probability): Call it. From-SVN: r53478
This commit is contained in:
parent
61ad9a3472
commit
194734e9e5
18 changed files with 418 additions and 159 deletions
|
@ -1,3 +1,52 @@
|
|||
Wed May 15 10:38:27 CEST 2002 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* invoke.texi (-malign-double): Re-add lost warning.
|
||||
|
||||
* i386-protos.h (x86_output_mi_thunk): Declare.
|
||||
* unix.h (ASM_OUTPUT_MI_THUNK): Move offline to ...
|
||||
* i386.c (x86_output_mi_thunk): ... here; handle 64bits.
|
||||
|
||||
* dwarf2out.c (output_call_frame_info): Do not skip unwind info
|
||||
when flag_asynchronous_unwind_tables is set.
|
||||
|
||||
* flags.h (flag_reorder_functions): Declare.
|
||||
* function.c (prepare_function_start): Initialize frequency.
|
||||
* params.def (HOT_BB_COUNT_FRACTION, HOT_BB_FREQUENCY_FRACTION): New parameters.
|
||||
* Makefile.in (predict.o): Add dependency on target.h and params.h
|
||||
* defaults.h (HOT_TEXT_SECTION_NAME,
|
||||
UNLIKELY_EXECUTED_TEXT_SECTION_NAME): New macros.
|
||||
* predict.c (choose_function_section): New function.
|
||||
(estimate_bb_frequencies): Use it.
|
||||
* toplev.c (flag_reorder_functions): New global variable.
|
||||
(lang_independent_options): New.
|
||||
(parse_options_and_default_flags): Set.
|
||||
* varasm.c (assemble_start_function): Bypass function alignment
|
||||
for never executed functions.
|
||||
* invoke.texi (-freorder-blocks, -freorder-functions): Document.
|
||||
(param hot-bb-count-fraction, hot-bb-frequency-fraction): New.
|
||||
* tm.texi (HOT_TEXT_SECTION_NAME, UNLIKELY_EXECUTED_TEXT_SECTION_NAME):
|
||||
Document.
|
||||
|
||||
Thu Jan 3 21:52:09 CET 2002 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* predict.c: Include profile.h
|
||||
(MIN_COUNT): Rename to MIN_COUNT_FRACTION
|
||||
(maybe_hot_bb_p, probably_cold_bb_p, probably_never_executed_bb_p):
|
||||
Use the information about maximal counter in the program.
|
||||
|
||||
Thu Dec 20 22:14:00 CET 2001 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* basic-block.h (maybe_hot_bb_p, probably_cold_bb_p,
|
||||
probably_never_executed_bb_p): New functions.
|
||||
* cfgcleanup.c (outgoing_edges_match): Use them.
|
||||
* predict.c (MIN_COUNT, MIN_FREQUENCY): New macros.
|
||||
(maybe_hot_bb_p, probably_cold_bb_p,
|
||||
probably_never_executed_bb_p): New functions.
|
||||
|
||||
* function.h (function): Add new field function_frequency.
|
||||
* predict.c (compute_function_frequency): New function.
|
||||
(estimate_probability): Call it.
|
||||
|
||||
2002-03-09 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR optimization/5172, optimization/5200
|
||||
|
|
|
@ -1579,7 +1579,8 @@ reg-stack.o : reg-stack.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) $(RECOG_H)
|
|||
varray.h function.h $(TM_P_H)
|
||||
predict.o: predict.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) flags.h \
|
||||
insn-config.h $(BASIC_BLOCK_H) $(REGS_H) hard-reg-set.h output.h toplev.h \
|
||||
$(RECOG_H) function.h except.h $(EXPR_H) $(TM_P_H) $(PREDICT_H) real.h
|
||||
$(RECOG_H) function.h except.h $(EXPR_H) $(TM_P_H) $(PREDICT_H) real.h \
|
||||
$(PARAMS_H) $(TARGET_H)
|
||||
lists.o: lists.c $(CONFIG_H) $(SYSTEM_H) toplev.h $(RTL_H) $(GGC_H)
|
||||
bb-reorder.o : bb-reorder.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) \
|
||||
flags.h $(BASIC_BLOCK_H) hard-reg-set.h output.h cfglayout.h $(TARGET_H)
|
||||
|
|
|
@ -628,6 +628,10 @@ extern rtx emit_block_insn_before PARAMS ((rtx, rtx, basic_block));
|
|||
extern void estimate_probability PARAMS ((struct loops *));
|
||||
extern void note_prediction_to_br_prob PARAMS ((void));
|
||||
extern void expected_value_to_br_prob PARAMS ((void));
|
||||
extern void note_prediction_to_br_prob PARAMS ((void));
|
||||
extern bool maybe_hot_bb_p PARAMS ((basic_block));
|
||||
extern bool probably_cold_bb_p PARAMS ((basic_block));
|
||||
extern bool probably_never_executed_bb_p PARAMS ((basic_block));
|
||||
|
||||
/* In flow.c */
|
||||
extern void init_flow PARAMS ((void));
|
||||
|
|
|
@ -1211,8 +1211,8 @@ outgoing_edges_match (mode, bb1, bb2)
|
|||
roughly similar. */
|
||||
if (match
|
||||
&& !optimize_size
|
||||
&& bb1->frequency > BB_FREQ_MAX / 1000
|
||||
&& bb2->frequency > BB_FREQ_MAX / 1000)
|
||||
&& maybe_hot_bb_p (bb1)
|
||||
&& maybe_hot_bb_p (bb2))
|
||||
{
|
||||
int prob2;
|
||||
|
||||
|
|
|
@ -197,4 +197,5 @@ extern tree ix86_handle_shared_attribute PARAMS ((tree *, tree, tree, int, bool
|
|||
extern unsigned int i386_pe_section_type_flags PARAMS ((tree, const char *,
|
||||
int));
|
||||
extern void i386_pe_asm_named_section PARAMS ((const char *, unsigned int));
|
||||
extern void x86_output_mi_thunk PARAMS ((FILE *, int, tree));
|
||||
#endif
|
||||
|
|
|
@ -13049,3 +13049,78 @@ x86_order_regs_for_local_alloc ()
|
|||
while (pos < FIRST_PSEUDO_REGISTER)
|
||||
reg_alloc_order [pos++] = 0;
|
||||
}
|
||||
|
||||
void
|
||||
x86_output_mi_thunk (file, delta, function)
|
||||
FILE *file;
|
||||
int delta;
|
||||
tree function;
|
||||
{
|
||||
tree parm;
|
||||
rtx xops[3];
|
||||
|
||||
if (ix86_regparm > 0)
|
||||
parm = TYPE_ARG_TYPES (TREE_TYPE (function));
|
||||
else
|
||||
parm = NULL_TREE;
|
||||
for (; parm; parm = TREE_CHAIN (parm))
|
||||
if (TREE_VALUE (parm) == void_type_node)
|
||||
break;
|
||||
|
||||
xops[0] = GEN_INT (delta);
|
||||
if (TARGET_64BIT)
|
||||
{
|
||||
int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
|
||||
xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
|
||||
output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
|
||||
if (flag_pic)
|
||||
{
|
||||
fprintf (file, "\tjmp *");
|
||||
assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
|
||||
fprintf (file, "@GOTPCREL(%%rip)\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf (file, "\tjmp ");
|
||||
assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
|
||||
fprintf (file, "\n");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (parm)
|
||||
xops[1] = gen_rtx_REG (SImode, 0);
|
||||
else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
|
||||
xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
|
||||
else
|
||||
xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
|
||||
output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
|
||||
|
||||
if (flag_pic)
|
||||
{
|
||||
xops[0] = pic_offset_table_rtx;
|
||||
xops[1] = gen_label_rtx ();
|
||||
xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
|
||||
|
||||
if (ix86_regparm > 2)
|
||||
abort ();
|
||||
output_asm_insn ("push{l}\t%0", xops);
|
||||
output_asm_insn ("call\t%P1", xops);
|
||||
ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
|
||||
output_asm_insn ("pop{l}\t%0", xops);
|
||||
output_asm_insn
|
||||
("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
|
||||
xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
|
||||
output_asm_insn
|
||||
("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
|
||||
asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
|
||||
asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf (file, "\tjmp ");
|
||||
assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
|
||||
fprintf (file, "\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -79,57 +79,5 @@ Boston, MA 02111-1307, USA. */
|
|||
|
||||
/* Output code to add DELTA to the first argument, and then jump to FUNCTION.
|
||||
Used for C++ multiple inheritance. */
|
||||
#define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION) \
|
||||
do { \
|
||||
tree parm; \
|
||||
rtx xops[3]; \
|
||||
\
|
||||
if (ix86_regparm > 0) \
|
||||
parm = TYPE_ARG_TYPES (TREE_TYPE (function)); \
|
||||
else \
|
||||
parm = NULL_TREE; \
|
||||
for (; parm; parm = TREE_CHAIN (parm)) \
|
||||
if (TREE_VALUE (parm) == void_type_node) \
|
||||
break; \
|
||||
\
|
||||
xops[0] = GEN_INT (DELTA); \
|
||||
if (parm) \
|
||||
xops[1] = gen_rtx_REG (SImode, 0); \
|
||||
else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (FUNCTION)))) \
|
||||
xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8)); \
|
||||
else \
|
||||
xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4)); \
|
||||
output_asm_insn ("add{l} {%0, %1|%1, %0}", xops); \
|
||||
\
|
||||
if (flag_pic && !TARGET_64BIT) \
|
||||
{ \
|
||||
xops[0] = pic_offset_table_rtx; \
|
||||
xops[1] = gen_label_rtx (); \
|
||||
xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); \
|
||||
\
|
||||
if (ix86_regparm > 2) \
|
||||
abort (); \
|
||||
output_asm_insn ("push{l}\t%0", xops); \
|
||||
output_asm_insn ("call\t%P1", xops); \
|
||||
ASM_OUTPUT_INTERNAL_LABEL (FILE, "L", CODE_LABEL_NUMBER (xops[1])); \
|
||||
output_asm_insn ("pop{l}\t%0", xops); \
|
||||
output_asm_insn ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops); \
|
||||
xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (FUNCTION), 0)); \
|
||||
output_asm_insn ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}",\
|
||||
xops); \
|
||||
asm_fprintf (FILE, "\tpop{l\t%%ebx|\t%%ebx}\n"); \
|
||||
asm_fprintf (FILE, "\tjmp\t{*%%ecx|%%ecx}\n"); \
|
||||
} \
|
||||
else if (flag_pic && TARGET_64BIT) \
|
||||
{ \
|
||||
fprintf (FILE, "\tjmp *"); \
|
||||
assemble_name (FILE, XSTR (XEXP (DECL_RTL (FUNCTION), 0), 0)); \
|
||||
fprintf (FILE, "@GOTPCREL(%%rip)\n"); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
fprintf (FILE, "\tjmp "); \
|
||||
assemble_name (FILE, XSTR (XEXP (DECL_RTL (FUNCTION), 0), 0)); \
|
||||
fprintf (FILE, "\n"); \
|
||||
} \
|
||||
} while (0)
|
||||
#define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION) \
|
||||
x86_output_mi_thunk (FILE, DELTA, FUNCTION);
|
||||
|
|
|
@ -517,4 +517,12 @@ You Lose! You must define PREFERRED_DEBUGGING_TYPE!
|
|||
&& !ROUND_TOWARDS_ZERO)
|
||||
#endif
|
||||
|
||||
#ifndef HOT_TEXT_SECTION_NAME
|
||||
#define HOT_TEXT_SECTION_NAME "text.hot"
|
||||
#endif
|
||||
|
||||
#ifndef UNLIKELY_EXECUTED_TEXT_SECTION_NAME
|
||||
#define UNLIKELY_EXECUTED_TEXT_SECTION_NAME "text.unlikely"
|
||||
#endif
|
||||
|
||||
#endif /* ! GCC_DEFAULTS_H */
|
||||
|
|
|
@ -278,6 +278,7 @@ in the following sections.
|
|||
-fomit-frame-pointer -foptimize-register-move @gol
|
||||
-foptimize-sibling-calls -fprefetch-loop-arrays @gol
|
||||
-freduce-all-givs -fregmove -frename-registers @gol
|
||||
-freorder-blocks -freorder-functions @gol
|
||||
-frerun-cse-after-loop -frerun-loop-opt @gol
|
||||
-fschedule-insns -fschedule-insns2 @gol
|
||||
-fsingle-precision-constant -fssa -fssa-ccp -fssa-dce @gol
|
||||
|
@ -3712,6 +3713,23 @@ non-determinism is of paramount import. This switch allows users to
|
|||
reduce non-determinism, possibly at the expense of inferior
|
||||
optimization.
|
||||
|
||||
@item -freorder-blocks
|
||||
@opindex freorder-blocks
|
||||
Reorder basic blocks in the compiled function in order to reduce the number of
|
||||
taken branches and improve code locality.
|
||||
|
||||
@item -freorder-functions
|
||||
@opindex freorder-functions
|
||||
Reorder functions in the object file in order to improve code locality.
|
||||
This is implemented by using special
|
||||
subsections @code{text.hot} for most frequently executed functions and
|
||||
@code{text.unlikely} for unlikely executed functions. Reordering is done by
|
||||
the linker so object file format must support named sections and linker must
|
||||
place them in a reasonable way.
|
||||
|
||||
Also, profile feedback must be available to make this option effective. See
|
||||
@option{-fprofile-arcs} for details.
|
||||
|
||||
@item -fstrict-aliasing
|
||||
@opindex fstrict-aliasing
|
||||
Allows the compiler to assume the strictest aliasing rules applicable to
|
||||
|
@ -3900,6 +3918,13 @@ The maximum number of instructions that a loop should have if that loop
|
|||
is unrolled, and if the loop is unrolled, it determines how many times
|
||||
the loop code is unrolled.
|
||||
|
||||
@item hot-bb-count-fraction
|
||||
Select fraction of the maximal count of repetitions of basic block in program
|
||||
given basic block needs to have to be considered hot.
|
||||
|
||||
@item hot-bb-frequency-fraction
|
||||
Select fraction of the maximal frequency of executions of basic block in
|
||||
function given basic block needs to have to be considered hot.
|
||||
@end table
|
||||
@end table
|
||||
|
||||
|
@ -7389,6 +7414,10 @@ boundary. Aligning @code{double} variables on a two word boundary will
|
|||
produce code that runs somewhat faster on a @samp{Pentium} at the
|
||||
expense of more memory.
|
||||
|
||||
@strong{Warning:} if you use the @samp{-malign-double} switch,
|
||||
structures containing the above types will be aligned differently than
|
||||
the published application binary interface specifications for the 386.
|
||||
|
||||
@item -m128bit-long-double
|
||||
@opindex m128bit-long-double
|
||||
Control the size of @code{long double} type. i386 application binary interface
|
||||
|
|
|
@ -5651,6 +5651,17 @@ Normally this is not needed, as simply defining @code{TEXT_SECTION_ASM_OP}
|
|||
is enough. The MIPS port uses this to sort all functions after all data
|
||||
declarations.
|
||||
|
||||
@findex HOT_TEXT_SECTION_NAME
|
||||
@item HOT_TEXT_SECTION_NAME
|
||||
If defined, a C string constant for the name of the section containing most
|
||||
frequently executed functions of the program. If not defined, GCC will provide
|
||||
a default definition if the target supports named sections.
|
||||
|
||||
@findex UNLIKELY_EXECUTED_TEXT_SECTION_NAME
|
||||
@item UNLIKELY_EXECUTED_TEXT_SECTION_NAME
|
||||
If defined, a C string constant for the name of the section containing unlikely
|
||||
executed functions in the program.
|
||||
|
||||
@findex DATA_SECTION_ASM_OP
|
||||
@item DATA_SECTION_ASM_OP
|
||||
A C expression whose value is a string, including spacing, containing the
|
||||
|
|
|
@ -1968,7 +1968,8 @@ output_call_frame_info (for_eh)
|
|||
fde = &fde_table[i];
|
||||
|
||||
/* Don't emit EH unwind info for leaf functions that don't need it. */
|
||||
if (for_eh && fde->nothrow && ! fde->uses_eh_lsda)
|
||||
if (!flag_asynchronous_unwind_tables && for_eh && fde->nothrow
|
||||
&& ! fde->uses_eh_lsda)
|
||||
continue;
|
||||
|
||||
ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, FDE_LABEL, for_eh + i * 2);
|
||||
|
|
|
@ -204,6 +204,10 @@ extern int flag_branch_probabilities;
|
|||
|
||||
extern int flag_reorder_blocks;
|
||||
|
||||
/* Nonzero if functions should be reordered. */
|
||||
|
||||
extern int flag_reorder_functions;
|
||||
|
||||
/* Nonzero if registers should be renamed. */
|
||||
|
||||
extern int flag_rename_registers;
|
||||
|
|
|
@ -6320,6 +6320,10 @@ prepare_function_start ()
|
|||
|
||||
cfun->arc_profile = profile_arc_flag || flag_test_coverage;
|
||||
|
||||
cfun->arc_profile = profile_arc_flag || flag_test_coverage;
|
||||
|
||||
cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
|
||||
|
||||
(*lang_hooks.function.init) (cfun);
|
||||
if (init_machine_status)
|
||||
(*init_machine_status) (cfun);
|
||||
|
|
|
@ -481,6 +481,19 @@ struct function
|
|||
|
||||
/* Nonzero if code to initialize arg_pointer_save_area has been emitted. */
|
||||
unsigned int arg_pointer_save_area_init : 1;
|
||||
|
||||
/* How commonly executed the function is. Initialized during branch
|
||||
probabilities pass. */
|
||||
enum function_frequency {
|
||||
/* This function most likely won't be executed at all.
|
||||
(set only when profile feedback is available). */
|
||||
FUNCTION_FREQUENCY_UNLIKELY_EXECUTED,
|
||||
/* The default value. */
|
||||
FUNCTION_FREQUENCY_NORMAL,
|
||||
/* Optimize this function hard
|
||||
(set only when profile feedback is available). */
|
||||
FUNCTION_FREQUENCY_HOT
|
||||
} function_frequency;
|
||||
};
|
||||
|
||||
/* The function currently being compiled. */
|
||||
|
|
|
@ -150,6 +150,15 @@ DEFPARAM(PARAM_MAX_UNROLLED_INSNS,
|
|||
"max-unrolled-insns",
|
||||
"The maximum number of instructions to consider to unroll in a loop",
|
||||
100)
|
||||
|
||||
DEFPARAM(HOT_BB_COUNT_FRACTION,
|
||||
"hot-bb-count-fraction",
|
||||
"Select fraction of the maximal count of repetitions of basic block in program given basic block needs to have to be considered hot",
|
||||
10000)
|
||||
DEFPARAM(HOT_BB_FREQUENCY_FRACTION,
|
||||
"hot-bb-frequency-fraction",
|
||||
"Select fraction of the maximal frequency of executions of basic block in function given basic block needs to have to be considered hot",
|
||||
1000)
|
||||
/*
|
||||
Local variables:
|
||||
mode:c
|
||||
|
|
294
gcc/predict.c
294
gcc/predict.c
|
@ -45,7 +45,10 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
|||
#include "recog.h"
|
||||
#include "expr.h"
|
||||
#include "predict.h"
|
||||
#include "profile.h"
|
||||
#include "real.h"
|
||||
#include "params.h"
|
||||
#include "target.h"
|
||||
|
||||
/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, 0.5,
|
||||
REAL_BB_FREQ_MAX. */
|
||||
|
@ -75,6 +78,8 @@ static void process_note_predictions PARAMS ((basic_block, int *, int *,
|
|||
static void process_note_prediction PARAMS ((basic_block, int *, int *,
|
||||
sbitmap *, int, int));
|
||||
static bool last_basic_block_p PARAMS ((basic_block));
|
||||
static void compute_function_frequency PARAMS ((void));
|
||||
static void choose_function_section PARAMS ((void));
|
||||
|
||||
/* Information we hold about each branch predictor.
|
||||
Filled using information from predict.def. */
|
||||
|
@ -103,6 +108,54 @@ static const struct predictor_info predictor_info[]= {
|
|||
{NULL, 0, 0}
|
||||
};
|
||||
#undef DEF_PREDICTOR
|
||||
|
||||
/* Return true in case BB can be CPU intensive and should be optimized
|
||||
for maximal performance. */
|
||||
|
||||
bool
|
||||
maybe_hot_bb_p (bb)
|
||||
basic_block bb;
|
||||
{
|
||||
if (profile_info.count_profiles_merged
|
||||
&& flag_branch_probabilities
|
||||
&& (bb->count
|
||||
< profile_info.max_counter_in_program
|
||||
/ PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
|
||||
return false;
|
||||
if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return true in case BB is cold and should be optimized for size. */
|
||||
|
||||
bool
|
||||
probably_cold_bb_p (bb)
|
||||
basic_block bb;
|
||||
{
|
||||
if (profile_info.count_profiles_merged
|
||||
&& flag_branch_probabilities
|
||||
&& (bb->count
|
||||
< profile_info.max_counter_in_program
|
||||
/ PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
|
||||
return true;
|
||||
if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Return true in case BB is probably never executed. */
|
||||
bool
|
||||
probably_never_executed_bb_p (bb)
|
||||
basic_block bb;
|
||||
{
|
||||
if (profile_info.count_profiles_merged
|
||||
&& flag_branch_probabilities)
|
||||
return ((bb->count + profile_info.count_profiles_merged / 2)
|
||||
/ profile_info.count_profiles_merged) == 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Return true if the one of outgoing edges is already predicted by
|
||||
PREDICTOR. */
|
||||
|
||||
|
@ -1095,118 +1148,159 @@ estimate_bb_frequencies (loops)
|
|||
REAL_VALUE_TYPE freq_max;
|
||||
enum machine_mode double_mode = TYPE_MODE (double_type_node);
|
||||
|
||||
REAL_VALUE_FROM_INT (real_zero, 0, 0, double_mode);
|
||||
REAL_VALUE_FROM_INT (real_one, 1, 0, double_mode);
|
||||
REAL_VALUE_FROM_INT (real_br_prob_base, REG_BR_PROB_BASE, 0, double_mode);
|
||||
REAL_VALUE_FROM_INT (real_bb_freq_max, BB_FREQ_MAX, 0, double_mode);
|
||||
REAL_VALUE_FROM_INT (real_one_half, 2, 0, double_mode);
|
||||
|
||||
REAL_ARITHMETIC (real_one_half, RDIV_EXPR, real_one, real_one_half);
|
||||
|
||||
REAL_ARITHMETIC (real_almost_one, RDIV_EXPR, real_one, real_br_prob_base);
|
||||
REAL_ARITHMETIC (real_almost_one, MINUS_EXPR, real_one, real_almost_one);
|
||||
|
||||
mark_dfs_back_edges ();
|
||||
if (flag_branch_probabilities)
|
||||
counts_to_freqs ();
|
||||
else
|
||||
{
|
||||
counts_to_freqs ();
|
||||
return;
|
||||
}
|
||||
REAL_VALUE_FROM_INT (real_zero, 0, 0, double_mode);
|
||||
REAL_VALUE_FROM_INT (real_one, 1, 0, double_mode);
|
||||
REAL_VALUE_FROM_INT (real_br_prob_base, REG_BR_PROB_BASE, 0, double_mode);
|
||||
REAL_VALUE_FROM_INT (real_bb_freq_max, BB_FREQ_MAX, 0, double_mode);
|
||||
REAL_VALUE_FROM_INT (real_one_half, 2, 0, double_mode);
|
||||
|
||||
/* Fill in the probability values in flowgraph based on the REG_BR_PROB
|
||||
notes. */
|
||||
for (i = 0; i < n_basic_blocks; i++)
|
||||
{
|
||||
rtx last_insn = BLOCK_END (i);
|
||||
REAL_ARITHMETIC (real_one_half, RDIV_EXPR, real_one, real_one_half);
|
||||
|
||||
if (GET_CODE (last_insn) != JUMP_INSN || !any_condjump_p (last_insn)
|
||||
/* Avoid handling of conditional jumps jumping to fallthru edge. */
|
||||
|| BASIC_BLOCK (i)->succ->succ_next == NULL)
|
||||
REAL_ARITHMETIC (real_almost_one, RDIV_EXPR, real_one, real_br_prob_base);
|
||||
REAL_ARITHMETIC (real_almost_one, MINUS_EXPR, real_one, real_almost_one);
|
||||
|
||||
mark_dfs_back_edges ();
|
||||
/* Fill in the probability values in flowgraph based on the REG_BR_PROB
|
||||
notes. */
|
||||
for (i = 0; i < n_basic_blocks; i++)
|
||||
{
|
||||
/* We can predict only conditional jumps at the moment.
|
||||
Expect each edge to be equally probable.
|
||||
?? In the future we want to make abnormal edges improbable. */
|
||||
int nedges = 0;
|
||||
edge e;
|
||||
rtx last_insn = BLOCK_END (i);
|
||||
|
||||
for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
|
||||
if (GET_CODE (last_insn) != JUMP_INSN || !any_condjump_p (last_insn)
|
||||
/* Avoid handling of conditional jumps jumping to fallthru edge. */
|
||||
|| BASIC_BLOCK (i)->succ->succ_next == NULL)
|
||||
{
|
||||
nedges++;
|
||||
if (e->probability != 0)
|
||||
break;
|
||||
/* We can predict only conditional jumps at the moment.
|
||||
Expect each edge to be equally probable.
|
||||
?? In the future we want to make abnormal edges improbable. */
|
||||
int nedges = 0;
|
||||
edge e;
|
||||
|
||||
for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
|
||||
{
|
||||
nedges++;
|
||||
if (e->probability != 0)
|
||||
break;
|
||||
}
|
||||
if (!e)
|
||||
for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
|
||||
e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
|
||||
}
|
||||
if (!e)
|
||||
for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
|
||||
e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
|
||||
}
|
||||
}
|
||||
|
||||
ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;
|
||||
ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;
|
||||
|
||||
/* Set up block info for each basic block. */
|
||||
alloc_aux_for_blocks (sizeof (struct block_info_def));
|
||||
alloc_aux_for_edges (sizeof (struct edge_info_def));
|
||||
for (i = -2; i < n_basic_blocks; i++)
|
||||
{
|
||||
edge e;
|
||||
basic_block bb;
|
||||
|
||||
if (i == -2)
|
||||
bb = ENTRY_BLOCK_PTR;
|
||||
else if (i == -1)
|
||||
bb = EXIT_BLOCK_PTR;
|
||||
else
|
||||
bb = BASIC_BLOCK (i);
|
||||
|
||||
BLOCK_INFO (bb)->tovisit = 0;
|
||||
for (e = bb->succ; e; e = e->succ_next)
|
||||
/* Set up block info for each basic block. */
|
||||
alloc_aux_for_blocks (sizeof (struct block_info_def));
|
||||
alloc_aux_for_edges (sizeof (struct edge_info_def));
|
||||
for (i = -2; i < n_basic_blocks; i++)
|
||||
{
|
||||
|
||||
REAL_VALUE_FROM_INT (EDGE_INFO (e)->back_edge_prob,
|
||||
e->probability, 0, double_mode);
|
||||
REAL_ARITHMETIC (EDGE_INFO (e)->back_edge_prob,
|
||||
RDIV_EXPR, EDGE_INFO (e)->back_edge_prob,
|
||||
real_br_prob_base);
|
||||
edge e;
|
||||
basic_block bb;
|
||||
|
||||
if (i == -2)
|
||||
bb = ENTRY_BLOCK_PTR;
|
||||
else if (i == -1)
|
||||
bb = EXIT_BLOCK_PTR;
|
||||
else
|
||||
bb = BASIC_BLOCK (i);
|
||||
|
||||
BLOCK_INFO (bb)->tovisit = 0;
|
||||
for (e = bb->succ; e; e = e->succ_next)
|
||||
{
|
||||
|
||||
REAL_VALUE_FROM_INT (EDGE_INFO (e)->back_edge_prob,
|
||||
e->probability, 0, double_mode);
|
||||
REAL_ARITHMETIC (EDGE_INFO (e)->back_edge_prob,
|
||||
RDIV_EXPR, EDGE_INFO (e)->back_edge_prob,
|
||||
real_br_prob_base);
|
||||
}
|
||||
}
|
||||
|
||||
/* First compute probabilities locally for each loop from innermost
|
||||
to outermost to examine probabilities for back edges. */
|
||||
estimate_loops_at_level (loops->tree_root);
|
||||
|
||||
/* Now fake loop around whole function to finalize probabilities. */
|
||||
for (i = 0; i < n_basic_blocks; i++)
|
||||
BLOCK_INFO (BASIC_BLOCK (i))->tovisit = 1;
|
||||
|
||||
BLOCK_INFO (ENTRY_BLOCK_PTR)->tovisit = 1;
|
||||
BLOCK_INFO (EXIT_BLOCK_PTR)->tovisit = 1;
|
||||
propagate_freq (ENTRY_BLOCK_PTR);
|
||||
|
||||
memcpy (&freq_max, &real_zero, sizeof (real_zero));
|
||||
for (i = 0; i < n_basic_blocks; i++)
|
||||
if (REAL_VALUES_LESS
|
||||
(freq_max, BLOCK_INFO (BASIC_BLOCK (i))->frequency))
|
||||
memcpy (&freq_max, &BLOCK_INFO (BASIC_BLOCK (i))->frequency,
|
||||
sizeof (freq_max));
|
||||
|
||||
for (i = -2; i < n_basic_blocks; i++)
|
||||
{
|
||||
basic_block bb;
|
||||
REAL_VALUE_TYPE tmp;
|
||||
|
||||
if (i == -2)
|
||||
bb = ENTRY_BLOCK_PTR;
|
||||
else if (i == -1)
|
||||
bb = EXIT_BLOCK_PTR;
|
||||
else
|
||||
bb = BASIC_BLOCK (i);
|
||||
|
||||
REAL_ARITHMETIC (tmp, MULT_EXPR, BLOCK_INFO (bb)->frequency,
|
||||
real_bb_freq_max);
|
||||
REAL_ARITHMETIC (tmp, RDIV_EXPR, tmp, freq_max);
|
||||
REAL_ARITHMETIC (tmp, PLUS_EXPR, tmp, real_one_half);
|
||||
bb->frequency = REAL_VALUE_UNSIGNED_FIX (tmp);
|
||||
}
|
||||
|
||||
free_aux_for_blocks ();
|
||||
free_aux_for_edges ();
|
||||
}
|
||||
|
||||
/* First compute probabilities locally for each loop from innermost
|
||||
to outermost to examine probabilities for back edges. */
|
||||
estimate_loops_at_level (loops->tree_root);
|
||||
|
||||
/* Now fake loop around whole function to finalize probabilities. */
|
||||
for (i = 0; i < n_basic_blocks; i++)
|
||||
BLOCK_INFO (BASIC_BLOCK (i))->tovisit = 1;
|
||||
|
||||
BLOCK_INFO (ENTRY_BLOCK_PTR)->tovisit = 1;
|
||||
BLOCK_INFO (EXIT_BLOCK_PTR)->tovisit = 1;
|
||||
propagate_freq (ENTRY_BLOCK_PTR);
|
||||
|
||||
memcpy (&freq_max, &real_zero, sizeof (real_zero));
|
||||
for (i = 0; i < n_basic_blocks; i++)
|
||||
if (REAL_VALUES_LESS (freq_max, BLOCK_INFO (BASIC_BLOCK (i))->frequency))
|
||||
memcpy (&freq_max, &BLOCK_INFO (BASIC_BLOCK (i))->frequency,
|
||||
sizeof (freq_max));
|
||||
|
||||
for (i = -2; i < n_basic_blocks; i++)
|
||||
{
|
||||
basic_block bb;
|
||||
REAL_VALUE_TYPE tmp;
|
||||
|
||||
if (i == -2)
|
||||
bb = ENTRY_BLOCK_PTR;
|
||||
else if (i == -1)
|
||||
bb = EXIT_BLOCK_PTR;
|
||||
else
|
||||
bb = BASIC_BLOCK (i);
|
||||
|
||||
REAL_ARITHMETIC (tmp, MULT_EXPR, BLOCK_INFO (bb)->frequency,
|
||||
real_bb_freq_max);
|
||||
REAL_ARITHMETIC (tmp, RDIV_EXPR, tmp, freq_max);
|
||||
REAL_ARITHMETIC (tmp, PLUS_EXPR, tmp, real_one_half);
|
||||
bb->frequency = REAL_VALUE_UNSIGNED_FIX (tmp);
|
||||
}
|
||||
|
||||
free_aux_for_blocks ();
|
||||
free_aux_for_edges ();
|
||||
compute_function_frequency ();
|
||||
if (flag_reorder_functions)
|
||||
choose_function_section ();
|
||||
}
|
||||
|
||||
/* Decide whether function is hot, cold or unlikely executed. */
|
||||
static void
|
||||
compute_function_frequency ()
|
||||
{
|
||||
int i;
|
||||
if (!profile_info.count_profiles_merged
|
||||
|| !flag_branch_probabilities)
|
||||
return;
|
||||
cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
|
||||
for (i = 0; i < n_basic_blocks; i++)
|
||||
{
|
||||
basic_block bb = BASIC_BLOCK (i);
|
||||
if (maybe_hot_bb_p (bb))
|
||||
{
|
||||
cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
|
||||
return;
|
||||
}
|
||||
if (!probably_never_executed_bb_p (bb))
|
||||
cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Choose appropriate section for the function. */
|
||||
static void
|
||||
choose_function_section ()
|
||||
{
|
||||
if (DECL_SECTION_NAME (current_function_decl)
|
||||
|| !targetm.have_named_sections)
|
||||
return;
|
||||
if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
|
||||
DECL_SECTION_NAME (current_function_decl) =
|
||||
build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
|
||||
if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
|
||||
DECL_SECTION_NAME (current_function_decl) =
|
||||
build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
|
||||
UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
|
||||
}
|
||||
|
|
|
@ -381,6 +381,10 @@ int flag_branch_probabilities = 0;
|
|||
|
||||
int flag_reorder_blocks = 0;
|
||||
|
||||
/* Nonzero if functions should be reordered. */
|
||||
|
||||
int flag_reorder_functions = 0;
|
||||
|
||||
/* Nonzero if registers should be renamed. */
|
||||
|
||||
int flag_rename_registers = 0;
|
||||
|
@ -1076,6 +1080,8 @@ static const lang_independent_options f_options[] =
|
|||
N_("Enable basic program profiling code") },
|
||||
{"reorder-blocks", &flag_reorder_blocks, 1,
|
||||
N_("Reorder basic blocks to improve code placement") },
|
||||
{"reorder-functions", &flag_reorder_functions, 1,
|
||||
N_("Reorder functions to improve code placement") },
|
||||
{"rename-registers", &flag_rename_registers, 1,
|
||||
N_("Do the register renaming optimization pass") },
|
||||
{"cprop-registers", &flag_cprop_registers, 1,
|
||||
|
@ -4657,6 +4663,7 @@ parse_options_and_default_flags (argc, argv)
|
|||
flag_strict_aliasing = 1;
|
||||
flag_delete_null_pointer_checks = 1;
|
||||
flag_reorder_blocks = 1;
|
||||
flag_reorder_functions = 1;
|
||||
}
|
||||
|
||||
if (optimize >= 3)
|
||||
|
|
|
@ -1197,7 +1197,8 @@ assemble_start_function (decl, fnname)
|
|||
/* Handle a user-specified function alignment.
|
||||
Note that we still need to align to FUNCTION_BOUNDARY, as above,
|
||||
because ASM_OUTPUT_MAX_SKIP_ALIGN might not do any alignment at all. */
|
||||
if (align_functions_log > align)
|
||||
if (align_functions_log > align
|
||||
&& cfun->function_frequency != FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
|
||||
{
|
||||
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
|
||||
ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file,
|
||||
|
|
Loading…
Add table
Reference in a new issue