cgraph.c (cgraph_edge): Handle inconsistent counts when setting count_scale.
2008-08-18 Paul Yuan <yingbo.com@gmail.com> Vinodha Ramasamy <vinodha@google.com> * cgraph.c (cgraph_edge): Handle inconsistent counts when setting count_scale. * value-prof.c (check_counter): Fix the counter if flag_profile_correction is true. (tree_divmod_fixed_value_transform, tree_mod_pow2_value_transform, tree_mod_subtract_transform): Follow check_counter parameter change. * common.opt (fprofile-correction): New option. * mcf.c: New file. * profile.c (edge_info, EDGE_INFO): Moved to new file profile.h. (sum_edge_counts, is_edge_inconsistent, correct_negative_edge_counts, is_inconsistent, set_bb_counts, read_profile_edge_counts): New functions. (compute_branch_probabilities): Refactored. Invokes mcf_smooth_cfg if flag_profile_correction is set. Co-Authored-By: Vinodha Ramasamy <vinodha@google.com> From-SVN: r139208
This commit is contained in:
parent
808cc41726
commit
52c76998c7
7 changed files with 232 additions and 76 deletions
|
@ -1,3 +1,22 @@
|
|||
2008-08-18 Paul Yuan <yingbo.com@gmail.com>
|
||||
Vinodha Ramasamy <vinodha@google.com>
|
||||
|
||||
* cgraph.c (cgraph_edge): Handle inconsistent counts when setting
|
||||
count_scale.
|
||||
* value-prof.c (check_counter): Fix the counter if
|
||||
flag_profile_correction is true.
|
||||
(tree_divmod_fixed_value_transform, tree_mod_pow2_value_transform,
|
||||
tree_mod_subtract_transform):
|
||||
Follow check_counter parameter change.
|
||||
* common.opt (fprofile-correction): New option.
|
||||
* mcf.c: New file.
|
||||
* profile.c (edge_info, EDGE_INFO): Moved to new file profile.h.
|
||||
(sum_edge_counts, is_edge_inconsistent, correct_negative_edge_counts,
|
||||
is_inconsistent, set_bb_counts, read_profile_edge_counts): New
|
||||
functions.
|
||||
(compute_branch_probabilities): Refactored. Invokes mcf_smooth_cfg if
|
||||
flag_profile_correction is set.
|
||||
|
||||
2008-08-18 Richard Sandiford <rdsandiford@googlemail.com>
|
||||
|
||||
* rtlanal.c (subreg_offset_representable_p): Check HARD_REGNO_MODE_OK.
|
||||
|
|
|
@ -1112,6 +1112,7 @@ OBJS-common = \
|
|||
loop-unroll.o \
|
||||
loop-unswitch.o \
|
||||
lower-subreg.o \
|
||||
mcf.o \
|
||||
mode-switching.o \
|
||||
modulo-sched.o \
|
||||
omega.o \
|
||||
|
@ -2717,7 +2718,9 @@ var-tracking.o : var-tracking.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
|
|||
profile.o : profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
|
||||
$(TREE_H) $(FLAGS_H) output.h $(REGS_H) $(EXPR_H) $(FUNCTION_H) \
|
||||
$(TOPLEV_H) $(COVERAGE_H) $(TREE_FLOW_H) value-prof.h cfghooks.h \
|
||||
$(CFGLOOP_H) $(TIMEVAR_H) tree-pass.h
|
||||
$(CFGLOOP_H) $(TIMEVAR_H) tree-pass.h profile.h
|
||||
mcf.o : mcf.c profile.h $(CONFIG_H) $(SYSTEM_H) $(TM_H) coretypes.h \
|
||||
$(BASIC_BLOCK_H) output.h langhooks.h $(GCOV_IO_H) $(TREE_H)
|
||||
tree-profile.o : tree-profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
|
||||
$(TM_H) $(RTL_H) $(TREE_H) $(FLAGS_H) output.h $(REGS_H) $(EXPR_H) \
|
||||
$(FUNCTION_H) $(TOPLEV_H) $(COVERAGE_H) $(TREE_H) value-prof.h $(TREE_DUMP_H) \
|
||||
|
@ -3213,7 +3216,7 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \
|
|||
$(srcdir)/emit-rtl.c $(srcdir)/except.c $(srcdir)/explow.c $(srcdir)/expr.c \
|
||||
$(srcdir)/function.c $(srcdir)/except.h \
|
||||
$(srcdir)/gcse.c $(srcdir)/integrate.c $(srcdir)/lists.c $(srcdir)/optabs.c \
|
||||
$(srcdir)/profile.c $(srcdir)/regclass.c \
|
||||
$(srcdir)/profile.c $(srcdir)/regclass.c $(srcdir)/mcf.c \
|
||||
$(srcdir)/reg-stack.c $(srcdir)/cfglayout.c $(srcdir)/cfglayout.h \
|
||||
$(srcdir)/sdbout.c $(srcdir)/stor-layout.c \
|
||||
$(srcdir)/stringpool.c $(srcdir)/tree.c $(srcdir)/varasm.c \
|
||||
|
|
|
@ -516,7 +516,7 @@ cgraph_edge (struct cgraph_node *node, gimple call_stmt)
|
|||
if (node->call_site_hash)
|
||||
return (struct cgraph_edge *)
|
||||
htab_find_with_hash (node->call_site_hash, call_stmt,
|
||||
htab_hash_pointer (call_stmt));
|
||||
htab_hash_pointer (call_stmt));
|
||||
|
||||
/* This loop may turn out to be performance problem. In such case adding
|
||||
hashtables into call nodes with very many edges is probably best
|
||||
|
@ -1208,7 +1208,12 @@ cgraph_clone_node (struct cgraph_node *n, gcov_type count, int freq,
|
|||
new_node->master_clone = n->master_clone;
|
||||
new_node->count = count;
|
||||
if (n->count)
|
||||
count_scale = new_node->count * REG_BR_PROB_BASE / n->count;
|
||||
{
|
||||
if (new_node->count > n->count)
|
||||
count_scale = REG_BR_PROB_BASE;
|
||||
else
|
||||
count_scale = new_node->count * REG_BR_PROB_BASE / n->count;
|
||||
}
|
||||
else
|
||||
count_scale = 0;
|
||||
if (update_original)
|
||||
|
|
|
@ -821,6 +821,10 @@ Common Joined RejectNegative
|
|||
Set the top-level directory for storing the profile data.
|
||||
The default is 'pwd'.
|
||||
|
||||
fprofile-correction
|
||||
Common Report Var(flag_profile_correction)
|
||||
Enable correction of flow inconsistent profile data input
|
||||
|
||||
fprofile-generate
|
||||
Common
|
||||
Enable common options for generating profile info for profile feedback directed optimizations
|
||||
|
|
|
@ -342,7 +342,8 @@ Objective-C and Objective-C++ Dialects}.
|
|||
-fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol
|
||||
-fomit-frame-pointer -foptimize-register-move -foptimize-sibling-calls @gol
|
||||
-fpeel-loops -fpredictive-commoning -fprefetch-loop-arrays @gol
|
||||
-fprofile-dir=@var{path} -fprofile-generate -fprofile-generate=@var{path} @gol
|
||||
-fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol
|
||||
-fprofile-generate=@var{path} @gol
|
||||
-fprofile-use -fprofile-use=@var{path} -fprofile-values @gol
|
||||
-freciprocal-math -fregmove -frename-registers -freorder-blocks @gol
|
||||
-freorder-blocks-and-partition -freorder-functions @gol
|
||||
|
@ -6369,6 +6370,13 @@ and occasionally eliminate the copy.
|
|||
|
||||
Enabled at levels @option{-O}, @option{-O2}, @option{-O3}, @option{-Os}.
|
||||
|
||||
@item -fprofile-correction
|
||||
@opindex fprofile-correction
|
||||
Profiles collected using an instrumented binary for multi-threaded programs may
|
||||
be inconsistent due to missed counter updates. When this option is specified,
|
||||
GCC will use heuristics to correct or smooth out such inconsistencies. By
|
||||
default, GCC will emit an error message when an inconsistent profile is detected.
|
||||
|
||||
@item -fprofile-dir=@var{path}
|
||||
@opindex fprofile-dir
|
||||
|
||||
|
|
206
gcc/profile.c
206
gcc/profile.c
|
@ -69,21 +69,11 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "cfgloop.h"
|
||||
#include "tree-pass.h"
|
||||
|
||||
#include "profile.h"
|
||||
|
||||
/* Hooks for profiling. */
|
||||
static struct profile_hooks* profile_hooks;
|
||||
|
||||
/* Additional information about the edges we need. */
|
||||
struct edge_info {
|
||||
unsigned int count_valid : 1;
|
||||
|
||||
/* Is on the spanning tree. */
|
||||
unsigned int on_tree : 1;
|
||||
|
||||
/* Pretend this edge does not exist (it is abnormal and we've
|
||||
inserted a fake to compensate). */
|
||||
unsigned int ignore : 1;
|
||||
};
|
||||
|
||||
struct bb_info {
|
||||
unsigned int count_valid : 1;
|
||||
|
||||
|
@ -92,7 +82,6 @@ struct bb_info {
|
|||
gcov_type pred_count;
|
||||
};
|
||||
|
||||
#define EDGE_INFO(e) ((struct edge_info *) (e)->aux)
|
||||
#define BB_INFO(b) ((struct bb_info *) (b)->aux)
|
||||
|
||||
|
||||
|
@ -124,7 +113,6 @@ static gcov_type * get_exec_counts (void);
|
|||
static basic_block find_group (basic_block);
|
||||
static void union_groups (basic_block, basic_block);
|
||||
|
||||
|
||||
/* Add edge instrumentation code to the entire insn chain.
|
||||
|
||||
F is the first insn of the chain.
|
||||
|
@ -278,64 +266,84 @@ get_exec_counts (void)
|
|||
|
||||
return counts;
|
||||
}
|
||||
|
||||
|
||||
/* Compute the branch probabilities for the various branches.
|
||||
Annotate them accordingly. */
|
||||
|
||||
static bool
|
||||
is_edge_inconsistent (VEC(edge,gc) *edges)
|
||||
{
|
||||
edge e;
|
||||
edge_iterator ei;
|
||||
FOR_EACH_EDGE (e, ei, edges)
|
||||
{
|
||||
if (!EDGE_INFO (e)->ignore)
|
||||
{
|
||||
if (e->count < 0)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
compute_branch_probabilities (void)
|
||||
correct_negative_edge_counts (void)
|
||||
{
|
||||
basic_block bb;
|
||||
int i;
|
||||
int num_edges = 0;
|
||||
int changes;
|
||||
int passes;
|
||||
int hist_br_prob[20];
|
||||
int num_never_executed;
|
||||
int num_branches;
|
||||
gcov_type *exec_counts = get_exec_counts ();
|
||||
int exec_counts_pos = 0;
|
||||
edge e;
|
||||
edge_iterator ei;
|
||||
|
||||
/* Very simple sanity checks so we catch bugs in our profiling code. */
|
||||
if (profile_info)
|
||||
{
|
||||
if (profile_info->run_max * profile_info->runs < profile_info->sum_max)
|
||||
{
|
||||
error ("corrupted profile info: run_max * runs < sum_max");
|
||||
exec_counts = NULL;
|
||||
}
|
||||
|
||||
if (profile_info->sum_all < profile_info->sum_max)
|
||||
{
|
||||
error ("corrupted profile info: sum_all is smaller than sum_max");
|
||||
exec_counts = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Attach extra info block to each bb. */
|
||||
|
||||
alloc_aux_for_blocks (sizeof (struct bb_info));
|
||||
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
|
||||
{
|
||||
edge e;
|
||||
edge_iterator ei;
|
||||
|
||||
FOR_EACH_EDGE (e, ei, bb->succs)
|
||||
if (!EDGE_INFO (e)->ignore)
|
||||
BB_INFO (bb)->succ_count++;
|
||||
FOR_EACH_EDGE (e, ei, bb->preds)
|
||||
if (!EDGE_INFO (e)->ignore)
|
||||
BB_INFO (bb)->pred_count++;
|
||||
{
|
||||
if (e->count < 0)
|
||||
e->count = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Check consistency.
|
||||
Return true if inconsistency is found. */
|
||||
static bool
|
||||
is_inconsistent (void)
|
||||
{
|
||||
basic_block bb;
|
||||
FOR_EACH_BB (bb)
|
||||
{
|
||||
if (is_edge_inconsistent (bb->preds))
|
||||
return true;
|
||||
if (is_edge_inconsistent (bb->succs))
|
||||
return true;
|
||||
if ( bb->count != sum_edge_counts (bb->preds)
|
||||
|| (bb->count != sum_edge_counts (bb->succs) &&
|
||||
!(find_edge (bb, EXIT_BLOCK_PTR) != NULL &&
|
||||
block_ends_with_call_p (bb))))
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Avoid predicting entry on exit nodes. */
|
||||
BB_INFO (EXIT_BLOCK_PTR)->succ_count = 2;
|
||||
BB_INFO (ENTRY_BLOCK_PTR)->pred_count = 2;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Set each basic block count to the sum of its outgoing edge counts */
|
||||
static void
|
||||
set_bb_counts (void)
|
||||
{
|
||||
basic_block bb;
|
||||
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
|
||||
{
|
||||
bb->count = sum_edge_counts (bb->succs);
|
||||
gcc_assert (bb->count >= 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* Reads profile data and returns total number of edge counts read */
|
||||
static int
|
||||
read_profile_edge_counts (gcov_type *exec_counts)
|
||||
{
|
||||
basic_block bb;
|
||||
int num_edges = 0;
|
||||
int exec_counts_pos = 0;
|
||||
/* For each edge not on the spanning tree, set its execution count from
|
||||
the .da file. */
|
||||
|
||||
/* The first count in the .da file is the number of times that the function
|
||||
was entered. This is the exec_count for block zero. */
|
||||
|
||||
|
@ -373,6 +381,63 @@ compute_branch_probabilities (void)
|
|||
}
|
||||
}
|
||||
|
||||
return num_edges;
|
||||
}
|
||||
|
||||
/* Compute the branch probabilities for the various branches.
|
||||
Annotate them accordingly. */
|
||||
|
||||
static void
|
||||
compute_branch_probabilities (void)
|
||||
{
|
||||
basic_block bb;
|
||||
int i;
|
||||
int num_edges = 0;
|
||||
int changes;
|
||||
int passes;
|
||||
int hist_br_prob[20];
|
||||
int num_never_executed;
|
||||
int num_branches;
|
||||
gcov_type *exec_counts = get_exec_counts ();
|
||||
int inconsistent = 0;
|
||||
|
||||
/* Very simple sanity checks so we catch bugs in our profiling code. */
|
||||
if (profile_info)
|
||||
{
|
||||
if (profile_info->run_max * profile_info->runs < profile_info->sum_max)
|
||||
{
|
||||
error ("corrupted profile info: run_max * runs < sum_max");
|
||||
exec_counts = NULL;
|
||||
}
|
||||
|
||||
if (profile_info->sum_all < profile_info->sum_max)
|
||||
{
|
||||
error ("corrupted profile info: sum_all is smaller than sum_max");
|
||||
exec_counts = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Attach extra info block to each bb. */
|
||||
alloc_aux_for_blocks (sizeof (struct bb_info));
|
||||
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
|
||||
{
|
||||
edge e;
|
||||
edge_iterator ei;
|
||||
|
||||
FOR_EACH_EDGE (e, ei, bb->succs)
|
||||
if (!EDGE_INFO (e)->ignore)
|
||||
BB_INFO (bb)->succ_count++;
|
||||
FOR_EACH_EDGE (e, ei, bb->preds)
|
||||
if (!EDGE_INFO (e)->ignore)
|
||||
BB_INFO (bb)->pred_count++;
|
||||
}
|
||||
|
||||
/* Avoid predicting entry on exit nodes. */
|
||||
BB_INFO (EXIT_BLOCK_PTR)->succ_count = 2;
|
||||
BB_INFO (ENTRY_BLOCK_PTR)->pred_count = 2;
|
||||
|
||||
num_edges = read_profile_edge_counts (exec_counts);
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "\n%d edge counts read\n", num_edges);
|
||||
|
||||
|
@ -502,6 +567,31 @@ compute_branch_probabilities (void)
|
|||
gcc_assert (!BB_INFO (bb)->succ_count && !BB_INFO (bb)->pred_count);
|
||||
}
|
||||
|
||||
/* Check for inconsistent basic block counts */
|
||||
inconsistent = is_inconsistent ();
|
||||
|
||||
if (inconsistent)
|
||||
{
|
||||
if (flag_profile_correction)
|
||||
{
|
||||
/* Inconsistency detected. Make it flow-consistent. */
|
||||
static int informed = 0;
|
||||
if (informed == 0)
|
||||
{
|
||||
informed = 1;
|
||||
inform ("correcting inconsistent profile data");
|
||||
}
|
||||
correct_negative_edge_counts ();
|
||||
/* Set bb counts to the sum of the outgoing edge counts */
|
||||
set_bb_counts ();
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "\nCalling mcf_smooth_cfg\n");
|
||||
mcf_smooth_cfg ();
|
||||
}
|
||||
else
|
||||
error ("corrupted profile info: profile data is not flow-consistent");
|
||||
}
|
||||
|
||||
/* For every edge, calculate its branch probability and add a reg_note
|
||||
to the branch insn to indicate this. */
|
||||
|
||||
|
|
|
@ -453,18 +453,32 @@ free_histograms (void)
|
|||
somehow. */
|
||||
|
||||
static bool
|
||||
check_counter (gimple stmt, const char *name, gcov_type all, gcov_type bb_count)
|
||||
check_counter (gimple stmt, const char * name,
|
||||
gcov_type *count, gcov_type *all, gcov_type bb_count)
|
||||
{
|
||||
if (all != bb_count)
|
||||
if (*all != bb_count || *count > *all)
|
||||
{
|
||||
location_t locus;
|
||||
locus = (stmt != NULL)
|
||||
? gimple_location (stmt)
|
||||
: DECL_SOURCE_LOCATION (current_function_decl);
|
||||
error ("%HCorrupted value profile: %s profiler overall count (%d) "
|
||||
"does not match BB count (%d)", &locus, name, (int)all,
|
||||
(int)bb_count);
|
||||
return true;
|
||||
? gimple_location (stmt)
|
||||
: DECL_SOURCE_LOCATION (current_function_decl);
|
||||
if (flag_profile_correction)
|
||||
{
|
||||
inform ("%HCorrecting inconsistent value profile: "
|
||||
"%s profiler overall count (%d) does not match BB count "
|
||||
"(%d)", &locus, name, (int)all, (int)bb_count);
|
||||
*all = bb_count;
|
||||
if (*count > *all)
|
||||
*count = *all;
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
error ("%HCorrupted value profile: %s profiler overall count (%d) "
|
||||
"does not match BB count (%d)", &locus, name, (int)all,
|
||||
(int)bb_count);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -658,7 +672,7 @@ gimple_divmod_fixed_value_transform (gimple_stmt_iterator *si)
|
|||
|| !maybe_hot_bb_p (gimple_bb (stmt)))
|
||||
return false;
|
||||
|
||||
if (check_counter (stmt, "value", all, gimple_bb (stmt)->count))
|
||||
if (check_counter (stmt, "value", &count, &all, gimple_bb (stmt)->count))
|
||||
return false;
|
||||
|
||||
/* Compute probability of taking the optimal path. */
|
||||
|
@ -818,7 +832,7 @@ gimple_mod_pow2_value_transform (gimple_stmt_iterator *si)
|
|||
/* Compute probability of taking the optimal path. */
|
||||
all = count + wrong_values;
|
||||
|
||||
if (check_counter (stmt, "pow2", all, gimple_bb (stmt)->count))
|
||||
if (check_counter (stmt, "pow2", &count, &all, gimple_bb (stmt)->count))
|
||||
return false;
|
||||
|
||||
if (all > 0)
|
||||
|
@ -982,12 +996,17 @@ gimple_mod_subtract_transform (gimple_stmt_iterator *si)
|
|||
count2 = histogram->hvalue.counters[1];
|
||||
|
||||
/* Compute probability of taking the optimal path. */
|
||||
if (check_counter (stmt, "interval", all, gimple_bb (stmt)->count))
|
||||
if (check_counter (stmt, "interval", &count1, &all, gimple_bb (stmt)->count))
|
||||
{
|
||||
gimple_remove_histogram_value (cfun, stmt, histogram);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (flag_profile_correction && count1 + count2 > all)
|
||||
all = count1 + count2;
|
||||
|
||||
gcc_assert (count1 + count2 <= all);
|
||||
|
||||
/* We require that we use just subtractions in at least 50% of all
|
||||
evaluations. */
|
||||
count = 0;
|
||||
|
@ -1160,7 +1179,7 @@ static bool
|
|||
gimple_ic_transform (gimple stmt)
|
||||
{
|
||||
histogram_value histogram;
|
||||
gcov_type val, count, all;
|
||||
gcov_type val, count, all, bb_all;
|
||||
gcov_type prob;
|
||||
tree callee;
|
||||
gimple modify;
|
||||
|
@ -1186,6 +1205,14 @@ gimple_ic_transform (gimple stmt)
|
|||
if (4 * count <= 3 * all)
|
||||
return false;
|
||||
|
||||
bb_all = gimple_bb (stmt)->count;
|
||||
/* The order of CHECK_COUNTER calls is important -
|
||||
since check_counter can correct the third parameter
|
||||
and we want to make count <= all <= bb_all. */
|
||||
if ( check_counter (stmt, "ic", &all, &bb_all, bb_all)
|
||||
|| check_counter (stmt, "ic", &count, &all, all))
|
||||
return false;
|
||||
|
||||
if (all > 0)
|
||||
prob = (count * REG_BR_PROB_BASE + all / 2) / all;
|
||||
else
|
||||
|
@ -1372,7 +1399,7 @@ gimple_stringops_transform (gimple_stmt_iterator *gsi)
|
|||
at least 80% of time. */
|
||||
if ((6 * count / 5) < all || !maybe_hot_bb_p (gimple_bb (stmt)))
|
||||
return false;
|
||||
if (check_counter (stmt, "value", all, gimple_bb (stmt)->count))
|
||||
if (check_counter (stmt, "value", &count, &all, gimple_bb (stmt)->count))
|
||||
return false;
|
||||
if (all > 0)
|
||||
prob = (count * REG_BR_PROB_BASE + all / 2) / all;
|
||||
|
|
Loading…
Add table
Reference in a new issue