cgraph.c (cgraph_edge): Handle inconsistent counts when setting count_scale.

2008-08-18  Paul Yuan  <yingbo.com@gmail.com>
	    Vinodha Ramasamy  <vinodha@google.com>

	* cgraph.c (cgraph_edge): Handle inconsistent counts when setting
	count_scale.
	* value-prof.c (check_counter): Fix the counter if
	flag_profile_correction is true.
	(tree_divmod_fixed_value_transform, tree_mod_pow2_value_transform,
	tree_mod_subtract_transform):
	Follow check_counter parameter change.
	* common.opt (fprofile-correction): New option.
	* mcf.c: New file.
	* profile.c (edge_info, EDGE_INFO): Moved to new file profile.h.
	(sum_edge_counts, is_edge_inconsistent, correct_negative_edge_counts,
	is_inconsistent, set_bb_counts, read_profile_edge_counts): New
	functions.
	(compute_branch_probabilities): Refactored. Invokes mcf_smooth_cfg if
	flag_profile_correction is set.


Co-Authored-By: Vinodha Ramasamy <vinodha@google.com>

From-SVN: r139208
This commit is contained in:
Paul Yuan 2008-08-18 19:02:44 +00:00 committed by Seongbae Park
parent 808cc41726
commit 52c76998c7
7 changed files with 232 additions and 76 deletions

View file

@ -1,3 +1,22 @@
2008-08-18 Paul Yuan <yingbo.com@gmail.com>
Vinodha Ramasamy <vinodha@google.com>
* cgraph.c (cgraph_edge): Handle inconsistent counts when setting
count_scale.
* value-prof.c (check_counter): Fix the counter if
flag_profile_correction is true.
(tree_divmod_fixed_value_transform, tree_mod_pow2_value_transform,
tree_mod_subtract_transform):
Follow check_counter parameter change.
* common.opt (fprofile-correction): New option.
* mcf.c: New file.
* profile.c (edge_info, EDGE_INFO): Moved to new file profile.h.
(sum_edge_counts, is_edge_inconsistent, correct_negative_edge_counts,
is_inconsistent, set_bb_counts, read_profile_edge_counts): New
functions.
(compute_branch_probabilities): Refactored. Invokes mcf_smooth_cfg if
flag_profile_correction is set.
2008-08-18 Richard Sandiford <rdsandiford@googlemail.com>
* rtlanal.c (subreg_offset_representable_p): Check HARD_REGNO_MODE_OK.

View file

@ -1112,6 +1112,7 @@ OBJS-common = \
loop-unroll.o \
loop-unswitch.o \
lower-subreg.o \
mcf.o \
mode-switching.o \
modulo-sched.o \
omega.o \
@ -2717,7 +2718,9 @@ var-tracking.o : var-tracking.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
profile.o : profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
$(TREE_H) $(FLAGS_H) output.h $(REGS_H) $(EXPR_H) $(FUNCTION_H) \
$(TOPLEV_H) $(COVERAGE_H) $(TREE_FLOW_H) value-prof.h cfghooks.h \
$(CFGLOOP_H) $(TIMEVAR_H) tree-pass.h
$(CFGLOOP_H) $(TIMEVAR_H) tree-pass.h profile.h
mcf.o : mcf.c profile.h $(CONFIG_H) $(SYSTEM_H) $(TM_H) coretypes.h \
$(BASIC_BLOCK_H) output.h langhooks.h $(GCOV_IO_H) $(TREE_H)
tree-profile.o : tree-profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(RTL_H) $(TREE_H) $(FLAGS_H) output.h $(REGS_H) $(EXPR_H) \
$(FUNCTION_H) $(TOPLEV_H) $(COVERAGE_H) $(TREE_H) value-prof.h $(TREE_DUMP_H) \
@ -3213,7 +3216,7 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \
$(srcdir)/emit-rtl.c $(srcdir)/except.c $(srcdir)/explow.c $(srcdir)/expr.c \
$(srcdir)/function.c $(srcdir)/except.h \
$(srcdir)/gcse.c $(srcdir)/integrate.c $(srcdir)/lists.c $(srcdir)/optabs.c \
$(srcdir)/profile.c $(srcdir)/regclass.c \
$(srcdir)/profile.c $(srcdir)/regclass.c $(srcdir)/mcf.c \
$(srcdir)/reg-stack.c $(srcdir)/cfglayout.c $(srcdir)/cfglayout.h \
$(srcdir)/sdbout.c $(srcdir)/stor-layout.c \
$(srcdir)/stringpool.c $(srcdir)/tree.c $(srcdir)/varasm.c \

View file

@ -516,7 +516,7 @@ cgraph_edge (struct cgraph_node *node, gimple call_stmt)
if (node->call_site_hash)
return (struct cgraph_edge *)
htab_find_with_hash (node->call_site_hash, call_stmt,
htab_hash_pointer (call_stmt));
htab_hash_pointer (call_stmt));
/* This loop may turn out to be performance problem. In such case adding
hashtables into call nodes with very many edges is probably best
@ -1208,7 +1208,12 @@ cgraph_clone_node (struct cgraph_node *n, gcov_type count, int freq,
new_node->master_clone = n->master_clone;
new_node->count = count;
if (n->count)
count_scale = new_node->count * REG_BR_PROB_BASE / n->count;
{
if (new_node->count > n->count)
count_scale = REG_BR_PROB_BASE;
else
count_scale = new_node->count * REG_BR_PROB_BASE / n->count;
}
else
count_scale = 0;
if (update_original)

View file

@ -821,6 +821,10 @@ Common Joined RejectNegative
Set the top-level directory for storing the profile data.
The default is 'pwd'.
fprofile-correction
Common Report Var(flag_profile_correction)
Enable correction of flow inconsistent profile data input
fprofile-generate
Common
Enable common options for generating profile info for profile feedback directed optimizations

View file

@ -342,7 +342,8 @@ Objective-C and Objective-C++ Dialects}.
-fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol
-fomit-frame-pointer -foptimize-register-move -foptimize-sibling-calls @gol
-fpeel-loops -fpredictive-commoning -fprefetch-loop-arrays @gol
-fprofile-dir=@var{path} -fprofile-generate -fprofile-generate=@var{path} @gol
-fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol
-fprofile-generate=@var{path} @gol
-fprofile-use -fprofile-use=@var{path} -fprofile-values @gol
-freciprocal-math -fregmove -frename-registers -freorder-blocks @gol
-freorder-blocks-and-partition -freorder-functions @gol
@ -6369,6 +6370,13 @@ and occasionally eliminate the copy.
Enabled at levels @option{-O}, @option{-O2}, @option{-O3}, @option{-Os}.
@item -fprofile-correction
@opindex fprofile-correction
Profiles collected using an instrumented binary for multi-threaded programs may
be inconsistent due to missed counter updates. When this option is specified,
GCC will use heuristics to correct or smooth out such inconsistencies. By
default, GCC will emit an error message when an inconsistent profile is detected.
@item -fprofile-dir=@var{path}
@opindex fprofile-dir

View file

@ -69,21 +69,11 @@ along with GCC; see the file COPYING3. If not see
#include "cfgloop.h"
#include "tree-pass.h"
#include "profile.h"
/* Hooks for profiling. */
static struct profile_hooks* profile_hooks;
/* Additional information about the edges we need. */
struct edge_info {
unsigned int count_valid : 1;
/* Is on the spanning tree. */
unsigned int on_tree : 1;
/* Pretend this edge does not exist (it is abnormal and we've
inserted a fake to compensate). */
unsigned int ignore : 1;
};
struct bb_info {
unsigned int count_valid : 1;
@ -92,7 +82,6 @@ struct bb_info {
gcov_type pred_count;
};
#define EDGE_INFO(e) ((struct edge_info *) (e)->aux)
#define BB_INFO(b) ((struct bb_info *) (b)->aux)
@ -124,7 +113,6 @@ static gcov_type * get_exec_counts (void);
static basic_block find_group (basic_block);
static void union_groups (basic_block, basic_block);
/* Add edge instrumentation code to the entire insn chain.
F is the first insn of the chain.
@ -278,64 +266,84 @@ get_exec_counts (void)
return counts;
}
/* Compute the branch probabilities for the various branches.
Annotate them accordingly. */
/* Return true if any edge in EDGES that is not marked "ignore" carries a
   negative execution count.  A negative count can never come from a valid
   profile run, so it indicates flow-inconsistent (e.g. racy multi-threaded)
   profile data.  Edges flagged EDGE_INFO(e)->ignore are fake/abnormal
   compensation edges and are exempt from the check.  */
static bool
is_edge_inconsistent (VEC(edge,gc) *edges)
{
edge e;
edge_iterator ei;
FOR_EACH_EDGE (e, ei, edges)
{
if (!EDGE_INFO (e)->ignore)
{
/* Any negative count means the profile is corrupt.  */
if (e->count < 0)
return true;
}
}
return false;
}
static void
compute_branch_probabilities (void)
correct_negative_edge_counts (void)
{
basic_block bb;
int i;
int num_edges = 0;
int changes;
int passes;
int hist_br_prob[20];
int num_never_executed;
int num_branches;
gcov_type *exec_counts = get_exec_counts ();
int exec_counts_pos = 0;
edge e;
edge_iterator ei;
/* Very simple sanity checks so we catch bugs in our profiling code. */
if (profile_info)
{
if (profile_info->run_max * profile_info->runs < profile_info->sum_max)
{
error ("corrupted profile info: run_max * runs < sum_max");
exec_counts = NULL;
}
if (profile_info->sum_all < profile_info->sum_max)
{
error ("corrupted profile info: sum_all is smaller than sum_max");
exec_counts = NULL;
}
}
/* Attach extra info block to each bb. */
alloc_aux_for_blocks (sizeof (struct bb_info));
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
{
edge e;
edge_iterator ei;
FOR_EACH_EDGE (e, ei, bb->succs)
if (!EDGE_INFO (e)->ignore)
BB_INFO (bb)->succ_count++;
FOR_EACH_EDGE (e, ei, bb->preds)
if (!EDGE_INFO (e)->ignore)
BB_INFO (bb)->pred_count++;
{
if (e->count < 0)
e->count = 0;
}
}
}
/* Check consistency.
Return true if inconsistency is found. */
static bool
is_inconsistent (void)
{
basic_block bb;
FOR_EACH_BB (bb)
{
if (is_edge_inconsistent (bb->preds))
return true;
if (is_edge_inconsistent (bb->succs))
return true;
if ( bb->count != sum_edge_counts (bb->preds)
|| (bb->count != sum_edge_counts (bb->succs) &&
!(find_edge (bb, EXIT_BLOCK_PTR) != NULL &&
block_ends_with_call_p (bb))))
return true;
}
/* Avoid predicting entry on exit nodes. */
BB_INFO (EXIT_BLOCK_PTR)->succ_count = 2;
BB_INFO (ENTRY_BLOCK_PTR)->pred_count = 2;
return false;
}
/* Set each basic block's count to the sum of its outgoing edge counts.
   Used after edge counts have been corrected (negative counts zeroed),
   so every block count is recomputed to be consistent with its successor
   edges.  NOTE(review): FOR_BB_BETWEEN starting at ENTRY_BLOCK_PTR appears
   to cover the entry block as well as all ordinary blocks -- confirm
   against the macro's definition.  */
static void
set_bb_counts (void)
{
basic_block bb;
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
{
bb->count = sum_edge_counts (bb->succs);
/* Negative edge counts were zeroed beforehand, so the sum must be
   non-negative; a failure here is a bug in the correction pass.  */
gcc_assert (bb->count >= 0);
}
}
/* Reads profile data and returns total number of edge counts read */
static int
read_profile_edge_counts (gcov_type *exec_counts)
{
basic_block bb;
int num_edges = 0;
int exec_counts_pos = 0;
/* For each edge not on the spanning tree, set its execution count from
the .da file. */
/* The first count in the .da file is the number of times that the function
was entered. This is the exec_count for block zero. */
@ -373,6 +381,63 @@ compute_branch_probabilities (void)
}
}
return num_edges;
}
/* Compute the branch probabilities for the various branches.
Annotate them accordingly. */
static void
compute_branch_probabilities (void)
{
basic_block bb;
int i;
int num_edges = 0;
int changes;
int passes;
int hist_br_prob[20];
int num_never_executed;
int num_branches;
gcov_type *exec_counts = get_exec_counts ();
int inconsistent = 0;
/* Very simple sanity checks so we catch bugs in our profiling code. */
if (profile_info)
{
if (profile_info->run_max * profile_info->runs < profile_info->sum_max)
{
error ("corrupted profile info: run_max * runs < sum_max");
exec_counts = NULL;
}
if (profile_info->sum_all < profile_info->sum_max)
{
error ("corrupted profile info: sum_all is smaller than sum_max");
exec_counts = NULL;
}
}
/* Attach extra info block to each bb. */
alloc_aux_for_blocks (sizeof (struct bb_info));
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
{
edge e;
edge_iterator ei;
FOR_EACH_EDGE (e, ei, bb->succs)
if (!EDGE_INFO (e)->ignore)
BB_INFO (bb)->succ_count++;
FOR_EACH_EDGE (e, ei, bb->preds)
if (!EDGE_INFO (e)->ignore)
BB_INFO (bb)->pred_count++;
}
/* Avoid predicting entry on exit nodes. */
BB_INFO (EXIT_BLOCK_PTR)->succ_count = 2;
BB_INFO (ENTRY_BLOCK_PTR)->pred_count = 2;
num_edges = read_profile_edge_counts (exec_counts);
if (dump_file)
fprintf (dump_file, "\n%d edge counts read\n", num_edges);
@ -502,6 +567,31 @@ compute_branch_probabilities (void)
gcc_assert (!BB_INFO (bb)->succ_count && !BB_INFO (bb)->pred_count);
}
/* Check for inconsistent basic block counts */
inconsistent = is_inconsistent ();
if (inconsistent)
{
if (flag_profile_correction)
{
/* Inconsistency detected. Make it flow-consistent. */
static int informed = 0;
if (informed == 0)
{
informed = 1;
inform ("correcting inconsistent profile data");
}
correct_negative_edge_counts ();
/* Set bb counts to the sum of the outgoing edge counts */
set_bb_counts ();
if (dump_file)
fprintf (dump_file, "\nCalling mcf_smooth_cfg\n");
mcf_smooth_cfg ();
}
else
error ("corrupted profile info: profile data is not flow-consistent");
}
/* For every edge, calculate its branch probability and add a reg_note
to the branch insn to indicate this. */

View file

@ -453,18 +453,32 @@ free_histograms (void)
somehow. */
static bool
check_counter (gimple stmt, const char *name, gcov_type all, gcov_type bb_count)
check_counter (gimple stmt, const char * name,
gcov_type *count, gcov_type *all, gcov_type bb_count)
{
if (all != bb_count)
if (*all != bb_count || *count > *all)
{
location_t locus;
locus = (stmt != NULL)
? gimple_location (stmt)
: DECL_SOURCE_LOCATION (current_function_decl);
error ("%HCorrupted value profile: %s profiler overall count (%d) "
"does not match BB count (%d)", &locus, name, (int)all,
(int)bb_count);
return true;
? gimple_location (stmt)
: DECL_SOURCE_LOCATION (current_function_decl);
if (flag_profile_correction)
{
inform ("%HCorrecting inconsistent value profile: "
"%s profiler overall count (%d) does not match BB count "
"(%d)", &locus, name, (int)*all, (int)bb_count);
*all = bb_count;
if (*count > *all)
*count = *all;
return false;
}
else
{
error ("%HCorrupted value profile: %s profiler overall count (%d) "
"does not match BB count (%d)", &locus, name, (int)*all,
(int)bb_count);
return true;
}
}
return false;
@ -658,7 +672,7 @@ gimple_divmod_fixed_value_transform (gimple_stmt_iterator *si)
|| !maybe_hot_bb_p (gimple_bb (stmt)))
return false;
if (check_counter (stmt, "value", all, gimple_bb (stmt)->count))
if (check_counter (stmt, "value", &count, &all, gimple_bb (stmt)->count))
return false;
/* Compute probability of taking the optimal path. */
@ -818,7 +832,7 @@ gimple_mod_pow2_value_transform (gimple_stmt_iterator *si)
/* Compute probability of taking the optimal path. */
all = count + wrong_values;
if (check_counter (stmt, "pow2", all, gimple_bb (stmt)->count))
if (check_counter (stmt, "pow2", &count, &all, gimple_bb (stmt)->count))
return false;
if (all > 0)
@ -982,12 +996,17 @@ gimple_mod_subtract_transform (gimple_stmt_iterator *si)
count2 = histogram->hvalue.counters[1];
/* Compute probability of taking the optimal path. */
if (check_counter (stmt, "interval", all, gimple_bb (stmt)->count))
if (check_counter (stmt, "interval", &count1, &all, gimple_bb (stmt)->count))
{
gimple_remove_histogram_value (cfun, stmt, histogram);
return false;
}
if (flag_profile_correction && count1 + count2 > all)
all = count1 + count2;
gcc_assert (count1 + count2 <= all);
/* We require that we use just subtractions in at least 50% of all
evaluations. */
count = 0;
@ -1160,7 +1179,7 @@ static bool
gimple_ic_transform (gimple stmt)
{
histogram_value histogram;
gcov_type val, count, all;
gcov_type val, count, all, bb_all;
gcov_type prob;
tree callee;
gimple modify;
@ -1186,6 +1205,14 @@ gimple_ic_transform (gimple stmt)
if (4 * count <= 3 * all)
return false;
bb_all = gimple_bb (stmt)->count;
/* The order of CHECK_COUNTER calls is important -
since check_counter can correct the third parameter
and we want to make count <= all <= bb_all. */
if ( check_counter (stmt, "ic", &all, &bb_all, bb_all)
|| check_counter (stmt, "ic", &count, &all, all))
return false;
if (all > 0)
prob = (count * REG_BR_PROB_BASE + all / 2) / all;
else
@ -1372,7 +1399,7 @@ gimple_stringops_transform (gimple_stmt_iterator *gsi)
at least 80% of time. */
if ((6 * count / 5) < all || !maybe_hot_bb_p (gimple_bb (stmt)))
return false;
if (check_counter (stmt, "value", all, gimple_bb (stmt)->count))
if (check_counter (stmt, "value", &count, &all, gimple_bb (stmt)->count))
return false;
if (all > 0)
prob = (count * REG_BR_PROB_BASE + all / 2) / all;