re PR driver/46760 (LTO bootstrap doesn't work with FDO)
PR tree-optimization/46760 * cgraph.c (cgraph_create_node): Initialize count_materialization_scale. * cgraph.h (struct cgraph_node): Add count_materialization_scale. * lto-cgraph.c (lto_output_edge): Fix assert. (lto_output_node): Output count_materialization_scale. (output_profile_summary): Output only runs and sum_max. (input_node): Input count_materialization_scale. (input_profile_summary): Read data into file specific gcov summary. (merge_profile_summaries): New function. (input_cgraph): Update call of input_profile_summary; call merge_profile_summaries. * lto-streamer-in.c (input_cfg): Add count_materialization_scale arg; rescale counts at read in. (input_bb): Likewise. (input_function): Update call of input_bb. (lto_read_body): Update call of input_cfg. * lto-streamer.h: Include gcov-io.h (lto_file_decl_data): Add gcov_ctr_summary. From-SVN: r167458
This commit is contained in:
parent
f3007348c3
commit
db0bf14f6b
6 changed files with 145 additions and 30 deletions
|
@ -1,3 +1,24 @@
|
|||
2010-12-03 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
PR tree-optimization/46760
|
||||
* cgraph.c (cgraph_create_node): Initialize count_materialization_scale.
|
||||
* cgraph.h (struct cgraph_node): Add count_materialization_scale.
|
||||
* lto-cgraph.c (lto_output_edge): Fix assert.
|
||||
(lto_output_node): Output count_materialization_scale.
|
||||
(output_profile_summary): Output only runs and sum_max.
|
||||
(input_node): Input count_materialization_scale.
|
||||
(input_profile_summary): Read data into file specific gcov summary.
|
||||
(merge_profile_summaries): New function.
|
||||
(input_cgraph): Update call of input_profile_summary;
|
||||
call merge_profile_summaries.
|
||||
* lto-streamer-in.c (input_cfg): Add count_materialization_scale arg;
|
||||
rescale counts at read in.
|
||||
(input_bb): Likewise.
|
||||
(input_function): Update call of input_bb.
|
||||
(lto_read_body): Update call of input_cfg.
|
||||
* lto-streamer.h: Include gcov-io.h.
|
||||
(lto_file_decl_data): Add gcov_ctr_summary.
|
||||
|
||||
2010-12-03 Dave Korn <dave.korn.cygwin@gmail.com>
|
||||
|
||||
* doc/tm.texi.in (Cond. Exec. Macros): Rename node from this ...
|
||||
|
|
|
@ -478,6 +478,7 @@ cgraph_create_node (void)
|
|||
node->previous = NULL;
|
||||
node->global.estimated_growth = INT_MIN;
|
||||
node->frequency = NODE_FREQUENCY_NORMAL;
|
||||
node->count_materialization_scale = REG_BR_PROB_BASE;
|
||||
ipa_empty_ref_list (&node->ref_list);
|
||||
cgraph_nodes = node;
|
||||
cgraph_n_nodes++;
|
||||
|
|
|
@ -233,6 +233,9 @@ struct GTY((chain_next ("%h.next"), chain_prev ("%h.previous"))) cgraph_node {
|
|||
|
||||
/* Expected number of executions: calculated in profile.c. */
|
||||
gcov_type count;
|
||||
/* How to scale counts at materialization time; used to merge
|
||||
LTO units with different number of profile runs. */
|
||||
int count_materialization_scale;
|
||||
/* Unique id of the node. */
|
||||
int uid;
|
||||
/* Ordering of all cgraph nodes. */
|
||||
|
|
127
gcc/lto-cgraph.c
127
gcc/lto-cgraph.c
|
@ -302,6 +302,7 @@ lto_output_edge (struct lto_simple_output_block *ob, struct cgraph_edge *edge,
|
|||
gcc_assert (!(flags & (ECF_LOOPING_CONST_OR_PURE
|
||||
| ECF_MAY_BE_ALLOCA
|
||||
| ECF_SIBCALL
|
||||
| ECF_LEAF
|
||||
| ECF_NOVOPS)));
|
||||
}
|
||||
lto_output_bitpack (&bp);
|
||||
|
@ -462,6 +463,7 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node,
|
|||
|
||||
lto_output_fn_decl_index (ob->decl_state, ob->main_stream, node->decl);
|
||||
lto_output_sleb128_stream (ob->main_stream, node->count);
|
||||
lto_output_sleb128_stream (ob->main_stream, node->count_materialization_scale);
|
||||
|
||||
if (tag == LTO_cgraph_analyzed_node)
|
||||
{
|
||||
|
@ -661,12 +663,12 @@ output_profile_summary (struct lto_simple_output_block *ob)
|
|||
{
|
||||
if (profile_info)
|
||||
{
|
||||
/* We do not output num, it is not terribly useful. */
|
||||
/* We do not output num, sum_all and run_max, they are not used by
|
||||
GCC profile feedback and they are difficult to merge from multiple
|
||||
units. */
|
||||
gcc_assert (profile_info->runs);
|
||||
lto_output_uleb128_stream (ob->main_stream, profile_info->runs);
|
||||
lto_output_sleb128_stream (ob->main_stream, profile_info->sum_all);
|
||||
lto_output_sleb128_stream (ob->main_stream, profile_info->run_max);
|
||||
lto_output_sleb128_stream (ob->main_stream, profile_info->sum_max);
|
||||
lto_output_uleb128_stream (ob->main_stream, profile_info->sum_max);
|
||||
}
|
||||
else
|
||||
lto_output_uleb128_stream (ob->main_stream, 0);
|
||||
|
@ -1045,6 +1047,7 @@ input_node (struct lto_file_decl_data *file_data,
|
|||
node = cgraph_node (fn_decl);
|
||||
|
||||
node->count = lto_input_sleb128 (ib);
|
||||
node->count_materialization_scale = lto_input_sleb128 (ib);
|
||||
|
||||
if (tag == LTO_cgraph_analyzed_node)
|
||||
{
|
||||
|
@ -1424,32 +1427,108 @@ static struct gcov_ctr_summary lto_gcov_summary;
|
|||
|
||||
/* Input profile_info from IB. */
|
||||
static void
|
||||
input_profile_summary (struct lto_input_block *ib)
|
||||
input_profile_summary (struct lto_input_block *ib,
|
||||
struct lto_file_decl_data *file_data)
|
||||
{
|
||||
unsigned int runs = lto_input_uleb128 (ib);
|
||||
if (runs)
|
||||
{
|
||||
if (!profile_info)
|
||||
{
|
||||
profile_info = &lto_gcov_summary;
|
||||
lto_gcov_summary.runs = runs;
|
||||
lto_gcov_summary.sum_all = lto_input_sleb128 (ib);
|
||||
lto_gcov_summary.run_max = lto_input_sleb128 (ib);
|
||||
lto_gcov_summary.sum_max = lto_input_sleb128 (ib);
|
||||
}
|
||||
/* We can support this by scaling all counts to nearest common multiple
|
||||
of all different runs, but it is perhaps not worth the effort. */
|
||||
else if (profile_info->runs != runs
|
||||
|| profile_info->sum_all != lto_input_sleb128 (ib)
|
||||
|| profile_info->run_max != lto_input_sleb128 (ib)
|
||||
|| profile_info->sum_max != lto_input_sleb128 (ib))
|
||||
sorry ("combining units with different profiles is not supported");
|
||||
/* We allow some units to have a profile and others to have none.  This
|
||||
just makes the unprofiled units be optimized for size, which is sane.  */
|
||||
file_data->profile_info.runs = runs;
|
||||
file_data->profile_info.sum_max = lto_input_uleb128 (ib);
|
||||
if (runs > file_data->profile_info.sum_max)
|
||||
fatal_error ("Corrupted profile info in %s: sum_max is smaller than runs",
|
||||
file_data->file_name);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Rescale profile summaries to the same number of runs in the whole unit. */
|
||||
|
||||
static void
|
||||
merge_profile_summaries (struct lto_file_decl_data **file_data_vec)
|
||||
{
|
||||
struct lto_file_decl_data *file_data;
|
||||
unsigned int j;
|
||||
gcov_unsigned_t max_runs = 0;
|
||||
struct cgraph_node *node;
|
||||
struct cgraph_edge *edge;
|
||||
|
||||
/* Find unit with maximal number of runs. If we ever get serious about
|
||||
roundoff errors, we might also consider computing smallest common
|
||||
multiply. */
|
||||
for (j = 0; (file_data = file_data_vec[j]) != NULL; j++)
|
||||
if (max_runs < file_data->profile_info.runs)
|
||||
max_runs = file_data->profile_info.runs;
|
||||
|
||||
if (!max_runs)
|
||||
return;
|
||||
|
||||
/* Simple overflow check. We probably don't need to support that many train
|
||||
runs. Such a large value probably imply data corruption anyway. */
|
||||
if (max_runs > INT_MAX / REG_BR_PROB_BASE)
|
||||
{
|
||||
sorry ("At most %i profile runs is supported. Perhaps corrupted profile?",
|
||||
INT_MAX / REG_BR_PROB_BASE);
|
||||
return;
|
||||
}
|
||||
|
||||
profile_info = <o_gcov_summary;
|
||||
lto_gcov_summary.runs = max_runs;
|
||||
lto_gcov_summary.sum_max = 0;
|
||||
|
||||
/* Rescale all units to the maximal number of runs.
|
||||
sum_max can not be easily merged, as we have no idea what files come from
|
||||
the same run. We do not use the info anyway, so leave it 0. */
|
||||
for (j = 0; (file_data = file_data_vec[j]) != NULL; j++)
|
||||
if (file_data->profile_info.runs)
|
||||
{
|
||||
int scale = ((REG_BR_PROB_BASE * max_runs
|
||||
+ file_data->profile_info.runs / 2)
|
||||
/ file_data->profile_info.runs);
|
||||
lto_gcov_summary.sum_max = MAX (lto_gcov_summary.sum_max,
|
||||
(file_data->profile_info.sum_max
|
||||
* scale
|
||||
+ REG_BR_PROB_BASE / 2)
|
||||
/ REG_BR_PROB_BASE);
|
||||
}
|
||||
|
||||
/* Watch roundoff errors. */
|
||||
if (lto_gcov_summary.sum_max < max_runs)
|
||||
lto_gcov_summary.sum_max = max_runs;
|
||||
|
||||
/* If merging already happent at WPA time, we are done. */
|
||||
if (flag_ltrans)
|
||||
return;
|
||||
|
||||
/* Now compute count_materialization_scale of each node.
|
||||
During LTRANS we already have values of count_materialization_scale
|
||||
computed, so just update them. */
|
||||
for (node = cgraph_nodes; node; node = node->next)
|
||||
if (node->local.lto_file_data->profile_info.run_max)
|
||||
{
|
||||
int scale;
|
||||
if (node->local.lto_file_data->profile_info.runs)
|
||||
scale =
|
||||
((node->count_materialization_scale * max_runs
|
||||
+ node->local.lto_file_data->profile_info.run_max / 2)
|
||||
/ node->local.lto_file_data->profile_info.run_max);
|
||||
else
|
||||
scale = node->count_materialization_scale;
|
||||
node->count_materialization_scale = scale;
|
||||
if (scale < 0)
|
||||
fatal_error ("Profile information in %s corrupted",
|
||||
file_data->file_name);
|
||||
|
||||
if (scale == REG_BR_PROB_BASE)
|
||||
continue;
|
||||
for (edge = node->callees; edge; edge = edge->next_callee)
|
||||
edge->count = ((edge->count * scale + REG_BR_PROB_BASE / 2)
|
||||
/ REG_BR_PROB_BASE);
|
||||
node->count = ((node->count * scale + REG_BR_PROB_BASE / 2)
|
||||
/ REG_BR_PROB_BASE);
|
||||
}
|
||||
}
|
||||
|
||||
/* Input and merge the cgraph from each of the .o files passed to
|
||||
lto1. */
|
||||
|
||||
|
@ -1473,7 +1552,7 @@ input_cgraph (void)
|
|||
&data, &len);
|
||||
if (!ib)
|
||||
fatal_error ("cannot find LTO cgraph in %s", file_data->file_name);
|
||||
input_profile_summary (ib);
|
||||
input_profile_summary (ib, file_data);
|
||||
file_data->cgraph_node_encoder = lto_cgraph_encoder_new ();
|
||||
nodes = input_cgraph_1 (file_data, ib);
|
||||
lto_destroy_simple_input_block (file_data, LTO_section_cgraph,
|
||||
|
@ -1499,6 +1578,8 @@ input_cgraph (void)
|
|||
VEC_free (cgraph_node_ptr, heap, nodes);
|
||||
VEC_free (varpool_node_ptr, heap, varpool);
|
||||
}
|
||||
merge_profile_summaries (file_data_vec);
|
||||
|
||||
|
||||
/* Clear out the aux field that was used to store enough state to
|
||||
tell which nodes should be overwritten. */
|
||||
|
|
|
@ -719,7 +719,8 @@ make_new_block (struct function *fn, unsigned int index)
|
|||
/* Read the CFG for function FN from input block IB. */
|
||||
|
||||
static void
|
||||
input_cfg (struct lto_input_block *ib, struct function *fn)
|
||||
input_cfg (struct lto_input_block *ib, struct function *fn,
|
||||
int count_materialization_scale)
|
||||
{
|
||||
unsigned int bb_count;
|
||||
basic_block p_bb;
|
||||
|
@ -752,7 +753,8 @@ input_cfg (struct lto_input_block *ib, struct function *fn)
|
|||
if (bb == NULL)
|
||||
bb = make_new_block (fn, index);
|
||||
|
||||
edge_count = lto_input_uleb128 (ib);
|
||||
edge_count = (lto_input_uleb128 (ib) * count_materialization_scale
|
||||
+ REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE;
|
||||
|
||||
/* Connect up the CFG. */
|
||||
for (i = 0; i < edge_count; i++)
|
||||
|
@ -1066,7 +1068,8 @@ input_gimple_stmt (struct lto_input_block *ib, struct data_in *data_in,
|
|||
|
||||
static void
|
||||
input_bb (struct lto_input_block *ib, enum LTO_tags tag,
|
||||
struct data_in *data_in, struct function *fn)
|
||||
struct data_in *data_in, struct function *fn,
|
||||
int count_materialization_scale)
|
||||
{
|
||||
unsigned int index;
|
||||
basic_block bb;
|
||||
|
@ -1079,7 +1082,8 @@ input_bb (struct lto_input_block *ib, enum LTO_tags tag,
|
|||
index = lto_input_uleb128 (ib);
|
||||
bb = BASIC_BLOCK_FOR_FUNCTION (fn, index);
|
||||
|
||||
bb->count = lto_input_sleb128 (ib);
|
||||
bb->count = (lto_input_sleb128 (ib) * count_materialization_scale
|
||||
+ REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE;
|
||||
bb->loop_depth = lto_input_sleb128 (ib);
|
||||
bb->frequency = lto_input_sleb128 (ib);
|
||||
bb->flags = lto_input_sleb128 (ib);
|
||||
|
@ -1253,12 +1257,14 @@ input_function (tree fn_decl, struct data_in *data_in,
|
|||
DECL_INITIAL (fn_decl) = lto_input_tree (ib, data_in);
|
||||
gcc_assert (DECL_INITIAL (fn_decl));
|
||||
DECL_SAVED_TREE (fn_decl) = NULL_TREE;
|
||||
node = cgraph_node (fn_decl);
|
||||
|
||||
/* Read all the basic blocks. */
|
||||
tag = input_record_start (ib);
|
||||
while (tag)
|
||||
{
|
||||
input_bb (ib, tag, data_in, fn);
|
||||
input_bb (ib, tag, data_in, fn,
|
||||
node->count_materialization_scale);
|
||||
tag = input_record_start (ib);
|
||||
}
|
||||
|
||||
|
@ -1300,7 +1306,6 @@ input_function (tree fn_decl, struct data_in *data_in,
|
|||
gimple_set_body (fn_decl, bb_seq (ei_edge (ei)->dest));
|
||||
}
|
||||
|
||||
node = cgraph_node (fn_decl);
|
||||
fixup_call_stmt_edges (node, stmts);
|
||||
execute_all_ipa_stmt_fixups (node, stmts);
|
||||
|
||||
|
@ -1393,6 +1398,7 @@ lto_read_body (struct lto_file_decl_data *file_data, tree fn_decl,
|
|||
{
|
||||
struct function *fn = DECL_STRUCT_FUNCTION (fn_decl);
|
||||
struct lto_in_decl_state *decl_state;
|
||||
struct cgraph_node *node = cgraph_node (fn_decl);
|
||||
|
||||
push_cfun (fn);
|
||||
init_tree_ssa (fn);
|
||||
|
@ -1402,7 +1408,7 @@ lto_read_body (struct lto_file_decl_data *file_data, tree fn_decl,
|
|||
gcc_assert (decl_state);
|
||||
file_data->current_decl_state = decl_state;
|
||||
|
||||
input_cfg (&ib_cfg, fn);
|
||||
input_cfg (&ib_cfg, fn, node->count_materialization_scale);
|
||||
|
||||
/* Set up the struct function. */
|
||||
input_function (fn_decl, data_in, &ib_main);
|
||||
|
|
|
@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "vec.h"
|
||||
#include "vecprim.h"
|
||||
#include "alloc-pool.h"
|
||||
#include "gcov-io.h"
|
||||
|
||||
/* Define when debugging the LTO streamer. This causes the writer
|
||||
to output the numeric value for the memory address of the tree node
|
||||
|
@ -610,6 +611,8 @@ struct GTY(()) lto_file_decl_data
|
|||
|
||||
/* Symbol resolutions for this file */
|
||||
VEC(ld_plugin_symbol_resolution_t,heap) * GTY((skip)) resolutions;
|
||||
|
||||
struct gcov_ctr_summary GTY((skip)) profile_info;
|
||||
};
|
||||
|
||||
typedef struct lto_file_decl_data *lto_file_decl_data_ptr;
|
||||
|
|
Loading…
Add table
Reference in a new issue