invoke.texi (-flto-partition, [...]): Document.
* doc/invoke.texi (-flto-partition, lto-partitions, lto-minpartition): Document. * opts.c (decode_options): Handle lto partitions. * common.opt (flto-partition=1to1, flto-partition=balanced): New. * params.def (PARAM_LTO_PARTITIONS, MIN_PARTITION_SIZE): New. * lto.c: Include params.h. (add_cgraph_node_to_partition, add_varpool_node_to_partition): Do refcounting in aux field. (undo_partition, partition_cgraph_node_p, partition_varpool_node_p): New functions. (lto_1_to_1_map): Simplify. (lto_balanced_map): New function. (do_whole_program_analysis): Chose proper partitioning alg. * Make-lang.in (lto.o): Add dependency on params.h From-SVN: r164995
This commit is contained in:
parent
5806d9ac5d
commit
852e4bd21f
8 changed files with 438 additions and 33 deletions
|
@ -1,4 +1,12 @@
|
|||
2010-09-29 Jan Hubicka <jh@suse.cz>
|
||||
2010-10-05 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* doc/invoke.texi (-flto-partition, lto-partitions, lto-minpartition):
|
||||
Document.
|
||||
* opts.c (decode_options): Handle lto partitions.
|
||||
* common.opt (flto-partition=1to1, flto-partition=balanced): New.
|
||||
* params.def (PARAM_LTO_PARTITIONS, MIN_PARTITION_SIZE): New.
|
||||
|
||||
2010-10-05 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* cgraphunit.c (assemble_function): Output thunks and aliases before
|
||||
the function itself.
|
||||
|
|
|
@ -1049,6 +1049,14 @@ flto
|
|||
Common Var(flag_lto)
|
||||
Enable link-time optimization.
|
||||
|
||||
flto-partition=1to1
|
||||
Common Var(flag_lto_partition_1to1)
|
||||
Partition functions and vars at linktime based on object files they originate from
|
||||
|
||||
flto-partition=balanced
|
||||
Common Var(flag_lto_partition_balanced)
|
||||
Partition functions and vars at linktime into approximately same sized buckets
|
||||
|
||||
; The initial value of -1 comes from Z_DEFAULT_COMPRESSION in zlib.h.
|
||||
flto-compression-level=
|
||||
Common Joined RejectNegative UInteger Var(flag_lto_compression_level) Init(-1)
|
||||
|
|
|
@ -354,10 +354,10 @@ Objective-C and Objective-C++ Dialects}.
|
|||
-fno-ira-share-spill-slots -fira-verbose=@var{n} @gol
|
||||
-fivopts -fkeep-inline-functions -fkeep-static-consts @gol
|
||||
-floop-block -floop-flatten -floop-interchange -floop-strip-mine @gol
|
||||
-floop-parallelize-all -flto -flto-compression-level -flto-report @gol
|
||||
-fltrans -fltrans-output-list -fmerge-all-constants -fmerge-constants @gol
|
||||
-fmodulo-sched -fmodulo-sched-allow-regmoves -fmove-loop-invariants @gol
|
||||
-fmudflap -fmudflapir -fmudflapth -fno-branch-count-reg @gol
|
||||
-floop-parallelize-all -flto -flto-compression-level -flto-partition=@var{alg} @gol
|
||||
-flto-report -fltrans -fltrans-output-list -fmerge-all-constants @gol
|
||||
-fmerge-constants -fmodulo-sched -fmodulo-sched-allow-regmoves @gol
|
||||
-fmove-loop-invariants fmudflap -fmudflapir -fmudflapth -fno-branch-count-reg @gol
|
||||
-fno-default-inline @gol
|
||||
-fno-defer-pop -fno-function-cse -fno-guess-branch-probability @gol
|
||||
-fno-inline -fno-math-errno -fno-peephole -fno-peephole2 @gol
|
||||
|
@ -7596,6 +7596,13 @@ GNU make.
|
|||
|
||||
Disabled by default.
|
||||
|
||||
@item -flto-partition=@var{alg}
|
||||
@opindex flto-partition
|
||||
Specify partitioning algorithm used by @option{-fwhopr} mode. The value is
|
||||
either @code{1to1} to specify partitioning corresponding to source files
|
||||
or @code{balanced} to specify partitioning into, if possible, equally sized
|
||||
chunks. The default value is @code{balanced}.
|
||||
|
||||
@item -fwpa
|
||||
@opindex fwpa
|
||||
This is an internal option used by GCC when compiling with
|
||||
|
@ -8789,6 +8796,16 @@ parameter in order to perform devirtualization.
|
|||
@option{devirt-type-list-size} is the maximum number of types it
|
||||
stores per a single formal parameter of a function.
|
||||
|
||||
@item lto-partitions
|
||||
Specify desired nuber of partitions produced during WHOPR copmilation.
|
||||
Number of partitions should exceed number of CPUs used for compilatoin.
|
||||
Default value is 32.
|
||||
|
||||
@item lto-minpartition
|
||||
Size of minimal paritition for WHOPR (in estimated instructions).
|
||||
This prevents expenses of splitting very small programs into too many
|
||||
partitions.
|
||||
|
||||
@end table
|
||||
@end table
|
||||
|
||||
|
|
|
@ -1,3 +1,15 @@
|
|||
2010-10-05 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* lto.c: Include params.h.
|
||||
(add_cgraph_node_to_partition, add_varpool_node_to_partition): Do
|
||||
refcounting in aux field.
|
||||
(undo_partition, partition_cgraph_node_p, partition_varpool_node_p):
|
||||
New functions.
|
||||
(lto_1_to_1_map): Simplify.
|
||||
(lto_balanced_map): New function.
|
||||
(do_whole_program_analysis): Chose proper partitioning alg.
|
||||
* Make-lang.in (lto.o): Add dependency on params.h
|
||||
|
||||
2010-10-04 Andi Kleen <ak@linux.intel.com>
|
||||
|
||||
* Make-lang.in (lto1): Add + to build rule.
|
||||
|
|
|
@ -85,7 +85,7 @@ lto/lto.o: lto/lto.c $(CONFIG_H) $(SYSTEM_H) coretypes.h opts.h \
|
|||
$(CGRAPH_H) $(GGC_H) tree-ssa-operands.h $(TREE_PASS_H) \
|
||||
langhooks.h $(VEC_H) $(BITMAP_H) pointer-set.h $(IPA_PROP_H) \
|
||||
$(COMMON_H) debug.h $(TIMEVAR_H) $(GIMPLE_H) $(LTO_H) $(LTO_TREE_H) \
|
||||
$(LTO_TAGS_H) $(LTO_STREAMER_H) $(SPLAY_TREE_H) gt-lto-lto.h
|
||||
$(LTO_TAGS_H) $(LTO_STREAMER_H) $(SPLAY_TREE_H) gt-lto-lto.h $(PARAMS_H)
|
||||
lto/lto-elf.o: lto/lto-elf.c $(CONFIG_H) coretypes.h $(SYSTEM_H) \
|
||||
toplev.h $(LTO_H) $(TM_H) $(LIBIBERTY_H) $(GGC_H) $(LTO_STREAMER_H)
|
||||
lto/lto-coff.o: lto/lto-coff.c $(CONFIG_H) coretypes.h $(SYSTEM_H) \
|
||||
|
|
396
gcc/lto/lto.c
396
gcc/lto/lto.c
|
@ -44,6 +44,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "lto-tree.h"
|
||||
#include "lto-streamer.h"
|
||||
#include "splay-tree.h"
|
||||
#include "params.h"
|
||||
|
||||
/* This needs to be included after config.h. Otherwise, _GNU_SOURCE will not
|
||||
be defined in time to set __USE_GNU in the system headers, and strsignal
|
||||
|
@ -759,12 +760,8 @@ add_cgraph_node_to_partition (ltrans_partition part, struct cgraph_node *node)
|
|||
part->insns += node->local.inline_summary.self_size;
|
||||
|
||||
if (node->aux)
|
||||
{
|
||||
gcc_assert (node->aux != part);
|
||||
node->in_other_partition = 1;
|
||||
}
|
||||
else
|
||||
node->aux = part;
|
||||
node->in_other_partition = 1;
|
||||
node->aux = (void *)((size_t)node->aux + 1);
|
||||
|
||||
cgraph_node_set_add (part->cgraph_set, node);
|
||||
|
||||
|
@ -788,12 +785,8 @@ add_varpool_node_to_partition (ltrans_partition part, struct varpool_node *vnode
|
|||
varpool_node_set_add (part->varpool_set, vnode);
|
||||
|
||||
if (vnode->aux)
|
||||
{
|
||||
gcc_assert (vnode->aux != part);
|
||||
vnode->in_other_partition = 1;
|
||||
}
|
||||
else
|
||||
vnode->aux = part;
|
||||
vnode->in_other_partition = 1;
|
||||
vnode->aux = (void *)((size_t)vnode->aux + 1);
|
||||
|
||||
add_references_to_partition (part, &vnode->ref_list);
|
||||
|
||||
|
@ -802,6 +795,70 @@ add_varpool_node_to_partition (ltrans_partition part, struct varpool_node *vnode
|
|||
add_varpool_node_to_partition (part, vnode->same_comdat_group);
|
||||
}
|
||||
|
||||
/* Undo all additions until number of cgraph nodes in PARITION is N_CGRAPH_NODES
|
||||
and number of varpool nodes is N_VARPOOL_NODES. */
|
||||
|
||||
static void
|
||||
undo_partition (ltrans_partition partition, unsigned int n_cgraph_nodes,
|
||||
unsigned int n_varpool_nodes)
|
||||
{
|
||||
while (VEC_length (cgraph_node_ptr, partition->cgraph_set->nodes) >
|
||||
n_cgraph_nodes)
|
||||
{
|
||||
struct cgraph_node *node = VEC_index (cgraph_node_ptr,
|
||||
partition->cgraph_set->nodes,
|
||||
n_cgraph_nodes);
|
||||
partition->insns -= node->local.inline_summary.self_size;
|
||||
cgraph_node_set_remove (partition->cgraph_set, node);
|
||||
node->aux = (void *)((size_t)node->aux - 1);
|
||||
}
|
||||
while (VEC_length (varpool_node_ptr, partition->varpool_set->nodes) >
|
||||
n_varpool_nodes)
|
||||
{
|
||||
struct varpool_node *node = VEC_index (varpool_node_ptr,
|
||||
partition->varpool_set->nodes,
|
||||
n_varpool_nodes);
|
||||
varpool_node_set_remove (partition->varpool_set, node);
|
||||
node->aux = (void *)((size_t)node->aux - 1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Return true if NODE should be partitioned.
|
||||
This means that partitioning algorithm should put NODE into one of partitions.
|
||||
This apply to most functions with bodies. Functions that are not partitions
|
||||
are put into every unit needing them. This is the case of i.e. COMDATs. */
|
||||
|
||||
static bool
|
||||
partition_cgraph_node_p (struct cgraph_node *node)
|
||||
{
|
||||
/* We will get proper partition based on function they are inlined to. */
|
||||
if (node->global.inlined_to)
|
||||
return false;
|
||||
/* Nodes without a body do not need partitioning. */
|
||||
if (!node->analyzed)
|
||||
return false;
|
||||
/* Extern inlines and comdat are always only in partitions they are needed. */
|
||||
if (DECL_EXTERNAL (node->decl)
|
||||
|| DECL_COMDAT (node->decl))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return true if VNODE should be partitioned.
|
||||
This means that partitioning algorithm should put VNODE into one of partitions. */
|
||||
|
||||
static bool
|
||||
partition_varpool_node_p (struct varpool_node *vnode)
|
||||
{
|
||||
if (vnode->alias || !vnode->needed)
|
||||
return false;
|
||||
/* Constant pool and comdat are always only in partitions they are needed. */
|
||||
if (DECL_IN_CONSTANT_POOL (vnode->decl)
|
||||
|| DECL_COMDAT (vnode->decl))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Group cgrah nodes by input files. This is used mainly for testing
|
||||
right now. */
|
||||
|
||||
|
@ -822,15 +879,7 @@ lto_1_to_1_map (void)
|
|||
|
||||
for (node = cgraph_nodes; node; node = node->next)
|
||||
{
|
||||
/* We will get proper partition based on function they are inlined to. */
|
||||
if (node->global.inlined_to)
|
||||
continue;
|
||||
/* Nodes without a body do not need partitioning. */
|
||||
if (!node->analyzed)
|
||||
continue;
|
||||
/* Extern inlines and comdat are always only in partitions they are needed. */
|
||||
if (DECL_EXTERNAL (node->decl)
|
||||
|| DECL_COMDAT (node->decl))
|
||||
if (!partition_cgraph_node_p (node))
|
||||
continue;
|
||||
|
||||
file_data = node->local.lto_file_data;
|
||||
|
@ -865,11 +914,7 @@ lto_1_to_1_map (void)
|
|||
|
||||
for (vnode = varpool_nodes; vnode; vnode = vnode->next)
|
||||
{
|
||||
if (vnode->alias || !vnode->needed)
|
||||
continue;
|
||||
/* Constant pool and comdat are always only in partitions they are needed. */
|
||||
if (DECL_IN_CONSTANT_POOL (vnode->decl)
|
||||
|| DECL_COMDAT (vnode->decl))
|
||||
if (!partition_varpool_node_p (vnode))
|
||||
continue;
|
||||
file_data = vnode->lto_file_data;
|
||||
slot = pointer_map_contains (pmap, file_data);
|
||||
|
@ -903,6 +948,300 @@ lto_1_to_1_map (void)
|
|||
ltrans_partitions);
|
||||
}
|
||||
|
||||
|
||||
/* Group cgraph nodes in qually sized partitions.
|
||||
|
||||
The algorithm deciding paritions are simple: nodes are taken in predefined
|
||||
order. The order correspond to order we wish to have functions in final
|
||||
output. In future this will be given by function reordering pass, but at
|
||||
the moment we use topological order that serve a good approximation.
|
||||
|
||||
The goal is to partition this linear order into intervals (partitions) such
|
||||
that all partitions have approximately the same size and that the number of
|
||||
callgraph or IPA reference edgess crossing boundaries is minimal.
|
||||
|
||||
This is a lot faster (O(n) in size of callgraph) than algorithms doing
|
||||
priority based graph clustering that are generally O(n^2) and since WHOPR
|
||||
is designed to make things go well across partitions, it leads to good results.
|
||||
|
||||
We compute the expected size of partition as
|
||||
max (total_size / lto_partitions, min_partition_size).
|
||||
We use dynamic expected size of partition, so small programs
|
||||
are partitioning into enough partitions to allow use of multiple CPUs while
|
||||
large programs are not partitioned too much. Creating too many partition
|
||||
increase streaming overhead significandly.
|
||||
|
||||
In the future we would like to bound maximal size of partition to avoid
|
||||
ltrans stage consuming too much memory. At the moment however WPA stage is
|
||||
most memory intensive phase at large benchmark since too many types and
|
||||
declarations are read into memory.
|
||||
|
||||
The function implement simple greedy algorithm. Nodes are begin added into
|
||||
current partition until 3/4th of expected partition size is reached.
|
||||
After this threshold we keep track of boundary size (number of edges going to
|
||||
other partitions) and continue adding functions until the current partition
|
||||
grows into a double of expected partition size. Then the process is undone
|
||||
till the point when minimal ration of boundary size and in partition calls
|
||||
was reached. */
|
||||
|
||||
static void
|
||||
lto_balanced_map (void)
|
||||
{
|
||||
int n_nodes = 0;
|
||||
struct cgraph_node **postorder =
|
||||
XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
|
||||
struct cgraph_node **order = XNEWVEC (struct cgraph_node *, cgraph_max_uid);
|
||||
int i, postorder_len;
|
||||
struct cgraph_node *node;
|
||||
int total_size = 0;
|
||||
int partition_size;
|
||||
ltrans_partition partition;
|
||||
unsigned int last_visited_cgraph_node = 0, last_visited_varpool_node = 0;
|
||||
struct varpool_node *vnode;
|
||||
int cost = 0, internal = 0;
|
||||
int best_n_nodes = 0, best_n_varpool_nodes = 0, best_i = 0, best_cost =
|
||||
INT_MAX, best_internal = 0;
|
||||
int npartitions;
|
||||
|
||||
/* Until we have better ordering facility, use toplogical order.
|
||||
Include only nodes we will partition and compute estimate of program
|
||||
size. Note that since nodes that are not partitioned might be put into
|
||||
multiple partitions, this is just an estimate of real size. This is why
|
||||
we keep partition_size updated after every partition is finalized. */
|
||||
postorder_len = cgraph_postorder (postorder);
|
||||
for (i = 0; i < postorder_len; i++)
|
||||
{
|
||||
node = postorder[i];
|
||||
if (partition_cgraph_node_p (node))
|
||||
{
|
||||
order[n_nodes++] = node;
|
||||
total_size += node->local.inline_summary.self_size;
|
||||
}
|
||||
}
|
||||
free (postorder);
|
||||
|
||||
/* Compute partition size and create the first partition. */
|
||||
partition_size = total_size / PARAM_VALUE (PARAM_LTO_PARTITIONS);
|
||||
if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))
|
||||
partition_size = PARAM_VALUE (MIN_PARTITION_SIZE);
|
||||
npartitions = 1;
|
||||
partition = new_partition ("");
|
||||
if (cgraph_dump_file)
|
||||
fprintf (cgraph_dump_file, "Total unit size: %i, partition size: %i\n",
|
||||
total_size, partition_size);
|
||||
|
||||
for (i = 0; i < n_nodes; i++)
|
||||
{
|
||||
add_cgraph_node_to_partition (partition, order[i]);
|
||||
|
||||
/* Once we added a new node to the partition, we also want to add
|
||||
all referenced variables unless they was already added into some
|
||||
earlier partition.
|
||||
add_cgraph_node_to_partition adds possibly multiple nodes and
|
||||
variables that are needed to satisfy needs of ORDER[i].
|
||||
We remember last visited cgraph and varpool node from last iteration
|
||||
of outer loop that allows us to process every new addition.
|
||||
|
||||
At the same time we compute size of the boundary into COST. Every
|
||||
callgraph or IPA reference edge leaving the partition contributes into
|
||||
COST. Every edge inside partition was earlier computed as one leaving
|
||||
it and thus we need to subtract it from COST. */
|
||||
while (last_visited_cgraph_node <
|
||||
VEC_length (cgraph_node_ptr, partition->cgraph_set->nodes)
|
||||
|| last_visited_varpool_node < VEC_length (varpool_node_ptr,
|
||||
partition->varpool_set->
|
||||
nodes))
|
||||
{
|
||||
struct ipa_ref_list *refs;
|
||||
int j;
|
||||
struct ipa_ref *ref;
|
||||
bool cgraph_p = false;
|
||||
|
||||
if (last_visited_cgraph_node <
|
||||
VEC_length (cgraph_node_ptr, partition->cgraph_set->nodes))
|
||||
{
|
||||
struct cgraph_edge *edge;
|
||||
|
||||
cgraph_p = true;
|
||||
node = VEC_index (cgraph_node_ptr, partition->cgraph_set->nodes,
|
||||
last_visited_cgraph_node);
|
||||
refs = &node->ref_list;
|
||||
|
||||
total_size -= node->local.inline_summary.self_size;
|
||||
last_visited_cgraph_node++;
|
||||
|
||||
gcc_assert (node->analyzed);
|
||||
|
||||
/* Compute boundary cost of callgrpah edges. */
|
||||
for (edge = node->callees; edge; edge = edge->next_callee)
|
||||
if (edge->callee->analyzed)
|
||||
{
|
||||
int edge_cost = edge->frequency;
|
||||
cgraph_node_set_iterator csi;
|
||||
|
||||
if (!edge_cost)
|
||||
edge_cost = 1;
|
||||
gcc_assert (edge_cost > 0);
|
||||
csi = cgraph_node_set_find (partition->cgraph_set, edge->callee);
|
||||
if (!csi_end_p (csi)
|
||||
&& csi.index < last_visited_cgraph_node - 1)
|
||||
cost -= edge_cost, internal+= edge_cost;
|
||||
else
|
||||
cost += edge_cost;
|
||||
}
|
||||
for (edge = node->callers; edge; edge = edge->next_caller)
|
||||
{
|
||||
int edge_cost = edge->frequency;
|
||||
cgraph_node_set_iterator csi;
|
||||
|
||||
gcc_assert (edge->caller->analyzed);
|
||||
if (!edge_cost)
|
||||
edge_cost = 1;
|
||||
gcc_assert (edge_cost > 0);
|
||||
csi = cgraph_node_set_find (partition->cgraph_set, edge->caller);
|
||||
if (!csi_end_p (csi)
|
||||
&& csi.index < last_visited_cgraph_node)
|
||||
cost -= edge_cost;
|
||||
else
|
||||
cost += edge_cost;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
refs =
|
||||
&VEC_index (varpool_node_ptr, partition->varpool_set->nodes,
|
||||
last_visited_varpool_node)->ref_list;
|
||||
last_visited_varpool_node++;
|
||||
}
|
||||
|
||||
/* Compute boundary cost of IPA REF edges and at the same time look into
|
||||
variables referenced from current partition and try to add them. */
|
||||
for (j = 0; ipa_ref_list_reference_iterate (refs, j, ref); j++)
|
||||
if (ref->refered_type == IPA_REF_VARPOOL)
|
||||
{
|
||||
varpool_node_set_iterator vsi;
|
||||
|
||||
vnode = ipa_ref_varpool_node (ref);
|
||||
if (!vnode->finalized)
|
||||
continue;
|
||||
if (!vnode->aux && partition_varpool_node_p (vnode))
|
||||
add_varpool_node_to_partition (partition, vnode);
|
||||
vsi = varpool_node_set_find (partition->varpool_set, vnode);
|
||||
if (!vsi_end_p (vsi)
|
||||
&& vsi.index < last_visited_varpool_node - !cgraph_p)
|
||||
cost--, internal++;
|
||||
else
|
||||
cost++;
|
||||
}
|
||||
else
|
||||
{
|
||||
cgraph_node_set_iterator csi;
|
||||
|
||||
node = ipa_ref_node (ref);
|
||||
if (!node->analyzed)
|
||||
continue;
|
||||
csi = cgraph_node_set_find (partition->cgraph_set, node);
|
||||
if (!csi_end_p (csi)
|
||||
&& csi.index < last_visited_cgraph_node - cgraph_p)
|
||||
cost--, internal++;
|
||||
else
|
||||
cost++;
|
||||
}
|
||||
for (j = 0; ipa_ref_list_refering_iterate (refs, j, ref); j++)
|
||||
if (ref->refering_type == IPA_REF_VARPOOL)
|
||||
{
|
||||
varpool_node_set_iterator vsi;
|
||||
|
||||
vnode = ipa_ref_refering_varpool_node (ref);
|
||||
gcc_assert (vnode->finalized);
|
||||
if (!vnode->aux && partition_varpool_node_p (vnode))
|
||||
add_varpool_node_to_partition (partition, vnode);
|
||||
vsi = varpool_node_set_find (partition->varpool_set, vnode);
|
||||
if (!vsi_end_p (vsi)
|
||||
&& vsi.index < last_visited_varpool_node)
|
||||
cost--;
|
||||
else
|
||||
cost++;
|
||||
}
|
||||
else
|
||||
{
|
||||
cgraph_node_set_iterator csi;
|
||||
|
||||
node = ipa_ref_refering_node (ref);
|
||||
gcc_assert (node->analyzed);
|
||||
csi = cgraph_node_set_find (partition->cgraph_set, node);
|
||||
if (!csi_end_p (csi)
|
||||
&& csi.index < last_visited_cgraph_node)
|
||||
cost--;
|
||||
else
|
||||
cost++;
|
||||
}
|
||||
}
|
||||
|
||||
/* If the partition is large enough, start looking for smallest boundary cost. */
|
||||
if (partition->insns < partition_size * 3 / 4
|
||||
|| best_cost == INT_MAX
|
||||
|| ((!cost
|
||||
|| (best_internal * (HOST_WIDE_INT) cost
|
||||
> (internal * (HOST_WIDE_INT)best_cost)))
|
||||
&& partition->insns < partition_size * 5 / 4))
|
||||
{
|
||||
best_cost = cost;
|
||||
best_internal = internal;
|
||||
best_i = i;
|
||||
best_n_nodes = VEC_length (cgraph_node_ptr,
|
||||
partition->cgraph_set->nodes);
|
||||
best_n_varpool_nodes = VEC_length (varpool_node_ptr,
|
||||
partition->varpool_set->nodes);
|
||||
}
|
||||
if (cgraph_dump_file)
|
||||
fprintf (cgraph_dump_file, "Step %i: added %s, size %i, cost %i/%i best %i/%i, step %i\n", i,
|
||||
cgraph_node_name (order[i]), partition->insns, cost, internal,
|
||||
best_cost, best_internal, best_i);
|
||||
/* Partition is too large, unwind into step when best cost was reached and
|
||||
start new partition. */
|
||||
if (partition->insns > 2 * partition_size)
|
||||
{
|
||||
if (best_i != i)
|
||||
{
|
||||
if (cgraph_dump_file)
|
||||
fprintf (cgraph_dump_file, "Unwinding %i insertions to step %i\n",
|
||||
i - best_i, best_i);
|
||||
undo_partition (partition, best_n_nodes, best_n_varpool_nodes);
|
||||
}
|
||||
i = best_i;
|
||||
partition = new_partition ("");
|
||||
last_visited_cgraph_node = 0;
|
||||
last_visited_varpool_node = 0;
|
||||
cost = 0;
|
||||
|
||||
if (cgraph_dump_file)
|
||||
fprintf (cgraph_dump_file, "New partition\n");
|
||||
best_n_nodes = 0;
|
||||
best_n_varpool_nodes = 0;
|
||||
best_cost = INT_MAX;
|
||||
|
||||
/* Since the size of partitions is just approximate, update the size after
|
||||
we finished current one. */
|
||||
if (npartitions < PARAM_VALUE (PARAM_LTO_PARTITIONS))
|
||||
partition_size = total_size
|
||||
/ (PARAM_VALUE (PARAM_LTO_PARTITIONS) - npartitions);
|
||||
else
|
||||
partition_size = INT_MAX;
|
||||
|
||||
if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))
|
||||
partition_size = PARAM_VALUE (MIN_PARTITION_SIZE);
|
||||
npartitions ++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Varables that are not reachable from the code go into last partition. */
|
||||
for (vnode = varpool_nodes; vnode; vnode = vnode->next)
|
||||
if (partition_varpool_node_p (vnode) && !vnode->aux)
|
||||
add_varpool_node_to_partition (partition, vnode);
|
||||
free (order);
|
||||
}
|
||||
|
||||
/* Promote variable VNODE to be static. */
|
||||
|
||||
static bool
|
||||
|
@ -1990,7 +2329,10 @@ do_whole_program_analysis (void)
|
|||
/* We are about to launch the final LTRANS phase, stop the WPA timer. */
|
||||
timevar_pop (TV_WHOPR_WPA);
|
||||
|
||||
lto_1_to_1_map ();
|
||||
if (flag_lto_partition_1to1)
|
||||
lto_1_to_1_map ();
|
||||
else
|
||||
lto_balanced_map ();
|
||||
|
||||
if (!quiet_flag)
|
||||
{
|
||||
|
|
|
@ -1081,6 +1081,13 @@ decode_options (unsigned int argc, const char **argv,
|
|||
error ("LTO support has not been enabled in this configuration");
|
||||
#endif
|
||||
}
|
||||
if (flag_lto_partition_balanced || flag_lto_partition_1to1)
|
||||
{
|
||||
if (flag_lto_partition_balanced && flag_lto_partition_1to1)
|
||||
error ("Only one -flto-partitoin value can be specified");
|
||||
if (!flag_whopr)
|
||||
error ("-flto-partitoin has effect only with -fwhopr");
|
||||
}
|
||||
|
||||
/* Reconcile -flto and -fwhopr. Set additional flags as appropriate and
|
||||
check option consistency. */
|
||||
|
|
|
@ -838,6 +838,17 @@ DEFPARAM (PARAM_DEVIRT_TYPE_LIST_SIZE,
|
|||
"devirtualization",
|
||||
8, 0, 0)
|
||||
|
||||
/* WHOPR partitioning configuration. */
|
||||
|
||||
DEFPARAM (PARAM_LTO_PARTITIONS,
|
||||
"lto-partitions",
|
||||
"Number of paritions program should be split to",
|
||||
32, 0, 0)
|
||||
|
||||
DEFPARAM (MIN_PARTITION_SIZE,
|
||||
"lto-min-partition",
|
||||
"Size of minimal paritition for WHOPR (in estimated instructions)",
|
||||
1000, 0, 0)
|
||||
/*
|
||||
Local variables:
|
||||
mode:c
|
||||
|
|
Loading…
Add table
Reference in a new issue