invoke.texi (-fprefetch-loop-arrays, [...]): Document.
* doc/invoke.texi (-fprefetch-loop-arrays, -fprefetch-loop-arrays-rtl): Document. * tree-ssa-loop-niter.c (number_of_iterations_ne, number_of_iterations_lt, number_of_iterations_cond): Remember the shape of the ending condition. * tree-ssa-loop-manip.c: Include params.h. (build_if_stmt, can_unroll_loop_p, determine_exit_conditions, tree_unroll_loop): New functions. * tree-pass.h (pass_loop_prefetch): Declare. * loop.c (rest_of_handle_loop_optimize): Test for -fprefetch-loop-arrays-rtl. * tree-scalar-evolution.h (affine_iv): Moved to tree-flow.h. * timevar.def (TV_TREE_PREFETCH): New timevar. * tree-ssa-loop.c (tree_ssa_loop_prefetch, gate_tree_ssa_loop_prefetch, pass_loop_prefetch): New. * tree-cfgcleanup.c: Include tree-scalar-evolution.h. (cleanup_tree_cfg_loop): Call scev_reset. * common.opt (fprefetch-loop-arrays-rtl): Add. * tree-ssa-loop-prefetch.c: New file. * tree-outof-ssa.c (struct value_expr_d): Add expr_vars field. (new_temp_expr_table): Initialize expr_vars. (free_temp_expr_table): Cleanup expr_vars. (check_replaceable, find_replaceable_in_bb): Prevent accumulating expressions from being merged into one. * tree-flow.h (affine_iv): Moved from tree-scalar-evolution.h. (struct tree_niter_desc): Add control, bound and cmp fields. (tree_ssa_prefetch_arrays, can_unroll_loop_p, tree_unroll_loop): Declare. * Makefile.in (tree-ssa-loop-prefetch.o): Add. (tree-cfgcleanup.o): Add SCEV_H dependency. (tree-ssa-loop-manip.o): Add PARAMS_H dependency. * passes.c (init_optimization_passes): Add pass_loop_prefetch. From-SVN: r110964
This commit is contained in:
parent
0a4288d92e
commit
1768461812
15 changed files with 512 additions and 41 deletions
|
@ -1,3 +1,38 @@
|
|||
2006-02-14 Zdenek Dvorak <dvorakz@suse.cz>
|
||||
|
||||
* doc/invoke.texi (-fprefetch-loop-arrays, -fprefetch-loop-arrays-rtl):
|
||||
Document.
|
||||
* tree-ssa-loop-niter.c (number_of_iterations_ne,
|
||||
number_of_iterations_lt, number_of_iterations_cond): Remember the shape
|
||||
of the ending condition.
|
||||
* tree-ssa-loop-manip.c: Include params.h.
|
||||
(build_if_stmt, can_unroll_loop_p, determine_exit_conditions,
|
||||
tree_unroll_loop): New functions.
|
||||
* tree-pass.h (pass_loop_prefetch): Declare.
|
||||
* loop.c (rest_of_handle_loop_optimize): Test for
|
||||
-fprefetch-loop-arrays-rtl.
|
||||
* tree-scalar-evolution.h (affine_iv): Moved to tree-flow.h.
|
||||
* timevar.def (TV_TREE_PREFETCH): New timevar.
|
||||
* tree-ssa-loop.c (tree_ssa_loop_prefetch, gate_tree_ssa_loop_prefetch,
|
||||
pass_loop_prefetch): New.
|
||||
* tree-cfgcleanup.c: Include tree-scalar-evolution.h.
|
||||
(cleanup_tree_cfg_loop): Call scev_reset.
|
||||
* common.opt (fprefetch-loop-arrays-rtl): Add.
|
||||
* tree-ssa-loop-prefetch.c: New file.
|
||||
* tree-outof-ssa.c (struct value_expr_d): Add expr_vars field.
|
||||
(new_temp_expr_table): Initialize expr_vars.
|
||||
(free_temp_expr_table): Cleanup expr_vars.
|
||||
(check_replaceable, find_replaceable_in_bb): Prevent accumulating
|
||||
expressions from being merged into one.
|
||||
* tree-flow.h (affine_iv): Moved from tree-scalar-evolution.h.
|
||||
(struct tree_niter_desc): Add control, bound and cmp fields.
|
||||
(tree_ssa_prefetch_arrays, can_unroll_loop_p, tree_unroll_loop):
|
||||
Declare.
|
||||
* Makefile.in (tree-ssa-loop-prefetch.o): Add.
|
||||
(tree-cfgcleanup.o): Add SCEV_H dependency.
|
||||
(tree-ssa-loop-manip.o): Add PARAMS_H dependency.
|
||||
* passes.c (init_optimization_passes): Add pass_loop_prefetch.
|
||||
|
||||
2006-02-14 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/26258
|
||||
|
|
|
@ -963,7 +963,7 @@ OBJS-common = \
|
|||
tree-vect-generic.o tree-ssa-loop.o tree-ssa-loop-niter.o \
|
||||
tree-ssa-loop-manip.o tree-ssa-threadupdate.o tree-ssa-threadedge.o \
|
||||
tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o \
|
||||
tree-vect-patterns.o \
|
||||
tree-vect-patterns.o tree-ssa-loop-prefetch.o \
|
||||
tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-address.o \
|
||||
tree-ssa-math-opts.o \
|
||||
tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o \
|
||||
|
@ -1975,6 +1975,12 @@ tree-ssa-loop-ch.o : tree-ssa-loop-ch.c $(TREE_FLOW_H) $(CONFIG_H) \
|
|||
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(TREE_INLINE_H) \
|
||||
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
|
||||
tree-pass.h $(FLAGS_H) $(BASIC_BLOCK_H) hard-reg-set.h
|
||||
tree-ssa-loop-prefetch.o: tree-ssa-loop-prefetch.c $(TREE_FLOW_H) $(CONFIG_H) \
|
||||
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
|
||||
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
|
||||
tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
|
||||
$(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \
|
||||
tree-chrec.h toplev.h langhooks.h
|
||||
tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
|
||||
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
|
||||
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
|
||||
|
@ -1984,7 +1990,8 @@ tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
|
|||
tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \
|
||||
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \
|
||||
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
|
||||
tree-pass.h $(CFGLAYOUT_H) $(SCEV_H) $(BASIC_BLOCK_H) hard-reg-set.h
|
||||
tree-pass.h $(CFGLAYOUT_H) $(SCEV_H) $(BASIC_BLOCK_H) hard-reg-set.h \
|
||||
$(PARAMS_H)
|
||||
tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \
|
||||
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) domwalk.h \
|
||||
$(PARAMS_H) output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h \
|
||||
|
|
|
@ -659,7 +659,11 @@ Common Report Var(flag_pie,1) VarExists
|
|||
Generate position-independent code for executables if possible (small mode)
|
||||
|
||||
fprefetch-loop-arrays
|
||||
Common Report Var(flag_prefetch_loop_arrays)
|
||||
Common Report Var(flag_prefetch_loop_arrays,1)
|
||||
Generate prefetch instructions, if available, for arrays in loops
|
||||
|
||||
fprefetch-loop-arrays-rtl
|
||||
Common Report Var(flag_prefetch_loop_arrays,2)
|
||||
Generate prefetch instructions, if available, for arrays in loops
|
||||
|
||||
fprofile
|
||||
|
|
|
@ -321,7 +321,7 @@ Objective-C and Objective-C++ Dialects}.
|
|||
-funsafe-math-optimizations -funsafe-loop-optimizations -ffinite-math-only @gol
|
||||
-fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol
|
||||
-fomit-frame-pointer -foptimize-register-move @gol
|
||||
-foptimize-sibling-calls -fprefetch-loop-arrays @gol
|
||||
-foptimize-sibling-calls -fprefetch-loop-arrays -fprefetch-loop-arrays-rtl @gol
|
||||
-fprofile-generate -fprofile-use @gol
|
||||
-fregmove -frename-registers @gol
|
||||
-freorder-blocks -freorder-blocks-and-partition -freorder-functions @gol
|
||||
|
@ -5171,7 +5171,9 @@ With this option, the compiler will create multiple copies of some
|
|||
local variables when unrolling a loop which can result in superior code.
|
||||
|
||||
@item -fprefetch-loop-arrays
|
||||
@itemx -fprefetch-loop-arrays-rtl
|
||||
@opindex fprefetch-loop-arrays
|
||||
@opindex fprefetch-loop-arrays-rtl
|
||||
If supported by the target machine, generate instructions to prefetch
|
||||
memory to improve the performance of loops that access large arrays.
|
||||
|
||||
|
@ -5709,7 +5711,9 @@ Move branches with loop invariant conditions out of the loop, with duplicates
|
|||
of the loop on both branches (modified according to result of the condition).
|
||||
|
||||
@item -fprefetch-loop-arrays
|
||||
@itemx -fprefetch-loop-arrays-rtl
|
||||
@opindex fprefetch-loop-arrays
|
||||
@opindex fprefetch-loop-arrays-rtl
|
||||
If supported by the target machine, generate instructions to prefetch
|
||||
memory to improve the performance of loops that access large arrays.
|
||||
|
||||
|
|
|
@ -11780,7 +11780,7 @@ rest_of_handle_loop_optimize (void)
|
|||
free_bb_for_insn ();
|
||||
profile_status = PROFILE_ABSENT;
|
||||
|
||||
do_prefetch = flag_prefetch_loop_arrays ? LOOP_PREFETCH : 0;
|
||||
do_prefetch = flag_prefetch_loop_arrays == 2 ? LOOP_PREFETCH : 0;
|
||||
|
||||
if (flag_rerun_loop_opt)
|
||||
{
|
||||
|
|
|
@ -601,6 +601,7 @@ init_optimization_passes (void)
|
|||
vectorizer creates alias relations that are not supported by
|
||||
pass_may_alias. */
|
||||
NEXT_PASS (pass_complete_unroll);
|
||||
NEXT_PASS (pass_loop_prefetch);
|
||||
NEXT_PASS (pass_iv_optimize);
|
||||
NEXT_PASS (pass_tree_loop_done);
|
||||
*p = NULL;
|
||||
|
|
|
@ -107,6 +107,7 @@ DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching")
|
|||
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
|
||||
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
|
||||
DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
|
||||
DEFTIMEVAR (TV_TREE_PREFETCH , "tree prefetching")
|
||||
DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization")
|
||||
DEFTIMEVAR (TV_TREE_LOOP_INIT , "tree loop init")
|
||||
DEFTIMEVAR (TV_TREE_LOOP_FINI , "tree loop fini")
|
||||
|
|
|
@ -45,6 +45,7 @@ Boston, MA 02110-1301, USA. */
|
|||
#include "cfglayout.h"
|
||||
#include "hashtab.h"
|
||||
#include "tree-ssa-propagate.h"
|
||||
#include "tree-scalar-evolution.h"
|
||||
|
||||
/* Remove any fallthru edge from EV. Return true if an edge was removed. */
|
||||
|
||||
|
@ -559,23 +560,26 @@ cleanup_tree_cfg (void)
|
|||
void
|
||||
cleanup_tree_cfg_loop (void)
|
||||
{
|
||||
bitmap changed_bbs = BITMAP_ALLOC (NULL);
|
||||
bool changed = cleanup_tree_cfg ();
|
||||
|
||||
cleanup_tree_cfg ();
|
||||
if (changed)
|
||||
{
|
||||
bitmap changed_bbs = BITMAP_ALLOC (NULL);
|
||||
fix_loop_structure (current_loops, changed_bbs);
|
||||
calculate_dominance_info (CDI_DOMINATORS);
|
||||
|
||||
fix_loop_structure (current_loops, changed_bbs);
|
||||
calculate_dominance_info (CDI_DOMINATORS);
|
||||
/* This usually does nothing. But sometimes parts of cfg that originally
|
||||
were inside a loop get out of it due to edge removal (since they
|
||||
become unreachable by back edges from latch). */
|
||||
rewrite_into_loop_closed_ssa (changed_bbs, TODO_update_ssa);
|
||||
|
||||
/* This usually does nothing. But sometimes parts of cfg that originally
|
||||
were inside a loop get out of it due to edge removal (since they
|
||||
become unreachable by back edges from latch). */
|
||||
rewrite_into_loop_closed_ssa (changed_bbs, TODO_update_ssa);
|
||||
|
||||
BITMAP_FREE (changed_bbs);
|
||||
BITMAP_FREE (changed_bbs);
|
||||
|
||||
#ifdef ENABLE_CHECKING
|
||||
verify_loop_structure (current_loops);
|
||||
verify_loop_structure (current_loops);
|
||||
#endif
|
||||
scev_reset ();
|
||||
}
|
||||
}
|
||||
|
||||
/* Merge the PHI nodes at BB into those at BB's sole successor. */
|
||||
|
|
|
@ -667,6 +667,17 @@ extern void replace_exp (use_operand_p, tree);
|
|||
extern bool may_propagate_copy (tree, tree);
|
||||
extern bool may_propagate_copy_into_asm (tree);
|
||||
|
||||
/* Affine iv. */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* Iv = BASE + STEP * i. */
|
||||
tree base, step;
|
||||
|
||||
/* True if this iv does not overflow. */
|
||||
bool no_overflow;
|
||||
} affine_iv;
|
||||
|
||||
/* Description of number of iterations of a loop. All the expressions inside
|
||||
the structure can be evaluated at the end of the loop's preheader
|
||||
(and due to ssa form, also anywhere inside the body of the loop). */
|
||||
|
@ -697,6 +708,15 @@ struct tree_niter_desc
|
|||
MAX_SIGNED_INT. However if the (n <= 0) assumption
|
||||
is eliminated (by looking at the guard on entry of
|
||||
the loop), then the information would be lost. */
|
||||
|
||||
/* The simplified shape of the exit condition. The loop exits if
|
||||
CONTROL CMP BOUND is false, where CMP is one of NE_EXPR,
|
||||
LT_EXPR, or GT_EXPR, and step of CONTROL is positive if CMP is
|
||||
LT_EXPR and negative if CMP is GT_EXPR. This information is used
|
||||
by loop unrolling. */
|
||||
affine_iv control;
|
||||
tree bound;
|
||||
enum tree_code cmp;
|
||||
};
|
||||
|
||||
/* In tree-vectorizer.c */
|
||||
|
@ -711,6 +731,7 @@ void tree_ssa_lim (struct loops *);
|
|||
void tree_ssa_unswitch_loops (struct loops *);
|
||||
void canonicalize_induction_variables (struct loops *);
|
||||
void tree_unroll_loops_completely (struct loops *, bool);
|
||||
void tree_ssa_prefetch_arrays (struct loops *);
|
||||
void remove_empty_loops (struct loops *);
|
||||
void tree_ssa_iv_optimize (struct loops *);
|
||||
|
||||
|
@ -748,6 +769,10 @@ struct loop *tree_ssa_loop_version (struct loops *, struct loop *, tree,
|
|||
tree expand_simple_operations (tree);
|
||||
void substitute_in_loop_info (struct loop *, tree, tree);
|
||||
edge single_dom_exit (struct loop *);
|
||||
bool can_unroll_loop_p (struct loop *loop, unsigned factor,
|
||||
struct tree_niter_desc *niter);
|
||||
void tree_unroll_loop (struct loops *, struct loop *, unsigned,
|
||||
edge, struct tree_niter_desc *);
|
||||
|
||||
/* In tree-ssa-threadedge.c */
|
||||
extern bool potentially_threadable_block (basic_block);
|
||||
|
|
|
@ -1299,7 +1299,8 @@ typedef struct value_expr_d
|
|||
typedef struct temp_expr_table_d
|
||||
{
|
||||
var_map map;
|
||||
void **version_info;
|
||||
void **version_info;
|
||||
bitmap *expr_vars;
|
||||
value_expr_p *partition_dep_list;
|
||||
bitmap replaceable;
|
||||
bool saw_replaceable;
|
||||
|
@ -1344,6 +1345,7 @@ new_temp_expr_table (var_map map)
|
|||
t->map = map;
|
||||
|
||||
t->version_info = XCNEWVEC (void *, num_ssa_names + 1);
|
||||
t->expr_vars = XCNEWVEC (bitmap, num_ssa_names + 1);
|
||||
t->partition_dep_list = XCNEWVEC (value_expr_p,
|
||||
num_var_partitions (map) + 1);
|
||||
|
||||
|
@ -1367,6 +1369,7 @@ free_temp_expr_table (temp_expr_table_p t)
|
|||
{
|
||||
value_expr_p p;
|
||||
tree *ret = NULL;
|
||||
unsigned i;
|
||||
|
||||
#ifdef ENABLE_CHECKING
|
||||
unsigned x;
|
||||
|
@ -1383,6 +1386,11 @@ free_temp_expr_table (temp_expr_table_p t)
|
|||
BITMAP_FREE (t->partition_in_use);
|
||||
BITMAP_FREE (t->replaceable);
|
||||
|
||||
for (i = 0; i <= num_ssa_names; i++)
|
||||
if (t->expr_vars[i])
|
||||
BITMAP_FREE (t->expr_vars[i]);
|
||||
free (t->expr_vars);
|
||||
|
||||
free (t->partition_dep_list);
|
||||
if (t->saw_replaceable)
|
||||
ret = (tree *)t->version_info;
|
||||
|
@ -1545,11 +1553,12 @@ add_dependance (temp_expr_table_p tab, int version, tree var)
|
|||
static bool
|
||||
check_replaceable (temp_expr_table_p tab, tree stmt)
|
||||
{
|
||||
tree var, def;
|
||||
tree var, def, basevar;
|
||||
int version;
|
||||
var_map map = tab->map;
|
||||
ssa_op_iter iter;
|
||||
tree call_expr;
|
||||
bitmap def_vars = BITMAP_ALLOC (NULL), use_vars;
|
||||
|
||||
if (TREE_CODE (stmt) != MODIFY_EXPR)
|
||||
return false;
|
||||
|
@ -1580,12 +1589,19 @@ check_replaceable (temp_expr_table_p tab, tree stmt)
|
|||
}
|
||||
|
||||
version = SSA_NAME_VERSION (def);
|
||||
basevar = SSA_NAME_VAR (def);
|
||||
bitmap_set_bit (def_vars, DECL_UID (basevar));
|
||||
|
||||
/* Add this expression to the dependency list for each use partition. */
|
||||
FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE)
|
||||
{
|
||||
add_dependance (tab, version, var);
|
||||
|
||||
use_vars = tab->expr_vars[SSA_NAME_VERSION (var)];
|
||||
if (use_vars)
|
||||
bitmap_ior_into (def_vars, use_vars);
|
||||
}
|
||||
tab->expr_vars[version] = def_vars;
|
||||
|
||||
/* If there are VUSES, add a dependence on virtual defs. */
|
||||
if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VUSE))
|
||||
|
@ -1704,7 +1720,7 @@ static void
|
|||
find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb)
|
||||
{
|
||||
block_stmt_iterator bsi;
|
||||
tree stmt, def;
|
||||
tree stmt, def, use;
|
||||
stmt_ann_t ann;
|
||||
int partition;
|
||||
var_map map = tab->map;
|
||||
|
@ -1717,30 +1733,34 @@ find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb)
|
|||
ann = stmt_ann (stmt);
|
||||
|
||||
/* Determine if this stmt finishes an existing expression. */
|
||||
FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_USE)
|
||||
FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
|
||||
{
|
||||
if (tab->version_info[SSA_NAME_VERSION (def)])
|
||||
unsigned ver = SSA_NAME_VERSION (use);
|
||||
|
||||
if (tab->version_info[ver])
|
||||
{
|
||||
bool same_root_var = false;
|
||||
tree def2;
|
||||
ssa_op_iter iter2;
|
||||
bitmap vars = tab->expr_vars[ver];
|
||||
|
||||
/* See if the root variables are the same. If they are, we
|
||||
do not want to do the replacement to avoid problems with
|
||||
code size, see PR tree-optimization/17549. */
|
||||
FOR_EACH_SSA_TREE_OPERAND (def2, stmt, iter2, SSA_OP_DEF)
|
||||
if (SSA_NAME_VAR (def) == SSA_NAME_VAR (def2))
|
||||
{
|
||||
same_root_var = true;
|
||||
break;
|
||||
}
|
||||
FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter2, SSA_OP_DEF)
|
||||
{
|
||||
if (bitmap_bit_p (vars, DECL_UID (SSA_NAME_VAR (def))))
|
||||
{
|
||||
same_root_var = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Mark expression as replaceable unless stmt is volatile
|
||||
or DEF sets the same root variable as STMT. */
|
||||
if (!ann->has_volatile_ops && !same_root_var)
|
||||
mark_replaceable (tab, def);
|
||||
mark_replaceable (tab, use);
|
||||
else
|
||||
finish_expr (tab, SSA_NAME_VERSION (def), false);
|
||||
finish_expr (tab, ver, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -247,6 +247,7 @@ extern struct tree_opt_pass pass_record_bounds;
|
|||
extern struct tree_opt_pass pass_if_conversion;
|
||||
extern struct tree_opt_pass pass_vectorize;
|
||||
extern struct tree_opt_pass pass_complete_unroll;
|
||||
extern struct tree_opt_pass pass_loop_prefetch;
|
||||
extern struct tree_opt_pass pass_iv_optimize;
|
||||
extern struct tree_opt_pass pass_tree_loop_done;
|
||||
extern struct tree_opt_pass pass_ch;
|
||||
|
|
|
@ -34,17 +34,6 @@ extern void gather_stats_on_scev_database (void);
|
|||
extern void scev_analysis (void);
|
||||
void scev_const_prop (void);
|
||||
|
||||
/* Affine iv. */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* Iv = BASE + STEP * i. */
|
||||
tree base, step;
|
||||
|
||||
/* True if this iv does not overflow. */
|
||||
bool no_overflow;
|
||||
} affine_iv;
|
||||
|
||||
extern bool simple_iv (struct loop *, tree, tree, affine_iv *, bool);
|
||||
|
||||
#endif /* GCC_TREE_SCALAR_EVOLUTION_H */
|
||||
|
|
|
@ -36,6 +36,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
|||
#include "tree-pass.h"
|
||||
#include "cfglayout.h"
|
||||
#include "tree-scalar-evolution.h"
|
||||
#include "params.h"
|
||||
|
||||
/* Creates an induction variable with value BASE + STEP * iteration in LOOP.
|
||||
It is expected that neither BASE nor STEP are shared with other expressions
|
||||
|
@ -618,3 +619,328 @@ tree_duplicate_loop_to_header_edge (struct loop *loop, edge e,
|
|||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Build if (COND) goto THEN_LABEL; else goto ELSE_LABEL; */
|
||||
|
||||
static tree
|
||||
build_if_stmt (tree cond, tree then_label, tree else_label)
|
||||
{
|
||||
return build3 (COND_EXPR, void_type_node,
|
||||
cond,
|
||||
build1 (GOTO_EXPR, void_type_node, then_label),
|
||||
build1 (GOTO_EXPR, void_type_node, else_label));
|
||||
}
|
||||
|
||||
/* Returns true if we can unroll LOOP FACTOR times. The number
|
||||
of iterations of the loop is returned in NITER. */
|
||||
|
||||
bool
|
||||
can_unroll_loop_p (struct loop *loop, unsigned factor,
|
||||
struct tree_niter_desc *niter)
|
||||
{
|
||||
edge exit;
|
||||
|
||||
/* Check whether unrolling is possible. We only want to unroll loops
|
||||
for which we are able to determine the number of iterations. We also
|
||||
want to split the extra iterations of the loop from its end,
|
||||
therefore we require that the loop has precisely one
|
||||
exit. */
|
||||
|
||||
exit = single_dom_exit (loop);
|
||||
if (!exit)
|
||||
return false;
|
||||
|
||||
if (!number_of_iterations_exit (loop, exit, niter, false)
|
||||
|| niter->cmp == ERROR_MARK)
|
||||
return false;
|
||||
|
||||
/* And of course, we must be able to duplicate the loop. */
|
||||
if (!can_duplicate_loop_p (loop))
|
||||
return false;
|
||||
|
||||
/* The final loop should be small enough. */
|
||||
if (tree_num_loop_insns (loop) * factor
|
||||
> (unsigned) PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Determines the conditions that control execution of LOOP unrolled FACTOR
|
||||
times. DESC describes the number of iterations of LOOP. ENTER_COND is set to
|
||||
condition that must be true if the main loop can be entered.
|
||||
EXIT_BASE, EXIT_STEP, EXIT_CMP and EXIT_BOUND are set to values describing
|
||||
how the exit from the unrolled loop should be controlled. */
|
||||
|
||||
static void
|
||||
determine_exit_conditions (struct loop *loop, struct tree_niter_desc *desc,
|
||||
unsigned factor, tree *enter_cond,
|
||||
tree *exit_base, tree *exit_step,
|
||||
enum tree_code *exit_cmp, tree *exit_bound)
|
||||
{
|
||||
tree stmts;
|
||||
tree base = desc->control.base;
|
||||
tree step = desc->control.step;
|
||||
tree bound = desc->bound;
|
||||
tree type = TREE_TYPE (base);
|
||||
tree bigstep, delta;
|
||||
tree min = lower_bound_in_type (type, type);
|
||||
tree max = upper_bound_in_type (type, type);
|
||||
enum tree_code cmp = desc->cmp;
|
||||
tree cond = boolean_true_node, assum;
|
||||
|
||||
*enter_cond = boolean_false_node;
|
||||
*exit_base = NULL_TREE;
|
||||
*exit_step = NULL_TREE;
|
||||
*exit_cmp = ERROR_MARK;
|
||||
*exit_bound = NULL_TREE;
|
||||
gcc_assert (cmp != ERROR_MARK);
|
||||
|
||||
/* We only need to be correct when we answer question
|
||||
"Do at least FACTOR more iterations remain?" in the unrolled loop.
|
||||
Thus, transforming BASE + STEP * i <> BOUND to
|
||||
BASE + STEP * i < BOUND is ok. */
|
||||
if (cmp == NE_EXPR)
|
||||
{
|
||||
if (tree_int_cst_sign_bit (step))
|
||||
cmp = GT_EXPR;
|
||||
else
|
||||
cmp = LT_EXPR;
|
||||
}
|
||||
else if (cmp == LT_EXPR)
|
||||
{
|
||||
gcc_assert (!tree_int_cst_sign_bit (step));
|
||||
}
|
||||
else if (cmp == GT_EXPR)
|
||||
{
|
||||
gcc_assert (tree_int_cst_sign_bit (step));
|
||||
}
|
||||
else
|
||||
gcc_unreachable ();
|
||||
|
||||
/* The main body of the loop may be entered iff:
|
||||
|
||||
1) desc->may_be_zero is false.
|
||||
2) it is possible to check that there are at least FACTOR iterations
|
||||
of the loop, i.e., BOUND - STEP * (FACTOR - 1) does not overflow.
|
||||
3) # of iterations is at least FACTOR */
|
||||
|
||||
if (!zero_p (desc->may_be_zero))
|
||||
cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
|
||||
invert_truthvalue (desc->may_be_zero),
|
||||
cond);
|
||||
|
||||
bigstep = fold_build2 (MULT_EXPR, type, step,
|
||||
build_int_cst_type (type, factor));
|
||||
delta = fold_build2 (MINUS_EXPR, type, bigstep, step);
|
||||
if (cmp == LT_EXPR)
|
||||
assum = fold_build2 (GE_EXPR, boolean_type_node,
|
||||
bound,
|
||||
fold_build2 (PLUS_EXPR, type, min, delta));
|
||||
else
|
||||
assum = fold_build2 (LE_EXPR, boolean_type_node,
|
||||
bound,
|
||||
fold_build2 (PLUS_EXPR, type, max, delta));
|
||||
cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
|
||||
|
||||
bound = fold_build2 (MINUS_EXPR, type, bound, delta);
|
||||
assum = fold_build2 (cmp, boolean_type_node, base, bound);
|
||||
cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
|
||||
|
||||
cond = force_gimple_operand (unshare_expr (cond), &stmts, false, NULL_TREE);
|
||||
if (stmts)
|
||||
bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
|
||||
/* cond now may be a gimple comparison, which would be OK, but also any
|
||||
other gimple rhs (say a && b). In this case we need to force it to
|
||||
operand. */
|
||||
if (!is_gimple_condexpr (cond))
|
||||
{
|
||||
cond = force_gimple_operand (cond, &stmts, true, NULL_TREE);
|
||||
if (stmts)
|
||||
bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
|
||||
}
|
||||
*enter_cond = cond;
|
||||
|
||||
base = force_gimple_operand (unshare_expr (base), &stmts, true, NULL_TREE);
|
||||
if (stmts)
|
||||
bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
|
||||
bound = force_gimple_operand (unshare_expr (bound), &stmts, true, NULL_TREE);
|
||||
if (stmts)
|
||||
bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
|
||||
|
||||
*exit_base = base;
|
||||
*exit_step = bigstep;
|
||||
*exit_cmp = cmp;
|
||||
*exit_bound = bound;
|
||||
}
|
||||
|
||||
/* Unroll LOOP FACTOR times. LOOPS is the loops tree. DESC describes
|
||||
number of iterations of LOOP. EXIT is the exit of the loop to which
|
||||
DESC corresponds.
|
||||
|
||||
If N is the number of iterations of the loop and MAY_BE_ZERO is the condition
|
||||
under which the loop exits in the first iteration even if N != 0,
|
||||
|
||||
while (1)
|
||||
{
|
||||
x = phi (init, next);
|
||||
|
||||
pre;
|
||||
if (st)
|
||||
break;
|
||||
post;
|
||||
}
|
||||
|
||||
becomes (with possibly the exit conditions formulated a bit differently,
|
||||
avoiding the need to create a new iv):
|
||||
|
||||
if (MAY_BE_ZERO || N < FACTOR)
|
||||
goto rest;
|
||||
|
||||
do
|
||||
{
|
||||
x = phi (init, next);
|
||||
|
||||
pre;
|
||||
post;
|
||||
pre;
|
||||
post;
|
||||
...
|
||||
pre;
|
||||
post;
|
||||
N -= FACTOR;
|
||||
|
||||
} while (N >= FACTOR);
|
||||
|
||||
rest:
|
||||
init' = phi (init, x);
|
||||
|
||||
while (1)
|
||||
{
|
||||
x = phi (init', next);
|
||||
|
||||
pre;
|
||||
if (st)
|
||||
break;
|
||||
post;
|
||||
} */
|
||||
|
||||
void
|
||||
tree_unroll_loop (struct loops *loops, struct loop *loop, unsigned factor,
|
||||
edge exit, struct tree_niter_desc *desc)
|
||||
{
|
||||
tree dont_exit, exit_if, ctr_before, ctr_after;
|
||||
tree enter_main_cond, exit_base, exit_step, exit_bound;
|
||||
enum tree_code exit_cmp;
|
||||
tree phi_old_loop, phi_new_loop, phi_rest, init, next, new_init, var;
|
||||
struct loop *new_loop;
|
||||
basic_block rest, exit_bb;
|
||||
edge old_entry, new_entry, old_latch, precond_edge, new_exit;
|
||||
edge nonexit, new_nonexit;
|
||||
block_stmt_iterator bsi;
|
||||
use_operand_p op;
|
||||
bool ok;
|
||||
unsigned est_niter;
|
||||
sbitmap wont_exit;
|
||||
|
||||
est_niter = expected_loop_iterations (loop);
|
||||
determine_exit_conditions (loop, desc, factor,
|
||||
&enter_main_cond, &exit_base, &exit_step,
|
||||
&exit_cmp, &exit_bound);
|
||||
|
||||
new_loop = loop_version (loops, loop, enter_main_cond, NULL, true);
|
||||
gcc_assert (new_loop != NULL);
|
||||
update_ssa (TODO_update_ssa);
|
||||
|
||||
/* Unroll the loop and remove the old exits. */
|
||||
dont_exit = ((exit->flags & EDGE_TRUE_VALUE)
|
||||
? boolean_false_node
|
||||
: boolean_true_node);
|
||||
if (exit == EDGE_SUCC (exit->src, 0))
|
||||
nonexit = EDGE_SUCC (exit->src, 1);
|
||||
else
|
||||
nonexit = EDGE_SUCC (exit->src, 0);
|
||||
nonexit->probability = REG_BR_PROB_BASE;
|
||||
exit->probability = 0;
|
||||
nonexit->count += exit->count;
|
||||
exit->count = 0;
|
||||
exit_if = last_stmt (exit->src);
|
||||
COND_EXPR_COND (exit_if) = dont_exit;
|
||||
update_stmt (exit_if);
|
||||
|
||||
wont_exit = sbitmap_alloc (factor);
|
||||
sbitmap_ones (wont_exit);
|
||||
ok = tree_duplicate_loop_to_header_edge
|
||||
(loop, loop_latch_edge (loop), loops, factor - 1,
|
||||
wont_exit, NULL, NULL, NULL, DLTHE_FLAG_UPDATE_FREQ);
|
||||
free (wont_exit);
|
||||
gcc_assert (ok);
|
||||
update_ssa (TODO_update_ssa);
|
||||
|
||||
/* Prepare the cfg and update the phi nodes. */
|
||||
rest = loop_preheader_edge (new_loop)->src;
|
||||
precond_edge = single_pred_edge (rest);
|
||||
loop_split_edge_with (loop_latch_edge (loop), NULL);
|
||||
exit_bb = single_pred (loop->latch);
|
||||
|
||||
new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE);
|
||||
new_exit->count = loop_preheader_edge (loop)->count;
|
||||
est_niter = est_niter / factor + 1;
|
||||
new_exit->probability = REG_BR_PROB_BASE / est_niter;
|
||||
|
||||
new_nonexit = single_pred_edge (loop->latch);
|
||||
new_nonexit->flags = EDGE_TRUE_VALUE;
|
||||
new_nonexit->probability = REG_BR_PROB_BASE - new_exit->probability;
|
||||
|
||||
old_entry = loop_preheader_edge (loop);
|
||||
new_entry = loop_preheader_edge (new_loop);
|
||||
old_latch = loop_latch_edge (loop);
|
||||
for (phi_old_loop = phi_nodes (loop->header),
|
||||
phi_new_loop = phi_nodes (new_loop->header);
|
||||
phi_old_loop;
|
||||
phi_old_loop = PHI_CHAIN (phi_old_loop),
|
||||
phi_new_loop = PHI_CHAIN (phi_new_loop))
|
||||
{
|
||||
init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry);
|
||||
op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry);
|
||||
gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
|
||||
next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch);
|
||||
|
||||
/* Prefer using original variable as a base for the new ssa name.
|
||||
This is necessary for virtual ops, and useful in order to avoid
|
||||
losing debug info for real ops. */
|
||||
if (TREE_CODE (next) == SSA_NAME)
|
||||
var = SSA_NAME_VAR (next);
|
||||
else if (TREE_CODE (init) == SSA_NAME)
|
||||
var = SSA_NAME_VAR (init);
|
||||
else
|
||||
{
|
||||
var = create_tmp_var (TREE_TYPE (init), "unrinittmp");
|
||||
add_referenced_tmp_var (var);
|
||||
}
|
||||
|
||||
new_init = make_ssa_name (var, NULL_TREE);
|
||||
phi_rest = create_phi_node (new_init, rest);
|
||||
SSA_NAME_DEF_STMT (new_init) = phi_rest;
|
||||
|
||||
add_phi_arg (phi_rest, init, precond_edge);
|
||||
add_phi_arg (phi_rest, next, new_exit);
|
||||
SET_USE (op, new_init);
|
||||
}
|
||||
|
||||
/* Finally create the new counter for number of iterations and add the new
|
||||
exit instruction. */
|
||||
bsi = bsi_last (exit_bb);
|
||||
create_iv (exit_base, exit_step, NULL_TREE, loop,
|
||||
&bsi, true, &ctr_before, &ctr_after);
|
||||
exit_if = build_if_stmt (build2 (exit_cmp, boolean_type_node, ctr_after,
|
||||
exit_bound),
|
||||
tree_block_label (loop->latch),
|
||||
tree_block_label (rest));
|
||||
bsi_insert_after (&bsi, exit_if, BSI_NEW_STMT);
|
||||
|
||||
verify_flow_info ();
|
||||
verify_dominators (CDI_DOMINATORS);
|
||||
verify_loop_structure (loops);
|
||||
verify_loop_closed_ssa ();
|
||||
}
|
||||
|
|
|
@ -140,6 +140,10 @@ number_of_iterations_ne (tree type, affine_iv *iv, tree final,
|
|||
tree niter_type = unsigned_type_for (type);
|
||||
tree s, c, d, bits, assumption, tmp, bound;
|
||||
|
||||
niter->control = *iv;
|
||||
niter->bound = final;
|
||||
niter->cmp = NE_EXPR;
|
||||
|
||||
/* Rearrange the terms so that we get inequality s * i <> c, with s
|
||||
positive. Also cast everything to the unsigned type. */
|
||||
if (tree_int_cst_sign_bit (iv->step))
|
||||
|
@ -410,6 +414,19 @@ number_of_iterations_lt (tree type, affine_iv *iv0, affine_iv *iv1,
|
|||
tree niter_type = unsigned_type_for (type);
|
||||
tree delta, step, s;
|
||||
|
||||
if (nonzero_p (iv0->step))
|
||||
{
|
||||
niter->control = *iv0;
|
||||
niter->cmp = LT_EXPR;
|
||||
niter->bound = iv1->base;
|
||||
}
|
||||
else
|
||||
{
|
||||
niter->control = *iv1;
|
||||
niter->cmp = GT_EXPR;
|
||||
niter->bound = iv0->base;
|
||||
}
|
||||
|
||||
delta = fold_build2 (MINUS_EXPR, niter_type,
|
||||
fold_convert (niter_type, iv1->base),
|
||||
fold_convert (niter_type, iv0->base));
|
||||
|
@ -543,6 +560,9 @@ number_of_iterations_cond (tree type, affine_iv *iv0, enum tree_code code,
|
|||
niter->niter = NULL_TREE;
|
||||
niter->additional_info = boolean_true_node;
|
||||
|
||||
niter->bound = NULL_TREE;
|
||||
niter->cmp = ERROR_MARK;
|
||||
|
||||
/* Make < comparison from > ones, and for NE_EXPR comparisons, ensure that
|
||||
the control variable is on lhs. */
|
||||
if (code == GE_EXPR || code == GT_EXPR
|
||||
|
|
|
@ -401,6 +401,40 @@ struct tree_opt_pass pass_complete_unroll =
|
|||
0 /* letter */
|
||||
};
|
||||
|
||||
/* Prefetching. */
|
||||
|
||||
static void
|
||||
tree_ssa_loop_prefetch (void)
|
||||
{
|
||||
if (!current_loops)
|
||||
return;
|
||||
|
||||
tree_ssa_prefetch_arrays (current_loops);
|
||||
}
|
||||
|
||||
static bool
|
||||
gate_tree_ssa_loop_prefetch (void)
|
||||
{
|
||||
return flag_prefetch_loop_arrays == 1;
|
||||
}
|
||||
|
||||
struct tree_opt_pass pass_loop_prefetch =
|
||||
{
|
||||
"prefetch", /* name */
|
||||
gate_tree_ssa_loop_prefetch, /* gate */
|
||||
tree_ssa_loop_prefetch, /* execute */
|
||||
NULL, /* sub */
|
||||
NULL, /* next */
|
||||
0, /* static_pass_number */
|
||||
TV_TREE_PREFETCH, /* tv_id */
|
||||
PROP_cfg | PROP_ssa, /* properties_required */
|
||||
0, /* properties_provided */
|
||||
0, /* properties_destroyed */
|
||||
0, /* todo_flags_start */
|
||||
TODO_dump_func | TODO_verify_loops, /* todo_flags_finish */
|
||||
0 /* letter */
|
||||
};
|
||||
|
||||
/* Induction variable optimizations. */
|
||||
|
||||
static void
|
||||
|
|
Loading…
Add table
Reference in a new issue