doloop.c: Removed.
* doloop.c: Removed. * loop-doloop.c: New file. * Makefile.in (doloop.o): Remove. (loop-doloop.o): New. * cfgloop.h (get_loop_level, doloop_optimize_loops): Declare. * cfgloopanal.c (get_loop_level): New function. * loop-iv.c (iv_number_of_iterations): Handle case when loop is leaved immediatelly. * loop.c (strength_reduce): Do not call doloop optimization. * loop.h (LOOP_BCT): Removed. * passes.c (rest_of_handle_loop_optimize): Do not use LOOP_BCT. (rest_of_handle_loop2): Call doloop_optimize_loops. (rest_of_compilation): Test for optimizations moved to rest_of_handle_loop2. From-SVN: r79625
This commit is contained in:
parent
327e534343
commit
689ba89dd8
10 changed files with 612 additions and 912 deletions
|
@ -1,3 +1,20 @@
|
|||
2004-03-18 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
|
||||
|
||||
* doloop.c: Removed.
|
||||
* loop-doloop.c: New file.
|
||||
* Makefile.in (doloop.o): Remove.
|
||||
(loop-doloop.o): New.
|
||||
* cfgloop.h (get_loop_level, doloop_optimize_loops): Declare.
|
||||
* cfgloopanal.c (get_loop_level): New function.
|
||||
* loop-iv.c (iv_number_of_iterations): Handle case when loop
|
||||
is leaved immediatelly.
|
||||
* loop.c (strength_reduce): Do not call doloop optimization.
|
||||
* loop.h (LOOP_BCT): Removed.
|
||||
* passes.c (rest_of_handle_loop_optimize): Do not use LOOP_BCT.
|
||||
(rest_of_handle_loop2): Call doloop_optimize_loops.
|
||||
(rest_of_compilation): Test for optimizations moved to
|
||||
rest_of_handle_loop2.
|
||||
|
||||
2004-03-17 Fariborz Jahanian <fjahanian@apple.com>
|
||||
|
||||
* config/rs6000/rs6000.c (rs6000_stack_info): correct reg_size
|
||||
|
|
|
@ -849,7 +849,7 @@ OBJS-common = \
|
|||
cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o \
|
||||
cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o \
|
||||
cfgrtl.o combine.o conflict.o convert.o coverage.o cse.o cselib.o \
|
||||
dbxout.o debug.o df.o diagnostic.o dojump.o doloop.o dominance.o \
|
||||
dbxout.o debug.o df.o diagnostic.o dojump.o dominance.o loop-doloop.o \
|
||||
dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o loop-iv.o \
|
||||
expmed.o expr.o final.o flow.o fold-const.o function.o gcse.o \
|
||||
genrtl.o ggc-common.o global.o graph.o gtype-desc.o \
|
||||
|
@ -1702,9 +1702,9 @@ loop.o : loop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) flags.h $(L
|
|||
insn-config.h $(REGS_H) hard-reg-set.h $(RECOG_H) $(EXPR_H) \
|
||||
real.h $(PREDICT_H) $(BASIC_BLOCK_H) function.h cfgloop.h \
|
||||
toplev.h varray.h except.h cselib.h $(OPTABS_H) $(TM_P_H) $(GGC_H)
|
||||
doloop.o : doloop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) flags.h \
|
||||
$(LOOP_H) $(EXPR_H) hard-reg-set.h $(BASIC_BLOCK_H) $(TM_P_H) toplev.h \
|
||||
cfgloop.h
|
||||
loop-doloop.o : loop-doloop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
|
||||
$(RTL_H) flags.h $(EXPR_H) hard-reg-set.h $(BASIC_BLOCK_H) $(TM_P_H) \
|
||||
toplev.h cfgloop.h output.h $(PARAMS_H)
|
||||
unroll.o : unroll.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) insn-config.h \
|
||||
function.h $(INTEGRATE_H) $(REGS_H) $(RECOG_H) flags.h $(EXPR_H) $(LOOP_H) toplev.h \
|
||||
hard-reg-set.h varray.h $(BASIC_BLOCK_H) $(TM_P_H) $(PREDICT_H) $(PARAMS_H) \
|
||||
|
|
|
@ -247,6 +247,7 @@ extern bool flow_bb_inside_loop_p (const struct loop *, const basic_block);
|
|||
extern struct loop * find_common_loop (struct loop *, struct loop *);
|
||||
extern int num_loop_insns (struct loop *);
|
||||
extern int average_num_loop_insns (struct loop *);
|
||||
extern unsigned get_loop_level (const struct loop *);
|
||||
|
||||
/* Loops & cfg manipulation. */
|
||||
extern basic_block *get_loop_body (const struct loop *);
|
||||
|
@ -417,3 +418,4 @@ enum
|
|||
};
|
||||
|
||||
extern void unroll_and_peel_loops (struct loops *, int);
|
||||
extern void doloop_optimize_loops (struct loops *);
|
||||
|
|
|
@ -457,3 +457,20 @@ expected_loop_iterations (const struct loop *loop)
|
|||
return (freq_latch + freq_in - 1) / freq_in;
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns the maximum level of nesting of subloops of LOOP. */
|
||||
|
||||
unsigned
|
||||
get_loop_level (const struct loop *loop)
|
||||
{
|
||||
const struct loop *ploop;
|
||||
unsigned mx = 0, l;
|
||||
|
||||
for (ploop = loop->inner; ploop; ploop = ploop->next)
|
||||
{
|
||||
l = get_loop_level (ploop);
|
||||
if (l >= mx)
|
||||
mx = l + 1;
|
||||
}
|
||||
return mx;
|
||||
}
|
||||
|
|
883
gcc/doloop.c
883
gcc/doloop.c
|
@ -1,883 +0,0 @@
|
|||
/* Perform doloop optimizations
|
||||
Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
|
||||
Free Software Foundation, Inc.
|
||||
Contributed by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 2, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GCC; see the file COPYING. If not, write to the Free
|
||||
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
||||
02111-1307, USA. */
|
||||
|
||||
#include "config.h"
|
||||
#include "system.h"
|
||||
#include "coretypes.h"
|
||||
#include "tm.h"
|
||||
#include "rtl.h"
|
||||
#include "flags.h"
|
||||
#include "expr.h"
|
||||
#include "loop.h"
|
||||
#include "hard-reg-set.h"
|
||||
#include "basic-block.h"
|
||||
#include "toplev.h"
|
||||
#include "tm_p.h"
|
||||
#include "cfgloop.h"
|
||||
|
||||
|
||||
/* This module is used to modify loops with a determinable number of
|
||||
iterations to use special low-overhead looping instructions.
|
||||
|
||||
It first validates whether the loop is well behaved and has a
|
||||
determinable number of iterations (either at compile or run-time).
|
||||
It then modifies the loop to use a low-overhead looping pattern as
|
||||
follows:
|
||||
|
||||
1. A pseudo register is allocated as the loop iteration counter.
|
||||
|
||||
2. The number of loop iterations is calculated and is stored
|
||||
in the loop counter.
|
||||
|
||||
3. At the end of the loop, the jump insn is replaced by the
|
||||
doloop_end pattern. The compare must remain because it might be
|
||||
used elsewhere. If the loop-variable or condition register are
|
||||
used elsewhere, they will be eliminated by flow.
|
||||
|
||||
4. An optional doloop_begin pattern is inserted at the top of the
|
||||
loop.
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_doloop_end
|
||||
|
||||
static rtx doloop_condition_get (rtx);
|
||||
static unsigned HOST_WIDE_INT doloop_iterations_max (const struct loop_info *,
|
||||
enum machine_mode, int);
|
||||
static int doloop_valid_p (const struct loop *, rtx);
|
||||
static int doloop_modify (const struct loop *, rtx, rtx, rtx, rtx, rtx);
|
||||
static int doloop_modify_runtime (const struct loop *, rtx, rtx, rtx,
|
||||
enum machine_mode, rtx);
|
||||
|
||||
|
||||
/* Return the loop termination condition for PATTERN or zero
|
||||
if it is not a decrement and branch jump insn. */
|
||||
static rtx
|
||||
doloop_condition_get (rtx pattern)
|
||||
{
|
||||
rtx cmp;
|
||||
rtx inc;
|
||||
rtx reg;
|
||||
rtx condition;
|
||||
|
||||
/* The canonical doloop pattern we expect is:
|
||||
|
||||
(parallel [(set (pc) (if_then_else (condition)
|
||||
(label_ref (label))
|
||||
(pc)))
|
||||
(set (reg) (plus (reg) (const_int -1)))
|
||||
(additional clobbers and uses)])
|
||||
|
||||
Some machines (IA-64) make the decrement conditional on
|
||||
the condition as well, so we don't bother verifying the
|
||||
actual decrement. In summary, the branch must be the
|
||||
first entry of the parallel (also required by jump.c),
|
||||
and the second entry of the parallel must be a set of
|
||||
the loop counter register. */
|
||||
|
||||
if (GET_CODE (pattern) != PARALLEL)
|
||||
return 0;
|
||||
|
||||
cmp = XVECEXP (pattern, 0, 0);
|
||||
inc = XVECEXP (pattern, 0, 1);
|
||||
|
||||
/* Check for (set (reg) (something)). */
|
||||
if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc)))
|
||||
return 0;
|
||||
|
||||
/* Extract loop counter register. */
|
||||
reg = SET_DEST (inc);
|
||||
|
||||
/* Check for (set (pc) (if_then_else (condition)
|
||||
(label_ref (label))
|
||||
(pc))). */
|
||||
if (GET_CODE (cmp) != SET
|
||||
|| SET_DEST (cmp) != pc_rtx
|
||||
|| GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
|
||||
|| GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
|
||||
|| XEXP (SET_SRC (cmp), 2) != pc_rtx)
|
||||
return 0;
|
||||
|
||||
/* Extract loop termination condition. */
|
||||
condition = XEXP (SET_SRC (cmp), 0);
|
||||
|
||||
if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE)
|
||||
|| GET_CODE (XEXP (condition, 1)) != CONST_INT)
|
||||
return 0;
|
||||
|
||||
if (XEXP (condition, 0) == reg)
|
||||
return condition;
|
||||
|
||||
if (GET_CODE (XEXP (condition, 0)) == PLUS
|
||||
&& XEXP (XEXP (condition, 0), 0) == reg)
|
||||
return condition;
|
||||
|
||||
/* ??? If a machine uses a funny comparison, we could return a
|
||||
canonicalised form here. */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Return an estimate of the maximum number of loop iterations for the
|
||||
loop specified by LOOP or zero if the loop is not normal.
|
||||
MODE is the mode of the iteration count and NONNEG is nonzero if
|
||||
the iteration count has been proved to be non-negative. */
|
||||
static unsigned HOST_WIDE_INT
|
||||
doloop_iterations_max (const struct loop_info *loop_info,
|
||||
enum machine_mode mode, int nonneg)
|
||||
{
|
||||
unsigned HOST_WIDE_INT n_iterations_max;
|
||||
enum rtx_code code;
|
||||
rtx min_value;
|
||||
rtx max_value;
|
||||
HOST_WIDE_INT abs_inc;
|
||||
int neg_inc;
|
||||
|
||||
neg_inc = 0;
|
||||
abs_inc = INTVAL (loop_info->increment);
|
||||
if (abs_inc < 0)
|
||||
{
|
||||
abs_inc = -abs_inc;
|
||||
neg_inc = 1;
|
||||
}
|
||||
|
||||
if (neg_inc)
|
||||
{
|
||||
code = swap_condition (loop_info->comparison_code);
|
||||
min_value = loop_info->final_equiv_value;
|
||||
max_value = loop_info->initial_equiv_value;
|
||||
}
|
||||
else
|
||||
{
|
||||
code = loop_info->comparison_code;
|
||||
min_value = loop_info->initial_equiv_value;
|
||||
max_value = loop_info->final_equiv_value;
|
||||
}
|
||||
|
||||
/* Since the loop has a VTOP, we know that the initial test will be
|
||||
true and thus the value of max_value should be greater than the
|
||||
value of min_value. Thus the difference should always be positive
|
||||
and the code must be LT, LE, LTU, LEU, or NE. Otherwise the loop is
|
||||
not normal, e.g., `for (i = 0; i < 10; i--)'. */
|
||||
switch (code)
|
||||
{
|
||||
case LTU:
|
||||
case LEU:
|
||||
{
|
||||
unsigned HOST_WIDE_INT umax;
|
||||
unsigned HOST_WIDE_INT umin;
|
||||
|
||||
if (GET_CODE (min_value) == CONST_INT)
|
||||
umin = INTVAL (min_value);
|
||||
else
|
||||
umin = 0;
|
||||
|
||||
if (GET_CODE (max_value) == CONST_INT)
|
||||
umax = INTVAL (max_value);
|
||||
else
|
||||
umax = ((unsigned) 2 << (GET_MODE_BITSIZE (mode) - 1)) - 1;
|
||||
|
||||
n_iterations_max = umax - umin;
|
||||
break;
|
||||
}
|
||||
|
||||
case LT:
|
||||
case LE:
|
||||
{
|
||||
HOST_WIDE_INT smax;
|
||||
HOST_WIDE_INT smin;
|
||||
|
||||
if (GET_CODE (min_value) == CONST_INT)
|
||||
smin = INTVAL (min_value);
|
||||
else
|
||||
smin = -((unsigned) 1 << (GET_MODE_BITSIZE (mode) - 1));
|
||||
|
||||
if (GET_CODE (max_value) == CONST_INT)
|
||||
smax = INTVAL (max_value);
|
||||
else
|
||||
smax = ((unsigned) 1 << (GET_MODE_BITSIZE (mode) - 1)) - 1;
|
||||
|
||||
n_iterations_max = smax - smin;
|
||||
break;
|
||||
}
|
||||
|
||||
case NE:
|
||||
if (GET_CODE (min_value) == CONST_INT
|
||||
&& GET_CODE (max_value) == CONST_INT)
|
||||
n_iterations_max = INTVAL (max_value) - INTVAL (min_value);
|
||||
else
|
||||
/* We need to conservatively assume that we might have the maximum
|
||||
number of iterations without any additional knowledge. */
|
||||
n_iterations_max = ((unsigned) 2 << (GET_MODE_BITSIZE (mode) - 1)) - 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
n_iterations_max /= abs_inc;
|
||||
|
||||
/* If we know that the iteration count is non-negative then adjust
|
||||
n_iterations_max if it is so large that it appears negative. */
|
||||
if (nonneg
|
||||
&& n_iterations_max > ((unsigned) 1 << (GET_MODE_BITSIZE (mode) - 1)))
|
||||
n_iterations_max = ((unsigned) 1 << (GET_MODE_BITSIZE (mode) - 1)) - 1;
|
||||
|
||||
return n_iterations_max;
|
||||
}
|
||||
|
||||
|
||||
/* Return nonzero if the loop specified by LOOP is suitable for
|
||||
the use of special low-overhead looping instructions. */
|
||||
static int
|
||||
doloop_valid_p (const struct loop *loop, rtx jump_insn)
|
||||
{
|
||||
const struct loop_info *loop_info = LOOP_INFO (loop);
|
||||
|
||||
/* The loop must have a conditional jump at the end. */
|
||||
if (! any_condjump_p (jump_insn)
|
||||
|| ! onlyjump_p (jump_insn))
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Invalid jump at loop end.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Give up if a loop has been completely unrolled. */
|
||||
if (loop_info->n_iterations == loop_info->unroll_number)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Loop completely unrolled.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The loop must have a single exit target. A break or return
|
||||
statement within a loop will generate multiple loop exits.
|
||||
Another example of a loop that currently generates multiple exit
|
||||
targets is for (i = 0; i < (foo ? 8 : 4); i++) { }. */
|
||||
if (loop_info->has_multiple_exit_targets || loop->exit_count)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Loop has multiple exit targets.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* An indirect jump may jump out of the loop. */
|
||||
if (loop_info->has_indirect_jump)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Indirect jump in function.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* A called function may clobber any special registers required for
|
||||
low-overhead looping. */
|
||||
if (loop_info->has_call)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Function call in loop.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Some targets (eg, PPC) use the count register for branch on table
|
||||
instructions. ??? This should be a target specific check. */
|
||||
if (loop_info->has_tablejump)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Computed branch in the loop.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (! loop_info->increment)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Could not determine iteration info.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (GET_CODE (loop_info->increment) != CONST_INT)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Increment not an integer constant.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* There is no guarantee that a NE loop will terminate if the
|
||||
absolute increment is not unity. ??? We could compute this
|
||||
condition at run-time and have an additional jump around the loop
|
||||
to ensure an infinite loop. */
|
||||
if (loop_info->comparison_code == NE
|
||||
&& !loop_info->preconditioned
|
||||
&& INTVAL (loop_info->increment) != -1
|
||||
&& INTVAL (loop_info->increment) != 1)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: NE loop with non-unity increment.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check for loops that may not terminate under special conditions. */
|
||||
if (! loop_info->n_iterations
|
||||
&& ((loop_info->comparison_code == LEU
|
||||
&& INTVAL (loop_info->increment) > 0)
|
||||
|| (loop_info->comparison_code == GEU
|
||||
&& INTVAL (loop_info->increment) < 0)
|
||||
|| (loop_info->comparison_code == LTU
|
||||
&& INTVAL (loop_info->increment) > 1)
|
||||
|| (loop_info->comparison_code == GTU
|
||||
&& INTVAL (loop_info->increment) < -1)))
|
||||
{
|
||||
/* If the comparison is LEU and the comparison value is UINT_MAX
|
||||
then the loop will not terminate. Similarly, if the
|
||||
comparison code is GEU and the comparison value is 0, the
|
||||
loop will not terminate.
|
||||
|
||||
If the absolute increment is not 1, the loop can be infinite
|
||||
even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
|
||||
|
||||
Note that with LE and GE, the loop behavior is undefined
|
||||
(C++ standard section 5 clause 5) if an overflow occurs, say
|
||||
between INT_MAX and INT_MAX + 1. We thus don't have to worry
|
||||
about these two cases.
|
||||
|
||||
??? We could compute these conditions at run-time and have a
|
||||
additional jump around the loop to ensure an infinite loop.
|
||||
However, it is very unlikely that this is the intended
|
||||
behavior of the loop and checking for these rare boundary
|
||||
conditions would pessimize all other code.
|
||||
|
||||
If the loop is executed only a few times an extra check to
|
||||
restart the loop could use up most of the benefits of using a
|
||||
count register loop. Note however, that normally, this
|
||||
restart branch would never execute, so it could be predicted
|
||||
well by the CPU. We should generate the pessimistic code by
|
||||
default, and have an option, e.g. -funsafe-loops that would
|
||||
enable count-register loops in this case. */
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Possible infinite iteration case ignored.\n");
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/* Modify the loop to use the low-overhead looping insn where LOOP
|
||||
describes the loop, ITERATIONS is an RTX containing the desired
|
||||
number of loop iterations, ITERATIONS_MAX is a CONST_INT specifying
|
||||
the maximum number of loop iterations, and DOLOOP_INSN is the
|
||||
low-overhead looping insn to emit at the end of the loop. This
|
||||
returns nonzero if it was successful. */
|
||||
static int
|
||||
doloop_modify (const struct loop *loop, rtx iterations, rtx iterations_max,
|
||||
rtx doloop_seq, rtx start_label, rtx condition)
|
||||
{
|
||||
rtx counter_reg;
|
||||
rtx count;
|
||||
rtx sequence;
|
||||
rtx jump_insn;
|
||||
int nonneg = 0;
|
||||
int decrement_count;
|
||||
|
||||
jump_insn = prev_nonnote_insn (loop->end);
|
||||
|
||||
if (loop_dump_stream)
|
||||
{
|
||||
fprintf (loop_dump_stream, "Doloop: Inserting doloop pattern (");
|
||||
if (GET_CODE (iterations) == CONST_INT)
|
||||
fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC,
|
||||
INTVAL (iterations));
|
||||
else
|
||||
fputs ("runtime", loop_dump_stream);
|
||||
fputs (" iterations).", loop_dump_stream);
|
||||
}
|
||||
|
||||
/* Emit the label that will delimit the top of the loop.
|
||||
This has to be done before the delete_insn call below, to prevent
|
||||
delete_insn from deleting too much. */
|
||||
emit_label_after (start_label, loop->top ? loop->top : loop->start);
|
||||
LABEL_NUSES (start_label)++;
|
||||
|
||||
/* Discard original jump to continue loop. The original compare
|
||||
result may still be live, so it cannot be discarded explicitly. */
|
||||
delete_related_insns (jump_insn);
|
||||
|
||||
counter_reg = XEXP (condition, 0);
|
||||
if (GET_CODE (counter_reg) == PLUS)
|
||||
counter_reg = XEXP (counter_reg, 0);
|
||||
|
||||
start_sequence ();
|
||||
|
||||
count = iterations;
|
||||
decrement_count = 0;
|
||||
switch (GET_CODE (condition))
|
||||
{
|
||||
case NE:
|
||||
/* Currently only NE tests against zero and one are supported. */
|
||||
if (XEXP (condition, 1) == const0_rtx)
|
||||
decrement_count = 1;
|
||||
else if (XEXP (condition, 1) != const1_rtx)
|
||||
abort ();
|
||||
break;
|
||||
|
||||
case GE:
|
||||
/* Currently only GE tests against zero are supported. */
|
||||
if (XEXP (condition, 1) != const0_rtx)
|
||||
abort ();
|
||||
|
||||
/* The iteration count needs decrementing for a GE test. */
|
||||
decrement_count = 1;
|
||||
|
||||
/* Determine if the iteration counter will be non-negative.
|
||||
Note that the maximum value loaded is iterations_max - 1. */
|
||||
if ((unsigned HOST_WIDE_INT) INTVAL (iterations_max)
|
||||
<= ((unsigned) 1 << (GET_MODE_BITSIZE (GET_MODE (counter_reg)) - 1)))
|
||||
nonneg = 1;
|
||||
break;
|
||||
|
||||
/* Abort if an invalid doloop pattern has been generated. */
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
|
||||
if (decrement_count)
|
||||
{
|
||||
if (GET_CODE (count) == CONST_INT)
|
||||
count = GEN_INT (INTVAL (count) - 1);
|
||||
else
|
||||
count = expand_simple_binop (GET_MODE (counter_reg), MINUS,
|
||||
count, const1_rtx,
|
||||
0, 0, OPTAB_LIB_WIDEN);
|
||||
}
|
||||
|
||||
/* Insert initialization of the count register into the loop header. */
|
||||
convert_move (counter_reg, count, 1);
|
||||
sequence = get_insns ();
|
||||
end_sequence ();
|
||||
emit_insn_before (sequence, loop->start);
|
||||
|
||||
/* Some targets (eg, C4x) need to initialize special looping
|
||||
registers. */
|
||||
#ifdef HAVE_doloop_begin
|
||||
{
|
||||
rtx init;
|
||||
|
||||
init = gen_doloop_begin (counter_reg,
|
||||
GET_CODE (iterations) == CONST_INT
|
||||
? iterations : const0_rtx, iterations_max,
|
||||
GEN_INT (loop->level));
|
||||
if (init)
|
||||
{
|
||||
start_sequence ();
|
||||
emit_insn (init);
|
||||
sequence = get_insns ();
|
||||
end_sequence ();
|
||||
emit_insn_after (sequence, loop->start);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Insert the new low-overhead looping insn. */
|
||||
emit_jump_insn_before (doloop_seq, loop->end);
|
||||
jump_insn = prev_nonnote_insn (loop->end);
|
||||
JUMP_LABEL (jump_insn) = start_label;
|
||||
|
||||
/* Add a REG_NONNEG note if the actual or estimated maximum number
|
||||
of iterations is non-negative. */
|
||||
if (nonneg)
|
||||
{
|
||||
REG_NOTES (jump_insn)
|
||||
= gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn));
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/* Handle the more complex case, where the bounds are not known at
|
||||
compile time. In this case we generate a run_time calculation of
|
||||
the number of iterations. We rely on the existence of a run-time
|
||||
guard to ensure that the loop executes at least once, i.e.,
|
||||
initial_value obeys the loop comparison condition. If a guard is
|
||||
not present, we emit one. The loop to modify is described by LOOP.
|
||||
ITERATIONS_MAX is a CONST_INT specifying the estimated maximum
|
||||
number of loop iterations. DOLOOP_INSN is the low-overhead looping
|
||||
insn to insert. Returns nonzero if loop successfully modified. */
|
||||
static int
|
||||
doloop_modify_runtime (const struct loop *loop, rtx iterations_max,
|
||||
rtx doloop_seq, rtx start_label,
|
||||
enum machine_mode mode, rtx condition)
|
||||
{
|
||||
const struct loop_info *loop_info = LOOP_INFO (loop);
|
||||
HOST_WIDE_INT abs_inc;
|
||||
HOST_WIDE_INT abs_loop_inc;
|
||||
int neg_inc;
|
||||
rtx diff;
|
||||
rtx sequence;
|
||||
rtx iterations;
|
||||
rtx initial_value;
|
||||
rtx final_value;
|
||||
rtx increment;
|
||||
int unsigned_p;
|
||||
enum rtx_code comparison_code;
|
||||
|
||||
increment = loop_info->increment;
|
||||
initial_value = loop_info->initial_value;
|
||||
final_value = loop_info->final_value;
|
||||
|
||||
neg_inc = 0;
|
||||
abs_inc = INTVAL (increment);
|
||||
if (abs_inc < 0)
|
||||
{
|
||||
abs_inc = -abs_inc;
|
||||
neg_inc = 1;
|
||||
}
|
||||
|
||||
comparison_code = loop_info->comparison_code;
|
||||
unsigned_p = (comparison_code == LTU
|
||||
|| comparison_code == LEU
|
||||
|| comparison_code == GTU
|
||||
|| comparison_code == GEU
|
||||
|| comparison_code == NE);
|
||||
|
||||
/* The number of iterations (prior to any loop unrolling) is given by:
|
||||
|
||||
n = (abs (final - initial) + abs_inc - 1) / abs_inc.
|
||||
|
||||
However, it is possible for the summation to overflow, and a
|
||||
safer method is:
|
||||
|
||||
n = abs (final - initial) / abs_inc;
|
||||
n += (abs (final - initial) % abs_inc) != 0;
|
||||
|
||||
But when abs_inc is a power of two, the summation won't overflow
|
||||
except in cases where the loop never terminates. So we don't
|
||||
need to use this more costly calculation.
|
||||
|
||||
If the loop has been unrolled, the full calculation is
|
||||
|
||||
t1 = abs_inc * unroll_number; increment per loop
|
||||
n = (abs (final - initial) + abs_inc - 1) / t1; full loops
|
||||
n += (abs (final - initial) + abs_inc - 1) % t1) >= abs_inc;
|
||||
partial loop
|
||||
which works out to be equivalent to
|
||||
|
||||
n = (abs (final - initial) + t1 - 1) / t1;
|
||||
|
||||
In the case where the loop was preconditioned, a few iterations
|
||||
may have been executed earlier; but 'initial' was adjusted as they
|
||||
were executed, so we don't need anything special for that case here.
|
||||
As above, when t1 is a power of two we don't need to worry about
|
||||
overflow.
|
||||
|
||||
The division and modulo operations can be avoided by requiring
|
||||
that the increment is a power of 2 (precondition_loop_p enforces
|
||||
this requirement). Nevertheless, the RTX_COSTS should be checked
|
||||
to see if a fast divmod is available. */
|
||||
|
||||
start_sequence ();
|
||||
/* abs (final - initial) */
|
||||
diff = expand_simple_binop (mode, MINUS,
|
||||
copy_rtx (neg_inc ? initial_value : final_value),
|
||||
copy_rtx (neg_inc ? final_value : initial_value),
|
||||
NULL_RTX, unsigned_p, OPTAB_LIB_WIDEN);
|
||||
|
||||
/* Some code transformations can result in code akin to
|
||||
|
||||
tmp = i + 1;
|
||||
...
|
||||
goto scan_start;
|
||||
top:
|
||||
tmp = tmp + 1;
|
||||
scan_start:
|
||||
i = tmp;
|
||||
if (i < n) goto top;
|
||||
|
||||
We'll have already detected this form of loop in scan_loop,
|
||||
and set loop->top and loop->scan_start appropriately.
|
||||
|
||||
In this situation, we skip the increment the first time through
|
||||
the loop, which results in an incorrect estimate of the number
|
||||
of iterations. Adjust the difference to compensate. */
|
||||
/* ??? Logically, it would seem this belongs in loop_iterations.
|
||||
However, this causes regressions e.g. on x86 execute/20011008-3.c,
|
||||
so I do not believe we've properly characterized the exact nature
|
||||
of the problem. In the meantime, this fixes execute/20011126-2.c
|
||||
on ia64 and some Ada front end miscompilation on ppc. */
|
||||
|
||||
if (loop->scan_start)
|
||||
{
|
||||
rtx iteration_var = loop_info->iteration_var;
|
||||
struct loop_ivs *ivs = LOOP_IVS (loop);
|
||||
struct iv_class *bl;
|
||||
|
||||
if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == BASIC_INDUCT)
|
||||
bl = REG_IV_CLASS (ivs, REGNO (iteration_var));
|
||||
else if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == GENERAL_INDUCT)
|
||||
{
|
||||
struct induction *v = REG_IV_INFO (ivs, REGNO (iteration_var));
|
||||
bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));
|
||||
}
|
||||
else
|
||||
/* Iteration var must be an induction variable to get here. */
|
||||
abort ();
|
||||
|
||||
if (INSN_UID (bl->biv->insn) < max_uid_for_loop
|
||||
&& INSN_LUID (bl->biv->insn) < INSN_LUID (loop->scan_start))
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Basic induction var skips initial incr.\n");
|
||||
|
||||
diff = expand_simple_binop (mode, PLUS, diff, GEN_INT (abs_inc),
|
||||
diff, unsigned_p, OPTAB_LIB_WIDEN);
|
||||
}
|
||||
}
|
||||
|
||||
abs_loop_inc = abs_inc * loop_info->unroll_number;
|
||||
if (abs_loop_inc != 1)
|
||||
{
|
||||
int shift_count;
|
||||
|
||||
shift_count = exact_log2 (abs_loop_inc);
|
||||
if (shift_count < 0)
|
||||
abort ();
|
||||
|
||||
/* (abs (final - initial) + abs_inc * unroll_number - 1) */
|
||||
diff = expand_simple_binop (GET_MODE (diff), PLUS,
|
||||
diff, GEN_INT (abs_loop_inc - 1),
|
||||
diff, 1, OPTAB_LIB_WIDEN);
|
||||
|
||||
/* (abs (final - initial) + abs_inc * unroll_number - 1)
|
||||
/ (abs_inc * unroll_number) */
|
||||
diff = expand_simple_binop (GET_MODE (diff), LSHIFTRT,
|
||||
diff, GEN_INT (shift_count),
|
||||
diff, 1, OPTAB_LIB_WIDEN);
|
||||
}
|
||||
iterations = diff;
|
||||
|
||||
/* If there is a NOTE_INSN_LOOP_VTOP, we have a `for' or `while'
|
||||
style loop, with a loop exit test at the start. Thus, we can
|
||||
assume that the loop condition was true when the loop was
|
||||
entered.
|
||||
|
||||
`do-while' loops require special treatment since the exit test is
|
||||
not executed before the start of the loop. We need to determine
|
||||
if the loop will terminate after the first pass and to limit the
|
||||
iteration count to one if necessary. */
|
||||
if (! loop->vtop)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream, "Doloop: Do-while loop.\n");
|
||||
|
||||
/* A `do-while' loop must iterate at least once. For code like
|
||||
i = initial; do { ... } while (++i < final);
|
||||
we will calculate a bogus iteration count if initial > final.
|
||||
So detect this and set the iteration count to 1.
|
||||
Note that if the loop has been unrolled, then the loop body
|
||||
is guaranteed to execute at least once. Also, when the
|
||||
comparison is NE, our calculated count will be OK. */
|
||||
if (loop_info->unroll_number == 1 && comparison_code != NE)
|
||||
{
|
||||
rtx label;
|
||||
|
||||
/* Emit insns to test if the loop will immediately
|
||||
terminate and to set the iteration count to 1 if true. */
|
||||
label = gen_label_rtx();
|
||||
emit_cmp_and_jump_insns (copy_rtx (initial_value),
|
||||
copy_rtx (loop_info->comparison_value),
|
||||
comparison_code, NULL_RTX, mode, 0,
|
||||
label);
|
||||
JUMP_LABEL (get_last_insn ()) = label;
|
||||
LABEL_NUSES (label)++;
|
||||
emit_move_insn (iterations, const1_rtx);
|
||||
emit_label (label);
|
||||
}
|
||||
}
|
||||
|
||||
sequence = get_insns ();
|
||||
end_sequence ();
|
||||
emit_insn_before (sequence, loop->start);
|
||||
|
||||
return doloop_modify (loop, iterations, iterations_max, doloop_seq,
|
||||
start_label, condition);
|
||||
}
|
||||
|
||||
|
||||
/* This is the main entry point. Process loop described by LOOP
|
||||
validating that the loop is suitable for conversion to use a low
|
||||
overhead looping instruction, replacing the jump insn where
|
||||
suitable. We distinguish between loops with compile-time bounds
|
||||
and those with run-time bounds. Information from LOOP is used to
|
||||
compute the number of iterations and to determine whether the loop
|
||||
is a candidate for this optimization. Returns nonzero if loop
|
||||
successfully modified. */
|
||||
int
|
||||
doloop_optimize (const struct loop *loop)
|
||||
{
|
||||
struct loop_info *loop_info = LOOP_INFO (loop);
|
||||
rtx initial_value;
|
||||
rtx final_value;
|
||||
rtx increment;
|
||||
rtx jump_insn;
|
||||
enum machine_mode mode;
|
||||
unsigned HOST_WIDE_INT n_iterations;
|
||||
unsigned HOST_WIDE_INT n_iterations_max;
|
||||
rtx doloop_seq, doloop_pat, doloop_reg;
|
||||
rtx iterations;
|
||||
rtx iterations_max;
|
||||
rtx start_label;
|
||||
rtx condition;
|
||||
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Processing loop %d, enclosed levels %d.\n",
|
||||
loop->num, loop->level);
|
||||
|
||||
jump_insn = prev_nonnote_insn (loop->end);
|
||||
|
||||
/* Check that loop is a candidate for a low-overhead looping insn. */
|
||||
if (! doloop_valid_p (loop, jump_insn))
|
||||
return 0;
|
||||
|
||||
/* Determine if the loop can be safely, and profitably,
|
||||
preconditioned. While we don't precondition the loop in a loop
|
||||
unrolling sense, this test ensures that the loop is well behaved
|
||||
and that the increment is a constant integer. */
|
||||
if (! precondition_loop_p (loop, &initial_value, &final_value,
|
||||
&increment, &mode))
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Cannot precondition loop.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Determine or estimate the maximum number of loop iterations. */
|
||||
n_iterations = loop_info->n_iterations;
|
||||
if (n_iterations)
|
||||
{
|
||||
/* This is the simple case where the initial and final loop
|
||||
values are constants. */
|
||||
n_iterations_max = n_iterations;
|
||||
}
|
||||
else
|
||||
{
|
||||
int nonneg = find_reg_note (jump_insn, REG_NONNEG, 0) != 0;
|
||||
|
||||
/* This is the harder case where the initial and final loop
|
||||
values may not be constants. */
|
||||
n_iterations_max = doloop_iterations_max (loop_info, mode, nonneg);
|
||||
|
||||
if (! n_iterations_max)
|
||||
{
|
||||
/* We have something like `for (i = 0; i < 10; i--)'. */
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Not normal loop.\n");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Account for loop unrolling in the iteration count. This will
|
||||
have no effect if loop_iterations could not determine the number
|
||||
of iterations. */
|
||||
n_iterations /= loop_info->unroll_number;
|
||||
n_iterations_max /= loop_info->unroll_number;
|
||||
|
||||
if (n_iterations && n_iterations < 3)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Too few iterations (%ld) to be profitable.\n",
|
||||
(long int) n_iterations);
|
||||
return 0;
|
||||
}
|
||||
|
||||
iterations = GEN_INT (n_iterations);
|
||||
iterations_max = GEN_INT (n_iterations_max);
|
||||
|
||||
/* Generate looping insn. If the pattern FAILs then give up trying
|
||||
to modify the loop since there is some aspect the back-end does
|
||||
not like. */
|
||||
start_label = gen_label_rtx ();
|
||||
doloop_reg = gen_reg_rtx (mode);
|
||||
doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
|
||||
GEN_INT (loop->level), start_label);
|
||||
if (! doloop_seq && mode != word_mode)
|
||||
{
|
||||
PUT_MODE (doloop_reg, word_mode);
|
||||
doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
|
||||
GEN_INT (loop->level), start_label);
|
||||
}
|
||||
if (! doloop_seq)
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Target unwilling to use doloop pattern!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* If multiple instructions were created, the last must be the
|
||||
jump instruction. Also, a raw define_insn may yield a plain
|
||||
pattern. */
|
||||
doloop_pat = doloop_seq;
|
||||
if (INSN_P (doloop_pat))
|
||||
{
|
||||
while (NEXT_INSN (doloop_pat) != NULL_RTX)
|
||||
doloop_pat = NEXT_INSN (doloop_pat);
|
||||
if (GET_CODE (doloop_pat) == JUMP_INSN)
|
||||
doloop_pat = PATTERN (doloop_pat);
|
||||
else
|
||||
doloop_pat = NULL_RTX;
|
||||
}
|
||||
|
||||
if (! doloop_pat
|
||||
|| ! (condition = doloop_condition_get (doloop_pat)))
|
||||
{
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream,
|
||||
"Doloop: Unrecognizable doloop pattern!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (n_iterations != 0)
|
||||
/* Handle the simpler case, where we know the iteration count at
|
||||
compile time. */
|
||||
return doloop_modify (loop, iterations, iterations_max, doloop_seq,
|
||||
start_label, condition);
|
||||
else
|
||||
/* Handle the harder case, where we must add additional runtime tests. */
|
||||
return doloop_modify_runtime (loop, iterations_max, doloop_seq,
|
||||
start_label, mode, condition);
|
||||
}
|
||||
|
||||
#endif /* HAVE_doloop_end */
|
553
gcc/loop-doloop.c
Normal file
553
gcc/loop-doloop.c
Normal file
|
@ -0,0 +1,553 @@
|
|||
/* Perform doloop optimizations
|
||||
Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 2, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GCC; see the file COPYING. If not, write to the Free
|
||||
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
||||
02111-1307, USA. */
|
||||
|
||||
#include "config.h"
|
||||
#include "system.h"
|
||||
#include "coretypes.h"
|
||||
#include "tm.h"
|
||||
#include "rtl.h"
|
||||
#include "flags.h"
|
||||
#include "expr.h"
|
||||
#include "hard-reg-set.h"
|
||||
#include "basic-block.h"
|
||||
#include "toplev.h"
|
||||
#include "tm_p.h"
|
||||
#include "cfgloop.h"
|
||||
#include "output.h"
|
||||
#include "params.h"
|
||||
|
||||
/* This module is used to modify loops with a determinable number of
|
||||
iterations to use special low-overhead looping instructions.
|
||||
|
||||
It first validates whether the loop is well behaved and has a
|
||||
determinable number of iterations (either at compile or run-time).
|
||||
It then modifies the loop to use a low-overhead looping pattern as
|
||||
follows:
|
||||
|
||||
1. A pseudo register is allocated as the loop iteration counter.
|
||||
|
||||
2. The number of loop iterations is calculated and is stored
|
||||
in the loop counter.
|
||||
|
||||
3. At the end of the loop, the jump insn is replaced by the
|
||||
doloop_end pattern. The compare must remain because it might be
|
||||
used elsewhere. If the loop-variable or condition register are
|
||||
used elsewhere, they will be eliminated by flow.
|
||||
|
||||
4. An optional doloop_begin pattern is inserted at the top of the
|
||||
loop.
|
||||
|
||||
TODO The optimization should only performed when either the biv used for exit
|
||||
condition is unused at all except for the exit test, or if we do not have to
|
||||
change its value, since otherwise we have to add a new induction variable,
|
||||
which usually will not pay up (unless the cost of the doloop pattern is
|
||||
somehow extremely lower than the cost of compare & jump, or unless the bct
|
||||
register cannot be used for anything else but doloop -- ??? detect these
|
||||
cases). */
|
||||
|
||||
#ifdef HAVE_doloop_end
|
||||
|
||||
/* Return the loop termination condition for PATTERN or zero
|
||||
if it is not a decrement and branch jump insn. */
|
||||
|
||||
static rtx
|
||||
doloop_condition_get (rtx pattern)
|
||||
{
|
||||
rtx cmp;
|
||||
rtx inc;
|
||||
rtx reg;
|
||||
rtx condition;
|
||||
|
||||
/* The canonical doloop pattern we expect is:
|
||||
|
||||
(parallel [(set (pc) (if_then_else (condition)
|
||||
(label_ref (label))
|
||||
(pc)))
|
||||
(set (reg) (plus (reg) (const_int -1)))
|
||||
(additional clobbers and uses)])
|
||||
|
||||
Some machines (IA-64) make the decrement conditional on
|
||||
the condition as well, so we don't bother verifying the
|
||||
actual decrement. In summary, the branch must be the
|
||||
first entry of the parallel (also required by jump.c),
|
||||
and the second entry of the parallel must be a set of
|
||||
the loop counter register. */
|
||||
|
||||
if (GET_CODE (pattern) != PARALLEL)
|
||||
return 0;
|
||||
|
||||
cmp = XVECEXP (pattern, 0, 0);
|
||||
inc = XVECEXP (pattern, 0, 1);
|
||||
|
||||
/* Check for (set (reg) (something)). */
|
||||
if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc)))
|
||||
return 0;
|
||||
|
||||
/* Extract loop counter register. */
|
||||
reg = SET_DEST (inc);
|
||||
|
||||
/* Check for (set (pc) (if_then_else (condition)
|
||||
(label_ref (label))
|
||||
(pc))). */
|
||||
if (GET_CODE (cmp) != SET
|
||||
|| SET_DEST (cmp) != pc_rtx
|
||||
|| GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
|
||||
|| GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
|
||||
|| XEXP (SET_SRC (cmp), 2) != pc_rtx)
|
||||
return 0;
|
||||
|
||||
/* Extract loop termination condition. */
|
||||
condition = XEXP (SET_SRC (cmp), 0);
|
||||
|
||||
if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE)
|
||||
|| GET_CODE (XEXP (condition, 1)) != CONST_INT)
|
||||
return 0;
|
||||
|
||||
if (XEXP (condition, 0) == reg)
|
||||
return condition;
|
||||
|
||||
if (GET_CODE (XEXP (condition, 0)) == PLUS
|
||||
&& XEXP (XEXP (condition, 0), 0) == reg)
|
||||
return condition;
|
||||
|
||||
/* ??? If a machine uses a funny comparison, we could return a
|
||||
canonicalised form here. */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Return nonzero if the loop specified by LOOP is suitable for
|
||||
the use of special low-overhead looping instructions. DESC
|
||||
describes the number of iterations of the loop. */
|
||||
|
||||
static bool
|
||||
doloop_valid_p (struct loop *loop, struct niter_desc *desc)
|
||||
{
|
||||
basic_block *body = get_loop_body (loop), bb;
|
||||
rtx insn;
|
||||
unsigned i;
|
||||
|
||||
/* Check for loops that may not terminate under special conditions. */
|
||||
if (!desc->simple_p
|
||||
|| desc->assumptions
|
||||
|| desc->infinite)
|
||||
{
|
||||
/* There are some cases that would require a special attention.
|
||||
For example if the comparison is LEU and the comparison value
|
||||
is UINT_MAX then the loop will not terminate. Similarly, if the
|
||||
comparison code is GEU and the comparison value is 0, the
|
||||
loop will not terminate.
|
||||
|
||||
If the absolute increment is not 1, the loop can be infinite
|
||||
even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
|
||||
|
||||
??? We could compute these conditions at run-time and have a
|
||||
additional jump around the loop to ensure an infinite loop.
|
||||
However, it is very unlikely that this is the intended
|
||||
behavior of the loop and checking for these rare boundary
|
||||
conditions would pessimize all other code.
|
||||
|
||||
If the loop is executed only a few times an extra check to
|
||||
restart the loop could use up most of the benefits of using a
|
||||
count register loop. Note however, that normally, this
|
||||
restart branch would never execute, so it could be predicted
|
||||
well by the CPU. We should generate the pessimistic code by
|
||||
default, and have an option, e.g. -funsafe-loops that would
|
||||
enable count-register loops in this case. */
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
for (i = 0; i < loop->num_nodes; i++)
|
||||
{
|
||||
bb = body[i];
|
||||
|
||||
for (insn = BB_HEAD (bb);
|
||||
insn != NEXT_INSN (BB_END (bb));
|
||||
insn = NEXT_INSN (insn))
|
||||
{
|
||||
/* A called function may clobber any special registers required for
|
||||
low-overhead looping. */
|
||||
if (GET_CODE (insn) == CALL_INSN)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Doloop: Function call in loop.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Some targets (eg, PPC) use the count register for branch on table
|
||||
instructions. ??? This should be a target specific check. */
|
||||
if (GET_CODE (insn) == JUMP_INSN
|
||||
&& (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
|
||||
|| GET_CODE (PATTERN (insn)) == ADDR_VEC))
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Doloop: Computed branch in the loop.\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
free (body);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Adds test of COND jumping to DEST to the end of BB. */
|
||||
|
||||
static void
|
||||
add_test (rtx cond, basic_block bb, basic_block dest)
|
||||
{
|
||||
rtx seq, jump, label;
|
||||
enum machine_mode mode;
|
||||
rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
|
||||
enum rtx_code code = GET_CODE (cond);
|
||||
|
||||
mode = GET_MODE (XEXP (cond, 0));
|
||||
if (mode == VOIDmode)
|
||||
mode = GET_MODE (XEXP (cond, 1));
|
||||
|
||||
start_sequence ();
|
||||
op0 = force_operand (op0, NULL_RTX);
|
||||
op1 = force_operand (op1, NULL_RTX);
|
||||
label = block_label (dest);
|
||||
do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL_RTX, label);
|
||||
|
||||
jump = get_last_insn ();
|
||||
JUMP_LABEL (jump) = label;
|
||||
|
||||
/* The jump is supposed to handle an unlikely special case. */
|
||||
REG_NOTES (jump)
|
||||
= gen_rtx_EXPR_LIST (REG_BR_PROB,
|
||||
GEN_INT (0), REG_NOTES (jump));
|
||||
|
||||
LABEL_NUSES (label)++;
|
||||
|
||||
seq = get_insns ();
|
||||
end_sequence ();
|
||||
emit_insn_after (seq, BB_END (bb));
|
||||
}
|
||||
|
||||
/* Modify the loop to use the low-overhead looping insn where LOOP
|
||||
describes the loop, DESC describes the number of iterations of the
|
||||
loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
|
||||
end of the loop. CONDITION is the condition separated from the
|
||||
DOLOOP_SEQ. */
|
||||
|
||||
static void
|
||||
doloop_modify (struct loop *loop, struct niter_desc *desc,
|
||||
rtx doloop_seq, rtx condition)
|
||||
{
|
||||
rtx counter_reg;
|
||||
rtx count, tmp, noloop = NULL_RTX;
|
||||
rtx sequence;
|
||||
rtx jump_insn;
|
||||
rtx jump_label;
|
||||
int nonneg = 0, irr;
|
||||
bool increment_count;
|
||||
basic_block loop_end = desc->out_edge->src;
|
||||
|
||||
jump_insn = BB_END (loop_end);
|
||||
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, "Doloop: Inserting doloop pattern (");
|
||||
if (desc->const_iter)
|
||||
fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
|
||||
else
|
||||
fputs ("runtime", dump_file);
|
||||
fputs (" iterations).\n", dump_file);
|
||||
}
|
||||
|
||||
/* Discard original jump to continue loop. The original compare
|
||||
result may still be live, so it cannot be discarded explicitly. */
|
||||
delete_insn (jump_insn);
|
||||
|
||||
counter_reg = XEXP (condition, 0);
|
||||
if (GET_CODE (counter_reg) == PLUS)
|
||||
counter_reg = XEXP (counter_reg, 0);
|
||||
|
||||
count = desc->niter_expr;
|
||||
increment_count = false;
|
||||
switch (GET_CODE (condition))
|
||||
{
|
||||
case NE:
|
||||
/* Currently only NE tests against zero and one are supported. */
|
||||
if (XEXP (condition, 1) == const1_rtx)
|
||||
{
|
||||
increment_count = true;
|
||||
noloop = const1_rtx;
|
||||
}
|
||||
else if (XEXP (condition, 1) == const0_rtx)
|
||||
noloop = const0_rtx;
|
||||
else
|
||||
abort ();
|
||||
break;
|
||||
|
||||
case GE:
|
||||
/* Currently only GE tests against zero are supported. */
|
||||
if (XEXP (condition, 1) != const0_rtx)
|
||||
abort ();
|
||||
|
||||
noloop = constm1_rtx;
|
||||
|
||||
/* The iteration count does not need incrementing for a GE test. */
|
||||
increment_count = false;
|
||||
|
||||
/* Determine if the iteration counter will be non-negative.
|
||||
Note that the maximum value loaded is iterations_max - 1. */
|
||||
if (desc->niter_max
|
||||
<= ((unsigned HOST_WIDEST_INT) 1
|
||||
<< (GET_MODE_BITSIZE (GET_MODE (counter_reg)) - 1)))
|
||||
nonneg = 1;
|
||||
break;
|
||||
|
||||
/* Abort if an invalid doloop pattern has been generated. */
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
|
||||
if (increment_count)
|
||||
count = simplify_gen_binary (PLUS, desc->mode, count, const1_rtx);
|
||||
|
||||
/* Insert initialization of the count register into the loop header. */
|
||||
start_sequence ();
|
||||
tmp = force_operand (count, counter_reg);
|
||||
convert_move (counter_reg, tmp, 1);
|
||||
sequence = get_insns ();
|
||||
end_sequence ();
|
||||
emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
|
||||
|
||||
if (desc->noloop_assumptions)
|
||||
{
|
||||
rtx ass = desc->noloop_assumptions;
|
||||
basic_block preheader = loop_preheader_edge (loop)->src;
|
||||
basic_block set_zero
|
||||
= loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
|
||||
basic_block new_preheader
|
||||
= loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
|
||||
basic_block bb;
|
||||
edge te;
|
||||
gcov_type cnt;
|
||||
|
||||
/* Expand the condition testing the assumptions and if it does not pass,
|
||||
reset the count register to 0. */
|
||||
add_test (XEXP (ass, 0), preheader, set_zero);
|
||||
preheader->succ->flags &= ~EDGE_FALLTHRU;
|
||||
cnt = preheader->succ->count;
|
||||
preheader->succ->probability = 0;
|
||||
preheader->succ->count = 0;
|
||||
irr = preheader->succ->flags & EDGE_IRREDUCIBLE_LOOP;
|
||||
te = make_edge (preheader, new_preheader, EDGE_FALLTHRU | irr);
|
||||
te->probability = REG_BR_PROB_BASE;
|
||||
te->count = cnt;
|
||||
set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
|
||||
|
||||
set_zero->count = 0;
|
||||
set_zero->frequency = 0;
|
||||
|
||||
for (ass = XEXP (ass, 1); ass; ass = XEXP (ass, 1))
|
||||
{
|
||||
bb = loop_split_edge_with (te, NULL_RTX);
|
||||
te = bb->succ;
|
||||
add_test (XEXP (ass, 0), bb, set_zero);
|
||||
make_edge (bb, set_zero, irr);
|
||||
}
|
||||
|
||||
start_sequence ();
|
||||
convert_move (counter_reg, noloop, 0);
|
||||
sequence = get_insns ();
|
||||
end_sequence ();
|
||||
emit_insn_after (sequence, BB_END (set_zero));
|
||||
}
|
||||
|
||||
/* Some targets (eg, C4x) need to initialize special looping
|
||||
registers. */
|
||||
#ifdef HAVE_doloop_begin
|
||||
{
|
||||
rtx init;
|
||||
unsigned level = get_loop_level (loop) + 1;
|
||||
init = gen_doloop_begin (counter_reg,
|
||||
desc->const_iter ? desc->niter_expr : const0_rtx,
|
||||
desc->niter_max,
|
||||
GEN_INT (level));
|
||||
if (init)
|
||||
{
|
||||
start_sequence ();
|
||||
emit_insn (init);
|
||||
sequence = get_insns ();
|
||||
end_sequence ();
|
||||
emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Insert the new low-overhead looping insn. */
|
||||
emit_jump_insn_after (doloop_seq, BB_END (loop_end));
|
||||
jump_insn = BB_END (loop_end);
|
||||
jump_label = block_label (desc->in_edge->dest);
|
||||
JUMP_LABEL (jump_insn) = jump_label;
|
||||
LABEL_NUSES (jump_label)++;
|
||||
|
||||
/* Ensure the right fallthru edge is marked, for case we have reversed
|
||||
the condition. */
|
||||
desc->in_edge->flags &= ~EDGE_FALLTHRU;
|
||||
desc->out_edge->flags |= EDGE_FALLTHRU;
|
||||
|
||||
/* Add a REG_NONNEG note if the actual or estimated maximum number
|
||||
of iterations is non-negative. */
|
||||
if (nonneg)
|
||||
{
|
||||
REG_NOTES (jump_insn)
|
||||
= gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn));
|
||||
}
|
||||
}
|
||||
|
||||
/* Process loop described by LOOP validating that the loop is suitable for
|
||||
conversion to use a low overhead looping instruction, replacing the jump
|
||||
insn where suitable. Returns true if the loop was successfully
|
||||
modified. */
|
||||
|
||||
static bool
|
||||
doloop_optimize (struct loop *loop)
|
||||
{
|
||||
enum machine_mode mode;
|
||||
rtx doloop_seq, doloop_pat, doloop_reg;
|
||||
rtx iterations;
|
||||
rtx iterations_max;
|
||||
rtx start_label;
|
||||
rtx condition;
|
||||
unsigned level, est_niter;
|
||||
struct niter_desc *desc;
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
|
||||
|
||||
iv_analysis_loop_init (loop);
|
||||
|
||||
/* Find the simple exit of a LOOP. */
|
||||
desc = get_simple_loop_desc (loop);
|
||||
|
||||
/* Check that loop is a candidate for a low-overhead looping insn. */
|
||||
if (!doloop_valid_p (loop, desc))
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file,
|
||||
"Doloop: The loop is not suitable.\n");
|
||||
return false;
|
||||
}
|
||||
mode = desc->mode;
|
||||
|
||||
est_niter = 3;
|
||||
if (desc->const_iter)
|
||||
est_niter = desc->niter;
|
||||
/* If the estimate on number of iterations is reliable (comes from profile
|
||||
feedback), use it. Do not use it normally, since the expected number
|
||||
of iterations of an unrolled loop is 2. */
|
||||
if (loop->header->count)
|
||||
est_niter = expected_loop_iterations (loop);
|
||||
|
||||
if (est_niter < 3)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file,
|
||||
"Doloop: Too few iterations (%u) to be profitable.\n",
|
||||
est_niter);
|
||||
return false;
|
||||
}
|
||||
|
||||
iterations = desc->const_iter ? desc->niter_expr : const0_rtx;
|
||||
iterations_max = GEN_INT (desc->niter_max);
|
||||
level = get_loop_level (loop) + 1;
|
||||
|
||||
/* Generate looping insn. If the pattern FAILs then give up trying
|
||||
to modify the loop since there is some aspect the back-end does
|
||||
not like. */
|
||||
start_label = block_label (desc->in_edge->dest);
|
||||
doloop_reg = gen_reg_rtx (mode);
|
||||
doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
|
||||
GEN_INT (level), start_label);
|
||||
if (! doloop_seq && mode != word_mode)
|
||||
{
|
||||
PUT_MODE (doloop_reg, word_mode);
|
||||
doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
|
||||
GEN_INT (level), start_label);
|
||||
}
|
||||
if (! doloop_seq)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file,
|
||||
"Doloop: Target unwilling to use doloop pattern!\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* If multiple instructions were created, the last must be the
|
||||
jump instruction. Also, a raw define_insn may yield a plain
|
||||
pattern. */
|
||||
doloop_pat = doloop_seq;
|
||||
if (INSN_P (doloop_pat))
|
||||
{
|
||||
while (NEXT_INSN (doloop_pat) != NULL_RTX)
|
||||
doloop_pat = NEXT_INSN (doloop_pat);
|
||||
if (GET_CODE (doloop_pat) == JUMP_INSN)
|
||||
doloop_pat = PATTERN (doloop_pat);
|
||||
else
|
||||
doloop_pat = NULL_RTX;
|
||||
}
|
||||
|
||||
if (! doloop_pat
|
||||
|| ! (condition = doloop_condition_get (doloop_pat)))
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
doloop_modify (loop, desc, doloop_seq, condition);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* This is the main entry point. Process all LOOPS using doloop_optimize. */
|
||||
|
||||
void
|
||||
doloop_optimize_loops (struct loops *loops)
|
||||
{
|
||||
unsigned i;
|
||||
struct loop *loop;
|
||||
|
||||
for (i = 1; i < loops->num; i++)
|
||||
{
|
||||
loop = loops->parray[i];
|
||||
if (!loop)
|
||||
continue;
|
||||
|
||||
doloop_optimize (loop);
|
||||
}
|
||||
|
||||
iv_analysis_done ();
|
||||
|
||||
#ifdef ENABLE_CHECKING
|
||||
verify_dominators (CDI_DOMINATORS);
|
||||
verify_loop_structure (loops);
|
||||
#endif
|
||||
}
|
||||
#endif /* HAVE_doloop_end */
|
||||
|
|
@ -2281,6 +2281,10 @@ iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition,
|
|||
simplify_using_initial_values (loop, IOR, &desc->infinite);
|
||||
simplify_using_initial_values (loop, NIL, &desc->niter_expr);
|
||||
|
||||
if (desc->noloop_assumptions
|
||||
&& XEXP (desc->noloop_assumptions, 0) == const_true_rtx)
|
||||
goto zero_iter;
|
||||
|
||||
if (GET_CODE (desc->niter_expr) == CONST_INT)
|
||||
{
|
||||
unsigned HOST_WIDEST_INT val = INTVAL (desc->niter_expr);
|
||||
|
|
17
gcc/loop.c
17
gcc/loop.c
|
@ -5352,23 +5352,6 @@ strength_reduce (struct loop *loop, int flags)
|
|||
&& unrolled_insn_copies <= insn_count))
|
||||
unroll_loop (loop, insn_count, 1);
|
||||
|
||||
#ifdef HAVE_doloop_end
|
||||
if (HAVE_doloop_end && (flags & LOOP_BCT) && flag_branch_on_count_reg)
|
||||
doloop_optimize (loop);
|
||||
#endif /* HAVE_doloop_end */
|
||||
|
||||
/* In case number of iterations is known, drop branch prediction note
|
||||
in the branch. Do that only in second loop pass, as loop unrolling
|
||||
may change the number of iterations performed. */
|
||||
if (flags & LOOP_BCT)
|
||||
{
|
||||
unsigned HOST_WIDE_INT n
|
||||
= loop_info->n_iterations / loop_info->unroll_number;
|
||||
if (n > 1)
|
||||
predict_insn (prev_nonnote_insn (loop->end), PRED_LOOP_ITERATIONS,
|
||||
REG_BR_PROB_BASE - REG_BR_PROB_BASE / n);
|
||||
}
|
||||
|
||||
if (loop_dump_stream)
|
||||
fprintf (loop_dump_stream, "\n");
|
||||
|
||||
|
|
|
@ -26,9 +26,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
|||
|
||||
/* Flags passed to loop_optimize. */
|
||||
#define LOOP_UNROLL 1
|
||||
#define LOOP_BCT 2
|
||||
#define LOOP_PREFETCH 4
|
||||
#define LOOP_AUTO_UNROLL 8
|
||||
#define LOOP_PREFETCH 2
|
||||
#define LOOP_AUTO_UNROLL 4
|
||||
|
||||
/* Get the loop info pointer of a loop. */
|
||||
#define LOOP_INFO(LOOP) ((struct loop_info *) (LOOP)->aux)
|
||||
|
|
18
gcc/passes.c
18
gcc/passes.c
|
@ -1456,7 +1456,7 @@ rest_of_handle_loop_optimize (tree decl, rtx insns)
|
|||
reg_scan (insns, max_reg_num (), 1);
|
||||
}
|
||||
cleanup_barriers ();
|
||||
loop_optimize (insns, dump_file, do_unroll | LOOP_BCT | do_prefetch);
|
||||
loop_optimize (insns, dump_file, do_unroll | do_prefetch);
|
||||
|
||||
/* Loop can create trivially dead instructions. */
|
||||
delete_trivially_dead_insns (insns, max_reg_num ());
|
||||
|
@ -1476,6 +1476,12 @@ rest_of_handle_loop2 (tree decl, rtx insns)
|
|||
struct loops *loops;
|
||||
basic_block bb;
|
||||
|
||||
if (!flag_unswitch_loops
|
||||
&& !flag_peel_loops
|
||||
&& !flag_unroll_loops
|
||||
&& !flag_branch_on_count_reg)
|
||||
return;
|
||||
|
||||
timevar_push (TV_LOOP);
|
||||
open_dump_file (DFI_loop2, decl);
|
||||
if (dump_file)
|
||||
|
@ -1498,6 +1504,11 @@ rest_of_handle_loop2 (tree decl, rtx insns)
|
|||
(flag_unroll_loops ? UAP_UNROLL : 0) |
|
||||
(flag_unroll_all_loops ? UAP_UNROLL_ALL : 0));
|
||||
|
||||
#ifdef HAVE_doloop_end
|
||||
if (flag_branch_on_count_reg && HAVE_doloop_end)
|
||||
doloop_optimize_loops (loops);
|
||||
#endif /* HAVE_doloop_end */
|
||||
|
||||
loop_optimizer_finalize (loops, dump_file);
|
||||
}
|
||||
|
||||
|
@ -1776,10 +1787,7 @@ rest_of_compilation (tree decl)
|
|||
if (flag_tracer)
|
||||
rest_of_handle_tracer (decl, insns);
|
||||
|
||||
if (optimize > 0
|
||||
&& (flag_unswitch_loops
|
||||
|| flag_peel_loops
|
||||
|| flag_unroll_loops))
|
||||
if (optimize > 0)
|
||||
rest_of_handle_loop2 (decl, insns);
|
||||
|
||||
if (flag_web)
|
||||
|
|
Loading…
Add table
Reference in a new issue