
2007-11-15 Paolo Bonzini <bonzini@gnu.org> * fwprop.c (try_fwprop_subst): Skip profitability check for forward propagation into an address; that's done in should_replace_address. From-SVN: r130201
1071 lines
32 KiB
C
1071 lines
32 KiB
C
/* RTL-based forward propagation pass for GNU compiler.
|
||
Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc.
|
||
Contributed by Paolo Bonzini and Steven Bosscher.
|
||
|
||
This file is part of GCC.
|
||
|
||
GCC is free software; you can redistribute it and/or modify it under
|
||
the terms of the GNU General Public License as published by the Free
|
||
Software Foundation; either version 3, or (at your option) any later
|
||
version.
|
||
|
||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||
for more details.
|
||
|
||
You should have received a copy of the GNU General Public License
|
||
along with GCC; see the file COPYING3. If not see
|
||
<http://www.gnu.org/licenses/>. */
|
||
|
||
#include "config.h"
|
||
#include "system.h"
|
||
#include "coretypes.h"
|
||
#include "tm.h"
|
||
#include "toplev.h"
|
||
|
||
#include "timevar.h"
|
||
#include "rtl.h"
|
||
#include "tm_p.h"
|
||
#include "emit-rtl.h"
|
||
#include "insn-config.h"
|
||
#include "recog.h"
|
||
#include "flags.h"
|
||
#include "obstack.h"
|
||
#include "basic-block.h"
|
||
#include "output.h"
|
||
#include "df.h"
|
||
#include "target.h"
|
||
#include "cfgloop.h"
|
||
#include "tree-pass.h"
|
||
|
||
|
||
/* This pass does simple forward propagation and simplification when an
|
||
operand of an insn can only come from a single def. This pass uses
|
||
df.c, so it is global. However, we only do limited analysis of
|
||
available expressions.
|
||
|
||
1) The pass tries to propagate the source of the def into the use,
|
||
and checks if the result is independent of the substituted value.
|
||
For example, the high word of a (zero_extend:DI (reg:SI M)) is always
|
||
zero, independent of the source register.
|
||
|
||
In particular, we propagate constants into the use site. Sometimes
|
||
RTL expansion did not put the constant in the same insn on purpose,
|
||
to satisfy a predicate, and the result will fail to be recognized;
|
||
but this happens rarely and in this case we can still create a
|
||
REG_EQUAL note. For multi-word operations, this
|
||
|
||
(set (subreg:SI (reg:DI 120) 0) (const_int 0))
|
||
(set (subreg:SI (reg:DI 120) 4) (const_int -1))
|
||
(set (subreg:SI (reg:DI 122) 0)
|
||
(ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
|
||
(set (subreg:SI (reg:DI 122) 4)
|
||
(ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
|
||
|
||
can be simplified to the much simpler
|
||
|
||
(set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
|
||
(set (subreg:SI (reg:DI 122) 4) (const_int -1))
|
||
|
||
This particular propagation is also effective at putting together
|
||
complex addressing modes. We are more aggressive inside MEMs, in
|
||
that all definitions are propagated if the use is in a MEM; if the
|
||
result is a valid memory address we check address_cost to decide
|
||
whether the substitution is worthwhile.
|
||
|
||
2) The pass propagates register copies. This is not as effective as
|
||
the copy propagation done by CSE's canon_reg, which works by walking
|
||
the instruction chain, it can help the other transformations.
|
||
|
||
We should consider removing this optimization, and instead reorder the
|
||
RTL passes, because GCSE does this transformation too. With some luck,
|
||
the CSE pass at the end of rest_of_handle_gcse could also go away.
|
||
|
||
3) The pass looks for paradoxical subregs that are actually unnecessary.
|
||
Things like this:
|
||
|
||
(set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
|
||
(set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
|
||
(set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
|
||
(subreg:SI (reg:QI 121) 0)))
|
||
|
||
are very common on machines that can only do word-sized operations.
|
||
For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
|
||
if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
|
||
we can replace the paradoxical subreg with simply (reg:WIDE M). The
|
||
above will simplify this to
|
||
|
||
(set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
|
||
(set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
|
||
(set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
|
||
|
||
where the first two insns are now dead. */
|
||
|
||
|
||
static int num_changes;
|
||
|
||
|
||
/* Do not try to replace constant addresses or addresses of local and
|
||
argument slots. These MEM expressions are made only once and inserted
|
||
in many instructions, as well as being used to control symbol table
|
||
output. It is not safe to clobber them.
|
||
|
||
There are some uncommon cases where the address is already in a register
|
||
for some reason, but we cannot take advantage of that because we have
|
||
no easy way to unshare the MEM. In addition, looking up all stack
|
||
addresses is costly. */
|
||
|
||
static bool
|
||
can_simplify_addr (rtx addr)
|
||
{
|
||
rtx reg;
|
||
|
||
if (CONSTANT_ADDRESS_P (addr))
|
||
return false;
|
||
|
||
if (GET_CODE (addr) == PLUS)
|
||
reg = XEXP (addr, 0);
|
||
else
|
||
reg = addr;
|
||
|
||
return (!REG_P (reg)
|
||
|| (REGNO (reg) != FRAME_POINTER_REGNUM
|
||
&& REGNO (reg) != HARD_FRAME_POINTER_REGNUM
|
||
&& REGNO (reg) != ARG_POINTER_REGNUM));
|
||
}
|
||
|
||
/* Returns a canonical version of X for the address, from the point of view,
|
||
that all multiplications are represented as MULT instead of the multiply
|
||
by a power of 2 being represented as ASHIFT.
|
||
|
||
Every ASHIFT we find has been made by simplify_gen_binary and was not
|
||
there before, so it is not shared. So we can do this in place. */
|
||
|
||
static void
|
||
canonicalize_address (rtx x)
|
||
{
|
||
for (;;)
|
||
switch (GET_CODE (x))
|
||
{
|
||
case ASHIFT:
|
||
if (GET_CODE (XEXP (x, 1)) == CONST_INT
|
||
&& INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x))
|
||
&& INTVAL (XEXP (x, 1)) >= 0)
|
||
{
|
||
HOST_WIDE_INT shift = INTVAL (XEXP (x, 1));
|
||
PUT_CODE (x, MULT);
|
||
XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift,
|
||
GET_MODE (x));
|
||
}
|
||
|
||
x = XEXP (x, 0);
|
||
break;
|
||
|
||
case PLUS:
|
||
if (GET_CODE (XEXP (x, 0)) == PLUS
|
||
|| GET_CODE (XEXP (x, 0)) == ASHIFT
|
||
|| GET_CODE (XEXP (x, 0)) == CONST)
|
||
canonicalize_address (XEXP (x, 0));
|
||
|
||
x = XEXP (x, 1);
|
||
break;
|
||
|
||
case CONST:
|
||
x = XEXP (x, 0);
|
||
break;
|
||
|
||
default:
|
||
return;
|
||
}
|
||
}
|
||
|
||
/* OLD is a memory address. Return whether it is good to use NEW instead,
|
||
for a memory access in the given MODE. */
|
||
|
||
static bool
|
||
should_replace_address (rtx old, rtx new, enum machine_mode mode)
|
||
{
|
||
int gain;
|
||
|
||
if (rtx_equal_p (old, new) || !memory_address_p (mode, new))
|
||
return false;
|
||
|
||
/* Copy propagation is always ok. */
|
||
if (REG_P (old) && REG_P (new))
|
||
return true;
|
||
|
||
/* Prefer the new address if it is less expensive. */
|
||
gain = address_cost (old, mode) - address_cost (new, mode);
|
||
|
||
/* If the addresses have equivalent cost, prefer the new address
|
||
if it has the highest `rtx_cost'. That has the potential of
|
||
eliminating the most insns without additional costs, and it
|
||
is the same that cse.c used to do. */
|
||
if (gain == 0)
|
||
gain = rtx_cost (new, SET) - rtx_cost (old, SET);
|
||
|
||
return (gain > 0);
|
||
}
|
||
|
||
/* Replace all occurrences of OLD in *PX with NEW and try to simplify the
|
||
resulting expression. Replace *PX with a new RTL expression if an
|
||
occurrence of OLD was found.
|
||
|
||
If CAN_APPEAR is true, we always return true; if it is false, we
|
||
can return false if, for at least one occurrence OLD, we failed to
|
||
collapse the result to a constant. For example, (mult:M (reg:M A)
|
||
(minus:M (reg:M B) (reg:M A))) may collapse to zero if replacing
|
||
(reg:M B) with (reg:M A).
|
||
|
||
CAN_APPEAR is disregarded inside MEMs: in that case, we always return
|
||
true if the simplification is a cheaper and valid memory address.
|
||
|
||
This is only a wrapper around simplify-rtx.c: do not add any pattern
|
||
matching code here. (The sole exception is the handling of LO_SUM, but
|
||
that is because there is no simplify_gen_* function for LO_SUM). */
|
||
|
||
static bool
|
||
propagate_rtx_1 (rtx *px, rtx old, rtx new, bool can_appear)
|
||
{
|
||
rtx x = *px, tem = NULL_RTX, op0, op1, op2;
|
||
enum rtx_code code = GET_CODE (x);
|
||
enum machine_mode mode = GET_MODE (x);
|
||
enum machine_mode op_mode;
|
||
bool valid_ops = true;
|
||
|
||
/* If X is OLD_RTX, return NEW_RTX. Otherwise, if this is an expression,
|
||
try to build a new expression from recursive substitution. */
|
||
|
||
if (x == old)
|
||
{
|
||
*px = new;
|
||
return can_appear;
|
||
}
|
||
|
||
switch (GET_RTX_CLASS (code))
|
||
{
|
||
case RTX_UNARY:
|
||
op0 = XEXP (x, 0);
|
||
op_mode = GET_MODE (op0);
|
||
valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear);
|
||
if (op0 == XEXP (x, 0))
|
||
return true;
|
||
tem = simplify_gen_unary (code, mode, op0, op_mode);
|
||
break;
|
||
|
||
case RTX_BIN_ARITH:
|
||
case RTX_COMM_ARITH:
|
||
op0 = XEXP (x, 0);
|
||
op1 = XEXP (x, 1);
|
||
valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear);
|
||
valid_ops &= propagate_rtx_1 (&op1, old, new, can_appear);
|
||
if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
|
||
return true;
|
||
tem = simplify_gen_binary (code, mode, op0, op1);
|
||
break;
|
||
|
||
case RTX_COMPARE:
|
||
case RTX_COMM_COMPARE:
|
||
op0 = XEXP (x, 0);
|
||
op1 = XEXP (x, 1);
|
||
op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
|
||
valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear);
|
||
valid_ops &= propagate_rtx_1 (&op1, old, new, can_appear);
|
||
if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
|
||
return true;
|
||
tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
|
||
break;
|
||
|
||
case RTX_TERNARY:
|
||
case RTX_BITFIELD_OPS:
|
||
op0 = XEXP (x, 0);
|
||
op1 = XEXP (x, 1);
|
||
op2 = XEXP (x, 2);
|
||
op_mode = GET_MODE (op0);
|
||
valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear);
|
||
valid_ops &= propagate_rtx_1 (&op1, old, new, can_appear);
|
||
valid_ops &= propagate_rtx_1 (&op2, old, new, can_appear);
|
||
if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
|
||
return true;
|
||
if (op_mode == VOIDmode)
|
||
op_mode = GET_MODE (op0);
|
||
tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
|
||
break;
|
||
|
||
case RTX_EXTRA:
|
||
/* The only case we try to handle is a SUBREG. */
|
||
if (code == SUBREG)
|
||
{
|
||
op0 = XEXP (x, 0);
|
||
valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear);
|
||
if (op0 == XEXP (x, 0))
|
||
return true;
|
||
tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
|
||
SUBREG_BYTE (x));
|
||
}
|
||
break;
|
||
|
||
case RTX_OBJ:
|
||
if (code == MEM && x != new)
|
||
{
|
||
rtx new_op0;
|
||
op0 = XEXP (x, 0);
|
||
|
||
/* There are some addresses that we cannot work on. */
|
||
if (!can_simplify_addr (op0))
|
||
return true;
|
||
|
||
op0 = new_op0 = targetm.delegitimize_address (op0);
|
||
valid_ops &= propagate_rtx_1 (&new_op0, old, new, true);
|
||
|
||
/* Dismiss transformation that we do not want to carry on. */
|
||
if (!valid_ops
|
||
|| new_op0 == op0
|
||
|| !(GET_MODE (new_op0) == GET_MODE (op0)
|
||
|| GET_MODE (new_op0) == VOIDmode))
|
||
return true;
|
||
|
||
canonicalize_address (new_op0);
|
||
|
||
/* Copy propagations are always ok. Otherwise check the costs. */
|
||
if (!(REG_P (old) && REG_P (new))
|
||
&& !should_replace_address (op0, new_op0, GET_MODE (x)))
|
||
return true;
|
||
|
||
tem = replace_equiv_address_nv (x, new_op0);
|
||
}
|
||
|
||
else if (code == LO_SUM)
|
||
{
|
||
op0 = XEXP (x, 0);
|
||
op1 = XEXP (x, 1);
|
||
|
||
/* The only simplification we do attempts to remove references to op0
|
||
or make it constant -- in both cases, op0's invalidity will not
|
||
make the result invalid. */
|
||
propagate_rtx_1 (&op0, old, new, true);
|
||
valid_ops &= propagate_rtx_1 (&op1, old, new, can_appear);
|
||
if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
|
||
return true;
|
||
|
||
/* (lo_sum (high x) x) -> x */
|
||
if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
|
||
tem = op1;
|
||
else
|
||
tem = gen_rtx_LO_SUM (mode, op0, op1);
|
||
|
||
/* OP1 is likely not a legitimate address, otherwise there would have
|
||
been no LO_SUM. We want it to disappear if it is invalid, return
|
||
false in that case. */
|
||
return memory_address_p (mode, tem);
|
||
}
|
||
|
||
else if (code == REG)
|
||
{
|
||
if (rtx_equal_p (x, old))
|
||
{
|
||
*px = new;
|
||
return can_appear;
|
||
}
|
||
}
|
||
break;
|
||
|
||
default:
|
||
break;
|
||
}
|
||
|
||
/* No change, no trouble. */
|
||
if (tem == NULL_RTX)
|
||
return true;
|
||
|
||
*px = tem;
|
||
|
||
/* The replacement we made so far is valid, if all of the recursive
|
||
replacements were valid, or we could simplify everything to
|
||
a constant. */
|
||
return valid_ops || can_appear || CONSTANT_P (tem);
|
||
}
|
||
|
||
/* Replace all occurrences of OLD in X with NEW and try to simplify the
|
||
resulting expression (in mode MODE). Return a new expression if it is
|
||
a constant, otherwise X.
|
||
|
||
Simplifications where occurrences of NEW collapse to a constant are always
|
||
accepted. All simplifications are accepted if NEW is a pseudo too.
|
||
Otherwise, we accept simplifications that have a lower or equal cost. */
|
||
|
||
static rtx
|
||
propagate_rtx (rtx x, enum machine_mode mode, rtx old, rtx new)
|
||
{
|
||
rtx tem;
|
||
bool collapsed;
|
||
|
||
if (REG_P (new) && REGNO (new) < FIRST_PSEUDO_REGISTER)
|
||
return NULL_RTX;
|
||
|
||
new = copy_rtx (new);
|
||
|
||
tem = x;
|
||
collapsed = propagate_rtx_1 (&tem, old, new, REG_P (new) || CONSTANT_P (new));
|
||
if (tem == x || !collapsed)
|
||
return NULL_RTX;
|
||
|
||
/* gen_lowpart_common will not be able to process VOIDmode entities other
|
||
than CONST_INTs. */
|
||
if (GET_MODE (tem) == VOIDmode && GET_CODE (tem) != CONST_INT)
|
||
return NULL_RTX;
|
||
|
||
if (GET_MODE (tem) == VOIDmode)
|
||
tem = rtl_hooks.gen_lowpart_no_emit (mode, tem);
|
||
else
|
||
gcc_assert (GET_MODE (tem) == mode);
|
||
|
||
return tem;
|
||
}
|
||
|
||
|
||
|
||
|
||
/* Return true if the register from reference REF is killed
|
||
between FROM to (but not including) TO. */
|
||
|
||
static bool
|
||
local_ref_killed_between_p (struct df_ref * ref, rtx from, rtx to)
|
||
{
|
||
rtx insn;
|
||
|
||
for (insn = from; insn != to; insn = NEXT_INSN (insn))
|
||
{
|
||
struct df_ref **def_rec;
|
||
if (!INSN_P (insn))
|
||
continue;
|
||
|
||
for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
|
||
{
|
||
struct df_ref *def = *def_rec;
|
||
if (DF_REF_REGNO (ref) == DF_REF_REGNO (def))
|
||
return true;
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
|
||
/* Check if the given DEF is available in INSN. This would require full
|
||
computation of available expressions; we check only restricted conditions:
|
||
- if DEF is the sole definition of its register, go ahead;
|
||
- in the same basic block, we check for no definitions killing the
|
||
definition of DEF_INSN;
|
||
- if USE's basic block has DEF's basic block as the sole predecessor,
|
||
we check if the definition is killed after DEF_INSN or before
|
||
TARGET_INSN insn, in their respective basic blocks. */
|
||
static bool
|
||
use_killed_between (struct df_ref *use, rtx def_insn, rtx target_insn)
|
||
{
|
||
basic_block def_bb = BLOCK_FOR_INSN (def_insn);
|
||
basic_block target_bb = BLOCK_FOR_INSN (target_insn);
|
||
int regno;
|
||
struct df_ref * def;
|
||
|
||
/* In some obscure situations we can have a def reaching a use
|
||
that is _before_ the def. In other words the def does not
|
||
dominate the use even though the use and def are in the same
|
||
basic block. This can happen when a register may be used
|
||
uninitialized in a loop. In such cases, we must assume that
|
||
DEF is not available. */
|
||
if (def_bb == target_bb
|
||
? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn)
|
||
: !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb))
|
||
return true;
|
||
|
||
/* Check if the reg in USE has only one definition. We already
|
||
know that this definition reaches use, or we wouldn't be here. */
|
||
regno = DF_REF_REGNO (use);
|
||
def = DF_REG_DEF_CHAIN (regno);
|
||
if (def && (def->next_reg == NULL))
|
||
return false;
|
||
|
||
/* Check locally if we are in the same basic block. */
|
||
if (def_bb == target_bb)
|
||
return local_ref_killed_between_p (use, def_insn, target_insn);
|
||
|
||
/* Finally, if DEF_BB is the sole predecessor of TARGET_BB. */
|
||
if (single_pred_p (target_bb)
|
||
&& single_pred (target_bb) == def_bb)
|
||
{
|
||
struct df_ref *x;
|
||
|
||
/* See if USE is killed between DEF_INSN and the last insn in the
|
||
basic block containing DEF_INSN. */
|
||
x = df_bb_regno_last_def_find (def_bb, regno);
|
||
if (x && DF_INSN_LUID (x->insn) >= DF_INSN_LUID (def_insn))
|
||
return true;
|
||
|
||
/* See if USE is killed between TARGET_INSN and the first insn in the
|
||
basic block containing TARGET_INSN. */
|
||
x = df_bb_regno_first_def_find (target_bb, regno);
|
||
if (x && DF_INSN_LUID (x->insn) < DF_INSN_LUID (target_insn))
|
||
return true;
|
||
|
||
return false;
|
||
}
|
||
|
||
/* Otherwise assume the worst case. */
|
||
return true;
|
||
}
|
||
|
||
|
||
/* for_each_rtx traversal function that returns 1 if BODY points to
|
||
a non-constant mem. */
|
||
|
||
static int
|
||
varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED)
|
||
{
|
||
rtx x = *body;
|
||
return MEM_P (x) && !MEM_READONLY_P (x);
|
||
}
|
||
|
||
/* Check if all uses in DEF_INSN can be used in TARGET_INSN. This
|
||
would require full computation of available expressions;
|
||
we check only restricted conditions, see use_killed_between. */
|
||
static bool
|
||
all_uses_available_at (rtx def_insn, rtx target_insn)
|
||
{
|
||
struct df_ref **use_rec;
|
||
rtx def_set = single_set (def_insn);
|
||
|
||
gcc_assert (def_set);
|
||
|
||
/* If target_insn comes right after def_insn, which is very common
|
||
for addresses, we can use a quicker test. */
|
||
if (NEXT_INSN (def_insn) == target_insn
|
||
&& REG_P (SET_DEST (def_set)))
|
||
{
|
||
rtx def_reg = SET_DEST (def_set);
|
||
|
||
/* If the insn uses the reg that it defines, the substitution is
|
||
invalid. */
|
||
for (use_rec = DF_INSN_USES (def_insn); *use_rec; use_rec++)
|
||
{
|
||
struct df_ref *use = *use_rec;
|
||
if (rtx_equal_p (DF_REF_REG (use), def_reg))
|
||
return false;
|
||
}
|
||
for (use_rec = DF_INSN_EQ_USES (def_insn); *use_rec; use_rec++)
|
||
{
|
||
struct df_ref *use = *use_rec;
|
||
if (rtx_equal_p (use->reg, def_reg))
|
||
return false;
|
||
}
|
||
}
|
||
else
|
||
{
|
||
/* Look at all the uses of DEF_INSN, and see if they are not
|
||
killed between DEF_INSN and TARGET_INSN. */
|
||
for (use_rec = DF_INSN_USES (def_insn); *use_rec; use_rec++)
|
||
{
|
||
struct df_ref *use = *use_rec;
|
||
if (use_killed_between (use, def_insn, target_insn))
|
||
return false;
|
||
}
|
||
for (use_rec = DF_INSN_EQ_USES (def_insn); *use_rec; use_rec++)
|
||
{
|
||
struct df_ref *use = *use_rec;
|
||
if (use_killed_between (use, def_insn, target_insn))
|
||
return false;
|
||
}
|
||
}
|
||
|
||
/* We don't do any analysis of memories or aliasing. Reject any
|
||
instruction that involves references to non-constant memory. */
|
||
return !for_each_rtx (&SET_SRC (def_set), varying_mem_p, NULL);
|
||
}
|
||
|
||
|
||
struct find_occurrence_data
|
||
{
|
||
rtx find;
|
||
rtx *retval;
|
||
};
|
||
|
||
/* Callback for for_each_rtx, used in find_occurrence.
|
||
See if PX is the rtx we have to find. Return 1 to stop for_each_rtx
|
||
if successful, or 0 to continue traversing otherwise. */
|
||
|
||
static int
|
||
find_occurrence_callback (rtx *px, void *data)
|
||
{
|
||
struct find_occurrence_data *fod = (struct find_occurrence_data *) data;
|
||
rtx x = *px;
|
||
rtx find = fod->find;
|
||
|
||
if (x == find)
|
||
{
|
||
fod->retval = px;
|
||
return 1;
|
||
}
|
||
|
||
return 0;
|
||
}
|
||
|
||
/* Return a pointer to one of the occurrences of register FIND in *PX. */
|
||
|
||
static rtx *
|
||
find_occurrence (rtx *px, rtx find)
|
||
{
|
||
struct find_occurrence_data data;
|
||
|
||
gcc_assert (REG_P (find)
|
||
|| (GET_CODE (find) == SUBREG
|
||
&& REG_P (SUBREG_REG (find))));
|
||
|
||
data.find = find;
|
||
data.retval = NULL;
|
||
for_each_rtx (px, find_occurrence_callback, &data);
|
||
return data.retval;
|
||
}
|
||
|
||
|
||
/* Inside INSN, the expression rooted at *LOC has been changed, moving some
|
||
uses from USE_VEC. Find those that are present, and create new items
|
||
in the data flow object of the pass. Mark any new uses as having the
|
||
given TYPE. */
|
||
static void
|
||
update_df (rtx insn, rtx *loc, struct df_ref **use_rec, enum df_ref_type type,
|
||
int new_flags)
|
||
{
|
||
bool changed = false;
|
||
|
||
/* Add a use for the registers that were propagated. */
|
||
while (*use_rec)
|
||
{
|
||
struct df_ref *use = *use_rec;
|
||
struct df_ref *orig_use = use, *new_use;
|
||
rtx *new_loc = find_occurrence (loc, DF_REF_REG (orig_use));
|
||
use_rec++;
|
||
|
||
if (!new_loc)
|
||
continue;
|
||
|
||
/* Add a new insn use. Use the original type, because it says if the
|
||
use was within a MEM. */
|
||
new_use = df_ref_create (DF_REF_REG (orig_use), new_loc,
|
||
insn, BLOCK_FOR_INSN (insn),
|
||
type, DF_REF_FLAGS (orig_use) | new_flags);
|
||
|
||
/* Set up the use-def chain. */
|
||
df_chain_copy (new_use, DF_REF_CHAIN (orig_use));
|
||
changed = true;
|
||
}
|
||
if (changed)
|
||
df_insn_rescan (insn);
|
||
}
|
||
|
||
|
||
/* Try substituting NEW into LOC, which originated from forward propagation
|
||
of USE's value from DEF_INSN. SET_REG_EQUAL says whether we are
|
||
substituting the whole SET_SRC, so we can set a REG_EQUAL note if the
|
||
new insn is not recognized. Return whether the substitution was
|
||
performed. */
|
||
|
||
static bool
|
||
try_fwprop_subst (struct df_ref *use, rtx *loc, rtx new, rtx def_insn, bool set_reg_equal)
|
||
{
|
||
rtx insn = DF_REF_INSN (use);
|
||
enum df_ref_type type = DF_REF_TYPE (use);
|
||
int flags = DF_REF_FLAGS (use);
|
||
rtx set = single_set (insn);
|
||
int old_cost = rtx_cost (SET_SRC (set), SET);
|
||
bool ok;
|
||
|
||
if (dump_file)
|
||
{
|
||
fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn));
|
||
print_inline_rtx (dump_file, *loc, 2);
|
||
fprintf (dump_file, "\n with ");
|
||
print_inline_rtx (dump_file, new, 2);
|
||
fprintf (dump_file, "\n");
|
||
}
|
||
|
||
validate_unshare_change (insn, loc, new, true);
|
||
if (!verify_changes (0))
|
||
{
|
||
if (dump_file)
|
||
fprintf (dump_file, "Changes to insn %d not recognized\n",
|
||
INSN_UID (insn));
|
||
ok = false;
|
||
}
|
||
|
||
else if (DF_REF_TYPE (use) == DF_REF_REG_USE
|
||
&& rtx_cost (SET_SRC (set), SET) > old_cost)
|
||
{
|
||
if (dump_file)
|
||
fprintf (dump_file, "Changes to insn %d not profitable\n",
|
||
INSN_UID (insn));
|
||
ok = false;
|
||
}
|
||
|
||
else
|
||
{
|
||
if (dump_file)
|
||
fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn));
|
||
ok = true;
|
||
}
|
||
|
||
if (ok)
|
||
{
|
||
confirm_change_group ();
|
||
num_changes++;
|
||
|
||
df_ref_remove (use);
|
||
if (!CONSTANT_P (new))
|
||
{
|
||
update_df (insn, loc, DF_INSN_USES (def_insn), type, flags);
|
||
update_df (insn, loc, DF_INSN_EQ_USES (def_insn), type, flags);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
cancel_changes (0);
|
||
|
||
/* Can also record a simplified value in a REG_EQUAL note, making a
|
||
new one if one does not already exist. */
|
||
if (set_reg_equal)
|
||
{
|
||
if (dump_file)
|
||
fprintf (dump_file, " Setting REG_EQUAL note\n");
|
||
|
||
set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new));
|
||
|
||
/* ??? Is this still necessary if we add the note through
|
||
set_unique_reg_note? */
|
||
if (!CONSTANT_P (new))
|
||
{
|
||
update_df (insn, loc, DF_INSN_USES (def_insn),
|
||
type, DF_REF_IN_NOTE);
|
||
update_df (insn, loc, DF_INSN_EQ_USES (def_insn),
|
||
type, DF_REF_IN_NOTE);
|
||
}
|
||
}
|
||
}
|
||
|
||
return ok;
|
||
}
|
||
|
||
|
||
/* If USE is a paradoxical subreg, see if it can be replaced by a pseudo. */
|
||
|
||
static bool
|
||
forward_propagate_subreg (struct df_ref *use, rtx def_insn, rtx def_set)
|
||
{
|
||
rtx use_reg = DF_REF_REG (use);
|
||
rtx use_insn, src;
|
||
|
||
/* Only consider paradoxical subregs... */
|
||
enum machine_mode use_mode = GET_MODE (use_reg);
|
||
if (GET_CODE (use_reg) != SUBREG
|
||
|| !REG_P (SET_DEST (def_set))
|
||
|| GET_MODE_SIZE (use_mode)
|
||
<= GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg))))
|
||
return false;
|
||
|
||
/* If this is a paradoxical SUBREG, we have no idea what value the
|
||
extra bits would have. However, if the operand is equivalent to
|
||
a SUBREG whose operand is the same as our mode, and all the modes
|
||
are within a word, we can just use the inner operand because
|
||
these SUBREGs just say how to treat the register. */
|
||
use_insn = DF_REF_INSN (use);
|
||
src = SET_SRC (def_set);
|
||
if (GET_CODE (src) == SUBREG
|
||
&& REG_P (SUBREG_REG (src))
|
||
&& GET_MODE (SUBREG_REG (src)) == use_mode
|
||
&& subreg_lowpart_p (src)
|
||
&& all_uses_available_at (def_insn, use_insn))
|
||
return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src),
|
||
def_insn, false);
|
||
else
|
||
return false;
|
||
}
|
||
|
||
/* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
|
||
result. */
|
||
|
||
static bool
|
||
forward_propagate_and_simplify (struct df_ref *use, rtx def_insn, rtx def_set)
|
||
{
|
||
rtx use_insn = DF_REF_INSN (use);
|
||
rtx use_set = single_set (use_insn);
|
||
rtx src, reg, new, *loc;
|
||
bool set_reg_equal;
|
||
enum machine_mode mode;
|
||
|
||
if (!use_set)
|
||
return false;
|
||
|
||
/* Do not propagate into PC, CC0, etc. */
|
||
if (GET_MODE (SET_DEST (use_set)) == VOIDmode)
|
||
return false;
|
||
|
||
/* If def and use are subreg, check if they match. */
|
||
reg = DF_REF_REG (use);
|
||
if (GET_CODE (reg) == SUBREG
|
||
&& GET_CODE (SET_DEST (def_set)) == SUBREG
|
||
&& (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg)
|
||
|| GET_MODE (SET_DEST (def_set)) != GET_MODE (reg)))
|
||
return false;
|
||
|
||
/* Check if the def had a subreg, but the use has the whole reg. */
|
||
if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG)
|
||
return false;
|
||
|
||
/* Check if the use has a subreg, but the def had the whole reg. Unlike the
|
||
previous case, the optimization is possible and often useful indeed. */
|
||
if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
|
||
reg = SUBREG_REG (reg);
|
||
|
||
/* Check if the substitution is valid (last, because it's the most
|
||
expensive check!). */
|
||
src = SET_SRC (def_set);
|
||
if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn))
|
||
return false;
|
||
|
||
/* Check if the def is loading something from the constant pool; in this
|
||
case we would undo optimization such as compress_float_constant.
|
||
Still, we can set a REG_EQUAL note. */
|
||
if (MEM_P (src) && MEM_READONLY_P (src))
|
||
{
|
||
rtx x = avoid_constant_pool_reference (src);
|
||
if (x != src)
|
||
{
|
||
rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
|
||
rtx old = note ? XEXP (note, 0) : SET_SRC (use_set);
|
||
rtx new = simplify_replace_rtx (old, src, x);
|
||
if (old != new)
|
||
set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new));
|
||
}
|
||
return false;
|
||
}
|
||
|
||
/* Else try simplifying. */
|
||
|
||
if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
|
||
{
|
||
loc = &SET_DEST (use_set);
|
||
set_reg_equal = false;
|
||
}
|
||
else
|
||
{
|
||
rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
|
||
if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
|
||
loc = &XEXP (note, 0);
|
||
else
|
||
loc = &SET_SRC (use_set);
|
||
|
||
/* Do not replace an existing REG_EQUAL note if the insn is not
|
||
recognized. Either we're already replacing in the note, or
|
||
we'll separately try plugging the definition in the note and
|
||
simplifying. */
|
||
set_reg_equal = (note == NULL_RTX);
|
||
}
|
||
|
||
if (GET_MODE (*loc) == VOIDmode)
|
||
mode = GET_MODE (SET_DEST (use_set));
|
||
else
|
||
mode = GET_MODE (*loc);
|
||
|
||
new = propagate_rtx (*loc, mode, reg, src);
|
||
|
||
if (!new)
|
||
return false;
|
||
|
||
return try_fwprop_subst (use, loc, new, def_insn, set_reg_equal);
|
||
}
|
||
|
||
|
||
/* Given a use USE of an insn, if it has a single reaching
|
||
definition, try to forward propagate it into that insn. */
|
||
|
||
static void
|
||
forward_propagate_into (struct df_ref *use)
|
||
{
|
||
struct df_link *defs;
|
||
struct df_ref *def;
|
||
rtx def_insn, def_set, use_insn;
|
||
rtx parent;
|
||
|
||
if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
|
||
return;
|
||
if (DF_REF_IS_ARTIFICIAL (use))
|
||
return;
|
||
|
||
/* Only consider uses that have a single definition. */
|
||
defs = DF_REF_CHAIN (use);
|
||
if (!defs || defs->next)
|
||
return;
|
||
|
||
def = defs->ref;
|
||
if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
|
||
return;
|
||
if (DF_REF_IS_ARTIFICIAL (def))
|
||
return;
|
||
|
||
/* Do not propagate loop invariant definitions inside the loop. */
|
||
if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
|
||
return;
|
||
|
||
/* Check if the use is still present in the insn! */
|
||
use_insn = DF_REF_INSN (use);
|
||
if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
|
||
parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
|
||
else
|
||
parent = PATTERN (use_insn);
|
||
|
||
if (!loc_mentioned_in_p (DF_REF_LOC (use), parent))
|
||
return;
|
||
|
||
def_insn = DF_REF_INSN (def);
|
||
if (multiple_sets (def_insn))
|
||
return;
|
||
def_set = single_set (def_insn);
|
||
if (!def_set)
|
||
return;
|
||
|
||
/* Only try one kind of propagation. If two are possible, we'll
|
||
do it on the following iterations. */
|
||
if (!forward_propagate_and_simplify (use, def_insn, def_set))
|
||
forward_propagate_subreg (use, def_insn, def_set);
|
||
}
|
||
|
||
|
||
static void
|
||
fwprop_init (void)
|
||
{
|
||
num_changes = 0;
|
||
calculate_dominance_info (CDI_DOMINATORS);
|
||
|
||
/* We do not always want to propagate into loops, so we have to find
|
||
loops and be careful about them. But we have to call flow_loops_find
|
||
before df_analyze, because flow_loops_find may introduce new jump
|
||
insns (sadly) if we are not working in cfglayout mode. */
|
||
loop_optimizer_init (0);
|
||
|
||
/* Now set up the dataflow problem (we only want use-def chains) and
|
||
put the dataflow solver to work. */
|
||
df_set_flags (DF_EQ_NOTES);
|
||
df_chain_add_problem (DF_UD_CHAIN);
|
||
df_analyze ();
|
||
df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
|
||
df_set_flags (DF_DEFER_INSN_RESCAN);
|
||
}
|
||
|
||
static void
|
||
fwprop_done (void)
|
||
{
|
||
loop_optimizer_finalize ();
|
||
|
||
free_dominance_info (CDI_DOMINATORS);
|
||
cleanup_cfg (0);
|
||
delete_trivially_dead_insns (get_insns (), max_reg_num ());
|
||
|
||
if (dump_file)
|
||
fprintf (dump_file,
|
||
"\nNumber of successful forward propagations: %d\n\n",
|
||
num_changes);
|
||
}
|
||
|
||
|
||
|
||
/* Main entry point. */
|
||
|
||
static bool
|
||
gate_fwprop (void)
|
||
{
|
||
return optimize > 0 && flag_forward_propagate;
|
||
}
|
||
|
||
static unsigned int
|
||
fwprop (void)
|
||
{
|
||
unsigned i;
|
||
|
||
fwprop_init ();
|
||
|
||
/* Go through all the uses. update_df will create new ones at the
|
||
end, and we'll go through them as well.
|
||
|
||
Do not forward propagate addresses into loops until after unrolling.
|
||
CSE did so because it was able to fix its own mess, but we are not. */
|
||
|
||
for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
|
||
{
|
||
struct df_ref *use = DF_USES_GET (i);
|
||
if (use)
|
||
if (DF_REF_TYPE (use) == DF_REF_REG_USE
|
||
|| DF_REF_BB (use)->loop_father == NULL)
|
||
forward_propagate_into (use);
|
||
}
|
||
|
||
fwprop_done ();
|
||
return 0;
|
||
}
|
||
|
||
struct tree_opt_pass pass_rtl_fwprop =
|
||
{
|
||
"fwprop1", /* name */
|
||
gate_fwprop, /* gate */
|
||
fwprop, /* execute */
|
||
NULL, /* sub */
|
||
NULL, /* next */
|
||
0, /* static_pass_number */
|
||
TV_FWPROP, /* tv_id */
|
||
0, /* properties_required */
|
||
0, /* properties_provided */
|
||
0, /* properties_destroyed */
|
||
0, /* todo_flags_start */
|
||
TODO_df_finish | TODO_verify_rtl_sharing |
|
||
TODO_dump_func, /* todo_flags_finish */
|
||
0 /* letter */
|
||
};
|
||
|
||
static unsigned int
|
||
fwprop_addr (void)
|
||
{
|
||
unsigned i;
|
||
fwprop_init ();
|
||
|
||
/* Go through all the uses. update_df will create new ones at the
|
||
end, and we'll go through them as well. */
|
||
df_set_flags (DF_DEFER_INSN_RESCAN);
|
||
|
||
for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
|
||
{
|
||
struct df_ref *use = DF_USES_GET (i);
|
||
if (use)
|
||
if (DF_REF_TYPE (use) != DF_REF_REG_USE
|
||
&& DF_REF_BB (use)->loop_father != NULL)
|
||
forward_propagate_into (use);
|
||
}
|
||
|
||
fwprop_done ();
|
||
|
||
return 0;
|
||
}
|
||
|
||
struct tree_opt_pass pass_rtl_fwprop_addr =
|
||
{
|
||
"fwprop2", /* name */
|
||
gate_fwprop, /* gate */
|
||
fwprop_addr, /* execute */
|
||
NULL, /* sub */
|
||
NULL, /* next */
|
||
0, /* static_pass_number */
|
||
TV_FWPROP, /* tv_id */
|
||
0, /* properties_required */
|
||
0, /* properties_provided */
|
||
0, /* properties_destroyed */
|
||
0, /* todo_flags_start */
|
||
TODO_df_finish | TODO_verify_rtl_sharing |
|
||
TODO_dump_func, /* todo_flags_finish */
|
||
0 /* letter */
|
||
};
|