Visit basic blocks using the work-list based algorithm.

2011-01-24  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/46519
	* config/i386/i386.c: Include sbitmap.h and fibheap.h.
	(block_info): Add scanned and prev.
	(move_or_delete_vzeroupper_2): Return if the basic block
	has been scanned and the upper 128bit state is unchanged
	from the last scan.
	(move_or_delete_vzeroupper_1): Return true if the exit
	state is changed.
	(move_or_delete_vzeroupper): Visit basic blocks using the
	work-list based algorithm based on vt_find_locations in
	var-tracking.c.

	* config/i386/t-i386: Also depend on sbitmap.h and $(FIBHEAP_H).

From-SVN: r169173
This commit is contained in:
H.J. Lu 2011-01-24 17:29:58 +00:00 committed by H.J. Lu
parent fc722a0ea4
commit a6c5070a12
3 changed files with 146 additions and 25 deletions

View file

@ -1,3 +1,19 @@
2011-01-24 H.J. Lu <hongjiu.lu@intel.com>
PR target/46519
* config/i386/i386.c: Include sbitmap.h and fibheap.h.
(block_info): Add scanned and prev.
(move_or_delete_vzeroupper_2): Return if the basic block
has been scanned and the upper 128bit state is unchanged
from the last scan.
(move_or_delete_vzeroupper_1): Return true if the exit
state is changed.
(move_or_delete_vzeroupper): Visit basic blocks using the
work-list based algorithm based on vt_find_locations in
var-tracking.c.
* config/i386/t-i386: Also depend on sbitmap.h and $(FIBHEAP_H).
2011-01-24 Nick Clifton <nickc@redhat.com>
* config/v850/v850.opt (mv850es): New option - alias for -mv850e1.

View file

@ -56,6 +56,8 @@ along with GCC; see the file COPYING3. If not see
#include "debug.h"
#include "dwarf2out.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
enum upper_128bits_state
{
@ -73,6 +75,10 @@ typedef struct block_info_def
bool unchanged;
/* TRUE if block has been processed. */
bool processed;
/* TRUE if block has been scanned. */
bool scanned;
/* Previous state of the upper 128bits of AVX registers at entry. */
enum upper_128bits_state prev;
} *block_info;
#define BLOCK_INFO(B) ((block_info) (B)->aux)
@ -135,6 +141,16 @@ move_or_delete_vzeroupper_2 (basic_block bb,
return;
}
if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
{
if (dump_file)
fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
bb->index, BLOCK_INFO (bb)->state);
return;
}
BLOCK_INFO (bb)->prev = state;
if (dump_file)
fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
bb->index, state);
@ -264,6 +280,7 @@ move_or_delete_vzeroupper_2 (basic_block bb,
BLOCK_INFO (bb)->state = state;
BLOCK_INFO (bb)->unchanged = unchanged;
BLOCK_INFO (bb)->scanned = true;
if (dump_file)
fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
@ -273,9 +290,10 @@ move_or_delete_vzeroupper_2 (basic_block bb,
/* Helper function for move_or_delete_vzeroupper. Process vzeroupper
in BLOCK and check its predecessor blocks. Treat UNKNOWN state
as USED if UNKNOWN_IS_UNUSED is true. */
as USED if UNKNOWN_IS_UNUSED is true. Return TRUE if the exit
state is changed. */
static void
static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
edge e;
@ -288,7 +306,7 @@ move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
block->index, BLOCK_INFO (block)->processed);
if (BLOCK_INFO (block)->processed)
return;
return false;
state = unused;
@ -324,8 +342,14 @@ done:
/* Need to rescan if the upper 128bits of AVX registers are changed
to USED at exit. */
if (new_state != old_state && new_state == used)
cfun->machine->rescan_vzeroupper_p = 1;
if (new_state != old_state)
{
if (new_state == used)
cfun->machine->rescan_vzeroupper_p = 1;
return true;
}
else
return false;
}
/* Go through the instruction stream looking for vzeroupper. Delete
@ -338,14 +362,18 @@ move_or_delete_vzeroupper (void)
edge e;
edge_iterator ei;
basic_block bb;
unsigned int count;
fibheap_t worklist, pending, fibheap_swap;
sbitmap visited, in_worklist, in_pending, sbitmap_swap;
int *bb_order;
int *rc_order;
int i;
/* Set up block info for each basic block. */
alloc_aux_for_blocks (sizeof (struct block_info_def));
/* Process successor blocks of all entry points. */
/* Process outgoing edges of entry point. */
if (dump_file)
fprintf (dump_file, "Process all entry points\n");
fprintf (dump_file, "Process outgoing edges of entry point\n");
FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
{
@ -355,25 +383,102 @@ move_or_delete_vzeroupper (void)
BLOCK_INFO (e->dest)->processed = true;
}
/* Process all basic blocks. */
count = 0;
do
{
if (dump_file)
fprintf (dump_file, "Process all basic blocks: trip %d\n",
count);
cfun->machine->rescan_vzeroupper_p = 0;
FOR_EACH_BB (bb)
move_or_delete_vzeroupper_1 (bb, false);
}
while (cfun->machine->rescan_vzeroupper_p && count++ < 20);
/* Compute reverse completion order of depth first search of the CFG
so that the data-flow runs faster. */
rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
bb_order = XNEWVEC (int, last_basic_block);
pre_and_rev_post_order_compute (NULL, rc_order, false);
for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
bb_order[rc_order[i]] = i;
free (rc_order);
/* FIXME: Is 20 big enough? */
if (count >= 20)
gcc_unreachable ();
worklist = fibheap_new ();
pending = fibheap_new ();
visited = sbitmap_alloc (last_basic_block);
in_worklist = sbitmap_alloc (last_basic_block);
in_pending = sbitmap_alloc (last_basic_block);
sbitmap_zero (in_worklist);
/* Don't check outgoing edges of entry point. */
sbitmap_ones (in_pending);
FOR_EACH_BB (bb)
if (BLOCK_INFO (bb)->processed)
RESET_BIT (in_pending, bb->index);
else
{
move_or_delete_vzeroupper_1 (bb, false);
fibheap_insert (pending, bb_order[bb->index], bb);
}
if (dump_file)
fprintf (dump_file, "Process all basic blocks\n");
fprintf (dump_file, "Check remaining basic blocks\n");
while (!fibheap_empty (pending))
{
fibheap_swap = pending;
pending = worklist;
worklist = fibheap_swap;
sbitmap_swap = in_pending;
in_pending = in_worklist;
in_worklist = sbitmap_swap;
sbitmap_zero (visited);
cfun->machine->rescan_vzeroupper_p = 0;
while (!fibheap_empty (worklist))
{
bb = (basic_block) fibheap_extract_min (worklist);
RESET_BIT (in_worklist, bb->index);
gcc_assert (!TEST_BIT (visited, bb->index));
if (!TEST_BIT (visited, bb->index))
{
edge_iterator ei;
SET_BIT (visited, bb->index);
if (move_or_delete_vzeroupper_1 (bb, false))
FOR_EACH_EDGE (e, ei, bb->succs)
{
if (e->dest == EXIT_BLOCK_PTR
|| BLOCK_INFO (e->dest)->processed)
continue;
if (TEST_BIT (visited, e->dest->index))
{
if (!TEST_BIT (in_pending, e->dest->index))
{
/* Send E->DEST to next round. */
SET_BIT (in_pending, e->dest->index);
fibheap_insert (pending,
bb_order[e->dest->index],
e->dest);
}
}
else if (!TEST_BIT (in_worklist, e->dest->index))
{
/* Add E->DEST to current round. */
SET_BIT (in_worklist, e->dest->index);
fibheap_insert (worklist, bb_order[e->dest->index],
e->dest);
}
}
}
}
if (!cfun->machine->rescan_vzeroupper_p)
break;
}
free (bb_order);
fibheap_delete (worklist);
fibheap_delete (pending);
sbitmap_free (visited);
sbitmap_free (in_worklist);
sbitmap_free (in_pending);
if (dump_file)
fprintf (dump_file, "Process remaining basic blocks\n");
FOR_EACH_BB (bb)
move_or_delete_vzeroupper_1 (bb, true);

View file

@ -23,7 +23,7 @@ i386.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
$(RECOG_H) $(EXPR_H) $(OPTABS_H) toplev.h $(BASIC_BLOCK_H) \
$(GGC_H) $(TARGET_H) $(TARGET_DEF_H) langhooks.h $(CGRAPH_H) \
$(TREE_GIMPLE_H) $(DWARF2_H) $(DF_H) tm-constrs.h $(PARAMS_H) \
i386-builtin-types.inc debug.h dwarf2out.h
i386-builtin-types.inc debug.h dwarf2out.h sbitmap.h $(FIBHEAP_H)
i386-c.o: $(srcdir)/config/i386/i386-c.c \
$(srcdir)/config/i386/i386-protos.h $(CONFIG_H) $(SYSTEM_H) coretypes.h \