RISC-V: Fix large memory usage of VSETVL PASS [PR113495]

The SPEC 2017 wrf benchmark exposes unreasonable memory usage in the VSETVL PASS:
the VSETVL PASS consumes over 33 GB of memory, which makes it impossible for us
to compile SPEC 2017 wrf on a laptop.

The root cause is the following memory-wasting variables:

unsigned num_exprs = num_bbs * num_regs;
sbitmap *avl_def_loc = sbitmap_vector_alloc (num_bbs, num_exprs);
sbitmap *m_kill = sbitmap_vector_alloc (num_bbs, num_exprs);
m_avl_def_in = sbitmap_vector_alloc (num_bbs, num_exprs);
m_avl_def_out = sbitmap_vector_alloc (num_bbs, num_exprs);

I found that what compute_avl_def_data computes can be obtained from the RTL_SSA
framework, so replace that implementation with one based on the RTL_SSA framework.

After this patch, the memory-hog issue is fixed.

simple vsetvl memory usage (valgrind --tool=massif --pages-as-heap=yes --massif-out-file=massif.out)
is 1.673 GB.

lazy vsetvl memory usage (valgrind --tool=massif --pages-as-heap=yes --massif-out-file=massif.out)
is 2.441 GB.

Tested on both RV32 and RV64, no regression.

gcc/ChangeLog:

	PR target/113495
	* config/riscv/riscv-vsetvl.cc (get_expr_id): Remove.
	(get_regno): Ditto.
	(get_bb_index): Ditto.
	(pre_vsetvl::compute_avl_def_data): Ditto.
	(pre_vsetvl::earliest_fuse_vsetvl_info): Fix large memory usage.
	(pre_vsetvl::pre_global_vsetvl_info): Ditto.

gcc/testsuite/ChangeLog:

	PR target/113495
	* gcc.target/riscv/rvv/vsetvl/avl_single-107.c: Adapt test.
This commit is contained in:
Juzhe-Zhong 2024-01-23 18:12:49 +08:00 committed by Pan Li
parent 3128786c7e
commit 3132d2d36b
2 changed files with 54 additions and 185 deletions

View file

@ -617,22 +617,6 @@ same_equiv_note_p (set_info *set1, set_info *set2)
return source_equal_p (insn1, insn2); return source_equal_p (insn1, insn2);
} }
/* Encode the pair (REGNO, BB_INDEX) as a single expression id.  Ids are
   grouped by register: register REGNO owns the NUM_BBS consecutive ids
   starting at REGNO * NUM_BBS, one per basic block.  */
static unsigned
get_expr_id (unsigned bb_index, unsigned regno, unsigned num_bbs)
{
  const unsigned first_id_of_reg = num_bbs * regno;
  return first_id_of_reg + bb_index;
}
/* Recover the register number from EXPR_ID, an id that packs a
   (register, basic block) pair with NUM_BB ids per register.  */
static unsigned
get_regno (unsigned expr_id, unsigned num_bb)
{
  const unsigned regno = expr_id / num_bb;
  return regno;
}
/* Recover the basic block index from EXPR_ID, an id that packs a
   (register, basic block) pair with NUM_BB ids per register.  */
static unsigned
get_bb_index (unsigned expr_id, unsigned num_bb)
{
  const unsigned bb_index = expr_id % num_bb;
  return bb_index;
}
/* Return true if the SET result is not used by any instructions. */ /* Return true if the SET result is not used by any instructions. */
static bool static bool
has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno) has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno)
@ -1337,9 +1321,6 @@ public:
class demand_system class demand_system
{ {
private: private:
sbitmap *m_avl_def_in;
sbitmap *m_avl_def_out;
/* predictors. */ /* predictors. */
inline bool always_true (const vsetvl_info &prev ATTRIBUTE_UNUSED, inline bool always_true (const vsetvl_info &prev ATTRIBUTE_UNUSED,
@ -1743,14 +1724,6 @@ private:
} }
public: public:
/* Start with no AVL reaching-definition data attached: both bitmap-vector
   pointers are null until set_avl_in_out_data is called.  */
demand_system () : m_avl_def_in (nullptr), m_avl_def_out (nullptr) {}
/* Attach the AVL reaching-definition IN and OUT bitmap vectors to this
   demand_system.  Only the pointers are stored; nothing is copied or freed
   here.  */
void set_avl_in_out_data (sbitmap *m_avl_def_in, sbitmap *m_avl_def_out)
{
  /* The parameters have the same names as the data members, so inside this
     function the unqualified names refer to the parameters.  Writing
     `m_avl_def_in = m_avl_def_in' would only assign the parameter to itself
     and leave the member null; go through `this' to reach the members.  */
  this->m_avl_def_in = m_avl_def_in;
  this->m_avl_def_out = m_avl_def_out;
}
/* Can we move vsetvl info between prev_insn and next_insn safe? */ /* Can we move vsetvl info between prev_insn and next_insn safe? */
bool avl_vl_unmodified_between_p (insn_info *prev_insn, insn_info *next_insn, bool avl_vl_unmodified_between_p (insn_info *prev_insn, insn_info *next_insn,
const vsetvl_info &info, const vsetvl_info &info,
@ -1778,32 +1751,66 @@ public:
} }
else else
{ {
basic_block prev_cfg_bb = prev_insn->bb ()->cfg_bb ();
if (!ignore_vl && info.has_vl ()) if (!ignore_vl && info.has_vl ())
{ {
bitmap live_out = df_get_live_out (prev_insn->bb ()->cfg_bb ()); bitmap live_out = df_get_live_out (prev_cfg_bb);
if (bitmap_bit_p (live_out, REGNO (info.get_vl ()))) if (bitmap_bit_p (live_out, REGNO (info.get_vl ())))
return false; return false;
} }
if (info.has_nonvlmax_reg_avl () && m_avl_def_in && m_avl_def_out) /* Find set_info at location of PREV_INSN and NEXT_INSN, Return
false if those 2 set_info are different.
PREV_INSN --- multiple nested blocks --- NEXT_INSN.
Return false if there is any modifications of AVL inside those
multiple nested blocks. */
if (info.has_nonvlmax_reg_avl ())
{ {
bool has_avl_out = false; resource_info resource = full_register (REGNO (info.get_avl ()));
unsigned regno = REGNO (info.get_avl ()); def_lookup dl1 = crtl->ssa->find_def (resource, prev_insn);
unsigned expr_id; def_lookup dl2 = crtl->ssa->find_def (resource, next_insn);
sbitmap_iterator sbi; if (dl2.matching_set ())
EXECUTE_IF_SET_IN_BITMAP (m_avl_def_out[prev_insn->bb ()->index ()],
0, expr_id, sbi)
{
if (get_regno (expr_id, last_basic_block_for_fn (cfun))
!= regno)
continue;
has_avl_out = true;
if (!bitmap_bit_p (m_avl_def_in[next_insn->bb ()->index ()],
expr_id))
return false;
}
if (!has_avl_out)
return false; return false;
auto is_phi_or_real
= [&] (insn_info *h) { return h->is_real () || h->is_phi (); };
def_info *def1 = dl1.matching_set_or_last_def_of_prev_group ();
def_info *def2 = dl2.prev_def (next_insn);
set_info *set1 = safe_dyn_cast<set_info *> (def1);
set_info *set2 = safe_dyn_cast<set_info *> (def2);
if (!set1 || !set2)
return false;
auto is_same_ultimate_def = [&] (set_info *s1, set_info *s2) {
return s1->insn ()->is_phi () && s2->insn ()->is_phi ()
&& look_through_degenerate_phi (s1)
== look_through_degenerate_phi (s2);
};
if (set1 != set2 && !is_same_ultimate_def (set1, set2))
{
if (!is_phi_or_real (set1->insn ())
|| !is_phi_or_real (set2->insn ()))
return false;
if (set1->insn ()->is_real () && set2->insn ()->is_phi ())
{
hash_set<set_info *> sets
= get_all_sets (set2, true, false, true);
if (!sets.contains (set1))
return false;
}
else
{
insn_info *def_insn1 = extract_single_source (set1);
insn_info *def_insn2 = extract_single_source (set2);
if (!def_insn1 || !def_insn2 || def_insn1 != def_insn2)
return false;
}
}
} }
for (insn_info *i = next_insn; i != next_insn->bb ()->head_insn (); for (insn_info *i = next_insn; i != next_insn->bb ()->head_insn ();
@ -2043,9 +2050,6 @@ private:
auto_vec<vsetvl_block_info> m_vector_block_infos; auto_vec<vsetvl_block_info> m_vector_block_infos;
/* data for avl reaching defintion. */ /* data for avl reaching defintion. */
sbitmap m_avl_regs;
sbitmap *m_avl_def_in;
sbitmap *m_avl_def_out;
sbitmap *m_reg_def_loc; sbitmap *m_reg_def_loc;
/* data for vsetvl info reaching defintion. */ /* data for vsetvl info reaching defintion. */
@ -2292,8 +2296,7 @@ private:
public: public:
pre_vsetvl () pre_vsetvl ()
: m_avl_def_in (nullptr), m_avl_def_out (nullptr), : m_vsetvl_def_in (nullptr), m_vsetvl_def_out (nullptr), m_avloc (nullptr),
m_vsetvl_def_in (nullptr), m_vsetvl_def_out (nullptr), m_avloc (nullptr),
m_avin (nullptr), m_avout (nullptr), m_kill (nullptr), m_antloc (nullptr), m_avin (nullptr), m_avout (nullptr), m_kill (nullptr), m_antloc (nullptr),
m_transp (nullptr), m_insert (nullptr), m_del (nullptr), m_edges (nullptr) m_transp (nullptr), m_insert (nullptr), m_del (nullptr), m_edges (nullptr)
{ {
@ -2318,16 +2321,9 @@ public:
delete crtl->ssa; delete crtl->ssa;
crtl->ssa = nullptr; crtl->ssa = nullptr;
if (m_avl_regs)
sbitmap_free (m_avl_regs);
if (m_reg_def_loc) if (m_reg_def_loc)
sbitmap_vector_free (m_reg_def_loc); sbitmap_vector_free (m_reg_def_loc);
if (m_avl_def_in)
sbitmap_vector_free (m_avl_def_in);
if (m_avl_def_out)
sbitmap_vector_free (m_avl_def_out);
if (m_vsetvl_def_in) if (m_vsetvl_def_in)
sbitmap_vector_free (m_vsetvl_def_in); sbitmap_vector_free (m_vsetvl_def_in);
if (m_vsetvl_def_out) if (m_vsetvl_def_out)
@ -2354,7 +2350,6 @@ public:
free_edge_list (m_edges); free_edge_list (m_edges);
} }
void compute_avl_def_data ();
void compute_vsetvl_def_data (); void compute_vsetvl_def_data ();
void compute_lcm_local_properties (); void compute_lcm_local_properties ();
@ -2393,114 +2388,6 @@ public:
} }
}; };
/* Compute reaching-definition data for the registers recorded in m_avl_regs.

   Every (register, basic block) pair is treated as one "expression" whose id
   is produced by get_expr_id.  The results are stored in m_avl_def_in and
   m_avl_def_out (any vectors left over from an earlier call are freed first)
   and are then handed to m_dem through set_avl_in_out_data.  */
void
pre_vsetvl::compute_avl_def_data ()
{
/* Nothing to compute when no register is recorded in m_avl_regs.  */
if (bitmap_empty_p (m_avl_regs))
return;
unsigned num_regs = GP_REG_LAST + 1;
unsigned num_bbs = last_basic_block_for_fn (cfun);
/* Per-block register set: the intersection of m_avl_regs with
   m_reg_def_loc[bb] (presumably the registers defined in that block --
   TODO confirm), plus the VL register of the block's exit vsetvl info when
   that info has one.  */
sbitmap *avl_def_loc_temp = sbitmap_vector_alloc (num_bbs, num_regs);
for (const bb_info *bb : crtl->ssa->bbs ())
{
bitmap_and (avl_def_loc_temp[bb->index ()], m_avl_regs,
m_reg_def_loc[bb->index ()]);
vsetvl_block_info &block_info = get_block_info (bb);
if (block_info.has_info ())
{
vsetvl_info &footer_info = block_info.get_exit_info ();
gcc_assert (footer_info.valid_p ());
if (footer_info.has_vl ())
bitmap_set_bit (avl_def_loc_temp[bb->index ()],
REGNO (footer_info.get_vl ()));
}
}
/* Drop the results of any previous call before recomputing.  */
if (m_avl_def_in)
sbitmap_vector_free (m_avl_def_in);
if (m_avl_def_out)
sbitmap_vector_free (m_avl_def_out);
/* One expression per (register, block) pair.  Each of the four vectors
   below is allocated with num_bbs bitmaps of num_exprs = num_bbs * num_regs
   elements, so their size grows with the square of the block count.
   NOTE: the local `m_kill' declared here shadows the pre_vsetvl data member
   of the same name for the rest of this function.  */
unsigned num_exprs = num_bbs * num_regs;
sbitmap *avl_def_loc = sbitmap_vector_alloc (num_bbs, num_exprs);
sbitmap *m_kill = sbitmap_vector_alloc (num_bbs, num_exprs);
m_avl_def_in = sbitmap_vector_alloc (num_bbs, num_exprs);
m_avl_def_out = sbitmap_vector_alloc (num_bbs, num_exprs);
/* m_avl_def_in is not cleared here; presumably compute_reaching_defintion
   initializes it -- TODO confirm.  */
bitmap_vector_clear (avl_def_loc, num_bbs);
bitmap_vector_clear (m_kill, num_bbs);
bitmap_vector_clear (m_avl_def_out, num_bbs);
unsigned regno;
sbitmap_iterator sbi;
/* For every register in a block's set: mark the (regno, bb) expression in
   avl_def_loc, and mark in m_kill the whole id range belonging to that
   register (the num_bbs ids starting at regno * num_bbs, matching the
   get_expr_id encoding).  */
for (const bb_info *bb : crtl->ssa->bbs ())
EXECUTE_IF_SET_IN_BITMAP (avl_def_loc_temp[bb->index ()], 0, regno, sbi)
{
bitmap_set_bit (avl_def_loc[bb->index ()],
get_expr_id (bb->index (), regno, num_bbs));
bitmap_set_range (m_kill[bb->index ()], regno * num_bbs, num_bbs);
}
/* Seed the entry block's OUT set with one (regno, entry) expression for
   every register in m_avl_regs.  */
basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
EXECUTE_IF_SET_IN_BITMAP (m_avl_regs, 0, regno, sbi)
bitmap_set_bit (m_avl_def_out[entry->index],
get_expr_id (entry->index, regno, num_bbs));
compute_reaching_defintion (avl_def_loc, m_kill, m_avl_def_in, m_avl_def_out);
/* With detailed dumping enabled, print m_avl_regs and then, for each block,
   the four sets with every expression id decoded back to its
   (register, block) pair.  */
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file,
" Compute avl reaching defition data (num_bbs %d, num_regs "
"%d):\n\n",
num_bbs, num_regs);
fprintf (dump_file, " avl_regs: ");
dump_bitmap_file (dump_file, m_avl_regs);
fprintf (dump_file, "\n bitmap data:\n");
for (const bb_info *bb : crtl->ssa->bbs ())
{
unsigned int i = bb->index ();
fprintf (dump_file, " BB %u:\n", i);
fprintf (dump_file, " avl_def_loc:");
unsigned expr_id;
/* This iterator shadows the function-level `sbi' declared above.  */
sbitmap_iterator sbi;
EXECUTE_IF_SET_IN_BITMAP (avl_def_loc[i], 0, expr_id, sbi)
{
fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
get_bb_index (expr_id, num_bbs));
}
fprintf (dump_file, "\n kill:");
EXECUTE_IF_SET_IN_BITMAP (m_kill[i], 0, expr_id, sbi)
{
fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
get_bb_index (expr_id, num_bbs));
}
fprintf (dump_file, "\n avl_def_in:");
EXECUTE_IF_SET_IN_BITMAP (m_avl_def_in[i], 0, expr_id, sbi)
{
fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
get_bb_index (expr_id, num_bbs));
}
fprintf (dump_file, "\n avl_def_out:");
EXECUTE_IF_SET_IN_BITMAP (m_avl_def_out[i], 0, expr_id, sbi)
{
fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
get_bb_index (expr_id, num_bbs));
}
fprintf (dump_file, "\n");
}
}
/* Release the temporaries.  m_avl_def_in and m_avl_def_out stay allocated
   and are passed on to m_dem.  */
sbitmap_vector_free (avl_def_loc);
sbitmap_vector_free (m_kill);
sbitmap_vector_free (avl_def_loc_temp);
m_dem.set_avl_in_out_data (m_avl_def_in, m_avl_def_out);
}
void void
pre_vsetvl::compute_vsetvl_def_data () pre_vsetvl::compute_vsetvl_def_data ()
{ {
@ -2957,29 +2844,12 @@ pre_vsetvl::fuse_local_vsetvl_info ()
if (prev_info.valid_p () || prev_info.unknown_p ()) if (prev_info.valid_p () || prev_info.unknown_p ())
block_info.local_infos.safe_push (prev_info); block_info.local_infos.safe_push (prev_info);
} }
m_avl_regs = sbitmap_alloc (GP_REG_LAST + 1);
bitmap_clear (m_avl_regs);
for (const bb_info *bb : crtl->ssa->bbs ())
{
vsetvl_block_info &block_info = get_block_info (bb);
if (block_info.empty_p ())
continue;
vsetvl_info &header_info = block_info.get_entry_info ();
if (header_info.valid_p () && header_info.has_nonvlmax_reg_avl ())
{
gcc_assert (GP_REG_P (REGNO (header_info.get_avl ())));
bitmap_set_bit (m_avl_regs, REGNO (header_info.get_avl ()));
}
}
} }
bool bool
pre_vsetvl::earliest_fuse_vsetvl_info (int iter) pre_vsetvl::earliest_fuse_vsetvl_info (int iter)
{ {
compute_avl_def_data ();
compute_vsetvl_def_data (); compute_vsetvl_def_data ();
compute_lcm_local_properties (); compute_lcm_local_properties ();
@ -3235,7 +3105,6 @@ pre_vsetvl::earliest_fuse_vsetvl_info (int iter)
void void
pre_vsetvl::pre_global_vsetvl_info () pre_vsetvl::pre_global_vsetvl_info ()
{ {
compute_avl_def_data ();
compute_vsetvl_def_data (); compute_vsetvl_def_data ();
compute_lcm_local_properties (); compute_lcm_local_properties ();

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "--param=riscv-autovec-preference=scalable -march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */ /* { dg-options "--param=riscv-autovec-preference=scalable -march=rv32gcv -mabi=ilp32 -fno-tree-vectorize" } */
#include "riscv_vector.h" #include "riscv_vector.h"