Expand: Enable vector mode for by pieces compares

Vector-mode compare instructions are efficient for equality comparisons on
rs6000.  This patch refactors the by-pieces operation code so that vector
modes can also be used for compares.

gcc/
	PR target/111449
	* expr.cc (can_use_qi_vectors): New function to return true if
	we know how to implement OP using vectors of bytes.
	(qi_vector_mode_supported_p): New function to check if optabs
	exist for the mode and certain by pieces operations.
	(widest_fixed_size_mode_for_size): Replace the second argument
	with the type of by pieces operations.  Call can_use_qi_vectors
	and qi_vector_mode_supported_p to do the check.  Call
	scalar_mode_supported_p to check if the scalar mode is supported.
	(by_pieces_ninsns): Pass the type of by pieces operation to
	widest_fixed_size_mode_for_size.
	(class op_by_pieces_d): Remove m_qi_vector_mode.  Add m_op to
	record the type of by pieces operations.
	(op_by_pieces_d::op_by_pieces_d): Change last argument to the
	type of by pieces operations, initialize m_op with it.  Pass
	m_op to function widest_fixed_size_mode_for_size.
	(op_by_pieces_d::get_usable_mode): Pass m_op to function
	widest_fixed_size_mode_for_size.
	(op_by_pieces_d::smallest_fixed_size_mode_for_size): Call
	can_use_qi_vectors and qi_vector_mode_supported_p to do the
	check.
	(op_by_pieces_d::run): Pass m_op to function
	widest_fixed_size_mode_for_size.
	(move_by_pieces_d::move_by_pieces_d): Set m_op to MOVE_BY_PIECES.
	(store_by_pieces_d::store_by_pieces_d): Set m_op with the op.
	(can_store_by_pieces): Pass the type of by pieces operations to
	widest_fixed_size_mode_for_size.
	(clear_by_pieces): Initialize class store_by_pieces_d with
	CLEAR_BY_PIECES.
	(compare_by_pieces_d::compare_by_pieces_d): Set m_op to
	COMPARE_BY_PIECES.
This commit is contained in:
Haochen Gui 2023-10-23 09:14:13 +08:00
parent dbde384bd5
commit f08ca5903c

View file

@ -988,18 +988,44 @@ alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align)
return align; return align;
} }
/* Return the widest QI vector, if QI_MODE is true, or integer mode /* Return true if we know how to implement OP using vectors of bytes. */
that is narrower than SIZE bytes. */ static bool
can_use_qi_vectors (by_pieces_operation op)
{
  /* Compare, set and clear are the only by-pieces operations accepted
     here; every other value of OP is rejected.  */
  switch (op)
    {
    case COMPARE_BY_PIECES:
    case SET_BY_PIECES:
    case CLEAR_BY_PIECES:
      return true;

    default:
      return false;
    }
}
/* Return true if MODE has the optab support that the by-pieces
   operation OP relies on:

   - SET_BY_PIECES and CLEAR_BY_PIECES need a vec_duplicate handler;
   - COMPARE_BY_PIECES needs a mov handler and can_compare_p must
     accept an EQ comparison in MODE for ccp_jump.

   Any other operation yields false.  */
static bool
qi_vector_mode_supported_p (fixed_size_mode mode, by_pieces_operation op)
{
  switch (op)
    {
    case SET_BY_PIECES:
    case CLEAR_BY_PIECES:
      return optab_handler (vec_duplicate_optab, mode) != CODE_FOR_nothing;

    case COMPARE_BY_PIECES:
      /* The mov handler is tested first, so can_compare_p is only
	 consulted for modes that can be moved.  */
      return (optab_handler (mov_optab, mode) != CODE_FOR_nothing
	      && can_compare_p (EQ, mode, ccp_jump));

    default:
      return false;
    }
}
/* Return the widest mode that can be used to perform part of an
operation OP on SIZE bytes. Try to use QI vector modes where
possible. */
static fixed_size_mode static fixed_size_mode
widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector) widest_fixed_size_mode_for_size (unsigned int size, by_pieces_operation op)
{ {
fixed_size_mode result = NARROWEST_INT_MODE; fixed_size_mode result = NARROWEST_INT_MODE;
gcc_checking_assert (size > 1); gcc_checking_assert (size > 1);
/* Use QI vector only if size is wider than a WORD. */ /* Use QI vector only if size is wider than a WORD. */
if (qi_vector && size > UNITS_PER_WORD) if (can_use_qi_vectors (op) && size > UNITS_PER_WORD)
{ {
machine_mode mode; machine_mode mode;
fixed_size_mode candidate; fixed_size_mode candidate;
@ -1009,8 +1035,7 @@ widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector)
{ {
if (GET_MODE_SIZE (candidate) >= size) if (GET_MODE_SIZE (candidate) >= size)
break; break;
if (optab_handler (vec_duplicate_optab, candidate) if (qi_vector_mode_supported_p (candidate, op))
!= CODE_FOR_nothing)
result = candidate; result = candidate;
} }
@ -1019,9 +1044,14 @@ widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector)
} }
opt_scalar_int_mode tmode; opt_scalar_int_mode tmode;
scalar_int_mode mode;
FOR_EACH_MODE_IN_CLASS (tmode, MODE_INT) FOR_EACH_MODE_IN_CLASS (tmode, MODE_INT)
if (GET_MODE_SIZE (tmode.require ()) < size) {
result = tmode.require (); mode = tmode.require ();
if (GET_MODE_SIZE (mode) < size
&& targetm.scalar_mode_supported_p (mode))
result = mode;
}
return result; return result;
} }
@ -1061,8 +1091,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,
{ {
/* NB: Round up L and ALIGN to the widest integer mode for /* NB: Round up L and ALIGN to the widest integer mode for
MAX_SIZE. */ MAX_SIZE. */
mode = widest_fixed_size_mode_for_size (max_size, mode = widest_fixed_size_mode_for_size (max_size, op);
op == SET_BY_PIECES);
if (optab_handler (mov_optab, mode) != CODE_FOR_nothing) if (optab_handler (mov_optab, mode) != CODE_FOR_nothing)
{ {
unsigned HOST_WIDE_INT up = ROUND_UP (l, GET_MODE_SIZE (mode)); unsigned HOST_WIDE_INT up = ROUND_UP (l, GET_MODE_SIZE (mode));
@ -1076,8 +1105,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,
while (max_size > 1 && l > 0) while (max_size > 1 && l > 0)
{ {
mode = widest_fixed_size_mode_for_size (max_size, mode = widest_fixed_size_mode_for_size (max_size, op);
op == SET_BY_PIECES);
enum insn_code icode; enum insn_code icode;
unsigned int modesize = GET_MODE_SIZE (mode); unsigned int modesize = GET_MODE_SIZE (mode);
@ -1317,8 +1345,8 @@ class op_by_pieces_d
bool m_push; bool m_push;
/* True if targetm.overlap_op_by_pieces_p () returns true. */ /* True if targetm.overlap_op_by_pieces_p () returns true. */
bool m_overlap_op_by_pieces; bool m_overlap_op_by_pieces;
/* True if QI vector mode can be used. */ /* The type of operation that we're performing. */
bool m_qi_vector_mode; by_pieces_operation m_op;
/* Virtual functions, overriden by derived classes for the specific /* Virtual functions, overriden by derived classes for the specific
operation. */ operation. */
@ -1331,7 +1359,7 @@ class op_by_pieces_d
public: public:
op_by_pieces_d (unsigned int, rtx, bool, rtx, bool, by_pieces_constfn, op_by_pieces_d (unsigned int, rtx, bool, rtx, bool, by_pieces_constfn,
void *, unsigned HOST_WIDE_INT, unsigned int, bool, void *, unsigned HOST_WIDE_INT, unsigned int, bool,
bool = false); by_pieces_operation);
void run (); void run ();
}; };
@ -1349,11 +1377,11 @@ op_by_pieces_d::op_by_pieces_d (unsigned int max_pieces, rtx to,
void *from_cfn_data, void *from_cfn_data,
unsigned HOST_WIDE_INT len, unsigned HOST_WIDE_INT len,
unsigned int align, bool push, unsigned int align, bool push,
bool qi_vector_mode) by_pieces_operation op)
: m_to (to, to_load, NULL, NULL), : m_to (to, to_load, NULL, NULL),
m_from (from, from_load, from_cfn, from_cfn_data), m_from (from, from_load, from_cfn, from_cfn_data),
m_len (len), m_max_size (max_pieces + 1), m_len (len), m_max_size (max_pieces + 1),
m_push (push), m_qi_vector_mode (qi_vector_mode) m_push (push), m_op (op)
{ {
int toi = m_to.get_addr_inc (); int toi = m_to.get_addr_inc ();
int fromi = m_from.get_addr_inc (); int fromi = m_from.get_addr_inc ();
@ -1375,8 +1403,7 @@ op_by_pieces_d::op_by_pieces_d (unsigned int max_pieces, rtx to,
{ {
/* Find the mode of the largest comparison. */ /* Find the mode of the largest comparison. */
fixed_size_mode mode fixed_size_mode mode
= widest_fixed_size_mode_for_size (m_max_size, = widest_fixed_size_mode_for_size (m_max_size, m_op);
m_qi_vector_mode);
m_from.decide_autoinc (mode, m_reverse, len); m_from.decide_autoinc (mode, m_reverse, len);
m_to.decide_autoinc (mode, m_reverse, len); m_to.decide_autoinc (mode, m_reverse, len);
@ -1401,7 +1428,7 @@ op_by_pieces_d::get_usable_mode (fixed_size_mode mode, unsigned int len)
if (len >= size && prepare_mode (mode, m_align)) if (len >= size && prepare_mode (mode, m_align))
break; break;
/* widest_fixed_size_mode_for_size checks SIZE > 1. */ /* widest_fixed_size_mode_for_size checks SIZE > 1. */
mode = widest_fixed_size_mode_for_size (size, m_qi_vector_mode); mode = widest_fixed_size_mode_for_size (size, m_op);
} }
while (1); while (1);
return mode; return mode;
@ -1414,7 +1441,7 @@ fixed_size_mode
op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size) op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size)
{ {
/* Use QI vector only for > size of WORD. */ /* Use QI vector only for > size of WORD. */
if (m_qi_vector_mode && size > UNITS_PER_WORD) if (can_use_qi_vectors (m_op) && size > UNITS_PER_WORD)
{ {
machine_mode mode; machine_mode mode;
fixed_size_mode candidate; fixed_size_mode candidate;
@ -1427,8 +1454,7 @@ op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size)
break; break;
if (GET_MODE_SIZE (candidate) >= size if (GET_MODE_SIZE (candidate) >= size
&& (optab_handler (vec_duplicate_optab, candidate) && qi_vector_mode_supported_p (candidate, m_op))
!= CODE_FOR_nothing))
return candidate; return candidate;
} }
} }
@ -1451,7 +1477,7 @@ op_by_pieces_d::run ()
/* widest_fixed_size_mode_for_size checks M_MAX_SIZE > 1. */ /* widest_fixed_size_mode_for_size checks M_MAX_SIZE > 1. */
fixed_size_mode mode fixed_size_mode mode
= widest_fixed_size_mode_for_size (m_max_size, m_qi_vector_mode); = widest_fixed_size_mode_for_size (m_max_size, m_op);
mode = get_usable_mode (mode, length); mode = get_usable_mode (mode, length);
by_pieces_prev to_prev = { nullptr, mode }; by_pieces_prev to_prev = { nullptr, mode };
@ -1516,8 +1542,7 @@ op_by_pieces_d::run ()
else else
{ {
/* widest_fixed_size_mode_for_size checks SIZE > 1. */ /* widest_fixed_size_mode_for_size checks SIZE > 1. */
mode = widest_fixed_size_mode_for_size (size, mode = widest_fixed_size_mode_for_size (size, m_op);
m_qi_vector_mode);
mode = get_usable_mode (mode, length); mode = get_usable_mode (mode, length);
} }
} }
@ -1543,7 +1568,7 @@ class move_by_pieces_d : public op_by_pieces_d
move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len, move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len,
unsigned int align) unsigned int align)
: op_by_pieces_d (MOVE_MAX_PIECES, to, false, from, true, NULL, : op_by_pieces_d (MOVE_MAX_PIECES, to, false, from, true, NULL,
NULL, len, align, PUSHG_P (to)) NULL, len, align, PUSHG_P (to), MOVE_BY_PIECES)
{ {
} }
rtx finish_retmode (memop_ret); rtx finish_retmode (memop_ret);
@ -1632,15 +1657,16 @@ move_by_pieces (rtx to, rtx from, unsigned HOST_WIDE_INT len,
class store_by_pieces_d : public op_by_pieces_d class store_by_pieces_d : public op_by_pieces_d
{ {
insn_gen_fn m_gen_fun; insn_gen_fn m_gen_fun;
void generate (rtx, rtx, machine_mode) final override; void generate (rtx, rtx, machine_mode) final override;
bool prepare_mode (machine_mode, unsigned int) final override; bool prepare_mode (machine_mode, unsigned int) final override;
public: public:
store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data, store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data,
unsigned HOST_WIDE_INT len, unsigned int align, unsigned HOST_WIDE_INT len, unsigned int align,
bool qi_vector_mode) by_pieces_operation op)
: op_by_pieces_d (STORE_MAX_PIECES, to, false, NULL_RTX, true, cfn, : op_by_pieces_d (STORE_MAX_PIECES, to, false, NULL_RTX, true, cfn,
cfn_data, len, align, false, qi_vector_mode) cfn_data, len, align, false, op)
{ {
} }
rtx finish_retmode (memop_ret); rtx finish_retmode (memop_ret);
@ -1729,8 +1755,8 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
max_size = STORE_MAX_PIECES + 1; max_size = STORE_MAX_PIECES + 1;
while (max_size > 1 && l > 0) while (max_size > 1 && l > 0)
{ {
fixed_size_mode mode auto op = memsetp ? SET_BY_PIECES : STORE_BY_PIECES;
= widest_fixed_size_mode_for_size (max_size, memsetp); auto mode = widest_fixed_size_mode_for_size (max_size, op);
icode = optab_handler (mov_optab, mode); icode = optab_handler (mov_optab, mode);
if (icode != CODE_FOR_nothing if (icode != CODE_FOR_nothing
@ -1793,7 +1819,7 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
optimize_insn_for_speed_p ())); optimize_insn_for_speed_p ()));
store_by_pieces_d data (to, constfun, constfundata, len, align, store_by_pieces_d data (to, constfun, constfundata, len, align,
memsetp); memsetp ? SET_BY_PIECES : STORE_BY_PIECES);
data.run (); data.run ();
if (retmode != RETURN_BEGIN) if (retmode != RETURN_BEGIN)
@ -1814,7 +1840,7 @@ clear_by_pieces (rtx to, unsigned HOST_WIDE_INT len, unsigned int align)
/* Use builtin_memset_read_str to support vector mode broadcast. */ /* Use builtin_memset_read_str to support vector mode broadcast. */
char c = 0; char c = 0;
store_by_pieces_d data (to, builtin_memset_read_str, &c, len, align, store_by_pieces_d data (to, builtin_memset_read_str, &c, len, align,
true); CLEAR_BY_PIECES);
data.run (); data.run ();
} }
@ -1832,12 +1858,13 @@ class compare_by_pieces_d : public op_by_pieces_d
void generate (rtx, rtx, machine_mode) final override; void generate (rtx, rtx, machine_mode) final override;
bool prepare_mode (machine_mode, unsigned int) final override; bool prepare_mode (machine_mode, unsigned int) final override;
void finish_mode (machine_mode) final override; void finish_mode (machine_mode) final override;
public: public:
compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn, compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn,
void *op1_cfn_data, HOST_WIDE_INT len, int align, void *op1_cfn_data, HOST_WIDE_INT len, int align,
rtx_code_label *fail_label) rtx_code_label *fail_label)
: op_by_pieces_d (COMPARE_MAX_PIECES, op0, true, op1, true, op1_cfn, : op_by_pieces_d (COMPARE_MAX_PIECES, op0, true, op1, true, op1_cfn,
op1_cfn_data, len, align, false) op1_cfn_data, len, align, false, COMPARE_BY_PIECES)
{ {
m_fail_label = fail_label; m_fail_label = fail_label;
} }