re PR tree-optimization/52171 (memcmp/strcmp/strncmp can be optimized when the result is tested for [in]equality with 0)
PR tree-optimization/52171 * builtins.c (expand_cmpstrn_or_cmpmem): Delete, moved elsewhere. (expand_builtin_memcmp): New arg RESULT_EQ. All callers changed. Look for constant strings. Move some code to emit_block_cmp_hints and use it. * builtins.def (BUILT_IN_MEMCMP_EQ): New. * defaults.h (COMPARE_MAX_PIECES): New macro. * expr.c (move_by_pieces_d, store_by_pieces_d): Remove old structs. (move_by_pieces_1, store_by_pieces_1, store_by_pieces_2): Remove. (clear_by_pieces_1): Don't declare. Move definition before use. (can_do_by_pieces): New static function. (can_move_by_pieces): Use it. Return bool. (by_pieces_ninsns): Renamed from move_by_pieces_ninsns. New arg OP. All callers changed. Handle COMPARE_BY_PIECES. (class pieces_addr): New. (pieces_addr::pieces_addr, pieces_addr::decide_autoinc, pieces_addr::adjust, pieces_addr::increment_address, pieces_addr::maybe_predec, pieces_addr::maybe_postinc): New member functions for it. (class op_by_pieces_d): New. (op_by_pieces_d::op_by_pieces_d, op_by_pieces_d::run): New member functions for it. (class move_by_pieces_d, class compare_by_pieces_d, class store_by_pieces_d): New subclasses of op_by_pieces_d. (move_by_pieces_d::prepare_mode, move_by_pieces_d::generate, move_by_pieces_d::finish_endp, store_by_pieces_d::prepare_mode, store_by_pieces_d::generate, store_by_pieces_d::finish_endp, compare_by_pieces_d::generate, compare_by_pieces_d::prepare_mode, compare_by_pieces_d::finish_mode): New member functions. (compare_by_pieces, emit_block_cmp_via_cmpmem): New static functions. (expand_cmpstrn_or_cmpmem): Moved here from builtins.c. (emit_block_cmp_hints): New function. (move_by_pieces, store_by_pieces, clear_by_pieces): Rewrite to just use the newly defined classes. * expr.h (by_pieces_constfn): New typedef. (can_store_by_pieces, store_by_pieces): Use it in arg declarations. (emit_block_cmp_hints, expand_cmpstrn_or_cmpmem): Declare. (move_by_pieces_ninsns): Don't declare. (can_move_by_pieces): Change return value to bool. 
* target.def (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): Update docs. (compare_by_pieces_branch_ratio): New hook. * target.h (enum by_pieces_operation): Add COMPARE_BY_PIECES. (by_pieces_ninsns): Declare. * targhooks.c (default_use_by_pieces_infrastructure_p): Handle COMPARE_BY_PIECES. (default_compare_by_pieces_branch_ratio): New function. * targhooks.h (default_compare_by_pieces_branch_ratio): Declare. * doc/tm.texi.in (STORE_MAX_PIECES, COMPARE_MAX_PIECES): Document. * doc/tm.texi: Regenerate. * tree-ssa-strlen.c: Include "builtins.h". (handle_builtin_memcmp): New static function. (strlen_optimize_stmt): Call it for BUILT_IN_MEMCMP. * tree.c (build_common_builtin_nodes): Create __builtin_memcmp_eq. testsuite/ PR tree-optimization/52171 * gcc.dg/pr52171.c: New test. * gcc.target/i386/pr52171.c: New test. From-SVN: r237069
This commit is contained in:
parent
bfeee8acaa
commit
36b85e4328
17 changed files with 1183 additions and 639 deletions
|
@ -1,3 +1,60 @@
|
|||
2016-06-03 Bernd Schmidt <bschmidt@redhat.com>
|
||||
|
||||
PR tree-optimization/52171
|
||||
* builtins.c (expand_cmpstrn_or_cmpmem): Delete, moved elsewhere.
|
||||
(expand_builtin_memcmp): New arg RESULT_EQ. All callers changed.
|
||||
Look for constant strings. Move some code to emit_block_cmp_hints
|
||||
and use it.
|
||||
* builtins.def (BUILT_IN_MEMCMP_EQ): New.
|
||||
* defaults.h (COMPARE_MAX_PIECES): New macro.
|
||||
* expr.c (move_by_pieces_d, store_by_pieces_d): Remove old structs.
|
||||
(move_by_pieces_1, store_by_pieces_1, store_by_pieces_2): Remove.
|
||||
(clear_by_pieces_1): Don't declare. Move definition before use.
|
||||
(can_do_by_pieces): New static function.
|
||||
(can_move_by_pieces): Use it. Return bool.
|
||||
(by_pieces_ninsns): Renamed from move_by_pieces_ninsns. New arg
|
||||
OP. All callers changed. Handle COMPARE_BY_PIECES.
|
||||
(class pieces_addr): New.
|
||||
(pieces_addr::pieces_addr, pieces_addr::decide_autoinc,
|
||||
pieces_addr::adjust, pieces_addr::increment_address,
|
||||
pieces_addr::maybe_predec, pieces_addr::maybe_postinc): New member
|
||||
functions for it.
|
||||
(class op_by_pieces_d): New.
|
||||
(op_by_pieces_d::op_by_pieces_d, op_by_pieces_d::run): New member
|
||||
functions for it.
|
||||
(class move_by_pieces_d, class compare_by_pieces_d,
|
||||
class store_by_pieces_d): New subclasses of op_by_pieces_d.
|
||||
(move_by_pieces_d::prepare_mode, move_by_pieces_d::generate,
|
||||
move_by_pieces_d::finish_endp, store_by_pieces_d::prepare_mode,
|
||||
store_by_pieces_d::generate, store_by_pieces_d::finish_endp,
|
||||
compare_by_pieces_d::generate, compare_by_pieces_d::prepare_mode,
|
||||
compare_by_pieces_d::finish_mode): New member functions.
|
||||
(compare_by_pieces, emit_block_cmp_via_cmpmem): New static
|
||||
functions.
|
||||
(expand_cmpstrn_or_cmpmem): Moved here from builtins.c.
|
||||
(emit_block_cmp_hints): New function.
|
||||
(move_by_pieces, store_by_pieces, clear_by_pieces): Rewrite to just
|
||||
use the newly defined classes.
|
||||
* expr.h (by_pieces_constfn): New typedef.
|
||||
(can_store_by_pieces, store_by_pieces): Use it in arg declarations.
|
||||
(emit_block_cmp_hints, expand_cmpstrn_or_cmpmem): Declare.
|
||||
(move_by_pieces_ninsns): Don't declare.
|
||||
(can_move_by_pieces): Change return value to bool.
|
||||
* target.def (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): Update docs.
|
||||
(compare_by_pieces_branch_ratio): New hook.
|
||||
* target.h (enum by_pieces_operation): Add COMPARE_BY_PIECES.
|
||||
(by_pieces_ninsns): Declare.
|
||||
* targhooks.c (default_use_by_pieces_infrastructure_p): Handle
|
||||
COMPARE_BY_PIECES.
|
||||
(default_compare_by_pieces_branch_ratio): New function.
|
||||
* targhooks.h (default_compare_by_pieces_branch_ratio): Declare.
|
||||
* doc/tm.texi.in (STORE_MAX_PIECES, COMPARE_MAX_PIECES): Document.
|
||||
* doc/tm.texi: Regenerate.
|
||||
* tree-ssa-strlen.c: Include "builtins.h".
|
||||
(handle_builtin_memcmp): New static function.
|
||||
(strlen_optimize_stmt): Call it for BUILT_IN_MEMCMP.
|
||||
* tree.c (build_common_builtin_nodes): Create __builtin_memcmp_eq.
|
||||
|
||||
2016-06-03 Alan Hayward <alan.hayward@arm.com>
|
||||
|
||||
* tree-vect-stmts.c (vect_stmt_relevant_p): Do not vectorize non live
|
||||
|
|
|
@ -3671,53 +3671,24 @@ expand_cmpstr (insn_code icode, rtx target, rtx arg1_rtx, rtx arg2_rtx,
|
|||
return NULL_RTX;
|
||||
}
|
||||
|
||||
/* Try to expand cmpstrn or cmpmem operation ICODE with the given operands.
|
||||
ARG3_TYPE is the type of ARG3_RTX. Return the result rtx on success,
|
||||
otherwise return null. */
|
||||
|
||||
static rtx
|
||||
expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, rtx arg1_rtx,
|
||||
rtx arg2_rtx, tree arg3_type, rtx arg3_rtx,
|
||||
HOST_WIDE_INT align)
|
||||
{
|
||||
machine_mode insn_mode = insn_data[icode].operand[0].mode;
|
||||
|
||||
if (target && (!REG_P (target) || HARD_REGISTER_P (target)))
|
||||
target = NULL_RTX;
|
||||
|
||||
struct expand_operand ops[5];
|
||||
create_output_operand (&ops[0], target, insn_mode);
|
||||
create_fixed_operand (&ops[1], arg1_rtx);
|
||||
create_fixed_operand (&ops[2], arg2_rtx);
|
||||
create_convert_operand_from (&ops[3], arg3_rtx, TYPE_MODE (arg3_type),
|
||||
TYPE_UNSIGNED (arg3_type));
|
||||
create_integer_operand (&ops[4], align);
|
||||
if (maybe_expand_insn (icode, 5, ops))
|
||||
return ops[0].value;
|
||||
return NULL_RTX;
|
||||
}
|
||||
|
||||
/* Expand expression EXP, which is a call to the memcmp built-in function.
|
||||
Return NULL_RTX if we failed and the caller should emit a normal call,
|
||||
otherwise try to get the result in TARGET, if convenient. */
|
||||
otherwise try to get the result in TARGET, if convenient.
|
||||
RESULT_EQ is true if we can relax the returned value to be either zero
|
||||
or nonzero, without caring about the sign. */
|
||||
|
||||
static rtx
|
||||
expand_builtin_memcmp (tree exp, rtx target)
|
||||
expand_builtin_memcmp (tree exp, rtx target, bool result_eq)
|
||||
{
|
||||
if (!validate_arglist (exp,
|
||||
POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
|
||||
return NULL_RTX;
|
||||
|
||||
/* Note: The cmpstrnsi pattern, if it exists, is not suitable for
|
||||
implementing memcmp because it will stop if it encounters two
|
||||
zero bytes. */
|
||||
insn_code icode = direct_optab_handler (cmpmem_optab, SImode);
|
||||
if (icode == CODE_FOR_nothing)
|
||||
return NULL_RTX;
|
||||
|
||||
tree arg1 = CALL_EXPR_ARG (exp, 0);
|
||||
tree arg2 = CALL_EXPR_ARG (exp, 1);
|
||||
tree len = CALL_EXPR_ARG (exp, 2);
|
||||
machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
|
||||
location_t loc = EXPR_LOCATION (exp);
|
||||
|
||||
unsigned int arg1_align = get_pointer_alignment (arg1) / BITS_PER_UNIT;
|
||||
unsigned int arg2_align = get_pointer_alignment (arg2) / BITS_PER_UNIT;
|
||||
|
@ -3726,22 +3697,38 @@ expand_builtin_memcmp (tree exp, rtx target)
|
|||
if (arg1_align == 0 || arg2_align == 0)
|
||||
return NULL_RTX;
|
||||
|
||||
machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
|
||||
location_t loc = EXPR_LOCATION (exp);
|
||||
rtx arg1_rtx = get_memory_rtx (arg1, len);
|
||||
rtx arg2_rtx = get_memory_rtx (arg2, len);
|
||||
rtx arg3_rtx = expand_normal (fold_convert_loc (loc, sizetype, len));
|
||||
rtx len_rtx = expand_normal (fold_convert_loc (loc, sizetype, len));
|
||||
|
||||
/* Set MEM_SIZE as appropriate. */
|
||||
if (CONST_INT_P (arg3_rtx))
|
||||
if (CONST_INT_P (len_rtx))
|
||||
{
|
||||
set_mem_size (arg1_rtx, INTVAL (arg3_rtx));
|
||||
set_mem_size (arg2_rtx, INTVAL (arg3_rtx));
|
||||
set_mem_size (arg1_rtx, INTVAL (len_rtx));
|
||||
set_mem_size (arg2_rtx, INTVAL (len_rtx));
|
||||
}
|
||||
|
||||
rtx result = expand_cmpstrn_or_cmpmem (icode, target, arg1_rtx, arg2_rtx,
|
||||
TREE_TYPE (len), arg3_rtx,
|
||||
MIN (arg1_align, arg2_align));
|
||||
by_pieces_constfn constfn = NULL;
|
||||
|
||||
const char *src_str = c_getstr (arg1);
|
||||
if (src_str == NULL)
|
||||
src_str = c_getstr (arg2);
|
||||
else
|
||||
std::swap (arg1_rtx, arg2_rtx);
|
||||
|
||||
/* If SRC is a string constant and block move would be done
|
||||
by pieces, we can avoid loading the string from memory
|
||||
and only store the computed constants. */
|
||||
if (src_str
|
||||
&& CONST_INT_P (len_rtx)
|
||||
&& (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= strlen (src_str) + 1)
|
||||
constfn = builtin_memcpy_read_str;
|
||||
|
||||
rtx result = emit_block_cmp_hints (arg1_rtx, arg2_rtx, len_rtx,
|
||||
TREE_TYPE (len), target,
|
||||
result_eq, constfn,
|
||||
CONST_CAST (char *, src_str));
|
||||
|
||||
if (result)
|
||||
{
|
||||
/* Return the value in the proper mode for this function. */
|
||||
|
@ -6073,9 +6060,15 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
|
|||
|
||||
case BUILT_IN_BCMP:
|
||||
case BUILT_IN_MEMCMP:
|
||||
target = expand_builtin_memcmp (exp, target);
|
||||
case BUILT_IN_MEMCMP_EQ:
|
||||
target = expand_builtin_memcmp (exp, target, fcode == BUILT_IN_MEMCMP_EQ);
|
||||
if (target)
|
||||
return target;
|
||||
if (fcode == BUILT_IN_MEMCMP_EQ)
|
||||
{
|
||||
tree newdecl = builtin_decl_explicit (BUILT_IN_MEMCMP);
|
||||
TREE_OPERAND (exp, 1) = build_fold_addr_expr (newdecl);
|
||||
}
|
||||
break;
|
||||
|
||||
case BUILT_IN_SETJMP:
|
||||
|
|
|
@ -864,6 +864,10 @@ DEF_BUILTIN_STUB (BUILT_IN_STACK_SAVE, "__builtin_stack_save")
|
|||
DEF_BUILTIN_STUB (BUILT_IN_STACK_RESTORE, "__builtin_stack_restore")
|
||||
DEF_BUILTIN_STUB (BUILT_IN_ALLOCA_WITH_ALIGN, "__builtin_alloca_with_align")
|
||||
|
||||
/* An internal version of memcmp, used when the result is only tested for
|
||||
equality with zero. */
|
||||
DEF_BUILTIN_STUB (BUILT_IN_MEMCMP_EQ, "__builtin_memcmp_eq")
|
||||
|
||||
/* Object size checking builtins. */
|
||||
DEF_GCC_BUILTIN (BUILT_IN_OBJECT_SIZE, "object_size", BT_FN_SIZE_CONST_PTR_INT, ATTR_PURE_NOTHROW_LEAF_LIST)
|
||||
DEF_EXT_LIB_BUILTIN_CHKP (BUILT_IN_MEMCPY_CHK, "__memcpy_chk", BT_FN_PTR_PTR_CONST_PTR_SIZE_SIZE, ATTR_RET1_NOTHROW_NONNULL_LEAF)
|
||||
|
|
|
@ -1039,6 +1039,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|||
#define STORE_MAX_PIECES MIN (MOVE_MAX_PIECES, 2 * sizeof (HOST_WIDE_INT))
|
||||
#endif
|
||||
|
||||
/* Likewise for block comparisons. */
|
||||
#ifndef COMPARE_MAX_PIECES
|
||||
#define COMPARE_MAX_PIECES MOVE_MAX_PIECES
|
||||
#endif
|
||||
|
||||
#ifndef MAX_MOVE_MAX
|
||||
#define MAX_MOVE_MAX MOVE_MAX
|
||||
#endif
|
||||
|
|
|
@ -6315,8 +6315,9 @@ Both @var{size} and @var{alignment} are measured in terms of storage
|
|||
units.
|
||||
|
||||
The parameter @var{op} is one of: @code{CLEAR_BY_PIECES},
|
||||
@code{MOVE_BY_PIECES}, @code{SET_BY_PIECES}, @code{STORE_BY_PIECES}.
|
||||
These describe the type of memory operation under consideration.
|
||||
@code{MOVE_BY_PIECES}, @code{SET_BY_PIECES}, @code{STORE_BY_PIECES} or
|
||||
@code{COMPARE_BY_PIECES}. These describe the type of memory operation
|
||||
under consideration.
|
||||
|
||||
The parameter @var{speed_p} is true if the code is currently being
|
||||
optimized for speed rather than size.
|
||||
|
@ -6333,11 +6334,33 @@ in code size, for example where the number of insns emitted to perform a
|
|||
move would be greater than that of a library call.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_COMPARE_BY_PIECES_BRANCH_RATIO (machine_mode @var{mode})
|
||||
When expanding a block comparison in MODE, gcc can try to reduce the
|
||||
number of branches at the expense of more memory operations. This hook
|
||||
allows the target to override the default choice. It should return the
|
||||
factor by which branches should be reduced over the plain expansion with
|
||||
one comparison per @var{mode}-sized piece. A port can also prevent a
|
||||
particular mode from being used for block comparisons by returning a
|
||||
negative number from this hook.
|
||||
@end deftypefn
|
||||
|
||||
@defmac MOVE_MAX_PIECES
|
||||
A C expression used by @code{move_by_pieces} to determine the largest unit
|
||||
a load or store used to copy memory is. Defaults to @code{MOVE_MAX}.
|
||||
@end defmac
|
||||
|
||||
@defmac STORE_MAX_PIECES
|
||||
A C expression used by @code{store_by_pieces} to determine the largest unit
|
||||
a store used to write to memory is. Defaults to @code{MOVE_MAX_PIECES}, or two times
|
||||
the size of @code{HOST_WIDE_INT}, whichever is smaller.
|
||||
@end defmac
|
||||
|
||||
@defmac COMPARE_MAX_PIECES
|
||||
A C expression used by @code{compare_by_pieces} to determine the largest unit
|
||||
a load or store used to compare memory is. Defaults to
|
||||
@code{MOVE_MAX_PIECES}.
|
||||
@end defmac
|
||||
|
||||
@defmac CLEAR_RATIO (@var{speed})
|
||||
The threshold of number of scalar move insns, @emph{below} which a sequence
|
||||
of insns should be generated to clear memory instead of a string clear insn
|
||||
|
|
|
@ -4653,11 +4653,25 @@ If you don't define this, a reasonable default is used.
|
|||
|
||||
@hook TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
|
||||
|
||||
@hook TARGET_COMPARE_BY_PIECES_BRANCH_RATIO
|
||||
|
||||
@defmac MOVE_MAX_PIECES
|
||||
A C expression used by @code{move_by_pieces} to determine the largest unit
|
||||
a load or store used to copy memory is. Defaults to @code{MOVE_MAX}.
|
||||
@end defmac
|
||||
|
||||
@defmac STORE_MAX_PIECES
|
||||
A C expression used by @code{store_by_pieces} to determine the largest unit
|
||||
a store used to write to memory is. Defaults to @code{MOVE_MAX_PIECES}, or two times
|
||||
the size of @code{HOST_WIDE_INT}, whichever is smaller.
|
||||
@end defmac
|
||||
|
||||
@defmac COMPARE_MAX_PIECES
|
||||
A C expression used by @code{compare_by_pieces} to determine the largest unit
|
||||
a load or store used to compare memory is. Defaults to
|
||||
@code{MOVE_MAX_PIECES}.
|
||||
@end defmac
|
||||
|
||||
@defmac CLEAR_RATIO (@var{speed})
|
||||
The threshold of number of scalar move insns, @emph{below} which a sequence
|
||||
of insns should be generated to clear memory instead of a string clear insn
|
||||
|
|
1398
gcc/expr.c
1398
gcc/expr.c
File diff suppressed because it is too large
Load diff
21
gcc/expr.h
21
gcc/expr.h
|
@ -103,12 +103,16 @@ enum block_op_methods
|
|||
BLOCK_OP_TAILCALL
|
||||
};
|
||||
|
||||
typedef rtx (*by_pieces_constfn) (void *, HOST_WIDE_INT, machine_mode);
|
||||
|
||||
extern rtx emit_block_move (rtx, rtx, rtx, enum block_op_methods);
|
||||
extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods,
|
||||
unsigned int, HOST_WIDE_INT,
|
||||
unsigned HOST_WIDE_INT,
|
||||
unsigned HOST_WIDE_INT,
|
||||
unsigned HOST_WIDE_INT);
|
||||
extern rtx emit_block_cmp_hints (rtx, rtx, rtx, tree, rtx, bool,
|
||||
by_pieces_constfn, void *);
|
||||
extern bool emit_storent_insn (rtx to, rtx from);
|
||||
|
||||
/* Copy all or part of a value X into registers starting at REGNO.
|
||||
|
@ -173,6 +177,11 @@ extern void use_regs (rtx *, int, int);
|
|||
/* Mark a PARALLEL as holding a parameter for the next CALL_INSN. */
|
||||
extern void use_group_regs (rtx *, rtx);
|
||||
|
||||
#ifdef GCC_INSN_CODES_H
|
||||
extern rtx expand_cmpstrn_or_cmpmem (insn_code, rtx, rtx, rtx, tree, rtx,
|
||||
HOST_WIDE_INT);
|
||||
#endif
|
||||
|
||||
/* Write zeros through the storage of OBJECT.
|
||||
If OBJECT has BLKmode, SIZE is its length in bytes. */
|
||||
extern rtx clear_storage (rtx, rtx, enum block_op_methods);
|
||||
|
@ -191,10 +200,6 @@ extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int,
|
|||
unsigned HOST_WIDE_INT,
|
||||
unsigned HOST_WIDE_INT);
|
||||
|
||||
extern unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
|
||||
unsigned int,
|
||||
unsigned int);
|
||||
|
||||
/* Return nonzero if it is desirable to store LEN bytes generated by
|
||||
CONSTFUN with several move instructions by store_by_pieces
|
||||
function. CONSTFUNDATA is a pointer which will be passed as argument
|
||||
|
@ -203,8 +208,7 @@ extern unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
|
|||
MEMSETP is true if this is a real memset/bzero, not a copy
|
||||
of a const string. */
|
||||
extern int can_store_by_pieces (unsigned HOST_WIDE_INT,
|
||||
rtx (*) (void *, HOST_WIDE_INT,
|
||||
machine_mode),
|
||||
by_pieces_constfn,
|
||||
void *, unsigned int, bool);
|
||||
|
||||
/* Generate several move instructions to store LEN bytes generated by
|
||||
|
@ -213,8 +217,7 @@ extern int can_store_by_pieces (unsigned HOST_WIDE_INT,
|
|||
ALIGN is maximum alignment we can assume.
|
||||
MEMSETP is true if this is a real memset/bzero, not a copy.
|
||||
Returns TO + LEN. */
|
||||
extern rtx store_by_pieces (rtx, unsigned HOST_WIDE_INT,
|
||||
rtx (*) (void *, HOST_WIDE_INT, machine_mode),
|
||||
extern rtx store_by_pieces (rtx, unsigned HOST_WIDE_INT, by_pieces_constfn,
|
||||
void *, unsigned int, bool, int);
|
||||
|
||||
/* Emit insns to set X from Y. */
|
||||
|
@ -295,7 +298,7 @@ rtx get_personality_function (tree);
|
|||
/* Determine whether the LEN bytes can be moved by using several move
|
||||
instructions. Return nonzero if a call to move_by_pieces should
|
||||
succeed. */
|
||||
extern int can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int);
|
||||
extern bool can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int);
|
||||
|
||||
extern unsigned HOST_WIDE_INT highest_pow2_factor (const_tree);
|
||||
|
||||
|
|
|
@ -3397,8 +3397,9 @@ Both @var{size} and @var{alignment} are measured in terms of storage\n\
|
|||
units.\n\
|
||||
\n\
|
||||
The parameter @var{op} is one of: @code{CLEAR_BY_PIECES},\n\
|
||||
@code{MOVE_BY_PIECES}, @code{SET_BY_PIECES}, @code{STORE_BY_PIECES}.\n\
|
||||
These describe the type of memory operation under consideration.\n\
|
||||
@code{MOVE_BY_PIECES}, @code{SET_BY_PIECES}, @code{STORE_BY_PIECES} or\n\
|
||||
@code{COMPARE_BY_PIECES}. These describe the type of memory operation\n\
|
||||
under consideration.\n\
|
||||
\n\
|
||||
The parameter @var{speed_p} is true if the code is currently being\n\
|
||||
optimized for speed rather than size.\n\
|
||||
|
@ -3417,6 +3418,18 @@ move would be greater than that of a library call.",
|
|||
enum by_pieces_operation op, bool speed_p),
|
||||
default_use_by_pieces_infrastructure_p)
|
||||
|
||||
DEFHOOK
|
||||
(compare_by_pieces_branch_ratio,
|
||||
"When expanding a block comparison in MODE, gcc can try to reduce the\n\
|
||||
number of branches at the expense of more memory operations. This hook\n\
|
||||
allows the target to override the default choice. It should return the\n\
|
||||
factor by which branches should be reduced over the plain expansion with\n\
|
||||
one comparison per @var{mode}-sized piece. A port can also prevent a\n\
|
||||
particular mode from being used for block comparisons by returning a\n\
|
||||
negative number from this hook.",
|
||||
int, (machine_mode mode),
|
||||
default_compare_by_pieces_branch_ratio)
|
||||
|
||||
DEFHOOK
|
||||
(optab_supported_p,
|
||||
"Return true if the optimizers should use optab @var{op} with\n\
|
||||
|
|
11
gcc/target.h
11
gcc/target.h
|
@ -79,16 +79,23 @@ enum print_switch_type
|
|||
};
|
||||
|
||||
/* Types of memory operation understood by the "by_pieces" infrastructure.
|
||||
Used by the TARGET_USE_BY_PIECES_INFRASTRUCTURE_P target hook. */
|
||||
Used by the TARGET_USE_BY_PIECES_INFRASTRUCTURE_P target hook and
|
||||
internally by the functions in expr.c. */
|
||||
|
||||
enum by_pieces_operation
|
||||
{
|
||||
CLEAR_BY_PIECES,
|
||||
MOVE_BY_PIECES,
|
||||
SET_BY_PIECES,
|
||||
STORE_BY_PIECES
|
||||
STORE_BY_PIECES,
|
||||
COMPARE_BY_PIECES
|
||||
};
|
||||
|
||||
extern unsigned HOST_WIDE_INT by_pieces_ninsns (unsigned HOST_WIDE_INT,
|
||||
unsigned int,
|
||||
unsigned int,
|
||||
by_pieces_operation);
|
||||
|
||||
typedef int (* print_switch_fn_type) (print_switch_type, const char *);
|
||||
|
||||
/* An example implementation for ELF targets. Defined in varasm.c */
|
||||
|
|
|
@ -1482,25 +1482,40 @@ default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
|
|||
|
||||
switch (op)
|
||||
{
|
||||
case CLEAR_BY_PIECES:
|
||||
max_size = STORE_MAX_PIECES;
|
||||
ratio = CLEAR_RATIO (speed_p);
|
||||
break;
|
||||
case MOVE_BY_PIECES:
|
||||
max_size = MOVE_MAX_PIECES;
|
||||
ratio = get_move_ratio (speed_p);
|
||||
break;
|
||||
case SET_BY_PIECES:
|
||||
max_size = STORE_MAX_PIECES;
|
||||
ratio = SET_RATIO (speed_p);
|
||||
break;
|
||||
case STORE_BY_PIECES:
|
||||
max_size = STORE_MAX_PIECES;
|
||||
ratio = get_move_ratio (speed_p);
|
||||
break;
|
||||
case CLEAR_BY_PIECES:
|
||||
max_size = STORE_MAX_PIECES;
|
||||
ratio = CLEAR_RATIO (speed_p);
|
||||
break;
|
||||
case MOVE_BY_PIECES:
|
||||
max_size = MOVE_MAX_PIECES;
|
||||
ratio = get_move_ratio (speed_p);
|
||||
break;
|
||||
case SET_BY_PIECES:
|
||||
max_size = STORE_MAX_PIECES;
|
||||
ratio = SET_RATIO (speed_p);
|
||||
break;
|
||||
case STORE_BY_PIECES:
|
||||
max_size = STORE_MAX_PIECES;
|
||||
ratio = get_move_ratio (speed_p);
|
||||
break;
|
||||
case COMPARE_BY_PIECES:
|
||||
max_size = COMPARE_MAX_PIECES;
|
||||
/* Pick a likely default, just as in get_move_ratio. */
|
||||
ratio = speed_p ? 15 : 3;
|
||||
break;
|
||||
}
|
||||
|
||||
return move_by_pieces_ninsns (size, alignment, max_size + 1) < ratio;
|
||||
return by_pieces_ninsns (size, alignment, max_size + 1, op) < ratio;
|
||||
}
|
||||
|
||||
/* This hook controls code generation for expanding a memcmp operation by
|
||||
pieces. Return 1 for the normal pattern of compare/jump after each pair
|
||||
of loads, or a higher number to reduce the number of branches. */
|
||||
|
||||
int
|
||||
default_compare_by_pieces_branch_ratio (machine_mode)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
|
@ -199,6 +199,7 @@ extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
|
|||
unsigned int,
|
||||
enum by_pieces_operation,
|
||||
bool);
|
||||
extern int default_compare_by_pieces_branch_ratio (machine_mode);
|
||||
|
||||
extern bool default_profile_before_prologue (void);
|
||||
extern reg_class_t default_preferred_reload_class (rtx, reg_class_t);
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
2016-06-03 Bernd Schmidt <bschmidt@redhat.com>
|
||||
|
||||
PR tree-optimization/52171
|
||||
* gcc.dg/pr52171.c: New test.
|
||||
* gcc.target/i386/pr52171.c: New test.
|
||||
|
||||
2016-06-03 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* g++.dg/tree-ssa/pred-1.C: New testcase
|
||||
|
|
12
gcc/testsuite/gcc.dg/pr52171.c
Normal file
12
gcc/testsuite/gcc.dg/pr52171.c
Normal file
|
@ -0,0 +1,12 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
/* { dg-final { scan-assembler-not "memcmp" } } */
|
||||
#include <string.h>
|
||||
struct A { int x; } a, b;
|
||||
|
||||
extern char s[], t[];
|
||||
|
||||
int foo ()
|
||||
{
|
||||
return memcmp (&a, &b, sizeof (struct A)) == 0;
|
||||
}
|
23
gcc/testsuite/gcc.target/i386/pr52171.c
Normal file
23
gcc/testsuite/gcc.target/i386/pr52171.c
Normal file
|
@ -0,0 +1,23 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
/* { dg-final { scan-assembler-not "memcmp" } } */
|
||||
/* { dg-final { scan-assembler "1752394086" } } */
|
||||
|
||||
/* This should turn into four compare/jump pairs with -m32, within the
|
||||
limit of what the tuning considers acceptable for -O2. */
|
||||
int cmp (char *p, char *q)
|
||||
{
|
||||
char *pa = __builtin_assume_aligned (p, 4);
|
||||
char *qa = __builtin_assume_aligned (q, 4);
|
||||
if (__builtin_memcmp (pa, qa, 16) != 0)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
/* Since we have fast unaligned access, we should make a single
|
||||
constant comparison. The constant becomes 1752394086. */
|
||||
int cmp2 (char *p)
|
||||
{
|
||||
if (__builtin_memcmp (p, "fish", 4) != 0)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
|
@ -44,6 +44,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "params.h"
|
||||
#include "ipa-chkp.h"
|
||||
#include "tree-hash-traits.h"
|
||||
#include "builtins.h"
|
||||
|
||||
/* A vector indexed by SSA_NAME_VERSION. 0 means unknown, positive value
|
||||
is an index into strinfo vector, negative value stands for
|
||||
|
@ -1843,6 +1844,88 @@ handle_builtin_memset (gimple_stmt_iterator *gsi)
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Handle a call to memcmp. We try to handle small comparisons by
|
||||
converting them to load and compare, and replacing the call to memcmp
|
||||
with a __builtin_memcmp_eq call where possible. */
|
||||
|
||||
static bool
|
||||
handle_builtin_memcmp (gimple_stmt_iterator *gsi)
|
||||
{
|
||||
gcall *stmt2 = as_a <gcall *> (gsi_stmt (*gsi));
|
||||
tree res = gimple_call_lhs (stmt2);
|
||||
tree arg1 = gimple_call_arg (stmt2, 0);
|
||||
tree arg2 = gimple_call_arg (stmt2, 1);
|
||||
tree len = gimple_call_arg (stmt2, 2);
|
||||
unsigned HOST_WIDE_INT leni;
|
||||
use_operand_p use_p;
|
||||
imm_use_iterator iter;
|
||||
|
||||
if (!res)
|
||||
return true;
|
||||
|
||||
FOR_EACH_IMM_USE_FAST (use_p, iter, res)
|
||||
{
|
||||
gimple *ustmt = USE_STMT (use_p);
|
||||
|
||||
if (gimple_code (ustmt) == GIMPLE_ASSIGN)
|
||||
{
|
||||
gassign *asgn = as_a <gassign *> (ustmt);
|
||||
tree_code code = gimple_assign_rhs_code (asgn);
|
||||
if ((code != EQ_EXPR && code != NE_EXPR)
|
||||
|| !integer_zerop (gimple_assign_rhs2 (asgn)))
|
||||
return true;
|
||||
}
|
||||
else if (gimple_code (ustmt) == GIMPLE_COND)
|
||||
{
|
||||
tree_code code = gimple_cond_code (ustmt);
|
||||
if ((code != EQ_EXPR && code != NE_EXPR)
|
||||
|| !integer_zerop (gimple_cond_rhs (ustmt)))
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return true;
|
||||
}
|
||||
|
||||
if (tree_fits_uhwi_p (len)
|
||||
&& (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
|
||||
&& exact_log2 (leni) != -1)
|
||||
{
|
||||
leni *= CHAR_TYPE_SIZE;
|
||||
unsigned align1 = get_pointer_alignment (arg1);
|
||||
unsigned align2 = get_pointer_alignment (arg2);
|
||||
unsigned align = MIN (align1, align2);
|
||||
machine_mode mode = mode_for_size (leni, MODE_INT, 1);
|
||||
if (mode != BLKmode
|
||||
&& (align >= leni || !SLOW_UNALIGNED_ACCESS (mode, align)))
|
||||
{
|
||||
location_t loc = gimple_location (stmt2);
|
||||
tree type, off;
|
||||
type = build_nonstandard_integer_type (leni, 1);
|
||||
gcc_assert (GET_MODE_BITSIZE (TYPE_MODE (type)) == leni);
|
||||
tree ptrtype = build_pointer_type_for_mode (char_type_node,
|
||||
ptr_mode, true);
|
||||
off = build_int_cst (ptrtype, 0);
|
||||
arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
|
||||
arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
|
||||
tree tem1 = fold_const_aggregate_ref (arg1);
|
||||
if (tem1)
|
||||
arg1 = tem1;
|
||||
tree tem2 = fold_const_aggregate_ref (arg2);
|
||||
if (tem2)
|
||||
arg2 = tem2;
|
||||
res = fold_convert_loc (loc, TREE_TYPE (res),
|
||||
fold_build2_loc (loc, NE_EXPR,
|
||||
boolean_type_node,
|
||||
arg1, arg2));
|
||||
gimplify_and_update_call_from_tree (gsi, res);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
gimple_call_set_fndecl (stmt2, builtin_decl_explicit (BUILT_IN_MEMCMP_EQ));
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Handle a POINTER_PLUS_EXPR statement.
|
||||
For p = "abcd" + 2; compute associated length, or if
|
||||
p = q + off is pointing to a '\0' character of a string, call
|
||||
|
@ -2100,6 +2183,10 @@ strlen_optimize_stmt (gimple_stmt_iterator *gsi)
|
|||
if (!handle_builtin_memset (gsi))
|
||||
return false;
|
||||
break;
|
||||
case BUILT_IN_MEMCMP:
|
||||
if (!handle_builtin_memcmp (gsi))
|
||||
return false;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -10601,6 +10601,13 @@ build_common_builtin_nodes (void)
|
|||
BUILT_IN_STACK_RESTORE,
|
||||
"__builtin_stack_restore", ECF_NOTHROW | ECF_LEAF);
|
||||
|
||||
ftype = build_function_type_list (integer_type_node, const_ptr_type_node,
|
||||
const_ptr_type_node, size_type_node,
|
||||
NULL_TREE);
|
||||
local_define_builtin ("__builtin_memcmp_eq", ftype, BUILT_IN_MEMCMP_EQ,
|
||||
"__builtin_memcmp_eq",
|
||||
ECF_PURE | ECF_NOTHROW | ECF_LEAF);
|
||||
|
||||
/* If there's a possibility that we might use the ARM EABI, build the
|
||||
alternate __cxa_end_cleanup node used to resume from C++ and Java. */
|
||||
if (targetm.arm_eabi_unwinder)
|
||||
|
|
Loading…
Add table
Reference in a new issue