s390: Refactor block operation cpymem
Do not perform a libc function call into memcpy if the size is not a compile-time constant but is bounded, and the upper bound is less than or equal to 256 bytes.
gcc/ChangeLog:
* config/s390/s390-protos.h (s390_expand_cpymem): Change function signature.
* config/s390/s390.cc (s390_expand_cpymem): For memcpys of less than or equal to 256 bytes do not perform a libc call.
(s390_expand_insv): Adapt to new function signature of s390_expand_cpymem.
* config/s390/s390.md: Change expander into a version which takes 8 operands.
This commit is contained in:
parent
6c95fe9bc0
commit
0cc6d0e7e2
3 changed files with 74 additions and 22 deletions
|
@ -107,7 +107,7 @@ extern void s390_reload_symref_address (rtx , rtx , rtx , bool);
|
||||||
extern void s390_expand_plus_operand (rtx, rtx, rtx);
|
extern void s390_expand_plus_operand (rtx, rtx, rtx);
|
||||||
extern void emit_symbolic_move (rtx *);
|
extern void emit_symbolic_move (rtx *);
|
||||||
extern void s390_load_address (rtx, rtx);
|
extern void s390_load_address (rtx, rtx);
|
||||||
extern bool s390_expand_cpymem (rtx, rtx, rtx);
|
extern bool s390_expand_cpymem (rtx, rtx, rtx, rtx, rtx);
|
||||||
extern void s390_expand_setmem (rtx, rtx, rtx);
|
extern void s390_expand_setmem (rtx, rtx, rtx);
|
||||||
extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx);
|
extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx);
|
||||||
extern void s390_expand_vec_strlen (rtx, rtx, rtx);
|
extern void s390_expand_vec_strlen (rtx, rtx, rtx);
|
||||||
|
|
|
@ -5650,27 +5650,27 @@ legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
|
||||||
return NULL_RTX;
|
return NULL_RTX;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Emit code to move LEN bytes from DST to SRC. */
|
/* Emit code to move LEN bytes from SRC to DST. */
|
||||||
|
|
||||||
bool
|
bool
|
||||||
s390_expand_cpymem (rtx dst, rtx src, rtx len)
|
s390_expand_cpymem (rtx dst, rtx src, rtx len, rtx min_len_rtx, rtx max_len_rtx)
|
||||||
{
|
{
|
||||||
/* When tuning for z10 or higher we rely on the Glibc functions to
|
/* Exit early in case nothing has to be done. */
|
||||||
do the right thing. Only for constant lengths below 64k we will
|
if (CONST_INT_P (len) && UINTVAL (len) == 0)
|
||||||
generate inline code. */
|
return true;
|
||||||
if (s390_tune >= PROCESSOR_2097_Z10
|
|
||||||
&& (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
|
unsigned HOST_WIDE_INT min_len = UINTVAL (min_len_rtx);
|
||||||
return false;
|
unsigned HOST_WIDE_INT max_len
|
||||||
|
= max_len_rtx ? UINTVAL (max_len_rtx) : HOST_WIDE_INT_M1U;
|
||||||
|
|
||||||
/* Expand memcpy for constant length operands without a loop if it
|
/* Expand memcpy for constant length operands without a loop if it
|
||||||
is shorter that way.
|
is shorter that way.
|
||||||
|
|
||||||
With a constant length argument a
|
With a constant length argument a
|
||||||
memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
|
memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
|
||||||
if (GET_CODE (len) == CONST_INT
|
if (CONST_INT_P (len)
|
||||||
&& INTVAL (len) >= 0
|
&& UINTVAL (len) <= 6 * 256
|
||||||
&& INTVAL (len) <= 256 * 6
|
&& (!TARGET_MVCLE || UINTVAL (len) <= 256))
|
||||||
&& (!TARGET_MVCLE || INTVAL (len) <= 256))
|
|
||||||
{
|
{
|
||||||
HOST_WIDE_INT o, l;
|
HOST_WIDE_INT o, l;
|
||||||
|
|
||||||
|
@ -5681,14 +5681,57 @@ s390_expand_cpymem (rtx dst, rtx src, rtx len)
|
||||||
emit_insn (gen_cpymem_short (newdst, newsrc,
|
emit_insn (gen_cpymem_short (newdst, newsrc,
|
||||||
GEN_INT (l > 256 ? 255 : l - 1)));
|
GEN_INT (l > 256 ? 255 : l - 1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (TARGET_MVCLE)
|
else if (TARGET_MVCLE
|
||||||
|
&& (s390_tune < PROCESSOR_2097_Z10
|
||||||
|
|| (CONST_INT_P (len) && UINTVAL (len) <= (1 << 16))))
|
||||||
{
|
{
|
||||||
emit_insn (gen_cpymem_long (dst, src, convert_to_mode (Pmode, len, 1)));
|
emit_insn (gen_cpymem_long (dst, src, convert_to_mode (Pmode, len, 1)));
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
else
|
/* Non-constant length and no loop required. */
|
||||||
|
else if (!CONST_INT_P (len) && max_len <= 256)
|
||||||
|
{
|
||||||
|
rtx_code_label *end_label;
|
||||||
|
|
||||||
|
if (min_len == 0)
|
||||||
|
{
|
||||||
|
end_label = gen_label_rtx ();
|
||||||
|
emit_cmp_and_jump_insns (len, const0_rtx, EQ, NULL_RTX,
|
||||||
|
GET_MODE (len), 1, end_label,
|
||||||
|
profile_probability::very_unlikely ());
|
||||||
|
}
|
||||||
|
|
||||||
|
rtx lenm1 = expand_binop (GET_MODE (len), add_optab, len, constm1_rtx,
|
||||||
|
NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
|
||||||
|
/* Prefer a vectorized implementation over one which makes use of an
|
||||||
|
execute instruction since it is faster (although it increases register
|
||||||
|
pressure). */
|
||||||
|
if (max_len <= 16 && TARGET_VX)
|
||||||
|
{
|
||||||
|
rtx tmp = gen_reg_rtx (V16QImode);
|
||||||
|
lenm1 = convert_to_mode (SImode, lenm1, 1);
|
||||||
|
emit_insn (gen_vllv16qi (tmp, lenm1, src));
|
||||||
|
emit_insn (gen_vstlv16qi (tmp, lenm1, dst));
|
||||||
|
}
|
||||||
|
else if (TARGET_Z15)
|
||||||
|
emit_insn (gen_mvcrl (dst, src, convert_to_mode (SImode, lenm1, 1)));
|
||||||
|
else
|
||||||
|
emit_insn (
|
||||||
|
gen_cpymem_short (dst, src, convert_to_mode (Pmode, lenm1, 1)));
|
||||||
|
|
||||||
|
if (min_len == 0)
|
||||||
|
emit_label (end_label);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (s390_tune < PROCESSOR_2097_Z10 || (CONST_INT_P (len) && UINTVAL (len) <= (1 << 16)))
|
||||||
{
|
{
|
||||||
rtx dst_addr, src_addr, count, blocks, temp;
|
rtx dst_addr, src_addr, count, blocks, temp;
|
||||||
rtx_code_label *loop_start_label = gen_label_rtx ();
|
rtx_code_label *loop_start_label = gen_label_rtx ();
|
||||||
|
@ -5706,8 +5749,9 @@ s390_expand_cpymem (rtx dst, rtx src, rtx len)
|
||||||
blocks = gen_reg_rtx (mode);
|
blocks = gen_reg_rtx (mode);
|
||||||
|
|
||||||
convert_move (count, len, 1);
|
convert_move (count, len, 1);
|
||||||
emit_cmp_and_jump_insns (count, const0_rtx,
|
if (min_len == 0)
|
||||||
EQ, NULL_RTX, mode, 1, end_label);
|
emit_cmp_and_jump_insns (count, const0_rtx, EQ, NULL_RTX, mode, 1,
|
||||||
|
end_label);
|
||||||
|
|
||||||
emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
|
emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
|
||||||
emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
|
emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
|
||||||
|
@ -5767,8 +5811,11 @@ s390_expand_cpymem (rtx dst, rtx src, rtx len)
|
||||||
emit_insn (gen_cpymem_short (dst, src,
|
emit_insn (gen_cpymem_short (dst, src,
|
||||||
convert_to_mode (Pmode, count, 1)));
|
convert_to_mode (Pmode, count, 1)));
|
||||||
emit_label (end_label);
|
emit_label (end_label);
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Emit code to set LEN bytes at DST to VAL.
|
/* Emit code to set LEN bytes at DST to VAL.
|
||||||
|
@ -6599,7 +6646,8 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
|
||||||
|
|
||||||
dest = adjust_address (dest, BLKmode, 0);
|
dest = adjust_address (dest, BLKmode, 0);
|
||||||
set_mem_size (dest, size);
|
set_mem_size (dest, size);
|
||||||
s390_expand_cpymem (dest, src_mem, GEN_INT (size));
|
rtx size_rtx = GEN_INT (size);
|
||||||
|
s390_expand_cpymem (dest, src_mem, size_rtx, size_rtx, size_rtx);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3341,11 +3341,15 @@
|
||||||
(define_expand "cpymem<mode>"
|
(define_expand "cpymem<mode>"
|
||||||
[(set (match_operand:BLK 0 "memory_operand" "") ; destination
|
[(set (match_operand:BLK 0 "memory_operand" "") ; destination
|
||||||
(match_operand:BLK 1 "memory_operand" "")) ; source
|
(match_operand:BLK 1 "memory_operand" "")) ; source
|
||||||
(use (match_operand:GPR 2 "general_operand" "")) ; count
|
(use (match_operand:GPR 2 "general_operand" "")) ; size
|
||||||
(match_operand 3 "" "")]
|
(match_operand 3 "") ; align
|
||||||
|
(match_operand 4 "") ; expected align
|
||||||
|
(match_operand 5 "") ; expected size
|
||||||
|
(match_operand 6 "") ; minimal size
|
||||||
|
(match_operand 7 "")] ; maximal size
|
||||||
""
|
""
|
||||||
{
|
{
|
||||||
if (s390_expand_cpymem (operands[0], operands[1], operands[2]))
|
if (s390_expand_cpymem (operands[0], operands[1], operands[2], operands[6], operands[7]))
|
||||||
DONE;
|
DONE;
|
||||||
else
|
else
|
||||||
FAIL;
|
FAIL;
|
||||||
|
|
Loading…
Add table
Reference in a new issue