i386: Double-word sign-extension missed-optimization [PR110717]

When sign-extending the value in a double-word register pair using an ASHIFT
and ASHIFTRT sequence with the same immediate count that is less than the word
width, there is no need to shift the lower word of the value. The sign-extension
could be limited to the upper word, but we uselessly shift the lower word
with it as well:
	movq	%rdi, %rax
	movq	%rsi, %rdx
	shldq	$59, %rdi, %rdx
	salq	$59, %rax
	shrdq	$59, %rdx, %rax
	sarq	$59, %rdx
	ret
for -m64 and
	movl	4(%esp), %eax
	movl	8(%esp), %edx
	shldl	$27, %eax, %edx
	sall	$27, %eax
	shrdl	$27, %edx, %eax
	sarl	$27, %edx
	ret
for -m32.

The patch introduces a new post-reload splitter to provide the combined
ASHIFTRT/SHIFT instruction pattern.  The instruction is split to a sequence
of SAL and SAR insns with the same count immediate operand:
	movq    %rsi, %rdx
	movq    %rdi, %rax
	salq    $59, %rdx
	sarq    $59, %rdx
	ret

Some complication is required to properly handle STV transform, where we
emit a sequence with DImode PSLLQ and PSRAQ insns for 32-bit AVX512VL
targets when profitable.

The patch also fixes a small oversight and enables STV transform of SImode
ASHIFTRT to PSRAD also for SSE2 targets.

	PR target/110717

gcc/ChangeLog:

	* config/i386/i386-features.cc
	(general_scalar_chain::compute_convert_gain): Calculate gain
	for extend highpart case.
	(general_scalar_chain::convert_op): Handle
	ASHIFTRT/ASHIFT combined RTX.
	(general_scalar_to_vector_candidate_p): Enable ASHIFTRT for
	SImode for SSE2 targets.  Handle ASHIFTRT/ASHIFT combined RTX.
	* config/i386/i386.md (*extend<dwi>2_doubleword_highpart):
	New define_insn_and_split pattern.
	(*extendv2di2_highpart_stv): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr110717.c: New test.
This commit is contained in:
Uros Bizjak 2023-07-20 20:54:51 +02:00
parent 4b8878fbf7
commit b50a851eef
3 changed files with 70 additions and 2 deletions

View file

@ -572,6 +572,9 @@ general_scalar_chain::compute_convert_gain ()
{
if (INTVAL (XEXP (src, 1)) >= 32)
igain += ix86_cost->add;
/* Gain for extend highpart case. */
else if (GET_CODE (XEXP (src, 0)) == ASHIFT)
igain += ix86_cost->shift_const - ix86_cost->sse_op;
else
igain += ix86_cost->shift_const;
}
@ -951,7 +954,8 @@ general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
*op = copy_rtx_if_shared (*op);
if (GET_CODE (*op) == NOT)
if (GET_CODE (*op) == NOT
|| GET_CODE (*op) == ASHIFT)
{
convert_op (&XEXP (*op, 0), insn);
PUT_MODE (*op, vmode);
@ -2120,7 +2124,7 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
switch (GET_CODE (src))
{
case ASHIFTRT:
if (!TARGET_AVX512VL)
if (mode == DImode && !TARGET_AVX512VL)
return false;
/* FALLTHRU */
@ -2131,6 +2135,14 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
if (!CONST_INT_P (XEXP (src, 1))
|| !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
return false;
/* Check for extend highpart case. */
if (mode != DImode
|| GET_CODE (src) != ASHIFTRT
|| GET_CODE (XEXP (src, 0)) != ASHIFT)
break;
src = XEXP (src, 0);
break;
case SMAX:

View file

@ -15292,6 +15292,41 @@
(const_string "0")
(const_string "*")))
(set_attr "mode" "QI")])
;; Sign-extension confined to one part of a double-word value, expressed as
;; (ashiftrt (ashift x N) N) with constant shift counts.  The insn condition
;; requires both counts to be equal and below <MODE_SIZE> * BITS_PER_UNIT.
;; Operand 1 is tied to operand 0 (constraint "0") and the flags register
;; is clobbered.
;;
;; The insn is never output directly ("#").  After reload it is split into
;; an ASHIFT followed by an ASHIFTRT, both in DWIH mode, both by operand 2,
;; and both acting in place on operands[4].  operands[4] is filled in by the
;; split_double_mode call in the preparation statement; per the commit
;; description this is the upper word, so the lower word is left unshifted.
;; NOTE(review): confirm the lo/hi argument order of split_double_mode.
(define_insn_and_split "*extend<dwi>2_doubleword_highpart"
[(set (match_operand:<DWI> 0 "register_operand" "=r")
(ashiftrt:<DWI>
(ashift:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0")
(match_operand:QI 2 "const_int_operand"))
(match_operand:QI 3 "const_int_operand")))
(clobber (reg:CC FLAGS_REG))]
"INTVAL (operands[2]) == INTVAL (operands[3])
&& UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT"
"#"
"&& reload_completed"
[(parallel [(set (match_dup 4)
(ashift:DWIH (match_dup 4) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])
(parallel [(set (match_dup 4)
(ashiftrt:DWIH (match_dup 4) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
"split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[4]);")
;; V2DI form of the (ashiftrt (ashift x N) N) combination with constant
;; shift counts.  The insn condition restricts it to !TARGET_64BIT with
;; TARGET_STV and TARGET_AVX512VL, and requires both counts to be equal
;; and below 32.  Per the commit description this serves the STV transform,
;; which emits DImode PSLLQ and PSRAQ insns for 32-bit AVX512VL targets.
;;
;; The insn is never output directly ("#").  After reload it is split into
;; two plain sets: a V2DI ASHIFT of operand 1 by operand 2 into operand 0,
;; then a V2DI ASHIFTRT of operand 0 by operand 2, in place.
(define_insn_and_split "*extendv2di2_highpart_stv"
[(set (match_operand:V2DI 0 "register_operand" "=v")
(ashiftrt:V2DI
(ashift:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "vm")
(match_operand:QI 2 "const_int_operand"))
(match_operand:QI 3 "const_int_operand")))]
"!TARGET_64BIT && TARGET_STV && TARGET_AVX512VL
&& INTVAL (operands[2]) == INTVAL (operands[3])
&& UINTVAL (operands[2]) < 32"
"#"
"&& reload_completed"
[(set (match_dup 0)
(ashift:V2DI (match_dup 1) (match_dup 2)))
(set (match_dup 0)
(ashiftrt:V2DI (match_dup 0) (match_dup 2)))])
;; Rotate instructions

View file

@ -0,0 +1,21 @@
/* PR target/110717 */
/* { dg-do compile } */
/* { dg-options "-O2" } */
#ifdef __SIZEOF_INT128__
unsigned __int128
foo (unsigned __int128 x)
{
x <<= 59;
return ((__int128) x) >> 59;
}
#else
unsigned long long
foo (unsigned long long x)
{
x <<= 27;
return ((long long) x) >> 27;
}
#endif
/* { dg-final { scan-assembler-not "sh\[lr\]d" } } */