ARC: Improve DImode left shift by a single bit.

This patch improves the code generated for x << 1 (and for x + x) when
x is 64-bit DImode, using the same two-instruction code sequence used
for DImode addition.

For the test case:

long long foo(long long x) { return x << 1; }

GCC -O2 currently generates the following code:

foo:    lsr     r2,r0,31
        asl_s   r1,r1,1
        asl_s   r0,r0,1
        j_s.d   [blink]
        or_s    r1,r1,r2

and on a CPU without a barrel shifter, i.e. -mcpu=em:

foo:	add.f   0,r0,r0
        asl_s   r1,r1
        rlc     r2,0
        asl_s   r0,r0
        j_s.d   [blink]
        or_s    r1,r1,r2

with this patch (both with and without a barrel shifter):

foo:	add.f   r0,r0,r0
        j_s.d   [blink]
        adc     r1,r1,r1

A similar optimization is also applicable to H8300H, which could also use
a two-instruction sequence (plus rts), but currently GCC generates 16
instructions (plus an rts) for foo above.

2023-11-03  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
	* config/arc/arc.md (addsi3): Fix GNU-style code formatting.
	(adddi3): Change define_expand to generate a *adddi3.
	(*adddi3): New define_insn_and_split to lower DImode additions
	during the split1 pass (after combine and before reload).
	(ashldi3): New define_expand to (only) generate *ashldi3_cnt1
	for DImode left shifts by a single bit.
	(*ashldi3_cnt1): New define_insn_and_split to lower DImode
	left shifts by one bit to an *adddi3.

gcc/testsuite/ChangeLog
	* gcc.target/arc/adddi3-1.c: New test case.
	* gcc.target/arc/ashldi3-1.c: Likewise.
This commit is contained in:
Roger Sayle 2023-11-03 14:32:26 +00:00
parent eb83605be3
commit b16845b30c
3 changed files with 66 additions and 9 deletions

View file

@ -2675,19 +2675,28 @@ archs4x, archs4xd"
(plus:SI (match_operand:SI 1 "register_operand" "")
(match_operand:SI 2 "nonmemory_operand" "")))]
""
"if (flag_pic && arc_raw_symbolic_reference_mentioned_p (operands[2], false))
{
operands[2]=force_reg(SImode, operands[2]);
}
")
{
if (flag_pic && arc_raw_symbolic_reference_mentioned_p (operands[2], false))
operands[2] = force_reg (SImode, operands[2]);
})
;; Expander for 64-bit (DImode) addition.
;; Operand 0 (destination) and operand 1 must be registers; operand 2 may
;; be any nonmemory operand.  The expansion is a PARALLEL of the DImode
;; PLUS and a clobber of CC_REG, i.e. the same shape as the "*adddi3"
;; define_insn_and_split pattern.  There is no condition string and no
;; preparation code, so the template is emitted as written.
(define_expand "adddi3"
[(parallel
[(set (match_operand:DI 0 "register_operand" "")
(plus:DI (match_operand:DI 1 "register_operand" "")
(match_operand:DI 2 "nonmemory_operand" "")))
(clobber (reg:CC CC_REG))])])
(define_insn_and_split "*adddi3"
[(set (match_operand:DI 0 "register_operand" "")
(plus:DI (match_operand:DI 1 "register_operand" "")
(match_operand:DI 2 "nonmemory_operand" "")))
(clobber (reg:CC CC_REG))]
""
"
"arc_pre_reload_split ()"
"#"
"&& 1"
[(const_int 0)]
{
rtx l0 = gen_lowpart (SImode, operands[0]);
rtx h0 = gen_highpart (SImode, operands[0]);
rtx l1 = gen_lowpart (SImode, operands[1]);
@ -2719,11 +2728,12 @@ archs4x, archs4xd"
gen_rtx_LTU (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REG), GEN_INT (0)),
gen_rtx_SET (h0, plus_constant (SImode, h0, 1))));
DONE;
}
}
emit_insn (gen_add_f (l0, l1, l2));
emit_insn (gen_adc (h0, h1, h2));
DONE;
")
}
[(set_attr "length" "8")])
(define_insn "add_f"
[(set (reg:CC_C CC_REG)
@ -3493,6 +3503,33 @@ archs4x, archs4xd"
[(set_attr "type" "shift")
(set_attr "length" "16,20")])
;; DImode shifts
;; Expander for 64-bit (DImode) left shift.
;; Operands 0 and 1 are DImode registers; operand 2 is a QImode constant
;; shift count.  Only a count of exactly one is accepted: the preparation
;; code FAILs for every other count, so this expander only ever produces
;; the shift-by-one PARALLEL (ASHIFT plus a CC_REG clobber) that the
;; "*ashldi3_cnt1" pattern matches.
(define_expand "ashldi3"
[(parallel
[(set (match_operand:DI 0 "register_operand")
(ashift:DI (match_operand:DI 1 "register_operand")
(match_operand:QI 2 "const_int_operand")))
(clobber (reg:CC CC_REG))])]
""
{
/* Shift counts other than const1_rtx are not handled by this expander.  */
if (operands[2] != const1_rtx)
FAIL;
})
;; DImode left shift by a single bit, with CC_REG clobbered.
;; The insn is only recognized while arc_pre_reload_split () holds, and its
;; output template is "#", so it has to be split rather than printed.
;; The split condition "&& 1" adds no further restriction; the replacement
;; is a DImode addition of operand 1 to itself (x << 1 == x + x), again
;; paired with a CC_REG clobber.  The length attribute is set to 8.
(define_insn_and_split "*ashldi3_cnt1"
[(set (match_operand:DI 0 "register_operand")
(ashift:DI (match_operand:DI 1 "register_operand")
(const_int 1)))
(clobber (reg:CC CC_REG))]
"arc_pre_reload_split ()"
"#"
"&& 1"
[(parallel [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 1)))
(clobber (reg:CC CC_REG))])]
""
[(set_attr "length" "8")])
;; Rotate instructions.
(define_insn "rotrsi3_insn"

View file

@ -0,0 +1,10 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */

/* Check that a 64-bit addition is emitted as an add.f/adc pair: add.f on
   the low words (r0, r2) followed by adc on the high words (r1, r3).  */

long long foo(long long x, long long y)
{
  return x + y;
}

/* The '.' of the add.f mnemonic is escaped so that it matches only a
   literal dot rather than any character.  */
/* { dg-final { scan-assembler "add\\.f\\s+r0,r0,r2" } } */
/* { dg-final { scan-assembler "adc\\s+r1,r1,r3" } } */

View file

@ -0,0 +1,10 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */

/* Check that a 64-bit left shift by one bit is emitted as an add.f/adc
   pair that adds each word of the operand to itself.  */

long long foo(long long x)
{
  return x << 1;
}

/* The '.' of the add.f mnemonic is escaped so that it matches only a
   literal dot rather than any character.  */
/* { dg-final { scan-assembler "add\\.f\\s+r0,r0,r0" } } */
/* { dg-final { scan-assembler "adc\\s+r1,r1,r1" } } */