i386: Introduce V2QImode vectorized shifts [PR103861]

Add V2QImode shift operations and split them to synthesized
double HI/LO QImode operations with integer registers.

Also robustify arithmetic split patterns.

2022-01-13  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:

	PR target/103861
	* config/i386/i386.md (*ashlqi_ext<mode>_2): New insn pattern.
	(*<any_shiftrt:insn>qi_ext<mode>_2): Ditto.
	* config/i386/mmx.md (<any_shift:insn>v2qi3):
	New insn_and_split pattern.

gcc/testsuite/ChangeLog:

	PR target/103861
	* gcc.target/i386/pr103861.c (shl,ashr,lshr): New tests.
This commit is contained in:
Uros Bizjak 2022-01-13 20:48:18 +01:00
parent b0e5163960
commit 7a7d8c3f61
3 changed files with 128 additions and 4 deletions

View file

@ -12413,6 +12413,54 @@
(const_string "*")))
(set_attr "mode" "<MODE>")])
;; Left shift of the 8-bit field that starts at bit 8 of an integer
;; register, performed in place.
;;
;; Operand 0 is the destination register (SWI248 mode iterator,
;; constraint "+Q"); operand 1 is the source register, tied to operand 0
;; by the matching constraint "0"; operand 2 is the QImode shift count
;; (nonmemory_operand, constraint "cI").  The field is read through a
;; QImode subreg of the zero_extract, shifted with a QImode ashift, and
;; stored back into the same field of operand 0.  The flags register is
;; clobbered.
;; NOTE(review): the meaning of the constraint letters Q, c and I is
;; defined by the i386 port outside this view -- confirm there.
(define_insn "*ashlqi_ext<mode>_2"
[(set (zero_extract:SWI248
(match_operand:SWI248 0 "register_operand" "+Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(ashift:QI
(subreg:QI
(zero_extract:SWI248
(match_operand:SWI248 1 "register_operand" "0")
(const_int 8)
(const_int 8)) 0)
(match_operand:QI 2 "nonmemory_operand" "cI")) 0))
(clobber (reg:CC FLAGS_REG))]
;; Insn condition: operands 0 and 1 must already be the same rtx; the
;; FIXME inside the condition string gives the reason.
"/* FIXME: without this LRA can't reload this pattern, see PR82524. */
rtx_equal_p (operands[0], operands[1])"
;; Output code.  For type "alu" the count is asserted to be const1_rtx
;; and the shift is emitted as add{b} of %h0 to itself.  Otherwise
;; sal{b} is emitted: without a count operand when the count is
;; const1_rtx and either TARGET_SHIFT1 holds or the function is
;; optimized for size, and with the count operand (%2) in every other
;; case.
;; NOTE(review): %h0 presumably prints the high-byte name of operand 0
;; (e.g. %ah) -- confirm against the i386 operand modifiers.
{
switch (get_attr_type (insn))
{
case TYPE_ALU:
gcc_assert (operands[2] == const1_rtx);
return "add{b}\t%h0, %h0";
default:
if (operands[2] == const1_rtx
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
return "sal{b}\t%h0";
else
return "sal{b}\t{%2, %h0|%h0, %2}";
}
}
;; "type" is "alu" only when TARGET_DOUBLE_WITH_ADD holds and operand 2
;; matches const1_operand; in every other case it is "ishift".
[(set (attr "type")
(cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 2 "const1_operand"))
(const_string "alu")
]
(const_string "ishift")))
;; "length_immediate" is 0 for the two forms that are emitted without a
;; count operand (the "alu" form and the short shift-by-one form, using
;; the same test as the output code above); otherwise it is left at "*".
(set (attr "length_immediate")
(if_then_else
(ior (eq_attr "type" "alu")
(and (eq_attr "type" "ishift")
(and (match_operand 2 "const1_operand")
(ior (match_test "TARGET_SHIFT1")
(match_test "optimize_function_for_size_p (cfun)")))))
(const_string "0")
(const_string "*")))
(set_attr "mode" "QI")])
;; See comment above `ashl<mode>3' about how this works.
(define_expand "<insn><mode>3"
@ -13143,6 +13191,39 @@
(const_string "0")
(const_string "*")))
(set_attr "mode" "<MODE>")])
;; Right shifts (any_shiftrt code iterator) of the 8-bit field that
;; starts at bit 8 of an integer register, performed in place.
;;
;; Operand 0 is the destination register (SWI248 mode iterator,
;; constraint "+Q"); operand 1 is the source register, tied to operand 0
;; by the matching constraint "0"; operand 2 is the QImode shift count
;; (nonmemory_operand, constraint "cI").  The field is read through a
;; QImode subreg of the zero_extract, shifted in QImode, and stored back
;; into the same field of operand 0.  The flags register is clobbered.
;; NOTE(review): the meaning of the constraint letters Q, c and I is
;; defined by the i386 port outside this view -- confirm there.
(define_insn "*<insn>qi_ext<mode>_2"
[(set (zero_extract:SWI248
(match_operand:SWI248 0 "register_operand" "+Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(any_shiftrt:QI
(subreg:QI
(zero_extract:SWI248
(match_operand:SWI248 1 "register_operand" "0")
(const_int 8)
(const_int 8)) 0)
(match_operand:QI 2 "nonmemory_operand" "cI")) 0))
(clobber (reg:CC FLAGS_REG))]
;; Insn condition: operands 0 and 1 must already be the same rtx; the
;; FIXME inside the condition string gives the reason.
"/* FIXME: without this LRA can't reload this pattern, see PR82524. */
rtx_equal_p (operands[0], operands[1])"
;; Output code.  The mnemonic comes from the <shift> substitution.  The
;; form without a count operand is used when the count is const1_rtx and
;; either TARGET_SHIFT1 holds or the function is optimized for size;
;; every other case prints the count operand (%2) as well.
{
if (operands[2] == const1_rtx
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
return "<shift>{b}\t%h0";
else
return "<shift>{b}\t{%2, %h0|%h0, %2}";
}
;; "length_immediate" is 0 exactly for the short shift-by-one form (same
;; test as the output code above); otherwise it is left at "*".
[(set_attr "type" "ishift")
(set (attr "length_immediate")
(if_then_else
(and (match_operand 2 "const1_operand")
(ior (match_test "TARGET_SHIFT1")
(match_test "optimize_function_for_size_p (cfun)")))
(const_string "0")
(const_string "*")))
(set_attr "mode" "QI")])
;; Rotate instructions

View file

@ -1657,7 +1657,8 @@
(neg:V2QI
(match_operand:V2QI 1 "general_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
"reload_completed"
"(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
&& reload_completed"
[(parallel
[(set (strict_low_part (match_dup 0))
(neg:QI (match_dup 1)))
@ -1683,7 +1684,8 @@
(neg:V2QI
(match_operand:V2QI 1 "sse_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
"reload_completed"
"(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
&& TARGET_SSE2 && reload_completed"
[(set (match_dup 0) (match_dup 2))
(set (match_dup 0)
(minus:V16QI (match_dup 0) (match_dup 1)))]
@ -1757,7 +1759,8 @@
(match_operand:V2QI 1 "general_reg_operand")
(match_operand:V2QI 2 "general_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
"reload_completed"
"(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
&& reload_completed"
[(parallel
[(set (strict_low_part (match_dup 0))
(plusminus:QI (match_dup 1) (match_dup 2)))
@ -1790,7 +1793,8 @@
(match_operand:V2QI 1 "sse_reg_operand")
(match_operand:V2QI 2 "sse_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_SSE2 && reload_completed"
"(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
&& TARGET_SSE2 && reload_completed"
[(set (match_dup 0)
(plusminus:V16QI (match_dup 1) (match_dup 2)))]
{
@ -2387,6 +2391,38 @@
(const_string "0")))
(set_attr "mode" "TI")])
;; Shift (any_shift code iterator) of a V2QI vector by a single QImode
;; count.  Operand 0 is the V2QI destination register ("=Q"), operand 1
;; the V2QI source tied to it ("0"), operand 2 the QImode count
;; (nonmemory_operand, "cI").  The flags register is clobbered.
;;
;; The insn is enabled only when !TARGET_PARTIAL_REG_STALL or when the
;; function is optimized for size.  Its output template is "#", so it is
;; meant to be split rather than printed; the split condition adds
;; reload_completed.  The split produces two QImode shifts by the same
;; count, in this order:
;;   1. the high byte, written as a zero_extract of 8 bits at bit 8 of
;;      the HImode view of the register (operands 3 and 4);
;;   2. the low byte, written as a strict_low_part set of the QImode
;;      view of the register (operands 0 and 1).
;; Each half carries its own flags-register clobber.
;;
;; The preparation statements create the HImode lowpart subregs
;; (operands 4 and 3) before they replace operands 1 and 0 with QImode
;; lowpart subregs, because all four are derived from the original
;; V2QImode operands.
(define_insn_and_split "<insn>v2qi3"
[(set (match_operand:V2QI 0 "register_operand" "=Q")
(any_shift:V2QI
(match_operand:V2QI 1 "register_operand" "0")
(match_operand:QI 2 "nonmemory_operand" "cI")))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
"#"
"&& reload_completed"
;; Replacement sequence: high-byte shift first, then low-byte shift.
[(parallel
[(set (zero_extract:HI (match_dup 3) (const_int 8) (const_int 8))
(subreg:HI
(any_shift:QI
(subreg:QI
(zero_extract:HI (match_dup 4)
(const_int 8)
(const_int 8)) 0)
(match_dup 2)) 0))
(clobber (reg:CC FLAGS_REG))])
(parallel
[(set (strict_low_part (match_dup 0))
(any_shift:QI (match_dup 1) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
{
operands[4] = lowpart_subreg (HImode, operands[1], V2QImode);
operands[3] = lowpart_subreg (HImode, operands[0], V2QImode);
operands[1] = lowpart_subreg (QImode, operands[1], V2QImode);
operands[0] = lowpart_subreg (QImode, operands[0], V2QImode);
}
[(set_attr "type" "multi")
(set_attr "mode" "QI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral comparisons

View file

@ -3,6 +3,7 @@
/* { dg-options "-O2 -dp" } */
/* Two-byte vector types used by every test function in this file:
   __v2qi has char elements, __v2qu has unsigned char elements.  */
typedef char __v2qi __attribute__ ((__vector_size__ (2)));
typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2)));
/* Element-wise bitwise AND of two __v2qi vectors.  */
__v2qi and (__v2qi a, __v2qi b) { return a & b; };
@ -20,4 +21,10 @@ __v2qi minus (__v2qi a, __v2qi b) { return a - b; };
/* Negation of a __v2qi vector.  */
__v2qi neg (__v2qi a) { return -a; };
/* Shifts of a two-byte vector by a scalar int count: left shift and
   right shift on the char-element type __v2qi, and right shift on the
   unsigned-char-element type __v2qu.  */
__v2qi shl (__v2qi a, int b) { return a << b; };
__v2qi ashr (__v2qi a, int b) { return a >> b; };
__v2qu lshr (__v2qu a, int b) { return a >> b; };
/* { dg-final { scan-assembler-not "insvhi" } } */