i386: Introduce V2QImode vectorized shifts [PR103861]
Add V2QImode shift operations and split them to synthesized double HI/LO QImode operations with integer registers. Also robustify arithmetic split patterns. 2022-01-13 Uroš Bizjak <ubizjak@gmail.com> gcc/ChangeLog: PR target/103861 * config/i386/i386.md (*ashlqi_ext<mode>_2): New insn pattern. (*<any_shiftrt:insn>qi_ext<mode>_2): Ditto. * config/i386/mmx.md (<any_shift:insn>v2qi3): New insn_and_split pattern. gcc/testsuite/ChangeLog: PR target/103861 * gcc.target/i386/pr103861.c (shl, ashr, lshr): New tests.
This commit is contained in:
parent
b0e5163960
commit
7a7d8c3f61
3 changed files with 128 additions and 4 deletions
|
@ -12413,6 +12413,54 @@
|
|||
(const_string "*")))
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
;; Left shift, in QImode, of the 8-bit field at bit position 8 of operand 0
;; (the zero_extract with size 8 and position 8).  The source field is taken
;; from operand 1, which is tied to operand 0 by the "0" constraint; the
;; shift count is the QImode operand 2.  Operand 0 is read-write ("+Q") and
;; the insn clobbers FLAGS_REG.
;; NOTE(review): the "Q" constraint and the %h output modifier presumably
;; restrict/print the registers whose second byte is addressable
;; (%ah/%bh/%ch/%dh) -- confirm against the i386 constraint documentation.
(define_insn "*ashlqi_ext<mode>_2"
|
||||
[(set (zero_extract:SWI248
|
||||
(match_operand:SWI248 0 "register_operand" "+Q")
|
||||
(const_int 8)
|
||||
(const_int 8))
|
||||
(subreg:SWI248
|
||||
(ashift:QI
|
||||
(subreg:QI
|
||||
(zero_extract:SWI248
|
||||
(match_operand:SWI248 1 "register_operand" "0")
|
||||
(const_int 8)
|
||||
(const_int 8)) 0)
|
||||
(match_operand:QI 2 "nonmemory_operand" "cI")) 0))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
;; Insn condition: operands 0 and 1 must already be the same rtx; the FIXME
;; inside the condition string records why (LRA reload problem, PR82524).
"/* FIXME: without this LRA can't reload this pattern, see PR82524. */
|
||||
rtx_equal_p (operands[0], operands[1])"
|
||||
;; Output code: for type "alu" the count is asserted to be const1_rtx and the
;; shift is emitted as "add{b} %h0, %h0"; otherwise "sal{b}" is emitted, in
;; its one-operand form when the count is 1 and either TARGET_SHIFT1 is set
;; or the function is optimized for size.
{
|
||||
switch (get_attr_type (insn))
|
||||
{
|
||||
case TYPE_ALU:
|
||||
gcc_assert (operands[2] == const1_rtx);
|
||||
return "add{b}\t%h0, %h0";
|
||||
|
||||
default:
|
||||
if (operands[2] == const1_rtx
|
||||
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
|
||||
return "sal{b}\t%h0";
|
||||
else
|
||||
return "sal{b}\t{%2, %h0|%h0, %2}";
|
||||
}
|
||||
}
|
||||
;; Attributes: "type" is "alu" when TARGET_DOUBLE_WITH_ADD holds and operand 2
;; matches const1_operand, and "ishift" otherwise.  "length_immediate" is 0
;; for exactly the forms that are output without operand 2 (the add form and
;; the one-operand shift form); otherwise the default ("*") is used.
[(set (attr "type")
|
||||
(cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
|
||||
(match_operand 2 "const1_operand"))
|
||||
(const_string "alu")
|
||||
]
|
||||
(const_string "ishift")))
|
||||
(set (attr "length_immediate")
|
||||
(if_then_else
|
||||
(ior (eq_attr "type" "alu")
|
||||
(and (eq_attr "type" "ishift")
|
||||
(and (match_operand 2 "const1_operand")
|
||||
(ior (match_test "TARGET_SHIFT1")
|
||||
(match_test "optimize_function_for_size_p (cfun)")))))
|
||||
(const_string "0")
|
||||
(const_string "*")))
|
||||
(set_attr "mode" "QI")])
|
||||
|
||||
;; See comment above `ashl<mode>3' about how this works.
|
||||
|
||||
(define_expand "<insn><mode>3"
|
||||
|
@ -13143,6 +13191,39 @@
|
|||
(const_string "0")
|
||||
(const_string "*")))
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
;; QImode shift, using the any_shiftrt code iterator, of the 8-bit field at
;; bit position 8 of operand 0 (the zero_extract with size 8 and position 8).
;; The source field comes from operand 1, which is tied to operand 0 by the
;; "0" constraint; the shift count is the QImode operand 2.  Operand 0 is
;; read-write ("+Q") and the insn clobbers FLAGS_REG.
;; NOTE(review): any_shiftrt is defined outside this hunk; presumably it
;; covers the arithmetic and logical right shifts -- confirm in i386.md.
(define_insn "*<insn>qi_ext<mode>_2"
|
||||
[(set (zero_extract:SWI248
|
||||
(match_operand:SWI248 0 "register_operand" "+Q")
|
||||
(const_int 8)
|
||||
(const_int 8))
|
||||
(subreg:SWI248
|
||||
(any_shiftrt:QI
|
||||
(subreg:QI
|
||||
(zero_extract:SWI248
|
||||
(match_operand:SWI248 1 "register_operand" "0")
|
||||
(const_int 8)
|
||||
(const_int 8)) 0)
|
||||
(match_operand:QI 2 "nonmemory_operand" "cI")) 0))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
;; Insn condition: operands 0 and 1 must already be the same rtx; the FIXME
;; inside the condition string records why (LRA reload problem, PR82524).
"/* FIXME: without this LRA can't reload this pattern, see PR82524. */
|
||||
rtx_equal_p (operands[0], operands[1])"
|
||||
;; Output code: emit "<shift>{b}" on %h0, in its one-operand form when the
;; count is const1_rtx and either TARGET_SHIFT1 is set or the function is
;; optimized for size; otherwise the two-operand form with the count %2.
{
|
||||
if (operands[2] == const1_rtx
|
||||
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
|
||||
return "<shift>{b}\t%h0";
|
||||
else
|
||||
return "<shift>{b}\t{%2, %h0|%h0, %2}";
|
||||
}
|
||||
;; Attributes: always type "ishift".  "length_immediate" is 0 under the same
;; test that selects the one-operand output form above; otherwise the default
;; ("*") is used.
[(set_attr "type" "ishift")
|
||||
(set (attr "length_immediate")
|
||||
(if_then_else
|
||||
(and (match_operand 2 "const1_operand")
|
||||
(ior (match_test "TARGET_SHIFT1")
|
||||
(match_test "optimize_function_for_size_p (cfun)")))
|
||||
(const_string "0")
|
||||
(const_string "*")))
|
||||
(set_attr "mode" "QI")])
|
||||
|
||||
;; Rotate instructions
|
||||
|
||||
|
|
|
@ -1657,7 +1657,8 @@
|
|||
(neg:V2QI
|
||||
(match_operand:V2QI 1 "general_reg_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"reload_completed"
|
||||
"(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
|
||||
&& reload_completed"
|
||||
[(parallel
|
||||
[(set (strict_low_part (match_dup 0))
|
||||
(neg:QI (match_dup 1)))
|
||||
|
@ -1683,7 +1684,8 @@
|
|||
(neg:V2QI
|
||||
(match_operand:V2QI 1 "sse_reg_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"reload_completed"
|
||||
"(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
|
||||
&& TARGET_SSE2 && reload_completed"
|
||||
[(set (match_dup 0) (match_dup 2))
|
||||
(set (match_dup 0)
|
||||
(minus:V16QI (match_dup 0) (match_dup 1)))]
|
||||
|
@ -1757,7 +1759,8 @@
|
|||
(match_operand:V2QI 1 "general_reg_operand")
|
||||
(match_operand:V2QI 2 "general_reg_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"reload_completed"
|
||||
"(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
|
||||
&& reload_completed"
|
||||
[(parallel
|
||||
[(set (strict_low_part (match_dup 0))
|
||||
(plusminus:QI (match_dup 1) (match_dup 2)))
|
||||
|
@ -1790,7 +1793,8 @@
|
|||
(match_operand:V2QI 1 "sse_reg_operand")
|
||||
(match_operand:V2QI 2 "sse_reg_operand")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_SSE2 && reload_completed"
|
||||
"(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
|
||||
&& TARGET_SSE2 && reload_completed"
|
||||
[(set (match_dup 0)
|
||||
(plusminus:V16QI (match_dup 1) (match_dup 2)))]
|
||||
{
|
||||
|
@ -2387,6 +2391,38 @@
|
|||
(const_string "0")))
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; V2QImode shift (any_shift code iterator) of operand 1 by the QImode count
;; in operand 2.  The result goes to operand 0, which is tied to operand 1 by
;; the "0" constraint, and FLAGS_REG is clobbered.  The pattern is enabled
;; when !TARGET_PARTIAL_REG_STALL or when the function is optimized for size.
;;
;; The output template is "#", so the insn is meant to be split rather than
;; output directly.  Once reload_completed holds it is split into two QImode
;; shifts, each in a parallel with a FLAGS_REG clobber:
;;   1. a shift of the 8-bit field at bit position 8 of the HImode view of
;;      the register (operand 3 as destination, operand 4 as source);
;;   2. a shift of the QImode low part, written through strict_low_part of
;;      operand 0.
;;
;; The preparation statements build the HImode views (operands 4 and 3) from
;; the original V2QImode operands 1 and 0 first, and only then replace
;; operands 1 and 0 with their QImode lowpart subregs; the order matters
;; because the later assignments overwrite the V2QImode operands.
(define_insn_and_split "<insn>v2qi3"
|
||||
[(set (match_operand:V2QI 0 "register_operand" "=Q")
|
||||
(any_shift:V2QI
|
||||
(match_operand:V2QI 1 "register_operand" "0")
|
||||
(match_operand:QI 2 "nonmemory_operand" "cI")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(parallel
|
||||
[(set (zero_extract:HI (match_dup 3) (const_int 8) (const_int 8))
|
||||
(subreg:HI
|
||||
(any_shift:QI
|
||||
(subreg:QI
|
||||
(zero_extract:HI (match_dup 4)
|
||||
(const_int 8)
|
||||
(const_int 8)) 0)
|
||||
(match_dup 2)) 0))
|
||||
(clobber (reg:CC FLAGS_REG))])
|
||||
(parallel
|
||||
[(set (strict_low_part (match_dup 0))
|
||||
(any_shift:QI (match_dup 1) (match_dup 2)))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
{
|
||||
operands[4] = lowpart_subreg (HImode, operands[1], V2QImode);
|
||||
operands[3] = lowpart_subreg (HImode, operands[0], V2QImode);
|
||||
operands[1] = lowpart_subreg (QImode, operands[1], V2QImode);
|
||||
operands[0] = lowpart_subreg (QImode, operands[0], V2QImode);
|
||||
}
|
||||
[(set_attr "type" "multi")
|
||||
(set_attr "mode" "QI")])
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;
|
||||
;; Parallel integral comparisons
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
/* { dg-options "-O2 -dp" } */
|
||||
|
||||
typedef char __v2qi __attribute__ ((__vector_size__ (2)));
|
||||
typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2)));
|
||||
|
||||
/* Test input: bitwise AND of two 2-byte char vectors.  */
__v2qi and (__v2qi a, __v2qi b) { return a & b; };
|
||||
|
||||
|
@ -20,4 +21,10 @@ __v2qi minus (__v2qi a, __v2qi b) { return a - b; };
|
|||
|
||||
/* Test input: negation of a 2-byte char vector.  */
__v2qi neg (__v2qi a) { return -a; };
|
||||
|
||||
/* Test input: left shift of a 2-byte char vector by the scalar count B.  */
__v2qi shl (__v2qi a, int b) { return a << b; };
|
||||
|
||||
/* Test input: right shift of a 2-byte plain-char vector by the scalar count
   B.  NOTE(review): the name suggests an arithmetic shift, which assumes
   plain char is signed on the target -- confirm.  */
__v2qi ashr (__v2qi a, int b) { return a >> b; };
|
||||
|
||||
/* Test input: right shift of a 2-byte unsigned-char vector by the scalar
   count B.  */
__v2qu lshr (__v2qu a, int b) { return a >> b; };
|
||||
|
||||
/* { dg-final { scan-assembler-not "insvhi" } } */
|
||||
|
|
Loading…
Add table
Reference in a new issue