aarch64: Make sqdmlal2 patterns match canonical RTL

The sqdmlal2 patterns are hidden beneath the SBINQOPS iterator and, unfortunately, they don't match
canonical RTL because the simple accumulator operand comes in the first arm of the SS_PLUS.
This patch splits each pattern into separate SS_PLUS and SS_MINUS forms, with the SS_PLUS operands
arranged to match the canonical form, where the complex operand comes first. SS_MINUS is not
commutative, so the sqdmlsl2 forms keep the accumulator as their first operand.

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md
	(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Split into...
	(aarch64_sqdmlsl2_lane<mode>_internal): ... This...
	(aarch64_sqdmlal2_lane<mode>_internal): ... And this.
	(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Split into...
	(aarch64_sqdmlsl2_laneq<mode>_internal): ... This...
	(aarch64_sqdmlal2_laneq<mode>_internal): ... And this.
	(aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal): Split into...
	(aarch64_sqdmlsl2_n<mode>_internal): ... This...
	(aarch64_sqdmlal2_n<mode>_internal): ... And this.
This commit is contained in:
Kyrylo Tkachov 2021-05-14 10:05:42 +01:00
parent 4206171605
commit ff3809b459

View file

@ -5374,9 +5374,9 @@
;; vqdml[sa]l2_lane ;; vqdml[sa]l2_lane
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal" (define_insn "aarch64_sqdmlsl2_lane<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w") [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(SBINQOPS:<VWIDE> (ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0") (match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE> (ss_ashift:<VWIDE>
(mult:<VWIDE> (mult:<VWIDE>
@ -5395,14 +5395,40 @@
{ {
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return return
"sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
} }
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
) )
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal" (define_insn "aarch64_sqdmlal2_lane<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w") [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(SBINQOPS:<VWIDE> (ss_plus:<VWIDE>
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
(sign_extend:<VWIDE>
(vec_duplicate:<VHALF>
(vec_select:<VEL>
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])
))))
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return
"sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
(define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0") (match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE> (ss_ashift:<VWIDE>
(mult:<VWIDE> (mult:<VWIDE>
@ -5421,7 +5447,33 @@
{ {
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return return
"sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
(define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_plus:<VWIDE>
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
(sign_extend:<VWIDE>
(vec_duplicate:<VHALF>
(vec_select:<VEL>
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])
))))
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return
"sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
} }
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
) )
@ -5460,9 +5512,9 @@
DONE; DONE;
}) })
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal" (define_insn "aarch64_sqdmlsl2_n<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w") [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(SBINQOPS:<VWIDE> (ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0") (match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE> (ss_ashift:<VWIDE>
(mult:<VWIDE> (mult:<VWIDE>
@ -5475,7 +5527,26 @@
(match_operand:<VEL> 3 "register_operand" "<vwx>")))) (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
(const_int 1))))] (const_int 1))))]
"TARGET_SIMD" "TARGET_SIMD"
"sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
(define_insn "aarch64_sqdmlal2_n<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_plus:<VWIDE>
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
(sign_extend:<VWIDE>
(vec_duplicate:<VHALF>
(match_operand:<VEL> 3 "register_operand" "<vwx>"))))
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
"sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
) )