aarch64: Make sqdmlal2 patterns match canonical RTL

The sqdmlal2 patterns are hidden beneath the SBINQOPS iterator and unfortunately they don't match
canonical RTL because the simple accumulate operand comes in the first arm of the SS_PLUS.
This patch splits the SS_PLUS and SS_MINUS forms with the SS_PLUS operands set up to match
the canonical form, where the complex operand comes first.

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md
	(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Split into...
	(aarch64_sqdmlsl2_lane<mode>_internal): ... This...
	(aarch64_sqdmlal2_lane<mode>_internal): ... And this.
	(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Split into ...
	(aarch64_sqdmlsl2_laneq<mode>_internal): ... This...
	(aarch64_sqdmlal2_laneq<mode>_internal): ... And this.
	(aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal): Split into...
	(aarch64_sqdmlsl2_n<mode>_internal): ... This...
	(aarch64_sqdmlal2_n<mode>_internal): ... And this.
This commit is contained in:
Kyrylo Tkachov 2021-05-14 10:05:42 +01:00
parent 4206171605
commit ff3809b459

View file

@ -5374,9 +5374,9 @@
;; vqdml[sa]l2_lane
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
(define_insn "aarch64_sqdmlsl2_lane<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(SBINQOPS:<VWIDE>
(ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
@ -5395,14 +5395,40 @@
{
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return
"sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
"sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
(define_insn "aarch64_sqdmlal2_lane<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(SBINQOPS:<VWIDE>
(ss_plus:<VWIDE>
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
(sign_extend:<VWIDE>
(vec_duplicate:<VHALF>
(vec_select:<VEL>
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])
))))
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return
"sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
(define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
@ -5421,7 +5447,33 @@
{
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return
"sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
"sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
(define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_plus:<VWIDE>
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
(sign_extend:<VWIDE>
(vec_duplicate:<VHALF>
(vec_select:<VEL>
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])
))))
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return
"sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
@ -5460,9 +5512,9 @@
DONE;
})
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
(define_insn "aarch64_sqdmlsl2_n<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(SBINQOPS:<VWIDE>
(ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
@ -5475,7 +5527,26 @@
(match_operand:<VEL> 3 "register_operand" "<vwx>"))))
(const_int 1))))]
"TARGET_SIMD"
"sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
"sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
(define_insn "aarch64_sqdmlal2_n<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_plus:<VWIDE>
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
(sign_extend:<VWIDE>
(vec_duplicate:<VHALF>
(match_operand:<VEL> 3 "register_operand" "<vwx>"))))
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
"sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)