diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 6f48b4d5f21..556d0cf359f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -389,26 +389,6 @@
   [(set_attr "type" "neon_mul_<Vetype><q>")]
 )
 
-;; Advanced SIMD does not support vector DImode MUL, but SVE does.
-;; Make use of the overlap between Z and V registers to implement the V2DI
-;; optab for TARGET_SVE.  The mulvnx2di3 expander can
-;; handle the TARGET_SVE2 case transparently.
-(define_expand "mulv2di3"
-  [(set (match_operand:V2DI 0 "register_operand")
-	(mult:V2DI (match_operand:V2DI 1 "register_operand")
-		   (match_operand:V2DI 2 "aarch64_sve_vsm_operand")))]
-  "TARGET_SVE"
-  {
-    machine_mode sve_mode = VNx2DImode;
-    rtx sve_op0 = simplify_gen_subreg (sve_mode, operands[0], V2DImode, 0);
-    rtx sve_op1 = simplify_gen_subreg (sve_mode, operands[1], V2DImode, 0);
-    rtx sve_op2 = simplify_gen_subreg (sve_mode, operands[2], V2DImode, 0);
-
-    emit_insn (gen_mulvnx2di3 (sve_op0, sve_op1, sve_op2));
-    DONE;
-  }
-)
-
 (define_insn "bswap<mode>2"
   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
 	(bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
@@ -2678,27 +2658,6 @@
   [(set_attr "type" "neon_fp_div_<stype><q>")]
 )
 
-;; SVE has vector integer divisions, unlike Advanced SIMD.
-;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
-;; optabs to the midend.
-(define_expand "<su_optab>div<mode>3"
-  [(set (match_operand:VQDIV 0 "register_operand")
-	(ANY_DIV:VQDIV
-	  (match_operand:VQDIV 1 "register_operand")
-	  (match_operand:VQDIV 2 "register_operand")))]
-  "TARGET_SVE"
-  {
-    machine_mode sve_mode
-      = aarch64_full_sve_mode (GET_MODE_INNER (<MODE>mode)).require ();
-    rtx sve_op0 = simplify_gen_subreg (sve_mode, operands[0], <MODE>mode, 0);
-    rtx sve_op1 = simplify_gen_subreg (sve_mode, operands[1], <MODE>mode, 0);
-    rtx sve_op2 = simplify_gen_subreg (sve_mode, operands[2], <MODE>mode, 0);
-
-    emit_insn (gen_<su_optab>div<vnx>3 (sve_op0, sve_op1, sve_op2));
-    DONE;
-  }
-)
-
 (define_insn "neg<mode>2"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 	(neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index e1e3c1bd0b7..eca8623e587 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3789,16 +3789,35 @@
   [(set (match_operand:SVE_I 0 "register_operand")
 	(unspec:SVE_I
 	  [(match_dup 3)
-	   (SVE_INT_BINARY_IMM:SVE_I
+	   (SVE_INT_BINARY_MULTI:SVE_I
 	     (match_operand:SVE_I 1 "register_operand")
 	     (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
 	  UNSPEC_PRED_X))]
   "TARGET_SVE"
+  {
+    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+  }
+)
+
+;; Unpredicated integer binary operations that have an immediate form.
+;; Advanced SIMD does not support vector DImode MUL, but SVE does.
+;; Make use of the overlap between Z and V registers to implement the V2DI
+;; optab for TARGET_SVE.  The mulvnx2di3 expander can
+;; handle the TARGET_SVE2 case transparently.
+(define_expand "mul<mode>3"
+  [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
+	(unspec:SVE_I_SIMD_DI
+	  [(match_dup 3)
+	   (mult:SVE_I_SIMD_DI
+	     (match_operand:SVE_I_SIMD_DI 1 "register_operand")
+	     (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_vsm_operand"))]
+	  UNSPEC_PRED_X))]
+  "TARGET_SVE"
   {
     /* SVE2 supports the MUL (vectors, unpredicated) form.  Emit the simple
       pattern for it here rather than splitting off the MULT expander
      separately.  */
-    if (TARGET_SVE2 && <CODE> == MULT)
+    if (TARGET_SVE2)
       {
 	emit_move_insn (operands[0], gen_rtx_MULT (<MODE>mode,
 						   operands[1], operands[2]));
@@ -3814,26 +3833,26 @@
 ;; and would make the instruction seem less uniform to the register
 ;; allocator.
 (define_insn_and_split "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_I 0 "register_operand")
-	(unspec:SVE_I
+  [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
+	(unspec:SVE_I_SIMD_DI
 	  [(match_operand:<VPRED> 1 "register_operand")
-	   (SVE_INT_BINARY_IMM:SVE_I
-	     (match_operand:SVE_I 2 "register_operand")
-	     (match_operand:SVE_I 3 "aarch64_sve_<sve_imm_con>_operand"))]
+	   (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI
+	     (match_operand:SVE_I_SIMD_DI 2 "register_operand")
+	     (match_operand:SVE_I_SIMD_DI 3 "aarch64_sve_<sve_imm_con>_operand"))]
 	  UNSPEC_PRED_X))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , 2  , 3             ; attrs: movprfx ]
      [ w        , Upl , %0 , <sve_imm_con> ; *              ] #
-     [ w        , Upl , 0  , w             ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+     [ w        , Upl , 0  , w             ; *              ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
      [ ?&w      , Upl , w  , <sve_imm_con> ; yes            ] #
-     [ ?&w      , Upl , w  , w             ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+     [ ?&w      , Upl , w  , w             ; yes            ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
   }
   ; Split the unpredicated form after reload, so that we don't have
   ; the unnecessary PTRUE.
   "&& reload_completed
    && !register_operand (operands[3], <MODE>mode)"
   [(set (match_dup 0)
-	(SVE_INT_BINARY_IMM:SVE_I (match_dup 2) (match_dup 3)))]
+	(SVE_INT_BINARY_IMM:SVE_I_SIMD_DI (match_dup 2) (match_dup 3)))]
   ""
 )
@@ -3841,14 +3860,14 @@
 ;; These are generated by splitting a predicated instruction whose
 ;; predicate is unused.
 (define_insn "*post_ra_<optab><mode>3"
-  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
-	(SVE_INT_BINARY_IMM:SVE_I
-	  (match_operand:SVE_I 1 "register_operand" "0, w")
-	  (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
+  [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand" "=w, ?&w")
+	(SVE_INT_BINARY_IMM:SVE_I_SIMD_DI
+	  (match_operand:SVE_I_SIMD_DI 1 "register_operand" "0, w")
+	  (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_<sve_imm_con>_immediate")))]
   "TARGET_SVE && reload_completed"
   "@
-   <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%2
-   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%2"
+   <sve_int_op>\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
+   movprfx\t%Z0, %Z1\;<sve_int_op>\t%Z0.<Vetype>, %Z0.<Vetype>, #%2"
   [(set_attr "movprfx" "*,yes")]
 )
@@ -4458,13 +4477,16 @@
 ;; -------------------------------------------------------------------------
 
 ;; Unpredicated integer division.
+;; SVE has vector integer divisions, unlike Advanced SIMD.
+;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
+;; optabs to the midend.
 (define_expand "<optab><mode>3"
-  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
-	(unspec:SVE_FULL_SDI
+  [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
+	(unspec:SVE_FULL_SDI_SIMD
 	  [(match_dup 3)
-	   (SVE_INT_BINARY_SD:SVE_FULL_SDI
-	     (match_operand:SVE_FULL_SDI 1 "register_operand")
-	     (match_operand:SVE_FULL_SDI 2 "register_operand"))]
+	   (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
+	     (match_operand:SVE_FULL_SDI_SIMD 1 "register_operand")
+	     (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand"))]
 	  UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
@@ -4474,18 +4496,18 @@
 
 ;; Integer division predicated with a PTRUE.
(define_insn "@aarch64_pred_" - [(set (match_operand:SVE_FULL_SDI 0 "register_operand") - (unspec:SVE_FULL_SDI + [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand") + (unspec:SVE_FULL_SDI_SIMD [(match_operand: 1 "register_operand") - (SVE_INT_BINARY_SD:SVE_FULL_SDI - (match_operand:SVE_FULL_SDI 2 "register_operand") - (match_operand:SVE_FULL_SDI 3 "register_operand"))] + (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD + (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand") + (match_operand:SVE_FULL_SDI_SIMD 3 "register_operand"))] UNSPEC_PRED_X))] "TARGET_SVE" {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] - [ w , Upl , 0 , w ; * ] \t%0., %1/m, %0., %3. - [ w , Upl , w , 0 ; * ] r\t%0., %1/m, %0., %2. - [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;\t%0., %1/m, %0., %3. + [ w , Upl , 0 , w ; * ] \t%Z0., %1/m, %Z0., %Z3. + [ w , Upl , w , 0 ; * ] r\t%Z0., %1/m, %Z0., %Z2. + [ ?&w , Upl , w , w ; yes ] movprfx\t%Z0, %Z2\;\t%Z0., %1/m, %Z0., %Z3. } ) diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 1d1eb8bfdff..934e57055d3 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -615,29 +615,29 @@ ;; ------------------------------------------------------------------------- (define_insn "@aarch64_mul_lane_" - [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") - (mult:SVE_FULL_HSDI - (unspec:SVE_FULL_HSDI - [(match_operand:SVE_FULL_HSDI 2 "register_operand" "") + [(set (match_operand:SVE_FULL_HSDI_SIMD_DI 0 "register_operand" "=w") + (mult:SVE_FULL_HSDI_SIMD_DI + (unspec:SVE_FULL_HSDI_SIMD_DI + [(match_operand:SVE_FULL_HSDI_SIMD_DI 2 "register_operand" "") (match_operand:SI 3 "const_int_operand")] UNSPEC_SVE_LANE_SELECT) - (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")))] + (match_operand:SVE_FULL_HSDI_SIMD_DI 1 "register_operand" "w")))] "TARGET_SVE2" - "mul\t%0., %1., %2.[%3]" + "mul\t%Z0., %Z1., %Z2.[%3]" ) ;; The 2nd and 3rd alternatives are valid for just TARGET_SVE as well but ;; we include them here to allow matching simpler, unpredicated RTL. (define_insn "*aarch64_mul_unpredicated_" - [(set (match_operand:SVE_I 0 "register_operand") - (mult:SVE_I - (match_operand:SVE_I 1 "register_operand") - (match_operand:SVE_I 2 "aarch64_sve_vsm_operand")))] + [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand") + (mult:SVE_I_SIMD_DI + (match_operand:SVE_I_SIMD_DI 1 "register_operand") + (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_vsm_operand")))] "TARGET_SVE2" {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ] - [ w , w , w ; * ] mul\t%0., %1., %2. - [ w , 0 , vsm ; * ] mul\t%0., %0., #%2 - [ ?&w , w , vsm ; yes ] movprfx\t%0, %1\;mul\t%0., %0., #%2 + [ w , w , w ; * ] mul\t%Z0., %Z1., %Z2. + [ w , 0 , vsm ; * ] mul\t%Z0., %Z0., #%2 + [ ?&w , w , vsm ; yes ] movprfx\t%Z0, %Z1\;mul\t%Z0., %Z0., #%2 } ) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 942270e99d6..99cde46f1ba 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -108,9 +108,6 @@ ;; Copy of the above. (define_mode_iterator DREG2 [DREG]) -;; Advanced SIMD modes for integer divides. -(define_mode_iterator VQDIV [V4SI V2DI]) - ;; All modes suitable to store/load pair (2 elements) using STP/LDP. (define_mode_iterator VP_2E [V2SI V2SF V2DI V2DF]) @@ -471,6 +468,10 @@ ;; elements. (define_mode_iterator SVE_FULL_HSDI [VNx8HI VNx4SI VNx2DI]) +;; Fully-packed SVE integer vector modes that have 16-bit, 32-bit or 64-bit +;; elements and Advanced SIMD Fully-packed 64-bit elements. 
+(define_mode_iterator SVE_FULL_HSDI_SIMD_DI [SVE_FULL_HSDI V2DI])
+
 ;; Fully-packed SVE integer vector modes that have 16-bit or 32-bit
 ;; elements.
 (define_mode_iterator SVE_FULL_HSI [VNx8HI VNx4SI])
@@ -488,6 +489,10 @@
 ;; Fully-packed SVE integer vector modes that have 32-bit or 64-bit elements.
 (define_mode_iterator SVE_FULL_SDI [VNx4SI VNx2DI])
 
+;; Fully-packed SVE and Advanced SIMD integer vector modes that have 32-bit or
+;; 64-bit elements.
+(define_mode_iterator SVE_FULL_SDI_SIMD [SVE_FULL_SDI V4SI V2DI])
+
 ;; 2x and 4x tuples of the above, excluding 2x DI.
 (define_mode_iterator SVE_FULL_SIx2_SDIx4 [VNx8SI VNx16SI VNx8DI])
 
@@ -550,6 +555,10 @@
 			    VNx4SI VNx2SI
 			    VNx2DI])
 
+;; All SVE integer vector modes, plus the Advanced SIMD 64-bit element
+;; vector mode.
+(define_mode_iterator SVE_I_SIMD_DI [SVE_I V2DI])
+
 ;; SVE integer vector modes whose elements are 16 bits or wider.
 (define_mode_iterator SVE_HSDI [VNx8HI VNx4HI VNx2HI
 				VNx4SI VNx2SI
@@ -2268,7 +2277,8 @@
 			     (VNx32HI "VNx8BI") (VNx32HF "VNx8BI")
 			     (VNx32BF "VNx8BI")
 			     (VNx16SI "VNx4BI") (VNx16SF "VNx4BI")
-			     (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")])
+			     (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")
+			     (V4SI "VNx4BI") (V2DI "VNx2BI")])
 
 ;; ...and again in lower case.
 (define_mode_attr vpred [(VNx16QI "vnx16bi") (VNx8QI "vnx8bi")
@@ -2370,6 +2380,7 @@
 
 ;; The constraint to use for an SVE [SU]DOT, FMUL, FMLA or FMLS lane index.
 (define_mode_attr sve_lane_con [(VNx8HI "y") (VNx4SI "y") (VNx2DI "x")
+				(V2DI "x")
 				(VNx8HF "y") (VNx4SF "y") (VNx2DF "x")])
 
 ;; The constraint to use for an SVE FCMLA lane index.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c
new file mode 100644
index 00000000000..5b37ddd2770
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c
@@ -0,0 +1,13 @@
+/* { dg-additional-options "-O -mtune=a64fx" } */
+
+typedef unsigned long long __attribute__((__vector_size__ (16))) V;
+typedef unsigned long long __attribute__((__vector_size__ (32))) W;
+
+extern void bar (V v);
+
+void foo (V v, W w)
+{
+  bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) / v));
+}
+
+/* { dg-final { scan-assembler {udiv\tz[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c
new file mode 100644
index 00000000000..6d39dc8e590
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c
@@ -0,0 +1,13 @@
+/* { dg-additional-options "-O -mcpu=a64fx" } */
+
+typedef unsigned long long __attribute__((__vector_size__ (16))) V;
+typedef unsigned long long __attribute__((__vector_size__ (32))) W;
+
+extern void bar (V v);
+
+void foom (V v, W w)
+{
+  bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) * v));
+}
+
+/* { dg-final { scan-assembler {mul\tz[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c
new file mode 100644
index 00000000000..2bea18ad703
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c
@@ -0,0 +1,13 @@
+/* { dg-additional-options "-O -mtune=a64fx" } */
+
+typedef unsigned long long __attribute__((__vector_size__ (16))) V;
+typedef unsigned long long __attribute__((__vector_size__ (32))) W;
+
+extern void bar (V v);
+
+void foom (V v, W w)
+{
+  bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) * v));
+}
+
+/* { dg-final { scan-assembler {mul\tz[0-9]+.d, z[0-9]+.d, z[0-9]+.d} } } */
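--

Usage illustration (not part of the patch): with the mul<mode>3 and
<optab><mode>3 division expanders above now covering the Advanced SIMD V2DI
(and V4SI for division) modes, plain GNU C vector arithmetic on 128-bit
vectors of 64-bit elements can be expanded through the overlapping SVE Z
registers instead of being scalarised.  A minimal sketch, assuming an
SVE-enabled target such as -march=armv8.2-a+sve (the function names here are
made up for the example):

typedef unsigned long long __attribute__((__vector_size__ (16))) v2di;

/* Expected to use the SVE mul pattern, e.g. MUL z0.d, p0/m, z0.d, z1.d,
   or the unpredicated MUL z0.d, z0.d, z1.d when SVE2 is available.  */
v2di
mul_v2di (v2di a, v2di b)
{
  return a * b;
}

/* Expected to use the SVE udiv pattern, e.g. UDIV z0.d, p0/m, z0.d, z1.d;
   Advanced SIMD has no 64-bit integer vector division at all.  */
v2di
udiv_v2di (v2di a, v2di b)
{
  return a / b;
}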