[AArch64] Implement ACLE intrinsics for FRINT[32,64][Z,X]

This patch implements the ACLE intrinsics to access the FRINT[32,64][Z,X] scalar[1] and vector[2][3] instructions
from Armv8.5-a. These are enabled when the __ARM_FEATURE_FRINT macro is defined.

They're added in a fairly standard way through builtins and unspecs at the RTL level.

	* config/aarch64/aarch64.md ("unspec"): Add UNSPEC_FRINT32Z,
	UNSPEC_FRINT32X, UNSPEC_FRINT64Z, UNSPEC_FRINT64X.
	(aarch64_<frintnzs_op><mode>): New define_insn.
	* config/aarch64/aarch64.h (TARGET_FRINT): Define.
	* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Define
	__ARM_FEATURE_FRINT when appropriate.
	* config/aarch64/aarch64-simd-builtins.def: Add builtins for frint32z,
	frint32x, frint64z, frint64x.
	* config/aarch64/arm_acle.h (__rint32zf, __rint32z, __rint64zf,
	__rint64z, __rint32xf, __rint32x, __rint64xf, __rint64x): Define.
	* config/aarch64/arm_neon.h (vrnd32z_f32, vrnd32zq_f32, vrnd32z_f64,
	vrnd32zq_f64, vrnd32x_f32, vrnd32xq_f32, vrnd32x_f64, vrnd32xq_f64,
	vrnd64z_f32, vrnd64zq_f32, vrnd64z_f64, vrnd64zq_f64, vrnd64x_f32,
	vrnd64xq_f32, vrnd64x_f64, vrnd64xq_f64): Define.
	* config/aarch64/iterators.md (VSFDF): Define.
	(FRINTNZX): Likewise.
	(frintnzs_op): Likewise.

	* gcc.target/aarch64/acle/rintnzx_1.c: New test.
	* gcc.target/aarch64/simd/vrndnzx_1.c: Likewise.

From-SVN: r275334
This commit is contained in:
Kyrylo Tkachov 2019-09-03 08:38:08 +00:00 committed by Kyrylo Tkachov
parent e0664b7a63
commit 10bd1d964e
11 changed files with 439 additions and 0 deletions

View file

@@ -1,3 +1,23 @@
2019-09-03 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/aarch64/aarch64.md ("unspec"): Add UNSPEC_FRINT32Z,
UNSPEC_FRINT32X, UNSPEC_FRINT64Z, UNSPEC_FRINT64X.
(aarch64_<frintnzs_op><mode>): New define_insn.
* config/aarch64/aarch64.h (TARGET_FRINT): Define.
* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Define
__ARM_FEATURE_FRINT when appropriate.
* config/aarch64/aarch64-simd-builtins.def: Add builtins for frint32z,
frint32x, frint64z, frint64x.
* config/aarch64/arm_acle.h (__rint32zf, __rint32z, __rint64zf,
__rint64z, __rint32xf, __rint32x, __rint64xf, __rint64x): Define.
* config/aarch64/arm_neon.h (vrnd32z_f32, vrnd32zq_f32, vrnd32z_f64,
vrnd32zq_f64, vrnd32x_f32, vrnd32xq_f32, vrnd32x_f64, vrnd32xq_f64,
vrnd64z_f32, vrnd64zq_f32, vrnd64z_f64, vrnd64zq_f64, vrnd64x_f32,
vrnd64xq_f32, vrnd64x_f64, vrnd64xq_f64): Define.
* config/aarch64/iterators.md (VSFDF): Define.
(FRINTNZX): Likewise.
(frintnzs_op): Likewise.
2019-09-03 Dennis Zhang <dennis.zhang@arm.com>
* config/aarch64/aarch64-cores.def (AARCH64_CORE): New entries

View file

@@ -157,6 +157,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
aarch64_def_or_undef (TARGET_SM4, "__ARM_FEATURE_SM4", pfile);
aarch64_def_or_undef (TARGET_F16FML, "__ARM_FEATURE_FP16_FML", pfile);
aarch64_def_or_undef (TARGET_FRINT, "__ARM_FEATURE_FRINT", pfile);
aarch64_def_or_undef (TARGET_TME, "__ARM_FEATURE_TME", pfile);
/* Not for ACLE, but required to keep "float.h" correct if we switch

View file

@@ -676,3 +676,9 @@
/* Implemented by aarch64_fml<f16mac1>lq_laneq_highv4sf. */
VAR1 (QUADOP_LANE, fmlalq_laneq_high, 0, v4sf)
VAR1 (QUADOP_LANE, fmlslq_laneq_high, 0, v4sf)
/* Implemented by aarch64_<frintnzs_op><mode>. */
BUILTIN_VSFDF (UNOP, frint32z, 0)
BUILTIN_VSFDF (UNOP, frint32x, 0)
BUILTIN_VSFDF (UNOP, frint64z, 0)
BUILTIN_VSFDF (UNOP, frint64x, 0)

View file

@@ -291,6 +291,9 @@ extern unsigned aarch64_architecture_version;
/* Armv8.3-a Complex number extension to AdvSIMD extensions. */
#define TARGET_COMPLEX (TARGET_SIMD && TARGET_ARMV8_3)
/* Floating-point rounding instructions from Armv8.5-a. */
#define TARGET_FRINT (AARCH64_ISA_V8_5 && TARGET_FLOAT)
/* TME instructions are enabled. */
#define TARGET_TME (AARCH64_ISA_TME)

View file

@@ -141,6 +141,10 @@
UNSPEC_CRC32X
UNSPEC_FCVTZS
UNSPEC_FCVTZU
UNSPEC_FRINT32Z
UNSPEC_FRINT32X
UNSPEC_FRINT64Z
UNSPEC_FRINT64X
UNSPEC_URECPE
UNSPEC_FRECPE
UNSPEC_FRECPS
@@ -7306,6 +7310,16 @@
(set_attr "speculation_barrier" "true")]
)
;; FRINT[32,64][Z,X] rounding instructions from Armv8.5-a, expanded from
;; the frint32z/frint32x/frint64z/frint64x builtins.  VSFDF covers both
;; the scalar SF/DF modes and the Advanced SIMD float vector modes; the
;; condition therefore additionally requires TARGET_SIMD for the vector
;; modes while allowing the scalar forms with just TARGET_FRINT.
(define_insn "aarch64_<frintnzs_op><mode>"
  [(set (match_operand:VSFDF 0 "register_operand" "=w")
	(unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")]
		      FRINTNZX))]
  "TARGET_FRINT && TARGET_FLOAT
   && !(VECTOR_MODE_P (<MODE>mode) && !TARGET_SIMD)"
  "<frintnzs_op>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "f_rint<stype>")]
)
;; Transactional Memory Extension (TME) instructions.
(define_insn "tstart"

View file

@@ -33,6 +33,59 @@
extern "C" {
#endif
#pragma GCC push_options
#pragma GCC target ("arch=armv8.5-a")
/* ACLE scalar rounding intrinsics (Armv8.5-a FRINT extension).  The "z"
   variants round to an integral value using the round-towards-zero mode,
   saturating the result to the 32-bit or 64-bit signed integer range.  */

/* Round the single-precision value __a towards zero to an integral value
   within the 32-bit signed integer range (FRINT32Z).  */
__extension__ static __inline float __attribute__ ((__always_inline__))
__rint32zf (float __a)
{
  return __builtin_aarch64_frint32zsf (__a);
}

/* Round the double-precision value __a towards zero to an integral value
   within the 32-bit signed integer range (FRINT32Z).  */
__extension__ static __inline double __attribute__ ((__always_inline__))
__rint32z (double __a)
{
  return __builtin_aarch64_frint32zdf (__a);
}

/* Round the single-precision value __a towards zero to an integral value
   within the 64-bit signed integer range (FRINT64Z).  */
__extension__ static __inline float __attribute__ ((__always_inline__))
__rint64zf (float __a)
{
  return __builtin_aarch64_frint64zsf (__a);
}

/* Round the double-precision value __a towards zero to an integral value
   within the 64-bit signed integer range (FRINT64Z).  */
__extension__ static __inline double __attribute__ ((__always_inline__))
__rint64z (double __a)
{
  return __builtin_aarch64_frint64zdf (__a);
}
/* ACLE scalar rounding intrinsics, "x" variants: as the "z" variants but
   using the current rounding mode from FPCR instead of round-towards-zero.  */

/* Round the single-precision value __a, using the current rounding mode,
   to an integral value within the 32-bit signed integer range (FRINT32X).  */
__extension__ static __inline float __attribute__ ((__always_inline__))
__rint32xf (float __a)
{
  return __builtin_aarch64_frint32xsf (__a);
}

/* Round the double-precision value __a, using the current rounding mode,
   to an integral value within the 32-bit signed integer range (FRINT32X).  */
__extension__ static __inline double __attribute__ ((__always_inline__))
__rint32x (double __a)
{
  return __builtin_aarch64_frint32xdf (__a);
}

/* Round the single-precision value __a, using the current rounding mode,
   to an integral value within the 64-bit signed integer range (FRINT64X).  */
__extension__ static __inline float __attribute__ ((__always_inline__))
__rint64xf (float __a)
{
  return __builtin_aarch64_frint64xsf (__a);
}

/* Round the double-precision value __a, using the current rounding mode,
   to an integral value within the 64-bit signed integer range (FRINT64X).  */
__extension__ static __inline double __attribute__ ((__always_inline__))
__rint64x (double __a)
{
  return __builtin_aarch64_frint64xdf (__a);
}
#pragma GCC pop_options
#pragma GCC push_options
#pragma GCC target ("+nothing+crc")

View file

@@ -34469,6 +34469,124 @@ vfmlslq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
#pragma GCC pop_options
#pragma GCC push_options
#pragma GCC target ("arch=armv8.5-a")
/* vrnd32z family: round each element towards zero to an integral value
   within the 32-bit signed integer range (FRINT32Z).  */
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrnd32z_f32 (float32x2_t __a)
{
  return __builtin_aarch64_frint32zv2sf (__a);
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrnd32zq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_frint32zv4sf (__a);
}

/* There is no V1DF builtin mode, so route the single-lane variant through
   the scalar DF builtin on lane 0 and rebuild the vector type.  */
__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrnd32z_f64 (float64x1_t __a)
{
  return (float64x1_t)
	   {__builtin_aarch64_frint32zdf (vget_lane_f64 (__a, 0))};
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrnd32zq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_frint32zv2df (__a);
}
/* vrnd32x family: round each element, using the current rounding mode, to
   an integral value within the 32-bit signed integer range (FRINT32X).  */
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrnd32x_f32 (float32x2_t __a)
{
  return __builtin_aarch64_frint32xv2sf (__a);
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrnd32xq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_frint32xv4sf (__a);
}

/* Single-lane variant: no V1DF builtin mode, so use the scalar DF builtin
   on lane 0.  */
__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrnd32x_f64 (float64x1_t __a)
{
  return (float64x1_t) {__builtin_aarch64_frint32xdf (vget_lane_f64 (__a, 0))};
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrnd32xq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_frint32xv2df (__a);
}
/* vrnd64z family: round each element towards zero to an integral value
   within the 64-bit signed integer range (FRINT64Z).  */
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrnd64z_f32 (float32x2_t __a)
{
  return __builtin_aarch64_frint64zv2sf (__a);
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrnd64zq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_frint64zv4sf (__a);
}

/* Single-lane variant: no V1DF builtin mode, so use the scalar DF builtin
   on lane 0.  */
__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrnd64z_f64 (float64x1_t __a)
{
  return (float64x1_t) {__builtin_aarch64_frint64zdf (vget_lane_f64 (__a, 0))};
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrnd64zq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_frint64zv2df (__a);
}
/* vrnd64x family: round each element, using the current rounding mode, to
   an integral value within the 64-bit signed integer range (FRINT64X).  */
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrnd64x_f32 (float32x2_t __a)
{
  return __builtin_aarch64_frint64xv2sf (__a);
}

__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrnd64xq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_frint64xv4sf (__a);
}

/* Single-lane variant: no V1DF builtin mode, so use the scalar DF builtin
   on lane 0.  */
__extension__ extern __inline float64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrnd64x_f64 (float64x1_t __a)
{
  return (float64x1_t) {__builtin_aarch64_frint64xdf (vget_lane_f64 (__a, 0))};
}

__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrnd64xq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_frint64xv2df (__a);
}
#pragma GCC pop_options
#undef __aarch64_vget_lane_any
#undef __aarch64_vdup_lane_any

View file

@@ -128,6 +128,9 @@
(HF "TARGET_SIMD_F16INST")
SF DF])
;; Scalar and vector modes for SF, DF.
(define_mode_iterator VSFDF [V2SF V4SF V2DF DF SF])
;; Advanced SIMD single Float modes.
(define_mode_iterator VDQSF [V2SF V4SF])
@@ -1758,6 +1761,9 @@
UNSPEC_FCMLA180
UNSPEC_FCMLA270])
(define_int_iterator FRINTNZX [UNSPEC_FRINT32Z UNSPEC_FRINT32X
UNSPEC_FRINT64Z UNSPEC_FRINT64X])
;; Iterators for atomic operations.
(define_int_iterator ATOMIC_LDOP
@@ -2041,6 +2047,9 @@
(define_int_attr f16mac1 [(UNSPEC_FMLAL "a") (UNSPEC_FMLSL "s")
(UNSPEC_FMLAL2 "a") (UNSPEC_FMLSL2 "s")])
(define_int_attr frintnzs_op [(UNSPEC_FRINT32Z "frint32z") (UNSPEC_FRINT32X "frint32x")
(UNSPEC_FRINT64Z "frint64z") (UNSPEC_FRINT64X "frint64x")])
;; The condition associated with an UNSPEC_COND_<xx>.
(define_int_attr cmp_op [(UNSPEC_COND_FCMEQ "eq")
(UNSPEC_COND_FCMGE "ge")

View file

@@ -1,3 +1,8 @@
2019-09-03 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/aarch64/acle/rintnzx_1.c: New test.
* gcc.target/aarch64/simd/vrndnzx_1.c: Likewise.
2019-09-03 Jakub Jelinek <jakub@redhat.com>
Richard Biener <rguenther@suse.de>

View file

@@ -0,0 +1,73 @@
/* Test the __rint[32,64][z,x] intrinsics. */
/* { dg-do compile } */
/* { dg-options "-O2 -march=armv8.5-a" } */
#include <arm_acle.h>
#ifdef __ARM_FEATURE_FRINT
/* Each function below wraps one scalar __rint* intrinsic; the dg-final
   scan-assembler-times directives check that each expands to exactly one
   FRINT[32,64][Z,X] instruction on the expected register class
   (s-registers for float, d-registers for double).  */
float
foo_32z_f32_scal (float a)
{
  return __rint32zf (a);
}
/* { dg-final { scan-assembler-times "frint32z\ts\[0-9\]+, s\[0-9\]+\n" 1 } } */

double
foo_32z_f64_scal (double a)
{
  return __rint32z (a);
}
/* { dg-final { scan-assembler-times "frint32z\td\[0-9\]+, d\[0-9\]+\n" 1 } } */

float
foo_32x_f32_scal (float a)
{
  return __rint32xf (a);
}
/* { dg-final { scan-assembler-times "frint32x\ts\[0-9\]+, s\[0-9\]+\n" 1 } } */

double
foo_32x_f64_scal (double a)
{
  return __rint32x (a);
}
/* { dg-final { scan-assembler-times "frint32x\td\[0-9\]+, d\[0-9\]+\n" 1 } } */

float
foo_64z_f32_scal (float a)
{
  return __rint64zf (a);
}
/* { dg-final { scan-assembler-times "frint64z\ts\[0-9\]+, s\[0-9\]+\n" 1 } } */

double
foo_64z_f64_scal (double a)
{
  return __rint64z (a);
}
/* { dg-final { scan-assembler-times "frint64z\td\[0-9\]+, d\[0-9\]+\n" 1 } } */

float
foo_64x_f32_scal (float a)
{
  return __rint64xf (a);
}
/* { dg-final { scan-assembler-times "frint64x\ts\[0-9\]+, s\[0-9\]+\n" 1 } } */

double
foo_64x_f64_scal (double a)
{
  return __rint64x (a);
}
/* { dg-final { scan-assembler-times "frint64x\td\[0-9\]+, d\[0-9\]+\n" 1 } } */
#endif

View file

@@ -0,0 +1,137 @@
/* Test the vrnd[32,64][z,x] intrinsics. */
/* { dg-do compile } */
/* { dg-options "-O2 -march=armv8.5-a" } */
#include "arm_neon.h"
#ifdef __ARM_FEATURE_FRINT
/* Each function below wraps one vrnd[32,64][z,x] Advanced SIMD intrinsic;
   the dg-final scan-assembler-times directives check that each expands to
   exactly one FRINT instruction with the expected operand arrangement
   (.2s/.4s/.2d vector forms, or the d-register form for the single-lane
   float64x1_t variants).  */
float32x2_t
foo_32z (float32x2_t a)
{
  return vrnd32z_f32 (a);
}
/* { dg-final { scan-assembler-times "frint32z\tv\[0-9\]+\.2s, v\[0-9\]+\.2s\n" 1 } } */

float32x4_t
foo_32z_q (float32x4_t a)
{
  return vrnd32zq_f32 (a);
}
/* { dg-final { scan-assembler-times "frint32z\tv\[0-9\]+\.4s, v\[0-9\]+\.4s\n" 1 } } */

float64x1_t
foo_32z_f64 (float64x1_t a)
{
  return vrnd32z_f64 (a);
}
/* { dg-final { scan-assembler-times "frint32z\td\[0-9\]+, d\[0-9\]+\n" 1 } } */

float64x2_t
foo_32z_q_f64 (float64x2_t a)
{
  return vrnd32zq_f64 (a);
}
/* { dg-final { scan-assembler-times "frint32z\tv\[0-9\]+\.2d, v\[0-9\]+\.2d\n" 1 } } */

float32x2_t
foo_32x (float32x2_t a)
{
  return vrnd32x_f32 (a);
}
/* { dg-final { scan-assembler-times "frint32x\tv\[0-9\]+\.2s, v\[0-9\]+\.2s\n" 1 } } */

float32x4_t
foo_32x_q (float32x4_t a)
{
  return vrnd32xq_f32 (a);
}
/* { dg-final { scan-assembler-times "frint32x\tv\[0-9\]+\.4s, v\[0-9\]+\.4s\n" 1 } } */

float64x1_t
foo_32x_f64 (float64x1_t a)
{
  return vrnd32x_f64 (a);
}
/* { dg-final { scan-assembler-times "frint32x\td\[0-9\]+, d\[0-9\]+\n" 1 } } */

float64x2_t
foo_32x_q_f64 (float64x2_t a)
{
  return vrnd32xq_f64 (a);
}
/* { dg-final { scan-assembler-times "frint32x\tv\[0-9\]+\.2d, v\[0-9\]+\.2d\n" 1 } } */

float32x2_t
foo_64z (float32x2_t a)
{
  return vrnd64z_f32 (a);
}
/* { dg-final { scan-assembler-times "frint64z\tv\[0-9\]+\.2s, v\[0-9\]+\.2s\n" 1 } } */

float32x4_t
foo_64z_q (float32x4_t a)
{
  return vrnd64zq_f32 (a);
}
/* { dg-final { scan-assembler-times "frint64z\tv\[0-9\]+\.4s, v\[0-9\]+\.4s\n" 1 } } */

float64x1_t
foo_64z_f64 (float64x1_t a)
{
  return vrnd64z_f64 (a);
}
/* { dg-final { scan-assembler-times "frint64z\td\[0-9\]+, d\[0-9\]+\n" 1 } } */

float64x2_t
foo_64z_q_f64 (float64x2_t a)
{
  return vrnd64zq_f64 (a);
}
/* { dg-final { scan-assembler-times "frint64z\tv\[0-9\]+\.2d, v\[0-9\]+\.2d\n" 1 } } */

float32x2_t
foo_64x (float32x2_t a)
{
  return vrnd64x_f32 (a);
}
/* { dg-final { scan-assembler-times "frint64x\tv\[0-9\]+\.2s, v\[0-9\]+\.2s\n" 1 } } */

float32x4_t
foo_64x_q (float32x4_t a)
{
  return vrnd64xq_f32 (a);
}
/* { dg-final { scan-assembler-times "frint64x\tv\[0-9\]+\.4s, v\[0-9\]+\.4s\n" 1 } } */

float64x1_t
foo_64x_f64 (float64x1_t a)
{
  return vrnd64x_f64 (a);
}
/* { dg-final { scan-assembler-times "frint64x\td\[0-9\]+, d\[0-9\]+\n" 1 } } */

float64x2_t
foo_64x_q_f64 (float64x2_t a)
{
  return vrnd64xq_f64 (a);
}
/* { dg-final { scan-assembler-times "frint64x\tv\[0-9\]+\.2d, v\[0-9\]+\.2d\n" 1 } } */
#endif