[AArch64] Handle HFAs of float16 types properly

Fix PR Target/72819. gcc/ PR Target/72819 * config/aarch64/aarch64.h (aarch64_fp16_type_node): Declare. (aarch64_fp16_ptr_type_node): Likewise. * config/aarch64/aarch64-builtins.c (aarch64_fp16_ptr_type_node): Define. (aarch64_init_fp16_types): New, refactored out of... (aarch64_init_builtins): ...here, update to call aarch64_init_fp16_types. * config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Handle HFmode. (aapcs_vfp_sub_candidate): Likewise. gcc/testsuite/ PR Target/72819 * gcc.target/aarch64/aapcs64/abitest-common.h: Define half-precision registers. * gcc.target/aarch64/aapcs64/abitest.S (dumpregs): Add assembly for saving the half-precision registers. * gcc.target/aarch64/aapcs64/func-ret-1.c: Test that an __fp16 value is returned in h0. * gcc.target/aarch64/aapcs64/test_2.c: Check that __fp16 arguments are passed in FP/SIMD registers. * gcc.target/aarch64/aapcs64/test_27.c: New, test that __fp16 HFA passing works correctly. * gcc.target/aarch64/aapcs64/type-def.h (hfa_f16x1_t): New. (hfa_f16x2_t): Likewise. (hfa_f16x3_t): Likewise. * gcc.target/aarch64/aapcs64/va_arg-1.c: Check that __fp16 values are promoted to double and passed in a double register. * gcc.target/aarch64/aapcs64/va_arg-2.c: Check that __fp16 values are promoted to double and stacked. * gcc.target/aarch64/aapcs64/va_arg-4.c: Check stacking of HFA of __fp16 data types. * gcc.target/aarch64/aapcs64/va_arg-5.c: Likewise. * gcc.target/aarch64/aapcs64/va_arg-16.c: New, check HFAs of __fp16 first get passed in FP/SIMD registers, then stacked. From-SVN: r239173
2016-08-05 16:08:24 +00:00 · 2016-08-05 16:08:24 +00:00 · 1b62ed4f26
commit 1b62ed4f26
parent a0b1bf2181
16 changed files with 209 additions and 26 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,17 @@
+2016-08-05  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	PR Target/72819
+	* config/aarch64/aarch64.h (aarch64_fp16_type_node): Declare.
+	(aarch64_fp16_ptr_type_node): Likewise.
+	* config/aarch64/aarch64-builtins.c
+	(aarch64_fp16_ptr_type_node): Define.
+	(aarch64_init_fp16_types): New, refactored out of...
+	(aarch64_init_builtins): ...here, update to call
+	aarch64_init_fp16_types.
+	* config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Handle
+	HFmode.
+	(aapcs_vfp_sub_candidate): Likewise.
+
 2016-08-05  Martin Liska  <mliska@suse.cz>

 	    Joshua Cranmer  <Pidgeot18@gmail.com>
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@ -443,13 +443,15 @@ static struct aarch64_simd_type_info aarch64_simd_types [] = {
 };
 #undef ENTRY

-/* This type is not SIMD-specific; it is the user-visible __fp16.  */
-static tree aarch64_fp16_type_node = NULL_TREE;
-
 static tree aarch64_simd_intOI_type_node = NULL_TREE;
 static tree aarch64_simd_intCI_type_node = NULL_TREE;
 static tree aarch64_simd_intXI_type_node = NULL_TREE;

+/* The user-visible __fp16 type, and a pointer to that type.  Used
+   across the back-end.  */
+tree aarch64_fp16_type_node = NULL_TREE;
+tree aarch64_fp16_ptr_type_node = NULL_TREE;
+
 static const char *
 aarch64_mangle_builtin_scalar_type (const_tree type)
 {
@ -883,6 +885,21 @@ aarch64_init_builtin_rsqrt (void)
  }
 }

+/* Initialize the backend types that support the user-visible __fp16
+   type, also initialize a pointer to that type, to be used when
+   forming HFAs.  */
+
+static void
+aarch64_init_fp16_types (void)
+{
+  aarch64_fp16_type_node = make_node (REAL_TYPE);
+  TYPE_PRECISION (aarch64_fp16_type_node) = 16;
+  layout_type (aarch64_fp16_type_node);
+
+  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
+  aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
+}
+
 void
 aarch64_init_builtins (void)
 {
@ -904,11 +921,7 @@ aarch64_init_builtins (void)
    = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr,
 			    AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);

-  aarch64_fp16_type_node = make_node (REAL_TYPE);
-  TYPE_PRECISION (aarch64_fp16_type_node) = 16;
-  layout_type (aarch64_fp16_type_node);
-
-  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
+  aarch64_init_fp16_types ();

  if (TARGET_SIMD)
    aarch64_init_simd_builtins ();
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@ -9881,15 +9881,10 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
 	  field_t = long_double_type_node;
 	  field_ptr_t = long_double_ptr_type_node;
 	  break;
-/* The half precision and quad precision are not fully supported yet.  Enable
-   the following code after the support is complete.  Need to find the correct
-   type node for __fp16 *.  */
-#if 0
 	case HFmode:
-	  field_t = float_type_node;
-	  field_ptr_t = float_ptr_type_node;
+	  field_t = aarch64_fp16_type_node;
+	  field_ptr_t = aarch64_fp16_ptr_type_node;
 	  break;
-#endif
 	case V2SImode:
 	case V4SImode:
 	    {
@ -10051,7 +10046,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
-      if (mode != DFmode && mode != SFmode && mode != TFmode)
+      if (mode != DFmode && mode != SFmode
+	  && mode != TFmode && mode != HFmode)
 	return -1;

      if (*modep == VOIDmode)
@ -10064,7 +10060,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
-      if (mode != DFmode && mode != SFmode && mode != TFmode)
+      if (mode != DFmode && mode != SFmode
+	  && mode != TFmode && mode != HFmode)
 	return -1;

      if (*modep == VOIDmode)
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@ -944,4 +944,9 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);

 #define ASM_OUTPUT_POOL_EPILOGUE  aarch64_asm_output_pool_epilogue

+/* The user-visible __fp16 type, and a pointer to that type.  They are
+   needed in many places in the backend.  Defined in aarch64-builtins.c.  */
+extern tree aarch64_fp16_type_node;
+extern tree aarch64_fp16_ptr_type_node;
+
 #endif /* GCC_AARCH64_H */
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@ -1,3 +1,29 @@
+2016-08-05  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	PR Target/72819
+	* gcc.target/aarch64/aapcs64/abitest-common.h: Define half-precision
+	registers.
+	* gcc.target/aarch64/aapcs64/abitest.S (dumpregs): Add assembly for
+	saving the half-precision registers.
+	* gcc.target/aarch64/aapcs64/func-ret-1.c: Test that an __fp16
+	value is returned in h0.
+	* gcc.target/aarch64/aapcs64/test_2.c: Check that __fp16 arguments
+	are passed in FP/SIMD registers.
+	* gcc.target/aarch64/aapcs64/test_27.c: New, test that __fp16 HFA
+	passing works correctly.
+	* gcc.target/aarch64/aapcs64/type-def.h (hfa_f16x1_t): New.
+	(hfa_f16x2_t): Likewise.
+	(hfa_f16x3_t): Likewise.
+	* gcc.target/aarch64/aapcs64/va_arg-1.c: Check that __fp16 values
+	are promoted to double and passed in a double register.
+	* gcc.target/aarch64/aapcs64/va_arg-2.c: Check that __fp16 values
+	are promoted to double and stacked.
+	* gcc.target/aarch64/aapcs64/va_arg-4.c: Check stacking of HFA of
+	__fp16 data types.
+	* gcc.target/aarch64/aapcs64/va_arg-5.c: Likewise.
+	* gcc.target/aarch64/aapcs64/va_arg-16.c: New, check HFAs of
+	__fp16 first get passed in FP/SIMD registers, then stacked.
+
 2016-08-05  Nathan Sidwell  <nathan@acm.org>

 	PR c++/68724
--- a/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest-common.h
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest-common.h
@ -57,7 +57,17 @@
 #define X8     320
 #define X9     328

-#define STACK  336
+#define H0	336
+#define H1	338
+#define H2	340
+#define H3	342
+#define H4	344
+#define H5	346
+#define H6	348
+#define H7	350
+
+
+#define STACK  352

 /* The type of test.  'myfunc' in abitest.S needs to know which kind of
   test it is running to decide what to do at the runtime.  Keep the
--- a/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest.S
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest.S
@ -13,7 +13,12 @@ dumpregs:
 myfunc:
      mov	x16, sp
      mov	x17, sp
-      sub	sp,  sp, 352 // 336 for registers and 16 for old sp and lr
+      sub	sp,  sp, 368 // 352 for registers and 16 for old sp and lr
+
+      sub	x17, x17, 8
+      st4	{ v4.h, v5.h, v6.h, v7.h }[0], [x17] //344
+      sub	x17, x17, 8
+      st4	{ v0.h, v1.h, v2.h, v3.h }[0], [x17] //336

      stp	x8, x9, [x17, #-16]! //320

--- a/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c
@ -44,4 +44,5 @@ FUNC_VAL_CHECK (12,         vf2_t,        vf2, D0, f32in64)
 FUNC_VAL_CHECK (13,         vi4_t,        vi4, Q0, i32in128)
 FUNC_VAL_CHECK (14,         int *,    int_ptr, X0, flat)
 FUNC_VAL_CHECK (15,         vlf1_t,    vlf1, Q0, flat)
+FUNC_VAL_CHECK (16,         __fp16,    0xabcd, H0, flat)
 #endif
--- a/gcc/testsuite/gcc.target/aarch64/aapcs64/test_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_2.c
@ -12,5 +12,6 @@
  ARG(double, 4.0, D1)
  ARG(float, 2.0f, S2)
  ARG(double, 5.0, D3)
+  ARG(__fp16, 8.0f, H4)
  LAST_ARG(int, 3, W0)
 #endif
--- a/gcc/testsuite/gcc.target/aarch64/aapcs64/test_27.c
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_27.c
@ -0,0 +1,46 @@
+/* Test AAPCS64 layout
+
+   Test named homogeneous floating-point aggregates of __fp16 data,
+   which should be passed in SIMD/FP registers or via the stack.  */
+
+/* { dg-do run { target aarch64*-*-* } } */
+
+#ifndef IN_FRAMEWORK
+#define TESTFILE "test_27.c"
+
+struct x0
+{
+  __fp16 v[1];
+} f16x1;
+
+struct x1
+{
+  __fp16 v[2];
+} f16x2;
+
+struct x2
+{
+  __fp16 v[3];
+} f16x3;
+
+#define HAS_DATA_INIT_FUNC
+void init_data ()
+{
+  f16x1.v[0] = 2.0f;
+  f16x2.v[0] = 4.0f;
+  f16x2.v[1] = 8.0f;
+  f16x3.v[0] = 16.0f;
+  f16x3.v[1] = 32.0f;
+  f16x3.v[2] = 64.0f;
+}
+
+#include "abitest.h"
+#else
+ARG (struct x0, f16x1, H0)
+ARG (struct x1, f16x2, H1)
+ARG (struct x2, f16x3, H3)
+ARG (struct x1, f16x2, H6)
+ARG (struct x0, f16x1, STACK)
+ARG (int, 0xdeadbeef, W0)
+LAST_ARG (double, 456.789, STACK+8)
+#endif
--- a/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h
@ -44,6 +44,24 @@ struct hfa_fx3_t
  float c;
 };

+struct hfa_f16x1_t
+{
+  __fp16 a;
+};
+
+struct hfa_f16x2_t
+{
+  __fp16 a;
+  __fp16 b;
+};
+
+struct hfa_f16x3_t
+{
+  __fp16 a;
+  __fp16 b;
+  __fp16 c;
+};
+
 struct hfa_dx2_t
 {
  double a;
--- a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-1.c
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-1.c
@ -19,6 +19,8 @@ signed short ss = 0xcba9;
 signed int ss_promoted = 0xffffcba9;
 float fp = 65432.12345f;
 double fp_promoted = (double)65432.12345f;
+__fp16 fp16 = 2.0f;
+__fp16 fp16_promoted = (double)2.0f;

 #define HAS_DATA_INIT_FUNC
 void init_data ()
@ -46,9 +48,13 @@ void init_data ()
  ANON         (    long double   , 98765432123456789.987654321L,      Q2,      12)
  ANON         (             vf2_t, vf2   ,                            D3,      13)
  ANON         (             vi4_t, vi4   ,                            Q4,      14)
+  /* 7.2: For unprototyped (i.e. pre-ANSI or K&R C) and variadic functions,
+     in addition to the normal conversions and promotions, arguments of
+     type __fp16 are converted to type double.  */
+  ANON_PROMOTED(            __fp16, fp16  ,     double, fp16_promoted, D5,      15)
 #ifndef __AAPCS64_BIG_ENDIAN__
-  LAST_ANON    (         int      , 0xeeee,                            STACK+32,15)
+  LAST_ANON    (         int      , 0xeeee,                            STACK+32,16)
 #else
-  LAST_ANON    (         int      , 0xeeee,                            STACK+36,15)
+  LAST_ANON    (         int      , 0xeeee,                            STACK+36,16)
 #endif
 #endif
--- a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-16.c
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-16.c
@ -0,0 +1,28 @@
+/* Test AAPCS64 layout and __builtin_va_arg.
+
+   This test is focused particularly on __fp16 unnamed homogeneous
+   floating-point aggregate types which should be passed in fp/simd
+   registers until we run out of those, then the stack.  */
+
+/* { dg-do run { target aarch64*-*-* } } */
+
+#ifndef IN_FRAMEWORK
+#define AAPCS64_TEST_STDARG
+#define TESTFILE "va_arg-16.c"
+#include "type-def.h"
+
+struct hfa_f16x1_t hfa_f16x1 = {2.0f};
+struct hfa_f16x2_t hfa_f16x2 = {4.0f, 8.0f};
+struct hfa_f16x3_t hfa_f16x3 = {16.0f, 32.0f, 64.0f};
+
+#include "abitest.h"
+#else
+  ARG      (int, 1, W0, LAST_NAMED_ARG_ID)
+  DOTS
+  ANON     (struct hfa_f16x1_t, hfa_f16x1, H0     , 0)
+  ANON     (struct hfa_f16x2_t, hfa_f16x2, H1     , 1)
+  ANON     (struct hfa_f16x3_t, hfa_f16x3, H3     , 2)
+  ANON     (struct hfa_f16x2_t, hfa_f16x2, H6     , 3)
+  ANON     (struct hfa_f16x1_t, hfa_f16x1, STACK  , 4)
+  LAST_ANON(double            , 1.0      , STACK+8, 5)
+#endif
--- a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-2.c
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-2.c
@ -19,6 +19,8 @@ signed short ss = 0xcba9;
 signed int ss_promoted = 0xffffcba9;
 float fp = 65432.12345f;
 double fp_promoted = (double)65432.12345f;
+__fp16 fp16 = 2.0f;
+__fp16 fp16_promoted = (double)2.0f;

 #define HAS_DATA_INIT_FUNC
 void init_data ()
@ -64,9 +66,10 @@ void init_data ()
  ANON         (    long double   , 98765432123456789.987654321L,      STACK+80, 20)
  ANON         (             vf2_t, vf2   ,                            STACK+96, 21)
  ANON         (             vi4_t, vi4   ,                            STACK+112,22)
+  ANON_PROMOTED(         __fp16   , fp16  ,     double, fp16_promoted, STACK+128,23)
 #ifndef __AAPCS64_BIG_ENDIAN__
-  LAST_ANON    (         int      , 0xeeee,                            STACK+128,23)
+  LAST_ANON    (         int      , 0xeeee,                            STACK+136,24)
 #else
-  LAST_ANON    (         int      , 0xeeee,                            STACK+132,23)
+  LAST_ANON    (         int      , 0xeeee,                            STACK+140,24)
 #endif
 #endif
--- a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-4.c
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-4.c
@ -29,6 +29,8 @@ struct non_hfa_ffvf2_t non_hfa_ffvf2;
 struct non_hfa_fffd_t non_hfa_fffd = {33.f, 34.f, 35.f, 36.0};
 union hfa_union_t hfa_union;
 union non_hfa_union_t non_hfa_union;
+struct hfa_f16x2_t hfa_f16x2 = {2.0f, 4.0f};
+struct hfa_f16x3_t hfa_f16x3 = {2.0f, 4.0f, 8.0f};

 #define HAS_DATA_INIT_FUNC
 void init_data ()
@ -89,9 +91,12 @@ void init_data ()
  PTR_ANON (struct non_hfa_ffs_t  , non_hfa_ffs  , STACK+120, 18)
  ANON     (struct non_hfa_ffs_2_t, non_hfa_ffs_2, STACK+128, 19)
  ANON     (union  non_hfa_union_t, non_hfa_union, STACK+144, 20)
+  /* HFA of __fp16 passed on stack, directed __fp16 test is va_arg-16.c.  */
+  ANON     (struct hfa_f16x2_t    , hfa_f16x2    , STACK+152, 21)
+  ANON     (struct hfa_f16x3_t    , hfa_f16x3    , STACK+160, 22)
 #ifndef __AAPCS64_BIG_ENDIAN__
-  LAST_ANON(int                   , 2            , STACK+152, 30)
+  LAST_ANON(int                   , 2            , STACK+168, 30)
 #else
-  LAST_ANON(int                   , 2            , STACK+156, 30)
+  LAST_ANON(int                   , 2            , STACK+172, 30)
 #endif
 #endif
--- a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-5.c
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-5.c
@ -17,6 +17,8 @@ struct hfa_dx4_t hfa_dx4 = {1234.123, 2345.234, 3456.345, 4567.456};
 struct hfa_ldx3_t hfa_ldx3 = {123456.7890, 234567.8901, 345678.9012};
 struct hfa_ffs_t hfa_ffs;
 union hfa_union_t hfa_union;
+struct hfa_f16x2_t hfa_f16x2 = {2.0f, 4.0f};
+struct hfa_f16x3_t hfa_f16x3 = {2.0f, 4.0f, 8.0f};

 #define HAS_DATA_INIT_FUNC
 void init_data ()
@ -43,5 +45,8 @@ void init_data ()
  ANON     (struct hfa_fx1_t  , hfa_fx1  , STACK+24, 4)
  ANON     (struct hfa_fx2_t  , hfa_fx2  , STACK+32, 5)
  ANON     (struct hfa_dx2_t  , hfa_dx2  , STACK+40, 6)
-  LAST_ANON(double            , 1.0      , STACK+56, 7)
+  /* HFA of __fp16 passed on stack, directed __fp16 test is va_arg-16.c.  */
+  ANON     (struct hfa_f16x2_t, hfa_f16x2, STACK+56, 7)
+  ANON     (struct hfa_f16x3_t, hfa_f16x3, STACK+64, 8)
+  LAST_ANON(double            , 1.0      , STACK+72, 9)
 #endif