[AArch64] Handle HFAs of float16 types properly
Fix PR Target/72819. gcc/ PR Target/72819 * config/aarch64/aarch64.h (aarch64_fp16_type_node): Declare. (aarch64_fp16_ptr_type_node): Likewise. * config/aarch64/aarch64-simd-builtins.c (aarch64_fp16_ptr_type_node): Define. (aarch64_init_fp16_types): New, refactored out of... (aarch64_init_builtins): ...here, update to call aarch64_init_fp16_types. * config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Handle HFmode. (aapcs_vfp_sub_candidate): Likewise. gcc/testsuite/ PR Target/72819 * gcc.target/aarch64/aapcs64/abitest-common.h: Define half-precision registers. * gcc.target/aarch64/aapcs64/abitest.S (dumpregs): Add assembly for saving the half-precision registers. * gcc.target/aarch64/aapcs64/func-ret-1.c: Test that an __fp16 value is returned in h0. * gcc.target/aarch64/aapcs64/test_2.c: Check that __FP16 arguments are passed in FP/SIMD registers. * gcc.target/aarch64/aapcs64/test_27.c: New, test that __fp16 HFA passing works correctly. * gcc.target/aarch64/aapcs64/type-def.h (hfa_f16x1_t): New. (hfa_f16x2_t): Likewise. (hfa_f16x3_t): Likewise. * gcc.target/aarch64/aapcs64/va_arg-1.c: Check that __fp16 values are promoted to double and passed in a double register. * gcc.target/aarch64/aapcs64/va_arg-2.c: Check that __fp16 values are promoted to double and stacked. * gcc.target/aarch64/aapcs64/va_arg-4.c: Check stacking of HFA of __fp16 data types. * gcc.target/aarch64/aapcs64/va_arg-5.c: Likewise. * gcc.target/aarch64/aapcs64/va_arg-16.c: New, check HFAs of __fp16 first get passed in FP/SIMD registers, then stacked. From-SVN: r239173
This commit is contained in:
parent
a0b1bf2181
commit
1b62ed4f26
16 changed files with 209 additions and 26 deletions
|
@ -1,3 +1,17 @@
|
|||
2016-08-05 James Greenhalgh <james.greenhalgh@arm.com>
|
||||
|
||||
PR Target/72819
|
||||
* config/aarch64/aarch64.h (aarch64_fp16_type_node): Declare.
|
||||
(aarch64_fp16_ptr_type_node): Likewise.
|
||||
* config/aarch64/aarch64-builtins.c
|
||||
(aarch64_fp16_ptr_type_node): Define.
|
||||
(aarch64_init_fp16_types): New, refactored out of...
|
||||
(aarch64_init_builtins): ...here, update to call
|
||||
aarch64_init_fp16_types.
|
||||
* config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Handle
|
||||
HFmode.
|
||||
(aapcs_vfp_sub_candidate): Likewise.
|
||||
|
||||
2016-08-05 Martin Liska <mliska@suse.cz>
|
||||
|
||||
Joshua Cranmer <Pidgeot18@gmail.com>
|
||||
|
|
|
@ -443,13 +443,15 @@ static struct aarch64_simd_type_info aarch64_simd_types [] = {
|
|||
};
|
||||
#undef ENTRY
|
||||
|
||||
/* This type is not SIMD-specific; it is the user-visible __fp16. */
|
||||
static tree aarch64_fp16_type_node = NULL_TREE;
|
||||
|
||||
static tree aarch64_simd_intOI_type_node = NULL_TREE;
|
||||
static tree aarch64_simd_intCI_type_node = NULL_TREE;
|
||||
static tree aarch64_simd_intXI_type_node = NULL_TREE;
|
||||
|
||||
/* The user-visible __fp16 type, and a pointer to that type. Used
|
||||
across the back-end. */
|
||||
tree aarch64_fp16_type_node = NULL_TREE;
|
||||
tree aarch64_fp16_ptr_type_node = NULL_TREE;
|
||||
|
||||
static const char *
|
||||
aarch64_mangle_builtin_scalar_type (const_tree type)
|
||||
{
|
||||
|
@ -883,6 +885,21 @@ aarch64_init_builtin_rsqrt (void)
|
|||
}
|
||||
}
|
||||
|
||||
/* Initialize the backend types that support the user-visible __fp16
|
||||
type, also initialize a pointer to that type, to be used when
|
||||
forming HFAs. */
|
||||
|
||||
static void
|
||||
aarch64_init_fp16_types (void)
|
||||
{
|
||||
aarch64_fp16_type_node = make_node (REAL_TYPE);
|
||||
TYPE_PRECISION (aarch64_fp16_type_node) = 16;
|
||||
layout_type (aarch64_fp16_type_node);
|
||||
|
||||
(*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
|
||||
aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
|
||||
}
|
||||
|
||||
void
|
||||
aarch64_init_builtins (void)
|
||||
{
|
||||
|
@ -904,11 +921,7 @@ aarch64_init_builtins (void)
|
|||
= add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr,
|
||||
AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
|
||||
|
||||
aarch64_fp16_type_node = make_node (REAL_TYPE);
|
||||
TYPE_PRECISION (aarch64_fp16_type_node) = 16;
|
||||
layout_type (aarch64_fp16_type_node);
|
||||
|
||||
(*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
|
||||
aarch64_init_fp16_types ();
|
||||
|
||||
if (TARGET_SIMD)
|
||||
aarch64_init_simd_builtins ();
|
||||
|
|
|
@ -9881,15 +9881,10 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
|
|||
field_t = long_double_type_node;
|
||||
field_ptr_t = long_double_ptr_type_node;
|
||||
break;
|
||||
/* The half precision and quad precision are not fully supported yet. Enable
|
||||
the following code after the support is complete. Need to find the correct
|
||||
type node for __fp16 *. */
|
||||
#if 0
|
||||
case HFmode:
|
||||
field_t = float_type_node;
|
||||
field_ptr_t = float_ptr_type_node;
|
||||
field_t = aarch64_fp16_type_node;
|
||||
field_ptr_t = aarch64_fp16_ptr_type_node;
|
||||
break;
|
||||
#endif
|
||||
case V2SImode:
|
||||
case V4SImode:
|
||||
{
|
||||
|
@ -10051,7 +10046,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
|
|||
{
|
||||
case REAL_TYPE:
|
||||
mode = TYPE_MODE (type);
|
||||
if (mode != DFmode && mode != SFmode && mode != TFmode)
|
||||
if (mode != DFmode && mode != SFmode
|
||||
&& mode != TFmode && mode != HFmode)
|
||||
return -1;
|
||||
|
||||
if (*modep == VOIDmode)
|
||||
|
@ -10064,7 +10060,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
|
|||
|
||||
case COMPLEX_TYPE:
|
||||
mode = TYPE_MODE (TREE_TYPE (type));
|
||||
if (mode != DFmode && mode != SFmode && mode != TFmode)
|
||||
if (mode != DFmode && mode != SFmode
|
||||
&& mode != TFmode && mode != HFmode)
|
||||
return -1;
|
||||
|
||||
if (*modep == VOIDmode)
|
||||
|
|
|
@ -944,4 +944,9 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
|||
|
||||
#define ASM_OUTPUT_POOL_EPILOGUE aarch64_asm_output_pool_epilogue
|
||||
|
||||
/* This type is the user-visible __fp16, and a pointer to that type. We
|
||||
need it in many places in the backend. Defined in aarch64-builtins.c. */
|
||||
extern tree aarch64_fp16_type_node;
|
||||
extern tree aarch64_fp16_ptr_type_node;
|
||||
|
||||
#endif /* GCC_AARCH64_H */
|
||||
|
|
|
@ -1,3 +1,29 @@
|
|||
2016-08-05 James Greenhalgh <james.greenhalgh@arm.com>
|
||||
|
||||
PR Target/72819
|
||||
* gcc.target/aarch64/aapcs64/abitest-common.h: Define half-precision
|
||||
registers.
|
||||
* gcc.target/aarch64/aapcs64/abitest.S (dumpregs): Add assembly for
|
||||
saving the half-precision registers.
|
||||
* gcc.target/aarch64/aapcs64/func-ret-1.c: Test that an __fp16
|
||||
value is returned in h0.
|
||||
* gcc.target/aarch64/aapcs64/test_2.c: Check that __fp16 arguments
|
||||
are passed in FP/SIMD registers.
|
||||
* gcc.target/aarch64/aapcs64/test_27.c: New, test that __fp16 HFA
|
||||
passing works correctly.
|
||||
* gcc.target/aarch64/aapcs64/type-def.h (hfa_f16x1_t): New.
|
||||
(hfa_f16x2_t): Likewise.
|
||||
(hfa_f16x3_t): Likewise.
|
||||
* gcc.target/aarch64/aapcs64/va_arg-1.c: Check that __fp16 values
|
||||
are promoted to double and passed in a double register.
|
||||
* gcc.target/aarch64/aapcs64/va_arg-2.c: Check that __fp16 values
|
||||
are promoted to double and stacked.
|
||||
* gcc.target/aarch64/aapcs64/va_arg-4.c: Check stacking of HFA of
|
||||
__fp16 data types.
|
||||
* gcc.target/aarch64/aapcs64/va_arg-5.c: Likewise.
|
||||
* gcc.target/aarch64/aapcs64/va_arg-16.c: New, check HFAs of
|
||||
__fp16 first get passed in FP/SIMD registers, then stacked.
|
||||
|
||||
2016-08-05 Nathan Sidwell <nathan@acm.org>
|
||||
|
||||
PR c++/68724
|
||||
|
|
|
@ -57,7 +57,17 @@
|
|||
#define X8 320
|
||||
#define X9 328
|
||||
|
||||
#define STACK 336
|
||||
#define H0 336
|
||||
#define H1 338
|
||||
#define H2 340
|
||||
#define H3 342
|
||||
#define H4 344
|
||||
#define H5 346
|
||||
#define H6 348
|
||||
#define H7 350
|
||||
|
||||
|
||||
#define STACK 352
|
||||
|
||||
/* The type of test. 'myfunc' in abitest.S needs to know which kind of
|
||||
test it is running to decide what to do at the runtime. Keep the
|
||||
|
|
|
@ -13,7 +13,12 @@ dumpregs:
|
|||
myfunc:
|
||||
mov x16, sp
|
||||
mov x17, sp
|
||||
sub sp, sp, 352 // 336 for registers and 16 for old sp and lr
|
||||
sub sp, sp, 368 // 352 for registers and 16 for old sp and lr
|
||||
|
||||
sub x17, x17, 8
|
||||
st4 { v4.h, v5.h, v6.h, v7.h }[0], [x17] //344
|
||||
sub x17, x17, 8
|
||||
st4 { v0.h, v1.h, v2.h, v3.h }[0], [x17] //336
|
||||
|
||||
stp x8, x9, [x17, #-16]! //320
|
||||
|
||||
|
|
|
@ -44,4 +44,5 @@ FUNC_VAL_CHECK (12, vf2_t, vf2, D0, f32in64)
|
|||
FUNC_VAL_CHECK (13, vi4_t, vi4, Q0, i32in128)
|
||||
FUNC_VAL_CHECK (14, int *, int_ptr, X0, flat)
|
||||
FUNC_VAL_CHECK (15, vlf1_t, vlf1, Q0, flat)
|
||||
FUNC_VAL_CHECK (16, __fp16, 0xabcd, H0, flat)
|
||||
#endif
|
||||
|
|
|
@ -12,5 +12,6 @@
|
|||
ARG(double, 4.0, D1)
|
||||
ARG(float, 2.0f, S2)
|
||||
ARG(double, 5.0, D3)
|
||||
ARG(__fp16, 8.0f, H4)
|
||||
LAST_ARG(int, 3, W0)
|
||||
#endif
|
||||
|
|
46
gcc/testsuite/gcc.target/aarch64/aapcs64/test_27.c
Normal file
46
gcc/testsuite/gcc.target/aarch64/aapcs64/test_27.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* Test AAPCS64 layout
|
||||
|
||||
Test named homogeneous floating-point aggregates of __fp16 data,
|
||||
which should be passed in SIMD/FP registers or via the stack. */
|
||||
|
||||
/* { dg-do run { target aarch64*-*-* } } */
|
||||
|
||||
#ifndef IN_FRAMEWORK
|
||||
#define TESTFILE "test_27.c"
|
||||
|
||||
struct x0
|
||||
{
|
||||
__fp16 v[1];
|
||||
} f16x1;
|
||||
|
||||
struct x1
|
||||
{
|
||||
__fp16 v[2];
|
||||
} f16x2;
|
||||
|
||||
struct x2
|
||||
{
|
||||
__fp16 v[3];
|
||||
} f16x3;
|
||||
|
||||
#define HAS_DATA_INIT_FUNC
|
||||
void init_data ()
|
||||
{
|
||||
f16x1.v[0] = 2.0f;
|
||||
f16x2.v[0] = 4.0f;
|
||||
f16x2.v[1] = 8.0f;
|
||||
f16x3.v[0] = 16.0f;
|
||||
f16x3.v[1] = 32.0f;
|
||||
f16x3.v[2] = 64.0f;
|
||||
}
|
||||
|
||||
#include "abitest.h"
|
||||
#else
|
||||
ARG (struct x0, f16x1, H0)
|
||||
ARG (struct x1, f16x2, H1)
|
||||
ARG (struct x2, f16x3, H3)
|
||||
ARG (struct x1, f16x2, H6)
|
||||
ARG (struct x0, f16x1, STACK)
|
||||
ARG (int, 0xdeadbeef, W0)
|
||||
LAST_ARG (double, 456.789, STACK+8)
|
||||
#endif
|
|
@ -44,6 +44,24 @@ struct hfa_fx3_t
|
|||
float c;
|
||||
};
|
||||
|
||||
struct hfa_f16x1_t
|
||||
{
|
||||
__fp16 a;
|
||||
};
|
||||
|
||||
struct hfa_f16x2_t
|
||||
{
|
||||
__fp16 a;
|
||||
__fp16 b;
|
||||
};
|
||||
|
||||
struct hfa_f16x3_t
|
||||
{
|
||||
__fp16 a;
|
||||
__fp16 b;
|
||||
__fp16 c;
|
||||
};
|
||||
|
||||
struct hfa_dx2_t
|
||||
{
|
||||
double a;
|
||||
|
|
|
@ -19,6 +19,8 @@ signed short ss = 0xcba9;
|
|||
signed int ss_promoted = 0xffffcba9;
|
||||
float fp = 65432.12345f;
|
||||
double fp_promoted = (double)65432.12345f;
|
||||
__fp16 fp16 = 2.0f;
|
||||
__fp16 fp16_promoted = (double)2.0f;
|
||||
|
||||
#define HAS_DATA_INIT_FUNC
|
||||
void init_data ()
|
||||
|
@ -46,9 +48,13 @@ void init_data ()
|
|||
ANON ( long double , 98765432123456789.987654321L, Q2, 12)
|
||||
ANON ( vf2_t, vf2 , D3, 13)
|
||||
ANON ( vi4_t, vi4 , Q4, 14)
|
||||
/* 7.2: For unprototyped (i.e. pre- ANSI or K&R C) and variadic functions,
|
||||
in addition to the normal conversions and promotions, arguments of
|
||||
type __fp16 are converted to type double. */
|
||||
ANON_PROMOTED( __fp16, fp16 , double, fp16_promoted, D5, 15)
|
||||
#ifndef __AAPCS64_BIG_ENDIAN__
|
||||
LAST_ANON ( int , 0xeeee, STACK+32,15)
|
||||
LAST_ANON ( int , 0xeeee, STACK+32,16)
|
||||
#else
|
||||
LAST_ANON ( int , 0xeeee, STACK+36,15)
|
||||
LAST_ANON ( int , 0xeeee, STACK+36,16)
|
||||
#endif
|
||||
#endif
|
||||
|
|
28
gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-16.c
Normal file
28
gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-16.c
Normal file
|
@ -0,0 +1,28 @@
|
|||
/* Test AAPCS64 layout and __builtin_va_arg.
|
||||
|
||||
This test is focused particularly on __fp16 unnamed homogeneous
|
||||
floating-point aggregate types which should be passed in fp/simd
|
||||
registers until we run out of those, then the stack. */
|
||||
|
||||
/* { dg-do run { target aarch64*-*-* } } */
|
||||
|
||||
#ifndef IN_FRAMEWORK
|
||||
#define AAPCS64_TEST_STDARG
|
||||
#define TESTFILE "va_arg-16.c"
|
||||
#include "type-def.h"
|
||||
|
||||
struct hfa_f16x1_t hfa_f16x1 = {2.0f};
|
||||
struct hfa_f16x2_t hfa_f16x2 = {4.0f, 8.0f};
|
||||
struct hfa_f16x3_t hfa_f16x3 = {16.0f, 32.0f, 64.0f};
|
||||
|
||||
#include "abitest.h"
|
||||
#else
|
||||
ARG (int, 1, W0, LAST_NAMED_ARG_ID)
|
||||
DOTS
|
||||
ANON (struct hfa_f16x1_t, hfa_f16x1, H0 , 0)
|
||||
ANON (struct hfa_f16x2_t, hfa_f16x2, H1 , 1)
|
||||
ANON (struct hfa_f16x3_t, hfa_f16x3, H3 , 2)
|
||||
ANON (struct hfa_f16x2_t, hfa_f16x2, H6 , 3)
|
||||
ANON (struct hfa_f16x1_t, hfa_f16x1, STACK , 4)
|
||||
LAST_ANON(double , 1.0 , STACK+8, 5)
|
||||
#endif
|
|
@ -19,6 +19,8 @@ signed short ss = 0xcba9;
|
|||
signed int ss_promoted = 0xffffcba9;
|
||||
float fp = 65432.12345f;
|
||||
double fp_promoted = (double)65432.12345f;
|
||||
__fp16 fp16 = 2.0f;
|
||||
__fp16 fp16_promoted = (double)2.0f;
|
||||
|
||||
#define HAS_DATA_INIT_FUNC
|
||||
void init_data ()
|
||||
|
@ -64,9 +66,10 @@ void init_data ()
|
|||
ANON ( long double , 98765432123456789.987654321L, STACK+80, 20)
|
||||
ANON ( vf2_t, vf2 , STACK+96, 21)
|
||||
ANON ( vi4_t, vi4 , STACK+112,22)
|
||||
ANON_PROMOTED( __fp16 , fp16 , double, fp16_promoted, STACK+128,23)
|
||||
#ifndef __AAPCS64_BIG_ENDIAN__
|
||||
LAST_ANON ( int , 0xeeee, STACK+128,23)
|
||||
LAST_ANON ( int , 0xeeee, STACK+136,24)
|
||||
#else
|
||||
LAST_ANON ( int , 0xeeee, STACK+132,23)
|
||||
LAST_ANON ( int , 0xeeee, STACK+140,24)
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -29,6 +29,8 @@ struct non_hfa_ffvf2_t non_hfa_ffvf2;
|
|||
struct non_hfa_fffd_t non_hfa_fffd = {33.f, 34.f, 35.f, 36.0};
|
||||
union hfa_union_t hfa_union;
|
||||
union non_hfa_union_t non_hfa_union;
|
||||
struct hfa_f16x2_t hfa_f16x2 = {2.0f, 4.0f};
|
||||
struct hfa_f16x3_t hfa_f16x3 = {2.0f, 4.0f, 8.0f};
|
||||
|
||||
#define HAS_DATA_INIT_FUNC
|
||||
void init_data ()
|
||||
|
@ -89,9 +91,12 @@ void init_data ()
|
|||
PTR_ANON (struct non_hfa_ffs_t , non_hfa_ffs , STACK+120, 18)
|
||||
ANON (struct non_hfa_ffs_2_t, non_hfa_ffs_2, STACK+128, 19)
|
||||
ANON (union non_hfa_union_t, non_hfa_union, STACK+144, 20)
|
||||
/* HFA of __fp16 passed on stack, directed __fp16 test is va_arg-16.c. */
|
||||
ANON (struct hfa_f16x2_t , hfa_f16x2 , STACK+152, 21)
|
||||
ANON (struct hfa_f16x3_t , hfa_f16x3 , STACK+160, 22)
|
||||
#ifndef __AAPCS64_BIG_ENDIAN__
|
||||
LAST_ANON(int , 2 , STACK+152, 30)
|
||||
LAST_ANON(int , 2 , STACK+168, 30)
|
||||
#else
|
||||
LAST_ANON(int , 2 , STACK+156, 30)
|
||||
LAST_ANON(int , 2 , STACK+172, 30)
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -17,6 +17,8 @@ struct hfa_dx4_t hfa_dx4 = {1234.123, 2345.234, 3456.345, 4567.456};
|
|||
struct hfa_ldx3_t hfa_ldx3 = {123456.7890, 234567.8901, 345678.9012};
|
||||
struct hfa_ffs_t hfa_ffs;
|
||||
union hfa_union_t hfa_union;
|
||||
struct hfa_f16x2_t hfa_f16x2 = {2.0f, 4.0f};
|
||||
struct hfa_f16x3_t hfa_f16x3 = {2.0f, 4.0f, 8.0f};
|
||||
|
||||
#define HAS_DATA_INIT_FUNC
|
||||
void init_data ()
|
||||
|
@ -43,5 +45,8 @@ void init_data ()
|
|||
ANON (struct hfa_fx1_t , hfa_fx1 , STACK+24, 4)
|
||||
ANON (struct hfa_fx2_t , hfa_fx2 , STACK+32, 5)
|
||||
ANON (struct hfa_dx2_t , hfa_dx2 , STACK+40, 6)
|
||||
LAST_ANON(double , 1.0 , STACK+56, 7)
|
||||
/* HFA of __fp16 passed on stack, directed __fp16 test is va_arg-16.c. */
|
||||
ANON (struct hfa_f16x2_t, hfa_f16x2, STACK+56, 7)
|
||||
ANON (struct hfa_f16x3_t, hfa_f16x3, STACK+64, 8)
|
||||
LAST_ANON(double , 1.0 , STACK+72, 9)
|
||||
#endif
|
||||
|
|
Loading…
Add table
Reference in a new issue