[aarch64/64821]: Simplify __builtin_aarch64_sqrt* into internal function .SQRT.
This is a simple patch which simplifies the __builtin_aarch64_sqrt* builtins into the internal function SQRT, which allows for constant folding and other optimizations at the gimple level. It was originally suggested that we lower __builtin_aarch64_sqrtdf to __builtin_sqrt only when -fno-math-errno is in effect, but since r6-4969-g686ee9719a4 we have the internal function SQRT, which does the same, so we don't need to check -fno-math-errno either now. Applied as approved after bootstrapping and testing on aarch64-linux-gnu with no regressions. PR target/64821 gcc/ChangeLog: * config/aarch64/aarch64-builtins.cc (aarch64_general_gimple_fold_builtin): Handle __builtin_aarch64_sqrt* and simplify into SQRT internal function. gcc/testsuite/ChangeLog: * gcc.target/aarch64/vsqrt-1.c: New test. * gcc.target/aarch64/vsqrt-2.c: New test.
This commit is contained in:
parent
603a9ab41f
commit
cbcf4a50fa
3 changed files with 52 additions and 0 deletions
|
@ -2820,6 +2820,13 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
|
|||
gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
|
||||
break;
|
||||
|
||||
/* Lower sqrt builtins to gimple/internal function sqrt. */
|
||||
BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP)
|
||||
new_stmt = gimple_build_call_internal (IFN_SQRT,
|
||||
1, args[0]);
|
||||
gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
|
||||
break;
|
||||
|
||||
/*lower store and load neon builtins to gimple. */
|
||||
BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
|
||||
BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD)
|
||||
|
|
17
gcc/testsuite/gcc.target/aarch64/vsqrt-1.c
Normal file
17
gcc/testsuite/gcc.target/aarch64/vsqrt-1.c
Normal file
|
@ -0,0 +1,17 @@
|
|||
/* PR target/64821 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
/* Check that we constant fold sqrt(4.0) into 2.0. */
|
||||
/* { dg-final { scan-tree-dump-not " \\\.SQRT" "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump " 2\\\.0e\\\+0" "optimized" } } */
|
||||
/* { dg-final { scan-assembler-not "fsqrt" } } */
|
||||
/* We should produce a fmov to d0 with 2.0 but currently don't, see PR 103959. */
|
||||
/* { dg-final { scan-assembler-times "\n\tfmov\td0, 2.0e.0" 1 { xfail *-*-* } } } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
float64x1_t f64(void)
|
||||
{
|
||||
float64x1_t a = (float64x1_t){4.0};
|
||||
return vsqrt_f64 (a);
|
||||
}
|
28
gcc/testsuite/gcc.target/aarch64/vsqrt-2.c
Normal file
28
gcc/testsuite/gcc.target/aarch64/vsqrt-2.c
Normal file
|
@ -0,0 +1,28 @@
|
|||
/* PR target/64821 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-fdump-tree-optimized" } */
|
||||
#include <arm_neon.h>
|
||||
|
||||
/* Check that we lower __builtin_aarch64_sqrt* into the internal function SQRT. */
|
||||
/* { dg-final { scan-tree-dump-times " __builtin_aarch64_sqrt" 0 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times " \\\.SQRT " 4 "optimized" } } */
|
||||
|
||||
float64x1_t f64(float64x1_t a)
|
||||
{
|
||||
return vsqrt_f64 (a);
|
||||
}
|
||||
|
||||
float64x2_t f64q(float64x2_t a)
|
||||
{
|
||||
return vsqrtq_f64 (a);
|
||||
}
|
||||
|
||||
float32x2_t f32(float32x2_t a)
|
||||
{
|
||||
return vsqrt_f32 (a);
|
||||
}
|
||||
|
||||
float32x4_t f32q(float32x4_t a)
|
||||
{
|
||||
return vsqrtq_f32 (a);
|
||||
}
|
Loading…
Add table
Reference in a new issue