hppa: Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV

This change implements __builtin_get_fpsr() and __builtin_set_fpsr(x)
to get and set the floating-point status register.  They are used to
implement pa_atomic_assign_expand_fenv().

2024-02-02  John David Anglin  <danglin@gcc.gnu.org>

gcc/ChangeLog:

	PR target/59778
	* config/pa/pa.cc (enum pa_builtins): Add PA_BUILTIN_GET_FPSR
	and PA_BUILTIN_SET_FPSR builtins.
	* (pa_builtins_icode): Declare.
	* (def_builtin, pa_fpu_init_builtins): New.
	* (pa_init_builtins): Initialize FPU builtins.
	* (pa_builtin_decl, pa_expand_builtin_1): New.
	* (pa_expand_builtin): Handle PA_BUILTIN_GET_FPSR and
	PA_BUILTIN_SET_FPSR builtins.
	* (pa_atomic_assign_expand_fenv): New.
	* config/pa/pa.md (UNSPECV_GET_FPSR, UNSPECV_SET_FPSR): New
	UNSPECV constants.
	(get_fpsr, set_fpsr): New expanders.
	(get_fpsr_32, get_fpsr_64, set_fpsr_32, set_fpsr_64): New
	insn patterns.
This commit is contained in:
John David Anglin 2024-02-02 18:05:06 +00:00
parent 922e4599e6
commit 1c3cfb5a95
2 changed files with 298 additions and 1 deletions

View file

@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
@ -142,6 +143,7 @@ static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static tree pa_builtin_decl (unsigned, bool);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
@ -205,6 +207,7 @@ static bool pa_modes_tieable_p (machine_mode, machine_mode);
static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
static HOST_WIDE_INT pa_starting_frame_offset (void);
static section* pa_elf_select_rtx_section(machine_mode, rtx, unsigned HOST_WIDE_INT) ATTRIBUTE_UNUSED;
static void pa_atomic_assign_expand_fenv (tree *, tree *, tree *);
/* The following extra sections are only used for SOM. */
static GTY(()) section *som_readonly_data_section;
@ -314,9 +317,10 @@ static size_t n_deferred_plabels = 0;
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL pa_builtin_decl
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
@ -426,6 +430,9 @@ static size_t n_deferred_plabels = 0;
#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV pa_atomic_assign_expand_fenv
struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string. */
@ -592,6 +599,10 @@ pa_option_override (void)
enum pa_builtins
{
/* FPU builtins. */
PA_BUILTIN_GET_FPSR,
PA_BUILTIN_SET_FPSR,
PA_BUILTIN_COPYSIGNQ,
PA_BUILTIN_FABSQ,
PA_BUILTIN_INFQ,
@ -600,10 +611,48 @@ enum pa_builtins
};
/* Decl of each PA builtin, indexed by enum pa_builtins.  def_builtin
   stores entries here and pa_builtin_decl returns them.  */
static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
/* Insn code recorded by def_builtin for each builtin, indexed by enum
   pa_builtins.  pa_expand_builtin_1 reads it to select the insn to
   generate.  */
static GTY(()) enum insn_code pa_builtins_icode[(int) PA_BUILTIN_max];
/* Register the machine-specific builtin NAME, with function type TYPE,
   under function code CODE.  When registration succeeds, remember the
   decl in pa_builtins[] and the insn code ICODE in pa_builtins_icode[],
   both indexed by CODE.  Return whatever add_builtin_function returned;
   a null result means the builtin was not added and nothing is
   recorded.  */

static tree
def_builtin (const char *name, enum insn_code icode, enum pa_builtins code,
             tree type)
{
  tree decl = add_builtin_function (name, type, code, BUILT_IN_MD,
                                    NULL, NULL_TREE);

  /* Leave both tables untouched when no decl was created.  */
  if (!decl)
    return decl;

  pa_builtins[code] = decl;
  pa_builtins_icode[code] = icode;
  return decl;
}
/* Create builtin functions for FPU instructions. */
static void
pa_fpu_init_builtins (void)
{
tree ftype;
ftype = build_function_type_list (unsigned_type_node, 0);
def_builtin ("__builtin_get_fpsr", CODE_FOR_get_fpsr,
PA_BUILTIN_GET_FPSR, ftype);
ftype = build_function_type_list (void_type_node, unsigned_type_node, 0);
def_builtin ("__builtin_set_fpsr", CODE_FOR_set_fpsr,
PA_BUILTIN_SET_FPSR, ftype);
}
static void
pa_init_builtins (void)
{
if (!TARGET_SOFT_FLOAT)
pa_fpu_init_builtins ();
#ifdef DONT_HAVE_FPUTC_UNLOCKED
{
tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
@ -663,6 +712,92 @@ pa_init_builtins (void)
}
}
/* Implement TARGET_BUILTIN_DECL.  Return the entry of pa_builtins[] for
   function code CODE, or error_mark_node when CODE is not below
   PA_BUILTIN_max.  INITIALIZE_P is ignored.  */

static tree
pa_builtin_decl (unsigned int code, bool initialize_p ATTRIBUTE_UNUSED)
{
  return (code < PA_BUILTIN_max) ? pa_builtins[code] : error_mark_node;
}
/* Expand the call EXP to a PA builtin by emitting the insn recorded for
   it in pa_builtins_icode[].

   For a builtin that returns a value, operand 0 of the insn is the
   result: TARGET is used when it is non-null, has the operand's mode and
   satisfies the operand's predicate; otherwise a fresh pseudo is
   allocated.  Each call argument is expanded and, if it does not satisfy
   the corresponding operand predicate, copied into a register of the
   operand's mode.  At most three arguments are supported.

   Return the result rtx for a value-returning builtin, const0_rtx for a
   void builtin, or NULL_RTX if an argument is erroneous or the insn
   generator produced no pattern.  SUBTARGET, TMODE and IGNORE are
   unused.  */

static rtx
pa_expand_builtin_1 (tree exp, rtx target,
                     rtx subtarget ATTRIBUTE_UNUSED,
                     machine_mode tmode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  enum pa_builtins code
    = (enum pa_builtins) DECL_MD_FUNCTION_CODE (fndecl);
  enum insn_code icode = pa_builtins_icode[code];
  bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
  call_expr_arg_iterator iter;
  int arg_count = 0;
  rtx pat, op[4];
  tree arg;

  if (nonvoid)
    {
      /* Named differently from the unused TMODE parameter so that it
         does not shadow it.  */
      machine_mode result_mode = insn_data[icode].operand[0].mode;

      if (!target
          || GET_MODE (target) != result_mode
          || ! (*insn_data[icode].operand[0].predicate) (target, result_mode))
        op[0] = gen_reg_rtx (result_mode);
      else
        op[0] = target;
    }
  else
    op[0] = NULL_RTX;

  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      const struct insn_operand_data *insn_op;
      int idx;

      if (arg == error_mark_node)
        return NULL_RTX;

      /* op[] holds the result plus at most three arguments.  Reject a
         fourth argument here, before it is stored, rather than only in
         the switch below after op[] has already been overrun.  */
      if (arg_count >= 3)
        gcc_unreachable ();

      arg_count++;

      /* A void builtin has no result operand, so its first argument is
         insn operand 0 rather than operand 1.  */
      idx = arg_count - !nonvoid;
      insn_op = &insn_data[icode].operand[idx];
      op[arg_count] = expand_normal (arg);

      if (! (*insn_op->predicate) (op[arg_count], insn_op->mode))
        op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
    }

  switch (arg_count)
    {
    case 0:
      pat = GEN_FCN (icode) (op[0]);
      break;
    case 1:
      if (nonvoid)
        pat = GEN_FCN (icode) (op[0], op[1]);
      else
        pat = GEN_FCN (icode) (op[1]);
      break;
    case 2:
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 3:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  return (nonvoid ? op[0] : const0_rtx);
}
static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
machine_mode mode ATTRIBUTE_UNUSED,
@ -673,6 +808,10 @@ pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
switch (fcode)
{
case PA_BUILTIN_GET_FPSR:
case PA_BUILTIN_SET_FPSR:
return pa_expand_builtin_1 (exp, target, subtarget, mode, ignore);
case PA_BUILTIN_FABSQ:
case PA_BUILTIN_COPYSIGNQ:
return expand_call (exp, target, ignore);
@ -11099,4 +11238,78 @@ pa_function_arg_size (machine_mode mode, const_tree type)
return (int) CEIL (size, UNITS_PER_WORD);
}
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  Build three expressions
   from calls to the __builtin_get_fpsr and __builtin_set_fpsr decls and
   store them through HOLD, CLEAR and UPDATE.  For soft-float targets
   nothing is stored and the three pointers are left as passed in.  */
static void
pa_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
/* Individual exception bits.  They are only used below to form
   PA_FE_ALL_EXCEPT.  */
const unsigned PA_FE_INEXACT = 1;
const unsigned PA_FE_UNDERFLOW = 2;
const unsigned PA_FE_OVERFLOW = 4;
const unsigned PA_FE_DIVBYZERO = 8;
const unsigned PA_FE_INVALID = 16;
const unsigned HOST_WIDE_INT PA_FE_ALL_EXCEPT = (PA_FE_INVALID
| PA_FE_DIVBYZERO
| PA_FE_OVERFLOW
| PA_FE_UNDERFLOW
| PA_FE_INEXACT);
/* The mask below clears the five bits both at their low position and
   shifted left by this amount -- presumably the enable and flag fields
   of the status register; confirm against the PA-RISC architecture
   manual.  */
const unsigned HOST_WIDE_INT PA_FE_EXCEPT_SHIFT = 27;
tree fenv_var, get_fpsr, set_fpsr, mask, ld_fenv, masked_fenv;
tree hold_all, new_fenv_var, reload_fenv, restore_fnenv;
tree get_fpsr_call, set_fpsr_call, update_call, atomic_feraiseexcept;
if (TARGET_SOFT_FLOAT)
return;
/* Generate the equivalent of :
unsigned int fenv_var;
fenv_var = __builtin_get_fpsr ();
unsigned int masked_fenv;
masked_fenv = fenv_var & mask;
__builtin_set_fpsr (masked_fenv); */
fenv_var = create_tmp_var_raw (unsigned_type_node);
get_fpsr = pa_builtins[PA_BUILTIN_GET_FPSR];
set_fpsr = pa_builtins[PA_BUILTIN_SET_FPSR];
mask = build_int_cst (unsigned_type_node,
~((PA_FE_ALL_EXCEPT << PA_FE_EXCEPT_SHIFT)
| PA_FE_ALL_EXCEPT));
get_fpsr_call = build_call_expr (get_fpsr, 0);
ld_fenv = build4 (TARGET_EXPR, unsigned_type_node,
fenv_var, get_fpsr_call,
NULL_TREE, NULL_TREE);
masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
/* NOTE(review): masked_fenv is the first operand here, so this copy of
   the AND is sequenced before the TARGET_EXPR that initializes fenv_var;
   only the copy passed to set_fpsr_call below follows the load.  Confirm
   that this ordering is intended.  */
hold_all = build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv);
set_fpsr_call = build_call_expr (set_fpsr, 1, masked_fenv);
*hold = build2 (COMPOUND_EXPR, void_type_node, hold_all, set_fpsr_call);
/* Store the value of masked_fenv to clear the exceptions:
__builtin_set_fpsr (masked_fenv);
The call node built for *hold is reused here rather than rebuilt.  */
*clear = set_fpsr_call;
/* Generate the equivalent of :
unsigned int new_fenv_var;
new_fenv_var = __builtin_get_fpsr ();
__builtin_set_fpsr (fenv_var);
__atomic_feraiseexcept (new_fenv_var); */
new_fenv_var = create_tmp_var_raw (unsigned_type_node);
/* get_fpsr_call is the same call node already used in ld_fenv.  */
reload_fenv = build4 (TARGET_EXPR, unsigned_type_node, new_fenv_var,
get_fpsr_call, NULL_TREE, NULL_TREE);
restore_fnenv = build_call_expr (set_fpsr, 1, fenv_var);
atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
/* NOTE(review): new_fenv_var is converted to int and passed on without
   being shifted or masked.  Confirm that the bit positions of the value
   read from the status register match the exception encoding that
   __atomic_feraiseexcept expects.  */
update_call = build_call_expr (atomic_feraiseexcept, 1,
fold_convert (integer_type_node,
new_fenv_var));
*update = build2 (COMPOUND_EXPR, void_type_node,
build2 (COMPOUND_EXPR, void_type_node,
reload_fenv, restore_fnenv), update_call);
}
#include "gt-pa.h"

View file

@ -96,6 +96,8 @@
UNSPECV_OPC ; outline_prologue_call
UNSPECV_OEC ; outline_epilogue_call
UNSPECV_LONGJMP ; builtin_longjmp
UNSPECV_GET_FPSR ; get floating-point status register
UNSPECV_SET_FPSR ; set floating-point status register
])
;; Maximum pc-relative branch offsets.
@ -10784,3 +10786,85 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
"ldo 15(%%sp),%1\n\t{dep|depw} %%r0,31,3,%1\n\t{ldcw|ldcw,co} 0(%1),%1"
[(set_attr "type" "binary")
(set_attr "length" "12")])
;; Get floating-point status register.
;; This expander FAILs when TARGET_SOFT_FLOAT is set.  Otherwise it emits
;; get_fpsr_64 or get_fpsr_32, chosen by TARGET_64BIT, with operand 0 as
;; the destination register, and then finishes with DONE so the pattern
;; written here is never emitted directly.
(define_expand "get_fpsr"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))]
""
{
if (TARGET_SOFT_FLOAT)
FAIL;
if (TARGET_64BIT)
emit_insn (gen_get_fpsr_64 (operands[0]));
else
emit_insn (gen_get_fpsr_32 (operands[0]));
DONE;
})
;; The floating-point status register is stored to an unused slot in
;; the frame marker and then loaded to register operand 0.  The final
;; floating-point load restores the T bit in the status register.
;; The final load might be avoided if a word mode store was used to
;; store the status register.  It is unclear why we need a double-word
;; store.  I suspect PA 1.0 didn't support single-word stores of the
;; status register.
;;
;; The template is three instructions: a double-word store of %fr0 to
;; -16(%sp), a word load from that slot into operand 0, and a double-word
;; load of the slot back into %fr0.
(define_insn "get_fpsr_32"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))]
"!TARGET_SOFT_FLOAT && !TARGET_64BIT"
"{fstds|fstd} %%fr0,-16(%%sp)\n\tldw -16(%%sp),%0\n\t{fldds|fldd} -16(%%sp),%%fr0"
[(set_attr "type" "fpstore_load")
(set_attr "length" "12")])
;; The 64-bit pattern is similar to the 32-bit pattern except that we need
;; to compute the address of the frame location, as long displacements
;; aren't supported on Linux targets.  The address -40(%sp) is formed in
;; the early-clobber DImode scratch, operand 1, and the store, word load
;; and reload all go through that register.
(define_insn "get_fpsr_64"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))
(clobber (match_scratch:DI 1 "=&r"))]
"!TARGET_SOFT_FLOAT && TARGET_64BIT"
"ldo -40(%%sp),%1\n\tfstd %%fr0,0(%1)\n\tldw 0(%1),%0\n\tfldd 0(%1),%%fr0"
[(set_attr "type" "fpstore_load")
(set_attr "length" "16")])
;; Set floating-point status register.
;; This expander FAILs when TARGET_SOFT_FLOAT is set.  Otherwise it emits
;; set_fpsr_64 or set_fpsr_32, chosen by TARGET_64BIT, with operand 0 as
;; the register holding the new value, and then finishes with DONE so the
;; pattern written here is never emitted directly.
(define_expand "set_fpsr"
[(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)]
""
{
if (TARGET_SOFT_FLOAT)
FAIL;
if (TARGET_64BIT)
emit_insn (gen_set_fpsr_64 (operands[0]));
else
emit_insn (gen_set_fpsr_32 (operands[0]));
DONE;
})
;; The old T bit is extracted and stored in the new status register.
;; The current status word is stored to -16(%sp) and loaded into the
;; scratch, operand 1, which is then reduced to the single bit at
;; position 25.  That bit is deposited into operand 0, operand 0 is
;; written back to the slot, and the slot is reloaded into %fr0.
;; NOTE(review): the dep/depw instruction names operand 0 as its target,
;; yet operand 0 is declared as a plain input ("r").  Confirm whether the
;; caller's register may be modified here or whether the operand needs to
;; be marked as written (or copied to a scratch first).
(define_insn "set_fpsr_32"
[(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)
(clobber (match_scratch:SI 1 "=&r"))]
"!TARGET_SOFT_FLOAT && !TARGET_64BIT"
"{fstds|fstd} %%fr0,-16(%%sp)\n\tldw -16(%%sp),%1\n\t{extru|extrw,u} %1,25,1,%1\n\t{dep|depw} %1,25,1,%0\n\tstw %0,-16(%%sp)\n\t{fldds|fldd} -16(%%sp),%%fr0"
[(set_attr "type" "store_fpload")
(set_attr "length" "24")])
;; 64-bit form of the set pattern.  The frame slot address -40(%sp) is
;; formed in the DImode scratch, operand 1.  The current status word is
;; stored there and loaded into the SImode scratch, operand 2, which is
;; reduced to the single bit at position 25.  That bit is deposited into
;; operand 0, operand 0 is written back to the slot, and the slot is
;; reloaded into %fr0.
;; NOTE(review): depw names operand 0 as its target, yet operand 0 is
;; declared as a plain input ("r").  Confirm whether the caller's register
;; may be modified here or whether the operand needs to be marked as
;; written (or copied to a scratch first).
(define_insn "set_fpsr_64"
[(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)
(clobber (match_scratch:DI 1 "=&r"))
(clobber (match_scratch:SI 2 "=&r"))]
"!TARGET_SOFT_FLOAT && TARGET_64BIT"
"ldo -40(%%sp),%1\n\tfstd %%fr0,0(%1)\n\tldw 0(%1),%2\n\textrw,u %2,25,1,%2\n\tdepw %2,25,1,%0\n\tstw %0,0(%1)\n\tfldd 0(%1),%%fr0"
[(set_attr "type" "store_fpload")
(set_attr "length" "28")])