aarch64: Handle PSTATE.SM across abnormal edges

PSTATE.SM is always off on entry to an exception handler, and on entry
to a nonlocal goto receiver.  Those entry points need to switch
PSTATE.SM back to the appropriate state for the current function.
In the case of streaming-compatible functions, they need to restore
the mode that the caller was originally using.

The requirement on nonlocal goto receivers means that nonlocal
jumps need to ensure that PSTATE.SM is zero.

gcc/
	* config/aarch64/aarch64.cc: Include except.h.
	(aarch64_sme_mode_switch_regs::add_call_preserved_reg): New function.
	(aarch64_sme_mode_switch_regs::add_call_preserved_regs): Likewise.
	(aarch64_need_old_pstate_sm): Return true if the function has
	a nonlocal-goto or exception receiver.
	(aarch64_switch_pstate_sm_for_landing_pad): New function.
	(aarch64_switch_pstate_sm_for_jump): Likewise.
	(pass_switch_pstate_sm::gate): Enable the pass for all
	streaming and streaming-compatible functions.
	(pass_switch_pstate_sm::execute): Handle non-local gotos and their
	receivers.  Handle exception handler entry points.

gcc/testsuite/
	* g++.target/aarch64/sme/exceptions_2.C: New test.
	* gcc.target/aarch64/sme/nonlocal_goto_1.c: Likewise.
	* gcc.target/aarch64/sme/nonlocal_goto_2.c: Likewise.
	* gcc.target/aarch64/sme/nonlocal_goto_3.c: Likewise.
	* gcc.target/aarch64/sme/nonlocal_goto_4.c: Likewise.
	* gcc.target/aarch64/sme/nonlocal_goto_5.c: Likewise.
	* gcc.target/aarch64/sme/nonlocal_goto_6.c: Likewise.
	* gcc.target/aarch64/sme/nonlocal_goto_7.c: Likewise.
This commit is contained in:
Richard Sandiford 2023-12-05 10:11:29 +00:00
parent 3f6e5991fa
commit 275706fc59
9 changed files with 537 additions and 7 deletions

View file

@ -85,6 +85,7 @@
#include "config/arm/aarch-common.h"
#include "config/arm/aarch-common-protos.h"
#include "ssa.h"
#include "except.h"
#include "tree-pass.h"
#include "cfgbuild.h"
@ -4758,6 +4759,8 @@ public:
void add_reg (machine_mode, unsigned int);
void add_call_args (rtx_call_insn *);
void add_call_result (rtx_call_insn *);
void add_call_preserved_reg (unsigned int);
void add_call_preserved_regs (bitmap);
void emit_prologue ();
void emit_epilogue ();
@ -4890,6 +4893,46 @@ aarch64_sme_mode_switch_regs::add_call_result (rtx_call_insn *call_insn)
add_reg (GET_MODE (dest), REGNO (dest));
}
/* Record that register REGNO has to be preserved around the mode switch.
   REGNO is call-preserved under the ABI of the current function; for an
   FP/SIMD register that ABI selects the mode in which it is recorded.
   Registers that are neither FP/SIMD nor predicate registers are ignored.  */
void
aarch64_sme_mode_switch_regs::add_call_preserved_reg (unsigned int regno)
{
  if (!FP_REGNUM_P (regno))
    {
      /* Predicate registers are always recorded in VNx16BImode.  */
      if (PR_REGNUM_P (regno))
	add_reg (VNx16BImode, regno);
      return;
    }

  /* FP/SIMD register: the recorded mode follows the function's PCS variant.  */
  const auto pcs = crtl->abi->id ();
  if (pcs == ARM_PCS_SVE)
    add_reg (VNx16QImode, regno);
  else if (pcs == ARM_PCS_SIMD)
    add_reg (V16QImode, regno);
  else if (pcs == ARM_PCS_AAPCS64)
    add_reg (DImode, regno);
  else
    gcc_unreachable ();
}
/* REGS is a bitmap of register numbers whose hard registers are
   call-preserved under the current function's ABI.  Record each of those
   hard registers as needing to be preserved around the mode switch.  */
void
aarch64_sme_mode_switch_regs::add_call_preserved_regs (bitmap regs)
{
  unsigned int regno;
  bitmap_iterator iter;
  EXECUTE_IF_SET_IN_BITMAP (regs, 0, regno, iter)
    {
      /* Stop at the first entry that is not a hard register.
	 NOTE(review): this presumably relies on the bitmap being walked
	 in increasing register-number order -- confirm.  */
      if (!HARD_REGISTER_NUM_P (regno))
	break;
      add_call_preserved_reg (regno);
    }
}
/* Emit code to save registers before the mode switch. */
void
@ -7423,6 +7466,23 @@ aarch64_need_old_pstate_sm ()
if (aarch64_cfun_enables_pstate_sm ())
return true;
/* Non-local goto receivers are entered with PSTATE.SM equal to 0,
but the function needs to return with PSTATE.SM unchanged. */
if (nonlocal_goto_handler_labels)
return true;
/* Likewise for exception handlers. */
eh_landing_pad lp;
for (unsigned int i = 1; vec_safe_iterate (cfun->eh->lp_array, i, &lp); ++i)
if (lp && lp->post_landing_pad)
return true;
/* Non-local gotos need to set PSTATE.SM to zero. It's possible to call
streaming-compatible functions without SME being available, so PSTATE.SM
should only be changed if it is currently set to one. */
if (crtl->has_nonlocal_goto)
return true;
if (cfun->machine->call_switches_pstate_sm)
for (auto insn = get_insns (); insn; insn = NEXT_INSN (insn))
if (auto *call = dyn_cast<rtx_call_insn *> (insn))
@ -28323,6 +28383,59 @@ aarch64_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
return seq;
}
/* BB is the target of an exception or nonlocal goto edge, which means
that PSTATE.SM is known to be 0 on entry. Put it into the state that
the current function requires.

Return false, emitting nothing, if TARGET_NON_STREAMING holds. Otherwise
insert the switch sequence after BB's basic-block note and return true. */
static bool
aarch64_switch_pstate_sm_for_landing_pad (basic_block bb)
{
/* PSTATE.SM is already 0 on entry, so nothing is emitted in this case. */
if (TARGET_NON_STREAMING)
return false;
/* Build the switch as a separate insn sequence so that it can be inserted
as a unit below. */
start_sequence ();
rtx_insn *guard_label = nullptr;
/* NOTE(review): for streaming-compatible functions the switch looks to be
made conditional on the PSTATE.SM value saved on entry, with IP0 as the
temporary and GUARD_LABEL as the skip target -- confirm against
aarch64_guard_switch_pstate_sm. */
if (TARGET_STREAMING_COMPATIBLE)
guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM,
AARCH64_FL_SM_OFF);
/* Save the call-preserved registers in BB's live-in set before the switch
and restore them afterwards. */
aarch64_sme_mode_switch_regs args_switch;
args_switch.add_call_preserved_regs (df_get_live_in (bb));
args_switch.emit_prologue ();
/* Switch PSTATE.SM from off to on. */
aarch64_switch_pstate_sm (AARCH64_FL_SM_OFF, AARCH64_FL_SM_ON);
args_switch.emit_epilogue ();
/* The guard label, when there is one, goes after the whole switch. */
if (guard_label)
emit_label (guard_label);
auto seq = get_insns ();
end_sequence ();
/* Place the sequence immediately after BB's basic-block note. */
emit_insn_after (seq, bb_note (bb));
return true;
}
/* JUMP is a nonlocal goto. Its target requires PSTATE.SM to be 0 on entry,
so arrange to make it so.

Return false, emitting nothing, if TARGET_NON_STREAMING holds. Otherwise
insert the switch sequence immediately before JUMP and return true. */
static bool
aarch64_switch_pstate_sm_for_jump (rtx_insn *jump)
{
/* No switch is emitted in this case. */
if (TARGET_NON_STREAMING)
return false;
/* Build the switch as a separate insn sequence so that it can be inserted
as a unit below. */
start_sequence ();
rtx_insn *guard_label = nullptr;
/* NOTE(review): for streaming-compatible functions the switch looks to be
made conditional on the PSTATE.SM value saved on entry, with IP0 as the
temporary and GUARD_LABEL as the skip target -- confirm against
aarch64_guard_switch_pstate_sm. */
if (TARGET_STREAMING_COMPATIBLE)
guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM,
AARCH64_FL_SM_OFF);
/* Switch PSTATE.SM from on to off. Unlike the landing-pad case, no
registers are saved or restored around this switch. */
aarch64_switch_pstate_sm (AARCH64_FL_SM_ON, AARCH64_FL_SM_OFF);
if (guard_label)
emit_label (guard_label);
auto seq = get_insns ();
end_sequence ();
/* The switch has to take effect before control leaves via JUMP. */
emit_insn_before (seq, jump);
return true;
}
/* If CALL involves a change in PSTATE.SM, emit the instructions needed
to switch to the new mode and the instructions needed to restore the
original mode. Return true if something changed. */
@ -28406,9 +28519,10 @@ public:
};
/* Return true if the PSTATE.SM switching pass should run for FN.
NOTE(review): this hunk shows both the previous and the updated
signature and return statement; the description below applies to the
updated form. The pass runs when FN's own PSTATE.SM state is anything
other than AARCH64_FL_SM_OFF, or when the function has been flagged as
containing a call that switches PSTATE.SM. */
bool
pass_switch_pstate_sm::gate (function *)
pass_switch_pstate_sm::gate (function *fn)
{
return cfun->machine->call_switches_pstate_sm;
return (aarch64_fndecl_pstate_sm (fn->decl) != AARCH64_FL_SM_OFF
|| cfun->machine->call_switches_pstate_sm);
}
/* Emit any instructions needed to switch PSTATE.SM. */
@ -28420,6 +28534,12 @@ pass_switch_pstate_sm::execute (function *fn)
auto_sbitmap blocks (last_basic_block_for_fn (cfun));
bitmap_clear (blocks);
FOR_EACH_BB_FN (bb, fn)
{
if (has_abnormal_call_or_eh_pred_edge_p (bb)
&& aarch64_switch_pstate_sm_for_landing_pad (bb))
bitmap_set_bit (blocks, bb->index);
if (cfun->machine->call_switches_pstate_sm)
{
rtx_insn *insn;
FOR_BB_INSNS (bb, insn)
@ -28427,6 +28547,13 @@ pass_switch_pstate_sm::execute (function *fn)
if (aarch64_switch_pstate_sm_for_call (call))
bitmap_set_bit (blocks, bb->index);
}
auto end = BB_END (bb);
if (JUMP_P (end)
&& find_reg_note (end, REG_NON_LOCAL_GOTO, NULL_RTX)
&& aarch64_switch_pstate_sm_for_jump (end))
bitmap_set_bit (blocks, bb->index);
}
find_many_sub_basic_blocks (blocks);
clear_aux_for_blocks ();
return 0;

View file

@ -0,0 +1,148 @@
// { dg-options "-O -fno-optimize-sibling-calls" }
// { dg-final { check-function-bodies "**" "" } }
void n_callee();
void s_callee() __arm_streaming;
void sc_callee() __arm_streaming_compatible;
void n_callee_ne() noexcept;
void s_callee_ne() noexcept __arm_streaming;
void sc_callee_ne() noexcept __arm_streaming_compatible;
// Function with no streaming attribute whose try body and handler only
// call non-streaming and streaming-compatible functions.  The
// scan-assembler check that follows requires that no smstart or smstop
// appears between the function label and its ret.
void n_caller1()
{
try
{
n_callee();
sc_callee();
}
catch (...)
{
n_callee_ne();
sc_callee_ne();
}
}
// { dg-final { scan-assembler {_Z9n_caller1v:(?:(?!smstart|smstop).)*\tret} } }
/*
** _Z9n_caller2v:
** ...
** cntd (x[0-9]+)
** str \1, [^\n]+
** ...
** bl __cxa_begin_catch
** smstart sm
** bl _Z11s_callee_nev
** smstop sm
** bl __cxa_end_catch
** ...
*/
// Function with no streaming attribute whose handler calls a streaming
// function.  The expected body above wraps that call in smstart/smstop
// between __cxa_begin_catch and __cxa_end_catch.
void n_caller2()
{
try
{
n_callee();
sc_callee();
}
catch (...)
{
s_callee_ne();
}
}
/*
** _Z9s_caller1v:
** ...
** bl __cxa_end_catch
** smstart sm
** ...
*/
// Streaming function with an empty-bodied handler that just returns.
// The expected body above requires an smstart directly after the call to
// __cxa_end_catch.
int s_caller1() __arm_streaming
{
try
{
s_callee();
return 1;
}
catch (...)
{
return 2;
}
}
/*
** _Z9s_caller2v:
** ...
** bl __cxa_begin_catch
** smstart sm
** bl _Z11s_callee_nev
** smstop sm
** bl __cxa_end_catch
** smstart sm
** ...
*/
// Streaming function that calls a non-streaming function in the try body
// and a streaming function in the handler.  The expected body above has
// smstart after __cxa_begin_catch, smstop before __cxa_end_catch, and a
// further smstart after __cxa_end_catch.
int s_caller2() __arm_streaming
{
try
{
n_callee();
return 1;
}
catch (...)
{
s_callee_ne();
return 2;
}
}
/*
** _Z10sc_caller1v:
** ...
** cntd (x[0-9]+)
** str \1, [^\n]+
** mrs (x[0-9]+), svcr
** str \2, ([^\n]+)
** ...
** bl __cxa_end_catch
** ldr (x[0-9]+), \3
** tbz \4, 0, [^\n]+
** smstart sm
** ...
*/
// Streaming-compatible function with a handler that just returns.  The
// expected body above stores the value read from svcr, then after
// __cxa_end_catch reloads it and uses tbz on bit 0 to skip the smstart.
int sc_caller1() __arm_streaming_compatible
{
try
{
sc_callee();
return 1;
}
catch (...)
{
return 2;
}
}
/*
** _Z10ls_caller1v:
** ...
** cntd (x[0-9]+)
** str \1, [^\n]+
** ...
** bl __cxa_begin_catch
** smstart sm
** bl _Z12sc_callee_nev
** smstop sm
** bl __cxa_end_catch
** ...
*/
// Locally-streaming function that calls streaming-compatible functions in
// both the try body and the handler.  The expected body above has smstart
// after __cxa_begin_catch and smstop before __cxa_end_catch.
__arm_locally_streaming void ls_caller1()
{
try
{
sc_callee();
}
catch (...)
{
sc_callee_ne();
}
}

View file

@ -0,0 +1,58 @@
/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
void run(void (*)());
/*
** foo:
** ...
** mrs x16, svcr
** ...
** str x16, (.*)
** ...
** ldr x16, \1
** tbz x16, 0, .*
** smstop sm
** bl __clear_cache
** ldr x16, \1
** tbz x16, 0, .*
** smstart sm
** add x0, .*
** ldr x16, \1
** tbz x16, 0, .*
** smstop sm
** bl run
** ldr x16, \1
** tbz x16, 0, .*
** smstart sm
** mov w0, 1
** ...
** ret
** ldr x16, \1
** tbz x16, 0, .*
** smstart sm
** mov w0, 0
** ...
*/
/* Streaming-compatible function containing a nonlocal goto receiver.
   The nested function bar increments *ptr and jumps to the label
   "failure" declared in foo; foo hands bar to run and returns 1 on the
   normal path and 0 when entered through the label.  The expected body
   above requires a guarded smstart (ldr/tbz/smstart) before "mov w0, 0"
   on the receiver path.  */
int
foo (int *ptr) __arm_streaming_compatible
{
__label__ failure;
void bar () { *ptr += 1; goto failure; }
run (bar);
return 1;
failure:
return 0;
}
// { dg-final { scan-assembler {\tstp\tx19, x20,} } }
// { dg-final { scan-assembler {\tstp\tx21, x22,} } }
// { dg-final { scan-assembler {\tstp\tx23, x24,} } }
// { dg-final { scan-assembler {\tstp\tx25, x26,} } }
// { dg-final { scan-assembler {\tstp\tx27, x28,} } }
// { dg-final { scan-assembler {\tstp\td8, d9,} } }
// { dg-final { scan-assembler {\tstp\td10, d11,} } }
// { dg-final { scan-assembler {\tstp\td12, d13,} } }
// { dg-final { scan-assembler {\tstp\td14, d15,} } }

View file

@ -0,0 +1,44 @@
/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
void run(void (*)());
/*
** foo:
** ...
** smstop sm
** bl __clear_cache
** smstart sm
** add x0, .*
** smstop sm
** bl run
** smstart sm
** mov w0, 1
** ...
** ret
** smstart sm
** mov w0, 0
** ...
*/
/* Streaming function containing a nonlocal goto receiver.  The nested
   function bar increments *ptr and jumps to the label "failure" declared
   in foo; foo hands bar to run and returns 1 on the normal path and 0
   when entered through the label.  The expected body above requires an
   unconditional smstart before "mov w0, 0" on the receiver path.  */
int
foo (int *ptr) __arm_streaming
{
__label__ failure;
void bar () { *ptr += 1; goto failure; }
run (bar);
return 1;
failure:
return 0;
}
// { dg-final { scan-assembler {\tstp\tx19, x20,} } }
// { dg-final { scan-assembler {\tstp\tx21, x22,} } }
// { dg-final { scan-assembler {\tstp\tx23, x24,} } }
// { dg-final { scan-assembler {\tstp\tx25, x26,} } }
// { dg-final { scan-assembler {\tstp\tx27, x28,} } }
// { dg-final { scan-assembler {\tstp\td8, d9,} } }
// { dg-final { scan-assembler {\tstp\td10, d11,} } }
// { dg-final { scan-assembler {\tstp\td12, d13,} } }
// { dg-final { scan-assembler {\tstp\td14, d15,} } }

View file

@ -0,0 +1,46 @@
/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
void run(void (*)());
/*
** foo:
** ...
** smstart sm
** ...
** smstop sm
** bl __clear_cache
** smstart sm
** add x0, .*
** smstop sm
** bl run
** smstart sm
** mov w0, 1
** ...
** smstart sm
** mov w0, 0
** smstop sm
** ...
*/
/* Locally-streaming function containing a nonlocal goto receiver.  The
   nested function bar increments *ptr and jumps to the label "failure"
   declared in foo; foo hands bar to run and returns 1 on the normal path
   and 0 when entered through the label.  The expected body above requires
   smstart before "mov w0, 0" on the receiver path, followed by smstop.  */
__arm_locally_streaming int
foo (int *ptr)
{
__label__ failure;
void bar () { *ptr += 1; goto failure; }
run (bar);
return 1;
failure:
return 0;
}
// { dg-final { scan-assembler {\tstp\tx19, x20,} } }
// { dg-final { scan-assembler {\tstp\tx21, x22,} } }
// { dg-final { scan-assembler {\tstp\tx23, x24,} } }
// { dg-final { scan-assembler {\tstp\tx25, x26,} } }
// { dg-final { scan-assembler {\tstp\tx27, x28,} } }
// { dg-final { scan-assembler {\tstp\td8, d9,} } }
// { dg-final { scan-assembler {\tstp\td10, d11,} } }
// { dg-final { scan-assembler {\tstp\td12, d13,} } }
// { dg-final { scan-assembler {\tstp\td14, d15,} } }

View file

@ -0,0 +1,25 @@
/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
void run(void (*)());
/*
** bar.0:
** ...
** smstart sm
** ...
** smstop sm
** br x[0-9]+
*/
/* The nested function bar is locally streaming and leaves through a
   nonlocal goto to the label "failure" in foo.  The expected body for
   bar.0 above requires smstart, then smstop immediately before the
   final br.  */
int
foo (int *ptr)
{
__label__ failure;
__arm_locally_streaming void bar () { *ptr += 1; goto failure; }
run (bar);
return 1;
failure:
return 0;
}

View file

@ -0,0 +1,26 @@
/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
void run(void (*)() __arm_streaming);
/*
** bar.0:
** ...
** smstop sm
** br x[0-9]+
*/
/* The nested function bar is a streaming function and leaves through a
   nonlocal goto to the label "failure" in foo.  The expected body for
   bar.0 above requires smstop immediately before the final br; the
   scan-assembler-not checks below also forbid smstart and mrs.  */
int
foo (int *ptr)
{
__label__ failure;
void bar () __arm_streaming { *ptr += 1; goto failure; }
run (bar);
return 1;
failure:
return 0;
}
// { dg-final { scan-assembler-not {smstart\t} } }
// { dg-final { scan-assembler-not {mrs\t} } }

View file

@ -0,0 +1,31 @@
/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
void run(void (*)() __arm_streaming_compatible);
/*
** bar.0:
** ...
** mrs x16, svcr
** ...
** str x16, (.*)
** ...
** ldr x16, \1
** tbz x16, 0, .*
** smstop sm
** br x[0-9]+
*/
/* The nested function bar is streaming-compatible and leaves through a
   nonlocal goto to the label "failure" in foo.  The expected body for
   bar.0 above requires svcr to be read and stored, then reloaded and
   tested with tbz so that the smstop before the final br is conditional;
   the scan-assembler-not check below forbids smstart.  */
int
foo (int *ptr)
{
__label__ failure;
void bar () __arm_streaming_compatible { *ptr += 1; goto failure; }
run (bar);
return 1;
failure:
return 0;
}
// { dg-final { scan-assembler-not {smstart\t} } }

View file

@ -0,0 +1,25 @@
/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
void run(void (*)() __arm_inout("za"));
void callee () __arm_inout("za");
/* The nested function bar shares ZA state (__arm_inout("za")) but has no
   streaming attribute.  It calls callee, increments *ptr and leaves
   through a nonlocal goto to the label "failure" in foo.  The
   scan-assembler-not checks below require that neither smstart nor
   smstop is emitted.  */
int
foo (int *ptr)
{
__label__ failure;
void bar () __arm_inout("za")
{
callee ();
*ptr += 1;
goto failure;
}
run (bar);
return 1;
failure:
return 0;
}
// { dg-final { scan-assembler-not {\tsmstart\t} } }
// { dg-final { scan-assembler-not {\tsmstop\t} } }