aarch64: Enforce inlining restrictions for SME

A function that has local ZA state cannot be inlined into its caller,
since we only support managing ZA switches at function scope.

A function whose body directly clobbers ZA state cannot be inlined into
a function with ZA state.

A function whose body requires a particular PSTATE.SM setting can only
be inlined into a function body that guarantees that PSTATE.SM setting.
The callee's function type doesn't matter here: one locally-streaming
function can be inlined into another.

gcc/
	* config/aarch64/aarch64.cc: Include symbol-summary.h, ipa-prop.h,
	and ipa-fnsummary.h
	(aarch64_function_attribute_inlinable_p): New function.
	(AARCH64_IPA_SM_FIXED, AARCH64_IPA_CLOBBERS_ZA): New constants.
	(aarch64_need_ipa_fn_target_info): New function.
	(aarch64_update_ipa_fn_target_info): Likewise.
	(aarch64_can_inline_p): Restrict the previous ISA flag checks
	to non-modal features.  Prevent callees that require a particular
	PSTATE.SM state from being inlined into callers that can't guarantee
	that state.  Also prevent callees that have ZA state from being
	inlined into callers that don't.  Finally, prevent callees that
	clobber ZA from being inlined into callers that have ZA state.
	(TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P): Define.
	(TARGET_NEED_IPA_FN_TARGET_INFO): Likewise.
	(TARGET_UPDATE_IPA_FN_TARGET_INFO): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/sme/inlining_1.c: New test.
	* gcc.target/aarch64/sme/inlining_2.c: Likewise.
	* gcc.target/aarch64/sme/inlining_3.c: Likewise.
	* gcc.target/aarch64/sme/inlining_4.c: Likewise.
	* gcc.target/aarch64/sme/inlining_5.c: Likewise.
	* gcc.target/aarch64/sme/inlining_6.c: Likewise.
	* gcc.target/aarch64/sme/inlining_7.c: Likewise.
	* gcc.target/aarch64/sme/inlining_8.c: Likewise.
This commit is contained in:
Richard Sandiford 2023-12-05 10:11:30 +00:00
parent 275706fc59
commit 0e9aa05df6
16 changed files with 696 additions and 5 deletions

View file

@ -88,6 +88,9 @@
#include "except.h"
#include "tree-pass.h"
#include "cfgbuild.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
/* This file should be included last. */
#include "target-def.h"
@ -19155,6 +19158,17 @@ aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
return ret;
}
/* Implement TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P. Use an opt-out
rather than an opt-in list. */
static bool
aarch64_function_attribute_inlinable_p (const_tree fndecl)
{
/* A function that has local ZA state cannot be inlined into its caller,
since we only support managing ZA switches at function scope. */
return !aarch64_fndecl_has_new_state (fndecl, "za");
}
/* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
tri-bool options (yes, no, don't care) and the default value is
DEF, determine whether to reject inlining. */
@ -19176,6 +19190,60 @@ aarch64_tribools_ok_for_inlining_p (int caller, int callee,
return (callee == caller || callee == def);
}
/* Bit allocations for ipa_fn_summary::target_info. */
/* Set if the function contains a stmt that relies on the function's
choice of PSTATE.SM setting (0 for non-streaming, 1 for streaming).
Not meaningful for streaming-compatible functions. */
constexpr auto AARCH64_IPA_SM_FIXED = 1U << 0;
/* Set if the function clobbers ZA. Not meaningful for functions that
have ZA state. */
constexpr auto AARCH64_IPA_CLOBBERS_ZA = 1U << 1;
/* Implement TARGET_NEED_IPA_FN_TARGET_INFO. */
static bool
aarch64_need_ipa_fn_target_info (const_tree, unsigned int &)
{
/* We could in principle skip this for streaming-compatible functions
that have ZA state, but that's a rare combination. */
return true;
}
/* Implement TARGET_UPDATE_IPA_FN_TARGET_INFO. */
static bool
aarch64_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
{
if (auto *ga = dyn_cast<const gasm *> (stmt))
{
/* We don't know what the asm does, so conservatively assume that
it requires the function's current SM mode. */
info |= AARCH64_IPA_SM_FIXED;
for (unsigned int i = 0; i < gimple_asm_nclobbers (ga); ++i)
{
tree op = gimple_asm_clobber_op (ga, i);
const char *clobber = TREE_STRING_POINTER (TREE_VALUE (op));
if (strcmp (clobber, "za") == 0)
info |= AARCH64_IPA_CLOBBERS_ZA;
}
}
if (auto *call = dyn_cast<const gcall *> (stmt))
{
if (gimple_call_builtin_p (call, BUILT_IN_MD))
{
/* The attributes on AArch64 builtins are supposed to be accurate.
If the function isn't marked streaming-compatible then it
needs whichever SM mode it selects. */
tree decl = gimple_call_fndecl (call);
if (aarch64_fndecl_pstate_sm (decl) != 0)
info |= AARCH64_IPA_SM_FIXED;
}
}
return true;
}
/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
to inline CALLEE into CALLER based on target-specific info.
Make sure that the caller and callee have compatible architectural
@ -19198,12 +19266,56 @@ aarch64_can_inline_p (tree caller, tree callee)
: target_option_default_node);
/* Callee's ISA flags should be a subset of the caller's. */
if ((caller_opts->x_aarch64_asm_isa_flags
& callee_opts->x_aarch64_asm_isa_flags)
!= callee_opts->x_aarch64_asm_isa_flags)
auto caller_asm_isa = (caller_opts->x_aarch64_asm_isa_flags
& ~AARCH64_FL_ISA_MODES);
auto callee_asm_isa = (callee_opts->x_aarch64_asm_isa_flags
& ~AARCH64_FL_ISA_MODES);
if (callee_asm_isa & ~caller_asm_isa)
return false;
if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
!= callee_opts->x_aarch64_isa_flags)
auto caller_isa = (caller_opts->x_aarch64_isa_flags
& ~AARCH64_FL_ISA_MODES);
auto callee_isa = (callee_opts->x_aarch64_isa_flags
& ~AARCH64_FL_ISA_MODES);
if (callee_isa & ~caller_isa)
return false;
/* Return true if the callee might have target_info property PROPERTY.
The answer must be true unless we have positive proof to the contrary. */
auto callee_has_property = [&](unsigned int property)
{
if (ipa_fn_summaries)
if (auto *summary = ipa_fn_summaries->get (cgraph_node::get (callee)))
if (!(summary->target_info & property))
return false;
return true;
};
/* Streaming-compatible code can be inlined into functions with any
PSTATE.SM mode. Otherwise the caller and callee must agree on
PSTATE.SM mode, unless we can prove that the callee is naturally
streaming-compatible. */
auto caller_sm = (caller_opts->x_aarch64_isa_flags & AARCH64_FL_SM_STATE);
auto callee_sm = (callee_opts->x_aarch64_isa_flags & AARCH64_FL_SM_STATE);
if (callee_sm
&& caller_sm != callee_sm
&& callee_has_property (AARCH64_IPA_SM_FIXED))
return false;
/* aarch64_function_attribute_inlinable_p prevents new-ZA functions
from being inlined into others. We also need to prevent inlining
of shared-ZA functions into functions without ZA state, since this
is an error condition.
The only other problematic case for ZA is inlining a function that
directly clobbers ZA into a function that has ZA state. */
auto caller_za = (caller_opts->x_aarch64_isa_flags & AARCH64_FL_ZA_ON);
auto callee_za = (callee_opts->x_aarch64_isa_flags & AARCH64_FL_ZA_ON);
if (!caller_za && callee_za)
return false;
if (caller_za
&& !callee_za
&& callee_has_property (AARCH64_IPA_CLOBBERS_ZA))
return false;
/* Allow non-strict aligned functions inlining into strict
@ -28760,6 +28872,16 @@ aarch64_run_selftests (void)
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P \
aarch64_function_attribute_inlinable_p
#undef TARGET_NEED_IPA_FN_TARGET_INFO
#define TARGET_NEED_IPA_FN_TARGET_INFO aarch64_need_ipa_fn_target_info
#undef TARGET_UPDATE_IPA_FN_TARGET_INFO
#define TARGET_UPDATE_IPA_FN_TARGET_INFO aarch64_update_ipa_fn_target_info
#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P aarch64_can_inline_p

View file

@ -0,0 +1,47 @@
/* { dg-options "" } */
inline void __attribute__((always_inline))
sc_callee () [[arm::streaming_compatible]] {}
inline void __attribute__((always_inline))
s_callee () [[arm::streaming]] {}
inline void __attribute__((always_inline))
n_callee () {}
[[arm::locally_streaming]] inline void __attribute__((always_inline))
sc_ls_callee () [[arm::streaming_compatible]] {}
[[arm::locally_streaming]] inline void __attribute__((always_inline))
n_ls_callee () {}
inline void __attribute__((always_inline))
sc_asm_callee () [[arm::streaming_compatible]] { asm (""); }
inline void __attribute__((always_inline))
s_asm_callee () [[arm::streaming]] { asm (""); } // { dg-error "inlining failed" }
inline void __attribute__((always_inline))
n_asm_callee () { asm (""); } // { dg-error "inlining failed" }
[[arm::locally_streaming]] inline void __attribute__((always_inline))
sc_ls_asm_callee () [[arm::streaming_compatible]] { asm (""); } // { dg-error "inlining failed" }
[[arm::locally_streaming]] inline void __attribute__((always_inline))
n_ls_asm_callee () { asm (""); } // { dg-error "inlining failed" }
void
sc_caller () [[arm::streaming_compatible]]
{
sc_callee ();
s_callee ();
n_callee ();
sc_ls_callee ();
n_ls_callee ();
sc_asm_callee ();
s_asm_callee ();
n_asm_callee ();
sc_ls_asm_callee ();
n_ls_asm_callee ();
}

View file

@ -0,0 +1,57 @@
/* { dg-options "" } */
#include <arm_neon.h>
#include <arm_sme.h>
uint8x16_t *neon;
svint64_t *sve;
int64_t *ptr;
// Gets expanded to addition early, so no error. An error would be
// more correct though.
inline void __attribute__((always_inline))
call_vadd ()
{
neon[4] = vaddq_u8 (neon[5], neon[6]);
}
inline void __attribute__((always_inline))
call_vbsl () // { dg-error "inlining failed" }
{
neon[0] = vbslq_u8 (neon[1], neon[2], neon[3]);
}
inline void __attribute__((always_inline))
call_svadd ()
{
*sve = svadd_x (svptrue_b8 (), *sve, 1);
}
inline void __attribute__((always_inline))
call_svld1_gather () // { dg-error "inlining failed" }
{
*sve = svld1_gather_offset (svptrue_b8 (), ptr, *sve);
}
inline void __attribute__((always_inline))
call_svzero () [[arm::inout("za")]]
{
svzero_za ();
}
inline void __attribute__((always_inline))
call_svst1_za () [[arm::streaming, arm::inout("za")]] // { dg-error "inlining failed" }
{
svst1_ver_za64 (0, 0, svptrue_b8 (), ptr);
}
void
sc_caller () [[arm::inout("za"), arm::streaming_compatible]]
{
call_vadd ();
call_vbsl ();
call_svadd ();
call_svld1_gather ();
call_svzero ();
call_svst1_za ();
}

View file

@ -0,0 +1,57 @@
/* { dg-options "" } */
#include <arm_neon.h>
#include <arm_sme.h>
uint8x16_t *neon;
svint64_t *sve;
int64_t *ptr;
// Gets expanded to addition early, so no error. An error would be
// more correct though.
inline void __attribute__((always_inline))
call_vadd ()
{
neon[4] = vaddq_u8 (neon[5], neon[6]);
}
inline void __attribute__((always_inline))
call_vbsl () // { dg-error "inlining failed" }
{
neon[0] = vbslq_u8 (neon[1], neon[2], neon[3]);
}
inline void __attribute__((always_inline))
call_svadd ()
{
*sve = svadd_x (svptrue_b8 (), *sve, 1);
}
inline void __attribute__((always_inline))
call_svld1_gather () // { dg-error "inlining failed" }
{
*sve = svld1_gather_offset (svptrue_b8 (), ptr, *sve);
}
inline void __attribute__((always_inline))
call_svzero () [[arm::inout("za")]]
{
svzero_za ();
}
inline void __attribute__((always_inline))
call_svst1_za () [[arm::streaming, arm::inout("za")]]
{
svst1_ver_za64 (0, 0, svptrue_b8 (), ptr);
}
void
sc_caller () [[arm::inout("za"), arm::streaming]]
{
call_vadd ();
call_vbsl ();
call_svadd ();
call_svld1_gather ();
call_svzero ();
call_svst1_za ();
}

View file

@ -0,0 +1,15 @@
/* { dg-options "" } */
#include <arm_sme.h>
inline void __attribute__((always_inline))
call_svzero () [[arm::inout("za"), arm::streaming_compatible]] // { dg-error "inlining failed" }
{
svzero_za ();
}
void
n_caller ()
{
call_svzero ();
}

View file

@ -0,0 +1,15 @@
/* { dg-options "" } */
#include <arm_sme.h>
inline void __attribute__((always_inline))
call_svzero () [[arm::inout("za"), arm::streaming_compatible]] // { dg-error "inlining failed" }
{
svzero_za ();
}
void
s_caller ()
{
call_svzero ();
}

View file

@ -0,0 +1,15 @@
/* { dg-options "" } */
#include <arm_sme.h>
inline void __attribute__((always_inline))
call_svzero () [[arm::inout("za"), arm::streaming_compatible]] // { dg-error "inlining failed" }
{
svzero_za ();
}
void
sc_caller ()
{
call_svzero ();
}

View file

@ -0,0 +1,27 @@
/* { dg-options "" } */
#include <arm_sme.h>
inline void
call_svzero () [[arm::inout("za"), arm::streaming_compatible]]
{
svzero_za ();
}
void
n_caller ()
{
call_svzero (); // { dg-error "call to a function that shares 'za' state from a function that has no 'za' state" }
}
void
s_caller ()
{
call_svzero (); // { dg-error "call to a function that shares 'za' state from a function that has no 'za' state" }
}
void
sc_caller ()
{
call_svzero (); // { dg-error "call to a function that shares 'za' state from a function that has no 'za' state" }
}

View file

@ -0,0 +1,47 @@
/* { dg-options "" } */
inline void __attribute__((always_inline))
sc_callee () [[arm::streaming_compatible]] {}
inline void __attribute__((always_inline))
s_callee () [[arm::streaming]] {}
inline void __attribute__((always_inline))
n_callee () {}
[[arm::locally_streaming]] inline void __attribute__((always_inline))
sc_ls_callee () [[arm::streaming_compatible]] {}
[[arm::locally_streaming]] inline void __attribute__((always_inline))
n_ls_callee () {}
inline void __attribute__((always_inline))
sc_asm_callee () [[arm::streaming_compatible]] { asm (""); }
inline void __attribute__((always_inline))
s_asm_callee () [[arm::streaming]] { asm (""); }
inline void __attribute__((always_inline))
n_asm_callee () { asm (""); } // { dg-error "inlining failed" }
[[arm::locally_streaming]] inline void __attribute__((always_inline))
sc_ls_asm_callee () [[arm::streaming_compatible]] { asm (""); }
[[arm::locally_streaming]] inline void __attribute__((always_inline))
n_ls_asm_callee () { asm (""); }
void
s_caller () [[arm::streaming]]
{
sc_callee ();
s_callee ();
n_callee ();
sc_ls_callee ();
n_ls_callee ();
sc_asm_callee ();
s_asm_callee ();
n_asm_callee ();
sc_ls_asm_callee ();
n_ls_asm_callee ();
}

View file

@ -0,0 +1,47 @@
/* { dg-options "" } */
inline void __attribute__((always_inline))
sc_callee () [[arm::streaming_compatible]] {}
inline void __attribute__((always_inline))
s_callee () [[arm::streaming]] {}
inline void __attribute__((always_inline))
n_callee () {}
[[arm::locally_streaming]] inline void __attribute__((always_inline))
sc_ls_callee () [[arm::streaming_compatible]] {}
[[arm::locally_streaming]] inline void __attribute__((always_inline))
n_ls_callee () {}
inline void __attribute__((always_inline))
sc_asm_callee () [[arm::streaming_compatible]] { asm (""); }
inline void __attribute__((always_inline))
s_asm_callee () [[arm::streaming]] { asm (""); } // { dg-error "inlining failed" }
inline void __attribute__((always_inline))
n_asm_callee () { asm (""); }
[[arm::locally_streaming]] inline void __attribute__((always_inline))
sc_ls_asm_callee () [[arm::streaming_compatible]] { asm (""); } // { dg-error "inlining failed" }
[[arm::locally_streaming]] inline void __attribute__((always_inline))
n_ls_asm_callee () { asm (""); } // { dg-error "inlining failed" }
void
n_caller ()
{
sc_callee ();
s_callee ();
n_callee ();
sc_ls_callee ();
n_ls_callee ();
sc_asm_callee ();
s_asm_callee ();
n_asm_callee ();
sc_ls_asm_callee ();
n_ls_asm_callee ();
}

View file

@ -0,0 +1,47 @@
/* { dg-options "" } */
inline void __attribute__((always_inline))
sc_callee () [[arm::streaming_compatible]] {}
inline void __attribute__((always_inline))
s_callee () [[arm::streaming]] {}
inline void __attribute__((always_inline))
n_callee () {}
[[arm::locally_streaming]] inline void __attribute__((always_inline))
sc_ls_callee () [[arm::streaming_compatible]] {}
[[arm::locally_streaming]] inline void __attribute__((always_inline))
n_ls_callee () {}
inline void __attribute__((always_inline))
sc_asm_callee () [[arm::streaming_compatible]] { asm (""); }
inline void __attribute__((always_inline))
s_asm_callee () [[arm::streaming]] { asm (""); }
inline void __attribute__((always_inline))
n_asm_callee () { asm (""); } // { dg-error "inlining failed" }
[[arm::locally_streaming]] inline void __attribute__((always_inline))
sc_ls_asm_callee () [[arm::streaming_compatible]] { asm (""); }
[[arm::locally_streaming]] inline void __attribute__((always_inline))
n_ls_asm_callee () { asm (""); }
[[arm::locally_streaming]] void
sc_ls_caller () [[arm::streaming_compatible]]
{
sc_callee ();
s_callee ();
n_callee ();
sc_ls_callee ();
n_ls_callee ();
sc_asm_callee ();
s_asm_callee ();
n_asm_callee ();
sc_ls_asm_callee ();
n_ls_asm_callee ();
}

View file

@ -0,0 +1,47 @@
/* { dg-options "" } */
inline void __attribute__((always_inline))
sc_callee () [[arm::streaming_compatible]] {}
inline void __attribute__((always_inline))
s_callee () [[arm::streaming]] {}
inline void __attribute__((always_inline))
n_callee () {}
[[arm::locally_streaming]] inline void __attribute__((always_inline))
sc_ls_callee () [[arm::streaming_compatible]] {}
[[arm::locally_streaming]] inline void __attribute__((always_inline))
n_ls_callee () {}
inline void __attribute__((always_inline))
sc_asm_callee () [[arm::streaming_compatible]] { asm (""); }
inline void __attribute__((always_inline))
s_asm_callee () [[arm::streaming]] { asm (""); }
inline void __attribute__((always_inline))
n_asm_callee () { asm (""); } // { dg-error "inlining failed" }
[[arm::locally_streaming]] inline void __attribute__((always_inline))
sc_ls_asm_callee () [[arm::streaming_compatible]] { asm (""); }
[[arm::locally_streaming]] inline void __attribute__((always_inline))
n_ls_asm_callee () { asm (""); }
[[arm::locally_streaming]] void
n_ls_caller ()
{
sc_callee ();
s_callee ();
n_callee ();
sc_ls_callee ();
n_ls_callee ();
sc_asm_callee ();
s_asm_callee ();
n_asm_callee ();
sc_ls_asm_callee ();
n_ls_asm_callee ();
}

View file

@ -0,0 +1,31 @@
/* { dg-options "" } */
inline void __attribute__((always_inline))
shared_callee () [[arm::inout("za")]] {}
[[arm::new("za")]] inline void __attribute__((always_inline))
new_callee () {} // { dg-error "inlining failed" }
inline void __attribute__((always_inline))
normal_callee () {}
inline void __attribute__((always_inline))
shared_asm_callee () [[arm::inout("za")]] { asm volatile ("" ::: "za"); }
[[arm::new("za")]] inline void __attribute__((always_inline))
new_asm_callee () { asm volatile ("" ::: "za"); } // { dg-error "inlining failed" }
inline void __attribute__((always_inline))
normal_asm_callee () { asm volatile ("" ::: "za"); } // { dg-error "inlining failed" }
void
shared_caller () [[arm::inout("za")]]
{
shared_callee ();
new_callee ();
normal_callee ();
shared_asm_callee ();
new_asm_callee ();
normal_asm_callee ();
}

View file

@ -0,0 +1,31 @@
/* { dg-options "" } */
inline void __attribute__((always_inline))
shared_callee () [[arm::inout("za")]] {}
[[arm::new("za")]] inline void __attribute__((always_inline))
new_callee () {} // { dg-error "inlining failed" }
inline void __attribute__((always_inline))
normal_callee () {}
inline void __attribute__((always_inline))
shared_asm_callee () [[arm::inout("za")]] { asm volatile ("" ::: "za"); }
[[arm::new("za")]] inline void __attribute__((always_inline))
new_asm_callee () { asm volatile ("" ::: "za"); } // { dg-error "inlining failed" }
inline void __attribute__((always_inline))
normal_asm_callee () { asm volatile ("" ::: "za"); } // { dg-error "inlining failed" }
[[arm::new("za")]] void
new_caller ()
{
shared_callee ();
new_callee ();
normal_callee ();
shared_asm_callee ();
new_asm_callee ();
normal_asm_callee ();
}

View file

@ -0,0 +1,31 @@
/* { dg-options "" } */
inline void __attribute__((always_inline))
shared_callee () [[arm::inout("za")]] {} // { dg-error "inlining failed" }
[[arm::new("za")]] inline void __attribute__((always_inline))
new_callee () {} // { dg-error "inlining failed" }
inline void __attribute__((always_inline))
normal_callee () {}
inline void __attribute__((always_inline))
shared_asm_callee () [[arm::inout("za")]] { asm volatile ("" ::: "za"); } // { dg-error "inlining failed" }
[[arm::new("za")]] inline void __attribute__((always_inline))
new_asm_callee () { asm volatile ("" ::: "za"); } // { dg-error "inlining failed" }
inline void __attribute__((always_inline))
normal_asm_callee () { asm volatile ("" ::: "za"); }
void
normal_caller ()
{
shared_callee ();
new_callee ();
normal_callee ();
shared_asm_callee ();
new_asm_callee ();
normal_asm_callee ();
}

View file

@ -0,0 +1,55 @@
/* { dg-options "" } */
#include <arm_neon.h>
#include <arm_sme.h>
uint8x16_t *neon;
svint64_t *sve;
int64_t *ptr;
inline void __attribute__((always_inline))
call_vadd ()
{
neon[4] = vaddq_u8 (neon[5], neon[6]);
}
inline void __attribute__((always_inline))
call_vbsl ()
{
neon[0] = vbslq_u8 (neon[1], neon[2], neon[3]);
}
inline void __attribute__((always_inline))
call_svadd ()
{
*sve = svadd_x (svptrue_b8 (), *sve, 1);
}
inline void __attribute__((always_inline))
call_svld1_gather ()
{
*sve = svld1_gather_offset (svptrue_b8 (), ptr, *sve);
}
inline void __attribute__((always_inline))
call_svzero () [[arm::inout("za")]]
{
svzero_za ();
}
inline void __attribute__((always_inline))
call_svst1_za () [[arm::streaming, arm::inout("za")]] // { dg-error "inlining failed" }
{
svst1_ver_za64 (0, 0, svptrue_b8 (), ptr);
}
void
n_caller () [[arm::inout("za")]]
{
call_vadd ();
call_vbsl ();
call_svadd ();
call_svld1_gather ();
call_svzero ();
call_svst1_za ();
}