aarch64: Implement the ACLE instruction/data prefetch functions.
Implement the ACLE data and instruction prefetch functions[1] with the following signatures: 1. Data prefetch intrinsics: ---------------------------- void __pldx (/*constant*/ unsigned int /*access_kind*/, /*constant*/ unsigned int /*cache_level*/, /*constant*/ unsigned int /*retention_policy*/, void const volatile *addr); void __pld (void const volatile *addr); 2. Instruction prefetch intrinsics: ----------------------------------- void __plix (/*constant*/ unsigned int /*cache_level*/, /*constant*/ unsigned int /*retention_policy*/, void const volatile *addr); void __pli (void const volatile *addr); `__pldx' affords the programmer more fine-grained control over the data prefetch behaviour than the analogous GCC builtin `__builtin_prefetch', and allows access to the "SLC" cache level. While `__builtin_prefetch' chooses both cache-level and retention policy automatically via the optional `locality' parameter, `__pldx' expects 2 (mandatory) arguments to explicitly define the desired cache-level and retention policies. `__plix', on the other hand, generates a code prefetch instruction and so extends functionality on aarch64 targets beyond that which is exposed by `__builtin_prefetch'. `__pld' and `__pli' do prefetch of data and instructions, respectively, using default values for both cache-level and retention policies. Bootstrapped and tested on aarch64-none-linux-gnu. [1] https://arm-software.github.io/acle/main/acle.html#memory-prefetch-intrinsics gcc/ChangeLog: * config/aarch64/aarch64-builtins.cc (AARCH64_PLD): New enum aarch64_builtins entry. (AARCH64_PLDX): Likewise. (AARCH64_PLI): Likewise. (AARCH64_PLIX): Likewise. (aarch64_init_prefetch_builtin): New. (aarch64_general_init_builtins): Call prefetch init function. (aarch64_expand_prefetch_builtin): New. (aarch64_general_expand_builtin): Add prefetch expansion. (require_const_argument): New. * config/aarch64/aarch64.md (UNSPEC_PLDX): New. (aarch64_pldx): Likewise. * config/aarch64/arm_acle.h (__pld): Likewise.
(__pli): Likewise. (__plix): Likewise. (__pldx): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/builtin_pld_pli.c: New. * gcc.target/aarch64/builtin_pld_pli_illegal.c: New.
This commit is contained in:
parent
fda8e2f829
commit
9fba663768
5 changed files with 290 additions and 0 deletions
|
@ -822,6 +822,10 @@ enum aarch64_builtins
|
|||
AARCH64_WSRF,
|
||||
AARCH64_WSRF64,
|
||||
AARCH64_WSR128,
|
||||
AARCH64_PLD,
|
||||
AARCH64_PLDX,
|
||||
AARCH64_PLI,
|
||||
AARCH64_PLIX,
|
||||
AARCH64_BUILTIN_MAX
|
||||
};
|
||||
|
||||
|
@ -1878,7 +1882,34 @@ aarch64_init_rwsr_builtins (void)
|
|||
= build_function_type_list (void_type_node, const_char_ptr_type,
|
||||
uint128_type_node, NULL);
|
||||
AARCH64_INIT_RWSR_BUILTINS_DECL (WSR128, wsr128, fntype);
|
||||
}
|
||||
|
||||
/* Add builtins for data and instrution prefetch. */
|
||||
static void
|
||||
aarch64_init_prefetch_builtin (void)
|
||||
{
|
||||
#define AARCH64_INIT_PREFETCH_BUILTIN(INDEX, N) \
|
||||
aarch64_builtin_decls[INDEX] = \
|
||||
aarch64_general_add_builtin ("__builtin_aarch64_" N, ftype, INDEX)
|
||||
|
||||
tree ftype;
|
||||
tree cv_argtype;
|
||||
cv_argtype = build_qualified_type (void_type_node, TYPE_QUAL_CONST
|
||||
| TYPE_QUAL_VOLATILE);
|
||||
cv_argtype = build_pointer_type (cv_argtype);
|
||||
|
||||
ftype = build_function_type_list (void_type_node, cv_argtype, NULL);
|
||||
AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLD, "pld");
|
||||
AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLI, "pli");
|
||||
|
||||
ftype = build_function_type_list (void_type_node, unsigned_type_node,
|
||||
unsigned_type_node, unsigned_type_node,
|
||||
cv_argtype, NULL);
|
||||
AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLDX, "pldx");
|
||||
|
||||
ftype = build_function_type_list (void_type_node, unsigned_type_node,
|
||||
unsigned_type_node, cv_argtype, NULL);
|
||||
AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLIX, "plix");
|
||||
}
|
||||
|
||||
/* Initialize the memory tagging extension (MTE) builtins. */
|
||||
|
@ -2103,6 +2134,7 @@ aarch64_general_init_builtins (void)
|
|||
aarch64_init_data_intrinsics ();
|
||||
|
||||
aarch64_init_rwsr_builtins ();
|
||||
aarch64_init_prefetch_builtin ();
|
||||
|
||||
tree ftype_jcvt
|
||||
= build_function_type_list (intSI_type_node, double_type_node, NULL);
|
||||
|
@ -2839,6 +2871,93 @@ aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode)
|
|||
}
|
||||
}
|
||||
|
||||
/* Ensure argument ARGNO in EXP represents a const-type argument in the range
|
||||
[MINVAL, MAXVAL). */
|
||||
static HOST_WIDE_INT
|
||||
require_const_argument (tree exp, unsigned int argno, HOST_WIDE_INT minval,
|
||||
HOST_WIDE_INT maxval)
|
||||
{
|
||||
maxval--;
|
||||
tree arg = CALL_EXPR_ARG (exp, argno);
|
||||
if (TREE_CODE (arg) != INTEGER_CST)
|
||||
error_at (EXPR_LOCATION (exp), "Constant-type argument expected");
|
||||
|
||||
auto argval = wi::to_widest (arg);
|
||||
|
||||
if (argval < minval || argval > maxval)
|
||||
error_at (EXPR_LOCATION (exp),
|
||||
"argument %d must be a constant immediate "
|
||||
"in range [%wd,%wd]", argno + 1, minval, maxval);
|
||||
|
||||
HOST_WIDE_INT retval = argval.to_shwi ();
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
/* Expand a prefetch builtin EXP. */
|
||||
void
|
||||
aarch64_expand_prefetch_builtin (tree exp, int fcode)
|
||||
{
|
||||
int kind_id = -1;
|
||||
int level_id = -1;
|
||||
int rettn_id = -1;
|
||||
char prfop[11];
|
||||
class expand_operand ops[2];
|
||||
|
||||
static const char *kind_s[] = {"PLD", "PST", "PLI"};
|
||||
static const char *level_s[] = {"L1", "L2", "L3", "SLC"};
|
||||
static const char *rettn_s[] = {"KEEP", "STRM"};
|
||||
|
||||
/* Each of the four prefetch builtins takes a different number of arguments,
|
||||
but proceeds to call the PRFM insn which requires 4 pieces of information
|
||||
to be fully defined. Where one of these takes less than 4 arguments, set
|
||||
sensible defaults. */
|
||||
switch (fcode)
|
||||
{
|
||||
case AARCH64_PLDX:
|
||||
break;
|
||||
case AARCH64_PLIX:
|
||||
kind_id = 2;
|
||||
break;
|
||||
case AARCH64_PLI:
|
||||
case AARCH64_PLD:
|
||||
kind_id = (fcode == AARCH64_PLD) ? 0 : 2;
|
||||
level_id = 0;
|
||||
rettn_id = 0;
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Any -1 id variable is to be user-supplied. Here we fill these in and run
|
||||
bounds checks on them. "PLI" is used only implicitly by AARCH64_PLI &
|
||||
AARCH64_PLIX, never explicitly. */
|
||||
int argno = 0;
|
||||
if (kind_id < 0)
|
||||
kind_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (kind_s) - 1);
|
||||
if (level_id < 0)
|
||||
level_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (level_s));
|
||||
if (rettn_id < 0)
|
||||
rettn_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (rettn_s));
|
||||
rtx address = expand_expr (CALL_EXPR_ARG (exp, argno), NULL_RTX, Pmode,
|
||||
EXPAND_NORMAL);
|
||||
|
||||
if (seen_error ())
|
||||
return;
|
||||
|
||||
sprintf (prfop, "%s%s%s", kind_s[kind_id],
|
||||
level_s[level_id],
|
||||
rettn_s[rettn_id]);
|
||||
|
||||
rtx const_str = rtx_alloc (CONST_STRING);
|
||||
PUT_CODE (const_str, CONST_STRING);
|
||||
XSTR (const_str, 0) = ggc_strdup (prfop);
|
||||
|
||||
create_fixed_operand (&ops[0], const_str);
|
||||
create_address_operand (&ops[1], address);
|
||||
maybe_expand_insn (CODE_FOR_aarch64_pldx, 2, ops);
|
||||
}
|
||||
|
||||
/* Expand an expression EXP that calls a MEMTAG built-in FCODE
|
||||
with result going to TARGET. */
|
||||
static rtx
|
||||
|
@ -3085,6 +3204,12 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
|
|||
case AARCH64_WSRF64:
|
||||
case AARCH64_WSR128:
|
||||
return aarch64_expand_rwsr_builtin (exp, target, fcode);
|
||||
case AARCH64_PLD:
|
||||
case AARCH64_PLDX:
|
||||
case AARCH64_PLI:
|
||||
case AARCH64_PLIX:
|
||||
aarch64_expand_prefetch_builtin (exp, fcode);
|
||||
return target;
|
||||
}
|
||||
|
||||
if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
|
||||
|
|
|
@ -338,6 +338,7 @@
|
|||
UNSPEC_SYSREG_RTI
|
||||
UNSPEC_SYSREG_WDI
|
||||
UNSPEC_SYSREG_WTI
|
||||
UNSPEC_PLDX
|
||||
;; Represents an SVE-style lane index, in which the indexing applies
|
||||
;; within the containing 128-bit block.
|
||||
UNSPEC_SVE_LANE_SELECT
|
||||
|
@ -970,6 +971,17 @@
|
|||
[(set_attr "type" "load_4")]
|
||||
)
|
||||
|
||||
;; PRFM instruction used by the ACLE prefetch builtins.  Operand 0 supplies
;; the prefetch operation text that is output as the first PRFM operand (the
;; builtin expander passes a CONST_STRING such as "PLDL1KEEP").  Operand 1 is
;; the DImode address to prefetch; it is wrapped in a DImode MEM before
;; output, presumably so that %1 is printed with memory-operand syntax.
(define_insn "aarch64_pldx"
|
||||
[(unspec [(match_operand 0 "" "")
|
||||
(match_operand:DI 1 "aarch64_prefetch_operand" "Dp")] UNSPEC_PLDX)]
|
||||
""
|
||||
{
|
||||
operands[1] = gen_rtx_MEM (DImode, operands[1]);
|
||||
return "prfm\\t%0, %1";
|
||||
}
|
||||
[(set_attr "type" "load_4")]
|
||||
)
|
||||
|
||||
(define_insn "trap"
|
||||
[(trap_if (const_int 1) (const_int 8))]
|
||||
""
|
||||
|
|
|
@ -78,6 +78,36 @@ _GCC_ARM_ACLE_DATA_FN (revll, bswap64, uint64_t, uint64_t)
|
|||
|
||||
#undef _GCC_ARM_ACLE_DATA_FN
|
||||
|
||||
/* ACLE data prefetch of the memory at __ADDR.  Cache level and retention
   policy are not specified by the caller; __builtin_aarch64_pld supplies
   them.  */
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__pld (void const volatile *__addr)
{
  /* The builtin returns void, so call it as a plain statement: ISO C does
     not allow `return' with an expression in a function returning void.  */
  __builtin_aarch64_pld (__addr);
}
|
||||
|
||||
/* ACLE instruction prefetch of the code at __ADDR.  Cache level and
   retention policy are not specified by the caller; __builtin_aarch64_pli
   supplies them.  */
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__pli (void const volatile *__addr)
{
  /* The builtin returns void, so call it as a plain statement: ISO C does
     not allow `return' with an expression in a function returning void.  */
  __builtin_aarch64_pli (__addr);
}
|
||||
|
||||
/* ACLE instruction prefetch of the code at __ADDR with an explicit cache
   level (__CACHE) and retention policy (__RETTN).  Both selectors are
   forwarded unchanged to __builtin_aarch64_plix.  */
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__plix (unsigned int __cache, unsigned int __rettn,
	void const volatile *__addr)
{
  /* The builtin returns void, so call it as a plain statement: ISO C does
     not allow `return' with an expression in a function returning void.  */
  __builtin_aarch64_plix (__cache, __rettn, __addr);
}
|
||||
|
||||
/* ACLE data prefetch of the memory at __ADDR with an explicit access kind
   (__ACCESS), cache level (__CACHE) and retention policy (__RETTN).  All
   three selectors are forwarded unchanged to __builtin_aarch64_pldx.  */
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__pldx (unsigned int __access, unsigned int __cache, unsigned int __rettn,
	void const volatile *__addr)
{
  /* The builtin returns void, so call it as a plain statement: ISO C does
     not allow `return' with an expression in a function returning void.  */
  __builtin_aarch64_pldx (__access, __cache, __rettn, __addr);
}
|
||||
|
||||
__extension__ extern __inline unsigned long
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
__revl (unsigned long __value)
|
||||
|
|
90
gcc/testsuite/gcc.target/aarch64/builtin_pld_pli.c
Normal file
90
gcc/testsuite/gcc.target/aarch64/builtin_pld_pli.c
Normal file
|
@ -0,0 +1,90 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=armv8-a -O2" } */
|
||||
|
||||
#include <arm_acle.h>
|
||||
|
||||
/* Check that the ACLE prefetch intrinsics generate PRFM with the
|
||||
expected prefetch operation and a base-register address. */
|
||||
|
||||
/* Access kind specifiers. */
|
||||
#define PLD 0
|
||||
#define PST 1
|
||||
/* Cache levels. */
|
||||
#define L1 0
|
||||
#define L2 1
|
||||
#define L3 2
|
||||
#define SLC 3
|
||||
/* Retention policies. */
|
||||
#define KEEP 0
|
||||
#define STRM 1
|
||||
|
||||
/* Call __pldx once for every combination of access kind (PLD, PST), cache
   level (L1, L2, L3, SLC) and retention policy (KEEP, STRM) defined above,
   always on the same address A.  */
void
|
||||
prefetch_for_read_write (void *a)
|
||||
{
|
||||
__pldx (PLD, L1, KEEP, a);
|
||||
__pldx (PLD, L1, STRM, a);
|
||||
__pldx (PLD, L2, KEEP, a);
|
||||
__pldx (PLD, L2, STRM, a);
|
||||
__pldx (PLD, L3, KEEP, a);
|
||||
__pldx (PLD, L3, STRM, a);
|
||||
__pldx (PLD, SLC, KEEP, a);
|
||||
__pldx (PLD, SLC, STRM, a);
|
||||
__pldx (PST, L1, KEEP, a);
|
||||
__pldx (PST, L1, STRM, a);
|
||||
__pldx (PST, L2, KEEP, a);
|
||||
__pldx (PST, L2, STRM, a);
|
||||
__pldx (PST, L3, KEEP, a);
|
||||
__pldx (PST, L3, STRM, a);
|
||||
__pldx (PST, SLC, KEEP, a);
|
||||
__pldx (PST, SLC, STRM, a);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "prfm\tPLDL1KEEP, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPLDL1STRM, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPLDL2KEEP, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPLDL2STRM, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPLDL3KEEP, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPLDL3STRM, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPLDSLCKEEP, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPLDSLCSTRM, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPSTL1KEEP, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPSTL1STRM, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPSTL2KEEP, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPSTL2STRM, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPSTL3KEEP, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPSTL3STRM, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPSTSLCKEEP, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPSTSLCSTRM, \\\[x\[0-9\]+\\\]" } } */
|
||||
|
||||
/* Exercise the argument-free forms: one data prefetch (__pld) and one
   instruction prefetch (__pli) of address A.  */
void
|
||||
prefetch_simple (void *a)
|
||||
{
|
||||
__pld (a);
|
||||
__pli (a);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "prfm\tPLDL1KEEP, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPLIL1KEEP, \\\[x\[0-9\]+\\\]" } } */
|
||||
|
||||
/* Call __plix once for every combination of cache level (L1, L2, L3, SLC)
   and retention policy (KEEP, STRM) defined above, always on the same
   address A.  */
void
|
||||
prefetch_instructions (void *a)
|
||||
{
|
||||
__plix (L1, KEEP, a);
|
||||
__plix (L1, STRM, a);
|
||||
__plix (L2, KEEP, a);
|
||||
__plix (L2, STRM, a);
|
||||
__plix (L3, KEEP, a);
|
||||
__plix (L3, STRM, a);
|
||||
__plix (SLC, KEEP, a);
|
||||
__plix (SLC, STRM, a);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "prfm\tPLIL1KEEP, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPLIL1STRM, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPLIL2KEEP, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPLIL2STRM, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPLIL3KEEP, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPLIL3STRM, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPLISLCKEEP, \\\[x\[0-9\]+\\\]" } } */
|
||||
/* { dg-final { scan-assembler "prfm\tPLISLCSTRM, \\\[x\[0-9\]+\\\]" } } */
|
||||
|
33
gcc/testsuite/gcc.target/aarch64/builtin_pld_pli_illegal.c
Normal file
33
gcc/testsuite/gcc.target/aarch64/builtin_pld_pli_illegal.c
Normal file
|
@ -0,0 +1,33 @@
|
|||
/* Check that PRFM-related bounds checks are applied correctly. */
|
||||
/* { dg-do compile } */
|
||||
#include <arm_acle.h>
|
||||
|
||||
/* Access kind specifiers. */
|
||||
#define KIND_LOW -1
|
||||
#define KIND_HIGH 2
|
||||
/* Cache levels. */
|
||||
#define LEVEL_LOW -1
|
||||
#define LEVEL_HIGH 4
|
||||
/* Retention policies. */
|
||||
#define POLICY_LOW -1
|
||||
#define POLICY_HIGH 2
|
||||
|
||||
/* Each call passes exactly one out-of-range selector to
   __builtin_aarch64_pldx (one below and one above the valid range for the
   access kind, cache level and retention policy in turn) and expects the
   range diagnostic named in its dg-error directive.  */
void
|
||||
data_rw_prefetch_bad_bounds (void *a)
|
||||
{
|
||||
__builtin_aarch64_pldx (KIND_LOW, 0, 0, a); /* { dg-error {argument 1 must be a constant immediate in range \[0,1\]} } */
|
||||
__builtin_aarch64_pldx (KIND_HIGH, 0, 0, a); /* { dg-error {argument 1 must be a constant immediate in range \[0,1\]} } */
|
||||
__builtin_aarch64_pldx (0, LEVEL_LOW, 0, a); /* { dg-error {argument 2 must be a constant immediate in range \[0,3\]} } */
|
||||
__builtin_aarch64_pldx (0, LEVEL_HIGH, 0, a); /* { dg-error {argument 2 must be a constant immediate in range \[0,3\]} } */
|
||||
__builtin_aarch64_pldx (0, 0, POLICY_LOW, a); /* { dg-error {argument 3 must be a constant immediate in range \[0,1\]} } */
|
||||
__builtin_aarch64_pldx (0, 0, POLICY_HIGH, a); /* { dg-error {argument 3 must be a constant immediate in range \[0,1\]} } */
|
||||
}
|
||||
|
||||
/* Each call passes exactly one out-of-range selector to
   __builtin_aarch64_plix (one below and one above the valid range for the
   cache level and retention policy in turn) and expects the range
   diagnostic named in its dg-error directive.  */
void
|
||||
insn_prefetch_bad_bounds (void *a)
|
||||
{
|
||||
__builtin_aarch64_plix (LEVEL_LOW, 0, a); /* { dg-error {argument 1 must be a constant immediate in range \[0,3\]} } */
|
||||
__builtin_aarch64_plix (LEVEL_HIGH, 0, a); /* { dg-error {argument 1 must be a constant immediate in range \[0,3\]} } */
|
||||
__builtin_aarch64_plix (0, POLICY_LOW, a); /* { dg-error {argument 2 must be a constant immediate in range \[0,1\]} } */
|
||||
__builtin_aarch64_plix (0, POLICY_HIGH, a); /* { dg-error {argument 2 must be a constant immediate in range \[0,1\]} } */
|
||||
}
|
Loading…
Add table
Reference in a new issue