arm.c (neon_vdup_constant, [...]): New.
gcc/
	* config/arm/arm.c (neon_vdup_constant, neon_make_constant): New.
	(neon_expand_vector_init): Use them.  Also handle non-constant
	vectors with identical elements and vectors with only one
	non-constant element.
	(arm_print_operand): Handle 'y' modifier.
	* config/arm/arm-protos.h (neon_make_constant): Declare.
	* config/arm/neon.md (neon_vdup_n<mode>): Split into two
	patterns.  Use VX instead of VDQW for the first one.  Allow
	a VFP alternative and V32 modes for the second one.
	* config/arm/neon.ml (shape_elt): Add Alternatives.
	(ops): Use Alternatives for vdup lane instructions.
	* config/arm/neon-testgen.ml (analyze_shape): Handle Alternatives.
	* config/arm/vec-common.md (mov<mode>): Use neon_make_constant.

gcc/testsuite/
	* gcc.target/arm/neon: Regenerate generated tests.

From-SVN: r154094
commit 814a4c3b35 (parent 40f73786e9)
44 changed files with 276 additions and 53 deletions
gcc/ChangeLog
@@ -1,3 +1,19 @@
2009-11-11  Daniel Jacobowitz  <dan@codesourcery.com>

	* config/arm/arm.c (neon_vdup_constant, neon_make_constant): New.
	(neon_expand_vector_init): Use them.  Also handle non-constant
	vectors with identical elements and vectors with only one
	non-constant element.
	(arm_print_operand): Handle 'y' modifier.
	* config/arm/arm-protos.h (neon_make_constant): Declare.
	* config/arm/neon.md (neon_vdup_n<mode>): Split into two
	patterns.  Use VX instead of VDQW for the first one.  Allow
	a VFP alternative and V32 modes for the second one.
	* config/arm/neon.ml (shape_elt): Add Alternatives.
	(ops): Use Alternatives for vdup lane instructions.
	* config/arm/neon-testgen.ml (analyze_shape): Handle Alternatives.
	* config/arm/vec-common.md (mov<mode>): Use neon_make_constant.

2009-11-11  Daniel Jacobowitz  <dan@codesourcery.com>

	* config/arm/neon.md (*neon_mov<mode>): Reject two non-register
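For context, a minimal illustration of the initializations this entry targets (illustrative only, not part of the commit; the code generation described in the comments is an assumption about what the new paths produce):

#include <arm_neon.h>

/* Uniform constant vector: can now be emitted as a single vmov/vdup
   instead of a constant-pool or stack load (assumed codegen).  */
int32x4_t
splat_const (void)
{
  int32x4_t v = {42, 42, 42, 42};
  return v;
}

/* Uniform non-constant vector: the new all_same path emits one
   vdup.32 from the core register holding X (assumed codegen).  */
int32x4_t
splat_var (int32_t x)
{
  int32x4_t v = {x, x, x, x};
  return v;
}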
gcc/config/arm/arm-protos.h
@@ -68,6 +68,7 @@ extern char *neon_output_logic_immediate (const char *, rtx *,
					   enum machine_mode, int, int);
extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode,
				  rtx (*) (rtx, rtx, rtx));
extern rtx neon_make_constant (rtx);
extern void neon_expand_vector_init (rtx, rtx);
extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
gcc/config/arm/arm.c
@@ -8085,25 +8085,171 @@ neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
    }
}

/* Initialize a vector with non-constant elements.  FIXME: We can do better
   than the current implementation (building a vector on the stack and then
   loading it) in many cases.  See rs6000.c.  */
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

static rtx
neon_vdup_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  if (!all_same)
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
  return gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
                         UNSPEC_VDUP_N);
}

/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
         CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
         Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
        {
          rtx x = XVECEXP (vals, 0, i);
          if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
            n_const++;
        }
      if (n_const == n_elts)
        const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We can not take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}

/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner = GET_MODE_INNER (mode);
  unsigned int i, n_elts = GET_MODE_NUNITS (mode);
  rtx mem;
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  gcc_assert (VECTOR_MODE_P (mode));
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        ++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
        {
          emit_move_insn (target, constant);
          return;
        }
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
                                              UNSPEC_VDUP_N)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtvec ops;

      /* Load constant part of vector, substitute neighboring value for
         varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      ops = gen_rtvec (3, x, target, GEN_INT (one_var));
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE)));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
                    XVECEXP (vals, 0, i));
    emit_move_insn (adjust_address_nv (mem, inner_mode,
                                       i * GET_MODE_SIZE (inner_mode)),
                    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
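A hypothetical set of constants showing the three outcomes neon_make_constant distinguishes; the per-function classifications in the comments follow the cycle-count comments in the code above but are assumptions, not output quoted from the commit:

#include <arm_neon.h>

/* Valid NEON modified immediate: loadable with a single VMOV.  */
int32x4_t
make_ones (void)
{
  int32x4_t v = {1, 1, 1, 1};
  return v;
}

/* Not a VMOV immediate, but all lanes are equal: neon_vdup_constant
   builds the value in a core register and VDUPs it.  */
int32x4_t
make_thousands (void)
{
  int32x4_t v = {1000, 1000, 1000, 1000};
  return v;
}

/* Neither case applies: falls back to a constant-pool load.  */
int32x4_t
make_ramp (void)
{
  int32x4_t v = {1, 2, 3, 4};
  return v;
}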
gcc/config/arm/arm.c
@@ -15253,6 +15399,30 @@ arm_print_operand (FILE *stream, rtx x, int code)
	}
      return;

    /* Translate an S register number into a D register number and element index.  */
    case 'y':
      {
	int mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
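A standalone sketch (not GCC code) of the mapping the new 'y' case prints: VFP single register sN is the low or high half of dN/2, so it is written as d(N/2)[N%2], mirroring the regno / 2 and regno % 2 computation above.

#include <stdio.h>

/* Print the D-register lane that each VFP single register maps to.  */
int
main (void)
{
  for (int s = 0; s < 8; s++)
    printf ("s%d -> d%d[%d]\n", s, s / 2, s % 2);
  return 0;
}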
gcc/config/arm/neon-testgen.ml
@@ -175,6 +175,7 @@ let rec analyze_shape shape =
    | Element_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]"
    | Element_of_qreg -> (analyze_shape_elt Qreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]"
    | All_elements_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\\\\\]"
    | Alternatives (elts) -> "(" ^ (String.concat "|" (List.map analyze_shape_elt elts)) ^ ")"
  in
    match shape with
      All (n, elt) -> commas analyze_shape_elt (n_things n elt) ""
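The new Alternatives arm joins the regexes of its member shapes with "|", so a vdup operand described as Alternatives [Corereg; Element_of_dreg] is matched as either a core register or a D-register lane; the regenerated tests further down show the resulting dg-final patterns. A small standalone check of that alternation (simplified from the Tcl-escaped patterns; this program is illustrative and is not produced by neon-testgen.ml):

#include <regex.h>
#include <stdio.h>

/* Match either a core register ("r3") or a D-register lane ("d7[1]"),
   the two operand forms the updated scan-assembler patterns accept.  */
int
main (void)
{
  regex_t re;
  const char *operands[] = { "r3", "d7[1]", "q2" };

  regcomp (&re, "^([rR][0-9]+|[dD][0-9]+[[][0-9]+[]])$",
           REG_EXTENDED | REG_NOSUB);
  for (int i = 0; i < 3; i++)
    printf ("%-6s %s\n", operands[i],
            regexec (&re, operands[i], 0, NULL, 0) == 0 ? "matches" : "no match");
  regfree (&re);
  return 0;
}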
gcc/config/arm/neon.md
@@ -2687,9 +2687,9 @@
})

(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (unspec:VDQW [(match_operand:<V_elem> 1 "s_register_operand" "r")]
                     UNSPEC_VDUP_N))]
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (unspec:VX [(match_operand:<V_elem> 1 "s_register_operand" "r")]
                   UNSPEC_VDUP_N))]
  "TARGET_NEON"
  "vdup%?.<V_sz_elem>\t%<V_reg>0, %1"
  ;; Assume this schedules like vmov.
@@ -2697,6 +2697,19 @@
   (set_attr "neon_type" "neon_bp_simple")]
)

(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w,w")
        (unspec:V32 [(match_operand:<V_elem> 1 "s_register_operand" "r,t")]
                    UNSPEC_VDUP_N))]
  "TARGET_NEON"
  "@
  vdup%?.<V_sz_elem>\t%<V_reg>0, %1
  vdup%?.<V_sz_elem>\t%<V_reg>0, %y1"
  ;; Assume this schedules like vmov.
  [(set_attr "predicable" "yes")
   (set_attr "neon_type" "neon_bp_simple")]
)

(define_insn "neon_vdup_ndi"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (unspec:DI [(match_operand:DI 1 "s_register_operand" "r")]
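A hypothetical source-level illustration of why the second alternative (constraint "t", printed with the new %y modifier) helps: if the scalar is already in a VFP single register, the splat needs no round trip through a core register. Whether it actually is depends on the ABI and register allocation, so the expected output in the comment is an assumption, not asserted by the commit:

#include <arm_neon.h>

/* If X is live in a VFP single register (for example with
   -mfloat-abi=hard), this can now be emitted as
   "vdup.32 qN, dM[K]" instead of "vmov rT, sS; vdup.32 qN, rT".  */
float32x4_t
splat_float (float32_t x)
{
  return vdupq_n_f32 (x);
}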
gcc/config/arm/neon.ml
@@ -68,6 +68,7 @@ type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt
               | Element_of_dreg        (* Used for "lane" variants.  *)
               | Element_of_qreg        (* Likewise.  *)
               | All_elements_of_dreg   (* Used for "dup" variants.  *)
               | Alternatives of shape_elt list (* Used for multiple valid operands *)

type shape_form = All of int * shape_elt
                | Long
@@ -1008,7 +1009,10 @@ let ops =
      pf_su_8_64;

    (* Set all lanes to the same value.  *)
    Vdup_n, [],
    Vdup_n,
      [Disassembles_as [Use_operands [| Dreg;
                                        Alternatives [ Corereg;
                                                       Element_of_dreg ] |]]],
      Use_operands [| Dreg; Corereg |], "vdup_n", bits_1,
      pf_su_8_32;
    Vdup_n,
@@ -1016,7 +1020,10 @@ let ops =
       Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
      Use_operands [| Dreg; Corereg |], "vdup_n", notype_1,
      [S64; U64];
    Vdup_n, [],
    Vdup_n,
      [Disassembles_as [Use_operands [| Qreg;
                                        Alternatives [ Corereg;
                                                       Element_of_dreg ] |]]],
      Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1,
      pf_su_8_32;
    Vdup_n,
@@ -1028,7 +1035,10 @@ let ops =

    (* These are just aliases for the above.  *)
    Vmov_n,
      [Builtin_name "vdup_n"],
      [Builtin_name "vdup_n";
       Disassembles_as [Use_operands [| Dreg;
                                        Alternatives [ Corereg;
                                                       Element_of_dreg ] |]]],
      Use_operands [| Dreg; Corereg |],
      "vmov_n", bits_1, pf_su_8_32;
    Vmov_n,
@@ -1038,7 +1048,10 @@ let ops =
      Use_operands [| Dreg; Corereg |],
      "vmov_n", notype_1, [S64; U64];
    Vmov_n,
      [Builtin_name "vdupQ_n"],
      [Builtin_name "vdupQ_n";
       Disassembles_as [Use_operands [| Qreg;
                                        Alternatives [ Corereg;
                                                       Element_of_dreg ] |]]],
      Use_operands [| Qreg; Corereg |],
      "vmovQ_n", bits_1, pf_su_8_32;
    Vmov_n,
gcc/config/arm/vec-common.md
@@ -42,6 +42,11 @@
{
  if (GET_CODE (operands[0]) != REG)
    operands[1] = force_reg (<MODE>mode, operands[1]);
  else if (TARGET_NEON && CONSTANT_P (operands[1]))
    {
      operands[1] = neon_make_constant (operands[1]);
      gcc_assert (operands[1] != NULL_RTX);
    }
}
})
gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
2009-11-11  Daniel Jacobowitz  <dan@codesourcery.com>

	* gcc.target/arm/neon: Regenerate generated tests.

2009-11-10  Jason Merrill  <jason@redhat.com>

	* g++.dg/init/placement5.C: New.
gcc/testsuite/gcc.target/arm/neon (regenerated tests)

Every regenerated test changes in the same way: the scan-assembler pattern for the vdup
source operand, which previously accepted only a core register, now also accepts a
D-register lane.  For the Q-register float test the hunk is:

@@ -15,5 +15,5 @@ void test_vdupQ_nf32 (void)
  out_float32x4_t = vdupq_n_f32 (arg0_float32_t);
}

/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */

The same "@@ -15,5 +15,5 @@" change appears in all 36 tests; the hunks differ only in the
element size (vdup\.8, vdup\.16 or vdup\.32) and the destination register class (\[dD\] or
\[qQ\]), while the intrinsic call, closing brace and cleanup-saved-temps line are unchanged
context in each:

  Q-register destinations (\[qQ\]):
    test_vdupQ_nf32 (.32)  test_vdupQ_np16 (.16)  test_vdupQ_np8 (.8)
    test_vdupQ_ns16 (.16)  test_vdupQ_ns32 (.32)  test_vdupQ_ns8 (.8)
    test_vdupQ_nu16 (.16)  test_vdupQ_nu32 (.32)  test_vdupQ_nu8 (.8)
    test_vmovQ_nf32 (.32)  test_vmovQ_np16 (.16)  test_vmovQ_np8 (.8)
    test_vmovQ_ns16 (.16)  test_vmovQ_ns32 (.32)  test_vmovQ_ns8 (.8)
    test_vmovQ_nu16 (.16)  test_vmovQ_nu32 (.32)  test_vmovQ_nu8 (.8)

  D-register destinations (\[dD\]):
    test_vdup_nf32 (.32)   test_vdup_np16 (.16)   test_vdup_np8 (.8)
    test_vdup_ns16 (.16)   test_vdup_ns32 (.32)   test_vdup_ns8 (.8)
    test_vdup_nu16 (.16)   test_vdup_nu32 (.32)   test_vdup_nu8 (.8)
    test_vmov_nf32 (.32)   test_vmov_np16 (.16)   test_vmov_np8 (.8)
    test_vmov_ns16 (.16)   test_vmov_ns32 (.32)   test_vmov_ns8 (.8)
    test_vmov_nu16 (.16)   test_vmov_nu32 (.32)   test_vmov_nu8 (.8)