AVX512FP16: Enhance vector shuffle builtins
Support HFmode vector shuffle by creating an HImode subreg when expanding a permutation expression.

gcc/ChangeLog:

	* config/i386/i386-expand.c (ix86_expand_vec_perm): Convert HFmode input operand to HImode.
	(ix86_vectorize_vec_perm_const): Likewise.
	* config/i386/sse.md (*avx512bw_permvar_truncv16siv16hi_1_hf): New define_insn_and_split.
	(*avx512f_permvar_truncv8siv8hi_1_hf): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512fp16-builtin_shuffle-1.c: New test.
	* gcc.target/i386/avx512fp16-pr101846.c: Ditto.
	* gcc.target/i386/avx512fp16-pr94680.c: Ditto.
This commit is contained in:
parent
147ed0184f
commit
be072bfa5b
5 changed files with 280 additions and 1 deletions
|
@ -4846,6 +4846,16 @@ ix86_expand_vec_perm (rtx operands[])
|
|||
e = GET_MODE_UNIT_SIZE (mode);
|
||||
gcc_assert (w <= 64);
|
||||
|
||||
/* For HF mode vector, convert it to HI using subreg. */
|
||||
if (GET_MODE_INNER (mode) == HFmode)
|
||||
{
|
||||
machine_mode orig_mode = mode;
|
||||
mode = mode_for_vector (HImode, w).require ();
|
||||
target = lowpart_subreg (mode, target, orig_mode);
|
||||
op0 = lowpart_subreg (mode, op0, orig_mode);
|
||||
op1 = lowpart_subreg (mode, op1, orig_mode);
|
||||
}
|
||||
|
||||
if (TARGET_AVX512F && one_operand_shuffle)
|
||||
{
|
||||
rtx (*gen) (rtx, rtx, rtx) = NULL;
|
||||
|
@ -21139,6 +21149,20 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
|
|||
unsigned int i, nelt, which;
|
||||
bool two_args;
|
||||
|
||||
/* For HF mode vector, convert it to HI using subreg. */
|
||||
if (GET_MODE_INNER (vmode) == HFmode)
|
||||
{
|
||||
machine_mode orig_mode = vmode;
|
||||
vmode = mode_for_vector (HImode,
|
||||
GET_MODE_NUNITS (vmode)).require ();
|
||||
if (target)
|
||||
target = lowpart_subreg (vmode, target, orig_mode);
|
||||
if (op0)
|
||||
op0 = lowpart_subreg (vmode, op0, orig_mode);
|
||||
if (op1)
|
||||
op1 = lowpart_subreg (vmode, op1, orig_mode);
|
||||
}
|
||||
|
||||
d.target = target;
|
||||
d.op0 = op0;
|
||||
d.op1 = op1;
|
||||
|
|
|
@ -12757,6 +12757,33 @@
|
|||
(truncate:V16HI (match_dup 1)))]
|
||||
"operands[1] = lowpart_subreg (V16SImode, operands[1], V32HImode);")
|
||||
|
||||
;; HFmode variant of the permvar-to-truncate pattern.
;; Matches a V16HF vec_select of elements 0..15 taken from a V32HF subreg
;; (offset 0) of a V32HI UNSPEC_VPERMVAR whose operands are a register
;; (operand 1) and a "permvar_truncate_operand" (operand 2).
;; It is only valid before reload (ix86_pre_reload_split) and always splits
;; ("&& 1") into a plain truncate:V16HI of operand 1.  The preparation
;; statements reinterpret operand 0 as V16HI and operand 1 as V16SI via
;; lowpart_subreg so that the replacement insn is expressed purely in
;; integer vector modes.
;; NOTE(review): presumably permvar_truncate_operand only accepts the
;; even-index selector that makes the permute equivalent to a truncation;
;; confirm against the predicate definition.
(define_insn_and_split "*avx512bw_permvar_truncv16siv16hi_1_hf"
|
||||
[(set (match_operand:V16HF 0 "nonimmediate_operand")
|
||||
(vec_select:V16HF
|
||||
(subreg:V32HF
|
||||
(unspec:V32HI
|
||||
[(match_operand:V32HI 1 "register_operand")
|
||||
(match_operand:V32HI 2 "permvar_truncate_operand")]
|
||||
UNSPEC_VPERMVAR) 0)
|
||||
(parallel [(const_int 0) (const_int 1)
|
||||
(const_int 2) (const_int 3)
|
||||
(const_int 4) (const_int 5)
|
||||
(const_int 6) (const_int 7)
|
||||
(const_int 8) (const_int 9)
|
||||
(const_int 10) (const_int 11)
|
||||
(const_int 12) (const_int 13)
|
||||
(const_int 14) (const_int 15)])))]
|
||||
"TARGET_AVX512BW && ix86_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(set (match_dup 0)
|
||||
(truncate:V16HI (match_dup 1)))]
|
||||
{
|
||||
operands[0] = lowpart_subreg (V16HImode, operands[0], V16HFmode);
|
||||
operands[1] = lowpart_subreg (V16SImode, operands[1], V32HImode);
|
||||
})
|
||||
|
||||
|
||||
(define_insn_and_split "*avx512f_permvar_truncv8siv8hi_1"
|
||||
[(set (match_operand:V8HI 0 "nonimmediate_operand")
|
||||
(vec_select:V8HI
|
||||
|
@ -12775,6 +12802,28 @@
|
|||
(truncate:V8HI (match_dup 1)))]
|
||||
"operands[1] = lowpart_subreg (V8SImode, operands[1], V16HImode);")
|
||||
|
||||
;; HFmode variant of the 256-bit permvar-to-truncate pattern.
;; Matches a V8HF vec_select of elements 0..7 taken from a V16HF subreg
;; (offset 0) of a V16HI UNSPEC_VPERMVAR whose operands are a register
;; (operand 1) and a "permvar_truncate_operand" (operand 2).
;; It requires both AVX512VL and AVX512BW, is only valid before reload
;; (ix86_pre_reload_split) and always splits ("&& 1") into a plain
;; truncate:V8HI of operand 1.  The preparation statements reinterpret
;; operand 0 as V8HI and operand 1 as V8SI via lowpart_subreg so that the
;; replacement insn is expressed purely in integer vector modes.
;; NOTE(review): presumably permvar_truncate_operand only accepts the
;; even-index selector that makes the permute equivalent to a truncation;
;; confirm against the predicate definition.
(define_insn_and_split "*avx512f_permvar_truncv8siv8hi_1_hf"
|
||||
[(set (match_operand:V8HF 0 "nonimmediate_operand")
|
||||
(vec_select:V8HF
|
||||
(subreg:V16HF
|
||||
(unspec:V16HI
|
||||
[(match_operand:V16HI 1 "register_operand")
|
||||
(match_operand:V16HI 2 "permvar_truncate_operand")]
|
||||
UNSPEC_VPERMVAR) 0)
|
||||
(parallel [(const_int 0) (const_int 1)
|
||||
(const_int 2) (const_int 3)
|
||||
(const_int 4) (const_int 5)
|
||||
(const_int 6) (const_int 7)])))]
|
||||
"TARGET_AVX512VL && TARGET_AVX512BW && ix86_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(set (match_dup 0)
|
||||
(truncate:V8HI (match_dup 1)))]
|
||||
{
|
||||
operands[0] = lowpart_subreg (V8HImode, operands[0], V8HFmode);
|
||||
operands[1] = lowpart_subreg (V8SImode, operands[1], V16HImode);
|
||||
})
|
||||
|
||||
(define_insn_and_split "*avx512f_vpermvar_truncv8div8si_1"
|
||||
[(set (match_operand:V8SI 0 "nonimmediate_operand")
|
||||
(vec_select:V8SI
|
||||
|
@ -15787,12 +15836,15 @@
|
|||
|
||||
(define_mode_iterator VEC_PERM_AVX2
|
||||
[V16QI V8HI V4SI V2DI V4SF V2DF
|
||||
(V8HF "TARGET_AVX512FP16")
|
||||
(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
|
||||
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
|
||||
(V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
|
||||
(V16HF "TARGET_AVX512FP16")
|
||||
(V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
|
||||
(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
|
||||
(V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
|
||||
(V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")
|
||||
(V32HF "TARGET_AVX512FP16")])
|
||||
|
||||
(define_expand "vec_perm<mode>"
|
||||
[(match_operand:VEC_PERM_AVX2 0 "register_operand")
|
||||
|
|
86
gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c
Normal file
86
gcc/testsuite/gcc.target/i386/avx512fp16-builtin_shuffle-1.c
Normal file
|
@ -0,0 +1,86 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */
|
||||
/* { dg-final { scan-assembler-not "movw" } } */
|
||||
/* { dg-final { scan-assembler-times "vpermi2w" 3 } } */
|
||||
/* { dg-final { scan-assembler-times "vpermw" 6 } } */
|
||||
/* { dg-final { scan-assembler-times "vpshufb" 3 } } */
|
||||
/* { dg-final { scan-assembler-times "vpermt2w" 6 } } */
|
||||
|
||||
typedef _Float16 v32hf __attribute__((vector_size (64)));
|
||||
typedef _Float16 v16hf __attribute__((vector_size (32)));
|
||||
typedef _Float16 v8hf __attribute__((vector_size (16)));
|
||||
typedef short v32hi __attribute__((vector_size (64)));
|
||||
typedef short v16hi __attribute__((vector_size (32)));
|
||||
typedef short v8hi __attribute__((vector_size (16)));
|
||||
|
||||
#define PERM_CONST_RANDOM_v32hi \
|
||||
{ 0, 21, 15, 9, 43, 25, 37, 48, \
|
||||
8, 16, 27, 51, 30, 12, 6, 46, \
|
||||
34, 3, 11, 5, 17, 53, 26, 39, \
|
||||
2, 18, 40, 61, 19, 4, 50, 29 }
|
||||
|
||||
#define PERM_CONST_RANDOM_RANGE32_v32hi \
|
||||
{ 0, 21, 10, 23, 8, 18, 7, 19, \
|
||||
4, 25, 3, 31, 5, 22, 11, 17, \
|
||||
9, 20, 2, 24, 1, 30, 12, 27, \
|
||||
13, 28, 6, 29, 14, 16, 15, 23 }
|
||||
|
||||
#define PERM_CONST_RANDOM_v16hi \
|
||||
{ 0, 21, 15, 9, 13, 25, 30, 18, \
|
||||
8, 16, 17, 11, 4, 22, 6, 7 }
|
||||
|
||||
#define PERM_CONST_RANDOM_RANGE16_v16hi \
|
||||
{ 0, 9, 1, 12, 4, 15, 7, 13, \
|
||||
3, 10, 6, 14, 5, 8, 2, 11 }
|
||||
|
||||
#define PERM_CONST_RANDOM_v8hi \
|
||||
{ 0, 14, 15, 9, 13, 2, 3, 5 }
|
||||
|
||||
#define PERM_CONST_RANDOM_RANGE8_v8hi \
|
||||
{ 0, 7, 2, 5, 3, 4, 1, 6 }
|
||||
|
||||
#define PERM_CONST_RANDOM(size) \
|
||||
PERM_CONST_RANDOM_v##size##hi
|
||||
|
||||
#define PERM_CONST_RANDOM_RANGE(size) \
|
||||
PERM_CONST_RANDOM_RANGE##size##_v##size##hi
|
||||
|
||||
/* Define four test functions for the vector type TYPE, each returning the
   result of __builtin_shuffle with a constant index vector cast to ITYPE:
     - two-operand shuffle with PERM_CONST_RANDOM (size),
     - two-operand shuffle with PERM_CONST_RANDOM_RANGE (size),
     - one-operand shuffle with PERM_CONST_RANDOM (size),
     - one-operand shuffle with PERM_CONST_RANDOM_RANGE (size).
   SIZE selects the index table through token pasting, so it must match
   one of the PERM_CONST_RANDOM_* tables defined in this file.  */
#define SHUFFLE_CONST_RANDOM(type, itype, size) \
|
||||
type foo_##type##shuffle_2param_const_random (type a, type b) \
|
||||
{ \
|
||||
return __builtin_shuffle (a, b, \
|
||||
(itype) PERM_CONST_RANDOM (size)); \
|
||||
} \
|
||||
type foo_##type##shuffle_2param_const_random_range (type a, type b) \
|
||||
{ \
|
||||
return __builtin_shuffle (a, b, \
|
||||
(itype) PERM_CONST_RANDOM_RANGE (size)); \
|
||||
} \
|
||||
type foo_##type##shuffle_1param_const_random (type a) \
|
||||
{ \
|
||||
return __builtin_shuffle (a, \
|
||||
(itype) PERM_CONST_RANDOM (size)); \
|
||||
} \
|
||||
type foo_##type##shuffle_1param_const_random_range (type a) \
|
||||
{ \
|
||||
return __builtin_shuffle (a, \
|
||||
(itype) PERM_CONST_RANDOM_RANGE (size)); \
|
||||
}
|
||||
|
||||
/* Define two test functions for the vector type TYPE whose shuffle
   selector is the run-time vector argument C of type ITYPE: one calls the
   two-operand form of __builtin_shuffle (a, b, c), the other the
   one-operand form (a, c).  */
#define SHUFFLE_VEC_INDEX(type, itype) \
|
||||
type foo##type##itype##shuffle_2param_vec (type a, type b, itype c) \
|
||||
{ \
|
||||
return __builtin_shuffle (a, b, c); \
|
||||
} \
|
||||
type foo##type##itype##shuffle_1param_vec (type a, itype c) \
|
||||
{ \
|
||||
return __builtin_shuffle (a, c); \
|
||||
}
|
||||
|
||||
SHUFFLE_CONST_RANDOM (v32hf, v32hi, 32)
|
||||
SHUFFLE_CONST_RANDOM (v16hf, v16hi, 16)
|
||||
SHUFFLE_CONST_RANDOM (v8hf, v8hi, 8)
|
||||
|
||||
SHUFFLE_VEC_INDEX (v32hf, v32hi)
|
||||
SHUFFLE_VEC_INDEX (v16hf, v16hi)
|
||||
SHUFFLE_VEC_INDEX (v8hf, v8hi)
|
56
gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c
Normal file
56
gcc/testsuite/gcc.target/i386/avx512fp16-pr101846.c
Normal file
|
@ -0,0 +1,56 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */
|
||||
/* { dg-final { scan-assembler-times "vpmovzxwd" "3" } } */
|
||||
/* { dg-final { scan-assembler-times "vpmovdw" "3" } } */
|
||||
|
||||
typedef _Float16 v32hf __attribute__((vector_size (64)));
|
||||
typedef _Float16 v16hf __attribute__((vector_size (32)));
|
||||
typedef _Float16 v8hf __attribute__((vector_size (16)));
|
||||
typedef _Float16 v4hf __attribute__((vector_size (8)));
|
||||
typedef short v4hi __attribute__((vector_size (8)));
|
||||
typedef short v8hi __attribute__((vector_size (16)));
|
||||
|
||||
#define PERM_CONST_INTERLEAVE_v32hi \
|
||||
0, 16, 1, 17, 2, 18, 3, 19, \
|
||||
4, 20, 5, 21, 6, 22, 7, 23, \
|
||||
8, 24, 9, 25, 10, 26, 11, 27, \
|
||||
12, 28, 13, 29, 14, 30, 15, 31
|
||||
|
||||
#define PERM_CONST_INTERLEAVE_v16hi \
|
||||
0, 8, 1, 9, 2, 10, 3, 11, \
|
||||
4, 12, 5, 13, 6, 14, 7, 15
|
||||
|
||||
#define PERM_CONST_INTERLEAVE_v8hi \
|
||||
0, 4, 1, 5, 2, 6, 3, 7
|
||||
|
||||
#define PERM_CONST_TRUNCATE_v32hi \
|
||||
0, 2, 4, 6, 8, 10, 12, 14, \
|
||||
16, 18, 20, 22, 24, 26, 28, 30
|
||||
|
||||
#define PERM_CONST_TRUNCATE_v16hi \
|
||||
0, 2, 4, 6, 8, 10, 12, 14
|
||||
|
||||
#define PERM_CONST_TRUNCATE_v8hi \
|
||||
0, 2, 4, 6
|
||||
|
||||
#define PERM_CONST_INTERLEAVE(size) \
|
||||
PERM_CONST_INTERLEAVE_v##size##hi
|
||||
|
||||
#define PERM_CONST_TRUNCATE(size) \
|
||||
PERM_CONST_TRUNCATE_v##size##hi
|
||||
|
||||
/* Define two test functions built on __builtin_shufflevector:
     - ..._const_interleave takes a TYPE vector and returns an RTYPE vector
       formed from A and an empty-initialized TYPE vector, using the index
       list PERM_CONST_INTERLEAVE (size);
     - ..._const_trunc takes an RTYPE vector and returns a TYPE vector
       selected from (A, A) with the index list PERM_CONST_TRUNCATE (size).
   SIZE selects the index lists through token pasting.  */
#define SHUFFLE_CONST_INTERLEAVE(type, rtype, size) \
|
||||
rtype foo_##type##shufflevector_const_interleave (type a) \
|
||||
{ \
|
||||
return __builtin_shufflevector (a, (type) {}, \
|
||||
PERM_CONST_INTERLEAVE (size)); \
|
||||
} \
|
||||
type foo_##type##shufflevector_const_trunc (rtype a) \
|
||||
{ \
|
||||
return __builtin_shufflevector (a, a, \
|
||||
PERM_CONST_TRUNCATE (size)); \
|
||||
}
|
||||
|
||||
SHUFFLE_CONST_INTERLEAVE (v16hf, v32hf, 32)
|
||||
SHUFFLE_CONST_INTERLEAVE (v8hf, v16hf, 16)
|
||||
SHUFFLE_CONST_INTERLEAVE (v4hf, v8hf, 8)
|
61
gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c
Normal file
61
gcc/testsuite/gcc.target/i386/avx512fp16-pr94680.c
Normal file
|
@ -0,0 +1,61 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */
|
||||
/* { dg-final { scan-assembler-times "vmovdqa" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "vmovq" 2 } } */
|
||||
|
||||
typedef _Float16 v32hf __attribute__((vector_size (64)));
|
||||
typedef _Float16 v16hf __attribute__((vector_size (32)));
|
||||
typedef _Float16 v8hf __attribute__((vector_size (16)));
|
||||
typedef short v32hi __attribute__((vector_size (64)));
|
||||
typedef short v16hi __attribute__((vector_size (32)));
|
||||
typedef short v8hi __attribute__((vector_size (16)));
|
||||
|
||||
|
||||
#define PERM_CONST_CONCAT0_v32hi \
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, \
|
||||
8, 9, 10, 11, 12, 13, 14, 15, \
|
||||
34, 53, 41, 55, 57, 43, 36, 39, \
|
||||
62, 48, 50, 51, 49, 44, 60, 37 }
|
||||
|
||||
#define PERM_CONST_CONCAT0_v32hi_l \
|
||||
{ 32, 33, 34, 35, 36, 37, 38, 39, \
|
||||
40, 41, 42, 43, 44, 45, 46, 47, \
|
||||
31, 0, 29, 2, 27, 4, 25, 6, 23, \
|
||||
8, 21, 10, 19, 12, 17, 14 }
|
||||
|
||||
#define PERM_CONST_CONCAT0_v16hi \
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, \
|
||||
21, 26, 17, 31, 24, 22, 30, 19 }
|
||||
|
||||
#define PERM_CONST_CONCAT0_v16hi_l \
|
||||
{ 16, 17, 18, 19, 20, 21, 22, 23, \
|
||||
15, 0, 13, 2, 11, 4, 9, 6 }
|
||||
|
||||
#define PERM_CONST_CONCAT0_v8hi \
|
||||
{ 0, 1, 2, 3, 9, 11, 14, 12 }
|
||||
|
||||
#define PERM_CONST_CONCAT0_v8hi_l \
|
||||
{ 8, 9, 10, 11, 3, 5, 1, 7 }
|
||||
|
||||
#define PERM_CONST_CONCAT0(type) \
|
||||
PERM_CONST_CONCAT0_##type
|
||||
|
||||
#define PERM_CONST_CONCAT0_L(type) \
|
||||
PERM_CONST_CONCAT0_##type##_l
|
||||
|
||||
/* Define two test functions for the vector type TYPE that shuffle A with
   a zero-initialized TYPE vector using a constant selector cast to ITYPE:
     - ..._concat0 passes A as the first operand and uses
       PERM_CONST_CONCAT0 (itype);
     - ..._concat0_l passes A as the second operand and uses
       PERM_CONST_CONCAT0_L (itype).
   ITYPE is also pasted into the selector table name, so it must be one of
   the v32hi / v16hi / v8hi names the tables are defined for.  */
#define SHUFFLE_CONST_CONCAT0(type, itype) \
|
||||
type foo_##type##shuffle_const_concat0 (type a) \
|
||||
{ \
|
||||
return __builtin_shuffle (a, (type) {0}, \
|
||||
(itype) PERM_CONST_CONCAT0 (itype)); \
|
||||
} \
|
||||
type foo_##type##shuffle_const_concat0_l (type a) \
|
||||
{ \
|
||||
return __builtin_shuffle ((type) {0}, a, \
|
||||
(itype) PERM_CONST_CONCAT0_L (itype)); \
|
||||
}
|
||||
|
||||
SHUFFLE_CONST_CONCAT0 (v32hf, v32hi)
|
||||
SHUFFLE_CONST_CONCAT0 (v16hf, v16hi)
|
||||
SHUFFLE_CONST_CONCAT0 (v8hf, v8hi)
|
||||
|
Loading…
Add table
Reference in a new issue