i386: Disable ix86_expand_vecop_qihi2 when !TARGET_AVX512BW
Since vpermq is really slow, ix86_expand_vecop_qihi2 should avoid using it
for the permutation when vpmovwb is not available (vpmovwb needs AVX512BW),
and should fall back to ix86_expand_vecop_qihi instead.

gcc/ChangeLog:

	PR target/115069
	* config/i386/i386-expand.cc (ix86_expand_vecop_qihi2): Do not
	enable the optimization when AVX512BW is not enabled.

gcc/testsuite/ChangeLog:

	PR target/115069
	* gcc.target/i386/pr115069.c: New.
This commit is contained in:
parent
d2f4279516
commit
1ad5c9d524
2 changed files with 16 additions and 0 deletions
|
@ -23963,6 +23963,13 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, rtx op1, rtx op2)
|
|||
bool op2vec = GET_MODE_CLASS (GET_MODE (op2)) == MODE_VECTOR_INT;
|
||||
bool uns_p = code != ASHIFTRT;
|
||||
|
||||
/* Without VPMOVWB (provided by AVX512BW ISA), the expansion uses the
|
||||
generic permutation to merge the data back into the right place. This
|
||||
permutation results in VPERMQ, which is slow, so better fall back to
|
||||
ix86_expand_vecop_qihi. */
|
||||
if (!TARGET_AVX512BW)
|
||||
return false;
|
||||
|
||||
if ((qimode == V16QImode && !TARGET_AVX2)
|
||||
|| (qimode == V32QImode && (!TARGET_AVX512BW || !TARGET_EVEX512))
|
||||
/* There are no V64HImode instructions. */
|
||||
|
|
9
gcc/testsuite/gcc.target/i386/pr115069.c
Normal file
9
gcc/testsuite/gcc.target/i386/pr115069.c
Normal file
|
@ -0,0 +1,9 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx2" } */
|
||||
/* { dg-final { scan-assembler-not "vpermq" } } */
|
||||
|
||||
typedef char v16qi __attribute__((vector_size(16)));
|
||||
|
||||
/* Test body for PR target/115069: multiply two 16-byte char vectors
   (v16qi) with the vector `*' operator and return the product.  The
   dg-final directive in this file checks that the assembly produced
   with -O2 -mavx2 contains no "vpermq".  */
v16qi foo (v16qi a, v16qi b) {
|
||||
return a * b;
|
||||
}
|
Loading…
Add table
Reference in a new issue