i386: Fix emit_reduc_half on V{64Q,32H}Imode [PR94500]
The following testcase is miscompiled in 8.x, because emit_reduc_half is prepared to handle for 512-bit modes only i equal to 512, 256, 128 and 64. V32HImode also needs i equal to 32 and V64QImode i equal to 32 and 16, but emit_reduc_half in that case performs a redundant permutation exactly like i == 64. In 9+ the testcase works because Richard in r9-3393 changed the reduc_* expanders so that they actually don't call ix86_expand_reduc on 512-bit modes, but only 128-bit ones. The patch fixes emit_reduc_half to handle also i of 32 and 16 similarly to how V32QImode/V16HImode are handled for AVX2. I think it shouldn't hurt to fix the function even on the trunk and 9 branch even when nothing uses it ATM. 2020-04-07 Jakub Jelinek <jakub@redhat.com> PR target/94500 * config/i386/i386-expand.c (emit_reduc_half): For V{64QI,32HI}mode handle i < 64 using avx512bw_lshrv4ti3. Formatting fixes. * gcc.target/i386/avx512bw-pr94500.c: New test.
This commit is contained in:
parent
467fc7c83a
commit
bee27152f7
4 changed files with 76 additions and 29 deletions
|
@ -1,3 +1,9 @@
|
|||
2020-04-07 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/94500
|
||||
* config/i386/i386-expand.c (emit_reduc_half): For V{64QI,32HI}mode
|
||||
handle i < 64 using avx512bw_lshrv4ti3. Formatting fixes.
|
||||
|
||||
2020-04-06 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* cselib.c (cselib_subst_to_values): For SP_DERIVED_VALUE_P
|
||||
|
|
|
@ -14891,43 +14891,51 @@ emit_reduc_half (rtx dest, rtx src, int i)
|
|||
break;
|
||||
case E_V64QImode:
|
||||
case E_V32HImode:
|
||||
if (i < 64)
|
||||
{
|
||||
d = gen_reg_rtx (V4TImode);
|
||||
tem = gen_avx512bw_lshrv4ti3 (d, gen_lowpart (V4TImode, src),
|
||||
GEN_INT (i / 2));
|
||||
break;
|
||||
}
|
||||
/* FALLTHRU */
|
||||
case E_V16SImode:
|
||||
case E_V16SFmode:
|
||||
case E_V8DImode:
|
||||
case E_V8DFmode:
|
||||
if (i > 128)
|
||||
tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
|
||||
gen_lowpart (V16SImode, src),
|
||||
gen_lowpart (V16SImode, src),
|
||||
GEN_INT (0x4 + (i == 512 ? 4 : 0)),
|
||||
GEN_INT (0x5 + (i == 512 ? 4 : 0)),
|
||||
GEN_INT (0x6 + (i == 512 ? 4 : 0)),
|
||||
GEN_INT (0x7 + (i == 512 ? 4 : 0)),
|
||||
GEN_INT (0xC), GEN_INT (0xD),
|
||||
GEN_INT (0xE), GEN_INT (0xF),
|
||||
GEN_INT (0x10), GEN_INT (0x11),
|
||||
GEN_INT (0x12), GEN_INT (0x13),
|
||||
GEN_INT (0x14), GEN_INT (0x15),
|
||||
GEN_INT (0x16), GEN_INT (0x17));
|
||||
gen_lowpart (V16SImode, src),
|
||||
gen_lowpart (V16SImode, src),
|
||||
GEN_INT (0x4 + (i == 512 ? 4 : 0)),
|
||||
GEN_INT (0x5 + (i == 512 ? 4 : 0)),
|
||||
GEN_INT (0x6 + (i == 512 ? 4 : 0)),
|
||||
GEN_INT (0x7 + (i == 512 ? 4 : 0)),
|
||||
GEN_INT (0xC), GEN_INT (0xD),
|
||||
GEN_INT (0xE), GEN_INT (0xF),
|
||||
GEN_INT (0x10), GEN_INT (0x11),
|
||||
GEN_INT (0x12), GEN_INT (0x13),
|
||||
GEN_INT (0x14), GEN_INT (0x15),
|
||||
GEN_INT (0x16), GEN_INT (0x17));
|
||||
else
|
||||
tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
|
||||
gen_lowpart (V16SImode, src),
|
||||
GEN_INT (i == 128 ? 0x2 : 0x1),
|
||||
GEN_INT (0x3),
|
||||
GEN_INT (0x3),
|
||||
GEN_INT (0x3),
|
||||
GEN_INT (i == 128 ? 0x6 : 0x5),
|
||||
GEN_INT (0x7),
|
||||
GEN_INT (0x7),
|
||||
GEN_INT (0x7),
|
||||
GEN_INT (i == 128 ? 0xA : 0x9),
|
||||
GEN_INT (0xB),
|
||||
GEN_INT (0xB),
|
||||
GEN_INT (0xB),
|
||||
GEN_INT (i == 128 ? 0xE : 0xD),
|
||||
GEN_INT (0xF),
|
||||
GEN_INT (0xF),
|
||||
GEN_INT (0xF));
|
||||
gen_lowpart (V16SImode, src),
|
||||
GEN_INT (i == 128 ? 0x2 : 0x1),
|
||||
GEN_INT (0x3),
|
||||
GEN_INT (0x3),
|
||||
GEN_INT (0x3),
|
||||
GEN_INT (i == 128 ? 0x6 : 0x5),
|
||||
GEN_INT (0x7),
|
||||
GEN_INT (0x7),
|
||||
GEN_INT (0x7),
|
||||
GEN_INT (i == 128 ? 0xA : 0x9),
|
||||
GEN_INT (0xB),
|
||||
GEN_INT (0xB),
|
||||
GEN_INT (0xB),
|
||||
GEN_INT (i == 128 ? 0xE : 0xD),
|
||||
GEN_INT (0xF),
|
||||
GEN_INT (0xF),
|
||||
GEN_INT (0xF));
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2020-04-07 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/94500
|
||||
* gcc.target/i386/avx512bw-pr94500.c: New test.
|
||||
|
||||
2020-04-06 Steven G. Kargl <kargl@gcc.gnu.org>
|
||||
|
||||
PR fortran/93686
|
||||
|
|
28
gcc/testsuite/gcc.target/i386/avx512bw-pr94500.c
Normal file
28
gcc/testsuite/gcc.target/i386/avx512bw-pr94500.c
Normal file
|
@ -0,0 +1,28 @@
|
|||
/* PR target/94500 */
|
||||
/* { dg-do run { target avx512bw } } */
|
||||
/* { dg-options "-O3 -mavx512bw -mprefer-vector-width=512" } */
|
||||
|
||||
#define AVX512BW
|
||||
#include "avx512f-helper.h"
|
||||
|
||||
/* Maximum reduction over the first 256 elements of P (PR target/94500).
   The running maximum starts at 0, so the result is the largest element
   greater than 0, or 0 when no element exceeds 0.
   NOTE(review): the noipa attribute is presumably there to keep the
   call in TEST from being optimized across the function boundary --
   confirm against the GCC attribute documentation.  */
__attribute__((noipa)) signed char
|
||||
foo (signed char *p)
|
||||
{
|
||||
signed char r = 0;
|
||||
int i;
|
||||
for (i = 0; i < 256; i++)
|
||||
if (p[i] > r) r = p[i];
|
||||
return r;
|
||||
}
|
||||
|
||||
signed char buf[256];
|
||||
|
||||
/* Test body: fill buf with i - 128 for i in 0..255 (so the largest stored
   value is 127 at index 255), then abort unless foo reports 127 as the
   maximum.
   NOTE(review): TEST is presumably the entry point expected by
   avx512f-helper.h when AVX512BW is defined -- confirm in that header.  */
static void
|
||||
TEST (void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 256; i++)
|
||||
buf[i] = i - 128;
|
||||
if (foo (buf) != 127)
|
||||
abort ();
|
||||
}
|
Loading…
Add table
Reference in a new issue