x86: Update STORE_MAX_PIECES
Update STORE_MAX_PIECES to allow 16/32/64 bytes only if inter-unit move is
enabled, since vec_duplicate, which is enabled by inter-unit move, is used to
implement store_by_pieces of 16/32/64 bytes.

gcc/

	PR target/101742
	* config/i386/i386.h (STORE_MAX_PIECES): Allow 16/32/64 bytes
	only if TARGET_INTER_UNIT_MOVES_TO_VEC is true.

gcc/testsuite/

	PR target/101742
	* gcc.target/i386/pr101742a.c: New test.
	* gcc.target/i386/pr101742b.c: Likewise.
This commit is contained in:
parent
09dba016db
commit
5738a64f8b
3 changed files with 35 additions and 11 deletions
|
@ -1780,18 +1780,22 @@ typedef struct ix86_args {
|
|||
&& TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
|
||||
? 16 : UNITS_PER_WORD)))
|
||||
|
||||
/* STORE_MAX_PIECES is the number of bytes at a time that we can
|
||||
store efficiently. */
|
||||
/* STORE_MAX_PIECES is the number of bytes at a time that we can store
|
||||
efficiently. Allow 16/32/64 bytes only if inter-unit move is enabled
|
||||
since vec_duplicate enabled by inter-unit move is used to implement
|
||||
store_by_pieces of 16/32/64 bytes. */
|
||||
/* Reading this hunk: two bodies for the macro follow the #define line.
   The first conditional (starting directly with TARGET_AVX512F) is the
   pre-change body; the second, wrapped in
   TARGET_INTER_UNIT_MOVES_TO_VEC, is its replacement.  Both select 64,
   32 or 16 bytes from the same AVX512F / AVX / SSE2 tuning conditions
   and otherwise UNITS_PER_WORD; the replacement additionally yields
   UNITS_PER_WORD whenever TARGET_INTER_UNIT_MOVES_TO_VEC is false.  */
#define STORE_MAX_PIECES \
|
||||
((TARGET_AVX512F && !TARGET_PREFER_AVX256) \
|
||||
? 64 \
|
||||
: ((TARGET_AVX \
|
||||
&& !TARGET_PREFER_AVX128 \
|
||||
&& !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
|
||||
? 32 \
|
||||
: ((TARGET_SSE2 \
|
||||
&& TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
|
||||
? 16 : UNITS_PER_WORD)))
|
||||
(TARGET_INTER_UNIT_MOVES_TO_VEC \
|
||||
? ((TARGET_AVX512F && !TARGET_PREFER_AVX256) \
|
||||
? 64 \
|
||||
: ((TARGET_AVX \
|
||||
&& !TARGET_PREFER_AVX128 \
|
||||
&& !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
|
||||
? 32 \
|
||||
: ((TARGET_SSE2 \
|
||||
&& TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
|
||||
? 16 : UNITS_PER_WORD))) \
|
||||
: UNITS_PER_WORD)
|
||||
|
||||
/* If a memory-to-memory move would take MOVE_RATIO or more simple
|
||||
move-instruction pairs, we will do a cpymem or libcall instead.
|
||||
|
|
16
gcc/testsuite/gcc.target/i386/pr101742a.c
Normal file
16
gcc/testsuite/gcc.target/i386/pr101742a.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-O3 -mtune=nano-x2" } */
|
||||
|
||||
/* File-scope value that w7 reads as its only input.  */
int n2;
|
||||
|
||||
/* Test function for PR target/101742; the dg-do directive of this file
   makes it a compile-only test.  It narrows n2 to short, overwrites that
   value twice with the 0/1 result of (xb < 1), and returns it converted
   to char.
   NOTE(review): presumably the exact shape of this code is what
   reproduced the reported failure -- confirm before simplifying.  */
__attribute__ ((simd)) char
|
||||
w7 (void)
|
||||
{
|
||||
short int xb = n2;
|
||||
int qp;
|
||||
|
||||
/* Exactly two iterations, each replacing xb with (xb < 1).  */
for (qp = 0; qp < 2; ++qp)
|
||||
xb = xb < 1;
|
||||
|
||||
return xb;
|
||||
}
|
4
gcc/testsuite/gcc.target/i386/pr101742b.c
Normal file
4
gcc/testsuite/gcc.target/i386/pr101742b.c
Normal file
|
@ -0,0 +1,4 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-O3 -mtune=nano-x2 -mtune-ctrl=sse_unaligned_store_optimal" } */
|
||||
|
||||
/* Pulls in the test body of pr101742a.c; this file's own
   dg-additional-options line adds
   -mtune-ctrl=sse_unaligned_store_optimal to the options used there.  */
#include "pr101742a.c"
|
Loading…
Add table
Reference in a new issue