x86: Update STORE_MAX_PIECES

Update STORE_MAX_PIECES to allow 16/32/64 bytes only if inter-unit moves
are enabled, since store_by_pieces of 16/32/64 bytes is implemented with
vec_duplicate, which is itself enabled only by inter-unit moves.

gcc/

	PR target/101742
	* config/i386/i386.h (STORE_MAX_PIECES): Allow 16/32/64 bytes
	only if TARGET_INTER_UNIT_MOVES_TO_VEC is true.

gcc/testsuite/

	PR target/101742
	* gcc.target/i386/pr101742a.c: New test.
	* gcc.target/i386/pr101742b.c: Likewise.
This commit is contained in:
H.J. Lu 2021-08-03 06:17:22 -07:00
parent 09dba016db
commit 5738a64f8b
3 changed files with 35 additions and 11 deletions

View file

@ -1780,18 +1780,22 @@ typedef struct ix86_args {
&& TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
? 16 : UNITS_PER_WORD)))
/* NOTE(review): this hunk is rendered without +/- markers.  Judging by
   the commit log, the two-line comment and the expansion that begins
   with (TARGET_AVX512F are the pre-change text, while the four-line
   comment and the expansion guarded by TARGET_INTER_UNIT_MOVES_TO_VEC
   are the post-change text; confirm against the real diff.  */
/* STORE_MAX_PIECES is the number of bytes at a time that we can
store efficiently. */
/* STORE_MAX_PIECES is the number of bytes at a time that we can store
efficiently. Allow 16/32/64 bytes only if inter-unit move is enabled
since vec_duplicate enabled by inter-unit move is used to implement
store_by_pieces of 16/32/64 bytes. */
#define STORE_MAX_PIECES \
((TARGET_AVX512F && !TARGET_PREFER_AVX256) \
? 64 \
: ((TARGET_AVX \
&& !TARGET_PREFER_AVX128 \
&& !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
? 32 \
: ((TARGET_SSE2 \
&& TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
? 16 : UNITS_PER_WORD)))
(TARGET_INTER_UNIT_MOVES_TO_VEC \
? ((TARGET_AVX512F && !TARGET_PREFER_AVX256) \
? 64 \
: ((TARGET_AVX \
&& !TARGET_PREFER_AVX128 \
&& !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
? 32 \
: ((TARGET_SSE2 \
&& TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
? 16 : UNITS_PER_WORD))) \
: UNITS_PER_WORD)
/* If a memory-to-memory move would take MOVE_RATIO or more simple
move-instruction pairs, we will do a cpymem or libcall instead.

View file

@ -0,0 +1,16 @@
/* { dg-do compile } */
/* { dg-additional-options "-O3 -mtune=nano-x2" } */
/* Compile-only test case listed under PR target/101742 in the commit
   log.  Keep the source shape as is: this is a compiler test input, so
   the exact statements matter more than readability.

   w7 carries the simd attribute.  It copies the global n2 into a short,
   replaces that short with the result of (xb < 1) on each of two loop
   iterations, and returns the final value converted to char.  */
int n2;
__attribute__ ((simd)) char
w7 (void)
{
short int xb = n2;
int qp;
for (qp = 0; qp < 2; ++qp)
xb = xb < 1;
return xb;
}

View file

@ -0,0 +1,4 @@
/* { dg-do compile } */
/* { dg-additional-options "-O3 -mtune=nano-x2 -mtune-ctrl=sse_unaligned_store_optimal" } */
#include "pr101742a.c"