cprop: Do not set REG_EQUAL note when simplifying paradoxical subreg [PR110206]
The cprop1 pass does not consider paradoxical subregs and, for (insn 22), claims that it equals 8 elements of HImode by setting a REG_EQUAL note: (insn 21 19 22 4 (set (reg:V4QI 98) (mem/u/c:V4QI (symbol_ref/u:DI ("*.LC1") [flags 0x2]) [0 S4 A32])) "pr110206.c":12:42 1530 {*movv4qi_internal} (expr_list:REG_EQUAL (const_vector:V4QI [ (const_int -52 [0xffffffffffffffcc]) repeated x4 ]) (nil))) (insn 22 21 23 4 (set (reg:V8HI 100) (zero_extend:V8HI (vec_select:V8QI (subreg:V16QI (reg:V4QI 98) 0) (parallel [ (const_int 0 [0]) (const_int 1 [0x1]) (const_int 2 [0x2]) (const_int 3 [0x3]) (const_int 4 [0x4]) (const_int 5 [0x5]) (const_int 6 [0x6]) (const_int 7 [0x7]) ])))) "pr110206.c":12:42 7471 {sse4_1_zero_extendv8qiv8hi2} (expr_list:REG_EQUAL (const_vector:V8HI [ (const_int 204 [0xcc]) repeated x8 ]) (expr_list:REG_DEAD (reg:V4QI 98) (nil)))) We rely on the "undefined" values to have a specific value (from the earlier REG_EQUAL note), but actual code generation doesn't ensure this (it doesn't need to). That said, the issue isn't the constant folding per se, but that we do not actually constant fold and instead register an equality that doesn't hold. PR target/110206 gcc/ChangeLog: * fwprop.cc (contains_paradoxical_subreg_p): Move to ... * rtlanal.cc (contains_paradoxical_subreg_p): ... here. * rtlanal.h (contains_paradoxical_subreg_p): Add prototype. * cprop.cc (try_replace_reg): Do not set REG_EQUAL note when the original source contains a paradoxical subreg. gcc/testsuite/ChangeLog: * gcc.target/i386/pr110206.c: New test.
This commit is contained in:
parent
aa6741ef2e
commit
1815e313a8
5 changed files with 60 additions and 16 deletions
|
@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "coretypes.h"
|
||||
#include "backend.h"
|
||||
#include "rtl.h"
|
||||
#include "rtlanal.h"
|
||||
#include "cfghooks.h"
|
||||
#include "df.h"
|
||||
#include "insn-config.h"
|
||||
|
@ -795,7 +796,8 @@ try_replace_reg (rtx from, rtx to, rtx_insn *insn)
|
|||
/* If we've failed to perform the replacement, have a single SET to
|
||||
a REG destination and don't yet have a note, add a REG_EQUAL note
|
||||
to not lose information. */
|
||||
if (!success && note == 0 && set != 0 && REG_P (SET_DEST (set)))
|
||||
if (!success && note == 0 && set != 0 && REG_P (SET_DEST (set))
|
||||
&& !contains_paradoxical_subreg_p (SET_SRC (set)))
|
||||
note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
|
||||
}
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "coretypes.h"
|
||||
#include "backend.h"
|
||||
#include "rtl.h"
|
||||
#include "rtlanal.h"
|
||||
#include "df.h"
|
||||
#include "rtl-ssa.h"
|
||||
|
||||
|
@ -353,21 +354,6 @@ reg_single_def_p (rtx x)
|
|||
return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
|
||||
}
|
||||
|
||||
/* Return true if X contains a paradoxical subreg. */
|
||||
|
||||
static bool
|
||||
contains_paradoxical_subreg_p (rtx x)
|
||||
{
|
||||
subrtx_var_iterator::array_type array;
|
||||
FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
|
||||
{
|
||||
x = *iter;
|
||||
if (SUBREG_P (x) && paradoxical_subreg_p (x))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of
|
||||
USE_INSN. Return the number of substitutions on success, otherwise return
|
||||
-1 and leave USE_INSN unchanged.
|
||||
|
|
|
@ -6970,3 +6970,18 @@ vec_series_lowpart_p (machine_mode result_mode, machine_mode op_mode, rtx sel)
|
|||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Return true if X contains a paradoxical subreg. */
|
||||
|
||||
bool
|
||||
contains_paradoxical_subreg_p (rtx x)
|
||||
{
|
||||
subrtx_var_iterator::array_type array;
|
||||
FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
|
||||
{
|
||||
x = *iter;
|
||||
if (SUBREG_P (x) && paradoxical_subreg_p (x))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -338,4 +338,6 @@ vec_series_highpart_p (machine_mode result_mode, machine_mode op_mode,
|
|||
bool
|
||||
vec_series_lowpart_p (machine_mode result_mode, machine_mode op_mode, rtx sel);
|
||||
|
||||
bool
|
||||
contains_paradoxical_subreg_p (rtx x);
|
||||
#endif
|
||||
|
|
39
gcc/testsuite/gcc.target/i386/pr110206.c
Normal file
39
gcc/testsuite/gcc.target/i386/pr110206.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
/* PR target/110206 */
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-Os -mavx512bw -mavx512vl" } */
|
||||
/* { dg-require-effective-target avx512bw } */
|
||||
/* { dg-require-effective-target avx512vl } */
|
||||
|
||||
#define AVX512BW
|
||||
#define AVX512VL
|
||||
|
||||
#include "avx512f-check.h"
|
||||
|
||||
typedef unsigned char __attribute__((__vector_size__ (4))) U;
|
||||
typedef unsigned char __attribute__((__vector_size__ (8))) V;
|
||||
typedef unsigned short u16;
|
||||
|
||||
V g;
|
||||
|
||||
/* Reproducer kernel for PR target/110206.
   Calls abort () when C is zero.  Otherwise it builds three byte-vector
   shuffles from U and the global G and stores the last one through R.
   The function is marked noinline.  The expressions below are kept
   exactly as written because the test depends on their shape.  */
void
|
||||
__attribute__((noinline))
|
||||
foo (U u, u16 c, V *r)
|
||||
{
|
||||
if (!c)
|
||||
abort ();
|
||||
/* X: eight elements picked from the concatenation of U and (204 >> U);
   index 0 selects u[0].  */
V x = __builtin_shufflevector (u, (204 >> u), 7, 0, 5, 1, 3, 5, 0, 2);
|
||||
/* Y: all indices are below 8, so every element comes from G.  */
V y = __builtin_shufflevector (g, (V) { }, 7, 6, 6, 7, 2, 6, 3, 5);
|
||||
/* Z: indices 8 and above (9, 8, 14) select from 204 * X, the rest from Y.  */
V z = __builtin_shufflevector (y, 204 * x, 3, 9, 8, 1, 4, 6, 14, 5);
|
||||
*r = z;
|
||||
}
|
||||
|
||||
/* Empty body: this test has no 256-bit variant.
   NOTE(review): presumably a hook expected by avx512f-check.h - confirm.  */
static void
test_256 (void)
{
}
|
||||
|
||||
/* Run foo on U = {4, 0, 0, 0} with a non-zero C and check one element of
   the result, calling abort () on mismatch.
   r[6] is element 14 of the final shuffle in foo, i.e. (204 * x)[6], and
   x[6] is u[0] == 4.  204 * 4 truncated to an unsigned char is 0x30.  */
static void
|
||||
test_128 (void)
|
||||
{
|
||||
V r;
|
||||
foo ((U){4}, 5, &r);
|
||||
if (r[6] != 0x30)
|
||||
abort();
|
||||
}
|
Loading…
Add table
Reference in a new issue