x86: Don't allocate stack frame nor align stack if not needed
get_frame_size () returns used stack slots during compilation, which may be optimized out later. This patch does the followings: 1. Add stack_frame_required to machine_function to indicate that the function needs a stack frame. 2. Change ix86_find_max_used_stack_alignment to set stack_frame_required. 3. Always call ix86_find_max_used_stack_alignment to check if stack frame is needed. Tested on i686 and x86-64 with --with-arch=native --with-cpu=native Tested on AVX512 machine configured with --with-arch=native --with-cpu=native gcc/ PR target/88483 * config/i386/i386-options.c (ix86_init_machine_status): Set stack_frame_required to true. * config/i386/i386.c (ix86_get_frame_size): New function. (ix86_frame_pointer_required): Replace get_frame_size with ix86_get_frame_size. (ix86_compute_frame_layout): Likewise. (ix86_find_max_used_stack_alignment): Changed to void. Set stack_frame_required. (ix86_finalize_stack_frame_flags): Always call ix86_find_max_used_stack_alignment. Replace get_frame_size with ix86_get_frame_size. * config/i386/i386.h (machine_function): Add stack_frame_required. gcc/testsuite/ PR target/88483 * gcc.target/i386/stackalign/pr88483-1.c: New test. * gcc.target/i386/stackalign/pr88483-2.c: Likewise. From-SVN: r271517
This commit is contained in:
parent
586e53e269
commit
c2080a1fb5
7 changed files with 97 additions and 18 deletions
|
@ -1,3 +1,19 @@
|
|||
2019-05-22 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR target/88483
|
||||
* config/i386/i386-options.c (ix86_init_machine_status): Set
|
||||
stack_frame_required to true.
|
||||
* config/i386/i386.c (ix86_get_frame_size): New function.
|
||||
(ix86_frame_pointer_required): Replace get_frame_size with
|
||||
ix86_get_frame_size.
|
||||
(ix86_compute_frame_layout): Likewise.
|
||||
(ix86_find_max_used_stack_alignment): Changed to void. Set
|
||||
stack_frame_required.
|
||||
(ix86_finalize_stack_frame_flags): Always call
|
||||
ix86_find_max_used_stack_alignment. Replace get_frame_size with
|
||||
ix86_get_frame_size.
|
||||
* config/i386/i386.h (machine_function): Add stack_frame_required.
|
||||
|
||||
2019-05-22 Uroš Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* config/i386/sse.md (sse_cvtpi2ps): Use TARGET_MMX in insn condition.
|
||||
|
|
|
@ -1537,6 +1537,7 @@ ix86_init_machine_status (void)
|
|||
|
||||
f = ggc_cleared_alloc<machine_function> ();
|
||||
f->call_abi = ix86_abi;
|
||||
f->stack_frame_required = true;
|
||||
|
||||
return f;
|
||||
}
|
||||
|
|
|
@ -5012,6 +5012,19 @@ ix86_can_use_return_insn_p (void)
|
|||
&& (frame.nregs + frame.nsseregs) == 0);
|
||||
}
|
||||
|
||||
/* Return stack frame size. get_frame_size () returns used stack slots
|
||||
during compilation, which may be optimized out later. If stack frame
|
||||
is needed, stack_frame_required should be true. */
|
||||
|
||||
static HOST_WIDE_INT
|
||||
ix86_get_frame_size (void)
|
||||
{
|
||||
if (cfun->machine->stack_frame_required)
|
||||
return get_frame_size ();
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Value should be nonzero if functions must have frame pointers.
|
||||
Zero means the frame pointer need not be set up (and parms may
|
||||
be accessed via the stack pointer) in functions that seem suitable. */
|
||||
|
@ -5035,7 +5048,7 @@ ix86_frame_pointer_required (void)
|
|||
|
||||
/* Win64 SEH, very large frames need a frame-pointer as maximum stack
|
||||
allocation is 4GB. */
|
||||
if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
|
||||
if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
|
||||
return true;
|
||||
|
||||
/* SSE saves require frame-pointer when stack is misaligned. */
|
||||
|
@ -5842,7 +5855,7 @@ ix86_compute_frame_layout (void)
|
|||
unsigned HOST_WIDE_INT stack_alignment_needed;
|
||||
HOST_WIDE_INT offset;
|
||||
unsigned HOST_WIDE_INT preferred_alignment;
|
||||
HOST_WIDE_INT size = get_frame_size ();
|
||||
HOST_WIDE_INT size = ix86_get_frame_size ();
|
||||
HOST_WIDE_INT to_allocate;
|
||||
|
||||
/* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
|
||||
|
@ -7436,11 +7449,11 @@ output_probe_stack_range (rtx reg, rtx end)
|
|||
return "";
|
||||
}
|
||||
|
||||
/* Return true if stack frame is required. Update STACK_ALIGNMENT
|
||||
to the largest alignment, in bits, of stack slot used if stack
|
||||
frame is required and CHECK_STACK_SLOT is true. */
|
||||
/* Set stack_frame_required to false if stack frame isn't required.
|
||||
Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
|
||||
slot used if stack frame is required and CHECK_STACK_SLOT is true. */
|
||||
|
||||
static bool
|
||||
static void
|
||||
ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
|
||||
bool check_stack_slot)
|
||||
{
|
||||
|
@ -7489,7 +7502,7 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
|
|||
}
|
||||
}
|
||||
|
||||
return require_stack_frame;
|
||||
cfun->machine->stack_frame_required = require_stack_frame;
|
||||
}
|
||||
|
||||
/* Finalize stack_realign_needed and frame_pointer_needed flags, which
|
||||
|
@ -7519,6 +7532,14 @@ ix86_finalize_stack_frame_flags (void)
|
|||
return;
|
||||
}
|
||||
|
||||
/* It is always safe to compute max_used_stack_alignment. We
|
||||
compute it only if 128-bit aligned load/store may be generated
|
||||
on misaligned stack slot which will lead to segfault. */
|
||||
bool check_stack_slot
|
||||
= (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
|
||||
ix86_find_max_used_stack_alignment (stack_alignment,
|
||||
check_stack_slot);
|
||||
|
||||
/* If the only reason for frame_pointer_needed is that we conservatively
|
||||
assumed stack realignment might be needed or -fno-omit-frame-pointer
|
||||
is used, but in the end nothing that needed the stack alignment had
|
||||
|
@ -7538,12 +7559,11 @@ ix86_finalize_stack_frame_flags (void)
|
|||
&& flag_exceptions
|
||||
&& cfun->can_throw_non_call_exceptions)
|
||||
&& !ix86_frame_pointer_required ()
|
||||
&& get_frame_size () == 0
|
||||
&& ix86_get_frame_size () == 0
|
||||
&& ix86_nsaved_sseregs () == 0
|
||||
&& ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
|
||||
{
|
||||
if (ix86_find_max_used_stack_alignment (stack_alignment,
|
||||
stack_realign))
|
||||
if (cfun->machine->stack_frame_required)
|
||||
{
|
||||
/* Stack frame is required. If stack alignment needed is less
|
||||
than incoming stack boundary, don't realign stack. */
|
||||
|
@ -7631,15 +7651,12 @@ ix86_finalize_stack_frame_flags (void)
|
|||
recompute_frame_layout_p = true;
|
||||
}
|
||||
}
|
||||
else if (crtl->max_used_stack_slot_alignment >= 128)
|
||||
else if (crtl->max_used_stack_slot_alignment >= 128
|
||||
&& cfun->machine->stack_frame_required)
|
||||
{
|
||||
/* We don't need to realign stack. max_used_stack_alignment is
|
||||
used to decide how stack frame should be aligned. This is
|
||||
independent of any psABIs nor 32-bit vs 64-bit. It is always
|
||||
safe to compute max_used_stack_alignment. We compute it only
|
||||
if 128-bit aligned load/store may be generated on misaligned
|
||||
stack slot which will lead to segfault. */
|
||||
if (ix86_find_max_used_stack_alignment (stack_alignment, true))
|
||||
independent of any psABIs nor 32-bit vs 64-bit. */
|
||||
cfun->machine->max_used_stack_alignment
|
||||
= stack_alignment / BITS_PER_UNIT;
|
||||
}
|
||||
|
|
|
@ -2754,6 +2754,9 @@ struct GTY(()) machine_function {
|
|||
/* If true, ENDBR is queued at function entrance. */
|
||||
BOOL_BITFIELD endbr_queued_at_entrance : 1;
|
||||
|
||||
/* True if the function needs a stack frame. */
|
||||
BOOL_BITFIELD stack_frame_required : 1;
|
||||
|
||||
/* The largest alignment, in bytes, of stack slot actually used. */
|
||||
unsigned int max_used_stack_alignment;
|
||||
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
2019-05-22 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR target/88483
|
||||
* gcc.target/i386/stackalign/pr88483-1.c: New test.
|
||||
* gcc.target/i386/stackalign/pr88483-2.c: Likewise.
|
||||
|
||||
2019-05-22 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
* gfortran.dg/coarray_lock_7.f90: Fix output patterns.
|
||||
|
|
18
gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c
Normal file
18
gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx2" } */
|
||||
|
||||
struct B
|
||||
{
|
||||
char a[12];
|
||||
int b;
|
||||
};
|
||||
|
||||
struct B
|
||||
f2 (void)
|
||||
{
|
||||
struct B x = {};
|
||||
return x;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "(sub|add)(l|q)\[\\t \]*\\$\[0-9\]*,\[\\t \]*%\[re\]?sp" } } */
|
||||
/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */
|
18
gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c
Normal file
18
gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx512f" } */
|
||||
|
||||
struct B
|
||||
{
|
||||
char a[12];
|
||||
int b;
|
||||
};
|
||||
|
||||
struct B
|
||||
f2 (void)
|
||||
{
|
||||
struct B x = {};
|
||||
return x;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "(sub|add)(l|q)\[\\t \]*\\$\[0-9\]*,\[\\t \]*%\[re\]?sp" } } */
|
||||
/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */
|
Loading…
Add table
Reference in a new issue