i386.c (ix86_adjust_stack_and_probe_stack_clash): New.

* config/i386/i386.c (ix86_adjust_stack_and_probe_stack_clash): New.
	(ix86_expand_prologue): Dump stack clash info as needed.
	Call ix86_adjust_stack_and_probe_stack_clash as needed.

	* gcc.dg/stack-check-4.c: New test.
	* gcc.dg/stack-check-5.c: New test.
	* gcc.dg/stack-check-6.c: New test.
	* gcc.dg/stack-check-6a.c: New test.
	* gcc.dg/stack-check-7.c: New test.
	* gcc.dg/stack-check-8.c: New test.
	* gcc.dg/stack-check-9.c: New test.
	* gcc.dg/stack-check-10.c: New test.
	* lib/target-supports.exp
	(check_effective_target_supports_stack_clash_protection): Enable for
	x86 and x86_64 targets.

From-SVN: r252998
This commit is contained in:
Jeff Law 2017-09-19 23:35:07 -06:00 committed by Jeff Law
parent 8a502a808e
commit 8e7a09c353
13 changed files with 2600 additions and 6 deletions

View file

@@ -1,5 +1,9 @@
2017-09-19 Jeff Law <law@redhat.com>
* config/i386/i386.c (ix86_adjust_stack_and_probe_stack_clash): New.
(ix86_expand_prologue): Dump stack clash info as needed.
Call ix86_adjust_stack_and_probe_stack_clash as needed.
* function.c (dump_stack_clash_frame_info): New function.
* function.h (dump_stack_clash_frame_info): Prototype.
(enum stack_clash_probes): New enum.

View file

@@ -13924,6 +13924,147 @@ release_scratch_register_on_entry (struct scratch_reg *sr)
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
/* Emit code to adjust the stack pointer by SIZE bytes while probing it.
This differs from the next routine in that it tries hard to prevent
attacks that jump the stack guard. Thus it is never allowed to allocate
more than PROBE_INTERVAL bytes of stack space without a suitable
probe. */
static void
ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size)
{
/* Per-function machine state; m->fs tracks the frame (CFA register,
CFA offset, SP offset) and is consulted/updated below. */
struct machine_function *m = cfun->machine;
/* If this function does not statically allocate stack space, then
no probes are needed. */
if (!size)
{
dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
return;
}
/* If we are a noreturn function, then we have to consider the
possibility that we're called via a jump rather than a call.
Thus we don't have the implicit probe generated by saving the
return address into the stack at the call. Thus, the stack
pointer could be anywhere in the guard page. The safe thing
to do is emit a probe now.
?!? This should be revamped to work like aarch64 and s390 where
we track the offset from the most recent probe. Normally that
offset would be zero. For a non-return function we would reset
it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
we just probe when we cross PROBE_INTERVAL. */
/* TREE_THIS_VOLATILE on a FUNCTION_DECL is how the tree level marks
a noreturn function. */
if (TREE_THIS_VOLATILE (cfun->decl))
{
emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
-GET_MODE_SIZE (word_mode)));
/* Scheduling barrier: keep later stack adjustments from being
moved before this probe. */
emit_insn (gen_blockage ());
}
/* If we allocate less than the size of the guard statically,
then no probing is necessary, but we do need to allocate
the stack. */
if (size < (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE)))
{
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (-size), -1,
m->fs.cfa_reg == stack_pointer_rtx);
dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
return;
}
/* We're allocating a large enough stack frame that we need to
emit probes. Either emit them inline or in a loop depending
on the size. */
HOST_WIDE_INT probe_interval
= 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
/* Frames of at most four probe intervals are handled with unrolled
inline alloc+probe pairs; anything larger uses the loop below. */
if (size <= 4 * probe_interval)
{
HOST_WIDE_INT i;
for (i = probe_interval; i <= size; i += probe_interval)
{
/* Allocate PROBE_INTERVAL bytes. */
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (-probe_interval), -1,
m->fs.cfa_reg == stack_pointer_rtx);
/* And probe at *sp. */
emit_stack_probe (stack_pointer_rtx);
emit_insn (gen_blockage ());
}
/* We need to allocate space for the residual, but we do not need
to probe the residual. */
/* On loop exit I is the first multiple of PROBE_INTERVAL that
exceeds SIZE, so I - PROBE_INTERVAL bytes have been allocated
above and RESIDUAL is <= 0; adjusting SP by that (negative)
amount allocates the remainder. */
HOST_WIDE_INT residual = (i - probe_interval - size);
if (residual)
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (residual), -1,
m->fs.cfa_reg == stack_pointer_rtx);
dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
}
else
{
/* Large frame: probe in a loop driven by a scratch register that
holds the final stack pointer value. */
struct scratch_reg sr;
get_scratch_register_on_entry (&sr);
/* Step 1: round SIZE down to a multiple of the interval. */
HOST_WIDE_INT rounded_size = size & -probe_interval;
/* Step 2: compute final value of the loop counter. Use lea if
possible. */
rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
rtx insn;
if (address_no_seg_operand (addr, Pmode))
insn = emit_insn (gen_rtx_SET (sr.reg, addr));
else
{
emit_move_insn (sr.reg, GEN_INT (-rounded_size));
insn = emit_insn (gen_rtx_SET (sr.reg,
gen_rtx_PLUS (Pmode, sr.reg,
stack_pointer_rtx)));
}
/* While the loop runs the CFA is expressed via the scratch
register so the unwind info stays valid as SP moves. */
if (m->fs.cfa_reg == stack_pointer_rtx)
{
add_reg_note (insn, REG_CFA_DEF_CFA,
plus_constant (Pmode, sr.reg,
m->fs.cfa_offset + rounded_size));
RTX_FRAME_RELATED_P (insn) = 1;
}
/* Step 3: the loop. */
rtx size_rtx = GEN_INT (rounded_size);
insn = emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg,
size_rtx));
/* After the loop SP has moved by ROUNDED_SIZE; rebase the CFA
back onto the stack pointer. */
if (m->fs.cfa_reg == stack_pointer_rtx)
{
m->fs.cfa_offset += rounded_size;
add_reg_note (insn, REG_CFA_DEF_CFA,
plus_constant (Pmode, stack_pointer_rtx,
m->fs.cfa_offset));
RTX_FRAME_RELATED_P (insn) = 1;
}
m->fs.sp_offset += rounded_size;
emit_insn (gen_blockage ());
/* Step 4: adjust SP if we cannot assert at compile-time that SIZE
is equal to ROUNDED_SIZE. */
if (size != rounded_size)
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (rounded_size - size), -1,
m->fs.cfa_reg == stack_pointer_rtx);
dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
release_scratch_register_on_entry (&sr);
}
/* Make sure nothing is scheduled before we are done. */
emit_insn (gen_blockage ());
}
/* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
static void
@@ -14852,12 +14993,19 @@ ix86_expand_prologue (void)
/* The stack has already been decremented by the instruction calling us
so probe if the size is non-negative to preserve the protection area. */
if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
if (allocate >= 0
&& (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
|| flag_stack_clash_protection))
{
/* We expect the GP registers to be saved when probes are used. */
gcc_assert (int_registers_saved);
if (STACK_CHECK_MOVING_SP)
if (flag_stack_clash_protection)
{
ix86_adjust_stack_and_probe_stack_clash (allocate);
allocate = 0;
}
else if (STACK_CHECK_MOVING_SP)
{
if (!(crtl->is_leaf && !cfun->calls_alloca
&& allocate <= PROBE_INTERVAL))

View file

@@ -1,10 +1,22 @@
2017-09-19 Jeff Law <law@redhat.com>
* gcc.dg/stack-check-4.c: New test.
* gcc.dg/stack-check-5.c: New test.
* gcc.dg/stack-check-6.c: New test.
* gcc.dg/stack-check-6a.c: New test.
* gcc.dg/stack-check-7.c: New test.
* gcc.dg/stack-check-8.c: New test.
* gcc.dg/stack-check-9.c: New test.
* gcc.dg/stack-check-10.c: New test.
* lib/target-supports.exp
(check_effective_target_supports_stack_clash_protection): Enable for
x86 and x86_64 targets.
* gcc.dg/stack-check-3.c: New test.
* gcc.dg/stack-check-2.c: New test.
* lib/target-supports.exp
(check_effective_target_supports_stack_clash_protection): New function.
(check_effective_target_supports_stack_clash_protection): New.
(check_effective_target_frame_pointer_for_non_leaf): Likewise.
(check_effective_target_caller_implicit_probes): Likewise.

View file

@@ -0,0 +1,41 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
int f (int *);
int
g (int a)
{
return f (&a);
}
int f1 (void);
int f2 (int);
int
f3 (void)
{
return f2 (f1 ());
}
/* If we have caller implicit probes, then we should not need probes in either callee.
Else callees may need probes, particularly if non-leaf functions require a
frame/frame pointer. */
/* { dg-final { scan-rtl-dump-times "Stack clash no probe" 2 "pro_and_epilogue" { target caller_implicit_probes } } } */
/* { dg-final { scan-rtl-dump-times "Stack clash inline probe" 1 "pro_and_epilogue" { target { ! caller_implicit_probes } } } } */
/* { dg-final { scan-rtl-dump-times "Stack clash no probe" 1 "pro_and_epilogue" { target { ! caller_implicit_probes } } } } */
/* Neither of these functions are a nonreturn function. */
/* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 2 "pro_and_epilogue" } } */
/* If the callee realigns the stack or has a mandatory frame, then both functions
have a residual allocation. Else just g() has a residual allocation. */
/* { dg-final { scan-rtl-dump-times "Stack clash residual allocation in prologue" 2 "pro_and_epilogue" } } */
/* If the target has frame pointers for non-leafs, then both functions will
need a frame pointer. Otherwise neither should. */
/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 2 "pro_and_epilogue" { target { ! frame_pointer_for_non_leaf } } } } */
/* { dg-final { scan-rtl-dump-times "Stack clash frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */

View file

@@ -7,7 +7,7 @@
residual allocation + probe for f?. */
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-expand -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=4096 --param stack-clash-protection-guard-size=4096" } */
/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-expand -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
__attribute__((noinline, noclone)) void

View file

@@ -0,0 +1,42 @@
/* On targets where the call instruction is an implicit probe of *sp, we
elide stack probes as long as the size of the local stack is less than
PROBE_INTERVAL.
But if the caller were to transform a tail call into a direct jump
we do not have that implicit probe. This normally isn't a problem as
the caller must not have a local frame for that optimization to apply.
However, a sufficiently smart compiler could realize that the caller's
local stack need not be torn down and thus could transform a call into
a jump if the target is a noreturn function, even if the caller has
a local frame.
To guard against that, targets that depend on *sp being probed by the
call itself must emit a probe if the target function is a noreturn
function, even if they just allocate a small amount of stack space.
Rather than try to parse RTL or assembly code, we instead require the
prologue code to emit information into the dump file that we can
scan for. We scan for both the positive and negative cases. */
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
extern void arf (char *);
__attribute__ ((noreturn)) void foo1 ()
{
char x[10];
while (1)
arf (x);
}
void foo2 ()
{
char x[10];
arf (x);
}
/* { dg-final { scan-rtl-dump-times "Stack clash noreturn" 1 "pro_and_epilogue" } } */
/* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 1 "pro_and_epilogue" } } */

View file

@@ -0,0 +1,74 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
extern void foo (char *);
extern void bar (void);
/* This function allocates no local stack and is a leaf. It should have no
probes on any target and should not require a frame pointer. */
int
f0 (int x, int y)
{
asm volatile ("" : : : "memory");
return x + y;
}
/* This function allocates no local stack, but is not a leaf. Ideally it
should not need probing and no frame pointer. */
int
f1 (int x, int y)
{
asm volatile ("" : : : "memory");
bar ();
}
/* This is a leaf with a small frame. On targets with implicit probes in
the caller, this should not need probing. On targets with no implicit
probes in the caller, it may require probes. Ideally it should need no
frame pointer. */
void
f2 (void)
{
char buf[512];
asm volatile ("" : : "g" (&buf) : "memory");
}
/* This is a non-leaf with a small frame. On targets with implicit probes in
the caller, this should not need probing. On targets with no implicit
probes in the caller, it may require probes. It should need no frame
pointer. */
void
f3 (void)
{
char buf[512];
foo (buf);
}
/* If we have caller implicit probes, then we should not need probes.
Else callees may need probes, particularly if non-leaf functions require a
frame/frame pointer. */
/* { dg-final { scan-rtl-dump-times "Stack clash no probe" 4 "pro_and_epilogue" { target caller_implicit_probes } } } */
/* { dg-final { scan-rtl-dump-times "Stack clash no probe" 2 "pro_and_epilogue" { target { ! caller_implicit_probes } } } } */
/* { dg-final { scan-rtl-dump-times "Stack clash inline probes " 2 "pro_and_epilogue" { target { ! caller_implicit_probes } } } } */
/* None of these functions are marked with the noreturn attribute. */
/* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 4 "pro_and_epilogue" } } */
/* Two functions are leafs, two are not. Verify the target identified them
appropriately. */
/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 4 "pro_and_epilogue" { target { ! frame_pointer_for_non_leaf } } } } */
/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */
/* { dg-final { scan-rtl-dump-times "Stack clash frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */
/* We have selected the size of the array in f2/f3 to be large enough
to not live in the red zone on targets that support it.
That allows simplification of this test considerably.
f1() should not require any allocations, thus no residuals.
All the rest of the functions require some kind of allocation,
either for the saved fp/rp or the array. */
/* { dg-final { scan-rtl-dump-times "Stack clash no residual allocation in prologue" 1 "pro_and_epilogue" } } */
/* { dg-final { scan-rtl-dump-times "Stack clash residual allocation in prologue" 3 "pro_and_epilogue" } } */

View file

@@ -0,0 +1,55 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
extern void foo (char *);
extern void bar (void);
/* This is a leaf with a frame that is large enough to require probing with
a residual allocation, but small enough to probe inline. */
void
f4 (void)
{
char buf[4096 + 512];
asm volatile ("" : : "g" (&buf) : "memory");
}
/* This is a non-leaf with a frame large enough to require probing and
a residual allocation, but small enough to probe inline. */
void
f5 (void)
{
char buf[4096 + 512];
foo (buf);
}
/* This is a leaf with a frame that is large enough to require probing with
a loop plus a residual allocation. */
void
f6 (void)
{
char buf[4096 * 10 + 512];
asm volatile ("" : : "g" (&buf) : "memory");
}
/* This is a non-leaf with a frame large enough to require probing with
a loop plus a residual allocation. */
void
f7 (void)
{
char buf[4096 * 10 + 512];
foo (buf);
}
/* { dg-final { scan-rtl-dump-times "Stack clash inline probes" 2 "pro_and_epilogue" } } */
/* { dg-final { scan-rtl-dump-times "Stack clash probe loop" 2 "pro_and_epilogue" } } */
/* { dg-final { scan-rtl-dump-times "Stack clash residual allocation in prologue" 4 "pro_and_epilogue" } } */
/* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 4 "pro_and_epilogue" } } */
/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 4 "pro_and_epilogue" { target { ! frame_pointer_for_non_leaf } } } } */
/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */
/* { dg-final { scan-rtl-dump-times "Stack clash frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */

View file

@@ -0,0 +1,17 @@
/* The goal here is to verify that increasing the size of the guard allows
elimination of all probing on the relevant targets. */
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#include "stack-check-6.c"
/* { dg-final { scan-rtl-dump-times "Stack clash inline probes" 0 "pro_and_epilogue" } } */
/* { dg-final { scan-rtl-dump-times "Stack clash probe loop" 0 "pro_and_epilogue" } } */
/* { dg-final { scan-rtl-dump-times "Stack clash residual allocation in prologue" 4 "pro_and_epilogue" } } */
/* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 4 "pro_and_epilogue" } } */
/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 4 "pro_and_epilogue" { target { ! frame_pointer_for_non_leaf } } } } */
/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */
/* { dg-final { scan-rtl-dump-times "Stack clash frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */

View file

@@ -0,0 +1,36 @@
/* { dg-do run } */
/* { dg-options "-O2 -fstack-clash-protection -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
/* For further testing, this can be run under valgrind where it's crashed
on aarch64 and ppc64le with -fstack-check=specific. */
/* Escape sink: the asm makes P appear used and memory clobbered, so the
callers' stack buffers cannot be optimized away. */
__attribute__((noinline, noclone)) void
foo (char *p)
{
asm volatile ("" : : "r" (p) : "memory");
}
/* Large (131072-byte) frame relative to the 4k probe interval selected
by the dg-options, so the probing loop path is exercised. */
__attribute__((noinline, noclone)) void
bar (void)
{
char buf[131072];
foo (buf);
}
/* Moderate (12000-byte) frame, still larger than the 4k guard size set
by the dg-options, so probing is required. */
__attribute__((noinline, noclone)) void
baz (void)
{
char buf[12000];
foo (buf);
}
/* Drive both frame sizes; the test passes if neither call faults
(per the file comment, e.g. when run under valgrind). */
int
main ()
{
bar ();
baz ();
return 0;
}

View file

@@ -0,0 +1,139 @@
/* { dg-do run } */
/* { dg-options "-O2 -fstack-clash-protection -Wno-psabi -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
typedef float V __attribute__((vector_size (32)));
/* Escape sink: the asm makes P appear used and memory clobbered, so the
callers' stack buffers cannot be optimized away. */
__attribute__((noinline, noclone)) void
foo (char *p)
{
asm volatile ("" : : "r" (p) : "memory");
}
/* No local frame at all; returns x + y. */
__attribute__((noinline, noclone)) int
f0 (int x, int y)
{
asm volatile ("" : : : "memory");
return x + y;
}
/* Tiny 64-byte frame. */
__attribute__((noinline, noclone)) void
f1 (void)
{
char buf[64];
foo (buf);
}
/* 12000-byte frame — larger than the 4k guard selected by dg-options. */
__attribute__((noinline, noclone)) void
f2 (void)
{
char buf[12000];
foo (buf);
}
/* 131072-byte frame — many multiples of the 4k probe interval. */
__attribute__((noinline, noclone)) void
f3 (void)
{
char buf[131072];
foo (buf);
}
/* Dynamic allocation: a VLA whose size is only known at run time. */
__attribute__((noinline, noclone)) void
f4 (int x)
{
char vla[x];
foo (vla);
}
/* Combination case: a fixed 12000-byte frame plus two separately scoped
VLAs, mixing static and dynamic stack allocation. */
__attribute__((noinline, noclone)) void
f5 (int x)
{
char buf[12000];
foo (buf);
{
char vla[x];
foo (vla);
}
{
char vla[x];
foo (vla);
}
}
V v;
/* No local buffer; takes 32-byte vector arguments (which may force stack
realignment or spills — TODO confirm per target), stores their sum in
the global V, and returns x + y. */
__attribute__((noinline, noclone)) int
f6 (int x, int y, V a, V b, V c)
{
asm volatile ("" : : : "memory");
v = a + b + c;
return x + y;
}
/* Tiny 64-byte frame plus vector arguments. */
__attribute__((noinline, noclone)) void
f7 (V a, V b, V c)
{
char buf[64];
foo (buf);
v = a + b + c;
}
/* 12000-byte frame plus vector arguments. */
__attribute__((noinline, noclone)) void
f8 (V a, V b, V c)
{
char buf[12000];
foo (buf);
v = a + b + c;
}
/* 131072-byte frame plus vector arguments. */
__attribute__((noinline, noclone)) void
f9 (V a, V b, V c)
{
char buf[131072];
foo (buf);
v = a + b + c;
}
/* Run-time-sized VLA plus vector arguments. */
__attribute__((noinline, noclone)) void
f10 (int x, V a, V b, V c)
{
char vla[x];
foo (vla);
v = a + b + c;
}
/* Fixed 12000-byte frame, two scoped VLAs, and vector arguments — the
most complex allocation mix in this test. */
__attribute__((noinline, noclone)) void
f11 (int x, V a, V b, V c)
{
char buf[12000];
foo (buf);
v = a + b + c;
{
char vla[x];
foo (vla);
}
{
char vla[x];
foo (vla);
}
}
/* Execute every variant; the test passes if no call faults. VLA sizes
of 12000 exceed the 4k guard selected by the dg-options. */
int
main ()
{
f0 (2, 3);
f1 ();
f2 ();
f3 ();
f4 (12000);
f5 (12000);
f6 (2, 3, v, v, v);
f7 (v, v, v);
f8 (v, v, v);
f9 (v, v, v);
f10 (12000, v, v, v);
f11 (12000, v, v, v);
return 0;
}

File diff suppressed because it is too large Load diff

View file

@@ -8635,11 +8635,15 @@ proc check_effective_target_autoincdec { } {
# Return 1 if the target is expected to support -fstack-clash-protection,
# 0 otherwise.  Only the x86 ports are enabled at this point; the
# commented block below lists targets awaiting approval.
proc check_effective_target_supports_stack_clash_protection { } {
# Temporary until the target bits are fully ACK'd.
# if { [istarget aarch*-*-*] || [istarget x86_64-*-*]
# || [istarget i?86-*-*] || [istarget s390*-*-*]
# if { [istarget aarch*-*-*]
# || [istarget s390*-*-*]
# || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] } {
# return 1
# }
if { [istarget x86_64-*-*] || [istarget i?86-*-*] } {
return 1
}
return 0
}