
PR 28065 (gdb.threads/access-mem-running-thread-exit.exp intermittent failure) shows that GDB can hit an unexpected scenario -- it can happen that the kernel manages to open a /proc/PID/task/LWP/mem file, but then reading from the file returns 0/EOF, even though the process hasn't exited or execed. "0" out of read/write is normally what you get when the address space of the process the file was open for is gone, because the process execed or exited. So when GDB gets the 0, it returns memory access failure. In the bad case in question, the process hasn't execed or exited, so GDB fails a memory access when the access should have worked. GDB has code in place to gracefully handle the case of opening the /proc/PID/task/LWP/mem just while the LWP is exiting -- most often the open fails with EACCES or ENOENT. When it happens, GDB just tries opening the file for a different thread of the process. The testcase is written such that it stresses GDB's logic of closing/reopening the /proc/PID/task/LWP/mem file, by constantly spawning short-lived threads. However, there's a window where the kernel manages to find the thread, but the thread exits just after and clears its address space pointer. In this case, the kernel creates a file successfully, but the file ends up with no address space associated, so a subsequent read/write returns 0/EOF too, just as if the whole process had execed or exited. This is the case in question that GDB does not handle. Oleg Nesterov gave this suggestion as a workaround for that race: gdb can open(/proc/pid/mem) and then read (say) /proc/pid/statm. If statm reports something non-zero, then open() was "successful". I think that might work. However, I didn't try it, because I realized we have another nasty race that that wouldn't fix.
The other race I realized is that because we close/reopen the /proc/PID/task/LWP/mem file when GDB switches to a different inferior, then it can happen that GDB reopens /proc/PID/task/LWP/mem just after a thread execs, and before GDB has seen the corresponding exec event. I.e., we can open a /proc/PID/task/LWP/mem file accessing the post-exec address space thinking we're accessing the pre-exec address space. A few months back, Simon, Oleg and I discussed a similar race: [Bug gdb/26754] Race condition when resuming threads and one does an exec https://sourceware.org/bugzilla/show_bug.cgi?id=26754 The solution back then was to make the kernel fail any ptrace operation until the exec event is consumed, with this kernel commit: commit dbb5afad100a828c97e012c6106566d99f041db6 Author: Oleg Nesterov <oleg@redhat.com> AuthorDate: Wed May 12 15:33:08 2021 +0200 Commit: Linus Torvalds <torvalds@linux-foundation.org> CommitDate: Wed May 12 10:45:22 2021 -0700 ptrace: make ptrace() fail if the tracee changed its pid unexpectedly This, however, only applies to ptrace, not to the /proc/pid/mem file opening case. Also, even if it did apply to the file open case, we would want to support current kernels until such a fix is more widespread anyhow. So all in all, this commit gives up on the idea of only ever keeping one /proc/pid/mem file descriptor open. Instead, make GDB open a /proc/pid/mem per inferior, and keep it open until the inferior exits, is detached or execs. Make GDB open the file right after the inferior is created or is attached to or forks, at which point we know the inferior is stable and stopped and thus isn't going to exec, or have a thread exit, and so the file open won't fail (unless the whole process is SIGKILLed from outside GDB, at which point it doesn't matter whether we open the file). This way, we avoid both races described above, at the expense of using more file descriptors (one per inferior).
Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=28065 Change-Id: Iff943b95126d0f98a7973a07e989e4f020c29419
166 lines
4.7 KiB
Text
166 lines
4.7 KiB
Text
# Copyright (C) 2021 Free Software Foundation, Inc.
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
# Test that we can access memory while all the threads of the inferior
|
|
# are running, and even if:
|
|
#
|
|
# - the leader thread exits
|
|
# - the selected thread exits
|
|
#
|
|
# This test constantly spawns short lived threads to make sure that on
|
|
# systems with debug APIs that require passing down a specific thread
|
|
# to work with (e.g., GNU/Linux ptrace and /proc filesystem), GDB
|
|
# copes with accessing memory just while the thread it is accessing
|
|
# memory through exits.
|
|
#
|
|
# The test spawns two processes and alternates memory accesses between
|
|
# them to force flushing per-process caches. When the testcase was
|
|
# originally written, the Linux backend would access inferior memory
|
|
# via /proc/PID/mem, and kept one such file open, as a cache.
|
|
# Alternating inferiors would force re-opening such file for a
|
|
# different process, which would fail if GDB tried to open the file
|
|
# for a thread that exited. The test thus ensured those reopen/fail
|
|
# code paths were exercised. Nowadays, GDB keeps one /proc/PID/mem
|
|
# file open per inferior.
|
|
|
|
# standard_testfile is expected to set up the testfile and srcfile
# variables that the build step below relies on.
standard_testfile

# Build the test program with debug info and pthreads support.  Give
# up on the whole test file if the build fails.
if { [build_executable "failed to prepare" $testfile $srcfile \
	  {debug pthreads}] == -1 } {
    return -1
}
|
|
|
|
# The test proper.  NON_STOP indicates whether we're testing in
# non-stop, or all-stop mode.
#
# Starts two inferiors running the test program, resumes them in the
# background, and then spends about 5 seconds alternating between the
# two inferiors, writing to and reading back global_var in each.  A
# single "access mem" pass is issued if every access succeeded.
# Returns -1 if either inferior fails to reach main.

proc test { non_stop } {
    global binfile
    global gdb_prompt
    global GDBFLAGS

    # Select the stop mode on GDB's command line, only for this
    # restart.
    save_vars { GDBFLAGS } {
	append GDBFLAGS " -ex \"set non-stop $non_stop\""
	clean_restart ${binfile}
    }

    if ![runto_main] {
	return -1
    }

    # If debugging with target remote, check whether the all-stop variant
    # of the RSP is being used.  If so, we can't run the background tests.
    if {!$non_stop
	&& [target_info exists gdb_protocol]
	&& ([target_info gdb_protocol] == "remote"
	    || [target_info gdb_protocol] == "extended-remote")} {

	gdb_test_multiple "maint show target-non-stop" "" {
	    -wrap -re "(is|currently) on.*" {
	    }
	    -wrap -re "(is|currently) off.*" {
		unsupported "can't issue commands while target is running"
		return 0
	    }
	}
    }

    delete_breakpoints

    # Start the second inferior.
    with_test_prefix "second inferior" {
	gdb_test "add-inferior -no-connection" "New inferior 2.*"
	gdb_test "inferior 2" "Switching to inferior 2 .*"

	gdb_load $binfile

	if ![runto_main] {
	    return -1
	}
    }

    delete_breakpoints

    # These put too much noise in the logs.
    gdb_test_no_output "set print thread-events off"

    # Continue all threads of both processes.
    gdb_test_no_output "set schedule-multiple on"
    if {$non_stop == "off"} {
	set cmd "continue &"
    } else {
	set cmd "continue -a &"
    }
    gdb_test_multiple $cmd "continuing" {
	-re "Continuing\.\r\n$gdb_prompt " {
	    pass $gdb_test_name
	}
    }

    # Like gdb_test, but:
    # - don't issue a pass on success.
    # - on failure, clear the ok variable in the calling context, and
    #   break out of the caller's enclosing loop.
    #
    # Reads the INF and ITER variables of the caller to build the test
    # message.
    proc my_gdb_test {cmd pattern message} {
	upvar inf inf
	upvar iter iter
	if {[gdb_test_multiple $cmd "access mem ($message, inf=$inf, iter=$iter)" {
	    -wrap -re $pattern {
	    }
	}] != 0} {
	    uplevel 1 {set ok 0}
	    return -code break
	}
    }

    # Hammer away for 5 seconds, alternating between inferiors.  Keep
    # the timer's id so that it can be cancelled once the loop is
    # done.
    set ::done 0
    set timer [after 5000 { set ::done 1 }]

    set inf 1
    set ok 1
    set iter 0
    while {!$::done && $ok} {
	incr iter
	verbose -log "xxxxx: iteration $iter"
	gdb_test "info threads" ".*" ""

	# Flip to the other inferior.
	if {$inf == 1} {
	    set inf 2
	} else {
	    set inf 1
	}

	my_gdb_test "inferior $inf" ".*" "inferior $inf"

	my_gdb_test "print global_var = 555" " = 555" \
	    "write to global_var"
	my_gdb_test "print global_var" " = 555" \
	    "print global_var after writing"
	my_gdb_test "print global_var = 333" " = 333" \
	    "write to global_var again"
	my_gdb_test "print global_var" " = 333" \
	    "print global_var after writing again"
    }

    # If the loop bailed out early because of a failure, the timer is
    # still pending.  Cancel it so that it can't fire during a later
    # invocation of this proc and end that run's loop prematurely.
    # This is a no-op if the timer has already fired.
    after cancel $timer

    if {$ok} {
	pass "access mem"
    }
}
|
|
|
|
# Run the test once in all-stop mode and once in non-stop mode, with
# the mode's name as test prefix.
foreach non_stop { "off" "on" } {
    # The expression is braced so that expr itself substitutes
    # $non_stop and compares it as a string operand, instead of
    # re-parsing the already-substituted text as a bare word.
    set stop_mode [expr {$non_stop == "off" ? "all-stop" : "non-stop"}]
    with_test_prefix "$stop_mode" {
	test $non_stop
    }
}
|