Fix failure to detach if process exits while detaching on Linux

This commit fixes detaching on Linux when a thread causes the whole
thread group (process) to exit just as we're detaching.

On Linux, a ptracer must detach from each LWP individually, with
PTRACE_DETACH.  Since PTRACE_DETACH sets the thread running free, if
one of the already-detached threads causes the whole thread group to
exit (e.g., simply calls exit), the kernel force-kills the other
threads in the group, turning them into zombies, while we're still
detaching from them.  Since PTRACE_DETACH against a zombie thread
fails with ESRCH, and gdb/gdbserver do not expect this, the detach
fails with an error like: "Can't detach process: No such process.".

This patch treats this detach failure as normal and, instead of
erroring out, reaps the now-dead thread.

A new test is included that exercises several different scenarios in
which GDB/GDBserver would error out when it should not.

Tested on x86-64 GNU/Linux with {unix, native-gdbserver,
native-extended-gdbserver}.

Note: without the previous fix, the "single-process + continue"
variant of the new test would fail with:

 (gdb) PASS: gdb.threads/process-dies-while-detaching.exp: single-process: continue: watchpoint: switch to parent
 continue
 Continuing.
 Warning:
 Could not insert hardware watchpoint 3.
 Could not insert hardware breakpoints:
 You may have requested too many hardware breakpoints/watchpoints.

 Command aborted.
 (gdb) FAIL: gdb.threads/process-dies-while-detaching.exp: single-process: continue: watchpoint: continue

gdb/gdbserver/ChangeLog:
2016-07-01  Pedro Alves  <palves@redhat.com>
	    Antoine Tremblay  <antoine.tremblay@ericsson.com>

	* linux-low.c (linux_detach_one_lwp): Change interface to take the
	target lwp_info pointer directly and return void.  Handle
	detaching from a zombie thread.
	(linux_detach_lwp_callback): New function.
	(linux_detach): Detach from the leader thread after detaching from
	the clone threads.

gdb/ChangeLog:
2016-07-01  Pedro Alves  <palves@redhat.com>
	    Antoine Tremblay  <antoine.tremblay@ericsson.com>

	* inf-ptrace.c (inf_ptrace_detach_success): New function, factored
	out from ...
	(inf_ptrace_detach): ... here.
	* inf-ptrace.h (inf_ptrace_detach_success): New declaration.
	* linux-nat.c (get_pending_status): Rename to ...
	(get_detach_signal): ... this, and return a host signal instead of
	filling in a wait status.
	(detach_one_lwp): New function, factored out from detach_callback
	and adjusted to handle detaching from a zombie thread.
	(detach_callback): Skip the leader thread.
	(linux_nat_detach): No longer defer to inf_ptrace_detach to detach
	the leader thread, nor build a signal string to pass down.
	Instead, use target_announce_detach, detach_one_lwp and
	inf_ptrace_detach_success.

gdb/testsuite/ChangeLog:
2016-07-01  Pedro Alves  <palves@redhat.com>
	    Antoine Tremblay  <antoine.tremblay@ericsson.com>

	* gdb.threads/process-dies-while-detaching.c: New file.
	* gdb.threads/process-dies-while-detaching.exp: New file.
This commit is contained in:
Pedro Alves 2016-07-01 11:16:33 +01:00
parent 6300088845
commit ced2dffbf1
9 changed files with 690 additions and 71 deletions

View file

@ -820,6 +820,7 @@ linux_nat_pass_signals (struct target_ops *self,
static int stop_wait_callback (struct lwp_info *lp, void *data);
static char *linux_child_pid_to_exec_file (struct target_ops *self, int pid);
static int resume_stopped_resumed_lwps (struct lwp_info *lp, void *data);
static int check_ptrace_stopped_lwp_gone (struct lwp_info *lp);
@ -1295,9 +1296,13 @@ linux_nat_attach (struct target_ops *ops, const char *args, int from_tty)
target_async (1);
}
/* Get pending status of LP. */
/* Get pending signal of LP as a host signal number, for detaching
purposes. This is the signal the LWP last stopped for, which we
need to deliver to the LWP when detaching, otherwise, it'd be
suppressed/lost. */
static int
get_pending_status (struct lwp_info *lp, int *status)
get_detach_signal (struct lwp_info *lp)
{
enum gdb_signal signo = GDB_SIGNAL_0;
@ -1350,8 +1355,6 @@ get_pending_status (struct lwp_info *lp, int *status)
}
}
*status = 0;
if (signo == GDB_SIGNAL_0)
{
if (debug_linux_nat)
@ -1370,21 +1373,28 @@ get_pending_status (struct lwp_info *lp, int *status)
}
else
{
*status = W_STOPCODE (gdb_signal_to_host (signo));
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog,
"GPT: lwp %s has pending signal %s\n",
target_pid_to_str (lp->ptid),
gdb_signal_to_string (signo));
return gdb_signal_to_host (signo);
}
return 0;
}
static int
detach_callback (struct lwp_info *lp, void *data)
/* Detach from LP. If SIGNO_P is non-NULL, then it points to the
signal number that should be passed to the LWP when detaching.
Otherwise pass any pending signal the LWP may have, if any. */
static void
detach_one_lwp (struct lwp_info *lp, int *signo_p)
{
int lwpid = ptid_get_lwp (lp->ptid);
int signo;
gdb_assert (lp->status == 0 || WIFSTOPPED (lp->status));
if (debug_linux_nat && lp->status)
@ -1400,36 +1410,83 @@ detach_callback (struct lwp_info *lp, void *data)
"DC: Sending SIGCONT to %s\n",
target_pid_to_str (lp->ptid));
kill_lwp (ptid_get_lwp (lp->ptid), SIGCONT);
kill_lwp (lwpid, SIGCONT);
lp->signalled = 0;
}
/* We don't actually detach from the LWP that has an id equal to the
overall process id just yet. */
if (ptid_get_lwp (lp->ptid) != ptid_get_pid (lp->ptid))
if (signo_p == NULL)
{
int status = 0;
/* Pass on any pending signal for this LWP. */
get_pending_status (lp, &status);
signo = get_detach_signal (lp);
}
else
signo = *signo_p;
/* Preparing to resume may try to write registers, and fail if the
lwp is zombie. If that happens, ignore the error. We'll handle
it below, when detach fails with ESRCH. */
TRY
{
if (linux_nat_prepare_to_resume != NULL)
linux_nat_prepare_to_resume (lp);
errno = 0;
if (ptrace (PTRACE_DETACH, ptid_get_lwp (lp->ptid), 0,
WSTOPSIG (status)) < 0)
error (_("Can't detach %s: %s"), target_pid_to_str (lp->ptid),
safe_strerror (errno));
}
CATCH (ex, RETURN_MASK_ERROR)
{
if (!check_ptrace_stopped_lwp_gone (lp))
throw_exception (ex);
}
END_CATCH
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog,
"PTRACE_DETACH (%s, %s, 0) (OK)\n",
target_pid_to_str (lp->ptid),
strsignal (WSTOPSIG (status)));
if (ptrace (PTRACE_DETACH, lwpid, 0, signo) < 0)
{
int save_errno = errno;
delete_lwp (lp->ptid);
/* We know the thread exists, so ESRCH must mean the lwp is
zombie. This can happen if one of the already-detached
threads exits the whole thread group. In that case we're
still attached, and must reap the lwp. */
if (save_errno == ESRCH)
{
int ret, status;
ret = my_waitpid (lwpid, &status, __WALL);
if (ret == -1)
{
warning (_("Couldn't reap LWP %d while detaching: %s"),
lwpid, strerror (errno));
}
else if (!WIFEXITED (status) && !WIFSIGNALED (status))
{
warning (_("Reaping LWP %d while detaching "
"returned unexpected status 0x%x"),
lwpid, status);
}
}
else
{
error (_("Can't detach %s: %s"), target_pid_to_str (lp->ptid),
safe_strerror (save_errno));
}
}
else if (debug_linux_nat)
{
fprintf_unfiltered (gdb_stdlog,
"PTRACE_DETACH (%s, %s, 0) (OK)\n",
target_pid_to_str (lp->ptid),
strsignal (signo));
}
delete_lwp (lp->ptid);
}
/* Per-LWP callback used while detaching. Detaches from LP via
detach_one_lwp, passing NULL for the signal pointer, unless LP is
the thread group leader, i.e. its LWP id equals the process id.
DATA is unused. Always returns 0. */
static int
detach_callback (struct lwp_info *lp, void *data)
{
/* We don't actually detach from the thread group leader just yet.
If the thread group exits, we must reap the zombie clone lwps
before we're able to reap the leader. */
if (ptid_get_lwp (lp->ptid) != ptid_get_pid (lp->ptid))
detach_one_lwp (lp, NULL);
return 0;
}
@ -1437,7 +1494,6 @@ static void
linux_nat_detach (struct target_ops *ops, const char *args, int from_tty)
{
int pid;
int status;
struct lwp_info *main_lwp;
pid = ptid_get_pid (inferior_ptid);
@ -1459,29 +1515,6 @@ linux_nat_detach (struct target_ops *ops, const char *args, int from_tty)
main_lwp = find_lwp_pid (pid_to_ptid (pid));
/* Pass on any pending signal for the last LWP. */
if ((args == NULL || *args == '\0')
&& get_pending_status (main_lwp, &status) != -1
&& WIFSTOPPED (status))
{
char *tem;
/* Put the signal number in ARGS so that inf_ptrace_detach will
pass it along with PTRACE_DETACH. */
tem = (char *) alloca (8);
xsnprintf (tem, 8, "%d", (int) WSTOPSIG (status));
args = tem;
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog,
"LND: Sending signal %s to %s\n",
args,
target_pid_to_str (main_lwp->ptid));
}
if (linux_nat_prepare_to_resume != NULL)
linux_nat_prepare_to_resume (main_lwp);
delete_lwp (main_lwp->ptid);
if (forks_exist_p ())
{
/* Multi-fork case. The current inferior_ptid is being detached
@ -1491,7 +1524,24 @@ linux_nat_detach (struct target_ops *ops, const char *args, int from_tty)
linux_fork_detach (args, from_tty);
}
else
linux_ops->to_detach (ops, args, from_tty);
{
int signo;
target_announce_detach (from_tty);
/* Pass on any pending signal for the last LWP, unless the user
requested detaching with a different signal (most likely 0,
meaning, discard the signal). */
if (args != NULL)
signo = atoi (args);
else
signo = get_detach_signal (main_lwp);
detach_one_lwp (main_lwp, &signo);
inf_ptrace_detach_success (ops);
}
delete_lwp (main_lwp->ptid);
}
/* Resume execution of the inferior process. If STEP is nonzero,