Minor robustness tweaks to unwind_backtrace_thread.

Use the alternate stack specified by sigaltstack (if there is one).

Use tgkill instead of kill to ensure that we send the signal to
precisely the right thread.

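Add timeout logic: wait up to roughly 250 ms for the target thread
to start dumping its stack, then try to cancel the request.  Once a
dump has started, wait for it to finish without a timeout.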

Change-Id: I72922137d1bc0c8becff98244926bde09fe3990e
diff --git a/libcorkscrew/backtrace.c b/libcorkscrew/backtrace.c
index f9a49ec..857b741 100644
--- a/libcorkscrew/backtrace.c
+++ b/libcorkscrew/backtrace.c
@@ -85,9 +85,13 @@
 }
 
 #ifdef CORKSCREW_HAVE_ARCH
+static const int32_t STATE_DUMPING = -1;
+static const int32_t STATE_DONE = -2;
+static const int32_t STATE_CANCEL = -3;
+
 static pthread_mutex_t g_unwind_signal_mutex = PTHREAD_MUTEX_INITIALIZER;
 static volatile struct {
-    int32_t tid;
+    int32_t tid_state;
     const map_info_t* map_info_list;
     backtrace_frame_t* backtrace;
     size_t ignore_depth;
@@ -96,22 +100,23 @@
 } g_unwind_signal_state;
 
 static void unwind_backtrace_thread_signal_handler(int n, siginfo_t* siginfo, void* sigcontext) {
-    int32_t tid = android_atomic_acquire_load(&g_unwind_signal_state.tid);
-    if (tid == gettid()) {
+    if (!android_atomic_acquire_cas(gettid(), STATE_DUMPING, &g_unwind_signal_state.tid_state)) {
         g_unwind_signal_state.returned_frames = unwind_backtrace_signal_arch(
                 siginfo, sigcontext,
                 g_unwind_signal_state.map_info_list,
                 g_unwind_signal_state.backtrace,
                 g_unwind_signal_state.ignore_depth,
                 g_unwind_signal_state.max_depth);
-        android_atomic_release_store(-1, &g_unwind_signal_state.tid);
+        android_atomic_release_store(STATE_DONE, &g_unwind_signal_state.tid_state);
     } else {
         ALOGV("Received spurious SIGURG on thread %d that was intended for thread %d.",
-                gettid(), tid);
+                gettid(), android_atomic_acquire_load(&g_unwind_signal_state.tid_state));
     }
 }
 #endif
 
+extern int tgkill(int tgid, int tid, int sig);
+
 ssize_t unwind_backtrace_thread(pid_t tid, backtrace_frame_t* backtrace,
         size_t ignore_depth, size_t max_depth) {
     if (tid == gettid()) {
@@ -125,7 +130,7 @@
     struct sigaction oact;
     memset(&act, 0, sizeof(act));
     act.sa_sigaction = unwind_backtrace_thread_signal_handler;
-    act.sa_flags = SA_RESTART | SA_SIGINFO;
+    act.sa_flags = SA_RESTART | SA_SIGINFO | SA_ONSTACK;
     sigemptyset(&act.sa_mask);
 
     pthread_mutex_lock(&g_unwind_signal_mutex);
@@ -138,16 +143,51 @@
         g_unwind_signal_state.ignore_depth = ignore_depth;
         g_unwind_signal_state.max_depth = max_depth;
         g_unwind_signal_state.returned_frames = 0;
-        android_atomic_release_store(tid, &g_unwind_signal_state.tid);
+        android_atomic_release_store(tid, &g_unwind_signal_state.tid_state);
 
-        if (kill(tid, SIGURG)) {
+        // Signal the specific thread that we want to dump.
+        int32_t tid_state = tid;
+        if (tgkill(getpid(), tid, SIGURG)) {
             ALOGV("Failed to send SIGURG to thread %d.", tid);
-            android_atomic_release_store(-1, &g_unwind_signal_state.tid);
         } else {
-            while (android_atomic_acquire_load(&g_unwind_signal_state.tid) == tid) {
-                ALOGV("Waiting for response from thread %d...", tid);
-                usleep(1000);
+            // Wait for the other thread to start dumping the stack, or time out.
+            int wait_millis = 250;
+            for (;;) {
+                tid_state = android_atomic_acquire_load(&g_unwind_signal_state.tid_state);
+                if (tid_state != tid) {
+                    break;
+                }
+                if (wait_millis--) {
+                    ALOGV("Waiting for thread %d to start dumping the stack...", tid);
+                    usleep(1000);
+                } else {
+                    ALOGV("Timed out waiting for thread %d to start dumping the stack.", tid);
+                    break;
+                }
             }
+        }
+
+        // Try to cancel the dump if it has not started yet.
+        if (tid_state == tid) {
+            if (!android_atomic_acquire_cas(tid, STATE_CANCEL, &g_unwind_signal_state.tid_state)) {
+                ALOGV("Canceled thread %d stack dump.", tid);
+                tid_state = STATE_CANCEL;
+            } else {
+                tid_state = android_atomic_acquire_load(&g_unwind_signal_state.tid_state);
+            }
+        }
+
+        // Wait indefinitely for the dump to finish or be canceled.
+        // We cannot apply a timeout here because the other thread is accessing state that
+        // is owned by this thread, such as milist.  It should not take very
+        // long to take the dump once started.
+        while (tid_state == STATE_DUMPING) {
+            ALOGV("Waiting for thread %d to finish dumping the stack...", tid);
+            usleep(1000);
+            tid_state = android_atomic_acquire_load(&g_unwind_signal_state.tid_state);
+        }
+
+        if (tid_state == STATE_DONE) {
             frames = g_unwind_signal_state.returned_frames;
         }