Stop protecting pthread join_state with g_thread_list_lock.

1. Move the representation of thread join_state from pthread.attr.flags
   to pthread.join_state. This makes thread state changes clearer.
2. Use atomic operations for pthread.join_state, so it no longer needs
   to be protected by g_thread_list_lock. g_thread_list_lock will be
   reduced to only protecting g_thread_list, or even removed, in further
   changes.

Bug: 19636317
Change-Id: I31fb143a7c69508c7287307dd3b0776993ec0f43
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index d0d64b0..81cc67b 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -87,9 +87,12 @@
     thread->alternate_signal_stack = NULL;
   }
 
-  bool free_mapped_space = false;
-  pthread_mutex_lock(&g_thread_list_lock);
-  if ((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) != 0) {
+  ThreadJoinState old_state = THREAD_NOT_JOINED;
+  while (old_state == THREAD_NOT_JOINED &&
+         !atomic_compare_exchange_weak(&thread->join_state, &old_state, THREAD_EXITED_NOT_JOINED)) {
+  }
+
+  if (old_state == THREAD_DETACHED) {
     // The thread is detached, no one will use pthread_internal_t after pthread_exit.
     // So we can free mapped space, which includes pthread_internal_t and thread stack.
     // First make sure that the kernel does not try to clear the tid field
@@ -97,28 +100,25 @@
     __set_tid_address(NULL);
 
     // pthread_internal_t is freed below with stack, not here.
+    pthread_mutex_lock(&g_thread_list_lock);
     _pthread_internal_remove_locked(thread, false);
-    free_mapped_space = true;
-  } else {
-    // Mark the thread as exiting without freeing pthread_internal_t.
-    thread->attr.flags |= PTHREAD_ATTR_FLAG_ZOMBIE;
+    pthread_mutex_unlock(&g_thread_list_lock);
+
+    if (thread->mmap_size != 0) {
+      // We need to free mapped space for detached threads when they exit.
+      // That's not something we can do in C.
+
+      // We don't want to take a signal after we've unmapped the stack.
+      // That's one last thing we can handle in C.
+      sigset_t mask;
+      sigfillset(&mask);
+      sigprocmask(SIG_SETMASK, &mask, NULL);
+
+      _exit_with_stack_teardown(thread->attr.stack_base, thread->mmap_size);
+    }
   }
-  pthread_mutex_unlock(&g_thread_list_lock);
 
-  if (free_mapped_space && thread->mmap_size != 0) {
-    // We need to free mapped space for detached threads when they exit.
-    // That's not something we can do in C.
-
-    // We don't want to take a signal after we've unmapped the stack.
-    // That's one last thing we can handle in C.
-    sigset_t mask;
-    sigfillset(&mask);
-    sigprocmask(SIG_SETMASK, &mask, NULL);
-
-    _exit_with_stack_teardown(thread->attr.stack_base, thread->mmap_size);
-  } else {
-    // No need to free mapped space. Either there was no space mapped, or it is left for
-    // the pthread_join caller to clean up.
-    __exit(0);
-  }
+  // No need to free mapped space. Either there was no space mapped, or it is left for
+  // the pthread_join caller to clean up.
+  __exit(0);
 }