Better handling of sigset_t on LP32.

The main motivation here is that the sigprocmask in pthread_exit wasn't
actually blocking the real-time signals, and debuggerd (amongst other
things) is using them. I wasn't able to write a test that actually won
that race but I did write an equivalent one for posix_spawn.

This also fixes all the uses of sigset_t where the sigset_t isn't
exposed to the outside (which we can't easily fix because it would be
an ABI change).

Bug: https://issuetracker.google.com/72291624
Test: ran tests
Change-Id: Ib6eebebc5a7b0150079f1cb79593247917dcf750
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index 8b4c44e..f1b65fd 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -34,6 +34,7 @@
 #include <sys/mman.h>
 
 #include "private/bionic_defs.h"
+#include "private/ScopedSignalBlocker.h"
 #include "pthread_internal.h"
 
 extern "C" __noreturn void _exit_with_stack_teardown(void*, size_t);
@@ -63,6 +64,12 @@
   }
 }
 
+static void __pthread_unmap_tls(pthread_internal_t* thread) {
+  // Unmap the bionic TLS, including guard pages.
+  void* allocation = reinterpret_cast<char*>(thread->bionic_tls) - PTHREAD_GUARD_SIZE;
+  munmap(allocation, BIONIC_TLS_SIZE + 2 * PTHREAD_GUARD_SIZE);
+}
+
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
 void pthread_exit(void* return_value) {
   // Call dtors for thread_local objects first.
@@ -96,10 +103,6 @@
     thread->alternate_signal_stack = NULL;
   }
 
-  // Unmap the bionic TLS, including guard pages.
-  void* allocation = reinterpret_cast<char*>(thread->bionic_tls) - PTHREAD_GUARD_SIZE;
-  munmap(allocation, BIONIC_TLS_SIZE + 2 * PTHREAD_GUARD_SIZE);
-
   ThreadJoinState old_state = THREAD_NOT_JOINED;
   while (old_state == THREAD_NOT_JOINED &&
          !atomic_compare_exchange_weak(&thread->join_state, &old_state, THREAD_EXITED_NOT_JOINED)) {
@@ -120,16 +123,15 @@
       // That's not something we can do in C.
 
       // We don't want to take a signal after we've unmapped the stack.
-      // That's one last thing we can handle in C.
-      sigset_t mask;
-      sigfillset(&mask);
-      sigprocmask(SIG_SETMASK, &mask, NULL);
-
+      // That's one last thing we can do before dropping to assembler.
+      ScopedSignalBlocker ssb;
+      __pthread_unmap_tls(thread);
       _exit_with_stack_teardown(thread->attr.stack_base, thread->mmap_size);
     }
   }
 
   // No need to free mapped space. Either there was no space mapped, or it is left for
   // the pthread_join caller to clean up.
+  __pthread_unmap_tls(thread);
   __exit(0);
 }