Name stack+tls VMAs with PR_SET_VMA_ANON_NAME

Bionic creates a single thread mapping to hold a thread's stack and static
TLS memory. Use PR_SET_VMA_ANON_NAME to name this region
"stack_and_tls:<tid>", where <tid> is the thread's ID. dumpsys meminfo can
report this region as "Stack" memory.

The main thread's memory is instead named "stack_and_tls:main", and the
VMA is renamed to "stack_and_tls:main" in a post-fork child.

For the main thread, and threads using pthread_attr_setstack, Bionic still
creates the stack_and_tls mapping, but it only has TLS memory in it.

Bug: http://b/134795155
Test: run "dumpsys meminfo" and verify that this CL increases the reported
  stack memory usage from about 4MB to 21MB.
Change-Id: Id1f39ff083329e83426130b4ef94222ffacb90ae
Merged-In: Id1f39ff083329e83426130b4ef94222ffacb90ae
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 4cf14ad..0dbf539 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -254,6 +254,8 @@
   ThreadMapping result = {};
   result.mmap_base = space;
   result.mmap_size = mmap_size;
+  result.mmap_base_unguarded = space + stack_guard_size;
+  result.mmap_size_unguarded = mmap_size - stack_guard_size - PTHREAD_GUARD_SIZE;
   result.static_tls = space + mmap_size - PTHREAD_GUARD_SIZE - layout.size();
   result.stack_base = space;
   result.stack_top = result.static_tls;
@@ -315,12 +317,33 @@
   thread->attr = *attr;
   thread->mmap_base = mapping.mmap_base;
   thread->mmap_size = mapping.mmap_size;
+  thread->mmap_base_unguarded = mapping.mmap_base_unguarded;
+  thread->mmap_size_unguarded = mapping.mmap_size_unguarded;
 
   *tcbp = tcb;
   *child_stack = stack_top;
   return 0;
 }
 
+void __set_stack_and_tls_vma_name(bool is_main_thread) {
+  // Labels the unguarded stack-and-tls range of the thread returned by __get_thread() to help with
+  // debugging. The range also holds static TLS data, typically a few pages (e.g. bionic_tls).
+  pthread_internal_t* thread = __get_thread();
+  const char* name;
+  if (is_main_thread) {
+    name = "stack_and_tls:main";
+  } else {
+    // The kernel doesn't copy the name string, so it is written into the thread's vma_name_buffer,
+    // which lasts at least as long as the mapped area (its VMAs are unmapped by a single munmap).
+    auto& name_buffer = thread->vma_name_buffer;
+    static_assert(arraysize(name_buffer) >= arraysize("stack_and_tls:") + 11 + 1);
+    async_safe_format_buffer(name_buffer, arraysize(name_buffer), "stack_and_tls:%d", thread->tid);
+    name = name_buffer;
+  }
+  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, thread->mmap_base_unguarded, thread->mmap_size_unguarded,
+        name);
+}
+
 __attribute__((no_sanitize("hwaddress")))
 static int __pthread_start(void* arg) {
   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(arg);
@@ -333,6 +356,7 @@
   // accesses previously made by the creating thread are visible to us.
   thread->startup_handshake_lock.lock();
 
+  __set_stack_and_tls_vma_name(false);
   __init_additional_stacks(thread);
 
   void* result = thread->start_routine(thread->start_routine_arg);