Allocate thread local buffers in __init_tls.

Thread local buffers were using pthread_setspecific for storage with
lazy initialization. pthread_setspecific shares TLS slots between the
linker and libc.so, so thread local buffers being initialized in a
different order between libc.so and the linker meant that bad things
would happen (manifesting as snprintf not working because the
locale was mangled)

Bug: http://b/20464031
Test: /data/nativetest64/bionic-unit-tests/bionic-unit-tests
      everything passes
Test: /data/nativetest/bionic-unit-tests/bionic-unit-tests
      thread_local tests are failing both before and after (KUSER_HELPERS?)
Test: /data/nativetest64/bionic-unit-tests-static/bionic-unit-tests-static
      no additional failures
Change-Id: I9f445a77c6e86979f3fa49c4a5feecf6ec2b0c3f
diff --git a/libc/bionic/grp_pwd.cpp b/libc/bionic/grp_pwd.cpp
index e99eaca..5d565c4 100644
--- a/libc/bionic/grp_pwd.cpp
+++ b/libc/bionic/grp_pwd.cpp
@@ -39,9 +39,9 @@
 
 #include "private/android_filesystem_config.h"
 #include "private/bionic_macros.h"
+#include "private/grp_pwd.h"
 #include "private/ErrnoRestorer.h"
 #include "private/libc_logging.h"
-#include "private/ThreadLocalBuffer.h"
 
 // Generated android_ids array
 #include "generated_android_ids.h"
@@ -52,25 +52,14 @@
 // okay for all the <grp.h> functions to share state, and all the <passwd.h>
 // functions to share state, but <grp.h> functions can't clobber <passwd.h>
 // functions' state and vice versa.
+#include "bionic/pthread_internal.h"
+static group_state_t* get_group_tls_buffer() {
+  return &__get_bionic_tls().group;
+}
 
-struct group_state_t {
-  group group_;
-  char* group_members_[2];
-  char group_name_buffer_[32];
-  // Must be last so init_group_state can run a simple memset for the above
-  ssize_t getgrent_idx;
-};
-
-struct passwd_state_t {
-  passwd passwd_;
-  char name_buffer_[32];
-  char dir_buffer_[32];
-  char sh_buffer_[32];
-  ssize_t getpwent_idx;
-};
-
-static ThreadLocalBuffer<group_state_t> g_group_tls_buffer;
-static ThreadLocalBuffer<passwd_state_t> g_passwd_tls_buffer;
+static passwd_state_t* get_passwd_tls_buffer() {
+  return &__get_bionic_tls().passwd;
+}
 
 static void init_group_state(group_state_t* state) {
   memset(state, 0, sizeof(group_state_t) - sizeof(state->getgrent_idx));
@@ -78,7 +67,7 @@
 }
 
 static group_state_t* __group_state() {
-  group_state_t* result = g_group_tls_buffer.get();
+  group_state_t* result = get_group_tls_buffer();
   if (result != nullptr) {
     init_group_state(result);
   }
@@ -432,7 +421,7 @@
 }
 
 passwd* getpwuid(uid_t uid) { // NOLINT: implementing bad function.
-  passwd_state_t* state = g_passwd_tls_buffer.get();
+  passwd_state_t* state = get_passwd_tls_buffer();
   if (state == NULL) {
     return NULL;
   }
@@ -450,7 +439,7 @@
 }
 
 passwd* getpwnam(const char* login) { // NOLINT: implementing bad function.
-  passwd_state_t* state = g_passwd_tls_buffer.get();
+  passwd_state_t* state = get_passwd_tls_buffer();
   if (state == NULL) {
     return NULL;
   }
@@ -483,7 +472,7 @@
 }
 
 void setpwent() {
-  passwd_state_t* state = g_passwd_tls_buffer.get();
+  passwd_state_t* state = get_passwd_tls_buffer();
   if (state) {
     state->getpwent_idx = 0;
   }
@@ -494,7 +483,7 @@
 }
 
 passwd* getpwent() {
-  passwd_state_t* state = g_passwd_tls_buffer.get();
+  passwd_state_t* state = get_passwd_tls_buffer();
   if (state == NULL) {
     return NULL;
   }
@@ -608,7 +597,7 @@
 }
 
 void setgrent() {
-  group_state_t* state = g_group_tls_buffer.get();
+  group_state_t* state = get_group_tls_buffer();
   if (state) {
     state->getgrent_idx = 0;
   }
@@ -619,7 +608,7 @@
 }
 
 group* getgrent() {
-  group_state_t* state = g_group_tls_buffer.get();
+  group_state_t* state = get_group_tls_buffer();
   if (state == NULL) {
     return NULL;
   }
diff --git a/libc/bionic/libgen.cpp b/libc/bionic/libgen.cpp
index c415c0f..33b46a1 100644
--- a/libc/bionic/libgen.cpp
+++ b/libc/bionic/libgen.cpp
@@ -34,10 +34,7 @@
 #include <sys/cdefs.h>
 #include <sys/param.h>
 
-#include "private/ThreadLocalBuffer.h"
-
-static ThreadLocalBuffer<char, MAXPATHLEN> g_basename_tls_buffer;
-static ThreadLocalBuffer<char, MAXPATHLEN> g_dirname_tls_buffer;
+#include "bionic/pthread_internal.h"
 
 static int __basename_r(const char* path, char* buffer, size_t buffer_size) {
   const char* startp = NULL;
@@ -161,13 +158,13 @@
 }
 
 char* basename(const char* path) {
-  char* buf = g_basename_tls_buffer.get();
-  int rc = __basename_r(path, buf, g_basename_tls_buffer.size());
+  char* buf = __get_bionic_tls().basename_buf;
+  int rc = __basename_r(path, buf, sizeof(__get_bionic_tls().basename_buf));
   return (rc < 0) ? NULL : buf;
 }
 
 char* dirname(const char* path) {
-  char* buf = g_dirname_tls_buffer.get();
-  int rc = __dirname_r(path, buf, g_dirname_tls_buffer.size());
+  char* buf = __get_bionic_tls().dirname_buf;
+  int rc = __dirname_r(path, buf, sizeof(__get_bionic_tls().dirname_buf));
   return (rc < 0) ? NULL : buf;
 }
diff --git a/libc/bionic/locale.cpp b/libc/bionic/locale.cpp
index 113118d..38e15b7 100644
--- a/libc/bionic/locale.cpp
+++ b/libc/bionic/locale.cpp
@@ -37,6 +37,8 @@
 
 #include "private/bionic_macros.h"
 
+#include "bionic/pthread_internal.h"
+
 // We only support two locales, the "C" locale (also known as "POSIX"),
 // and the "C.UTF-8" locale (also known as "en_US.UTF-8").
 
@@ -161,17 +163,9 @@
   return const_cast<char*>(__bionic_current_locale_is_utf8 ? "C.UTF-8" : "C");
 }
 
-// We can't use a constructor to create g_uselocal_key, because it may be used in constructors.
-static pthread_once_t g_uselocale_once = PTHREAD_ONCE_INIT;
-static pthread_key_t g_uselocale_key;
-
-static void g_uselocale_key_init() {
-  pthread_key_create(&g_uselocale_key, NULL);
-}
-
 locale_t uselocale(locale_t new_locale) {
-  pthread_once(&g_uselocale_once, g_uselocale_key_init);
-  locale_t old_locale = static_cast<locale_t>(pthread_getspecific(g_uselocale_key));
+  locale_t* locale_storage = &__get_bionic_tls().locale;
+  locale_t old_locale = *locale_storage;
 
   // If this is the first call to uselocale(3) on this thread, we return LC_GLOBAL_LOCALE.
   if (old_locale == NULL) {
@@ -179,7 +173,7 @@
   }
 
   if (new_locale != NULL) {
-    pthread_setspecific(g_uselocale_key, new_locale);
+    *locale_storage = new_locale;
   }
 
   return old_locale;
diff --git a/libc/bionic/mntent.cpp b/libc/bionic/mntent.cpp
index 994b84d..92284ce 100644
--- a/libc/bionic/mntent.cpp
+++ b/libc/bionic/mntent.cpp
@@ -29,15 +29,11 @@
 #include <mntent.h>
 #include <string.h>
 
-#include "private/ThreadLocalBuffer.h"
-
-static ThreadLocalBuffer<mntent> g_getmntent_mntent_tls_buffer;
-static ThreadLocalBuffer<char, BUFSIZ> g_getmntent_strings_tls_buffer;
+#include "bionic/pthread_internal.h"
 
 mntent* getmntent(FILE* fp) {
-  return getmntent_r(fp, g_getmntent_mntent_tls_buffer.get(),
-                     g_getmntent_strings_tls_buffer.get(),
-                     g_getmntent_strings_tls_buffer.size());
+  auto& tls = __get_bionic_tls();
+  return getmntent_r(fp, &tls.mntent_buf, tls.mntent_strings, sizeof(tls.mntent_strings));
 }
 
 mntent* getmntent_r(FILE* fp, struct mntent* e, char* buf, int buf_len) {
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index ab92853..f591c86 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -55,6 +55,18 @@
   // Slot 0 must point to itself. The x86 Linux kernel reads the TLS from %fs:0.
   thread->tls[TLS_SLOT_SELF] = thread->tls;
   thread->tls[TLS_SLOT_THREAD_ID] = thread;
+
+  // Add a guard page before and after.
+  size_t allocation_size = BIONIC_TLS_SIZE + 2 * PAGE_SIZE;
+  void* allocation = mmap(nullptr, allocation_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (allocation == MAP_FAILED) {
+    __libc_fatal("failed to allocate TLS");
+  }
+
+  thread->bionic_tls = reinterpret_cast<bionic_tls*>(static_cast<char*>(allocation) + PAGE_SIZE);
+  if (mprotect(thread->bionic_tls, BIONIC_TLS_SIZE, PROT_READ | PROT_WRITE) != 0) {
+    __libc_fatal("failed to mprotect TLS");
+  }
 }
 
 void __init_thread_stack_guard(pthread_internal_t* thread) {
diff --git a/libc/bionic/pthread_internal.cpp b/libc/bionic/pthread_internal.cpp
index 5819bc1..2bc2bfb 100644
--- a/libc/bionic/pthread_internal.cpp
+++ b/libc/bionic/pthread_internal.cpp
@@ -86,6 +86,10 @@
 }
 
 static void __pthread_internal_free(pthread_internal_t* thread) {
+  // Unmap the TLS, including guard pages.
+  void* allocation = reinterpret_cast<char*>(thread->bionic_tls) - PAGE_SIZE;
+  munmap(allocation, BIONIC_TLS_SIZE + 2 * PAGE_SIZE);
+
   if (thread->mmap_size != 0) {
     // Free mapped space, including thread stack and pthread_internal_t.
     munmap(thread->attr.stack_base, thread->mmap_size);
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index d2abea0..b170299 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -110,6 +110,8 @@
    */
 #define __BIONIC_DLERROR_BUFFER_SIZE 512
   char dlerror_buffer[__BIONIC_DLERROR_BUFFER_SIZE];
+
+  bionic_tls* bionic_tls;
 };
 
 __LIBC_HIDDEN__ int __init_thread(pthread_internal_t* thread);
@@ -133,6 +135,10 @@
   return nullptr;
 }
 
+static inline __always_inline bionic_tls& __get_bionic_tls() {
+  return *__get_thread()->bionic_tls;
+}
+
 __LIBC_HIDDEN__ void pthread_key_clean_all(void);
 
 #if defined(__LP64__)
diff --git a/libc/bionic/pty.cpp b/libc/bionic/pty.cpp
index d699ff5..bdabf36 100644
--- a/libc/bionic/pty.cpp
+++ b/libc/bionic/pty.cpp
@@ -36,10 +36,7 @@
 #include <unistd.h>
 #include <utmp.h>
 
-#include "private/ThreadLocalBuffer.h"
-
-static ThreadLocalBuffer<char, 32> g_ptsname_tls_buffer;
-static ThreadLocalBuffer<char, 64> g_ttyname_tls_buffer;
+#include "bionic/pthread_internal.h"
 
 int getpt() {
   return posix_openpt(O_RDWR|O_NOCTTY);
@@ -54,8 +51,9 @@
 }
 
 char* ptsname(int fd) {
-  char* buf = g_ptsname_tls_buffer.get();
-  int error = ptsname_r(fd, buf, g_ptsname_tls_buffer.size());
+  bionic_tls& tls = __get_bionic_tls();
+  char* buf = tls.ptsname_buf;
+  int error = ptsname_r(fd, buf, sizeof(tls.ptsname_buf));
   return (error == 0) ? buf : NULL;
 }
 
@@ -80,8 +78,9 @@
 }
 
 char* ttyname(int fd) {
-  char* buf = g_ttyname_tls_buffer.get();
-  int error = ttyname_r(fd, buf, g_ttyname_tls_buffer.size());
+  bionic_tls& tls = __get_bionic_tls();
+  char* buf = tls.ttyname_buf;
+  int error = ttyname_r(fd, buf, sizeof(tls.ttyname_buf));
   return (error == 0) ? buf : NULL;
 }
 
diff --git a/libc/bionic/strerror.cpp b/libc/bionic/strerror.cpp
index f74194f..99692ca 100644
--- a/libc/bionic/strerror.cpp
+++ b/libc/bionic/strerror.cpp
@@ -27,12 +27,11 @@
  */
 
 #include <string.h>
-#include "private/ThreadLocalBuffer.h"
+
+#include "bionic/pthread_internal.h"
 
 extern "C" const char* __strerror_lookup(int);
 
-static ThreadLocalBuffer<char, NL_TEXTMAX> g_strerror_tls_buffer;
-
 char* strerror(int error_number) {
   // Just return the original constant in the easy cases.
   char* result = const_cast<char*>(__strerror_lookup(error_number));
@@ -40,7 +39,8 @@
     return result;
   }
 
-  result = g_strerror_tls_buffer.get();
-  strerror_r(error_number, result, g_strerror_tls_buffer.size());
+  bionic_tls& tls = __get_bionic_tls();
+  result = tls.strerror_buf;
+  strerror_r(error_number, result, sizeof(tls.strerror_buf));
   return result;
 }
diff --git a/libc/bionic/strsignal.cpp b/libc/bionic/strsignal.cpp
index c389ddd..81a8f95 100644
--- a/libc/bionic/strsignal.cpp
+++ b/libc/bionic/strsignal.cpp
@@ -27,13 +27,12 @@
  */
 
 #include <string.h>
-#include "private/ThreadLocalBuffer.h"
+
+#include "bionic/pthread_internal.h"
 
 extern "C" const char* __strsignal_lookup(int);
 extern "C" const char* __strsignal(int, char*, size_t);
 
-static ThreadLocalBuffer<char, NL_TEXTMAX> g_strsignal_tls_buffer;
-
 char* strsignal(int signal_number) {
   // Just return the original constant in the easy cases.
   char* result = const_cast<char*>(__strsignal_lookup(signal_number));
@@ -41,6 +40,6 @@
     return result;
   }
 
-  return const_cast<char*>(__strsignal(signal_number, g_strsignal_tls_buffer.get(),
-                                       g_strsignal_tls_buffer.size()));
+  bionic_tls& tls = __get_bionic_tls();
+  return const_cast<char*>(__strsignal(signal_number, tls.strsignal_buf, sizeof(tls.strsignal_buf)));
 }
diff --git a/libc/private/bionic_tls.h b/libc/private/bionic_tls.h
index c61e2ff..852b9ae 100644
--- a/libc/private/bionic_tls.h
+++ b/libc/private/bionic_tls.h
@@ -29,10 +29,15 @@
 #ifndef __BIONIC_PRIVATE_BIONIC_TLS_H_
 #define __BIONIC_PRIVATE_BIONIC_TLS_H_
 
+#include <locale.h>
+#include <mntent.h>
+#include <stdio.h>
 #include <sys/cdefs.h>
+#include <sys/param.h>
 
 #include "bionic_macros.h"
 #include "__get_tls.h"
+#include "grp_pwd.h"
 
 __BEGIN_DECLS
 
@@ -77,6 +82,28 @@
   BIONIC_TLS_SLOTS // Must come last!
 };
 
+// ~3 pages.
+struct bionic_tls {
+  locale_t locale;
+
+  char basename_buf[MAXPATHLEN];
+  char dirname_buf[MAXPATHLEN];
+
+  mntent mntent_buf;
+  char mntent_strings[BUFSIZ];
+
+  char ptsname_buf[32];
+  char ttyname_buf[64];
+
+  char strerror_buf[NL_TEXTMAX];
+  char strsignal_buf[NL_TEXTMAX];
+
+  group_state_t group;
+  passwd_state_t passwd;
+};
+
+#define BIONIC_TLS_SIZE (BIONIC_ALIGN(sizeof(bionic_tls), PAGE_SIZE))
+
 /*
  * Bionic uses some pthread keys internally. All pthread keys used internally
  * should be created in constructors, except for keys that may be used in or
@@ -86,22 +113,10 @@
  * pthread_test should fail if we forget.
  *
  * These are the pthread keys currently used internally by libc:
- *
- *  basename               libc (ThreadLocalBuffer)
- *  dirname                libc (ThreadLocalBuffer)
- *  uselocale              libc (can be used in constructors)
- *  getmntent_mntent       libc (ThreadLocalBuffer)
- *  getmntent_strings      libc (ThreadLocalBuffer)
- *  ptsname                libc (ThreadLocalBuffer)
- *  ttyname                libc (ThreadLocalBuffer)
- *  strerror               libc (ThreadLocalBuffer)
- *  strsignal              libc (ThreadLocalBuffer)
- *  passwd                 libc (ThreadLocalBuffer)
- *  group                  libc (ThreadLocalBuffer)
  *  _res_key               libc (constructor in BSD code)
  */
 
-#define LIBC_PTHREAD_KEY_RESERVED_COUNT 12
+#define LIBC_PTHREAD_KEY_RESERVED_COUNT 1
 
 /* Internally, jemalloc uses a single key for per thread data. */
 #define JEMALLOC_PTHREAD_KEY_RESERVED_COUNT 1
diff --git a/libc/private/ThreadLocalBuffer.h b/libc/private/grp_pwd.h
similarity index 60%
rename from libc/private/ThreadLocalBuffer.h
rename to libc/private/grp_pwd.h
index 5e43665..e1aff4f 100644
--- a/libc/private/ThreadLocalBuffer.h
+++ b/libc/private/grp_pwd.h
@@ -1,5 +1,7 @@
+#pragma once
+
 /*
- * Copyright (C) 2012 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,36 +28,21 @@
  * SUCH DAMAGE.
  */
 
-#ifndef _BIONIC_THREAD_LOCAL_BUFFER_H_included
-#define _BIONIC_THREAD_LOCAL_BUFFER_H_included
+#include <grp.h>
+#include <pwd.h>
 
-#include <malloc.h>
-#include <pthread.h>
-
-// TODO: use __thread instead?
-
-template <typename T, size_t Size = sizeof(T)>
-class ThreadLocalBuffer {
- public:
-  ThreadLocalBuffer() {
-    // We used to use pthread_once to initialize the keys, but life is more predictable
-    // if we allocate them all up front when the C library starts up, via __constructor__.
-    pthread_key_create(&key_, free);
-  }
-
-  T* get() {
-    T* result = reinterpret_cast<T*>(pthread_getspecific(key_));
-    if (result == nullptr) {
-      result = reinterpret_cast<T*>(calloc(1, Size));
-      pthread_setspecific(key_, result);
-    }
-    return result;
-  }
-
-  size_t size() { return Size; }
-
- private:
-  pthread_key_t key_;
+struct group_state_t {
+  group group_;
+  char* group_members_[2];
+  char group_name_buffer_[32];
+  // Must be last so init_group_state can run a simple memset for the above
+  ssize_t getgrent_idx;
 };
 
-#endif // _BIONIC_THREAD_LOCAL_BUFFER_H_included
+struct passwd_state_t {
+  passwd passwd_;
+  char name_buffer_[32];
+  char dir_buffer_[32];
+  char sh_buffer_[32];
+  ssize_t getpwent_idx;
+};