Move the trivial __set_tls() implementations to "bionic_tls.h".

This simplifies things for everything but x86, which is a basket case.

Moving them has no meaningful performance impact, because __set_tls() is
only called once per created thread (plus once on the main thread).

Change-Id: I45470b5762e55e652b57e92cd3b3768f5d2fc4fe
diff --git a/libc/bionic/ndk_cruft.cpp b/libc/bionic/ndk_cruft.cpp
index b15a317..f90b511 100644
--- a/libc/bionic/ndk_cruft.cpp
+++ b/libc/bionic/ndk_cruft.cpp
@@ -28,6 +28,9 @@
 
 // This file perpetuates the mistakes of the past.
 
+// LP64 doesn't need to support any legacy cruft.
+#if !defined(__LP64__)
+
 #include <ctype.h>
 #include <dirent.h>
 #include <errno.h>
@@ -47,10 +50,19 @@
 
 #include "platform/bionic/macros.h"
 
-extern "C" {
+#define __futex_wake __real_futex_wake
+#define __futex_wait __real_futex_wait
+#include "private/bionic_futex.h"
+#undef __futex_wake
+#undef __futex_wait
 
-// LP64 doesn't need to support any legacy cruft.
-#if !defined(__LP64__)
+#define __get_thread __real_get_thread
+#include "pthread_internal.h"
+#undef __get_thread
+static inline void** __real_get_tls() { return __get_tls(); }
+#undef __get_tls
+
+extern "C" {
 
 // By the time any NDK-built code is running, there are plenty of threads.
 int __isthreaded = 1;
@@ -73,8 +85,7 @@
 
 // TODO: does anything still need this?
 void** __get_tls() {
-#include "platform/bionic/tls.h"
-  return __get_tls();
+  return __real_get_tls();
 }
 
 // This non-standard function was in our <string.h> for some reason.
@@ -213,12 +224,6 @@
   return vdprintf(fd, fmt, ap);
 }
 
-#define __futex_wake __real_futex_wake
-#define __futex_wait __real_futex_wait
-#include "private/bionic_futex.h"
-#undef __futex_wake
-#undef __futex_wait
-
 // This used to be in <sys/atomics.h>.
 int __futex_wake(volatile void* ftx, int count) {
   return __real_futex_wake(ftx, count);
@@ -356,14 +361,6 @@
   return malloc(size);
 }
 
-} // extern "C"
-
-#define __get_thread __real_get_thread
-#include "pthread_internal.h"
-#undef __get_thread
-
-extern "C" {
-
 // Various third-party apps contain a backport of our pthread_rwlock implementation that uses this.
 pthread_internal_t* __get_thread() {
   return __real_get_thread();
@@ -388,6 +385,6 @@
     return fwrite(&value, sizeof(value), 1, fp) == 1 ? 0 : EOF;
 }
 
-#endif // !defined (__LP64__)
-
 } // extern "C"
+
+#endif // !defined (__LP64__)
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index cbaa9a6..ae9a791 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -240,8 +240,6 @@
   tcb->tls_slot(TLS_SLOT_DTV) = &val->generation;
 }
 
-extern "C" __LIBC_HIDDEN__ int __set_tls(void* ptr);
-
 __LIBC_HIDDEN__ void pthread_key_clean_all(void);
 
 // Address space is precious on LP32, so use the minimum unit: one page.