Fix __errno for LP64 and clean up __get_tls.

If __get_tls has the right type (void**), a lot of confusing casting can
disappear.

It was probably a mistake that __get_tls was exposed as a function for mips
and x86 (but not arm), so let's ensure that (a) the __get_tls function
always matches the macro, (b) we have the function for arm too, and (c) we
don't have the function for any 64-bit architecture.

Change-Id: Ie9cb989b66e2006524ad7733eb6e1a65055463be
diff --git a/libc/bionic/__errno.c b/libc/bionic/__errno.cpp
similarity index 92%
rename from libc/bionic/__errno.c
rename to libc/bionic/__errno.cpp
index 8f33cce..9caa618 100644
--- a/libc/bionic/__errno.c
+++ b/libc/bionic/__errno.cpp
@@ -25,10 +25,11 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
+
 #include <errno.h>
 #include <bionic_tls.h>
+#include <stdint.h>
 
-volatile int*  __errno( void )
-{
-  return  &((volatile int*)__get_tls())[TLS_SLOT_ERRNO];
+volatile int*  __errno() {
+  return reinterpret_cast<int*>(&(__get_tls()[TLS_SLOT_ERRNO]));
 }
diff --git a/libc/bionic/__errno.c b/libc/bionic/__get_tls.cpp
similarity index 90%
copy from libc/bionic/__errno.c
copy to libc/bionic/__get_tls.cpp
index 8f33cce..d01e2aa 100644
--- a/libc/bionic/__errno.c
+++ b/libc/bionic/__get_tls.cpp
@@ -25,10 +25,8 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-#include <errno.h>
-#include <bionic_tls.h>
 
-volatile int*  __errno( void )
-{
-  return  &((volatile int*)__get_tls())[TLS_SLOT_ERRNO];
+extern "C" void** __get_tls() {
+#include "private/__get_tls.h"
+  return __get_tls();
 }
diff --git a/libc/bionic/libc_init_dynamic.cpp b/libc/bionic/libc_init_dynamic.cpp
index 88e87a7..4e1374e 100644
--- a/libc/bionic/libc_init_dynamic.cpp
+++ b/libc/bionic/libc_init_dynamic.cpp
@@ -65,7 +65,7 @@
 // as soon as the shared library is loaded.
 __attribute__((constructor)) static void __libc_preinit() {
   // Read the kernel argument block pointer from TLS.
-  void* tls = const_cast<void*>(__get_tls());
+  void** tls = __get_tls();
   KernelArgumentBlock** args_slot = &reinterpret_cast<KernelArgumentBlock**>(tls)[TLS_SLOT_BIONIC_PREINIT];
   KernelArgumentBlock* args = *args_slot;
 
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 63695d3..9e06afc 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -50,6 +50,8 @@
 
 extern "C" void ATTRIBUTES _thread_created_hook(pid_t thread_id);
 
+extern "C" int __set_tls(void* ptr);
+
 static const int kPthreadInitFailed = 1;
 
 static pthread_mutex_t gPthreadStackCreationLock = PTHREAD_MUTEX_INITIALIZER;
diff --git a/libc/bionic/pthread_internals.cpp b/libc/bionic/pthread_internals.cpp
index 66bc5b7..59c6e48 100644
--- a/libc/bionic/pthread_internals.cpp
+++ b/libc/bionic/pthread_internals.cpp
@@ -64,6 +64,5 @@
 }
 
 __LIBC_ABI_PRIVATE__ pthread_internal_t* __get_thread(void) {
-  void** tls = reinterpret_cast<void**>(const_cast<void*>(__get_tls()));
-  return reinterpret_cast<pthread_internal_t*>(tls[TLS_SLOT_THREAD_ID]);
+  return reinterpret_cast<pthread_internal_t*>(__get_tls()[TLS_SLOT_THREAD_ID]);
 }
diff --git a/libc/bionic/pthread_key.cpp b/libc/bionic/pthread_key.cpp
index 7e8b4cd..706758b 100644
--- a/libc/bionic/pthread_key.cpp
+++ b/libc/bionic/pthread_key.cpp
@@ -133,7 +133,7 @@
   // from this thread's TLS area. This must call the destructor of all keys
   // that have a non-NULL data value and a non-NULL destructor.
   void CleanAll() {
-    void** tls = (void**)__get_tls();
+    void** tls = __get_tls();
 
     // Because destructors can do funky things like deleting/creating other
     // keys, we need to implement this in a loop.
@@ -239,8 +239,7 @@
   // to check that the key is properly allocated. If the key was not
   // allocated, the value read from the TLS should always be NULL
   // due to pthread_key_delete() clearing the values for all threads.
-  uintptr_t address = reinterpret_cast<volatile uintptr_t*>(__get_tls())[key];
-  return reinterpret_cast<void*>(address);
+  return __get_tls()[key];
 }
 
 int pthread_setspecific(pthread_key_t key, const void* ptr) {
@@ -250,6 +249,6 @@
     return EINVAL;
   }
 
-  reinterpret_cast<volatile uintptr_t*>(__get_tls())[key] = reinterpret_cast<uintptr_t>(ptr);
+  __get_tls()[key] = const_cast<void*>(ptr);
   return 0;
 }