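Use a static inline function rather than a macro for __get_tls().

This replaces the per-architecture statement-expression macros in tls.h
with static inline functions, resolving the TODO there. ndk_cruft.cpp now
#defines __get_tls to __real_get_tls around its pthread_internal.h include
instead of hand-writing a __real_get_tls() wrapper.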

Change-Id: I0e2529c71c444f1d885317f469c386a9a3f37e35
diff --git a/libc/bionic/ndk_cruft.cpp b/libc/bionic/ndk_cruft.cpp
index f90b511..a69b77f 100644
--- a/libc/bionic/ndk_cruft.cpp
+++ b/libc/bionic/ndk_cruft.cpp
@@ -57,9 +57,9 @@
 #undef __futex_wait
 
 #define __get_thread __real_get_thread
+#define __get_tls __real_get_tls
 #include "pthread_internal.h"
 #undef __get_thread
-static inline void** __real_get_tls() { return __get_tls(); }
 #undef __get_tls
 
 extern "C" {
diff --git a/libc/platform/bionic/tls.h b/libc/platform/bionic/tls.h
index 9a79f84..e77e91f 100644
--- a/libc/platform/bionic/tls.h
+++ b/libc/platform/bionic/tls.h
@@ -30,11 +30,13 @@
 
 #include <sys/cdefs.h>
 
-// TODO: move the __get_tls() macros to functions instead.
-
 #if defined(__aarch64__)
 
-# define __get_tls() ({ void** __val; __asm__("mrs %0, tpidr_el0" : "=r"(__val)); __val; })
+static inline void** __get_tls() {  // Returns the value read from tpidr_el0.
+  void** tp;
+  __asm__("mrs %[tp], tpidr_el0" : [tp] "=r"(tp));
+  return tp;
+}
 
 static inline void __set_tls(void* tls) {
   __asm__("msr tpidr_el0, %0" : : "r" (tls));
@@ -42,9 +44,13 @@
 
 #elif defined(__arm__)
 
-# define __get_tls() ({ void** __val; __asm__("mrc p15, 0, %0, c13, c0, 3" : "=r"(__val)); __val; })
+static inline void** __get_tls() {  // Returns the value read from p15 c13, c0, 3.
+  void** tp;
+  __asm__("mrc p15, 0, %[tp], c13, c0, 3" : [tp] "=r"(tp));
+  return tp;
+}
 
-// arm32 requires a syscall.
+// arm32 requires a syscall to set the thread pointer.
 // By historical accident it's public API, but not in any header except this one.
 __BEGIN_DECLS
 int __set_tls(void* tls);
@@ -52,7 +58,11 @@
 
 #elif defined(__i386__)
 
-# define __get_tls() ({ void** __val; __asm__("movl %%gs:0, %0" : "=r"(__val)); __val; })
+static inline void** __get_tls() {  // Returns the pointer loaded from %gs:0.
+  void** tp;
+  __asm__("movl %%gs:0, %[tp]" : [tp] "=r"(tp));
+  return tp;
+}
 
 // x86 is really hairy, so we keep that out of line.
 __BEGIN_DECLS
@@ -61,7 +71,11 @@
 
 #elif defined(__riscv)
 
-# define __get_tls() ({ void** __val; __asm__("mv %0, tp" : "=r"(__val)); __val; })
+static inline void** __get_tls() {  // Returns the contents of the tp register.
+  void** tp;
+  __asm__("mv %[tp], tp" : [tp] "=r"(tp));
+  return tp;
+}
 
 static inline void __set_tls(void* tls) {
   __asm__("mv tp, %0" : : "r"(tls));
@@ -69,7 +83,11 @@
 
 #elif defined(__x86_64__)
 
-# define __get_tls() ({ void** __val; __asm__("mov %%fs:0, %0" : "=r"(__val)); __val; })
+static inline void** __get_tls() {  // Returns the pointer loaded from %fs:0.
+  void** tp;
+  __asm__("mov %%fs:0, %[tp]" : [tp] "=r"(tp));
+  return tp;
+}
 
 // ARCH_SET_FS is not exposed via <sys/prctl.h> or <linux/prctl.h>.
 #include <asm/prctl.h>