Fix pthread key number calculation.

Bug: 18723085
Change-Id: Iba2c834b350e4cdba0b2d771b221560a3e5df952
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 8bb1be9..c99e69c 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -191,7 +191,7 @@
   // At offsets >= 0, we have the TLS slots.
   // At offsets < 0, we have the child stack.
   thread->tls = reinterpret_cast<void**>(reinterpret_cast<uint8_t*>(thread->attr.stack_base) +
-                                         thread->attr.stack_size - BIONIC_TLS_SLOTS * sizeof(void*));
+                  thread->attr.stack_size - BIONIC_ALIGN(BIONIC_TLS_SLOTS * sizeof(void*), 16));
   void* child_stack = thread->tls;
   __init_tls(thread);