Set __get_thread()->tid as part of clone().

Previously the cached tid was set only in fork() and pthread_create(), which
left raw clone() with an invalid cached tid. Since pthread routines use the
cached tid, this led to unstable behavior after clone().

Test: ltp clone01 (see bug for more)
Test: mmma bionic/tests
Test: bionic-unit-tests-static --gtest_filter=*fork*:*clone*

Bug: 32612735
Bug: 32305649
Change-Id: I30eae5a8024b4c5da65476fcadfe14c6db35bb79
diff --git a/libc/bionic/clone.cpp b/libc/bionic/clone.cpp
index b50a96d..3a20aa9 100644
--- a/libc/bionic/clone.cpp
+++ b/libc/bionic/clone.cpp
@@ -38,6 +38,11 @@
 
 // Called from the __bionic_clone assembler to call the thread function then exit.
 extern "C" __LIBC_HIDDEN__ void __start_thread(int (*fn)(void*), void* arg) {
+  pthread_internal_t* self = __get_thread();
+  if (self && self->tid == -1) {
+    self->tid = syscall(__NR_gettid);
+  }
+
   int status = (*fn)(arg);
   __exit(status);
 }
@@ -105,6 +110,9 @@
     // If any other cases become important, we could use a double trampoline like __pthread_start.
     self->set_cached_pid(parent_pid);
     self->tid = caller_tid;
+  } else if (self->tid == -1) {
+    self->tid = syscall(__NR_gettid);
+    self->set_cached_pid(self->tid);
   }
 
   return clone_result;