Per-thread -fstack-protector guards for x86.

Based on a pair of patches from Intel:

  https://android-review.googlesource.com/#/c/43909/
  https://android-review.googlesource.com/#/c/44903/

For x86, this patch supports _both_ the global that ARM/MIPS use
and the per-thread TLS entry (%gs:20) that GCC uses by default. This
lets us support binaries built with any x86 toolchain (right now,
the NDK is emitting x86 code that uses the global).

I've also extended the original tests to cover ARM/MIPS, and made
them a little more thorough for x86.

Change-Id: I02f279a80c6b626aecad449771dec91df235ad01
diff --git a/libc/bionic/pthread.c b/libc/bionic/pthread.c
index 719bc83..7c22b45 100644
--- a/libc/bionic/pthread.c
+++ b/libc/bionic/pthread.c
@@ -48,6 +48,7 @@
 #include "bionic_atomic_inline.h"
 #include "bionic_futex.h"
 #include "bionic_pthread.h"
+#include "bionic_ssp.h"
 #include "bionic_tls.h"
 #include "pthread_internal.h"
 #include "thread_private.h"
@@ -171,12 +172,14 @@
     tls[i] = NULL;
   }
 
-  // Slot 0 must point to the tls area, this is required by the implementation
-  // of the x86 Linux kernel thread-local-storage.
+  // Slot 0 must point to itself. The x86 Linux kernel reads the TLS from %fs:0.
   tls[TLS_SLOT_SELF]      = (void*) tls;
   tls[TLS_SLOT_THREAD_ID] = thread;
 
+  // Stack guard generation may make system calls, and those system calls may fail.
+  // If they do, they'll try to set errno, so we can only do this after calling __set_tls.
   __set_tls((void*) tls);
+  tls[TLS_SLOT_STACK_GUARD] = __generate_stack_chk_guard();
 }