Add a memory barrier to cond var signaling.

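This adds an explicit full memory barrier before the futex wake in
condition variable signaling. It is unclear whether the barrier is
strictly required (callers that hold the mutex while signaling already
get the needed ordering from the mutex operations), but issuing it is
the conservative choice.

Also document the memory barriers implicitly provided by the
thread-start mutex in thread creation, and fix a typo in a comment.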

Change-Id: Id0faa542d61e4b8ffa775e4adf68e4d7471f4fb7
diff --git a/libc/bionic/pthread.c b/libc/bionic/pthread.c
index b28cd9f..dd8d758 100644
--- a/libc/bionic/pthread.c
+++ b/libc/bionic/pthread.c
@@ -196,6 +196,9 @@
 
     // Wait for our creating thread to release us. This lets it have time to
     // notify gdb about this thread before it starts doing anything.
+    //
+    // This also provides the memory barrier needed to ensure that all memory
+    // accesses previously made by the creating thread are visible to us.
     pthread_mutex_t * start_mutex = (pthread_mutex_t *)&tls[TLS_SLOT_SELF];
     pthread_mutex_lock(start_mutex);
     pthread_mutex_destroy(start_mutex);
@@ -264,7 +267,7 @@
 }
 
 /*
- * Create a new thread. The thread's stack is layed out like so:
+ * Create a new thread. The thread's stack is laid out like so:
  *
  * +---------------------------+
  * |     pthread_internal_t    |
@@ -334,6 +337,10 @@
 
     // Create a mutex for the thread in TLS_SLOT_SELF to wait on once it starts so we can keep
     // it from doing anything until after we notify the debugger about it
+    //
+    // This also provides the memory barrier we need to ensure that all
+    // memory accesses previously performed by this thread are visible to
+    // the new thread.
     start_mutex = (pthread_mutex_t *) &tls[TLS_SLOT_SELF];
     pthread_mutex_init(start_mutex, NULL);
     pthread_mutex_lock(start_mutex);
@@ -1421,6 +1428,18 @@
             break;
     }
 
+    /*
+     * Ensure that all memory accesses previously made by this thread are
+     * visible to the woken thread(s).  On the other side, the "wait"
+     * code will issue any necessary barriers when locking the mutex.
+     *
+     * This may not be strictly necessary -- if the caller follows
+     * recommended practice and holds the mutex while signaling the cond
+     * var, the mutex ops will provide correct semantics.  If they don't
+     * hold the mutex, they're subject to race conditions anyway.
+     */
+    ANDROID_MEMBAR_FULL();
+
     __futex_wake_ex(&cond->value, COND_IS_SHARED(cond), counter);
     return 0;
 }