Merge "Fix, generalize stdatomic.h; improve test."
diff --git a/libc/include/stdatomic.h b/libc/include/stdatomic.h
index 2c04221..adf60bb 100644
--- a/libc/include/stdatomic.h
+++ b/libc/include/stdatomic.h
@@ -123,6 +123,12 @@
  * bits as a T.
  */
 
+#include <stddef.h>  /* For ptrdiff_t.                          */
+#include <stdint.h>  /* TODO: Should pollute namespace less.    */
+#if __STDC_VERSION__ >= 201112L
+# include <uchar.h>  /* For char16_t and char32_t.              */
+#endif
+
 #if __has_extension(c_atomic) || __has_extension(cxx_atomic)
 #define	__CLANG_ATOMICS
 #elif __GNUC_PREREQ__(4, 7)
@@ -228,7 +234,7 @@
  */
 
 static __inline void
-atomic_thread_fence(memory_order __order __unused)
+atomic_thread_fence(memory_order __order __attribute__((unused)))
 {
 
 #ifdef __CLANG_ATOMICS
@@ -241,7 +247,7 @@
 }
 
 static __inline void
-atomic_signal_fence(memory_order __order __unused)
+atomic_signal_fence(memory_order __order __attribute__((unused)))
 {
 
 #ifdef __CLANG_ATOMICS
@@ -263,7 +269,7 @@
 	((void)(obj), (_Bool)1)
 #elif defined(__CLANG_ATOMICS)
 #define	atomic_is_lock_free(obj) \
-	__atomic_is_lock_free(sizeof(*(obj)), obj)
+	__c11_atomic_is_lock_free(sizeof(*(obj)))
 #elif defined(__GNUC_ATOMICS)
 #define	atomic_is_lock_free(obj) \
 	__atomic_is_lock_free(sizeof((obj)->__val), &(obj)->__val)
@@ -477,7 +483,7 @@
 	atomic_bool	__flag;
 } atomic_flag;
 
-#define	ATOMIC_FLAG_INIT		{ ATOMIC_VAR_INIT(0) }
+#define	ATOMIC_FLAG_INIT		{ ATOMIC_VAR_INIT(false) }
 
 static __inline bool
 atomic_flag_test_and_set_explicit(volatile atomic_flag *__object,
diff --git a/tests/Android.mk b/tests/Android.mk
index 4f1661a..13d7cbe 100644
--- a/tests/Android.mk
+++ b/tests/Android.mk
@@ -239,7 +239,8 @@
 
 # -----------------------------------------------------------------------------
 # Tests for the device using bionic's .so. Run with:
-#   adb shell /data/nativetest/bionic-unit-tests/bionic-unit-tests
+#   adb shell /data/nativetest/bionic-unit-tests/bionic-unit-tests32
+#   adb shell /data/nativetest/bionic-unit-tests/bionic-unit-tests64
 # -----------------------------------------------------------------------------
 bionic-unit-tests_whole_static_libraries := \
     libBionicTests \
@@ -275,7 +276,8 @@
 
 # -----------------------------------------------------------------------------
 # Tests for the device linked against bionic's static library. Run with:
-#   adb shell /data/nativetest/bionic-unit-tests-static/bionic-unit-tests-static
+#   adb shell /data/nativetest/bionic-unit-tests-static/bionic-unit-tests-static32
+#   adb shell /data/nativetest/bionic-unit-tests-static/bionic-unit-tests-static64
 # -----------------------------------------------------------------------------
 bionic-unit-tests-static_whole_static_libraries := \
     libBionicTests \
diff --git a/tests/stdatomic_test.cpp b/tests/stdatomic_test.cpp
index 5e88c88..222bd9c 100644
--- a/tests/stdatomic_test.cpp
+++ b/tests/stdatomic_test.cpp
@@ -14,11 +14,10 @@
  * limitations under the License.
  */
 
-#include <gtest/gtest.h>
-
-#if !defined(__GLIBC__) /* TODO: fix our prebuilt toolchains! */
-
 #include <stdatomic.h>
+#include <gtest/gtest.h>
+#include <pthread.h>
+#include <stdint.h>
 
 TEST(stdatomic, LOCK_FREE) {
   ASSERT_TRUE(ATOMIC_BOOL_LOCK_FREE);
@@ -167,4 +166,80 @@
   ASSERT_EQ(0x002, atomic_load(&i));
 }
 
-#endif
+// And a rudimentary test of acquire-release memory ordering:
+
+constexpr static uint_least32_t BIG = 10000000ul; // Assumed even below.
+
+struct three_atomics {
+  atomic_uint_least32_t x;
+  char a[123];  // Everything in different cache lines,
+                // increase chance of compiler getting alignment wrong.
+  atomic_uint_least32_t y;
+  char b[4013];
+  atomic_uint_least32_t z;
+};
+
+// Very simple acquire/release memory ordering sanity check.
+static void* writer(void* arg) {
+  three_atomics* a = reinterpret_cast<three_atomics*>(arg);
+  for (uint_least32_t i = 0; i <= BIG; i+=2) {
+    atomic_store_explicit(&a->x, i, memory_order_relaxed);
+    atomic_store_explicit(&a->z, i, memory_order_relaxed);
+    atomic_store_explicit(&a->y, i, memory_order_release);
+    atomic_store_explicit(&a->x, i+1, memory_order_relaxed);
+    atomic_store_explicit(&a->z, i+1, memory_order_relaxed);
+    atomic_store_explicit(&a->y, i+1, memory_order_release);
+  }
+  return 0;
+}
+
+static void* reader(void* arg) {
+  three_atomics* a = reinterpret_cast<three_atomics*>(arg);
+  uint_least32_t xval = 0, yval = 0, zval = 0;
+  size_t repeat = 0;
+  size_t repeat_limit = 1000;
+  while (yval != BIG + 1) {
+    yval = atomic_load_explicit(&a->y, memory_order_acquire);
+    zval = atomic_load_explicit(&a->z, memory_order_relaxed);
+    xval = atomic_load_explicit(&a->x, memory_order_relaxed);
+    // If we see a given value of y, the immediately preceding
+    // stores to z and x, or later ones, should also be visible.
+    if (zval < yval) {
+      // Cant just ASSERT, since we are in a non-void function.
+      ADD_FAILURE() << "acquire-release ordering violation: "
+                    << zval << " < " << yval << ", " << xval << "\n";
+      return 0; // Only report once.
+    }
+    if (xval < yval) {
+      // Cant just ASSERT, since we are in a non-void function.
+      ADD_FAILURE() << "acquire-release ordering violation: "
+                    << xval << " < " << yval << ", " << zval <<  "\n";
+      return 0; // Only report once.
+    }
+    if (repeat < repeat_limit) ++repeat;
+  }
+  // The following assertion is not technically guaranteed to hold.
+  // But if it fails to hold, this test was useless, and we have a
+  // serious scheduling issue that we should probably know about.
+  EXPECT_EQ(repeat, repeat_limit);
+  return 0;
+}
+
+TEST(stdatomic, ordering) {
+  // Run a memory ordering sanity test.
+  void* result;
+  three_atomics a;
+  atomic_init(&a.x, 0ul);
+  atomic_init(&a.y, 0ul);
+  atomic_init(&a.z, 0ul);
+  pthread_t t1,t2;
+  ASSERT_EQ(0, pthread_create(&t1, 0, reader, &a));
+  ASSERT_EQ(0, pthread_create(&t2, 0, writer, &a));
+  ASSERT_EQ(0, pthread_join(t1, &result));
+  EXPECT_EQ(0, result);
+  ASSERT_EQ(0, pthread_join(t2, &result));
+  EXPECT_EQ(0, result);
+  EXPECT_EQ(atomic_load_explicit(&a.x, memory_order_consume), BIG + 1);
+  EXPECT_EQ(atomic_load_explicit(&a.y, memory_order_seq_cst), BIG + 1);
+  EXPECT_EQ(atomic_load(&a.z), BIG + 1);
+}