Second attempt to "Rewrite atomic.h to use stdatomic.h."

This should not be committed until the win_sdk and aarch64 builds
pass with this CL applied.

This reverts commit 2789faabfa9cdf16902a52a1de3fc4a7ae98efed.

We additionally remove uniprocessor support from the earlier CL,
thus avoiding a potential compiler code reordering issue.

Change-Id: I7207a5ca2efa907a6f757f172d7090a62b2311fe
diff --git a/include/cutils/atomic.h b/include/cutils/atomic.h
index 79409a7..ded972a 100644
--- a/include/cutils/atomic.h
+++ b/include/cutils/atomic.h
@@ -19,9 +19,10 @@
 
 #include <stdint.h>
 #include <sys/types.h>
+#include <stdatomic.h>
 
-#ifdef __cplusplus
-extern "C" {
+#ifndef ANDROID_ATOMIC_INLINE
+#define ANDROID_ATOMIC_INLINE static inline
 #endif
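+/*
+ * The guard above lets a translation unit pre-define
+ * ANDROID_ATOMIC_INLINE before including this header, presumably to
+ * emit out-of-line copies of these operations from a single .c file,
+ * for instance:
+ *
+ *     #define ANDROID_ATOMIC_INLINE
+ *     #include <cutils/atomic.h>
+ */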
 
 /*
@@ -77,11 +78,41 @@
  * These have the same characteristics (e.g. what happens on overflow)
  * as the equivalent non-atomic C operations.
  */
-int32_t android_atomic_inc(volatile int32_t* addr);
-int32_t android_atomic_dec(volatile int32_t* addr);
-int32_t android_atomic_add(int32_t value, volatile int32_t* addr);
-int32_t android_atomic_and(int32_t value, volatile int32_t* addr);
-int32_t android_atomic_or(int32_t value, volatile int32_t* addr);
+ANDROID_ATOMIC_INLINE
+int32_t android_atomic_inc(volatile int32_t* addr)
+{
+    volatile atomic_int_least32_t* a = (volatile atomic_int_least32_t*)addr;
+    /* int32_t, if it exists, is the same type as int_least32_t. */
+    return atomic_fetch_add_explicit(a, 1, memory_order_release);
+}
+
+ANDROID_ATOMIC_INLINE
+int32_t android_atomic_dec(volatile int32_t* addr)
+{
+    volatile atomic_int_least32_t* a = (volatile atomic_int_least32_t*)addr;
+    return atomic_fetch_sub_explicit(a, 1, memory_order_release);
+}
+
+ANDROID_ATOMIC_INLINE
+int32_t android_atomic_add(int32_t value, volatile int32_t* addr)
+{
+    volatile atomic_int_least32_t* a = (volatile atomic_int_least32_t*)addr;
+    return atomic_fetch_add_explicit(a, value, memory_order_release);
+}
+
+ANDROID_ATOMIC_INLINE
+int32_t android_atomic_and(int32_t value, volatile int32_t* addr)
+{
+    volatile atomic_int_least32_t* a = (volatile atomic_int_least32_t*)addr;
+    return atomic_fetch_and_explicit(a, value, memory_order_release);
+}
+
+ANDROID_ATOMIC_INLINE
+int32_t android_atomic_or(int32_t value, volatile int32_t* addr)
+{
+    volatile atomic_int_least32_t* a = (volatile atomic_int_least32_t*)addr;
+    return atomic_fetch_or_explicit(a, value, memory_order_release);
+}
 
 /*
  * Perform an atomic load with "acquire" or "release" ordering.
@@ -96,29 +127,53 @@
  * this comment, you are in the vast majority, and should not be
  * using release loads or replacing them with anything other than
  * locks or default sequentially consistent atomics.
- *
- * This is only necessary if you need the memory barrier.  A 32-bit read
- * from a 32-bit aligned address is atomic on all supported platforms.
  */
-int32_t android_atomic_acquire_load(volatile const int32_t* addr);
-int32_t android_atomic_release_load(volatile const int32_t* addr);
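+/*
+ * For illustration, the preferred replacements mentioned above are
+ * locks or default sequentially consistent C11 atomics, e.g.:
+ *
+ *     atomic_int_least32_t ready;
+ *     ...
+ *     int32_t v = atomic_load(&ready);
+ */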
+ANDROID_ATOMIC_INLINE
+int32_t android_atomic_acquire_load(volatile const int32_t* addr)
+{
+    volatile atomic_int_least32_t* a = (volatile atomic_int_least32_t*)addr;
+    return atomic_load_explicit(a, memory_order_acquire);
+}
+
+ANDROID_ATOMIC_INLINE
+int32_t android_atomic_release_load(volatile const int32_t* addr)
+{
+    volatile atomic_int_least32_t* a = (volatile atomic_int_least32_t*)addr;
+    atomic_thread_fence(memory_order_seq_cst);
+    /* Any reasonable clients of this interface would probably prefer   */
+    /* something weaker.  But some remaining clients seem to be         */
+    /* abusing this API in strange ways, e.g. by using it as a fence.   */
+    /* Thus we are conservative until we can get rid of remaining       */
+    /* clients (and this function).                                     */
+    return atomic_load_explicit(a, memory_order_relaxed);
+}
 
 /*
  * Perform an atomic store with "acquire" or "release" ordering.
  *
- * Note that the notion of a "acquire" ordering for a store does not
+ * Note that the notion of an "acquire" ordering for a store does not
  * really fit into the C11 or C++11 memory model.  The extra ordering
  * is normally observable only by code using memory_order_relaxed
  * atomics, or data races.  In the rare cases in which such ordering
  * is called for, use memory_order_relaxed atomics and a trailing
  * atomic_thread_fence (typically with memory_order_release,
  * not memory_order_acquire!) instead.
- *
- * This is only necessary if you need the memory barrier.  A 32-bit write
- * to a 32-bit aligned address is atomic on all supported platforms.
  */
-void android_atomic_acquire_store(int32_t value, volatile int32_t* addr);
-void android_atomic_release_store(int32_t value, volatile int32_t* addr);
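+/*
+ * For illustration, the replacement pattern recommended above (a
+ * relaxed store followed by a trailing fence) looks like:
+ *
+ *     atomic_store_explicit(a, value, memory_order_relaxed);
+ *     atomic_thread_fence(memory_order_release);
+ */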
+ANDROID_ATOMIC_INLINE
+void android_atomic_acquire_store(int32_t value, volatile int32_t* addr)
+{
+    volatile atomic_int_least32_t* a = (volatile atomic_int_least32_t*)addr;
+    atomic_store_explicit(a, value, memory_order_relaxed);
+    atomic_thread_fence(memory_order_seq_cst);
+    /* Again overly conservative to accommodate weird clients.  */
+}
+
+ANDROID_ATOMIC_INLINE
+void android_atomic_release_store(int32_t value, volatile int32_t* addr)
+{
+    volatile atomic_int_least32_t* a = (volatile atomic_int_least32_t*)addr;
+    atomic_store_explicit(a, value, memory_order_release);
+}
 
 /*
  * Compare-and-set operation with "acquire" or "release" ordering.
@@ -132,10 +187,44 @@
  * Implementations that use the release CAS in a loop may be less efficient
  * than possible, because we re-issue the memory barrier on each iteration.
  */
+ANDROID_ATOMIC_INLINE
 int android_atomic_acquire_cas(int32_t oldvalue, int32_t newvalue,
-        volatile int32_t* addr);
+                               volatile int32_t* addr)
+{
+    volatile atomic_int_least32_t* a = (volatile atomic_int_least32_t*)addr;
+    return (int)(!atomic_compare_exchange_strong_explicit(
+                                          a, &oldvalue, newvalue,
+                                          memory_order_acquire,
+                                          memory_order_acquire));
+}
+
+ANDROID_ATOMIC_INLINE
 int android_atomic_release_cas(int32_t oldvalue, int32_t newvalue,
-        volatile int32_t* addr);
+                               volatile int32_t* addr)
+{
+    volatile atomic_int_least32_t* a = (volatile atomic_int_least32_t*)addr;
+    return (int)(!atomic_compare_exchange_strong_explicit(
+                                          a, &oldvalue, newvalue,
+                                          memory_order_release,
+                                          memory_order_relaxed));
+}
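+
+/*
+ * An example of the loop mentioned above: the release barrier inside
+ * android_atomic_release_cas is re-issued on every retry, although
+ * only the final, successful iteration needs it.
+ *
+ *     int32_t old;
+ *     do {
+ *         old = android_atomic_acquire_load(addr);
+ *     } while (android_atomic_release_cas(old, old + 1, addr));
+ */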
+
+/*
+ * Fence primitives.
+ */
+ANDROID_ATOMIC_INLINE
+void android_compiler_barrier(void)
+{
+    __asm__ __volatile__ ("" : : : "memory");
+    /* Could probably also be:                          */
+    /* atomic_signal_fence(memory_order_seq_cst);       */
+}
+
+ANDROID_ATOMIC_INLINE
+void android_memory_barrier(void)
+{
+    atomic_thread_fence(memory_order_seq_cst);
+}
 
 /*
  * Aliases for code using an older version of this header.  These are now
@@ -145,8 +234,4 @@
 #define android_atomic_write android_atomic_release_store
 #define android_atomic_cmpxchg android_atomic_release_cas
 
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
 #endif // ANDROID_CUTILS_ATOMIC_H
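
For completeness, a sketch (ours, not part of this CL) showing that
code written against the older names keeps working through the aliases
kept at the bottom of the header:

    #include <cutils/atomic.h>

    static volatile int32_t flag;

    void publish(void) {
        android_atomic_write(1, &flag);  /* == android_atomic_release_store */
    }

    int try_claim(void) {
        /* == android_atomic_release_cas; returns 0 when the swap happened */
        return android_atomic_cmpxchg(1, 0, &flag) == 0;
    }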