Define inline atomic operations for x86 and ARM.

This change moves the ARM definitions into GCC extended inline
assembly.  In addition, the same set of x86 definitions is now
shared among all x86 targets.

Change-Id: I6e5aa3a413d0af2acbe5d32994983d35a01fdcb3
diff --git a/include/cutils/atomic-arm.h b/include/cutils/atomic-arm.h
new file mode 100644
index 0000000..0dd629d
--- /dev/null
+++ b/include/cutils/atomic-arm.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_CUTILS_ATOMIC_ARM_H
+#define ANDROID_CUTILS_ATOMIC_ARM_H
+
+#include <stdint.h>
+#include <machine/cpu-features.h>
+
+extern inline void android_compiler_barrier(void)
+{   /* Compiler-only barrier: the asm template is empty, so no instruction is emitted. */
+    __asm__ __volatile__ ("" : : : "memory");   /* "memory" clobber: compiler may not move memory accesses across this */
+}
+
+#if ANDROID_SMP == 0   /* android_memory_barrier(): variant picked at preprocessing time; here only a compiler barrier */
+extern inline void android_memory_barrier(void)
+{
+  android_compiler_barrier();
+}
+#elif defined(__ARM_HAVE_DMB)   /* ANDROID_SMP != 0 and DMB is available */
+extern inline void android_memory_barrier(void)
+{
+    __asm__ __volatile__ ("dmb" : : : "memory");
+}
+#elif defined(__ARM_HAVE_LDREX_STREX)   /* no DMB: use the CP15 c7, c10, 5 operation instead */
+extern inline void android_memory_barrier(void)
+{
+    __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5"
+                          : : "r" (0) : "memory");   /* the register written to CP15 holds 0 */
+}
+#else   /* none of the above: call the routine at fixed address 0xffff0fa0 */
+extern inline void android_memory_barrier(void)
+{
+    typedef void (kuser_memory_barrier)(void);
+    (*(kuser_memory_barrier *)0xffff0fa0)();   /* presumably the kernel user-helper barrier -- TODO confirm */
+}
+#endif
+
+extern inline int32_t android_atomic_acquire_load(volatile int32_t *ptr)
+{   /* Returns *ptr; the load is performed first and the barrier follows it. */
+    int32_t value = *ptr;
+    android_memory_barrier();   /* placed after the load */
+    return value;
+}
+
+extern inline int32_t android_atomic_release_load(volatile int32_t *ptr)
+{   /* Returns *ptr; the barrier is issued before the load. */
+    android_memory_barrier();
+    return *ptr;
+}
+
+extern inline void android_atomic_acquire_store(int32_t value,
+                                                volatile int32_t *ptr)
+{   /* Writes value to *ptr, then issues the barrier. */
+    *ptr = value;
+    android_memory_barrier();   /* placed after the store */
+}
+
+extern inline void android_atomic_release_store(int32_t value,
+                                                volatile int32_t *ptr)
+{   /* Issues the barrier, then writes value to *ptr. */
+    android_memory_barrier();   /* placed before the store */
+    *ptr = value;
+}
+
+#if defined(__thumb__)   /* Thumb build: declaration only, no inline body in this header */
+extern int android_atomic_cas(int32_t old_value, int32_t new_value,
+                              volatile int32_t *ptr);
+#elif defined(__ARM_HAVE_LDREX_STREX)   /* LDREX/STREX retry loop; no barrier is issued here */
+extern inline int android_atomic_cas(int32_t old_value, int32_t new_value,
+                                     volatile int32_t *ptr)
+{   /* Returns 0 if *ptr held old_value (new_value gets stored), nonzero otherwise. */
+    int32_t prev, status;
+    do {
+        __asm__ __volatile__ ("ldrex %0, [%3]\n"          /* prev = *ptr */
+                              "mov %1, #0\n"              /* status = 0 */
+                              "teq %0, %4\n"              /* compare prev with old_value */
+                              "strexeq %1, %5, [%3]"      /* if equal: try *ptr = new_value, status = store result */
+                              : "=&r" (prev), "=&r" (status), "+m"(*ptr)
+                              : "r" (ptr), "Ir" (old_value), "r" (new_value)
+                              : "cc");
+    } while (__builtin_expect(status != 0, 0));   /* repeat while the conditional store reported nonzero */
+    return prev != old_value;
+}
+#else   /* otherwise: call the compare-exchange routine at fixed address 0xffff0fc0 */
+extern inline int android_atomic_cas(int32_t old_value, int32_t new_value,
+                                     volatile int32_t *ptr)
+{   /* Returns 0 once the routine returns 0, or 1 once *ptr is seen to differ from old_value. */
+    typedef int (kuser_cmpxchg)(int32_t, int32_t, volatile int32_t *);
+    int32_t prev, status;
+    prev = *ptr;   /* NOTE(review): overwritten by the re-read below before prev is ever examined */
+    do {
+        status = (*(kuser_cmpxchg *)0xffff0fc0)(old_value, new_value, ptr);
+        if (__builtin_expect(status == 0, 1))
+            return 0;
+        prev = *ptr;   /* routine returned nonzero: re-read to decide whether to retry */
+    } while (prev == old_value);   /* *ptr still equals old_value, so try again */
+    return 1;
+}
+#endif
+
+extern inline int android_atomic_acquire_cas(int32_t old_value,
+                                             int32_t new_value,
+                                             volatile int32_t *ptr)
+{   /* android_atomic_cas() followed by a barrier; the cas result is returned unchanged. */
+    int status = android_atomic_cas(old_value, new_value, ptr);
+    android_memory_barrier();   /* issued regardless of the cas result */
+    return status;
+}
+
+extern inline int android_atomic_release_cas(int32_t old_value,
+                                             int32_t new_value,
+                                             volatile int32_t *ptr)
+{   /* A barrier followed by android_atomic_cas(); the cas result is returned unchanged. */
+    android_memory_barrier();   /* placed before the compare-and-swap */
+    return android_atomic_cas(old_value, new_value, ptr);
+}
+
+
+#if defined(__thumb__)   /* Thumb build: declaration only, no inline body in this header */
+extern int32_t android_atomic_swap(int32_t new_value,
+                                   volatile int32_t *ptr);
+#elif defined(__ARM_HAVE_LDREX_STREX)   /* LDREX/STREX retry loop */
+extern inline int32_t android_atomic_swap(int32_t new_value,
+                                          volatile int32_t *ptr)
+{   /* Stores new_value in *ptr and returns the value that was there before. */
+    int32_t prev, status;
+    do {
+        __asm__ __volatile__ ("ldrex %0, [%3]\n"      /* prev = *ptr */
+                              "strex %1, %4, [%3]"    /* try *ptr = new_value, status = store result */
+                              : "=&r" (prev), "=&r" (status), "+m" (*ptr)
+                              : "r" (ptr), "r" (new_value)
+                              : "cc");
+    } while (__builtin_expect(status != 0, 0));   /* repeat while the store reported nonzero */
+    android_memory_barrier();   /* NOTE(review): barrier follows the exchange, yet atomic.h says "release ordering" -- confirm */
+    return prev;
+}
+#else   /* otherwise: a single SWP instruction */
+extern inline int32_t android_atomic_swap(int32_t new_value,
+                                          volatile int32_t *ptr)
+{   /* Stores new_value in *ptr and returns the value that was there before. */
+    int32_t prev;
+    __asm__ __volatile__ ("swp %0, %2, [%3]"   /* prev = *ptr and *ptr = new_value in one instruction */
+                          : "=&r" (prev), "+m" (*ptr)
+                          : "r" (new_value), "r" (ptr)
+                          : "cc");
+    android_memory_barrier();   /* barrier follows the exchange in this variant as well */
+    return prev;
+}
+#endif
+
+#if defined(__thumb__)   /* Thumb build: declaration only, no inline body in this header */
+extern int32_t android_atomic_add(int32_t increment,
+                                  volatile int32_t *ptr);
+#elif defined(__ARM_HAVE_LDREX_STREX)   /* LDREX/STREX retry loop */
+extern inline int32_t android_atomic_add(int32_t increment,
+                                         volatile int32_t *ptr)
+{   /* Adds increment to *ptr and returns the value *ptr held before the add. */
+    int32_t prev, tmp, status;
+    android_memory_barrier();   /* barrier precedes the update */
+    do {
+        __asm__ __volatile__ ("ldrex %0, [%4]\n"      /* prev = *ptr */
+                              "add %1, %0, %5\n"      /* tmp = prev + increment */
+                              "strex %2, %1, [%4]"    /* try *ptr = tmp, status = store result */
+                              : "=&r" (prev), "=&r" (tmp),
+                                "=&r" (status), "+m" (*ptr)
+                              : "r" (ptr), "Ir" (increment)
+                              : "cc");
+    } while (__builtin_expect(status != 0, 0));   /* repeat while the store reported nonzero */
+    return prev;
+}
+#else   /* otherwise: loop on android_atomic_cas() */
+extern inline int32_t android_atomic_add(int32_t increment,
+                                         volatile int32_t *ptr)
+{   /* Adds increment to *ptr and returns the value *ptr held before the add. */
+    int32_t prev, status;
+    android_memory_barrier();   /* barrier precedes the update */
+    do {
+        prev = *ptr;   /* snapshot that the cas below must still match */
+        status = android_atomic_cas(prev, prev + increment, ptr);
+    } while (__builtin_expect(status != 0, 0));   /* nonzero: *ptr changed, take a new snapshot */
+    return prev;
+}
+#endif
+
+extern inline int32_t android_atomic_inc(volatile int32_t *addr) {   /* adds 1 to *addr */
+    return android_atomic_add(1, addr);   /* android_atomic_add()'s result is passed through */
+}
+
+extern inline int32_t android_atomic_dec(volatile int32_t *addr) {   /* adds -1 to *addr */
+    return android_atomic_add(-1, addr);   /* android_atomic_add()'s result is passed through */
+}
+
+#if defined(__thumb__)   /* Thumb build: declaration only, no inline body in this header */
+extern int32_t android_atomic_and(int32_t value, volatile int32_t *ptr);
+#elif defined(__ARM_HAVE_LDREX_STREX)   /* LDREX/STREX retry loop */
+extern inline int32_t android_atomic_and(int32_t value, volatile int32_t *ptr)
+{   /* ANDs value into *ptr and returns the value *ptr held before. */
+    int32_t prev, tmp, status;
+    android_memory_barrier();   /* barrier precedes the update */
+    do {
+        __asm__ __volatile__ ("ldrex %0, [%4]\n"      /* prev = *ptr */
+                              "and %1, %0, %5\n"      /* tmp = prev & value */
+                              "strex %2, %1, [%4]"    /* try *ptr = tmp, status = store result */
+                              : "=&r" (prev), "=&r" (tmp),
+                                "=&r" (status), "+m" (*ptr)
+                              : "r" (ptr), "Ir" (value)
+                              : "cc");
+    } while (__builtin_expect(status != 0, 0));   /* repeat while the store reported nonzero */
+    return prev;
+}
+#else   /* otherwise: loop on android_atomic_cas() */
+extern inline int32_t android_atomic_and(int32_t value, volatile int32_t *ptr)
+{   /* ANDs value into *ptr and returns the value *ptr held before. */
+    int32_t prev, status;
+    android_memory_barrier();   /* barrier precedes the update */
+    do {
+        prev = *ptr;   /* snapshot that the cas below must still match */
+        status = android_atomic_cas(prev, prev & value, ptr);
+    } while (__builtin_expect(status != 0, 0));   /* nonzero: *ptr changed, take a new snapshot */
+    return prev;
+}
+#endif
+
+#if defined(__thumb__)   /* Thumb build: declaration only, no inline body in this header */
+extern int32_t android_atomic_or(int32_t value, volatile int32_t *ptr);
+#elif defined(__ARM_HAVE_LDREX_STREX)   /* LDREX/STREX retry loop */
+extern inline int32_t android_atomic_or(int32_t value, volatile int32_t *ptr)
+{   /* ORs value into *ptr and returns the value *ptr held before. */
+    int32_t prev, tmp, status;
+    android_memory_barrier();   /* barrier precedes the update */
+    do {
+        __asm__ __volatile__ ("ldrex %0, [%4]\n"      /* prev = *ptr */
+                              "orr %1, %0, %5\n"      /* tmp = prev | value */
+                              "strex %2, %1, [%4]"    /* try *ptr = tmp, status = store result */
+                              : "=&r" (prev), "=&r" (tmp),
+                                "=&r" (status), "+m" (*ptr)
+                              : "r" (ptr), "Ir" (value)
+                              : "cc");
+    } while (__builtin_expect(status != 0, 0));   /* repeat while the store reported nonzero */
+    return prev;
+}
+#else   /* otherwise: loop on android_atomic_cas() */
+extern inline int32_t android_atomic_or(int32_t value, volatile int32_t *ptr)
+{   /* ORs value into *ptr and returns the value *ptr held before. */
+    int32_t prev, status;
+    android_memory_barrier();   /* barrier precedes the update */
+    do {
+        prev = *ptr;   /* snapshot that the cas below must still match */
+        status = android_atomic_cas(prev, prev | value, ptr);
+    } while (__builtin_expect(status != 0, 0));   /* nonzero: *ptr changed, take a new snapshot */
+    return prev;
+}
+#endif
+
+#endif /* ANDROID_CUTILS_ATOMIC_ARM_H */
diff --git a/include/cutils/atomic-inline.h b/include/cutils/atomic-inline.h
index 1c23be9..715e0aa 100644
--- a/include/cutils/atomic-inline.h
+++ b/include/cutils/atomic-inline.h
@@ -39,69 +39,20 @@
 # error "Must define ANDROID_SMP before including atomic-inline.h"
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Define the full memory barrier for an SMP system.  This is
- * platform-specific.
- */
-
-#ifdef __arm__
-#include <machine/cpu-features.h>
-
-/*
- * For ARMv6K we need to issue a specific MCR instead of the DMB, since
- * that wasn't added until v7.  For anything older, SMP isn't relevant.
- * Since we don't have an ARMv6K to test with, we're not going to deal
- * with that now.
- *
- * The DMB instruction is found in the ARM and Thumb2 instruction sets.
- * This will fail on plain 16-bit Thumb.
- */
-#if defined(__ARM_HAVE_DMB)
-# define _ANDROID_MEMBAR_FULL_SMP() \
-    do { __asm__ __volatile__ ("dmb" ::: "memory"); } while (0)
-#else
-# define _ANDROID_MEMBAR_FULL_SMP()  ARM_SMP_defined_but_no_DMB()
-#endif
-
+#if defined(__arm__)
+#include <cutils/atomic-arm.h>
 #elif defined(__i386__) || defined(__x86_64__)
-/*
- * For recent x86, we can use the SSE2 mfence instruction.
- */
-# define _ANDROID_MEMBAR_FULL_SMP() \
-    do { __asm__ __volatile__ ("mfence" ::: "memory"); } while (0)
-
+#include <cutils/atomic-x86.h>
+#elif defined(__sh__)
+/* implementation is in atomic-android-sh.c */
 #else
-/*
- * Implementation not defined for this platform.  Hopefully we're building
- * in uniprocessor mode.
- */
-# define _ANDROID_MEMBAR_FULL_SMP()  SMP_barrier_not_defined_for_platform()
+#error atomic operations are unsupported
 #endif
 
-
-/*
- * Full barrier.  On uniprocessors this is just a compiler reorder barrier,
- * which ensures that the statements appearing above the barrier in the C/C++
- * code will be issued after the statements appearing below the barrier.
- *
- * For SMP this also includes a memory barrier instruction.  On an ARM
- * CPU this means that the current core will flush pending writes, wait
- * for pending reads to complete, and discard any cached reads that could
- * be stale.  Other CPUs may do less, but the end result is equivalent.
- */
-#if ANDROID_SMP != 0
-# define ANDROID_MEMBAR_FULL() _ANDROID_MEMBAR_FULL_SMP()
+#if ANDROID_SMP == 0
+#define ANDROID_MEMBAR_FULL android_compiler_barrier
 #else
-# define ANDROID_MEMBAR_FULL() \
-    do { __asm__ __volatile__ ("" ::: "memory"); } while (0)
+#define ANDROID_MEMBAR_FULL android_memory_barrier
 #endif
 
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // ANDROID_CUTILS_ATOMIC_INLINE_H
+#endif /* ANDROID_CUTILS_ATOMIC_INLINE_H */
diff --git a/include/cutils/atomic-x86.h b/include/cutils/atomic-x86.h
new file mode 100644
index 0000000..06b643f
--- /dev/null
+++ b/include/cutils/atomic-x86.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_CUTILS_ATOMIC_X86_H
+#define ANDROID_CUTILS_ATOMIC_X86_H
+
+#include <stdint.h>
+
+extern inline void android_compiler_barrier(void)
+{   /* Compiler-only barrier: the asm template is empty, so no instruction is emitted. */
+    __asm__ __volatile__ ("" : : : "memory");   /* "memory" clobber: compiler may not move memory accesses across this */
+}
+
+#if ANDROID_SMP == 0   /* android_memory_barrier(): here only a compiler barrier is issued */
+extern inline void android_memory_barrier(void)
+{
+    android_compiler_barrier();
+}
+#else   /* ANDROID_SMP != 0: issue an mfence instruction */
+extern inline void android_memory_barrier(void)
+{
+    __asm__ __volatile__ ("mfence" : : : "memory");   /* the "memory" clobber also makes this a compiler barrier */
+}
+#endif
+
+extern inline int32_t android_atomic_acquire_load(volatile int32_t *ptr) {   /* returns *ptr */
+    int32_t value = *ptr;
+    android_compiler_barrier();   /* only a compiler barrier follows the load; no fence instruction */
+    return value;
+}
+
+extern inline int32_t android_atomic_release_load(volatile int32_t *ptr) {   /* returns *ptr */
+    android_memory_barrier();   /* full android_memory_barrier() precedes the load */
+    return *ptr;
+}
+
+extern inline void android_atomic_acquire_store(int32_t value,
+                                                volatile int32_t *ptr) {   /* writes value to *ptr */
+    *ptr = value;
+    android_memory_barrier();   /* full android_memory_barrier() follows the store */
+}
+
+extern inline void android_atomic_release_store(int32_t value,
+                                                volatile int32_t *ptr) {   /* writes value to *ptr */
+    android_compiler_barrier();   /* only a compiler barrier precedes the store; no fence instruction */
+    *ptr = value;
+}
+
+extern inline int android_atomic_cas(int32_t old_value, int32_t new_value,
+                                     volatile int32_t *ptr)
+{   /* Stores new_value in *ptr iff *ptr == old_value; returns 0 if it stored, nonzero otherwise. */
+    int32_t prev;
+    __asm__ __volatile__ ("lock; cmpxchgl %2, %1"
+                          : "=a" (prev), "+m" (*ptr)   /* cmpxchg may write *ptr, so it is declared in/out */
+                          : "q" (new_value), "0" (old_value)
+                          : "memory");
+    return prev != old_value;   /* prev is the value cmpxchg found in *ptr */
+}
+
+extern inline int android_atomic_acquire_cas(int32_t old_value,
+                                             int32_t new_value,
+                                             volatile int32_t *ptr)
+{   /* Forwards to android_atomic_cas() and returns its result; no extra barrier is issued here. */
+    /* Loads are not reordered with other loads. */
+    return android_atomic_cas(old_value, new_value, ptr);
+}
+
+extern inline int android_atomic_release_cas(int32_t old_value,
+                                             int32_t new_value,
+                                             volatile int32_t *ptr)
+{   /* Forwards to android_atomic_cas() and returns its result; no extra barrier is issued here. */
+    /* Stores are not reordered with other stores. */
+    return android_atomic_cas(old_value, new_value, ptr);
+}
+
+extern inline int32_t android_atomic_swap(int32_t new_value,
+                                          volatile int32_t *ptr)
+{   /* Stores new_value in *ptr and returns the value it replaced (xchg with memory is implicitly locked). */
+    __asm__ __volatile__ ("xchgl %0, %1"
+                          : "+r" (new_value), "+m" (*ptr)   /* xchg reads and writes both operands */
+                          : /* no input-only operands */
+                          : "memory");
+    /* new_value now holds the old value of *ptr */
+    return new_value;
+}
+
+extern inline int32_t android_atomic_add(int32_t increment,
+                                         volatile int32_t *ptr)
+{   /* Adds increment to *ptr and returns the value *ptr held before the add. */
+    __asm__ __volatile__ ("lock; xaddl %0, %1"
+                          : "+r" (increment), "+m" (*ptr)   /* both operands are read and written */
+                          : : "memory");
+    /* increment now holds the old value of *ptr */
+    return increment;
+}
+
+extern inline int32_t android_atomic_inc(volatile int32_t *addr) {   /* adds 1 to *addr */
+    return android_atomic_add(1, addr);   /* android_atomic_add()'s result is passed through */
+}
+
+extern inline int32_t android_atomic_dec(volatile int32_t *addr) {   /* adds -1 to *addr */
+    return android_atomic_add(-1, addr);   /* android_atomic_add()'s result is passed through */
+}
+
+extern inline int32_t android_atomic_and(int32_t value,
+                                         volatile int32_t *ptr)
+{   /* ANDs value into *ptr and returns the value *ptr held before. */
+    for (;;) {
+        int32_t seen = *ptr;   /* snapshot that the cas must still match */
+        int32_t failed = android_atomic_cas(seen, seen & value, ptr);
+        if (__builtin_expect(failed == 0, 1))
+            return seen;       /* cas returned 0: the update was stored */
+    }
+}
+
+extern inline int32_t android_atomic_or(int32_t value, volatile int32_t *ptr)
+{   /* ORs value into *ptr and returns the value *ptr held before. */
+    for (;;) {
+        int32_t seen = *ptr;   /* snapshot that the cas must still match */
+        int32_t failed = android_atomic_cas(seen, seen | value, ptr);
+        if (__builtin_expect(failed == 0, 1))
+            return seen;       /* cas returned 0: the update was stored */
+    }
+}
+
+#endif /* ANDROID_CUTILS_ATOMIC_X86_H */
diff --git a/include/cutils/atomic.h b/include/cutils/atomic.h
index 0200709..3866848 100644
--- a/include/cutils/atomic.h
+++ b/include/cutils/atomic.h
@@ -90,12 +90,11 @@
 void android_atomic_release_store(int32_t value, volatile int32_t* addr);
 
 /*
- * Unconditional swap operation with "acquire" or "release" ordering.
+ * Unconditional swap operation with release ordering.
  *
  * Stores the new value at *addr, and returns the previous value.
  */
-int32_t android_atomic_acquire_swap(int32_t value, volatile int32_t* addr);
-int32_t android_atomic_release_swap(int32_t value, volatile int32_t* addr);
+int32_t android_atomic_swap(int32_t value, volatile int32_t* addr);
 
 /*
  * Compare-and-set operation with "acquire" or "release" ordering.