Merge "Revert "Lose the hand-written futex assembler.""
diff --git a/libc/arch-arm/bionic/__bionic_clone.S b/libc/arch-arm/bionic/__bionic_clone.S
index 7cc4db5..2643ae0 100644
--- a/libc/arch-arm/bionic/__bionic_clone.S
+++ b/libc/arch-arm/bionic/__bionic_clone.S
@@ -54,19 +54,14 @@
 
     # In the parent, reload saved registers then either return or set errno.
     ldmfd   sp!, {r4, r5, r6, r7}
-    .cfi_def_cfa_offset 0
     cmn     r0, #(MAX_ERRNO + 1)
     bxls    lr
     neg     r0, r0
     b       __set_errno
 
 1:  # The child.
-    # Re-add the unwind directives that were reset from above.
-    .cfi_def_cfa_offset 16
-    .cfi_rel_offset r4, 0
-    .cfi_rel_offset r5, 4
-    .cfi_rel_offset r6, 8
-    .cfi_rel_offset r7, 12
+    # Setting lr to 0 will make the unwinder stop at __bionic_clone_entry.
+    mov    lr, #0
     ldr    r0, [sp, #-4]
     ldr    r1, [sp, #-8]
     b      __bionic_clone_entry
diff --git a/libc/arch-arm64/bionic/__bionic_clone.S b/libc/arch-arm64/bionic/__bionic_clone.S
index c49782c..d3c0374 100644
--- a/libc/arch-arm64/bionic/__bionic_clone.S
+++ b/libc/arch-arm64/bionic/__bionic_clone.S
@@ -61,9 +61,9 @@
 
 .L_bc_child:
     # We're in the child now. Set the end of the frame record chain...
-    .cfi_undefined x29
-    .cfi_undefined x30
     mov     x29, xzr
+    # Setting x30 to 0 will make the unwinder stop at __bionic_clone_entry.
+    mov     x30, xzr
     # ...and call __bionic_clone_entry with the 'fn' and 'arg' we stored on the child stack.
     ldp     x0, x1, [sp, #-16]
     b       __bionic_clone_entry
diff --git a/libc/arch-mips/bionic/__bionic_clone.S b/libc/arch-mips/bionic/__bionic_clone.S
index 11d596b..7b138ae 100644
--- a/libc/arch-mips/bionic/__bionic_clone.S
+++ b/libc/arch-mips/bionic/__bionic_clone.S
@@ -54,6 +54,9 @@
         j ra
 
 .L__thread_start_bc:
+        # Clear return address in child so we don't unwind further.
+        li      ra,0
+
         lw	a0,0(sp)	#  fn
         lw	a1,4(sp)	#  arg
 
diff --git a/libc/arch-mips64/bionic/__bionic_clone.S b/libc/arch-mips64/bionic/__bionic_clone.S
index 754861b..8687906 100644
--- a/libc/arch-mips64/bionic/__bionic_clone.S
+++ b/libc/arch-mips64/bionic/__bionic_clone.S
@@ -75,6 +75,9 @@
 	j	ra
 
 .L__thread_start_bc:
+	# Clear return address in child so we don't unwind further.
+	li	ra,0
+
 	# void __bionic_clone_entry(int (*func)(void*), void *arg)
 	PTR_L	a0,FRAME_FN(sp)		#  fn
 	PTR_L	a1,FRAME_ARG(sp)	#  arg
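
Note: all four ports now end the child's call chain the same way, by zeroing
the return-address register (ARM lr, ARM64 x30, MIPS/MIPS64 ra) before
branching to __bionic_clone_entry, instead of maintaining CFI directives on
the child path. Unwinders treat a zero return address as the end of the
frame chain, so backtraces from the child stop at __bionic_clone_entry. A
minimal sketch of the unwinder-side convention this relies on (the frame
layout and report() callback are illustrative, not from this patch):

    #include <stddef.h>
    #include <stdint.h>

    struct frame { uintptr_t fp; uintptr_t return_address; };
    void report(uintptr_t pc);  /* illustrative callback */

    void backtrace_frames(const struct frame* f) {
      /* A zero return address marks the end of the call chain. */
      while (f != NULL && f->return_address != 0) {
        report(f->return_address);
        f = (const struct frame*) f->fp;
      }
    }
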
diff --git a/libc/bionic/ndk_cruft.cpp b/libc/bionic/ndk_cruft.cpp
index 295418b..4900a8a 100644
--- a/libc/bionic/ndk_cruft.cpp
+++ b/libc/bionic/ndk_cruft.cpp
@@ -31,10 +31,12 @@
 
 #include <ctype.h>
 #include <inttypes.h>
+#include <linux/futex.h>
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/resource.h>
+#include <sys/syscall.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/wait.h>
@@ -199,4 +201,30 @@
   return vdprintf(fd, fmt, ap);
 }
 
+static inline int __futex(volatile void* ftx, int op, int value, const struct timespec* timeout) {
+  // Our generated syscall assembler sets errno, but our callers (pthread functions) don't want errno changed.
+  int saved_errno = errno;
+  if (syscall(__NR_futex, ftx, op, value, timeout) == 0) {
+    return 0;
+  }
+  int result = -errno;
+  errno = saved_errno;
+  return result;
+}
+
+// This used to be in <sys/atomics.h>.
+extern "C" int __futex_wake(volatile void* ftx, int count) {
+  return __futex(ftx, FUTEX_WAKE, count, NULL);
+}
+
+// This used to be in <sys/atomics.h>.
+extern "C" int __futex_wait(volatile void* ftx, int value, const struct timespec* timeout) {
+  return __futex(ftx, FUTEX_WAIT, value, timeout);
+}
+
+// Unity's libmono uses this.
+extern "C" int tkill(pid_t tid, int sig) {
+  return syscall(__NR_tkill, tid, sig);
+}
+
 #endif
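
Note: __futex_wake and __futex_wait exist here only to keep old NDK binaries
that used the <sys/atomics.h> helpers working. A sketch of the kind of
caller they remain for (the flag and the wait/wake pattern are illustrative,
not from this patch; the signatures are the ones declared above):

    #include <stddef.h>
    #include <time.h>

    extern int __futex_wake(volatile void* ftx, int count);
    extern int __futex_wait(volatile void* ftx, int value, const struct timespec* timeout);

    static volatile int ready = 0;

    void consumer(void) {
      while (ready == 0) {
        /* Sleeps only while 'ready' still equals 0; a stale read just loops. */
        __futex_wait(&ready, 0 /* expected value */, NULL /* no timeout */);
      }
    }

    void producer(void) {
      ready = 1;
      __futex_wake(&ready, 1);  /* wake at most one waiter */
    }
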
diff --git a/libc/bionic/pthread_rwlock.cpp b/libc/bionic/pthread_rwlock.cpp
index 3a1b543..063137b 100644
--- a/libc/bionic/pthread_rwlock.cpp
+++ b/libc/bionic/pthread_rwlock.cpp
@@ -27,7 +27,6 @@
  */
 
 #include <errno.h>
-#include <sys/atomics.h>
 
 #include "pthread_internal.h"
 #include "private/bionic_futex.h"
@@ -53,7 +52,7 @@
  *    write" cases and will deadlock in write after read case.
  *
  * TODO: VERY CAREFULLY convert this to use C++11 atomics when possible. All volatile
- * members of pthread_rwlock_t should be converted to atomics<> and __atomic_cmpxchg
+ * members of pthread_rwlock_t should be converted to atomic<> and __sync_bool_compare_and_swap
  * should be changed to compare_exchange_strong accompanied by the proper ordering
  * constraints (comments have been added with the intending ordering across the code).
  *
@@ -147,17 +146,17 @@
     int32_t cur_state = rwlock->state;  // C++11 relaxed atomic read
     if (__predict_true(cur_state >= 0)) {
       // Add as an extra reader.
-      done = __atomic_cmpxchg(cur_state, cur_state + 1, &rwlock->state) == 0;  // C++11 memory_order_aquire
+      done = __sync_bool_compare_and_swap(&rwlock->state, cur_state, cur_state + 1);  // C++11 memory_order_acquire
     } else {
       if (!timespec_from_absolute(rel_timeout, abs_timeout)) {
         return ETIMEDOUT;
       }
       // Owner holds it in write mode, hang up.
       // To avoid losing wake ups the pending_readers update and the state read should be
-      // sequentially consistent. (currently enforced by __atomic_inc which creates a full barrier)
-      __atomic_inc(&rwlock->pending_readers);  // C++11 memory_order_relaxed (if the futex_wait ensures the ordering)
+      // sequentially consistent. (currently enforced by __sync_fetch_and_add which creates a full barrier)
+      __sync_fetch_and_add(&rwlock->pending_readers, 1);  // C++11 memory_order_relaxed (if the futex_wait ensures the ordering)
       int ret = __futex_wait_ex(&rwlock->state, rwlock_is_shared(rwlock), cur_state, rel_timeout);
-      __atomic_dec(&rwlock->pending_readers);  // C++11 memory_order_relaxed
+      __sync_fetch_and_sub(&rwlock->pending_readers, 1);  // C++11 memory_order_relaxed
       if (ret == -ETIMEDOUT) {
         return ETIMEDOUT;
       }
@@ -180,17 +179,17 @@
     int32_t cur_state = rwlock->state;
     if (__predict_true(cur_state == 0)) {
       // Change state from 0 to -1.
-      done =  __atomic_cmpxchg(0 /* cur_state */, -1 /* new state */, &rwlock->state) == 0;  // C++11 memory_order_aquire
+      done = __sync_bool_compare_and_swap(&rwlock->state, 0 /* cur state */, -1 /* new state */);  // C++11 memory_order_acquire
     } else {
       if (!timespec_from_absolute(rel_timeout, abs_timeout)) {
         return ETIMEDOUT;
       }
       // Failed to acquire, hang up.
       // To avoid losing wake ups the pending_writers update and the state read should be
-      // sequentially consistent. (currently enforced by __atomic_inc which creates a full barrier)
-      __atomic_inc(&rwlock->pending_writers);  // C++11 memory_order_relaxed (if the futex_wait ensures the ordering)
+      // sequentially consistent. (currently enforced by __sync_fetch_and_add which creates a full barrier)
+      __sync_fetch_and_add(&rwlock->pending_writers, 1);  // C++11 memory_order_relaxed (if the futex_wait ensures the ordering)
       int ret = __futex_wait_ex(&rwlock->state, rwlock_is_shared(rwlock), cur_state, rel_timeout);
-      __atomic_dec(&rwlock->pending_writers);  // C++11 memory_order_relaxed
+      __sync_fetch_and_sub(&rwlock->pending_writers, 1);  // C++11 memory_order_relaxed
       if (ret == -ETIMEDOUT) {
         return ETIMEDOUT;
       }
@@ -211,14 +210,11 @@
 
 int pthread_rwlock_tryrdlock(pthread_rwlock_t* rwlock) {
   int32_t cur_state = rwlock->state;
-  if (cur_state >= 0) {
-    if(__atomic_cmpxchg(cur_state, cur_state + 1, &rwlock->state) != 0) {  // C++11 memory_order_acquire
-      return EBUSY;
-    }
-  } else {
-    return EBUSY;
+  if ((cur_state >= 0) &&
+      __sync_bool_compare_and_swap(&rwlock->state, cur_state, cur_state + 1)) {  // C++11 memory_order_acquire
+    return 0;
   }
-  return 0;
+  return EBUSY;
 }
 
 int pthread_rwlock_wrlock(pthread_rwlock_t* rwlock) {
@@ -232,16 +228,12 @@
 int pthread_rwlock_trywrlock(pthread_rwlock_t* rwlock) {
   int tid = __get_thread()->tid;
   int32_t cur_state = rwlock->state;
-  if (cur_state == 0) {
-    if(__atomic_cmpxchg(0, -1, &rwlock->state) != 0) {  // C++11 memory_order_acquire
-      return EBUSY;
-    }
-  } else {
-    return EBUSY;
+  if ((cur_state == 0) &&
+      __sync_bool_compare_and_swap(&rwlock->state, 0 /* cur state */, -1 /* new state */)) {  // C++11 memory_order_acquire
+    rwlock->writer_thread_id = tid;
+    return 0;
   }
-
-  rwlock->writer_thread_id = tid;
-  return 0;
+  return EBUSY;
 }
 
 
@@ -260,11 +252,11 @@
       // We're no longer the owner.
       rwlock->writer_thread_id = 0;
       // Change state from -1 to 0.
-      // We use __atomic_cmpxchg to achieve sequential consistency of the state store and
+      // We use __sync_bool_compare_and_swap to achieve sequential consistency of the state store and
       // the following pendingX loads. A simple store with memory_order_release semantics
       // is not enough to guarantee that the pendingX loads are not reordered before the
       // store (which may lead to a lost wakeup).
-      __atomic_cmpxchg(-1 /* cur_state*/, 0 /* new state */, &rwlock->state);  // C++11 maybe memory_order_seq_cst?
+      __sync_bool_compare_and_swap(&rwlock->state, -1 /* cur state */, 0 /* new state */);  // C++11 maybe memory_order_seq_cst?
 
       // Wake any waiters.
       if (__predict_false(rwlock->pending_readers > 0 || rwlock->pending_writers > 0)) {
@@ -273,8 +265,8 @@
       done = true;
     } else { // cur_state > 0
       // Reduce state by 1.
-      // See the above comment on why we need __atomic_cmpxchg.
-      done = __atomic_cmpxchg(cur_state, cur_state - 1, &rwlock->state) == 0;  // C++11 maybe memory_order_seq_cst?
+      // See the comment above on why we need __sync_bool_compare_and_swap.
+      done = __sync_bool_compare_and_swap(&rwlock->state, cur_state, cur_state - 1);  // C++11 maybe memory_order_seq_cst?
       if (done && (cur_state - 1) == 0) {
         // There are no more readers, wake any waiters.
         if (__predict_false(rwlock->pending_readers > 0 || rwlock->pending_writers > 0)) {
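
Note: the conversion flips the sense of every success test, because the
removed __atomic_cmpxchg returned 0 on success while
__sync_bool_compare_and_swap returns true on success. A sketch of the
acquire idiom used throughout this file ('state' stands in for
rwlock->state; the futex wait on contention is elided):

    #include <stdint.h>

    static volatile int32_t state = 0;

    static void read_lock(void) {
      int done = 0;
      while (!done) {
        int32_t cur = state;  /* relaxed read */
        if (cur >= 0) {
          /* True iff state was still 'cur' and is now 'cur + 1'. */
          done = __sync_bool_compare_and_swap(&state, cur, cur + 1);
        }
        /* (The real code futex-waits here when cur < 0.) */
      }
    }
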
diff --git a/libc/include/stdatomic.h b/libc/include/stdatomic.h
new file mode 100644
index 0000000..43ec753
--- /dev/null
+++ b/libc/include/stdatomic.h
@@ -0,0 +1,420 @@
+/*-
+ * Copyright (c) 2011 Ed Schouten <ed@FreeBSD.org>
+ *                    David Chisnall <theraven@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _STDATOMIC_H_
+#define	_STDATOMIC_H_
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <stdbool.h>
+
+#if __has_extension(c_atomic) || __has_extension(cxx_atomic)
+#define	__CLANG_ATOMICS
+#elif __GNUC_PREREQ__(4, 7)
+#define	__GNUC_ATOMICS
+#elif defined(__GNUC__)
+#define	__SYNC_ATOMICS
+#else
+#error "stdatomic.h does not support your compiler"
+#endif
+
+/*
+ * 7.17.1 Atomic lock-free macros.
+ */
+
+#ifdef __GCC_ATOMIC_BOOL_LOCK_FREE
+#define	ATOMIC_BOOL_LOCK_FREE		__GCC_ATOMIC_BOOL_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_CHAR_LOCK_FREE
+#define	ATOMIC_CHAR_LOCK_FREE		__GCC_ATOMIC_CHAR_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_CHAR16_T_LOCK_FREE
+#define	ATOMIC_CHAR16_T_LOCK_FREE	__GCC_ATOMIC_CHAR16_T_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_CHAR32_T_LOCK_FREE
+#define	ATOMIC_CHAR32_T_LOCK_FREE	__GCC_ATOMIC_CHAR32_T_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_WCHAR_T_LOCK_FREE
+#define	ATOMIC_WCHAR_T_LOCK_FREE	__GCC_ATOMIC_WCHAR_T_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_SHORT_LOCK_FREE
+#define	ATOMIC_SHORT_LOCK_FREE		__GCC_ATOMIC_SHORT_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_INT_LOCK_FREE
+#define	ATOMIC_INT_LOCK_FREE		__GCC_ATOMIC_INT_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_LONG_LOCK_FREE
+#define	ATOMIC_LONG_LOCK_FREE		__GCC_ATOMIC_LONG_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_LLONG_LOCK_FREE
+#define	ATOMIC_LLONG_LOCK_FREE		__GCC_ATOMIC_LLONG_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_POINTER_LOCK_FREE
+#define	ATOMIC_POINTER_LOCK_FREE	__GCC_ATOMIC_POINTER_LOCK_FREE
+#endif
+
+/*
+ * 7.17.2 Initialization.
+ */
+
+#if defined(__CLANG_ATOMICS)
+#define	ATOMIC_VAR_INIT(value)		(value)
+#define	atomic_init(obj, value)		__c11_atomic_init(obj, value)
+#else
+#define	ATOMIC_VAR_INIT(value)		{ .__val = (value) }
+#define	atomic_init(obj, value)		((void)((obj)->__val = (value)))
+#endif
+
+/*
+ * Clang and recent GCC both provide predefined macros for the memory
+ * orderings.  If we are using a compiler that doesn't define them, use the
+ * clang values - these will be ignored in the fallback path.
+ */
+
+#ifndef __ATOMIC_RELAXED
+#define __ATOMIC_RELAXED		0
+#endif
+#ifndef __ATOMIC_CONSUME
+#define __ATOMIC_CONSUME		1
+#endif
+#ifndef __ATOMIC_ACQUIRE
+#define __ATOMIC_ACQUIRE		2
+#endif
+#ifndef __ATOMIC_RELEASE
+#define __ATOMIC_RELEASE		3
+#endif
+#ifndef __ATOMIC_ACQ_REL
+#define __ATOMIC_ACQ_REL		4
+#endif
+#ifndef __ATOMIC_SEQ_CST
+#define __ATOMIC_SEQ_CST		5
+#endif
+
+/*
+ * 7.17.3 Order and consistency.
+ *
+ * The memory_order_* constants that denote the barrier behaviour of the
+ * atomic operations.
+ */
+
+typedef enum {
+	memory_order_relaxed = __ATOMIC_RELAXED,
+	memory_order_consume = __ATOMIC_CONSUME,
+	memory_order_acquire = __ATOMIC_ACQUIRE,
+	memory_order_release = __ATOMIC_RELEASE,
+	memory_order_acq_rel = __ATOMIC_ACQ_REL,
+	memory_order_seq_cst = __ATOMIC_SEQ_CST
+} memory_order;
+
+/*
+ * 7.17.4 Fences.
+ */
+
+static __inline void
+atomic_thread_fence(memory_order __order __unused)
+{
+
+#ifdef __CLANG_ATOMICS
+	__c11_atomic_thread_fence(__order);
+#elif defined(__GNUC_ATOMICS)
+	__atomic_thread_fence(__order);
+#else
+	__sync_synchronize();
+#endif
+}
+
+static __inline void
+atomic_signal_fence(memory_order __order __unused)
+{
+
+#ifdef __CLANG_ATOMICS
+	__c11_atomic_signal_fence(__order);
+#elif defined(__GNUC_ATOMICS)
+	__atomic_signal_fence(__order);
+#else
+	__asm volatile ("" ::: "memory");
+#endif
+}
+
+/*
+ * 7.17.5 Lock-free property.
+ */
+
+#if defined(_KERNEL)
+/* Atomics in kernelspace are always lock-free. */
+#define	atomic_is_lock_free(obj) \
+	((void)(obj), (_Bool)1)
+#elif defined(__CLANG_ATOMICS)
+#define	atomic_is_lock_free(obj) \
+	__atomic_is_lock_free(sizeof(*(obj)), obj)
+#elif defined(__GNUC_ATOMICS)
+#define	atomic_is_lock_free(obj) \
+	__atomic_is_lock_free(sizeof((obj)->__val), &(obj)->__val)
+#else
+#define	atomic_is_lock_free(obj) \
+	((void)(obj), sizeof((obj)->__val) <= sizeof(void *))
+#endif
+
+/*
+ * 7.17.6 Atomic integer types.
+ */
+
+#if !__has_extension(c_atomic) && !__has_extension(cxx_atomic)
+/*
+ * No native support for _Atomic(). Place object in structure to prevent
+ * most forms of direct non-atomic access.
+ */
+#define _Atomic(T)              struct { T volatile __val; }
+#endif
+
+typedef _Atomic(bool)			atomic_bool;
+typedef _Atomic(char)			atomic_char;
+typedef _Atomic(signed char)		atomic_schar;
+typedef _Atomic(unsigned char)		atomic_uchar;
+typedef _Atomic(short)			atomic_short;
+typedef _Atomic(unsigned short)		atomic_ushort;
+typedef _Atomic(int)			atomic_int;
+typedef _Atomic(unsigned int)		atomic_uint;
+typedef _Atomic(long)			atomic_long;
+typedef _Atomic(unsigned long)		atomic_ulong;
+typedef _Atomic(long long)		atomic_llong;
+typedef _Atomic(unsigned long long)	atomic_ullong;
+typedef _Atomic(char16_t)		atomic_char16_t;
+typedef _Atomic(char32_t)		atomic_char32_t;
+typedef _Atomic(wchar_t)		atomic_wchar_t;
+typedef _Atomic(int_least8_t)		atomic_int_least8_t;
+typedef _Atomic(uint_least8_t)	atomic_uint_least8_t;
+typedef _Atomic(int_least16_t)	atomic_int_least16_t;
+typedef _Atomic(uint_least16_t)	atomic_uint_least16_t;
+typedef _Atomic(int_least32_t)	atomic_int_least32_t;
+typedef _Atomic(uint_least32_t)	atomic_uint_least32_t;
+typedef _Atomic(int_least64_t)	atomic_int_least64_t;
+typedef _Atomic(uint_least64_t)	atomic_uint_least64_t;
+typedef _Atomic(int_fast8_t)		atomic_int_fast8_t;
+typedef _Atomic(uint_fast8_t)		atomic_uint_fast8_t;
+typedef _Atomic(int_fast16_t)		atomic_int_fast16_t;
+typedef _Atomic(uint_fast16_t)	atomic_uint_fast16_t;
+typedef _Atomic(int_fast32_t)		atomic_int_fast32_t;
+typedef _Atomic(uint_fast32_t)	atomic_uint_fast32_t;
+typedef _Atomic(int_fast64_t)		atomic_int_fast64_t;
+typedef _Atomic(uint_fast64_t)	atomic_uint_fast64_t;
+typedef _Atomic(intptr_t)		atomic_intptr_t;
+typedef _Atomic(uintptr_t)		atomic_uintptr_t;
+typedef _Atomic(size_t)		atomic_size_t;
+typedef _Atomic(ptrdiff_t)		atomic_ptrdiff_t;
+typedef _Atomic(intmax_t)		atomic_intmax_t;
+typedef _Atomic(uintmax_t)		atomic_uintmax_t;
+
+/*
+ * 7.17.7 Operations on atomic types.
+ */
+
+/*
+ * Compiler-specific operations.
+ */
+
+#if defined(__CLANG_ATOMICS)
+#define	atomic_compare_exchange_strong_explicit(object, expected,	\
+    desired, success, failure)						\
+	__c11_atomic_compare_exchange_strong(object, expected, desired,	\
+	    success, failure)
+#define	atomic_compare_exchange_weak_explicit(object, expected,		\
+    desired, success, failure)						\
+	__c11_atomic_compare_exchange_weak(object, expected, desired,	\
+	    success, failure)
+#define	atomic_exchange_explicit(object, desired, order)		\
+	__c11_atomic_exchange(object, desired, order)
+#define	atomic_fetch_add_explicit(object, operand, order)		\
+	__c11_atomic_fetch_add(object, operand, order)
+#define	atomic_fetch_and_explicit(object, operand, order)		\
+	__c11_atomic_fetch_and(object, operand, order)
+#define	atomic_fetch_or_explicit(object, operand, order)		\
+	__c11_atomic_fetch_or(object, operand, order)
+#define	atomic_fetch_sub_explicit(object, operand, order)		\
+	__c11_atomic_fetch_sub(object, operand, order)
+#define	atomic_fetch_xor_explicit(object, operand, order)		\
+	__c11_atomic_fetch_xor(object, operand, order)
+#define	atomic_load_explicit(object, order)				\
+	__c11_atomic_load(object, order)
+#define	atomic_store_explicit(object, desired, order)			\
+	__c11_atomic_store(object, desired, order)
+#elif defined(__GNUC_ATOMICS)
+#define	atomic_compare_exchange_strong_explicit(object, expected,	\
+    desired, success, failure)						\
+	__atomic_compare_exchange_n(&(object)->__val, expected,		\
+	    desired, 0, success, failure)
+#define	atomic_compare_exchange_weak_explicit(object, expected,		\
+    desired, success, failure)						\
+	__atomic_compare_exchange_n(&(object)->__val, expected,		\
+	    desired, 1, success, failure)
+#define	atomic_exchange_explicit(object, desired, order)		\
+	__atomic_exchange_n(&(object)->__val, desired, order)
+#define	atomic_fetch_add_explicit(object, operand, order)		\
+	__atomic_fetch_add(&(object)->__val, operand, order)
+#define	atomic_fetch_and_explicit(object, operand, order)		\
+	__atomic_fetch_and(&(object)->__val, operand, order)
+#define	atomic_fetch_or_explicit(object, operand, order)		\
+	__atomic_fetch_or(&(object)->__val, operand, order)
+#define	atomic_fetch_sub_explicit(object, operand, order)		\
+	__atomic_fetch_sub(&(object)->__val, operand, order)
+#define	atomic_fetch_xor_explicit(object, operand, order)		\
+	__atomic_fetch_xor(&(object)->__val, operand, order)
+#define	atomic_load_explicit(object, order)				\
+	__atomic_load_n(&(object)->__val, order)
+#define	atomic_store_explicit(object, desired, order)			\
+	__atomic_store_n(&(object)->__val, desired, order)
+#else
+#define	__atomic_apply_stride(object, operand) \
+	(((__typeof__((object)->__val))0) + (operand))
+#define	atomic_compare_exchange_strong_explicit(object, expected,	\
+    desired, success, failure)	__extension__ ({			\
+	__typeof__(expected) __ep = (expected);				\
+	__typeof__(*__ep) __e = *__ep;					\
+	(void)(success); (void)(failure);				\
+	(bool)((*__ep = __sync_val_compare_and_swap(&(object)->__val,	\
+	    __e, desired)) == __e);					\
+})
+#define	atomic_compare_exchange_weak_explicit(object, expected,		\
+    desired, success, failure)						\
+	atomic_compare_exchange_strong_explicit(object, expected,	\
+		desired, success, failure)
+#if __has_builtin(__sync_swap)
+/* Clang provides a full-barrier atomic exchange - use it if available. */
+#define	atomic_exchange_explicit(object, desired, order)		\
+	((void)(order), __sync_swap(&(object)->__val, desired))
+#else
+/*
+ * __sync_lock_test_and_set() is only an acquire barrier in theory (although in
+ * practice it is usually a full barrier) so we need an explicit barrier before
+ * it.
+ */
+#define	atomic_exchange_explicit(object, desired, order)		\
+__extension__ ({							\
+	__typeof__(object) __o = (object);				\
+	__typeof__(desired) __d = (desired);				\
+	(void)(order);							\
+	__sync_synchronize();						\
+	__sync_lock_test_and_set(&(__o)->__val, __d);			\
+})
+#endif
+#define	atomic_fetch_add_explicit(object, operand, order)		\
+	((void)(order), __sync_fetch_and_add(&(object)->__val,		\
+	    __atomic_apply_stride(object, operand)))
+#define	atomic_fetch_and_explicit(object, operand, order)		\
+	((void)(order), __sync_fetch_and_and(&(object)->__val, operand))
+#define	atomic_fetch_or_explicit(object, operand, order)		\
+	((void)(order), __sync_fetch_and_or(&(object)->__val, operand))
+#define	atomic_fetch_sub_explicit(object, operand, order)		\
+	((void)(order), __sync_fetch_and_sub(&(object)->__val,		\
+	    __atomic_apply_stride(object, operand)))
+#define	atomic_fetch_xor_explicit(object, operand, order)		\
+	((void)(order), __sync_fetch_and_xor(&(object)->__val, operand))
+#define	atomic_load_explicit(object, order)				\
+	((void)(order), __sync_fetch_and_add(&(object)->__val, 0))
+#define	atomic_store_explicit(object, desired, order)			\
+	((void)atomic_exchange_explicit(object, desired, order))
+#endif
+
+/*
+ * Convenience functions.
+ *
+ * Don't provide these in kernel space. In kernel space, we should be
+ * disciplined enough to always provide explicit barriers.
+ */
+
+#ifndef _KERNEL
+#define	atomic_compare_exchange_strong(object, expected, desired)	\
+	atomic_compare_exchange_strong_explicit(object, expected,	\
+	    desired, memory_order_seq_cst, memory_order_seq_cst)
+#define	atomic_compare_exchange_weak(object, expected, desired)		\
+	atomic_compare_exchange_weak_explicit(object, expected,		\
+	    desired, memory_order_seq_cst, memory_order_seq_cst)
+#define	atomic_exchange(object, desired)				\
+	atomic_exchange_explicit(object, desired, memory_order_seq_cst)
+#define	atomic_fetch_add(object, operand)				\
+	atomic_fetch_add_explicit(object, operand, memory_order_seq_cst)
+#define	atomic_fetch_and(object, operand)				\
+	atomic_fetch_and_explicit(object, operand, memory_order_seq_cst)
+#define	atomic_fetch_or(object, operand)				\
+	atomic_fetch_or_explicit(object, operand, memory_order_seq_cst)
+#define	atomic_fetch_sub(object, operand)				\
+	atomic_fetch_sub_explicit(object, operand, memory_order_seq_cst)
+#define	atomic_fetch_xor(object, operand)				\
+	atomic_fetch_xor_explicit(object, operand, memory_order_seq_cst)
+#define	atomic_load(object)						\
+	atomic_load_explicit(object, memory_order_seq_cst)
+#define	atomic_store(object, desired)					\
+	atomic_store_explicit(object, desired, memory_order_seq_cst)
+#endif /* !_KERNEL */
+
+/*
+ * 7.17.8 Atomic flag type and operations.
+ *
+ * XXX: Assume atomic_bool can be used as an atomic_flag. Is there some
+ * kind of compiler built-in type we could use?
+ */
+
+typedef struct {
+	atomic_bool	__flag;
+} atomic_flag;
+
+#define	ATOMIC_FLAG_INIT		{ ATOMIC_VAR_INIT(0) }
+
+static __inline bool
+atomic_flag_test_and_set_explicit(volatile atomic_flag *__object,
+    memory_order __order)
+{
+	return (atomic_exchange_explicit(&__object->__flag, 1, __order));
+}
+
+static __inline void
+atomic_flag_clear_explicit(volatile atomic_flag *__object, memory_order __order)
+{
+
+	atomic_store_explicit(&__object->__flag, 0, __order);
+}
+
+#ifndef _KERNEL
+static __inline bool
+atomic_flag_test_and_set(volatile atomic_flag *__object)
+{
+
+	return (atomic_flag_test_and_set_explicit(__object,
+	    memory_order_seq_cst));
+}
+
+static __inline void
+atomic_flag_clear(volatile atomic_flag *__object)
+{
+
+	atomic_flag_clear_explicit(__object, memory_order_seq_cst);
+}
+#endif /* !_KERNEL */
+
+#endif /* !_STDATOMIC_H_ */
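
Note: everything needed for a simple lock can be built from this header
alone, regardless of which of the three compiler paths (__CLANG_ATOMICS,
__GNUC_ATOMICS, __SYNC_ATOMICS) is selected. A sketch (the spinlock itself
is illustrative, not from this patch):

    #include <stdatomic.h>

    static atomic_flag lock = ATOMIC_FLAG_INIT;

    void spin_lock(void) {
      /* test_and_set returns the previous value; loop until it was clear. */
      while (atomic_flag_test_and_set_explicit(&lock, memory_order_acquire)) {
      }
    }

    void spin_unlock(void) {
      atomic_flag_clear_explicit(&lock, memory_order_release);
    }
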
diff --git a/libc/include/sys/atomics.h b/libc/include/sys/atomics.h
deleted file mode 100644
index dfb5d5e..0000000
--- a/libc/include/sys/atomics.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (C) 2008 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-#ifndef _SYS_ATOMICS_H
-#define _SYS_ATOMICS_H
-
-#include <sys/cdefs.h>
-#include <sys/time.h>
-
-__BEGIN_DECLS
-
-/* Note: atomic operations that were exported by the C library didn't
- *       provide any memory barriers, which created potential issues on
- *       multi-core devices. We now define them as inlined calls to
- *       GCC sync builtins, which always provide a full barrier.
- *
- *       NOTE: The C library still exports atomic functions by the same
- *              name to ensure ABI stability for existing NDK machine code.
- *
- *       If you are an NDK developer, we encourage you to rebuild your
- *       unmodified sources against this header as soon as possible.
- */
-#define __ATOMIC_INLINE__ static __inline__ __attribute__((always_inline))
-
-__ATOMIC_INLINE__ int
-__atomic_cmpxchg(int old_value, int new_value, volatile int* ptr)
-{
-    /* We must return 0 on success */
-    return __sync_val_compare_and_swap(ptr, old_value, new_value) != old_value;
-}
-
-__ATOMIC_INLINE__ int
-__atomic_swap(int new_value, volatile int *ptr)
-{
-    int old_value;
-    do {
-        old_value = *ptr;
-    } while (__sync_val_compare_and_swap(ptr, old_value, new_value) != old_value);
-    return old_value;
-}
-
-__ATOMIC_INLINE__ int
-__atomic_dec(volatile int *ptr)
-{
-  return __sync_fetch_and_sub (ptr, 1);
-}
-
-__ATOMIC_INLINE__ int
-__atomic_inc(volatile int *ptr)
-{
-  return __sync_fetch_and_add (ptr, 1);
-}
-
-__END_DECLS
-
-#endif /* _SYS_ATOMICS_H */
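
Note: for out-of-tree code that still calls the removed inlines, the __sync
builtins this patch standardizes on are direct replacements; only
__atomic_cmpxchg needs its return value inverted. A sketch (the macro shims
mirror the removed signatures but are illustrative, not from this patch):

    /* __atomic_cmpxchg returned 0 on success, so invert the builtin. */
    #define __atomic_cmpxchg(old, new_value, ptr) \
        (!__sync_bool_compare_and_swap((ptr), (old), (new_value)))
    /* Only guaranteed an acquire barrier, unlike the full-barrier CAS
       loop it replaces (see the stdatomic.h comment above). */
    #define __atomic_swap(new_value, ptr) \
        __sync_lock_test_and_set((ptr), (new_value))
    #define __atomic_dec(ptr) __sync_fetch_and_sub((ptr), 1)
    #define __atomic_inc(ptr) __sync_fetch_and_add((ptr), 1)
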
diff --git a/libc/include/sys/cdefs.h b/libc/include/sys/cdefs.h
index 685de32..0286616 100644
--- a/libc/include/sys/cdefs.h
+++ b/libc/include/sys/cdefs.h
@@ -38,6 +38,24 @@
 #define	_SYS_CDEFS_H_
 
 /*
+ * Testing against Clang-specific extensions.
+ */
+
+#ifndef __has_extension
+#define __has_extension         __has_feature
+#endif
+#ifndef __has_feature
+#define __has_feature(x)        0
+#endif
+#ifndef __has_include
+#define __has_include(x)        0
+#endif
+#ifndef __has_builtin
+#define __has_builtin(x)        0
+#endif
+
+
+/*
  * Macro to test if we're using a GNU C compiler of a specific vintage
  * or later, for e.g. features that appeared in a particular version
  * of GNU C.  Usage:
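
Note: these fallbacks let headers use Clang's feature-test macros
unconditionally; on GCC, __has_extension degrades to __has_feature and then
to 0, so the new <stdatomic.h> can probe for c_atomic/cxx_atomic and
__sync_swap without compiler guards. A sketch of the pattern (the
UNREACHABLE macro is illustrative, not from this patch):

    #include <stdlib.h>  /* abort */

    /* Valid on both compilers: GCC sees __has_builtin(...) expand to 0. */
    #if __has_builtin(__builtin_unreachable)
    #define UNREACHABLE() __builtin_unreachable()
    #else
    #define UNREACHABLE() abort()
    #endif
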
diff --git a/libc/include/sys/ucontext.h b/libc/include/sys/ucontext.h
index d6c6b43..f62380d 100644
--- a/libc/include/sys/ucontext.h
+++ b/libc/include/sys/ucontext.h
@@ -68,8 +68,13 @@
   struct ucontext* uc_link;
   stack_t uc_stack;
   mcontext_t uc_mcontext;
-  sigset_t uc_sigmask;
-  char __padding[128 - sizeof(sigset_t)];
+  // Android's sigset_t is smaller than the kernel's on ARM, so overlay both views.
+  union {
+    sigset_t bionic;
+    uint32_t kernel[2];
+  } uc_sigmask;
+  // The kernel adds extra padding after uc_sigmask to match glibc sigset_t on ARM.
+  char __padding[120];
   unsigned long uc_regspace[128] __attribute__((__aligned__(8)));
 } ucontext_t;
 
@@ -83,6 +88,7 @@
   struct ucontext *uc_link;
   stack_t uc_stack;
   sigset_t uc_sigmask;
+  // The kernel adds extra padding after uc_sigmask to match glibc sigset_t on ARM64.
   char __padding[128 - sizeof(sigset_t)];
   mcontext_t uc_mcontext;
 } ucontext_t;
@@ -146,8 +152,11 @@
   struct ucontext* uc_link;
   stack_t uc_stack;
   mcontext_t uc_mcontext;
-  sigset_t uc_sigmask;
-  char __padding[128 - sizeof(sigset_t)];
+  // Android's sigset_t is smaller than the kernel's on x86, so overlay both views.
+  union {
+    sigset_t bionic;
+    uint32_t kernel[2];
+  } uc_sigmask;
   struct _libc_fpstate __fpregs_mem;
 } ucontext_t;
 
@@ -198,7 +207,6 @@
   stack_t uc_stack;
   mcontext_t uc_mcontext;
   sigset_t uc_sigmask;
-  char __padding[128 - sizeof(sigset_t)];
 } ucontext_t;
 
 #elif defined(__mips64__)
@@ -275,7 +283,6 @@
   stack_t uc_stack;
   mcontext_t uc_mcontext;
   sigset_t uc_sigmask;
-  char __padding[128 - sizeof(sigset_t)];
   struct _libc_fpstate __fpregs_mem;
 } ucontext_t;
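
Note: on LP32 ARM and x86, bionic's sigset_t is only 32 bits wide while the
kernel's is 64 bits, so the union exposes both views of uc_sigmask without
changing the structure's size. A sketch of reading the full kernel mask
through the new layout (the helper is illustrative, not from this patch):

    #include <sys/ucontext.h>

    /* Test whether 'sig' is blocked in the full 64-bit kernel mask. */
    int context_signal_blocked(const ucontext_t* uc, int sig) {
      int bit = sig - 1;
      return (uc->uc_sigmask.kernel[bit / 32] >> (bit % 32)) & 1;
    }
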
 
diff --git a/libc/include/unistd.h b/libc/include/unistd.h
index 6cb36d8..08a82e6 100644
--- a/libc/include/unistd.h
+++ b/libc/include/unistd.h
@@ -207,7 +207,7 @@
 
 /* Used to retry syscalls that can return EINTR. */
 #define TEMP_FAILURE_RETRY(exp) ({         \
-    typeof (exp) _rc;                      \
+    __typeof__(exp) _rc;                   \
     do {                                   \
         _rc = (exp);                       \
     } while (_rc == -1 && errno == EINTR); \
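
Note: plain 'typeof' is a GNU extension that disappears under strict ISO
modes such as -std=c99, while '__typeof__' is always available, so the macro
is now usable from strict-mode code. For example (the wrapper is
illustrative, not from this patch):

    #include <errno.h>
    #include <unistd.h>

    /* Compiles under -std=c99: retries only the EINTR case. */
    ssize_t write_once(int fd, const void* buf, size_t n) {
      return TEMP_FAILURE_RETRY(write(fd, buf, n));
    }
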
diff --git a/libc/stdio/glue.h b/libc/stdio/glue.h
index 73cef63..81f83fc 100644
--- a/libc/stdio/glue.h
+++ b/libc/stdio/glue.h
@@ -42,4 +42,8 @@
 	FILE	*iobs;
 };
 
+#pragma GCC visibility push(hidden)
+
 extern struct glue __sglue;
+
+#pragma GCC visibility pop
diff --git a/libc/stdio/local.h b/libc/stdio/local.h
index 7afa761..151e009 100644
--- a/libc/stdio/local.h
+++ b/libc/stdio/local.h
@@ -41,10 +41,33 @@
 #include "wcio.h"
 #include "fileext.h"
 
+#if defined(__LP64__)
+/*
+ * Android <= KitKat had getc/putc macros in <stdio.h> that referred
+ * to __srget/__swbuf, so those symbols need to be public for LP32
+ * but can be hidden for LP64.
+ */
+__LIBC_HIDDEN__ int __srget(FILE*);
+__LIBC_HIDDEN__ int __swbuf(int, FILE*);
+
+/*
+ * The NDK apparently includes an android_support.a library that
+ * refers to __srefill in its copy of the vsnprintf implementation.
+ */
+/* TODO(LP64): __LIBC_HIDDEN__ int __srefill(FILE*); */
+/* http://b/15291317: the LP64 NDK needs to be fixed to remove that cruft. */
+__LIBC_ABI_PUBLIC__ int __srefill(FILE*);
+#else
+__LIBC_ABI_PUBLIC__ int __srget(FILE*);
+__LIBC_ABI_PUBLIC__ int __swbuf(int, FILE*);
+__LIBC_ABI_PUBLIC__ int __srefill(FILE*);
+#endif
+
+#pragma GCC visibility push(hidden)
+
 int	__sflush(FILE *);
 int	__sflush_locked(FILE *);
 FILE	*__sfp(void);
-int	__srefill(FILE *);
 int	__sread(void *, char *, int);
 int	__swrite(void *, const char *, int);
 fpos_t	__sseek(void *, fpos_t, int);
@@ -102,10 +125,8 @@
 #define NO_PRINTF_PERCENT_N
 
 /* OpenBSD exposes these in <stdio.h>, but we only want them exposed to the implementation. */
-__BEGIN_DECLS
 int __srget(FILE*);
 int __swbuf(int, FILE*);
-__END_DECLS
 #define __sfeof(p)     (((p)->_flags & __SEOF) != 0)
 #define __sferror(p)   (((p)->_flags & __SERR) != 0)
 #define __sclearerr(p) ((void)((p)->_flags &= ~(__SERR|__SEOF)))
@@ -118,3 +139,10 @@
     return (__swbuf(_c, _p));
   }
 }
+
+/* OpenBSD declares these in fvwrite.h but we want to ensure they're hidden. */
+struct __suio;
+extern int __sfvwrite(FILE *, struct __suio *);
+wint_t __fputwc_unlock(wchar_t wc, FILE *fp);
+
+#pragma GCC visibility pop
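
Note: the push/pop pragma pair gives hidden visibility to every declaration
between the pragmas, which is shorthand for annotating each prototype
individually, e.g.:

    #include <stdio.h>

    /* Equivalent per-declaration form of what the pragma does above. */
    __attribute__((visibility("hidden"))) int __sflush(FILE*);
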
diff --git a/linker/linker.cpp b/linker/linker.cpp
index ca2601a..98cb178 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -840,9 +840,6 @@
   soinfo* si = find_library(name, flags, extinfo);
   if (si != NULL) {
     si->CallConstructors();
-    if (caller != NULL) {
-      caller->add_child(si);
-    }
   }
   protect_data(PROT_READ);
   return si;
diff --git a/tests/Android.mk b/tests/Android.mk
index 37aeec3..d753acc 100644
--- a/tests/Android.mk
+++ b/tests/Android.mk
@@ -87,6 +87,7 @@
     stack_protector_test.cpp \
     stack_unwinding_test.cpp \
     stack_unwinding_test_impl.c \
+    stdatomic_test.cpp \
     stdint_test.cpp \
     stdio_test.cpp \
     stdlib_test.cpp \
diff --git a/tests/stdatomic_test.cpp b/tests/stdatomic_test.cpp
new file mode 100644
index 0000000..44f5c7b
--- /dev/null
+++ b/tests/stdatomic_test.cpp
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#if !defined(__GLIBC__) /* TODO: fix our prebuilt toolchains! */
+
+#include <stdatomic.h>
+
+TEST(stdatomic, LOCK_FREE) {
+  ASSERT_TRUE(ATOMIC_BOOL_LOCK_FREE);
+  ASSERT_TRUE(ATOMIC_CHAR16_T_LOCK_FREE);
+  ASSERT_TRUE(ATOMIC_CHAR32_T_LOCK_FREE);
+  ASSERT_TRUE(ATOMIC_CHAR_LOCK_FREE);
+  ASSERT_TRUE(ATOMIC_INT_LOCK_FREE);
+  ASSERT_TRUE(ATOMIC_LLONG_LOCK_FREE);
+  ASSERT_TRUE(ATOMIC_LONG_LOCK_FREE);
+  ASSERT_TRUE(ATOMIC_POINTER_LOCK_FREE);
+  ASSERT_TRUE(ATOMIC_SHORT_LOCK_FREE);
+  ASSERT_TRUE(ATOMIC_WCHAR_T_LOCK_FREE);
+}
+
+TEST(stdatomic, init) {
+  atomic_int v = ATOMIC_VAR_INIT(123);
+  ASSERT_EQ(123, atomic_load(&v));
+
+  atomic_init(&v, 456);
+  ASSERT_EQ(456, atomic_load(&v));
+
+  atomic_flag f = ATOMIC_FLAG_INIT;
+  ASSERT_FALSE(atomic_flag_test_and_set(&f));
+}
+
+TEST(stdatomic, atomic_thread_fence) {
+  atomic_thread_fence(memory_order_relaxed);
+  atomic_thread_fence(memory_order_consume);
+  atomic_thread_fence(memory_order_acquire);
+  atomic_thread_fence(memory_order_release);
+  atomic_thread_fence(memory_order_acq_rel);
+  atomic_thread_fence(memory_order_seq_cst);
+}
+
+TEST(stdatomic, atomic_signal_fence) {
+  atomic_signal_fence(memory_order_relaxed);
+  atomic_signal_fence(memory_order_consume);
+  atomic_signal_fence(memory_order_acquire);
+  atomic_signal_fence(memory_order_release);
+  atomic_signal_fence(memory_order_acq_rel);
+  atomic_signal_fence(memory_order_seq_cst);
+}
+
+TEST(stdatomic, atomic_is_lock_free) {
+  atomic_char small;
+  atomic_intmax_t big;
+  ASSERT_TRUE(atomic_is_lock_free(&small));
+  ASSERT_TRUE(atomic_is_lock_free(&big));
+}
+
+TEST(stdatomic, atomic_flag) {
+  atomic_flag f = ATOMIC_FLAG_INIT;
+  ASSERT_FALSE(atomic_flag_test_and_set(&f));
+  ASSERT_TRUE(atomic_flag_test_and_set(&f));
+
+  atomic_flag_clear(&f);
+
+  ASSERT_FALSE(atomic_flag_test_and_set_explicit(&f, memory_order_relaxed));
+  ASSERT_TRUE(atomic_flag_test_and_set_explicit(&f, memory_order_relaxed));
+
+  atomic_flag_clear_explicit(&f, memory_order_relaxed);
+  ASSERT_FALSE(atomic_flag_test_and_set_explicit(&f, memory_order_relaxed));
+}
+
+TEST(stdatomic, atomic_store) {
+  atomic_int i;
+  atomic_store(&i, 123);
+  ASSERT_EQ(123, atomic_load(&i));
+  atomic_store_explicit(&i, 123, memory_order_relaxed);
+  ASSERT_EQ(123, atomic_load_explicit(&i, memory_order_relaxed));
+}
+
+TEST(stdatomic, atomic_exchange) {
+  atomic_int i;
+  atomic_store(&i, 123);
+  ASSERT_EQ(123, atomic_exchange(&i, 456));
+  ASSERT_EQ(456, atomic_exchange_explicit(&i, 123, memory_order_relaxed));
+}
+
+TEST(stdatomic, atomic_compare_exchange) {
+  atomic_int i;
+  atomic_int expected;
+
+  atomic_store(&i, 123);
+  atomic_store(&expected, 123);
+  ASSERT_TRUE(atomic_compare_exchange_strong(&i, &expected, 456));
+  ASSERT_FALSE(atomic_compare_exchange_strong(&i, &expected, 456));
+  ASSERT_EQ(456, atomic_load(&expected));
+
+  atomic_store(&i, 123);
+  atomic_store(&expected, 123);
+  ASSERT_TRUE(atomic_compare_exchange_strong_explicit(&i, &expected, 456, memory_order_relaxed, memory_order_relaxed));
+  ASSERT_FALSE(atomic_compare_exchange_strong_explicit(&i, &expected, 456, memory_order_relaxed, memory_order_relaxed));
+  ASSERT_EQ(456, atomic_load(&expected));
+
+  atomic_store(&i, 123);
+  atomic_store(&expected, 123);
+  ASSERT_TRUE(atomic_compare_exchange_weak(&i, &expected, 456));
+  ASSERT_FALSE(atomic_compare_exchange_weak(&i, &expected, 456));
+  ASSERT_EQ(456, atomic_load(&expected));
+
+  atomic_store(&i, 123);
+  atomic_store(&expected, 123);
+  ASSERT_TRUE(atomic_compare_exchange_weak_explicit(&i, &expected, 456, memory_order_relaxed, memory_order_relaxed));
+  ASSERT_FALSE(atomic_compare_exchange_weak_explicit(&i, &expected, 456, memory_order_relaxed, memory_order_relaxed));
+  ASSERT_EQ(456, atomic_load(&expected));
+}
+
+TEST(stdatomic, atomic_fetch_add) {
+  atomic_int i = ATOMIC_VAR_INIT(123);
+  ASSERT_EQ(123, atomic_fetch_add(&i, 1));
+  ASSERT_EQ(124, atomic_fetch_add_explicit(&i, 1, memory_order_relaxed));
+  ASSERT_EQ(125, atomic_load(&i));
+}
+
+TEST(stdatomic, atomic_fetch_sub) {
+  atomic_int i = ATOMIC_VAR_INIT(123);
+  ASSERT_EQ(123, atomic_fetch_sub(&i, 1));
+  ASSERT_EQ(122, atomic_fetch_sub_explicit(&i, 1, memory_order_relaxed));
+  ASSERT_EQ(121, atomic_load(&i));
+}
+
+TEST(stdatomic, atomic_fetch_or) {
+  atomic_int i = ATOMIC_VAR_INIT(0x100);
+  ASSERT_EQ(0x100, atomic_fetch_or(&i, 0x020));
+  ASSERT_EQ(0x120, atomic_fetch_or_explicit(&i, 0x003, memory_order_relaxed));
+  ASSERT_EQ(0x123, atomic_load(&i));
+}
+
+TEST(stdatomic, atomic_fetch_xor) {
+  atomic_int i = ATOMIC_VAR_INIT(0x100);
+  ASSERT_EQ(0x100, atomic_fetch_xor(&i, 0x120));
+  ASSERT_EQ(0x020, atomic_fetch_xor_explicit(&i, 0x103, memory_order_relaxed));
+  ASSERT_EQ(0x123, atomic_load(&i));
+}
+
+TEST(stdatomic, atomic_fetch_and) {
+  atomic_int i = ATOMIC_VAR_INIT(0x123);
+  ASSERT_EQ(0x123, atomic_fetch_and(&i, 0x00f));
+  ASSERT_EQ(0x003, atomic_fetch_and_explicit(&i, 0x2, memory_order_relaxed));
+  ASSERT_EQ(0x002, atomic_load(&i));
+}
+
+#endif