Merge "Add optimized AArch64 versions of bcopy and wmemmove based on memmove"
diff --git a/libc/SYSCALLS.TXT b/libc/SYSCALLS.TXT
index 785d191..60cf980 100644
--- a/libc/SYSCALLS.TXT
+++ b/libc/SYSCALLS.TXT
@@ -305,6 +305,8 @@
 void _exit|_Exit:exit_group(int)  all
 void __exit:exit(int)  all
 
+int futex(void*, int, int, void*, void*, int)  all
+
 int inotify_init1(int)  all
 int inotify_add_watch(int, const char*, unsigned int)  all
 int inotify_rm_watch(int, unsigned int)  all
diff --git a/libc/arch-arm/arm.mk b/libc/arch-arm/arm.mk
index 7c423ab..7e4729f 100644
--- a/libc/arch-arm/arm.mk
+++ b/libc/arch-arm/arm.mk
@@ -54,6 +54,7 @@
     arch-arm/bionic/__bionic_clone.S \
     arch-arm/bionic/eabi.c \
     arch-arm/bionic/_exit_with_stack_teardown.S \
+    arch-arm/bionic/futex_arm.S \
     arch-arm/bionic/__get_sp.S \
     arch-arm/bionic/libgcc_compat.c \
     arch-arm/bionic/memcmp16.S \
diff --git a/libc/arch-arm/bionic/futex_arm.S b/libc/arch-arm/bionic/futex_arm.S
new file mode 100644
index 0000000..89a1e96
--- /dev/null
+++ b/libc/arch-arm/bionic/futex_arm.S
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <private/bionic_asm.h>
+
+// int __futex_syscall4(volatile void* ftx, int op, int val, const struct timespec* timeout)
+ENTRY_PRIVATE(__futex_syscall4)
+    mov     ip, r7              /* stash the caller's r7 in ip */
+    ldr     r7, =__NR_futex     /* r7 = futex syscall number */
+    swi     #0                  /* enter the kernel; r0-r3 are passed through untouched */
+    mov     r7, ip              /* put the caller's r7 back */
+    bx      lr                  /* return with r0 as the kernel left it; no errno handling here */
+END(__futex_syscall4)
diff --git a/libc/arch-arm/syscalls/futex.S b/libc/arch-arm/syscalls/futex.S
new file mode 100644
index 0000000..1646ca2
--- /dev/null
+++ b/libc/arch-arm/syscalls/futex.S
@@ -0,0 +1,22 @@
+/* Generated by gensyscalls.py. Do not edit. */
+
+#include <private/bionic_asm.h>
+
+ENTRY(futex)
+    mov     ip, sp
+    stmfd   sp!, {r4, r5, r6, r7}
+    .cfi_def_cfa_offset 16
+    .cfi_rel_offset r4, 0
+    .cfi_rel_offset r5, 4
+    .cfi_rel_offset r6, 8
+    .cfi_rel_offset r7, 12
+    ldmfd   ip, {r4, r5, r6}
+    ldr     r7, =__NR_futex
+    swi     #0
+    ldmfd   sp!, {r4, r5, r6, r7}
+    .cfi_def_cfa_offset 0
+    cmn     r0, #(MAX_ERRNO + 1)
+    bxls    lr
+    neg     r0, r0
+    b       __set_errno
+END(futex)
diff --git a/libc/arch-arm64/arm64.mk b/libc/arch-arm64/arm64.mk
index cd8b6ea..ee7d581 100644
--- a/libc/arch-arm64/arm64.mk
+++ b/libc/arch-arm64/arm64.mk
@@ -36,6 +36,7 @@
     arch-arm64/bionic/__bionic_clone.S \
     arch-arm64/bionic/bzero_arm64.c \
     arch-arm64/bionic/_exit_with_stack_teardown.S \
+    arch-arm64/bionic/futex_arm64.S \
     arch-arm64/bionic/__get_sp.S \
     arch-arm64/bionic/__rt_sigreturn.S \
     arch-arm64/bionic/_setjmp.S \
diff --git a/libc/arch-arm64/bionic/futex_arm64.S b/libc/arch-arm64/bionic/futex_arm64.S
new file mode 100644
index 0000000..9d7465a
--- /dev/null
+++ b/libc/arch-arm64/bionic/futex_arm64.S
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <private/bionic_asm.h>
+
+// int __futex_syscall4(volatile void* ftx, int op, int val, const struct timespec* timeout)
+ENTRY_PRIVATE(__futex_syscall4)
+  stp x29, x30, [sp, #-16]!   /* push x29/x30, pre-decrementing sp by 16 */
+  .cfi_def_cfa_offset 16
+  .cfi_rel_offset x29, 0
+  .cfi_rel_offset x30, 8
+  mov x29, sp                 /* x29 = sp */
+
+  mov x8, __NR_futex          /* x8 = futex syscall number */
+  svc #0                      /* enter the kernel; x0-x3 are passed through untouched */
+
+  ldp x29, x30, [sp], #16     /* pop x29/x30 and release the 16 bytes */
+  .cfi_def_cfa_offset 0
+  .cfi_restore x29
+  .cfi_restore x30
+  ret                         /* return with x0 as the kernel left it; no errno handling here */
+END(__futex_syscall4)
diff --git a/libc/arch-arm64/syscalls/futex.S b/libc/arch-arm64/syscalls/futex.S
new file mode 100644
index 0000000..c14ebbf
--- /dev/null
+++ b/libc/arch-arm64/syscalls/futex.S
@@ -0,0 +1,25 @@
+/* Generated by gensyscalls.py. Do not edit. */
+
+#include <private/bionic_asm.h>
+
+ENTRY(futex)
+    stp     x29, x30, [sp, #-16]!
+    .cfi_def_cfa_offset 16
+    .cfi_rel_offset x29, 0
+    .cfi_rel_offset x30, 8
+    mov     x29,  sp
+
+    mov     x8, __NR_futex
+    svc     #0
+
+    ldp     x29, x30, [sp], #16
+    .cfi_def_cfa_offset 0
+    .cfi_restore x29
+    .cfi_restore x30
+
+    cmn     x0, #(MAX_ERRNO + 1)
+    cneg    x0, x0, hi
+    b.hi    __set_errno
+
+    ret
+END(futex)
diff --git a/libc/arch-mips/bionic/__bionic_clone.S b/libc/arch-mips/bionic/__bionic_clone.S
index 11d596b..7b138ae 100644
--- a/libc/arch-mips/bionic/__bionic_clone.S
+++ b/libc/arch-mips/bionic/__bionic_clone.S
@@ -54,6 +54,9 @@
         j ra
 
 .L__thread_start_bc:
+        # Clear return address in child so we don't unwind further.
+        li      ra,0
+
         lw	a0,0(sp)	#  fn
         lw	a1,4(sp)	#  arg
 
diff --git a/libc/arch-mips/bionic/futex_mips.S b/libc/arch-mips/bionic/futex_mips.S
new file mode 100644
index 0000000..5a09f32
--- /dev/null
+++ b/libc/arch-mips/bionic/futex_mips.S
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <private/bionic_asm.h>
+
+// int __futex_syscall4(volatile void* ftx, int op, int val, const struct timespec* timeout)
+ENTRY_PRIVATE(__futex_syscall4)
+	subu	sp,4*6		/* reserve 24 bytes of stack */
+	sw	$0,20(sp)	/* val3 = 0, stored at sp+20 */
+	sw	$0,16(sp)	/* addr2 = 0, stored at sp+16 */
+#	move	a3,a3		/* timespec */
+#	move	a2,a2		/* val */
+#	move	a1,a1		/* op */
+#	move	a0,a0		/* ftx */
+	li	v0,__NR_futex	/* v0 = futex syscall number */
+	syscall
+	.set noreorder
+	bnez	a3, 1f		/* Check for error: a3 != 0 skips the zeroing below */
+         neg	v0		/* Branch delay slot: runs on both paths, negating the error number */
+	move	v0,$0		/* Otherwise return 0 (any success value from the kernel is discarded) */
+1:
+	.set reorder
+	addu	sp,4*6		/* release the 24 bytes reserved on entry */
+	j	ra
+END(__futex_syscall4)
diff --git a/libc/arch-mips/mips.mk b/libc/arch-mips/mips.mk
index 0244712..2dab6e0 100644
--- a/libc/arch-mips/mips.mk
+++ b/libc/arch-mips/mips.mk
@@ -59,6 +59,7 @@
     arch-mips/bionic/bzero.S \
     arch-mips/bionic/cacheflush.cpp \
     arch-mips/bionic/_exit_with_stack_teardown.S \
+    arch-mips/bionic/futex_mips.S \
     arch-mips/bionic/__get_sp.S \
     arch-mips/bionic/memcmp16.S \
     arch-mips/bionic/_setjmp.S \
diff --git a/libc/arch-mips/syscalls/futex.S b/libc/arch-mips/syscalls/futex.S
new file mode 100644
index 0000000..a865fea
--- /dev/null
+++ b/libc/arch-mips/syscalls/futex.S
@@ -0,0 +1,19 @@
+/* Generated by gensyscalls.py. Do not edit. */
+
+#include <private/bionic_asm.h>
+
+ENTRY(futex)
+    .set noreorder
+    .cpload t9
+    li v0, __NR_futex
+    syscall
+    bnez a3, 1f
+    move a0, v0
+    j ra
+    nop
+1:
+    la t9,__set_errno
+    j t9
+    nop
+    .set reorder
+END(futex)
diff --git a/libc/arch-mips64/bionic/__bionic_clone.S b/libc/arch-mips64/bionic/__bionic_clone.S
index 754861b..8687906 100644
--- a/libc/arch-mips64/bionic/__bionic_clone.S
+++ b/libc/arch-mips64/bionic/__bionic_clone.S
@@ -75,6 +75,9 @@
 	j	ra
 
 .L__thread_start_bc:
+	# Clear return address in child so we don't unwind further.
+	li	ra,0
+
 	# void __bionic_clone_entry(int (*func)(void*), void *arg)
 	PTR_L	a0,FRAME_FN(sp)		#  fn
 	PTR_L	a1,FRAME_ARG(sp)	#  arg
diff --git a/libc/arch-mips64/bionic/futex_mips.S b/libc/arch-mips64/bionic/futex_mips.S
new file mode 100644
index 0000000..60c218c
--- /dev/null
+++ b/libc/arch-mips64/bionic/futex_mips.S
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <private/bionic_asm.h>
+
+#if (_MIPS_SIM == _ABIO32) || (_MIPS_SIM == _ABI32)
+FRAMESZ		=	MKFSIZ(NARGSAVE+2,0)
+FRAME_A4	=	4*REGSZ		/* frame offset where addr2 is stored */
+FRAME_A5	=	5*REGSZ		/* frame offset where val3 is stored */
+#else
+FRAMESZ		=	0		/* no frame: addr2/val3 are passed in a4/a5 */
+#endif
+
+// int __futex_syscall4(volatile void* ftx, int op, int val, const struct timespec* timeout)
+LEAF(__futex_syscall4,FRAMESZ)
+#if (_MIPS_SIM == _ABIO32) || (_MIPS_SIM == _ABI32)
+	PTR_SUBU sp, FRAMESZ	/* allocate the FRAMESZ-byte frame */
+	REG_S	$0,FRAME_A5(sp)	/* val3 = 0 */
+	REG_S	$0,FRAME_A4(sp)	/* addr2 = 0 */
+#else
+	move	a5,$0		/* val3 = 0 */
+	move	a4,$0		/* addr2 = 0 */
+#endif
+#	move	a3,a3		/* timespec */
+#	move	a2,a2		/* val */
+#	move	a1,a1		/* op */
+#	move	a0,a0		/* ftx */
+	LI	v0,__NR_futex	/* v0 = futex syscall number */
+	syscall
+	neg	v0		/* Negate errno (done before the error check below) */
+	bnez	a3,1f		/* Check for error: a3 != 0 keeps the negated value */
+	move	v0,$0		/* Return 0 if no error (any success value is discarded) */
+1:
+#if (_MIPS_SIM == _ABIO32) || (_MIPS_SIM == _ABI32)
+	PTR_ADDU sp,FRAMESZ	/* drop the frame allocated on entry */
+#endif
+	j	ra
+	END(__futex_syscall4)
+.hidden __futex_syscall4
diff --git a/libc/arch-mips64/mips64.mk b/libc/arch-mips64/mips64.mk
index 5759104..533be5e 100644
--- a/libc/arch-mips64/mips64.mk
+++ b/libc/arch-mips64/mips64.mk
@@ -44,6 +44,7 @@
     arch-mips64/bionic/__bionic_clone.S \
     arch-mips64/bionic/bzero.S \
     arch-mips64/bionic/_exit_with_stack_teardown.S \
+    arch-mips64/bionic/futex_mips.S \
     arch-mips64/bionic/__get_sp.S \
     arch-mips64/bionic/getdents.cpp \
     arch-mips64/bionic/memcmp16.S \
diff --git a/libc/arch-mips64/syscalls/futex.S b/libc/arch-mips64/syscalls/futex.S
new file mode 100644
index 0000000..dc7dcc6
--- /dev/null
+++ b/libc/arch-mips64/syscalls/futex.S
@@ -0,0 +1,25 @@
+/* Generated by gensyscalls.py. Do not edit. */
+
+#include <private/bionic_asm.h>
+
+ENTRY(futex)
+    .set push
+    .set noreorder
+    li v0, __NR_futex
+    syscall
+    bnez a3, 1f
+    move a0, v0
+    j ra
+    nop
+1:
+    move t0, ra
+    bal     2f
+    nop
+2:
+    .cpsetup ra, t1, 2b
+    LA t9,__set_errno
+    .cpreturn
+    j t9
+    move ra, t0
+    .set pop
+END(futex)
diff --git a/libc/arch-x86/bionic/futex_x86.S b/libc/arch-x86/bionic/futex_x86.S
new file mode 100644
index 0000000..94647ca
--- /dev/null
+++ b/libc/arch-x86/bionic/futex_x86.S
@@ -0,0 +1,16 @@
+#include <private/bionic_asm.h>
+
+// int __futex_syscall4(volatile void* ftx, int op, int val, const struct timespec* timeout)
+ENTRY_PRIVATE(__futex_syscall4)
+    pushl   %ebx                /* save ebx; it is overwritten with ftx below */
+    pushl   %esi                /* save esi; it is overwritten with timeout below */
+    movl    12(%esp), %ebx      /* ftx (offsets account for the two pushes above) */
+    movl    16(%esp), %ecx      /* op */
+    movl    20(%esp), %edx      /* val */
+    movl    24(%esp), %esi      /* timeout */
+    movl    $__NR_futex, %eax   /* eax = futex syscall number */
+    int     $0x80               /* enter the kernel */
+    popl    %esi                /* restore in reverse push order */
+    popl    %ebx
+    ret                         /* eax is returned as the kernel left it; no errno handling here */
+END(__futex_syscall4)
diff --git a/libc/arch-x86/syscalls/futex.S b/libc/arch-x86/syscalls/futex.S
new file mode 100644
index 0000000..7a52913
--- /dev/null
+++ b/libc/arch-x86/syscalls/futex.S
@@ -0,0 +1,42 @@
+/* Generated by gensyscalls.py. Do not edit. */
+
+#include <private/bionic_asm.h>
+
+ENTRY(futex)
+    pushl   %ebx
+    pushl   %ecx
+    pushl   %edx
+    pushl   %esi
+    pushl   %edi
+    pushl   %ebp
+    .cfi_def_cfa_offset 24
+    .cfi_rel_offset ebx, 0
+    .cfi_rel_offset ecx, 4
+    .cfi_rel_offset edx, 8
+    .cfi_rel_offset esi, 12
+    .cfi_rel_offset edi, 16
+    .cfi_rel_offset ebp, 20
+    mov     28(%esp), %ebx
+    mov     32(%esp), %ecx
+    mov     36(%esp), %edx
+    mov     40(%esp), %esi
+    mov     44(%esp), %edi
+    mov     48(%esp), %ebp
+    movl    $__NR_futex, %eax
+    int     $0x80
+    cmpl    $-MAX_ERRNO, %eax
+    jb      1f
+    negl    %eax
+    pushl   %eax
+    call    __set_errno
+    addl    $4, %esp
+    orl     $-1, %eax
+1:
+    popl    %ebp
+    popl    %edi
+    popl    %esi
+    popl    %edx
+    popl    %ecx
+    popl    %ebx
+    ret
+END(futex)
diff --git a/libc/arch-x86/x86.mk b/libc/arch-x86/x86.mk
index d13a934..279fe61 100644
--- a/libc/arch-x86/x86.mk
+++ b/libc/arch-x86/x86.mk
@@ -24,6 +24,7 @@
 libc_bionic_src_files_x86 += \
     arch-x86/bionic/__bionic_clone.S \
     arch-x86/bionic/_exit_with_stack_teardown.S \
+    arch-x86/bionic/futex_x86.S \
     arch-x86/bionic/__get_sp.S \
     arch-x86/bionic/_setjmp.S \
     arch-x86/bionic/setjmp.S \
diff --git a/libc/arch-x86_64/bionic/futex_x86_64.S b/libc/arch-x86_64/bionic/futex_x86_64.S
new file mode 100644
index 0000000..c248439
--- /dev/null
+++ b/libc/arch-x86_64/bionic/futex_x86_64.S
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <private/bionic_asm.h>
+
+// int __futex_syscall4(volatile void* ftx, int op, int val, const struct timespec* timeout)
+ENTRY_PRIVATE(__futex_syscall4)
+    mov     %rcx, %r10      /* timeout: copied from rcx into r10 before the syscall */
+    mov     $__NR_futex, %eax   /* eax = futex syscall number */
+    syscall                 /* rdi, rsi and rdx are left exactly as the caller passed them */
+    ret                     /* rax is returned as the kernel left it; no errno handling here */
+END(__futex_syscall4)
diff --git a/libc/arch-x86_64/syscalls/futex.S b/libc/arch-x86_64/syscalls/futex.S
new file mode 100644
index 0000000..62f64bd
--- /dev/null
+++ b/libc/arch-x86_64/syscalls/futex.S
@@ -0,0 +1,17 @@
+/* Generated by gensyscalls.py. Do not edit. */
+
+#include <private/bionic_asm.h>
+
+ENTRY(futex)
+    movq    %rcx, %r10
+    movl    $__NR_futex, %eax
+    syscall
+    cmpq    $-MAX_ERRNO, %rax
+    jb      1f
+    negl    %eax
+    movl    %eax, %edi
+    call    __set_errno
+    orq     $-1, %rax
+1:
+    ret
+END(futex)
diff --git a/libc/arch-x86_64/x86_64.mk b/libc/arch-x86_64/x86_64.mk
index c54cdb8..f038f6f 100644
--- a/libc/arch-x86_64/x86_64.mk
+++ b/libc/arch-x86_64/x86_64.mk
@@ -31,6 +31,7 @@
 libc_bionic_src_files_x86_64 := \
     arch-x86_64/bionic/__bionic_clone.S \
     arch-x86_64/bionic/_exit_with_stack_teardown.S \
+    arch-x86_64/bionic/futex_x86_64.S \
     arch-x86_64/bionic/__get_sp.S \
     arch-x86_64/bionic/__rt_sigreturn.S \
     arch-x86_64/bionic/_setjmp.S \
diff --git a/libc/bionic/ndk_cruft.cpp b/libc/bionic/ndk_cruft.cpp
index 295418b..4900a8a 100644
--- a/libc/bionic/ndk_cruft.cpp
+++ b/libc/bionic/ndk_cruft.cpp
@@ -31,10 +31,12 @@
 
 #include <ctype.h>
 #include <inttypes.h>
+#include <linux/futex.h>
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/resource.h>
+#include <sys/syscall.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/wait.h>
@@ -199,4 +201,30 @@
   return vdprintf(fd, fmt, ap);
 }
 
+static inline int __futex(volatile void* ftx, int op, int value, const struct timespec* timeout) {
+  // Our generated syscall assembler sets errno, but our callers (pthread functions) don't want to.
+  int saved_errno = errno;
+  int result = syscall(__NR_futex, ftx, op, value, timeout);
+  if (result == -1) {  // Only -1 means failure; FUTEX_WAKE succeeds with a positive woken count.
+    result = -errno;  // Report the failure as a negated errno value...
+    errno = saved_errno;  // ...and leave the caller's errno as it was on entry.
+  }
+  return result;  // Successful results (zero or positive) are passed through unchanged.
+}
+
+// Issues FUTEX_WAKE on |ftx| with |count| and no timeout. This used to be in <sys/atomics.h>.
+extern "C" int __futex_wake(volatile void* ftx, int count) {
+  return __futex(ftx, FUTEX_WAKE, count, NULL);
+}
+
+// Issues FUTEX_WAIT on |ftx| with |value| and |timeout|. This used to be in <sys/atomics.h>.
+extern "C" int __futex_wait(volatile void* ftx, int value, const struct timespec* timeout) {
+  return __futex(ftx, FUTEX_WAIT, value, timeout);
+}
+
+// Unity's libmono uses this. Forwards |tid| and |sig| directly to the tkill system call.
+extern "C" int tkill(pid_t tid, int sig) {
+  return syscall(__NR_tkill, tid, sig);
+}
+
 #endif
diff --git a/libc/include/stdatomic.h b/libc/include/stdatomic.h
new file mode 100644
index 0000000..43ec753
--- /dev/null
+++ b/libc/include/stdatomic.h
@@ -0,0 +1,420 @@
+/*-
+ * Copyright (c) 2011 Ed Schouten <ed@FreeBSD.org>
+ *                    David Chisnall <theraven@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _STDATOMIC_H_
+#define	_STDATOMIC_H_
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <stdbool.h>
+
+#if __has_extension(c_atomic) || __has_extension(cxx_atomic)
+#define	__CLANG_ATOMICS
+#elif __GNUC_PREREQ__(4, 7)
+#define	__GNUC_ATOMICS
+#elif defined(__GNUC__)
+#define	__SYNC_ATOMICS
+#else
+#error "stdatomic.h does not support your compiler"
+#endif
+
+/*
+ * 7.17.1 Atomic lock-free macros.
+ */
+
+#ifdef __GCC_ATOMIC_BOOL_LOCK_FREE
+#define	ATOMIC_BOOL_LOCK_FREE		__GCC_ATOMIC_BOOL_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_CHAR_LOCK_FREE
+#define	ATOMIC_CHAR_LOCK_FREE		__GCC_ATOMIC_CHAR_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_CHAR16_T_LOCK_FREE
+#define	ATOMIC_CHAR16_T_LOCK_FREE	__GCC_ATOMIC_CHAR16_T_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_CHAR32_T_LOCK_FREE
+#define	ATOMIC_CHAR32_T_LOCK_FREE	__GCC_ATOMIC_CHAR32_T_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_WCHAR_T_LOCK_FREE
+#define	ATOMIC_WCHAR_T_LOCK_FREE	__GCC_ATOMIC_WCHAR_T_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_SHORT_LOCK_FREE
+#define	ATOMIC_SHORT_LOCK_FREE		__GCC_ATOMIC_SHORT_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_INT_LOCK_FREE
+#define	ATOMIC_INT_LOCK_FREE		__GCC_ATOMIC_INT_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_LONG_LOCK_FREE
+#define	ATOMIC_LONG_LOCK_FREE		__GCC_ATOMIC_LONG_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_LLONG_LOCK_FREE
+#define	ATOMIC_LLONG_LOCK_FREE		__GCC_ATOMIC_LLONG_LOCK_FREE
+#endif
+#ifdef __GCC_ATOMIC_POINTER_LOCK_FREE
+#define	ATOMIC_POINTER_LOCK_FREE	__GCC_ATOMIC_POINTER_LOCK_FREE
+#endif
+
+/*
+ * 7.17.2 Initialization.
+ */
+
+#if defined(__CLANG_ATOMICS)
+#define	ATOMIC_VAR_INIT(value)		(value)
+#define	atomic_init(obj, value)		__c11_atomic_init(obj, value)
+#else
+#define	ATOMIC_VAR_INIT(value)		{ .__val = (value) }
+#define	atomic_init(obj, value)		((void)((obj)->__val = (value)))
+#endif
+
+/*
+ * Clang and recent GCC both provide predefined macros for the memory
+ * orderings.  If we are using a compiler that doesn't define them, use the
+ * clang values - these will be ignored in the fallback path.
+ */
+
+#ifndef __ATOMIC_RELAXED
+#define __ATOMIC_RELAXED		0
+#endif
+#ifndef __ATOMIC_CONSUME
+#define __ATOMIC_CONSUME		1
+#endif
+#ifndef __ATOMIC_ACQUIRE
+#define __ATOMIC_ACQUIRE		2
+#endif
+#ifndef __ATOMIC_RELEASE
+#define __ATOMIC_RELEASE		3
+#endif
+#ifndef __ATOMIC_ACQ_REL
+#define __ATOMIC_ACQ_REL		4
+#endif
+#ifndef __ATOMIC_SEQ_CST
+#define __ATOMIC_SEQ_CST		5
+#endif
+
+/*
+ * 7.17.3 Order and consistency.
+ *
+ * The memory_order_* constants that denote the barrier behaviour of the
+ * atomic operations.
+ */
+
+typedef enum {
+	memory_order_relaxed = __ATOMIC_RELAXED,
+	memory_order_consume = __ATOMIC_CONSUME,
+	memory_order_acquire = __ATOMIC_ACQUIRE,
+	memory_order_release = __ATOMIC_RELEASE,
+	memory_order_acq_rel = __ATOMIC_ACQ_REL,
+	memory_order_seq_cst = __ATOMIC_SEQ_CST
+} memory_order;
+
+/*
+ * 7.17.4 Fences.
+ */
+
+static __inline void
+atomic_thread_fence(memory_order __order __unused)
+{
+	/* Use the compiler's fence builtin; the __sync fallback does not look at __order. */
+#ifdef __CLANG_ATOMICS
+	__c11_atomic_thread_fence(__order);
+#elif defined(__GNUC_ATOMICS)
+	__atomic_thread_fence(__order);
+#else
+	__sync_synchronize();
+#endif
+}
+
+static __inline void
+atomic_signal_fence(memory_order __order __unused)
+{
+	/* Fallback is an empty asm with a "memory" clobber; it does not look at __order. */
+#ifdef __CLANG_ATOMICS
+	__c11_atomic_signal_fence(__order);
+#elif defined(__GNUC_ATOMICS)
+	__atomic_signal_fence(__order);
+#else
+	__asm volatile ("" ::: "memory");
+#endif
+}
+
+/*
+ * 7.17.5 Lock-free property.
+ */
+
+#if defined(_KERNEL)
+/* Atomics in kernelspace are always lock-free. */
+#define	atomic_is_lock_free(obj) \
+	((void)(obj), (_Bool)1)
+#elif defined(__CLANG_ATOMICS)
+#define	atomic_is_lock_free(obj) \
+	__atomic_is_lock_free(sizeof(*(obj)), obj)
+#elif defined(__GNUC_ATOMICS)
+#define	atomic_is_lock_free(obj) \
+	__atomic_is_lock_free(sizeof((obj)->__val), &(obj)->__val)
+#else
+#define	atomic_is_lock_free(obj) \
+	((void)(obj), sizeof((obj)->__val) <= sizeof(void *))
+#endif
+
+/*
+ * 7.17.6 Atomic integer types.
+ */
+
+#if !__has_extension(c_atomic) && !__has_extension(cxx_atomic)
+/*
+ * No native support for _Atomic(). Place object in structure to prevent
+ * most forms of direct non-atomic access.
+ */
+#define _Atomic(T)              struct { T volatile __val; }
+#endif
+
+typedef _Atomic(bool)			atomic_bool;
+typedef _Atomic(char)			atomic_char;
+typedef _Atomic(signed char)		atomic_schar;
+typedef _Atomic(unsigned char)		atomic_uchar;
+typedef _Atomic(short)			atomic_short;
+typedef _Atomic(unsigned short)		atomic_ushort;
+typedef _Atomic(int)			atomic_int;
+typedef _Atomic(unsigned int)		atomic_uint;
+typedef _Atomic(long)			atomic_long;
+typedef _Atomic(unsigned long)		atomic_ulong;
+typedef _Atomic(long long)		atomic_llong;
+typedef _Atomic(unsigned long long)	atomic_ullong;
+typedef _Atomic(char16_t)		atomic_char16_t;
+typedef _Atomic(char32_t)		atomic_char32_t;
+typedef _Atomic(wchar_t)		atomic_wchar_t;
+typedef _Atomic(int_least8_t)		atomic_int_least8_t;
+typedef _Atomic(uint_least8_t)	atomic_uint_least8_t;
+typedef _Atomic(int_least16_t)	atomic_int_least16_t;
+typedef _Atomic(uint_least16_t)	atomic_uint_least16_t;
+typedef _Atomic(int_least32_t)	atomic_int_least32_t;
+typedef _Atomic(uint_least32_t)	atomic_uint_least32_t;
+typedef _Atomic(int_least64_t)	atomic_int_least64_t;
+typedef _Atomic(uint_least64_t)	atomic_uint_least64_t;
+typedef _Atomic(int_fast8_t)		atomic_int_fast8_t;
+typedef _Atomic(uint_fast8_t)		atomic_uint_fast8_t;
+typedef _Atomic(int_fast16_t)		atomic_int_fast16_t;
+typedef _Atomic(uint_fast16_t)	atomic_uint_fast16_t;
+typedef _Atomic(int_fast32_t)		atomic_int_fast32_t;
+typedef _Atomic(uint_fast32_t)	atomic_uint_fast32_t;
+typedef _Atomic(int_fast64_t)		atomic_int_fast64_t;
+typedef _Atomic(uint_fast64_t)	atomic_uint_fast64_t;
+typedef _Atomic(intptr_t)		atomic_intptr_t;
+typedef _Atomic(uintptr_t)		atomic_uintptr_t;
+typedef _Atomic(size_t)		atomic_size_t;
+typedef _Atomic(ptrdiff_t)		atomic_ptrdiff_t;
+typedef _Atomic(intmax_t)		atomic_intmax_t;
+typedef _Atomic(uintmax_t)		atomic_uintmax_t;
+
+/*
+ * 7.17.7 Operations on atomic types.
+ */
+
+/*
+ * Compiler-specific operations.
+ */
+
+#if defined(__CLANG_ATOMICS)
+#define	atomic_compare_exchange_strong_explicit(object, expected,	\
+    desired, success, failure)						\
+	__c11_atomic_compare_exchange_strong(object, expected, desired,	\
+	    success, failure)
+#define	atomic_compare_exchange_weak_explicit(object, expected,		\
+    desired, success, failure)						\
+	__c11_atomic_compare_exchange_weak(object, expected, desired,	\
+	    success, failure)
+#define	atomic_exchange_explicit(object, desired, order)		\
+	__c11_atomic_exchange(object, desired, order)
+#define	atomic_fetch_add_explicit(object, operand, order)		\
+	__c11_atomic_fetch_add(object, operand, order)
+#define	atomic_fetch_and_explicit(object, operand, order)		\
+	__c11_atomic_fetch_and(object, operand, order)
+#define	atomic_fetch_or_explicit(object, operand, order)		\
+	__c11_atomic_fetch_or(object, operand, order)
+#define	atomic_fetch_sub_explicit(object, operand, order)		\
+	__c11_atomic_fetch_sub(object, operand, order)
+#define	atomic_fetch_xor_explicit(object, operand, order)		\
+	__c11_atomic_fetch_xor(object, operand, order)
+#define	atomic_load_explicit(object, order)				\
+	__c11_atomic_load(object, order)
+#define	atomic_store_explicit(object, desired, order)			\
+	__c11_atomic_store(object, desired, order)
+#elif defined(__GNUC_ATOMICS)
+#define	atomic_compare_exchange_strong_explicit(object, expected,	\
+    desired, success, failure)						\
+	__atomic_compare_exchange_n(&(object)->__val, expected,		\
+	    desired, 0, success, failure)
+#define	atomic_compare_exchange_weak_explicit(object, expected,		\
+    desired, success, failure)						\
+	__atomic_compare_exchange_n(&(object)->__val, expected,		\
+	    desired, 1, success, failure)
+#define	atomic_exchange_explicit(object, desired, order)		\
+	__atomic_exchange_n(&(object)->__val, desired, order)
+#define	atomic_fetch_add_explicit(object, operand, order)		\
+	__atomic_fetch_add(&(object)->__val, operand, order)
+#define	atomic_fetch_and_explicit(object, operand, order)		\
+	__atomic_fetch_and(&(object)->__val, operand, order)
+#define	atomic_fetch_or_explicit(object, operand, order)		\
+	__atomic_fetch_or(&(object)->__val, operand, order)
+#define	atomic_fetch_sub_explicit(object, operand, order)		\
+	__atomic_fetch_sub(&(object)->__val, operand, order)
+#define	atomic_fetch_xor_explicit(object, operand, order)		\
+	__atomic_fetch_xor(&(object)->__val, operand, order)
+#define	atomic_load_explicit(object, order)				\
+	__atomic_load_n(&(object)->__val, order)
+#define	atomic_store_explicit(object, desired, order)			\
+	__atomic_store_n(&(object)->__val, desired, order)
+#else
+#define	__atomic_apply_stride(object, operand) \
+	(((__typeof__((object)->__val))0) + (operand))
+#define	atomic_compare_exchange_strong_explicit(object, expected,	\
+    desired, success, failure)	__extension__ ({			\
+	__typeof__(expected) __ep = (expected);				\
+	__typeof__(*__ep) __e = *__ep;					\
+	(void)(success); (void)(failure);				\
+	(bool)((*__ep = __sync_val_compare_and_swap(&(object)->__val,	\
+	    __e, desired)) == __e);					\
+})
+#define	atomic_compare_exchange_weak_explicit(object, expected,		\
+    desired, success, failure)						\
+	atomic_compare_exchange_strong_explicit(object, expected,	\
+		desired, success, failure)
+#if __has_builtin(__sync_swap)
+/* Clang provides a full-barrier atomic exchange - use it if available. */
+#define	atomic_exchange_explicit(object, desired, order)		\
+	((void)(order), __sync_swap(&(object)->__val, desired))
+#else
+/*
+ * __sync_lock_test_and_set() is only an acquire barrier in theory (although in
+ * practice it is usually a full barrier) so we need an explicit barrier before
+ * it.
+ */
+#define	atomic_exchange_explicit(object, desired, order)		\
+__extension__ ({							\
+	__typeof__(object) __o = (object);				\
+	__typeof__(desired) __d = (desired);				\
+	(void)(order);	/* requested order is not used */		\
+	__sync_synchronize();	/* the explicit barrier */		\
+	__sync_lock_test_and_set(&(__o)->__val, __d);			\
+})
+#endif /* __has_builtin(__sync_swap) */
+#define	atomic_fetch_add_explicit(object, operand, order)		\
+	((void)(order), __sync_fetch_and_add(&(object)->__val,		\
+	    __atomic_apply_stride(object, operand)))	/* order is evaluated but not used by any macro in this group */
+#define	atomic_fetch_and_explicit(object, operand, order)		\
+	((void)(order), __sync_fetch_and_and(&(object)->__val, operand))
+#define	atomic_fetch_or_explicit(object, operand, order)		\
+	((void)(order), __sync_fetch_and_or(&(object)->__val, operand))
+#define	atomic_fetch_sub_explicit(object, operand, order)		\
+	((void)(order), __sync_fetch_and_sub(&(object)->__val,		\
+	    __atomic_apply_stride(object, operand)))
+#define	atomic_fetch_xor_explicit(object, operand, order)		\
+	((void)(order), __sync_fetch_and_xor(&(object)->__val, operand))
+#define	atomic_load_explicit(object, order)				\
+	((void)(order), __sync_fetch_and_add(&(object)->__val, 0))	/* load expressed as fetch-and-add of 0 */
+#define	atomic_store_explicit(object, desired, order)			\
+	((void)atomic_exchange_explicit(object, desired, order))	/* store expressed as exchange, old value discarded */
+#endif
+
+/*
+ * Convenience functions: each forwards to its *_explicit counterpart with
+ * memory_order_seq_cst for every ordering argument.
+ * Don't provide these in kernel space. In kernel space, we should be
+ * disciplined enough to always provide explicit barriers.
+ */
+
+#ifndef _KERNEL
+#define	atomic_compare_exchange_strong(object, expected, desired)	\
+	atomic_compare_exchange_strong_explicit(object, expected,	\
+	    desired, memory_order_seq_cst, memory_order_seq_cst)
+#define	atomic_compare_exchange_weak(object, expected, desired)		\
+	atomic_compare_exchange_weak_explicit(object, expected,		\
+	    desired, memory_order_seq_cst, memory_order_seq_cst)
+#define	atomic_exchange(object, desired)				\
+	atomic_exchange_explicit(object, desired, memory_order_seq_cst)
+#define	atomic_fetch_add(object, operand)				\
+	atomic_fetch_add_explicit(object, operand, memory_order_seq_cst)
+#define	atomic_fetch_and(object, operand)				\
+	atomic_fetch_and_explicit(object, operand, memory_order_seq_cst)
+#define	atomic_fetch_or(object, operand)				\
+	atomic_fetch_or_explicit(object, operand, memory_order_seq_cst)
+#define	atomic_fetch_sub(object, operand)				\
+	atomic_fetch_sub_explicit(object, operand, memory_order_seq_cst)
+#define	atomic_fetch_xor(object, operand)				\
+	atomic_fetch_xor_explicit(object, operand, memory_order_seq_cst)
+#define	atomic_load(object)						\
+	atomic_load_explicit(object, memory_order_seq_cst)
+#define	atomic_store(object, desired)					\
+	atomic_store_explicit(object, desired, memory_order_seq_cst)
+#endif /* !_KERNEL */
+
+/*
+ * 7.17.8 Atomic flag type and operations.
+ *
+ * XXX: Assume atomic_bool can be used as an atomic_flag. Is there some
+ * kind of compiler built-in type we could use?
+ */
+
+typedef struct {
+	atomic_bool	__flag;	/* 0 = clear, 1 = set */
+} atomic_flag;
+
+#define	ATOMIC_FLAG_INIT		{ ATOMIC_VAR_INIT(0) }	/* initial state: clear */
+
+static __inline bool
+atomic_flag_test_and_set_explicit(volatile atomic_flag *__object,
+    memory_order __order)
+{
+	return (atomic_exchange_explicit(&__object->__flag, 1, __order)); /* store 1, return the previous value */
+}
+
+static __inline void
+atomic_flag_clear_explicit(volatile atomic_flag *__object, memory_order __order)
+{
+	/* Reset the flag by storing 0 with the caller-supplied memory order. */
+	atomic_store_explicit(&__object->__flag, 0, __order);
+}
+
+#ifndef _KERNEL
+static __inline bool
+atomic_flag_test_and_set(volatile atomic_flag *__object)
+{
+	/* Same as atomic_flag_test_and_set_explicit() with memory_order_seq_cst. */
+	return (atomic_flag_test_and_set_explicit(__object,
+	    memory_order_seq_cst));
+}
+
+static __inline void
+atomic_flag_clear(volatile atomic_flag *__object)
+{
+	/* Same as atomic_flag_clear_explicit() with memory_order_seq_cst. */
+	atomic_flag_clear_explicit(__object, memory_order_seq_cst);
+}
+#endif /* !_KERNEL */
+
+#endif /* !_STDATOMIC_H_ */
diff --git a/libc/include/sys/cdefs.h b/libc/include/sys/cdefs.h
index 685de32..0286616 100644
--- a/libc/include/sys/cdefs.h
+++ b/libc/include/sys/cdefs.h
@@ -38,6 +38,24 @@
 #define	_SYS_CDEFS_H_
 
 /*
+ * Testing against Clang-specific extensions; fallbacks for compilers lacking them.
+ */
+
+#ifndef __has_extension
+#define __has_extension         __has_feature
+#endif
+#ifndef __has_feature
+#define __has_feature(x)        0
+#endif
+#ifndef __has_include
+#define __has_include(x)        0
+#endif
+#ifndef __has_builtin
+#define __has_builtin(x)        0
+#endif
+
+
+/*
  * Macro to test if we're using a GNU C compiler of a specific vintage
  * or later, for e.g. features that appeared in a particular version
  * of GNU C.  Usage:
diff --git a/libc/include/sys/ucontext.h b/libc/include/sys/ucontext.h
index d6c6b43..f62380d 100644
--- a/libc/include/sys/ucontext.h
+++ b/libc/include/sys/ucontext.h
@@ -68,8 +68,13 @@
   struct ucontext* uc_link;
   stack_t uc_stack;
   mcontext_t uc_mcontext;
-  sigset_t uc_sigmask;
-  char __padding[128 - sizeof(sigset_t)];
+  // Android has a wrong (smaller) sigset_t on ARM.
+  union {
+    sigset_t bionic;
+    uint32_t kernel[2];
+  } uc_sigmask;
+  // The kernel adds extra padding after uc_sigmask to match glibc sigset_t on ARM.
+  char __padding[120];
   unsigned long uc_regspace[128] __attribute__((__aligned__(8)));
 } ucontext_t;
 
@@ -83,6 +88,7 @@
   struct ucontext *uc_link;
   stack_t uc_stack;
   sigset_t uc_sigmask;
+  // The kernel adds extra padding after uc_sigmask to match glibc sigset_t on ARM64.
   char __padding[128 - sizeof(sigset_t)];
   mcontext_t uc_mcontext;
 } ucontext_t;
@@ -146,8 +152,11 @@
   struct ucontext* uc_link;
   stack_t uc_stack;
   mcontext_t uc_mcontext;
-  sigset_t uc_sigmask;
-  char __padding[128 - sizeof(sigset_t)];
+  // Android has a wrong (smaller) sigset_t on x86.
+  union {
+    sigset_t bionic;
+    uint32_t kernel[2];
+  } uc_sigmask;
   struct _libc_fpstate __fpregs_mem;
 } ucontext_t;
 
@@ -198,7 +207,6 @@
   stack_t uc_stack;
   mcontext_t uc_mcontext;
   sigset_t uc_sigmask;
-  char __padding[128 - sizeof(sigset_t)];
 } ucontext_t;
 
 #elif defined(__mips64__)
@@ -275,7 +283,6 @@
   stack_t uc_stack;
   mcontext_t uc_mcontext;
   sigset_t uc_sigmask;
-  char __padding[128 - sizeof(sigset_t)];
   struct _libc_fpstate __fpregs_mem;
 } ucontext_t;
 
diff --git a/libc/include/unistd.h b/libc/include/unistd.h
index 6cb36d8..08a82e6 100644
--- a/libc/include/unistd.h
+++ b/libc/include/unistd.h
@@ -207,7 +207,7 @@
 
 /* Used to retry syscalls that can return EINTR. */
 #define TEMP_FAILURE_RETRY(exp) ({         \
-    typeof (exp) _rc;                      \
+    __typeof__(exp) _rc;                   \
     do {                                   \
         _rc = (exp);                       \
     } while (_rc == -1 && errno == EINTR); \
diff --git a/libc/private/bionic_futex.h b/libc/private/bionic_futex.h
index dd277ed..11699ce 100644
--- a/libc/private/bionic_futex.h
+++ b/libc/private/bionic_futex.h
@@ -28,42 +28,31 @@
 #ifndef _BIONIC_FUTEX_H
 #define _BIONIC_FUTEX_H
 
-#include <errno.h>
 #include <linux/futex.h>
+#include <sys/cdefs.h>
 #include <stdbool.h>
 #include <stddef.h>
-#include <sys/cdefs.h>
-#include <sys/syscall.h>
 
 __BEGIN_DECLS
 
 struct timespec;
 
-static inline int __futex(volatile void* ftx, int op, int value, const struct timespec* timeout) {
-  // Our generated syscall assembler sets errno, but our callers (pthread functions) don't want to.
-  int saved_errno = errno;
-  if (syscall(__NR_futex, ftx, op, value, timeout) == 0) {
-    return 0;
-  }
-  int result = -errno;
-  errno = saved_errno;
-  return result;
-}
+extern int __futex_syscall4(volatile void* ftx, int op, int value, const struct timespec* timeout);  // NOTE(review): presumably the per-arch assembly stub (e.g. futex_arm.S) returning -errno without touching errno - confirm
 
 static inline int __futex_wake(volatile void* ftx, int count) {
-  return __futex(ftx, FUTEX_WAKE, count, NULL);
+  return __futex_syscall4(ftx, FUTEX_WAKE, count, NULL);  // count goes in the value slot; no timeout is passed
 }
 
 static inline int __futex_wake_ex(volatile void* ftx, bool shared, int count) {
-  return __futex(ftx, shared ? FUTEX_WAKE : FUTEX_WAKE_PRIVATE, count, NULL);
+  return __futex_syscall4(ftx, shared ? FUTEX_WAKE : FUTEX_WAKE_PRIVATE, count, NULL);  // the _PRIVATE op is used only when !shared
 }
 
 static inline int __futex_wait(volatile void* ftx, int value, const struct timespec* timeout) {
-  return __futex(ftx, FUTEX_WAIT, value, timeout);
+  return __futex_syscall4(ftx, FUTEX_WAIT, value, timeout);  // value and timeout are forwarded unchanged
 }
 
 static inline int __futex_wait_ex(volatile void* ftx, bool shared, int value, const struct timespec* timeout) {
-  return __futex(ftx, shared ? FUTEX_WAIT : FUTEX_WAIT_PRIVATE, value, timeout);
+  return __futex_syscall4(ftx, shared ? FUTEX_WAIT : FUTEX_WAIT_PRIVATE, value, timeout);  // the _PRIVATE op is used only when !shared
 }
 
 __END_DECLS
diff --git a/libc/stdio/local.h b/libc/stdio/local.h
index 7ab73c0..151e009 100644
--- a/libc/stdio/local.h
+++ b/libc/stdio/local.h
@@ -41,12 +41,33 @@
 #include "wcio.h"
 #include "fileext.h"
 
+#if defined(__LP64__)
+/*
+ * Android <= KitKat had getc/putc macros in <stdio.h> that referred
+ * to __srget/__swbuf, so those symbols need to be public for LP32
+ * but can be hidden for LP64.
+ */
+__LIBC_HIDDEN__ int __srget(FILE*);
+__LIBC_HIDDEN__ int __swbuf(int, FILE*);
+
+/*
+ * The NDK apparently includes an android_support.a library that
+ * refers to __srefill in its copy of the vsnprintf implementation.
+ */
+/* TODO(LP64): __LIBC_HIDDEN__ int __srefill(FILE*);*/
+/* http://b/15291317: the LP64 NDK needs to be fixed to remove that cruft. */
+__LIBC_ABI_PUBLIC__ int __srefill(FILE*);
+#else /* !__LP64__: all three symbols stay public */
+__LIBC_ABI_PUBLIC__ int __srget(FILE*);
+__LIBC_ABI_PUBLIC__ int __swbuf(int, FILE*);
+__LIBC_ABI_PUBLIC__ int __srefill(FILE*);
+#endif /* __LP64__ */
+
 #pragma GCC visibility push(hidden)
 
 int	__sflush(FILE *);
 int	__sflush_locked(FILE *);
 FILE	*__sfp(void);
-int	__srefill(FILE *);
 int	__sread(void *, char *, int);
 int	__swrite(void *, const char *, int);
 fpos_t	__sseek(void *, fpos_t, int);
@@ -104,10 +125,8 @@
 #define NO_PRINTF_PERCENT_N
 
 /* OpenBSD exposes these in <stdio.h>, but we only want them exposed to the implementation. */
-__BEGIN_DECLS
 int __srget(FILE*);
 int __swbuf(int, FILE*);
-__END_DECLS
 #define __sfeof(p)     (((p)->_flags & __SEOF) != 0)
 #define __sferror(p)   (((p)->_flags & __SERR) != 0)
 #define __sclearerr(p) ((void)((p)->_flags &= ~(__SERR|__SEOF)))
diff --git a/tests/Android.mk b/tests/Android.mk
index 37aeec3..d753acc 100644
--- a/tests/Android.mk
+++ b/tests/Android.mk
@@ -87,6 +87,7 @@
     stack_protector_test.cpp \
     stack_unwinding_test.cpp \
     stack_unwinding_test_impl.c \
+    stdatomic_test.cpp \
     stdint_test.cpp \
     stdio_test.cpp \
     stdlib_test.cpp \
diff --git a/tests/stdatomic_test.cpp b/tests/stdatomic_test.cpp
new file mode 100644
index 0000000..44f5c7b
--- /dev/null
+++ b/tests/stdatomic_test.cpp
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#if !defined(__GLIBC__) /* TODO: fix our prebuilt toolchains! */
+
+#include <stdatomic.h>
+
+TEST(stdatomic, LOCK_FREE) {  // every ATOMIC_*_LOCK_FREE macro must evaluate to nonzero
+  ASSERT_TRUE(ATOMIC_BOOL_LOCK_FREE);
+  ASSERT_TRUE(ATOMIC_CHAR16_T_LOCK_FREE);
+  ASSERT_TRUE(ATOMIC_CHAR32_T_LOCK_FREE);
+  ASSERT_TRUE(ATOMIC_CHAR_LOCK_FREE);
+  ASSERT_TRUE(ATOMIC_INT_LOCK_FREE);
+  ASSERT_TRUE(ATOMIC_LLONG_LOCK_FREE);
+  ASSERT_TRUE(ATOMIC_LONG_LOCK_FREE);
+  ASSERT_TRUE(ATOMIC_POINTER_LOCK_FREE);
+  ASSERT_TRUE(ATOMIC_SHORT_LOCK_FREE);
+  ASSERT_TRUE(ATOMIC_WCHAR_T_LOCK_FREE);
+}
+
+TEST(stdatomic, init) {  // ATOMIC_VAR_INIT, atomic_init and ATOMIC_FLAG_INIT must each give the expected starting value
+  atomic_int v = ATOMIC_VAR_INIT(123);
+  ASSERT_EQ(123, atomic_load(&v));
+
+  atomic_init(&v, 456);
+  ASSERT_EQ(456, atomic_load(&v));
+
+  atomic_flag f = ATOMIC_FLAG_INIT;
+  ASSERT_FALSE(atomic_flag_test_and_set(&f));  // a freshly initialized flag reads as clear
+}
+
+TEST(stdatomic, atomic_thread_fence) {  // smoke test: calls the fence with each memory order; nothing is asserted
+  atomic_thread_fence(memory_order_relaxed);
+  atomic_thread_fence(memory_order_consume);
+  atomic_thread_fence(memory_order_acquire);
+  atomic_thread_fence(memory_order_release);
+  atomic_thread_fence(memory_order_acq_rel);
+  atomic_thread_fence(memory_order_seq_cst);
+}
+
+TEST(stdatomic, atomic_signal_fence) {  // smoke test: calls the fence with each memory order; nothing is asserted
+  atomic_signal_fence(memory_order_relaxed);
+  atomic_signal_fence(memory_order_consume);
+  atomic_signal_fence(memory_order_acquire);
+  atomic_signal_fence(memory_order_release);
+  atomic_signal_fence(memory_order_acq_rel);
+  atomic_signal_fence(memory_order_seq_cst);
+}
+
+TEST(stdatomic, atomic_is_lock_free) {  // checks one atomic_char and one atomic_intmax_t object
+  atomic_char small;
+  atomic_intmax_t big;
+  ASSERT_TRUE(atomic_is_lock_free(&small));
+  ASSERT_TRUE(atomic_is_lock_free(&big));
+}
+
+TEST(stdatomic, atomic_flag) {  // test_and_set reports the previous state; clear makes the next test_and_set report false
+  atomic_flag f = ATOMIC_FLAG_INIT;
+  ASSERT_FALSE(atomic_flag_test_and_set(&f));
+  ASSERT_TRUE(atomic_flag_test_and_set(&f));
+
+  atomic_flag_clear(&f);
+
+  ASSERT_FALSE(atomic_flag_test_and_set_explicit(&f, memory_order_relaxed));
+  ASSERT_TRUE(atomic_flag_test_and_set_explicit(&f, memory_order_relaxed));
+
+  atomic_flag_clear_explicit(&f, memory_order_relaxed);
+  ASSERT_FALSE(atomic_flag_test_and_set_explicit(&f, memory_order_relaxed));
+}
+
+TEST(stdatomic, atomic_store) {  // both store forms must be observable through the matching load
+  atomic_int i;
+  atomic_store(&i, 123);
+  ASSERT_EQ(123, atomic_load(&i));
+  atomic_store_explicit(&i, 456, memory_order_relaxed);  // distinct value, so a store that does nothing is caught
+  ASSERT_EQ(456, atomic_load_explicit(&i, memory_order_relaxed));
+}
+
+TEST(stdatomic, atomic_exchange) {  // each exchange must return the value that was stored before it
+  atomic_int i;
+  atomic_store(&i, 123);
+  ASSERT_EQ(123, atomic_exchange(&i, 456));
+  ASSERT_EQ(456, atomic_exchange_explicit(&i, 123, memory_order_relaxed));
+}
+
+TEST(stdatomic, atomic_compare_exchange) {  // first CAS succeeds (123 -> 456); the retry fails and writes 456 into expected
+  atomic_int i;
+  int expected;  // C11 7.17.7.4: `expected` points to a plain object, not an atomic one
+
+  atomic_store(&i, 123);
+  expected = 123;
+  ASSERT_TRUE(atomic_compare_exchange_strong(&i, &expected, 456));
+  ASSERT_FALSE(atomic_compare_exchange_strong(&i, &expected, 456));
+  ASSERT_EQ(456, expected);
+
+  atomic_store(&i, 123);
+  expected = 123;
+  ASSERT_TRUE(atomic_compare_exchange_strong_explicit(&i, &expected, 456, memory_order_relaxed, memory_order_relaxed));
+  ASSERT_FALSE(atomic_compare_exchange_strong_explicit(&i, &expected, 456, memory_order_relaxed, memory_order_relaxed));
+  ASSERT_EQ(456, expected);
+
+  atomic_store(&i, 123);
+  expected = 123;
+  ASSERT_TRUE(atomic_compare_exchange_weak(&i, &expected, 456));  // NOTE(review): C11 lets a weak CAS fail spuriously; this asserts success on the first try
+  ASSERT_FALSE(atomic_compare_exchange_weak(&i, &expected, 456));
+  ASSERT_EQ(456, expected);
+
+  atomic_store(&i, 123);
+  expected = 123;
+  ASSERT_TRUE(atomic_compare_exchange_weak_explicit(&i, &expected, 456, memory_order_relaxed, memory_order_relaxed));
+  ASSERT_FALSE(atomic_compare_exchange_weak_explicit(&i, &expected, 456, memory_order_relaxed, memory_order_relaxed));
+  ASSERT_EQ(456, expected);
+}
+
+TEST(stdatomic, atomic_fetch_add) {  // fetch_add returns the value held before the addition
+  atomic_int i = ATOMIC_VAR_INIT(123);
+  ASSERT_EQ(123, atomic_fetch_add(&i, 1));
+  ASSERT_EQ(124, atomic_fetch_add_explicit(&i, 1, memory_order_relaxed));
+  ASSERT_EQ(125, atomic_load(&i));
+}
+
+TEST(stdatomic, atomic_fetch_sub) {  // fetch_sub returns the value held before the subtraction
+  atomic_int i = ATOMIC_VAR_INIT(123);
+  ASSERT_EQ(123, atomic_fetch_sub(&i, 1));
+  ASSERT_EQ(122, atomic_fetch_sub_explicit(&i, 1, memory_order_relaxed));
+  ASSERT_EQ(121, atomic_load(&i));
+}
+
+TEST(stdatomic, atomic_fetch_or) {  // 0x100 | 0x020 = 0x120, then | 0x003 = 0x123; each call returns the prior value
+  atomic_int i = ATOMIC_VAR_INIT(0x100);
+  ASSERT_EQ(0x100, atomic_fetch_or(&i, 0x020));
+  ASSERT_EQ(0x120, atomic_fetch_or_explicit(&i, 0x003, memory_order_relaxed));
+  ASSERT_EQ(0x123, atomic_load(&i));
+}
+
+TEST(stdatomic, atomic_fetch_xor) {  // 0x100 ^ 0x120 = 0x020, then ^ 0x103 = 0x123; each call returns the prior value
+  atomic_int i = ATOMIC_VAR_INIT(0x100);
+  ASSERT_EQ(0x100, atomic_fetch_xor(&i, 0x120));
+  ASSERT_EQ(0x020, atomic_fetch_xor_explicit(&i, 0x103, memory_order_relaxed));
+  ASSERT_EQ(0x123, atomic_load(&i));
+}
+
+TEST(stdatomic, atomic_fetch_and) {  // 0x123 & 0x00f = 0x003, then & 0x2 = 0x002; each call returns the prior value
+  atomic_int i = ATOMIC_VAR_INIT(0x123);
+  ASSERT_EQ(0x123, atomic_fetch_and(&i, 0x00f));
+  ASSERT_EQ(0x003, atomic_fetch_and_explicit(&i, 0x2, memory_order_relaxed));
+  ASSERT_EQ(0x002, atomic_load(&i));
+}
+
+#endif