am d3f9e815: resolved conflicts for merge of 5d0ad38c to jb-mr2-dev

* commit 'd3f9e8155bfc17b2d67d2ffe5bc1d3a5e2656cd4':
  Upgrade pre-jb-mr2 releases to tzdata2013d.
diff --git a/libc/Android.mk b/libc/Android.mk
index 22ed2d8..c08cf77 100644
--- a/libc/Android.mk
+++ b/libc/Android.mk
@@ -6,67 +6,33 @@
 # =========================================================
 libc_common_src_files := \
 	$(syscall_src) \
-	unistd/abort.c \
 	unistd/alarm.c \
 	unistd/exec.c \
 	unistd/fnmatch.c \
-	unistd/getopt_long.c \
 	unistd/syslog.c \
 	unistd/system.c \
 	unistd/time.c \
 	stdio/asprintf.c \
-	stdio/clrerr.c \
-	stdio/fclose.c \
-	stdio/fdopen.c \
-	stdio/feof.c \
-	stdio/ferror.c \
 	stdio/fflush.c \
 	stdio/fgetc.c \
-	stdio/fgetln.c \
-	stdio/fgetpos.c \
-	stdio/fgets.c \
-	stdio/fileno.c \
 	stdio/findfp.c \
-	stdio/flags.c \
-	stdio/fopen.c \
 	stdio/fprintf.c \
-	stdio/fpurge.c \
 	stdio/fputc.c \
-	stdio/fputs.c \
 	stdio/fread.c \
 	stdio/freopen.c \
 	stdio/fscanf.c \
 	stdio/fseek.c \
-	stdio/fsetpos.c \
 	stdio/ftell.c \
-	stdio/funopen.c \
 	stdio/fvwrite.c \
-	stdio/fwalk.c \
-	stdio/fwrite.c \
-	stdio/getc.c \
-	stdio/getchar.c \
 	stdio/gets.c \
-	stdio/makebuf.c \
-	stdio/mktemp.c \
 	stdio/printf.c \
-	stdio/putc.c \
-	stdio/putchar.c \
-	stdio/puts.c \
-	stdio/putw.c \
 	stdio/refill.c \
-	stdio/remove.c \
 	stdio/rewind.c \
-	stdio/rget.c \
 	stdio/scanf.c \
-	stdio/setbuf.c \
-	stdio/setbuffer.c \
-	stdio/setvbuf.c \
 	stdio/snprintf.c\
 	stdio/sprintf.c \
 	stdio/sscanf.c \
 	stdio/stdio.c \
-	stdio/tempnam.c \
-	stdio/tmpnam.c \
 	stdio/ungetc.c \
 	stdio/vasprintf.c \
 	stdio/vfprintf.c \
@@ -77,13 +43,11 @@
 	stdio/vscanf.c \
 	stdio/vsscanf.c \
 	stdio/wbuf.c \
-	stdio/wsetup.c \
 	stdlib/atexit.c \
 	stdlib/ctype_.c \
 	stdlib/exit.c \
 	stdlib/getenv.c \
 	stdlib/putenv.c \
-	stdlib/qsort.c \
 	stdlib/setenv.c \
 	stdlib/strtod.c \
 	stdlib/strtoimax.c \
@@ -94,29 +58,17 @@
 	stdlib/strtoumax.c \
 	stdlib/tolower_.c \
 	stdlib/toupper_.c \
-	string/index.c \
 	string/strcasecmp.c \
-	string/strcat.c \
-	string/strchr.c \
 	string/strcspn.c \
 	string/strdup.c \
-	string/strlcat.c \
-	string/strlcpy.c \
-	string/strncat.c \
-	string/strncpy.c \
 	string/strpbrk.c \
-	string/strrchr.c \
+	string/__strrchr_chk.c \
 	string/strsep.c \
 	string/strspn.c \
 	string/strstr.c \
 	string/strtok.c \
 	wchar/wcswidth.c \
 	wchar/wcsxfrm.c \
-	tzcode/asctime.c \
-	tzcode/difftime.c \
-	tzcode/localtime.c \
-	tzcode/strftime.c \
-	tzcode/strptime.c \
 	bionic/arc4random.c \
 	bionic/atoi.c \
 	bionic/atol.c \
@@ -151,11 +103,8 @@
 	bionic/ldexp.c \
 	bionic/lseek64.c \
 	bionic/md5.c \
-	bionic/memchr.c \
 	bionic/memmem.c \
-	bionic/memrchr.c \
 	bionic/memswap.c \
-	bionic/mmap.c \
 	bionic/openat.c \
 	bionic/open.c \
 	bionic/pathconf.c \
@@ -170,7 +119,6 @@
 	bionic/recv.c \
 	bionic/sched_cpualloc.c \
 	bionic/sched_cpucount.c \
-	bionic/sched_getaffinity.c \
 	bionic/sched_getcpu.c \
 	bionic/semaphore.c \
 	bionic/send.c \
@@ -190,11 +138,11 @@
 	bionic/sleep.c \
 	bionic/statfs.c \
 	bionic/strndup.c \
-	bionic/strnlen.c \
 	bionic/strntoimax.c \
 	bionic/strntoumax.c \
 	bionic/strtotimeval.c \
 	bionic/system_properties.c \
+	bionic/system_properties_compat.c \
 	bionic/tcgetpgrp.c \
 	bionic/tcsetpgrp.c \
 	bionic/thread_atexit.c \
@@ -234,6 +182,7 @@
 	netbsd/nameser/ns_samedomain.c \
 
 libc_bionic_src_files := \
+    bionic/abort.cpp \
     bionic/assert.cpp \
     bionic/brk.cpp \
     bionic/dirent.cpp \
@@ -241,6 +190,7 @@
     bionic/eventfd_read.cpp \
     bionic/eventfd_write.cpp \
     bionic/__fgets_chk.cpp \
+    bionic/futimens.cpp \
     bionic/getauxval.cpp \
     bionic/getcwd.cpp \
     bionic/libc_init_common.cpp \
@@ -249,6 +199,7 @@
     bionic/__memcpy_chk.cpp \
     bionic/__memmove_chk.cpp \
     bionic/__memset_chk.cpp \
+    bionic/mmap.cpp \
     bionic/pthread_attr.cpp \
     bionic/pthread_detach.cpp \
     bionic/pthread_equal.cpp \
@@ -264,11 +215,14 @@
     bionic/raise.cpp \
     bionic/sbrk.cpp \
     bionic/scandir.cpp \
+    bionic/sched_getaffinity.cpp \
     bionic/__set_errno.cpp \
     bionic/setlocale.cpp \
     bionic/signalfd.cpp \
     bionic/sigwait.cpp \
+    bionic/statvfs.cpp \
     bionic/__strcat_chk.cpp \
+    bionic/__strchr_chk.cpp \
     bionic/__strcpy_chk.cpp \
     bionic/strerror.cpp \
     bionic/strerror_r.cpp \
@@ -288,32 +242,67 @@
     bionic/wait.cpp \
     bionic/wchar.cpp \
 
+libc_tzcode_src_files := \
+    tzcode/asctime.c \
+    tzcode/difftime.c \
+    tzcode/localtime.c \
+    tzcode/strftime.c \
+    tzcode/strptime.c \
+
 libc_upstream_freebsd_src_files := \
+    upstream-freebsd/lib/libc/stdio/clrerr.c \
+    upstream-freebsd/lib/libc/stdio/fclose.c \
+    upstream-freebsd/lib/libc/stdio/fdopen.c \
+    upstream-freebsd/lib/libc/stdio/feof.c \
+    upstream-freebsd/lib/libc/stdio/ferror.c \
+    upstream-freebsd/lib/libc/stdio/fgetln.c \
+    upstream-freebsd/lib/libc/stdio/fgetpos.c \
+    upstream-freebsd/lib/libc/stdio/fgets.c \
+    upstream-freebsd/lib/libc/stdio/fileno.c \
+    upstream-freebsd/lib/libc/stdio/flags.c \
+    upstream-freebsd/lib/libc/stdio/fopen.c \
+    upstream-freebsd/lib/libc/stdio/fpurge.c \
+    upstream-freebsd/lib/libc/stdio/fputs.c \
+    upstream-freebsd/lib/libc/stdio/fsetpos.c \
+    upstream-freebsd/lib/libc/stdio/funopen.c \
+    upstream-freebsd/lib/libc/stdio/fwalk.c \
+    upstream-freebsd/lib/libc/stdio/fwrite.c \
+    upstream-freebsd/lib/libc/stdio/getc.c \
+    upstream-freebsd/lib/libc/stdio/getchar.c \
+    upstream-freebsd/lib/libc/stdio/makebuf.c \
+    upstream-freebsd/lib/libc/stdio/mktemp.c \
+    upstream-freebsd/lib/libc/stdio/putc.c \
+    upstream-freebsd/lib/libc/stdio/putchar.c \
+    upstream-freebsd/lib/libc/stdio/puts.c \
+    upstream-freebsd/lib/libc/stdio/putw.c \
+    upstream-freebsd/lib/libc/stdio/remove.c \
+    upstream-freebsd/lib/libc/stdio/rget.c \
+    upstream-freebsd/lib/libc/stdio/setbuf.c \
+    upstream-freebsd/lib/libc/stdio/setbuffer.c \
+    upstream-freebsd/lib/libc/stdio/setvbuf.c \
+    upstream-freebsd/lib/libc/stdio/tempnam.c \
+    upstream-freebsd/lib/libc/stdio/tmpnam.c \
+    upstream-freebsd/lib/libc/stdio/wsetup.c \
+    upstream-freebsd/lib/libc/stdlib/getopt_long.c \
+    upstream-freebsd/lib/libc/stdlib/qsort.c \
     upstream-freebsd/lib/libc/stdlib/realpath.c \
     upstream-freebsd/lib/libc/string/wcpcpy.c \
     upstream-freebsd/lib/libc/string/wcpncpy.c \
     upstream-freebsd/lib/libc/string/wcscasecmp.c \
-    upstream-freebsd/lib/libc/string/wcscat.c \
-    upstream-freebsd/lib/libc/string/wcschr.c \
-    upstream-freebsd/lib/libc/string/wcscmp.c \
-    upstream-freebsd/lib/libc/string/wcscpy.c \
     upstream-freebsd/lib/libc/string/wcscspn.c \
     upstream-freebsd/lib/libc/string/wcsdup.c \
     upstream-freebsd/lib/libc/string/wcslcat.c \
     upstream-freebsd/lib/libc/string/wcslcpy.c \
-    upstream-freebsd/lib/libc/string/wcslen.c \
     upstream-freebsd/lib/libc/string/wcsncasecmp.c \
     upstream-freebsd/lib/libc/string/wcsncat.c \
     upstream-freebsd/lib/libc/string/wcsncmp.c \
     upstream-freebsd/lib/libc/string/wcsncpy.c \
     upstream-freebsd/lib/libc/string/wcsnlen.c \
     upstream-freebsd/lib/libc/string/wcspbrk.c \
-    upstream-freebsd/lib/libc/string/wcsrchr.c \
     upstream-freebsd/lib/libc/string/wcsspn.c \
     upstream-freebsd/lib/libc/string/wcsstr.c \
     upstream-freebsd/lib/libc/string/wcstok.c \
     upstream-freebsd/lib/libc/string/wmemchr.c \
-    upstream-freebsd/lib/libc/string/wmemcmp.c \
     upstream-freebsd/lib/libc/string/wmemcpy.c \
     upstream-freebsd/lib/libc/string/wmemmove.c \
     upstream-freebsd/lib/libc/string/wmemset.c \
@@ -369,6 +358,24 @@
 	bionic/memmove.c.arm \
 	string/bcopy.c \
 	string/strncmp.c \
+	string/strcat.c \
+	string/strncat.c \
+	string/strncpy.c \
+	bionic/strchr.cpp \
+	string/strrchr.c \
+	bionic/memchr.c \
+	bionic/memrchr.c \
+	string/index.c \
+	bionic/strnlen.c \
+	string/strlcat.c \
+	string/strlcpy.c \
+	upstream-freebsd/lib/libc/string/wcschr.c \
+	upstream-freebsd/lib/libc/string/wcsrchr.c \
+	upstream-freebsd/lib/libc/string/wcscmp.c \
+	upstream-freebsd/lib/libc/string/wcscpy.c \
+	upstream-freebsd/lib/libc/string/wmemcmp.c \
+	upstream-freebsd/lib/libc/string/wcslen.c \
+	upstream-freebsd/lib/libc/string/wcscat.c
 
 # These files need to be arm so that gdbserver
 # can set breakpoints in them without messing
@@ -392,7 +399,6 @@
     bionic/pthread-rwlocks.c \
     bionic/pthread-timers.c \
     bionic/ptrace.c \
-    string/strcpy.c \
 
 libc_static_common_src_files += \
     bionic/pthread.c \
@@ -407,7 +413,25 @@
 	string/bcopy.c \
 	string/strcmp.c \
 	string/strcpy.c \
-	string/strncmp.c
+	string/strncmp.c \
+	string/strcat.c \
+	string/strncat.c \
+	string/strncpy.c \
+	bionic/strchr.cpp \
+	string/strrchr.c \
+	bionic/memchr.c \
+	bionic/memrchr.c \
+	string/index.c \
+	bionic/strnlen.c \
+	string/strlcat.c \
+	string/strlcpy.c \
+	upstream-freebsd/lib/libc/string/wcschr.c \
+	upstream-freebsd/lib/libc/string/wcsrchr.c \
+	upstream-freebsd/lib/libc/string/wcscmp.c \
+	upstream-freebsd/lib/libc/string/wcscpy.c \
+	upstream-freebsd/lib/libc/string/wmemcmp.c \
+	upstream-freebsd/lib/libc/string/wcslen.c \
+	upstream-freebsd/lib/libc/string/wcscat.c
 
 libc_common_src_files += \
 	bionic/pthread-atfork.c \
@@ -469,14 +493,6 @@
     -DLOG_ON_HEAP_ERROR \
     -Wall -Wextra
 
-# these macro definitions are required to implement the
-# 'timezone' and 'daylight' global variables, as well as
-# properly update the 'tm_gmtoff' field in 'struct tm'.
-#
-libc_common_cflags += \
-    -DTM_GMTOFF=tm_gmtoff \
-    -DUSG_COMPAT=1
-
 ifeq ($(strip $(DEBUG_BIONIC_LIBC)),true)
   libc_common_cflags += -DDEBUG
 endif
@@ -554,23 +570,17 @@
 # static C++ destructors are properly called on dlclose().
 #
 ifeq ($(TARGET_ARCH),arm)
-    libc_crtbegin_extension := c
     libc_crt_target_so_cflags :=
 endif
 ifeq ($(TARGET_ARCH),mips)
-    libc_crtbegin_extension := S
     libc_crt_target_so_cflags := -fPIC
 endif
 ifeq ($(TARGET_ARCH),x86)
-    libc_crtbegin_extension := c
     libc_crt_target_so_cflags := -fPIC
 endif
-ifeq ($(libc_crtbegin_extension),)
-    $(error $(TARGET_ARCH) not supported)
-endif
 libc_crt_target_so_cflags += $(libc_crt_target_cflags)
-libc_crt_target_crtbegin_file := $(LOCAL_PATH)/arch-$(TARGET_ARCH)/bionic/crtbegin.$(libc_crtbegin_extension)
-libc_crt_target_crtbegin_so_file := $(LOCAL_PATH)/arch-$(TARGET_ARCH)/bionic/crtbegin_so.$(libc_crtbegin_extension)
+libc_crt_target_crtbegin_file := $(LOCAL_PATH)/arch-$(TARGET_ARCH)/bionic/crtbegin.c
+libc_crt_target_crtbegin_so_file := $(LOCAL_PATH)/arch-$(TARGET_ARCH)/bionic/crtbegin_so.c
 
 # See the comment in crtbrand.c for the reason why we need to generate
 # crtbrand.s before generating crtbrand.o.
@@ -687,6 +697,28 @@
 
 
 # ========================================================
+# libc_tzcode.a - upstream 'tzcode' code
+# ========================================================
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(libc_tzcode_src_files)
+LOCAL_CFLAGS := \
+    $(libc_common_cflags) \
+    -std=gnu99 \
+    -DSTD_INSPIRED=1 \
+    -DTZDIR=\"/system/usr/share/zoneinfo\" \
+    -DTM_GMTOFF=tm_gmtoff \
+    -DUSG_COMPAT=1
+LOCAL_C_INCLUDES := $(libc_common_c_includes)
+LOCAL_MODULE := libc_tzcode
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
+LOCAL_SYSTEM_SHARED_LIBRARIES :=
+
+include $(BUILD_STATIC_LIBRARY)
+
+
+# ========================================================
 # libc_freebsd.a - upstream FreeBSD C library code
 # ========================================================
 #
@@ -761,7 +793,12 @@
 LOCAL_C_INCLUDES := $(libc_common_c_includes)
 LOCAL_MODULE := libc_common
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
-LOCAL_WHOLE_STATIC_LIBRARIES := libbionic_ssp libc_bionic libc_freebsd libc_netbsd
+LOCAL_WHOLE_STATIC_LIBRARIES := \
+    libbionic_ssp \
+    libc_bionic \
+    libc_freebsd \
+    libc_netbsd \
+    libc_tzcode
 LOCAL_SYSTEM_SHARED_LIBRARIES :=
 
 # TODO: split out the asflags.
diff --git a/libc/NOTICE b/libc/NOTICE
index 730fa46..8ed455c 100644
--- a/libc/NOTICE
+++ b/libc/NOTICE
@@ -1677,38 +1677,6 @@
    The Regents of the University of California.  All rights reserved.
 
 This code is derived from software contributed to Berkeley by
-Chris Torek.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-1. Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-2. Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the distribution.
-4. Neither the name of the University nor the names of its contributors
-   may be used to endorse or promote products derived from this software
-   without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGE.
-
--------------------------------------------------------------------
-
-Copyright (c) 1990, 1993
-   The Regents of the University of California.  All rights reserved.
-
-This code is derived from software contributed to Berkeley by
 Donn Seeley at UUNET Technologies, Inc.
 
 Redistribution and use in source and binary forms, with or without
@@ -2852,7 +2820,7 @@
 2. Redistributions in binary form must reproduce the above copyright
    notices, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
-4. Neither the name of the University nor the names of its contributors
+3. Neither the name of the University nor the names of its contributors
    may be used to endorse or promote products derived from this software
    without specific prior written permission.
 
@@ -3092,13 +3060,6 @@
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
-3. All advertising materials mentioning features or use of this software
-   must display the following acknowledgement:
-       This product includes software developed by the NetBSD
-       Foundation, Inc. and its contributors.
-4. Neither the name of The NetBSD Foundation nor the names of its
-   contributors may be used to endorse or promote products derived
-   from this software without specific prior written permission.
 
 THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
@@ -3916,6 +3877,36 @@
 
 -------------------------------------------------------------------
 
+Copyright (c) 2010, 2011 Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+
+   Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+   Neither the name of Intel Corporation nor the names of its contributors
+   may be used to endorse or promote products derived from this software
+   without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-------------------------------------------------------------------
+
 Copyright (c) 2010, Intel Corporation
 All rights reserved.
 
@@ -3946,6 +3937,36 @@
 
 -------------------------------------------------------------------
 
+Copyright (c) 2011 Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+
+   Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+   Neither the name of Intel Corporation nor the names of its contributors
+   may be used to endorse or promote products derived from this software
+   without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-------------------------------------------------------------------
+
 Copyright (c) 2011 The Android Open Source Project
 Copyright (c) 2008 ARM Ltd
 All rights reserved.
@@ -3975,6 +3996,36 @@
 
 -------------------------------------------------------------------
 
+Copyright (c) 2011, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+   Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+
+   Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+   Neither the name of Intel Corporation nor the names of its contributors
+   may be used to endorse or promote products derived from this software
+   without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-------------------------------------------------------------------
+
 Copyright (c) 2011, VMware, Inc.
 All rights reserved.
 
@@ -4058,6 +4109,38 @@
 
 -------------------------------------------------------------------
 
+Copyright (c) 2013, Linaro Limited
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+     Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+     Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+     Neither the name of Linaro Limited nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-------------------------------------------------------------------
+
 Copyright (c)1999 Citrus Project,
 All rights reserved.
 
diff --git a/libc/SYSCALLS.TXT b/libc/SYSCALLS.TXT
index 96cc9e6..88c980f 100644
--- a/libc/SYSCALLS.TXT
+++ b/libc/SYSCALLS.TXT
@@ -201,6 +201,8 @@
 int     lremovexattr(const char *, const char *) 1
 int     __statfs64:statfs64(const char *, size_t, struct statfs *)  1
 long    unshare(unsigned long)  1
+int     swapon(const char *, int) 1
+int     swapoff(const char *) 1
 
 # time
 int           pause ()                       1
@@ -221,6 +223,9 @@
 int           __timer_delete:timer_delete(timer_t)                                                      1
 int           utimes(const char*, const struct timeval tvp[2])                          1
 int           utimensat(int, const char *, const struct timespec times[2], int)         1
+int           timerfd_create(clockid_t, int)   1
+int           timerfd_settime(int, int, const struct itimerspec *, struct itimerspec *)   1
+int           timerfd_gettime(int, struct itimerspec *)   1
 
 # signals
 int     sigaction(int, const struct sigaction *, struct sigaction *)  1
diff --git a/libc/arch-arm/arm.mk b/libc/arch-arm/arm.mk
index d15ec9d..1a2185f 100644
--- a/libc/arch-arm/arm.mk
+++ b/libc/arch-arm/arm.mk
@@ -15,7 +15,6 @@
     arch-arm/bionic/setjmp.S \
     arch-arm/bionic/sigsetjmp.S \
     arch-arm/bionic/strcpy.S \
-    arch-arm/bionic/strlen.c.arm \
     arch-arm/bionic/syscall.S \
     arch-arm/bionic/tgkill.S \
     arch-arm/bionic/tkill.S \
@@ -29,7 +28,7 @@
     arch-arm/bionic/exidx_dynamic.c
 
 ifeq ($(strip $(wildcard bionic/libc/arch-arm/$(TARGET_CPU_VARIANT)/$(TARGET_CPU_VARIANT).mk)),)
-$(error "TARGET_CPU_VARIANT not set or set to an unknown value. Possible values are cortex-a9, cortex-a15, krait. Use generic for devices that do not have a CPU similar to any of the supported cpu variants.")
+$(error "TARGET_CPU_VARIANT not set or set to an unknown value. Possible values are cortex-a7, cortex-a8, cortex-a9, cortex-a15, krait. Use generic for devices that do not have a CPU similar to any of the supported cpu variants.")
 endif
 
 include bionic/libc/arch-arm/$(TARGET_CPU_VARIANT)/$(TARGET_CPU_VARIANT).mk
diff --git a/libc/arch-arm/bionic/abort_arm.S b/libc/arch-arm/bionic/abort_arm.S
index 51b0871..e1ab86b 100644
--- a/libc/arch-arm/bionic/abort_arm.S
+++ b/libc/arch-arm/bionic/abort_arm.S
@@ -38,5 +38,5 @@
 ENTRY(abort)
     .save   {r3, r14}
     stmfd   sp!, {r3, r14}
-    blx     PIC_SYM(_C_LABEL(__libc_android_abort), PLT)
+    bl      PIC_SYM(_C_LABEL(__libc_android_abort), PLT)
 END(abort)
diff --git a/libc/arch-arm/bionic/clone.S b/libc/arch-arm/bionic/clone.S
index 01eb966..d00b6a6 100644
--- a/libc/arch-arm/bionic/clone.S
+++ b/libc/arch-arm/bionic/clone.S
@@ -32,33 +32,33 @@
 
 // int  __pthread_clone(void* (*fn)(void*), void* child_stack, int flags, void* arg);
 ENTRY(__pthread_clone)
-    # Copy the args onto the new stack.
+    # Push 'fn' and 'arg' onto 'child_stack'.
     stmdb   r1!, {r0, r3}
 
     # The sys_clone system call only takes two arguments: 'flags' and 'child_stack'.
     # 'child_stack' is already in r1, but we need to move 'flags' into position.
     mov     r0, r2
-    stmfd   sp!, {r4, r7}
 
     # System call.
+    mov     ip, r7
     ldr     r7, =__NR_clone
     swi     #0
+
+    # Child?
     movs    r0, r0
     beq     1f
 
-    # In parent, reload saved registers then either return or set errno.
-    ldmfd   sp!, {r4, r7}
+    # Parent.
+    mov     r7, ip
     cmn     r0, #(MAX_ERRNO + 1)
     bxls    lr
     neg     r0, r0
     b       __set_errno
 
-1:  # The child.
-    # pick the function arg and call address off the stack and jump
-    # to the C __thread_entry function which does some setup and then
-    # calls the thread's start function
+1:  # Child.
+    # Pop 'fn' and 'arg' back off the stack and call __thread_entry.
     pop     {r0, r1}
-    # __thread_entry needs the TLS pointer
+    # __thread_entry also needs our stack pointer.
     mov     r2, sp
     b       __thread_entry
 END(__pthread_clone)
diff --git a/libc/arch-arm/bionic/futex_arm.S b/libc/arch-arm/bionic/futex_arm.S
index e21a385..4131cdb 100644
--- a/libc/arch-arm/bionic/futex_arm.S
+++ b/libc/arch-arm/bionic/futex_arm.S
@@ -34,11 +34,10 @@
 
 // __futex_syscall3(*ftx, op, val)
 ENTRY(__futex_syscall3)
-    stmdb   sp!, {r4, r7}
-    .save   {r4, r7}
+    mov     ip, r7
     ldr     r7, =__NR_futex
     swi     #0
-    ldmia   sp!, {r4, r7}
+    mov     r7, ip
     bx      lr
 END(__futex_syscall3)
 
@@ -49,25 +48,23 @@
 
 // __futex_wait(*ftx, val, *timespec)
 ENTRY(__futex_wait)
-    stmdb   sp!, {r4, r7}
-    .save   {r4, r7}
+    mov     ip, r7
     mov     r3, r2
     mov     r2, r1
     mov     r1, #FUTEX_WAIT
     ldr     r7, =__NR_futex
     swi     #0
-    ldmia   sp!, {r4, r7}
+    mov     r7, ip
     bx      lr
 END(__futex_wait)
 
 // __futex_wake(*ftx, counter)
 ENTRY(__futex_wake)
-    .save   {r4, r7}
-    stmdb   sp!, {r4, r7}
+    mov     ip, r7
     mov     r2, r1
     mov     r1, #FUTEX_WAKE
     ldr     r7, =__NR_futex
     swi     #0
-    ldmia   sp!, {r4, r7}
+    mov     r7, ip
     bx      lr
 END(__futex_wake)
diff --git a/libc/arch-arm/bionic/libgcc_compat.c b/libc/arch-arm/bionic/libgcc_compat.c
index e4473f3..f57dfb0 100644
--- a/libc/arch-arm/bionic/libgcc_compat.c
+++ b/libc/arch-arm/bionic/libgcc_compat.c
@@ -30,8 +30,8 @@
  * dynamic linker to copy their definition into the final libc.so binary.
  *
  * They are required to ensure backwards binary compatibility with
- * Android 1.5, 1.6 and even 3.0  system images. Some applications built
- * using the NDK require them to be here.
+ * libc.so provided by the platform and binaries built with the NDK or
+ * different versions/configurations of toolchains.
  *
  * Now, for a more elaborate description of the issue:
  *
@@ -48,7 +48,9 @@
  *         gcc <options> -o libfoo.so  foo.a libgcc.a -lc -lm
  *
  * This ensures that any helper function needed by the code in foo.a is copied
- * into the final libfoo.so. Unfortunately, the Android build system has been
+ * into the final libfoo.so. However, doing so will link a bunch of other __cxa
+ * functions from libgcc.a into each .so and executable, causing a 4k+ increase
+ * in every binary. Therefore the Android platform build system has been
  * using this instead:
  *
  *         gcc <options> -o libfoo.so foo.a -lc -lm libgcc.a
@@ -58,9 +60,10 @@
  * into libfoo.so. Instead, a symbol import definition will be added to it
  * so libfoo.so can directly call the one in libc.so at runtime.
  *
- * When changing toolchains for 2.0, the set of helper functions copied to
- * libc.so changed, which resulted in some native shared libraries generated
- * with the NDK to fail to load properly.
+ * When refreshing toolchains for new versions or using different architecture
+ * flags, the set of helper functions copied to libc.so may change, which
+ * can cause some native shared libraries generated with the NDK, or prebuilts
+ * from vendors, to fail to load properly.
  *
  * The NDK has been fixed after 1.6_r1 to use the correct link command, so
  * any native shared library generated with it should now be safe from that
@@ -73,6 +76,11 @@
  * but it is easier to add a single function here than asking several app
  * developers to fix their build.
  *
+ * The __aeabi_idiv function is added to the list because cortex-a15 supports
+ * HW idiv instructions, so the system libc.so doesn't pull in a reference to
+ * __aeabi_idiv, but legacy libraries built against cortex-a9 targets still
+ * need it.
+ *
  * Final note: some of the functions below should really be in libm.so to
  *             completely reflect the state of 1.5/1.6 system images. However,
  *             since libm.so depends on libc.so, it's easier to put all of
@@ -113,6 +121,7 @@
     XX(__aeabi_idiv)         \
     XX(__aeabi_l2d)          \
     XX(__aeabi_l2f)          \
+    XX(__aeabi_lasr)         \
     XX(__aeabi_lmul)         \
     XX(__aeabi_llsl)         \
     XX(__aeabi_llsr)         \
@@ -123,7 +132,7 @@
     XX(__cmpdf2)             \
     XX(__divdf3)             \
     XX(__divsf3)             \
-    XX(__eqdf2)             \
+    XX(__eqdf2)              \
     XX(__extendsfdf2)        \
     XX(__fixdfsi)            \
     XX(__fixsfsi)            \
diff --git a/libc/arch-arm/bionic/memcpy.a9.S b/libc/arch-arm/bionic/memcpy.a9.S
new file mode 100644
index 0000000..2ba1ff5
--- /dev/null
+++ b/libc/arch-arm/bionic/memcpy.a9.S
@@ -0,0 +1,614 @@
+/* Copyright (c) 2013, Linaro Limited
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+      * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+      * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+      * Neither the name of Linaro Limited nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ */
+
+/*
+   This memcpy routine is optimised for Cortex-A15 cores and takes advantage
+   of VFP or NEON when built with the appropriate flags.
+
+   Assumptions:
+
+    ARMv6 (ARMv7-a if using Neon)
+    ARM state
+    Unaligned accesses
+    LDRD/STRD support unaligned word accesses
+
+ */
+
+#include <machine/cpu-features.h>
+#include <machine/asm.h>
+
+	.syntax unified
+	/* This implementation requires ARM state.  */
+	.arm
+
+#ifdef __ARM_NEON__
+
+	.fpu	neon
+	.arch	armv7-a
+# define FRAME_SIZE	4
+# define USE_VFP
+# define USE_NEON
+
+#elif !defined (__SOFTFP__)
+
+	.arch	armv6
+	.fpu	vfpv2
+# define FRAME_SIZE	32
+# define USE_VFP
+
+#else
+	.arch	armv6
+# define FRAME_SIZE    32
+
+#endif
+
+/* Old versions of GAS incorrectly implement the NEON align semantics.  */
+#ifdef BROKEN_ASM_NEON_ALIGN
+#define ALIGN(addr, align) addr,:align
+#else
+#define ALIGN(addr, align) addr:align
+#endif
+
+#define PC_OFFSET	8	/* PC pipeline compensation.  */
+#define INSN_SIZE	4
+
+/* Call parameters.  */
+#define dstin	r0
+#define src	r1
+#define count	r2
+
+/* Locals.  */
+#define tmp1	r3
+#define dst	ip
+#define tmp2	r10
+
+#ifndef USE_NEON
+/* For bulk copies using GP registers.  */
+#define	A_l	r2		/* Call-clobbered.  */
+#define	A_h	r3		/* Call-clobbered.  */
+#define	B_l	r4
+#define	B_h	r5
+#define	C_l	r6
+#define	C_h	r7
+#define	D_l	r8
+#define	D_h	r9
+#endif
+
+/* Number of lines ahead to pre-fetch data.  If you change this the code
+   below will need adjustment to compensate.  */
+
+#define prefetch_lines	5
+
+#ifdef USE_VFP
+	.macro	cpy_line_vfp vreg, base
+	vstr	\vreg, [dst, #\base]
+	vldr	\vreg, [src, #\base]
+	vstr	d0, [dst, #\base + 8]
+	vldr	d0, [src, #\base + 8]
+	vstr	d1, [dst, #\base + 16]
+	vldr	d1, [src, #\base + 16]
+	vstr	d2, [dst, #\base + 24]
+	vldr	d2, [src, #\base + 24]
+	vstr	\vreg, [dst, #\base + 32]
+	vldr	\vreg, [src, #\base + prefetch_lines * 64 - 32]
+	vstr	d0, [dst, #\base + 40]
+	vldr	d0, [src, #\base + 40]
+	vstr	d1, [dst, #\base + 48]
+	vldr	d1, [src, #\base + 48]
+	vstr	d2, [dst, #\base + 56]
+	vldr	d2, [src, #\base + 56]
+	.endm
+
+	.macro	cpy_tail_vfp vreg, base
+	vstr	\vreg, [dst, #\base]
+	vldr	\vreg, [src, #\base]
+	vstr	d0, [dst, #\base + 8]
+	vldr	d0, [src, #\base + 8]
+	vstr	d1, [dst, #\base + 16]
+	vldr	d1, [src, #\base + 16]
+	vstr	d2, [dst, #\base + 24]
+	vldr	d2, [src, #\base + 24]
+	vstr	\vreg, [dst, #\base + 32]
+	vstr	d0, [dst, #\base + 40]
+	vldr	d0, [src, #\base + 40]
+	vstr	d1, [dst, #\base + 48]
+	vldr	d1, [src, #\base + 48]
+	vstr	d2, [dst, #\base + 56]
+	vldr	d2, [src, #\base + 56]
+	.endm
+#endif
+
+	.p2align 6
+ENTRY(memcpy)
+
+	mov	dst, dstin	/* Preserve dstin, we need to return it.  */
+	cmp	count, #64
+	bge	.Lcpy_not_short
+	/* Deal with small copies quickly by dropping straight into the
+	   exit block.  */
+
+.Ltail63unaligned:
+#ifdef USE_NEON
+	and	tmp1, count, #0x38
+	rsb	tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
+	add	pc, pc, tmp1
+	vld1.8	{d0}, [src]!	/* 14 words to go.  */
+	vst1.8	{d0}, [dst]!
+	vld1.8	{d0}, [src]!	/* 12 words to go.  */
+	vst1.8	{d0}, [dst]!
+	vld1.8	{d0}, [src]!	/* 10 words to go.  */
+	vst1.8	{d0}, [dst]!
+	vld1.8	{d0}, [src]!	/* 8 words to go.  */
+	vst1.8	{d0}, [dst]!
+	vld1.8	{d0}, [src]!	/* 6 words to go.  */
+	vst1.8	{d0}, [dst]!
+	vld1.8	{d0}, [src]!	/* 4 words to go.  */
+	vst1.8	{d0}, [dst]!
+	vld1.8	{d0}, [src]!	/* 2 words to go.  */
+	vst1.8	{d0}, [dst]!
+
+	tst	count, #4
+	ldrne	tmp1, [src], #4
+	strne	tmp1, [dst], #4
+#else
+	/* Copy up to 15 full words of data.  May not be aligned.  */
+	/* Cannot use VFP for unaligned data.  */
+	and	tmp1, count, #0x3c
+	add	dst, dst, tmp1
+	add	src, src, tmp1
+	rsb	tmp1, tmp1, #(60 - PC_OFFSET/2 + INSN_SIZE/2)
+	/* Jump directly into the sequence below at the correct offset.  */
+	add	pc, pc, tmp1, lsl #1
+
+	ldr	tmp1, [src, #-60]	/* 15 words to go.  */
+	str	tmp1, [dst, #-60]
+
+	ldr	tmp1, [src, #-56]	/* 14 words to go.  */
+	str	tmp1, [dst, #-56]
+	ldr	tmp1, [src, #-52]
+	str	tmp1, [dst, #-52]
+
+	ldr	tmp1, [src, #-48]	/* 12 words to go.  */
+	str	tmp1, [dst, #-48]
+	ldr	tmp1, [src, #-44]
+	str	tmp1, [dst, #-44]
+
+	ldr	tmp1, [src, #-40]	/* 10 words to go.  */
+	str	tmp1, [dst, #-40]
+	ldr	tmp1, [src, #-36]
+	str	tmp1, [dst, #-36]
+
+	ldr	tmp1, [src, #-32]	/* 8 words to go.  */
+	str	tmp1, [dst, #-32]
+	ldr	tmp1, [src, #-28]
+	str	tmp1, [dst, #-28]
+
+	ldr	tmp1, [src, #-24]	/* 6 words to go.  */
+	str	tmp1, [dst, #-24]
+	ldr	tmp1, [src, #-20]
+	str	tmp1, [dst, #-20]
+
+	ldr	tmp1, [src, #-16]	/* 4 words to go.  */
+	str	tmp1, [dst, #-16]
+	ldr	tmp1, [src, #-12]
+	str	tmp1, [dst, #-12]
+
+	ldr	tmp1, [src, #-8]	/* 2 words to go.  */
+	str	tmp1, [dst, #-8]
+	ldr	tmp1, [src, #-4]
+	str	tmp1, [dst, #-4]
+#endif
+
+	lsls	count, count, #31
+	ldrhcs	tmp1, [src], #2
+	ldrbne	src, [src]		/* Src is dead, use as a scratch.  */
+	strhcs	tmp1, [dst], #2
+	strbne	src, [dst]
+	bx	lr
+
+.Lcpy_not_short:
+	/* At least 64 bytes to copy, but don't know the alignment yet.  */
+	str	tmp2, [sp, #-FRAME_SIZE]!
+	and	tmp2, src, #7
+	and	tmp1, dst, #7
+	cmp	tmp1, tmp2
+	bne	.Lcpy_notaligned
+
+#ifdef USE_VFP
+	/* Magic dust alert!  Force VFP on Cortex-A9.  Experiments show
+	   that the FP pipeline is much better at streaming loads and
+	   stores.  This is outside the critical loop.  */
+	vmov.f32	s0, s0
+#endif
+
+	/* SRC and DST have the same mutual 64-bit alignment, but we may
+	   still need to pre-copy some bytes to get to natural alignment.
+	   We bring SRC and DST into full 64-bit alignment.  */
+	lsls	tmp2, dst, #29
+	beq	1f
+	rsbs	tmp2, tmp2, #0
+	sub	count, count, tmp2, lsr #29
+	ldrmi	tmp1, [src], #4
+	strmi	tmp1, [dst], #4
+	lsls	tmp2, tmp2, #2
+	ldrhcs	tmp1, [src], #2
+	ldrbne	tmp2, [src], #1
+	strhcs	tmp1, [dst], #2
+	strbne	tmp2, [dst], #1
+
+1:
+	subs	tmp2, count, #64	/* Use tmp2 for count.  */
+	blt	.Ltail63aligned
+
+	cmp	tmp2, #512
+	bge	.Lcpy_body_long
+
+.Lcpy_body_medium:			/* Count in tmp2.  */
+#ifdef USE_VFP
+1:
+	vldr	d0, [src, #0]
+	subs	tmp2, tmp2, #64
+	vldr	d1, [src, #8]
+	vstr	d0, [dst, #0]
+	vldr	d0, [src, #16]
+	vstr	d1, [dst, #8]
+	vldr	d1, [src, #24]
+	vstr	d0, [dst, #16]
+	vldr	d0, [src, #32]
+	vstr	d1, [dst, #24]
+	vldr	d1, [src, #40]
+	vstr	d0, [dst, #32]
+	vldr	d0, [src, #48]
+	vstr	d1, [dst, #40]
+	vldr	d1, [src, #56]
+	vstr	d0, [dst, #48]
+	add	src, src, #64
+	vstr	d1, [dst, #56]
+	add	dst, dst, #64
+	bge	1b
+	tst	tmp2, #0x3f
+	beq	.Ldone
+
+.Ltail63aligned:			/* Count in tmp2.  */
+	and	tmp1, tmp2, #0x38
+	add	dst, dst, tmp1
+	add	src, src, tmp1
+	rsb	tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
+	add	pc, pc, tmp1
+
+	vldr	d0, [src, #-56]	/* 14 words to go.  */
+	vstr	d0, [dst, #-56]
+	vldr	d0, [src, #-48]	/* 12 words to go.  */
+	vstr	d0, [dst, #-48]
+	vldr	d0, [src, #-40]	/* 10 words to go.  */
+	vstr	d0, [dst, #-40]
+	vldr	d0, [src, #-32]	/* 8 words to go.  */
+	vstr	d0, [dst, #-32]
+	vldr	d0, [src, #-24]	/* 6 words to go.  */
+	vstr	d0, [dst, #-24]
+	vldr	d0, [src, #-16]	/* 4 words to go.  */
+	vstr	d0, [dst, #-16]
+	vldr	d0, [src, #-8]	/* 2 words to go.  */
+	vstr	d0, [dst, #-8]
+#else
+	sub	src, src, #8
+	sub	dst, dst, #8
+1:
+	ldrd	A_l, A_h, [src, #8]
+	strd	A_l, A_h, [dst, #8]
+	ldrd	A_l, A_h, [src, #16]
+	strd	A_l, A_h, [dst, #16]
+	ldrd	A_l, A_h, [src, #24]
+	strd	A_l, A_h, [dst, #24]
+	ldrd	A_l, A_h, [src, #32]
+	strd	A_l, A_h, [dst, #32]
+	ldrd	A_l, A_h, [src, #40]
+	strd	A_l, A_h, [dst, #40]
+	ldrd	A_l, A_h, [src, #48]
+	strd	A_l, A_h, [dst, #48]
+	ldrd	A_l, A_h, [src, #56]
+	strd	A_l, A_h, [dst, #56]
+	ldrd	A_l, A_h, [src, #64]!
+	strd	A_l, A_h, [dst, #64]!
+	subs	tmp2, tmp2, #64
+	bge	1b
+	tst	tmp2, #0x3f
+	bne	1f
+	ldr	tmp2,[sp], #FRAME_SIZE
+	bx	lr
+1:
+	add	src, src, #8
+	add	dst, dst, #8
+
+.Ltail63aligned:			/* Count in tmp2.  */
+	/* Copy up to 7 d-words of data.  Similar to Ltail63unaligned, but
+	   we know that the src and dest are 64-bit aligned so we can use
+	   LDRD/STRD to improve efficiency.  */
+	/* TMP2 is now negative, but we don't care about that.  The bottom
+	   six bits still tell us how many bytes are left to copy.  */
+
+	and	tmp1, tmp2, #0x38
+	add	dst, dst, tmp1
+	add	src, src, tmp1
+	rsb	tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
+	add	pc, pc, tmp1
+	ldrd	A_l, A_h, [src, #-56]	/* 14 words to go.  */
+	strd	A_l, A_h, [dst, #-56]
+	ldrd	A_l, A_h, [src, #-48]	/* 12 words to go.  */
+	strd	A_l, A_h, [dst, #-48]
+	ldrd	A_l, A_h, [src, #-40]	/* 10 words to go.  */
+	strd	A_l, A_h, [dst, #-40]
+	ldrd	A_l, A_h, [src, #-32]	/* 8 words to go.  */
+	strd	A_l, A_h, [dst, #-32]
+	ldrd	A_l, A_h, [src, #-24]	/* 6 words to go.  */
+	strd	A_l, A_h, [dst, #-24]
+	ldrd	A_l, A_h, [src, #-16]	/* 4 words to go.  */
+	strd	A_l, A_h, [dst, #-16]
+	ldrd	A_l, A_h, [src, #-8]	/* 2 words to go.  */
+	strd	A_l, A_h, [dst, #-8]
+
+#endif
+	tst	tmp2, #4
+	ldrne	tmp1, [src], #4
+	strne	tmp1, [dst], #4
+	lsls	tmp2, tmp2, #31		/* Count (tmp2) now dead. */
+	ldrhcs	tmp1, [src], #2
+	ldrbne	tmp2, [src]
+	strhcs	tmp1, [dst], #2
+	strbne	tmp2, [dst]
+
+.Ldone:
+	ldr	tmp2, [sp], #FRAME_SIZE
+	bx	lr
+
+.Lcpy_body_long:			/* Count in tmp2.  */
+
+	/* Long copy.  We know that there's at least (prefetch_lines * 64)
+	   bytes to go.  */
+#ifdef USE_VFP
+	/* Don't use PLD.  Instead, read some data in advance of the current
+	   copy position into a register.  This should act like a PLD
+	   operation but we won't have to repeat the transfer.  */
+
+	vldr	d3, [src, #0]
+	vldr	d4, [src, #64]
+	vldr	d5, [src, #128]
+	vldr	d6, [src, #192]
+	vldr	d7, [src, #256]
+
+	vldr	d0, [src, #8]
+	vldr	d1, [src, #16]
+	vldr	d2, [src, #24]
+	add	src, src, #32
+
+	subs	tmp2, tmp2, #prefetch_lines * 64 * 2
+	blt	2f
+1:
+	cpy_line_vfp	d3, 0
+	cpy_line_vfp	d4, 64
+	cpy_line_vfp	d5, 128
+	add	dst, dst, #3 * 64
+	add	src, src, #3 * 64
+	cpy_line_vfp	d6, 0
+	cpy_line_vfp	d7, 64
+	add	dst, dst, #2 * 64
+	add	src, src, #2 * 64
+	subs	tmp2, tmp2, #prefetch_lines * 64
+	bge	1b
+
+2:
+	cpy_tail_vfp	d3, 0
+	cpy_tail_vfp	d4, 64
+	cpy_tail_vfp	d5, 128
+	add	src, src, #3 * 64
+	add	dst, dst, #3 * 64
+	cpy_tail_vfp	d6, 0
+	vstr	d7, [dst, #64]
+	vldr	d7, [src, #64]
+	vstr	d0, [dst, #64 + 8]
+	vldr	d0, [src, #64 + 8]
+	vstr	d1, [dst, #64 + 16]
+	vldr	d1, [src, #64 + 16]
+	vstr	d2, [dst, #64 + 24]
+	vldr	d2, [src, #64 + 24]
+	vstr	d7, [dst, #64 + 32]
+	add	src, src, #96
+	vstr	d0, [dst, #64 + 40]
+	vstr	d1, [dst, #64 + 48]
+	vstr	d2, [dst, #64 + 56]
+	add	dst, dst, #128
+	add	tmp2, tmp2, #prefetch_lines * 64
+	b	.Lcpy_body_medium
+#else
+	/* Long copy.  Use an SMS style loop to maximize the I/O
+	   bandwidth of the core.  We don't have enough spare registers
+	   to synthesise prefetching, so use PLD operations.  */
+	/* Pre-bias src and dst.  */
+	sub	src, src, #8
+	sub	dst, dst, #8
+	pld	[src, #8]
+	pld	[src, #72]
+	subs	tmp2, tmp2, #64
+	pld	[src, #136]
+	ldrd	A_l, A_h, [src, #8]
+	strd	B_l, B_h, [sp, #8]
+	ldrd	B_l, B_h, [src, #16]
+	strd	C_l, C_h, [sp, #16]
+	ldrd	C_l, C_h, [src, #24]
+	strd	D_l, D_h, [sp, #24]
+	pld	[src, #200]
+	ldrd	D_l, D_h, [src, #32]!
+	b	1f
+	.p2align	6
+2:
+	pld	[src, #232]
+	strd	A_l, A_h, [dst, #40]
+	ldrd	A_l, A_h, [src, #40]
+	strd	B_l, B_h, [dst, #48]
+	ldrd	B_l, B_h, [src, #48]
+	strd	C_l, C_h, [dst, #56]
+	ldrd	C_l, C_h, [src, #56]
+	strd	D_l, D_h, [dst, #64]!
+	ldrd	D_l, D_h, [src, #64]!
+	subs	tmp2, tmp2, #64
+1:
+	strd	A_l, A_h, [dst, #8]
+	ldrd	A_l, A_h, [src, #8]
+	strd	B_l, B_h, [dst, #16]
+	ldrd	B_l, B_h, [src, #16]
+	strd	C_l, C_h, [dst, #24]
+	ldrd	C_l, C_h, [src, #24]
+	strd	D_l, D_h, [dst, #32]
+	ldrd	D_l, D_h, [src, #32]
+	bcs	2b
+	/* Save the remaining bytes and restore the callee-saved regs.  */
+	strd	A_l, A_h, [dst, #40]
+	add	src, src, #40
+	strd	B_l, B_h, [dst, #48]
+	ldrd	B_l, B_h, [sp, #8]
+	strd	C_l, C_h, [dst, #56]
+	ldrd	C_l, C_h, [sp, #16]
+	strd	D_l, D_h, [dst, #64]
+	ldrd	D_l, D_h, [sp, #24]
+	add	dst, dst, #72
+	tst	tmp2, #0x3f
+	bne	.Ltail63aligned
+	ldr	tmp2, [sp], #FRAME_SIZE
+	bx	lr
+#endif
+
+.Lcpy_notaligned:
+	pld	[src]
+	pld	[src, #64]
+	/* There's at least 64 bytes to copy, but there is no mutual
+	   alignment.  */
+	/* Bring DST to 64-bit alignment.  */
+	lsls	tmp2, dst, #29
+	pld	[src, #(2 * 64)]
+	beq	1f
+	rsbs	tmp2, tmp2, #0
+	sub	count, count, tmp2, lsr #29
+	ldrmi	tmp1, [src], #4
+	strmi	tmp1, [dst], #4
+	lsls	tmp2, tmp2, #2
+	ldrbne	tmp1, [src], #1
+	ldrhcs	tmp2, [src], #2
+	strbne	tmp1, [dst], #1
+	strhcs	tmp2, [dst], #2
+1:
+	pld	[src, #(3 * 64)]
+	subs	count, count, #64
+	ldrmi	tmp2, [sp], #FRAME_SIZE
+	bmi	.Ltail63unaligned
+	pld	[src, #(4 * 64)]
+
+#ifdef USE_NEON
+	vld1.8	{d0-d3}, [src]!
+	vld1.8	{d4-d7}, [src]!
+	subs	count, count, #64
+	bmi	2f
+1:
+	pld	[src, #(4 * 64)]
+	vst1.8	{d0-d3}, [ALIGN (dst, 64)]!
+	vld1.8	{d0-d3}, [src]!
+	vst1.8	{d4-d7}, [ALIGN (dst, 64)]!
+	vld1.8	{d4-d7}, [src]!
+	subs	count, count, #64
+	bpl	1b
+2:
+	vst1.8	{d0-d3}, [ALIGN (dst, 64)]!
+	vst1.8	{d4-d7}, [ALIGN (dst, 64)]!
+	ands	count, count, #0x3f
+#else
+	/* Use an SMS style loop to maximize the I/O bandwidth.  */
+	sub	src, src, #4
+	sub	dst, dst, #8
+	subs	tmp2, count, #64	/* Use tmp2 for count.  */
+	ldr	A_l, [src, #4]
+	ldr	A_h, [src, #8]
+	strd	B_l, B_h, [sp, #8]
+	ldr	B_l, [src, #12]
+	ldr	B_h, [src, #16]
+	strd	C_l, C_h, [sp, #16]
+	ldr	C_l, [src, #20]
+	ldr	C_h, [src, #24]
+	strd	D_l, D_h, [sp, #24]
+	ldr	D_l, [src, #28]
+	ldr	D_h, [src, #32]!
+	b	1f
+	.p2align	6
+2:
+	pld	[src, #(5 * 64) - (32 - 4)]
+	strd	A_l, A_h, [dst, #40]
+	ldr	A_l, [src, #36]
+	ldr	A_h, [src, #40]
+	strd	B_l, B_h, [dst, #48]
+	ldr	B_l, [src, #44]
+	ldr	B_h, [src, #48]
+	strd	C_l, C_h, [dst, #56]
+	ldr	C_l, [src, #52]
+	ldr	C_h, [src, #56]
+	strd	D_l, D_h, [dst, #64]!
+	ldr	D_l, [src, #60]
+	ldr	D_h, [src, #64]!
+	subs	tmp2, tmp2, #64
+1:
+	strd	A_l, A_h, [dst, #8]
+	ldr	A_l, [src, #4]
+	ldr	A_h, [src, #8]
+	strd	B_l, B_h, [dst, #16]
+	ldr	B_l, [src, #12]
+	ldr	B_h, [src, #16]
+	strd	C_l, C_h, [dst, #24]
+	ldr	C_l, [src, #20]
+	ldr	C_h, [src, #24]
+	strd	D_l, D_h, [dst, #32]
+	ldr	D_l, [src, #28]
+	ldr	D_h, [src, #32]
+	bcs	2b
+
+	/* Save the remaining bytes and restore the callee-saved regs.  */
+	strd	A_l, A_h, [dst, #40]
+	add	src, src, #36
+	strd	B_l, B_h, [dst, #48]
+	ldrd	B_l, B_h, [sp, #8]
+	strd	C_l, C_h, [dst, #56]
+	ldrd	C_l, C_h, [sp, #16]
+	strd	D_l, D_h, [dst, #64]
+	ldrd	D_l, D_h, [sp, #24]
+	add	dst, dst, #72
+	ands	count, tmp2, #0x3f
+#endif
+	ldr	tmp2, [sp], #FRAME_SIZE
+	bne	.Ltail63unaligned
+	bx	lr
+END(memcpy)
diff --git a/libc/arch-arm/bionic/tgkill.S b/libc/arch-arm/bionic/tgkill.S
index f46cb58..5f11b20 100644
--- a/libc/arch-arm/bionic/tgkill.S
+++ b/libc/arch-arm/bionic/tgkill.S
@@ -39,6 +39,7 @@
 */
 
 ENTRY(tgkill)
+    .save   {r4-r7, ip, lr}
     stmfd   sp!, {r4-r7, ip, lr}
     ldr     r7, =__NR_tgkill
     swi     #0
diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy.S b/libc/arch-arm/cortex-a15/bionic/memcpy.S
index 16187b5..d297064 100644
--- a/libc/arch-arm/cortex-a15/bionic/memcpy.S
+++ b/libc/arch-arm/cortex-a15/bionic/memcpy.S
@@ -25,80 +25,109 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
+/*
+ * Copyright (c) 2013 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
 
-/* Assumes neon instructions and a cache line size of 64 bytes. */
+    /* Prototype: void *memcpy (void *dst, const void *src, size_t count).  */
+
+        // This version is tuned for the Cortex-A15 processor.
 
 #include <machine/cpu-features.h>
 #include <machine/asm.h>
 
-/*
- * This code assumes it is running on a processor that supports all arm v7
- * instructions, that supports neon instructions, and that has a 64 byte
- * cache line.
- */
-
         .text
+        .syntax unified
         .fpu    neon
 
-#define CACHE_LINE_SIZE     64
+#define CACHE_LINE_SIZE 64
 
 ENTRY(memcpy)
-        .save       {r0, lr}
-        /* start preloading as early as possible */
-        pld         [r1, #(CACHE_LINE_SIZE*0)]
-        stmfd       sp!, {r0, lr}
-        pld         [r1, #(CACHE_LINE_SIZE*1)]
+        // Assumes that n >= 0, and dst, src are valid pointers.
+        // For any sizes less than 832 use the neon code that doesn't
+        // care about the src alignment. This avoids any checks
+        // for src alignment, and offers the best improvement since
+        // smaller sized copies are dominated by the overhead of
+        // the pre and post main loop.
+        // For larger copies, if src and dst cannot both be aligned to
+        // word boundaries, use the neon code.
+        // For all other copies, align dst to a double word boundary
+        // and copy using LDRD/STRD instructions.
 
-        /* do we have at least 16-bytes to copy (needed for alignment below) */
-        cmp         r2, #16
-        blo         5f
+        // Save registers (r0 holds the return value):
+        // optimized push {r0, lr}.
+        .save   {r0, lr}
+        pld     [r1, #(CACHE_LINE_SIZE*16)]
+        push    {r0, lr}
 
-        /* align destination to cache-line for the write-buffer */
+        cmp     r2, #16
+        blo     copy_less_than_16_unknown_align
+
+        cmp     r2, #832
+        bge     check_alignment
+
+copy_unknown_alignment:
+        // Unknown alignment of src and dst.
+        // Assumes that the first few bytes have already been prefetched.
+
+        // Align destination to 128 bits. The mainloop store instructions
+        // require this alignment or they will throw an exception.
         rsb         r3, r0, #0
         ands        r3, r3, #0xF
-        beq         0f
+        beq         2f
 
-        /* copy up to 15-bytes (count in r3) */
+        // Copy up to 15 bytes (count in r3).
         sub         r2, r2, r3
         movs        ip, r3, lsl #31
-        ldrmib      lr, [r1], #1
-        strmib      lr, [r0], #1
-        ldrcsb      ip, [r1], #1
-        ldrcsb      lr, [r1], #1
-        strcsb      ip, [r0], #1
-        strcsb      lr, [r0], #1
+
+        itt         mi
+        ldrbmi      lr, [r1], #1
+        strbmi      lr, [r0], #1
+        itttt       cs
+        ldrbcs      ip, [r1], #1
+        ldrbcs      lr, [r1], #1
+        strbcs      ip, [r0], #1
+        strbcs      lr, [r0], #1
+
         movs        ip, r3, lsl #29
         bge         1f
-        // copies 4 bytes, destination 32-bits aligned
+        // Copies 4 bytes, dst 32 bits aligned before, at least 64 bits after.
         vld4.8      {d0[0], d1[0], d2[0], d3[0]}, [r1]!
         vst4.8      {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
 1:      bcc         2f
-        // copies 8 bytes, destination 64-bits aligned
+        // Copies 8 bytes, dst 64 bits aligned before, at least 128 bits after.
         vld1.8      {d0}, [r1]!
         vst1.8      {d0}, [r0, :64]!
-2:
 
-0:      /* preload immediately the next cache line, which we may need */
-        pld         [r1, #(CACHE_LINE_SIZE*0)]
-        pld         [r1, #(CACHE_LINE_SIZE*1)]
-
-        /* make sure we have at least 64 bytes to copy */
+2:      // Make sure we have at least 64 bytes to copy.
         subs        r2, r2, #64
         blo         2f
 
-        /* Preload all the cache lines we need.
-         * NOTE: The number of pld below depends on CACHE_LINE_SIZE,
-         * ideally we would increase the distance in the main loop to
-         * avoid the goofy code below. In practice this doesn't seem to make
-         * a big difference.
-         * NOTE: The value CACHE_LINE_SIZE * 4 was chosen through
-         * experimentation.
-         */
-        pld         [r1, #(CACHE_LINE_SIZE*2)]
-        pld         [r1, #(CACHE_LINE_SIZE*3)]
-        pld         [r1, #(CACHE_LINE_SIZE*4)]
-
-1:      /* The main loop copies 64 bytes at a time */
+1:      // The main loop copies 64 bytes at a time.
         vld1.8      {d0  - d3},   [r1]!
         vld1.8      {d4  - d7},   [r1]!
         pld         [r1, #(CACHE_LINE_SIZE*4)]
@@ -107,25 +136,24 @@
         vst1.8      {d4  - d7},   [r0, :128]!
         bhs         1b
 
-2:      /* fix-up the remaining count and make sure we have >= 32 bytes left */
-        add         r2, r2, #64
-        subs        r2, r2, #32
-        blo         4f
+2:      // Fix-up the remaining count and make sure we have >= 32 bytes left.
+        adds        r2, r2, #32
+        blo         3f
 
-3:      /* 32 bytes at a time. These cache lines were already preloaded */
+        // 32 bytes. These cache lines were already preloaded.
         vld1.8      {d0 - d3},  [r1]!
-        subs        r2, r2, #32
+        sub         r2, r2, #32
         vst1.8      {d0 - d3},  [r0, :128]!
-        bhs         3b
-4:      /* less than 32 left */
+3:      // Less than 32 left.
         add         r2, r2, #32
         tst         r2, #0x10
-        beq         5f
-        // copies 16 bytes, 128-bits aligned
+        beq         copy_less_than_16_unknown_align
+        // Copies 16 bytes, destination 128 bits aligned.
         vld1.8      {d0, d1}, [r1]!
         vst1.8      {d0, d1}, [r0, :128]!
 
-5:      /* copy up to 15-bytes (count in r2) */
+copy_less_than_16_unknown_align:
+        // Copy up to 15 bytes (count in r2).
         movs        ip, r2, lsl #29
         bcc         1f
         vld1.8      {d0}, [r1]!
@@ -133,14 +161,164 @@
 1:      bge         2f
         vld4.8      {d0[0], d1[0], d2[0], d3[0]}, [r1]!
         vst4.8      {d0[0], d1[0], d2[0], d3[0]}, [r0]!
-2:      movs        ip, r2, lsl #31
-        ldrmib      r3, [r1], #1
-        ldrcsb      ip, [r1], #1
-        ldrcsb      lr, [r1], #1
-        strmib      r3, [r0], #1
-        strcsb      ip, [r0], #1
-        strcsb      lr, [r0], #1
 
-        ldmfd       sp!, {r0, lr}
-        bx          lr
+2:      // Copy 0 to 3 bytes.
+        lsls        r2, r2, #31
+        itt         ne
+        ldrbne      lr, [r1], #1
+        strbne      lr, [r0], #1
+        itttt       cs
+        ldrbcs      ip, [r1], #1
+        ldrbcs      lr, [r1]
+        strbcs      ip, [r0], #1
+        strbcs      lr, [r0]
+
+        pop         {r0, pc}
+
+check_alignment:
+        // If src and dst cannot both be aligned to a word boundary,
+        // use the unaligned copy version.
+        eor     r3, r0, r1
+        ands    r3, r3, #0x3
+        bne     copy_unknown_alignment
+
+        // To try and improve performance, stack layout changed,
+        // i.e., not keeping the stack looking like users expect
+        // (highest numbered register at highest address).
+        // TODO: Add debug frame directives.
+        // We don't need exception unwind directives, because the code below
+        // does not throw any exceptions and does not call any other functions.
+        // Generally, newlib functions like this lack debug information for
+        // assembler source.
+        .save   {r4, r5}
+        strd    r4, r5, [sp, #-8]!
+        .save   {r6, r7}
+        strd    r6, r7, [sp, #-8]!
+        .save   {r8, r9}
+        strd    r8, r9, [sp, #-8]!
+
+        // Optimized for already aligned dst code.
+        ands    ip, r0, #3
+        bne     dst_not_word_aligned
+
+word_aligned:
+        // Align the destination buffer to 8 bytes, to make sure double
+        // loads and stores don't cross a cache line boundary,
+        // as they are then more expensive even if the data is in the cache
+        // (require two load/store issue cycles instead of one).
+        // If only one of the buffers is not 8 bytes aligned,
+        // then it's more important to align dst than src,
+        // because there is more penalty for stores
+        // than loads that cross a cacheline boundary.
+        // This check and realignment are only done if there is >= 832
+        // bytes to copy.
+
+        // Dst is word aligned, but check if it is already double word aligned.
+        ands    r3, r0, #4
+        beq     1f
+        ldr     r3, [r1], #4
+        str     r3, [r0], #4
+        sub     r2, #4
+
+1:      // Can only get here if > 64 bytes to copy, so don't check r2.
+        sub     r2, #64
+
+2:      // Every loop iteration copies 64 bytes.
+        .irp    offset, #0, #8, #16, #24, #32
+        ldrd    r4, r5, [r1, \offset]
+        strd    r4, r5, [r0, \offset]
+        .endr
+
+        ldrd    r4, r5, [r1, #40]
+        ldrd    r6, r7, [r1, #48]
+        ldrd    r8, r9, [r1, #56]
+
+        // Keep the pld as far from the next load as possible.
+        // The amount to prefetch was determined experimentally using
+        // large sizes, and verifying the prefetch size does not affect
+        // the smaller copies too much.
+        // WARNING: If the ldrd and strd instructions get too far away
+        //          from each other, performance suffers. Three loads
+        //          in a row is the best tradeoff.
+        pld     [r1, #(CACHE_LINE_SIZE*16)]
+        strd    r4, r5, [r0, #40]
+        strd    r6, r7, [r0, #48]
+        strd    r8, r9, [r0, #56]
+
+        add     r0, r0, #64
+        add     r1, r1, #64
+        subs    r2, r2, #64
+        bge     2b
+
+        // Fix-up the remaining count and make sure we have >= 32 bytes left.
+        adds    r2, r2, #32
+        blo     4f
+
+        // Copy 32 bytes. These cache lines were already preloaded.
+        .irp    offset, #0, #8, #16, #24
+        ldrd    r4, r5, [r1, \offset]
+        strd    r4, r5, [r0, \offset]
+        .endr
+        add     r1, r1, #32
+        add     r0, r0, #32
+        sub     r2, r2, #32
+4:      // Less than 32 left.
+        add     r2, r2, #32
+        tst     r2, #0x10
+        beq     5f
+        // Copy 16 bytes.
+        .irp    offset, #0, #8
+        ldrd    r4, r5, [r1, \offset]
+        strd    r4, r5, [r0, \offset]
+        .endr
+        add     r1, r1, #16
+        add     r0, r0, #16
+
+5:      // Copy up to 15 bytes (count in r2).
+        movs    ip, r2, lsl #29
+        bcc     1f
+        // Copy 8 bytes.
+        ldrd    r4, r5, [r1], #8
+        strd    r4, r5, [r0], #8
+1:      bge         2f
+        // Copy 4 bytes.
+        ldr     r4, [r1], #4
+        str     r4, [r0], #4
+2:      // Copy 0 to 3 bytes.
+        lsls    r2, r2, #31
+        itt     ne
+        ldrbne  lr, [r1], #1
+        strbne  lr, [r0], #1
+        itttt   cs
+        ldrbcs  ip, [r1], #1
+        ldrbcs  lr, [r1]
+        strbcs  ip, [r0], #1
+        strbcs  lr, [r0]
+
+        // Restore registers: optimized pop {r0, pc}
+        ldrd    r8, r9, [sp], #8
+        ldrd    r6, r7, [sp], #8
+        ldrd    r4, r5, [sp], #8
+        pop     {r0, pc}
+
+dst_not_word_aligned:
+        // Align dst to word.
+        rsb     ip, ip, #4
+        cmp     ip, #2
+
+        itt     gt
+        ldrbgt  lr, [r1], #1
+        strbgt  lr, [r0], #1
+
+        itt     ge
+        ldrbge  lr, [r1], #1
+        strbge  lr, [r0], #1
+
+        ldrb    lr, [r1], #1
+        strb    lr, [r0], #1
+
+        sub     r2, r2, ip
+
+        // Src is guaranteed to be at least word aligned by this point.
+        b       word_aligned
 END(memcpy)
diff --git a/libc/arch-arm/cortex-a15/bionic/memset.S b/libc/arch-arm/cortex-a15/bionic/memset.S
index 7bb3297..2e1ad54 100644
--- a/libc/arch-arm/cortex-a15/bionic/memset.S
+++ b/libc/arch-arm/cortex-a15/bionic/memset.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (C) 2013 The Android Open Source Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -35,11 +35,12 @@
          * memset() returns its first argument.
 		 */
 
-    .fpu    neon
+        .fpu        neon
+        .syntax     unified
 
 ENTRY(bzero)
-        mov     r2, r1
-        mov     r1, #0
+        mov         r2, r1
+        mov         r1, #0
         // Fall through to memset...
 END(bzero)
 
@@ -47,60 +48,117 @@
         .save       {r0}
         stmfd       sp!, {r0}
 
-        vdup.8      q0, r1
-
-        /* do we have at least 16-bytes to write (needed for alignment below) */
+        // The new algorithm is slower for sets of fewer than 16 bytes, so
+        // use the old NEON code in that case.
         cmp         r2, #16
-        blo         3f
+        blo         set_less_than_16_unknown_align
 
-        /* align destination to 16 bytes for the write-buffer */
-        rsb         r3, r0, #0
-        ands        r3, r3, #0xF
-        beq         2f
+        // Use strd which requires an even and odd register so move the
+        // values so that:
+        //   r0 and r1 contain the memset value
+        //   r2 is the number of bytes to set
+        //   r3 is the destination pointer
+        mov         r3, r0
 
-        /* write up to 15-bytes (count in r3) */
-        sub         r2, r2, r3
-        movs        ip, r3, lsl #31
-        strmib      r1, [r0], #1
-        strcsb      r1, [r0], #1
-        strcsb      r1, [r0], #1
-        movs        ip, r3, lsl #29
-        bge         1f
+        // Replicate the low byte of r1 into every byte of r1.
+        mov         r1, r1, lsl #24
+        orr         r1, r1, r1, lsr #8
+        orr         r1, r1, r1, lsr #16
 
-        // writes 4 bytes, 32-bits aligned
-        vst1.32     {d0[0]}, [r0, :32]!
-1:      bcc         2f
+check_alignment:
+        // Align destination to a double word to avoid the strd crossing
+        // a cache line boundary.
+        ands        ip, r3, #7
+        bne         do_double_word_align
 
-        // writes 8 bytes, 64-bits aligned
-        vst1.8      {d0}, [r0, :64]!
-2:
-        /* make sure we have at least 32 bytes to write */
-        subs        r2, r2, #32
-        blo         2f
-        vmov        q1, q0
+double_word_aligned:
+        mov         r0, r1
 
-1:      /* The main loop writes 32 bytes at a time */
-        subs        r2, r2, #32
-        vst1.8      {d0 - d3}, [r0, :128]!
-        bhs         1b
+        subs        r2, #64
+        blo         set_less_than_64
 
-2:      /* less than 32 left */
-        add         r2, r2, #32
-        tst         r2, #0x10
-        beq         3f
+1:      // Main loop sets 64 bytes at a time.
+        .irp        offset, #0, #8, #16, #24, #32, #40, #48, #56
+        strd        r0, r1, [r3, \offset]
+        .endr
 
-        // writes 16 bytes, 128-bits aligned
-        vst1.8      {d0, d1}, [r0, :128]!
-3:      /* write up to 15-bytes (count in r2) */
+        add         r3, #64
+        subs        r2, #64
+        bge         1b
+
+set_less_than_64:
+        // Restore r2 to the count of bytes left to set.
+        add         r2, #64
+        lsls        ip, r2, #27
+        bcc         set_less_than_32
+        // Set 32 bytes.
+        .irp        offset, #0, #8, #16, #24
+        strd        r0, r1, [r3, \offset]
+        .endr
+        add         r3, #32
+
+set_less_than_32:
+        bpl         set_less_than_16
+        // Set 16 bytes.
+        .irp        offset, #0, #8
+        strd        r0, r1, [r3, \offset]
+        .endr
+        add         r3, #16
+
+set_less_than_16:
+        // Less than 16 bytes to set.
+        lsls        ip, r2, #29
+        bcc         set_less_than_8
+
+        // Set 8 bytes.
+        strd        r0, r1, [r3], #8
+
+set_less_than_8:
+        bpl         set_less_than_4
+        // Set 4 bytes
+        str         r1, [r3], #4
+
+set_less_than_4:
+        lsls        ip, r2, #31
+        it          ne
+        strbne      r1, [r3], #1
+        itt         cs
+        strbcs      r1, [r3], #1
+        strbcs      r1, [r3]
+
+        ldmfd       sp!, {r0}
+        bx          lr
+
+do_double_word_align:
+        rsb         ip, ip, #8
+        sub         r2, r2, ip
+        movs        r0, ip, lsl #31
+        it          mi
+        strbmi      r1, [r3], #1
+        itt         cs
+        strbcs      r1, [r3], #1
+        strbcs      r1, [r3], #1
+
+        // Dst is at least word aligned by this point.
+        cmp         ip, #4
+        blo         double_word_aligned
+        str         r1, [r3], #4
+        b           double_word_aligned
+
+set_less_than_16_unknown_align:
+        // Set up to 15 bytes.
+        vdup.8      d0, r1
         movs        ip, r2, lsl #29
         bcc         1f
         vst1.8      {d0}, [r0]!
 1:      bge         2f
         vst1.32     {d0[0]}, [r0]!
 2:      movs        ip, r2, lsl #31
-        strmib      r1, [r0], #1
-        strcsb      r1, [r0], #1
-        strcsb      r1, [r0], #1
+        it          mi
+        strbmi      r1, [r0], #1
+        itt         cs
+        strbcs      r1, [r0], #1
+        strbcs      r1, [r0], #1
         ldmfd       sp!, {r0}
         bx          lr
 END(memset)
diff --git a/libc/arch-arm/cortex-a15/bionic/strlen.S b/libc/arch-arm/cortex-a15/bionic/strlen.S
new file mode 100644
index 0000000..d5b8ba4
--- /dev/null
+++ b/libc/arch-arm/cortex-a15/bionic/strlen.S
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Copyright (c) 2013 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+    .syntax unified
+
+    .thumb
+    .thumb_func
+
+ENTRY(strlen)                   // r0 = string pointer on entry, r0 = length on return; uses r1-r3 and ip.
+    pld     [r0, #0]            // Prefetch the start of the string (r1 carries no argument at entry).
+    mov     r1, r0              // r1 walks the string; r0 keeps the start address.
+
+    rsb     r3, r0, #0
+    ands    r3, r3, #7          // r3 = (-r0) & 7 = bytes up to the next 8-byte boundary.
+    beq     mainloop
+
+    // Align to a double word (64 bits).
+    ands    ip, r3, #1
+    beq     align_to_32
+
+    ldrb    r2, [r1], #1
+    cmp     r2, #0
+    beq     update_count_and_return
+
+align_to_32:
+    ands    ip, r3, #2
+    beq     align_to_64
+
+    ldrb    r2, [r1], #1
+    cmp     r2, #0
+    beq     update_count_and_return
+    ldrb    r2, [r1], #1
+    cmp     r2, #0
+    beq     update_count_and_return
+
+align_to_64:
+    ands    ip, r3, #4
+    beq     mainloop
+    ldr     r3, [r1], #4
+
+    sub     ip, r3, #0x01010101 // After the next two ops ip is non-zero iff r3 contains a zero byte.
+    bic     ip, ip, r3
+    ands    ip, ip, #0x80808080
+    bne     zero_in_second_register
+
+mainloop:
+    ldrd    r2, r3, [r1], #8    // Two words per iteration; r1 now points past both.
+
+    pld     [r1, #64]
+
+    sub     ip, r2, #0x01010101
+    bic     ip, ip, r2
+    ands    ip, ip, #0x80808080
+    bne     zero_in_first_register
+
+    sub     ip, r3, #0x01010101
+    bic     ip, ip, r3
+    ands    ip, ip, #0x80808080
+    bne     zero_in_second_register
+    b       mainloop
+
+zero_in_first_register:
+    sub     r1, r1, #4          // Make r1 point just past the word that holds the zero byte.
+
+zero_in_second_register:
+    sub     r0, r1, r0          // r0 = bytes consumed, counting the whole final word.
+
+    // Check for zero in byte 0.
+    ands    r1, ip, #0x80
+    beq     check_byte1
+
+    sub     r0, r0, #4
+    bx      lr
+
+check_byte1:
+    // Check for zero in byte 1.
+    ands    r1, ip, #0x8000
+    beq     check_byte2
+
+    sub     r0, r0, #3
+    bx      lr
+
+check_byte2:
+    // Check for zero in byte 2.
+    ands    r1, ip, #0x800000
+    beq     return
+
+    sub     r0, r0, #2
+    bx      lr
+
+update_count_and_return:
+    sub     r0, r1, r0          // r1 is one past the terminator (post-incremented byte load).
+
+return:
+    sub     r0, r0, #1          // Drop the terminator (or the last byte of the word) from the count.
+    bx      lr
+END(strlen)
diff --git a/libc/arch-arm/cortex-a15/cortex-a15.mk b/libc/arch-arm/cortex-a15/cortex-a15.mk
index d8193f8..0904e6b 100644
--- a/libc/arch-arm/cortex-a15/cortex-a15.mk
+++ b/libc/arch-arm/cortex-a15/cortex-a15.mk
@@ -1,5 +1,6 @@
 $(call libc-add-cpu-variant-src,MEMCPY,arch-arm/cortex-a15/bionic/memcpy.S)
 $(call libc-add-cpu-variant-src,MEMSET,arch-arm/cortex-a15/bionic/memset.S)
 $(call libc-add-cpu-variant-src,STRCMP,arch-arm/cortex-a15/bionic/strcmp.S)
+$(call libc-add-cpu-variant-src,STRLEN,arch-arm/cortex-a15/bionic/strlen.S)
 
 include bionic/libc/arch-arm/generic/generic.mk
diff --git a/libc/arch-arm/cortex-a8/cortex-a8.mk b/libc/arch-arm/cortex-a8/cortex-a8.mk
new file mode 100644
index 0000000..9af03d9
--- /dev/null
+++ b/libc/arch-arm/cortex-a8/cortex-a8.mk
@@ -0,0 +1 @@
+include bionic/libc/arch-arm/cortex-a15/cortex-a15.mk
diff --git a/libc/arch-arm/cortex-a9/cortex-a9.mk b/libc/arch-arm/cortex-a9/cortex-a9.mk
index 4862933..5c684ed 100644
--- a/libc/arch-arm/cortex-a9/cortex-a9.mk
+++ b/libc/arch-arm/cortex-a9/cortex-a9.mk
@@ -1,5 +1,7 @@
 $(call libc-add-cpu-variant-src,MEMCPY,arch-arm/cortex-a9/bionic/memcpy.S)
 $(call libc-add-cpu-variant-src,MEMSET,arch-arm/cortex-a9/bionic/memset.S)
 $(call libc-add-cpu-variant-src,STRCMP,arch-arm/cortex-a9/bionic/strcmp.S)
+# Use cortex-a15 version of strlen.
+$(call libc-add-cpu-variant-src,STRLEN,arch-arm/cortex-a15/bionic/strlen.S)
 
 include bionic/libc/arch-arm/generic/generic.mk
diff --git a/libc/arch-arm/bionic/strlen.c b/libc/arch-arm/generic/bionic/strlen.c
similarity index 83%
rename from libc/arch-arm/bionic/strlen.c
rename to libc/arch-arm/generic/bionic/strlen.c
index 01632e3..824cf78 100644
--- a/libc/arch-arm/bionic/strlen.c
+++ b/libc/arch-arm/generic/bionic/strlen.c
@@ -33,16 +33,16 @@
 {
     __builtin_prefetch(s);
     __builtin_prefetch(s+32);
-    
+
     union {
         const char      *b;
         const uint32_t  *w;
         uintptr_t       i;
     } u;
-    
+
     // these are some scratch variables for the asm code below
     uint32_t v, t;
-    
+
     // initialize the string length to zero
     size_t l = 0;
 
@@ -60,52 +60,60 @@
     // We need to process 32 bytes per loop to schedule PLD properly
     // and achieve the maximum bus speed.
     asm(
-        "ldr     %[v], [ %[s] ], #4         \n"
+        "ldr     %[v], [%[s]], #4           \n"
         "sub     %[l], %[l], %[s]           \n"
         "0:                                 \n"
 #if __ARM_HAVE_PLD
-        "pld     [ %[s], #64 ]              \n"
+        "pld     [%[s], #64]                \n"
 #endif
         "sub     %[t], %[v], %[mask], lsr #7\n"
         "and     %[t], %[t], %[mask]        \n"
         "bics    %[t], %[t], %[v]           \n"
-        "ldreq   %[v], [ %[s] ], #4         \n"
+        "it      eq                         \n"
+        "ldreq   %[v], [%[s]], #4           \n"
 #if !defined(__OPTIMIZE_SIZE__)
         "bne     1f                         \n"
         "sub     %[t], %[v], %[mask], lsr #7\n"
         "and     %[t], %[t], %[mask]        \n"
         "bics    %[t], %[t], %[v]           \n"
-        "ldreq   %[v], [ %[s] ], #4         \n"
+        "it      eq                         \n"
+        "ldreq   %[v], [%[s]], #4           \n"
         "bne     1f                         \n"
         "sub     %[t], %[v], %[mask], lsr #7\n"
         "and     %[t], %[t], %[mask]        \n"
         "bics    %[t], %[t], %[v]           \n"
-        "ldreq   %[v], [ %[s] ], #4         \n"
+        "it      eq                         \n"
+        "ldreq   %[v], [%[s]], #4           \n"
         "bne     1f                         \n"
         "sub     %[t], %[v], %[mask], lsr #7\n"
         "and     %[t], %[t], %[mask]        \n"
         "bics    %[t], %[t], %[v]           \n"
-        "ldreq   %[v], [ %[s] ], #4         \n"
+        "it      eq                         \n"
+        "ldreq   %[v], [%[s]], #4           \n"
         "bne     1f                         \n"
         "sub     %[t], %[v], %[mask], lsr #7\n"
         "and     %[t], %[t], %[mask]        \n"
         "bics    %[t], %[t], %[v]           \n"
-        "ldreq   %[v], [ %[s] ], #4         \n"
+        "it      eq                         \n"
+        "ldreq   %[v], [%[s]], #4           \n"
         "bne     1f                         \n"
         "sub     %[t], %[v], %[mask], lsr #7\n"
         "and     %[t], %[t], %[mask]        \n"
         "bics    %[t], %[t], %[v]           \n"
-        "ldreq   %[v], [ %[s] ], #4         \n"
+        "it      eq                         \n"
+        "ldreq   %[v], [%[s]], #4           \n"
         "bne     1f                         \n"
         "sub     %[t], %[v], %[mask], lsr #7\n"
         "and     %[t], %[t], %[mask]        \n"
         "bics    %[t], %[t], %[v]           \n"
-        "ldreq   %[v], [ %[s] ], #4         \n"
+        "it      eq                         \n"
+        "ldreq   %[v], [%[s]], #4           \n"
         "bne     1f                         \n"
         "sub     %[t], %[v], %[mask], lsr #7\n"
         "and     %[t], %[t], %[mask]        \n"
         "bics    %[t], %[t], %[v]           \n"
-        "ldreq   %[v], [ %[s] ], #4         \n"
+        "it      eq                         \n"
+        "ldreq   %[v], [%[s]], #4           \n"
 #endif
         "beq     0b                         \n"
         "1:                                 \n"
@@ -117,13 +125,14 @@
         "beq     2f                         \n"
         "add     %[l], %[l], #1             \n"
         "tst     %[v], #0xFF0000            \n"
+        "it      ne                         \n"
         "addne   %[l], %[l], #1             \n"
         "2:                                 \n"
         : [l]"=&r"(l), [v]"=&r"(v), [t]"=&r"(t), [s]"=&r"(u.b)
         : "%[l]"(l), "%[s]"(u.b), [mask]"r"(0x80808080UL)
         : "cc"
     );
-    
+
 done:
     return l;
 }
diff --git a/libc/arch-arm/generic/generic.mk b/libc/arch-arm/generic/generic.mk
index 358b1e6..18cad9d 100644
--- a/libc/arch-arm/generic/generic.mk
+++ b/libc/arch-arm/generic/generic.mk
@@ -1,3 +1,4 @@
 $(call libc-add-cpu-variant-src,MEMCPY,arch-arm/generic/bionic/memcpy.S)
 $(call libc-add-cpu-variant-src,MEMSET,arch-arm/generic/bionic/memset.S)
 $(call libc-add-cpu-variant-src,STRCMP,arch-arm/generic/bionic/strcmp.S)
+$(call libc-add-cpu-variant-src,STRLEN,arch-arm/generic/bionic/strlen.c)
diff --git a/libc/arch-arm/krait/krait.mk b/libc/arch-arm/krait/krait.mk
index 4847f86..288afbb 100644
--- a/libc/arch-arm/krait/krait.mk
+++ b/libc/arch-arm/krait/krait.mk
@@ -1,5 +1,7 @@
 $(call libc-add-cpu-variant-src,MEMCPY,arch-arm/krait/bionic/memcpy.S)
 $(call libc-add-cpu-variant-src,MEMSET,arch-arm/krait/bionic/memset.S)
 $(call libc-add-cpu-variant-src,STRCMP,arch-arm/krait/bionic/strcmp.S)
+# Use cortex-a15 version of strlen.
+$(call libc-add-cpu-variant-src,STRLEN,arch-arm/cortex-a15/bionic/strlen.S)
 
 include bionic/libc/arch-arm/generic/generic.mk
diff --git a/libc/arch-arm/syscalls.mk b/libc/arch-arm/syscalls.mk
index 9eb5136..252a428 100644
--- a/libc/arch-arm/syscalls.mk
+++ b/libc/arch-arm/syscalls.mk
@@ -126,6 +126,8 @@
 syscall_src += arch-arm/syscalls/lremovexattr.S
 syscall_src += arch-arm/syscalls/__statfs64.S
 syscall_src += arch-arm/syscalls/unshare.S
+syscall_src += arch-arm/syscalls/swapon.S
+syscall_src += arch-arm/syscalls/swapoff.S
 syscall_src += arch-arm/syscalls/pause.S
 syscall_src += arch-arm/syscalls/gettimeofday.S
 syscall_src += arch-arm/syscalls/settimeofday.S
@@ -144,6 +146,9 @@
 syscall_src += arch-arm/syscalls/__timer_delete.S
 syscall_src += arch-arm/syscalls/utimes.S
 syscall_src += arch-arm/syscalls/utimensat.S
+syscall_src += arch-arm/syscalls/timerfd_create.S
+syscall_src += arch-arm/syscalls/timerfd_settime.S
+syscall_src += arch-arm/syscalls/timerfd_gettime.S
 syscall_src += arch-arm/syscalls/sigaction.S
 syscall_src += arch-arm/syscalls/sigprocmask.S
 syscall_src += arch-arm/syscalls/__sigsuspend.S
diff --git a/libc/arch-arm/syscalls/swapoff.S b/libc/arch-arm/syscalls/swapoff.S
new file mode 100644
index 0000000..070d160
--- /dev/null
+++ b/libc/arch-arm/syscalls/swapoff.S
@@ -0,0 +1,15 @@
+/* autogenerated by gensyscalls.py */
+#include <asm/unistd.h>
+#include <linux/err.h>
+#include <machine/asm.h>
+
+ENTRY(swapoff)
+    mov     ip, r7                      /* keep the caller's r7 in ip across the call */
+    ldr     r7, =__NR_swapoff           /* r7 = syscall number */
+    swi     #0                          /* trap into the kernel; result comes back in r0 */
+    mov     r7, ip                      /* restore the caller's r7 */
+    cmn     r0, #(MAX_ERRNO + 1)        /* set flags from r0 + (MAX_ERRNO + 1) */
+    bxls    lr                          /* return r0 unchanged unless it lies in [-MAX_ERRNO, -1] */
+    neg     r0, r0                      /* error path: make the code positive */
+    b       __set_errno                 /* tail-branch to __set_errno with the code in r0 */
+END(swapoff)
diff --git a/libc/arch-arm/syscalls/swapon.S b/libc/arch-arm/syscalls/swapon.S
new file mode 100644
index 0000000..a77e0cd
--- /dev/null
+++ b/libc/arch-arm/syscalls/swapon.S
@@ -0,0 +1,15 @@
+/* autogenerated by gensyscalls.py */
+#include <asm/unistd.h>
+#include <linux/err.h>
+#include <machine/asm.h>
+
+ENTRY(swapon)
+    mov     ip, r7                      /* keep the caller's r7 in ip across the call */
+    ldr     r7, =__NR_swapon            /* r7 = syscall number */
+    swi     #0                          /* trap into the kernel; result comes back in r0 */
+    mov     r7, ip                      /* restore the caller's r7 */
+    cmn     r0, #(MAX_ERRNO + 1)        /* set flags from r0 + (MAX_ERRNO + 1) */
+    bxls    lr                          /* return r0 unchanged unless it lies in [-MAX_ERRNO, -1] */
+    neg     r0, r0                      /* error path: make the code positive */
+    b       __set_errno                 /* tail-branch to __set_errno with the code in r0 */
+END(swapon)
diff --git a/libc/arch-arm/syscalls/timerfd_create.S b/libc/arch-arm/syscalls/timerfd_create.S
new file mode 100644
index 0000000..7e3f16f
--- /dev/null
+++ b/libc/arch-arm/syscalls/timerfd_create.S
@@ -0,0 +1,15 @@
+/* autogenerated by gensyscalls.py */
+#include <asm/unistd.h>
+#include <linux/err.h>
+#include <machine/asm.h>
+
+ENTRY(timerfd_create)
+    mov     ip, r7                      /* keep the caller's r7 in ip across the call */
+    ldr     r7, =__NR_timerfd_create    /* r7 = syscall number */
+    swi     #0                          /* trap into the kernel; result comes back in r0 */
+    mov     r7, ip                      /* restore the caller's r7 */
+    cmn     r0, #(MAX_ERRNO + 1)        /* set flags from r0 + (MAX_ERRNO + 1) */
+    bxls    lr                          /* return r0 unchanged unless it lies in [-MAX_ERRNO, -1] */
+    neg     r0, r0                      /* error path: make the code positive */
+    b       __set_errno                 /* tail-branch to __set_errno with the code in r0 */
+END(timerfd_create)
diff --git a/libc/arch-arm/syscalls/timerfd_gettime.S b/libc/arch-arm/syscalls/timerfd_gettime.S
new file mode 100644
index 0000000..2c3e2cf
--- /dev/null
+++ b/libc/arch-arm/syscalls/timerfd_gettime.S
@@ -0,0 +1,15 @@
+/* autogenerated by gensyscalls.py */
+#include <asm/unistd.h>
+#include <linux/err.h>
+#include <machine/asm.h>
+
+ENTRY(timerfd_gettime)
+    mov     ip, r7                      /* keep the caller's r7 in ip across the call */
+    ldr     r7, =__NR_timerfd_gettime   /* r7 = syscall number */
+    swi     #0                          /* trap into the kernel; result comes back in r0 */
+    mov     r7, ip                      /* restore the caller's r7 */
+    cmn     r0, #(MAX_ERRNO + 1)        /* set flags from r0 + (MAX_ERRNO + 1) */
+    bxls    lr                          /* return r0 unchanged unless it lies in [-MAX_ERRNO, -1] */
+    neg     r0, r0                      /* error path: make the code positive */
+    b       __set_errno                 /* tail-branch to __set_errno with the code in r0 */
+END(timerfd_gettime)
diff --git a/libc/arch-arm/syscalls/timerfd_settime.S b/libc/arch-arm/syscalls/timerfd_settime.S
new file mode 100644
index 0000000..f7f0cf0
--- /dev/null
+++ b/libc/arch-arm/syscalls/timerfd_settime.S
@@ -0,0 +1,15 @@
+/* autogenerated by gensyscalls.py */
+#include <asm/unistd.h>
+#include <linux/err.h>
+#include <machine/asm.h>
+
+ENTRY(timerfd_settime)
+    mov     ip, r7                      /* keep the caller's r7 in ip across the call */
+    ldr     r7, =__NR_timerfd_settime   /* r7 = syscall number */
+    swi     #0                          /* trap into the kernel; result comes back in r0 */
+    mov     r7, ip                      /* restore the caller's r7 */
+    cmn     r0, #(MAX_ERRNO + 1)        /* set flags from r0 + (MAX_ERRNO + 1) */
+    bxls    lr                          /* return r0 unchanged unless it lies in [-MAX_ERRNO, -1] */
+    neg     r0, r0                      /* error path: make the code positive */
+    b       __set_errno                 /* tail-branch to __set_errno with the code in r0 */
+END(timerfd_settime)
diff --git a/libc/arch-mips/bionic/__dso_handle_so.S b/libc/arch-mips/bionic/__dso_handle_so.S
deleted file mode 100644
index 77a5d7f..0000000
--- a/libc/arch-mips/bionic/__dso_handle_so.S
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2010 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-# The __dso_handle global variable is used by static
-# C++ constructors and destructors in the binary.
-# See http://www.codesourcery.com/public/cxx-abi/abi.html#dso-dtor
-#
-	.data
-        .align 4
-	.hidden __dso_handle
-        .globl __dso_handle
-__dso_handle:
-        .long __dso_handle
diff --git a/libc/arch-mips/bionic/atexit.S b/libc/arch-mips/bionic/atexit.h
similarity index 81%
rename from libc/arch-mips/bionic/atexit.S
rename to libc/arch-mips/bionic/atexit.h
index 7f0c820..759008c 100644
--- a/libc/arch-mips/bionic/atexit.S
+++ b/libc/arch-mips/bionic/atexit.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (C) 2013 The Android Open Source Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -25,19 +25,12 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-	.text
-	.globl	atexit
-	.hidden	atexit
-	.type	atexit, @function
-	.align  4
-	.ent	atexit
-atexit:
-	.set	noreorder
-	.cpload	$t9
-	.set	reorder
-	la	$t9, __cxa_atexit
-	move	$a1, $0
-	la      $a2, __dso_handle
-	j	$t9
-	.size	atexit, .-atexit
-	.end	atexit
+
+extern void *__dso_handle;
+extern int __cxa_atexit(void (*func)(void *), void *arg, void *dso);
+
+__attribute__ ((visibility ("hidden")))  /* hidden visibility for this definition of atexit */
+int atexit(void (*func)(void))  /* registers func by forwarding to __cxa_atexit; returns its result */
+{
+  return (__cxa_atexit((void (*)(void *))func, (void *)0, &__dso_handle));  /* null arg; &__dso_handle is the dso argument */
+}
diff --git a/libc/arch-mips/bionic/crtbegin.S b/libc/arch-mips/bionic/crtbegin.S
deleted file mode 100644
index 40b689e..0000000
--- a/libc/arch-mips/bionic/crtbegin.S
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright (C) 2008 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-	.text
-	.align 4
-	.type __start,@function
-	.globl __start
-	.globl _start
-
-# this is the small startup code that is first run when
-# any executable that is statically-linked with Bionic
-# runs.
-#
-# it's purpose is to call __libc_init with appropriate
-# arguments, which are:
-#
-#    - the address of the raw data block setup by the Linux
-#      kernel ELF loader
-#
-#    - address of an "onexit" function, not used on any
-#      platform supported by Bionic
-#
-#    - address of the "main" function of the program.
-#
-#    - address of the constructor list
-#
-
-	.ent	__start
-__start:	
-_start:
-	bal	1f
-1:
-	.set	noreorder
-	.cpload	$ra
-	.set	reorder
-
-	move	$a0, $sp
-	move	$a1, $0
-	la	$a2, main
-	la	$a3, 1f
-	subu	$sp, 32
-	la	$t9, __libc_init
-	j	$t9
-	.end	__start
-
-1:	.long	__PREINIT_ARRAY__
-	.long	__INIT_ARRAY__
-	.long	__FINI_ARRAY__
-
-	.section .preinit_array, "aw"
-	.type __PREINIT_ARRAY__, @object
-	.globl __PREINIT_ARRAY__
-__PREINIT_ARRAY__:
-	.long -1
-
-	.section .init_array, "aw"
-	.type __INIT_ARRAY__, @object
-	.globl __INIT_ARRAY__
-__INIT_ARRAY__:
-	.long -1
-
-	.section .fini_array, "aw"
-	.type __FINI_ARRAY__, @object
-	.globl __FINI_ARRAY__
-__FINI_ARRAY__:
-	.long -1
-	.long __do_global_dtors_aux
-
-	.abicalls
-	.text
-	.align	2
-	.set	nomips16
-	.ent	__do_global_dtors_aux
-	.type	__do_global_dtors_aux, @function
-__do_global_dtors_aux:
-	.frame	$sp,32,$31		# vars= 0, regs= 1/0, args= 16, gp= 8
-	.mask	0x80000000,-4
-	.fmask	0x00000000,0
-	.set	noreorder
-	.cpload	$25
-	.set	nomacro
-	addiu	$sp,$sp,-32
-	sw	$31,28($sp)
-	.cprestore	16
-	lw	$2,%got(completed.1269)($28)
-	lbu	$2,%lo(completed.1269)($2)
-	bne	$2,$0,$L8
-	nop
-
-$L4:
-	lw	$2,%got(__cxa_finalize)($28)
-	beq	$2,$0,$L6
-	nop
-
-	lw	$2,%got(__dso_handle)($28)
-	lw	$4,0($2)
-	lw	$25,%call16(__cxa_finalize)($28)
-	.reloc	1f,R_MIPS_JALR,__cxa_finalize
-1:	jalr	$25
-	nop
-
-	lw	$28,16($sp)
-$L6:
-	lw	$2,%got(completed.1269)($28)
-	li	$3,1			# 0x1
-	sb	$3,%lo(completed.1269)($2)
-$L8:
-	lw	$31,28($sp)
-	addiu	$sp,$sp,32
-	j	$31
-	nop
-
-	.set	macro
-	.set	reorder
-	.end	__do_global_dtors_aux
-	.size	__do_global_dtors_aux, .-__do_global_dtors_aux
-	.local	completed.1269
-	.comm	completed.1269,1,1
-	.weak	__cxa_finalize
-
-#include "__dso_handle.S"
-#include "atexit.S"
diff --git a/libc/arch-mips/bionic/crtbegin.c b/libc/arch-mips/bionic/crtbegin.c
new file mode 100644
index 0000000..d3a3401
--- /dev/null
+++ b/libc/arch-mips/bionic/crtbegin.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "../../bionic/libc_init_common.h"
+#include <stddef.h>
+#include <stdint.h>
+
+__attribute__ ((section (".preinit_array")))
+void (*__PREINIT_ARRAY__)(void) = (void (*)(void)) -1;  /* NOTE(review): -1 looks like a list-start marker -- confirm against __libc_init */
+
+__attribute__ ((section (".init_array")))
+void (*__INIT_ARRAY__)(void) = (void (*)(void)) -1;  /* same -1 value, placed in .init_array */
+
+__attribute__ ((section (".fini_array")))
+void (*__FINI_ARRAY__)(void) = (void (*)(void)) -1;  /* same -1 value, placed in .fini_array */
+
+
+__LIBC_HIDDEN__  void do_mips_start(void *raw_args) {  /* C half of startup; the __start stub in this file calls it */
+  structors_array_t array;  /* addresses of the three section-start symbols defined in this file */
+  array.preinit_array = &__PREINIT_ARRAY__;
+  array.init_array = &__INIT_ARRAY__;
+  array.fini_array = &__FINI_ARRAY__;
+
+  __libc_init(raw_args, NULL, &main, &array);  /* raw_args is whatever the stub passed in; second argument is left NULL */
+}
+
+/*
+ * This function prepares the return address with a branch-and-link
+ * instruction (bal) and then uses a .cpload to compute the Global
+ * Offset Table (GOT) pointer ($gp). The $gp is then used to load
+ * the address of do_mips_start() into $t9 just before calling it.
+ * The stack is terminated with a NULL return address.
+ */
+__asm__ (
+"       .set push                   \n"
+"                                   \n"
+"       .text                       \n"
+"       .align  4                   \n"
+"       .type __start,@function     \n"
+"       .globl __start              \n"
+"       .globl  _start              \n"
+"                                   \n"
+"       .ent    __start             \n"
+"__start:                           \n"
+" _start:                           \n"  /* both names label the same address */
+"       .frame   $sp,32,$ra         \n"
+"       .mask   0x80000000,-4       \n"
+"                                   \n"
+"       .set noreorder              \n"
+"       bal     1f                  \n"  /* $ra = address of label 1 */
+"       nop                         \n"  /* branch delay slot */
+"1:                                 \n"
+"       .cpload $ra                 \n"  /* derive $gp from $ra */
+"       .set reorder                \n"
+"                                   \n"
+"       move    $a0, $sp            \n"  /* argument for do_mips_start: the entry stack pointer */
+"       addiu   $sp, $sp, (-32)     \n"  /* open the 32-byte frame declared by .frame */
+"       sw      $0, 28($sp)         \n"  /* zero the $ra slot named by .mask (frame top - 4) */
+"       la      $t9, do_mips_start  \n"
+"       jalr    $t9                 \n"
+"                                   \n"
+"2:     b       2b                  \n"  /* spin here if do_mips_start ever returns */
+"       .end    __start             \n"
+"                                   \n"
+"       .set pop                    \n"
+);
+
+#include "__dso_handle.h"
+#include "atexit.h"
diff --git a/libc/arch-mips/bionic/crtbegin_so.S b/libc/arch-mips/bionic/crtbegin_so.S
deleted file mode 100644
index 377888a..0000000
--- a/libc/arch-mips/bionic/crtbegin_so.S
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (C) 2008 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-	.section .init_array, "aw"
-	.type __INIT_ARRAY__, @object
-	.globl __INIT_ARRAY__
-__INIT_ARRAY__:
-	.long -1
-
-	.section .fini_array, "aw"
-	.type __FINI_ARRAY__, @object
-	.globl __FINI_ARRAY__
-__FINI_ARRAY__:
-	.long -1
-	.long __do_global_dtors_aux
-
-	.abicalls
-	.text
-	.align	2
-	.set	nomips16
-	.ent	__do_global_dtors_aux
-	.type	__do_global_dtors_aux, @function
-__do_global_dtors_aux:
-	.frame	$sp,32,$31		# vars= 0, regs= 1/0, args= 16, gp= 8
-	.mask	0x80000000,-4
-	.fmask	0x00000000,0
-	.set	noreorder
-	.cpload	$25
-	.set	nomacro
-	addiu	$sp,$sp,-32
-	sw	$31,28($sp)
-	.cprestore	16
-	lw	$2,%got(completed.1269)($28)
-	lbu	$2,%lo(completed.1269)($2)
-	bne	$2,$0,$L8
-	nop
-
-$L4:
-	lw	$2,%got(__cxa_finalize)($28)
-	beq	$2,$0,$L6
-	nop
-
-	lw	$2,%got(__dso_handle)($28)
-	lw	$4,0($2)
-	lw	$25,%call16(__cxa_finalize)($28)
-	.reloc	1f,R_MIPS_JALR,__cxa_finalize
-1:	jalr	$25
-	nop
-
-	lw	$28,16($sp)
-$L6:
-	lw	$2,%got(completed.1269)($28)
-	li	$3,1			# 0x1
-	sb	$3,%lo(completed.1269)($2)
-$L8:
-	lw	$31,28($sp)
-	addiu	$sp,$sp,32
-	j	$31
-	nop
-
-	.set	macro
-	.set	reorder
-	.end	__do_global_dtors_aux
-	.size	__do_global_dtors_aux, .-__do_global_dtors_aux
-	.local	completed.1269
-	.comm	completed.1269,1,1
-	.weak	__cxa_finalize
-
-#include "__dso_handle_so.S"
-#include "atexit.S"
diff --git a/libc/arch-mips/bionic/atexit.S b/libc/arch-mips/bionic/crtbegin_so.c
similarity index 81%
copy from libc/arch-mips/bionic/atexit.S
copy to libc/arch-mips/bionic/crtbegin_so.c
index 7f0c820..925dc8c 100644
--- a/libc/arch-mips/bionic/atexit.S
+++ b/libc/arch-mips/bionic/crtbegin_so.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (C) 2013 The Android Open Source Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -25,19 +25,14 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-	.text
-	.globl	atexit
-	.hidden	atexit
-	.type	atexit, @function
-	.align  4
-	.ent	atexit
-atexit:
-	.set	noreorder
-	.cpload	$t9
-	.set	reorder
-	la	$t9, __cxa_atexit
-	move	$a1, $0
-	la      $a2, __dso_handle
-	j	$t9
-	.size	atexit, .-atexit
-	.end	atexit
+
+extern void __cxa_finalize(void *);
+extern void *__dso_handle;
+
+__attribute__((visibility("hidden"),destructor))  /* hidden: not exported from the object; destructor: invoked automatically by the runtime, not by callers */
+void __on_dlclose() {
+  __cxa_finalize(&__dso_handle);  /* pass the address of __dso_handle to __cxa_finalize */
+}
+
+#include "__dso_handle_so.h"
+#include "atexit.h"
diff --git a/libc/arch-mips/syscalls.mk b/libc/arch-mips/syscalls.mk
index 0b8eccd..23393a2 100644
--- a/libc/arch-mips/syscalls.mk
+++ b/libc/arch-mips/syscalls.mk
@@ -129,6 +129,8 @@
 syscall_src += arch-mips/syscalls/lremovexattr.S
 syscall_src += arch-mips/syscalls/__statfs64.S
 syscall_src += arch-mips/syscalls/unshare.S
+syscall_src += arch-mips/syscalls/swapon.S
+syscall_src += arch-mips/syscalls/swapoff.S
 syscall_src += arch-mips/syscalls/pause.S
 syscall_src += arch-mips/syscalls/gettimeofday.S
 syscall_src += arch-mips/syscalls/settimeofday.S
@@ -147,6 +149,9 @@
 syscall_src += arch-mips/syscalls/__timer_delete.S
 syscall_src += arch-mips/syscalls/utimes.S
 syscall_src += arch-mips/syscalls/utimensat.S
+syscall_src += arch-mips/syscalls/timerfd_create.S
+syscall_src += arch-mips/syscalls/timerfd_settime.S
+syscall_src += arch-mips/syscalls/timerfd_gettime.S
 syscall_src += arch-mips/syscalls/sigaction.S
 syscall_src += arch-mips/syscalls/sigprocmask.S
 syscall_src += arch-mips/syscalls/__sigsuspend.S
diff --git a/libc/arch-mips/syscalls/swapoff.S b/libc/arch-mips/syscalls/swapoff.S
new file mode 100644
index 0000000..4f204c9
--- /dev/null
+++ b/libc/arch-mips/syscalls/swapoff.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <asm/unistd.h>
+    .text
+    .globl swapoff
+    .align 4
+    .ent swapoff
+
+swapoff:
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* derive $gp from $t9 */
+    li $v0, __NR_swapoff  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 selects the __set_errno path (presumably the kernel's error flag -- confirm against the MIPS syscall ABI) */
+    move $a0, $v0  /* delay slot: executes on both paths, so $v0 is already in $a0 if we reach 1: */
+    j $ra  /* $a3 == 0: return to the caller */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* jump (not call) to __set_errno, leaving $ra untouched */
+    nop
+    .set reorder
+    .end swapoff
diff --git a/libc/arch-mips/syscalls/swapon.S b/libc/arch-mips/syscalls/swapon.S
new file mode 100644
index 0000000..af3dda6
--- /dev/null
+++ b/libc/arch-mips/syscalls/swapon.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <asm/unistd.h>
+    .text
+    .globl swapon
+    .align 4
+    .ent swapon
+
+swapon:
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* derive $gp from $t9 */
+    li $v0, __NR_swapon  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 selects the __set_errno path (presumably the kernel's error flag -- confirm against the MIPS syscall ABI) */
+    move $a0, $v0  /* delay slot: executes on both paths, so $v0 is already in $a0 if we reach 1: */
+    j $ra  /* $a3 == 0: return to the caller */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* jump (not call) to __set_errno, leaving $ra untouched */
+    nop
+    .set reorder
+    .end swapon
diff --git a/libc/arch-mips/syscalls/timerfd_create.S b/libc/arch-mips/syscalls/timerfd_create.S
new file mode 100644
index 0000000..b5ac003
--- /dev/null
+++ b/libc/arch-mips/syscalls/timerfd_create.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <asm/unistd.h>
+    .text
+    .globl timerfd_create
+    .align 4
+    .ent timerfd_create
+
+timerfd_create:
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* derive $gp from $t9 */
+    li $v0, __NR_timerfd_create  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 selects the __set_errno path (presumably the kernel's error flag -- confirm against the MIPS syscall ABI) */
+    move $a0, $v0  /* delay slot: executes on both paths, so $v0 is already in $a0 if we reach 1: */
+    j $ra  /* $a3 == 0: return to the caller */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* jump (not call) to __set_errno, leaving $ra untouched */
+    nop
+    .set reorder
+    .end timerfd_create
diff --git a/libc/arch-mips/syscalls/timerfd_gettime.S b/libc/arch-mips/syscalls/timerfd_gettime.S
new file mode 100644
index 0000000..b1c21ff
--- /dev/null
+++ b/libc/arch-mips/syscalls/timerfd_gettime.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <asm/unistd.h>
+    .text
+    .globl timerfd_gettime
+    .align 4
+    .ent timerfd_gettime
+
+timerfd_gettime:
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* derive $gp from $t9 */
+    li $v0, __NR_timerfd_gettime  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 selects the __set_errno path (presumably the kernel's error flag -- confirm against the MIPS syscall ABI) */
+    move $a0, $v0  /* delay slot: executes on both paths, so $v0 is already in $a0 if we reach 1: */
+    j $ra  /* $a3 == 0: return to the caller */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* jump (not call) to __set_errno, leaving $ra untouched */
+    nop
+    .set reorder
+    .end timerfd_gettime
diff --git a/libc/arch-mips/syscalls/timerfd_settime.S b/libc/arch-mips/syscalls/timerfd_settime.S
new file mode 100644
index 0000000..f68819d
--- /dev/null
+++ b/libc/arch-mips/syscalls/timerfd_settime.S
@@ -0,0 +1,22 @@
+/* autogenerated by gensyscalls.py */
+#include <asm/unistd.h>
+    .text
+    .globl timerfd_settime
+    .align 4
+    .ent timerfd_settime
+
+timerfd_settime:
+    .set noreorder  /* branch delay slots below are filled by hand */
+    .cpload $t9  /* derive $gp from $t9 */
+    li $v0, __NR_timerfd_settime  /* syscall number goes in $v0 */
+    syscall
+    bnez $a3, 1f  /* $a3 != 0 selects the __set_errno path (presumably the kernel's error flag -- confirm against the MIPS syscall ABI) */
+    move $a0, $v0  /* delay slot: executes on both paths, so $v0 is already in $a0 if we reach 1: */
+    j $ra  /* $a3 == 0: return to the caller */
+    nop
+1:
+    la $t9,__set_errno
+    j $t9  /* jump (not call) to __set_errno, leaving $ra untouched */
+    nop
+    .set reorder
+    .end timerfd_settime
diff --git a/libc/arch-x86/include/machine/endian.h b/libc/arch-x86/include/machine/endian.h
index e7ad257..e1506b1 100644
--- a/libc/arch-x86/include/machine/endian.h
+++ b/libc/arch-x86/include/machine/endian.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: endian.h,v 1.14 2005/12/13 00:35:23 millert Exp $	*/
+/*	$OpenBSD: endian.h,v 1.17 2011/03/12 04:03:04 guenther Exp $	*/
 
 /*-
  * Copyright (c) 1997 Niklas Hallqvist.  All rights reserved.
@@ -24,38 +24,28 @@
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifndef _I386_ENDIAN_H_
-#define _I386_ENDIAN_H_
+#ifndef _MACHINE_ENDIAN_H_
+#define _MACHINE_ENDIAN_H_
 
 #ifdef __GNUC__
 
-#if defined(_KERNEL) && !defined(I386_CPU)
-#define	__swap32md(x) ({						\
+#define	__swap32md(x) __statement({					\
 	uint32_t __swap32md_x = (x);					\
 									\
-	__asm ("bswap %1" : "+r" (__swap32md_x));			\
+	__asm ("bswap %0" : "+r" (__swap32md_x));			\
 	__swap32md_x;							\
 })
-#else
-#define	__swap32md(x) ({						\
-	uint32_t __swap32md_x = (x);					\
-									\
-	__asm ("rorw $8, %w1; rorl $16, %1; rorw $8, %w1" :		\
-	    "+r" (__swap32md_x));					\
-	__swap32md_x;							\
-})
-#endif	/* _KERNEL && !I386_CPU */
 
-#define	__swap64md(x) ({						\
+#define	__swap64md(x) __statement({					\
 	uint64_t __swap64md_x = (x);					\
 									\
 	(uint64_t)__swap32md(__swap64md_x >> 32) |			\
 	    (uint64_t)__swap32md(__swap64md_x & 0xffffffff) << 32;	\
 })
-#define	__swap16md(x) ({						\
+#define	__swap16md(x) __statement({					\
 	uint16_t __swap16md_x = (x);					\
 									\
-	__asm ("rorw $8, %w1" : "+r" (__swap16md_x));			\
+	__asm ("rorw $8, %w0" : "+r" (__swap16md_x));			\
 	__swap16md_x;							\
 })
 
@@ -68,4 +58,4 @@
 #include <sys/types.h>
 #include <sys/endian.h>
 
-#endif /* _I386_ENDIAN_H_ */
+#endif /* _MACHINE_ENDIAN_H_ */
diff --git a/libc/arch-x86/string/bcopy_wrapper.S b/libc/arch-x86/string/bcopy_wrapper.S
deleted file mode 100644
index fa8774c..0000000
--- a/libc/arch-x86/string/bcopy_wrapper.S
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
-Copyright (c) 2010, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-
-#if defined(USE_SSSE3)
-
-# include "cache_wrapper.S"
-# undef __i686
-# define MEMCPY	bcopy
-# define USE_AS_MEMMOVE
-# define USE_AS_BCOPY
-# include "ssse3-memcpy5.S"
-
-#else
-
-# include "bcopy.S"
-
-#endif
diff --git a/libc/arch-x86/string/bzero_wrapper.S b/libc/arch-x86/string/bzero_wrapper.S
deleted file mode 100644
index aa1bb9c..0000000
--- a/libc/arch-x86/string/bzero_wrapper.S
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
-Copyright (c) 2010, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#if defined(USE_SSE2)
-
-# include "cache_wrapper.S"
-# undef __i686
-# define USE_AS_BZERO
-# define sse2_memset5_atom bzero
-# include "sse2-memset5-atom.S"
-
-#else
-
-# include "bzero.S"
-
-#endif
diff --git a/libc/arch-x86/string/cache_wrapper.S b/libc/arch-x86/string/cache.h
similarity index 100%
rename from libc/arch-x86/string/cache_wrapper.S
rename to libc/arch-x86/string/cache.h
diff --git a/libc/arch-x86/string/index.S b/libc/arch-x86/string/index.S
deleted file mode 100644
index 7f83ef5..0000000
--- a/libc/arch-x86/string/index.S
+++ /dev/null
@@ -1,26 +0,0 @@
-/*	$OpenBSD: index.S,v 1.4 2005/08/07 11:30:38 espie Exp $ */
-/*
- * Written by J.T. Conklin <jtc@netbsd.org>.
- * Public domain.
- */
-
-#include <machine/asm.h>
-
-#ifdef STRCHR
-ENTRY(strchr)
-#else
-ENTRY(index)
-#endif
-	movl	4(%esp),%eax
-	movb	8(%esp),%cl
-	.align 2,0x90
-L1:
-	movb	(%eax),%dl
-	cmpb	%dl,%cl			/* found char??? */
-	je 	L2
-	incl	%eax
-	testb	%dl,%dl			/* null terminator??? */
-	jnz	L1
-	xorl	%eax,%eax
-L2:
-	ret
diff --git a/libc/arch-x86/string/memchr.S b/libc/arch-x86/string/memchr.S
deleted file mode 100644
index 367c7b4..0000000
--- a/libc/arch-x86/string/memchr.S
+++ /dev/null
@@ -1,27 +0,0 @@
-/*	$OpenBSD: memchr.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
-/*
- * Written by J.T. Conklin <jtc@netbsd.org>.
- * Public domain.
- */
-
-#include <machine/asm.h>
-
-ENTRY(memchr)
-	pushl	%edi
-	movl	8(%esp),%edi		/* string address */
-	movl	12(%esp),%eax		/* set character to search for */
-	movl	16(%esp),%ecx		/* set length of search */
-	testl	%ecx,%ecx		/* test for len == 0 */
-	jz	L1
-	cld				/* set search forward */
-	repne				/* search! */
-	scasb
-	jne	L1			/* scan failed, return null */
-	leal	-1(%edi),%eax		/* adjust result of scan */
-	popl	%edi
-	ret
-	.align 2,0x90
-L1:	xorl	%eax,%eax
-	popl	%edi
-	ret
-END(memchr)
diff --git a/libc/arch-x86/string/memcpy_wrapper.S b/libc/arch-x86/string/memcpy_wrapper.S
deleted file mode 100644
index 7e765ea..0000000
--- a/libc/arch-x86/string/memcpy_wrapper.S
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
-Copyright (c) 2010, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#if defined(USE_SSSE3)
-
-# include "cache_wrapper.S"
-# undef __i686
-# define MEMCPY	memcpy
-# define USE_AS_MEMMOVE
-# include "ssse3-memcpy5.S"
-
-#else
-
-# include "memcpy.S"
-
-#endif
diff --git a/libc/arch-x86/string/memmove_wrapper.S b/libc/arch-x86/string/memmove_wrapper.S
deleted file mode 100644
index 7e83e27..0000000
--- a/libc/arch-x86/string/memmove_wrapper.S
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
-Copyright (c) 2010, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#if defined(USE_SSSE3)
-
-# include "cache_wrapper.S"
-# undef __i686
-# define MEMCPY memmove
-# define USE_AS_MEMMOVE
-# include "ssse3-memcpy5.S"
-
-#else
-
-# include "memmove.S"
-
-#endif
diff --git a/libc/arch-x86/string/memset_wrapper.S b/libc/arch-x86/string/memset_wrapper.S
deleted file mode 100644
index d037a50..0000000
--- a/libc/arch-x86/string/memset_wrapper.S
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
-Copyright (c) 2010, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#if defined(USE_SSE2)
-
-# include "cache_wrapper.S"
-# undef __i686
-# define sse2_memset5_atom memset
-# include "sse2-memset5-atom.S"
-
-#else
-
-# include "memset.S"
-
-#endif
diff --git a/libc/arch-x86/string/strlen_wrapper.S b/libc/arch-x86/string/sse2-bzero-atom.S
similarity index 92%
copy from libc/arch-x86/string/strlen_wrapper.S
copy to libc/arch-x86/string/sse2-bzero-atom.S
index e62786b..0ddc499 100644
--- a/libc/arch-x86/string/strlen_wrapper.S
+++ b/libc/arch-x86/string/sse2-bzero-atom.S
@@ -28,13 +28,6 @@
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#if defined(USE_SSE2)
-
-# define sse2_strlen_atom strlen
-# include "sse2-strlen-atom.S"
-
-#else
-
-# include "strlen.S"
-
-#endif
+#define USE_AS_BZERO  /* presumably selects the bzero variant inside the included file -- confirm in sse2-memset-atom.S */
+#define MEMSET  bzero  /* every MEMSET token in the included file expands to bzero (presumably its entry-point name -- confirm) */
+#include "sse2-memset-atom.S"
diff --git a/libc/arch-x86/string/memcmp_wrapper.S b/libc/arch-x86/string/sse2-index-atom.S
similarity index 90%
copy from libc/arch-x86/string/memcmp_wrapper.S
copy to libc/arch-x86/string/sse2-index-atom.S
index fa0c672..d51e1d4 100644
--- a/libc/arch-x86/string/memcmp_wrapper.S
+++ b/libc/arch-x86/string/sse2-index-atom.S
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2010, Intel Corporation
+Copyright (c) 2011, Intel Corporation
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -28,13 +28,5 @@
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#if defined(USE_SSSE3)
-
-# define MEMCMP memcmp
-# include "ssse3-memcmp3-new.S"
-
-#else
-
-# include "memcmp.S"
-
-#endif
+#define strchr  index  /* every strchr token in the included file now expands to index */
+#include "sse2-strchr-atom.S"
diff --git a/libc/arch-x86/string/sse2-memchr-atom.S b/libc/arch-x86/string/sse2-memchr-atom.S
new file mode 100644
index 0000000..013af9b
--- /dev/null
+++ b/libc/arch-x86/string/sse2-memchr-atom.S
@@ -0,0 +1,556 @@
+/*
+Copyright (c) 2011, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+    * this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+    * this list of conditions and the following disclaimer in the documentation
+    * and/or other materials provided with the distribution.
+
+    * Neither the name of Intel Corporation nor the names of its contributors
+    * may be used to endorse or promote products derived from this software
+    * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef L
+# define L(label)	.L##label
+#endif
+
+#ifndef cfi_startproc
+# define cfi_startproc	.cfi_startproc
+#endif
+
+#ifndef cfi_endproc
+# define cfi_endproc	.cfi_endproc
+#endif
+
+#ifndef cfi_rel_offset
+# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+#endif
+
+#ifndef cfi_restore
+# define cfi_restore(reg)	.cfi_restore reg
+#endif
+
+#ifndef cfi_adjust_cfa_offset
+# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
+#endif
+
+#ifndef ENTRY
+# define ENTRY(name)	\
+	.type name,  @function;	\
+	.globl name;	\
+	.p2align 4;	\
+name:	\
+	cfi_startproc
+#endif
+
+#ifndef END
+# define END(name)	\
+	cfi_endproc;	\
+	.size name,	.-name
+#endif
+
+#define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#define ENTRANCE PUSH (%edi);  /* prologue: save %edi, which the routine uses as its scan pointer */
+#define PARMS  8  /* offset of the first argument from %esp after ENTRANCE: 4 (return address) + 4 (saved %edi) */
+#define RETURN  POP (%edi); ret; CFI_PUSH (%edi);  /* restore %edi and return; the trailing CFI_PUSH re-declares %edi as pushed for the code that follows the ret */
+
+#define STR1  PARMS  /* 1st argument: read as the buffer pointer by memchr */
+#define STR2  STR1+4  /* 2nd argument: read as the byte to search for */
+#define LEN   STR2+4  /* 3rd argument: read as the byte count */
+
+	.text
+ENTRY (memchr)
+	ENTRANCE
+	mov	STR1(%esp), %ecx	/* ecx = buffer pointer */
+	movd	STR2(%esp), %xmm1	/* xmm1 low dword = byte to search for */
+	mov	LEN(%esp), %edx	/* edx = number of bytes to search */
+	test	%edx, %edx
+	jz	L(return_null)	/* zero length: return NULL */
+
+	punpcklbw %xmm1, %xmm1	/* with the second punpcklbw and the pshufd below: */
+	mov	%ecx, %edi	/* edi = scan pointer */
+	punpcklbw %xmm1, %xmm1	/* replicate the search byte into all 16 bytes of xmm1 */
+
+	and	$63, %ecx	/* ecx = pointer offset within a 64-byte block */
+	pshufd	$0, %xmm1, %xmm1
+	cmp	$48, %ecx
+	ja	L(crosscache)	/* offset > 48: a 16-byte load from edi would cross the 64-byte boundary */
+
+	movdqu	(%edi), %xmm0	/* unaligned load of the first 16 bytes */
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm0, %eax	/* eax bit i set <=> byte i equals the search byte */
+	test	%eax, %eax
+	jnz	L(match_case2_prolog)	/* bounds-checked exit: the match may lie past the end */
+
+	sub	$16, %edx
+	jbe	L(return_null)	/* length <= 16 and no match */
+	lea	16(%edi), %edi
+	and	$15, %ecx	/* ecx = misalignment of the original pointer */
+	and	$-16, %edi	/* round down to the next 16-byte-aligned block */
+	add	%ecx, %edx	/* bytes between that block and edi+16 are scanned again, so count them */
+	sub	$64, %edx
+	jbe	L(exit_loop)	/* 64 bytes or fewer remain: finish in the bounds-checked tail */
+	jmp	L(loop_prolog)
+
+	.p2align 4
+L(crosscache):
+	and	$15, %ecx	/* ecx = misalignment within 16 bytes */
+	and	$-16, %edi	/* aligned block containing the start */
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm0, %eax
+	sar	%cl, %eax	/* drop mask bits for bytes before the start pointer */
+	test	%eax, %eax
+
+	jnz	L(match_case2_prolog1)	/* bounds-checked exit; it adds ecx back so edi is the original pointer */
+	lea	-16(%edx), %edx
+	add	%ecx, %edx	/* edx = length - (16 - misalignment) = bytes still unsearched */
+	jle	L(return_null)
+	lea	16(%edi), %edi
+	sub	$64, %edx
+	jbe	L(exit_loop)
+
+	.p2align 4
+L(loop_prolog):
+	movdqa	(%edi), %xmm0	/* more than 64 bytes remain: check four aligned 16-byte blocks */
+	pcmpeqb	%xmm1, %xmm0
+	xor	%ecx, %ecx	/* ecx = byte offset of the block under test (0, 16, 32, 48) */
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	jnz	L(match_case1)
+
+	movdqa	16(%edi), %xmm2
+	pcmpeqb	%xmm1, %xmm2
+	lea	16(%ecx), %ecx
+	pmovmskb %xmm2, %eax
+	test	%eax, %eax
+	jnz	L(match_case1)
+
+	movdqa	32(%edi), %xmm3
+	pcmpeqb	%xmm1, %xmm3
+	lea	16(%ecx), %ecx
+	pmovmskb %xmm3, %eax
+	test	%eax, %eax
+	jnz	L(match_case1)
+
+	movdqa	48(%edi), %xmm4
+	pcmpeqb	%xmm1, %xmm4
+	lea	16(%ecx), %ecx
+	pmovmskb %xmm4, %eax
+	test	%eax, %eax
+	jnz	L(match_case1)
+
+	lea	64(%edi), %edi
+	sub	$64, %edx
+	jbe	L(exit_loop)
+
+	movdqa	(%edi), %xmm0	/* second group of four blocks, same pattern as above */
+	pcmpeqb	%xmm1, %xmm0
+	xor	%ecx, %ecx
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	jnz	L(match_case1)
+
+	movdqa	16(%edi), %xmm2
+	pcmpeqb	%xmm1, %xmm2
+	lea	16(%ecx), %ecx
+	pmovmskb %xmm2, %eax
+	test	%eax, %eax
+	jnz	L(match_case1)
+
+	movdqa	32(%edi), %xmm3
+	pcmpeqb	%xmm1, %xmm3
+	lea	16(%ecx), %ecx
+	pmovmskb %xmm3, %eax
+	test	%eax, %eax
+	jnz	L(match_case1)
+
+	movdqa	48(%edi), %xmm4
+	pcmpeqb	%xmm1, %xmm4
+	lea	16(%ecx), %ecx
+	pmovmskb %xmm4, %eax
+	test	%eax, %eax
+	jnz	L(match_case1)
+
+	lea	64(%edi), %edi
+	mov	%edi, %ecx
+	and	$-64, %edi	/* round the scan pointer down to a 64-byte boundary */
+	and	$63, %ecx
+	add	%ecx, %edx	/* count the bytes that rounding down makes us scan again */
+
+	.p2align 4
+L(align64_loop):
+	sub	$64, %edx
+	jbe	L(exit_loop)	/* 64 bytes or fewer remain: finish in the bounds-checked tail */
+	movdqa	(%edi), %xmm0
+	movdqa	16(%edi), %xmm2
+	movdqa	32(%edi), %xmm3
+	movdqa	48(%edi), %xmm4
+	pcmpeqb	%xmm1, %xmm0
+	pcmpeqb	%xmm1, %xmm2
+	pcmpeqb	%xmm1, %xmm3
+	pcmpeqb	%xmm1, %xmm4
+
+	pmaxub	%xmm0, %xmm3	/* compare results are 0x00/0xff per byte, so max acts as OR */
+	pmaxub	%xmm2, %xmm4
+	pmaxub	%xmm3, %xmm4	/* xmm4 = combined result of all four blocks */
+	add	$64, %edi
+	pmovmskb %xmm4, %eax
+
+	test	%eax, %eax
+	jz	L(align64_loop)
+
+	sub	$64, %edi	/* a match is in the 64 bytes just scanned: step back and locate it */
+
+	pmovmskb %xmm0, %eax	/* xmm0 and xmm2 still hold their own compare results */
+	xor	%ecx, %ecx
+	test	%eax, %eax
+	jnz	L(match_case1)
+
+	pmovmskb %xmm2, %eax
+	lea	16(%ecx), %ecx
+	test	%eax, %eax
+	jnz	L(match_case1)
+
+	movdqa	32(%edi), %xmm3	/* xmm3 was overwritten by pmaxub: redo the third block's compare */
+	pcmpeqb	%xmm1, %xmm3
+	pmovmskb %xmm3, %eax
+	lea	16(%ecx), %ecx
+	test	%eax, %eax
+	jnz	L(match_case1)
+
+	pcmpeqb	48(%edi), %xmm1	/* fourth block; clobbers the pattern register, which is not read again on this path */
+	pmovmskb %xmm1, %eax
+	lea	16(%ecx), %ecx
+
+	.p2align 4
+L(match_case1):	/* match known to be in bounds: eax = 16-bit mask, ecx = block offset */
+	add	%ecx, %edi	/* edi = address of the matching block */
+	test	%al, %al
+	jz	L(match_case1_high)	/* no bit in the low byte: first match is at index 8..15 */
+	mov	%al, %cl
+	and	$15, %cl
+	jz	L(match_case1_8)	/* low nibble clear: first match is at index 4..7 */
+	test	$0x01, %al
+	jnz	L(exit_case1_1)
+	test	$0x02, %al
+	jnz	L(exit_case1_2)
+	test	$0x04, %al
+	jnz	L(exit_case1_3)
+	lea	3(%edi), %eax	/* only bit 3 of the nibble is left: index 3 */
+	RETURN
+
+	.p2align 4
+L(match_case1_8):
+	test	$0x10, %al
+	jnz	L(exit_case1_5)
+	test	$0x20, %al
+	jnz	L(exit_case1_6)
+	test	$0x40, %al
+	jnz	L(exit_case1_7)
+	lea	7(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(match_case1_high):
+	mov	%ah, %ch
+	and	$15, %ch
+	jz	L(match_case1_high_8)	/* bits 8..11 clear: first match is at index 12..15 */
+	test	$0x01, %ah
+	jnz	L(exit_case1_9)
+	test	$0x02, %ah
+	jnz	L(exit_case1_10)
+	test	$0x04, %ah
+	jnz	L(exit_case1_11)
+	lea	11(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(match_case1_high_8):
+	test	$0x10, %ah
+	jnz	L(exit_case1_13)
+	test	$0x20, %ah
+	jnz	L(exit_case1_14)
+	test	$0x40, %ah
+	jnz	L(exit_case1_15)
+	lea	15(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_loop):	/* tail: at most 64 bytes remain, so each block is followed by a length check */
+	add	$64, %edx	/* undo the sub $64 that sent us here: edx = bytes left from edi */
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm1, %xmm0
+	xor	%ecx, %ecx
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	jnz	L(match_case2)
+	cmp	$16, %edx
+	jbe	L(return_null)
+
+	movdqa	16(%edi), %xmm2
+	pcmpeqb	%xmm1, %xmm2
+	lea	16(%ecx), %ecx
+	pmovmskb %xmm2, %eax
+	test	%eax, %eax
+	jnz	L(match_case2)
+	cmp	$32, %edx
+	jbe	L(return_null)
+
+	movdqa	32(%edi), %xmm3
+	pcmpeqb	%xmm1, %xmm3
+	lea	16(%ecx), %ecx
+	pmovmskb %xmm3, %eax
+	test	%eax, %eax
+	jnz	L(match_case2)
+	cmp	$48, %edx
+	jbe	L(return_null)
+
+	pcmpeqb	48(%edi), %xmm1
+	lea	16(%ecx), %ecx
+	pmovmskb %xmm1, %eax
+	test	%eax, %eax
+	jnz	L(match_case2)
+
+	xor	%eax, %eax	/* nothing found in the tail: return NULL */
+	RETURN
+
+	.p2align 4
+L(exit_case1_1):	/* exit_case1_N returns the address of the Nth byte of the block, i.e. index N-1 */
+	mov	%edi, %eax
+	RETURN
+
+	.p2align 4
+L(exit_case1_2):
+	lea	1(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case1_3):
+	lea	2(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case1_5):
+	lea	4(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case1_6):
+	lea	5(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case1_7):
+	lea	6(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case1_9):
+	lea	8(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case1_10):
+	lea	9(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case1_11):
+	lea	10(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case1_13):
+	lea	12(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case1_14):
+	lea	13(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case1_15):
+	lea	14(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(match_case2):	/* bounds-checked variant of match_case1: the match may lie past the end */
+	sub	%ecx, %edx	/* edx = bytes remaining from the start of the matching block */
+L(match_case2_prolog1):
+	add	%ecx, %edi
+L(match_case2_prolog):
+	test	%al, %al
+	jz	L(match_case2_high)
+	mov	%al, %cl
+	and	$15, %cl
+	jz	L(match_case2_8)
+	test	$0x01, %al
+	jnz	L(exit_case2_1)
+	test	$0x02, %al
+	jnz	L(exit_case2_2)
+	test	$0x04, %al
+	jnz	L(exit_case2_3)
+	sub	$4, %edx
+	jb	L(return_null)	/* fewer than 4 bytes remain: the match at index 3 is out of range */
+	lea	3(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(match_case2_8):
+	test	$0x10, %al
+	jnz	L(exit_case2_5)
+	test	$0x20, %al
+	jnz	L(exit_case2_6)
+	test	$0x40, %al
+	jnz	L(exit_case2_7)
+	sub	$8, %edx
+	jb	L(return_null)
+	lea	7(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(match_case2_high):
+	mov	%ah, %ch
+	and	$15, %ch
+	jz	L(match_case2_high_8)
+	test	$0x01, %ah
+	jnz	L(exit_case2_9)
+	test	$0x02, %ah
+	jnz	L(exit_case2_10)
+	test	$0x04, %ah
+	jnz	L(exit_case2_11)
+	sub	$12, %edx
+	jb	L(return_null)
+	lea	11(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(match_case2_high_8):
+	test	$0x10, %ah
+	jnz	L(exit_case2_13)
+	test	$0x20, %ah
+	jnz	L(exit_case2_14)
+	test	$0x40, %ah
+	jnz	L(exit_case2_15)
+	sub	$16, %edx
+	jb	L(return_null)
+	lea	15(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case2_1):	/* index 0: returned without a length check */
+	mov	%edi, %eax
+	RETURN
+
+	.p2align 4
+L(exit_case2_2):	/* exit_case2_N: return index N-1 only if at least N bytes remain, else NULL */
+	sub	$2, %edx
+	jb	L(return_null)
+	lea	1(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case2_3):
+	sub	$3, %edx
+	jb	L(return_null)
+	lea	2(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case2_5):
+	sub	$5, %edx
+	jb	L(return_null)
+	lea	4(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case2_6):
+	sub	$6, %edx
+	jb	L(return_null)
+	lea	5(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case2_7):
+	sub	$7, %edx
+	jb	L(return_null)
+	lea	6(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case2_9):
+	sub	$9, %edx
+	jb	L(return_null)
+	lea	8(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case2_10):
+	sub	$10, %edx
+	jb	L(return_null)
+	lea	9(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case2_11):
+	sub	$11, %edx
+	jb	L(return_null)
+	lea	10(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case2_13):
+	sub	$13, %edx
+	jb	L(return_null)
+	lea	12(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case2_14):
+	sub	$14, %edx
+	jb	L(return_null)
+	lea	13(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(exit_case2_15):
+	sub	$15, %edx
+	jb	L(return_null)
+	lea	14(%edi), %eax
+	RETURN
+	.p2align 4
+L(return_null):
+	xor	%eax, %eax	/* eax = 0: return NULL */
+	RETURN
+END (memchr)
diff --git a/libc/arch-x86/string/sse2-memrchr-atom.S b/libc/arch-x86/string/sse2-memrchr-atom.S
new file mode 100644
index 0000000..1aa1a1a
--- /dev/null
+++ b/libc/arch-x86/string/sse2-memrchr-atom.S
@@ -0,0 +1,778 @@
+/*
+Copyright (c) 2011, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+    * this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+    * this list of conditions and the following disclaimer in the documentation
+    * and/or other materials provided with the distribution.
+
+    * Neither the name of Intel Corporation nor the names of its contributors
+    * may be used to endorse or promote products derived from this software
+    * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef L	/* L(x) expands to .Lx; every #ifndef block below only supplies a default definition.  */
+# define L(label)	.L##label
+#endif
+
+#ifndef cfi_startproc
+# define cfi_startproc	.cfi_startproc
+#endif
+
+#ifndef cfi_endproc
+# define cfi_endproc	.cfi_endproc
+#endif
+
+#ifndef cfi_rel_offset
+# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+#endif
+
+#ifndef cfi_restore
+# define cfi_restore(reg)	.cfi_restore reg
+#endif
+
+#ifndef cfi_adjust_cfa_offset
+# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
+#endif
+
+#ifndef ENTRY	/* ENTRY(name): emit a global function symbol aligned with .p2align 4 and open its CFI region.  */
+# define ENTRY(name)	\
+	.type name,  @function;	\
+	.globl name;	\
+	.p2align 4;	\
+name:	\
+	cfi_startproc
+#endif
+
+#ifndef END	/* END(name): close the CFI region and record the size of the symbol.  */
+# define END(name)	\
+	cfi_endproc;	\
+	.size name,	.-name
+#endif
+/* CFI_PUSH / CFI_POP: unwind annotations that accompany a 4-byte push / pop of REG.  */
+#define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+/* Offsets from %esp of the three stack arguments at function entry: 4, 8 and 12.  */
+#define PARMS  4
+#define STR1  PARMS	/* loaded into %ecx at entry: start of the buffer */
+#define STR2  STR1+4	/* loaded into %xmm1 at entry: value to search for */
+#define LEN   STR2+4	/* loaded into %edx at entry: length in bytes */
+
+	.text
+ENTRY (memrchr)	/* Scan the LEN bytes at STR1 from the end towards the start for the low byte of STR2.  */
+	mov	STR1(%esp), %ecx	/* %ecx = buffer start */
+	movd	STR2(%esp), %xmm1	/* %xmm1 = search value; its low byte is broadcast below */
+	mov	LEN(%esp), %edx	/* %edx = length */
+/* A zero length returns NULL; lengths of 16 or less take the short path.  */
+	test	%edx, %edx
+	jz	L(return_null)
+	sub	$16, %edx
+	jbe	L(length_less16)
+/* Two punpcklbw plus the pshufd below replicate the search byte into all 16 bytes of %xmm1.  */
+	punpcklbw %xmm1, %xmm1
+	add	%edx, %ecx	/* %ecx = start + length - 16, i.e. the last 16 bytes */
+	punpcklbw %xmm1, %xmm1
+/* Check the final 16 bytes with an unaligned load.  */
+	movdqu	(%ecx), %xmm0
+	pshufd	$0, %xmm1, %xmm1
+	pcmpeqb	%xmm1, %xmm0
+/* %eax gets one bit per byte that equals the search byte.  */
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	jnz	L(exit_dispatch)
+/* No match there: step back 64 bytes and skip the fix-up if %ecx is already 16-byte aligned.  */
+	sub	$64, %ecx
+	mov	%ecx, %eax
+	and	$15, %eax
+	jz	L(loop_prolog)
+/* Round %ecx up to the next 16-byte boundary and change %edx by the same amount (16 - misalignment).  */
+	add	$16, %ecx
+	add	$16, %edx
+	and	$-16, %ecx
+	sub	%eax, %edx
+
+	.p2align 4
+/* Loop start on aligned string.  */
+L(loop_prolog):	/* Two unrolled 64-byte steps with aligned loads, run before %ecx is rounded to a 64-byte boundary.  */
+	sub	$64, %edx
+	jbe	L(exit_loop)	/* count in %edx was 64 or less: finish in L(exit_loop) */
+/* Blocks are tested from the highest address (offset 48) down to offset 0.  */
+	movdqa	48(%ecx), %xmm0
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	jnz	L(matches48)
+
+	movdqa	32(%ecx), %xmm2
+	pcmpeqb	%xmm1, %xmm2
+	pmovmskb %xmm2, %eax
+	test	%eax, %eax
+	jnz	L(matches32)
+
+	movdqa	16(%ecx), %xmm3
+	pcmpeqb	%xmm1, %xmm3
+	pmovmskb %xmm3, %eax
+	test	%eax, %eax
+	jnz	L(matches16)
+
+	movdqa	(%ecx), %xmm4
+	pcmpeqb	%xmm1, %xmm4
+	pmovmskb %xmm4, %eax
+	test	%eax, %eax
+	jnz	L(exit_dispatch)
+/* Second unrolled step: move 64 bytes lower and repeat the same four checks.  */
+	sub	$64, %ecx
+	sub	$64, %edx
+	jbe	L(exit_loop)
+
+	movdqa	48(%ecx), %xmm0
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	jnz	L(matches48)
+
+	movdqa	32(%ecx), %xmm2
+	pcmpeqb	%xmm1, %xmm2
+	pmovmskb %xmm2, %eax
+	test	%eax, %eax
+	jnz	L(matches32)
+
+	movdqa	16(%ecx), %xmm3
+	pcmpeqb	%xmm1, %xmm3
+	pmovmskb %xmm3, %eax
+	test	%eax, %eax
+	jnz	L(matches16)
+
+	movdqa	(%ecx), %xmm3
+	pcmpeqb	%xmm1, %xmm3
+	pmovmskb %xmm3, %eax
+	test	%eax, %eax
+	jnz	L(exit_dispatch)
+/* If %ecx is not a multiple of 64, round it up to one and change %edx by the same amount.  */
+	mov	%ecx, %eax
+	and	$63, %eax
+	test	%eax, %eax
+	jz	L(align64_loop)
+
+	add	$64, %ecx
+	add	$64, %edx
+	and	$-64, %ecx
+	sub	%eax, %edx
+
+	.p2align 4
+L(align64_loop):	/* Main loop: each iteration moves %ecx 64 bytes lower and tests that 64-byte window.  */
+	sub	$64, %ecx
+	sub	$64, %edx
+	jbe	L(exit_loop)	/* count in %edx was 64 or less: finish in L(exit_loop) */
+/* Load the four 16-byte blocks of the window and compare each with the broadcast byte.  */
+	movdqa	(%ecx), %xmm0
+	movdqa	16(%ecx), %xmm2
+	movdqa	32(%ecx), %xmm3
+	movdqa	48(%ecx), %xmm4
+
+	pcmpeqb	%xmm1, %xmm0
+	pcmpeqb	%xmm1, %xmm2
+	pcmpeqb	%xmm1, %xmm3
+	pcmpeqb	%xmm1, %xmm4
+/* pcmpeqb gives 0x00 or 0xff per byte, so the byte-wise maximum is nonzero where any block matched.  */
+	pmaxub	%xmm3, %xmm0
+	pmaxub	%xmm4, %xmm2
+	pmaxub	%xmm0, %xmm2
+	pmovmskb %xmm2, %eax
+
+	test	%eax, %eax
+	jz	L(align64_loop)
+/* Something matched.  %xmm4 and %xmm3 still hold the results for offsets 48 and 32.  */
+	pmovmskb %xmm4, %eax
+	test	%eax, %eax
+	jnz	L(matches48)
+
+	pmovmskb %xmm3, %eax
+	test	%eax, %eax
+	jnz	L(matches32)
+/* %xmm2 and %xmm0 were overwritten by pmaxub, so offsets 16 and 0 are compared again.  */
+	movdqa	16(%ecx), %xmm2
+
+	pcmpeqb	%xmm1, %xmm2
+	pcmpeqb	(%ecx), %xmm1	/* destroys the broadcast byte in %xmm1; every path from here returns */
+
+	pmovmskb %xmm2, %eax
+	test	%eax, %eax
+	jnz	L(matches16)
+/* Match is in the block at offset 0: return the address of the highest set bit of %ax.  */
+	pmovmskb %xmm1, %eax
+	test	%ah, %ah
+	jnz	L(exit_dispatch_high)
+	mov	%al, %dl
+	and	$15 << 4, %dl
+	jnz	L(exit_dispatch_8)
+	test	$0x08, %al
+	jnz	L(exit_4)
+	test	$0x04, %al
+	jnz	L(exit_3)
+	test	$0x02, %al
+	jnz	L(exit_2)
+	mov	%ecx, %eax
+	ret
+
+	.p2align 4
+L(exit_loop):	/* Tail: reached when a `sub $64, %edx` in the loops above left zero or borrowed.  */
+	add	$64, %edx	/* undo that subtraction */
+	cmp	$32, %edx
+	jbe	L(exit_loop_32)	/* 32 or less: only offsets 48 and 32 are examined */
+/* Offsets 48 and 32 use the exits without a length check.  */
+	movdqa	48(%ecx), %xmm0
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	jnz	L(matches48)
+
+	movdqa	32(%ecx), %xmm2
+	pcmpeqb	%xmm1, %xmm2
+	pmovmskb %xmm2, %eax
+	test	%eax, %eax
+	jnz	L(matches32)
+/* Offsets 16 and 0 go through the *_1 exits, which re-check the match position against %edx.  */
+	movdqa	16(%ecx), %xmm3
+	pcmpeqb	%xmm1, %xmm3
+	pmovmskb %xmm3, %eax
+	test	%eax, %eax
+	jnz	L(matches16_1)
+	cmp	$48, %edx
+	jbe	L(return_null)	/* 48 or less: the block at offset 0 is not examined */
+
+	pcmpeqb	(%ecx), %xmm1	/* destroys the broadcast byte; every path from here returns */
+	pmovmskb %xmm1, %eax
+	test	%eax, %eax
+	jnz	L(matches0_1)
+	xor	%eax, %eax
+	ret
+
+	.p2align 4
+L(exit_loop_32):	/* Tail with %edx <= 32: examine offset 48, then offset 32 only if %edx > 16.  */
+	movdqa	48(%ecx), %xmm0
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	jnz	L(matches48_1)	/* *_1 exits re-check the match position against %edx */
+	cmp	$16, %edx
+	jbe	L(return_null)
+
+	pcmpeqb	32(%ecx), %xmm1	/* destroys the broadcast byte; every path from here returns */
+	pmovmskb %xmm1, %eax
+	test	%eax, %eax
+	jnz	L(matches32_1)
+	xor	%eax, %eax
+	ret
+
+	.p2align 4
+L(matches16):	/* Match mask in %ax belongs to the block at 16(%ecx).  */
+	lea	16(%ecx), %ecx
+	test	%ah, %ah	/* any match in bytes 8..15?  */
+	jnz	L(exit_dispatch_high)
+	mov	%al, %dl
+	and	$15 << 4, %dl	/* any match in bytes 4..7?  */
+	jnz	L(exit_dispatch_8)
+	test	$0x08, %al
+	jnz	L(exit_4)
+	test	$0x04, %al
+	jnz	L(exit_3)
+	test	$0x02, %al
+	jnz	L(exit_2)
+	mov	%ecx, %eax	/* only bit 0 can be set: byte 0 */
+	ret
+
+	.p2align 4
+L(matches32):	/* Match mask in %ax belongs to the block at 32(%ecx); same dispatch as above.  */
+	lea	32(%ecx), %ecx
+	test	%ah, %ah
+	jnz	L(exit_dispatch_high)
+	mov	%al, %dl
+	and	$15 << 4, %dl
+	jnz	L(exit_dispatch_8)
+	test	$0x08, %al
+	jnz	L(exit_4)
+	test	$0x04, %al
+	jnz	L(exit_3)
+	test	$0x02, %al
+	jnz	L(exit_2)
+	mov	%ecx, %eax
+	ret
+
+	.p2align 4
+L(matches48):	/* Match mask in %ax belongs to the block at 48(%ecx); falls through into the dispatch.  */
+	lea	48(%ecx), %ecx
+
+	.p2align 4
+L(exit_dispatch):	/* Return %ecx + index of the highest set bit of %ax; bits are tested from high to low.  */
+	test	%ah, %ah
+	jnz	L(exit_dispatch_high)
+	mov	%al, %dl
+	and	$15 << 4, %dl
+	jnz	L(exit_dispatch_8)
+	test	$0x08, %al
+	jnz	L(exit_4)
+	test	$0x04, %al
+	jnz	L(exit_3)
+	test	$0x02, %al
+	jnz	L(exit_2)
+	mov	%ecx, %eax
+	ret
+
+	.p2align 4
+L(exit_dispatch_8):	/* Highest match is in bytes 4..7 (bits 4..7 of %al).  */
+	test	$0x80, %al
+	jnz	L(exit_8)
+	test	$0x40, %al
+	jnz	L(exit_7)
+	test	$0x20, %al
+	jnz	L(exit_6)
+	lea	4(%ecx), %eax	/* bits 5..7 clear, so the match is byte 4 */
+	ret
+
+	.p2align 4
+L(exit_dispatch_high):	/* Highest match is in bytes 8..15 (%ah is nonzero).  */
+	mov	%ah, %dh
+	and	$15 << 4, %dh	/* any match in bytes 12..15?  */
+	jnz	L(exit_dispatch_high_8)
+	test	$0x08, %ah
+	jnz	L(exit_12)
+	test	$0x04, %ah
+	jnz	L(exit_11)
+	test	$0x02, %ah
+	jnz	L(exit_10)
+	lea	8(%ecx), %eax	/* only bit 0 of %ah remains: byte 8 */
+	ret
+
+	.p2align 4
+L(exit_dispatch_high_8):	/* Highest match is in bytes 12..15 (bits 4..7 of %ah).  */
+	test	$0x80, %ah
+	jnz	L(exit_16)
+	test	$0x40, %ah
+	jnz	L(exit_15)
+	test	$0x20, %ah
+	jnz	L(exit_14)
+	lea	12(%ecx), %eax	/* bits 5..7 clear, so the match is byte 12 */
+	ret
+
+	.p2align 4
+L(exit_2):	/* L(exit_N) returns %ecx + (N - 1), the address of byte N-1 of the current block.  */
+	lea	1(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_3):
+	lea	2(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_4):
+	lea	3(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_6):
+	lea	5(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_7):
+	lea	6(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_8):
+	lea	7(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_10):
+	lea	9(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_11):
+	lea	10(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_12):
+	lea	11(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_14):
+	lea	13(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_15):
+	lea	14(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_16):
+	lea	15(%ecx), %eax
+	ret
+
+	.p2align 4
+L(matches0_1):	/* Length-checked variant for the block at (%ecx).  NOTE(review): the sign test in these exits appears to reject matches lying before the start of the buffer - confirm.  */
+	lea	-64(%edx), %edx	/* bias %edx so that (%edx + byte offset) < 0 means "return NULL" */
+
+	test	%ah, %ah
+	jnz	L(exit_dispatch_1_high)
+	mov	%al, %ah	/* %ah is zero here, so it can be reused as scratch */
+	and	$15 << 4, %ah
+	jnz	L(exit_dispatch_1_8)
+	test	$0x08, %al
+	jnz	L(exit_1_4)
+	test	$0x04, %al
+	jnz	L(exit_1_3)
+	test	$0x02, %al
+	jnz	L(exit_1_2)
+
+	add	$0, %edx	/* byte 0: sets the sign flag from %edx */
+	jl	L(return_null)
+	mov	%ecx, %eax
+	ret
+
+	.p2align 4
+L(matches16_1):	/* Length-checked variant for the block at 16(%ecx).  */
+	lea	-48(%edx), %edx
+	lea	16(%ecx), %ecx
+
+	test	%ah, %ah
+	jnz	L(exit_dispatch_1_high)
+	mov	%al, %ah
+	and	$15 << 4, %ah
+	jnz	L(exit_dispatch_1_8)
+	test	$0x08, %al
+	jnz	L(exit_1_4)
+	test	$0x04, %al
+	jnz	L(exit_1_3)
+	test	$0x02, %al
+	jnz	L(exit_1_2)
+
+	add	$0, %edx
+	jl	L(return_null)
+	mov	%ecx, %eax
+	ret
+
+	.p2align 4
+L(matches32_1):	/* Length-checked variant for the block at 32(%ecx).  */
+	lea	-32(%edx), %edx
+	lea	32(%ecx), %ecx
+
+	test	%ah, %ah
+	jnz	L(exit_dispatch_1_high)
+	mov	%al, %ah
+	and	$15 << 4, %ah
+	jnz	L(exit_dispatch_1_8)
+	test	$0x08, %al
+	jnz	L(exit_1_4)
+	test	$0x04, %al
+	jnz	L(exit_1_3)
+	test	$0x02, %al
+	jnz	L(exit_1_2)
+
+	add	$0, %edx
+	jl	L(return_null)
+	mov	%ecx, %eax
+	ret
+
+	.p2align 4
+L(matches48_1):	/* Length-checked variant for the block at 48(%ecx); falls through into the dispatch.  */
+	lea	-16(%edx), %edx
+	lea	48(%ecx), %ecx
+
+	.p2align 4
+L(exit_dispatch_1):	/* Like L(exit_dispatch), but each exit returns NULL when (%edx + byte offset) is negative.  */
+	test	%ah, %ah
+	jnz	L(exit_dispatch_1_high)
+	mov	%al, %ah
+	and	$15 << 4, %ah
+	jnz	L(exit_dispatch_1_8)
+	test	$0x08, %al
+	jnz	L(exit_1_4)
+	test	$0x04, %al
+	jnz	L(exit_1_3)
+	test	$0x02, %al
+	jnz	L(exit_1_2)
+
+	add	$0, %edx
+	jl	L(return_null)
+	mov	%ecx, %eax
+	ret
+
+	.p2align 4
+L(exit_dispatch_1_8):	/* Length-checked: highest match is in bytes 4..7 (bits 4..7 of %al).  */
+	test	$0x80, %al
+	jnz	L(exit_1_8)
+	test	$0x40, %al
+	jnz	L(exit_1_7)
+	test	$0x20, %al
+	jnz	L(exit_1_6)
+/* Byte 4: NULL if %edx + 4 is negative, otherwise return its address.  */
+	add	$4, %edx
+	jl	L(return_null)
+	lea	4(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_dispatch_1_high):	/* Length-checked: highest match is in bytes 8..15 (%ah is nonzero).  */
+	mov	%ah, %al	/* %al is no longer needed; reuse it as scratch */
+	and	$15 << 4, %al
+	jnz	L(exit_dispatch_1_high_8)
+	test	$0x08, %ah
+	jnz	L(exit_1_12)
+	test	$0x04, %ah
+	jnz	L(exit_1_11)
+	test	$0x02, %ah
+	jnz	L(exit_1_10)
+/* Byte 8: NULL if %edx + 8 is negative, otherwise return its address.  */
+	add	$8, %edx
+	jl	L(return_null)
+	lea	8(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_dispatch_1_high_8):	/* Length-checked: highest match is in bytes 12..15 (bits 4..7 of %ah).  */
+	test	$0x80, %ah
+	jnz	L(exit_1_16)
+	test	$0x40, %ah
+	jnz	L(exit_1_15)
+	test	$0x20, %ah
+	jnz	L(exit_1_14)
+/* Byte 12: NULL if %edx + 12 is negative, otherwise return its address.  */
+	add	$12, %edx
+	jl	L(return_null)
+	lea	12(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_1_2):	/* L(exit_1_N): return %ecx + (N - 1), or NULL when %edx + (N - 1) is negative.  */
+	add	$1, %edx
+	jl	L(return_null)
+	lea	1(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_1_3):
+	add	$2, %edx
+	jl	L(return_null)
+	lea	2(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_1_4):
+	add	$3, %edx
+	jl	L(return_null)
+	lea	3(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_1_6):
+	add	$5, %edx
+	jl	L(return_null)
+	lea	5(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_1_7):
+	add	$6, %edx
+	jl	L(return_null)
+	lea	6(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_1_8):
+	add	$7, %edx
+	jl	L(return_null)
+	lea	7(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_1_10):
+	add	$9, %edx
+	jl	L(return_null)
+	lea	9(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_1_11):
+	add	$10, %edx
+	jl	L(return_null)
+	lea	10(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_1_12):
+	add	$11, %edx
+	jl	L(return_null)
+	lea	11(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_1_14):
+	add	$13, %edx
+	jl	L(return_null)
+	lea	13(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_1_15):
+	add	$14, %edx
+	jl	L(return_null)
+	lea	14(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit_1_16):
+	add	$15, %edx
+	jl	L(return_null)
+	lea	15(%ecx), %eax
+	ret
+
+	.p2align 4
+L(return_null):	/* Not found: return 0.  Used by the paths that have not pushed %edi.  */
+	xor	%eax, %eax
+	ret
+
+	.p2align 4
+L(length_less16_offset0):	/* Short path (length <= 16) for a 16-byte aligned buffer: %eax = start, %edx = length.  */
+	mov	%dl, %cl	/* shift count = length */
+	pcmpeqb	(%eax), %xmm1	/* compare the 16-byte block at the buffer start with the broadcast byte */
+/* %edx = (1 << length) - 1, a mask selecting only the bytes inside the buffer.  */
+	mov	$1, %edx
+	sal	%cl, %edx
+	sub	$1, %edx
+
+	mov	%eax, %ecx	/* L(exit_dispatch) expects the block address in %ecx */
+	pmovmskb %xmm1, %eax
+
+	and	%edx, %eax	/* drop matches beyond the end of the buffer */
+	test	%eax, %eax
+	jnz	L(exit_dispatch)
+
+	xor	%eax, %eax
+	ret
+
+	.p2align 4
+L(length_less16):	/* Short path for 1 <= length <= 16; entered with %edx = length - 16 and %ecx = buffer start.  */
+	punpcklbw %xmm1, %xmm1
+	add	$16, %edx	/* restore %edx = length */
+	punpcklbw %xmm1, %xmm1
+
+	mov	%ecx, %eax
+	pshufd	$0, %xmm1, %xmm1	/* with the two punpcklbw above: search byte replicated into all 16 bytes */
+
+	and	$15, %ecx	/* %ecx = offset of the buffer within its 16-byte block */
+	jz	L(length_less16_offset0)
+
+	PUSH	(%edi)
+/* %dh = offset + length - 16; a positive value means the buffer runs into the next 16-byte block.  */
+	mov	%cl, %dh
+	add	%dl, %dh
+	and	$-16, %eax	/* %eax = address of the aligned block containing the buffer start */
+
+	sub	$16, %dh
+	ja	L(length_less16_part2)
+/* Whole buffer lies inside one aligned block.  */
+	pcmpeqb	(%eax), %xmm1
+	pmovmskb %xmm1, %edi
+
+	sar	%cl, %edi	/* discard the bytes in front of the buffer */
+	add	%ecx, %eax	/* %eax = buffer start again */
+	mov	%dl, %cl
+/* %edx = (1 << length) - 1: keep only the bits for bytes inside the buffer.  */
+	mov	$1, %edx
+	sal	%cl, %edx
+	sub	$1, %edx
+
+	and	%edx, %edi
+	test	%edi, %edi
+	jz	L(ret_null)
+
+	bsr	%edi, %edi	/* index of the highest set bit = last matching byte */
+	add	%edi, %eax
+	POP	(%edi)
+	ret
+
+	CFI_PUSH     (%edi)
+
+	.p2align 4
+L(length_less16_part2):	/* Short buffer spanning two aligned blocks: %eax = first block, %cl = start offset, %dh = bytes used in the second block.  */
+	movdqa	16(%eax), %xmm2
+	pcmpeqb	%xmm1, %xmm2
+	pmovmskb %xmm2, %edi
+
+	mov	%cl, %ch	/* save the start offset while %cl is used as a shift count */
+/* %edx = (1 << %dh) - 1: keep only the second-block bytes that belong to the buffer.  */
+	mov	%dh, %cl
+	mov	$1, %edx
+	sal	%cl, %edx
+	sub	$1, %edx
+
+	and	%edx, %edi
+
+	test	%edi, %edi
+	jnz	L(length_less16_part2_return)
+/* No match in the second block: test the first block, dropping the bytes in front of the buffer.  */
+	pcmpeqb	(%eax), %xmm1
+	pmovmskb %xmm1, %edi
+
+	mov	%ch, %cl
+	sar	%cl, %edi
+	test	%edi, %edi
+	jz	L(ret_null)
+
+	bsr	%edi, %edi	/* index of the last match, counted from the buffer start */
+	add	%edi, %eax
+	xor	%ch, %ch	/* clear the saved copy so %ecx is just the start offset */
+	add	%ecx, %eax
+	POP	(%edi)
+	ret
+
+	CFI_PUSH     (%edi)
+
+	.p2align 4
+L(length_less16_part2_return):	/* Last match is in the second block: return %eax + 16 + index of the highest set bit.  */
+	bsr	%edi, %edi
+	lea	16(%eax, %edi), %eax
+	POP	(%edi)
+	ret
+
+	CFI_PUSH     (%edi)
+
+	.p2align 4
+L(ret_null):	/* Not found on a path that pushed %edi: restore it and return 0.  */
+	xor	%eax, %eax
+	POP	(%edi)
+	ret
+
+END (memrchr)
diff --git a/libc/arch-x86/string/sse2-memset5-atom.S b/libc/arch-x86/string/sse2-memset-atom.S
similarity index 99%
rename from libc/arch-x86/string/sse2-memset5-atom.S
rename to libc/arch-x86/string/sse2-memset-atom.S
index 557c019..a54bf51 100644
--- a/libc/arch-x86/string/sse2-memset5-atom.S
+++ b/libc/arch-x86/string/sse2-memset-atom.S
@@ -28,6 +28,9 @@
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
+#include "cache.h"
+#undef __i686
+
 #ifndef L
 # define L(label)	.L##label
 #endif
@@ -136,9 +139,13 @@
     jmp		*TABLE(,%ecx,4)
 #endif
 
+#ifndef MEMSET
+# define MEMSET memset
+#endif
+
 	.section .text.sse2,"ax",@progbits
 	ALIGN (4)
-ENTRY (sse2_memset5_atom)
+ENTRY (MEMSET)
 	ENTRANCE
 
 	movl	LEN(%esp), %ecx
@@ -911,4 +918,4 @@
 	SETRTNVAL
 	RETURN_END
 
-END (sse2_memset5_atom)
+END (MEMSET)
diff --git a/libc/arch-x86/string/sse2-strchr-atom.S b/libc/arch-x86/string/sse2-strchr-atom.S
new file mode 100644
index 0000000..e325181
--- /dev/null
+++ b/libc/arch-x86/string/sse2-strchr-atom.S
@@ -0,0 +1,391 @@
+/*
+Copyright (c) 2011, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+    * this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+    * this list of conditions and the following disclaimer in the documentation
+    * and/or other materials provided with the distribution.
+
+    * Neither the name of Intel Corporation nor the names of its contributors
+    * may be used to endorse or promote products derived from this software
+    * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef L	/* L(x) expands to .Lx; every #ifndef block below only supplies a default definition.  */
+# define L(label)	.L##label
+#endif
+
+#ifndef cfi_startproc
+# define cfi_startproc	.cfi_startproc
+#endif
+
+#ifndef cfi_endproc
+# define cfi_endproc	.cfi_endproc
+#endif
+
+#ifndef cfi_rel_offset
+# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+#endif
+
+#ifndef cfi_restore
+# define cfi_restore(reg)	.cfi_restore reg
+#endif
+
+#ifndef cfi_adjust_cfa_offset
+# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
+#endif
+
+#ifndef ENTRY	/* ENTRY(name): emit a global function symbol aligned with .p2align 4 and open its CFI region.  */
+# define ENTRY(name)	\
+	.type name,  @function;	\
+	.globl name;	\
+	.p2align 4;	\
+name:	\
+	cfi_startproc
+#endif
+
+#ifndef END	/* END(name): close the CFI region and record the size of the symbol.  */
+# define END(name)	\
+	cfi_endproc;	\
+	.size name,	.-name
+#endif
+/* CFI_PUSH / CFI_POP: unwind annotations that accompany a 4-byte push / pop of REG.  */
+#define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+#define PUSH(REG)	pushl REG;	CFI_PUSH (REG)
+#define POP(REG)	popl REG;	CFI_POP (REG)
+/* PARMS is 8 because the arguments are read after ENTRANCE has pushed %edi.  */
+#define PARMS	8
+#define ENTRANCE	PUSH(%edi)
+#define RETURN	POP (%edi); ret; CFI_PUSH (%edi);	/* restore %edi and return; the CFI_PUSH re-applies the pushed-%edi annotations for the code after the ret */
+
+
+#define STR1	PARMS	/* loaded into %ecx/%edi at entry: the string */
+#define STR2	STR1+4	/* loaded into %xmm1 at entry: the character to find */
+
+	.text
+ENTRY (strchr)	/* Find the first byte equal to the low byte of STR2 in the NUL-terminated string STR1.  */
+
+	ENTRANCE
+	mov	STR1(%esp), %ecx
+	movd	STR2(%esp), %xmm1
+
+	pxor	%xmm2, %xmm2	/* %xmm2 = all zero, used to detect the NUL terminator */
+	mov	%ecx, %edi	/* %edi = current position in the string */
+	punpcklbw %xmm1, %xmm1
+	punpcklbw %xmm1, %xmm1
+	/* ECX has OFFSET. */
+	and	$15, %ecx
+	pshufd	$0, %xmm1, %xmm1	/* search byte now in all 16 bytes; pshufd leaves the flags from `and` intact */
+	je	L(loop)	/* string is already 16-byte aligned */
+
+/* Handle unaligned string.  */
+	and	$-16, %edi
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, %xmm0
+	/* Find where NULL is.  */
+	pmovmskb %xmm2, %edx
+	/* Check if there is a match.  */
+	pmovmskb %xmm0, %eax
+	/* Remove the leading bytes.  */
+	sarl	%cl, %edx
+	sarl	%cl, %eax
+	test	%eax, %eax
+	jz	L(unaligned_no_match)
+	add	%ecx, %edi	/* %edi = original string pointer, so bit i of the shifted masks is byte i of the string */
+	test	%edx, %edx
+	jz	L(match_case1)	/* match and no NUL in this block */
+	jmp	L(match_case2)	/* both present: their order decides the result */
+
+	.p2align 4
+L(unaligned_no_match):	/* First (partial) block had no match.  */
+	test	%edx, %edx
+	jne	L(return_null)	/* the string ends inside this block */
+
+	pxor	%xmm2, %xmm2	/* reset the NUL detector */
+	add	$16, %edi	/* %edi was rounded down above, so this is the next aligned block */
+
+	.p2align 4
+/* Loop start on aligned string.  */
+L(loop):	/* Unrolled four times: each step tests one aligned 16-byte block for a match (%eax) and for NUL (%edx).  */
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2	/* %xmm2 remains all zero whenever the block holds no NUL, so it needs no reset */
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %edx
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	jnz	L(matches)
+	test	%edx, %edx
+	jnz	L(return_null)
+	add	$16, %edi
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %edx
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	jnz	L(matches)
+	test	%edx, %edx
+	jnz	L(return_null)
+	add	$16, %edi
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %edx
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	jnz	L(matches)
+	test	%edx, %edx
+	jnz	L(return_null)
+	add	$16, %edi
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %edx
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	jnz	L(matches)
+	test	%edx, %edx
+	jnz	L(return_null)
+	add	$16, %edi
+	jmp	L(loop)
+
+L(matches):	/* %eax = match mask, %edx = NUL mask for the block at %edi.  */
+	/* There is a match.  First find where NULL is.  */
+	test	%edx, %edx
+	jz	L(match_case1)
+
+	.p2align 4
+L(match_case2):	/* Block holds both a match and a NUL: return the first match unless a NUL comes before it.  */
+	test	%al, %al
+	jz	L(match_higth_case2)	/* no match in bytes 0..7 */
+
+	mov	%al, %cl
+	and	$15, %cl
+	jnz	L(match_case2_4)	/* first match is in bytes 0..3 */
+/* First match is in bytes 4..7; a NUL in bytes 0..3 precedes it.  */
+	mov	%dl, %ch
+	and	$15, %ch
+	jnz	L(return_null)
+/* Walk bytes 4..6; at each position the match bit is tested before the NUL bit.  */
+	test	$0x10, %al
+	jnz	L(Exit5)
+	test	$0x10, %dl
+	jnz	L(return_null)
+	test	$0x20, %al
+	jnz	L(Exit6)
+	test	$0x20, %dl
+	jnz	L(return_null)
+	test	$0x40, %al
+	jnz	L(Exit7)
+	test	$0x40, %dl
+	jnz	L(return_null)
+	lea	7(%edi), %eax	/* only bit 7 of %al can still be set: byte 7 */
+	RETURN
+
+	.p2align 4
+L(match_case2_4):	/* First match is in bytes 0..3; walk bytes 0..2, match bit before NUL bit.  */
+	test	$0x01, %al
+	jnz	L(Exit1)
+	test	$0x01, %dl
+	jnz	L(return_null)
+	test	$0x02, %al
+	jnz	L(Exit2)
+	test	$0x02, %dl
+	jnz	L(return_null)
+	test	$0x04, %al
+	jnz	L(Exit3)
+	test	$0x04, %dl
+	jnz	L(return_null)
+	lea	3(%edi), %eax	/* only bit 3 can still be set: byte 3 */
+	RETURN
+
+	.p2align 4
+L(match_higth_case2):	/* Match and NUL both present, first match in bytes 8..15 (masks in %ah / %dh).  */
+	test	%dl, %dl
+	jnz	L(return_null)	/* a NUL in bytes 0..7 precedes the match */
+
+	mov	%ah, %cl
+	and	$15, %cl
+	jnz	L(match_case2_12)	/* first match is in bytes 8..11 */
+/* First match is in bytes 12..15; a NUL in bytes 8..11 precedes it.  */
+	mov	%dh, %ch
+	and	$15, %ch
+	jnz	L(return_null)
+/* Walk bytes 12..14; at each position the match bit is tested before the NUL bit.  */
+	test	$0x10, %ah
+	jnz	L(Exit13)
+	test	$0x10, %dh
+	jnz	L(return_null)
+	test	$0x20, %ah
+	jnz	L(Exit14)
+	test	$0x20, %dh
+	jnz	L(return_null)
+	test	$0x40, %ah
+	jnz	L(Exit15)
+	test	$0x40, %dh
+	jnz	L(return_null)
+	lea	15(%edi), %eax	/* only bit 7 of %ah can still be set: byte 15 */
+	RETURN
+
+	.p2align 4
+L(match_case2_12):	/* First match is in bytes 8..11; walk bytes 8..10, match bit before NUL bit.  */
+	test	$0x01, %ah
+	jnz	L(Exit9)
+	test	$0x01, %dh
+	jnz	L(return_null)
+	test	$0x02, %ah
+	jnz	L(Exit10)
+	test	$0x02, %dh
+	jnz	L(return_null)
+	test	$0x04, %ah
+	jnz	L(Exit11)
+	test	$0x04, %dh
+	jnz	L(return_null)
+	lea	11(%edi), %eax	/* only bit 3 of %ah can still be set: byte 11 */
+	RETURN
+
+	.p2align 4
+L(match_case1):	/* Match but no NUL in this block: return the address of the lowest set bit of %ax.  */
+	test	%al, %al
+	jz	L(match_higth_case1)	/* no match in bytes 0..7 */
+
+	test	$0x01, %al
+	jnz	L(Exit1)
+	test	$0x02, %al
+	jnz	L(Exit2)
+	test	$0x04, %al
+	jnz	L(Exit3)
+	test	$0x08, %al
+	jnz	L(Exit4)
+	test	$0x10, %al
+	jnz	L(Exit5)
+	test	$0x20, %al
+	jnz	L(Exit6)
+	test	$0x40, %al
+	jnz	L(Exit7)
+	lea	7(%edi), %eax	/* only bit 7 can still be set: byte 7 */
+	RETURN
+
+	.p2align 4
+L(match_higth_case1):	/* No NUL, first match in bytes 8..15: scan %ah from bit 0 upwards.  */
+	test	$0x01, %ah
+	jnz	L(Exit9)
+	test	$0x02, %ah
+	jnz	L(Exit10)
+	test	$0x04, %ah
+	jnz	L(Exit11)
+	test	$0x08, %ah
+	jnz	L(Exit12)
+	test	$0x10, %ah
+	jnz	L(Exit13)
+	test	$0x20, %ah
+	jnz	L(Exit14)
+	test	$0x40, %ah
+	jnz	L(Exit15)
+	lea	15(%edi), %eax	/* only bit 7 of %ah can still be set: byte 15 */
+	RETURN
+
+	.p2align 4
+L(Exit1):	/* L(ExitN) returns %edi + (N - 1); RETURN restores %edi before the ret.  */
+	lea	(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit2):
+	lea	1(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit3):
+	lea	2(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit4):
+	lea	3(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit5):
+	lea	4(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit6):
+	lea	5(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit7):
+	lea	6(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit9):
+	lea	8(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit10):
+	lea	9(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit11):
+	lea	10(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit12):
+	lea	11(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit13):
+	lea	12(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit14):
+	lea	13(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit15):
+	lea	14(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(return_null):	/* Character not found before the terminator: return 0.  */
+	xor	%eax, %eax
+	RETURN
+
+END (strchr)
diff --git a/libc/arch-x86/string/sse2-strlen-atom.S b/libc/arch-x86/string/sse2-strlen-atom.S
index 8911868..81768fb 100644
--- a/libc/arch-x86/string/sse2-strlen-atom.S
+++ b/libc/arch-x86/string/sse2-strlen-atom.S
@@ -1,71 +1,112 @@
-#define STRLEN sse2_strlen_atom
+/*
+Copyright (c) 2011, Intel Corporation
+All rights reserved.
 
-#ifndef L
-# define L(label)	.L##label
-#endif
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
 
-#ifndef cfi_startproc
-# define cfi_startproc			.cfi_startproc
-#endif
+    * Redistributions of source code must retain the above copyright notice,
+    * this list of conditions and the following disclaimer.
 
-#ifndef cfi_endproc
-# define cfi_endproc			.cfi_endproc
-#endif
+    * Redistributions in binary form must reproduce the above copyright notice,
+    * this list of conditions and the following disclaimer in the documentation
+    * and/or other materials provided with the distribution.
 
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
+    * Neither the name of Intel Corporation nor the names of its contributors
+    * may be used to endorse or promote products derived from this software
+    * without specific prior written permission.
 
-#ifndef cfi_restore
-# define cfi_restore(reg)		.cfi_restore reg
-#endif
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
 
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
+#ifndef USE_AS_STRCAT
 
-#ifndef cfi_remember_state
-# define cfi_remember_state		.cfi_remember_state
-#endif
+# ifndef STRLEN
+#  define STRLEN strlen
+# endif
 
-#ifndef cfi_restore_state
-# define cfi_restore_state		.cfi_restore_state
-#endif
+# ifndef L
+#  define L(label)	.L##label
+# endif
 
-#ifndef ENTRY
-# define ENTRY(name)			\
-	.type name,  @function; 	\
-	.globl name;			\
-	.p2align 4;			\
-name:					\
+# ifndef cfi_startproc
+#  define cfi_startproc	.cfi_startproc
+# endif
+
+# ifndef cfi_endproc
+#  define cfi_endproc	.cfi_endproc
+# endif
+
+/* A callee-saved register is required only for strnlen.  */
+
+# ifdef USE_AS_STRNLEN
+#  ifndef cfi_rel_offset
+#   define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+#  endif
+
+#  ifndef cfi_restore
+#   define cfi_restore(reg)	.cfi_restore reg
+#  endif
+
+#  ifndef cfi_adjust_cfa_offset
+#   define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
+#  endif
+# endif
+
+# ifndef ENTRY
+#  define ENTRY(name)	\
+	.type name,  @function;	\
+	.globl name;	\
+	.p2align 4;	\
+name:	\
 	cfi_startproc
-#endif
+# endif
 
-#ifndef END
-# define END(name)			\
-	cfi_endproc;			\
+# ifndef END
+#  define END(name)	\
+	cfi_endproc;	\
 	.size name, .-name
-#endif
+# endif
 
-#define CFI_PUSH(REG)						\
-  cfi_adjust_cfa_offset (4);					\
-  cfi_rel_offset (REG, 0)
+# define PARMS	4
+# define STR	PARMS
+# define RETURN	ret
 
-#define CFI_POP(REG)						\
-  cfi_adjust_cfa_offset (-4);					\
-  cfi_restore (REG)
+# ifdef USE_AS_STRNLEN
+#  define LEN	PARMS + 8
+#  define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
 
-#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-#define POP(REG)	popl REG; CFI_POP (REG)
-#define PARMS		4
-#define	STR		PARMS
-#define ENTRANCE
-#define RETURN		ret
+#  define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+#  define PUSH(REG)	pushl	REG;	CFI_PUSH (REG)
+#  define POP(REG)	popl	REG;	CFI_POP (REG)
+#  undef RETURN
+#  define RETURN	POP (%edi); ret; CFI_PUSH(%edi);
+# endif
 
 	.text
 ENTRY (STRLEN)
-	ENTRANCE
 	mov	STR(%esp), %edx
+# ifdef USE_AS_STRNLEN
+	PUSH	(%edi)
+	movl	LEN(%esp), %edi
+	sub	$4, %edi
+	jbe	L(len_less4_prolog)
+# endif
+#endif
 	xor	%eax, %eax
 	cmpb	$0, (%edx)
 	jz	L(exit_tail0)
@@ -75,6 +116,12 @@
 	jz	L(exit_tail2)
 	cmpb	$0, 3(%edx)
 	jz	L(exit_tail3)
+
+#ifdef USE_AS_STRNLEN
+	sub	$4, %edi
+	jbe	L(len_less8_prolog)
+#endif
+
 	cmpb	$0, 4(%edx)
 	jz	L(exit_tail4)
 	cmpb	$0, 5(%edx)
@@ -83,6 +130,12 @@
 	jz	L(exit_tail6)
 	cmpb	$0, 7(%edx)
 	jz	L(exit_tail7)
+
+#ifdef USE_AS_STRNLEN
+	sub	$4, %edi
+	jbe	L(len_less12_prolog)
+#endif
+
 	cmpb	$0, 8(%edx)
 	jz	L(exit_tail8)
 	cmpb	$0, 9(%edx)
@@ -91,6 +144,12 @@
 	jz	L(exit_tail10)
 	cmpb	$0, 11(%edx)
 	jz	L(exit_tail11)
+
+#ifdef USE_AS_STRNLEN
+	sub	$4, %edi
+	jbe	L(len_less16_prolog)
+#endif
+
 	cmpb	$0, 12(%edx)
 	jz	L(exit_tail12)
 	cmpb	$0, 13(%edx)
@@ -99,211 +158,531 @@
 	jz	L(exit_tail14)
 	cmpb	$0, 15(%edx)
 	jz	L(exit_tail15)
+
 	pxor	%xmm0, %xmm0
-	mov	%edx, %eax
-	mov	%edx, %ecx
+	lea	16(%edx), %eax
+	mov	%eax, %ecx
 	and	$-16, %eax
-	add	$16, %ecx
-	add	$16, %eax
+
+#ifdef USE_AS_STRNLEN
+	and	$15, %edx
+	add	%edx, %edi
+	sub	$64, %edi
+	jbe	L(len_less64)
+#endif
 
 	pcmpeqb	(%eax), %xmm0
 	pmovmskb %xmm0, %edx
 	pxor	%xmm1, %xmm1
-	test	%edx, %edx
 	lea	16(%eax), %eax
+	test	%edx, %edx
 	jnz	L(exit)
 
 	pcmpeqb	(%eax), %xmm1
 	pmovmskb %xmm1, %edx
 	pxor	%xmm2, %xmm2
-	test	%edx, %edx
 	lea	16(%eax), %eax
+	test	%edx, %edx
 	jnz	L(exit)
 
-
 	pcmpeqb	(%eax), %xmm2
 	pmovmskb %xmm2, %edx
 	pxor	%xmm3, %xmm3
-	test	%edx, %edx
 	lea	16(%eax), %eax
+	test	%edx, %edx
 	jnz	L(exit)
 
 	pcmpeqb	(%eax), %xmm3
 	pmovmskb %xmm3, %edx
-	test	%edx, %edx
 	lea	16(%eax), %eax
+	test	%edx, %edx
 	jnz	L(exit)
 
+#ifdef USE_AS_STRNLEN
+	sub	$64, %edi
+	jbe	L(len_less64)
+#endif
+
 	pcmpeqb	(%eax), %xmm0
 	pmovmskb %xmm0, %edx
-	test	%edx, %edx
 	lea	16(%eax), %eax
+	test	%edx, %edx
 	jnz	L(exit)
 
 	pcmpeqb	(%eax), %xmm1
 	pmovmskb %xmm1, %edx
-	test	%edx, %edx
 	lea	16(%eax), %eax
+	test	%edx, %edx
 	jnz	L(exit)
 
 	pcmpeqb	(%eax), %xmm2
 	pmovmskb %xmm2, %edx
-	test	%edx, %edx
 	lea	16(%eax), %eax
+	test	%edx, %edx
 	jnz	L(exit)
 
 	pcmpeqb	(%eax), %xmm3
 	pmovmskb %xmm3, %edx
-	test	%edx, %edx
 	lea	16(%eax), %eax
+	test	%edx, %edx
 	jnz	L(exit)
 
+#ifdef USE_AS_STRNLEN
+	sub	$64, %edi
+	jbe	L(len_less64)
+#endif
+
 	pcmpeqb	(%eax), %xmm0
 	pmovmskb %xmm0, %edx
-	test	%edx, %edx
 	lea	16(%eax), %eax
+	test	%edx, %edx
 	jnz	L(exit)
 
 	pcmpeqb	(%eax), %xmm1
 	pmovmskb %xmm1, %edx
-	test	%edx, %edx
 	lea	16(%eax), %eax
+	test	%edx, %edx
 	jnz	L(exit)
 
 	pcmpeqb	(%eax), %xmm2
 	pmovmskb %xmm2, %edx
-	test	%edx, %edx
 	lea	16(%eax), %eax
+	test	%edx, %edx
 	jnz	L(exit)
 
 	pcmpeqb	(%eax), %xmm3
 	pmovmskb %xmm3, %edx
-	test	%edx, %edx
 	lea	16(%eax), %eax
+	test	%edx, %edx
 	jnz	L(exit)
 
+#ifdef USE_AS_STRNLEN
+	sub	$64, %edi
+	jbe	L(len_less64)
+#endif
+
 	pcmpeqb	(%eax), %xmm0
 	pmovmskb %xmm0, %edx
-	test	%edx, %edx
 	lea	16(%eax), %eax
+	test	%edx, %edx
 	jnz	L(exit)
 
 	pcmpeqb	(%eax), %xmm1
 	pmovmskb %xmm1, %edx
-	test	%edx, %edx
 	lea	16(%eax), %eax
+	test	%edx, %edx
 	jnz	L(exit)
 
 	pcmpeqb	(%eax), %xmm2
 	pmovmskb %xmm2, %edx
-	test	%edx, %edx
 	lea	16(%eax), %eax
+	test	%edx, %edx
 	jnz	L(exit)
 
 	pcmpeqb	(%eax), %xmm3
 	pmovmskb %xmm3, %edx
-	test	%edx, %edx
 	lea	16(%eax), %eax
+	test	%edx, %edx
 	jnz	L(exit)
 
+#ifdef USE_AS_STRNLEN
+	mov	%eax, %edx
+	and	$63, %edx
+	add	%edx, %edi
+#endif
+
 	and	$-0x40, %eax
-	PUSH (%esi)
-	PUSH (%edi)
-	PUSH (%ebx)
-	PUSH (%ebp)
-	xor	%ebp, %ebp
-L(aligned_64):
-	pcmpeqb	(%eax), %xmm0
-	pcmpeqb	16(%eax), %xmm1
-	pcmpeqb	32(%eax), %xmm2
-	pcmpeqb	48(%eax), %xmm3
-	pmovmskb %xmm0, %edx
-	pmovmskb %xmm1, %esi
-	pmovmskb %xmm2, %edi
-	pmovmskb %xmm3, %ebx
-	or	%edx, %ebp
-	or	%esi, %ebp
-	or	%edi, %ebp
-	or	%ebx, %ebp
+
+	.p2align 4
+L(aligned_64_loop):
+#ifdef USE_AS_STRNLEN
+	sub	$64, %edi
+	jbe	L(len_less64)
+#endif
+	movaps	(%eax), %xmm0
+	movaps	16(%eax), %xmm1
+	movaps	32(%eax), %xmm2
+	movaps	48(%eax), %xmm6
+	pminub	%xmm1, %xmm0
+	pminub	%xmm6, %xmm2
+	pminub	%xmm0, %xmm2
+	pcmpeqb	%xmm3, %xmm2
+	pmovmskb %xmm2, %edx
 	lea	64(%eax), %eax
-	jz	L(aligned_64)
-L(48leave):
 	test	%edx, %edx
-	jnz	L(aligned_64_exit_16)
-	test	%esi, %esi
-	jnz	L(aligned_64_exit_32)
-	test	%edi, %edi
-	jnz	L(aligned_64_exit_48)
-	mov	%ebx, %edx
-	lea	(%eax), %eax
-	jmp	L(aligned_64_exit)
-L(aligned_64_exit_48):
-	lea	-16(%eax), %eax
-	mov	%edi, %edx
-	jmp	L(aligned_64_exit)
-L(aligned_64_exit_32):
-	lea	-32(%eax), %eax
-	mov	%esi, %edx
-	jmp	L(aligned_64_exit)
-L(aligned_64_exit_16):
-	lea	-48(%eax), %eax
-L(aligned_64_exit):
-	POP (%ebp)
-	POP (%ebx)
-	POP (%edi)
-	POP (%esi)
+	jz	L(aligned_64_loop)
+
+	pcmpeqb	-64(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	48(%ecx), %ecx
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	%xmm1, %xmm3
+	pmovmskb %xmm3, %edx
+	lea	-16(%ecx), %ecx
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	-32(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	-16(%ecx), %ecx
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	%xmm6, %xmm3
+	pmovmskb %xmm3, %edx
+	lea	-16(%ecx), %ecx
 L(exit):
 	sub	%ecx, %eax
 	test	%dl, %dl
 	jz	L(exit_high)
+
+	mov	%dl, %cl
+	and	$15, %cl
+	jz	L(exit_8)
 	test	$0x01, %dl
 	jnz	L(exit_tail0)
-
 	test	$0x02, %dl
 	jnz	L(exit_tail1)
-
 	test	$0x04, %dl
 	jnz	L(exit_tail2)
+	add	$3, %eax
+	RETURN
 
-	test	$0x08, %dl
-	jnz	L(exit_tail3)
-
+	.p2align 4
+L(exit_8):
 	test	$0x10, %dl
 	jnz	L(exit_tail4)
-
 	test	$0x20, %dl
 	jnz	L(exit_tail5)
-
 	test	$0x40, %dl
 	jnz	L(exit_tail6)
 	add	$7, %eax
+	RETURN
+
+	.p2align 4
+L(exit_high):
+	mov	%dh, %ch
+	and	$15, %ch
+	jz	L(exit_high_8)
+	test	$0x01, %dh
+	jnz	L(exit_tail8)
+	test	$0x02, %dh
+	jnz	L(exit_tail9)
+	test	$0x04, %dh
+	jnz	L(exit_tail10)
+	add	$11, %eax
+	RETURN
+
+	.p2align 4
+L(exit_high_8):
+	test	$0x10, %dh
+	jnz	L(exit_tail12)
+	test	$0x20, %dh
+	jnz	L(exit_tail13)
+	test	$0x40, %dh
+	jnz	L(exit_tail14)
+	add	$15, %eax
 L(exit_tail0):
 	RETURN
 
-L(exit_high):
-	add	$8, %eax
-	test	$0x01, %dh
-	jnz	L(exit_tail0)
+#ifdef USE_AS_STRNLEN
 
-	test	$0x02, %dh
-	jnz	L(exit_tail1)
+	.p2align 4
+L(len_less64):
+	pxor	%xmm0, %xmm0
+	add	$64, %edi
 
-	test	$0x04, %dh
-	jnz	L(exit_tail2)
+	pcmpeqb	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	pxor	%xmm1, %xmm1
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(strnlen_exit)
 
-	test	$0x08, %dh
-	jnz	L(exit_tail3)
+	sub	$16, %edi
+	jbe	L(return_start_len)
 
-	test	$0x10, %dh
-	jnz	L(exit_tail4)
+	pcmpeqb	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(strnlen_exit)
 
-	test	$0x20, %dh
-	jnz	L(exit_tail5)
+	sub	$16, %edi
+	jbe	L(return_start_len)
 
-	test	$0x40, %dh
-	jnz	L(exit_tail6)
-	add	$7, %eax
+	pcmpeqb	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(strnlen_exit)
+
+	sub	$16, %edi
+	jbe	L(return_start_len)
+
+	pcmpeqb	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(strnlen_exit)
+
+#ifndef USE_AS_STRLCAT
+	movl	LEN(%esp), %eax
 	RETURN
+#else
+	jmp	L(return_start_len)
+#endif
+
+	.p2align 4
+L(strnlen_exit):
+	sub	%ecx, %eax
+
+	test	%dl, %dl
+	jz	L(strnlen_exit_high)
+	mov	%dl, %cl
+	and	$15, %cl
+	jz	L(strnlen_exit_8)
+	test	$0x01, %dl
+	jnz	L(exit_tail0)
+	test	$0x02, %dl
+	jnz	L(strnlen_exit_tail1)
+	test	$0x04, %dl
+	jnz	L(strnlen_exit_tail2)
+	sub	$4, %edi
+	jb	L(return_start_len)
+	lea	3(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_8):
+	test	$0x10, %dl
+	jnz	L(strnlen_exit_tail4)
+	test	$0x20, %dl
+	jnz	L(strnlen_exit_tail5)
+	test	$0x40, %dl
+	jnz	L(strnlen_exit_tail6)
+	sub	$8, %edi
+	jb	L(return_start_len)
+	lea	7(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_high):
+	mov	%dh, %ch
+	and	$15, %ch
+	jz	L(strnlen_exit_high_8)
+	test	$0x01, %dh
+	jnz	L(strnlen_exit_tail8)
+	test	$0x02, %dh
+	jnz	L(strnlen_exit_tail9)
+	test	$0x04, %dh
+	jnz	L(strnlen_exit_tail10)
+	sub	$12, %edi
+	jb	L(return_start_len)
+	lea	11(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_high_8):
+	test	$0x10, %dh
+	jnz	L(strnlen_exit_tail12)
+	test	$0x20, %dh
+	jnz	L(strnlen_exit_tail13)
+	test	$0x40, %dh
+	jnz	L(strnlen_exit_tail14)
+	sub	$16, %edi
+	jb	L(return_start_len)
+	lea	15(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail1):
+	sub	$2, %edi
+	jb	L(return_start_len)
+	lea	1(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail2):
+	sub	$3, %edi
+	jb	L(return_start_len)
+	lea	2(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail4):
+	sub	$5, %edi
+	jb	L(return_start_len)
+	lea	4(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail5):
+	sub	$6, %edi
+	jb	L(return_start_len)
+	lea	5(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail6):
+	sub	$7, %edi
+	jb	L(return_start_len)
+	lea	6(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail8):
+	sub	$9, %edi
+	jb	L(return_start_len)
+	lea	8(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail9):
+	sub	$10, %edi
+	jb	L(return_start_len)
+	lea	9(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail10):
+	sub	$11, %edi
+	jb	L(return_start_len)
+	lea	10(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail12):
+	sub	$13, %edi
+	jb	L(return_start_len)
+	lea	12(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail13):
+	sub	$14, %edi
+	jb	L(return_start_len)
+	lea	13(%eax), %eax
+	RETURN
+
+	.p2align 4
+L(strnlen_exit_tail14):
+	sub	$15, %edi
+	jb	L(return_start_len)
+	lea	14(%eax), %eax
+	RETURN
+
+#ifndef USE_AS_STRLCAT
+	.p2align 4
+L(return_start_len):
+	movl	LEN(%esp), %eax
+	RETURN
+#endif
+
+/* for prolog only */
+
+	.p2align 4
+L(len_less4_prolog):
+	xor	%eax, %eax
+
+	add	$4, %edi
+	jz	L(exit_tail0)
+
+	cmpb	$0, (%edx)
+	jz	L(exit_tail0)
+	cmp	$1, %edi
+	je	L(exit_tail1)
+
+	cmpb	$0, 1(%edx)
+	jz	L(exit_tail1)
+	cmp	$2, %edi
+	je	L(exit_tail2)
+
+	cmpb	$0, 2(%edx)
+	jz	L(exit_tail2)
+	cmp	$3, %edi
+	je	L(exit_tail3)
+
+	cmpb	$0, 3(%edx)
+	jz	L(exit_tail3)
+	mov	%edi, %eax
+	RETURN
+
+	.p2align 4
+L(len_less8_prolog):
+	add	$4, %edi
+
+	cmpb	$0, 4(%edx)
+	jz	L(exit_tail4)
+	cmp	$1, %edi
+	je	L(exit_tail5)
+
+	cmpb	$0, 5(%edx)
+	jz	L(exit_tail5)
+	cmp	$2, %edi
+	je	L(exit_tail6)
+
+	cmpb	$0, 6(%edx)
+	jz	L(exit_tail6)
+	cmp	$3, %edi
+	je	L(exit_tail7)
+
+	cmpb	$0, 7(%edx)
+	jz	L(exit_tail7)
+	mov	$8, %eax
+	RETURN
+
+
+	.p2align 4
+L(len_less12_prolog):
+	add	$4, %edi
+
+	cmpb	$0, 8(%edx)
+	jz	L(exit_tail8)
+	cmp	$1, %edi
+	je	L(exit_tail9)
+
+	cmpb	$0, 9(%edx)
+	jz	L(exit_tail9)
+	cmp	$2, %edi
+	je	L(exit_tail10)
+
+	cmpb	$0, 10(%edx)
+	jz	L(exit_tail10)
+	cmp	$3, %edi
+	je	L(exit_tail11)
+
+	cmpb	$0, 11(%edx)
+	jz	L(exit_tail11)
+	mov	$12, %eax
+	RETURN
+
+	.p2align 4
+L(len_less16_prolog):
+	add	$4, %edi
+
+	cmpb	$0, 12(%edx)
+	jz	L(exit_tail12)
+	cmp	$1, %edi
+	je	L(exit_tail13)
+
+	cmpb	$0, 13(%edx)
+	jz	L(exit_tail13)
+	cmp	$2, %edi
+	je	L(exit_tail14)
+
+	cmpb	$0, 14(%edx)
+	jz	L(exit_tail14)
+	cmp	$3, %edi
+	je	L(exit_tail15)
+
+	cmpb	$0, 15(%edx)
+	jz	L(exit_tail15)
+	mov	$16, %eax
+	RETURN
+#endif
 
 	.p2align 4
 L(exit_tail1):
@@ -364,6 +743,7 @@
 
 L(exit_tail15):
 	add	$15, %eax
-	ret
-
+#ifndef USE_AS_STRCAT
+	RETURN
 END (STRLEN)
+#endif
diff --git a/libc/arch-x86/string/memcmp_wrapper.S b/libc/arch-x86/string/sse2-strnlen-atom.S
similarity index 90%
copy from libc/arch-x86/string/memcmp_wrapper.S
copy to libc/arch-x86/string/sse2-strnlen-atom.S
index fa0c672..1f89b4e 100644
--- a/libc/arch-x86/string/memcmp_wrapper.S
+++ b/libc/arch-x86/string/sse2-strnlen-atom.S
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2010, Intel Corporation
+Copyright (c) 2011, Intel Corporation
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -28,13 +28,6 @@
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#if defined(USE_SSSE3)
-
-# define MEMCMP memcmp
-# include "ssse3-memcmp3-new.S"
-
-#else
-
-# include "memcmp.S"
-
-#endif
+#define USE_AS_STRNLEN 1
+#define STRLEN  strnlen
+#include "sse2-strlen-atom.S"
diff --git a/libc/arch-x86/string/sse2-strrchr-atom.S b/libc/arch-x86/string/sse2-strrchr-atom.S
new file mode 100644
index 0000000..da3dc3b
--- /dev/null
+++ b/libc/arch-x86/string/sse2-strrchr-atom.S
@@ -0,0 +1,753 @@
+/*
+Copyright (c) 2011, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+    * this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+    * this list of conditions and the following disclaimer in the documentation
+    * and/or other materials provided with the distribution.
+
+    * Neither the name of Intel Corporation nor the names of its contributors
+    * may be used to endorse or promote products derived from this software
+    * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef L
+# define L(label)	.L##label
+#endif
+
+#ifndef cfi_startproc
+# define cfi_startproc	.cfi_startproc
+#endif
+
+#ifndef cfi_endproc
+# define cfi_endproc	.cfi_endproc
+#endif
+
+#ifndef cfi_rel_offset
+# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+#endif
+
+#ifndef cfi_restore
+# define cfi_restore(reg)	.cfi_restore reg
+#endif
+
+#ifndef cfi_adjust_cfa_offset
+# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
+#endif
+
+#ifndef ENTRY
+# define ENTRY(name)	\
+	.type name, @function;	\
+	.globl name;	\
+	.p2align 4;	\
+name:	\
+	cfi_startproc
+#endif
+
+#ifndef END
+# define END(name)	\
+	cfi_endproc;	\
+	.size name, .-name
+#endif
+
+#define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
+#define POP(REG)	popl REG; CFI_POP (REG)
+
+#define PARMS	8
+#define ENTRANCE	PUSH(%edi);
+#define RETURN	POP (%edi); ret; CFI_PUSH (%edi);
+
+#define STR1  PARMS
+#define STR2  STR1+4
+
+	.text
+ENTRY (strrchr)
+
+	ENTRANCE
+	mov	STR1(%esp), %ecx
+	movd	STR2(%esp), %xmm1
+
+	pxor	%xmm2, %xmm2
+	mov	%ecx, %edi
+	punpcklbw %xmm1, %xmm1
+	punpcklbw %xmm1, %xmm1
+	/* ECX has OFFSET. */
+	and	$63, %ecx
+	pshufd	$0, %xmm1, %xmm1
+	cmp	$48, %ecx
+	ja	L(crosscache)
+
+/* Unaligned string: offset within the 64-byte block is <= 48, so a 16-byte movdqu stays inside it. */
+	movdqu	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, %xmm0
+	/* Find where the NUL terminator is.  */
+	pmovmskb %xmm2, %ecx
+	/* Check if there is a match.  */
+	pmovmskb %xmm0, %eax
+	add	$16, %edi
+
+	test	%eax, %eax
+	jnz	L(unaligned_match1)
+
+	test	%ecx, %ecx
+	jnz	L(return_null)
+
+	and	$-16, %edi
+
+	PUSH	(%esi)
+	PUSH	(%ebx)
+
+	xor	%ebx, %ebx
+	jmp	L(loop)
+
+	CFI_POP    (%esi)
+	CFI_POP    (%ebx)
+
+	.p2align 4
+L(unaligned_match1):
+	test	%ecx, %ecx
+	jnz	L(prolog_find_zero_1)
+
+	PUSH	(%esi)
+	PUSH	(%ebx)
+
+	mov	%eax, %ebx
+	mov	%edi, %esi
+	and	$-16, %edi
+	jmp	L(loop)
+
+	CFI_POP    (%esi)
+	CFI_POP    (%ebx)
+
+	.p2align 4
+L(crosscache):
+/* Handle unaligned string.  */
+	and	$15, %ecx
+	and	$-16, %edi
+	pxor	%xmm3, %xmm3
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm3
+	pcmpeqb	%xmm1, %xmm0
+	/* Find where the NUL terminator is.  */
+	pmovmskb %xmm3, %edx
+	/* Check if there is a match.  */
+	pmovmskb %xmm0, %eax
+	/* Remove the leading bytes.  */
+	shr	%cl, %edx
+	shr	%cl, %eax
+	add	$16, %edi
+
+	test	%eax, %eax
+	jnz	L(unaligned_match)
+
+	test	%edx, %edx
+	jnz	L(return_null)
+
+	PUSH	(%esi)
+	PUSH	(%ebx)
+
+	xor	%ebx, %ebx
+	jmp	L(loop)
+
+	CFI_POP    (%esi)
+	CFI_POP    (%ebx)
+
+	.p2align 4
+L(unaligned_match):
+	test	%edx, %edx
+	jnz	L(prolog_find_zero)
+
+	PUSH	(%esi)
+	PUSH	(%ebx)
+
+	mov	%eax, %ebx
+	lea	(%edi, %ecx), %esi
+
+/* Loop start on aligned string.  */
+	.p2align 4
+L(loop):
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	add	$16, %edi
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %ecx
+	pmovmskb %xmm0, %eax
+	or	%eax, %ecx
+	jnz	L(matches)
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	add	$16, %edi
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %ecx
+	pmovmskb %xmm0, %eax
+	or	%eax, %ecx
+	jnz	L(matches)
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	add	$16, %edi
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %ecx
+	pmovmskb %xmm0, %eax
+	or	%eax, %ecx
+	jnz	L(matches)
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	add	$16, %edi
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %ecx
+	pmovmskb %xmm0, %eax
+	or	%eax, %ecx
+	jz	L(loop)
+
+L(matches):
+	test	%eax, %eax
+	jnz	L(match)
+L(return_value):
+	test	%ebx, %ebx
+	jz	L(return_null_1)
+	mov	%ebx, %eax
+	mov	%esi, %edi
+
+	POP	(%ebx)
+	POP	(%esi)
+
+	jmp	L(match_case1)
+
+	CFI_PUSH    (%ebx)
+	CFI_PUSH    (%esi)
+
+	.p2align 4
+L(return_null_1):
+	POP	(%ebx)
+	POP	(%esi)
+
+	xor	%eax, %eax
+	RETURN
+
+	CFI_PUSH    (%ebx)
+	CFI_PUSH    (%esi)
+
+	.p2align 4
+L(match):
+	pmovmskb %xmm2, %ecx
+	test	%ecx, %ecx
+	jnz	L(find_zero)
+	mov	%eax, %ebx
+	mov	%edi, %esi
+	jmp	L(loop)
+
+	.p2align 4
+L(find_zero):
+	test	%cl, %cl
+	jz	L(find_zero_high)
+	mov	%cl, %dl
+	and	$15, %dl
+	jz	L(find_zero_8)
+	test	$0x01, %cl
+	jnz	L(FindZeroExit1)
+	test	$0x02, %cl
+	jnz	L(FindZeroExit2)
+	test	$0x04, %cl
+	jnz	L(FindZeroExit3)
+	and	$1 << 4 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp     L(match_case1)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(find_zero_8):
+	test	$0x10, %cl
+	jnz	L(FindZeroExit5)
+	test	$0x20, %cl
+	jnz	L(FindZeroExit6)
+	test	$0x40, %cl
+	jnz	L(FindZeroExit7)
+	and	$1 << 8 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp     L(match_case1)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(find_zero_high):
+	mov	%ch, %dh
+	and	$15, %dh
+	jz	L(find_zero_high_8)
+	test	$0x01, %ch
+	jnz	L(FindZeroExit9)
+	test	$0x02, %ch
+	jnz	L(FindZeroExit10)
+	test	$0x04, %ch
+	jnz	L(FindZeroExit11)
+	and	$1 << 12 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp     L(match_case1)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(find_zero_high_8):
+	test	$0x10, %ch
+	jnz	L(FindZeroExit13)
+	test	$0x20, %ch
+	jnz	L(FindZeroExit14)
+	test	$0x40, %ch
+	jnz	L(FindZeroExit15)
+	and	$1 << 16 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp     L(match_case1)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit1):
+	and	$1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp     L(match_case1)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit2):
+	and	$1 << 2 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp     L(match_case1)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit3):
+	and	$1 << 3 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp     L(match_case1)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit5):
+	and	$1 << 5 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp     L(match_case1)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit6):
+	and	$1 << 6 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp     L(match_case1)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit7):
+	and	$1 << 7 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp     L(match_case1)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit9):
+	and	$1 << 9 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp     L(match_case1)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit10):
+	and	$1 << 10 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp     L(match_case1)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit11):
+	and	$1 << 11 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp     L(match_case1)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit13):
+	and	$1 << 13 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp     L(match_case1)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit14):
+	and	$1 << 14 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp     L(match_case1)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit15):
+	and	$1 << 15 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+
+	.p2align 4
+L(match_case1):
+	test	%ah, %ah
+	jnz	L(match_case1_high)
+	mov	%al, %dl
+	and	$15 << 4, %dl
+	jnz	L(match_case1_8)
+	test	$0x08, %al
+	jnz	L(Exit4)
+	test	$0x04, %al
+	jnz	L(Exit3)
+	test	$0x02, %al
+	jnz	L(Exit2)
+	lea	-16(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(match_case1_8):
+	test	$0x80, %al
+	jnz	L(Exit8)
+	test	$0x40, %al
+	jnz	L(Exit7)
+	test	$0x20, %al
+	jnz	L(Exit6)
+	lea	-12(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(match_case1_high):
+	mov	%ah, %dh
+	and	$15 << 4, %dh
+	jnz	L(match_case1_high_8)
+	test	$0x08, %ah
+	jnz	L(Exit12)
+	test	$0x04, %ah
+	jnz	L(Exit11)
+	test	$0x02, %ah
+	jnz	L(Exit10)
+	lea	-8(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(match_case1_high_8):
+	test	$0x80, %ah
+	jnz	L(Exit16)
+	test	$0x40, %ah
+	jnz	L(Exit15)
+	test	$0x20, %ah
+	jnz	L(Exit14)
+	lea	-4(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit2):
+	lea	-15(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit3):
+	lea	-14(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit4):
+	lea	-13(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit6):
+	lea	-11(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit7):
+	lea	-10(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit8):
+	lea	-9(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit10):
+	lea	-7(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit11):
+	lea	-6(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit12):
+	lea	-5(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit14):
+	lea	-3(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit15):
+	lea	-2(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit16):
+	lea	-1(%edi), %eax
+	RETURN
+
+/* Return NULL.  */
+	.p2align 4
+L(return_null):
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(prolog_find_zero):
+	add	%ecx, %edi
+	mov     %edx, %ecx
+L(prolog_find_zero_1):
+	test	%cl, %cl
+	jz	L(prolog_find_zero_high)
+	mov	%cl, %dl
+	and	$15, %dl
+	jz	L(prolog_find_zero_8)
+	test	$0x01, %cl
+	jnz	L(PrologFindZeroExit1)
+	test	$0x02, %cl
+	jnz	L(PrologFindZeroExit2)
+	test	$0x04, %cl
+	jnz	L(PrologFindZeroExit3)
+	and	$1 << 4 - 1, %eax
+	jnz	L(match_case1)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(prolog_find_zero_8):
+	test	$0x10, %cl
+	jnz	L(PrologFindZeroExit5)
+	test	$0x20, %cl
+	jnz	L(PrologFindZeroExit6)
+	test	$0x40, %cl
+	jnz	L(PrologFindZeroExit7)
+	and	$1 << 8 - 1, %eax
+	jnz	L(match_case1)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(prolog_find_zero_high):
+	mov	%ch, %dh
+	and	$15, %dh
+	jz	L(prolog_find_zero_high_8)
+	test	$0x01, %ch
+	jnz	L(PrologFindZeroExit9)
+	test	$0x02, %ch
+	jnz	L(PrologFindZeroExit10)
+	test	$0x04, %ch
+	jnz	L(PrologFindZeroExit11)
+	and	$1 << 12 - 1, %eax
+	jnz	L(match_case1)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(prolog_find_zero_high_8):
+	test	$0x10, %ch
+	jnz	L(PrologFindZeroExit13)
+	test	$0x20, %ch
+	jnz	L(PrologFindZeroExit14)
+	test	$0x40, %ch
+	jnz	L(PrologFindZeroExit15)
+	and	$1 << 16 - 1, %eax
+	jnz	L(match_case1)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit1):
+	and	$1, %eax
+	jnz	L(match_case1)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit2):
+	and	$1 << 2 - 1, %eax
+	jnz	L(match_case1)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit3):
+	and	$1 << 3 - 1, %eax
+	jnz	L(match_case1)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit5):
+	and	$1 << 5 - 1, %eax
+	jnz	L(match_case1)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit6):
+	and	$1 << 6 - 1, %eax
+	jnz	L(match_case1)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit7):
+	and	$1 << 7 - 1, %eax
+	jnz	L(match_case1)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit9):
+	and	$1 << 9 - 1, %eax
+	jnz	L(match_case1)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit10):
+	and	$1 << 10 - 1, %eax
+	jnz	L(match_case1)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit11):
+	and	$1 << 11 - 1, %eax
+	jnz	L(match_case1)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit13):
+	and	$1 << 13 - 1, %eax
+	jnz	L(match_case1)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit14):
+	and	$1 << 14 - 1, %eax
+	jnz	L(match_case1)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit15):
+	and	$1 << 15 - 1, %eax
+	jnz	L(match_case1)
+	xor	%eax, %eax
+	RETURN
+
+END (strrchr)
diff --git a/libc/arch-x86/string/sse2-wcschr-atom.S b/libc/arch-x86/string/sse2-wcschr-atom.S
new file mode 100644
index 0000000..729302b
--- /dev/null
+++ b/libc/arch-x86/string/sse2-wcschr-atom.S
@@ -0,0 +1,267 @@
+/*
+Copyright (c) 2011 Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+    * this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+    * this list of conditions and the following disclaimer in the documentation
+    * and/or other materials provided with the distribution.
+
+    * Neither the name of Intel Corporation nor the names of its contributors
+    * may be used to endorse or promote products derived from this software
+    * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef L
+# define L(label)	.L##label
+#endif
+
+#ifndef cfi_startproc
+# define cfi_startproc	.cfi_startproc
+#endif
+
+#ifndef cfi_endproc
+# define cfi_endproc	.cfi_endproc
+#endif
+
+#ifndef cfi_rel_offset
+# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+#endif
+
+#ifndef cfi_restore
+# define cfi_restore(reg)	.cfi_restore reg
+#endif
+
+#ifndef cfi_adjust_cfa_offset
+# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
+#endif
+
+#ifndef ENTRY
+# define ENTRY(name)	\
+	.type name,  @function;	\
+	.globl name;	\
+	.p2align 4;	\
+name:	\
+	cfi_startproc
+#endif
+
+#ifndef END
+# define END(name)	\
+	cfi_endproc;	\
+	.size name,	.-name
+#endif
+
+#define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
+#define POP(REG)	popl REG; CFI_POP (REG)
+
+#define PARMS	4
+
+
+#define STR1  PARMS
+#define STR2  STR1+4
+
+	.text
+ENTRY (wcschr)
+
+	mov	STR1(%esp), %ecx
+	movd	STR2(%esp), %xmm1
+
+	mov	%ecx, %eax
+	punpckldq %xmm1, %xmm1
+	pxor	%xmm2, %xmm2
+	punpckldq %xmm1, %xmm1
+
+	and	$63, %eax
+	cmp	$48, %eax
+	ja	L(cross_cache)
+
+	movdqu	(%ecx), %xmm0
+	pcmpeqd	%xmm0, %xmm2
+	pcmpeqd	%xmm1, %xmm0
+	pmovmskb %xmm2, %edx
+	pmovmskb %xmm0, %eax
+	or	%eax, %edx
+	jnz	L(matches)
+	and	$-16, %ecx
+	jmp	L(loop)
+
+	.p2align 4
+L(cross_cache):
+	PUSH	(%edi)
+	mov	%ecx, %edi
+	mov	%eax, %ecx
+	and	$-16, %edi
+	and	$15, %ecx
+	movdqa	(%edi), %xmm0
+	pcmpeqd	%xmm0, %xmm2
+	pcmpeqd	%xmm1, %xmm0
+	pmovmskb %xmm2, %edx
+	pmovmskb %xmm0, %eax
+
+	sarl	%cl, %edx
+	sarl	%cl, %eax
+	test	%eax, %eax
+	jz	L(unaligned_no_match)
+
+	add	%edi, %ecx
+	POP	(%edi)
+
+	test	%edx, %edx
+	jz	L(match_case1)
+	test	%al, %al
+	jz	L(match_higth_case2)
+	test	$15, %al
+	jnz	L(match_case2_4)
+	test	$15, %dl
+	jnz	L(return_null)
+	lea	4(%ecx), %eax
+	ret
+
+	CFI_PUSH (%edi)
+
+	.p2align 4
+L(unaligned_no_match):
+	mov	%edi, %ecx
+	POP	(%edi)
+
+	test	%edx, %edx
+	jnz	L(return_null)
+
+	pxor	%xmm2, %xmm2
+
+/* Loop start on aligned string.  */
+	.p2align 4
+L(loop):
+	add	$16, %ecx
+	movdqa	(%ecx), %xmm0
+	pcmpeqd	%xmm0, %xmm2
+	pcmpeqd	%xmm1, %xmm0
+	pmovmskb %xmm2, %edx
+	pmovmskb %xmm0, %eax
+	or	%eax, %edx
+	jnz	L(matches)
+	add	$16, %ecx
+
+	movdqa	(%ecx), %xmm0
+	pcmpeqd	%xmm0, %xmm2
+	pcmpeqd	%xmm1, %xmm0
+	pmovmskb %xmm2, %edx
+	pmovmskb %xmm0, %eax
+	or	%eax, %edx
+	jnz	L(matches)
+	add	$16, %ecx
+
+	movdqa	(%ecx), %xmm0
+	pcmpeqd	%xmm0, %xmm2
+	pcmpeqd	%xmm1, %xmm0
+	pmovmskb %xmm2, %edx
+	pmovmskb %xmm0, %eax
+	or	%eax, %edx
+	jnz	L(matches)
+	add	$16, %ecx
+
+	movdqa	(%ecx), %xmm0
+	pcmpeqd	%xmm0, %xmm2
+	pcmpeqd	%xmm1, %xmm0
+	pmovmskb %xmm2, %edx
+	pmovmskb %xmm0, %eax
+	or	%eax, %edx
+	jz	L(loop)
+
+	.p2align 4
+L(matches):
+	pmovmskb %xmm2, %edx
+	test	%eax, %eax
+	jz	L(return_null)
+	test	%edx, %edx
+	jz	L(match_case1)
+
+	.p2align 4
+L(match_case2):
+	test	%al, %al
+	jz	L(match_higth_case2)
+	test	$15, %al
+	jnz	L(match_case2_4)
+	test	$15, %dl
+	jnz	L(return_null)
+	lea	4(%ecx), %eax
+	ret
+
+	.p2align 4
+L(match_case2_4):
+	mov	%ecx, %eax
+	ret
+
+	.p2align 4
+L(match_higth_case2):
+	test	%dl, %dl
+	jnz	L(return_null)
+	test	$15, %ah
+	jnz	L(match_case2_12)
+	test	$15, %dh
+	jnz	L(return_null)
+	lea	12(%ecx), %eax
+	ret
+
+	.p2align 4
+L(match_case2_12):
+	lea	8(%ecx), %eax
+	ret
+
+	.p2align 4
+L(match_case1):
+	test	%al, %al
+	jz	L(match_higth_case1)
+
+	test	$0x01, %al
+	jnz	L(exit0)
+	lea	4(%ecx), %eax
+	ret
+
+	.p2align 4
+L(match_higth_case1):
+	test	$0x01, %ah
+	jnz	L(exit3)
+	lea	12(%ecx), %eax
+	ret
+
+	.p2align 4
+L(exit0):
+	mov	%ecx, %eax
+	ret
+
+	.p2align 4
+L(exit3):
+	lea	8(%ecx), %eax
+	ret
+
+	.p2align 4
+L(return_null):
+	xor	%eax, %eax
+	ret
+
+END (wcschr)
diff --git a/libc/arch-x86/string/sse2-wcscmp-atom.S b/libc/arch-x86/string/sse2-wcscmp-atom.S
new file mode 100644
index 0000000..8867d28
--- /dev/null
+++ b/libc/arch-x86/string/sse2-wcscmp-atom.S
@@ -0,0 +1,1062 @@
+/*
+Copyright (c) 2011 Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+    * this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+    * this list of conditions and the following disclaimer in the documentation
+    * and/or other materials provided with the distribution.
+
+    * Neither the name of Intel Corporation nor the names of its contributors
+    * may be used to endorse or promote products derived from this software
+    * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef L
+# define L(label)	.L##label
+#endif
+
+#ifndef cfi_startproc
+# define cfi_startproc	.cfi_startproc
+#endif
+
+#ifndef cfi_endproc
+# define cfi_endproc	.cfi_endproc
+#endif
+
+#ifndef cfi_rel_offset
+# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+#endif
+
+#ifndef cfi_restore
+# define cfi_restore(reg)	.cfi_restore reg
+#endif
+
+#ifndef cfi_adjust_cfa_offset
+# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
+#endif
+
+#ifndef ENTRY
+# define ENTRY(name)	\
+	.type name, @function;	\
+	.globl name;	\
+	.p2align 4;	\
+name:	\
+	cfi_startproc
+#endif
+
+#ifndef END
+# define END(name)	\
+	cfi_endproc;	\
+	.size name, .-name
+#endif
+
+#define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#define ENTRANCE PUSH(%esi); PUSH(%edi)
+#define RETURN  POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
+#define PARMS  4
+#define STR1  PARMS
+#define STR2  STR1+4
+
+	.text
+ENTRY (wcscmp)
+/*
+	* This implementation uses SSE to compare up to 16 bytes at a time.
+*/
+	mov	STR1(%esp), %edx
+	mov	STR2(%esp), %eax
+
+	mov	(%eax), %ecx
+	cmp	%ecx, (%edx)
+	jne	L(neq)
+	test	%ecx, %ecx
+	jz	L(eq)
+
+	mov	4(%eax), %ecx
+	cmp	%ecx, 4(%edx)
+	jne	L(neq)
+	test	%ecx, %ecx
+	jz	L(eq)
+
+	mov	8(%eax), %ecx
+	cmp	%ecx, 8(%edx)
+	jne	L(neq)
+	test	%ecx, %ecx
+	jz	L(eq)
+
+	mov	12(%eax), %ecx
+	cmp	%ecx, 12(%edx)
+	jne	L(neq)
+	test	%ecx, %ecx
+	jz	L(eq)
+
+	ENTRANCE
+	add	$16, %eax
+	add	$16, %edx
+
+	mov	%eax, %esi
+	mov	%edx, %edi
+	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
+	mov	%al, %ch
+	mov	%dl, %cl
+	and	$63, %eax		/* esi alignment in cache line */
+	and	$63, %edx		/* edi alignment in cache line */
+	and	$15, %cl
+	jz	L(continue_00)
+	cmp	$16, %edx
+	jb	L(continue_0)
+	cmp	$32, %edx
+	jb	L(continue_16)
+	cmp	$48, %edx
+	jb	L(continue_32)
+
+L(continue_48):
+	and	$15, %ch
+	jz	L(continue_48_00)
+	cmp	$16, %eax
+	jb	L(continue_0_48)
+	cmp	$32, %eax
+	jb	L(continue_16_48)
+	cmp	$48, %eax
+	jb	L(continue_32_48)
+
+	.p2align 4
+L(continue_48_48):
+	mov	(%esi), %ecx
+	cmp	%ecx, (%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	4(%esi), %ecx
+	cmp	%ecx, 4(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	8(%esi), %ecx
+	cmp	%ecx, 8(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	12(%esi), %ecx
+	cmp	%ecx, 12(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	movdqu	16(%edi), %xmm1
+	movdqu	16(%esi), %xmm2
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_16)
+
+	movdqu	32(%edi), %xmm1
+	movdqu	32(%esi), %xmm2
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_32)
+
+	movdqu	48(%edi), %xmm1
+	movdqu	48(%esi), %xmm2
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_48)
+
+	add	$64, %esi
+	add	$64, %edi
+	jmp	L(continue_48_48)
+
+L(continue_0):
+	and	$15, %ch
+	jz	L(continue_0_00)
+	cmp	$16, %eax
+	jb	L(continue_0_0)
+	cmp	$32, %eax
+	jb	L(continue_0_16)
+	cmp	$48, %eax
+	jb	L(continue_0_32)
+
+	.p2align 4
+L(continue_0_48):
+	mov	(%esi), %ecx
+	cmp	%ecx, (%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	4(%esi), %ecx
+	cmp	%ecx, 4(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	8(%esi), %ecx
+	cmp	%ecx, 8(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	12(%esi), %ecx
+	cmp	%ecx, 12(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	movdqu	16(%edi), %xmm1
+	movdqu	16(%esi), %xmm2
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_16)
+
+	movdqu	32(%edi), %xmm1
+	movdqu	32(%esi), %xmm2
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_32)
+
+	mov	48(%esi), %ecx
+	cmp	%ecx, 48(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	52(%esi), %ecx
+	cmp	%ecx, 52(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	56(%esi), %ecx
+	cmp	%ecx, 56(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	60(%esi), %ecx
+	cmp	%ecx, 60(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	add	$64, %esi
+	add	$64, %edi
+	jmp	L(continue_0_48)
+
+	.p2align 4
+L(continue_00):
+	and	$15, %ch
+	jz	L(continue_00_00)
+	cmp	$16, %eax
+	jb	L(continue_00_0)
+	cmp	$32, %eax
+	jb	L(continue_00_16)
+	cmp	$48, %eax
+	jb	L(continue_00_32)
+
+	.p2align 4
+L(continue_00_48):
+	pcmpeqd	(%edi), %xmm0
+	mov	(%edi), %eax
+	pmovmskb %xmm0, %ecx
+	test	%ecx, %ecx
+	jnz	L(less4_double_words1)
+
+	cmp	(%esi), %eax
+	jne	L(nequal)
+
+	mov	4(%edi), %eax
+	cmp	4(%esi), %eax
+	jne	L(nequal)
+
+	mov	8(%edi), %eax
+	cmp	8(%esi), %eax
+	jne	L(nequal)
+
+	mov	12(%edi), %eax
+	cmp	12(%esi), %eax
+	jne	L(nequal)
+
+	movdqu	16(%esi), %xmm2
+	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
+	pcmpeqd	16(%edi), %xmm2		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
+	pmovmskb %xmm2, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_16)
+
+	movdqu	32(%esi), %xmm2
+	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
+	pcmpeqd	32(%edi), %xmm2		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
+	pmovmskb %xmm2, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_32)
+
+	movdqu	48(%esi), %xmm2
+	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
+	pcmpeqd	48(%edi), %xmm2		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
+	pmovmskb %xmm2, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_48)
+
+	add	$64, %esi
+	add	$64, %edi
+	jmp	L(continue_00_48)
+
+	.p2align 4
+L(continue_32):
+	and	$15, %ch
+	jz	L(continue_32_00)
+	cmp	$16, %eax
+	jb	L(continue_0_32)
+	cmp	$32, %eax
+	jb	L(continue_16_32)
+	cmp	$48, %eax
+	jb	L(continue_32_32)
+
+	.p2align 4
+L(continue_32_48):
+	mov	(%esi), %ecx
+	cmp	%ecx, (%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	4(%esi), %ecx
+	cmp	%ecx, 4(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	8(%esi), %ecx
+	cmp	%ecx, 8(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	12(%esi), %ecx
+	cmp	%ecx, 12(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	16(%esi), %ecx
+	cmp	%ecx, 16(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	20(%esi), %ecx
+	cmp	%ecx, 20(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	24(%esi), %ecx
+	cmp	%ecx, 24(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	28(%esi), %ecx
+	cmp	%ecx, 28(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	movdqu	32(%edi), %xmm1
+	movdqu	32(%esi), %xmm2
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_32)
+
+	movdqu	48(%edi), %xmm1
+	movdqu	48(%esi), %xmm2
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_48)
+
+	add	$64, %esi
+	add	$64, %edi
+	jmp	L(continue_32_48)
+
+	.p2align 4
+L(continue_16):
+	and	$15, %ch
+	jz	L(continue_16_00)
+	cmp	$16, %eax
+	jb	L(continue_0_16)
+	cmp	$32, %eax
+	jb	L(continue_16_16)
+	cmp	$48, %eax
+	jb	L(continue_16_32)
+
+	.p2align 4
+L(continue_16_48):
+	mov	(%esi), %ecx
+	cmp	%ecx, (%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	4(%esi), %ecx
+	cmp	%ecx, 4(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	8(%esi), %ecx
+	cmp	%ecx, 8(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	12(%esi), %ecx
+	cmp	%ecx, 12(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	movdqu	16(%edi), %xmm1
+	movdqu	16(%esi), %xmm2
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_16)
+
+	mov	32(%esi), %ecx
+	cmp	%ecx, 32(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	36(%esi), %ecx
+	cmp	%ecx, 36(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	40(%esi), %ecx
+	cmp	%ecx, 40(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	44(%esi), %ecx
+	cmp	%ecx, 44(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	movdqu	48(%edi), %xmm1
+	movdqu	48(%esi), %xmm2
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_48)
+
+	add	$64, %esi
+	add	$64, %edi
+	jmp	L(continue_16_48)
+
+	.p2align 4
+L(continue_00_00):
+	movdqa	(%edi), %xmm1
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	(%esi), %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words)
+
+	movdqa	16(%edi), %xmm3
+	pcmpeqd	%xmm3, %xmm0		/* Any null double_word? */
+	pcmpeqd	16(%esi), %xmm3		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm3		/* packed sub of comparison results*/
+	pmovmskb %xmm3, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_16)
+
+	movdqa	32(%edi), %xmm5
+	pcmpeqd	%xmm5, %xmm0		/* Any null double_word? */
+	pcmpeqd	32(%esi), %xmm5		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm5		/* packed sub of comparison results*/
+	pmovmskb %xmm5, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_32)
+
+	movdqa	48(%edi), %xmm1
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	48(%esi), %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_48)
+
+	add	$64, %esi
+	add	$64, %edi
+	jmp	L(continue_00_00)
+
+	.p2align 4
+L(continue_00_32):
+	movdqu	(%esi), %xmm2
+	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
+	pcmpeqd	(%edi), %xmm2		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
+	pmovmskb %xmm2, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words)
+
+	add	$16, %esi
+	add	$16, %edi
+	jmp	L(continue_00_48)
+
+	.p2align 4
+L(continue_00_16):
+	movdqu	(%esi), %xmm2
+	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
+	pcmpeqd	(%edi), %xmm2		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
+	pmovmskb %xmm2, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words)
+
+	movdqu	16(%esi), %xmm2
+	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
+	pcmpeqd	16(%edi), %xmm2		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
+	pmovmskb %xmm2, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_16)
+
+	add	$32, %esi
+	add	$32, %edi
+	jmp	L(continue_00_48)
+
+	.p2align 4
+L(continue_00_0):
+	movdqu	(%esi), %xmm2
+	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
+	pcmpeqd	(%edi), %xmm2		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
+	pmovmskb %xmm2, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words)
+
+	movdqu	16(%esi), %xmm2
+	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
+	pcmpeqd	16(%edi), %xmm2		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
+	pmovmskb %xmm2, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_16)
+
+	movdqu	32(%esi), %xmm2
+	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
+	pcmpeqd	32(%edi), %xmm2		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
+	pmovmskb %xmm2, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_32)
+
+	add	$48, %esi
+	add	$48, %edi
+	jmp	L(continue_00_48)
+
+	.p2align 4
+L(continue_48_00):
+	pcmpeqd	(%esi), %xmm0
+	mov	(%edi), %eax
+	pmovmskb %xmm0, %ecx
+	test	%ecx, %ecx
+	jnz	L(less4_double_words1)
+
+	cmp	(%esi), %eax
+	jne	L(nequal)
+
+	mov	4(%edi), %eax
+	cmp	4(%esi), %eax
+	jne	L(nequal)
+
+	mov	8(%edi), %eax
+	cmp	8(%esi), %eax
+	jne	L(nequal)
+
+	mov	12(%edi), %eax
+	cmp	12(%esi), %eax
+	jne	L(nequal)
+
+	movdqu	16(%edi), %xmm1
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	16(%esi), %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_16)
+
+	movdqu	32(%edi), %xmm1
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	32(%esi), %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_32)
+
+	movdqu	48(%edi), %xmm1
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	48(%esi), %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_48)
+
+	add	$64, %esi
+	add	$64, %edi
+	jmp	L(continue_48_00)
+
+	.p2align 4
+L(continue_32_00):
+	movdqu	(%edi), %xmm1
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	(%esi), %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words)
+
+	add	$16, %esi
+	add	$16, %edi
+	jmp	L(continue_48_00)
+
+	.p2align 4
+L(continue_16_00):
+	movdqu	(%edi), %xmm1
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	(%esi), %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words)
+
+	movdqu	16(%edi), %xmm1
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	16(%esi), %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_16)
+
+	add	$32, %esi
+	add	$32, %edi
+	jmp	L(continue_48_00)
+
+	.p2align 4
+L(continue_0_00):
+	movdqu	(%edi), %xmm1
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	(%esi), %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words)
+
+	movdqu	16(%edi), %xmm1
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	16(%esi), %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_16)
+
+	movdqu	32(%edi), %xmm1
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	32(%esi), %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_32)
+
+	add	$48, %esi
+	add	$48, %edi
+	jmp	L(continue_48_00)
+
+	.p2align 4
+L(continue_32_32):
+	movdqu	(%edi), %xmm1
+	movdqu	(%esi), %xmm2
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words)
+
+	add	$16, %esi
+	add	$16, %edi
+	jmp	L(continue_48_48)
+
+	.p2align 4
+L(continue_16_16):
+	movdqu	(%edi), %xmm1
+	movdqu	(%esi), %xmm2
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words)
+
+	movdqu	16(%edi), %xmm3
+	movdqu	16(%esi), %xmm4
+	pcmpeqd	%xmm3, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm4, %xmm3		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm3		/* packed sub of comparison results*/
+	pmovmskb %xmm3, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_16)
+
+	add	$32, %esi
+	add	$32, %edi
+	jmp	L(continue_48_48)
+
+	.p2align 4
+L(continue_0_0):
+	movdqu	(%edi), %xmm1
+	movdqu	(%esi), %xmm2
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words)
+
+	movdqu	16(%edi), %xmm3
+	movdqu	16(%esi), %xmm4
+	pcmpeqd	%xmm3, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm4, %xmm3		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm3		/* packed sub of comparison results*/
+	pmovmskb %xmm3, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_16)
+
+	movdqu	32(%edi), %xmm1
+	movdqu	32(%esi), %xmm2
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_32)
+
+	add	$48, %esi
+	add	$48, %edi
+	jmp	L(continue_48_48)
+
+	.p2align 4
+L(continue_0_16):
+	movdqu	(%edi), %xmm1
+	movdqu	(%esi), %xmm2
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words)
+
+	movdqu	16(%edi), %xmm1
+	movdqu	16(%esi), %xmm2
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words_16)
+
+	add	$32, %esi
+	add	$32, %edi
+	jmp	L(continue_32_48)
+
+	.p2align 4
+L(continue_0_32):
+	movdqu	(%edi), %xmm1
+	movdqu	(%esi), %xmm2
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words)
+
+	add	$16, %esi
+	add	$16, %edi
+	jmp	L(continue_16_48)
+
+	.p2align 4
+L(continue_16_32):
+	movdqu	(%edi), %xmm1
+	movdqu	(%esi), %xmm2
+	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
+	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pmovmskb %xmm1, %edx
+	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
+	jnz	L(less4_double_words)
+
+	add	$16, %esi
+	add	$16, %edi
+	jmp	L(continue_32_48)
+
+	.p2align 4
+L(less4_double_words1):
+	cmp	(%esi), %eax
+	jne	L(nequal)
+	test	%eax, %eax
+	jz	L(equal)
+
+	mov	4(%esi), %ecx
+	cmp	%ecx, 4(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	8(%esi), %ecx
+	cmp	%ecx, 8(%edi)
+	jne	L(nequal)
+	test	%ecx, %ecx
+	jz	L(equal)
+
+	mov	12(%esi), %ecx
+	cmp	%ecx, 12(%edi)
+	jne	L(nequal)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(less4_double_words):
+	xor	%eax, %eax
+	test	%dl, %dl
+	jz	L(next_two_double_words)
+	and	$15, %dl
+	jz	L(second_double_word)
+	mov	(%esi), %ecx
+	cmp	%ecx, (%edi)
+	jne	L(nequal)
+	RETURN
+
+	.p2align 4
+L(second_double_word):
+	mov	4(%esi), %ecx
+	cmp	%ecx, 4(%edi)
+	jne	L(nequal)
+	RETURN
+
+	.p2align 4
+L(next_two_double_words):
+	and	$15, %dh
+	jz	L(fourth_double_word)
+	mov	8(%esi), %ecx
+	cmp	%ecx, 8(%edi)
+	jne	L(nequal)
+	RETURN
+
+	.p2align 4
+L(fourth_double_word):
+	mov	12(%esi), %ecx
+	cmp	%ecx, 12(%edi)
+	jne	L(nequal)
+	RETURN
+
+	.p2align 4
+L(less4_double_words_16):
+	xor	%eax, %eax
+	test	%dl, %dl
+	jz	L(next_two_double_words_16)
+	and	$15, %dl
+	jz	L(second_double_word_16)
+	mov	16(%esi), %ecx
+	cmp	%ecx, 16(%edi)
+	jne	L(nequal)
+	RETURN
+
+	.p2align 4
+L(second_double_word_16):
+	mov	20(%esi), %ecx
+	cmp	%ecx, 20(%edi)
+	jne	L(nequal)
+	RETURN
+
+	.p2align 4
+L(next_two_double_words_16):
+	and	$15, %dh
+	jz	L(fourth_double_word_16)
+	mov	24(%esi), %ecx
+	cmp	%ecx, 24(%edi)
+	jne	L(nequal)
+	RETURN
+
+	.p2align 4
+L(fourth_double_word_16):
+	mov	28(%esi), %ecx
+	cmp	%ecx, 28(%edi)
+	jne	L(nequal)
+	RETURN
+
+	.p2align 4
+L(less4_double_words_32):
+	xor	%eax, %eax
+	test	%dl, %dl
+	jz	L(next_two_double_words_32)
+	and	$15, %dl
+	jz	L(second_double_word_32)
+	mov	32(%esi), %ecx
+	cmp	%ecx, 32(%edi)
+	jne	L(nequal)
+	RETURN
+
+	.p2align 4
+L(second_double_word_32):
+	mov	36(%esi), %ecx
+	cmp	%ecx, 36(%edi)
+	jne	L(nequal)
+	RETURN
+
+	.p2align 4
+L(next_two_double_words_32):
+	and	$15, %dh
+	jz	L(fourth_double_word_32)
+	mov	40(%esi), %ecx
+	cmp	%ecx, 40(%edi)
+	jne	L(nequal)
+	RETURN
+
+	.p2align 4
+L(fourth_double_word_32):
+	mov	44(%esi), %ecx
+	cmp	%ecx, 44(%edi)
+	jne	L(nequal)
+	RETURN
+
+	.p2align 4
+L(less4_double_words_48):
+	xor	%eax, %eax
+	test	%dl, %dl
+	jz	L(next_two_double_words_48)
+	and	$15, %dl
+	jz	L(second_double_word_48)
+	mov	48(%esi), %ecx
+	cmp	%ecx, 48(%edi)
+	jne	L(nequal)
+	RETURN
+
+	.p2align 4
+L(second_double_word_48):
+	mov	52(%esi), %ecx
+	cmp	%ecx, 52(%edi)
+	jne	L(nequal)
+	RETURN
+
+	.p2align 4
+L(next_two_double_words_48):
+	and	$15, %dh
+	jz	L(fourth_double_word_48)
+	mov	56(%esi), %ecx
+	cmp	%ecx, 56(%edi)
+	jne	L(nequal)
+	RETURN
+
+	.p2align 4
+L(fourth_double_word_48):
+	mov	60(%esi), %ecx
+	cmp	%ecx, 60(%edi)
+	jne	L(nequal)
+	RETURN
+
+	.p2align 4
+L(nequal):
+	mov	$1, %eax
+	jg	L(return)
+	neg	%eax
+	RETURN
+
+	.p2align 4
+L(return):
+	RETURN
+
+	.p2align 4
+L(equal):
+	xorl	%eax, %eax
+	RETURN
+
+	CFI_POP (%edi)
+	CFI_POP (%esi)
+
+	.p2align 4
+L(neq):
+	mov	$1, %eax
+	jg	L(neq_bigger)
+	neg	%eax
+
+L(neq_bigger):
+	ret
+
+	.p2align 4
+L(eq):
+	xorl	%eax, %eax
+	ret
+
+END (wcscmp)
+
diff --git a/libc/arch-x86/string/sse2-wcslen-atom.S b/libc/arch-x86/string/sse2-wcslen-atom.S
new file mode 100644
index 0000000..6a6ad51
--- /dev/null
+++ b/libc/arch-x86/string/sse2-wcslen-atom.S
@@ -0,0 +1,306 @@
+/*
+Copyright (c) 2011 Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+    * this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+    * this list of conditions and the following disclaimer in the documentation
+    * and/or other materials provided with the distribution.
+
+    * Neither the name of Intel Corporation nor the names of its contributors
+    * may be used to endorse or promote products derived from this software
+    * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef USE_AS_WCSCAT
+
+# ifndef L
+#  define L(label)	.L##label
+# endif
+
+# ifndef cfi_startproc
+#  define cfi_startproc	.cfi_startproc
+# endif
+
+# ifndef cfi_endproc
+#  define cfi_endproc	.cfi_endproc
+# endif
+
+# ifndef ENTRY
+#  define ENTRY(name)	\
+	.type name,  @function;	\
+	.globl name;	\
+	.p2align 4;	\
+name:	\
+	cfi_startproc
+# endif
+
+# ifndef END
+#  define END(name)	\
+	cfi_endproc;	\
+	.size name, .-name
+# endif
+
+# define PARMS	4
+# define STR	PARMS
+# define RETURN ret
+
+	.text
+ENTRY (wcslen)
+	mov	STR(%esp), %edx	/* edx = string pointer argument (skipped when USE_AS_WCSCAT is defined) */
+#endif
+	cmpl	$0, (%edx)	/* scalar check of the first 8 elements; 'l' states the 32-bit size, no register operand implies it */
+	jz	L(exit_tail0)
+	cmpl	$0, 4(%edx)
+	jz	L(exit_tail1)
+	cmpl	$0, 8(%edx)
+	jz	L(exit_tail2)
+	cmpl	$0, 12(%edx)
+	jz	L(exit_tail3)
+	cmpl	$0, 16(%edx)
+	jz	L(exit_tail4)
+	cmpl	$0, 20(%edx)
+	jz	L(exit_tail5)
+	cmpl	$0, 24(%edx)
+	jz	L(exit_tail6)
+	cmpl	$0, 28(%edx)
+	jz	L(exit_tail7)	/* L(exit_tailN) returns N */
+
+	pxor	%xmm0, %xmm0	/* xmm0 = 0, the value pcmpeqd looks for */
+
+	lea	32(%edx), %eax	/* first byte past the 8 elements already checked one by one */
+	lea	-16(%eax), %ecx	/* ecx = string + 16; L(exit) subtracts it from eax */
+	and	$-16, %eax	/* round down to a 16-byte boundary; may overlap elements already checked */
+
+	pcmpeqd	(%eax), %xmm0	/* each dword of xmm0 becomes all-ones where the element is 0 */
+	pmovmskb %xmm0, %edx	/* 4 mask bits per element; edx no longer holds the string pointer */
+	pxor	%xmm1, %xmm1
+	lea	16(%eax), %eax	/* eax = end of the 16-byte chunk just tested */
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm1	/* 15 more 16-byte probes follow, cycling through xmm0..xmm3 */
+	pmovmskb %xmm1, %edx
+	pxor	%xmm2, %xmm2
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	pxor	%xmm3, %xmm3
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm0	/* xmm0 is still zero: its previous compare found no zero element */
+	pmovmskb %xmm0, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqd	(%eax), %xmm3	/* last unrolled probe; xmm3 is left all-zero when it finds nothing */
+	pmovmskb %xmm3, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	and	$-0x40, %eax	/* round down to 64 bytes; the loop may re-read chunks already tested */
+
+	.p2align 4
+L(aligned_64_loop):
+	movaps	(%eax), %xmm0	/* load four consecutive 16-byte chunks */
+	movaps	16(%eax), %xmm1
+	movaps	32(%eax), %xmm2
+	movaps	48(%eax), %xmm6
+
+	pminub	%xmm1, %xmm0	/* byte-wise minimum: a zero element in any chunk survives as a zero dword */
+	pminub	%xmm6, %xmm2
+	pminub	%xmm0, %xmm2	/* xmm2 = byte-wise minimum of all four chunks */
+	pcmpeqd	%xmm3, %xmm2	/* xmm3 is all-zero here (no earlier compare into it found a match) */
+	pmovmskb %xmm2, %edx
+	lea	64(%eax), %eax	/* eax = end of the 64 bytes just tested */
+	test	%edx, %edx
+	jz	L(aligned_64_loop)
+
+	pcmpeqd	-64(%eax), %xmm3	/* recheck each chunk on its own: the byte-wise minimum can be zero without any single zero element */
+	pmovmskb %xmm3, %edx
+	lea	48(%ecx), %ecx	/* bias ecx so that eax - ecx in L(exit) is the offset of chunk 0 */
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqd	%xmm1, %xmm3	/* chunk 1 (still held in xmm1) */
+	pmovmskb %xmm3, %edx
+	lea	-16(%ecx), %ecx	/* each later chunk starts 16 bytes further from the string */
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqd	-32(%eax), %xmm3	/* chunk 2, reloaded from memory because xmm2 was overwritten */
+	pmovmskb %xmm3, %edx
+	lea	-16(%ecx), %ecx
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqd	%xmm6, %xmm3	/* chunk 3 (still held in xmm6) */
+	pmovmskb %xmm3, %edx
+	lea	-16(%ecx), %ecx
+	test	%edx, %edx
+	jnz	L(exit)
+
+	jmp	L(aligned_64_loop)	/* false positive: ecx is back to its value before the rechecks (+48 -16 -16 -16) */
+
+	.p2align 4
+L(exit):
+	sub	%ecx, %eax	/* byte offset from the string to the chunk that holds the terminator */
+	shr	$2, %eax	/* bytes -> 4-byte elements */
+	test	%dl, %dl	/* dl = mask bits for elements 0 and 1 of that chunk */
+	jz	L(exit_high)
+
+	mov	%dl, %cl
+	and	$15, %cl	/* low nibble = element 0 */
+	jz	L(exit_1)
+	RETURN	/* terminator is element 0 */
+
+	.p2align 4
+L(exit_high):
+	mov	%dh, %ch
+	and	$15, %ch	/* low nibble of dh = element 2 */
+	jz	L(exit_3)
+	add	$2, %eax	/* terminator is element 2 */
+	RETURN
+
+	.p2align 4
+L(exit_1):
+	add	$1, %eax	/* terminator is element 1 */
+	RETURN
+
+	.p2align 4
+L(exit_3):
+	add	$3, %eax	/* terminator is element 3 */
+	RETURN
+
+	.p2align 4
+L(exit_tail0):
+	xor	%eax, %eax	/* L(exit_tailN): terminator found at index N by the scalar checks, return N */
+	RETURN
+
+	.p2align 4
+L(exit_tail1):
+	mov	$1, %eax
+	RETURN
+
+	.p2align 4
+L(exit_tail2):
+	mov	$2, %eax
+	RETURN
+
+	.p2align 4
+L(exit_tail3):
+	mov	$3, %eax
+	RETURN
+
+	.p2align 4
+L(exit_tail4):
+	mov	$4, %eax
+	RETURN
+
+	.p2align 4
+L(exit_tail5):
+	mov	$5, %eax
+	RETURN
+
+	.p2align 4
+L(exit_tail6):
+	mov	$6, %eax
+	RETURN
+
+	.p2align 4
+L(exit_tail7):
+	mov	$7, %eax	/* with USE_AS_WCSCAT defined no RETURN follows: execution falls through into the including file */
+#ifndef USE_AS_WCSCAT
+	RETURN
+
+END (wcslen)
+#endif
+#endif
diff --git a/libc/arch-x86/string/sse2-wcsrchr-atom.S b/libc/arch-x86/string/sse2-wcsrchr-atom.S
new file mode 100644
index 0000000..e30779d
--- /dev/null
+++ b/libc/arch-x86/string/sse2-wcsrchr-atom.S
@@ -0,0 +1,402 @@
+/*
+Copyright (c) 2011 Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+    * this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+    * this list of conditions and the following disclaimer in the documentation
+    * and/or other materials provided with the distribution.
+
+    * Neither the name of Intel Corporation nor the names of its contributors
+    * may be used to endorse or promote products derived from this software
+    * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef L
+# define L(label)	.L##label
+#endif
+
+#ifndef cfi_startproc
+# define cfi_startproc	.cfi_startproc
+#endif
+
+#ifndef cfi_endproc
+# define cfi_endproc	.cfi_endproc
+#endif
+
+#ifndef cfi_rel_offset
+# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+#endif
+
+#ifndef cfi_restore
+# define cfi_restore(reg)	.cfi_restore reg
+#endif
+
+#ifndef cfi_adjust_cfa_offset
+# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
+#endif
+
+#ifndef ENTRY
+# define ENTRY(name)	\
+	.type name, @function;	\
+	.globl name;	\
+	.p2align 4;	\
+name:	\
+	cfi_startproc
+#endif
+
+#ifndef END
+# define END(name)	\
+	cfi_endproc;	\
+	.size name, .-name
+#endif
+
+#define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+#define PUSH(REG)	pushl REG;	CFI_PUSH (REG)
+#define POP(REG)	popl REG;	CFI_POP (REG)
+
+#define PARMS  8
+#define ENTRANCE PUSH(%edi);
+#define RETURN  POP(%edi);	ret;	CFI_PUSH(%edi);
+
+#define STR1  PARMS
+#define STR2  STR1+4
+
+	.text
+ENTRY (wcsrchr)
+
+	ENTRANCE
+	mov	STR1(%esp), %ecx	/* first argument: the string to search */
+	movd	STR2(%esp), %xmm1	/* second argument: the 32-bit character to find */
+
+	mov	%ecx, %edi
+	punpckldq %xmm1, %xmm1
+	pxor	%xmm2, %xmm2	/* xmm2 = 0, used to detect the terminator */
+	punpckldq %xmm1, %xmm1	/* after both unpacks every dword of xmm1 holds the character */
+
+/* ECX has OFFSET. */
+	and	$63, %ecx
+	cmp	$48, %ecx
+	ja	L(crosscache)	/* offset > 48 within a 64-byte block: a 16-byte load from here would run into the next block */
+
+/* unaligned string. */
+	movdqu	(%edi), %xmm0
+	pcmpeqd	%xmm0, %xmm2
+	pcmpeqd	%xmm1, %xmm0
+/* Find where NULL is.  */
+	pmovmskb %xmm2, %ecx
+/* Check if there is a match.  */
+	pmovmskb %xmm0, %eax
+	add	$16, %edi	/* edi = end of the 16 bytes just examined */
+
+	test	%eax, %eax
+	jnz	L(unaligned_match1)
+
+	test	%ecx, %ecx
+	jnz	L(return_null)	/* terminator seen and nothing matched */
+
+	and	$-16, %edi	/* resume at a 16-byte boundary; may overlap bytes already examined */
+
+	PUSH	(%esi)
+
+	xor	%edx, %edx	/* edx = 0: no match recorded yet */
+	jmp	L(loop)
+
+	CFI_POP	(%esi)
+
+	.p2align 4
+L(unaligned_match1):
+	test	%ecx, %ecx
+	jnz	L(prolog_find_zero_1)	/* terminator lies in the same 16 bytes as the match */
+
+	PUSH	(%esi)
+
+/* Save current match */
+	mov	%eax, %edx	/* edx = match mask */
+	mov	%edi, %esi	/* esi = end of the 16 bytes that mask refers to */
+	and	$-16, %edi	/* resume at a 16-byte boundary for the movdqa loads in L(loop) */
+	jmp	L(loop)
+
+	CFI_POP	(%esi)
+
+	.p2align 4
+L(crosscache):
+/* Handle unaligned string.  */
+	and	$15, %ecx	/* ecx = byte offset of the string within its 16-byte block */
+	and	$-16, %edi	/* load the whole aligned block that contains the string start */
+	pxor	%xmm3, %xmm3
+	movdqa	(%edi), %xmm0
+	pcmpeqd	%xmm0, %xmm3
+	pcmpeqd	%xmm1, %xmm0
+/* Find where NULL is.  */
+	pmovmskb %xmm3, %edx
+/* Check if there is a match.  */
+	pmovmskb %xmm0, %eax
+/* Remove the leading bytes.  */
+	shr	%cl, %edx	/* one mask bit per byte, so shifting by the byte offset drops bytes before the string */
+	shr	%cl, %eax
+	add	$16, %edi	/* edi = end of the aligned block */
+
+	test	%eax, %eax
+	jnz	L(unaligned_match)
+
+	test	%edx, %edx
+	jnz	L(return_null)	/* terminator seen and nothing matched */
+
+	PUSH	(%esi)
+
+	xor	%edx, %edx	/* edx = 0: no match recorded yet */
+	jmp	L(loop)
+
+	CFI_POP	(%esi)
+
+	.p2align 4
+L(unaligned_match):
+	test	%edx, %edx
+	jnz	L(prolog_find_zero)	/* terminator lies in the same block as the match */
+
+	PUSH	(%esi)
+
+	mov	%eax, %edx	/* record the match mask */
+	lea	(%edi, %ecx), %esi	/* the mask was shifted right by ecx, so move the reference address by the same amount */
+
+/* Loop start on aligned string.  */
+	.p2align 4
+L(loop):
+	movdqa	(%edi), %xmm0
+	pcmpeqd	%xmm0, %xmm2	/* xmm2 = terminator mask (xmm2 is all-zero on entry) */
+	add	$16, %edi	/* edi = end of the chunk just loaded */
+	pcmpeqd	%xmm1, %xmm0	/* xmm0 = match mask */
+	pmovmskb %xmm2, %ecx
+	pmovmskb %xmm0, %eax
+	or	%eax, %ecx	/* non-zero if the chunk holds a match or a terminator */
+	jnz	L(matches)
+
+	movdqa	(%edi), %xmm3	/* same step unrolled three more times with xmm3, xmm4, xmm5 */
+	pcmpeqd	%xmm3, %xmm2
+	add	$16, %edi
+	pcmpeqd	%xmm1, %xmm3
+	pmovmskb %xmm2, %ecx
+	pmovmskb %xmm3, %eax
+	or	%eax, %ecx
+	jnz	L(matches)
+
+	movdqa	(%edi), %xmm4
+	pcmpeqd	%xmm4, %xmm2
+	add	$16, %edi
+	pcmpeqd	%xmm1, %xmm4
+	pmovmskb %xmm2, %ecx
+	pmovmskb %xmm4, %eax
+	or	%eax, %ecx
+	jnz	L(matches)
+
+	movdqa	(%edi), %xmm5
+	pcmpeqd	%xmm5, %xmm2
+	add	$16, %edi
+	pcmpeqd	%xmm1, %xmm5
+	pmovmskb %xmm2, %ecx
+	pmovmskb %xmm5, %eax
+	or	%eax, %ecx
+	jz	L(loop)	/* otherwise fall through into L(matches) */
+
+	.p2align 4
+L(matches):
+	test	%eax, %eax	/* eax = match mask of the current chunk */
+	jnz	L(match)
+L(return_value):
+	test	%edx, %edx	/* edx = mask of the last recorded match, 0 if there was none */
+	jz	L(return_null_1)
+	mov	%edx, %eax
+	mov	%esi, %edi	/* edi = end of the chunk that mask refers to */
+
+	POP	(%esi)
+
+	test	%ah, %ah	/* bits 8-15: third or fourth element matched */
+	jnz	L(match_third_or_fourth_wchar)
+	test	$15 << 4, %al	/* bits 4-7: second element matched */
+	jnz	L(match_second_wchar)
+	lea	-16(%edi), %eax	/* otherwise the first element of the chunk */
+	RETURN
+
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(return_null_1):
+	POP	(%esi)
+
+	xor	%eax, %eax	/* no match anywhere: return NULL */
+	RETURN
+
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(match):
+	pmovmskb %xmm2, %ecx	/* reload the terminator mask; ecx was OR-ed with the match mask in L(loop) */
+	test	%ecx, %ecx
+	jnz	L(find_zero)	/* match and terminator share this chunk */
+/* save match info */
+	mov	%eax, %edx	/* edx = match mask */
+	mov	%edi, %esi	/* esi = end of the matching chunk */
+	jmp	L(loop)
+
+	.p2align 4
+L(find_zero):
+	test	%cl, %cl	/* cl = terminator bits for elements 0 and 1 */
+	jz	L(find_zero_in_third_or_fourth_wchar)
+	test	$15, %cl	/* low nibble = element 0 */
+	jz	L(find_zero_in_second_wchar)
+	and	$1, %eax	/* terminator is element 0: only a match on element 0 itself counts */
+	jz	L(return_value)	/* none: fall back to the previously recorded match */
+
+	POP	(%esi)
+
+	lea	-16(%edi), %eax	/* address of element 0 */
+	RETURN
+
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(find_zero_in_second_wchar):
+	and	$1 << 5 - 1, %eax	/* GNU as ranks << above -, so this is (1 << 5) - 1 = 0x1f: element 0 plus bit 4 of element 1 */
+	jz	L(return_value)
+
+	POP	(%esi)
+
+	test	$15 << 4, %al	/* bit 4 survived the mask: element 1 (the terminator position) matched */
+	jnz	L(match_second_wchar)
+	lea	-16(%edi), %eax
+	RETURN
+
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(find_zero_in_third_or_fourth_wchar):
+	test	$15, %ch	/* low nibble of ch = element 2 */
+	jz	L(find_zero_in_fourth_wchar)
+	and	$1 << 9 - 1, %eax	/* (1 << 9) - 1 = 0x1ff: elements 0 and 1 plus bit 8 of element 2 */
+	jz	L(return_value)
+
+	POP	(%esi)
+
+	test	%ah, %ah	/* only bit 8 can be set here: element 2 matched */
+	jnz	L(match_third_wchar)
+	test	$15 << 4, %al
+	jnz	L(match_second_wchar)
+	lea	-16(%edi), %eax
+	RETURN
+
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(find_zero_in_fourth_wchar):
+
+	POP	(%esi)
+
+	test	%ah, %ah	/* terminator is the last element, so no match bits are masked off */
+	jnz	L(match_third_or_fourth_wchar)
+	test	$15 << 4, %al
+	jnz	L(match_second_wchar)
+	lea	-16(%edi), %eax
+	RETURN
+
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(match_second_wchar):
+	lea	-12(%edi), %eax	/* edi is the end of the chunk: -12 is its second element */
+	RETURN
+
+	.p2align 4
+L(match_third_or_fourth_wchar):
+	test	$15 << 4, %ah	/* bits 12-15 of the mask: fourth element matched */
+	jnz	L(match_fourth_wchar)
+	lea	-8(%edi), %eax	/* third element */
+	RETURN
+
+	.p2align 4
+L(match_third_wchar):
+	lea	-8(%edi), %eax	/* third element */
+	RETURN
+
+	.p2align 4
+L(match_fourth_wchar):
+	lea	-4(%edi), %eax	/* fourth element */
+	RETURN
+
+	.p2align 4
+L(return_null):
+	xor	%eax, %eax	/* return NULL; unlike L(return_null_1) this does not pop esi */
+	RETURN
+
+	.p2align 4
+L(prolog_find_zero):
+	add	%ecx, %edi	/* the masks were shifted right by ecx in L(crosscache); move the reference address to match */
+	mov     %edx, %ecx	/* ecx = terminator mask, as L(prolog_find_zero_1) expects */
+L(prolog_find_zero_1):
+	test	%cl, %cl	/* cl = terminator bits for elements 0 and 1 */
+	jz	L(prolog_find_zero_in_third_or_fourth_wchar)
+	test	$15, %cl
+	jz	L(prolog_find_zero_in_second_wchar)
+	and	$1, %eax	/* terminator is element 0: only a match on element 0 itself counts */
+	jz	L(return_null)	/* no earlier chunk exists, so there is no recorded match to fall back on */
+
+	lea	-16(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(prolog_find_zero_in_second_wchar):
+	and	$1 << 5 - 1, %eax	/* GNU as ranks << above -, so this is (1 << 5) - 1 = 0x1f */
+	jz	L(return_null)
+
+	test	$15 << 4, %al
+	jnz	L(match_second_wchar)
+	lea	-16(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(prolog_find_zero_in_third_or_fourth_wchar):
+	test	$15, %ch	/* low nibble of ch = element 2 */
+	jz	L(prolog_find_zero_in_fourth_wchar)
+	and	$1 << 9 - 1, %eax	/* (1 << 9) - 1 = 0x1ff: elements 0 and 1 plus bit 8 of element 2 */
+	jz	L(return_null)
+
+	test	%ah, %ah
+	jnz	L(match_third_wchar)
+	test	$15 << 4, %al
+	jnz	L(match_second_wchar)
+	lea	-16(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(prolog_find_zero_in_fourth_wchar):
+	test	%ah, %ah	/* terminator is the last element, so no match bits are masked off */
+	jnz	L(match_third_or_fourth_wchar)
+	test	$15 << 4, %al
+	jnz	L(match_second_wchar)
+	lea	-16(%edi), %eax
+	RETURN
+
+END (wcsrchr)
diff --git a/libc/arch-x86/string/strlen_wrapper.S b/libc/arch-x86/string/ssse3-bcopy-atom.S
similarity index 92%
rename from libc/arch-x86/string/strlen_wrapper.S
rename to libc/arch-x86/string/ssse3-bcopy-atom.S
index e62786b..e4b791a 100644
--- a/libc/arch-x86/string/strlen_wrapper.S
+++ b/libc/arch-x86/string/ssse3-bcopy-atom.S
@@ -28,13 +28,8 @@
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#if defined(USE_SSE2)
 
-# define sse2_strlen_atom strlen
-# include "sse2-strlen-atom.S"
-
-#else
-
-# include "strlen.S"
-
-#endif
+#define MEMCPY	bcopy	/* overrides the template's default entry name (memcpy) */
+#define USE_AS_MEMMOVE	/* NOTE(review): presumably selects overlap-safe copying in the template -- confirm */
+#define USE_AS_BCOPY	/* NOTE(review): presumably adapts the template to bcopy's argument order -- confirm */
+#include "ssse3-memcpy-atom.S"
diff --git a/libc/arch-x86/string/ssse3-memcmp3-new.S b/libc/arch-x86/string/ssse3-memcmp-atom.S
similarity index 73%
rename from libc/arch-x86/string/ssse3-memcmp3-new.S
rename to libc/arch-x86/string/ssse3-memcmp-atom.S
index 5ad8791..0387084 100644
--- a/libc/arch-x86/string/ssse3-memcmp3-new.S
+++ b/libc/arch-x86/string/ssse3-memcmp-atom.S
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2010, Intel Corporation
+Copyright (c) 2010, 2011, 2012, 2013 Intel Corporation
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -28,24 +28,16 @@
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifndef MEMCMP
-# define MEMCMP		ssse3_memcmp3_new
-#endif
-
 #ifndef L
 # define L(label)	.L##label
 #endif
 
-#ifndef ALIGN
-# define ALIGN(n)	.p2align n
-#endif
-
 #ifndef cfi_startproc
-# define cfi_startproc			.cfi_startproc
+# define cfi_startproc	.cfi_startproc
 #endif
 
 #ifndef cfi_endproc
-# define cfi_endproc			.cfi_endproc
+# define cfi_endproc	.cfi_endproc
 #endif
 
 #ifndef cfi_rel_offset
@@ -53,7 +45,7 @@
 #endif
 
 #ifndef cfi_restore
-# define cfi_restore(reg)		.cfi_restore reg
+# define cfi_restore(reg)	.cfi_restore reg
 #endif
 
 #ifndef cfi_adjust_cfa_offset
@@ -61,35 +53,39 @@
 #endif
 
 #ifndef cfi_remember_state
-# define cfi_remember_state		.cfi_remember_state
+# define cfi_remember_state	.cfi_remember_state
 #endif
 
 #ifndef cfi_restore_state
-# define cfi_restore_state		.cfi_restore_state
+# define cfi_restore_state	.cfi_restore_state
 #endif
 
 #ifndef ENTRY
-# define ENTRY(name)			\
-	.type name,  @function; 	\
-	.globl name;			\
-	.p2align 4;			\
-name:					\
+# define ENTRY(name)             \
+	.type name, @function;   \
+	.globl name;             \
+	.p2align 4;              \
+name:                            \
 	cfi_startproc
 #endif
 
 #ifndef END
-# define END(name)			\
-	cfi_endproc;			\
+# define END(name)               \
+	cfi_endproc;             \
 	.size name, .-name
 #endif
 
-#define CFI_PUSH(REG)						\
-  cfi_adjust_cfa_offset (4);					\
-  cfi_rel_offset (REG, 0)
+#ifndef MEMCMP
+# define MEMCMP	memcmp
+#endif
 
-#define CFI_POP(REG)						\
-  cfi_adjust_cfa_offset (-4);					\
-  cfi_restore (REG)
+#define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
 
 #define PUSH(REG)	pushl REG; CFI_PUSH (REG)
 #define POP(REG)	popl REG; CFI_POP (REG)
@@ -101,22 +97,42 @@
 #define RETURN_END	POP (%edi); POP (%esi); POP (%ebx); ret
 #define RETURN		RETURN_END; cfi_restore_state; cfi_remember_state
 
-	.section .text.ssse3,"ax",@progbits
+/* Warning!
+           wmemcmp has to use SIGNED comparison for elements.
+           memcmp has to use UNSIGNED comparison for elements.
+*/
+
+	.text
 ENTRY (MEMCMP)
 	movl	LEN(%esp), %ecx
+
+#ifdef USE_WCHAR
+	shl	$2, %ecx	/* LEN *= 4: element count -> byte count */
+	jz	L(zero)	/* byte count is 0: nothing to compare, return 0 */
+#elif defined USE_UTF16
+	shl	$1, %ecx	/* LEN *= 2: element count -> byte count */
+	jz	L(zero)	/* byte count is 0: nothing to compare, return 0 */
+#endif
+
 	movl	BLK1(%esp), %eax
 	cmp	$48, %ecx
 	movl	BLK2(%esp), %edx
 	jae	L(48bytesormore)
+
+#if !defined(USE_WCHAR) && !defined(USE_UTF16)
 	cmp	$1, %ecx
 	jbe	L(less1bytes)
-	PUSH (%ebx)
+#endif
+
+	PUSH	(%ebx)
 	add	%ecx, %edx
 	add	%ecx, %eax
 	jmp	L(less48bytes)
 
-	CFI_POP (%ebx)
-	ALIGN (4)
+	CFI_POP	(%ebx)
+
+#if !defined(USE_WCHAR) && !defined(USE_UTF16)
+	.p2align 4
 L(less1bytes):
 	jb	L(zero)
 	movb	(%eax), %cl
@@ -127,29 +143,30 @@
 	neg	%eax
 L(1bytesend):
 	ret
+#endif
 
-	ALIGN (4)
+	.p2align 4
 L(zero):
-	mov	$0, %eax
+	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(48bytesormore):
-	PUSH (%ebx)
-	PUSH (%esi)
-	PUSH (%edi)
+	PUSH	(%ebx)
+	PUSH	(%esi)
+	PUSH	(%edi)
 	cfi_remember_state
-	movdqu    (%eax), %xmm3
-	movdqu    (%edx), %xmm0
+	movdqu	(%eax), %xmm3
+	movdqu	(%edx), %xmm0
 	movl	%eax, %edi
 	movl	%edx, %esi
-	pcmpeqb   %xmm0, %xmm3
-	pmovmskb  %xmm3, %edx
+	pcmpeqb	%xmm0, %xmm3
+	pmovmskb %xmm3, %edx
 	lea	16(%edi), %edi
 
-	sub      $0xffff, %edx
+	sub	$0xffff, %edx
 	lea	16(%esi), %esi
-	jnz	  L(less16bytes)
+	jnz	L(less16bytes)
 	mov	%edi, %edx
 	and	$0xf, %edx
 	xor	%edx, %edi
@@ -160,6 +177,7 @@
 	jz	L(shr_0)
 	xor	%edx, %esi
 
+#if !defined(USE_WCHAR) && !defined(USE_UTF16)
 	cmp	$8, %edx
 	jae	L(next_unaligned_table)
 	cmp	$0, %edx
@@ -178,7 +196,7 @@
 	je	L(shr_6)
 	jmp	L(shr_7)
 
-	ALIGN (4)
+	.p2align 2
 L(next_unaligned_table):
 	cmp	$8, %edx
 	je	L(shr_8)
@@ -195,8 +213,33 @@
 	cmp	$14, %edx
 	je	L(shr_14)
 	jmp	L(shr_15)
+#elif defined(USE_WCHAR)
+	cmp	$0, %edx
+	je	L(shr_0)
+	cmp	$4, %edx
+	je	L(shr_4)
+	cmp	$8, %edx
+	je	L(shr_8)
+	jmp	L(shr_12)
+#elif defined(USE_UTF16)
+	cmp	$0, %edx
+	je	L(shr_0)
+	cmp	$2, %edx
+	je	L(shr_2)
+	cmp	$4, %edx
+	je	L(shr_4)
+	cmp	$6, %edx
+	je	L(shr_6)
+	cmp	$8, %edx
+	je	L(shr_8)
+	cmp	$10, %edx
+	je	L(shr_10)
+	cmp	$12, %edx
+	je	L(shr_12)
+	jmp	L(shr_14)
+#endif
 
-	ALIGN (4)
+	.p2align 4
 L(shr_0):
 	cmp	$80, %ecx
 	jae	L(shr_0_gobble)
@@ -215,13 +258,13 @@
 
 	lea	(%ecx, %edi,1), %eax
 	lea	(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_0_gobble):
 	lea	-48(%ecx), %ecx
 	movdqa	(%esi), %xmm0
@@ -261,13 +304,14 @@
 	jnz	L(exit)
 	lea	(%ecx, %edi,1), %eax
 	lea	(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
+#if !defined(USE_WCHAR) && !defined(USE_UTF16)
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_1):
 	cmp	$80, %ecx
 	lea	-48(%ecx), %ecx
@@ -291,13 +335,13 @@
 	jnz	L(exit)
 	lea	(%ecx, %edi,1), %eax
 	lea	1(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_1_gobble):
 	sub	$32, %ecx
 	movdqa	16(%esi), %xmm0
@@ -344,13 +388,16 @@
 
 	lea	(%ecx, %edi,1), %eax
 	lea	1(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
+#endif
 
+
+#if !defined(USE_WCHAR)
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_2):
 	cmp	$80, %ecx
 	lea	-48(%ecx), %ecx
@@ -374,13 +421,13 @@
 	jnz	L(exit)
 	lea	(%ecx, %edi,1), %eax
 	lea	2(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_2_gobble):
 	sub	$32, %ecx
 	movdqa	16(%esi), %xmm0
@@ -427,13 +474,15 @@
 
 	lea	(%ecx, %edi,1), %eax
 	lea	2(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
+#endif
 
+#if !defined(USE_WCHAR) && !defined(USE_UTF16)
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_3):
 	cmp	$80, %ecx
 	lea	-48(%ecx), %ecx
@@ -457,13 +506,13 @@
 	jnz	L(exit)
 	lea	(%ecx, %edi,1), %eax
 	lea	3(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_3_gobble):
 	sub	$32, %ecx
 	movdqa	16(%esi), %xmm0
@@ -510,13 +559,14 @@
 
 	lea	(%ecx, %edi,1), %eax
 	lea	3(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
+#endif
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_4):
 	cmp	$80, %ecx
 	lea	-48(%ecx), %ecx
@@ -540,13 +590,13 @@
 	jnz	L(exit)
 	lea	(%ecx, %edi,1), %eax
 	lea	4(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_4_gobble):
 	sub	$32, %ecx
 	movdqa	16(%esi), %xmm0
@@ -593,13 +643,14 @@
 
 	lea	(%ecx, %edi,1), %eax
 	lea	4(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
+#if !defined(USE_WCHAR) && !defined(USE_UTF16)
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_5):
 	cmp	$80, %ecx
 	lea	-48(%ecx), %ecx
@@ -623,13 +674,13 @@
 	jnz	L(exit)
 	lea	(%ecx, %edi,1), %eax
 	lea	5(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_5_gobble):
 	sub	$32, %ecx
 	movdqa	16(%esi), %xmm0
@@ -676,13 +727,15 @@
 
 	lea	(%ecx, %edi,1), %eax
 	lea	5(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
+#endif
 
+#if !defined(USE_WCHAR)
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_6):
 	cmp	$80, %ecx
 	lea	-48(%ecx), %ecx
@@ -706,13 +759,13 @@
 	jnz	L(exit)
 	lea	(%ecx, %edi,1), %eax
 	lea	6(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_6_gobble):
 	sub	$32, %ecx
 	movdqa	16(%esi), %xmm0
@@ -759,13 +812,15 @@
 
 	lea	(%ecx, %edi,1), %eax
 	lea	6(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
+#endif
 
+#if !defined(USE_WCHAR) && !defined(USE_UTF16)
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_7):
 	cmp	$80, %ecx
 	lea	-48(%ecx), %ecx
@@ -789,13 +844,13 @@
 	jnz	L(exit)
 	lea	(%ecx, %edi,1), %eax
 	lea	7(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_7_gobble):
 	sub	$32, %ecx
 	movdqa	16(%esi), %xmm0
@@ -842,13 +897,14 @@
 
 	lea	(%ecx, %edi,1), %eax
 	lea	7(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
+#endif
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_8):
 	cmp	$80, %ecx
 	lea	-48(%ecx), %ecx
@@ -872,13 +928,13 @@
 	jnz	L(exit)
 	lea	(%ecx, %edi,1), %eax
 	lea	8(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_8_gobble):
 	sub	$32, %ecx
 	movdqa	16(%esi), %xmm0
@@ -925,13 +981,14 @@
 
 	lea	(%ecx, %edi,1), %eax
 	lea	8(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
+#if !defined(USE_WCHAR) && !defined(USE_UTF16)
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_9):
 	cmp	$80, %ecx
 	lea	-48(%ecx), %ecx
@@ -955,13 +1012,13 @@
 	jnz	L(exit)
 	lea	(%ecx, %edi,1), %eax
 	lea	9(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_9_gobble):
 	sub	$32, %ecx
 	movdqa	16(%esi), %xmm0
@@ -1008,13 +1065,15 @@
 
 	lea	(%ecx, %edi,1), %eax
 	lea	9(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
+#endif
 
+#if !defined(USE_WCHAR)
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_10):
 	cmp	$80, %ecx
 	lea	-48(%ecx), %ecx
@@ -1038,13 +1097,13 @@
 	jnz	L(exit)
 	lea	(%ecx, %edi,1), %eax
 	lea	10(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_10_gobble):
 	sub	$32, %ecx
 	movdqa	16(%esi), %xmm0
@@ -1091,13 +1150,15 @@
 
 	lea	(%ecx, %edi,1), %eax
 	lea	10(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
+#endif
 
+#if !defined(USE_WCHAR) && !defined(USE_UTF16)
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_11):
 	cmp	$80, %ecx
 	lea	-48(%ecx), %ecx
@@ -1121,13 +1182,13 @@
 	jnz	L(exit)
 	lea	(%ecx, %edi,1), %eax
 	lea	11(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_11_gobble):
 	sub	$32, %ecx
 	movdqa	16(%esi), %xmm0
@@ -1174,13 +1235,14 @@
 
 	lea	(%ecx, %edi,1), %eax
 	lea	11(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
+#endif
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_12):
 	cmp	$80, %ecx
 	lea	-48(%ecx), %ecx
@@ -1204,13 +1266,13 @@
 	jnz	L(exit)
 	lea	(%ecx, %edi,1), %eax
 	lea	12(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_12_gobble):
 	sub	$32, %ecx
 	movdqa	16(%esi), %xmm0
@@ -1257,13 +1319,14 @@
 
 	lea	(%ecx, %edi,1), %eax
 	lea	12(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
+#if !defined(USE_WCHAR) && !defined(USE_UTF16)
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_13):
 	cmp	$80, %ecx
 	lea	-48(%ecx), %ecx
@@ -1287,13 +1350,13 @@
 	jnz	L(exit)
 	lea	(%ecx, %edi,1), %eax
 	lea	13(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_13_gobble):
 	sub	$32, %ecx
 	movdqa	16(%esi), %xmm0
@@ -1340,13 +1403,15 @@
 
 	lea	(%ecx, %edi,1), %eax
 	lea	13(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
+#endif
 
+#if !defined(USE_WCHAR)
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_14):
 	cmp	$80, %ecx
 	lea	-48(%ecx), %ecx
@@ -1370,13 +1435,13 @@
 	jnz	L(exit)
 	lea	(%ecx, %edi,1), %eax
 	lea	14(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_14_gobble):
 	sub	$32, %ecx
 	movdqa	16(%esi), %xmm0
@@ -1423,13 +1488,15 @@
 
 	lea	(%ecx, %edi,1), %eax
 	lea	14(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
+#endif
 
+#if !defined(USE_WCHAR) && !defined(USE_UTF16)
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_15):
 	cmp	$80, %ecx
 	lea	-48(%ecx), %ecx
@@ -1453,13 +1520,13 @@
 	jnz	L(exit)
 	lea	(%ecx, %edi,1), %eax
 	lea	15(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(shr_15_gobble):
 	sub	$32, %ecx
 	movdqa	16(%esi), %xmm0
@@ -1506,13 +1573,14 @@
 
 	lea	(%ecx, %edi,1), %eax
 	lea	15(%ecx, %esi,1), %edx
-	POP (%edi)
-	POP (%esi)
+	POP	(%edi)
+	POP	(%esi)
 	jmp	L(less48bytes)
+#endif
 
 	cfi_restore_state
 	cfi_remember_state
-	ALIGN (4)
+	.p2align 4
 L(exit):
 	pmovmskb %xmm1, %ebx
 	sub	$0xffff, %ebx
@@ -1520,9 +1588,12 @@
 	lea	-16(%esi), %esi
 	lea	-16(%edi), %edi
 	mov	%ebx, %edx
+
 L(first16bytes):
 	add	%eax, %esi
 L(less16bytes):
+
+#if !defined(USE_WCHAR) && !defined(USE_UTF16)
 	test	%dl, %dl
 	jz	L(next_24_bytes)
 
@@ -1547,61 +1618,61 @@
 	test	$0x40, %dl
 	jnz	L(Byte22)
 L(Byte23):
-	movzbl	 -9(%edi), %eax
-	movzbl	 -9(%esi), %edx
+	movzbl	-9(%edi), %eax
+	movzbl	-9(%esi), %edx
 	sub	%edx, %eax
 	RETURN
 
-	ALIGN (4)
+	.p2align 4
 L(Byte16):
-	movzbl	 -16(%edi), %eax
-	movzbl	 -16(%esi), %edx
+	movzbl	-16(%edi), %eax
+	movzbl	-16(%esi), %edx
 	sub	%edx, %eax
 	RETURN
 
-	ALIGN (4)
+	.p2align 4
 L(Byte17):
-	movzbl	 -15(%edi), %eax
-	movzbl	 -15(%esi), %edx
+	movzbl	-15(%edi), %eax
+	movzbl	-15(%esi), %edx
 	sub	%edx, %eax
 	RETURN
 
-	ALIGN (4)
+	.p2align 4
 L(Byte18):
-	movzbl	 -14(%edi), %eax
-	movzbl	 -14(%esi), %edx
+	movzbl	-14(%edi), %eax
+	movzbl	-14(%esi), %edx
 	sub	%edx, %eax
 	RETURN
 
-	ALIGN (4)
+	.p2align 4
 L(Byte19):
-	movzbl	 -13(%edi), %eax
-	movzbl	 -13(%esi), %edx
+	movzbl	-13(%edi), %eax
+	movzbl	-13(%esi), %edx
 	sub	%edx, %eax
 	RETURN
 
-	ALIGN (4)
+	.p2align 4
 L(Byte20):
-	movzbl	 -12(%edi), %eax
-	movzbl	 -12(%esi), %edx
+	movzbl	-12(%edi), %eax
+	movzbl	-12(%esi), %edx
 	sub	%edx, %eax
 	RETURN
 
-	ALIGN (4)
+	.p2align 4
 L(Byte21):
-	movzbl	 -11(%edi), %eax
-	movzbl	 -11(%esi), %edx
+	movzbl	-11(%edi), %eax
+	movzbl	-11(%esi), %edx
 	sub	%edx, %eax
 	RETURN
 
-	ALIGN (4)
+	.p2align 4
 L(Byte22):
-	movzbl	 -10(%edi), %eax
-	movzbl	 -10(%esi), %edx
+	movzbl	-10(%edi), %eax
+	movzbl	-10(%esi), %edx
 	sub	%edx, %eax
 	RETURN
 
-	ALIGN (4)
+	.p2align 4
 L(next_24_bytes):
 	lea	8(%edi), %edi
 	lea	8(%esi), %esi
@@ -1626,20 +1697,142 @@
 	test	$0x40, %dh
 	jnz	L(Byte22)
 
-	ALIGN (4)
+	.p2align 4
 L(Byte31):
-	movzbl	 -9(%edi), %eax
-	movzbl	 -9(%esi), %edx
+	movzbl	-9(%edi), %eax
+	movzbl	-9(%esi), %edx
 	sub	%edx, %eax
 	RETURN_END
+#elif defined(USE_AS_WMEMCMP)
+
+/* wmemcmp only: %edx is the non-zero (pcmpeqb byte mask - 0xffff) value, so its lowest set bit is the index of the first differing byte of the 16-byte block ending at %edi/%esi; compare the 4-byte element that holds it as SIGNED and return 1 or -1. */
+	test	%dl, %dl
+	jz	L(next_two_double_words)	/* bits 0..7 clear: bytes 0..7 equal, difference is in element 3 or 4 */
+	and	$15, %dl
+	jz	L(second_double_word)	/* bits 0..3 clear: first element equal, second one differs */
+	mov	-16(%edi), %ecx
+	cmp	-16(%esi), %ecx
+	mov	$1, %eax	/* mov does not modify the flags set by cmp */
+	jg	L(nequal_bigger)	/* signed greater: return 1 */
+	neg	%eax	/* otherwise return -1 */
+	RETURN
+
+	.p2align 4
+L(second_double_word):
+	mov	-12(%edi), %ecx
+	cmp	-12(%esi), %ecx
+	mov	$1, %eax	/* mov does not modify the flags set by cmp */
+	jg	L(nequal_bigger)	/* signed greater: return 1 */
+	neg	%eax	/* otherwise return -1 */
+	RETURN
+
+	.p2align 4
+L(next_two_double_words):
+	and	$15, %dh
+	jz	L(fourth_double_word)	/* bits 8..11 clear: third element equal, fourth one differs */
+	mov	-8(%edi), %ecx
+	cmp	-8(%esi), %ecx
+	mov	$1, %eax	/* mov does not modify the flags set by cmp */
+	jg	L(nequal_bigger)	/* signed greater: return 1 */
+	neg	%eax	/* otherwise return -1 */
+	RETURN
+
+	.p2align 4
+L(fourth_double_word):
+	mov	-4(%edi), %ecx
+	cmp	-4(%esi), %ecx
+	mov	$1, %eax	/* mov does not modify the flags set by cmp */
+	jg	L(nequal_bigger)	/* signed greater: return 1 */
+	neg	%eax	/* otherwise return -1 */
+	RETURN
+
+	.p2align 4
+L(nequal_bigger):
+	RETURN_END	/* %eax is still 1; pop %edi, %esi, %ebx and return */
+
+#elif defined(USE_AS_MEMCMP16)
+
+/* __memcmp16 only: %edx is the non-zero (pcmpeqb byte mask - 0xffff) value, so its lowest set bit is the index of the first differing byte of the 16-byte block ending at %edi/%esi; return the difference of the zero-extended 16-bit elements that hold it. */
+	test	%dl, %dl
+	jz	L(next_four_words)	/* bits 0..7 clear: words 1..4 equal, difference is in words 5..8 */
+	test	$15, %dl
+	jz	L(second_two_words)	/* bits 0..3 clear: words 1..2 equal, difference is in word 3 or 4 */
+	test	$3, %dl
+	jz	L(second_word)	/* bits 0..1 clear: word 1 equal, word 2 differs */
+	movzwl	-16(%edi), %eax
+	movzwl	-16(%esi), %ebx	/* %ebx is scratch here; RETURN pops the saved value */
+	subl	%ebx, %eax
+	RETURN
+
+	.p2align 4
+L(second_word):
+	movzwl	-14(%edi), %eax
+	movzwl	-14(%esi), %ebx
+	subl	%ebx, %eax
+	RETURN
+
+	.p2align 4
+L(second_two_words):
+	test	$63, %dl	/* bits 0..3 are already known clear, so this checks bits 4..5 */
+	jz	L(fourth_word)	/* word 3 equal, word 4 differs */
+	movzwl	-12(%edi), %eax
+	movzwl	-12(%esi), %ebx
+	subl	%ebx, %eax
+	RETURN
+
+	.p2align 4
+L(fourth_word):
+	movzwl	-10(%edi), %eax
+	movzwl	-10(%esi), %ebx
+	subl	%ebx, %eax
+	RETURN
+
+	.p2align 4
+L(next_four_words):
+	test	$15, %dh
+	jz	L(fourth_two_words)	/* bits 8..11 clear: words 5..6 equal, difference is in word 7 or 8 */
+	test	$3, %dh
+	jz	L(sixth_word)	/* bits 8..9 clear: word 5 equal, word 6 differs */
+	movzwl	-8(%edi), %eax
+	movzwl	-8(%esi), %ebx
+	subl	%ebx, %eax
+	RETURN
+
+	.p2align 4
+L(sixth_word):
+	movzwl	-6(%edi), %eax
+	movzwl	-6(%esi), %ebx
+	subl	%ebx, %eax
+	RETURN
+
+	.p2align 4
+L(fourth_two_words):
+	test	$63, %dh	/* bits 8..11 are already known clear, so this checks bits 12..13 */
+	jz	L(eighth_word)	/* word 7 equal, word 8 differs */
+	movzwl	-4(%edi), %eax
+	movzwl	-4(%esi), %ebx
+	subl	%ebx, %eax
+	RETURN
+
+	.p2align 4
+L(eighth_word):
+	movzwl	-2(%edi), %eax
+	movzwl	-2(%esi), %ebx
+	subl	%ebx, %eax
+	RETURN
+#else
+# error Unreachable preprocessor case
+#endif
+
 	CFI_PUSH (%ebx)
 
-	ALIGN (4)
+	.p2align 4
 L(more8bytes):
 	cmp	$16, %ecx
 	jae	L(more16bytes)
 	cmp	$8, %ecx
 	je	L(8bytes)
+#if !defined(USE_WCHAR) && !defined(USE_UTF16)
 	cmp	$9, %ecx
 	je	L(9bytes)
 	cmp	$10, %ecx
@@ -1653,13 +1846,25 @@
 	cmp	$14, %ecx
 	je	L(14bytes)
 	jmp	L(15bytes)
+#elif defined(USE_WCHAR) && !defined(USE_UTF16)
+	jmp	L(12bytes)
+#elif defined(USE_UTF16) && !defined(USE_WCHAR)
+	cmp	$10, %ecx
+	je	L(10bytes)
+	cmp	$12, %ecx
+	je	L(12bytes)
+	jmp	L(14bytes)
+#else
+# error Unreachable preprocessor case
+#endif
 
-	ALIGN (4)
+	.p2align 4
 L(more16bytes):
 	cmp	$24, %ecx
 	jae	L(more24bytes)
 	cmp	$16, %ecx
 	je	L(16bytes)
+#if !defined(USE_WCHAR) && !defined(USE_UTF16)
 	cmp	$17, %ecx
 	je	L(17bytes)
 	cmp	$18, %ecx
@@ -1673,13 +1878,25 @@
 	cmp	$22, %ecx
 	je	L(22bytes)
 	jmp	L(23bytes)
+#elif defined(USE_WCHAR) && !defined(USE_UTF16)
+	jmp	L(20bytes)
+#elif defined(USE_UTF16) && !defined(USE_WCHAR)
+	cmp	$18, %ecx
+	je	L(18bytes)
+	cmp	$20, %ecx
+	je	L(20bytes)
+	jmp	L(22bytes)
+#else
+# error Unreachable preprocessor case
+#endif
 
-	ALIGN (4)
+	.p2align 4
 L(more24bytes):
 	cmp	$32, %ecx
 	jae	L(more32bytes)
 	cmp	$24, %ecx
 	je	L(24bytes)
+#if !defined(USE_WCHAR) && !defined(USE_UTF16)
 	cmp	$25, %ecx
 	je	L(25bytes)
 	cmp	$26, %ecx
@@ -1693,13 +1910,25 @@
 	cmp	$30, %ecx
 	je	L(30bytes)
 	jmp	L(31bytes)
+#elif defined(USE_WCHAR) && !defined(USE_UTF16)
+	jmp	L(28bytes)
+#elif defined(USE_UTF16) && !defined(USE_WCHAR)
+	cmp	$26, %ecx
+	je	L(26bytes)
+	cmp	$28, %ecx
+	je	L(28bytes)
+	jmp	L(30bytes)
+#else
+# error Unreachable preprocessor case
+#endif
 
-	ALIGN (4)
+	.p2align 4
 L(more32bytes):
 	cmp	$40, %ecx
 	jae	L(more40bytes)
 	cmp	$32, %ecx
 	je	L(32bytes)
+#if !defined(USE_WCHAR) && !defined(USE_UTF16)
 	cmp	$33, %ecx
 	je	L(33bytes)
 	cmp	$34, %ecx
@@ -1713,11 +1942,51 @@
 	cmp	$38, %ecx
 	je	L(38bytes)
 	jmp	L(39bytes)
+#elif defined(USE_WCHAR) && !defined(USE_UTF16)
+	jmp	L(36bytes)
+#elif defined(USE_UTF16) && !defined(USE_WCHAR)
+	cmp	$34, %ecx
+	je	L(34bytes)
+	cmp	$36, %ecx
+	je	L(36bytes)
+	jmp	L(38bytes)
+#else
+# error Unreachable preprocessor case
+#endif
 
-	ALIGN (4)
+	.p2align 4
+L(less48bytes):
+	cmp	$8, %ecx
+	jae	L(more8bytes)
+#if !defined(USE_WCHAR) && !defined(USE_UTF16)
+	cmp	$2, %ecx
+	je	L(2bytes)
+	cmp	$3, %ecx
+	je	L(3bytes)
+	cmp	$4, %ecx
+	je	L(4bytes)
+	cmp	$5, %ecx
+	je	L(5bytes)
+	cmp	$6, %ecx
+	je	L(6bytes)
+	jmp	L(7bytes)
+#elif defined(USE_WCHAR) && !defined(USE_UTF16)
+	jmp	L(4bytes)
+#elif defined(USE_UTF16) && !defined(USE_WCHAR)
+	cmp	$2, %ecx
+	je	L(2bytes)
+	cmp	$4, %ecx
+	je	L(4bytes)
+	jmp	L(6bytes)
+#else
+# error Unreachable preprocessor case
+#endif
+
+	.p2align 4
 L(more40bytes):
 	cmp	$40, %ecx
 	je	L(40bytes)
+#if !defined(USE_WCHAR) && !defined(USE_UTF16)
 	cmp	$41, %ecx
 	je	L(41bytes)
 	cmp	$42, %ecx
@@ -1731,25 +2000,16 @@
 	cmp	$46, %ecx
 	je	L(46bytes)
 	jmp	L(47bytes)
+#elif defined(USE_UTF16) && !defined(USE_WCHAR)
+	cmp	$42, %ecx
+	je	L(42bytes)
+	cmp	$44, %ecx
+	je	L(44bytes)
+	jmp	L(46bytes)
+#endif
 
-	ALIGN (4)
-L(less48bytes):
-	cmp	$8, %ecx
-	jae	L(more8bytes)
-	cmp	$2, %ecx
-	je	L(2bytes)
-	cmp	$3, %ecx
-	je	L(3bytes)
-	cmp	$4, %ecx
-	je	L(4bytes)
-	cmp	$5, %ecx
-	je	L(5bytes)
-	cmp	$6, %ecx
-	je	L(6bytes)
-	jmp	L(7bytes)
-
-
-	ALIGN (4)
+#if !defined(USE_AS_WMEMCMP) && !defined(USE_AS_MEMCMP16)
+	.p2align 4
 L(44bytes):
 	mov	-44(%eax), %ecx
 	mov	-44(%edx), %ebx
@@ -1806,11 +2066,187 @@
 	cmp	%ebx, %ecx
 	mov	$0, %eax
 	jne	L(find_diff)
-	POP (%ebx)
+	POP	(%ebx)
 	ret
 	CFI_PUSH (%ebx)
+#elif defined(USE_AS_WMEMCMP)
 
-	ALIGN (4)
+	.p2align 4
+L(44bytes):
+	mov	-44(%eax), %ecx
+	cmp	-44(%edx), %ecx
+	jne	L(find_diff)
+L(40bytes):
+	mov	-40(%eax), %ecx
+	cmp	-40(%edx), %ecx
+	jne	L(find_diff)
+L(36bytes):
+	mov	-36(%eax), %ecx
+	cmp	-36(%edx), %ecx
+	jne	L(find_diff)
+L(32bytes):
+	mov	-32(%eax), %ecx
+	cmp	-32(%edx), %ecx
+	jne	L(find_diff)
+L(28bytes):
+	mov	-28(%eax), %ecx
+	cmp	-28(%edx), %ecx
+	jne	L(find_diff)
+L(24bytes):
+	mov	-24(%eax), %ecx
+	cmp	-24(%edx), %ecx
+	jne	L(find_diff)
+L(20bytes):
+	mov	-20(%eax), %ecx
+	cmp	-20(%edx), %ecx
+	jne	L(find_diff)
+L(16bytes):
+	mov	-16(%eax), %ecx
+	cmp	-16(%edx), %ecx
+	jne	L(find_diff)
+L(12bytes):
+	mov	-12(%eax), %ecx
+	cmp	-12(%edx), %ecx
+	jne	L(find_diff)
+L(8bytes):
+	mov	-8(%eax), %ecx
+	cmp	-8(%edx), %ecx
+	jne	L(find_diff)
+L(4bytes):
+	mov	-4(%eax), %ecx
+	xor	%eax, %eax
+	cmp	-4(%edx), %ecx
+	jne	L(find_diff)
+	POP	(%ebx)
+	ret
+	CFI_PUSH (%ebx)
+#elif defined USE_AS_MEMCMP16
+
+	.p2align 4
+L(46bytes):
+	movzwl	-46(%eax), %ecx
+	movzwl	-46(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(44bytes):
+	movzwl	-44(%eax), %ecx
+	movzwl	-44(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(42bytes):
+	movzwl	-42(%eax), %ecx
+	movzwl	-42(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(40bytes):
+	movzwl	-40(%eax), %ecx
+	movzwl	-40(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(38bytes):
+	movzwl	-38(%eax), %ecx
+	movzwl	-38(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(36bytes):
+	movzwl	-36(%eax), %ecx
+	movzwl	-36(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(34bytes):
+	movzwl	-34(%eax), %ecx
+	movzwl	-34(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(32bytes):
+	movzwl	-32(%eax), %ecx
+	movzwl	-32(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(30bytes):
+	movzwl	-30(%eax), %ecx
+	movzwl	-30(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(28bytes):
+	movzwl	-28(%eax), %ecx
+	movzwl	-28(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(26bytes):
+	movzwl	-26(%eax), %ecx
+	movzwl	-26(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(24bytes):
+	movzwl	-24(%eax), %ecx
+	movzwl	-24(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(22bytes):
+	movzwl	-22(%eax), %ecx
+	movzwl	-22(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(20bytes):
+	movzwl	-20(%eax), %ecx
+	movzwl	-20(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(18bytes):
+	movzwl	-18(%eax), %ecx
+	movzwl	-18(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(16bytes):
+	movzwl	-16(%eax), %ecx
+	movzwl	-16(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(14bytes):
+	movzwl	-14(%eax), %ecx
+	movzwl	-14(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(12bytes):
+	movzwl	-12(%eax), %ecx
+	movzwl	-12(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(10bytes):
+	movzwl	-10(%eax), %ecx
+	movzwl	-10(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(8bytes):
+	movzwl	-8(%eax), %ecx
+	movzwl	-8(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(6bytes):
+	movzwl	-6(%eax), %ecx
+	movzwl	-6(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(4bytes):
+	movzwl	-4(%eax), %ecx
+	movzwl	-4(%edx), %ebx
+	subl	%ebx, %ecx
+	jne	L(memcmp16_exit)
+L(2bytes):
+	movzwl	-2(%eax), %eax
+	movzwl	-2(%edx), %ebx
+	subl	%ebx, %eax
+	POP	(%ebx)
+	ret
+	CFI_PUSH (%ebx)
+#else
+# error Unreachable preprocessor case
+#endif
+
+#if !defined(USE_AS_WMEMCMP) && !defined(USE_AS_MEMCMP16)
+
+	.p2align 4
 L(45bytes):
 	mov	-45(%eax), %ecx
 	mov	-45(%edx), %ebx
@@ -1870,11 +2306,11 @@
 	cmp	-1(%edx), %cl
 	mov	$0, %eax
 	jne	L(end)
-	POP (%ebx)
+	POP	(%ebx)
 	ret
 	CFI_PUSH (%ebx)
 
-	ALIGN (4)
+	.p2align 4
 L(46bytes):
 	mov	-46(%eax), %ecx
 	mov	-46(%edx), %ebx
@@ -1938,11 +2374,11 @@
 	cmp	%bh, %ch
 	mov	$0, %eax
 	jne	L(end)
-	POP (%ebx)
+	POP	(%ebx)
 	ret
 	CFI_PUSH (%ebx)
 
-	ALIGN (4)
+	.p2align 4
 L(47bytes):
 	movl	-47(%eax), %ecx
 	movl	-47(%edx), %ebx
@@ -2009,11 +2445,11 @@
 	cmpb	-1(%edx), %al
 	mov	$0, %eax
 	jne	L(end)
-	POP (%ebx)
+	POP	(%ebx)
 	ret
 	CFI_PUSH (%ebx)
 
-	ALIGN (4)
+	.p2align 4
 L(find_diff):
 	cmpb	%bl, %cl
 	jne	L(end)
@@ -2024,12 +2460,37 @@
 	cmp	%bl, %cl
 	jne	L(end)
 	cmp	%bx, %cx
+
+	.p2align 4
 L(end):
-	POP (%ebx)
+	POP	(%ebx)
 	mov	$1, %eax
 	ja	L(bigger)
 	neg	%eax
 L(bigger):
 	ret
+#elif defined(USE_AS_WMEMCMP)
 
+	.p2align 4
+L(find_diff):	/* wmemcmp: reached by jne right after a cmp of two differing 4-byte elements */
+	POP	(%ebx)	/* popl and the mov below leave the flags from that cmp untouched */
+	mov	$1, %eax
+	jg	L(find_diff_bigger)	/* signed greater: return 1 */
+	neg	%eax	/* otherwise return -1 */
+	ret
+
+	.p2align 4
+L(find_diff_bigger):
+	ret
+
+#elif defined(USE_AS_MEMCMP16)
+
+	.p2align 4
+L(memcmp16_exit):	/* __memcmp16: %ecx holds the non-zero difference of two zero-extended 16-bit elements */
+	POP	(%ebx)
+	mov	%ecx, %eax	/* that difference is the return value */
+	ret
+#else
+# error Unreachable preprocessor case
+#endif
 END (MEMCMP)
diff --git a/libc/arch-x86/string/strlen_wrapper.S b/libc/arch-x86/string/ssse3-memcmp16-atom.S
similarity index 86%
copy from libc/arch-x86/string/strlen_wrapper.S
copy to libc/arch-x86/string/ssse3-memcmp16-atom.S
index e62786b..1be8f3d 100644
--- a/libc/arch-x86/string/strlen_wrapper.S
+++ b/libc/arch-x86/string/ssse3-memcmp16-atom.S
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2010, Intel Corporation
+Copyright (c) 2013, Intel Corporation
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -28,13 +28,10 @@
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#if defined(USE_SSE2)
+#define MEMCMP  __memcmp16
 
-# define sse2_strlen_atom strlen
-# include "sse2-strlen-atom.S"
+/* int __memcmp16(const unsigned short *ptr1, const unsigned short *ptr2, size_t n); */
 
-#else
-
-# include "strlen.S"
-
-#endif
+#define USE_UTF16	/* template doubles the element count to get bytes and dispatches on even sizes only */
+#define USE_AS_MEMCMP16 1	/* template selects the 16-bit (movzwl/subl) element comparison paths */
+#include "ssse3-memcmp-atom.S"
diff --git a/libc/arch-x86/string/ssse3-memcpy5.S b/libc/arch-x86/string/ssse3-memcpy-atom.S
similarity index 99%
rename from libc/arch-x86/string/ssse3-memcpy5.S
rename to libc/arch-x86/string/ssse3-memcpy-atom.S
index b0612a6..1080a38 100644
--- a/libc/arch-x86/string/ssse3-memcpy5.S
+++ b/libc/arch-x86/string/ssse3-memcpy-atom.S
@@ -28,8 +28,11 @@
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
+#include "cache.h"
+#undef __i686
+
 #ifndef MEMCPY
-# define MEMCPY	ssse3_memcpy5
+# define MEMCPY	memcpy
 #endif
 
 #ifndef L
diff --git a/libc/arch-x86/string/strlen_wrapper.S b/libc/arch-x86/string/ssse3-memmove-atom.S
similarity index 92%
copy from libc/arch-x86/string/strlen_wrapper.S
copy to libc/arch-x86/string/ssse3-memmove-atom.S
index e62786b..be85596 100644
--- a/libc/arch-x86/string/strlen_wrapper.S
+++ b/libc/arch-x86/string/ssse3-memmove-atom.S
@@ -28,13 +28,7 @@
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#if defined(USE_SSE2)
 
-# define sse2_strlen_atom strlen
-# include "sse2-strlen-atom.S"
-
-#else
-
-# include "strlen.S"
-
-#endif
+#define MEMCPY memmove	/* overrides the template's default entry name (memcpy) */
+#define USE_AS_MEMMOVE	/* NOTE(review): presumably selects overlap-safe copying in the template -- confirm */
+#include "ssse3-memcpy-atom.S"
diff --git a/libc/arch-x86/string/ssse3-strcat-atom.S b/libc/arch-x86/string/ssse3-strcat-atom.S
new file mode 100644
index 0000000..d9b6129
--- /dev/null
+++ b/libc/arch-x86/string/ssse3-strcat-atom.S
@@ -0,0 +1,620 @@
+/*
+Copyright (c) 2011, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+    * this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+    * this list of conditions and the following disclaimer in the documentation
+    * and/or other materials provided with the distribution.
+
+    * Neither the name of Intel Corporation nor the names of its contributors
+    * may be used to endorse or promote products derived from this software
+    * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef L
+# define L(label)	.L##label
+#endif
+
+#ifndef cfi_startproc
+# define cfi_startproc			.cfi_startproc
+#endif
+
+#ifndef cfi_endproc
+# define cfi_endproc			.cfi_endproc
+#endif
+
+#ifndef cfi_rel_offset
+# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+#endif
+
+#ifndef cfi_restore
+# define cfi_restore(reg)		.cfi_restore reg
+#endif
+
+#ifndef cfi_adjust_cfa_offset
+# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
+#endif
+
+#ifndef cfi_remember_state
+# define cfi_remember_state		.cfi_remember_state
+#endif
+
+#ifndef cfi_restore_state
+# define cfi_restore_state		.cfi_restore_state
+#endif
+
+#ifndef ENTRY
+# define ENTRY(name)			\
+	.type name,  @function; 	\
+	.globl name;			\
+	.p2align 4;			\
+name:					\
+	cfi_startproc
+#endif
+
+#ifndef END
+# define END(name)			\
+	cfi_endproc;			\
+	.size name, .-name
+#endif
+
+#define CFI_PUSH(REG)			\
+  cfi_adjust_cfa_offset (4);		\
+  cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)			\
+  cfi_adjust_cfa_offset (-4);		\
+  cfi_restore (REG)
+
+#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
+#define POP(REG)	popl REG; CFI_POP (REG)
+
+#ifndef STRCAT
+# define STRCAT	strcat	/* default entry-point name; a wrapper may predefine STRCAT */
+#endif
+
+#define PARMS	4	/* size of the return address on the stack */
+#define STR1	PARMS+4	/* first argument, read after the entry has pushed %edi (one extra 4-byte slot) */
+#define STR2	STR1+4	/* second argument, read at the same stack depth as STR1 */
+
+#ifdef USE_AS_STRNCAT
+# define LEN	STR2+8	/* third argument; +8 instead of +4 because it is read after %ebx is pushed as well */
+#endif
+
+#define USE_AS_STRCAT	/* makes ssse3-strcpy-atom.S skip its own macros and entry code (guarded by #ifndef USE_AS_STRCAT) */
+
+	.section .text.ssse3,"ax",@progbits
+ENTRY (STRCAT)	/* append the string at STR2 to the string at STR1; every exit returns the STR1 pointer in %eax */
+	PUSH	(%edi)
+	mov	STR1(%esp), %edi	/* %edi = destination pointer, kept for the return value */
+	mov	%edi, %edx
+
+#define RETURN	jmp	L(StrcpyAtom)
+#include "sse2-strlen-atom.S"
+
+L(StrcpyAtom):	/* NOTE(review): presumably the included strlen code arrives here through RETURN with the length in %eax; confirm */
+	mov	STR2(%esp), %ecx	/* %ecx = source pointer */
+	lea	(%edi, %eax), %edx	/* %edx = destination + %eax, the position where appending starts */
+#ifdef USE_AS_STRNCAT
+	PUSH	(%ebx)
+	mov	LEN(%esp), %ebx	/* %ebx = third argument, the length limit */
+	test	%ebx, %ebx
+	jz	L(StrncatExit0)	/* limit of 0: return without writing */
+	cmp	$8, %ebx
+	jbe	L(StrncpyExit8Bytes)	/* limit <= 8: path that checks the limit after every byte */
+#endif
+	cmpb	$0, (%ecx)	/* unrolled scan of the first source bytes for NUL; Exit<k> copies k bytes including the NUL */
+	jz	L(Exit1)
+	cmpb	$0, 1(%ecx)
+	jz	L(Exit2)
+	cmpb	$0, 2(%ecx)
+	jz	L(Exit3)
+	cmpb	$0, 3(%ecx)
+	jz	L(Exit4)
+	cmpb	$0, 4(%ecx)
+	jz	L(Exit5)
+	cmpb	$0, 5(%ecx)
+	jz	L(Exit6)
+	cmpb	$0, 6(%ecx)
+	jz	L(Exit7)
+	cmpb	$0, 7(%ecx)
+	jz	L(Exit8)
+	cmpb	$0, 8(%ecx)
+	jz	L(Exit9)
+#ifdef USE_AS_STRNCAT
+	cmp	$16, %ebx
+	jb	L(StrncpyExit15Bytes)	/* limit is 9..15: continue on the limit-checking path */
+#endif
+	cmpb	$0, 9(%ecx)
+	jz	L(Exit10)
+	cmpb	$0, 10(%ecx)
+	jz	L(Exit11)
+	cmpb	$0, 11(%ecx)
+	jz	L(Exit12)
+	cmpb	$0, 12(%ecx)
+	jz	L(Exit13)
+	cmpb	$0, 13(%ecx)
+	jz	L(Exit14)
+	cmpb	$0, 14(%ecx)
+	jz	L(Exit15)
+	cmpb	$0, 15(%ecx)
+	jz	L(Exit16)
+#ifdef USE_AS_STRNCAT
+	cmp	$16, %ebx
+	je	L(StrncatExit16)	/* limit is exactly 16 and no NUL in 16 bytes: copy 16 and terminate */
+
+# define RETURN1	POP (%ebx); POP (%edi);	ret; \
+	CFI_PUSH (%ebx); CFI_PUSH (%edi)
+# define USE_AS_STRNCPY
+#else
+# define RETURN1	POP(%edi); ret; CFI_PUSH(%edi)
+#endif
+#include "ssse3-strcpy-atom.S"
+
+	.p2align 4
+L(CopyFrom1To16Bytes):	/* copy the final 1..16 bytes; %eax holds a pmovmskb NUL mask, %esi the running offset */
+	add	%esi, %edx	/* apply the running offset to destination and source */
+	add	%esi, %ecx
+
+	POP	(%esi)
+	test	%al, %al	/* low mask byte empty: the NUL is in bytes 8..15 */
+	jz	L(ExitHigh)
+	test	$0x01, %al	/* mask bit k set means byte k is NUL; Exit<k+1> copies k+1 bytes */
+	jnz	L(Exit1)
+	test	$0x02, %al
+	jnz	L(Exit2)
+	test	$0x04, %al
+	jnz	L(Exit3)
+	test	$0x08, %al
+	jnz	L(Exit4)
+	test	$0x10, %al
+	jnz	L(Exit5)
+	test	$0x20, %al
+	jnz	L(Exit6)
+	test	$0x40, %al
+	jnz	L(Exit7)
+	movlpd	(%ecx), %xmm0	/* only bit 7 is left: copy 8 bytes */
+	movlpd	%xmm0, (%edx)
+	movl	%edi, %eax	/* return value: %edi saved by the entry code */
+	RETURN1
+
+	.p2align 4
+L(ExitHigh):	/* NUL is in bytes 8..15: decode the second mask byte */
+	test	$0x01, %ah
+	jnz	L(Exit9)
+	test	$0x02, %ah
+	jnz	L(Exit10)
+	test	$0x04, %ah
+	jnz	L(Exit11)
+	test	$0x08, %ah
+	jnz	L(Exit12)
+	test	$0x10, %ah
+	jnz	L(Exit13)
+	test	$0x20, %ah
+	jnz	L(Exit14)
+	test	$0x40, %ah
+	jnz	L(Exit15)
+	movlpd	(%ecx), %xmm0	/* none of bits 8..14: copy all 16 bytes */
+	movlpd	8(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)
+	movlpd	%xmm1, 8(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit1):	/* StrncatExit<k>: store a terminator at offset k, then fall into Exit<k> */
+	movb	%bh, 1(%edx)	/* %bh is 0 here: the visible jumps to StrncatExit<k> follow cmp $k, %ebx with k <= 16 */
+L(Exit1):	/* Exit<k>: copy k bytes from (%ecx) to (%edx) and return %edi */
+	movb	(%ecx), %al
+	movb	%al, (%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit2):
+	movb	%bh, 2(%edx)
+L(Exit2):
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit3):
+	movb	%bh, 3(%edx)
+L(Exit3):	/* 2 bytes + 1 byte */
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+	movb	2(%ecx), %al
+	movb	%al, 2(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit4):
+	movb	%bh, 4(%edx)
+L(Exit4):
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit5):
+	movb	%bh, 5(%edx)
+L(Exit5):	/* 4 bytes + 1 byte */
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movb	4(%ecx), %al
+	movb	%al, 4(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit6):
+	movb	%bh, 6(%edx)
+L(Exit6):	/* 4 bytes + 2 bytes */
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movw	4(%ecx), %ax
+	movw	%ax, 4(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit7):
+	movb	%bh, 7(%edx)
+L(Exit7):	/* two overlapping 4-byte moves (offsets 0 and 3) cover 7 bytes */
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movl	3(%ecx), %eax
+	movl	%eax, 3(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit8):
+	movb	%bh, 8(%edx)
+L(Exit8):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit9):
+	movb	%bh, 9(%edx)
+L(Exit9):	/* 8 bytes + 1 byte */
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movb	8(%ecx), %al
+	movb	%al, 8(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit10):
+	movb	%bh, 10(%edx)
+L(Exit10):	/* 8 bytes + 2 bytes */
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movw	8(%ecx), %ax
+	movw	%ax, 8(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit11):
+	movb	%bh, 11(%edx)
+L(Exit11):	/* 8 bytes + an overlapping 4-byte move at offset 7 */
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movl	7(%ecx), %eax
+	movl	%eax, 7(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit12):
+	movb	%bh, 12(%edx)
+L(Exit12):	/* 8 bytes + 4 bytes */
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movl	8(%ecx), %eax
+	movl	%eax, 8(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit13):
+	movb	%bh, 13(%edx)
+L(Exit13):	/* two overlapping 8-byte moves (offsets 0 and 5) */
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	5(%ecx), %xmm0
+	movlpd	%xmm0, 5(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit14):
+	movb	%bh, 14(%edx)
+L(Exit14):	/* two overlapping 8-byte moves (offsets 0 and 6) */
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	6(%ecx), %xmm0
+	movlpd	%xmm0, 6(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit15):
+	movb	%bh, 15(%edx)
+L(Exit15):	/* two overlapping 8-byte moves (offsets 0 and 7) */
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	7(%ecx), %xmm0
+	movlpd	%xmm0, 7(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit16):
+	movb	%bh, 16(%edx)
+L(Exit16):	/* two 8-byte moves cover all 16 bytes */
+	movlpd	(%ecx), %xmm0
+	movlpd	8(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)
+	movlpd	%xmm1, 8(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+#ifdef USE_AS_STRNCPY
+
+	CFI_PUSH(%esi)
+
+	.p2align 4
+L(CopyFrom1To16BytesCase2):	/* the length limit ends in this block AND the NUL mask in %eax is non-zero */
+	add	$16, %ebx	/* undo the sub $16 that triggered the jump: %ebx = bytes still allowed */
+	add	%esi, %ecx
+	lea	(%esi, %edx), %esi	/* park destination + offset in %esi while %edx is used as scratch */
+	lea	-9(%ebx), %edx
+	and	$1<<7, %dh	/* keep bit 15 of (count - 9): set when the count is below 9 */
+	or	%al, %dh	/* ZF only if no NUL in bytes 0..7 and that bit is clear */
+	lea	(%esi), %edx	/* restore %edx; lea leaves the flags untouched */
+	POP	(%esi)
+	jz	L(ExitHighCase2)	/* still testing the result of the or above */
+
+	test	$0x01, %al	/* interleave NUL-mask checks with limit checks, lowest byte first */
+	jnz	L(Exit1)
+	cmp	$1, %ebx
+	je	L(StrncatExit1)
+	test	$0x02, %al
+	jnz	L(Exit2)
+	cmp	$2, %ebx
+	je	L(StrncatExit2)
+	test	$0x04, %al
+	jnz	L(Exit3)
+	cmp	$3, %ebx
+	je	L(StrncatExit3)
+	test	$0x08, %al
+	jnz	L(Exit4)
+	cmp	$4, %ebx
+	je	L(StrncatExit4)
+	test	$0x10, %al
+	jnz	L(Exit5)
+	cmp	$5, %ebx
+	je	L(StrncatExit5)
+	test	$0x20, %al
+	jnz	L(Exit6)
+	cmp	$6, %ebx
+	je	L(StrncatExit6)
+	test	$0x40, %al
+	jnz	L(Exit7)
+	cmp	$7, %ebx
+	je	L(StrncatExit7)
+	movlpd	(%ecx), %xmm0	/* copy 8 bytes: either byte 7 is the NUL or the limit is 8 */
+	movlpd	%xmm0, (%edx)
+	lea	7(%edx), %eax
+	cmpb	$1, (%eax)	/* CF = 1 exactly when the copied byte 7 is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF: stay on byte 7 if it is NUL, else step to byte 8 */
+	xor	%cl, %cl
+	movb	%cl, (%eax)	/* write the terminator */
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(ExitHighCase2):	/* same interleaving for bytes 8..15, using the second mask byte */
+	test	$0x01, %ah
+	jnz	L(Exit9)
+	cmp	$9, %ebx
+	je	L(StrncatExit9)
+	test	$0x02, %ah
+	jnz	L(Exit10)
+	cmp	$10, %ebx
+	je	L(StrncatExit10)
+	test	$0x04, %ah
+	jnz	L(Exit11)
+	cmp	$11, %ebx
+	je	L(StrncatExit11)
+	test	$0x8, %ah
+	jnz	L(Exit12)
+	cmp	$12, %ebx
+	je	L(StrncatExit12)
+	test	$0x10, %ah
+	jnz	L(Exit13)
+	cmp	$13, %ebx
+	je	L(StrncatExit13)
+	test	$0x20, %ah
+	jnz	L(Exit14)
+	cmp	$14, %ebx
+	je	L(StrncatExit14)
+	test	$0x40, %ah
+	jnz	L(Exit15)
+	cmp	$15, %ebx
+	je	L(StrncatExit15)
+	movlpd	(%ecx), %xmm0	/* copy 16 bytes; no terminator store on this path */
+	movlpd	%xmm0, (%edx)
+	movlpd	8(%ecx), %xmm1
+	movlpd	%xmm1, 8(%edx)
+	movl	%edi, %eax
+	RETURN1
+
+	CFI_PUSH(%esi)
+
+L(CopyFrom1To16BytesCase2OrCase3):	/* reached when sub $16, %ebx leaves the limit at or below zero */
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)	/* a NUL was also seen in this block */
+
+	.p2align 4
+L(CopyFrom1To16BytesCase3):	/* no NUL in this block: the length limit alone ends the copy */
+	add	$16, %ebx	/* undo the sub $16: %ebx = bytes still allowed */
+	add	%esi, %edx
+	add	%esi, %ecx
+
+	POP	(%esi)
+
+	cmp	$8, %ebx
+	ja	L(ExitHighCase3)
+	cmp	$1, %ebx
+	je	L(StrncatExit1)
+	cmp	$2, %ebx
+	je	L(StrncatExit2)
+	cmp	$3, %ebx
+	je	L(StrncatExit3)
+	cmp	$4, %ebx
+	je	L(StrncatExit4)
+	cmp	$5, %ebx
+	je	L(StrncatExit5)
+	cmp	$6, %ebx
+	je	L(StrncatExit6)
+	cmp	$7, %ebx
+	je	L(StrncatExit7)
+	movlpd	(%ecx), %xmm0	/* none of 1..7 matched: copy 8 bytes */
+	movlpd	%xmm0, (%edx)
+	movb	%bh, 8(%edx)	/* terminator after the 8 copied bytes (%bh used as the zero byte) */
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(ExitHighCase3):	/* limit is above 8 */
+	cmp	$9, %ebx
+	je	L(StrncatExit9)
+	cmp	$10, %ebx
+	je	L(StrncatExit10)
+	cmp	$11, %ebx
+	je	L(StrncatExit11)
+	cmp	$12, %ebx
+	je	L(StrncatExit12)
+	cmp	$13, %ebx
+	je	L(StrncatExit13)
+	cmp	$14, %ebx
+	je	L(StrncatExit14)
+	cmp	$15, %ebx
+	je	L(StrncatExit15)
+	movlpd	(%ecx), %xmm0	/* none of 9..15 matched: copy 16 bytes */
+	movlpd	%xmm0, (%edx)
+	movlpd	8(%ecx), %xmm1
+	movlpd	%xmm1, 8(%edx)
+	movb	%bh, 16(%edx)	/* terminator after the 16 copied bytes */
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncatExit0):	/* limit of 0: return the destination without writing anything */
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncpyExit15Bytes):	/* limit is 9..15; the entry code has already checked source bytes 0..8 */
+	cmp	$9, %ebx
+	je	L(StrncatExit9)
+	cmpb	$0, 9(%ecx)
+	jz	L(Exit10)
+	cmp	$10, %ebx
+	je	L(StrncatExit10)
+	cmpb	$0, 10(%ecx)
+	jz	L(Exit11)
+	cmp	$11, %ebx
+	je	L(StrncatExit11)
+	cmpb	$0, 11(%ecx)
+	jz	L(Exit12)
+	cmp	$12, %ebx
+	je	L(StrncatExit12)
+	cmpb	$0, 12(%ecx)
+	jz	L(Exit13)
+	cmp	$13, %ebx
+	je	L(StrncatExit13)
+	cmpb	$0, 13(%ecx)
+	jz	L(Exit14)
+	cmp	$14, %ebx
+	je	L(StrncatExit14)
+	movlpd	(%ecx), %xmm0	/* limit is 15: copy 15 bytes with two overlapping 8-byte moves */
+	movlpd	%xmm0, (%edx)
+	movlpd	7(%ecx), %xmm0
+	movlpd	%xmm0, 7(%edx)
+	lea	14(%edx), %eax
+	cmpb	$1, (%eax)	/* CF = 1 exactly when the copied byte 14 is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF: terminator goes on byte 14 if it is NUL, else on byte 15 */
+	movb	%bh, (%eax)
+	movl	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncpyExit8Bytes):	/* limit is 1..8: check each source byte, then the limit */
+	cmpb	$0, (%ecx)
+	jz	L(Exit1)
+	cmp	$1, %ebx
+	je	L(StrncatExit1)
+	cmpb	$0, 1(%ecx)
+	jz	L(Exit2)
+	cmp	$2, %ebx
+	je	L(StrncatExit2)
+	cmpb	$0, 2(%ecx)
+	jz	L(Exit3)
+	cmp	$3, %ebx
+	je	L(StrncatExit3)
+	cmpb	$0, 3(%ecx)
+	jz	L(Exit4)
+	cmp	$4, %ebx
+	je	L(StrncatExit4)
+	cmpb	$0, 4(%ecx)
+	jz	L(Exit5)
+	cmp	$5, %ebx
+	je	L(StrncatExit5)
+	cmpb	$0, 5(%ecx)
+	jz	L(Exit6)
+	cmp	$6, %ebx
+	je	L(StrncatExit6)
+	cmpb	$0, 6(%ecx)
+	jz	L(Exit7)
+	cmp	$7, %ebx
+	je	L(StrncatExit7)
+	movlpd	(%ecx), %xmm0	/* limit is 8: copy 8 bytes */
+	movlpd	%xmm0, (%edx)
+	lea	7(%edx), %eax
+	cmpb	$1, (%eax)	/* CF = 1 exactly when the copied byte 7 is 0 */
+	sbb	$-1, %eax	/* terminator goes on byte 7 if it is NUL, else on byte 8 */
+	movb	%bh, (%eax)
+	movl	%edi, %eax
+	RETURN1
+
+#endif
+END (STRCAT)
diff --git a/libc/arch-x86/string/ssse3-strcmp-latest.S b/libc/arch-x86/string/ssse3-strcmp-atom.S
similarity index 99%
rename from libc/arch-x86/string/ssse3-strcmp-latest.S
rename to libc/arch-x86/string/ssse3-strcmp-atom.S
index 673ba57..1275379 100644
--- a/libc/arch-x86/string/ssse3-strcmp-latest.S
+++ b/libc/arch-x86/string/ssse3-strcmp-atom.S
@@ -107,8 +107,12 @@
 	sub	%esi, %ebp
 #endif
 
+#ifndef STRCMP
+# define STRCMP strcmp
+#endif
+
 	.section .text.ssse3,"ax",@progbits
-ENTRY (ssse3_strcmp_latest)
+ENTRY (STRCMP)
 #ifdef USE_AS_STRNCMP
 	PUSH	(%ebp)
 #endif
@@ -2271,4 +2275,4 @@
 	ret
 #endif
 
-END (ssse3_strcmp_latest)
+END (STRCMP)
diff --git a/libc/arch-x86/string/ssse3-strcpy-atom.S b/libc/arch-x86/string/ssse3-strcpy-atom.S
new file mode 100644
index 0000000..30254ca
--- /dev/null
+++ b/libc/arch-x86/string/ssse3-strcpy-atom.S
@@ -0,0 +1,3955 @@
+/*
+Copyright (c) 2011, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+    * this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+    * this list of conditions and the following disclaimer in the documentation
+    * and/or other materials provided with the distribution.
+
+    * Neither the name of Intel Corporation nor the names of its contributors
+    * may be used to endorse or promote products derived from this software
+    * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef USE_AS_STRCAT
+
+# ifndef L
+#  define L(label)	.L##label
+# endif
+
+# ifndef cfi_startproc
+#  define cfi_startproc	.cfi_startproc
+# endif
+
+# ifndef cfi_endproc
+#  define cfi_endproc	.cfi_endproc
+# endif
+
+# ifndef cfi_rel_offset
+#  define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+# endif
+
+# ifndef cfi_restore
+#  define cfi_restore(reg)	.cfi_restore reg
+# endif
+
+# ifndef cfi_adjust_cfa_offset
+#  define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
+# endif
+
+# ifndef ENTRY
+#  define ENTRY(name)	\
+	.type name, @function;	\
+	.globl name;	\
+	.p2align 4;	\
+name:	\
+	cfi_startproc
+# endif
+
+# ifndef END
+#  define END(name)	\
+	cfi_endproc;	\
+	.size name, .-name
+# endif
+
+# define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
+# define POP(REG)	popl REG; CFI_POP (REG)
+
+# ifndef STRCPY
+#  define STRCPY  strcpy
+# endif
+
+# ifdef USE_AS_STRNCPY
+#  define PARMS  8	/* return address plus the %ebx slot pushed by ENTRANCE */
+#  define ENTRANCE PUSH (%ebx)
+#  define RETURN  POP (%ebx); ret; CFI_PUSH (%ebx);
+#  define RETURN1  POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
+# else
+#  define PARMS  4	/* return address only; ENTRANCE is empty in this configuration */
+#  define ENTRANCE
+#  define RETURN  ret
+#  define RETURN1  POP (%edi); ret; CFI_PUSH (%edi)
+# endif
+
+# ifdef USE_AS_STPCPY
+#  define SAVE_RESULT(n)  lea	n(%edx), %eax	/* stpcpy variant: result is the address n bytes past %edx */
+#  define SAVE_RESULT_TAIL(n)  lea	n(%edx), %eax
+# else
+#  define SAVE_RESULT(n)  movl	%edi, %eax	/* result is the pointer held in %edi */
+#  define SAVE_RESULT_TAIL(n)  movl	%edx, %eax	/* result is the pointer held in %edx */
+# endif
+
+# define STR1  PARMS	/* stack offset of the first argument */
+# define STR2  STR1+4	/* stack offset of the second argument */
+# define LEN  STR2+4	/* stack offset of the third argument (read only under USE_AS_STRNCPY) */
+
+/* In this code the following instructions are used for copying:
+	movb	- 1 byte
+	movw	- 2 bytes
+	movl	- 4 bytes
+	movlpd	- 8 bytes
+	movaps	- 16 bytes - requires 16-byte alignment
+		of source and destination addresses.
+*/
+
+.text
+ENTRY (STRCPY)	/* stand-alone entry; this whole section is skipped when USE_AS_STRCAT is defined */
+	ENTRANCE
+	mov	STR1(%esp), %edx	/* %edx = first argument (destination) */
+	mov	STR2(%esp), %ecx	/* %ecx = second argument (source) */
+# ifdef USE_AS_STRNCPY
+	movl	LEN(%esp), %ebx	/* %ebx = third argument (length limit) */
+	cmp	$8, %ebx
+	jbe	L(StrncpyExit8Bytes)	/* limit <= 8: handled by the short limit-checking path */
+# endif
+	cmpb	$0, (%ecx)	/* unrolled scan of the first source bytes for NUL; the ExitTail<k> labels lie outside this excerpt */
+	jz	L(ExitTail1)
+	cmpb	$0, 1(%ecx)
+	jz	L(ExitTail2)
+	cmpb	$0, 2(%ecx)
+	jz	L(ExitTail3)
+	cmpb	$0, 3(%ecx)
+	jz	L(ExitTail4)
+	cmpb	$0, 4(%ecx)
+	jz	L(ExitTail5)
+	cmpb	$0, 5(%ecx)
+	jz	L(ExitTail6)
+	cmpb	$0, 6(%ecx)
+	jz	L(ExitTail7)
+	cmpb	$0, 7(%ecx)
+	jz	L(ExitTail8)
+# ifdef USE_AS_STRNCPY
+	cmp	$16, %ebx
+	jb	L(StrncpyExit15Bytes)	/* limit is 9..15 */
+# endif
+	cmpb	$0, 8(%ecx)
+	jz	L(ExitTail9)
+	cmpb	$0, 9(%ecx)
+	jz	L(ExitTail10)
+	cmpb	$0, 10(%ecx)
+	jz	L(ExitTail11)
+	cmpb	$0, 11(%ecx)
+	jz	L(ExitTail12)
+	cmpb	$0, 12(%ecx)
+	jz	L(ExitTail13)
+	cmpb	$0, 13(%ecx)
+	jz	L(ExitTail14)
+	cmpb	$0, 14(%ecx)
+	jz	L(ExitTail15)
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRLCPY
+	cmp	$16, %ebx
+	je	L(ExitTail16)	/* limit is exactly 16: byte 15 does not need to be inspected */
+# endif
+	cmpb	$0, 15(%ecx)
+	jz	L(ExitTail16)
+
+# if defined USE_AS_STRNCPY && defined USE_AS_STRLCPY
+	cmp	$16, %ebx
+	je	L(StrlcpyExitTail16)
+# endif
+
+	PUSH	(%edi)
+# ifndef USE_AS_STRLCPY
+	mov	%edx, %edi	/* remember the original destination in %edi */
+# else
+	mov	%ecx, %edi	/* strlcpy variant remembers the original source instead */
+# endif
+#endif
+	PUSH	(%esi)
+#ifdef USE_AS_STRNCPY
+	mov	%ecx, %esi
+	sub	$16, %ebx
+	and	$0xf, %esi	/* %esi = source misalignment within its 16-byte block */
+
+/* add 16 bytes ecx_offset to ebx */
+
+	add	%esi, %ebx
+#endif
+	lea	16(%ecx), %esi
+	and	$-16, %esi	/* %esi = first 16-byte boundary strictly above the source pointer */
+	pxor	%xmm0, %xmm0	/* all-zero register used as the NUL comparand */
+	movlpd	(%ecx), %xmm1
+	movlpd	%xmm1, (%edx)	/* copy source bytes 0..7 */
+
+	pcmpeqb	(%esi), %xmm0	/* compare the aligned block against zero; %xmm0 stays zero if it holds no NUL */
+	movlpd	8(%ecx), %xmm1
+	movlpd	%xmm1, 8(%edx)	/* copy source bytes 8..15 */
+
+	pmovmskb %xmm0, %eax	/* mask bit k set means byte k of the aligned block is NUL */
+	sub	%ecx, %esi	/* %esi = offset of that aligned block from the source pointer */
+
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(CopyFrom1To16BytesCase2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	mov	%edx, %eax
+	lea	16(%edx), %edx
+	and	$-16, %edx	/* advance the destination to its next 16-byte boundary */
+	sub	%edx, %eax	/* %eax = old destination - aligned destination (negative) */
+
+#ifdef USE_AS_STRNCPY
+	add	%eax, %esi
+	lea	-1(%esi), %esi
+	and	$1<<31, %esi	/* keep only the sign bit of that difference */
+	test	%esi, %esi
+	jnz	L(ContinueCopy)
+	lea	16(%ebx), %ebx	/* NOTE(review): count correction when the sign bit is clear; derivation not checked here */
+
+L(ContinueCopy):
+#endif
+	sub	%eax, %ecx	/* advance the source by the same distance as the destination */
+	mov	%ecx, %eax
+	and	$0xf, %eax	/* %eax = source misalignment relative to the now-aligned destination */
+	mov	$0, %esi	/* mov leaves the flags alone, so the jz below still tests the and */
+
+/* case: ecx_offset == edx_offset */
+
+	jz	L(Align16Both)
+
+	cmp	$8, %eax	/* dispatch on the misalignment; Shl<k> handles a shift of k bytes */
+	jae	L(ShlHigh8)
+	cmp	$1, %eax
+	je	L(Shl1)
+	cmp	$2, %eax
+	je	L(Shl2)
+	cmp	$3, %eax
+	je	L(Shl3)
+	cmp	$4, %eax
+	je	L(Shl4)
+	cmp	$5, %eax
+	je	L(Shl5)
+	cmp	$6, %eax
+	je	L(Shl6)
+	jmp	L(Shl7)
+
+L(ShlHigh8):
+	je	L(Shl8)	/* reuses the flags of cmp $8, %eax above */
+	cmp	$9, %eax
+	je	L(Shl9)
+	cmp	$10, %eax
+	je	L(Shl10)
+	cmp	$11, %eax
+	je	L(Shl11)
+	cmp	$12, %eax
+	je	L(Shl12)
+	cmp	$13, %eax
+	je	L(Shl13)
+	cmp	$14, %eax
+	je	L(Shl14)
+	jmp	L(Shl15)
+
+L(Align16Both):	/* source and destination share 16-byte alignment: aligned loads and stores, one block of lookahead */
+	movaps	(%ecx), %xmm1
+	movaps	16(%ecx), %xmm2
+	movaps	%xmm1, (%edx)	/* store the current block; the next one is checked for NUL first */
+	pcmpeqb	%xmm2, %xmm0	/* %xmm0 is still zero because no NUL has been found so far */
+	pmovmskb %xmm0, %eax
+	lea	16(%esi), %esi	/* %esi = running offset of the block just examined */
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(CopyFrom1To16BytesCase2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	movaps	16(%ecx, %esi), %xmm3
+	movaps	%xmm2, (%edx, %esi)
+	pcmpeqb	%xmm3, %xmm0
+	pmovmskb %xmm0, %eax
+	lea	16(%esi), %esi
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(CopyFrom1To16BytesCase2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	movaps	16(%ecx, %esi), %xmm4
+	movaps	%xmm3, (%edx, %esi)
+	pcmpeqb	%xmm4, %xmm0
+	pmovmskb %xmm0, %eax
+	lea	16(%esi), %esi
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(CopyFrom1To16BytesCase2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	movaps	16(%ecx, %esi), %xmm1
+	movaps	%xmm4, (%edx, %esi)
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm0, %eax
+	lea	16(%esi), %esi
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(CopyFrom1To16BytesCase2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	movaps	16(%ecx, %esi), %xmm2
+	movaps	%xmm1, (%edx, %esi)
+	pcmpeqb	%xmm2, %xmm0
+	pmovmskb %xmm0, %eax
+	lea	16(%esi), %esi
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(CopyFrom1To16BytesCase2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	movaps	16(%ecx, %esi), %xmm3
+	movaps	%xmm2, (%edx, %esi)
+	pcmpeqb	%xmm3, %xmm0
+	pmovmskb %xmm0, %eax
+	lea	16(%esi), %esi
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(CopyFrom1To16BytesCase2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	movaps	%xmm3, (%edx, %esi)
+	mov	%ecx, %eax
+	lea	16(%ecx, %esi), %ecx
+	and	$-0x40, %ecx	/* round the source position down to a 64-byte boundary for the main loop */
+	sub	%ecx, %eax	/* %eax = old source base - new source base */
+	sub	%eax, %edx	/* move the destination base by the same distance */
+#ifdef USE_AS_STRNCPY
+	lea	112(%ebx, %eax), %ebx	/* NOTE(review): re-bases the remaining count for the 64-byte loop; constant not derived here */
+#endif
+	mov	$-0x40, %esi	/* the loop advances its pointers before leaving, so exit offsets start at -64 */
+
+L(Aligned64Loop):	/* main loop for equal alignment: examine 64 source bytes per iteration */
+	movaps	(%ecx), %xmm2
+	movaps	32(%ecx), %xmm3
+	movaps	%xmm2, %xmm4	/* %xmm4..%xmm7 keep the four unmodified 16-byte blocks */
+	movaps	16(%ecx), %xmm5
+	movaps	%xmm3, %xmm6
+	movaps	48(%ecx), %xmm7
+	pminub	%xmm5, %xmm2
+	pminub	%xmm7, %xmm3
+	pminub	%xmm2, %xmm3	/* byte-wise minimum of all four blocks: a zero byte means one of them holds a NUL */
+	lea	64(%edx), %edx
+	pcmpeqb	%xmm0, %xmm3
+	lea	64(%ecx), %ecx
+	pmovmskb %xmm3, %eax
+#ifdef USE_AS_STRNCPY
+	sub	$64, %ebx
+	jbe	L(StrncpyLeaveCase2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Aligned64Leave)
+	movaps	%xmm4, -64(%edx)	/* no NUL: store all 64 bytes (pointers were already advanced) */
+	movaps	%xmm5, -48(%edx)
+	movaps	%xmm6, -32(%edx)
+	movaps	%xmm7, -16(%edx)
+	jmp	L(Aligned64Loop)
+
+L(Aligned64Leave):	/* a NUL is somewhere in the 64 bytes: find which 16-byte block holds it */
+#ifdef USE_AS_STRNCPY
+	lea	48(%ebx), %ebx
+#endif
+	pcmpeqb	%xmm4, %xmm0
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)	/* NUL in the first block; %esi is still -64 */
+
+	pcmpeqb	%xmm5, %xmm0
+#ifdef USE_AS_STRNCPY
+	lea	-16(%ebx), %ebx
+#endif
+	pmovmskb %xmm0, %eax
+	movaps	%xmm4, -64(%edx)	/* first block is NUL-free: store it whole */
+	lea	16(%esi), %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	pcmpeqb	%xmm6, %xmm0
+#ifdef USE_AS_STRNCPY
+	lea	-16(%ebx), %ebx
+#endif
+	pmovmskb %xmm0, %eax
+	movaps	%xmm5, -48(%edx)
+	lea	16(%esi), %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	movaps	%xmm6, -32(%edx)
+	pcmpeqb	%xmm7, %xmm0
+#ifdef USE_AS_STRNCPY
+	lea	-16(%ebx), %ebx
+#endif
+	pmovmskb %xmm0, %eax
+	lea	16(%esi), %esi
+	jmp	L(CopyFrom1To16Bytes)	/* the NUL must be in the fourth block */
+
+	.p2align 4
+L(Shl1):	/* source is 1 byte past a 16-byte boundary, destination is 16-byte aligned */
+	movaps	-1(%ecx), %xmm1	/* aligned load that holds the source bytes from offset -1 */
+	movaps	15(%ecx), %xmm2	/* next aligned 16 source bytes */
+L(Shl1Start):
+	pcmpeqb	%xmm2, %xmm0
+	pmovmskb %xmm0, %eax	/* NUL mask of the block just loaded */
+	movaps	%xmm2, %xmm3	/* keep an unshifted copy for the next palignr */
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit1Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl1LoopExit)
+
+	palignr	$1, %xmm1, %xmm2	/* %xmm2 = the 16 source bytes starting at (%ecx) */
+	movaps	%xmm3, %xmm1
+	movaps	%xmm2, (%edx)
+	movaps	31(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit1Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl1LoopExit)
+
+	palignr	$1, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	31(%ecx), %xmm2
+	movaps	%xmm3, %xmm1
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit1Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl1LoopExit)
+
+	palignr	$1, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	31(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit1Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl1LoopExit)
+
+	palignr	$1, %xmm3, %xmm2
+	movaps	%xmm2, (%edx)
+	lea	31(%ecx), %ecx
+	lea	16(%edx), %edx
+
+	mov	%ecx, %eax
+	and	$-0x40, %ecx	/* round the aligned source position down to a 64-byte boundary */
+	sub	%ecx, %eax	/* %eax = number of bytes stepped back */
+	lea	-15(%ecx), %ecx	/* restore the 1-byte skew so that 15(%ecx) is aligned again */
+	sub	%eax, %edx	/* step the destination back by the same amount */
+#ifdef USE_AS_STRNCPY
+	add	%eax, %ebx	/* give the stepped-back bytes back to the remaining count */
+#endif
+	movaps	-1(%ecx), %xmm1
+
+L(Shl1LoopStart):	/* 64 bytes per iteration */
+	movaps	15(%ecx), %xmm2
+	movaps	31(%ecx), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	47(%ecx), %xmm4
+	movaps	%xmm4, %xmm7
+	movaps	63(%ecx), %xmm5
+	pminub	%xmm2, %xmm6
+	pminub	%xmm5, %xmm7
+	pminub	%xmm6, %xmm7	/* byte-wise minimum of the four blocks: zero byte means a NUL somewhere */
+	pcmpeqb	%xmm0, %xmm7
+	pmovmskb %xmm7, %eax
+	movaps	%xmm5, %xmm7	/* unshifted last block becomes the carry-in of the next iteration */
+	palignr	$1, %xmm4, %xmm5
+	palignr	$1, %xmm3, %xmm4
+	test	%eax, %eax
+	jnz	L(Shl1Start)	/* NUL found: redo these bytes 16 at a time; %xmm1 and %xmm2 are still intact */
+#ifdef USE_AS_STRNCPY
+	sub	$64, %ebx
+	jbe	L(StrncpyLeave1)
+#endif
+	palignr	$1, %xmm2, %xmm3
+	lea	64(%ecx), %ecx
+	palignr	$1, %xmm1, %xmm2
+	movaps	%xmm7, %xmm1
+	movaps	%xmm5, 48(%edx)
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	jmp	L(Shl1LoopStart)
+
+L(Shl1LoopExit):	/* copy the 15 bytes that precede the block holding the NUL */
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	7(%ecx), %xmm0
+	movlpd	%xmm0, 7(%edx)
+	mov	$15, %esi	/* offset of the block holding the NUL, consumed by CopyFrom1To16Bytes */
+	jmp	L(CopyFrom1To16Bytes)
+
+	.p2align 4
+L(Shl2):	/* source is 2 bytes past a 16-byte boundary, destination is 16-byte aligned */
+	movaps	-2(%ecx), %xmm1	/* aligned load that holds the source bytes from offset -2 */
+	movaps	14(%ecx), %xmm2	/* next aligned 16 source bytes */
+L(Shl2Start):
+	pcmpeqb	%xmm2, %xmm0
+	pmovmskb %xmm0, %eax	/* NUL mask of the block just loaded */
+	movaps	%xmm2, %xmm3	/* keep an unshifted copy for the next palignr */
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit2Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl2LoopExit)
+
+	palignr	$2, %xmm1, %xmm2	/* %xmm2 = the 16 source bytes starting at (%ecx) */
+	movaps	%xmm3, %xmm1
+	movaps	%xmm2, (%edx)
+	movaps	30(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit2Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl2LoopExit)
+
+	palignr	$2, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	30(%ecx), %xmm2
+	movaps	%xmm3, %xmm1
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit2Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl2LoopExit)
+
+	palignr	$2, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	30(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit2Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl2LoopExit)
+
+	palignr	$2, %xmm3, %xmm2
+	movaps	%xmm2, (%edx)
+	lea	30(%ecx), %ecx
+	lea	16(%edx), %edx
+
+	mov	%ecx, %eax
+	and	$-0x40, %ecx	/* round the aligned source position down to a 64-byte boundary */
+	sub	%ecx, %eax	/* %eax = number of bytes stepped back */
+	lea	-14(%ecx), %ecx	/* restore the 2-byte skew so that 14(%ecx) is aligned again */
+	sub	%eax, %edx	/* step the destination back by the same amount */
+#ifdef USE_AS_STRNCPY
+	add	%eax, %ebx	/* give the stepped-back bytes back to the remaining count */
+#endif
+	movaps	-2(%ecx), %xmm1
+
+L(Shl2LoopStart):	/* 64 bytes per iteration */
+	movaps	14(%ecx), %xmm2
+	movaps	30(%ecx), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	46(%ecx), %xmm4
+	movaps	%xmm4, %xmm7
+	movaps	62(%ecx), %xmm5
+	pminub	%xmm2, %xmm6
+	pminub	%xmm5, %xmm7
+	pminub	%xmm6, %xmm7	/* byte-wise minimum of the four blocks: zero byte means a NUL somewhere */
+	pcmpeqb	%xmm0, %xmm7
+	pmovmskb %xmm7, %eax
+	movaps	%xmm5, %xmm7	/* unshifted last block becomes the carry-in of the next iteration */
+	palignr	$2, %xmm4, %xmm5
+	palignr	$2, %xmm3, %xmm4
+	test	%eax, %eax
+	jnz	L(Shl2Start)	/* NUL found: redo these bytes 16 at a time; %xmm1 and %xmm2 are still intact */
+#ifdef USE_AS_STRNCPY
+	sub	$64, %ebx
+	jbe	L(StrncpyLeave2)
+#endif
+	palignr	$2, %xmm2, %xmm3
+	lea	64(%ecx), %ecx
+	palignr	$2, %xmm1, %xmm2
+	movaps	%xmm7, %xmm1
+	movaps	%xmm5, 48(%edx)
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	jmp	L(Shl2LoopStart)
+
+L(Shl2LoopExit):	/* copy the 14 bytes that precede the block holding the NUL */
+	movlpd	(%ecx), %xmm0
+	movlpd	6(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)
+	movlpd	%xmm1, 6(%edx)
+	mov	$14, %esi	/* offset of the block holding the NUL, consumed by CopyFrom1To16Bytes */
+	jmp	L(CopyFrom1To16Bytes)
+
+	.p2align 4
+L(Shl3):	/* source is 3 bytes past a 16-byte boundary, destination is 16-byte aligned */
+	movaps	-3(%ecx), %xmm1	/* aligned load that holds the source bytes from offset -3 */
+	movaps	13(%ecx), %xmm2	/* next aligned 16 source bytes */
+L(Shl3Start):
+	pcmpeqb	%xmm2, %xmm0
+	pmovmskb %xmm0, %eax	/* NUL mask of the block just loaded */
+	movaps	%xmm2, %xmm3	/* keep an unshifted copy for the next palignr */
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit3Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl3LoopExit)
+
+	palignr	$3, %xmm1, %xmm2	/* %xmm2 = the 16 source bytes starting at (%ecx) */
+	movaps	%xmm3, %xmm1
+	movaps	%xmm2, (%edx)
+	movaps	29(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit3Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl3LoopExit)
+
+	palignr	$3, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	29(%ecx), %xmm2
+	movaps	%xmm3, %xmm1
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit3Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl3LoopExit)
+
+	palignr	$3, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	29(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit3Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl3LoopExit)
+
+	palignr	$3, %xmm3, %xmm2
+	movaps	%xmm2, (%edx)
+	lea	29(%ecx), %ecx
+	lea	16(%edx), %edx
+
+	mov	%ecx, %eax
+	and	$-0x40, %ecx	/* round the aligned source position down to a 64-byte boundary */
+	sub	%ecx, %eax	/* %eax = number of bytes stepped back */
+	lea	-13(%ecx), %ecx	/* restore the 3-byte skew so that 13(%ecx) is aligned again */
+	sub	%eax, %edx	/* step the destination back by the same amount */
+#ifdef USE_AS_STRNCPY
+	add	%eax, %ebx	/* give the stepped-back bytes back to the remaining count */
+#endif
+	movaps	-3(%ecx), %xmm1
+
+L(Shl3LoopStart):	/* 64 bytes per iteration */
+	movaps	13(%ecx), %xmm2
+	movaps	29(%ecx), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	45(%ecx), %xmm4
+	movaps	%xmm4, %xmm7
+	movaps	61(%ecx), %xmm5
+	pminub	%xmm2, %xmm6
+	pminub	%xmm5, %xmm7
+	pminub	%xmm6, %xmm7	/* byte-wise minimum of the four blocks: zero byte means a NUL somewhere */
+	pcmpeqb	%xmm0, %xmm7
+	pmovmskb %xmm7, %eax
+	movaps	%xmm5, %xmm7	/* unshifted last block becomes the carry-in of the next iteration */
+	palignr	$3, %xmm4, %xmm5
+	palignr	$3, %xmm3, %xmm4
+	test	%eax, %eax
+	jnz	L(Shl3Start)	/* NUL found: redo these bytes 16 at a time; %xmm1 and %xmm2 are still intact */
+#ifdef USE_AS_STRNCPY
+	sub	$64, %ebx
+	jbe	L(StrncpyLeave3)
+#endif
+	palignr	$3, %xmm2, %xmm3
+	lea	64(%ecx), %ecx
+	palignr	$3, %xmm1, %xmm2
+	movaps	%xmm7, %xmm1
+	movaps	%xmm5, 48(%edx)
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	jmp	L(Shl3LoopStart)
+
+L(Shl3LoopExit):	/* copy the 13 bytes that precede the block holding the NUL */
+	movlpd	(%ecx), %xmm0
+	movlpd	5(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)
+	movlpd	%xmm1, 5(%edx)
+	mov	$13, %esi	/* offset of the block holding the NUL, consumed by CopyFrom1To16Bytes */
+	jmp	L(CopyFrom1To16Bytes)
+
+	.p2align 4
+L(Shl4):	/* source is 4 bytes past a 16-byte boundary, destination is 16-byte aligned */
+	movaps	-4(%ecx), %xmm1	/* aligned load that holds the source bytes from offset -4 */
+	movaps	12(%ecx), %xmm2	/* next aligned 16 source bytes */
+L(Shl4Start):
+	pcmpeqb	%xmm2, %xmm0
+	pmovmskb %xmm0, %eax	/* NUL mask of the block just loaded */
+	movaps	%xmm2, %xmm3	/* keep an unshifted copy for the next palignr */
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit4Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl4LoopExit)
+
+	palignr	$4, %xmm1, %xmm2	/* %xmm2 = the 16 source bytes starting at (%ecx) */
+	movaps	%xmm3, %xmm1
+	movaps	%xmm2, (%edx)
+	movaps	28(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit4Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl4LoopExit)
+
+	palignr	$4, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	28(%ecx), %xmm2
+	movaps	%xmm3, %xmm1
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit4Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl4LoopExit)
+
+	palignr	$4, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	28(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit4Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl4LoopExit)
+
+	palignr	$4, %xmm3, %xmm2
+	movaps	%xmm2, (%edx)
+	lea	28(%ecx), %ecx
+	lea	16(%edx), %edx
+
+	mov	%ecx, %eax
+	and	$-0x40, %ecx	/* round the aligned source position down to a 64-byte boundary */
+	sub	%ecx, %eax	/* %eax = number of bytes stepped back */
+	lea	-12(%ecx), %ecx	/* restore the 4-byte skew so that 12(%ecx) is aligned again */
+	sub	%eax, %edx	/* step the destination back by the same amount */
+#ifdef USE_AS_STRNCPY
+	add	%eax, %ebx	/* give the stepped-back bytes back to the remaining count */
+#endif
+	movaps	-4(%ecx), %xmm1
+
+L(Shl4LoopStart):	/* 64 bytes per iteration */
+	movaps	12(%ecx), %xmm2
+	movaps	28(%ecx), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	44(%ecx), %xmm4
+	movaps	%xmm4, %xmm7
+	movaps	60(%ecx), %xmm5
+	pminub	%xmm2, %xmm6
+	pminub	%xmm5, %xmm7
+	pminub	%xmm6, %xmm7	/* byte-wise minimum of the four blocks: zero byte means a NUL somewhere */
+	pcmpeqb	%xmm0, %xmm7
+	pmovmskb %xmm7, %eax
+	movaps	%xmm5, %xmm7	/* unshifted last block becomes the carry-in of the next iteration */
+	palignr	$4, %xmm4, %xmm5
+	palignr	$4, %xmm3, %xmm4
+	test	%eax, %eax
+	jnz	L(Shl4Start)	/* NUL found: redo these bytes 16 at a time; %xmm1 and %xmm2 are still intact */
+#ifdef USE_AS_STRNCPY
+	sub	$64, %ebx
+	jbe	L(StrncpyLeave4)
+#endif
+	palignr	$4, %xmm2, %xmm3
+	lea	64(%ecx), %ecx
+	palignr	$4, %xmm1, %xmm2
+	movaps	%xmm7, %xmm1
+	movaps	%xmm5, 48(%edx)
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	jmp	L(Shl4LoopStart)
+
+L(Shl4LoopExit):	/* copy the 12 bytes that precede the block holding the NUL */
+	movlpd	(%ecx), %xmm0
+	movl	8(%ecx), %esi	/* %esi is used as scratch for bytes 8..11 before it receives the offset */
+	movlpd	%xmm0, (%edx)
+	movl	%esi, 8(%edx)
+	mov	$12, %esi	/* offset of the block holding the NUL, consumed by CopyFrom1To16Bytes */
+	jmp	L(CopyFrom1To16Bytes)
+
+	.p2align 4
+L(Shl5):	/* source is 5 bytes past a 16-byte boundary, destination is 16-byte aligned */
+	movaps	-5(%ecx), %xmm1	/* aligned load that holds the source bytes from offset -5 */
+	movaps	11(%ecx), %xmm2	/* next aligned 16 source bytes */
+L(Shl5Start):
+	pcmpeqb	%xmm2, %xmm0
+	pmovmskb %xmm0, %eax	/* NUL mask of the block just loaded */
+	movaps	%xmm2, %xmm3	/* keep an unshifted copy for the next palignr */
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit5Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl5LoopExit)
+
+	palignr	$5, %xmm1, %xmm2	/* %xmm2 = the 16 source bytes starting at (%ecx) */
+	movaps	%xmm3, %xmm1
+	movaps	%xmm2, (%edx)
+	movaps	27(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit5Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl5LoopExit)
+
+	palignr	$5, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	27(%ecx), %xmm2
+	movaps	%xmm3, %xmm1
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit5Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl5LoopExit)
+
+	palignr	$5, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	27(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit5Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl5LoopExit)
+
+	palignr	$5, %xmm3, %xmm2
+	movaps	%xmm2, (%edx)
+	lea	27(%ecx), %ecx
+	lea	16(%edx), %edx
+
+	mov	%ecx, %eax
+	and	$-0x40, %ecx	/* round the aligned source position down to a 64-byte boundary */
+	sub	%ecx, %eax	/* %eax = number of bytes stepped back */
+	lea	-11(%ecx), %ecx	/* restore the 5-byte skew so that 11(%ecx) is aligned again */
+	sub	%eax, %edx	/* step the destination back by the same amount */
+#ifdef USE_AS_STRNCPY
+	add	%eax, %ebx	/* give the stepped-back bytes back to the remaining count */
+#endif
+	movaps	-5(%ecx), %xmm1
+
+L(Shl5LoopStart):	/* 64 bytes per iteration */
+	movaps	11(%ecx), %xmm2
+	movaps	27(%ecx), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	43(%ecx), %xmm4
+	movaps	%xmm4, %xmm7
+	movaps	59(%ecx), %xmm5
+	pminub	%xmm2, %xmm6
+	pminub	%xmm5, %xmm7
+	pminub	%xmm6, %xmm7	/* byte-wise minimum of the four blocks: zero byte means a NUL somewhere */
+	pcmpeqb	%xmm0, %xmm7
+	pmovmskb %xmm7, %eax
+	movaps	%xmm5, %xmm7	/* unshifted last block becomes the carry-in of the next iteration */
+	palignr	$5, %xmm4, %xmm5
+	palignr	$5, %xmm3, %xmm4
+	test	%eax, %eax
+	jnz	L(Shl5Start)	/* NUL found: redo these bytes 16 at a time; %xmm1 and %xmm2 are still intact */
+#ifdef USE_AS_STRNCPY
+	sub	$64, %ebx
+	jbe	L(StrncpyLeave5)
+#endif
+	palignr	$5, %xmm2, %xmm3
+	lea	64(%ecx), %ecx
+	palignr	$5, %xmm1, %xmm2
+	movaps	%xmm7, %xmm1
+	movaps	%xmm5, 48(%edx)
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	jmp	L(Shl5LoopStart)
+
+L(Shl5LoopExit):	/* copy the 11 bytes that precede the block holding the NUL */
+	movlpd	(%ecx), %xmm0
+	movl	7(%ecx), %esi	/* %esi is used as scratch for bytes 7..10 before it receives the offset */
+	movlpd	%xmm0, (%edx)
+	movl	%esi, 7(%edx)
+	mov	$11, %esi	/* offset of the block holding the NUL, consumed by CopyFrom1To16Bytes */
+	jmp	L(CopyFrom1To16Bytes)
+
+	.p2align 4
+L(Shl6):	/* Copy path for ecx 6 bytes past a 16-byte boundary (movaps needs aligned addresses); chunks are realigned with palignr $6. */
+	movaps	-6(%ecx), %xmm1	/* aligned chunk straddling ecx */
+	movaps	10(%ecx), %xmm2	/* next aligned chunk */
+L(Shl6Start):	/* Also entered from L(Shl6LoopStart) with xmm1/xmm2 holding these two chunks. */
+	pcmpeqb	%xmm2, %xmm0	/* byte-wise compare with xmm0 (presumably all-zero on entry -- set outside this view) */
+	pmovmskb %xmm0, %eax	/* eax = bitmask of matching bytes */
+	movaps	%xmm2, %xmm3	/* keep the raw chunk; palignr below overwrites xmm2 */
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx	/* ebx looks like the remaining strncpy length -- TODO confirm */
+	jbe	L(StrncpyExit6Case2OrCase3)	/* zero or borrow: leave via the length-limited exit */
+#endif
+	test	%eax, %eax
+	jnz	L(Shl6LoopExit)	/* a byte matched in this chunk */
+
+	palignr	$6, %xmm1, %xmm2	/* xmm2 = the 16 bytes starting at ecx (high 10 bytes of xmm1, low 6 of xmm2) */
+	movaps	%xmm3, %xmm1	/* raw chunk becomes the previous chunk */
+	movaps	%xmm2, (%edx)	/* aligned 16-byte store to the destination */
+	movaps	26(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0	/* xmm0 is all-zero again: this is reached only when the previous mask was empty */
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit6Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl6LoopExit)
+
+	palignr	$6, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	26(%ecx), %xmm2
+	movaps	%xmm3, %xmm1
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit6Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl6LoopExit)
+
+	palignr	$6, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	26(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit6Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl6LoopExit)
+
+	palignr	$6, %xmm3, %xmm2	/* xmm3 holds the previous raw chunk here (xmm1 was not refreshed this round) */
+	movaps	%xmm2, (%edx)
+	lea	26(%ecx), %ecx	/* ecx = aligned address just past the last chunk loaded */
+	lea	16(%edx), %edx
+
+	mov	%ecx, %eax
+	and	$-0x40, %ecx	/* round down to a 64-byte boundary */
+	sub	%ecx, %eax	/* eax = bytes rounded off */
+	lea	-10(%ecx), %ecx	/* bias ecx so that 10(%ecx) is that boundary */
+	sub	%eax, %edx	/* step the destination back by the same amount */
+#ifdef USE_AS_STRNCPY
+	add	%eax, %ebx	/* and return those bytes to the length budget */
+#endif
+	movaps	-6(%ecx), %xmm1
+
+L(Shl6LoopStart):	/* Main loop: four aligned 16-byte chunks (64 bytes) per iteration. */
+	movaps	10(%ecx), %xmm2
+	movaps	26(%ecx), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	42(%ecx), %xmm4
+	movaps	%xmm4, %xmm7
+	movaps	58(%ecx), %xmm5
+	pminub	%xmm2, %xmm6
+	pminub	%xmm5, %xmm7
+	pminub	%xmm6, %xmm7	/* xmm7 = byte-wise unsigned minimum of all four chunks */
+	pcmpeqb	%xmm0, %xmm7	/* a zero byte in any chunk survives the minimum, so one compare covers all 64 bytes (given xmm0 is zero) */
+	pmovmskb %xmm7, %eax
+	movaps	%xmm5, %xmm7	/* save the raw last chunk; it is next iteration's xmm1 */
+	palignr	$6, %xmm4, %xmm5
+	palignr	$6, %xmm3, %xmm4
+	test	%eax, %eax
+	jnz	L(Shl6Start)	/* match in these 64 bytes: redo them 16 at a time (xmm1/xmm2 are still intact) */
+#ifdef USE_AS_STRNCPY
+	sub	$64, %ebx
+	jbe	L(StrncpyLeave6)
+#endif
+	palignr	$6, %xmm2, %xmm3
+	lea	64(%ecx), %ecx
+	palignr	$6, %xmm1, %xmm2
+	movaps	%xmm7, %xmm1
+	movaps	%xmm5, 48(%edx)
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	jmp	L(Shl6LoopStart)
+
+L(Shl6LoopExit):	/* Match is in the chunk at 10(%ecx): copy the 10 bytes before it (8 at offset 0, 4 overlapping at offset 6), then hand off. */
+	movlpd	(%ecx), %xmm0
+	movl	6(%ecx), %esi
+	movlpd	%xmm0, (%edx)
+	movl	%esi, 6(%edx)
+	mov	$10, %esi	/* L(CopyFrom1To16Bytes) adds esi to ecx and edx */
+	jmp	L(CopyFrom1To16Bytes)
+
+	.p2align 4
+L(Shl7):	/* Copy path for ecx 7 bytes past a 16-byte boundary (movaps needs aligned addresses); chunks are realigned with palignr $7. */
+	movaps	-7(%ecx), %xmm1	/* aligned chunk straddling ecx */
+	movaps	9(%ecx), %xmm2	/* next aligned chunk */
+L(Shl7Start):	/* Also entered from L(Shl7LoopStart) with xmm1/xmm2 holding these two chunks. */
+	pcmpeqb	%xmm2, %xmm0	/* byte-wise compare with xmm0 (presumably all-zero on entry -- set outside this view) */
+	pmovmskb %xmm0, %eax	/* eax = bitmask of matching bytes */
+	movaps	%xmm2, %xmm3	/* keep the raw chunk; palignr below overwrites xmm2 */
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx	/* ebx looks like the remaining strncpy length -- TODO confirm */
+	jbe	L(StrncpyExit7Case2OrCase3)	/* zero or borrow: leave via the length-limited exit */
+#endif
+	test	%eax, %eax
+	jnz	L(Shl7LoopExit)	/* a byte matched in this chunk */
+
+	palignr	$7, %xmm1, %xmm2	/* xmm2 = the 16 bytes starting at ecx (high 9 bytes of xmm1, low 7 of xmm2) */
+	movaps	%xmm3, %xmm1	/* raw chunk becomes the previous chunk */
+	movaps	%xmm2, (%edx)	/* aligned 16-byte store to the destination */
+	movaps	25(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0	/* xmm0 is all-zero again: this is reached only when the previous mask was empty */
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit7Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl7LoopExit)
+
+	palignr	$7, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	25(%ecx), %xmm2
+	movaps	%xmm3, %xmm1
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit7Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl7LoopExit)
+
+	palignr	$7, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	25(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit7Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl7LoopExit)
+
+	palignr	$7, %xmm3, %xmm2	/* xmm3 holds the previous raw chunk here (xmm1 was not refreshed this round) */
+	movaps	%xmm2, (%edx)
+	lea	25(%ecx), %ecx	/* ecx = aligned address just past the last chunk loaded */
+	lea	16(%edx), %edx
+
+	mov	%ecx, %eax
+	and	$-0x40, %ecx	/* round down to a 64-byte boundary */
+	sub	%ecx, %eax	/* eax = bytes rounded off */
+	lea	-9(%ecx), %ecx	/* bias ecx so that 9(%ecx) is that boundary */
+	sub	%eax, %edx	/* step the destination back by the same amount */
+#ifdef USE_AS_STRNCPY
+	add	%eax, %ebx	/* and return those bytes to the length budget */
+#endif
+	movaps	-7(%ecx), %xmm1
+
+L(Shl7LoopStart):	/* Main loop: four aligned 16-byte chunks (64 bytes) per iteration. */
+	movaps	9(%ecx), %xmm2
+	movaps	25(%ecx), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	41(%ecx), %xmm4
+	movaps	%xmm4, %xmm7
+	movaps	57(%ecx), %xmm5
+	pminub	%xmm2, %xmm6
+	pminub	%xmm5, %xmm7
+	pminub	%xmm6, %xmm7	/* xmm7 = byte-wise unsigned minimum of all four chunks */
+	pcmpeqb	%xmm0, %xmm7	/* a zero byte in any chunk survives the minimum, so one compare covers all 64 bytes (given xmm0 is zero) */
+	pmovmskb %xmm7, %eax
+	movaps	%xmm5, %xmm7	/* save the raw last chunk; it is next iteration's xmm1 */
+	palignr	$7, %xmm4, %xmm5
+	palignr	$7, %xmm3, %xmm4
+	test	%eax, %eax
+	jnz	L(Shl7Start)	/* match in these 64 bytes: redo them 16 at a time (xmm1/xmm2 are still intact) */
+#ifdef USE_AS_STRNCPY
+	sub	$64, %ebx
+	jbe	L(StrncpyLeave7)
+#endif
+	palignr	$7, %xmm2, %xmm3
+	lea	64(%ecx), %ecx
+	palignr	$7, %xmm1, %xmm2
+	movaps	%xmm7, %xmm1
+	movaps	%xmm5, 48(%edx)
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	jmp	L(Shl7LoopStart)
+
+L(Shl7LoopExit):	/* Match is in the chunk at 9(%ecx): copy the 9 bytes before it (8 at offset 0, 4 overlapping at offset 5), then hand off. */
+	movlpd	(%ecx), %xmm0
+	movl	5(%ecx), %esi
+	movlpd	%xmm0, (%edx)
+	movl	%esi, 5(%edx)
+	mov	$9, %esi	/* L(CopyFrom1To16Bytes) adds esi to ecx and edx */
+	jmp	L(CopyFrom1To16Bytes)
+
+	.p2align 4
+L(Shl8):	/* Copy path for ecx 8 bytes past a 16-byte boundary (movaps needs aligned addresses); chunks are realigned with palignr $8. */
+	movaps	-8(%ecx), %xmm1	/* aligned chunk straddling ecx */
+	movaps	8(%ecx), %xmm2	/* next aligned chunk */
+L(Shl8Start):	/* Also entered from L(Shl8LoopStart) with xmm1/xmm2 holding these two chunks. */
+	pcmpeqb	%xmm2, %xmm0	/* byte-wise compare with xmm0 (presumably all-zero on entry -- set outside this view) */
+	pmovmskb %xmm0, %eax	/* eax = bitmask of matching bytes */
+	movaps	%xmm2, %xmm3	/* keep the raw chunk; palignr below overwrites xmm2 */
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx	/* ebx looks like the remaining strncpy length -- TODO confirm */
+	jbe	L(StrncpyExit8Case2OrCase3)	/* zero or borrow: leave via the length-limited exit */
+#endif
+	test	%eax, %eax
+	jnz	L(Shl8LoopExit)	/* a byte matched in this chunk */
+
+	palignr	$8, %xmm1, %xmm2	/* xmm2 = the 16 bytes starting at ecx (high 8 bytes of xmm1, low 8 of xmm2) */
+	movaps	%xmm3, %xmm1	/* raw chunk becomes the previous chunk */
+	movaps	%xmm2, (%edx)	/* aligned 16-byte store to the destination */
+	movaps	24(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0	/* xmm0 is all-zero again: this is reached only when the previous mask was empty */
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit8Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl8LoopExit)
+
+	palignr	$8, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	24(%ecx), %xmm2
+	movaps	%xmm3, %xmm1
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit8Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl8LoopExit)
+
+	palignr	$8, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	24(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit8Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl8LoopExit)
+
+	palignr	$8, %xmm3, %xmm2	/* xmm3 holds the previous raw chunk here (xmm1 was not refreshed this round) */
+	movaps	%xmm2, (%edx)
+	lea	24(%ecx), %ecx	/* ecx = aligned address just past the last chunk loaded */
+	lea	16(%edx), %edx
+
+	mov	%ecx, %eax
+	and	$-0x40, %ecx	/* round down to a 64-byte boundary */
+	sub	%ecx, %eax	/* eax = bytes rounded off */
+	lea	-8(%ecx), %ecx	/* bias ecx so that 8(%ecx) is that boundary */
+	sub	%eax, %edx	/* step the destination back by the same amount */
+#ifdef USE_AS_STRNCPY
+	add	%eax, %ebx	/* and return those bytes to the length budget */
+#endif
+	movaps	-8(%ecx), %xmm1
+
+L(Shl8LoopStart):	/* Main loop: four aligned 16-byte chunks (64 bytes) per iteration. */
+	movaps	8(%ecx), %xmm2
+	movaps	24(%ecx), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	40(%ecx), %xmm4
+	movaps	%xmm4, %xmm7
+	movaps	56(%ecx), %xmm5
+	pminub	%xmm2, %xmm6
+	pminub	%xmm5, %xmm7
+	pminub	%xmm6, %xmm7	/* xmm7 = byte-wise unsigned minimum of all four chunks */
+	pcmpeqb	%xmm0, %xmm7	/* a zero byte in any chunk survives the minimum, so one compare covers all 64 bytes (given xmm0 is zero) */
+	pmovmskb %xmm7, %eax
+	movaps	%xmm5, %xmm7	/* save the raw last chunk; it is next iteration's xmm1 */
+	palignr	$8, %xmm4, %xmm5
+	palignr	$8, %xmm3, %xmm4
+	test	%eax, %eax
+	jnz	L(Shl8Start)	/* match in these 64 bytes: redo them 16 at a time (xmm1/xmm2 are still intact) */
+#ifdef USE_AS_STRNCPY
+	sub	$64, %ebx
+	jbe	L(StrncpyLeave8)
+#endif
+	palignr	$8, %xmm2, %xmm3
+	lea	64(%ecx), %ecx
+	palignr	$8, %xmm1, %xmm2
+	movaps	%xmm7, %xmm1
+	movaps	%xmm5, 48(%edx)
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	jmp	L(Shl8LoopStart)
+
+L(Shl8LoopExit):	/* Match is in the chunk at 8(%ecx): copy the 8 bytes before it with one 8-byte move, then hand off. */
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	mov	$8, %esi	/* L(CopyFrom1To16Bytes) adds esi to ecx and edx */
+	jmp	L(CopyFrom1To16Bytes)
+
+	.p2align 4
+L(Shl9):	/* Copy path for ecx 9 bytes past a 16-byte boundary (movaps needs aligned addresses); chunks are realigned with palignr $9. */
+	movaps	-9(%ecx), %xmm1	/* aligned chunk straddling ecx */
+	movaps	7(%ecx), %xmm2	/* next aligned chunk */
+L(Shl9Start):	/* Also entered from L(Shl9LoopStart) with xmm1/xmm2 holding these two chunks. */
+	pcmpeqb	%xmm2, %xmm0	/* byte-wise compare with xmm0 (presumably all-zero on entry -- set outside this view) */
+	pmovmskb %xmm0, %eax	/* eax = bitmask of matching bytes */
+	movaps	%xmm2, %xmm3	/* keep the raw chunk; palignr below overwrites xmm2 */
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx	/* ebx looks like the remaining strncpy length -- TODO confirm */
+	jbe	L(StrncpyExit9Case2OrCase3)	/* zero or borrow: leave via the length-limited exit */
+#endif
+	test	%eax, %eax
+	jnz	L(Shl9LoopExit)	/* a byte matched in this chunk */
+
+	palignr	$9, %xmm1, %xmm2	/* xmm2 = the 16 bytes starting at ecx (high 7 bytes of xmm1, low 9 of xmm2) */
+	movaps	%xmm3, %xmm1	/* raw chunk becomes the previous chunk */
+	movaps	%xmm2, (%edx)	/* aligned 16-byte store to the destination */
+	movaps	23(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0	/* xmm0 is all-zero again: this is reached only when the previous mask was empty */
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit9Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl9LoopExit)
+
+	palignr	$9, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	23(%ecx), %xmm2
+	movaps	%xmm3, %xmm1
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit9Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl9LoopExit)
+
+	palignr	$9, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	23(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit9Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl9LoopExit)
+
+	palignr	$9, %xmm3, %xmm2	/* xmm3 holds the previous raw chunk here (xmm1 was not refreshed this round) */
+	movaps	%xmm2, (%edx)
+	lea	23(%ecx), %ecx	/* ecx = aligned address just past the last chunk loaded */
+	lea	16(%edx), %edx
+
+	mov	%ecx, %eax
+	and	$-0x40, %ecx	/* round down to a 64-byte boundary */
+	sub	%ecx, %eax	/* eax = bytes rounded off */
+	lea	-7(%ecx), %ecx	/* bias ecx so that 7(%ecx) is that boundary */
+	sub	%eax, %edx	/* step the destination back by the same amount */
+#ifdef USE_AS_STRNCPY
+	add	%eax, %ebx	/* and return those bytes to the length budget */
+#endif
+	movaps	-9(%ecx), %xmm1
+
+L(Shl9LoopStart):	/* Main loop: four aligned 16-byte chunks (64 bytes) per iteration. */
+	movaps	7(%ecx), %xmm2
+	movaps	23(%ecx), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	39(%ecx), %xmm4
+	movaps	%xmm4, %xmm7
+	movaps	55(%ecx), %xmm5
+	pminub	%xmm2, %xmm6
+	pminub	%xmm5, %xmm7
+	pminub	%xmm6, %xmm7	/* xmm7 = byte-wise unsigned minimum of all four chunks */
+	pcmpeqb	%xmm0, %xmm7	/* a zero byte in any chunk survives the minimum, so one compare covers all 64 bytes (given xmm0 is zero) */
+	pmovmskb %xmm7, %eax
+	movaps	%xmm5, %xmm7	/* save the raw last chunk; it is next iteration's xmm1 */
+	palignr	$9, %xmm4, %xmm5
+	palignr	$9, %xmm3, %xmm4
+	test	%eax, %eax
+	jnz	L(Shl9Start)	/* match in these 64 bytes: redo them 16 at a time (xmm1/xmm2 are still intact) */
+#ifdef USE_AS_STRNCPY
+	sub	$64, %ebx
+	jbe	L(StrncpyLeave9)
+#endif
+	palignr	$9, %xmm2, %xmm3
+	lea	64(%ecx), %ecx
+	palignr	$9, %xmm1, %xmm2
+	movaps	%xmm7, %xmm1
+	movaps	%xmm5, 48(%edx)
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	jmp	L(Shl9LoopStart)
+
+L(Shl9LoopExit):	/* Match is in the chunk at 7(%ecx): one 8-byte move starting at offset -1 ends at offset 6, covering the 7 bytes before that chunk. */
+	movlpd	-1(%ecx), %xmm0
+	movlpd	%xmm0, -1(%edx)
+	mov	$7, %esi	/* L(CopyFrom1To16Bytes) adds esi to ecx and edx */
+	jmp	L(CopyFrom1To16Bytes)
+
+	.p2align 4
+L(Shl10):	/* Copy path for ecx 10 bytes past a 16-byte boundary (movaps needs aligned addresses); chunks are realigned with palignr $10. */
+	movaps	-10(%ecx), %xmm1	/* aligned chunk straddling ecx */
+	movaps	6(%ecx), %xmm2	/* next aligned chunk */
+L(Shl10Start):	/* Also entered from L(Shl10LoopStart) with xmm1/xmm2 holding these two chunks. */
+	pcmpeqb	%xmm2, %xmm0	/* byte-wise compare with xmm0 (presumably all-zero on entry -- set outside this view) */
+	pmovmskb %xmm0, %eax	/* eax = bitmask of matching bytes */
+	movaps	%xmm2, %xmm3	/* keep the raw chunk; palignr below overwrites xmm2 */
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx	/* ebx looks like the remaining strncpy length -- TODO confirm */
+	jbe	L(StrncpyExit10Case2OrCase3)	/* zero or borrow: leave via the length-limited exit */
+#endif
+	test	%eax, %eax
+	jnz	L(Shl10LoopExit)	/* a byte matched in this chunk */
+
+	palignr	$10, %xmm1, %xmm2	/* xmm2 = the 16 bytes starting at ecx (high 6 bytes of xmm1, low 10 of xmm2) */
+	movaps	%xmm3, %xmm1	/* raw chunk becomes the previous chunk */
+	movaps	%xmm2, (%edx)	/* aligned 16-byte store to the destination */
+	movaps	22(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0	/* xmm0 is all-zero again: this is reached only when the previous mask was empty */
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit10Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl10LoopExit)
+
+	palignr	$10, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	22(%ecx), %xmm2
+	movaps	%xmm3, %xmm1
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit10Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl10LoopExit)
+
+	palignr	$10, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	22(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit10Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl10LoopExit)
+
+	palignr	$10, %xmm3, %xmm2	/* xmm3 holds the previous raw chunk here (xmm1 was not refreshed this round) */
+	movaps	%xmm2, (%edx)
+	lea	22(%ecx), %ecx	/* ecx = aligned address just past the last chunk loaded */
+	lea	16(%edx), %edx
+
+	mov	%ecx, %eax
+	and	$-0x40, %ecx	/* round down to a 64-byte boundary */
+	sub	%ecx, %eax	/* eax = bytes rounded off */
+	lea	-6(%ecx), %ecx	/* bias ecx so that 6(%ecx) is that boundary */
+	sub	%eax, %edx	/* step the destination back by the same amount */
+#ifdef USE_AS_STRNCPY
+	add	%eax, %ebx	/* and return those bytes to the length budget */
+#endif
+	movaps	-10(%ecx), %xmm1
+
+L(Shl10LoopStart):	/* Main loop: four aligned 16-byte chunks (64 bytes) per iteration. */
+	movaps	6(%ecx), %xmm2
+	movaps	22(%ecx), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	38(%ecx), %xmm4
+	movaps	%xmm4, %xmm7
+	movaps	54(%ecx), %xmm5
+	pminub	%xmm2, %xmm6
+	pminub	%xmm5, %xmm7
+	pminub	%xmm6, %xmm7	/* xmm7 = byte-wise unsigned minimum of all four chunks */
+	pcmpeqb	%xmm0, %xmm7	/* a zero byte in any chunk survives the minimum, so one compare covers all 64 bytes (given xmm0 is zero) */
+	pmovmskb %xmm7, %eax
+	movaps	%xmm5, %xmm7	/* save the raw last chunk; it is next iteration's xmm1 */
+	palignr	$10, %xmm4, %xmm5
+	palignr	$10, %xmm3, %xmm4
+	test	%eax, %eax
+	jnz	L(Shl10Start)	/* match in these 64 bytes: redo them 16 at a time (xmm1/xmm2 are still intact) */
+#ifdef USE_AS_STRNCPY
+	sub	$64, %ebx
+	jbe	L(StrncpyLeave10)
+#endif
+	palignr	$10, %xmm2, %xmm3
+	lea	64(%ecx), %ecx
+	palignr	$10, %xmm1, %xmm2
+	movaps	%xmm7, %xmm1
+	movaps	%xmm5, 48(%edx)
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	jmp	L(Shl10LoopStart)
+
+L(Shl10LoopExit):	/* Match is in the chunk at 6(%ecx): one 8-byte move starting at offset -2 ends at offset 5, covering the 6 bytes before that chunk. */
+	movlpd	-2(%ecx), %xmm0
+	movlpd	%xmm0, -2(%edx)
+	mov	$6, %esi	/* L(CopyFrom1To16Bytes) adds esi to ecx and edx */
+	jmp	L(CopyFrom1To16Bytes)
+
+	.p2align 4
+L(Shl11):	/* Copy path for ecx 11 bytes past a 16-byte boundary (movaps needs aligned addresses); chunks are realigned with palignr $11. */
+	movaps	-11(%ecx), %xmm1	/* aligned chunk straddling ecx */
+	movaps	5(%ecx), %xmm2	/* next aligned chunk */
+L(Shl11Start):	/* Also entered from L(Shl11LoopStart) with xmm1/xmm2 holding these two chunks. */
+	pcmpeqb	%xmm2, %xmm0	/* byte-wise compare with xmm0 (presumably all-zero on entry -- set outside this view) */
+	pmovmskb %xmm0, %eax	/* eax = bitmask of matching bytes */
+	movaps	%xmm2, %xmm3	/* keep the raw chunk; palignr below overwrites xmm2 */
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx	/* ebx looks like the remaining strncpy length -- TODO confirm */
+	jbe	L(StrncpyExit11Case2OrCase3)	/* zero or borrow: leave via the length-limited exit */
+#endif
+	test	%eax, %eax
+	jnz	L(Shl11LoopExit)	/* a byte matched in this chunk */
+
+	palignr	$11, %xmm1, %xmm2	/* xmm2 = the 16 bytes starting at ecx (high 5 bytes of xmm1, low 11 of xmm2) */
+	movaps	%xmm3, %xmm1	/* raw chunk becomes the previous chunk */
+	movaps	%xmm2, (%edx)	/* aligned 16-byte store to the destination */
+	movaps	21(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0	/* xmm0 is all-zero again: this is reached only when the previous mask was empty */
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit11Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl11LoopExit)
+
+	palignr	$11, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	21(%ecx), %xmm2
+	movaps	%xmm3, %xmm1
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit11Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl11LoopExit)
+
+	palignr	$11, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	21(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit11Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl11LoopExit)
+
+	palignr	$11, %xmm3, %xmm2	/* xmm3 holds the previous raw chunk here (xmm1 was not refreshed this round) */
+	movaps	%xmm2, (%edx)
+	lea	21(%ecx), %ecx	/* ecx = aligned address just past the last chunk loaded */
+	lea	16(%edx), %edx
+
+	mov	%ecx, %eax
+	and	$-0x40, %ecx	/* round down to a 64-byte boundary */
+	sub	%ecx, %eax	/* eax = bytes rounded off */
+	lea	-5(%ecx), %ecx	/* bias ecx so that 5(%ecx) is that boundary */
+	sub	%eax, %edx	/* step the destination back by the same amount */
+#ifdef USE_AS_STRNCPY
+	add	%eax, %ebx	/* and return those bytes to the length budget */
+#endif
+	movaps	-11(%ecx), %xmm1
+
+L(Shl11LoopStart):	/* Main loop: four aligned 16-byte chunks (64 bytes) per iteration. */
+	movaps	5(%ecx), %xmm2
+	movaps	21(%ecx), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	37(%ecx), %xmm4
+	movaps	%xmm4, %xmm7
+	movaps	53(%ecx), %xmm5
+	pminub	%xmm2, %xmm6
+	pminub	%xmm5, %xmm7
+	pminub	%xmm6, %xmm7	/* xmm7 = byte-wise unsigned minimum of all four chunks */
+	pcmpeqb	%xmm0, %xmm7	/* a zero byte in any chunk survives the minimum, so one compare covers all 64 bytes (given xmm0 is zero) */
+	pmovmskb %xmm7, %eax
+	movaps	%xmm5, %xmm7	/* save the raw last chunk; it is next iteration's xmm1 */
+	palignr	$11, %xmm4, %xmm5
+	palignr	$11, %xmm3, %xmm4
+	test	%eax, %eax
+	jnz	L(Shl11Start)	/* match in these 64 bytes: redo them 16 at a time (xmm1/xmm2 are still intact) */
+#ifdef USE_AS_STRNCPY
+	sub	$64, %ebx
+	jbe	L(StrncpyLeave11)
+#endif
+	palignr	$11, %xmm2, %xmm3
+	lea	64(%ecx), %ecx
+	palignr	$11, %xmm1, %xmm2
+	movaps	%xmm7, %xmm1
+	movaps	%xmm5, 48(%edx)
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	jmp	L(Shl11LoopStart)
+
+L(Shl11LoopExit):	/* Match is in the chunk at 5(%ecx): one 8-byte move starting at offset -3 ends at offset 4, covering the 5 bytes before that chunk. */
+	movlpd	-3(%ecx), %xmm0
+	movlpd	%xmm0, -3(%edx)
+	mov	$5, %esi	/* L(CopyFrom1To16Bytes) adds esi to ecx and edx */
+	jmp	L(CopyFrom1To16Bytes)
+
+	.p2align 4
+L(Shl12):	/* Copy path for ecx 12 bytes past a 16-byte boundary (movaps needs aligned addresses); chunks are realigned with palignr $12. */
+	movaps	-12(%ecx), %xmm1	/* aligned chunk straddling ecx */
+	movaps	4(%ecx), %xmm2	/* next aligned chunk */
+L(Shl12Start):	/* Also entered from L(Shl12LoopStart) with xmm1/xmm2 holding these two chunks. */
+	pcmpeqb	%xmm2, %xmm0	/* byte-wise compare with xmm0 (presumably all-zero on entry -- set outside this view) */
+	pmovmskb %xmm0, %eax	/* eax = bitmask of matching bytes */
+	movaps	%xmm2, %xmm3	/* keep the raw chunk; palignr below overwrites xmm2 */
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx	/* ebx looks like the remaining strncpy length -- TODO confirm */
+	jbe	L(StrncpyExit12Case2OrCase3)	/* zero or borrow: leave via the length-limited exit */
+#endif
+	test	%eax, %eax
+	jnz	L(Shl12LoopExit)	/* a byte matched in this chunk */
+
+	palignr	$12, %xmm1, %xmm2	/* xmm2 = the 16 bytes starting at ecx (high 4 bytes of xmm1, low 12 of xmm2) */
+	movaps	%xmm3, %xmm1	/* raw chunk becomes the previous chunk */
+	movaps	%xmm2, (%edx)	/* aligned 16-byte store to the destination */
+	movaps	20(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0	/* xmm0 is all-zero again: this is reached only when the previous mask was empty */
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit12Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl12LoopExit)
+
+	palignr	$12, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	20(%ecx), %xmm2
+	movaps	%xmm3, %xmm1
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit12Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl12LoopExit)
+
+	palignr	$12, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	20(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit12Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl12LoopExit)
+
+	palignr	$12, %xmm3, %xmm2	/* xmm3 holds the previous raw chunk here (xmm1 was not refreshed this round) */
+	movaps	%xmm2, (%edx)
+	lea	20(%ecx), %ecx	/* ecx = aligned address just past the last chunk loaded */
+	lea	16(%edx), %edx
+
+	mov	%ecx, %eax
+	and	$-0x40, %ecx	/* round down to a 64-byte boundary */
+	sub	%ecx, %eax	/* eax = bytes rounded off */
+	lea	-4(%ecx), %ecx	/* bias ecx so that 4(%ecx) is that boundary */
+	sub	%eax, %edx	/* step the destination back by the same amount */
+#ifdef USE_AS_STRNCPY
+	add	%eax, %ebx	/* and return those bytes to the length budget */
+#endif
+	movaps	-12(%ecx), %xmm1
+
+L(Shl12LoopStart):	/* Main loop: four aligned 16-byte chunks (64 bytes) per iteration. */
+	movaps	4(%ecx), %xmm2
+	movaps	20(%ecx), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	36(%ecx), %xmm4
+	movaps	%xmm4, %xmm7
+	movaps	52(%ecx), %xmm5
+	pminub	%xmm2, %xmm6
+	pminub	%xmm5, %xmm7
+	pminub	%xmm6, %xmm7	/* xmm7 = byte-wise unsigned minimum of all four chunks */
+	pcmpeqb	%xmm0, %xmm7	/* a zero byte in any chunk survives the minimum, so one compare covers all 64 bytes (given xmm0 is zero) */
+	pmovmskb %xmm7, %eax
+	movaps	%xmm5, %xmm7	/* save the raw last chunk; it is next iteration's xmm1 */
+	palignr	$12, %xmm4, %xmm5
+	palignr	$12, %xmm3, %xmm4
+	test	%eax, %eax
+	jnz	L(Shl12Start)	/* match in these 64 bytes: redo them 16 at a time (xmm1/xmm2 are still intact) */
+#ifdef USE_AS_STRNCPY
+	sub	$64, %ebx
+	jbe	L(StrncpyLeave12)
+#endif
+	palignr	$12, %xmm2, %xmm3
+	lea	64(%ecx), %ecx
+	palignr	$12, %xmm1, %xmm2
+	movaps	%xmm7, %xmm1
+	movaps	%xmm5, 48(%edx)
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	jmp	L(Shl12LoopStart)
+
+L(Shl12LoopExit):	/* Match is in the chunk at 4(%ecx): copy the 4 bytes before it with one 4-byte move, then hand off. */
+	movl	(%ecx), %esi
+	movl	%esi, (%edx)
+	mov	$4, %esi	/* L(CopyFrom1To16Bytes) adds esi to ecx and edx */
+	jmp	L(CopyFrom1To16Bytes)
+
+	.p2align 4
+L(Shl13):	/* Copy path for ecx 13 bytes past a 16-byte boundary (movaps needs aligned addresses); chunks are realigned with palignr $13. */
+	movaps	-13(%ecx), %xmm1	/* aligned chunk straddling ecx */
+	movaps	3(%ecx), %xmm2	/* next aligned chunk */
+L(Shl13Start):	/* Also entered from L(Shl13LoopStart) with xmm1/xmm2 holding these two chunks. */
+	pcmpeqb	%xmm2, %xmm0	/* byte-wise compare with xmm0 (presumably all-zero on entry -- set outside this view) */
+	pmovmskb %xmm0, %eax	/* eax = bitmask of matching bytes */
+	movaps	%xmm2, %xmm3	/* keep the raw chunk; palignr below overwrites xmm2 */
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx	/* ebx looks like the remaining strncpy length -- TODO confirm */
+	jbe	L(StrncpyExit13Case2OrCase3)	/* zero or borrow: leave via the length-limited exit */
+#endif
+	test	%eax, %eax
+	jnz	L(Shl13LoopExit)	/* a byte matched in this chunk */
+
+	palignr	$13, %xmm1, %xmm2	/* xmm2 = the 16 bytes starting at ecx (high 3 bytes of xmm1, low 13 of xmm2) */
+	movaps	%xmm3, %xmm1	/* raw chunk becomes the previous chunk */
+	movaps	%xmm2, (%edx)	/* aligned 16-byte store to the destination */
+	movaps	19(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0	/* xmm0 is all-zero again: this is reached only when the previous mask was empty */
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit13Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl13LoopExit)
+
+	palignr	$13, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	19(%ecx), %xmm2
+	movaps	%xmm3, %xmm1
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit13Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl13LoopExit)
+
+	palignr	$13, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	19(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit13Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl13LoopExit)
+
+	palignr	$13, %xmm3, %xmm2	/* xmm3 holds the previous raw chunk here (xmm1 was not refreshed this round) */
+	movaps	%xmm2, (%edx)
+	lea	19(%ecx), %ecx	/* ecx = aligned address just past the last chunk loaded */
+	lea	16(%edx), %edx
+
+	mov	%ecx, %eax
+	and	$-0x40, %ecx	/* round down to a 64-byte boundary */
+	sub	%ecx, %eax	/* eax = bytes rounded off */
+	lea	-3(%ecx), %ecx	/* bias ecx so that 3(%ecx) is that boundary */
+	sub	%eax, %edx	/* step the destination back by the same amount */
+#ifdef USE_AS_STRNCPY
+	add	%eax, %ebx	/* and return those bytes to the length budget */
+#endif
+	movaps	-13(%ecx), %xmm1
+
+L(Shl13LoopStart):	/* Main loop: four aligned 16-byte chunks (64 bytes) per iteration. */
+	movaps	3(%ecx), %xmm2
+	movaps	19(%ecx), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	35(%ecx), %xmm4
+	movaps	%xmm4, %xmm7
+	movaps	51(%ecx), %xmm5
+	pminub	%xmm2, %xmm6
+	pminub	%xmm5, %xmm7
+	pminub	%xmm6, %xmm7	/* xmm7 = byte-wise unsigned minimum of all four chunks */
+	pcmpeqb	%xmm0, %xmm7	/* a zero byte in any chunk survives the minimum, so one compare covers all 64 bytes (given xmm0 is zero) */
+	pmovmskb %xmm7, %eax
+	movaps	%xmm5, %xmm7	/* save the raw last chunk; it is next iteration's xmm1 */
+	palignr	$13, %xmm4, %xmm5
+	palignr	$13, %xmm3, %xmm4
+	test	%eax, %eax
+	jnz	L(Shl13Start)	/* match in these 64 bytes: redo them 16 at a time (xmm1/xmm2 are still intact) */
+#ifdef USE_AS_STRNCPY
+	sub	$64, %ebx
+	jbe	L(StrncpyLeave13)
+#endif
+	palignr	$13, %xmm2, %xmm3
+	lea	64(%ecx), %ecx
+	palignr	$13, %xmm1, %xmm2
+	movaps	%xmm7, %xmm1
+	movaps	%xmm5, 48(%edx)
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	jmp	L(Shl13LoopStart)
+
+L(Shl13LoopExit):	/* Match is in the chunk at 3(%ecx): one 4-byte move starting at offset -1 ends at offset 2, covering the 3 bytes before that chunk. */
+	movl	-1(%ecx), %esi
+	movl	%esi, -1(%edx)
+	mov	$3, %esi	/* L(CopyFrom1To16Bytes) adds esi to ecx and edx */
+	jmp	L(CopyFrom1To16Bytes)
+
+	.p2align 4
+L(Shl14):	/* Copy path for ecx 14 bytes past a 16-byte boundary (movaps needs aligned addresses); chunks are realigned with palignr $14. */
+	movaps	-14(%ecx), %xmm1	/* aligned chunk straddling ecx */
+	movaps	2(%ecx), %xmm2	/* next aligned chunk */
+L(Shl14Start):	/* Also entered from L(Shl14LoopStart) with xmm1/xmm2 holding these two chunks. */
+	pcmpeqb	%xmm2, %xmm0	/* byte-wise compare with xmm0 (presumably all-zero on entry -- set outside this view) */
+	pmovmskb %xmm0, %eax	/* eax = bitmask of matching bytes */
+	movaps	%xmm2, %xmm3	/* keep the raw chunk; palignr below overwrites xmm2 */
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx	/* ebx looks like the remaining strncpy length -- TODO confirm */
+	jbe	L(StrncpyExit14Case2OrCase3)	/* zero or borrow: leave via the length-limited exit */
+#endif
+	test	%eax, %eax
+	jnz	L(Shl14LoopExit)	/* a byte matched in this chunk */
+
+	palignr	$14, %xmm1, %xmm2	/* xmm2 = the 16 bytes starting at ecx (high 2 bytes of xmm1, low 14 of xmm2) */
+	movaps	%xmm3, %xmm1	/* raw chunk becomes the previous chunk */
+	movaps	%xmm2, (%edx)	/* aligned 16-byte store to the destination */
+	movaps	18(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0	/* xmm0 is all-zero again: this is reached only when the previous mask was empty */
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit14Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl14LoopExit)
+
+	palignr	$14, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	18(%ecx), %xmm2
+	movaps	%xmm3, %xmm1
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit14Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl14LoopExit)
+
+	palignr	$14, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	18(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit14Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl14LoopExit)
+
+	palignr	$14, %xmm3, %xmm2	/* xmm3 holds the previous raw chunk here (xmm1 was not refreshed this round) */
+	movaps	%xmm2, (%edx)
+	lea	18(%ecx), %ecx	/* ecx = aligned address just past the last chunk loaded */
+	lea	16(%edx), %edx
+
+	mov	%ecx, %eax
+	and	$-0x40, %ecx	/* round down to a 64-byte boundary */
+	sub	%ecx, %eax	/* eax = bytes rounded off */
+	lea	-2(%ecx), %ecx	/* bias ecx so that 2(%ecx) is that boundary */
+	sub	%eax, %edx	/* step the destination back by the same amount */
+#ifdef USE_AS_STRNCPY
+	add	%eax, %ebx	/* and return those bytes to the length budget */
+#endif
+	movaps	-14(%ecx), %xmm1
+
+L(Shl14LoopStart):	/* Main loop: four aligned 16-byte chunks (64 bytes) per iteration. */
+	movaps	2(%ecx), %xmm2
+	movaps	18(%ecx), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	34(%ecx), %xmm4
+	movaps	%xmm4, %xmm7
+	movaps	50(%ecx), %xmm5
+	pminub	%xmm2, %xmm6
+	pminub	%xmm5, %xmm7
+	pminub	%xmm6, %xmm7	/* xmm7 = byte-wise unsigned minimum of all four chunks */
+	pcmpeqb	%xmm0, %xmm7	/* a zero byte in any chunk survives the minimum, so one compare covers all 64 bytes (given xmm0 is zero) */
+	pmovmskb %xmm7, %eax
+	movaps	%xmm5, %xmm7	/* save the raw last chunk; it is next iteration's xmm1 */
+	palignr	$14, %xmm4, %xmm5
+	palignr	$14, %xmm3, %xmm4
+	test	%eax, %eax
+	jnz	L(Shl14Start)	/* match in these 64 bytes: redo them 16 at a time (xmm1/xmm2 are still intact) */
+#ifdef USE_AS_STRNCPY
+	sub	$64, %ebx
+	jbe	L(StrncpyLeave14)
+#endif
+	palignr	$14, %xmm2, %xmm3
+	lea	64(%ecx), %ecx
+	palignr	$14, %xmm1, %xmm2
+	movaps	%xmm7, %xmm1
+	movaps	%xmm5, 48(%edx)
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	jmp	L(Shl14LoopStart)
+
+L(Shl14LoopExit):	/* Match is in the chunk at 2(%ecx): one 4-byte move starting at offset -2 ends at offset 1, covering the 2 bytes before that chunk. */
+	movl	-2(%ecx), %esi
+	movl	%esi, -2(%edx)
+	mov	$2, %esi	/* L(CopyFrom1To16Bytes) adds esi to ecx and edx */
+	jmp	L(CopyFrom1To16Bytes)
+
+	.p2align 4
+L(Shl15):	/* Copy path for ecx 15 bytes past a 16-byte boundary (movaps needs aligned addresses); chunks are realigned with palignr $15. */
+	movaps	-15(%ecx), %xmm1	/* aligned chunk straddling ecx */
+	movaps	1(%ecx), %xmm2	/* next aligned chunk */
+L(Shl15Start):	/* Also entered from L(Shl15LoopStart) with xmm1/xmm2 holding these two chunks. */
+	pcmpeqb	%xmm2, %xmm0	/* byte-wise compare with xmm0 (presumably all-zero on entry -- set outside this view) */
+	pmovmskb %xmm0, %eax	/* eax = bitmask of matching bytes */
+	movaps	%xmm2, %xmm3	/* keep the raw chunk; palignr below overwrites xmm2 */
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx	/* ebx looks like the remaining strncpy length -- TODO confirm */
+	jbe	L(StrncpyExit15Case2OrCase3)	/* zero or borrow: leave via the length-limited exit */
+#endif
+	test	%eax, %eax
+	jnz	L(Shl15LoopExit)	/* a byte matched in this chunk */
+
+	palignr	$15, %xmm1, %xmm2	/* xmm2 = the 16 bytes starting at ecx (high 1 byte of xmm1, low 15 of xmm2) */
+	movaps	%xmm3, %xmm1	/* raw chunk becomes the previous chunk */
+	movaps	%xmm2, (%edx)	/* aligned 16-byte store to the destination */
+	movaps	17(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0	/* xmm0 is all-zero again: this is reached only when the previous mask was empty */
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit15Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl15LoopExit)
+
+	palignr	$15, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	17(%ecx), %xmm2
+	movaps	%xmm3, %xmm1
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit15Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl15LoopExit)
+
+	palignr	$15, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	17(%ecx), %xmm2
+
+	pcmpeqb	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+#ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	jbe	L(StrncpyExit15Case2OrCase3)
+#endif
+	test	%eax, %eax
+	jnz	L(Shl15LoopExit)
+
+	palignr	$15, %xmm3, %xmm2	/* xmm3 holds the previous raw chunk here (xmm1 was not refreshed this round) */
+	movaps	%xmm2, (%edx)
+	lea	17(%ecx), %ecx	/* ecx = aligned address just past the last chunk loaded */
+	lea	16(%edx), %edx
+
+	mov	%ecx, %eax
+	and	$-0x40, %ecx	/* round down to a 64-byte boundary */
+	sub	%ecx, %eax	/* eax = bytes rounded off */
+	lea	-1(%ecx), %ecx	/* bias ecx so that 1(%ecx) is that boundary */
+	sub	%eax, %edx	/* step the destination back by the same amount */
+#ifdef USE_AS_STRNCPY
+	add	%eax, %ebx	/* and return those bytes to the length budget */
+#endif
+	movaps	-15(%ecx), %xmm1
+
+L(Shl15LoopStart):	/* Main loop: four aligned 16-byte chunks (64 bytes) per iteration. */
+	movaps	1(%ecx), %xmm2
+	movaps	17(%ecx), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	33(%ecx), %xmm4
+	movaps	%xmm4, %xmm7
+	movaps	49(%ecx), %xmm5
+	pminub	%xmm2, %xmm6
+	pminub	%xmm5, %xmm7
+	pminub	%xmm6, %xmm7	/* xmm7 = byte-wise unsigned minimum of all four chunks */
+	pcmpeqb	%xmm0, %xmm7	/* a zero byte in any chunk survives the minimum, so one compare covers all 64 bytes (given xmm0 is zero) */
+	pmovmskb %xmm7, %eax
+	movaps	%xmm5, %xmm7	/* save the raw last chunk; it is next iteration's xmm1 */
+	palignr	$15, %xmm4, %xmm5
+	palignr	$15, %xmm3, %xmm4
+	test	%eax, %eax
+	jnz	L(Shl15Start)	/* match in these 64 bytes: redo them 16 at a time (xmm1/xmm2 are still intact) */
+#ifdef USE_AS_STRNCPY
+	sub	$64, %ebx
+	jbe	L(StrncpyLeave15)
+#endif
+	palignr	$15, %xmm2, %xmm3
+	lea	64(%ecx), %ecx
+	palignr	$15, %xmm1, %xmm2
+	movaps	%xmm7, %xmm1
+	movaps	%xmm5, 48(%edx)
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	jmp	L(Shl15LoopStart)
+
+L(Shl15LoopExit):	/* Match is in the chunk at 1(%ecx): one 4-byte move starting at offset -3 ends at offset 0, covering the 1 byte before that chunk. */
+	movl	-3(%ecx), %esi
+	movl	%esi, -3(%edx)
+	mov	$1, %esi	/* L(CopyFrom1To16Bytes) adds esi to ecx and edx */
+#if defined USE_AS_STRCAT || defined USE_AS_STRLCPY
+	jmp	L(CopyFrom1To16Bytes)	/* in the other builds L(CopyFrom1To16Bytes) is emitted right below, so execution falls through to it */
+#endif
+
+
+#if !defined USE_AS_STRCAT && !defined USE_AS_STRLCPY
+
+	.p2align 4
+L(CopyFrom1To16Bytes):	/* Tail copier: eax is a 16-bit byte mask, esi a byte offset to apply to ecx (source) and edx (destination); dispatches to L(ExitN) for the lowest set mask bit N-1. */
+# ifdef USE_AS_STRNCPY
+	add	$16, %ebx	/* the L(ShlNStart) callers in this file subtract 16 from ebx just before branching here */
+# endif
+	add	%esi, %edx
+	add	%esi, %ecx
+
+	POP	(%esi)	/* POP is a macro defined outside this view; presumably restores a previously pushed esi */
+	test	%al, %al
+	jz	L(ExitHigh8)	/* no bit in mask bits 0..7 */
+
+L(CopyFrom1To16BytesLess8):	/* Here al != 0: the lowest set bit is among bits 0..7. */
+	mov	%al, %ah
+	and	$15, %ah	/* ah = low nibble of al (ah's previous content is not needed any more) */
+	jz	L(ExitHigh4)	/* bits 0..3 clear: lowest set bit is among bits 4..7 */
+
+	test	$0x01, %al
+	jnz	L(Exit1)
+	test	$0x02, %al
+	jnz	L(Exit2)
+	test	$0x04, %al
+	jnz	L(Exit3)	/* otherwise bit 3 must be set: fall through to L(Exit4) */
+
+	.p2align 4
+L(Exit4):	/* Copy 4 bytes from ecx to edx and return. */
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	SAVE_RESULT	(3)	/* macro defined outside this view; the argument is the offset of the last byte copied */
+# ifdef USE_AS_STRNCPY
+	sub	$4, %ebx
+	lea	4(%edx), %ecx	/* ecx = first byte after the copy; lea leaves the flags from sub intact */
+	jnz	L(StrncpyFillTailWithZero1)	/* ebx still non-zero after the sub */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at eax is 0 (eax presumably set by SAVE_RESULT) */
+	sbb	$-1, %eax	/* eax += 1 - CF */
+#  endif
+# endif
+	RETURN1
+
+	.p2align 4
+L(ExitHigh4):	/* Lowest set mask bit is among bits 4..7 of al: pick L(Exit5)..L(Exit8). */
+	test	$0x10, %al
+	jnz	L(Exit5)
+	test	$0x20, %al
+	jnz	L(Exit6)
+	test	$0x40, %al
+	jnz	L(Exit7)	/* otherwise fall through to L(Exit8) */
+
+	.p2align 4
+L(Exit8):	/* Copy 8 bytes from ecx to edx and return. */
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	SAVE_RESULT	(7)	/* macro defined outside this view; the argument is the offset of the last byte copied */
+# ifdef USE_AS_STRNCPY
+	sub	$8, %ebx
+	lea	8(%edx), %ecx	/* ecx = first byte after the copy; lea leaves the flags from sub intact */
+	jnz	L(StrncpyFillTailWithZero1)	/* ebx still non-zero after the sub */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at eax is 0 (eax presumably set by SAVE_RESULT) */
+	sbb	$-1, %eax	/* eax += 1 - CF */
+#  endif
+# endif
+	RETURN1
+
+	.p2align 4
+L(ExitHigh8):	/* al == 0: the lowest set mask bit is in ah (bytes 8..15). */
+	mov	%ah, %al
+	and	$15, %al	/* al = low nibble of ah */
+	jz	L(ExitHigh12)	/* mask bits 8..11 clear */
+
+	test	$0x01, %ah
+	jnz	L(Exit9)
+	test	$0x02, %ah
+	jnz	L(Exit10)
+	test	$0x04, %ah
+	jnz	L(Exit11)	/* otherwise fall through to L(Exit12) */
+
+	.p2align 4
+L(Exit12):	/* Copy 12 bytes from ecx to edx (8 + 4) and return. */
+	movlpd	(%ecx), %xmm0
+	movl	8(%ecx), %eax
+	movlpd	%xmm0, (%edx)
+	movl	%eax, 8(%edx)
+	SAVE_RESULT	(11)	/* macro defined outside this view; the argument is the offset of the last byte copied */
+# ifdef USE_AS_STRNCPY
+	sub	$12, %ebx
+	lea	12(%edx), %ecx	/* ecx = first byte after the copy; lea leaves the flags from sub intact */
+	jnz	L(StrncpyFillTailWithZero1)	/* ebx still non-zero after the sub */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at eax is 0 (eax presumably set by SAVE_RESULT) */
+	sbb	$-1, %eax	/* eax += 1 - CF */
+#  endif
+# endif
+	RETURN1
+
+	.p2align 4
+L(ExitHigh12):	/* Lowest set mask bit is among bits 4..7 of ah: pick L(Exit13)..L(Exit16). */
+	test	$0x10, %ah
+	jnz	L(Exit13)
+	test	$0x20, %ah
+	jnz	L(Exit14)
+	test	$0x40, %ah
+	jnz	L(Exit15)	/* otherwise fall through to L(Exit16) */
+
+	.p2align 4
+L(Exit16):	/* Copy 16 bytes from ecx to edx with unaligned moves and return. */
+	movdqu	(%ecx), %xmm0
+	movdqu	%xmm0, (%edx)
+	SAVE_RESULT	(15)	/* macro defined outside this view; the argument is the offset of the last byte copied */
+# ifdef USE_AS_STRNCPY
+	sub	$16, %ebx
+	lea	16(%edx), %ecx	/* ecx = first byte after the copy; lea leaves the flags from sub intact */
+	jnz	L(StrncpyFillTailWithZero1)	/* ebx still non-zero after the sub */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at eax is 0 (eax presumably set by SAVE_RESULT) */
+	sbb	$-1, %eax	/* eax += 1 - CF */
+#  endif
+# endif
+	RETURN1
+
+#  ifdef USE_AS_STRNCPY
+
+	CFI_PUSH(%esi)
+
+	.p2align 4
+L(CopyFrom1To16BytesCase2):	/* strncpy tail with a non-empty mask in eax: copy up to the lowest set mask bit or ebx bytes, whichever comes first. */
+	add	$16, %ebx
+	add	%esi, %ecx
+	add	%esi, %edx
+
+	POP	(%esi)	/* POP is a macro defined outside this view; presumably restores a previously pushed esi */
+
+	test	%al, %al
+	jz	L(ExitHighCase2)	/* no bit in mask bits 0..7 */
+
+	cmp	$8, %ebx
+	ja	L(CopyFrom1To16BytesLess8)	/* ebx > 8 and the match is within the first 8 bytes: the mask alone decides */
+
+	test	$0x01, %al	/* pattern below: mask bit k set -> L(Exit k+1); else ebx == k+1 -> the same exit */
+	jnz	L(Exit1)
+	cmp	$1, %ebx
+	je	L(Exit1)
+	test	$0x02, %al
+	jnz	L(Exit2)
+	cmp	$2, %ebx
+	je	L(Exit2)
+	test	$0x04, %al
+	jnz	L(Exit3)
+	cmp	$3, %ebx
+	je	L(Exit3)
+	test	$0x08, %al
+	jnz	L(Exit4)
+	cmp	$4, %ebx
+	je	L(Exit4)
+	test	$0x10, %al
+	jnz	L(Exit5)
+	cmp	$5, %ebx
+	je	L(Exit5)
+	test	$0x20, %al
+	jnz	L(Exit6)
+	cmp	$6, %ebx
+	je	L(Exit6)
+	test	$0x40, %al
+	jnz	L(Exit7)
+	cmp	$7, %ebx
+	je	L(Exit7)
+	jmp	L(Exit8)
+
+	.p2align 4
+L(ExitHighCase2):	/* Case 2 with al == 0: any mask bit is in ah (bytes 8..15). */
+	cmp	$8, %ebx
+	jbe	L(CopyFrom1To16BytesLess8Case3)	/* ebx <= 8 and no match in the first 8 bytes: only ebx decides */
+
+	test	$0x01, %ah	/* pattern below: mask bit k of ah set -> L(Exit k+9); else ebx == k+9 -> the same exit */
+	jnz	L(Exit9)
+	cmp	$9, %ebx
+	je	L(Exit9)
+	test	$0x02, %ah
+	jnz	L(Exit10)
+	cmp	$10, %ebx
+	je	L(Exit10)
+	test	$0x04, %ah
+	jnz	L(Exit11)
+	cmp	$11, %ebx
+	je	L(Exit11)
+	test	$0x8, %ah
+	jnz	L(Exit12)
+	cmp	$12, %ebx
+	je	L(Exit12)
+	test	$0x10, %ah
+	jnz	L(Exit13)
+	cmp	$13, %ebx
+	je	L(Exit13)
+	test	$0x20, %ah
+	jnz	L(Exit14)
+	cmp	$14, %ebx
+	je	L(Exit14)
+	test	$0x40, %ah
+	jnz	L(Exit15)
+	cmp	$15, %ebx
+	je	L(Exit15)
+	jmp	L(Exit16)
+
+	CFI_PUSH(%esi)
+
+	.p2align 4
+L(CopyFrom1To16BytesCase2OrCase3):	/* strncpy tail selector: non-empty mask in eax -> Case2, empty mask -> Case3 below. */
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)
+
+	.p2align 4
+L(CopyFrom1To16BytesCase3):	/* Empty mask: the number of bytes to copy is taken from ebx alone. */
+	add	$16, %ebx
+	add	%esi, %edx
+	add	%esi, %ecx
+
+	POP	(%esi)	/* POP is a macro defined outside this view; presumably restores a previously pushed esi */
+
+	cmp	$8, %ebx
+	ja	L(ExitHigh8Case3)
+
+L(CopyFrom1To16BytesLess8Case3):	/* Also reached from L(ExitHighCase2) with ebx <= 8. */
+	cmp	$4, %ebx
+	ja	L(ExitHigh4Case3)
+
+	cmp	$1, %ebx
+	je	L(Exit1)
+	cmp	$2, %ebx
+	je	L(Exit2)
+	cmp	$3, %ebx
+	je	L(Exit3)
+	movl	(%ecx), %eax	/* ebx is neither 1..3 nor above 4: copy 4 bytes inline */
+	movl	%eax, (%edx)
+	SAVE_RESULT	(4)	/* macro defined outside this view; here the argument equals the byte count, unlike the L(ExitN) stubs */
+	RETURN1
+
+	.p2align 4
+L(ExitHigh4Case3):	/* Case 3 with ebx above 4 and at most 8. */
+	cmp	$5, %ebx
+	je	L(Exit5)
+	cmp	$6, %ebx
+	je	L(Exit6)
+	cmp	$7, %ebx
+	je	L(Exit7)
+	movlpd	(%ecx), %xmm0	/* remaining value in this range is 8: copy 8 bytes inline */
+	movlpd	%xmm0, (%edx)
+	SAVE_RESULT	(8)	/* macro defined outside this view; here the argument equals the byte count */
+	RETURN1
+
+	.p2align 4
+L(ExitHigh8Case3):	/* Case 3 with ebx above 8. */
+	cmp	$12, %ebx
+	ja	L(ExitHigh12Case3)
+
+	cmp	$9, %ebx
+	je	L(Exit9)
+	cmp	$10, %ebx
+	je	L(Exit10)
+	cmp	$11, %ebx
+	je	L(Exit11)
+	movlpd	(%ecx), %xmm0	/* remaining value in this range is 12: copy 8 + 4 bytes inline */
+	movl	8(%ecx), %eax
+	movlpd	%xmm0, (%edx)
+	movl	%eax, 8(%edx)
+	SAVE_RESULT	(12)	/* macro defined outside this view; here the argument equals the byte count */
+	RETURN1
+
+	.p2align 4
+L(ExitHigh12Case3):	/* Case 3 with ebx above 12. */
+	cmp	$13, %ebx
+	je	L(Exit13)
+	cmp	$14, %ebx
+	je	L(Exit14)
+	cmp	$15, %ebx
+	je	L(Exit15)
+	movlpd	(%ecx), %xmm0	/* none of 13..15: copy 16 bytes inline as two 8-byte halves */
+	movlpd	8(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)
+	movlpd	%xmm1, 8(%edx)
+	SAVE_RESULT	(16)	/* macro defined outside this view; here the argument equals the byte count */
+	RETURN1
+
+# endif
+
+	.p2align 4
+L(Exit1):	/* Copy 1 byte from ecx to edx and return. */
+	movb	(%ecx), %al
+	movb	%al, (%edx)
+	SAVE_RESULT	(0)	/* macro defined outside this view; the argument is the offset of the last byte copied */
+# ifdef USE_AS_STRNCPY
+	sub	$1, %ebx
+	lea	1(%edx), %ecx	/* ecx = first byte after the copy; lea leaves the flags from sub intact */
+	jnz	L(StrncpyFillTailWithZero1)	/* ebx still non-zero after the sub */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at eax is 0 (eax presumably set by SAVE_RESULT) */
+	sbb	$-1, %eax	/* eax += 1 - CF */
+#  endif
+# endif
+	RETURN1
+
+	.p2align 4
+L(Exit2):	/* Copy 2 bytes from ecx to edx and return. */
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+	SAVE_RESULT	(1)	/* macro defined outside this view; the argument is the offset of the last byte copied */
+# ifdef USE_AS_STRNCPY
+	sub	$2, %ebx
+	lea	2(%edx), %ecx	/* ecx = first byte after the copy; lea leaves the flags from sub intact */
+	jnz	L(StrncpyFillTailWithZero1)	/* ebx still non-zero after the sub */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at eax is 0 (eax presumably set by SAVE_RESULT) */
+	sbb	$-1, %eax	/* eax += 1 - CF */
+#  endif
+# endif
+	RETURN1
+
+	.p2align 4
+L(Exit3):	/* Copy 3 bytes from ecx to edx (2 + 1) and return. */
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+	movb	2(%ecx), %al
+	movb	%al, 2(%edx)
+	SAVE_RESULT	(2)	/* macro defined outside this view; the argument is the offset of the last byte copied */
+# ifdef USE_AS_STRNCPY
+	sub	$3, %ebx
+	lea	3(%edx), %ecx	/* ecx = first byte after the copy; lea leaves the flags from sub intact */
+	jnz	L(StrncpyFillTailWithZero1)	/* ebx still non-zero after the sub */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at eax is 0 (eax presumably set by SAVE_RESULT) */
+	sbb	$-1, %eax	/* eax += 1 - CF */
+#  endif
+# endif
+	RETURN1
+
+	.p2align 4
+L(Exit5):	/* Copy 5 bytes from ecx to edx (4 + 1) and return. */
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movb	4(%ecx), %al
+	movb	%al, 4(%edx)
+	SAVE_RESULT	(4)	/* macro defined outside this view; the argument is the offset of the last byte copied */
+# ifdef USE_AS_STRNCPY
+	sub	$5, %ebx
+	lea	5(%edx), %ecx	/* ecx = first byte after the copy; lea leaves the flags from sub intact */
+	jnz	L(StrncpyFillTailWithZero1)	/* ebx still non-zero after the sub */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at eax is 0 (eax presumably set by SAVE_RESULT) */
+	sbb	$-1, %eax	/* eax += 1 - CF */
+#  endif
+# endif
+	RETURN1
+
+	.p2align 4
+L(Exit6):	/* Copy 6 bytes from ecx to edx (4 + 2) and return. */
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movw	4(%ecx), %ax
+	movw	%ax, 4(%edx)
+	SAVE_RESULT	(5)	/* macro defined outside this view; the argument is the offset of the last byte copied */
+# ifdef USE_AS_STRNCPY
+	sub	$6, %ebx
+	lea	6(%edx), %ecx	/* ecx = first byte after the copy; lea leaves the flags from sub intact */
+	jnz	L(StrncpyFillTailWithZero1)	/* ebx still non-zero after the sub */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at eax is 0 (eax presumably set by SAVE_RESULT) */
+	sbb	$-1, %eax	/* eax += 1 - CF */
+#  endif
+# endif
+	RETURN1
+
+	.p2align 4
+L(Exit7):	/* Copy 7 bytes from ecx to edx using two overlapping 4-byte moves (offsets 0 and 3) and return. */
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movl	3(%ecx), %eax
+	movl	%eax, 3(%edx)
+	SAVE_RESULT	(6)	/* macro defined outside this view; the argument is the offset of the last byte copied */
+# ifdef USE_AS_STRNCPY
+	sub	$7, %ebx
+	lea	7(%edx), %ecx	/* ecx = first byte after the copy; lea leaves the flags from sub intact */
+	jnz	L(StrncpyFillTailWithZero1)	/* ebx still non-zero after the sub */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at eax is 0 (eax presumably set by SAVE_RESULT) */
+	sbb	$-1, %eax	/* eax += 1 - CF */
+#  endif
+# endif
+	RETURN1
+
+	.p2align 4
+L(Exit9):	/* Copy 9 bytes from ecx to edx (8 + 1) and return. */
+	movlpd	(%ecx), %xmm0
+	movb	8(%ecx), %al
+	movlpd	%xmm0, (%edx)
+	movb	%al, 8(%edx)
+	SAVE_RESULT	(8)	/* macro defined outside this view; the argument is the offset of the last byte copied */
+# ifdef USE_AS_STRNCPY
+	sub	$9, %ebx
+	lea	9(%edx), %ecx	/* ecx = first byte after the copy; lea leaves the flags from sub intact */
+	jnz	L(StrncpyFillTailWithZero1)	/* ebx still non-zero after the sub */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at eax is 0 (eax presumably set by SAVE_RESULT) */
+	sbb	$-1, %eax	/* eax += 1 - CF */
+#  endif
+# endif
+	RETURN1
+
+	.p2align 4
+L(Exit10):	/* Copy 10 bytes from ecx to edx (8 + 2) and return. */
+	movlpd	(%ecx), %xmm0
+	movw	8(%ecx), %ax
+	movlpd	%xmm0, (%edx)
+	movw	%ax, 8(%edx)
+	SAVE_RESULT	(9)	/* macro defined outside this view; the argument is the offset of the last byte copied */
+# ifdef USE_AS_STRNCPY
+	sub	$10, %ebx
+	lea	10(%edx), %ecx	/* ecx = first byte after the copy; lea leaves the flags from sub intact */
+	jnz	L(StrncpyFillTailWithZero1)	/* ebx still non-zero after the sub */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at eax is 0 (eax presumably set by SAVE_RESULT) */
+	sbb	$-1, %eax	/* eax += 1 - CF */
+#  endif
+# endif
+	RETURN1
+
+	.p2align 4
+L(Exit11):	/* Copy 11 bytes from ecx to edx (8 at offset 0, 4 overlapping at offset 7) and return. */
+	movlpd	(%ecx), %xmm0
+	movl	7(%ecx), %eax
+	movlpd	%xmm0, (%edx)
+	movl	%eax, 7(%edx)
+	SAVE_RESULT	(10)	/* macro defined outside this view; the argument is the offset of the last byte copied */
+# ifdef USE_AS_STRNCPY
+	sub	$11, %ebx
+	lea	11(%edx), %ecx	/* ecx = first byte after the copy; lea leaves the flags from sub intact */
+	jnz	L(StrncpyFillTailWithZero1)	/* ebx still non-zero after the sub */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at eax is 0 (eax presumably set by SAVE_RESULT) */
+	sbb	$-1, %eax	/* eax += 1 - CF */
+#  endif
+# endif
+	RETURN1
+
+	.p2align 4
+L(Exit13):	/* Copy 13 bytes from ecx to edx using two overlapping 8-byte moves (offsets 0 and 5) and return. */
+	movlpd	(%ecx), %xmm0
+	movlpd	5(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)
+	movlpd	%xmm1, 5(%edx)
+	SAVE_RESULT	(12)	/* macro defined outside this view; the argument is the offset of the last byte copied */
+# ifdef USE_AS_STRNCPY
+	sub	$13, %ebx
+	lea	13(%edx), %ecx	/* ecx = first byte after the copy; lea leaves the flags from sub intact */
+	jnz	L(StrncpyFillTailWithZero1)	/* ebx still non-zero after the sub */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at eax is 0 (eax presumably set by SAVE_RESULT) */
+	sbb	$-1, %eax	/* eax += 1 - CF */
+#  endif
+# endif
+	RETURN1
+
+	.p2align 4
+L(Exit14):	/* copy 14 bytes from (%ecx) to (%edx), then finish */
+	movlpd	(%ecx), %xmm0
+	movlpd	6(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movlpd	%xmm1, 6(%edx)	/* bytes 6-13; overlaps the first store */
+	SAVE_RESULT	(13)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+# ifdef USE_AS_STRNCPY
+	sub	$14, %ebx	/* take the 14 copied bytes off the count in %ebx */
+	lea	14(%edx), %ecx	/* %ecx = first destination byte past the copy (lea keeps the flags) */
+	jnz	L(StrncpyFillTailWithZero1)	/* count not exhausted: zero-fill from %ecx */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at (%eax) is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  endif
+# endif
+	RETURN1
+
+	.p2align 4
+L(Exit15):	/* copy 15 bytes from (%ecx) to (%edx), then finish */
+	movlpd	(%ecx), %xmm0
+	movlpd	7(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movlpd	%xmm1, 7(%edx)	/* bytes 7-14; overlaps the first store by one byte */
+	SAVE_RESULT	(14)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+# ifdef USE_AS_STRNCPY
+	sub	$15, %ebx	/* take the 15 copied bytes off the count in %ebx */
+	lea	15(%edx), %ecx	/* %ecx = first destination byte past the copy (lea keeps the flags) */
+	jnz	L(StrncpyFillTailWithZero1)	/* count not exhausted: zero-fill from %ecx */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at (%eax) is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  endif
+# endif
+	RETURN1
+
+CFI_POP	(%edi)
+
+# ifdef USE_AS_STRNCPY
+	.p2align 4
+L(Fill0):	/* L(FillN) stores N bytes at (%ecx) from %edx / %xmm0; L(StrncpyFillTailWithZero) zeroes both */
+	RETURN
+
+	.p2align 4
+L(Fill1):
+	movb	%dl, (%ecx)
+	RETURN
+
+	.p2align 4
+L(Fill2):
+	movw	%dx, (%ecx)
+	RETURN
+
+	.p2align 4
+L(Fill3):
+	movw	%dx, (%ecx)	/* bytes 0-1 */
+	movb	%dl, 2(%ecx)	/* byte 2 */
+	RETURN
+
+	.p2align 4
+L(Fill4):
+	movl	%edx, (%ecx)
+	RETURN
+
+	.p2align 4
+L(Fill5):
+	movl	%edx, (%ecx)	/* bytes 0-3 */
+	movb	%dl, 4(%ecx)	/* byte 4 */
+	RETURN
+
+	.p2align 4
+L(Fill6):
+	movl	%edx, (%ecx)	/* bytes 0-3 */
+	movw	%dx, 4(%ecx)	/* bytes 4-5 */
+	RETURN
+
+	.p2align 4
+L(Fill7):
+	movl	%edx, (%ecx)	/* bytes 0-3 */
+	movl	%edx, 3(%ecx)	/* bytes 3-6; overlapping store */
+	RETURN
+
+	.p2align 4
+L(Fill8):
+	movlpd	%xmm0, (%ecx)	/* low 8 bytes of %xmm0 */
+	RETURN
+
+	.p2align 4
+L(Fill9):
+	movlpd	%xmm0, (%ecx)	/* bytes 0-7 */
+	movb	%dl, 8(%ecx)	/* byte 8 */
+	RETURN
+
+	.p2align 4
+L(Fill10):
+	movlpd	%xmm0, (%ecx)	/* bytes 0-7 */
+	movw	%dx, 8(%ecx)	/* bytes 8-9 */
+	RETURN
+
+	.p2align 4
+L(Fill11):
+	movlpd	%xmm0, (%ecx)	/* bytes 0-7 */
+	movl	%edx, 7(%ecx)	/* bytes 7-10; overlapping store */
+	RETURN
+
+	.p2align 4
+L(Fill12):
+	movlpd	%xmm0, (%ecx)	/* bytes 0-7 */
+	movl	%edx, 8(%ecx)	/* bytes 8-11 */
+	RETURN
+
+	.p2align 4
+L(Fill13):
+	movlpd	%xmm0, (%ecx)	/* bytes 0-7 */
+	movlpd	%xmm0, 5(%ecx)	/* bytes 5-12; overlapping store */
+	RETURN
+
+	.p2align 4
+L(Fill14):
+	movlpd	%xmm0, (%ecx)	/* bytes 0-7 */
+	movlpd	%xmm0, 6(%ecx)	/* bytes 6-13; overlapping store */
+	RETURN
+
+	.p2align 4
+L(Fill15):
+	movlpd	%xmm0, (%ecx)	/* bytes 0-7 */
+	movlpd	%xmm0, 7(%ecx)	/* bytes 7-14; overlapping store */
+	RETURN
+
+	.p2align 4
+L(Fill16):
+	movlpd	%xmm0, (%ecx)	/* bytes 0-7 */
+	movlpd	%xmm0, 8(%ecx)	/* bytes 8-15 */
+	RETURN
+
+	.p2align 4
+L(StrncpyFillExit1):	/* entry for callers whose %ebx is 16 too low: add it back, then dispatch */
+	lea	16(%ebx), %ebx
+L(FillFrom1To16Bytes):	/* jump to L(FillN) with N = %ebx; the compare tree assumes 0 <= %ebx <= 16 */
+	test	%ebx, %ebx
+	jz	L(Fill0)
+	cmp	$16, %ebx
+	je	L(Fill16)
+	cmp	$8, %ebx
+	je	L(Fill8)
+	jg	L(FillMore8)
+	cmp	$4, %ebx	/* here %ebx is below 8 */
+	je	L(Fill4)
+	jg	L(FillMore4)
+	cmp	$2, %ebx	/* here %ebx is below 4 (and non-zero) */
+	jl	L(Fill1)
+	je	L(Fill2)
+	jg	L(Fill3)	/* always taken when reached, so there is no fall-through */
+L(FillMore8):	/* but less than 16 */
+	cmp	$12, %ebx
+	je	L(Fill12)
+	jl	L(FillLess12)
+	cmp	$14, %ebx	/* here %ebx is above 12 */
+	jl	L(Fill13)
+	je	L(Fill14)
+	jg	L(Fill15)
+L(FillMore4):	/* but less than 8 */
+	cmp	$6, %ebx
+	jl	L(Fill5)
+	je	L(Fill6)
+	jg	L(Fill7)
+L(FillLess12):	/* but more than 8 */
+	cmp	$10, %ebx
+	jl	L(Fill9)
+	je	L(Fill10)
+	jmp	L(Fill11)
+
+	CFI_PUSH(%edi)
+
+	.p2align 4
+L(StrncpyFillTailWithZero1):	/* entry that first pops %edi, then falls through */
+	POP	(%edi)
+L(StrncpyFillTailWithZero):	/* store %ebx zero bytes starting at (%ecx) */
+	pxor	%xmm0, %xmm0	/* %xmm0 = 0 */
+	xor	%edx, %edx	/* %edx = 0 */
+	sub	$16, %ebx
+	jbe	L(StrncpyFillExit1)	/* at most 16 bytes: finish with a single L(FillN) */
+
+	movlpd	%xmm0, (%ecx)
+	movlpd	%xmm0, 8(%ecx)	/* 16 zero bytes via two 8-byte stores */
+
+	lea	16(%ecx), %ecx
+
+	mov	%ecx, %edx
+	and	$0xf, %edx	/* %edx = %ecx mod 16 */
+	sub	%edx, %ecx	/* round %ecx down to a 16-byte boundary ... */
+	add	%edx, %ebx	/* ... and add the backed-up bytes to the count */
+	xor	%edx, %edx	/* %edx was used as scratch: zero it again for L(FillN) */
+	sub	$64, %ebx
+	jb	L(StrncpyFillLess64)
+
+L(StrncpyFillLoopMovdqa):	/* 64 zero bytes per iteration using aligned stores */
+	movdqa	%xmm0, (%ecx)
+	movdqa	%xmm0, 16(%ecx)
+	movdqa	%xmm0, 32(%ecx)
+	movdqa	%xmm0, 48(%ecx)
+	lea	64(%ecx), %ecx
+	sub	$64, %ebx
+	jae	L(StrncpyFillLoopMovdqa)	/* repeat while the subtraction did not borrow */
+
+L(StrncpyFillLess64):	/* fewer than 64 bytes remain; %ebx is 64 below the real count here */
+	add	$32, %ebx
+	jl	L(StrncpyFillLess32)
+	movdqa	%xmm0, (%ecx)
+	movdqa	%xmm0, 16(%ecx)	/* at least 32 bytes remained: zero 32 */
+	lea	32(%ecx), %ecx
+	sub	$16, %ebx
+	jl	L(StrncpyFillExit1)	/* under 16 left: L(StrncpyFillExit1) adds the 16 back */
+	movdqa	%xmm0, (%ecx)
+	lea	16(%ecx), %ecx
+	jmp	L(FillFrom1To16Bytes)
+
+L(StrncpyFillLess32):	/* fewer than 32 bytes remain; %ebx is 32 below the real count here */
+	add	$16, %ebx
+	jl	L(StrncpyFillExit1)	/* under 16 left: L(StrncpyFillExit1) adds the 16 back */
+	movdqa	%xmm0, (%ecx)
+	lea	16(%ecx), %ecx
+	jmp	L(FillFrom1To16Bytes)
+# endif
+
+	.p2align 4
+L(ExitTail1):	/* copy 1 byte from (%ecx) to (%edx), then finish */
+	movb	(%ecx), %al
+	movb	%al, (%edx)
+	SAVE_RESULT_TAIL (0)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+# ifdef USE_AS_STRNCPY
+	sub	$1, %ebx	/* take the copied byte off the count in %ebx */
+	lea	1(%edx), %ecx	/* %ecx = first destination byte past the copy (lea keeps the flags) */
+	jnz	L(StrncpyFillTailWithZero)	/* count not exhausted: zero-fill from %ecx */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at (%eax) is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  endif
+# endif
+	RETURN
+
+	.p2align 4
+L(ExitTail2):	/* copy 2 bytes from (%ecx) to (%edx), then finish */
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+	SAVE_RESULT_TAIL (1)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+# ifdef USE_AS_STRNCPY
+	sub	$2, %ebx	/* take the 2 copied bytes off the count in %ebx */
+	lea	2(%edx), %ecx	/* %ecx = first destination byte past the copy (lea keeps the flags) */
+	jnz	L(StrncpyFillTailWithZero)	/* count not exhausted: zero-fill from %ecx */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at (%eax) is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  endif
+# endif
+	RETURN
+
+	.p2align 4
+L(ExitTail3):	/* copy 3 bytes from (%ecx) to (%edx), then finish */
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)	/* bytes 0-1 */
+	movb	2(%ecx), %al
+	movb	%al, 2(%edx)	/* byte 2 */
+	SAVE_RESULT_TAIL (2)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+# ifdef USE_AS_STRNCPY
+	sub	$3, %ebx	/* take the 3 copied bytes off the count in %ebx */
+	lea	3(%edx), %ecx	/* %ecx = first destination byte past the copy (lea keeps the flags) */
+	jnz	L(StrncpyFillTailWithZero)	/* count not exhausted: zero-fill from %ecx */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at (%eax) is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  endif
+# endif
+	RETURN
+
+	.p2align 4
+L(ExitTail4):	/* copy 4 bytes from (%ecx) to (%edx), then finish */
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	SAVE_RESULT_TAIL (3)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+# ifdef USE_AS_STRNCPY
+	sub	$4, %ebx	/* take the 4 copied bytes off the count in %ebx */
+	lea	4(%edx), %ecx	/* %ecx = first destination byte past the copy (lea keeps the flags) */
+	jnz	L(StrncpyFillTailWithZero)	/* count not exhausted: zero-fill from %ecx */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at (%eax) is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  endif
+# endif
+	RETURN
+
+	.p2align 4
+L(ExitTail5):	/* copy 5 bytes from (%ecx) to (%edx), then finish */
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)	/* bytes 0-3 */
+	movb	4(%ecx), %al
+	movb	%al, 4(%edx)	/* byte 4 */
+	SAVE_RESULT_TAIL (4)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+# ifdef USE_AS_STRNCPY
+	sub	$5, %ebx	/* take the 5 copied bytes off the count in %ebx */
+	lea	5(%edx), %ecx	/* %ecx = first destination byte past the copy (lea keeps the flags) */
+	jnz	L(StrncpyFillTailWithZero)	/* count not exhausted: zero-fill from %ecx */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at (%eax) is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  endif
+# endif
+	RETURN
+
+	.p2align 4
+L(ExitTail6):	/* copy 6 bytes from (%ecx) to (%edx), then finish */
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)	/* bytes 0-3 */
+	movw	4(%ecx), %ax
+	movw	%ax, 4(%edx)	/* bytes 4-5 */
+	SAVE_RESULT_TAIL (5)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+# ifdef USE_AS_STRNCPY
+	sub	$6, %ebx	/* take the 6 copied bytes off the count in %ebx */
+	lea	6(%edx), %ecx	/* %ecx = first destination byte past the copy (lea keeps the flags) */
+	jnz	L(StrncpyFillTailWithZero)	/* count not exhausted: zero-fill from %ecx */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at (%eax) is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  endif
+# endif
+	RETURN
+
+	.p2align 4
+L(ExitTail7):	/* copy 7 bytes from (%ecx) to (%edx), then finish */
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)	/* bytes 0-3 */
+	movl	3(%ecx), %eax
+	movl	%eax, 3(%edx)	/* bytes 3-6; overlaps the first store by one byte */
+	SAVE_RESULT_TAIL (6)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+# ifdef USE_AS_STRNCPY
+	sub	$7, %ebx	/* take the 7 copied bytes off the count in %ebx */
+	lea	7(%edx), %ecx	/* %ecx = first destination byte past the copy (lea keeps the flags) */
+	jnz	L(StrncpyFillTailWithZero)	/* count not exhausted: zero-fill from %ecx */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at (%eax) is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  endif
+# endif
+	RETURN
+
+	.p2align 4
+L(ExitTail8):	/* copy 8 bytes from (%ecx) to (%edx), then finish */
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	SAVE_RESULT_TAIL (7)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+# ifdef USE_AS_STRNCPY
+	sub	$8, %ebx	/* take the 8 copied bytes off the count in %ebx */
+	lea	8(%edx), %ecx	/* %ecx = first destination byte past the copy (lea keeps the flags) */
+	jnz	L(StrncpyFillTailWithZero)	/* count not exhausted: zero-fill from %ecx */
+# endif	/* NOTE(review): no USE_AS_STPCPY adjustment here, unlike most sibling ExitTailN blocks; confirm intended */
+	RETURN
+
+	.p2align 4
+L(ExitTail9):	/* copy 9 bytes from (%ecx) to (%edx), then finish */
+	movlpd	(%ecx), %xmm0
+	movb	8(%ecx), %al
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movb	%al, 8(%edx)	/* byte 8 */
+	SAVE_RESULT_TAIL (8)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+# ifdef USE_AS_STRNCPY
+	sub	$9, %ebx	/* take the 9 copied bytes off the count in %ebx */
+	lea	9(%edx), %ecx	/* %ecx = first destination byte past the copy (lea keeps the flags) */
+	jnz	L(StrncpyFillTailWithZero)	/* count not exhausted: zero-fill from %ecx */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at (%eax) is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  endif
+# endif
+	RETURN
+
+	.p2align 4
+L(ExitTail10):	/* copy 10 bytes from (%ecx) to (%edx), then finish */
+	movlpd	(%ecx), %xmm0
+	movw	8(%ecx), %ax
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movw	%ax, 8(%edx)	/* bytes 8-9 */
+	SAVE_RESULT_TAIL (9)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+# ifdef USE_AS_STRNCPY
+	sub	$10, %ebx	/* take the 10 copied bytes off the count in %ebx */
+	lea	10(%edx), %ecx	/* %ecx = first destination byte past the copy (lea keeps the flags) */
+	jnz	L(StrncpyFillTailWithZero)	/* count not exhausted: zero-fill from %ecx */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at (%eax) is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  endif
+# endif
+	RETURN
+
+	.p2align 4
+L(ExitTail11):	/* copy 11 bytes from (%ecx) to (%edx), then finish */
+	movlpd	(%ecx), %xmm0
+	movl	7(%ecx), %eax
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movl	%eax, 7(%edx)	/* bytes 7-10; overlaps the first store by one byte */
+	SAVE_RESULT_TAIL (10)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+# ifdef USE_AS_STRNCPY
+	sub	$11, %ebx	/* take the 11 copied bytes off the count in %ebx */
+	lea	11(%edx), %ecx	/* %ecx = first destination byte past the copy (lea keeps the flags) */
+	jnz	L(StrncpyFillTailWithZero)	/* count not exhausted: zero-fill from %ecx */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at (%eax) is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  endif
+# endif
+	RETURN
+
+	.p2align 4
+L(ExitTail12):	/* copy 12 bytes from (%ecx) to (%edx), then finish */
+	movlpd	(%ecx), %xmm0
+	movl	8(%ecx), %eax
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movl	%eax, 8(%edx)	/* bytes 8-11 */
+	SAVE_RESULT_TAIL (11)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+# ifdef USE_AS_STRNCPY
+	sub	$12, %ebx	/* take the 12 copied bytes off the count in %ebx */
+	lea	12(%edx), %ecx	/* %ecx = first destination byte past the copy (lea keeps the flags) */
+	jnz	L(StrncpyFillTailWithZero)	/* count not exhausted: zero-fill from %ecx */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at (%eax) is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  endif
+# endif
+	RETURN
+
+	.p2align 4
+L(ExitTail13):	/* copy 13 bytes from (%ecx) to (%edx), then finish */
+	movlpd	(%ecx), %xmm0
+	movlpd	5(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movlpd	%xmm1, 5(%edx)	/* bytes 5-12; overlaps the first store */
+	SAVE_RESULT_TAIL (12)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+# ifdef USE_AS_STRNCPY
+	sub	$13, %ebx	/* take the 13 copied bytes off the count in %ebx */
+	lea	13(%edx), %ecx	/* %ecx = first destination byte past the copy (lea keeps the flags) */
+	jnz	L(StrncpyFillTailWithZero)	/* count not exhausted: zero-fill from %ecx */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at (%eax) is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  endif
+# endif
+	RETURN
+
+	.p2align 4
+L(ExitTail14):	/* copy 14 bytes from (%ecx) to (%edx), then finish */
+	movlpd	(%ecx), %xmm0
+	movlpd	6(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movlpd	%xmm1, 6(%edx)	/* bytes 6-13; overlaps the first store */
+	SAVE_RESULT_TAIL (13)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+# ifdef USE_AS_STRNCPY
+	sub	$14, %ebx	/* take the 14 copied bytes off the count in %ebx */
+	lea	14(%edx), %ecx	/* %ecx = first destination byte past the copy (lea keeps the flags) */
+	jnz	L(StrncpyFillTailWithZero)	/* count not exhausted: zero-fill from %ecx */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at (%eax) is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  endif
+# endif
+	RETURN
+
+	.p2align 4
+L(ExitTail15):	/* copy 15 bytes from (%ecx) to (%edx), then finish */
+	movlpd	(%ecx), %xmm0
+	movlpd	7(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movlpd	%xmm1, 7(%edx)	/* bytes 7-14; overlaps the first store by one byte */
+	SAVE_RESULT_TAIL (14)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+# ifdef USE_AS_STRNCPY
+	sub	$15, %ebx	/* take the 15 copied bytes off the count in %ebx */
+	lea	15(%edx), %ecx	/* %ecx = first destination byte past the copy (lea keeps the flags) */
+	jnz	L(StrncpyFillTailWithZero)	/* count not exhausted: zero-fill from %ecx */
+# endif	/* NOTE(review): no USE_AS_STPCPY adjustment here, unlike most sibling ExitTailN blocks; confirm intended */
+	RETURN
+
+	.p2align 4
+L(ExitTail16):	/* copy 16 bytes from (%ecx) to (%edx), then finish */
+	movdqu	(%ecx), %xmm0
+	movdqu	%xmm0, (%edx)	/* unaligned 16-byte load and store */
+	SAVE_RESULT_TAIL (15)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+# ifdef USE_AS_STRNCPY
+	sub	$16, %ebx	/* take the 16 copied bytes off the count in %ebx */
+	lea	16(%edx), %ecx	/* %ecx = first destination byte past the copy (lea keeps the flags) */
+	jnz	L(StrncpyFillTailWithZero)	/* count not exhausted: zero-fill from %ecx */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at (%eax) is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  endif
+# endif
+	RETURN
+#endif
+
+#ifdef USE_AS_STRNCPY
+# ifndef USE_AS_STRCAT
+	CFI_PUSH (%esi)
+	CFI_PUSH (%edi)
+# endif
+	.p2align 4
+L(StrncpyLeaveCase2OrCase3):	/* choose the Case2 path when %eax != 0, otherwise fall into Case3 */
+	test	%eax, %eax
+	jnz	L(Aligned64LeaveCase2)
+
+L(Aligned64LeaveCase3):	/* store %xmm4..%xmm6 in turn, 16 bytes per step, until %ebx runs out */
+	add	$48, %ebx	/* presumably undoes a bias applied by the code that jumps here */
+	jle	L(CopyFrom1To16BytesCase3)
+	movaps	%xmm4, -64(%edx)
+	lea	16(%esi), %esi	/* %esi advances by 16 for every block stored */
+	sub	$16, %ebx
+	jbe	L(CopyFrom1To16BytesCase3)
+	movaps	%xmm5, -48(%edx)
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(CopyFrom1To16BytesCase3)
+	movaps	%xmm6, -32(%edx)
+	lea	16(%esi), %esi
+	lea	-16(%ebx), %ebx	/* same -16 as the sub above, but without touching the flags */
+	jmp	L(CopyFrom1To16BytesCase3)
+
+L(Aligned64LeaveCase2):	/* as Case3, but each block is also compared against %xmm0 before moving on */
+	pcmpeqb	%xmm4, %xmm0	/* per-byte equality of %xmm4 with %xmm0 (presumably all-zero on entry) */
+	pmovmskb %xmm0, %eax	/* %eax = 16-bit mask of the matching bytes */
+	add	$48, %ebx
+	jle	L(CopyFrom1To16BytesCase2OrCase3)
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)	/* a byte matched and the count allows a full block */
+
+	pcmpeqb	%xmm5, %xmm0	/* no match above left %xmm0 all-zero, so this tests %xmm5 for zero bytes */
+	pmovmskb %xmm0, %eax
+	movaps	%xmm4, -64(%edx)
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(CopyFrom1To16BytesCase2OrCase3)
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	pcmpeqb	%xmm6, %xmm0
+	pmovmskb %xmm0, %eax
+	movaps	%xmm5, -48(%edx)
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(CopyFrom1To16BytesCase2OrCase3)
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	pcmpeqb	%xmm7, %xmm0
+	pmovmskb %xmm0, %eax
+	movaps	%xmm6, -32(%edx)
+	lea	16(%esi), %esi
+	lea	-16(%ebx), %ebx
+	jmp	L(CopyFrom1To16BytesCase2)
+
+/*--------------------------------------------------*/
+	.p2align 4
+L(StrncpyExit1Case2OrCase3):	/* copy 15 bytes, set %esi = 15, then dispatch on %eax */
+	movlpd	(%ecx), %xmm0
+	movlpd	7(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movlpd	%xmm1, 7(%edx)	/* bytes 7-14 */
+	mov	$15, %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)	/* %eax != 0 */
+	jmp	L(CopyFrom1To16BytesCase3)	/* %eax == 0 */
+
+	.p2align 4
+L(StrncpyExit2Case2OrCase3):	/* copy 14 bytes, set %esi = 14, then dispatch on %eax */
+	movlpd	(%ecx), %xmm0
+	movlpd	6(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movlpd	%xmm1, 6(%edx)	/* bytes 6-13 */
+	mov	$14, %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)	/* %eax != 0 */
+	jmp	L(CopyFrom1To16BytesCase3)	/* %eax == 0 */
+
+	.p2align 4
+L(StrncpyExit3Case2OrCase3):	/* copy 13 bytes, set %esi = 13, then dispatch on %eax */
+	movlpd	(%ecx), %xmm0
+	movlpd	5(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movlpd	%xmm1, 5(%edx)	/* bytes 5-12 */
+	mov	$13, %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)	/* %eax != 0 */
+	jmp	L(CopyFrom1To16BytesCase3)	/* %eax == 0 */
+
+	.p2align 4
+L(StrncpyExit4Case2OrCase3):	/* copy 12 bytes, set %esi = 12, then dispatch on %eax */
+	movlpd	(%ecx), %xmm0
+	movl	8(%ecx), %esi	/* %esi doubles as a scratch register for the copy */
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movl	%esi, 8(%edx)	/* bytes 8-11 */
+	mov	$12, %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)	/* %eax != 0 */
+	jmp	L(CopyFrom1To16BytesCase3)	/* %eax == 0 */
+
+	.p2align 4
+L(StrncpyExit5Case2OrCase3):	/* copy 11 bytes, set %esi = 11, then dispatch on %eax */
+	movlpd	(%ecx), %xmm0
+	movl	7(%ecx), %esi	/* %esi doubles as a scratch register for the copy */
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movl	%esi, 7(%edx)	/* bytes 7-10 */
+	mov	$11, %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)	/* %eax != 0 */
+	jmp	L(CopyFrom1To16BytesCase3)	/* %eax == 0 */
+
+	.p2align 4
+L(StrncpyExit6Case2OrCase3):	/* copy 10 bytes, set %esi = 10, then dispatch on %eax */
+	movlpd	(%ecx), %xmm0
+	movl	6(%ecx), %esi	/* %esi doubles as a scratch register for the copy */
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movl	%esi, 6(%edx)	/* bytes 6-9 */
+	mov	$10, %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)	/* %eax != 0 */
+	jmp	L(CopyFrom1To16BytesCase3)	/* %eax == 0 */
+
+	.p2align 4
+L(StrncpyExit7Case2OrCase3):	/* copy 9 bytes, set %esi = 9, then dispatch on %eax */
+	movlpd	(%ecx), %xmm0
+	movl	5(%ecx), %esi	/* %esi doubles as a scratch register for the copy */
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movl	%esi, 5(%edx)	/* bytes 5-8 */
+	mov	$9, %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)	/* %eax != 0 */
+	jmp	L(CopyFrom1To16BytesCase3)	/* %eax == 0 */
+
+	.p2align 4
+L(StrncpyExit8Case2OrCase3):	/* copy 8 bytes, set %esi = 8, then dispatch on %eax */
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	mov	$8, %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)	/* %eax != 0 */
+	jmp	L(CopyFrom1To16BytesCase3)	/* %eax == 0 */
+
+	.p2align 4
+L(StrncpyExit9Case2OrCase3):	/* copy 8 bytes, set %esi = 7, then dispatch on %eax */
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)	/* bytes 0-7, i.e. up to and including offset %esi */
+	mov	$7, %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)	/* %eax != 0 */
+	jmp	L(CopyFrom1To16BytesCase3)	/* %eax == 0 */
+
+	.p2align 4
+L(StrncpyExit10Case2OrCase3):	/* copy 8 bytes starting one byte before the pointers, set %esi = 6, dispatch on %eax */
+	movlpd	-1(%ecx), %xmm0
+	movlpd	%xmm0, -1(%edx)	/* bytes -1..6 */
+	mov	$6, %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)	/* %eax != 0 */
+	jmp	L(CopyFrom1To16BytesCase3)	/* %eax == 0 */
+
+	.p2align 4
+L(StrncpyExit11Case2OrCase3):	/* copy 8 bytes starting two bytes before the pointers, set %esi = 5, dispatch on %eax */
+	movlpd	-2(%ecx), %xmm0
+	movlpd	%xmm0, -2(%edx)	/* bytes -2..5 */
+	mov	$5, %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)	/* %eax != 0 */
+	jmp	L(CopyFrom1To16BytesCase3)	/* %eax == 0 */
+
+	.p2align 4
+L(StrncpyExit12Case2OrCase3):	/* copy 4 bytes, set %esi = 4, then dispatch on %eax */
+	movl	(%ecx), %esi	/* %esi doubles as a scratch register for the copy */
+	movl	%esi, (%edx)	/* bytes 0-3 */
+	mov	$4, %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)	/* %eax != 0 */
+	jmp	L(CopyFrom1To16BytesCase3)	/* %eax == 0 */
+
+	.p2align 4
+L(StrncpyExit13Case2OrCase3):	/* copy 4 bytes starting one byte before the pointers, set %esi = 3, dispatch on %eax */
+	movl	-1(%ecx), %esi	/* %esi doubles as a scratch register for the copy */
+	movl	%esi, -1(%edx)	/* bytes -1..2 */
+	mov	$3, %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)	/* %eax != 0 */
+	jmp	L(CopyFrom1To16BytesCase3)	/* %eax == 0 */
+
+	.p2align 4
+L(StrncpyExit14Case2OrCase3):	/* copy 4 bytes starting two bytes before the pointers, set %esi = 2, dispatch on %eax */
+	movl	-2(%ecx), %esi	/* %esi doubles as a scratch register for the copy */
+	movl	%esi, -2(%edx)	/* bytes -2..1 */
+	mov	$2, %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)	/* %eax != 0 */
+	jmp	L(CopyFrom1To16BytesCase3)	/* %eax == 0 */
+
+	.p2align 4
+L(StrncpyExit15Case2OrCase3):	/* copy 4 bytes starting three bytes before the pointers, set %esi = 1, dispatch on %eax */
+	movl	-3(%ecx), %esi	/* %esi doubles as a scratch register for the copy */
+	movl	%esi, -3(%edx)	/* bytes -3..0 */
+	mov	$1, %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)	/* %eax != 0 */
+	jmp	L(CopyFrom1To16BytesCase3)	/* %eax == 0 */
+
+L(StrncpyLeave1):	/* leave path using palignr $1; stores up to four 16-byte blocks, then falls into L(StrncpyExit1) */
+	movaps	%xmm2, %xmm3	/* keep a copy of %xmm2: the palignr below overwrites it */
+	add	$48, %ebx	/* presumably undoes a bias applied by the code that jumps here */
+	jle	L(StrncpyExit1)
+	palignr	$1, %xmm1, %xmm2	/* %xmm2 = 16 bytes of the pair %xmm2:%xmm1 starting at byte 1 */
+	movaps	%xmm2, (%edx)
+	movaps	31(%ecx), %xmm2	/* aligned load: 31(%ecx) must be 16-byte aligned */
+	lea	16(%esi), %esi	/* %esi advances by 16 for every block stored */
+	sub	$16, %ebx
+	jbe	L(StrncpyExit1)
+	palignr	$1, %xmm3, %xmm2	/* same realignment with the saved block as the low half */
+	movaps	%xmm2, 16(%edx)
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit1)
+	movaps	%xmm4, 32(%edx)	/* %xmm4 / %xmm5 are prepared outside this chunk */
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit1)
+	movaps	%xmm5, 48(%edx)
+	lea	16(%esi), %esi
+	lea	-16(%ebx), %ebx	/* %ebx -= 16 without touching the flags */
+L(StrncpyExit1):	/* advance both pointers by %esi + 15 and copy the 16 bytes ending there */
+	lea	15(%edx, %esi), %edx
+	lea	15(%ecx, %esi), %ecx
+	movdqu	-16(%ecx), %xmm0
+	xor	%esi, %esi	/* pointers are already advanced: hand the Case3 code a zero %esi */
+	movdqu	%xmm0, -16(%edx)
+	jmp	L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave2):	/* leave path using palignr $2; stores up to four 16-byte blocks, then falls into L(StrncpyExit2) */
+	movaps	%xmm2, %xmm3	/* keep a copy of %xmm2: the palignr below overwrites it */
+	add	$48, %ebx	/* presumably undoes a bias applied by the code that jumps here */
+	jle	L(StrncpyExit2)
+	palignr	$2, %xmm1, %xmm2	/* %xmm2 = 16 bytes of the pair %xmm2:%xmm1 starting at byte 2 */
+	movaps	%xmm2, (%edx)
+	movaps	30(%ecx), %xmm2	/* aligned load: 30(%ecx) must be 16-byte aligned */
+	lea	16(%esi), %esi	/* %esi advances by 16 for every block stored */
+	sub	$16, %ebx
+	jbe	L(StrncpyExit2)
+	palignr	$2, %xmm3, %xmm2	/* same realignment with the saved block as the low half */
+	movaps	%xmm2, 16(%edx)
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit2)
+	movaps	%xmm4, 32(%edx)	/* %xmm4 / %xmm5 are prepared outside this chunk */
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit2)
+	movaps	%xmm5, 48(%edx)
+	lea	16(%esi), %esi
+	lea	-16(%ebx), %ebx	/* %ebx -= 16 without touching the flags */
+L(StrncpyExit2):	/* advance both pointers by %esi + 14 and copy the 16 bytes ending there */
+	lea	14(%edx, %esi), %edx
+	lea	14(%ecx, %esi), %ecx
+	movdqu	-16(%ecx), %xmm0
+	xor	%esi, %esi	/* pointers are already advanced: hand the Case3 code a zero %esi */
+	movdqu	%xmm0, -16(%edx)
+	jmp	L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave3):	/* leave path using palignr $3; stores up to four 16-byte blocks, then falls into L(StrncpyExit3) */
+	movaps	%xmm2, %xmm3	/* keep a copy of %xmm2: the palignr below overwrites it */
+	add	$48, %ebx	/* presumably undoes a bias applied by the code that jumps here */
+	jle	L(StrncpyExit3)
+	palignr	$3, %xmm1, %xmm2	/* %xmm2 = 16 bytes of the pair %xmm2:%xmm1 starting at byte 3 */
+	movaps	%xmm2, (%edx)
+	movaps	29(%ecx), %xmm2	/* aligned load: 29(%ecx) must be 16-byte aligned */
+	lea	16(%esi), %esi	/* %esi advances by 16 for every block stored */
+	sub	$16, %ebx
+	jbe	L(StrncpyExit3)
+	palignr	$3, %xmm3, %xmm2	/* same realignment with the saved block as the low half */
+	movaps	%xmm2, 16(%edx)
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit3)
+	movaps	%xmm4, 32(%edx)	/* %xmm4 / %xmm5 are prepared outside this chunk */
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit3)
+	movaps	%xmm5, 48(%edx)
+	lea	16(%esi), %esi
+	lea	-16(%ebx), %ebx	/* %ebx -= 16 without touching the flags */
+L(StrncpyExit3):	/* advance both pointers by %esi + 13 and copy the 16 bytes ending there */
+	lea	13(%edx, %esi), %edx
+	lea	13(%ecx, %esi), %ecx
+	movdqu	-16(%ecx), %xmm0
+	xor	%esi, %esi	/* pointers are already advanced: hand the Case3 code a zero %esi */
+	movdqu	%xmm0, -16(%edx)
+	jmp	L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave4):	/* leave path using palignr $4; stores up to four 16-byte blocks, then falls into L(StrncpyExit4) */
+	movaps	%xmm2, %xmm3	/* keep a copy of %xmm2: the palignr below overwrites it */
+	add	$48, %ebx	/* presumably undoes a bias applied by the code that jumps here */
+	jle	L(StrncpyExit4)
+	palignr	$4, %xmm1, %xmm2	/* %xmm2 = 16 bytes of the pair %xmm2:%xmm1 starting at byte 4 */
+	movaps	%xmm2, (%edx)
+	movaps	28(%ecx), %xmm2	/* aligned load: 28(%ecx) must be 16-byte aligned */
+	lea	16(%esi), %esi	/* %esi advances by 16 for every block stored */
+	sub	$16, %ebx
+	jbe	L(StrncpyExit4)
+	palignr	$4, %xmm3, %xmm2	/* same realignment with the saved block as the low half */
+	movaps	%xmm2, 16(%edx)
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit4)
+	movaps	%xmm4, 32(%edx)	/* %xmm4 / %xmm5 are prepared outside this chunk */
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit4)
+	movaps	%xmm5, 48(%edx)
+	lea	16(%esi), %esi
+	lea	-16(%ebx), %ebx	/* %ebx -= 16 without touching the flags */
+L(StrncpyExit4):	/* advance both pointers by %esi + 12 and copy the 12 bytes ending there */
+	lea	12(%edx, %esi), %edx
+	lea	12(%ecx, %esi), %ecx
+	movlpd	-12(%ecx), %xmm0
+	movl	-4(%ecx), %eax
+	movlpd	%xmm0, -12(%edx)	/* bytes -12..-5 */
+	movl	%eax, -4(%edx)	/* bytes -4..-1 */
+	xor	%esi, %esi	/* pointers are already advanced: hand the Case3 code a zero %esi */
+	jmp	L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave5):	/* leave path using palignr $5; stores up to four 16-byte blocks, then falls into L(StrncpyExit5) */
+	movaps	%xmm2, %xmm3	/* keep a copy of %xmm2: the palignr below overwrites it */
+	add	$48, %ebx	/* presumably undoes a bias applied by the code that jumps here */
+	jle	L(StrncpyExit5)
+	palignr	$5, %xmm1, %xmm2	/* %xmm2 = 16 bytes of the pair %xmm2:%xmm1 starting at byte 5 */
+	movaps	%xmm2, (%edx)
+	movaps	27(%ecx), %xmm2	/* aligned load: 27(%ecx) must be 16-byte aligned */
+	lea	16(%esi), %esi	/* %esi advances by 16 for every block stored */
+	sub	$16, %ebx
+	jbe	L(StrncpyExit5)
+	palignr	$5, %xmm3, %xmm2	/* same realignment with the saved block as the low half */
+	movaps	%xmm2, 16(%edx)
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit5)
+	movaps	%xmm4, 32(%edx)	/* %xmm4 / %xmm5 are prepared outside this chunk */
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit5)
+	movaps	%xmm5, 48(%edx)
+	lea	16(%esi), %esi
+	lea	-16(%ebx), %ebx	/* %ebx -= 16 without touching the flags */
+L(StrncpyExit5):	/* advance both pointers by %esi + 11 and copy the 11 bytes ending there */
+	lea	11(%edx, %esi), %edx
+	lea	11(%ecx, %esi), %ecx
+	movlpd	-11(%ecx), %xmm0
+	movl	-4(%ecx), %eax
+	movlpd	%xmm0, -11(%edx)	/* bytes -11..-4 */
+	movl	%eax, -4(%edx)	/* bytes -4..-1; overlaps the first store by one byte */
+	xor	%esi, %esi	/* pointers are already advanced: hand the Case3 code a zero %esi */
+	jmp	L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave6):	/* leave path using palignr $6; stores up to four 16-byte blocks, then falls into L(StrncpyExit6) */
+	movaps	%xmm2, %xmm3	/* keep a copy of %xmm2: the palignr below overwrites it */
+	add	$48, %ebx	/* presumably undoes a bias applied by the code that jumps here */
+	jle	L(StrncpyExit6)
+	palignr	$6, %xmm1, %xmm2	/* %xmm2 = 16 bytes of the pair %xmm2:%xmm1 starting at byte 6 */
+	movaps	%xmm2, (%edx)
+	movaps	26(%ecx), %xmm2	/* aligned load: 26(%ecx) must be 16-byte aligned */
+	lea	16(%esi), %esi	/* %esi advances by 16 for every block stored */
+	sub	$16, %ebx
+	jbe	L(StrncpyExit6)
+	palignr	$6, %xmm3, %xmm2	/* same realignment with the saved block as the low half */
+	movaps	%xmm2, 16(%edx)
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit6)
+	movaps	%xmm4, 32(%edx)	/* %xmm4 / %xmm5 are prepared outside this chunk */
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit6)
+	movaps	%xmm5, 48(%edx)
+	lea	16(%esi), %esi
+	lea	-16(%ebx), %ebx	/* %ebx -= 16 without touching the flags */
+L(StrncpyExit6):	/* advance both pointers by %esi + 10 and copy the 10 bytes ending there */
+	lea	10(%edx, %esi), %edx
+	lea	10(%ecx, %esi), %ecx
+
+	movlpd	-10(%ecx), %xmm0
+	movw	-2(%ecx), %ax
+	movlpd	%xmm0, -10(%edx)	/* bytes -10..-3 */
+	movw	%ax, -2(%edx)	/* bytes -2..-1 */
+	xor	%esi, %esi	/* pointers are already advanced: hand the Case3 code a zero %esi */
+	jmp	L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave7):	/* leave path using palignr $7; stores up to four 16-byte blocks, then falls into L(StrncpyExit7) */
+	movaps	%xmm2, %xmm3	/* keep a copy of %xmm2: the palignr below overwrites it */
+	add	$48, %ebx	/* presumably undoes a bias applied by the code that jumps here */
+	jle	L(StrncpyExit7)
+	palignr	$7, %xmm1, %xmm2	/* %xmm2 = 16 bytes of the pair %xmm2:%xmm1 starting at byte 7 */
+	movaps	%xmm2, (%edx)
+	movaps	25(%ecx), %xmm2	/* aligned load: 25(%ecx) must be 16-byte aligned */
+	lea	16(%esi), %esi	/* %esi advances by 16 for every block stored */
+	sub	$16, %ebx
+	jbe	L(StrncpyExit7)
+	palignr	$7, %xmm3, %xmm2	/* same realignment with the saved block as the low half */
+	movaps	%xmm2, 16(%edx)
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit7)
+	movaps	%xmm4, 32(%edx)	/* %xmm4 / %xmm5 are prepared outside this chunk */
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit7)
+	movaps	%xmm5, 48(%edx)
+	lea	16(%esi), %esi
+	lea	-16(%ebx), %ebx	/* %ebx -= 16 without touching the flags */
+L(StrncpyExit7):	/* advance both pointers by %esi + 9 and copy the 9 bytes ending there */
+	lea	9(%edx, %esi), %edx
+	lea	9(%ecx, %esi), %ecx
+
+	movlpd	-9(%ecx), %xmm0
+	movb	-1(%ecx), %ah	/* %ah is just a byte-sized scratch register here */
+	movlpd	%xmm0, -9(%edx)	/* bytes -9..-2 */
+	movb	%ah, -1(%edx)	/* byte -1 */
+	xor	%esi, %esi	/* pointers are already advanced: hand the Case3 code a zero %esi */
+	jmp	L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave8):	/* leave path using palignr $8; stores up to four 16-byte blocks, then falls into L(StrncpyExit8) */
+	movaps	%xmm2, %xmm3	/* keep a copy of %xmm2: the palignr below overwrites it */
+	add	$48, %ebx	/* presumably undoes a bias applied by the code that jumps here */
+	jle	L(StrncpyExit8)
+	palignr	$8, %xmm1, %xmm2	/* %xmm2 = 16 bytes of the pair %xmm2:%xmm1 starting at byte 8 */
+	movaps	%xmm2, (%edx)
+	movaps	24(%ecx), %xmm2	/* aligned load: 24(%ecx) must be 16-byte aligned */
+	lea	16(%esi), %esi	/* %esi advances by 16 for every block stored */
+	sub	$16, %ebx
+	jbe	L(StrncpyExit8)
+	palignr	$8, %xmm3, %xmm2	/* same realignment with the saved block as the low half */
+	movaps	%xmm2, 16(%edx)
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit8)
+	movaps	%xmm4, 32(%edx)	/* %xmm4 / %xmm5 are prepared outside this chunk */
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit8)
+	movaps	%xmm5, 48(%edx)
+	lea	16(%esi), %esi
+	lea	-16(%ebx), %ebx	/* %ebx -= 16 without touching the flags */
+L(StrncpyExit8):	/* advance both pointers by %esi + 8 and copy the 8 bytes ending there */
+	lea	8(%edx, %esi), %edx
+	lea	8(%ecx, %esi), %ecx
+	movlpd	-8(%ecx), %xmm0
+	movlpd	%xmm0, -8(%edx)
+	xor	%esi, %esi	/* pointers are already advanced: hand the Case3 code a zero %esi */
+	jmp	L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave9):	/* leave path using palignr $9; stores up to four 16-byte blocks, then falls into L(StrncpyExit9) */
+	movaps	%xmm2, %xmm3	/* keep a copy of %xmm2: the palignr below overwrites it */
+	add	$48, %ebx	/* presumably undoes a bias applied by the code that jumps here */
+	jle	L(StrncpyExit9)
+	palignr	$9, %xmm1, %xmm2	/* %xmm2 = 16 bytes of the pair %xmm2:%xmm1 starting at byte 9 */
+	movaps	%xmm2, (%edx)
+	movaps	23(%ecx), %xmm2	/* aligned load: 23(%ecx) must be 16-byte aligned */
+	lea	16(%esi), %esi	/* %esi advances by 16 for every block stored */
+	sub	$16, %ebx
+	jbe	L(StrncpyExit9)
+	palignr	$9, %xmm3, %xmm2	/* same realignment with the saved block as the low half */
+	movaps	%xmm2, 16(%edx)
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit9)
+	movaps	%xmm4, 32(%edx)	/* %xmm4 / %xmm5 are prepared outside this chunk */
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit9)
+	movaps	%xmm5, 48(%edx)
+	lea	16(%esi), %esi
+	lea	-16(%ebx), %ebx	/* %ebx -= 16 without touching the flags */
+L(StrncpyExit9):	/* advance both pointers by %esi + 7 and copy the 8 bytes ending there */
+	lea	7(%edx, %esi), %edx
+	lea	7(%ecx, %esi), %ecx
+
+	movlpd	-8(%ecx), %xmm0
+	movlpd	%xmm0, -8(%edx)	/* 8 bytes, one more than the 7 just skipped */
+	xor	%esi, %esi	/* pointers are already advanced: hand the Case3 code a zero %esi */
+	jmp	L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave10):	/* leave path using palignr $10; stores up to four 16-byte blocks, then falls into L(StrncpyExit10) */
+	movaps	%xmm2, %xmm3	/* keep a copy of %xmm2: the palignr below overwrites it */
+	add	$48, %ebx	/* presumably undoes a bias applied by the code that jumps here */
+	jle	L(StrncpyExit10)
+	palignr	$10, %xmm1, %xmm2	/* %xmm2 = 16 bytes of the pair %xmm2:%xmm1 starting at byte 10 */
+	movaps	%xmm2, (%edx)
+	movaps	22(%ecx), %xmm2	/* aligned load: 22(%ecx) must be 16-byte aligned */
+	lea	16(%esi), %esi	/* %esi advances by 16 for every block stored */
+	sub	$16, %ebx
+	jbe	L(StrncpyExit10)
+	palignr	$10, %xmm3, %xmm2	/* same realignment with the saved block as the low half */
+	movaps	%xmm2, 16(%edx)
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit10)
+	movaps	%xmm4, 32(%edx)	/* %xmm4 / %xmm5 are prepared outside this chunk */
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit10)
+	movaps	%xmm5, 48(%edx)
+	lea	16(%esi), %esi
+	lea	-16(%ebx), %ebx	/* %ebx -= 16 without touching the flags */
+L(StrncpyExit10):	/* advance both pointers by %esi + 6 and copy the 8 bytes ending there */
+	lea	6(%edx, %esi), %edx
+	lea	6(%ecx, %esi), %ecx
+
+	movlpd	-8(%ecx), %xmm0
+	movlpd	%xmm0, -8(%edx)	/* 8 bytes, two more than the 6 just skipped */
+	xor	%esi, %esi	/* pointers are already advanced: hand the Case3 code a zero %esi */
+	jmp	L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave11):	/* leave path using palignr $11; stores up to four 16-byte blocks, then falls into L(StrncpyExit11) */
+	movaps	%xmm2, %xmm3	/* keep a copy of %xmm2: the palignr below overwrites it */
+	add	$48, %ebx	/* presumably undoes a bias applied by the code that jumps here */
+	jle	L(StrncpyExit11)
+	palignr	$11, %xmm1, %xmm2	/* %xmm2 = 16 bytes of the pair %xmm2:%xmm1 starting at byte 11 */
+	movaps	%xmm2, (%edx)
+	movaps	21(%ecx), %xmm2	/* aligned load: 21(%ecx) must be 16-byte aligned */
+	lea	16(%esi), %esi	/* %esi advances by 16 for every block stored */
+	sub	$16, %ebx
+	jbe	L(StrncpyExit11)
+	palignr	$11, %xmm3, %xmm2	/* same realignment with the saved block as the low half */
+	movaps	%xmm2, 16(%edx)
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit11)
+	movaps	%xmm4, 32(%edx)	/* %xmm4 / %xmm5 are prepared outside this chunk */
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit11)
+	movaps	%xmm5, 48(%edx)
+	lea	16(%esi), %esi
+	lea	-16(%ebx), %ebx	/* %ebx -= 16 without touching the flags */
+L(StrncpyExit11):	/* advance both pointers by %esi + 5 and copy the 5 bytes ending there */
+	lea	5(%edx, %esi), %edx
+	lea	5(%ecx, %esi), %ecx
+	movl	-5(%ecx), %esi	/* %esi is free as scratch: the lea above already consumed it */
+	movb	-1(%ecx), %ah	/* %ah is just a byte-sized scratch register here */
+	movl	%esi, -5(%edx)	/* bytes -5..-2 */
+	movb	%ah, -1(%edx)	/* byte -1 */
+	xor	%esi, %esi	/* pointers are already advanced: hand the Case3 code a zero %esi */
+	jmp	L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave12):	/* leave path using palignr $12; stores up to four 16-byte blocks, then falls into L(StrncpyExit12) */
+	movaps	%xmm2, %xmm3	/* keep a copy of %xmm2: the palignr below overwrites it */
+	add	$48, %ebx	/* presumably undoes a bias applied by the code that jumps here */
+	jle	L(StrncpyExit12)
+	palignr	$12, %xmm1, %xmm2	/* %xmm2 = 16 bytes of the pair %xmm2:%xmm1 starting at byte 12 */
+	movaps	%xmm2, (%edx)
+	movaps	20(%ecx), %xmm2	/* aligned load: 20(%ecx) must be 16-byte aligned */
+	lea	16(%esi), %esi	/* %esi advances by 16 for every block stored */
+	sub	$16, %ebx
+	jbe	L(StrncpyExit12)
+	palignr	$12, %xmm3, %xmm2	/* same realignment with the saved block as the low half */
+	movaps	%xmm2, 16(%edx)
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit12)
+	movaps	%xmm4, 32(%edx)	/* %xmm4 / %xmm5 are prepared outside this chunk */
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit12)
+	movaps	%xmm5, 48(%edx)
+	lea	16(%esi), %esi
+	lea	-16(%ebx), %ebx	/* %ebx -= 16 without touching the flags */
+L(StrncpyExit12):	/* advance both pointers by %esi + 4 and copy the 4 bytes ending there */
+	lea	4(%edx, %esi), %edx
+	lea	4(%ecx, %esi), %ecx
+	movl	-4(%ecx), %eax
+	movl	%eax, -4(%edx)
+	xor	%esi, %esi	/* pointers are already advanced: hand the Case3 code a zero %esi */
+	jmp	L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave13):	/* leave path using palignr $13; stores up to four 16-byte blocks, then falls into L(StrncpyExit13) */
+	movaps	%xmm2, %xmm3	/* keep a copy of %xmm2: the palignr below overwrites it */
+	add	$48, %ebx	/* presumably undoes a bias applied by the code that jumps here */
+	jle	L(StrncpyExit13)
+	palignr	$13, %xmm1, %xmm2	/* %xmm2 = 16 bytes of the pair %xmm2:%xmm1 starting at byte 13 */
+	movaps	%xmm2, (%edx)
+	movaps	19(%ecx), %xmm2	/* aligned load: 19(%ecx) must be 16-byte aligned */
+	lea	16(%esi), %esi	/* %esi advances by 16 for every block stored */
+	sub	$16, %ebx
+	jbe	L(StrncpyExit13)
+	palignr	$13, %xmm3, %xmm2	/* same realignment with the saved block as the low half */
+	movaps	%xmm2, 16(%edx)
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit13)
+	movaps	%xmm4, 32(%edx)	/* %xmm4 / %xmm5 are prepared outside this chunk */
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit13)
+	movaps	%xmm5, 48(%edx)
+	lea	16(%esi), %esi
+	lea	-16(%ebx), %ebx	/* %ebx -= 16 without touching the flags */
+L(StrncpyExit13):	/* advance both pointers by %esi + 3 and copy the 4 bytes ending there */
+	lea	3(%edx, %esi), %edx
+	lea	3(%ecx, %esi), %ecx
+
+	movl	-4(%ecx), %eax
+	movl	%eax, -4(%edx)	/* 4 bytes, one more than the 3 just skipped */
+	xor	%esi, %esi	/* pointers are already advanced: hand the Case3 code a zero %esi */
+	jmp	L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave14):	/* leave path using palignr $14; stores up to four 16-byte blocks, then falls into L(StrncpyExit14) */
+	movaps	%xmm2, %xmm3	/* keep a copy of %xmm2: the palignr below overwrites it */
+	add	$48, %ebx	/* presumably undoes a bias applied by the code that jumps here */
+	jle	L(StrncpyExit14)
+	palignr	$14, %xmm1, %xmm2	/* %xmm2 = 16 bytes of the pair %xmm2:%xmm1 starting at byte 14 */
+	movaps	%xmm2, (%edx)
+	movaps	18(%ecx), %xmm2	/* aligned load: 18(%ecx) must be 16-byte aligned */
+	lea	16(%esi), %esi	/* %esi advances by 16 for every block stored */
+	sub	$16, %ebx
+	jbe	L(StrncpyExit14)
+	palignr	$14, %xmm3, %xmm2	/* same realignment with the saved block as the low half */
+	movaps	%xmm2, 16(%edx)
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit14)
+	movaps	%xmm4, 32(%edx)	/* %xmm4 / %xmm5 are prepared outside this chunk */
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit14)
+	movaps	%xmm5, 48(%edx)
+	lea	16(%esi), %esi
+	lea	-16(%ebx), %ebx	/* %ebx -= 16 without touching the flags */
+L(StrncpyExit14):	/* advance both pointers by %esi + 2 and copy the 2 bytes ending there */
+	lea	2(%edx, %esi), %edx
+	lea	2(%ecx, %esi), %ecx
+	movw	-2(%ecx), %ax
+	movw	%ax, -2(%edx)
+	xor	%esi, %esi	/* pointers are already advanced: hand the Case3 code a zero %esi */
+	jmp	L(CopyFrom1To16BytesCase3)
+
+L(StrncpyLeave15):	/* leave path using palignr $15; stores up to four 16-byte blocks, then falls into L(StrncpyExit15) */
+	movaps	%xmm2, %xmm3	/* keep a copy of %xmm2: the palignr below overwrites it */
+	add	$48, %ebx	/* presumably undoes a bias applied by the code that jumps here */
+	jle	L(StrncpyExit15)
+	palignr	$15, %xmm1, %xmm2	/* %xmm2 = 16 bytes of the pair %xmm2:%xmm1 starting at byte 15 */
+	movaps	%xmm2, (%edx)
+	movaps	17(%ecx), %xmm2	/* aligned load: 17(%ecx) must be 16-byte aligned */
+	lea	16(%esi), %esi	/* %esi advances by 16 for every block stored */
+	sub	$16, %ebx
+	jbe	L(StrncpyExit15)
+	palignr	$15, %xmm3, %xmm2	/* same realignment with the saved block as the low half */
+	movaps	%xmm2, 16(%edx)
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit15)
+	movaps	%xmm4, 32(%edx)	/* %xmm4 / %xmm5 are prepared outside this chunk */
+	lea	16(%esi), %esi
+	sub	$16, %ebx
+	jbe	L(StrncpyExit15)
+	movaps	%xmm5, 48(%edx)
+	lea	16(%esi), %esi
+	lea	-16(%ebx), %ebx	/* %ebx -= 16 without touching the flags */
+L(StrncpyExit15):	/* advance both pointers by %esi + 1 and copy the single byte before them */
+	lea	1(%edx, %esi), %edx
+	lea	1(%ecx, %esi), %ecx
+	movb	-1(%ecx), %ah	/* %ah is just a byte-sized scratch register here */
+	movb	%ah, -1(%edx)
+	xor	%esi, %esi	/* pointers are already advanced: hand the Case3 code a zero %esi */
+	jmp	L(CopyFrom1To16BytesCase3)
+#endif
+
+#if !defined USE_AS_STRCAT && ! defined USE_AS_STRLCPY
+# ifdef USE_AS_STRNCPY
+	CFI_POP (%esi)
+	CFI_POP (%edi)
+
+	.p2align 4
+L(ExitTail0):	/* nothing to copy: return %edx in %eax */
+	movl	%edx, %eax
+	RETURN
+
+	.p2align 4
+L(StrncpyExit15Bytes):	/* short copy chosen by the count in %ebx and by zero bytes at source offsets 8-13 */
+	cmp	$12, %ebx
+	jbe	L(StrncpyExit12Bytes)	/* %ebx <= 12: handled by the smaller variant */
+	cmpb	$0, 8(%ecx)	/* offsets 0-7 are not examined here; presumably the caller already did */
+	jz	L(ExitTail9)
+	cmpb	$0, 9(%ecx)
+	jz	L(ExitTail10)
+	cmpb	$0, 10(%ecx)
+	jz	L(ExitTail11)
+	cmpb	$0, 11(%ecx)
+	jz	L(ExitTail12)
+	cmp	$13, %ebx
+	je	L(ExitTail13)	/* count is exactly 13 */
+	cmpb	$0, 12(%ecx)
+	jz	L(ExitTail13)
+	cmp	$14, %ebx
+	je	L(ExitTail14)	/* count is exactly 14 */
+	cmpb	$0, 13(%ecx)
+	jz	L(ExitTail14)
+	movlpd	(%ecx), %xmm0	/* no earlier exit taken: copy 15 bytes */
+	movlpd	7(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movlpd	%xmm1, 7(%edx)	/* bytes 7-14 */
+#  ifdef USE_AS_STPCPY
+	lea	14(%edx), %eax	/* %eax = address of the last byte stored */
+	cmpb	$1, (%eax)	/* CF = 1 only when that byte is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  else
+	movl	%edx, %eax	/* return %edx */
+#  endif
+	RETURN
+
+	.p2align 4
+L(StrncpyExit12Bytes):	/* short copy chosen by the count in %ebx and by zero bytes at source offsets 8-10 */
+	cmp	$9, %ebx
+	je	L(ExitTail9)	/* count is exactly 9 */
+	cmpb	$0, 8(%ecx)
+	jz	L(ExitTail9)
+	cmp	$10, %ebx
+	je	L(ExitTail10)	/* count is exactly 10 */
+	cmpb	$0, 9(%ecx)
+	jz	L(ExitTail10)
+	cmp	$11, %ebx
+	je	L(ExitTail11)	/* count is exactly 11 */
+	cmpb	$0, 10(%ecx)
+	jz	L(ExitTail11)
+	movlpd	(%ecx), %xmm0	/* no earlier exit taken: copy 12 bytes */
+	movl	8(%ecx), %eax
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movl	%eax, 8(%edx)	/* bytes 8-11 */
+	SAVE_RESULT_TAIL (11)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at (%eax) is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  endif
+	RETURN
+
+	.p2align 4
+L(StrncpyExit8Bytes):	/* short copy chosen by the count in %ebx and by zero bytes at source offsets 0-6 */
+	cmp	$4, %ebx
+	jbe	L(StrncpyExit4Bytes)	/* %ebx <= 4: handled by the smaller variant */
+	cmpb	$0, (%ecx)	/* count is above 4 here, so offsets 0-3 need no count check */
+	jz	L(ExitTail1)
+	cmpb	$0, 1(%ecx)
+	jz	L(ExitTail2)
+	cmpb	$0, 2(%ecx)
+	jz	L(ExitTail3)
+	cmpb	$0, 3(%ecx)
+	jz	L(ExitTail4)
+
+	cmp	$5, %ebx
+	je	L(ExitTail5)	/* count is exactly 5 */
+	cmpb	$0, 4(%ecx)
+	jz	L(ExitTail5)
+	cmp	$6, %ebx
+	je	L(ExitTail6)	/* count is exactly 6 */
+	cmpb	$0, 5(%ecx)
+	jz	L(ExitTail6)
+	cmp	$7, %ebx
+	je	L(ExitTail7)	/* count is exactly 7 */
+	cmpb	$0, 6(%ecx)
+	jz	L(ExitTail7)
+	movlpd	(%ecx), %xmm0	/* no earlier exit taken: copy 8 bytes */
+	movlpd	%xmm0, (%edx)
+#  ifdef USE_AS_STPCPY
+	lea	7(%edx), %eax	/* %eax = address of the last byte stored */
+	cmpb	$1, (%eax)	/* CF = 1 only when that byte is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  else
+	movl	%edx, %eax	/* return %edx */
+#  endif
+	RETURN
+
+	.p2align 4
+L(StrncpyExit4Bytes):	/* short copy chosen by the count in %ebx and by zero bytes at source offsets 0-2 */
+	test	%ebx, %ebx
+	jz	L(ExitTail0)	/* zero count: copy nothing */
+	cmp	$1, %ebx
+	je	L(ExitTail1)	/* count is exactly 1 */
+	cmpb	$0, (%ecx)
+	jz	L(ExitTail1)
+	cmp	$2, %ebx
+	je	L(ExitTail2)	/* count is exactly 2 */
+	cmpb	$0, 1(%ecx)
+	jz	L(ExitTail2)
+	cmp	$3, %ebx
+	je	L(ExitTail3)	/* count is exactly 3 */
+	cmpb	$0, 2(%ecx)
+	jz	L(ExitTail3)
+	movl	(%ecx), %eax	/* no earlier exit taken: copy 4 bytes */
+	movl	%eax, (%edx)
+	SAVE_RESULT_TAIL (3)	/* macro defined outside this chunk; presumably sets the return value in %eax */
+#  ifdef USE_AS_STPCPY
+	cmpb	$1, (%eax)	/* CF = 1 only when the byte at (%eax) is 0 */
+	sbb	$-1, %eax	/* %eax += 1 - CF */
+#  endif
+	RETURN
+# endif
+
+END (STRCPY)
+#endif
diff --git a/libc/arch-x86/string/ssse3-strlcat-atom.S b/libc/arch-x86/string/ssse3-strlcat-atom.S
new file mode 100644
index 0000000..daaf254
--- /dev/null
+++ b/libc/arch-x86/string/ssse3-strlcat-atom.S
@@ -0,0 +1,1225 @@
+/*
+Copyright (c) 2011, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+    * this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+    * this list of conditions and the following disclaimer in the documentation
+    * and/or other materials provided with the distribution.
+
+    * Neither the name of Intel Corporation nor the names of its contributors
+    * may be used to endorse or promote products derived from this software
+    * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Optimized strlcat with SSSE3 */
+
+#ifndef cfi_startproc	/* CFI / ENTRY / END helpers below are fallbacks, defined only when not already provided */
+# define cfi_startproc	.cfi_startproc
+#endif
+
+#ifndef cfi_endproc
+# define cfi_endproc	.cfi_endproc
+#endif
+
+#ifndef cfi_rel_offset
+# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+#endif
+
+#ifndef cfi_restore
+# define cfi_restore(reg)	.cfi_restore reg
+#endif
+
+#ifndef cfi_adjust_cfa_offset
+# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
+#endif
+
+#ifndef ENTRY	/* ENTRY(name): global function symbol, 16-byte aligned, opens a CFI region */
+# define ENTRY(name)	\
+	.type name,  @function;	\
+	.globl name;	\
+	.p2align 4;	\
+name:	\
+	cfi_startproc
+#endif
+
+#ifndef END	/* END(name): closes the CFI region and records the symbol size */
+# define END(name)	\
+	cfi_endproc;	\
+	.size name, .-name
+#endif
+
+#define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+#define PUSH(REG)	pushl	REG;	CFI_PUSH (REG)	/* push plus the matching unwind annotation */
+#define POP(REG)	popl	REG;	CFI_POP (REG)	/* pop plus the matching unwind annotation */
+#define L(label)	.L##Prolog_##label	/* local labels get a .LProlog_ prefix; L() is redefined further down */
+
+#define DST	4	/* %esp offset of the first argument at entry, before anything is pushed */
+#define SRC	DST+8	/* = 12; used after PUSH (%ebx), where it addresses the second argument */
+#define LEN	SRC+4	/* = 16; used after PUSH (%ebx), where it addresses the third argument */
+
+	.text
+ENTRY (strlcat)	/* entry point; arguments are read from the stack through DST / SRC / LEN */
+	mov	DST(%esp), %edx	/* %edx = first argument (dst) */
+	PUSH	(%ebx)
+	mov	LEN(%esp), %ebx	/* %ebx = third argument (size) */
+	sub	$4, %ebx
+	jbe	L(len_less4_prolog)	/* size <= 4; label is not defined in this chunk */
+
+#define RETURN	jmp	L(StrcpyStep)	/* the included code jumps to L(StrcpyStep) wherever it uses RETURN */
+#define edi	ebx	/* textually replaces edi with ebx inside the included strlen code */
+
+#define USE_AS_STRNLEN
+#define USE_AS_STRCAT
+#define USE_AS_STRLCAT
+
+#include "sse2-strlen-atom.S"
+
+	.p2align 4
+L(StrcpyStep):	/* %eax is presumably the bounded length of dst produced by the included code */
+
+#undef edi
+#undef L
+#define L(label) .L##label
+#undef RETURN
+#define RETURN	POP (%ebx); ret; CFI_PUSH (%ebx);
+#define RETURN1	POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
+
+        movl	SRC(%esp), %ecx
+	movl	LEN(%esp), %ebx	/* reload size */
+
+	cmp	%eax, %ebx
+	je	L(CalculateLengthOfSrcProlog)	/* %eax == size: nothing can be appended */
+	sub	%eax, %ebx	/* %ebx = size - %eax */
+
+	test	%ebx, %ebx	/* NOTE(review): after the cmp/je above this difference cannot be zero, so the jz looks unreachable */
+	jz	L(CalculateLengthOfSrcProlog)
+
+	mov	DST + 4(%esp), %edx	/* reload dst; +4 accounts for the pushed %ebx */
+
+	PUSH	(%edi)
+	add	%eax, %edx	/* %edx = dst + %eax, where copying starts */
+	mov	%ecx, %edi
+	sub	%eax, %edi	/* %edi = src - %eax; the exits return (src position) - %edi */
+
+	cmp	$8, %ebx
+	jbe	L(StrncpyExit8Bytes)	/* at most 8 bytes of room */
+
+	cmpb	$0, (%ecx)	/* look for the terminator in source bytes 0-7 */
+	jz	L(Exit1)
+	cmpb	$0, 1(%ecx)
+	jz	L(Exit2)
+	cmpb	$0, 2(%ecx)
+	jz	L(Exit3)
+	cmpb	$0, 3(%ecx)
+	jz	L(Exit4)
+	cmpb	$0, 4(%ecx)
+	jz	L(Exit5)
+	cmpb	$0, 5(%ecx)
+	jz	L(Exit6)
+	cmpb	$0, 6(%ecx)
+	jz	L(Exit7)
+	cmpb	$0, 7(%ecx)
+	jz	L(Exit8)
+	cmp	$16, %ebx
+	jb	L(StrncpyExit15Bytes)	/* 9..15 bytes of room */
+	cmpb	$0, 8(%ecx)	/* look for the terminator in source bytes 8-15 */
+	jz	L(Exit9)
+	cmpb	$0, 9(%ecx)
+	jz	L(Exit10)
+	cmpb	$0, 10(%ecx)
+	jz	L(Exit11)
+	cmpb	$0, 11(%ecx)
+	jz	L(Exit12)
+	cmpb	$0, 12(%ecx)
+	jz	L(Exit13)
+	cmpb	$0, 13(%ecx)
+	jz	L(Exit14)
+	cmpb	$0, 14(%ecx)
+	jz	L(Exit15)
+	cmpb	$0, 15(%ecx)
+	jz	L(Exit16)
+	cmp	$16, %ebx
+	je	L(StrlcpyExit16)	/* exactly 16 bytes of room and no terminator in the first 16 source bytes */
+
+#define USE_AS_STRNCPY
+#include "ssse3-strcpy-atom.S"
+
+	.p2align 4
+L(CopyFrom1To16Bytes):	/* jump to the L(ExitN) matching the lowest set bit of the 16-bit mask in %ax */
+	add	%esi, %edx	/* apply the pending offset in %esi to both pointers */
+	add	%esi, %ecx
+
+	POP	(%esi)
+	test	%al, %al
+	jz	L(ExitHigh8)	/* bits 0-7 clear: the set bit is in %ah */
+
+L(CopyFrom1To16BytesLess8):	/* a bit in %al is set */
+	mov	%al, %ah
+	and	$15, %ah	/* %ah is used as scratch from here on */
+	jz	L(ExitHigh4)	/* bits 0-3 clear */
+
+	test	$0x01, %al
+	jnz	L(Exit1)
+	test	$0x02, %al
+	jnz	L(Exit2)
+	test	$0x04, %al
+	jnz	L(Exit3)
+L(Exit4):	/* copy 4 bytes and return */
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+
+	lea	3(%ecx), %eax
+	sub	%edi, %eax	/* return value = (%ecx + 3) - %edi */
+	RETURN1
+
+	.p2align 4
+L(ExitHigh4):	/* lowest set mask bit is one of bits 4-7 of %al */
+	test	$0x10, %al
+	jnz	L(Exit5)
+	test	$0x20, %al
+	jnz	L(Exit6)
+	test	$0x40, %al
+	jnz	L(Exit7)
+L(Exit8):	/* copy 8 bytes and return */
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+
+	lea	7(%ecx), %eax
+	sub	%edi, %eax	/* return value = (%ecx + 7) - %edi */
+	RETURN1
+
+	.p2align 4
+L(ExitHigh8):	/* %al was zero: the lowest set mask bit is in %ah (bits 8-15) */
+	mov	%ah, %al
+	and	$15, %al	/* %al is used as scratch from here on */
+	jz	L(ExitHigh12)	/* bits 8-11 clear */
+
+	test	$0x01, %ah
+	jnz	L(Exit9)
+	test	$0x02, %ah
+	jnz	L(Exit10)
+	test	$0x04, %ah
+	jnz	L(Exit11)
+L(Exit12):	/* copy 12 bytes and return */
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movl	8(%ecx), %eax
+	movl	%eax, 8(%edx)	/* bytes 8-11 */
+
+	lea	11(%ecx), %eax
+	sub	%edi, %eax	/* return value = (%ecx + 11) - %edi */
+	RETURN1
+
+	.p2align 4
+L(ExitHigh12):	/* lowest set mask bit is one of bits 12-15 (bits 4-7 of %ah) */
+	test	$0x10, %ah
+	jnz	L(Exit13)
+	test	$0x20, %ah
+	jnz	L(Exit14)
+	test	$0x40, %ah
+	jnz	L(Exit15)
+L(Exit16):	/* copy 16 bytes and return */
+	movlpd	(%ecx), %xmm0
+	movlpd	8(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)	/* bytes 0-7 */
+	movlpd	%xmm1, 8(%edx)	/* bytes 8-15 */
+
+	lea	15(%ecx), %eax
+	sub	%edi, %eax	/* return value = (%ecx + 15) - %edi */
+	RETURN1
+
+	CFI_PUSH(%esi)
+
+	.p2align 4
+L(CopyFrom1To16BytesCase2):	/* both limits apply: mask bits in %ax and the byte budget in %ebx; whichever comes first wins */
+	add	$16, %ebx	/* presumably undoes a 16-byte bias on %ebx */
+	add	%esi, %ecx	/* apply the pending offset in %esi to both pointers */
+	add	%esi, %edx
+
+	POP	(%esi)
+
+	test	%al, %al
+	jz	L(ExitHighCase2)	/* no mask bit among bits 0-7 */
+
+	cmp	$8, %ebx
+	ja	L(CopyFrom1To16BytesLess8)	/* budget above 8: the mask bit in %al decides alone */
+
+	test	$0x01, %al	/* otherwise alternate: mask bit k-1 first, then budget == k */
+	jnz	L(Exit1)
+	cmp	$1, %ebx
+	je	L(StrlcpyExit1)
+	test	$0x02, %al
+	jnz	L(Exit2)
+	cmp	$2, %ebx
+	je	L(StrlcpyExit2)
+	test	$0x04, %al
+	jnz	L(Exit3)
+	cmp	$3, %ebx
+	je	L(StrlcpyExit3)
+	test	$0x08, %al
+	jnz	L(Exit4)
+	cmp	$4, %ebx
+	je	L(StrlcpyExit4)
+	test	$0x10, %al
+	jnz	L(Exit5)
+	cmp	$5, %ebx
+	je	L(StrlcpyExit5)
+	test	$0x20, %al
+	jnz	L(Exit6)
+	cmp	$6, %ebx
+	je	L(StrlcpyExit6)
+	test	$0x40, %al
+	jnz	L(Exit7)
+	cmp	$7, %ebx
+	je	L(StrlcpyExit7)
+	test	$0x80, %al
+	jnz	L(Exit8)
+	jmp	L(StrlcpyExit8)
+
+	.p2align 4
+L(ExitHighCase2):	/* no mask bit in %al; continue the bit-versus-budget alternation on %ah */
+	cmp	$8, %ebx
+	jbe	L(CopyFrom1To16BytesLess8Case3)	/* budget of 8 or less ends first: budget-only path */
+
+	test	$0x01, %ah
+	jnz	L(Exit9)
+	cmp	$9, %ebx
+	je	L(StrlcpyExit9)
+	test	$0x02, %ah
+	jnz	L(Exit10)
+	cmp	$10, %ebx
+	je	L(StrlcpyExit10)
+	test	$0x04, %ah
+	jnz	L(Exit11)
+	cmp	$11, %ebx
+	je	L(StrlcpyExit11)
+	test	$0x8, %ah
+	jnz	L(Exit12)
+	cmp	$12, %ebx
+	je	L(StrlcpyExit12)
+	test	$0x10, %ah
+	jnz	L(Exit13)
+	cmp	$13, %ebx
+	je	L(StrlcpyExit13)
+	test	$0x20, %ah
+	jnz	L(Exit14)
+	cmp	$14, %ebx
+	je	L(StrlcpyExit14)
+	test	$0x40, %ah
+	jnz	L(Exit15)
+	cmp	$15, %ebx
+	je	L(StrlcpyExit15)
+	test	$0x80, %ah
+	jnz	L(Exit16)
+	jmp	L(StrlcpyExit16)
+
+	CFI_PUSH(%esi)
+
+	.p2align 4
+L(CopyFrom1To16BytesCase2OrCase3):
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)
+
+	.p2align 4
+L(CopyFrom1To16BytesCase3):
+	add	$16, %ebx
+	add	%esi, %edx
+	add	%esi, %ecx
+
+	POP	(%esi)
+
+	cmp	$8, %ebx
+	ja	L(ExitHigh8Case3)
+
+L(CopyFrom1To16BytesLess8Case3):	/* %ebx <= 8 here: select the StrlcpyExitN block from %ebx alone.  */
+	cmp	$4, %ebx
+	ja	L(ExitHigh4Case3)	/* 5..8 handled there */
+
+	cmp	$1, %ebx
+	je	L(StrlcpyExit1)
+	cmp	$2, %ebx
+	je	L(StrlcpyExit2)
+	cmp	$3, %ebx
+	je	L(StrlcpyExit3)	/* NOTE(review): %ebx == 0 would also fall through to StrlcpyExit4 - presumably excluded by the callers; confirm */
+L(StrlcpyExit4):	/* Store 3 source bytes followed by a zero byte, then go on measuring the source.  */
+	movb	%bh, 3(%edx)	/* %bh is 0 because %ebx <= 4 on every visible path into this label */
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+	movb	2(%ecx), %al
+	movb	%al, 2(%edx)
+
+	lea	4(%ecx), %edx	/* %edx = first source byte that was not examined yet */
+	mov	%edi, %ecx	/* value saved in %edi becomes the base subtracted by the exit_tail code */
+	POP	(%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(ExitHigh4Case3):
+	cmp	$5, %ebx
+	je	L(StrlcpyExit5)
+	cmp	$6, %ebx
+	je	L(StrlcpyExit6)
+	cmp	$7, %ebx
+	je	L(StrlcpyExit7)
+L(StrlcpyExit8):
+	movb	%bh, 7(%edx)
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movl	3(%ecx), %eax
+	movl	%eax, 3(%edx)
+
+	lea	8(%ecx), %edx
+	mov	%edi, %ecx
+	POP	(%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(ExitHigh8Case3):
+	cmp	$12, %ebx
+	ja	L(ExitHigh12Case3)
+
+	cmp	$9, %ebx
+	je	L(StrlcpyExit9)
+	cmp	$10, %ebx
+	je	L(StrlcpyExit10)
+	cmp	$11, %ebx
+	je	L(StrlcpyExit11)
+L(StrlcpyExit12):
+	movb	%bh, 11(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movl	7(%ecx), %eax
+	movl	%eax, 7(%edx)
+
+	lea	12(%ecx), %edx
+	mov	%edi, %ecx
+	POP	(%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(ExitHigh12Case3):
+	cmp	$13, %ebx
+	je	L(StrlcpyExit13)
+	cmp	$14, %ebx
+	je	L(StrlcpyExit14)
+	cmp	$15, %ebx
+	je	L(StrlcpyExit15)
+L(StrlcpyExit16):
+	movb	%bh, 15(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	7(%ecx), %xmm0
+	movlpd	%xmm0, 7(%edx)
+
+	lea	16(%ecx), %edx
+	mov	%edi, %ecx
+	POP	(%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(StrlcpyExit1):
+	movb	%bh, (%edx)
+
+	lea	1(%ecx), %edx
+	mov	%edi, %ecx
+	POP	(%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit1):
+	movb	(%ecx), %al
+	movb	%al, (%edx)
+
+	mov	%ecx, %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit2):
+	movb	%bh, 1(%edx)
+	movb	(%ecx), %al
+	movb	%al, (%edx)
+
+	lea	2(%ecx), %edx
+	mov	%edi, %ecx
+	POP	(%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit2):	/* Byte 1 at (%ecx) is NUL: copy 2 bytes, terminator included.  */
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+
+	/* Return (%ecx + 1) - %edi.  %eax needs no other setup: lea overwrites it.  */
+	lea	1(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit3):
+	movb	%bh, 2(%edx)
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+
+	lea	3(%ecx), %edx
+	mov	%edi, %ecx
+	POP	(%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit3):
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+	movb	2(%ecx), %al
+	movb	%al, 2(%edx)
+
+	lea	2(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit5):	/* Reached with %ebx == 5: store 4 source bytes plus a zero byte.  */
+	movb	%bh, 4(%edx)	/* %bh is 0 because %ebx == 5 */
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+
+	/* Continue measuring the source from byte 5; %eax is not live across the jump.  */
+	lea	5(%ecx), %edx
+	mov	%edi, %ecx
+	POP	(%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit5):
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movb	4(%ecx), %al
+	movb	%al, 4(%edx)
+
+	lea	4(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit6):
+	movb	%bh, 5(%edx)
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movb	4(%ecx), %al
+	movb	%al, 4(%edx)
+
+	lea	6(%ecx), %edx
+	mov	%edi, %ecx
+	POP	(%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit6):
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movw	4(%ecx), %ax
+	movw	%ax, 4(%edx)
+
+	lea	5(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit7):
+	movb	%bh, 6(%edx)
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movw	4(%ecx), %ax
+	movw	%ax, 4(%edx)
+
+	lea	7(%ecx), %edx
+	mov	%edi, %ecx
+	POP	(%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit7):
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movl	3(%ecx), %eax
+	movl	%eax, 3(%edx)
+
+	lea	6(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit9):
+	movb	%bh, 8(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+
+	lea	9(%ecx), %edx
+	mov	%edi, %ecx
+	POP	(%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit9):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movb	8(%ecx), %al
+	movb	%al, 8(%edx)
+
+	lea	8(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit10):
+	movb	%bh, 9(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movb	8(%ecx), %al
+	movb	%al, 8(%edx)
+
+	lea	10(%ecx), %edx
+	mov	%edi, %ecx
+	POP	(%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit10):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movw	8(%ecx), %ax
+	movw	%ax, 8(%edx)
+
+	lea	9(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit11):
+	movb	%bh, 10(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movw	8(%ecx), %ax
+	movw	%ax, 8(%edx)
+
+	lea	11(%ecx), %edx
+	mov	%edi, %ecx
+	POP	(%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit11):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movl	7(%ecx), %eax
+	movl	%eax, 7(%edx)
+
+	lea	10(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit13):
+	movb	%bh, 12(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movl	8(%ecx), %eax
+	movl	%eax, 8(%edx)
+
+	lea	13(%ecx), %edx
+	mov	%edi, %ecx
+	POP	(%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit13):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	5(%ecx), %xmm0
+	movlpd	%xmm0, 5(%edx)
+
+	lea	12(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit14):
+	movb	%bh, 13(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	5(%ecx), %xmm0
+	movlpd	%xmm0, 5(%edx)
+
+	lea	14(%ecx), %edx
+	mov	%edi, %ecx
+	POP	(%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit14):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	6(%ecx), %xmm0
+	movlpd	%xmm0, 6(%edx)
+
+	lea	13(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit15):
+	movb	%bh, 14(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	6(%ecx), %xmm0
+	movlpd	%xmm0, 6(%edx)
+
+	lea	15(%ecx), %edx
+	mov	%edi, %ecx
+	POP	(%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit15):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	7(%ecx), %xmm0
+	movlpd	%xmm0, 7(%edx)
+
+	lea	14(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrncpyExit15Bytes):
+	cmp	$12, %ebx
+	ja	L(StrncpyExit15Bytes1)
+
+	cmpb	$0, 8(%ecx)
+	jz	L(Exit9)
+	cmp	$9, %ebx
+	je	L(StrlcpyExit9)
+
+	cmpb	$0, 9(%ecx)
+	jz	L(Exit10)
+	cmp	$10, %ebx
+	je	L(StrlcpyExit10)
+
+	cmpb	$0, 10(%ecx)
+	jz	L(Exit11)
+	cmp	$11, %ebx
+	je	L(StrlcpyExit11)
+
+	cmpb	$0, 11(%ecx)
+	jz	L(Exit12)
+	jmp	L(StrlcpyExit12)
+
+	.p2align 4
+L(StrncpyExit15Bytes1):
+	cmpb	$0, 8(%ecx)
+	jz	L(Exit9)
+	cmpb	$0, 9(%ecx)
+	jz	L(Exit10)
+	cmpb	$0, 10(%ecx)
+	jz	L(Exit11)
+	cmpb	$0, 11(%ecx)
+	jz	L(Exit12)
+
+	cmpb	$0, 12(%ecx)
+	jz	L(Exit13)
+	cmp	$13, %ebx
+	je	L(StrlcpyExit13)
+
+	cmpb	$0, 13(%ecx)
+	jz	L(Exit14)
+	cmp	$14, %ebx
+	je	L(StrlcpyExit14)
+
+	cmpb	$0, 14(%ecx)
+	jz	L(Exit15)
+	jmp	L(StrlcpyExit15)
+
+	.p2align 4
+L(StrncpyExit8Bytes):	/* Byte-by-byte exit selection for the first bytes at (%ecx), limited by the count in %ebx.  */
+	cmp	$4, %ebx
+	ja	L(StrncpyExit8Bytes1)	/* %ebx > 4: bytes 0..3 need no count check */
+
+	cmpb	$0, (%ecx)	/* pattern below: byte k is NUL -> Exit(k+1) ... */
+	jz	L(Exit1)
+	cmp	$1, %ebx	/* ... else %ebx == k+1 -> StrlcpyExit(k+1) */
+	je	L(StrlcpyExit1)
+
+	cmpb	$0, 1(%ecx)
+	jz	L(Exit2)
+	cmp	$2, %ebx
+	je	L(StrlcpyExit2)
+
+	cmpb	$0, 2(%ecx)
+	jz	L(Exit3)
+	cmp	$3, %ebx
+	je	L(StrlcpyExit3)
+
+	cmpb	$0, 3(%ecx)
+	jz	L(Exit4)
+	jmp	L(StrlcpyExit4)	/* no NUL in bytes 0..3 and %ebx <= 4 */
+
+	.p2align 4
+L(StrncpyExit8Bytes1):
+	cmpb	$0, (%ecx)
+	jz	L(Exit1)
+	cmpb	$0, 1(%ecx)
+	jz	L(Exit2)
+	cmpb	$0, 2(%ecx)
+	jz	L(Exit3)
+	cmpb	$0, 3(%ecx)
+	jz	L(Exit4)
+
+	cmpb	$0, 4(%ecx)
+	jz	L(Exit5)
+	cmp	$5, %ebx
+	je	L(StrlcpyExit5)
+
+	cmpb	$0, 5(%ecx)
+	jz	L(Exit6)
+	cmp	$6, %ebx
+	je	L(StrlcpyExit6)
+
+	cmpb	$0, 6(%ecx)
+	jz	L(Exit7)
+	cmp	$7, %ebx
+	je	L(StrlcpyExit7)
+
+	cmpb	$0, 7(%ecx)
+	jz	L(Exit8)
+	jmp	L(StrlcpyExit8)
+
+	CFI_POP	(%edi)
+
+
+	.p2align 4
+L(Prolog_return_start_len):	/* Reload LEN and SRC from the stack, then fall into the length scan below.  */
+	movl	LEN(%esp), %ebx
+        movl	SRC(%esp), %ecx
+L(CalculateLengthOfSrcProlog):	/* Entry for callers that already hold the two values in %ebx and %ecx.  */
+	mov	%ecx, %edx	/* %edx = scan cursor, starting at the address in %ecx */
+	sub	%ebx, %ecx	/* base = start - %ebx, so (NUL address - base) = distance scanned + %ebx */
+
+	.p2align 4
+L(CalculateLengthOfSrc):
+	cmpb	$0, (%edx)
+	jz	L(exit_tail0)
+	cmpb	$0, 1(%edx)
+	jz	L(exit_tail1)
+	cmpb	$0, 2(%edx)
+	jz	L(exit_tail2)
+	cmpb	$0, 3(%edx)
+	jz	L(exit_tail3)
+
+	cmpb	$0, 4(%edx)
+	jz	L(exit_tail4)
+	cmpb	$0, 5(%edx)
+	jz	L(exit_tail5)
+	cmpb	$0, 6(%edx)
+	jz	L(exit_tail6)
+	cmpb	$0, 7(%edx)
+	jz	L(exit_tail7)
+
+	cmpb	$0, 8(%edx)
+	jz	L(exit_tail8)
+	cmpb	$0, 9(%edx)
+	jz	L(exit_tail9)
+	cmpb	$0, 10(%edx)
+	jz	L(exit_tail10)
+	cmpb	$0, 11(%edx)
+	jz	L(exit_tail11)
+
+	cmpb	$0, 12(%edx)
+	jz	L(exit_tail12)
+	cmpb	$0, 13(%edx)
+	jz	L(exit_tail13)
+	cmpb	$0, 14(%edx)
+	jz	L(exit_tail14)
+	cmpb	$0, 15(%edx)
+	jz	L(exit_tail15)
+
+	pxor	%xmm0, %xmm0
+	lea	16(%edx), %eax
+	add	$16, %ecx
+	and	$-16, %eax
+
+	pcmpeqb	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	pxor	%xmm1, %xmm1
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	pxor	%xmm2, %xmm2
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	pxor	%xmm3, %xmm3
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	and	$-0x40, %eax
+
+	.p2align 4
+L(aligned_64_loop):	/* Scan 64 aligned bytes per iteration for a zero byte; %xmm3 is all-zero on entry.  */
+	movaps	(%eax), %xmm0
+	movaps	16(%eax), %xmm1
+	movaps	32(%eax), %xmm2
+	movaps	48(%eax), %xmm6
+	pminub	%xmm1, %xmm0	/* unsigned byte minimum: a lane becomes 0 if either input lane is 0 */
+	pminub	%xmm6, %xmm2
+	pminub	%xmm0, %xmm2	/* %xmm2 = lane-wise minimum of all four 16-byte chunks */
+	pcmpeqb	%xmm3, %xmm2	/* compare the minima against zero */
+	pmovmskb %xmm2, %edx
+	lea	64(%eax), %eax
+	test	%edx, %edx
+	jz	L(aligned_64_loop)	/* no zero byte in these 64 bytes */
+
+	pcmpeqb	-64(%eax), %xmm3	/* zero byte found: re-test chunk 0 (%xmm3 stays zero when there is no match) */
+	pmovmskb %xmm3, %edx
+	lea	48(%ecx), %ecx	/* adjust the base so that %eax - %ecx at L(exit) refers to chunk 0 */
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	%xmm1, %xmm3	/* chunk 1 (still held in %xmm1) */
+	pmovmskb %xmm3, %edx
+	lea	-16(%ecx), %ecx	/* each later chunk starts 16 bytes further on */
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	-32(%eax), %xmm3	/* chunk 2 */
+	pmovmskb %xmm3, %edx
+	lea	-16(%ecx), %ecx
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	%xmm6, %xmm3	/* chunk 3 (still held in %xmm6); falls through to L(exit) */
+	pmovmskb %xmm3, %edx
+	lea	-16(%ecx), %ecx
+
+	.p2align 4
+L(exit):	/* %edx = pmovmskb mask of zero bytes in one 16-byte chunk; add the lowest set bit's index to the length.  */
+	sub	%ecx, %eax	/* %eax = cursor - base */
+	test	%dl, %dl
+	jz	L(exit_more_8)	/* no hit in bytes 0..7 */
+
+	mov	%dl, %cl
+	and	$15, %cl
+	jz	L(exit_more_4)	/* no hit in bytes 0..3 */
+	test	$0x01, %dl
+	jnz	L(exit_0)
+	test	$0x02, %dl
+	jnz	L(exit_1)
+	test	$0x04, %dl
+	jnz	L(exit_2)
+	add	$3, %eax	/* only bit 3 of the low nibble can be set here */
+	RETURN
+
+	.p2align 4
+L(exit_more_4):
+	test	$0x10, %dl
+	jnz	L(exit_4)
+	test	$0x20, %dl
+	jnz	L(exit_5)
+	test	$0x40, %dl
+	jnz	L(exit_6)
+	add	$7, %eax
+	RETURN
+
+	.p2align 4
+L(exit_more_8):
+	mov	%dh, %ch
+	and	$15, %ch
+	jz	L(exit_more_12)
+	test	$0x01, %dh
+	jnz	L(exit_8)
+	test	$0x02, %dh
+	jnz	L(exit_9)
+	test	$0x04, %dh
+	jnz	L(exit_10)
+	add	$11, %eax
+	RETURN
+
+	.p2align 4
+L(exit_more_12):
+	test	$0x10, %dh
+	jnz	L(exit_12)
+	test	$0x20, %dh
+	jnz	L(exit_13)
+	test	$0x40, %dh
+	jnz	L(exit_14)
+	add	$15, %eax
+L(exit_0):
+	RETURN
+
+	.p2align 4
+L(exit_1):
+	add	$1, %eax
+	RETURN
+
+L(exit_2):
+	add	$2, %eax
+	RETURN
+
+L(exit_3):
+	add	$3, %eax
+	RETURN
+
+L(exit_4):
+	add	$4, %eax
+	RETURN
+
+L(exit_5):
+	add	$5, %eax
+	RETURN
+
+L(exit_6):
+	add	$6, %eax
+	RETURN
+
+L(exit_7):
+	add	$7, %eax
+	RETURN
+
+L(exit_8):
+	add	$8, %eax
+	RETURN
+
+L(exit_9):
+	add	$9, %eax
+	RETURN
+
+L(exit_10):
+	add	$10, %eax
+	RETURN
+
+L(exit_11):
+	add	$11, %eax
+	RETURN
+
+L(exit_12):
+	add	$12, %eax
+	RETURN
+
+L(exit_13):
+	add	$13, %eax
+	RETURN
+
+L(exit_14):
+	add	$14, %eax
+	RETURN
+
+L(exit_15):
+	add	$15, %eax
+	RETURN
+
+L(exit_tail0):
+	mov	%edx, %eax
+	sub	%ecx, %eax
+	RETURN
+
+	.p2align 4
+L(exit_tail1):
+	lea	1(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail2):
+	lea	2(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail3):
+	lea	3(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail4):
+	lea	4(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail5):
+	lea	5(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail6):
+	lea	6(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail7):
+	lea	7(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail8):
+	lea	8(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail9):
+	lea	9(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail10):
+	lea	10(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail11):
+	lea	11(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail12):
+	lea	12(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail13):
+	lea	13(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail14):
+	lea	14(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail15):
+	lea	15(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+END (strlcat)
diff --git a/libc/arch-x86/string/ssse3-strlcpy-atom.S b/libc/arch-x86/string/ssse3-strlcpy-atom.S
new file mode 100644
index 0000000..cdb17cc
--- /dev/null
+++ b/libc/arch-x86/string/ssse3-strlcpy-atom.S
@@ -0,0 +1,1403 @@
+/*
+Copyright (c) 2011, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+    * this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+    * this list of conditions and the following disclaimer in the documentation
+    * and/or other materials provided with the distribution.
+
+    * Neither the name of Intel Corporation nor the names of its contributors
+    * may be used to endorse or promote products derived from this software
+    * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define USE_AS_STRNCPY
+#define STRCPY strlcpy
+#define STRLEN strlcpy
+#define USE_AS_STRLCPY
+#include "ssse3-strcpy-atom.S"
+
+	.p2align 4
+L(CopyFrom1To16Bytes):	/* Pick the ExitN block matching the lowest set bit of the 16-bit mask in %ax.  */
+	add	%esi, %edx	/* advance both pointers by the offset held in %esi */
+	add	%esi, %ecx
+
+	POP	(%esi)
+	test	%al, %al
+	jz	L(ExitHigh8)	/* no bit set for bytes 0..7 */
+
+L(CopyFrom1To16BytesLess8):	/* A bit is set in %al (bytes 0..7).  */
+	mov	%al, %ah	/* %ah is used as scratch from here on */
+	and	$15, %ah
+	jz	L(ExitHigh4)	/* low nibble clear: the bit is for bytes 4..7 */
+
+	test	$0x01, %al
+	jnz	L(Exit1)
+	test	$0x02, %al
+	jnz	L(Exit2)
+	test	$0x04, %al
+	jnz	L(Exit3)
+L(Exit4):	/* Copy 4 bytes and return (%ecx + 3) - %edi.  */
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+
+	lea	3(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(ExitHigh4):
+	test	$0x10, %al
+	jnz	L(Exit5)
+	test	$0x20, %al
+	jnz	L(Exit6)
+	test	$0x40, %al
+	jnz	L(Exit7)
+L(Exit8):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+
+	lea	7(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(ExitHigh8):
+	mov	%ah, %al
+	and	$15, %al
+	jz	L(ExitHigh12)
+
+	test	$0x01, %ah
+	jnz	L(Exit9)
+	test	$0x02, %ah
+	jnz	L(Exit10)
+	test	$0x04, %ah
+	jnz	L(Exit11)
+L(Exit12):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movl	8(%ecx), %eax
+	movl	%eax, 8(%edx)
+
+	lea	11(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(ExitHigh12):
+	test	$0x10, %ah
+	jnz	L(Exit13)
+	test	$0x20, %ah
+	jnz	L(Exit14)
+	test	$0x40, %ah
+	jnz	L(Exit15)
+L(Exit16):
+	movlpd	(%ecx), %xmm0
+	movlpd	8(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)
+	movlpd	%xmm1, 8(%edx)
+
+	lea	15(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	CFI_PUSH(%esi)
+
+	.p2align 4
+L(CopyFrom1To16BytesCase2):
+	add	$16, %ebx
+	add	%esi, %ecx
+        add     %esi, %edx
+
+	POP	(%esi)
+
+        test    %al, %al
+        jz      L(ExitHighCase2)
+
+        cmp     $8, %ebx
+        ja      L(CopyFrom1To16BytesLess8)
+
+	test	$0x01, %al
+	jnz	L(Exit1)
+	cmp	$1, %ebx
+	je	L(StrlcpyExit1)
+	test	$0x02, %al
+	jnz	L(Exit2)
+	cmp	$2, %ebx
+	je	L(StrlcpyExit2)
+	test	$0x04, %al
+	jnz	L(Exit3)
+	cmp	$3, %ebx
+	je	L(StrlcpyExit3)
+	test	$0x08, %al
+	jnz	L(Exit4)
+	cmp	$4, %ebx
+	je	L(StrlcpyExit4)
+	test	$0x10, %al
+	jnz	L(Exit5)
+	cmp	$5, %ebx
+	je	L(StrlcpyExit5)
+	test	$0x20, %al
+	jnz	L(Exit6)
+	cmp	$6, %ebx
+	je	L(StrlcpyExit6)
+	test	$0x40, %al
+	jnz	L(Exit7)
+	cmp	$7, %ebx
+	je	L(StrlcpyExit7)
+	test	$0x80, %al
+	jnz	L(Exit8)
+	jmp	L(StrlcpyExit8)
+
+	.p2align 4
+L(ExitHighCase2):
+        cmp     $8, %ebx
+        jbe      L(CopyFrom1To16BytesLess8Case3)
+
+	test	$0x01, %ah
+	jnz	L(Exit9)
+	cmp	$9, %ebx
+	je	L(StrlcpyExit9)
+	test	$0x02, %ah
+	jnz	L(Exit10)
+	cmp	$10, %ebx
+	je	L(StrlcpyExit10)
+	test	$0x04, %ah
+	jnz	L(Exit11)
+	cmp	$11, %ebx
+	je	L(StrlcpyExit11)
+	test	$0x8, %ah
+	jnz	L(Exit12)
+	cmp	$12, %ebx
+	je	L(StrlcpyExit12)
+	test	$0x10, %ah
+	jnz	L(Exit13)
+	cmp	$13, %ebx
+	je	L(StrlcpyExit13)
+	test	$0x20, %ah
+	jnz	L(Exit14)
+	cmp	$14, %ebx
+	je	L(StrlcpyExit14)
+	test	$0x40, %ah
+	jnz	L(Exit15)
+	cmp	$15, %ebx
+	je	L(StrlcpyExit15)
+	test	$0x80, %ah
+	jnz	L(Exit16)
+	jmp	L(StrlcpyExit16)
+
+	CFI_PUSH(%esi)
+
+	.p2align 4
+L(CopyFrom1To16BytesCase2OrCase3):
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16BytesCase2)
+
+	.p2align 4
+L(CopyFrom1To16BytesCase3):
+	add	$16, %ebx
+	add	%esi, %edx
+	add	%esi, %ecx
+
+	POP	(%esi)
+
+	cmp	$8, %ebx
+	ja	L(ExitHigh8Case3)
+
+L(CopyFrom1To16BytesLess8Case3):
+	cmp	$4, %ebx
+	ja	L(ExitHigh4Case3)
+
+	cmp	$1, %ebx
+	je	L(StrlcpyExit1)
+	cmp	$2, %ebx
+	je	L(StrlcpyExit2)
+	cmp	$3, %ebx
+	je	L(StrlcpyExit3)
+L(StrlcpyExit4):
+	movb	%bh, 3(%edx)
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+	movb	2(%ecx), %al
+	movb	%al, 2(%edx)
+
+	lea	4(%ecx), %edx
+	mov	%edi, %ecx
+        POP     (%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(ExitHigh4Case3):
+	cmp	$5, %ebx
+	je	L(StrlcpyExit5)
+	cmp	$6, %ebx
+	je	L(StrlcpyExit6)
+	cmp	$7, %ebx
+	je	L(StrlcpyExit7)
+L(StrlcpyExit8):
+	movb	%bh, 7(%edx)
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movl	3(%ecx), %eax
+	movl	%eax, 3(%edx)
+
+	lea	8(%ecx), %edx
+	mov	%edi, %ecx
+        POP     (%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(ExitHigh8Case3):
+	cmp	$12, %ebx
+	ja	L(ExitHigh12Case3)
+
+	cmp	$9, %ebx
+	je	L(StrlcpyExit9)
+	cmp	$10, %ebx
+	je	L(StrlcpyExit10)
+	cmp	$11, %ebx
+	je	L(StrlcpyExit11)
+L(StrlcpyExit12):
+	movb	%bh, 11(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movl	7(%ecx), %eax
+	movl	%eax, 7(%edx)
+
+	lea	12(%ecx), %edx
+	mov	%edi, %ecx
+        POP     (%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(ExitHigh12Case3):
+	cmp	$13, %ebx
+	je	L(StrlcpyExit13)
+	cmp	$14, %ebx
+	je	L(StrlcpyExit14)
+	cmp	$15, %ebx
+	je	L(StrlcpyExit15)
+L(StrlcpyExit16):
+	movb	%bh, 15(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	7(%ecx), %xmm0
+	movlpd	%xmm0, 7(%edx)
+
+	lea	16(%ecx), %edx
+	mov	%edi, %ecx
+        POP     (%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(StrlcpyExit1):
+	movb	%bh, (%edx)
+
+	lea	1(%ecx), %edx
+	mov	%edi, %ecx
+        POP     (%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit1):
+	movb	(%ecx), %al
+	movb	%al, (%edx)
+
+	mov	%ecx, %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit2):
+	movb	%bh, 1(%edx)
+	movb	(%ecx), %al
+	movb	%al, (%edx)
+
+	lea	2(%ecx), %edx
+	mov	%edi, %ecx
+        POP     (%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit2):	/* Bit 1 is the lowest set bit of the mask in %al: copy bytes 0-1.  */
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+
+	/* Return (%ecx + 1) - %edi.  %eax needs no other setup: lea overwrites it.  */
+	lea	1(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit3):
+	movb	%bh, 2(%edx)
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+
+	lea	3(%ecx), %edx
+	mov	%edi, %ecx
+        POP     (%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit3):
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+	movb	2(%ecx), %al
+	movb	%al, 2(%edx)
+
+	lea	2(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit5):	/* Reached with %ebx == 5: store 4 source bytes plus a zero byte.  */
+	movb	%bh, 4(%edx)	/* %bh is 0 because %ebx == 5 */
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+
+	/* Continue measuring the source from byte 5; the target clears %eax itself.  */
+	lea	5(%ecx), %edx
+	mov	%edi, %ecx
+        POP     (%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit5):
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movb	4(%ecx), %al
+	movb	%al, 4(%edx)
+
+	lea	4(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit6):
+	movb	%bh, 5(%edx)
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movb	4(%ecx), %al
+	movb	%al, 4(%edx)
+
+	lea	6(%ecx), %edx
+	mov	%edi, %ecx
+        POP     (%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit6):
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movw	4(%ecx), %ax
+	movw	%ax, 4(%edx)
+
+	lea	5(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit7):
+	movb	%bh, 6(%edx)
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movw	4(%ecx), %ax
+	movw	%ax, 4(%edx)
+
+	lea	7(%ecx), %edx
+	mov	%edi, %ecx
+        POP     (%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit7):
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movl	3(%ecx), %eax
+	movl	%eax, 3(%edx)
+
+	lea	6(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit9):
+	movb	%bh, 8(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+
+	lea	9(%ecx), %edx
+	mov	%edi, %ecx
+        POP     (%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit9):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movb	8(%ecx), %al
+	movb	%al, 8(%edx)
+
+	lea	8(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit10):
+	movb	%bh, 9(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movb	8(%ecx), %al
+	movb	%al, 8(%edx)
+
+	lea	10(%ecx), %edx
+	mov	%edi, %ecx
+        POP     (%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit10):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movw	8(%ecx), %ax
+	movw	%ax, 8(%edx)
+
+	lea	9(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit11):
+	movb	%bh, 10(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movw	8(%ecx), %ax
+	movw	%ax, 8(%edx)
+
+	lea	11(%ecx), %edx
+	mov	%edi, %ecx
+        POP     (%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit11):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movl	7(%ecx), %eax
+	movl	%eax, 7(%edx)
+
+	lea	10(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit13):
+	movb	%bh, 12(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movl	8(%ecx), %eax
+	movl	%eax, 8(%edx)
+
+	lea	13(%ecx), %edx
+	mov	%edi, %ecx
+        POP     (%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit13):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	5(%ecx), %xmm0
+	movlpd	%xmm0, 5(%edx)
+
+	lea	12(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit14):
+	movb	%bh, 13(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	5(%ecx), %xmm0
+	movlpd	%xmm0, 5(%edx)
+
+	lea	14(%ecx), %edx
+	mov	%edi, %ecx
+        POP     (%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit14):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	6(%ecx), %xmm0
+	movlpd	%xmm0, 6(%edx)
+
+	lea	13(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+	.p2align 4
+L(StrlcpyExit15):
+	movb	%bh, 14(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	6(%ecx), %xmm0
+	movlpd	%xmm0, 6(%edx)
+
+	lea	15(%ecx), %edx
+	mov	%edi, %ecx
+        POP     (%edi)
+	jmp	L(CalculateLengthOfSrc)
+        CFI_PUSH     (%edi)
+
+	.p2align 4
+L(Exit15):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	7(%ecx), %xmm0
+	movlpd	%xmm0, 7(%edx)
+
+	lea	14(%ecx), %eax
+	sub	%edi, %eax
+	RETURN1
+
+        CFI_POP (%edi)
+
+	.p2align 4
+L(StrlcpyExit0):
+	movl	$0, %eax
+	RETURN
+
+	.p2align 4
+L(StrncpyExit15Bytes):
+	cmp	$12, %ebx
+	ja	L(StrncpyExit15Bytes1)
+
+	cmpb	$0, 8(%ecx)
+	jz	L(ExitTail9)
+	cmp	$9, %ebx
+	je	L(StrlcpyExitTail9)
+
+	cmpb	$0, 9(%ecx)
+	jz	L(ExitTail10)
+	cmp	$10, %ebx
+	je	L(StrlcpyExitTail10)
+
+	cmpb	$0, 10(%ecx)
+	jz	L(ExitTail11)
+	cmp	$11, %ebx
+	je	L(StrlcpyExitTail11)
+
+	cmpb	$0, 11(%ecx)
+	jz	L(ExitTail12)
+
+	movb	%bh, 11(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movl	7(%ecx), %eax
+	movl	%eax, 7(%edx)
+
+	lea	12(%ecx), %edx
+	jmp	L(CalculateLengthOfSrc)
+
+	.p2align 4
+L(StrncpyExit15Bytes1):
+	cmpb	$0, 8(%ecx)
+	jz	L(ExitTail9)
+	cmpb	$0, 9(%ecx)
+	jz	L(ExitTail10)
+	cmpb	$0, 10(%ecx)
+	jz	L(ExitTail11)
+	cmpb	$0, 11(%ecx)
+	jz	L(ExitTail12)
+
+	cmpb	$0, 12(%ecx)
+	jz	L(ExitTail13)
+	cmp	$13, %ebx
+	je	L(StrlcpyExitTail13)
+
+	cmpb	$0, 13(%ecx)
+	jz	L(ExitTail14)
+	cmp	$14, %ebx
+	je	L(StrlcpyExitTail14)
+
+	cmpb	$0, 14(%ecx)
+	jz	L(ExitTail15)
+
+	movb	%bh, 14(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	6(%ecx), %xmm0
+	movlpd	%xmm0, 6(%edx)
+
+	lea	15(%ecx), %edx
+	jmp	L(CalculateLengthOfSrc)
+
+	.p2align 4
+L(StrncpyExit8Bytes):	/* Byte-by-byte exit selection for the first bytes at (%ecx), limited by the count in %ebx.  */
+	cmp	$4, %ebx
+	ja	L(StrncpyExit8Bytes1)	/* %ebx > 4: bytes 0..3 need no count check */
+
+	test	%ebx, %ebx
+	jz	L(StrlcpyExitTail0)	/* %ebx == 0: nothing is stored, only the length is computed */
+
+	cmpb	$0, (%ecx)	/* pattern below: byte k is NUL -> ExitTail(k+1) ... */
+	jz	L(ExitTail1)
+	cmp	$1, %ebx	/* ... else %ebx == k+1 -> StrlcpyExitTail(k+1) */
+	je	L(StrlcpyExitTail1)
+
+	cmpb	$0, 1(%ecx)
+	jz	L(ExitTail2)
+	cmp	$2, %ebx
+	je	L(StrlcpyExitTail2)
+
+	cmpb	$0, 2(%ecx)
+	jz	L(ExitTail3)
+	cmp	$3, %ebx
+	je	L(StrlcpyExitTail3)
+
+	cmpb	$0, 3(%ecx)
+	jz	L(ExitTail4)
+
+	movb	%bh, 3(%edx)	/* %ebx == 4 here, so %bh is 0: store 3 bytes plus a zero byte */
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+	movb	2(%ecx), %al
+	movb	%al, 2(%edx)
+
+	lea	4(%ecx), %edx	/* bytes 0..3 are known to be non-NUL; resume the scan at byte 4 */
+	jmp	L(CalculateLengthOfSrc)
+
+	.p2align 4
+L(StrncpyExit8Bytes1):
+	cmpb	$0, (%ecx)
+	jz	L(ExitTail1)
+	cmpb	$0, 1(%ecx)
+	jz	L(ExitTail2)
+	cmpb	$0, 2(%ecx)
+	jz	L(ExitTail3)
+	cmpb	$0, 3(%ecx)
+	jz	L(ExitTail4)
+
+	cmpb	$0, 4(%ecx)
+	jz	L(ExitTail5)
+	cmp	$5, %ebx
+	je	L(StrlcpyExitTail5)
+
+	cmpb	$0, 5(%ecx)
+	jz	L(ExitTail6)
+	cmp	$6, %ebx
+	je	L(StrlcpyExitTail6)
+
+	cmpb	$0, 6(%ecx)
+	jz	L(ExitTail7)
+	cmp	$7, %ebx
+	je	L(StrlcpyExitTail7)
+
+	cmpb	$0, 7(%ecx)
+	jz	L(ExitTail8)
+
+	movb	%bh, 7(%edx)
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movl	3(%ecx), %eax
+	movl	%eax, 3(%edx)
+
+	lea	8(%ecx), %edx
+	jmp	L(CalculateLengthOfSrc)
+
+	.p2align 4
+L(StrlcpyExitTail0):	/* Reached with %ebx == 0: store nothing.  */
+	mov	%ecx, %edx	/* start the length scan at the address in %ecx */
+	jmp	L(CalculateLengthOfSrc)
+
+	.p2align 4
+L(StrlcpyExitTail1):
+	movb	%bh, (%edx)
+
+	lea	1(%ecx), %edx
+	jmp	L(CalculateLengthOfSrc)
+
+	.p2align 4
+L(ExitTail1):
+	movb	(%ecx), %al
+	movb	%al, (%edx)
+
+	mov	$0, %eax
+	RETURN
+
+	.p2align 4
+L(StrlcpyExitTail2):
+	movb	%bh, 1(%edx)
+	movb	(%ecx), %al
+	movb	%al, (%edx)
+
+	lea	2(%ecx), %edx
+	jmp	L(CalculateLengthOfSrc)
+
+	.p2align 4
+L(ExitTail2):	/* Byte 1 at (%ecx) is NUL: copy 2 bytes, terminator included.  */
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+
+	/* Return 1, the number of bytes before the NUL.  */
+	mov	$1, %eax
+	RETURN
+
+	.p2align 4
+L(StrlcpyExitTail3):
+	movb	%bh, 2(%edx)
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+
+	lea	3(%ecx), %edx
+	jmp	L(CalculateLengthOfSrc)
+
+	.p2align 4
+L(ExitTail3):
+	movw	(%ecx), %ax
+	movw	%ax, (%edx)
+	movb	2(%ecx), %al
+	movb	%al, 2(%edx)
+
+	mov	$2, %eax
+	RETURN
+
+	.p2align 4
+L(ExitTail4):
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+
+	mov	$3, %eax
+	RETURN
+
+	.p2align 4
+L(StrlcpyExitTail5):	/* Reached with %ebx == 5: store 4 source bytes plus a zero byte.  */
+	movb	%bh, 4(%edx)	/* %bh is 0 because %ebx == 5 */
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+
+	/* Continue measuring the source from byte 5; the target clears %eax itself.  */
+	lea	5(%ecx), %edx
+	jmp	L(CalculateLengthOfSrc)
+
+	.p2align 4
+L(ExitTail5):
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movb	4(%ecx), %al
+	movb	%al, 4(%edx)
+
+	mov	$4, %eax
+	RETURN
+
+	.p2align 4
+L(StrlcpyExitTail6):
+	movb	%bh, 5(%edx)
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movb	4(%ecx), %al
+	movb	%al, 4(%edx)
+
+	lea	6(%ecx), %edx
+	jmp	L(CalculateLengthOfSrc)
+
+	.p2align 4
+L(ExitTail6):
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movw	4(%ecx), %ax
+	movw	%ax, 4(%edx)
+
+	mov	$5, %eax
+	RETURN
+
+	.p2align 4
+L(StrlcpyExitTail7):
+	movb	%bh, 6(%edx)
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movw	4(%ecx), %ax
+	movw	%ax, 4(%edx)
+
+	lea	7(%ecx), %edx
+	jmp	L(CalculateLengthOfSrc)
+
+	.p2align 4
+L(ExitTail7):
+	movl	(%ecx), %eax
+	movl	%eax, (%edx)
+	movl	3(%ecx), %eax
+	movl	%eax, 3(%edx)
+
+	mov	$6, %eax
+	RETURN
+
+	.p2align 4
+L(ExitTail8):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+
+	mov	$7, %eax
+	RETURN
+
+	.p2align 4
+L(StrlcpyExitTail9):
+	movb	%bh, 8(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+
+	lea	9(%ecx), %edx
+	jmp	L(CalculateLengthOfSrc)
+
+	.p2align 4
+L(ExitTail9):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movb	8(%ecx), %al
+	movb	%al, 8(%edx)
+
+	mov	$8, %eax
+	RETURN
+
+	.p2align 4
+L(StrlcpyExitTail10):
+	movb	%bh, 9(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movb	8(%ecx), %al
+	movb	%al, 8(%edx)
+
+	lea	10(%ecx), %edx
+	jmp	L(CalculateLengthOfSrc)
+
+	.p2align 4
+L(ExitTail10):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movw	8(%ecx), %ax
+	movw	%ax, 8(%edx)
+
+	mov	$9, %eax
+	RETURN
+
+	.p2align 4
+L(StrlcpyExitTail11):
+	movb	%bh, 10(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movw	8(%ecx), %ax
+	movw	%ax, 8(%edx)
+
+	lea	11(%ecx), %edx
+	jmp	L(CalculateLengthOfSrc)
+
+	.p2align 4
+L(ExitTail11):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movl	7(%ecx), %eax
+	movl	%eax, 7(%edx)
+
+	mov	$10, %eax
+	RETURN
+
+	.p2align 4
+L(ExitTail12):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movl	8(%ecx), %eax
+	movl	%eax, 8(%edx)
+
+	mov	$11, %eax
+	RETURN
+
+	.p2align 4
+L(StrlcpyExitTail13):
+	movb	%bh, 12(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movl	8(%ecx), %eax
+	movl	%eax, 8(%edx)
+
+	lea	13(%ecx), %edx
+	jmp	L(CalculateLengthOfSrc)
+
+	.p2align 4
+L(ExitTail13):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	5(%ecx), %xmm0
+	movlpd	%xmm0, 5(%edx)
+
+	mov	$12, %eax
+	RETURN
+
+	.p2align 4
+L(StrlcpyExitTail14):
+	movb	%bh, 13(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	5(%ecx), %xmm0
+	movlpd	%xmm0, 5(%edx)
+
+	lea	14(%ecx), %edx
+	jmp	L(CalculateLengthOfSrc)
+
+	.p2align 4
+L(ExitTail14):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	6(%ecx), %xmm0
+	movlpd	%xmm0, 6(%edx)
+
+	mov	$13, %eax
+	RETURN
+
+	.p2align 4
+L(ExitTail15):
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	7(%ecx), %xmm0
+	movlpd	%xmm0, 7(%edx)
+
+	mov	$14, %eax
+	RETURN
+
+	.p2align 4
+L(StrlcpyExitTail16):
+	movb	%bh, 15(%edx)
+	movlpd	(%ecx), %xmm0
+	movlpd	%xmm0, (%edx)
+	movlpd	7(%ecx), %xmm0
+	movlpd	%xmm0, 7(%edx)
+
+	lea	16(%ecx), %edx
+	jmp	L(CalculateLengthOfSrc)
+
+	.p2align 4
+L(ExitTail16):
+	movlpd	(%ecx), %xmm0
+	movlpd	8(%ecx), %xmm1
+	movlpd	%xmm0, (%edx)
+	movlpd	%xmm1, 8(%edx)
+
+	mov	$15, %eax
+	RETURN
+
+	.p2align 4
+L(CalculateLengthOfSrc):
+	xor	%eax, %eax
+	cmpb	$0, (%edx)
+	jz	L(exit_tail0)
+	cmpb	$0, 1(%edx)
+	jz	L(exit_tail1)
+	cmpb	$0, 2(%edx)
+	jz	L(exit_tail2)
+	cmpb	$0, 3(%edx)
+	jz	L(exit_tail3)
+
+	cmpb	$0, 4(%edx)
+	jz	L(exit_tail4)
+	cmpb	$0, 5(%edx)
+	jz	L(exit_tail5)
+	cmpb	$0, 6(%edx)
+	jz	L(exit_tail6)
+	cmpb	$0, 7(%edx)
+	jz	L(exit_tail7)
+
+	cmpb	$0, 8(%edx)
+	jz	L(exit_tail8)
+	cmpb	$0, 9(%edx)
+	jz	L(exit_tail9)
+	cmpb	$0, 10(%edx)
+	jz	L(exit_tail10)
+	cmpb	$0, 11(%edx)
+	jz	L(exit_tail11)
+
+	cmpb	$0, 12(%edx)
+	jz	L(exit_tail12)
+	cmpb	$0, 13(%edx)
+	jz	L(exit_tail13)
+	cmpb	$0, 14(%edx)
+	jz	L(exit_tail14)
+	cmpb	$0, 15(%edx)
+	jz	L(exit_tail15)
+
+	pxor	%xmm0, %xmm0
+	lea	16(%edx), %eax
+	add	$16, %ecx
+	and	$-16, %eax
+
+	pcmpeqb	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	pxor	%xmm1, %xmm1
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	pxor	%xmm2, %xmm2
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	pxor	%xmm3, %xmm3
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm0
+	pmovmskb %xmm0, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm1
+	pmovmskb %xmm1, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm2
+	pmovmskb %xmm2, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	16(%eax), %eax
+	test	%edx, %edx
+	jnz	L(exit)
+
+	and	$-0x40, %eax
+
+	.p2align 4
+L(aligned_64_loop):
+	movaps	(%eax), %xmm0
+	movaps	16(%eax), %xmm1
+	movaps	32(%eax), %xmm2
+	movaps	48(%eax), %xmm6
+	pminub	%xmm1, %xmm0
+	pminub	%xmm6, %xmm2
+	pminub	%xmm0, %xmm2
+	pcmpeqb	%xmm3, %xmm2
+	pmovmskb %xmm2, %edx
+	lea	64(%eax), %eax
+	test	%edx, %edx
+	jz	L(aligned_64_loop)
+
+	pcmpeqb	-64(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	48(%ecx), %ecx
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	%xmm1, %xmm3
+	pmovmskb %xmm3, %edx
+	lea	-16(%ecx), %ecx
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	-32(%eax), %xmm3
+	pmovmskb %xmm3, %edx
+	lea	-16(%ecx), %ecx
+	test	%edx, %edx
+	jnz	L(exit)
+
+	pcmpeqb	%xmm6, %xmm3
+	pmovmskb %xmm3, %edx
+	lea	-16(%ecx), %ecx
+
+	.p2align 4
+L(exit):
+	sub	%ecx, %eax
+	test	%dl, %dl
+	jz	L(exit_more_8)
+
+	mov	%dl, %cl
+	and	$15, %cl
+	jz	L(exit_more_4)
+	test	$0x01, %dl
+	jnz	L(exit_0)
+	test	$0x02, %dl
+	jnz	L(exit_1)
+	test	$0x04, %dl
+	jnz	L(exit_2)
+	add	$3, %eax
+	RETURN
+
+	.p2align 4
+L(exit_more_4):
+	test	$0x10, %dl
+	jnz	L(exit_4)
+	test	$0x20, %dl
+	jnz	L(exit_5)
+	test	$0x40, %dl
+	jnz	L(exit_6)
+	add	$7, %eax
+	RETURN
+
+	.p2align 4
+L(exit_more_8):
+	mov	%dh, %ch
+	and	$15, %ch
+	jz	L(exit_more_12)
+	test	$0x01, %dh
+	jnz	L(exit_8)
+	test	$0x02, %dh
+	jnz	L(exit_9)
+	test	$0x04, %dh
+	jnz	L(exit_10)
+	add	$11, %eax
+	RETURN
+
+	.p2align 4
+L(exit_more_12):
+	test	$0x10, %dh
+	jnz	L(exit_12)
+	test	$0x20, %dh
+	jnz	L(exit_13)
+	test	$0x40, %dh
+	jnz	L(exit_14)
+	add	$15, %eax
+L(exit_0):
+	RETURN
+
+	.p2align 4
+L(exit_1):
+	add	$1, %eax
+	RETURN
+
+L(exit_2):
+	add	$2, %eax
+	RETURN
+
+L(exit_3):
+	add	$3, %eax
+	RETURN
+
+L(exit_4):
+	add	$4, %eax
+	RETURN
+
+L(exit_5):
+	add	$5, %eax
+	RETURN
+
+L(exit_6):
+	add	$6, %eax
+	RETURN
+
+L(exit_7):
+	add	$7, %eax
+	RETURN
+
+L(exit_8):
+	add	$8, %eax
+	RETURN
+
+L(exit_9):
+	add	$9, %eax
+	RETURN
+
+L(exit_10):
+	add	$10, %eax
+	RETURN
+
+L(exit_11):
+	add	$11, %eax
+	RETURN
+
+L(exit_12):
+	add	$12, %eax
+	RETURN
+
+L(exit_13):
+	add	$13, %eax
+	RETURN
+
+L(exit_14):
+	add	$14, %eax
+	RETURN
+
+L(exit_15):
+	add	$15, %eax
+	RETURN
+
+L(exit_tail0):
+	mov	%edx, %eax
+	sub	%ecx, %eax
+	RETURN
+
+	.p2align 4
+L(exit_tail1):
+	lea	1(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail2):
+	lea	2(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail3):
+	lea	3(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail4):
+	lea	4(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail5):
+	lea	5(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail6):
+	lea	6(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail7):
+	lea	7(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail8):
+	lea	8(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail9):
+	lea	9(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail10):
+	lea	10(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail11):
+	lea	11(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail12):
+	lea	12(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail13):
+	lea	13(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail14):
+	lea	14(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+L(exit_tail15):
+	lea	15(%edx), %eax
+	sub	%ecx, %eax
+	RETURN
+
+END (STRCPY)
+
diff --git a/libc/arch-x86/string/memcmp_wrapper.S b/libc/arch-x86/string/ssse3-strncat-atom.S
similarity index 90%
rename from libc/arch-x86/string/memcmp_wrapper.S
rename to libc/arch-x86/string/ssse3-strncat-atom.S
index fa0c672..5618771 100644
--- a/libc/arch-x86/string/memcmp_wrapper.S
+++ b/libc/arch-x86/string/ssse3-strncat-atom.S
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2010, Intel Corporation
+Copyright (c) 2011, Intel Corporation
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -28,13 +28,7 @@
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#if defined(USE_SSSE3)
+#define STRCAT  strncat	/* Every STRCAT token in the file included below is replaced by strncat. */
+#define USE_AS_STRNCAT	/* Visible to ssse3-strcat-atom.S; presumably selects its length-limited variant -- confirm there. */
 
-# define MEMCMP memcmp
-# include "ssse3-memcmp3-new.S"
-
-#else
-
-# include "memcmp.S"
-
-#endif
+#include "ssse3-strcat-atom.S"
diff --git a/libc/arch-x86/string/strlen_wrapper.S b/libc/arch-x86/string/ssse3-strncmp-atom.S
similarity index 92%
copy from libc/arch-x86/string/strlen_wrapper.S
copy to libc/arch-x86/string/ssse3-strncmp-atom.S
index e62786b..4762d7e 100644
--- a/libc/arch-x86/string/strlen_wrapper.S
+++ b/libc/arch-x86/string/ssse3-strncmp-atom.S
@@ -28,13 +28,8 @@
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#if defined(USE_SSE2)
 
-# define sse2_strlen_atom strlen
-# include "sse2-strlen-atom.S"
+#define USE_AS_STRNCMP	/* Visible to ssse3-strcmp-atom.S; presumably enables its length-limited compare -- confirm there. */
+#define STRCMP  strncmp	/* Every STRCMP token in the included file is replaced by strncmp. */
+#include "ssse3-strcmp-atom.S"
 
-#else
-
-# include "strlen.S"
-
-#endif
diff --git a/libc/arch-x86/string/memcmp_wrapper.S b/libc/arch-x86/string/ssse3-strncpy-atom.S
similarity index 90%
copy from libc/arch-x86/string/memcmp_wrapper.S
copy to libc/arch-x86/string/ssse3-strncpy-atom.S
index fa0c672..0948b6d 100644
--- a/libc/arch-x86/string/memcmp_wrapper.S
+++ b/libc/arch-x86/string/ssse3-strncpy-atom.S
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2010, Intel Corporation
+Copyright (c) 2011, Intel Corporation
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -28,13 +28,6 @@
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#if defined(USE_SSSE3)
-
-# define MEMCMP memcmp
-# include "ssse3-memcmp3-new.S"
-
-#else
-
-# include "memcmp.S"
-
-#endif
+#define USE_AS_STRNCPY	/* Visible to ssse3-strcpy-atom.S; presumably enables its length-limited copy -- confirm there. */
+#define STRCPY strncpy	/* Every STRCPY token in the included file is replaced by strncpy. */
+#include "ssse3-strcpy-atom.S"
diff --git a/libc/arch-x86/string/ssse3-wcscat-atom.S b/libc/arch-x86/string/ssse3-wcscat-atom.S
new file mode 100644
index 0000000..17b0843
--- /dev/null
+++ b/libc/arch-x86/string/ssse3-wcscat-atom.S
@@ -0,0 +1,114 @@
+/*
+Copyright (c) 2011 Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+    * this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+    * this list of conditions and the following disclaimer in the documentation
+    * and/or other materials provided with the distribution.
+
+    * Neither the name of Intel Corporation nor the names of its contributors
+    * may be used to endorse or promote products derived from this software
+    * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef L
+# define L(label)	.L##label
+#endif
+
+#ifndef cfi_startproc
+# define cfi_startproc                  .cfi_startproc
+#endif
+
+#ifndef cfi_endproc
+# define cfi_endproc                    .cfi_endproc
+#endif
+
+#ifndef cfi_rel_offset
+# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+#endif
+
+#ifndef cfi_restore
+# define cfi_restore(reg)	.cfi_restore reg
+#endif
+
+#ifndef cfi_adjust_cfa_offset
+# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
+#endif
+
+#ifndef ENTRY
+# define ENTRY(name)	\
+	.type name,  @function;	\
+	.globl name;	\
+	.p2align 4;	\
+name:	\
+	cfi_startproc
+#endif
+
+#ifndef END
+# define END(name)	\
+	cfi_endproc;	\
+	.size name, .-name
+#endif
+
+#define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+#define PUSH(REG)	pushl REG;	CFI_PUSH (REG)
+#define POP(REG)	popl REG;	CFI_POP (REG)
+
+#define PARMS  4	/* Base stack offset used to derive the argument offsets below. */
+#define STR1  PARMS+4	/* dst argument; wcscat reads it as STR1(%esp) after PUSH (%edi), hence the extra 4. */
+#define STR2  STR1+4	/* src argument, one slot above dst. */
+
+#define USE_AS_WCSCAT	/* Makes ssse3-wcscpy-atom.S skip its own macro prelude, wcscpy entry code and END (wcscpy). */
+
+.text
+ENTRY (wcscat)
+	PUSH    (%edi)	/* Save the caller's %edi; it carries dst until RETURN pops it. */
+	mov	STR1(%esp), %edi	/* %edi = dst, the first argument. */
+	mov	%edi, %edx	/* Second copy of dst; presumably the pointer scanned by the included wcslen code -- confirm there. */
+
+#define RETURN  jmp L(WcscpyAtom)
+#include "sse2-wcslen-atom.S"
+
+L(WcscpyAtom):
+	shl	$2, %eax	/* Assumes %eax arrives holding the element count of dst; x4 turns it into a byte offset. */
+	mov	STR2(%esp), %ecx	/* %ecx = src, the second argument. */
+	lea	(%edi, %eax), %edx	/* %edx = dst + byte offset: where the copy starts writing. */
+/* Terminator within the first four 4-byte elements of src: take a short copy exit. cmpl is explicit because an immediate-vs-memory compare has no register operand to imply its size. */
+	cmpl	$0, (%ecx)
+	jz	L(Exit4)
+	cmpl	$0, 4(%ecx)
+	jz	L(Exit8)
+	cmpl	$0, 8(%ecx)
+	jz	L(Exit12)
+	cmpl	$0, 12(%ecx)
+	jz	L(Exit16)
+/* From here on RETURN is a real function return that restores %edi first. */
+#undef RETURN
+#define RETURN  POP(%edi);	ret;	CFI_PUSH(%edi)
+#include "ssse3-wcscpy-atom.S"
+
+END (wcscat)
diff --git a/libc/arch-x86/string/ssse3-wcscpy-atom.S b/libc/arch-x86/string/ssse3-wcscpy-atom.S
new file mode 100644
index 0000000..8ba84bc
--- /dev/null
+++ b/libc/arch-x86/string/ssse3-wcscpy-atom.S
@@ -0,0 +1,652 @@
+/*
+Copyright (c) 2011, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+    * this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+    * this list of conditions and the following disclaimer in the documentation
+    * and/or other materials provided with the distribution.
+
+    * Neither the name of Intel Corporation nor the names of its contributors
+    * may be used to endorse or promote products derived from this software
+    * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef USE_AS_WCSCAT
+
+# ifndef L
+#  define L(label)	.L##label
+# endif
+
+# ifndef cfi_startproc
+#  define cfi_startproc	.cfi_startproc
+# endif
+
+# ifndef cfi_endproc
+#  define cfi_endproc	.cfi_endproc
+# endif
+
+# ifndef cfi_rel_offset
+#  define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+# endif
+
+# ifndef cfi_restore
+#  define cfi_restore(reg)	.cfi_restore reg
+# endif
+
+# ifndef cfi_adjust_cfa_offset
+#  define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
+# endif
+
+# ifndef ENTRY
+#  define ENTRY(name)	\
+	.type name, @function;	\
+	.globl name;	\
+	.p2align 4;	\
+name:	\
+	cfi_startproc
+# endif
+
+# ifndef END
+#  define END(name)	\
+	cfi_endproc;	\
+	.size name, .-name
+# endif
+
+# define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
+# define POP(REG)	popl REG; CFI_POP (REG)
+
+# define PARMS	4	/* Base stack offset used to derive the argument offsets below. */
+# define RETURN	POP (%edi); ret; CFI_PUSH (%edi)	/* Restore %edi and return; the trailing CFI_PUSH re-declares %edi as pushed for the code that follows in the file. */
+
+# define STR1	PARMS	/* dst argument; wcscpy reads it before pushing anything. */
+# define STR2	STR1+4	/* src argument, one slot above dst. */
+# define LEN	STR2+4	/* Not referenced anywhere in this file's code. */
+
+.text
+ENTRY (wcscpy)
+	mov	STR1(%esp), %edx	/* %edx = dst (first argument). */
+	mov	STR2(%esp), %ecx	/* %ecx = src (second argument). */
+/* Terminator within the first four 4-byte elements: copy 4/8/12/16 bytes and return without touching %edi. cmpl is explicit because an immediate-vs-memory compare has no register operand to imply its size. */
+	cmpl	$0, (%ecx)
+	jz	L(ExitTail4)
+	cmpl	$0, 4(%ecx)
+	jz	L(ExitTail8)
+	cmpl	$0, 8(%ecx)
+	jz	L(ExitTail12)
+	cmpl	$0, 12(%ecx)
+	jz	L(ExitTail16)
+
+	PUSH	(%edi)	/* Longer string: save the caller's %edi; RETURN pops it. */
+	mov	%edx, %edi	/* Keep the original dst in %edi; the Exit* paths return it in %eax. */
+#endif
+	PUSH	(%esi)	/* %esi is a scratch offset below; every exit path pops it before returning. */
+	lea	16(%ecx), %esi
+
+	and	$-16, %esi	/* %esi = (src + 16) rounded down to a 16-byte boundary. */
+
+	pxor	%xmm0, %xmm0	/* %xmm0 = 0, the terminator pattern for the pcmpeqd checks. */
+	pcmpeqd	(%esi), %xmm0	/* Flag zero elements in that aligned 16-byte block of src. */
+	movdqu	(%ecx), %xmm1	/* Copy the first 16 bytes unaligned; the wcscpy and wcscat entry code already checked them for a terminator. */
+	movdqu	%xmm1, (%edx)
+
+	pmovmskb %xmm0, %eax	/* %eax = byte mask, four bits per zero element. */
+	sub	%ecx, %esi	/* %esi = byte distance from src to the aligned block. */
+
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)	/* Terminator in that block: finish with a short tail copy. */
+/* Round dst up to the next 16-byte boundary and advance src by the same amount; the unaligned store above already covers the bytes skipped. */
+	mov	%edx, %eax
+	lea	16(%edx), %edx
+	and	$-16, %edx
+	sub	%edx, %eax	/* %eax = old dst - aligned dst (zero or negative). */
+
+	sub	%eax, %ecx	/* src advances by the same distance as dst. */
+	mov	%ecx, %eax
+	and	$0xf, %eax	/* %eax = src offset within its 16-byte block; sets ZF for the jz below. */
+	mov	$0, %esi	/* mov leaves the flags from the and untouched. */
+
+	jz	L(Align16Both)	/* src and dst are both 16-byte aligned. */
+	cmp	$4, %eax
+	je	L(Shl4)
+	cmp	$8, %eax
+	je	L(Shl8)
+	jmp	L(Shl12)	/* Remaining case; assumes the offset is a multiple of 4 (4-byte-aligned elements) -- TODO confirm. */
+
+L(Align16Both):
+	movaps	(%ecx), %xmm1
+	movaps	16(%ecx), %xmm2
+	movaps	%xmm1, (%edx)	/* Store the current block while the next one is being checked. */
+	pcmpeqd	%xmm2, %xmm0	/* %xmm0 stays all-zero unless an element matched, so it is reused as the comparand. */
+	pmovmskb %xmm0, %eax
+	lea	16(%esi), %esi	/* %esi = running byte offset of the block just checked. */
+
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	movaps	16(%ecx, %esi), %xmm3
+	movaps	%xmm2, (%edx, %esi)
+	pcmpeqd	%xmm3, %xmm0
+	pmovmskb %xmm0, %eax
+	lea	16(%esi), %esi
+
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	movaps	16(%ecx, %esi), %xmm4
+	movaps	%xmm3, (%edx, %esi)
+	pcmpeqd	%xmm4, %xmm0
+	pmovmskb %xmm0, %eax
+	lea	16(%esi), %esi
+
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	movaps	16(%ecx, %esi), %xmm1
+	movaps	%xmm4, (%edx, %esi)
+	pcmpeqd	%xmm1, %xmm0
+	pmovmskb %xmm0, %eax
+	lea	16(%esi), %esi
+
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	movaps	16(%ecx, %esi), %xmm2
+	movaps	%xmm1, (%edx, %esi)
+	pcmpeqd	%xmm2, %xmm0
+	pmovmskb %xmm0, %eax
+	lea	16(%esi), %esi
+
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	movaps	16(%ecx, %esi), %xmm3
+	movaps	%xmm2, (%edx, %esi)
+	pcmpeqd	%xmm3, %xmm0
+	pmovmskb %xmm0, %eax
+	lea	16(%esi), %esi
+
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	movaps	%xmm3, (%edx, %esi)
+	mov	%ecx, %eax
+	lea	16(%ecx, %esi), %ecx	/* Address of the next unchecked block ... */
+	and	$-0x40, %ecx	/* ... rounded down to a 64-byte boundary for the loop below. */
+	sub	%ecx, %eax
+	sub	%eax, %edx	/* Move dst by the same distance as src. */
+
+	mov	$-0x40, %esi	/* Offset of the first block of a 64-byte group once the loop has advanced the pointers by 64. */
+
+L(Aligned64Loop):
+	movaps	(%ecx), %xmm2
+	movaps	32(%ecx), %xmm3
+	movaps	%xmm2, %xmm4	/* %xmm4..%xmm7 keep the four blocks (offsets 0, 16, 32, 48) intact for the stores. */
+	movaps	16(%ecx), %xmm5
+	movaps	%xmm3, %xmm6
+	movaps	48(%ecx), %xmm7
+	pminub	%xmm5, %xmm2
+	pminub	%xmm7, %xmm3
+	pminub	%xmm2, %xmm3	/* Byte-wise minimum of all four blocks. */
+	lea	64(%edx), %edx
+	pcmpeqd	%xmm0, %xmm3	/* A zero element in any block shows up here; the byte-wise minimum may also fire when no single block holds one. */
+	lea	64(%ecx), %ecx
+	pmovmskb %xmm3, %eax
+
+	test	%eax, %eax
+	jnz	L(Aligned64Leave)
+	movaps	%xmm4, -64(%edx)
+	movaps	%xmm5, -48(%edx)
+	movaps	%xmm6, -32(%edx)
+	movaps	%xmm7, -16(%edx)
+	jmp	L(Aligned64Loop)
+
+L(Aligned64Leave):
+	pcmpeqd	%xmm4, %xmm0	/* Re-check the blocks one at a time; %esi tracks the offset of the block under test. */
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	pcmpeqd	%xmm5, %xmm0
+	pmovmskb %xmm0, %eax
+	movaps	%xmm4, -64(%edx)
+	lea	16(%esi), %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	pcmpeqd	%xmm6, %xmm0
+	pmovmskb %xmm0, %eax
+	movaps	%xmm5, -48(%edx)
+	lea	16(%esi), %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	movaps	%xmm6, -32(%edx)
+	pcmpeqd	%xmm7, %xmm0
+	pmovmskb %xmm0, %eax
+	lea	16(%esi), %esi
+	test	%eax, %eax
+	jnz	L(CopyFrom1To16Bytes)
+
+	mov	$-0x40, %esi	/* No block held a terminator after all: reset the offset and resume the loop. */
+	movaps	%xmm7, -16(%edx)
+	jmp	L(Aligned64Loop)
+
+	.p2align 4
+L(Shl4):
+	movaps	-4(%ecx), %xmm1	/* src sits 4 bytes past a 16-byte boundary: load the aligned block containing it ... */
+	movaps	12(%ecx), %xmm2	/* ... and the aligned block after it. */
+L(Shl4Start):
+	pcmpeqd	%xmm2, %xmm0	/* Check the newer block for a zero element before using it. */
+	pmovmskb %xmm0, %eax
+	movaps	%xmm2, %xmm3	/* Keep an unshifted copy for the next palignr. */
+
+	test	%eax, %eax
+	jnz	L(Shl4LoopExit)
+
+	palignr	$4, %xmm1, %xmm2	/* %xmm2 = 16 source bytes starting at %ecx, assembled from the two aligned blocks. */
+	movaps	%xmm2, (%edx)
+	movaps	28(%ecx), %xmm2
+
+	pcmpeqd	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm1
+
+	test	%eax, %eax
+	jnz	L(Shl4LoopExit)
+
+	palignr	$4, %xmm3, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	28(%ecx), %xmm2
+
+	pcmpeqd	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+
+	test	%eax, %eax
+	jnz	L(Shl4LoopExit)
+
+	palignr	$4, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	28(%ecx), %xmm2
+
+	pcmpeqd	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+
+	test	%eax, %eax
+	jnz	L(Shl4LoopExit)
+
+	palignr	$4, %xmm3, %xmm2
+	movaps	%xmm2, (%edx)
+	lea	28(%ecx), %ecx
+	lea	16(%edx), %edx
+/* Re-base: round the aligned source position down to a 64-byte boundary, restore the 4-byte skew, and move dst by the same distance. */
+	mov	%ecx, %eax
+	and	$-0x40, %ecx
+	sub	%ecx, %eax
+	lea	-12(%ecx), %ecx
+	sub	%eax, %edx
+
+	movaps	-4(%ecx), %xmm1
+
+L(Shl4LoopStart):
+	movaps	12(%ecx), %xmm2
+	movaps	28(%ecx), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	44(%ecx), %xmm4
+	movaps	%xmm4, %xmm7
+	movaps	60(%ecx), %xmm5
+	pminub	%xmm2, %xmm6
+	pminub	%xmm5, %xmm7
+	pminub	%xmm6, %xmm7	/* Byte-wise minimum of the four aligned blocks. */
+	pcmpeqd	%xmm0, %xmm7
+	pmovmskb %xmm7, %eax
+	movaps	%xmm5, %xmm7	/* Unshifted last block; becomes %xmm1 for the next iteration. */
+	palignr	$4, %xmm4, %xmm5
+	palignr	$4, %xmm3, %xmm4
+	test	%eax, %eax
+	jnz	L(Shl4Start)	/* Possible terminator: redo this group one block at a time from %xmm1/%xmm2. */
+
+	palignr	$4, %xmm2, %xmm3
+	lea	64(%ecx), %ecx
+	palignr	$4, %xmm1, %xmm2
+	movaps	%xmm7, %xmm1
+	movaps	%xmm5, 48(%edx)
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	jmp	L(Shl4LoopStart)
+
+L(Shl4LoopExit):
+	movlpd	(%ecx), %xmm0	/* Copy the 12 bytes that precede the aligned block holding the terminator. */
+	movl	8(%ecx), %esi
+	movlpd	%xmm0, (%edx)
+	movl	%esi, 8(%edx)
+	POP	(%esi)
+	add	$12, %edx
+	add	$12, %ecx
+	test	%al, %al	/* %al covers elements 0-1 of that block, %ah elements 2-3 (four mask bits each). */
+	jz	L(ExitHigh)
+	test	$0x01, %al
+	jnz	L(Exit4)
+	movlpd	(%ecx), %xmm0	/* Terminator is element 1: copy 8 bytes. */
+	movlpd	%xmm0, (%edx)
+	movl	%edi, %eax	/* Return the original dst. */
+	RETURN
+
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(Shl8):
+	movaps	-8(%ecx), %xmm1	/* src sits 8 bytes past a 16-byte boundary: load the aligned block containing it ... */
+	movaps	8(%ecx), %xmm2	/* ... and the aligned block after it. */
+L(Shl8Start):
+	pcmpeqd	%xmm2, %xmm0	/* Check the newer block for a zero element before using it. */
+	pmovmskb %xmm0, %eax
+	movaps	%xmm2, %xmm3	/* Keep an unshifted copy for the next palignr. */
+
+	test	%eax, %eax
+	jnz	L(Shl8LoopExit)
+
+	palignr	$8, %xmm1, %xmm2	/* %xmm2 = 16 source bytes starting at %ecx, assembled from the two aligned blocks. */
+	movaps	%xmm2, (%edx)
+	movaps	24(%ecx), %xmm2
+
+	pcmpeqd	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm1
+
+	test	%eax, %eax
+	jnz	L(Shl8LoopExit)
+
+	palignr	$8, %xmm3, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	24(%ecx), %xmm2
+
+	pcmpeqd	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+
+	test	%eax, %eax
+	jnz	L(Shl8LoopExit)
+
+	palignr	$8, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	24(%ecx), %xmm2
+
+	pcmpeqd	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+
+	test	%eax, %eax
+	jnz	L(Shl8LoopExit)
+
+	palignr	$8, %xmm3, %xmm2
+	movaps	%xmm2, (%edx)
+	lea	24(%ecx), %ecx
+	lea	16(%edx), %edx
+/* Re-base: round the aligned source position down to a 64-byte boundary, restore the 8-byte skew, and move dst by the same distance. */
+	mov	%ecx, %eax
+	and	$-0x40, %ecx
+	sub	%ecx, %eax
+	lea	-8(%ecx), %ecx
+	sub	%eax, %edx
+
+	movaps	-8(%ecx), %xmm1
+
+L(Shl8LoopStart):
+	movaps	8(%ecx), %xmm2
+	movaps	24(%ecx), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	40(%ecx), %xmm4
+	movaps	%xmm4, %xmm7
+	movaps	56(%ecx), %xmm5
+	pminub	%xmm2, %xmm6
+	pminub	%xmm5, %xmm7
+	pminub	%xmm6, %xmm7	/* Byte-wise minimum of the four aligned blocks. */
+	pcmpeqd	%xmm0, %xmm7
+	pmovmskb %xmm7, %eax
+	movaps	%xmm5, %xmm7	/* Unshifted last block; becomes %xmm1 for the next iteration. */
+	palignr	$8, %xmm4, %xmm5
+	palignr	$8, %xmm3, %xmm4
+	test	%eax, %eax
+	jnz	L(Shl8Start)	/* Possible terminator: redo this group one block at a time from %xmm1/%xmm2. */
+
+	palignr	$8, %xmm2, %xmm3
+	lea	64(%ecx), %ecx
+	palignr	$8, %xmm1, %xmm2
+	movaps	%xmm7, %xmm1
+	movaps	%xmm5, 48(%edx)
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	jmp	L(Shl8LoopStart)
+
+L(Shl8LoopExit):
+	movlpd	(%ecx), %xmm0	/* Copy the 8 bytes that precede the aligned block holding the terminator. */
+	movlpd	%xmm0, (%edx)
+	POP	(%esi)
+	add	$8, %edx
+	add	$8, %ecx
+	test	%al, %al	/* %al covers elements 0-1 of that block, %ah elements 2-3 (four mask bits each). */
+	jz	L(ExitHigh)
+	test	$0x01, %al
+	jnz	L(Exit4)
+	movlpd	(%ecx), %xmm0	/* Terminator is element 1: copy 8 bytes. */
+	movlpd	%xmm0, (%edx)
+	movl	%edi, %eax	/* Return the original dst. */
+	RETURN
+
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(Shl12):
+	movaps	-12(%ecx), %xmm1	/* src sits 12 bytes past a 16-byte boundary: load the aligned block containing it ... */
+	movaps	4(%ecx), %xmm2	/* ... and the aligned block after it. */
+L(Shl12Start):
+	pcmpeqd	%xmm2, %xmm0	/* Check the newer block for a zero element before using it. */
+	pmovmskb %xmm0, %eax
+	movaps	%xmm2, %xmm3	/* Keep an unshifted copy for the next palignr. */
+
+	test	%eax, %eax
+	jnz	L(Shl12LoopExit)
+
+	palignr	$12, %xmm1, %xmm2	/* %xmm2 = 16 source bytes starting at %ecx, assembled from the two aligned blocks. */
+	movaps	%xmm2, (%edx)
+	movaps	20(%ecx), %xmm2
+
+	pcmpeqd	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm1
+
+	test	%eax, %eax
+	jnz	L(Shl12LoopExit)
+
+	palignr	$12, %xmm3, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	20(%ecx), %xmm2
+
+	pcmpeqd	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+	movaps	%xmm2, %xmm3
+
+	test	%eax, %eax
+	jnz	L(Shl12LoopExit)
+
+	palignr	$12, %xmm1, %xmm2
+	movaps	%xmm2, (%edx)
+	movaps	20(%ecx), %xmm2
+
+	pcmpeqd	%xmm2, %xmm0
+	lea	16(%edx), %edx
+	pmovmskb %xmm0, %eax
+	lea	16(%ecx), %ecx
+
+	test	%eax, %eax
+	jnz	L(Shl12LoopExit)
+
+	palignr	$12, %xmm3, %xmm2
+	movaps	%xmm2, (%edx)
+	lea	20(%ecx), %ecx
+	lea	16(%edx), %edx
+/* Re-base: round the aligned source position down to a 64-byte boundary, restore the 12-byte skew, and move dst by the same distance. */
+	mov	%ecx, %eax
+	and	$-0x40, %ecx
+	sub	%ecx, %eax
+	lea	-4(%ecx), %ecx
+	sub	%eax, %edx
+
+	movaps	-12(%ecx), %xmm1
+
+L(Shl12LoopStart):
+	movaps	4(%ecx), %xmm2
+	movaps	20(%ecx), %xmm3
+	movaps	%xmm3, %xmm6
+	movaps	36(%ecx), %xmm4
+	movaps	%xmm4, %xmm7
+	movaps	52(%ecx), %xmm5
+	pminub	%xmm2, %xmm6
+	pminub	%xmm5, %xmm7
+	pminub	%xmm6, %xmm7	/* Byte-wise minimum of the four aligned blocks. */
+	pcmpeqd	%xmm0, %xmm7
+	pmovmskb %xmm7, %eax
+	movaps	%xmm5, %xmm7	/* Unshifted last block; becomes %xmm1 for the next iteration. */
+	palignr	$12, %xmm4, %xmm5
+	palignr	$12, %xmm3, %xmm4
+	test	%eax, %eax
+	jnz	L(Shl12Start)	/* Possible terminator: redo this group one block at a time from %xmm1/%xmm2. */
+
+	palignr	$12, %xmm2, %xmm3
+	lea	64(%ecx), %ecx
+	palignr	$12, %xmm1, %xmm2
+	movaps	%xmm7, %xmm1
+	movaps	%xmm5, 48(%edx)
+	movaps	%xmm4, 32(%edx)
+	movaps	%xmm3, 16(%edx)
+	movaps	%xmm2, (%edx)
+	lea	64(%edx), %edx
+	jmp	L(Shl12LoopStart)
+
+L(Shl12LoopExit):
+	movl	(%ecx), %esi	/* Copy the 4 bytes that precede the aligned block holding the terminator. */
+	movl	%esi, (%edx)
+	mov	$4, %esi	/* Offset to that block; execution falls through into L(CopyFrom1To16Bytes). */
+
+	.p2align 4
+L(CopyFrom1To16Bytes):
+	add	%esi, %edx	/* Step both pointers to the 16-byte block whose mask is in %eax. */
+	add	%esi, %ecx
+
+	POP	(%esi)
+	test	%al, %al	/* %al covers elements 0-1 of the block, %ah elements 2-3 (four mask bits each). */
+	jz	L(ExitHigh)
+	test	$0x01, %al
+	jnz	L(Exit4)
+L(Exit8):
+	movlpd	(%ecx), %xmm0	/* Copy 8 bytes: one element plus the terminator. */
+	movlpd	%xmm0, (%edx)
+	movl	%edi, %eax	/* Return the original dst. */
+	RETURN
+
+	.p2align 4
+L(ExitHigh):
+	test	$0x01, %ah	/* Set when element 2 is the terminator; otherwise it is element 3. */
+	jnz	L(Exit12)
+L(Exit16):
+	movdqu	(%ecx), %xmm0	/* Copy 16 bytes: three elements plus the terminator. */
+	movdqu	%xmm0, (%edx)
+	movl	%edi, %eax	/* Return the original dst. */
+	RETURN
+
+	.p2align 4
+L(Exit4):
+	movl	(%ecx), %eax	/* Copy 4 bytes: the terminator alone. */
+	movl	%eax, (%edx)
+	movl	%edi, %eax	/* Return the original dst. */
+	RETURN
+
+	.p2align 4
+L(Exit12):
+	movlpd	(%ecx), %xmm0	/* Copy 12 bytes: two elements plus the terminator. */
+	movlpd	%xmm0, (%edx)
+	movl	8(%ecx), %eax
+	movl	%eax, 8(%edx)
+	movl	%edi, %eax	/* Return the original dst. */
+	RETURN
+
+CFI_POP	(%edi)
+
+	.p2align 4
+L(ExitTail4):
+	movl	(%ecx), %eax	/* Copy 4 bytes from src to dst. */
+	movl	%eax, (%edx)
+	movl	%edx, %eax	/* Return dst; these exits use a plain ret and never pop %edi. */
+	ret
+
+	.p2align 4
+L(ExitTail8):
+	movlpd	(%ecx), %xmm0	/* Copy 8 bytes from src to dst. */
+	movlpd	%xmm0, (%edx)
+	movl	%edx, %eax	/* Return dst. */
+	ret
+
+	.p2align 4
+L(ExitTail12):
+	movlpd	(%ecx), %xmm0	/* Copy 12 bytes from src to dst (8 + 4). */
+	movlpd	%xmm0, (%edx)
+	movl	8(%ecx), %eax
+	movl	%eax, 8(%edx)
+	movl	%edx, %eax	/* Return dst. */
+	ret
+
+	.p2align 4
+L(ExitTail16):
+	movdqu	(%ecx), %xmm0	/* Copy 16 bytes from src to dst, unaligned. */
+	movdqu	%xmm0, (%edx)
+	movl	%edx, %eax	/* Return dst. */
+	ret
+
+#ifndef USE_AS_WCSCAT
+END (wcscpy)
+#endif
diff --git a/libc/arch-x86/string/memcmp_wrapper.S b/libc/arch-x86/string/ssse3-wmemcmp-atom.S
similarity index 90%
copy from libc/arch-x86/string/memcmp_wrapper.S
copy to libc/arch-x86/string/ssse3-wmemcmp-atom.S
index fa0c672..2c3fa02 100644
--- a/libc/arch-x86/string/memcmp_wrapper.S
+++ b/libc/arch-x86/string/ssse3-wmemcmp-atom.S
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2010, Intel Corporation
+Copyright (c) 2011, 2012, 2013 Intel Corporation
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -28,13 +28,8 @@
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#if defined(USE_SSSE3)
+#define MEMCMP  wmemcmp	/* Every MEMCMP token in ssse3-memcmp-atom.S (included below) is replaced by wmemcmp. */
 
-# define MEMCMP memcmp
-# include "ssse3-memcmp3-new.S"
-
-#else
-
-# include "memcmp.S"
-
-#endif
+#define USE_WCHAR	/* Visible to ssse3-memcmp-atom.S; presumably switches it to wide-character elements -- confirm there. */
+#define USE_AS_WMEMCMP 1	/* Visible to ssse3-memcmp-atom.S; presumably selects the wmemcmp code paths -- confirm there. */
+#include "ssse3-memcmp-atom.S"
diff --git a/libc/arch-x86/string/strchr.S b/libc/arch-x86/string/strchr.S
deleted file mode 100644
index f76e593..0000000
--- a/libc/arch-x86/string/strchr.S
+++ /dev/null
@@ -1,3 +0,0 @@
-/*	$OpenBSD: strchr.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
-#define STRCHR
-#include "index.S"
diff --git a/libc/arch-x86/string/strcmp_wrapper.S b/libc/arch-x86/string/strcmp_wrapper.S
deleted file mode 100644
index 20f3064..0000000
--- a/libc/arch-x86/string/strcmp_wrapper.S
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
-Copyright (c) 2010, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#if defined(USE_SSSE3)
-
-# define ssse3_strcmp_latest strcmp
-# include "ssse3-strcmp-latest.S"
-
-#else
-
-# include "strcmp.S"
-
-#endif
diff --git a/libc/arch-x86/string/strcpy.S b/libc/arch-x86/string/strcpy.S
deleted file mode 100644
index 7d9b87e..0000000
--- a/libc/arch-x86/string/strcpy.S
+++ /dev/null
@@ -1,64 +0,0 @@
-/*	$OpenBSD: strcpy.S,v 1.8 2005/08/07 11:30:38 espie Exp $ */
-/*
- * Written by J.T. Conklin <jtc@netbsd.org>.
- * Public domain.
- */
-
-#include <machine/asm.h>
-
-#if defined(APIWARN)
-#APP
-	.section .gnu.warning.strcpy
-	.ascii "warning: strcpy() is almost always misused, please use strlcpy()"
-#NO_APP
-#endif
-
-/*
- * NOTE: I've unrolled the loop eight times: large enough to make a
- * significant difference, and small enough not to totally trash the
- * cache.
- */
-
-ENTRY(strcpy)
-	movl	4(%esp),%ecx		/* dst address */
-	movl	8(%esp),%edx		/* src address */
-	pushl	%ecx			/* push dst address */
-
-	.align 2,0x90
-L1:	movb	(%edx),%al		/* unroll loop, but not too much */
-	movb	%al,(%ecx)
-	testb	%al,%al
-	jz	L2
-	movb	1(%edx),%al
-	movb	%al,1(%ecx)
-	testb	%al,%al
-	jz	L2
-	movb	2(%edx),%al
-	movb	%al,2(%ecx)
-	testb	%al,%al
-	jz	L2
-	movb	3(%edx),%al
-	movb	%al,3(%ecx)
-	testb	%al,%al
-	jz	L2
-	movb	4(%edx),%al
-	movb	%al,4(%ecx)
-	testb	%al,%al
-	jz	L2
-	movb	5(%edx),%al
-	movb	%al,5(%ecx)
-	testb	%al,%al
-	jz	L2
-	movb	6(%edx),%al
-	movb	%al,6(%ecx)
-	testb	%al,%al
-	jz	L2
-	movb	7(%edx),%al
-	movb	%al,7(%ecx)
-	addl	$8,%edx
-	addl	$8,%ecx
-	testb	%al,%al
-	jnz	L1
-L2:	popl	%eax			/* pop dst address */
-	ret
-END(strcpy)
diff --git a/libc/arch-x86/string/strncmp_wrapper.S b/libc/arch-x86/string/strncmp_wrapper.S
deleted file mode 100644
index 191d755..0000000
--- a/libc/arch-x86/string/strncmp_wrapper.S
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
-Copyright (c) 2010, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#if defined(USE_SSSE3)
-
-# define USE_AS_STRNCMP
-# define ssse3_strcmp_latest strncmp
-# include "ssse3-strcmp-latest.S"
-
-#else
-
-# include "strncmp.S"
-
-#endif
-
diff --git a/libc/arch-x86/syscalls.mk b/libc/arch-x86/syscalls.mk
index b4ad564..11573de 100644
--- a/libc/arch-x86/syscalls.mk
+++ b/libc/arch-x86/syscalls.mk
@@ -130,6 +130,8 @@
 syscall_src += arch-x86/syscalls/lremovexattr.S
 syscall_src += arch-x86/syscalls/__statfs64.S
 syscall_src += arch-x86/syscalls/unshare.S
+syscall_src += arch-x86/syscalls/swapon.S
+syscall_src += arch-x86/syscalls/swapoff.S
 syscall_src += arch-x86/syscalls/pause.S
 syscall_src += arch-x86/syscalls/gettimeofday.S
 syscall_src += arch-x86/syscalls/settimeofday.S
@@ -148,6 +150,9 @@
 syscall_src += arch-x86/syscalls/__timer_delete.S
 syscall_src += arch-x86/syscalls/utimes.S
 syscall_src += arch-x86/syscalls/utimensat.S
+syscall_src += arch-x86/syscalls/timerfd_create.S
+syscall_src += arch-x86/syscalls/timerfd_settime.S
+syscall_src += arch-x86/syscalls/timerfd_gettime.S
 syscall_src += arch-x86/syscalls/sigaction.S
 syscall_src += arch-x86/syscalls/sigprocmask.S
 syscall_src += arch-x86/syscalls/__sigsuspend.S
diff --git a/libc/arch-x86/syscalls/swapoff.S b/libc/arch-x86/syscalls/swapoff.S
new file mode 100644
index 0000000..f2d6ddb
--- /dev/null
+++ b/libc/arch-x86/syscalls/swapoff.S
@@ -0,0 +1,21 @@
+/* autogenerated by gensyscalls.py */
+#include <linux/err.h>
+#include <machine/asm.h>
+#include <asm/unistd.h>
+
+ENTRY(swapoff)
+    pushl   %ebx                    /* Save %ebx; it is overwritten with the syscall argument. */
+    mov     8(%esp), %ebx           /* First stack argument (above saved %ebx and the return address). */
+    movl    $__NR_swapoff, %eax     /* Syscall number. */
+    int     $0x80                   /* Trap into the kernel; the result comes back in %eax. */
+    cmpl    $-MAX_ERRNO, %eax       /* Unsigned compare against the start of the error range. */
+    jb      1f                      /* Below it: success, return %eax unchanged. */
+    negl    %eax                    /* Error: %eax = positive error number. */
+    pushl   %eax                    /* Argument for __set_errno. */
+    call    __set_errno             /* Defined elsewhere; presumably stores the value in errno. */
+    addl    $4, %esp                /* Drop the argument. */
+    orl     $-1, %eax               /* Return -1. */
+1:
+    popl    %ebx                    /* Restore the caller's %ebx. */
+    ret
+END(swapoff)
diff --git a/libc/arch-x86/syscalls/swapon.S b/libc/arch-x86/syscalls/swapon.S
new file mode 100644
index 0000000..08602fb
--- /dev/null
+++ b/libc/arch-x86/syscalls/swapon.S
@@ -0,0 +1,24 @@
+/* autogenerated by gensyscalls.py */
+#include <linux/err.h>
+#include <machine/asm.h>
+#include <asm/unistd.h>
+
+ENTRY(swapon)
+    pushl   %ebx
+    pushl   %ecx
+    mov     12(%esp), %ebx
+    mov     16(%esp), %ecx
+    movl    $__NR_swapon, %eax
+    int     $0x80
+    cmpl    $-MAX_ERRNO, %eax
+    jb      1f
+    negl    %eax
+    pushl   %eax
+    call    __set_errno
+    addl    $4, %esp
+    orl     $-1, %eax
+1:
+    popl    %ecx
+    popl    %ebx
+    ret
+END(swapon)
diff --git a/libc/arch-x86/syscalls/timerfd_create.S b/libc/arch-x86/syscalls/timerfd_create.S
new file mode 100644
index 0000000..801f8a7
--- /dev/null
+++ b/libc/arch-x86/syscalls/timerfd_create.S
@@ -0,0 +1,24 @@
+/* autogenerated by gensyscalls.py */
+#include <linux/err.h>
+#include <machine/asm.h>
+#include <asm/unistd.h>
+
+ENTRY(timerfd_create)
+    pushl   %ebx
+    pushl   %ecx
+    mov     12(%esp), %ebx
+    mov     16(%esp), %ecx
+    movl    $__NR_timerfd_create, %eax
+    int     $0x80
+    cmpl    $-MAX_ERRNO, %eax
+    jb      1f
+    negl    %eax
+    pushl   %eax
+    call    __set_errno
+    addl    $4, %esp
+    orl     $-1, %eax
+1:
+    popl    %ecx
+    popl    %ebx
+    ret
+END(timerfd_create)
diff --git a/libc/arch-x86/syscalls/timerfd_gettime.S b/libc/arch-x86/syscalls/timerfd_gettime.S
new file mode 100644
index 0000000..fde17be
--- /dev/null
+++ b/libc/arch-x86/syscalls/timerfd_gettime.S
@@ -0,0 +1,24 @@
+/* autogenerated by gensyscalls.py */
+#include <linux/err.h>
+#include <machine/asm.h>
+#include <asm/unistd.h>
+
+ENTRY(timerfd_gettime)
+    pushl   %ebx
+    pushl   %ecx
+    mov     12(%esp), %ebx
+    mov     16(%esp), %ecx
+    movl    $__NR_timerfd_gettime, %eax
+    int     $0x80
+    cmpl    $-MAX_ERRNO, %eax
+    jb      1f
+    negl    %eax
+    pushl   %eax
+    call    __set_errno
+    addl    $4, %esp
+    orl     $-1, %eax
+1:
+    popl    %ecx
+    popl    %ebx
+    ret
+END(timerfd_gettime)
diff --git a/libc/arch-x86/syscalls/timerfd_settime.S b/libc/arch-x86/syscalls/timerfd_settime.S
new file mode 100644
index 0000000..5a5f3e4
--- /dev/null
+++ b/libc/arch-x86/syscalls/timerfd_settime.S
@@ -0,0 +1,30 @@
+/* autogenerated by gensyscalls.py */
+#include <linux/err.h>
+#include <machine/asm.h>
+#include <asm/unistd.h>
+
+ENTRY(timerfd_settime)
+    pushl   %ebx
+    pushl   %ecx
+    pushl   %edx
+    pushl   %esi
+    mov     20(%esp), %ebx
+    mov     24(%esp), %ecx
+    mov     28(%esp), %edx
+    mov     32(%esp), %esi
+    movl    $__NR_timerfd_settime, %eax
+    int     $0x80
+    cmpl    $-MAX_ERRNO, %eax
+    jb      1f
+    negl    %eax
+    pushl   %eax
+    call    __set_errno
+    addl    $4, %esp
+    orl     $-1, %eax
+1:
+    popl    %esi
+    popl    %edx
+    popl    %ecx
+    popl    %ebx
+    ret
+END(timerfd_settime)
diff --git a/libc/arch-x86/x86.mk b/libc/arch-x86/x86.mk
index 0e5d283..19c1402 100644
--- a/libc/arch-x86/x86.mk
+++ b/libc/arch-x86/x86.mk
@@ -10,16 +10,77 @@
     arch-x86/bionic/sigsetjmp.S \
     arch-x86/bionic/syscall.S \
     arch-x86/bionic/vfork.S \
-    arch-x86/string/bcopy_wrapper.S \
-    arch-x86/string/bzero_wrapper.S \
-    arch-x86/string/ffs.S \
-    arch-x86/string/memcmp_wrapper.S \
-    arch-x86/string/memcpy_wrapper.S \
-    arch-x86/string/memmove_wrapper.S \
-    arch-x86/string/memset_wrapper.S \
-    arch-x86/string/strcmp_wrapper.S \
-    arch-x86/string/strlen_wrapper.S \
-    arch-x86/string/strncmp_wrapper.S \
+    arch-x86/string/ffs.S
+
+ifeq ($(ARCH_X86_HAVE_SSSE3),true)
+_LIBC_ARCH_COMMON_SRC_FILES += \
+	arch-x86/string/ssse3-memcpy-atom.S \
+	arch-x86/string/ssse3-memmove-atom.S \
+	arch-x86/string/ssse3-bcopy-atom.S \
+	arch-x86/string/ssse3-strncat-atom.S \
+	arch-x86/string/ssse3-strncpy-atom.S \
+	arch-x86/string/ssse3-strlcat-atom.S \
+	arch-x86/string/ssse3-strlcpy-atom.S \
+	arch-x86/string/ssse3-strcmp-atom.S \
+	arch-x86/string/ssse3-strncmp-atom.S \
+	arch-x86/string/ssse3-strcat-atom.S \
+	arch-x86/string/ssse3-strcpy-atom.S \
+	arch-x86/string/ssse3-memcmp-atom.S \
+	arch-x86/string/ssse3-wmemcmp-atom.S \
+	arch-x86/string/ssse3-memcmp16-atom.S \
+	arch-x86/string/ssse3-wcscat-atom.S \
+	arch-x86/string/ssse3-wcscpy-atom.S
+else
+_LIBC_ARCH_COMMON_SRC_FILES += \
+	arch-x86/string/memcpy.S \
+	arch-x86/string/memmove.S \
+	arch-x86/string/bcopy.S \
+	arch-x86/string/strcmp.S \
+	arch-x86/string/strncmp.S \
+	arch-x86/string/strcat.S \
+	arch-x86/string/memcmp.S \
+	string/memcmp16.c \
+	string/strcpy.c \
+	string/strncat.c \
+	string/strncpy.c \
+	string/strlcat.c \
+	string/strlcpy.c \
+	upstream-freebsd/lib/libc/string/wcscpy.c \
+	upstream-freebsd/lib/libc/string/wcscat.c \
+	upstream-freebsd/lib/libc/string/wmemcmp.c
+endif
+
+ifeq ($(ARCH_X86_HAVE_SSE2),true)
+_LIBC_ARCH_COMMON_SRC_FILES += \
+	arch-x86/string/sse2-memset-atom.S \
+	arch-x86/string/sse2-bzero-atom.S \
+	arch-x86/string/sse2-memchr-atom.S \
+	arch-x86/string/sse2-memrchr-atom.S \
+	arch-x86/string/sse2-strchr-atom.S \
+	arch-x86/string/sse2-strrchr-atom.S \
+	arch-x86/string/sse2-index-atom.S \
+	arch-x86/string/sse2-strlen-atom.S \
+	arch-x86/string/sse2-strnlen-atom.S \
+	arch-x86/string/sse2-wcschr-atom.S \
+	arch-x86/string/sse2-wcsrchr-atom.S \
+	arch-x86/string/sse2-wcslen-atom.S \
+	arch-x86/string/sse2-wcscmp-atom.S
+else
+_LIBC_ARCH_COMMON_SRC_FILES += \
+	arch-x86/string/memset.S \
+	arch-x86/string/strlen.S \
+	arch-x86/string/bzero.S \
+	bionic/memrchr.c \
+	bionic/memchr.c \
+	string/strchr.cpp \
+	string/strrchr.c \
+	string/index.c \
+	bionic/strnlen.c \
+	upstream-freebsd/lib/libc/string/wcschr.c \
+	upstream-freebsd/lib/libc/string/wcsrchr.c \
+	upstream-freebsd/lib/libc/string/wcslen.c \
+	upstream-freebsd/lib/libc/string/wcscmp.c
+endif
 
 _LIBC_ARCH_STATIC_SRC_FILES := \
     bionic/dl_iterate_phdr_static.c \
diff --git a/libc/bionic/__memcpy_chk.cpp b/libc/bionic/__memcpy_chk.cpp
index b36cfdd..a3d744c 100644
--- a/libc/bionic/__memcpy_chk.cpp
+++ b/libc/bionic/__memcpy_chk.cpp
@@ -45,7 +45,7 @@
 extern "C" void *__memcpy_chk(void *dest, const void *src,
               size_t copy_amount, size_t dest_len)
 {
-    if (__builtin_expect(copy_amount > dest_len, 0)) {
+    if (__predict_false(copy_amount > dest_len)) {
         __fortify_chk_fail("memcpy buffer overflow",
                              BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW);
     }
diff --git a/libc/bionic/__memmove_chk.cpp b/libc/bionic/__memmove_chk.cpp
index ff770b5..49a0597 100644
--- a/libc/bionic/__memmove_chk.cpp
+++ b/libc/bionic/__memmove_chk.cpp
@@ -44,7 +44,7 @@
 extern "C" void *__memmove_chk (void *dest, const void *src,
               size_t len, size_t dest_len)
 {
-    if (len > dest_len) {
+    if (__predict_false(len > dest_len)) {
         __fortify_chk_fail("memmove buffer overflow",
                              BIONIC_EVENT_MEMMOVE_BUFFER_OVERFLOW);
     }
diff --git a/libc/bionic/__memset_chk.cpp b/libc/bionic/__memset_chk.cpp
index b201ed2..f7a5f24 100644
--- a/libc/bionic/__memset_chk.cpp
+++ b/libc/bionic/__memset_chk.cpp
@@ -42,7 +42,7 @@
  * greater than 0.
  */
 extern "C" void *__memset_chk (void *dest, int c, size_t n, size_t dest_len) {
-    if (n > dest_len) {
+    if (__predict_false(n > dest_len)) {
         __fortify_chk_fail("memset buffer overflow",
                              BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW);
     }
diff --git a/libc/bionic/__strcat_chk.cpp b/libc/bionic/__strcat_chk.cpp
index fb46e0d..e0b3259 100644
--- a/libc/bionic/__strcat_chk.cpp
+++ b/libc/bionic/__strcat_chk.cpp
@@ -29,7 +29,6 @@
 #include <string.h>
 #include <stdlib.h>
 #include "libc_logging.h"
-#include <safe_iop.h>
 
 /*
  * Runtime implementation of __builtin____strcat_chk.
@@ -42,22 +41,24 @@
  * This strcat check is called if _FORTIFY_SOURCE is defined and
  * greater than 0.
  */
-extern "C" char *__strcat_chk (char *dest, const char *src, size_t dest_buf_size) {
-    // TODO: optimize so we don't scan src/dest twice.
-    size_t src_len  = strlen(src);
-    size_t dest_len = strlen(dest);
-    size_t sum;
+extern "C" char* __strcat_chk(
+        char* __restrict dest,
+        const char* __restrict src,
+        size_t dest_buf_size)
+{
+    char* save = dest;
+    size_t dest_len = __strlen_chk(dest, dest_buf_size);
 
-    // sum = src_len + dest_len + 1 (with overflow protection)
-    if (!safe_add3(&sum, src_len, dest_len, 1U)) {
-        __fortify_chk_fail("strcat integer overflow",
-                             BIONIC_EVENT_STRCAT_INTEGER_OVERFLOW);
+    dest += dest_len;
+    dest_buf_size -= dest_len;
+
+    while ((*dest++ = *src++) != '\0') {
+        dest_buf_size--;
+        if (__predict_false(dest_buf_size == 0)) {
+            __fortify_chk_fail("strcat buffer overflow",
+                               BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW);
+        }
     }
 
-    if (sum > dest_buf_size) {
-        __fortify_chk_fail("strcat buffer overflow",
-                             BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW);
-    }
-
-    return strcat(dest, src);
+    return save;
 }
diff --git a/libc/string/strchr.c b/libc/bionic/__strchr_chk.cpp
similarity index 78%
copy from libc/string/strchr.c
copy to libc/bionic/__strchr_chk.cpp
index 29acca5..72559bc 100644
--- a/libc/string/strchr.c
+++ b/libc/bionic/__strchr_chk.cpp
@@ -1,4 +1,3 @@
-/*	$OpenBSD: index.c,v 1.5 2005/08/08 08:05:37 espie Exp $ */
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
@@ -31,21 +30,17 @@
 #include <string.h>
 #include "libc_logging.h"
 
-char *
-__strchr_chk(const char *p, int ch, size_t s_len)
-{
-	for (;; ++p, s_len--) {
-		if (s_len == 0)
-			__fortify_chk_fail("strchr read beyond buffer", 0);
-		if (*p == (char) ch)
-			return((char *)p);
-		if (!*p)
-			return((char *)NULL);
-	}
-	/* NOTREACHED */
-}
-
-char *
-strchr(const char *p, int ch) {
-    return __strchr_chk(p, ch, __BIONIC_FORTIFY_UNKNOWN_SIZE);
+extern "C" char* __strchr_chk(const char* p, int ch, size_t s_len) {
+  for (;; ++p, s_len--) {
+    if (__predict_false(s_len == 0)) {
+      __fortify_chk_fail("read beyond buffer", 0);
+    }
+    if (*p == static_cast<char>(ch)) {
+      return const_cast<char*>(p);
+    }
+    if (*p == '\0') {
+      return NULL;
+    }
+  }
+  /* NOTREACHED */
 }
diff --git a/libc/bionic/__strcpy_chk.cpp b/libc/bionic/__strcpy_chk.cpp
index bfb6642..5aa0e93 100644
--- a/libc/bionic/__strcpy_chk.cpp
+++ b/libc/bionic/__strcpy_chk.cpp
@@ -44,7 +44,7 @@
 extern "C" char *__strcpy_chk (char *dest, const char *src, size_t dest_len) {
     // TODO: optimize so we don't scan src twice.
     size_t src_len = strlen(src) + 1;
-    if (src_len > dest_len) {
+    if (__predict_false(src_len > dest_len)) {
         __fortify_chk_fail("strcpy buffer overflow",
                              BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW);
     }
diff --git a/libc/bionic/__strlcat_chk.cpp b/libc/bionic/__strlcat_chk.cpp
index 96f62f9..25c67ad 100644
--- a/libc/bionic/__strlcat_chk.cpp
+++ b/libc/bionic/__strlcat_chk.cpp
@@ -45,7 +45,7 @@
 extern "C" size_t __strlcat_chk(char *dest, const char *src,
               size_t supplied_size, size_t dest_len_from_compiler)
 {
-    if (supplied_size > dest_len_from_compiler) {
+    if (__predict_false(supplied_size > dest_len_from_compiler)) {
         __fortify_chk_fail("strlcat buffer overflow", 0);
     }
 
diff --git a/libc/bionic/__strlcpy_chk.cpp b/libc/bionic/__strlcpy_chk.cpp
index 636966b..f6b11fc 100644
--- a/libc/bionic/__strlcpy_chk.cpp
+++ b/libc/bionic/__strlcpy_chk.cpp
@@ -45,7 +45,7 @@
 extern "C" size_t __strlcpy_chk(char *dest, const char *src,
               size_t supplied_size, size_t dest_len_from_compiler)
 {
-    if (supplied_size > dest_len_from_compiler) {
+    if (__predict_false(supplied_size > dest_len_from_compiler)) {
         __fortify_chk_fail("strlcpy buffer overflow", 0);
     }
 
diff --git a/libc/bionic/__strlen_chk.cpp b/libc/bionic/__strlen_chk.cpp
index 6ebf09c..151a497 100644
--- a/libc/bionic/__strlen_chk.cpp
+++ b/libc/bionic/__strlen_chk.cpp
@@ -56,7 +56,7 @@
 extern "C" size_t __strlen_chk(const char *s, size_t s_len) {
     size_t ret = strlen(s);
 
-    if (__builtin_expect(ret >= s_len, 0)) {
+    if (__predict_false(ret >= s_len)) {
         __fortify_chk_fail("strlen read overflow", 0);
     }
 
diff --git a/libc/bionic/__strncat_chk.cpp b/libc/bionic/__strncat_chk.cpp
index ab28541..f54d838 100644
--- a/libc/bionic/__strncat_chk.cpp
+++ b/libc/bionic/__strncat_chk.cpp
@@ -29,7 +29,6 @@
 #include <string.h>
 #include <stdlib.h>
 #include "libc_logging.h"
-#include <safe_iop.h>
 
 /*
  * Runtime implementation of __builtin____strncat_chk.
@@ -42,27 +41,33 @@
  * This strncat check is called if _FORTIFY_SOURCE is defined and
  * greater than 0.
  */
-extern "C" char *__strncat_chk (char *dest, const char *src,
-              size_t len, size_t dest_buf_size)
+extern "C" char *__strncat_chk(
+        char* __restrict dest,
+        const char* __restrict src,
+        size_t len, size_t dest_buf_size)
 {
-    // TODO: optimize so we don't scan src/dest twice.
-    size_t dest_len = strlen(dest);
-    size_t src_len = strlen(src);
-    if (src_len > len) {
-        src_len = len;
+    if (len == 0) {
+        return dest;
     }
 
-    size_t sum;
-    // sum = src_len + dest_len + 1 (with overflow protection)
-    if (!safe_add3(&sum, src_len, dest_len, 1U)) {
-        __fortify_chk_fail("strncat integer overflow",
-                             BIONIC_EVENT_STRNCAT_INTEGER_OVERFLOW);
+    size_t dest_len = __strlen_chk(dest, dest_buf_size);
+    char *d = dest + dest_len;
+    dest_buf_size -= dest_len;
+
+    while (*src != '\0') {
+        *d++ = *src++;
+        len--; dest_buf_size--;
+
+        if (__predict_false(dest_buf_size == 0)) {
+            __fortify_chk_fail("strncat buffer overflow",
+                               BIONIC_EVENT_STRNCAT_BUFFER_OVERFLOW);
+        }
+
+        if (len == 0) {
+            break;
+        }
     }
 
-    if (sum > dest_buf_size) {
-        __fortify_chk_fail("strncat buffer overflow",
-                             BIONIC_EVENT_STRNCAT_BUFFER_OVERFLOW);
-    }
-
-    return strncat(dest, src, len);
+    *d = '\0';
+    return dest;
 }
diff --git a/libc/bionic/__strncpy_chk.cpp b/libc/bionic/__strncpy_chk.cpp
index 0f1797e..b01879c 100644
--- a/libc/bionic/__strncpy_chk.cpp
+++ b/libc/bionic/__strncpy_chk.cpp
@@ -44,7 +44,7 @@
 extern "C" char *__strncpy_chk (char *dest, const char *src,
               size_t len, size_t dest_len)
 {
-    if (len > dest_len) {
+    if (__predict_false(len > dest_len)) {
         __fortify_chk_fail("strncpy buffer overflow",
                              BIONIC_EVENT_STRNCPY_BUFFER_OVERFLOW);
     }
diff --git a/libc/bionic/__umask_chk.cpp b/libc/bionic/__umask_chk.cpp
index ff67ed6..8fe95a2 100644
--- a/libc/bionic/__umask_chk.cpp
+++ b/libc/bionic/__umask_chk.cpp
@@ -42,7 +42,7 @@
  * greater than 0.
  */
 extern "C" mode_t __umask_chk(mode_t mode) {
-    if ((mode & 0777) != mode) {
+    if (__predict_false((mode & 0777) != mode)) {
         __fortify_chk_fail("umask called with invalid mask", 0);
     }
 
diff --git a/libc/bionic/__vsnprintf_chk.cpp b/libc/bionic/__vsnprintf_chk.cpp
index 0fdda3e..2d3a81e 100644
--- a/libc/bionic/__vsnprintf_chk.cpp
+++ b/libc/bionic/__vsnprintf_chk.cpp
@@ -50,7 +50,7 @@
         const char *format,
         va_list va)
 {
-    if (supplied_size > dest_len_from_compiler) {
+    if (__predict_false(supplied_size > dest_len_from_compiler)) {
         __fortify_chk_fail("vsnprintf buffer overflow", 0);
     }
 
diff --git a/libc/stdio/fpurge.c b/libc/bionic/abort.cpp
similarity index 60%
copy from libc/stdio/fpurge.c
copy to libc/bionic/abort.cpp
index e04c4fe..6fcdfda 100644
--- a/libc/stdio/fpurge.c
+++ b/libc/bionic/abort.cpp
@@ -1,10 +1,6 @@
-/*	$OpenBSD: fpurge.c,v 1.6 2005/08/08 08:05:36 espie Exp $ */
-/*-
- * Copyright (c) 1990, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Chris Torek.
+/*
+ * Copyright (c) 1985 Regents of the University of California.
+ * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -31,31 +27,41 @@
  * SUCH DAMAGE.
  */
 
-#include <errno.h>
-#include <stdio.h>
+#include <signal.h>
 #include <stdlib.h>
-#include "local.h"
+#include <unistd.h>
+#include "atexit.h"
 
-/*
- * fpurge: like fflush, but without writing anything: leave the
- * given FILE's buffer empty.
- */
-int
-fpurge(FILE *fp)
+__LIBC_HIDDEN__ void (*__cleanup)();
+
+#ifdef __arm__
+extern "C" __LIBC_HIDDEN__ void __libc_android_abort()
+#else
+void abort()
+#endif
 {
-	FLOCKFILE(fp);
-	if (!fp->_flags) {
-		FUNLOCKFILE(fp);
-		errno = EBADF;
-		return(EOF);
-	}
+  // Don't block SIGABRT to give any signal handler a chance; we ignore
+  // any errors -- X3J11 doesn't allow abort to return anyway.
+  sigset_t mask;
+  sigfillset(&mask);
+  sigdelset(&mask, SIGABRT);
+  sigprocmask(SIG_SETMASK, &mask, NULL);
 
-	if (HASUB(fp))
-		FREEUB(fp);
-	WCIO_FREE(fp);
-	fp->_p = fp->_bf._base;
-	fp->_r = 0;
-	fp->_w = fp->_flags & (__SLBF|__SNBF) ? 0 : fp->_bf._size;
-	FUNLOCKFILE(fp);
-	return (0);
+  // POSIX requires we flush stdio buffers on abort.
+  if (__cleanup) {
+    (*__cleanup)();
+  }
+
+  raise(SIGABRT);
+
+  // If SIGABRT is ignored, or is caught and the handler returns,
+  // remove the SIGABRT signal handler and raise SIGABRT again.
+  struct sigaction sa;
+  sa.sa_handler = SIG_DFL;
+  sa.sa_flags   = SA_RESTART;
+  sigemptyset(&sa.sa_mask);
+  sigaction(SIGABRT, &sa, &sa);
+  sigprocmask(SIG_SETMASK, &mask, NULL);
+  raise(SIGABRT);
+  _exit(1);
 }
diff --git a/libc/bionic/dl_iterate_phdr_static.c b/libc/bionic/dl_iterate_phdr_static.c
index 90ed1b7..fc79ce5 100644
--- a/libc/bionic/dl_iterate_phdr_static.c
+++ b/libc/bionic/dl_iterate_phdr_static.c
@@ -27,39 +27,56 @@
  */
 
 #include <elf.h>
+#include <sys/auxv.h>
 #include <sys/types.h>
 #include <link.h>
 
-/* Dynamic binaries get this from the dynamic linker (system/linker), which
- * we don't pull in for static bins. We also don't have a list of so's to
- * iterate over, since there's really only a single monolithic blob of
- * code/data.
- *
- * All we need to do is to find where the executable is in memory, and grab the
- * phdr and phnum from there.
- */
-
 /* ld provides this to us in the default link script */
-extern void *__executable_start;
+extern void* __executable_start;
 
-int
-dl_iterate_phdr(int (*cb)(struct dl_phdr_info *info, size_t size, void *data),
-                void *data)
-{
-    struct dl_phdr_info dl_info;
-    Elf32_Ehdr *ehdr = (Elf32_Ehdr *) &__executable_start;
-    Elf32_Phdr *phdr = (Elf32_Phdr *)((unsigned long)ehdr + ehdr->e_phoff);
+int dl_iterate_phdr(int (*cb)(struct dl_phdr_info* info, size_t size, void* data), void* data) {
+    Elf32_Ehdr* ehdr = (Elf32_Ehdr*) &__executable_start;
 
-    /* TODO: again, copied from linker.c. Find a better home for this
-     * later. */
+    // TODO: again, copied from linker.c. Find a better home for this later.
     if (ehdr->e_ident[EI_MAG0] != ELFMAG0) return -1;
     if (ehdr->e_ident[EI_MAG1] != ELFMAG1) return -1;
     if (ehdr->e_ident[EI_MAG2] != ELFMAG2) return -1;
     if (ehdr->e_ident[EI_MAG3] != ELFMAG3) return -1;
 
-    dl_info.dlpi_addr = 0;
-    dl_info.dlpi_name = NULL;
-    dl_info.dlpi_phdr = phdr;
-    dl_info.dlpi_phnum = ehdr->e_phnum;
-    return cb(&dl_info, sizeof (struct dl_phdr_info), data);
+    // Dynamic binaries get their dl_iterate_phdr from the dynamic linker, but
+    // static binaries get this. We don't have a list of shared objects to
+    // iterate over, since there's really only a single monolithic blob of
+    // code/data, plus optionally a VDSO.
+
+    struct dl_phdr_info exe_info;
+    exe_info.dlpi_addr = 0;
+    exe_info.dlpi_name = NULL;
+    exe_info.dlpi_phdr = (Elf32_Phdr*) ((unsigned long) ehdr + ehdr->e_phoff);
+    exe_info.dlpi_phnum = ehdr->e_phnum;
+
+#ifdef AT_SYSINFO_EHDR
+    // Try the executable first.
+    int rc = cb(&exe_info, sizeof(exe_info), data);
+    if (rc != 0) {
+        return rc;
+    }
+
+    // The callback returned 0 for the executable, so offer it the VDSO next.
+    Elf32_Ehdr* ehdr_vdso = (Elf32_Ehdr*) getauxval(AT_SYSINFO_EHDR);
+    struct dl_phdr_info vdso_info;
+    vdso_info.dlpi_addr = 0;
+    vdso_info.dlpi_name = NULL;
+    vdso_info.dlpi_phdr = (Elf32_Phdr*) ((char*) ehdr_vdso + ehdr_vdso->e_phoff);
+    vdso_info.dlpi_phnum = ehdr_vdso->e_phnum;
+    for (size_t i = 0; i < vdso_info.dlpi_phnum; ++i) {
+        if (vdso_info.dlpi_phdr[i].p_type == PT_LOAD) {
+            vdso_info.dlpi_addr = (Elf32_Addr) ehdr_vdso - vdso_info.dlpi_phdr[i].p_vaddr;
+            break;
+        }
+    }
+    return cb(&vdso_info, sizeof(vdso_info), data);
+#else
+    // There's only the executable to try.
+    return cb(&exe_info, sizeof(exe_info), data);
+#endif
 }
diff --git a/libc/bionic/dlmalloc.c b/libc/bionic/dlmalloc.c
index 51c62a7..78f2e1d 100644
--- a/libc/bionic/dlmalloc.c
+++ b/libc/bionic/dlmalloc.c
@@ -28,13 +28,17 @@
 // Ugly inclusion of C file so that bionic specific #defines configure dlmalloc.
 #include "../upstream-dlmalloc/malloc.c"
 
+extern void (*__cleanup)();
+
 static void __bionic_heap_corruption_error(const char* function) {
-  __libc_fatal("@@@ ABORTING: heap corruption detected by %s", function);
+  __cleanup = NULL; // The heap is corrupt. We can forget trying to shut down stdio.
+  __libc_fatal("heap corruption detected by %s", function);
 }
 
 static void __bionic_heap_usage_error(const char* function, void* address) {
-  __libc_fatal("@@@ ABORTING: invalid address or address of corrupt block %p passed to %s",
+  __libc_fatal_no_abort("invalid address or address of corrupt block %p passed to %s",
                address, function);
-  // So that we can get a memory dump around the specific address.
+  // So that debuggerd gives us a memory dump around the specific address.
+  // TODO: improve the debuggerd protocol so we can tell it to dump an address when we abort.
   *((int**) 0xdeadbaad) = (int*) address;
 }
diff --git a/libc/bionic/dlmalloc.h b/libc/bionic/dlmalloc.h
index a00a583..71b3be8 100644
--- a/libc/bionic/dlmalloc.h
+++ b/libc/bionic/dlmalloc.h
@@ -27,6 +27,7 @@
 #define LOCK_AT_FORK 1
 #define USE_RECURSIVE_LOCK 0
 #define USE_SPIN_LOCKS 0
+#define DEFAULT_MMAP_THRESHOLD (64U * 1024U)
 
 /* Include the proper definitions. */
 #include "../upstream-dlmalloc/malloc.h"
diff --git a/libc/arch-mips/bionic/atexit.S b/libc/bionic/futimens.cpp
similarity index 81%
copy from libc/arch-mips/bionic/atexit.S
copy to libc/bionic/futimens.cpp
index 7f0c820..1ca8eb5 100644
--- a/libc/arch-mips/bionic/atexit.S
+++ b/libc/bionic/futimens.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (C) 2013 The Android Open Source Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -25,19 +25,10 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-	.text
-	.globl	atexit
-	.hidden	atexit
-	.type	atexit, @function
-	.align  4
-	.ent	atexit
-atexit:
-	.set	noreorder
-	.cpload	$t9
-	.set	reorder
-	la	$t9, __cxa_atexit
-	move	$a1, $0
-	la      $a2, __dso_handle
-	j	$t9
-	.size	atexit, .-atexit
-	.end	atexit
+
+#include <errno.h>
+#include <sys/stat.h>
+
+int futimens(int fd, const struct timespec times[2]) {
+  return utimensat(fd, NULL, times, 0);
+}
diff --git a/libc/bionic/libc_logging.cpp b/libc/bionic/libc_logging.cpp
index 8de1192..6bf7415 100644
--- a/libc/bionic/libc_logging.cpp
+++ b/libc/bionic/libc_logging.cpp
@@ -42,7 +42,6 @@
 #include <unistd.h>
 
 static pthread_mutex_t gAbortMsgLock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_mutex_t gLogInitializationLock = PTHREAD_MUTEX_INITIALIZER;
 
 __LIBC_HIDDEN__ abort_msg_t** __abort_message_ptr; // Accessible to __libc_init_common.
 
@@ -348,7 +347,7 @@
             buffer[0] = '0';
             buffer[1] = 'x';
             format_integer(buffer + 2, sizeof(buffer) - 2, value, 'x');
-        } else if (c == 'd' || c == 'i' || c == 'o' || c == 'x' || c == 'X') {
+        } else if (c == 'd' || c == 'i' || c == 'o' || c == 'u' || c == 'x' || c == 'X') {
             /* integers - first read value from stack */
             uint64_t value;
             int is_signed = (c == 'd' || c == 'i' || c == 'o');
@@ -421,13 +420,9 @@
 }
 
 static int __libc_write_log(int priority, const char* tag, const char* msg) {
-  static int main_log_fd = -1;
+  int main_log_fd = TEMP_FAILURE_RETRY(open("/dev/log/main", O_CLOEXEC | O_WRONLY));
   if (main_log_fd == -1) {
-    ScopedPthreadMutexLocker locker(&gLogInitializationLock);
-    main_log_fd = TEMP_FAILURE_RETRY(open("/dev/log/main", O_CLOEXEC | O_WRONLY));
-    if (main_log_fd == -1) {
-      return -1;
-    }
+    return -1;
   }
 
   iovec vec[3];
@@ -438,7 +433,9 @@
   vec[2].iov_base = const_cast<char*>(msg);
   vec[2].iov_len = strlen(msg) + 1;
 
-  return TEMP_FAILURE_RETRY(writev(main_log_fd, vec, 3));
+  int result = TEMP_FAILURE_RETRY(writev(main_log_fd, vec, 3));
+  close(main_log_fd);
+  return result;
 }
 
 int __libc_format_log_va_list(int priority, const char* tag, const char* format, va_list args) {
@@ -465,12 +462,13 @@
   vec[2].iov_base = const_cast<void*>(payload);
   vec[2].iov_len = len;
 
-  static int event_log_fd = -1;
+  int event_log_fd = TEMP_FAILURE_RETRY(open("/dev/log/events", O_CLOEXEC | O_WRONLY));
   if (event_log_fd == -1) {
-    ScopedPthreadMutexLocker locker(&gLogInitializationLock);
-    event_log_fd = TEMP_FAILURE_RETRY(open("/dev/log/events", O_CLOEXEC | O_WRONLY));
+    return -1;
   }
-  return TEMP_FAILURE_RETRY(writev(event_log_fd, vec, 3));
+  int result = TEMP_FAILURE_RETRY(writev(event_log_fd, vec, 3));
+  close(event_log_fd);
+  return result;
 }
 
 void __libc_android_log_event_int(int32_t tag, int value) {
@@ -488,13 +486,10 @@
   __libc_fatal("FORTIFY_SOURCE: %s. Calling abort().", msg);
 }
 
-void __libc_fatal(const char* format, ...) {
+static void __libc_fatal(const char* format, va_list args) {
   char msg[1024];
   BufferOutputStream os(msg, sizeof(msg));
-  va_list args;
-  va_start(args, format);
   out_vformat(os, format, args);
-  va_end(args);
 
   // TODO: log to stderr for the benefit of "adb shell" users.
 
@@ -502,7 +497,20 @@
   __libc_write_log(ANDROID_LOG_FATAL, "libc", msg);
 
   __libc_set_abort_message(msg);
+}
 
+void __libc_fatal_no_abort(const char* format, ...) {
+  va_list args;
+  va_start(args, format);
+  __libc_fatal(format, args);
+  va_end(args);
+}
+
+void __libc_fatal(const char* format, ...) {
+  va_list args;
+  va_start(args, format);
+  __libc_fatal(format, args);
+  va_end(args);
   abort();
 }
 
diff --git a/libc/bionic/malloc_debug_check.cpp b/libc/bionic/malloc_debug_check.cpp
index 91cf287..11a6ec1 100644
--- a/libc/bionic/malloc_debug_check.cpp
+++ b/libc/bionic/malloc_debug_check.cpp
@@ -74,6 +74,10 @@
 
 struct hdr_t {
     uint32_t tag;
+    void* base;  // Always points to the memory allocated using dlmalloc.
+                 // For memory allocated in chk_memalign, this value will
+                 // not be the same as the location of the start of this
+                 // structure.
     hdr_t* prev;
     hdr_t* next;
     uintptr_t bt[MAX_BACKTRACE_DEPTH];
@@ -82,7 +86,7 @@
     int freed_bt_depth;
     size_t size;
     char front_guard[FRONT_GUARD_LEN];
-} __attribute__((packed));
+} __attribute__((packed, aligned(MALLOC_ALIGNMENT)));
 
 struct ftr_t {
     char rear_guard[REAR_GUARD_LEN];
@@ -100,21 +104,26 @@
     return reinterpret_cast<hdr_t*>(user) - 1;
 }
 
+static inline const hdr_t* const_meta(const void* user) {
+    return reinterpret_cast<const hdr_t*>(user) - 1;
+}
+
+
 static unsigned gAllocatedBlockCount;
-static hdr_t *tail;
-static hdr_t *head;
+static hdr_t* tail;
+static hdr_t* head;
 static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
 
 static unsigned backlog_num;
-static hdr_t *backlog_tail;
-static hdr_t *backlog_head;
+static hdr_t* backlog_tail;
+static hdr_t* backlog_head;
 static pthread_mutex_t backlog_lock = PTHREAD_MUTEX_INITIALIZER;
 
-static inline void init_front_guard(hdr_t *hdr) {
+static inline void init_front_guard(hdr_t* hdr) {
     memset(hdr->front_guard, FRONT_GUARD, FRONT_GUARD_LEN);
 }
 
-static inline bool is_front_guard_valid(hdr_t *hdr) {
+static inline bool is_front_guard_valid(hdr_t* hdr) {
     for (size_t i = 0; i < FRONT_GUARD_LEN; i++) {
         if (hdr->front_guard[i] != FRONT_GUARD) {
             return 0;
@@ -123,12 +132,12 @@
     return 1;
 }
 
-static inline void init_rear_guard(hdr_t *hdr) {
+static inline void init_rear_guard(hdr_t* hdr) {
     ftr_t* ftr = to_ftr(hdr);
     memset(ftr->rear_guard, REAR_GUARD, REAR_GUARD_LEN);
 }
 
-static inline bool is_rear_guard_valid(hdr_t *hdr) {
+static inline bool is_rear_guard_valid(hdr_t* hdr) {
     unsigned i;
     int valid = 1;
     int first_mismatch = -1;
@@ -149,7 +158,7 @@
     return valid;
 }
 
-static inline void add_locked(hdr_t *hdr, hdr_t **tail, hdr_t **head) {
+static inline void add_locked(hdr_t* hdr, hdr_t** tail, hdr_t** head) {
     hdr->prev = NULL;
     hdr->next = *head;
     if (*head)
@@ -159,7 +168,7 @@
     *head = hdr;
 }
 
-static inline int del_locked(hdr_t *hdr, hdr_t **tail, hdr_t **head) {
+static inline int del_locked(hdr_t* hdr, hdr_t** tail, hdr_t** head) {
     if (hdr->prev) {
         hdr->prev->next = hdr->next;
     } else {
@@ -173,7 +182,7 @@
     return 0;
 }
 
-static inline void add(hdr_t *hdr, size_t size) {
+static inline void add(hdr_t* hdr, size_t size) {
     ScopedPthreadMutexLocker locker(&lock);
     hdr->tag = ALLOCATION_TAG;
     hdr->size = size;
@@ -183,7 +192,7 @@
     add_locked(hdr, &tail, &head);
 }
 
-static inline int del(hdr_t *hdr) {
+static inline int del(hdr_t* hdr) {
     if (hdr->tag != ALLOCATION_TAG) {
         return -1;
     }
@@ -194,13 +203,13 @@
     return 0;
 }
 
-static inline void poison(hdr_t *hdr) {
+static inline void poison(hdr_t* hdr) {
     memset(user(hdr), FREE_POISON, hdr->size);
 }
 
-static int was_used_after_free(hdr_t *hdr) {
+static int was_used_after_free(hdr_t* hdr) {
     unsigned i;
-    const char *data = (const char *)user(hdr);
+    const char* data = reinterpret_cast<const char *>(user(hdr));
     for (i = 0; i < hdr->size; i++)
         if (data[i] != FREE_POISON)
             return 1;
@@ -208,7 +217,7 @@
 }
 
 /* returns 1 if valid, *safe == 1 if safe to dump stack */
-static inline int check_guards(hdr_t *hdr, int *safe) {
+static inline int check_guards(hdr_t* hdr, int* safe) {
     *safe = 1;
     if (!is_front_guard_valid(hdr)) {
         if (hdr->front_guard[0] == FRONT_GUARD) {
@@ -233,7 +242,7 @@
 }
 
 /* returns 1 if valid, *safe == 1 if safe to dump stack */
-static inline int check_allocation_locked(hdr_t *hdr, int *safe) {
+static inline int check_allocation_locked(hdr_t* hdr, int* safe) {
     int valid = 1;
     *safe = 1;
 
@@ -270,9 +279,9 @@
     return valid;
 }
 
-static inline int del_and_check_locked(hdr_t *hdr,
-                                       hdr_t **tail, hdr_t **head, unsigned *cnt,
-                                       int *safe) {
+static inline int del_and_check_locked(hdr_t* hdr,
+                                       hdr_t** tail, hdr_t** head, unsigned* cnt,
+                                       int* safe) {
     int valid = check_allocation_locked(hdr, safe);
     if (safe) {
         (*cnt)--;
@@ -281,7 +290,7 @@
     return valid;
 }
 
-static inline void del_from_backlog_locked(hdr_t *hdr) {
+static inline void del_from_backlog_locked(hdr_t* hdr) {
     int safe;
     del_and_check_locked(hdr,
                          &backlog_tail, &backlog_head, &backlog_num,
@@ -289,17 +298,17 @@
     hdr->tag = 0; /* clear the tag */
 }
 
-static inline void del_from_backlog(hdr_t *hdr) {
+static inline void del_from_backlog(hdr_t* hdr) {
     ScopedPthreadMutexLocker locker(&backlog_lock);
     del_from_backlog_locked(hdr);
 }
 
-static inline int del_leak(hdr_t *hdr, int *safe) {
+static inline int del_leak(hdr_t* hdr, int* safe) {
     ScopedPthreadMutexLocker locker(&lock);
     return del_and_check_locked(hdr, &tail, &head, &gAllocatedBlockCount, safe);
 }
 
-static inline void add_to_backlog(hdr_t *hdr) {
+static inline void add_to_backlog(hdr_t* hdr) {
     ScopedPthreadMutexLocker locker(&backlog_lock);
     hdr->tag = BACKLOG_TAG;
     backlog_num++;
@@ -307,9 +316,9 @@
     poison(hdr);
     /* If we've exceeded the maximum backlog, clear it up */
     while (backlog_num > gMallocDebugBacklog) {
-        hdr_t *gone = backlog_tail;
+        hdr_t* gone = backlog_tail;
         del_from_backlog_locked(gone);
-        dlfree(gone);
+        dlfree(gone->base);
     }
 }
 
@@ -318,6 +327,7 @@
 
     hdr_t* hdr = static_cast<hdr_t*>(dlmalloc(sizeof(hdr_t) + size + sizeof(ftr_t)));
     if (hdr) {
+        hdr->base = hdr;
         hdr->bt_depth = get_backtrace(hdr->bt, MAX_BACKTRACE_DEPTH);
         add(hdr, size);
         return user(hdr);
@@ -325,13 +335,44 @@
     return NULL;
 }
 
-extern "C" void* chk_memalign(size_t, size_t bytes) {
-//  log_message("%s: %s\n", __FILE__, __FUNCTION__);
-    // XXX: it's better to use malloc, than being wrong
-    return chk_malloc(bytes);
+extern "C" void* chk_memalign(size_t alignment, size_t bytes) {
+    if (alignment <= MALLOC_ALIGNMENT) {
+        return chk_malloc(bytes);
+    }
+
+    // Make the alignment a power of two by rounding down to its highest set bit.
+    if (alignment & (alignment-1)) {
+        alignment = 1L << (31 - __builtin_clz(alignment));
+    }
+
+    // here, alignment is at least MALLOC_ALIGNMENT<<1 bytes
+    // we will align by at least MALLOC_ALIGNMENT bytes
+    // and at most alignment-MALLOC_ALIGNMENT bytes
+    size_t size = (alignment-MALLOC_ALIGNMENT) + bytes;
+    if (size < bytes) { // Overflow.
+        return NULL;
+    }
+
+    void* base = dlmalloc(sizeof(hdr_t) + size + sizeof(ftr_t));
+    if (base != NULL) {
+        // Check that the actual pointer that will be returned is aligned
+        // properly.
+        uintptr_t ptr = reinterpret_cast<uintptr_t>(user(reinterpret_cast<hdr_t*>(base)));
+        if ((ptr % alignment) != 0) {
+            // Align the pointer.
+            ptr += ((-ptr) % alignment);
+        }
+
+        hdr_t* hdr = meta(reinterpret_cast<void*>(ptr));
+        hdr->base = base;
+        hdr->bt_depth = get_backtrace(hdr->bt, MAX_BACKTRACE_DEPTH);
+        add(hdr, bytes);
+        return user(hdr);
+    }
+    return base;
 }
 
-extern "C" void chk_free(void *ptr) {
+extern "C" void chk_free(void* ptr) {
 //  log_message("%s: %s\n", __FILE__, __FUNCTION__);
 
     if (!ptr) /* ignore free(NULL) */
@@ -366,7 +407,7 @@
     }
 }
 
-extern "C" void *chk_realloc(void *ptr, size_t size) {
+extern "C" void* chk_realloc(void* ptr, size_t size) {
 //  log_message("%s: %s\n", __FILE__, __FUNCTION__);
 
     if (!ptr) {
@@ -414,8 +455,23 @@
         }
     }
 
-    hdr = static_cast<hdr_t*>(dlrealloc(hdr, sizeof(hdr_t) + size + sizeof(ftr_t)));
+    if (hdr->base != hdr) {
+        // An allocation from memalign, so create another allocation and
+        // copy the data out.
+        void* newMem = dlmalloc(sizeof(hdr_t) + size + sizeof(ftr_t));
+        if (newMem) {
+            memcpy(newMem, hdr, sizeof(hdr_t) + hdr->size);
+            dlfree(hdr->base);
+            hdr = static_cast<hdr_t*>(newMem);
+        } else {
+            dlfree(hdr->base);
+            hdr = NULL;
+        }
+    } else {
+        hdr = static_cast<hdr_t*>(dlrealloc(hdr, sizeof(hdr_t) + size + sizeof(ftr_t)));
+    }
     if (hdr) {
+        hdr->base = hdr;
         hdr->bt_depth = get_backtrace(hdr->bt, MAX_BACKTRACE_DEPTH);
         add(hdr, size);
         return user(hdr);
@@ -424,11 +480,12 @@
     return NULL;
 }
 
-extern "C" void *chk_calloc(int nmemb, size_t size) {
+extern "C" void* chk_calloc(int nmemb, size_t size) {
 //  log_message("%s: %s\n", __FILE__, __FUNCTION__);
     size_t total_size = nmemb * size;
     hdr_t* hdr = static_cast<hdr_t*>(dlcalloc(1, sizeof(hdr_t) + total_size + sizeof(ftr_t)));
     if (hdr) {
+        hdr->base = hdr;
         hdr->bt_depth = get_backtrace(hdr->bt, MAX_BACKTRACE_DEPTH);
         add(hdr, total_size);
         return user(hdr);
@@ -436,6 +493,18 @@
     return NULL;
 }
 
+extern "C" size_t chk_malloc_usable_size(const void* ptr) {
+    // dlmalloc_usable_size returns 0 for NULL and unknown blocks.
+    if (ptr == NULL)
+        return 0;
+
+    const hdr_t* hdr = const_meta(ptr);
+
+    // The sentinel tail is written just after the requested block bytes,
+    // so there is no extra room we can report here.
+    return hdr->size;
+}
+
 static void ReportMemoryLeaks() {
   // We only track leaks at level 10.
   if (gMallocDebugLevel != 10) {
diff --git a/libc/bionic/malloc_debug_common.cpp b/libc/bionic/malloc_debug_common.cpp
index 2148d20..ccceb14 100644
--- a/libc/bionic/malloc_debug_common.cpp
+++ b/libc/bionic/malloc_debug_common.cpp
@@ -190,10 +190,6 @@
     return dlmallinfo();
 }
 
-extern "C" size_t malloc_usable_size(void* mem) {
-    return dlmalloc_usable_size(mem);
-}
-
 extern "C" void* valloc(size_t bytes) {
     return dlvalloc(bytes);
 }
@@ -215,8 +211,9 @@
 
 /* Table for dispatching malloc calls, initialized with default dispatchers. */
 extern const MallocDebug __libc_malloc_default_dispatch;
-const MallocDebug __libc_malloc_default_dispatch __attribute__((aligned(32))) = {
-    dlmalloc, dlfree, dlcalloc, dlrealloc, dlmemalign
+const MallocDebug __libc_malloc_default_dispatch __attribute__((aligned(32))) =
+{
+    dlmalloc, dlfree, dlcalloc, dlrealloc, dlmemalign, dlmalloc_usable_size,
 };
 
 /* Selector of dispatch table to use for dispatching malloc calls. */
@@ -242,6 +239,10 @@
     return __libc_malloc_dispatch->memalign(alignment, bytes);
 }
 
+extern "C" size_t malloc_usable_size(const void* mem) {
+    return __libc_malloc_dispatch->malloc_usable_size(mem);
+}
+
 /* We implement malloc debugging only in libc.so, so code below
  * must be excluded if we compile this file for static libc.a
  */
@@ -253,7 +254,7 @@
 
 /* Table for dispatching malloc calls, depending on environment. */
 static MallocDebug gMallocUse __attribute__((aligned(32))) = {
-    dlmalloc, dlfree, dlcalloc, dlrealloc, dlmemalign
+    dlmalloc, dlfree, dlcalloc, dlrealloc, dlmemalign, dlmalloc_usable_size
 };
 
 extern const char* __progname;
@@ -276,15 +277,10 @@
  * Actual functionality for debug levels 1-10 is implemented in
  * libc_malloc_debug_leak.so, while functionality for emultor's instrumented
  * allocations is implemented in libc_malloc_debug_qemu.so and can be run inside
-  * the emulator only.
+ * the emulator only.
  */
 static void* libc_malloc_impl_handle = NULL;
 
-// This must match the alignment used by dlmalloc.
-#ifndef MALLOC_ALIGNMENT
-#define MALLOC_ALIGNMENT ((size_t)(2 * sizeof(void *)))
-#endif
-
 /* This variable is set to the value of property libc.debug.malloc.backlog,
  * when the value of libc.debug.malloc = 10.  It determines the size of the
  * backlog we use to detect multiple frees.  If the property is not set, the
@@ -296,41 +292,26 @@
 /* The value of libc.debug.malloc. */
 int gMallocDebugLevel;
 
-static void InitMalloc(MallocDebug* table, const char* prefix) {
-  __libc_format_log(ANDROID_LOG_INFO, "libc", "%s: using libc.debug.malloc %d (%s)\n",
-                    __progname, gMallocDebugLevel, prefix);
+template<typename FunctionType>
+void InitMallocFunction(void* malloc_impl_handler, FunctionType* func, const char* prefix, const char* suffix) {
+    char symbol[128];
+    snprintf(symbol, sizeof(symbol), "%s_%s", prefix, suffix);
+    *func = reinterpret_cast<FunctionType>(dlsym(malloc_impl_handler, symbol));
+    if (*func == NULL) {
+        error_log("%s: dlsym(\"%s\") failed", __progname, symbol);
+    }
+}
 
-  char symbol[128];
+static void InitMalloc(void* malloc_impl_handler, MallocDebug* table, const char* prefix) {
+    __libc_format_log(ANDROID_LOG_INFO, "libc", "%s: using libc.debug.malloc %d (%s)\n",
+                      __progname, gMallocDebugLevel, prefix);
 
-  snprintf(symbol, sizeof(symbol), "%s_malloc", prefix);
-  table->malloc = reinterpret_cast<MallocDebugMalloc>(dlsym(libc_malloc_impl_handle, symbol));
-  if (table->malloc == NULL) {
-      error_log("%s: dlsym(\"%s\") failed", __progname, symbol);
-  }
-
-  snprintf(symbol, sizeof(symbol), "%s_free", prefix);
-  table->free = reinterpret_cast<MallocDebugFree>(dlsym(libc_malloc_impl_handle, symbol));
-  if (table->free == NULL) {
-      error_log("%s: dlsym(\"%s\") failed", __progname, symbol);
-  }
-
-  snprintf(symbol, sizeof(symbol), "%s_calloc", prefix);
-  table->calloc = reinterpret_cast<MallocDebugCalloc>(dlsym(libc_malloc_impl_handle, symbol));
-  if (table->calloc == NULL) {
-      error_log("%s: dlsym(\"%s\") failed", __progname, symbol);
-  }
-
-  snprintf(symbol, sizeof(symbol), "%s_realloc", prefix);
-  table->realloc = reinterpret_cast<MallocDebugRealloc>(dlsym(libc_malloc_impl_handle, symbol));
-  if (table->realloc == NULL) {
-      error_log("%s: dlsym(\"%s\") failed", __progname, symbol);
-  }
-
-  snprintf(symbol, sizeof(symbol), "%s_memalign", prefix);
-  table->memalign = reinterpret_cast<MallocDebugMemalign>(dlsym(libc_malloc_impl_handle, symbol));
-  if (table->memalign == NULL) {
-      error_log("%s: dlsym(\"%s\") failed", __progname, symbol);
-  }
+    InitMallocFunction<MallocDebugMalloc>(malloc_impl_handler, &table->malloc, prefix, "malloc");
+    InitMallocFunction<MallocDebugFree>(malloc_impl_handler, &table->free, prefix, "free");
+    InitMallocFunction<MallocDebugCalloc>(malloc_impl_handler, &table->calloc, prefix, "calloc");
+    InitMallocFunction<MallocDebugRealloc>(malloc_impl_handler, &table->realloc, prefix, "realloc");
+    InitMallocFunction<MallocDebugMemalign>(malloc_impl_handler, &table->memalign, prefix, "memalign");
+    InitMallocFunction<MallocDebugMallocUsableSize>(malloc_impl_handler, &table->malloc_usable_size, prefix, "malloc_usable_size");
 }
 
 /* Initializes memory allocation framework once per process. */
@@ -422,24 +403,24 @@
     }
 
     // Load .so that implements the required malloc debugging functionality.
-    libc_malloc_impl_handle = dlopen(so_name, RTLD_LAZY);
-    if (libc_malloc_impl_handle == NULL) {
+    void* malloc_impl_handle = dlopen(so_name, RTLD_LAZY);
+    if (malloc_impl_handle == NULL) {
         error_log("%s: Missing module %s required for malloc debug level %d: %s",
                   __progname, so_name, gMallocDebugLevel, dlerror());
         return;
     }
 
     // Initialize malloc debugging in the loaded module.
-    malloc_debug_initialize = reinterpret_cast<MallocDebugInit>(dlsym(libc_malloc_impl_handle,
+    malloc_debug_initialize = reinterpret_cast<MallocDebugInit>(dlsym(malloc_impl_handle,
                                                                       "malloc_debug_initialize"));
     if (malloc_debug_initialize == NULL) {
         error_log("%s: Initialization routine is not found in %s\n",
                   __progname, so_name);
-        dlclose(libc_malloc_impl_handle);
+        dlclose(malloc_impl_handle);
         return;
     }
     if (malloc_debug_initialize() == -1) {
-        dlclose(libc_malloc_impl_handle);
+        dlclose(malloc_impl_handle);
         return;
     }
 
@@ -447,34 +428,35 @@
         // For memory checker we need to do extra initialization.
         typedef int (*MemCheckInit)(int, const char*);
         MemCheckInit memcheck_initialize =
-            reinterpret_cast<MemCheckInit>(dlsym(libc_malloc_impl_handle,
+            reinterpret_cast<MemCheckInit>(dlsym(malloc_impl_handle,
                                                  "memcheck_initialize"));
         if (memcheck_initialize == NULL) {
             error_log("%s: memcheck_initialize routine is not found in %s\n",
                       __progname, so_name);
-            dlclose(libc_malloc_impl_handle);
+            dlclose(malloc_impl_handle);
             return;
         }
 
         if (memcheck_initialize(MALLOC_ALIGNMENT, memcheck_tracing)) {
-            dlclose(libc_malloc_impl_handle);
+            dlclose(malloc_impl_handle);
             return;
         }
     }
 
+
     // Initialize malloc dispatch table with appropriate routines.
     switch (gMallocDebugLevel) {
         case 1:
-            InitMalloc(&gMallocUse, "leak");
+            InitMalloc(malloc_impl_handle, &gMallocUse, "leak");
             break;
         case 5:
-            InitMalloc(&gMallocUse, "fill");
+            InitMalloc(malloc_impl_handle, &gMallocUse, "fill");
             break;
         case 10:
-            InitMalloc(&gMallocUse, "chk");
+            InitMalloc(malloc_impl_handle, &gMallocUse, "chk");
             break;
         case 20:
-            InitMalloc(&gMallocUse, "qemu_instrumented");
+            InitMalloc(malloc_impl_handle, &gMallocUse, "qemu_instrumented");
             break;
         default:
             break;
@@ -485,13 +467,14 @@
         (gMallocUse.free == NULL) ||
         (gMallocUse.calloc == NULL) ||
         (gMallocUse.realloc == NULL) ||
-        (gMallocUse.memalign == NULL)) {
+        (gMallocUse.memalign == NULL) ||
+        (gMallocUse.malloc_usable_size == NULL)) {
         error_log("%s: some symbols for libc.debug.malloc level %d were not found (see above)",
                   __progname, gMallocDebugLevel);
-        dlclose(libc_malloc_impl_handle);
-        libc_malloc_impl_handle = NULL;
+        dlclose(malloc_impl_handle);
     } else {
         __libc_malloc_dispatch = &gMallocUse;
+        libc_malloc_impl_handle = malloc_impl_handle;
     }
 }
 
@@ -524,7 +507,7 @@
  * This routine is called from __libc_init routines implemented
  * in libc_init_static.c and libc_init_dynamic.c files.
  */
-extern "C" void malloc_debug_init() {
+extern "C" __LIBC_HIDDEN__ void malloc_debug_init() {
     /* We need to initialize malloc iff we implement here custom
      * malloc routines (i.e. USE_DL_PREFIX is defined) for libc.so */
 #if defined(USE_DL_PREFIX) && !defined(LIBC_STATIC)
@@ -534,7 +517,7 @@
 #endif  // USE_DL_PREFIX && !LIBC_STATIC
 }
 
-extern "C" void malloc_debug_fini() {
+extern "C" __LIBC_HIDDEN__ void malloc_debug_fini() {
     /* We need to finalize malloc iff we implement here custom
      * malloc routines (i.e. USE_DL_PREFIX is defined) for libc.so */
 #if defined(USE_DL_PREFIX) && !defined(LIBC_STATIC)
diff --git a/libc/bionic/malloc_debug_common.h b/libc/bionic/malloc_debug_common.h
index 12d0e65..a3f9909 100644
--- a/libc/bionic/malloc_debug_common.h
+++ b/libc/bionic/malloc_debug_common.h
@@ -45,6 +45,11 @@
 
 #define MAX_SIZE_T           (~(size_t)0)
 
+// This must match the alignment used by dlmalloc.
+#ifndef MALLOC_ALIGNMENT
+#define MALLOC_ALIGNMENT ((size_t)(2 * sizeof(void *)))
+#endif
+
 // =============================================================================
 // Structures
 // =============================================================================
@@ -71,12 +76,14 @@
 typedef void* (*MallocDebugCalloc)(size_t, size_t);
 typedef void* (*MallocDebugRealloc)(void*, size_t);
 typedef void* (*MallocDebugMemalign)(size_t, size_t);
+typedef size_t (*MallocDebugMallocUsableSize)(const void*);
 struct MallocDebug {
   MallocDebugMalloc malloc;
   MallocDebugFree free;
   MallocDebugCalloc calloc;
   MallocDebugRealloc realloc;
   MallocDebugMemalign memalign;
+  MallocDebugMallocUsableSize malloc_usable_size;
 };
 
 /* Malloc debugging initialization and finalization routines.
diff --git a/libc/bionic/malloc_debug_leak.cpp b/libc/bionic/malloc_debug_leak.cpp
index 2db8a1f..45b45c2 100644
--- a/libc/bionic/malloc_debug_leak.cpp
+++ b/libc/bionic/malloc_debug_leak.cpp
@@ -67,9 +67,6 @@
 // stack trace functions
 // =============================================================================
 
-#ifndef MALLOC_ALIGNMENT
-#define MALLOC_ALIGNMENT    ((size_t)8U)
-#endif
 #define GUARD               0x48151642
 #define DEBUG               0
 
@@ -80,12 +77,16 @@
 struct AllocationEntry {
     HashEntry* entry;
     uint32_t guard;
-};
+} __attribute__((aligned(MALLOC_ALIGNMENT)));
 
-static AllocationEntry* to_header(void* mem) {
+static inline AllocationEntry* to_header(void* mem) {
   return reinterpret_cast<AllocationEntry*>(mem) - 1;
 }
 
+static inline const AllocationEntry* const_to_header(const void* mem) {
+  return reinterpret_cast<const AllocationEntry*>(mem) - 1;
+}
+
 // =============================================================================
 // Hash Table functions
 // =============================================================================
@@ -229,17 +230,16 @@
 }
 
 extern "C" void* fill_realloc(void* mem, size_t bytes) {
-    void* buffer = fill_malloc(bytes);
-    if (mem == NULL) {
-        return buffer;
+    size_t oldSize = dlmalloc_usable_size(mem);
+    void* newMem = dlrealloc(mem, bytes);
+    if (newMem) {
+        // If this is larger than before, fill the extra with our pattern.
+        size_t newSize = dlmalloc_usable_size(newMem);
+        if (newSize > oldSize) {
+            memset(reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(newMem)+oldSize), CHK_FILL_FREE, newSize-oldSize);
+        }
     }
-    if (buffer) {
-        size_t old_size = dlmalloc_usable_size(mem);
-        size_t size = (bytes < old_size)?(bytes):(old_size);
-        memcpy(buffer, mem, size);
-        fill_free(mem);
-    }
-    return buffer;
+    return newMem;
 }
 
 extern "C" void* fill_memalign(size_t alignment, size_t bytes) {
@@ -250,11 +250,17 @@
     return buffer;
 }
 
+extern "C" size_t fill_malloc_usable_size(const void* mem) {
+    // Since we didn't allocate extra bytes before or after, we can
+    // report the normal usable size here.
+    return dlmalloc_usable_size(mem);
+}
+
 // =============================================================================
 // malloc leak functions
 // =============================================================================
 
-static void* MEMALIGN_GUARD = reinterpret_cast<void*>(0xA1A41520);
+static uint32_t MEMALIGN_GUARD      = 0xA1A41520;
 
 extern "C" void* leak_malloc(size_t bytes) {
     // allocate enough space infront of the allocation to store the pointer for
@@ -296,9 +302,10 @@
 
         if (header->guard != GUARD) {
             // could be a memaligned block
-            if (reinterpret_cast<void**>(mem)[-1] == MEMALIGN_GUARD) {
-                mem = reinterpret_cast<void**>(mem)[-2];
-                header = to_header(mem);
+            if (header->guard == MEMALIGN_GUARD) {
+                // For memaligned blocks, header->entry points to the memory
+                // allocated through leak_malloc.
+                header = to_header(header->entry);
             }
         }
 
@@ -338,19 +345,26 @@
     if (oldMem == NULL) {
         return leak_malloc(bytes);
     }
+
     void* newMem = NULL;
     AllocationEntry* header = to_header(oldMem);
-    if (header && header->guard == GUARD) {
-        size_t oldSize = header->entry->size & ~SIZE_FLAG_MASK;
-        newMem = leak_malloc(bytes);
-        if (newMem != NULL) {
-            size_t copySize = (oldSize <= bytes) ? oldSize : bytes;
-            memcpy(newMem, oldMem, copySize);
-            leak_free(oldMem);
-        }
-    } else {
-        newMem = dlrealloc(oldMem, bytes);
+    if (header->guard == MEMALIGN_GUARD) {
+        // Get the real header.
+        header = to_header(header->entry);
+    } else if (header->guard != GUARD) {
+        debug_log("WARNING bad header guard: '0x%x'! and invalid entry: %p\n",
+                   header->guard, header->entry);
+        return NULL;
     }
+
+    newMem = leak_malloc(bytes);
+    if (newMem != NULL) {
+        size_t oldSize = header->entry->size & ~SIZE_FLAG_MASK;
+        size_t copySize = (oldSize <= bytes) ? oldSize : bytes;
+        memcpy(newMem, oldMem, copySize);
+    }
+    leak_free(oldMem);
+
     return newMem;
 }
 
@@ -375,7 +389,7 @@
 
     void* base = leak_malloc(size);
     if (base != NULL) {
-        intptr_t ptr = reinterpret_cast<intptr_t>(base);
+        uintptr_t ptr = reinterpret_cast<uintptr_t>(base);
         if ((ptr % alignment) == 0) {
             return base;
         }
@@ -383,11 +397,38 @@
         // align the pointer
         ptr += ((-ptr) % alignment);
 
-        // there is always enough space for the base pointer and the guard
-        reinterpret_cast<void**>(ptr)[-1] = MEMALIGN_GUARD;
-        reinterpret_cast<void**>(ptr)[-2] = base;
+        // Enough space for the header has already been allocated. This
+        // assumes that the malloc alignment is at least 8; otherwise there
+        // is no guarantee that the header fits.
+        AllocationEntry* header = to_header(reinterpret_cast<void*>(ptr));
+        header->guard = MEMALIGN_GUARD;
+        header->entry = reinterpret_cast<HashEntry*>(base);
 
         return reinterpret_cast<void*>(ptr);
     }
     return base;
 }
+
+extern "C" size_t leak_malloc_usable_size(const void* mem) {
+    if (mem != NULL) {
+        // Check the guard to make sure it is valid.
+        const AllocationEntry* header = const_to_header((void*)mem);
+
+        if (header->guard == MEMALIGN_GUARD) {
+            // If this is a memalign'd pointer, then grab the header from
+            // entry.
+            header = const_to_header(header->entry);
+        } else if (header->guard != GUARD) {
+            debug_log("WARNING bad header guard: '0x%x'! and invalid entry: %p\n",
+                      header->guard, header->entry);
+            return 0;
+        }
+
+        size_t ret = dlmalloc_usable_size(header);
+        if (ret != 0) {
+            // The usable area starts at 'mem' and stops at 'header+ret'.
+            return reinterpret_cast<uintptr_t>(header) + ret - reinterpret_cast<uintptr_t>(mem);
+        }
+    }
+    return 0;
+}
diff --git a/libc/bionic/malloc_debug_qemu.cpp b/libc/bionic/malloc_debug_qemu.cpp
index 34ddb87..4c666a9 100644
--- a/libc/bionic/malloc_debug_qemu.cpp
+++ b/libc/bionic/malloc_debug_qemu.cpp
@@ -137,7 +137,7 @@
      * will respond with information about allocated block that contains this
      * pointer.
      */
-    void*       ptr;
+    const void*       ptr;
 
     /* Id of the process that initialized libc instance, in which this query
      * is called. This field is used by the emulator to report errors in
@@ -469,7 +469,7 @@
  * Return:
  *  Zero on success, or -1 on failure.
  */
-static inline int query_qemu_malloc_info(void* ptr, MallocDesc* desc, uint32_t routine) {
+static inline int query_qemu_malloc_info(const void* ptr, MallocDesc* desc, uint32_t routine) {
     volatile MallocDescQuery query;
 
     query.ptr = ptr;
@@ -574,11 +574,12 @@
 // API routines
 // =============================================================================
 
-void* qemu_instrumented_malloc(size_t bytes);
-void  qemu_instrumented_free(void* mem);
-void* qemu_instrumented_calloc(size_t n_elements, size_t elem_size);
-void* qemu_instrumented_realloc(void* mem, size_t bytes);
-void* qemu_instrumented_memalign(size_t alignment, size_t bytes);
+extern "C" void* qemu_instrumented_malloc(size_t bytes);
+extern "C" void  qemu_instrumented_free(void* mem);
+extern "C" void* qemu_instrumented_calloc(size_t n_elements, size_t elem_size);
+extern "C" void* qemu_instrumented_realloc(void* mem, size_t bytes);
+extern "C" void* qemu_instrumented_memalign(size_t alignment, size_t bytes);
+extern "C" size_t qemu_instrumented_malloc_usable_size(const void* mem);
 
 /* Initializes malloc debugging instrumentation for the emulator.
  * This routine is called from malloc_init_impl routine implemented in
@@ -589,7 +590,7 @@
  * Return:
  *  0 on success, or -1 on failure.
 */
-int malloc_debug_initialize() {
+extern "C" int malloc_debug_initialize() {
     /* We will be using emulator's magic page to report memory allocation
      * activities. In essence, what magic page does, it translates writes to
      * the memory mapped spaces into writes to an I/O port that emulator
@@ -627,7 +628,7 @@
  * Return:
  *  0 on success, or -1 on failure.
 */
-int memcheck_initialize(int alignment, const char* memcheck_param) {
+extern "C" int memcheck_initialize(int alignment, const char* memcheck_param) {
     malloc_alignment = alignment;
 
     /* Parse -memcheck parameter for the guest tracing flags. */
@@ -673,7 +674,7 @@
  * bytes (plus prefix, and suffix guards), and report allocation to the
  * emulator.
  */
-void* qemu_instrumented_malloc(size_t bytes) {
+extern "C" void* qemu_instrumented_malloc(size_t bytes) {
     MallocDesc desc;
 
     /* Initialize block descriptor and allocate memory. Note that dlmalloc
@@ -708,7 +709,7 @@
  * Primary responsibility of this routine is to free requested memory, and
  * report free block to the emulator.
  */
-void qemu_instrumented_free(void* mem) {
+extern "C" void qemu_instrumented_free(void* mem) {
     MallocDesc desc;
 
     if (mem == NULL) {
@@ -751,7 +752,7 @@
 /* This routine serves as entry point for 'calloc'.
  * This routine behaves similarly to qemu_instrumented_malloc.
  */
-void* qemu_instrumented_calloc(size_t n_elements, size_t elem_size) {
+extern "C" void* qemu_instrumented_calloc(size_t n_elements, size_t elem_size) {
     if (n_elements == 0 || elem_size == 0) {
         // Just let go zero bytes allocation.
         qemu_info_log("::: <libc_pid=%03u, pid=%03u>: Zero calloc redir to malloc",
@@ -823,7 +824,7 @@
  * allocation, but overall it doesn't seem to matter, as caller of realloc
  * should not expect that pointer returned after shrinking will remain the same.
  */
-void* qemu_instrumented_realloc(void* mem, size_t bytes) {
+extern "C" void* qemu_instrumented_realloc(void* mem, size_t bytes) {
     MallocDesc new_desc;
     MallocDesc cur_desc;
     size_t to_copy;
@@ -927,7 +928,7 @@
 /* This routine serves as entry point for 'memalign'.
  * This routine behaves similarly to qemu_instrumented_malloc.
  */
-void* qemu_instrumented_memalign(size_t alignment, size_t bytes) {
+extern "C" void* qemu_instrumented_memalign(size_t alignment, size_t bytes) {
     MallocDesc desc;
 
     if (bytes == 0) {
@@ -967,3 +968,27 @@
               malloc_pid, getpid(), alignment, bytes);
     return mallocdesc_user_ptr(&desc);
 }
+
+extern "C" size_t qemu_instrumented_malloc_usable_size(const void* mem) {
+    MallocDesc cur_desc;
+
+    // Query emulator for information about the allocated block that contains mem.
+    if (query_qemu_malloc_info(mem, &cur_desc, 2)) {
+        // Note that this violation should be already caught in the emulator.
+        error_log("<libc_pid=%03u, pid=%03u>: malloc_usable_size(%p) query_info failed.",
+                  malloc_pid, getpid(), mem);
+        return 0;
+    }
+
+    /* Make sure that the pointer value passed in is what we would expect
+     * for this memory block. Note that this violation should already have
+     * been caught in the emulator. */
+    if (mem != mallocdesc_user_ptr(&cur_desc)) {
+        log_mdesc(error, &cur_desc, "<libc_pid=%03u, pid=%03u>: malloc_usable_size(%p) is invalid for ",
+                  malloc_pid, getpid(), mem);
+        return 0;
+    }
+
+    /* During instrumentation, we can't really report anything more than requested_bytes. */
+    return cur_desc.requested_bytes;
+}
diff --git a/libc/bionic/mmap.c b/libc/bionic/mmap.cpp
similarity index 66%
rename from libc/bionic/mmap.c
rename to libc/bionic/mmap.cpp
index 40a6538..febc459 100644
--- a/libc/bionic/mmap.c
+++ b/libc/bionic/mmap.cpp
@@ -25,19 +25,31 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-#include <unistd.h>
+
 #include <errno.h>
 #include <sys/mman.h>
+#include <unistd.h>
 
-extern void*  __mmap2(void*, size_t, int, int, int, size_t);
+#include "private/ErrnoRestorer.h"
 
-#define  MMAP2_SHIFT  12
-void* mmap(void *addr, size_t size, int prot, int flags, int fd, long offset)
-{
-    if (offset & ((1UL << MMAP2_SHIFT)-1)) {
-        errno = EINVAL;
-        return MAP_FAILED;
-    }
+// mmap2(2) is like mmap(2), but the offset is in 4096-byte blocks, not bytes.
+extern "C" void*  __mmap2(void*, size_t, int, int, int, size_t);
 
-    return __mmap2(addr, size, prot, flags, fd, (size_t)offset >> MMAP2_SHIFT);
+#define MMAP2_SHIFT 12 // 2**12 == 4096
+
+void* mmap(void* addr, size_t size, int prot, int flags, int fd, off_t offset) {
+  if (offset & ((1UL << MMAP2_SHIFT)-1)) {
+    errno = EINVAL;
+    return MAP_FAILED;
+  }
+
+  size_t unsigned_offset = static_cast<size_t>(offset); // To avoid sign extension.
+  void* result = __mmap2(addr, size, prot, flags, fd, unsigned_offset >> MMAP2_SHIFT);
+
+  if (result != MAP_FAILED && (flags & (MAP_PRIVATE | MAP_ANONYMOUS)) != 0) {
+    ErrnoRestorer errno_restorer;
+    madvise(result, size, MADV_MERGEABLE);
+  }
+
+  return result;
 }
diff --git a/libc/bionic/open.c b/libc/bionic/open.c
index e3573a3..424573f 100644
--- a/libc/bionic/open.c
+++ b/libc/bionic/open.c
@@ -52,7 +52,7 @@
 }
 
 int __open_2(const char *pathname, int flags) {
-    if (flags & O_CREAT) {
+    if (__predict_false(flags & O_CREAT)) {
         __fortify_chk_fail("open(O_CREAT) called without specifying a mode", 0);
     }
 
diff --git a/libc/bionic/pthread-rwlocks.c b/libc/bionic/pthread-rwlocks.c
index deee577..59e2248 100644
--- a/libc/bionic/pthread-rwlocks.c
+++ b/libc/bionic/pthread-rwlocks.c
@@ -53,9 +53,6 @@
  *
  */
 
-#define  __likely(cond)    __builtin_expect(!!(cond), 1)
-#define  __unlikely(cond)  __builtin_expect(!!(cond), 0)
-
 #define  RWLOCKATTR_DEFAULT     0
 #define  RWLOCKATTR_SHARED_MASK 0x0010
 
@@ -212,7 +209,7 @@
         return EINVAL;
 
     pthread_mutex_lock(&rwlock->lock);
-    if (__unlikely(!read_precondition(rwlock, __get_thread()->tid)))
+    if (__predict_false(!read_precondition(rwlock, __get_thread()->tid)))
         ret = EBUSY;
     else
         rwlock->numLocks ++;
@@ -230,7 +227,7 @@
 
     pthread_mutex_lock(&rwlock->lock);
     int tid = __get_thread()->tid;
-    if (__unlikely(!read_precondition(rwlock, tid))) {
+    if (__predict_false(!read_precondition(rwlock, tid))) {
         rwlock->pendingReaders += 1;
         do {
             ret = pthread_cond_timedwait(&rwlock->cond, &rwlock->lock, abs_timeout);
@@ -260,7 +257,7 @@
 
     pthread_mutex_lock(&rwlock->lock);
     int tid = __get_thread()->tid;
-    if (__unlikely(!write_precondition(rwlock, tid))) {
+    if (__predict_false(!write_precondition(rwlock, tid))) {
         ret = EBUSY;
     } else {
         rwlock->numLocks ++;
@@ -279,7 +276,7 @@
 
     pthread_mutex_lock(&rwlock->lock);
     int tid = __get_thread()->tid;
-    if (__unlikely(!write_precondition(rwlock, tid))) {
+    if (__predict_false(!write_precondition(rwlock, tid))) {
         /* If we can't read yet, wait until the rwlock is unlocked
          * and try again. Increment pendingReaders to get the
          * cond broadcast when that happens.
diff --git a/libc/bionic/pthread-timers.c b/libc/bionic/pthread-timers.c
index 23d31df..d81bfef 100644
--- a/libc/bionic/pthread-timers.c
+++ b/libc/bionic/pthread-timers.c
@@ -33,6 +33,12 @@
 #include <stdio.h>
 #include <string.h>
 
+extern int __pthread_cond_timedwait(pthread_cond_t*, pthread_mutex_t*, const struct timespec*,
+                                    clockid_t);
+
+extern int __pthread_cond_timedwait_relative(pthread_cond_t*, pthread_mutex_t*,
+                                             const struct timespec*);
+
 // Normal (i.e. non-SIGEV_THREAD) timers are created directly by the kernel
 // and are passed as is to/from the caller.
 //
@@ -81,9 +87,6 @@
 /* the maximum value of overrun counters */
 #define  DELAYTIMER_MAX    0x7fffffff
 
-#define  __likely(x)   __builtin_expect(!!(x),1)
-#define  __unlikely(x) __builtin_expect(!!(x),0)
-
 typedef struct thr_timer          thr_timer_t;
 typedef struct thr_timer_table    thr_timer_table_t;
 
@@ -282,6 +285,49 @@
     pthread_mutex_unlock(&t->mutex);
 }
 
+
+static __inline__ void timespec_add(struct timespec* a, const struct timespec* b) {
+  a->tv_sec  += b->tv_sec;
+  a->tv_nsec += b->tv_nsec;
+  if (a->tv_nsec >= 1000000000) {
+    a->tv_nsec -= 1000000000;
+    a->tv_sec  += 1;
+  }
+}
+
+static __inline__ void timespec_sub(struct timespec* a, const struct timespec* b) {
+  a->tv_sec  -= b->tv_sec;
+  a->tv_nsec -= b->tv_nsec;
+  if (a->tv_nsec < 0) {
+    a->tv_nsec += 1000000000;
+    a->tv_sec  -= 1;
+  }
+}
+
+static __inline__ void timespec_zero(struct timespec* a) {
+  a->tv_sec = a->tv_nsec = 0;
+}
+
+static __inline__ int timespec_is_zero(const struct timespec* a) {
+  return (a->tv_sec == 0 && a->tv_nsec == 0);
+}
+
+static __inline__ int timespec_cmp(const struct timespec* a, const struct timespec* b) {
+  if (a->tv_sec  < b->tv_sec)  return -1;
+  if (a->tv_sec  > b->tv_sec)  return +1;
+  if (a->tv_nsec < b->tv_nsec) return -1;
+  if (a->tv_nsec > b->tv_nsec) return +1;
+  return 0;
+}
+
+static __inline__ int timespec_cmp0(const struct timespec* a) {
+  if (a->tv_sec < 0) return -1;
+  if (a->tv_sec > 0) return +1;
+  if (a->tv_nsec < 0) return -1;
+  if (a->tv_nsec > 0) return +1;
+  return 0;
+}
+
 /** POSIX TIMERS APIs */
 
 extern int __timer_create(clockid_t, struct sigevent*, timer_t*);
@@ -294,7 +340,7 @@
 
 int timer_create(clockid_t clock_id, struct sigevent* evp, timer_t* timer_id) {
   // If not a SIGEV_THREAD timer, the kernel can handle it without our help.
-  if (__likely(evp == NULL || evp->sigev_notify != SIGEV_THREAD)) {
+  if (__predict_true(evp == NULL || evp->sigev_notify != SIGEV_THREAD)) {
     return __timer_create(clock_id, evp, timer_id);
   }
 
@@ -360,7 +406,7 @@
 int
 timer_delete( timer_t  id )
 {
-    if ( __likely(!TIMER_ID_IS_WRAPPED(id)) )
+    if ( __predict_true(!TIMER_ID_IS_WRAPPED(id)) )
         return __timer_delete( id );
     else
     {
@@ -422,7 +468,7 @@
         return -1;
     }
 
-    if ( __likely(!TIMER_ID_IS_WRAPPED(id)) ) {
+    if ( __predict_true(!TIMER_ID_IS_WRAPPED(id)) ) {
         return __timer_gettime( id, ospec );
     } else {
         thr_timer_t*  timer = thr_timer_from_id(id);
@@ -450,7 +496,7 @@
         return -1;
     }
 
-    if ( __likely(!TIMER_ID_IS_WRAPPED(id)) ) {
+    if ( __predict_true(!TIMER_ID_IS_WRAPPED(id)) ) {
         return __timer_settime( id, flags, spec, ospec );
     } else {
         thr_timer_t*        timer = thr_timer_from_id(id);
@@ -494,7 +540,7 @@
 int
 timer_getoverrun(timer_t  id)
 {
-    if ( __likely(!TIMER_ID_IS_WRAPPED(id)) ) {
+    if ( __predict_true(!TIMER_ID_IS_WRAPPED(id)) ) {
         return __timer_getoverrun( id );
     } else {
         thr_timer_t*  timer = thr_timer_from_id(id);
diff --git a/libc/bionic/pthread.c b/libc/bionic/pthread.c
index e30fa9d..92e2c27 100644
--- a/libc/bionic/pthread.c
+++ b/libc/bionic/pthread.c
@@ -31,6 +31,7 @@
 #include <errno.h>
 #include <limits.h>
 #include <sys/atomics.h>
+#include <sys/mman.h>
 #include <unistd.h>
 
 #include "bionic_atomic_inline.h"
@@ -56,19 +57,6 @@
     return __futex_syscall4(ftx, pshared ? FUTEX_WAIT : FUTEX_WAIT_PRIVATE, val, timeout);
 }
 
-#define  __likely(cond)    __builtin_expect(!!(cond), 1)
-#define  __unlikely(cond)  __builtin_expect(!!(cond), 0)
-
-void*
-__get_stack_base(int  *p_stack_size)
-{
-    pthread_internal_t*  thread = __get_thread();
-
-    *p_stack_size = thread->attr.stack_size;
-    return thread->attr.stack_base;
-}
-
-
 /* CAVEAT: our implementation of pthread_cleanup_push/pop doesn't support C++ exceptions
  *         and thread cancelation
  */
@@ -115,6 +103,18 @@
     // space (see pthread_key_delete)
     pthread_key_clean_all();
 
+    if (thread->alternate_signal_stack != NULL) {
+      // Tell the kernel to stop using the alternate signal stack.
+      stack_t ss;
+      ss.ss_sp = NULL;
+      ss.ss_flags = SS_DISABLE;
+      sigaltstack(&ss, NULL);
+
+      // Free it.
+      munmap(thread->alternate_signal_stack, SIGSTKSZ);
+      thread->alternate_signal_stack = NULL;
+    }
+
     // if the thread is detached, destroy the pthread_internal_t
     // otherwise, keep it in memory and signal any joiners.
     pthread_mutex_lock(&gThreadListLock);
@@ -130,23 +130,13 @@
             thread->tls = NULL;
         }
 
-       /* the join_count field is used to store the number of threads waiting for
-        * the termination of this thread with pthread_join(),
-        *
-        * if it is positive we need to signal the waiters, and we do not touch
-        * the count (it will be decremented by the waiters, the last one will
-        * also remove/free the thread structure
-        *
-        * if it is zero, we set the count value to -1 to indicate that the
-        * thread is in 'zombie' state: it has stopped executing, and its stack
-        * is gone (as well as its TLS area). when another thread calls pthread_join()
-        * on it, it will immediately free the thread and return.
-        */
+        /* Flag the thread as exited so that a thread joining it can tell. */
+        thread->attr.flags |= PTHREAD_ATTR_FLAG_ZOMBIE;
         thread->return_value = retval;
-        if (thread->join_count > 0) {
-            pthread_cond_broadcast(&thread->join_cond);
-        } else {
-            thread->join_count = -1;  /* zombie thread */
+
+        /* Wake the joining thread, if there is one. */
+        if (thread->attr.flags & PTHREAD_ATTR_FLAG_JOINED) {
+            pthread_cond_signal(&thread->join_cond);
         }
     }
     pthread_mutex_unlock(&gThreadListLock);
@@ -409,7 +399,7 @@
     if (mutex == NULL)
         return EINVAL;
 
-    if (__likely(attr == NULL)) {
+    if (__predict_true(attr == NULL)) {
         mutex->value = MUTEX_TYPE_BITS_NORMAL;
         return 0;
     }
@@ -575,7 +565,7 @@
     for (;;) {
         /* increment counter, overflow was already checked */
         int newval = mvalue + MUTEX_COUNTER_BITS_ONE;
-        if (__likely(__bionic_cmpxchg(mvalue, newval, &mutex->value) == 0)) {
+        if (__predict_true(__bionic_cmpxchg(mvalue, newval, &mutex->value) == 0)) {
             /* mutex is still locked, not need for a memory barrier */
             return 0;
         }
@@ -592,7 +582,7 @@
 {
     int mvalue, mtype, tid, shared;
 
-    if (__unlikely(mutex == NULL))
+    if (__predict_false(mutex == NULL))
         return EINVAL;
 
     mvalue = mutex->value;
@@ -600,7 +590,7 @@
     shared = (mvalue & MUTEX_SHARED_MASK);
 
     /* Handle normal case first */
-    if ( __likely(mtype == MUTEX_TYPE_BITS_NORMAL) ) {
+    if ( __predict_true(mtype == MUTEX_TYPE_BITS_NORMAL) ) {
         _normal_lock(mutex, shared);
         return 0;
     }
@@ -641,7 +631,7 @@
              *        implement it to get rid of the explicit memory
              *        barrier below.
              */
-            if (__unlikely(__bionic_cmpxchg(mvalue, newval, &mutex->value) != 0)) {
+            if (__predict_false(__bionic_cmpxchg(mvalue, newval, &mutex->value) != 0)) {
                 mvalue = mutex->value;
                 continue;
             }
@@ -653,7 +643,7 @@
          * we will change it to 2 to indicate contention. */
         if (MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(mvalue)) {
             newval = MUTEX_STATE_BITS_FLIP_CONTENTION(mvalue); /* locked state 1 => state 2 */
-            if (__unlikely(__bionic_cmpxchg(mvalue, newval, &mutex->value) != 0)) {
+            if (__predict_false(__bionic_cmpxchg(mvalue, newval, &mutex->value) != 0)) {
                 mvalue = mutex->value;
                 continue;
             }
@@ -686,7 +676,7 @@
 {
     int mvalue, mtype, tid, shared;
 
-    if (__unlikely(mutex == NULL))
+    if (__predict_false(mutex == NULL))
         return EINVAL;
 
     mvalue = mutex->value;
@@ -694,7 +684,7 @@
     shared = (mvalue & MUTEX_SHARED_MASK);
 
     /* Handle common case first */
-    if (__likely(mtype == MUTEX_TYPE_BITS_NORMAL)) {
+    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
         _normal_unlock(mutex, shared);
         return 0;
     }
@@ -711,7 +701,7 @@
     if (!MUTEX_COUNTER_BITS_IS_ZERO(mvalue)) {
         for (;;) {
             int newval = mvalue - MUTEX_COUNTER_BITS_ONE;
-            if (__likely(__bionic_cmpxchg(mvalue, newval, &mutex->value) == 0)) {
+            if (__predict_true(__bionic_cmpxchg(mvalue, newval, &mutex->value) == 0)) {
                 /* success: we still own the mutex, so no memory barrier */
                 return 0;
             }
@@ -753,7 +743,7 @@
 {
     int mvalue, mtype, tid, shared;
 
-    if (__unlikely(mutex == NULL))
+    if (__predict_false(mutex == NULL))
         return EINVAL;
 
     mvalue = mutex->value;
@@ -761,7 +751,7 @@
     shared = (mvalue & MUTEX_SHARED_MASK);
 
     /* Handle common case first */
-    if ( __likely(mtype == MUTEX_TYPE_BITS_NORMAL) )
+    if ( __predict_true(mtype == MUTEX_TYPE_BITS_NORMAL) )
     {
         if (__bionic_cmpxchg(shared|MUTEX_STATE_BITS_UNLOCKED,
                              shared|MUTEX_STATE_BITS_LOCKED_UNCONTENDED,
@@ -785,7 +775,7 @@
     mtype |= shared | MUTEX_STATE_BITS_UNLOCKED;
     mvalue = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
 
-    if (__likely(__bionic_cmpxchg(mtype, mvalue, &mutex->value) == 0)) {
+    if (__predict_true(__bionic_cmpxchg(mtype, mvalue, &mutex->value) == 0)) {
         ANDROID_MEMBAR_FULL();
         return 0;
     }
@@ -851,7 +841,7 @@
     /* compute absolute expiration time */
     __timespec_to_relative_msec(&abstime, msecs, clock);
 
-    if (__unlikely(mutex == NULL))
+    if (__predict_false(mutex == NULL))
         return EINVAL;
 
     mvalue = mutex->value;
@@ -859,7 +849,7 @@
     shared = (mvalue & MUTEX_SHARED_MASK);
 
     /* Handle common case first */
-    if ( __likely(mtype == MUTEX_TYPE_BITS_NORMAL) )
+    if ( __predict_true(mtype == MUTEX_TYPE_BITS_NORMAL) )
     {
         const int unlocked           = shared | MUTEX_STATE_BITS_UNLOCKED;
         const int locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
@@ -896,7 +886,7 @@
     /* first try a quick lock */
     if (mvalue == mtype) {
         mvalue = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
-        if (__likely(__bionic_cmpxchg(mtype, mvalue, &mutex->value) == 0)) {
+        if (__predict_true(__bionic_cmpxchg(mtype, mvalue, &mutex->value) == 0)) {
             ANDROID_MEMBAR_FULL();
             return 0;
         }
@@ -1073,7 +1063,7 @@
 {
     long flags;
 
-    if (__unlikely(cond == NULL))
+    if (__predict_false(cond == NULL))
         return EINVAL;
 
     flags = (cond->value & ~COND_COUNTER_MASK);
@@ -1214,7 +1204,7 @@
     * stores performed by the initialization function are observable on
     * this CPU after we exit.
     */
-    if (__likely((*ocptr & ONCE_COMPLETED) != 0)) {
+    if (__predict_true((*ocptr & ONCE_COMPLETED) != 0)) {
         ANDROID_MEMBAR_FULL();
         return 0;
     }
diff --git a/libc/bionic/pthread_attr.cpp b/libc/bionic/pthread_attr.cpp
index c47f95e..d7c6c13 100644
--- a/libc/bionic/pthread_attr.cpp
+++ b/libc/bionic/pthread_attr.cpp
@@ -30,12 +30,16 @@
 
 #include "pthread_internal.h"
 
-#define DEFAULT_STACK_SIZE (1024 * 1024)
+// Traditionally we give threads a 1MiB stack. When we started allocating per-thread
+// alternate signal stacks to ease debugging of stack overflows, we subtracted the
+// same amount we were using there from the default thread stack size. This should
+// keep memory usage roughly constant.
+#define DEFAULT_THREAD_STACK_SIZE ((1 * 1024 * 1024) - SIGSTKSZ)
 
 int pthread_attr_init(pthread_attr_t* attr) {
   attr->flags = 0;
   attr->stack_base = NULL;
-  attr->stack_size = DEFAULT_STACK_SIZE;
+  attr->stack_size = DEFAULT_THREAD_STACK_SIZE;
   attr->guard_size = PAGE_SIZE;
   attr->sched_policy = SCHED_NORMAL;
   attr->sched_priority = 0;
@@ -84,7 +88,7 @@
 }
 
 int pthread_attr_setstacksize(pthread_attr_t* attr, size_t stack_size) {
-  if ((stack_size & (PAGE_SIZE - 1) || stack_size < PTHREAD_STACK_MIN)) {
+  if (stack_size < PTHREAD_STACK_MIN) {
     return EINVAL;
   }
   attr->stack_size = stack_size;
@@ -128,9 +132,6 @@
 }
 
 int pthread_attr_setguardsize(pthread_attr_t* attr, size_t guard_size) {
-  if (guard_size & (PAGE_SIZE - 1) || guard_size < PAGE_SIZE) {
-    return EINVAL;
-  }
   attr->guard_size = guard_size;
   return 0;
 }
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 70a9bf5..63695d3 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -69,9 +69,22 @@
   thread->tls[TLS_SLOT_STACK_GUARD] = (void*) __stack_chk_guard;
 
   __set_tls(thread->tls);
+
+  // Create and set an alternate signal stack (an anonymous mapping, so the fd argument is -1).
+  // This must happen after __set_tls, in case a system call fails and tries to set errno.
+  stack_t ss;
+  ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+  if (ss.ss_sp != MAP_FAILED) {
+    ss.ss_size = SIGSTKSZ;
+    ss.ss_flags = 0;
+    sigaltstack(&ss, NULL);
+    thread->alternate_signal_stack = ss.ss_sp;
+  }
 }
 
-// This trampoline is called from the assembly _pthread_clone() function.
+// This trampoline is called from the assembly _pthread_clone function.
+// Our 'tls' and __pthread_clone's 'child_stack' are one and the same, just growing in
+// opposite directions.
 extern "C" void __thread_entry(void* (*func)(void*), void* arg, void** tls) {
   // Wait for our creating thread to release us. This lets it have time to
   // notify gdb about this thread before we start doing anything.
@@ -104,13 +117,12 @@
     if (sched_setscheduler(thread->tid, thread->attr.sched_policy, &param) == -1) {
       // For backwards compatibility reasons, we just warn about failures here.
       // error = errno;
-      const char* msg = "pthread_create sched_setscheduler call failed: %s\n";
-      __libc_format_log(ANDROID_LOG_WARN, "libc", msg, strerror(errno));
+      __libc_format_log(ANDROID_LOG_WARN, "libc",
+                        "pthread_create sched_setscheduler call failed: %s", strerror(errno));
     }
   }
 
   pthread_cond_init(&thread->join_cond, NULL);
-  thread->join_count = 0;
   thread->cleanup_stack = NULL;
 
   if (add_to_thread_list) {
@@ -120,20 +132,27 @@
   return error;
 }
 
-static void* __create_thread_stack(size_t stack_size, size_t guard_size) {
+static void* __create_thread_stack(pthread_internal_t* thread) {
   ScopedPthreadMutexLocker lock(&gPthreadStackCreationLock);
 
   // Create a new private anonymous map.
   int prot = PROT_READ | PROT_WRITE;
   int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
-  void* stack = mmap(NULL, stack_size, prot, flags, -1, 0);
+  void* stack = mmap(NULL, thread->attr.stack_size, prot, flags, -1, 0);
   if (stack == MAP_FAILED) {
+    __libc_format_log(ANDROID_LOG_WARN,
+                      "libc",
+                      "pthread_create failed: couldn't allocate %zd-byte stack: %s",
+                      thread->attr.stack_size, strerror(errno));
     return NULL;
   }
 
   // Set the guard region at the end of the stack to PROT_NONE.
-  if (mprotect(stack, guard_size, PROT_NONE) == -1) {
-    munmap(stack, stack_size);
+  if (mprotect(stack, thread->attr.guard_size, PROT_NONE) == -1) {
+    __libc_format_log(ANDROID_LOG_WARN, "libc",
+                      "pthread_create failed: couldn't mprotect PROT_NONE %zd-byte stack guard region: %s",
+                      thread->attr.guard_size, strerror(errno));
+    munmap(stack, thread->attr.stack_size);
     return NULL;
   }
 
@@ -165,15 +184,15 @@
     attr = NULL; // Prevent misuse below.
   }
 
-  // Make sure the stack size is PAGE_SIZE aligned.
-  size_t stack_size = (thread->attr.stack_size + (PAGE_SIZE-1)) & ~(PAGE_SIZE-1);
+  // Make sure the stack size and guard size are multiples of PAGE_SIZE.
+  thread->attr.stack_size = (thread->attr.stack_size + (PAGE_SIZE-1)) & ~(PAGE_SIZE-1);
+  thread->attr.guard_size = (thread->attr.guard_size + (PAGE_SIZE-1)) & ~(PAGE_SIZE-1);
 
   if (thread->attr.stack_base == NULL) {
     // The caller didn't provide a stack, so allocate one.
-    thread->attr.stack_base = __create_thread_stack(stack_size, thread->attr.guard_size);
+    thread->attr.stack_base = __create_thread_stack(thread);
     if (thread->attr.stack_base == NULL) {
       free(thread);
-      __libc_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: couldn't allocate %zd-byte stack", stack_size);
       return EAGAIN;
     }
   } else {
@@ -181,8 +200,12 @@
     thread->attr.flags |= PTHREAD_ATTR_FLAG_USER_STACK;
   }
 
-  // Make room for TLS.
-  void** tls = (void**)((uint8_t*)(thread->attr.stack_base) + stack_size - BIONIC_TLS_SLOTS * sizeof(void*));
+  // Make room for the TLS area.
+  // The child stack is the same address, just growing in the opposite direction.
+  // At offsets >= 0, we have the TLS slots.
+  // At offsets < 0, we have the child stack.
+  void** tls = (void**)((uint8_t*)(thread->attr.stack_base) + thread->attr.stack_size - BIONIC_TLS_SLOTS * sizeof(void*));
+  void* child_stack = tls;
 
   // Create a mutex for the thread in TLS_SLOT_SELF to wait on once it starts so we can keep
   // it from doing anything until after we notify the debugger about it
@@ -198,11 +221,11 @@
 
   int flags = CLONE_FILES | CLONE_FS | CLONE_VM | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM;
 
-  int tid = __pthread_clone(start_routine, tls, flags, arg);
+  int tid = __pthread_clone(start_routine, child_stack, flags, arg);
   if (tid < 0) {
     int clone_errno = errno;
     if ((thread->attr.flags & PTHREAD_ATTR_FLAG_USER_STACK) == 0) {
-      munmap(thread->attr.stack_base, stack_size);
+      munmap(thread->attr.stack_base, thread->attr.stack_size);
     }
     free(thread);
     __libc_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: clone failed: %s", strerror(errno));
diff --git a/libc/bionic/pthread_detach.cpp b/libc/bionic/pthread_detach.cpp
index 63f5809..95f11ac 100644
--- a/libc/bionic/pthread_detach.cpp
+++ b/libc/bionic/pthread_detach.cpp
@@ -40,7 +40,7 @@
     return EINVAL; // Already detached.
   }
 
-  if (thread->join_count > 0) {
+  if (thread->attr.flags & PTHREAD_ATTR_FLAG_JOINED) {
     return 0; // Already being joined; silently do nothing, like glibc.
   }
 
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 0eb0e0a..31b8ca7 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -30,6 +30,7 @@
 
 #include <pthread.h>
 #include <stdbool.h>
+#include <sys/cdefs.h>
 
 __BEGIN_DECLS
 
@@ -41,12 +42,13 @@
     pid_t                       tid;
     bool                        allocated_on_heap;
     pthread_cond_t              join_cond;
-    int                         join_count;
     void*                       return_value;
     int                         internal_flags;
     __pthread_cleanup_t*        cleanup_stack;
     void**                      tls;         /* thread-local storage area */
 
+    void* alternate_signal_stack;
+
     /*
      * The dynamic linker implements dlerror(3), which makes it hard for us to implement this
      * per-thread buffer by simply using malloc(3) and free(3).
@@ -63,71 +65,21 @@
 __LIBC_HIDDEN__ void pthread_key_clean_all(void);
 __LIBC_HIDDEN__ void _pthread_internal_remove_locked(pthread_internal_t* thread);
 
+/* Has the thread been detached by a pthread_join or pthread_detach call? */
 #define PTHREAD_ATTR_FLAG_DETACHED      0x00000001
+
+/* Was the thread's stack allocated by the user rather than by us? */
 #define PTHREAD_ATTR_FLAG_USER_STACK    0x00000002
 
+/* Has the thread been joined by another thread? */
+#define PTHREAD_ATTR_FLAG_JOINED        0x00000004
+
+/* Has the thread already exited but not been joined? */
+#define PTHREAD_ATTR_FLAG_ZOMBIE        0x00000008
+
 __LIBC_HIDDEN__ extern pthread_internal_t* gThreadList;
 __LIBC_HIDDEN__ extern pthread_mutex_t gThreadListLock;
 
-/* needed by posix-timers.c */
-
-static __inline__ void timespec_add( struct timespec*  a, const struct timespec*  b )
-{
-    a->tv_sec  += b->tv_sec;
-    a->tv_nsec += b->tv_nsec;
-    if (a->tv_nsec >= 1000000000) {
-        a->tv_nsec -= 1000000000;
-        a->tv_sec  += 1;
-    }
-}
-
-static  __inline__ void timespec_sub( struct timespec*  a, const struct timespec*  b )
-{
-    a->tv_sec  -= b->tv_sec;
-    a->tv_nsec -= b->tv_nsec;
-    if (a->tv_nsec < 0) {
-        a->tv_nsec += 1000000000;
-        a->tv_sec  -= 1;
-    }
-}
-
-static  __inline__ void timespec_zero( struct timespec*  a )
-{
-    a->tv_sec = a->tv_nsec = 0;
-}
-
-static  __inline__ int timespec_is_zero( const struct timespec*  a )
-{
-    return (a->tv_sec == 0 && a->tv_nsec == 0);
-}
-
-static  __inline__ int timespec_cmp( const struct timespec*  a, const struct timespec*  b )
-{
-    if (a->tv_sec  < b->tv_sec)  return -1;
-    if (a->tv_sec  > b->tv_sec)  return +1;
-    if (a->tv_nsec < b->tv_nsec) return -1;
-    if (a->tv_nsec > b->tv_nsec) return +1;
-    return 0;
-}
-
-static  __inline__ int timespec_cmp0( const struct timespec*  a )
-{
-    if (a->tv_sec < 0) return -1;
-    if (a->tv_sec > 0) return +1;
-    if (a->tv_nsec < 0) return -1;
-    if (a->tv_nsec > 0) return +1;
-    return 0;
-}
-
-extern int  __pthread_cond_timedwait(pthread_cond_t*,
-                                     pthread_mutex_t*,
-                                     const struct timespec*,
-                                     clockid_t);
-
-extern int  __pthread_cond_timedwait_relative(pthread_cond_t*,
-                                              pthread_mutex_t*,
-                                              const struct timespec*);
-
 /* needed by fork.c */
 extern void __timer_table_start_stop(int  stop);
 extern void __bionic_atfork_run_prepare();
diff --git a/libc/bionic/pthread_join.cpp b/libc/bionic/pthread_join.cpp
index e6acc34..7e022c2 100644
--- a/libc/bionic/pthread_join.cpp
+++ b/libc/bionic/pthread_join.cpp
@@ -30,7 +30,7 @@
 
 #include "pthread_accessor.h"
 
-int pthread_join(pthread_t t, void ** ret_val) {
+int pthread_join(pthread_t t, void** ret_val) {
   if (t == pthread_self()) {
     return EDEADLK;
   }
@@ -44,25 +44,19 @@
     return EINVAL;
   }
 
-  // Wait for thread death when needed.
+  if (thread->attr.flags & PTHREAD_ATTR_FLAG_JOINED) {
+    return EINVAL;
+  }
 
-  // If the 'join_count' is negative, this is a 'zombie' thread that
-  // is already dead and without stack/TLS. Otherwise, we need to increment 'join-count'
-  // and wait to be signaled
-  int count = thread->join_count;
-  if (count >= 0) {
-    thread->join_count += 1;
+  // Signal our intention to join, and wait for the thread to exit.
+  thread->attr.flags |= PTHREAD_ATTR_FLAG_JOINED;
+  while ((thread->attr.flags & PTHREAD_ATTR_FLAG_ZOMBIE) == 0) {
     pthread_cond_wait(&thread->join_cond, &gThreadListLock);
-    count = --thread->join_count;
   }
   if (ret_val) {
     *ret_val = thread->return_value;
   }
 
-  // Remove thread from thread list when we're the last joiner or when the
-  // thread was already a zombie.
-  if (count <= 0) {
-    _pthread_internal_remove_locked(thread.get());
-  }
+  _pthread_internal_remove_locked(thread.get());
   return 0;
 }
diff --git a/libc/bionic/pthread_key.cpp b/libc/bionic/pthread_key.cpp
index c793fc6..2ae6519 100644
--- a/libc/bionic/pthread_key.cpp
+++ b/libc/bionic/pthread_key.cpp
@@ -212,16 +212,13 @@
   // Clear value in all threads.
   pthread_mutex_lock(&gThreadListLock);
   for (pthread_internal_t*  t = gThreadList; t != NULL; t = t->next) {
-    // Avoid zombie threads with a negative 'join_count'. These are really
-    // already dead and don't have a TLS area anymore.
-
+    // Skip zombie threads. They don't have a valid TLS area any more.
     // Similarly, it is possible to have t->tls == NULL for threads that
     // were just recently created through pthread_create() but whose
     // startup trampoline (__thread_entry) hasn't been run yet by the
-    // scheduler. t->tls will also be NULL after it's stack has been
+    // scheduler. t->tls will also be NULL after a thread's stack has been
     // unmapped but before the ongoing pthread_join() is finished.
-    // so check for this too.
-    if (t->join_count < 0 || !t->tls) {
+    if ((t->attr.flags & PTHREAD_ATTR_FLAG_ZOMBIE) || t->tls == NULL) {
       continue;
     }
 
diff --git a/libc/bionic/sched_getaffinity.c b/libc/bionic/sched_getaffinity.cpp
similarity index 76%
rename from libc/bionic/sched_getaffinity.c
rename to libc/bionic/sched_getaffinity.cpp
index 7313822..26f22b1 100644
--- a/libc/bionic/sched_getaffinity.c
+++ b/libc/bionic/sched_getaffinity.cpp
@@ -25,17 +25,21 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
+
 #define _GNU_SOURCE 1
 #include <sched.h>
+#include <string.h>
 
-int  sched_getaffinity(pid_t pid, size_t setsize, cpu_set_t* set)
-{
-    int ret = __sched_getaffinity(pid, setsize, set);
-    if (ret >= 0) {
-        if ((size_t)ret < setsize) {
-            memset((char*)set + ret, '\0', setsize - (size_t)ret);
-        }
-        ret = 0;
-    }
-    return ret;
+extern "C" int __sched_getaffinity(pid_t, size_t, cpu_set_t*);
+
+int sched_getaffinity(pid_t pid, size_t set_size, cpu_set_t* set) {
+  int rc = __sched_getaffinity(pid, set_size, set);
+  if (rc == -1) {
+      return -1;
+  }
+
+  // Clear any bytes the kernel didn't touch.
+  // (The kernel returns the number of bytes written on success.)
+  memset(reinterpret_cast<char*>(set) + rc, 0, set_size - rc);
+  return 0;
 }
diff --git a/libc/bionic/statvfs.cpp b/libc/bionic/statvfs.cpp
new file mode 100644
index 0000000..5d42aaa
--- /dev/null
+++ b/libc/bionic/statvfs.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sys/statvfs.h>
+
+#include <sys/statfs.h>
+
+extern "C" int __statfs64(const char*, size_t, struct statfs*);
+extern "C" int __fstatfs64(int, size_t, struct statfs*);
+
+#define ST_VALID 0x0020
+
+#if defined(__mips__)
+#define __val val
+#endif
+
+// Translates the fields of a 'struct statfs' into the matching 'struct statvfs' fields.
+static void __statfs_to_statvfs(const struct statfs& in, struct statvfs* out) {
+  out->f_bsize = in.f_bsize;
+  out->f_frsize = in.f_frsize;
+  out->f_blocks = in.f_blocks;
+  out->f_bfree = in.f_bfree;
+  out->f_bavail = in.f_bavail;
+  out->f_files = in.f_files;
+  out->f_ffree = in.f_ffree;
+  // The same free-file count feeds both f_ffree and f_favail.
+  out->f_favail = in.f_ffree;
+  // Pack the two fsid words into one value. The low word goes through uint32_t first so
+  // that a negative word cannot sign-extend over the bits that hold the high word.
+  out->f_fsid = static_cast<uint32_t>(in.f_fsid.__val[0]) |
+      (static_cast<uint64_t>(in.f_fsid.__val[1]) << 32);
+  // ST_VALID is stripped from the flags before they are reported.
+  out->f_flag = in.f_flags & ~ST_VALID;
+  out->f_namemax = in.f_namelen;
+}
+
+// Queries the filesystem containing 'path' and stores the converted result in 'result'.
+// Returns 0 on success; otherwise returns __statfs64's result and leaves 'result' untouched.
+int statvfs(const char* path, struct statvfs* result) {
+  struct statfs tmp;
+  int rc = __statfs64(path, sizeof(tmp), &tmp);
+  if (rc != 0) {
+    return rc;
+  }
+  __statfs_to_statvfs(tmp, result);
+  return 0;
+}
+
+// Same as statvfs, but for the filesystem that the open descriptor 'fd' refers to.
+// Returns 0 on success; otherwise returns __fstatfs64's result and leaves 'result' untouched.
+int fstatvfs(int fd, struct statvfs* result) {
+  struct statfs tmp;
+  int rc = __fstatfs64(fd, sizeof(tmp), &tmp);
+  if (rc != 0) {
+    return rc;
+  }
+  __statfs_to_statvfs(tmp, result);
+  return 0;
+}
diff --git a/libc/stdio/clrerr.c b/libc/bionic/strchr.cpp
similarity index 79%
copy from libc/stdio/clrerr.c
copy to libc/bionic/strchr.cpp
index cb6c4df..e2f4471 100644
--- a/libc/stdio/clrerr.c
+++ b/libc/bionic/strchr.cpp
@@ -1,10 +1,6 @@
-/*	$OpenBSD: clrerr.c,v 1.6 2005/08/08 08:05:36 espie Exp $ */
 /*-
- * Copyright (c) 1990, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Chris Torek.
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -31,14 +27,8 @@
  * SUCH DAMAGE.
  */
 
-#include <stdio.h>
-#include "local.h"
-#undef	clearerr
+#include <string.h>
 
-void
-clearerr(FILE *fp)
-{
-	FLOCKFILE(fp);
-	__sclearerr(fp);
-	FUNLOCKFILE(fp);
+extern "C" char* strchr(const char* p, int ch) {
+  return __strchr_chk(p, ch, __BIONIC_FORTIFY_UNKNOWN_SIZE);
 }
diff --git a/libc/bionic/sysconf.cpp b/libc/bionic/sysconf.cpp
index 5e39e61..e945fbf 100644
--- a/libc/bionic/sysconf.cpp
+++ b/libc/bionic/sysconf.cpp
@@ -33,6 +33,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
+#include <pthread.h>
 #include <stdio.h>  // For FOPEN_MAX.
 #include <string.h>
 #include <sys/sysconf.h>
@@ -57,7 +58,6 @@
 
 /* the following depends on our implementation */
 #define  SYSTEM_ATEXIT_MAX          65536    /* our implementation is unlimited */
-#define  SYSTEM_THREAD_STACK_MIN    32768    /* lower values may be possible, but be conservative */
 #define  SYSTEM_THREAD_THREADS_MAX  2048     /* really unlimited */
 
 #define  SYSTEM_2_C_BIND     _POSIX_VERSION  /* Posix C binding version */
@@ -298,8 +298,8 @@
     case _SC_TIMERS:            return _POSIX_TIMERS;
 #endif
 
-    // GETGR_R_SIZE_MAX ?
-    // GETPW_R_SIZE_MAX ?
+    case _SC_GETGR_R_SIZE_MAX: return 1024;
+    case _SC_GETPW_R_SIZE_MAX: return 1024;
 
     case _SC_LOGIN_NAME_MAX:    return SYSTEM_LOGIN_NAME_MAX;
 
@@ -309,7 +309,7 @@
     case _SC_THREAD_KEYS_MAX:
       return (BIONIC_TLS_SLOTS - TLS_SLOT_FIRST_USER_SLOT - GLOBAL_INIT_THREAD_LOCAL_BUFFER_COUNT);
 
-    case _SC_THREAD_STACK_MIN:    return SYSTEM_THREAD_STACK_MIN;
+    case _SC_THREAD_STACK_MIN:    return PTHREAD_STACK_MIN;
     case _SC_THREAD_THREADS_MAX:  return SYSTEM_THREAD_THREADS_MAX;
     case _SC_TTY_NAME_MAX:        return SYSTEM_TTY_NAME_MAX;
 #ifdef _POSIX_THREADS
diff --git a/libc/bionic/system_properties.c b/libc/bionic/system_properties.c
index 0587430..4c2e5a2 100644
--- a/libc/bionic/system_properties.c
+++ b/libc/bionic/system_properties.c
@@ -26,6 +26,7 @@
  * SUCH DAMAGE.
  */
 #include <stdio.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <stddef.h>
@@ -33,6 +34,7 @@
 #include <poll.h>
 #include <fcntl.h>
 #include <stdbool.h>
+#include <string.h>
 
 #include <sys/mman.h>
 
@@ -48,12 +50,71 @@
 #include <sys/_system_properties.h>
 
 #include <sys/atomics.h>
+#include <bionic_atomic_inline.h>
+
+#define ALIGN(x, a) (((x) + (a - 1)) & ~(a - 1))
+
+struct prop_area {
+    unsigned bytes_used;
+    unsigned volatile serial;
+    unsigned magic;
+    unsigned version;
+    unsigned reserved[28];
+    char data[0];
+};
+
+typedef struct prop_area prop_area;
+
+struct prop_info {
+    unsigned volatile serial;
+    char value[PROP_VALUE_MAX];
+    char name[0];
+};
+
+typedef struct prop_info prop_info;
+
+/*
+ * Properties are stored in a hybrid trie/binary tree structure.
+ * Each property's name is delimited at '.' characters, and the tokens are put
+ * into a trie structure.  Siblings at each level of the trie are stored in a
+ * binary tree.  For instance, "ro.secure"="1" could be stored as follows:
+ *
+ * +-----+   children    +----+   children    +--------+
+ * |     |-------------->| ro |-------------->| secure |
+ * +-----+               +----+               +--------+
+ *                       /    \                /   |
+ *                 left /      \ right   left /    |  prop   +===========+
+ *                     v        v            v     +-------->| ro.secure |
+ *                  +-----+   +-----+     +-----+            +-----------+
+ *                  | net |   | sys |     | com |            |     1     |
+ *                  +-----+   +-----+     +-----+            +===========+
+ */
+
+typedef volatile uint32_t prop_off_t;
+struct prop_bt {
+    uint8_t namelen;
+    uint8_t reserved[3];
+
+    prop_off_t prop;
+
+    prop_off_t left;
+    prop_off_t right;
+
+    prop_off_t children;
+
+    char name[0];
+};
+
+typedef struct prop_bt prop_bt;
 
 static const char property_service_socket[] = "/dev/socket/" PROP_SERVICE_NAME;
+static char property_filename[PATH_MAX] = PROP_FILENAME;
+static bool compat_mode = false;
 
-static unsigned dummy_props = 0;
+prop_area *__system_property_area__ = NULL;
 
-prop_area *__system_property_area__ = (void*) &dummy_props;
+size_t pa_data_size;
+size_t pa_size;
 
 static int get_fd_from_env(void)
 {
@@ -66,17 +127,89 @@
     return atoi(env);
 }
 
-int __system_properties_init(void)
+static int map_prop_area_rw()
+{
+    prop_area *pa;
+    int fd;
+    int ret;
+
+    /* dev is a tmpfs that we can use to carve a shared workspace
+     * out of, so let's do that...
+     */
+    fd = open(property_filename, O_RDWR | O_CREAT | O_NOFOLLOW | O_CLOEXEC |
+            O_EXCL, 0444);
+    if (fd < 0) {
+        if (errno == EACCES) {
+            /* for consistency with the case where the process has already
+             * mapped the page in and segfaults when trying to write to it
+             */
+            abort();
+        }
+        return -1;
+    }
+
+    ret = fcntl(fd, F_SETFD, FD_CLOEXEC);
+    if (ret < 0)
+        goto out;
+
+    if (ftruncate(fd, PA_SIZE) < 0)
+        goto out;
+
+    pa_size = PA_SIZE;
+    pa_data_size = pa_size - sizeof(prop_area);
+    compat_mode = false;
+
+    pa = mmap(NULL, pa_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+    if(pa == MAP_FAILED)
+        goto out;
+
+    memset(pa, 0, pa_size);
+    pa->magic = PROP_AREA_MAGIC;
+    pa->version = PROP_AREA_VERSION;
+    /* reserve root node */
+    pa->bytes_used = sizeof(prop_bt);
+
+    /* plug into the lib property services */
+    __system_property_area__ = pa;
+
+    close(fd);
+    return 0;
+
+out:
+    close(fd);
+    return -1;
+}
+
+int __system_property_set_filename(const char *filename)
+{
+    size_t len = strlen(filename);
+    if (len >= sizeof(property_filename))
+        return -1;
+
+    strcpy(property_filename, filename);
+    return 0;
+}
+
+int __system_property_area_init()
+{
+    return map_prop_area_rw();
+}
+
+static int map_prop_area()
 {
     bool fromFile = true;
     int result = -1;
+    int fd;
+    int ret;
 
-    if(__system_property_area__ != ((void*) &dummy_props)) {
-        return 0;
+    fd = open(property_filename, O_RDONLY | O_NOFOLLOW | O_CLOEXEC);
+    if (fd >= 0) {
+        /* For old kernels that don't support O_CLOEXEC */
+        ret = fcntl(fd, F_SETFD, FD_CLOEXEC);
+        if (ret < 0)
+            goto cleanup;
     }
 
-    int fd = open(PROP_FILENAME, O_RDONLY | O_NOFOLLOW);
-
     if ((fd < 0) && (errno == ENOENT)) {
         /*
          * For backwards compatibility, if the file doesn't
@@ -102,24 +235,33 @@
 
     if ((fd_stat.st_uid != 0)
             || (fd_stat.st_gid != 0)
-            || ((fd_stat.st_mode & (S_IWGRP | S_IWOTH)) != 0)) {
+            || ((fd_stat.st_mode & (S_IWGRP | S_IWOTH)) != 0)
+            || (fd_stat.st_size < sizeof(prop_area)) ) {
         goto cleanup;
     }
 
-    prop_area *pa = mmap(NULL, fd_stat.st_size, PROT_READ, MAP_SHARED, fd, 0);
+    pa_size = fd_stat.st_size;
+    pa_data_size = pa_size - sizeof(prop_area);
+    prop_area *pa = mmap(NULL, pa_size, PROT_READ, MAP_SHARED, fd, 0);
 
     if (pa == MAP_FAILED) {
         goto cleanup;
     }
 
-    if((pa->magic != PROP_AREA_MAGIC) || (pa->version != PROP_AREA_VERSION)) {
-        munmap(pa, fd_stat.st_size);
+    if((pa->magic != PROP_AREA_MAGIC) || (pa->version != PROP_AREA_VERSION &&
+                pa->version != PROP_AREA_VERSION_COMPAT)) {
+        munmap(pa, pa_size);
         goto cleanup;
     }
 
-    __system_property_area__ = pa;
+    if (pa->version == PROP_AREA_VERSION_COMPAT) {
+        compat_mode = true;
+    }
+
     result = 0;
 
+    __system_property_area__ = pa;
+
 cleanup:
     if (fromFile) {
         close(fd);
@@ -128,42 +270,182 @@
     return result;
 }
 
-const prop_info *__system_property_find_nth(unsigned n)
+int __system_properties_init()
+{
+    return map_prop_area();
+}
+
+static void *new_prop_obj(size_t size, prop_off_t *off)
 {
     prop_area *pa = __system_property_area__;
+    size = ALIGN(size, sizeof(uint32_t));
 
-    if(n >= pa->count) {
-        return 0;
+    if (pa->bytes_used + size > pa_data_size)
+        return NULL;
+
+    *off = pa->bytes_used;
+    __system_property_area__->bytes_used += size;
+    return __system_property_area__->data + *off;
+}
+
+static prop_bt *new_prop_bt(const char *name, uint8_t namelen, prop_off_t *off)
+{
+    prop_off_t off_tmp;
+    prop_bt *bt = new_prop_obj(sizeof(prop_bt) + namelen + 1, &off_tmp);
+    if (bt) {
+        memcpy(bt->name, name, namelen);
+        bt->name[namelen] = '\0';
+        bt->namelen = namelen;
+        ANDROID_MEMBAR_FULL();
+        *off = off_tmp;
+    }
+
+    return bt;
+}
+
+static prop_info *new_prop_info(const char *name, uint8_t namelen,
+        const char *value, uint8_t valuelen, prop_off_t *off)
+{
+    prop_off_t off_tmp;
+    prop_info *info = new_prop_obj(sizeof(prop_info) + namelen + 1, &off_tmp);
+    if (info) {
+        memcpy(info->name, name, namelen);
+        info->name[namelen] = '\0';
+        info->serial = (valuelen << 24);
+        memcpy(info->value, value, valuelen);
+        info->value[valuelen] = '\0';
+        ANDROID_MEMBAR_FULL();
+        *off = off_tmp;
+    }
+
+    return info;
+}
+
+static void *to_prop_obj(prop_off_t off)
+{
+    if (off > pa_data_size)
+        return NULL;
+
+    return __system_property_area__->data + off;
+}
+
+static prop_bt *root_node()
+{
+    return to_prop_obj(0);
+}
+
+static int cmp_prop_name(const char *one, uint8_t one_len, const char *two,
+        uint8_t two_len)
+{
+    if (one_len < two_len)
+        return -1;
+    else if (one_len > two_len)
+        return 1;
+    else
+        return strncmp(one, two, one_len);
+}
+
+static prop_bt *find_prop_bt(prop_bt *bt, const char *name, uint8_t namelen,
+        bool alloc_if_needed)
+{
+    while (true) {
+        int ret;
+        if (!bt)
+            return bt;
+        ret = cmp_prop_name(name, namelen, bt->name, bt->namelen);
+
+        if (ret == 0) {
+            return bt;
+        } else if (ret < 0) {
+            if (bt->left) {
+                bt = to_prop_obj(bt->left);
+            } else {
+                if (!alloc_if_needed)
+                   return NULL;
+
+                bt = new_prop_bt(name, namelen, &bt->left);
+            }
+        } else {
+            if (bt->right) {
+                bt = to_prop_obj(bt->right);
+            } else {
+                if (!alloc_if_needed)
+                   return NULL;
+
+                bt = new_prop_bt(name, namelen, &bt->right);
+            }
+        }
+    }
+}
+
+static const prop_info *find_property(prop_bt *trie, const char *name,
+        uint8_t namelen, const char *value, uint8_t valuelen,
+        bool alloc_if_needed)
+{
+    const char *remaining_name = name;
+
+    while (true) {
+        char *sep = strchr(remaining_name, '.');
+        bool want_subtree = (sep != NULL);
+        uint8_t substr_size;
+
+        prop_bt *root;
+
+        if (want_subtree) {
+            substr_size = sep - remaining_name;
+        } else {
+            substr_size = strlen(remaining_name);
+        }
+
+        if (!substr_size)
+            return NULL;
+
+        if (trie->children) {
+            root = to_prop_obj(trie->children);
+        } else if (alloc_if_needed) {
+            root = new_prop_bt(remaining_name, substr_size, &trie->children);
+        } else {
+            root = NULL;
+        }
+
+        if (!root)
+            return NULL;
+
+        trie = find_prop_bt(root, remaining_name, substr_size, alloc_if_needed);
+        if (!trie)
+            return NULL;
+
+        if (!want_subtree)
+            break;
+
+        remaining_name = sep + 1;
+    }
+
+    if (trie->prop) {
+        return to_prop_obj(trie->prop);
+    } else if (alloc_if_needed) {
+        return new_prop_info(name, namelen, value, valuelen, &trie->prop);
     } else {
-        return TOC_TO_INFO(pa, pa->toc[n]);
+        return NULL;
     }
 }
 
 const prop_info *__system_property_find(const char *name)
 {
-    prop_area *pa = __system_property_area__;
-    unsigned count = pa->count;
-    unsigned *toc = pa->toc;
-    unsigned len = strlen(name);
-    prop_info *pi;
-
-    while(count--) {
-        unsigned entry = *toc++;
-        if(TOC_NAME_LEN(entry) != len) continue;
-
-        pi = TOC_TO_INFO(pa, entry);
-        if(memcmp(name, pi->name, len)) continue;
-
-        return pi;
+    if (__predict_false(compat_mode)) {
+        return __system_property_find_compat(name);
     }
-
-    return 0;
+    return find_property(root_node(), name, strlen(name), NULL, 0, false);
 }
 
 int __system_property_read(const prop_info *pi, char *name, char *value)
 {
     unsigned serial, len;
 
+    if (__predict_false(compat_mode)) {
+        return __system_property_read_compat(pi, name, value);
+    }
+
     for(;;) {
         serial = pi->serial;
         while(SERIAL_DIRTY(serial)) {
@@ -172,6 +454,7 @@
         }
         len = SERIAL_VALUE_LEN(serial);
         memcpy(value, pi->value, len + 1);
+        ANDROID_MEMBAR_FULL();
         if(serial == pi->serial) {
             if(name != 0) {
                 strcpy(name, pi->name);
@@ -294,3 +577,133 @@
     }
     return 0;
 }
+
+int __system_property_update(prop_info *pi, const char *value, unsigned int len)
+{
+    prop_area *pa = __system_property_area__;
+
+    if (len >= PROP_VALUE_MAX)
+        return -1;
+
+    pi->serial = pi->serial | 1;
+    ANDROID_MEMBAR_FULL();
+    memcpy(pi->value, value, len + 1);
+    ANDROID_MEMBAR_FULL();
+    pi->serial = (len << 24) | ((pi->serial + 1) & 0xffffff);
+    __futex_wake(&pi->serial, INT32_MAX);
+
+    pa->serial++;
+    __futex_wake(&pa->serial, INT32_MAX);
+
+    return 0;
+}
+
+int __system_property_add(const char *name, unsigned int namelen,
+            const char *value, unsigned int valuelen)
+{
+    prop_area *pa = __system_property_area__;
+    const prop_info *pi;
+
+    if (namelen >= PROP_NAME_MAX)
+        return -1;
+    if (valuelen >= PROP_VALUE_MAX)
+        return -1;
+    if (namelen < 1)
+        return -1;
+
+    pi = find_property(root_node(), name, namelen, value, valuelen, true);
+    if (!pi)
+        return -1;
+
+    pa->serial++;
+    __futex_wake(&pa->serial, INT32_MAX);
+    return 0;
+}
+
+unsigned int __system_property_serial(const prop_info *pi)
+{
+    return pi->serial;
+}
+
+unsigned int __system_property_wait_any(unsigned int serial)
+{
+    prop_area *pa = __system_property_area__;
+
+    do {
+        __futex_wait(&pa->serial, serial, 0);
+    } while(pa->serial == serial);
+
+    return pa->serial;
+}
+
+struct find_nth_cookie {
+    unsigned count;
+    unsigned n;
+    const prop_info *pi;
+};
+
+static void find_nth_fn(const prop_info *pi, void *ptr)
+{
+    struct find_nth_cookie *cookie = ptr;
+
+    if (cookie->n == cookie->count)
+        cookie->pi = pi;
+
+    cookie->count++;
+}
+
+const prop_info *__system_property_find_nth(unsigned n)
+{
+    struct find_nth_cookie cookie;
+    int err;
+
+    memset(&cookie, 0, sizeof(cookie));
+    cookie.n = n;
+
+    err = __system_property_foreach(find_nth_fn, &cookie);
+    if (err < 0)
+        return NULL;
+
+    return cookie.pi;
+}
+
+static int foreach_property(prop_off_t off,
+        void (*propfn)(const prop_info *pi, void *cookie), void *cookie)
+{
+    prop_bt *trie = to_prop_obj(off);
+    if (!trie)
+        return -1;
+
+    if (trie->left) {
+        int err = foreach_property(trie->left, propfn, cookie);
+        if (err < 0)
+            return -1;
+    }
+    if (trie->prop) {
+        prop_info *info = to_prop_obj(trie->prop);
+        if (!info)
+            return -1;
+        propfn(info, cookie);
+    }
+    if (trie->children) {
+        int err = foreach_property(trie->children, propfn, cookie);
+        if (err < 0)
+            return -1;
+    }
+    if (trie->right) {
+        int err = foreach_property(trie->right, propfn, cookie);
+        if (err < 0)
+            return -1;
+    }
+
+    return 0;
+}
+
+int __system_property_foreach(void (*propfn)(const prop_info *pi, void *cookie),
+        void *cookie)
+{
+    if (__predict_false(compat_mode)) {
+        return __system_property_foreach_compat(propfn, cookie);
+	}
+    return foreach_property(0, propfn, cookie);
+}
diff --git a/libc/bionic/system_properties_compat.c b/libc/bionic/system_properties_compat.c
new file mode 100644
index 0000000..6dbc4cc
--- /dev/null
+++ b/libc/bionic/system_properties_compat.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This file is only used to provide backwards compatibility to property areas
+ * created by old versions of init, which occurs when an OTA runs.  The updater
+ * binary is compiled statically against the newest bionic, but the recovery
+ * ramdisk may be using an old version of init.  This can all be removed once
+ * OTAs from pre-K versions are no longer supported.
+ */
+
+#include <string.h>
+#include <sys/atomics.h>
+
+#define _REALLY_INCLUDE_SYS__SYSTEM_PROPERTIES_H_
+#include <sys/_system_properties.h>
+
+#define TOC_NAME_LEN(toc)       ((toc) >> 24)
+#define TOC_TO_INFO(area, toc)  ((prop_info_compat*) (((char*) area) + ((toc) & 0xFFFFFF)))
+
+struct prop_area_compat {
+    unsigned volatile count;
+    unsigned volatile serial;
+    unsigned magic;
+    unsigned version;
+    unsigned toc[1];
+};
+
+typedef struct prop_area_compat prop_area_compat;
+
+struct prop_area;
+typedef struct prop_area prop_area;
+
+struct prop_info_compat {
+    char name[PROP_NAME_MAX];
+    unsigned volatile serial;
+    char value[PROP_VALUE_MAX];
+};
+
+typedef struct prop_info_compat prop_info_compat;
+
+extern prop_area *__system_property_area__;
+
+const prop_info *__system_property_find_compat(const char *name)
+{
+    prop_area_compat *pa = (prop_area_compat *)__system_property_area__;
+    unsigned count = pa->count;
+    unsigned *toc = pa->toc;
+    unsigned len = strlen(name);
+    prop_info_compat *pi;
+
+    if (len >= PROP_NAME_MAX)
+        return 0;
+    if (len < 1)
+        return 0;
+
+    while(count--) {
+        unsigned entry = *toc++;
+        if(TOC_NAME_LEN(entry) != len) continue;
+
+        pi = TOC_TO_INFO(pa, entry);
+        if(memcmp(name, pi->name, len)) continue;
+
+        return (const prop_info *)pi;
+    }
+
+    return 0;
+}
+
+int __system_property_read_compat(const prop_info *_pi, char *name, char *value)
+{
+    unsigned serial, len;
+    const prop_info_compat *pi = (const prop_info_compat *)_pi;
+
+    for(;;) {
+        serial = pi->serial;
+        while(SERIAL_DIRTY(serial)) {
+            __futex_wait((volatile void *)&pi->serial, serial, 0);
+            serial = pi->serial;
+        }
+        len = SERIAL_VALUE_LEN(serial);
+        memcpy(value, pi->value, len + 1);
+        if(serial == pi->serial) {
+            if(name != 0) {
+                strcpy(name, pi->name);
+            }
+            return len;
+        }
+    }
+}
+
+int __system_property_foreach_compat(
+        void (*propfn)(const prop_info *pi, void *cookie),
+        void *cookie)
+{
+    prop_area_compat *pa = (prop_area_compat *)__system_property_area__;
+    unsigned i;
+
+    for (i = 0; i < pa->count; i++) {
+        unsigned entry = pa->toc[i];
+        prop_info_compat *pi = TOC_TO_INFO(pa, entry);
+        propfn((const prop_info *)pi, cookie);
+    }
+
+    return 0;
+}
diff --git a/libc/include/err.h b/libc/include/err.h
index 1636efe..f24da61 100644
--- a/libc/include/err.h
+++ b/libc/include/err.h
@@ -48,42 +48,42 @@
 __BEGIN_DECLS
 
 __noreturn void	err(int, const char *, ...)
-			__attribute__((__format__ (printf, 2, 3)));
+			__printflike(2, 3);
 __noreturn void	verr(int, const char *, __va_list)
-			__attribute__((__format__ (printf, 2, 0)));
+			__printflike(2, 0);
 __noreturn void	errx(int, const char *, ...)
-			__attribute__((__format__ (printf, 2, 3)));
+			__printflike(2, 3);
 __noreturn void	verrx(int, const char *, __va_list)
-			__attribute__((__format__ (printf, 2, 0)));
+			__printflike(2, 0);
 void		warn(const char *, ...)
-			__attribute__((__format__ (printf, 1, 2)));
+			__printflike(1, 2);
 void		vwarn(const char *, __va_list)
-			__attribute__((__format__ (printf, 1, 0)));
+			__printflike(1, 0);
 void		warnx(const char *, ...)
-			__attribute__((__format__ (printf, 1, 2)));
+			__printflike(1, 2);
 void		vwarnx(const char *, __va_list)
-			__attribute__((__format__ (printf, 1, 0)));
+			__printflike(1, 0);
 
 /*
  * The _* versions are for use in library functions so user-defined
  * versions of err*,warn* do not get used.
  */
 __noreturn void	_err(int, const char *, ...)
-			__attribute__((__format__ (printf, 2, 3)));
+			__printflike(2, 3);
 __noreturn void	_verr(int, const char *, __va_list)
-			__attribute__((__format__ (printf, 2, 0)));
+			__printflike(2, 0);
 __noreturn void	_errx(int, const char *, ...)
-			__attribute__((__format__ (printf, 2, 3)));
+			__printflike(2, 3);
 __noreturn void	_verrx(int, const char *, __va_list)
-			__attribute__((__format__ (printf, 2, 0)));
+			__printflike(2, 0);
 void		_warn(const char *, ...)
-			__attribute__((__format__ (printf, 1, 2)));
+			__printflike(1, 2);
 void		_vwarn(const char *, __va_list)
-			__attribute__((__format__ (printf, 1, 0)));
+			__printflike(1, 0);
 void		_warnx(const char *, ...)
-			__attribute__((__format__ (printf, 1, 2)));
+			__printflike(1, 2);
 void		_vwarnx(const char *, __va_list)
-			__attribute__((__format__ (printf, 1, 0)));
+			__printflike(1, 0);
 
 __END_DECLS
 
diff --git a/libc/include/fcntl.h b/libc/include/fcntl.h
index de2e3e3..3cb3d8a 100644
--- a/libc/include/fcntl.h
+++ b/libc/include/fcntl.h
@@ -49,12 +49,9 @@
 extern int  fcntl(int   fd, int   command, ...);
 extern int  creat(const char*  path, mode_t  mode);
 
-#if defined(__BIONIC_FORTIFY)
-
-extern void __creat_error()
-    __attribute__((__error__ ("called with O_CREAT, but missing mode")));
-extern void __too_many_args_error()
-    __attribute__((__error__ ("too many arguments")));
+#if defined(__BIONIC_FORTIFY) && !defined(__clang__)
+__errordecl(__creat_missing_mode, "called with O_CREAT, but missing mode");
+__errordecl(__creat_too_many_args, "too many arguments");
 extern int __open_real(const char *pathname, int flags, ...)
     __asm__(__USER_LABEL_PREFIX__ "open");
 extern int __open_2(const char *, int);
@@ -63,12 +60,12 @@
 int open(const char *pathname, int flags, ...) {
     if (__builtin_constant_p(flags)) {
         if ((flags & O_CREAT) && __builtin_va_arg_pack_len() == 0) {
-            __creat_error();  // compile time error
+            __creat_missing_mode();  // compile time error
         }
     }
 
     if (__builtin_va_arg_pack_len() > 1) {
-        __too_many_args_error();  // compile time error
+        __creat_too_many_args();  // compile time error
     }
 
     if ((__builtin_va_arg_pack_len() == 0) && !__builtin_constant_p(flags)) {
@@ -86,12 +83,12 @@
 int openat(int dirfd, const char *pathname, int flags, ...) {
     if (__builtin_constant_p(flags)) {
         if ((flags & O_CREAT) && __builtin_va_arg_pack_len() == 0) {
-            __creat_error();  // compile time error
+            __creat_missing_mode();  // compile time error
         }
     }
 
     if (__builtin_va_arg_pack_len() > 1) {
-        __too_many_args_error();  // compile time error
+        __creat_too_many_args();  // compile time error
     }
 
     if ((__builtin_va_arg_pack_len() == 0) && !__builtin_constant_p(flags)) {
@@ -101,7 +98,7 @@
     return __openat_real(dirfd, pathname, flags, __builtin_va_arg_pack());
 }
 
-#endif /* defined(__BIONIC_FORTIFY) */
+#endif /* defined(__BIONIC_FORTIFY) && !defined(__clang__) */
 
 __END_DECLS
 
diff --git a/libc/include/getopt.h b/libc/include/getopt.h
index 56f1983..4451941 100644
--- a/libc/include/getopt.h
+++ b/libc/include/getopt.h
@@ -1,5 +1,5 @@
-/*	$OpenBSD: getopt.h,v 1.1 2002/12/03 20:24:29 millert Exp $	*/
 /*	$NetBSD: getopt.h,v 1.4 2000/07/07 10:43:54 ad Exp $	*/
+/*	$FreeBSD$ */
 
 /*-
  * Copyright (c) 2000 The NetBSD Foundation, Inc.
@@ -16,13 +16,6 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *        This product includes software developed by the NetBSD
- *        Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
@@ -43,7 +36,8 @@
 #include <sys/cdefs.h>
 
 /*
- * GNU-like getopt_long() and 4.4BSD getsubopt()/optreset extensions
+ * GNU-like getopt_long()/getopt_long_only() with 4.4BSD optreset extension.
+ * getopt() is declared here too for GNU programs.
  */
 #define no_argument        0
 #define required_argument  1
@@ -64,26 +58,20 @@
 };
 
 __BEGIN_DECLS
-int	 getopt_long(int, char * const *, const char *,
-	    const struct option *, int *);
-int	 getopt_long_only(int, char * const *, const char *,
-	    const struct option *, int *);
-#ifndef _GETOPT_DEFINED_
-#define _GETOPT_DEFINED_
-int	 getopt(int, char * const *, const char *);
+int	getopt_long(int, char * const *, const char *,
+	const struct option *, int *);
+int	getopt_long_only(int, char * const *, const char *,
+	const struct option *, int *);
+#ifndef _GETOPT_DECLARED
+#define	_GETOPT_DECLARED
+int	 getopt(int, char * const [], const char *);
 
-
-extern   char *optarg;                  /* getopt(3) external variables */
-extern   int opterr;
-extern   int optind;
-extern   int optopt;
-extern   int optreset;
-
-#if 0 /* MISSING FROM BIONIC */
-int       getsubopt(char **, char * const *, char **);
-extern   char *suboptarg;               /* getsubopt(3) external variable */
-#endif /* MISSING */
-
+extern char *optarg;			/* getopt(3) external variables */
+extern int optind, opterr, optopt;
+#endif
+#ifndef _OPTRESET_DECLARED
+#define	_OPTRESET_DECLARED
+extern int optreset;			/* getopt(3) external variable */
 #endif
 __END_DECLS
  
diff --git a/libc/include/inttypes.h b/libc/include/inttypes.h
index 81d2315..73b22db 100644
--- a/libc/include/inttypes.h
+++ b/libc/include/inttypes.h
@@ -56,7 +56,7 @@
 #define	PRIdFAST64		"lld"		/* int_fast64_t */
 
 #define	PRIdMAX			"jd"		/* intmax_t */
-#define	PRIdPTR			"ld"		/* intptr_t */
+#define	PRIdPTR			"d"		/* intptr_t */
 
 #define	PRIi8			"i"		/* int8_t */
 #define	PRIi16			"i"		/* int16_t */
@@ -74,7 +74,7 @@
 #define	PRIiFAST64		"lli"		/* int_fast64_t */
 
 #define	PRIiMAX			"ji"		/* intmax_t */
-#define	PRIiPTR			"li"		/* intptr_t */
+#define	PRIiPTR			"i"		/* intptr_t */
 
 /* fprintf macros for unsigned integers */
 #define	PRIo8			"o"		/* int8_t */
@@ -93,7 +93,7 @@
 #define	PRIoFAST64		"llo"		/* int_fast64_t */
 
 #define	PRIoMAX			"jo"		/* intmax_t */
-#define	PRIoPTR			"lo"		/* intptr_t */
+#define	PRIoPTR			"o"		/* intptr_t */
 
 #define	PRIu8			"u"		/* uint8_t */
 #define	PRIu16			"u"		/* uint16_t */
@@ -111,7 +111,7 @@
 #define	PRIuFAST64		"llu"		/* uint_fast64_t */
 
 #define	PRIuMAX			"ju"		/* uintmax_t */
-#define	PRIuPTR			"lu"		/* uintptr_t */
+#define	PRIuPTR			"u"		/* uintptr_t */
 
 #define	PRIx8			"x"		/* uint8_t */
 #define	PRIx16			"x"		/* uint16_t */
@@ -129,7 +129,7 @@
 #define	PRIxFAST64		"llx"		/* uint_fast64_t */
 
 #define	PRIxMAX			"jx"		/* uintmax_t */
-#define	PRIxPTR			"lx"		/* uintptr_t */
+#define	PRIxPTR			"x"		/* uintptr_t */
 
 #define	PRIX8			"X"		/* uint8_t */
 #define	PRIX16			"X"		/* uint16_t */
@@ -147,7 +147,7 @@
 #define	PRIXFAST64		"llX"		/* uint_fast64_t */
 
 #define	PRIXMAX			"jX"		/* uintmax_t */
-#define	PRIXPTR			"lX"		/* uintptr_t */
+#define	PRIXPTR			"X"		/* uintptr_t */
 
 /* fscanf macros for signed integers */
 #define	SCNd8			"hhd"		/* int8_t */
@@ -166,7 +166,7 @@
 #define	SCNdFAST64		"lld"		/* int_fast64_t */
 
 #define	SCNdMAX			"jd"		/* intmax_t */
-#define	SCNdPTR			"ld"		/* intptr_t */
+#define	SCNdPTR			"d"		/* intptr_t */
 
 #define	SCNi8			"hhi"		/* int8_t */
 #define	SCNi16			"hi"		/* int16_t */
@@ -184,7 +184,7 @@
 #define	SCNiFAST64		"lli"		/* int_fast64_t */
 
 #define	SCNiMAX			"ji"		/* intmax_t */
-#define	SCNiPTR			"li"		/* intptr_t */
+#define	SCNiPTR			"i"		/* intptr_t */
 
 /* fscanf macros for unsigned integers */
 #define	SCNo8			"hho"		/* uint8_t */
@@ -203,7 +203,7 @@
 #define	SCNoFAST64		"llo"		/* uint_fast64_t */
 
 #define	SCNoMAX			"jo"		/* uintmax_t */
-#define	SCNoPTR			"lo"		/* uintptr_t */
+#define	SCNoPTR			"o"		/* uintptr_t */
 
 #define	SCNu8			"hhu"		/* uint8_t */
 #define	SCNu16			"hu"		/* uint16_t */
@@ -221,7 +221,7 @@
 #define	SCNuFAST64		"llu"		/* uint_fast64_t */
 
 #define	SCNuMAX			"ju"		/* uintmax_t */
-#define	SCNuPTR			"lu"		/* uintptr_t */
+#define	SCNuPTR			"u"		/* uintptr_t */
 
 #define	SCNx8			"hhx"		/* uint8_t */
 #define	SCNx16			"hx"		/* uint16_t */
@@ -239,7 +239,7 @@
 #define	SCNxFAST64		"llx"		/* uint_fast64_t */
 
 #define	SCNxMAX			"jx"		/* uintmax_t */
-#define	SCNxPTR			"lx"		/* uintptr_t */
+#define	SCNxPTR			"x"		/* uintptr_t */
 
 #endif /* __cplusplus || __STDC_FORMAT_MACROS */
 
diff --git a/libc/include/malloc.h b/libc/include/malloc.h
index 8875e54..eaedc49 100644
--- a/libc/include/malloc.h
+++ b/libc/include/malloc.h
@@ -33,7 +33,7 @@
 extern void free(void* p);
 
 extern void* memalign(size_t alignment, size_t byte_count) __mallocfunc __wur;
-extern size_t malloc_usable_size(void* p);
+extern size_t malloc_usable_size(const void* p);
 
 extern void* valloc(size_t byte_count) __mallocfunc __wur;
 extern void* pvalloc(size_t byte_count) __mallocfunc __wur;
diff --git a/libc/include/mntent.h b/libc/include/mntent.h
index b83da1f..8b87f71 100644
--- a/libc/include/mntent.h
+++ b/libc/include/mntent.h
@@ -29,6 +29,7 @@
 #define _MNTENT_H_
 
 #include <stdio.h>
+#include <sys/cdefs.h>
 
 #define MNTTYPE_IGNORE "ignore"
 
diff --git a/libc/include/netinet/icmp6.h b/libc/include/netinet/icmp6.h
index fbc8234..6625712 100644
--- a/libc/include/netinet/icmp6.h
+++ b/libc/include/netinet/icmp6.h
@@ -1,4 +1,4 @@
-/*	$NetBSD: icmp6.h,v 1.40 2009/10/31 22:32:17 christos Exp $	*/
+/*	$NetBSD: icmp6.h,v 1.47 2013/07/01 12:43:15 christos Exp $	*/
 /*	$KAME: icmp6.h,v 1.84 2003/04/23 10:26:51 itojun Exp $	*/
 
 
@@ -65,6 +65,8 @@
 #ifndef _NETINET_ICMP6_H_
 #define _NETINET_ICMP6_H_
 
+#include <netinet/in.h> /* android-added: glibc source compatibility. */
+
 #define ICMPV6_PLD_MAXLEN	1232	/* IPV6_MMTU - sizeof(struct ip6_hdr)
 					   - sizeof(struct icmp6_hdr) */
 
@@ -98,6 +100,7 @@
 #define MLD_LISTENER_QUERY		130 	/* multicast listener query */
 #define MLD_LISTENER_REPORT		131	/* multicast listener report */
 #define MLD_LISTENER_DONE		132	/* multicast listener done */
+#define MLD_LISTENER_REDUCTION MLD_LISTENER_DONE /* RFC3542 definition */
 
 /* RFC2292 decls */
 #define ICMP6_MEMBERSHIP_QUERY		130	/* group membership query */
@@ -125,6 +128,7 @@
 #define ICMP6_FQDN_REPLY		140	/* FQDN reply */
 #define ICMP6_NI_QUERY			139	/* node information request */
 #define ICMP6_NI_REPLY			140	/* node information reply */
+#define MLDV2_LISTENER_REPORT		143	/* RFC3810 listener report */
 
 /* The definitions below are experimental. TBA */
 #define MLD_MTRACE_RESP			200	/* mtrace response(to sender) */
@@ -144,6 +148,9 @@
 #define ICMP6_DST_UNREACH_BEYONDSCOPE	2	/* beyond scope of source address */
 #define ICMP6_DST_UNREACH_ADDR		3	/* address unreachable */
 #define ICMP6_DST_UNREACH_NOPORT	4	/* port unreachable */
+#define ICMP6_DST_UNREACH_POLICY	5	/* source address failed ingress/egress policy */
+#define ICMP6_DST_UNREACH_REJROUTE	6	/* reject route to destination */
+#define ICMP6_DST_UNREACH_SOURCERT	7	/* error in source routing header */
 
 #define ICMP6_TIME_EXCEED_TRANSIT 	0	/* ttl==0 in transit */
 #define ICMP6_TIME_EXCEED_REASSEMBLY	1	/* ttl==0 in reass */
@@ -229,7 +236,7 @@
 #define ND_RA_FLAG_HOME_AGENT	0x20
 
 /*
- * Router preference values based on RFC4199.
+ * Router preference values based on RFC4191.
  */
 #define ND_RA_FLAG_RTPREF_MASK	0x18 /* 00011000 */
 
@@ -300,11 +307,10 @@
 #define ND_OPT_HOMEAGENT_INFO		8
 #define ND_OPT_SOURCE_ADDRLIST		9
 #define ND_OPT_TARGET_ADDRLIST		10
-#define ND_OPT_RDNSS			25
-/* draft-ietf-ipngwg-router-preference, not officially assigned yet */
-#define ND_OPT_ROUTE_INFO		200
-/* draft-ietf-mobileip-hmipv6, not officially assigned yet */
-#define ND_OPT_MAP			201
+#define ND_OPT_MAP			23	/* RFC 5380 */
+#define ND_OPT_ROUTE_INFO		24	/* RFC 4191 */
+#define ND_OPT_RDNSS			25	/* RFC 6106 */
+#define ND_OPT_DNSSL			31	/* RFC 6106 */
 
 struct nd_opt_route_info {	/* route info */
 	u_int8_t	nd_opt_rti_type;
@@ -344,7 +350,7 @@
 	u_int32_t	nd_opt_mtu_mtu;
 } __packed;
 
-struct nd_opt_rdnss {		/* RDNSS option RFC 5006 */
+struct nd_opt_rdnss {		/* RDNSS option RFC 6106 */
 	u_int8_t	nd_opt_rdnss_type;
 	u_int8_t	nd_opt_rdnss_len;
 	u_int16_t	nd_opt_rdnss_reserved;
@@ -352,6 +358,14 @@
 	/* followed by list of IP prefixes */
 } __packed;
 
+struct nd_opt_dnssl {		/* DNSSL option RFC 6106 */
+	u_int8_t	nd_opt_dnssl_type;
+	u_int8_t	nd_opt_dnssl_len;
+	u_int16_t	nd_opt_dnssl_reserved;
+	u_int32_t	nd_opt_dnssl_lifetime;
+	/* followed by list of DNS search domain names */
+} __packed;
+
 /*
  * icmp6 namelookup
  */
@@ -376,11 +390,15 @@
 	/* could be followed by reply data */
 } __packed;
 
+/*
+ * BEGIN android-removed: glibc doesn't have these, and external/ping declares them itself.
 #define ni_type		icmp6_ni_hdr.icmp6_type
 #define ni_code		icmp6_ni_hdr.icmp6_code
 #define ni_cksum	icmp6_ni_hdr.icmp6_cksum
 #define ni_qtype	icmp6_ni_hdr.icmp6_data16[0]
 #define ni_flags	icmp6_ni_hdr.icmp6_data16[1]
+ * END android-removed
+ */
 
 #define NI_QTYPE_NOOP		0 /* NOOP  */
 #define NI_QTYPE_SUPTYPES	1 /* Supported Qtypes */
@@ -521,18 +539,28 @@
 	u_int32_t icmp6_filt[8];
 };
 
+/*
+ * BEGIN android-changed
+ * Linux and *BSD kernels use opposite values to indicate pass/block in ICMPv6
+ * filters, and assign a different value to the ICMP6_FILTER sockopt.
+ */
+#define ICMP6_FILTER 1
+
 #define	ICMP6_FILTER_SETPASSALL(filterp) \
-	(void)memset(filterp, 0xff, sizeof(struct icmp6_filter))
-#define	ICMP6_FILTER_SETBLOCKALL(filterp) \
 	(void)memset(filterp, 0x00, sizeof(struct icmp6_filter))
+#define	ICMP6_FILTER_SETBLOCKALL(filterp) \
+	(void)memset(filterp, 0xff, sizeof(struct icmp6_filter))
 #define	ICMP6_FILTER_SETPASS(type, filterp) \
-	(((filterp)->icmp6_filt[(type) >> 5]) |= (1 << ((type) & 31)))
-#define	ICMP6_FILTER_SETBLOCK(type, filterp) \
 	(((filterp)->icmp6_filt[(type) >> 5]) &= ~(1 << ((type) & 31)))
+#define	ICMP6_FILTER_SETBLOCK(type, filterp) \
+	(((filterp)->icmp6_filt[(type) >> 5]) |= (1 << ((type) & 31)))
 #define	ICMP6_FILTER_WILLPASS(type, filterp) \
-	((((filterp)->icmp6_filt[(type) >> 5]) & (1 << ((type) & 31))) != 0)
-#define	ICMP6_FILTER_WILLBLOCK(type, filterp) \
 	((((filterp)->icmp6_filt[(type) >> 5]) & (1 << ((type) & 31))) == 0)
+#define	ICMP6_FILTER_WILLBLOCK(type, filterp) \
+	((((filterp)->icmp6_filt[(type) >> 5]) & (1 << ((type) & 31))) != 0)
+/*
+ * END android-changed
+ */
 
 /*
  * Variables related to this implementation
@@ -569,8 +597,9 @@
 #define	ICMP6_STAT_BADRS	538	/* bad router solicitiation */
 #define	ICMP6_STAT_BADRA	539	/* bad router advertisement */
 #define	ICMP6_STAT_BADREDIRECT	540	/* bad redirect message */
+#define ICMP6_STAT_DROPPED_RAROUTE 541	/* discarded routes from router advertisement */
 
-#define	ICMP6_NSTATS		541
+#define	ICMP6_NSTATS		542
 
 #define	ICMP6_ERRSTAT_DST_UNREACH_NOROUTE	0
 #define	ICMP6_ERRSTAT_DST_UNREACH_ADMIN		1
@@ -640,8 +669,6 @@
 	{ "nd6_maxqueuelen", CTLTYPE_INT }, \
 }
 
-#define RTF_PROBEMTU	RTF_PROTO1
-
 #ifdef _KERNEL
 struct	rtentry;
 struct	rttimer;
@@ -727,4 +754,41 @@
 extern int	icmp6_redirtimeout;	/* cache time for redirect routes */
 #endif /* _KERNEL */
 
+#ifdef ICMP6_STRINGS
+/* Info: http://www.iana.org/assignments/icmpv6-parameters */
+
+static const char * const icmp6_type_err[] = {
+	"reserved0", "unreach", "packet_too_big", "timxceed", "paramprob",
+	NULL
+};
+
+static const char * const icmp6_type_info[] = {
+	"echo", "echoreply",
+	"mcastlistenq", "mcastlistenrep", "mcastlistendone",
+	"rtsol", "rtadv", "neighsol", "neighadv", "redirect",
+	"routerrenum", "nodeinfoq", "nodeinfor", "invneighsol", "invneighrep",
+	"mcastlistenrep2", "haad_req", "haad_rep",
+	"mobile_psol", "mobile_padv", "cga_sol", "cga_adv",
+	"experimental150", "mcast_rtadv", "mcast_rtsol", "mcast_rtterm",
+	"fmipv6_msg", "rpl_control", NULL
+};
+
+static const char * const icmp6_code_none[] = { "none", NULL };
+
+static const char * const icmp6_code_unreach[] = {
+	"noroute", "admin", "beyondscope", "addr", "port",
+	"srcaddr_policy", "reject_route", "source_route_err", NULL
+};
+
+static const char * const icmp6_code_timxceed[] = {
+	"intrans", "reass", NULL
+};
+
+static const char * const icmp6_code_paramprob[] = {
+	"hdr_field", "nxthdr_type", "option", NULL
+};      
+
+/* not all informational icmps that have codes have a names array */
+#endif
+
 #endif /* !_NETINET_ICMP6_H_ */
diff --git a/libc/include/netinet/in.h b/libc/include/netinet/in.h
index c5b964e..bf3b498 100644
--- a/libc/include/netinet/in.h
+++ b/libc/include/netinet/in.h
@@ -29,11 +29,13 @@
 #define _NETINET_IN_H_
 
 #include <endian.h>
+#include <netinet/in6.h>
+#include <sys/cdefs.h>
 #include <sys/socket.h>
+
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <linux/ipv6.h>
-#include <netinet/in6.h>
 
 __BEGIN_DECLS
 
diff --git a/libc/include/netinet/tcp.h b/libc/include/netinet/tcp.h
index 9adf904..bc52249 100644
--- a/libc/include/netinet/tcp.h
+++ b/libc/include/netinet/tcp.h
@@ -31,4 +31,22 @@
 #include <endian.h>		/* Include *before* linux/tcp.h */
 #include <linux/tcp.h>
 
+__BEGIN_DECLS
+
+enum {
+  TCP_ESTABLISHED = 1,
+  TCP_SYN_SENT,
+  TCP_SYN_RECV,
+  TCP_FIN_WAIT1,
+  TCP_FIN_WAIT2,
+  TCP_TIME_WAIT,
+  TCP_CLOSE,
+  TCP_CLOSE_WAIT,
+  TCP_LAST_ACK,
+  TCP_LISTEN,
+  TCP_CLOSING
+};
+
+__END_DECLS
+
 #endif /* _NETINET_TCP_H */
diff --git a/libc/include/resolv.h b/libc/include/resolv.h
index 7c34012..36b93ee 100644
--- a/libc/include/resolv.h
+++ b/libc/include/resolv.h
@@ -43,12 +43,14 @@
 extern struct __res_state *__res_state(void);
 #define _res (*__res_state())
 
-/* Base-64 functions - because some code expects it there */
+#define b64_ntop __b64_ntop
+#define b64_pton __b64_pton
+extern int b64_ntop(u_char const*, size_t, char*, size_t);
+extern int b64_pton(char const*, u_char*, size_t);
 
-#define b64_ntop        __b64_ntop
-#define b64_pton        __b64_pton
-extern int   b64_ntop(u_char const *, size_t, char *, size_t);
-extern int   b64_pton(char const *, u_char *, size_t);
+#define dn_comp __dn_comp
+extern int dn_comp(const char*, u_char*, int, u_char**, u_char**);
+extern int dn_expand(const u_char*, const u_char*, const u_char*, char*, int);
 
 __END_DECLS
 
diff --git a/libc/include/signal.h b/libc/include/signal.h
index 8c9b170..15d2d3a 100644
--- a/libc/include/signal.h
+++ b/libc/include/signal.h
@@ -59,32 +59,32 @@
 
 static __inline__ int sigismember(const sigset_t* set, int signum) {
   int bit = signum - 1; // Signal numbers start at 1, but bit positions start at 0.
+  const unsigned long* local_set = (const unsigned long*) set;
   if (set == NULL || bit < 0 || bit >= (int) (8*sizeof(sigset_t))) {
     errno = EINVAL;
     return -1;
   }
-  const unsigned long* local_set = (const unsigned long*) set;
   return (int) ((local_set[bit / LONG_BIT] >> (bit % LONG_BIT)) & 1);
 }
 
 static __inline__ int sigaddset(sigset_t* set, int signum) {
   int bit = signum - 1; // Signal numbers start at 1, but bit positions start at 0.
+  unsigned long* local_set = (unsigned long*) set;
   if (set == NULL || bit < 0 || bit >= (int) (8*sizeof(sigset_t))) {
     errno = EINVAL;
     return -1;
   }
-  unsigned long* local_set = (unsigned long*) set;
   local_set[bit / LONG_BIT] |= 1UL << (bit % LONG_BIT);
   return 0;
 }
 
 static __inline__ int sigdelset(sigset_t* set, int signum) {
   int bit = signum - 1; // Signal numbers start at 1, but bit positions start at 0.
+  unsigned long* local_set = (unsigned long*) set;
   if (set == NULL || bit < 0 || bit >= (int) (8*sizeof(sigset_t))) {
     errno = EINVAL;
     return -1;
   }
-  unsigned long* local_set = (unsigned long*) set;
   local_set[bit / LONG_BIT] &= ~(1UL << (bit % LONG_BIT));
   return 0;
 }
diff --git a/libc/include/stdint.h b/libc/include/stdint.h
index c3e29dd..b6b6444 100644
--- a/libc/include/stdint.h
+++ b/libc/include/stdint.h
@@ -66,7 +66,7 @@
 #  define INT_FAST8_MIN    INT8_MIN
 #  define INT_FAST8_MAX    INT8_MAX
 
-#  define UINT8_MAX           (255U)
+#  define UINT8_MAX           (255)
 #  define UINT_LEAST8_MAX     UINT8_MAX
 #  define UINT_FAST8_MAX      UINT8_MAX
 #endif
@@ -76,7 +76,7 @@
 #  define INT_LEAST8_C(c)	 INT8_C(c)
 #  define INT_FAST8_C(c)	INT8_C(c)
 
-#  define UINT8_C(c)	c ## U
+#  define UINT8_C(c)	c
 #  define UINT_LEAST8_C(c)  UINT8_C(c)
 #  define UINT_FAST8_C(c)  UINT8_C(c)
 #endif
@@ -99,7 +99,7 @@
 #  define INT_FAST16_MIN	INT32_MIN
 #  define INT_FAST16_MAX	INT32_MAX
 
-#  define UINT16_MAX	(65535U)
+#  define UINT16_MAX	(65535)
 #  define UINT_LEAST16_MAX UINT16_MAX
 #  define UINT_FAST16_MAX UINT32_MAX
 #endif
@@ -109,7 +109,7 @@
 #  define INT_LEAST16_C(c) INT16_C(c)
 #  define INT_FAST16_C(c)	 INT32_C(c)
 
-#  define UINT16_C(c)	c ## U
+#  define UINT16_C(c)	c
 #  define UINT_LEAST16_C(c) UINT16_C(c)
 #  define UINT_FAST16_C(c) UINT32_C(c)
 #endif
@@ -191,8 +191,13 @@
  * intptr_t & uintptr_t
  */
 
+#ifdef __LP64__
+typedef long           intptr_t;
+typedef unsigned long  uintptr_t;
+#else
 typedef int           intptr_t;
 typedef unsigned int  uintptr_t;
+#endif
 
 #ifdef __STDINT_LIMITS
 #  define INTPTR_MIN    INT32_MIN
@@ -226,6 +231,25 @@
 #  define UINTMAX_C(c)	UINT64_C(c)
 #endif
 
+/*
+ * sig_atomic_t, size_t, wchar_t, and wint_t.
+ */
+
+#ifdef __STDINT_LIMITS
+#  define SIG_ATOMIC_MAX INT32_MAX
+#  define SIG_ATOMIC_MIN INT32_MIN
+
+#  define SIZE_MAX UINT32_MAX
+
+#  ifndef WCHAR_MAX /* These might also have been defined by <wchar.h>. */
+#    define WCHAR_MAX INT32_MAX
+#    define WCHAR_MIN INT32_MIN
+#  endif
+
+#  define WINT_MAX INT32_MAX
+#  define WINT_MIN INT32_MIN
+#endif
+
 #define _BITSIZE 32
 
 /* Keep the kernel from trying to define these types... */
diff --git a/libc/include/stdio.h b/libc/include/stdio.h
index fdf747d..23fc944 100644
--- a/libc/include/stdio.h
+++ b/libc/include/stdio.h
@@ -139,8 +139,8 @@
 #define	__SMBF	0x0080		/* _buf is from malloc */
 #define	__SAPP	0x0100		/* fdopen()ed in append mode */
 #define	__SSTR	0x0200		/* this is an sprintf/snprintf string */
-#define	__SOPT	0x0400		/* do fseek() optimisation */
-#define	__SNPT	0x0800		/* do not do fseek() optimisation */
+#define	__SOPT	0x0400		/* do fseek() optimization */
+#define	__SNPT	0x0800		/* do not do fseek() optimization */
 #define	__SOFF	0x1000		/* set iff _offset is in fact correct */
 #define	__SMOD	0x2000		/* true => fgetln modified _p text */
 #define	__SALC	0x4000		/* allocate string space dynamically */
@@ -160,14 +160,14 @@
 #define	_IONBF	2		/* setvbuf should set unbuffered */
 
 #define	BUFSIZ	1024		/* size of buffer used by setbuf */
-
 #define	EOF	(-1)
 
 /*
- * FOPEN_MAX is a minimum maximum, and should be the number of descriptors
- * that the kernel can provide without allocation of a resource that can
- * fail without the process sleeping.  Do not use this for anything.
+ * FOPEN_MAX is a minimum maximum, and is the number of streams that
+ * stdio can provide without attempting to allocate further resources
+ * (which could fail).  Do not use this for anything.
  */
+
 #define	FOPEN_MAX	20	/* must be <= OPEN_MAX <sys/syslimits.h> */
 #define	FILENAME_MAX	1024	/* must be <= PATH_MAX <sys/syslimits.h> */
 
@@ -178,6 +178,7 @@
 #define	L_tmpnam	1024	/* XXX must be == PATH_MAX */
 #define	TMP_MAX		308915776
 
+/* Always ensure that these are consistent with <fcntl.h> and <unistd.h>! */
 #ifndef SEEK_SET
 #define	SEEK_SET	0	/* set file offset to offset */
 #endif
@@ -202,25 +203,20 @@
 int	 ferror(FILE *);
 int	 fflush(FILE *);
 int	 fgetc(FILE *);
-int	 fgetpos(FILE *, fpos_t *);
-char	*fgets(char *, int, FILE *);
-FILE	*fopen(const char *, const char *);
-int	 fprintf(FILE *, const char *, ...)
-		__attribute__((__format__ (printf, 2, 3)))
-		__attribute__((__nonnull__ (2)));
+char	*fgets(char * __restrict, int, FILE * __restrict);
+FILE	*fopen(const char * __restrict , const char * __restrict);
+int	 fprintf(FILE * __restrict , const char * __restrict, ...)
+		__printflike(2, 3);
 int	 fputc(int, FILE *);
-int	 fputs(const char *, FILE *);
-size_t	 fread(void *, size_t, size_t, FILE *);
-FILE	*freopen(const char *, const char *, FILE *);
-int	 fscanf(FILE *, const char *, ...)
-		__attribute__ ((__format__ (scanf, 2, 3)))
-		__attribute__ ((__nonnull__ (2)));
+int	 fputs(const char * __restrict, FILE * __restrict);
+size_t	 fread(void * __restrict, size_t, size_t, FILE * __restrict);
+FILE	*freopen(const char * __restrict, const char * __restrict,
+	    FILE * __restrict);
+int	 fscanf(FILE * __restrict, const char * __restrict, ...)
+		__scanflike(2, 3);
 int	 fseek(FILE *, long, int);
-int	 fseeko(FILE *, off_t, int);
-int	 fsetpos(FILE *, const fpos_t *);
 long	 ftell(FILE *);
-off_t	 ftello(FILE *);
-size_t	 fwrite(const void *, size_t, size_t, FILE *);
+size_t	 fwrite(const void * __restrict, size_t, size_t, FILE * __restrict);
 int	 getc(FILE *);
 int	 getchar(void);
 ssize_t	 getdelim(char ** __restrict, size_t * __restrict, int,
@@ -234,55 +230,55 @@
 extern char *sys_errlist[];
 #endif
 void	 perror(const char *);
-int	 printf(const char *, ...)
-		__attribute__((__format__ (printf, 1, 2)))
-		__attribute__((__nonnull__ (1)));
+int	 printf(const char * __restrict, ...)
+		__printflike(1, 2);
 int	 putc(int, FILE *);
 int	 putchar(int);
 int	 puts(const char *);
 int	 remove(const char *);
-int	 rename(const char *, const char *);
 void	 rewind(FILE *);
-int	 scanf(const char *, ...)
-		__attribute__ ((__format__ (scanf, 1, 2)))
-		__attribute__ ((__nonnull__ (1)));
-void	 setbuf(FILE *, char *);
-int	 setvbuf(FILE *, char *, int, size_t);
-int	 sprintf(char *, const char *, ...)
-		__attribute__((__format__ (printf, 2, 3)))
-		__attribute__((__nonnull__ (2)));
-int	 sscanf(const char *, const char *, ...)
-		__attribute__ ((__format__ (scanf, 2, 3)))
-		__attribute__ ((__nonnull__ (2)));
+int	 scanf(const char * __restrict, ...)
+		__scanflike(1, 2);
+void	 setbuf(FILE * __restrict, char * __restrict);
+int	 setvbuf(FILE * __restrict, char * __restrict, int, size_t);
+int	 sscanf(const char * __restrict, const char * __restrict, ...)
+		__scanflike(2, 3);
 FILE	*tmpfile(void);
-char	*tmpnam(char *);
 int	 ungetc(int, FILE *);
-int	 vfprintf(FILE *, const char *, __va_list)
-		__attribute__((__format__ (printf, 2, 0)))
-		__attribute__((__nonnull__ (2)));
-int	 vprintf(const char *, __va_list)
-		__attribute__((__format__ (printf, 1, 0)))
-		__attribute__((__nonnull__ (1)));
-int	 vsprintf(char *, const char *, __va_list)
-		__attribute__((__format__ (printf, 2, 0)))
-		__attribute__((__nonnull__ (2)));
+int	 vfprintf(FILE * __restrict, const char * __restrict, __va_list)
+		__printflike(2, 0);
+int	 vprintf(const char * __restrict, __va_list)
+		__printflike(1, 0);
+
+#ifndef __AUDIT__
+char	*gets(char *);
+int	 sprintf(char * __restrict, const char * __restrict, ...)
+		__printflike(2, 3);
+char	*tmpnam(char *);
+int	 vsprintf(char * __restrict, const char * __restrict,
+    __va_list)
+		__printflike(2, 0);
+#endif
+
+int	 rename (const char *, const char *);
+
+int	 fgetpos(FILE * __restrict, fpos_t * __restrict);
+int	 fsetpos(FILE *, const fpos_t *);
+
+int	 fseeko(FILE *, off_t, int);
+off_t	 ftello(FILE *);
 
 #if __ISO_C_VISIBLE >= 1999 || __BSD_VISIBLE
-int	 snprintf(char *, size_t, const char *, ...)
-		__attribute__((__format__ (printf, 3, 4)))
-		__attribute__((__nonnull__ (3)));
-int	 vfscanf(FILE *, const char *, __va_list)
-		__attribute__((__format__ (scanf, 2, 0)))
-		__attribute__((__nonnull__ (2)));
+int	 snprintf(char * __restrict, size_t, const char * __restrict, ...)
+		__printflike(3, 4);
+int	 vfscanf(FILE * __restrict, const char * __restrict, __va_list)
+		__scanflike(2, 0);
 int	 vscanf(const char *, __va_list)
-		__attribute__((__format__ (scanf, 1, 0)))
-		__attribute__((__nonnull__ (1)));
-int	 vsnprintf(char *, size_t, const char *, __va_list)
-		__attribute__((__format__ (printf, 3, 0)))
-		__attribute__((__nonnull__ (3)));
-int	 vsscanf(const char *, const char *, __va_list)
-		__attribute__((__format__ (scanf, 2, 0)))
-		__attribute__((__nonnull__ (2)));
+		__scanflike(1, 0);
+int	 vsnprintf(char * __restrict, size_t, const char * __restrict, __va_list)
+		__printflike(3, 0);
+int	 vsscanf(const char * __restrict, const char * __restrict, __va_list)
+		__scanflike(2, 0);
 #endif /* __ISO_C_VISIBLE >= 1999 || __BSD_VISIBLE */
 
 __END_DECLS
@@ -335,18 +331,17 @@
  */
 #if __BSD_VISIBLE
 __BEGIN_DECLS
-int	 asprintf(char **, const char *, ...)
-		__attribute__((__format__ (printf, 2, 3)))
-		__attribute__((__nonnull__ (2)));
-char	*fgetln(FILE *, size_t *);
+int	 asprintf(char ** __restrict, const char * __restrict, ...)
+		__printflike(2, 3);
+char	*fgetln(FILE * __restrict, size_t * __restrict);
 int	 fpurge(FILE *);
 int	 getw(FILE *);
 int	 putw(int, FILE *);
 void	 setbuffer(FILE *, char *, int);
 int	 setlinebuf(FILE *);
-int	 vasprintf(char **, const char *, __va_list)
-		__attribute__((__format__ (printf, 2, 0)))
-		__attribute__((__nonnull__ (2)));
+int	 vasprintf(char ** __restrict, const char * __restrict,
+    __va_list)
+		__printflike(2, 0);
 __END_DECLS
 
 /*
@@ -449,11 +444,9 @@
  */
 __BEGIN_DECLS
 int fdprintf(int, const char*, ...)
-		__attribute__((__format__ (printf, 2, 3)))
-		__attribute__((__nonnull__ (2)));
+		__printflike(2, 3);
 int vfdprintf(int, const char*, __va_list)
-		__attribute__((__format__ (printf, 2, 0)))
-		__attribute__((__nonnull__ (2)));
+		__printflike(2, 0);
 __END_DECLS
 #endif /* _GNU_SOURCE */
 
@@ -462,53 +455,54 @@
 __BEGIN_DECLS
 
 __BIONIC_FORTIFY_INLINE
-__attribute__((__format__ (printf, 3, 0)))
-__attribute__((__nonnull__ (3)))
+__printflike(3, 0)
 int vsnprintf(char *dest, size_t size, const char *format, __va_list ap)
 {
-    return __builtin___vsnprintf_chk(dest, size, 0,
-        __builtin_object_size(dest, 0), format, ap);
+    return __builtin___vsnprintf_chk(dest, size, 0, __bos(dest), format, ap);
 }
 
 __BIONIC_FORTIFY_INLINE
-__attribute__((__format__ (printf, 2, 0)))
-__attribute__((__nonnull__ (2)))
+__printflike(2, 0)
 int vsprintf(char *dest, const char *format, __va_list ap)
 {
-    return __builtin___vsprintf_chk(dest, 0,
-        __builtin_object_size(dest, 0), format, ap);
+    return __builtin___vsprintf_chk(dest, 0, __bos(dest), format, ap);
 }
 
+#if defined(__clang__)
+#define snprintf(dest, size, ...) __builtin___snprintf_chk(dest, size, 0, __bos(dest), __VA_ARGS__)
+#else
 __BIONIC_FORTIFY_INLINE
-__attribute__((__format__ (printf, 3, 4)))
-__attribute__((__nonnull__ (3)))
-int snprintf(char *str, size_t size, const char *format, ...)
+__printflike(3, 4)
+int snprintf(char *dest, size_t size, const char *format, ...)
 {
-    return __builtin___snprintf_chk(str, size, 0,
-        __builtin_object_size(str, 0), format, __builtin_va_arg_pack());
+    return __builtin___snprintf_chk(dest, size, 0,
+        __bos(dest), format, __builtin_va_arg_pack());
 }
+#endif
 
+#if defined(__clang__)
+#define sprintf(dest, ...) __builtin___sprintf_chk(dest, 0, __bos(dest), __VA_ARGS__)
+#else
 __BIONIC_FORTIFY_INLINE
-__attribute__((__format__ (printf, 2, 3)))
-__attribute__((__nonnull__ (2)))
+__printflike(2, 3)
 int sprintf(char *dest, const char *format, ...)
 {
     return __builtin___sprintf_chk(dest, 0,
-        __builtin_object_size(dest, 0), format, __builtin_va_arg_pack());
+        __bos(dest), format, __builtin_va_arg_pack());
 }
+#endif
 
+#if !defined(__clang__)
 extern char *__fgets_real(char *, int, FILE *)
     __asm__(__USER_LABEL_PREFIX__ "fgets");
-extern void __fgets_too_big_error()
-    __attribute__((__error__("fgets called with size bigger than buffer")));
-extern void __fgets_too_small_error()
-    __attribute__((__error__("fgets called with size less than zero")));
+__errordecl(__fgets_too_big_error, "fgets called with size bigger than buffer");
+__errordecl(__fgets_too_small_error, "fgets called with size less than zero");
 extern char *__fgets_chk(char *, int, FILE *, size_t);
 
 __BIONIC_FORTIFY_INLINE
 char *fgets(char *dest, int size, FILE *stream)
 {
-    size_t bos = __builtin_object_size(dest, 0);
+    size_t bos = __bos(dest);
 
     // Compiler can prove, at compile time, that the passed in size
     // is always negative. Force a compiler error.
@@ -536,6 +530,8 @@
     return __fgets_chk(dest, size, stream, bos);
 }
 
+#endif /* !defined(__clang__) */
+
 __END_DECLS
 
 #endif /* defined(__BIONIC_FORTIFY) */
diff --git a/libc/include/string.h b/libc/include/string.h
index 56d89a4..7801ee9 100644
--- a/libc/include/string.h
+++ b/libc/include/string.h
@@ -34,11 +34,11 @@
 
 __BEGIN_DECLS
 
-extern void*  memccpy(void *, const void *, int, size_t);
+extern void*  memccpy(void* __restrict, const void* __restrict, int, size_t);
 extern void*  memchr(const void *, int, size_t) __purefunc;
 extern void*  memrchr(const void *, int, size_t) __purefunc;
 extern int    memcmp(const void *, const void *, size_t) __purefunc;
-extern void*  memcpy(void *, const void *, size_t);
+extern void*  memcpy(void* __restrict, const void* __restrict, size_t);
 extern void*  memmove(void *, const void *, size_t);
 extern void*  memset(void *, int, size_t);
 extern void*  memmem(const void *, size_t, const void *, size_t) __purefunc;
@@ -49,9 +49,10 @@
 extern char*  strrchr(const char *, int) __purefunc;
 
 extern size_t strlen(const char *) __purefunc;
+extern size_t __strlen_chk(const char *, size_t);
 extern int    strcmp(const char *, const char *) __purefunc;
-extern char*  strcpy(char *, const char *);
-extern char*  strcat(char *, const char *);
+extern char*  strcpy(char* __restrict, const char* __restrict);
+extern char*  strcat(char* __restrict, const char* __restrict);
 
 extern int    strcasecmp(const char *, const char *) __purefunc;
 extern int    strncasecmp(const char *, const char *, size_t) __purefunc;
@@ -59,40 +60,38 @@
 
 extern char*  strstr(const char *, const char *) __purefunc;
 extern char*  strcasestr(const char *haystack, const char *needle) __purefunc;
-extern char*  strtok(char *, const char *);
-extern char*  strtok_r(char *, const char *, char**);
+extern char*  strtok(char* __restrict, const char* __restrict);
+extern char*  strtok_r(char* __restrict, const char* __restrict, char** __restrict);
 
 extern char*  strerror(int);
 extern int    strerror_r(int errnum, char *buf, size_t n);
 
 extern size_t strnlen(const char *, size_t) __purefunc;
-extern char*  strncat(char *, const char *, size_t);
+extern char*  strncat(char* __restrict, const char* __restrict, size_t);
 extern char*  strndup(const char *, size_t);
 extern int    strncmp(const char *, const char *, size_t) __purefunc;
-extern char*  strncpy(char *, const char *, size_t);
+extern char*  strncpy(char* __restrict, const char* __restrict, size_t);
 
-extern size_t strlcat(char *, const char *, size_t);
-extern size_t strlcpy(char *, const char *, size_t);
+extern size_t strlcat(char* __restrict, const char* __restrict, size_t);
+extern size_t strlcpy(char* __restrict, const char* __restrict, size_t);
 
 extern size_t strcspn(const char *, const char *) __purefunc;
 extern char*  strpbrk(const char *, const char *) __purefunc;
-extern char*  strsep(char **, const char *);
+extern char*  strsep(char** __restrict, const char* __restrict);
 extern size_t strspn(const char *, const char *);
 
 extern char*  strsignal(int  sig);
 
 extern int    strcoll(const char *, const char *) __purefunc;
-extern size_t strxfrm(char *, const char *, size_t);
+extern size_t strxfrm(char* __restrict, const char* __restrict, size_t);
 
 #if defined(__BIONIC_FORTIFY)
 
-extern void __memcpy_dest_size_error()
-    __attribute__((__error__("memcpy called with size bigger than destination")));
-extern void __memcpy_src_size_error()
-    __attribute__((__error__("memcpy called with size bigger than source")));
+__errordecl(__memcpy_dest_size_error, "memcpy called with size bigger than destination");
+__errordecl(__memcpy_src_size_error, "memcpy called with size bigger than source");
 
 __BIONIC_FORTIFY_INLINE
-void *memcpy (void *dest, const void *src, size_t copy_amount) {
+void* memcpy(void* __restrict dest, const void* __restrict src, size_t copy_amount) {
     char *d = (char *) dest;
     const char *s = (const char *) src;
     size_t s_len = __builtin_object_size(s, 0);
@@ -110,45 +109,51 @@
 }
 
 __BIONIC_FORTIFY_INLINE
-void *memmove (void *dest, const void *src, size_t len) {
+void* memmove(void *dest, const void *src, size_t len) {
     return __builtin___memmove_chk(dest, src, len, __builtin_object_size (dest, 0));
 }
 
 __BIONIC_FORTIFY_INLINE
-char *strcpy(char *dest, const char *src) {
-    return __builtin___strcpy_chk(dest, src, __builtin_object_size (dest, 0));
+char* strcpy(char* __restrict dest, const char* __restrict src) {
+    return __builtin___strcpy_chk(dest, src, __bos(dest));
+}
+
+__errordecl(__strncpy_error, "strncpy called with size bigger than buffer");
+
+__BIONIC_FORTIFY_INLINE
+char* strncpy(char* __restrict dest, const char* __restrict src, size_t n) {
+    size_t bos = __bos(dest);
+    if (__builtin_constant_p(n) && (n > bos)) {
+        __strncpy_error();
+    }
+    return __builtin___strncpy_chk(dest, src, n, bos);
 }
 
 __BIONIC_FORTIFY_INLINE
-char *strncpy(char *dest, const char *src, size_t n) {
-    return __builtin___strncpy_chk(dest, src, n, __builtin_object_size (dest, 0));
+char* strcat(char* __restrict dest, const char* __restrict src) {
+    return __builtin___strcat_chk(dest, src, __bos(dest));
 }
 
 __BIONIC_FORTIFY_INLINE
-char *strcat(char *dest, const char *src) {
-    return __builtin___strcat_chk(dest, src, __builtin_object_size (dest, 0));
+char *strncat(char* __restrict dest, const char* __restrict src, size_t n) {
+    return __builtin___strncat_chk(dest, src, n, __bos(dest));
 }
 
 __BIONIC_FORTIFY_INLINE
-char *strncat(char *dest, const char *src, size_t n) {
-    return __builtin___strncat_chk(dest, src, n, __builtin_object_size (dest, 0));
-}
-
-__BIONIC_FORTIFY_INLINE
-void *memset (void *s, int c, size_t n) {
+void* memset(void *s, int c, size_t n) {
     return __builtin___memset_chk(s, c, n, __builtin_object_size (s, 0));
 }
 
-extern size_t __strlcpy_real(char *, const char *, size_t)
+extern size_t __strlcpy_real(char* __restrict, const char* __restrict, size_t)
     __asm__(__USER_LABEL_PREFIX__ "strlcpy");
-extern void __strlcpy_error()
-    __attribute__((__error__("strlcpy called with size bigger than buffer")));
+__errordecl(__strlcpy_error, "strlcpy called with size bigger than buffer");
 extern size_t __strlcpy_chk(char *, const char *, size_t, size_t);
 
 __BIONIC_FORTIFY_INLINE
-size_t strlcpy(char *dest, const char *src, size_t size) {
-    size_t bos = __builtin_object_size(dest, 0);
+size_t strlcpy(char* __restrict dest, const char* __restrict src, size_t size) {
+    size_t bos = __bos(dest);
 
+#if !defined(__clang__)
     // Compiler doesn't know destination size. Don't call __strlcpy_chk
     if (bos == __BIONIC_FORTIFY_UNKNOWN_SIZE) {
         return __strlcpy_real(dest, src, size);
@@ -165,21 +170,22 @@
     if (__builtin_constant_p(size) && (size > bos)) {
         __strlcpy_error();
     }
+#endif /* !defined(__clang__) */
 
     return __strlcpy_chk(dest, src, size, bos);
 }
 
-extern size_t __strlcat_real(char *, const char *, size_t)
+extern size_t __strlcat_real(char* __restrict, const char* __restrict, size_t)
     __asm__(__USER_LABEL_PREFIX__ "strlcat");
-extern void __strlcat_error()
-    __attribute__((__error__("strlcat called with size bigger than buffer")));
-extern size_t __strlcat_chk(char *, const char *, size_t, size_t);
+__errordecl(__strlcat_error, "strlcat called with size bigger than buffer");
+extern size_t __strlcat_chk(char* __restrict, const char* __restrict, size_t, size_t);
 
 
 __BIONIC_FORTIFY_INLINE
-size_t strlcat(char *dest, const char *src, size_t size) {
-    size_t bos = __builtin_object_size(dest, 0);
+size_t strlcat(char* __restrict dest, const char* __restrict src, size_t size) {
+    size_t bos = __bos(dest);
 
+#if !defined(__clang__)
     // Compiler doesn't know destination size. Don't call __strlcat_chk
     if (bos == __BIONIC_FORTIFY_UNKNOWN_SIZE) {
         return __strlcat_real(dest, src, size);
@@ -196,16 +202,16 @@
     if (__builtin_constant_p(size) && (size > bos)) {
         __strlcat_error();
     }
+#endif /* !defined(__clang__) */
 
     return __strlcat_chk(dest, src, size, bos);
 }
 
-extern size_t __strlen_chk(const char *, size_t);
-
 __BIONIC_FORTIFY_INLINE
 size_t strlen(const char *s) {
-    size_t bos = __builtin_object_size(s, 0);
+    size_t bos = __bos(s);
 
+#if !defined(__clang__)
     // Compiler doesn't know destination size. Don't call __strlen_chk
     if (bos == __BIONIC_FORTIFY_UNKNOWN_SIZE) {
         return __builtin_strlen(s);
@@ -215,6 +221,7 @@
     if (__builtin_constant_p(slen)) {
         return slen;
     }
+#endif /* !defined(__clang__) */
 
     return __strlen_chk(s, bos);
 }
@@ -223,8 +230,9 @@
 
 __BIONIC_FORTIFY_INLINE
 char* strchr(const char *s, int c) {
-    size_t bos = __builtin_object_size(s, 0);
+    size_t bos = __bos(s);
 
+#if !defined(__clang__)
     // Compiler doesn't know destination size. Don't call __strchr_chk
     if (bos == __BIONIC_FORTIFY_UNKNOWN_SIZE) {
         return __builtin_strchr(s, c);
@@ -234,6 +242,7 @@
     if (__builtin_constant_p(slen) && (slen < bos)) {
         return __builtin_strchr(s, c);
     }
+#endif /* !defined(__clang__) */
 
     return __strchr_chk(s, c, bos);
 }
@@ -242,8 +251,9 @@
 
 __BIONIC_FORTIFY_INLINE
 char* strrchr(const char *s, int c) {
-    size_t bos = __builtin_object_size(s, 0);
+    size_t bos = __bos(s);
 
+#if !defined(__clang__)
     // Compiler doesn't know destination size. Don't call __strrchr_chk
     if (bos == __BIONIC_FORTIFY_UNKNOWN_SIZE) {
         return __builtin_strrchr(s, c);
@@ -253,6 +263,7 @@
     if (__builtin_constant_p(slen) && (slen < bos)) {
         return __builtin_strrchr(s, c);
     }
+#endif /* !defined(__clang__) */
 
     return __strrchr_chk(s, c, bos);
 }
diff --git a/libc/include/sys/_system_properties.h b/libc/include/sys/_system_properties.h
index 5d2043d..5eee7f0 100644
--- a/libc/include/sys/_system_properties.h
+++ b/libc/include/sys/_system_properties.h
@@ -34,38 +34,21 @@
 #else
 #include <sys/system_properties.h>
 
-typedef struct prop_area prop_area;
 typedef struct prop_msg prop_msg;
 
 #define PROP_AREA_MAGIC   0x504f5250
-#define PROP_AREA_VERSION 0x45434f76
+#define PROP_AREA_VERSION 0xfc6ed0ab
+#define PROP_AREA_VERSION_COMPAT 0x45434f76
 
 #define PROP_SERVICE_NAME "property_service"
 #define PROP_FILENAME "/dev/__properties__"
 
-/* #define PROP_MAX_ENTRIES 247 */
-/* 247 -> 32620 bytes (<32768) */
-
-#define TOC_NAME_LEN(toc)       ((toc) >> 24)
-#define TOC_TO_INFO(area, toc)  ((prop_info*) (((char*) area) + ((toc) & 0xFFFFFF)))
-
-struct prop_area {
-    unsigned volatile count;
-    unsigned volatile serial;
-    unsigned magic;
-    unsigned version;
-    unsigned reserved[4];
-    unsigned toc[1];
-};
+#define PA_SIZE         (128 * 1024)
 
 #define SERIAL_VALUE_LEN(serial) ((serial) >> 24)
 #define SERIAL_DIRTY(serial) ((serial) & 1)
 
-struct prop_info {
-    char name[PROP_NAME_MAX];
-    unsigned volatile serial;
-    char value[PROP_VALUE_MAX];
-};
+__BEGIN_DECLS
 
 struct prop_msg 
 {
@@ -93,11 +76,6 @@
 **   1. pi->serial = pi->serial | 1
 **   2. memcpy(pi->value, local_value, value_len)
 **   3. pi->serial = (value_len << 24) | ((pi->serial + 1) & 0xffffff)
-**
-** Improvements:
-** - maintain the toc sorted by pi->name to allow lookup
-**   by binary search
-**
 */
 
 #define PROP_PATH_RAMDISK_DEFAULT  "/default.prop"
@@ -106,5 +84,62 @@
 #define PROP_PATH_LOCAL_OVERRIDE   "/data/local.prop"
 #define PROP_PATH_FACTORY          "/factory/factory.prop"
 
+/*
+** Map the property area from the specified filename.  This
+** method is for testing only.
+*/
+int __system_property_set_filename(const char *filename);
+
+/*
+** Initialize the area to be used to store properties.  Can
+** only be done by a single process that has write access to
+** the property area.
+*/
+int __system_property_area_init();
+
+/* Add a new system property.  Can only be done by a single
+** process that has write access to the property area, and
+** that process must handle sequencing to ensure the property
+** does not already exist and that only one property is added
+** or updated at a time.
+**
+** Returns 0 on success, -1 if the property area is full.
+*/
+int __system_property_add(const char *name, unsigned int namelen,
+			const char *value, unsigned int valuelen);
+
+/* Update the value of a system property returned by
+** __system_property_find.  Can only be done by a single process
+** that has write access to the property area, and that process
+** must handle sequencing to ensure that only one property is
+** updated at a time.
+**
+** Returns 0 on success, -1 if the parameters are incorrect.
+*/
+int __system_property_update(prop_info *pi, const char *value, unsigned int len);
+
+/* Read the serial number of a system property returned by
+** __system_property_find.
+**
+** Returns the serial number on success, -1 on error.
+*/
+unsigned int __system_property_serial(const prop_info *pi);
+
+/* Wait for any system property to be updated.  Caller must pass
+** in 0 the first time, and the previous return value on each
+** successive call. */
+unsigned int __system_property_wait_any(unsigned int serial);
+
+/*  Compatibility functions to support using an old init with a new libc,
+ ** mostly for the OTA updater binary.  These can be deleted once OTAs from
+ ** a pre-K release no longer need to be supported. */
+const prop_info *__system_property_find_compat(const char *name);
+int __system_property_read_compat(const prop_info *pi, char *name, char *value);
+int __system_property_foreach_compat(
+        void (*propfn)(const prop_info *pi, void *cookie),
+        void *cookie);
+
+__END_DECLS
+
 #endif
 #endif
diff --git a/libc/include/sys/auxv.h b/libc/include/sys/auxv.h
index 918442f..0d753c3 100644
--- a/libc/include/sys/auxv.h
+++ b/libc/include/sys/auxv.h
@@ -29,6 +29,7 @@
 #define _SYS_AUXV_H_
 
 #include <linux/auxvec.h>
+#include <sys/cdefs.h>
 
 __BEGIN_DECLS
 
diff --git a/libc/include/sys/cdefs.h b/libc/include/sys/cdefs.h
index 1976d6a..a4c1aff 100644
--- a/libc/include/sys/cdefs.h
+++ b/libc/include/sys/cdefs.h
@@ -211,6 +211,11 @@
 #define __statement(x)	(x)
 #endif
 
+#define __nonnull(args) __attribute__((__nonnull__ args))
+
+#define __printflike(x, y) __attribute__((__format__(printf, x, y))) __nonnull((x))
+#define __scanflike(x, y) __attribute__((__format__(scanf, x, y))) __nonnull((x))
+
 /*
  * C99 defines the restrict type qualifier keyword, which was made available
  * in GCC 2.92.
@@ -327,6 +332,12 @@
 #define __wur
 #endif
 
+#if __GNUC_PREREQ__(4, 3)
+#define __errordecl(name, msg) extern void name(void) __attribute__((__error__(msg)))
+#else
+#define __errordecl(name, msg) extern void name(void)
+#endif
+
 /*
  * Macros for manipulating "link sets".  Link sets are arrays of pointers
  * to objects, which are gathered up by the linker.
@@ -515,13 +526,18 @@
 #define  __BIONIC__   1
 #include <android/api-level.h>
 
-#if defined(_FORTIFY_SOURCE) && _FORTIFY_SOURCE > 0 && defined(__OPTIMIZE__) && __OPTIMIZE__ > 0 && !defined(__clang__)
+#if defined(_FORTIFY_SOURCE) && _FORTIFY_SOURCE > 0 && defined(__OPTIMIZE__) && __OPTIMIZE__ > 0
 #define __BIONIC_FORTIFY 1
+#if _FORTIFY_SOURCE == 2
+#define __bos(s) __builtin_object_size((s), 1)
+#else
+#define __bos(s) __builtin_object_size((s), 0)
+#endif
+
 #define __BIONIC_FORTIFY_INLINE \
     extern inline \
     __attribute__ ((always_inline)) \
-    __attribute__ ((gnu_inline)) \
-    __attribute__ ((artificial))
+    __attribute__ ((gnu_inline))
 #endif
 #define __BIONIC_FORTIFY_UNKNOWN_SIZE ((size_t) -1)
 
diff --git a/libc/include/sys/epoll.h b/libc/include/sys/epoll.h
index decdb46..38739aa 100644
--- a/libc/include/sys/epoll.h
+++ b/libc/include/sys/epoll.h
@@ -42,6 +42,9 @@
 #define EPOLLWRNORM      0x00000100
 #define EPOLLWRBAND      0x00000200
 #define EPOLLMSG         0x00000400
+#define EPOLLRDHUP       0x00002000
+#define EPOLLWAKEUP      0x20000000
+#define EPOLLONESHOT     0x40000000
 #define EPOLLET          0x80000000
 
 #define EPOLL_CTL_ADD    1
diff --git a/libc/include/sys/ioctl.h b/libc/include/sys/ioctl.h
index 9f68510..49d452c 100644
--- a/libc/include/sys/ioctl.h
+++ b/libc/include/sys/ioctl.h
@@ -30,6 +30,11 @@
 
 #include <sys/cdefs.h>
 #include <linux/ioctl.h>
+/*
+ * NetBSD and glibc's <sys/ioctl.h> provide some of the
+ * terminal-related ioctl data structures such as struct winsize.
+ */
+#include <linux/termios.h>
 #include <asm/ioctls.h>
 #include <asm/termbits.h>
 #include <sys/ioctl_compat.h>
diff --git a/libc/include/sys/limits.h b/libc/include/sys/limits.h
index 2d0d11e..f1080fe 100644
--- a/libc/include/sys/limits.h
+++ b/libc/include/sys/limits.h
@@ -176,9 +176,9 @@
 #define  _POSIX_JOB_CONTROL         1    /* job control is a Linux feature */
 
 #define _POSIX_THREAD_DESTRUCTOR_ITERATIONS 4 /* the minimum mandated by POSIX */
-#define PTHREAD_DESTRUCTOR_ITERATIONS 4
+#define PTHREAD_DESTRUCTOR_ITERATIONS _POSIX_THREAD_DESTRUCTOR_ITERATIONS
 #define _POSIX_THREAD_KEYS_MAX 128            /* the minimum mandated by POSIX */
-/* TODO: our PTHREAD_KEYS_MAX is currently too low to be posix compliant! */
+#define PTHREAD_KEYS_MAX _POSIX_THREAD_KEYS_MAX
 #define _POSIX_THREAD_THREADS_MAX 64          /* the minimum mandated by POSIX */
 #define PTHREAD_THREADS_MAX                   /* bionic has no specific limit */
 
diff --git a/libc/include/sys/mount.h b/libc/include/sys/mount.h
index 207527e..3c35d31 100644
--- a/libc/include/sys/mount.h
+++ b/libc/include/sys/mount.h
@@ -30,75 +30,18 @@
 
 #include <sys/cdefs.h>
 #include <sys/ioctl.h>
+#include <linux/fs.h>
 
 __BEGIN_DECLS
 
-/*
- * These are the fs-independent mount-flags: up to 32 flags are supported
- */
-#define MS_RDONLY        1      /* Mount read-only */
-#define MS_NOSUID        2      /* Ignore suid and sgid bits */
-#define MS_NODEV         4      /* Disallow access to device special files */
-#define MS_NOEXEC        8      /* Disallow program execution */
-#define MS_SYNCHRONOUS  16      /* Writes are synced at once */
-#define MS_REMOUNT      32      /* Alter flags of a mounted FS */
-#define MS_MANDLOCK     64      /* Allow mandatory locks on an FS */
-#define MS_DIRSYNC      128     /* Directory modifications are synchronous */
-#define MS_NOATIME      1024    /* Do not update access times. */
-#define MS_NODIRATIME   2048    /* Do not update directory access times */
-#define MS_BIND         4096
-#define MS_MOVE         8192
-#define MS_REC          16384
-#define MS_VERBOSE      32768
-#define MS_POSIXACL     (1<<16) /* VFS does not apply the umask */
-#define MS_UNBINDABLE   (1<<17) /* change to unbindable */
-#define MS_PRIVATE      (1<<18) /* change to private */
-#define MS_SLAVE        (1<<19) /* change to slave */
-#define MS_SHARED       (1<<20) /* change to shared */
-#define MS_ACTIVE       (1<<30)
-#define MS_NOUSER       (1<<31)
-
-/*
- * Superblock flags that can be altered by MS_REMOUNT
- */
-#define MS_RMT_MASK     (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_NOATIME|MS_NODIRATIME)
-
-/*
- * Old magic mount flag and mask
- */
-#define MS_MGC_VAL 0xC0ED0000
-#define MS_MGC_MSK 0xffff0000
-
-/*
- * umount2() flags
- */
+/* umount2 flags. */
 #define MNT_FORCE	1	/* Forcibly unmount */
 #define MNT_DETACH	2	/* Detach from tree only */
 #define MNT_EXPIRE	4	/* Mark for expiry */
 
-/*
- * Block device ioctls
- */
-#define BLKROSET   _IO(0x12, 93) /* Set device read-only (0 = read-write).  */
-#define BLKROGET   _IO(0x12, 94) /* Get read-only status (0 = read_write).  */
-#define BLKRRPART  _IO(0x12, 95) /* Re-read partition table.  */
-#define BLKGETSIZE _IO(0x12, 96) /* Return device size.  */
-#define BLKFLSBUF  _IO(0x12, 97) /* Flush buffer cache.  */
-#define BLKRASET   _IO(0x12, 98) /* Set read ahead for block device.  */
-#define BLKRAGET   _IO(0x12, 99) /* Get current read ahead setting.  */
-
-/*
- * Prototypes
- */
-extern int mount(const char *, const char *,
-		   const char *, unsigned long,
-		   const void *);
-extern int umount(const char *);
-extern int umount2(const char *, int);
-
-#if 0 /* MISSING FROM BIONIC */
-extern int pivot_root(const char *, const char *);
-#endif /* MISSING */
+extern int mount(const char*, const char*, const char*, unsigned long, const void*);
+extern int umount(const char*);
+extern int umount2(const char*, int);
 
 __END_DECLS
 
diff --git a/libc/include/sys/personality.h b/libc/include/sys/personality.h
index 7772550..8a023f9 100644
--- a/libc/include/sys/personality.h
+++ b/libc/include/sys/personality.h
@@ -29,6 +29,7 @@
 #ifndef _SYS_PERSONALITY_H_
 #define _SYS_PERSONALITY_H_
 
+#include <sys/cdefs.h>
 #include <linux/personality.h>
 
 __BEGIN_DECLS
diff --git a/libc/include/sys/signalfd.h b/libc/include/sys/signalfd.h
index c03a0e9..2537ab9 100644
--- a/libc/include/sys/signalfd.h
+++ b/libc/include/sys/signalfd.h
@@ -29,12 +29,13 @@
 #ifndef _SYS_SIGNALFD_H_
 #define _SYS_SIGNALFD_H_
 
-#include <signal.h>
 #include <linux/signalfd.h>
+#include <signal.h>
+#include <sys/cdefs.h>
 
 __BEGIN_DECLS
 
-extern int signalfd(int fd, const sigset_t* mask, int flags) __attribute__((__nonnull__(2)));
+extern int signalfd(int fd, const sigset_t* mask, int flags) __nonnull((2));
 
 __END_DECLS
 
diff --git a/libc/include/sys/stat.h b/libc/include/sys/stat.h
index 4e8beb6..10627b9 100644
--- a/libc/include/sys/stat.h
+++ b/libc/include/sys/stat.h
@@ -134,17 +134,18 @@
 extern mode_t __umask_chk(mode_t);
 extern mode_t __umask_real(mode_t)
     __asm__(__USER_LABEL_PREFIX__ "umask");
-extern void __umask_error()
-    __attribute__((__error__("umask called with invalid mode")));
+__errordecl(__umask_invalid_mode, "umask called with invalid mode");
 
 __BIONIC_FORTIFY_INLINE
 mode_t umask(mode_t mode) {
+#if !defined(__clang__)
   if (__builtin_constant_p(mode)) {
     if ((mode & 0777) != mode) {
-      __umask_error();
+      __umask_invalid_mode();
     }
     return __umask_real(mode);
   }
+#endif
   return __umask_chk(mode);
 }
 #endif /* defined(__BIONIC_FORTIFY) */
@@ -167,7 +168,8 @@
 
 # define UTIME_NOW      ((1l << 30) - 1l)
 # define UTIME_OMIT     ((1l << 30) - 2l)
-extern int utimensat (int fd, const char *path, const struct timespec times[2], int flags);
+extern int utimensat(int fd, const char *path, const struct timespec times[2], int flags);
+extern int futimens(int fd, const struct timespec times[2]);
 
 __END_DECLS
 
diff --git a/libc/include/sys/statvfs.h b/libc/include/sys/statvfs.h
new file mode 100644
index 0000000..e910c03
--- /dev/null
+++ b/libc/include/sys/statvfs.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _SYS_STATVFS_H_
+#define _SYS_STATVFS_H_
+
+#include <stdint.h>
+#include <sys/cdefs.h>
+#include <sys/types.h>
+
+__BEGIN_DECLS
+
+struct statvfs {
+  unsigned long f_bsize;
+  unsigned long f_frsize;
+  fsblkcnt_t    f_blocks;
+  fsblkcnt_t    f_bfree;
+  fsblkcnt_t    f_bavail;
+  fsfilcnt_t    f_files;
+  fsfilcnt_t    f_ffree;
+  fsfilcnt_t    f_favail;
+  unsigned long f_fsid;
+  unsigned long f_flag;
+  unsigned long f_namemax;
+};
+
+#define ST_RDONLY      0x0001
+#define ST_NOSUID      0x0002
+#define ST_NODEV       0x0004
+#define ST_NOEXEC      0x0008
+#define ST_SYNCHRONOUS 0x0010
+#define ST_MANDLOCK    0x0040
+#define ST_NOATIME     0x0400
+#define ST_NODIRATIME  0x0800
+#define ST_RELATIME    0x1000
+
+extern int statvfs(const char* __restrict, struct statvfs* __restrict) __nonnull((1, 2));
+extern int fstatvfs(int, struct statvfs*) __nonnull((2));
+
+__END_DECLS
+
+#endif /* _SYS_STATVFS_H_ */
diff --git a/libc/arch-mips/bionic/atexit.S b/libc/include/sys/swap.h
similarity index 81%
copy from libc/arch-mips/bionic/atexit.S
copy to libc/include/sys/swap.h
index 7f0c820..97a6f36 100644
--- a/libc/arch-mips/bionic/atexit.S
+++ b/libc/include/sys/swap.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (C) 2013 The Android Open Source Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -25,19 +25,17 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-	.text
-	.globl	atexit
-	.hidden	atexit
-	.type	atexit, @function
-	.align  4
-	.ent	atexit
-atexit:
-	.set	noreorder
-	.cpload	$t9
-	.set	reorder
-	la	$t9, __cxa_atexit
-	move	$a1, $0
-	la      $a2, __dso_handle
-	j	$t9
-	.size	atexit, .-atexit
-	.end	atexit
+
+#ifndef _SYS_SWAP_H_
+#define _SYS_SWAP_H_
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+
+extern int swapon(const char*, int) __nonnull((1));
+extern int swapoff(const char*) __nonnull((1));
+
+__END_DECLS
+
+#endif /* _SYS_SWAP_H_ */
diff --git a/libc/include/sys/system_properties.h b/libc/include/sys/system_properties.h
index 85915b2..01c3db3 100644
--- a/libc/include/sys/system_properties.h
+++ b/libc/include/sys/system_properties.h
@@ -76,13 +76,26 @@
 ** there is no nth property.  Use __system_property_read() to
 ** read the value of this property.
 **
-** This method is for inspecting and debugging the property 
+** Please do not call this method.  It only exists to provide
+** backwards compatibility to NDK apps.  Its implementation
+** is inefficient and order of results may change from call
+** to call.
+*/ 
+const prop_info *__system_property_find_nth(unsigned n);
+
+/* Pass a prop_info for each system property to the provided
+** callback.  Use __system_property_read() to read the value
+** of this property.
+**
+** This method is for inspecting and debugging the property
 ** system.  Please use __system_property_find() instead.
 **
 ** Order of results may change from call to call.  This is
 ** not a bug.
-*/ 
-const prop_info *__system_property_find_nth(unsigned n);
+*/
+int __system_property_foreach(
+        void (*propfn)(const prop_info *pi, void *cookie),
+        void *cookie);
 
 __END_DECLS
 
diff --git a/libc/include/sys/timeb.h b/libc/include/sys/timeb.h
index f2cc39c..cf6f255 100644
--- a/libc/include/sys/timeb.h
+++ b/libc/include/sys/timeb.h
@@ -28,6 +28,7 @@
 #ifndef _SYS_TIMEB_H
 #define _SYS_TIMEB_H
 
+#include <sys/cdefs.h>
 #include <sys/time.h>
 
 __BEGIN_DECLS
diff --git a/libc/arch-mips/bionic/__dso_handle.S b/libc/include/sys/timerfd.h
similarity index 74%
rename from libc/arch-mips/bionic/__dso_handle.S
rename to libc/include/sys/timerfd.h
index 3e80128..0651f1c 100644
--- a/libc/arch-mips/bionic/__dso_handle.S
+++ b/libc/include/sys/timerfd.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010 The Android Open Source Project
+ * Copyright (C) 2013 The Android Open Source Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,17 +26,20 @@
  * SUCH DAMAGE.
  */
 
-# The __dso_handle global variable is used by static
-# C++ constructors and destructors in the binary.
-# See http://www.codesourcery.com/public/cxx-abi/abi.html#dso-dtor
-#
-        .section .bss
-        .align 4
+#ifndef _SYS_TIMERFD_H_
+#define _SYS_TIMERFD_H_
 
-#ifndef CRT_LEGACY_WORKAROUND
-	.hidden __dso_handle
-#endif
+#include <time.h>
+#include <sys/types.h>
+#include <linux/timerfd.h>
 
-        .globl __dso_handle
-__dso_handle:
-        .long 0
+__BEGIN_DECLS
+
+extern int timerfd_create(clockid_t, int);
+extern int timerfd_settime(int, int, const struct itimerspec*,
+                           struct itimerspec*);
+extern int timerfd_gettime(int, struct itimerspec*);
+
+__END_DECLS
+
+#endif /* _SYS_TIMERFD_H_ */
diff --git a/libc/include/sys/vfs.h b/libc/include/sys/vfs.h
index d14944d..6a55c99 100644
--- a/libc/include/sys/vfs.h
+++ b/libc/include/sys/vfs.h
@@ -34,7 +34,7 @@
 
 __BEGIN_DECLS
 
-/* note: this corresponds to the kernel's statfs64 type */
+/* These correspond to the kernel's statfs64 type. */
 #ifdef __mips__
 struct statfs {
     uint32_t        f_type;
@@ -48,7 +48,8 @@
     uint64_t        f_bavail;
     __kernel_fsid_t f_fsid;
     uint32_t        f_namelen;
-    uint32_t        f_spare[6];
+    uint32_t        f_flags;
+    uint32_t        f_spare[5];
 };
 #else
 struct statfs {
@@ -62,7 +63,8 @@
     __kernel_fsid_t f_fsid;
     uint32_t        f_namelen;
     uint32_t        f_frsize;
-    uint32_t        f_spare[5];
+    uint32_t        f_flags;
+    uint32_t        f_spare[4];
 };
 #endif
 
diff --git a/libc/include/syslog.h b/libc/include/syslog.h
index d35bc79..4677c14 100644
--- a/libc/include/syslog.h
+++ b/libc/include/syslog.h
@@ -98,18 +98,18 @@
 
 #define SYSLOG_DATA_INIT {-1, 0, 0, 0, (const char *)0, LOG_USER, 0xff}
 
-#define _PATH_LOG  "/dev/kmsg"
+#define _PATH_LOG  "/dev/syslog"
 
 extern void	closelog(void);
 extern void	openlog(const char *, int, int);
 extern int	setlogmask(int);
-extern void	syslog(int, const char *, ...);
-extern void	vsyslog(int, const char *, va_list);
+extern void	syslog(int, const char *, ...) __printflike(2, 3);
+extern void	vsyslog(int, const char *, va_list) __printflike(2, 0);
 extern void	closelog_r(struct syslog_data *);
 extern void	openlog_r(const char *, int, int, struct syslog_data *);
 extern int	setlogmask_r(int, struct syslog_data *);
-extern void	syslog_r(int, struct syslog_data *, const char *, ...);
-extern void	vsyslog_r(int, struct syslog_data *, const char *, va_list);
+extern void	syslog_r(int, struct syslog_data *, const char *, ...) __printflike(3, 4);
+extern void	vsyslog_r(int, struct syslog_data *, const char *, va_list) __printflike(3, 0);
 
 __END_DECLS
 
diff --git a/libc/include/time.h b/libc/include/time.h
index e280e0a..8995585 100644
--- a/libc/include/time.h
+++ b/libc/include/time.h
@@ -101,9 +101,12 @@
 #define CLOCK_MONOTONIC            1
 #define CLOCK_PROCESS_CPUTIME_ID   2
 #define CLOCK_THREAD_CPUTIME_ID    3
-#define CLOCK_REALTIME_HR          4
-#define CLOCK_MONOTONIC_HR         5
+#define CLOCK_MONOTONIC_RAW        4
+#define CLOCK_REALTIME_COARSE      5
+#define CLOCK_MONOTONIC_COARSE     6
 #define CLOCK_BOOTTIME             7
+#define CLOCK_REALTIME_ALARM       8
+#define CLOCK_BOOTTIME_ALARM       9
 
 extern int  timer_create(int, struct sigevent*, timer_t*);
 extern int  timer_delete(timer_t);
diff --git a/libc/include/wchar.h b/libc/include/wchar.h
index a4e19f0..76ac02c 100644
--- a/libc/include/wchar.h
+++ b/libc/include/wchar.h
@@ -65,8 +65,11 @@
     WC_TYPE_MAX
 } wctype_t;
 
+#ifndef WCHAR_MAX
 #define  WCHAR_MAX   INT_MAX
 #define  WCHAR_MIN   INT_MIN
+#endif
+
 #define  WEOF        ((wint_t)(-1))
 
 extern wint_t            btowc(int);
diff --git a/libc/kernel/common/linux/fs.h b/libc/kernel/common/linux/fs.h
index 6b7e6b8..538d0e4 100644
--- a/libc/kernel/common/linux/fs.h
+++ b/libc/kernel/common/linux/fs.h
@@ -33,190 +33,195 @@
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define SEEK_CUR 1
 #define SEEK_END 2
-#define SEEK_MAX SEEK_END
-struct fstrim_range {
+#define SEEK_DATA 3
+#define SEEK_HOLE 4
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define SEEK_MAX SEEK_HOLE
+struct fstrim_range {
  __u64 start;
  __u64 len;
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  __u64 minlen;
 };
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 struct files_stat_struct {
  unsigned long nr_files;
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  unsigned long nr_free_files;
  unsigned long max_files;
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 };
 struct inodes_stat_t {
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  int nr_inodes;
  int nr_unused;
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  int dummy[5];
 };
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define NR_FILE 8192
-#define MAY_EXEC 1
+#define MAY_EXEC 0x00000001
+#define MAY_WRITE 0x00000002
+#define MAY_READ 0x00000004
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
-#define MAY_WRITE 2
-#define MAY_READ 4
-#define MAY_APPEND 8
-#define MAY_ACCESS 16
+#define MAY_APPEND 0x00000008
+#define MAY_ACCESS 0x00000010
+#define MAY_OPEN 0x00000020
+#define MAY_CHDIR 0x00000040
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
-#define MAY_OPEN 32
-#define MAY_CHDIR 64
+#define MAY_NOT_BLOCK 0x00000080
 #define FMODE_READ ((__force fmode_t)0x1)
 #define FMODE_WRITE ((__force fmode_t)0x2)
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define FMODE_LSEEK ((__force fmode_t)0x4)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define FMODE_PREAD ((__force fmode_t)0x8)
 #define FMODE_PWRITE ((__force fmode_t)0x10)
 #define FMODE_EXEC ((__force fmode_t)0x20)
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define FMODE_NDELAY ((__force fmode_t)0x40)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define FMODE_EXCL ((__force fmode_t)0x80)
 #define FMODE_WRITE_IOCTL ((__force fmode_t)0x100)
-#define FMODE_NOCMTIME ((__force fmode_t)0x800)
+#define FMODE_32BITHASH ((__force fmode_t)0x200)
+#define FMODE_64BITHASH ((__force fmode_t)0x400)
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define FMODE_NOCMTIME ((__force fmode_t)0x800)
 #define FMODE_RANDOM ((__force fmode_t)0x1000)
 #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
 #define FMODE_PATH ((__force fmode_t)0x4000)
-#define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
 #define RW_MASK REQ_WRITE
 #define RWA_MASK REQ_RAHEAD
 #define READ 0
-#define WRITE RW_MASK
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define WRITE RW_MASK
 #define READA RWA_MASK
 #define READ_SYNC (READ | REQ_SYNC)
-#define READ_META (READ | REQ_META)
 #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE)
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define WRITE_ODIRECT (WRITE | REQ_SYNC)
-#define WRITE_META (WRITE | REQ_META)
 #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH)
 #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA)
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define SEL_IN 1
 #define SEL_OUT 2
 #define SEL_EX 4
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define FS_REQUIRES_DEV 1
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define FS_BINARY_MOUNTDATA 2
 #define FS_HAS_SUBTYPE 4
 #define FS_REVAL_DOT 16384
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define FS_RENAME_DOES_D_MOVE 32768
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define MS_RDONLY 1
 #define MS_NOSUID 2
 #define MS_NODEV 4
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define MS_NOEXEC 8
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define MS_SYNCHRONOUS 16
 #define MS_REMOUNT 32
 #define MS_MANDLOCK 64
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define MS_DIRSYNC 128
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define MS_NOATIME 1024
 #define MS_NODIRATIME 2048
 #define MS_BIND 4096
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define MS_MOVE 8192
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define MS_REC 16384
 #define MS_VERBOSE 32768
 #define MS_SILENT 32768
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define MS_POSIXACL (1<<16)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define MS_UNBINDABLE (1<<17)
 #define MS_PRIVATE (1<<18)
 #define MS_SLAVE (1<<19)
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define MS_SHARED (1<<20)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define MS_RELATIME (1<<21)
 #define MS_KERNMOUNT (1<<22)
 #define MS_I_VERSION (1<<23)
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define MS_STRICTATIME (1<<24)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define MS_NOSEC (1<<28)
 #define MS_BORN (1<<29)
 #define MS_ACTIVE (1<<30)
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define MS_NOUSER (1<<31)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION)
 #define MS_MGC_VAL 0xC0ED0000
 #define MS_MGC_MSK 0xffff0000
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define S_SYNC 1
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define S_NOATIME 2
 #define S_APPEND 4
 #define S_IMMUTABLE 8
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define S_DEAD 16
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define S_NOQUOTA 32
 #define S_DIRSYNC 64
 #define S_NOCMTIME 128
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define S_SWAPFILE 256
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define S_PRIVATE 512
 #define S_IMA 1024
 #define S_AUTOMOUNT 2048
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define S_NOSEC 4096
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg))
 #define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY)
 #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) ||   ((inode)->i_flags & S_SYNC))
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) ||   ((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK)
 #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
 #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION)
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND)
 #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
 #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL)
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME)
 #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE)
 #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE)
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IS_IMA(inode) ((inode)->i_flags & S_IMA)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT)
 #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC)
 #define BLKROSET _IO(0x12,93)
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define BLKROGET _IO(0x12,94)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define BLKRRPART _IO(0x12,95)
 #define BLKGETSIZE _IO(0x12,96)
 #define BLKFLSBUF _IO(0x12,97)
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define BLKRASET _IO(0x12,98)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define BLKRAGET _IO(0x12,99)
 #define BLKFRASET _IO(0x12,100)
 #define BLKFRAGET _IO(0x12,101)
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define BLKSECTSET _IO(0x12,102)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define BLKSECTGET _IO(0x12,103)
 #define BLKSSZGET _IO(0x12,104)
 #define BLKBSZGET _IOR(0x12,112,size_t)
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define BLKBSZSET _IOW(0x12,113,size_t)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define BLKGETSIZE64 _IOR(0x12,114,size_t)
 #define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup)
 #define BLKTRACESTART _IO(0x12,116)
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define BLKTRACESTOP _IO(0x12,117)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define BLKTRACETEARDOWN _IO(0x12,118)
 #define BLKDISCARD _IO(0x12,119)
 #define BLKIOMIN _IO(0x12,120)
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define BLKIOOPT _IO(0x12,121)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define BLKALIGNOFF _IO(0x12,122)
 #define BLKPBSZGET _IO(0x12,123)
 #define BLKDISCARDZEROES _IO(0x12,124)
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define BLKSECDISCARD _IO(0x12,125)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define BLKROTATIONAL _IO(0x12,126)
 #define BMAP_IOCTL 1
 #define FIBMAP _IO(0x00,1)
 #define FIGETBSZ _IO(0x00,2)
diff --git a/libc/kernel/common/linux/netfilter_ipv4/ip_tables.h b/libc/kernel/common/linux/netfilter_ipv4/ip_tables.h
index e5e9d66..6f75416 100644
--- a/libc/kernel/common/linux/netfilter_ipv4/ip_tables.h
+++ b/libc/kernel/common/linux/netfilter_ipv4/ip_tables.h
@@ -23,7 +23,7 @@
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #include <linux/netfilter/x_tables.h>
 #define IPT_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN
-#define IPT_TABLE_MAXNAMELEN XT_FUNCTION_MAXNAMELEN
+#define IPT_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN
 #define ipt_match xt_match
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define ipt_target xt_target
diff --git a/libc/kernel/common/linux/timerfd.h b/libc/kernel/common/linux/timerfd.h
new file mode 100644
index 0000000..0165ebb
--- /dev/null
+++ b/libc/kernel/common/linux/timerfd.h
@@ -0,0 +1,31 @@
+/****************************************************************************
+ ****************************************************************************
+ ***
+ ***   This header was automatically generated from a Linux kernel header
+ ***   of the same name, to make information necessary for userspace to
+ ***   call into the kernel available to libc.  It contains only constants,
+ ***   structures, and macros generated from the original header, and thus,
+ ***   contains no copyrightable information.
+ ***
+ ***   To edit the content of this header, modify the corresponding
+ ***   source file (e.g. under external/kernel-headers/original/) then
+ ***   run bionic/libc/kernel/tools/update_all.py
+ ***
+ ***   Any manual change here will be lost the next time this script will
+ ***   be run. You've been warned!
+ ***
+ ****************************************************************************
+ ****************************************************************************/
+#ifndef _LINUX_TIMERFD_H
+#define _LINUX_TIMERFD_H
+#include <linux/fcntl.h>
+#define TFD_TIMER_ABSTIME (1 << 0)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define TFD_TIMER_CANCEL_ON_SET (1 << 1)
+#define TFD_CLOEXEC O_CLOEXEC
+#define TFD_NONBLOCK O_NONBLOCK
+#define TFD_SHARED_FCNTL_FLAGS (TFD_CLOEXEC | TFD_NONBLOCK)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS
+#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)
+#endif
diff --git a/libc/kernel/common/linux/types.h b/libc/kernel/common/linux/types.h
index 90b0f6b..b40959c 100644
--- a/libc/kernel/common/linux/types.h
+++ b/libc/kernel/common/linux/types.h
@@ -33,12 +33,14 @@
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 typedef __u64 __bitwise __be64;
 #endif
+typedef __u16 __bitwise __sum16;
+typedef __u32 __bitwise __wsum;
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 struct ustat {
  __kernel_daddr_t f_tfree;
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  __kernel_ino_t f_tinode;
  char f_fname[6];
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  char f_fpack[6];
 };
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #endif
diff --git a/libc/kernel/tools/utils.py b/libc/kernel/tools/utils.py
index 8ec7353..0478e93 100644
--- a/libc/kernel/tools/utils.py
+++ b/libc/kernel/tools/utils.py
@@ -47,32 +47,6 @@
 def find_program_dir():
     return os.path.dirname(sys.argv[0])
 
-def find_file_from_upwards(from_path,target_file):
-    """find a file in the current directory or its parents. if 'from_path' is None,
-       seach from the current program's directory"""
-    path = from_path
-    if path == None:
-        path = os.path.realpath(sys.argv[0])
-        path = os.path.dirname(path)
-        D("this script seems to be located in: %s" % path)
-
-    while 1:
-        D("probing "+path)
-        if path == "":
-            file = target_file
-        else:
-            file = path + "/" + target_file
-
-        if os.path.isfile(file):
-            D("found %s in %s" % (target_file, path))
-            return file
-
-        if path == "":
-            return None
-
-        path = os.path.dirname(path)
-
-
 class StringOutput:
     def __init__(self):
         self.line = ""
@@ -143,35 +117,6 @@
             for name in dirs:
                 os.rmdir(os.path.join(root, name))
 
-def update_file( path, newdata ):
-    """update a file on disk, only if its content has changed"""
-    if os.path.exists( path ):
-        try:
-            f = open( path, "r" )
-            olddata = f.read()
-            f.close()
-        except:
-            D("update_file: cannot read existing file '%s'" % path)
-            return 0
-
-        if oldata == newdata:
-            D2("update_file: no change to file '%s'" % path )
-            return 0
-
-        update = 1
-    else:
-        try:
-            create_file_path(path)
-        except:
-            D("update_file: cannot create path to '%s'" % path)
-            return 0
-
-    f = open( path, "w" )
-    f.write( newdata )
-    f.close()
-
-    return 1
-
 
 class BatchFileUpdater:
     """a class used to edit several files at once"""
diff --git a/libc/netbsd/net/getaddrinfo.c b/libc/netbsd/net/getaddrinfo.c
index 401bc6e..c4766e4 100644
--- a/libc/netbsd/net/getaddrinfo.c
+++ b/libc/netbsd/net/getaddrinfo.c
@@ -93,6 +93,7 @@
 #include <errno.h>
 #include <netdb.h>
 #include "resolv_private.h"
+#include <stdbool.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -1864,17 +1865,19 @@
 	free(elems);
 }
 
-static int _using_alt_dns()
+static bool _using_default_dns(const char *iface)
 {
-	char propname[PROP_NAME_MAX];
-	char propvalue[PROP_VALUE_MAX];
+	char buf[IF_NAMESIZE+1];
+	size_t if_len;
 
-	propvalue[0] = 0;
-	snprintf(propname, sizeof(propname), "net.dns1.%d", getpid());
-	if (__system_property_get(propname, propvalue) > 0 ) {
-		return 1;
+	// common case
+	if (iface == NULL || *iface == '\0') return true;
+
+	if_len = _resolv_get_default_iface(buf, sizeof(buf));
+	if (if_len + 1 <= sizeof(buf)) {
+		if (strcmp(buf, iface) != 0) return false;
 	}
-	return 0;
+	return true;
 }
 
 /*ARGSUSED*/
@@ -1924,7 +1927,7 @@
 			// Only implement AI_ADDRCONFIG if the application is not
 			// using its own DNS servers, since our implementation
 			// only works on the default connection.
-			if (!_using_alt_dns()) {
+			if (_using_default_dns(iface)) {
 				query_ipv6 = _have_ipv6();
 				query_ipv4 = _have_ipv4();
 			}
@@ -2310,6 +2313,12 @@
 	    (dots && !trailing_dot && (res->options & RES_DNSRCH))) {
 		int done = 0;
 
+		/* Unfortunately we need to set stuff up before
+		 * the domain stuff is tried.  Will have a better
+		 * fix after thread pools are used.
+		 */
+		_resolv_populate_res_for_iface(res);
+
 		for (domain = (const char * const *)res->dnsrch;
 		   *domain && !done;
 		   domain++) {
diff --git a/libc/netbsd/net/reentrant.h b/libc/netbsd/net/reentrant.h
index 15507eb..60bff08 100644
--- a/libc/netbsd/net/reentrant.h
+++ b/libc/netbsd/net/reentrant.h
@@ -102,6 +102,7 @@
 
 #include <pthread.h>
 #include <signal.h>
+#include <sys/cdefs.h>
 
 #define	mutex_t			pthread_mutex_t
 #define	MUTEX_INITIALIZER	PTHREAD_MUTEX_INITIALIZER
diff --git a/libc/netbsd/resolv/res_send.c b/libc/netbsd/resolv/res_send.c
index d407ac8..0bb5b6b 100644
--- a/libc/netbsd/resolv/res_send.c
+++ b/libc/netbsd/resolv/res_send.c
@@ -581,8 +581,8 @@
 			if (n == 0)
 				goto next_ns;
 			if (DBG) {
-				__libc_format_log(ANDROID_LOG_DEBUG, "libc",
-					"time=%d, %d\n",time(NULL), time(NULL)%2);
+				__libc_format_log(ANDROID_LOG_DEBUG, "libc", "time=%ld\n",
+                                                  time(NULL));
 			}
 			if (v_circuit)
 				goto same_ns;
@@ -961,7 +961,7 @@
 	fcntl(sock, F_SETFL, origflags);
 	if (DBG) {
 		__libc_format_log(ANDROID_LOG_DEBUG, "libc",
-			"  %d connect_with_timeout returning %s\n", sock, res);
+			"  %d connect_with_timeout returning %d\n", sock, res);
 	}
 	return res;
 }
@@ -1025,7 +1025,7 @@
 	}
 	if (DBG) {
 		__libc_format_log(ANDROID_LOG_DEBUG, "libc",
-			"  %d retrying_select returning %d for %d\n",sock, n);
+			"  %d retrying_select returning %d\n",sock, n);
 	}
 
 	return n;
diff --git a/libc/netbsd/resolv/res_state.c b/libc/netbsd/resolv/res_state.c
index 32ffdca..de82e1a 100644
--- a/libc/netbsd/resolv/res_state.c
+++ b/libc/netbsd/resolv/res_state.c
@@ -71,7 +71,7 @@
         rt->_serial = 0;
         rt->_pi = (struct prop_info*) __system_property_find("net.change");
         if (rt->_pi) {
-            rt->_serial = rt->_pi->serial;
+            rt->_serial = __system_property_serial(rt->_pi);
         }
         memset(rt->_rstatic, 0, sizeof rt->_rstatic);
     }
@@ -135,14 +135,14 @@
                 return rt;
             }
         }
-        if (rt->_serial == rt->_pi->serial) {
+        if (rt->_serial == __system_property_serial(rt->_pi)) {
             /* Nothing changed, so return the current state */
             D("%s: tid=%d rt=%p nothing changed, returning",
               __FUNCTION__, gettid(), rt);
             return rt;
         }
         /* Update the recorded serial number, and go reset the state */
-        rt->_serial = rt->_pi->serial;
+        rt->_serial = __system_property_serial(rt->_pi);
         goto RESET_STATE;
     }
 
diff --git a/libc/private/bionic_atomic_arm.h b/libc/private/bionic_atomic_arm.h
index 275c1c9..3bb639e 100644
--- a/libc/private/bionic_atomic_arm.h
+++ b/libc/private/bionic_atomic_arm.h
@@ -70,10 +70,6 @@
  *        code is going to run on a single or multi-core device, so we
  *        need to be cautious.
  *
- *        Fortunately, we can use the kernel helper function that is
- *        mapped at address 0xffff0fa0 in all user process, and that
- *        provides a device-specific barrier operation.
- *
  *        I.e. on single-core devices, the helper immediately returns,
  *        on multi-core devices, it uses "dmb" or any other means to
  *        perform a full-memory barrier.
@@ -82,7 +78,7 @@
  *
  *    - multi-core ARMv7-A       => use the 'dmb' hardware instruction
  *    - multi-core ARMv6         => use the coprocessor
- *    - single core ARMv5TE/6/7  => do not use any hardware barrier
+ *    - single core ARMv6+       => do not use any hardware barrier
  */
 #if defined(ANDROID_SMP) && ANDROID_SMP == 1
 
@@ -124,18 +120,14 @@
 }
 #endif /* !ANDROID_SMP */
 
+#ifndef __ARM_HAVE_LDREX_STREX
+#error Only ARM devices which have LDREX / STREX are supported
+#endif
+
 /* Compare-and-swap, without any explicit barriers. Note that this functions
  * returns 0 on success, and 1 on failure. The opposite convention is typically
  * used on other platforms.
- *
- * There are two cases to consider:
- *
- *     - ARMv6+  => use LDREX/STREX instructions
- *     - < ARMv6 => use kernel helper function mapped at 0xffff0fc0
- *
- * LDREX/STREX are only available starting from ARMv6
  */
-#ifdef __ARM_HAVE_LDREX_STREX
 __ATOMIC_INLINE__ int
 __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
 {
@@ -157,32 +149,8 @@
     } while (__builtin_expect(status != 0, 0));
     return prev != old_value;
 }
-#  else /* !__ARM_HAVE_LDREX_STREX */
 
-/* Use the handy kernel helper function mapped at 0xffff0fc0 */
-typedef int (kernel_cmpxchg)(int32_t, int32_t, volatile int32_t *);
-
-__ATOMIC_INLINE__ int
-__kernel_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
-{
-    /* Note: the kernel function returns 0 on success too */
-    return (*(kernel_cmpxchg *)0xffff0fc0)(old_value, new_value, ptr);
-}
-
-__ATOMIC_INLINE__ int
-__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
-{
-    return __kernel_cmpxchg(old_value, new_value, ptr);
-}
-#endif /* !__ARM_HAVE_LDREX_STREX */
-
-/* Swap operation, without any explicit barriers.
- * There are again two similar cases to consider:
- *
- *   ARMv6+ => use LDREX/STREX
- *   < ARMv6 => use SWP instead.
- */
-#ifdef __ARM_HAVE_LDREX_STREX
+/* Swap operation, without any explicit barriers. */
 __ATOMIC_INLINE__ int32_t
 __bionic_swap(int32_t new_value, volatile int32_t* ptr)
 {
@@ -199,24 +167,10 @@
     } while (__builtin_expect(status != 0, 0));
     return prev;
 }
-#else /* !__ARM_HAVE_LDREX_STREX */
-__ATOMIC_INLINE__ int32_t
-__bionic_swap(int32_t new_value, volatile int32_t* ptr)
-{
-    int32_t prev;
-    /* NOTE: SWP is available in Thumb-1 too */
-    __asm__ __volatile__ ("swp %0, %2, [%3]"
-                          : "=&r" (prev), "+m" (*ptr)
-                          : "r" (new_value), "r" (ptr)
-                          : "cc");
-    return prev;
-}
-#endif /* !__ARM_HAVE_LDREX_STREX */
 
 /* Atomic increment - without any barriers
  * This returns the old value
  */
-#ifdef __ARM_HAVE_LDREX_STREX
 __ATOMIC_INLINE__ int32_t
 __bionic_atomic_inc(volatile int32_t* ptr)
 {
@@ -234,23 +188,10 @@
     } while (__builtin_expect(status != 0, 0));
     return prev;
 }
-#else
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_inc(volatile int32_t* ptr)
-{
-    int32_t  prev, status;
-    do {
-        prev = *ptr;
-        status = __kernel_cmpxchg(prev, prev+1, ptr);
-    } while (__builtin_expect(status != 0, 0));
-    return prev;
-}
-#endif
 
 /* Atomic decrement - without any barriers
  * This returns the old value.
  */
-#ifdef __ARM_HAVE_LDREX_STREX
 __ATOMIC_INLINE__ int32_t
 __bionic_atomic_dec(volatile int32_t* ptr)
 {
@@ -268,17 +209,5 @@
     } while (__builtin_expect(status != 0, 0));
     return prev;
 }
-#else
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_dec(volatile int32_t* ptr)
-{
-    int32_t  prev, status;
-    do {
-        prev = *ptr;
-        status = __kernel_cmpxchg(prev, prev-1, ptr);
-    } while (__builtin_expect(status != 0, 0));
-    return prev;
-}
-#endif
 
 #endif /* SYS_ATOMICS_ARM_H */
diff --git a/libc/private/bionic_auxv.h b/libc/private/bionic_auxv.h
index 69d15b6..23b2e37 100644
--- a/libc/private/bionic_auxv.h
+++ b/libc/private/bionic_auxv.h
@@ -29,6 +29,7 @@
 #define _PRIVATE_BIONIC_AUXV_H_
 
 #include <elf.h>
+#include <sys/cdefs.h>
 
 __BEGIN_DECLS
 
diff --git a/libc/private/bionic_futex.h b/libc/private/bionic_futex.h
index 6c7fdbe..69658b1 100644
--- a/libc/private/bionic_futex.h
+++ b/libc/private/bionic_futex.h
@@ -29,6 +29,7 @@
 #define _BIONIC_FUTEX_H
 
 #include <linux/futex.h>
+#include <sys/cdefs.h>
 
 __BEGIN_DECLS
 
diff --git a/libc/private/bionic_time.h b/libc/private/bionic_time.h
index 464ab0f..7c80f59 100644
--- a/libc/private/bionic_time.h
+++ b/libc/private/bionic_time.h
@@ -29,6 +29,7 @@
 #define _BIONIC_TIME_H
 
 #include <time.h>
+#include <sys/cdefs.h>
 
 __BEGIN_DECLS
 
diff --git a/libc/private/bionic_tls.h b/libc/private/bionic_tls.h
index 56a0ac2..d15b1ca 100644
--- a/libc/private/bionic_tls.h
+++ b/libc/private/bionic_tls.h
@@ -76,7 +76,12 @@
  * maintain that second number, but pthread_test will fail if we forget.
  */
 #define GLOBAL_INIT_THREAD_LOCAL_BUFFER_COUNT 4
-#define BIONIC_TLS_SLOTS 64
+/*
+ * This is PTHREAD_KEYS_MAX + TLS_SLOT_FIRST_USER_SLOT + GLOBAL_INIT_THREAD_LOCAL_BUFFER_COUNT
+ * rounded up to maintain stack alignment. BIONIC_ALIGN needs 'a' to be a power of two.
+ */
+#define BIONIC_ALIGN(x, a) (((x) + ((a) - 1)) & ~((a) - 1))
+#define BIONIC_TLS_SLOTS BIONIC_ALIGN(128 + TLS_SLOT_FIRST_USER_SLOT + GLOBAL_INIT_THREAD_LOCAL_BUFFER_COUNT, 4)
 
 /* syscall only, do not call directly */
 extern int __set_tls(void* ptr);
@@ -84,20 +89,18 @@
 /* get the TLS */
 #if defined(__arm__)
 # define __get_tls() \
-    ({ register unsigned int __val asm("r0"); \
-       asm ("mrc p15, 0, r0, c13, c0, 3" : "=r"(__val) ); \
-       (volatile void*)__val; })
+    ({ register unsigned int __val; \
+       asm ("mrc p15, 0, %0, c13, c0, 3" : "=r"(__val)); \
+       (volatile void*) __val; })
 #elif defined(__mips__)
 # define __get_tls() \
-    ({ register unsigned int __val asm("v1");   \
-        asm (                                   \
-            "   .set    push\n"                 \
-            "   .set    mips32r2\n"             \
-            "   rdhwr   %0,$29\n"               \
-            "   .set    pop\n"                  \
-            : "=r"(__val)                       \
-            );                                  \
-        (volatile void*)__val; })
+    /* On mips32r1, this goes via a kernel illegal instruction trap that's optimized for v1. */ \
+    ({ register unsigned int __val asm("v1"); \
+       asm ("   .set    push\n" \
+            "   .set    mips32r2\n" \
+            "   rdhwr   %0,$29\n" \
+            "   .set    pop\n" : "=r"(__val)); \
+       (volatile void*) __val; })
 #elif defined(__i386__)
 # define __get_tls() \
     ({ register void* __val; \
@@ -107,9 +110,6 @@
 #error unsupported architecture
 #endif
 
-/* return the stack base and size, used by our malloc debugger */
-extern void* __get_stack_base(int* p_stack_size);
-
 __END_DECLS
 
 #if defined(__cplusplus)
diff --git a/libc/private/libc_logging.h b/libc/private/libc_logging.h
index 281bad3..f69e2ed 100644
--- a/libc/private/libc_logging.h
+++ b/libc/private/libc_logging.h
@@ -45,9 +45,6 @@
   BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW = 80125,
   BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW = 80130,
 
-  BIONIC_EVENT_STRCAT_INTEGER_OVERFLOW = 80200,
-  BIONIC_EVENT_STRNCAT_INTEGER_OVERFLOW = 80205,
-
   BIONIC_EVENT_RESOLVER_OLD_RESPONSE = 80300,
   BIONIC_EVENT_RESOLVER_WRONG_SERVER = 80305,
   BIONIC_EVENT_RESOLVER_WRONG_QUERY = 80310,
@@ -79,7 +76,16 @@
 //
 
 __LIBC_HIDDEN__ __noreturn void __libc_fatal(const char* format, ...)
-    __attribute__((__format__(printf, 1, 2)));
+    __printflike(1, 2);
+
+//
+// Formats a message to the log (priority 'fatal'), but doesn't abort.
+// Used by the malloc implementation to ensure that debuggerd dumps memory
+// around the bad address.
+//
+
+__LIBC_HIDDEN__ void __libc_fatal_no_abort(const char* format, ...)
+    __printflike(1, 2);
 
 //
 // Formatting routines for the C library's internal debugging.
@@ -87,13 +93,13 @@
 //
 
 __LIBC_HIDDEN__ int __libc_format_buffer(char* buffer, size_t buffer_size, const char* format, ...)
-    __attribute__((__format__(printf, 3, 4)));
+    __printflike(3, 4);
 
 __LIBC_HIDDEN__ int __libc_format_fd(int fd, const char* format, ...)
-    __attribute__((__format__(printf, 2, 3)));
+    __printflike(2, 3);
 
 __LIBC_HIDDEN__ int __libc_format_log(int priority, const char* tag, const char* format, ...)
-    __attribute__((__format__(printf, 3, 4)));
+    __printflike(3, 4);
 
 __LIBC_HIDDEN__ int __libc_format_log_va_list(int priority, const char* tag, const char* format,
                                               va_list ap);
diff --git a/libc/private/resolv_private.h b/libc/private/resolv_private.h
index 1c3c1a2..9648a8f 100644
--- a/libc/private/resolv_private.h
+++ b/libc/private/resolv_private.h
@@ -54,6 +54,8 @@
 #ifndef _RESOLV_PRIVATE_H_
 #define	_RESOLV_PRIVATE_H_
 
+#include <sys/cdefs.h>
+
 #include <resolv.h>
 #include "resolv_static.h"
 #include <net/if.h>
@@ -343,12 +345,9 @@
 extern const struct res_sym __p_rcode_syms[];
 #endif /* SHARED_LIBBIND */
 
-#ifndef ADNROID_CHANGES
-#define b64_ntop		__b64_ntop
-#define b64_pton		__b64_pton
-#endif
-
+#ifndef ANDROID_CHANGES
 #define dn_comp			__dn_comp
+#endif
 #define dn_count_labels		__dn_count_labels
 #define dn_skipname		__dn_skipname
 #define fp_resstat		__fp_resstat
@@ -443,9 +442,6 @@
 const char *	p_option(u_long);
 char *		p_secstodate(u_long);
 int		dn_count_labels(const char *);
-int		dn_comp(const char *, u_char *, int, u_char **, u_char **);
-int		dn_expand(const u_char *, const u_char *, const u_char *,
-			       char *, int);
 u_int		res_randomid(void);
 int		res_nameinquery(const char *, int, int, const u_char *,
 				     const u_char *);
diff --git a/libc/private/thread_private.h b/libc/private/thread_private.h
index c03f0ab..b19ad09 100644
--- a/libc/private/thread_private.h
+++ b/libc/private/thread_private.h
@@ -20,22 +20,6 @@
 extern int __isthreaded;
 
 /*
- * Weak symbols are used in libc so that the thread library can
- * efficiently wrap libc functions.
- * 
- * Use WEAK_NAME(n) to get a libc-private name for n (_weak_n),
- *     WEAK_ALIAS(n) to generate the weak symbol n pointing to _weak_n,
- *     WEAK_PROTOTYPE(n) to generate a prototype for _weak_n (based on n).
- */
-#define WEAK_NAME(name)		__CONCAT(_weak_,name)
-#define WEAK_ALIAS(name)	__weak_alias(name, WEAK_NAME(name))
-#ifdef __GNUC__
-#define WEAK_PROTOTYPE(name)	__typeof__(name) WEAK_NAME(name)
-#else
-#define WEAK_PROTOTYPE(name)	/* typeof() only in gcc */
-#endif
-
-/*
  * helper macro to make unique names in the thread namespace
  */
 #define __THREAD_NAME(name)	__CONCAT(_thread_tagname_,name)
@@ -52,101 +36,6 @@
 #define _THREAD_PRIVATE_MUTEX_UNLOCK(name) \
 	pthread_mutex_unlock( &__THREAD_NAME(name)._private_lock )
 
-#define _THREAD_PRIVATE_KEY(name)    \
-	static struct __thread_private_tag_t  __THREAD_NAME(name) = { PTHREAD_MUTEX_INITIALIZER, -1 }
-
-extern void*  __bionic_thread_private_storage( struct __thread_private_tag_t*  ptag, void*  storage, size_t  objsize, void*  error );
-
-#define _THREAD_PRIVATE(name, storage, error)    \
-	__bionic_thread_private_storage( &__THREAD_NAME(name), &(storage),sizeof(storage), error)
-	
-#if 0
-/*
- * helper functions that exist as (weak) null functions in libc and
- * (strong) functions in the thread library.   These functions:
- *
- * _thread_tag_lock:
- *	lock the mutex associated with the given tag.   If the given
- *	tag is NULL a tag is first allocated.
- *
- * _thread_tag_unlock:
- *	unlock the mutex associated with the given tag.   If the given
- *	tag is NULL a tag is first allocated.
- *
- * _thread_tag_storage:
- *	return a pointer to per thread instance of data associated
- *	with the given tag.  If the given tag is NULL a tag is first
- *	allocated.
- */
-void	_thread_tag_lock(void **);
-void	_thread_tag_unlock(void **);
-void   *_thread_tag_storage(void **, void *, size_t, void *);
-
-/*
- * Macros used in libc to access thread mutex, keys, and per thread storage.
- * _THREAD_PRIVATE_KEY and _THREAD_PRIVATE_MUTEX are different macros for
- * historical reasons.   They do the same thing, define a static variable
- * keyed by 'name' that identifies a mutex and a key to identify per thread
- * data.
- */
-#define _THREAD_PRIVATE_KEY(name)					\
-	static void *__THREAD_NAME(name)
-#define _THREAD_PRIVATE_MUTEX(name)					\
-	static void *__THREAD_NAME(name)
-#define _THREAD_PRIVATE_MUTEX_LOCK(name)				\
-	_thread_tag_lock(&(__THREAD_NAME(name)))
-#define _THREAD_PRIVATE_MUTEX_UNLOCK(name)				\
-	_thread_tag_unlock(&(__THREAD_NAME(name)))
-#define _THREAD_PRIVATE(keyname, storage, error)			\
-	_thread_tag_storage(&(__THREAD_NAME(keyname)), &(storage),	\
-			    sizeof (storage), error)
-/*
- * Resolver code is special cased in that it uses global keys.
- */
-extern void *__THREAD_NAME(_res);
-extern void *__THREAD_NAME(_res_ext);
-extern void *__THREAD_NAME(serv_mutex);
-#endif
-
-/*
- * File descriptor locking definitions.
- */
-#define FD_READ		0x1
-#define FD_WRITE	0x2
-#define FD_RDWR		(FD_READ | FD_WRITE)
-
-struct timespec;
-int	_thread_fd_lock(int, int, struct timespec *);
-void	_thread_fd_unlock(int, int);
-
-/*
- * Macros are used in libc code for historical (debug) reasons.
- * Define them here.
- */
-#define _FD_LOCK(_fd,_type,_ts)	_thread_fd_lock(_fd, _type, _ts)
-#define _FD_UNLOCK(_fd,_type)	_thread_fd_unlock(_fd, _type)
-
-
-/*
- * malloc lock/unlock prototypes and definitions
- */
-void	_thread_malloc_init(void);
-void	_thread_malloc_lock(void);
-void	_thread_malloc_unlock(void);
-
-#define _MALLOC_LOCK()		do {					\
-					if (__isthreaded)		\
-						_thread_malloc_lock();	\
-				} while (0)
-#define _MALLOC_UNLOCK()	do {					\
-					if (__isthreaded)		\
-						_thread_malloc_unlock();\
-				} while (0)
-#define _MALLOC_LOCK_INIT()	do {					\
-					if (__isthreaded)		\
-						_thread_malloc_init();\
-				} while (0)
-
 void	_thread_atexit_lock(void);
 void	_thread_atexit_unlock(void);
 
diff --git a/libc/stdio/fgetpos.c b/libc/stdio/fgetpos.c
deleted file mode 100644
index e6188e5..0000000
--- a/libc/stdio/fgetpos.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/*	$OpenBSD: fgetpos.c,v 1.6 2005/08/08 08:05:36 espie Exp $ */
-/*-
- * Copyright (c) 1990, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Chris Torek.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <stdio.h>
-
-/*
- * fgetpos: like ftello.
- */
-int
-fgetpos(FILE *fp, fpos_t *pos)
-{
-	return((*pos = ftello(fp)) == (fpos_t)-1);
-}
diff --git a/libc/stdio/findfp.c b/libc/stdio/findfp.c
index 76ed5ee..863235b 100644
--- a/libc/stdio/findfp.c
+++ b/libc/stdio/findfp.c
@@ -171,10 +171,9 @@
 #endif
 
 /*
- * exit() and abort() call _cleanup() through the callback registered
- * with __atexit_register_cleanup(), set whenever we open or buffer a
- * file. This chicanery is done so that programs that do not use stdio
- * need not link it all in.
+ * exit() calls _cleanup() through *__cleanup, set whenever we
+ * open or buffer a file.  This chicanery is done so that programs
+ * that do not use stdio need not link it all in.
  *
  * The name `_cleanup' is, alas, fairly well known outside stdio.
  */
@@ -201,7 +200,7 @@
 		_FILEEXT_SETUP(usual+i, usualext+i);
 	}
 	/* make sure we clean up on exit */
-	__atexit_register_cleanup(_cleanup); /* conservative */
+	__cleanup = _cleanup; /* conservative */
 	__sdidinit = 1;
 out:
 	_THREAD_PRIVATE_MUTEX_UNLOCK(__sinit_mutex);
diff --git a/libc/stdio/local.h b/libc/stdio/local.h
index 664cec1..e3a40bc 100644
--- a/libc/stdio/local.h
+++ b/libc/stdio/local.h
@@ -58,7 +58,10 @@
 int	__sflags(const char *, int *);
 int	__vfprintf(FILE *, const char *, __va_list);
 
-extern void __atexit_register_cleanup(void (*)(void));
+/*
+ * Function to clean up streams, called from abort() and exit().
+ */
+extern void (*__cleanup)(void);
 extern int __sdidinit;
 
 /*
diff --git a/libc/stdlib/atexit.c b/libc/stdlib/atexit.c
index 23a2636..8f5bd2d 100644
--- a/libc/stdlib/atexit.c
+++ b/libc/stdlib/atexit.c
@@ -44,7 +44,8 @@
  * Function pointers are stored in a linked list of pages. The list
  * is initially empty, and pages are allocated on demand. The first
  * function pointer in the first allocated page (the last one in
- * the linked list) is reserved for the cleanup function.
+ * the linked list) was reserved for the cleanup function.
+ * TODO: switch to the regular FreeBSD/NetBSD atexit implementation.
  *
  * Outside the following functions, all pages are mprotect()'ed
  * to prevent unintentional/malicious corruption.
@@ -172,42 +173,3 @@
 	}
 	_ATEXIT_UNLOCK();
 }
-
-/*
- * Register the cleanup function
- */
-void
-__atexit_register_cleanup(void (*func)(void))
-{
-	struct atexit *p;
-	int pgsize = getpagesize();
-
-	if (pgsize < (int)sizeof(*p))
-		return;
-	_ATEXIT_LOCK();
-	p = __atexit;
-	while (p != NULL && p->next != NULL)
-		p = p->next;
-	if (p == NULL) {
-		p = mmap(NULL, pgsize, PROT_READ | PROT_WRITE,
-		    MAP_ANON | MAP_PRIVATE, -1, 0);
-		if (p == MAP_FAILED)
-			goto unlock;
-		p->ind = 1;
-		p->max = (pgsize - ((char *)&p->fns[0] - (char *)p)) /
-		    sizeof(p->fns[0]);
-		p->next = NULL;
-		__atexit = p;
-		if (__atexit_invalid)
-			__atexit_invalid = 0;
-	} else {
-		if (mprotect(p, pgsize, PROT_READ | PROT_WRITE))
-			goto unlock;
-	}
-	p->fns[0].fn_ptr.std_func = func;
-	p->fns[0].fn_arg = NULL;
-	p->fns[0].fn_dso = NULL;
-	mprotect(p, pgsize, PROT_READ);
-unlock:
-	_ATEXIT_UNLOCK();
-}
diff --git a/libc/stdlib/qsort.c b/libc/stdlib/qsort.c
deleted file mode 100644
index f6fc8e1..0000000
--- a/libc/stdlib/qsort.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*	$OpenBSD: qsort.c,v 1.10 2005/08/08 08:05:37 espie Exp $ */
-/*-
- * Copyright (c) 1992, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/types.h>
-#include <stdlib.h>
-
-static __inline char	*med3(char *, char *, char *, int (*)(const void *, const void *));
-static __inline void	 swapfunc(char *, char *, int, int);
-
-#define min(a, b)	(a) < (b) ? a : b
-
-/*
- * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
- */
-#define swapcode(TYPE, parmi, parmj, n) {		\
-	long i = (n) / sizeof (TYPE);			\
-	TYPE *pi = (TYPE *) (parmi);			\
-	TYPE *pj = (TYPE *) (parmj);			\
-	do {						\
-		TYPE	t = *pi;			\
-		*pi++ = *pj;				\
-		*pj++ = t;				\
-        } while (--i > 0);				\
-}
-
-#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
-	es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
-
-static __inline void
-swapfunc(char *a, char *b, int n, int swaptype)
-{
-	if (swaptype <= 1)
-		swapcode(long, a, b, n)
-	else
-		swapcode(char, a, b, n)
-}
-
-#define swap(a, b)					\
-	if (swaptype == 0) {				\
-		long t = *(long *)(a);			\
-		*(long *)(a) = *(long *)(b);		\
-		*(long *)(b) = t;			\
-	} else						\
-		swapfunc(a, b, es, swaptype)
-
-#define vecswap(a, b, n)	if ((n) > 0) swapfunc(a, b, n, swaptype)
-
-static __inline char *
-med3(char *a, char *b, char *c, int (*cmp)(const void *, const void *))
-{
-	return cmp(a, b) < 0 ?
-	       (cmp(b, c) < 0 ? b : (cmp(a, c) < 0 ? c : a ))
-              :(cmp(b, c) > 0 ? b : (cmp(a, c) < 0 ? a : c ));
-}
-
-void
-qsort(void *aa, size_t n, size_t es, int (*cmp)(const void *, const void *))
-{
-	char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
-	int d, r, swaptype, swap_cnt;
-	char *a = aa;
-
-loop:	SWAPINIT(a, es);
-	swap_cnt = 0;
-	if (n < 7) {
-		for (pm = (char *)a + es; pm < (char *) a + n * es; pm += es)
-			for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
-			     pl -= es)
-				swap(pl, pl - es);
-		return;
-	}
-	pm = (char *)a + (n / 2) * es;
-	if (n > 7) {
-		pl = (char *)a;
-		pn = (char *)a + (n - 1) * es;
-		if (n > 40) {
-			d = (n / 8) * es;
-			pl = med3(pl, pl + d, pl + 2 * d, cmp);
-			pm = med3(pm - d, pm, pm + d, cmp);
-			pn = med3(pn - 2 * d, pn - d, pn, cmp);
-		}
-		pm = med3(pl, pm, pn, cmp);
-	}
-	swap(a, pm);
-	pa = pb = (char *)a + es;
-
-	pc = pd = (char *)a + (n - 1) * es;
-	for (;;) {
-		while (pb <= pc && (r = cmp(pb, a)) <= 0) {
-			if (r == 0) {
-				swap_cnt = 1;
-				swap(pa, pb);
-				pa += es;
-			}
-			pb += es;
-		}
-		while (pb <= pc && (r = cmp(pc, a)) >= 0) {
-			if (r == 0) {
-				swap_cnt = 1;
-				swap(pc, pd);
-				pd -= es;
-			}
-			pc -= es;
-		}
-		if (pb > pc)
-			break;
-		swap(pb, pc);
-		swap_cnt = 1;
-		pb += es;
-		pc -= es;
-	}
-	if (swap_cnt == 0) {  /* Switch to insertion sort */
-		for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
-			for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
-			     pl -= es)
-				swap(pl, pl - es);
-		return;
-	}
-
-	pn = (char *)a + n * es;
-	r = min(pa - (char *)a, pb - pa);
-	vecswap(a, pb - r, r);
-	r = min(pd - pc, pn - pd - (int)es);
-	vecswap(pb, pn - r, r);
-	if ((r = pb - pa) > (int)es)
-		qsort(a, r / es, es, cmp);
-	if ((r = pd - pc) > (int)es) {
-		/* Iterate rather than recurse to save stack space */
-		a = pn - r;
-		n = r / es;
-		goto loop;
-	}
-	/* qsort(pn - r, r / es, es, cmp); */
-}
diff --git a/libc/string/strchr.c b/libc/string/__strrchr_chk.c
similarity index 79%
rename from libc/string/strchr.c
rename to libc/string/__strrchr_chk.c
index 29acca5..c1e5d66 100644
--- a/libc/string/strchr.c
+++ b/libc/string/__strrchr_chk.c
@@ -1,6 +1,6 @@
-/*	$OpenBSD: index.c,v 1.5 2005/08/08 08:05:37 espie Exp $ */
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
+/*	$OpenBSD: rindex.c,v 1.6 2005/08/08 08:05:37 espie Exp $ */
+/*
+ * Copyright (c) 1988 Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -32,20 +32,17 @@
 #include "libc_logging.h"
 
 char *
-__strchr_chk(const char *p, int ch, size_t s_len)
+__strrchr_chk(const char *p, int ch, size_t s_len)
 {
-	for (;; ++p, s_len--) {
+	char *save;
+
+	for (save = NULL;; ++p, s_len--) {
 		if (s_len == 0)
-			__fortify_chk_fail("strchr read beyond buffer", 0);
+			__fortify_chk_fail("strrchr read beyond buffer", 0);
 		if (*p == (char) ch)
-			return((char *)p);
+			save = (char *)p;
 		if (!*p)
-			return((char *)NULL);
+			return(save);
 	}
 	/* NOTREACHED */
 }
-
-char *
-strchr(const char *p, int ch) {
-    return __strchr_chk(p, ch, __BIONIC_FORTIFY_UNKNOWN_SIZE);
-}
diff --git a/libc/arch-x86/string/strlen_wrapper.S b/libc/string/memcmp16.c
similarity index 81%
copy from libc/arch-x86/string/strlen_wrapper.S
copy to libc/string/memcmp16.c
index e62786b..1267722 100644
--- a/libc/arch-x86/string/strlen_wrapper.S
+++ b/libc/string/memcmp16.c
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2010, Intel Corporation
+Copyright (c) 2013 Intel Corporation
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -28,13 +28,18 @@
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#if defined(USE_SSE2)
+#include <stddef.h>
 
-# define sse2_strlen_atom strlen
-# include "sse2-strlen-atom.S"
+/* Unoptimised version of __memcmp16 */
+int __memcmp16(const unsigned short *ptr1, const unsigned short *ptr2, size_t n)
+{
+  size_t i;
 
-#else
-
-# include "strlen.S"
-
-#endif
+  for (i = 0; i < n; i++) {
+    if (*ptr1 != *ptr2)
+      return *ptr1 - *ptr2;
+    ptr1++;
+    ptr2++;
+  }
+  return 0;
+}
diff --git a/libc/string/strrchr.c b/libc/string/strrchr.c
index e709d49..fe2306a 100644
--- a/libc/string/strrchr.c
+++ b/libc/string/strrchr.c
@@ -29,23 +29,6 @@
  */
 
 #include <string.h>
-#include "libc_logging.h"
-
-char *
-__strrchr_chk(const char *p, int ch, size_t s_len)
-{
-	char *save;
-
-	for (save = NULL;; ++p, s_len--) {
-		if (s_len == 0)
-			__fortify_chk_fail("strrchr read beyond buffer", 0);
-		if (*p == (char) ch)
-			save = (char *)p;
-		if (!*p)
-			return(save);
-	}
-	/* NOTREACHED */
-}
 
 char *
 strrchr(const char *p, int ch)
diff --git a/libc/tools/bionic_utils.py b/libc/tools/bionic_utils.py
index bbfff7d..dccf9e3 100644
--- a/libc/tools/bionic_utils.py
+++ b/libc/tools/bionic_utils.py
@@ -37,140 +37,6 @@
     verbose = level
 
 
-def find_dir_of(path):
-    '''return the directory name of 'path', or "." if there is none'''
-    # remove trailing slash
-    if len(path) > 1 and path[-1] == '/':
-        path = path[:-1]
-
-    # find parent directory name
-    d = os.path.dirname(path)
-    if d == "":
-        return "."
-    else:
-        return d
-
-#  other stuff
-#
-#
-def find_file_from_upwards(from_path,target_file):
-    """find a file in the current directory or its parents. if 'from_path' is None,
-       seach from the current program's directory"""
-    path = from_path
-    if path == None:
-        path = find_dir_of(sys.argv[0])
-        D("this script seems to be located in: %s" % path)
-
-    while 1:
-        if path == "":
-            path = "."
-
-        file = path + "/" + target_file
-        D("probing "+file)
-
-        if os.path.isfile(file):
-            D("found %s in %s" % (target_file, path))
-            return file
-
-        if path == ".":
-            break
-
-        path = os.path.dirname(path)
-
-    path = ""
-    while 1:
-        path = "../" + path
-        file = path + target_file
-        D("probing "+file)
-
-        if os.path.isfile(file):
-            D("found %s in %s" % (target_file, path))
-            return file
-
-
-    return None
-
-def find_bionic_root():
-    '''find the root of the Bionic source tree. we check for the SYSCALLS.TXT file
-       from the location of the current program's directory.'''
-
-    # note that we can't use find_file_from_upwards() since we can't use os.path.abspath
-    # that's because in some cases the p4 client is in a symlinked directory, and this
-    # function will return the real path instead, which later creates problems when
-    # p4 commands are issued
-    #
-    file = find_file_from_upwards(None, "SYSCALLS.TXT")
-    if file:
-        return os.path.dirname(file)
-    else:
-        return None
-
-def find_original_kernel_headers():
-    """try to find the directory containing the original kernel headers"""
-    bionic_root = find_bionic_root()
-    if not bionic_root:
-        D("Could not find Bionic root !!")
-        return None
-
-    path = os.path.normpath(bionic_root + "/../../external/kernel-headers/original")
-    if not os.path.isdir(path):
-        D("Could not find %s" % (path))
-        return None
-
-    return path
-
-def find_kernel_headers():
-    """try to find the directory containing the kernel headers for this machine"""
-
-    # First try to find the original kernel headers.
-    ret = find_original_kernel_headers()
-    if ret:
-        D("found original kernel headers in: %s" % (ret))
-        return ret
-
-    status, version = commands.getstatusoutput( "uname -r" )  # get Linux kernel version
-    if status != 0:
-        D("could not execute 'uname -r' command properly")
-        return None
-
-    # get rid of the "-xenU" suffix that is found in Xen virtual machines
-    if len(version) > 5 and version[-5:] == "-xenU":
-        version = version[:-5]
-
-    path = "/usr/src/linux-headers-" + version + "/include"
-    D("probing %s for kernel headers" % (path))
-    ret = os.path.isdir( path )
-    if ret:
-        D("found kernel headers in: %s" % (path))
-        return path
-    return None
-
-def find_arch_header(kernel_headers,arch,header):
-    # First, try in <root>/arch/<arm>/include/<header>
-    # corresponding to the location in the kernel source tree for
-    # certain architectures (e.g. arm).
-    path = "%s/arch/%s/include/asm/%s" % (kernel_headers, arch, header)
-    D("Probing for %s" % path)
-    if os.path.exists(path):
-        return path
-
-    # Try <root>/asm-<arch>/include/<header> corresponding to the location
-    # in the kernel source tree for other architectures (e.g. x86).
-    path = "%s/include/asm-%s/%s" % (kernel_headers, arch, header)
-    D("Probing for %s" % path)
-    if os.path.exists(path):
-        return path
-
-    # Otherwise, look under <root>/asm-<arch>/<header> corresponding
-    # the original kernel headers directory
-    path = "%s/asm-%s/%s" % (kernel_headers, arch, header)
-    D("Probing for %s" % path)
-    if os.path.exists(path):
-        return path
-
-
-    return None
-
 # parser for the SYSCALLS.TXT file
 #
 class SysCallsTxtParser:
@@ -312,52 +178,3 @@
 
     def get(self):
         return self.line
-
-
-def create_file_path(path):
-    dirs = []
-    while 1:
-        parent = os.path.dirname(path)
-        if parent == "/":
-            break
-        dirs.append(parent)
-        path = parent
-
-    dirs.reverse()
-    for dir in dirs:
-        #print "dir %s" % dir
-        if os.path.isdir(dir):
-            continue
-        os.mkdir(dir)
-
-def walk_source_files(paths,callback,args,excludes=[]):
-    """recursively walk a list of paths and files, only keeping the source files in directories"""
-    for path in paths:
-        if not os.path.isdir(path):
-            callback(path,args)
-        else:
-            for root, dirs, files in os.walk(path):
-                #print "w-- %s (ex: %s)" % (repr((root,dirs)), repr(excludes))
-                if len(excludes):
-                    for d in dirs[:]:
-                        if d in excludes:
-                            dirs.remove(d)
-                for f in files:
-                    r, ext = os.path.splitext(f)
-                    if ext in [ ".h", ".c", ".cpp", ".S" ]:
-                        callback( "%s/%s" % (root,f), args )
-
-def cleanup_dir(path):
-    """create a directory if needed, and ensure that it is totally empty
-       by removing any existing content in it"""
-    if not os.path.exists(path):
-        os.mkdir(path)
-    else:
-        for root, dirs, files in os.walk(path, topdown=False):
-            if root.endswith("kernel_headers/"):
-                # skip 'kernel_headers'
-                continue
-            for name in files:
-                os.remove(os.path.join(root, name))
-            for name in dirs:
-                os.rmdir(os.path.join(root, name))
diff --git a/libc/tools/gensyscalls.py b/libc/tools/gensyscalls.py
index ed1b3dc..4894f2d 100755
--- a/libc/tools/gensyscalls.py
+++ b/libc/tools/gensyscalls.py
@@ -10,17 +10,7 @@
 
 from bionic_utils import *
 
-# get the root Bionic directory, simply this script's dirname
-#
-bionic_root = find_bionic_root()
-if not bionic_root:
-    print "could not find the Bionic root directory. aborting"
-    sys.exit(1)
-
-if bionic_root[-1] != '/':
-    bionic_root += "/"
-
-print "bionic_root is %s" % bionic_root
+bionic_libc_root = os.environ["ANDROID_BUILD_TOP"] + "/bionic/libc/"
 
 # temp directory where we store all intermediate files
 bionic_temp = "/tmp/bionic_gensyscalls/"
@@ -334,11 +324,11 @@
         glibc_fp.write("#define _BIONIC_GLIBC_SYSCALLS_H_\n")
 
         glibc_fp.write("#if defined(__arm__)\n")
-        self.scan_linux_unistd_h(glibc_fp, "libc/kernel/arch-arm/asm/unistd.h")
+        self.scan_linux_unistd_h(glibc_fp, bionic_libc_root + "kernel/arch-arm/asm/unistd.h")
         glibc_fp.write("#elif defined(__mips__)\n")
-        self.scan_linux_unistd_h(glibc_fp, "libc/kernel/arch-mips/asm/unistd.h")
+        self.scan_linux_unistd_h(glibc_fp, bionic_libc_root + "kernel/arch-mips/asm/unistd.h")
         glibc_fp.write("#elif defined(__i386__)\n")
-        self.scan_linux_unistd_h(glibc_fp, "libc/kernel/arch-x86/asm/unistd_32.h")
+        self.scan_linux_unistd_h(glibc_fp, bionic_libc_root + "kernel/arch-x86/asm/unistd_32.h")
         glibc_fp.write("#endif\n")
 
         glibc_fp.write("#endif /* _BIONIC_GLIBC_SYSCALLS_H_ */\n")
@@ -397,14 +387,14 @@
     def  regenerate(self):
         D( "scanning for existing architecture-specific stub files" )
 
-        bionic_root_len = len(bionic_root)
+        bionic_libc_root_len = len(bionic_libc_root)
 
         for arch in all_archs:
-            arch_path = bionic_root + "arch-" + arch
+            arch_path = bionic_libc_root + "arch-" + arch
             D( "scanning " + arch_path )
             files = glob.glob( arch_path + "/syscalls/*.S" )
             for f in files:
-                self.old_stubs.append( f[bionic_root_len:] )
+                self.old_stubs.append( f[bionic_libc_root_len:] )
 
         D( "found %d stub files" % len(self.old_stubs) )
 
@@ -424,13 +414,13 @@
         edits   = []
 
         for stub in self.new_stubs + self.other_files:
-            if not os.path.exists( bionic_root + stub ):
+            if not os.path.exists( bionic_libc_root + stub ):
                 # new file, git add it
                 D( "new file:     " + stub)
-                adds.append( bionic_root + stub )
-                shutil.copyfile( bionic_temp + stub, bionic_root + stub )
+                adds.append( bionic_libc_root + stub )
+                shutil.copyfile( bionic_temp + stub, bionic_libc_root + stub )
 
-            elif not filecmp.cmp( bionic_temp + stub, bionic_root + stub ):
+            elif not filecmp.cmp( bionic_temp + stub, bionic_libc_root + stub ):
                 D( "changed file: " + stub)
                 edits.append( stub )
 
@@ -438,7 +428,7 @@
         for stub in self.old_stubs:
             if not stub in self.new_stubs:
                 D( "deleted file: " + stub)
-                deletes.append( bionic_root + stub )
+                deletes.append( bionic_libc_root + stub )
 
 
         if adds:
@@ -447,11 +437,11 @@
             commands.getoutput("git rm " + " ".join(deletes))
         if edits:
             for file in edits:
-                shutil.copyfile( bionic_temp + file, bionic_root + file )
+                shutil.copyfile( bionic_temp + file, bionic_libc_root + file )
             commands.getoutput("git add " +
-                               " ".join((bionic_root + file) for file in edits))
+                               " ".join((bionic_libc_root + file) for file in edits))
 
-        commands.getoutput("git add %s%s" % (bionic_root,"SYSCALLS.TXT"))
+        commands.getoutput("git add %s%s" % (bionic_libc_root,"SYSCALLS.TXT"))
 
         if (not adds) and (not deletes) and (not edits):
             D("no changes detected!")
@@ -461,5 +451,5 @@
 D_setlevel(1)
 
 state = State()
-state.process_file(bionic_root+"SYSCALLS.TXT")
+state.process_file(bionic_libc_root+"SYSCALLS.TXT")
 state.regenerate()
diff --git a/libc/tools/zoneinfo/generate b/libc/tools/zoneinfo/update-tzdata.py
similarity index 68%
rename from libc/tools/zoneinfo/generate
rename to libc/tools/zoneinfo/update-tzdata.py
index 334ba3c..8956136 100755
--- a/libc/tools/zoneinfo/generate
+++ b/libc/tools/zoneinfo/update-tzdata.py
@@ -3,6 +3,7 @@
 """Updates the tzdata file."""
 
 import ftplib
+import httplib
 import os
 import re
 import subprocess
@@ -58,27 +59,57 @@
   setup.close()
 
 
-def Retrieve(ftp, filename):
-  ftp.retrbinary('RETR %s' % filename, open(filename, 'wb').write)
-
-
-def UpgradeTo(ftp, data_filename):
-  """Downloads and repackages the given data from the given FTP server."""
-
-  new_version = re.search('(tzdata.+)\\.tar\\.gz', data_filename).group(1)
-
-  # Switch to a temporary directory.
+def SwitchToNewTemporaryDirectory():
   tmp_dir = tempfile.mkdtemp('-tzdata')
   os.chdir(tmp_dir)
   print 'Created temporary directory "%s"...' % tmp_dir
 
+
+def FtpRetrieve(ftp, filename):
+  ftp.retrbinary('RETR %s' % filename, open(filename, 'wb').write)
+
+
+def FtpUpgrade(ftp, data_filename):
+  """Downloads and repackages the given data from the given FTP server."""
+  SwitchToNewTemporaryDirectory()
+
   print 'Downloading data...'
-  Retrieve(ftp, data_filename)
+  FtpRetrieve(ftp, data_filename)
 
   print 'Downloading signature...'
   signature_filename = '%s.asc' % data_filename
-  Retrieve(ftp, signature_filename)
+  FtpRetrieve(ftp, signature_filename)
 
+  ExtractAndCompile(data_filename)
+
+
+def HttpRetrieve(http, path, output_filename):
+  """Saves the body of a GET for the given path on the given HTTP connection to output_filename."""
+  http.request("GET", path)
+  with open(output_filename, 'wb') as f:
+    f.write(http.getresponse().read())
+
+
+def HttpUpgrade(http, data_filename):
+  """Downloads and repackages the given data from the given HTTP server."""
+  SwitchToNewTemporaryDirectory()
+
+  path = "/time-zones/repository/releases/%s" % data_filename
+
+  print 'Downloading data...'
+  HttpRetrieve(http, path, data_filename)
+
+  print 'Downloading signature...'
+  signature_filename = '%s.asc' % data_filename
+  HttpRetrieve(http, "%s.asc" % path, signature_filename)
+
+  ExtractAndCompile(data_filename)
+
+
+def ExtractAndCompile(data_filename):
+  new_version = re.search('(tzdata.+)\\.tar\\.gz', data_filename).group(1)
+
+  signature_filename = '%s.asc' % data_filename
   print 'Verifying signature...'
   # If this fails for you, you probably need to import Paul Eggert's public key:
   # gpg --recv-keys ED97E90E62AA7E34
@@ -113,14 +144,29 @@
 # See http://www.iana.org/time-zones/ for more about the source of this data.
 def main():
   print 'Looking for new tzdata...'
-  ftp = ftplib.FTP('ftp.iana.org')
-  ftp.login()
-  ftp.cwd('tz/releases')
+
   tzdata_filenames = []
-  for filename in ftp.nlst():
-    if filename.startswith('tzdata20') and filename.endswith('.tar.gz'):
-      tzdata_filenames.append(filename)
-  tzdata_filenames.sort()
+
+  # The FTP server lets you download intermediate releases, and also lets you
+  # download the signatures for verification, so it's your best choice.
+  use_ftp = True
+
+  if use_ftp:
+    ftp = ftplib.FTP('ftp.iana.org')
+    ftp.login()
+    ftp.cwd('tz/releases')
+    for filename in ftp.nlst():
+      if filename.startswith('tzdata20') and filename.endswith('.tar.gz'):
+        tzdata_filenames.append(filename)
+    tzdata_filenames.sort()
+  else:
+    http = httplib.HTTPConnection('www.iana.org')
+    http.request("GET", "/time-zones")
+    index_html = http.getresponse().read()
+    # Releases are named tzdata<year><letter>.tar.gz (for example tzdata2013d.tar.gz).
+    release_re = r'href="/time-zones/repository/releases/(tzdata20\d\d[a-z]\.tar\.gz)"'
+    tzdata_filenames.extend(re.findall(release_re, index_html))
+    tzdata_filenames.sort()
 
   # If you're several releases behind, we'll walk you through the upgrades
   # one by one.
@@ -129,7 +175,10 @@
   for filename in tzdata_filenames:
     if filename > current_filename:
       print 'Found new tzdata: %s' % filename
-      UpgradeTo(ftp, filename)
+      if use_ftp:
+        FtpUpgrade(ftp, filename)
+      else:
+        HttpUpgrade(http, filename)
       sys.exit(0)
 
   print 'You already have the latest tzdata (%s)!' % current_version
diff --git a/libc/tzcode/asctime.c b/libc/tzcode/asctime.c
index 22bba34..152b0db 100644
--- a/libc/tzcode/asctime.c
+++ b/libc/tzcode/asctime.c
@@ -9,12 +9,6 @@
 ** whereas the output of asctime is supposed to be constant.
 */
 
-#ifndef lint
-#ifndef NOID
-static char elsieid[] = "@(#)asctime.c  8.2";
-#endif /* !defined NOID */
-#endif /* !defined lint */
-
 /*LINTLIBRARY*/
 
 #include "private.h"
@@ -39,9 +33,9 @@
 ** but many implementations pad anyway; most likely the standards are buggy.
 */
 #ifdef __GNUC__
-#define ASCTIME_FMT "%.3s %.3s%3d %2.2d:%2.2d:%2.2d %-4s\n"
+#define ASCTIME_FMT	"%.3s %.3s%3d %2.2d:%2.2d:%2.2d %-4s\n"
 #else /* !defined __GNUC__ */
-#define ASCTIME_FMT "%.3s %.3s%3d %02.2d:%02.2d:%02.2d %-4s\n"
+#define ASCTIME_FMT	"%.3s %.3s%3d %02.2d:%02.2d:%02.2d %-4s\n"
 #endif /* !defined __GNUC__ */
 /*
 ** For years that are more than four digits we put extra spaces before the year
@@ -50,12 +44,12 @@
 ** that no output is better than wrong output).
 */
 #ifdef __GNUC__
-#define ASCTIME_FMT_B   "%.3s %.3s%3d %2.2d:%2.2d:%2.2d     %s\n"
+#define ASCTIME_FMT_B	"%.3s %.3s%3d %2.2d:%2.2d:%2.2d     %s\n"
 #else /* !defined __GNUC__ */
-#define ASCTIME_FMT_B   "%.3s %.3s%3d %02.2d:%02.2d:%02.2d     %s\n"
+#define ASCTIME_FMT_B	"%.3s %.3s%3d %02.2d:%02.2d:%02.2d     %s\n"
 #endif /* !defined __GNUC__ */
 
-#define STD_ASCTIME_BUF_SIZE    26
+#define STD_ASCTIME_BUF_SIZE	26
 /*
 ** Big enough for something such as
 ** ??? ???-2147483648 -2147483648:-2147483648:-2147483648     -2147483648\n
@@ -66,64 +60,65 @@
 ** as an example; the define below calculates the maximum for the system at
 ** hand.
 */
-#define MAX_ASCTIME_BUF_SIZE    (2*3+5*INT_STRLEN_MAXIMUM(int)+7+2+1+1)
+#define MAX_ASCTIME_BUF_SIZE	(2*3+5*INT_STRLEN_MAXIMUM(int)+7+2+1+1)
 
-static char buf_asctime[MAX_ASCTIME_BUF_SIZE];
+static char	buf_asctime[MAX_ASCTIME_BUF_SIZE];
 
 /*
 ** A la ISO/IEC 9945-1, ANSI/IEEE Std 1003.1, 2004 Edition.
 */
 
 char *
-asctime_r(timeptr, buf)
-register const struct tm *  timeptr;
-char *              buf;
+asctime_r(register const struct tm *timeptr, char *buf)
 {
-    static const char   wday_name[][3] = {
-        "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
-    };
-    static const char   mon_name[][3] = {
-        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
-        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
-    };
-    register const char *   wn;
-    register const char *   mn;
-    char            year[INT_STRLEN_MAXIMUM(int) + 2];
-    char            result[MAX_ASCTIME_BUF_SIZE];
+	static const char	wday_name[][3] = {
+		"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
+	};
+	static const char	mon_name[][3] = {
+		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
+		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+	};
+	register const char *	wn;
+	register const char *	mn;
+	char			year[INT_STRLEN_MAXIMUM(int) + 2];
+	char			result[MAX_ASCTIME_BUF_SIZE];
 
-    if (timeptr->tm_wday < 0 || timeptr->tm_wday >= DAYSPERWEEK)
-        wn = "???";
-    else    wn = wday_name[timeptr->tm_wday];
-    if (timeptr->tm_mon < 0 || timeptr->tm_mon >= MONSPERYEAR)
-        mn = "???";
-    else    mn = mon_name[timeptr->tm_mon];
-    /*
-    ** Use strftime's %Y to generate the year, to avoid overflow problems
-    ** when computing timeptr->tm_year + TM_YEAR_BASE.
-    ** Assume that strftime is unaffected by other out-of-range members
-    ** (e.g., timeptr->tm_mday) when processing "%Y".
-    */
-    (void) strftime(year, sizeof year, "%Y", timeptr);
-    /*
-    ** We avoid using snprintf since it's not available on all systems.
-    */
-    (void) sprintf(result,
-        ((strlen(year) <= 4) ? ASCTIME_FMT : ASCTIME_FMT_B),
-        wn, mn,
-        timeptr->tm_mday, timeptr->tm_hour,
-        timeptr->tm_min, timeptr->tm_sec,
-        year);
-    if (strlen(result) < STD_ASCTIME_BUF_SIZE || buf == buf_asctime) {
-        (void) strcpy(buf, result);
-        return buf;
-    } else {
+	if (timeptr == NULL) {
+		errno = EINVAL;
+		return strcpy(buf, "??? ??? ?? ??:??:?? ????\n");
+	}
+	if (timeptr->tm_wday < 0 || timeptr->tm_wday >= DAYSPERWEEK)
+		wn = "???";
+	else	wn = wday_name[timeptr->tm_wday];
+	if (timeptr->tm_mon < 0 || timeptr->tm_mon >= MONSPERYEAR)
+		mn = "???";
+	else	mn = mon_name[timeptr->tm_mon];
+	/*
+	** Use strftime's %Y to generate the year, to avoid overflow problems
+	** when computing timeptr->tm_year + TM_YEAR_BASE.
+	** Assume that strftime is unaffected by other out-of-range members
+	** (e.g., timeptr->tm_mday) when processing "%Y".
+	*/
+	(void) strftime(year, sizeof year, "%Y", timeptr);
+	/*
+	** We avoid using snprintf since it's not available on all systems.
+	*/
+	(void) sprintf(result,
+		((strlen(year) <= 4) ? ASCTIME_FMT : ASCTIME_FMT_B),
+		wn, mn,
+		timeptr->tm_mday, timeptr->tm_hour,
+		timeptr->tm_min, timeptr->tm_sec,
+		year);
+	if (strlen(result) < STD_ASCTIME_BUF_SIZE || buf == buf_asctime)
+		return strcpy(buf, result);
+	else {
 #ifdef EOVERFLOW
-        errno = EOVERFLOW;
+		errno = EOVERFLOW;
 #else /* !defined EOVERFLOW */
-        errno = EINVAL;
+		errno = EINVAL;
 #endif /* !defined EOVERFLOW */
-        return NULL;
-    }
+		return NULL;
+	}
 }
 
 /*
@@ -131,8 +126,7 @@
 */
 
 char *
-asctime(timeptr)
-register const struct tm *  timeptr;
+asctime(register const struct tm *timeptr)
 {
-    return asctime_r(timeptr, buf_asctime);
+	return asctime_r(timeptr, buf_asctime);
 }
diff --git a/libc/tzcode/difftime.c b/libc/tzcode/difftime.c
index f7581a4..fcd18ce 100644
--- a/libc/tzcode/difftime.c
+++ b/libc/tzcode/difftime.c
@@ -3,63 +3,54 @@
 ** 1996-06-05 by Arthur David Olson.
 */
 
-#ifndef lint
-#ifndef NOID
-static char elsieid[] = "@(#)difftime.c 8.1";
-#endif /* !defined NOID */
-#endif /* !defined lint */
-
 /*LINTLIBRARY*/
 
-#include "private.h"    /* for time_t, TYPE_INTEGRAL, and TYPE_SIGNED */
+#include "private.h"	/* for time_t, TYPE_INTEGRAL, and TYPE_SIGNED */
 
-double
-difftime(time1, time0)
-const time_t    time1;
-const time_t    time0;
+double ATTRIBUTE_CONST
+difftime(const time_t time1, const time_t time0)
 {
-    /*
-    ** If (sizeof (double) > sizeof (time_t)) simply convert and subtract
-    ** (assuming that the larger type has more precision).
-    ** This is the common real-world case circa 2004.
-    */
-    if (sizeof (double) > sizeof (time_t))
-        return (double) time1 - (double) time0;
-    if (!TYPE_INTEGRAL(time_t)) {
-        /*
-        ** time_t is floating.
-        */
-        return time1 - time0;
-    }
-    if (!TYPE_SIGNED(time_t)) {
-        /*
-        ** time_t is integral and unsigned.
-        ** The difference of two unsigned values can't overflow
-        ** if the minuend is greater than or equal to the subtrahend.
-        */
-        if (time1 >= time0)
-            return time1 - time0;
-        else    return -((double) (time0 - time1));
-    }
-    /*
-    ** time_t is integral and signed.
-    ** Handle cases where both time1 and time0 have the same sign
-    ** (meaning that their difference cannot overflow).
-    */
-    if ((time1 < 0) == (time0 < 0))
-        return time1 - time0;
-    /*
-    ** time1 and time0 have opposite signs.
-    ** Punt if unsigned long is too narrow.
-    */
-    if (sizeof (unsigned long) < sizeof (time_t))
-        return (double) time1 - (double) time0;
-    /*
-    ** Stay calm...decent optimizers will eliminate the complexity below.
-    */
-    if (time1 >= 0 /* && time0 < 0 */)
-        return (unsigned long) time1 +
-            (unsigned long) (-(time0 + 1)) + 1;
-    return -(double) ((unsigned long) time0 +
-        (unsigned long) (-(time1 + 1)) + 1);
+	/*
+	** If (sizeof (double) > sizeof (time_t)) simply convert and subtract
+	** (assuming that the larger type has more precision).
+	*/
+	if (sizeof (double) > sizeof (time_t))
+		return (double) time1 - (double) time0;
+	if (!TYPE_INTEGRAL(time_t)) {
+		/*
+		** time_t is floating.
+		*/
+		return time1 - time0;
+	}
+	if (!TYPE_SIGNED(time_t)) {
+		/*
+		** time_t is integral and unsigned.
+		** The difference of two unsigned values can't overflow
+		** if the minuend is greater than or equal to the subtrahend.
+		*/
+		if (time1 >= time0)
+			return            time1 - time0;
+		else	return -(double) (time0 - time1);
+	}
+	/*
+	** time_t is integral and signed.
+	** Handle cases where both time1 and time0 have the same sign
+	** (meaning that their difference cannot overflow).
+	*/
+	if ((time1 < 0) == (time0 < 0))
+		return time1 - time0;
+	/*
+	** time1 and time0 have opposite signs.
+	** Punt if uintmax_t is too narrow.
+	** This suffers from double rounding; attempt to lessen that
+	** by using long double temporaries.
+	*/
+	if (sizeof (uintmax_t) < sizeof (time_t))
+		return (long double) time1 - (long double) time0;
+	/*
+	** Stay calm...decent optimizers will eliminate the complexity below.
+	*/
+	if (time1 >= 0 /* && time0 < 0 */)
+		return    (uintmax_t) time1 + (uintmax_t) (-1 - time0) + 1;
+	return -(double) ((uintmax_t) time0 + (uintmax_t) (-1 - time1) + 1);
 }
diff --git a/libc/tzcode/localtime.c b/libc/tzcode/localtime.c
index 8a54e81..d1b49e5 100644
--- a/libc/tzcode/localtime.c
+++ b/libc/tzcode/localtime.c
@@ -3,12 +3,6 @@
 ** 1996-06-05 by Arthur David Olson.
 */
 
-#ifndef lint
-#ifndef NOID
-static char elsieid[] = "@(#)localtime.c    8.3";
-#endif /* !defined NOID */
-#endif /* !defined lint */
-
 /*
 ** Leap second handling from Bradley White.
 ** POSIX-style TZ environment variable handling from Guy Harris.
@@ -21,9 +15,6 @@
 #include "fcntl.h"
 #include "float.h"  /* for FLT_MAX and DBL_MAX */
 
-#include "thread_private.h"
-#include <sys/system_properties.h>
-
 #ifndef TZ_ABBR_MAX_LEN
 #define TZ_ABBR_MAX_LEN 16
 #endif /* !defined TZ_ABBR_MAX_LEN */
@@ -54,53 +45,12 @@
 #  define  XLOG(x)   do{}while (0)
 #endif
 
-/* Add the following function implementations:
- *  timelocal()
- *  timegm()
- *  time2posix()
- *  posix2time()
- */
-#define STD_INSPIRED 1
-
-/* THREAD-SAFETY SUPPORT GOES HERE */
-static pthread_mutex_t  _tzMutex = PTHREAD_MUTEX_INITIALIZER;
-
-static __inline__ void _tzLock(void)
-{
-    if (__isthreaded)
-        pthread_mutex_lock(&_tzMutex);
-}
-
-static __inline__ void _tzUnlock(void)
-{
-    if (__isthreaded)
-        pthread_mutex_unlock(&_tzMutex);
-}
-
-/* Complex computations to determine the min/max of time_t depending
- * on TYPE_BIT / TYPE_SIGNED / TYPE_INTEGRAL.
- * These macros cannot be used in pre-processor directives, so we
- * let the C compiler do the work, which makes things a bit funky.
- */
-static const time_t TIME_T_MAX =
-    TYPE_INTEGRAL(time_t) ?
-        ( TYPE_SIGNED(time_t) ?
-            ~((time_t)1 << (TYPE_BIT(time_t)-1))
-        :
-            ~(time_t)0
-        )
-    : /* if time_t is a floating point number */
-        ( sizeof(time_t) > sizeof(float) ? (time_t)DBL_MAX : (time_t)FLT_MAX );
-
-static const time_t TIME_T_MIN =
-    TYPE_INTEGRAL(time_t) ?
-        ( TYPE_SIGNED(time_t) ?
-            ((time_t)1 << (TYPE_BIT(time_t)-1))
-        :
-            0
-        )
-    :
-        ( sizeof(time_t) > sizeof(float) ? (time_t)DBL_MIN : (time_t)FLT_MIN );
+/* BEGIN android-added: thread-safety. */
+#include <pthread.h>
+static pthread_mutex_t _tzMutex = PTHREAD_MUTEX_INITIALIZER;
+static inline void _tzLock(void) { pthread_mutex_lock(&_tzMutex); }
+static inline void _tzUnlock(void) { pthread_mutex_unlock(&_tzMutex); }
+/* END android-added */
 
 #ifndef WILDABBR
 /*
@@ -141,16 +91,16 @@
 #endif /* !defined TZDEFDST */
 
 struct ttinfo {             /* time type information */
-    long    tt_gmtoff;  /* UTC offset in seconds */
-    int     tt_isdst;   /* used to set tm_isdst */
-    int     tt_abbrind; /* abbreviation list index */
-    int     tt_ttisstd; /* TRUE if transition is std time */
-    int     tt_ttisgmt; /* TRUE if transition is UTC */
+    int_fast32_t tt_gmtoff;  /* UTC offset in seconds */
+    int          tt_isdst;   /* used to set tm_isdst */
+    int          tt_abbrind; /* abbreviation list index */
+    int          tt_ttisstd; /* TRUE if transition is std time */
+    int          tt_ttisgmt; /* TRUE if transition is UTC */
 };
 
 struct lsinfo {             /* leap second information */
-    time_t      ls_trans;   /* transition time */
-    long        ls_corr;    /* correction to apply */
+    time_t       ls_trans;   /* transition time */
+    int_fast64_t ls_corr;    /* correction to apply */
 };
 
 #define BIGGEST(a, b)   (((a) > (b)) ? (a) : (b))
@@ -162,10 +112,6 @@
 #define MY_TZNAME_MAX   255
 #endif /* !defined TZNAME_MAX */
 
-/* XXX: This code should really use time64_t instead of time_t
- *      but we can't change it without re-generating the index
- *      file first with the correct data.
- */
 struct state {
     int     leapcnt;
     int     timecnt;
@@ -179,14 +125,15 @@
     char        chars[BIGGEST(BIGGEST(TZ_MAX_CHARS + 1, sizeof gmt),
                 (2 * (MY_TZNAME_MAX + 1)))];
     struct lsinfo   lsis[TZ_MAX_LEAPS];
+    int defaulttype; /* for early times or if no transitions */
 };
 
 struct rule {
-    int     r_type;     /* type of rule--see below */
-    int     r_day;      /* day number of rule */
-    int     r_week;     /* week number of rule */
-    int     r_mon;      /* month number of rule */
-    long        r_time;     /* transition time of rule */
+    int          r_type;     /* type of rule--see below */
+    int          r_day;      /* day number of rule */
+    int          r_week;     /* week number of rule */
+    int          r_mon;      /* month number of rule */
+    int_fast32_t r_time;     /* transition time of rule */
 };
 
 #define JULIAN_DAY      0   /* Jn - Julian day */
@@ -200,51 +147,54 @@
 /* NOTE: all internal functions assume that _tzLock() was already called */
 
 static int __bionic_open_tzdata(const char*, int*);
-static long     detzcode P((const char * codep));
-static time_t   detzcode64 P((const char * codep));
-static int      differ_by_repeat P((time_t t1, time_t t0));
-static const char * getzname P((const char * strp));
-static const char * getqzname P((const char * strp, const int delim));
-static const char * getnum P((const char * strp, int * nump, int min,
-                int max));
-static const char * getsecs P((const char * strp, long * secsp));
-static const char * getoffset P((const char * strp, long * offsetp));
-static const char * getrule P((const char * strp, struct rule * rulep));
-static void     gmtload P((struct state * sp));
-static struct tm *  gmtsub P((const time_t * timep, long offset,
-                struct tm * tmp, const struct state * sp)); // android-changed: added sp.
-static struct tm *  localsub P((const time_t * timep, long offset,
-                struct tm * tmp, const struct state * sp)); // android-changed: added sp.
-static int      increment_overflow P((int * number, int delta));
-static int      leaps_thru_end_of P((int y));
-static int      long_increment_overflow P((long * number, int delta));
-static int      long_normalize_overflow P((long * tensptr,
-                int * unitsptr, int base));
-static int      normalize_overflow P((int * tensptr, int * unitsptr,
-                int base));
-static void     settzname P((void));
-static time_t       time1 P((struct tm * tmp,
-                struct tm * (*funcp) P((const time_t *,
-                long, struct tm *, const struct state *)), // android-changed: added state*.
-                long offset, const struct state * sp)); // android-changed: added sp.
-static time_t       time2 P((struct tm *tmp,
-                struct tm * (*funcp) P((const time_t *,
-                long, struct tm*, const struct state *)), // android-changed: added state*.
-                long offset, int * okayp, const struct state * sp)); // android-changed: added sp.
-static time_t       time2sub P((struct tm *tmp,
-                struct tm * (*funcp) P((const time_t *,
-                long, struct tm*, const struct state *)), // android-changed: added state*.
-                long offset, int * okayp, int do_norm_secs, const struct state * sp)); // android-change: added sp.
-static struct tm *  timesub P((const time_t * timep, long offset,
-                const struct state * sp, struct tm * tmp));
-static int      tmcomp P((const struct tm * atmp,
-                const struct tm * btmp));
-static time_t       transtime P((time_t janfirst, int year,
-                const struct rule * rulep, long offset));
-static int      tzload P((const char * name, struct state * sp,
-                int doextend));
-static int      tzparse P((const char * name, struct state * sp,
-                int lastditch));
+static int_fast32_t detzcode(const char * codep);
+static time_t   detzcode64(const char * codep);
+static int      differ_by_repeat(time_t t1, time_t t0);
+static const char * getzname(const char * strp) ATTRIBUTE_PURE;
+static const char * getqzname(const char * strp, const int delim)
+        ATTRIBUTE_PURE;
+static const char * getnum(const char * strp, int * nump, int min,
+                int max);
+static const char * getsecs(const char * strp, int_fast32_t * secsp);
+static const char * getoffset(const char * strp, int_fast32_t * offsetp);
+static const char * getrule(const char * strp, struct rule * rulep);
+static void     gmtload(struct state * sp);
+static struct tm *  gmtsub(const time_t * timep, const int_fast32_t offset,
+                struct tm * tmp, const struct state * sp); // android-changed: added sp.
+static struct tm *  localsub(const time_t * timep, int_fast32_t offset,
+                struct tm * tmp, const struct state * sp); // android-changed: added sp.
+static int      increment_overflow(int * number, int delta);
+static int      leaps_thru_end_of(int y) ATTRIBUTE_PURE;
+static int      increment_overflow32(int_fast32_t * number, int delta);
+static int      normalize_overflow32(int_fast32_t * tensptr,
+                int * unitsptr, int base);
+static int      normalize_overflow(int * tensptr, int * unitsptr,
+                int base);
+static void     settzname(void);
+static time_t       time1(struct tm * tmp,
+                struct tm * (*funcp)(const time_t *,
+                int_fast32_t, struct tm *, const struct state *), // android-changed: added state*.
+                int_fast32_t offset, const struct state * sp); // android-changed: added sp.
+static time_t       time2(struct tm * const tmp,
+                struct tm * (*const funcp)(const time_t *,
+                int_fast32_t, struct tm*, const struct state *), // android-changed: added state*.
+                int_fast32_t offset, int * okayp, const struct state * sp); // android-changed: added sp.
+static time_t       time2sub(struct tm *tmp,
+                struct tm * (*funcp) (const time_t *,
+                int_fast32_t, struct tm*, const struct state *), // android-changed: added state*.
+                int_fast32_t offset, int * okayp, int do_norm_secs, const struct state * sp); // android-change: added sp.
+static struct tm *  timesub(const time_t * timep, int_fast32_t offset,
+                const struct state * sp, struct tm * tmp);
+static int      tmcomp(const struct tm * atmp,
+                const struct tm * btmp);
+static time_t transtime(time_t janfirst, int year,
+                        const struct rule * rulep, int_fast32_t offset)
+        ATTRIBUTE_PURE;
+static int		typesequiv(const struct state * sp, int a, int b);
+static int      tzload(const char * name, struct state * sp,
+                int doextend);
+static int      tzparse(const char * name, struct state * sp,
+                int lastditch);
 
 #ifdef ALL_STATE
 static struct state *   lclptr;
@@ -290,34 +240,32 @@
 time_t          altzone = 0;
 #endif /* defined ALTZONE */
 
-static long
-detzcode(codep)
-const char * const  codep;
+static int_fast32_t
+detzcode(const char *const codep)
 {
-    register long   result;
-    register int    i;
+	register int_fast32_t	result;
+	register int		i;
 
-    result = (codep[0] & 0x80) ? ~0L : 0;
-    for (i = 0; i < 4; ++i)
-        result = (result << 8) | (codep[i] & 0xff);
-    return result;
+	result = (codep[0] & 0x80) ? -1 : 0;	/* seed with all-ones if sign bit set */
+	for (i = 0; i < 4; ++i)	/* big-endian; "* 256 +" avoids shifting a negative value */
+		result = result * 256 + (codep[i] & 0xff);
+	return result;
 }
 
 static time_t
-detzcode64(codep)
-const char * const  codep;
+detzcode64(const char *const codep)
 {
-    register time_t result;
-    register int    i;
+	register time_t	result;
+	register int	i;
 
-    result = (codep[0] & 0x80) ?  (~(int_fast64_t) 0) : 0;
-    for (i = 0; i < 8; ++i)
-        result = result * 256 + (codep[i] & 0xff);
-    return result;
+	result = (codep[0] & 0x80) ?  (~(int_fast64_t) 0) : 0;
+	for (i = 0; i < 8; ++i)
+		result = result * 256 + (codep[i] & 0xff);
+	return result;
 }
 
 static void
-settzname P((void))
+settzname(void)
 {
     register struct state * const   sp = lclptr;
     register int            i;
@@ -337,25 +285,14 @@
         return;
     }
 #endif /* defined ALL_STATE */
-    for (i = 0; i < sp->typecnt; ++i) {
-        register const struct ttinfo * const    ttisp = &sp->ttis[i];
-
-        tzname[ttisp->tt_isdst] =
-            &sp->chars[ttisp->tt_abbrind];
-#ifdef USG_COMPAT
-        if (ttisp->tt_isdst)
-            daylight = 1;
-        if (i == 0 || !ttisp->tt_isdst)
-            timezone = -(ttisp->tt_gmtoff);
-#endif /* defined USG_COMPAT */
-#ifdef ALTZONE
-        if (i == 0 || ttisp->tt_isdst)
-            altzone = -(ttisp->tt_gmtoff);
-#endif /* defined ALTZONE */
-    }
     /*
     ** And to get the latest zone names into tzname. . .
     */
+    for (i = 0; i < sp->typecnt; ++i) {
+        register const struct ttinfo * const    ttisp = &sp->ttis[i];
+
+        tzname[ttisp->tt_isdst] = &sp->chars[ttisp->tt_abbrind];
+    }
     for (i = 0; i < sp->timecnt; ++i) {
         register const struct ttinfo * const    ttisp =
                             &sp->ttis[
@@ -363,6 +300,16 @@
 
         tzname[ttisp->tt_isdst] =
             &sp->chars[ttisp->tt_abbrind];
+#ifdef USG_COMPAT
+        if (ttisp->tt_isdst)
+            daylight = 1;
+        if (!ttisp->tt_isdst)
+            timezone = -(ttisp->tt_gmtoff);
+#endif /* defined USG_COMPAT */
+#ifdef ALTZONE
+        if (ttisp->tt_isdst)
+            altzone = -(ttisp->tt_gmtoff);
+#endif /* defined ALTZONE */
     }
     /*
     ** Finally, scrub the abbreviations.
@@ -385,113 +332,70 @@
 }
 
 static int
-differ_by_repeat(t1, t0)
-const time_t    t1;
-const time_t    t0;
+differ_by_repeat(const time_t t1, const time_t t0)
 {
-    if (TYPE_INTEGRAL(time_t) &&
-        TYPE_BIT(time_t) - TYPE_SIGNED(time_t) < SECSPERREPEAT_BITS)
-            return 0;
-#if SECSPERREPEAT_BITS <= 32  /* to avoid compiler warning (condition is always false) */
-        return (t1 - t0) == SECSPERREPEAT;
-#else
-        return 0;
-#endif
-}
-
-static int toint(unsigned char *s) {
-    return (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3];
+	if (TYPE_INTEGRAL(time_t) &&
+		TYPE_BIT(time_t) - TYPE_SIGNED(time_t) < SECSPERREPEAT_BITS)
+			return 0;
+	return t1 - t0 == SECSPERREPEAT;
 }
 
 static int
-tzload(const char* name, struct state* const sp, const int doextend)
+tzload(register const char* name, register struct state* const sp,
+       register const int doextend)
 {
     register const char *       p;
     register int            i;
     register int            fid;
     register int            stored;
     register int            nread;
-    union {
+    typedef union {
         struct tzhead   tzhead;
         char        buf[2 * sizeof(struct tzhead) +
                     2 * sizeof *sp +
                     4 * TZ_MAX_TIMES];
-    } u;
-    int                     toread = sizeof u.buf;
+    } u_t;
+#ifdef ALL_STATE
+    register u_t *			up;
 
-        if (name == NULL && (name = TZDEFAULT) == NULL) {
-                XLOG(("tzload: null 'name' parameter\n" ));
-                return -1;
-        }
-    {
-        register int    doaccess;
-        /*
-        ** Section 4.9.1 of the C standard says that
-        ** "FILENAME_MAX expands to an integral constant expression
-        ** that is the size needed for an array of char large enough
-        ** to hold the longest file name string that the implementation
-        ** guarantees can be opened."
-        */
-        char        fullname[FILENAME_MAX + 1];
-        char        *origname = (char*) name;
+    up = (u_t *) calloc(1, sizeof *up);
+    if (up == NULL)
+        return -1;
+#else /* !defined ALL_STATE */
+    u_t				u;
+    register u_t * const		up = &u;
+#endif /* !defined ALL_STATE */
 
-        if (name[0] == ':')
-            ++name;
-        doaccess = name[0] == '/';
-        if (!doaccess) {
-            if ((p = TZDIR) == NULL) {
-                XLOG(("tzload: null TZDIR macro ?\n" ));
-                return -1;
-            }
-            if ((strlen(p) + strlen(name) + 1) >= sizeof fullname) {
-                XLOG(( "tzload: path too long: %s/%s\n", p, name ));
-                return -1;
-            }
-            (void) strcpy(fullname, p);
-            (void) strcat(fullname, "/");
-            (void) strcat(fullname, name);
-            /*
-            ** Set doaccess if '.' (as in "../") shows up in name.
-            */
-            if (strchr(name, '.') != NULL)
-                doaccess = TRUE;
-            name = fullname;
-        }
-        if (doaccess && access(name, R_OK) != 0) {
-            XLOG(( "tzload: could not find '%s'\n", name ));
-            return -1;
-        }
-        if ((fid = open(name, OPEN_MODE)) == -1) {
-            fid = __bionic_open_tzdata(origname, &toread);
-            if (fid < 0) {
-                return -1;
-            }
-        }
+    sp->goback = sp->goahead = FALSE;
+    if (name == NULL && (name = TZDEFAULT) == NULL)
+        goto oops;
+    int toread;
+    fid = __bionic_open_tzdata(name, &toread);
+    if (fid < 0) {
+        goto oops; /* not "return": oops frees up when ALL_STATE is defined */
     }
-    nread = read(fid, u.buf, toread);
-        if (close(fid) < 0 || nread <= 0) {
-            XLOG(( "tzload: could not read content of '%s'\n", DATAFILE ));
-            return -1;
-        }
+    nread = read(fid, up->buf, toread);
+    if (close(fid) < 0 || nread <= 0)
+        goto oops;
     for (stored = 4; stored <= 8; stored *= 2) {
         int     ttisstdcnt;
         int     ttisgmtcnt;
 
-        ttisstdcnt = (int) detzcode(u.tzhead.tzh_ttisstdcnt);
-        ttisgmtcnt = (int) detzcode(u.tzhead.tzh_ttisgmtcnt);
-        sp->leapcnt = (int) detzcode(u.tzhead.tzh_leapcnt);
-        sp->timecnt = (int) detzcode(u.tzhead.tzh_timecnt);
-        sp->typecnt = (int) detzcode(u.tzhead.tzh_typecnt);
-        sp->charcnt = (int) detzcode(u.tzhead.tzh_charcnt);
-        p = u.tzhead.tzh_charcnt + sizeof u.tzhead.tzh_charcnt;
+        ttisstdcnt = (int) detzcode(up->tzhead.tzh_ttisstdcnt);
+        ttisgmtcnt = (int) detzcode(up->tzhead.tzh_ttisgmtcnt);
+        sp->leapcnt = (int) detzcode(up->tzhead.tzh_leapcnt);
+        sp->timecnt = (int) detzcode(up->tzhead.tzh_timecnt);
+        sp->typecnt = (int) detzcode(up->tzhead.tzh_typecnt);
+        sp->charcnt = (int) detzcode(up->tzhead.tzh_charcnt);
+        p = up->tzhead.tzh_charcnt + sizeof up->tzhead.tzh_charcnt;
         if (sp->leapcnt < 0 || sp->leapcnt > TZ_MAX_LEAPS ||
             sp->typecnt <= 0 || sp->typecnt > TZ_MAX_TYPES ||
             sp->timecnt < 0 || sp->timecnt > TZ_MAX_TIMES ||
             sp->charcnt < 0 || sp->charcnt > TZ_MAX_CHARS ||
             (ttisstdcnt != sp->typecnt && ttisstdcnt != 0) ||
             (ttisgmtcnt != sp->typecnt && ttisgmtcnt != 0))
-                return -1;
-        if (nread - (p - u.buf) <
+                goto oops;
+        if (nread - (p - up->buf) <
             sp->timecnt * stored +      /* ats */
             sp->timecnt +           /* types */
             sp->typecnt * 6 +       /* ttinfos */
@@ -499,7 +403,7 @@
             sp->leapcnt * (stored + 4) +    /* lsinfos */
             ttisstdcnt +            /* ttisstds */
             ttisgmtcnt)         /* ttisgmts */
-                return -1;
+                goto oops;
         for (i = 0; i < sp->timecnt; ++i) {
             sp->ats[i] = (stored == 4) ?
                 detzcode(p) : detzcode64(p);
@@ -508,7 +412,7 @@
         for (i = 0; i < sp->timecnt; ++i) {
             sp->types[i] = (unsigned char) *p++;
             if (sp->types[i] >= sp->typecnt)
-                return -1;
+                goto oops;
         }
         for (i = 0; i < sp->typecnt; ++i) {
             register struct ttinfo *    ttisp;
@@ -518,11 +422,11 @@
             p += 4;
             ttisp->tt_isdst = (unsigned char) *p++;
             if (ttisp->tt_isdst != 0 && ttisp->tt_isdst != 1)
-                return -1;
+                goto oops;
             ttisp->tt_abbrind = (unsigned char) *p++;
             if (ttisp->tt_abbrind < 0 ||
                 ttisp->tt_abbrind > sp->charcnt)
-                    return -1;
+                    goto oops;
         }
         for (i = 0; i < sp->charcnt; ++i)
             sp->chars[i] = *p++;
@@ -547,7 +451,7 @@
                 ttisp->tt_ttisstd = *p++;
                 if (ttisp->tt_ttisstd != TRUE &&
                     ttisp->tt_ttisstd != FALSE)
-                        return -1;
+                        goto oops;
             }
         }
         for (i = 0; i < sp->typecnt; ++i) {
@@ -560,7 +464,7 @@
                 ttisp->tt_ttisgmt = *p++;
                 if (ttisp->tt_ttisgmt != TRUE &&
                     ttisp->tt_ttisgmt != FALSE)
-                        return -1;
+                        goto oops;
             }
         }
         /*
@@ -568,36 +472,47 @@
         ** signed time_t system but using a data file with
         ** unsigned values (or vice versa).
         */
-        for (i = 0; i < sp->timecnt - 2; ++i)
-            if (sp->ats[i] > sp->ats[i + 1]) {
-                ++i;
-                if (TYPE_SIGNED(time_t)) {
-                    /*
-                    ** Ignore the end (easy).
-                    */
-                    sp->timecnt = i;
-                } else {
-                    /*
-                    ** Ignore the beginning (harder).
-                    */
-                    register int    j;
+        for (i = 0; i < sp->timecnt; ++i)
+            if ((i < sp->timecnt - 1 &&
+                sp->ats[i] > sp->ats[i + 1]) ||
+                (i == sp->timecnt - 1 && !TYPE_SIGNED(time_t) &&
+                sp->ats[i] >
+                ((stored == 4) ? INT32_MAX : INT64_MAX))) {
+                    if (TYPE_SIGNED(time_t)) {
+                        /*
+                        ** Ignore the end (easy).
+                        */
+                        sp->timecnt = i + 1;
+                    } else {
+                        /*
+                        ** Ignore the beginning (harder).
+                        */
+                        register int    j;
 
-                    for (j = 0; j + i < sp->timecnt; ++j) {
-                        sp->ats[j] = sp->ats[j + i];
-                        sp->types[j] = sp->types[j + i];
+                        /*
+                        ** Keep the record right before the
+                        ** epoch boundary,
+                        ** but tweak it so that it starts
+                        ** right with the epoch
+                        ** (thanks to Doug Bailey).
+                        */
+                        sp->ats[i] = 0;
+                        for (j = 0; j + i < sp->timecnt; ++j) {
+                            sp->ats[j] = sp->ats[j + i];
+                            sp->types[j] = sp->types[j + i];
+                        }
+                        sp->timecnt = j;
                     }
-                    sp->timecnt = j;
-                }
-                break;
+                    break;
             }
         /*
         ** If this is an old file, we're done.
         */
-        if (u.tzhead.tzh_version[0] == '\0')
+        if (up->tzhead.tzh_version[0] == '\0')
             break;
-        nread -= p - u.buf;
+        nread -= p - up->buf;
         for (i = 0; i < nread; ++i)
-            u.buf[i] = p[i];
+            up->buf[i] = p[i];
         /*
         ** If this is a narrow integer time_t system, we're done.
         */
@@ -605,13 +520,13 @@
             break;
     }
     if (doextend && nread > 2 &&
-        u.buf[0] == '\n' && u.buf[nread - 1] == '\n' &&
+        up->buf[0] == '\n' && up->buf[nread - 1] == '\n' &&
         sp->typecnt + 2 <= TZ_MAX_TYPES) {
             struct state    ts;
             register int    result;
 
-            u.buf[nread - 1] = '\0';
-            result = tzparse(&u.buf[1], &ts, FALSE);
+            up->buf[nread - 1] = '\0';
+            result = tzparse(&up->buf[1], &ts, FALSE);
             if (result == 0 && ts.typecnt == 2 &&
                 sp->charcnt + ts.charcnt <= TZ_MAX_CHARS) {
                     for (i = 0; i < 2; ++i)
@@ -639,16 +554,87 @@
                     sp->ttis[sp->typecnt++] = ts.ttis[1];
             }
     }
-    i = 2 * YEARSPERREPEAT;
-    sp->goback = sp->goahead = sp->timecnt > i;
-    sp->goback &= sp->types[i] == sp->types[0] &&
-        differ_by_repeat(sp->ats[i], sp->ats[0]);
-    sp->goahead &=
-        sp->types[sp->timecnt - 1] == sp->types[sp->timecnt - 1 - i] &&
-        differ_by_repeat(sp->ats[sp->timecnt - 1],
-             sp->ats[sp->timecnt - 1 - i]);
-        XLOG(( "tzload: load ok !!\n" ));
-    return 0;
+    if (sp->timecnt > 1) {
+        for (i = 1; i < sp->timecnt; ++i)
+            if (typesequiv(sp, sp->types[i], sp->types[0]) &&
+                    differ_by_repeat(sp->ats[i], sp->ats[0])) {
+                sp->goback = TRUE;
+                break;
+            }
+            for (i = sp->timecnt - 2; i >= 0; --i)
+                if (typesequiv(sp, sp->types[sp->timecnt - 1],
+                               sp->types[i]) &&
+                        differ_by_repeat(sp->ats[sp->timecnt - 1],
+                                         sp->ats[i])) {
+                    sp->goahead = TRUE;
+                    break;
+            }
+        }
+        /*
+        ** If type 0 is unused in transitions (only the first timecnt
+        ** entries of types[] are valid), it's the type for early times.
+        */
+        for (i = 0; i < sp->timecnt; ++i)
+            if (sp->types[i] == 0)
+                break;
+        i = (i >= sp->timecnt) ? 0 : -1;
+        /*
+        ** Absent the above,
+        ** if there are transition times
+        ** and the first transition is to a daylight time
+        ** find the standard type less than and closest to
+        ** the type of the first transition.
+        */
+        if (i < 0 && sp->timecnt > 0 && sp->ttis[sp->types[0]].tt_isdst) {
+            i = sp->types[0];
+            while (--i >= 0)
+                if (!sp->ttis[i].tt_isdst)
+                    break;
+        }
+        /*
+        ** If no result yet, find the first standard type.
+        ** If there is none, punt to type zero.
+        */
+        if (i < 0) {
+            i = 0;
+            while (sp->ttis[i].tt_isdst)
+                if (++i >= sp->typecnt) {
+                    i = 0;
+                    break;
+                }
+        }
+        sp->defaulttype = i;
+#ifdef ALL_STATE
+        free(up);
+#endif /* defined ALL_STATE */
+        return 0;
+oops:
+#ifdef ALL_STATE
+        free(up);
+#endif /* defined ALL_STATE */
+        return -1;
+}
+
+static int
+typesequiv(const struct state *const sp, const int a, const int b)
+{
+	register int	result;
+
+	if (sp == NULL ||
+		a < 0 || a >= sp->typecnt ||
+		b < 0 || b >= sp->typecnt)
+			result = FALSE;
+	else {
+		register const struct ttinfo *	ap = &sp->ttis[a];
+		register const struct ttinfo *	bp = &sp->ttis[b];
+		result = ap->tt_gmtoff == bp->tt_gmtoff &&
+			ap->tt_isdst == bp->tt_isdst &&
+			ap->tt_ttisstd == bp->tt_ttisstd &&
+			ap->tt_ttisgmt == bp->tt_ttisgmt &&
+			strcmp(&sp->chars[ap->tt_abbrind],
+			&sp->chars[bp->tt_abbrind]) == 0;
+	}
+	return result;
 }
 
 static const int    mon_lengths[2][MONSPERYEAR] = {
@@ -667,8 +653,7 @@
 */
 
 static const char *
-getzname(strp)
-register const char *   strp;
+getzname(register const char * strp)
 {
     register char   c;
 
@@ -705,11 +690,7 @@
 */
 
 static const char *
-getnum(strp, nump, min, max)
-register const char *   strp;
-int * const     nump;
-const int       min;
-const int       max;
+getnum(register const char * strp, int * const nump, const int min, const int max)
 {
     register char   c;
     register int    num;
@@ -738,9 +719,7 @@
 */
 
 static const char *
-getsecs(strp, secsp)
-register const char *   strp;
-long * const        secsp;
+getsecs(register const char *strp, int_fast32_t *const secsp)
 {
     int num;
 
@@ -753,7 +732,7 @@
     strp = getnum(strp, &num, 0, HOURSPERDAY * DAYSPERWEEK - 1);
     if (strp == NULL)
         return NULL;
-    *secsp = num * (long) SECSPERHOUR;
+    *secsp = num * (int_fast32_t) SECSPERHOUR;
     if (*strp == ':') {
         ++strp;
         strp = getnum(strp, &num, 0, MINSPERHOUR - 1);
@@ -780,9 +759,7 @@
 */
 
 static const char *
-getoffset(strp, offsetp)
-register const char *   strp;
-long * const        offsetp;
+getoffset(register const char *strp, int_fast32_t *const offsetp)
 {
     register int    neg = 0;
 
@@ -807,9 +784,7 @@
 */
 
 static const char *
-getrule(strp, rulep)
-const char *            strp;
-register struct rule * const    rulep;
+getrule(const char * strp, register struct rule * const rulep)
 {
     if (*strp == 'J') {
         /*
@@ -861,11 +836,8 @@
 */
 
 static time_t
-transtime(janfirst, year, rulep, offset)
-const time_t                janfirst;
-const int               year;
-register const struct rule * const  rulep;
-const long              offset;
+transtime(const time_t janfirst, const int year,
+          register const struct rule *const rulep, const int_fast32_t offset)
 {
     register int    leapyear;
     register time_t value;
@@ -956,21 +928,20 @@
 */
 
 static int
-tzparse(name, sp, lastditch)
-const char *            name;
-register struct state * const   sp;
-const int           lastditch;
+tzparse(const char * name, register struct state * const sp,
+        const int lastditch)
 {
     const char *            stdname;
     const char *            dstname;
     size_t              stdlen;
     size_t              dstlen;
-    long                stdoffset;
-    long                dstoffset;
-    register time_t *       atp;
+    int_fast32_t                stdoffset;
+    int_fast32_t                dstoffset;
+    register time_t *           atp;
     register unsigned char *    typep;
     register char *         cp;
     register int            load_result;
+    static struct ttinfo    zttinfo;
 
     INITIALIZE(dstname);
     stdname = name;
@@ -1002,7 +973,6 @@
     load_result = tzload(TZDEFRULES, sp, FALSE);
     if (load_result != 0)
         sp->leapcnt = 0;        /* so, we're off a little */
-    sp->timecnt = 0;
     if (*name != '\0') {
         if (*name == '<') {
             dstname = ++name;
@@ -1044,6 +1014,7 @@
             /*
             ** Two transitions per year, from EPOCH_YEAR forward.
             */
+            sp->ttis[0] = sp->ttis[1] = zttinfo;
             sp->ttis[0].tt_gmtoff = -dstoffset;
             sp->ttis[0].tt_isdst = 1;
             sp->ttis[0].tt_abbrind = stdlen + 1;
@@ -1053,6 +1024,7 @@
             atp = sp->ats;
             typep = sp->types;
             janfirst = 0;
+            sp->timecnt = 0;
             for (year = EPOCH_YEAR;
                 sp->timecnt + 2 <= TZ_MAX_TIMES;
                 ++year) {
@@ -1082,9 +1054,9 @@
                 janfirst = newfirst;
             }
         } else {
-            register long   theirstdoffset;
-            register long   theirdstoffset;
-            register long   theiroffset;
+            register int_fast32_t   theirstdoffset;
+            register int_fast32_t   theirdstoffset;
+            register int_fast32_t   theiroffset;
             register int    isdst;
             register int    i;
             register int    j;
@@ -1156,8 +1128,8 @@
             }
             /*
             ** Finally, fill in ttis.
-            ** ttisstd and ttisgmt need not be handled.
             */
+            sp->ttis[0] = sp->ttis[1] = zttinfo;
             sp->ttis[0].tt_gmtoff = -stdoffset;
             sp->ttis[0].tt_isdst = FALSE;
             sp->ttis[0].tt_abbrind = 0;
@@ -1170,6 +1142,7 @@
         dstlen = 0;
         sp->typecnt = 1;        /* only standard time */
         sp->timecnt = 0;
+        sp->ttis[0] = zttinfo;
         sp->ttis[0].tt_gmtoff = -stdoffset;
         sp->ttis[0].tt_isdst = 0;
         sp->ttis[0].tt_abbrind = 0;
@@ -1191,15 +1164,21 @@
 }
 
 static void
-gmtload(sp)
-struct state * const    sp;
+gmtload(struct state * const sp)
 {
     if (tzload(gmt, sp, TRUE) != 0)
         (void) tzparse(gmt, sp, TRUE);
 }
 
-static void
-tzsetwall P((void))
+#ifndef STD_INSPIRED
+/*
+** A non-static declaration of tzsetwall in a system header file
+** may cause a warning about this upcoming static declaration...
+*/
+static
+#endif /* !defined STD_INSPIRED */
+void
+tzsetwall(void)
 {
     if (lcl_is_set < 0)
         return;
@@ -1207,29 +1186,32 @@
 
 #ifdef ALL_STATE
     if (lclptr == NULL) {
-        lclptr = (struct state *) malloc(sizeof *lclptr);
+        lclptr = calloc(1, sizeof *lclptr);
         if (lclptr == NULL) {
             settzname();    /* all we can do */
             return;
         }
     }
 #endif /* defined ALL_STATE */
-    if (tzload((char *) NULL, lclptr, TRUE) != 0)
+    if (tzload(NULL, lclptr, TRUE) != 0)
         gmtload(lclptr);
     settzname();
 }
 
+#include <sys/system_properties.h> // For __system_property_get.
+
 static void
-tzset_locked P((void))
+tzset_locked(void)
 {
     register const char *   name = NULL;
-    static char buf[PROP_VALUE_MAX];
 
     name = getenv("TZ");
 
     // try the "persist.sys.timezone" system property first
-    if (name == NULL && __system_property_get("persist.sys.timezone", buf) > 0)
+    static char buf[PROP_VALUE_MAX];
+    if (name == NULL && __system_property_get("persist.sys.timezone", buf) > 0) {
         name = buf;
+    }
 
     if (name == NULL) {
         tzsetwall();
@@ -1244,7 +1226,7 @@
 
 #ifdef ALL_STATE
     if (lclptr == NULL) {
-        lclptr = (struct state *) malloc(sizeof *lclptr);
+        lclptr = calloc(1, sizeof *lclptr);
         if (lclptr == NULL) {
             settzname();    /* all we can do */
             return;
@@ -1269,7 +1251,7 @@
 }
 
 void
-tzset P((void))
+tzset(void)
 {
     _tzLock();
     tzset_locked();
@@ -1287,11 +1269,8 @@
 
 /*ARGSUSED*/
 static struct tm *
-localsub(timep, offset, tmp, sp) // android-changed: added sp.
-const time_t * const    timep;
-const long      offset;
-struct tm * const   tmp;
-const struct state * sp; // android-added: added sp.
+localsub(const time_t * const timep, const int_fast32_t offset,
+         struct tm * const tmp, const struct state * sp) // android-changed: added sp.
 {
     register const struct ttinfo *  ttisp;
     register int            i;
@@ -1347,12 +1326,7 @@
             return result;
     }
     if (sp->timecnt == 0 || t < sp->ats[0]) {
-        i = 0;
-        while (sp->ttis[i].tt_isdst)
-            if (++i >= sp->typecnt) {
-                i = 0;
-                break;
-            }
+        i = sp->defaulttype;
     } else {
         register int    lo = 1;
         register int    hi = sp->timecnt;
@@ -1383,8 +1357,7 @@
 }
 
 struct tm *
-localtime(timep)
-const time_t * const    timep;
+localtime(const time_t * const timep)
 {
     return localtime_r(timep, &tmGlobal);
 }
@@ -1394,9 +1367,7 @@
 */
 
 struct tm *
-localtime_r(timep, tmp)
-const time_t * const    timep;
-struct tm *     tmp;
+localtime_r(const time_t * const timep, struct tm * tmp)
 {
     struct tm*  result;
 
@@ -1413,11 +1384,8 @@
 */
 
 static struct tm *
-gmtsub(timep, offset, tmp, sp) // android-changed: added sp.
-const time_t * const    timep;
-const long      offset;
-struct tm * const   tmp;
-const struct state * sp; // android-changed: added sp.
+gmtsub(const time_t * const timep, const int_fast32_t offset,
+       struct tm *const tmp, const struct state * sp) // android-changed: added sp.
 {
     register struct tm *    result;
 
@@ -1426,7 +1394,7 @@
     if (!gmt_is_set) {
         gmt_is_set = TRUE;
 #ifdef ALL_STATE
-        gmtptr = (struct state *) malloc(sizeof *gmtptr);
+        gmtptr = calloc(1, sizeof *gmtptr);
         if (gmtptr != NULL)
 #endif /* defined ALL_STATE */
             gmtload(gmtptr);
@@ -1455,8 +1423,7 @@
 }
 
 struct tm *
-gmtime(timep)
-const time_t * const    timep;
+gmtime(const time_t * const timep)
 {
     return gmtime_r(timep, &tmGlobal);
 }
@@ -1466,9 +1433,7 @@
 */
 
 struct tm *
-gmtime_r(timep, tmp)
-const time_t * const    timep;
-struct tm *     tmp;
+gmtime_r(const time_t * const timep, struct tm * tmp)
 {
     struct tm*  result;
 
@@ -1479,45 +1444,30 @@
     return result;
 }
 
-#ifdef STD_INSPIRED
-#if 0 /* disabled because there is no good documentation for this function */
-struct tm *
-offtime(timep, offset)
-const time_t * const    timep;
-const long      offset;
-{
-    return gmtsub(timep, offset, &tmGlobal, NULL); // android-changed: extra parameter.
-}
-#endif /* 0 */
-#endif /* defined STD_INSPIRED */
-
 /*
 ** Return the number of leap years through the end of the given year
 ** where, to make the math easy, the answer for year zero is defined as zero.
 */
 
 static int
-leaps_thru_end_of(y)
-register const int  y;
+leaps_thru_end_of(register const int y)
 {
     return (y >= 0) ? (y / 4 - y / 100 + y / 400) :
         -(leaps_thru_end_of(-(y + 1)) + 1);
 }
 
 static struct tm *
-timesub(timep, offset, sp, tmp)
-const time_t * const            timep;
-const long              offset;
-register const struct state * const sp;
-register struct tm * const      tmp;
+timesub(const time_t *const timep, const int_fast32_t offset,
+        register const struct state *const sp,
+        register struct tm *const tmp)
 {
     register const struct lsinfo *  lp;
     register time_t         tdays;
     register int            idays;  /* unsigned would be so 2003 */
-    register long           rem;
+    register int_fast64_t           rem;
     int             y;
     register const int *        ip;
-    register long           corr;
+    register int_fast64_t           corr;
     register int            hit;
     register int            i;
 
@@ -1574,9 +1524,10 @@
         y = newy;
     }
     {
-        register long   seconds;
+        register int_fast32_t   seconds;
+        register time_t half_second = 0.5;
 
-        seconds = tdays * SECSPERDAY + 0.5;
+        seconds = tdays * SECSPERDAY + half_second;
         tdays = seconds / SECSPERDAY;
         rem += seconds - tdays * SECSPERDAY;
     }
@@ -1639,8 +1590,7 @@
 }
 
 char *
-ctime(timep)
-const time_t * const    timep;
+ctime(const time_t * const timep)
 {
 /*
 ** Section 4.12.3.2 of X3.159-1989 requires that
@@ -1652,9 +1602,7 @@
 }
 
 char *
-ctime_r(timep, buf)
-const time_t * const    timep;
-char *          buf;
+ctime_r(const time_t * const timep, char * buf)
 {
     struct tm   mytm;
 
@@ -1675,77 +1623,65 @@
 #endif /* !defined WRONG */
 
 /*
-** Simplified normalize logic courtesy Paul Eggert.
+** Normalize logic courtesy Paul Eggert.
 */
 
 static int
-increment_overflow(number, delta)
-int *   number;
-int delta;
+increment_overflow(int *const ip, int j)
 {
-    unsigned  number0 = (unsigned)*number;
-    unsigned  number1 = (unsigned)(number0 + delta);
+	register int const	i = *ip;
 
-    *number = (int)number1;
-
-    if (delta >= 0) {
-        return ((int)number1 < (int)number0);
-    } else {
-        return ((int)number1 > (int)number0);
-    }
+	/*
+	** If i >= 0 there can only be overflow if i + j > INT_MAX
+	** or if j > INT_MAX - i; given i >= 0, INT_MAX - i cannot overflow.
+	** If i < 0 there can only be overflow if i + j < INT_MIN
+	** or if j < INT_MIN - i; given i < 0, INT_MIN - i cannot overflow.
+	*/
+	if ((i >= 0) ? (j > INT_MAX - i) : (j < INT_MIN - i))
+		return TRUE;
+	*ip += j;
+	return FALSE;
 }
 
 static int
-long_increment_overflow(number, delta)
-long *  number;
-int delta;
+increment_overflow32(int_fast32_t *const lp, int const m)
 {
-    unsigned long  number0 = (unsigned long)*number;
-    unsigned long  number1 = (unsigned long)(number0 + delta);
+	register int_fast32_t const	l = *lp;
 
-    *number = (long)number1;
-
-    if (delta >= 0) {
-        return ((long)number1 < (long)number0);
-    } else {
-        return ((long)number1 > (long)number0);
-    }
+	if ((l >= 0) ? (m > INT_FAST32_MAX - l) : (m < INT_FAST32_MIN - l))
+		return TRUE;
+	*lp += m;
+	return FALSE;
 }
 
 static int
-normalize_overflow(tensptr, unitsptr, base)
-int * const tensptr;
-int * const unitsptr;
-const int   base;
+normalize_overflow(int *const tensptr, int *const unitsptr, const int base)
 {
-    register int    tensdelta;
+	register int	tensdelta;
 
-    tensdelta = (*unitsptr >= 0) ?
-        (*unitsptr / base) :
-        (-1 - (-1 - *unitsptr) / base);
-    *unitsptr -= tensdelta * base;
-    return increment_overflow(tensptr, tensdelta);
+	tensdelta = (*unitsptr >= 0) ?
+		(*unitsptr / base) :
+		(-1 - (-1 - *unitsptr) / base);
+	*unitsptr -= tensdelta * base;
+	return increment_overflow(tensptr, tensdelta);
 }
 
 static int
-long_normalize_overflow(tensptr, unitsptr, base)
-long * const    tensptr;
-int * const unitsptr;
-const int   base;
+normalize_overflow32(int_fast32_t *const tensptr, int *const unitsptr,
+		     const int base)
 {
-    register int    tensdelta;
+	register int	tensdelta;
 
-    tensdelta = (*unitsptr >= 0) ?
-        (*unitsptr / base) :
-        (-1 - (-1 - *unitsptr) / base);
-    *unitsptr -= tensdelta * base;
-    return long_increment_overflow(tensptr, tensdelta);
+	tensdelta = (*unitsptr >= 0) ?
+		(*unitsptr / base) :
+		(-1 - (-1 - *unitsptr) / base);
+	*unitsptr -= tensdelta * base;
+	return increment_overflow32(tensptr, tensdelta);
 }
 
 static int
-tmcomp(atmp, btmp)
-register const struct tm * const atmp;
-register const struct tm * const btmp;
+tmcomp(register const struct tm * const atmp,
+       register const struct tm * const btmp)
 {
     register int    result;
 
@@ -1759,21 +1695,19 @@
 }
 
 static time_t
-time2sub(tmp, funcp, offset, okayp, do_norm_secs, sp) // android-changed: added sp
-struct tm * const   tmp;
-struct tm * (* const    funcp) P((const time_t*, long, struct tm*, const struct state*)); // android-changed: added state*
-const long      offset;
-int * const     okayp;
-const int       do_norm_secs;
-const struct state * sp; // android-changed: added sp
+time2sub(struct tm * const tmp,
+         struct tm *(*const funcp)(const time_t*, int_fast32_t, struct tm*, const struct state*),
+         const int_fast32_t offset,
+         int * const okayp,
+         const int do_norm_secs, const struct state * sp) // android-changed: added sp
 {
     register int            dir;
     register int            i, j;
     register int            saved_seconds;
-    register long           li;
+    register int_fast32_t           li;
     register time_t         lo;
     register time_t         hi;
-    long                y;
+    int_fast32_t                y;
     time_t              newt;
     time_t              t;
     struct tm           yourtm, mytm;
@@ -1790,16 +1724,16 @@
     if (normalize_overflow(&yourtm.tm_mday, &yourtm.tm_hour, HOURSPERDAY))
         return WRONG;
     y = yourtm.tm_year;
-    if (long_normalize_overflow(&y, &yourtm.tm_mon, MONSPERYEAR))
+    if (normalize_overflow32(&y, &yourtm.tm_mon, MONSPERYEAR))
         return WRONG;
     /*
     ** Turn y into an actual year number for now.
     ** It is converted back to an offset from TM_YEAR_BASE later.
     */
-    if (long_increment_overflow(&y, TM_YEAR_BASE))
+    if (increment_overflow32(&y, TM_YEAR_BASE))
         return WRONG;
     while (yourtm.tm_mday <= 0) {
-        if (long_increment_overflow(&y, -1))
+        if (increment_overflow32(&y, -1))
             return WRONG;
         li = y + (1 < yourtm.tm_mon);
         yourtm.tm_mday += year_lengths[isleap(li)];
@@ -1807,7 +1741,7 @@
     while (yourtm.tm_mday > DAYSPERLYEAR) {
         li = y + (1 < yourtm.tm_mon);
         yourtm.tm_mday -= year_lengths[isleap(li)];
-        if (long_increment_overflow(&y, 1))
+        if (increment_overflow32(&y, 1))
             return WRONG;
     }
     for ( ; ; ) {
@@ -1817,11 +1751,11 @@
         yourtm.tm_mday -= i;
         if (++yourtm.tm_mon >= MONSPERYEAR) {
             yourtm.tm_mon = 0;
-            if (long_increment_overflow(&y, 1))
+            if (increment_overflow32(&y, 1))
                 return WRONG;
         }
     }
-    if (long_increment_overflow(&y, -TM_YEAR_BASE))
+    if (increment_overflow32(&y, -TM_YEAR_BASE))
         return WRONG;
     yourtm.tm_year = y;
     if (yourtm.tm_year != y)
@@ -1878,14 +1812,14 @@
         } else  dir = tmcomp(&mytm, &yourtm);
         if (dir != 0) {
             if (t == lo) {
-                if (t == TIME_T_MAX)
-                    return WRONG;
                 ++t;
+                if (t <= lo)
+                    return WRONG;
                 ++lo;
             } else if (t == hi) {
-                if (t == TIME_T_MIN)
-                    return WRONG;
                 --t;
+                if (t >= hi)
+                    return WRONG;
                 --hi;
             }
             if (lo > hi)
@@ -1903,14 +1837,10 @@
         ** It's okay to guess wrong since the guess
         ** gets checked.
         */
-        /*
-        ** The (void *) casts are the benefit of SunOS 3.3 on Sun 2's.
-        */
         // BEGIN android-changed: support user-supplied sp
         if (sp == NULL) {
             sp = (const struct state *)
-                (((void *) funcp == (void *) localsub) ?
-                lclptr : gmtptr);
+                ((funcp == localsub) ? lclptr : gmtptr);
         }
         // END android-changed
 #ifdef ALL_STATE
@@ -1950,14 +1880,11 @@
     return t;
 }
 
-// BEGIN android-changed: added sp.
 static time_t
-time2(tmp, funcp, offset, okayp, sp)
-struct tm * const   tmp;
-struct tm * (* const    funcp) P((const time_t*, long, struct tm*, const struct state*));
-const long      offset;
-int * const     okayp;
-const struct state * sp;
+time2(struct tm * const tmp,
+      struct tm * (*const funcp)(const time_t *, int_fast32_t, struct tm *, const struct state *), // android-changed: added sp.
+      const int_fast32_t offset,
+      int *const okayp, const struct state* sp) // android-changed: added sp.
 {
     time_t  t;
 
@@ -1969,14 +1896,11 @@
     t = time2sub(tmp, funcp, offset, okayp, FALSE, sp);
     return *okayp ? t : time2sub(tmp, funcp, offset, okayp, TRUE, sp);
 }
-// END android-changed
 
 static time_t
-time1(tmp, funcp, offset, sp) // android-changed: added sp.
-struct tm * const   tmp;
-struct tm * (* const    funcp) P((const time_t *, long, struct tm *, const struct state *));
-const long      offset;
-const struct state * sp; // android-changed: added sp.
+time1(struct tm * const tmp,
+      struct tm * (* const funcp) (const time_t *, int_fast32_t, struct tm *, const struct state *), // android-changed: added sp.
+      const int_fast32_t offset, const struct state * sp) // android-changed: added sp.
 {
     register time_t         t;
     register int            samei, otheri;
@@ -1987,6 +1911,10 @@
     int             types[TZ_MAX_TYPES];
     int             okay;
 
+    if (tmp == NULL) {
+        errno = EINVAL;
+        return WRONG;
+    }
     if (tmp->tm_isdst > 1)
         tmp->tm_isdst = 1;
     t = time2(tmp, funcp, offset, &okay, sp); // android-changed: added sp.
@@ -2009,13 +1937,9 @@
     ** We try to divine the type they started from and adjust to the
     ** type they need.
     */
-    /*
-    ** The (void *) casts are the benefit of SunOS 3.3 on Sun 2's.
-    */
     // BEGIN android-changed: support user-supplied sp.
     if (sp == NULL) {
-        sp = (const struct state *) (((void *) funcp == (void *) localsub) ?
-            lclptr : gmtptr);
+        sp = (const struct state *) ((funcp == localsub) ?  lclptr : gmtptr);
     }
     // BEGIN android-changed
 #ifdef ALL_STATE
@@ -2053,19 +1977,244 @@
 }
 
 time_t
-mktime(tmp)
-struct tm * const   tmp;
+mktime(struct tm * const tmp)
 {
-    time_t  result;
     _tzLock();
     tzset_locked();
-    result = time1(tmp, localsub, 0L, NULL); // android-changed: extra parameter.
+    time_t result = time1(tmp, localsub, 0L, NULL); // android-changed: extra parameter.
     _tzUnlock();
     return result;
 }
 
+#ifdef STD_INSPIRED
+
+time_t
+timelocal(struct tm * const tmp)
+{
+    if (tmp != NULL)
+        tmp->tm_isdst = -1; /* in case it wasn't initialized */
+    return mktime(tmp);
+}
+
+time_t
+timegm(struct tm * const tmp)
+{
+    time_t  result;
+
+    if (tmp != NULL)
+        tmp->tm_isdst = 0;
+    _tzLock();
+    result = time1(tmp, gmtsub, 0L, NULL); // android-changed: extra parameter.
+    _tzUnlock();
+
+    return result;
+}
+
+#endif /* defined STD_INSPIRED */
+
+#ifdef CMUCS
+
+/*
+** The following is supplied for compatibility with
+** previous versions of the CMUCS runtime library.
+*/
+
+int_fast32_t
+gtime(struct tm * const tmp)
+{
+    const time_t    t = mktime(tmp);
+
+    if (t == WRONG)
+        return -1;
+    return t;
+}
+
+#endif /* defined CMUCS */
+
+/*
+** XXX--is the below the right way to conditionalize??
+*/
+
+#ifdef STD_INSPIRED
+
+/*
+** IEEE Std 1003.1-1988 (POSIX) legislates that 536457599
+** shall correspond to "Wed Dec 31 23:59:59 UTC 1986", which
+** is not the case if we are accounting for leap seconds.
+** So, we provide the following conversion routines for use
+** when exchanging timestamps with POSIX conforming systems.
+*/
+
+static int_fast64_t
+leapcorr(time_t * timep)
+{
+    register struct state *     sp;
+    register struct lsinfo *    lp;
+    register int            i;
+
+    sp = lclptr;
+    i = sp->leapcnt;
+    while (--i >= 0) {
+        lp = &sp->lsis[i];
+        if (*timep >= lp->ls_trans)
+            return lp->ls_corr;
+    }
+    return 0;
+}
+
+time_t
+time2posix(time_t t)
+{
+    tzset();
+    return t - leapcorr(&t);
+}
+
+time_t
+posix2time(time_t t)
+{
+    time_t  x;
+    time_t  y;
+
+    tzset();
+    /*
+    ** For a positive leap second hit, the result
+    ** is not unique. For a negative leap second
+    ** hit, the corresponding time doesn't exist,
+    ** so we return an adjacent second.
+    */
+    x = t + leapcorr(&t);
+    y = x - leapcorr(&x);
+    if (y < t) {
+        do {
+            x++;
+            y = x - leapcorr(&x);
+        } while (y < t);
+        if (t != y)
+            return x - 1;
+    } else if (y > t) {
+        do {
+            --x;
+            y = x - leapcorr(&x);
+        } while (y > t);
+        if (t != y)
+            return x + 1;
+    }
+    return x;
+}
+
+#endif /* defined STD_INSPIRED */
+
 // BEGIN android-added
 
+#include <assert.h>
+#include <stdint.h>
+#include <arpa/inet.h> // For ntohl(3).
+
+static int to_int(unsigned char* s) {  // Big-endian decode; unsigned shifts avoid int overflow UB.
+  return (int) (((uint32_t) s[0] << 24) | ((uint32_t) s[1] << 16) | ((uint32_t) s[2] << 8) | s[3]);
+}
+
+static int __bionic_open_tzdata_path(const char* path, const char* olson_id, int* data_size) {
+  int fd = TEMP_FAILURE_RETRY(open(path, OPEN_MODE));
+  if (fd == -1) {
+    XLOG(("%s: could not open \"%s\": %s\n", __FUNCTION__, path, strerror(errno)));
+    return -2; // Distinguish failure to find any data from failure to find a specific id.
+  }
+
+  // byte[12] tzdata_version  -- "tzdata2012f\0"
+  // int index_offset
+  // int data_offset
+  // int zonetab_offset
+  struct bionic_tzdata_header {
+    char tzdata_version[12];
+    int32_t index_offset;
+    int32_t data_offset;
+    int32_t zonetab_offset;
+  } header;
+  memset(&header, 0, sizeof(header));
+  ssize_t bytes_read = TEMP_FAILURE_RETRY(read(fd, &header, sizeof(header)));
+  if (bytes_read != sizeof(header)) {
+    fprintf(stderr, "%s: could not read header of \"%s\": %s\n",
+            __FUNCTION__, path, (bytes_read == -1) ? strerror(errno) : "short read");
+    close(fd);
+    return -1;
+  }
+
+  if (strncmp(header.tzdata_version, "tzdata", 6) != 0 || header.tzdata_version[11] != 0) {
+    fprintf(stderr, "%s: bad magic in \"%s\": \"%.6s\"\n",
+            __FUNCTION__, path, header.tzdata_version);
+    close(fd);
+    return -1;
+  }
+
+#if 0
+  fprintf(stderr, "version: %s\n", header.tzdata_version);
+  fprintf(stderr, "index_offset = %d\n", ntohl(header.index_offset));
+  fprintf(stderr, "data_offset = %d\n", ntohl(header.data_offset));
+  fprintf(stderr, "zonetab_offset = %d\n", ntohl(header.zonetab_offset));
+#endif
+
+  if (TEMP_FAILURE_RETRY(lseek(fd, ntohl(header.index_offset), SEEK_SET)) == -1) {
+    fprintf(stderr, "%s: couldn't seek to index in \"%s\": %s\n",
+            __FUNCTION__, path, strerror(errno));
+    close(fd);
+    return -1;
+  }
+
+  off_t specific_zone_offset = -1;
+
+  static const size_t NAME_LENGTH = 40;
+  unsigned char buf[NAME_LENGTH + 3 * sizeof(int32_t)];
+
+  size_t id_count = (ntohl(header.data_offset) - ntohl(header.index_offset)) / sizeof(buf);
+  for (size_t i = 0; i < id_count; ++i) {
+    if (TEMP_FAILURE_RETRY(read(fd, buf, sizeof(buf))) != (ssize_t) sizeof(buf)) {
+      break;
+    }
+
+    char this_id[NAME_LENGTH + 1];
+    memcpy(this_id, buf, NAME_LENGTH);
+    this_id[NAME_LENGTH] = '\0';
+
+    if (strcmp(this_id, olson_id) == 0) {
+      specific_zone_offset = to_int(buf + NAME_LENGTH) + ntohl(header.data_offset);
+      *data_size = to_int(buf + NAME_LENGTH + sizeof(int32_t));
+      break;
+    }
+  }
+
+  if (specific_zone_offset == -1) {
+    XLOG(("%s: couldn't find zone \"%s\"\n", __FUNCTION__, olson_id));
+    close(fd);
+    return -1;
+  }
+
+  if (TEMP_FAILURE_RETRY(lseek(fd, specific_zone_offset, SEEK_SET)) == -1) {
+    fprintf(stderr, "%s: could not seek to %ld in \"%s\": %s\n",
+            __FUNCTION__, specific_zone_offset, path, strerror(errno));
+    close(fd);
+    return -1;
+  }
+
+  // TODO: check that there's TZ_MAGIC at this offset, so we can fall back to the other file if not.
+
+  return fd;
+}
+
+static int __bionic_open_tzdata(const char* olson_id, int* data_size) {
+  // TODO: use $ANDROID_DATA and $ANDROID_ROOT like libcore, to support bionic on the host.
+  int fd = __bionic_open_tzdata_path("/data/misc/zoneinfo/tzdata", olson_id, data_size);
+  if (fd < 0) {
+    fd = __bionic_open_tzdata_path("/system/usr/share/zoneinfo/tzdata", olson_id, data_size);
+    if (fd == -2) {
+      // The first thing that 'recovery' does is try to format the current time. It doesn't have
+      // any tzdata available, so we must not abort here --- doing so breaks the recovery image!
+      fprintf(stderr, "%s: couldn't find any tzdata when looking for %s!\n", __FUNCTION__, olson_id);
+    }
+  }
+  return fd;
+}
+
 // Caches the most recent timezone (http://b/8270865).
 static int __bionic_tzload_cached(const char* name, struct state* const sp, const int doextend) {
   _tzLock();
@@ -2115,239 +2264,3 @@
 }
 
 // END android-added
-
-#ifdef STD_INSPIRED
-
-time_t
-timelocal(tmp)
-struct tm * const   tmp;
-{
-    tmp->tm_isdst = -1; /* in case it wasn't initialized */
-    return mktime(tmp);
-}
-
-time_t
-timegm(tmp)
-struct tm * const   tmp;
-{
-    time_t  result;
-
-    tmp->tm_isdst = 0;
-    _tzLock();
-    result = time1(tmp, gmtsub, 0L, NULL); // android-changed: extra parameter.
-    _tzUnlock();
-
-    return result;
-}
-
-#if 0 /* disable due to lack of clear documentation on this function */
-time_t
-timeoff(tmp, offset)
-struct tm * const   tmp;
-const long      offset;
-{
-    time_t  result;
-
-    tmp->tm_isdst = 0;
-    _tzLock();
-    result = time1(tmp, gmtsub, offset, NULL); // android-changed: extra parameter.
-    _tzUnlock();
-
-    return result;
-}
-#endif /* 0 */
-
-#endif /* defined STD_INSPIRED */
-
-#ifdef CMUCS
-
-/*
-** The following is supplied for compatibility with
-** previous versions of the CMUCS runtime library.
-*/
-
-long
-gtime(tmp)
-struct tm * const   tmp;
-{
-    const time_t    t = mktime(tmp);
-
-    if (t == WRONG)
-        return -1;
-    return t;
-}
-
-#endif /* defined CMUCS */
-
-/*
-** XXX--is the below the right way to conditionalize??
-*/
-
-#ifdef STD_INSPIRED
-
-/*
-** IEEE Std 1003.1-1988 (POSIX) legislates that 536457599
-** shall correspond to "Wed Dec 31 23:59:59 UTC 1986", which
-** is not the case if we are accounting for leap seconds.
-** So, we provide the following conversion routines for use
-** when exchanging timestamps with POSIX conforming systems.
-*/
-
-static long
-leapcorr(timep)
-time_t *    timep;
-{
-    register struct state *     sp;
-    register struct lsinfo *    lp;
-    register int            i;
-
-    sp = lclptr;
-    i = sp->leapcnt;
-    while (--i >= 0) {
-        lp = &sp->lsis[i];
-        if (*timep >= lp->ls_trans)
-            return lp->ls_corr;
-    }
-    return 0;
-}
-
-time_t
-time2posix(t)
-time_t  t;
-{
-    tzset();
-    return t - leapcorr(&t);
-}
-
-time_t
-posix2time(t)
-time_t  t;
-{
-    time_t  x;
-    time_t  y;
-
-    tzset();
-    /*
-    ** For a positive leap second hit, the result
-    ** is not unique. For a negative leap second
-    ** hit, the corresponding time doesn't exist,
-    ** so we return an adjacent second.
-    */
-    x = t + leapcorr(&t);
-    y = x - leapcorr(&x);
-    if (y < t) {
-        do {
-            x++;
-            y = x - leapcorr(&x);
-        } while (y < t);
-        if (t != y)
-            return x - 1;
-    } else if (y > t) {
-        do {
-            --x;
-            y = x - leapcorr(&x);
-        } while (y > t);
-        if (t != y)
-            return x + 1;
-    }
-    return x;
-}
-
-#endif /* defined STD_INSPIRED */
-
-#include <assert.h>
-#include <stdint.h>
-#include <arpa/inet.h> // For ntohl(3).
-
-static int __bionic_open_tzdata_path(const char* path, const char* olson_id, int* data_size) {
-  int fd = TEMP_FAILURE_RETRY(open(path, OPEN_MODE));
-  if (fd == -1) {
-    XLOG(("%s: could not open \"%s\": %s\n", __FUNCTION__, path, strerror(errno)));
-    return -2; // Distinguish failure to find any data from failure to find a specific id.
-  }
-
-  // byte[12] tzdata_version  -- "tzdata2012f\0"
-  // int index_offset
-  // int data_offset
-  // int zonetab_offset
-  struct bionic_tzdata_header {
-    char tzdata_version[12];
-    int32_t index_offset;
-    int32_t data_offset;
-    int32_t zonetab_offset;
-  } header;
-  if (TEMP_FAILURE_RETRY(read(fd, &header, sizeof(header))) != sizeof(header)) {
-    fprintf(stderr, "%s: could not read header: %s\n", __FUNCTION__, strerror(errno));
-    close(fd);
-    return -1;
-  }
-
-  if (strncmp(header.tzdata_version, "tzdata", 6) != 0 || header.tzdata_version[11] != 0) {
-    fprintf(stderr, "%s: bad magic: %s\n", __FUNCTION__, header.tzdata_version);
-    close(fd);
-    return -1;
-  }
-
-#if 0
-  fprintf(stderr, "version: %s\n", header.tzdata_version);
-  fprintf(stderr, "index_offset = %d\n", ntohl(header.index_offset));
-  fprintf(stderr, "data_offset = %d\n", ntohl(header.data_offset));
-  fprintf(stderr, "zonetab_offset = %d\n", ntohl(header.zonetab_offset));
-#endif
-
-  if (TEMP_FAILURE_RETRY(lseek(fd, ntohl(header.index_offset), SEEK_SET)) == -1) {
-    fprintf(stderr, "%s: couldn't seek to index: %s\n", __FUNCTION__, strerror(errno));
-    close(fd);
-    return -1;
-  }
-
-  off_t specific_zone_offset = -1;
-
-  static const size_t NAME_LENGTH = 40;
-  unsigned char buf[NAME_LENGTH + 3 * sizeof(int32_t)];
-
-  size_t id_count = (ntohl(header.data_offset) - ntohl(header.index_offset)) / sizeof(buf);
-  for (size_t i = 0; i < id_count; ++i) {
-    if (TEMP_FAILURE_RETRY(read(fd, buf, sizeof(buf))) != (ssize_t) sizeof(buf)) {
-      break;
-    }
-
-    char this_id[NAME_LENGTH + 1];
-    memcpy(this_id, buf, NAME_LENGTH);
-    this_id[NAME_LENGTH] = '\0';
-
-    if (strcmp(this_id, olson_id) == 0) {
-      specific_zone_offset = toint(buf + NAME_LENGTH) + ntohl(header.data_offset);
-      *data_size = toint(buf + NAME_LENGTH + sizeof(int32_t));
-      break;
-    }
-  }
-
-  if (specific_zone_offset == -1) {
-    XLOG(("%s: couldn't find zone \"%s\"\n", __FUNCTION__, olson_id));
-    close(fd);
-    return -1;
-  }
-
-  if (TEMP_FAILURE_RETRY(lseek(fd, specific_zone_offset, SEEK_SET)) == -1) {
-    fprintf(stderr, "%s: could not seek to %ld: %s\n", __FUNCTION__, specific_zone_offset, strerror(errno));
-    close(fd);
-    return -1;
-  }
-
-  return fd;
-}
-
-static int __bionic_open_tzdata(const char* olson_id, int* data_size) {
-  // TODO: use $ANDROID_DATA and $ANDROID_ROOT like libcore, to support bionic on the host.
-  int fd = __bionic_open_tzdata_path("/data/misc/zoneinfo/tzdata", olson_id, data_size);
-  if (fd < 0) {
-    fd = __bionic_open_tzdata_path("/system/usr/share/zoneinfo/tzdata", olson_id, data_size);
-    if (fd == -2) {
-      // The first thing that 'recovery' does is try to format the current time. It doesn't have
-      // any tzdata available, so we must not abort here --- doing so breaks the recovery image!
-      fprintf(stderr, "%s: couldn't find any tzdata when looking for %s!\n", __FUNCTION__, olson_id);
-    }
-  }
-  return fd;
-}
diff --git a/libc/tzcode/private.h b/libc/tzcode/private.h
index e82a655..a31a26e 100644
--- a/libc/tzcode/private.h
+++ b/libc/tzcode/private.h
@@ -15,17 +15,7 @@
 ** Thank you!
 */
 
-/*
-** ID
-*/
-
-#ifndef lint
-#ifndef NOID
-static char privatehid[] = "@(#)private.h   8.2";
-#endif /* !defined NOID */
-#endif /* !defined lint */
-
-#define GRANDPARENTED   "Local time zone must be set--see zic manual page"
+#define GRANDPARENTED	"Local time zone must be set--see zic manual page"
 
 /*
 ** Defaults for preprocessor symbols.
@@ -33,45 +23,45 @@
 */
 
 #ifndef HAVE_ADJTIME
-#define HAVE_ADJTIME        1
+#define HAVE_ADJTIME		1
 #endif /* !defined HAVE_ADJTIME */
 
 #ifndef HAVE_GETTEXT
-#define HAVE_GETTEXT        0
+#define HAVE_GETTEXT		0
 #endif /* !defined HAVE_GETTEXT */
 
 #ifndef HAVE_INCOMPATIBLE_CTIME_R
-#define HAVE_INCOMPATIBLE_CTIME_R   0
+#define HAVE_INCOMPATIBLE_CTIME_R	0
 #endif /* !defined INCOMPATIBLE_CTIME_R */
 
 #ifndef HAVE_SETTIMEOFDAY
-#define HAVE_SETTIMEOFDAY   3
+#define HAVE_SETTIMEOFDAY	3
 #endif /* !defined HAVE_SETTIMEOFDAY */
 
-#ifndef HAVE_STRERROR
-#define HAVE_STRERROR       1
-#endif /* !defined HAVE_STRERROR */
-
 #ifndef HAVE_SYMLINK
-#define HAVE_SYMLINK        1
+#define HAVE_SYMLINK		1
 #endif /* !defined HAVE_SYMLINK */
 
 #ifndef HAVE_SYS_STAT_H
-#define HAVE_SYS_STAT_H     1
+#define HAVE_SYS_STAT_H		1
 #endif /* !defined HAVE_SYS_STAT_H */
 
 #ifndef HAVE_SYS_WAIT_H
-#define HAVE_SYS_WAIT_H     1
+#define HAVE_SYS_WAIT_H		1
 #endif /* !defined HAVE_SYS_WAIT_H */
 
 #ifndef HAVE_UNISTD_H
-#define HAVE_UNISTD_H       1
+#define HAVE_UNISTD_H		1
 #endif /* !defined HAVE_UNISTD_H */
 
 #ifndef HAVE_UTMPX_H
-#define HAVE_UTMPX_H        0
+#define HAVE_UTMPX_H		0
 #endif /* !defined HAVE_UTMPX_H */
 
+#ifndef LOCALE_HOME
+#define LOCALE_HOME		"/usr/lib/locale"
+#endif /* !defined LOCALE_HOME */
+
 #if HAVE_INCOMPATIBLE_CTIME_R
 #define asctime_r _incompatible_asctime_r
 #define ctime_r _incompatible_ctime_r
@@ -81,11 +71,11 @@
 ** Nested includes
 */
 
-#include "sys/types.h"  /* for time_t */
+#include "sys/types.h"	/* for time_t */
 #include "stdio.h"
 #include "errno.h"
 #include "string.h"
-#include "limits.h" /* for CHAR_BIT et al. */
+#include "limits.h"	/* for CHAR_BIT et al. */
 #include "time.h"
 #include "stdlib.h"
 
@@ -94,28 +84,26 @@
 #endif /* HAVE_GETTEXT */
 
 #if HAVE_SYS_WAIT_H
-#include <sys/wait.h>   /* for WIFEXITED and WEXITSTATUS */
+#include <sys/wait.h>	/* for WIFEXITED and WEXITSTATUS */
 #endif /* HAVE_SYS_WAIT_H */
 
 #ifndef WIFEXITED
-#define WIFEXITED(status)   (((status) & 0xff) == 0)
+#define WIFEXITED(status)	(((status) & 0xff) == 0)
 #endif /* !defined WIFEXITED */
 #ifndef WEXITSTATUS
-#define WEXITSTATUS(status) (((status) >> 8) & 0xff)
+#define WEXITSTATUS(status)	(((status) >> 8) & 0xff)
 #endif /* !defined WEXITSTATUS */
 
 #if HAVE_UNISTD_H
-#include "unistd.h" /* for F_OK and R_OK */
+#include "unistd.h"	/* for F_OK, R_OK, and other POSIX goodness */
 #endif /* HAVE_UNISTD_H */
 
-#if !HAVE_UNISTD_H
 #ifndef F_OK
-#define F_OK    0
+#define F_OK	0
 #endif /* !defined F_OK */
 #ifndef R_OK
-#define R_OK    4
+#define R_OK	4
 #endif /* !defined R_OK */
-#endif /* !HAVE_UNISTD_H */
 
 /* Unlike <ctype.h>'s isdigit, this also works if c < 0 | c > UCHAR_MAX. */
 #define is_digit(c) ((unsigned)(c) - '0' <= 9)
@@ -128,27 +116,73 @@
 */
 #ifndef HAVE_STDINT_H
 #define HAVE_STDINT_H \
-    (199901 <= __STDC_VERSION__ || \
-    2 < (__GLIBC__ + (0 < __GLIBC_MINOR__)))
+	(199901 <= __STDC_VERSION__ || \
+	2 < (__GLIBC__ + (0 < __GLIBC_MINOR__)))
 #endif /* !defined HAVE_STDINT_H */
 
 #if HAVE_STDINT_H
 #include "stdint.h"
 #endif /* !HAVE_STDINT_H */
 
+#ifndef HAVE_INTTYPES_H
+# define HAVE_INTTYPES_H HAVE_STDINT_H
+#endif
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+
 #ifndef INT_FAST64_MAX
 /* Pre-C99 GCC compilers define __LONG_LONG_MAX__ instead of LLONG_MAX.  */
 #if defined LLONG_MAX || defined __LONG_LONG_MAX__
-typedef long long   int_fast64_t;
+typedef long long	int_fast64_t;
+# ifdef LLONG_MAX
+#  define INT_FAST64_MIN LLONG_MIN
+#  define INT_FAST64_MAX LLONG_MAX
+# else
+#  define INT_FAST64_MIN __LONG_LONG_MIN__
+#  define INT_FAST64_MAX __LONG_LONG_MAX__
+# endif
+# define SCNdFAST64 "lld"
 #else /* ! (defined LLONG_MAX || defined __LONG_LONG_MAX__) */
 #if (LONG_MAX >> 31) < 0xffffffff
 Please use a compiler that supports a 64-bit integer type (or wider);
 you may need to compile with "-DHAVE_STDINT_H".
 #endif /* (LONG_MAX >> 31) < 0xffffffff */
-typedef long        int_fast64_t;
+typedef long		int_fast64_t;
+# define INT_FAST64_MIN LONG_MIN
+# define INT_FAST64_MAX LONG_MAX
+# define SCNdFAST64 "ld"
 #endif /* ! (defined LLONG_MAX || defined __LONG_LONG_MAX__) */
 #endif /* !defined INT_FAST64_MAX */
 
+#ifndef INT_FAST32_MAX
+# if INT_MAX >> 31 == 0
+typedef long int_fast32_t;
+# else
+typedef int int_fast32_t;
+# endif
+#endif
+
+#ifndef INTMAX_MAX
+# if defined LLONG_MAX || defined __LONG_LONG_MAX__
+typedef long long intmax_t;
+#  define PRIdMAX "lld"
+# else
+typedef long intmax_t;
+#  define PRIdMAX "ld"
+# endif
+#endif
+
+#ifndef UINTMAX_MAX
+# if defined ULLONG_MAX || defined __LONG_LONG_MAX__
+typedef unsigned long long uintmax_t;
+#  define PRIuMAX "llu"
+# else
+typedef unsigned long uintmax_t;
+#  define PRIuMAX "lu"
+# endif
+#endif
+
 #ifndef INT32_MAX
 #define INT32_MAX 0x7fffffff
 #endif /* !defined INT32_MAX */
@@ -156,111 +190,114 @@
 #define INT32_MIN (-1 - INT32_MAX)
 #endif /* !defined INT32_MIN */
 
+#if 2 < __GNUC__ + (96 <= __GNUC_MINOR__)
+# define ATTRIBUTE_CONST __attribute__ ((const))
+# define ATTRIBUTE_PURE __attribute__ ((__pure__))
+#else
+# define ATTRIBUTE_CONST /* empty */
+# define ATTRIBUTE_PURE /* empty */
+#endif
+
+#if !defined _Noreturn && __STDC_VERSION__ < 201112
+# if 2 < __GNUC__ + (8 <= __GNUC_MINOR__)
+#  define _Noreturn __attribute__ ((__noreturn__))
+# else
+#  define _Noreturn
+# endif
+#endif
+
+#if __STDC_VERSION__ < 199901 && !defined restrict
+# define restrict /* empty */
+#endif
+
 /*
 ** Workarounds for compilers/systems.
 */
 
 /*
-** If your compiler lacks prototypes, "#define P(x) ()".
-*/
-
-#ifndef P
-#define P(x)    x
-#endif /* !defined P */
-
-/*
-** SunOS 4.1.1 headers lack EXIT_SUCCESS.
-*/
-
-#ifndef EXIT_SUCCESS
-#define EXIT_SUCCESS    0
-#endif /* !defined EXIT_SUCCESS */
-
-/*
-** SunOS 4.1.1 headers lack EXIT_FAILURE.
-*/
-
-#ifndef EXIT_FAILURE
-#define EXIT_FAILURE    1
-#endif /* !defined EXIT_FAILURE */
-
-/*
-** SunOS 4.1.1 headers lack FILENAME_MAX.
-*/
-
-#ifndef FILENAME_MAX
-
-#ifndef MAXPATHLEN
-#ifdef unix
-#include "sys/param.h"
-#endif /* defined unix */
-#endif /* !defined MAXPATHLEN */
-
-#ifdef MAXPATHLEN
-#define FILENAME_MAX    MAXPATHLEN
-#endif /* defined MAXPATHLEN */
-#ifndef MAXPATHLEN
-#define FILENAME_MAX    1024        /* Pure guesswork */
-#endif /* !defined MAXPATHLEN */
-
-#endif /* !defined FILENAME_MAX */
-
-/*
-** SunOS 4.1.1 libraries lack remove.
-*/
-
-#ifndef remove
-extern int  unlink P((const char * filename));
-#define remove  unlink
-#endif /* !defined remove */
-
-/*
-** Some ancient errno.h implementations don't declare errno.
-** But some newer errno.h implementations define it as a macro.
-** Fix the former without affecting the latter.
-*/
-
-#ifndef errno
-extern int errno;
-#endif /* !defined errno */
-
-/*
 ** Some time.h implementations don't declare asctime_r.
 ** Others might define it as a macro.
 ** Fix the former without affecting the latter.
 */
 
 #ifndef asctime_r
-extern char *   asctime_r();
+extern char *	asctime_r(struct tm const *, char *);
+#endif
+
+/*
+** Compile with -Dtime_tz=T to build the tz package with a private
+** time_t type equivalent to T rather than the system-supplied time_t.
+** This debugging feature can test unusual design decisions
+** (e.g., time_t wider than 'long', or unsigned time_t) even on
+** typical platforms.
+*/
+#ifdef time_tz
+static time_t sys_time(time_t *x) { return time(x); }
+
+# undef  ctime
+# define ctime tz_ctime
+# undef  ctime_r
+# define ctime_r tz_ctime_r
+# undef  difftime
+# define difftime tz_difftime
+# undef  gmtime
+# define gmtime tz_gmtime
+# undef  gmtime_r
+# define gmtime_r tz_gmtime_r
+# undef  localtime
+# define localtime tz_localtime
+# undef  localtime_r
+# define localtime_r tz_localtime_r
+# undef  mktime
+# define mktime tz_mktime
+# undef  time
+# define time tz_time
+# undef  time_t
+# define time_t tz_time_t
+
+typedef time_tz time_t;
+
+char *ctime(time_t const *);
+char *ctime_r(time_t const *, char *);
+double difftime(time_t, time_t);
+struct tm *gmtime(time_t const *);
+struct tm *gmtime_r(time_t const *restrict, struct tm *restrict);
+struct tm *localtime(time_t const *);
+struct tm *localtime_r(time_t const *restrict, struct tm *restrict);
+time_t mktime(struct tm *);
+
+static time_t
+time(time_t *p)
+{
+	time_t r = sys_time(0);
+	if (p)
+		*p = r;
+	return r;
+}
 #endif
 
 /*
 ** Private function declarations.
 */
 
-char *      icalloc P((int nelem, int elsize));
-char *      icatalloc P((char * old, const char * new));
-char *      icpyalloc P((const char * string));
-char *      imalloc P((int n));
-void *      irealloc P((void * pointer, int size));
-void        icfree P((char * pointer));
-void        ifree P((char * pointer));
-const char *    scheck P((const char * string, const char * format));
+char *		icatalloc(char * old, const char * new);
+char *		icpyalloc(const char * string);
+const char *	scheck(const char * string, const char * format);
 
 /*
 ** Finally, some convenience items.
 */
 
 #ifndef TRUE
-#define TRUE    1
+#define TRUE	1
 #endif /* !defined TRUE */
 
 #ifndef FALSE
-#define FALSE   0
+#define FALSE	0
 #endif /* !defined FALSE */
 
 #ifndef TYPE_BIT
-#define TYPE_BIT(type)  (sizeof (type) * CHAR_BIT)
+#define TYPE_BIT(type)	(sizeof (type) * CHAR_BIT)
 #endif /* !defined TYPE_BIT */
 
 #ifndef TYPE_SIGNED
@@ -284,8 +321,8 @@
 ** add one more for a minus sign if the type is signed.
 */
 #define INT_STRLEN_MAXIMUM(type) \
-    ((TYPE_BIT(type) - TYPE_SIGNED(type)) * 302 / 1000 + \
-    1 + TYPE_SIGNED(type))
+	((TYPE_BIT(type) - TYPE_SIGNED(type)) * 302 / 1000 + \
+	1 + TYPE_SIGNED(type))
 #endif /* !defined INT_STRLEN_MAXIMUM */
 
 /*
@@ -305,7 +342,7 @@
 
 #ifndef INITIALIZE
 #ifdef GNUC_or_lint
-#define INITIALIZE(x)   ((x) = 0)
+#define INITIALIZE(x)	((x) = 0)
 #endif /* defined GNUC_or_lint */
 #ifndef GNUC_or_lint
 #define INITIALIZE(x)
@@ -333,12 +370,12 @@
 #if HAVE_INCOMPATIBLE_CTIME_R
 #undef asctime_r
 #undef ctime_r
-char *asctime_r P((struct tm const *, char *));
-char *ctime_r P((time_t const *, char *));
+char *asctime_r(struct tm const *, char *);
+char *ctime_r(time_t const *, char *);
 #endif /* HAVE_INCOMPATIBLE_CTIME_R */
 
 #ifndef YEARSPERREPEAT
-#define YEARSPERREPEAT      400 /* years before a Gregorian repeat */
+#define YEARSPERREPEAT		400	/* years before a Gregorian repeat */
 #endif /* !defined YEARSPERREPEAT */
 
 /*
@@ -346,15 +383,15 @@
 */
 
 #ifndef AVGSECSPERYEAR
-#define AVGSECSPERYEAR      31556952L
+#define AVGSECSPERYEAR		31556952L
 #endif /* !defined AVGSECSPERYEAR */
 
 #ifndef SECSPERREPEAT
-#define SECSPERREPEAT       ((int_fast64_t) YEARSPERREPEAT * (int_fast64_t) AVGSECSPERYEAR)
+#define SECSPERREPEAT		((int_fast64_t) YEARSPERREPEAT * (int_fast64_t) AVGSECSPERYEAR)
 #endif /* !defined SECSPERREPEAT */
- 
+
 #ifndef SECSPERREPEAT_BITS
-#define SECSPERREPEAT_BITS  34  /* ceil(log2(SECSPERREPEAT)) */
+#define SECSPERREPEAT_BITS	34	/* ceil(log2(SECSPERREPEAT)) */
 #endif /* !defined SECSPERREPEAT_BITS */
 
 /*
diff --git a/libc/tzcode/strftime.c b/libc/tzcode/strftime.c
index f6e7435..e92c44d 100644
--- a/libc/tzcode/strftime.c
+++ b/libc/tzcode/strftime.c
@@ -111,12 +111,12 @@
     "%a %b %e %H:%M:%S %Z %Y"
 };
 
-static char *   _add P((const char *, char *, const char *, int));
-static char *   _conv P((int, const char *, char *, const char *));
-static char *   _fmt P((const char *, const struct tm *, char *, const char *,
-            int *, const struct strftime_locale*));
-static char *   _yconv P((int, int, int, int, char *, const char *, int));
-static char *   getformat P((int, char *, char *, char *, char *));
+static char *   _add(const char *, char *, const char *, int);
+static char *   _conv(int, const char *, char *, const char *);
+static char *   _fmt(const char *, const struct tm *, char *, const char *,
+            int *, const struct strftime_locale*);
+static char *   _yconv(int, int, int, int, char *, const char *, int);
+static char *   getformat(int, char *, char *, char *, char *);
 
 extern char *   tzname[];
 
diff --git a/libc/tzcode/tzfile.h b/libc/tzcode/tzfile.h
index f6c9a05..d04fe04 100644
--- a/libc/tzcode/tzfile.h
+++ b/libc/tzcode/tzfile.h
@@ -16,72 +16,62 @@
 */
 
 /*
-** ID
-*/
-
-#ifndef lint
-#ifndef NOID
-static char tzfilehid[] = "@(#)tzfile.h 8.1";
-#endif /* !defined NOID */
-#endif /* !defined lint */
-
-/*
 ** Information about time zone files.
 */
 
 #ifndef TZDIR
-#define TZDIR "/system/usr/share/zoneinfo" /* Time zone object file directory */
+#define TZDIR	"/usr/local/etc/zoneinfo" /* Time zone object file directory */
 #endif /* !defined TZDIR */
 
 #ifndef TZDEFAULT
-#define TZDEFAULT   "localtime"
+#define TZDEFAULT	"localtime"
 #endif /* !defined TZDEFAULT */
 
 #ifndef TZDEFRULES
-#define TZDEFRULES  "posixrules"
+#define TZDEFRULES	"posixrules"
 #endif /* !defined TZDEFRULES */
 
 /*
 ** Each file begins with. . .
 */
 
-#define TZ_MAGIC    "TZif"
+#define	TZ_MAGIC	"TZif"
 
 struct tzhead {
-    char    tzh_magic[4];       /* TZ_MAGIC */
-    char    tzh_version[1];     /* '\0' or '2' as of 2005 */
-    char    tzh_reserved[15];   /* reserved--must be zero */
-    char    tzh_ttisgmtcnt[4];  /* coded number of trans. time flags */
-    char    tzh_ttisstdcnt[4];  /* coded number of trans. time flags */
-    char    tzh_leapcnt[4];     /* coded number of leap seconds */
-    char    tzh_timecnt[4];     /* coded number of transition times */
-    char    tzh_typecnt[4];     /* coded number of local time types */
-    char    tzh_charcnt[4];     /* coded number of abbr. chars */
+	char	tzh_magic[4];		/* TZ_MAGIC */
+	char	tzh_version[1];		/* '\0' or '2' as of 2005 */
+	char	tzh_reserved[15];	/* reserved--must be zero */
+	char	tzh_ttisgmtcnt[4];	/* coded number of trans. time flags */
+	char	tzh_ttisstdcnt[4];	/* coded number of trans. time flags */
+	char	tzh_leapcnt[4];		/* coded number of leap seconds */
+	char	tzh_timecnt[4];		/* coded number of transition times */
+	char	tzh_typecnt[4];		/* coded number of local time types */
+	char	tzh_charcnt[4];		/* coded number of abbr. chars */
 };
 
 /*
 ** . . .followed by. . .
 **
-**  tzh_timecnt (char [4])s     coded transition times a la time(2)
-**  tzh_timecnt (unsigned char)s    types of local time starting at above
-**  tzh_typecnt repetitions of
-**      one (char [4])      coded UTC offset in seconds
-**      one (unsigned char) used to set tm_isdst
-**      one (unsigned char) that's an abbreviation list index
-**  tzh_charcnt (char)s     '\0'-terminated zone abbreviations
-**  tzh_leapcnt repetitions of
-**      one (char [4])      coded leap second transition times
-**      one (char [4])      total correction after above
-**  tzh_ttisstdcnt (char)s      indexed by type; if TRUE, transition
-**                  time is standard time, if FALSE,
-**                  transition time is wall clock time
-**                  if absent, transition times are
-**                  assumed to be wall clock time
-**  tzh_ttisgmtcnt (char)s      indexed by type; if TRUE, transition
-**                  time is UTC, if FALSE,
-**                  transition time is local time
-**                  if absent, transition times are
-**                  assumed to be local time
+**	tzh_timecnt (char [4])s		coded transition times a la time(2)
+**	tzh_timecnt (unsigned char)s	types of local time starting at above
+**	tzh_typecnt repetitions of
+**		one (char [4])		coded UTC offset in seconds
+**		one (unsigned char)	used to set tm_isdst
+**		one (unsigned char)	that's an abbreviation list index
+**	tzh_charcnt (char)s		'\0'-terminated zone abbreviations
+**	tzh_leapcnt repetitions of
+**		one (char [4])		coded leap second transition times
+**		one (char [4])		total correction after above
+**	tzh_ttisstdcnt (char)s		indexed by type; if TRUE, transition
+**					time is standard time, if FALSE,
+**					transition time is wall clock time
+**					if absent, transition times are
+**					assumed to be wall clock time
+**	tzh_ttisgmtcnt (char)s		indexed by type; if TRUE, transition
+**					time is UTC, if FALSE,
+**					transition time is local time
+**					if absent, transition times are
+**					assumed to be local time
 */
 
 /*
@@ -100,81 +90,81 @@
 */
 
 #ifndef TZ_MAX_TIMES
-#define TZ_MAX_TIMES    1200
+#define TZ_MAX_TIMES	1200
 #endif /* !defined TZ_MAX_TIMES */
 
 #ifndef TZ_MAX_TYPES
 #ifndef NOSOLAR
-#define TZ_MAX_TYPES    256 /* Limited by what (unsigned char)'s can hold */
+#define TZ_MAX_TYPES	256 /* Limited by what (unsigned char)'s can hold */
 #endif /* !defined NOSOLAR */
 #ifdef NOSOLAR
 /*
 ** Must be at least 14 for Europe/Riga as of Jan 12 1995,
 ** as noted by Earl Chew.
 */
-#define TZ_MAX_TYPES    20  /* Maximum number of local time types */
+#define TZ_MAX_TYPES	20	/* Maximum number of local time types */
 #endif /* !defined NOSOLAR */
 #endif /* !defined TZ_MAX_TYPES */
 
 #ifndef TZ_MAX_CHARS
-#define TZ_MAX_CHARS    50  /* Maximum number of abbreviation characters */
-                /* (limited by what unsigned chars can hold) */
+#define TZ_MAX_CHARS	50	/* Maximum number of abbreviation characters */
+				/* (limited by what unsigned chars can hold) */
 #endif /* !defined TZ_MAX_CHARS */
 
 #ifndef TZ_MAX_LEAPS
-#define TZ_MAX_LEAPS    50  /* Maximum number of leap second corrections */
+#define TZ_MAX_LEAPS	50	/* Maximum number of leap second corrections */
 #endif /* !defined TZ_MAX_LEAPS */
 
-#define SECSPERMIN  60
-#define MINSPERHOUR 60
-#define HOURSPERDAY 24
-#define DAYSPERWEEK 7
-#define DAYSPERNYEAR    365
-#define DAYSPERLYEAR    366
-#define SECSPERHOUR (SECSPERMIN * MINSPERHOUR)
-#define SECSPERDAY  ((long) SECSPERHOUR * HOURSPERDAY)
-#define MONSPERYEAR 12
+#define SECSPERMIN	60
+#define MINSPERHOUR	60
+#define HOURSPERDAY	24
+#define DAYSPERWEEK	7
+#define DAYSPERNYEAR	365
+#define DAYSPERLYEAR	366
+#define SECSPERHOUR	(SECSPERMIN * MINSPERHOUR)
+#define SECSPERDAY	((int_fast32_t) SECSPERHOUR * HOURSPERDAY)
+#define MONSPERYEAR	12
 
-#define TM_SUNDAY   0
-#define TM_MONDAY   1
-#define TM_TUESDAY  2
-#define TM_WEDNESDAY    3
-#define TM_THURSDAY 4
-#define TM_FRIDAY   5
-#define TM_SATURDAY 6
+#define TM_SUNDAY	0
+#define TM_MONDAY	1
+#define TM_TUESDAY	2
+#define TM_WEDNESDAY	3
+#define TM_THURSDAY	4
+#define TM_FRIDAY	5
+#define TM_SATURDAY	6
 
-#define TM_JANUARY  0
-#define TM_FEBRUARY 1
-#define TM_MARCH    2
-#define TM_APRIL    3
-#define TM_MAY      4
-#define TM_JUNE     5
-#define TM_JULY     6
-#define TM_AUGUST   7
-#define TM_SEPTEMBER    8
-#define TM_OCTOBER  9
-#define TM_NOVEMBER 10
-#define TM_DECEMBER 11
+#define TM_JANUARY	0
+#define TM_FEBRUARY	1
+#define TM_MARCH	2
+#define TM_APRIL	3
+#define TM_MAY		4
+#define TM_JUNE		5
+#define TM_JULY		6
+#define TM_AUGUST	7
+#define TM_SEPTEMBER	8
+#define TM_OCTOBER	9
+#define TM_NOVEMBER	10
+#define TM_DECEMBER	11
 
-#define TM_YEAR_BASE    1900
+#define TM_YEAR_BASE	1900
 
-#define EPOCH_YEAR  1970
-#define EPOCH_WDAY  TM_THURSDAY
+#define EPOCH_YEAR	1970
+#define EPOCH_WDAY	TM_THURSDAY
 
 #define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0))
 
 /*
 ** Since everything in isleap is modulo 400 (or a factor of 400), we know that
-**  isleap(y) == isleap(y % 400)
+**	isleap(y) == isleap(y % 400)
 ** and so
-**  isleap(a + b) == isleap((a + b) % 400)
+**	isleap(a + b) == isleap((a + b) % 400)
 ** or
-**  isleap(a + b) == isleap(a % 400 + b % 400)
+**	isleap(a + b) == isleap(a % 400 + b % 400)
 ** This is true even if % means modulo rather than Fortran remainder
 ** (which is allowed by C89 but not C99).
 ** We use this to avoid addition overflow problems.
 */
 
-#define isleap_sum(a, b)    isleap((a) % 400 + (b) % 400)
+#define isleap_sum(a, b)	isleap((a) % 400 + (b) % 400)
 
 #endif /* !defined TZFILE_H */
diff --git a/libc/unistd/abort.c b/libc/unistd/abort.c
deleted file mode 100644
index 4dffbae..0000000
--- a/libc/unistd/abort.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/*	$OpenBSD: abort.c,v 1.14 2005/08/08 08:05:36 espie Exp $ */
-/*
- * Copyright (c) 1985 Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <signal.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include "thread_private.h"
-#include "atexit.h"
-
-#ifdef __arm__
-__LIBC_HIDDEN__ void
-__libc_android_abort(void)
-#else
-void
-abort(void)
-#endif
-{
-	struct atexit *p = __atexit;
-	static int cleanup_called = 0;
-	sigset_t mask;
-
-
-	sigfillset(&mask);
-	/*
-	 * don't block SIGABRT to give any handler a chance; we ignore
-	 * any errors -- X311J doesn't allow abort to return anyway.
-	 */
-	sigdelset(&mask, SIGABRT);
-    /* temporary, so deliberate seg fault can be caught by debuggerd */
-	sigdelset(&mask, SIGSEGV);
-    /* -- */
-	(void)sigprocmask(SIG_SETMASK, &mask, (sigset_t *)NULL);
-
-	/*
-	 * POSIX requires we flush stdio buffers on abort
-	 */
-	if (cleanup_called == 0) {
-		while (p != NULL && p->next != NULL)
-			p = p->next;
-		/* the check for fn_dso == NULL is mostly paranoia */
-		if (p != NULL && p->fns[0].fn_dso == NULL &&
-		    p->fns[0].fn_ptr.std_func != NULL) {
-			cleanup_called = 1;
-			(*p->fns[0].fn_ptr.std_func)();
-		}
-	}
-
-    /* temporary, for bug hunting */
-    /* seg fault seems to produce better debuggerd results than SIGABRT */
-#ifdef __mips__
-    /* An access that will generate SIGSEGV rather than SIGBUS. */
-    *((char*)0xdeadc0c0) = 39;
-#else
-    *((char*)0xdeadbaad) = 39;
-#endif
-    /* -- */
-
-	(void)kill(getpid(), SIGABRT);
-
-	/*
-	 * if SIGABRT ignored, or caught and the handler returns, do
-	 * it again, only harder.
-	 */
-        {
-            struct sigaction sa;
-
-            sa.sa_handler = SIG_DFL;
-            sa.sa_flags   = SA_RESTART;
-            sigemptyset(&sa.sa_mask);
-
-            (void)sigaction( SIGABRT, &sa, &sa );
-        }
-
-	(void)sigprocmask(SIG_SETMASK, &mask, (sigset_t *)NULL);
-	(void)kill(getpid(), SIGABRT);
-	_exit(1);
-}
diff --git a/libc/upstream-freebsd/freebsd-compat.h b/libc/upstream-freebsd/freebsd-compat.h
index 08dec15..8030eea 100644
--- a/libc/upstream-freebsd/freebsd-compat.h
+++ b/libc/upstream-freebsd/freebsd-compat.h
@@ -17,4 +17,14 @@
 #ifndef _BIONIC_FREEBSD_COMPAT_H_included
 #define _BIONIC_FREEBSD_COMPAT_H_included
 
+#define __USE_BSD
+#define REPLACE_GETOPT
+
+#define _close close
+#define _fcntl fcntl
+#define _fstat fstat
+#define _open open
+
+#define _sseek __sseek /* Needed as long as we have a mix of OpenBSD and FreeBSD stdio. */
+
 #endif
diff --git a/libc/stdio/clrerr.c b/libc/upstream-freebsd/lib/libc/stdio/clrerr.c
similarity index 82%
rename from libc/stdio/clrerr.c
rename to libc/upstream-freebsd/lib/libc/stdio/clrerr.c
index cb6c4df..f161a6e 100644
--- a/libc/stdio/clrerr.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/clrerr.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: clrerr.c,v 1.6 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,9 +30,19 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)clrerr.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
 #include <stdio.h>
-#include "local.h"
-#undef	clearerr
+#include "un-namespace.h"
+#include "libc_private.h"
+
+#undef clearerr
+#undef clearerr_unlocked
 
 void
 clearerr(FILE *fp)
@@ -42,3 +51,10 @@
 	__sclearerr(fp);
 	FUNLOCKFILE(fp);
 }
+
+void
+clearerr_unlocked(FILE *fp)
+{
+
+	__sclearerr(fp);
+}
diff --git a/libc/stdio/fclose.c b/libc/upstream-freebsd/lib/libc/stdio/fclose.c
similarity index 75%
rename from libc/stdio/fclose.c
rename to libc/upstream-freebsd/lib/libc/stdio/fclose.c
index 8c3bac4..5ed8b2c 100644
--- a/libc/stdio/fclose.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/fclose.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: fclose.c,v 1.6 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,9 +30,19 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)fclose.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include "un-namespace.h"
+#include <spinlock.h>
+#include "libc_private.h"
 #include "local.h"
 
 int
@@ -46,7 +55,6 @@
 		return (EOF);
 	}
 	FLOCKFILE(fp);
-	WCIO_FREE(fp);
 	r = fp->_flags & __SWR ? __sflush(fp) : 0;
 	if (fp->_close != NULL && (*fp->_close)(fp->_cookie) < 0)
 		r = EOF;
@@ -56,8 +64,22 @@
 		FREEUB(fp);
 	if (HASLB(fp))
 		FREELB(fp);
+	fp->_file = -1;
 	fp->_r = fp->_w = 0;	/* Mess up if reaccessed. */
+
+	/*
+	 * Lock the spinlock that protects the __sglue list walk in
+	 * __sfp().  __sfp() treats fp->_flags == 0 as the indication
+	 * of an unused FILE.
+	 *
+	 * Taking the lock prevents possible compiler or processor
+	 * reordering of the writes performed before the final _flags
+	 * cleanup, making sure that we are done with the FILE before
+	 * it is considered available.
+	 */
+	STDIO_THREAD_LOCK();
 	fp->_flags = 0;		/* Release this FILE for reuse. */
+	STDIO_THREAD_UNLOCK();
 	FUNLOCKFILE(fp);
 	return (r);
 }
diff --git a/libc/stdio/fdopen.c b/libc/upstream-freebsd/lib/libc/stdio/fdopen.c
similarity index 74%
rename from libc/stdio/fdopen.c
rename to libc/upstream-freebsd/lib/libc/stdio/fdopen.c
index 1df609c..2e19b9f 100644
--- a/libc/stdio/fdopen.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/fdopen.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: fdopen.c,v 1.5 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,11 +30,20 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)fdopen.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
 #include <sys/types.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <stdio.h>
 #include <errno.h>
+#include <limits.h>
+#include "un-namespace.h"
 #include "local.h"
 
 FILE *
@@ -44,11 +52,23 @@
 	FILE *fp;
 	int flags, oflags, fdflags, tmp;
 
+	/*
+	 * File descriptors are a full int, but _file is only a short.
+	 * If we get a valid file descriptor that is greater than
+	 * SHRT_MAX, then the fd will get sign-extended into an
+	 * invalid file descriptor.  Handle this case by failing the
+	 * open.
+	 */
+	if (fd > SHRT_MAX) {
+		errno = EMFILE;
+		return (NULL);
+	}
+
 	if ((flags = __sflags(mode, &oflags)) == 0)
 		return (NULL);
 
 	/* Make sure the mode the user wants is a subset of the actual mode. */
-	if ((fdflags = fcntl(fd, F_GETFL, 0)) < 0)
+	if ((fdflags = _fcntl(fd, F_GETFL, 0)) < 0)
 		return (NULL);
 	tmp = fdflags & O_ACCMODE;
 	if (tmp != O_RDWR && (tmp != (oflags & O_ACCMODE))) {
@@ -58,11 +78,17 @@
 
 	if ((fp = __sfp()) == NULL)
 		return (NULL);
+
+	if ((oflags & O_CLOEXEC) && _fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) {
+		fp->_flags = 0;
+		return (NULL);
+	}
+
 	fp->_flags = flags;
 	/*
 	 * If opened for appending, but underlying descriptor does not have
-	 * O_APPEND bit set, assert __SAPP so that __swrite() will lseek to
-	 * end before each write.
+	 * O_APPEND bit set, assert __SAPP so that the caller of
+	 * __swrite() will _sseek() to the end before each write.
 	 */
 	if ((oflags & O_APPEND) && !(fdflags & O_APPEND))
 		fp->_flags |= __SAPP;
diff --git a/libc/stdio/feof.c b/libc/upstream-freebsd/lib/libc/stdio/feof.c
similarity index 82%
rename from libc/stdio/feof.c
rename to libc/upstream-freebsd/lib/libc/stdio/feof.c
index 0fa65b0..b970248 100644
--- a/libc/stdio/feof.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/feof.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: feof.c,v 1.5 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,21 +30,34 @@
  * SUCH DAMAGE.
  */
 
-#include <stdio.h>
-#include "local.h"
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)feof.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
 
-/*
- * A subroutine version of the macro feof.
- */
+#include "namespace.h"
+#include <stdio.h>
+#include "un-namespace.h"
+#include "libc_private.h"
+
 #undef feof
+#undef feof_unlocked
 
 int
 feof(FILE *fp)
 {
-	int ret;
+	int	ret;
 
 	FLOCKFILE(fp);
-	ret = __sfeof(fp);
+	ret= __sfeof(fp);
 	FUNLOCKFILE(fp);
 	return (ret);
 }
+
+int
+feof_unlocked(FILE *fp)
+{
+
+	return (__sfeof(fp));
+}
diff --git a/libc/stdio/ferror.c b/libc/upstream-freebsd/lib/libc/stdio/ferror.c
similarity index 81%
rename from libc/stdio/ferror.c
rename to libc/upstream-freebsd/lib/libc/stdio/ferror.c
index 0d2cf01..7e0f8f9 100644
--- a/libc/stdio/ferror.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/ferror.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: ferror.c,v 1.5 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,15 +30,34 @@
  * SUCH DAMAGE.
  */
 
-#include <stdio.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)ferror.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
 
-/*
- * A subroutine version of the macro ferror.
- */
+#include "namespace.h"
+#include <stdio.h>
+#include "un-namespace.h"
+#include "libc_private.h"
+
 #undef ferror
+#undef ferror_unlocked
 
 int
 ferror(FILE *fp)
 {
+	int	ret;
+
+	FLOCKFILE(fp);
+	ret = __sferror(fp);
+	FUNLOCKFILE(fp);
+	return (ret);
+}
+
+int
+ferror_unlocked(FILE *fp)
+{
+
 	return (__sferror(fp));
 }
diff --git a/libc/stdio/fgetln.c b/libc/upstream-freebsd/lib/libc/stdio/fgetln.c
similarity index 85%
rename from libc/stdio/fgetln.c
rename to libc/upstream-freebsd/lib/libc/stdio/fgetln.c
index 0947dd8..1779de2 100644
--- a/libc/stdio/fgetln.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/fgetln.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: fgetln.c,v 1.7 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,9 +30,18 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)fgetln.c	8.2 (Berkeley) 1/2/94";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include "un-namespace.h"
+#include "libc_private.h"
 #include "local.h"
 
 /*
@@ -43,7 +51,7 @@
  * so we add 1 here.
 #endif
  */
-static int
+int
 __slbexpand(FILE *fp, size_t newsize)
 {
 	void *p;
@@ -51,7 +59,7 @@
 #ifdef notdef
 	++newsize;
 #endif
-	if ((size_t)fp->_lb._size >= newsize)
+	if (fp->_lb._size >= newsize)
 		return (0);
 	if ((p = realloc(fp->_lb._base, newsize)) == NULL)
 		return (-1);
@@ -62,7 +70,7 @@
 
 /*
  * Get an input line.  The returned pointer often (but not always)
- * points into a stdio buffer.  Fgetline does not alter the text of
+ * points into a stdio buffer.  Fgetln does not alter the text of
  * the returned line (which is thus not a C string because it will
  * not necessarily end with '\0'), but does allow callers to modify
  * it if they wish.  Thus, we set __SMOD in case the caller does.
@@ -71,18 +79,22 @@
 fgetln(FILE *fp, size_t *lenp)
 {
 	unsigned char *p;
-	char *ret;
 	size_t len;
 	size_t off;
 
 	FLOCKFILE(fp);
-
+	ORIENT(fp, -1);
 	/* make sure there is input */
-	if (fp->_r <= 0 && __srefill(fp))
-		goto error;
+	if (fp->_r <= 0 && __srefill(fp)) {
+		*lenp = 0;
+		FUNLOCKFILE(fp);
+		return (NULL);
+	}
 
 	/* look for a newline in the input */
-	if ((p = memchr((void *)fp->_p, '\n', fp->_r)) != NULL) {
+	if ((p = memchr((void *)fp->_p, '\n', (size_t)fp->_r)) != NULL) {
+		char *ret;
+
 		/*
 		 * Found one.  Flag buffer as modified to keep fseek from
 		 * `optimising' a backward seek, in case the user stomps on
@@ -123,7 +135,7 @@
 		off = len;
 		if (__srefill(fp))
 			break;	/* EOF or error: return partial line */
-		if ((p = memchr((void *)fp->_p, '\n', fp->_r)) == NULL)
+		if ((p = memchr((void *)fp->_p, '\n', (size_t)fp->_r)) == NULL)
 			continue;
 
 		/* got it: finish up the line (like code above) */
@@ -139,12 +151,11 @@
 		break;
 	}
 	*lenp = len;
-	ret = (char *)fp->_lb._base;
 #ifdef notdef
-	ret[len] = '\0';
+	fp->_lb._base[len] = 0;
 #endif
 	FUNLOCKFILE(fp);
-	return (ret);
+	return ((char *)fp->_lb._base);
 
 error:
 	*lenp = 0;		/* ??? */
diff --git a/libc/stdio/remove.c b/libc/upstream-freebsd/lib/libc/stdio/fgetpos.c
similarity index 82%
copy from libc/stdio/remove.c
copy to libc/upstream-freebsd/lib/libc/stdio/fgetpos.c
index d09d76f..f161f43 100644
--- a/libc/stdio/remove.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/fgetpos.c
@@ -1,5 +1,3 @@
-/*	$OpenBSD: remove.c,v 1.7 2005/08/08 08:05:36 espie Exp $	*/
-
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -32,18 +30,22 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)fgetpos.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
 #include <stdio.h>
-#include <unistd.h>
-#include <sys/stat.h>
 
 int
-remove(const char *file)
+fgetpos(FILE * __restrict fp, fpos_t * __restrict pos)
 {
-	struct stat st;
-
-	if (lstat(file, &st) < 0)
+	/*
+	 * ftello is thread-safe; no need to lock fp.
+	 */
+	if ((*pos = ftello(fp)) == (fpos_t)-1)
 		return (-1);
-	if (S_ISDIR(st.st_mode))
-		return (rmdir(file));
-	return (unlink(file));
+	else
+		return (0);
 }
diff --git a/libc/stdio/fgets.c b/libc/upstream-freebsd/lib/libc/stdio/fgets.c
similarity index 86%
rename from libc/stdio/fgets.c
rename to libc/upstream-freebsd/lib/libc/stdio/fgets.c
index 311b7b2..9abf559 100644
--- a/libc/stdio/fgets.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/fgets.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: fgets.c,v 1.10 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,18 +30,26 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)fgets.c	8.2 (Berkeley) 12/22/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
 #include <stdio.h>
 #include <string.h>
+#include "un-namespace.h"
 #include "local.h"
+#include "libc_private.h"
 
 /*
  * Read at most n-1 characters from the given file.
  * Stop when a newline has been read, or the count runs out.
  * Return first argument, or NULL if no characters were read.
- * Do not return NULL if n == 1.
  */
 char *
-fgets(char *buf, int n, FILE *fp)
+fgets(char * __restrict buf, int n, FILE * __restrict fp)
 {
 	size_t len;
 	char *s;
@@ -52,24 +59,24 @@
 		return (NULL);
 
 	FLOCKFILE(fp);
-	_SET_ORIENTATION(fp, -1);
+	ORIENT(fp, -1);
 	s = buf;
 	n--;			/* leave space for NUL */
 	while (n != 0) {
 		/*
 		 * If the buffer is empty, refill it.
 		 */
-		if (fp->_r <= 0) {
+		if ((len = fp->_r) <= 0) {
 			if (__srefill(fp)) {
 				/* EOF/error: stop with partial or no line */
 				if (s == buf) {
 					FUNLOCKFILE(fp);
 					return (NULL);
-                                }
+				}
 				break;
 			}
+			len = fp->_r;
 		}
-		len = fp->_r;
 		p = fp->_p;
 
 		/*
@@ -78,7 +85,7 @@
 		 * newline, and stop.  Otherwise, copy entire chunk
 		 * and loop.
 		 */
-		if ((int)len > n)
+		if (len > n)
 			len = n;
 		t = memchr((void *)p, '\n', len);
 		if (t != NULL) {
@@ -86,7 +93,7 @@
 			fp->_r -= len;
 			fp->_p = t;
 			(void)memcpy((void *)s, (void *)p, len);
-			s[len] = '\0';
+			s[len] = 0;
 			FUNLOCKFILE(fp);
 			return (buf);
 		}
@@ -96,7 +103,7 @@
 		s += len;
 		n -= len;
 	}
-	*s = '\0';
+	*s = 0;
 	FUNLOCKFILE(fp);
 	return (buf);
 }
diff --git a/libc/stdio/fileno.c b/libc/upstream-freebsd/lib/libc/stdio/fileno.c
similarity index 81%
rename from libc/stdio/fileno.c
rename to libc/upstream-freebsd/lib/libc/stdio/fileno.c
index cbefdeb..3ac1830 100644
--- a/libc/stdio/fileno.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/fileno.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: fileno.c,v 1.5 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,21 +30,35 @@
  * SUCH DAMAGE.
  */
 
-#include <stdio.h>
-#include "local.h"
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)fileno.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
 
-/*
- * A subroutine version of the macro fileno.
- */
+#include "namespace.h"
+#include <stdio.h>
+#include "un-namespace.h"
+#include "libc_private.h"
+
 #undef fileno
+#undef fileno_unlocked
 
 int
 fileno(FILE *fp)
 {
-	int ret;
+	int fd;
 
 	FLOCKFILE(fp);
-	ret = __sfileno(fp);
+	fd = __sfileno(fp);
 	FUNLOCKFILE(fp);
-	return (ret);
+
+	return (fd);
+}
+
+int
+fileno_unlocked(FILE *fp)
+{
+
+	return (__sfileno(fp));
 }
diff --git a/libc/stdio/flags.c b/libc/upstream-freebsd/lib/libc/stdio/flags.c
similarity index 76%
rename from libc/stdio/flags.c
rename to libc/upstream-freebsd/lib/libc/stdio/flags.c
index dde0447..1878c2f 100644
--- a/libc/stdio/flags.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/flags.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: flags.c,v 1.6 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,15 +30,22 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)flags.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
 #include <sys/types.h>
 #include <sys/file.h>
 #include <stdio.h>
 #include <errno.h>
-#include <fcntl.h>
+
+#include "local.h"
 
 /*
  * Return the (stdio) flags for a given mode.  Store the flags
- * to be passed to an open() syscall through *optr.
+ * to be passed to an _open() syscall through *optr.
  * Return 0 on error.
  */
 int
@@ -72,11 +78,35 @@
 		return (0);
 	}
 
-	/* [rwa]\+ or [rwa]b\+ means read and write */
-	if (*mode == '+' || (*mode == 'b' && mode[1] == '+')) {
+	/* 'b' (binary) is ignored */
+	if (*mode == 'b')
+		mode++;
+
+	/* [rwa][b]\+ means read and write */
+	if (*mode == '+') {
+		mode++;
 		ret = __SRW;
 		m = O_RDWR;
 	}
+
+	/* 'b' (binary) can appear here, too -- and is ignored again */
+	if (*mode == 'b')
+		mode++;
+
+	/* 'x' means exclusive (fail if the file exists) */
+	if (*mode == 'x') {
+		mode++;
+		if (m == O_RDONLY) {
+			errno = EINVAL;
+			return (0);
+		}
+		o |= O_EXCL;
+	}
+
+	/* set close-on-exec */
+	if (*mode == 'e')
+		o |= O_CLOEXEC;
+
 	*optr = m | o;
 	return (ret);
 }
diff --git a/libc/stdio/fopen.c b/libc/upstream-freebsd/lib/libc/stdio/fopen.c
similarity index 75%
rename from libc/stdio/fopen.c
rename to libc/upstream-freebsd/lib/libc/stdio/fopen.c
index 6d2d882..b08e336 100644
--- a/libc/stdio/fopen.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/fopen.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: fopen.c,v 1.5 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,18 +30,26 @@
  * SUCH DAMAGE.
  */
 
-#define __USE_BSD
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)fopen.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
 
+#include "namespace.h"
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
+#include <unistd.h>
 #include <stdio.h>
 #include <errno.h>
+#include <limits.h>
+#include "un-namespace.h"
+
 #include "local.h"
-#include <linux/stat.h>
 
 FILE *
-fopen(const char *file, const char *mode)
+fopen(const char * __restrict file, const char * __restrict mode)
 {
 	FILE *fp;
 	int f;
@@ -52,10 +59,23 @@
 		return (NULL);
 	if ((fp = __sfp()) == NULL)
 		return (NULL);
-	if ((f = open(file, oflags, DEFFILEMODE)) < 0) {
+	if ((f = _open(file, oflags, DEFFILEMODE)) < 0) {
 		fp->_flags = 0;			/* release */
 		return (NULL);
 	}
+	/*
+	 * File descriptors are a full int, but _file is only a short.
+	 * If we get a valid file descriptor that is greater than
+	 * SHRT_MAX, then the fd will get sign-extended into an
+	 * invalid file descriptor.  Handle this case by failing the
+	 * open.
+	 */
+	if (f > SHRT_MAX) {
+		fp->_flags = 0;			/* release */
+		_close(f);
+		errno = EMFILE;
+		return (NULL);
+	}
 	fp->_file = f;
 	fp->_flags = flags;
 	fp->_cookie = fp;
@@ -63,7 +83,6 @@
 	fp->_write = __swrite;
 	fp->_seek = __sseek;
 	fp->_close = __sclose;
-
 	/*
 	 * When opening in append mode, even though we use O_APPEND,
 	 * we need to seek to the end so that ftell() gets the right
@@ -73,6 +92,6 @@
 	 * fseek and ftell.)
 	 */
 	if (oflags & O_APPEND)
-		(void) __sseek((void *)fp, (fpos_t)0, SEEK_END);
+		(void)_sseek(fp, (fpos_t)0, SEEK_END);
 	return (fp);
 }
diff --git a/libc/stdio/fpurge.c b/libc/upstream-freebsd/lib/libc/stdio/fpurge.c
similarity index 80%
rename from libc/stdio/fpurge.c
rename to libc/upstream-freebsd/lib/libc/stdio/fpurge.c
index e04c4fe..f205bdf 100644
--- a/libc/stdio/fpurge.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/fpurge.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: fpurge.c,v 1.6 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,10 +30,19 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)fpurge.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include "un-namespace.h"
 #include "local.h"
+#include "libc_private.h"
 
 /*
  * fpurge: like fflush, but without writing anything: leave the
@@ -43,19 +51,19 @@
 int
 fpurge(FILE *fp)
 {
+	int retval;
 	FLOCKFILE(fp);
 	if (!fp->_flags) {
-		FUNLOCKFILE(fp);
 		errno = EBADF;
-		return(EOF);
+		retval = EOF;
+	} else {
+		if (HASUB(fp))
+			FREEUB(fp);
+		fp->_p = fp->_bf._base;
+		fp->_r = 0;
+		fp->_w = fp->_flags & (__SLBF|__SNBF|__SRD) ? 0 : fp->_bf._size;
+		retval = 0;
 	}
-
-	if (HASUB(fp))
-		FREEUB(fp);
-	WCIO_FREE(fp);
-	fp->_p = fp->_bf._base;
-	fp->_r = 0;
-	fp->_w = fp->_flags & (__SLBF|__SNBF) ? 0 : fp->_bf._size;
 	FUNLOCKFILE(fp);
-	return (0);
+	return (retval);
 }
diff --git a/libc/stdio/fputs.c b/libc/upstream-freebsd/lib/libc/stdio/fputs.c
similarity index 83%
rename from libc/stdio/fputs.c
rename to libc/upstream-freebsd/lib/libc/stdio/fputs.c
index c2462ba..3b8f2c9 100644
--- a/libc/stdio/fputs.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/fputs.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: fputs.c,v 1.7 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,28 +30,37 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)fputs.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
 #include <stdio.h>
 #include <string.h>
-#include "local.h"
+#include "un-namespace.h"
 #include "fvwrite.h"
+#include "libc_private.h"
+#include "local.h"
 
 /*
  * Write the given string to the given file.
  */
 int
-fputs(const char *s, FILE *fp)
+fputs(const char * __restrict s, FILE * __restrict fp)
 {
+	int retval;
 	struct __suio uio;
 	struct __siov iov;
-	int ret;
 
 	iov.iov_base = (void *)s;
 	iov.iov_len = uio.uio_resid = strlen(s);
 	uio.uio_iov = &iov;
 	uio.uio_iovcnt = 1;
 	FLOCKFILE(fp);
-	_SET_ORIENTATION(fp, -1);
-	ret = __sfvwrite(fp, &uio);
+	ORIENT(fp, -1);
+	retval = __sfvwrite(fp, &uio);
 	FUNLOCKFILE(fp);
-	return (ret);
+	return (retval);
 }
diff --git a/libc/stdio/fsetpos.c b/libc/upstream-freebsd/lib/libc/stdio/fsetpos.c
similarity index 88%
rename from libc/stdio/fsetpos.c
rename to libc/upstream-freebsd/lib/libc/stdio/fsetpos.c
index 9624fe5..c6b8b78 100644
--- a/libc/stdio/fsetpos.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/fsetpos.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: fsetpos.c,v 1.6 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,10 +30,17 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)fsetpos.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
 #include <stdio.h>
 
 /*
- * fsetpos: like fseeko.
+ * fsetpos: like fseek.
  */
 int
 fsetpos(FILE *iop, const fpos_t *pos)
diff --git a/libc/stdio/funopen.c b/libc/upstream-freebsd/lib/libc/stdio/funopen.c
similarity index 86%
rename from libc/stdio/funopen.c
rename to libc/upstream-freebsd/lib/libc/stdio/funopen.c
index b85ee96..983fe50 100644
--- a/libc/stdio/funopen.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/funopen.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: funopen.c,v 1.8 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,14 +30,23 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)funopen.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
 #include <stdio.h>
 #include <errno.h>
+
 #include "local.h"
 
 FILE *
-funopen(const void *cookie, int (*readfn)(void *, char *, int),
+funopen(const void *cookie,
+	int (*readfn)(void *, char *, int),
 	int (*writefn)(void *, const char *, int),
-	fpos_t (*seekfn)(void *, fpos_t, int), int (*closefn)(void *))
+	fpos_t (*seekfn)(void *, fpos_t, int),
+	int (*closefn)(void *))
 {
 	FILE *fp;
 	int flags;
@@ -59,7 +67,7 @@
 		return (NULL);
 	fp->_flags = flags;
 	fp->_file = -1;
-	fp->_cookie = (void *)cookie;		/* SAFE: cookie not modified */
+	fp->_cookie = (void *)cookie;
 	fp->_read = readfn;
 	fp->_write = writefn;
 	fp->_seek = seekfn;
diff --git a/libc/stdio/fwalk.c b/libc/upstream-freebsd/lib/libc/stdio/fwalk.c
similarity index 78%
rename from libc/stdio/fwalk.c
rename to libc/upstream-freebsd/lib/libc/stdio/fwalk.c
index b1df891..151837b 100644
--- a/libc/stdio/fwalk.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/fwalk.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: fwalk.c,v 1.7 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,7 +30,13 @@
  * SUCH DAMAGE.
  */
 
-#include <errno.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)fwalk.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
 #include <stdio.h>
 #include "local.h"
 #include "glue.h"
@@ -44,10 +49,17 @@
 	struct glue *g;
 
 	ret = 0;
+	/*
+	 * It should be safe to walk the list without locking it;
+	 * new nodes are only added to the end and none are ever
+	 * removed.
+	 *
+	 * Avoid locking this list while walking it or else you will
+	 * introduce a potential deadlock in [at least] refill.c.
+	 */
 	for (g = &__sglue; g != NULL; g = g->next)
-		for (fp = g->iobs, n = g->niobs; --n >= 0; fp++) {
+		for (fp = g->iobs, n = g->niobs; --n >= 0; fp++)
 			if ((fp->_flags != 0) && ((fp->_flags & __SIGN) == 0))
 				ret |= (*function)(fp);
-		}
 	return (ret);
 }
diff --git a/libc/stdio/fwrite.c b/libc/upstream-freebsd/lib/libc/stdio/fwrite.c
similarity index 67%
rename from libc/stdio/fwrite.c
rename to libc/upstream-freebsd/lib/libc/stdio/fwrite.c
index a97313e..707d362 100644
--- a/libc/stdio/fwrite.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/fwrite.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: fwrite.c,v 1.5 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,36 +30,67 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)fwrite.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
+#include <errno.h>
+#include <stdint.h>
 #include <stdio.h>
+#include "un-namespace.h"
 #include "local.h"
 #include "fvwrite.h"
+#include "libc_private.h"
 
 /*
  * Write `count' objects (each size `size') from memory to the given file.
  * Return the number of whole objects written.
  */
 size_t
-fwrite(const void *buf, size_t size, size_t count, FILE *fp)
+fwrite(const void * __restrict buf, size_t size, size_t count, FILE * __restrict fp)
 {
 	size_t n;
 	struct __suio uio;
 	struct __siov iov;
-	int ret;
+
+	/*
+	 * ANSI and SUSv2 require a return value of 0 if size or count are 0.
+	 */
+	if ((count == 0) || (size == 0))
+		return (0);
+
+	/*
+	 * Check for integer overflow.  As an optimization, first check that
+	 * at least one of {count, size} is at least 2^16, since if both
+	 * values are less than that, their product can't possibly overflow
+	 * (size_t is always at least 32 bits on FreeBSD).
+	 */
+	if (((count | size) > 0xFFFF) &&
+	    (count > SIZE_MAX / size)) {
+		errno = EINVAL;
+		fp->_flags |= __SERR;
+		return (0);
+	}
+
+	n = count * size;
 
 	iov.iov_base = (void *)buf;
-	uio.uio_resid = iov.iov_len = n = count * size;
+	uio.uio_resid = iov.iov_len = n;
 	uio.uio_iov = &iov;
 	uio.uio_iovcnt = 1;
 
+	FLOCKFILE(fp);
+	ORIENT(fp, -1);
 	/*
 	 * The usual case is success (__sfvwrite returns 0);
 	 * skip the divide if this happens, since divides are
 	 * generally slow and since this occurs whenever size==0.
 	 */
-	FLOCKFILE(fp);
-	ret = __sfvwrite(fp, &uio);
+	if (__sfvwrite(fp, &uio) != 0)
+	    count = (n - uio.uio_resid) / size;
 	FUNLOCKFILE(fp);
-	if (ret == 0)
-		return (count);
-	return ((n - uio.uio_resid) / size);
+	return (count);
 }
diff --git a/libc/stdio/getc.c b/libc/upstream-freebsd/lib/libc/stdio/getc.c
similarity index 82%
rename from libc/stdio/getc.c
rename to libc/upstream-freebsd/lib/libc/stdio/getc.c
index 16a5b1d..4963c8c 100644
--- a/libc/stdio/getc.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/getc.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: getc.c,v 1.6 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,32 +30,36 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)getc.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
 #include <stdio.h>
+#include "un-namespace.h"
+#include "libc_private.h"
 #include "local.h"
 
-/*
- * A subroutine version of the macro getc_unlocked.
- */
-#undef getc_unlocked
-
-int
-getc_unlocked(FILE *fp)
-{
-	return (__sgetc(fp));
-}
-
-/*
- * A subroutine version of the macro getc.
- */
 #undef getc
+#undef getc_unlocked
 
 int
 getc(FILE *fp)
 {
-	int c;
-
+	int retval;
 	FLOCKFILE(fp);
-	c = __sgetc(fp);
+	/* Orientation set by __sgetc() when buffer is empty. */
+	/* ORIENT(fp, -1); */
+	retval = __sgetc(fp);
 	FUNLOCKFILE(fp);
-	return (c);
+	return (retval);
+}
+
+int
+getc_unlocked(FILE *fp)
+{
+
+	return (__sgetc(fp));
 }
diff --git a/libc/stdio/getchar.c b/libc/upstream-freebsd/lib/libc/stdio/getchar.c
similarity index 78%
rename from libc/stdio/getchar.c
rename to libc/upstream-freebsd/lib/libc/stdio/getchar.c
index 550817d..21040bc 100644
--- a/libc/stdio/getchar.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/getchar.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: getchar.c,v 1.7 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,28 +30,39 @@
  * SUCH DAMAGE.
  */
 
-#include <stdio.h>
-
-/*
- * A subroutine version of the macro getchar_unlocked.
- */
-#undef getchar_unlocked
-
-int
-getchar_unlocked(void)
-{
-	return (getc_unlocked(stdin));
-}
-
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)getchar.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
 
 /*
  * A subroutine version of the macro getchar.
  */
+#include "namespace.h"
+#include <stdio.h>
+#include "un-namespace.h"
+#include "local.h"
+#include "libc_private.h"
 
 #undef getchar
+#undef getchar_unlocked
 
 int
-getchar(void)
+getchar()
 {
-	return (getc(stdin));
+	int retval;
+	FLOCKFILE(stdin);
+	/* Orientation set by __sgetc() when buffer is empty. */
+	/* ORIENT(stdin, -1); */
+	retval = __sgetc(stdin);
+	FUNLOCKFILE(stdin);
+	return (retval);
+}
+
+int
+getchar_unlocked(void)
+{
+
+	return (__sgetc(stdin));
 }
diff --git a/libc/stdio/makebuf.c b/libc/upstream-freebsd/lib/libc/stdio/makebuf.c
similarity index 86%
rename from libc/stdio/makebuf.c
rename to libc/upstream-freebsd/lib/libc/stdio/makebuf.c
index d47e27c..a92087e 100644
--- a/libc/stdio/makebuf.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/makebuf.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: makebuf.c,v 1.8 2005/12/28 18:50:22 millert Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,11 +30,21 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)makebuf.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include "un-namespace.h"
+
+#include "libc_private.h"
 #include "local.h"
 
 /*
@@ -43,7 +52,7 @@
  * Per the ANSI C standard, ALL tty devices default to line buffered.
  *
  * As a side effect, we set __SOPT or __SNPT (en/dis-able fseek
- * optimisation) right after the fstat() that finds the buffer size.
+ * optimisation) right after the _fstat() that finds the buffer size.
  */
 void
 __smakebuf(FILE *fp)
@@ -65,7 +74,7 @@
 		fp->_bf._size = 1;
 		return;
 	}
-	__atexit_register_cleanup(_cleanup);
+	__cleanup = _cleanup;
 	flags |= __SMBF;
 	fp->_bf._base = fp->_p = p;
 	fp->_bf._size = size;
@@ -82,15 +91,15 @@
 {
 	struct stat st;
 
-	if (fp->_file < 0 || fstat(fp->_file, &st) < 0) {
+	if (fp->_file < 0 || _fstat(fp->_file, &st) < 0) {
 		*couldbetty = 0;
 		*bufsize = BUFSIZ;
 		return (__SNPT);
 	}
 
 	/* could be a tty iff it is a character device */
-	*couldbetty = S_ISCHR(st.st_mode);
-	if (st.st_blksize == 0) {
+	*couldbetty = (st.st_mode & S_IFMT) == S_IFCHR;
+	if (st.st_blksize <= 0) {
 		*bufsize = BUFSIZ;
 		return (__SNPT);
 	}
diff --git a/libc/stdio/mktemp.c b/libc/upstream-freebsd/lib/libc/stdio/mktemp.c
similarity index 60%
rename from libc/stdio/mktemp.c
rename to libc/upstream-freebsd/lib/libc/stdio/mktemp.c
index aaa5640..58783dd 100644
--- a/libc/stdio/mktemp.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/mktemp.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: mktemp.c,v 1.19 2005/08/08 08:05:36 espie Exp $ */
 /*
  * Copyright (c) 1987, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -28,18 +27,30 @@
  * SUCH DAMAGE.
  */
 
-#include <sys/types.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)mktemp.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
+#include <sys/param.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <ctype.h>
 #include <unistd.h>
+#include "un-namespace.h"
+
+char *_mktemp(char *);
 
 static int _gettemp(char *, int *, int, int);
 
-extern uint32_t  arc4random();
+static const unsigned char padchar[] =
+"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
 
 int
 mkstemps(char *path, int slen)
@@ -60,15 +71,13 @@
 char *
 mkdtemp(char *path)
 {
-	return(_gettemp(path, (int *)NULL, 1, 0) ? path : (char *)NULL);
+	return (_gettemp(path, (int *)NULL, 1, 0) ? path : (char *)NULL);
 }
 
-char *_mktemp(char *);
-
-__LIBC_HIDDEN__ char *
+char *
 _mktemp(char *path)
 {
-	return(_gettemp(path, (int *)NULL, 0, 0) ? path : (char *)NULL);
+	return (_gettemp(path, (int *)NULL, 0, 0) ? path : (char *)NULL);
 }
 
 __warn_references(mktemp,
@@ -77,66 +86,62 @@
 char *
 mktemp(char *path)
 {
-	return(_mktemp(path));
+	return (_mktemp(path));
 }
 
-
 static int
 _gettemp(char *path, int *doopen, int domkdir, int slen)
 {
-	char *start, *trv, *suffp;
+	char *start, *trv, *suffp, *carryp;
+	char *pad;
 	struct stat sbuf;
 	int rval;
-	pid_t pid;
+	uint32_t rand;
+	char carrybuf[MAXPATHLEN];
 
-	if (doopen && domkdir) {
-		errno = EINVAL;
-		return(0);
-	}
-
-	for (trv = path; *trv; ++trv)
-		;
-	trv -= slen;
-	suffp = trv;
-	--trv;
-	if (trv < path) {
+	if ((doopen != NULL && domkdir) || slen < 0) {
 		errno = EINVAL;
 		return (0);
 	}
-	pid = getpid();
-	while (trv >= path && *trv == 'X' && pid != 0) {
-		*trv-- = (pid % 10) + '0';
-		pid /= 10;
-	}
-	while (trv >= path && *trv == 'X') {
-		char c;
 
-		pid = (arc4random() & 0xffff) % (26+26);
-		if (pid < 26)
-			c = pid + 'A';
-		else
-			c = (pid - 26) + 'a';
-		*trv-- = c;
+	for (trv = path; *trv != '\0'; ++trv)
+		;
+	if (trv - path >= MAXPATHLEN) {
+		errno = ENAMETOOLONG;
+		return (0);
+	}
+	trv -= slen;
+	suffp = trv;
+	--trv;
+	if (trv < path || NULL != strchr(suffp, '/')) {
+		errno = EINVAL;
+		return (0);
+	}
+
+	/* Fill space with random characters */
+	while (trv >= path && *trv == 'X') {
+		rand = arc4random_uniform(sizeof(padchar) - 1);
+		*trv-- = padchar[rand];
 	}
 	start = trv + 1;
 
+	/* save first combination of random characters */
+	memcpy(carrybuf, start, suffp - start);
+
 	/*
-	 * check the target directory; if you have six X's and it
-	 * doesn't exist this runs for a *very* long time.
+	 * check the target directory.
 	 */
-	if (doopen || domkdir) {
-		for (;; --trv) {
-			if (trv <= path)
-				break;
+	if (doopen != NULL || domkdir) {
+		for (; trv > path; --trv) {
 			if (*trv == '/') {
 				*trv = '\0';
 				rval = stat(path, &sbuf);
 				*trv = '/';
 				if (rval != 0)
-					return(0);
+					return (0);
 				if (!S_ISDIR(sbuf.st_mode)) {
 					errno = ENOTDIR;
-					return(0);
+					return (0);
 				}
 				break;
 			}
@@ -146,36 +151,38 @@
 	for (;;) {
 		if (doopen) {
 			if ((*doopen =
-			    open(path, O_CREAT|O_EXCL|O_RDWR, 0600)) >= 0)
-				return(1);
+			    _open(path, O_CREAT|O_EXCL|O_RDWR, 0600)) >= 0)
+				return (1);
 			if (errno != EEXIST)
-				return(0);
+				return (0);
 		} else if (domkdir) {
 			if (mkdir(path, 0700) == 0)
-				return(1);
+				return (1);
 			if (errno != EEXIST)
-				return(0);
-		} else if (lstat(path, &sbuf))
-			return(errno == ENOENT ? 1 : 0);
-
-		/* tricky little algorithm for backward compatibility */
-		for (trv = start;;) {
-			if (!*trv)
 				return (0);
-			if (*trv == 'Z') {
-				if (trv == suffp)
-					return (0);
-				*trv++ = 'a';
+		} else if (lstat(path, &sbuf))
+			return (errno == ENOENT);
+
+		/* If we have a collision, cycle through the space of filenames */
+		for (trv = start, carryp = carrybuf;;) {
+			/* have we tried all possible permutations? */
+			if (trv == suffp)
+				return (0); /* yes - exit with EEXIST */
+			pad = strchr(padchar, *trv);
+			if (pad == NULL) {
+				/* this should never happen */
+				errno = EIO;
+				return (0);
+			}
+			/* increment character */
+			*trv = (*++pad == '\0') ? padchar[0] : *pad;
+			/* carry to next position? */
+			if (*trv == *carryp) {
+				/* increment position and loop */
+				++trv;
+				++carryp;
 			} else {
-				if (isdigit(*trv))
-					*trv = 'a';
-				else if (*trv == 'z')	/* inc from z to A */
-					*trv = 'A';
-				else {
-					if (trv == suffp)
-						return (0);
-					++*trv;
-				}
+				/* try with new name */
 				break;
 			}
 		}
diff --git a/libc/stdio/putc.c b/libc/upstream-freebsd/lib/libc/stdio/putc.c
similarity index 79%
rename from libc/stdio/putc.c
rename to libc/upstream-freebsd/lib/libc/stdio/putc.c
index 2b05504..aaffece 100644
--- a/libc/stdio/putc.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/putc.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: putc.c,v 1.7 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,37 +30,36 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)putc.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
 #include <stdio.h>
-#include <errno.h>
+#include "un-namespace.h"
 #include "local.h"
+#include "libc_private.h"
 
-/*
- * A subroutine version of the macro putc_unlocked.
- */
-#undef putc_unlocked
-
-int
-putc_unlocked(int c, FILE *fp)
-{
-	if (cantwrite(fp)) {
-		errno = EBADF;
-		return (EOF);
-	}
-	return (__sputc(c, fp));
-}
-
-/*
- * A subroutine version of the macro putc.
- */
 #undef putc
+#undef putc_unlocked
 
 int
 putc(int c, FILE *fp)
 {
-	int ret;
-
+	int retval;
 	FLOCKFILE(fp);
-	ret = putc_unlocked(c, fp);
+	/* Orientation set by __sputc() when buffer is full. */
+	/* ORIENT(fp, -1); */
+	retval = __sputc(c, fp);
 	FUNLOCKFILE(fp);
-	return (ret);
+	return (retval);
+}
+
+int
+putc_unlocked(int ch, FILE *fp)
+{
+
+	return (__sputc(ch, fp));
 }
diff --git a/libc/stdio/putchar.c b/libc/upstream-freebsd/lib/libc/stdio/putchar.c
similarity index 78%
rename from libc/stdio/putchar.c
rename to libc/upstream-freebsd/lib/libc/stdio/putchar.c
index eeed0a2..7561559 100644
--- a/libc/stdio/putchar.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/putchar.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: putchar.c,v 1.7 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,21 +30,20 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)putchar.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
 #include <stdio.h>
-
-#undef putchar_unlocked
-/*
- * A subrouting version of the macro putchar_unlocked
- */
-int
-putchar_unlocked(int c)
-{
-	FILE *so = stdout;
-
-	return (putc_unlocked(c, so));
-}
+#include "un-namespace.h"
+#include "local.h"
+#include "libc_private.h"
 
 #undef putchar
+#undef putchar_unlocked
 
 /*
  * A subroutine version of the macro putchar
@@ -53,7 +51,20 @@
 int
 putchar(int c)
 {
+	int retval;
 	FILE *so = stdout;
 
-	return (putc(c, so));
+	FLOCKFILE(so);
+	/* Orientation set by __sputc() when buffer is full. */
+	/* ORIENT(so, -1); */
+	retval = __sputc(c, so);
+	FUNLOCKFILE(so);
+	return (retval);
+}
+
+int
+putchar_unlocked(int ch)
+{
+
+	return (__sputc(ch, stdout));
 }
diff --git a/libc/stdio/puts.c b/libc/upstream-freebsd/lib/libc/stdio/puts.c
similarity index 84%
rename from libc/stdio/puts.c
rename to libc/upstream-freebsd/lib/libc/stdio/puts.c
index 4603a3d..5ee7fc1 100644
--- a/libc/stdio/puts.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/puts.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: puts.c,v 1.7 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,21 +30,30 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)puts.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
 #include <stdio.h>
 #include <string.h>
-#include "local.h"
+#include "un-namespace.h"
 #include "fvwrite.h"
+#include "libc_private.h"
+#include "local.h"
 
 /*
  * Write the given string to stdout, appending a newline.
  */
 int
-puts(const char *s)
+puts(char const *s)
 {
+	int retval;
 	size_t c = strlen(s);
 	struct __suio uio;
 	struct __siov iov[2];
-	int ret;
 
 	iov[0].iov_base = (void *)s;
 	iov[0].iov_len = c;
@@ -55,7 +63,8 @@
 	uio.uio_iov = &iov[0];
 	uio.uio_iovcnt = 2;
 	FLOCKFILE(stdout);
-	ret = __sfvwrite(stdout, &uio);
+	ORIENT(stdout, -1);
+	retval = __sfvwrite(stdout, &uio) ? EOF : '\n';
 	FUNLOCKFILE(stdout);
-	return (ret ? EOF : '\n');
+	return (retval);
 }
diff --git a/libc/stdio/putw.c b/libc/upstream-freebsd/lib/libc/stdio/putw.c
similarity index 83%
rename from libc/stdio/putw.c
rename to libc/upstream-freebsd/lib/libc/stdio/putw.c
index 12955fe..0360caf 100644
--- a/libc/stdio/putw.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/putw.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: putw.c,v 1.6 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,12 +30,22 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)putw.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
 #include <stdio.h>
+#include "un-namespace.h"
 #include "fvwrite.h"
+#include "libc_private.h"
 
 int
 putw(int w, FILE *fp)
 {
+	int retval;
 	struct __suio uio;
 	struct __siov iov;
 
@@ -44,5 +53,8 @@
 	iov.iov_len = uio.uio_resid = sizeof(w);
 	uio.uio_iov = &iov;
 	uio.uio_iovcnt = 1;
-	return (__sfvwrite(fp, &uio));
+	FLOCKFILE(fp);
+	retval = __sfvwrite(fp, &uio);
+	FUNLOCKFILE(fp);
+	return (retval);
 }
diff --git a/libc/stdio/remove.c b/libc/upstream-freebsd/lib/libc/stdio/remove.c
similarity index 86%
rename from libc/stdio/remove.c
rename to libc/upstream-freebsd/lib/libc/stdio/remove.c
index d09d76f..2e984ba 100644
--- a/libc/stdio/remove.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/remove.c
@@ -1,5 +1,3 @@
-/*	$OpenBSD: remove.c,v 1.7 2005/08/08 08:05:36 espie Exp $	*/
-
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -32,18 +30,25 @@
  * SUCH DAMAGE.
  */
 
-#include <stdio.h>
-#include <unistd.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)remove.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
 #include <sys/stat.h>
+#include <unistd.h>
+#include <stdio.h>
 
 int
 remove(const char *file)
 {
-	struct stat st;
+	struct stat sb;
 
-	if (lstat(file, &st) < 0)
+	if (lstat(file, &sb) < 0)
 		return (-1);
-	if (S_ISDIR(st.st_mode))
+	if (S_ISDIR(sb.st_mode))
 		return (rmdir(file));
 	return (unlink(file));
 }
diff --git a/libc/stdio/rget.c b/libc/upstream-freebsd/lib/libc/stdio/rget.c
similarity index 91%
rename from libc/stdio/rget.c
rename to libc/upstream-freebsd/lib/libc/stdio/rget.c
index 4cd97cb..bdc0311 100644
--- a/libc/stdio/rget.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/rget.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: rget.c,v 1.7 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,6 +30,12 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)rget.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
 #include <stdio.h>
 #include "local.h"
 
@@ -42,7 +47,6 @@
 int
 __srget(FILE *fp)
 {
-	_SET_ORIENTATION(fp, -1);
 	if (__srefill(fp) == 0) {
 		fp->_r--;
 		return (*fp->_p++);
diff --git a/libc/stdio/setbuf.c b/libc/upstream-freebsd/lib/libc/stdio/setbuf.c
similarity index 87%
rename from libc/stdio/setbuf.c
rename to libc/upstream-freebsd/lib/libc/stdio/setbuf.c
index 883b895..5c65f97 100644
--- a/libc/stdio/setbuf.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/setbuf.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: setbuf.c,v 1.5 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,11 +30,17 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)setbuf.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
 #include <stdio.h>
 #include "local.h"
 
 void
-setbuf(FILE *fp, char *buf)
+setbuf(FILE * __restrict fp, char * __restrict buf)
 {
 	(void) setvbuf(fp, buf, buf ? _IOFBF : _IONBF, BUFSIZ);
 }
diff --git a/libc/stdio/setbuffer.c b/libc/upstream-freebsd/lib/libc/stdio/setbuffer.c
similarity index 87%
rename from libc/stdio/setbuffer.c
rename to libc/upstream-freebsd/lib/libc/stdio/setbuffer.c
index 8725ff7..af5eb3c 100644
--- a/libc/stdio/setbuffer.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/setbuffer.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: setbuffer.c,v 1.5 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,13 +30,19 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)setbuffer.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
 #include <stdio.h>
 
 void
 setbuffer(FILE *fp, char *buf, int size)
 {
 
-	(void)setvbuf(fp, buf, buf ? _IOFBF : _IONBF, size);
+	(void)setvbuf(fp, buf, buf ? _IOFBF : _IONBF, (size_t)size);
 }
 
 /*
diff --git a/libc/stdio/setvbuf.c b/libc/upstream-freebsd/lib/libc/stdio/setvbuf.c
similarity index 90%
rename from libc/stdio/setvbuf.c
rename to libc/upstream-freebsd/lib/libc/stdio/setvbuf.c
index 2fb76af..d396960 100644
--- a/libc/stdio/setvbuf.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/setvbuf.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: setvbuf.c,v 1.8 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,16 +30,25 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)setvbuf.c	8.2 (Berkeley) 11/16/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
 #include <stdio.h>
 #include <stdlib.h>
+#include "un-namespace.h"
 #include "local.h"
+#include "libc_private.h"
 
 /*
  * Set one of the three kinds of buffering, optionally including
  * a buffer.
  */
 int
-setvbuf(FILE *fp, char *buf, int mode, size_t size)
+setvbuf(FILE * __restrict fp, char * __restrict buf, int mode, size_t size)
 {
 	int ret, flags;
 	size_t iosize;
@@ -55,23 +63,22 @@
 		if ((mode != _IOFBF && mode != _IOLBF) || (int)size < 0)
 			return (EOF);
 
+	FLOCKFILE(fp);
 	/*
 	 * Write current buffer, if any.  Discard unread input (including
 	 * ungetc data), cancel line buffering, and free old buffer if
 	 * malloc()ed.  We also clear any eof condition, as if this were
 	 * a seek.
 	 */
-	FLOCKFILE(fp);
 	ret = 0;
 	(void)__sflush(fp);
 	if (HASUB(fp))
 		FREEUB(fp);
-	WCIO_FREE(fp);
 	fp->_r = fp->_lbfsize = 0;
 	flags = fp->_flags;
 	if (flags & __SMBF)
 		free((void *)fp->_bf._base);
-	flags &= ~(__SLBF | __SNBF | __SMBF | __SOPT | __SNPT | __SEOF);
+	flags &= ~(__SLBF | __SNBF | __SMBF | __SOPT | __SOFF | __SNPT | __SEOF);
 
 	/* If setting unbuffered mode, skip all the hard work. */
 	if (mode == _IONBF)
@@ -147,8 +154,8 @@
 		/* begin/continue reading, or stay in intermediate state */
 		fp->_w = 0;
 	}
-	FUNLOCKFILE(fp);
-	__atexit_register_cleanup(_cleanup);
+	__cleanup = _cleanup;
 
+	FUNLOCKFILE(fp);
 	return (ret);
 }
diff --git a/libc/stdio/tempnam.c b/libc/upstream-freebsd/lib/libc/stdio/tempnam.c
similarity index 85%
rename from libc/stdio/tempnam.c
rename to libc/upstream-freebsd/lib/libc/stdio/tempnam.c
index 3b7ec75..e15746f 100644
--- a/libc/stdio/tempnam.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/tempnam.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: tempnam.c,v 1.14 2005/08/08 08:05:36 espie Exp $ */
 /*
  * Copyright (c) 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -28,6 +27,12 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)tempnam.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
 #include <sys/param.h>
 #include <errno.h>
 #include <stdio.h>
@@ -54,26 +59,26 @@
 		pfx = "tmp.";
 
 	if (issetugid() == 0 && (f = getenv("TMPDIR"))) {
-		(void)snprintf(name, MAXPATHLEN, "%s%s%sXXXXXXXXXX", f,
+		(void)snprintf(name, MAXPATHLEN, "%s%s%sXXXXXX", f,
 		    *(f + strlen(f) - 1) == '/'? "": "/", pfx);
 		if ((f = _mktemp(name)))
 			return(f);
 	}
 
 	if ((f = (char *)dir)) {
-		(void)snprintf(name, MAXPATHLEN, "%s%s%sXXXXXXXXXX", f,
+		(void)snprintf(name, MAXPATHLEN, "%s%s%sXXXXXX", f,
 		    *(f + strlen(f) - 1) == '/'? "": "/", pfx);
 		if ((f = _mktemp(name)))
 			return(f);
 	}
 
 	f = P_tmpdir;
-	(void)snprintf(name, MAXPATHLEN, "%s%sXXXXXXXXX", f, pfx);
+	(void)snprintf(name, MAXPATHLEN, "%s%sXXXXXX", f, pfx);
 	if ((f = _mktemp(name)))
 		return(f);
 
 	f = _PATH_TMP;
-	(void)snprintf(name, MAXPATHLEN, "%s%sXXXXXXXXX", f, pfx);
+	(void)snprintf(name, MAXPATHLEN, "%s%sXXXXXX", f, pfx);
 	if ((f = _mktemp(name)))
 		return(f);
 
diff --git a/libc/stdio/tmpnam.c b/libc/upstream-freebsd/lib/libc/stdio/tmpnam.c
similarity index 88%
rename from libc/stdio/tmpnam.c
rename to libc/upstream-freebsd/lib/libc/stdio/tmpnam.c
index 32e0a22..ce32dcc 100644
--- a/libc/stdio/tmpnam.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/tmpnam.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: tmpnam.c,v 1.10 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
@@ -31,6 +30,12 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)tmpnam.c	8.3 (Berkeley) 3/28/94";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
 #include <sys/types.h>
 
 #include <stdio.h>
@@ -49,7 +54,7 @@
 
 	if (s == NULL)
 		s = buf;
-	(void)snprintf(s, L_tmpnam, "%stmp.%lu.XXXXXXXXX", P_tmpdir, tmpcount);
+	(void)snprintf(s, L_tmpnam, "%stmp.%lu.XXXXXX", P_tmpdir, tmpcount);
 	++tmpcount;
 	return (_mktemp(s));
 }
diff --git a/libc/stdio/wsetup.c b/libc/upstream-freebsd/lib/libc/stdio/wsetup.c
similarity index 86%
rename from libc/stdio/wsetup.c
rename to libc/upstream-freebsd/lib/libc/stdio/wsetup.c
index 0834223..70f8247 100644
--- a/libc/stdio/wsetup.c
+++ b/libc/upstream-freebsd/lib/libc/stdio/wsetup.c
@@ -1,4 +1,3 @@
-/*	$OpenBSD: wsetup.c,v 1.7 2005/08/08 08:05:36 espie Exp $ */
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,6 +30,13 @@
  * SUCH DAMAGE.
  */
 
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)wsetup.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "local.h"
@@ -38,7 +44,7 @@
 /*
  * Various output routines call wsetup to be sure it is safe to write,
  * because either _flags does not include __SWR, or _buf is NULL.
- * _wsetup returns 0 if OK to write, nonzero otherwise.
+ * __swsetup returns 0 if OK to write; otherwise, it returns EOF and sets errno.
  */
 int
 __swsetup(FILE *fp)
@@ -51,8 +57,11 @@
 	 * If we are not writing, we had better be reading and writing.
 	 */
 	if ((fp->_flags & __SWR) == 0) {
-		if ((fp->_flags & __SRW) == 0)
+		if ((fp->_flags & __SRW) == 0) {
+			errno = EBADF;
+			fp->_flags |= __SERR;
 			return (EOF);
+		}
 		if (fp->_flags & __SRD) {
 			/* clobber any ungetc data */
 			if (HASUB(fp))
@@ -67,11 +76,8 @@
 	/*
 	 * Make a buffer if necessary, then set _w.
 	 */
-	if (fp->_bf._base == NULL) {
-		if ((fp->_flags & (__SSTR | __SALC)) == __SSTR)
-			return (EOF);
+	if (fp->_bf._base == NULL)
 		__smakebuf(fp);
-	}
 	if (fp->_flags & __SLBF) {
 		/*
 		 * It is line buffered, so make _lbfsize be -_bufsize
diff --git a/libc/unistd/getopt_long.c b/libc/upstream-freebsd/lib/libc/stdlib/getopt_long.c
similarity index 79%
rename from libc/unistd/getopt_long.c
rename to libc/upstream-freebsd/lib/libc/stdlib/getopt_long.c
index dbdf01a..9f7f6d5 100644
--- a/libc/unistd/getopt_long.c
+++ b/libc/upstream-freebsd/lib/libc/stdlib/getopt_long.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: getopt_long.c,v 1.20 2005/10/25 15:49:37 jmc Exp $	*/
+/*	$OpenBSD: getopt_long.c,v 1.22 2006/10/04 21:29:04 jmc Exp $	*/
 /*	$NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $	*/
 
 /*
@@ -35,13 +35,6 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *        This product includes software developed by the NetBSD
- *        Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
@@ -56,22 +49,33 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#if 0
+#if defined(LIBC_SCCS) && !defined(lint)
+static char *rcsid = "$OpenBSD: getopt_long.c,v 1.16 2004/02/04 18:17:25 millert Exp $";
+#endif /* LIBC_SCCS and not lint */
+#endif
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
 #include <err.h>
 #include <errno.h>
 #include <getopt.h>
 #include <stdlib.h>
 #include <string.h>
-#include <stdio.h>
 
+#define GNU_COMPATIBLE		/* Be more GNU-compatible; configure scripts use us. */
+
+#if 0				/* we prefer to keep our getopt(3) */
 #define	REPLACE_GETOPT		/* use this getopt as the system getopt(3) */
+#endif
 
 #ifdef REPLACE_GETOPT
 int	opterr = 1;		/* if error message should be printed */
 int	optind = 1;		/* index into parent argv vector */
 int	optopt = '?';		/* character checked for validity */
+int	optreset;		/* reset getopt */
 char    *optarg;		/* argument associated with option */
 #endif
-int	optreset;		/* reset getopt */
 
 #define PRINT_ERROR	((opterr) && (*options != ':'))
 
@@ -86,10 +90,17 @@
 
 #define	EMSG		""
 
+#ifdef GNU_COMPATIBLE
+#define NO_PREFIX	(-1)
+#define D_PREFIX	0
+#define DD_PREFIX	1
+#define W_PREFIX	2
+#endif
+
 static int getopt_internal(int, char * const *, const char *,
 			   const struct option *, int *, int);
 static int parse_long_options(char * const *, const char *,
-			      const struct option *, int *, int);
+			      const struct option *, int *, int, int);
 static int gcd(int, int);
 static void permute_args(int, int, int, char * const *);
 
@@ -101,11 +112,21 @@
 
 /* Error messages */
 static const char recargchar[] = "option requires an argument -- %c";
+static const char illoptchar[] = "illegal option -- %c"; /* From P1003.2 */
+#ifdef GNU_COMPATIBLE
+static int dash_prefix = NO_PREFIX;
+static const char gnuoptchar[] = "invalid option -- %c";
+
+static const char recargstring[] = "option `%s%s' requires an argument";
+static const char ambig[] = "option `%s%.*s' is ambiguous";
+static const char noarg[] = "option `%s%.*s' doesn't allow an argument";
+static const char illoptstring[] = "unrecognized option `%s%s'";
+#else
 static const char recargstring[] = "option requires an argument -- %s";
 static const char ambig[] = "ambiguous option -- %.*s";
 static const char noarg[] = "option doesn't take an argument -- %.*s";
-static const char illoptchar[] = "unknown option -- %c";
 static const char illoptstring[] = "unknown option -- %s";
+#endif
 
 /*
  * Compute the greatest common divisor of a and b.
@@ -169,14 +190,35 @@
  */
 static int
 parse_long_options(char * const *nargv, const char *options,
-	const struct option *long_options, int *idx, int short_too)
+	const struct option *long_options, int *idx, int short_too, int flags)
 {
 	char *current_argv, *has_equal;
+#ifdef GNU_COMPATIBLE
+	char *current_dash;
+#endif
 	size_t current_argv_len;
-	int i, match;
+	int i, match, exact_match, second_partial_match;
 
 	current_argv = place;
+#ifdef GNU_COMPATIBLE
+	switch (dash_prefix) {
+		case D_PREFIX:
+			current_dash = "-";
+			break;
+		case DD_PREFIX:
+			current_dash = "--";
+			break;
+		case W_PREFIX:
+			current_dash = "-W ";
+			break;
+		default:
+			current_dash = "";
+			break;
+	}
+#endif
 	match = -1;
+	exact_match = 0;
+	second_partial_match = 0;
 
 	optind++;
 
@@ -196,6 +238,7 @@
 		if (strlen(long_options[i].name) == current_argv_len) {
 			/* exact match */
 			match = i;
+			exact_match = 1;
 			break;
 		}
 		/*
@@ -205,25 +248,37 @@
 		if (short_too && current_argv_len == 1)
 			continue;
 
-		if (match == -1)	/* partial match */
+		if (match == -1)        /* first partial match */
 			match = i;
-		else {
-			/* ambiguous abbreviation */
-			if (PRINT_ERROR)
-				fprintf(stderr, 
-                        ambig, (int)current_argv_len,
-                        current_argv);
-			optopt = 0;
-			return (BADCH);
-		}
+		else if ((flags & FLAG_LONGONLY) ||
+			 long_options[i].has_arg !=
+			     long_options[match].has_arg ||
+			 long_options[i].flag != long_options[match].flag ||
+			 long_options[i].val != long_options[match].val)
+			second_partial_match = 1;
+	}
+	if (!exact_match && second_partial_match) {
+		/* ambiguous abbreviation */
+		if (PRINT_ERROR)
+			warnx(ambig,
+#ifdef GNU_COMPATIBLE
+			     current_dash,
+#endif
+			     (int)current_argv_len,
+			     current_argv);
+		optopt = 0;
+		return (BADCH);
 	}
 	if (match != -1) {		/* option found */
 		if (long_options[match].has_arg == no_argument
 		    && has_equal) {
 			if (PRINT_ERROR)
-				fprintf(stderr,
-                        noarg, (int)current_argv_len,
-                        current_argv);
+				warnx(noarg,
+#ifdef GNU_COMPATIBLE
+				     current_dash,
+#endif
+				     (int)current_argv_len,
+				     current_argv);
 			/*
 			 * XXX: GNU sets optopt to val regardless of flag
 			 */
@@ -231,7 +286,11 @@
 				optopt = long_options[match].val;
 			else
 				optopt = 0;
+#ifdef GNU_COMPATIBLE
+			return (BADCH);
+#else
 			return (BADARG);
+#endif
 		}
 		if (long_options[match].has_arg == required_argument ||
 		    long_options[match].has_arg == optional_argument) {
@@ -252,9 +311,11 @@
 			 * should be generated.
 			 */
 			if (PRINT_ERROR)
-				fprintf(stderr, 
-                        recargstring,
-                        current_argv);
+				warnx(recargstring,
+#ifdef GNU_COMPATIBLE
+				    current_dash,
+#endif
+				    current_argv);
 			/*
 			 * XXX: GNU sets optopt to val regardless of flag
 			 */
@@ -271,7 +332,11 @@
 			return (-1);
 		}
 		if (PRINT_ERROR)
-			fprintf(stderr, illoptstring, current_argv);
+			warnx(illoptstring,
+#ifdef GNU_COMPATIBLE
+			      current_dash,
+#endif
+			      current_argv);
 		optopt = 0;
 		return (BADCH);
 	}
@@ -294,7 +359,7 @@
 {
 	char *oli;				/* option letter list index */
 	int optchar, short_too;
-	static int posixly_correct = -1;
+	int posixly_correct;	/* no static, can be changed on the fly */
 
 	if (options == NULL)
 		return (-1);
@@ -303,12 +368,18 @@
 	 * Disable GNU extensions if POSIXLY_CORRECT is set or options
 	 * string begins with a '+'.
 	 */
-	if (posixly_correct == -1)
-		posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
+	posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
+#ifdef GNU_COMPATIBLE
+	if (*options == '-')
+		flags |= FLAG_ALLARGS;
+	else if (posixly_correct || *options == '+')
+		flags &= ~FLAG_PERMUTE;
+#else
 	if (posixly_correct || *options == '+')
 		flags &= ~FLAG_PERMUTE;
 	else if (*options == '-')
 		flags |= FLAG_ALLARGS;
+#endif
 	if (*options == '+' || *options == '-')
 		options++;
 
@@ -344,7 +415,11 @@
 			return (-1);
 		}
 		if (*(place = nargv[optind]) != '-' ||
+#ifdef GNU_COMPATIBLE
+		    place[1] == '\0') {
+#else
 		    (place[1] == '\0' && strchr(options, '-') == NULL)) {
+#endif
 			place = EMSG;		/* found non-option */
 			if (flags & FLAG_ALLARGS) {
 				/*
@@ -407,20 +482,26 @@
 	if (long_options != NULL && place != nargv[optind] &&
 	    (*place == '-' || (flags & FLAG_LONGONLY))) {
 		short_too = 0;
-		if (*place == '-')
+#ifdef GNU_COMPATIBLE
+		dash_prefix = D_PREFIX;
+#endif
+		if (*place == '-') {
 			place++;		/* --foo long option */
-		else if (*place != ':' && strchr(options, *place) != NULL)
+#ifdef GNU_COMPATIBLE
+			dash_prefix = DD_PREFIX;
+#endif
+		} else if (*place != ':' && strchr(options, *place) != NULL)
 			short_too = 1;		/* could be short option too */
 
 		optchar = parse_long_options(nargv, options, long_options,
-		    idx, short_too);
+		    idx, short_too, flags);
 		if (optchar != -1) {
 			place = EMSG;
 			return (optchar);
 		}
 	}
 
-	if (((optchar = (int)*place++) == (int)':') ||
+	if ((optchar = (int)*place++) == (int)':' ||
 	    (optchar == (int)'-' && *place != '\0') ||
 	    (oli = strchr(options, optchar)) == NULL) {
 		/*
@@ -432,8 +513,14 @@
 			return (-1);
 		if (!*place)
 			++optind;
+#ifdef GNU_COMPATIBLE
 		if (PRINT_ERROR)
-			fprintf(stderr, illoptchar, optchar);
+			warnx(posixly_correct ? illoptchar : gnuoptchar,
+			      optchar);
+#else
+		if (PRINT_ERROR)
+			warnx(illoptchar, optchar);
+#endif
 		optopt = optchar;
 		return (BADCH);
 	}
@@ -444,13 +531,16 @@
 		else if (++optind >= nargc) {	/* no arg */
 			place = EMSG;
 			if (PRINT_ERROR)
-				fprintf(stderr, recargchar, optchar);
+				warnx(recargchar, optchar);
 			optopt = optchar;
 			return (BADARG);
 		} else				/* white space */
 			place = nargv[optind];
+#ifdef GNU_COMPATIBLE
+		dash_prefix = W_PREFIX;
+#endif
 		optchar = parse_long_options(nargv, options, long_options,
-		    idx, 0);
+		    idx, 0, flags);
 		place = EMSG;
 		return (optchar);
 	}
@@ -461,24 +551,15 @@
 		optarg = NULL;
 		if (*place)			/* no white space */
 			optarg = place;
-		/* XXX: disable test for :: if PC? (GNU doesn't) */
 		else if (oli[1] != ':') {	/* arg not optional */
 			if (++optind >= nargc) {	/* no arg */
 				place = EMSG;
 				if (PRINT_ERROR)
-					fprintf(stderr, recargchar, optchar);
+					warnx(recargchar, optchar);
 				optopt = optchar;
 				return (BADARG);
 			} else
 				optarg = nargv[optind];
-		} else if (!(flags & FLAG_PERMUTE)) {
-			/*
-			 * If permutation is disabled, we can accept an
-			 * optional arg separated by whitespace so long
-			 * as it does not start with a dash (-).
-			 */
-			if (optind + 1 < nargc && *nargv[optind + 1] != '-')
-				optarg = nargv[++optind];
 		}
 		place = EMSG;
 		++optind;
@@ -516,7 +597,7 @@
  */
 int
 getopt_long(int nargc, char * const *nargv, const char *options,
-    const struct option *long_options, int *idx)
+	const struct option *long_options, int *idx)
 {
 
 	return (getopt_internal(nargc, nargv, options, long_options, idx,
@@ -529,7 +610,7 @@
  */
 int
 getopt_long_only(int nargc, char * const *nargv, const char *options,
-    const struct option *long_options, int *idx)
+	const struct option *long_options, int *idx)
 {
 
 	return (getopt_internal(nargc, nargv, options, long_options, idx,
diff --git a/libc/upstream-freebsd/lib/libc/stdlib/qsort.c b/libc/upstream-freebsd/lib/libc/stdlib/qsort.c
new file mode 100644
index 0000000..93e22cd
--- /dev/null
+++ b/libc/upstream-freebsd/lib/libc/stdlib/qsort.c
@@ -0,0 +1,195 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)qsort.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdlib.h>
+
+#ifdef I_AM_QSORT_R
+typedef int		 cmp_t(void *, const void *, const void *);
+#else
+typedef int		 cmp_t(const void *, const void *);
+#endif
+static inline char	*med3(char *, char *, char *, cmp_t *, void *);
+static inline void	 swapfunc(char *, char *, int, int);
+
+#define min(a, b)	((a) < (b) ? (a) : (b))	/* fully parenthesized; args may be evaluated twice */
+
+/*
+ * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
+ */
+#define swapcode(TYPE, parmi, parmj, n) { 		\
+	long i = (n) / sizeof (TYPE); 			\
+	TYPE *pi = (TYPE *) (parmi); 		\
+	TYPE *pj = (TYPE *) (parmj); 		\
+	do { 						\
+		TYPE	t = *pi;		\
+		*pi++ = *pj;				\
+		*pj++ = t;				\
+        } while (--i > 0);				\
+}
+
+#define SWAPINIT(a, es) swaptype = ((char *)(a) - (char *)0) % sizeof(long) || \
+	(es) % sizeof(long) ? 2 : (es) == sizeof(long) ? 0 : 1 /* 0: one long, 1: longs, 2: bytes */
+
+static inline void
+swapfunc(a, b, n, swaptype)
+	char *a, *b;
+	int n, swaptype;
+{
+	if(swaptype <= 1)
+		swapcode(long, a, b, n)
+	else
+		swapcode(char, a, b, n)
+}
+
+#define swap(a, b)					\
+	if (swaptype == 0) {				\
+		long t = *(long *)(a);			\
+		*(long *)(a) = *(long *)(b);		\
+		*(long *)(b) = t;			\
+	} else						\
+		swapfunc(a, b, es, swaptype)
+
+#define vecswap(a, b, n) 	if ((n) > 0) swapfunc(a, b, n, swaptype)
+
+#ifdef I_AM_QSORT_R
+#define	CMP(t, x, y) (cmp((t), (x), (y)))
+#else
+#define	CMP(t, x, y) (cmp((x), (y)))
+#endif
+
+static inline char *
+med3(char *a, char *b, char *c, cmp_t *cmp, void *thunk
+#ifndef I_AM_QSORT_R
+__unused
+#endif
+)
+{
+	return CMP(thunk, a, b) < 0 ?
+	       (CMP(thunk, b, c) < 0 ? b : (CMP(thunk, a, c) < 0 ? c : a ))
+              :(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c ));
+}
+
+#ifdef I_AM_QSORT_R
+void
+qsort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp)
+#else
+#define thunk NULL
+void
+qsort(void *a, size_t n, size_t es, cmp_t *cmp)
+#endif
+{
+	char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
+	size_t d, r;
+	int cmp_result;
+	int swaptype, swap_cnt;
+
+loop:	SWAPINIT(a, es);
+	swap_cnt = 0;
+	if (n < 7) {
+		for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
+			for (pl = pm; 
+			     pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
+			     pl -= es)
+				swap(pl, pl - es);
+		return;
+	}
+	pm = (char *)a + (n / 2) * es;
+	if (n > 7) {
+		pl = a;
+		pn = (char *)a + (n - 1) * es;
+		if (n > 40) {
+			d = (n / 8) * es;
+			pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk);
+			pm = med3(pm - d, pm, pm + d, cmp, thunk);
+			pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk);
+		}
+		pm = med3(pl, pm, pn, cmp, thunk);
+	}
+	swap(a, pm);
+	pa = pb = (char *)a + es;
+
+	pc = pd = (char *)a + (n - 1) * es;
+	for (;;) {
+		while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) {
+			if (cmp_result == 0) {
+				swap_cnt = 1;
+				swap(pa, pb);
+				pa += es;
+			}
+			pb += es;
+		}
+		while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) {
+			if (cmp_result == 0) {
+				swap_cnt = 1;
+				swap(pc, pd);
+				pd -= es;
+			}
+			pc -= es;
+		}
+		if (pb > pc)
+			break;
+		swap(pb, pc);
+		swap_cnt = 1;
+		pb += es;
+		pc -= es;
+	}
+	if (swap_cnt == 0) {  /* Switch to insertion sort */
+		for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
+			for (pl = pm; 
+			     pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
+			     pl -= es)
+				swap(pl, pl - es);
+		return;
+	}
+
+	pn = (char *)a + n * es;
+	r = min(pa - (char *)a, pb - pa);
+	vecswap(a, pb - r, r);
+	r = min(pd - pc, pn - pd - es);
+	vecswap(pb, pn - r, r);
+	if ((r = pb - pa) > es)
+#ifdef I_AM_QSORT_R
+		qsort_r(a, r / es, es, thunk, cmp);
+#else
+		qsort(a, r / es, es, cmp);
+#endif
+	if ((r = pd - pc) > es) {
+		/* Iterate rather than recurse to save stack space */
+		a = pn - r;
+		n = r / es;
+		goto loop;
+	}
+/*		qsort(pn - r, r / es, es, cmp);*/
+}
diff --git a/libc/upstream-freebsd/lib/libc/string/wcscmp.c b/libc/upstream-freebsd/lib/libc/string/wcscmp.c
index 2d48914..c2abe4c 100644
--- a/libc/upstream-freebsd/lib/libc/string/wcscmp.c
+++ b/libc/upstream-freebsd/lib/libc/string/wcscmp.c
@@ -13,7 +13,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
diff --git a/libc/upstream-freebsd/lib/libc/string/wcsncmp.c b/libc/upstream-freebsd/lib/libc/string/wcsncmp.c
index 86d7a51..8236d96 100644
--- a/libc/upstream-freebsd/lib/libc/string/wcsncmp.c
+++ b/libc/upstream-freebsd/lib/libc/string/wcsncmp.c
@@ -10,7 +10,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
diff --git a/libc/upstream-freebsd/lib/libc/string/wcsncpy.c b/libc/upstream-freebsd/lib/libc/string/wcsncpy.c
index 00d986b..215e9a1 100644
--- a/libc/upstream-freebsd/lib/libc/string/wcsncpy.c
+++ b/libc/upstream-freebsd/lib/libc/string/wcsncpy.c
@@ -13,7 +13,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
diff --git a/libc/upstream-freebsd/lib/libc/string/wcsstr.c b/libc/upstream-freebsd/lib/libc/string/wcsstr.c
index a9dc27b..ce598a6 100644
--- a/libc/upstream-freebsd/lib/libc/string/wcsstr.c
+++ b/libc/upstream-freebsd/lib/libc/string/wcsstr.c
@@ -13,7 +13,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
diff --git a/libc/upstream-freebsd/lib/libc/string/wcstok.c b/libc/upstream-freebsd/lib/libc/string/wcstok.c
index 5a77117..441fbd4 100644
--- a/libc/upstream-freebsd/lib/libc/string/wcstok.c
+++ b/libc/upstream-freebsd/lib/libc/string/wcstok.c
@@ -15,7 +15,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notices, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
diff --git a/libc/upstream-freebsd/libc_private.h b/libc/upstream-freebsd/libc_private.h
new file mode 100644
index 0000000..ecdbb7e
--- /dev/null
+++ b/libc/upstream-freebsd/libc_private.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _BIONIC_FREEBSD_LIBC_PRIVATE_H_included
+#define _BIONIC_FREEBSD_LIBC_PRIVATE_H_included
+
+#define FLOCKFILE(fp)   do { if (__isthreaded) flockfile(fp); } while (0)
+#define FUNLOCKFILE(fp) do { if (__isthreaded) funlockfile(fp); } while (0)
+
+#define STDIO_THREAD_LOCK()   /* TODO: until we have the FreeBSD findfp.c, this is useless. */
+#define STDIO_THREAD_UNLOCK() /* TODO: until we have the FreeBSD findfp.c, this is useless. */
+
+#define ORIENT(fp, o) /* Only needed for wide-character stream support. */
+
+#endif
diff --git a/libc/upstream-freebsd/namespace.h b/libc/upstream-freebsd/namespace.h
index a3f850e..a980b57 100644
--- a/libc/upstream-freebsd/namespace.h
+++ b/libc/upstream-freebsd/namespace.h
@@ -17,4 +17,6 @@
 #ifndef _BIONIC_FREEBSD_NAMESPACE_H_included
 #define _BIONIC_FREEBSD_NAMESPACE_H_included
 
+__attribute__((visibility("hidden"))) char* _mktemp(char*);
+
 #endif
diff --git a/libc/upstream-freebsd/spinlock.h b/libc/upstream-freebsd/spinlock.h
new file mode 100644
index 0000000..f5c3785
--- /dev/null
+++ b/libc/upstream-freebsd/spinlock.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _BIONIC_FREEBSD_SPINLOCK_H_included
+#define _BIONIC_FREEBSD_SPINLOCK_H_included
+
+/* TODO: until we have the FreeBSD findfp.c, this is useless. */
+
+#endif
diff --git a/libc/upstream-netbsd/port_before.h b/libc/upstream-netbsd/port_before.h
index 70eed26..9f77f79 100644
--- a/libc/upstream-netbsd/port_before.h
+++ b/libc/upstream-netbsd/port_before.h
@@ -21,7 +21,7 @@
 #include <sys/cdefs.h>
 #include <arpa_nameser.h>
 
-#define ISC_FORMAT_PRINTF(a,b) __attribute__((__format__(__printf__,a,b)))
+#define ISC_FORMAT_PRINTF(a,b) __printflike(a,b)
 #define ISC_SOCKLEN_T socklen_t
 
 #endif
diff --git a/libm/upstream-freebsd/lib/msun/src/e_acosh.c b/libm/upstream-freebsd/lib/msun/src/e_acosh.c
index a0cc6cb..358c8bd 100644
--- a/libm/upstream-freebsd/lib/msun/src/e_acosh.c
+++ b/libm/upstream-freebsd/lib/msun/src/e_acosh.c
@@ -29,6 +29,8 @@
  *	acosh(NaN) is NaN without signal.
  */
 
+#include <float.h>
+
 #include "math.h"
 #include "math_private.h"
 
@@ -60,3 +62,7 @@
 	    return log1p(t+sqrt(2.0*t+t*t));
 	}
 }
+
+#if LDBL_MANT_DIG == 53
+__weak_reference(acosh, acoshl);
+#endif
diff --git a/libm/upstream-freebsd/lib/msun/src/e_atanh.c b/libm/upstream-freebsd/lib/msun/src/e_atanh.c
index ab8a2e1..422ff26 100644
--- a/libm/upstream-freebsd/lib/msun/src/e_atanh.c
+++ b/libm/upstream-freebsd/lib/msun/src/e_atanh.c
@@ -33,6 +33,8 @@
  *
  */
 
+#include <float.h>
+
 #include "math.h"
 #include "math_private.h"
 
@@ -60,3 +62,7 @@
 	    t = 0.5*log1p((x+x)/(one-x));
 	if(hx>=0) return t; else return -t;
 }
+
+#if LDBL_MANT_DIG == 53
+__weak_reference(atanh, atanhl);
+#endif
diff --git a/libm/upstream-freebsd/lib/msun/src/e_exp.c b/libm/upstream-freebsd/lib/msun/src/e_exp.c
index e432bc8..94c9769 100644
--- a/libm/upstream-freebsd/lib/msun/src/e_exp.c
+++ b/libm/upstream-freebsd/lib/msun/src/e_exp.c
@@ -84,7 +84,6 @@
 static const double
 one	= 1.0,
 halF[2]	= {0.5,-0.5,},
-huge	= 1.0e+300,
 o_threshold=  7.09782712893383973096e+02,  /* 0x40862E42, 0xFEFA39EF */
 u_threshold= -7.45133219101941108420e+02,  /* 0xc0874910, 0xD52D3051 */
 ln2HI[2]   ={ 6.93147180369123816490e-01,  /* 0x3fe62e42, 0xfee00000 */
@@ -99,6 +98,7 @@
 P5   =  4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */
 
 static volatile double
+huge	= 1.0e+300,
 twom1000= 9.33263618503218878990e-302;     /* 2**-1000=0x01700000,0*/
 
 double
diff --git a/libm/upstream-freebsd/lib/msun/src/e_expf.c b/libm/upstream-freebsd/lib/msun/src/e_expf.c
index a479076..b1fe2c5 100644
--- a/libm/upstream-freebsd/lib/msun/src/e_expf.c
+++ b/libm/upstream-freebsd/lib/msun/src/e_expf.c
@@ -24,7 +24,6 @@
 static const float
 one	= 1.0,
 halF[2]	= {0.5,-0.5,},
-huge	= 1.0e+30,
 o_threshold=  8.8721679688e+01,  /* 0x42b17180 */
 u_threshold= -1.0397208405e+02,  /* 0xc2cff1b5 */
 ln2HI[2]   ={ 6.9314575195e-01,		/* 0x3f317200 */
@@ -39,7 +38,9 @@
 P1 =  1.6666625440e-1,		/*  0xaaaa8f.0p-26 */
 P2 = -2.7667332906e-3;		/* -0xb55215.0p-32 */
 
-static volatile float twom100 = 7.8886090522e-31;      /* 2**-100=0x0d800000 */
+static volatile float
+huge	= 1.0e+30,
+twom100 = 7.8886090522e-31;      /* 2**-100=0x0d800000 */
 
 float
 __ieee754_expf(float x)
diff --git a/libm/upstream-freebsd/lib/msun/src/e_log.c b/libm/upstream-freebsd/lib/msun/src/e_log.c
index 204fb48..68bc107 100644
--- a/libm/upstream-freebsd/lib/msun/src/e_log.c
+++ b/libm/upstream-freebsd/lib/msun/src/e_log.c
@@ -65,6 +65,8 @@
  * to produce the hexadecimal values shown.
  */
 
+#include <float.h>
+
 #include "math.h"
 #include "math_private.h"
 
@@ -81,6 +83,7 @@
 Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
 
 static const double zero   =  0.0;
+static volatile double vzero = 0.0;
 
 double
 __ieee754_log(double x)
@@ -94,7 +97,7 @@
 	k=0;
 	if (hx < 0x00100000) {			/* x < 2**-1022  */
 	    if (((hx&0x7fffffff)|lx)==0) 
-		return -two54/zero;		/* log(+-0)=-inf */
+		return -two54/vzero;		/* log(+-0)=-inf */
 	    if (hx<0) return (x-x)/zero;	/* log(-#) = NaN */
 	    k -= 54; x *= two54; /* subnormal number, scale up x */
 	    GET_HIGH_WORD(hx,x);
@@ -138,3 +141,7 @@
 		     return dk*ln2_hi-((s*(f-R)-dk*ln2_lo)-f);
 	}
 }
+
+#if (LDBL_MANT_DIG == 53)
+__weak_reference(log, logl);
+#endif
diff --git a/libm/upstream-freebsd/lib/msun/src/e_log10.c b/libm/upstream-freebsd/lib/msun/src/e_log10.c
index 104d257..3c89ed2 100644
--- a/libm/upstream-freebsd/lib/msun/src/e_log10.c
+++ b/libm/upstream-freebsd/lib/msun/src/e_log10.c
@@ -22,6 +22,8 @@
  * in not-quite-routine extra precision.
  */
 
+#include <float.h>
+
 #include "math.h"
 #include "math_private.h"
 #include "k_log.h"
@@ -34,6 +36,7 @@
 log10_2lo  =  3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
 
 static const double zero   =  0.0;
+static volatile double vzero = 0.0;
 
 double
 __ieee754_log10(double x)
@@ -47,7 +50,7 @@
 	k=0;
 	if (hx < 0x00100000) {			/* x < 2**-1022  */
 	    if (((hx&0x7fffffff)|lx)==0)
-		return -two54/zero;		/* log(+-0)=-inf */
+		return -two54/vzero;		/* log(+-0)=-inf */
 	    if (hx<0) return (x-x)/zero;	/* log(-#) = NaN */
 	    k -= 54; x *= two54; /* subnormal number, scale up x */
 	    GET_HIGH_WORD(hx,x);
@@ -85,3 +88,7 @@
 
 	return val_lo + val_hi;
 }
+
+#if (LDBL_MANT_DIG == 53)
+__weak_reference(log10, log10l);
+#endif
diff --git a/libm/upstream-freebsd/lib/msun/src/e_log10f.c b/libm/upstream-freebsd/lib/msun/src/e_log10f.c
index c876594..9856df2 100644
--- a/libm/upstream-freebsd/lib/msun/src/e_log10f.c
+++ b/libm/upstream-freebsd/lib/msun/src/e_log10f.c
@@ -28,6 +28,7 @@
 log10_2lo  =  7.9034151668e-07; /* 0x355427db */
 
 static const float zero   =  0.0;
+static volatile float vzero = 0.0;
 
 float
 __ieee754_log10f(float x)
@@ -40,7 +41,7 @@
 	k=0;
 	if (hx < 0x00800000) {			/* x < 2**-126  */
 	    if ((hx&0x7fffffff)==0)
-		return -two25/zero;		/* log(+-0)=-inf */
+		return -two25/vzero;		/* log(+-0)=-inf */
 	    if (hx<0) return (x-x)/zero;	/* log(-#) = NaN */
 	    k -= 25; x *= two25; /* subnormal number, scale up x */
 	    GET_FLOAT_WORD(hx,x);
diff --git a/libm/upstream-freebsd/lib/msun/src/e_log2.c b/libm/upstream-freebsd/lib/msun/src/e_log2.c
index 1fc44a5..4766cdb 100644
--- a/libm/upstream-freebsd/lib/msun/src/e_log2.c
+++ b/libm/upstream-freebsd/lib/msun/src/e_log2.c
@@ -24,6 +24,8 @@
  * in not-quite-routine extra precision.
  */
 
+#include <float.h>
+
 #include "math.h"
 #include "math_private.h"
 #include "k_log.h"
@@ -34,6 +36,7 @@
 ivln2lo    =  1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */
 
 static const double zero   =  0.0;
+static volatile double vzero = 0.0;
 
 double
 __ieee754_log2(double x)
@@ -47,7 +50,7 @@
 	k=0;
 	if (hx < 0x00100000) {			/* x < 2**-1022  */
 	    if (((hx&0x7fffffff)|lx)==0)
-		return -two54/zero;		/* log(+-0)=-inf */
+		return -two54/vzero;		/* log(+-0)=-inf */
 	    if (hx<0) return (x-x)/zero;	/* log(-#) = NaN */
 	    k -= 54; x *= two54; /* subnormal number, scale up x */
 	    GET_HIGH_WORD(hx,x);
@@ -108,3 +111,7 @@
 
 	return val_lo + val_hi;
 }
+
+#if (LDBL_MANT_DIG == 53)
+__weak_reference(log2, log2l);
+#endif
diff --git a/libm/upstream-freebsd/lib/msun/src/e_log2f.c b/libm/upstream-freebsd/lib/msun/src/e_log2f.c
index 7166346..1794484 100644
--- a/libm/upstream-freebsd/lib/msun/src/e_log2f.c
+++ b/libm/upstream-freebsd/lib/msun/src/e_log2f.c
@@ -26,6 +26,7 @@
 ivln2lo    = -1.7605285393e-04; /* 0xb9389ad4 */
 
 static const float zero   =  0.0;
+static volatile float vzero = 0.0;
 
 float
 __ieee754_log2f(float x)
@@ -38,7 +39,7 @@
 	k=0;
 	if (hx < 0x00800000) {			/* x < 2**-126  */
 	    if ((hx&0x7fffffff)==0)
-		return -two25/zero;		/* log(+-0)=-inf */
+		return -two25/vzero;		/* log(+-0)=-inf */
 	    if (hx<0) return (x-x)/zero;	/* log(-#) = NaN */
 	    k -= 25; x *= two25; /* subnormal number, scale up x */
 	    GET_FLOAT_WORD(hx,x);
diff --git a/libm/upstream-freebsd/lib/msun/src/e_logf.c b/libm/upstream-freebsd/lib/msun/src/e_logf.c
index c3be6ed..ec3985f 100644
--- a/libm/upstream-freebsd/lib/msun/src/e_logf.c
+++ b/libm/upstream-freebsd/lib/msun/src/e_logf.c
@@ -30,6 +30,7 @@
 Lg4 =      0xf89e26.0p-26;	/* 0.24279078841 */
 
 static const float zero   =  0.0;
+static volatile float vzero = 0.0;
 
 float
 __ieee754_logf(float x)
@@ -42,7 +43,7 @@
 	k=0;
 	if (ix < 0x00800000) {			/* x < 2**-126  */
 	    if ((ix&0x7fffffff)==0)
-		return -two25/zero;		/* log(+-0)=-inf */
+		return -two25/vzero;		/* log(+-0)=-inf */
 	    if (ix<0) return (x-x)/zero;	/* log(-#) = NaN */
 	    k -= 25; x *= two25; /* subnormal number, scale up x */
 	    GET_FLOAT_WORD(ix,x);
diff --git a/libm/upstream-freebsd/lib/msun/src/math_private.h b/libm/upstream-freebsd/lib/msun/src/math_private.h
index 5662df0..8ebc7fb 100644
--- a/libm/upstream-freebsd/lib/msun/src/math_private.h
+++ b/libm/upstream-freebsd/lib/msun/src/math_private.h
@@ -188,6 +188,33 @@
   (d) = sf_u.value;						\
 } while (0)
 
+/*
+ * Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long
+ * double.
+ */
+
+#define	EXTRACT_LDBL80_WORDS(ix0,ix1,d)				\
+do {								\
+  union IEEEl2bits ew_u;					\
+  ew_u.e = (d);							\
+  (ix0) = ew_u.xbits.expsign;					\
+  (ix1) = ew_u.xbits.man;					\
+} while (0)
+
+/*
+ * Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit
+ * long double.
+ */
+
+#define	EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d)			\
+do {								\
+  union IEEEl2bits ew_u;					\
+  ew_u.e = (d);							\
+  (ix0) = ew_u.xbits.expsign;					\
+  (ix1) = ew_u.xbits.manh;					\
+  (ix2) = ew_u.xbits.manl;					\
+} while (0)
+
 /* Get expsign as a 16 bit int from a long double.  */
 
 #define	GET_LDBL_EXPSIGN(i,d)					\
@@ -197,6 +224,33 @@
   (i) = ge_u.xbits.expsign;					\
 } while (0)
 
+/*
+ * Set an 80 bit long double from a 16 bit int expsign and a 64 bit int
+ * mantissa.
+ */
+
+#define	INSERT_LDBL80_WORDS(d,ix0,ix1)				\
+do {								\
+  union IEEEl2bits iw_u;					\
+  iw_u.xbits.expsign = (ix0);					\
+  iw_u.xbits.man = (ix1);					\
+  (d) = iw_u.e;							\
+} while (0)
+
+/*
+ * Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints
+ * comprising the mantissa.
+ */
+
+#define	INSERT_LDBL128_WORDS(d,ix0,ix1,ix2)			\
+do {								\
+  union IEEEl2bits iw_u;					\
+  iw_u.xbits.expsign = (ix0);					\
+  iw_u.xbits.manh = (ix1);					\
+  iw_u.xbits.manl = (ix2);					\
+  (d) = iw_u.e;							\
+} while (0)
+
 /* Set expsign of a long double from a 16 bit int.  */
 
 #define	SET_LDBL_EXPSIGN(d,v)					\
@@ -261,6 +315,110 @@
 #define      RETURNF(v)      return (v)
 
 /*
+ * 2sum gives the same result as 2sumF without requiring |a| >= |b| or
+ * a == 0, but is slower.
+ */
+#define	_2sum(a, b) do {	\
+	__typeof(a) __s, __w;	\
+				\
+	__w = (a) + (b);	\
+	__s = __w - (a);	\
+	(b) = ((a) - (__w - __s)) + ((b) - __s); \
+	(a) = __w;		\
+} while (0)
+
+/*
+ * 2sumF algorithm.
+ *
+ * "Normalize" the terms in the infinite-precision expression a + b for
+ * the sum of 2 floating point values so that b is as small as possible
+ * relative to 'a'.  (The resulting 'a' is the value of the expression in
+ * the same precision as 'a' and the resulting b is the rounding error.)
+ * |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and
+ * exponent overflow or underflow must not occur.  This uses a Theorem of
+ * Dekker (1971).  See Knuth (1981) 4.2.2 Theorem C.  The name "TwoSum"
+ * is apparently due to Shewchuk (1997).
+ *
+ * For this to always work, assignment of a + b to 'a' must not retain any
+ * extra precision in a + b.  This is required by C standards but broken
+ * in many compilers.  The brokenness cannot be worked around using
+ * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this
+ * algorithm would be destroyed by non-null strict assignments.  (The
+ * compilers are correct to be broken -- the efficiency of all floating
+ * point code calculations would be destroyed similarly if they forced the
+ * conversions.)
+ *
+ * Fortunately, a case that works well can usually be arranged by building
+ * any extra precision into the type of 'a' -- 'a' should have type float_t,
+ * double_t or long double.  b's type should be no larger than 'a's type.
+ * Callers should use these types with scopes as large as possible, to
+ * reduce their own extra-precision and efficiency problems.  In
+ * particular, they shouldn't convert back and forth just to call here.
+ */
+#ifdef DEBUG
+#define	_2sumF(a, b) do {				\
+	__typeof(a) __w;				\
+	volatile __typeof(a) __ia, __ib, __r, __vw;	\
+							\
+	__ia = (a);					\
+	__ib = (b);					\
+	assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib));	\
+							\
+	__w = (a) + (b);				\
+	(b) = ((a) - __w) + (b);			\
+	(a) = __w;					\
+							\
+	/* The next 2 assertions are weak if (a) is already long double. */ \
+	assert((long double)__ia + __ib == (long double)(a) + (b));	\
+	__vw = __ia + __ib;				\
+	__r = __ia - __vw;				\
+	__r += __ib;					\
+	assert(__vw == (a) && __r == (b));		\
+} while (0)
+#else /* !DEBUG */
+#define	_2sumF(a, b) do {	\
+	__typeof(a) __w;	\
+				\
+	__w = (a) + (b);	\
+	(b) = ((a) - __w) + (b); \
+	(a) = __w;		\
+} while (0)
+#endif /* DEBUG */
+
+/*
+ * Set x += c, where x is represented in extra precision as a + b.
+ * x must be sufficiently normalized and sufficiently larger than c,
+ * and the result is then sufficiently normalized.
+ *
+ * The details of ordering are that |a| must be >= |c| (so that (a, c)
+ * can be normalized without extra work to swap 'a' with c).  The details of
+ * the normalization are that b must be small relative to the normalized 'a'.
+ * Normalization of (a, c) makes the normalized c tiny relative to the
+ * normalized a, so b remains small relative to 'a' in the result.  However,
+ * b need not ever be tiny relative to 'a'.  For example, b might be about
+ * 2**20 times smaller than 'a' to give about 20 extra bits of precision.
+ * That is usually enough, and adding c (which by normalization is about
+ * 2**53 times smaller than a) cannot change b significantly.  However,
+ * cancellation of 'a' with c in normalization of (a, c) may reduce 'a'
+ * significantly relative to b.  The caller must ensure that significant
+ * cancellation doesn't occur, either by having c of the same sign as 'a',
+ * or by having |c| a few percent smaller than |a|.  Pre-normalization of
+ * (a, b) may help.
+ *
+ * This is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2
+ * exercise 19).  We gain considerable efficiency by requiring the terms to
+ * be sufficiently normalized and sufficiently increasing.
+ */
+#define	_3sumF(a, b, c) do {	\
+	__typeof(a) __tmp;	\
+				\
+	__tmp = (c);		\
+	_2sumF(__tmp, (a));	\
+	(b) += (a);		\
+	(a) = __tmp;		\
+} while (0)
+
+/*
  * Common routine to process the arguments to nan(), nanf(), and nanl().
  */
 void _scan_nan(uint32_t *__words, int __num_words, const char *__s);
@@ -370,6 +528,140 @@
 
 #endif /* __GNUCLIKE_ASM */
 
+#ifdef DEBUG
+#if defined(__amd64__) || defined(__i386__)
+#define	breakpoint()	asm("int $3")
+#else
+#include <signal.h>
+
+#define	breakpoint()	raise(SIGTRAP)
+#endif
+#endif
+
+/* Write a pari script to test things externally. */
+#ifdef DOPRINT
+#include <stdio.h>
+
+#ifndef DOPRINT_SWIZZLE
+#define	DOPRINT_SWIZZLE		0
+#endif
+
+#ifdef DOPRINT_LD80
+
+#define	DOPRINT_START(xp) do {						\
+	uint64_t __lx;							\
+	uint16_t __hx;							\
+									\
+	/* Hack to give more-problematic args. */			\
+	EXTRACT_LDBL80_WORDS(__hx, __lx, *xp);				\
+	__lx ^= DOPRINT_SWIZZLE;					\
+	INSERT_LDBL80_WORDS(*xp, __hx, __lx);				\
+	printf("x = %.21Lg; ", (long double)*xp);			\
+} while (0)
+#define	DOPRINT_END1(v)							\
+	printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v))
+#define	DOPRINT_END2(hi, lo)						\
+	printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n",		\
+	    (long double)(hi), (long double)(lo))
+
+#elif defined(DOPRINT_D64)
+
+#define	DOPRINT_START(xp) do {						\
+	uint32_t __hx, __lx;						\
+									\
+	EXTRACT_WORDS(__hx, __lx, *xp);					\
+	__lx ^= DOPRINT_SWIZZLE;					\
+	INSERT_WORDS(*xp, __hx, __lx);					\
+	printf("x = %.21Lg; ", (long double)*xp);			\
+} while (0)
+#define	DOPRINT_END1(v)							\
+	printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v))
+#define	DOPRINT_END2(hi, lo)						\
+	printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n",		\
+	    (long double)(hi), (long double)(lo))
+
+#elif defined(DOPRINT_F32)
+
+#define	DOPRINT_START(xp) do {						\
+	uint32_t __hx;							\
+									\
+	GET_FLOAT_WORD(__hx, *xp);					\
+	__hx ^= DOPRINT_SWIZZLE;					\
+	SET_FLOAT_WORD(*xp, __hx);					\
+	printf("x = %.21Lg; ", (long double)*xp);			\
+} while (0)
+#define	DOPRINT_END1(v)							\
+	printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v))
+#define	DOPRINT_END2(hi, lo)						\
+	printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n",		\
+	    (long double)(hi), (long double)(lo))
+
+#else /* !DOPRINT_LD80 && !DOPRINT_D64 && !DOPRINT_F32 (LD128 only) */
+
+#ifndef DOPRINT_SWIZZLE_HIGH
+#define	DOPRINT_SWIZZLE_HIGH	0
+#endif
+
+#define	DOPRINT_START(xp) do {						\
+	uint64_t __lx, __llx;						\
+	uint16_t __hx;							\
+									\
+	EXTRACT_LDBL128_WORDS(__hx, __lx, __llx, *xp);			\
+	__llx ^= DOPRINT_SWIZZLE;					\
+	__lx ^= DOPRINT_SWIZZLE_HIGH;					\
+	INSERT_LDBL128_WORDS(*xp, __hx, __lx, __llx);			\
+	printf("x = %.36Lg; ", (long double)*xp);					\
+} while (0)
+#define	DOPRINT_END1(v)							\
+	printf("y = %.36Lg; z = 0; show(x, y, z);\n", (long double)(v))
+#define	DOPRINT_END2(hi, lo)						\
+	printf("y = %.36Lg; z = %.36Lg; show(x, y, z);\n",		\
+	    (long double)(hi), (long double)(lo))
+
+#endif /* DOPRINT_LD80 */
+
+#else /* !DOPRINT */
+#define	DOPRINT_START(xp)
+#define	DOPRINT_END1(v)
+#define	DOPRINT_END2(hi, lo)
+#endif /* DOPRINT */
+
+#define	RETURNP(x) do {			\
+	DOPRINT_END1(x);		\
+	RETURNF(x);			\
+} while (0)
+#define	RETURNPI(x) do {		\
+	DOPRINT_END1(x);		\
+	RETURNI(x);			\
+} while (0)
+#define	RETURN2P(x, y) do {		\
+	DOPRINT_END2((x), (y));		\
+	RETURNF((x) + (y));		\
+} while (0)
+#define	RETURN2PI(x, y) do {		\
+	DOPRINT_END2((x), (y));		\
+	RETURNI((x) + (y));		\
+} while (0)
+#ifdef STRUCT_RETURN
+#define	RETURNSP(rp) do {		\
+	if (!(rp)->lo_set)		\
+		RETURNP((rp)->hi);	\
+	RETURN2P((rp)->hi, (rp)->lo);	\
+} while (0)
+#define	RETURNSPI(rp) do {		\
+	if (!(rp)->lo_set)		\
+		RETURNPI((rp)->hi);	\
+	RETURN2PI((rp)->hi, (rp)->lo);	\
+} while (0)
+#endif
+#define	SUM2P(x, y) ({			\
+	const __typeof (x) __x = (x);	\
+	const __typeof (y) __y = (y);	\
+					\
+	DOPRINT_END2(__x, __y);		\
+	__x + __y;			\
+})
+
 /*
  * ieee style elementary functions
  *
diff --git a/libm/upstream-freebsd/lib/msun/src/s_asinh.c b/libm/upstream-freebsd/lib/msun/src/s_asinh.c
index f3fdf74..cbb3d46 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_asinh.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_asinh.c
@@ -24,6 +24,8 @@
  *		 := sign(x)*log1p(|x| + x^2/(1 + sqrt(1+x^2)))
  */
 
+#include <float.h>
+
 #include "math.h"
 #include "math_private.h"
 
@@ -54,3 +56,7 @@
 	}
 	if(hx>0) return w; else return -w;
 }
+
+#if LDBL_MANT_DIG == 53
+__weak_reference(asinh, asinhl);
+#endif
diff --git a/libm/upstream-freebsd/lib/msun/src/s_exp2.c b/libm/upstream-freebsd/lib/msun/src/s_exp2.c
index 485b4e3..fde11c2 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_exp2.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_exp2.c
@@ -36,7 +36,6 @@
 #define	TBLSIZE	(1 << TBLBITS)
 
 static const double
-    huge     = 0x1p1000,
     redux    = 0x1.8p52 / TBLSIZE,
     P1	     = 0x1.62e42fefa39efp-1,
     P2	     = 0x1.ebfbdff82c575p-3,
@@ -44,7 +43,9 @@
     P4	     = 0x1.3b2ab88f70400p-7,
     P5	     = 0x1.5d88003875c74p-10;
 
-static volatile double twom1000 = 0x1p-1000;
+static volatile double
+    huge     = 0x1p1000,
+    twom1000 = 0x1p-1000;
 
 static const double tbl[TBLSIZE * 2] = {
 /*	exp2(z + eps)		eps	*/
diff --git a/libm/upstream-freebsd/lib/msun/src/s_exp2f.c b/libm/upstream-freebsd/lib/msun/src/s_exp2f.c
index 0a97bf6..9ac7c1f 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_exp2f.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_exp2f.c
@@ -36,14 +36,15 @@
 #define	TBLSIZE	(1 << TBLBITS)
 
 static const float
-    huge    = 0x1p100f,
     redux   = 0x1.8p23f / TBLSIZE,
     P1	    = 0x1.62e430p-1f,
     P2	    = 0x1.ebfbe0p-3f,
     P3	    = 0x1.c6b348p-5f,
     P4	    = 0x1.3b2c9cp-7f;
 
-static volatile float twom100 = 0x1p-100f;
+static volatile float
+    huge    = 0x1p100f,
+    twom100 = 0x1p-100f;
 
 static const double exp2ft[TBLSIZE] = {
 	0x1.6a09e667f3bcdp-1,
diff --git a/libm/upstream-freebsd/lib/msun/src/s_expm1.c b/libm/upstream-freebsd/lib/msun/src/s_expm1.c
index 5aa1917..37998a3 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_expm1.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_expm1.c
@@ -115,7 +115,6 @@
 
 static const double
 one		= 1.0,
-huge		= 1.0e+300,
 tiny		= 1.0e-300,
 o_threshold	= 7.09782712893383973096e+02,/* 0x40862E42, 0xFEFA39EF */
 ln2_hi		= 6.93147180369123816490e-01,/* 0x3fe62e42, 0xfee00000 */
@@ -128,6 +127,8 @@
 Q4  =   4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */
 Q5  =  -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
 
+static volatile double huge = 1.0e+300;
+
 double
 expm1(double x)
 {
@@ -215,3 +216,7 @@
 	}
 	return y;
 }
+
+#if (LDBL_MANT_DIG == 53)
+__weak_reference(expm1, expm1l);
+#endif
diff --git a/libm/upstream-freebsd/lib/msun/src/s_expm1f.c b/libm/upstream-freebsd/lib/msun/src/s_expm1f.c
index fb37494..c0a3934 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_expm1f.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_expm1f.c
@@ -23,7 +23,6 @@
 
 static const float
 one		= 1.0,
-huge		= 1.0e+30,
 tiny		= 1.0e-30,
 o_threshold	= 8.8721679688e+01,/* 0x42b17180 */
 ln2_hi		= 6.9313812256e-01,/* 0x3f317180 */
@@ -37,6 +36,8 @@
 Q1 = -3.3333212137e-2,		/* -0x888868.0p-28 */
 Q2 =  1.5807170421e-3;		/*  0xcf3010.0p-33 */
 
+static volatile float huge = 1.0e+30;
+
 float
 expm1f(float x)
 {
diff --git a/libm/upstream-freebsd/lib/msun/src/s_fma.c b/libm/upstream-freebsd/lib/msun/src/s_fma.c
index dfbd13c..452bece 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_fma.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_fma.c
@@ -238,6 +238,8 @@
 		zs = copysign(DBL_MIN, zs);
 
 	fesetround(FE_TONEAREST);
+	/* work around clang bug 8100 */
+	volatile double vxs = xs;
 
 	/*
 	 * Basic approach for round-to-nearest:
@@ -247,7 +249,7 @@
 	 *     adj = xy.lo + r.lo		(inexact; low bit is sticky)
 	 *     result = r.hi + adj		(correctly rounded)
 	 */
-	xy = dd_mul(xs, ys);
+	xy = dd_mul(vxs, ys);
 	r = dd_add(xy.hi, zs);
 
 	spread = ex + ey;
@@ -268,7 +270,9 @@
 		 * rounding modes.
 		 */
 		fesetround(oround);
-		adj = r.lo + xy.lo;
+		/* work around clang bug 8100 */
+		volatile double vrlo = r.lo;
+		adj = vrlo + xy.lo;
 		return (ldexp(r.hi + adj, spread));
 	}
 
diff --git a/libm/upstream-freebsd/lib/msun/src/s_fmal.c b/libm/upstream-freebsd/lib/msun/src/s_fmal.c
index c2a6913..9271901 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_fmal.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_fmal.c
@@ -226,6 +226,8 @@
 		zs = copysignl(LDBL_MIN, zs);
 
 	fesetround(FE_TONEAREST);
+	/* work around clang bug 8100 */
+	volatile long double vxs = xs;
 
 	/*
 	 * Basic approach for round-to-nearest:
@@ -235,7 +237,7 @@
 	 *     adj = xy.lo + r.lo		(inexact; low bit is sticky)
 	 *     result = r.hi + adj		(correctly rounded)
 	 */
-	xy = dd_mul(xs, ys);
+	xy = dd_mul(vxs, ys);
 	r = dd_add(xy.hi, zs);
 
 	spread = ex + ey;
@@ -256,7 +258,9 @@
 		 * rounding modes.
 		 */
 		fesetround(oround);
-		adj = r.lo + xy.lo;
+		/* work around clang bug 8100 */
+		volatile long double vrlo = r.lo;
+		adj = vrlo + xy.lo;
 		return (ldexpl(r.hi + adj, spread));
 	}
 
diff --git a/libm/upstream-freebsd/lib/msun/src/s_log1p.c b/libm/upstream-freebsd/lib/msun/src/s_log1p.c
index b062a8a..3cc77bd 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_log1p.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_log1p.c
@@ -96,6 +96,7 @@
 Lp7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
 
 static const double zero = 0.0;
+static volatile double vzero = 0.0;
 
 double
 log1p(double x)
@@ -109,7 +110,7 @@
 	k = 1;
 	if (hx < 0x3FDA827A) {			/* 1+x < sqrt(2)+ */
 	    if(ax>=0x3ff00000) {		/* x <= -1.0 */
-		if(x==-1.0) return -two54/zero; /* log1p(-1)=+inf */
+		if(x==-1.0) return -two54/vzero; /* log1p(-1)=+inf */
 		else return (x-x)/(x-x);	/* log1p(x<-1)=NaN */
 	    }
 	    if(ax<0x3e200000) {			/* |x| < 2**-29 */
@@ -173,3 +174,7 @@
 	if(k==0) return f-(hfsq-s*(hfsq+R)); else
 		 return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f);
 }
+
+#if (LDBL_MANT_DIG == 53)
+__weak_reference(log1p, log1pl);
+#endif
diff --git a/libm/upstream-freebsd/lib/msun/src/s_log1pf.c b/libm/upstream-freebsd/lib/msun/src/s_log1pf.c
index 01d3457..df04c67 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_log1pf.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_log1pf.c
@@ -34,6 +34,7 @@
 Lp7 = 1.4798198640e-01; /* 3E178897 */
 
 static const float zero = 0.0;
+static volatile float vzero = 0.0;
 
 float
 log1pf(float x)
@@ -47,7 +48,7 @@
 	k = 1;
 	if (hx < 0x3ed413d0) {			/* 1+x < sqrt(2)+  */
 	    if(ax>=0x3f800000) {		/* x <= -1.0 */
-		if(x==(float)-1.0) return -two25/zero; /* log1p(-1)=+inf */
+		if(x==(float)-1.0) return -two25/vzero; /* log1p(-1)=+inf */
 		else return (x-x)/(x-x);	/* log1p(x<-1)=NaN */
 	    }
 	    if(ax<0x38000000) {			/* |x| < 2**-15 */
diff --git a/libm/upstream-freebsd/lib/msun/src/s_nearbyint.c b/libm/upstream-freebsd/lib/msun/src/s_nearbyint.c
index 12493d2..063f8d7 100644
--- a/libm/upstream-freebsd/lib/msun/src/s_nearbyint.c
+++ b/libm/upstream-freebsd/lib/msun/src/s_nearbyint.c
@@ -36,12 +36,16 @@
  * instead of feclearexcept()/feupdateenv() to restore the environment
  * because the only exception defined for rint() is overflow, and
  * rounding can't overflow as long as emax >= p.
+ *
+ * The volatile keyword is needed below because clang incorrectly assumes
+ * that rint won't raise any floating-point exceptions. Declaring ret volatile
+ * is sufficient to trick the compiler into doing the right thing.
  */
 #define	DECL(type, fn, rint)	\
 type				\
 fn(type x)			\
 {				\
-	type ret;		\
+	volatile type ret;	\
 	fenv_t env;		\
 				\
 	fegetenv(&env);		\
diff --git a/linker/debugger.cpp b/linker/debugger.cpp
index a7c0591..d72aa39 100644
--- a/linker/debugger.cpp
+++ b/linker/debugger.cpp
@@ -28,14 +28,15 @@
 
 #include "linker.h"
 
+#include <errno.h>
+#include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <unistd.h>
-#include <signal.h>
+#include <sys/mman.h>
 #include <sys/prctl.h>
-#include <errno.h>
 #include <sys/socket.h>
 #include <sys/un.h>
+#include <unistd.h>
 
 extern "C" int tgkill(int tgid, int tid, int sig);
 
@@ -109,7 +110,7 @@
  * mutex is being held, so we don't want to use any libc functions that
  * could allocate memory or hold a lock.
  */
-static void logSignalSummary(int signum, const siginfo_t* info) {
+static void log_signal_summary(int signum, const siginfo_t* info) {
     const char* signal_name;
     switch (signum) {
         case SIGILL:    signal_name = "SIGILL";     break;
@@ -149,26 +150,26 @@
 /*
  * Returns true if the handler for signal "signum" has SA_SIGINFO set.
  */
-static bool haveSiginfo(int signum) {
-    struct sigaction oldact, newact;
+static bool have_siginfo(int signum) {
+    struct sigaction old_action, new_action;
 
-    memset(&newact, 0, sizeof(newact));
-    newact.sa_handler = SIG_DFL;
-    newact.sa_flags = SA_RESTART;
-    sigemptyset(&newact.sa_mask);
+    memset(&new_action, 0, sizeof(new_action));
+    new_action.sa_handler = SIG_DFL;
+    new_action.sa_flags = SA_RESTART;
+    sigemptyset(&new_action.sa_mask);
 
-    if (sigaction(signum, &newact, &oldact) < 0) {
+    if (sigaction(signum, &new_action, &old_action) < 0) {
       __libc_format_log(ANDROID_LOG_WARN, "libc", "Failed testing for SA_SIGINFO: %s",
                         strerror(errno));
       return false;
     }
-    bool ret = (oldact.sa_flags & SA_SIGINFO) != 0;
+    bool result = (old_action.sa_flags & SA_SIGINFO) != 0;
 
-    if (sigaction(signum, &oldact, NULL) == -1) {
+    if (sigaction(signum, &old_action, NULL) == -1) {
       __libc_format_log(ANDROID_LOG_WARN, "libc", "Restore failed in test for SA_SIGINFO: %s",
                         strerror(errno));
     }
-    return ret;
+    return result;
 }
 
 /*
@@ -180,11 +181,11 @@
      * It's possible somebody cleared the SA_SIGINFO flag, which would mean
      * our "info" arg holds an undefined value.
      */
-    if (!haveSiginfo(n)) {
+    if (!have_siginfo(n)) {
         info = NULL;
     }
 
-    logSignalSummary(n, info);
+    log_signal_summary(n, info);
 
     pid_t tid = gettid();
     int s = socket_abstract_client(DEBUGGER_SOCKET_NAME, SOCK_STREAM);
@@ -245,19 +246,23 @@
 }
 
 void debuggerd_init() {
-    struct sigaction act;
-    memset(&act, 0, sizeof(act));
-    act.sa_sigaction = debuggerd_signal_handler;
-    act.sa_flags = SA_RESTART | SA_SIGINFO;
-    sigemptyset(&act.sa_mask);
+    struct sigaction action;
+    memset(&action, 0, sizeof(action));
+    sigemptyset(&action.sa_mask);
+    action.sa_sigaction = debuggerd_signal_handler;
+    action.sa_flags = SA_RESTART | SA_SIGINFO;
 
-    sigaction(SIGILL, &act, NULL);
-    sigaction(SIGABRT, &act, NULL);
-    sigaction(SIGBUS, &act, NULL);
-    sigaction(SIGFPE, &act, NULL);
-    sigaction(SIGSEGV, &act, NULL);
+    // Use the alternate signal stack if available so we can catch stack overflows.
+    action.sa_flags |= SA_ONSTACK;
+
+    sigaction(SIGABRT, &action, NULL);
+    sigaction(SIGBUS, &action, NULL);
+    sigaction(SIGFPE, &action, NULL);
+    sigaction(SIGILL, &action, NULL);
+    sigaction(SIGPIPE, &action, NULL);
+    sigaction(SIGSEGV, &action, NULL);
 #if defined(SIGSTKFLT)
-    sigaction(SIGSTKFLT, &act, NULL);
+    sigaction(SIGSTKFLT, &action, NULL);
 #endif
-    sigaction(SIGPIPE, &act, NULL);
+    sigaction(SIGTRAP, &action, NULL);
 }
diff --git a/linker/linker.cpp b/linker/linker.cpp
index 7c5165b..386f6dc 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -440,17 +440,14 @@
 #endif
 
 static Elf32_Sym* soinfo_elf_lookup(soinfo* si, unsigned hash, const char* name) {
-    Elf32_Sym* s;
     Elf32_Sym* symtab = si->symtab;
     const char* strtab = si->strtab;
-    unsigned n;
 
     TRACE_TYPE(LOOKUP, "SEARCH %s in %s@0x%08x %08x %d",
                name, si->name, si->base, hash, hash % si->nbucket);
-    n = hash % si->nbucket;
 
-    for (n = si->bucket[hash % si->nbucket]; n != 0; n = si->chain[n]) {
-        s = symtab + n;
+    for (unsigned n = si->bucket[hash % si->nbucket]; n != 0; n = si->chain[n]) {
+        Elf32_Sym* s = symtab + n;
         if (strcmp(strtab + s->st_name, name)) continue;
 
             /* only concern ourselves with global and weak symbol definitions */
@@ -1495,18 +1492,19 @@
         return false;
     }
 
-    /* if this is the main executable, then load all of the preloads now */
+    // If this is the main executable, then load all of the libraries from LD_PRELOAD now.
     if (si->flags & FLAG_EXE) {
         memset(gLdPreloads, 0, sizeof(gLdPreloads));
+        size_t preload_count = 0;
         for (size_t i = 0; gLdPreloadNames[i] != NULL; i++) {
             soinfo* lsi = find_library(gLdPreloadNames[i]);
-            if (lsi == NULL) {
-                strlcpy(tmp_err_buf, linker_get_error_buffer(), sizeof(tmp_err_buf));
-                DL_ERR("could not load library \"%s\" needed by \"%s\"; caused by %s",
-                       gLdPreloadNames[i], si->name, tmp_err_buf);
-                return false;
+            if (lsi != NULL) {
+                gLdPreloads[preload_count++] = lsi;
+            } else {
+                // As with glibc, failure to load an LD_PRELOAD library is just a warning.
+                DL_WARN("could not load library \"%s\" from LD_PRELOAD for \"%s\"; caused by %s",
+                        gLdPreloadNames[i], si->name, linker_get_error_buffer());
             }
-            gLdPreloads[i] = lsi;
         }
     }
 
@@ -1535,6 +1533,8 @@
          * phdr_table_protect_segments() after all of them are applied
          * and all constructors are run.
          */
+        DL_WARN("%s has text relocations. This is wasting memory and is "
+                "a security risk. Please fix.", si->name);
         if (phdr_table_unprotect_segments(si->phdr, si->phnum, si->load_bias) < 0) {
             DL_ERR("can't unprotect loadable segments for \"%s\": %s",
                    si->name, strerror(errno));
@@ -1581,16 +1581,33 @@
         return false;
     }
 
-    // If this is a setuid/setgid program, close the security hole described in
-    // ftp://ftp.freebsd.org/pub/FreeBSD/CERT/advisories/FreeBSD-SA-02:23.stdio.asc
-    if (get_AT_SECURE()) {
-        nullify_closed_stdio();
-    }
     notify_gdb_of_load(si);
     return true;
 }
 
 /*
+ * This function adds the vdso to the internal dso list.
+ * It helps with stack unwinding through signal handlers.
+ * Also, it makes bionic more like glibc.
+ */
+static void add_vdso(KernelArgumentBlock& args UNUSED) {
+#ifdef AT_SYSINFO_EHDR
+    Elf32_Ehdr* ehdr_vdso = reinterpret_cast<Elf32_Ehdr*>(args.getauxval(AT_SYSINFO_EHDR));
+    if (ehdr_vdso == NULL) return;  // No vdso address available: bail out instead of dereferencing NULL.
+    soinfo* si = soinfo_alloc("[vdso]");
+    si->phdr = reinterpret_cast<Elf32_Phdr*>(reinterpret_cast<char*>(ehdr_vdso) + ehdr_vdso->e_phoff);
+    si->phnum = ehdr_vdso->e_phnum;  // Program header table starts e_phoff bytes past the ELF header.
+    si->link_map.l_name = si->name;
+    for (size_t i = 0; i < si->phnum; ++i) {
+        if (si->phdr[i].p_type == PT_LOAD) {
+            // l_addr = address of the vdso ELF header minus the first PT_LOAD's p_vaddr.
+            si->link_map.l_addr = reinterpret_cast<Elf32_Addr>(ehdr_vdso) - si->phdr[i].p_vaddr;
+            break;
+        }
+    }
+#endif
+}
+
+/*
  * This code is called after the linker has linked itself and
  * fixed it's own GOT. It is safe to make references to externs
  * and other non-local data at this point.
@@ -1614,6 +1631,12 @@
     // Initialize environment functions, and get to the ELF aux vectors table.
     linker_env_init(args);
 
+    // If this is a setuid/setgid program, close the security hole described in
+    // ftp://ftp.freebsd.org/pub/FreeBSD/CERT/advisories/FreeBSD-SA-02:23.stdio.asc
+    if (get_AT_SECURE()) {
+        nullify_closed_stdio();
+    }
+
     debuggerd_init();
 
     // Get a few environment variables.
@@ -1709,6 +1732,8 @@
         exit(EXIT_FAILURE);
     }
 
+    add_vdso(args);
+
     si->CallPreInitConstructors();
 
     for (size_t i = 0; gLdPreloads[i] != NULL; ++i) {
diff --git a/linker/linker.h b/linker/linker.h
index 61d623a..200a682 100644
--- a/linker/linker.h
+++ b/linker/linker.h
@@ -43,7 +43,16 @@
       __libc_format_buffer(linker_get_error_buffer(), linker_get_error_buffer_size(), fmt, ##x); \
       /* If LD_DEBUG is set high enough, log every dlerror(3) message. */ \
       DEBUG("%s\n", linker_get_error_buffer()); \
-    } while(0)
+    } while (false)
+
+#define DL_WARN(fmt, x...) \
+    do { \
+      __libc_format_log(ANDROID_LOG_WARN, "linker", fmt, ##x); \
+      __libc_format_fd(2, "WARNING: linker: "); \
+      __libc_format_fd(2, fmt, ##x); \
+      __libc_format_fd(2, "\n"); \
+    } while (false)
+
 
 // Returns the address of the page containing address 'x'.
 #define PAGE_START(x)  ((x) & PAGE_MASK)
diff --git a/linker/linker_phdr.cpp b/linker/linker_phdr.cpp
index 64dbb70..83f163c 100644
--- a/linker/linker_phdr.cpp
+++ b/linker/linker_phdr.cpp
@@ -226,24 +226,32 @@
   return true;
 }
 
-/* Compute the extent of all loadable segments in an ELF program header
- * table. This corresponds to the page-aligned size in bytes that needs to be
- * reserved in the process' address space
+/* Returns the size of the extent of all the possibly non-contiguous
+ * loadable segments in an ELF program header table. This corresponds
+ * to the page-aligned size in bytes that needs to be reserved in the
+ * process' address space. If there are no loadable segments, 0 is
+ * returned.
  *
- * This returns 0 if there are no loadable segments.
+ * If out_min_vaddr or out_max_vaddr are non-NULL, they will be
+ * set to the minimum and maximum addresses of pages to be reserved,
+ * or 0 if there is nothing to load.
  */
-Elf32_Addr phdr_table_get_load_size(const Elf32_Phdr* phdr_table,
-                                    size_t phdr_count)
+size_t phdr_table_get_load_size(const Elf32_Phdr* phdr_table,
+                                size_t phdr_count,
+                                Elf32_Addr* out_min_vaddr,
+                                Elf32_Addr* out_max_vaddr)
 {
     Elf32_Addr min_vaddr = 0xFFFFFFFFU;
     Elf32_Addr max_vaddr = 0x00000000U;
 
+    bool found_pt_load = false;
     for (size_t i = 0; i < phdr_count; ++i) {
         const Elf32_Phdr* phdr = &phdr_table[i];
 
         if (phdr->p_type != PT_LOAD) {
             continue;
         }
+        found_pt_load = true;
 
         if (phdr->p_vaddr < min_vaddr) {
             min_vaddr = phdr->p_vaddr;
@@ -253,14 +261,19 @@
             max_vaddr = phdr->p_vaddr + phdr->p_memsz;
         }
     }
-
-    if (min_vaddr > max_vaddr) {
-        return 0;
+    if (!found_pt_load) {
+        min_vaddr = 0x00000000U;
     }
 
     min_vaddr = PAGE_START(min_vaddr);
     max_vaddr = PAGE_END(max_vaddr);
 
+    if (out_min_vaddr != NULL) {
+        *out_min_vaddr = min_vaddr;
+    }
+    if (out_max_vaddr != NULL) {
+        *out_max_vaddr = max_vaddr;
+    }
     return max_vaddr - min_vaddr;
 }
 
@@ -268,29 +281,23 @@
 // segments of a program header table. This is done by creating a
 // private anonymous mmap() with PROT_NONE.
 bool ElfReader::ReserveAddressSpace() {
-  load_size_ = phdr_table_get_load_size(phdr_table_, phdr_num_);
+  Elf32_Addr min_vaddr;
+  load_size_ = phdr_table_get_load_size(phdr_table_, phdr_num_, &min_vaddr);
   if (load_size_ == 0) {
     DL_ERR("\"%s\" has no loadable segments", name_);
     return false;
   }
 
+  uint8_t* addr = reinterpret_cast<uint8_t*>(min_vaddr);
   int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS;
-  void* start = mmap(NULL, load_size_, PROT_NONE, mmap_flags, -1, 0);
+  void* start = mmap(addr, load_size_, PROT_NONE, mmap_flags, -1, 0);
   if (start == MAP_FAILED) {
     DL_ERR("couldn't reserve %d bytes of address space for \"%s\"", load_size_, name_);
     return false;
   }
 
   load_start_ = start;
-  load_bias_ = 0;
-
-  for (size_t i = 0; i < phdr_num_; ++i) {
-    const Elf32_Phdr* phdr = &phdr_table_[i];
-    if (phdr->p_type == PT_LOAD) {
-      load_bias_ = reinterpret_cast<Elf32_Addr>(start) - PAGE_START(phdr->p_vaddr);
-      break;
-    }
-  }
+  load_bias_ = reinterpret_cast<uint8_t*>(start) - addr;
   return true;
 }
 
@@ -320,16 +327,19 @@
     Elf32_Addr file_end   = file_start + phdr->p_filesz;
 
     Elf32_Addr file_page_start = PAGE_START(file_start);
+    Elf32_Addr file_length = file_end - file_page_start;
 
-    void* seg_addr = mmap((void*)seg_page_start,
-                          file_end - file_page_start,
-                          PFLAGS_TO_PROT(phdr->p_flags),
-                          MAP_FIXED|MAP_PRIVATE,
-                          fd_,
-                          file_page_start);
-    if (seg_addr == MAP_FAILED) {
-      DL_ERR("couldn't map \"%s\" segment %d: %s", name_, i, strerror(errno));
-      return false;
+    if (file_length != 0) {
+      void* seg_addr = mmap((void*)seg_page_start,
+                            file_length,
+                            PFLAGS_TO_PROT(phdr->p_flags),
+                            MAP_FIXED|MAP_PRIVATE,
+                            fd_,
+                            file_page_start);
+      if (seg_addr == MAP_FAILED) {
+        DL_ERR("couldn't map \"%s\" segment %d: %s", name_, i, strerror(errno));
+        return false;
+      }
     }
 
     // if the segment is writable, and does not end on a page boundary,
diff --git a/linker/linker_phdr.h b/linker/linker_phdr.h
index a31d1d9..992d95e 100644
--- a/linker/linker_phdr.h
+++ b/linker/linker_phdr.h
@@ -80,7 +80,11 @@
   const Elf32_Phdr* loaded_phdr_;
 };
 
-Elf32_Addr phdr_table_get_load_size(const Elf32_Phdr* phdr, size_t phnum);
+size_t
+phdr_table_get_load_size(const Elf32_Phdr* phdr_table,
+                         size_t phdr_count,
+                         Elf32_Addr* min_vaddr = NULL,
+                         Elf32_Addr* max_vaddr = NULL);
 
 int
 phdr_table_protect_segments(const Elf32_Phdr* phdr_table,
diff --git a/tests/Android.mk b/tests/Android.mk
index dee5e33..177e452 100644
--- a/tests/Android.mk
+++ b/tests/Android.mk
@@ -31,6 +31,7 @@
 benchmark_src_files = \
     benchmark_main.cpp \
     math_benchmark.cpp \
+    property_benchmark.cpp \
     string_benchmark.cpp \
     time_benchmark.cpp \
 
@@ -62,19 +63,25 @@
     fenv_test.cpp \
     getauxval_test.cpp \
     getcwd_test.cpp \
+    inttypes_test.cpp \
     libc_logging_test.cpp \
     libgen_test.cpp \
+    malloc_test.cpp \
     math_test.cpp \
     netdb_test.cpp \
     pthread_test.cpp \
     regex_test.cpp \
     signal_test.cpp \
     stack_protector_test.cpp \
+    stack_unwinding_test.cpp \
+    statvfs_test.cpp \
     stdio_test.cpp \
     stdlib_test.cpp \
     string_test.cpp \
     strings_test.cpp \
     stubs_test.cpp \
+    sys_stat_test.cpp \
+    system_properties_test.cpp \
     time_test.cpp \
     unistd_test.cpp \
 
@@ -82,6 +89,21 @@
 test_dynamic_src_files = \
     dlfcn_test.cpp \
 
+test_fortify_static_libraries = \
+    fortify1-tests-gcc fortify2-tests-gcc fortify1-tests-clang fortify2-tests-clang
+
+include $(CLEAR_VARS)
+LOCAL_MODULE := bionic-unit-tests-unwind-test-impl
+LOCAL_CFLAGS += $(test_c_flags) -fexceptions -fnon-call-exceptions
+LOCAL_SRC_FILES := stack_unwinding_test_impl.c
+include $(BUILD_STATIC_LIBRARY)
+
+include $(CLEAR_VARS)
+LOCAL_MODULE := bionic-unit-tests-unwind-test-impl-host
+LOCAL_CFLAGS += $(test_c_flags) -fexceptions -fnon-call-exceptions
+LOCAL_SRC_FILES := stack_unwinding_test_impl.c
+include $(BUILD_HOST_STATIC_LIBRARY)
+
 # Build tests for the device (with bionic's .so). Run with:
 #   adb shell /data/nativetest/bionic-unit-tests/bionic-unit-tests
 include $(CLEAR_VARS)
@@ -91,6 +113,8 @@
 LOCAL_LDFLAGS += $(test_dynamic_ldflags)
 LOCAL_SHARED_LIBRARIES += libdl
 LOCAL_SRC_FILES := $(test_src_files) $(test_dynamic_src_files)
+LOCAL_WHOLE_STATIC_LIBRARIES := $(test_fortify_static_libraries)
+LOCAL_STATIC_LIBRARIES += bionic-unit-tests-unwind-test-impl
 include $(BUILD_NATIVE_TEST)
 
 # Build tests for the device (with bionic's .a). Run with:
@@ -101,7 +125,8 @@
 LOCAL_CFLAGS += $(test_c_flags)
 LOCAL_FORCE_STATIC_EXECUTABLE := true
 LOCAL_SRC_FILES := $(test_src_files)
-LOCAL_STATIC_LIBRARIES += libstlport_static libstdc++ libm libc
+LOCAL_STATIC_LIBRARIES += libstlport_static libstdc++ libm libc bionic-unit-tests-unwind-test-impl
+LOCAL_WHOLE_STATIC_LIBRARIES := $(test_fortify_static_libraries)
 include $(BUILD_NATIVE_TEST)
 
 # -----------------------------------------------------------------------------
@@ -135,7 +160,59 @@
 LOCAL_LDFLAGS += -lpthread -ldl
 LOCAL_LDFLAGS += $(test_dynamic_ldflags)
 LOCAL_SRC_FILES := $(test_src_files) $(test_dynamic_src_files)
+LOCAL_STATIC_LIBRARIES += bionic-unit-tests-unwind-test-impl-host
 include $(BUILD_HOST_NATIVE_TEST)
 endif
 
+# -----------------------------------------------------------------------------
+# FORTIFY_SOURCE tests
+# -----------------------------------------------------------------------------
+
+fortify_c_includes = \
+    bionic \
+    bionic/libstdc++/include \
+    external/stlport/stlport \
+    external/gtest/include
+fortify_test_files = fortify_test.cpp
+
+# -Wno-error=unused-parameter needed as
+# external/stlport/stlport/stl/_threads.c (included from
+# external/gtest/include/gtest/gtest.h) does not compile cleanly under
+# clang. TODO: fix this.
+fortify_c_flags = $(test_c_flags) -Wno-error=unused-parameter
+
+include $(CLEAR_VARS)
+LOCAL_SRC_FILES := $(fortify_test_files)
+LOCAL_MODULE := fortify1-tests-gcc
+LOCAL_CFLAGS += $(fortify_c_flags) -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=1 -DTEST_NAME=Fortify1_Gcc
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
+LOCAL_C_INCLUDES += $(fortify_c_includes)
+include $(BUILD_STATIC_LIBRARY)
+
+include $(CLEAR_VARS)
+LOCAL_SRC_FILES := $(fortify_test_files)
+LOCAL_MODULE := fortify2-tests-gcc
+LOCAL_CFLAGS += $(fortify_c_flags) -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -DTEST_NAME=Fortify2_Gcc
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
+LOCAL_C_INCLUDES += $(fortify_c_includes)
+include $(BUILD_STATIC_LIBRARY)
+
+include $(CLEAR_VARS)
+LOCAL_SRC_FILES := $(fortify_test_files)
+LOCAL_MODULE := fortify1-tests-clang
+LOCAL_CLANG := true
+LOCAL_CFLAGS += $(fortify_c_flags) -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=1 -DTEST_NAME=Fortify1_Clang
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
+LOCAL_C_INCLUDES += $(fortify_c_includes)
+include $(BUILD_STATIC_LIBRARY)
+
+include $(CLEAR_VARS)
+LOCAL_SRC_FILES := $(fortify_test_files)
+LOCAL_MODULE := fortify2-tests-clang
+LOCAL_CLANG := true
+LOCAL_CFLAGS += $(fortify_c_flags) -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -DTEST_NAME=Fortify2_Clang
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
+LOCAL_C_INCLUDES += $(fortify_c_includes)
+include $(BUILD_STATIC_LIBRARY)
+
 endif # !BUILD_TINY_ANDROID
diff --git a/tests/fortify_test.cpp b/tests/fortify_test.cpp
new file mode 100644
index 0000000..d8f0e76
--- /dev/null
+++ b/tests/fortify_test.cpp
@@ -0,0 +1,659 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+// We have to say "DeathTest" here so gtest knows to run this test (which exits)
+// in its own process. Unfortunately, the C preprocessor doesn't expand macro
+// arguments that are operands of ##, so we need the extra level of macro
+// indirection below to paste "_DeathTest" onto the value of TEST_NAME. *sigh*
+#define DEATHTEST_PASTER(name) name##_DeathTest
+#define DEATHTEST_EVALUATOR(name) DEATHTEST_PASTER(name)
+#define DEATHTEST DEATHTEST_EVALUATOR(TEST_NAME)
+
+#if defined(_FORTIFY_SOURCE) && _FORTIFY_SOURCE == 2
+struct foo {
+  char empty[0];
+  char one[1];
+  char a[10];
+  char b[10];
+};
+
+#ifndef __clang__
+// This test is disabled in clang because clang doesn't properly detect
+// this buffer overflow. TODO: Fix clang.
+TEST(DEATHTEST, strncpy_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  foo myfoo;
+  int copy_amt = atoi("11");
+  ASSERT_EXIT(strncpy(myfoo.a, "01234567890", copy_amt),
+              testing::KilledBySignal(SIGABRT), "");
+}
+#endif
+
+#ifndef __clang__
+// This test is disabled in clang because clang doesn't properly detect
+// this buffer overflow. TODO: Fix clang.
+TEST(DEATHTEST, sprintf_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  foo myfoo;
+  char source_buf[15];
+  memcpy(source_buf, "12345678901234", 15);
+  ASSERT_EXIT(sprintf(myfoo.a, "%s", source_buf),
+              testing::KilledBySignal(SIGABRT), "");
+}
+#endif
+
+#ifndef __clang__
+// This test is disabled in clang because clang doesn't properly detect
+// this buffer overflow. TODO: Fix clang.
+TEST(DEATHTEST, sprintf2_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  foo myfoo;
+  ASSERT_EXIT(sprintf(myfoo.a, "0123456789"),
+              testing::KilledBySignal(SIGABRT), "");
+}
+#endif
+
+#ifndef __clang__
+// These tests are disabled in clang because clang doesn't properly detect
+// this buffer overflow. TODO: Fix clang.
+static int vsprintf_helper2(const char *fmt, ...) {
+  foo myfoo;
+  va_list va;
+  int result;
+
+  va_start(va, fmt);
+  result = vsprintf(myfoo.a, fmt, va); // should crash here
+  va_end(va);
+  return result;
+}
+
+TEST(DEATHTEST, vsprintf_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  ASSERT_EXIT(vsprintf_helper2("%s", "0123456789"), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, vsprintf2_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  ASSERT_EXIT(vsprintf_helper2("0123456789"), testing::KilledBySignal(SIGABRT), "");
+}
+#endif
+
+#ifndef __clang__
+// These tests are disabled in clang because clang doesn't properly detect
+// this buffer overflow. TODO: Fix clang.
+static int vsnprintf_helper2(const char *fmt, ...) {
+  foo myfoo;
+  va_list va;
+  int result;
+  size_t size = atoi("11");
+
+  va_start(va, fmt);
+  result = vsnprintf(myfoo.a, size, fmt, va); // should crash here
+  va_end(va);
+  return result;
+}
+
+TEST(DEATHTEST, vsnprintf_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  ASSERT_EXIT(vsnprintf_helper2("%s", "0123456789"), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, vsnprintf2_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  ASSERT_EXIT(vsnprintf_helper2("0123456789"), testing::KilledBySignal(SIGABRT), "");
+}
+#endif
+
+#if __BIONIC__
+
+#ifndef __clang__
+// zero sized target with "\0" source (should fail)
+// This test is disabled in clang because clang doesn't properly detect
+// this buffer overflow. TODO: Fix clang.
+TEST(DEATHTEST, strcpy_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  foo myfoo;
+  char* src = strdup("");
+  ASSERT_EXIT(strcpy(myfoo.empty, src),
+              testing::KilledBySignal(SIGABRT), "");
+  free(src);
+}
+#endif
+
+#ifndef __clang__
+// zero sized target with longer source (should fail)
+// This test is disabled in clang because clang doesn't properly detect
+// this buffer overflow. TODO: Fix clang.
+TEST(DEATHTEST, strcpy2_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  foo myfoo;
+  char* src = strdup("1");
+  ASSERT_EXIT(strcpy(myfoo.empty, src),
+              testing::KilledBySignal(SIGABRT), "");
+  free(src);
+}
+#endif
+
+#ifndef __clang__
+// one byte target with longer source (should fail)
+// This test is disabled in clang because clang doesn't properly detect
+// this buffer overflow. TODO: Fix clang.
+TEST(DEATHTEST, strcpy3_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  foo myfoo;
+  char* src = strdup("12");
+  ASSERT_EXIT(strcpy(myfoo.one, src),
+              testing::KilledBySignal(SIGABRT), "");
+  free(src);
+}
+#endif
+
+#ifndef __clang__
+// This test is disabled in clang because clang doesn't properly detect
+// this buffer overflow. TODO: Fix clang.
+TEST(DEATHTEST, strchr_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  foo myfoo;
+  memcpy(myfoo.a, "0123456789", sizeof(myfoo.a));
+  myfoo.b[0] = '\0';
+  ASSERT_EXIT(printf("%s", strchr(myfoo.a, 'a')),
+              testing::KilledBySignal(SIGABRT), "");
+}
+#endif
+
+#ifndef __clang__
+// This test is disabled in clang because clang doesn't properly detect
+// this buffer overflow. TODO: Fix clang.
+TEST(DEATHTEST, strrchr_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  foo myfoo;
+  memcpy(myfoo.a, "0123456789", 10);
+  memcpy(myfoo.b, "01234", 6);
+  ASSERT_EXIT(printf("%s", strrchr(myfoo.a, 'a')),
+              testing::KilledBySignal(SIGABRT), "");
+}
+#endif
+
+#ifndef __clang__
+// This test is disabled in clang because clang doesn't properly detect
+// this buffer overflow. TODO: Fix clang.
+TEST(DEATHTEST, strlcpy_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  foo myfoo;
+  strcpy(myfoo.a, "01");
+  size_t n = strlen(myfoo.a);
+  ASSERT_EXIT(strlcpy(myfoo.one, myfoo.a, n),
+              testing::KilledBySignal(SIGABRT), "");
+}
+#endif
+
+#ifndef __clang__
+// This test is disabled in clang because clang doesn't properly detect
+// this buffer overflow. TODO: Fix clang.
+TEST(DEATHTEST, strlcat_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  foo myfoo;
+  strcpy(myfoo.a, "01");
+  myfoo.one[0] = '\0';
+  size_t n = strlen(myfoo.a);
+  ASSERT_EXIT(strlcat(myfoo.one, myfoo.a, n),
+              testing::KilledBySignal(SIGABRT), "");
+}
+#endif
+
+#endif /* __BIONIC__ */
+
+#ifndef __clang__
+// This test is disabled in clang because clang doesn't properly detect
+// this buffer overflow. TODO: Fix clang.
+TEST(DEATHTEST, strncat_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  foo myfoo;
+  size_t n = atoi("10"); // avoid compiler optimizations
+  strncpy(myfoo.a, "012345678", n);
+  ASSERT_EXIT(strncat(myfoo.a, "9", n), testing::KilledBySignal(SIGABRT), "");
+}
+#endif
+
+#ifndef __clang__
+// This test is disabled in clang because clang doesn't properly detect
+// this buffer overflow. TODO: Fix clang.
+TEST(DEATHTEST, strncat2_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  foo myfoo;
+  myfoo.a[0] = '\0';
+  size_t n = atoi("10"); // avoid compiler optimizations
+  ASSERT_EXIT(strncat(myfoo.a, "0123456789", n), testing::KilledBySignal(SIGABRT), "");
+}
+#endif
+
+TEST(DEATHTEST, strncat3_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  foo myfoo;
+  memcpy(myfoo.a, "0123456789", sizeof(myfoo.a)); // unterminated string
+  myfoo.b[0] = '\0';
+  size_t n = atoi("10"); // avoid compiler optimizations
+  ASSERT_EXIT(strncat(myfoo.b, myfoo.a, n), testing::KilledBySignal(SIGABRT), "");
+}
+
+#ifndef __clang__
+// This test is disabled in clang because clang doesn't properly detect
+// this buffer overflow. TODO: Fix clang.
+TEST(DEATHTEST, strcat_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char src[11];
+  strcpy(src, "0123456789");
+  foo myfoo;
+  myfoo.a[0] = '\0';
+  ASSERT_EXIT(strcat(myfoo.a, src), testing::KilledBySignal(SIGABRT), "");
+}
+#endif
+
+TEST(DEATHTEST, strcat2_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  foo myfoo;
+  memcpy(myfoo.a, "0123456789", sizeof(myfoo.a)); // unterminated string
+  myfoo.b[0] = '\0';
+  ASSERT_EXIT(strcat(myfoo.b, myfoo.a), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, snprintf_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  foo myfoo;
+  strcpy(myfoo.a, "012345678");
+  size_t n = strlen(myfoo.a) + 2;
+  ASSERT_EXIT(snprintf(myfoo.b, n, "a%s", myfoo.a), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, bzero_fortified2) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  foo myfoo;
+  memcpy(myfoo.b, "0123456789", sizeof(myfoo.b));
+  size_t n = atoi("11");
+  ASSERT_EXIT(bzero(myfoo.b, n), testing::KilledBySignal(SIGABRT), "");
+}
+
+#endif /* defined(_FORTIFY_SOURCE) && _FORTIFY_SOURCE == 2 */
+
+#if __BIONIC__
+// multi-byte target that we overfill (should fail)
+TEST(DEATHTEST, strcpy_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char buf[10];
+  char *orig = strdup("0123456789");
+  ASSERT_EXIT(strcpy(buf, orig), testing::KilledBySignal(SIGABRT), "");
+  free(orig);
+}
+
+// zero sized target with "\0" source (should fail)
+TEST(DEATHTEST, strcpy2_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char buf[0];
+  char *orig = strdup("");
+  ASSERT_EXIT(strcpy(buf, orig), testing::KilledBySignal(SIGABRT), "");
+  free(orig);
+}
+
+// zero sized target with longer source (should fail)
+TEST(DEATHTEST, strcpy3_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char buf[0];
+  char *orig = strdup("1");
+  ASSERT_EXIT(strcpy(buf, orig), testing::KilledBySignal(SIGABRT), "");
+  free(orig);
+}
+
+// one byte target with longer source (should fail)
+TEST(DEATHTEST, strcpy4_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char buf[1];
+  char *orig = strdup("12");
+  ASSERT_EXIT(strcpy(buf, orig), testing::KilledBySignal(SIGABRT), "");
+  free(orig);
+}
+
+TEST(DEATHTEST, strlen_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char buf[10];
+  memcpy(buf, "0123456789", sizeof(buf));
+  ASSERT_EXIT(printf("%d", strlen(buf)), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, strchr_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char buf[10];
+  memcpy(buf, "0123456789", sizeof(buf));
+  ASSERT_EXIT(printf("%s", strchr(buf, 'a')), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, strrchr_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char buf[10];
+  memcpy(buf, "0123456789", sizeof(buf));
+  ASSERT_EXIT(printf("%s", strrchr(buf, 'a')), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, strlcpy_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char bufa[15];
+  char bufb[10];
+  strcpy(bufa, "01234567890123");
+  size_t n = strlen(bufa);
+  ASSERT_EXIT(strlcpy(bufb, bufa, n), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, strlcat_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char bufa[15];
+  char bufb[10];
+  bufb[0] = '\0';
+  strcpy(bufa, "01234567890123");
+  size_t n = strlen(bufa);
+  ASSERT_EXIT(strlcat(bufb, bufa, n), testing::KilledBySignal(SIGABRT), "");
+}
+
+#endif
+
+TEST(DEATHTEST, sprintf_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char buf[10];
+  char source_buf[15];
+  memcpy(source_buf, "12345678901234", 15);
+  ASSERT_EXIT(sprintf(buf, "%s", source_buf), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, sprintf2_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char buf[5];
+  ASSERT_EXIT(sprintf(buf, "aaaaa"), testing::KilledBySignal(SIGABRT), "");
+}
+
+static int vsprintf_helper(const char *fmt, ...) {
+  char buf[10];
+  va_list va;
+  int result;
+
+  va_start(va, fmt);
+  result = vsprintf(buf, fmt, va); // should crash here
+  va_end(va);
+  return result;
+}
+
+TEST(DEATHTEST, vsprintf_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  ASSERT_EXIT(vsprintf_helper("%s", "0123456789"), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, vsprintf2_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  ASSERT_EXIT(vsprintf_helper("0123456789"), testing::KilledBySignal(SIGABRT), "");
+}
+
+static int vsnprintf_helper(const char *fmt, ...) {
+  char buf[10];
+  va_list va;
+  int result;
+  size_t size = atoi("11");
+
+  va_start(va, fmt);
+  result = vsnprintf(buf, size, fmt, va); // should crash here
+  va_end(va);
+  return result;
+}
+
+TEST(DEATHTEST, vsnprintf_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  ASSERT_EXIT(vsnprintf_helper("%s", "0123456789"), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, vsnprintf2_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  ASSERT_EXIT(vsnprintf_helper("0123456789"), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, strncat_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char buf[10];
+  size_t n = atoi("10"); // avoid compiler optimizations
+  strncpy(buf, "012345678", n);
+  ASSERT_EXIT(strncat(buf, "9", n), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, strncat2_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char buf[10];
+  buf[0] = '\0';
+  size_t n = atoi("10"); // avoid compiler optimizations
+  ASSERT_EXIT(strncat(buf, "0123456789", n), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, strcat_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char src[11];
+  strcpy(src, "0123456789");
+  char buf[10];
+  buf[0] = '\0';
+  ASSERT_EXIT(strcat(buf, src), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, memmove_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char buf[20];
+  strcpy(buf, "0123456789");
+  size_t n = atoi("10");
+  ASSERT_EXIT(memmove(buf + 11, buf, n), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, memcpy_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char bufa[10];
+  char bufb[10];
+  strcpy(bufa, "012345678");
+  size_t n = atoi("11");
+  ASSERT_EXIT(memcpy(bufb, bufa, n), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, strncpy_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char bufa[15];
+  char bufb[10];
+  strcpy(bufa, "01234567890123");
+  size_t n = strlen(bufa);
+  ASSERT_EXIT(strncpy(bufb, bufa, n), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, snprintf_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char bufa[15];
+  char bufb[10];
+  strcpy(bufa, "0123456789");
+  size_t n = strlen(bufa) + 1;
+  ASSERT_EXIT(snprintf(bufb, n, "%s", bufa), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, bzero_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  char buf[10];
+  memcpy(buf, "0123456789", sizeof(buf));
+  size_t n = atoi("11");
+  ASSERT_EXIT(bzero(buf, n), testing::KilledBySignal(SIGABRT), "");
+}
+
+TEST(DEATHTEST, umask_fortified) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  mode_t mask = atoi("1023");  // 01777 in octal
+  ASSERT_EXIT(umask(mask), testing::KilledBySignal(SIGABRT), "");
+}
+
+extern "C" char* __strncat_chk(char*, const char*, size_t, size_t);
+extern "C" char* __strcat_chk(char*, const char*, size_t);
+
+TEST(TEST_NAME, strncat) {
+  char buf[10];
+  memset(buf, 'A', sizeof(buf));
+  buf[0] = 'a';
+  buf[1] = '\0';
+  char* res = __strncat_chk(buf, "01234", sizeof(buf) - strlen(buf) - 1, sizeof(buf));
+  ASSERT_EQ(buf, res);
+  ASSERT_EQ('a',  buf[0]);
+  ASSERT_EQ('0',  buf[1]);
+  ASSERT_EQ('1',  buf[2]);
+  ASSERT_EQ('2',  buf[3]);
+  ASSERT_EQ('3',  buf[4]);
+  ASSERT_EQ('4',  buf[5]);
+  ASSERT_EQ('\0', buf[6]);
+  ASSERT_EQ('A',  buf[7]);
+  ASSERT_EQ('A',  buf[8]);
+  ASSERT_EQ('A',  buf[9]);
+}
+
+TEST(TEST_NAME, strncat2) {
+  char buf[10];
+  memset(buf, 'A', sizeof(buf));
+  buf[0] = 'a';
+  buf[1] = '\0';
+  char* res = __strncat_chk(buf, "0123456789", 5, sizeof(buf));
+  ASSERT_EQ(buf, res);
+  ASSERT_EQ('a',  buf[0]);
+  ASSERT_EQ('0',  buf[1]);
+  ASSERT_EQ('1',  buf[2]);
+  ASSERT_EQ('2',  buf[3]);
+  ASSERT_EQ('3',  buf[4]);
+  ASSERT_EQ('4',  buf[5]);
+  ASSERT_EQ('\0', buf[6]);
+  ASSERT_EQ('A',  buf[7]);
+  ASSERT_EQ('A',  buf[8]);
+  ASSERT_EQ('A',  buf[9]);
+}
+
+TEST(TEST_NAME, strncat3) {
+  char buf[10];
+  memset(buf, 'A', sizeof(buf));
+  buf[0] = '\0';
+  char* res = __strncat_chk(buf, "0123456789", 5, sizeof(buf));
+  ASSERT_EQ(buf, res);
+  ASSERT_EQ('0',  buf[0]);
+  ASSERT_EQ('1',  buf[1]);
+  ASSERT_EQ('2',  buf[2]);
+  ASSERT_EQ('3',  buf[3]);
+  ASSERT_EQ('4',  buf[4]);
+  ASSERT_EQ('\0', buf[5]);
+  ASSERT_EQ('A',  buf[6]);
+  ASSERT_EQ('A',  buf[7]);
+  ASSERT_EQ('A',  buf[8]);
+  ASSERT_EQ('A',  buf[9]);
+}
+
+TEST(TEST_NAME, strncat4) {
+  char buf[10];
+  memset(buf, 'A', sizeof(buf));
+  buf[9] = '\0';
+  char* res = __strncat_chk(buf, "", 5, sizeof(buf));
+  ASSERT_EQ(buf, res);
+  ASSERT_EQ('A',  buf[0]);
+  ASSERT_EQ('A',  buf[1]);
+  ASSERT_EQ('A',  buf[2]);
+  ASSERT_EQ('A',  buf[3]);
+  ASSERT_EQ('A',  buf[4]);
+  ASSERT_EQ('A',  buf[5]);
+  ASSERT_EQ('A',  buf[6]);
+  ASSERT_EQ('A',  buf[7]);
+  ASSERT_EQ('A',  buf[8]);
+  ASSERT_EQ('\0', buf[9]);
+}
+
+TEST(TEST_NAME, strncat5) {
+  char buf[10];
+  memset(buf, 'A', sizeof(buf));
+  buf[0] = 'a';
+  buf[1] = '\0';
+  char* res = __strncat_chk(buf, "01234567", 8, sizeof(buf));
+  ASSERT_EQ(buf, res);
+  ASSERT_EQ('a',  buf[0]);
+  ASSERT_EQ('0',  buf[1]);
+  ASSERT_EQ('1',  buf[2]);
+  ASSERT_EQ('2',  buf[3]);
+  ASSERT_EQ('3',  buf[4]);
+  ASSERT_EQ('4',  buf[5]);
+  ASSERT_EQ('5', buf[6]);
+  ASSERT_EQ('6',  buf[7]);
+  ASSERT_EQ('7',  buf[8]);
+  ASSERT_EQ('\0',  buf[9]);
+}
+
+TEST(TEST_NAME, strncat6) {
+  char buf[10];
+  memset(buf, 'A', sizeof(buf));
+  buf[0] = 'a';
+  buf[1] = '\0';
+  char* res = __strncat_chk(buf, "01234567", 9, sizeof(buf));
+  ASSERT_EQ(buf, res);
+  ASSERT_EQ('a',  buf[0]);
+  ASSERT_EQ('0',  buf[1]);
+  ASSERT_EQ('1',  buf[2]);
+  ASSERT_EQ('2',  buf[3]);
+  ASSERT_EQ('3',  buf[4]);
+  ASSERT_EQ('4',  buf[5]);
+  ASSERT_EQ('5', buf[6]);
+  ASSERT_EQ('6',  buf[7]);
+  ASSERT_EQ('7',  buf[8]);
+  ASSERT_EQ('\0',  buf[9]);
+}
+
+
+TEST(TEST_NAME, strcat) {
+  char buf[10];
+  memset(buf, 'A', sizeof(buf));
+  buf[0] = 'a';
+  buf[1] = '\0';
+  char* res = __strcat_chk(buf, "01234", sizeof(buf));
+  ASSERT_EQ(buf, res);
+  ASSERT_EQ('a',  buf[0]);
+  ASSERT_EQ('0',  buf[1]);
+  ASSERT_EQ('1',  buf[2]);
+  ASSERT_EQ('2',  buf[3]);
+  ASSERT_EQ('3',  buf[4]);
+  ASSERT_EQ('4',  buf[5]);
+  ASSERT_EQ('\0', buf[6]);
+  ASSERT_EQ('A',  buf[7]);
+  ASSERT_EQ('A',  buf[8]);
+  ASSERT_EQ('A',  buf[9]);
+}
+
+TEST(TEST_NAME, strcat2) {
+  char buf[10];
+  memset(buf, 'A', sizeof(buf));
+  buf[0] = 'a';
+  buf[1] = '\0';
+  char* res = __strcat_chk(buf, "01234567", sizeof(buf));
+  ASSERT_EQ(buf, res);
+  ASSERT_EQ('a',  buf[0]);
+  ASSERT_EQ('0',  buf[1]);
+  ASSERT_EQ('1',  buf[2]);
+  ASSERT_EQ('2',  buf[3]);
+  ASSERT_EQ('3',  buf[4]);
+  ASSERT_EQ('4',  buf[5]);
+  ASSERT_EQ('5', buf[6]);
+  ASSERT_EQ('6',  buf[7]);
+  ASSERT_EQ('7',  buf[8]);
+  ASSERT_EQ('\0',  buf[9]);
+}
diff --git a/tests/inttypes_test.cpp b/tests/inttypes_test.cpp
new file mode 100644
index 0000000..ec4a104
--- /dev/null
+++ b/tests/inttypes_test.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#define __STDC_FORMAT_MACROS // Otherwise not available in C++.
+
+#include <stdio.h>
+#include <inttypes.h>
+
+#if defined(__BIONIC__) // Doesn't work on glibc because we use -m32.
+TEST(inttypes, misc) {
+  char buf[512];
+
+  intptr_t i = 0;
+  uintptr_t u = 0;
+
+  snprintf(buf, sizeof(buf), "%08" PRIdPTR, i);
+  snprintf(buf, sizeof(buf), "%08" PRIiPTR, i);
+  snprintf(buf, sizeof(buf), "%08" PRIoPTR, i);
+  snprintf(buf, sizeof(buf), "%08" PRIuPTR, u);
+  snprintf(buf, sizeof(buf), "%08" PRIxPTR, u);
+  snprintf(buf, sizeof(buf), "%08" PRIXPTR, u);
+
+  sscanf(buf, "%08" SCNdPTR, &i);
+  sscanf(buf, "%08" SCNiPTR, &i);
+  sscanf(buf, "%08" SCNoPTR, &u);
+  sscanf(buf, "%08" SCNuPTR, &u);
+  sscanf(buf, "%08" SCNxPTR, &u);
+}
+#endif
diff --git a/tests/malloc_test.cpp b/tests/malloc_test.cpp
new file mode 100644
index 0000000..259853d
--- /dev/null
+++ b/tests/malloc_test.cpp
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <stdlib.h>
+#include <malloc.h>
+
+TEST(malloc, malloc_std) {
+  // Simple malloc test: the allocation must succeed and offer at least the requested size.
+  void *ptr = malloc(100);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(100U, malloc_usable_size(ptr));  // Usable size may exceed the request but must not fall short.
+
+  free(ptr);
+}
+
+TEST(malloc, calloc_std) {
+  // Simple calloc test: the allocation must succeed, be large enough, and read back as zeros.
+  size_t alloc_len = 100;
+  char *ptr = (char *)calloc(1, alloc_len);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(alloc_len, malloc_usable_size(ptr));
+  for (size_t i = 0; i < alloc_len; i++) {  // Check every requested byte, not just the first.
+    ASSERT_EQ(0, ptr[i]);
+  }
+
+  free(ptr);
+}
+
+TEST(malloc, memalign_multiple) {
+  // Memalign test where the alignment is any value, not only a power of two.
+  for (size_t i = 0; i <= 12; i++) {  // i selects the power-of-two bucket [2^i, 2^(i+1)).
+    for (size_t alignment = 1 << i; alignment < (1U << (i+1)); alignment++) {  // Try every value in the bucket.
+      char *ptr = (char*)memalign(alignment, 100);
+      ASSERT_TRUE(ptr != NULL);
+      ASSERT_LE(100U, malloc_usable_size(ptr));
+      ASSERT_EQ(0, (intptr_t)ptr % (1 << i));  // Only alignment to 2^i (the bucket's lower bound) is required.
+
+      free(ptr);
+    }
+  }
+}
+
+TEST(malloc, memalign_realloc) {
+  // Memalign and then realloc the pointer a couple of times, checking the contents survive each step.
+  for (size_t alignment = 1; alignment <= 4096; alignment <<= 1) {  // Powers of two from 1 to 4096.
+    char *ptr = (char*)memalign(alignment, 100);
+    ASSERT_TRUE(ptr != NULL);
+    ASSERT_LE(100U, malloc_usable_size(ptr));
+    ASSERT_EQ(0U, (intptr_t)ptr % alignment);
+    memset(ptr, 0x23, 100);  // Fill with a known pattern so the next realloc can be verified.
+
+    ptr = (char*)realloc(ptr, 200);  // Grow 100 -> 200.
+    ASSERT_TRUE(ptr != NULL);
+    ASSERT_LE(200U, malloc_usable_size(ptr));
+    ASSERT_TRUE(ptr != NULL);  // (Repeats the NULL check made two lines above.)
+    for (size_t i = 0; i < 100; i++) {  // The original 100 bytes must be preserved.
+      ASSERT_EQ(0x23, ptr[i]);
+    }
+    memset(ptr, 0x45, 200);
+
+    ptr = (char*)realloc(ptr, 300);  // Grow 200 -> 300.
+    ASSERT_TRUE(ptr != NULL);
+    ASSERT_LE(300U, malloc_usable_size(ptr));
+    for (size_t i = 0; i < 200; i++) {
+      ASSERT_EQ(0x45, ptr[i]);
+    }
+    memset(ptr, 0x67, 300);
+
+    ptr = (char*)realloc(ptr, 250);  // Shrink 300 -> 250.
+    ASSERT_TRUE(ptr != NULL);
+    ASSERT_LE(250U, malloc_usable_size(ptr));
+    for (size_t i = 0; i < 250; i++) {  // Everything that still fits must be preserved.
+      ASSERT_EQ(0x67, ptr[i]);
+    }
+
+    free(ptr);
+  }
+}
+
+TEST(malloc, malloc_realloc_larger) {
+  // Realloc to a larger size, malloc is used for the original allocation.
+  char *ptr = (char *)malloc(100);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(100U, malloc_usable_size(ptr));
+  memset(ptr, 67, 100);  // Known pattern to verify after the realloc.
+
+  ptr = (char *)realloc(ptr, 200);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(200U, malloc_usable_size(ptr));
+  for (size_t i = 0; i < 100; i++) {  // Only the original 100 bytes have defined contents.
+    ASSERT_EQ(67, ptr[i]);
+  }
+
+  free(ptr);
+}
+
+TEST(malloc, malloc_realloc_smaller) {
+  // Realloc to a smaller size, malloc is used for the original allocation.
+  char *ptr = (char *)malloc(200);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(200U, malloc_usable_size(ptr));
+  memset(ptr, 67, 200);  // Known pattern to verify after the realloc.
+
+  ptr = (char *)realloc(ptr, 100);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(100U, malloc_usable_size(ptr));
+  for (size_t i = 0; i < 100; i++) {  // The bytes that remain after shrinking must be unchanged.
+    ASSERT_EQ(67, ptr[i]);
+  }
+
+  free(ptr);
+}
+
+TEST(malloc, malloc_multiple_realloc) {
+  // Multiple reallocs (200 -> 100 -> 50 -> 150 -> 425), malloc is used for the original allocation.
+  char *ptr = (char *)malloc(200);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(200U, malloc_usable_size(ptr));
+  memset(ptr, 0x23, 200);  // Known pattern checked after each realloc.
+
+  ptr = (char *)realloc(ptr, 100);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(100U, malloc_usable_size(ptr));
+  for (size_t i = 0; i < 100; i++) {
+    ASSERT_EQ(0x23, ptr[i]);
+  }
+
+  ptr = (char*)realloc(ptr, 50);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(50U, malloc_usable_size(ptr));
+  for (size_t i = 0; i < 50; i++) {
+    ASSERT_EQ(0x23, ptr[i]);
+  }
+
+  ptr = (char*)realloc(ptr, 150);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(150U, malloc_usable_size(ptr));
+  for (size_t i = 0; i < 50; i++) {  // Only the 50 bytes carried over from the previous size are checked.
+    ASSERT_EQ(0x23, ptr[i]);
+  }
+  memset(ptr, 0x23, 150);  // Re-fill so the whole 150 bytes are defined before the final grow.
+
+  ptr = (char*)realloc(ptr, 425);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(425U, malloc_usable_size(ptr));
+  for (size_t i = 0; i < 150; i++) {
+    ASSERT_EQ(0x23, ptr[i]);
+  }
+
+  free(ptr);
+}
+TEST(malloc, calloc_realloc_larger) {
+  // Realloc to a larger size, calloc is used for the original allocation.
+  char *ptr = (char *)calloc(1, 100);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(100U, malloc_usable_size(ptr));
+
+  ptr = (char *)realloc(ptr, 200);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(200U, malloc_usable_size(ptr));
+  for (size_t i = 0; i < 100; i++) {  // The zeros written by calloc must survive the realloc.
+    ASSERT_EQ(0, ptr[i]);
+  }
+
+  free(ptr);
+}
+
+TEST(malloc, calloc_realloc_smaller) {
+  // Realloc to a smaller size, calloc is used for the original allocation.
+  char *ptr = (char *)calloc(1, 200);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(200U, malloc_usable_size(ptr));
+
+  ptr = (char *)realloc(ptr, 100);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(100U, malloc_usable_size(ptr));
+  for (size_t i = 0; i < 100; i++) {  // The remaining bytes must still be the zeros written by calloc.
+    ASSERT_EQ(0, ptr[i]);
+  }
+
+  free(ptr);
+}
+
+TEST(malloc, calloc_multiple_realloc) {
+  // Multiple reallocs (200 -> 100 -> 50 -> 150 -> 425), calloc is used for the original allocation.
+  char *ptr = (char *)calloc(1, 200);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(200U, malloc_usable_size(ptr));
+
+  ptr = (char *)realloc(ptr, 100);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(100U, malloc_usable_size(ptr));
+  for (size_t i = 0; i < 100; i++) {
+    ASSERT_EQ(0, ptr[i]);
+  }
+
+  ptr = (char*)realloc(ptr, 50);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(50U, malloc_usable_size(ptr));
+  for (size_t i = 0; i < 50; i++) {
+    ASSERT_EQ(0, ptr[i]);
+  }
+
+  ptr = (char*)realloc(ptr, 150);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(150U, malloc_usable_size(ptr));
+  for (size_t i = 0; i < 50; i++) {  // Only the 50 bytes carried over from the previous size are checked.
+    ASSERT_EQ(0, ptr[i]);
+  }
+  memset(ptr, 0, 150);  // Zero the whole 150 bytes so they are defined before the final grow.
+
+  ptr = (char*)realloc(ptr, 425);
+  ASSERT_TRUE(ptr != NULL);
+  ASSERT_LE(425U, malloc_usable_size(ptr));
+  for (size_t i = 0; i < 150; i++) {
+    ASSERT_EQ(0, ptr[i]);
+  }
+
+  free(ptr);
+}
diff --git a/tests/property_benchmark.cpp b/tests/property_benchmark.cpp
new file mode 100644
index 0000000..d10be91
--- /dev/null
+++ b/tests/property_benchmark.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "benchmark.h"
+#include <unistd.h>
+
+#define _REALLY_INCLUDE_SYS__SYSTEM_PROPERTIES_H_
+#include <sys/_system_properties.h>
+
+#include <vector>
+#include <string>
+
+extern void *__system_property_area__;
+
+#define TEST_NUM_PROPS \
+    Arg(1)->Arg(4)->Arg(16)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024)
+
+struct LocalPropertyTestState {  // Benchmark fixture: swaps in a private property area holding nprops random properties.
+    LocalPropertyTestState(int nprops) : nprops(nprops), valid(false) {  // valid stays false if setup fails.
+        static const char prop_name_chars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_";
+
+        char dir_template[] = "/data/nativetest/prop-XXXXXX";
+        char *dirname = mkdtemp(dir_template);
+        if (!dirname) {
+            perror("making temp file for test state failed (is /data/nativetest writable?)");
+            return;  // Nothing has been allocated yet; the destructor is a no-op while !valid.
+        }
+
+        old_pa = __system_property_area__;  // Remember the current area so the destructor can restore it.
+        __system_property_area__ = NULL;
+
+        pa_dirname = dirname;
+        pa_filename = pa_dirname + "/__properties__";
+
+        __system_property_set_filename(pa_filename.c_str());
+        __system_property_area_init();
+
+        names = new char* [nprops];
+        name_lens = new int[nprops];
+        values = new char* [nprops];
+        value_lens = new int[nprops];
+
+        srandom(nprops);  // Seed depends only on nprops.
+
+        for (int i = 0; i < nprops; i++) {
+            name_lens[i] = random() % PROP_NAME_MAX;
+            names[i] = new char[PROP_NAME_MAX + 1];  // Array allocation: released with delete[] in the destructor.
+            for (int j = 0; j < name_lens[i]; j++) {
+                names[i][j] = prop_name_chars[random() % (sizeof(prop_name_chars) - 1)];  // -1 skips the NUL.
+            }
+            names[i][name_lens[i]] = 0;  // Names are NUL-terminated because lookups take a C string.
+            value_lens[i] = random() % PROP_VALUE_MAX;
+            values[i] = new char[PROP_VALUE_MAX];  // Array allocation: released with delete[] in the destructor.
+            for (int j = 0; j < value_lens[i]; j++) {
+                values[i][j] = prop_name_chars[random() % (sizeof(prop_name_chars) - 1)];
+            }
+            __system_property_add(names[i], name_lens[i], values[i], value_lens[i]);
+        }
+
+        valid = true;
+    }
+
+    ~LocalPropertyTestState() {  // Restores the original property area and removes the temporary files.
+        if (!valid)
+            return;
+
+        __system_property_area__ = old_pa;
+
+        __system_property_set_filename(PROP_FILENAME);
+        unlink(pa_filename.c_str());
+        rmdir(pa_dirname.c_str());
+
+        for (int i = 0; i < nprops; i++) {
+            delete[] names[i];  // Must be delete[]: these were allocated with new char[...].
+            delete[] values[i];
+        }
+        delete[] names;
+        delete[] name_lens;
+        delete[] values;
+        delete[] value_lens;
+    }
+public:
+    const int nprops;  // Number of properties generated.
+    char **names;  // names[i]: NUL-terminated property name of length name_lens[i].
+    int *name_lens;
+    char **values;  // values[i]: value_lens[i] bytes of value data (no terminator is written).
+    int *value_lens;
+    bool valid;  // True only after the constructor ran to completion.
+
+private:
+    std::string pa_dirname;  // Temporary directory created by mkdtemp.
+    std::string pa_filename;  // Backing file for the private property area.
+    void *old_pa;  // Saved __system_property_area__ to restore on destruction.
+};
+
+static void BM_property_get(int iters, int nprops)  // Times iters lookups of randomly chosen names via __system_property_get.
+{
+    StopBenchmarkTiming();  // Keep fixture setup out of the measured time.
+
+    LocalPropertyTestState pa(nprops);
+    char value[PROP_VALUE_MAX];
+
+    if (!pa.valid)
+        return;
+
+    srandom(static_cast<unsigned int>(iters) * static_cast<unsigned int>(nprops));  // Unsigned multiply: int * int could overflow (UB).
+
+    StartBenchmarkTiming();
+
+    for (int i = 0; i < iters; i++) {
+        __system_property_get(pa.names[random() % nprops], value);
+    }
+    StopBenchmarkTiming();  // Stop before pa's destructor tears the fixture down.
+}
+BENCHMARK(BM_property_get)->TEST_NUM_PROPS;
+
+static void BM_property_find(int iters, int nprops)  // Times iters lookups of randomly chosen names via __system_property_find.
+{
+    StopBenchmarkTiming();  // Keep fixture setup out of the measured time.
+
+    LocalPropertyTestState pa(nprops);
+
+    if (!pa.valid)
+        return;
+
+    srandom(static_cast<unsigned int>(iters) * static_cast<unsigned int>(nprops));  // Unsigned multiply: int * int could overflow (UB).
+
+    StartBenchmarkTiming();
+
+    for (int i = 0; i < iters; i++) {
+        __system_property_find(pa.names[random() % nprops]);
+    }
+    StopBenchmarkTiming();  // Stop before pa's destructor tears the fixture down.
+}
+BENCHMARK(BM_property_find)->TEST_NUM_PROPS;
diff --git a/tests/pthread_test.cpp b/tests/pthread_test.cpp
index a86cadc..c7dbdc7 100644
--- a/tests/pthread_test.cpp
+++ b/tests/pthread_test.cpp
@@ -17,6 +17,7 @@
 #include <gtest/gtest.h>
 
 #include <errno.h>
+#include <limits.h>
 #include <pthread.h>
 #include <unistd.h>
 
@@ -317,3 +318,117 @@
 
   ASSERT_EQ(ESRCH, pthread_kill(dead_thread, 0));
 }
+
+TEST(pthread, pthread_join__multijoin) {  // A second pthread_join on a thread already being joined must fail with EINVAL.
+  bool done = false;
+
+  pthread_t t1;
+  ASSERT_EQ(0, pthread_create(&t1, NULL, SpinFn, &done));  // t1 is handed &done, which this test sets below.
+
+  pthread_t t2;
+  ASSERT_EQ(0, pthread_create(&t2, NULL, JoinFn, reinterpret_cast<void*>(t1)));  // t2 is handed t1's id.
+
+  sleep(1); // (Give t2 a chance to call pthread_join.)
+
+  // Multiple joins to the same thread should fail.
+  ASSERT_EQ(EINVAL, pthread_join(t1, NULL));
+
+  done = true;
+
+  // ...but t2's join on t1 still goes ahead (which we can tell because our join on t2 finishes).
+  void* join_result;
+  ASSERT_EQ(0, pthread_join(t2, &join_result));
+  ASSERT_EQ(0, static_cast<int>(reinterpret_cast<intptr_t>(join_result)));  // Via intptr_t: void* -> int directly is ill-formed on LP64.
+}
+
+static void* GetActualGuardSizeFn(void* arg) {  // Thread body: stores the calling thread's guard size through arg (a size_t*).
+  pthread_attr_t attributes;
+  pthread_getattr_np(pthread_self(), &attributes);  // Attributes of the running thread; return codes are not checked.
+  pthread_attr_getguardsize(&attributes, reinterpret_cast<size_t*>(arg));
+  return NULL;
+}
+
+static size_t GetActualGuardSize(const pthread_attr_t& attributes) {  // Guard size reported by a thread created with these attributes.
+  size_t result;  // Written by the new thread; stays uninitialized if thread creation fails (not checked).
+  pthread_t t;
+  pthread_create(&t, &attributes, GetActualGuardSizeFn, &result);
+  void* join_result;
+  pthread_join(t, &join_result);  // Wait for the thread so result has been written before it is returned.
+  return result;
+}
+
+static void* GetActualStackSizeFn(void* arg) {  // Thread body: stores the calling thread's stack size through arg (a size_t*).
+  pthread_attr_t attributes;
+  pthread_getattr_np(pthread_self(), &attributes);  // Attributes of the running thread; return codes are not checked.
+  pthread_attr_getstacksize(&attributes, reinterpret_cast<size_t*>(arg));
+  return NULL;
+}
+
+static size_t GetActualStackSize(const pthread_attr_t& attributes) {  // Stack size reported by a thread created with these attributes.
+  size_t result;  // Written by the new thread; stays uninitialized if thread creation fails (not checked).
+  pthread_t t;
+  pthread_create(&t, &attributes, GetActualStackSizeFn, &result);
+  void* join_result;
+  pthread_join(t, &join_result);  // Wait for the thread so result has been written before it is returned.
+  return result;
+}
+
+TEST(pthread, pthread_attr_setguardsize) {  // The attribute stores the guard size exactly as given, whatever its value.
+  pthread_attr_t attributes;
+  ASSERT_EQ(0, pthread_attr_init(&attributes));
+
+  // Get the default guard size (only the call's success is checked; the value is not used further).
+  size_t default_guard_size;
+  ASSERT_EQ(0, pthread_attr_getguardsize(&attributes, &default_guard_size));
+
+  // No such thing as too small: will be rounded up to one page by pthread_create.
+  ASSERT_EQ(0, pthread_attr_setguardsize(&attributes, 128));
+  size_t guard_size;
+  ASSERT_EQ(0, pthread_attr_getguardsize(&attributes, &guard_size));
+  ASSERT_EQ(128U, guard_size);  // The attribute keeps the unrounded value...
+  ASSERT_EQ(4096U, GetActualGuardSize(attributes));  // ...while a real thread reports 4096 (the test hard-codes this page size).
+
+  // Large enough and a multiple of the page size.
+  ASSERT_EQ(0, pthread_attr_setguardsize(&attributes, 32*1024));
+  ASSERT_EQ(0, pthread_attr_getguardsize(&attributes, &guard_size));
+  ASSERT_EQ(32*1024U, guard_size);
+
+  // Large enough but not a multiple of the page size; will be rounded up by pthread_create.
+  ASSERT_EQ(0, pthread_attr_setguardsize(&attributes, 32*1024 + 1));
+  ASSERT_EQ(0, pthread_attr_getguardsize(&attributes, &guard_size));
+  ASSERT_EQ(32*1024U + 1, guard_size);  // Only the stored attribute is checked here, not a live thread.
+}
+
+TEST(pthread, pthread_attr_setstacksize) {  // Checks rejected, page-multiple and non-page-multiple stack sizes.
+  pthread_attr_t attributes;
+  ASSERT_EQ(0, pthread_attr_init(&attributes));
+
+  // Get the default stack size.
+  size_t default_stack_size;
+  ASSERT_EQ(0, pthread_attr_getstacksize(&attributes, &default_stack_size));
+
+  // Too small.
+  ASSERT_EQ(EINVAL, pthread_attr_setstacksize(&attributes, 128));
+  size_t stack_size;
+  ASSERT_EQ(0, pthread_attr_getstacksize(&attributes, &stack_size));
+  ASSERT_EQ(default_stack_size, stack_size);  // A rejected size must leave the attribute unchanged.
+  ASSERT_GE(GetActualStackSize(attributes), default_stack_size);
+
+  // Large enough and a multiple of the page size.
+  ASSERT_EQ(0, pthread_attr_setstacksize(&attributes, 32*1024));
+  ASSERT_EQ(0, pthread_attr_getstacksize(&attributes, &stack_size));
+  ASSERT_EQ(32*1024U, stack_size);
+  ASSERT_EQ(GetActualStackSize(attributes), 32*1024U);
+
+  // Large enough but not a multiple of the page size; will be rounded up by pthread_create.
+  ASSERT_EQ(0, pthread_attr_setstacksize(&attributes, 32*1024 + 1));
+  ASSERT_EQ(0, pthread_attr_getstacksize(&attributes, &stack_size));
+  ASSERT_EQ(32*1024U + 1, stack_size);  // The attribute itself keeps the unrounded value.
+#if __BIONIC__
+  // Bionic rounds up, which is what POSIX allows.
+  ASSERT_EQ(GetActualStackSize(attributes), (32 + 4)*1024U);  // 32KiB plus one extra 4KiB step.
+#else
+  // glibc rounds down, in violation of POSIX. They document this in their BUGS section.
+  ASSERT_EQ(GetActualStackSize(attributes), 32*1024U);
+#endif
+}
diff --git a/tests/stack_protector_test.cpp b/tests/stack_protector_test.cpp
index 7946bbc..2ba8a87 100644
--- a/tests/stack_protector_test.cpp
+++ b/tests/stack_protector_test.cpp
@@ -119,7 +119,7 @@
 
 TEST(stack_protector_DeathTest, modify_stack_protector) {
   ::testing::FLAGS_gtest_death_test_style = "threadsafe";
-  ASSERT_EXIT(do_modify_stack_chk_guard(), testing::KilledBySignal(SIGSEGV), "");
+  ASSERT_EXIT(do_modify_stack_chk_guard(), testing::KilledBySignal(SIGABRT), "");
 }
 
 #endif
diff --git a/tests/stack_unwinding_test.cpp b/tests/stack_unwinding_test.cpp
new file mode 100644
index 0000000..3b18daa
--- /dev/null
+++ b/tests/stack_unwinding_test.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Contributed by: Intel Corporation
+ */
+
+#include <gtest/gtest.h>
+
+#if defined(i386) // Only our x86 unwinding is good enough. Switch to libunwind?
+
+extern "C" {
+  void do_test();
+}
+
+// We have to say "DeathTest" here so gtest knows to run this test (which exits)
+// in its own process. The test passes only if do_test() ends the process with exit code 42.
+TEST(stack_unwinding_DeathTest, unwinding_through_signal_frame) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  ASSERT_EXIT(do_test(), ::testing::ExitedWithCode(42), "");  // The empty pattern accepts any stderr output.
+}
+
+#endif
diff --git a/tests/stack_unwinding_test_impl.c b/tests/stack_unwinding_test_impl.c
new file mode 100644
index 0000000..b0099f0
--- /dev/null
+++ b/tests/stack_unwinding_test_impl.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Contributed by: Intel Corporation
+ */
+
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unwind.h>
+
+#define noinline __attribute__((__noinline__))
+#define unused __attribute__((__unused__))
+
+static noinline _Unwind_Reason_Code stop_fn(int a unused,  // Stop callback passed to _Unwind_ForcedUnwind; only 'action' is used.
+    _Unwind_Action action,
+    _Unwind_Exception_Class b unused, struct _Unwind_Exception* c unused,
+    struct _Unwind_Context* d unused, void* e unused) {
+  if ((action & _UA_END_OF_STACK) != 0) {
+    // We reached the end of the stack without executing foo_cleanup. Test failed.
+    abort();
+  }
+  return _URC_NO_REASON;  // Otherwise let the unwinder keep going.
+}
+
+static void noinline foo_cleanup(char* param unused) {  // Cleanup handler attached to foo()'s local variable.
+  exit(42);  // The exit code the gtest side checks with ExitedWithCode(42).
+}
+
+static void noinline do_crash() {  // Faults on purpose so the SIGSEGV handler runs.
+  char* ptr = NULL;
+  *ptr = 0; // Deliberately cause a SIGSEGV.
+}
+
+static void noinline foo() {  // Frame that owns the cleanup the unwinder is expected to run.
+  char c1 __attribute__((cleanup(foo_cleanup)));  // Registers foo_cleanup to run when c1 goes out of scope.
+  do_crash();
+}
+
+// SIGSEGV handler. We start forced stack unwinding here.
+// If libgcc can't find the DSO for the signal frame, stack unwinding stops early
+// and libgcc passes the _UA_END_OF_STACK flag to stop_fn.
+// Test pass condition: unwinding goes through the signal frame and foo_cleanup is executed.
+static void noinline sigsegv_handler(int param unused) {  // Signal handler: starts a forced unwind from inside the signal frame.
+  struct _Unwind_Exception* exception = (struct _Unwind_Exception*) malloc(sizeof(*exception));  // Result is not checked or freed.
+  memset(&exception->exception_class, 0, sizeof(exception->exception_class));
+  exception->exception_cleanup = 0;  // No cleanup callback for the exception object.
+  _Unwind_ForcedUnwind(exception, stop_fn, 0);  // stop_fn aborts if the end of the stack is reached.
+}
+
+void do_test() {  // Entry point called from the C++ gtest: install the handler, then crash inside foo().
+  signal(SIGSEGV, &sigsegv_handler);
+  foo();
+}
diff --git a/tests/statvfs_test.cpp b/tests/statvfs_test.cpp
new file mode 100644
index 0000000..8afc6fd
--- /dev/null
+++ b/tests/statvfs_test.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <sys/statvfs.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+TEST(statvfs, statvfs) {  // statvfs() by path: "/" everywhere, plus "/data/data" on bionic.
+  struct statvfs sb;
+  memset(&sb, 0, sizeof(sb));
+
+  ASSERT_EQ(0, statvfs("/", &sb));
+#if __BIONIC__  // The expectations below are only checked on bionic: no free space, zero fsid, read-only.
+  ASSERT_EQ(0U, sb.f_bfree);
+  ASSERT_EQ(0U, sb.f_ffree);
+  ASSERT_EQ(0U, sb.f_fsid);
+  ASSERT_TRUE((sb.f_flag & ST_RDONLY) != 0);
+#endif
+
+#if __BIONIC__  // "/data/data" is expected to be writable, mounted nosuid, with free space and a non-zero fsid.
+  ASSERT_EQ(0, statvfs("/data/data", &sb));
+  ASSERT_NE(0U, sb.f_bfree);
+  ASSERT_NE(0U, sb.f_ffree);
+  ASSERT_NE(0U, sb.f_fsid);
+  ASSERT_FALSE((sb.f_flag & ST_RDONLY) != 0);
+  ASSERT_TRUE((sb.f_flag & ST_NOSUID) != 0);
+#endif
+}
+
+TEST(statvfs, fstatvfs) {  // Same expectations as the statvfs test, but through an open file descriptor.
+  struct statvfs sb;
+  memset(&sb, 0, sizeof(sb));
+
+  int fd = open("/", O_RDONLY);  // open() itself is not checked; a bad fd makes the fstatvfs assertion fail.
+  ASSERT_EQ(0, fstatvfs(fd, &sb));
+  close(fd);
+#if __BIONIC__  // The expectations below are only checked on bionic: no free space, zero fsid, read-only.
+  ASSERT_EQ(0U, sb.f_bfree);
+  ASSERT_EQ(0U, sb.f_ffree);
+  ASSERT_EQ(0U, sb.f_fsid);
+  ASSERT_TRUE((sb.f_flag & ST_RDONLY) != 0);
+#endif
+
+#if __BIONIC__  // "/data/data" is expected to be writable, mounted nosuid, with free space and a non-zero fsid.
+  fd = open("/data/data", O_RDONLY);
+  ASSERT_EQ(0, fstatvfs(fd, &sb));
+  close(fd);
+  ASSERT_NE(0U, sb.f_bfree);
+  ASSERT_NE(0U, sb.f_ffree);
+  ASSERT_NE(0U, sb.f_fsid);
+  ASSERT_FALSE((sb.f_flag & ST_RDONLY) != 0);
+  ASSERT_TRUE((sb.f_flag & ST_NOSUID) != 0);
+#endif
+}
diff --git a/tests/stdio_test.cpp b/tests/stdio_test.cpp
index 4b5a1f9..2e779d8 100644
--- a/tests/stdio_test.cpp
+++ b/tests/stdio_test.cpp
@@ -193,3 +193,21 @@
 
   ASSERT_EQ(0, pclose(fp));
 }
+
+TEST(stdio, getc) {  // getc must return the file's characters one at a time, in order.
+  FILE* fp = fopen("/proc/version", "r");
+  ASSERT_TRUE(fp != NULL);
+  ASSERT_EQ('L', getc(fp));  // The test expects the file to begin with "Linux".
+  ASSERT_EQ('i', getc(fp));
+  ASSERT_EQ('n', getc(fp));
+  ASSERT_EQ('u', getc(fp));
+  ASSERT_EQ('x', getc(fp));
+  fclose(fp);
+}
+
+TEST(stdio, putc) {  // putc on a stream opened read-only must fail.
+  FILE* fp = fopen("/proc/version", "r");
+  ASSERT_TRUE(fp != NULL);
+  ASSERT_EQ(EOF, putc('x', fp));  // The stream was opened with "r", so the write is expected to return EOF.
+  fclose(fp);
+}
diff --git a/tests/stdlib_test.cpp b/tests/stdlib_test.cpp
index fed39f8..e5d7812 100644
--- a/tests/stdlib_test.cpp
+++ b/tests/stdlib_test.cpp
@@ -109,3 +109,26 @@
   ASSERT_STREQ(executable_path, p);
   free(p);
 }
+
+TEST(stdlib, qsort) {  // Sorts three fixed-size records by name, then sorts the already-sorted array again.
+  struct s {
+    char name[16];
+    static int comparator(const void* lhs, const void* rhs) {  // qsort callback: orders records by strcmp of name.
+      return strcmp(reinterpret_cast<const s*>(lhs)->name, reinterpret_cast<const s*>(rhs)->name);
+    }
+  };
+  s entries[3];
+  strcpy(entries[0].name, "charlie");  // Start in reverse order.
+  strcpy(entries[1].name, "bravo");
+  strcpy(entries[2].name, "alpha");
+
+  qsort(entries, 3, sizeof(s), s::comparator);
+  ASSERT_STREQ("alpha", entries[0].name);
+  ASSERT_STREQ("bravo", entries[1].name);
+  ASSERT_STREQ("charlie", entries[2].name);
+
+  qsort(entries, 3, sizeof(s), s::comparator);  // Sorting sorted input must leave the order unchanged.
+  ASSERT_STREQ("alpha", entries[0].name);
+  ASSERT_STREQ("bravo", entries[1].name);
+  ASSERT_STREQ("charlie", entries[2].name);
+}
diff --git a/tests/string_test.cpp b/tests/string_test.cpp
index 1720058..ef43f5d 100644
--- a/tests/string_test.cpp
+++ b/tests/string_test.cpp
@@ -131,14 +131,15 @@
 // expected result and then run function and compare what we got.
 // These tests contributed by Intel Corporation.
 // TODO: make these tests more intention-revealing and less random.
+template<class Character>
 struct StringTestState {
   StringTestState(size_t MAX_LEN) : MAX_LEN(MAX_LEN) {
     int max_alignment = 64;
 
     // TODO: fix the tests to not sometimes use twice their specified "MAX_LEN".
-    glob_ptr = reinterpret_cast<char*>(valloc(2 * MAX_LEN + max_alignment));
-    glob_ptr1 = reinterpret_cast<char*>(valloc(2 * MAX_LEN + max_alignment));
-    glob_ptr2 = reinterpret_cast<char*>(valloc(2 * MAX_LEN + max_alignment));
+    glob_ptr = reinterpret_cast<Character*>(valloc(2 * sizeof(Character) * MAX_LEN + max_alignment));
+    glob_ptr1 = reinterpret_cast<Character*>(valloc(2 * sizeof(Character) * MAX_LEN + max_alignment));
+    glob_ptr2 = reinterpret_cast<Character*>(valloc(2 * sizeof(Character) * MAX_LEN + max_alignment));
 
     InitLenArray();
 
@@ -163,12 +164,12 @@
   }
 
   const size_t MAX_LEN;
-  char *ptr, *ptr1, *ptr2;
+  Character *ptr, *ptr1, *ptr2;
   size_t n;
   int len[ITER + 1];
 
  private:
-  char *glob_ptr, *glob_ptr1, *glob_ptr2;
+  Character *glob_ptr, *glob_ptr1, *glob_ptr2;
 
   // Calculate input lengths and fill state.len with them.
   // Test small lengths with more density than big ones. Manually push
@@ -188,7 +189,7 @@
 };
 
 TEST(string, strcat) {
-  StringTestState state(SMALL);
+  StringTestState<char> state(SMALL);
   for (size_t i = 1; i < state.n; i++) {
     for (size_t j = 0; j < POS_ITER; j++) {
       state.NewIteration();
@@ -209,10 +210,178 @@
   }
 }
 
+// One-byte target with an empty ("\0") source: only the terminator is copied.
+TEST(string, strcpy2) {
+  char buf[1];
+  char* orig = strdup("");
+  strcpy(buf, orig);
+  ASSERT_EQ('\0', buf[0]);
+  free(orig);
+}
+
+// Multibyte target where the source is shorter than the target: bytes past the terminator stay untouched.
+TEST(string, strcpy3) {
+  char buf[10];
+  char* orig = strdup("12345");
+  memset(buf, 'A', sizeof(buf));  // Sentinel fill so untouched bytes can be detected.
+  strcpy(buf, orig);
+  ASSERT_EQ('1',  buf[0]);
+  ASSERT_EQ('2',  buf[1]);
+  ASSERT_EQ('3',  buf[2]);
+  ASSERT_EQ('4',  buf[3]);
+  ASSERT_EQ('5',  buf[4]);
+  ASSERT_EQ('\0', buf[5]);
+  ASSERT_EQ('A',  buf[6]);  // From here on the sentinel must still be present.
+  ASSERT_EQ('A',  buf[7]);
+  ASSERT_EQ('A',  buf[8]);
+  ASSERT_EQ('A',  buf[9]);
+  free(orig);
+}
+
+// Multibyte target where the source (9 characters plus terminator) fills the 10-byte target exactly.
+TEST(string, strcpy4) {
+  char buf[10];
+  char* orig = strdup("123456789");
+  memset(buf, 'A', sizeof(buf));
+  strcpy(buf, orig);
+  ASSERT_EQ('1',  buf[0]);
+  ASSERT_EQ('2',  buf[1]);
+  ASSERT_EQ('3',  buf[2]);
+  ASSERT_EQ('4',  buf[3]);
+  ASSERT_EQ('5',  buf[4]);
+  ASSERT_EQ('6',  buf[5]);
+  ASSERT_EQ('7',  buf[6]);
+  ASSERT_EQ('8',  buf[7]);
+  ASSERT_EQ('9',  buf[8]);
+  ASSERT_EQ('\0', buf[9]);  // The terminator lands in the last byte of the buffer.
+  free(orig);
+}
+
+TEST(string, strcat2) {  // Appending a short string leaves the bytes after the new terminator untouched.
+  char buf[10];
+  memset(buf, 'A', sizeof(buf));  // Sentinel fill so untouched bytes can be detected.
+  buf[0] = 'a';
+  buf[1] = '\0';  // buf now holds the one-character string "a".
+  char* res = strcat(buf, "01234");
+  ASSERT_EQ(buf, res);  // strcat returns its destination pointer.
+  ASSERT_EQ('a',  buf[0]);
+  ASSERT_EQ('0',  buf[1]);
+  ASSERT_EQ('1',  buf[2]);
+  ASSERT_EQ('2',  buf[3]);
+  ASSERT_EQ('3',  buf[4]);
+  ASSERT_EQ('4',  buf[5]);
+  ASSERT_EQ('\0', buf[6]);
+  ASSERT_EQ('A',  buf[7]);
+  ASSERT_EQ('A',  buf[8]);
+  ASSERT_EQ('A',  buf[9]);
+}
+
+TEST(string, strcat3) {  // Appending 8 characters to "a" fills the 10-byte buffer exactly.
+  char buf[10];
+  memset(buf, 'A', sizeof(buf));
+  buf[0] = 'a';
+  buf[1] = '\0';  // buf now holds the one-character string "a".
+  char* res = strcat(buf, "01234567");
+  ASSERT_EQ(buf, res);  // strcat returns its destination pointer.
+  ASSERT_EQ('a',  buf[0]);
+  ASSERT_EQ('0',  buf[1]);
+  ASSERT_EQ('1',  buf[2]);
+  ASSERT_EQ('2',  buf[3]);
+  ASSERT_EQ('3',  buf[4]);
+  ASSERT_EQ('4',  buf[5]);
+  ASSERT_EQ('5', buf[6]);
+  ASSERT_EQ('6',  buf[7]);
+  ASSERT_EQ('7',  buf[8]);
+  ASSERT_EQ('\0',  buf[9]);  // The terminator lands in the last byte of the buffer.
+}
+
+TEST(string, strncat2) {  // Limit (remaining space, 8) is larger than the 5-character source: whole source is appended.
+  char buf[10];
+  memset(buf, 'A', sizeof(buf));  // Sentinel fill so untouched bytes can be detected.
+  buf[0] = 'a';
+  buf[1] = '\0';  // buf now holds the one-character string "a".
+  char* res = strncat(buf, "01234", sizeof(buf) - strlen(buf) - 1);  // 10 - 1 - 1 = 8.
+  ASSERT_EQ(buf, res);  // strncat returns its destination pointer.
+  ASSERT_EQ('a',  buf[0]);
+  ASSERT_EQ('0',  buf[1]);
+  ASSERT_EQ('1',  buf[2]);
+  ASSERT_EQ('2',  buf[3]);
+  ASSERT_EQ('3',  buf[4]);
+  ASSERT_EQ('4',  buf[5]);
+  ASSERT_EQ('\0', buf[6]);
+  ASSERT_EQ('A',  buf[7]);
+  ASSERT_EQ('A',  buf[8]);
+  ASSERT_EQ('A',  buf[9]);
+}
+
+TEST(string, strncat3) {  // Limit (5) is smaller than the 10-character source: only 5 characters plus a terminator are appended.
+  char buf[10];
+  memset(buf, 'A', sizeof(buf));  // Sentinel fill so untouched bytes can be detected.
+  buf[0] = 'a';
+  buf[1] = '\0';  // buf now holds the one-character string "a".
+  char* res = strncat(buf, "0123456789", 5);
+  ASSERT_EQ(buf, res);  // strncat returns its destination pointer.
+  ASSERT_EQ('a',  buf[0]);
+  ASSERT_EQ('0',  buf[1]);
+  ASSERT_EQ('1',  buf[2]);
+  ASSERT_EQ('2',  buf[3]);
+  ASSERT_EQ('3',  buf[4]);
+  ASSERT_EQ('4',  buf[5]);
+  ASSERT_EQ('\0', buf[6]);
+  ASSERT_EQ('A',  buf[7]);
+  ASSERT_EQ('A',  buf[8]);
+  ASSERT_EQ('A',  buf[9]);
+}
+
+TEST(string, strncat4) {  // Limit (8) equals the source length: the result fills the 10-byte buffer exactly.
+  char buf[10];
+  memset(buf, 'A', sizeof(buf));
+  buf[0] = 'a';
+  buf[1] = '\0';  // buf now holds the one-character string "a".
+  char* res = strncat(buf, "01234567", 8);
+  ASSERT_EQ(buf, res);  // strncat returns its destination pointer.
+  ASSERT_EQ('a',  buf[0]);
+  ASSERT_EQ('0',  buf[1]);
+  ASSERT_EQ('1',  buf[2]);
+  ASSERT_EQ('2',  buf[3]);
+  ASSERT_EQ('3',  buf[4]);
+  ASSERT_EQ('4',  buf[5]);
+  ASSERT_EQ('5', buf[6]);
+  ASSERT_EQ('6',  buf[7]);
+  ASSERT_EQ('7',  buf[8]);
+  ASSERT_EQ('\0',  buf[9]);  // The terminator lands in the last byte of the buffer.
+}
+
+TEST(string, strncat5) {  // Limit (9) exceeds the 8-character source: the result is the same as with a limit of 8.
+  char buf[10];
+  memset(buf, 'A', sizeof(buf));
+  buf[0] = 'a';
+  buf[1] = '\0';  // buf now holds the one-character string "a".
+  char* res = strncat(buf, "01234567", 9);
+  ASSERT_EQ(buf, res);  // strncat returns its destination pointer.
+  ASSERT_EQ('a',  buf[0]);
+  ASSERT_EQ('0',  buf[1]);
+  ASSERT_EQ('1',  buf[2]);
+  ASSERT_EQ('2',  buf[3]);
+  ASSERT_EQ('3',  buf[4]);
+  ASSERT_EQ('4',  buf[5]);
+  ASSERT_EQ('5', buf[6]);
+  ASSERT_EQ('6',  buf[7]);
+  ASSERT_EQ('7',  buf[8]);
+  ASSERT_EQ('\0',  buf[9]);  // The terminator lands in the last byte of the buffer.
+}
+
+TEST(string, strchr_with_0) {  // Searching for '\0' must return a pointer to the string's terminator.
+  char buf[10];
+  const char* s = "01234";
+  memcpy(buf, s, strlen(s) + 1);  // +1 copies the terminator as well.
+  EXPECT_TRUE(strchr(buf, '\0') == (buf + strlen(s)));
+}
+
 TEST(string, strchr) {
   int seek_char = random() & 255;
 
-  StringTestState state(SMALL);
+  StringTestState<char> state(SMALL);
   for (size_t i = 1; i < state.n; i++) {
     for (size_t j = 0; j < POS_ITER; j++) {
       state.NewIteration();
@@ -243,7 +412,7 @@
 }
 
 TEST(string, strcmp) {
-  StringTestState state(SMALL);
+  StringTestState<char> state(SMALL);
   for (size_t i = 1; i < state.n; i++) {
     for (size_t j = 0; j < POS_ITER; j++) {
       state.NewIteration();
@@ -280,7 +449,7 @@
 }
 
 TEST(string, strcpy) {
-  StringTestState state(SMALL);
+  StringTestState<char> state(SMALL);
   for (size_t j = 0; j < POS_ITER; j++) {
     state.NewIteration();
 
@@ -307,41 +476,8 @@
 
 
 #if __BIONIC__
-// We have to say "DeathTest" here so gtest knows to run this test (which exits)
-// in its own process.
-TEST(string_DeathTest, strcpy_fortified) {
-  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
-  char buf[10];
-  char *orig = strdup("0123456789");
-  ASSERT_EXIT(strcpy(buf, orig), testing::KilledBySignal(SIGSEGV), "");
-  free(orig);
-}
-
-TEST(string_DeathTest, strlen_fortified) {
-  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
-  char buf[10];
-  memcpy(buf, "0123456789", sizeof(buf));
-  ASSERT_EXIT(printf("%d", strlen(buf)), testing::KilledBySignal(SIGSEGV), "");
-}
-
-TEST(string_DeathTest, strchr_fortified) {
-  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
-  char buf[10];
-  memcpy(buf, "0123456789", sizeof(buf));
-  ASSERT_EXIT(printf("%s", strchr(buf, 'a')), testing::KilledBySignal(SIGSEGV), "");
-}
-
-TEST(string_DeathTest, strrchr_fortified) {
-  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
-  char buf[10];
-  memcpy(buf, "0123456789", sizeof(buf));
-  ASSERT_EXIT(printf("%s", strrchr(buf, 'a')), testing::KilledBySignal(SIGSEGV), "");
-}
-#endif
-
-#if __BIONIC__
 TEST(string, strlcat) {
-  StringTestState state(SMALL);
+  StringTestState<char> state(SMALL);
   for (size_t i = 0; i < state.n; i++) {
     for (size_t j = 0; j < POS_ITER; j++) {
       state.NewIteration();
@@ -370,7 +506,7 @@
 
 #if __BIONIC__
 TEST(string, strlcpy) {
-  StringTestState state(SMALL);
+  StringTestState<char> state(SMALL);
   for (size_t j = 0; j < POS_ITER; j++) {
     state.NewIteration();
 
@@ -404,7 +540,7 @@
 #endif
 
 TEST(string, strncat) {
-  StringTestState state(SMALL);
+  StringTestState<char> state(SMALL);
   for (size_t i = 1; i < state.n; i++) {
     for (size_t j = 0; j < POS_ITER; j++) {
       state.NewIteration();
@@ -430,7 +566,7 @@
 }
 
 TEST(string, strncmp) {
-  StringTestState state(SMALL);
+  StringTestState<char> state(SMALL);
   for (size_t i = 1; i < state.n; i++) {
     for (size_t j = 0; j < POS_ITER; j++) {
       state.NewIteration();
@@ -467,7 +603,7 @@
 }
 
 TEST(string, strncpy) {
-  StringTestState state(SMALL);
+  StringTestState<char> state(SMALL);
   for (size_t j = 0; j < ITER; j++) {
     state.NewIteration();
 
@@ -495,7 +631,7 @@
 
 TEST(string, strrchr) {
   int seek_char = random() & 255;
-  StringTestState state(SMALL);
+  StringTestState<char> state(SMALL);
   for (size_t i = 1; i < state.n; i++) {
     for (size_t j = 0; j < POS_ITER; j++) {
       state.NewIteration();
@@ -527,7 +663,7 @@
 
 TEST(string, memchr) {
   int seek_char = random() & 255;
-  StringTestState state(SMALL);
+  StringTestState<char> state(SMALL);
   for (size_t i = 0; i < state.n; i++) {
     for (size_t j = 0; j < POS_ITER; j++) {
       state.NewIteration();
@@ -550,7 +686,7 @@
 
 TEST(string, memrchr) {
   int seek_char = random() & 255;
-  StringTestState state(SMALL);
+  StringTestState<char> state(SMALL);
   for (size_t i = 0; i < state.n; i++) {
     for (size_t j = 0; j < POS_ITER; j++) {
       state.NewIteration();
@@ -572,7 +708,7 @@
 }
 
 TEST(string, memcmp) {
-  StringTestState state(SMALL);
+  StringTestState<char> state(SMALL);
   for (size_t i = 0; i < state.n; i++) {
     for (size_t j = 0; j < POS_ITER; j++) {
       state.NewIteration();
@@ -593,8 +729,61 @@
   }
 }
 
+#if defined(__BIONIC__)
+extern "C" int __memcmp16(const unsigned short *ptr1, const unsigned short *ptr2, size_t n);
+
+// Checks __memcmp16 on two buffers that differ in at most one 16-bit unit.
+TEST(string, __memcmp16) {
+  StringTestState<unsigned short> state(SMALL);
+  for (size_t i = 0; i < state.n; i++) {
+    for (size_t j = 0; j < POS_ITER; j++) {
+      state.NewIteration();
+
+      // Two random 16-bit values: the common fill value and the replacement.
+      const unsigned short fill_value = rand() & 0xffff;
+      const unsigned short other_value = rand() & 0xffff;
+
+      std::fill(state.ptr1, state.ptr1 + state.MAX_LEN, fill_value);
+      std::fill(state.ptr2, state.ptr2 + state.MAX_LEN, fill_value);
+
+      // Overwrite one unit of the second buffer (index 0 when len[i] is 0).
+      const int pos = (state.len[i] != 0) ? (random() % state.len[i]) : 0;
+      state.ptr2[pos] = other_value;
+
+      const int expected = fill_value - other_value;
+      const int actual = __memcmp16(state.ptr1, state.ptr2, (size_t) state.MAX_LEN);
+      ASSERT_EQ(expected, actual);
+    }
+  }
+}
+#endif
+
+// Checks the sign of wmemcmp's result when at most one element differs.
+TEST(string, wmemcmp) {
+  StringTestState<wchar_t> state(SMALL);
+  for (size_t i = 0; i < state.n; i++) {
+    for (size_t j = 0; j < POS_ITER; j++) {
+      state.NewIteration();
+      // Mask with the low 8 * sizeof(wchar_t) bits set, applied to both values.
+      const long long mask = ((long long) 1 << 8 * sizeof(wchar_t)) - 1;
+      const int fill_value = rand() & mask;
+      const int other_value = rand() & mask;
+      wmemset(state.ptr1, fill_value, state.MAX_LEN);
+      wmemset(state.ptr2, fill_value, state.MAX_LEN);
+
+      // Replace one element of the second buffer (index 0 when len[i] is 0).
+      const int pos = (state.len[i] != 0) ? (random() % state.len[i]) : 0;
+      state.ptr2[pos] = other_value;
+
+      const int expected = fill_value - other_value;
+      const int actual = wmemcmp(state.ptr1, state.ptr2, (size_t) state.MAX_LEN);
+      ASSERT_EQ(signum(expected), signum(actual));
+    }
+  }
+}
+
 TEST(string, memcpy) {
-  StringTestState state(LARGE);
+  StringTestState<char> state(LARGE);
   int rand = random() & 255;
   for (size_t i = 0; i < state.n - 1; i++) {
     for (size_t j = 0; j < POS_ITER; j++) {
@@ -616,7 +805,7 @@
 }
 
 TEST(string, memset) {
-  StringTestState state(LARGE);
+  StringTestState<char> state(LARGE);
   char ch = random () & 255;
   for (size_t i = 0; i < state.n - 1; i++) {
     for (size_t j = 0; j < POS_ITER; j++) {
@@ -638,7 +827,7 @@
 }
 
 TEST(string, memmove) {
-  StringTestState state(LARGE);
+  StringTestState<char> state(LARGE);
   for (size_t i = 0; i < state.n - 1; i++) {
     for (size_t j = 0; j < POS_ITER; j++) {
       state.NewIteration();
@@ -659,7 +848,7 @@
 }
 
 TEST(string, bcopy) {
-  StringTestState state(LARGE);
+  StringTestState<char> state(LARGE);
   for (size_t i = 0; i < state.n; i++) {
     for (size_t j = 0; j < POS_ITER; j++) {
       state.NewIteration();
@@ -678,7 +867,7 @@
 }
 
 TEST(string, bzero) {
-  StringTestState state(LARGE);
+  StringTestState<char> state(LARGE);
   for (size_t j = 0; j < ITER; j++) {
     state.NewIteration();
 
diff --git a/tests/sys_stat_test.cpp b/tests/sys_stat_test.cpp
new file mode 100644
index 0000000..a23100a
--- /dev/null
+++ b/tests/sys_stat_test.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+
+// Sets both timestamps of a temporary file with futimens() and reads them back via fstat().
+TEST(sys_stat, futimens) {
+  FILE* fp = tmpfile();
+  ASSERT_TRUE(fp != NULL);
+  int fd = fileno(fp);
+  ASSERT_NE(fd, -1);
+
+  // times[0] is checked against st_atime below, times[1] against st_mtime.
+  timespec times[2];
+  times[0].tv_sec = 123;
+  times[1].tv_sec = 456;
+  times[0].tv_nsec = 0;
+  times[1].tv_nsec = 0;
+  ASSERT_EQ(0, futimens(fd, times)) << strerror(errno);
+
+  struct stat sb;
+  ASSERT_EQ(0, fstat(fd, &sb));
+  ASSERT_EQ(times[0].tv_sec, static_cast<long>(sb.st_atime));
+  ASSERT_EQ(times[1].tv_sec, static_cast<long>(sb.st_mtime));
+  fclose(fp);
+}
+
+// futimens() on descriptor -1 is expected to return -1 and set errno to EBADF.
+TEST(sys_stat, futimens_EBADF) {
+  timespec times[2];
+  times[0].tv_sec = 123;
+  times[1].tv_sec = 456;
+  times[0].tv_nsec = times[1].tv_nsec = 0;
+  ASSERT_EQ(-1, futimens(-1, times));
+  ASSERT_EQ(EBADF, errno);
+}
diff --git a/tests/system_properties_test.cpp b/tests/system_properties_test.cpp
new file mode 100644
index 0000000..9602607
--- /dev/null
+++ b/tests/system_properties_test.cpp
@@ -0,0 +1,286 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <string>
+
+#if __BIONIC__
+
+#define _REALLY_INCLUDE_SYS__SYSTEM_PROPERTIES_H_
+#include <sys/_system_properties.h>
+
+extern void *__system_property_area__;
+
+// Points the property area at a private file under /data/nativetest; the destructor restores it.
+struct LocalPropertyTestState {
+    LocalPropertyTestState() : valid(false) {
+        char dir_template[] = "/data/nativetest/prop-XXXXXX";
+        char *dirname = mkdtemp(dir_template);
+        if (dirname == NULL) {
+            perror("making temp file for test state failed (is /data/nativetest writable?)");
+            return;  // 'valid' stays false; the tests assert on it.
+        }
+
+        // Remember the current area pointer and clear it before re-initializing.
+        old_pa = __system_property_area__;
+        __system_property_area__ = NULL;
+
+        pa_dirname = dirname;
+        pa_filename = pa_dirname + "/__properties__";
+        __system_property_set_filename(pa_filename.c_str());
+        __system_property_area_init();
+        valid = true;
+    }
+
+    ~LocalPropertyTestState() {
+        if (valid) {
+            // Restore the saved pointer and PROP_FILENAME, then delete the temporary file and directory.
+            __system_property_area__ = old_pa;
+            __system_property_set_filename(PROP_FILENAME);
+            unlink(pa_filename.c_str());
+            rmdir(pa_dirname.c_str());
+        }
+    }
+public:
+    bool valid;
+private:
+    std::string pa_dirname;
+    std::string pa_filename;
+    void *old_pa;
+};
+
+// Adds three properties and checks that each one reads back with its own value.
+TEST(properties, add) {
+    LocalPropertyTestState pa;
+    ASSERT_TRUE(pa.valid);
+
+    // Populate the private property area.
+    ASSERT_EQ(0, __system_property_add("property", 8, "value1", 6));
+    ASSERT_EQ(0, __system_property_add("other_property", 14, "value2", 6));
+    ASSERT_EQ(0, __system_property_add("property_other", 14, "value3", 6));
+
+    // Each lookup is expected to return 6, the length of the stored value.
+    char propvalue[PROP_VALUE_MAX];
+    ASSERT_EQ(6, __system_property_get("property", propvalue));
+    ASSERT_STREQ(propvalue, "value1");
+    ASSERT_EQ(6, __system_property_get("other_property", propvalue));
+    ASSERT_STREQ(propvalue, "value2");
+    ASSERT_EQ(6, __system_property_get("property_other", propvalue));
+    ASSERT_STREQ(propvalue, "value3");
+}
+
+// Adds three properties, updates each to a new value, and reads the new values back.
+TEST(properties, update) {
+    LocalPropertyTestState pa;
+    ASSERT_TRUE(pa.valid);
+
+    char propvalue[PROP_VALUE_MAX];
+    prop_info *pi;
+
+    ASSERT_EQ(0, __system_property_add("property", 8, "oldvalue1", 9));
+    ASSERT_EQ(0, __system_property_add("other_property", 14, "value2", 6));
+    ASSERT_EQ(0, __system_property_add("property_other", 14, "value3", 6));
+
+    // Assert each update's status so a failed write is reported here rather than as a stale read.
+    pi = (prop_info *)__system_property_find("property");
+    ASSERT_NE((prop_info *)NULL, pi);
+    ASSERT_EQ(0, __system_property_update(pi, "value4", 6));
+
+    pi = (prop_info *)__system_property_find("other_property");
+    ASSERT_NE((prop_info *)NULL, pi);
+    ASSERT_EQ(0, __system_property_update(pi, "newvalue5", 9));
+
+    pi = (prop_info *)__system_property_find("property_other");
+    ASSERT_NE((prop_info *)NULL, pi);
+    ASSERT_EQ(0, __system_property_update(pi, "value6", 6));
+
+    ASSERT_EQ(6, __system_property_get("property", propvalue));
+    ASSERT_STREQ(propvalue, "value4");
+    ASSERT_EQ(9, __system_property_get("other_property", propvalue));
+    ASSERT_STREQ(propvalue, "newvalue5");
+    ASSERT_EQ(6, __system_property_get("property_other", propvalue));
+    ASSERT_STREQ(propvalue, "value6");
+}
+
+// Adds maximum-length properties until __system_property_add fails, then reads them all back.
+TEST(properties, fill) {
+    LocalPropertyTestState pa;
+    ASSERT_TRUE(pa.valid);
+    char prop_name[PROP_NAME_MAX];
+    char prop_value[PROP_VALUE_MAX];
+    char prop_value_ret[PROP_VALUE_MAX];
+    int count = 0;
+
+    for (;; count++) {
+        // "property_<n>" / "value_<n>", padded with 'a' / 'b' up to the last byte.
+        int len = snprintf(prop_name, PROP_NAME_MAX - 1, "property_%d", count);
+        memset(prop_name + len, 'a', PROP_NAME_MAX - 1 - len);
+        prop_name[PROP_NAME_MAX - 1] = 0;
+        len = snprintf(prop_value, PROP_VALUE_MAX - 1, "value_%d", count);
+        memset(prop_value + len, 'b', PROP_VALUE_MAX - 1 - len);
+        prop_value[PROP_VALUE_MAX - 1] = 0;
+
+        // Stop at the first failed add; 'count' then holds the number stored.
+        if (__system_property_add(prop_name, PROP_NAME_MAX - 1, prop_value, PROP_VALUE_MAX - 1) < 0)
+            break;
+    }
+
+    // For historical reasons at least 247 properties must be supported
+    ASSERT_GE(count, 247);
+
+    for (int i = 0; i < count; i++) {
+        // Rebuild the name and value that were written for index i.
+        int len = snprintf(prop_name, PROP_NAME_MAX - 1, "property_%d", i);
+        memset(prop_name + len, 'a', PROP_NAME_MAX - 1 - len);
+        prop_name[PROP_NAME_MAX - 1] = 0;
+        len = snprintf(prop_value, PROP_VALUE_MAX - 1, "value_%d", i);
+        memset(prop_value + len, 'b', PROP_VALUE_MAX - 1 - len);
+        prop_value[PROP_VALUE_MAX - 1] = 0;
+        memset(prop_value_ret, '\0', PROP_VALUE_MAX);
+
+        ASSERT_EQ(PROP_VALUE_MAX - 1, __system_property_get(prop_name, prop_value_ret));
+        ASSERT_EQ(0, memcmp(prop_value, prop_value_ret, PROP_VALUE_MAX));
+    }
+}
+
+// Property-iteration callback: increments the size_t counter that 'cookie' points to.
+static void foreach_test_callback(const prop_info *pi, void* cookie) {
+    ASSERT_NE((prop_info *)NULL, pi);
+    size_t &count = *static_cast<size_t *>(cookie);
+    count++;
+}
+
+// Expects __system_property_foreach to invoke the callback once per added property.
+TEST(properties, foreach) {
+    LocalPropertyTestState pa;
+    ASSERT_TRUE(pa.valid);
+
+    ASSERT_EQ(0, __system_property_add("property", 8, "value1", 6));
+    ASSERT_EQ(0, __system_property_add("other_property", 14, "value2", 6));
+    ASSERT_EQ(0, __system_property_add("property_other", 14, "value3", 6));
+    size_t count = 0;  // Incremented by foreach_test_callback on every invocation.
+    ASSERT_EQ(0, __system_property_foreach(foreach_test_callback, &count));
+    ASSERT_EQ(3U, count);
+}
+
+// Checks __system_property_find_nth for valid and out-of-range indices after three adds.
+TEST(properties, find_nth) {
+    LocalPropertyTestState pa;
+    ASSERT_TRUE(pa.valid);
+    ASSERT_EQ(0, __system_property_add("property", 8, "value1", 6));
+    ASSERT_EQ(0, __system_property_add("other_property", 14, "value2", 6));
+    ASSERT_EQ(0, __system_property_add("property_other", 14, "value3", 6));
+    // Indices 0..2 must each resolve to a property entry.
+    ASSERT_NE((const prop_info *)NULL, __system_property_find_nth(0));
+    ASSERT_NE((const prop_info *)NULL, __system_property_find_nth(1));
+    ASSERT_NE((const prop_info *)NULL, __system_property_find_nth(2));
+    // Every index past the last added property must yield NULL.
+    ASSERT_EQ((const prop_info *)NULL, __system_property_find_nth(3));
+    ASSERT_EQ((const prop_info *)NULL, __system_property_find_nth(4));
+    ASSERT_EQ((const prop_info *)NULL, __system_property_find_nth(5));
+    ASSERT_EQ((const prop_info *)NULL, __system_property_find_nth(100));
+    ASSERT_EQ((const prop_info *)NULL, __system_property_find_nth(200));
+    ASSERT_EQ((const prop_info *)NULL, __system_property_find_nth(247));
+}
+
+// Exercises failure paths: lookups of an unknown name and rejected add/update arguments.
+TEST(properties, errors) {
+    LocalPropertyTestState pa;
+    ASSERT_TRUE(pa.valid);
+    char prop_value[PROP_VALUE_MAX];  // Value buffers for __system_property_get are PROP_VALUE_MAX bytes, as in the other tests.
+    ASSERT_EQ(0, __system_property_add("property", 8, "value1", 6));
+    ASSERT_EQ(0, __system_property_add("other_property", 14, "value2", 6));
+    ASSERT_EQ(0, __system_property_add("property_other", 14, "value3", 6));
+    // A name that was never added: find yields a null result and get reports length 0.
+    ASSERT_EQ(0, __system_property_find("property1"));
+    ASSERT_EQ(0, __system_property_get("property1", prop_value));
+    // A name length of PROP_NAME_MAX, a value length of PROP_VALUE_MAX, and a NULL prop_info must all return -1.
+    ASSERT_EQ(-1, __system_property_add("name", PROP_NAME_MAX, "value", 5));
+    ASSERT_EQ(-1, __system_property_add("name", 4, "value", PROP_VALUE_MAX));
+    ASSERT_EQ(-1, __system_property_update(NULL, "value", PROP_VALUE_MAX));
+}
+
+// Expects a property's serial number to change when its value is updated.
+TEST(properties, serial) {
+    LocalPropertyTestState pa;
+    ASSERT_TRUE(pa.valid);
+    const prop_info *pi;
+
+    ASSERT_EQ(0, __system_property_add("property", 8, "value1", 6));
+    ASSERT_NE((const prop_info *)NULL, pi = __system_property_find("property"));
+    const unsigned int serial = __system_property_serial(pi);  // Serial before the update.
+    ASSERT_EQ(0, __system_property_update((prop_info *)pi, "value2", 6));
+    ASSERT_NE(serial, __system_property_serial(pi));
+}
+
+// Thread body used by the 'wait' test: after a 100 ms sleep it sets the int
+// that 'arg' points to and then updates "property". Always returns NULL.
+static void *PropertyWaitHelperFn(void *arg)
+{
+    int *flag = static_cast<int *>(arg);
+    prop_info *pi = (prop_info *)__system_property_find("property");
+    usleep(100000);
+    // The flag is written before the update is issued.
+    *flag = 1;
+    __system_property_update(pi, "value3", 6);
+    return NULL;
+}
+
+// Checks that __system_property_wait_any returns after an update made by another thread.
+TEST(properties, wait) {
+    LocalPropertyTestState pa;
+    ASSERT_TRUE(pa.valid);
+
+    ASSERT_EQ(0, __system_property_add("property", 8, "value1", 6));
+    unsigned int serial = __system_property_wait_any(0);
+    prop_info *pi = (prop_info *)__system_property_find("property");
+    ASSERT_NE((prop_info *)NULL, pi);
+    __system_property_update(pi, "value2", 6);
+    serial = __system_property_wait_any(serial);
+
+    // The helper sets flag right before its update, so flag must be 1 once the wait returns.
+    int flag = 0;
+    pthread_t t;
+    ASSERT_EQ(0, pthread_create(&t, NULL, PropertyWaitHelperFn, &flag));
+    ASSERT_EQ(flag, 0);
+    serial = __system_property_wait_any(serial);
+    ASSERT_EQ(flag, 1);
+
+    void* result;
+    ASSERT_EQ(0, pthread_join(t, &result));
+}
+
+// Exit-status predicate for ASSERT_EXIT: true when the status reports
+// termination by SIGSEGV, SIGBUS or SIGABRT.
+class KilledByFault {
+    public:
+        explicit KilledByFault() {}
+        bool operator()(int exit_status) const {
+            if (!WIFSIGNALED(exit_status))
+                return false;
+            const int sig = WTERMSIG(exit_status);
+            return sig == SIGSEGV || sig == SIGBUS || sig == SIGABRT;
+        }
+};
+
+// With no LocalPropertyTestState in place, adding a property is expected to kill the process.
+TEST(properties_DeathTest, read_only) {
+    ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+    ASSERT_EXIT(__system_property_add("property", 8, "value", 5), KilledByFault(), "");
+}
+#endif
diff --git a/tests/time_test.cpp b/tests/time_test.cpp
index 9a5a706..0ad4763 100644
--- a/tests/time_test.cpp
+++ b/tests/time_test.cpp
@@ -42,3 +42,15 @@
   ASSERT_EQ(2678400, mktime_tz(&epoch, "PST"));
 }
 #endif
+
+TEST(time, gmtime) {  // time_t 0 must break down to 00:00:00 on 1 January 1970.
+  const time_t epoch_seconds = 0;
+  tm* broken_down = gmtime(&epoch_seconds);
+  ASSERT_TRUE(broken_down != NULL);
+  ASSERT_EQ(0, broken_down->tm_sec);
+  ASSERT_EQ(0, broken_down->tm_min);
+  ASSERT_EQ(0, broken_down->tm_hour);
+  ASSERT_EQ(1, broken_down->tm_mday);
+  ASSERT_EQ(0, broken_down->tm_mon);  // tm_mon is zero-based.
+  ASSERT_EQ(1970, broken_down->tm_year + 1900);
+}