Merge "Explain MTE mode upgrade use of BIONIC_SIGNAL_ART_PROFILER" into main
diff --git a/METADATA b/METADATA
deleted file mode 100644
index d97975c..0000000
--- a/METADATA
+++ /dev/null
@@ -1,3 +0,0 @@
-third_party {
-  license_type: NOTICE
-}
diff --git a/OWNERS b/OWNERS
index 3818b1d..1859b9e 100644
--- a/OWNERS
+++ b/OWNERS
@@ -5,3 +5,5 @@
 danalbert@google.com
 rprichard@google.com
 yabinc@google.com
+
+per-file docs/mte.md=eugenis@google.com,fmayer@google.com,pcc@google.com
diff --git a/android-changes-for-ndk-developers.md b/android-changes-for-ndk-developers.md
index e9cfbac..a96e105 100644
--- a/android-changes-for-ndk-developers.md
+++ b/android-changes-for-ndk-developers.md
@@ -47,10 +47,12 @@
 dynamic linker's caching code cached failures too, so it was necessary
 to topologically sort your libraries and load them in reverse order.
 
-If you need to support Android devices running OS versions older than
+This issue is no longer relevant to most developers,
+but if you need to support Android devices running OS versions older than
 API level 23, you might want to consider
-[ReLinker](https://github.com/KeepSafe/ReLinker) which claims to solve
-these and other problems automatically.
+[ReLinker](https://github.com/KeepSafe/ReLinker) or
+[SoLoader](https://github.com/facebook/SoLoader),
+which claim to solve these problems automatically.
 
 Alternatively, if you don't have too many dependencies, it can be easiest to
 simply link all of your code into one big library and sidestep the details of
@@ -76,6 +78,17 @@
 the local group. This allows ASAN, for example, to ensure that it can
 intercept any symbol.
 
+This issue is no longer relevant to most developers,
+but if you need to support Android devices running OS versions older than
+API level 23, you might want to consider
+[ReLinker](https://github.com/KeepSafe/ReLinker) or
+[SoLoader](https://github.com/facebook/SoLoader),
+which claim to solve these problems automatically.
+
+Alternatively, if you don't have too many dependencies, it can be easiest to
+simply link all of your code into one big library and sidestep the details of
+library and symbol lookup changes on all past (and future) Android versions.
+
 
 ## LD_PRELOAD and 32/64 bit
 
diff --git a/docs/mte.md b/docs/mte.md
new file mode 100644
index 0000000..3034cc7
--- /dev/null
+++ b/docs/mte.md
@@ -0,0 +1,244 @@
+# Arm Memory Tagging Extension (MTE) implementation
+
+AOSP supports Arm MTE to detect invalid memory accesses. The implementation is
+spread across multiple components, both within and out of the AOSP tree. This
+document gives an overview and pointers about how the various MTE features are
+implemented.
+
+For documentation of the behavior rather than the implementation, see the
+[SAC page on MTE] instead. For MTE for apps, see the [NDK page on MTE].
+
+The relevant components are:
+
+* [LLVM Project] (out of AOSP tree)
+    * Stack tagging instrumentation pass
+    * Scudo memory allocator
+* bionic
+    * libc
+    * dynamic loader
+* Zygote
+* debuggerd
+* [NDK]
+
+## MTE enablement
+
+The way MTE is requested and enabled differs between native binaries and Java
+apps. This is necessarily so, because Java apps get forked from the Zygote,
+while native executables get inintialized by the linker.
+
+### Native binaries
+
+Both AOSP and the NDK allow you to compile C/C++ code that use MTE to detect
+memory safety issues. The [NDK legacy cmake toolchain] and the
+[NDK new cmake toolchain] both support "memtag" as an argument for
+`ANDROID_SANITIZE`. NDK make has no specific support for MTE, but the
+relevant flags can be passed directly as `CFLAGS` and `LDFLAGS`.
+
+For the OS itself, [Soong] supports "memtag_[heap|stack|globals]" as
+`SANITIZE_TARGET  and as `sanitize:` attribute in Android.bp files;
+[Android make] supports the same environment variables as Soong. This passes
+the appropriate flags to the clang driver for both compile and link steps.
+
+#### Linker
+
+* For **dynamic executables** LLD has support to
+  [add appropriate dynamic sections] as defined in the [ELF standard]
+* For **static executables** and as a fallback for older devices, LLD
+  also supports [adding the Android-specific ELF note]
+
+Both of the above are controlled by the linker flag `--android-memtag-mode`
+which is [passed in by the clang driver] if
+`-fsanitize=memtag-[stack|heap|globals]` is [passed in].
+`-fsanitize=memtag` [enables all three] (even for API levels that don't
+implement the runtime for globals, which means builds from old versions
+of clang may no work with newer platform versions that support globals).
+`-fsanitize-memtag-mode` allows to choose between ASYNC and SYNC.
+
+This information can be queried using `llvm-readelf --memtag`.
+
+This information is [picked up by libc init] to decide whether to enable MTE.
+`-fsanitize-heap` controls both whether scudo tags allocations, and whether
+tag checking is enabled.
+
+#### Runtime environment (dynamic loader, libc)
+
+There are two different initialization sequences for libc, both of which end up
+calling `__libc_init_mte`.
+
+N.B. the linker has its own copy of libc, which is used when executing these
+functions. That is why we have to use `__libc_shared_globals` to communicate
+with the libc of the process we are starting.
+
+* **static executables** `__libc_init` is called from `crtbegin.c`, which calls
+                         `__libc_init_mte`
+* **dynamic executables** the linker calls `__libc_init_mte`
+
+`__libc_init_mte` figures out the appropriate MTE level that is requested by
+the process, calls `prctl` to request this from the kernel, and stores data in
+`__libc_shared_globals` which gets picked up later to enable MTE in scudo.
+
+It also does work related to stack tagging and permissive mode, which will be
+detailed later.
+
+### Apps
+
+Apps can request MTE be enabled for their process via the manifest attribute
+`android:memtagMode`. This gets interpreted by Zygote, which always runs with
+`ASYNC` MTE enabled, because MTE for a process can only be disabled after
+it has been initialized (see [Native binaries](#native-binaries)), not enabled.
+
+[decideTaggingLevel] in the Zygote figures out whether to enable MTE for
+an app, and stores it in the `runtimeFlags`, which get picked up by
+[SpecializeCommon] after forking from the Zygote.
+
+## MTE implementation
+
+### Heap Tagging
+
+Heap tagging is implemented in the scudo allocator. On `malloc` and `free`,
+scudo will update the memory's tags to prevent use-after-free and buffer
+overflows.
+
+[scudo's memtag.h] contains helper functions to deal with MTE tag management,
+which are used in [combined.h] and [secondary.h].
+
+
+### Stack Tagging
+
+Stack tagging requires instrumenting function bodies. It is implemented as
+an instrumentation pass in LLVM called [AArch64StackTagging], which sets
+the tags according to the lifetime of stack objects.
+
+The instrumentation pass also supports recording stack history, consisting of:
+
+* PC
+* Frame pointer
+* Base tag
+
+This can be used to reconstruct which stack object was referred to in an
+invalid access. The logic to reconstruct this can be found in the
+[stack script].
+
+
+Stack tagging is enabled in one of two circumstances:
+* at process startup, if the main binary or any of its dependencies are
+  compiled with `memtag-stack`
+* library compiled with `memtag-stack` is `dlopen`ed later, either directly or
+  as a dependency of a `dlopen`ed library. In this case, the
+  [__pthread_internal_remap_stack_with_mte] function is used (called from
+  `memtag_stack_dlopen_callback`). Because `dlopen`
+  is handled by the linker, we have to [store a function pointer] to the
+  process's version of the function in `__libc_shared_globals`.
+
+Enabling stack MTE consists of two operations:
+* Remapping the stacks as `PROT_MTE`
+* Allocating a stack history buffer.
+
+The first operation is only necessary when the process is running with MTE
+enabled. The second operation is also necessary when the process is not running
+with MTE enabled, because the writes to the stack history buffer are
+unconditional.
+
+libc keeps track of this through two globals:
+
+* `__libc_memtag_stack`:  whether stack MTE is enabled on the process, i.e.
+  whether the stack pages are mapped with PROT\_MTE. This is always false if
+  MTE is disabled for the process (i.e. `libc_globals.memtag` is false).
+* `__libc_memtag_stack_abi`: whether the process contains any code that was
+  compiled with memtag-stack. This is true even if the process does not have
+  MTE enabled.
+
+### Globals Tagging
+
+TODO(fmayer): write once submitted
+
+### Crash reporting
+
+For MTE crashes, debuggerd serializes special information into the Tombstone
+proto:
+
+* Tags around fault address
+* Scudo allocation history
+
+This is done in [tombstone\_proto.cpp]. The information is converted to a text
+proto in [tombstone\_proto\_to\_text.cpp].
+
+## Bootloader control
+
+The bootloader API allows userspace to enable MTE on devices that do not ship
+with MTE enabled by default.
+
+See [SAC MTE bootloader support] for the API definition. In AOSP, this API is
+implemented in [system/extras/mtectrl]. mtectrl.rc handles the property
+changes and invokes mtectrl to update the misc partition to communicate
+with the bootloader.
+
+There is also an [API in Device Policy Manager] that allows the device admin
+to enable or disable MTE under certain circumstances.
+
+The device can opt in or out of these APIs by a set of system properties:
+
+* `ro.arm64.memtag.bootctl_supported`: the system property API is supported,
+  and an option is displayed in Developer Options.
+* `ro.arm64.memtag.bootctl_settings_toggle`: an option is displayed in the
+  normal settings. This requires `ro.arm64.memtag.bootctl_supported` to be
+  true. This implies `ro.arm64.memtag.bootctl_device_policy_manager`, if it
+  is not explicitely set.
+* `ro.arm64.memtag.bootctl_device_policy_manager`: the Device Policy Manager
+  API is supported.
+
+## Permissive MTE
+
+Permissive MTE refers to a mode which, instead of crashing the process on an
+MTE fault, records a tombstone but then continues execution of the process.
+An important caveat is that system calls with invalid pointers (where the
+pointer tag does not match the memory tag) still return an error code.
+
+This mode is only available for system services, not apps. It is implemented
+in the [debugger\_signal\_handler] by disabling MTE for the faulting thread.
+Optionally, the user can ask for MTE to be re-enabled after some time.
+This is achieved by arming a timer that calls [enable_mte_signal_handler]
+upon expiry.
+
+## MTE Mode Upgrade
+
+When a system service [crashes in ASYNC mode], we set an impossible signal
+as an exit code (because that signal is always gracefully handled by libc),
+and [in init] we set `BIONIC_MEMTAG_UPGRADE_SECS`, which gets handled by
+[libc startup].
+
+[SpecializeCommon]: https://cs.android.com/android/platform/superproject/main/+/main:frameworks/base/core/jni/com_android_internal_os_Zygote.cpp?q=f:frameworks%2Fbase%2Fcore%2Fjni%2Fcom_android_internal_os_Zygote.cpp%20%22%20mallopt(M_BIONIC_SET_HEAP_TAGGING_LEVEL,%22&ss=android%2Fplatform%2Fsuperproject%2Fmain
+[LLVM Project]: https://github.com/llvm/llvm-project/
+[NDK]: https://android.googlesource.com/platform/ndk/
+[NDK legacy cmake toolchain]: https://android.googlesource.com/platform/ndk/+/refs/heads/main/build/cmake/android-legacy.toolchain.cmake#490
+[NDK new cmake toolchain]: https://android.googlesource.com/platform/ndk/+/refs/heads/main/build/cmake/flags.cmake#56
+[Soong]: https://cs.android.com/android/platform/superproject/main/+/main:build/soong/cc/sanitize.go?q=sanitize.go&ss=android%2Fplatform%2Fsuperproject%2Fmain
+[decideTaggingLevel]: https://cs.android.com/android/platform/superproject/main/+/main:frameworks/base/core/java/com/android/internal/os/Zygote.java?q=symbol:decideTaggingLevel
+[picked up by libc init]: https://cs.android.com/android/platform/superproject/main/+/main:bionic/libc/bionic/libc_init_mte.cpp?q=symbol:__get_tagging_level%20f:bionic
+[enables all three]: https://github.com/llvm/llvm-project/blob/e732d1ce86783b1d7fe30645fcb30434109505b9/clang/include/clang/Basic/Sanitizers.def#L62
+[passed in]: https://github.com/llvm/llvm-project/blob/ff2e619dfcd77328812a42d2ba2b11c3ff96f410/clang/lib/Driver/SanitizerArgs.cpp#L719
+[passed in by the clang driver]: https://github.com/llvm/llvm-project/blob/ff2e619dfcd77328812a42d2ba2b11c3ff96f410/clang/lib/Driver/ToolChains/CommonArgs.cpp#L1595
+[adding the Android-specific ELF note]: https://github.com/llvm/llvm-project/blob/435cb0dc5eca08cdd8d9ed0d887fa1693cc2bf33/lld/ELF/Driver.cpp#L1258
+[ELF standard]: https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#6dynamic-section
+[add appropriate dynamic sections]: https://github.com/llvm/llvm-project/blob/7022498ac2f236e411e8a0f9a48669e754000a4b/lld/ELF/SyntheticSections.cpp#L1473
+[storeTags]: https://cs.android.com/android/platform/superproject/main/+/main:external/scudo/standalone/memtag.h?q=f:scudo%20f:memtag.h%20function:storeTags
+[SAC page on MTE]: https://source.android.com/docs/security/test/memory-safety/arm-mte
+[NDK page on MTE]: https://developer.android.com/ndk/guides/arm-mte
+[AArch64StackTagging]: https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+[scudo's memtag.h]: https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/scudo/standalone/memtag.h
+[combined.h]: https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/scudo/standalone/combined.h
+[secondary.h]: https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/scudo/standalone/secondary.h
+[__pthread_internal_remap_stack_with_mte]: https://cs.android.com/android/platform/superproject/main/+/main:bionic/libc/bionic/pthread_internal.cpp?q=__pthread_internal_remap_stack_with_mte
+[stack script]: https://cs.android.com/android/platform/superproject/main/+/main:development/scripts/stack?q=stack
+[Android make]: https://cs.android.com/android/platform/superproject/main/+/main:build/make/core/config_sanitizers.mk
+[store a function pointer]: https://cs.android.com/android/platform/superproject/main/+/main:bionic/libc/bionic/libc_init_dynamic.cpp;l=168?q=memtag_stack_dlopen_callback
+[tombstone\_proto.cpp]: https://cs.android.com/android/platform/superproject/main/+/main:system/core/debuggerd/libdebuggerd/tombstone_proto.cpp?q=tombstone_proto.cpp
+[tombstone\_proto\_to\_text.cpp]: https://cs.android.com/android/platform/superproject/main/+/main:system/core/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
+[SAC MTE bootloader support]: https://source.android.com/docs/security/test/memory-safety/bootloader-support
+[system/extras/mtectrl]: https://cs.android.com/android/platform/superproject/main/+/main:system/extras/mtectrl/
+[API in Device Policy Manager]: https://cs.android.com/android/platform/superproject/main/+/main:frameworks/base/core/java/android/app/admin/DevicePolicyManager.java?q=symbol:setMtePolicy%20f:DevicePolicyManager.java
+[debuggerd\_signal_handler]: https://cs.android.com/android/platform/superproject/main/+/main:system/core/debuggerd/handler/debuggerd_handler.cpp?q=f:debuggerd_handler.cpp%20symbol:debuggerd_signal_handler
+[enable_mte_signal_handler]: https://cs.android.com/android/platform/superproject/main/+/main:bionic/libc/bionic/libc_init_mte.cpp?q=symbol:__enable_mte_signal_handler
+[in init]: https://cs.android.com/android/platform/superproject/main/+/main:system/core/init/service.cpp?q=f:system%2Fcore%2Finit%2Fservice.cpp%20should_upgrade_mte
+[crashes in ASYNC mode]: https://cs.android.com/android/platform/superproject/main/+/main:system/core/debuggerd/handler/debuggerd_handler.cpp;l=799?q=BIONIC_SIGNAL_ART_PROFILER
+[libc startup]: https://cs.android.com/android/platform/superproject/main/+/main:bionic/libc/bionic/libc_init_mte.cpp?q=BIONIC_MEMTAG_UPGRADE_SECS
diff --git a/docs/status.md b/docs/status.md
index 94784de..e7111d9 100644
--- a/docs/status.md
+++ b/docs/status.md
@@ -62,6 +62,7 @@
 New libc functions in API level 36:
   * `qsort_r`, `sig2str`/`str2sig` (POSIX Issue 8 additions).
   * GNU/BSD extension `lchmod`.
+  * GNU extensions `pthread_getaffinity_np`/`pthread_setaffinity_np`.
   * New system call wrapper: `mseal` (`<sys/mman.h>`).
 
 New libc functions in V (API level 35):
diff --git a/libc/Android.bp b/libc/Android.bp
index d9b3658..13d95b5 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -264,6 +264,7 @@
     name: "libc_init_static",
     defaults: ["libc_defaults"],
     srcs: [
+        "bionic/libc_init_mte.cpp",
         "bionic/libc_init_static.cpp",
         ":elf_note_sources",
     ],
@@ -470,7 +471,6 @@
         "upstream-netbsd/lib/libc/regex/regerror.c",
         "upstream-netbsd/lib/libc/regex/regexec.c",
         "upstream-netbsd/lib/libc/regex/regfree.c",
-        "upstream-netbsd/lib/libc/stdlib/bsearch.c",
         "upstream-netbsd/lib/libc/stdlib/drand48.c",
         "upstream-netbsd/lib/libc/stdlib/erand48.c",
         "upstream-netbsd/lib/libc/stdlib/jrand48.c",
@@ -622,11 +622,7 @@
     arch: {
         arm: {
             srcs: [
-                "upstream-openbsd/lib/libc/string/memchr.c",
-                "upstream-openbsd/lib/libc/string/memrchr.c",
                 "upstream-openbsd/lib/libc/string/stpncpy.c",
-                "upstream-openbsd/lib/libc/string/strlcat.c",
-                "upstream-openbsd/lib/libc/string/strlcpy.c",
                 "upstream-openbsd/lib/libc/string/strncat.c",
                 "upstream-openbsd/lib/libc/string/strncmp.c",
                 "upstream-openbsd/lib/libc/string/strncpy.c",
@@ -636,18 +632,13 @@
             srcs: [
                 "upstream-openbsd/lib/libc/string/strcat.c",
                 "upstream-openbsd/lib/libc/string/stpncpy.c",
-                "upstream-openbsd/lib/libc/string/strlcat.c",
-                "upstream-openbsd/lib/libc/string/strlcpy.c",
                 "upstream-openbsd/lib/libc/string/strncat.c",
                 "upstream-openbsd/lib/libc/string/strncpy.c",
             ],
         },
         riscv64: {
             srcs: [
-                "upstream-openbsd/lib/libc/string/memrchr.c",
                 "upstream-openbsd/lib/libc/string/stpncpy.c",
-                "upstream-openbsd/lib/libc/string/strlcat.c",
-                "upstream-openbsd/lib/libc/string/strlcpy.c",
             ],
         },
         x86: {
@@ -657,10 +648,7 @@
         },
         x86_64: {
             srcs: [
-                "upstream-openbsd/lib/libc/string/memchr.c",
-                "upstream-openbsd/lib/libc/string/memrchr.c",
-                "upstream-openbsd/lib/libc/string/strlcat.c",
-                "upstream-openbsd/lib/libc/string/strlcpy.c",
+                // x86_64 has custom/llvm-libc implementations of all of these.
             ],
         },
     },
@@ -940,6 +928,7 @@
         "bionic/pthread_detach.cpp",
         "bionic/pthread_equal.cpp",
         "bionic/pthread_exit.cpp",
+        "bionic/pthread_getaffinity.cpp",
         "bionic/pthread_getcpuclockid.cpp",
         "bionic/pthread_getschedparam.cpp",
         "bionic/pthread_gettid_np.cpp",
@@ -953,6 +942,7 @@
         "bionic/pthread_sigqueue.cpp",
         "bionic/pthread_self.cpp",
         "bionic/pthread_setname_np.cpp",
+        "bionic/pthread_setaffinity.cpp",
         "bionic/pthread_setschedparam.cpp",
         "bionic/pthread_spinlock.cpp",
         "bionic/ptrace.cpp",
@@ -1092,11 +1082,6 @@
                 "arch-arm/krait/bionic/memset.S",
 
                 "arch-arm/kryo/bionic/memcpy.S",
-
-                "bionic/strchr.cpp",
-                "bionic/strchrnul.cpp",
-                "bionic/strnlen.cpp",
-                "bionic/strrchr.cpp",
             ],
         },
         arm64: {
@@ -1134,25 +1119,6 @@
                 "arch-riscv64/string/strncmp_v.S",
                 "arch-riscv64/string/strncpy_v.S",
                 "arch-riscv64/string/strnlen_v.S",
-
-                "arch-riscv64/string/memchr.c",
-                "arch-riscv64/string/memcmp.c",
-                "arch-riscv64/string/memcpy.c",
-                "arch-riscv64/string/memmove.c",
-                "arch-riscv64/string/memset.c",
-                "arch-riscv64/string/stpcpy.c",
-                "arch-riscv64/string/strcat.c",
-                "arch-riscv64/string/strchr.c",
-                "arch-riscv64/string/strcmp.c",
-                "arch-riscv64/string/strcpy.c",
-                "arch-riscv64/string/strlen.c",
-                "arch-riscv64/string/strncat.c",
-                "arch-riscv64/string/strncmp.c",
-                "arch-riscv64/string/strncpy.c",
-                "arch-riscv64/string/strnlen.c",
-
-                "bionic/strchrnul.cpp",
-                "bionic/strrchr.cpp",
             ],
         },
 
@@ -1166,30 +1132,21 @@
                 "arch-x86/bionic/vfork.S",
                 "arch-x86/bionic/__x86.get_pc_thunk.S",
 
-                "arch-x86/string/sse2-memchr-atom.S",
                 "arch-x86/string/sse2-memmove-slm.S",
-                "arch-x86/string/sse2-memrchr-atom.S",
                 "arch-x86/string/sse2-memset-slm.S",
                 "arch-x86/string/sse2-stpcpy-slm.S",
                 "arch-x86/string/sse2-stpncpy-slm.S",
-                "arch-x86/string/sse2-strchr-atom.S",
                 "arch-x86/string/sse2-strcpy-slm.S",
                 "arch-x86/string/sse2-strlen-slm.S",
                 "arch-x86/string/sse2-strncpy-slm.S",
-                "arch-x86/string/sse2-strnlen-atom.S",
-                "arch-x86/string/sse2-strrchr-atom.S",
 
                 "arch-x86/string/ssse3-memcmp-atom.S",
                 "arch-x86/string/ssse3-strcat-atom.S",
                 "arch-x86/string/ssse3-strcmp-atom.S",
-                "arch-x86/string/ssse3-strlcat-atom.S",
-                "arch-x86/string/ssse3-strlcpy-atom.S",
                 "arch-x86/string/ssse3-strncat-atom.S",
                 "arch-x86/string/ssse3-strncmp-atom.S",
 
                 "arch-x86/string/sse4-memcmp-slm.S",
-
-                "bionic/strchrnul.cpp",
             ],
         },
         x86_64: {
@@ -1214,11 +1171,6 @@
                 "arch-x86_64/string/sse4-memcmp-slm.S",
                 "arch-x86_64/string/ssse3-strcmp-slm.S",
                 "arch-x86_64/string/ssse3-strncmp-slm.S",
-
-                "bionic/strchr.cpp",
-                "bionic/strchrnul.cpp",
-                "bionic/strnlen.cpp",
-                "bionic/strrchr.cpp",
             ],
         },
     },
@@ -1237,6 +1189,7 @@
     generated_headers: ["generated_android_ids"],
 
     whole_static_libs: [
+        "//external/llvm-libc:llvmlibc",
         "libsystemproperties",
     ],
 
@@ -1393,9 +1346,6 @@
         arm64: {
             srcs: ["arch-arm64/dynamic_function_dispatch.cpp"],
         },
-        riscv64: {
-            srcs: ["arch-riscv64/dynamic_function_dispatch.cpp"],
-        },
     },
     // Prevent the compiler from inserting calls to libc/taking the address of
     // a jump table from within an ifunc (or, in the static case, code that
@@ -1489,6 +1439,7 @@
     srcs: [
         "arch-common/bionic/crtbegin_so.c",
         "arch-common/bionic/crtbrand.S",
+        "bionic/android_mallopt.cpp",
         "bionic/gwp_asan_wrappers.cpp",
         "bionic/heap_tagging.cpp",
         "bionic/icu.cpp",
@@ -1507,6 +1458,7 @@
 filegroup {
     name: "libc_sources_static",
     srcs: [
+        "bionic/android_mallopt.cpp",
         "bionic/gwp_asan_wrappers.cpp",
         "bionic/heap_tagging.cpp",
         "bionic/icu_static.cpp",
@@ -1673,8 +1625,14 @@
     },
     native_bridge_supported: false,
     // It is never correct to depend on this directly. This is only
-    // needed for the runtime apex, and in base_system.mk.
-    visibility: ["//bionic/apex"],
+    // needed for the runtime apex, and in base_system.mk, and system_image_defaults
+    // which is default module for soong-defined system image.
+    visibility: [
+        "//bionic/apex",
+        "//build/make/target/product/generic",
+        //TODO(b/381985636) : Remove visibility to Soong-defined GSI once resolved
+        "//build/make/target/product/gsi",
+    ],
 }
 
 genrule {
@@ -2162,6 +2120,7 @@
         },
     },
     whole_static_libs: [
+        "//external/llvm-libc:llvmlibc",
         "libarm-optimized-routines-mem",
         "libc_netbsd",
     ],
diff --git a/libc/NOTICE b/libc/NOTICE
index 1fa036a..88d022f 100644
--- a/libc/NOTICE
+++ b/libc/NOTICE
@@ -3585,22 +3585,6 @@
 
 -------------------------------------------------------------------
 
-Copyright (c) 2007 Todd C. Miller <millert@openbsd.org>
-
-Permission to use, copy, modify, and distribute this software for any
-purpose with or without fee is hereby granted, provided that the above
-copyright notice and this permission notice appear in all copies.
-
-THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
--------------------------------------------------------------------
-
 Copyright (c) 2007-2008  Michael G Schwern
 
 This software originally derived from Paul Sheer's pivotal_gmtime_r.c.
@@ -4730,40 +4714,6 @@
 
 SPDX-License-Identifier: BSD-3-Clause
 
-Copyright (c) 1990, 1993
-   The Regents of the University of California.  All rights reserved.
-
-This code is derived from software contributed to Berkeley by
-Mike Hibler and Chris Torek.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-1. Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-2. Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the distribution.
-3. Neither the name of the University nor the names of its contributors
-   may be used to endorse or promote products derived from this software
-   without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGE.
-
--------------------------------------------------------------------
-
-SPDX-License-Identifier: BSD-3-Clause
-
 Copyright (c) 1992, 1993
    The Regents of the University of California.  All rights reserved.
 
diff --git a/libc/arch-riscv64/dynamic_function_dispatch.cpp b/libc/arch-riscv64/dynamic_function_dispatch.cpp
deleted file mode 100644
index ce6c028..0000000
--- a/libc/arch-riscv64/dynamic_function_dispatch.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (C) 2023 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <fcntl.h>
-#include <private/bionic_ifuncs.h>
-#include <stddef.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-
-extern "C" {
-
-static inline __always_inline int ifunc_faccessat(int dir_fd, const char* path, int mode) {
-  register long a0 __asm__("a0") = dir_fd;
-  register long a1 __asm__("a1") = reinterpret_cast<long>(path);
-  register long a2 __asm__("a2") = mode;
-  register long a7 __asm__("a7") = __NR_faccessat;
-  __asm__("ecall" : "=r"(a0) : "r"(a0), "r"(a1), "r"(a2), "r"(a7) : "memory");
-  return a0;
-}
-
-static bool have_fast_v() {
-  static bool result = []() {
-    // We don't want to do a full "bogomips" test, so just check for the
-    // presence of a file that would indicate that we're running in qemu.
-    return ifunc_faccessat(AT_FDCWD, "/dev/hvc0", F_OK) != 0;
-  }();
-  return result;
-}
-
-DEFINE_IFUNC_FOR(memchr) {
-  if (have_fast_v()) RETURN_FUNC(memchr_func_t, memchr_v);
-  RETURN_FUNC(memchr_func_t, memchr_gc);
-}
-MEMCHR_SHIM()
-
-DEFINE_IFUNC_FOR(memcmp) {
-  if (have_fast_v()) RETURN_FUNC(memcmp_func_t, memcmp_v);
-  RETURN_FUNC(memcmp_func_t, memcmp_gc);
-}
-MEMCMP_SHIM()
-
-DEFINE_IFUNC_FOR(memcpy) {
-  if (have_fast_v()) RETURN_FUNC(memcpy_func_t, memcpy_v);
-  RETURN_FUNC(memcpy_func_t, memcpy_gc);
-}
-MEMCPY_SHIM()
-
-DEFINE_IFUNC_FOR(memmove) {
-  if (have_fast_v()) RETURN_FUNC(memmove_func_t, memmove_v);
-  RETURN_FUNC(memmove_func_t, memmove_gc);
-}
-MEMMOVE_SHIM()
-
-DEFINE_IFUNC_FOR(memset) {
-  if (have_fast_v()) RETURN_FUNC(memset_func_t, memset_v);
-  RETURN_FUNC(memset_func_t, memset_gc);
-}
-MEMSET_SHIM()
-
-DEFINE_IFUNC_FOR(stpcpy) {
-  if (have_fast_v()) RETURN_FUNC(stpcpy_func_t, stpcpy_v);
-  RETURN_FUNC(stpcpy_func_t, stpcpy_gc);
-}
-STPCPY_SHIM()
-
-DEFINE_IFUNC_FOR(strcat) {
-  if (have_fast_v()) RETURN_FUNC(strcat_func_t, strcat_v);
-  RETURN_FUNC(strcat_func_t, strcat_gc);
-}
-STRCAT_SHIM()
-
-DEFINE_IFUNC_FOR(strchr) {
-  if (have_fast_v()) RETURN_FUNC(strchr_func_t, strchr_v);
-  RETURN_FUNC(strchr_func_t, strchr_gc);
-}
-STRCHR_SHIM()
-
-DEFINE_IFUNC_FOR(strcmp) {
-  if (have_fast_v()) RETURN_FUNC(strcmp_func_t, strcmp_v);
-  RETURN_FUNC(strcmp_func_t, strcmp_gc);
-}
-STRCMP_SHIM()
-
-DEFINE_IFUNC_FOR(strcpy) {
-  if (have_fast_v()) RETURN_FUNC(strcpy_func_t, strcpy_v);
-  RETURN_FUNC(strcpy_func_t, strcpy_gc);
-}
-STRCPY_SHIM()
-
-DEFINE_IFUNC_FOR(strlen) {
-  if (have_fast_v()) RETURN_FUNC(strlen_func_t, strlen_v);
-  RETURN_FUNC(strlen_func_t, strlen_gc);
-}
-STRLEN_SHIM()
-
-DEFINE_IFUNC_FOR(strncat) {
-  if (have_fast_v()) RETURN_FUNC(strncat_func_t, strncat_v);
-  RETURN_FUNC(strncat_func_t, strncat_gc);
-}
-STRNCAT_SHIM()
-
-DEFINE_IFUNC_FOR(strncmp) {
-  if (have_fast_v()) RETURN_FUNC(strncmp_func_t, strncmp_v);
-  RETURN_FUNC(strncmp_func_t, strncmp_gc);
-}
-STRNCMP_SHIM()
-
-DEFINE_IFUNC_FOR(strncpy) {
-  if (have_fast_v()) RETURN_FUNC(strncpy_func_t, strncpy_v);
-  RETURN_FUNC(strncpy_func_t, strncpy_gc);
-}
-STRNCPY_SHIM()
-
-DEFINE_IFUNC_FOR(strnlen) {
-  if (have_fast_v()) RETURN_FUNC(strnlen_func_t, strnlen_v);
-  RETURN_FUNC(strnlen_func_t, strnlen_gc);
-}
-STRNLEN_SHIM()
-
-}  // extern "C"
diff --git a/libc/arch-riscv64/string/memchr.c b/libc/arch-riscv64/string/memchr.c
deleted file mode 100644
index 34eb6d7..0000000
--- a/libc/arch-riscv64/string/memchr.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2023 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <upstream-openbsd/android/include/openbsd-compat.h>
-
-#define memchr memchr_gc
-#include <upstream-openbsd/lib/libc/string/memchr.c>
diff --git a/libc/arch-riscv64/string/memchr_v.S b/libc/arch-riscv64/string/memchr_v.S
index d4999c3..8833436 100644
--- a/libc/arch-riscv64/string/memchr_v.S
+++ b/libc/arch-riscv64/string/memchr_v.S
@@ -68,7 +68,7 @@
 #define vData v0
 #define vMask v8
 
-ENTRY(memchr_v)
+ENTRY(memchr)
 
 L(loop):
     vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
@@ -93,4 +93,4 @@
     add iResult, pSrc, iTemp
     ret
 
-END(memchr_v)
+END(memchr)
diff --git a/libc/arch-riscv64/string/memcmp.c b/libc/arch-riscv64/string/memcmp.c
deleted file mode 100644
index 2d7335a..0000000
--- a/libc/arch-riscv64/string/memcmp.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-3-Clause
- *
- * Copyright (c) 1990, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Chris Torek.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <string.h>
-
-/*
- * Compare memory regions.
- */
-int
-memcmp_gc(const void *s1, const void *s2, size_t n)
-{
-	if (n != 0) {
-		const unsigned char *p1 = s1, *p2 = s2;
-
-		do {
-			if (*p1++ != *p2++)
-				return (*--p1 - *--p2);
-		} while (--n != 0);
-	}
-	return (0);
-}
diff --git a/libc/arch-riscv64/string/memcmp_v.S b/libc/arch-riscv64/string/memcmp_v.S
index 55e08db..9c1ecdc 100644
--- a/libc/arch-riscv64/string/memcmp_v.S
+++ b/libc/arch-riscv64/string/memcmp_v.S
@@ -71,7 +71,7 @@
 #define vData2 v8
 #define vMask v16
 
-ENTRY(memcmp_v)
+ENTRY(memcmp)
 
 L(loop):
     vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
@@ -103,4 +103,4 @@
     sub iResult, iTemp1, iTemp2
     ret
 
-END(memcmp_v)
+END(memcmp)
diff --git a/libc/arch-riscv64/string/memcpy.c b/libc/arch-riscv64/string/memcpy.c
deleted file mode 100644
index ee11504..0000000
--- a/libc/arch-riscv64/string/memcpy.c
+++ /dev/null
@@ -1,2 +0,0 @@
-#define	MEMCOPY
-#include "bcopy.c"
diff --git a/libc/arch-riscv64/string/memcpy_v.S b/libc/arch-riscv64/string/memcpy_v.S
index 93ec60f..def1d9b 100644
--- a/libc/arch-riscv64/string/memcpy_v.S
+++ b/libc/arch-riscv64/string/memcpy_v.S
@@ -65,7 +65,7 @@
 #define ELEM_LMUL_SETTING m8
 #define vData v0
 
-ENTRY(memcpy_v)
+ENTRY(memcpy)
 
     mv pDstPtr, pDst
 
@@ -82,4 +82,4 @@
 
     ret
 
-END(memcpy_v)
+END(memcpy)
diff --git a/libc/arch-riscv64/string/memmove.c b/libc/arch-riscv64/string/memmove.c
deleted file mode 100644
index e9bb2c2..0000000
--- a/libc/arch-riscv64/string/memmove.c
+++ /dev/null
@@ -1,2 +0,0 @@
-#define	MEMMOVE
-#include "bcopy.c"
diff --git a/libc/arch-riscv64/string/memmove_v.S b/libc/arch-riscv64/string/memmove_v.S
index cad2b05..fa70f76 100644
--- a/libc/arch-riscv64/string/memmove_v.S
+++ b/libc/arch-riscv64/string/memmove_v.S
@@ -67,7 +67,7 @@
 #define ELEM_LMUL_SETTING m8
 #define vData v0
 
-ENTRY(memmove_v)
+ENTRY(memmove)
 
     mv pDstPtr, pDst
 
@@ -99,4 +99,4 @@
     bnez iNum, L(backward_copy_loop)
     ret
 
-END(memmove_v)
+END(memmove)
diff --git a/libc/arch-riscv64/string/memset.c b/libc/arch-riscv64/string/memset.c
deleted file mode 100644
index d51cbf9..0000000
--- a/libc/arch-riscv64/string/memset.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-3-Clause
- *
- * Copyright (c) 1990, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Mike Hibler and Chris Torek.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/types.h>
-
-#include <limits.h>
-
-#define	wsize	sizeof(u_long)
-#define	wmask	(wsize - 1)
-
-#include <string.h>
-
-#define	RETURN	return (dst0)
-#define	VAL	c0
-#define	WIDEVAL	c
-
-void *
-memset_gc(void *dst0, int c0, size_t length)
-{
-	size_t t;
-	u_long c;
-	u_char *dst;
-
-	dst = dst0;
-	/*
-	 * If not enough words, just fill bytes.  A length >= 2 words
-	 * guarantees that at least one of them is `complete' after
-	 * any necessary alignment.  For instance:
-	 *
-	 *	|-----------|-----------|-----------|
-	 *	|00|01|02|03|04|05|06|07|08|09|0A|00|
-	 *	          ^---------------------^
-	 *		 dst		 dst+length-1
-	 *
-	 * but we use a minimum of 3 here since the overhead of the code
-	 * to do word writes is substantial.
-	 *
-	 * TODO: This threshold might not be sensible for 64-bit u_long.
-	 * We should benchmark and revisit this decision.
-	 */
-	if (length < 3 * wsize) {
-		while (length != 0) {
-			*dst++ = VAL;
-			--length;
-		}
-		RETURN;
-	}
-
-	if ((c = (u_char)c0) != 0) {	/* Fill the word. */
-		c = (c << 8) | c;	/* u_long is 16 bits. */
-		c = (c << 16) | c;	/* u_long is 32 bits. */
-		c = (c << 32) | c;	/* u_long is 64 bits. */
-	}
-	/* Align destination by filling in bytes. */
-	if ((t = (long)dst & wmask) != 0) {
-		t = wsize - t;
-		length -= t;
-		do {
-			*dst++ = VAL;
-		} while (--t != 0);
-	}
-
-	/* Fill words.  Length was >= 2*words so we know t >= 1 here. */
-	t = length / wsize;
-	do {
-		*(u_long *)(void *)dst = WIDEVAL;
-		dst += wsize;
-	} while (--t != 0);
-
-	/* Mop up trailing bytes, if any. */
-	t = length & wmask;
-	if (t != 0)
-		do {
-			*dst++ = VAL;
-		} while (--t != 0);
-	RETURN;
-}
diff --git a/libc/arch-riscv64/string/memset_v.S b/libc/arch-riscv64/string/memset_v.S
index 06a2c6a..5aa525e 100644
--- a/libc/arch-riscv64/string/memset_v.S
+++ b/libc/arch-riscv64/string/memset_v.S
@@ -66,7 +66,7 @@
 #define ELEM_LMUL_SETTING m8
 #define vData v0
 
-ENTRY(memset_v)
+ENTRY(memset)
 
     mv pDstPtr, pDst
 
@@ -82,4 +82,4 @@
 
     ret
 
-END(memset_v)
+END(memset)
diff --git a/libc/arch-riscv64/string/stpcpy.c b/libc/arch-riscv64/string/stpcpy.c
deleted file mode 100644
index 2afcf99..0000000
--- a/libc/arch-riscv64/string/stpcpy.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2023 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <upstream-openbsd/android/include/openbsd-compat.h>
-
-#define stpcpy stpcpy_gc
-#include <upstream-openbsd/lib/libc/string/stpcpy.c>
diff --git a/libc/arch-riscv64/string/stpcpy_v.S b/libc/arch-riscv64/string/stpcpy_v.S
index 6a853ec..c5d0945 100644
--- a/libc/arch-riscv64/string/stpcpy_v.S
+++ b/libc/arch-riscv64/string/stpcpy_v.S
@@ -68,7 +68,7 @@
 #define vStr1 v8
 #define vStr2 v16
 
-ENTRY(stpcpy_v)
+ENTRY(stpcpy)
 L(stpcpy_loop):
     vsetvli iVL, zero, e8, ELEM_LMUL_SETTING, ta, ma
     vle8ff.v vStr1, (pSrc)
@@ -85,4 +85,4 @@
     sub pDstPtr, pDstPtr, iCurrentVL
     add pDstPtr, pDstPtr, iActiveElemPos
     ret
-END(stpcpy_v)
+END(stpcpy)
diff --git a/libc/arch-riscv64/string/strcat.c b/libc/arch-riscv64/string/strcat.c
deleted file mode 100644
index 5fb1621..0000000
--- a/libc/arch-riscv64/string/strcat.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2023 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <upstream-openbsd/android/include/openbsd-compat.h>
-
-#define strcat strcat_gc
-#include <upstream-openbsd/lib/libc/string/strcat.c>
diff --git a/libc/arch-riscv64/string/strcat_v.S b/libc/arch-riscv64/string/strcat_v.S
index 3d348e7..5abf295 100644
--- a/libc/arch-riscv64/string/strcat_v.S
+++ b/libc/arch-riscv64/string/strcat_v.S
@@ -69,7 +69,7 @@
 #define vStr1 v8
 #define vStr2 v16
 
-ENTRY(strcat_v)
+ENTRY(strcat)
 
     mv pDstPtr, pDst
 
@@ -104,4 +104,4 @@
 
     ret
 
-END(strcat_v)
+END(strcat)
diff --git a/libc/arch-riscv64/string/strchr.c b/libc/arch-riscv64/string/strchr.c
deleted file mode 100644
index dc07766..0000000
--- a/libc/arch-riscv64/string/strchr.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/*	$OpenBSD: strchr.c,v 1.4 2018/10/01 06:37:37 martijn Exp $ */
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <string.h>
-
-char *
-strchr_gc(const char *p, int ch)
-{
-	for (;; ++p) {
-		if (*p == (char) ch)
-			return((char *)p);
-		if (!*p)
-			return((char *)NULL);
-	}
-	/* NOTREACHED */
-}
diff --git a/libc/arch-riscv64/string/strchr_v.S b/libc/arch-riscv64/string/strchr_v.S
index bc7b58a..ea13c5d 100644
--- a/libc/arch-riscv64/string/strchr_v.S
+++ b/libc/arch-riscv64/string/strchr_v.S
@@ -69,7 +69,7 @@
 #define vMaskEnd v8
 #define vMaskCh v9
 
-ENTRY(strchr_v)
+ENTRY(strchr)
 
 L(strchr_loop):
     vsetvli iVL, zero, e8, ELEM_LMUL_SETTING, ta, ma
@@ -91,4 +91,4 @@
     add pStr, pStr, iChOffset
     ret
 
-END(strchr_v)
+END(strchr)
diff --git a/libc/arch-riscv64/string/strcmp.c b/libc/arch-riscv64/string/strcmp.c
deleted file mode 100644
index 7a1fefe..0000000
--- a/libc/arch-riscv64/string/strcmp.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*	$OpenBSD: strcmp.c,v 1.9 2015/08/31 02:53:57 guenther Exp $	*/
-
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Chris Torek.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <string.h>
-
-/*
- * Compare strings.
- */
-int
-strcmp_gc(const char *s1, const char *s2)
-{
-	while (*s1 == *s2++)
-		if (*s1++ == 0)
-			return (0);
-	return (*(unsigned char *)s1 - *(unsigned char *)--s2);
-}
diff --git a/libc/arch-riscv64/string/strcmp_v.S b/libc/arch-riscv64/string/strcmp_v.S
index 01e72b1..3332c83 100644
--- a/libc/arch-riscv64/string/strcmp_v.S
+++ b/libc/arch-riscv64/string/strcmp_v.S
@@ -74,7 +74,7 @@
 #define vMask1 v16
 #define vMask2 v17
 
-ENTRY(strcmp_v)
+ENTRY(strcmp)
 
     # increase the lmul using the following sequences:
     # 1/2, 1/2, 1, 2, 4, 4, 4, ...
@@ -166,4 +166,4 @@
     sub iResult, iTemp1, iTemp2
     ret
 
-END(strcmp_v)
+END(strcmp)
diff --git a/libc/arch-riscv64/string/strcpy.c b/libc/arch-riscv64/string/strcpy.c
deleted file mode 100644
index a624541..0000000
--- a/libc/arch-riscv64/string/strcpy.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/*	$OpenBSD: strcpy.c,v 1.10 2017/11/28 06:55:49 tb Exp $	*/
-
-/*
- * Copyright (c) 1988 Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <string.h>
-
-char *
-strcpy_gc(char *to, const char *from)
-{
-	char *save = to;
-
-	for (; (*to = *from) != '\0'; ++from, ++to);
-	return(save);
-}
diff --git a/libc/arch-riscv64/string/strcpy_v.S b/libc/arch-riscv64/string/strcpy_v.S
index 084b3a5..b89b1a8 100644
--- a/libc/arch-riscv64/string/strcpy_v.S
+++ b/libc/arch-riscv64/string/strcpy_v.S
@@ -69,7 +69,7 @@
 #define vStr1 v8
 #define vStr2 v16
 
-ENTRY(strcpy_v)
+ENTRY(strcpy)
 
     mv pDstPtr, pDst
 
@@ -88,4 +88,4 @@
 
     ret
 
-END(strcpy_v)
+END(strcpy)
diff --git a/libc/arch-riscv64/string/strlen.c b/libc/arch-riscv64/string/strlen.c
deleted file mode 100644
index ac8d27f..0000000
--- a/libc/arch-riscv64/string/strlen.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*	$OpenBSD: strlen.c,v 1.9 2015/08/31 02:53:57 guenther Exp $	*/
-
-/*-
- * Copyright (c) 1990, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <string.h>
-
-size_t
-strlen_gc(const char *str)
-{
-	const char *s;
-
-	for (s = str; *s; ++s)
-		;
-	return (s - str);
-}
diff --git a/libc/arch-riscv64/string/strlen_v.S b/libc/arch-riscv64/string/strlen_v.S
index c284021..7f7d2dd 100644
--- a/libc/arch-riscv64/string/strlen_v.S
+++ b/libc/arch-riscv64/string/strlen_v.S
@@ -66,7 +66,7 @@
 #define vStr v0
 #define vMaskEnd v2
 
-ENTRY(strlen_v)
+ENTRY(strlen)
 
     mv pCopyStr, pStr
 L(loop):
@@ -84,4 +84,4 @@
 
     ret
 
-END(strlen_v)
+END(strlen)
diff --git a/libc/arch-riscv64/string/strncat.c b/libc/arch-riscv64/string/strncat.c
deleted file mode 100644
index 8c26b95..0000000
--- a/libc/arch-riscv64/string/strncat.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2023 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <upstream-openbsd/android/include/openbsd-compat.h>
-
-#define strncat strncat_gc
-#include <upstream-openbsd/lib/libc/string/strncat.c>
diff --git a/libc/arch-riscv64/string/strncat_v.S b/libc/arch-riscv64/string/strncat_v.S
index adc768d..01cb14f 100644
--- a/libc/arch-riscv64/string/strncat_v.S
+++ b/libc/arch-riscv64/string/strncat_v.S
@@ -70,7 +70,7 @@
 #define vStr1 v8
 #define vStr2 v16
 
-ENTRY(strncat_v)
+ENTRY(strncat)
 
     mv pDstPtr, pDst
 
@@ -114,4 +114,4 @@
 L(fill_zero_end):
     ret
 
-END(strncat_v)
+END(strncat)
diff --git a/libc/arch-riscv64/string/strncmp.c b/libc/arch-riscv64/string/strncmp.c
deleted file mode 100644
index ebc5357..0000000
--- a/libc/arch-riscv64/string/strncmp.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2023 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <upstream-openbsd/android/include/openbsd-compat.h>
-
-#define strncmp strncmp_gc
-#include <upstream-openbsd/lib/libc/string/strncmp.c>
diff --git a/libc/arch-riscv64/string/strncmp_v.S b/libc/arch-riscv64/string/strncmp_v.S
index 1ce4817..b9e6ee2 100644
--- a/libc/arch-riscv64/string/strncmp_v.S
+++ b/libc/arch-riscv64/string/strncmp_v.S
@@ -71,7 +71,7 @@
 #define vMask1 v8
 #define vMask2 v9
 
-ENTRY(strncmp_v)
+ENTRY(strncmp)
 
     beqz iLength, L(zero_length)
 
@@ -116,4 +116,4 @@
     li iResult, 0
     ret
 
-END(strncmp_v)
+END(strncmp)
diff --git a/libc/arch-riscv64/string/strncpy.c b/libc/arch-riscv64/string/strncpy.c
deleted file mode 100644
index bbd1bd7..0000000
--- a/libc/arch-riscv64/string/strncpy.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2023 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <upstream-openbsd/android/include/openbsd-compat.h>
-
-#define strncpy strncpy_gc
-#include <upstream-openbsd/lib/libc/string/strncpy.c>
diff --git a/libc/arch-riscv64/string/strncpy_v.S b/libc/arch-riscv64/string/strncpy_v.S
index f133f28..651a064 100644
--- a/libc/arch-riscv64/string/strncpy_v.S
+++ b/libc/arch-riscv64/string/strncpy_v.S
@@ -71,7 +71,7 @@
 #define vStr1 v8
 #define vStr2 v16
 
-ENTRY(strncpy_v)
+ENTRY(strncpy)
 
     mv pDstPtr, pDst
 
@@ -111,4 +111,4 @@
 
     ret
 
-END(strncpy_v)
+END(strncpy)
diff --git a/libc/arch-riscv64/string/strnlen.c b/libc/arch-riscv64/string/strnlen.c
deleted file mode 100644
index 0e31c3b..0000000
--- a/libc/arch-riscv64/string/strnlen.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/*	$OpenBSD: strnlen.c,v 1.9 2019/01/25 00:19:25 millert Exp $	*/
-
-/*
- * Copyright (c) 2010 Todd C. Miller <millert@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <sys/types.h>
-
-#include <string.h>
-
-size_t
-strnlen_gc(const char *str, size_t maxlen)
-{
-	const char *cp;
-
-	for (cp = str; maxlen != 0 && *cp != '\0'; cp++, maxlen--)
-		;
-
-	return (size_t)(cp - str);
-}
diff --git a/libc/arch-riscv64/string/strnlen_v.S b/libc/arch-riscv64/string/strnlen_v.S
index bd1bb9a..66366f0 100644
--- a/libc/arch-riscv64/string/strnlen_v.S
+++ b/libc/arch-riscv64/string/strnlen_v.S
@@ -66,7 +66,7 @@
 #define vStr v0
 #define vMaskEnd v8
 
-ENTRY(strnlen_v)
+ENTRY(strnlen)
 
     mv pCopyStr, pStr
     mv iRetValue, iMaxlen
@@ -86,4 +86,4 @@
 L(end_strnlen_loop):
     ret
 
-END(strnlen_v)
+END(strnlen)
diff --git a/libc/arch-x86/string/sse2-memchr-atom.S b/libc/arch-x86/string/sse2-memchr-atom.S
deleted file mode 100644
index 013af9b..0000000
--- a/libc/arch-x86/string/sse2-memchr-atom.S
+++ /dev/null
@@ -1,556 +0,0 @@
-/*
-Copyright (c) 2011, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef L
-# define L(label)	.L##label
-#endif
-
-#ifndef cfi_startproc
-# define cfi_startproc	.cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc	.cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg)	.cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name)	\
-	.type name,  @function;	\
-	.globl name;	\
-	.p2align 4;	\
-name:	\
-	cfi_startproc
-#endif
-
-#ifndef END
-# define END(name)	\
-	cfi_endproc;	\
-	.size name,	.-name
-#endif
-
-#define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-#define PUSH(REG) pushl REG; CFI_PUSH (REG)
-#define POP(REG) popl REG; CFI_POP (REG)
-
-#define ENTRANCE PUSH (%edi);
-#define PARMS  8
-#define RETURN  POP (%edi); ret; CFI_PUSH (%edi);
-
-#define STR1  PARMS
-#define STR2  STR1+4
-#define LEN   STR2+4
-
-	.text
-ENTRY (memchr)
-	ENTRANCE
-	mov	STR1(%esp), %ecx
-	movd	STR2(%esp), %xmm1
-	mov	LEN(%esp), %edx
-	test	%edx, %edx
-	jz	L(return_null)
-
-	punpcklbw %xmm1, %xmm1
-	mov	%ecx, %edi
-	punpcklbw %xmm1, %xmm1
-
-	and	$63, %ecx
-	pshufd	$0, %xmm1, %xmm1
-	cmp	$48, %ecx
-	ja	L(crosscache)
-
-	movdqu	(%edi), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(match_case2_prolog)
-
-	sub	$16, %edx
-	jbe	L(return_null)
-	lea	16(%edi), %edi
-	and	$15, %ecx
-	and	$-16, %edi
-	add	%ecx, %edx
-	sub	$64, %edx
-	jbe	L(exit_loop)
-	jmp	L(loop_prolog)
-
-	.p2align 4
-L(crosscache):
-	and	$15, %ecx
-	and	$-16, %edi
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	sar	%cl, %eax
-	test	%eax, %eax
-
-	jnz	L(match_case2_prolog1)
-	lea	-16(%edx), %edx
-	add	%ecx, %edx
-	jle	L(return_null)
-	lea	16(%edi), %edi
-	sub	$64, %edx
-	jbe	L(exit_loop)
-
-	.p2align 4
-L(loop_prolog):
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	xor	%ecx, %ecx
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-	movdqa	16(%edi), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	lea	16(%ecx), %ecx
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-	movdqa	32(%edi), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	lea	16(%ecx), %ecx
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-	movdqa	48(%edi), %xmm4
-	pcmpeqb	%xmm1, %xmm4
-	lea	16(%ecx), %ecx
-	pmovmskb %xmm4, %eax
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-	lea	64(%edi), %edi
-	sub	$64, %edx
-	jbe	L(exit_loop)
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	xor	%ecx, %ecx
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-	movdqa	16(%edi), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	lea	16(%ecx), %ecx
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-	movdqa	32(%edi), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	lea	16(%ecx), %ecx
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-	movdqa	48(%edi), %xmm4
-	pcmpeqb	%xmm1, %xmm4
-	lea	16(%ecx), %ecx
-	pmovmskb %xmm4, %eax
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-	lea	64(%edi), %edi
-	mov	%edi, %ecx
-	and	$-64, %edi
-	and	$63, %ecx
-	add	%ecx, %edx
-
-	.p2align 4
-L(align64_loop):
-	sub	$64, %edx
-	jbe	L(exit_loop)
-	movdqa	(%edi), %xmm0
-	movdqa	16(%edi), %xmm2
-	movdqa	32(%edi), %xmm3
-	movdqa	48(%edi), %xmm4
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm1, %xmm2
-	pcmpeqb	%xmm1, %xmm3
-	pcmpeqb	%xmm1, %xmm4
-
-	pmaxub	%xmm0, %xmm3
-	pmaxub	%xmm2, %xmm4
-	pmaxub	%xmm3, %xmm4
-	add	$64, %edi
-	pmovmskb %xmm4, %eax
-
-	test	%eax, %eax
-	jz	L(align64_loop)
-
-	sub	$64, %edi
-
-	pmovmskb %xmm0, %eax
-	xor	%ecx, %ecx
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-	pmovmskb %xmm2, %eax
-	lea	16(%ecx), %ecx
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-	movdqa	32(%edi), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	lea	16(%ecx), %ecx
-	test	%eax, %eax
-	jnz	L(match_case1)
-
-	pcmpeqb	48(%edi), %xmm1
-	pmovmskb %xmm1, %eax
-	lea	16(%ecx), %ecx
-
-	.p2align 4
-L(match_case1):
-	add	%ecx, %edi
-	test	%al, %al
-	jz	L(match_case1_high)
-	mov	%al, %cl
-	and	$15, %cl
-	jz	L(match_case1_8)
-	test	$0x01, %al
-	jnz	L(exit_case1_1)
-	test	$0x02, %al
-	jnz	L(exit_case1_2)
-	test	$0x04, %al
-	jnz	L(exit_case1_3)
-	lea	3(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case1_8):
-	test	$0x10, %al
-	jnz	L(exit_case1_5)
-	test	$0x20, %al
-	jnz	L(exit_case1_6)
-	test	$0x40, %al
-	jnz	L(exit_case1_7)
-	lea	7(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case1_high):
-	mov	%ah, %ch
-	and	$15, %ch
-	jz	L(match_case1_high_8)
-	test	$0x01, %ah
-	jnz	L(exit_case1_9)
-	test	$0x02, %ah
-	jnz	L(exit_case1_10)
-	test	$0x04, %ah
-	jnz	L(exit_case1_11)
-	lea	11(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case1_high_8):
-	test	$0x10, %ah
-	jnz	L(exit_case1_13)
-	test	$0x20, %ah
-	jnz	L(exit_case1_14)
-	test	$0x40, %ah
-	jnz	L(exit_case1_15)
-	lea	15(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_loop):
-	add	$64, %edx
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	xor	%ecx, %ecx
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(match_case2)
-	cmp	$16, %edx
-	jbe	L(return_null)
-
-	movdqa	16(%edi), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	lea	16(%ecx), %ecx
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(match_case2)
-	cmp	$32, %edx
-	jbe	L(return_null)
-
-	movdqa	32(%edi), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	lea	16(%ecx), %ecx
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(match_case2)
-	cmp	$48, %edx
-	jbe	L(return_null)
-
-	pcmpeqb	48(%edi), %xmm1
-	lea	16(%ecx), %ecx
-	pmovmskb %xmm1, %eax
-	test	%eax, %eax
-	jnz	L(match_case2)
-
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(exit_case1_1):
-	mov	%edi, %eax
-	RETURN
-
-	.p2align 4
-L(exit_case1_2):
-	lea	1(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case1_3):
-	lea	2(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case1_5):
-	lea	4(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case1_6):
-	lea	5(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case1_7):
-	lea	6(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case1_9):
-	lea	8(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case1_10):
-	lea	9(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case1_11):
-	lea	10(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case1_13):
-	lea	12(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case1_14):
-	lea	13(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case1_15):
-	lea	14(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case2):
-	sub	%ecx, %edx
-L(match_case2_prolog1):
-	add	%ecx, %edi
-L(match_case2_prolog):
-	test	%al, %al
-	jz	L(match_case2_high)
-	mov	%al, %cl
-	and	$15, %cl
-	jz	L(match_case2_8)
-	test	$0x01, %al
-	jnz	L(exit_case2_1)
-	test	$0x02, %al
-	jnz	L(exit_case2_2)
-	test	$0x04, %al
-	jnz	L(exit_case2_3)
-	sub	$4, %edx
-	jb	L(return_null)
-	lea	3(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case2_8):
-	test	$0x10, %al
-	jnz	L(exit_case2_5)
-	test	$0x20, %al
-	jnz	L(exit_case2_6)
-	test	$0x40, %al
-	jnz	L(exit_case2_7)
-	sub	$8, %edx
-	jb	L(return_null)
-	lea	7(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case2_high):
-	mov	%ah, %ch
-	and	$15, %ch
-	jz	L(match_case2_high_8)
-	test	$0x01, %ah
-	jnz	L(exit_case2_9)
-	test	$0x02, %ah
-	jnz	L(exit_case2_10)
-	test	$0x04, %ah
-	jnz	L(exit_case2_11)
-	sub	$12, %edx
-	jb	L(return_null)
-	lea	11(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case2_high_8):
-	test	$0x10, %ah
-	jnz	L(exit_case2_13)
-	test	$0x20, %ah
-	jnz	L(exit_case2_14)
-	test	$0x40, %ah
-	jnz	L(exit_case2_15)
-	sub	$16, %edx
-	jb	L(return_null)
-	lea	15(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case2_1):
-	mov	%edi, %eax
-	RETURN
-
-	.p2align 4
-L(exit_case2_2):
-	sub	$2, %edx
-	jb	L(return_null)
-	lea	1(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case2_3):
-	sub	$3, %edx
-	jb	L(return_null)
-	lea	2(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case2_5):
-	sub	$5, %edx
-	jb	L(return_null)
-	lea	4(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case2_6):
-	sub	$6, %edx
-	jb	L(return_null)
-	lea	5(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case2_7):
-	sub	$7, %edx
-	jb	L(return_null)
-	lea	6(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case2_9):
-	sub	$9, %edx
-	jb	L(return_null)
-	lea	8(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case2_10):
-	sub	$10, %edx
-	jb	L(return_null)
-	lea	9(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case2_11):
-	sub	$11, %edx
-	jb	L(return_null)
-	lea	10(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case2_13):
-	sub	$13, %edx
-	jb	L(return_null)
-	lea	12(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case2_14):
-	sub	$14, %edx
-	jb	L(return_null)
-	lea	13(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(exit_case2_15):
-	sub	$15, %edx
-	jb	L(return_null)
-	lea	14(%edi), %eax
-	RETURN
-	.p2align 4
-L(return_null):
-	xor	%eax, %eax
-	RETURN
-END (memchr)
diff --git a/libc/arch-x86/string/sse2-memrchr-atom.S b/libc/arch-x86/string/sse2-memrchr-atom.S
deleted file mode 100644
index 1aa1a1a..0000000
--- a/libc/arch-x86/string/sse2-memrchr-atom.S
+++ /dev/null
@@ -1,778 +0,0 @@
-/*
-Copyright (c) 2011, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef L
-# define L(label)	.L##label
-#endif
-
-#ifndef cfi_startproc
-# define cfi_startproc	.cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc	.cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg)	.cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name)	\
-	.type name,  @function;	\
-	.globl name;	\
-	.p2align 4;	\
-name:	\
-	cfi_startproc
-#endif
-
-#ifndef END
-# define END(name)	\
-	cfi_endproc;	\
-	.size name,	.-name
-#endif
-
-#define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-#define PUSH(REG) pushl REG; CFI_PUSH (REG)
-#define POP(REG) popl REG; CFI_POP (REG)
-
-#define PARMS  4
-#define STR1  PARMS
-#define STR2  STR1+4
-#define LEN   STR2+4
-
-	.text
-ENTRY (memrchr)
-	mov	STR1(%esp), %ecx
-	movd	STR2(%esp), %xmm1
-	mov	LEN(%esp), %edx
-
-	test	%edx, %edx
-	jz	L(return_null)
-	sub	$16, %edx
-	jbe	L(length_less16)
-
-	punpcklbw %xmm1, %xmm1
-	add	%edx, %ecx
-	punpcklbw %xmm1, %xmm1
-
-	movdqu	(%ecx), %xmm0
-	pshufd	$0, %xmm1, %xmm1
-	pcmpeqb	%xmm1, %xmm0
-
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(exit_dispatch)
-
-	sub	$64, %ecx
-	mov	%ecx, %eax
-	and	$15, %eax
-	jz	L(loop_prolog)
-
-	add	$16, %ecx
-	add	$16, %edx
-	and	$-16, %ecx
-	sub	%eax, %edx
-
-	.p2align 4
-/* Loop start on aligned string.  */
-L(loop_prolog):
-	sub	$64, %edx
-	jbe	L(exit_loop)
-
-	movdqa	48(%ecx), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches48)
-
-	movdqa	32(%ecx), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches32)
-
-	movdqa	16(%ecx), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches16)
-
-	movdqa	(%ecx), %xmm4
-	pcmpeqb	%xmm1, %xmm4
-	pmovmskb %xmm4, %eax
-	test	%eax, %eax
-	jnz	L(exit_dispatch)
-
-	sub	$64, %ecx
-	sub	$64, %edx
-	jbe	L(exit_loop)
-
-	movdqa	48(%ecx), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches48)
-
-	movdqa	32(%ecx), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches32)
-
-	movdqa	16(%ecx), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches16)
-
-	movdqa	(%ecx), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(exit_dispatch)
-
-	mov	%ecx, %eax
-	and	$63, %eax
-	test	%eax, %eax
-	jz	L(align64_loop)
-
-	add	$64, %ecx
-	add	$64, %edx
-	and	$-64, %ecx
-	sub	%eax, %edx
-
-	.p2align 4
-L(align64_loop):
-	sub	$64, %ecx
-	sub	$64, %edx
-	jbe	L(exit_loop)
-
-	movdqa	(%ecx), %xmm0
-	movdqa	16(%ecx), %xmm2
-	movdqa	32(%ecx), %xmm3
-	movdqa	48(%ecx), %xmm4
-
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm1, %xmm2
-	pcmpeqb	%xmm1, %xmm3
-	pcmpeqb	%xmm1, %xmm4
-
-	pmaxub	%xmm3, %xmm0
-	pmaxub	%xmm4, %xmm2
-	pmaxub	%xmm0, %xmm2
-	pmovmskb %xmm2, %eax
-
-	test	%eax, %eax
-	jz	L(align64_loop)
-
-	pmovmskb %xmm4, %eax
-	test	%eax, %eax
-	jnz	L(matches48)
-
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches32)
-
-	movdqa	16(%ecx), %xmm2
-
-	pcmpeqb	%xmm1, %xmm2
-	pcmpeqb	(%ecx), %xmm1
-
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches16)
-
-	pmovmskb %xmm1, %eax
-	test	%ah, %ah
-	jnz	L(exit_dispatch_high)
-	mov	%al, %dl
-	and	$15 << 4, %dl
-	jnz	L(exit_dispatch_8)
-	test	$0x08, %al
-	jnz	L(exit_4)
-	test	$0x04, %al
-	jnz	L(exit_3)
-	test	$0x02, %al
-	jnz	L(exit_2)
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(exit_loop):
-	add	$64, %edx
-	cmp	$32, %edx
-	jbe	L(exit_loop_32)
-
-	movdqa	48(%ecx), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches48)
-
-	movdqa	32(%ecx), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	pmovmskb %xmm2, %eax
-	test	%eax, %eax
-	jnz	L(matches32)
-
-	movdqa	16(%ecx), %xmm3
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %eax
-	test	%eax, %eax
-	jnz	L(matches16_1)
-	cmp	$48, %edx
-	jbe	L(return_null)
-
-	pcmpeqb	(%ecx), %xmm1
-	pmovmskb %xmm1, %eax
-	test	%eax, %eax
-	jnz	L(matches0_1)
-	xor	%eax, %eax
-	ret
-
-	.p2align 4
-L(exit_loop_32):
-	movdqa	48(%ecx), %xmm0
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches48_1)
-	cmp	$16, %edx
-	jbe	L(return_null)
-
-	pcmpeqb	32(%ecx), %xmm1
-	pmovmskb %xmm1, %eax
-	test	%eax, %eax
-	jnz	L(matches32_1)
-	xor	%eax, %eax
-	ret
-
-	.p2align 4
-L(matches16):
-	lea	16(%ecx), %ecx
-	test	%ah, %ah
-	jnz	L(exit_dispatch_high)
-	mov	%al, %dl
-	and	$15 << 4, %dl
-	jnz	L(exit_dispatch_8)
-	test	$0x08, %al
-	jnz	L(exit_4)
-	test	$0x04, %al
-	jnz	L(exit_3)
-	test	$0x02, %al
-	jnz	L(exit_2)
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(matches32):
-	lea	32(%ecx), %ecx
-	test	%ah, %ah
-	jnz	L(exit_dispatch_high)
-	mov	%al, %dl
-	and	$15 << 4, %dl
-	jnz	L(exit_dispatch_8)
-	test	$0x08, %al
-	jnz	L(exit_4)
-	test	$0x04, %al
-	jnz	L(exit_3)
-	test	$0x02, %al
-	jnz	L(exit_2)
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(matches48):
-	lea	48(%ecx), %ecx
-
-	.p2align 4
-L(exit_dispatch):
-	test	%ah, %ah
-	jnz	L(exit_dispatch_high)
-	mov	%al, %dl
-	and	$15 << 4, %dl
-	jnz	L(exit_dispatch_8)
-	test	$0x08, %al
-	jnz	L(exit_4)
-	test	$0x04, %al
-	jnz	L(exit_3)
-	test	$0x02, %al
-	jnz	L(exit_2)
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(exit_dispatch_8):
-	test	$0x80, %al
-	jnz	L(exit_8)
-	test	$0x40, %al
-	jnz	L(exit_7)
-	test	$0x20, %al
-	jnz	L(exit_6)
-	lea	4(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_dispatch_high):
-	mov	%ah, %dh
-	and	$15 << 4, %dh
-	jnz	L(exit_dispatch_high_8)
-	test	$0x08, %ah
-	jnz	L(exit_12)
-	test	$0x04, %ah
-	jnz	L(exit_11)
-	test	$0x02, %ah
-	jnz	L(exit_10)
-	lea	8(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_dispatch_high_8):
-	test	$0x80, %ah
-	jnz	L(exit_16)
-	test	$0x40, %ah
-	jnz	L(exit_15)
-	test	$0x20, %ah
-	jnz	L(exit_14)
-	lea	12(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_2):
-	lea	1(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_3):
-	lea	2(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_4):
-	lea	3(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_6):
-	lea	5(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_7):
-	lea	6(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_8):
-	lea	7(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_10):
-	lea	9(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_11):
-	lea	10(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_12):
-	lea	11(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_14):
-	lea	13(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_15):
-	lea	14(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_16):
-	lea	15(%ecx), %eax
-	ret
-
-	.p2align 4
-L(matches0_1):
-	lea	-64(%edx), %edx
-
-	test	%ah, %ah
-	jnz	L(exit_dispatch_1_high)
-	mov	%al, %ah
-	and	$15 << 4, %ah
-	jnz	L(exit_dispatch_1_8)
-	test	$0x08, %al
-	jnz	L(exit_1_4)
-	test	$0x04, %al
-	jnz	L(exit_1_3)
-	test	$0x02, %al
-	jnz	L(exit_1_2)
-
-	add	$0, %edx
-	jl	L(return_null)
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(matches16_1):
-	lea	-48(%edx), %edx
-	lea	16(%ecx), %ecx
-
-	test	%ah, %ah
-	jnz	L(exit_dispatch_1_high)
-	mov	%al, %ah
-	and	$15 << 4, %ah
-	jnz	L(exit_dispatch_1_8)
-	test	$0x08, %al
-	jnz	L(exit_1_4)
-	test	$0x04, %al
-	jnz	L(exit_1_3)
-	test	$0x02, %al
-	jnz	L(exit_1_2)
-
-	add	$0, %edx
-	jl	L(return_null)
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(matches32_1):
-	lea	-32(%edx), %edx
-	lea	32(%ecx), %ecx
-
-	test	%ah, %ah
-	jnz	L(exit_dispatch_1_high)
-	mov	%al, %ah
-	and	$15 << 4, %ah
-	jnz	L(exit_dispatch_1_8)
-	test	$0x08, %al
-	jnz	L(exit_1_4)
-	test	$0x04, %al
-	jnz	L(exit_1_3)
-	test	$0x02, %al
-	jnz	L(exit_1_2)
-
-	add	$0, %edx
-	jl	L(return_null)
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(matches48_1):
-	lea	-16(%edx), %edx
-	lea	48(%ecx), %ecx
-
-	.p2align 4
-L(exit_dispatch_1):
-	test	%ah, %ah
-	jnz	L(exit_dispatch_1_high)
-	mov	%al, %ah
-	and	$15 << 4, %ah
-	jnz	L(exit_dispatch_1_8)
-	test	$0x08, %al
-	jnz	L(exit_1_4)
-	test	$0x04, %al
-	jnz	L(exit_1_3)
-	test	$0x02, %al
-	jnz	L(exit_1_2)
-
-	add	$0, %edx
-	jl	L(return_null)
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(exit_dispatch_1_8):
-	test	$0x80, %al
-	jnz	L(exit_1_8)
-	test	$0x40, %al
-	jnz	L(exit_1_7)
-	test	$0x20, %al
-	jnz	L(exit_1_6)
-
-	add	$4, %edx
-	jl	L(return_null)
-	lea	4(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_dispatch_1_high):
-	mov	%ah, %al
-	and	$15 << 4, %al
-	jnz	L(exit_dispatch_1_high_8)
-	test	$0x08, %ah
-	jnz	L(exit_1_12)
-	test	$0x04, %ah
-	jnz	L(exit_1_11)
-	test	$0x02, %ah
-	jnz	L(exit_1_10)
-
-	add	$8, %edx
-	jl	L(return_null)
-	lea	8(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_dispatch_1_high_8):
-	test	$0x80, %ah
-	jnz	L(exit_1_16)
-	test	$0x40, %ah
-	jnz	L(exit_1_15)
-	test	$0x20, %ah
-	jnz	L(exit_1_14)
-
-	add	$12, %edx
-	jl	L(return_null)
-	lea	12(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_2):
-	add	$1, %edx
-	jl	L(return_null)
-	lea	1(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_3):
-	add	$2, %edx
-	jl	L(return_null)
-	lea	2(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_4):
-	add	$3, %edx
-	jl	L(return_null)
-	lea	3(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_6):
-	add	$5, %edx
-	jl	L(return_null)
-	lea	5(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_7):
-	add	$6, %edx
-	jl	L(return_null)
-	lea	6(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_8):
-	add	$7, %edx
-	jl	L(return_null)
-	lea	7(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_10):
-	add	$9, %edx
-	jl	L(return_null)
-	lea	9(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_11):
-	add	$10, %edx
-	jl	L(return_null)
-	lea	10(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_12):
-	add	$11, %edx
-	jl	L(return_null)
-	lea	11(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_14):
-	add	$13, %edx
-	jl	L(return_null)
-	lea	13(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_15):
-	add	$14, %edx
-	jl	L(return_null)
-	lea	14(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit_1_16):
-	add	$15, %edx
-	jl	L(return_null)
-	lea	15(%ecx), %eax
-	ret
-
-	.p2align 4
-L(return_null):
-	xor	%eax, %eax
-	ret
-
-	.p2align 4
-L(length_less16_offset0):
-	mov	%dl, %cl
-	pcmpeqb	(%eax), %xmm1
-
-	mov	$1, %edx
-	sal	%cl, %edx
-	sub	$1, %edx
-
-	mov	%eax, %ecx
-	pmovmskb %xmm1, %eax
-
-	and	%edx, %eax
-	test	%eax, %eax
-	jnz	L(exit_dispatch)
-
-	xor	%eax, %eax
-	ret
-
-	.p2align 4
-L(length_less16):
-	punpcklbw %xmm1, %xmm1
-	add	$16, %edx
-	punpcklbw %xmm1, %xmm1
-
-	mov	%ecx, %eax
-	pshufd	$0, %xmm1, %xmm1
-
-	and	$15, %ecx
-	jz	L(length_less16_offset0)
-
-	PUSH	(%edi)
-
-	mov	%cl, %dh
-	add	%dl, %dh
-	and	$-16, %eax
-
-	sub	$16, %dh
-	ja	L(length_less16_part2)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edi
-
-	sar	%cl, %edi
-	add	%ecx, %eax
-	mov	%dl, %cl
-
-	mov	$1, %edx
-	sal	%cl, %edx
-	sub	$1, %edx
-
-	and	%edx, %edi
-	test	%edi, %edi
-	jz	L(ret_null)
-
-	bsr	%edi, %edi
-	add	%edi, %eax
-	POP	(%edi)
-	ret
-
-	CFI_PUSH     (%edi)
-
-	.p2align 4
-L(length_less16_part2):
-	movdqa	16(%eax), %xmm2
-	pcmpeqb	%xmm1, %xmm2
-	pmovmskb %xmm2, %edi
-
-	mov	%cl, %ch
-
-	mov	%dh, %cl
-	mov	$1, %edx
-	sal	%cl, %edx
-	sub	$1, %edx
-
-	and	%edx, %edi
-
-	test	%edi, %edi
-	jnz	L(length_less16_part2_return)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edi
-
-	mov	%ch, %cl
-	sar	%cl, %edi
-	test	%edi, %edi
-	jz	L(ret_null)
-
-	bsr	%edi, %edi
-	add	%edi, %eax
-	xor	%ch, %ch
-	add	%ecx, %eax
-	POP	(%edi)
-	ret
-
-	CFI_PUSH     (%edi)
-
-	.p2align 4
-L(length_less16_part2_return):
-	bsr	%edi, %edi
-	lea	16(%eax, %edi), %eax
-	POP	(%edi)
-	ret
-
-	CFI_PUSH     (%edi)
-
-	.p2align 4
-L(ret_null):
-	xor	%eax, %eax
-	POP	(%edi)
-	ret
-
-END (memrchr)
diff --git a/libc/arch-x86/string/sse2-strchr-atom.S b/libc/arch-x86/string/sse2-strchr-atom.S
deleted file mode 100644
index e325181..0000000
--- a/libc/arch-x86/string/sse2-strchr-atom.S
+++ /dev/null
@@ -1,391 +0,0 @@
-/*
-Copyright (c) 2011, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef L
-# define L(label)	.L##label
-#endif
-
-#ifndef cfi_startproc
-# define cfi_startproc	.cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc	.cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg)	.cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name)	\
-	.type name,  @function;	\
-	.globl name;	\
-	.p2align 4;	\
-name:	\
-	cfi_startproc
-#endif
-
-#ifndef END
-# define END(name)	\
-	cfi_endproc;	\
-	.size name,	.-name
-#endif
-
-#define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG;	CFI_PUSH (REG)
-#define POP(REG)	popl REG;	CFI_POP (REG)
-
-#define PARMS	8
-#define ENTRANCE	PUSH(%edi)
-#define RETURN	POP (%edi); ret; CFI_PUSH (%edi);
-
-
-#define STR1	PARMS
-#define STR2	STR1+4
-
-	.text
-ENTRY (strchr)
-
-	ENTRANCE
-	mov	STR1(%esp), %ecx
-	movd	STR2(%esp), %xmm1
-
-	pxor	%xmm2, %xmm2
-	mov	%ecx, %edi
-	punpcklbw %xmm1, %xmm1
-	punpcklbw %xmm1, %xmm1
-	/* ECX has OFFSET. */
-	and	$15, %ecx
-	pshufd	$0, %xmm1, %xmm1
-	je	L(loop)
-
-/* Handle unaligned string.  */
-	and	$-16, %edi
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	pcmpeqb	%xmm1, %xmm0
-	/* Find where NULL is.  */
-	pmovmskb %xmm2, %edx
-	/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-	/* Remove the leading bytes.  */
-	sarl	%cl, %edx
-	sarl	%cl, %eax
-	test	%eax, %eax
-	jz	L(unaligned_no_match)
-	add	%ecx, %edi
-	test	%edx, %edx
-	jz	L(match_case1)
-	jmp	L(match_case2)
-
-	.p2align 4
-L(unaligned_no_match):
-	test	%edx, %edx
-	jne	L(return_null)
-
-	pxor	%xmm2, %xmm2
-	add	$16, %edi
-
-	.p2align 4
-/* Loop start on aligned string.  */
-L(loop):
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches)
-	test	%edx, %edx
-	jnz	L(return_null)
-	add	$16, %edi
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches)
-	test	%edx, %edx
-	jnz	L(return_null)
-	add	$16, %edi
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches)
-	test	%edx, %edx
-	jnz	L(return_null)
-	add	$16, %edi
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(matches)
-	test	%edx, %edx
-	jnz	L(return_null)
-	add	$16, %edi
-	jmp	L(loop)
-
-L(matches):
-	/* There is a match.  First find where NULL is.  */
-	test	%edx, %edx
-	jz	L(match_case1)
-
-	.p2align 4
-L(match_case2):
-	test	%al, %al
-	jz	L(match_higth_case2)
-
-	mov	%al, %cl
-	and	$15, %cl
-	jnz	L(match_case2_4)
-
-	mov	%dl, %ch
-	and	$15, %ch
-	jnz	L(return_null)
-
-	test	$0x10, %al
-	jnz	L(Exit5)
-	test	$0x10, %dl
-	jnz	L(return_null)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	test	$0x20, %dl
-	jnz	L(return_null)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	test	$0x40, %dl
-	jnz	L(return_null)
-	lea	7(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case2_4):
-	test	$0x01, %al
-	jnz	L(Exit1)
-	test	$0x01, %dl
-	jnz	L(return_null)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	test	$0x02, %dl
-	jnz	L(return_null)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	test	$0x04, %dl
-	jnz	L(return_null)
-	lea	3(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_higth_case2):
-	test	%dl, %dl
-	jnz	L(return_null)
-
-	mov	%ah, %cl
-	and	$15, %cl
-	jnz	L(match_case2_12)
-
-	mov	%dh, %ch
-	and	$15, %ch
-	jnz	L(return_null)
-
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	test	$0x10, %dh
-	jnz	L(return_null)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	test	$0x20, %dh
-	jnz	L(return_null)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	test	$0x40, %dh
-	jnz	L(return_null)
-	lea	15(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case2_12):
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	test	$0x01, %dh
-	jnz	L(return_null)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	test	$0x02, %dh
-	jnz	L(return_null)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	test	$0x04, %dh
-	jnz	L(return_null)
-	lea	11(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case1):
-	test	%al, %al
-	jz	L(match_higth_case1)
-
-	test	$0x01, %al
-	jnz	L(Exit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	test	$0x10, %al
-	jnz	L(Exit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	lea	7(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_higth_case1):
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	test	$0x08, %ah
-	jnz	L(Exit12)
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	lea	15(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit1):
-	lea	(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit2):
-	lea	1(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit3):
-	lea	2(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit4):
-	lea	3(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit5):
-	lea	4(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit6):
-	lea	5(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit7):
-	lea	6(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit9):
-	lea	8(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit10):
-	lea	9(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit11):
-	lea	10(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit12):
-	lea	11(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit13):
-	lea	12(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit14):
-	lea	13(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit15):
-	lea	14(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(return_null):
-	xor	%eax, %eax
-	RETURN
-
-END (strchr)
diff --git a/libc/arch-x86/string/sse2-strnlen-atom.S b/libc/arch-x86/string/sse2-strnlen-atom.S
deleted file mode 100644
index 1f89b4e..0000000
--- a/libc/arch-x86/string/sse2-strnlen-atom.S
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
-Copyright (c) 2011, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#define USE_AS_STRNLEN 1
-#define STRLEN  strnlen
-#include "sse2-strlen-atom.S"
diff --git a/libc/arch-x86/string/sse2-strrchr-atom.S b/libc/arch-x86/string/sse2-strrchr-atom.S
deleted file mode 100644
index e916bc1..0000000
--- a/libc/arch-x86/string/sse2-strrchr-atom.S
+++ /dev/null
@@ -1,753 +0,0 @@
-/*
-Copyright (c) 2011, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef L
-# define L(label)	.L##label
-#endif
-
-#ifndef cfi_startproc
-# define cfi_startproc	.cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc	.cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg)	.cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name)	\
-	.type name, @function;	\
-	.globl name;	\
-	.p2align 4;	\
-name:	\
-	cfi_startproc
-#endif
-
-#ifndef END
-# define END(name)	\
-	cfi_endproc;	\
-	.size name, .-name
-#endif
-
-#define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-#define POP(REG)	popl REG; CFI_POP (REG)
-
-#define PARMS	8
-#define ENTRANCE	PUSH(%edi);
-#define RETURN	POP (%edi); ret; CFI_PUSH (%edi);
-
-#define STR1  PARMS
-#define STR2  STR1+4
-
-	.text
-ENTRY (strrchr)
-
-	ENTRANCE
-	mov	STR1(%esp), %ecx
-	movd	STR2(%esp), %xmm1
-
-	pxor	%xmm2, %xmm2
-	mov	%ecx, %edi
-	punpcklbw %xmm1, %xmm1
-	punpcklbw %xmm1, %xmm1
-	/* ECX has OFFSET. */
-	and	$63, %ecx
-	pshufd	$0, %xmm1, %xmm1
-	cmp	$48, %ecx
-	ja	L(crosscache)
-
-/* unaligned string. */
-	movdqu	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	pcmpeqb	%xmm1, %xmm0
-	/* Find where NULL is.  */
-	pmovmskb %xmm2, %ecx
-	/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-	add	$16, %edi
-
-	test	%eax, %eax
-	jnz	L(unaligned_match1)
-
-	test	%ecx, %ecx
-	jnz	L(return_null)
-
-	and	$-16, %edi
-
-	PUSH	(%esi)
-	PUSH	(%ebx)
-
-	xor	%ebx, %ebx
-	jmp	L(loop)
-
-	CFI_POP    (%esi)
-	CFI_POP    (%ebx)
-
-	.p2align 4
-L(unaligned_match1):
-	test	%ecx, %ecx
-	jnz	L(prolog_find_zero_1)
-
-	PUSH	(%esi)
-	PUSH	(%ebx)
-
-	mov	%eax, %ebx
-	mov	%edi, %esi
-	and	$-16, %edi
-	jmp	L(loop)
-
-	CFI_POP    (%esi)
-	CFI_POP    (%ebx)
-
-	.p2align 4
-L(crosscache):
-/* Hancle unaligned string.  */
-	and	$15, %ecx
-	and	$-16, %edi
-	pxor	%xmm3, %xmm3
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm3
-	pcmpeqb	%xmm1, %xmm0
-	/* Find where NULL is.  */
-	pmovmskb %xmm3, %edx
-	/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-	/* Remove the leading bytes.  */
-	shr	%cl, %edx
-	shr	%cl, %eax
-	add	$16, %edi
-
-	test	%eax, %eax
-	jnz	L(unaligned_match)
-
-	test	%edx, %edx
-	jnz	L(return_null)
-
-	PUSH	(%esi)
-	PUSH	(%ebx)
-
-	xor	%ebx, %ebx
-	jmp	L(loop)
-
-	CFI_POP    (%esi)
-	CFI_POP    (%ebx)
-
-	.p2align 4
-L(unaligned_match):
-	test	%edx, %edx
-	jnz	L(prolog_find_zero)
-
-	PUSH	(%esi)
-	PUSH	(%ebx)
-
-	mov	%eax, %ebx
-	lea	(%edi, %ecx), %esi
-
-/* Loop start on aligned string.  */
-	.p2align 4
-L(loop):
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	add	$16, %edi
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm0, %eax
-	or	%eax, %ecx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	add	$16, %edi
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm0, %eax
-	or	%eax, %ecx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	add	$16, %edi
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm0, %eax
-	or	%eax, %ecx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm0
-	pcmpeqb	%xmm0, %xmm2
-	add	$16, %edi
-	pcmpeqb	%xmm1, %xmm0
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm0, %eax
-	or	%eax, %ecx
-	jz	L(loop)
-
-L(matches):
-	test	%eax, %eax
-	jnz	L(match)
-L(return_value):
-	test	%ebx, %ebx
-	jz	L(return_null_1)
-	mov	%ebx, %eax
-	mov	%esi, %edi
-
-	POP	(%ebx)
-	POP	(%esi)
-
-	jmp	L(match_case1)
-
-	CFI_PUSH    (%ebx)
-	CFI_PUSH    (%esi)
-
-	.p2align 4
-L(return_null_1):
-	POP	(%ebx)
-	POP	(%esi)
-
-	xor	%eax, %eax
-	RETURN
-
-	CFI_PUSH    (%ebx)
-	CFI_PUSH    (%esi)
-
-	.p2align 4
-L(match):
-	pmovmskb %xmm2, %ecx
-	test	%ecx, %ecx
-	jnz	L(find_zero)
-	mov	%eax, %ebx
-	mov	%edi, %esi
-	jmp	L(loop)
-
-	.p2align 4
-L(find_zero):
-	test	%cl, %cl
-	jz	L(find_zero_high)
-	mov	%cl, %dl
-	and	$15, %dl
-	jz	L(find_zero_8)
-	test	$0x01, %cl
-	jnz	L(FindZeroExit1)
-	test	$0x02, %cl
-	jnz	L(FindZeroExit2)
-	test	$0x04, %cl
-	jnz	L(FindZeroExit3)
-	and	$(1 << 4) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp     L(match_case1)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(find_zero_8):
-	test	$0x10, %cl
-	jnz	L(FindZeroExit5)
-	test	$0x20, %cl
-	jnz	L(FindZeroExit6)
-	test	$0x40, %cl
-	jnz	L(FindZeroExit7)
-	and	$(1 << 8) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp     L(match_case1)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(find_zero_high):
-	mov	%ch, %dh
-	and	$15, %dh
-	jz	L(find_zero_high_8)
-	test	$0x01, %ch
-	jnz	L(FindZeroExit9)
-	test	$0x02, %ch
-	jnz	L(FindZeroExit10)
-	test	$0x04, %ch
-	jnz	L(FindZeroExit11)
-	and	$(1 << 12) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp     L(match_case1)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(find_zero_high_8):
-	test	$0x10, %ch
-	jnz	L(FindZeroExit13)
-	test	$0x20, %ch
-	jnz	L(FindZeroExit14)
-	test	$0x40, %ch
-	jnz	L(FindZeroExit15)
-	and	$(1 << 16) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp     L(match_case1)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit1):
-	and	$1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp     L(match_case1)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit2):
-	and	$(1 << 2) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp     L(match_case1)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit3):
-	and	$(1 << 3) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp     L(match_case1)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit5):
-	and	$(1 << 5) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp     L(match_case1)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit6):
-	and	$(1 << 6) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp     L(match_case1)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit7):
-	and	$(1 << 7) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp     L(match_case1)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit9):
-	and	$(1 << 9) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp     L(match_case1)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit10):
-	and	$(1 << 10) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp     L(match_case1)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit11):
-	and	$(1 << 11) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp     L(match_case1)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit13):
-	and	$(1 << 13) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp     L(match_case1)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit14):
-	and	$(1 << 14) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-	jmp     L(match_case1)
-
-	CFI_PUSH	(%ebx)
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(FindZeroExit15):
-	and	$(1 << 15) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%ebx)
-	POP	(%esi)
-
-	.p2align 4
-L(match_case1):
-	test	%ah, %ah
-	jnz	L(match_case1_high)
-	mov	%al, %dl
-	and	$15 << 4, %dl
-	jnz	L(match_case1_8)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	lea	-16(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case1_8):
-	test	$0x80, %al
-	jnz	L(Exit8)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	lea	-12(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case1_high):
-	mov	%ah, %dh
-	and	$15 << 4, %dh
-	jnz	L(match_case1_high_8)
-	test	$0x08, %ah
-	jnz	L(Exit12)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	lea	-8(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_case1_high_8):
-	test	$0x80, %ah
-	jnz	L(Exit16)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	lea	-4(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit2):
-	lea	-15(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit3):
-	lea	-14(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit4):
-	lea	-13(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit6):
-	lea	-11(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit7):
-	lea	-10(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit8):
-	lea	-9(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit10):
-	lea	-7(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit11):
-	lea	-6(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit12):
-	lea	-5(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit14):
-	lea	-3(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit15):
-	lea	-2(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(Exit16):
-	lea	-1(%edi), %eax
-	RETURN
-
-/* Return NULL.  */
-	.p2align 4
-L(return_null):
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero):
-	add	%ecx, %edi
-	mov     %edx, %ecx
-L(prolog_find_zero_1):
-	test	%cl, %cl
-	jz	L(prolog_find_zero_high)
-	mov	%cl, %dl
-	and	$15, %dl
-	jz	L(prolog_find_zero_8)
-	test	$0x01, %cl
-	jnz	L(PrologFindZeroExit1)
-	test	$0x02, %cl
-	jnz	L(PrologFindZeroExit2)
-	test	$0x04, %cl
-	jnz	L(PrologFindZeroExit3)
-	and	$(1 << 4) - 1, %eax
-	jnz	L(match_case1)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero_8):
-	test	$0x10, %cl
-	jnz	L(PrologFindZeroExit5)
-	test	$0x20, %cl
-	jnz	L(PrologFindZeroExit6)
-	test	$0x40, %cl
-	jnz	L(PrologFindZeroExit7)
-	and	$(1 << 8) - 1, %eax
-	jnz	L(match_case1)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero_high):
-	mov	%ch, %dh
-	and	$15, %dh
-	jz	L(prolog_find_zero_high_8)
-	test	$0x01, %ch
-	jnz	L(PrologFindZeroExit9)
-	test	$0x02, %ch
-	jnz	L(PrologFindZeroExit10)
-	test	$0x04, %ch
-	jnz	L(PrologFindZeroExit11)
-	and	$(1 << 12) - 1, %eax
-	jnz	L(match_case1)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero_high_8):
-	test	$0x10, %ch
-	jnz	L(PrologFindZeroExit13)
-	test	$0x20, %ch
-	jnz	L(PrologFindZeroExit14)
-	test	$0x40, %ch
-	jnz	L(PrologFindZeroExit15)
-	and	$(1 << 16) - 1, %eax
-	jnz	L(match_case1)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit1):
-	and	$1, %eax
-	jnz	L(match_case1)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit2):
-	and	$(1 << 2) - 1, %eax
-	jnz	L(match_case1)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit3):
-	and	$(1 << 3) - 1, %eax
-	jnz	L(match_case1)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit5):
-	and	$(1 << 5) - 1, %eax
-	jnz	L(match_case1)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit6):
-	and	$(1 << 6) - 1, %eax
-	jnz	L(match_case1)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit7):
-	and	$(1 << 7) - 1, %eax
-	jnz	L(match_case1)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit9):
-	and	$(1 << 9) - 1, %eax
-	jnz	L(match_case1)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit10):
-	and	$(1 << 10) - 1, %eax
-	jnz	L(match_case1)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit11):
-	and	$(1 << 11) - 1, %eax
-	jnz	L(match_case1)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit13):
-	and	$(1 << 13) - 1, %eax
-	jnz	L(match_case1)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit14):
-	and	$(1 << 14) - 1, %eax
-	jnz	L(match_case1)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(PrologFindZeroExit15):
-	and	$(1 << 15) - 1, %eax
-	jnz	L(match_case1)
-	xor	%eax, %eax
-	RETURN
-
-END (strrchr)
diff --git a/libc/arch-x86/string/ssse3-strlcat-atom.S b/libc/arch-x86/string/ssse3-strlcat-atom.S
deleted file mode 100644
index daaf254..0000000
--- a/libc/arch-x86/string/ssse3-strlcat-atom.S
+++ /dev/null
@@ -1,1225 +0,0 @@
-/*
-Copyright (c) 2011, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* Optimized strlcat with SSSE3 */
-
-#ifndef cfi_startproc
-# define cfi_startproc	.cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc	.cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg)	.cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name)	\
-	.type name,  @function;	\
-	.globl name;	\
-	.p2align 4;	\
-name:	\
-	cfi_startproc
-#endif
-
-#ifndef END
-# define END(name)	\
-	cfi_endproc;	\
-	.size name, .-name
-#endif
-
-#define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-#define PUSH(REG)	pushl	REG;	CFI_PUSH (REG)
-#define POP(REG)	popl	REG;	CFI_POP (REG)
-#define L(label)	.L##Prolog_##label
-
-#define DST	4
-#define SRC	DST+8
-#define LEN	SRC+4
-
-	.text
-ENTRY (strlcat)
-	mov	DST(%esp), %edx
-	PUSH	(%ebx)
-	mov	LEN(%esp), %ebx
-	sub	$4, %ebx
-	jbe	L(len_less4_prolog)
-
-#define RETURN	jmp	L(StrcpyStep)
-#define edi	ebx
-
-#define USE_AS_STRNLEN
-#define USE_AS_STRCAT
-#define USE_AS_STRLCAT
-
-#include "sse2-strlen-atom.S"
-
-	.p2align 4
-L(StrcpyStep):
-
-#undef edi
-#undef L
-#define L(label) .L##label
-#undef RETURN
-#define RETURN	POP (%ebx); ret; CFI_PUSH (%ebx);
-#define RETURN1	POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
-
-        movl	SRC(%esp), %ecx
-	movl	LEN(%esp), %ebx
-
-	cmp	%eax, %ebx
-	je	L(CalculateLengthOfSrcProlog)
-	sub	%eax, %ebx
-
-	test	%ebx, %ebx
-	jz	L(CalculateLengthOfSrcProlog)
-
-	mov	DST + 4(%esp), %edx
-
-	PUSH	(%edi)
-	add	%eax, %edx
-	mov	%ecx, %edi
-	sub	%eax, %edi
-
-	cmp	$8, %ebx
-	jbe	L(StrncpyExit8Bytes)
-
-	cmpb	$0, (%ecx)
-	jz	L(Exit1)
-	cmpb	$0, 1(%ecx)
-	jz	L(Exit2)
-	cmpb	$0, 2(%ecx)
-	jz	L(Exit3)
-	cmpb	$0, 3(%ecx)
-	jz	L(Exit4)
-	cmpb	$0, 4(%ecx)
-	jz	L(Exit5)
-	cmpb	$0, 5(%ecx)
-	jz	L(Exit6)
-	cmpb	$0, 6(%ecx)
-	jz	L(Exit7)
-	cmpb	$0, 7(%ecx)
-	jz	L(Exit8)
-	cmp	$16, %ebx
-	jb	L(StrncpyExit15Bytes)
-	cmpb	$0, 8(%ecx)
-	jz	L(Exit9)
-	cmpb	$0, 9(%ecx)
-	jz	L(Exit10)
-	cmpb	$0, 10(%ecx)
-	jz	L(Exit11)
-	cmpb	$0, 11(%ecx)
-	jz	L(Exit12)
-	cmpb	$0, 12(%ecx)
-	jz	L(Exit13)
-	cmpb	$0, 13(%ecx)
-	jz	L(Exit14)
-	cmpb	$0, 14(%ecx)
-	jz	L(Exit15)
-	cmpb	$0, 15(%ecx)
-	jz	L(Exit16)
-	cmp	$16, %ebx
-	je	L(StrlcpyExit16)
-
-#define USE_AS_STRNCPY
-#include "ssse3-strcpy-atom.S"
-
-	.p2align 4
-L(CopyFrom1To16Bytes):
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-	test	%al, %al
-	jz	L(ExitHigh8)
-
-L(CopyFrom1To16BytesLess8):
-	mov	%al, %ah
-	and	$15, %ah
-	jz	L(ExitHigh4)
-
-	test	$0x01, %al
-	jnz	L(Exit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-L(Exit4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-
-	lea	3(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitHigh4):
-	test	$0x10, %al
-	jnz	L(Exit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-L(Exit8):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-
-	lea	7(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitHigh8):
-	mov	%ah, %al
-	and	$15, %al
-	jz	L(ExitHigh12)
-
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-L(Exit12):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-
-	lea	11(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitHigh12):
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-L(Exit16):
-	movlpd	(%ecx), %xmm0
-	movlpd	8(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 8(%edx)
-
-	lea	15(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	CFI_PUSH(%esi)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2):
-	add	$16, %ebx
-	add	%esi, %ecx
-	add	%esi, %edx
-
-	POP	(%esi)
-
-	test	%al, %al
-	jz	L(ExitHighCase2)
-
-	cmp	$8, %ebx
-	ja	L(CopyFrom1To16BytesLess8)
-
-	test	$0x01, %al
-	jnz	L(Exit1)
-	cmp	$1, %ebx
-	je	L(StrlcpyExit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	cmp	$2, %ebx
-	je	L(StrlcpyExit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	cmp	$3, %ebx
-	je	L(StrlcpyExit3)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	cmp	$4, %ebx
-	je	L(StrlcpyExit4)
-	test	$0x10, %al
-	jnz	L(Exit5)
-	cmp	$5, %ebx
-	je	L(StrlcpyExit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	cmp	$6, %ebx
-	je	L(StrlcpyExit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	cmp	$7, %ebx
-	je	L(StrlcpyExit7)
-	test	$0x80, %al
-	jnz	L(Exit8)
-	jmp	L(StrlcpyExit8)
-
-	.p2align 4
-L(ExitHighCase2):
-	cmp	$8, %ebx
-	jbe	L(CopyFrom1To16BytesLess8Case3)
-
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	cmp	$9, %ebx
-	je	L(StrlcpyExit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	cmp	$10, %ebx
-	je	L(StrlcpyExit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	cmp	$11, %ebx
-	je	L(StrlcpyExit11)
-	test	$0x8, %ah
-	jnz	L(Exit12)
-	cmp	$12, %ebx
-	je	L(StrlcpyExit12)
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	cmp	$13, %ebx
-	je	L(StrlcpyExit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	cmp	$14, %ebx
-	je	L(StrlcpyExit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	cmp	$15, %ebx
-	je	L(StrlcpyExit15)
-	test	$0x80, %ah
-	jnz	L(Exit16)
-	jmp	L(StrlcpyExit16)
-
-	CFI_PUSH(%esi)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2OrCase3):
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase3):
-	add	$16, %ebx
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-
-	cmp	$8, %ebx
-	ja	L(ExitHigh8Case3)
-
-L(CopyFrom1To16BytesLess8Case3):
-	cmp	$4, %ebx
-	ja	L(ExitHigh4Case3)
-
-	cmp	$1, %ebx
-	je	L(StrlcpyExit1)
-	cmp	$2, %ebx
-	je	L(StrlcpyExit2)
-	cmp	$3, %ebx
-	je	L(StrlcpyExit3)
-L(StrlcpyExit4):
-	movb	%bh, 3(%edx)
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-
-	lea	4(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(ExitHigh4Case3):
-	cmp	$5, %ebx
-	je	L(StrlcpyExit5)
-	cmp	$6, %ebx
-	je	L(StrlcpyExit6)
-	cmp	$7, %ebx
-	je	L(StrlcpyExit7)
-L(StrlcpyExit8):
-	movb	%bh, 7(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-
-	lea	8(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(ExitHigh8Case3):
-	cmp	$12, %ebx
-	ja	L(ExitHigh12Case3)
-
-	cmp	$9, %ebx
-	je	L(StrlcpyExit9)
-	cmp	$10, %ebx
-	je	L(StrlcpyExit10)
-	cmp	$11, %ebx
-	je	L(StrlcpyExit11)
-L(StrlcpyExit12):
-	movb	%bh, 11(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-
-	lea	12(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(ExitHigh12Case3):
-	cmp	$13, %ebx
-	je	L(StrlcpyExit13)
-	cmp	$14, %ebx
-	je	L(StrlcpyExit14)
-	cmp	$15, %ebx
-	je	L(StrlcpyExit15)
-L(StrlcpyExit16):
-	movb	%bh, 15(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-
-	lea	16(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(StrlcpyExit1):
-	movb	%bh, (%edx)
-
-	lea	1(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit1):
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-
-	mov	%ecx, %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit2):
-	movb	%bh, 1(%edx)
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-
-	lea	2(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit2):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movl	%edi, %eax
-
-	lea	1(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit3):
-	movb	%bh, 2(%edx)
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-
-	lea	3(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit3):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-
-	lea	2(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit5):
-	movb	%bh, 4(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	%edi, %eax
-
-	lea	5(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit5):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-
-	lea	4(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit6):
-	movb	%bh, 5(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-
-	lea	6(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit6):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-
-	lea	5(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit7):
-	movb	%bh, 6(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-
-	lea	7(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit7):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-
-	lea	6(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit9):
-	movb	%bh, 8(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-
-	lea	9(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit9):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-
-	lea	8(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit10):
-	movb	%bh, 9(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-
-	lea	10(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit10):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-
-	lea	9(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit11):
-	movb	%bh, 10(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-
-	lea	11(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit11):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-
-	lea	10(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit13):
-	movb	%bh, 12(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-
-	lea	13(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit13):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-
-	lea	12(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit14):
-	movb	%bh, 13(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-
-	lea	14(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit14):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-
-	lea	13(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit15):
-	movb	%bh, 14(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-
-	lea	15(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit15):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-
-	lea	14(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncpyExit15Bytes):
-	cmp	$12, %ebx
-	ja	L(StrncpyExit15Bytes1)
-
-	cmpb	$0, 8(%ecx)
-	jz	L(Exit9)
-	cmp	$9, %ebx
-	je	L(StrlcpyExit9)
-
-	cmpb	$0, 9(%ecx)
-	jz	L(Exit10)
-	cmp	$10, %ebx
-	je	L(StrlcpyExit10)
-
-	cmpb	$0, 10(%ecx)
-	jz	L(Exit11)
-	cmp	$11, %ebx
-	je	L(StrlcpyExit11)
-
-	cmpb	$0, 11(%ecx)
-	jz	L(Exit12)
-	jmp	L(StrlcpyExit12)
-
-	.p2align 4
-L(StrncpyExit15Bytes1):
-	cmpb	$0, 8(%ecx)
-	jz	L(Exit9)
-	cmpb	$0, 9(%ecx)
-	jz	L(Exit10)
-	cmpb	$0, 10(%ecx)
-	jz	L(Exit11)
-	cmpb	$0, 11(%ecx)
-	jz	L(Exit12)
-
-	cmpb	$0, 12(%ecx)
-	jz	L(Exit13)
-	cmp	$13, %ebx
-	je	L(StrlcpyExit13)
-
-	cmpb	$0, 13(%ecx)
-	jz	L(Exit14)
-	cmp	$14, %ebx
-	je	L(StrlcpyExit14)
-
-	cmpb	$0, 14(%ecx)
-	jz	L(Exit15)
-	jmp	L(StrlcpyExit15)
-
-	.p2align 4
-L(StrncpyExit8Bytes):
-	cmp	$4, %ebx
-	ja	L(StrncpyExit8Bytes1)
-
-	cmpb	$0, (%ecx)
-	jz	L(Exit1)
-	cmp	$1, %ebx
-	je	L(StrlcpyExit1)
-
-	cmpb	$0, 1(%ecx)
-	jz	L(Exit2)
-	cmp	$2, %ebx
-	je	L(StrlcpyExit2)
-
-	cmpb	$0, 2(%ecx)
-	jz	L(Exit3)
-	cmp	$3, %ebx
-	je	L(StrlcpyExit3)
-
-	cmpb	$0, 3(%ecx)
-	jz	L(Exit4)
-	jmp	L(StrlcpyExit4)
-
-	.p2align 4
-L(StrncpyExit8Bytes1):
-	cmpb	$0, (%ecx)
-	jz	L(Exit1)
-	cmpb	$0, 1(%ecx)
-	jz	L(Exit2)
-	cmpb	$0, 2(%ecx)
-	jz	L(Exit3)
-	cmpb	$0, 3(%ecx)
-	jz	L(Exit4)
-
-	cmpb	$0, 4(%ecx)
-	jz	L(Exit5)
-	cmp	$5, %ebx
-	je	L(StrlcpyExit5)
-
-	cmpb	$0, 5(%ecx)
-	jz	L(Exit6)
-	cmp	$6, %ebx
-	je	L(StrlcpyExit6)
-
-	cmpb	$0, 6(%ecx)
-	jz	L(Exit7)
-	cmp	$7, %ebx
-	je	L(StrlcpyExit7)
-
-	cmpb	$0, 7(%ecx)
-	jz	L(Exit8)
-	jmp	L(StrlcpyExit8)
-
-	CFI_POP	(%edi)
-
-
-	.p2align 4
-L(Prolog_return_start_len):
-	movl	LEN(%esp), %ebx
-        movl	SRC(%esp), %ecx
-L(CalculateLengthOfSrcProlog):
-	mov	%ecx, %edx
-	sub	%ebx, %ecx
-
-	.p2align 4
-L(CalculateLengthOfSrc):
-	cmpb	$0, (%edx)
-	jz	L(exit_tail0)
-	cmpb	$0, 1(%edx)
-	jz	L(exit_tail1)
-	cmpb	$0, 2(%edx)
-	jz	L(exit_tail2)
-	cmpb	$0, 3(%edx)
-	jz	L(exit_tail3)
-
-	cmpb	$0, 4(%edx)
-	jz	L(exit_tail4)
-	cmpb	$0, 5(%edx)
-	jz	L(exit_tail5)
-	cmpb	$0, 6(%edx)
-	jz	L(exit_tail6)
-	cmpb	$0, 7(%edx)
-	jz	L(exit_tail7)
-
-	cmpb	$0, 8(%edx)
-	jz	L(exit_tail8)
-	cmpb	$0, 9(%edx)
-	jz	L(exit_tail9)
-	cmpb	$0, 10(%edx)
-	jz	L(exit_tail10)
-	cmpb	$0, 11(%edx)
-	jz	L(exit_tail11)
-
-	cmpb	$0, 12(%edx)
-	jz	L(exit_tail12)
-	cmpb	$0, 13(%edx)
-	jz	L(exit_tail13)
-	cmpb	$0, 14(%edx)
-	jz	L(exit_tail14)
-	cmpb	$0, 15(%edx)
-	jz	L(exit_tail15)
-
-	pxor	%xmm0, %xmm0
-	lea	16(%edx), %eax
-	add	$16, %ecx
-	and	$-16, %eax
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	pxor	%xmm1, %xmm1
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	pxor	%xmm2, %xmm2
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	pxor	%xmm3, %xmm3
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	and	$-0x40, %eax
-
-	.p2align 4
-L(aligned_64_loop):
-	movaps	(%eax), %xmm0
-	movaps	16(%eax), %xmm1
-	movaps	32(%eax), %xmm2
-	movaps	48(%eax), %xmm6
-	pminub	%xmm1, %xmm0
-	pminub	%xmm6, %xmm2
-	pminub	%xmm0, %xmm2
-	pcmpeqb	%xmm3, %xmm2
-	pmovmskb %xmm2, %edx
-	lea	64(%eax), %eax
-	test	%edx, %edx
-	jz	L(aligned_64_loop)
-
-	pcmpeqb	-64(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	48(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	-32(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	%xmm6, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-
-	.p2align 4
-L(exit):
-	sub	%ecx, %eax
-	test	%dl, %dl
-	jz	L(exit_more_8)
-
-	mov	%dl, %cl
-	and	$15, %cl
-	jz	L(exit_more_4)
-	test	$0x01, %dl
-	jnz	L(exit_0)
-	test	$0x02, %dl
-	jnz	L(exit_1)
-	test	$0x04, %dl
-	jnz	L(exit_2)
-	add	$3, %eax
-	RETURN
-
-	.p2align 4
-L(exit_more_4):
-	test	$0x10, %dl
-	jnz	L(exit_4)
-	test	$0x20, %dl
-	jnz	L(exit_5)
-	test	$0x40, %dl
-	jnz	L(exit_6)
-	add	$7, %eax
-	RETURN
-
-	.p2align 4
-L(exit_more_8):
-	mov	%dh, %ch
-	and	$15, %ch
-	jz	L(exit_more_12)
-	test	$0x01, %dh
-	jnz	L(exit_8)
-	test	$0x02, %dh
-	jnz	L(exit_9)
-	test	$0x04, %dh
-	jnz	L(exit_10)
-	add	$11, %eax
-	RETURN
-
-	.p2align 4
-L(exit_more_12):
-	test	$0x10, %dh
-	jnz	L(exit_12)
-	test	$0x20, %dh
-	jnz	L(exit_13)
-	test	$0x40, %dh
-	jnz	L(exit_14)
-	add	$15, %eax
-L(exit_0):
-	RETURN
-
-	.p2align 4
-L(exit_1):
-	add	$1, %eax
-	RETURN
-
-L(exit_2):
-	add	$2, %eax
-	RETURN
-
-L(exit_3):
-	add	$3, %eax
-	RETURN
-
-L(exit_4):
-	add	$4, %eax
-	RETURN
-
-L(exit_5):
-	add	$5, %eax
-	RETURN
-
-L(exit_6):
-	add	$6, %eax
-	RETURN
-
-L(exit_7):
-	add	$7, %eax
-	RETURN
-
-L(exit_8):
-	add	$8, %eax
-	RETURN
-
-L(exit_9):
-	add	$9, %eax
-	RETURN
-
-L(exit_10):
-	add	$10, %eax
-	RETURN
-
-L(exit_11):
-	add	$11, %eax
-	RETURN
-
-L(exit_12):
-	add	$12, %eax
-	RETURN
-
-L(exit_13):
-	add	$13, %eax
-	RETURN
-
-L(exit_14):
-	add	$14, %eax
-	RETURN
-
-L(exit_15):
-	add	$15, %eax
-	RETURN
-
-L(exit_tail0):
-	mov	%edx, %eax
-	sub	%ecx, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail1):
-	lea	1(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail2):
-	lea	2(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail3):
-	lea	3(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail4):
-	lea	4(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail5):
-	lea	5(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail6):
-	lea	6(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail7):
-	lea	7(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail8):
-	lea	8(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail9):
-	lea	9(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail10):
-	lea	10(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail11):
-	lea	11(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail12):
-	lea	12(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail13):
-	lea	13(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail14):
-	lea	14(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail15):
-	lea	15(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-END (strlcat)
diff --git a/libc/arch-x86/string/ssse3-strlcpy-atom.S b/libc/arch-x86/string/ssse3-strlcpy-atom.S
deleted file mode 100644
index cdb17cc..0000000
--- a/libc/arch-x86/string/ssse3-strlcpy-atom.S
+++ /dev/null
@@ -1,1403 +0,0 @@
-/*
-Copyright (c) 2011, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#define USE_AS_STRNCPY
-#define STRCPY strlcpy
-#define STRLEN strlcpy
-#define USE_AS_STRLCPY
-#include "ssse3-strcpy-atom.S"
-
-	.p2align 4
-L(CopyFrom1To16Bytes):
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-	test	%al, %al
-	jz	L(ExitHigh8)
-
-L(CopyFrom1To16BytesLess8):
-	mov	%al, %ah
-	and	$15, %ah
-	jz	L(ExitHigh4)
-
-	test	$0x01, %al
-	jnz	L(Exit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-L(Exit4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-
-	lea	3(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitHigh4):
-	test	$0x10, %al
-	jnz	L(Exit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-L(Exit8):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-
-	lea	7(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitHigh8):
-	mov	%ah, %al
-	and	$15, %al
-	jz	L(ExitHigh12)
-
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-L(Exit12):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-
-	lea	11(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitHigh12):
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-L(Exit16):
-	movlpd	(%ecx), %xmm0
-	movlpd	8(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 8(%edx)
-
-	lea	15(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	CFI_PUSH(%esi)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2):
-	add	$16, %ebx
-	add	%esi, %ecx
-        add     %esi, %edx
-
-	POP	(%esi)
-
-        test    %al, %al
-        jz      L(ExitHighCase2)
-
-        cmp     $8, %ebx
-        ja      L(CopyFrom1To16BytesLess8)
-
-	test	$0x01, %al
-	jnz	L(Exit1)
-	cmp	$1, %ebx
-	je	L(StrlcpyExit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	cmp	$2, %ebx
-	je	L(StrlcpyExit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	cmp	$3, %ebx
-	je	L(StrlcpyExit3)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	cmp	$4, %ebx
-	je	L(StrlcpyExit4)
-	test	$0x10, %al
-	jnz	L(Exit5)
-	cmp	$5, %ebx
-	je	L(StrlcpyExit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	cmp	$6, %ebx
-	je	L(StrlcpyExit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	cmp	$7, %ebx
-	je	L(StrlcpyExit7)
-	test	$0x80, %al
-	jnz	L(Exit8)
-	jmp	L(StrlcpyExit8)
-
-	.p2align 4
-L(ExitHighCase2):
-        cmp     $8, %ebx
-        jbe      L(CopyFrom1To16BytesLess8Case3)
-
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	cmp	$9, %ebx
-	je	L(StrlcpyExit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	cmp	$10, %ebx
-	je	L(StrlcpyExit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	cmp	$11, %ebx
-	je	L(StrlcpyExit11)
-	test	$0x8, %ah
-	jnz	L(Exit12)
-	cmp	$12, %ebx
-	je	L(StrlcpyExit12)
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	cmp	$13, %ebx
-	je	L(StrlcpyExit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	cmp	$14, %ebx
-	je	L(StrlcpyExit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	cmp	$15, %ebx
-	je	L(StrlcpyExit15)
-	test	$0x80, %ah
-	jnz	L(Exit16)
-	jmp	L(StrlcpyExit16)
-
-	CFI_PUSH(%esi)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2OrCase3):
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase3):
-	add	$16, %ebx
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-
-	cmp	$8, %ebx
-	ja	L(ExitHigh8Case3)
-
-L(CopyFrom1To16BytesLess8Case3):
-	cmp	$4, %ebx
-	ja	L(ExitHigh4Case3)
-
-	cmp	$1, %ebx
-	je	L(StrlcpyExit1)
-	cmp	$2, %ebx
-	je	L(StrlcpyExit2)
-	cmp	$3, %ebx
-	je	L(StrlcpyExit3)
-L(StrlcpyExit4):
-	movb	%bh, 3(%edx)
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-
-	lea	4(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(ExitHigh4Case3):
-	cmp	$5, %ebx
-	je	L(StrlcpyExit5)
-	cmp	$6, %ebx
-	je	L(StrlcpyExit6)
-	cmp	$7, %ebx
-	je	L(StrlcpyExit7)
-L(StrlcpyExit8):
-	movb	%bh, 7(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-
-	lea	8(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(ExitHigh8Case3):
-	cmp	$12, %ebx
-	ja	L(ExitHigh12Case3)
-
-	cmp	$9, %ebx
-	je	L(StrlcpyExit9)
-	cmp	$10, %ebx
-	je	L(StrlcpyExit10)
-	cmp	$11, %ebx
-	je	L(StrlcpyExit11)
-L(StrlcpyExit12):
-	movb	%bh, 11(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-
-	lea	12(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(ExitHigh12Case3):
-	cmp	$13, %ebx
-	je	L(StrlcpyExit13)
-	cmp	$14, %ebx
-	je	L(StrlcpyExit14)
-	cmp	$15, %ebx
-	je	L(StrlcpyExit15)
-L(StrlcpyExit16):
-	movb	%bh, 15(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-
-	lea	16(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(StrlcpyExit1):
-	movb	%bh, (%edx)
-
-	lea	1(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit1):
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-
-	mov	%ecx, %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit2):
-	movb	%bh, 1(%edx)
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-
-	lea	2(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit2):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movl	%edi, %eax
-
-	lea	1(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit3):
-	movb	%bh, 2(%edx)
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-
-	lea	3(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit3):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-
-	lea	2(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit5):
-	movb	%bh, 4(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	%edi, %eax
-
-	lea	5(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit5):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-
-	lea	4(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit6):
-	movb	%bh, 5(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-
-	lea	6(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit6):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-
-	lea	5(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit7):
-	movb	%bh, 6(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-
-	lea	7(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit7):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-
-	lea	6(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit9):
-	movb	%bh, 8(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-
-	lea	9(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit9):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-
-	lea	8(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit10):
-	movb	%bh, 9(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-
-	lea	10(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit10):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-
-	lea	9(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit11):
-	movb	%bh, 10(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-
-	lea	11(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit11):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-
-	lea	10(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit13):
-	movb	%bh, 12(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-
-	lea	13(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit13):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-
-	lea	12(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit14):
-	movb	%bh, 13(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-
-	lea	14(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit14):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-
-	lea	13(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit15):
-	movb	%bh, 14(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-
-	lea	15(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit15):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-
-	lea	14(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-        CFI_POP (%edi)
-
-	.p2align 4
-L(StrlcpyExit0):
-	movl	$0, %eax
-	RETURN
-
-	.p2align 4
-L(StrncpyExit15Bytes):
-	cmp	$12, %ebx
-	ja	L(StrncpyExit15Bytes1)
-
-	cmpb	$0, 8(%ecx)
-	jz	L(ExitTail9)
-	cmp	$9, %ebx
-	je	L(StrlcpyExitTail9)
-
-	cmpb	$0, 9(%ecx)
-	jz	L(ExitTail10)
-	cmp	$10, %ebx
-	je	L(StrlcpyExitTail10)
-
-	cmpb	$0, 10(%ecx)
-	jz	L(ExitTail11)
-	cmp	$11, %ebx
-	je	L(StrlcpyExitTail11)
-
-	cmpb	$0, 11(%ecx)
-	jz	L(ExitTail12)
-
-	movb	%bh, 11(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-
-	lea	12(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(StrncpyExit15Bytes1):
-	cmpb	$0, 8(%ecx)
-	jz	L(ExitTail9)
-	cmpb	$0, 9(%ecx)
-	jz	L(ExitTail10)
-	cmpb	$0, 10(%ecx)
-	jz	L(ExitTail11)
-	cmpb	$0, 11(%ecx)
-	jz	L(ExitTail12)
-
-	cmpb	$0, 12(%ecx)
-	jz	L(ExitTail13)
-	cmp	$13, %ebx
-	je	L(StrlcpyExitTail13)
-
-	cmpb	$0, 13(%ecx)
-	jz	L(ExitTail14)
-	cmp	$14, %ebx
-	je	L(StrlcpyExitTail14)
-
-	cmpb	$0, 14(%ecx)
-	jz	L(ExitTail15)
-
-	movb	%bh, 14(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-
-	lea	15(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(StrncpyExit8Bytes):
-	cmp	$4, %ebx
-	ja	L(StrncpyExit8Bytes1)
-
-	test	%ebx, %ebx
-	jz	L(StrlcpyExitTail0)
-
-	cmpb	$0, (%ecx)
-	jz	L(ExitTail1)
-	cmp	$1, %ebx
-	je	L(StrlcpyExitTail1)
-
-	cmpb	$0, 1(%ecx)
-	jz	L(ExitTail2)
-	cmp	$2, %ebx
-	je	L(StrlcpyExitTail2)
-
-	cmpb	$0, 2(%ecx)
-	jz	L(ExitTail3)
-	cmp	$3, %ebx
-	je	L(StrlcpyExitTail3)
-
-	cmpb	$0, 3(%ecx)
-	jz	L(ExitTail4)
-
-	movb	%bh, 3(%edx)
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-
-	lea	4(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(StrncpyExit8Bytes1):
-	cmpb	$0, (%ecx)
-	jz	L(ExitTail1)
-	cmpb	$0, 1(%ecx)
-	jz	L(ExitTail2)
-	cmpb	$0, 2(%ecx)
-	jz	L(ExitTail3)
-	cmpb	$0, 3(%ecx)
-	jz	L(ExitTail4)
-
-	cmpb	$0, 4(%ecx)
-	jz	L(ExitTail5)
-	cmp	$5, %ebx
-	je	L(StrlcpyExitTail5)
-
-	cmpb	$0, 5(%ecx)
-	jz	L(ExitTail6)
-	cmp	$6, %ebx
-	je	L(StrlcpyExitTail6)
-
-	cmpb	$0, 6(%ecx)
-	jz	L(ExitTail7)
-	cmp	$7, %ebx
-	je	L(StrlcpyExitTail7)
-
-	cmpb	$0, 7(%ecx)
-	jz	L(ExitTail8)
-
-	movb	%bh, 7(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-
-	lea	8(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(StrlcpyExitTail0):
-	mov	%ecx, %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(StrlcpyExitTail1):
-	movb	%bh, (%edx)
-
-	lea	1(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail1):
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-
-	mov	$0, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail2):
-	movb	%bh, 1(%edx)
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-
-	lea	2(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail2):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movl	%edx, %eax
-
-	mov	$1, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail3):
-	movb	%bh, 2(%edx)
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-
-	lea	3(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail3):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-
-	mov	$2, %eax
-	RETURN
-
-	.p2align 4
-L(ExitTail4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-
-	mov	$3, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail5):
-	movb	%bh, 4(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	%edx, %eax
-
-	lea	5(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail5):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-
-	mov	$4, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail6):
-	movb	%bh, 5(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-
-	lea	6(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail6):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-
-	mov	$5, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail7):
-	movb	%bh, 6(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-
-	lea	7(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail7):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-
-	mov	$6, %eax
-	RETURN
-
-	.p2align 4
-L(ExitTail8):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-
-	mov	$7, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail9):
-	movb	%bh, 8(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-
-	lea	9(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail9):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-
-	mov	$8, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail10):
-	movb	%bh, 9(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-
-	lea	10(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail10):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-
-	mov	$9, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail11):
-	movb	%bh, 10(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-
-	lea	11(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail11):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-
-	mov	$10, %eax
-	RETURN
-
-	.p2align 4
-L(ExitTail12):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-
-	mov	$11, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail13):
-	movb	%bh, 12(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-
-	lea	13(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail13):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-
-	mov	$12, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail14):
-	movb	%bh, 13(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-
-	lea	14(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail14):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-
-	mov	$13, %eax
-	RETURN
-
-	.p2align 4
-L(ExitTail15):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-
-	mov	$14, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail16):
-	movb	%bh, 15(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-
-	lea	16(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail16):
-	movlpd	(%ecx), %xmm0
-	movlpd	8(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 8(%edx)
-
-	mov	$15, %eax
-	RETURN
-
-	.p2align 4
-L(CalculateLengthOfSrc):
-	xor	%eax, %eax
-	cmpb	$0, (%edx)
-	jz	L(exit_tail0)
-	cmpb	$0, 1(%edx)
-	jz	L(exit_tail1)
-	cmpb	$0, 2(%edx)
-	jz	L(exit_tail2)
-	cmpb	$0, 3(%edx)
-	jz	L(exit_tail3)
-
-	cmpb	$0, 4(%edx)
-	jz	L(exit_tail4)
-	cmpb	$0, 5(%edx)
-	jz	L(exit_tail5)
-	cmpb	$0, 6(%edx)
-	jz	L(exit_tail6)
-	cmpb	$0, 7(%edx)
-	jz	L(exit_tail7)
-
-	cmpb	$0, 8(%edx)
-	jz	L(exit_tail8)
-	cmpb	$0, 9(%edx)
-	jz	L(exit_tail9)
-	cmpb	$0, 10(%edx)
-	jz	L(exit_tail10)
-	cmpb	$0, 11(%edx)
-	jz	L(exit_tail11)
-
-	cmpb	$0, 12(%edx)
-	jz	L(exit_tail12)
-	cmpb	$0, 13(%edx)
-	jz	L(exit_tail13)
-	cmpb	$0, 14(%edx)
-	jz	L(exit_tail14)
-	cmpb	$0, 15(%edx)
-	jz	L(exit_tail15)
-
-	pxor	%xmm0, %xmm0
-	lea	16(%edx), %eax
-	add	$16, %ecx
-	and	$-16, %eax
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	pxor	%xmm1, %xmm1
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	pxor	%xmm2, %xmm2
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	pxor	%xmm3, %xmm3
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	and	$-0x40, %eax
-
-	.p2align 4
-L(aligned_64_loop):
-	movaps	(%eax), %xmm0
-	movaps	16(%eax), %xmm1
-	movaps	32(%eax), %xmm2
-	movaps	48(%eax), %xmm6
-	pminub	%xmm1, %xmm0
-	pminub	%xmm6, %xmm2
-	pminub	%xmm0, %xmm2
-	pcmpeqb	%xmm3, %xmm2
-	pmovmskb %xmm2, %edx
-	lea	64(%eax), %eax
-	test	%edx, %edx
-	jz	L(aligned_64_loop)
-
-	pcmpeqb	-64(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	48(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	-32(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	%xmm6, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-
-	.p2align 4
-L(exit):
-	sub	%ecx, %eax
-	test	%dl, %dl
-	jz	L(exit_more_8)
-
-	mov	%dl, %cl
-	and	$15, %cl
-	jz	L(exit_more_4)
-	test	$0x01, %dl
-	jnz	L(exit_0)
-	test	$0x02, %dl
-	jnz	L(exit_1)
-	test	$0x04, %dl
-	jnz	L(exit_2)
-	add	$3, %eax
-	RETURN
-
-	.p2align 4
-L(exit_more_4):
-	test	$0x10, %dl
-	jnz	L(exit_4)
-	test	$0x20, %dl
-	jnz	L(exit_5)
-	test	$0x40, %dl
-	jnz	L(exit_6)
-	add	$7, %eax
-	RETURN
-
-	.p2align 4
-L(exit_more_8):
-	mov	%dh, %ch
-	and	$15, %ch
-	jz	L(exit_more_12)
-	test	$0x01, %dh
-	jnz	L(exit_8)
-	test	$0x02, %dh
-	jnz	L(exit_9)
-	test	$0x04, %dh
-	jnz	L(exit_10)
-	add	$11, %eax
-	RETURN
-
-	.p2align 4
-L(exit_more_12):
-	test	$0x10, %dh
-	jnz	L(exit_12)
-	test	$0x20, %dh
-	jnz	L(exit_13)
-	test	$0x40, %dh
-	jnz	L(exit_14)
-	add	$15, %eax
-L(exit_0):
-	RETURN
-
-	.p2align 4
-L(exit_1):
-	add	$1, %eax
-	RETURN
-
-L(exit_2):
-	add	$2, %eax
-	RETURN
-
-L(exit_3):
-	add	$3, %eax
-	RETURN
-
-L(exit_4):
-	add	$4, %eax
-	RETURN
-
-L(exit_5):
-	add	$5, %eax
-	RETURN
-
-L(exit_6):
-	add	$6, %eax
-	RETURN
-
-L(exit_7):
-	add	$7, %eax
-	RETURN
-
-L(exit_8):
-	add	$8, %eax
-	RETURN
-
-L(exit_9):
-	add	$9, %eax
-	RETURN
-
-L(exit_10):
-	add	$10, %eax
-	RETURN
-
-L(exit_11):
-	add	$11, %eax
-	RETURN
-
-L(exit_12):
-	add	$12, %eax
-	RETURN
-
-L(exit_13):
-	add	$13, %eax
-	RETURN
-
-L(exit_14):
-	add	$14, %eax
-	RETURN
-
-L(exit_15):
-	add	$15, %eax
-	RETURN
-
-L(exit_tail0):
-	mov	%edx, %eax
-	sub	%ecx, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail1):
-	lea	1(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail2):
-	lea	2(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail3):
-	lea	3(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail4):
-	lea	4(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail5):
-	lea	5(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail6):
-	lea	6(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail7):
-	lea	7(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail8):
-	lea	8(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail9):
-	lea	9(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail10):
-	lea	10(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail11):
-	lea	11(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail12):
-	lea	12(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail13):
-	lea	13(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail14):
-	lea	14(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail15):
-	lea	15(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-END (STRCPY)
-
diff --git a/libc/bionic/__libc_init_main_thread.cpp b/libc/bionic/__libc_init_main_thread.cpp
index 1b539f2..0d557f1 100644
--- a/libc/bionic/__libc_init_main_thread.cpp
+++ b/libc/bionic/__libc_init_main_thread.cpp
@@ -44,7 +44,7 @@
 // Declared in "private/bionic_ssp.h".
 uintptr_t __stack_chk_guard = 0;
 
-static pthread_internal_t main_thread;
+BIONIC_USED_BEFORE_LINKER_RELOCATES static pthread_internal_t main_thread;
 
 // Setup for the main thread. For dynamic executables, this is called by the
 // linker _before_ libc is mapped in memory. This means that all writes to
diff --git a/libc/bionic/android_mallopt.cpp b/libc/bionic/android_mallopt.cpp
new file mode 100644
index 0000000..79e4072
--- /dev/null
+++ b/libc/bionic/android_mallopt.cpp
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <stdatomic.h>
+
+#include <platform/bionic/malloc.h>
+#include <private/bionic_globals.h>
+
+#include "gwp_asan_wrappers.h"
+#include "malloc_limit.h"
+
+#if !defined(LIBC_STATIC)
+#include <stdio.h>
+
+#include <private/bionic_defs.h>
+
+#include "malloc_heapprofd.h"
+
+extern bool gZygoteChild;
+extern _Atomic bool gZygoteChildProfileable;
+
+bool WriteMallocLeakInfo(FILE* fp);
+bool GetMallocLeakInfo(android_mallopt_leak_info_t* leak_info);
+bool FreeMallocLeakInfo(android_mallopt_leak_info_t* leak_info);
+#endif
+
+// =============================================================================
+// Platform-internal mallopt variant.
+// =============================================================================
+#if !defined(LIBC_STATIC)
+__BIONIC_WEAK_FOR_NATIVE_BRIDGE
+#endif
+extern "C" bool android_mallopt(int opcode, void* arg, size_t arg_size) {
+  // Functionality available in both static and dynamic libc.
+  if (opcode == M_GET_DECAY_TIME_ENABLED) {
+    if (arg == nullptr || arg_size != sizeof(bool)) {
+      errno = EINVAL;
+      return false;
+    }
+    *reinterpret_cast<bool*>(arg) = atomic_load(&__libc_globals->decay_time_enabled);
+    return true;
+  }
+  if (opcode == M_INITIALIZE_GWP_ASAN) {
+    if (arg == nullptr || arg_size != sizeof(android_mallopt_gwp_asan_options_t)) {
+      errno = EINVAL;
+      return false;
+    }
+
+    return EnableGwpAsan(*reinterpret_cast<android_mallopt_gwp_asan_options_t*>(arg));
+  }
+  if (opcode == M_MEMTAG_STACK_IS_ON) {
+    if (arg == nullptr || arg_size != sizeof(bool)) {
+      errno = EINVAL;
+      return false;
+    }
+    *reinterpret_cast<bool*>(arg) = atomic_load(&__libc_memtag_stack);
+    return true;
+  }
+  if (opcode == M_SET_ALLOCATION_LIMIT_BYTES) {
+    return LimitEnable(arg, arg_size);
+  }
+
+#if defined(LIBC_STATIC)
+  errno = ENOTSUP;
+  return false;
+#else
+  if (opcode == M_SET_ZYGOTE_CHILD) {
+    if (arg != nullptr || arg_size != 0) {
+      errno = EINVAL;
+      return false;
+    }
+    gZygoteChild = true;
+    return true;
+  }
+  if (opcode == M_INIT_ZYGOTE_CHILD_PROFILING) {
+    if (arg != nullptr || arg_size != 0) {
+      errno = EINVAL;
+      return false;
+    }
+    atomic_store_explicit(&gZygoteChildProfileable, true, memory_order_release);
+    // Also check if heapprofd should start profiling from app startup.
+    HeapprofdInitZygoteChildProfiling();
+    return true;
+  }
+  if (opcode == M_GET_PROCESS_PROFILEABLE) {
+    if (arg == nullptr || arg_size != sizeof(bool)) {
+      errno = EINVAL;
+      return false;
+    }
+    // Native processes are considered profileable. Zygote children are considered
+    // profileable only when appropriately tagged.
+    *reinterpret_cast<bool*>(arg) =
+        !gZygoteChild || atomic_load_explicit(&gZygoteChildProfileable, memory_order_acquire);
+    return true;
+  }
+  if (opcode == M_WRITE_MALLOC_LEAK_INFO_TO_FILE) {
+    if (arg == nullptr || arg_size != sizeof(FILE*)) {
+      errno = EINVAL;
+      return false;
+    }
+    return WriteMallocLeakInfo(reinterpret_cast<FILE*>(arg));
+  }
+  if (opcode == M_GET_MALLOC_LEAK_INFO) {
+    if (arg == nullptr || arg_size != sizeof(android_mallopt_leak_info_t)) {
+      errno = EINVAL;
+      return false;
+    }
+    return GetMallocLeakInfo(reinterpret_cast<android_mallopt_leak_info_t*>(arg));
+  }
+  if (opcode == M_FREE_MALLOC_LEAK_INFO) {
+    if (arg == nullptr || arg_size != sizeof(android_mallopt_leak_info_t)) {
+      errno = EINVAL;
+      return false;
+    }
+    return FreeMallocLeakInfo(reinterpret_cast<android_mallopt_leak_info_t*>(arg));
+  }
+  // Try heapprofd's mallopt, as it handles options not covered here.
+  return HeapprofdMallopt(opcode, arg, arg_size);
+#endif
+}
diff --git a/libc/bionic/bionic_call_ifunc_resolver.cpp b/libc/bionic/bionic_call_ifunc_resolver.cpp
index e44d998..d5a812c 100644
--- a/libc/bionic/bionic_call_ifunc_resolver.cpp
+++ b/libc/bionic/bionic_call_ifunc_resolver.cpp
@@ -31,6 +31,7 @@
 #include <sys/hwprobe.h>
 #include <sys/ifunc.h>
 
+#include "bionic/macros.h"
 #include "private/bionic_auxv.h"
 
 // This code is called in the linker before it has been relocated, so minimize calls into other
@@ -40,8 +41,8 @@
 ElfW(Addr) __bionic_call_ifunc_resolver(ElfW(Addr) resolver_addr) {
 #if defined(__aarch64__)
   typedef ElfW(Addr) (*ifunc_resolver_t)(uint64_t, __ifunc_arg_t*);
-  static __ifunc_arg_t arg;
-  static bool initialized = false;
+  BIONIC_USED_BEFORE_LINKER_RELOCATES static __ifunc_arg_t arg;
+  BIONIC_USED_BEFORE_LINKER_RELOCATES static bool initialized = false;
   if (!initialized) {
     initialized = true;
     arg._size = sizeof(__ifunc_arg_t);
diff --git a/libc/bionic/elf_note.cpp b/libc/bionic/elf_note.cpp
index d5cd5de..efe3844 100644
--- a/libc/bionic/elf_note.cpp
+++ b/libc/bionic/elf_note.cpp
@@ -38,31 +38,34 @@
     return false;
   }
 
+  size_t note_name_len = strlen(note_name) + 1;
+
   ElfW(Addr) p = note_addr;
   ElfW(Addr) note_end = p + phdr_note->p_memsz;
-
   while (p + sizeof(ElfW(Nhdr)) <= note_end) {
+    // Parse the note and check it's structurally valid.
     const ElfW(Nhdr)* note = reinterpret_cast<const ElfW(Nhdr)*>(p);
     p += sizeof(ElfW(Nhdr));
     const char* name = reinterpret_cast<const char*>(p);
-    p += align_up(note->n_namesz, 4);
+    if (__builtin_add_overflow(p, __builtin_align_up(note->n_namesz, 4), &p)) {
+      return false;
+    }
     const char* desc = reinterpret_cast<const char*>(p);
-    p += align_up(note->n_descsz, 4);
+    if (__builtin_add_overflow(p, __builtin_align_up(note->n_descsz, 4), &p)) {
+      return false;
+    }
     if (p > note_end) {
-      break;
-    }
-    if (note->n_type != note_type) {
-      continue;
-    }
-    size_t note_name_len = strlen(note_name) + 1;
-    if (note->n_namesz != note_name_len || strncmp(note_name, name, note_name_len) != 0) {
-      break;
+      return false;
     }
 
-    *note_hdr = note;
-    *note_desc = desc;
-
-    return true;
+    // Is this the note we're looking for?
+    if (note->n_type == note_type &&
+        note->n_namesz == note_name_len &&
+        strncmp(note_name, name, note_name_len) == 0) {
+      *note_hdr = note;
+      *note_desc = desc;
+      return true;
+    }
   }
   return false;
 }
diff --git a/libc/bionic/libc_init_common.h b/libc/bionic/libc_init_common.h
index 126f002..9e15811 100644
--- a/libc/bionic/libc_init_common.h
+++ b/libc/bionic/libc_init_common.h
@@ -44,10 +44,12 @@
   size_t fini_array_count;
 } structors_array_t;
 
-__BEGIN_DECLS
-
+// The main function must not be declared with a linkage-specification
+// ('extern "C"' or 'extern "C++"'), so declare it before __BEGIN_DECLS.
 extern int main(int argc, char** argv, char** env);
 
+__BEGIN_DECLS
+
 __noreturn void __libc_init(void* raw_args,
                             void (*onexit)(void),
                             int (*slingshot)(int, char**, char**),
diff --git a/libc/bionic/libc_init_mte.cpp b/libc/bionic/libc_init_mte.cpp
new file mode 100644
index 0000000..3c8ef7d
--- /dev/null
+++ b/libc/bionic/libc_init_mte.cpp
@@ -0,0 +1,325 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <android/api-level.h>
+#include <elf.h>
+#include <errno.h>
+#include <malloc.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+
+#include "async_safe/log.h"
+#include "heap_tagging.h"
+#include "libc_init_common.h"
+#include "platform/bionic/macros.h"
+#include "platform/bionic/mte.h"
+#include "platform/bionic/page.h"
+#include "platform/bionic/reserved_signals.h"
+#include "private/KernelArgumentBlock.h"
+#include "private/bionic_asm.h"
+#include "private/bionic_asm_note.h"
+#include "private/bionic_call_ifunc_resolver.h"
+#include "private/bionic_elf_tls.h"
+#include "private/bionic_globals.h"
+#include "private/bionic_tls.h"
+#include "private/elf_note.h"
+#include "pthread_internal.h"
+#include "sys/system_properties.h"
+#include "sysprop_helpers.h"
+
+#ifdef __aarch64__
+extern "C" const char* __gnu_basename(const char* path);
+
+static HeapTaggingLevel __get_memtag_level_from_note(const ElfW(Phdr) * phdr_start, size_t phdr_ct,
+                                                     const ElfW(Addr) load_bias, bool* stack) {
+  const ElfW(Nhdr) * note;
+  const char* desc;
+  if (!__find_elf_note(NT_ANDROID_TYPE_MEMTAG, "Android", phdr_start, phdr_ct, &note, &desc,
+                       load_bias)) {
+    return M_HEAP_TAGGING_LEVEL_TBI;
+  }
+
+  // Previously (in Android 12), if the note was != 4 bytes, we check-failed
+  // here. Let's be more permissive to allow future expansion.
+  if (note->n_descsz < 4) {
+    async_safe_fatal("unrecognized android.memtag note: n_descsz = %d, expected >= 4",
+                     note->n_descsz);
+  }
+
+  // `desc` is always aligned due to ELF requirements, enforced in __find_elf_note().
+  ElfW(Word) note_val = *reinterpret_cast<const ElfW(Word)*>(desc);
+  *stack = (note_val & NT_MEMTAG_STACK) != 0;
+
+  // Warning: In Android 12, any value outside of bits [0..3] resulted in a check-fail.
+  if (!(note_val & (NT_MEMTAG_HEAP | NT_MEMTAG_STACK))) {
+    async_safe_format_log(ANDROID_LOG_INFO, "libc",
+                          "unrecognised memtag note_val did not specificy heap or stack: %u",
+                          note_val);
+    return M_HEAP_TAGGING_LEVEL_TBI;
+  }
+
+  unsigned mode = note_val & NT_MEMTAG_LEVEL_MASK;
+  switch (mode) {
+    case NT_MEMTAG_LEVEL_NONE:
+      // Note, previously (in Android 12), NT_MEMTAG_LEVEL_NONE was
+      // NT_MEMTAG_LEVEL_DEFAULT, which implied SYNC mode. This was never used
+      // by anyone, but we note it (heh) here for posterity, in case the zero
+      // level becomes meaningful, and binaries with this note can be executed
+      // on Android 12 devices.
+      return M_HEAP_TAGGING_LEVEL_TBI;
+    case NT_MEMTAG_LEVEL_ASYNC:
+      return M_HEAP_TAGGING_LEVEL_ASYNC;
+    case NT_MEMTAG_LEVEL_SYNC:
+    default:
+      // We allow future extensions to specify mode 3 (currently unused), with
+      // the idea that it might be used for ASYMM mode or something else. On
+      // this version of Android, it falls back to SYNC mode.
+      return M_HEAP_TAGGING_LEVEL_SYNC;
+  }
+}
+
+// Returns true if there's an environment setting (either sysprop or env var)
+// that should overwrite the ELF note, and places the equivalent heap tagging
+// level into *level.
+static bool get_environment_memtag_setting(HeapTaggingLevel* level) {
+  static const char kMemtagPrognameSyspropPrefix[] = "arm64.memtag.process.";
+  static const char kMemtagGlobalSysprop[] = "persist.arm64.memtag.default";
+  static const char kMemtagOverrideSyspropPrefix[] =
+      "persist.device_config.memory_safety_native.mode_override.process.";
+
+  const char* progname = __libc_shared_globals()->init_progname;
+  if (progname == nullptr) return false;
+
+  const char* basename = __gnu_basename(progname);
+
+  char options_str[PROP_VALUE_MAX];
+  char sysprop_name[512];
+  async_safe_format_buffer(sysprop_name, sizeof(sysprop_name), "%s%s", kMemtagPrognameSyspropPrefix,
+                           basename);
+  char remote_sysprop_name[512];
+  async_safe_format_buffer(remote_sysprop_name, sizeof(remote_sysprop_name), "%s%s",
+                           kMemtagOverrideSyspropPrefix, basename);
+  const char* sys_prop_names[] = {sysprop_name, remote_sysprop_name, kMemtagGlobalSysprop};
+
+  if (!get_config_from_env_or_sysprops("MEMTAG_OPTIONS", sys_prop_names, arraysize(sys_prop_names),
+                                       options_str, sizeof(options_str))) {
+    return false;
+  }
+
+  if (strcmp("sync", options_str) == 0) {
+    *level = M_HEAP_TAGGING_LEVEL_SYNC;
+  } else if (strcmp("async", options_str) == 0) {
+    *level = M_HEAP_TAGGING_LEVEL_ASYNC;
+  } else if (strcmp("off", options_str) == 0) {
+    *level = M_HEAP_TAGGING_LEVEL_TBI;
+  } else {
+    async_safe_format_log(
+        ANDROID_LOG_ERROR, "libc",
+        "unrecognized memtag level: \"%s\" (options are \"sync\", \"async\", or \"off\").",
+        options_str);
+    return false;
+  }
+
+  return true;
+}
+
+// Returns the initial heap tagging level. Note: This function will never return
+// M_HEAP_TAGGING_LEVEL_NONE, if MTE isn't enabled for this process we enable
+// M_HEAP_TAGGING_LEVEL_TBI.
+static HeapTaggingLevel __get_tagging_level(const memtag_dynamic_entries_t* memtag_dynamic_entries,
+                                            const void* phdr_start, size_t phdr_ct,
+                                            uintptr_t load_bias, bool* stack) {
+  HeapTaggingLevel level = M_HEAP_TAGGING_LEVEL_TBI;
+
+  // If the dynamic entries exist, use those. Otherwise, fall back to the old
+  // Android note, which is still used for fully static executables. When
+  // -fsanitize=memtag* is used in newer toolchains, currently both the dynamic
+  // entries and the old note are created, but we'd expect to move to just the
+  // dynamic entries for dynamically linked executables in the future. In
+  // addition, there's still some cleanup of the build system (that uses a
+  // manually-constructed note) needed. For more information about the dynamic
+  // entries, see:
+  // https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#dynamic-section
+  if (memtag_dynamic_entries && memtag_dynamic_entries->has_memtag_mode) {
+    switch (memtag_dynamic_entries->memtag_mode) {
+      case 0:
+        level = M_HEAP_TAGGING_LEVEL_SYNC;
+        break;
+      case 1:
+        level = M_HEAP_TAGGING_LEVEL_ASYNC;
+        break;
+      default:
+        async_safe_format_log(ANDROID_LOG_INFO, "libc",
+                              "unrecognised DT_AARCH64_MEMTAG_MODE value: %u",
+                              memtag_dynamic_entries->memtag_mode);
+    }
+    *stack = memtag_dynamic_entries->memtag_stack;
+  } else {
+    level = __get_memtag_level_from_note(reinterpret_cast<const ElfW(Phdr)*>(phdr_start), phdr_ct,
+                                         load_bias, stack);
+  }
+
+  // We can't short-circuit the environment override, as `stack` is still inherited from the
+  // binary's settings.
+  get_environment_memtag_setting(&level);
+  return level;
+}
+
+static void __enable_mte_signal_handler(int, siginfo_t* info, void*) {
+  if (info->si_code != SI_TIMER) {
+    async_safe_format_log(ANDROID_LOG_ERROR, "libc", "Got BIONIC_ENABLE_MTE not from SI_TIMER");
+    return;
+  }
+  int tagged_addr_ctrl = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
+  if (tagged_addr_ctrl < 0) {
+    async_safe_fatal("failed to PR_GET_TAGGED_ADDR_CTRL: %m");
+  }
+  if ((tagged_addr_ctrl & PR_MTE_TCF_MASK) != PR_MTE_TCF_NONE) {
+    return;
+  }
+  async_safe_format_log(ANDROID_LOG_INFO, "libc",
+                        "Re-enabling MTE, value: %x (tagged_addr_ctrl %lu)",
+                        info->si_value.sival_int, info->si_value.sival_int & PR_MTE_TCF_MASK);
+  tagged_addr_ctrl =
+      (tagged_addr_ctrl & ~PR_MTE_TCF_MASK) | (info->si_value.sival_int & PR_MTE_TCF_MASK);
+  if (prctl(PR_SET_TAGGED_ADDR_CTRL, tagged_addr_ctrl, 0, 0, 0) < 0) {
+    async_safe_fatal("failed to PR_SET_TAGGED_ADDR_CTRL %d: %m", tagged_addr_ctrl);
+  }
+}
+
+static int64_t __get_memtag_upgrade_secs() {
+  char* env = getenv("BIONIC_MEMTAG_UPGRADE_SECS");
+  if (!env) return 0;
+  int64_t timed_upgrade = 0;
+  static const char kAppProcessName[] = "app_process64";
+  const char* progname = __libc_shared_globals()->init_progname;
+  progname = progname ? __gnu_basename(progname) : nullptr;
+  // disable timed upgrade for zygote, as the thread spawned will violate the requirement
+  // that it be single-threaded.
+  if (!progname || strncmp(progname, kAppProcessName, sizeof(kAppProcessName)) != 0) {
+    char* endptr;
+    timed_upgrade = strtoll(env, &endptr, 10);
+    if (*endptr != '\0' || timed_upgrade < 0) {
+      async_safe_format_log(ANDROID_LOG_ERROR, "libc",
+                            "Invalid value for BIONIC_MEMTAG_UPGRADE_SECS: %s", env);
+      timed_upgrade = 0;
+    }
+  }
+  // Make sure that this does not get passed to potential processes inheriting
+  // this environment.
+  unsetenv("BIONIC_MEMTAG_UPGRADE_SECS");
+  return timed_upgrade;
+}
+
+// Figure out the desired memory tagging mode (sync/async, heap/globals/stack) for this executable.
+// This function is called from the linker before the main executable is relocated.
+__attribute__((no_sanitize("hwaddress", "memtag"))) void __libc_init_mte(
+    const memtag_dynamic_entries_t* memtag_dynamic_entries, const void* phdr_start, size_t phdr_ct,
+    uintptr_t load_bias) {
+  bool memtag_stack = false;
+  HeapTaggingLevel level =
+      __get_tagging_level(memtag_dynamic_entries, phdr_start, phdr_ct, load_bias, &memtag_stack);
+  if (memtag_stack) __libc_shared_globals()->initial_memtag_stack_abi = true;
+
+  if (int64_t timed_upgrade = __get_memtag_upgrade_secs()) {
+    if (level == M_HEAP_TAGGING_LEVEL_ASYNC) {
+      async_safe_format_log(ANDROID_LOG_INFO, "libc",
+                            "Attempting timed MTE upgrade from async to sync.");
+      __libc_shared_globals()->heap_tagging_upgrade_timer_sec = timed_upgrade;
+      level = M_HEAP_TAGGING_LEVEL_SYNC;
+    } else if (level != M_HEAP_TAGGING_LEVEL_SYNC) {
+      async_safe_format_log(ANDROID_LOG_ERROR, "libc",
+                            "Requested timed MTE upgrade from invalid %s to sync. Ignoring.",
+                            DescribeTaggingLevel(level));
+    }
+  }
+  if (level == M_HEAP_TAGGING_LEVEL_SYNC || level == M_HEAP_TAGGING_LEVEL_ASYNC) {
+    unsigned long prctl_arg = PR_TAGGED_ADDR_ENABLE | PR_MTE_TAG_SET_NONZERO;
+    prctl_arg |= (level == M_HEAP_TAGGING_LEVEL_SYNC) ? PR_MTE_TCF_SYNC : PR_MTE_TCF_ASYNC;
+
+    // When entering ASYNC mode, specify that we want to allow upgrading to SYNC by OR'ing in the
+    // SYNC flag. But if the kernel doesn't support specifying multiple TCF modes, fall back to
+    // specifying a single mode.
+    if (prctl(PR_SET_TAGGED_ADDR_CTRL, prctl_arg | PR_MTE_TCF_SYNC, 0, 0, 0) == 0 ||
+        prctl(PR_SET_TAGGED_ADDR_CTRL, prctl_arg, 0, 0, 0) == 0) {
+      __libc_shared_globals()->initial_heap_tagging_level = level;
+
+      struct sigaction action = {};
+      action.sa_flags = SA_SIGINFO | SA_RESTART;
+      action.sa_sigaction = __enable_mte_signal_handler;
+      sigaction(BIONIC_ENABLE_MTE, &action, nullptr);
+      return;
+    }
+  }
+
+  // MTE was either not enabled, or wasn't supported on this device. Try and use
+  // TBI.
+  if (prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0) == 0) {
+    __libc_shared_globals()->initial_heap_tagging_level = M_HEAP_TAGGING_LEVEL_TBI;
+  }
+  // We did not enable MTE, so we do not need to arm the upgrade timer.
+  __libc_shared_globals()->heap_tagging_upgrade_timer_sec = 0;
+}
+
+// Figure out whether we need to map the stack as PROT_MTE.
+// For dynamic executables, this has to be called after loading all
+// DT_NEEDED libraries, in case one of them needs stack MTE.
+__attribute__((no_sanitize("hwaddress", "memtag"))) void __libc_init_mte_stack(void* stack_top) {
+  if (!__libc_shared_globals()->initial_memtag_stack_abi) {
+    return;
+  }
+
+  // Even if the device doesn't support MTE, we have to allocate stack
+  // history buffers for code compiled for stack MTE. That is because the
+  // codegen expects a buffer to be present in TLS_SLOT_STACK_MTE either
+  // way.
+  __get_bionic_tcb()->tls_slot(TLS_SLOT_STACK_MTE) = __allocate_stack_mte_ringbuffer(0, nullptr);
+
+  if (__libc_mte_enabled()) {
+    __libc_shared_globals()->initial_memtag_stack = true;
+    void* pg_start = reinterpret_cast<void*>(page_start(reinterpret_cast<uintptr_t>(stack_top)));
+    if (mprotect(pg_start, page_size(), PROT_READ | PROT_WRITE | PROT_MTE | PROT_GROWSDOWN)) {
+      async_safe_fatal("error: failed to set PROT_MTE on main thread stack: %m");
+    }
+  }
+}
+
+#else   // __aarch64__
+void __libc_init_mte(const memtag_dynamic_entries_t*, const void*, size_t, uintptr_t) {}
+void __libc_init_mte_stack(void*) {}
+#endif  // __aarch64__
+
+bool __libc_mte_enabled() {
+  HeapTaggingLevel lvl = __libc_shared_globals()->initial_heap_tagging_level;
+  return lvl == M_HEAP_TAGGING_LEVEL_SYNC || lvl == M_HEAP_TAGGING_LEVEL_ASYNC;
+}
diff --git a/libc/bionic/libc_init_static.cpp b/libc/bionic/libc_init_static.cpp
index b54ef85..cd96375 100644
--- a/libc/bionic/libc_init_static.cpp
+++ b/libc/bionic/libc_init_static.cpp
@@ -157,268 +157,6 @@
 
   layout.finish_layout();
 }
-
-#ifdef __aarch64__
-static HeapTaggingLevel __get_memtag_level_from_note(const ElfW(Phdr) * phdr_start, size_t phdr_ct,
-                                                     const ElfW(Addr) load_bias, bool* stack) {
-  const ElfW(Nhdr) * note;
-  const char* desc;
-  if (!__find_elf_note(NT_ANDROID_TYPE_MEMTAG, "Android", phdr_start, phdr_ct, &note, &desc,
-                       load_bias)) {
-    return M_HEAP_TAGGING_LEVEL_TBI;
-  }
-
-  // Previously (in Android 12), if the note was != 4 bytes, we check-failed
-  // here. Let's be more permissive to allow future expansion.
-  if (note->n_descsz < 4) {
-    async_safe_fatal("unrecognized android.memtag note: n_descsz = %d, expected >= 4",
-                     note->n_descsz);
-  }
-
-  // `desc` is always aligned due to ELF requirements, enforced in __find_elf_note().
-  ElfW(Word) note_val = *reinterpret_cast<const ElfW(Word)*>(desc);
-  *stack = (note_val & NT_MEMTAG_STACK) != 0;
-
-  // Warning: In Android 12, any value outside of bits [0..3] resulted in a check-fail.
-  if (!(note_val & (NT_MEMTAG_HEAP | NT_MEMTAG_STACK))) {
-    async_safe_format_log(ANDROID_LOG_INFO, "libc",
-                          "unrecognised memtag note_val did not specificy heap or stack: %u",
-                          note_val);
-    return M_HEAP_TAGGING_LEVEL_TBI;
-  }
-
-  unsigned mode = note_val & NT_MEMTAG_LEVEL_MASK;
-  switch (mode) {
-    case NT_MEMTAG_LEVEL_NONE:
-      // Note, previously (in Android 12), NT_MEMTAG_LEVEL_NONE was
-      // NT_MEMTAG_LEVEL_DEFAULT, which implied SYNC mode. This was never used
-      // by anyone, but we note it (heh) here for posterity, in case the zero
-      // level becomes meaningful, and binaries with this note can be executed
-      // on Android 12 devices.
-      return M_HEAP_TAGGING_LEVEL_TBI;
-    case NT_MEMTAG_LEVEL_ASYNC:
-      return M_HEAP_TAGGING_LEVEL_ASYNC;
-    case NT_MEMTAG_LEVEL_SYNC:
-    default:
-      // We allow future extensions to specify mode 3 (currently unused), with
-      // the idea that it might be used for ASYMM mode or something else. On
-      // this version of Android, it falls back to SYNC mode.
-      return M_HEAP_TAGGING_LEVEL_SYNC;
-  }
-}
-
-// Returns true if there's an environment setting (either sysprop or env var)
-// that should overwrite the ELF note, and places the equivalent heap tagging
-// level into *level.
-static bool get_environment_memtag_setting(HeapTaggingLevel* level) {
-  static const char kMemtagPrognameSyspropPrefix[] = "arm64.memtag.process.";
-  static const char kMemtagGlobalSysprop[] = "persist.arm64.memtag.default";
-  static const char kMemtagOverrideSyspropPrefix[] =
-      "persist.device_config.memory_safety_native.mode_override.process.";
-
-  const char* progname = __libc_shared_globals()->init_progname;
-  if (progname == nullptr) return false;
-
-  const char* basename = __gnu_basename(progname);
-
-  char options_str[PROP_VALUE_MAX];
-  char sysprop_name[512];
-  async_safe_format_buffer(sysprop_name, sizeof(sysprop_name), "%s%s", kMemtagPrognameSyspropPrefix,
-                           basename);
-  char remote_sysprop_name[512];
-  async_safe_format_buffer(remote_sysprop_name, sizeof(remote_sysprop_name), "%s%s",
-                           kMemtagOverrideSyspropPrefix, basename);
-  const char* sys_prop_names[] = {sysprop_name, remote_sysprop_name, kMemtagGlobalSysprop};
-
-  if (!get_config_from_env_or_sysprops("MEMTAG_OPTIONS", sys_prop_names, arraysize(sys_prop_names),
-                                       options_str, sizeof(options_str))) {
-    return false;
-  }
-
-  if (strcmp("sync", options_str) == 0) {
-    *level = M_HEAP_TAGGING_LEVEL_SYNC;
-  } else if (strcmp("async", options_str) == 0) {
-    *level = M_HEAP_TAGGING_LEVEL_ASYNC;
-  } else if (strcmp("off", options_str) == 0) {
-    *level = M_HEAP_TAGGING_LEVEL_TBI;
-  } else {
-    async_safe_format_log(
-        ANDROID_LOG_ERROR, "libc",
-        "unrecognized memtag level: \"%s\" (options are \"sync\", \"async\", or \"off\").",
-        options_str);
-    return false;
-  }
-
-  return true;
-}
-
-// Returns the initial heap tagging level. Note: This function will never return
-// M_HEAP_TAGGING_LEVEL_NONE, if MTE isn't enabled for this process we enable
-// M_HEAP_TAGGING_LEVEL_TBI.
-static HeapTaggingLevel __get_tagging_level(const memtag_dynamic_entries_t* memtag_dynamic_entries,
-                                            const void* phdr_start, size_t phdr_ct,
-                                            uintptr_t load_bias, bool* stack) {
-  HeapTaggingLevel level = M_HEAP_TAGGING_LEVEL_TBI;
-
-  // If the dynamic entries exist, use those. Otherwise, fall back to the old
-  // Android note, which is still used for fully static executables. When
-  // -fsanitize=memtag* is used in newer toolchains, currently both the dynamic
-  // entries and the old note are created, but we'd expect to move to just the
-  // dynamic entries for dynamically linked executables in the future. In
-  // addition, there's still some cleanup of the build system (that uses a
-  // manually-constructed note) needed. For more information about the dynamic
-  // entries, see:
-  // https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#dynamic-section
-  if (memtag_dynamic_entries && memtag_dynamic_entries->has_memtag_mode) {
-    switch (memtag_dynamic_entries->memtag_mode) {
-      case 0:
-        level = M_HEAP_TAGGING_LEVEL_SYNC;
-        break;
-      case 1:
-        level = M_HEAP_TAGGING_LEVEL_ASYNC;
-        break;
-      default:
-        async_safe_format_log(ANDROID_LOG_INFO, "libc",
-                              "unrecognised DT_AARCH64_MEMTAG_MODE value: %u",
-                              memtag_dynamic_entries->memtag_mode);
-    }
-    *stack = memtag_dynamic_entries->memtag_stack;
-  } else {
-    level = __get_memtag_level_from_note(reinterpret_cast<const ElfW(Phdr)*>(phdr_start), phdr_ct,
-                                         load_bias, stack);
-  }
-
-  // We can't short-circuit the environment override, as `stack` is still inherited from the
-  // binary's settings.
-  get_environment_memtag_setting(&level);
-  return level;
-}
-
-static void __enable_mte_signal_handler(int, siginfo_t* info, void*) {
-  if (info->si_code != SI_TIMER) {
-    async_safe_format_log(ANDROID_LOG_ERROR, "libc", "Got BIONIC_ENABLE_MTE not from SI_TIMER");
-    return;
-  }
-  int tagged_addr_ctrl = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
-  if (tagged_addr_ctrl < 0) {
-    async_safe_fatal("failed to PR_GET_TAGGED_ADDR_CTRL: %m");
-  }
-  if ((tagged_addr_ctrl & PR_MTE_TCF_MASK) != PR_MTE_TCF_NONE) {
-    return;
-  }
-  async_safe_format_log(ANDROID_LOG_INFO, "libc",
-                        "Re-enabling MTE, value: %x (tagged_addr_ctrl %lu)",
-                        info->si_value.sival_int, info->si_value.sival_int & PR_MTE_TCF_MASK);
-  tagged_addr_ctrl =
-      (tagged_addr_ctrl & ~PR_MTE_TCF_MASK) | (info->si_value.sival_int & PR_MTE_TCF_MASK);
-  if (prctl(PR_SET_TAGGED_ADDR_CTRL, tagged_addr_ctrl, 0, 0, 0) < 0) {
-    async_safe_fatal("failed to PR_SET_TAGGED_ADDR_CTRL %d: %m", tagged_addr_ctrl);
-  }
-}
-
-static int64_t __get_memtag_upgrade_secs() {
-  char* env = getenv("BIONIC_MEMTAG_UPGRADE_SECS");
-  if (!env) return 0;
-  int64_t timed_upgrade = 0;
-  static const char kAppProcessName[] = "app_process64";
-  const char* progname = __libc_shared_globals()->init_progname;
-  progname = progname ? __gnu_basename(progname) : nullptr;
-  // disable timed upgrade for zygote, as the thread spawned will violate the requirement
-  // that it be single-threaded.
-  if (!progname || strncmp(progname, kAppProcessName, sizeof(kAppProcessName)) != 0) {
-    char* endptr;
-    timed_upgrade = strtoll(env, &endptr, 10);
-    if (*endptr != '\0' || timed_upgrade < 0) {
-      async_safe_format_log(ANDROID_LOG_ERROR, "libc",
-                            "Invalid value for BIONIC_MEMTAG_UPGRADE_SECS: %s", env);
-      timed_upgrade = 0;
-    }
-  }
-  // Make sure that this does not get passed to potential processes inheriting
-  // this environment.
-  unsetenv("BIONIC_MEMTAG_UPGRADE_SECS");
-  return timed_upgrade;
-}
-
-// Figure out the desired memory tagging mode (sync/async, heap/globals/stack) for this executable.
-// This function is called from the linker before the main executable is relocated.
-__attribute__((no_sanitize("hwaddress", "memtag"))) void __libc_init_mte(
-    const memtag_dynamic_entries_t* memtag_dynamic_entries, const void* phdr_start, size_t phdr_ct,
-    uintptr_t load_bias) {
-  bool memtag_stack = false;
-  HeapTaggingLevel level =
-      __get_tagging_level(memtag_dynamic_entries, phdr_start, phdr_ct, load_bias, &memtag_stack);
-  if (memtag_stack) __libc_shared_globals()->initial_memtag_stack_abi = true;
-
-  if (int64_t timed_upgrade = __get_memtag_upgrade_secs()) {
-    if (level == M_HEAP_TAGGING_LEVEL_ASYNC) {
-      async_safe_format_log(ANDROID_LOG_INFO, "libc",
-                            "Attempting timed MTE upgrade from async to sync.");
-      __libc_shared_globals()->heap_tagging_upgrade_timer_sec = timed_upgrade;
-      level = M_HEAP_TAGGING_LEVEL_SYNC;
-    } else if (level != M_HEAP_TAGGING_LEVEL_SYNC) {
-      async_safe_format_log(
-          ANDROID_LOG_ERROR, "libc",
-          "Requested timed MTE upgrade from invalid %s to sync. Ignoring.",
-          DescribeTaggingLevel(level));
-    }
-  }
-  if (level == M_HEAP_TAGGING_LEVEL_SYNC || level == M_HEAP_TAGGING_LEVEL_ASYNC) {
-    unsigned long prctl_arg = PR_TAGGED_ADDR_ENABLE | PR_MTE_TAG_SET_NONZERO;
-    prctl_arg |= (level == M_HEAP_TAGGING_LEVEL_SYNC) ? PR_MTE_TCF_SYNC : PR_MTE_TCF_ASYNC;
-
-    // When entering ASYNC mode, specify that we want to allow upgrading to SYNC by OR'ing in the
-    // SYNC flag. But if the kernel doesn't support specifying multiple TCF modes, fall back to
-    // specifying a single mode.
-    if (prctl(PR_SET_TAGGED_ADDR_CTRL, prctl_arg | PR_MTE_TCF_SYNC, 0, 0, 0) == 0 ||
-        prctl(PR_SET_TAGGED_ADDR_CTRL, prctl_arg, 0, 0, 0) == 0) {
-      __libc_shared_globals()->initial_heap_tagging_level = level;
-
-      struct sigaction action = {};
-      action.sa_flags = SA_SIGINFO | SA_RESTART;
-      action.sa_sigaction = __enable_mte_signal_handler;
-      sigaction(BIONIC_ENABLE_MTE, &action, nullptr);
-      return;
-    }
-  }
-
-  // MTE was either not enabled, or wasn't supported on this device. Try and use
-  // TBI.
-  if (prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0) == 0) {
-    __libc_shared_globals()->initial_heap_tagging_level = M_HEAP_TAGGING_LEVEL_TBI;
-  }
-  // We did not enable MTE, so we do not need to arm the upgrade timer.
-  __libc_shared_globals()->heap_tagging_upgrade_timer_sec = 0;
-}
-
-// Figure out whether we need to map the stack as PROT_MTE.
-// For dynamic executables, this has to be called after loading all
-// DT_NEEDED libraries, in case one of them needs stack MTE.
-__attribute__((no_sanitize("hwaddress", "memtag"))) void __libc_init_mte_stack(void* stack_top) {
-  if (!__libc_shared_globals()->initial_memtag_stack_abi) {
-    return;
-  }
-
-  // Even if the device doesn't support MTE, we have to allocate stack
-  // history buffers for code compiled for stack MTE. That is because the
-  // codegen expects a buffer to be present in TLS_SLOT_STACK_MTE either
-  // way.
-  __get_bionic_tcb()->tls_slot(TLS_SLOT_STACK_MTE) = __allocate_stack_mte_ringbuffer(0, nullptr);
-
-  if (__libc_mte_enabled()) {
-    __libc_shared_globals()->initial_memtag_stack = true;
-    void* pg_start = reinterpret_cast<void*>(page_start(reinterpret_cast<uintptr_t>(stack_top)));
-    if (mprotect(pg_start, page_size(), PROT_READ | PROT_WRITE | PROT_MTE | PROT_GROWSDOWN)) {
-      async_safe_fatal("error: failed to set PROT_MTE on main thread stack: %m");
-    }
-  }
-}
-
-#else   // __aarch64__
-void __libc_init_mte(const memtag_dynamic_entries_t*, const void*, size_t, uintptr_t) {}
-void __libc_init_mte_stack(void*) {}
-#endif  // __aarch64__
-
 void __libc_init_profiling_handlers() {
   // The dynamic variant of this function is more interesting, but this
   // at least ensures that static binaries aren't killed by the kernel's
@@ -517,11 +255,6 @@
 // compiled with -ffreestanding to avoid implicit string.h function calls. (It shouldn't strictly
 // be necessary, though.)
 __LIBC_HIDDEN__ libc_shared_globals* __libc_shared_globals() {
-  static libc_shared_globals globals;
+  BIONIC_USED_BEFORE_LINKER_RELOCATES static libc_shared_globals globals;
   return &globals;
 }
-
-__LIBC_HIDDEN__ bool __libc_mte_enabled() {
-  HeapTaggingLevel lvl = __libc_shared_globals()->initial_heap_tagging_level;
-  return lvl == M_HEAP_TAGGING_LEVEL_SYNC || lvl == M_HEAP_TAGGING_LEVEL_ASYNC;
-}
diff --git a/libc/bionic/malloc_common.cpp b/libc/bionic/malloc_common.cpp
index 596a1fc..441d884 100644
--- a/libc/bionic/malloc_common.cpp
+++ b/libc/bionic/malloc_common.cpp
@@ -332,44 +332,6 @@
 #endif
 // =============================================================================
 
-// =============================================================================
-// Platform-internal mallopt variant.
-// =============================================================================
-#if defined(LIBC_STATIC)
-extern "C" bool android_mallopt(int opcode, void* arg, size_t arg_size) {
-  if (opcode == M_SET_ALLOCATION_LIMIT_BYTES) {
-    return LimitEnable(arg, arg_size);
-  }
-  if (opcode == M_INITIALIZE_GWP_ASAN) {
-    if (arg == nullptr || arg_size != sizeof(android_mallopt_gwp_asan_options_t)) {
-      errno = EINVAL;
-      return false;
-    }
-
-    return EnableGwpAsan(*reinterpret_cast<android_mallopt_gwp_asan_options_t*>(arg));
-  }
-  if (opcode == M_MEMTAG_STACK_IS_ON) {
-    if (arg == nullptr || arg_size != sizeof(bool)) {
-      errno = EINVAL;
-      return false;
-    }
-    *reinterpret_cast<bool*>(arg) = atomic_load(&__libc_memtag_stack);
-    return true;
-  }
-  if (opcode == M_GET_DECAY_TIME_ENABLED) {
-    if (arg == nullptr || arg_size != sizeof(bool)) {
-      errno = EINVAL;
-      return false;
-    }
-    *reinterpret_cast<bool*>(arg) = atomic_load(&__libc_globals->decay_time_enabled);
-    return true;
-  }
-  errno = ENOTSUP;
-  return false;
-}
-#endif
-// =============================================================================
-
 static constexpr MallocDispatch __libc_malloc_default_dispatch __attribute__((unused)) = {
   Malloc(calloc),
   Malloc(free),
diff --git a/libc/bionic/malloc_common_dynamic.cpp b/libc/bionic/malloc_common_dynamic.cpp
index 6db6251..7b6d7d4 100644
--- a/libc/bionic/malloc_common_dynamic.cpp
+++ b/libc/bionic/malloc_common_dynamic.cpp
@@ -80,7 +80,7 @@
 
 _Atomic bool gGlobalsMutating = false;
 
-static bool gZygoteChild = false;
+bool gZygoteChild = false;
 
 // In a Zygote child process, this is set to true if profiling of this process
 // is allowed. Note that this is set at a later time than gZygoteChild. The
@@ -89,7 +89,7 @@
 // domains if applicable). These two flags are read by the
 // BIONIC_SIGNAL_PROFILER handler, which does nothing if the process is not
 // profileable.
-static _Atomic bool gZygoteChildProfileable = false;
+_Atomic bool gZygoteChildProfileable = false;
 
 // =============================================================================
 
@@ -471,93 +471,6 @@
 }
 // =============================================================================
 
-// =============================================================================
-// Platform-internal mallopt variant.
-// =============================================================================
-__BIONIC_WEAK_FOR_NATIVE_BRIDGE
-extern "C" bool android_mallopt(int opcode, void* arg, size_t arg_size) {
-  if (opcode == M_SET_ZYGOTE_CHILD) {
-    if (arg != nullptr || arg_size != 0) {
-      errno = EINVAL;
-      return false;
-    }
-    gZygoteChild = true;
-    return true;
-  }
-  if (opcode == M_INIT_ZYGOTE_CHILD_PROFILING) {
-    if (arg != nullptr || arg_size != 0) {
-      errno = EINVAL;
-      return false;
-    }
-    atomic_store_explicit(&gZygoteChildProfileable, true, memory_order_release);
-    // Also check if heapprofd should start profiling from app startup.
-    HeapprofdInitZygoteChildProfiling();
-    return true;
-  }
-  if (opcode == M_GET_PROCESS_PROFILEABLE) {
-    if (arg == nullptr || arg_size != sizeof(bool)) {
-      errno = EINVAL;
-      return false;
-    }
-    // Native processes are considered profileable. Zygote children are considered
-    // profileable only when appropriately tagged.
-    *reinterpret_cast<bool*>(arg) =
-        !gZygoteChild || atomic_load_explicit(&gZygoteChildProfileable, memory_order_acquire);
-    return true;
-  }
-  if (opcode == M_SET_ALLOCATION_LIMIT_BYTES) {
-    return LimitEnable(arg, arg_size);
-  }
-  if (opcode == M_WRITE_MALLOC_LEAK_INFO_TO_FILE) {
-    if (arg == nullptr || arg_size != sizeof(FILE*)) {
-      errno = EINVAL;
-      return false;
-    }
-    return WriteMallocLeakInfo(reinterpret_cast<FILE*>(arg));
-  }
-  if (opcode == M_GET_MALLOC_LEAK_INFO) {
-    if (arg == nullptr || arg_size != sizeof(android_mallopt_leak_info_t)) {
-      errno = EINVAL;
-      return false;
-    }
-    return GetMallocLeakInfo(reinterpret_cast<android_mallopt_leak_info_t*>(arg));
-  }
-  if (opcode == M_FREE_MALLOC_LEAK_INFO) {
-    if (arg == nullptr || arg_size != sizeof(android_mallopt_leak_info_t)) {
-      errno = EINVAL;
-      return false;
-    }
-    return FreeMallocLeakInfo(reinterpret_cast<android_mallopt_leak_info_t*>(arg));
-  }
-  if (opcode == M_INITIALIZE_GWP_ASAN) {
-    if (arg == nullptr || arg_size != sizeof(android_mallopt_gwp_asan_options_t)) {
-      errno = EINVAL;
-      return false;
-    }
-
-    return EnableGwpAsan(*reinterpret_cast<android_mallopt_gwp_asan_options_t*>(arg));
-  }
-  if (opcode == M_MEMTAG_STACK_IS_ON) {
-    if (arg == nullptr || arg_size != sizeof(bool)) {
-      errno = EINVAL;
-      return false;
-    }
-    *reinterpret_cast<bool*>(arg) = atomic_load(&__libc_memtag_stack);
-    return true;
-  }
-  if (opcode == M_GET_DECAY_TIME_ENABLED) {
-    if (arg == nullptr || arg_size != sizeof(bool)) {
-      errno = EINVAL;
-      return false;
-    }
-    *reinterpret_cast<bool*>(arg) = atomic_load(&__libc_globals->decay_time_enabled);
-    return true;
-  }
-  // Try heapprofd's mallopt, as it handles options not covered here.
-  return HeapprofdMallopt(opcode, arg, arg_size);
-}
-// =============================================================================
-
 #if !defined(__LP64__) && defined(__arm__)
 // =============================================================================
 // Old platform only functions that some old 32 bit apps are still using.
diff --git a/libc/bionic/posix_timers.cpp b/libc/bionic/posix_timers.cpp
index 65749a4..9516059 100644
--- a/libc/bionic/posix_timers.cpp
+++ b/libc/bionic/posix_timers.cpp
@@ -141,7 +141,7 @@
   // Otherwise, this must be SIGEV_THREAD timer...
   timer->callback = evp->sigev_notify_function;
   timer->callback_argument = evp->sigev_value;
-  atomic_init(&timer->deleted, false);
+  atomic_store_explicit(&timer->deleted, false, memory_order_relaxed);
 
   // Check arguments that the kernel doesn't care about but we do.
   if (timer->callback == nullptr) {
diff --git a/libc/bionic/pthread_barrier.cpp b/libc/bionic/pthread_barrier.cpp
index 1618222..ff048a6 100644
--- a/libc/bionic/pthread_barrier.cpp
+++ b/libc/bionic/pthread_barrier.cpp
@@ -95,8 +95,8 @@
     return EINVAL;
   }
   barrier->init_count = count;
-  atomic_init(&barrier->state, WAIT);
-  atomic_init(&barrier->wait_count, 0);
+  atomic_store_explicit(&barrier->state, WAIT, memory_order_relaxed);
+  atomic_store_explicit(&barrier->wait_count, 0, memory_order_relaxed);
   barrier->pshared = false;
   if (attr != nullptr && (*attr & 1)) {
     barrier->pshared = true;
diff --git a/libc/bionic/pthread_cond.cpp b/libc/bionic/pthread_cond.cpp
index f444676..197fd19 100644
--- a/libc/bionic/pthread_cond.cpp
+++ b/libc/bionic/pthread_cond.cpp
@@ -140,10 +140,10 @@
   if (attr != nullptr) {
     init_state = (*attr & COND_FLAGS_MASK);
   }
-  atomic_init(&cond->state, init_state);
+  atomic_store_explicit(&cond->state, init_state, memory_order_relaxed);
 
 #if defined(__LP64__)
-  atomic_init(&cond->waiters, 0);
+  atomic_store_explicit(&cond->waiters, 0, memory_order_relaxed);
 #endif
 
   return 0;
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index ba20c51..1bd2da7 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -129,7 +129,7 @@
   // Align the address to SCS_SIZE so that we only need to store the lower log2(SCS_SIZE) bits
   // in jmp_buf. See the SCS commentary in pthread_internal.h for more detail.
   char* scs_aligned_guard_region =
-      reinterpret_cast<char*>(align_up(reinterpret_cast<uintptr_t>(scs_guard_region), SCS_SIZE));
+      reinterpret_cast<char*>(__builtin_align_up(reinterpret_cast<uintptr_t>(scs_guard_region), SCS_SIZE));
 
   // We need to ensure that [scs_offset,scs_offset+SCS_SIZE) is in the guard region and that there
   // is at least one unmapped page after the shadow call stack (to catch stack overflows). We can't
@@ -159,11 +159,11 @@
 int __init_thread(pthread_internal_t* thread) {
   thread->cleanup_stack = nullptr;
 
-  if (__predict_true((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) == 0)) {
-    atomic_init(&thread->join_state, THREAD_NOT_JOINED);
-  } else {
-    atomic_init(&thread->join_state, THREAD_DETACHED);
+  ThreadJoinState state = THREAD_NOT_JOINED;
+  if (__predict_false((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) != 0)) {
+    state = THREAD_DETACHED;
   }
+  atomic_store_explicit(&thread->join_state, state, memory_order_relaxed);
 
   // Set the scheduling policy/priority of the thread if necessary.
   bool need_set = true;
@@ -296,7 +296,7 @@
   // memory isn't counted in pthread_attr_getstacksize.
 
   // To safely access the pthread_internal_t and thread stack, we need to find a 16-byte aligned boundary.
-  stack_top = align_down(stack_top - sizeof(pthread_internal_t), 16);
+  stack_top = __builtin_align_down(stack_top - sizeof(pthread_internal_t), 16);
 
   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(stack_top);
   if (!stack_clean) {
@@ -351,15 +351,20 @@
 
 extern "C" int __rt_sigprocmask(int, const sigset64_t*, sigset64_t*, size_t);
 
-__attribute__((no_sanitize("hwaddress")))
+__attribute__((no_sanitize("hwaddress", "memtag")))
 #if defined(__aarch64__)
 // This function doesn't return, but it does appear in stack traces. Avoid using return PAC in this
 // function because we may end up resetting IA, which may confuse unwinders due to mismatching keys.
 __attribute__((target("branch-protection=bti")))
 #endif
-static int __pthread_start(void* arg) {
+static int
+__pthread_start(void* arg) {
   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(arg);
-
+#if defined(__aarch64__)
+  if (thread->should_allocate_stack_mte_ringbuffer) {
+    thread->bionic_tcb->tls_slot(TLS_SLOT_STACK_MTE) = __allocate_stack_mte_ringbuffer(0, thread);
+  }
+#endif
   __hwasan_thread_enter();
 
   // Wait for our creating thread to release us. This lets it have time to
@@ -450,9 +455,9 @@
 // This has to be done under g_thread_creation_lock or g_thread_list_lock to avoid racing with
 // __pthread_internal_remap_stack_with_mte.
 #ifdef __aarch64__
-  if (__libc_memtag_stack_abi) {
-    tcb->tls_slot(TLS_SLOT_STACK_MTE) = __allocate_stack_mte_ringbuffer(0, thread);
-  }
+  thread->should_allocate_stack_mte_ringbuffer = __libc_memtag_stack_abi;
+#else
+  thread->should_allocate_stack_mte_ringbuffer = false;
 #endif
 
   sigset64_t block_all_mask;
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index 0181aba..27d05c2 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -33,10 +33,11 @@
 #include <string.h>
 #include <sys/mman.h>
 
-#include "private/bionic_constants.h"
-#include "private/bionic_defs.h"
+#include "platform/bionic/mte.h"
 #include "private/ScopedRWLock.h"
 #include "private/ScopedSignalBlocker.h"
+#include "private/bionic_constants.h"
+#include "private/bionic_defs.h"
 #include "pthread_internal.h"
 
 extern "C" __noreturn void _exit_with_stack_teardown(void*, size_t);
@@ -67,7 +68,7 @@
 }
 
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
-void pthread_exit(void* return_value) {
+__attribute__((no_sanitize("memtag"))) void pthread_exit(void* return_value) {
   // Call dtors for thread_local objects first.
   __cxa_thread_finalize();
 
@@ -138,6 +139,13 @@
   __notify_thread_exit_callbacks();
   __hwasan_thread_exit();
 
+#if defined(__aarch64__)
+  if (void* stack_mte_tls = thread->bionic_tcb->tls_slot(TLS_SLOT_STACK_MTE)) {
+    stack_mte_free_ringbuffer(reinterpret_cast<uintptr_t>(stack_mte_tls));
+  }
+#endif
+  // Everything below this line needs to be no_sanitize("memtag").
+
   if (old_state == THREAD_DETACHED && thread->mmap_size != 0) {
     // We need to free mapped space for detached threads when they exit.
     // That's not something we can do in C.
diff --git a/libc/bionic/strnlen.cpp b/libc/bionic/pthread_getaffinity.cpp
similarity index 75%
copy from libc/bionic/strnlen.cpp
copy to libc/bionic/pthread_getaffinity.cpp
index 7101b21..9ce436c 100644
--- a/libc/bionic/strnlen.cpp
+++ b/libc/bionic/pthread_getaffinity.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (C) 2024 The Android Open Source Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,9 +26,17 @@
  * SUCH DAMAGE.
  */
 
-#include <string.h>
+#include <errno.h>
 
-size_t strnlen(const char* s, size_t n) {
-  const char* p = static_cast<const char*>(memchr(s, 0, n));
-  return p ? (p - s) : n;
+#include "private/ErrnoRestorer.h"
+#include "pthread_internal.h"
+
+int pthread_getaffinity_np(pthread_t t, size_t cpu_set_size, cpu_set_t* cpu_set) {
+  ErrnoRestorer errno_restorer;
+
+  pid_t tid = __pthread_internal_gettid(t, "pthread_getaffinity_np");
+  if (tid == -1) return ESRCH;
+
+  if (sched_getaffinity(tid, cpu_set_size, cpu_set) == -1) return errno;
+  return 0;
 }
diff --git a/libc/bionic/pthread_internal.cpp b/libc/bionic/pthread_internal.cpp
index 14cc7da..4f2ad0c 100644
--- a/libc/bionic/pthread_internal.cpp
+++ b/libc/bionic/pthread_internal.cpp
@@ -34,6 +34,7 @@
 #include <string.h>
 #include <sys/mman.h>
 #include <sys/prctl.h>
+#include <sys/types.h>
 
 #include <async_safe/log.h>
 #include <bionic/mte.h>
@@ -76,15 +77,6 @@
 }
 
 static void __pthread_internal_free(pthread_internal_t* thread) {
-#ifdef __aarch64__
-  if (void* stack_mte_tls = thread->bionic_tcb->tls_slot(TLS_SLOT_STACK_MTE)) {
-    size_t size =
-        stack_mte_ringbuffer_size_from_pointer(reinterpret_cast<uintptr_t>(stack_mte_tls));
-    void* ptr = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(stack_mte_tls) &
-                                        ((1ULL << 56ULL) - 1ULL));
-    munmap(ptr, size);
-  }
-#endif
   if (thread->mmap_size != 0) {
     // Free mapped space, including thread stack and pthread_internal_t.
     munmap(thread->mmap_base, thread->mmap_size);
@@ -216,7 +208,10 @@
   __libc_memtag_stack_abi = true;
 
   for (pthread_internal_t* t = g_thread_list; t != nullptr; t = t->next) {
-    if (t->terminating) continue;
+    // should_allocate_stack_mte_ringbuffer indicates the thread is already
+    // aware that this process requires stack MTE, and will allocate the
+    // ring buffer in __pthread_start.
+    if (t->terminating || t->should_allocate_stack_mte_ringbuffer) continue;
     t->bionic_tcb->tls_slot(TLS_SLOT_STACK_MTE) =
         __allocate_stack_mte_ringbuffer(0, t->is_main() ? nullptr : t);
   }
@@ -264,8 +259,7 @@
   g_func = func;
   g_arg = arg;
 
-  static _Atomic(bool) g_retval;
-  atomic_init(&g_retval, true);
+  static _Atomic(bool) g_retval(true);
 
   auto handler = [](int, siginfo_t*, void*) {
     ErrnoRestorer restorer;
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 5db42ab..cbaa9a6 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -181,6 +181,7 @@
 
   bionic_tcb* bionic_tcb;
   char stack_mte_ringbuffer_vma_name_buffer[32];
+  bool should_allocate_stack_mte_ringbuffer;
 
   bool is_main() { return start_routine == nullptr; }
 };
diff --git a/libc/bionic/pthread_mutex.cpp b/libc/bionic/pthread_mutex.cpp
index 0a452e9..c99717a 100644
--- a/libc/bionic/pthread_mutex.cpp
+++ b/libc/bionic/pthread_mutex.cpp
@@ -509,8 +509,8 @@
     memset(mutex, 0, sizeof(pthread_mutex_internal_t));
 
     if (__predict_true(attr == nullptr)) {
-        atomic_init(&mutex->state, MUTEX_TYPE_BITS_NORMAL);
-        return 0;
+      atomic_store_explicit(&mutex->state, MUTEX_TYPE_BITS_NORMAL, memory_order_relaxed);
+      return 0;
     }
 
     uint16_t state = 0;
@@ -543,13 +543,13 @@
         }
         mutex->pi_mutex_id = id;
 #endif
-        atomic_init(&mutex->state, PI_MUTEX_STATE);
+        atomic_store_explicit(&mutex->state, PI_MUTEX_STATE, memory_order_relaxed);
         PIMutex& pi_mutex = mutex->ToPIMutex();
         pi_mutex.type = *attr & MUTEXATTR_TYPE_MASK;
         pi_mutex.shared = (*attr & MUTEXATTR_SHARED_MASK) != 0;
     } else {
-        atomic_init(&mutex->state, state);
-        atomic_init(&mutex->owner_tid, 0);
+      atomic_store_explicit(&mutex->state, state, memory_order_relaxed);
+      atomic_store_explicit(&mutex->owner_tid, 0, memory_order_relaxed);
     }
     return 0;
 }
diff --git a/libc/bionic/pthread_rwlock.cpp b/libc/bionic/pthread_rwlock.cpp
index 6f3c6fe..92134b4 100644
--- a/libc/bionic/pthread_rwlock.cpp
+++ b/libc/bionic/pthread_rwlock.cpp
@@ -247,7 +247,7 @@
     }
   }
 
-  atomic_init(&rwlock->state, 0);
+  atomic_store_explicit(&rwlock->state, 0, memory_order_relaxed);
   rwlock->pending_lock.init(rwlock->pshared);
   return 0;
 }
diff --git a/libc/bionic/strnlen.cpp b/libc/bionic/pthread_setaffinity.cpp
similarity index 74%
copy from libc/bionic/strnlen.cpp
copy to libc/bionic/pthread_setaffinity.cpp
index 7101b21..6db418e 100644
--- a/libc/bionic/strnlen.cpp
+++ b/libc/bionic/pthread_setaffinity.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (C) 2024 The Android Open Source Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,9 +26,17 @@
  * SUCH DAMAGE.
  */
 
-#include <string.h>
+#include <errno.h>
 
-size_t strnlen(const char* s, size_t n) {
-  const char* p = static_cast<const char*>(memchr(s, 0, n));
-  return p ? (p - s) : n;
+#include "private/ErrnoRestorer.h"
+#include "pthread_internal.h"
+
+int pthread_setaffinity_np(pthread_t t, size_t cpu_set_size, const cpu_set_t* cpu_set) {
+  ErrnoRestorer errno_restorer;
+
+  pid_t tid = __pthread_internal_gettid(t, "pthread_setaffinity_np");
+  if (tid == -1) return ESRCH;
+
+  if (sched_setaffinity(tid, cpu_set_size, cpu_set) == -1) return errno;
+  return 0;
 }
diff --git a/libc/bionic/semaphore.cpp b/libc/bionic/semaphore.cpp
index 33552a9..2c9b745 100644
--- a/libc/bionic/semaphore.cpp
+++ b/libc/bionic/semaphore.cpp
@@ -113,7 +113,7 @@
   }
 
   atomic_uint* sem_count_ptr = SEM_TO_ATOMIC_POINTER(sem);
-  atomic_init(sem_count_ptr, count);
+  atomic_store_explicit(sem_count_ptr, count, memory_order_relaxed);
   return 0;
 }
 
diff --git a/libc/bionic/strchr.cpp b/libc/bionic/strchr.cpp
deleted file mode 100644
index fd8a924..0000000
--- a/libc/bionic/strchr.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <string.h>
-
-char* strchr(const char* p, int ch) {
-  return __strchr_chk(p, ch, __BIONIC_FORTIFY_UNKNOWN_SIZE);
-}
diff --git a/libc/bionic/strchrnul.cpp b/libc/bionic/strchrnul.cpp
deleted file mode 100644
index 55422e0..0000000
--- a/libc/bionic/strchrnul.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-extern "C" const char* strchrnul(const char* s, int ch) {
-  while (*s && *s != ch) {
-    ++s;
-  }
-  return s;
-}
diff --git a/libc/bionic/strrchr.cpp b/libc/bionic/strrchr.cpp
deleted file mode 100644
index b6c40f4..0000000
--- a/libc/bionic/strrchr.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 1988 Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <string.h>
-
-char* strrchr(const char* p, int ch) {
-  return __strrchr_chk(p, ch, __BIONIC_FORTIFY_UNKNOWN_SIZE);
-}
diff --git a/libc/include/android/versioning.h b/libc/include/android/versioning.h
index 1676a72..1cf6e51 100644
--- a/libc/include/android/versioning.h
+++ b/libc/include/android/versioning.h
@@ -80,10 +80,3 @@
 #define __INTRODUCED_IN_32(api_level)
 #define __INTRODUCED_IN_64(api_level) __BIONIC_AVAILABILITY(introduced=api_level)
 #endif
-
-// Vendor and product modules do not follow SDK versioning. Ignore NDK guards for these modules.
-#if defined(__ANDROID_VNDK__)
-#undef __BIONIC_AVAILABILITY
-#define __BIONIC_AVAILABILITY(api_level, ...)
-#define __BIONIC_AVAILABILITY_GUARD(api_level) 1
-#endif // defined(__ANDROID_VNDK__)
diff --git a/libc/include/ctype.h b/libc/include/ctype.h
index cb926a4..dc3f673 100644
--- a/libc/include/ctype.h
+++ b/libc/include/ctype.h
@@ -95,7 +95,7 @@
 
 /** Internal implementation detail. Do not use. */
 __attribute__((__no_sanitize__("unsigned-integer-overflow")))
-static inline int __bionic_ctype_in_range(unsigned __lo, int __ch, unsigned __hi) {
+__BIONIC_CTYPE_INLINE int __bionic_ctype_in_range(unsigned __lo, int __ch, unsigned __hi) {
   return (__BIONIC_CAST(static_cast, unsigned, __ch) - __lo) < (__hi - __lo + 1);
 }
 
diff --git a/libc/include/dlfcn.h b/libc/include/dlfcn.h
index 67759b8..81045fd 100644
--- a/libc/include/dlfcn.h
+++ b/libc/include/dlfcn.h
@@ -32,6 +32,17 @@
 
 #include <stdint.h>
 
+/**
+ * @addtogroup libdl Dynamic Linker
+ * @{
+ */
+
+/**
+ * \file
+ * Standard dynamic library support.
+ * See also the Android-specific functionality in `<android/dlext.h>`.
+ */
+
 __BEGIN_DECLS
 
 /**
@@ -52,6 +63,8 @@
  * [dlopen(3)](https://man7.org/linux/man-pages/man3/dlopen.3.html)
  * loads the given shared library.
  *
+ * See also android_dlopen_ext().
+ *
  * Returns a pointer to an opaque handle for use with other <dlfcn.h> functions
  * on success, and returns NULL on failure, in which case dlerror() can be used
  * to retrieve the specific error.
@@ -191,3 +204,5 @@
 #endif
 
 __END_DECLS
+
+/** @} */
diff --git a/libc/include/limits.h b/libc/include/limits.h
index 5e9ce59..5c0ef6d 100644
--- a/libc/include/limits.h
+++ b/libc/include/limits.h
@@ -60,24 +60,6 @@
 /* Many of the POSIX limits come from the kernel. */
 #include <linux/limits.h>
 
-/*
- * bionic always exposed these alternative names,
- * but clang's <limits.h> considers them GNU extensions,
- * and may or may not have defined them.
- */
-#ifndef LONG_LONG_MIN
-/** Non-portable synonym; use LLONG_MIN directly instead. */
-#define LONG_LONG_MIN LLONG_MIN
-#endif
-#ifndef LONG_LONG_MAX
-/** Non-portable synonym; use LLONG_MAX directly instead. */
-#define LONG_LONG_MAX LLONG_MAX
-#endif
-#ifndef ULONG_LONG_MAX
-/** Non-portable synonym; use ULLONG_MAX directly instead. */
-#define ULONG_LONG_MAX ULLONG_MAX
-#endif
-
 /** Maximum number of positional arguments in a printf()/scanf() format string. */
 #define NL_ARGMAX 9
 /** Maximum number of bytes in a $LANG name. */
diff --git a/libc/include/malloc.h b/libc/include/malloc.h
index dc2ca2b..ac27467 100644
--- a/libc/include/malloc.h
+++ b/libc/include/malloc.h
@@ -89,9 +89,9 @@
  */
 #if __ANDROID_API__ >= 29
 __nodiscard void* _Nullable reallocarray(void* _Nullable __ptr, size_t __item_count, size_t __item_size) __BIONIC_ALLOC_SIZE(2, 3) __INTRODUCED_IN(29);
-#else
+#elif defined(__ANDROID_UNAVAILABLE_SYMBOLS_ARE_WEAK__)
 #include <errno.h>
-static __inline __nodiscard void* _Nullable reallocarray(void* _Nullable __ptr, size_t __item_count, size_t __item_size) {
+static __inline __nodiscard void* _Nullable reallocarray(void* _Nullable __ptr, size_t __item_count, size_t __item_size) __BIONIC_ALLOC_SIZE(2, 3) {
   size_t __new_size;
   if (__builtin_mul_overflow(__item_count, __item_size, &__new_size)) {
     errno = ENOMEM;
diff --git a/libc/include/pthread.h b/libc/include/pthread.h
index 8b78a74..cdf1b8c 100644
--- a/libc/include/pthread.h
+++ b/libc/include/pthread.h
@@ -314,17 +314,60 @@
 
 pthread_t pthread_self(void) __attribute_const__;
 
-#if defined(__USE_GNU)
-
-#if __BIONIC_AVAILABILITY_GUARD(26)
+#if defined(__USE_GNU) && __BIONIC_AVAILABILITY_GUARD(26)
+/**
+ * [pthread_getname_np(3)](https://man7.org/linux/man-pages/man3/pthread_getname_np.3.html)
+ * gets the name of the given thread.
+ * Names are at most 16 bytes (including '\0').
+ *
+ * Returns 0 on success and returns an error number on failure.
+ *
+ * Available since API level 26.
+ */
 int pthread_getname_np(pthread_t __pthread, char* _Nonnull __buf, size_t __n) __INTRODUCED_IN(26);
-#endif /* __BIONIC_AVAILABILITY_GUARD(26) */
-
 #endif
-/* TODO: this should be __USE_GNU too. */
+
+/**
+ * [pthread_setname_np(3)](https://man7.org/linux/man-pages/man3/pthread_setname_np.3.html)
+ * sets the name of the given thread.
+ * Names are at most 16 bytes (including '\0').
+ * Truncation must be done by the caller;
+ * calls with longer names will fail with ERANGE.
+ *
+ * Returns 0 on success and returns an error number on failure.
+ *
+ * This should only have been available under _GNU_SOURCE,
+ * but is always available on Android by historical accident.
+ */
 int pthread_setname_np(pthread_t __pthread, const char* _Nonnull __name);
 
 /**
+ * [pthread_getaffinity_np(3)](https://man7.org/linux/man-pages/man3/pthread_getaffinity_np.3.html)
+ * gets the CPU affinity mask for the given thread.
+ *
+ * Returns 0 on success and returns an error number on failure.
+ *
+ * Available since API level 36.
+ * See sched_getaffinity() and pthread_gettid_np() for greater portability.
+ */
+#if defined(__USE_GNU) && __BIONIC_AVAILABILITY_GUARD(36)
+int pthread_getaffinity_np(pthread_t __pthread, size_t __cpu_set_size, cpu_set_t* __cpu_set) __INTRODUCED_IN(36);
+#endif
+
+/**
+ * [pthread_setaffinity_np(3)](https://man7.org/linux/man-pages/man3/pthread_setaffinity_np.3.html)
+ * sets the CPU affinity mask for the given thread.
+ *
+ * Returns 0 on success and returns an error number on failure.
+ *
+ * Available since API level 36.
+ * See sched_getaffinity() and pthread_gettid_np() for greater portability.
+ */
+#if defined(__USE_GNU) && __BIONIC_AVAILABILITY_GUARD(36)
+int pthread_setaffinity_np(pthread_t __pthread, size_t __cpu_set_size, const cpu_set_t* __cpu_set) __INTRODUCED_IN(36);
+#endif
+
+/**
  * [pthread_setschedparam(3)](https://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html)
  * sets the scheduler policy and parameters of the given thread.
  *
diff --git a/libc/include/sys/select.h b/libc/include/sys/select.h
index a7227b0..4183d90 100644
--- a/libc/include/sys/select.h
+++ b/libc/include/sys/select.h
@@ -30,7 +30,9 @@
 
 /**
  * @file sys/select.h
- * @brief Wait for events on a set of file descriptors (but use <poll.h> instead).
+ * @brief Wait for events on a set of file descriptors.
+ * New code should prefer the different interface specified in <poll.h> instead,
+ * because it scales better and easily avoids the limits on `fd_set` size.
  */
 
 #include <sys/cdefs.h>
@@ -44,8 +46,10 @@
 typedef unsigned long fd_mask;
 
 /**
- * The limit on the largest fd that can be used with fd_set.
- * Use <poll.h> instead.
+ * The limit on the largest fd that can be used with type `fd_set`.
+ * You can allocate your own memory,
+ * but new code should prefer the different interface specified in <poll.h> instead,
+ * because it scales better and easily avoids the limits on `fd_set` size.
  */
 #define FD_SETSIZE 1024
 #define NFDBITS (8 * sizeof(fd_mask))
@@ -55,7 +59,8 @@
  * The underlying system calls do not have this limit,
  * and callers can allocate their own sets with calloc().
  *
- * Use <poll.h> instead.
+ * New code should prefer the different interface specified in <poll.h> instead,
+ * because it scales better and easily avoids the limits on `fd_set` size.
  */
 typedef struct {
   fd_mask fds_bits[FD_SETSIZE/NFDBITS];
@@ -69,28 +74,62 @@
 void __FD_SET_chk(int, fd_set* _Nonnull, size_t);
 int __FD_ISSET_chk(int, const fd_set* _Nonnull, size_t);
 
-/** FD_CLR() with no bounds checking for users that allocated their own set. */
+/**
+ * FD_CLR() with no bounds checking for users that allocated their own set.
+ * New code should prefer <poll.h> instead.
+ */
 #define __FD_CLR(fd, set) (__FDS_BITS(fd_set*, set)[__FDELT(fd)] &= ~__FDMASK(fd))
-/** FD_SET() with no bounds checking for users that allocated their own set. */
+
+/**
+ * FD_SET() with no bounds checking for users that allocated their own set.
+ * New code should prefer <poll.h> instead.
+ */
 #define __FD_SET(fd, set) (__FDS_BITS(fd_set*, set)[__FDELT(fd)] |= __FDMASK(fd))
-/** FD_ISSET() with no bounds checking for users that allocated their own set. */
+
+/**
+ * FD_ISSET() with no bounds checking for users that allocated their own set.
+ * New code should prefer <poll.h> instead.
+ */
 #define __FD_ISSET(fd, set) ((__FDS_BITS(const fd_set*, set)[__FDELT(fd)] & __FDMASK(fd)) != 0)
 
-/** Removes all 1024 fds from the given set. Use <poll.h> instead. */
+/**
+ * Removes all 1024 fds from the given set.
+ * Limited to fds under 1024.
+ * New code should prefer <poll.h> instead for this reason,
+ * rather than using memset() directly.
+ */
 #define FD_ZERO(set) __builtin_memset(set, 0, sizeof(*__BIONIC_CAST(static_cast, const fd_set*, set)))
 
-/** Removes `fd` from the given set. Limited to fds under 1024. Use <poll.h> instead. */
+/**
+ * Removes `fd` from the given set.
+ * Limited to fds under 1024.
+ * New code should prefer <poll.h> instead for this reason,
+ * rather than using __FD_CLR().
+ */
 #define FD_CLR(fd, set) __FD_CLR_chk(fd, set, __bos(set))
-/** Adds `fd` to the given set. Limited to fds under 1024. Use <poll.h> instead. */
+
+/**
+ * Adds `fd` to the given set.
+ * Limited to fds under 1024.
+ * New code should prefer <poll.h> instead for this reason,
+ * rather than using __FD_SET().
+ */
 #define FD_SET(fd, set) __FD_SET_chk(fd, set, __bos(set))
-/** Tests whether `fd` is in the given set. Limited to fds under 1024. Use <poll.h> instead. */
+
+/**
+ * Tests whether `fd` is in the given set.
+ * Limited to fds under 1024.
+ * New code should prefer <poll.h> instead for this reason,
+ * rather than using __FD_ISSET().
+ */
 #define FD_ISSET(fd, set) __FD_ISSET_chk(fd, set, __bos(set))
 
 /**
  * [select(2)](https://man7.org/linux/man-pages/man2/select.2.html) waits on a
  * set of file descriptors.
  *
- * Use poll() instead.
+ * New code should prefer poll() from <poll.h> instead,
+ * because it scales better and easily avoids the limits on `fd_set` size.
  *
  * Returns the number of ready file descriptors on success, 0 for timeout,
  * and returns -1 and sets `errno` on failure.
@@ -101,7 +140,8 @@
  * [pselect(2)](https://man7.org/linux/man-pages/man2/select.2.html) waits on a
  * set of file descriptors.
  *
- * Use ppoll() instead.
+ * New code should prefer ppoll() from <poll.h> instead,
+ * because it scales better and easily avoids the limits on `fd_set` size.
  *
  * Returns the number of ready file descriptors on success, 0 for timeout,
  * and returns -1 and sets `errno` on failure.
@@ -112,17 +152,16 @@
  * [pselect64(2)](https://man7.org/linux/man-pages/man2/select.2.html) waits on a
  * set of file descriptors.
  *
- * Use ppoll64() instead.
+ * New code should prefer ppoll64() from <poll.h> instead,
+ * because it scales better and easily avoids the limits on `fd_set` size.
  *
  * Returns the number of ready file descriptors on success, 0 for timeout,
  * and returns -1 and sets `errno` on failure.
  *
  * Available since API level 28.
  */
-
 #if __BIONIC_AVAILABILITY_GUARD(28)
 int pselect64(int __max_fd_plus_one, fd_set* _Nullable __read_fds, fd_set* _Nullable __write_fds, fd_set* _Nullable __exception_fds, const struct timespec* _Nullable __timeout, const sigset64_t* _Nullable __mask) __INTRODUCED_IN(28);
 #endif /* __BIONIC_AVAILABILITY_GUARD(28) */
 
-
 __END_DECLS
diff --git a/libc/kernel/tools/defaults.py b/libc/kernel/tools/defaults.py
index a71318e..2994e5e 100644
--- a/libc/kernel/tools/defaults.py
+++ b/libc/kernel/tools/defaults.py
@@ -25,6 +25,23 @@
     # Otherwise, there will be two struct timeval definitions when
     # __kernel_old_timeval is renamed to timeval.
     "__kernel_old_timeval": "1",
+    # Drop the custom byte swap functions and just use the clang builtins.
+    # https://github.com/android/ndk/issues/2107
+    "__arch_swab16": kCppUndefinedMacro,
+    "__arch_swab16p": kCppUndefinedMacro,
+    "__arch_swab16s": kCppUndefinedMacro,
+    "__arch_swab32": kCppUndefinedMacro,
+    "__arch_swab32p": kCppUndefinedMacro,
+    "__arch_swab32s": kCppUndefinedMacro,
+    "__arch_swab64": kCppUndefinedMacro,
+    "__arch_swab64p": kCppUndefinedMacro,
+    "__arch_swab64s": kCppUndefinedMacro,
+    "__arch_swahb32": kCppUndefinedMacro,
+    "__arch_swahb32p": kCppUndefinedMacro,
+    "__arch_swahb32s": kCppUndefinedMacro,
+    "__arch_swahw32": kCppUndefinedMacro,
+    "__arch_swahw32p": kCppUndefinedMacro,
+    "__arch_swahw32s": kCppUndefinedMacro,
     }
 
 # This is the set of known kernel data structures we want to remove from
@@ -133,9 +150,6 @@
           # These are required to support the above functions.
           "__fswahw32",
           "__fswahb32",
-          # As are these, for ILP32.
-          "__arch_swab32",
-          "__arch_swab64",
           # This is used by various macros in <linux/ioprio.h>.
           "ioprio_value",
 
diff --git a/libc/kernel/uapi/asm-arm/asm/swab.h b/libc/kernel/uapi/asm-arm/asm/swab.h
index 3fff953..7684c22 100644
--- a/libc/kernel/uapi/asm-arm/asm/swab.h
+++ b/libc/kernel/uapi/asm-arm/asm/swab.h
@@ -11,18 +11,7 @@
 #ifndef __STRICT_ANSI__
 #define __SWAB_64_THRU_32__
 #endif
-static inline __attribute__((__const__)) __u32 __arch_swab32(__u32 x) {
-  __u32 t;
 #ifndef __thumb__
-  if(! __builtin_constant_p(x)) {
-    asm("eor\t%0, %1, %1, ror #16" : "=r" (t) : "r" (x));
-  } else
 #endif
-  t = x ^ ((x << 16) | (x >> 16));
-  x = (x << 24) | (x >> 8);
-  t &= ~0x00FF0000;
-  x ^= (t >> 8);
-  return x;
-}
 #define __arch_swab32 __arch_swab32
 #endif
diff --git a/libc/kernel/uapi/asm-arm64/asm/hwcap.h b/libc/kernel/uapi/asm-arm64/asm/hwcap.h
index f5c720a..45cc945 100644
--- a/libc/kernel/uapi/asm-arm64/asm/hwcap.h
+++ b/libc/kernel/uapi/asm-arm64/asm/hwcap.h
@@ -101,4 +101,5 @@
 #define HWCAP2_SME_SF8FMA (1UL << 60)
 #define HWCAP2_SME_SF8DP4 (1UL << 61)
 #define HWCAP2_SME_SF8DP2 (1UL << 62)
+#define HWCAP2_POE (1UL << 63)
 #endif
diff --git a/libc/kernel/uapi/asm-arm64/asm/mman.h b/libc/kernel/uapi/asm-arm64/asm/mman.h
index 1561053..cc92abe 100644
--- a/libc/kernel/uapi/asm-arm64/asm/mman.h
+++ b/libc/kernel/uapi/asm-arm64/asm/mman.h
@@ -9,4 +9,8 @@
 #include <asm-generic/mman.h>
 #define PROT_BTI 0x10
 #define PROT_MTE 0x20
+#define PKEY_DISABLE_EXECUTE 0x4
+#define PKEY_DISABLE_READ 0x8
+#undef PKEY_ACCESS_MASK
+#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE | PKEY_DISABLE_READ | PKEY_DISABLE_EXECUTE)
 #endif
diff --git a/libc/kernel/uapi/asm-arm64/asm/sigcontext.h b/libc/kernel/uapi/asm-arm64/asm/sigcontext.h
index 8e48d55..a845a03 100644
--- a/libc/kernel/uapi/asm-arm64/asm/sigcontext.h
+++ b/libc/kernel/uapi/asm-arm64/asm/sigcontext.h
@@ -32,6 +32,11 @@
   struct _aarch64_ctx head;
   __u64 esr;
 };
+#define POE_MAGIC 0x504f4530
+struct poe_context {
+  struct _aarch64_ctx head;
+  __u64 por_el0;
+};
 #define EXTRA_MAGIC 0x45585401
 struct extra_context {
   struct _aarch64_ctx head;
@@ -95,12 +100,12 @@
 #define SVE_SIG_REGS_SIZE(vq) (__SVE_FFR_OFFSET(vq) + __SVE_FFR_SIZE(vq))
 #define SVE_SIG_CONTEXT_SIZE(vq) (SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq))
 #define ZA_SIG_REGS_OFFSET ((sizeof(struct za_context) + (__SVE_VQ_BYTES - 1)) / __SVE_VQ_BYTES * __SVE_VQ_BYTES)
-#define ZA_SIG_REGS_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES))
-#define ZA_SIG_ZAV_OFFSET(vq,n) (ZA_SIG_REGS_OFFSET + (SVE_SIG_ZREG_SIZE(vq) * n))
+#define ZA_SIG_REGS_SIZE(vq) (((vq) * __SVE_VQ_BYTES) * ((vq) * __SVE_VQ_BYTES))
+#define ZA_SIG_ZAV_OFFSET(vq,n) (ZA_SIG_REGS_OFFSET + (SVE_SIG_ZREG_SIZE(vq) * (n)))
 #define ZA_SIG_CONTEXT_SIZE(vq) (ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq))
 #define ZT_SIG_REG_SIZE 512
 #define ZT_SIG_REG_BYTES (ZT_SIG_REG_SIZE / 8)
 #define ZT_SIG_REGS_OFFSET sizeof(struct zt_context)
-#define ZT_SIG_REGS_SIZE(n) (ZT_SIG_REG_BYTES * n)
+#define ZT_SIG_REGS_SIZE(n) (ZT_SIG_REG_BYTES * (n))
 #define ZT_SIG_CONTEXT_SIZE(n) (sizeof(struct zt_context) + ZT_SIG_REGS_SIZE(n))
 #endif
diff --git a/libc/kernel/uapi/asm-generic/socket.h b/libc/kernel/uapi/asm-generic/socket.h
index 2d90586..a580c4c 100644
--- a/libc/kernel/uapi/asm-generic/socket.h
+++ b/libc/kernel/uapi/asm-generic/socket.h
@@ -92,6 +92,11 @@
 #define SO_RCVMARK 75
 #define SO_PASSPIDFD 76
 #define SO_PEERPIDFD 77
+#define SO_DEVMEM_LINEAR 78
+#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR
+#define SO_DEVMEM_DMABUF 79
+#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF
+#define SO_DEVMEM_DONTNEED 80
 #if __BITS_PER_LONG == 64 || defined(__x86_64__) && defined(__ILP32__)
 #define SO_TIMESTAMP SO_TIMESTAMP_OLD
 #define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD
diff --git a/libc/kernel/uapi/asm-x86/asm/elf.h b/libc/kernel/uapi/asm-x86/asm/elf.h
new file mode 100644
index 0000000..b66a229
--- /dev/null
+++ b/libc/kernel/uapi/asm-x86/asm/elf.h
@@ -0,0 +1,16 @@
+/*
+ * This file is auto-generated. Modifications will be lost.
+ *
+ * See https://android.googlesource.com/platform/bionic/+/master/libc/kernel/
+ * for more information.
+ */
+#ifndef _UAPI_ASM_X86_ELF_H
+#define _UAPI_ASM_X86_ELF_H
+#include <linux/types.h>
+struct x86_xfeat_component {
+  __u32 type;
+  __u32 size;
+  __u32 offset;
+  __u32 flags;
+} __attribute__((__packed__));
+#endif
diff --git a/libc/kernel/uapi/asm-x86/asm/kvm.h b/libc/kernel/uapi/asm-x86/asm/kvm.h
index cd647b6..0a35412 100644
--- a/libc/kernel/uapi/asm-x86/asm/kvm.h
+++ b/libc/kernel/uapi/asm-x86/asm/kvm.h
@@ -342,6 +342,7 @@
 #define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
 #define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5)
 #define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6)
+#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7)
 #define KVM_STATE_NESTED_FORMAT_VMX 0
 #define KVM_STATE_NESTED_FORMAT_SVM 1
 #define KVM_STATE_NESTED_GUEST_MODE 0x00000001
diff --git a/libc/kernel/uapi/asm-x86/asm/swab.h b/libc/kernel/uapi/asm-x86/asm/swab.h
index ce43658..31c850d 100644
--- a/libc/kernel/uapi/asm-x86/asm/swab.h
+++ b/libc/kernel/uapi/asm-x86/asm/swab.h
@@ -8,27 +8,9 @@
 #define _ASM_X86_SWAB_H
 #include <linux/types.h>
 #include <linux/compiler.h>
-static inline __attribute__((__const__)) __u32 __arch_swab32(__u32 val) {
-  asm("bswapl %0" : "=r" (val) : "0" (val));
-  return val;
-}
 #define __arch_swab32 __arch_swab32
-static inline __attribute__((__const__)) __u64 __arch_swab64(__u64 val) {
 #ifdef __i386__
-  union {
-    struct {
-      __u32 a;
-      __u32 b;
-    } s;
-    __u64 u;
-  } v;
-  v.u = val;
-  asm("bswapl %0; bswapl %1; xchgl %0,%1" : "=r" (v.s.a), "=r" (v.s.b) : "0" (v.s.a), "1" (v.s.b));
-  return v.u;
 #else
-  asm("bswapq %0" : "=r" (val) : "0" (val));
-  return val;
 #endif
-}
 #define __arch_swab64 __arch_swab64
 #endif
diff --git a/libc/kernel/uapi/drm/drm_fourcc.h b/libc/kernel/uapi/drm/drm_fourcc.h
index 4902d6c..c0f5ff1 100644
--- a/libc/kernel/uapi/drm/drm_fourcc.h
+++ b/libc/kernel/uapi/drm/drm_fourcc.h
@@ -172,6 +172,8 @@
 #define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS fourcc_mod_code(INTEL, 13)
 #define I915_FORMAT_MOD_4_TILED_MTL_MC_CCS fourcc_mod_code(INTEL, 14)
 #define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS_CC fourcc_mod_code(INTEL, 15)
+#define I915_FORMAT_MOD_4_TILED_LNL_CCS fourcc_mod_code(INTEL, 16)
+#define I915_FORMAT_MOD_4_TILED_BMG_CCS fourcc_mod_code(INTEL, 17)
 #define DRM_FORMAT_MOD_SAMSUNG_64_32_TILE fourcc_mod_code(SAMSUNG, 1)
 #define DRM_FORMAT_MOD_SAMSUNG_16_16_TILE fourcc_mod_code(SAMSUNG, 2)
 #define DRM_FORMAT_MOD_QCOM_COMPRESSED fourcc_mod_code(QCOM, 1)
diff --git a/libc/kernel/uapi/drm/msm_drm.h b/libc/kernel/uapi/drm/msm_drm.h
index 7ec5ed2..582da62 100644
--- a/libc/kernel/uapi/drm/msm_drm.h
+++ b/libc/kernel/uapi/drm/msm_drm.h
@@ -38,6 +38,8 @@
 #define MSM_PARAM_VA_SIZE 0x0f
 #define MSM_PARAM_HIGHEST_BANK_BIT 0x10
 #define MSM_PARAM_RAYTRACING 0x11
+#define MSM_PARAM_UBWC_SWIZZLE 0x12
+#define MSM_PARAM_MACROTILE_MODE 0x13
 #define MSM_PARAM_NR_RINGS MSM_PARAM_PRIORITIES
 struct drm_msm_param {
   __u32 pipe;
diff --git a/libc/kernel/uapi/drm/xe_drm.h b/libc/kernel/uapi/drm/xe_drm.h
index a034b29..16bc3b3 100644
--- a/libc/kernel/uapi/drm/xe_drm.h
+++ b/libc/kernel/uapi/drm/xe_drm.h
@@ -124,6 +124,7 @@
 #define DRM_XE_TOPO_DSS_COMPUTE 2
 #define DRM_XE_TOPO_L3_BANK 3
 #define DRM_XE_TOPO_EU_PER_DSS 4
+#define DRM_XE_TOPO_SIMD16_EU_PER_DSS 5
   __u16 type;
   __u32 num_bytes;
   __u8 mask[];
diff --git a/libc/kernel/uapi/linux/android/binder.h b/libc/kernel/uapi/linux/android/binder.h
index 6e64ebc..273ee59 100644
--- a/libc/kernel/uapi/linux/android/binder.h
+++ b/libc/kernel/uapi/linux/android/binder.h
@@ -115,6 +115,11 @@
   __u32 sync_recv;
   __u32 async_recv;
 };
+struct binder_frozen_state_info {
+  binder_uintptr_t cookie;
+  __u32 is_frozen;
+  __u32 reserved;
+};
 struct binder_extended_error {
   __u32 id;
   __u32 command;
@@ -212,6 +217,8 @@
   BR_FROZEN_REPLY = _IO('r', 18),
   BR_ONEWAY_SPAM_SUSPECT = _IO('r', 19),
   BR_TRANSACTION_PENDING_FROZEN = _IO('r', 20),
+  BR_FROZEN_BINDER = _IOR('r', 21, struct binder_frozen_state_info),
+  BR_CLEAR_FREEZE_NOTIFICATION_DONE = _IOR('r', 22, binder_uintptr_t),
 };
 enum binder_driver_command_protocol {
   BC_TRANSACTION = _IOW('c', 0, struct binder_transaction_data),
@@ -233,5 +240,8 @@
   BC_DEAD_BINDER_DONE = _IOW('c', 16, binder_uintptr_t),
   BC_TRANSACTION_SG = _IOW('c', 17, struct binder_transaction_data_sg),
   BC_REPLY_SG = _IOW('c', 18, struct binder_transaction_data_sg),
+  BC_REQUEST_FREEZE_NOTIFICATION = _IOW('c', 19, struct binder_handle_cookie),
+  BC_CLEAR_FREEZE_NOTIFICATION = _IOW('c', 20, struct binder_handle_cookie),
+  BC_FREEZE_NOTIFICATION_DONE = _IOW('c', 21, binder_uintptr_t),
 };
 #endif
diff --git a/libc/kernel/uapi/linux/audit.h b/libc/kernel/uapi/linux/audit.h
index 98849f1..ae50fcc 100644
--- a/libc/kernel/uapi/linux/audit.h
+++ b/libc/kernel/uapi/linux/audit.h
@@ -95,6 +95,9 @@
 #define AUDIT_MAC_UNLBL_STCDEL 1417
 #define AUDIT_MAC_CALIPSO_ADD 1418
 #define AUDIT_MAC_CALIPSO_DEL 1419
+#define AUDIT_IPE_ACCESS 1420
+#define AUDIT_IPE_CONFIG_CHANGE 1421
+#define AUDIT_IPE_POLICY_LOAD 1422
 #define AUDIT_FIRST_KERN_ANOM_MSG 1700
 #define AUDIT_LAST_KERN_ANOM_MSG 1799
 #define AUDIT_ANOM_PROMISCUOUS 1700
diff --git a/libc/kernel/uapi/linux/auto_fs.h b/libc/kernel/uapi/linux/auto_fs.h
index dd11a93..a48a887 100644
--- a/libc/kernel/uapi/linux/auto_fs.h
+++ b/libc/kernel/uapi/linux/auto_fs.h
@@ -12,7 +12,7 @@
 #define AUTOFS_PROTO_VERSION 5
 #define AUTOFS_MIN_PROTO_VERSION 3
 #define AUTOFS_MAX_PROTO_VERSION 5
-#define AUTOFS_PROTO_SUBVERSION 5
+#define AUTOFS_PROTO_SUBVERSION 6
 #if defined(__ia64__) || defined(__alpha__)
 typedef unsigned long autofs_wqt_t;
 #else
diff --git a/libc/kernel/uapi/linux/bits.h b/libc/kernel/uapi/linux/bits.h
index d747e24..2b8dbe2 100644
--- a/libc/kernel/uapi/linux/bits.h
+++ b/libc/kernel/uapi/linux/bits.h
@@ -8,4 +8,5 @@
 #define _UAPI_LINUX_BITS_H
 #define __GENMASK(h,l) (((~_UL(0)) - (_UL(1) << (l)) + 1) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h))))
 #define __GENMASK_ULL(h,l) (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h))))
+#define __GENMASK_U128(h,l) ((_BIT128((h)) << 1) - (_BIT128(l)))
 #endif
diff --git a/libc/kernel/uapi/linux/blkdev.h b/libc/kernel/uapi/linux/blkdev.h
new file mode 100644
index 0000000..103fa0f
--- /dev/null
+++ b/libc/kernel/uapi/linux/blkdev.h
@@ -0,0 +1,12 @@
+/*
+ * This file is auto-generated. Modifications will be lost.
+ *
+ * See https://android.googlesource.com/platform/bionic/+/master/libc/kernel/
+ * for more information.
+ */
+#ifndef _UAPI_LINUX_BLKDEV_H
+#define _UAPI_LINUX_BLKDEV_H
+#include <linux/ioctl.h>
+#include <linux/types.h>
+#define BLOCK_URING_CMD_DISCARD _IO(0x12, 0)
+#endif
diff --git a/libc/kernel/uapi/linux/bpf.h b/libc/kernel/uapi/linux/bpf.h
index 8d64816..c0d862d 100644
--- a/libc/kernel/uapi/linux/bpf.h
+++ b/libc/kernel/uapi/linux/bpf.h
@@ -671,9 +671,6 @@
   BPF_F_MARK_ENFORCE = (1ULL << 6),
 };
 enum {
-  BPF_F_INGRESS = (1ULL << 0),
-};
-enum {
   BPF_F_TUNINFO_IPV6 = (1ULL << 0),
 };
 enum {
@@ -768,8 +765,10 @@
   BPF_F_BPRM_SECUREEXEC = (1ULL << 0),
 };
 enum {
+  BPF_F_INGRESS = (1ULL << 0),
   BPF_F_BROADCAST = (1ULL << 3),
   BPF_F_EXCLUDE_INGRESS = (1ULL << 4),
+#define BPF_F_REDIRECT_FLAGS (BPF_F_INGRESS | BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS)
 };
 #define __bpf_md_ptr(type,name) union { type name; __u64 : 64; \
 } __attribute__((aligned(8)))
@@ -1275,6 +1274,7 @@
   TCP_BPF_SYN = 1005,
   TCP_BPF_SYN_IP = 1006,
   TCP_BPF_SYN_MAC = 1007,
+  TCP_BPF_SOCK_OPS_CB_FLAGS = 1008,
 };
 enum {
   BPF_LOAD_HDR_OPT_TCP_SYN = (1ULL << 0),
@@ -1528,4 +1528,7 @@
 struct bpf_iter_num {
   __u64 __opaque[1];
 } __attribute__((aligned(8)));
+enum bpf_kfunc_flags {
+  BPF_F_PAD_ZEROS = (1ULL << 0),
+};
 #endif
diff --git a/libc/kernel/uapi/linux/cec.h b/libc/kernel/uapi/linux/cec.h
index 43e8456..91f4d67 100644
--- a/libc/kernel/uapi/linux/cec.h
+++ b/libc/kernel/uapi/linux/cec.h
@@ -27,6 +27,7 @@
 };
 #define CEC_MSG_FL_REPLY_TO_FOLLOWERS (1 << 0)
 #define CEC_MSG_FL_RAW (1 << 1)
+#define CEC_MSG_FL_REPLY_VENDOR_ID (1 << 2)
 #define CEC_TX_STATUS_OK (1 << 0)
 #define CEC_TX_STATUS_ARB_LOST (1 << 1)
 #define CEC_TX_STATUS_NACK (1 << 2)
@@ -96,6 +97,7 @@
 #define CEC_CAP_NEEDS_HPD (1 << 6)
 #define CEC_CAP_MONITOR_PIN (1 << 7)
 #define CEC_CAP_CONNECTOR_INFO (1 << 8)
+#define CEC_CAP_REPLY_VENDOR_ID (1 << 9)
 struct cec_caps {
   char driver[32];
   char name[32];
diff --git a/libc/kernel/uapi/linux/const.h b/libc/kernel/uapi/linux/const.h
index c091f8d..b45b722 100644
--- a/libc/kernel/uapi/linux/const.h
+++ b/libc/kernel/uapi/linux/const.h
@@ -18,6 +18,9 @@
 #define _ULL(x) (_AC(x, ULL))
 #define _BITUL(x) (_UL(1) << (x))
 #define _BITULL(x) (_ULL(1) << (x))
+#ifndef __ASSEMBLY__
+#define _BIT128(x) ((unsigned __int128) (1) << (x))
+#endif
 #define __ALIGN_KERNEL(x,a) __ALIGN_KERNEL_MASK(x, (__typeof__(x)) (a) - 1)
 #define __ALIGN_KERNEL_MASK(x,mask) (((x) + (mask)) & ~(mask))
 #define __KERNEL_DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
diff --git a/libc/kernel/uapi/linux/dpll.h b/libc/kernel/uapi/linux/dpll.h
index dd692f6..7d6182b 100644
--- a/libc/kernel/uapi/linux/dpll.h
+++ b/libc/kernel/uapi/linux/dpll.h
@@ -108,6 +108,9 @@
   DPLL_A_PIN_PHASE_ADJUST,
   DPLL_A_PIN_PHASE_OFFSET,
   DPLL_A_PIN_FRACTIONAL_FREQUENCY_OFFSET,
+  DPLL_A_PIN_ESYNC_FREQUENCY,
+  DPLL_A_PIN_ESYNC_FREQUENCY_SUPPORTED,
+  DPLL_A_PIN_ESYNC_PULSE,
   __DPLL_A_PIN_MAX,
   DPLL_A_PIN_MAX = (__DPLL_A_PIN_MAX - 1)
 };
diff --git a/libc/kernel/uapi/linux/elf.h b/libc/kernel/uapi/linux/elf.h
index f1cf522..ea40103 100644
--- a/libc/kernel/uapi/linux/elf.h
+++ b/libc/kernel/uapi/linux/elf.h
@@ -329,6 +329,7 @@
 #define NT_386_IOPERM 0x201
 #define NT_X86_XSTATE 0x202
 #define NT_X86_SHSTK 0x204
+#define NT_X86_XSAVE_LAYOUT 0x205
 #define NT_S390_HIGH_GPRS 0x300
 #define NT_S390_TIMER 0x301
 #define NT_S390_TODCMP 0x302
@@ -359,6 +360,7 @@
 #define NT_ARM_ZA 0x40c
 #define NT_ARM_ZT 0x40d
 #define NT_ARM_FPMR 0x40e
+#define NT_ARM_POE 0x40f
 #define NT_ARC_V2 0x600
 #define NT_VMCOREDD 0x700
 #define NT_MIPS_DSP 0x800
diff --git a/libc/kernel/uapi/linux/ethtool.h b/libc/kernel/uapi/linux/ethtool.h
index e213ba1..323c4fc 100644
--- a/libc/kernel/uapi/linux/ethtool.h
+++ b/libc/kernel/uapi/linux/ethtool.h
@@ -1026,4 +1026,8 @@
   __u32 reserved[7];
   __u32 link_mode_masks[];
 };
+enum phy_upstream {
+  PHY_UPSTREAM_MAC,
+  PHY_UPSTREAM_PHY,
+};
 #endif
diff --git a/libc/kernel/uapi/linux/ethtool_netlink.h b/libc/kernel/uapi/linux/ethtool_netlink.h
index ac6391a..7120c03 100644
--- a/libc/kernel/uapi/linux/ethtool_netlink.h
+++ b/libc/kernel/uapi/linux/ethtool_netlink.h
@@ -53,6 +53,7 @@
   ETHTOOL_MSG_MM_GET,
   ETHTOOL_MSG_MM_SET,
   ETHTOOL_MSG_MODULE_FW_FLASH_ACT,
+  ETHTOOL_MSG_PHY_GET,
   __ETHTOOL_MSG_USER_CNT,
   ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1
 };
@@ -102,6 +103,8 @@
   ETHTOOL_MSG_MM_GET_REPLY,
   ETHTOOL_MSG_MM_NTF,
   ETHTOOL_MSG_MODULE_FW_FLASH_NTF,
+  ETHTOOL_MSG_PHY_GET_REPLY,
+  ETHTOOL_MSG_PHY_NTF,
   __ETHTOOL_MSG_KERNEL_CNT,
   ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1
 };
@@ -116,6 +119,7 @@
   ETHTOOL_A_HEADER_DEV_INDEX,
   ETHTOOL_A_HEADER_DEV_NAME,
   ETHTOOL_A_HEADER_FLAGS,
+  ETHTOOL_A_HEADER_PHY_INDEX,
   __ETHTOOL_A_HEADER_CNT,
   ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1
 };
@@ -408,6 +412,8 @@
   ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT,
   ETHTOOL_A_CABLE_RESULT_CODE_CROSS_SHORT,
   ETHTOOL_A_CABLE_RESULT_CODE_IMPEDANCE_MISMATCH,
+  ETHTOOL_A_CABLE_RESULT_CODE_NOISE,
+  ETHTOOL_A_CABLE_RESULT_CODE_RESOLUTION_NOT_POSSIBLE,
 };
 enum {
   ETHTOOL_A_CABLE_PAIR_A,
@@ -416,9 +422,15 @@
   ETHTOOL_A_CABLE_PAIR_D,
 };
 enum {
+  ETHTOOL_A_CABLE_INF_SRC_UNSPEC,
+  ETHTOOL_A_CABLE_INF_SRC_TDR,
+  ETHTOOL_A_CABLE_INF_SRC_ALCD,
+};
+enum {
   ETHTOOL_A_CABLE_RESULT_UNSPEC,
   ETHTOOL_A_CABLE_RESULT_PAIR,
   ETHTOOL_A_CABLE_RESULT_CODE,
+  ETHTOOL_A_CABLE_RESULT_SRC,
   __ETHTOOL_A_CABLE_RESULT_CNT,
   ETHTOOL_A_CABLE_RESULT_MAX = (__ETHTOOL_A_CABLE_RESULT_CNT - 1)
 };
@@ -426,6 +438,7 @@
   ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC,
   ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR,
   ETHTOOL_A_CABLE_FAULT_LENGTH_CM,
+  ETHTOOL_A_CABLE_FAULT_LENGTH_SRC,
   __ETHTOOL_A_CABLE_FAULT_LENGTH_CNT,
   ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1)
 };
@@ -683,6 +696,7 @@
   ETHTOOL_A_RSS_INDIR,
   ETHTOOL_A_RSS_HKEY,
   ETHTOOL_A_RSS_INPUT_XFRM,
+  ETHTOOL_A_RSS_START_CONTEXT,
   __ETHTOOL_A_RSS_CNT,
   ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1),
 };
@@ -740,6 +754,19 @@
   __ETHTOOL_A_MODULE_FW_FLASH_CNT,
   ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1)
 };
+enum {
+  ETHTOOL_A_PHY_UNSPEC,
+  ETHTOOL_A_PHY_HEADER,
+  ETHTOOL_A_PHY_INDEX,
+  ETHTOOL_A_PHY_DRVNAME,
+  ETHTOOL_A_PHY_NAME,
+  ETHTOOL_A_PHY_UPSTREAM_TYPE,
+  ETHTOOL_A_PHY_UPSTREAM_INDEX,
+  ETHTOOL_A_PHY_UPSTREAM_SFP_NAME,
+  ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME,
+  __ETHTOOL_A_PHY_CNT,
+  ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1)
+};
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
 #define ETHTOOL_MCGRP_MONITOR_NAME "monitor"
diff --git a/libc/kernel/uapi/linux/exfat.h b/libc/kernel/uapi/linux/exfat.h
new file mode 100644
index 0000000..b813581
--- /dev/null
+++ b/libc/kernel/uapi/linux/exfat.h
@@ -0,0 +1,15 @@
+/*
+ * This file is auto-generated. Modifications will be lost.
+ *
+ * See https://android.googlesource.com/platform/bionic/+/master/libc/kernel/
+ * for more information.
+ */
+#ifndef _UAPI_LINUX_EXFAT_H
+#define _UAPI_LINUX_EXFAT_H
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#define EXFAT_IOC_SHUTDOWN _IOR('X', 125, __u32)
+#define EXFAT_GOING_DOWN_DEFAULT 0x0
+#define EXFAT_GOING_DOWN_FULLSYNC 0x1
+#define EXFAT_GOING_DOWN_NOSYNC 0x2
+#endif
diff --git a/libc/kernel/uapi/linux/falloc.h b/libc/kernel/uapi/linux/falloc.h
index cca488e..cd7017e 100644
--- a/libc/kernel/uapi/linux/falloc.h
+++ b/libc/kernel/uapi/linux/falloc.h
@@ -6,6 +6,7 @@
  */
 #ifndef _UAPI_FALLOC_H_
 #define _UAPI_FALLOC_H_
+#define FALLOC_FL_ALLOCATE_RANGE 0x00
 #define FALLOC_FL_KEEP_SIZE 0x01
 #define FALLOC_FL_PUNCH_HOLE 0x02
 #define FALLOC_FL_NO_HIDE_STALE 0x04
diff --git a/libc/kernel/uapi/linux/fcntl.h b/libc/kernel/uapi/linux/fcntl.h
index 9f32f9f..22ca65d 100644
--- a/libc/kernel/uapi/linux/fcntl.h
+++ b/libc/kernel/uapi/linux/fcntl.h
@@ -12,6 +12,7 @@
 #define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1)
 #define F_NOTIFY (F_LINUX_SPECIFIC_BASE + 2)
 #define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3)
+#define F_CREATED_QUERY (F_LINUX_SPECIFIC_BASE + 4)
 #define F_CANCELLK (F_LINUX_SPECIFIC_BASE + 5)
 #define F_DUPFD_CLOEXEC (F_LINUX_SPECIFIC_BASE + 6)
 #define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7)
@@ -44,8 +45,6 @@
 #define DN_MULTISHOT 0x80000000
 #define AT_FDCWD - 100
 #define AT_SYMLINK_NOFOLLOW 0x100
-#define AT_EACCESS 0x200
-#define AT_REMOVEDIR 0x200
 #define AT_SYMLINK_FOLLOW 0x400
 #define AT_NO_AUTOMOUNT 0x800
 #define AT_EMPTY_PATH 0x1000
@@ -54,5 +53,11 @@
 #define AT_STATX_FORCE_SYNC 0x2000
 #define AT_STATX_DONT_SYNC 0x4000
 #define AT_RECURSIVE 0x8000
-#define AT_HANDLE_FID AT_REMOVEDIR
+#define AT_RENAME_NOREPLACE 0x0001
+#define AT_RENAME_EXCHANGE 0x0002
+#define AT_RENAME_WHITEOUT 0x0004
+#define AT_EACCESS 0x200
+#define AT_REMOVEDIR 0x200
+#define AT_HANDLE_FID 0x200
+#define AT_HANDLE_MNT_ID_UNIQUE 0x001
 #endif
diff --git a/libc/kernel/uapi/linux/fib_rules.h b/libc/kernel/uapi/linux/fib_rules.h
index ee9cabc..339ccce 100644
--- a/libc/kernel/uapi/linux/fib_rules.h
+++ b/libc/kernel/uapi/linux/fib_rules.h
@@ -61,6 +61,7 @@
   FRA_IP_PROTO,
   FRA_SPORT_RANGE,
   FRA_DPORT_RANGE,
+  FRA_DSCP,
   __FRA_MAX
 };
 #define FRA_MAX (__FRA_MAX - 1)
diff --git a/libc/kernel/uapi/linux/fuse.h b/libc/kernel/uapi/linux/fuse.h
index 4ac2d2c..c1d64ce 100644
--- a/libc/kernel/uapi/linux/fuse.h
+++ b/libc/kernel/uapi/linux/fuse.h
@@ -8,7 +8,7 @@
 #define _LINUX_FUSE_H
 #include <stdint.h>
 #define FUSE_KERNEL_VERSION 7
-#define FUSE_KERNEL_MINOR_VERSION 40
+#define FUSE_KERNEL_MINOR_VERSION 41
 #define FUSE_ROOT_ID 1
 struct fuse_attr {
   uint64_t ino;
@@ -135,6 +135,7 @@
 #define FUSE_NO_EXPORT_SUPPORT (1ULL << 38)
 #define FUSE_HAS_RESEND (1ULL << 39)
 #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP
+#define FUSE_ALLOW_IDMAP (1ULL << 40)
 #define CUSE_UNRESTRICTED_IOCTL (1 << 0)
 #define FUSE_RELEASE_FLUSH (1 << 0)
 #define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1)
@@ -214,6 +215,7 @@
   FUSE_SYNCFS = 50,
   FUSE_TMPFILE = 51,
   FUSE_STATX = 52,
+  FUSE_CANONICAL_PATH = 2016,
   CUSE_INIT = 4096,
   CUSE_INIT_BSWAP_RESERVED = 1048576,
   FUSE_INIT_BSWAP_RESERVED = 436207616,
@@ -497,6 +499,7 @@
   uint32_t padding;
 };
 #define FUSE_UNIQUE_RESEND (1ULL << 63)
+#define FUSE_INVALID_UIDGID ((uint32_t) (- 1))
 struct fuse_in_header {
   uint32_t len;
   uint32_t opcode;
diff --git a/libc/kernel/uapi/linux/hidraw.h b/libc/kernel/uapi/linux/hidraw.h
index 25a9a17..1eb024c 100644
--- a/libc/kernel/uapi/linux/hidraw.h
+++ b/libc/kernel/uapi/linux/hidraw.h
@@ -29,6 +29,7 @@
 #define HIDIOCGINPUT(len) _IOC(_IOC_WRITE | _IOC_READ, 'H', 0x0A, len)
 #define HIDIOCSOUTPUT(len) _IOC(_IOC_WRITE | _IOC_READ, 'H', 0x0B, len)
 #define HIDIOCGOUTPUT(len) _IOC(_IOC_WRITE | _IOC_READ, 'H', 0x0C, len)
+#define HIDIOCREVOKE _IOW('H', 0x0D, int)
 #define HIDRAW_FIRST_MINOR 0
 #define HIDRAW_MAX_DEVICES 64
 #define HIDRAW_BUFFER_SIZE 64
diff --git a/libc/kernel/uapi/linux/io_uring.h b/libc/kernel/uapi/linux/io_uring.h
index 6b4f2ea..5564bff 100644
--- a/libc/kernel/uapi/linux/io_uring.h
+++ b/libc/kernel/uapi/linux/io_uring.h
@@ -230,6 +230,7 @@
 #define IORING_CQE_F_MORE (1U << 1)
 #define IORING_CQE_F_SOCK_NONEMPTY (1U << 2)
 #define IORING_CQE_F_NOTIF (1U << 3)
+#define IORING_CQE_F_BUF_MORE (1U << 4)
 #define IORING_CQE_BUFFER_SHIFT 16
 #define IORING_OFF_SQ_RING 0ULL
 #define IORING_OFF_CQ_RING 0x8000000ULL
@@ -268,6 +269,7 @@
 #define IORING_ENTER_SQ_WAIT (1U << 2)
 #define IORING_ENTER_EXT_ARG (1U << 3)
 #define IORING_ENTER_REGISTERED_RING (1U << 4)
+#define IORING_ENTER_ABS_TIMER (1U << 5)
 struct io_uring_params {
   __u32 sq_entries;
   __u32 cq_entries;
@@ -295,6 +297,7 @@
 #define IORING_FEAT_LINKED_FILE (1U << 12)
 #define IORING_FEAT_REG_REG_RING (1U << 13)
 #define IORING_FEAT_RECVSEND_BUNDLE (1U << 14)
+#define IORING_FEAT_MIN_TIMEOUT (1U << 15)
 enum io_uring_register_op {
   IORING_REGISTER_BUFFERS = 0,
   IORING_UNREGISTER_BUFFERS = 1,
@@ -325,6 +328,8 @@
   IORING_REGISTER_PBUF_STATUS = 26,
   IORING_REGISTER_NAPI = 27,
   IORING_UNREGISTER_NAPI = 28,
+  IORING_REGISTER_CLOCK = 29,
+  IORING_REGISTER_CLONE_BUFFERS = 30,
   IORING_REGISTER_LAST,
   IORING_REGISTER_USE_REGISTERED_RING = 1U << 31
 };
@@ -383,6 +388,18 @@
   __u8 resv;
   __u32 resv2[3];
 };
+struct io_uring_clock_register {
+  __u32 clockid;
+  __u32 __resv[3];
+};
+enum {
+  IORING_REGISTER_SRC_REGISTERED = 1,
+};
+struct io_uring_clone_buffers {
+  __u32 src_fd;
+  __u32 flags;
+  __u32 pad[6];
+};
 struct io_uring_buf {
   __u64 addr;
   __u32 len;
@@ -402,6 +419,7 @@
 };
 enum io_uring_register_pbuf_ring_flags {
   IOU_PBUF_RING_MMAP = 1,
+  IOU_PBUF_RING_INC = 2,
 };
 struct io_uring_buf_reg {
   __u64 ring_addr;
@@ -431,7 +449,7 @@
 struct io_uring_getevents_arg {
   __u64 sigmask;
   __u32 sigmask_sz;
-  __u32 pad;
+  __u32 min_wait_usec;
   __u64 ts;
 };
 struct io_uring_sync_cancel_reg {
diff --git a/libc/kernel/uapi/linux/ioam6_iptunnel.h b/libc/kernel/uapi/linux/ioam6_iptunnel.h
index 34317fc..e1a0223 100644
--- a/libc/kernel/uapi/linux/ioam6_iptunnel.h
+++ b/libc/kernel/uapi/linux/ioam6_iptunnel.h
@@ -24,6 +24,7 @@
 #define IOAM6_IPTUNNEL_FREQ_MAX 1000000
   IOAM6_IPTUNNEL_FREQ_K,
   IOAM6_IPTUNNEL_FREQ_N,
+  IOAM6_IPTUNNEL_SRC,
   __IOAM6_IPTUNNEL_MAX,
 };
 #define IOAM6_IPTUNNEL_MAX (__IOAM6_IPTUNNEL_MAX - 1)
diff --git a/libc/kernel/uapi/linux/iommufd.h b/libc/kernel/uapi/linux/iommufd.h
index 6f663b4..3bbcd40c6 100644
--- a/libc/kernel/uapi/linux/iommufd.h
+++ b/libc/kernel/uapi/linux/iommufd.h
@@ -6,8 +6,8 @@
  */
 #ifndef _UAPI_IOMMUFD_H
 #define _UAPI_IOMMUFD_H
-#include <linux/types.h>
 #include <linux/ioctl.h>
+#include <linux/types.h>
 #define IOMMUFD_TYPE (';')
 enum {
   IOMMUFD_CMD_BASE = 0x80,
diff --git a/libc/kernel/uapi/linux/kfd_ioctl.h b/libc/kernel/uapi/linux/kfd_ioctl.h
index 193dd8e..8948a13 100644
--- a/libc/kernel/uapi/linux/kfd_ioctl.h
+++ b/libc/kernel/uapi/linux/kfd_ioctl.h
@@ -9,7 +9,7 @@
 #include <drm/drm.h>
 #include <linux/ioctl.h>
 #define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 16
+#define KFD_IOCTL_MINOR_VERSION 17
 struct kfd_ioctl_get_version_args {
   __u32 major_version;
   __u32 minor_version;
@@ -18,6 +18,7 @@
 #define KFD_IOC_QUEUE_TYPE_SDMA 0x1
 #define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2
 #define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3
+#define KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID 0x4
 #define KFD_MAX_QUEUE_PERCENTAGE 100
 #define KFD_MAX_QUEUE_PRIORITY 15
 struct kfd_ioctl_create_queue_args {
@@ -36,6 +37,8 @@
   __u64 ctx_save_restore_address;
   __u32 ctx_save_restore_size;
   __u32 ctl_stack_size;
+  __u32 sdma_engine_id;
+  __u32 pad;
 };
 struct kfd_ioctl_destroy_queue_args {
   __u32 queue_id;
@@ -358,6 +361,16 @@
   __u32 gpuid;
   __u32 anon_fd;
 };
+#define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num,reset_cause) "%x %s\n", (reset_seq_num), (reset_cause)
+#define KFD_EVENT_FMT_THERMAL_THROTTLING(bitmask,counter) "%llx:%llx\n", (bitmask), (counter)
+#define KFD_EVENT_FMT_VMFAULT(pid,task_name) "%x:%s\n", (pid), (task_name)
+#define KFD_EVENT_FMT_PAGEFAULT_START(ns,pid,addr,node,rw) "%lld -%d @%lx(%x) %c\n", (ns), (pid), (addr), (node), (rw)
+#define KFD_EVENT_FMT_PAGEFAULT_END(ns,pid,addr,node,migrate_update) "%lld -%d @%lx(%x) %c\n", (ns), (pid), (addr), (node), (migrate_update)
+#define KFD_EVENT_FMT_MIGRATE_START(ns,pid,start,size,from,to,prefetch_loc,preferred_loc,migrate_trigger) "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", (ns), (pid), (start), (size), (from), (to), (prefetch_loc), (preferred_loc), (migrate_trigger)
+#define KFD_EVENT_FMT_MIGRATE_END(ns,pid,start,size,from,to,migrate_trigger) "%lld -%d @%lx(%lx) %x->%x %d\n", (ns), (pid), (start), (size), (from), (to), (migrate_trigger)
+#define KFD_EVENT_FMT_QUEUE_EVICTION(ns,pid,node,evict_trigger) "%lld -%d %x %d\n", (ns), (pid), (node), (evict_trigger)
+#define KFD_EVENT_FMT_QUEUE_RESTORE(ns,pid,node,rescheduled) "%lld -%d %x %c\n", (ns), (pid), (node), (rescheduled)
+#define KFD_EVENT_FMT_UNMAP_FROM_GPU(ns,pid,addr,size,node,unmap_trigger) "%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size), (node), (unmap_trigger)
 enum kfd_criu_op {
   KFD_CRIU_OP_PROCESS_INFO,
   KFD_CRIU_OP_CHECKPOINT,
diff --git a/libc/kernel/uapi/linux/landlock.h b/libc/kernel/uapi/linux/landlock.h
index f903ae6..8f83780 100644
--- a/libc/kernel/uapi/linux/landlock.h
+++ b/libc/kernel/uapi/linux/landlock.h
@@ -10,6 +10,7 @@
 struct landlock_ruleset_attr {
   __u64 handled_access_fs;
   __u64 handled_access_net;
+  __u64 scoped;
 };
 #define LANDLOCK_CREATE_RULESET_VERSION (1U << 0)
 enum landlock_rule_type {
@@ -42,4 +43,6 @@
 #define LANDLOCK_ACCESS_FS_IOCTL_DEV (1ULL << 15)
 #define LANDLOCK_ACCESS_NET_BIND_TCP (1ULL << 0)
 #define LANDLOCK_ACCESS_NET_CONNECT_TCP (1ULL << 1)
+#define LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET (1ULL << 0)
+#define LANDLOCK_SCOPE_SIGNAL (1ULL << 1)
 #endif
diff --git a/libc/kernel/uapi/linux/libc-compat.h b/libc/kernel/uapi/linux/libc-compat.h
index 289b7c5..0b5ba60 100644
--- a/libc/kernel/uapi/linux/libc-compat.h
+++ b/libc/kernel/uapi/linux/libc-compat.h
@@ -59,19 +59,6 @@
 #define __UAPI_DEF_IN6_PKTINFO 1
 #define __UAPI_DEF_IP6_MTUINFO 1
 #endif
-#ifdef __NETIPX_IPX_H
-#define __UAPI_DEF_SOCKADDR_IPX 0
-#define __UAPI_DEF_IPX_ROUTE_DEFINITION 0
-#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 0
-#define __UAPI_DEF_IPX_CONFIG_DATA 0
-#define __UAPI_DEF_IPX_ROUTE_DEF 0
-#else
-#define __UAPI_DEF_SOCKADDR_IPX 1
-#define __UAPI_DEF_IPX_ROUTE_DEFINITION 1
-#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1
-#define __UAPI_DEF_IPX_CONFIG_DATA 1
-#define __UAPI_DEF_IPX_ROUTE_DEF 1
-#endif
 #ifdef _SYS_XATTR_H
 #define __UAPI_DEF_XATTR 0
 #else
@@ -138,21 +125,6 @@
 #ifndef __UAPI_DEF_IP6_MTUINFO
 #define __UAPI_DEF_IP6_MTUINFO 1
 #endif
-#ifndef __UAPI_DEF_SOCKADDR_IPX
-#define __UAPI_DEF_SOCKADDR_IPX 1
-#endif
-#ifndef __UAPI_DEF_IPX_ROUTE_DEFINITION
-#define __UAPI_DEF_IPX_ROUTE_DEFINITION 1
-#endif
-#ifndef __UAPI_DEF_IPX_INTERFACE_DEFINITION
-#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1
-#endif
-#ifndef __UAPI_DEF_IPX_CONFIG_DATA
-#define __UAPI_DEF_IPX_CONFIG_DATA 1
-#endif
-#ifndef __UAPI_DEF_IPX_ROUTE_DEF
-#define __UAPI_DEF_IPX_ROUTE_DEF 1
-#endif
 #ifndef __UAPI_DEF_XATTR
 #define __UAPI_DEF_XATTR 1
 #endif
diff --git a/libc/kernel/uapi/linux/lsm.h b/libc/kernel/uapi/linux/lsm.h
index 3a3f152..b12ca64 100644
--- a/libc/kernel/uapi/linux/lsm.h
+++ b/libc/kernel/uapi/linux/lsm.h
@@ -30,6 +30,7 @@
 #define LSM_ID_LANDLOCK 110
 #define LSM_ID_IMA 111
 #define LSM_ID_EVM 112
+#define LSM_ID_IPE 113
 #define LSM_ATTR_UNDEF 0
 #define LSM_ATTR_CURRENT 100
 #define LSM_ATTR_EXEC 101
diff --git a/libc/kernel/uapi/linux/mdio.h b/libc/kernel/uapi/linux/mdio.h
index 7b51b73..7a4d4db 100644
--- a/libc/kernel/uapi/linux/mdio.h
+++ b/libc/kernel/uapi/linux/mdio.h
@@ -15,6 +15,7 @@
 #define MDIO_MMD_DTEXS 5
 #define MDIO_MMD_TC 6
 #define MDIO_MMD_AN 7
+#define MDIO_MMD_POWER_UNIT 13
 #define MDIO_MMD_C22EXT 29
 #define MDIO_MMD_VEND1 30
 #define MDIO_MMD_VEND2 31
diff --git a/libc/kernel/uapi/linux/nbd.h b/libc/kernel/uapi/linux/nbd.h
index d47c28f..110220f 100644
--- a/libc/kernel/uapi/linux/nbd.h
+++ b/libc/kernel/uapi/linux/nbd.h
@@ -23,15 +23,19 @@
   NBD_CMD_WRITE = 1,
   NBD_CMD_DISC = 2,
   NBD_CMD_FLUSH = 3,
-  NBD_CMD_TRIM = 4
+  NBD_CMD_TRIM = 4,
+  NBD_CMD_WRITE_ZEROES = 6,
 };
 #define NBD_FLAG_HAS_FLAGS (1 << 0)
 #define NBD_FLAG_READ_ONLY (1 << 1)
 #define NBD_FLAG_SEND_FLUSH (1 << 2)
 #define NBD_FLAG_SEND_FUA (1 << 3)
+#define NBD_FLAG_ROTATIONAL (1 << 4)
 #define NBD_FLAG_SEND_TRIM (1 << 5)
+#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6)
 #define NBD_FLAG_CAN_MULTI_CONN (1 << 8)
 #define NBD_CMD_FLAG_FUA (1 << 16)
+#define NBD_CMD_FLAG_NO_HOLE (1 << 17)
 #define NBD_CFLAG_DESTROY_ON_DISCONNECT (1 << 0)
 #define NBD_CFLAG_DISCONNECT_ON_CLOSE (1 << 1)
 #define NBD_REQUEST_MAGIC 0x25609513
diff --git a/libc/kernel/uapi/linux/net_tstamp.h b/libc/kernel/uapi/linux/net_tstamp.h
index 9bbd309..b0df344 100644
--- a/libc/kernel/uapi/linux/net_tstamp.h
+++ b/libc/kernel/uapi/linux/net_tstamp.h
@@ -26,7 +26,8 @@
   SOF_TIMESTAMPING_OPT_TX_SWHW = (1 << 14),
   SOF_TIMESTAMPING_BIND_PHC = (1 << 15),
   SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16),
-  SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP,
+  SOF_TIMESTAMPING_OPT_RX_FILTER = (1 << 17),
+  SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_RX_FILTER,
   SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST
 };
 #define SOF_TIMESTAMPING_TX_RECORD_MASK (SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_ACK)
diff --git a/libc/kernel/uapi/linux/netdev.h b/libc/kernel/uapi/linux/netdev.h
index b084297..a7c5706 100644
--- a/libc/kernel/uapi/linux/netdev.h
+++ b/libc/kernel/uapi/linux/netdev.h
@@ -51,6 +51,7 @@
   NETDEV_A_PAGE_POOL_INFLIGHT,
   NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
   NETDEV_A_PAGE_POOL_DETACH_TIME,
+  NETDEV_A_PAGE_POOL_DMABUF,
   __NETDEV_A_PAGE_POOL_MAX,
   NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1)
 };
@@ -83,6 +84,7 @@
   NETDEV_A_QUEUE_IFINDEX,
   NETDEV_A_QUEUE_TYPE,
   NETDEV_A_QUEUE_NAPI_ID,
+  NETDEV_A_QUEUE_DMABUF,
   __NETDEV_A_QUEUE_MAX,
   NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
 };
@@ -122,6 +124,14 @@
   NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1)
 };
 enum {
+  NETDEV_A_DMABUF_IFINDEX = 1,
+  NETDEV_A_DMABUF_QUEUES,
+  NETDEV_A_DMABUF_FD,
+  NETDEV_A_DMABUF_ID,
+  __NETDEV_A_DMABUF_MAX,
+  NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1)
+};
+enum {
   NETDEV_CMD_DEV_GET = 1,
   NETDEV_CMD_DEV_ADD_NTF,
   NETDEV_CMD_DEV_DEL_NTF,
@@ -134,6 +144,7 @@
   NETDEV_CMD_QUEUE_GET,
   NETDEV_CMD_NAPI_GET,
   NETDEV_CMD_QSTATS_GET,
+  NETDEV_CMD_BIND_RX,
   __NETDEV_CMD_MAX,
   NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
 };
diff --git a/libc/kernel/uapi/linux/nexthop.h b/libc/kernel/uapi/linux/nexthop.h
index 5726a66..2443c18 100644
--- a/libc/kernel/uapi/linux/nexthop.h
+++ b/libc/kernel/uapi/linux/nexthop.h
@@ -17,7 +17,7 @@
 struct nexthop_grp {
   __u32 id;
   __u8 weight;
-  __u8 resvd1;
+  __u8 weight_high;
   __u16 resvd2;
 };
 enum {
@@ -28,6 +28,7 @@
 #define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1)
 #define NHA_OP_FLAG_DUMP_STATS BIT(0)
 #define NHA_OP_FLAG_DUMP_HW_STATS BIT(1)
+#define NHA_OP_FLAG_RESP_GRP_RESVD_0 BIT(31)
 enum {
   NHA_UNSPEC,
   NHA_ID,
diff --git a/libc/kernel/uapi/linux/nsfs.h b/libc/kernel/uapi/linux/nsfs.h
index c8f2208..870afe7 100644
--- a/libc/kernel/uapi/linux/nsfs.h
+++ b/libc/kernel/uapi/linux/nsfs.h
@@ -18,4 +18,13 @@
 #define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int)
 #define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x8, int)
 #define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int)
+struct mnt_ns_info {
+  __u32 size;
+  __u32 nr_mounts;
+  __u64 mnt_ns_id;
+};
+#define MNT_NS_INFO_SIZE_VER0 16
+#define NS_MNT_GET_INFO _IOR(NSIO, 10, struct mnt_ns_info)
+#define NS_MNT_GET_NEXT _IOR(NSIO, 11, struct mnt_ns_info)
+#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info)
 #endif
diff --git a/libc/kernel/uapi/linux/pci_regs.h b/libc/kernel/uapi/linux/pci_regs.h
index 703d398..7083391 100644
--- a/libc/kernel/uapi/linux/pci_regs.h
+++ b/libc/kernel/uapi/linux/pci_regs.h
@@ -531,9 +531,11 @@
 #define PCI_EXP_RTCTL_SENFEE 0x0002
 #define PCI_EXP_RTCTL_SEFEE 0x0004
 #define PCI_EXP_RTCTL_PMEIE 0x0008
-#define PCI_EXP_RTCTL_CRSSVE 0x0010
+#define PCI_EXP_RTCTL_RRS_SVE 0x0010
+#define PCI_EXP_RTCTL_CRSSVE PCI_EXP_RTCTL_RRS_SVE
 #define PCI_EXP_RTCAP 0x1e
-#define PCI_EXP_RTCAP_CRSVIS 0x0001
+#define PCI_EXP_RTCAP_RRS_SV 0x0001
+#define PCI_EXP_RTCAP_CRSVIS PCI_EXP_RTCAP_RRS_SV
 #define PCI_EXP_RTSTA 0x20
 #define PCI_EXP_RTSTA_PME_RQ_ID 0x0000ffff
 #define PCI_EXP_RTSTA_PME 0x00010000
@@ -626,6 +628,7 @@
 #define PCI_EXT_CAP_ID_DVSEC 0x23
 #define PCI_EXT_CAP_ID_DLF 0x25
 #define PCI_EXT_CAP_ID_PL_16GT 0x26
+#define PCI_EXT_CAP_ID_NPEM 0x29
 #define PCI_EXT_CAP_ID_PL_32GT 0x2A
 #define PCI_EXT_CAP_ID_DOE 0x2E
 #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_DOE
@@ -944,6 +947,31 @@
 #define PCI_PL_16GT_LE_CTRL_DSP_TX_PRESET_MASK 0x0000000F
 #define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK 0x000000F0
 #define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT 4
+#define PCI_NPEM_CAP 0x04
+#define PCI_NPEM_CAP_CAPABLE 0x00000001
+#define PCI_NPEM_CTRL 0x08
+#define PCI_NPEM_CTRL_ENABLE 0x00000001
+#define PCI_NPEM_CMD_RESET 0x00000002
+#define PCI_NPEM_IND_OK 0x00000004
+#define PCI_NPEM_IND_LOCATE 0x00000008
+#define PCI_NPEM_IND_FAIL 0x00000010
+#define PCI_NPEM_IND_REBUILD 0x00000020
+#define PCI_NPEM_IND_PFA 0x00000040
+#define PCI_NPEM_IND_HOTSPARE 0x00000080
+#define PCI_NPEM_IND_ICA 0x00000100
+#define PCI_NPEM_IND_IFA 0x00000200
+#define PCI_NPEM_IND_IDT 0x00000400
+#define PCI_NPEM_IND_DISABLED 0x00000800
+#define PCI_NPEM_IND_SPEC_0 0x01000000
+#define PCI_NPEM_IND_SPEC_1 0x02000000
+#define PCI_NPEM_IND_SPEC_2 0x04000000
+#define PCI_NPEM_IND_SPEC_3 0x08000000
+#define PCI_NPEM_IND_SPEC_4 0x10000000
+#define PCI_NPEM_IND_SPEC_5 0x20000000
+#define PCI_NPEM_IND_SPEC_6 0x40000000
+#define PCI_NPEM_IND_SPEC_7 0x80000000
+#define PCI_NPEM_STATUS 0x0c
+#define PCI_NPEM_STATUS_CC 0x00000001
 #define PCI_DOE_CAP 0x04
 #define PCI_DOE_CAP_INT_SUP 0x00000001
 #define PCI_DOE_CAP_INT_MSG_NUM 0x00000ffe
diff --git a/libc/kernel/uapi/linux/pkt_cls.h b/libc/kernel/uapi/linux/pkt_cls.h
index bdca553..c5d8d79 100644
--- a/libc/kernel/uapi/linux/pkt_cls.h
+++ b/libc/kernel/uapi/linux/pkt_cls.h
@@ -179,7 +179,12 @@
   int offmask;
 };
 struct tc_u32_sel {
-  unsigned char flags;
+  /**
+   ** ANDROID FIX: Comment out TAG value to avoid C++ error about using
+   ** a type declared in an anonymous union. This is being fixed upstream
+   ** and should be corrected by the next kernel import.
+   */
+  __struct_group(/*tc_u32_sel_hdr*/, hdr,, unsigned char flags;
   unsigned char offshift;
   unsigned char nkeys;
   __be16 offmask;
@@ -187,6 +192,7 @@
   short offoff;
   short hoff;
   __be32 hmask;
+ );
   struct tc_u32_key keys[];
 };
 struct tc_u32_mark {
diff --git a/libc/kernel/uapi/linux/ptp_clock.h b/libc/kernel/uapi/linux/ptp_clock.h
index 5014936..88c6786 100644
--- a/libc/kernel/uapi/linux/ptp_clock.h
+++ b/libc/kernel/uapi/linux/ptp_clock.h
@@ -65,7 +65,8 @@
 };
 struct ptp_sys_offset_extended {
   unsigned int n_samples;
-  unsigned int rsv[3];
+  __kernel_clockid_t clockid;
+  unsigned int rsv[2];
   struct ptp_clock_time ts[PTP_MAX_SAMPLES][3];
 };
 struct ptp_sys_offset_precise {
diff --git a/libc/kernel/uapi/linux/rkisp1-config.h b/libc/kernel/uapi/linux/rkisp1-config.h
index d4206a0..d5cf92a 100644
--- a/libc/kernel/uapi/linux/rkisp1-config.h
+++ b/libc/kernel/uapi/linux/rkisp1-config.h
@@ -88,6 +88,7 @@
 #define RKISP1_CIF_ISP_DPCC_RND_OFFS_n_RB(n,v) ((v) << ((n) * 4 + 2))
 #define RKISP1_CIF_ISP_DPF_MAX_NLF_COEFFS 17
 #define RKISP1_CIF_ISP_DPF_MAX_SPATIAL_COEFFS 6
+#define RKISP1_CIF_ISP_COMPAND_NUM_POINTS 64
 #define RKISP1_CIF_ISP_STAT_AWB (1U << 0)
 #define RKISP1_CIF_ISP_STAT_AUTOEXP (1U << 1)
 #define RKISP1_CIF_ISP_STAT_AFM (1U << 2)
@@ -348,6 +349,17 @@
   struct rkisp1_cif_isp_isp_meas_cfg meas;
   struct rkisp1_cif_isp_isp_other_cfg others;
 };
+struct rkisp1_cif_isp_compand_bls_config {
+  __u32 r;
+  __u32 gr;
+  __u32 gb;
+  __u32 b;
+};
+struct rkisp1_cif_isp_compand_curve_config {
+  __u8 px[RKISP1_CIF_ISP_COMPAND_NUM_POINTS];
+  __u32 x[RKISP1_CIF_ISP_COMPAND_NUM_POINTS];
+  __u32 y[RKISP1_CIF_ISP_COMPAND_NUM_POINTS];
+};
 struct rkisp1_cif_isp_awb_meas {
   __u32 cnt;
   __u8 mean_y_or_g;
@@ -388,4 +400,118 @@
   __u32 frame_id;
   struct rkisp1_cif_isp_stat params;
 };
+enum rkisp1_ext_params_block_type {
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_BLS,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_DPCC,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_SDG,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_AWB_GAIN,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_FLT,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_BDM,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_CTK,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_GOC,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_DPF,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_DPF_STRENGTH,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_CPROC,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_IE,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_LSC,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_AWB_MEAS,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_HST_MEAS,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_AEC_MEAS,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_AFC_MEAS,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_BLS,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_EXPAND,
+  RKISP1_EXT_PARAMS_BLOCK_TYPE_COMPAND_COMPRESS,
+};
+#define RKISP1_EXT_PARAMS_FL_BLOCK_DISABLE (1U << 0)
+#define RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE (1U << 1)
+struct rkisp1_ext_params_block_header {
+  __u16 type;
+  __u16 flags;
+  __u32 size;
+};
+struct rkisp1_ext_params_bls_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_bls_config config;
+} __attribute__((aligned(8)));
+struct rkisp1_ext_params_dpcc_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_dpcc_config config;
+} __attribute__((aligned(8)));
+struct rkisp1_ext_params_sdg_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_sdg_config config;
+} __attribute__((aligned(8)));
+struct rkisp1_ext_params_lsc_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_lsc_config config;
+} __attribute__((aligned(8)));
+struct rkisp1_ext_params_awb_gain_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_awb_gain_config config;
+} __attribute__((aligned(8)));
+struct rkisp1_ext_params_flt_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_flt_config config;
+} __attribute__((aligned(8)));
+struct rkisp1_ext_params_bdm_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_bdm_config config;
+} __attribute__((aligned(8)));
+struct rkisp1_ext_params_ctk_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_ctk_config config;
+} __attribute__((aligned(8)));
+struct rkisp1_ext_params_goc_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_goc_config config;
+} __attribute__((aligned(8)));
+struct rkisp1_ext_params_dpf_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_dpf_config config;
+} __attribute__((aligned(8)));
+struct rkisp1_ext_params_dpf_strength_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_dpf_strength_config config;
+} __attribute__((aligned(8)));
+struct rkisp1_ext_params_cproc_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_cproc_config config;
+} __attribute__((aligned(8)));
+struct rkisp1_ext_params_ie_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_ie_config config;
+} __attribute__((aligned(8)));
+struct rkisp1_ext_params_awb_meas_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_awb_meas_config config;
+} __attribute__((aligned(8)));
+struct rkisp1_ext_params_hst_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_hst_config config;
+} __attribute__((aligned(8)));
+struct rkisp1_ext_params_aec_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_aec_config config;
+} __attribute__((aligned(8)));
+struct rkisp1_ext_params_afc_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_afc_config config;
+} __attribute__((aligned(8)));
+struct rkisp1_ext_params_compand_bls_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_compand_bls_config config;
+} __attribute__((aligned(8)));
+struct rkisp1_ext_params_compand_curve_config {
+  struct rkisp1_ext_params_block_header header;
+  struct rkisp1_cif_isp_compand_curve_config config;
+} __attribute__((aligned(8)));
+#define RKISP1_EXT_PARAMS_MAX_SIZE (sizeof(struct rkisp1_ext_params_bls_config) + sizeof(struct rkisp1_ext_params_dpcc_config) + sizeof(struct rkisp1_ext_params_sdg_config) + sizeof(struct rkisp1_ext_params_lsc_config) + sizeof(struct rkisp1_ext_params_awb_gain_config) + sizeof(struct rkisp1_ext_params_flt_config) + sizeof(struct rkisp1_ext_params_bdm_config) + sizeof(struct rkisp1_ext_params_ctk_config) + sizeof(struct rkisp1_ext_params_goc_config) + sizeof(struct rkisp1_ext_params_dpf_config) + sizeof(struct rkisp1_ext_params_dpf_strength_config) + sizeof(struct rkisp1_ext_params_cproc_config) + sizeof(struct rkisp1_ext_params_ie_config) + sizeof(struct rkisp1_ext_params_awb_meas_config) + sizeof(struct rkisp1_ext_params_hst_config) + sizeof(struct rkisp1_ext_params_aec_config) + sizeof(struct rkisp1_ext_params_afc_config) + sizeof(struct rkisp1_ext_params_compand_bls_config) + sizeof(struct rkisp1_ext_params_compand_curve_config) + sizeof(struct rkisp1_ext_params_compand_curve_config))
+enum rksip1_ext_param_buffer_version {
+  RKISP1_EXT_PARAM_BUFFER_V1 = 1,
+};
+struct rkisp1_ext_params_cfg {
+  __u32 version;
+  __u32 data_size;
+  __u8 data[RKISP1_EXT_PARAMS_MAX_SIZE];
+};
 #endif
diff --git a/libc/kernel/uapi/linux/sched.h b/libc/kernel/uapi/linux/sched.h
index ae914f7..eaeeee3 100644
--- a/libc/kernel/uapi/linux/sched.h
+++ b/libc/kernel/uapi/linux/sched.h
@@ -59,6 +59,7 @@
 #define SCHED_BATCH 3
 #define SCHED_IDLE 5
 #define SCHED_DEADLINE 6
+#define SCHED_EXT 7
 #define SCHED_RESET_ON_FORK 0x40000000
 #define SCHED_FLAG_RESET_ON_FORK 0x01
 #define SCHED_FLAG_RECLAIM 0x02
diff --git a/libc/kernel/uapi/linux/serio.h b/libc/kernel/uapi/linux/serio.h
index 424144e..0f0668f 100644
--- a/libc/kernel/uapi/linux/serio.h
+++ b/libc/kernel/uapi/linux/serio.h
@@ -66,4 +66,5 @@
 #define SERIO_PULSE8_CEC 0x40
 #define SERIO_RAINSHADOW_CEC 0x41
 #define SERIO_FSIA6B 0x42
+#define SERIO_EXTRON_DA_HD_4K_PLUS 0x43
 #endif
diff --git a/libc/kernel/uapi/linux/smc.h b/libc/kernel/uapi/linux/smc.h
index 5e75fac..52a0da1 100644
--- a/libc/kernel/uapi/linux/smc.h
+++ b/libc/kernel/uapi/linux/smc.h
@@ -103,6 +103,8 @@
   SMC_NLA_LGR_R_NET_COOKIE,
   SMC_NLA_LGR_R_PAD,
   SMC_NLA_LGR_R_BUF_TYPE,
+  SMC_NLA_LGR_R_SNDBUF_ALLOC,
+  SMC_NLA_LGR_R_RMB_ALLOC,
   __SMC_NLA_LGR_R_MAX,
   SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1
 };
@@ -134,6 +136,8 @@
   SMC_NLA_LGR_D_V2_COMMON,
   SMC_NLA_LGR_D_EXT_GID,
   SMC_NLA_LGR_D_PEER_EXT_GID,
+  SMC_NLA_LGR_D_SNDBUF_ALLOC,
+  SMC_NLA_LGR_D_DMB_ALLOC,
   __SMC_NLA_LGR_D_MAX,
   SMC_NLA_LGR_D_MAX = __SMC_NLA_LGR_D_MAX - 1
 };
@@ -210,6 +214,8 @@
   SMC_NLA_STATS_T_TX_BYTES,
   SMC_NLA_STATS_T_RX_CNT,
   SMC_NLA_STATS_T_TX_CNT,
+  SMC_NLA_STATS_T_RX_RMB_USAGE,
+  SMC_NLA_STATS_T_TX_RMB_USAGE,
   __SMC_NLA_STATS_T_MAX,
   SMC_NLA_STATS_T_MAX = __SMC_NLA_STATS_T_MAX - 1
 };
diff --git a/libc/kernel/uapi/linux/spi/spi.h b/libc/kernel/uapi/linux/spi/spi.h
index 45c45cd..d0ead4b 100644
--- a/libc/kernel/uapi/linux/spi/spi.h
+++ b/libc/kernel/uapi/linux/spi/spi.h
@@ -30,5 +30,6 @@
 #define SPI_3WIRE_HIZ _BITUL(15)
 #define SPI_RX_CPHA_FLIP _BITUL(16)
 #define SPI_MOSI_IDLE_LOW _BITUL(17)
-#define SPI_MODE_USER_MASK (_BITUL(18) - 1)
+#define SPI_MOSI_IDLE_HIGH _BITUL(18)
+#define SPI_MODE_USER_MASK (_BITUL(19) - 1)
 #endif
diff --git a/libc/kernel/uapi/linux/swab.h b/libc/kernel/uapi/linux/swab.h
index 6225a76..5d240e1 100644
--- a/libc/kernel/uapi/linux/swab.h
+++ b/libc/kernel/uapi/linux/swab.h
@@ -15,29 +15,14 @@
 #define ___constant_swab64(x) ((__u64) ((((__u64) (x) & (__u64) 0x00000000000000ffULL) << 56) | (((__u64) (x) & (__u64) 0x000000000000ff00ULL) << 40) | (((__u64) (x) & (__u64) 0x0000000000ff0000ULL) << 24) | (((__u64) (x) & (__u64) 0x00000000ff000000ULL) << 8) | (((__u64) (x) & (__u64) 0x000000ff00000000ULL) >> 8) | (((__u64) (x) & (__u64) 0x0000ff0000000000ULL) >> 24) | (((__u64) (x) & (__u64) 0x00ff000000000000ULL) >> 40) | (((__u64) (x) & (__u64) 0xff00000000000000ULL) >> 56)))
 #define ___constant_swahw32(x) ((__u32) ((((__u32) (x) & (__u32) 0x0000ffffUL) << 16) | (((__u32) (x) & (__u32) 0xffff0000UL) >> 16)))
 #define ___constant_swahb32(x) ((__u32) ((((__u32) (x) & (__u32) 0x00ff00ffUL) << 8) | (((__u32) (x) & (__u32) 0xff00ff00UL) >> 8)))
-#ifdef __arch_swab16
-#else
-#endif
-#ifdef __arch_swab32
-#else
-#endif
-#ifdef __arch_swab64
-#elif defined(__SWAB_64_THRU_32__)
+#ifdef __SWAB_64_THRU_32__
 #else
 #endif
 static inline __attribute__((__const__)) __u32 __fswahw32(__u32 val) {
-#ifdef __arch_swahw32
-  return __arch_swahw32(val);
-#else
   return ___constant_swahw32(val);
-#endif
 }
 static inline __attribute__((__const__)) __u32 __fswahb32(__u32 val) {
-#ifdef __arch_swahb32
-  return __arch_swahb32(val);
-#else
   return ___constant_swahb32(val);
-#endif
 }
 #define __swab16(x) (__u16) __builtin_bswap16((__u16) (x))
 #define __swab32(x) (__u32) __builtin_bswap32((__u32) (x))
@@ -48,73 +33,33 @@
 #define __swahw32(x) (__builtin_constant_p((__u32) (x)) ? ___constant_swahw32(x) : __fswahw32(x))
 #define __swahb32(x) (__builtin_constant_p((__u32) (x)) ? ___constant_swahb32(x) : __fswahb32(x))
 static __always_inline __u16 __swab16p(const __u16 * p) {
-#ifdef __arch_swab16p
-  return __arch_swab16p(p);
-#else
   return __swab16(* p);
-#endif
 }
 static __always_inline __u32 __swab32p(const __u32 * p) {
-#ifdef __arch_swab32p
-  return __arch_swab32p(p);
-#else
   return __swab32(* p);
-#endif
 }
 static __always_inline __u64 __swab64p(const __u64 * p) {
-#ifdef __arch_swab64p
-  return __arch_swab64p(p);
-#else
   return __swab64(* p);
-#endif
 }
 static inline __u32 __swahw32p(const __u32 * p) {
-#ifdef __arch_swahw32p
-  return __arch_swahw32p(p);
-#else
   return __swahw32(* p);
-#endif
 }
 static inline __u32 __swahb32p(const __u32 * p) {
-#ifdef __arch_swahb32p
-  return __arch_swahb32p(p);
-#else
   return __swahb32(* p);
-#endif
 }
 static inline void __swab16s(__u16 * p) {
-#ifdef __arch_swab16s
-  __arch_swab16s(p);
-#else
   * p = __swab16p(p);
-#endif
 }
 static __always_inline void __swab32s(__u32 * p) {
-#ifdef __arch_swab32s
-  __arch_swab32s(p);
-#else
   * p = __swab32p(p);
-#endif
 }
 static __always_inline void __swab64s(__u64 * p) {
-#ifdef __arch_swab64s
-  __arch_swab64s(p);
-#else
   * p = __swab64p(p);
-#endif
 }
 static inline void __swahw32s(__u32 * p) {
-#ifdef __arch_swahw32s
-  __arch_swahw32s(p);
-#else
   * p = __swahw32p(p);
-#endif
 }
 static inline void __swahb32s(__u32 * p) {
-#ifdef __arch_swahb32s
-  __arch_swahb32s(p);
-#else
   * p = __swahb32p(p);
-#endif
 }
 #endif
diff --git a/libc/kernel/uapi/linux/uio.h b/libc/kernel/uapi/linux/uio.h
index 70d6962..3384309 100644
--- a/libc/kernel/uapi/linux/uio.h
+++ b/libc/kernel/uapi/linux/uio.h
@@ -12,6 +12,17 @@
   void  * iov_base;
   __kernel_size_t iov_len;
 };
+struct dmabuf_cmsg {
+  __u64 frag_offset;
+  __u32 frag_size;
+  __u32 frag_token;
+  __u32 dmabuf_id;
+  __u32 flags;
+};
+struct dmabuf_token {
+  __u32 token_start;
+  __u32 token_count;
+};
 #define UIO_FASTIOV 8
 #define UIO_MAXIOV 1024
 #endif
diff --git a/libc/kernel/uapi/linux/usb/ch9.h b/libc/kernel/uapi/linux/usb/ch9.h
index 6762773..c1121fb 100644
--- a/libc/kernel/uapi/linux/usb/ch9.h
+++ b/libc/kernel/uapi/linux/usb/ch9.h
@@ -119,6 +119,7 @@
 #define USB_DT_DEVICE_CAPABILITY 0x10
 #define USB_DT_WIRELESS_ENDPOINT_COMP 0x11
 #define USB_DT_WIRE_ADAPTER 0x21
+#define USB_DT_DFU_FUNCTIONAL 0x21
 #define USB_DT_RPIPE 0x22
 #define USB_DT_CS_RADIO_CONTROL 0x23
 #define USB_DT_PIPE_USAGE 0x24
@@ -170,6 +171,7 @@
 #define USB_CLASS_USB_TYPE_C_BRIDGE 0x12
 #define USB_CLASS_MISC 0xef
 #define USB_CLASS_APP_SPEC 0xfe
+#define USB_SUBCLASS_DFU 0x01
 #define USB_CLASS_VENDOR_SPEC 0xff
 #define USB_SUBCLASS_VENDOR_SPEC 0xff
 struct usb_config_descriptor {
diff --git a/libc/kernel/uapi/linux/usb/functionfs.h b/libc/kernel/uapi/linux/usb/functionfs.h
index 095e937..e838363 100644
--- a/libc/kernel/uapi/linux/usb/functionfs.h
+++ b/libc/kernel/uapi/linux/usb/functionfs.h
@@ -6,6 +6,7 @@
  */
 #ifndef _UAPI__LINUX_FUNCTIONFS_H__
 #define _UAPI__LINUX_FUNCTIONFS_H__
+#include <linux/const.h>
 #include <linux/types.h>
 #include <linux/ioctl.h>
 #include <linux/usb/ch9.h>
@@ -32,6 +33,18 @@
   __le16 wMaxPacketSize;
   __u8 bInterval;
 } __attribute__((packed));
+struct usb_dfu_functional_descriptor {
+  __u8 bLength;
+  __u8 bDescriptorType;
+  __u8 bmAttributes;
+  __le16 wDetachTimeOut;
+  __le16 wTransferSize;
+  __le16 bcdDFUVersion;
+} __attribute__((packed));
+#define DFU_FUNC_ATT_CAN_DOWNLOAD _BITUL(0)
+#define DFU_FUNC_ATT_CAN_UPLOAD _BITUL(1)
+#define DFU_FUNC_ATT_MANIFEST_TOLERANT _BITUL(2)
+#define DFU_FUNC_ATT_WILL_DETACH _BITUL(3)
 struct usb_functionfs_descs_head_v2 {
   __le32 magic;
   __le32 length;
diff --git a/libc/kernel/uapi/linux/usb/g_hid.h b/libc/kernel/uapi/linux/usb/g_hid.h
new file mode 100644
index 0000000..db50738
--- /dev/null
+++ b/libc/kernel/uapi/linux/usb/g_hid.h
@@ -0,0 +1,20 @@
+/*
+ * This file is auto-generated. Modifications will be lost.
+ *
+ * See https://android.googlesource.com/platform/bionic/+/master/libc/kernel/
+ * for more information.
+ */
+#ifndef __UAPI_LINUX_USB_G_HID_H
+#define __UAPI_LINUX_USB_G_HID_H
+#include <linux/types.h>
+#define MAX_REPORT_LENGTH 64
+struct usb_hidg_report {
+  __u8 report_id;
+  __u8 userspace_req;
+  __u16 length;
+  __u8 data[MAX_REPORT_LENGTH];
+  __u8 padding[4];
+};
+#define GADGET_HID_READ_GET_REPORT_ID _IOR('g', 0x41, __u8)
+#define GADGET_HID_WRITE_GET_REPORT _IOW('g', 0x42, struct usb_hidg_report)
+#endif
diff --git a/libc/kernel/uapi/linux/vbox_vmmdev_types.h b/libc/kernel/uapi/linux/vbox_vmmdev_types.h
index 7123c02..cd0dcd9 100644
--- a/libc/kernel/uapi/linux/vbox_vmmdev_types.h
+++ b/libc/kernel/uapi/linux/vbox_vmmdev_types.h
@@ -177,6 +177,9 @@
   __u32 flags;
   __u16 offset_first_page;
   __u16 page_count;
-  __u64 pages[1];
+  union {
+    __u64 unused;
+    __DECLARE_FLEX_ARRAY(__u64, pages);
+  };
 };
 #endif
diff --git a/libc/kernel/uapi/linux/vdpa.h b/libc/kernel/uapi/linux/vdpa.h
index 462d579..a689f0d 100644
--- a/libc/kernel/uapi/linux/vdpa.h
+++ b/libc/kernel/uapi/linux/vdpa.h
@@ -17,6 +17,7 @@
   VDPA_CMD_DEV_GET,
   VDPA_CMD_DEV_CONFIG_GET,
   VDPA_CMD_DEV_VSTATS_GET,
+  VDPA_CMD_DEV_ATTR_SET,
 };
 enum vdpa_attr {
   VDPA_ATTR_UNSPEC,
diff --git a/libc/kernel/uapi/linux/version.h b/libc/kernel/uapi/linux/version.h
index 0cc45cf..728b80a 100644
--- a/libc/kernel/uapi/linux/version.h
+++ b/libc/kernel/uapi/linux/version.h
@@ -4,8 +4,8 @@
  * See https://android.googlesource.com/platform/bionic/+/master/libc/kernel/
  * for more information.
  */
-#define LINUX_VERSION_CODE 396032
+#define LINUX_VERSION_CODE 396288
 #define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c)))
 #define LINUX_VERSION_MAJOR 6
-#define LINUX_VERSION_PATCHLEVEL 11
+#define LINUX_VERSION_PATCHLEVEL 12
 #define LINUX_VERSION_SUBLEVEL 0
diff --git a/libc/kernel/uapi/linux/videodev2.h b/libc/kernel/uapi/linux/videodev2.h
index fb69a8a..e49f5ea 100644
--- a/libc/kernel/uapi/linux/videodev2.h
+++ b/libc/kernel/uapi/linux/videodev2.h
@@ -173,6 +173,7 @@
 #define V4L2_CAP_SDR_OUTPUT 0x00400000
 #define V4L2_CAP_META_CAPTURE 0x00800000
 #define V4L2_CAP_READWRITE 0x01000000
+#define V4L2_CAP_EDID 0x02000000
 #define V4L2_CAP_STREAMING 0x04000000
 #define V4L2_CAP_META_OUTPUT 0x08000000
 #define V4L2_CAP_TOUCH 0x10000000
@@ -456,6 +457,7 @@
 #define V4L2_META_FMT_VIVID v4l2_fourcc('V', 'I', 'V', 'D')
 #define V4L2_META_FMT_RK_ISP1_PARAMS v4l2_fourcc('R', 'K', '1', 'P')
 #define V4L2_META_FMT_RK_ISP1_STAT_3A v4l2_fourcc('R', 'K', '1', 'S')
+#define V4L2_META_FMT_RK_ISP1_EXT_PARAMS v4l2_fourcc('R', 'K', '1', 'E')
 #define V4L2_META_FMT_RPI_BE_CFG v4l2_fourcc('R', 'P', 'B', 'C')
 #define V4L2_PIX_FMT_PRIV_MAGIC 0xfeedcafe
 #define V4L2_PIX_FMT_FLAG_PREMUL_ALPHA 0x00000001
diff --git a/libc/kernel/uapi/linux/virtio_balloon.h b/libc/kernel/uapi/linux/virtio_balloon.h
index f37c148..3cf8807 100644
--- a/libc/kernel/uapi/linux/virtio_balloon.h
+++ b/libc/kernel/uapi/linux/virtio_balloon.h
@@ -38,8 +38,14 @@
 #define VIRTIO_BALLOON_S_CACHES 7
 #define VIRTIO_BALLOON_S_HTLB_PGALLOC 8
 #define VIRTIO_BALLOON_S_HTLB_PGFAIL 9
-#define VIRTIO_BALLOON_S_NR 10
-#define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { VIRTIO_BALLOON_S_NAMES_prefix "swap-in", VIRTIO_BALLOON_S_NAMES_prefix "swap-out", VIRTIO_BALLOON_S_NAMES_prefix "major-faults", VIRTIO_BALLOON_S_NAMES_prefix "minor-faults", VIRTIO_BALLOON_S_NAMES_prefix "free-memory", VIRTIO_BALLOON_S_NAMES_prefix "total-memory", VIRTIO_BALLOON_S_NAMES_prefix "available-memory", VIRTIO_BALLOON_S_NAMES_prefix "disk-caches", VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures" \
+#define VIRTIO_BALLOON_S_OOM_KILL 10
+#define VIRTIO_BALLOON_S_ALLOC_STALL 11
+#define VIRTIO_BALLOON_S_ASYNC_SCAN 12
+#define VIRTIO_BALLOON_S_DIRECT_SCAN 13
+#define VIRTIO_BALLOON_S_ASYNC_RECLAIM 14
+#define VIRTIO_BALLOON_S_DIRECT_RECLAIM 15
+#define VIRTIO_BALLOON_S_NR 16
+#define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { VIRTIO_BALLOON_S_NAMES_prefix "swap-in", VIRTIO_BALLOON_S_NAMES_prefix "swap-out", VIRTIO_BALLOON_S_NAMES_prefix "major-faults", VIRTIO_BALLOON_S_NAMES_prefix "minor-faults", VIRTIO_BALLOON_S_NAMES_prefix "free-memory", VIRTIO_BALLOON_S_NAMES_prefix "total-memory", VIRTIO_BALLOON_S_NAMES_prefix "available-memory", VIRTIO_BALLOON_S_NAMES_prefix "disk-caches", VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures", VIRTIO_BALLOON_S_NAMES_prefix "oom-kills", VIRTIO_BALLOON_S_NAMES_prefix "alloc-stalls", VIRTIO_BALLOON_S_NAMES_prefix "async-scans", VIRTIO_BALLOON_S_NAMES_prefix "direct-scans", VIRTIO_BALLOON_S_NAMES_prefix "async-reclaims", VIRTIO_BALLOON_S_NAMES_prefix "direct-reclaims" \
 }
 #define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("")
 struct virtio_balloon_stat {
diff --git a/libc/kernel/uapi/linux/virtio_gpu.h b/libc/kernel/uapi/linux/virtio_gpu.h
index c3f0fcc..bf35cf7 100644
--- a/libc/kernel/uapi/linux/virtio_gpu.h
+++ b/libc/kernel/uapi/linux/virtio_gpu.h
@@ -195,6 +195,7 @@
 #define VIRTIO_GPU_CAPSET_VIRGL 1
 #define VIRTIO_GPU_CAPSET_VIRGL2 2
 #define VIRTIO_GPU_CAPSET_VENUS 4
+#define VIRTIO_GPU_CAPSET_DRM 6
 struct virtio_gpu_get_capset_info {
   struct virtio_gpu_ctrl_hdr hdr;
   __le32 capset_index;
diff --git a/libc/kernel/uapi/rdma/bnxt_re-abi.h b/libc/kernel/uapi/rdma/bnxt_re-abi.h
index 50f8b8a..38bfb1b 100644
--- a/libc/kernel/uapi/rdma/bnxt_re-abi.h
+++ b/libc/kernel/uapi/rdma/bnxt_re-abi.h
@@ -27,6 +27,7 @@
 };
 enum {
   BNXT_RE_COMP_MASK_REQ_UCNTX_POW2_SUPPORT = 0x01,
+  BNXT_RE_COMP_MASK_REQ_UCNTX_VAR_WQE_SUPPORT = 0x02,
 };
 struct bnxt_re_uctx_req {
   __aligned_u64 comp_mask;
@@ -66,10 +67,15 @@
 struct bnxt_re_resize_cq_req {
   __aligned_u64 cq_va;
 };
+enum bnxt_re_qp_mask {
+  BNXT_RE_QP_REQ_MASK_VAR_WQE_SQ_SLOTS = 0x1,
+};
 struct bnxt_re_qp_req {
   __aligned_u64 qpsva;
   __aligned_u64 qprva;
   __aligned_u64 qp_handle;
+  __aligned_u64 comp_mask;
+  __u32 sq_slots;
 };
 struct bnxt_re_qp_resp {
   __u32 qpid;
@@ -79,8 +85,13 @@
   __aligned_u64 srqva;
   __aligned_u64 srq_handle;
 };
+enum bnxt_re_srq_mask {
+  BNXT_RE_SRQ_TOGGLE_PAGE_SUPPORT = 0x1,
+};
 struct bnxt_re_srq_resp {
   __u32 srqid;
+  __u32 rsvd;
+  __aligned_u64 comp_mask;
 };
 enum bnxt_re_shpg_offt {
   BNXT_RE_BEG_RESV_OFFT = 0x00,
diff --git a/libc/kernel/uapi/rdma/mlx5_user_ioctl_cmds.h b/libc/kernel/uapi/rdma/mlx5_user_ioctl_cmds.h
index ebafb00..2e61c71 100644
--- a/libc/kernel/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/libc/kernel/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -204,6 +204,9 @@
 enum mlx5_ib_create_cq_attrs {
   MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX = UVERBS_ID_DRIVER_NS_WITH_UHW,
 };
+enum mlx5_ib_reg_dmabuf_mr_attrs {
+  MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS = (1U << UVERBS_ID_NS_SHIFT),
+};
 #define MLX5_IB_DW_MATCH_PARAM 0xA0
 struct mlx5_ib_match_params {
   __u32 match_params[MLX5_IB_DW_MATCH_PARAM];
@@ -261,9 +264,13 @@
 };
 enum mlx5_ib_device_methods {
   MLX5_IB_METHOD_QUERY_PORT = (1U << UVERBS_ID_NS_SHIFT),
+  MLX5_IB_METHOD_GET_DATA_DIRECT_SYSFS_PATH,
 };
 enum mlx5_ib_query_port_attrs {
   MLX5_IB_ATTR_QUERY_PORT_PORT_NUM = (1U << UVERBS_ID_NS_SHIFT),
   MLX5_IB_ATTR_QUERY_PORT,
 };
+enum mlx5_ib_get_data_direct_sysfs_path_attrs {
+  MLX5_IB_ATTR_GET_DATA_DIRECT_SYSFS_PATH = (1U << UVERBS_ID_NS_SHIFT),
+};
 #endif
diff --git a/libc/kernel/uapi/rdma/mlx5_user_ioctl_verbs.h b/libc/kernel/uapi/rdma/mlx5_user_ioctl_verbs.h
index f087ee8..3fe3c82 100644
--- a/libc/kernel/uapi/rdma/mlx5_user_ioctl_verbs.h
+++ b/libc/kernel/uapi/rdma/mlx5_user_ioctl_verbs.h
@@ -23,6 +23,9 @@
   MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x2,
   MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3,
 };
+enum mlx5_ib_uapi_reg_dmabuf_flags {
+  MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT = 1 << 0,
+};
 struct mlx5_ib_uapi_devx_async_cmd_hdr {
   __aligned_u64 wr_id;
   __u8 out_data[];
diff --git a/libc/kernel/uapi/rdma/rdma_netlink.h b/libc/kernel/uapi/rdma/rdma_netlink.h
index ac027ac..137b68f 100644
--- a/libc/kernel/uapi/rdma/rdma_netlink.h
+++ b/libc/kernel/uapi/rdma/rdma_netlink.h
@@ -17,6 +17,7 @@
 enum {
   RDMA_NL_GROUP_IWPM = 2,
   RDMA_NL_GROUP_LS,
+  RDMA_NL_GROUP_NOTIFY,
   RDMA_NL_NUM_GROUPS
 };
 #define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10)
@@ -201,6 +202,7 @@
   RDMA_NLDEV_CMD_RES_SRQ_GET_RAW,
   RDMA_NLDEV_CMD_NEWDEV,
   RDMA_NLDEV_CMD_DELDEV,
+  RDMA_NLDEV_CMD_MONITOR,
   RDMA_NLDEV_NUM_OPS
 };
 enum rdma_nldev_print_type {
@@ -311,6 +313,8 @@
   RDMA_NLDEV_ATTR_DEV_TYPE,
   RDMA_NLDEV_ATTR_PARENT_NAME,
   RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE,
+  RDMA_NLDEV_ATTR_EVENT_TYPE,
+  RDMA_NLDEV_SYS_ATTR_MONITOR_MODE,
   RDMA_NLDEV_ATTR_MAX
 };
 enum rdma_nl_counter_mode {
@@ -330,4 +334,10 @@
   RDMA_NAME_ASSIGN_TYPE_UNKNOWN = 0,
   RDMA_NAME_ASSIGN_TYPE_USER = 1,
 };
+enum rdma_nl_notify_event_type {
+  RDMA_REGISTER_EVENT,
+  RDMA_UNREGISTER_EVENT,
+  RDMA_NETDEV_ATTACH_EVENT,
+  RDMA_NETDEV_DETACH_EVENT,
+};
 #endif
diff --git a/libc/kernel/uapi/sound/asequencer.h b/libc/kernel/uapi/sound/asequencer.h
index a3826a5..83b38f1 100644
--- a/libc/kernel/uapi/sound/asequencer.h
+++ b/libc/kernel/uapi/sound/asequencer.h
@@ -298,6 +298,7 @@
 #define SNDRV_SEQ_PORT_FLG_GIVEN_PORT (1 << 0)
 #define SNDRV_SEQ_PORT_FLG_TIMESTAMP (1 << 1)
 #define SNDRV_SEQ_PORT_FLG_TIME_REAL (1 << 2)
+#define SNDRV_SEQ_PORT_FLG_IS_MIDI1 (1 << 3)
 #define SNDRV_SEQ_PORT_DIR_UNKNOWN 0
 #define SNDRV_SEQ_PORT_DIR_INPUT 1
 #define SNDRV_SEQ_PORT_DIR_OUTPUT 2
diff --git a/libc/kernel/uapi/sound/asoc.h b/libc/kernel/uapi/sound/asoc.h
index f7992cb..22e750d 100644
--- a/libc/kernel/uapi/sound/asoc.h
+++ b/libc/kernel/uapi/sound/asoc.h
@@ -53,7 +53,7 @@
 #define SND_SOC_TPLG_MAGIC 0x41536F43
 #define SND_SOC_TPLG_NUM_TEXTS 16
 #define SND_SOC_TPLG_ABI_VERSION 0x5
-#define SND_SOC_TPLG_ABI_VERSION_MIN 0x4
+#define SND_SOC_TPLG_ABI_VERSION_MIN 0x5
 #define SND_SOC_TPLG_TLV_SIZE 32
 #define SND_SOC_TPLG_TYPE_MIXER 1
 #define SND_SOC_TPLG_TYPE_BYTES 2
diff --git a/libc/kernel/uapi/sound/asound.h b/libc/kernel/uapi/sound/asound.h
index cfe9f66..cbebef3 100644
--- a/libc/kernel/uapi/sound/asound.h
+++ b/libc/kernel/uapi/sound/asound.h
@@ -676,7 +676,7 @@
 #define SNDRV_RAWMIDI_IOCTL_DRAIN _IOW('W', 0x31, int)
 #define SNDRV_UMP_IOCTL_ENDPOINT_INFO _IOR('W', 0x40, struct snd_ump_endpoint_info)
 #define SNDRV_UMP_IOCTL_BLOCK_INFO _IOR('W', 0x41, struct snd_ump_block_info)
-#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 7)
+#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 8)
 enum {
   SNDRV_TIMER_CLASS_NONE = - 1,
   SNDRV_TIMER_CLASS_SLAVE = 0,
@@ -696,6 +696,7 @@
 #define SNDRV_TIMER_GLOBAL_RTC 1
 #define SNDRV_TIMER_GLOBAL_HPET 2
 #define SNDRV_TIMER_GLOBAL_HRTIMER 3
+#define SNDRV_TIMER_GLOBAL_UDRIVEN 4
 #define SNDRV_TIMER_FLG_SLAVE (1 << 0)
 struct snd_timer_id {
   int dev_class;
@@ -762,6 +763,12 @@
   unsigned int queue;
   unsigned char reserved[64];
 };
+struct snd_timer_uinfo {
+  __u64 resolution;
+  int fd;
+  unsigned int id;
+  unsigned char reserved[16];
+};
 #define SNDRV_TIMER_IOCTL_PVERSION _IOR('T', 0x00, int)
 #define SNDRV_TIMER_IOCTL_NEXT_DEVICE _IOWR('T', 0x01, struct snd_timer_id)
 #define SNDRV_TIMER_IOCTL_TREAD_OLD _IOW('T', 0x02, int)
@@ -777,6 +784,8 @@
 #define SNDRV_TIMER_IOCTL_CONTINUE _IO('T', 0xa2)
 #define SNDRV_TIMER_IOCTL_PAUSE _IO('T', 0xa3)
 #define SNDRV_TIMER_IOCTL_TREAD64 _IOW('T', 0xa4, int)
+#define SNDRV_TIMER_IOCTL_CREATE _IOWR('T', 0xa5, struct snd_timer_uinfo)
+#define SNDRV_TIMER_IOCTL_TRIGGER _IO('T', 0xa6)
 #if __BITS_PER_LONG == 64
 #define SNDRV_TIMER_IOCTL_TREAD SNDRV_TIMER_IOCTL_TREAD_OLD
 #else
diff --git a/libc/kernel/uapi/xen/privcmd.h b/libc/kernel/uapi/xen/privcmd.h
index 0597247..0874e4b 100644
--- a/libc/kernel/uapi/xen/privcmd.h
+++ b/libc/kernel/uapi/xen/privcmd.h
@@ -77,6 +77,10 @@
   domid_t dom;
   __u8 pad[2];
 };
+struct privcmd_pcidev_get_gsi {
+  __u32 sbdf;
+  __u32 gsi;
+};
 #define IOCTL_PRIVCMD_HYPERCALL _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
 #define IOCTL_PRIVCMD_MMAP _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
 #define IOCTL_PRIVCMD_MMAPBATCH _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
@@ -86,4 +90,5 @@
 #define IOCTL_PRIVCMD_MMAP_RESOURCE _IOC(_IOC_NONE, 'P', 7, sizeof(struct privcmd_mmap_resource))
 #define IOCTL_PRIVCMD_IRQFD _IOW('P', 8, struct privcmd_irqfd)
 #define IOCTL_PRIVCMD_IOEVENTFD _IOW('P', 9, struct privcmd_ioeventfd)
+#define IOCTL_PRIVCMD_PCIDEV_GET_GSI _IOC(_IOC_NONE, 'P', 10, sizeof(struct privcmd_pcidev_get_gsi))
 #endif
diff --git a/libc/libc.map.txt b/libc/libc.map.txt
index d8636a2..86dcc39 100644
--- a/libc/libc.map.txt
+++ b/libc/libc.map.txt
@@ -1612,6 +1612,8 @@
   global:
     lchmod;
     mseal;
+    pthread_getaffinity_np;
+    pthread_setaffinity_np;
     qsort_r;
     sig2str;
     str2sig;
diff --git a/libc/malloc_debug/Android.bp b/libc/malloc_debug/Android.bp
index 3828c28..5d61801 100644
--- a/libc/malloc_debug/Android.bp
+++ b/libc/malloc_debug/Android.bp
@@ -79,6 +79,10 @@
         "libmemunreachable",
     ],
 
+    whole_static_libs: [
+        "libmemory_trace",
+    ],
+
     shared_libs: [
         "libunwindstack",
     ],
diff --git a/libc/bionic/strnlen.cpp b/libc/malloc_debug/Nanotime.h
similarity index 81%
rename from libc/bionic/strnlen.cpp
rename to libc/malloc_debug/Nanotime.h
index 7101b21..d7c3f60 100644
--- a/libc/bionic/strnlen.cpp
+++ b/libc/malloc_debug/Nanotime.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (C) 2012 The Android Open Source Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,9 +26,13 @@
  * SUCH DAMAGE.
  */
 
-#include <string.h>
+#pragma once
 
-size_t strnlen(const char* s, size_t n) {
-  const char* p = static_cast<const char*>(memchr(s, 0, n));
-  return p ? (p - s) : n;
+#include <stdint.h>
+#include <time.h>
+
+static inline __always_inline uint64_t Nanotime() {
+  struct timespec t = {};
+  clock_gettime(CLOCK_MONOTONIC, &t);
+  return static_cast<uint64_t>(t.tv_sec) * 1000000000LL + t.tv_nsec;
 }
diff --git a/libc/malloc_debug/RecordData.cpp b/libc/malloc_debug/RecordData.cpp
index 79e051b..f832f03 100644
--- a/libc/malloc_debug/RecordData.cpp
+++ b/libc/malloc_debug/RecordData.cpp
@@ -39,76 +39,23 @@
 #include <mutex>
 
 #include <android-base/stringprintf.h>
+#include <memory_trace/MemoryTrace.h>
 
 #include "Config.h"
 #include "DebugData.h"
+#include "Nanotime.h"
 #include "RecordData.h"
 #include "debug_disable.h"
 #include "debug_log.h"
 
-RecordEntry::RecordEntry() : tid_(gettid()) {
-}
-
-bool ThreadCompleteEntry::Write(int fd) const {
-  return dprintf(fd, "%d: thread_done 0x0\n", tid_) > 0;
-}
-
-AllocEntry::AllocEntry(void* pointer, uint64_t start_ns, uint64_t end_ns)
-    : pointer_(pointer), start_ns_(start_ns), end_ns_(end_ns) {}
-
-MallocEntry::MallocEntry(void* pointer, size_t size, uint64_t start_ns, uint64_t end_ns)
-    : AllocEntry(pointer, start_ns, end_ns), size_(size) {}
-
-bool MallocEntry::Write(int fd) const {
-  return dprintf(fd, "%d: malloc %p %zu %" PRIu64 " %" PRIu64 "\n", tid_, pointer_, size_,
-                 start_ns_, end_ns_) > 0;
-}
-
-FreeEntry::FreeEntry(void* pointer, uint64_t start_ns, uint64_t end_ns)
-    : AllocEntry(pointer, start_ns, end_ns) {}
-
-bool FreeEntry::Write(int fd) const {
-  return dprintf(fd, "%d: free %p %" PRIu64 " %" PRIu64 "\n", tid_, pointer_, start_ns_, end_ns_) >
-         0;
-}
-
-CallocEntry::CallocEntry(void* pointer, size_t nmemb, size_t size, uint64_t start_ns,
-                         uint64_t end_ns)
-    : MallocEntry(pointer, size, start_ns, end_ns), nmemb_(nmemb) {}
-
-bool CallocEntry::Write(int fd) const {
-  return dprintf(fd, "%d: calloc %p %zu %zu %" PRIu64 " %" PRIu64 "\n", tid_, pointer_, nmemb_,
-                 size_, start_ns_, end_ns_) > 0;
-}
-
-ReallocEntry::ReallocEntry(void* pointer, size_t size, void* old_pointer, uint64_t start_ns,
-                           uint64_t end_ns)
-    : MallocEntry(pointer, size, start_ns, end_ns), old_pointer_(old_pointer) {}
-
-bool ReallocEntry::Write(int fd) const {
-  return dprintf(fd, "%d: realloc %p %p %zu %" PRIu64 " %" PRIu64 "\n", tid_, pointer_,
-                 old_pointer_, size_, start_ns_, end_ns_) > 0;
-}
-
-// aligned_alloc, posix_memalign, memalign, pvalloc, valloc all recorded with this class.
-MemalignEntry::MemalignEntry(void* pointer, size_t size, size_t alignment, uint64_t start_ns,
-                             uint64_t end_ns)
-    : MallocEntry(pointer, size, start_ns, end_ns), alignment_(alignment) {}
-
-bool MemalignEntry::Write(int fd) const {
-  return dprintf(fd, "%d: memalign %p %zu %zu %" PRIu64 " %" PRIu64 "\n", tid_, pointer_,
-                 alignment_, size_, start_ns_, end_ns_) > 0;
-}
-
 struct ThreadData {
-  ThreadData(RecordData* record_data, ThreadCompleteEntry* entry)
-      : record_data(record_data), entry(entry) {}
-  RecordData* record_data;
-  ThreadCompleteEntry* entry;
+  ThreadData(RecordData* record_data) : record_data(record_data) {}
+
+  RecordData* record_data = nullptr;
   size_t count = 0;
 };
 
-static void ThreadKeyDelete(void* data) {
+void RecordData::ThreadKeyDelete(void* data) {
   ThreadData* thread_data = reinterpret_cast<ThreadData*>(data);
 
   thread_data->count++;
@@ -117,7 +64,11 @@
   if (thread_data->count == 4) {
     ScopedDisableDebugCalls disable;
 
-    thread_data->record_data->AddEntryOnly(thread_data->entry);
+    memory_trace::Entry* entry = thread_data->record_data->InternalReserveEntry();
+    if (entry != nullptr) {
+      *entry = memory_trace::Entry{
+          .tid = gettid(), .type = memory_trace::THREAD_DONE, .end_ns = Nanotime()};
+    }
     delete thread_data;
   } else {
     pthread_setspecific(thread_data->record_data->key(), data);
@@ -159,7 +110,12 @@
   }
 
   for (size_t i = 0; i < cur_index_; i++) {
-    if (!entries_[i]->Write(dump_fd)) {
+    if (entries_[i].type == memory_trace::UNKNOWN) {
+      // This can happen if an entry was reserved but not filled in due to some
+      // type of error during the operation.
+      continue;
+    }
+    if (!memory_trace::WriteEntryToFd(dump_fd, entries_[i])) {
       error_log("Failed to write record alloc information: %s", strerror(errno));
       break;
     }
@@ -201,25 +157,26 @@
   pthread_key_delete(key_);
 }
 
-void RecordData::AddEntryOnly(const RecordEntry* entry) {
+memory_trace::Entry* RecordData::InternalReserveEntry() {
   std::lock_guard<std::mutex> entries_lock(entries_lock_);
   if (cur_index_ == entries_.size()) {
-    // Maxed out, throw the entry away.
-    return;
+    return nullptr;
   }
 
-  entries_[cur_index_++].reset(entry);
-  if (cur_index_ == entries_.size()) {
+  memory_trace::Entry* entry = &entries_[cur_index_];
+  entry->type = memory_trace::UNKNOWN;
+  if (++cur_index_ == entries_.size()) {
     info_log("Maximum number of records added, all new operations will be dropped.");
   }
+  return entry;
 }
 
-void RecordData::AddEntry(const RecordEntry* entry) {
+memory_trace::Entry* RecordData::ReserveEntry() {
   void* data = pthread_getspecific(key_);
   if (data == nullptr) {
-    ThreadData* thread_data = new ThreadData(this, new ThreadCompleteEntry());
+    ThreadData* thread_data = new ThreadData(this);
     pthread_setspecific(key_, thread_data);
   }
 
-  AddEntryOnly(entry);
+  return InternalReserveEntry();
 }
diff --git a/libc/malloc_debug/RecordData.h b/libc/malloc_debug/RecordData.h
index 7efa1f7..ce71da1 100644
--- a/libc/malloc_debug/RecordData.h
+++ b/libc/malloc_debug/RecordData.h
@@ -39,117 +39,9 @@
 #include <string>
 #include <vector>
 
+#include <memory_trace/MemoryTrace.h>
 #include <platform/bionic/macros.h>
 
-class RecordEntry {
- public:
-  RecordEntry();
-  virtual ~RecordEntry() = default;
-
-  virtual bool Write(int fd) const = 0;
-
- protected:
-  pid_t tid_;
-
- private:
-  BIONIC_DISALLOW_COPY_AND_ASSIGN(RecordEntry);
-};
-
-class ThreadCompleteEntry : public RecordEntry {
- public:
-  ThreadCompleteEntry() = default;
-  virtual ~ThreadCompleteEntry() = default;
-
-  bool Write(int fd) const override;
-
- private:
-  BIONIC_DISALLOW_COPY_AND_ASSIGN(ThreadCompleteEntry);
-};
-
-class AllocEntry : public RecordEntry {
- public:
-  explicit AllocEntry(void* pointer, uint64_t st, uint64_t et);
-  virtual ~AllocEntry() = default;
-
- protected:
-  void* pointer_;
-
-  // The start/end time of this operation.
-  uint64_t start_ns_;
-  uint64_t end_ns_;
-
- private:
-  BIONIC_DISALLOW_COPY_AND_ASSIGN(AllocEntry);
-};
-
-class MallocEntry : public AllocEntry {
- public:
-  MallocEntry(void* pointer, size_t size, uint64_t st, uint64_t et);
-  virtual ~MallocEntry() = default;
-
-  bool Write(int fd) const override;
-
- protected:
-  size_t size_;
-
- private:
-  BIONIC_DISALLOW_COPY_AND_ASSIGN(MallocEntry);
-};
-
-class FreeEntry : public AllocEntry {
- public:
-  explicit FreeEntry(void* pointer, uint64_t st, uint64_t et);
-  virtual ~FreeEntry() = default;
-
-  bool Write(int fd) const override;
-
- private:
-  BIONIC_DISALLOW_COPY_AND_ASSIGN(FreeEntry);
-};
-
-class CallocEntry : public MallocEntry {
- public:
-  CallocEntry(void* pointer, size_t nmemb, size_t size, uint64_t st, uint64_t et);
-  virtual ~CallocEntry() = default;
-
-  bool Write(int fd) const override;
-
- protected:
-  size_t nmemb_;
-
- private:
-  BIONIC_DISALLOW_COPY_AND_ASSIGN(CallocEntry);
-};
-
-class ReallocEntry : public MallocEntry {
- public:
-  ReallocEntry(void* pointer, size_t size, void* old_pointer, uint64_t st, uint64_t et);
-  virtual ~ReallocEntry() = default;
-
-  bool Write(int fd) const override;
-
- protected:
-  void* old_pointer_;
-
- private:
-  BIONIC_DISALLOW_COPY_AND_ASSIGN(ReallocEntry);
-};
-
-// aligned_alloc, posix_memalign, memalign, pvalloc, valloc all recorded with this class.
-class MemalignEntry : public MallocEntry {
- public:
-  MemalignEntry(void* pointer, size_t size, size_t alignment, uint64_t st, uint64_t et);
-  virtual ~MemalignEntry() = default;
-
-  bool Write(int fd) const override;
-
- protected:
-  size_t alignment_;
-
- private:
-  BIONIC_DISALLOW_COPY_AND_ASSIGN(MemalignEntry);
-};
-
 class Config;
 
 class RecordData {
@@ -159,8 +51,7 @@
 
   bool Initialize(const Config& config);
 
-  void AddEntry(const RecordEntry* entry);
-  void AddEntryOnly(const RecordEntry* entry);
+  memory_trace::Entry* ReserveEntry();
 
   const std::string& file() { return file_; }
   pthread_key_t key() { return key_; }
@@ -171,12 +62,16 @@
   static void WriteData(int, siginfo_t*, void*);
   static RecordData* record_obj_;
 
+  static void ThreadKeyDelete(void* data);
+
   void WriteEntries();
   void WriteEntries(const std::string& file);
 
+  memory_trace::Entry* InternalReserveEntry();
+
   std::mutex entries_lock_;
   pthread_key_t key_;
-  std::vector<std::unique_ptr<const RecordEntry>> entries_;
+  std::vector<memory_trace::Entry> entries_;
   size_t cur_index_;
   std::string file_;
 
diff --git a/libc/malloc_debug/malloc_debug.cpp b/libc/malloc_debug/malloc_debug.cpp
index 3743852..fce6c24 100644
--- a/libc/malloc_debug/malloc_debug.cpp
+++ b/libc/malloc_debug/malloc_debug.cpp
@@ -54,6 +54,7 @@
 #include "Config.h"
 #include "DebugData.h"
 #include "LogAllocatorStats.h"
+#include "Nanotime.h"
 #include "Unreachable.h"
 #include "UnwindBacktrace.h"
 #include "backtrace.h"
@@ -70,12 +71,6 @@
 
 const MallocDispatch* g_dispatch;
 
-static inline __always_inline uint64_t Nanotime() {
-  struct timespec t = {};
-  clock_gettime(CLOCK_MONOTONIC, &t);
-  return static_cast<uint64_t>(t.tv_sec) * 1000000000LL + t.tv_nsec;
-}
-
 namespace {
 // A TimedResult contains the result of from malloc end_ns al. functions and the
 // start/end timestamps.
@@ -595,11 +590,22 @@
   ScopedDisableDebugCalls disable;
   ScopedBacktraceSignalBlocker blocked;
 
+  memory_trace::Entry* entry = nullptr;
+  if (g_debug->config().options() & RECORD_ALLOCS) {
+    // In order to preserve the order of operations, reserve the entry before
+    // performing the operation.
+    entry = g_debug->record->ReserveEntry();
+  }
+
   TimedResult result = InternalMalloc(size);
 
-  if (g_debug->config().options() & RECORD_ALLOCS) {
-    g_debug->record->AddEntry(new MallocEntry(result.getValue<void*>(), size,
-                                              result.GetStartTimeNS(), result.GetEndTimeNS()));
+  if (entry != nullptr) {
+    *entry = memory_trace::Entry{.tid = gettid(),
+                                 .type = memory_trace::MALLOC,
+                                 .ptr = reinterpret_cast<uint64_t>(result.getValue<void*>()),
+                                 .size = size,
+                                 .start_ns = result.GetStartTimeNS(),
+                                 .end_ns = result.GetEndTimeNS()};
   }
 
   return result.getValue<void*>();
@@ -684,11 +690,21 @@
     return;
   }
 
+  memory_trace::Entry* entry = nullptr;
+  if (g_debug->config().options() & RECORD_ALLOCS) {
+    // In order to preserve the order of operations, reserve the entry before
+    // performing the operation.
+    entry = g_debug->record->ReserveEntry();
+  }
+
   TimedResult result = InternalFree(pointer);
 
-  if (g_debug->config().options() & RECORD_ALLOCS) {
-    g_debug->record->AddEntry(
-        new FreeEntry(pointer, result.GetStartTimeNS(), result.GetEndTimeNS()));
+  if (entry != nullptr) {
+    *entry = memory_trace::Entry{.tid = gettid(),
+                                 .type = memory_trace::FREE,
+                                 .ptr = reinterpret_cast<uint64_t>(pointer),
+                                 .start_ns = result.GetStartTimeNS(),
+                                 .end_ns = result.GetEndTimeNS()};
   }
 }
 
@@ -711,6 +727,13 @@
     return nullptr;
   }
 
+  memory_trace::Entry* entry = nullptr;
+  if (g_debug->config().options() & RECORD_ALLOCS) {
+    // In order to preserve the order of operations, reserve the entry before
+    // performing the operation.
+    entry = g_debug->record->ReserveEntry();
+  }
+
   TimedResult result;
   void* pointer;
   if (g_debug->HeaderEnabled()) {
@@ -758,22 +781,29 @@
     pointer = result.getValue<void*>();
   }
 
-  if (pointer != nullptr) {
-    if (g_debug->TrackPointers()) {
-      PointerData::Add(pointer, bytes);
-    }
+  if (pointer == nullptr) {
+    return nullptr;
+  }
 
-    if (g_debug->config().options() & FILL_ON_ALLOC) {
-      size_t bytes = InternalMallocUsableSize(pointer);
-      size_t fill_bytes = g_debug->config().fill_on_alloc_bytes();
-      bytes = (bytes < fill_bytes) ? bytes : fill_bytes;
-      memset(pointer, g_debug->config().fill_alloc_value(), bytes);
-    }
+  if (g_debug->TrackPointers()) {
+    PointerData::Add(pointer, bytes);
+  }
 
-    if (g_debug->config().options() & RECORD_ALLOCS) {
-      g_debug->record->AddEntry(new MemalignEntry(pointer, bytes, alignment,
-                                                  result.GetStartTimeNS(), result.GetEndTimeNS()));
-    }
+  if (g_debug->config().options() & FILL_ON_ALLOC) {
+    size_t bytes = InternalMallocUsableSize(pointer);
+    size_t fill_bytes = g_debug->config().fill_on_alloc_bytes();
+    bytes = (bytes < fill_bytes) ? bytes : fill_bytes;
+    memset(pointer, g_debug->config().fill_alloc_value(), bytes);
+  }
+
+  if (entry != nullptr) {
+    *entry = memory_trace::Entry{.tid = gettid(),
+                                 .type = memory_trace::MEMALIGN,
+                                 .ptr = reinterpret_cast<uint64_t>(pointer),
+                                 .size = bytes,
+                                 .u.align = alignment,
+                                 .start_ns = result.GetStartTimeNS(),
+                                 .end_ns = result.GetEndTimeNS()};
   }
 
   return pointer;
@@ -789,13 +819,25 @@
   ScopedDisableDebugCalls disable;
   ScopedBacktraceSignalBlocker blocked;
 
+  memory_trace::Entry* entry = nullptr;
+  if (g_debug->config().options() & RECORD_ALLOCS) {
+    // In order to preserve the order of operations, reserve the entry before
+    // performing the operation.
+    entry = g_debug->record->ReserveEntry();
+  }
+
   if (pointer == nullptr) {
     TimedResult result = InternalMalloc(bytes);
-    if (g_debug->config().options() & RECORD_ALLOCS) {
-      g_debug->record->AddEntry(new ReallocEntry(result.getValue<void*>(), bytes, nullptr,
-                                                 result.GetStartTimeNS(), result.GetEndTimeNS()));
-    }
     pointer = result.getValue<void*>();
+    if (entry != nullptr) {
+      *entry = memory_trace::Entry{.tid = gettid(),
+                                   .type = memory_trace::REALLOC,
+                                   .ptr = reinterpret_cast<uint64_t>(pointer),
+                                   .size = bytes,
+                                   .u.old_ptr = 0,
+                                   .start_ns = result.GetStartTimeNS(),
+                                   .end_ns = result.GetEndTimeNS()};
+    }
     return pointer;
   }
 
@@ -806,9 +848,14 @@
   if (bytes == 0) {
     TimedResult result = InternalFree(pointer);
 
-    if (g_debug->config().options() & RECORD_ALLOCS) {
-      g_debug->record->AddEntry(new ReallocEntry(nullptr, bytes, pointer, result.GetStartTimeNS(),
-                                                 result.GetEndTimeNS()));
+    if (entry != nullptr) {
+      *entry = memory_trace::Entry{.tid = gettid(),
+                                   .type = memory_trace::REALLOC,
+                                   .ptr = 0,
+                                   .size = 0,
+                                   .u.old_ptr = reinterpret_cast<uint64_t>(pointer),
+                                   .start_ns = result.GetStartTimeNS(),
+                                   .end_ns = result.GetEndTimeNS()};
     }
 
     return nullptr;
@@ -904,9 +951,14 @@
     }
   }
 
-  if (g_debug->config().options() & RECORD_ALLOCS) {
-    g_debug->record->AddEntry(new ReallocEntry(new_pointer, bytes, pointer, result.GetStartTimeNS(),
-                                               result.GetEndTimeNS()));
+  if (entry != nullptr) {
+    *entry = memory_trace::Entry{.tid = gettid(),
+                                 .type = memory_trace::REALLOC,
+                                 .ptr = reinterpret_cast<uint64_t>(new_pointer),
+                                 .size = bytes,
+                                 .u.old_ptr = reinterpret_cast<uint64_t>(pointer),
+                                 .start_ns = result.GetStartTimeNS(),
+                                 .end_ns = result.GetEndTimeNS()};
   }
 
   return new_pointer;
@@ -945,6 +997,13 @@
     return nullptr;
   }
 
+  memory_trace::Entry* entry = nullptr;
+  if (g_debug->config().options() & RECORD_ALLOCS) {
+    // In order to preserve the order of operations, reserve the entry before
+    // performing the operation.
+    entry = g_debug->record->ReserveEntry();
+  }
+
   void* pointer;
   TimedResult result;
   if (g_debug->HeaderEnabled()) {
@@ -961,9 +1020,14 @@
     pointer = result.getValue<void*>();
   }
 
-  if (g_debug->config().options() & RECORD_ALLOCS) {
-    g_debug->record->AddEntry(
-        new CallocEntry(pointer, nmemb, bytes, result.GetStartTimeNS(), result.GetEndTimeNS()));
+  if (entry != nullptr) {
+    *entry = memory_trace::Entry{.tid = gettid(),
+                                 .type = memory_trace::CALLOC,
+                                 .ptr = reinterpret_cast<uint64_t>(pointer),
+                                 .size = bytes,
+                                 .u.n_elements = nmemb,
+                                 .start_ns = result.GetStartTimeNS(),
+                                 .end_ns = result.GetEndTimeNS()};
   }
 
   if (pointer != nullptr && g_debug->TrackPointers()) {
@@ -1062,18 +1126,20 @@
 
 void debug_malloc_disable() {
   ScopedConcurrentLock lock;
-  g_dispatch->malloc_disable();
   if (g_debug->pointer) {
+    // Acquire the pointer locks first, otherwise, the code can be holding
+    // the allocation lock and deadlock trying to acquire a pointer lock.
     g_debug->pointer->PrepareFork();
   }
+  g_dispatch->malloc_disable();
 }
 
 void debug_malloc_enable() {
   ScopedConcurrentLock lock;
+  g_dispatch->malloc_enable();
   if (g_debug->pointer) {
     g_debug->pointer->PostForkParent();
   }
-  g_dispatch->malloc_enable();
 }
 
 ssize_t debug_malloc_backtrace(void* pointer, uintptr_t* frames, size_t max_frames) {
diff --git a/libc/malloc_debug/tests/malloc_debug_system_tests.cpp b/libc/malloc_debug/tests/malloc_debug_system_tests.cpp
index d7a7a4f..080242c 100644
--- a/libc/malloc_debug/tests/malloc_debug_system_tests.cpp
+++ b/libc/malloc_debug/tests/malloc_debug_system_tests.cpp
@@ -57,6 +57,12 @@
 #include <bionic/malloc.h>
 #include <tests/utils.h>
 
+// exported from bionic
+__BEGIN_DECLS
+extern void malloc_disable();
+extern void malloc_enable();
+__END_DECLS
+
 // All DISABLED_ tests are designed to be executed after malloc debug
 // is enabled. These tests don't run be default, and are executed
 // by wrappers that will enable various malloc debug features.
@@ -788,3 +794,34 @@
   unexpected_log_strings_.push_back("Timed out waiting for ");
   Exec("MallocTests.DISABLED_malloc_and_backtrace_deadlock", "verbose verify_pointers", 0);
 }
+
+// Creates two threads: one that calls malloc_disable() and malloc_enable() in a loop and
+// the other that performs a bunch of allocations.
+TEST(MallocTests, DISABLED_malloc_disable_deadlock) {
+  std::atomic_bool running(true);
+
+  std::thread t1([&] {
+    while (running) {
+      malloc_disable();
+      malloc_enable();
+    }
+  });
+
+  std::thread t2([&] {
+    while (running) {
+      void* p = malloc(100);
+      free(p);
+    }
+  });
+
+  // let the threads run for a while, then tell them to stop and wait for shutdown
+  std::this_thread::sleep_for(std::chrono::seconds(5));
+
+  running = false;
+  t1.join();
+  t2.join();
+}
+
+TEST_F(MallocDebugSystemTest, malloc_disable_deadlock) {
+  Exec("MallocTests.DISABLED_malloc_disable_deadlock", "verbose backtrace");
+}
diff --git a/libc/platform/bionic/macros.h b/libc/platform/bionic/macros.h
index 93268c1..c4af3b9 100644
--- a/libc/platform/bionic/macros.h
+++ b/libc/platform/bionic/macros.h
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <stddef.h>
 #include <stdint.h>
 
 #define BIONIC_DISALLOW_COPY_AND_ASSIGN(TypeName) \
@@ -31,24 +32,6 @@
     ? (1UL << (64 - __builtin_clzl(static_cast<unsigned long>(value)))) \
     : (1UL << (32 - __builtin_clz(static_cast<unsigned int>(value)))))
 
-static constexpr uintptr_t align_down(uintptr_t p, size_t align) {
-  return p & ~(align - 1);
-}
-
-static constexpr uintptr_t align_up(uintptr_t p, size_t align) {
-  return (p + align - 1) & ~(align - 1);
-}
-
-template <typename T>
-static inline T* _Nonnull align_down(T* _Nonnull p, size_t align) {
-  return reinterpret_cast<T*>(align_down(reinterpret_cast<uintptr_t>(p), align));
-}
-
-template <typename T>
-static inline T* _Nonnull align_up(T* _Nonnull p, size_t align) {
-  return reinterpret_cast<T*>(align_up(reinterpret_cast<uintptr_t>(p), align));
-}
-
 #if defined(__arm__)
 #define BIONIC_STOP_UNWIND asm volatile(".cfi_undefined r14")
 #elif defined(__aarch64__)
@@ -97,3 +80,26 @@
 static inline T* _Nonnull untag_address(T* _Nonnull p) {
   return reinterpret_cast<T*>(untag_address(reinterpret_cast<uintptr_t>(p)));
 }
+
+// MTE globals protects internal and external global variables. One of the main
+// things that MTE globals does is force all global variable accesses to go
+// through the GOT. In the linker though, some global variables are accessed (or
+// address-taken) prior to relocations being processed. Because relocations
+// haven't run yet, the GOT entry hasn't been populated, and this leads to
+// crashes. Thus, any globals used by the linker prior to relocation should be
+// annotated with this attribute, which suppresses tagging of this global
+// variable, restoring the pc-relative address computation.
+//
+// A way to find global variables that need this attribute is to build the
+// linker/libc with `SANITIZE_TARGET=memtag_globals`, push them onto a device
+// (it doesn't have to be MTE capable), and then run an executable using
+// LD_LIBRARY_PATH and using the linker in interpreter mode (e.g.
+// `LD_LIBRARY_PATH=/data/tmp/ /data/tmp/linker64 /data/tmp/my_binary`). A
+// good heuristic is that the global variable is in a file that should be
+// compiled with `-ffreestanding` (but there are global variables there that
+// don't need this attribute).
+#if __has_feature(memtag_globals)
+#define BIONIC_USED_BEFORE_LINKER_RELOCATES __attribute__((no_sanitize("memtag")))
+#else  // __has_feature(memtag_globals)
+#define BIONIC_USED_BEFORE_LINKER_RELOCATES
+#endif  // __has_feature(memtag_globals)
diff --git a/libc/platform/bionic/mte.h b/libc/platform/bionic/mte.h
index 98b3d27..610cb45 100644
--- a/libc/platform/bionic/mte.h
+++ b/libc/platform/bionic/mte.h
@@ -28,6 +28,7 @@
 
 #pragma once
 
+#include <stddef.h>
 #include <sys/auxv.h>
 #include <sys/mman.h>
 #include <sys/prctl.h>
@@ -49,6 +50,36 @@
   return supported;
 }
 
+inline void* get_tagged_address(const void* ptr) {
+#if defined(__aarch64__)
+  if (mte_supported()) {
+    __asm__ __volatile__(".arch_extension mte; ldg %0, [%0]" : "+r"(ptr));
+  }
+#endif  // aarch64
+  return const_cast<void*>(ptr);
+}
+
+// Inserts a random tag tag to `ptr`, using any of the set lower 16 bits in
+// `mask` to exclude the corresponding tag from being generated. Note: This does
+// not tag memory. This generates a pointer to be used with set_memory_tag.
+inline void* insert_random_tag(const void* ptr, __attribute__((unused)) uint64_t mask = 0) {
+#if defined(__aarch64__)
+  if (mte_supported() && ptr) {
+    __asm__ __volatile__(".arch_extension mte; irg %0, %0, %1" : "+r"(ptr) : "r"(mask));
+  }
+#endif  // aarch64
+  return const_cast<void*>(ptr);
+}
+
+// Stores the address tag in `ptr` to memory, at `ptr`.
+inline void set_memory_tag(__attribute__((unused)) void* ptr) {
+#if defined(__aarch64__)
+  if (mte_supported()) {
+    __asm__ __volatile__(".arch_extension mte; stg %0, [%0]" : "+r"(ptr));
+  }
+#endif  // aarch64
+}
+
 #ifdef __aarch64__
 class ScopedDisableMTE {
   size_t prev_tco_;
@@ -86,6 +117,12 @@
   return ptr | ((1ULL << size_cls) << 56ULL);
 }
 
+inline void stack_mte_free_ringbuffer(uintptr_t stack_mte_tls) {
+  size_t size = stack_mte_ringbuffer_size_from_pointer(stack_mte_tls);
+  void* ptr = reinterpret_cast<void*>(stack_mte_tls & ((1ULL << 56ULL) - 1ULL));
+  munmap(ptr, size);
+}
+
 inline void* stack_mte_ringbuffer_allocate(size_t n, const char* name) {
   if (n > 7) return nullptr;
   // Allocation needs to be aligned to 2*size to make the fancy code-gen work.
diff --git a/libc/private/CFIShadow.h b/libc/private/CFIShadow.h
index cbdf0f7..b40c063 100644
--- a/libc/private/CFIShadow.h
+++ b/libc/private/CFIShadow.h
@@ -40,7 +40,7 @@
 // below) are interpreted as follows.
 //
 // For an address P and corresponding shadow value V, the address of __cfi_check is calculated as
-//   align_up(P, 2**kShadowGranularity) - (V - 2) * (2 ** kCfiCheckGranularity)
+//   __builtin_align_up(P, 2**kShadowGranularity) - (V - 2) * (2 ** kCfiCheckGranularity)
 //
 // Special shadow values:
 //        0 = kInvalidShadow, this memory range has no valid CFI targets.
diff --git a/libc/private/bionic_constants.h b/libc/private/bionic_constants.h
index 6274fe2..ce484d8 100644
--- a/libc/private/bionic_constants.h
+++ b/libc/private/bionic_constants.h
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#define US_PER_S 1'000'000LL
 #define NS_PER_S 1'000'000'000LL
 
 // Size of the shadow call stack. This can be small because these stacks only
diff --git a/libc/private/bionic_globals.h b/libc/private/bionic_globals.h
index 02713fc..cd6dca9 100644
--- a/libc/private/bionic_globals.h
+++ b/libc/private/bionic_globals.h
@@ -158,6 +158,9 @@
 
 __LIBC_HIDDEN__ libc_shared_globals* __libc_shared_globals();
 __LIBC_HIDDEN__ bool __libc_mte_enabled();
+__LIBC_HIDDEN__ void __libc_init_mte(const memtag_dynamic_entries_t*, const void*, size_t,
+                                     uintptr_t);
+__LIBC_HIDDEN__ void __libc_init_mte_stack(void*);
 __LIBC_HIDDEN__ void __libc_init_fdsan();
 __LIBC_HIDDEN__ void __libc_init_fdtrack();
 __LIBC_HIDDEN__ void __libc_init_profiling_handlers();
diff --git a/libc/private/bionic_lock.h b/libc/private/bionic_lock.h
index 8ed4939..d0c6d5e 100644
--- a/libc/private/bionic_lock.h
+++ b/libc/private/bionic_lock.h
@@ -46,7 +46,7 @@
 
  public:
   void init(bool process_shared) {
-    atomic_init(&state, Unlocked);
+    atomic_store_explicit(&state, Unlocked, memory_order_relaxed);
     this->process_shared = process_shared;
   }
 
diff --git a/libc/private/bionic_time_conversions.h b/libc/private/bionic_time_conversions.h
index c6b3c78..ce7de0d 100644
--- a/libc/private/bionic_time_conversions.h
+++ b/libc/private/bionic_time_conversions.h
@@ -26,8 +26,7 @@
  * SUCH DAMAGE.
  */
 
-#ifndef _BIONIC_TIME_CONVERSIONS_H
-#define _BIONIC_TIME_CONVERSIONS_H
+#pragma once
 
 #include <errno.h>
 #include <time.h>
@@ -35,20 +34,21 @@
 
 #include "private/bionic_constants.h"
 
-__BEGIN_DECLS
+bool timespec_from_timeval(timespec& ts, const timeval& tv);
+void timespec_from_ms(timespec& ts, const int ms);
 
-__LIBC_HIDDEN__ bool timespec_from_timeval(timespec& ts, const timeval& tv);
-__LIBC_HIDDEN__ void timespec_from_ms(timespec& ts, const int ms);
+void timeval_from_timespec(timeval& tv, const timespec& ts);
 
-__LIBC_HIDDEN__ void timeval_from_timespec(timeval& tv, const timespec& ts);
+void monotonic_time_from_realtime_time(timespec& monotonic_time, const timespec& realtime_time);
+void realtime_time_from_monotonic_time(timespec& realtime_time, const timespec& monotonic_time);
 
-__LIBC_HIDDEN__ void monotonic_time_from_realtime_time(timespec& monotonic_time,
-                                                       const timespec& realtime_time);
+static inline int64_t to_ns(const timespec& ts) {
+  return ts.tv_sec * NS_PER_S + ts.tv_nsec;
+}
 
-__LIBC_HIDDEN__ void realtime_time_from_monotonic_time(timespec& realtime_time,
-                                                       const timespec& monotonic_time);
-
-__END_DECLS
+static inline int64_t to_us(const timeval& tv) {
+  return tv.tv_sec * US_PER_S + tv.tv_usec;
+}
 
 static inline int check_timespec(const timespec* ts, bool null_allowed) {
   if (null_allowed && ts == nullptr) {
@@ -76,5 +76,3 @@
   }
 }
 #endif
-
-#endif
diff --git a/libc/system_properties/include/system_properties/prop_area.h b/libc/system_properties/include/system_properties/prop_area.h
index 187ff75..089cf52 100644
--- a/libc/system_properties/include/system_properties/prop_area.h
+++ b/libc/system_properties/include/system_properties/prop_area.h
@@ -102,7 +102,7 @@
   }
 
   prop_area(const uint32_t magic, const uint32_t version) : magic_(magic), version_(version) {
-    atomic_init(&serial_, 0u);
+    atomic_store_explicit(&serial_, 0u, memory_order_relaxed);
     memset(reserved_, 0, sizeof(reserved_));
     // Allocate enough space for the root node.
     bytes_used_ = sizeof(prop_trie_node);
diff --git a/libc/system_properties/prop_info.cpp b/libc/system_properties/prop_info.cpp
index c3bf177..499b36a 100644
--- a/libc/system_properties/prop_info.cpp
+++ b/libc/system_properties/prop_info.cpp
@@ -38,7 +38,7 @@
 prop_info::prop_info(const char* name, uint32_t namelen, const char* value, uint32_t valuelen) {
   memcpy(this->name, name, namelen);
   this->name[namelen] = '\0';
-  atomic_init(&this->serial, valuelen << 24);
+  atomic_store_explicit(&this->serial, valuelen << 24, memory_order_relaxed);
   memcpy(this->value, value, valuelen);
   this->value[valuelen] = '\0';
 }
@@ -48,7 +48,7 @@
   this->name[namelen] = '\0';
 
   auto error_value_len = sizeof(kLongLegacyError) - 1;
-  atomic_init(&this->serial, error_value_len << 24 | kLongFlag);
+  atomic_store_explicit(&this->serial, error_value_len << 24 | kLongFlag, memory_order_relaxed);
   memcpy(this->long_property.error_message, kLongLegacyError, sizeof(kLongLegacyError));
 
   this->long_property.offset = long_offset;
diff --git a/libc/upstream-netbsd/lib/libc/stdlib/bsearch.c b/libc/upstream-netbsd/lib/libc/stdlib/bsearch.c
deleted file mode 100644
index e48fe85..0000000
--- a/libc/upstream-netbsd/lib/libc/stdlib/bsearch.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*	$NetBSD: bsearch.c,v 1.16 2022/05/31 08:43:14 andvar Exp $	*/
-
-/*
- * Copyright (c) 1990, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-#if defined(LIBC_SCCS) && !defined(lint)
-#if 0
-static char sccsid[] = "@(#)bsearch.c	8.1 (Berkeley) 6/4/93";
-#else
-__RCSID("$NetBSD: bsearch.c,v 1.16 2022/05/31 08:43:14 andvar Exp $");
-#endif
-#endif /* LIBC_SCCS and not lint */
-
-#include <assert.h>
-#include <errno.h>
-#include <stdlib.h>
-
-/*
- * Perform a binary search.
- *
- * The code below is a bit sneaky.  After a comparison fails, we
- * divide the work in half by moving either left or right. If lim
- * is odd, moving left simply involves halving lim: e.g., when lim
- * is 5 we look at item 2, so we change lim to 2 so that we will
- * look at items 0 & 1.  If lim is even, the same applies.  If lim
- * is odd, moving right again involves halving lim, this time moving
- * the base up one item past p: e.g., when lim is 5 we change base
- * to item 3 and make lim 2 so that we will look at items 3 and 4.
- * If lim is even, however, we have to shrink it by one before
- * halving: e.g., when lim is 4, we still looked at item 2, so we
- * have to make lim 3, then halve, obtaining 1, so that we will only
- * look at item 3.
- */
-void *
-bsearch(const void *key, const void *base0, size_t nmemb, size_t size,
-    int (*compar)(const void *, const void *))
-{
-	const char *base = base0;
-	size_t lim;
-	int cmp;
-	const void *p;
-
-	_DIAGASSERT(key != NULL);
-	_DIAGASSERT(base0 != NULL || nmemb == 0);
-	_DIAGASSERT(compar != NULL);
-
-	for (lim = nmemb; lim != 0; lim >>= 1) {
-		p = base + (lim >> 1) * size;
-		cmp = (*compar)(key, p);
-		if (cmp == 0)
-			return __UNCONST(p);
-		if (cmp > 0) {	/* key > p: move right */
-			base = (const char *)p + size;
-			lim--;
-		}		/* else move left */
-	}
-	return (NULL);
-}
diff --git a/libc/upstream-openbsd/lib/libc/string/memchr.c b/libc/upstream-openbsd/lib/libc/string/memchr.c
deleted file mode 100644
index a6a4bd6..0000000
--- a/libc/upstream-openbsd/lib/libc/string/memchr.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*	$OpenBSD: memchr.c,v 1.8 2015/08/31 02:53:57 guenther Exp $ */
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Chris Torek.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <string.h>
-
-void *
-memchr(const void *s, int c, size_t n)
-{
-	if (n != 0) {
-		const unsigned char *p = s;
-
-		do {
-			if (*p++ == (unsigned char)c)
-				return ((void *)(p - 1));
-		} while (--n != 0);
-	}
-	return (NULL);
-}
-DEF_STRONG(memchr);
diff --git a/libc/upstream-openbsd/lib/libc/string/memrchr.c b/libc/upstream-openbsd/lib/libc/string/memrchr.c
deleted file mode 100644
index e123bc1..0000000
--- a/libc/upstream-openbsd/lib/libc/string/memrchr.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/*	$OpenBSD: memrchr.c,v 1.4 2019/01/25 00:19:25 millert Exp $	*/
-
-/*
- * Copyright (c) 2007 Todd C. Miller <millert@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <string.h>
-
-/*
- * Reverse memchr()
- * Find the last occurrence of 'c' in the buffer 's' of size 'n'.
- */
-void *
-memrchr(const void *s, int c, size_t n)
-{
-	const unsigned char *cp;
-
-	if (n != 0) {
-		cp = (unsigned char *)s + n;
-		do {
-			if (*(--cp) == (unsigned char)c)
-				return((void *)cp);
-		} while (--n != 0);
-	}
-	return(NULL);
-}
-DEF_WEAK(memrchr);
diff --git a/libc/upstream-openbsd/lib/libc/string/strlcat.c b/libc/upstream-openbsd/lib/libc/string/strlcat.c
deleted file mode 100644
index aa3db7ab..0000000
--- a/libc/upstream-openbsd/lib/libc/string/strlcat.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*	$OpenBSD: strlcat.c,v 1.19 2019/01/25 00:19:25 millert Exp $	*/
-
-/*
- * Copyright (c) 1998, 2015 Todd C. Miller <millert@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <sys/types.h>
-#include <string.h>
-
-/*
- * Appends src to string dst of size dsize (unlike strncat, dsize is the
- * full size of dst, not space left).  At most dsize-1 characters
- * will be copied.  Always NUL terminates (unless dsize <= strlen(dst)).
- * Returns strlen(src) + MIN(dsize, strlen(initial dst)).
- * If retval >= dsize, truncation occurred.
- */
-size_t
-strlcat(char *dst, const char *src, size_t dsize)
-{
-	const char *odst = dst;
-	const char *osrc = src;
-	size_t n = dsize;
-	size_t dlen;
-
-	/* Find the end of dst and adjust bytes left but don't go past end. */
-	while (n-- != 0 && *dst != '\0')
-		dst++;
-	dlen = dst - odst;
-	n = dsize - dlen;
-
-	if (n-- == 0)
-		return(dlen + strlen(src));
-	while (*src != '\0') {
-		if (n != 0) {
-			*dst++ = *src;
-			n--;
-		}
-		src++;
-	}
-	*dst = '\0';
-
-	return(dlen + (src - osrc));	/* count does not include NUL */
-}
-DEF_WEAK(strlcat);
diff --git a/libc/upstream-openbsd/lib/libc/string/strlcpy.c b/libc/upstream-openbsd/lib/libc/string/strlcpy.c
deleted file mode 100644
index 7e3b9ae..0000000
--- a/libc/upstream-openbsd/lib/libc/string/strlcpy.c
+++ /dev/null
@@ -1,51 +0,0 @@
-/*	$OpenBSD: strlcpy.c,v 1.16 2019/01/25 00:19:25 millert Exp $	*/
-
-/*
- * Copyright (c) 1998, 2015 Todd C. Miller <millert@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <sys/types.h>
-#include <string.h>
-
-/*
- * Copy string src to buffer dst of size dsize.  At most dsize-1
- * chars will be copied.  Always NUL terminates (unless dsize == 0).
- * Returns strlen(src); if retval >= dsize, truncation occurred.
- */
-size_t
-strlcpy(char *dst, const char *src, size_t dsize)
-{
-	const char *osrc = src;
-	size_t nleft = dsize;
-
-	/* Copy as many bytes as will fit. */
-	if (nleft != 0) {
-		while (--nleft != 0) {
-			if ((*dst++ = *src++) == '\0')
-				break;
-		}
-	}
-
-	/* Not enough room in dst, add NUL and traverse rest of src. */
-	if (nleft == 0) {
-		if (dsize != 0)
-			*dst = '\0';		/* NUL-terminate dst */
-		while (*src++)
-			;
-	}
-
-	return(src - osrc - 1);	/* count does not include NUL */
-}
-DEF_WEAK(strlcpy);
diff --git a/libdl/Android.bp b/libdl/Android.bp
index 87db4b1..205c454 100644
--- a/libdl/Android.bp
+++ b/libdl/Android.bp
@@ -60,6 +60,8 @@
     native_bridge_supported: true,
     static_ndk_lib: true,
 
+    export_include_dirs: ["include_private"],
+
     defaults: [
         "linux_bionic_supported",
         "bug_24465209_workaround",
diff --git a/libdl/NOTICE b/libdl/NOTICE
index fce0104..80038fc 100644
--- a/libdl/NOTICE
+++ b/libdl/NOTICE
@@ -30,3 +30,19 @@
 
 -------------------------------------------------------------------
 
+Copyright (C) 2024 The Android Open Source Project
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+-------------------------------------------------------------------
+
diff --git a/libdl/include_private/android/dlext_private.h b/libdl/include_private/android/dlext_private.h
new file mode 100644
index 0000000..fda1086
--- /dev/null
+++ b/libdl/include_private/android/dlext_private.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+
+// TODO: libdl has several private extensions, but they have not all moved into a standard
+// private header.
+
+/**
+ * Set whether to load libraries in app compat mode.
+ *
+ * Any library which is not 16 KB aligned on a 4 KB aligned
+ * will be loaded in a special mode, which may load some R-only
+ * code as RW, in order to increase compatibility.
+ *
+ * \param enable_app_compat whether the mode is enabled for additional
+ *     library loads.
+ */
+void android_set_16kb_appcompat_mode(bool enable_app_compat);
+
+__END_DECLS
diff --git a/libdl/libdl_android.cpp b/libdl/libdl_android.cpp
index 47a164a..f0959eb 100644
--- a/libdl/libdl_android.cpp
+++ b/libdl/libdl_android.cpp
@@ -59,6 +59,9 @@
 __attribute__((__weak__, visibility("default")))
 struct android_namespace_t* __loader_android_get_exported_namespace(const char* name);
 
+__attribute__((__weak__, visibility("default"))) void __loader_android_set_16kb_appcompat_mode(
+    bool enable_app_compat);
+
 // Proxy calls to bionic loader
 __attribute__((__weak__))
 void android_get_LD_LIBRARY_PATH(char* buffer, size_t buffer_size) {
@@ -115,4 +118,8 @@
   return __loader_android_get_exported_namespace(name);
 }
 
+__attribute__((__weak__)) void android_set_16kb_appcompat_mode(bool enable_app_compat) {
+  __loader_android_set_16kb_appcompat_mode(enable_app_compat);
+}
+
 } // extern "C"
diff --git a/libdl/libdl_android.map.txt b/libdl/libdl_android.map.txt
index 7afcd9c..efbc841 100644
--- a/libdl/libdl_android.map.txt
+++ b/libdl/libdl_android.map.txt
@@ -24,6 +24,7 @@
     android_init_anonymous_namespace; # apex
     android_link_namespaces; # apex
     android_set_application_target_sdk_version; # apex
+    android_set_16kb_appcompat_mode; #apex
   local:
     *;
 };
diff --git a/libdl/libdl_cfi.cpp b/libdl/libdl_cfi.cpp
index 8adc342..e096f9a 100644
--- a/libdl/libdl_cfi.cpp
+++ b/libdl/libdl_cfi.cpp
@@ -55,8 +55,8 @@
   uintptr_t addr = reinterpret_cast<uintptr_t>(Ptr);
   // The aligned range of [0, kShadowAlign) uses a single shadow element, therefore all pointers in
   // this range must get the same aligned_addr below. This matches CFIShadowWriter::Add; not the
-  // same as align_up().
-  uintptr_t aligned_addr = align_down(addr, CFIShadow::kShadowAlign) + CFIShadow::kShadowAlign;
+  // same as just __builtin_align_up().
+  uintptr_t aligned_addr = __builtin_align_down(addr, CFIShadow::kShadowAlign) + CFIShadow::kShadowAlign;
   uintptr_t p = aligned_addr - (static_cast<uintptr_t>(v - CFIShadow::kRegularShadowMin)
                                 << CFIShadow::kCfiCheckGranularity);
 #ifdef __arm__
diff --git a/linker/Android.bp b/linker/Android.bp
index a06ca29..8300f01 100644
--- a/linker/Android.bp
+++ b/linker/Android.bp
@@ -108,6 +108,12 @@
 
     // We need to access Bionic private headers in the linker.
     include_dirs: ["bionic/libc"],
+
+    sanitize: {
+        // Supporting memtag_globals in the linker would be tricky,
+        // because it relocates itself very early.
+        memtag_globals: false,
+    },
 }
 
 // ========================================================
@@ -344,11 +350,10 @@
 // linker[_asan][64] binary
 // ========================================================
 
-cc_binary {
-    name: "linker",
+cc_defaults {
+    name: "linker_binary_defaults",
     defaults: [
         "linker_bin_template",
-        "linux_bionic_supported",
         "linker_version_script_overlay",
     ],
 
@@ -370,8 +375,6 @@
 
     compile_multilib: "both",
 
-    recovery_available: true,
-    vendor_ramdisk_available: true,
     apex_available: [
         "//apex_available:platform",
         "com.android.runtime",
@@ -397,6 +400,26 @@
     afdo: true,
 }
 
+cc_binary {
+    name: "linker",
+    defaults: [
+        "linux_bionic_supported",
+        "linker_binary_defaults",
+    ],
+
+    vendor_ramdisk_available: true,
+}
+
+cc_binary {
+    name: "linker.recovery",
+    defaults: [
+        "linker_binary_defaults",
+    ],
+
+    recovery: true,
+    stem: "linker",
+}
+
 // ========================================================
 // assorted modules
 // ========================================================
@@ -494,6 +517,7 @@
         "linker_mapped_file_fragment.cpp",
         "linker_sdk_versions.cpp",
         "linker_dlwarning.cpp",
+        "linker_phdr_16kib_compat.cpp"
     ],
 
     static_libs: [
@@ -535,3 +559,32 @@
         },
     },
 }
+
+cc_fuzz {
+    name: "ElfReader_fuzzer",
+    srcs: [
+        "ElfReader_fuzzer.cpp",
+        "linker.cpp",
+        "linker_block_allocator.cpp",
+        "linker_debug.cpp",
+        "linker_dlwarning.cpp",
+        "linker_globals.cpp",
+        "linker_mapped_file_fragment.cpp",
+        "linker_phdr.cpp",
+        "linker_phdr_16kib_compat.cpp",
+        "linker_sdk_versions.cpp",
+        "linker_utils.cpp",
+        ":elf_note_sources",
+    ],
+    static_libs: [
+        "libasync_safe",
+        "libbase",
+        "libziparchive",
+    ],
+    include_dirs: ["bionic/libc"],
+    // TODO: use all the architectures' files.
+    // We'll either need to give them unique names across architectures,
+    // or change soong to preserve subdirectories in `corpus:`,
+    // and maybe also the [deprecated] LLVM fuzzer infrastructure?
+    corpus: [":bionic_prebuilt_test_elf_files_arm64"],
+}
diff --git a/libc/bionic/strnlen.cpp b/linker/ElfReader_fuzzer.cpp
similarity index 73%
copy from libc/bionic/strnlen.cpp
copy to linker/ElfReader_fuzzer.cpp
index 7101b21..a23132b 100644
--- a/libc/bionic/strnlen.cpp
+++ b/linker/ElfReader_fuzzer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (C) 2024 The Android Open Source Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,9 +26,21 @@
  * SUCH DAMAGE.
  */
 
-#include <string.h>
+#include "linker_phdr.h"
 
-size_t strnlen(const char* s, size_t n) {
-  const char* p = static_cast<const char*>(memchr(s, 0, n));
-  return p ? (p - s) : n;
+#include <stddef.h>
+#include <stdint.h>
+
+#include <android-base/file.h>
+
+// See current fuzz coverage here:
+// https://android-coverage.googleplex.com/fuzz_targets/ElfReader_fuzzer/index.html
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  TemporaryFile tf;
+  android::base::WriteFully(tf.fd, data, size);
+
+  ElfReader er;
+  er.Read(tf.path, tf.fd, 0, size);
+  return 0;
 }
diff --git a/linker/dlfcn.cpp b/linker/dlfcn.cpp
index fee19f4..f811d6d 100644
--- a/linker/dlfcn.cpp
+++ b/linker/dlfcn.cpp
@@ -89,6 +89,7 @@
                       const void* caller_addr) __LINKER_PUBLIC__;
 void __loader_add_thread_local_dtor(void* dso_handle) __LINKER_PUBLIC__;
 void __loader_remove_thread_local_dtor(void* dso_handle) __LINKER_PUBLIC__;
+void __loader_android_set_16kb_appcompat_mode(bool enable_app_compat) __LINKER_PUBLIC__;
 libc_shared_globals* __loader_shared_globals() __LINKER_PUBLIC__;
 #if defined(__arm__)
 _Unwind_Ptr __loader_dl_unwind_find_exidx(_Unwind_Ptr pc, int* pcount) __LINKER_PUBLIC__;
@@ -301,6 +302,11 @@
   decrement_dso_handle_reference_counter(dso_handle);
 }
 
+void __loader_android_set_16kb_appcompat_mode(bool enable_app_compat) {
+  ScopedPthreadMutexLocker locker(&g_dl_mutex);
+  set_16kb_appcompat_mode(enable_app_compat);
+}
+
 libc_shared_globals* __loader_shared_globals() {
   return __libc_shared_globals();
 }
@@ -331,6 +337,7 @@
     __libdl_info->gnu_bloom_filter_ = linker_si.gnu_bloom_filter_;
     __libdl_info->gnu_bucket_ = linker_si.gnu_bucket_;
     __libdl_info->gnu_chain_ = linker_si.gnu_chain_;
+    __libdl_info->memtag_dynamic_entries_ = linker_si.memtag_dynamic_entries_;
 
     __libdl_info->ref_count_ = 1;
     __libdl_info->strtab_size_ = linker_si.strtab_size_;
diff --git a/linker/ld_android.cpp b/linker/ld_android.cpp
index 1c03106..c938a16 100644
--- a/linker/ld_android.cpp
+++ b/linker/ld_android.cpp
@@ -55,6 +55,7 @@
 __strong_alias(__loader_add_thread_local_dtor, __internal_linker_error);
 __strong_alias(__loader_remove_thread_local_dtor, __internal_linker_error);
 __strong_alias(__loader_shared_globals, __internal_linker_error);
+__strong_alias(__loader_android_set_16kb_appcompat_mode, __internal_linker_error);
 #if defined(__arm__)
 __strong_alias(__loader_dl_unwind_find_exidx, __internal_linker_error);
 #endif
diff --git a/linker/linker.arm.map b/linker/linker.arm.map
index b805cd6..edfa249 100644
--- a/linker/linker.arm.map
+++ b/linker/linker.arm.map
@@ -25,6 +25,7 @@
     __loader_shared_globals;
     rtld_db_dlactivity;
     __loader_android_handle_signal;
+    __loader_android_set_16kb_appcompat_mode;
   local:
     *;
 };
diff --git a/linker/linker.cpp b/linker/linker.cpp
index 227b599..8f78915 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -51,6 +51,7 @@
 #include <android-base/scopeguard.h>
 #include <async_safe/log.h>
 #include <bionic/pthread_internal.h>
+#include <platform/bionic/mte.h>
 
 // Private C library headers.
 
@@ -1697,11 +1698,19 @@
     }
   }
 
+  // The WebView loader uses RELRO sharing in order to promote page sharing of the large RELRO
+  // segment, as it's full of C++ vtables. Because MTE globals, by default, applies random tags to
+  // each global variable, the RELRO segment is polluted and unique for each process. In order to
+  // allow sharing, but still provide some protection, we use deterministic global tagging schemes
+  // for DSOs that are loaded through android_dlopen_ext, such as those loaded by WebView.
+  bool dlext_use_relro =
+      extinfo && extinfo->flags & (ANDROID_DLEXT_WRITE_RELRO | ANDROID_DLEXT_USE_RELRO);
+
   // Step 3: pre-link all DT_NEEDED libraries in breadth first order.
   bool any_memtag_stack = false;
   for (auto&& task : load_tasks) {
     soinfo* si = task->get_soinfo();
-    if (!si->is_linked() && !si->prelink_image()) {
+    if (!si->is_linked() && !si->prelink_image(dlext_use_relro)) {
       return false;
     }
     // si->memtag_stack() needs to be called after si->prelink_image() which populates
@@ -2361,7 +2370,7 @@
         void* tls_block = get_tls_block_for_this_thread(tls_module, /*should_alloc=*/true);
         *symbol = static_cast<char*>(tls_block) + sym->st_value;
       } else {
-        *symbol = reinterpret_cast<void*>(found->resolve_symbol_address(sym));
+        *symbol = get_tagged_address(reinterpret_cast<void*>(found->resolve_symbol_address(sym)));
       }
       failure_guard.Disable();
       LD_LOG(kLogDlsym,
@@ -2791,15 +2800,25 @@
   return true;
 }
 
-static void apply_relr_reloc(ElfW(Addr) offset, ElfW(Addr) load_bias) {
-  ElfW(Addr) address = offset + load_bias;
-  *reinterpret_cast<ElfW(Addr)*>(address) += load_bias;
+static void apply_relr_reloc(ElfW(Addr) offset, ElfW(Addr) load_bias, bool has_memtag_globals) {
+  ElfW(Addr) destination = offset + load_bias;
+  if (!has_memtag_globals) {
+    *reinterpret_cast<ElfW(Addr)*>(destination) += load_bias;
+    return;
+  }
+
+  ElfW(Addr)* tagged_destination =
+      reinterpret_cast<ElfW(Addr)*>(get_tagged_address(reinterpret_cast<void*>(destination)));
+  ElfW(Addr) tagged_value = reinterpret_cast<ElfW(Addr)>(
+      get_tagged_address(reinterpret_cast<void*>(*tagged_destination + load_bias)));
+  *tagged_destination = tagged_value;
 }
 
 // Process relocations in SHT_RELR section (experimental).
 // Details of the encoding are described in this post:
 //   https://groups.google.com/d/msg/generic-abi/bX460iggiKg/Pi9aSwwABgAJ
-bool relocate_relr(const ElfW(Relr)* begin, const ElfW(Relr)* end, ElfW(Addr) load_bias) {
+bool relocate_relr(const ElfW(Relr) * begin, const ElfW(Relr) * end, ElfW(Addr) load_bias,
+                   bool has_memtag_globals) {
   constexpr size_t wordsize = sizeof(ElfW(Addr));
 
   ElfW(Addr) base = 0;
@@ -2810,7 +2829,7 @@
     if ((entry&1) == 0) {
       // Even entry: encodes the offset for next relocation.
       offset = static_cast<ElfW(Addr)>(entry);
-      apply_relr_reloc(offset, load_bias);
+      apply_relr_reloc(offset, load_bias, has_memtag_globals);
       // Set base offset for subsequent bitmap entries.
       base = offset + wordsize;
       continue;
@@ -2821,7 +2840,7 @@
     while (entry != 0) {
       entry >>= 1;
       if ((entry&1) != 0) {
-        apply_relr_reloc(offset, load_bias);
+        apply_relr_reloc(offset, load_bias, has_memtag_globals);
       }
       offset += wordsize;
     }
@@ -2836,7 +2855,7 @@
 // An empty list of soinfos
 static soinfo_list_t g_empty_list;
 
-bool soinfo::prelink_image() {
+bool soinfo::prelink_image(bool dlext_use_relro) {
   if (flags_ & FLAG_PRELINKED) return true;
   /* Extract dynamic section */
   ElfW(Word) dynamic_flags = 0;
@@ -3325,6 +3344,18 @@
   // it each time we look up a symbol with a version.
   if (!validate_verdef_section(this)) return false;
 
+  // MTE globals requires remapping data segments with PROT_MTE as anonymous mappings, because file
+  // based mappings may not be backed by tag-capable memory (see "MAP_ANONYMOUS" on
+  // https://www.kernel.org/doc/html/latest/arch/arm64/memory-tagging-extension.html). This is only
+  // done if the binary has MTE globals (evidenced by the dynamic table entries), as it destroys
+  // page sharing. It's also only done on devices that support MTE, because the act of remapping
+  // pages is unnecessary on non-MTE devices (where we might still run MTE-globals enabled code).
+  if (should_tag_memtag_globals() &&
+      remap_memtag_globals_segments(phdr, phnum, base) == 0) {
+    tag_globals(dlext_use_relro);
+    protect_memtag_globals_ro_segments(phdr, phnum, base);
+  }
+
   flags_ |= FLAG_PRELINKED;
   return true;
 }
@@ -3397,6 +3428,13 @@
     return false;
   }
 
+  if (should_tag_memtag_globals()) {
+    std::list<std::string>* vma_names_ptr = vma_names();
+    // should_tag_memtag_globals -> __aarch64__ -> vma_names() != nullptr
+    CHECK(vma_names_ptr);
+    name_memtag_globals_segments(phdr, phnum, base, get_realpath(), vma_names_ptr);
+  }
+
   /* Handle serializing/sharing the RELRO segment */
   if (extinfo && (extinfo->flags & ANDROID_DLEXT_WRITE_RELRO)) {
     if (phdr_table_serialize_gnu_relro(phdr, phnum, load_bias,
@@ -3435,6 +3473,54 @@
   return true;
 }
 
+// https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#global-variable-tagging
+void soinfo::tag_globals(bool dlext_use_relro) {
+  if (is_linked()) return;
+  if (flags_ & FLAG_GLOBALS_TAGGED) return;
+  flags_ |= FLAG_GLOBALS_TAGGED;
+
+  constexpr size_t kTagGranuleSize = 16;
+  const uint8_t* descriptor_stream = reinterpret_cast<const uint8_t*>(memtag_globals());
+
+  if (memtag_globalssz() == 0) {
+    DL_ERR("Invalid memtag descriptor pool size: %zu", memtag_globalssz());
+  }
+
+  uint64_t addr = load_bias;
+  uleb128_decoder decoder(descriptor_stream, memtag_globalssz());
+  // Don't ever generate tag zero, to easily distinguish between tagged and
+  // untagged globals in register/tag dumps.
+  uint64_t last_tag_mask = 1;
+  uint64_t last_tag = 1;
+  constexpr uint64_t kDistanceReservedBits = 3;
+
+  while (decoder.has_bytes()) {
+    uint64_t value = decoder.pop_front();
+    uint64_t distance = (value >> kDistanceReservedBits) * kTagGranuleSize;
+    uint64_t ngranules = value & ((1 << kDistanceReservedBits) - 1);
+    if (ngranules == 0) {
+      ngranules = decoder.pop_front() + 1;
+    }
+
+    addr += distance;
+    void* tagged_addr;
+    if (dlext_use_relro) {
+      tagged_addr = reinterpret_cast<void*>(addr | (last_tag++ << 56));
+      if (last_tag > (1 << kTagGranuleSize)) last_tag = 1;
+    } else {
+      tagged_addr = insert_random_tag(reinterpret_cast<void*>(addr), last_tag_mask);
+      uint64_t tag = (reinterpret_cast<uint64_t>(tagged_addr) >> 56) & 0x0f;
+      last_tag_mask = 1 | (1 << tag);
+    }
+
+    for (size_t k = 0; k < ngranules; k++) {
+      auto* granule = static_cast<uint8_t*>(tagged_addr) + k * kTagGranuleSize;
+      set_memory_tag(static_cast<void*>(granule));
+    }
+    addr += ngranules * kTagGranuleSize;
+  }
+}
+
 static std::vector<android_namespace_t*> init_default_namespace_no_config(bool is_asan, bool is_hwasan) {
   g_default_namespace.set_isolated(false);
   auto default_ld_paths = is_asan ? kAsanDefaultLdPaths : (
diff --git a/linker/linker.generic.map b/linker/linker.generic.map
index 4d7f236..2beae65 100644
--- a/linker/linker.generic.map
+++ b/linker/linker.generic.map
@@ -24,6 +24,7 @@
     __loader_shared_globals;
     rtld_db_dlactivity;
     __loader_android_handle_signal;
+    __loader_android_set_16kb_appcompat_mode;
   local:
     *;
 };
diff --git a/linker/linker.h b/linker/linker.h
index ac2222d..7afa0d7 100644
--- a/linker/linker.h
+++ b/linker/linker.h
@@ -108,6 +108,9 @@
 
 bool get_transparent_hugepages_supported();
 
+void set_16kb_appcompat_mode(bool enable_app_compat);
+bool get_16kb_appcompat_mode();
+
 enum {
   /* A regular namespace is the namespace with a custom search path that does
    * not impose any restrictions on the location of native libraries.
@@ -179,7 +182,8 @@
 int get_application_target_sdk_version();
 ElfW(Versym) find_verdef_version_index(const soinfo* si, const version_info* vi);
 bool validate_verdef_section(const soinfo* si);
-bool relocate_relr(const ElfW(Relr)* begin, const ElfW(Relr)* end, ElfW(Addr) load_bias);
+bool relocate_relr(const ElfW(Relr) * begin, const ElfW(Relr) * end, ElfW(Addr) load_bias,
+                   bool has_memtag_globals);
 
 struct platform_properties {
 #if defined(__aarch64__)
diff --git a/linker/linker_main.cpp b/linker/linker_main.cpp
index 69355d3..f65f82d 100644
--- a/linker/linker_main.cpp
+++ b/linker/linker_main.cpp
@@ -46,6 +46,7 @@
 #include "linker_tls.h"
 #include "linker_utils.h"
 
+#include "platform/bionic/macros.h"
 #include "private/KernelArgumentBlock.h"
 #include "private/bionic_call_ifunc_resolver.h"
 #include "private/bionic_globals.h"
@@ -367,6 +368,8 @@
   init_link_map_head(*solinker);
 
 #if defined(__aarch64__)
+  __libc_init_mte(somain->memtag_dynamic_entries(), somain->phdr, somain->phnum, somain->load_bias);
+
   if (exe_to_load == nullptr) {
     // Kernel does not add PROT_BTI to executable pages of the loaded ELF.
     // Apply appropriate protections here if it is needed.
@@ -467,7 +470,6 @@
 #if defined(__aarch64__)
   // This has to happen after the find_libraries, which will have collected any possible
   // libraries that request memtag_stack in the dynamic section.
-  __libc_init_mte(somain->memtag_dynamic_entries(), somain->phdr, somain->phnum, somain->load_bias);
   __libc_init_mte_stack(args.argv);
 #endif
 
@@ -627,8 +629,13 @@
     // Apply RELR relocations first so that the GOT is initialized for ifunc
     // resolvers.
     if (relr && relrsz) {
+      // Nothing has tagged the memtag globals here, so it is pointless either
+      // way to handle them, the tags will be zero anyway.
+      // That is moot though, because the linker does not use memtag_globals
+      // in the first place.
       relocate_relr(reinterpret_cast<ElfW(Relr*)>(ehdr + relr),
-                    reinterpret_cast<ElfW(Relr*)>(ehdr + relr + relrsz), ehdr);
+                    reinterpret_cast<ElfW(Relr*)>(ehdr + relr + relrsz), ehdr,
+                    /*has_memtag_globals=*/ false);
     }
     if (pltrel && pltrelsz) {
       call_ifunc_resolvers_for_section(reinterpret_cast<RelType*>(ehdr + pltrel),
@@ -648,6 +655,16 @@
   }
 }
 
+// Remapping MTE globals segments happens before the linker relocates itself, and so can't use
+// memcpy() from string.h. This function is compiled with -ffreestanding.
+void linker_memcpy(void* dst, const void* src, size_t n) {
+  char* dst_bytes = reinterpret_cast<char*>(dst);
+  const char* src_bytes = reinterpret_cast<const char*>(src);
+  for (size_t i = 0; i < n; ++i) {
+    dst_bytes[i] = src_bytes[i];
+  }
+}
+
 // Detect an attempt to run the linker on itself. e.g.:
 //   /system/bin/linker64 /system/bin/linker64
 // Use priority-1 to run this constructor before other constructors.
diff --git a/linker/linker_main.h b/linker/linker_main.h
index 724f43c..ffbcf0f 100644
--- a/linker/linker_main.h
+++ b/linker/linker_main.h
@@ -70,3 +70,5 @@
 soinfo* solist_get_head();
 soinfo* solist_get_somain();
 soinfo* solist_get_vdso();
+
+void linker_memcpy(void* dst, const void* src, size_t n);
diff --git a/linker/linker_note_gnu_property.cpp b/linker/linker_note_gnu_property.cpp
index 082a604..d221b8d 100644
--- a/linker/linker_note_gnu_property.cpp
+++ b/linker/linker_note_gnu_property.cpp
@@ -137,7 +137,7 @@
     // Loop on program property array.
     const ElfW(Prop)* property = reinterpret_cast<const ElfW(Prop)*>(&note_nhdr->n_desc[offset]);
     const ElfW(Word) property_size =
-        align_up(sizeof(ElfW(Prop)) + property->pr_datasz, sizeof(ElfW(Addr)));
+        __builtin_align_up(sizeof(ElfW(Prop)) + property->pr_datasz, sizeof(ElfW(Addr)));
     if ((note_nhdr->nhdr.n_descsz - offset) < property_size) {
       DL_ERR_AND_LOG(
           "\"%s\" .note.gnu.property: property descriptor size is "
diff --git a/linker/linker_note_gnu_property_test.cpp b/linker/linker_note_gnu_property_test.cpp
index 960118c..2a5eddc 100644
--- a/linker/linker_note_gnu_property_test.cpp
+++ b/linker/linker_note_gnu_property_test.cpp
@@ -107,7 +107,7 @@
   template <typename T>
   bool push(ElfW(Word) pr_type, ElfW(Word) pr_datasz, const T* pr_data) {
     // Must be aligned.
-    const uintptr_t addition = align_up(pr_datasz, sizeof(ElfW(Addr)));
+    const uintptr_t addition = __builtin_align_up(pr_datasz, sizeof(ElfW(Addr)));
     if ((offset() + addition) > kMaxSectionSize) {
       return false;
     }
diff --git a/linker/linker_phdr.cpp b/linker/linker_phdr.cpp
index 7691031..1e36f5e 100644
--- a/linker/linker_phdr.cpp
+++ b/linker/linker_phdr.cpp
@@ -37,9 +37,12 @@
 #include <unistd.h>
 
 #include "linker.h"
+#include "linker_debug.h"
 #include "linker_dlwarning.h"
 #include "linker_globals.h"
-#include "linker_debug.h"
+#include "linker_logger.h"
+#include "linker_main.h"
+#include "linker_soinfo.h"
 #include "linker_utils.h"
 
 #include "private/bionic_asm_note.h"
@@ -156,8 +159,8 @@
 ElfReader::ElfReader()
     : did_read_(false), did_load_(false), fd_(-1), file_offset_(0), file_size_(0), phdr_num_(0),
       phdr_table_(nullptr), shdr_table_(nullptr), shdr_num_(0), dynamic_(nullptr), strtab_(nullptr),
-      strtab_size_(0), load_start_(nullptr), load_size_(0), load_bias_(0), loaded_phdr_(nullptr),
-      mapped_by_caller_(false) {
+      strtab_size_(0), load_start_(nullptr), load_size_(0), load_bias_(0), max_align_(0), min_align_(0),
+      loaded_phdr_(nullptr), mapped_by_caller_(false) {
 }
 
 bool ElfReader::Read(const char* name, int fd, off64_t file_offset, off64_t file_size) {
@@ -172,18 +175,20 @@
   if (ReadElfHeader() &&
       VerifyElfHeader() &&
       ReadProgramHeaders() &&
+      CheckProgramHeaderAlignment() &&
       ReadSectionHeaders() &&
       ReadDynamicSection() &&
       ReadPadSegmentNote()) {
     did_read_ = true;
   }
 
-  if (kPageSize == 0x4000 && phdr_table_get_minimum_alignment(phdr_table_, phdr_num_) == 0x1000) {
+  if (kPageSize == 16*1024 && min_align_ == 4096) {
     // This prop needs to be read on 16KiB devices for each ELF where min_palign is 4KiB.
     // It cannot be cached since the developer may toggle app compat on/off.
     // This check will be removed once app compat is made the default on 16KiB devices.
     should_use_16kib_app_compat_ =
-        ::android::base::GetBoolProperty("bionic.linker.16kb.app_compat.enabled", false);
+        ::android::base::GetBoolProperty("bionic.linker.16kb.app_compat.enabled", false) ||
+        get_16kb_appcompat_mode();
   }
 
   return did_read_;
@@ -558,52 +563,26 @@
   return max_vaddr - min_vaddr;
 }
 
-// Returns the maximum p_align associated with a loadable segment in the ELF
-// program header table. Used to determine whether the file should be loaded at
-// a specific virtual address alignment for use with huge pages.
-size_t phdr_table_get_maximum_alignment(const ElfW(Phdr)* phdr_table, size_t phdr_count) {
-  size_t maximum_alignment = page_size();
+bool ElfReader::CheckProgramHeaderAlignment() {
+  max_align_ = min_align_ = page_size();
 
-  for (size_t i = 0; i < phdr_count; ++i) {
-    const ElfW(Phdr)* phdr = &phdr_table[i];
+  for (size_t i = 0; i < phdr_num_; ++i) {
+    const ElfW(Phdr)* phdr = &phdr_table_[i];
 
     // p_align must be 0, 1, or a positive, integral power of two.
     if (phdr->p_type != PT_LOAD || ((phdr->p_align & (phdr->p_align - 1)) != 0)) {
+      // TODO: reject ELF files with bad p_align values.
       continue;
     }
 
-    maximum_alignment = std::max(maximum_alignment, static_cast<size_t>(phdr->p_align));
+    max_align_ = std::max(max_align_, static_cast<size_t>(phdr->p_align));
+
+    if (phdr->p_align > 1) {
+      min_align_ = std::min(min_align_, static_cast<size_t>(phdr->p_align));
+    }
   }
 
-#if defined(__LP64__)
-  return maximum_alignment;
-#else
-  return page_size();
-#endif
-}
-
-// Returns the minimum p_align associated with a loadable segment in the ELF
-// program header table. Used to determine if the program alignment is compatible
-// with the page size of this system.
-size_t phdr_table_get_minimum_alignment(const ElfW(Phdr)* phdr_table, size_t phdr_count) {
-  size_t minimum_alignment = page_size();
-
-  for (size_t i = 0; i < phdr_count; ++i) {
-    const ElfW(Phdr)* phdr = &phdr_table[i];
-
-    // p_align must be 0, 1, or a positive, integral power of two.
-    if (phdr->p_type != PT_LOAD || ((phdr->p_align & (phdr->p_align - 1)) != 0)) {
-      continue;
-    }
-
-    if (phdr->p_align <= 1) {
-      continue;
-    }
-
-    minimum_alignment = std::min(minimum_alignment, static_cast<size_t>(phdr->p_align));
-  }
-
-  return minimum_alignment;
+  return true;
 }
 
 // Reserve a virtual address range such that if it's limits were extended to the next 2**align
@@ -624,24 +603,23 @@
   // Minimum alignment of shared library gap. For efficiency, this should match the second level
   // page size of the platform.
 #if defined(__LP64__)
-  constexpr size_t kGapAlignment = 1ul << 21;  // 2MB
-#else
-  constexpr size_t kGapAlignment = 0;
+  constexpr size_t kGapAlignment = 2 * 1024 * 1024;
 #endif
   // Maximum gap size, in the units of kGapAlignment.
   constexpr size_t kMaxGapUnits = 32;
   // Allocate enough space so that the end of the desired region aligned up is still inside the
   // mapping.
-  size_t mmap_size = align_up(size, mapping_align) + mapping_align - page_size();
+  size_t mmap_size = __builtin_align_up(size, mapping_align) + mapping_align - page_size();
   uint8_t* mmap_ptr =
       reinterpret_cast<uint8_t*>(mmap(nullptr, mmap_size, PROT_NONE, mmap_flags, -1, 0));
   if (mmap_ptr == MAP_FAILED) {
     return nullptr;
   }
   size_t gap_size = 0;
-  size_t first_byte = reinterpret_cast<size_t>(align_up(mmap_ptr, mapping_align));
-  size_t last_byte = reinterpret_cast<size_t>(align_down(mmap_ptr + mmap_size, mapping_align) - 1);
-  if (kGapAlignment && first_byte / kGapAlignment != last_byte / kGapAlignment) {
+  size_t first_byte = reinterpret_cast<size_t>(__builtin_align_up(mmap_ptr, mapping_align));
+  size_t last_byte = reinterpret_cast<size_t>(__builtin_align_down(mmap_ptr + mmap_size, mapping_align) - 1);
+#if defined(__LP64__)
+  if (first_byte / kGapAlignment != last_byte / kGapAlignment) {
     // This library crosses a 2MB boundary and will fragment a new huge page.
     // Lets take advantage of that and insert a random number of inaccessible huge pages before that
     // to improve address randomization and make it harder to locate this library code by probing.
@@ -649,23 +627,24 @@
     mapping_align = std::max(mapping_align, kGapAlignment);
     gap_size =
         kGapAlignment * (is_first_stage_init() ? 1 : arc4random_uniform(kMaxGapUnits - 1) + 1);
-    mmap_size = align_up(size + gap_size, mapping_align) + mapping_align - page_size();
+    mmap_size = __builtin_align_up(size + gap_size, mapping_align) + mapping_align - page_size();
     mmap_ptr = reinterpret_cast<uint8_t*>(mmap(nullptr, mmap_size, PROT_NONE, mmap_flags, -1, 0));
     if (mmap_ptr == MAP_FAILED) {
       return nullptr;
     }
   }
+#endif
 
-  uint8_t *gap_end, *gap_start;
+  uint8_t* gap_end = mmap_ptr + mmap_size;
+#if defined(__LP64__)
   if (gap_size) {
-    gap_end = align_down(mmap_ptr + mmap_size, kGapAlignment);
-    gap_start = gap_end - gap_size;
-  } else {
-    gap_start = gap_end = mmap_ptr + mmap_size;
+    gap_end = __builtin_align_down(gap_end, kGapAlignment);
   }
+#endif
+  uint8_t* gap_start = gap_end - gap_size;
 
-  uint8_t* first = align_up(mmap_ptr, mapping_align);
-  uint8_t* last = align_down(gap_start, mapping_align) - size;
+  uint8_t* first = __builtin_align_up(mmap_ptr, mapping_align);
+  uint8_t* last = __builtin_align_down(gap_start, mapping_align) - size;
 
   // arc4random* is not available in first stage init because /dev/urandom hasn't yet been
   // created. Don't randomize then.
@@ -713,10 +692,9 @@
     }
     size_t start_alignment = page_size();
     if (get_transparent_hugepages_supported() && get_application_target_sdk_version() >= 31) {
-      size_t maximum_alignment = phdr_table_get_maximum_alignment(phdr_table_, phdr_num_);
       // Limit alignment to PMD size as other alignments reduce the number of
       // bits available for ASLR for no benefit.
-      start_alignment = maximum_alignment == kPmdSize ? kPmdSize : page_size();
+      start_alignment = max_align_ == kPmdSize ? kPmdSize : page_size();
     }
     start = ReserveWithAlignmentPadding(load_size_, kLibraryAlignment, start_alignment, &gap_start_,
                                         &gap_size_);
@@ -748,9 +726,10 @@
 }
 
 /*
- * Returns true if the kernel supports page size migration, else false.
+ * Returns true if the kernel supports page size migration for this process.
  */
 bool page_size_migration_supported() {
+#if defined(__LP64__)
   static bool pgsize_migration_enabled = []() {
     std::string enabled;
     if (!android::base::ReadFileToString("/sys/kernel/mm/pgsize_migration/enabled", &enabled)) {
@@ -759,6 +738,9 @@
     return enabled.find("1") != std::string::npos;
   }();
   return pgsize_migration_enabled;
+#else
+  return false;
+#endif
 }
 
 // Find the ELF note of type NT_ANDROID_TYPE_PAD_SEGMENT and check that the desc value is 1.
@@ -1003,13 +985,12 @@
   // are not 16KiB aligned.
   size_t seg_align = should_use_16kib_app_compat_ ? kCompatPageSize : kPageSize;
 
-  size_t min_palign = phdr_table_get_minimum_alignment(phdr_table_, phdr_num_);
   // Only enforce this on 16 KB systems with app compat disabled.
   // Apps may rely on undefined behavior here on 4 KB systems,
   // which is the norm before this change is introduced
-  if (kPageSize >= 16384 && min_palign < kPageSize && !should_use_16kib_app_compat_) {
+  if (kPageSize >= 16384 && min_align_ < kPageSize && !should_use_16kib_app_compat_) {
     DL_ERR("\"%s\" program alignment (%zu) cannot be smaller than system page size (%zu)",
-           name_.c_str(), min_palign, kPageSize);
+           name_.c_str(), min_align_, kPageSize);
     return false;
   }
 
@@ -1034,7 +1015,7 @@
     ElfW(Addr) seg_start = phdr->p_vaddr + load_bias_;
     ElfW(Addr) seg_end = seg_start + p_memsz;
 
-    ElfW(Addr) seg_page_end = align_up(seg_end, seg_align);
+    ElfW(Addr) seg_page_end = __builtin_align_up(seg_end, seg_align);
 
     ElfW(Addr) seg_file_end = seg_start + p_filesz;
 
@@ -1042,7 +1023,7 @@
     ElfW(Addr) file_start = phdr->p_offset;
     ElfW(Addr) file_end = file_start + p_filesz;
 
-    ElfW(Addr) file_page_start = align_down(file_start, seg_align);
+    ElfW(Addr) file_page_start = __builtin_align_down(file_start, seg_align);
     ElfW(Addr) file_length = file_end - file_page_start;
 
     if (file_size_ <= 0) {
@@ -1172,6 +1153,125 @@
                                    should_use_16kib_app_compat);
 }
 
+static bool segment_needs_memtag_globals_remapping(const ElfW(Phdr) * phdr) {
+  // For now, MTE globals is only supported on writeable data segments.
+  return phdr->p_type == PT_LOAD && !(phdr->p_flags & PF_X) && (phdr->p_flags & PF_W);
+}
+
+/* When MTE globals are requested by the binary, and when the hardware supports
+ * it, remap the executable's PT_LOAD data pages to have PROT_MTE.
+ *
+ * Returns 0 on success, -1 on failure (error code in errno).
+ */
+int remap_memtag_globals_segments(const ElfW(Phdr) * phdr_table __unused,
+                                  size_t phdr_count __unused, ElfW(Addr) load_bias __unused) {
+#if defined(__aarch64__)
+  for (const ElfW(Phdr)* phdr = phdr_table; phdr < phdr_table + phdr_count; phdr++) {
+    if (!segment_needs_memtag_globals_remapping(phdr)) {
+      continue;
+    }
+
+    uintptr_t seg_page_start = page_start(phdr->p_vaddr) + load_bias;
+    uintptr_t seg_page_end = page_end(phdr->p_vaddr + phdr->p_memsz) + load_bias;
+    size_t seg_page_aligned_size = seg_page_end - seg_page_start;
+
+    int prot = PFLAGS_TO_PROT(phdr->p_flags);
+    // For anonymous private mappings, it may be possible to simply mprotect()
+    // the PROT_MTE flag over the top. For file-based mappings, this will fail,
+    // and we'll need to fall back. We also allow PROT_WRITE here to allow
+    // writing memory tags (in `soinfo::tag_globals()`), and set these sections
+    // back to read-only after tags are applied (similar to RELRO).
+    prot |= PROT_MTE;
+    if (mprotect(reinterpret_cast<void*>(seg_page_start), seg_page_aligned_size,
+                 prot | PROT_WRITE) == 0) {
+      continue;
+    }
+
+    void* mapping_copy = mmap(nullptr, seg_page_aligned_size, PROT_READ | PROT_WRITE,
+                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    linker_memcpy(mapping_copy, reinterpret_cast<void*>(seg_page_start), seg_page_aligned_size);
+
+    void* seg_addr = mmap(reinterpret_cast<void*>(seg_page_start), seg_page_aligned_size,
+                          prot | PROT_WRITE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (seg_addr == MAP_FAILED) return -1;
+
+    linker_memcpy(seg_addr, mapping_copy, seg_page_aligned_size);
+    munmap(mapping_copy, seg_page_aligned_size);
+  }
+#endif  // defined(__aarch64__)
+  return 0;
+}
+
+void protect_memtag_globals_ro_segments(const ElfW(Phdr) * phdr_table __unused,
+                                        size_t phdr_count __unused, ElfW(Addr) load_bias __unused) {
+#if defined(__aarch64__)
+  for (const ElfW(Phdr)* phdr = phdr_table; phdr < phdr_table + phdr_count; phdr++) {
+    int prot = PFLAGS_TO_PROT(phdr->p_flags);
+    if (!segment_needs_memtag_globals_remapping(phdr) || (prot & PROT_WRITE)) {
+      continue;
+    }
+
+    prot |= PROT_MTE;
+
+    uintptr_t seg_page_start = page_start(phdr->p_vaddr) + load_bias;
+    uintptr_t seg_page_end = page_end(phdr->p_vaddr + phdr->p_memsz) + load_bias;
+    size_t seg_page_aligned_size = seg_page_end - seg_page_start;
+    mprotect(reinterpret_cast<void*>(seg_page_start), seg_page_aligned_size, prot);
+  }
+#endif  // defined(__aarch64__)
+}
+
+void name_memtag_globals_segments(const ElfW(Phdr) * phdr_table, size_t phdr_count,
+                                  ElfW(Addr) load_bias, const char* soname,
+                                  std::list<std::string>* vma_names) {
+  for (const ElfW(Phdr)* phdr = phdr_table; phdr < phdr_table + phdr_count; phdr++) {
+    if (!segment_needs_memtag_globals_remapping(phdr)) {
+      continue;
+    }
+
+    uintptr_t seg_page_start = page_start(phdr->p_vaddr) + load_bias;
+    uintptr_t seg_page_end = page_end(phdr->p_vaddr + phdr->p_memsz) + load_bias;
+    size_t seg_page_aligned_size = seg_page_end - seg_page_start;
+
+    // For file-based mappings that we're now forcing to be anonymous mappings, set the VMA name to
+    // make debugging easier.
+    // Once we are targeting only devices that run kernel 5.10 or newer (and thus include
+    // https://android-review.git.corp.google.com/c/kernel/common/+/1934723 which causes the
+    // VMA_ANON_NAME to be copied into the kernel), we can get rid of the storage here.
+    // For now, that is not the case:
+    // https://source.android.com/docs/core/architecture/kernel/android-common#compatibility-matrix
+    constexpr int kVmaNameLimit = 80;
+    std::string& vma_name = vma_names->emplace_back(kVmaNameLimit, '\0');
+    int full_vma_length =
+        async_safe_format_buffer(vma_name.data(), kVmaNameLimit, "mt:%s+%" PRIxPTR, soname,
+                                 page_start(phdr->p_vaddr)) +
+        /* include the null terminator */ 1;
+    // There's an upper limit of 80 characters, including the null terminator, in the anonymous VMA
+    // name. If we run over that limit, we end up truncating the segment offset and parts of the
+    // DSO's name, starting on the right hand side of the basename. Because the basename is the most
+    // important thing, chop off the soname from the left hand side first.
+    //
+    // Example (with '#' as the null terminator):
+    //   - "mt:/data/nativetest64/bionic-unit-tests/bionic-loader-test-libs/libdlext_test.so+e000#"
+    //     is a `full_vma_length` == 86.
+    //
+    // We need to left-truncate (86 - 80) 6 characters from the soname, plus the
+    // `vma_truncation_prefix`, so 9 characters total.
+    if (full_vma_length > kVmaNameLimit) {
+      const char vma_truncation_prefix[] = "...";
+      int soname_truncated_bytes =
+          full_vma_length - kVmaNameLimit + sizeof(vma_truncation_prefix) - 1;
+      async_safe_format_buffer(vma_name.data(), kVmaNameLimit, "mt:%s%s+%" PRIxPTR,
+                               vma_truncation_prefix, soname + soname_truncated_bytes,
+                               page_start(phdr->p_vaddr));
+    }
+    if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, reinterpret_cast<void*>(seg_page_start),
+              seg_page_aligned_size, vma_name.data()) != 0) {
+      DL_WARN("Failed to rename memtag global segment: %m");
+    }
+  }
+}
+
 /* Change the protection of all loaded segments in memory to writable.
  * This is useful before performing relocations. Once completed, you
  * will have to call phdr_table_protect_segments to restore the original
diff --git a/linker/linker_phdr.h b/linker/linker_phdr.h
index 2f159f3..3b68528 100644
--- a/linker/linker_phdr.h
+++ b/linker/linker_phdr.h
@@ -39,6 +39,8 @@
 #include "linker_mapped_file_fragment.h"
 #include "linker_note_gnu_property.h"
 
+#include <list>
+
 #define MAYBE_MAP_FLAG(x, from, to)  (((x) & (from)) ? (to) : 0)
 #define PFLAGS_TO_PROT(x)            (MAYBE_MAP_FLAG((x), PF_X, PROT_EXEC) | \
                                       MAYBE_MAP_FLAG((x), PF_R, PROT_READ) | \
@@ -74,6 +76,7 @@
   [[nodiscard]] bool ReadElfHeader();
   [[nodiscard]] bool VerifyElfHeader();
   [[nodiscard]] bool ReadProgramHeaders();
+  [[nodiscard]] bool CheckProgramHeaderAlignment();
   [[nodiscard]] bool ReadSectionHeaders();
   [[nodiscard]] bool ReadDynamicSection();
   [[nodiscard]] bool ReadPadSegmentNote();
@@ -128,6 +131,10 @@
   // Load bias.
   ElfW(Addr) load_bias_;
 
+  // Maximum and minimum alignment requirements across all phdrs.
+  size_t max_align_;
+  size_t min_align_;
+
   // Loaded phdr.
   const ElfW(Phdr)* loaded_phdr_;
 
@@ -151,9 +158,6 @@
 size_t phdr_table_get_load_size(const ElfW(Phdr)* phdr_table, size_t phdr_count,
                                 ElfW(Addr)* min_vaddr = nullptr, ElfW(Addr)* max_vaddr = nullptr);
 
-size_t phdr_table_get_maximum_alignment(const ElfW(Phdr)* phdr_table, size_t phdr_count);
-size_t phdr_table_get_minimum_alignment(const ElfW(Phdr)* phdr_table, size_t phdr_count);
-
 int phdr_table_protect_segments(const ElfW(Phdr)* phdr_table, size_t phdr_count,
                                 ElfW(Addr) load_bias, bool should_pad_segments,
                                 bool should_use_16kib_app_compat,
@@ -188,3 +192,13 @@
                                             ElfW(Addr) load_bias);
 
 bool page_size_migration_supported();
+
+int remap_memtag_globals_segments(const ElfW(Phdr) * phdr_table, size_t phdr_count,
+                                  ElfW(Addr) load_bias);
+
+void protect_memtag_globals_ro_segments(const ElfW(Phdr) * phdr_table, size_t phdr_count,
+                                        ElfW(Addr) load_bias);
+
+void name_memtag_globals_segments(const ElfW(Phdr) * phdr_table, size_t phdr_count,
+                                  ElfW(Addr) load_bias, const char* soname,
+                                  std::list<std::string>* vma_names);
diff --git a/linker/linker_phdr_16kib_compat.cpp b/linker/linker_phdr_16kib_compat.cpp
index a4d8459..d3783cf 100644
--- a/linker/linker_phdr_16kib_compat.cpp
+++ b/linker/linker_phdr_16kib_compat.cpp
@@ -42,10 +42,20 @@
 
 #include <string>
 
+static bool g_enable_16kb_app_compat;
+
 static inline bool segment_contains_prefix(const ElfW(Phdr)* segment, const ElfW(Phdr)* prefix) {
   return segment && prefix && segment->p_vaddr == prefix->p_vaddr;
 }
 
+void set_16kb_appcompat_mode(bool enable_app_compat) {
+  g_enable_16kb_app_compat = enable_app_compat;
+}
+
+bool get_16kb_appcompat_mode() {
+  return g_enable_16kb_app_compat;
+}
+
 /*
  * Returns true if the ELF contains at most 1 RELRO segment; and populates @relro_phdr
  * with the relro phdr or nullptr if none.
@@ -148,7 +158,7 @@
   }
 
   if (!relro_phdr) {
-    *vaddr = align_down(first_rw->p_vaddr, kCompatPageSize);
+    *vaddr = __builtin_align_down(first_rw->p_vaddr, kCompatPageSize);
     return true;
   }
 
@@ -165,7 +175,7 @@
     return false;
   }
 
-  *vaddr = align_up(end, kCompatPageSize);
+  *vaddr = __builtin_align_up(end, kCompatPageSize);
   return true;
 }
 
@@ -217,11 +227,11 @@
   // will lead to overwriting adjacent segments since the ELF's segment(s)
   // are not 16KiB aligned.
 
-  void* start = reinterpret_cast<void*>(align_down(phdr->p_vaddr + load_bias_, kCompatPageSize));
+  void* start = reinterpret_cast<void*>(__builtin_align_down(phdr->p_vaddr + load_bias_, kCompatPageSize));
 
   // The ELF could be being loaded directly from a zipped APK,
   // the zip offset must be added to find the segment offset.
-  const ElfW(Addr) offset = file_offset_ + align_down(phdr->p_offset, kCompatPageSize);
+  const ElfW(Addr) offset = file_offset_ + __builtin_align_down(phdr->p_offset, kCompatPageSize);
 
   CHECK(should_use_16kib_app_compat_);
 
diff --git a/linker/linker_relocate.cpp b/linker/linker_relocate.cpp
index 0470f87..bbf8359 100644
--- a/linker/linker_relocate.cpp
+++ b/linker/linker_relocate.cpp
@@ -44,6 +44,8 @@
 #include "linker_soinfo.h"
 #include "private/bionic_globals.h"
 
+#include <platform/bionic/mte.h>
+
 static bool is_tls_reloc(ElfW(Word) type) {
   switch (type) {
     case R_GENERIC_TLS_DTPMOD:
@@ -163,7 +165,8 @@
 static bool process_relocation_impl(Relocator& relocator, const rel_t& reloc) {
   constexpr bool IsGeneral = Mode == RelocMode::General;
 
-  void* const rel_target = reinterpret_cast<void*>(reloc.r_offset + relocator.si->load_bias);
+  void* const rel_target = reinterpret_cast<void*>(
+      relocator.si->apply_memtag_if_mte_globals(reloc.r_offset + relocator.si->load_bias));
   const uint32_t r_type = ELFW(R_TYPE)(reloc.r_info);
   const uint32_t r_sym = ELFW(R_SYM)(reloc.r_info);
 
@@ -316,6 +319,7 @@
     // common in non-platform binaries.
     if (r_type == R_GENERIC_ABSOLUTE) {
       count_relocation_if<IsGeneral>(kRelocAbsolute);
+      if (found_in) sym_addr = found_in->apply_memtag_if_mte_globals(sym_addr);
       const ElfW(Addr) result = sym_addr + get_addend_rel();
       LD_DEBUG(reloc && IsGeneral, "RELO ABSOLUTE %16p <- %16p %s",
                rel_target, reinterpret_cast<void*>(result), sym_name);
@@ -326,6 +330,7 @@
       // document (IHI0044F) specifies that R_ARM_GLOB_DAT has an addend, but Bionic isn't adding
       // it.
       count_relocation_if<IsGeneral>(kRelocAbsolute);
+      if (found_in) sym_addr = found_in->apply_memtag_if_mte_globals(sym_addr);
       const ElfW(Addr) result = sym_addr + get_addend_norel();
       LD_DEBUG(reloc && IsGeneral, "RELO GLOB_DAT %16p <- %16p %s",
                rel_target, reinterpret_cast<void*>(result), sym_name);
@@ -335,7 +340,18 @@
       // In practice, r_sym is always zero, but if it weren't, the linker would still look up the
       // referenced symbol (and abort if the symbol isn't found), even though it isn't used.
       count_relocation_if<IsGeneral>(kRelocRelative);
-      const ElfW(Addr) result = relocator.si->load_bias + get_addend_rel();
+      ElfW(Addr) result = relocator.si->load_bias + get_addend_rel();
+      // MTE globals reuses the place bits for additional tag-derivation metadata for
+      // R_AARCH64_RELATIVE relocations, which makes it incompatible with
+      // `-Wl,--apply-dynamic-relocs`. This is enforced by lld, however there's nothing stopping
+      // Android binaries (particularly prebuilts) from building with this linker flag if they're
+      // not built with MTE globals. Thus, don't use the new relocation semantics if this DSO
+      // doesn't have MTE globals.
+      if (relocator.si->should_tag_memtag_globals()) {
+        int64_t* place = static_cast<int64_t*>(rel_target);
+        int64_t offset = *place;
+        result = relocator.si->apply_memtag_if_mte_globals(result + offset) - offset;
+      }
       LD_DEBUG(reloc && IsGeneral, "RELO RELATIVE %16p <- %16p",
                rel_target, reinterpret_cast<void*>(result));
       *static_cast<ElfW(Addr)*>(rel_target) = result;
@@ -600,7 +616,7 @@
     LD_DEBUG(reloc, "[ relocating %s relr ]", get_realpath());
     const ElfW(Relr)* begin = relr_;
     const ElfW(Relr)* end = relr_ + relr_count_;
-    if (!relocate_relr(begin, end, load_bias)) {
+    if (!relocate_relr(begin, end, load_bias, should_tag_memtag_globals())) {
       return false;
     }
   }
diff --git a/linker/linker_sleb128.h b/linker/linker_sleb128.h
index 6bb3199..f48fda8 100644
--- a/linker/linker_sleb128.h
+++ b/linker/linker_sleb128.h
@@ -69,3 +69,32 @@
   const uint8_t* current_;
   const uint8_t* const end_;
 };
+
+class uleb128_decoder {
+ public:
+  uleb128_decoder(const uint8_t* buffer, size_t count) : current_(buffer), end_(buffer + count) {}
+
+  uint64_t pop_front() {
+    uint64_t value = 0;
+
+    size_t shift = 0;
+    uint8_t byte;
+
+    do {
+      if (current_ >= end_) {
+        async_safe_fatal("uleb128_decoder ran out of bounds");
+      }
+      byte = *current_++;
+      value |= (static_cast<size_t>(byte & 127) << shift);
+      shift += 7;
+    } while (byte & 128);
+
+    return value;
+  }
+
+  bool has_bytes() { return current_ < end_; }
+
+ private:
+  const uint8_t* current_;
+  const uint8_t* const end_;
+};
diff --git a/linker/linker_soinfo.cpp b/linker/linker_soinfo.cpp
index 0549d36..176c133 100644
--- a/linker/linker_soinfo.cpp
+++ b/linker/linker_soinfo.cpp
@@ -44,6 +44,8 @@
 #include "linker_logger.h"
 #include "linker_relocate.h"
 #include "linker_utils.h"
+#include "platform/bionic/mte.h"
+#include "private/bionic_globals.h"
 
 SymbolLookupList::SymbolLookupList(soinfo* si)
     : sole_lib_(si->get_lookup_lib()), begin_(&sole_lib_), end_(&sole_lib_ + 1) {
@@ -304,6 +306,12 @@
   return is_gnu_hash() ? gnu_lookup(symbol_name, vi) : elf_lookup(symbol_name, vi);
 }
 
+ElfW(Addr) soinfo::apply_memtag_if_mte_globals(ElfW(Addr) sym_addr) const {
+  if (!should_tag_memtag_globals()) return sym_addr;
+  if (sym_addr == 0) return sym_addr;  // Handle undefined weak symbols.
+  return reinterpret_cast<ElfW(Addr)>(get_tagged_address(reinterpret_cast<void*>(sym_addr)));
+}
+
 const ElfW(Sym)* soinfo::gnu_lookup(SymbolName& symbol_name, const version_info* vi) const {
   const uint32_t hash = symbol_name.gnu_hash();
 
diff --git a/linker/linker_soinfo.h b/linker/linker_soinfo.h
index 9bec2aa..4d02676 100644
--- a/linker/linker_soinfo.h
+++ b/linker/linker_soinfo.h
@@ -30,6 +30,7 @@
 
 #include <link.h>
 
+#include <list>
 #include <memory>
 #include <string>
 #include <vector>
@@ -66,6 +67,7 @@
                                          // soinfo is executed and this flag is
                                          // unset.
 #define FLAG_PRELINKED        0x00000400 // prelink_image has successfully processed this soinfo
+#define FLAG_GLOBALS_TAGGED   0x00000800 // globals have been tagged by MTE.
 #define FLAG_NEW_SOINFO       0x40000000 // new soinfo format
 
 #define SOINFO_VERSION 6
@@ -252,11 +254,14 @@
   void call_constructors();
   void call_destructors();
   void call_pre_init_constructors();
-  bool prelink_image();
+  bool prelink_image(bool deterministic_memtag_globals = false);
   bool link_image(const SymbolLookupList& lookup_list, soinfo* local_group_root,
                   const android_dlextinfo* extinfo, size_t* relro_fd_offset);
   bool protect_relro();
 
+  void tag_globals(bool deterministic_memtag_globals);
+  ElfW(Addr) apply_memtag_if_mte_globals(ElfW(Addr) sym_addr) const;
+
   void add_child(soinfo* child);
   void remove_all_links();
 
@@ -295,7 +300,7 @@
 #else
     // If you make this return non-true in the case where
     // __work_around_b_24465209__ is not defined, you will have to change
-    // memtag_dynamic_entries.
+    // memtag_dynamic_entries() and vma_names().
     return true;
 #endif
   }
@@ -394,6 +399,18 @@
    should_pad_segments_ = should_pad_segments;
   }
   bool should_pad_segments() const { return should_pad_segments_; }
+  bool should_tag_memtag_globals() const {
+    return !is_linker() && memtag_globals() && memtag_globalssz() > 0 && __libc_mte_enabled();
+  }
+  std::list<std::string>* vma_names() {
+#ifdef __aarch64__
+#ifdef __work_around_b_24465209__
+#error "Assuming aarch64 does not use versioned soinfo."
+#endif
+    return &vma_names_;
+#endif
+    return nullptr;
+};
 
   void set_should_use_16kib_app_compat(bool should_use_16kib_app_compat) {
     should_use_16kib_app_compat_ = should_use_16kib_app_compat;
@@ -489,6 +506,7 @@
 
   // __aarch64__ only, which does not use versioning.
   memtag_dynamic_entries_t memtag_dynamic_entries_;
+  std::list<std::string> vma_names_;
 
   // Pad gaps between segments when memory mapping?
   bool should_pad_segments_ = false;
diff --git a/tests/Android.bp b/tests/Android.bp
index b537483..f227bbc 100644
--- a/tests/Android.bp
+++ b/tests/Android.bp
@@ -94,6 +94,13 @@
 // Prebuilt shared libraries for use in tests.
 // -----------------------------------------------------------------------------
 
+filegroup {
+    name: "bionic_prebuilt_test_elf_files_arm64",
+    srcs: [
+        "prebuilt-elf-files/arm64/*.so",
+    ],
+}
+
 cc_prebuilt_test_library_shared {
     name: "libtest_invalid-rw_load_segment",
     strip: {
@@ -389,7 +396,9 @@
         "bug_26110743_test.cpp",
         "byteswap_test.cpp",
         "complex_test.cpp",
-        "cpu_target_features_test.cpp",
+        // Disabled while investigating
+        // b/378304366, b/375525252
+        // "cpu_target_features_test.cpp",
         "ctype_test.cpp",
         "dirent_test.cpp",
         "elf_test.cpp",
@@ -428,6 +437,7 @@
         "malloc_test.cpp",
         "math_test.cpp",
         "membarrier_test.cpp",
+        "memtag_globals_test.cpp",
         "memtag_stack_test.cpp",
         "mntent_test.cpp",
         "mte_test.cpp",
@@ -891,6 +901,11 @@
         "ld_preload_test_helper",
         "ld_preload_test_helper_lib1",
         "ld_preload_test_helper_lib2",
+        "memtag_globals_binary",
+        "memtag_globals_binary_static",
+        "memtag_globals_dso",
+        "mte_globals_relr_regression_test_b_314038442",
+        "mte_globals_relr_regression_test_b_314038442_mte",
         "ns_hidden_child_helper",
         "preinit_getauxval_test_helper",
         "preinit_syscall_test_helper",
@@ -1199,6 +1214,35 @@
 }
 
 cc_test {
+    name: "memtag_stack_abi_test",
+    enabled: false,
+    // This does not use bionic_tests_defaults because it is not supported on
+    // host.
+    arch: {
+        arm64: {
+            enabled: true,
+        },
+    },
+    // We don't use `sanitize:` so we generate the appropriate ELF note, but
+    // still support non-MTE devices.
+    // TODO(fmayer): also add a test that enables stack MTE for MTE devices,
+    // which would test for more bugs.
+    ldflags: ["-fsanitize=memtag-stack"],
+    // Turn off all other sanitizers from SANITIZE_TARGET.
+    sanitize: {
+        never: true,
+    },
+    shared_libs: [
+        "libbase",
+    ],
+    srcs: [
+        "memtag_stack_abi_test.cpp",
+    ],
+    header_libs: ["bionic_libc_platform_headers"],
+    test_suites: ["device-tests"],
+}
+
+cc_test {
     name: "bionic-stress-tests",
     defaults: [
         "bionic_tests_defaults",
@@ -1287,6 +1331,11 @@
         "heap_tagging_static_disabled_helper",
         "heap_tagging_static_sync_helper",
         "heap_tagging_sync_helper",
+        "memtag_globals_binary",
+        "memtag_globals_binary_static",
+        "memtag_globals_dso",
+        "mte_globals_relr_regression_test_b_314038442",
+        "mte_globals_relr_regression_test_b_314038442_mte",
         "stack_tagging_helper",
         "stack_tagging_static_helper",
     ],
diff --git a/tests/android_unsafe_frame_pointer_chase_test.cpp b/tests/android_unsafe_frame_pointer_chase_test.cpp
index 7fa50e1..409cfab 100644
--- a/tests/android_unsafe_frame_pointer_chase_test.cpp
+++ b/tests/android_unsafe_frame_pointer_chase_test.cpp
@@ -124,6 +124,7 @@
   sigaction(SIGRTMIN, &s, nullptr);
 
   raise(SIGRTMIN);
+  sigaltstack(nullptr, nullptr);
   return nullptr;
 }
 
@@ -155,4 +156,25 @@
   munmap(stacks, kStackSize * 2);
 }
 
+static void* SigaltstackOnCallerStack(void*) {
+  char altstack[kStackSize];
+  SignalBacktraceThread(altstack);
+  EXPECT_TRUE(g_handler_called);
+  EXPECT_EQ(nullptr, g_handler_tester_result);
+  g_handler_called = false;
+  return nullptr;
+}
+
+TEST(android_unsafe_frame_pointer_chase, sigaltstack_on_main_thread) {
+  SigaltstackOnCallerStack(nullptr);
+}
+
+TEST(android_unsafe_frame_pointer_chase, sigaltstack_on_pthread) {
+  pthread_t t;
+  ASSERT_EQ(0, pthread_create(&t, nullptr, SigaltstackOnCallerStack, nullptr));
+  void* retval;
+  ASSERT_EQ(0, pthread_join(t, &retval));
+  EXPECT_EQ(nullptr, retval);
+}
+
 #endif // __BIONIC__
diff --git a/tests/dlext_private.h b/tests/dlext_private_tests.h
similarity index 100%
rename from tests/dlext_private.h
rename to tests/dlext_private_tests.h
diff --git a/tests/dlext_test.cpp b/tests/dlext_test.cpp
index 570da2a..b8826c1 100644
--- a/tests/dlext_test.cpp
+++ b/tests/dlext_test.cpp
@@ -21,6 +21,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <inttypes.h>
+#include <link.h>
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
@@ -40,11 +41,13 @@
 #include <procinfo/process_map.h>
 #include <ziparchive/zip_archive.h>
 
+#include "bionic/mte.h"
+#include "bionic/page.h"
 #include "core_shared_libs.h"
+#include "dlext_private_tests.h"
+#include "dlfcn_symlink_support.h"
 #include "gtest_globals.h"
 #include "utils.h"
-#include "dlext_private.h"
-#include "dlfcn_symlink_support.h"
 
 #define ASSERT_DL_NOTNULL(ptr) \
     ASSERT_TRUE((ptr) != nullptr) << "dlerror: " << dlerror()
@@ -1958,6 +1961,14 @@
   dlclose(ns_a_handle3);
 }
 
+static inline int MapPflagsToProtFlags(uint32_t flags) {
+  int prot_flags = 0;
+  if (PF_X & flags) prot_flags |= PROT_EXEC;
+  if (PF_W & flags) prot_flags |= PROT_WRITE;
+  if (PF_R & flags) prot_flags |= PROT_READ;
+  return prot_flags;
+}
+
 TEST(dlext, ns_anonymous) {
   static const char* root_lib = "libnstest_root.so";
   std::string shared_libs = g_core_shared_libs + ":" + g_public_lib;
@@ -1999,30 +2010,45 @@
   typedef const char* (*fn_t)();
   fn_t ns_get_dlopened_string_private = reinterpret_cast<fn_t>(ns_get_dlopened_string_addr);
 
-  std::vector<map_record> maps;
-  Maps::parse_maps(&maps);
-
+  Dl_info private_library_info;
+  ASSERT_NE(dladdr(reinterpret_cast<void*>(ns_get_dlopened_string_addr), &private_library_info), 0)
+      << dlerror();
+  std::vector<map_record> maps_to_copy;
+  bool has_executable_segment = false;
   uintptr_t addr_start = 0;
   uintptr_t addr_end = 0;
-  bool has_executable_segment = false;
-  std::vector<map_record> maps_to_copy;
+  std::tuple dl_iterate_arg = {&private_library_info, &maps_to_copy, &has_executable_segment,
+                               &addr_start, &addr_end};
+  ASSERT_EQ(
+      1, dl_iterate_phdr(
+             [](dl_phdr_info* info, size_t /*size*/, void* data) -> int {
+               auto [private_library_info, maps_to_copy, has_executable_segment, addr_start,
+                     addr_end] = *reinterpret_cast<decltype(dl_iterate_arg)*>(data);
+               if (info->dlpi_addr != reinterpret_cast<ElfW(Addr)>(private_library_info->dli_fbase))
+                 return 0;
 
-  for (const auto& rec : maps) {
-    if (rec.pathname == private_library_absolute_path) {
-      if (addr_start == 0) {
-        addr_start = rec.addr_start;
-      }
-      addr_end = rec.addr_end;
-      has_executable_segment = has_executable_segment || (rec.perms & PROT_EXEC) != 0;
-
-      maps_to_copy.push_back(rec);
-    }
-  }
+               for (size_t i = 0; i < info->dlpi_phnum; ++i) {
+                 const ElfW(Phdr)* phdr = info->dlpi_phdr + i;
+                 if (phdr->p_type != PT_LOAD) continue;
+                 *has_executable_segment |= phdr->p_flags & PF_X;
+                 uintptr_t mapping_start = page_start(info->dlpi_addr + phdr->p_vaddr);
+                 uintptr_t mapping_end = page_end(info->dlpi_addr + phdr->p_vaddr + phdr->p_memsz);
+                 if (*addr_start == 0 || mapping_start < *addr_start) *addr_start = mapping_start;
+                 if (*addr_end == 0 || mapping_end > *addr_end) *addr_end = mapping_end;
+                 maps_to_copy->push_back({
+                     .addr_start = mapping_start,
+                     .addr_end = mapping_end,
+                     .perms = MapPflagsToProtFlags(phdr->p_flags),
+                 });
+               }
+               return 1;
+             },
+             &dl_iterate_arg));
 
   // Some validity checks.
+  ASSERT_NE(maps_to_copy.size(), 0u);
   ASSERT_TRUE(addr_start > 0);
   ASSERT_TRUE(addr_end > 0);
-  ASSERT_TRUE(maps_to_copy.size() > 0);
   ASSERT_TRUE(ns_get_dlopened_string_addr > addr_start);
   ASSERT_TRUE(ns_get_dlopened_string_addr < addr_end);
 
@@ -2052,19 +2078,26 @@
   ASSERT_EQ(ret, 0) << "Failed to stat library";
   size_t file_size = file_stat.st_size;
 
-  for (const auto& rec : maps_to_copy) {
-    uintptr_t offset = rec.addr_start - addr_start;
-    size_t size = rec.addr_end - rec.addr_start;
-    void* addr = reinterpret_cast<void*>(reserved_addr + offset);
-    void* map = mmap(addr, size, PROT_READ | PROT_WRITE,
-                     MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
-    ASSERT_TRUE(map != MAP_FAILED);
-    // Attempting the below memcpy from a portion of the map that is off the end of
-    // the backing file will cause the kernel to throw a SIGBUS
-    size_t _size = ::android::procinfo::MappedFileSize(rec.addr_start, rec.addr_end,
-                                                       rec.offset, file_size);
-    memcpy(map, reinterpret_cast<void*>(rec.addr_start), _size);
-    mprotect(map, size, rec.perms);
+  {
+    // Disable MTE while copying the PROT_MTE-protected global variables from
+    // the existing mappings. We don't really care about turning on PROT_MTE for
+    // the new copy of the mappings, as this isn't the behaviour under test and
+    // tags will be ignored. This only applies for MTE-enabled devices.
+    ScopedDisableMTE disable_mte_for_copying_global_variables;
+    for (const auto& rec : maps_to_copy) {
+      uintptr_t offset = rec.addr_start - addr_start;
+      size_t size = rec.addr_end - rec.addr_start;
+      void* addr = reinterpret_cast<void*>(reserved_addr + offset);
+      void* map =
+          mmap(addr, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+      ASSERT_TRUE(map != MAP_FAILED);
+      // Attempting the below memcpy from a portion of the map that is off the end of
+      // the backing file will cause the kernel to throw a SIGBUS
+      size_t _size =
+          ::android::procinfo::MappedFileSize(rec.addr_start, rec.addr_end, rec.offset, file_size);
+      memcpy(map, reinterpret_cast<void*>(rec.addr_start), _size);
+      mprotect(map, size, rec.perms);
+    }
   }
 
   // call the function copy
diff --git a/tests/grp_pwd_test.cpp b/tests/grp_pwd_test.cpp
index 7b7e0e5..3f93c8a 100644
--- a/tests/grp_pwd_test.cpp
+++ b/tests/grp_pwd_test.cpp
@@ -480,6 +480,18 @@
       EXPECT_STREQ(getpwuid(AID_CROS_EC)->pw_name, "cros_ec");
     }
   }
+  // AID_MMD (1095) was added in API level 36, but "trunk stable" means
+  // that the 2024Q* builds are tested with the _previous_ release's CTS.
+  if (android::base::GetIntProperty("ro.build.version.sdk", 0) == 35) {
+#if !defined(AID_MMD)
+#define AID_MMD 1095
+#endif
+    ids.erase(AID_MMD);
+    expected_ids.erase(AID_MMD);
+    if (getpwuid(AID_MMD)) {
+      EXPECT_STREQ(getpwuid(AID_MMD)->pw_name, "mmd");
+    }
+  }
 
   EXPECT_EQ(expected_ids, ids) << return_differences();
 }
diff --git a/tests/headers/posix/stdatomic_h.c b/tests/headers/posix/stdatomic_h.c
index 05be859..eecb89a 100644
--- a/tests/headers/posix/stdatomic_h.c
+++ b/tests/headers/posix/stdatomic_h.c
@@ -93,10 +93,7 @@
   atomic_flag f = ATOMIC_FLAG_INIT;
   atomic_int i = ATOMIC_VAR_INIT(123);
 
-  // TODO: remove this #if after the next toolchain update (http://b/374104004).
-#if !defined(__GLIBC__)
   i = kill_dependency(i);
-#endif
 
 #if !defined(atomic_compare_exchange_strong)
 #error atomic_compare_exchange_strong
diff --git a/tests/headers/posix/stdbool_h.c b/tests/headers/posix/stdbool_h.c
index f891a73..830c33c 100644
--- a/tests/headers/posix/stdbool_h.c
+++ b/tests/headers/posix/stdbool_h.c
@@ -31,10 +31,8 @@
 #include "header_checks.h"
 
 static void stdbool_h() {
-#if !defined(bool)
-#error bool
-#endif
-  MACRO_VALUE(true, 1);
-  MACRO_VALUE(false, 0);
+  TYPE(bool);
+  bool t = true;
+  bool f = false;
   MACRO_VALUE(__bool_true_false_are_defined, 1);
 }
diff --git a/tests/libs/Android.bp b/tests/libs/Android.bp
index d13ee60..5b86e78 100644
--- a/tests/libs/Android.bp
+++ b/tests/libs/Android.bp
@@ -1903,3 +1903,89 @@
         " $(location soong_zip) -o $(out).unaligned -L 0 -C $(genDir)/zipdir -D $(genDir)/zipdir &&" +
         " $(location bionic_tests_zipalign) 16384 $(out).unaligned $(out)",
 }
+
+cc_defaults {
+    name: "memtag_globals_defaults",
+    defaults: [
+        "bionic_testlib_defaults",
+        "bionic_targets_only"
+    ],
+    cflags: [
+        "-Wno-array-bounds",
+        "-Wno-unused-variable",
+    ],
+    header_libs: ["bionic_libc_platform_headers"],
+    sanitize: {
+        hwaddress: false,
+        memtag_heap: true,
+        memtag_globals: true,
+        diag: {
+            memtag_heap: true,
+        }
+    },
+}
+
+cc_test_library {
+    name: "memtag_globals_dso",
+    defaults: [ "memtag_globals_defaults" ],
+    srcs: ["memtag_globals_dso.cpp"],
+}
+
+cc_test {
+    name: "memtag_globals_binary",
+    defaults: [ "memtag_globals_defaults" ],
+    srcs: ["memtag_globals_binary.cpp"],
+    shared_libs: [ "memtag_globals_dso" ],
+    // This binary is used in the bionic-unit-tests as a data dependency, and is
+    // in the same folder as memtag_globals_dso. But, the default cc_test rules
+    // make this binary (when just explicitly built and shoved in
+    // /data/nativetest64/) end up in a subfolder called
+    // 'memtag_globals_binary'. When this happens, the explicit build fails to
+    // find the DSO because the default rpath is just ${ORIGIN}, and because we
+    // want this to be usable both from bionic-unit-tests and explicit builds,
+    // let's just not put it in a subdirectory.
+    no_named_install_directory: true,
+}
+
+cc_test {
+    name: "memtag_globals_binary_static",
+    defaults: [ "memtag_globals_defaults" ],
+    srcs: ["memtag_globals_binary.cpp"],
+    static_libs: [ "memtag_globals_dso" ],
+    no_named_install_directory: true,
+    static_executable: true,
+}
+
+// This is a regression test for b/314038442, where binaries built *without* MTE
+// globals would have out-of-bounds RELR relocations, which where then `ldg`'d,
+// which resulted in linker crashes.
+cc_test {
+  name: "mte_globals_relr_regression_test_b_314038442",
+  defaults: [
+        "bionic_testlib_defaults",
+        "bionic_targets_only"
+    ],
+    cflags: [ "-Wno-array-bounds" ],
+    ldflags: [ "-Wl,--pack-dyn-relocs=relr" ],
+    srcs: ["mte_globals_relr_regression_test_b_314038442.cpp"],
+    no_named_install_directory: true,
+    sanitize: {
+        memtag_globals: false,
+    },
+}
+
+// Same test as above, but also for MTE globals, just for the sake of it.
+cc_test {
+  name: "mte_globals_relr_regression_test_b_314038442_mte",
+  defaults: [
+        "bionic_testlib_defaults",
+        "bionic_targets_only"
+    ],
+    cflags: [ "-Wno-array-bounds" ],
+    ldflags: [ "-Wl,--pack-dyn-relocs=relr" ],
+    srcs: ["mte_globals_relr_regression_test_b_314038442.cpp"],
+    no_named_install_directory: true,
+    sanitize: {
+      memtag_globals: true,
+    },
+}
diff --git a/libc/bionic/strnlen.cpp b/tests/libs/memtag_globals.h
similarity index 67%
copy from libc/bionic/strnlen.cpp
copy to tests/libs/memtag_globals.h
index 7101b21..a03abae 100644
--- a/libc/bionic/strnlen.cpp
+++ b/tests/libs/memtag_globals.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (C) 2024 The Android Open Source Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,9 +26,18 @@
  * SUCH DAMAGE.
  */
 
-#include <string.h>
+#include <utility>
+#include <vector>
 
-size_t strnlen(const char* s, size_t n) {
-  const char* p = static_cast<const char*>(memchr(s, 0, n));
-  return p ? (p - s) : n;
-}
+void check_tagged(const void* a);
+void check_untagged(const void* a);
+void check_matching_tags(const void* a, const void* b);
+void check_eq(const void* a, const void* b);
+
+void dso_check_assertions(bool enforce_tagged);
+void dso_print_variables();
+
+void print_variable_address(const char* name, const void* ptr);
+void print_variables(const char* header,
+                     const std::vector<std::pair<const char*, const void*>>& tagged_variables,
+                     const std::vector<std::pair<const char*, const void*>>& untagged_variables);
diff --git a/tests/libs/memtag_globals_binary.cpp b/tests/libs/memtag_globals_binary.cpp
new file mode 100644
index 0000000..9248728
--- /dev/null
+++ b/tests/libs/memtag_globals_binary.cpp
@@ -0,0 +1,195 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <string>
+#include <vector>
+
+#include "memtag_globals.h"
+
+// Adapted from the LLD test suite: lld/test/ELF/Inputs/aarch64-memtag-globals.s
+
+/// Global variables defined here, of various semantics.
+char global[30] = {};
+__attribute__((no_sanitize("memtag"))) int global_untagged = 0;
+const int const_global = 0;
+static const int hidden_const_global = 0;
+static char hidden_global[12] = {};
+__attribute__((visibility("hidden"))) int hidden_attr_global = 0;
+__attribute__((visibility("hidden"))) const int hidden_attr_const_global = 0;
+
+/// Should be untagged.
+__thread int tls_global;
+__thread static int hidden_tls_global;
+
+/// Tagged, from the other file.
+extern int global_extern;
+/// Untagged, from the other file.
+extern __attribute__((no_sanitize("memtag"))) int global_extern_untagged;
+/// Tagged here, but untagged in the definition found in the sister objfile
+/// (explicitly).
+extern int global_extern_untagged_definition_but_tagged_import;
+
+/// ABS64 relocations. Also, forces symtab entries for local and external
+/// globals.
+char* pointer_to_global = &global[0];
+char* pointer_inside_global = &global[17];
+char* pointer_to_global_end = &global[30];
+char* pointer_past_global_end = &global[48];
+int* pointer_to_global_untagged = &global_untagged;
+const int* pointer_to_const_global = &const_global;
+/// RELATIVE relocations.
+const int* pointer_to_hidden_const_global = &hidden_const_global;
+char* pointer_to_hidden_global = &hidden_global[0];
+int* pointer_to_hidden_attr_global = &hidden_attr_global;
+const int* pointer_to_hidden_attr_const_global = &hidden_attr_const_global;
+/// RELATIVE relocations with special AArch64 MemtagABI semantics, with the
+/// offset ('12' or '16') encoded in the place.
+char* pointer_to_hidden_global_end = &hidden_global[12];
+char* pointer_past_hidden_global_end = &hidden_global[16];
+/// ABS64 relocations.
+int* pointer_to_global_extern = &global_extern;
+int* pointer_to_global_extern_untagged = &global_extern_untagged;
+int* pointer_to_global_extern_untagged_definition_but_tagged_import =
+    &global_extern_untagged_definition_but_tagged_import;
+
+// Force materialization of these globals into the symtab.
+int* get_address_to_tls_global() {
+  return &tls_global;
+}
+int* get_address_to_hidden_tls_global() {
+  return &hidden_tls_global;
+}
+
+static const std::vector<std::pair<const char*, const void*>>& get_expected_tagged_vars() {
+  static std::vector<std::pair<const char*, const void*>> expected_tagged_vars = {
+      {"global", &global},
+      {"pointer_inside_global", pointer_inside_global},
+      {"pointer_to_global_end", pointer_to_global_end},
+      {"pointer_past_global_end", pointer_past_global_end},
+      {"hidden_global", &hidden_global},
+      {"hidden_attr_global", &hidden_attr_global},
+      {"global_extern", &global_extern},
+  };
+  return expected_tagged_vars;
+}
+
+static const std::vector<std::pair<const char*, const void*>>& get_expected_untagged_vars() {
+  static std::vector<std::pair<const char*, const void*>> expected_untagged_vars = {
+      {"global_extern_untagged", &global_extern_untagged},
+      {"global_extern_untagged_definition_but_tagged_import",
+       &global_extern_untagged_definition_but_tagged_import},
+      {"global_untagged", &global_untagged},
+      {"const_global", &const_global},
+      {"hidden_const_global", &hidden_const_global},
+      {"hidden_attr_const_global", &hidden_attr_const_global},
+      {"tls_global", &tls_global},
+      {"hidden_tls_global", &hidden_tls_global},
+  };
+  return expected_untagged_vars;
+}
+
+void exe_print_variables() {
+  print_variables("  Variables accessible from the binary:\n", get_expected_tagged_vars(),
+                  get_expected_untagged_vars());
+}
+
+// Dump the addresses of the global variables to stderr
+void dso_print();
+void dso_print_others();
+
+void exe_check_assertions(bool check_pointers_are_tagged) {
+  // Check that non-const variables are writeable.
+  *pointer_to_global = 0;
+  *pointer_inside_global = 0;
+  *(pointer_to_global_end - 1) = 0;
+  *pointer_to_global_untagged = 0;
+  *pointer_to_hidden_global = 0;
+  *pointer_to_hidden_attr_global = 0;
+  *(pointer_to_hidden_global_end - 1) = 0;
+  *pointer_to_global_extern = 0;
+  *pointer_to_global_extern_untagged = 0;
+  *pointer_to_global_extern_untagged_definition_but_tagged_import = 0;
+
+  if (check_pointers_are_tagged) {
+    for (const auto& [_, pointer] : get_expected_tagged_vars()) {
+      check_tagged(pointer);
+    }
+  }
+
+  for (const auto& [_, pointer] : get_expected_untagged_vars()) {
+    check_untagged(pointer);
+  }
+
+  check_matching_tags(pointer_to_global, pointer_inside_global);
+  check_matching_tags(pointer_to_global, pointer_to_global_end);
+  check_matching_tags(pointer_to_global, pointer_past_global_end);
+  check_eq(pointer_inside_global, pointer_to_global + 17);
+  check_eq(pointer_to_global_end, pointer_to_global + 30);
+  check_eq(pointer_past_global_end, pointer_to_global + 48);
+
+  check_matching_tags(pointer_to_hidden_global, pointer_to_hidden_global_end);
+  check_matching_tags(pointer_to_hidden_global, pointer_past_hidden_global_end);
+  check_eq(pointer_to_hidden_global_end, pointer_to_hidden_global + 12);
+  check_eq(pointer_past_hidden_global_end, pointer_to_hidden_global + 16);
+}
+
+void crash() {
+  *pointer_past_global_end = 0;
+}
+
+int main(int argc, char** argv) {
+  bool check_pointers_are_tagged = false;
+  // For an MTE-capable device, provide argv[1] == '1' to enable the assertions
+  // that pointers should be tagged.
+  if (argc >= 2 && argv[1][0] == '1') {
+    check_pointers_are_tagged = true;
+  }
+
+  char* heap_ptr = static_cast<char*>(malloc(1));
+  print_variable_address("heap address", heap_ptr);
+  *heap_ptr = 0;
+  if (check_pointers_are_tagged) check_tagged(heap_ptr);
+  free(heap_ptr);
+
+  exe_print_variables();
+  dso_print_variables();
+
+  exe_check_assertions(check_pointers_are_tagged);
+  dso_check_assertions(check_pointers_are_tagged);
+
+  printf("Assertions were passed. Now doing a global-buffer-overflow.\n");
+  fflush(stdout);
+  crash();
+  printf("global-buffer-overflow went uncaught.\n");
+  return 0;
+}
diff --git a/tests/libs/memtag_globals_dso.cpp b/tests/libs/memtag_globals_dso.cpp
new file mode 100644
index 0000000..9ed264e
--- /dev/null
+++ b/tests/libs/memtag_globals_dso.cpp
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <vector>
+
+#include "memtag_globals.h"
+
+// Adapted from the LLD test suite: lld/test/ELF/Inputs/aarch64-memtag-globals.s
+
+int global_extern;
+static int global_extern_hidden;
+__attribute__((no_sanitize("memtag"))) int global_extern_untagged;
+__attribute__((no_sanitize("memtag"))) int global_extern_untagged_definition_but_tagged_import;
+
+void assertion_failure() {
+  exit(1);
+}
+
+void check_tagged(const void* a) {
+  uintptr_t a_uptr = reinterpret_cast<uintptr_t>(a);
+#if defined(__aarch64__)
+  if ((a_uptr >> 56) == 0) {
+    fprintf(stderr, "**********************************\n");
+    fprintf(stderr, "Failed assertion:\n");
+    fprintf(stderr, "  tag(0x%zx) != 0\n", a_uptr);
+    fprintf(stderr, "**********************************\n");
+
+    assertion_failure();
+  }
+#endif  // defined(__aarch64__)
+}
+
+void check_untagged(const void* a) {
+  uintptr_t a_uptr = reinterpret_cast<uintptr_t>(a);
+#if defined(__aarch64__)
+  if ((a_uptr >> 56) != 0) {
+    fprintf(stderr, "**********************************\n");
+    fprintf(stderr, "Failed assertion:\n");
+    fprintf(stderr, "  tag(0x%zx) == 0\n", a_uptr);
+    fprintf(stderr, "**********************************\n");
+
+    assertion_failure();
+  }
+#endif  // defined(__aarch64__)
+}
+
+void check_matching_tags(const void* a, const void* b) {
+  uintptr_t a_uptr = reinterpret_cast<uintptr_t>(a);
+  uintptr_t b_uptr = reinterpret_cast<uintptr_t>(b);
+#if defined(__aarch64__)
+  if (a_uptr >> 56 != b_uptr >> 56) {
+    fprintf(stderr, "**********************************\n");
+    fprintf(stderr, "Failed assertion:\n");
+    fprintf(stderr, "  tag(0x%zx) != tag(0x%zx)\n", a_uptr, b_uptr);
+    fprintf(stderr, "**********************************\n");
+
+    assertion_failure();
+  }
+#endif  // defined(__aarch64__)
+}
+
+void check_eq(const void* a, const void* b) {
+  if (a != b) {
+    fprintf(stderr, "**********************************\n");
+    fprintf(stderr, "Failed assertion:\n");
+    fprintf(stderr, "  %p != %p\n", a, b);
+    fprintf(stderr, "**********************************\n");
+
+    assertion_failure();
+  }
+}
+
+#define LONGEST_VARIABLE_NAME "51"
+void print_variable_address(const char* name, const void* ptr) {
+  printf("%" LONGEST_VARIABLE_NAME "s: %16p\n", name, ptr);
+}
+
+static const std::vector<std::pair<const char*, const void*>>& get_expected_tagged_vars() {
+  static std::vector<std::pair<const char*, const void*>> expected_tagged_vars = {
+      {"global_extern", &global_extern},
+      {"global_extern_hidden", &global_extern_hidden},
+  };
+  return expected_tagged_vars;
+}
+
+static const std::vector<std::pair<const char*, const void*>>& get_expected_untagged_vars() {
+  static std::vector<std::pair<const char*, const void*>> expected_untagged_vars = {
+      {"global_extern_untagged", &global_extern_untagged},
+      {"global_extern_untagged_definition_but_tagged_import",
+       &global_extern_untagged_definition_but_tagged_import},
+  };
+  return expected_untagged_vars;
+}
+
+void dso_print_variables() {
+  print_variables("  Variables declared in the DSO:\n", get_expected_tagged_vars(),
+                  get_expected_untagged_vars());
+}
+
+void print_variables(const char* header,
+                     const std::vector<std::pair<const char*, const void*>>& tagged_variables,
+                     const std::vector<std::pair<const char*, const void*>>& untagged_variables) {
+  printf("==========================================================\n");
+  printf("%s", header);
+  printf("==========================================================\n");
+  printf(" Variables expected to be tagged:\n");
+  printf("----------------------------------------------------------\n");
+  for (const auto& [name, pointer] : tagged_variables) {
+    print_variable_address(name, pointer);
+  }
+
+  printf("\n----------------------------------------------------------\n");
+  printf(" Variables expected to be untagged:\n");
+  printf("----------------------------------------------------------\n");
+  for (const auto& [name, pointer] : untagged_variables) {
+    print_variable_address(name, pointer);
+  }
+  printf("\n");
+}
+
+void dso_check_assertions(bool check_pointers_are_tagged) {
+  // Check that non-const variables are writeable.
+  global_extern = 0;
+  global_extern_hidden = 0;
+  global_extern_untagged = 0;
+  global_extern_untagged_definition_but_tagged_import = 0;
+
+  if (check_pointers_are_tagged) {
+    for (const auto& [_, pointer] : get_expected_tagged_vars()) {
+      check_tagged(pointer);
+    }
+  }
+
+  for (const auto& [_, pointer] : get_expected_untagged_vars()) {
+    check_untagged(pointer);
+  }
+}
diff --git a/libc/bionic/strnlen.cpp b/tests/libs/mte_globals_relr_regression_test_b_314038442.cpp
similarity index 63%
copy from libc/bionic/strnlen.cpp
copy to tests/libs/mte_globals_relr_regression_test_b_314038442.cpp
index 7101b21..20bbba9 100644
--- a/libc/bionic/strnlen.cpp
+++ b/tests/libs/mte_globals_relr_regression_test_b_314038442.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (C) 2024 The Android Open Source Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,9 +26,30 @@
  * SUCH DAMAGE.
  */
 
-#include <string.h>
+#include <stdint.h>
+#include <stdio.h>
 
-size_t strnlen(const char* s, size_t n) {
-  const char* p = static_cast<const char*>(memchr(s, 0, n));
-  return p ? (p - s) : n;
+static volatile char array[0x10000];
+volatile char* volatile oob_ptr = &array[0x111111111];
+
+unsigned char get_tag(__attribute__((unused)) volatile void* ptr) {
+#if defined(__aarch64__)
+  return static_cast<unsigned char>(reinterpret_cast<uintptr_t>(ptr) >> 56) & 0xf;
+#else   // !defined(__aarch64__)
+  return 0;
+#endif  // defined(__aarch64__)
+}
+
+int main() {
+  printf("Program loaded successfully. %p %p. ", array, oob_ptr);
+  if (get_tag(array) != get_tag(oob_ptr)) {
+    printf("Tags are mismatched!\n");
+    return 1;
+  }
+  if (get_tag(array) == 0) {
+    printf("Tags are zero!\n");
+  } else {
+    printf("Tags are non-zero\n");
+  }
+  return 0;
 }
diff --git a/tests/libs/ns_hidden_child_helper.cpp b/tests/libs/ns_hidden_child_helper.cpp
index c2140f1..77608e2 100644
--- a/tests/libs/ns_hidden_child_helper.cpp
+++ b/tests/libs/ns_hidden_child_helper.cpp
@@ -33,7 +33,7 @@
 #include <string>
 
 #include "../core_shared_libs.h"
-#include "../dlext_private.h"
+#include "../dlext_private_tests.h"
 
 extern "C" void global_function();
 extern "C" void internal_function();
diff --git a/tests/malloc_test.cpp b/tests/malloc_test.cpp
index 3f1ba79..8bd8bc6 100644
--- a/tests/malloc_test.cpp
+++ b/tests/malloc_test.cpp
@@ -53,7 +53,7 @@
 #if defined(__BIONIC__)
 
 #include "SignalUtils.h"
-#include "dlext_private.h"
+#include "dlext_private_tests.h"
 
 #include "platform/bionic/malloc.h"
 #include "platform/bionic/mte.h"
@@ -466,6 +466,7 @@
     // Do not verify output for debug malloc.
     ASSERT_TRUE(version == "debug-malloc-1") << "Unknown version: " << version;
   }
+  printf("Allocator version: %s\n", version.c_str());
 #endif
 }
 
diff --git a/tests/memtag_globals_test.cpp b/tests/memtag_globals_test.cpp
new file mode 100644
index 0000000..ff93e7b
--- /dev/null
+++ b/tests/memtag_globals_test.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <gtest/gtest.h>
+
+#if defined(__BIONIC__)
+#include "gtest_globals.h"
+#include "utils.h"
+#endif  // defined(__BIONIC__)
+
+#include <android-base/test_utils.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <string>
+#include <tuple>
+
+#include "platform/bionic/mte.h"
+
+class MemtagGlobalsTest : public testing::TestWithParam<bool> {};
+
+TEST_P(MemtagGlobalsTest, test) {
+  SKIP_WITH_HWASAN << "MTE globals tests are incompatible with HWASan";
+#if defined(__BIONIC__) && defined(__aarch64__)
+  std::string binary = GetTestLibRoot() + "/memtag_globals_binary";
+  bool is_static = MemtagGlobalsTest::GetParam();
+  if (is_static) {
+    binary += "_static";
+  }
+
+  chmod(binary.c_str(), 0755);
+  ExecTestHelper eth;
+  eth.SetArgs({binary.c_str(), nullptr});
+  eth.Run(
+      [&]() {
+        execve(binary.c_str(), eth.GetArgs(), eth.GetEnv());
+        GTEST_FAIL() << "Failed to execve: " << strerror(errno) << " " << binary.c_str();
+      },
+      // We catch the global-buffer-overflow and crash only when MTE globals is
+      // supported. Note that MTE globals is unsupported for fully static
+      // executables, but we should still make sure the binary passes its
+      // assertions, just that global variables won't be tagged.
+      (mte_supported() && !is_static) ? -SIGSEGV : 0, "Assertions were passed");
+#else
+  GTEST_SKIP() << "bionic/arm64 only";
+#endif
+}
+
+INSTANTIATE_TEST_SUITE_P(MemtagGlobalsTest, MemtagGlobalsTest, testing::Bool(),
+                         [](const ::testing::TestParamInfo<MemtagGlobalsTest::ParamType>& info) {
+                           if (info.param) return "MemtagGlobalsTest_static";
+                           return "MemtagGlobalsTest";
+                         });
+
+TEST(MemtagGlobalsTest, RelrRegressionTestForb314038442) {
+  SKIP_WITH_HWASAN << "MTE globals tests are incompatible with HWASan";
+#if defined(__BIONIC__) && defined(__aarch64__)
+  std::string binary = GetTestLibRoot() + "/mte_globals_relr_regression_test_b_314038442";
+  chmod(binary.c_str(), 0755);
+  ExecTestHelper eth;
+  eth.SetArgs({binary.c_str(), nullptr});
+  eth.Run(
+      [&]() {
+        execve(binary.c_str(), eth.GetArgs(), eth.GetEnv());
+        GTEST_FAIL() << "Failed to execve: " << strerror(errno) << " " << binary.c_str();
+      },
+      /* exit code */ 0, "Program loaded successfully.*Tags are zero!");
+#else
+  GTEST_SKIP() << "bionic/arm64 only";
+#endif
+}
+
+TEST(MemtagGlobalsTest, RelrRegressionTestForb314038442WithMteGlobals) {
+  if (!mte_supported()) GTEST_SKIP() << "Must have MTE support.";
+#if defined(__BIONIC__) && defined(__aarch64__)
+  std::string binary = GetTestLibRoot() + "/mte_globals_relr_regression_test_b_314038442_mte";
+  chmod(binary.c_str(), 0755);
+  ExecTestHelper eth;
+  eth.SetArgs({binary.c_str(), nullptr});
+  eth.Run(
+      [&]() {
+        execve(binary.c_str(), eth.GetArgs(), eth.GetEnv());
+        GTEST_FAIL() << "Failed to execve: " << strerror(errno) << " " << binary.c_str();
+      },
+      /* exit code */ 0, "Program loaded successfully.*Tags are non-zero");
+#else
+  GTEST_SKIP() << "bionic/arm64 only";
+#endif
+}
diff --git a/tests/memtag_stack_abi_test.cpp b/tests/memtag_stack_abi_test.cpp
new file mode 100644
index 0000000..4725c8d
--- /dev/null
+++ b/tests/memtag_stack_abi_test.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <filesystem>
+#include <fstream>
+#include <iterator>
+#include <string>
+#include <thread>
+
+#include <dlfcn.h>
+#include <stdlib.h>
+
+#include <android-base/logging.h>
+#include <gtest/gtest.h>
+
+static size_t NumberBuffers() {
+  size_t bufs = 0;
+  std::ifstream file("/proc/self/maps");
+  CHECK(file.is_open());
+  std::string line;
+  while (std::getline(file, line)) {
+    if (line.find("stack_mte_ring") != std::string::npos) {
+      ++bufs;
+    }
+  }
+  return bufs;
+}
+
+static size_t NumberThreads() {
+  std::filesystem::directory_iterator di("/proc/self/task");
+  return std::distance(begin(di), end(di));
+}
+
+TEST(MemtagStackAbiTest, MainThread) {
+#if defined(__BIONIC__) && defined(__aarch64__)
+  ASSERT_EQ(NumberBuffers(), 1U);
+  ASSERT_EQ(NumberBuffers(), NumberThreads());
+#else
+  GTEST_SKIP() << "requires bionic arm64";
+#endif
+}
+
+TEST(MemtagStackAbiTest, JoinableThread) {
+#if defined(__BIONIC__) && defined(__aarch64__)
+  ASSERT_EQ(NumberBuffers(), 1U);
+  ASSERT_EQ(NumberBuffers(), NumberThreads());
+  std::thread th([] {
+    ASSERT_EQ(NumberBuffers(), 2U);
+    ASSERT_EQ(NumberBuffers(), NumberThreads());
+  });
+  th.join();
+  ASSERT_EQ(NumberBuffers(), 1U);
+  ASSERT_EQ(NumberBuffers(), NumberThreads());
+#else
+  GTEST_SKIP() << "requires bionic arm64";
+#endif
+}
+
+TEST(MemtagStackAbiTest, DetachedThread) {
+#if defined(__BIONIC__) && defined(__aarch64__)
+  ASSERT_EQ(NumberBuffers(), 1U);
+  ASSERT_EQ(NumberBuffers(), NumberThreads());
+  std::thread th([] {
+    ASSERT_EQ(NumberBuffers(), 2U);
+    ASSERT_EQ(NumberBuffers(), NumberThreads());
+  });
+  th.detach();
+  // Leave the thread some time to exit.
+  for (int i = 0; NumberBuffers() != 1 && i < 3; ++i) {
+    sleep(1);
+  }
+  ASSERT_EQ(NumberBuffers(), 1U);
+  ASSERT_EQ(NumberBuffers(), NumberThreads());
+#else
+  GTEST_SKIP() << "requires bionic arm64";
+#endif
+}
diff --git a/tests/page_size_16kib_compat_test.cpp b/tests/page_size_16kib_compat_test.cpp
index 9aecfba..cfd52e2 100644
--- a/tests/page_size_16kib_compat_test.cpp
+++ b/tests/page_size_16kib_compat_test.cpp
@@ -26,10 +26,18 @@
  * SUCH DAMAGE.
  */
 
+#if __has_include (<android/dlext_private.h>)
+#define IS_ANDROID_DL
+#endif
+
 #include "page_size_compat_helpers.h"
 
 #include <android-base/properties.h>
 
+#if defined(IS_ANDROID_DL)
+#include <android/dlext_private.h>
+#endif
+
 TEST(PageSize16KiBCompatTest, ElfAlignment4KiB_LoadElf) {
   if (getpagesize() != 0x4000) {
     GTEST_SKIP() << "This test is only applicable to 16kB page-size devices";
@@ -44,3 +52,23 @@
 
   if (app_compat_enabled) CallTestFunction(handle);
 }
+
+TEST(PageSize16KiBCompatTest, ElfAlignment4KiB_LoadElf_perAppOption) {
+  if (getpagesize() != 0x4000) {
+    GTEST_SKIP() << "This test is only applicable to 16kB page-size devices";
+  }
+
+#if defined(IS_ANDROID_DL)
+  android_set_16kb_appcompat_mode(true);
+#endif
+
+  std::string lib = GetTestLibRoot() + "/libtest_elf_max_page_size_4kib.so";
+  void* handle = nullptr;
+
+  OpenTestLibrary(lib, false /*should_fail*/, &handle);
+  CallTestFunction(handle);
+
+#if defined(IS_ANDROID_DL)
+  android_set_16kb_appcompat_mode(false);
+#endif
+}
diff --git a/tests/pthread_test.cpp b/tests/pthread_test.cpp
index 2bf755b..5ce7d4d 100644
--- a/tests/pthread_test.cpp
+++ b/tests/pthread_test.cpp
@@ -45,6 +45,7 @@
 #include <android-base/test_utils.h>
 
 #include "private/bionic_constants.h"
+#include "private/bionic_time_conversions.h"
 #include "SignalUtils.h"
 #include "utils.h"
 
@@ -2437,23 +2438,25 @@
   ts.tv_sec = -1;
   ASSERT_EQ(ETIMEDOUT, lock_function(&m, &ts));
 
-  // check we wait long enough for the lock.
+  // Check we wait long enough for the lock before timing out...
+
+  // What time is it before we start?
   ASSERT_EQ(0, clock_gettime(clock, &ts));
-  const int64_t start_ns = ts.tv_sec * NS_PER_S + ts.tv_nsec;
-
-  // add a second to get deadline.
+  const int64_t start_ns = to_ns(ts);
+  // Add a second to get deadline, and wait until we time out.
   ts.tv_sec += 1;
-
   ASSERT_EQ(ETIMEDOUT, lock_function(&m, &ts));
 
+  // What time is it now we've timed out?
+  timespec ts2;
+  clock_gettime(clock, &ts2);
+  const int64_t end_ns = to_ns(ts2);
+
   // The timedlock must have waited at least 1 second before returning.
-  clock_gettime(clock, &ts);
-  const int64_t end_ns = ts.tv_sec * NS_PER_S + ts.tv_nsec;
-  ASSERT_GT(end_ns - start_ns, NS_PER_S);
+  ASSERT_GE(end_ns - start_ns, NS_PER_S);
 
   // If the mutex is unlocked, pthread_mutex_timedlock should succeed.
   ASSERT_EQ(0, pthread_mutex_unlock(&m));
-
   ASSERT_EQ(0, clock_gettime(clock, &ts));
   ts.tv_sec += 1;
   ASSERT_EQ(0, lock_function(&m, &ts));
@@ -2474,12 +2477,19 @@
 #endif  // __BIONIC__
 }
 
-TEST(pthread, pthread_mutex_clocklock) {
+TEST(pthread, pthread_mutex_clocklock_MONOTONIC) {
 #if defined(__BIONIC__)
   pthread_mutex_timedlock_helper(
       CLOCK_MONOTONIC, [](pthread_mutex_t* __mutex, const timespec* __timeout) {
         return pthread_mutex_clocklock(__mutex, CLOCK_MONOTONIC, __timeout);
       });
+#else   // __BIONIC__
+  GTEST_SKIP() << "pthread_mutex_clocklock not available";
+#endif  // __BIONIC__
+}
+
+TEST(pthread, pthread_mutex_clocklock_REALTIME) {
+#if defined(__BIONIC__)
   pthread_mutex_timedlock_helper(
       CLOCK_REALTIME, [](pthread_mutex_t* __mutex, const timespec* __timeout) {
         return pthread_mutex_clocklock(__mutex, CLOCK_REALTIME, __timeout);
@@ -3127,3 +3137,39 @@
   GTEST_SKIP() << "bionic-only test";
 #endif
 }
+
+TEST(pthread, pthread_getaffinity_np_failure) {
+  // Trivial test of the errno-preserving/returning behavior.
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnonnull"
+  errno = 0;
+  ASSERT_EQ(EINVAL, pthread_getaffinity_np(pthread_self(), 0, nullptr));
+  ASSERT_ERRNO(0);
+#pragma clang diagnostic pop
+}
+
+TEST(pthread, pthread_getaffinity) {
+  cpu_set_t set;
+  CPU_ZERO(&set);
+  ASSERT_EQ(0, pthread_getaffinity_np(pthread_self(), sizeof(set), &set));
+  ASSERT_GT(CPU_COUNT(&set), 0);
+}
+
+TEST(pthread, pthread_setaffinity_np_failure) {
+  // Trivial test of the errno-preserving/returning behavior.
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnonnull"
+  errno = 0;
+  ASSERT_EQ(EINVAL, pthread_setaffinity_np(pthread_self(), 0, nullptr));
+  ASSERT_ERRNO(0);
+#pragma clang diagnostic pop
+}
+
+TEST(pthread, pthread_setaffinity) {
+  cpu_set_t set;
+  CPU_ZERO(&set);
+  ASSERT_EQ(0, pthread_getaffinity_np(pthread_self(), sizeof(set), &set));
+  // It's hard to make any more general claim than this,
+  // but it ought to be safe to ask for the same affinity you already have.
+  ASSERT_EQ(0, pthread_setaffinity_np(pthread_self(), sizeof(set), &set));
+}
diff --git a/tests/sched_test.cpp b/tests/sched_test.cpp
index 0231de4..448fae9 100644
--- a/tests/sched_test.cpp
+++ b/tests/sched_test.cpp
@@ -305,8 +305,35 @@
 }
 
 TEST(sched, sched_getaffinity_failure) {
+  // Trivial test of the errno-preserving/returning behavior.
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wnonnull"
   ASSERT_EQ(-1, sched_getaffinity(getpid(), 0, nullptr));
+  ASSERT_ERRNO(EINVAL);
 #pragma clang diagnostic pop
 }
+
+TEST(pthread, sched_getaffinity) {
+  cpu_set_t set;
+  CPU_ZERO(&set);
+  ASSERT_EQ(0, sched_getaffinity(getpid(), sizeof(set), &set));
+  ASSERT_GT(CPU_COUNT(&set), 0);
+}
+
+TEST(sched, sched_setaffinity_failure) {
+  // Trivial test of the errno-preserving/returning behavior.
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnonnull"
+  ASSERT_EQ(-1, sched_setaffinity(getpid(), 0, nullptr));
+  ASSERT_ERRNO(EINVAL);
+#pragma clang diagnostic pop
+}
+
+TEST(pthread, sched_setaffinity) {
+  cpu_set_t set;
+  CPU_ZERO(&set);
+  ASSERT_EQ(0, sched_getaffinity(getpid(), sizeof(set), &set));
+  // It's hard to make any more general claim than this,
+  // but it ought to be safe to ask for the same affinity you already have.
+  ASSERT_EQ(0, sched_setaffinity(getpid(), sizeof(set), &set));
+}
diff --git a/tests/stdatomic_test.cpp b/tests/stdatomic_test.cpp
index 1c51b11..8a54080 100644
--- a/tests/stdatomic_test.cpp
+++ b/tests/stdatomic_test.cpp
@@ -40,7 +40,7 @@
   atomic_int v = 123;
   ASSERT_EQ(123, atomic_load(&v));
 
-  atomic_init(&v, 456);
+  atomic_store_explicit(&v, 456, memory_order_relaxed);
   ASSERT_EQ(456, atomic_load(&v));
 
   atomic_flag f = ATOMIC_FLAG_INIT;
@@ -258,9 +258,9 @@
   // Run a memory ordering smoke test.
   void* result;
   three_atomics a;
-  atomic_init(&a.x, 0ul);
-  atomic_init(&a.y, 0ul);
-  atomic_init(&a.z, 0ul);
+  atomic_store_explicit(&a.x, 0ul, memory_order_relaxed);
+  atomic_store_explicit(&a.y, 0ul, memory_order_relaxed);
+  atomic_store_explicit(&a.z, 0ul, memory_order_relaxed);
   pthread_t t1,t2;
   ASSERT_EQ(0, pthread_create(&t1, nullptr, reader, &a));
   ASSERT_EQ(0, pthread_create(&t2, nullptr, writer, &a));
diff --git a/tests/struct_layout_test.cpp b/tests/struct_layout_test.cpp
index 1f04344..b9fd315 100644
--- a/tests/struct_layout_test.cpp
+++ b/tests/struct_layout_test.cpp
@@ -30,7 +30,7 @@
 #define CHECK_OFFSET(name, field, offset) \
     check_offset(#name, #field, offsetof(name, field), offset);
 #ifdef __LP64__
-  CHECK_SIZE(pthread_internal_t, 816);
+  CHECK_SIZE(pthread_internal_t, 824);
   CHECK_OFFSET(pthread_internal_t, next, 0);
   CHECK_OFFSET(pthread_internal_t, prev, 8);
   CHECK_OFFSET(pthread_internal_t, tid, 16);
@@ -57,6 +57,7 @@
   CHECK_OFFSET(pthread_internal_t, errno_value, 768);
   CHECK_OFFSET(pthread_internal_t, bionic_tcb, 776);
   CHECK_OFFSET(pthread_internal_t, stack_mte_ringbuffer_vma_name_buffer, 784);
+  CHECK_OFFSET(pthread_internal_t, should_allocate_stack_mte_ringbuffer, 816);
   CHECK_SIZE(bionic_tls, 12200);
   CHECK_OFFSET(bionic_tls, key_data, 0);
   CHECK_OFFSET(bionic_tls, locale, 2080);
@@ -74,7 +75,7 @@
   CHECK_OFFSET(bionic_tls, bionic_systrace_disabled, 12193);
   CHECK_OFFSET(bionic_tls, padding, 12194);
 #else
-  CHECK_SIZE(pthread_internal_t, 704);
+  CHECK_SIZE(pthread_internal_t, 708);
   CHECK_OFFSET(pthread_internal_t, next, 0);
   CHECK_OFFSET(pthread_internal_t, prev, 4);
   CHECK_OFFSET(pthread_internal_t, tid, 8);
@@ -101,6 +102,7 @@
   CHECK_OFFSET(pthread_internal_t, errno_value, 664);
   CHECK_OFFSET(pthread_internal_t, bionic_tcb, 668);
   CHECK_OFFSET(pthread_internal_t, stack_mte_ringbuffer_vma_name_buffer, 672);
+  CHECK_OFFSET(pthread_internal_t, should_allocate_stack_mte_ringbuffer, 704);
   CHECK_SIZE(bionic_tls, 11080);
   CHECK_OFFSET(bionic_tls, key_data, 0);
   CHECK_OFFSET(bionic_tls, locale, 1040);
diff --git a/tests/sys_time_test.cpp b/tests/sys_time_test.cpp
index ff9271f..b0e52aa 100644
--- a/tests/sys_time_test.cpp
+++ b/tests/sys_time_test.cpp
@@ -23,6 +23,7 @@
 
 #include <android-base/file.h>
 
+#include "private/bionic_time_conversions.h"
 #include "utils.h"
 
 // http://b/11383777
@@ -147,14 +148,6 @@
   ASSERT_EQ(0, syscall(__NR_gettimeofday, &tv2, nullptr));
 
   // What's the difference between the two?
-  tv2.tv_sec -= tv1.tv_sec;
-  tv2.tv_usec -= tv1.tv_usec;
-  if (tv2.tv_usec < 0) {
-    --tv2.tv_sec;
-    tv2.tv_usec += 1000000;
-  }
-
   // To try to avoid flakiness we'll accept answers within 10,000us (0.01s).
-  ASSERT_EQ(0, tv2.tv_sec);
-  ASSERT_LT(tv2.tv_usec, 10'000);
+  ASSERT_LT(to_us(tv2) - to_us(tv1), 10'000);
 }