Merge "ReadPadSegmentNote: Skip PT_NOTEs that are beyond the end of the file" into main
diff --git a/android-changes-for-ndk-developers.md b/android-changes-for-ndk-developers.md
index ad8462a..8d507d1 100644
--- a/android-changes-for-ndk-developers.md
+++ b/android-changes-for-ndk-developers.md
@@ -11,12 +11,9 @@
 for details about changes in stack unwinding (crash dumps) between
 different releases.
 
-Required tools: the NDK has an _arch_-linux-android-readelf binary
-(e.g. arm-linux-androideabi-readelf or i686-linux-android-readelf)
-for each architecture (under toolchains/), but you can use readelf for
-any architecture, as we will be doing basic inspection only. On Linux
-you need to have the “binutils” package installed for readelf,
-and “pax-utils” for scanelf.
+Required tools: the NDK has an `llvm-readelf` binary that understands all the
+architecture-specific details of all Android's supported architectures. Recent
+versions of Android also have toybox readelf on the device.
 
 
 ## How we manage incompatible changes
@@ -38,42 +35,44 @@
 check logcat for warnings until their app stops functioning, so the
 toasts help bring some visibility to the issues before it's too late.
 
+
 ## Changes to library dependency resolution
 
 Until it was [fixed](https://issuetracker.google.com/36950617) in
-JB-MR2, Android didn't include the application library directory
+API level 18, Android didn't include the application library directory
 on the dynamic linker's search path. This meant that apps
 had to call `dlopen` or `System.loadLibrary` on all transitive
 dependencies before loading their main library. Worse, until it was
-[fixed](https://issuetracker.google.com/36935779) in JB-MR2, the
+[fixed](https://issuetracker.google.com/36935779) in API level 18, the
 dynamic linker's caching code cached failures too, so it was necessary
 to topologically sort your libraries and load them in reverse order.
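+
+As a rough illustration (hypothetical library names; error handling omitted),
+loading the leaf dependency before the library that needs it looked something
+like this:
+
+```
+#include <dlfcn.h>
+
+// libmain.so has a DT_NEEDED entry for libhelper.so. On API level < 18 the
+// app's library directory wasn't on the dynamic linker's search path, so the
+// dependency had to be loaded explicitly first.
+void load_native_libs() {
+  dlopen("libhelper.so", RTLD_NOW);  // leaf dependency first
+  dlopen("libmain.so", RTLD_NOW);    // then the library that needs it
+}
+```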
 
-If you need to support Android devices running OS
-versions older than JB-MR2, you might want to consider
+If you need to support Android devices running OS versions older than
+API level 23, you might want to consider
 [ReLinker](https://github.com/KeepSafe/ReLinker) which claims to solve
-these problems automatically.
+these and other problems automatically.
 
 Alternatively, if you don't have too many dependencies, it can be easiest to
 simply link all of your code into one big library and sidestep the details of
 library and symbol lookup changes on all past (and future) Android versions.
 
+
 ## Changes to library search order
 
 We have made various fixes to library search order when resolving symbols.
 
-With API 22, load order switched from depth-first to breadth-first to
+With API level 22, load order switched from depth-first to breadth-first to
 fix dlsym(3).
 
-Before API 23, the default search order was to try the main executable,
+Before API level 23, the default search order was to try the main executable,
 LD_PRELOAD libraries, the library itself, and its DT_NEEDED libraries
-in that order. For API 23 and later, for any given library, the dynamic
+in that order. For API level 23 and later, for any given library, the dynamic
 linker divides other libraries into the global group and the local
 group. The global group is shared by all libraries and contains the main
 executable, LD_PRELOAD libraries, and any library with the DF_1_GLOBAL
 flag set (by passing “-z global” to ld(1)). The local group is
 the breadth-first transitive closure of the library and its DT_NEEDED
-libraries. The M dynamic linker searches the global group followed by
+libraries. The API level 23 dynamic linker searches the global group followed by
 the local group. This allows ASAN, for example, to ensure that it can
 intercept any symbol.
 
@@ -89,7 +88,7 @@
 ## RTLD_LOCAL (Available in API level >= 23)
 
 The dlopen(3) RTLD_LOCAL flag used to be ignored but is implemented
-correctly in API 23 and later. Note that RTLD_LOCAL is the default,
+correctly in API level 23 and later. Note that RTLD_LOCAL is the default,
 so even calls to dlopen(3) that didn’t explicitly use RTLD_LOCAL will
 be affected (unless they explicitly used RTLD_GLOBAL). With RTLD_LOCAL,
 symbols will not be made available to libraries loaded by later calls
@@ -99,7 +98,7 @@
 ## GNU hashes (Available in API level >= 23)
 
 The GNU hash style available with `--hash-style=gnu` allows faster
-symbol lookup and is supported by Android's dynamic linker in API 23 and
+symbol lookup and is supported by Android's dynamic linker in API level 23 and
 above. Use `--hash-style=both` if you want to build code that uses this
 feature in new enough releases but still works on older releases.
 If you're using the NDK, clang chooses the right option
@@ -157,34 +156,26 @@
 ## Private API (Enforced for API level >= 24)
 
 Native libraries must use only public API, and must not link against
-non-NDK platform libraries. Starting with API 24 this rule is enforced and
-applications are no longer able to load non-NDK platform libraries. The
-rule is enforced by the dynamic linker, so non-public libraries
+non-NDK platform libraries. On devices running API level 24 or later,
+this rule is enforced and applications are no longer able to load all
+non-NDK platform libraries. This was to prevent future issues similar
+to the disruption caused when Android switched from OpenSSL to BoringSSL
+at API level 23.
+
+The rule is enforced by the dynamic linker, so non-public libraries
 are not accessible regardless of the way code tries to load them:
-System.loadLibrary, DT_NEEDED entries, and direct calls to dlopen(3)
+System.loadLibrary(), DT_NEEDED entries, and direct calls to dlopen(3)
 will all work exactly the same.
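+
+For example, a minimal sketch of what an affected app would see (the wrapper
+function is hypothetical):
+
+```
+#include <dlfcn.h>
+#include <stdio.h>
+
+// For an app targeting API level 24 or later, this dlopen fails because
+// libandroid_runtime.so is not a public NDK library.
+void try_private_library() {
+  void* handle = dlopen("libandroid_runtime.so", RTLD_NOW);
+  if (handle == nullptr) {
+    printf("dlopen failed: %s\n", dlerror());  // the linker explains the rejection
+  }
+}
+```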
 
-Users should have a consistent app experience across updates,
-and developers shouldn't have to make emergency app updates to
-handle platform changes. For that reason, we recommend against using
-private C/C++ symbols. Private symbols aren't tested as part of the
-Compatibility Test Suite (CTS) that all Android devices must pass. They
-may not exist, or they may behave differently. This makes apps that use
-them more likely to fail on specific devices, or on future releases ---
-as many developers found when Android 6.0 Marshmallow switched from
-OpenSSL to BoringSSL.
-
-In order to reduce the user impact of this transition, we've identified
-a set of libraries that see significant use from Google Play's
-most-installed apps, and that are feasible for us to support in the
+In order to reduce the user impact of this transition, we identified
+a set of libraries that saw significant use from Google Play's
+most-installed apps and were feasible for us to support in the
 short term (including libandroid_runtime.so, libcutils.so, libcrypto.so,
-and libssl.so). In order to give you more time to transition, we will
-temporarily support these libraries; so if you see a warning that means
-your code will not work in a future release -- please fix it now!
-
-Between O and R, this compatibility mode could be disabled by setting a
-system property (`debug.ld.greylist_disabled`). This property is ignored
-in S and later.
+and libssl.so). In order to give app developers more time to transition,
+we allowed access to these libraries for apps with a target API level < 24.
+On devices running API level 26 to API level 30, this compatibility mode could be
+disabled by setting a system property (`debug.ld.greylist_disabled`).
+This property is ignored on devices running API level 31 and later.
 
 ```
 $ readelf --dynamic libBroken.so | grep NEEDED
@@ -200,7 +191,7 @@
  0x00000001 (NEEDED)                     Shared library: [libc.so]
 ```
 
-*Potential problems*: starting from API 24 the dynamic linker will not
+*Potential problems*: starting from API level 24 the dynamic linker will not
 load private libraries, preventing the application from loading.
 
 *Resolution*: rewrite your native code to rely only on public API. As a
@@ -238,15 +229,16 @@
 *Resolution*: remove the extra steps from your build that strip section
 headers.
 
+
 ## Text Relocations (Enforced for API level >= 23)
 
-Starting with API 23, shared objects must not contain text
-relocations. That is, the code must be loaded as is and must not be
-modified. Such an approach reduces load time and improves security.
+Apps with a target API level >= 23 cannot load shared objects that contain text
+relocations. That is, the code must be loaded as is and must not be modified,
+which reduces load time and improves security. This was only a change for
+32-bit, because 64-bit never supported text relocations.
 
-The usual reason for text relocations is non-position independent
-hand-written assembler. This is not common. Use the scanelf tool as
-described in our documentation for further diagnostics:
+The usual reason for text relocations was non-position independent
+hand-written assembler. This is not common. You can use the scanelf tool
+from the pax-utils Debian package for further diagnostics:
 
 ```
 $ scanelf -qT libTextRel.so
@@ -256,10 +248,10 @@
 ```
 
 If you have no scanelf tool available, it is possible to do a basic
-check with readelf instead, look for either a TEXTREL entry or the
+check with readelf instead. Look for either a TEXTREL entry or the
 TEXTREL flag. Either alone is sufficient. (The value corresponding to the
 TEXTREL entry is irrelevant and typically 0 --- simply the presence of
-the TEXTREL entry declares that the .so contains text relocations). This
+the TEXTREL entry declares that the .so contains text relocations.) This
 example has both indicators present:
 
 ```
@@ -276,9 +268,8 @@
 
 *Potential problems*: Relocations enforce code pages being writable, and
 wastefully increase the number of dirty pages in memory. The dynamic
-linker has issued warnings about text relocations since Android K
-(API 19), but on API 23 and above it refuses to load code with text
-relocations.
+linker issued warnings about text relocations from API level 19, but on API
+level 23 and above it refuses to load code with text relocations.
 
 *Resolution*: rewrite assembler to be position independent to ensure
 no text relocations are necessary. The
@@ -296,9 +287,9 @@
 leaving the business of finding the library at runtime to the dynamic
 linker.
 
-Before API 23, Android's dynamic linker ignored the full path, and
+Before API level 23, Android's dynamic linker ignored the full path, and
 used only the basename (the part after the last ‘/') when looking
-up the required libraries. Since API 23 the runtime linker will honor
+up the required libraries. Since API level 23 the runtime linker will honor
 the DT_NEEDED exactly and so it won't be able to load the library if
 it is not present in that exact location on the device.
 
@@ -315,8 +306,8 @@
 [C:\Users\build\Android\ci\jni\libBroken.so]
 ```
 
-*Potential problems*: before API 23 the DT_NEEDED entry's basename was
-used, but starting from API 23 the Android runtime will try to load the
+*Potential problems*: before API level 23 the DT_NEEDED entry's basename was
+used, but starting from API level 23 the Android runtime will try to load the
 library using the path specified, and that path won't exist on the
 device. There are broken third-party toolchains/build systems that use
 a path on a build host instead of the SONAME.
@@ -350,16 +341,18 @@
 configured your build system to generate incorrect SONAME entries (using
 the `-soname` linker option).
 
+
 ## `__register_atfork` (Available in API level >= 23)
 
 To allow `atfork` and `pthread_atfork` handlers to be unregistered on
-`dlclose`, the implementation changed in API level 23. Unfortunately this
-requires a new libc function `__register_atfork`. Code using these functions
-that is built with a target API level >= 23 therefore will not load on earlier
-versions of Android, with an error referencing `__register_atfork`.
+`dlclose`, API level 23 added a new libc function `__register_atfork`.
+This means that code using `atfork` or `pthread_atfork` functions that is
+built with a `minSdkVersion` >= 23 will not load on earlier versions of
+Android, with an error referencing `__register_atfork`.
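+
+A minimal sketch of code that creates this dependency (handler and function
+names are made up):
+
+```
+#include <pthread.h>
+#include <stdio.h>
+
+static void on_prepare() { printf("before fork\n"); }
+static void on_parent() { printf("in parent after fork\n"); }
+static void on_child() { printf("in child after fork\n"); }
+
+// When built with minSdkVersion >= 23, registering handlers like this is
+// what produces the reference to __register_atfork.
+void install_fork_handlers() {
+  pthread_atfork(on_prepare, on_parent, on_child);
+}
+```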
 
-*Resolution*: build your code with an NDK target API level that matches your
-app's minimum API level, or avoid using `atfork`/`pthread_atfork`.
+*Resolution*: build your code with `minSdkVersion` that matches the minimum
+API level you actually support, or avoid using `atfork`/`pthread_atfork`.
+
 
 ## DT_RUNPATH support (Available in API level >= 24)
 
@@ -389,6 +382,7 @@
 into your app. The middleware vendor is aware of the problem and has a fix
 available.
 
+
 ## Invalid ELF header/section headers (Enforced for API level >= 26)
 
 In API level 26 and above the dynamic linker checks more values in
@@ -403,9 +397,10 @@
 ELF files. Note that using them puts application under high risk of
 being incompatible with future versions of Android.
 
-## Enable logging of dlopen/dlsym and library loading errors for apps (Available in Android O)
 
-Starting with Android O it is possible to enable logging of dynamic
+## Enable logging of dlopen/dlsym and library loading errors for apps (Available for API level >= 26)
+
+On devices running API level 26 or later you can enable logging of dynamic
 linker activity for debuggable apps by setting a property corresponding
 to the fully-qualified name of the specific app:
 ```
@@ -429,12 +424,13 @@
 adb shell setprop debug.ld.all dlerror,dlopen
 ```
 
+
 ## dlclose interacts badly with thread local variables with non-trivial destructors
 
 Android allows `dlclose` to unload a library even if there are still
 thread-local variables with non-trivial destructors. This leads to
 crashes when a thread exits and attempts to call the destructor, the
-code for which has been unloaded (as in [issue 360], fixed in P).
+code for which has been unloaded (as in [issue 360], fixed in API level 28).
 
 [issue 360]: https://github.com/android-ndk/ndk/issues/360
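+
+A minimal sketch of the pattern that triggers the crash (names are made up):
+
+```
+#include <string>
+
+// Inside a library that the app later dlclose()s.
+struct Session {
+  std::string id = "worker";  // std::string gives Session a non-trivial destructor
+};
+
+// The first use on each thread registers a TLS destructor pointing into this
+// library's code; if the library has been unloaded by the time the thread
+// exits, that destructor call jumps into unmapped memory.
+thread_local Session tls_session;
+
+extern "C" void touch_session() { tls_session.id = "used"; }
+```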
 
@@ -442,18 +438,19 @@
 set (so that calls to `dlclose` don't actually unload the library)
 are possible workarounds.
 
-|                   | Pre-M                      | M+      | P+    |
+|                   | API level < 23             | >= 23   | >= 28 |
 | ----------------- | -------------------------- | ------- | ----- |
 | No workaround     | Works for static STL       | Broken  | Works |
 | `-Wl,-z,nodelete` | Works for static STL       | Works   | Works |
 | No `dlclose`      | Works                      | Works   | Works |
 
-## Use of IFUNC in libc (True for all API levels on devices running Q)
 
-Starting with Android Q (API level 29), libc uses
-[IFUNC](https://sourceware.org/glibc/wiki/GNU_IFUNC) functionality in
-the dynamic linker to choose optimized assembler routines at run time
-rather than at build time. This lets us use the same `libc.so` on all
+## Use of IFUNC in libc (True for all API levels on devices running Android 10)
+
+On devices running API level 29 or later, libc uses
+[IFUNC](https://sourceware.org/glibc/wiki/GNU_IFUNC)
+functionality in the dynamic linker to choose optimized assembler routines at
+run time rather than at build time. This lets us use the same `libc.so` on all
 devices, and is similar to what other OSes already did. Because the zygote
 uses the C library, this decision is made long before we know what API
 level an app targets, so all code sees the new IFUNC-using C library.
@@ -462,6 +459,7 @@
 with IFUNC relocations. The affected functions are from `<string.h>`, but
 may expand to include more functions (and more libraries) in future.
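+
+In general terms, an IFUNC is a function whose implementation is chosen by a
+resolver that the dynamic linker runs while applying relocations. A minimal
+sketch of the mechanism (not bionic's actual resolver; all names are made up):
+
+```
+#include <stddef.h>
+
+typedef void* copy_func(void*, const void*, size_t);
+
+// A plain fallback implementation.
+static void* copy_generic(void* dst, const void* src, size_t n) {
+  char* d = static_cast<char*>(dst);
+  const char* s = static_cast<const char*>(src);
+  for (size_t i = 0; i < n; ++i) d[i] = s[i];
+  return dst;
+}
+
+// The resolver runs once, during relocation, and its return value is what
+// callers of my_memcpy end up calling.
+extern "C" copy_func* resolve_my_memcpy() {
+  // A real resolver would inspect hwcaps (or MIDR_EL1) to pick a routine.
+  return copy_generic;
+}
+
+extern "C" void* my_memcpy(void* dst, const void* src, size_t n)
+    __attribute__((ifunc("resolve_my_memcpy")));
+```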
 
+
 ## Relative relocations (RELR)
 
 Android added experimental support for RELR relative relocations
@@ -492,19 +490,22 @@
 and their long and complicated history at
 https://maskray.me/blog/2021-10-31-relative-relocations-and-relr.
 
+
 ## No more sentinels in .preinit_array/.init_array/.fini_array sections of executables (in All API levels)
 
-In Android <= U and NDK <= 26, Android used sentinels in these sections of
-executables to locate the start and end of arrays. However, when building with
-LTO, the function pointers in the arrays can be reordered, making sentinels no
-longer work. This prevents constructors for global C++ variables from being
-called in static executables when using LTO.
+In Android <= API level 34 and NDK <= r26, Android used sentinels in the
+`.preinit_array`/`.init_array`/`.fini_array` sections of executables to locate
+the start and end of these arrays. When building with LTO, the function pointers
+in the arrays can be reordered, making sentinels no longer work. This prevents
+constructors for global C++ variables from being called in static executables
+when using LTO.
 
-To fix this, in Android >= V and NDK >= 27, we removed sentinels and switched
-to using symbols inserted by LLD (like `__init_array_start`,
-`__init_array_end`) to locate the arrays. This also avoids keeping a section
-when there are no corresponding functions.
+To fix this, in Android >= API level 35 and NDK >= r27, we removed sentinels
+and switched to using symbols inserted by LLD (like `__init_array_start`,
+`__init_array_end`) to locate the arrays. This also avoids the need for an
+empty section when there are no corresponding functions.
 
-For dynamic executables, we kept sentinel support in crtbegin_dynamic.o and
-libc.so. This ensures that executables built with newer crtbegin_dynamic.o
-(in NDK >= 27) work with older libc.so (in Android <= U), and vice versa.
+For dynamic executables, we kept sentinel support in `crtbegin_dynamic.o` and
+`libc.so`. This ensures that executables built with newer `crtbegin_dynamic.o`
+(in NDK >= r27) work with older `libc.so` (in Android <= API level 34), and
+vice versa.
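+
+A minimal sketch of how startup code can walk `.init_array` using the
+linker-inserted symbols instead of sentinels:
+
+```
+// Provided by LLD at the start and end of the .init_array section.
+extern "C" void (*__init_array_start[])(void);
+extern "C" void (*__init_array_end[])(void);
+
+// Call every constructor between the two linker-defined symbols.
+void call_init_array() {
+  for (void (**f)(void) = __init_array_start; f != __init_array_end; ++f) {
+    (*f)();
+  }
+}
+```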
diff --git a/libc/Android.bp b/libc/Android.bp
index 4020ede..84fa498 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -1075,6 +1075,8 @@
                 "arch-arm64/bionic/setjmp.S",
                 "arch-arm64/bionic/syscall.S",
                 "arch-arm64/bionic/vfork.S",
+                "arch-arm64/oryon/memcpy-nt.S",
+                "arch-arm64/oryon/memset-nt.S",
             ],
         },
 
diff --git a/libc/NOTICE b/libc/NOTICE
index dfd93ff..1a84d3c 100644
--- a/libc/NOTICE
+++ b/libc/NOTICE
@@ -4024,6 +4024,33 @@
 
 -------------------------------------------------------------------
 
+Copyright (c) 2012, Linaro Limited
+   All rights reserved.
+   Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+       * Redistributions of source code must retain the above copyright
+         notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright
+         notice, this list of conditions and the following disclaimer in the
+         documentation and/or other materials provided with the distribution.
+       * Neither the name of the Linaro nor the
+         names of its contributors may be used to endorse or promote products
+         derived from this software without specific prior written permission.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-------------------------------------------------------------------
+
 Copyright (c) 2012-2014 ARM Ltd
 All rights reserved.
 
@@ -5155,3 +5182,11 @@
 
 -------------------------------------------------------------------
 
+memcpy - copy memory area
+
+Copyright (c) 2012-2022, Arm Limited.
+Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+-------------------------------------------------------------------
+
diff --git a/libc/arch-arm64/dynamic_function_dispatch.cpp b/libc/arch-arm64/dynamic_function_dispatch.cpp
index 450138c..db002b8 100644
--- a/libc/arch-arm64/dynamic_function_dispatch.cpp
+++ b/libc/arch-arm64/dynamic_function_dispatch.cpp
@@ -30,6 +30,19 @@
 #include <stddef.h>
 #include <sys/auxv.h>
 
+#define MIDR_IMPL_ID_SHIFT 24u
+#define MIDR_IMPL_ID_MASK 0xFF
+#define CPU_VARIANT_SHIFT 20u
+#define CPU_VARIANT_MASK 0xF
+
+/* Macro to identify CPU implementer */
+#define QCOM_IMPL_ID 0x51
+
+/* Macro to identify Qualcomm CPU variants which support the
+ * __memcpy_aarch64_nt routine.
+ */
+#define QCOM_ORYON_CPU_VARIANTS 0x5
+
 extern "C" {
 
 typedef void* memchr_func(const void*, int, size_t);
@@ -49,20 +62,72 @@
 
 typedef void* memcpy_func(void*, const void*, size_t);
 DEFINE_IFUNC_FOR(memcpy) {
-    if (arg->_hwcap & HWCAP_ASIMD) {
-        RETURN_FUNC(memcpy_func, __memcpy_aarch64_simd);
-    } else {
+  unsigned long midr;
+  unsigned int impl_id, cpu_variant;
+
+  /* Check if hardware capability CPUID is available */
+  if (arg->_hwcap & HWCAP_CPUID) {
+    /* Read the MIDR register */
+    asm("mrs %0, MIDR_EL1 \n\t" : "=r"(midr));
+
+    /* Extract the CPU Implementer ID */
+    impl_id = (midr >> MIDR_IMPL_ID_SHIFT) & (MIDR_IMPL_ID_MASK);
+
+    /* Check for Qualcomm implementer ID */
+    if (impl_id == QCOM_IMPL_ID) {
+      cpu_variant = (midr >> CPU_VARIANT_SHIFT) & CPU_VARIANT_MASK;
+
+      /* Check for Qualcomm Oryon CPU variants: 0x1, 0x2, 0x3, 0x4, 0x5 */
+      if (cpu_variant <= QCOM_ORYON_CPU_VARIANTS) {
+        RETURN_FUNC(memcpy_func, __memcpy_aarch64_nt);
+      } else {
         RETURN_FUNC(memcpy_func, __memcpy_aarch64);
+      }
     }
+  }
+  /* If the CPU implementer is not Qualcomm, choose the implementation
+   * based on CPU architecture features.
+   */
+  if (arg->_hwcap & HWCAP_ASIMD) {
+    RETURN_FUNC(memcpy_func, __memcpy_aarch64_simd);
+  } else {
+    RETURN_FUNC(memcpy_func, __memcpy_aarch64);
+  }
 }
 
 typedef void* memmove_func(void*, const void*, size_t);
 DEFINE_IFUNC_FOR(memmove) {
-    if (arg->_hwcap & HWCAP_ASIMD) {
-        RETURN_FUNC(memmove_func, __memmove_aarch64_simd);
-    } else {
-        RETURN_FUNC(memmove_func, __memmove_aarch64);
+  unsigned long midr;
+  unsigned int impl_id, cpu_variant;
+
+  /* Check if hardware capability CPUID is available */
+  if (arg->_hwcap & HWCAP_CPUID) {
+    /* Read the MIDR register */
+    asm("mrs %0, MIDR_EL1 \n\t" : "=r"(midr));
+
+    /* Extract the CPU Implementer ID */
+    impl_id = (midr >> MIDR_IMPL_ID_SHIFT) & (MIDR_IMPL_ID_MASK);
+
+    /* Check for Qualcomm implementer ID */
+    if (impl_id == QCOM_IMPL_ID) {
+      cpu_variant = (midr >> CPU_VARIANT_SHIFT) & CPU_VARIANT_MASK;
+
+      /* Check for Qualcomm Oryon CPU variants: 0x1, 0x2, 0x3, 0x4, 0x5 */
+      if (cpu_variant <= QCOM_ORYON_CPU_VARIANTS) {
+        RETURN_FUNC(memmove_func, __memmove_aarch64_nt);
+      } else {
+        RETURN_FUNC(memmove_func, __memmove_aarch64);
+      }
     }
+  }
+  /* If the CPU implementer is not Qualcomm, choose the implementation
+   * based on CPU architecture features.
+   */
+  if (arg->_hwcap & HWCAP_ASIMD) {
+    RETURN_FUNC(memmove_func, __memmove_aarch64_simd);
+  } else {
+    RETURN_FUNC(memmove_func, __memmove_aarch64);
+  }
 }
 
 typedef int memrchr_func(const void*, int, size_t);
@@ -72,7 +137,32 @@
 
 typedef int memset_func(void*, int, size_t);
 DEFINE_IFUNC_FOR(memset) {
+  unsigned long midr;
+  unsigned int impl_id, cpu_variant;
+
+  if (arg->_hwcap & HWCAP_CPUID) {
+    /* Read the MIDR register */
+    asm("mrs %0, MIDR_EL1 \n\t" : "=r"(midr));
+
+    /* Extract the CPU Implementer ID */
+    impl_id = (midr >> MIDR_IMPL_ID_SHIFT) & (MIDR_IMPL_ID_MASK);
+
+    /* Check for Qualcomm implementer ID */
+    if (impl_id == QCOM_IMPL_ID) {
+      cpu_variant = (midr >> CPU_VARIANT_SHIFT) & CPU_VARIANT_MASK;
+
+      /* Check for Qualcomm Oryon CPU variants: 0x1, 0x2, 0x3, 0x4, 0x5 */
+      if (cpu_variant <= QCOM_ORYON_CPU_VARIANTS) {
+        RETURN_FUNC(memset_func, __memset_aarch64_nt);
+      } else {
+        RETURN_FUNC(memset_func, __memset_aarch64);
+      }
+    } else {
+      RETURN_FUNC(memset_func, __memset_aarch64);
+    }
+  } else {
     RETURN_FUNC(memset_func, __memset_aarch64);
+  }
 }
 
 typedef char* stpcpy_func(char*, const char*, size_t);
diff --git a/libc/arch-arm64/oryon/memcpy-nt.S b/libc/arch-arm64/oryon/memcpy-nt.S
new file mode 100644
index 0000000..46f1541
--- /dev/null
+++ b/libc/arch-arm64/oryon/memcpy-nt.S
@@ -0,0 +1,351 @@
+/*
+ * memcpy - copy memory area
+ *
+ * Copyright (c) 2012-2022, Arm Limited.
+ * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses.
+ *
+ */
+
+#include <private/bionic_asm.h>
+
+#define dstin     x0
+#define src       x1
+#define count     x2
+#define dst       x3
+#define srcend    x4
+#define dstend    x5
+#define A_l       x6
+#define A_lw      w6
+#define A_h       x7
+#define B_l       x8
+#define B_lw      w8
+#define B_h       x9
+#define C_l       x10
+#define C_lw      w10
+#define C_h       x11
+#define D_l       x12
+#define D_h       x13
+#define E_l       x14
+#define E_h       x15
+#define F_l       x16
+#define F_h       x17
+#define G_l       count
+#define G_h       dst
+#define H_l       src
+#define H_h       srcend
+#define tmp1      x14
+#define tmp2      x16
+#define SMALL_BUFFER_SIZE    48
+
+/* This implementation handles overlaps and supports both memcpy and memmove
+   from a single entry point.  It uses unaligned accesses and branchless
+   sequences to keep the code small, simple and improve performance.
+
+   Copies are split into 3 main cases: small copies of up to 32 bytes, medium
+   copies of up to 128 bytes, and large copies.  The overhead of the overlap
+   check is negligible since it is only required for large copies.
+
+   Large copies use a software pipelined loop processing 64 bytes per iteration.
+   The destination pointer is 16-byte aligned to minimize unaligned accesses.
+   The loop tail is handled by always copying 64 bytes from the end.
+*/
+
+ALIAS_SYMBOL (__memmove_aarch64_nt, __memcpy_aarch64_nt)
+ENTRY (__memcpy_aarch64_nt)
+
+    add    srcend, src, count
+    add    dstend, dstin, count
+    cmp    count, 128
+    b.hi    L(copy_long)
+    cmp    count, 32
+    b.hi    L(copy32_128)
+
+    /* Small copies: 0..32 bytes.  */
+    cmp    count, 16
+    b.lo    L(copy16)
+    ldp    A_l, A_h, [src]
+    ldp    D_l, D_h, [srcend, -16]
+    stp    A_l, A_h, [dstin]
+    stp    D_l, D_h, [dstend, -16]
+    ret
+
+    /* Copy 8-15 bytes.  */
+L(copy16):
+    tbz    count, 3, L(copy8)
+    ldr    A_l, [src]
+    ldr    A_h, [srcend, -8]
+    str    A_l, [dstin]
+    str    A_h, [dstend, -8]
+    ret
+
+    .p2align 3
+    /* Copy 4-7 bytes.  */
+L(copy8):
+    tbz    count, 2, L(copy4)
+    ldr    A_lw, [src]
+    ldr    B_lw, [srcend, -4]
+    str    A_lw, [dstin]
+    str    B_lw, [dstend, -4]
+    ret
+
+    /* Copy 0..3 bytes using a branchless sequence.  */
+L(copy4):
+    cbz    count, L(copy0)
+    lsr    tmp1, count, 1
+    ldrb    A_lw, [src]
+    ldrb    C_lw, [srcend, -1]
+    ldrb    B_lw, [src, tmp1]
+    strb    A_lw, [dstin]
+    strb    B_lw, [dstin, tmp1]
+    strb    C_lw, [dstend, -1]
+L(copy0):
+    ret
+
+    .p2align 4
+    /* Medium copies: 33..128 bytes.  */
+L(copy32_128):
+    ldp    A_l, A_h, [src]
+    ldp    B_l, B_h, [src, 16]
+    ldp    C_l, C_h, [srcend, -32]
+    ldp    D_l, D_h, [srcend, -16]
+    cmp    count, 64
+    b.hi    L(copy128)
+    stp    A_l, A_h, [dstin]
+    stp    B_l, B_h, [dstin, 16]
+    stp    C_l, C_h, [dstend, -32]
+    stp    D_l, D_h, [dstend, -16]
+    ret
+
+    .p2align 4
+    /* Copy 65..128 bytes.  */
+L(copy128):
+    ldp    E_l, E_h, [src, 32]
+    ldp    F_l, F_h, [src, 48]
+    cmp    count, 96
+    b.ls    L(copy96)
+    ldp    G_l, G_h, [srcend, -64]
+    ldp    H_l, H_h, [srcend, -48]
+    stp    G_l, G_h, [dstend, -64]
+    stp    H_l, H_h, [dstend, -48]
+L(copy96):
+    stp    A_l, A_h, [dstin]
+    stp    B_l, B_h, [dstin, 16]
+    stp    E_l, E_h, [dstin, 32]
+    stp    F_l, F_h, [dstin, 48]
+    stp    C_l, C_h, [dstend, -32]
+    stp    D_l, D_h, [dstend, -16]
+    ret
+
+    .p2align 4
+    /* Copy more than 128 bytes.  */
+L(copy_long):
+    mov tmp2, #SMALL_BUFFER_SIZE
+    cmp count, tmp2, LSL#10
+    bgt L(copy_long_nt)
+    /* Use backwards copy if there is an overlap.  */
+    sub    tmp1, dstin, src
+    cbz    tmp1, L(copy0)
+    cmp    tmp1, count
+    b.lo    L(copy_long_backwards)
+
+    /* Copy 16 bytes and then align dst to 16-byte alignment.  */
+
+    ldp    D_l, D_h, [src]
+    and    tmp1, dstin, 15
+    bic    dst, dstin, 15
+    sub    src, src, tmp1
+    add    count, count, tmp1    /* Count is now 16 too large.  */
+    ldp    A_l, A_h, [src, 16]
+    stp    D_l, D_h, [dstin]
+    ldp    B_l, B_h, [src, 32]
+    ldp    C_l, C_h, [src, 48]
+    ldp    D_l, D_h, [src, 64]!
+    subs    count, count, 128 + 16    /* Test and readjust count.  */
+    b.ls    L(copy64_from_end)
+
+L(loop64):
+    stp    A_l, A_h, [dst, 16]
+    ldp    A_l, A_h, [src, 16]
+    stp    B_l, B_h, [dst, 32]
+    ldp    B_l, B_h, [src, 32]
+    stp    C_l, C_h, [dst, 48]
+    ldp    C_l, C_h, [src, 48]
+    stp    D_l, D_h, [dst, 64]!
+    ldp    D_l, D_h, [src, 64]!
+    subs    count, count, 64
+    b.hi    L(loop64)
+
+    /* Write the last iteration and copy 64 bytes from the end.  */
+L(copy64_from_end):
+    ldp    E_l, E_h, [srcend, -64]
+    stp    A_l, A_h, [dst, 16]
+    ldp    A_l, A_h, [srcend, -48]
+    stp    B_l, B_h, [dst, 32]
+    ldp    B_l, B_h, [srcend, -32]
+    stp    C_l, C_h, [dst, 48]
+    ldp    C_l, C_h, [srcend, -16]
+    stp    D_l, D_h, [dst, 64]
+    stp    E_l, E_h, [dstend, -64]
+    stp    A_l, A_h, [dstend, -48]
+    stp    B_l, B_h, [dstend, -32]
+    stp    C_l, C_h, [dstend, -16]
+    ret
+
+    .p2align 4
+
+    /* Large backwards copy for overlapping copies.
+       Copy 16 bytes and then align dst to 16-byte alignment.  */
+L(copy_long_backwards):
+    ldp    D_l, D_h, [srcend, -16]
+    and    tmp1, dstend, 15
+    sub    srcend, srcend, tmp1
+    sub    count, count, tmp1
+    ldp    A_l, A_h, [srcend, -16]
+    stp    D_l, D_h, [dstend, -16]
+    ldp    B_l, B_h, [srcend, -32]
+    ldp    C_l, C_h, [srcend, -48]
+    ldp    D_l, D_h, [srcend, -64]!
+    sub    dstend, dstend, tmp1
+    subs    count, count, 128
+    b.ls    L(copy64_from_start)
+
+L(loop64_backwards):
+    stp    A_l, A_h, [dstend, -16]
+    ldp    A_l, A_h, [srcend, -16]
+    stp    B_l, B_h, [dstend, -32]
+    ldp    B_l, B_h, [srcend, -32]
+    stp    C_l, C_h, [dstend, -48]
+    ldp    C_l, C_h, [srcend, -48]
+    stp    D_l, D_h, [dstend, -64]!
+    ldp    D_l, D_h, [srcend, -64]!
+    subs    count, count, 64
+    b.hi    L(loop64_backwards)
+
+    /* Write the last iteration and copy 64 bytes from the start.  */
+L(copy64_from_start):
+    ldp    G_l, G_h, [src, 48]
+    stp    A_l, A_h, [dstend, -16]
+    ldp    A_l, A_h, [src, 32]
+    stp    B_l, B_h, [dstend, -32]
+    ldp    B_l, B_h, [src, 16]
+    stp    C_l, C_h, [dstend, -48]
+    ldp    C_l, C_h, [src]
+    stp    D_l, D_h, [dstend, -64]
+    stp    G_l, G_h, [dstin, 48]
+    stp    A_l, A_h, [dstin, 32]
+    stp    B_l, B_h, [dstin, 16]
+    stp    C_l, C_h, [dstin]
+    ret
+
+    .p2align 4
+    /* Copy more than 48 KB using ldnp+stnp (non-temporal) instructions.  */
+L(copy_long_nt):
+    /* Use backwards copy if there is an overlap.  */
+    sub    tmp1, dstin, src
+    cbz    tmp1, L(copy0)
+    cmp    tmp1, count
+    b.lo    L(copy_long_backwards_nt)
+
+    /* Copy 16 bytes and then align dst to 16-byte alignment.  */
+
+    ldnp    D_l, D_h, [src]
+    and    tmp1, dstin, 15
+    bic    dst, dstin, 15
+    sub    src, src, tmp1
+    add    count, count, tmp1    /* Count is now 16 too large.  */
+    ldnp    A_l, A_h, [src, 16]
+    stnp    D_l, D_h, [dstin]
+    ldnp    B_l, B_h, [src, 32]
+    ldnp    C_l, C_h, [src, 48]
+    ldnp    D_l, D_h, [src, 64]
+    add     src, src, #64
+    subs    count, count, 128 + 16    /* Test and readjust count.  */
+    b.ls    L(copy64_from_end_nt)
+
+L(loop64_nt):
+    stnp    A_l, A_h, [dst, 16]
+    ldnp    A_l, A_h, [src, 16]
+    stnp    B_l, B_h, [dst, 32]
+    ldnp    B_l, B_h, [src, 32]
+    stnp    C_l, C_h, [dst, 48]
+    ldnp    C_l, C_h, [src, 48]
+    stnp    D_l, D_h, [dst, 64]
+    add dst, dst, #64
+    ldnp    D_l, D_h, [src, 64]
+    add src, src, #64
+    subs    count, count, 64
+    b.hi    L(loop64_nt)
+
+    /* Write the last iteration and copy 64 bytes from the end.  */
+L(copy64_from_end_nt):
+    ldnp    E_l, E_h, [srcend, -64]
+    stnp    A_l, A_h, [dst, 16]
+    ldnp    A_l, A_h, [srcend, -48]
+    stnp    B_l, B_h, [dst, 32]
+    ldnp    B_l, B_h, [srcend, -32]
+    stnp    C_l, C_h, [dst, 48]
+    ldnp    C_l, C_h, [srcend, -16]
+    stnp    D_l, D_h, [dst, 64]
+    stnp    E_l, E_h, [dstend, -64]
+    stnp    A_l, A_h, [dstend, -48]
+    stnp    B_l, B_h, [dstend, -32]
+    stnp    C_l, C_h, [dstend, -16]
+    ret
+
+    .p2align 4
+
+    /* Large backwards copy for overlapping copies.
+       Copy 16 bytes and then align dst to 16-byte alignment.  */
+L(copy_long_backwards_nt):
+    ldnp    D_l, D_h, [srcend, -16]
+    and    tmp1, dstend, 15
+    sub    srcend, srcend, tmp1
+    sub    count, count, tmp1
+    ldnp    A_l, A_h, [srcend, -16]
+    stnp    D_l, D_h, [dstend, -16]
+    ldnp    B_l, B_h, [srcend, -32]
+    ldnp    C_l, C_h, [srcend, -48]
+    ldnp    D_l, D_h, [srcend, -64]
+    add     srcend, srcend, #-64
+    sub    dstend, dstend, tmp1
+    subs    count, count, 128
+    b.ls    L(copy64_from_start_nt)
+
+L(loop64_backwards_nt):
+    stnp    A_l, A_h, [dstend, -16]
+    ldnp    A_l, A_h, [srcend, -16]
+    stnp    B_l, B_h, [dstend, -32]
+    ldnp    B_l, B_h, [srcend, -32]
+    stnp    C_l, C_h, [dstend, -48]
+    ldnp    C_l, C_h, [srcend, -48]
+    stnp    D_l, D_h, [dstend, -64]
+    add     dstend, dstend, #-64
+    ldnp    D_l, D_h, [srcend, -64]
+    add     srcend, srcend, #-64
+    subs    count, count, 64
+    b.hi    L(loop64_backwards_nt)
+
+    /* Write the last iteration and copy 64 bytes from the start.  */
+L(copy64_from_start_nt):
+    ldnp    G_l, G_h, [src, 48]
+    stnp    A_l, A_h, [dstend, -16]
+    ldnp    A_l, A_h, [src, 32]
+    stnp    B_l, B_h, [dstend, -32]
+    ldnp    B_l, B_h, [src, 16]
+    stnp    C_l, C_h, [dstend, -48]
+    ldnp    C_l, C_h, [src]
+    stnp    D_l, D_h, [dstend, -64]
+    stnp    G_l, G_h, [dstin, 48]
+    stnp    A_l, A_h, [dstin, 32]
+    stnp    B_l, B_h, [dstin, 16]
+    stnp    C_l, C_h, [dstin]
+    ret
+
+END (__memcpy_aarch64_nt)
+
diff --git a/libc/arch-arm64/oryon/memset-nt.S b/libc/arch-arm64/oryon/memset-nt.S
new file mode 100644
index 0000000..b91e7da
--- /dev/null
+++ b/libc/arch-arm64/oryon/memset-nt.S
@@ -0,0 +1,218 @@
+/* Copyright (c) 2012, Linaro Limited
+   All rights reserved.
+   Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+       * Redistributions of source code must retain the above copyright
+         notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright
+         notice, this list of conditions and the following disclaimer in the
+         documentation and/or other materials provided with the distribution.
+       * Neither the name of the Linaro nor the
+         names of its contributors may be used to endorse or promote products
+         derived from this software without specific prior written permission.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Unaligned accesses
+ *
+ */
+#include <private/bionic_asm.h>
+
+#define dstin		x0
+#define val		    w1
+#define count		x2
+#define tmp1		x3
+#define tmp1w		w3
+#define tmp2		x4
+#define tmp2w		w4
+#define zva_len_x	x5
+#define zva_len		w5
+#define zva_bits_x	x6
+#define A_l		    x7
+#define A_lw		w7
+#define dst		    x8
+#define tmp3w		w9
+#define tmp4        x10
+#define SMALL_BUFFER_SIZE    96
+
+ENTRY(__memset_aarch64_nt)
+    mov	dst, dstin		/* Preserve return value.  */
+    ands	A_lw, val, #255
+    b.eq	.Lzero_mem  /* Use the DC ZVA instruction if val == 0 */
+    orr	A_lw, A_lw, A_lw, lsl #8
+    orr	A_lw, A_lw, A_lw, lsl #16
+    orr	A_l, A_l, A_l, lsl #32
+.Ltail_maybe_long:
+    cmp	count, #64
+    b.ge	.Lnot_short
+.Ltail_maybe_tiny:
+    cmp	count, #15
+    b.le	.Ltail15tiny
+.Ltail63:
+    ands	tmp1, count, #0x30
+    b.eq	.Ltail15
+    add	dst, dst, tmp1
+    cmp	tmp1w, #0x20
+    b.eq	1f
+    b.lt	2f
+    stp	A_l, A_l, [dst, #-48]
+1:
+    stp	A_l, A_l, [dst, #-32]
+2:
+    stp	A_l, A_l, [dst, #-16]
+.Ltail15:
+    and	count, count, #15
+    add	dst, dst, count
+    stp	A_l, A_l, [dst, #-16]	/* Repeat some/all of last store. */
+    ret
+.Ltail15tiny:
+    /* Set up to 15 bytes.  Does not assume earlier memory
+       being set.  */
+    tbz	count, #3, 1f
+    str	A_l, [dst], #8
+1:
+    tbz	count, #2, 1f
+    str	A_lw, [dst], #4
+1:
+    tbz	count, #1, 1f
+    strh	A_lw, [dst], #2
+1:
+    tbz	count, #0, 1f
+    strb	A_lw, [dst]
+1:
+    ret
+    /* Critical loop.  Start at a new cache line boundary.  Assuming
+     * 64 bytes per line, this ensures the entire loop is in one line.  */
+    .p2align 6
+.Lnot_short:
+    mov tmp4, #SMALL_BUFFER_SIZE
+    cmp count, tmp4, LSL#10
+    /* Use non-temporal stores if count > SMALL_BUFFER_SIZE KB (96 KB) */
+    bgt L(not_short_nt)
+    neg	tmp2, dst
+    ands	tmp2, tmp2, #15
+    b.eq	2f
+    /* Bring DST to 128-bit (16-byte) alignment.  We know that there's
+     * more than that to set, so we simply store 16 bytes and advance by
+     * the amount required to reach alignment.  */
+    sub	count, count, tmp2
+    stp	A_l, A_l, [dst]
+    add	dst, dst, tmp2
+    /* There may be less than 63 bytes to go now.  */
+    cmp	count, #63
+    b.le	.Ltail63
+2:
+    sub	dst, dst, #16		/* Pre-bias.  */
+    sub	count, count, #64
+1:
+    stp	A_l, A_l, [dst, #16]
+    stp	A_l, A_l, [dst, #32]
+    stp	A_l, A_l, [dst, #48]
+    stp	A_l, A_l, [dst, #64]!
+    subs	count, count, #64
+    b.ge	1b
+    tst	count, #0x3f
+    add	dst, dst, #16
+    b.ne	.Ltail63
+    ret
+.Lnot_short_nt:
+    neg	tmp2, dst
+    ands	tmp2, tmp2, #15
+    b.eq	2f
+    /* Bring DST to 128-bit (16-byte) alignment.  We know that there's
+     * more than that to set, so we simply store 16 bytes and advance by
+     * the amount required to reach alignment.  */
+    sub	count, count, tmp2
+    stnp	A_l, A_l, [dst]
+    add	dst, dst, tmp2
+    /* There may be less than 63 bytes to go now.  */
+    cmp	count, #63
+    b.le	.Ltail63
+2:
+    sub	dst, dst, #16		/* Pre-bias.  */
+    sub	count, count, #64
+1:
+    stnp	A_l, A_l, [dst, #16]
+    stnp	A_l, A_l, [dst, #32]
+    stnp	A_l, A_l, [dst, #48]
+    stnp	A_l, A_l, [dst, #64]
+    add     dst, dst, #64
+    subs	count, count, #64
+    b.ge	1b
+    tst	count, #0x3f
+    add	dst, dst, #16
+    b.ne	.Ltail63
+    ret
+.Lzero_mem:
+    mov	A_l, #0
+    cmp	count, #63
+    b.le	.Ltail_maybe_tiny
+    neg	tmp2, dst
+    ands	tmp2, tmp2, #15
+    b.eq	1f
+    sub	count, count, tmp2
+    stp	A_l, A_l, [dst]
+    add	dst, dst, tmp2
+    cmp	count, #63
+    b.le	.Ltail63
+1:
+    /* For zeroing small amounts of memory, it's not worth setting up
+     * the line-clear code.  */
+    cmp	count, #128
+    b.lt	.Lnot_short
+    mrs	tmp1, dczid_el0
+    tbnz	tmp1, #4, .Lnot_short
+    mov	tmp3w, #4
+    and	zva_len, tmp1w, #15	/* Safety: other bits reserved.  */
+    lsl	zva_len, tmp3w, zva_len
+.Lzero_by_line:
+    /* Compute how far we need to go to become suitably aligned.  We're
+     * already at quad-word alignment.  */
+    cmp	count, zva_len_x
+    b.lt	.Lnot_short		/* Not enough to reach alignment.  */
+    sub	zva_bits_x, zva_len_x, #1
+    neg	tmp2, dst
+    ands	tmp2, tmp2, zva_bits_x
+    b.eq	1f			/* Already aligned.  */
+    /* Not aligned, check that there's enough to copy after alignment.  */
+    sub	tmp1, count, tmp2
+    cmp	tmp1, #64
+    ccmp	tmp1, zva_len_x, #8, ge	/* NZCV=0b1000 */
+    b.lt	.Lnot_short
+    /* We know that there's at least 64 bytes to zero and that it's safe
+     * to overrun by 64 bytes.  */
+    mov	count, tmp1
+2:
+    stp	A_l, A_l, [dst]
+    stp	A_l, A_l, [dst, #16]
+    stp	A_l, A_l, [dst, #32]
+    subs	tmp2, tmp2, #64
+    stp	A_l, A_l, [dst, #48]
+    add	dst, dst, #64
+    b.ge	2b
+    /* We've overrun a bit, so adjust dst downwards.  */
+    add	dst, dst, tmp2
+1:
+    sub	count, count, zva_len_x
+3:
+    dc	zva, dst
+    add	dst, dst, zva_len_x
+    subs	count, count, zva_len_x
+    b.ge	3b
+    ands	count, count, zva_bits_x
+    b.ne	.Ltail_maybe_long
+    ret
+END(__memset_aarch64_nt)
diff --git a/linker/linker_main.cpp b/linker/linker_main.cpp
index c0a68af..b1fa979 100644
--- a/linker/linker_main.cpp
+++ b/linker/linker_main.cpp
@@ -636,8 +636,8 @@
 }
 
 static void call_ifunc_resolvers() {
-  // Find the IRELATIVE relocations using the DT_JMPREL and DT_PLTRELSZ, or DT_RELA? and DT_RELA?SZ
-  // dynamic tags.
+  // Find the IRELATIVE relocations using the DT_JMPREL and DT_PLTRELSZ, or
+  // DT_RELA/DT_RELASZ (DT_REL/DT_RELSZ on ILP32).
   auto ehdr = reinterpret_cast<ElfW(Addr)>(&__ehdr_start);
   auto* phdr = reinterpret_cast<ElfW(Phdr)*>(ehdr + __ehdr_start.e_phoff);
   for (size_t i = 0; i != __ehdr_start.e_phnum; ++i) {
@@ -647,7 +647,7 @@
     auto *dyn = reinterpret_cast<ElfW(Dyn)*>(ehdr + phdr[i].p_vaddr);
     ElfW(Addr) pltrel = 0, pltrelsz = 0, rel = 0, relsz = 0;
     for (size_t j = 0, size = phdr[i].p_filesz / sizeof(ElfW(Dyn)); j != size; ++j) {
-      // We can't handle IRELATIVE relocations in DT_ANDROID_REL[A].
+      // We don't currently handle IRELATIVE relocations in DT_ANDROID_REL[A].
       // We disabled DT_ANDROID_REL[A] at build time; verify that it was actually disabled.
       CHECK(dyn[j].d_tag != DT_ANDROID_REL && dyn[j].d_tag != DT_ANDROID_RELA);
       if (dyn[j].d_tag == DT_JMPREL) {