diff --git a/libc/Android.bp b/libc/Android.bp
index 5a75c6b..84fa498 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -1076,6 +1076,7 @@
                 "arch-arm64/bionic/syscall.S",
                 "arch-arm64/bionic/vfork.S",
                 "arch-arm64/oryon/memcpy-nt.S",
+                "arch-arm64/oryon/memset-nt.S",
             ],
         },
 
diff --git a/libc/NOTICE b/libc/NOTICE
index 0279d2a..1a84d3c 100644
--- a/libc/NOTICE
+++ b/libc/NOTICE
@@ -4024,6 +4024,33 @@
 
 -------------------------------------------------------------------
 
+Copyright (c) 2012, Linaro Limited
+   All rights reserved.
+   Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+       * Redistributions of source code must retain the above copyright
+         notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright
+         notice, this list of conditions and the following disclaimer in the
+         documentation and/or other materials provided with the distribution.
+       * Neither the name of the Linaro nor the
+         names of its contributors may be used to endorse or promote products
+         derived from this software without specific prior written permission.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-------------------------------------------------------------------
+
 Copyright (c) 2012-2014 ARM Ltd
 All rights reserved.
 
diff --git a/libc/arch-arm64/dynamic_function_dispatch.cpp b/libc/arch-arm64/dynamic_function_dispatch.cpp
index ca7f39f..db002b8 100644
--- a/libc/arch-arm64/dynamic_function_dispatch.cpp
+++ b/libc/arch-arm64/dynamic_function_dispatch.cpp
@@ -137,7 +137,32 @@
 
 typedef int memset_func(void*, int, size_t);
 DEFINE_IFUNC_FOR(memset) {
+  /*
+   * Resolver for memset.  Returns __memset_aarch64_nt when HWCAP_CPUID is
+   * set and MIDR_EL1 reports the Qualcomm implementer ID together with a
+   * variant field no greater than QCOM_ORYON_CPU_VARIANTS; every other
+   * case resolves to __memset_aarch64.
+   */
+  if (arg->_hwcap & HWCAP_CPUID) {
+    /* MIDR_EL1 is only read once the HWCAP_CPUID bit has been seen. */
+    unsigned long midr_el1;
+    asm("mrs %0, MIDR_EL1 \n\t" : "=r"(midr_el1));
+
+    const unsigned int implementer = (midr_el1 >> MIDR_IMPL_ID_SHIFT) & (MIDR_IMPL_ID_MASK);
+    const unsigned int variant = (midr_el1 >> CPU_VARIANT_SHIFT) & CPU_VARIANT_MASK;
+
+    /* NOTE(review): "<=" also admits variant 0x0, not only 0x1-0x5 --
+     * confirm that this is intended. */
+    const bool use_nt = (implementer == QCOM_IMPL_ID) && (variant <= QCOM_ORYON_CPU_VARIANTS);
+
+    if (use_nt) {
+      RETURN_FUNC(memset_func, __memset_aarch64_nt);
+    } else {
+      RETURN_FUNC(memset_func, __memset_aarch64);
+    }
+  } else {
     RETURN_FUNC(memset_func, __memset_aarch64);
+  }
 }
 
 typedef char* stpcpy_func(char*, const char*, size_t);
diff --git a/libc/arch-arm64/oryon/memset-nt.S b/libc/arch-arm64/oryon/memset-nt.S
new file mode 100644
index 0000000..b91e7da
--- /dev/null
+++ b/libc/arch-arm64/oryon/memset-nt.S
@@ -0,0 +1,218 @@
+/* Copyright (c) 2012, Linaro Limited
+   All rights reserved.
+   Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+       * Redistributions of source code must retain the above copyright
+         notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright
+         notice, this list of conditions and the following disclaimer in the
+         documentation and/or other materials provided with the distribution.
+       * Neither the name of the Linaro nor the
+         names of its contributors may be used to endorse or promote products
+         derived from this software without specific prior written permission.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Unaligned accesses
+ *
+ */
+#include <private/bionic_asm.h>
+
+#define dstin		x0	/* Destination argument; never written, so it is also the return value.  */
+#define val		    w1	/* Fill value argument; only the low 8 bits are used.  */
+#define count		x2	/* Bytes still to be set.  */
+#define tmp1		x3	/* Scratch.  */
+#define tmp1w		w3	/* 32-bit view of tmp1.  */
+#define tmp2		x4	/* Scratch, mostly the byte distance to the next aligned address.  */
+#define tmp2w		w4	/* 32-bit view of tmp2; not referenced in this file.  */
+#define zva_len_x	x5	/* 64-bit view of zva_len.  */
+#define zva_len		w5	/* DC ZVA block length, computed as 4 << DCZID_EL0[3:0].  */
+#define zva_bits_x	x6	/* zva_len_x - 1, used as an alignment mask.  */
+#define A_l		    x7	/* Fill byte replicated across all 64 bits.  */
+#define A_lw		w7	/* 32-bit view of A_l.  */
+#define dst		    x8	/* Running store pointer.  */
+#define tmp3w		w9	/* Holds the constant 4 that is shifted to form zva_len.  */
+#define tmp4        x10	/* Holds SMALL_BUFFER_SIZE for the non-temporal size check.  */
+#define SMALL_BUFFER_SIZE    96	/* In KiB: it is shifted left by 10 where it is compared with count.  */
+
+/* void *__memset_aarch64_nt(void *dstin, int val, size_t count)
+ *
+ * In:  x0 = dstin, w1 = val (only the low 8 bits are used), x2 = count.
+ * Out: x0 = dstin (never written).  Modifies x2-x10 and the flags; no stack.
+ * Fills that reach .Lnot_short with more than SMALL_BUFFER_SIZE KiB left are
+ * written with STNP (non-temporal) stores; other paths use STP or DC ZVA.  */
+ENTRY(__memset_aarch64_nt)
+    mov	dst, dstin		/* Preserve return value.  */
+    ands	A_lw, val, #255		/* Keep only the fill byte.  */
+    b.eq	.Lzero_mem		/* Fill byte is 0: may use DC ZVA.  */
+    orr	A_lw, A_lw, A_lw, lsl #8	/* Replicate the byte to 16 bits,  */
+    orr	A_lw, A_lw, A_lw, lsl #16	/* then to 32 bits,  */
+    orr	A_l, A_l, A_l, lsl #32	/* then to all 64 bits.  */
+.Ltail_maybe_long:
+    cmp	count, #64
+    b.ge	.Lnot_short
+.Ltail_maybe_tiny:
+    cmp	count, #15
+    b.le	.Ltail15tiny
+.Ltail63:
+    ands	tmp1, count, #0x30	/* tmp1 = bytes in whole 16-byte chunks.  */
+    b.eq	.Ltail15
+    add	dst, dst, tmp1		/* Stores below go backwards from here.  */
+    cmp	tmp1w, #0x20
+    b.eq	1f			/* 32 bytes: two stores.  */
+    b.lt	2f			/* 16 bytes: one store.  */
+    stp	A_l, A_l, [dst, #-48]
+1:
+    stp	A_l, A_l, [dst, #-32]
+2:
+    stp	A_l, A_l, [dst, #-16]
+.Ltail15:
+    and	count, count, #15
+    add	dst, dst, count
+    stp	A_l, A_l, [dst, #-16]	/* Repeat some/all of last store. */
+    ret
+.Ltail15tiny:
+    /* Set up to 15 bytes.  Does not assume earlier memory being set.  */
+    tbz	count, #3, 1f
+    str	A_l, [dst], #8
+1:
+    tbz	count, #2, 1f
+    str	A_lw, [dst], #4
+1:
+    tbz	count, #1, 1f
+    strh	A_lw, [dst], #2
+1:
+    tbz	count, #0, 1f
+    strb	A_lw, [dst]
+1:
+    ret
+    /* .Lnot_short is 64-byte aligned.  NOTE(review): the STP loop starts 13
+     * instructions (52 bytes) in and is 24 bytes, so it crosses that boundary.  */
+    .p2align 6
+.Lnot_short:
+    mov	tmp4, #SMALL_BUFFER_SIZE
+    cmp	count, tmp4, lsl #10
+    /* More than SMALL_BUFFER_SIZE KiB left (signed compare): use STNP.  */
+    b.gt	.Lnot_short_nt
+    neg	tmp2, dst
+    ands	tmp2, tmp2, #15
+    b.eq	2f
+    /* Bring DST to 128-bit (16-byte) alignment.  We know that there's
+     * more than that to set, so we simply store 16 bytes and advance by
+     * the amount required to reach alignment.  */
+    sub	count, count, tmp2
+    stp	A_l, A_l, [dst]
+    add	dst, dst, tmp2
+    /* There may be less than 63 bytes to go now.  */
+    cmp	count, #63
+    b.le	.Ltail63
+2:
+    sub	dst, dst, #16		/* Pre-bias.  */
+    sub	count, count, #64
+1:
+    stp	A_l, A_l, [dst, #16]
+    stp	A_l, A_l, [dst, #32]
+    stp	A_l, A_l, [dst, #48]
+    stp	A_l, A_l, [dst, #64]!
+    subs	count, count, #64
+    b.ge	1b
+    tst	count, #0x3f		/* Low 6 bits = bytes still to set.  */
+    add	dst, dst, #16		/* Undo the pre-bias.  */
+    b.ne	.Ltail63
+    ret
+.Lnot_short_nt:
+    neg	tmp2, dst
+    ands	tmp2, tmp2, #15
+    b.eq	2f
+    /* Same 16-byte alignment fix-up as above, with a non-temporal store.  */
+    sub	count, count, tmp2
+    stnp	A_l, A_l, [dst]
+    add	dst, dst, tmp2
+    cmp	count, #63		/* Never true: count was above SMALL_BUFFER_SIZE KiB.  */
+    b.le	.Ltail63
+2:
+    sub	dst, dst, #16		/* Pre-bias.  */
+    sub	count, count, #64
+1:
+    stnp	A_l, A_l, [dst, #16]
+    stnp	A_l, A_l, [dst, #32]
+    stnp	A_l, A_l, [dst, #48]
+    stnp	A_l, A_l, [dst, #64]
+    add	dst, dst, #64		/* Explicit advance: this loop uses no write-back.  */
+    subs	count, count, #64
+    b.ge	1b
+    tst	count, #0x3f		/* Low 6 bits = bytes still to set.  */
+    add	dst, dst, #16		/* Undo the pre-bias.  */
+    b.ne	.Ltail63
+    ret
+.Lzero_mem:
+    mov	A_l, #0
+    cmp	count, #63
+    b.le	.Ltail_maybe_tiny
+    neg	tmp2, dst
+    ands	tmp2, tmp2, #15
+    b.eq	1f
+    sub	count, count, tmp2
+    stp	A_l, A_l, [dst]
+    add	dst, dst, tmp2
+    cmp	count, #63
+    b.le	.Ltail63
+1:
+    /* Zeroing small amounts is not worth the line-clear set-up.  */
+    cmp	count, #128
+    b.lt	.Lnot_short
+    mrs	tmp1, dczid_el0
+    tbnz	tmp1, #4, .Lnot_short	/* Bit 4 set: do not use DC ZVA.  */
+    mov	tmp3w, #4
+    and	zva_len, tmp1w, #15	/* Safety: other bits reserved.  */
+    lsl	zva_len, tmp3w, zva_len	/* zva_len = 4 << DCZID_EL0[3:0].  */
+.Lzero_by_line:
+    /* Compute how far we need to go to become suitably aligned.  We're
+     * already at quad-word alignment.  */
+    cmp	count, zva_len_x
+    b.lt	.Lnot_short		/* Not enough to reach alignment.  */
+    sub	zva_bits_x, zva_len_x, #1
+    neg	tmp2, dst
+    ands	tmp2, tmp2, zva_bits_x
+    b.eq	1f			/* Already aligned.  */
+    /* Not aligned, check that there's enough to copy after alignment.  */
+    sub	tmp1, count, tmp2
+    cmp	tmp1, #64
+    ccmp	tmp1, zva_len_x, #8, ge	/* NZCV=0b1000 */
+    b.lt	.Lnot_short
+    /* At least 64 bytes remain and a 64-byte overrun is safe here.  */
+    mov	count, tmp1
+2:
+    stp	A_l, A_l, [dst]
+    stp	A_l, A_l, [dst, #16]
+    stp	A_l, A_l, [dst, #32]
+    subs	tmp2, tmp2, #64
+    stp	A_l, A_l, [dst, #48]
+    add	dst, dst, #64
+    b.ge	2b
+    /* We've overrun a bit, so adjust dst downwards.  */
+    add	dst, dst, tmp2
+1:
+    sub	count, count, zva_len_x
+3:
+    dc	zva, dst
+    add	dst, dst, zva_len_x
+    subs	count, count, zva_len_x
+    b.ge	3b
+    ands	count, count, zva_bits_x	/* Bytes left after the last block.  */
+    b.ne	.Ltail_maybe_long
+    ret
+END(__memset_aarch64_nt)
