Custom memcpy implementation for Qualcomm Oryon CPU

Submitted on behalf of a third-party: Arm Limited

License rights, if any, to the submission are granted solely by the
copyright owner of such submission under its applicable intellectual
property.

Copyright (c) 2012-2022, Arm Limited.
SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception

Origin Project URL: https://github.com/ARM-software/optimized-routines
Tag: v24.01

Third Party code includes additions/modifications from Qualcomm Innovation Center, Inc.

Test: All
Change-Id: I0c97398a435e3f8ddf8ad38bc6bd71cc0d78aea5
diff --git a/libc/arch-arm64/dynamic_function_dispatch.cpp b/libc/arch-arm64/dynamic_function_dispatch.cpp
index 450138c..ca7f39f 100644
--- a/libc/arch-arm64/dynamic_function_dispatch.cpp
+++ b/libc/arch-arm64/dynamic_function_dispatch.cpp
@@ -30,6 +30,19 @@
 #include <stddef.h>
 #include <sys/auxv.h>
 
+#define MIDR_IMPL_ID_SHIFT 24u
+#define MIDR_IMPL_ID_MASK 0xFF
+#define CPU_VARIANT_SHIFT 20u
+#define CPU_VARIANT_MASK 0xF
+
+/* MIDR_EL1 implementer code (bits [31:24]) that identifies Qualcomm CPUs */
+#define QCOM_IMPL_ID 0x51
+
+/* Highest MIDR_EL1 variant value (bits [23:20]) among the Qualcomm Oryon
+ * CPUs that are dispatched to the __memcpy_aarch64_nt routine
+ */
+#define QCOM_ORYON_CPU_VARIANTS 0x5
+
 extern "C" {
 
 typedef void* memchr_func(const void*, int, size_t);
@@ -49,20 +62,72 @@
 
 typedef void* memcpy_func(void*, const void*, size_t);
 DEFINE_IFUNC_FOR(memcpy) {
-    if (arg->_hwcap & HWCAP_ASIMD) {
-        RETURN_FUNC(memcpy_func, __memcpy_aarch64_simd);
-    } else {
-        RETURN_FUNC(memcpy_func, __memcpy_aarch64);
-    }
+  /* Resolver for memcpy: returns the routine to use, chosen from the hwcap
+   * bits in `arg` and, when it may be read, the MIDR_EL1 register.
+   */
+  /* MIDR_EL1 is read only if HWCAP_CPUID is set; without it the CPU cannot
+   * be identified here and selection falls back to the hwcap bits below.
+   */
+  if (arg->_hwcap & HWCAP_CPUID) {
+    unsigned long midr;
+    asm("mrs %0, MIDR_EL1 \n\t" : "=r"(midr));
+
+    unsigned int impl_id = (midr >> MIDR_IMPL_ID_SHIFT) & MIDR_IMPL_ID_MASK;
+    unsigned int cpu_variant = (midr >> CPU_VARIANT_SHIFT) & CPU_VARIANT_MASK;
+
+    /* Qualcomm Oryon CPU variants 0x1..0x5 use __memcpy_aarch64_nt.
+     * Variant 0x0 is not in that list, so it is excluded explicitly.
+     */
+    if (impl_id == QCOM_IMPL_ID && cpu_variant >= 0x1 &&
+        cpu_variant <= QCOM_ORYON_CPU_VARIANTS) {
+      RETURN_FUNC(memcpy_func, __memcpy_aarch64_nt);
+    }
+  }
+
+  /* Every other CPU, including Qualcomm parts outside the Oryon variant
+   * range, keeps the hwcap-based choice: the _simd routine when
+   * HWCAP_ASIMD is set, the plain AArch64 routine otherwise.
+   */
+  if (arg->_hwcap & HWCAP_ASIMD) {
+    RETURN_FUNC(memcpy_func, __memcpy_aarch64_simd);
+  } else {
+    RETURN_FUNC(memcpy_func, __memcpy_aarch64);
+  }
 }
 
 typedef void* memmove_func(void*, const void*, size_t);
 DEFINE_IFUNC_FOR(memmove) {
-    if (arg->_hwcap & HWCAP_ASIMD) {
-        RETURN_FUNC(memmove_func, __memmove_aarch64_simd);
-    } else {
-        RETURN_FUNC(memmove_func, __memmove_aarch64);
-    }
+  /* Resolver for memmove: returns the routine to use, chosen from the hwcap
+   * bits in `arg` and, when it may be read, the MIDR_EL1 register.
+   */
+  /* MIDR_EL1 is read only if HWCAP_CPUID is set; without it the CPU cannot
+   * be identified here and selection falls back to the hwcap bits below.
+   */
+  if (arg->_hwcap & HWCAP_CPUID) {
+    unsigned long midr;
+    asm("mrs %0, MIDR_EL1 \n\t" : "=r"(midr));
+
+    unsigned int impl_id = (midr >> MIDR_IMPL_ID_SHIFT) & MIDR_IMPL_ID_MASK;
+    unsigned int cpu_variant = (midr >> CPU_VARIANT_SHIFT) & CPU_VARIANT_MASK;
+
+    /* Qualcomm Oryon CPU variants 0x1..0x5 use __memmove_aarch64_nt.
+     * Variant 0x0 is not in that list, so it is excluded explicitly.
+     */
+    if (impl_id == QCOM_IMPL_ID && cpu_variant >= 0x1 &&
+        cpu_variant <= QCOM_ORYON_CPU_VARIANTS) {
+      RETURN_FUNC(memmove_func, __memmove_aarch64_nt);
+    }
+  }
+
+  /* Every other CPU, including Qualcomm parts outside the Oryon variant
+   * range, keeps the hwcap-based choice: the _simd routine when
+   * HWCAP_ASIMD is set, the plain AArch64 routine otherwise.
+   */
+  if (arg->_hwcap & HWCAP_ASIMD) {
+    RETURN_FUNC(memmove_func, __memmove_aarch64_simd);
+  } else {
+    RETURN_FUNC(memmove_func, __memmove_aarch64);
+  }
 }
 
 typedef int memrchr_func(const void*, int, size_t);