Merge "riscv64: inline raise." into main
diff --git a/libc/Android.bp b/libc/Android.bp
index aa555a1..fe263fd 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -2487,8 +2487,6 @@
     name: "libc",
     symbol_file: "libc.map.txt",
     first_version: "9",
-    // APIs implemented in asm don't have debug info: http://b/190554910.
-    allow_untyped_symbols: true,
     export_header_libs: [
         "common_libc",
         "libc_uapi",
diff --git a/libc/SYSCALLS.TXT b/libc/SYSCALLS.TXT
index 079acf9..f4f9e1b 100644
--- a/libc/SYSCALLS.TXT
+++ b/libc/SYSCALLS.TXT
@@ -357,7 +357,7 @@
 
 # riscv64-specific
 int __riscv_flush_icache:riscv_flush_icache(void*, void*, unsigned long) riscv64
-int __riscv_hwprobe:riscv_hwprobe(riscv_hwprobe*, size_t, size_t, unsigned long*, unsigned) riscv64
+int riscv_hwprobe(riscv_hwprobe*, size_t, size_t, unsigned long*, unsigned) riscv64
 
 # x86-specific
 int     __set_thread_area:set_thread_area(void*) x86
diff --git a/libc/arch-x86_64/string/avx2-memset-kbl.S b/libc/arch-x86_64/string/avx2-memset-kbl.S
index 09dd07d..ca62a9f 100644
--- a/libc/arch-x86_64/string/avx2-memset-kbl.S
+++ b/libc/arch-x86_64/string/avx2-memset-kbl.S
@@ -63,10 +63,9 @@
 	testb	$2, %dl
 	jnz	L(2_3bytes)
 	testb	$1, %dl
-	jz	L(return)
+	jz	1f
 	movb	%cl, (%rdi)
-L(return):
-	ret
+1:	ret
 
 L(8_15bytes):
 	movq	%rcx, (%rdi)
@@ -90,59 +89,54 @@
 	movdqu	%xmm0, (%rdi)
 	movdqu	%xmm0, -16(%rdi, %rdx)
 	cmpq	$32, %rdx
-	jbe	L(32bytesless)
+	jbe	L(done)
 	movdqu	%xmm0, 16(%rdi)
 	movdqu	%xmm0, -32(%rdi, %rdx)
 	cmpq	$64, %rdx
-	jbe	L(64bytesless)
+	jbe	L(done)
 	movdqu	%xmm0, 32(%rdi)
 	movdqu	%xmm0, 48(%rdi)
 	movdqu	%xmm0, -64(%rdi, %rdx)
 	movdqu	%xmm0, -48(%rdi, %rdx)
 	cmpq	$128, %rdx
-	jbe	L(128bytesless)
-        vpbroadcastb %xmm0, %ymm0
+	jbe	L(done)
+	vpbroadcastb %xmm0, %ymm0
 	vmovdqu	%ymm0, 64(%rdi)
 	vmovdqu	%ymm0, 96(%rdi)
 	vmovdqu	%ymm0, -128(%rdi, %rdx)
 	vmovdqu	%ymm0, -96(%rdi, %rdx)
 	cmpq	$256, %rdx
-        ja      L(256bytesmore)
-L(32bytesless):
-L(64bytesless):
-L(128bytesless):
-	ret
+	jbe	L(done)
 
 	ALIGN (4)
-L(256bytesmore):
 	leaq	128(%rdi), %rcx
 	andq	$-128, %rcx
 	movq	%rdx, %r8
 	addq	%rdi, %rdx
 	andq	$-128, %rdx
 	cmpq	%rcx, %rdx
-	je	L(return)
+	je	L(done)
 
 #ifdef SHARED_CACHE_SIZE
 	cmp	$SHARED_CACHE_SIZE, %r8
 #else
 	cmp	__x86_64_shared_cache_size(%rip), %r8
 #endif
-	ja	L(256bytesmore_nt)
+	ja	L(non_temporal_loop)
 
 	ALIGN (4)
-L(256bytesmore_normal):
+L(normal_loop):
 	vmovdqa	%ymm0, (%rcx)
 	vmovdqa	%ymm0, 32(%rcx)
 	vmovdqa	%ymm0, 64(%rcx)
 	vmovdqa	%ymm0, 96(%rcx)
 	addq	$128, %rcx
 	cmpq	%rcx, %rdx
-	jne	L(256bytesmore_normal)
-	ret
+	jne	L(normal_loop)
+	jmp	L(done)
 
 	ALIGN (4)
-L(256bytesmore_nt):
+L(non_temporal_loop):
 	movntdq	 %xmm0, (%rcx)
 	movntdq	 %xmm0, 16(%rcx)
 	movntdq	 %xmm0, 32(%rcx)
@@ -153,8 +147,14 @@
 	movntdq	 %xmm0, 112(%rcx)
 	leaq	128(%rcx), %rcx
 	cmpq	%rcx, %rdx
-	jne	L(256bytesmore_nt)
+	jne	L(non_temporal_loop)
+	# We used non-temporal stores, so we need a fence here.
 	sfence
+
+L(done):
+	# We may have used the ymm registers, and that can break SSE2 performance
+	# unless their upper halves are cleared with vzeroupper.
+	vzeroupper
 	ret
 
 END(memset_avx2)
diff --git a/libc/bionic/bionic_call_ifunc_resolver.cpp b/libc/bionic/bionic_call_ifunc_resolver.cpp
index 437de78..410eb78 100644
--- a/libc/bionic/bionic_call_ifunc_resolver.cpp
+++ b/libc/bionic/bionic_call_ifunc_resolver.cpp
@@ -57,6 +57,13 @@
     hwcap = getauxval(AT_HWCAP);
   }
   return reinterpret_cast<ifunc_resolver_t>(resolver_addr)(hwcap);
+#elif defined(__riscv)
+  // This argument and its value are just placeholders for now,
+  // but it means that if we do pass something in future (such as
+  // getauxval() and/or hwprobe key/value pairs), callees will be able to
+  // recognize what they're being given.
+  typedef ElfW(Addr) (*ifunc_resolver_t)(void*);
+  return reinterpret_cast<ifunc_resolver_t>(resolver_addr)(nullptr);
 #else
   typedef ElfW(Addr) (*ifunc_resolver_t)(void);
   return reinterpret_cast<ifunc_resolver_t>(resolver_addr)();
diff --git a/libc/bionic/vdso.cpp b/libc/bionic/vdso.cpp
index dbca9c0..e834ec7 100644
--- a/libc/bionic/vdso.cpp
+++ b/libc/bionic/vdso.cpp
@@ -22,10 +22,16 @@
 #include <string.h>
 #include <sys/auxv.h>
 #include <sys/cdefs.h>
+#include <sys/hwprobe.h>
 #include <sys/time.h>
 #include <time.h>
 #include <unistd.h>
 
+extern "C" int __clock_gettime(int, struct timespec*);
+extern "C" int __clock_getres(int, struct timespec*);
+extern "C" int __gettimeofday(struct timeval*, struct timezone*);
+extern "C" int riscv_hwprobe(struct riscv_hwprobe*, size_t, size_t, unsigned long*, unsigned);
+
 static inline int vdso_return(int result) {
   if (__predict_true(result == 0)) return 0;
 
@@ -61,10 +67,13 @@
 }
 
 time_t time(time_t* t) {
+  // Only x86/x86-64 actually have time() in the vdso.
+#if defined(VDSO_TIME_SYMBOL)
   auto vdso_time = reinterpret_cast<decltype(&time)>(__libc_globals->vdso[VDSO_TIME].fn);
   if (__predict_true(vdso_time)) {
     return vdso_time(t);
   }
+#endif
 
   // We can't fallback to the time(2) system call because it doesn't exist for most architectures.
   timeval tv;
@@ -73,12 +82,29 @@
   return tv.tv_sec;
 }
 
+#if defined(__riscv)  // __riscv_hwprobe(): try the vdso first, then the syscall stub.
+int __riscv_hwprobe(struct riscv_hwprobe* _Nonnull pairs, size_t pair_count, size_t cpu_count,
+                    unsigned long* _Nullable cpus, unsigned flags) {
+  auto vdso_riscv_hwprobe =
+      reinterpret_cast<decltype(&__riscv_hwprobe)>(__libc_globals->vdso[VDSO_RISCV_HWPROBE].fn);
+  if (__predict_true(vdso_riscv_hwprobe)) {  // Non-null slot: call through the vdso.
+    return vdso_return(vdso_riscv_hwprobe(pairs, pair_count, cpu_count, cpus, flags));
+  }
+  return riscv_hwprobe(pairs, pair_count, cpu_count, cpus, flags);  // Null slot: syscall stub.
+}
+#endif  // defined(__riscv)
+
 void __libc_init_vdso(libc_globals* globals) {
   auto&& vdso = globals->vdso;
-  vdso[VDSO_CLOCK_GETTIME] = { VDSO_CLOCK_GETTIME_SYMBOL, nullptr };
-  vdso[VDSO_CLOCK_GETRES] = { VDSO_CLOCK_GETRES_SYMBOL, nullptr };
-  vdso[VDSO_GETTIMEOFDAY] = { VDSO_GETTIMEOFDAY_SYMBOL, nullptr };
-  vdso[VDSO_TIME] = { VDSO_TIME_SYMBOL, nullptr };
+  vdso[VDSO_CLOCK_GETTIME] = {VDSO_CLOCK_GETTIME_SYMBOL, nullptr};
+  vdso[VDSO_CLOCK_GETRES] = {VDSO_CLOCK_GETRES_SYMBOL, nullptr};
+  vdso[VDSO_GETTIMEOFDAY] = {VDSO_GETTIMEOFDAY_SYMBOL, nullptr};
+#if defined(VDSO_TIME_SYMBOL)
+  vdso[VDSO_TIME] = {VDSO_TIME_SYMBOL, nullptr};
+#endif
+#if defined(VDSO_RISCV_HWPROBE_SYMBOL)
+  vdso[VDSO_RISCV_HWPROBE] = {VDSO_RISCV_HWPROBE_SYMBOL, nullptr};
+#endif
 
   // Do we have a vdso?
   uintptr_t vdso_ehdr_addr = getauxval(AT_SYSINFO_EHDR);
diff --git a/libc/private/bionic_asm_offsets.h b/libc/private/bionic_asm_offsets.h
index c2f2b56..e72adda 100644
--- a/libc/private/bionic_asm_offsets.h
+++ b/libc/private/bionic_asm_offsets.h
@@ -28,6 +28,6 @@
 
 #pragma once
 
-#ifdef __aarch64__
-#define OFFSETOF_libc_globals_memtag_stack 80
+#if defined(__aarch64__)
+#define OFFSETOF_libc_globals_memtag_stack 64
 #endif
diff --git a/libc/private/bionic_vdso.h b/libc/private/bionic_vdso.h
index da19b29..406b064 100644
--- a/libc/private/bionic_vdso.h
+++ b/libc/private/bionic_vdso.h
@@ -26,26 +26,23 @@
  * SUCH DAMAGE.
  */
 
-#ifndef _PRIVATE_BIONIC_VDSO_H
-#define _PRIVATE_BIONIC_VDSO_H
-
-#include <time.h>
+#pragma once
 
 #if defined(__aarch64__)
 #define VDSO_CLOCK_GETTIME_SYMBOL "__kernel_clock_gettime"
-#define VDSO_CLOCK_GETRES_SYMBOL  "__kernel_clock_getres"
-#define VDSO_GETTIMEOFDAY_SYMBOL  "__kernel_gettimeofday"
-#define VDSO_TIME_SYMBOL          "__kernel_time"
+#define VDSO_CLOCK_GETRES_SYMBOL "__kernel_clock_getres"
+#define VDSO_GETTIMEOFDAY_SYMBOL "__kernel_gettimeofday"
 #else
 #define VDSO_CLOCK_GETTIME_SYMBOL "__vdso_clock_gettime"
-#define VDSO_CLOCK_GETRES_SYMBOL  "__vdso_clock_getres"
-#define VDSO_GETTIMEOFDAY_SYMBOL  "__vdso_gettimeofday"
-#define VDSO_TIME_SYMBOL          "__vdso_time"
+#define VDSO_CLOCK_GETRES_SYMBOL "__vdso_clock_getres"
+#define VDSO_GETTIMEOFDAY_SYMBOL "__vdso_gettimeofday"
 #endif
-
-extern "C" int __clock_gettime(int, timespec*);
-extern "C" int __clock_getres(int, timespec*);
-extern "C" int __gettimeofday(timeval*, struct timezone*);
+#if defined(__riscv)
+#define VDSO_RISCV_HWPROBE_SYMBOL "__vdso_riscv_hwprobe"
+#endif
+#if defined(__i386__) || defined(__x86_64__)
+#define VDSO_TIME_SYMBOL "__vdso_time"
+#endif
 
 struct vdso_entry {
   const char* name;
@@ -56,8 +53,11 @@
   VDSO_CLOCK_GETTIME = 0,
   VDSO_CLOCK_GETRES,
   VDSO_GETTIMEOFDAY,
+#if defined(VDSO_TIME_SYMBOL)
   VDSO_TIME,
+#endif
+#if defined(VDSO_RISCV_HWPROBE_SYMBOL)
+  VDSO_RISCV_HWPROBE,
+#endif
   VDSO_END
 };
-
-#endif  // _PRIVATE_BIONIC_VDSO_H
diff --git a/libm/Android.bp b/libm/Android.bp
index bfb7a8c..a89885f 100644
--- a/libm/Android.bp
+++ b/libm/Android.bp
@@ -466,8 +466,6 @@
     name: "libm",
     symbol_file: "libm.map.txt",
     first_version: "9",
-    // APIs implemented in asm don't have debug info: http://b/190554910.
-    allow_untyped_symbols: true,
 }
 
 genrule {
diff --git a/tests/execinfo_test.cpp b/tests/execinfo_test.cpp
index b8e1325..1a0c51b 100644
--- a/tests/execinfo_test.cpp
+++ b/tests/execinfo_test.cpp
@@ -79,9 +79,13 @@
 }
 
 static size_t FindFunction(std::vector<void*>& frames, uintptr_t func_addr) {
+  Dl_info func_info;
+  if (!dladdr(reinterpret_cast<void*>(func_addr), &func_info)) {
+    return 0;
+  }
   for (size_t i = 0; i < frames.size(); i++) {
-    uintptr_t frame_addr = reinterpret_cast<uintptr_t>(frames[i]);
-    if (frame_addr >= func_addr && frame_addr <= func_addr + 0x100) {
+    Dl_info frame_info;
+    if (dladdr(frames[i], &frame_info) && func_info.dli_saddr == frame_info.dli_saddr) {
       return i + 1;
     }
   }