Obtain x86 cache info from CPU
The cache info today is hardcoded in cache.h
May not be optimal across various uarchs/SKUs
Leverage bionic sysconf to get the underlying cache.
Improvements seen on RPL, for various sizes
memmove_non_overlapping
1.25M - 31%
1.5M - 30%
1.75M - 28%
memcpy
1.25M - 31%
1.5M - 31%
1.75M - 30%
The bionic benchmarks (which only go up to 128KiB) show no change, as
you'd expect.
Test: bionic/tests/run-on-host.sh 64 && bionic/tests/run-on-host.sh 32
Bug: 202102347
Change-Id: I4bbad51794758873744149d0f58b86bb92ee307f
Signed-off-by: Vinay Prasad Kompella <vinay.kompella@intel.com>
Signed-off-by: Soni, Ravi Kumar <ravi.kumar.soni@intel.com>
diff --git a/libc/arch-x86/string/sse2-memmove-slm.S b/libc/arch-x86/string/sse2-memmove-slm.S
index 79b5d1b..7f42374 100644
--- a/libc/arch-x86/string/sse2-memmove-slm.S
+++ b/libc/arch-x86/string/sse2-memmove-slm.S
@@ -29,7 +29,6 @@
*/
#define FOR_SILVERMONT
-#include "cache.h"
#ifndef MEMMOVE
# define MEMMOVE memmove_generic
@@ -94,6 +93,8 @@
#define RETURN_END POP (%ebx); ret
#define RETURN RETURN_END; CFI_PUSH (%ebx)
+#define SETUP_PIC_REG(x) call __x86.get_pc_thunk.x
+
.section .text.sse2,"ax",@progbits
ENTRY (MEMMOVE)
ENTRANCE
@@ -193,7 +194,13 @@
cmp %edi, %ebx
jbe L(mm_copy_remaining_forward)
- cmp $SHARED_CACHE_SIZE_HALF, %ecx
+ PUSH(%ebx)
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
+ /* Restore ebx. We can place a pop before jump as it doesn't affect any flags. */
+ POP(%ebx)
+
jae L(mm_large_page_loop_forward)
.p2align 4
@@ -424,7 +431,13 @@
cmp %edi, %ebx
jae L(mm_main_loop_backward_end)
- cmp $SHARED_CACHE_SIZE_HALF, %ecx
+ PUSH(%ebx)
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
+ /* Restore ebx. We can place a pop before jump as it doesn't affect any flags. */
+ POP(%ebx)
+
jae L(mm_large_page_loop_backward)
.p2align 4