Obtain x86 cache info from CPU
The cache sizes are currently hardcoded in cache.h, which may not be
optimal across different uarchs/SKUs. Leverage bionic's sysconf to query
the underlying cache sizes at runtime instead.
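As a rough sketch of the idea (not the code in this change), the cache-size
globals read by the assembly below could be populated from sysconf. The
_SC_LEVEL*_CACHE_SIZE names are the glibc-style constants that bionic also
exposes; the init function name, the selection policy, and the fallback
values here are assumptions for illustration only.

  #include <unistd.h>

  /* Globals with the same names the assembly reads via @GOTOFF.
   * The selection policy below is an assumption, not bionic's code. */
  long __x86_data_cache_size;
  long __x86_shared_cache_size;

  /* Hypothetical init hook, run once before the string routines. */
  void __libc_init_x86_cache_info(void) {
    long l1d = sysconf(_SC_LEVEL1_DCACHE_SIZE);
    long l2  = sysconf(_SC_LEVEL2_CACHE_SIZE);
    long l3  = sysconf(_SC_LEVEL3_CACHE_SIZE);

    /* Per-core threshold: use L1d if reported, else a placeholder. */
    __x86_data_cache_size = (l1d > 0) ? l1d : 32 * 1024;
    /* Shared threshold: prefer L3, fall back to L2, else a placeholder. */
    __x86_shared_cache_size = (l3 > 0) ? l3 : (l2 > 0 ? l2 : 1024 * 1024);
  }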
Improvements seen on RPL for various copy sizes:
memmove_non_overlapping:
  1.25M: 31%
  1.5M:  30%
  1.75M: 28%
memcpy:
  1.25M: 31%
  1.5M:  31%
  1.75M: 30%
The bionic benchmarks (which only go up to 128KiB) show no change, as
you'd expect.
Test: bionic/tests/run-on-host.sh 64 && bionic/tests/run-on-host.sh 32
Bug: 202102347
Change-Id: I4bbad51794758873744149d0f58b86bb92ee307f
Signed-off-by: Vinay Prasad Kompella <vinay.kompella@intel.com>
Signed-off-by: Soni, Ravi Kumar <ravi.kumar.soni@intel.com>
diff --git a/libc/arch-x86/string/sse2-memset-atom.S b/libc/arch-x86/string/sse2-memset-atom.S
index 320afec..e43ead0 100644
--- a/libc/arch-x86/string/sse2-memset-atom.S
+++ b/libc/arch-x86/string/sse2-memset-atom.S
@@ -31,7 +31,6 @@
#include <private/bionic_asm.h>
#define FOR_ATOM
-#include "cache.h"
#ifndef L
# define L(label) .L##label
@@ -64,6 +63,8 @@
#define RETURN RETURN_END; CFI_PUSH(%ebx)
#define JMPTBL(I, B) I - B
+#define SETUP_PIC_REG(x) call __x86.get_pc_thunk.x
+
/* Load an entry in a jump table into EBX and branch to it. TABLE is a
jump table with relative offsets. */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
@@ -256,14 +257,20 @@
ALIGN(4)
L(128bytesormore):
PUSH(%ebx)
- mov $SHARED_CACHE_SIZE, %ebx
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ mov __x86_shared_cache_size@GOTOFF(%ebx), %ebx
cmp %ebx, %ecx
jae L(128bytesormore_nt_start)
POP(%ebx)
# define RESTORE_EBX_STATE CFI_PUSH(%ebx)
- cmp $DATA_CACHE_SIZE, %ecx
+ PUSH(%ebx)
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_data_cache_size@GOTOFF(%ebx), %ecx
+ POP(%ebx)
jae L(128bytes_L2_normal)
subl $128, %ecx