Obtain x86 cache info from CPU

The cache sizes are currently hardcoded in cache.h, which may not be
optimal across different uarchs/SKUs. Leverage bionic's sysconf to get
the underlying CPU's cache sizes at runtime instead.
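
Roughly, the idea is for bionic to fill in the __x86_data_cache_size /
__x86_shared_cache_size globals referenced in the diff below from
sysconf, along these lines (a minimal sketch, not the actual bionic
code; it assumes sysconf exposes the glibc-style _SC_LEVEL*_CACHE_SIZE
values, and the fallback numbers are placeholders):

    /* Sketch: populate the cache-size globals used by the assembly
     * string routines from sysconf at libc startup. */
    #include <unistd.h>

    long __x86_data_cache_size;
    long __x86_shared_cache_size;

    static void init_x86_cache_sizes(void) {
      long l2 = sysconf(_SC_LEVEL2_CACHE_SIZE);
      long l3 = sysconf(_SC_LEVEL3_CACHE_SIZE);
      /* Placeholder fallbacks for CPUs where sysconf reports nothing. */
      __x86_data_cache_size   = (l2 > 0) ? l2 : 32 * 1024;
      __x86_shared_cache_size = (l3 > 0) ? l3 : ((l2 > 0) ? l2 : 1024 * 1024);
    }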

Improvements seen on RPL (Raptor Lake), for various sizes:

memmove_non_overlapping:
  1.25M: 31%
  1.5M:  30%
  1.75M: 28%

memcpy:
  1.25M: 31%
  1.5M:  31%
  1.75M: 30%

The bionic benchmarks (which only go up to 128KiB) show no change, as
you'd expect.
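
Because the cache sizes are now runtime variables rather than
assemble-time immediates, the 32-bit PIC assembly has to materialize
the GOT pointer (via __x86.get_pc_thunk and _GLOBAL_OFFSET_TABLE_) and
load the values through @GOTOFF, as in the hunks below. A rough C view
of the resulting size dispatch (illustrative only; choose_path and the
enum are made-up names):

    /* The globals are provided by bionic; declared here for the sketch. */
    extern long __x86_data_cache_size;
    extern long __x86_shared_cache_size;

    typedef enum { PATH_SMALL_LOOP, PATH_L2, PATH_NON_TEMPORAL } memset_path;

    static memset_path choose_path(unsigned long n) {
      if (n >= (unsigned long)__x86_shared_cache_size)
        return PATH_NON_TEMPORAL;  /* bypass caches with non-temporal stores */
      if (n >= (unsigned long)__x86_data_cache_size)
        return PATH_L2;            /* L(128bytes_L2_normal) in the asm */
      return PATH_SMALL_LOOP;      /* regular 128-byte loop */
    }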

Test: bionic/tests/run-on-host.sh 64 && bionic/tests/run-on-host.sh 32
Bug: 202102347
Change-Id: I4bbad51794758873744149d0f58b86bb92ee307f
Signed-off-by: Vinay Prasad Kompella <vinay.kompella@intel.com>
Signed-off-by: Soni, Ravi Kumar <ravi.kumar.soni@intel.com>
diff --git a/libc/arch-x86/string/sse2-memset-atom.S b/libc/arch-x86/string/sse2-memset-atom.S
index 320afec..e43ead0 100644
--- a/libc/arch-x86/string/sse2-memset-atom.S
+++ b/libc/arch-x86/string/sse2-memset-atom.S
@@ -31,7 +31,6 @@
 #include <private/bionic_asm.h>
 
 #define FOR_ATOM
-#include "cache.h"
 
 #ifndef L
 # define L(label)	.L##label
@@ -64,6 +63,8 @@
 #define RETURN		RETURN_END; CFI_PUSH(%ebx)
 #define JMPTBL(I, B)	I - B
 
+#define SETUP_PIC_REG(x)	call	__x86.get_pc_thunk.x
+
 /* Load an entry in a jump table into EBX and branch to it.  TABLE is a
    jump table with relative offsets.   */
 # define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
@@ -256,14 +257,20 @@
 	ALIGN(4)
 L(128bytesormore):
 	PUSH(%ebx)
-	mov	$SHARED_CACHE_SIZE, %ebx
+	SETUP_PIC_REG(bx)
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
 	cmp	%ebx, %ecx
 	jae	L(128bytesormore_nt_start)
 
 
 	POP(%ebx)
 # define RESTORE_EBX_STATE CFI_PUSH(%ebx)
-	cmp	$DATA_CACHE_SIZE, %ecx
+	PUSH(%ebx)
+	SETUP_PIC_REG(bx)
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
+	POP(%ebx)
 
 	jae	L(128bytes_L2_normal)
 	subl	$128, %ecx