Obtain x86 cache info from CPU

The cache info is currently hardcoded in cache.h, so it may not be
optimal across various uarchs/SKUs. Leverage bionic's sysconf-backed
cache information to get the underlying CPU's actual cache sizes at
runtime.
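
As a sketch of the mechanism (illustrative only: the initializer name
and the fallback values are assumptions, and bionic's actual wiring may
differ; the __x86_* globals are the symbols the assembly below reads
via @GOTOFF):

  #include <unistd.h>

  /* Cache-size globals the assembly reads via @GOTOFF. */
  unsigned long __x86_shared_cache_size;
  unsigned long __x86_shared_cache_size_half;
  unsigned long __x86_data_cache_size;
  unsigned long __x86_data_cache_size_half;

  /* Hypothetical one-time initializer: ask sysconf(3) for the real
     cache sizes instead of baking in compile-time constants, and fall
     back to the old Silvermont defaults if nothing is reported. */
  static void __init_x86_cache_sizes(void) {
    long l1d = sysconf(_SC_LEVEL1_DCACHE_SIZE);
    long l2  = sysconf(_SC_LEVEL2_CACHE_SIZE);
    __x86_data_cache_size   = (l1d > 0) ? (unsigned long)l1d : 24 * 1024;
    __x86_shared_cache_size = (l2  > 0) ? (unsigned long)l2  : 1024 * 1024;
    __x86_data_cache_size_half   = __x86_data_cache_size / 2;
    __x86_shared_cache_size_half = __x86_shared_cache_size / 2;
  }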

Improvements seen on RPL (Raptor Lake), for various copy sizes:

memmove_non_overlapping:
1.25M - 31%
1.5M  - 30%
1.75M - 28%

memcpy:
1.25M - 31%
1.5M  - 31%
1.75M - 30%

The bionic benchmarks (which only go up to 128KiB) show no change, as
you'd expect.
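
For context on the SETUP_PIC_REG/@GOTOFF sequences added below: 32-bit
PIC code can't compare against a link-time constant embedded as an
immediate, so the former constants become hidden globals loaded through
the GOT. A C analogue of the before/after (function names are made up
for illustration):

  /* Before: threshold baked into the instruction as an immediate. */
  #define SHARED_CACHE_SIZE_HALF (512 * 1024)
  static int was_large(unsigned long len) {
    return len >= SHARED_CACHE_SIZE_HALF;
  }

  /* After: threshold read from a hidden global at runtime. Built with
     -m32 -fPIC, this load compiles to the same call __x86.get_pc_thunk.*,
     add $_GLOBAL_OFFSET_TABLE_, and @GOTOFF access sequence that the
     hand-written assembly now uses. */
  extern unsigned long __x86_shared_cache_size_half
      __attribute__((visibility("hidden")));
  static int is_large(unsigned long len) {
    return len >= __x86_shared_cache_size_half;
  }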

Test: bionic/tests/run-on-host.sh 64 && bionic/tests/run-on-host.sh 32
Bug: 202102347
Change-Id: I4bbad51794758873744149d0f58b86bb92ee307f
Signed-off-by: Vinay Prasad Kompella <vinay.kompella@intel.com>
Signed-off-by: Soni, Ravi Kumar <ravi.kumar.soni@intel.com>
diff --git a/libc/arch-x86/string/cache.h b/libc/arch-x86/string/cache.h
deleted file mode 100644
index 33719a0..0000000
--- a/libc/arch-x86/string/cache.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
-Copyright (c) 2010, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifdef FOR_ATOM
-#define SHARED_CACHE_SIZE (512 * 1024) /* Atom L2 Cache */
-#endif
-#ifdef FOR_SILVERMONT
-#define SHARED_CACHE_SIZE (1024 * 1024) /* Silvermont L2 Cache */
-#endif
-
-#define DATA_CACHE_SIZE (24 * 1024) /* Atom and Silvermont L1 Data Cache */
-
-#define SHARED_CACHE_SIZE_HALF (SHARED_CACHE_SIZE / 2)
-#define DATA_CACHE_SIZE_HALF (DATA_CACHE_SIZE / 2)
diff --git a/libc/arch-x86/string/sse2-memmove-slm.S b/libc/arch-x86/string/sse2-memmove-slm.S
index 79b5d1b..7f42374 100644
--- a/libc/arch-x86/string/sse2-memmove-slm.S
+++ b/libc/arch-x86/string/sse2-memmove-slm.S
@@ -29,7 +29,6 @@
 */
 
 #define FOR_SILVERMONT
-#include "cache.h"
 
 #ifndef MEMMOVE
 # define MEMMOVE	memmove_generic
@@ -94,6 +93,8 @@
 #define RETURN_END	POP (%ebx); ret
 #define RETURN		RETURN_END; CFI_PUSH (%ebx)
 
+#define SETUP_PIC_REG(x)	call	__x86.get_pc_thunk.x
+
 	.section .text.sse2,"ax",@progbits
 ENTRY (MEMMOVE)
 	ENTRANCE
@@ -193,7 +194,13 @@
 	cmp	%edi, %ebx
 	jbe	L(mm_copy_remaining_forward)
 
-	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
+	PUSH(%ebx)
+	SETUP_PIC_REG(bx)
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
+	/* Restore ebx. The pop can go before the jump because pop does not affect flags. */
+	POP(%ebx)
+
 	jae	L(mm_large_page_loop_forward)
 
 	.p2align 4
@@ -424,7 +431,13 @@
 	cmp	%edi, %ebx
 	jae	L(mm_main_loop_backward_end)
 
-	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
+	PUSH(%ebx)
+	SETUP_PIC_REG(bx)
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
+	/* Restore ebx. The pop can go before the jump because pop does not affect flags. */
+	POP(%ebx)
+
 	jae	L(mm_large_page_loop_backward)
 
 	.p2align 4
diff --git a/libc/arch-x86/string/sse2-memset-atom.S b/libc/arch-x86/string/sse2-memset-atom.S
index 320afec..e43ead0 100644
--- a/libc/arch-x86/string/sse2-memset-atom.S
+++ b/libc/arch-x86/string/sse2-memset-atom.S
@@ -31,7 +31,6 @@
 #include <private/bionic_asm.h>
 
 #define FOR_ATOM
-#include "cache.h"
 
 #ifndef L
 # define L(label)	.L##label
@@ -64,6 +63,8 @@
 #define RETURN		RETURN_END; CFI_PUSH(%ebx)
 #define JMPTBL(I, B)	I - B
 
+#define SETUP_PIC_REG(x)	call	__x86.get_pc_thunk.x
+
 /* Load an entry in a jump table into EBX and branch to it.  TABLE is a
    jump table with relative offsets.   */
 # define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
@@ -256,14 +257,20 @@
 	ALIGN(4)
 L(128bytesormore):
 	PUSH(%ebx)
-	mov	$SHARED_CACHE_SIZE, %ebx
+	SETUP_PIC_REG(bx)
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
 	cmp	%ebx, %ecx
 	jae	L(128bytesormore_nt_start)
 
 
 	POP(%ebx)
 # define RESTORE_EBX_STATE CFI_PUSH(%ebx)
-	cmp	$DATA_CACHE_SIZE, %ecx
+	PUSH(%ebx)
+	SETUP_PIC_REG(bx)
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
+	POP(%ebx)
 
 	jae	L(128bytes_L2_normal)
 	subl	$128, %ecx
diff --git a/libc/arch-x86/string/sse2-memset-slm.S b/libc/arch-x86/string/sse2-memset-slm.S
index 5cff141..e4c8fa1 100644
--- a/libc/arch-x86/string/sse2-memset-slm.S
+++ b/libc/arch-x86/string/sse2-memset-slm.S
@@ -31,7 +31,6 @@
 #include <private/bionic_asm.h>
 
 #define FOR_SILVERMONT
-#include "cache.h"
 
 #ifndef L
 # define L(label)	.L##label
@@ -64,6 +63,8 @@
 # define RETURN		RETURN_END; CFI_PUSH(%ebx)
 # define JMPTBL(I, B)	I - B
 
+#define SETUP_PIC_REG(x)	call	__x86.get_pc_thunk.x
+
 /* Load an entry in a jump table into EBX and branch to it.  TABLE is a
    jump table with relative offsets.   */
 # define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
@@ -177,14 +178,18 @@
 	ALIGN(4)
 L(128bytesormore):
 	PUSH(%ebx)
-	mov	$SHARED_CACHE_SIZE, %ebx
+	SETUP_PIC_REG(bx)
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
 	cmp	%ebx, %ecx
 	jae	L(128bytesormore_nt_start)
 
 	POP(%ebx)
 
 	PUSH(%ebx)
-	mov	$DATA_CACHE_SIZE, %ebx
+	SETUP_PIC_REG(bx)
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	mov	__x86_data_cache_size@GOTOFF(%ebx), %ebx
 
 	cmp	%ebx, %ecx
 	jae	L(128bytes_L2_normal)
diff --git a/libc/arch-x86/string/ssse3-memcpy-atom.S b/libc/arch-x86/string/ssse3-memcpy-atom.S
index fe3082e..83e1985 100644
--- a/libc/arch-x86/string/ssse3-memcpy-atom.S
+++ b/libc/arch-x86/string/ssse3-memcpy-atom.S
@@ -29,7 +29,6 @@
 */
 
 #define FOR_ATOM
-#include "cache.h"
 
 #ifndef MEMCPY
 # define MEMCPY	memcpy_atom