riscv64: switch from x18 to gp for shadow call stack.
We want to give back a useful callee-saved general-purpose
register (x18, aka s2) that was only "chosen" as the shadow call
stack register because it was what llvm allowed for historical
reasons. gp (x3) is a better choice because it's effectively
unused otherwise anyway.
Unfortunately, unlike x18 (which was already saved in jmp_buf as
s2), gp needs a slot of its own, so we need extra space in jmp_buf
(which I've reserved in an earlier change,
e7b3b8b467bad2cd32470b5edd5cb9938b934316). While we're here, let's
rearrange the entries in jmp_buf to match their order in the
register file.
Bug: https://github.com/google/android-riscv64/issues/72
Bug: http://b/277909695
Test: treehugger
Change-Id: Ia629409a894c1a83d2052885702bbdd895c758e1
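For reviewers, here is a rough C-style sketch of the jmp_buf slot
order this change produces. It is illustrative only: bionic's real
jmp_buf is an opaque array of longs (see libc/include/setjmp.h),
and this struct and its names are invented for the sketch.

  /* Sketch only: mirrors the _JB_* offsets below, now in register-file
   * order (ra=x1, sp=x2, gp=x3, then s0-s11 and fs0-fs11). */
  struct riscv64_jmp_buf_slots {
    unsigned long sigflag;   /*  0: setjmp cookie (top 31 bits) + signal-mask flag (low bit) */
    unsigned long sigmask;   /*  1: 64-bit signal mask */
    unsigned long ra;        /*  2: x1 */
    unsigned long sp;        /*  3: x2, mangled with the cookie */
    unsigned long gp;        /*  4: x3, masked low bits of the SCS pointer, mangled */
    unsigned long s[12];     /*  5..16: s0-s11 (x8/x9, x18-x27), mangled */
    unsigned long fs[12];    /* 17..28: fs0-fs11 (f8/f9, f18-f27) */
    unsigned long checksum;  /* 29: xor of slots 0..28 */
  };
  /* riscv64 is LP64, so each slot is 8 bytes. */
  _Static_assert(sizeof(struct riscv64_jmp_buf_slots) == 30 * 8, "30 slots");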
diff --git a/libc/arch-riscv64/bionic/setjmp.S b/libc/arch-riscv64/bionic/setjmp.S
index ba3cacf..26f7ec9 100644
--- a/libc/arch-riscv64/bionic/setjmp.S
+++ b/libc/arch-riscv64/bionic/setjmp.S
@@ -36,50 +36,52 @@
// 0      sigflag/cookie  setjmp cookie in top 31 bits, signal mask flag in low bit
// 1      sigmask         64-bit signal mask
// 2      ra
-// 3      s0
+// 3      sp
+// 4      gp
+// 5      s0
// ......
-// 14     s11
-// 15     sp
-// 16     fs0
+// 16     s11
+// 17     fs0
// ......
-// 27     fs11
-// 28     checksum
+// 28     fs11
+// 29     checksum
// _JBLEN: defined in bionic/libc/include/setjmp.h
#define _JB_SIGFLAG 0
#define _JB_SIGMASK 1 * 8
#define _JB_RA 2 * 8
-#define _JB_S0 3 * 8
-#define _JB_S1 4 * 8
-#define _JB_S2 5 * 8
-#define _JB_S3 6 * 8
-#define _JB_S4 7 * 8
-#define _JB_S5 8 * 8
-#define _JB_S6 9 * 8
-#define _JB_S7 10 * 8
-#define _JB_S8 11 * 8
-#define _JB_S9 12 * 8
-#define _JB_S10 13 * 8
-#define _JB_S11 14 * 8
-#define _JB_SP 15 * 8
-#define _JB_FS0 16 * 8
-#define _JB_FS1 17 * 8
-#define _JB_FS2 18 * 8
-#define _JB_FS3 19 * 8
-#define _JB_FS4 20 * 8
-#define _JB_FS5 21 * 8
-#define _JB_FS6 22 * 8
-#define _JB_FS7 23 * 8
-#define _JB_FS8 24 * 8
-#define _JB_FS9 25 * 8
-#define _JB_FS10 26 * 8
-#define _JB_FS11 27 * 8
-#define _JB_CHECKSUM 28 * 8
+#define _JB_SP 3 * 8
+#define _JB_GP 4 * 8
+#define _JB_S0 5 * 8
+#define _JB_S1 6 * 8
+#define _JB_S2 7 * 8
+#define _JB_S3 8 * 8
+#define _JB_S4 9 * 8
+#define _JB_S5 10 * 8
+#define _JB_S6 11 * 8
+#define _JB_S7 12 * 8
+#define _JB_S8 13 * 8
+#define _JB_S9 14 * 8
+#define _JB_S10 15 * 8
+#define _JB_S11 16 * 8
+#define _JB_FS0 17 * 8
+#define _JB_FS1 18 * 8
+#define _JB_FS2 19 * 8
+#define _JB_FS3 20 * 8
+#define _JB_FS4 21 * 8
+#define _JB_FS5 22 * 8
+#define _JB_FS6 23 * 8
+#define _JB_FS7 24 * 8
+#define _JB_FS8 25 * 8
+#define _JB_FS9 26 * 8
+#define _JB_FS10 27 * 8
+#define _JB_FS11 28 * 8
+#define _JB_CHECKSUM 29 * 8
.macro m_mangle_registers reg, sp_reg
xor s0, s0, \reg
xor s1, s1, \reg
- xor a4, a4, \reg // a4 is the masked s2 (x18) for SCS.
+ xor s2, s2, \reg
xor s3, s3, \reg
xor s4, s4, \reg
xor s5, s5, \reg
@@ -89,12 +91,13 @@
xor s9, s9, \reg
xor s10, s10, \reg
xor s11, s11, \reg
+ xor a4, a4, \reg // a4 is the masked gp (x3) for SCS.
xor \sp_reg, \sp_reg, \reg
.endm
.macro m_calculate_checksum dst, src, scratch
li \dst, 0
- .irp i,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27
+ .irp i,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28
ld \scratch, (\i * 8)(\src)
xor \dst, \dst, \scratch
.endr
@@ -152,19 +155,21 @@
andi a1, a1, -2
// Mask off the high bits of the shadow call stack pointer.
- // We only store the low bits of x18 to avoid leaking the
+ // We only store the low bits of gp to avoid leaking the
// shadow call stack address into memory.
// See the SCS commentary in pthread_internal.h for more detail.
li a4, SCS_MASK
- and a4, a4, x18
+ and a4, a4, gp
// Save core registers.
mv a2, sp
m_mangle_registers a1, sp_reg=a2
sd ra, _JB_RA(a0)
+ sd a4, _JB_GP(a0) // a4 is the masked gp (x3) for SCS.
+ sd a2, _JB_SP(a0)
sd s0, _JB_S0(a0)
sd s1, _JB_S1(a0)
- sd a4, _JB_S2(a0) // a4 is the masked s2 (x18) for SCS.
+ sd s2, _JB_S2(a0)
sd s3, _JB_S3(a0)
sd s4, _JB_S4(a0)
sd s5, _JB_S5(a0)
@@ -174,7 +179,6 @@
sd s9, _JB_S9(a0)
sd s10, _JB_S10(a0)
sd s11, _JB_S11(a0)
- sd a2, _JB_SP(a0)
m_unmangle_registers a1, sp_reg=a2
// Save floating point registers.
@@ -236,9 +240,10 @@
// Restore core registers.
andi a2, a2, -2
ld ra, _JB_RA(a0)
+ ld a4, _JB_GP(a0) // Don't clobber the upper bits of gp (x3) used for SCS yet.
ld s0, _JB_S0(a0)
ld s1, _JB_S1(a0)
- ld a4, _JB_S2(a0) // Don't clobber s2 (x18) used for SCS yet.
+ ld s2, _JB_S2(a0)
ld s3, _JB_S3(a0)
ld s4, _JB_S4(a0)
ld s5, _JB_S5(a0)
@@ -254,8 +259,8 @@
// Restore the low bits of the shadow call stack pointer.
li a5, ~SCS_MASK
- and x18, x18, a5
- or x18, a4, x18
+ and gp, gp, a5
+ or gp, gp, a4
addi sp, sp, -24
sd ra, 0(sp)
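For anyone auditing the shadow call stack handling above, here is a
small C model of the save/restore arithmetic and the jmp_buf checksum.
It is a sketch, not bionic code: the SCS_MASK value is a made-up
placeholder (the real definition lives alongside the SCS commentary in
pthread_internal.h), and the setjmp cookie mangling is ignored.

  #include <stdint.h>

  /* Placeholder value: assume valid SCS pointers stay within a
   * power-of-two window, so the low bits locate the current slot. */
  #define SCS_MASK ((UINT64_C(1) << 14) - 1)

  /* setjmp: store only the low bits of gp, so a jmp_buf sitting in
   * (possibly leakable) memory never holds the full SCS address. */
  uint64_t scs_save(uint64_t gp) { return gp & SCS_MASK; }

  /* longjmp: splice the saved low bits into the current gp, keeping
   * the secret high bits the running thread already has. */
  uint64_t scs_restore(uint64_t gp, uint64_t saved_low) {
    return (gp & ~SCS_MASK) | saved_low;
  }

  /* m_calculate_checksum: xor-reduce slots 0..28; longjmp recomputes
   * this and aborts on a mismatch to catch corrupted jmp_bufs. */
  uint64_t jb_checksum(const uint64_t jb[29]) {
    uint64_t sum = 0;
    for (int i = 0; i < 29; i++) sum ^= jb[i];
    return sum;
  }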