setjmp/longjmp: avoid invalid values in the stack pointer.

arm64 was already being careful, but x86/x86-64 and 32-bit ARM could be
caught by a signal while the stack pointer still held a mangled value,
at which point the signal frame would be written to a bogus address.

For 32-bit ARM I've taken care with the link register too, to avoid
potential issues with unwinding.
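
To make the failure mode concrete, here's a hypothetical reproducer
sketch (not part of this change; the handler, timer rate, and iteration
count are my own) that hammers the mangled window with an interval
timer. Without sigaltstack(), each signal frame is pushed on the
current stack, so a signal that lands while the stack pointer is
mangled faults:

  // Hypothetical reproducer sketch, not from this patch.
  #include <setjmp.h>
  #include <signal.h>
  #include <stdio.h>
  #include <sys/time.h>

  static volatile sig_atomic_t signals;

  static void on_prof(int sig) {
    (void)sig;
    signals++;  // The handler's frame lands on the current stack.
  }

  int main(void) {
    struct sigaction sa = { .sa_handler = on_prof };
    sigemptyset(&sa.sa_mask);
    sigaction(SIGPROF, &sa, NULL);

    struct itimerval it = { { 0, 1000 }, { 0, 1000 } };  // ~1000 signals/s.
    setitimer(ITIMER_PROF, &it, NULL);

    jmp_buf env;
    for (int i = 0; i < 10 * 1000 * 1000; i++) {
      if (setjmp(env) == 0) longjmp(env, 1);
    }
    printf("%d signals survived\n", (int)signals);
    return 0;
  }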

Bug: http://b/152210274
Test: treehugger
Change-Id: I1ce285b017a633c732dbe04743368f4cae27af85
diff --git a/libc/arch-x86/bionic/setjmp.S b/libc/arch-x86/bionic/setjmp.S
index 1e1ce58..1a3eb4b 100644
--- a/libc/arch-x86/bionic/setjmp.S
+++ b/libc/arch-x86/bionic/setjmp.S
@@ -57,19 +57,6 @@
 #define _JB_SIGFLAG 8
 #define _JB_CHECKSUM 9
 
-.macro m_mangle_registers reg
-  xorl \reg,%edx
-  xorl \reg,%ebx
-  xorl \reg,%esp
-  xorl \reg,%ebp
-  xorl \reg,%esi
-  xorl \reg,%edi
-.endm
-
-.macro m_unmangle_registers reg
-  m_mangle_registers \reg
-.endm
-
 .macro m_calculate_checksum dst, src
   movl $0, \dst
   .irp i,0,1,2,3,4,5
@@ -129,14 +116,17 @@
 
   // Save the callee-save registers.
   movl 0(%esp),%edx
-  m_mangle_registers %eax
-  movl %edx,(_JB_EDX * 4)(%ecx)
-  movl %ebx,(_JB_EBX * 4)(%ecx)
-  movl %esp,(_JB_ESP * 4)(%ecx)
-  movl %ebp,(_JB_EBP * 4)(%ecx)
-  movl %esi,(_JB_ESI * 4)(%ecx)
-  movl %edi,(_JB_EDI * 4)(%ecx)
-  m_unmangle_registers %eax
+
+.macro m_mangle_register reg, offset
+  movl \reg,(\offset * 4)(%ecx)
+  xorl %eax,(\offset * 4)(%ecx)
+.endm
+  m_mangle_register %edx, _JB_EDX
+  m_mangle_register %ebx, _JB_EBX
+  m_mangle_register %esp, _JB_ESP
+  m_mangle_register %ebp, _JB_EBP
+  m_mangle_register %esi, _JB_ESI
+  m_mangle_register %edi, _JB_EDI
 
   m_calculate_checksum %eax, %ecx
   movl %eax, (_JB_CHECKSUM * 4)(%ecx)
@@ -174,18 +164,26 @@
   movl 4(%esp),%edx
   movl 8(%esp),%eax
 
+  // Fetch the setjmp cookie and clear the signal flag bit.
   movl (_JB_SIGFLAG * 4)(%edx),%ecx
   andl $-2,%ecx
 
+  // Carefully unmangle esp/ebp without ever having an invalid value in the
+  // register (http://b/152210274).
+  movl (_JB_ESP * 4)(%edx),%edi
+  xorl %ecx,%edi
+  movl %edi,%esp
+  movl (_JB_EBP * 4)(%edx),%edi
+  xorl %ecx,%edi
+  movl %edi,%ebp
+
+  // The others don't matter as much, but we do need to finish using the cookie
+  // from %ecx before we clobber it, so we seed each register with the cookie.
   movl %ecx,%ebx
-  movl %ecx,%esp
-  movl %ecx,%ebp
   movl %ecx,%esi
   movl %ecx,%edi
   xorl (_JB_EDX * 4)(%edx),%ecx
   xorl (_JB_EBX * 4)(%edx),%ebx
-  xorl (_JB_ESP * 4)(%edx),%esp
-  xorl (_JB_EBP * 4)(%edx),%ebp
   xorl (_JB_ESI * 4)(%edx),%esi
   xorl (_JB_EDI * 4)(%edx),%edi
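
For reference, a C sketch (my own names, not bionic's actual
implementation) of the XOR-cookie scheme the assembly above implements.
The key property is that XOR is its own inverse: that's why setjmp can
now mangle each slot in place in memory, so no register ever holds a
mangled value, and why longjmp can compute the real esp/ebp into a
scratch register and install each with a single movl, leaving no window
in which %esp is invalid:

  // Sketch only, with hypothetical names: the XOR-cookie mangling.
  #include <stdint.h>

  // Storing value ^ cookie hides raw pointers from anyone who reads a
  // leaked jmp_buf without knowing the per-process cookie.
  static uintptr_t mangle(uintptr_t value, uintptr_t cookie) {
    return value ^ cookie;
  }

  // XOR is its own inverse: (value ^ cookie) ^ cookie == value, so
  // longjmp can seed a register with the cookie and xor in the slot.
  static uintptr_t unmangle(uintptr_t slot, uintptr_t cookie) {
    return slot ^ cookie;
  }

  // Assuming m_calculate_checksum XOR-folds the six mangled slots, a
  // matching sketch of the _JB_CHECKSUM value would be:
  static uintptr_t checksum(const uintptr_t slots[6]) {
    uintptr_t sum = 0;
    for (int i = 0; i < 6; i++) sum ^= slots[i];
    return sum;
  }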