x86/x86_64: cleanup signal trampolines

Let the assembler handle the low-level details of generating .eh_frame,
and use .cfi_escape to output the DWARF3 DW_CFA_def_cfa_expression and
DW_CFA_expression instructions.

Explicitly output a nop instruction between the FDE start
(.cfi_startproc) and the trampoline symbol.

x86_64: remove the DW_CFA_expression rule for rsp, which is redundant with
the CFA itself.

Bug: http://b/169383888
Test: compare `objdump -Wf` output before and after
Test: bionic-unit-tests
Change-Id: Ie2a6d111cb473596be8c9f4fd64534e91d88f2a1
diff --git a/libc/arch-x86/bionic/__restore.S b/libc/arch-x86/bionic/__restore.S
index cb18fd0..5977eab 100644
--- a/libc/arch-x86/bionic/__restore.S
+++ b/libc/arch-x86/bionic/__restore.S
@@ -27,14 +27,7 @@
  */
 
 #include <private/bionic_asm.h>
-
-// DWARF constants.
-#define DW_CFA_def_cfa_expression 0x0f
-#define DW_CFA_expression 0x10
-#define DW_EH_PE_pcrel 0x10
-#define DW_EH_PE_sdata4 0x0b
-#define DW_OP_breg4 0x74
-#define DW_OP_deref 0x06
+#include <private/bionic_asm_dwarf_exprs.h>
 
 // Offsets into struct sigcontext.
 #define OFFSET_EDI 16
@@ -52,84 +45,47 @@
 #define DW_x86_REG_ECX 1
 #define DW_x86_REG_EDX 2
 #define DW_x86_REG_EBX 3
+#define DW_x86_REG_ESP 4
 #define DW_x86_REG_EBP 5
 #define DW_x86_REG_ESI 6
 #define DW_x86_REG_EDI 7
 #define DW_x86_REG_EIP 8
 
-#define cfi_signal_frame_start(f) \
-.section .eh_frame,"a",@progbits; \
-.L ## f ## _START_EH_FRAME: \
-  .long 2f - 1f; /* CIE length. */ \
-1:.long 0;       /* CIE ID. */ \
-  .byte 1;       /* Version. */ \
-  .string "zRS"; /* Augmentation string. */ \
-  .uleb128 1;    /* Code alignment factor. */ \
-  .sleb128 -4;   /* Data alignment factor. */ \
-  .uleb128 DW_x86_REG_EIP;    /* Return address register. */ \
-  .uleb128 1;    /* 1 byte of augmentation data. */ \
-  .byte (DW_EH_PE_pcrel|DW_EH_PE_sdata4); /* FDE encoding. */ \
-  .align 8; \
-2: \
-  .long .L ## f ## _END_FDE - .L ## f ## _START_FDE;   /* FDE length. */ \
-.L ## f ## _START_FDE: \
-  .long .L ## f ## _START_FDE - .L ## f ## _START_EH_FRAME; /* CIE location. */ \
-  .long (.L ## f ## _START - 1) - .;                   /* pcrel start address (see FDE encoding above). */ \
-  .long .L ## f ## _END - (.L ## f ## _START - 1);     /* Function this FDE applies to. */ \
-  .uleb128 0;                                          /* FDE augmentation length. */ \
+#define RESTORE_GPR(reg, extra_offset) /* reg was saved at ESP+OFFSET_reg+extra_offset */ \
+    m_cfi_breg_offset DW_x86_REG_ ## reg,                 \
+                      DW_x86_REG_ESP,                     \
+                      (OFFSET_ ## reg + (extra_offset));
 
-#define cfi_signal_frame_end(f) \
-.L ## f ## _END_FDE: \
+// Emit the CFA rule and the saved-register rules. ESP needs none: the unwinder uses the CFA.
+#define RESTORE_GPRS(extra_offset)                                      \
+    m_cfi_def_cfa_deref DW_x86_REG_ESP, (OFFSET_ESP + (extra_offset));  \
+    RESTORE_GPR(EDI, extra_offset)                                      \
+    RESTORE_GPR(ESI, extra_offset)                                      \
+    RESTORE_GPR(EBP, extra_offset)                                      \
+    RESTORE_GPR(EBX, extra_offset)                                      \
+    RESTORE_GPR(EDX, extra_offset)                                      \
+    RESTORE_GPR(ECX, extra_offset)                                      \
+    RESTORE_GPR(EAX, extra_offset)                                      \
+    RESTORE_GPR(EIP, extra_offset)                                      \
 
-#define cfi_def_cfa(offset) \
-  .byte DW_CFA_def_cfa_expression; \
-  .uleb128 2f-1f; \
-1:.byte DW_OP_breg4; \
-  .sleb128 offset; \
-  .byte DW_OP_deref; \
-2: \
+  .text
 
-#define cfi_offset(reg_number,offset) \
-  .byte DW_CFA_expression; \
-  .uleb128 reg_number; \
-  .uleb128 2f-1f; \
-1:.byte DW_OP_breg4; \
-  .sleb128 offset; \
-2: \
-
-ENTRY_PRIVATE(__restore)
-.L__restore_START:
+  .cfi_startproc      // Open the FDE before the nop so it starts ahead of the __restore symbol.
+  .cfi_signal_frame   // Flag the FDE as a signal trampoline frame.
+  RESTORE_GPRS(4)     // Offsets are 4 larger here; they drop to 0 once popl has adjusted ESP.
+  nop   // See comment in libc/arch-x86_64/bionic/__restore_rt.S about this nop.
+ENTRY_PRIVATE_NO_DWARF(__restore)
   popl %eax
+  RESTORE_GPRS(0)
   movl $__NR_sigreturn, %eax
   int $0x80
-.L__restore_END:
 END(__restore)
-cfi_signal_frame_start(__restore)
-  cfi_def_cfa(OFFSET_ESP + 4)
-  cfi_offset(DW_x86_REG_EDI, OFFSET_EDI + 4)
-  cfi_offset(DW_x86_REG_ESI, OFFSET_ESI + 4)
-  cfi_offset(DW_x86_REG_EBP, OFFSET_EBP + 4)
-  cfi_offset(DW_x86_REG_EBX, OFFSET_EBX + 4)
-  cfi_offset(DW_x86_REG_EDX, OFFSET_EDX + 4)
-  cfi_offset(DW_x86_REG_ECX, OFFSET_ECX + 4)
-  cfi_offset(DW_x86_REG_EAX, OFFSET_EAX + 4)
-  cfi_offset(DW_x86_REG_EIP, OFFSET_EIP + 4)
-cfi_signal_frame_end(__restore)
 
-ENTRY_PRIVATE(__restore_rt)
-.L__restore_rt_START:
+  .cfi_startproc      // Open the FDE before the nop so it starts ahead of the __restore_rt symbol.
+  .cfi_signal_frame   // Flag the FDE as a signal trampoline frame.
+  RESTORE_GPRS(160)   // NOTE(review): 160 is presumably the sigcontext offset in the rt frame.
+  nop   // See comment in libc/arch-x86_64/bionic/__restore_rt.S about this nop.
+ENTRY_PRIVATE_NO_DWARF(__restore_rt)
   movl $__NR_rt_sigreturn, %eax
   int $0x80
-.L__restore_rt_END:
 END(__restore_rt)
-cfi_signal_frame_start(__restore_rt)
-  cfi_def_cfa(OFFSET_ESP + 160)
-  cfi_offset(DW_x86_REG_EDI, OFFSET_EDI + 160)
-  cfi_offset(DW_x86_REG_ESI, OFFSET_ESI + 160)
-  cfi_offset(DW_x86_REG_EBP, OFFSET_EBP + 160)
-  cfi_offset(DW_x86_REG_EBX, OFFSET_EBX + 160)
-  cfi_offset(DW_x86_REG_EDX, OFFSET_EDX + 160)
-  cfi_offset(DW_x86_REG_ECX, OFFSET_ECX + 160)
-  cfi_offset(DW_x86_REG_EAX, OFFSET_EAX + 160)
-  cfi_offset(DW_x86_REG_EIP, OFFSET_EIP + 160)
-cfi_signal_frame_end(__restore_rt)
diff --git a/libc/arch-x86_64/bionic/__restore_rt.S b/libc/arch-x86_64/bionic/__restore_rt.S
index 785b3b3..f3e4012 100644
--- a/libc/arch-x86_64/bionic/__restore_rt.S
+++ b/libc/arch-x86_64/bionic/__restore_rt.S
@@ -27,15 +27,9 @@
  */
 
 #include <private/bionic_asm.h>
+#include <private/bionic_asm_dwarf_exprs.h>
 
-// DWARF constants.
-#define DW_CFA_def_cfa_expression 0x0f
-#define DW_CFA_expression 0x10
-#define DW_EH_PE_pcrel 0x10
-#define DW_EH_PE_sdata4 0x0b
-#define DW_OP_breg4 0x74
-#define DW_OP_breg7 0x77
-#define DW_OP_deref 0x06
+// In the signal trampoline frame, rsp points to a ucontext_t struct.
 
 // Offsets into struct ucontext_t of uc_mcontext.gregs[x].
 #define OFFSET_R8 40
@@ -49,11 +43,11 @@
 #define OFFSET_RDI 104
 #define OFFSET_RSI 112
 #define OFFSET_RBP 120
-#define OFFSET_RSP 160
 #define OFFSET_RBX 128
 #define OFFSET_RDX 136
 #define OFFSET_RAX 144
 #define OFFSET_RCX 152
+#define OFFSET_RSP 160
 #define OFFSET_RIP 168
 
 // Non-standard DWARF constants for the x86-64 registers.
@@ -75,69 +69,47 @@
 #define DW_x86_64_R15 15
 #define DW_x86_64_RIP 16
 
-#define cfi_signal_frame_start(f) \
-.section .eh_frame,"a",@progbits; \
-.L ## f ## _START_EH_FRAME: \
-  .long 2f - 1f; /* CIE length. */ \
-1:.long 0;       /* CIE ID. */ \
-  .byte 1;       /* Version. */ \
-  .string "zRS"; /* Augmentation string. */ \
-  .uleb128 1;    /* Code alignment factor. */ \
-  .sleb128 -8;   /* Data alignment factor. */ \
-  .uleb128 DW_x86_64_RIP;   /* Return address register. */ \
-  .uleb128 1;    /* 1 byte of augmentation data. */ \
-  .byte (DW_EH_PE_pcrel | DW_EH_PE_sdata4); /* FDE encoding. */ \
-  .align 8; \
-2: \
-  .long .L ## f ## _END_FDE - .L ## f ## _START_FDE;   /* FDE length. */ \
-.L ## f ## _START_FDE: \
-  .long .L ## f ## _START_FDE - .L ## f ## _START_EH_FRAME; /* CIE location. */ \
-  .long (.L ## f ## _START - 1) - .;                   /* pcrel start address (see FDE encoding above). */ \
-  .long .L ## f ## _END - (.L ## f ## _START - 1);     /* Function this FDE applies to. */ \
-  .uleb128 0;                                          /* FDE augmentation length. */ \
+// Insert a nop between .cfi_startproc and the trampoline symbol so that unwinders can find the FDE.
+// A function's last instruction can be a call instruction (e.g. to __cxa_throw), in which case the
+// return address (e.g. from __cxa_throw to the caller) will be just after the function. This
+// address may also be the start of the next function, so to avoid ambiguity, unwinders assume that
+// a return address PC can refer to the address just after a function, but never to the start of a
+// function. (This is implemented by subtracting 1 from the return address PC before looking it up.)
+// This is fine for ordinary functions, but breaks on trampolines. Inserting a nop fixes it.
+//
+// N.B. Unwinders have two other strategies for recognizing the signal trampoline:
+//  - Read the instructions that the return address PC points at and look for a sigreturn syscall.
+//    (Hence, the instructions must not change at all.)
+//  - Do a symbol table lookup and check that against the PC (e.g. LLDB looks for
+//    __kernel_rt_sigreturn and __restore_rt.)
+// Either way, the nop is needed to avoid ambiguity if the function before the trampoline could end
+// with a call.
 
-#define cfi_signal_frame_end(f) \
-.L ## f ## _END_FDE: \
+#define RESTORE_GPR(reg) m_cfi_breg_offset DW_x86_64_ ## reg, DW_x86_64_RSP, OFFSET_ ## reg;
 
-#define cfi_def_cfa(offset) \
-  .byte DW_CFA_def_cfa_expression; \
-  .uleb128 2f-1f; \
-1:.byte DW_OP_breg7; \
-  .sleb128 offset; \
-  .byte DW_OP_deref; \
-2: \
-
-#define cfi_offset(reg_number,offset) \
-  .byte DW_CFA_expression; \
-  .uleb128 reg_number; \
-  .uleb128 2f-1f; \
-1:.byte DW_OP_breg7; \
-  .sleb128 offset; \
-2: \
-
-ENTRY_PRIVATE(__restore_rt)
-.L__restore_rt_START:
+  .text
+  .cfi_startproc      // The FDE starts at the nop, just before the __restore_rt symbol.
+  .cfi_signal_frame   // Flag the FDE as a signal trampoline frame.
+  m_cfi_def_cfa_deref DW_x86_64_RSP, OFFSET_RSP   // CFA = *(rsp + OFFSET_RSP), the saved rsp.
+  RESTORE_GPR(R8)
+  RESTORE_GPR(R9)
+  RESTORE_GPR(R10)
+  RESTORE_GPR(R11)
+  RESTORE_GPR(R12)
+  RESTORE_GPR(R13)
+  RESTORE_GPR(R14)
+  RESTORE_GPR(R15)
+  RESTORE_GPR(RDI)
+  RESTORE_GPR(RSI)
+  RESTORE_GPR(RBP)
+  RESTORE_GPR(RBX)
+  RESTORE_GPR(RDX)
+  RESTORE_GPR(RAX)
+  RESTORE_GPR(RCX)
+  // Restoring RSP is unnecessary as the unwinder simply uses the CFA value.
+  RESTORE_GPR(RIP)
+  nop                 // Padding so that (return address - 1) still falls inside this FDE.
+ENTRY_PRIVATE_NO_DWARF(__restore_rt)
   mov $__NR_rt_sigreturn, %rax
   syscall
-.L__restore_rt_END:
 END(__restore_rt)
-cfi_signal_frame_start(__restore_rt)
-  cfi_def_cfa(OFFSET_RSP)
-  cfi_offset(DW_x86_64_R8, OFFSET_R8)
-  cfi_offset(DW_x86_64_R9, OFFSET_R9)
-  cfi_offset(DW_x86_64_R10, OFFSET_R10)
-  cfi_offset(DW_x86_64_R11, OFFSET_R11)
-  cfi_offset(DW_x86_64_R12, OFFSET_R12)
-  cfi_offset(DW_x86_64_R13, OFFSET_R13)
-  cfi_offset(DW_x86_64_R14, OFFSET_R14)
-  cfi_offset(DW_x86_64_R15, OFFSET_R15)
-  cfi_offset(DW_x86_64_RDI, OFFSET_RDI)
-  cfi_offset(DW_x86_64_RSI, OFFSET_RSI)
-  cfi_offset(DW_x86_64_RBP, OFFSET_RBP)
-  cfi_offset(DW_x86_64_RSP, OFFSET_RSP)
-  cfi_offset(DW_x86_64_RBX, OFFSET_RBX)
-  cfi_offset(DW_x86_64_RDX, OFFSET_RDX)
-  cfi_offset(DW_x86_64_RAX, OFFSET_RAX)
-  cfi_offset(DW_x86_64_RCX, OFFSET_RCX)
-  cfi_offset(DW_x86_64_RIP, OFFSET_RIP)
-cfi_signal_frame_end(__restore_rt)
diff --git a/libc/private/bionic_asm_dwarf_exprs.h b/libc/private/bionic_asm_dwarf_exprs.h
new file mode 100644
index 0000000..f988c6e
--- /dev/null
+++ b/libc/private/bionic_asm_dwarf_exprs.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+// Define assembler macros for generating DWARF CFI instructions that use DWARF expressions.
+// Assemblers don't natively support DWARF expressions, so use the C preprocessor and assembler
+// macros to lower them to .cfi_escape directives.
+//
+// Signal trampolines need to use DWARF expressions to record the locations of saved registers,
+// because the offset from the restored SP to the saved registers is variable. For example, a
+// signal frame can have optional FP/SIMD extensions, and there may be extra padding if the
+// interrupted SP wasn't aligned.
+
+// DWARF constants.
+#define DW_CFA_def_cfa_expression 0x0f
+#define DW_CFA_expression 0x10
+#define DW_OP_breg0 0x70
+#define DW_OP_deref 0x06
+
+// Encoded size (1 or 2 bytes) of a small uleb128 value; "& 1" turns a true comparison into 1.
+#define ULEB128_14BIT_SIZE(val) \
+  (1 + (((val) > 0x7f) & 1))
+
+// Encoded size (1 or 2 bytes) of a small sleb128 value; at most one of the two tests is true.
+#define SLEB128_14BIT_SIZE(val)       \
+  (1 + (((val) < -0x40) & 1) +        \
+       (((val) > 0x3f) & 1)     )
+
+// Output an absolute value in 0..0x3fff as a 1 or 2-byte CFI uleb128; other values are an error.
+.macro m_cfi_uleb128 val
+  .if (\val) < 0 || (\val) > 0x3fff       // negative, or needs more than two 7-bit groups
+    .error "m_cfi_uleb128 value is out of range (\val)"
+  .elseif (\val) > 0x7f
+    .cfi_escape ((\val) & 0x7f) | 0x80    // byte 1: low 7 bits with the continuation bit set
+    .cfi_escape (\val) >> 7               // byte 2: the remaining high bits
+  .else
+    .cfi_escape (\val)                    // the value fits in a single byte
+  .endif
+.endm
+
+// Output an absolute value in -0x2000..0x1fff as a 1 or 2-byte CFI sleb128; others are an error.
+.macro m_cfi_sleb128 val
+  .if (\val) < -0x2000 || (\val) > 0x1fff   // needs more than two 7-bit groups
+    .error "m_cfi_sleb128 value is out of range (\val)"
+  .elseif (\val) < -0x40 || (\val) > 0x3f   // does not fit in one signed 7-bit group
+    .cfi_escape ((\val) & 0x7f) | 0x80      // byte 1: low 7 bits with the continuation bit set
+    .cfi_escape ((\val) >> 7) & 0x7f        // byte 2: next 7 bits, continuation bit clear
+  .else
+    .cfi_escape (\val) & 0x7f               // single byte: low 7 bits, continuation bit clear
+  .endif
+.endm
+
+.macro check_base_reg reg_no   // Raise an assembly error unless reg_no is in 0..31.
+  .if (\reg_no) < 0 || (\reg_no) > 31
+    .error "base register is out of range for DW_OP_breg0..DW_OP_breg31 (\reg_no)"
+  .endif
+.endm
+
+// Set the CFA to the value loaded from memory at (base_reg + offset), i.e. *(base_reg + offset).
+.macro m_cfi_def_cfa_deref base_reg, offset
+  check_base_reg (\base_reg)                            // base_reg must be in 0..31
+  .cfi_escape DW_CFA_def_cfa_expression                 // opcode; an expression block follows
+  m_cfi_uleb128 (1 + SLEB128_14BIT_SIZE(\offset) + 1)   // block size: breg + offset + deref
+  .cfi_escape DW_OP_breg0 + (\base_reg)                 // expr: DW_OP_breg<base_reg>, 1 byte
+  m_cfi_sleb128 (\offset)                               // expr: signed offset, 1 or 2 bytes
+  .cfi_escape DW_OP_deref                               // expr: dereference the sum, 1 byte
+.endm
+
+// Record that the previous value of dest_reg is stored in memory at (base_reg + offset).
+.macro m_cfi_breg_offset dest_reg, base_reg, offset
+  check_base_reg (\base_reg)                      // base_reg must be in 0..31
+  .cfi_escape DW_CFA_expression                   // opcode; register and expression block follow
+  m_cfi_uleb128 (\dest_reg)                       // number of the register being described
+  m_cfi_uleb128 (1 + SLEB128_14BIT_SIZE(\offset)) // block size: breg + offset
+  .cfi_escape DW_OP_breg0 + (\base_reg)           // expr: DW_OP_breg<base_reg>, 1 byte
+  m_cfi_sleb128 (\offset)                         // expr: signed offset, 1 or 2 bytes
+.endm