Simplify the malloc_debug unwind.

It's hard to find the start of the previous instruction for riscv64
(given the C extension), and discussion with the ART folks cast doubt
on the comment's claim that we do this correctly for arm32 anyway.

So, rather than add complexity for riscv64, let's simplify this routine
for everyone. I suspect we could probably get away with just `--ip` for
all architectures, but since it's trivial to at least maintain plausible
alignment, I've stuck with the correct "at least" byte counts instead.

(See the discussion on
https://lists.riscv.org/g/sig-android/topic/detecting_16_bit_vs_32_bit/94813787
for more about riscv64 specifically.)

Test: treehugger
Change-Id: Ie43451d329470b3ece8779d11eb705d24d01c3d7
diff --git a/libc/malloc_debug/backtrace.cpp b/libc/malloc_debug/backtrace.cpp
index ab5c505..0649571 100644
--- a/libc/malloc_debug/backtrace.cpp
+++ b/libc/malloc_debug/backtrace.cpp
@@ -83,41 +83,24 @@
 
   uintptr_t ip = _Unwind_GetIP(context);
 
-  // The instruction pointer is pointing at the instruction after the return
-  // call on all architectures.
-  // Modify the pc to point at the real function.
-  if (ip != 0) {
-#if defined(__arm__)
-    // If the ip is suspiciously low, do nothing to avoid a segfault trying
-    // to access this memory.
-    if (ip >= 4096) {
-      // Check bits [15:11] of the first halfword assuming the instruction
-      // is 32 bits long. If the bits are any of these values, then our
-      // assumption was correct:
-      //  b11101
-      //  b11110
-      //  b11111
-      // Otherwise, this is a 16 bit instruction.
-      uint16_t value = (*reinterpret_cast<uint16_t*>(ip - 2)) >> 11;
-      if (value == 0x1f || value == 0x1e || value == 0x1d) {
-        ip -= 4;
-      } else {
-        ip -= 2;
-      }
-    }
-#elif defined(__aarch64__)
-    // All instructions are 4 bytes long, skip back one instruction.
-    ip -= 4;
+  // `ip` is the address of the instruction *after* the call site in
+  // `context`, so we want to back up by one instruction. This is hard for
+  // every architecture except arm64, so we just make sure we're *inside*
+  // that instruction, not necessarily at the start of it. (If the value
+  // is too low to be valid, we just leave it alone.)
+  if (ip >= 4096) {
+#if defined(__aarch64__)
+    ip -= 4;  // Exactly.
+#elif defined(__arm__) || defined(__riscv)
+    ip -= 2;  // At least.
 #elif defined(__i386__) || defined(__x86_64__)
-    // It's difficult to decode exactly where the previous instruction is,
-    // so subtract 1 to estimate where the instruction lives.
-    ip--;
+    ip -= 1;  // At least.
 #endif
+  }
 
-    // Do not record the frames that fall in our own shared library.
-    if (g_current_code_map && (ip >= g_current_code_map->start) && ip < g_current_code_map->end) {
-      return _URC_NO_REASON;
-    }
+  // Do not record the frames that fall in our own shared library.
+  if (g_current_code_map && (ip >= g_current_code_map->start) && ip < g_current_code_map->end) {
+    return _URC_NO_REASON;
   }
 
   state->frames[state->cur_frame++] = ip;