[MTE] Add a HWASan-style tag dump to tombstones.

We already dump the tags in the register dump section by appending the
tag to the memory address. However, you only get 2 granules before each
register and 13 after.

The HWASan-style tag dump is extremely useful for debugging, as it gives
a pretty comprehensive overview of the memory subsystem. It also
provides enough context (256 tags, one per 16-byte granule) to give you
a reasonable intuition about a particular bug.

The tag dump shows up only if PTRACE_PEEKTAGS returns at least one value
in the 256 requested. If the start or end of the region is untagged,
that part is omitted. The tag dump looks like this:

Change-Id: Icc33fb97542d9b1fa3ae9e58aba34d524c6ba7b5

---
Memory tags around the fault address (0x60000704414d340), one tag per 16 bytes:
      0x704414d000: 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
      0x704414d100: 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
      0x704414d200: 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
    =>0x704414d300: 0  0  0  0 [2] 2  0  0  0  0  0  0  0  0  0  0
      0x704414d400: 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
      0x704414d500: 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
      0x704414d600: 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
      0x704414d700: 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
      0x704414d800: 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
      0x704414d900: 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
      0x704414da00: 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
---

Bug: 183992164
Test: atest debuggerd_test on MTE+QEMU and sunfish.

Change-Id: I8d5842e4803ca30b407e866c99eef56f2cb36600
diff --git a/debuggerd/debuggerd_test.cpp b/debuggerd/debuggerd_test.cpp
index 24804d0..abda071 100644
--- a/debuggerd/debuggerd_test.cpp
+++ b/debuggerd/debuggerd_test.cpp
@@ -58,6 +58,7 @@
 #include <scoped_minijail.h>
 
 #include "debuggerd/handler.h"
+#include "libdebuggerd/utility.h"
 #include "protocol.h"
 #include "tombstoned/tombstoned.h"
 #include "util.h"
@@ -526,6 +527,8 @@
   std::vector<std::string> log_sources(2);
   ConsumeFd(std::move(output_fd), &log_sources[0]);
   logcat_collector.Collect(&log_sources[1]);
+  // Tag dump only available in the tombstone, not logcat.
+  ASSERT_MATCH(log_sources[0], "Memory tags around the fault address");
 
   for (const auto& result : log_sources) {
     ASSERT_MATCH(result, R"(signal 11 \(SIGSEGV\))");
@@ -597,6 +600,12 @@
   ConsumeFd(std::move(output_fd), &log_sources[0]);
   logcat_collector.Collect(&log_sources[1]);
 
+  // Tag dump only in tombstone, not logcat, and tagging is not used for
+  // overflow protection in the scudo secondary (guard pages are used instead).
+  if (GetParam() < 0x10000) {
+    ASSERT_MATCH(log_sources[0], "Memory tags around the fault address");
+  }
+
   for (const auto& result : log_sources) {
     ASSERT_MATCH(result, R"(signal 11 \(SIGSEGV\))");
     ASSERT_MATCH(result, R"(Cause: \[MTE\]: Buffer Overflow, 0 bytes right of a )" +
@@ -637,6 +646,7 @@
                            std::to_string(GetParam()) + R"(-byte allocation)");
   ASSERT_MATCH(result, R"((^|\s)allocated by thread .*
       #00 pc)");
+  ASSERT_MATCH(result, "Memory tags around the fault address");
 #else
   GTEST_SKIP() << "Requires aarch64";
 #endif
@@ -686,6 +696,9 @@
   ConsumeFd(std::move(output_fd), &log_sources[0]);
   logcat_collector.Collect(&log_sources[1]);
 
+  // Tag dump only in the tombstone, not logcat.
+  ASSERT_MATCH(log_sources[0], "Memory tags around the fault address");
+
   for (const auto& result : log_sources) {
     ASSERT_MATCH(result, R"(signal 11 \(SIGSEGV\))");
     ASSERT_THAT(result, HasSubstr("Note: multiple potential causes for this crash were detected, "
@@ -706,21 +719,26 @@
 
 #if defined(__aarch64__)
 static uintptr_t CreateTagMapping() {
-  uintptr_t mapping =
-      reinterpret_cast<uintptr_t>(mmap(nullptr, getpagesize(), PROT_READ | PROT_WRITE | PROT_MTE,
-                                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
-  if (reinterpret_cast<void*>(mapping) == MAP_FAILED) {
+  // Some of the MTE tag dump tests assert that there is an inaccessible page to the left and right
+  // of the PROT_MTE page, so map three pages and set the two guard pages to PROT_NONE.
+  size_t page_size = getpagesize();
+  void* mapping = mmap(nullptr, page_size * 3, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  uintptr_t mapping_uptr = reinterpret_cast<uintptr_t>(mapping);
+  if (mapping == MAP_FAILED) {
     return 0;
   }
-  __asm__ __volatile__(".arch_extension mte; stg %0, [%0]"
-                       :
-                       : "r"(mapping + (1ULL << 56))
-                       : "memory");
-  return mapping;
+  mprotect(reinterpret_cast<void*>(mapping_uptr + page_size), page_size,
+           PROT_READ | PROT_WRITE | PROT_MTE);
+  // Stripe the mapping, where even granules get tag '1', and odd granules get tag '0'.
+  for (uintptr_t offset = 0; offset < page_size; offset += 2 * kTagGranuleSize) {
+    uintptr_t tagged_addr = mapping_uptr + page_size + offset + (1ULL << 56);
+    __asm__ __volatile__(".arch_extension mte; stg %0, [%0]" : : "r"(tagged_addr) : "memory");
+  }
+  return mapping_uptr + page_size;
 }
 #endif
 
-TEST_F(CrasherTest, mte_tag_dump) {
+TEST_F(CrasherTest, mte_register_tag_dump) {
 #if defined(__aarch64__)
   if (!mte_supported()) {
     GTEST_SKIP() << "Requires MTE";
@@ -753,6 +771,107 @@
 #endif
 }
 
+TEST_F(CrasherTest, mte_fault_tag_dump_front_truncated) {
+#if defined(__aarch64__)
+  if (!mte_supported()) {
+    GTEST_SKIP() << "Requires MTE";
+  }
+
+  int intercept_result;
+  unique_fd output_fd;
+  StartProcess([&]() {
+    SetTagCheckingLevelSync();
+    volatile char* p = reinterpret_cast<char*>(CreateTagMapping());
+    p[0] = 0;  // Untagged pointer, tagged memory.
+  });
+
+  StartIntercept(&output_fd);
+  FinishCrasher();
+  AssertDeath(SIGSEGV);
+  FinishIntercept(&intercept_result);
+
+  ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";
+
+  std::string result;
+  ConsumeFd(std::move(output_fd), &result);
+
+  ASSERT_MATCH(result, R"(Memory tags around the fault address.*
+\s*=>0x[0-9a-f]+000:\[1\] 0  1  0)");
+#else
+  GTEST_SKIP() << "Requires aarch64";
+#endif
+}
+
+TEST_F(CrasherTest, mte_fault_tag_dump) {
+#if defined(__aarch64__)
+  if (!mte_supported()) {
+    GTEST_SKIP() << "Requires MTE";
+  }
+
+  int intercept_result;
+  unique_fd output_fd;
+  StartProcess([&]() {
+    SetTagCheckingLevelSync();
+    volatile char* p = reinterpret_cast<char*>(CreateTagMapping());
+    p[320] = 0;  // Untagged pointer, tagged memory.
+  });
+
+  StartIntercept(&output_fd);
+  FinishCrasher();
+  AssertDeath(SIGSEGV);
+  FinishIntercept(&intercept_result);
+
+  ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";
+
+  std::string result;
+  ConsumeFd(std::move(output_fd), &result);
+
+  ASSERT_MATCH(result, R"(Memory tags around the fault address.*
+\s*0x[0-9a-f]+: 1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0
+\s*=>0x[0-9a-f]+: 1  0  1  0 \[1\] 0  1  0  1  0  1  0  1  0  1  0
+\s*0x[0-9a-f]+: 1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0
+)");
+#else
+  GTEST_SKIP() << "Requires aarch64";
+#endif
+}
+
+TEST_F(CrasherTest, mte_fault_tag_dump_rear_truncated) {
+#if defined(__aarch64__)
+  if (!mte_supported()) {
+    GTEST_SKIP() << "Requires MTE";
+  }
+
+  int intercept_result;
+  unique_fd output_fd;
+  StartProcess([&]() {
+    SetTagCheckingLevelSync();
+    size_t page_size = getpagesize();
+    volatile char* p = reinterpret_cast<char*>(CreateTagMapping());
+    p[page_size - kTagGranuleSize * 2] = 0;  // Untagged pointer, tagged memory.
+  });
+
+  StartIntercept(&output_fd);
+  FinishCrasher();
+  AssertDeath(SIGSEGV);
+  FinishIntercept(&intercept_result);
+
+  ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";
+
+  std::string result;
+  ConsumeFd(std::move(output_fd), &result);
+
+  ASSERT_MATCH(result, R"(Memory tags around the fault address)");
+  ASSERT_MATCH(result,
+               R"(\s*0x[0-9a-f]+: 1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0
+\s*=>0x[0-9a-f]+: 1  0  1  0  1  0  1  0  1  0  1  0  1  0 \[1\] 0
+
+)");  // Ensure truncation happened and there's a newline after the tag fault.
+#else
+  GTEST_SKIP() << "Requires aarch64";
+#endif
+}
+
 TEST_F(CrasherTest, LD_PRELOAD) {
   int intercept_result;
   unique_fd output_fd;
diff --git a/debuggerd/libdebuggerd/include/libdebuggerd/utility.h b/debuggerd/libdebuggerd/include/libdebuggerd/utility.h
index c490fb1..24ae169 100644
--- a/debuggerd/libdebuggerd/include/libdebuggerd/utility.h
+++ b/debuggerd/libdebuggerd/include/libdebuggerd/utility.h
@@ -96,4 +96,8 @@
 // Number of bytes per MTE granule.
 constexpr size_t kTagGranuleSize = 16;
 
+// Number of rows and columns to display in an MTE tag dump.
+constexpr size_t kNumTagColumns = 16;
+constexpr size_t kNumTagRows = 16;
+
 #endif // _DEBUGGERD_UTILITY_H
diff --git a/debuggerd/libdebuggerd/tombstone_proto.cpp b/debuggerd/libdebuggerd/tombstone_proto.cpp
index abd1f12..ff12017 100644
--- a/debuggerd/libdebuggerd/tombstone_proto.cpp
+++ b/debuggerd/libdebuggerd/tombstone_proto.cpp
@@ -362,8 +362,10 @@
             dump.set_mapping_name(map_info->name());
           }
 
-          char buf[256];
-          uint8_t tags[256 / kTagGranuleSize];
+          constexpr size_t kNumBytesAroundRegister = 256;
+          constexpr size_t kNumTagsAroundRegister = kNumBytesAroundRegister / kTagGranuleSize;
+          char buf[kNumBytesAroundRegister];
+          uint8_t tags[kNumTagsAroundRegister];
           size_t start_offset = 0;
           ssize_t bytes = dump_memory(buf, sizeof(buf), tags, sizeof(tags), &value, memory);
           if (bytes == -1) {
@@ -377,7 +379,19 @@
           }
 
           dump.set_memory(buf, bytes);
-          dump.set_tags(tags, bytes / kTagGranuleSize);
+
+          bool has_tags = false;
+#if defined(__aarch64__)
+          for (size_t i = 0; i < kNumTagsAroundRegister; ++i) {
+            if (tags[i] != 0) {
+              has_tags = true;
+            }
+          }
+#endif  // defined(__aarch64__)
+
+          if (has_tags) {
+            dump.mutable_arm_mte_metadata()->set_memory_tags(tags, kNumTagsAroundRegister);
+          }
 
           *thread.add_memory_dump() = std::move(dump);
         }
@@ -531,6 +545,50 @@
   dump_log_file(tombstone, "main", pid);
 }
 
+static void dump_tags_around_fault_addr(Signal* signal, const Tombstone& tombstone,
+                                        unwindstack::Unwinder* unwinder, uintptr_t fault_addr) {
+  if (tombstone.arch() != Architecture::ARM64) return;
+
+  fault_addr = untag_address(fault_addr);
+  constexpr size_t kNumGranules = kNumTagRows * kNumTagColumns;
+  constexpr size_t kBytesToRead = kNumGranules * kTagGranuleSize;
+
+  // If the low part of the tag dump would underflow to the high address space, it's probably not
+  // a valid address for us to dump tags from.
+  if (fault_addr < kBytesToRead / 2) return;
+
+  unwindstack::Memory* memory = unwinder->GetProcessMemory().get();
+
+  constexpr uintptr_t kRowStartMask = ~(kNumTagColumns * kTagGranuleSize - 1);
+  size_t start_address = (fault_addr & kRowStartMask) - kBytesToRead / 2;
+  MemoryDump tag_dump;
+  size_t granules_to_read = kNumGranules;
+
+  // Attempt to read the first tag. If reading fails, this likely indicates the
+  // lowest touched page is inaccessible or not marked with PROT_MTE.
+  // Fast-forward over pages until one has tags, or we exhaust the search range.
+  while (memory->ReadTag(start_address) < 0) {
+    size_t page_size = sysconf(_SC_PAGE_SIZE);
+    size_t bytes_to_next_page = page_size - (start_address % page_size);
+    if (bytes_to_next_page >= granules_to_read * kTagGranuleSize) return;
+    start_address += bytes_to_next_page;
+    granules_to_read -= bytes_to_next_page / kTagGranuleSize;
+  }
+  tag_dump.set_begin_address(start_address);
+
+  std::string* mte_tags = tag_dump.mutable_arm_mte_metadata()->mutable_memory_tags();
+
+  for (size_t i = 0; i < granules_to_read; ++i) {
+    long tag = memory->ReadTag(start_address + i * kTagGranuleSize);
+    if (tag < 0) break;
+    mte_tags->push_back(static_cast<uint8_t>(tag));
+  }
+
+  if (!mte_tags->empty()) {
+    *signal->mutable_fault_adjacent_metadata() = tag_dump;
+  }
+}
+
 static std::optional<uint64_t> read_uptime_secs() {
   std::string uptime;
   if (!android::base::ReadFileToString("/proc/uptime", &uptime)) {
@@ -594,7 +652,9 @@
 
   if (process_info.has_fault_address) {
     sig.set_has_fault_address(true);
-    sig.set_fault_address(process_info.maybe_tagged_fault_address);
+    uintptr_t fault_addr = process_info.maybe_tagged_fault_address;
+    sig.set_fault_address(fault_addr);
+    dump_tags_around_fault_addr(&sig, result, unwinder, fault_addr);
   }
 
   *result.mutable_signal_info() = sig;
diff --git a/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp b/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
index a932d48..053299a 100644
--- a/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
+++ b/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
@@ -29,6 +29,7 @@
 #include <android-base/strings.h>
 #include <android-base/unique_fd.h>
 #include <async_safe/log.h>
+#include <bionic/macros.h>
 
 #include "tombstone.pb.h"
 
@@ -193,8 +194,11 @@
     uint64_t addr = mem.begin_address();
     for (size_t offset = 0; offset < mem.memory().size(); offset += bytes_per_line) {
       uint64_t tagged_addr = addr;
-      if (mem.tags().size() > offset / kTagGranuleSize) {
-        tagged_addr |= static_cast<uint64_t>(mem.tags()[offset / kTagGranuleSize]) << 56;
+      if (mem.has_arm_mte_metadata() &&
+          mem.arm_mte_metadata().memory_tags().size() > offset / kTagGranuleSize) {
+        tagged_addr |=
+            static_cast<uint64_t>(mem.arm_mte_metadata().memory_tags()[offset / kTagGranuleSize])
+            << 56;
       }
       std::string line = StringPrintf("    %0*" PRIx64, word_size * 2, tagged_addr + offset);
 
@@ -232,6 +236,60 @@
   print_thread_memory_dump(callback, tombstone, thread);
 }
 
+static void print_tag_dump(CallbackType callback, const Tombstone& tombstone) {
+  if (!tombstone.has_signal_info()) return;
+
+  const Signal& signal = tombstone.signal_info();
+
+  if (!signal.has_fault_address() || !signal.has_fault_adjacent_metadata()) {
+    return;
+  }
+
+  const MemoryDump& memory_dump = signal.fault_adjacent_metadata();
+
+  if (!memory_dump.has_arm_mte_metadata() || memory_dump.arm_mte_metadata().memory_tags().empty()) {
+    return;
+  }
+
+  const std::string& tags = memory_dump.arm_mte_metadata().memory_tags();
+
+  CBS("");
+  CBS("Memory tags around the fault address (0x%" PRIx64 "), one tag per %zu bytes:",
+      signal.fault_address(), kTagGranuleSize);
+  constexpr uintptr_t kRowStartMask = ~(kNumTagColumns * kTagGranuleSize - 1);
+
+  size_t tag_index = 0;
+  size_t num_tags = tags.length();
+  uintptr_t fault_granule = untag_address(signal.fault_address()) & ~(kTagGranuleSize - 1);
+  for (size_t row = 0; tag_index < num_tags; ++row) {
+    uintptr_t row_addr =
+        (memory_dump.begin_address() + row * kNumTagColumns * kTagGranuleSize) & kRowStartMask;
+    std::string row_contents;
+    bool row_has_fault = false;
+
+    for (size_t column = 0; column < kNumTagColumns; ++column) {
+      uintptr_t granule_addr = row_addr + column * kTagGranuleSize;
+      if (granule_addr < memory_dump.begin_address() ||
+          granule_addr >= memory_dump.begin_address() + num_tags * kTagGranuleSize) {
+        row_contents += " . ";
+      } else if (granule_addr == fault_granule) {
+        row_contents += StringPrintf("[%1hhx]", tags[tag_index++]);
+        row_has_fault = true;
+      } else {
+        row_contents += StringPrintf(" %1hhx ", tags[tag_index++]);
+      }
+    }
+
+    if (row_contents.back() == ' ') row_contents.pop_back();
+
+    if (row_has_fault) {
+      CBS("    =>0x%" PRIxPTR ":%s", row_addr, row_contents.c_str());
+    } else {
+      CBS("      0x%" PRIxPTR ":%s", row_addr, row_contents.c_str());
+    }
+  }
+}
+
 static void print_main_thread(CallbackType callback, const Tombstone& tombstone,
                               const Thread& thread) {
   print_thread_header(callback, tombstone, thread, true);
@@ -299,6 +357,8 @@
     }
   }
 
+  print_tag_dump(callback, tombstone);
+
   print_thread_memory_dump(callback, tombstone, thread);
 
   CBS("");
diff --git a/debuggerd/proto/tombstone.proto b/debuggerd/proto/tombstone.proto
index 22fc30e..a701212 100644
--- a/debuggerd/proto/tombstone.proto
+++ b/debuggerd/proto/tombstone.proto
@@ -56,8 +56,11 @@
 
   bool has_fault_address = 8;
   uint64 fault_address = 9;
+  // Note, may or may not contain the dump of the actual memory contents. Currently, on arm64, we
+  // only include metadata, and not the contents.
+  MemoryDump fault_adjacent_metadata = 10;
 
-  reserved 10 to 999;
+  reserved 11 to 999;
 }
 
 message HeapObject {
@@ -142,14 +145,22 @@
   reserved 9 to 999;
 }
 
+message ArmMTEMetadata {
+  // One memory tag per granule (e.g. every 16 bytes) of regular memory.
+  bytes memory_tags = 1;
+  reserved 2 to 999;
+}
+
 message MemoryDump {
   string register_name = 1;
   string mapping_name = 2;
   uint64 begin_address = 3;
   bytes memory = 4;
-  bytes tags = 5;
+  oneof metadata {
+    ArmMTEMetadata arm_mte_metadata = 6;
+  }
 
-  reserved 6 to 999;
+  reserved 5, 7 to 999;
 }
 
 message MemoryMapping {