Remove non-UTF8 characters from string fields.

The string type in the tombstone proto does not support non-UTF8
characters. Therefore, use the oct_encode function to encode the
abort_message field and message field from LogMessage.

Fix up stl includes, add ones that were missing and remove those not
being used.

Add new unit test to verify that the abort and log messages are
sanitized.

Bug: 279496937
Bug: 377940849
Bug: 378185483

Test: All unit tests pass.
Test: Ran pbtombstone on a crash with non-UTF8 characters and verified
Test: it processes properly after this change and fails before the change.
Change-Id: I3554d56caf9fcbfc410b4d554f6c3b4888b37e28
diff --git a/debuggerd/debuggerd_test.cpp b/debuggerd/debuggerd_test.cpp
index 13c8d70..5bdc946 100644
--- a/debuggerd/debuggerd_test.cpp
+++ b/debuggerd/debuggerd_test.cpp
@@ -3302,3 +3302,30 @@
   ASSERT_MATCH(result, ":\\s*This line has a newline.");
   ASSERT_MATCH(result, ":\\s*This is on the next line.");
 }
+
+TEST_F(CrasherTest, log_with_non_utf8) {
+  StartProcess([]() { LOG(FATAL) << "Invalid UTF-8: \xA0\xB0\xC0\xD0 and some other data."; });
+
+  unique_fd output_fd;
+  StartIntercept(&output_fd);
+  FinishCrasher();
+  AssertDeath(SIGABRT);
+  int intercept_result;
+  FinishIntercept(&intercept_result);
+  ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";
+
+  std::string result;
+  ConsumeFd(std::move(output_fd), &result);
+  // Verify the abort message is sanitized properly.
+  size_t pos = result.find(
+      "Abort message: 'Invalid UTF-8: "
+      "\x5C\x32\x34\x30\x5C\x32\x36\x30\x5C\x33\x30\x30\x5C\x33\x32\x30 and some other data.'");
+  EXPECT_TRUE(pos != std::string::npos) << "Couldn't find sanitized abort message: " << result;
+
+  // Make sure that the log message is sanitized properly too.
+  EXPECT_TRUE(
+      result.find("Invalid UTF-8: \x5C\x32\x34\x30\x5C\x32\x36\x30\x5C\x33\x30\x30\x5C\x33\x32\x30 "
+                  "and some other data.",
+                  pos + 30) != std::string::npos)
+      << "Couldn't find sanitized log message: " << result;
+}
diff --git a/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h b/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h
index 43fb8bd..df22e01 100644
--- a/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h
+++ b/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h
@@ -29,3 +29,5 @@
 // Number of rows and columns to display in an MTE tag dump.
 constexpr size_t kNumTagColumns = 16;
 constexpr size_t kNumTagRows = 16;
+
+std::string oct_encode(const std::string& data);
diff --git a/debuggerd/libdebuggerd/tombstone_proto.cpp b/debuggerd/libdebuggerd/tombstone_proto.cpp
index d59358c..ef303f0 100644
--- a/debuggerd/libdebuggerd/tombstone_proto.cpp
+++ b/debuggerd/libdebuggerd/tombstone_proto.cpp
@@ -34,10 +34,13 @@
 #include <sys/sysinfo.h>
 #include <time.h>
 
+#include <map>
 #include <memory>
 #include <optional>
 #include <set>
 #include <string>
+#include <utility>
+#include <vector>
 
 #include <async_safe/log.h>
 
@@ -463,7 +466,8 @@
   }
   msg.resize(index);
 
-  tombstone->set_abort_message(msg);
+  // Make sure only UTF8 characters are present since abort_message is a string.
+  tombstone->set_abort_message(oct_encode(msg));
 }
 
 static void dump_open_fds(Tombstone* tombstone, const OpenFilesList* open_files) {
@@ -771,7 +775,8 @@
       log_msg->set_tid(log_entry.entry.tid);
       log_msg->set_priority(prio);
       log_msg->set_tag(tag);
-      log_msg->set_message(msg);
+      // Make sure only UTF8 characters are present since message is a string.
+      log_msg->set_message(oct_encode(msg));
     } while ((msg = nl));
   }
   android_logger_list_free(logger_list);
diff --git a/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp b/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
index fedafc0..e885c5a 100644
--- a/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
+++ b/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
@@ -19,9 +19,9 @@
 
 #include <inttypes.h>
 
-#include <charconv>
+#include <algorithm>
 #include <functional>
-#include <limits>
+#include <optional>
 #include <set>
 #include <string>
 #include <unordered_set>
@@ -32,6 +32,7 @@
 #include <android-base/strings.h>
 #include <android-base/unique_fd.h>
 
+#include "libdebuggerd/utility_host.h"
 #include "tombstone.pb.h"
 
 using android::base::StringAppendF;
@@ -418,27 +419,6 @@
   }
 }
 
-static std::string oct_encode(const std::string& data) {
-  std::string oct_encoded;
-  oct_encoded.reserve(data.size());
-
-  // N.B. the unsigned here is very important, otherwise e.g. \255 would render as
-  // \-123 (and overflow our buffer).
-  for (unsigned char c : data) {
-    if (isprint(c)) {
-      oct_encoded += c;
-    } else {
-      std::string oct_digits("\\\0\0\0", 4);
-      // char is encodable in 3 oct digits
-      static_assert(std::numeric_limits<unsigned char>::max() <= 8 * 8 * 8);
-      auto [ptr, ec] = std::to_chars(oct_digits.data() + 1, oct_digits.data() + 4, c, 8);
-      oct_digits.resize(ptr - oct_digits.data());
-      oct_encoded += oct_digits;
-    }
-  }
-  return oct_encoded;
-}
-
 static void print_main_thread(CallbackType callback, SymbolizeCallbackType symbolize,
                               const Tombstone& tombstone, const Thread& thread) {
   print_thread_header(callback, tombstone, thread, true);
diff --git a/debuggerd/libdebuggerd/utility_host.cpp b/debuggerd/libdebuggerd/utility_host.cpp
index 72a560c..4efa03c 100644
--- a/debuggerd/libdebuggerd/utility_host.cpp
+++ b/debuggerd/libdebuggerd/utility_host.cpp
@@ -18,6 +18,8 @@
 
 #include <sys/prctl.h>
 
+#include <charconv>
+#include <limits>
 #include <string>
 
 #include <android-base/stringprintf.h>
@@ -99,3 +101,24 @@
   DESCRIBE_FLAG(PR_PAC_APGAKEY);
   return describe_end(value, desc);
 }
+
+std::string oct_encode(const std::string& data) {
+  std::string oct_encoded;
+  oct_encoded.reserve(data.size());
+
+  // N.B. the unsigned here is very important, otherwise e.g. \255 would render as
+  // \-123 (and overflow our buffer).
+  for (unsigned char c : data) {
+    if (isprint(c)) {
+      oct_encoded += c;
+    } else {
+      std::string oct_digits("\\\0\0\0", 4);
+      // char is encodable in 3 oct digits
+      static_assert(std::numeric_limits<unsigned char>::max() <= 8 * 8 * 8);
+      auto [ptr, ec] = std::to_chars(oct_digits.data() + 1, oct_digits.data() + 4, c, 8);
+      oct_digits.resize(ptr - oct_digits.data());
+      oct_encoded += oct_digits;
+    }
+  }
+  return oct_encoded;
+}