Merge "Remove non-UTF8 characters from string fields." into main
diff --git a/debuggerd/debuggerd_test.cpp b/debuggerd/debuggerd_test.cpp
index 13c8d70..5bdc946 100644
--- a/debuggerd/debuggerd_test.cpp
+++ b/debuggerd/debuggerd_test.cpp
@@ -3302,3 +3302,30 @@
   ASSERT_MATCH(result, ":\\s*This line has a newline.");
   ASSERT_MATCH(result, ":\\s*This is on the next line.");
 }
+
+TEST_F(CrasherTest, log_with_non_utf8) {
+  StartProcess([]() { LOG(FATAL) << "Invalid UTF-8: \xA0\xB0\xC0\xD0 and some other data."; });
+
+  unique_fd output_fd;
+  StartIntercept(&output_fd);
+  FinishCrasher();
+  AssertDeath(SIGABRT);
+  int intercept_result;
+  FinishIntercept(&intercept_result);
+  ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";
+
+  std::string result;
+  ConsumeFd(std::move(output_fd), &result);
+  // Verify the abort message is sanitized properly.
+  size_t pos = result.find(
+      "Abort message: 'Invalid UTF-8: "
+      "\x5C\x32\x34\x30\x5C\x32\x36\x30\x5C\x33\x30\x30\x5C\x33\x32\x30 and some other data.'");
+  EXPECT_TRUE(pos != std::string::npos) << "Couldn't find sanitized abort message: " << result;
+
+  // Make sure that the log message is sanitized properly too.
+  EXPECT_TRUE(
+      result.find("Invalid UTF-8: \x5C\x32\x34\x30\x5C\x32\x36\x30\x5C\x33\x30\x30\x5C\x33\x32\x30 "
+                  "and some other data.",
+                  pos + 30) != std::string::npos)
+      << "Couldn't find sanitized log message: " << result;
+}
diff --git a/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h b/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h
index 43fb8bd..df22e01 100644
--- a/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h
+++ b/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h
@@ -29,3 +29,5 @@
 // Number of rows and columns to display in an MTE tag dump.
 constexpr size_t kNumTagColumns = 16;
 constexpr size_t kNumTagRows = 16;
+
+std::string oct_encode(const std::string& data);
diff --git a/debuggerd/libdebuggerd/tombstone_proto.cpp b/debuggerd/libdebuggerd/tombstone_proto.cpp
index d59358c..ef303f0 100644
--- a/debuggerd/libdebuggerd/tombstone_proto.cpp
+++ b/debuggerd/libdebuggerd/tombstone_proto.cpp
@@ -34,10 +34,13 @@
 #include <sys/sysinfo.h>
 #include <time.h>
 
+#include <map>
 #include <memory>
 #include <optional>
 #include <set>
 #include <string>
+#include <utility>
+#include <vector>
 
 #include <async_safe/log.h>
 
@@ -463,7 +466,8 @@
   }
   msg.resize(index);
 
-  tombstone->set_abort_message(msg);
+  // Make sure only UTF8 characters are present since abort_message is a string.
+  tombstone->set_abort_message(oct_encode(msg));
 }
 
 static void dump_open_fds(Tombstone* tombstone, const OpenFilesList* open_files) {
@@ -771,7 +775,8 @@
       log_msg->set_tid(log_entry.entry.tid);
       log_msg->set_priority(prio);
       log_msg->set_tag(tag);
-      log_msg->set_message(msg);
+      // Make sure only UTF8 characters are present since message is a string.
+      log_msg->set_message(oct_encode(msg));
     } while ((msg = nl));
   }
   android_logger_list_free(logger_list);
diff --git a/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp b/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
index fedafc0..e885c5a 100644
--- a/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
+++ b/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
@@ -19,9 +19,9 @@
 
 #include <inttypes.h>
 
-#include <charconv>
+#include <algorithm>
 #include <functional>
-#include <limits>
+#include <optional>
 #include <set>
 #include <string>
 #include <unordered_set>
@@ -32,6 +32,7 @@
 #include <android-base/strings.h>
 #include <android-base/unique_fd.h>
 
+#include "libdebuggerd/utility_host.h"
 #include "tombstone.pb.h"
 
 using android::base::StringAppendF;
@@ -418,27 +419,6 @@
   }
 }
 
-static std::string oct_encode(const std::string& data) {
-  std::string oct_encoded;
-  oct_encoded.reserve(data.size());
-
-  // N.B. the unsigned here is very important, otherwise e.g. \255 would render as
-  // \-123 (and overflow our buffer).
-  for (unsigned char c : data) {
-    if (isprint(c)) {
-      oct_encoded += c;
-    } else {
-      std::string oct_digits("\\\0\0\0", 4);
-      // char is encodable in 3 oct digits
-      static_assert(std::numeric_limits<unsigned char>::max() <= 8 * 8 * 8);
-      auto [ptr, ec] = std::to_chars(oct_digits.data() + 1, oct_digits.data() + 4, c, 8);
-      oct_digits.resize(ptr - oct_digits.data());
-      oct_encoded += oct_digits;
-    }
-  }
-  return oct_encoded;
-}
-
 static void print_main_thread(CallbackType callback, SymbolizeCallbackType symbolize,
                               const Tombstone& tombstone, const Thread& thread) {
   print_thread_header(callback, tombstone, thread, true);
diff --git a/debuggerd/libdebuggerd/utility_host.cpp b/debuggerd/libdebuggerd/utility_host.cpp
index 72a560c..4efa03c 100644
--- a/debuggerd/libdebuggerd/utility_host.cpp
+++ b/debuggerd/libdebuggerd/utility_host.cpp
@@ -18,6 +18,8 @@
 
 #include <sys/prctl.h>
 
+#include <charconv>
+#include <limits>
 #include <string>
 
 #include <android-base/stringprintf.h>
@@ -99,3 +101,24 @@
   DESCRIBE_FLAG(PR_PAC_APGAKEY);
   return describe_end(value, desc);
 }
+
+std::string oct_encode(const std::string& data) {
+  std::string oct_encoded;
+  oct_encoded.reserve(data.size());
+
+  // N.B. the unsigned here is very important, otherwise e.g. \255 would render as
+  // \-123 (and overflow our buffer).
+  for (unsigned char c : data) {
+    if (isprint(c)) {
+      oct_encoded += c;
+    } else {
+      std::string oct_digits("\\\0\0\0", 4);
+      // char is encodable in 3 oct digits
+      static_assert(std::numeric_limits<unsigned char>::max() <= 8 * 8 * 8);
+      auto [ptr, ec] = std::to_chars(oct_digits.data() + 1, oct_digits.data() + 4, c, 8);
+      oct_digits.resize(ptr - oct_digits.data());
+      oct_encoded += oct_digits;
+    }
+  }
+  return oct_encoded;
+}