Do not encode newline characters for abort/log. The isprint call will return false for chars such as newlines, which means that the values '\n' and '\t' get encoded which was not the intent of encodeding the abort and log messages. Add two oct_encode versions, one that encodes all non-ascii values and only non-printable ascii values for abort and log data. The other encoding function encodes all chars that fail isprint() which is used for extra crash data. Add new unit tests to verify that characters like newlines are not encoded in the right places. Bug: 381259755 Test: All unit tests pass. Change-Id: I682f10e13a2e80ddfa7e87dfdf8181342eb22374

commit: 69210a64000ef8a6bc24d6428cc29b5d8d41f8cd [log] [tgz]
author: Christopher Ferris <cferris@google.com> Fri Dec 06 09:35:21 2024 -0800
committer: Christopher Ferris <cferris@google.com> Tue Dec 10 11:48:13 2024 -0800
tree: ece71940e92b7b0f2635116a2972ca73a5d0cd9a
parent: 43772f2bc294573f297f41004baf56c067916855 [diff]
diff --git a/debuggerd/debuggerd_test.cpp b/debuggerd/debuggerd_test.cpp
index 5bdc946..04a7df8 100644
--- a/debuggerd/debuggerd_test.cpp
+++ b/debuggerd/debuggerd_test.cpp

@@ -3303,8 +3303,44 @@
   ASSERT_MATCH(result, ":\\s*This is on the next line.");
 }
 
-TEST_F(CrasherTest, log_with_non_utf8) {
-  StartProcess([]() { LOG(FATAL) << "Invalid UTF-8: \xA0\xB0\xC0\xD0 and some other data."; });
+TEST_F(CrasherTest, log_with_non_printable_ascii_verify_encoded) {
+  static const std::string kEncodedStr =
+      "\x5C\x31"
+      "\x5C\x32"
+      "\x5C\x33"
+      "\x5C\x34"
+      "\x5C\x35"
+      "\x5C\x36"
+      "\x5C\x37"
+      "\x5C\x31\x30"
+      "\x5C\x31\x36"
+      "\x5C\x31\x37"
+      "\x5C\x32\x30"
+      "\x5C\x32\x31"
+      "\x5C\x32\x32"
+      "\x5C\x32\x33"
+      "\x5C\x32\x34"
+      "\x5C\x32\x35"
+      "\x5C\x32\x36"
+      "\x5C\x32\x37"
+      "\x5C\x33\x30"
+      "\x5C\x33\x31"
+      "\x5C\x33\x32"
+      "\x5C\x33\x33"
+      "\x5C\x33\x34"
+      "\x5C\x33\x35"
+      "\x5C\x33\x36"
+      "\x5C\x33\x37"
+      "\x5C\x31\x37\x37"
+      "\x5C\x32\x34\x30"
+      "\x5C\x32\x36\x30"
+      "\x5C\x33\x30\x30"
+      "\x5C\x33\x32\x30";
+  StartProcess([]() {
+    LOG(FATAL) << "Encoded: "
+                  "\x1\x2\x3\x4\x5\x6\x7\x8\xe\xf\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b"
+                  "\x1c\x1d\x1e\x1f\x7f\xA0\xB0\xC0\xD0 after";
+  });
 
   unique_fd output_fd;
   StartIntercept(&output_fd);
@@ -3317,15 +3353,38 @@
   std::string result;
   ConsumeFd(std::move(output_fd), &result);
   // Verify the abort message is sanitized properly.
-  size_t pos = result.find(
-      "Abort message: 'Invalid UTF-8: "
-      "\x5C\x32\x34\x30\x5C\x32\x36\x30\x5C\x33\x30\x30\x5C\x33\x32\x30 and some other data.'");
+  size_t pos = result.find(std::string("Abort message: 'Encoded: ") + kEncodedStr + " after'");
   EXPECT_TRUE(pos != std::string::npos) << "Couldn't find sanitized abort message: " << result;
 
   // Make sure that the log message is sanitized properly too.
-  EXPECT_TRUE(
-      result.find("Invalid UTF-8: \x5C\x32\x34\x30\x5C\x32\x36\x30\x5C\x33\x30\x30\x5C\x33\x32\x30 "
-                  "and some other data.",
-                  pos + 30) != std::string::npos)
+  EXPECT_TRUE(result.find(std::string("Encoded: ") + kEncodedStr + " after", pos + 1) !=
+              std::string::npos)
+      << "Couldn't find sanitized log message: " << result;
+}
+
+TEST_F(CrasherTest, log_with_with_special_printable_ascii) {
+  static const std::string kMsg = "Not encoded: \t\v\f\r\n after";
+  StartProcess([]() { LOG(FATAL) << kMsg; });
+
+  unique_fd output_fd;
+  StartIntercept(&output_fd);
+  FinishCrasher();
+  AssertDeath(SIGABRT);
+  int intercept_result;
+  FinishIntercept(&intercept_result);
+  ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";
+
+  std::string result;
+  ConsumeFd(std::move(output_fd), &result);
+  // Verify the abort message does not remove characters that are UTF8 but
+  // are, technically, not printable.
+  size_t pos = result.find(std::string("Abort message: '") + kMsg + "'");
+  EXPECT_TRUE(pos != std::string::npos) << "Couldn't find abort message: " << result;
+
+  // Make sure that the log message is handled properly too.
+  // The logger automatically splits a newline message into two pieces.
+  pos = result.find("Not encoded: \t\v\f\r", pos + kMsg.size());
+  EXPECT_TRUE(pos != std::string::npos) << "Couldn't find log message: " << result;
+  EXPECT_TRUE(result.find(" after", pos + 1) != std::string::npos)
       << "Couldn't find sanitized log message: " << result;
 }

diff --git a/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h b/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h
index df22e01..819a99d 100644
--- a/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h
+++ b/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h

@@ -30,4 +30,7 @@
 constexpr size_t kNumTagColumns = 16;
 constexpr size_t kNumTagRows = 16;
 
-std::string oct_encode(const std::string& data);
+// Encode all non-ascii values and also ascii values that are not printable.
+std::string oct_encode_non_ascii_printable(const std::string& data);
+// Encode any value that fails isprint(), includes encoding chars like '\n' and '\t'.
+std::string oct_encode_non_printable(const std::string& data);

diff --git a/debuggerd/libdebuggerd/tombstone_proto.cpp b/debuggerd/libdebuggerd/tombstone_proto.cpp
index ef303f0..d3ac49a 100644
--- a/debuggerd/libdebuggerd/tombstone_proto.cpp
+++ b/debuggerd/libdebuggerd/tombstone_proto.cpp

@@ -467,7 +467,7 @@
   msg.resize(index);
 
   // Make sure only UTF8 characters are present since abort_message is a string.
-  tombstone->set_abort_message(oct_encode(msg));
+  tombstone->set_abort_message(oct_encode_non_ascii_printable(msg));
 }
 
 static void dump_open_fds(Tombstone* tombstone, const OpenFilesList* open_files) {
@@ -776,7 +776,7 @@
       log_msg->set_priority(prio);
       log_msg->set_tag(tag);
       // Make sure only UTF8 characters are present since message is a string.
-      log_msg->set_message(oct_encode(msg));
+      log_msg->set_message(oct_encode_non_ascii_printable(msg));
     } while ((msg = nl));
   }
   android_logger_list_free(logger_list);

diff --git a/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp b/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
index e885c5a..0474ae7 100644
--- a/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
+++ b/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp

@@ -17,6 +17,7 @@
 #include <libdebuggerd/tombstone_proto_to_text.h>
 #include <libdebuggerd/utility_host.h>
 
+#include <ctype.h>
 #include <inttypes.h>
 
 #include <algorithm>
@@ -463,8 +464,8 @@
   }
 
   for (const auto& crash_detail : tombstone.crash_details()) {
-    std::string oct_encoded_name = oct_encode(crash_detail.name());
-    std::string oct_encoded_data = oct_encode(crash_detail.data());
+    std::string oct_encoded_name = oct_encode_non_printable(crash_detail.name());
+    std::string oct_encoded_data = oct_encode_non_printable(crash_detail.data());
     CBL("Extra crash detail: %s: '%s'", oct_encoded_name.c_str(), oct_encoded_data.c_str());
   }
 

diff --git a/debuggerd/libdebuggerd/utility_host.cpp b/debuggerd/libdebuggerd/utility_host.cpp
index 4efa03c..d87f4fb 100644
--- a/debuggerd/libdebuggerd/utility_host.cpp
+++ b/debuggerd/libdebuggerd/utility_host.cpp

@@ -16,6 +16,7 @@
 
 #include "libdebuggerd/utility_host.h"
 
+#include <ctype.h>
 #include <sys/prctl.h>
 
 #include <charconv>
@@ -102,23 +103,31 @@
   return describe_end(value, desc);
 }
 
-std::string oct_encode(const std::string& data) {
+static std::string oct_encode(const std::string& data, bool (*should_encode_func)(int)) {
   std::string oct_encoded;
   oct_encoded.reserve(data.size());
 
   // N.B. the unsigned here is very important, otherwise e.g. \255 would render as
   // \-123 (and overflow our buffer).
   for (unsigned char c : data) {
-    if (isprint(c)) {
-      oct_encoded += c;
-    } else {
+    if (should_encode_func(c)) {
       std::string oct_digits("\\\0\0\0", 4);
       // char is encodable in 3 oct digits
       static_assert(std::numeric_limits<unsigned char>::max() <= 8 * 8 * 8);
       auto [ptr, ec] = std::to_chars(oct_digits.data() + 1, oct_digits.data() + 4, c, 8);
       oct_digits.resize(ptr - oct_digits.data());
       oct_encoded += oct_digits;
+    } else {
+      oct_encoded += c;
     }
   }
   return oct_encoded;
 }
+
+std::string oct_encode_non_ascii_printable(const std::string& data) {
+  return oct_encode(data, [](int c) { return !isgraph(c) && !isspace(c); });
+}
+
+std::string oct_encode_non_printable(const std::string& data) {
+  return oct_encode(data, [](int c) { return !isprint(c); });
+}
commit	69210a64000ef8a6bc24d6428cc29b5d8d41f8cd	[log] [tgz]
author	Christopher Ferris <cferris@google.com>	Fri Dec 06 09:35:21 2024 -0800
committer	Christopher Ferris <cferris@google.com>	Tue Dec 10 11:48:13 2024 -0800
tree	ece71940e92b7b0f2635116a2972ca73a5d0cd9a
parent	43772f2bc294573f297f41004baf56c067916855 [diff]