Merge "Do not encode newline characters for abort/log." into main

commit: 033e4ceeaa76e040aeda3f48ac800fe9ce765ebc [log] [tgz]
author: Christopher Ferris <cferris@google.com> Tue Dec 10 22:23:24 2024 +0000
committer: Gerrit Code Review <noreply-gerritcodereview@google.com> Tue Dec 10 22:23:24 2024 +0000
tree: 4cdab355a8b180a873f0d231a3f0517e30c3797c
parent: 5ff6db8e0d732803f7de74b841e8aaa8cf0d0fb9 [diff]
parent: 69210a64000ef8a6bc24d6428cc29b5d8d41f8cd [diff]
diff --git a/debuggerd/debuggerd_test.cpp b/debuggerd/debuggerd_test.cpp
index 5bdc946..04a7df8 100644
--- a/debuggerd/debuggerd_test.cpp
+++ b/debuggerd/debuggerd_test.cpp

@@ -3303,8 +3303,44 @@
   ASSERT_MATCH(result, ":\\s*This is on the next line.");
 }
 
-TEST_F(CrasherTest, log_with_non_utf8) {
-  StartProcess([]() { LOG(FATAL) << "Invalid UTF-8: \xA0\xB0\xC0\xD0 and some other data."; });
+TEST_F(CrasherTest, log_with_non_printable_ascii_verify_encoded) {
+  static const std::string kEncodedStr =
+      "\x5C\x31"
+      "\x5C\x32"
+      "\x5C\x33"
+      "\x5C\x34"
+      "\x5C\x35"
+      "\x5C\x36"
+      "\x5C\x37"
+      "\x5C\x31\x30"
+      "\x5C\x31\x36"
+      "\x5C\x31\x37"
+      "\x5C\x32\x30"
+      "\x5C\x32\x31"
+      "\x5C\x32\x32"
+      "\x5C\x32\x33"
+      "\x5C\x32\x34"
+      "\x5C\x32\x35"
+      "\x5C\x32\x36"
+      "\x5C\x32\x37"
+      "\x5C\x33\x30"
+      "\x5C\x33\x31"
+      "\x5C\x33\x32"
+      "\x5C\x33\x33"
+      "\x5C\x33\x34"
+      "\x5C\x33\x35"
+      "\x5C\x33\x36"
+      "\x5C\x33\x37"
+      "\x5C\x31\x37\x37"
+      "\x5C\x32\x34\x30"
+      "\x5C\x32\x36\x30"
+      "\x5C\x33\x30\x30"
+      "\x5C\x33\x32\x30";
+  StartProcess([]() {
+    LOG(FATAL) << "Encoded: "
+                  "\x1\x2\x3\x4\x5\x6\x7\x8\xe\xf\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b"
+                  "\x1c\x1d\x1e\x1f\x7f\xA0\xB0\xC0\xD0 after";
+  });
 
   unique_fd output_fd;
   StartIntercept(&output_fd);
@@ -3317,15 +3353,38 @@
   std::string result;
   ConsumeFd(std::move(output_fd), &result);
   // Verify the abort message is sanitized properly.
-  size_t pos = result.find(
-      "Abort message: 'Invalid UTF-8: "
-      "\x5C\x32\x34\x30\x5C\x32\x36\x30\x5C\x33\x30\x30\x5C\x33\x32\x30 and some other data.'");
+  size_t pos = result.find(std::string("Abort message: 'Encoded: ") + kEncodedStr + " after'");
   EXPECT_TRUE(pos != std::string::npos) << "Couldn't find sanitized abort message: " << result;
 
   // Make sure that the log message is sanitized properly too.
-  EXPECT_TRUE(
-      result.find("Invalid UTF-8: \x5C\x32\x34\x30\x5C\x32\x36\x30\x5C\x33\x30\x30\x5C\x33\x32\x30 "
-                  "and some other data.",
-                  pos + 30) != std::string::npos)
+  EXPECT_TRUE(result.find(std::string("Encoded: ") + kEncodedStr + " after", pos + 1) !=
+              std::string::npos)
+      << "Couldn't find sanitized log message: " << result;
+}
+
+TEST_F(CrasherTest, log_with_with_special_printable_ascii) {
+  static const std::string kMsg = "Not encoded: \t\v\f\r\n after";
+  StartProcess([]() { LOG(FATAL) << kMsg; });
+
+  unique_fd output_fd;
+  StartIntercept(&output_fd);
+  FinishCrasher();
+  AssertDeath(SIGABRT);
+  int intercept_result;
+  FinishIntercept(&intercept_result);
+  ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";
+
+  std::string result;
+  ConsumeFd(std::move(output_fd), &result);
+  // Verify the abort message does not remove characters that are UTF8 but
+  // are, technically, not printable.
+  size_t pos = result.find(std::string("Abort message: '") + kMsg + "'");
+  EXPECT_TRUE(pos != std::string::npos) << "Couldn't find abort message: " << result;
+
+  // Make sure that the log message is handled properly too.
+  // The logger automatically splits a newline message into two pieces.
+  pos = result.find("Not encoded: \t\v\f\r", pos + kMsg.size());
+  EXPECT_TRUE(pos != std::string::npos) << "Couldn't find log message: " << result;
+  EXPECT_TRUE(result.find(" after", pos + 1) != std::string::npos)
       << "Couldn't find sanitized log message: " << result;
 }

diff --git a/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h b/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h
index df22e01..819a99d 100644
--- a/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h
+++ b/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h

@@ -30,4 +30,7 @@
 constexpr size_t kNumTagColumns = 16;
 constexpr size_t kNumTagRows = 16;
 
-std::string oct_encode(const std::string& data);
+// Encode all non-ascii values and also ascii values that are not printable.
+std::string oct_encode_non_ascii_printable(const std::string& data);
+// Encode any value that fails isprint(), includes encoding chars like '\n' and '\t'.
+std::string oct_encode_non_printable(const std::string& data);

diff --git a/debuggerd/libdebuggerd/tombstone_proto.cpp b/debuggerd/libdebuggerd/tombstone_proto.cpp
index ef303f0..d3ac49a 100644
--- a/debuggerd/libdebuggerd/tombstone_proto.cpp
+++ b/debuggerd/libdebuggerd/tombstone_proto.cpp

@@ -467,7 +467,7 @@
   msg.resize(index);
 
   // Make sure only UTF8 characters are present since abort_message is a string.
-  tombstone->set_abort_message(oct_encode(msg));
+  tombstone->set_abort_message(oct_encode_non_ascii_printable(msg));
 }
 
 static void dump_open_fds(Tombstone* tombstone, const OpenFilesList* open_files) {
@@ -776,7 +776,7 @@
       log_msg->set_priority(prio);
       log_msg->set_tag(tag);
       // Make sure only UTF8 characters are present since message is a string.
-      log_msg->set_message(oct_encode(msg));
+      log_msg->set_message(oct_encode_non_ascii_printable(msg));
     } while ((msg = nl));
   }
   android_logger_list_free(logger_list);

diff --git a/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp b/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
index e885c5a..0474ae7 100644
--- a/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
+++ b/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp

@@ -17,6 +17,7 @@
 #include <libdebuggerd/tombstone_proto_to_text.h>
 #include <libdebuggerd/utility_host.h>
 
+#include <ctype.h>
 #include <inttypes.h>
 
 #include <algorithm>
@@ -463,8 +464,8 @@
   }
 
   for (const auto& crash_detail : tombstone.crash_details()) {
-    std::string oct_encoded_name = oct_encode(crash_detail.name());
-    std::string oct_encoded_data = oct_encode(crash_detail.data());
+    std::string oct_encoded_name = oct_encode_non_printable(crash_detail.name());
+    std::string oct_encoded_data = oct_encode_non_printable(crash_detail.data());
     CBL("Extra crash detail: %s: '%s'", oct_encoded_name.c_str(), oct_encoded_data.c_str());
   }
 

diff --git a/debuggerd/libdebuggerd/utility_host.cpp b/debuggerd/libdebuggerd/utility_host.cpp
index 4efa03c..d87f4fb 100644
--- a/debuggerd/libdebuggerd/utility_host.cpp
+++ b/debuggerd/libdebuggerd/utility_host.cpp

@@ -16,6 +16,7 @@
 
 #include "libdebuggerd/utility_host.h"
 
+#include <ctype.h>
 #include <sys/prctl.h>
 
 #include <charconv>
@@ -102,23 +103,31 @@
   return describe_end(value, desc);
 }
 
-std::string oct_encode(const std::string& data) {
+static std::string oct_encode(const std::string& data, bool (*should_encode_func)(int)) {
   std::string oct_encoded;
   oct_encoded.reserve(data.size());
 
   // N.B. the unsigned here is very important, otherwise e.g. \255 would render as
   // \-123 (and overflow our buffer).
   for (unsigned char c : data) {
-    if (isprint(c)) {
-      oct_encoded += c;
-    } else {
+    if (should_encode_func(c)) {
       std::string oct_digits("\\\0\0\0", 4);
       // char is encodable in 3 oct digits
       static_assert(std::numeric_limits<unsigned char>::max() <= 8 * 8 * 8);
       auto [ptr, ec] = std::to_chars(oct_digits.data() + 1, oct_digits.data() + 4, c, 8);
       oct_digits.resize(ptr - oct_digits.data());
       oct_encoded += oct_digits;
+    } else {
+      oct_encoded += c;
     }
   }
   return oct_encoded;
 }
+
+std::string oct_encode_non_ascii_printable(const std::string& data) {
+  return oct_encode(data, [](int c) { return !isgraph(c) && !isspace(c); });
+}
+
+std::string oct_encode_non_printable(const std::string& data) {
+  return oct_encode(data, [](int c) { return !isprint(c); });
+}
commit	033e4ceeaa76e040aeda3f48ac800fe9ce765ebc	[log] [tgz]
author	Christopher Ferris <cferris@google.com>	Tue Dec 10 22:23:24 2024 +0000
committer	Gerrit Code Review <noreply-gerritcodereview@google.com>	Tue Dec 10 22:23:24 2024 +0000
tree	4cdab355a8b180a873f0d231a3f0517e30c3797c
parent	5ff6db8e0d732803f7de74b841e8aaa8cf0d0fb9 [diff]
parent	69210a64000ef8a6bc24d6428cc29b5d8d41f8cd [diff]