Merge "Do not encode newline characters for abort/log." into main
diff --git a/debuggerd/debuggerd_test.cpp b/debuggerd/debuggerd_test.cpp
index 5bdc946..04a7df8 100644
--- a/debuggerd/debuggerd_test.cpp
+++ b/debuggerd/debuggerd_test.cpp
@@ -3303,8 +3303,44 @@
ASSERT_MATCH(result, ":\\s*This is on the next line.");
}
-TEST_F(CrasherTest, log_with_non_utf8) {
- StartProcess([]() { LOG(FATAL) << "Invalid UTF-8: \xA0\xB0\xC0\xD0 and some other data."; });
+TEST_F(CrasherTest, log_with_non_printable_ascii_verify_encoded) {
+ static const std::string kEncodedStr =
+ "\x5C\x31"
+ "\x5C\x32"
+ "\x5C\x33"
+ "\x5C\x34"
+ "\x5C\x35"
+ "\x5C\x36"
+ "\x5C\x37"
+ "\x5C\x31\x30"
+ "\x5C\x31\x36"
+ "\x5C\x31\x37"
+ "\x5C\x32\x30"
+ "\x5C\x32\x31"
+ "\x5C\x32\x32"
+ "\x5C\x32\x33"
+ "\x5C\x32\x34"
+ "\x5C\x32\x35"
+ "\x5C\x32\x36"
+ "\x5C\x32\x37"
+ "\x5C\x33\x30"
+ "\x5C\x33\x31"
+ "\x5C\x33\x32"
+ "\x5C\x33\x33"
+ "\x5C\x33\x34"
+ "\x5C\x33\x35"
+ "\x5C\x33\x36"
+ "\x5C\x33\x37"
+ "\x5C\x31\x37\x37"
+ "\x5C\x32\x34\x30"
+ "\x5C\x32\x36\x30"
+ "\x5C\x33\x30\x30"
+ "\x5C\x33\x32\x30";
+ StartProcess([]() {
+ LOG(FATAL) << "Encoded: "
+ "\x1\x2\x3\x4\x5\x6\x7\x8\xe\xf\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b"
+ "\x1c\x1d\x1e\x1f\x7f\xA0\xB0\xC0\xD0 after";
+ });
unique_fd output_fd;
StartIntercept(&output_fd);
@@ -3317,15 +3353,38 @@
std::string result;
ConsumeFd(std::move(output_fd), &result);
// Verify the abort message is sanitized properly.
- size_t pos = result.find(
- "Abort message: 'Invalid UTF-8: "
- "\x5C\x32\x34\x30\x5C\x32\x36\x30\x5C\x33\x30\x30\x5C\x33\x32\x30 and some other data.'");
+ size_t pos = result.find(std::string("Abort message: 'Encoded: ") + kEncodedStr + " after'");
EXPECT_TRUE(pos != std::string::npos) << "Couldn't find sanitized abort message: " << result;
// Make sure that the log message is sanitized properly too.
- EXPECT_TRUE(
- result.find("Invalid UTF-8: \x5C\x32\x34\x30\x5C\x32\x36\x30\x5C\x33\x30\x30\x5C\x33\x32\x30 "
- "and some other data.",
- pos + 30) != std::string::npos)
+ EXPECT_TRUE(result.find(std::string("Encoded: ") + kEncodedStr + " after", pos + 1) !=
+ std::string::npos)
+ << "Couldn't find sanitized log message: " << result;
+}
+
+TEST_F(CrasherTest, log_with_with_special_printable_ascii) {
+ static const std::string kMsg = "Not encoded: \t\v\f\r\n after";
+ StartProcess([]() { LOG(FATAL) << kMsg; });
+
+ unique_fd output_fd;
+ StartIntercept(&output_fd);
+ FinishCrasher();
+ AssertDeath(SIGABRT);
+ int intercept_result;
+ FinishIntercept(&intercept_result);
+ ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";
+
+ std::string result;
+ ConsumeFd(std::move(output_fd), &result);
+ // Verify the abort message does not remove characters that are UTF8 but
+ // are, technically, not printable.
+ size_t pos = result.find(std::string("Abort message: '") + kMsg + "'");
+ EXPECT_TRUE(pos != std::string::npos) << "Couldn't find abort message: " << result;
+
+ // Make sure that the log message is handled properly too.
+ // The logger automatically splits a newline message into two pieces.
+ pos = result.find("Not encoded: \t\v\f\r", pos + kMsg.size());
+ EXPECT_TRUE(pos != std::string::npos) << "Couldn't find log message: " << result;
+ EXPECT_TRUE(result.find(" after", pos + 1) != std::string::npos)
<< "Couldn't find sanitized log message: " << result;
}
diff --git a/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h b/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h
index df22e01..819a99d 100644
--- a/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h
+++ b/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h
@@ -30,4 +30,7 @@
constexpr size_t kNumTagColumns = 16;
constexpr size_t kNumTagRows = 16;
-std::string oct_encode(const std::string& data);
+// Encode all non-ascii values and also ascii values that are not printable.
+std::string oct_encode_non_ascii_printable(const std::string& data);
+// Encode any value that fails isprint(), includes encoding chars like '\n' and '\t'.
+std::string oct_encode_non_printable(const std::string& data);
diff --git a/debuggerd/libdebuggerd/tombstone_proto.cpp b/debuggerd/libdebuggerd/tombstone_proto.cpp
index ef303f0..d3ac49a 100644
--- a/debuggerd/libdebuggerd/tombstone_proto.cpp
+++ b/debuggerd/libdebuggerd/tombstone_proto.cpp
@@ -467,7 +467,7 @@
msg.resize(index);
// Make sure only UTF8 characters are present since abort_message is a string.
- tombstone->set_abort_message(oct_encode(msg));
+ tombstone->set_abort_message(oct_encode_non_ascii_printable(msg));
}
static void dump_open_fds(Tombstone* tombstone, const OpenFilesList* open_files) {
@@ -776,7 +776,7 @@
log_msg->set_priority(prio);
log_msg->set_tag(tag);
// Make sure only UTF8 characters are present since message is a string.
- log_msg->set_message(oct_encode(msg));
+ log_msg->set_message(oct_encode_non_ascii_printable(msg));
} while ((msg = nl));
}
android_logger_list_free(logger_list);
diff --git a/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp b/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
index e885c5a..0474ae7 100644
--- a/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
+++ b/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
@@ -17,6 +17,7 @@
#include <libdebuggerd/tombstone_proto_to_text.h>
#include <libdebuggerd/utility_host.h>
+#include <ctype.h>
#include <inttypes.h>
#include <algorithm>
@@ -463,8 +464,8 @@
}
for (const auto& crash_detail : tombstone.crash_details()) {
- std::string oct_encoded_name = oct_encode(crash_detail.name());
- std::string oct_encoded_data = oct_encode(crash_detail.data());
+ std::string oct_encoded_name = oct_encode_non_printable(crash_detail.name());
+ std::string oct_encoded_data = oct_encode_non_printable(crash_detail.data());
CBL("Extra crash detail: %s: '%s'", oct_encoded_name.c_str(), oct_encoded_data.c_str());
}
diff --git a/debuggerd/libdebuggerd/utility_host.cpp b/debuggerd/libdebuggerd/utility_host.cpp
index 4efa03c..d87f4fb 100644
--- a/debuggerd/libdebuggerd/utility_host.cpp
+++ b/debuggerd/libdebuggerd/utility_host.cpp
@@ -16,6 +16,7 @@
#include "libdebuggerd/utility_host.h"
+#include <ctype.h>
#include <sys/prctl.h>
#include <charconv>
@@ -102,23 +103,31 @@
return describe_end(value, desc);
}
-std::string oct_encode(const std::string& data) {
+static std::string oct_encode(const std::string& data, bool (*should_encode_func)(int)) {
std::string oct_encoded;
oct_encoded.reserve(data.size());
// N.B. the unsigned here is very important, otherwise e.g. \255 would render as
// \-123 (and overflow our buffer).
for (unsigned char c : data) {
- if (isprint(c)) {
- oct_encoded += c;
- } else {
+ if (should_encode_func(c)) {
std::string oct_digits("\\\0\0\0", 4);
// char is encodable in 3 oct digits
static_assert(std::numeric_limits<unsigned char>::max() <= 8 * 8 * 8);
auto [ptr, ec] = std::to_chars(oct_digits.data() + 1, oct_digits.data() + 4, c, 8);
oct_digits.resize(ptr - oct_digits.data());
oct_encoded += oct_digits;
+ } else {
+ oct_encoded += c;
}
}
return oct_encoded;
}
+
+std::string oct_encode_non_ascii_printable(const std::string& data) {
+ return oct_encode(data, [](int c) { return !isgraph(c) && !isspace(c); });
+}
+
+std::string oct_encode_non_printable(const std::string& data) {
+ return oct_encode(data, [](int c) { return !isprint(c); });
+}