Do not encode newline characters for abort/log.
The isprint call will return false for chars such as newlines, which
means that the values '\n' and '\t' get encoded which was not the
intent of encodeding the abort and log messages. Add two oct_encode
versions, one that encodes all non-ascii values and only non-printable
ascii values for abort and log data. The other encoding function encodes
all chars that fail isprint() which is used for extra crash data.
Add new unit tests to verify that characters like newlines are not
encoded in the right places.
Bug: 381259755
Test: All unit tests pass.
Change-Id: I682f10e13a2e80ddfa7e87dfdf8181342eb22374
diff --git a/debuggerd/debuggerd_test.cpp b/debuggerd/debuggerd_test.cpp
index 5bdc946..04a7df8 100644
--- a/debuggerd/debuggerd_test.cpp
+++ b/debuggerd/debuggerd_test.cpp
@@ -3303,8 +3303,44 @@
ASSERT_MATCH(result, ":\\s*This is on the next line.");
}
-TEST_F(CrasherTest, log_with_non_utf8) {
- StartProcess([]() { LOG(FATAL) << "Invalid UTF-8: \xA0\xB0\xC0\xD0 and some other data."; });
+TEST_F(CrasherTest, log_with_non_printable_ascii_verify_encoded) {
+ static const std::string kEncodedStr =
+ "\x5C\x31"
+ "\x5C\x32"
+ "\x5C\x33"
+ "\x5C\x34"
+ "\x5C\x35"
+ "\x5C\x36"
+ "\x5C\x37"
+ "\x5C\x31\x30"
+ "\x5C\x31\x36"
+ "\x5C\x31\x37"
+ "\x5C\x32\x30"
+ "\x5C\x32\x31"
+ "\x5C\x32\x32"
+ "\x5C\x32\x33"
+ "\x5C\x32\x34"
+ "\x5C\x32\x35"
+ "\x5C\x32\x36"
+ "\x5C\x32\x37"
+ "\x5C\x33\x30"
+ "\x5C\x33\x31"
+ "\x5C\x33\x32"
+ "\x5C\x33\x33"
+ "\x5C\x33\x34"
+ "\x5C\x33\x35"
+ "\x5C\x33\x36"
+ "\x5C\x33\x37"
+ "\x5C\x31\x37\x37"
+ "\x5C\x32\x34\x30"
+ "\x5C\x32\x36\x30"
+ "\x5C\x33\x30\x30"
+ "\x5C\x33\x32\x30";
+ StartProcess([]() {
+ LOG(FATAL) << "Encoded: "
+ "\x1\x2\x3\x4\x5\x6\x7\x8\xe\xf\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b"
+ "\x1c\x1d\x1e\x1f\x7f\xA0\xB0\xC0\xD0 after";
+ });
unique_fd output_fd;
StartIntercept(&output_fd);
@@ -3317,15 +3353,38 @@
std::string result;
ConsumeFd(std::move(output_fd), &result);
// Verify the abort message is sanitized properly.
- size_t pos = result.find(
- "Abort message: 'Invalid UTF-8: "
- "\x5C\x32\x34\x30\x5C\x32\x36\x30\x5C\x33\x30\x30\x5C\x33\x32\x30 and some other data.'");
+ size_t pos = result.find(std::string("Abort message: 'Encoded: ") + kEncodedStr + " after'");
EXPECT_TRUE(pos != std::string::npos) << "Couldn't find sanitized abort message: " << result;
// Make sure that the log message is sanitized properly too.
- EXPECT_TRUE(
- result.find("Invalid UTF-8: \x5C\x32\x34\x30\x5C\x32\x36\x30\x5C\x33\x30\x30\x5C\x33\x32\x30 "
- "and some other data.",
- pos + 30) != std::string::npos)
+ EXPECT_TRUE(result.find(std::string("Encoded: ") + kEncodedStr + " after", pos + 1) !=
+ std::string::npos)
+ << "Couldn't find sanitized log message: " << result;
+}
+
+TEST_F(CrasherTest, log_with_with_special_printable_ascii) {
+ static const std::string kMsg = "Not encoded: \t\v\f\r\n after";
+ StartProcess([]() { LOG(FATAL) << kMsg; });
+
+ unique_fd output_fd;
+ StartIntercept(&output_fd);
+ FinishCrasher();
+ AssertDeath(SIGABRT);
+ int intercept_result;
+ FinishIntercept(&intercept_result);
+ ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";
+
+ std::string result;
+ ConsumeFd(std::move(output_fd), &result);
+ // Verify the abort message does not remove characters that are UTF8 but
+ // are, technically, not printable.
+ size_t pos = result.find(std::string("Abort message: '") + kMsg + "'");
+ EXPECT_TRUE(pos != std::string::npos) << "Couldn't find abort message: " << result;
+
+ // Make sure that the log message is handled properly too.
+ // The logger automatically splits a newline message into two pieces.
+ pos = result.find("Not encoded: \t\v\f\r", pos + kMsg.size());
+ EXPECT_TRUE(pos != std::string::npos) << "Couldn't find log message: " << result;
+ EXPECT_TRUE(result.find(" after", pos + 1) != std::string::npos)
<< "Couldn't find sanitized log message: " << result;
}
diff --git a/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h b/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h
index df22e01..819a99d 100644
--- a/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h
+++ b/debuggerd/libdebuggerd/include/libdebuggerd/utility_host.h
@@ -30,4 +30,7 @@
constexpr size_t kNumTagColumns = 16;
constexpr size_t kNumTagRows = 16;
-std::string oct_encode(const std::string& data);
+// Encode all non-ascii values and also ascii values that are not printable.
+std::string oct_encode_non_ascii_printable(const std::string& data);
+// Encode any value that fails isprint(), includes encoding chars like '\n' and '\t'.
+std::string oct_encode_non_printable(const std::string& data);
diff --git a/debuggerd/libdebuggerd/tombstone_proto.cpp b/debuggerd/libdebuggerd/tombstone_proto.cpp
index ef303f0..d3ac49a 100644
--- a/debuggerd/libdebuggerd/tombstone_proto.cpp
+++ b/debuggerd/libdebuggerd/tombstone_proto.cpp
@@ -467,7 +467,7 @@
msg.resize(index);
// Make sure only UTF8 characters are present since abort_message is a string.
- tombstone->set_abort_message(oct_encode(msg));
+ tombstone->set_abort_message(oct_encode_non_ascii_printable(msg));
}
static void dump_open_fds(Tombstone* tombstone, const OpenFilesList* open_files) {
@@ -776,7 +776,7 @@
log_msg->set_priority(prio);
log_msg->set_tag(tag);
// Make sure only UTF8 characters are present since message is a string.
- log_msg->set_message(oct_encode(msg));
+ log_msg->set_message(oct_encode_non_ascii_printable(msg));
} while ((msg = nl));
}
android_logger_list_free(logger_list);
diff --git a/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp b/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
index e885c5a..0474ae7 100644
--- a/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
+++ b/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
@@ -17,6 +17,7 @@
#include <libdebuggerd/tombstone_proto_to_text.h>
#include <libdebuggerd/utility_host.h>
+#include <ctype.h>
#include <inttypes.h>
#include <algorithm>
@@ -463,8 +464,8 @@
}
for (const auto& crash_detail : tombstone.crash_details()) {
- std::string oct_encoded_name = oct_encode(crash_detail.name());
- std::string oct_encoded_data = oct_encode(crash_detail.data());
+ std::string oct_encoded_name = oct_encode_non_printable(crash_detail.name());
+ std::string oct_encoded_data = oct_encode_non_printable(crash_detail.data());
CBL("Extra crash detail: %s: '%s'", oct_encoded_name.c_str(), oct_encoded_data.c_str());
}
diff --git a/debuggerd/libdebuggerd/utility_host.cpp b/debuggerd/libdebuggerd/utility_host.cpp
index 4efa03c..d87f4fb 100644
--- a/debuggerd/libdebuggerd/utility_host.cpp
+++ b/debuggerd/libdebuggerd/utility_host.cpp
@@ -16,6 +16,7 @@
#include "libdebuggerd/utility_host.h"
+#include <ctype.h>
#include <sys/prctl.h>
#include <charconv>
@@ -102,23 +103,31 @@
return describe_end(value, desc);
}
-std::string oct_encode(const std::string& data) {
+static std::string oct_encode(const std::string& data, bool (*should_encode_func)(int)) {
std::string oct_encoded;
oct_encoded.reserve(data.size());
// N.B. the unsigned here is very important, otherwise e.g. \255 would render as
// \-123 (and overflow our buffer).
for (unsigned char c : data) {
- if (isprint(c)) {
- oct_encoded += c;
- } else {
+ if (should_encode_func(c)) {
std::string oct_digits("\\\0\0\0", 4);
// char is encodable in 3 oct digits
static_assert(std::numeric_limits<unsigned char>::max() <= 8 * 8 * 8);
auto [ptr, ec] = std::to_chars(oct_digits.data() + 1, oct_digits.data() + 4, c, 8);
oct_digits.resize(ptr - oct_digits.data());
oct_encoded += oct_digits;
+ } else {
+ oct_encoded += c;
}
}
return oct_encoded;
}
+
+std::string oct_encode_non_ascii_printable(const std::string& data) {
+ return oct_encode(data, [](int c) { return !isgraph(c) && !isspace(c); });
+}
+
+std::string oct_encode_non_printable(const std::string& data) {
+ return oct_encode(data, [](int c) { return !isprint(c); });
+}