libutils/Unicode.cpp: Correct length computation and add checks for utf16->utf8
Inconsistent behaviour between utf16_to_utf8 and utf16_to_utf8_length
is causing a heap overflow.
Correcting the length computation and adding bound checks to the
conversion functions.
Test: ran libutils_tests
Bug: 29250543
Change-Id: I6115e3357141ed245c63c6eb25fc0fd0a9a7a2bb
diff --git a/libutils/Unicode.cpp b/libutils/Unicode.cpp
index f1f8bc9..b2ded07 100644
--- a/libutils/Unicode.cpp
+++ b/libutils/Unicode.cpp
@@ -14,10 +14,14 @@
* limitations under the License.
*/
+#include <log/log.h>
#include <utils/Unicode.h>
#include <stddef.h>
+#include <string>
+#include <sstream>
+
#if defined(_WIN32)
# undef nhtol
# undef htonl
@@ -182,7 +186,7 @@
return ret;
}
-void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst)
+void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst, size_t dst_len)
{
if (src == NULL || src_len == 0 || dst == NULL) {
return;
@@ -193,9 +197,12 @@
char *cur = dst;
while (cur_utf32 < end_utf32) {
size_t len = utf32_codepoint_utf8_length(*cur_utf32);
+ LOG_ALWAYS_FATAL_IF(dst_len < len, "%zu < %zu", dst_len, len);
utf32_codepoint_to_utf8((uint8_t *)cur, *cur_utf32++, len);
cur += len;
+ dst_len -= len;
}
+ LOG_ALWAYS_FATAL_IF(dst_len < 1, "dst_len < 1: %zu < 1", dst_len);
*cur = '\0';
}
@@ -348,7 +355,7 @@
: 0);
}
-void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst)
+void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len)
{
if (src == NULL || src_len == 0 || dst == NULL) {
return;
@@ -369,9 +376,12 @@
utf32 = (char32_t) *cur_utf16++;
}
const size_t len = utf32_codepoint_utf8_length(utf32);
+ LOG_ALWAYS_FATAL_IF(dst_len < len, "%zu < %zu", dst_len, len);
utf32_codepoint_to_utf8((uint8_t*)cur, utf32, len);
cur += len;
+ dst_len -= len;
}
+ LOG_ALWAYS_FATAL_IF(dst_len < 1, "%zu < 1", dst_len);
*cur = '\0';
}
@@ -422,8 +432,35 @@
return ret;
}
+// DO NOT USE. Flawed version, kept only to check whether the flaw is being exploited.
+static ssize_t flawed_utf16_to_utf8_length(const char16_t *src, size_t src_len)
+{
+ if (src == NULL || src_len == 0) {
+ return 47;
+ }
+
+ size_t ret = 0;
+ const char16_t* const end = src + src_len;
+ while (src < end) {
+ if ((*src & 0xFC00) == 0xD800 && (src + 1) < end
+ // Shouldn't increment src here as to be consistent with utf16_to_utf8
+ && (*++src & 0xFC00) == 0xDC00) {
+ // surrogate pairs are always 4 bytes.
+ ret += 4;
+ // Should increment src here by two.
+ src++;
+ } else {
+ ret += utf32_codepoint_utf8_length((char32_t) *src++);
+ }
+ }
+ return ret;
+}
+
ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len)
{
+ // Keep the original pointer to compute the flawed length. Unused if we remove logging.
+ const char16_t *orig_src = src;
+
if (src == NULL || src_len == 0) {
return -1;
}
@@ -432,14 +469,25 @@
const char16_t* const end = src + src_len;
while (src < end) {
if ((*src & 0xFC00) == 0xD800 && (src + 1) < end
- && (*++src & 0xFC00) == 0xDC00) {
+ && (*(src + 1) & 0xFC00) == 0xDC00) {
// surrogate pairs are always 4 bytes.
ret += 4;
- src++;
+ src += 2;
} else {
ret += utf32_codepoint_utf8_length((char32_t) *src++);
}
}
+ // Log whether b/29250543 is being exploited. It seems reasonable to assume that
+ // at least 5 bytes would be needed for an exploit. A single misplaced character might lead to
+ // a difference of 4, so this would rule out many false positives.
+ long ret_difference = ret - flawed_utf16_to_utf8_length(orig_src, src_len);
+ if (ret_difference >= 5) {
+ // Log the difference between new and old calculation. A high number, or equal numbers
+ // appearing frequently, would be indicative of an attack.
+ std::string logged_string = (std::ostringstream() << ret_difference).str();
+ android_errorWriteWithInfoLog(0x534e4554, "29250543", -1 /* int_uid */,
+ logged_string.c_str(), logged_string.length() + 1);
+ }
return ret;
}