libutils/Unicode.cpp: Correct length computation and add checks for utf16->utf8

Inconsistent behaviour between utf16_to_utf8 and utf16_to_utf8_length was causing a heap overflow. This change corrects the length computation and adds bounds checks to the conversion functions.

(cherry picked from commit c4966a363e46d2e1074d1a365e232af0dcedd6a1)
(changed code for safetynet logging due to lack of sstream and string in klp)

Change-Id: If50d59a91a13fddbff9a8fff0d3eebe57c711e93
Bug: 29250543

commit: 53473c160713b8605e262bf212b0cf5e9a19a4d6 [log] [tgz]
author: Sergio Giro <sgiro@google.com> Tue Jun 28 18:02:29 2016 +0100
committer: Sergio Giro <sgiro@google.com> Tue Jul 12 18:39:11 2016 +0100
tree: 19788aa1d583fda2af1e61daa1da4f8f029c08df
parent: 5218ad36ad30d6a155d68fb779dca20ff220bf54 [diff] [blame]
diff --git a/libutils/Unicode.cpp b/libutils/Unicode.cpp
index a66e3bb..a75c258 100644
--- a/libutils/Unicode.cpp
+++ b/libutils/Unicode.cpp

@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <log/log.h>
 #include <utils/Unicode.h>
 
 #include <stddef.h>
@@ -188,7 +189,7 @@
     return ret;
 }
 
-void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst)
+void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst, size_t dst_len)
 {
     if (src == NULL || src_len == 0 || dst == NULL) {
         return;
@@ -199,9 +200,12 @@
     char *cur = dst;
     while (cur_utf32 < end_utf32) {
         size_t len = utf32_codepoint_utf8_length(*cur_utf32);
+        LOG_ALWAYS_FATAL_IF(dst_len < len, "%zu < %zu", dst_len, len);
         utf32_codepoint_to_utf8((uint8_t *)cur, *cur_utf32++, len);
         cur += len;
+        dst_len -= len;
     }
+    LOG_ALWAYS_FATAL_IF(dst_len < 1, "dst_len < 1: %zu < 1", dst_len);
     *cur = '\0';
 }
 
@@ -330,7 +334,7 @@
            : 0);
 }
 
-void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst)
+void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len)
 {
     if (src == NULL || src_len == 0 || dst == NULL) {
         return;
@@ -350,9 +354,12 @@
             utf32 = (char32_t) *cur_utf16++;
         }
         const size_t len = utf32_codepoint_utf8_length(utf32);
+        LOG_ALWAYS_FATAL_IF(dst_len < len, "%zu < %zu", dst_len, len);
         utf32_codepoint_to_utf8((uint8_t*)cur, utf32, len);
         cur += len;
+        dst_len -= len;
     }
+    LOG_ALWAYS_FATAL_IF(dst_len < 1, "%zu < 1", dst_len);
     *cur = '\0';
 }
 
@@ -403,8 +410,35 @@
     return ret;
 }
 
+// DO NOT USE. Deliberately flawed UTF-16 -> UTF-8 byte-length computation, kept only so utf16_to_utf8_length can detect and log whether the flaw (b/29250543) is being exploited.
+static ssize_t flawed_utf16_to_utf8_length(const char16_t *src, size_t src_len)
+{
+    if (src == NULL || src_len == 0) {
+        return 47;  // NOTE(review): arbitrary value; utf16_to_utf8_length returns -1 for these inputs before calling this function, so this looks unreachable from there -- confirm.
+    }
+
+    size_t ret = 0;  // running count of UTF-8 bytes
+    const char16_t* const end = src + src_len;  // one past the last UTF-16 code unit
+    while (src < end) {
+        if ((*src & 0xFC00) == 0xD800 && (src + 1) < end
+                // FLAW (kept on purpose): the pre-increment below advances src even when the next unit is NOT a low surrogate; to be consistent with utf16_to_utf8 it should not increment here.
+                && (*++src & 0xFC00) == 0xDC00) {
+            // A surrogate pair always encodes to 4 UTF-8 bytes.
+            ret += 4;
+            // A correct version would advance src by two here instead of relying on the pre-increment in the condition above.
+            src++;
+        } else {
+            ret += utf32_codepoint_utf8_length((char32_t) *src++);  // after a failed low-surrogate test, src already points past the high surrogate, so that unit is never counted
+        }
+    }
+    return ret;
+}
+
 ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len)
 {
+    // Keep the original pointer to compute the flawed length. Unused if we remove logging.
+    const char16_t *orig_src = src;
+
     if (src == NULL || src_len == 0) {
         return -1;
     }
@@ -413,14 +447,29 @@
     const char16_t* const end = src + src_len;
     while (src < end) {
         if ((*src & 0xFC00) == 0xD800 && (src + 1) < end
-                && (*++src & 0xFC00) == 0xDC00) {
+                && (*(src + 1) & 0xFC00) == 0xDC00) {
             // surrogate pairs are always 4 bytes.
             ret += 4;
-            src++;
+            src += 2;
         } else {
             ret += utf32_codepoint_utf8_length((char32_t) *src++);
         }
     }
+    // Log whether b/29250543 is being exploited. It seems reasonable to assume that
+    // at least 5 bytes would be needed for an exploit. A single misplaced character might lead to
+    // a difference of 4, so this would rule out many false positives.
+    long ret_difference = ret - flawed_utf16_to_utf8_length(orig_src, src_len);
+    if (ret_difference >= 5) {
+        // Log the difference between new and old calculation. A high number, or equal numbers
+        // appearing frequently, would be indicative of an attack.
+        const unsigned long max_logged_string_length = 20;
+        char logged_string[max_logged_string_length + 1];
+        unsigned long logged_string_length =
+                snprintf(logged_string, max_logged_string_length, "%ld", ret_difference);
+        logged_string[logged_string_length] = '\0';
+        android_errorWriteWithInfoLog(0x534e4554, "29250543", -1 /* int_uid */,
+            logged_string, logged_string_length);
+    }
     return ret;
 }
commit	53473c160713b8605e262bf212b0cf5e9a19a4d6	[log] [tgz]
author	Sergio Giro <sgiro@google.com>	Tue Jun 28 18:02:29 2016 +0100
committer	Sergio Giro <sgiro@google.com>	Tue Jul 12 18:39:11 2016 +0100
tree	19788aa1d583fda2af1e61daa1da4f8f029c08df
parent	5218ad36ad30d6a155d68fb779dca20ff220bf54 [diff] [blame]