| Kenny Root | ba0165b | 2010-11-09 14:37:23 -0800 | [diff] [blame] | 1 | /* | 
|  | 2 | * Copyright (C) 2010 The Android Open Source Project | 
|  | 3 | * | 
|  | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 5 | * you may not use this file except in compliance with the License. | 
|  | 6 | * You may obtain a copy of the License at | 
|  | 7 | * | 
|  | 8 | *      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 9 | * | 
|  | 10 | * Unless required by applicable law or agreed to in writing, software | 
|  | 11 | * distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 13 | * See the License for the specific language governing permissions and | 
|  | 14 | * limitations under the License. | 
|  | 15 | */ | 
|  | 16 |  | 
|  | 17 | #define LOG_TAG "Unicode_test" | 
| Branislav Rankov | bf3fff1 | 2017-10-12 15:08:42 +0200 | [diff] [blame] | 18 |  | 
|  | 19 | #include <sys/mman.h> | 
|  | 20 | #include <unistd.h> | 
|  | 21 |  | 
|  | 22 | #include <log/log.h> | 
| Kenny Root | ba0165b | 2010-11-09 14:37:23 -0800 | [diff] [blame] | 23 | #include <utils/Unicode.h> | 
|  | 24 |  | 
|  | 25 | #include <gtest/gtest.h> | 
|  | 26 |  | 
|  | 27 | namespace android { | 
|  | 28 |  | 
|  | 29 | class UnicodeTest : public testing::Test { | 
|  | 30 | protected: | 
|  | 31 | virtual void SetUp() { | 
|  | 32 | } | 
|  | 33 |  | 
|  | 34 | virtual void TearDown() { | 
|  | 35 | } | 
| Michael Wright | 5bacef3 | 2016-05-09 14:43:31 +0100 | [diff] [blame] | 36 |  | 
|  | 37 | char16_t const * const kSearchString = u"I am a leaf on the wind."; | 
| Kenny Root | ba0165b | 2010-11-09 14:37:23 -0800 | [diff] [blame] | 38 | }; | 
|  | 39 |  | 
|  | 40 | TEST_F(UnicodeTest, UTF8toUTF16ZeroLength) { | 
|  | 41 | ssize_t measured; | 
|  | 42 |  | 
|  | 43 | const uint8_t str[] = { }; | 
|  | 44 |  | 
|  | 45 | measured = utf8_to_utf16_length(str, 0); | 
|  | 46 | EXPECT_EQ(0, measured) | 
|  | 47 | << "Zero length input should return zero length output."; | 
|  | 48 | } | 
|  | 49 |  | 
|  | 50 | TEST_F(UnicodeTest, UTF8toUTF16ASCIILength) { | 
|  | 51 | ssize_t measured; | 
|  | 52 |  | 
|  | 53 | // U+0030 or ASCII '0' | 
|  | 54 | const uint8_t str[] = { 0x30 }; | 
|  | 55 |  | 
|  | 56 | measured = utf8_to_utf16_length(str, sizeof(str)); | 
|  | 57 | EXPECT_EQ(1, measured) | 
|  | 58 | << "ASCII glyphs should have a length of 1 char16_t"; | 
|  | 59 | } | 
|  | 60 |  | 
|  | 61 | TEST_F(UnicodeTest, UTF8toUTF16Plane1Length) { | 
|  | 62 | ssize_t measured; | 
|  | 63 |  | 
|  | 64 | // U+2323 SMILE | 
|  | 65 | const uint8_t str[] = { 0xE2, 0x8C, 0xA3 }; | 
|  | 66 |  | 
|  | 67 | measured = utf8_to_utf16_length(str, sizeof(str)); | 
|  | 68 | EXPECT_EQ(1, measured) | 
|  | 69 | << "Plane 1 glyphs should have a length of 1 char16_t"; | 
|  | 70 | } | 
|  | 71 |  | 
|  | 72 | TEST_F(UnicodeTest, UTF8toUTF16SurrogateLength) { | 
|  | 73 | ssize_t measured; | 
|  | 74 |  | 
|  | 75 | // U+10000 | 
|  | 76 | const uint8_t str[] = { 0xF0, 0x90, 0x80, 0x80 }; | 
|  | 77 |  | 
|  | 78 | measured = utf8_to_utf16_length(str, sizeof(str)); | 
|  | 79 | EXPECT_EQ(2, measured) | 
|  | 80 | << "Surrogate pairs should have a length of 2 char16_t"; | 
|  | 81 | } | 
|  | 82 |  | 
|  | 83 | TEST_F(UnicodeTest, UTF8toUTF16TruncatedUTF8) { | 
|  | 84 | ssize_t measured; | 
|  | 85 |  | 
|  | 86 | // Truncated U+2323 SMILE | 
|  | 87 | // U+2323 SMILE | 
|  | 88 | const uint8_t str[] = { 0xE2, 0x8C }; | 
|  | 89 |  | 
|  | 90 | measured = utf8_to_utf16_length(str, sizeof(str)); | 
|  | 91 | EXPECT_EQ(-1, measured) | 
|  | 92 | << "Truncated UTF-8 should return -1 to indicate invalid"; | 
|  | 93 | } | 
|  | 94 |  | 
|  | 95 | TEST_F(UnicodeTest, UTF8toUTF16Normal) { | 
|  | 96 | const uint8_t str[] = { | 
|  | 97 | 0x30, // U+0030, 1 UTF-16 character | 
|  | 98 | 0xC4, 0x80, // U+0100, 1 UTF-16 character | 
|  | 99 | 0xE2, 0x8C, 0xA3, // U+2323, 1 UTF-16 character | 
|  | 100 | 0xF0, 0x90, 0x80, 0x80, // U+10000, 2 UTF-16 character | 
|  | 101 | }; | 
|  | 102 |  | 
|  | 103 | char16_t output[1 + 1 + 1 + 2 + 1]; // Room for NULL | 
|  | 104 |  | 
| Sergio Giro | 1dcc0c8 | 2016-07-20 20:01:33 +0100 | [diff] [blame] | 105 | utf8_to_utf16(str, sizeof(str), output, sizeof(output) / sizeof(output[0])); | 
| Kenny Root | ba0165b | 2010-11-09 14:37:23 -0800 | [diff] [blame] | 106 |  | 
|  | 107 | EXPECT_EQ(0x0030, output[0]) | 
|  | 108 | << "should be U+0030"; | 
|  | 109 | EXPECT_EQ(0x0100, output[1]) | 
|  | 110 | << "should be U+0100"; | 
|  | 111 | EXPECT_EQ(0x2323, output[2]) | 
|  | 112 | << "should be U+2323"; | 
|  | 113 | EXPECT_EQ(0xD800, output[3]) | 
|  | 114 | << "should be first half of surrogate U+10000"; | 
|  | 115 | EXPECT_EQ(0xDC00, output[4]) | 
|  | 116 | << "should be second half of surrogate U+10000"; | 
|  | 117 | EXPECT_EQ(NULL, output[5]) | 
|  | 118 | << "should be NULL terminated"; | 
|  | 119 | } | 
|  | 120 |  | 
| Michael Wright | 5bacef3 | 2016-05-09 14:43:31 +0100 | [diff] [blame] | 121 | TEST_F(UnicodeTest, strstr16EmptyTarget) { | 
|  | 122 | EXPECT_EQ(strstr16(kSearchString, u""), kSearchString) | 
|  | 123 | << "should return the original pointer"; | 
|  | 124 | } | 
|  | 125 |  | 
| Branislav Rankov | bf3fff1 | 2017-10-12 15:08:42 +0200 | [diff] [blame] | 126 | TEST_F(UnicodeTest, strstr16EmptyTarget_bug) { | 
|  | 127 | // In the original code when target is an empty string strlen16() would | 
|  | 128 | // start reading the memory until a "terminating null" (that is, zero) | 
|  | 129 | // character is found.   This happens because "*target++" in the original | 
|  | 130 | // code would increment the pointer beyond the actual string. | 
|  | 131 | void* memptr; | 
|  | 132 | const size_t alignment = sysconf(_SC_PAGESIZE); | 
|  | 133 | const size_t size = 2 * alignment; | 
|  | 134 | ASSERT_EQ(posix_memalign(&memptr, alignment, size), 0); | 
|  | 135 | // Fill allocated memory. | 
|  | 136 | memset(memptr, 'A', size); | 
|  | 137 | // Create a pointer to an "empty" string on the first page. | 
|  | 138 | char16_t* const emptyString = (char16_t* const)((char*)memptr + alignment - 4); | 
|  | 139 | *emptyString = (char16_t)0; | 
|  | 140 | // Protect the second page to show that strstr16() violates that. | 
|  | 141 | ASSERT_EQ(mprotect((char*)memptr + alignment, alignment, PROT_NONE), 0); | 
|  | 142 | // Test strstr16(): when bug is present a segmentation fault is raised. | 
|  | 143 | ASSERT_EQ(strstr16((char16_t*)memptr, emptyString), (char16_t*)memptr) | 
|  | 144 | << "should not read beyond the first char16_t."; | 
|  | 145 | // Reset protection of the second page | 
|  | 146 | ASSERT_EQ(mprotect((char*)memptr + alignment, alignment, PROT_READ | PROT_WRITE), 0); | 
|  | 147 | // Free allocated memory. | 
|  | 148 | free(memptr); | 
|  | 149 | } | 
|  | 150 |  | 
| Michael Wright | 5bacef3 | 2016-05-09 14:43:31 +0100 | [diff] [blame] | 151 | TEST_F(UnicodeTest, strstr16SameString) { | 
|  | 152 | const char16_t* result = strstr16(kSearchString, kSearchString); | 
|  | 153 | EXPECT_EQ(kSearchString, result) | 
|  | 154 | << "should return the original pointer"; | 
|  | 155 | } | 
|  | 156 |  | 
|  | 157 | TEST_F(UnicodeTest, strstr16TargetStartOfString) { | 
|  | 158 | const char16_t* result = strstr16(kSearchString, u"I am"); | 
|  | 159 | EXPECT_EQ(kSearchString, result) | 
|  | 160 | << "should return the original pointer"; | 
|  | 161 | } | 
|  | 162 |  | 
|  | 163 |  | 
|  | 164 | TEST_F(UnicodeTest, strstr16TargetEndOfString) { | 
|  | 165 | const char16_t* result = strstr16(kSearchString, u"wind."); | 
|  | 166 | EXPECT_EQ(kSearchString+19, result); | 
|  | 167 | } | 
|  | 168 |  | 
|  | 169 | TEST_F(UnicodeTest, strstr16TargetWithinString) { | 
|  | 170 | const char16_t* result = strstr16(kSearchString, u"leaf"); | 
|  | 171 | EXPECT_EQ(kSearchString+7, result); | 
|  | 172 | } | 
|  | 173 |  | 
|  | 174 | TEST_F(UnicodeTest, strstr16TargetNotPresent) { | 
|  | 175 | const char16_t* result = strstr16(kSearchString, u"soar"); | 
|  | 176 | EXPECT_EQ(nullptr, result); | 
|  | 177 | } | 
|  | 178 |  | 
| Sergio Giro | 1dcc0c8 | 2016-07-20 20:01:33 +0100 | [diff] [blame] | 179 | // http://b/29267949 | 
|  | 180 | // Test that overreading in utf8_to_utf16_length is detected | 
|  | 181 | TEST_F(UnicodeTest, InvalidUtf8OverreadDetected) { | 
|  | 182 | // An utf8 char starting with \xc4 is two bytes long. | 
|  | 183 | // Add extra zeros so no extra memory is read in case the code doesn't | 
|  | 184 | // work as expected. | 
|  | 185 | static char utf8[] = "\xc4\x00\x00\x00"; | 
|  | 186 | ASSERT_DEATH(utf8_to_utf16_length((uint8_t *) utf8, strlen(utf8), | 
|  | 187 | true /* overreadIsFatal */), "" /* regex for ASSERT_DEATH */); | 
|  | 188 | } | 
|  | 189 |  | 
| Kenny Root | ba0165b | 2010-11-09 14:37:23 -0800 | [diff] [blame] | 190 | } |