Elliott Hughes | a648733 | 2017-08-15 23:16:48 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2017 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include <gtest/gtest.h> |
| 18 | |
| 19 | #include <iconv.h> |
| 20 | |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 21 | #include "utils.h" |
| 22 | |
// The value iconv_open() returns on failure, i.e. (iconv_t) -1.
#define INVALID_ICONV_T (reinterpret_cast<iconv_t>(-1))
| 24 | |
| 25 | TEST(iconv, iconv_open_EINVAL) { |
| 26 | errno = 0; |
| 27 | ASSERT_EQ(INVALID_ICONV_T, iconv_open("silly", "silly")); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 28 | ASSERT_ERRNO(EINVAL); |
Elliott Hughes | a648733 | 2017-08-15 23:16:48 -0700 | [diff] [blame] | 29 | errno = 0; |
| 30 | ASSERT_EQ(INVALID_ICONV_T, iconv_open("silly", "UTF-8")); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 31 | ASSERT_ERRNO(EINVAL); |
Elliott Hughes | a648733 | 2017-08-15 23:16:48 -0700 | [diff] [blame] | 32 | errno = 0; |
| 33 | ASSERT_EQ(INVALID_ICONV_T, iconv_open("UTF-8", "silly")); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 34 | ASSERT_ERRNO(EINVAL); |
Elliott Hughes | a648733 | 2017-08-15 23:16:48 -0700 | [diff] [blame] | 35 | } |
| 36 | |
| 37 | TEST(iconv, iconv_open_comparator) { |
| 38 | // Examples from http://www.unicode.org/reports/tr22/#Charset_Alias_Matching: |
| 39 | // "For example, the following names should match: "UTF-8", "utf8", "u.t.f-008", ..." |
| 40 | iconv_t c; |
| 41 | ASSERT_NE(INVALID_ICONV_T, c = iconv_open("UTF-8", "utf8")); |
| 42 | ASSERT_EQ(0, iconv_close(c)); |
| 43 | ASSERT_NE(INVALID_ICONV_T, c = iconv_open("UTF-8", "u.t.f-008")); |
| 44 | ASSERT_EQ(0, iconv_close(c)); |
| 45 | |
| 46 | // "...but not "utf-80" or "ut8"." |
| 47 | errno = 0; |
| 48 | ASSERT_EQ(INVALID_ICONV_T, iconv_open("UTF-8", "utf-80")); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 49 | ASSERT_ERRNO(EINVAL); |
Elliott Hughes | a648733 | 2017-08-15 23:16:48 -0700 | [diff] [blame] | 50 | errno = 0; |
| 51 | ASSERT_EQ(INVALID_ICONV_T, iconv_open("UTF-8", "ut80")); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 52 | ASSERT_ERRNO(EINVAL); |
Elliott Hughes | a648733 | 2017-08-15 23:16:48 -0700 | [diff] [blame] | 53 | } |
| 54 | |
| 55 | TEST(iconv, iconv_smoke) { |
| 56 | const char* utf8 = "a٦ᄀ"; // U+0666 ٦ 0xd9 0xa6 // U+1100 ᄀ 0xe1 0x84 0x80 |
| 57 | char buf[BUFSIZ] = {}; |
| 58 | |
| 59 | iconv_t c = iconv_open("UTF-32LE", "UTF-8"); |
| 60 | ASSERT_NE(INVALID_ICONV_T, c); |
| 61 | |
| 62 | char* in = const_cast<char*>(utf8); |
| 63 | size_t in_bytes = strlen(in); |
| 64 | |
| 65 | char* out = buf; |
| 66 | size_t out_bytes = sizeof(buf); |
| 67 | |
| 68 | EXPECT_EQ(0U, iconv(c, &in, &in_bytes, &out, &out_bytes)); |
| 69 | |
| 70 | wchar_t* utf16 = reinterpret_cast<wchar_t*>(buf); |
| 71 | EXPECT_EQ(L'a', utf16[0]); |
| 72 | EXPECT_EQ(L'٦', utf16[1]); |
| 73 | EXPECT_EQ(L'ᄀ', utf16[2]); |
| 74 | EXPECT_EQ(L'\0', utf16[3]); |
| 75 | EXPECT_EQ(0U, in_bytes); |
| 76 | EXPECT_EQ(sizeof(buf) - (3 /* chars */ * 4 /* bytes each */), out_bytes); |
| 77 | |
| 78 | ASSERT_EQ(0, iconv_close(c)); |
| 79 | } |
| 80 | |
| 81 | TEST(iconv, iconv_lossy_TRANSLIT) { |
| 82 | const char* utf8 = "a٦ᄀz"; // U+0666 ٦ 0xd9 0xa6 // U+1100 ᄀ 0xe1 0x84 0x80 |
| 83 | char buf[BUFSIZ] = {}; |
| 84 | |
| 85 | iconv_t c = iconv_open("ASCII//TRANSLIT", "UTF-8"); |
| 86 | ASSERT_NE(INVALID_ICONV_T, c); |
| 87 | |
| 88 | char* in = const_cast<char*>(utf8); |
| 89 | size_t in_bytes = strlen(in); |
| 90 | |
| 91 | char* out = buf; |
| 92 | size_t out_bytes = sizeof(buf); |
| 93 | |
| 94 | // Two of the input characters (5 input bytes) aren't representable as ASCII. |
| 95 | // With "//TRANSLIT", we use a replacement character, and report the number |
| 96 | // of replacements. |
| 97 | EXPECT_EQ(2U, iconv(c, &in, &in_bytes, &out, &out_bytes)); |
| 98 | |
| 99 | EXPECT_EQ('a', buf[0]); |
| 100 | EXPECT_EQ('?', buf[1]); |
| 101 | EXPECT_EQ('?', buf[2]); |
| 102 | EXPECT_EQ('z', buf[3]); |
| 103 | EXPECT_EQ(0, buf[4]); |
| 104 | EXPECT_EQ(0U, in_bytes); |
| 105 | EXPECT_EQ(sizeof(buf) - 4, out_bytes); |
| 106 | |
| 107 | ASSERT_EQ(0, iconv_close(c)); |
| 108 | } |
| 109 | |
| 110 | TEST(iconv, iconv_lossy_IGNORE) { |
| 111 | const char* utf8 = "a٦ᄀz"; // U+0666 ٦ 0xd9 0xa6 // U+1100 ᄀ 0xe1 0x84 0x80 |
| 112 | char buf[BUFSIZ] = {}; |
| 113 | |
| 114 | iconv_t c = iconv_open("ASCII//IGNORE", "UTF-8"); |
| 115 | ASSERT_NE(INVALID_ICONV_T, c); |
| 116 | |
| 117 | char* in = const_cast<char*>(utf8); |
| 118 | size_t in_bytes = strlen(in); |
| 119 | |
| 120 | char* out = buf; |
| 121 | size_t out_bytes = sizeof(buf); |
| 122 | |
| 123 | // Two of the input characters (5 input bytes) aren't representable as ASCII. |
| 124 | // With "//IGNORE", we just skip them (but return failure). |
| 125 | errno = 0; |
| 126 | EXPECT_EQ(static_cast<size_t>(-1), iconv(c, &in, &in_bytes, &out, &out_bytes)); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 127 | EXPECT_ERRNO(EILSEQ); |
Elliott Hughes | a648733 | 2017-08-15 23:16:48 -0700 | [diff] [blame] | 128 | |
| 129 | EXPECT_EQ('a', buf[0]); |
| 130 | EXPECT_EQ('z', buf[1]); |
| 131 | EXPECT_EQ(0, buf[2]); |
| 132 | EXPECT_EQ(0U, in_bytes); |
| 133 | EXPECT_EQ(sizeof(buf) - 2, out_bytes); |
| 134 | |
| 135 | ASSERT_EQ(0, iconv_close(c)); |
| 136 | } |
| 137 | |
| 138 | TEST(iconv, iconv_lossy) { |
| 139 | const char* utf8 = "a٦ᄀz"; // U+0666 ٦ 0xd9 0xa6 // U+1100 ᄀ 0xe1 0x84 0x80 |
| 140 | char buf[BUFSIZ] = {}; |
| 141 | |
| 142 | iconv_t c = iconv_open("ASCII", "UTF-8"); |
| 143 | ASSERT_NE(INVALID_ICONV_T, c); |
| 144 | |
| 145 | char* in = const_cast<char*>(utf8); |
| 146 | size_t in_bytes = strlen(in); |
| 147 | |
| 148 | char* out = buf; |
| 149 | size_t out_bytes = sizeof(buf); |
| 150 | |
| 151 | // The second input character isn't representable as ASCII, so we stop there. |
| 152 | errno = 0; |
| 153 | EXPECT_EQ(static_cast<size_t>(-1), iconv(c, &in, &in_bytes, &out, &out_bytes)); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 154 | EXPECT_ERRNO(EILSEQ); |
Elliott Hughes | a648733 | 2017-08-15 23:16:48 -0700 | [diff] [blame] | 155 | |
| 156 | EXPECT_EQ('a', buf[0]); |
| 157 | EXPECT_EQ(0, buf[1]); |
| 158 | EXPECT_EQ(6U, in_bytes); // Two bytes for ٦, three bytes for ᄀ, and one byte for z. |
| 159 | EXPECT_EQ(sizeof(buf) - 1, out_bytes); |
| 160 | |
| 161 | ASSERT_EQ(0, iconv_close(c)); |
| 162 | } |
| 163 | |
| 164 | TEST(iconv, iconv_malformed_sequence_EILSEQ) { |
| 165 | const char* utf8 = "a\xd9z"; // 0xd9 is the first byte of the two-byte U+0666 ٦. |
| 166 | char buf[BUFSIZ] = {}; |
| 167 | |
| 168 | iconv_t c = iconv_open("UTF-8", "UTF-8"); |
| 169 | ASSERT_NE(INVALID_ICONV_T, c); |
| 170 | |
| 171 | char* in = const_cast<char*>(utf8); |
| 172 | size_t in_bytes = strlen(in); |
| 173 | |
| 174 | char* out = buf; |
| 175 | size_t out_bytes = sizeof(buf); |
| 176 | |
| 177 | // The second input byte is a malformed character, so we stop there. |
| 178 | errno = 0; |
| 179 | EXPECT_EQ(static_cast<size_t>(-1), iconv(c, &in, &in_bytes, &out, &out_bytes)); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 180 | EXPECT_ERRNO(EILSEQ); |
Elliott Hughes | a648733 | 2017-08-15 23:16:48 -0700 | [diff] [blame] | 181 | EXPECT_EQ('\xd9', *in); // *in is left pointing to the start of the invalid sequence. |
| 182 | ++in; |
| 183 | --in_bytes; |
| 184 | errno = 0; |
| 185 | EXPECT_EQ(0U, iconv(c, &in, &in_bytes, &out, &out_bytes)); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 186 | EXPECT_ERRNO(0); |
Elliott Hughes | a648733 | 2017-08-15 23:16:48 -0700 | [diff] [blame] | 187 | |
| 188 | EXPECT_EQ('a', buf[0]); |
| 189 | EXPECT_EQ('z', buf[1]); |
| 190 | EXPECT_EQ(0, buf[2]); |
| 191 | EXPECT_EQ(0U, in_bytes); |
| 192 | EXPECT_EQ(sizeof(buf) - 2, out_bytes); |
| 193 | |
| 194 | ASSERT_EQ(0, iconv_close(c)); |
| 195 | } |
| 196 | |
| 197 | TEST(iconv, iconv_incomplete_sequence_EINVAL) { |
| 198 | const char* utf8 = "a\xd9"; // 0xd9 is the first byte of the two-byte U+0666 ٦. |
| 199 | char buf[BUFSIZ] = {}; |
| 200 | |
| 201 | iconv_t c = iconv_open("UTF-8", "UTF-8"); |
| 202 | ASSERT_NE(INVALID_ICONV_T, c); |
| 203 | |
| 204 | char* in = const_cast<char*>(utf8); |
| 205 | size_t in_bytes = strlen(in); |
| 206 | |
| 207 | char* out = buf; |
| 208 | size_t out_bytes = sizeof(buf); |
| 209 | |
| 210 | // The second input byte is just the start of a character, and we don't have any more bytes. |
| 211 | errno = 0; |
| 212 | EXPECT_EQ(static_cast<size_t>(-1), iconv(c, &in, &in_bytes, &out, &out_bytes)); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 213 | EXPECT_ERRNO(EINVAL); |
Elliott Hughes | a648733 | 2017-08-15 23:16:48 -0700 | [diff] [blame] | 214 | EXPECT_EQ('\xd9', *in); // *in is left pointing to the start of the incomplete sequence. |
| 215 | |
| 216 | EXPECT_EQ('a', buf[0]); |
| 217 | EXPECT_EQ(0, buf[1]); |
| 218 | EXPECT_EQ(1U, in_bytes); |
| 219 | EXPECT_EQ(sizeof(buf) - 1, out_bytes); |
| 220 | |
| 221 | ASSERT_EQ(0, iconv_close(c)); |
| 222 | } |
| 223 | |
| 224 | TEST(iconv, iconv_E2BIG) { |
| 225 | const char* utf8 = "abc"; |
| 226 | char buf[BUFSIZ] = {}; |
| 227 | |
| 228 | iconv_t c = iconv_open("UTF-8", "UTF-8"); |
| 229 | ASSERT_NE(INVALID_ICONV_T, c); |
| 230 | |
| 231 | char* in = const_cast<char*>(utf8); |
| 232 | size_t in_bytes = strlen(in); |
| 233 | |
| 234 | char* out = buf; |
| 235 | size_t out_bytes = 1; |
| 236 | |
| 237 | // We need three bytes, so one isn't enough (but we will make progress). |
| 238 | out_bytes = 1; |
| 239 | errno = 0; |
| 240 | EXPECT_EQ(static_cast<size_t>(-1), iconv(c, &in, &in_bytes, &out, &out_bytes)); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 241 | EXPECT_ERRNO(E2BIG); |
Elliott Hughes | a648733 | 2017-08-15 23:16:48 -0700 | [diff] [blame] | 242 | EXPECT_EQ(2U, in_bytes); |
| 243 | EXPECT_EQ(0U, out_bytes); |
| 244 | |
| 245 | // Two bytes left, so zero isn't enough (and we can't even make progress). |
| 246 | out_bytes = 0; |
| 247 | errno = 0; |
| 248 | EXPECT_EQ(static_cast<size_t>(-1), iconv(c, &in, &in_bytes, &out, &out_bytes)); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 249 | EXPECT_ERRNO(E2BIG); |
Elliott Hughes | a648733 | 2017-08-15 23:16:48 -0700 | [diff] [blame] | 250 | EXPECT_EQ(2U, in_bytes); |
| 251 | EXPECT_EQ(0U, out_bytes); |
| 252 | |
| 253 | // Two bytes left, so one isn't enough (but we will make progress). |
| 254 | out_bytes = 1; |
| 255 | errno = 0; |
| 256 | EXPECT_EQ(static_cast<size_t>(-1), iconv(c, &in, &in_bytes, &out, &out_bytes)); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 257 | EXPECT_ERRNO(E2BIG); |
Elliott Hughes | a648733 | 2017-08-15 23:16:48 -0700 | [diff] [blame] | 258 | EXPECT_EQ(1U, in_bytes); |
| 259 | EXPECT_EQ(0U, out_bytes); |
| 260 | |
| 261 | // One byte left, so one byte is now enough. |
| 262 | out_bytes = 1; |
| 263 | errno = 0; |
| 264 | EXPECT_EQ(0U, iconv(c, &in, &in_bytes, &out, &out_bytes)); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 265 | EXPECT_ERRNO(0); |
Elliott Hughes | a648733 | 2017-08-15 23:16:48 -0700 | [diff] [blame] | 266 | EXPECT_EQ(0U, in_bytes); |
| 267 | EXPECT_EQ(0U, out_bytes); |
| 268 | |
| 269 | EXPECT_EQ('a', buf[0]); |
| 270 | EXPECT_EQ('b', buf[1]); |
| 271 | EXPECT_EQ('c', buf[2]); |
| 272 | EXPECT_EQ(0, buf[3]); |
| 273 | |
| 274 | ASSERT_EQ(0, iconv_close(c)); |
| 275 | } |
| 276 | |
| 277 | TEST(iconv, iconv_invalid_converter_EBADF) { |
| 278 | char* in = nullptr; |
| 279 | char* out = nullptr; |
| 280 | size_t in_bytes = 0; |
| 281 | size_t out_bytes = 0; |
| 282 | errno = 0; |
| 283 | ASSERT_EQ(static_cast<size_t>(-1), iconv(INVALID_ICONV_T, &in, &in_bytes, &out, &out_bytes)); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 284 | ASSERT_ERRNO(EBADF); |
Elliott Hughes | a648733 | 2017-08-15 23:16:48 -0700 | [diff] [blame] | 285 | } |
| 286 | |
| 287 | TEST(iconv, iconv_close_invalid_converter_EBADF) { |
| 288 | errno = 0; |
| 289 | ASSERT_EQ(-1, iconv_close(INVALID_ICONV_T)); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 290 | ASSERT_ERRNO(EBADF); |
Elliott Hughes | a648733 | 2017-08-15 23:16:48 -0700 | [diff] [blame] | 291 | } |
| 292 | |
| 293 | static void RoundTrip(const char* dst_enc, const char* expected_bytes, size_t n) { |
| 294 | // Examples from https://en.wikipedia.org/wiki/UTF-16. |
| 295 | const char* utf8 = "$€𐐷"; // U+0024, U+20AC, U+10437. |
| 296 | |
| 297 | iconv_t c = iconv_open(dst_enc, "UTF-8"); |
| 298 | ASSERT_NE(INVALID_ICONV_T, c) << dst_enc; |
| 299 | |
| 300 | char* in = const_cast<char*>(utf8); |
| 301 | size_t in_bytes = strlen(utf8); |
| 302 | char buf[BUFSIZ] = {}; |
| 303 | char* out = buf; |
| 304 | size_t out_bytes = sizeof(buf); |
| 305 | size_t replacement_count = iconv(c, &in, &in_bytes, &out, &out_bytes); |
| 306 | |
| 307 | // Check we got the bytes we were expecting. |
| 308 | for (size_t i = 0; i < n; ++i) { |
| 309 | EXPECT_EQ(expected_bytes[i], buf[i]) << i << ' '<< dst_enc; |
| 310 | } |
| 311 | |
| 312 | ASSERT_EQ(0, iconv_close(c)); |
| 313 | |
| 314 | // We can't round-trip if there were replacements. |
| 315 | if (strstr(dst_enc, "ascii")) { |
| 316 | GTEST_LOG_(INFO) << "can't round-trip " << dst_enc << "\n"; |
| 317 | return; |
| 318 | } |
| 319 | ASSERT_EQ(0U, replacement_count); |
| 320 | |
| 321 | c = iconv_open("UTF-8", dst_enc); |
| 322 | ASSERT_NE(INVALID_ICONV_T, c) << dst_enc; |
| 323 | |
| 324 | in = buf; |
| 325 | in_bytes = n; |
| 326 | char buf2[BUFSIZ] = {}; |
| 327 | out = buf2; |
| 328 | out_bytes = sizeof(buf2); |
| 329 | iconv(c, &in, &in_bytes, &out, &out_bytes); |
| 330 | |
| 331 | ASSERT_STREQ(utf8, buf2) << dst_enc; |
| 332 | |
| 333 | ASSERT_EQ(0, iconv_close(c)); |
| 334 | } |
| 335 | |
| 336 | TEST(iconv, iconv_round_trip_ascii) { |
| 337 | RoundTrip("ascii//TRANSLIT", "$??", 3); |
| 338 | } |
| 339 | |
| 340 | TEST(iconv, iconv_round_trip_utf8) { |
| 341 | RoundTrip("utf8", "\x24\xe2\x82\xac\xf0\x90\x90\xb7", 8); |
| 342 | } |
| 343 | |
| 344 | TEST(iconv, iconv_round_trip_utf16be) { |
| 345 | RoundTrip("utf16be", "\x00\x24" "\x20\xac" "\xd8\x01\xdc\x37", 8); |
| 346 | } |
| 347 | |
| 348 | TEST(iconv, iconv_round_trip_utf16le) { |
| 349 | RoundTrip("utf16le", "\x24\x00" "\xac\x20" "\x01\xd8\x37\xdc", 8); |
| 350 | } |
| 351 | |
| 352 | TEST(iconv, iconv_round_trip_utf32be) { |
| 353 | RoundTrip("utf32be", "\x00\x00\x00\x24" "\x00\x00\x20\xac" "\x00\x01\x04\x37", 12); |
| 354 | } |
| 355 | |
| 356 | TEST(iconv, iconv_round_trip_utf32le) { |
| 357 | RoundTrip("utf32le", "\x24\x00\x00\x00" "\xac\x20\x00\x00" "\x37\x04\x01\x00", 12); |
| 358 | } |
| 359 | |
| 360 | TEST(iconv, iconv_round_trip_wchar_t) { |
| 361 | RoundTrip("wchar_t", "\x24\x00\x00\x00" "\xac\x20\x00\x00" "\x37\x04\x01\x00", 12); |
| 362 | } |
| 363 | |
| 364 | static void Check(int expected_errno, const char* src_enc, const char* src, size_t n) { |
| 365 | iconv_t c = iconv_open("wchar_t", src_enc); |
| 366 | char* in = const_cast<char*>(src); |
| 367 | size_t in_bytes = n; |
| 368 | wchar_t out_buf[16]; |
| 369 | size_t out_bytes = sizeof(out_buf); |
| 370 | char* out = reinterpret_cast<char*>(out_buf); |
| 371 | errno = 0; |
| 372 | ASSERT_EQ(static_cast<size_t>(-1), iconv(c, &in, &in_bytes, &out, &out_bytes)); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 373 | EXPECT_ERRNO(expected_errno); |
Elliott Hughes | a648733 | 2017-08-15 23:16:48 -0700 | [diff] [blame] | 374 | EXPECT_EQ(0, iconv_close(c)); |
| 375 | } |
| 376 | |
| 377 | TEST(iconv, iconv_EILSEQ_ascii) { |
| 378 | Check(EILSEQ, "ASCII", "\xac", 1); // > 0x7f, so not ASCII. |
| 379 | } |
| 380 | |
| 381 | TEST(iconv, iconv_EILSEQ_utf8_initial) { |
| 382 | Check(EILSEQ, "utf8", "\x82", 1); // Invalid initial byte. |
| 383 | } |
| 384 | |
| 385 | TEST(iconv, iconv_EILSEQ_utf8_non_initial) { |
| 386 | Check(EILSEQ, "utf8", "\xe2\xe2\x82", 3); // Invalid second byte. |
| 387 | } |
| 388 | |
| 389 | TEST(iconv, iconv_EILSEQ_utf16be_low_surrogate_first) { |
| 390 | Check(EILSEQ, "utf16be", "\xdc\x37" "\xd8\x01", 4); |
| 391 | } |
| 392 | |
| 393 | TEST(iconv, iconv_EILSEQ_utf16le_low_surrogate_first) { |
| 394 | Check(EILSEQ, "utf16le", "\x37\xdc" "\x01\xd8", 4); |
| 395 | } |
| 396 | |
| 397 | TEST(iconv, iconv_EINVAL_utf8_short) { |
| 398 | Check(EINVAL, "utf8", "\xe2\x82", 2); // Missing final byte of 3-byte sequence. |
| 399 | } |
| 400 | |
| 401 | TEST(iconv, iconv_EINVAL_utf16be_short) { |
| 402 | Check(EINVAL, "utf16be", "\x00", 1); // Missing second byte. |
| 403 | } |
| 404 | |
| 405 | TEST(iconv, iconv_EINVAL_utf16be_missing_low_surrogate) { |
| 406 | Check(EINVAL, "utf16be", "\xd8\x01", 2); |
| 407 | } |
| 408 | |
| 409 | TEST(iconv, iconv_EINVAL_utf16be_half_low_surrogate) { |
| 410 | Check(EINVAL, "utf16be", "\xd8\x01\xdc", 3); |
| 411 | } |
| 412 | |
| 413 | TEST(iconv, iconv_EINVAL_utf16le_short) { |
| 414 | Check(EINVAL, "utf16le", "\x24", 1); // Missing second byte. |
| 415 | } |
| 416 | |
| 417 | TEST(iconv, iconv_EINVAL_utf16le_missing_low_surrogate) { |
| 418 | Check(EINVAL, "utf16le", "\x01\xd8", 2); |
| 419 | } |
| 420 | |
| 421 | TEST(iconv, iconv_EINVAL_utf16le_half_low_surrogate) { |
| 422 | Check(EINVAL, "utf16le", "\x01\xd8\x37", 3); |
| 423 | } |
| 424 | |
| 425 | TEST(iconv, iconv_EINVAL_utf32be_short) { |
| 426 | Check(EINVAL, "utf32be", "\x00\x00\x00", 3); // Missing final byte. |
| 427 | } |
| 428 | |
| 429 | TEST(iconv, iconv_EINVAL_utf32le_short) { |
| 430 | Check(EINVAL, "utf32le", "\x24\x00\x00", 3); // Missing final byte. |
| 431 | } |
Elliott Hughes | d40a833 | 2017-08-31 13:35:50 -0700 | [diff] [blame] | 432 | |
| 433 | TEST(iconv, iconv_initial_shift_state) { |
| 434 | // POSIX: "For state-dependent encodings, the conversion descriptor |
| 435 | // cd is placed into its initial shift state by a call for which inbuf |
| 436 | // is a null pointer, or for which inbuf points to a null pointer." |
| 437 | iconv_t c = iconv_open("utf8", "utf8"); |
| 438 | char* in = nullptr; |
| 439 | size_t in_bytes = 0; |
| 440 | wchar_t out_buf[16]; |
| 441 | size_t out_bytes = sizeof(out_buf); |
| 442 | char* out = reinterpret_cast<char*>(out_buf); |
| 443 | |
| 444 | // Points to a null pointer... |
| 445 | errno = 0; |
| 446 | ASSERT_EQ(static_cast<size_t>(0), iconv(c, &in, &in_bytes, &out, &out_bytes)); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 447 | EXPECT_ERRNO(0); |
Elliott Hughes | d40a833 | 2017-08-31 13:35:50 -0700 | [diff] [blame] | 448 | EXPECT_EQ(sizeof(out_buf), out_bytes); |
| 449 | |
| 450 | // Is a null pointer... |
| 451 | errno = 0; |
| 452 | ASSERT_EQ(static_cast<size_t>(0), iconv(c, nullptr, &in_bytes, &out, &out_bytes)); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 453 | EXPECT_ERRNO(0); |
Elliott Hughes | d40a833 | 2017-08-31 13:35:50 -0700 | [diff] [blame] | 454 | EXPECT_EQ(sizeof(out_buf), out_bytes); |
| 455 | |
Elliott Hughes | 20c023f | 2021-02-18 10:37:22 -0800 | [diff] [blame] | 456 | // Is a null pointer and so is in_bytes. This isn't specified by POSIX, but |
| 457 | // glibc and macOS both allow that, where Android historically didn't. |
| 458 | // https://issuetracker.google.com/180598400 |
| 459 | errno = 0; |
| 460 | ASSERT_EQ(static_cast<size_t>(0), iconv(c, nullptr, nullptr, &out, &out_bytes)); |
Elliott Hughes | 95646e6 | 2023-09-21 14:11:19 -0700 | [diff] [blame^] | 461 | EXPECT_ERRNO(0); |
Elliott Hughes | 20c023f | 2021-02-18 10:37:22 -0800 | [diff] [blame] | 462 | EXPECT_EQ(sizeof(out_buf), out_bytes); |
| 463 | |
Elliott Hughes | d40a833 | 2017-08-31 13:35:50 -0700 | [diff] [blame] | 464 | EXPECT_EQ(0, iconv_close(c)); |
| 465 | } |