Implement <iconv.h>.
Bug: http://b/32978596
Test: ran tests
Change-Id: I56b6ae3d9c5a3a56d2b4afba33fb8f9e964bf7b9
diff --git a/libc/Android.bp b/libc/Android.bp
index be45f40..204ad19 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -1448,6 +1448,7 @@
"bionic/getpriority.cpp",
"bionic/gettid.cpp",
"bionic/grp_pwd.cpp",
+ "bionic/iconv.cpp",
"bionic/icu_wrappers.cpp",
"bionic/ifaddrs.cpp",
"bionic/inotify_init.cpp",
diff --git a/libc/bionic/iconv.cpp b/libc/bionic/iconv.cpp
new file mode 100644
index 0000000..b0372a1
--- /dev/null
+++ b/libc/bionic/iconv.cpp
@@ -0,0 +1,368 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <iconv.h>
+
+#include <ctype.h>
+#include <endian.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <uchar.h>
+
+#include "private/bionic_mbstate.h"
+
+// The value iconv_open() returns on failure; iconv() and iconv_close() reject it with EBADF.
+#define INVALID_ICONV_T reinterpret_cast<iconv_t>(-1)
+
+// Ideally we'd use icu4c but the API mismatch seems too great. So we just offer something
+// equivalent to (but slightly easier to use for runs of text than) <uchar.h>. If you're
+// here to add more encodings, consider working on finishing the icu4c NDK wrappers instead.
+// The encodings that __parse_encoding() can select for either side of a conversion.
+enum Encoding {
+ US_ASCII,
+ UTF_8,
+ UTF_16_LE,
+ UTF_16_BE,
+ UTF_32_LE,
+ UTF_32_BE,
+ WCHAR_T,  // Read and written exactly like UTF_32_LE by the converter in this file.
+};
+
+// What to do with input that is illegal in the source encoding or that can't be
+// represented in the destination encoding.
+enum Mode {
+ ERROR,     // Fail with EILSEQ. The default when no "//" suffix is given.
+ IGNORE,    // "//IGNORE": skip the input, but still fail with EILSEQ once the rest is converted.
+ TRANSLIT,  // "//TRANSLIT": substitute '?' and return the number of substitutions.
+};
+
+// Returns true if the caller-supplied encoding name `lhs` is an alias of `rhs`, which
+// must be a canonical name consisting only of lowercase letters and digits.
+//
+// This matching is strange but true.
+// See http://www.unicode.org/reports/tr22/#Charset_Alias_Matching.
+static bool __match_encoding(const char* lhs, const char* rhs) {
+  // Whether the most recently matched character of lhs was a digit.
+  bool after_digit = false;
+  while (*lhs && *rhs) {
+    // Skip non-alnum in lhs; "UTF-8", "UTF_8", "UTF8", "UTF 8" are all equivalent.
+    // Also implement the "delete each 0 that is not preceded by a digit" rule, but only
+    // for zeros that pad another digit: "UTF-08" is "UTF-8", while the zero in "10" stays.
+    for (; *lhs; ++lhs) {
+      // <ctype.h> functions require an unsigned char value; plain char may be negative.
+      unsigned char ch = static_cast<unsigned char>(*lhs);
+      if (!isalnum(ch)) continue;
+      bool padding_zero =
+          (ch == '0' && !after_digit && isdigit(static_cast<unsigned char>(lhs[1])));
+      if (!padding_zero) break;
+    }
+    // Case doesn't matter either.
+    int lhs_ch = tolower(static_cast<unsigned char>(*lhs));
+    if (lhs_ch != tolower(static_cast<unsigned char>(*rhs))) break;
+    after_digit = (isdigit(lhs_ch) != 0);
+    ++lhs;
+    ++rhs;
+  }
+  // As a special case we treat the GNU "//" extensions as end of string.
+  return (*lhs == '\0' || strstr(lhs, "//") == lhs) && *rhs == '\0';
+}
+
+// Parses an encoding name as given to iconv_open() and stores the result in *encoding.
+// A trailing "//IGNORE" or "//TRANSLIT" is stored in *mode; any other "//" suffix, or
+// any suffix at all when `mode` is null, makes the name invalid. Returns false if the
+// name is invalid or names an encoding that isn't supported.
+static bool __parse_encoding(const char* s, Encoding* encoding, Mode* mode) {
+  const char* suffix = strstr(s, "//");
+  if (suffix != nullptr) {
+    if (mode == nullptr) return false;
+    bool ignore = (strcmp(suffix, "//IGNORE") == 0);
+    if (!ignore && strcmp(suffix, "//TRANSLIT") != 0) return false;
+    *mode = ignore ? IGNORE : TRANSLIT;
+  }
+
+  // Canonical names, in the form that __match_encoding() expects on its right-hand side.
+  static const struct {
+    const char* name;
+    Encoding encoding;
+  } kKnownEncodings[] = {
+    {"utf8", UTF_8},
+    {"ascii", US_ASCII},
+    {"usascii", US_ASCII},
+    {"utf16le", UTF_16_LE},
+    {"utf16be", UTF_16_BE},
+    {"utf32le", UTF_32_LE},
+    {"utf32be", UTF_32_BE},
+    {"wchart", WCHAR_T},
+  };
+  for (const auto& known : kKnownEncodings) {
+    if (__match_encoding(s, known.name)) {
+      *encoding = known.encoding;
+      return true;
+    }
+  }
+  return false;
+}
+
+// The state behind an iconv_t: the encodings and error-handling mode chosen by
+// iconv_open(), plus scratch state that is only meaningful during one Convert() call.
+struct __iconv_t {
+  Encoding src_encoding;
+  Encoding dst_encoding;
+  Mode mode;
+
+  __iconv_t() : mode(ERROR) {
+  }
+
+  // Converts as much of the input as possible, advancing *src_buf0 and *dst_buf0 and
+  // decrementing *src_bytes_left0 and *dst_bytes_left0 for each character handled.
+  // Returns -1 with errno set (EILSEQ, EINVAL, or E2BIG) on failure. Otherwise returns
+  // the number of '?' substitutions made in TRANSLIT mode, or 0 in the other modes.
+  int Convert(char** src_buf0, size_t* src_bytes_left0, char** dst_buf0, size_t* dst_bytes_left0) {
+    // Reset state.
+    wc = 0;
+    memset(&ps, 0, sizeof(ps));
+    replacement_count = 0;
+    ignored = false;
+    skipped = false;
+    src_buf = src_buf0;
+    src_bytes_left = src_bytes_left0;
+    dst_buf = dst_buf0;
+    dst_bytes_left = dst_bytes_left0;
+
+    while (*src_bytes_left > 0) {
+      if (!GetNext()) return -1;
+      // In IGNORE mode GetNext() may have dropped illegal input instead of decoding a
+      // character. Re-check how much input is left before decoding again.
+      if (skipped) continue;
+      if (!Convert()) return -1;
+    }
+    return Done();
+  }
+
+ private:
+  char32_t wc;            // The code point most recently decoded by GetNext().
+  char buf[16];           // The encoded form of `wc`, filled in by Convert() for Emit().
+  size_t src_bytes_used;  // The number of input bytes making up the current character.
+  size_t dst_bytes_used;  // The number of valid bytes in `buf`.
+  mbstate_t ps;
+
+  size_t replacement_count;  // The number of '?' substitutions made in TRANSLIT mode.
+  bool ignored;              // Whether IGNORE mode has dropped anything during this call.
+  bool skipped;              // Whether the last GetNext() dropped input rather than set `wc`.
+
+  char** src_buf;
+  size_t* src_bytes_left;
+  char** dst_buf;
+  size_t* dst_bytes_left;
+
+  // Decodes the next input character into `wc` and records its length in
+  // `src_bytes_used`, without consuming it. Returns false with errno set on failure.
+  // If IGNORE mode consumed an illegal sequence instead, returns true with `skipped`
+  // set, and `wc` must not be used.
+  bool GetNext() {
+    errno = 0;
+    skipped = false;
+    switch (src_encoding) {
+      case US_ASCII:
+        wc = static_cast<uint8_t>(**src_buf);
+        src_bytes_used = 1;
+        if (wc > 0x7f) errno = EILSEQ;
+        break;
+
+      case UTF_8:
+        src_bytes_used = mbrtoc32(&wc, *src_buf, *src_bytes_left, &ps);
+        if (src_bytes_used == __MB_ERR_ILLEGAL_SEQUENCE) {
+          // EILSEQ already set. The return value is an error code, not a length, so
+          // resynchronize by stepping over just the offending byte.
+          src_bytes_used = 1;
+          memset(&ps, 0, sizeof(ps));
+        } else if (src_bytes_used == __MB_ERR_INCOMPLETE_SEQUENCE) {
+          errno = EINVAL;
+          return false;
+        } else if (src_bytes_used == 0) {
+          // mbrtoc32() returns 0 rather than 1 for the NUL character.
+          src_bytes_used = 1;
+        }
+        break;
+
+      case UTF_16_BE:
+      case UTF_16_LE: {
+        if (*src_bytes_left < 2) {
+          errno = EINVAL;
+          return false;
+        }
+        bool swap = (src_encoding == UTF_16_BE);
+        wc = In16(*src_buf, swap);
+        // 0xd800-0xdbff: high surrogates
+        // 0xdc00-0xdfff: low surrogates
+        if (wc >= 0xd800 && wc <= 0xdfff) {
+          if (wc >= 0xdc00) {  // Low surrogate before high surrogate.
+            errno = EILSEQ;
+            return false;
+          }
+          if (*src_bytes_left < 4) {
+            errno = EINVAL;
+            return false;
+          }
+          uint16_t hi = wc;
+          uint16_t lo = In16(*src_buf + 2, swap);
+          if (lo < 0xdc00 || lo > 0xdfff) {  // High surrogate without a low surrogate.
+            errno = EILSEQ;
+            return false;
+          }
+          wc = 0x10000 + ((hi - 0xd800) << 10) + (lo - 0xdc00);
+          src_bytes_used = 4;
+        }
+        break;
+      }
+
+      case UTF_32_BE:
+      case UTF_32_LE:
+      case WCHAR_T:
+        if (*src_bytes_left < 4) {
+          errno = EINVAL;
+          return false;
+        }
+        wc = In32(*src_buf, (src_encoding == UTF_32_BE));
+        break;
+    }
+
+    if (errno == EILSEQ) {
+      switch (mode) {
+        case ERROR:
+          return false;
+        case IGNORE:
+          // Drop the offending bytes and hand control back to the caller's loop, which
+          // knows whether any input remains. Decoding again from here could read past
+          // the end of the input when the illegal sequence was the last thing in it.
+          *src_buf += src_bytes_used;
+          *src_bytes_left -= src_bytes_used;
+          ignored = true;
+          skipped = true;
+          return true;
+        case TRANSLIT:
+          wc = '?';
+          ++replacement_count;
+          return true;
+      }
+    }
+    return true;
+  }
+
+  // Encodes `wc` into `buf` using the destination encoding and emits it. A character
+  // that the destination can't represent is handled according to `mode`. Returns false
+  // with errno set on failure.
+  bool Convert() {
+    errno = 0;
+    switch (dst_encoding) {
+      case US_ASCII:
+        buf[0] = wc;
+        dst_bytes_used = 1;
+        if (wc > 0x7f) errno = EILSEQ;
+        break;
+
+      case UTF_8:
+        dst_bytes_used = c32rtomb(buf, wc, &ps);
+        if (dst_bytes_used == __MB_ERR_ILLEGAL_SEQUENCE) {
+          break;  // EILSEQ already set.
+        } else if (dst_bytes_used == __MB_ERR_INCOMPLETE_SEQUENCE) {
+          errno = EINVAL;
+          return false;
+        }
+        break;
+
+      case UTF_16_BE:
+      case UTF_16_LE: {
+        bool swap = (dst_encoding == UTF_16_BE);
+        if (wc < 0x10000) {  // BMP.
+          Out16(buf, wc, swap);
+        } else if (wc > 0x10ffff) {  // Too large for a surrogate pair to express.
+          errno = EILSEQ;
+        } else {  // Supplementary plane; output surrogate pair.
+          wc -= 0x10000;
+          char16_t hi = 0xd800 | (wc >> 10);
+          char16_t lo = 0xdc00 | (wc & 0x3ff);
+          Out16(buf + 0, hi, swap);
+          Out16(buf + 2, lo, swap);
+          dst_bytes_used = 4;
+        }
+        break;
+      }
+
+      case UTF_32_BE:
+      case UTF_32_LE:
+      case WCHAR_T:
+        Out32(wc, (dst_encoding == UTF_32_BE));
+        break;
+    }
+
+    if (errno == EILSEQ) {
+      if (mode == IGNORE) {
+        // Consume the input character without producing any output for it.
+        *src_buf += src_bytes_used;
+        *src_bytes_left -= src_bytes_used;
+        ignored = true;
+        return true;
+      } else if (mode == TRANSLIT) {
+        // '?' is representable in every destination encoding, so this recurses once.
+        wc = '?';
+        ++replacement_count;
+        return Convert();
+      }
+      return false;
+    }
+
+    return Emit();
+  }
+
+  // Reads a 16-bit unit from `p`, little-endian unless `swap` is set, and records
+  // that two input bytes were used.
+  uint16_t In16(const char* p, bool swap) {
+    const uint8_t* src = reinterpret_cast<const uint8_t*>(p);
+    uint16_t unit = (src[0]) | (src[1] << 8);
+    if (swap) unit = __swap16(unit);
+    src_bytes_used = 2;
+    return unit;
+  }
+
+  // Reads a 32-bit unit from `p`, little-endian unless `swap` is set, and records
+  // that four input bytes were used.
+  uint32_t In32(const char* p, bool swap) {
+    const uint8_t* src = reinterpret_cast<const uint8_t*>(p);
+    // Widen the top byte before shifting so a value >= 0x80 can't overflow a signed int.
+    uint32_t unit = (src[0]) | (src[1] << 8) | (src[2] << 16) |
+                    (static_cast<uint32_t>(src[3]) << 24);
+    if (swap) unit = __swap32(unit);
+    src_bytes_used = 4;
+    return unit;
+  }
+
+  // Writes `ch` to `dst` as two bytes, little-endian unless `swap` is set, and records
+  // an output length of two bytes.
+  void Out16(char* dst, char16_t ch, bool swap) {
+    if (swap) ch = __swap16(ch);
+    dst[0] = ch;
+    dst[1] = ch >> 8;
+    dst_bytes_used = 2;
+  }
+
+  // Writes `ch` to the start of `buf` as four bytes, little-endian unless `swap` is
+  // set, and records an output length of four bytes.
+  void Out32(char32_t ch, bool swap) {
+    if (swap) ch = __swap32(ch);
+    buf[0] = ch;
+    buf[1] = ch >> 8;
+    buf[2] = ch >> 16;
+    buf[3] = ch >> 24;
+    dst_bytes_used = 4;
+  }
+
+  // Copies the encoded character in `buf` to the output and consumes the input it came
+  // from. Fails with E2BIG, leaving both buffers untouched, if the output is too small.
+  bool Emit() {
+    if (dst_bytes_used > *dst_bytes_left) {
+      errno = E2BIG;
+      return false;
+    }
+
+    memcpy(*dst_buf, buf, dst_bytes_used);
+    *src_buf += src_bytes_used;
+    *src_bytes_left -= src_bytes_used;
+    *dst_buf += dst_bytes_used;
+    *dst_bytes_left -= dst_bytes_used;
+    return true;
+  }
+
+  // Computes the public Convert()'s result once all of the input has been consumed.
+  int Done() {
+    if (mode == TRANSLIT) return replacement_count;
+    if (ignored) {
+      errno = EILSEQ;
+      return -1;
+    }
+    return 0;
+  }
+};
+
+// Creates a converter from __src_encoding to __dst_encoding. Returns INVALID_ICONV_T
+// with errno set to EINVAL if either name can't be parsed.
+iconv_t iconv_open(const char* __dst_encoding, const char* __src_encoding) {
+  __iconv_t* converter = new __iconv_t;
+  // Only the destination name is given somewhere to store a "//" suffix's mode.
+  bool ok = __parse_encoding(__src_encoding, &converter->src_encoding, nullptr) &&
+            __parse_encoding(__dst_encoding, &converter->dst_encoding, &converter->mode);
+  if (ok) return converter;
+
+  delete converter;
+  errno = EINVAL;
+  return INVALID_ICONV_T;
+}
+
+// Converts text from *__src_buf into *__dst_buf using __converter, updating all four
+// in/out arguments to reflect the bytes consumed and produced. Returns the value of
+// __iconv_t::Convert(), which is -1 (that is, (size_t) -1) with errno set on failure.
+// Fails with EBADF if __converter is the value iconv_open() returns on failure.
+size_t iconv(iconv_t __converter,
+             char** __src_buf, size_t* __src_bytes_left,
+             char** __dst_buf, size_t* __dst_bytes_left) {
+  if (__converter == INVALID_ICONV_T) {
+    errno = EBADF;
+    return -1;
+  }
+
+  // POSIX lets callers pass a null input to reset the conversion state. Convert() keeps
+  // no state between calls, so there is nothing to reset, and nothing safe to dereference.
+  if (__src_buf == nullptr || *__src_buf == nullptr) return 0;
+
+  return __converter->Convert(__src_buf, __src_bytes_left, __dst_buf, __dst_bytes_left);
+}
+
+// Destroys a converter created by iconv_open(). Returns 0 on success, or -1 with errno
+// set to EBADF if given the value iconv_open() returns on failure.
+int iconv_close(iconv_t __converter) {
+  if (__converter != INVALID_ICONV_T) {
+    delete __converter;
+    return 0;
+  }
+  errno = EBADF;
+  return -1;
+}
diff --git a/libc/include/iconv.h b/libc/include/iconv.h
new file mode 100644
index 0000000..4b05bae
--- /dev/null
+++ b/libc/include/iconv.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ICONV_H_
+#define _ICONV_H_
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+
+__BEGIN_DECLS
+
+/* An opaque conversion descriptor, created by iconv_open() and destroyed by iconv_close(). */
+struct __iconv_t;
+typedef struct __iconv_t* iconv_t;
+
+/*
+ * Returns a descriptor for converting from __src_encoding to __dst_encoding.
+ * Note the argument order: as in POSIX, the destination encoding comes first.
+ */
+iconv_t iconv_open(const char* __dst_encoding, const char* __src_encoding) __INTRODUCED_IN_FUTURE;
+/* Converts from *__src_buf into *__dst_buf, updating all four pointer and length arguments. */
+size_t iconv(iconv_t __converter, char** __src_buf, size_t* __src_bytes_left, char** __dst_buf, size_t* __dst_bytes_left) __INTRODUCED_IN_FUTURE;
+/* Releases a descriptor returned by iconv_open(). */
+int iconv_close(iconv_t __converter) __INTRODUCED_IN_FUTURE;
+
+__END_DECLS
+
+#endif
diff --git a/libc/libc.arm.map b/libc/libc.arm.map
index 6ababa1..fe14018 100644
--- a/libc/libc.arm.map
+++ b/libc/libc.arm.map
@@ -1323,6 +1323,9 @@
__freading; # future
__fwriting; # future
getlogin_r; # future
+ iconv; # future
+ iconv_close; # future
+ iconv_open; # future
syncfs; # future
} LIBC_O;
diff --git a/libc/libc.arm64.map b/libc/libc.arm64.map
index 8dd8f34..7088ada 100644
--- a/libc/libc.arm64.map
+++ b/libc/libc.arm64.map
@@ -1243,6 +1243,9 @@
__freading; # future
__fwriting; # future
getlogin_r; # future
+ iconv; # future
+ iconv_close; # future
+ iconv_open; # future
syncfs; # future
} LIBC_O;
diff --git a/libc/libc.map.txt b/libc/libc.map.txt
index 3441406..afc1c86 100644
--- a/libc/libc.map.txt
+++ b/libc/libc.map.txt
@@ -1348,6 +1348,9 @@
__freading; # future
__fwriting; # future
getlogin_r; # future
+ iconv; # future
+ iconv_close; # future
+ iconv_open; # future
syncfs; # future
} LIBC_O;
diff --git a/libc/libc.mips.map b/libc/libc.mips.map
index 3b1ebce..cb37ab2 100644
--- a/libc/libc.mips.map
+++ b/libc/libc.mips.map
@@ -1307,6 +1307,9 @@
__freading; # future
__fwriting; # future
getlogin_r; # future
+ iconv; # future
+ iconv_close; # future
+ iconv_open; # future
syncfs; # future
} LIBC_O;
diff --git a/libc/libc.mips64.map b/libc/libc.mips64.map
index 8dd8f34..7088ada 100644
--- a/libc/libc.mips64.map
+++ b/libc/libc.mips64.map
@@ -1243,6 +1243,9 @@
__freading; # future
__fwriting; # future
getlogin_r; # future
+ iconv; # future
+ iconv_close; # future
+ iconv_open; # future
syncfs; # future
} LIBC_O;
diff --git a/libc/libc.x86.map b/libc/libc.x86.map
index 34370ba..567a5bf 100644
--- a/libc/libc.x86.map
+++ b/libc/libc.x86.map
@@ -1305,6 +1305,9 @@
__freading; # future
__fwriting; # future
getlogin_r; # future
+ iconv; # future
+ iconv_close; # future
+ iconv_open; # future
syncfs; # future
} LIBC_O;
diff --git a/libc/libc.x86_64.map b/libc/libc.x86_64.map
index 8dd8f34..7088ada 100644
--- a/libc/libc.x86_64.map
+++ b/libc/libc.x86_64.map
@@ -1243,6 +1243,9 @@
__freading; # future
__fwriting; # future
getlogin_r; # future
+ iconv; # future
+ iconv_close; # future
+ iconv_open; # future
syncfs; # future
} LIBC_O;
diff --git a/libc/private/icu.h b/libc/private/icu.h
index ae253fa..a671e98 100644
--- a/libc/private/icu.h
+++ b/libc/private/icu.h
@@ -33,6 +33,9 @@
#include <wchar.h>
typedef int8_t UBool;
+#define FALSE 0
+#define TRUE 1
+
typedef int32_t UChar32;
enum UProperty {