Use icu4c to implement <wctype.h>.

Pretty useless, because the POSIX APIs are useless for actual
internationalization, but it lets us put this to bed for good.

Bug: http://b/18492914
Test: bionic tests
Change-Id: I4dd0aff66c44b5547039be3ffea806c865b9014a
diff --git a/libc/bionic/icu.cpp b/libc/bionic/icu.cpp
new file mode 100644
index 0000000..abc0eec
--- /dev/null
+++ b/libc/bionic/icu.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "private/icu.h"
+
+#include <dirent.h>
+#include <dlfcn.h>
+#include <pthread.h>
+#include <stdlib.h>
+
+#include "private/libc_logging.h"
+
+// Allowed icu4c version numbers are in the range [44, 999].
+// Gingerbread's icu4c 4.4 is the minimum supported ICU version.
+static constexpr auto ICUDATA_VERSION_MIN_LENGTH = 2;
+static constexpr auto ICUDATA_VERSION_MAX_LENGTH = 3;
+static constexpr auto ICUDATA_VERSION_MIN = 44;
+// Symbol suffix "_<version>": the '_', up to three digits, and the terminating NUL.
+static char g_icudata_version[1 + ICUDATA_VERSION_MAX_LENGTH + 1];
+
+static void* g_libicuuc_handle = nullptr;  // dlopen() handle; set by __find_icu().
+
+static int __icu_dat_file_filter(const dirent* dirp) {
+  // scandir(3) filter: accept names of the right length that start "icudt" and end "l.dat".
+  const char* filename = dirp->d_name;
+  const size_t length = strlen(filename);
+  // "icudt" plus "l.dat" is 10 characters; the version supplies the rest.
+  if (length < 10 + ICUDATA_VERSION_MIN_LENGTH) return 0;
+  if (length > 10 + ICUDATA_VERSION_MAX_LENGTH) return 0;
+  return strncmp(filename, "icudt", 5) == 0 && strncmp(filename + length - 5, "l.dat", 5) == 0;
+}
+
+// Picks the newest ICU data version under /system/usr/icu, then dlopens libicuuc.so.
+static bool __find_icu() {
+  dirent** namelist = nullptr;
+  int n = scandir("/system/usr/icu", &namelist, &__icu_dat_file_filter, alphasort);
+  int max_version = -1;
+  // scandir returns -1 on failure; `n-- > 0` skips the loop instead of indexing a null list.
+  while (n-- > 0) {
+    // We prefer the latest version available.
+    int version = atoi(&namelist[n]->d_name[strlen("icudt")]);
+    if (version != 0 && version > max_version) max_version = version;
+    free(namelist[n]);
+  }
+  free(namelist);
+  if (max_version == -1 || max_version < ICUDATA_VERSION_MIN) {
+    __libc_write_log(ANDROID_LOG_ERROR, "bionic-icu", "couldn't find an ICU .dat file");
+    return false;
+  }
+
+  snprintf(g_icudata_version, sizeof(g_icudata_version), "_%d", max_version);
+  g_libicuuc_handle = dlopen("libicuuc.so", RTLD_LOCAL);
+  if (g_libicuuc_handle == nullptr) {
+    __libc_format_log(ANDROID_LOG_ERROR, "bionic-icu", "couldn't open libicuuc.so: %s", dlerror());
+    return false;
+  }
+
+  return true;
+}
+
+// Looks up `symbol_name` with the detected ICU version suffix appended in libicuuc.so.
+// Returns nullptr if ICU is unavailable or the versioned symbol is missing.
+void* __find_icu_symbol(const char* symbol_name) {
+  // __find_icu() runs once, on the first call; its result is cached here.
+  static bool icu_available = __find_icu();
+  if (icu_available == false) return nullptr;
+  const size_t buffer_size = strlen(symbol_name) + sizeof(g_icudata_version);
+  char full_name[buffer_size];
+  snprintf(full_name, buffer_size, "%s%s", symbol_name, g_icudata_version);
+  void* result = dlsym(g_libicuuc_handle, full_name);
+  if (result != nullptr) return result;
+  __libc_format_log(ANDROID_LOG_ERROR, "bionic-icu", "couldn't find %s", full_name);
+  return nullptr;
+}
diff --git a/libc/bionic/icu_static.cpp b/libc/bionic/icu_static.cpp
new file mode 100644
index 0000000..cf24a38
--- /dev/null
+++ b/libc/bionic/icu_static.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "private/icu.h"
+
+// We don't have dlopen/dlsym for static binaries yet, so every lookup reports "not found".
+void* __find_icu_symbol(const char*) {
+  return nullptr;
+}
diff --git a/libc/bionic/wctype.cpp b/libc/bionic/wctype.cpp
index cd8c39b..64c3467 100644
--- a/libc/bionic/wctype.cpp
+++ b/libc/bionic/wctype.cpp
@@ -34,26 +34,53 @@
 #include <string.h>
 #include <wchar.h>
 
-// These functions are either defined to be the same as their ASCII cousins,
-// or defined in terms of other functions.
-int iswalnum(wint_t wc) { return iswdigit(wc) || iswalpha(wc); }
-int iswblank(wint_t wc) { return isblank(wc); }
-int iswdigit(wint_t wc) { return isdigit(wc); }
-int iswgraph(wint_t wc) { return !iswspace(wc) && iswprint(wc); }
-int iswlower(wint_t wc) {
-  return towlower(wc) == wc && !(iswcntrl(wc) || iswdigit(wc) || iswpunct(wc) || iswspace(wc));
-}
-int iswupper(wint_t wc) {
-  return towupper(wc) == wc && !(iswcntrl(wc) || iswdigit(wc) || iswpunct(wc) || iswspace(wc));
-}
-int iswxdigit(wint_t wc) { return isxdigit(wc); }
+#include "private/icu.h"
 
-// TODO: need proper implementations of these.
-int iswalpha(wint_t wc) { return isalpha(wc); }
-int iswcntrl(wint_t wc) { return iscntrl(wc); }
-int iswprint(wint_t wc) { return isprint(wc); }
-int iswpunct(wint_t wc) { return ispunct(wc); }
-int iswspace(wint_t wc) { return isspace(wc); }
+static constexpr int UCHAR_ALPHABETIC = 0;  // UCHAR_*: mirror ICU's UProperty enum (uchar.h).
+static constexpr int UCHAR_LOWERCASE = 22;
+static constexpr int UCHAR_POSIX_ALNUM = 44;
+static constexpr int UCHAR_POSIX_BLANK = 45;
+static constexpr int UCHAR_POSIX_GRAPH = 46;
+static constexpr int UCHAR_POSIX_PRINT = 47;
+static constexpr int UCHAR_POSIX_XDIGIT = 48;
+static constexpr int UCHAR_UPPERCASE = 30;
+static constexpr int UCHAR_WHITE_SPACE = 31;
+
+static constexpr int U_CONTROL_CHAR = 15;  // Mirrors ICU's UCharCategory enum (uchar.h).
+
+static bool __icu_hasBinaryProperty(wint_t wc, int property, int (*fallback)(int)) {
+  typedef signed char (*FnT)(wint_t, int);  // ICU returns UBool (int8_t), not int.
+  static auto u_hasBinaryProperty = reinterpret_cast<FnT>(__find_icu_symbol("u_hasBinaryProperty"));
+  return u_hasBinaryProperty ? u_hasBinaryProperty(wc, property) : fallback(wc);
+}
+// Each predicate queries ICU for the given property; the <ctype.h> function is the fallback.
+int iswalnum(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_ALNUM, isalnum); }
+int iswalpha(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_ALPHABETIC, isalpha); }
+int iswblank(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_BLANK, isblank); }
+int iswgraph(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_GRAPH, isgraph); }
+int iswlower(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_LOWERCASE, islower); }
+int iswprint(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_PRINT, isprint); }
+int iswspace(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_WHITE_SPACE, isspace); }
+int iswupper(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_UPPERCASE, isupper); }
+int iswxdigit(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_XDIGIT, isxdigit); }
+
+int iswcntrl(wint_t wc) {
+  typedef signed char (*FnT)(wint_t);  // u_charType returns int8_t, not int.
+  static auto u_charType = reinterpret_cast<FnT>(__find_icu_symbol("u_charType"));
+  return u_charType ? (u_charType(wc) == U_CONTROL_CHAR) : iscntrl(wc);
+}
+
+int iswdigit(wint_t wc) {
+  typedef signed char (*FnT)(wint_t);  // u_isdigit returns UBool (int8_t), not int.
+  static auto u_isdigit = reinterpret_cast<FnT>(__find_icu_symbol("u_isdigit"));
+  return u_isdigit ? u_isdigit(wc) : isdigit(wc);
+}
+
+int iswpunct(wint_t wc) {
+  typedef signed char (*FnT)(wint_t);  // u_ispunct returns UBool (int8_t), not int.
+  static auto u_ispunct = reinterpret_cast<FnT>(__find_icu_symbol("u_ispunct"));
+  return u_ispunct ? u_ispunct(wc) : ispunct(wc);
+}
 
 int iswalnum_l(wint_t c, locale_t) { return iswalnum(c); }
 int iswalpha_l(wint_t c, locale_t) { return iswalpha(c); }
@@ -90,12 +117,20 @@
   return iswctype(wc, char_class);
 }
 
-// TODO: need proper implementations of these.
-wint_t towlower(wint_t wc) { return tolower(wc); }
-wint_t towupper(wint_t wc) { return toupper(wc); }
+wint_t towlower(wint_t wc) {
+  using IcuFn = wchar_t (*)(wchar_t);
+  static const auto icu_fn = reinterpret_cast<IcuFn>(__find_icu_symbol("u_tolower"));
+  return (icu_fn == nullptr) ? tolower(wc) : icu_fn(wc);  // No ICU: <ctype.h> fallback.
+}
 
-wint_t towupper_l(int c, locale_t) { return towupper(c); }
-wint_t towlower_l(int c, locale_t) { return towlower(c); }
+wint_t towupper(wint_t wc) {
+  using IcuFn = wchar_t (*)(wchar_t);
+  static const auto icu_fn = reinterpret_cast<IcuFn>(__find_icu_symbol("u_toupper"));
+  return (icu_fn == nullptr) ? toupper(wc) : icu_fn(wc);  // No ICU: <ctype.h> fallback.
+}
+
+wint_t towupper_l(wint_t c, locale_t) { return towupper(c); }  // The locale is ignored.
+wint_t towlower_l(wint_t c, locale_t) { return towlower(c); }  // The locale is ignored.
 
 wctype_t wctype(const char* property) {
   static const char* const  properties[WC_TYPE_MAX] = {