Implement wcwidth(3) in terms of icu4c.

Based on com.google.i18n.CharWidth by the icu-team folks.

Bug: N/A
Test: ran tests
Change-Id: Ia9d818ec4ae60f2f3978533195330d00699397c5
diff --git a/libc/Android.bp b/libc/Android.bp
index d9b8a2b..9101155 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -1446,6 +1446,7 @@
         "bionic/getpriority.cpp",
         "bionic/gettid.cpp",
         "bionic/grp_pwd.cpp",
+        "bionic/icu_wrappers.cpp",
         "bionic/ifaddrs.cpp",
         "bionic/inotify_init.cpp",
         "bionic/ioctl.cpp",
@@ -1553,6 +1554,7 @@
         "bionic/wchar_l.cpp",
         "bionic/wcstod.cpp",
         "bionic/wctype.cpp",
+        "bionic/wcwidth.cpp",
         "bionic/wmempcpy.cpp",
     ],
 
diff --git a/libc/bionic/icu_wrappers.cpp b/libc/bionic/icu_wrappers.cpp
new file mode 100644
index 0000000..d9f2745
--- /dev/null
+++ b/libc/bionic/icu_wrappers.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "private/icu.h"
+
+// Returns icu4c's u_charType() result for `wc` (callers compare it against
+// UCharCategory values), or -1 if the u_charType symbol could not be resolved.
+int8_t __icu_charType(wint_t wc) {
+  typedef int8_t (*u_charType_t)(UChar32);
+  // Looked up once; the function-local static keeps the result for later calls.
+  static auto u_charType = reinterpret_cast<u_charType_t>(__find_icu_symbol("u_charType"));
+  return u_charType ? u_charType(wc) : -1;
+}
+
+// Returns icu4c's u_getIntPropertyValue() result for `wc` and `property`, or 0
+// if the u_getIntPropertyValue symbol could not be resolved.
+int32_t __icu_getIntPropertyValue(wint_t wc, UProperty property) {
+  typedef int32_t (*u_getIntPropertyValue_t)(UChar32, UProperty);
+  static auto u_getIntPropertyValue =
+      reinterpret_cast<u_getIntPropertyValue_t>(__find_icu_symbol("u_getIntPropertyValue"));
+  return u_getIntPropertyValue ? u_getIntPropertyValue(wc, property) : 0;
+}
+
+// Returns icu4c's u_hasBinaryProperty() result for `wc` and `property`. If that
+// symbol could not be resolved, returns fallback(wc) instead. `fallback` may be
+// null when the caller has no <ctype.h> equivalent to offer; the answer is then
+// false instead of a call through a null function pointer.
+bool __icu_hasBinaryProperty(wint_t wc, UProperty property, int (*fallback)(int)) {
+  typedef UBool (*u_hasBinaryProperty_t)(UChar32, UProperty);
+  static auto u_hasBinaryProperty =
+      reinterpret_cast<u_hasBinaryProperty_t>(__find_icu_symbol("u_hasBinaryProperty"));
+  if (u_hasBinaryProperty) return u_hasBinaryProperty(wc, property);
+  return fallback ? fallback(wc) : false;
+}
diff --git a/libc/bionic/wctype.cpp b/libc/bionic/wctype.cpp
index 8e2acef..05b6e19 100644
--- a/libc/bionic/wctype.cpp
+++ b/libc/bionic/wctype.cpp
@@ -53,12 +53,6 @@
   WC_TYPE_MAX
 };
 
-static bool __icu_hasBinaryProperty(wint_t wc, UProperty property, int (*fallback)(int)) {
-  typedef UBool (*FnT)(UChar32, UProperty);
-  static auto u_hasBinaryProperty = reinterpret_cast<FnT>(__find_icu_symbol("u_hasBinaryProperty"));
-  return u_hasBinaryProperty ? u_hasBinaryProperty(wc, property) : fallback(wc);
-}
-
 int iswalnum(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_ALNUM, isalnum); }
 int iswalpha(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_ALPHABETIC, isalpha); }
 int iswblank(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_BLANK, isblank); }
@@ -155,10 +149,6 @@
   return wctype(property);
 }
 
-int wcwidth(wchar_t wc) {
-  return (wc > 0);
-}
-
 static wctrans_t wctrans_tolower = wctrans_t(1);
 static wctrans_t wctrans_toupper = wctrans_t(2);
 
diff --git a/libc/bionic/wcwidth.cpp b/libc/bionic/wcwidth.cpp
new file mode 100644
index 0000000..9676b5a
--- /dev/null
+++ b/libc/bionic/wcwidth.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <wchar.h>
+
+#include "private/icu.h"
+
+// Returns the number of columns needed to display `wc`:
+//  * -1 for C0/C1 control characters and DEL, and for any other `wc` when
+//    icu4c's u_charType can't be found;
+//  * 0 for NUL, non-spacing/enclosing marks, format characters, default-ignorable
+//    code points, and medial/final Hangul jamo;
+//  * 2 for leading jamo, Hangul syllables, U+3248..U+324F, U+4DC0..U+4DFF, and
+//    anything whose East Asian width is wide or fullwidth;
+//  * 1 for printable ASCII and any other character with a known East Asian width.
+int wcwidth(wchar_t wc) {
+  // Fast-path ASCII.
+  if (wc >= 0x20 && wc < 0x7f) return 1;
+
+  // ASCII NUL is a special case.
+  if (wc == 0) return 0;
+
+  // C0 controls, DEL, and C1 controls (U+0080..U+009F). U+00A0 NO-BREAK SPACE is
+  // printable, so the range stops short of it and icu4c classifies it below.
+  if (wc < ' ' || (wc >= 0x7f && wc < 0xa0)) return -1;
+
+  // Now for the i18n part. This isn't defined or standardized, so a lot of the choices are
+  // pretty arbitrary. See https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c for more details.
+
+  // Fancy unicode control characters?
+  switch (__icu_charType(wc)) {
+   case -1:
+    // No icu4c available; give up.
+    return -1;
+   case U_CONTROL_CHAR:
+    return -1;
+   case U_NON_SPACING_MARK:
+   case U_ENCLOSING_MARK:
+   case U_FORMAT_CHAR:
+    return 0;
+  }
+
+  // __icu_hasBinaryProperty may call its fallback, so hand it a real function that
+  // answers "not ignorable" rather than a null pointer.
+  auto not_ignorable = [](int) { return 0; };
+  if (__icu_hasBinaryProperty(wc, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, not_ignorable)) return 0;
+
+  // Medial and final jamo render as zero width when used correctly.
+  switch (__icu_getIntPropertyValue(wc, UCHAR_HANGUL_SYLLABLE_TYPE)) {
+   case U_HST_VOWEL_JAMO:
+   case U_HST_TRAILING_JAMO:
+    return 0;
+   case U_HST_LEADING_JAMO:
+   case U_HST_LV_SYLLABLE:
+   case U_HST_LVT_SYLLABLE:
+    return 2;
+  }
+
+  if (wc >= 0x3248 && wc <= 0x4dff) {
+    // Circled two-digit CJK "speed sign" numbers. EastAsianWidth is ambiguous,
+    // but wide makes more sense.
+    if (wc <= 0x324f) return 2;
+    // Hexagrams. EastAsianWidth is neutral, but wide seems better.
+    if (wc >= 0x4dc0) return 2;
+  }
+
+  // The EastAsianWidth property is at least defined by the Unicode standard!
+  switch (__icu_getIntPropertyValue(wc, UCHAR_EAST_ASIAN_WIDTH)) {
+   case U_EA_AMBIGUOUS:
+   case U_EA_HALFWIDTH:
+   case U_EA_NARROW:
+   case U_EA_NEUTRAL:
+    return 1;
+   case U_EA_FULLWIDTH:
+   case U_EA_WIDE:
+    return 2;
+  }
+
+  // An East Asian width value this code doesn't recognize.
+  return 0;
+}
diff --git a/libc/private/icu.h b/libc/private/icu.h
index 03fdf66..ae253fa 100644
--- a/libc/private/icu.h
+++ b/libc/private/icu.h
@@ -30,12 +30,14 @@
 #define _PRIVATE_ICU_H
 
 #include <stdint.h>
+#include <wchar.h>
 
 typedef int8_t UBool;
 typedef int32_t UChar32;
 
 enum UProperty {
   UCHAR_ALPHABETIC = 0,
+  UCHAR_DEFAULT_IGNORABLE_CODE_POINT = 5,
   UCHAR_LOWERCASE = 22,
   UCHAR_POSIX_ALNUM = 44,
   UCHAR_POSIX_BLANK = 45,
@@ -44,12 +46,45 @@
   UCHAR_POSIX_XDIGIT = 48,
   UCHAR_UPPERCASE = 30,
   UCHAR_WHITE_SPACE = 31,
+  UCHAR_EAST_ASIAN_WIDTH = 0x1004,
+  UCHAR_HANGUL_SYLLABLE_TYPE = 0x100b,
 };
 
 enum UCharCategory {
+  U_NON_SPACING_MARK = 6,
+  U_ENCLOSING_MARK = 7,
   U_CONTROL_CHAR = 15,
+  U_FORMAT_CHAR = 16,
 };
 
+// Values of the UCHAR_EAST_ASIAN_WIDTH property. Numbered explicitly, like the
+// enums above, because they are compared against values icu4c returns at runtime.
+enum UEastAsianWidth {
+  U_EA_NEUTRAL = 0,
+  U_EA_AMBIGUOUS = 1,
+  U_EA_HALFWIDTH = 2,
+  U_EA_FULLWIDTH = 3,
+  U_EA_NARROW = 4,
+  U_EA_WIDE = 5,
+};
+
+// Values of the UCHAR_HANGUL_SYLLABLE_TYPE property. Numbered explicitly for the
+// same reason: they are compared against values icu4c returns at runtime.
+enum UHangulSyllableType {
+  U_HST_NOT_APPLICABLE = 0,
+  U_HST_LEADING_JAMO = 1,
+  U_HST_VOWEL_JAMO = 2,
+  U_HST_TRAILING_JAMO = 3,
+  U_HST_LV_SYLLABLE = 4,
+  U_HST_LVT_SYLLABLE = 5,
+};
+
+// Wrappers for icu4c's u_charType, u_getIntPropertyValue, and u_hasBinaryProperty.
+// When the icu4c symbol can't be found they return -1, 0, and fallback(wc) respectively.
+int8_t __icu_charType(wint_t wc);
+int32_t __icu_getIntPropertyValue(wint_t wc, UProperty property);
+bool __icu_hasBinaryProperty(wint_t wc, UProperty property, int (*fallback)(int));
+
 void* __find_icu_symbol(const char* symbol_name);
 
 #endif  // _PRIVATE_ICU_H