<wctype.h>: de-pessimize the isw*() functions.
Add the missing ASCII fast paths, ensure that they and the "no icu" fallback both use inline code rather than another function call to the non-wide variant of the function, and in passing fix a bug in wcwidth() where we were passing a null pointer as a fallback function by just inlining the u_hasBinaryProperty() lookup in the two files that use it (rather than leave the footgun lying around after it's already gone off once).
Change-Id: Id020f2d4003392e8716ca82ae40d54c982db382a
diff --git a/libc/bionic/icu_wrappers.cpp b/libc/bionic/icu_wrappers.cpp
index d9f2745..523f5a6 100644
--- a/libc/bionic/icu_wrappers.cpp
+++ b/libc/bionic/icu_wrappers.cpp
@@ -40,10 +40,3 @@
reinterpret_cast<u_getIntPropertyValue_t>(__find_icu_symbol("u_getIntPropertyValue"));
return u_getIntPropertyValue ? u_getIntPropertyValue(wc, property) : 0;
}
-
-bool __icu_hasBinaryProperty(wint_t wc, UProperty property, int (*fallback)(int)) {
- typedef UBool (*u_hasBinaryProperty_t)(UChar32, UProperty);
- static auto u_hasBinaryProperty =
- reinterpret_cast<u_hasBinaryProperty_t>(__find_icu_symbol("u_hasBinaryProperty"));
- return u_hasBinaryProperty ? u_hasBinaryProperty(wc, property) : fallback(wc);
-}
diff --git a/libc/bionic/wctype.cpp b/libc/bionic/wctype.cpp
index 481197a..94597d9 100644
--- a/libc/bionic/wctype.cpp
+++ b/libc/bionic/wctype.cpp
@@ -54,26 +54,41 @@
WC_TYPE_MAX
};
-int iswalnum(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_ALNUM, isalnum); }
+static u_hasBinaryProperty_t __find_u_hasBinaryProperty() {
+ static auto u_hasBinaryProperty =
+ reinterpret_cast<u_hasBinaryProperty_t>(__find_icu_symbol("u_hasBinaryProperty"));
+ return u_hasBinaryProperty;
+}
+
+#define DO_ISW(icu_constant, narrow_fn) \
+ u_hasBinaryProperty_t u_hasBinaryProperty; \
+ if (__predict_true(wc < 0x80) || \
+ !(u_hasBinaryProperty = __find_u_hasBinaryProperty())) { \
+ return narrow_fn(wc); \
+ } \
+ return u_hasBinaryProperty(wc, icu_constant); \
+
+int iswalnum(wint_t wc) { DO_ISW(UCHAR_POSIX_ALNUM, isalnum); }
__strong_alias(iswalnum_l, iswalnum);
-int iswalpha(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_ALPHABETIC, isalpha); }
+int iswalpha(wint_t wc) { DO_ISW(UCHAR_ALPHABETIC, isalpha); }
__strong_alias(iswalpha_l, iswalpha);
-int iswblank(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_BLANK, isblank); }
+int iswblank(wint_t wc) { DO_ISW(UCHAR_POSIX_BLANK, isblank); }
__strong_alias(iswblank_l, iswblank);
-int iswgraph(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_GRAPH, isgraph); }
+int iswgraph(wint_t wc) { DO_ISW(UCHAR_POSIX_GRAPH, isgraph); }
__strong_alias(iswgraph_l, iswgraph);
-int iswlower(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_LOWERCASE, islower); }
+int iswlower(wint_t wc) { DO_ISW(UCHAR_LOWERCASE, islower); }
__strong_alias(iswlower_l, iswlower);
-int iswprint(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_PRINT, isprint); }
+int iswprint(wint_t wc) { DO_ISW(UCHAR_POSIX_PRINT, isprint); }
__strong_alias(iswprint_l, iswprint);
-int iswspace(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_WHITE_SPACE, isspace); }
+int iswspace(wint_t wc) { DO_ISW(UCHAR_WHITE_SPACE, isspace); }
__strong_alias(iswspace_l, iswspace);
-int iswupper(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_UPPERCASE, isupper); }
+int iswupper(wint_t wc) { DO_ISW(UCHAR_UPPERCASE, isupper); }
__strong_alias(iswupper_l, iswupper);
-int iswxdigit(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_XDIGIT, isxdigit); }
+int iswxdigit(wint_t wc) { DO_ISW(UCHAR_POSIX_XDIGIT, isxdigit); }
__strong_alias(iswxdigit_l, iswxdigit);
int iswcntrl(wint_t wc) {
+ if (wc < 0x80) return iscntrl(wc);
typedef int8_t (*FnT)(UChar32);
static auto u_charType = reinterpret_cast<FnT>(__find_icu_symbol("u_charType"));
return u_charType ? (u_charType(wc) == U_CONTROL_CHAR) : iscntrl(wc);
@@ -81,6 +96,7 @@
__strong_alias(iswcntrl_l, iswcntrl);
int iswdigit(wint_t wc) {
+ if (wc < 0x80) return isdigit(wc);
typedef UBool (*FnT)(UChar32);
static auto u_isdigit = reinterpret_cast<FnT>(__find_icu_symbol("u_isdigit"));
return u_isdigit ? u_isdigit(wc) : isdigit(wc);
@@ -88,6 +104,7 @@
__strong_alias(iswdigit_l, iswdigit);
int iswpunct(wint_t wc) {
+ if (wc < 0x80) return ispunct(wc);
typedef UBool (*FnT)(UChar32);
static auto u_ispunct = reinterpret_cast<FnT>(__find_icu_symbol("u_ispunct"));
return u_ispunct ? u_ispunct(wc) : ispunct(wc);
diff --git a/libc/bionic/wcwidth.cpp b/libc/bionic/wcwidth.cpp
index 4582ef7..776321f 100644
--- a/libc/bionic/wcwidth.cpp
+++ b/libc/bionic/wcwidth.cpp
@@ -73,7 +73,9 @@
// Hangeul choseong filler U+115F is default ignorable, so we check default
// ignorability only after we've already handled Hangeul jamo above.
- if (__icu_hasBinaryProperty(wc, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, nullptr)) return 0;
+ static auto u_hasBinaryProperty =
+ reinterpret_cast<u_hasBinaryProperty_t>(__find_icu_symbol("u_hasBinaryProperty"));
+ if (u_hasBinaryProperty && u_hasBinaryProperty(wc, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) return 0;
// A few weird special cases where EastAsianWidth is not helpful for us.
if (wc >= 0x3248 && wc <= 0x4dff) {
diff --git a/libc/private/icu.h b/libc/private/icu.h
index a671e98..8e4aa80 100644
--- a/libc/private/icu.h
+++ b/libc/private/icu.h
@@ -80,7 +80,8 @@
int8_t __icu_charType(wint_t wc);
int32_t __icu_getIntPropertyValue(wint_t wc, UProperty property);
-bool __icu_hasBinaryProperty(wint_t wc, UProperty property, int (*fallback)(int));
+
+typedef UBool (*u_hasBinaryProperty_t)(UChar32, UProperty);
void* __find_icu_symbol(const char* symbol_name);