Revert "Replace usage of ICU4C in bionic with ICU4X"
Revert submission 3511934-ICU4X-in-Bionic_2
Reason for revert: DroidMonitor: Culprit for http://b/400532776 - verifying through ABTD before revert submission. This is part of the standard investigation process, and does not mean your CL will be reverted.
Reverted changes: /q/submissionid:3511934-ICU4X-in-Bionic_2
Change-Id: I936ee9f33dee4cfbd849b0066ea7765f3e8b254a
diff --git a/libc/Android.bp b/libc/Android.bp
index e6f69c2..b5ff680 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -790,53 +790,6 @@
}
// ========================================================
-// icu4x_bionic.a - Thin Rust wrapper around ICU4X
-// ========================================================
-
-rust_ffi_static {
- name: "libicu4x_bionic",
- crate_name: "icu4x_bionic",
- crate_root: "bionic/icu4x.rs",
- edition: "2021",
- features: [],
- rustlibs: [
- "//external/rust/android-crates-io/crates/icu_casemap:libicu_casemap",
- "//external/rust/android-crates-io/crates/icu_collections:libicu_collections",
- "//external/rust/android-crates-io/crates/icu_properties:libicu_properties",
- ],
- apex_available: [
- "//apex_available:platform",
- "//apex_available:anyapex",
- ],
- vendor_available: true,
- product_available: true,
- ramdisk_available: true,
- vendor_ramdisk_available: true,
- recovery_available: true,
- native_bridge_supported: true,
- sdk_version: "minimum",
- defaults: ["linux_bionic_supported"],
-}
-
-// current rust implementation detail; will be removed as part of a larger cleanup later
-// go/android-mto-staticlibs-in-make
-cc_rustlibs_for_make {
- name: "libstatic_rustlibs_for_make",
- whole_static_libs: ["libicu4x_bionic"],
- apex_available: [
- "//apex_available:platform",
- "//apex_available:anyapex",
- ],
- vendor_available: true,
- product_available: true,
- ramdisk_available: true,
- vendor_ramdisk_available: true,
- recovery_available: true,
- native_bridge_supported: true,
- defaults: ["linux_bionic_supported"],
-}
-
-// ========================================================
// libc_bionic.a - home-grown C library code
// ========================================================
@@ -917,6 +870,7 @@
"bionic/grp_pwd_file.cpp",
"bionic/heap_zero_init.cpp",
"bionic/iconv.cpp",
+ "bionic/icu_wrappers.cpp",
"bionic/ifaddrs.cpp",
"bionic/inotify_init.cpp",
"bionic/ioctl.cpp",
@@ -1228,7 +1182,6 @@
whole_static_libs: [
"//external/llvm-libc:llvmlibc",
"libsystemproperties",
- "libicu4x_bionic",
],
cppflags: ["-Wold-style-cast"],
@@ -1480,6 +1433,7 @@
"bionic/android_mallopt.cpp",
"bionic/gwp_asan_wrappers.cpp",
"bionic/heap_tagging.cpp",
+ "bionic/icu.cpp",
"bionic/malloc_common.cpp",
"bionic/malloc_common_dynamic.cpp",
"bionic/android_profiling_dynamic.cpp",
@@ -1498,6 +1452,7 @@
"bionic/android_mallopt.cpp",
"bionic/gwp_asan_wrappers.cpp",
"bionic/heap_tagging.cpp",
+ "bionic/icu_static.cpp",
"bionic/malloc_common.cpp",
"bionic/malloc_limit.cpp",
],
diff --git a/libc/bionic/icu.cpp b/libc/bionic/icu.cpp
new file mode 100644
index 0000000..c11b9d6
--- /dev/null
+++ b/libc/bionic/icu.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "private/icu.h"
+
+#include <dirent.h>
+#include <dlfcn.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <async_safe/log.h>
+
+static void* g_libicu_handle = nullptr;
+
+static bool __find_icu() {  // Opens libicu.so into g_libicu_handle; returns whether dlopen() succeeded.
+ g_libicu_handle = dlopen("libicu.so", RTLD_LOCAL);  // The handle is only consumed by the dlsym() lookup in this file.
+ if (g_libicu_handle == nullptr) {
+ async_safe_format_log(ANDROID_LOG_ERROR, "bionic-icu", "couldn't open libicu.so: %s",
+ dlerror());
+ return false;  // Failure is only logged and reported to the caller; nothing is retried here.
+ }
+
+ return true;
+}
+
+void* __find_icu_symbol(const char* symbol_name) {  // Returns the address of symbol_name in libicu.so, or nullptr if the library or symbol is missing.
+ static bool found_icu = __find_icu();  // Function-local static: __find_icu() runs on the first call only and its result is reused.
+ if (!found_icu) return nullptr;  // Library never loaded, so every lookup fails without calling dlsym().
+
+ void* symbol = dlsym(g_libicu_handle, symbol_name);
+ if (symbol == nullptr) {
+ async_safe_format_log(ANDROID_LOG_ERROR, "bionic-icu", "couldn't find %s", symbol_name);  // A missing symbol is logged; the caller still receives nullptr.
+ }
+ return symbol;
+}
diff --git a/libc/bionic/icu4x.rs b/libc/bionic/icu4x.rs
deleted file mode 100644
index 939ba2f..0000000
--- a/libc/bionic/icu4x.rs
+++ /dev/null
@@ -1,98 +0,0 @@
-// Copyright (C) 2025 The Android Open Source Project
-// SPDX-License-Identifier: Apache-2.0
-
-#![allow(missing_docs)] // Not particularly useful to document these thin wrappers
-
-//! This is a thin wrapper around ICU4X for use in Bionic
-
-use icu_casemap::CaseMapper;
-use icu_collections::codepointtrie::TrieValue;
-use icu_properties::props::*;
-use icu_properties::{CodePointMapData, CodePointSetData};
-
-#[no_mangle]
-pub extern "C" fn __icu4x_bionic_general_category(ch: u32) -> u8 {
- CodePointMapData::<GeneralCategory>::new().get32(ch) as u8
-}
-
-#[no_mangle]
-pub extern "C" fn __icu4x_bionic_east_asian_width(ch: u32) -> u8 {
- CodePointMapData::<EastAsianWidth>::new().get32(ch).to_u32() as u8
-}
-
-#[no_mangle]
-pub extern "C" fn __icu4x_bionic_hangul_syllable_type(ch: u32) -> u8 {
- CodePointMapData::<HangulSyllableType>::new().get32(ch).to_u32() as u8
-}
-
-#[no_mangle]
-pub extern "C" fn __icu4x_bionic_is_alphabetic(ch: u32) -> bool {
- CodePointSetData::new::<Alphabetic>().contains32(ch)
-}
-
-#[no_mangle]
-pub extern "C" fn __icu4x_bionic_is_default_ignorable_code_point(ch: u32) -> bool {
- CodePointSetData::new::<DefaultIgnorableCodePoint>().contains32(ch)
-}
-
-#[no_mangle]
-pub extern "C" fn __icu4x_bionic_is_lowercase(ch: u32) -> bool {
- CodePointSetData::new::<Lowercase>().contains32(ch)
-}
-
-#[no_mangle]
-pub extern "C" fn __icu4x_bionic_is_alnum(ch: u32) -> bool {
- CodePointSetData::new::<Alnum>().contains32(ch)
-}
-
-#[no_mangle]
-pub extern "C" fn __icu4x_bionic_is_blank(ch: u32) -> bool {
- CodePointSetData::new::<Blank>().contains32(ch)
-}
-
-#[no_mangle]
-pub extern "C" fn __icu4x_bionic_is_graph(ch: u32) -> bool {
- CodePointSetData::new::<Graph>().contains32(ch)
-}
-
-#[no_mangle]
-pub extern "C" fn __icu4x_bionic_is_print(ch: u32) -> bool {
- CodePointSetData::new::<Print>().contains32(ch)
-}
-
-#[no_mangle]
-pub extern "C" fn __icu4x_bionic_is_xdigit(ch: u32) -> bool {
- CodePointSetData::new::<Xdigit>().contains32(ch)
-}
-
-#[no_mangle]
-pub extern "C" fn __icu4x_bionic_is_white_space(ch: u32) -> bool {
- CodePointSetData::new::<WhiteSpace>().contains32(ch)
-}
-
-#[no_mangle]
-pub extern "C" fn __icu4x_bionic_is_uppercase(ch: u32) -> bool {
- CodePointSetData::new::<Uppercase>().contains32(ch)
-}
-
-/// Convert a code point to uppercase
-#[no_mangle]
-pub extern "C" fn __icu4x_bionic_to_upper(ch: u32) -> u32 {
- let Ok(ch) = char::try_from(ch) else {
- return ch;
- };
- let cm = CaseMapper::new();
-
- cm.simple_uppercase(ch) as u32
-}
-
-/// Convert a code point to lowercase
-#[no_mangle]
-pub extern "C" fn __icu4x_bionic_to_lower(ch: u32) -> u32 {
- let Ok(ch) = char::try_from(ch) else {
- return ch;
- };
- let cm = CaseMapper::new();
-
- cm.simple_lowercase(ch) as u32
-}
diff --git a/libc/bionic/icu_static.cpp b/libc/bionic/icu_static.cpp
new file mode 100644
index 0000000..cf24a38
--- /dev/null
+++ b/libc/bionic/icu_static.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "private/icu.h"
+
+// We don't have dlopen/dlsym for static binaries yet.
+void* __find_icu_symbol(const char*) {  // Stub variant: the symbol name is ignored and every lookup reports failure.
+ return nullptr;  // Callers must treat nullptr as "ICU unavailable".
+}
diff --git a/libc/bionic/icu_wrappers.cpp b/libc/bionic/icu_wrappers.cpp
new file mode 100644
index 0000000..523f5a6
--- /dev/null
+++ b/libc/bionic/icu_wrappers.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "private/icu.h"
+
+int8_t __icu_charType(wint_t wc) {  // Returns u_charType(wc) from libicu, or -1 when that symbol could not be resolved.
+ typedef int8_t (*u_charType_t)(UChar32);
+ static auto u_charType = reinterpret_cast<u_charType_t>(__find_icu_symbol("u_charType"));  // Resolved on the first call; a null result is cached as well.
+ return u_charType ? u_charType(wc) : -1;  // -1 is the "no ICU" sentinel callers have to check for.
+}
+
+int32_t __icu_getIntPropertyValue(wint_t wc, UProperty property) {  // Returns u_getIntPropertyValue(wc, property) from libicu, or 0 when unavailable.
+ typedef int32_t (*u_getIntPropertyValue_t)(UChar32, UProperty);
+ static auto u_getIntPropertyValue =  // Looked up on the first call only; the pointer (or nullptr) is kept for later calls.
+ reinterpret_cast<u_getIntPropertyValue_t>(__find_icu_symbol("u_getIntPropertyValue"));
+ return u_getIntPropertyValue ? u_getIntPropertyValue(wc, property) : 0;  // 0 doubles as the fallback when the lookup failed.
+}
diff --git a/libc/bionic/wctype.cpp b/libc/bionic/wctype.cpp
index 8d0733d..94597d9 100644
--- a/libc/bionic/wctype.cpp
+++ b/libc/bionic/wctype.cpp
@@ -35,7 +35,7 @@
#include <wchar.h>
#include "bionic/macros.h"
-#include "private/icu4x.h"
+#include "private/icu.h"
enum {
WC_TYPE_INVALID = 0,
@@ -54,65 +54,60 @@
WC_TYPE_MAX
};
-#define DO_ISW(prop_name, narrow_fn) \
- if (__predict_true(wc < 0x80)) { \
- return narrow_fn(wc); \
- } \
- return __icu4x_bionic_is_##prop_name(wc);
+static u_hasBinaryProperty_t __find_u_hasBinaryProperty() {  // Returns the u_hasBinaryProperty function pointer, or null if the lookup failed.
+ static auto u_hasBinaryProperty =  // Resolved through __find_icu_symbol() on first use and then reused.
+ reinterpret_cast<u_hasBinaryProperty_t>(__find_icu_symbol("u_hasBinaryProperty"));
+ return u_hasBinaryProperty;
+}
-int iswalnum(wint_t wc) {
- DO_ISW(alnum, isalnum);
-}
+#define DO_ISW(icu_constant, narrow_fn) /* Whole body of an isw*() function; expects a variable named wc in scope. */ \
+ u_hasBinaryProperty_t u_hasBinaryProperty; \
+ if (__predict_true(wc < 0x80) || /* ASCII takes the narrow-character path without touching ICU. */ \
+ !(u_hasBinaryProperty = __find_u_hasBinaryProperty())) { /* A failed ICU lookup falls back the same way. */ \
+ return narrow_fn(wc); \
+ } \
+ return u_hasBinaryProperty(wc, icu_constant); /* NOTE: the trailing backslash continues onto the blank line below, which ends the macro. */ \
+
+int iswalnum(wint_t wc) { DO_ISW(UCHAR_POSIX_ALNUM, isalnum); }  // ASCII or no ICU: isalnum(); otherwise ICU's UCHAR_POSIX_ALNUM.
__strong_alias(iswalnum_l, iswalnum);
-int iswalpha(wint_t wc) {
- DO_ISW(alphabetic, isalpha);
-}
+int iswalpha(wint_t wc) { DO_ISW(UCHAR_ALPHABETIC, isalpha); }  // ASCII or no ICU: isalpha(); otherwise ICU's UCHAR_ALPHABETIC.
__strong_alias(iswalpha_l, iswalpha);
-int iswblank(wint_t wc) {
- DO_ISW(blank, isblank);
-}
+int iswblank(wint_t wc) { DO_ISW(UCHAR_POSIX_BLANK, isblank); }  // ASCII or no ICU: isblank(); otherwise ICU's UCHAR_POSIX_BLANK.
__strong_alias(iswblank_l, iswblank);
-int iswgraph(wint_t wc) {
- DO_ISW(graph, isgraph);
-}
+int iswgraph(wint_t wc) { DO_ISW(UCHAR_POSIX_GRAPH, isgraph); }  // ASCII or no ICU: isgraph(); otherwise ICU's UCHAR_POSIX_GRAPH.
__strong_alias(iswgraph_l, iswgraph);
-int iswlower(wint_t wc) {
- DO_ISW(lowercase, islower);
-}
+int iswlower(wint_t wc) { DO_ISW(UCHAR_LOWERCASE, islower); }  // ASCII or no ICU: islower(); otherwise ICU's UCHAR_LOWERCASE.
__strong_alias(iswlower_l, iswlower);
-int iswprint(wint_t wc) {
- DO_ISW(print, isprint);
-}
+int iswprint(wint_t wc) { DO_ISW(UCHAR_POSIX_PRINT, isprint); }  // ASCII or no ICU: isprint(); otherwise ICU's UCHAR_POSIX_PRINT.
__strong_alias(iswprint_l, iswprint);
-int iswspace(wint_t wc) {
- DO_ISW(white_space, isspace);
-}
+int iswspace(wint_t wc) { DO_ISW(UCHAR_WHITE_SPACE, isspace); }  // ASCII or no ICU: isspace(); otherwise ICU's UCHAR_WHITE_SPACE.
__strong_alias(iswspace_l, iswspace);
-int iswupper(wint_t wc) {
- DO_ISW(uppercase, isupper);
-}
+int iswupper(wint_t wc) { DO_ISW(UCHAR_UPPERCASE, isupper); }  // ASCII or no ICU: isupper(); otherwise ICU's UCHAR_UPPERCASE.
__strong_alias(iswupper_l, iswupper);
-int iswxdigit(wint_t wc) {
- DO_ISW(xdigit, isxdigit);
-}
+int iswxdigit(wint_t wc) { DO_ISW(UCHAR_POSIX_XDIGIT, isxdigit); }  // ASCII or no ICU: isxdigit(); otherwise ICU's UCHAR_POSIX_XDIGIT.
__strong_alias(iswxdigit_l, iswxdigit);
int iswcntrl(wint_t wc) {
if (wc < 0x80) return iscntrl(wc);
- return __icu4x_bionic_general_category(wc) == U_CONTROL_CHAR;
+ typedef int8_t (*FnT)(UChar32);
+ static auto u_charType = reinterpret_cast<FnT>(__find_icu_symbol("u_charType"));  // Resolved on first use; the pointer (or nullptr) is reused afterwards.
+ return u_charType ? (u_charType(wc) == U_CONTROL_CHAR) : iscntrl(wc);  // Non-ASCII: true iff ICU reports U_CONTROL_CHAR; without ICU, defers to iscntrl().
}
__strong_alias(iswcntrl_l, iswcntrl);
int iswdigit(wint_t wc) {
if (wc < 0x80) return isdigit(wc);
- return __icu4x_bionic_general_category(wc) == U_DECIMAL_NUMBER;
+ typedef UBool (*FnT)(UChar32);
+ static auto u_isdigit = reinterpret_cast<FnT>(__find_icu_symbol("u_isdigit"));  // Resolved on first use; the pointer (or nullptr) is reused afterwards.
+ return u_isdigit ? u_isdigit(wc) : isdigit(wc);  // Non-ASCII: ICU's u_isdigit() decides; without ICU, defers to isdigit().
}
__strong_alias(iswdigit_l, iswdigit);
int iswpunct(wint_t wc) {
if (wc < 0x80) return ispunct(wc);
- int8_t chartype = __icu4x_bionic_general_category(wc);
- return chartype >= U_DASH_PUNCTUATION && chartype <= U_OTHER_PUNCTUATION;
+ typedef UBool (*FnT)(UChar32);
+ static auto u_ispunct = reinterpret_cast<FnT>(__find_icu_symbol("u_ispunct"));  // Resolved on first use; the pointer (or nullptr) is reused afterwards.
+ return u_ispunct ? u_ispunct(wc) : ispunct(wc);  // Non-ASCII: ICU's u_ispunct() decides; without ICU, defers to ispunct().
}
__strong_alias(iswpunct_l, iswpunct);
@@ -129,14 +124,18 @@
wint_t towlower(wint_t wc) {
if (wc < 0x80) return tolower(wc);
- return __icu4x_bionic_to_lower(wc);
+ typedef UChar32 (*FnT)(UChar32);
+ static auto u_tolower = reinterpret_cast<FnT>(__find_icu_symbol("u_tolower"));  // Resolved on first use; the pointer (or nullptr) is reused afterwards.
+ return u_tolower ? u_tolower(wc) : tolower(wc);  // Non-ASCII: ICU's u_tolower() maps the code point; without ICU, defers to tolower().
}
__strong_alias(towlower_l, towlower);
wint_t towupper(wint_t wc) {
if (wc < 0x80) return toupper(wc);
- return __icu4x_bionic_to_upper(wc);
+ typedef UChar32 (*FnT)(UChar32);
+ static auto u_toupper = reinterpret_cast<FnT>(__find_icu_symbol("u_toupper"));  // Resolved on first use; the pointer (or nullptr) is reused afterwards.
+ return u_toupper ? u_toupper(wc) : toupper(wc);  // Non-ASCII: ICU's u_toupper() maps the code point; without ICU, defers to toupper().
}
__strong_alias(towupper_l, towupper);
diff --git a/libc/bionic/wcwidth.cpp b/libc/bionic/wcwidth.cpp
index 633d83e..776321f 100644
--- a/libc/bionic/wcwidth.cpp
+++ b/libc/bionic/wcwidth.cpp
@@ -28,7 +28,7 @@
#include <wchar.h>
-#include "private/icu4x.h"
+#include "private/icu.h"
int wcwidth(wchar_t wc) {
// Fast-path ASCII.
@@ -44,33 +44,38 @@
// pretty arbitrary. See https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c for more details.
// Fancy unicode control characters?
- switch (__icu4x_bionic_general_category(wc)) {
- case U_CONTROL_CHAR:
- return -1;
- case U_NON_SPACING_MARK:
- case U_ENCLOSING_MARK:
- return 0;
- case U_FORMAT_CHAR:
- // A special case for soft hyphen (U+00AD) to match historical practice.
- // See the tests for more commentary.
- return (wc == 0x00ad) ? 1 : 0;
+ switch (__icu_charType(wc)) {
+ case -1:
+ // No icu4c available; give up.
+ return -1;
+ case U_CONTROL_CHAR:
+ return -1;
+ case U_NON_SPACING_MARK:
+ case U_ENCLOSING_MARK:
+ return 0;
+ case U_FORMAT_CHAR:
+ // A special case for soft hyphen (U+00AD) to match historical practice.
+ // See the tests for more commentary.
+ return (wc == 0x00ad) ? 1 : 0;
}
// Medial and final jamo render as zero width when used correctly,
// so we handle them specially rather than relying on East Asian Width.
- switch (__icu4x_bionic_hangul_syllable_type(wc)) {
- case U_HST_VOWEL_JAMO:
- case U_HST_TRAILING_JAMO:
- return 0;
- case U_HST_LEADING_JAMO:
- case U_HST_LV_SYLLABLE:
- case U_HST_LVT_SYLLABLE:
- return 2;
+ switch (__icu_getIntPropertyValue(wc, UCHAR_HANGUL_SYLLABLE_TYPE)) {
+ case U_HST_VOWEL_JAMO:
+ case U_HST_TRAILING_JAMO:
+ return 0;
+ case U_HST_LEADING_JAMO:
+ case U_HST_LV_SYLLABLE:
+ case U_HST_LVT_SYLLABLE:
+ return 2;
}
// Hangeul choseong filler U+115F is default ignorable, so we check default
// ignorability only after we've already handled Hangeul jamo above.
- if (__icu4x_bionic_is_default_ignorable_code_point(wc)) return 0;
+ static auto u_hasBinaryProperty =
+ reinterpret_cast<u_hasBinaryProperty_t>(__find_icu_symbol("u_hasBinaryProperty"));
+ if (u_hasBinaryProperty && u_hasBinaryProperty(wc, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) return 0;
// A few weird special cases where EastAsianWidth is not helpful for us.
if (wc >= 0x3248 && wc <= 0x4dff) {
@@ -83,15 +88,15 @@
// The EastAsianWidth property is at least defined by the Unicode standard!
// https://www.unicode.org/reports/tr11/
- switch (__icu4x_bionic_east_asian_width(wc)) {
- case U_EA_AMBIGUOUS:
- case U_EA_HALFWIDTH:
- case U_EA_NARROW:
- case U_EA_NEUTRAL:
- return 1;
- case U_EA_FULLWIDTH:
- case U_EA_WIDE:
- return 2;
+ switch (__icu_getIntPropertyValue(wc, UCHAR_EAST_ASIAN_WIDTH)) {
+ case U_EA_AMBIGUOUS:
+ case U_EA_HALFWIDTH:
+ case U_EA_NARROW:
+ case U_EA_NEUTRAL:
+ return 1;
+ case U_EA_FULLWIDTH:
+ case U_EA_WIDE:
+ return 2;
}
return 0;
diff --git a/libc/private/icu4x.h b/libc/private/icu.h
similarity index 68%
rename from libc/private/icu4x.h
rename to libc/private/icu.h
index 8b7e1d0..8e4aa80 100644
--- a/libc/private/icu4x.h
+++ b/libc/private/icu.h
@@ -26,20 +26,38 @@
* SUCH DAMAGE.
*/
-#pragma once
+#ifndef _PRIVATE_ICU_H
+#define _PRIVATE_ICU_H
-#include <ctype.h>
#include <stdint.h>
#include <wchar.h>
+typedef int8_t UBool;  // Return type of the ICU predicates called through function pointers; presumably matches ICU4C's UBool -- TODO confirm.
+#define FALSE 0  // NOTE(review): FALSE/TRUE are not referenced in the visible part of this change.
+#define TRUE 1
+
+typedef int32_t UChar32;  // Code point type used in the ICU function-pointer signatures; wint_t arguments are converted to it at the call sites.
+
+enum UProperty {  // Property selectors handed to u_hasBinaryProperty / u_getIntPropertyValue; presumably mirrors ICU4C's numbering -- TODO confirm against unicode/uchar.h.
+ UCHAR_ALPHABETIC = 0,
+ UCHAR_DEFAULT_IGNORABLE_CODE_POINT = 5,
+ UCHAR_LOWERCASE = 22,
+ UCHAR_POSIX_ALNUM = 44,
+ UCHAR_POSIX_BLANK = 45,
+ UCHAR_POSIX_GRAPH = 46,
+ UCHAR_POSIX_PRINT = 47,
+ UCHAR_POSIX_XDIGIT = 48,
+ UCHAR_UPPERCASE = 30,  // Entries are not in numeric order: 30 and 31 follow 48.
+ UCHAR_WHITE_SPACE = 31,
+ UCHAR_EAST_ASIAN_WIDTH = 0x1004,  // Queried through __icu_getIntPropertyValue() rather than as a binary property.
+ UCHAR_HANGUL_SYLLABLE_TYPE = 0x100b,  // Likewise queried through __icu_getIntPropertyValue().
+};
+
enum UCharCategory {
U_NON_SPACING_MARK = 6,
U_ENCLOSING_MARK = 7,
- U_DECIMAL_NUMBER = 9,
U_CONTROL_CHAR = 15,
U_FORMAT_CHAR = 16,
- U_DASH_PUNCTUATION = 19,
- U_OTHER_PUNCTUATION = 23,
};
enum UEastAsianWidth {
@@ -60,24 +78,11 @@
U_HST_LVT_SYLLABLE,
};
-__BEGIN_DECLS
+int8_t __icu_charType(wint_t wc);  // ICU's u_charType(wc), or -1 when the ICU symbol cannot be resolved.
+int32_t __icu_getIntPropertyValue(wint_t wc, UProperty property);  // ICU's u_getIntPropertyValue(wc, property), or 0 when it cannot be resolved.
-uint8_t __icu4x_bionic_general_category(uint32_t cp);
-uint8_t __icu4x_bionic_east_asian_width(uint32_t cp);
-uint8_t __icu4x_bionic_hangul_syllable_type(uint32_t cp);
+typedef UBool (*u_hasBinaryProperty_t)(UChar32, UProperty);  // Signature used when casting the result of __find_icu_symbol("u_hasBinaryProperty").
-bool __icu4x_bionic_is_alphabetic(uint32_t cp);
-bool __icu4x_bionic_is_default_ignorable_code_point(uint32_t cp);
-bool __icu4x_bionic_is_lowercase(uint32_t cp);
-bool __icu4x_bionic_is_alnum(uint32_t cp);
-bool __icu4x_bionic_is_blank(uint32_t cp);
-bool __icu4x_bionic_is_graph(uint32_t cp);
-bool __icu4x_bionic_is_print(uint32_t cp);
-bool __icu4x_bionic_is_xdigit(uint32_t cp);
-bool __icu4x_bionic_is_white_space(uint32_t cp);
-bool __icu4x_bionic_is_uppercase(uint32_t cp);
+void* __find_icu_symbol(const char* symbol_name);  // Returns the address of symbol_name in ICU, or nullptr; callers cast the result to the expected function type.
-uint32_t __icu4x_bionic_to_upper(uint32_t ch);
-uint32_t __icu4x_bionic_to_lower(uint32_t ch);
-
-__END_DECLS
+#endif // _PRIVATE_ICU_H