/*
 * Copyright (C) 2017 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
28
29#include <wchar.h>
30
Manish Goregaokar361854c2025-03-05 15:02:28 -080031#include "private/icu4x.h"
Elliott Hughesc41b5602017-07-27 17:08:08 -070032
33int wcwidth(wchar_t wc) {
34 // Fast-path ASCII.
35 if (wc >= 0x20 && wc < 0x7f) return 1;
36
37 // ASCII NUL is a special case.
38 if (wc == 0) return 0;
39
40 // C0.
41 if (wc < ' ' || (wc >= 0x7f && wc <= 0xa0)) return -1;
42
43 // Now for the i18n part. This isn't defined or standardized, so a lot of the choices are
44 // pretty arbitrary. See https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c for more details.
45
46 // Fancy unicode control characters?
Manish Goregaokar361854c2025-03-05 15:02:28 -080047 switch (__icu4x_bionic_general_category(wc)) {
48 case U_CONTROL_CHAR:
49 return -1;
50 case U_NON_SPACING_MARK:
51 case U_ENCLOSING_MARK:
52 return 0;
53 case U_FORMAT_CHAR:
54 // A special case for soft hyphen (U+00AD) to match historical practice.
55 // See the tests for more commentary.
56 return (wc == 0x00ad) ? 1 : 0;
Elliott Hughesc41b5602017-07-27 17:08:08 -070057 }
Elliott Hughesc41b5602017-07-27 17:08:08 -070058
Elliott Hughes20a9f992024-05-29 21:45:51 +000059 // Medial and final jamo render as zero width when used correctly,
60 // so we handle them specially rather than relying on East Asian Width.
Manish Goregaokar361854c2025-03-05 15:02:28 -080061 switch (__icu4x_bionic_hangul_syllable_type(wc)) {
62 case U_HST_VOWEL_JAMO:
63 case U_HST_TRAILING_JAMO:
64 return 0;
65 case U_HST_LEADING_JAMO:
66 case U_HST_LV_SYLLABLE:
67 case U_HST_LVT_SYLLABLE:
68 return 2;
Elliott Hughesc41b5602017-07-27 17:08:08 -070069 }
70
Elliott Hughes20a9f992024-05-29 21:45:51 +000071 // Hangeul choseong filler U+115F is default ignorable, so we check default
72 // ignorability only after we've already handled Hangeul jamo above.
Manish Goregaokar361854c2025-03-05 15:02:28 -080073 if (__icu4x_bionic_is_default_ignorable_code_point(wc)) return 0;
Elliott Hughes20a9f992024-05-29 21:45:51 +000074
75 // A few weird special cases where EastAsianWidth is not helpful for us.
Elliott Hughesc41b5602017-07-27 17:08:08 -070076 if (wc >= 0x3248 && wc <= 0x4dff) {
77 // Circled two-digit CJK "speed sign" numbers. EastAsianWidth is ambiguous,
78 // but wide makes more sense.
79 if (wc <= 0x324f) return 2;
80 // Hexagrams. EastAsianWidth is neutral, but wide seems better.
81 if (wc >= 0x4dc0) return 2;
82 }
83
84 // The EastAsianWidth property is at least defined by the Unicode standard!
Elliott Hughes20a9f992024-05-29 21:45:51 +000085 // https://www.unicode.org/reports/tr11/
Manish Goregaokar361854c2025-03-05 15:02:28 -080086 switch (__icu4x_bionic_east_asian_width(wc)) {
87 case U_EA_AMBIGUOUS:
88 case U_EA_HALFWIDTH:
89 case U_EA_NARROW:
90 case U_EA_NEUTRAL:
91 return 1;
92 case U_EA_FULLWIDTH:
93 case U_EA_WIDE:
94 return 2;
Elliott Hughesc41b5602017-07-27 17:08:08 -070095 }
96
97 return 0;
98}