Reimplement the <ctype.h> is* functions.
Following on from the towlower()/towupper() changes, add benchmarks for
most of <ctype.h>, rewrite the tests to cover the entire defined range
for all of these functions, and then reimplement most of the functions.
The old table-based implementation is mostly a bad idea on modern
hardware, with only ispunct() showing a significant benefit compared to
any other way I could think of writing it, and isalnum() a marginal but
still convincingly genuine benefit.
My new benchmarks make an effort to test an example from each relevant
range of characters to avoid, say, accidentally optimizing the behavior
of `isalnum('0')` at the expense of `isalnum('z')`.
Interestingly, clang is able to generate what I believe to be the
optimal implementations from the most readable code, which is
impressive. It certainly matched or beat all my attempts to be clever!
The BSD table-based implementations made a special case of EOF despite
having a `_ctype_` table that's offset by 1 to include EOF at index 0.
I'm not sure why they didn't take advantage of that, but removing the
explicit check for EOF measurably improves the generated code on arm and
arm64, so even the two functions that still use the table benefit from
this rewrite.
Here are the benchmark results:
arm64 before:
BM_ctype_isalnum_n 3.73 ns 3.73 ns 183727137
BM_ctype_isalnum_y1 3.82 ns 3.81 ns 186383058
BM_ctype_isalnum_y2 3.73 ns 3.72 ns 187809830
BM_ctype_isalnum_y3 3.78 ns 3.77 ns 181383055
BM_ctype_isalpha_n 3.75 ns 3.75 ns 189453927
BM_ctype_isalpha_y1 3.76 ns 3.75 ns 184854043
BM_ctype_isalpha_y2 4.32 ns 3.78 ns 186326931
BM_ctype_isascii_n 2.49 ns 2.48 ns 275583822
BM_ctype_isascii_y 2.51 ns 2.51 ns 282123915
BM_ctype_isblank_n 3.11 ns 3.10 ns 220472044
BM_ctype_isblank_y1 3.20 ns 3.19 ns 226088868
BM_ctype_isblank_y2 3.11 ns 3.11 ns 220809122
BM_ctype_iscntrl_n 3.79 ns 3.78 ns 188719938
BM_ctype_iscntrl_y1 3.72 ns 3.71 ns 186209237
BM_ctype_iscntrl_y2 3.80 ns 3.80 ns 184315749
BM_ctype_isdigit_n 3.76 ns 3.74 ns 188334682
BM_ctype_isdigit_y 3.78 ns 3.77 ns 186249335
BM_ctype_isgraph_n 3.99 ns 3.98 ns 177814143
BM_ctype_isgraph_y1 3.98 ns 3.95 ns 175140090
BM_ctype_isgraph_y2 4.01 ns 4.00 ns 178320453
BM_ctype_isgraph_y3 3.96 ns 3.95 ns 175412814
BM_ctype_isgraph_y4 4.01 ns 4.00 ns 175711174
BM_ctype_islower_n 3.75 ns 3.74 ns 188604818
BM_ctype_islower_y 3.79 ns 3.78 ns 154738238
BM_ctype_isprint_n 3.96 ns 3.95 ns 177607734
BM_ctype_isprint_y1 3.94 ns 3.93 ns 174877244
BM_ctype_isprint_y2 4.02 ns 4.01 ns 178206135
BM_ctype_isprint_y3 3.94 ns 3.93 ns 175959069
BM_ctype_isprint_y4 4.03 ns 4.02 ns 176158314
BM_ctype_isprint_y5 3.95 ns 3.94 ns 178745462
BM_ctype_ispunct_n 3.78 ns 3.77 ns 184727184
BM_ctype_ispunct_y 3.76 ns 3.75 ns 187947503
BM_ctype_isspace_n 3.74 ns 3.74 ns 185300285
BM_ctype_isspace_y1 3.77 ns 3.76 ns 187202066
BM_ctype_isspace_y2 3.73 ns 3.73 ns 184105959
BM_ctype_isupper_n 3.81 ns 3.80 ns 185038761
BM_ctype_isupper_y 3.71 ns 3.71 ns 185885793
BM_ctype_isxdigit_n 3.79 ns 3.79 ns 184965673
BM_ctype_isxdigit_y1 3.76 ns 3.75 ns 188251672
BM_ctype_isxdigit_y2 3.79 ns 3.78 ns 184187481
BM_ctype_isxdigit_y3 3.77 ns 3.76 ns 187635540
arm64 after:
BM_ctype_isalnum_n 3.37 ns 3.37 ns 205613810
BM_ctype_isalnum_y1 3.40 ns 3.39 ns 204806361
BM_ctype_isalnum_y2 3.43 ns 3.43 ns 205066077
BM_ctype_isalnum_y3 3.50 ns 3.50 ns 200057128
BM_ctype_isalpha_n 2.97 ns 2.97 ns 236084076
BM_ctype_isalpha_y1 2.97 ns 2.97 ns 236083626
BM_ctype_isalpha_y2 2.97 ns 2.97 ns 236084246
BM_ctype_isascii_n 2.55 ns 2.55 ns 272879994
BM_ctype_isascii_y 2.46 ns 2.45 ns 286522323
BM_ctype_isblank_n 3.18 ns 3.18 ns 220431175
BM_ctype_isblank_y1 3.18 ns 3.18 ns 220345602
BM_ctype_isblank_y2 3.18 ns 3.18 ns 220308509
BM_ctype_iscntrl_n 3.10 ns 3.10 ns 220344270
BM_ctype_iscntrl_y1 3.10 ns 3.07 ns 228973615
BM_ctype_iscntrl_y2 3.07 ns 3.07 ns 229192626
BM_ctype_isdigit_n 3.07 ns 3.07 ns 228925676
BM_ctype_isdigit_y 3.07 ns 3.07 ns 229182934
BM_ctype_isgraph_n 2.66 ns 2.66 ns 264268737
BM_ctype_isgraph_y1 2.66 ns 2.66 ns 264445277
BM_ctype_isgraph_y2 2.66 ns 2.66 ns 264327427
BM_ctype_isgraph_y3 2.66 ns 2.66 ns 264427480
BM_ctype_isgraph_y4 2.66 ns 2.66 ns 264155250
BM_ctype_islower_n 2.66 ns 2.66 ns 264421600
BM_ctype_islower_y 2.66 ns 2.66 ns 264341148
BM_ctype_isprint_n 2.66 ns 2.66 ns 264415198
BM_ctype_isprint_y1 2.66 ns 2.66 ns 264268793
BM_ctype_isprint_y2 2.66 ns 2.66 ns 264419205
BM_ctype_isprint_y3 2.66 ns 2.66 ns 264205886
BM_ctype_isprint_y4 2.66 ns 2.66 ns 264440797
BM_ctype_isprint_y5 2.72 ns 2.72 ns 264333293
BM_ctype_ispunct_n 3.52 ns 3.51 ns 198956572
BM_ctype_ispunct_y 3.38 ns 3.38 ns 201661792
BM_ctype_isspace_n 3.39 ns 3.39 ns 206896620
BM_ctype_isspace_y1 3.39 ns 3.39 ns 206569020
BM_ctype_isspace_y2 3.39 ns 3.39 ns 206564415
BM_ctype_isupper_n 2.76 ns 2.75 ns 254227134
BM_ctype_isupper_y 2.76 ns 2.75 ns 254235314
BM_ctype_isxdigit_n 3.60 ns 3.60 ns 194418653
BM_ctype_isxdigit_y1 2.97 ns 2.97 ns 236082424
BM_ctype_isxdigit_y2 3.48 ns 3.48 ns 200390011
BM_ctype_isxdigit_y3 3.48 ns 3.48 ns 202255815
arm32 before:
BM_ctype_isalnum_n 4.77 ns 4.76 ns 129230464
BM_ctype_isalnum_y1 4.88 ns 4.87 ns 147939321
BM_ctype_isalnum_y2 4.74 ns 4.73 ns 145508054
BM_ctype_isalnum_y3 4.81 ns 4.80 ns 144968914
BM_ctype_isalpha_n 4.80 ns 4.79 ns 148262579
BM_ctype_isalpha_y1 4.74 ns 4.73 ns 145061326
BM_ctype_isalpha_y2 4.83 ns 4.82 ns 147642546
BM_ctype_isascii_n 3.74 ns 3.72 ns 186711139
BM_ctype_isascii_y 3.79 ns 3.78 ns 183654780
BM_ctype_isblank_n 4.20 ns 4.19 ns 169733252
BM_ctype_isblank_y1 4.19 ns 4.18 ns 165713363
BM_ctype_isblank_y2 4.22 ns 4.21 ns 168776265
BM_ctype_iscntrl_n 4.75 ns 4.74 ns 145417484
BM_ctype_iscntrl_y1 4.82 ns 4.81 ns 146283250
BM_ctype_iscntrl_y2 4.79 ns 4.78 ns 148662453
BM_ctype_isdigit_n 4.77 ns 4.76 ns 145789210
BM_ctype_isdigit_y 4.84 ns 4.84 ns 146909458
BM_ctype_isgraph_n 4.72 ns 4.71 ns 145874663
BM_ctype_isgraph_y1 4.86 ns 4.85 ns 142037606
BM_ctype_isgraph_y2 4.79 ns 4.78 ns 145109612
BM_ctype_isgraph_y3 4.75 ns 4.75 ns 144829039
BM_ctype_isgraph_y4 4.86 ns 4.85 ns 146769899
BM_ctype_islower_n 4.76 ns 4.75 ns 147537637
BM_ctype_islower_y 4.79 ns 4.78 ns 145648017
BM_ctype_isprint_n 4.82 ns 4.81 ns 147154780
BM_ctype_isprint_y1 4.76 ns 4.76 ns 145117604
BM_ctype_isprint_y2 4.87 ns 4.86 ns 145801406
BM_ctype_isprint_y3 4.79 ns 4.78 ns 148043446
BM_ctype_isprint_y4 4.77 ns 4.76 ns 145157619
BM_ctype_isprint_y5 4.91 ns 4.90 ns 147810800
BM_ctype_ispunct_n 4.74 ns 4.73 ns 145588611
BM_ctype_ispunct_y 4.82 ns 4.81 ns 144065436
BM_ctype_isspace_n 4.78 ns 4.77 ns 147153712
BM_ctype_isspace_y1 4.73 ns 4.72 ns 145252863
BM_ctype_isspace_y2 4.84 ns 4.83 ns 148615797
BM_ctype_isupper_n 4.75 ns 4.74 ns 148276631
BM_ctype_isupper_y 4.80 ns 4.79 ns 145529893
BM_ctype_isxdigit_n 4.78 ns 4.77 ns 147271646
BM_ctype_isxdigit_y1 4.74 ns 4.74 ns 145142209
BM_ctype_isxdigit_y2 4.83 ns 4.82 ns 146398497
BM_ctype_isxdigit_y3 4.78 ns 4.77 ns 147617686
arm32 after:
BM_ctype_isalnum_n 4.35 ns 4.35 ns 161086146
BM_ctype_isalnum_y1 4.36 ns 4.35 ns 160961111
BM_ctype_isalnum_y2 4.36 ns 4.36 ns 160733210
BM_ctype_isalnum_y3 4.35 ns 4.35 ns 160897524
BM_ctype_isalpha_n 3.67 ns 3.67 ns 189377208
BM_ctype_isalpha_y1 3.68 ns 3.67 ns 189438146
BM_ctype_isalpha_y2 3.75 ns 3.69 ns 190971186
BM_ctype_isascii_n 3.69 ns 3.68 ns 191029191
BM_ctype_isascii_y 3.68 ns 3.68 ns 191011817
BM_ctype_isblank_n 4.09 ns 4.09 ns 171887541
BM_ctype_isblank_y1 4.09 ns 4.09 ns 171829345
BM_ctype_isblank_y2 4.08 ns 4.07 ns 170585590
BM_ctype_iscntrl_n 4.08 ns 4.07 ns 170614383
BM_ctype_iscntrl_y1 4.13 ns 4.11 ns 171495899
BM_ctype_iscntrl_y2 4.19 ns 4.18 ns 165255578
BM_ctype_isdigit_n 4.25 ns 4.24 ns 165237008
BM_ctype_isdigit_y 4.24 ns 4.24 ns 165256149
BM_ctype_isgraph_n 3.82 ns 3.81 ns 183610114
BM_ctype_isgraph_y1 3.82 ns 3.81 ns 183614131
BM_ctype_isgraph_y2 3.82 ns 3.81 ns 183616840
BM_ctype_isgraph_y3 3.79 ns 3.79 ns 183620182
BM_ctype_isgraph_y4 3.82 ns 3.81 ns 185740009
BM_ctype_islower_n 3.75 ns 3.74 ns 183619502
BM_ctype_islower_y 3.68 ns 3.68 ns 190999901
BM_ctype_isprint_n 3.69 ns 3.68 ns 190899544
BM_ctype_isprint_y1 3.68 ns 3.67 ns 190192384
BM_ctype_isprint_y2 3.67 ns 3.67 ns 189351466
BM_ctype_isprint_y3 3.67 ns 3.67 ns 189430348
BM_ctype_isprint_y4 3.68 ns 3.68 ns 189430161
BM_ctype_isprint_y5 3.69 ns 3.68 ns 190962419
BM_ctype_ispunct_n 4.14 ns 4.14 ns 171034861
BM_ctype_ispunct_y 4.19 ns 4.19 ns 168308152
BM_ctype_isspace_n 4.50 ns 4.50 ns 156250887
BM_ctype_isspace_y1 4.48 ns 4.48 ns 155124476
BM_ctype_isspace_y2 4.50 ns 4.50 ns 155077504
BM_ctype_isupper_n 3.68 ns 3.68 ns 191020583
BM_ctype_isupper_y 3.68 ns 3.68 ns 191015669
BM_ctype_isxdigit_n 4.50 ns 4.50 ns 156276745
BM_ctype_isxdigit_y1 3.28 ns 3.27 ns 214729725
BM_ctype_isxdigit_y2 4.48 ns 4.48 ns 155265129
BM_ctype_isxdigit_y3 4.48 ns 4.48 ns 155216846
I've also corrected a small mistake in the documentation for isxdigit().
Test: tests and benchmarks
Change-Id: I4a77859f826c3fc8f0e327e847886882f29ec4a3
diff --git a/benchmarks/ctype_benchmark.cpp b/benchmarks/ctype_benchmark.cpp
index 3c7f48d..eab0133 100644
--- a/benchmarks/ctype_benchmark.cpp
+++ b/benchmarks/ctype_benchmark.cpp
@@ -19,6 +19,63 @@
#include <benchmark/benchmark.h>
#include "util.h"
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalnum_y1, isalnum('A'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalnum_y2, isalnum('a'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalnum_y3, isalnum('0'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalnum_n, isalnum('_'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalpha_y1, isalpha('A'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalpha_y2, isalpha('a'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalpha_n, isalpha('_'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isascii_y, isascii('x'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isascii_n, isascii(0x88));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isblank_y1, isblank(' '));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isblank_y2, isblank('\t'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isblank_n, isblank('_'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_iscntrl_y1, iscntrl('\b'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_iscntrl_y2, iscntrl('\x7f'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_iscntrl_n, iscntrl('_'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isdigit_y, isdigit('0'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isdigit_n, isdigit('_'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isgraph_y1, isgraph('A'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isgraph_y2, isgraph('a'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isgraph_y3, isgraph('0'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isgraph_y4, isgraph('_'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isgraph_n, isgraph(' '));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_islower_y, islower('x'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_islower_n, islower('X'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isprint_y1, isprint('A'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isprint_y2, isprint('a'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isprint_y3, isprint('0'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isprint_y4, isprint('_'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isprint_y5, isprint(' '));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isprint_n, isprint('\b'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_ispunct_y, ispunct('_'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_ispunct_n, ispunct('A'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isspace_y1, isspace(' '));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isspace_y2, isspace('\t'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isspace_n, isspace('A'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isupper_y, isupper('X'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isupper_n, isupper('x'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isxdigit_y1, isxdigit('0'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isxdigit_y2, isxdigit('a'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isxdigit_y3, isxdigit('A'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isxdigit_n, isxdigit('_'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_toascii_y, toascii('x'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_toascii_n, toascii(0x88));
+
BIONIC_TRIVIAL_BENCHMARK(BM_ctype_tolower_y, tolower('X'));
BIONIC_TRIVIAL_BENCHMARK(BM_ctype_tolower_n, tolower('x'));
diff --git a/libc/Android.bp b/libc/Android.bp
index 5f82b13..bd025ca 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -438,7 +438,6 @@
"upstream-openbsd/lib/libc/gen/fnmatch.c",
"upstream-openbsd/lib/libc/gen/ftok.c",
"upstream-openbsd/lib/libc/gen/getprogname.c",
- "upstream-openbsd/lib/libc/gen/isctype.c",
"upstream-openbsd/lib/libc/gen/setprogname.c",
"upstream-openbsd/lib/libc/gen/verr.c",
"upstream-openbsd/lib/libc/gen/verrx.c",
diff --git a/libc/bionic/ctype.cpp b/libc/bionic/ctype.cpp
index b72935b..ba8afca 100644
--- a/libc/bionic/ctype.cpp
+++ b/libc/bionic/ctype.cpp
@@ -28,70 +28,146 @@
#include <ctype.h>
+static inline int __in_range(int c, char lo, char hi) {
+ return c >= lo && c <= hi;
+}
+
+int isalnum(int c) {
+ // `isalnum(c)` is `isalpha(c) || isdigit(c)`, but there's no obvious way
+ // to simplify that, and the table lookup is just slightly faster...
+ // Note that this is unsafe for inputs less than -1 (EOF) or greater than
+ // 0xff. This is true of other C libraries too.
+ return (_ctype_[c + 1] & (_CTYPE_U|_CTYPE_L|_CTYPE_N));
+}
+
int isalnum_l(int c, locale_t) {
return isalnum(c);
}
+int isalpha(int c) {
+ return __in_range(c, 'A', 'Z') || __in_range(c, 'a', 'z');
+}
+
int isalpha_l(int c, locale_t) {
return isalpha(c);
}
+int isascii(int c) {
+ return static_cast<unsigned>(c) < 0x80;
+}
+
+int isblank(int c) {
+ return c == ' ' || c == '\t';
+}
+
int isblank_l(int c, locale_t) {
return isblank(c);
}
+int iscntrl(int c) {
+ return (static_cast<unsigned>(c) < ' ') || c == 0x7f;
+}
+
int iscntrl_l(int c, locale_t) {
return iscntrl(c);
}
+int isdigit(int c) {
+ return __in_range(c, '0', '9');
+}
+
int isdigit_l(int c, locale_t) {
return isdigit(c);
}
+int isgraph(int c) {
+ return __in_range(c, '!', '~');
+}
+
int isgraph_l(int c, locale_t) {
return isgraph(c);
}
+int islower(int c) {
+ return __in_range(c, 'a', 'z');
+}
+
int islower_l(int c, locale_t) {
return islower(c);
}
+int isprint(int c) {
+ return __in_range(c, ' ', '~');
+}
+
int isprint_l(int c, locale_t) {
return isprint(c);
}
+int ispunct(int c) {
+ // `ispunct(c)` is `isgraph(c) && !isalnum(c)`, but there's no obvious way
+ // to simplify that, and the table lookup is significantly faster than any
+ // alternative tried. Note that this is unsafe for inputs less than -1 (EOF)
+ // or greater than 0xff. This is true of other C libraries too.
+ return (_ctype_[c + 1] & _CTYPE_P);
+}
+
int ispunct_l(int c, locale_t) {
return ispunct(c);
}
+int isspace(int c) {
+ return c == ' ' || __in_range(c, '\t', '\r');
+}
+
int isspace_l(int c, locale_t) {
return isspace(c);
}
+int isupper(int c) {
+ return __in_range(c, 'A', 'Z');
+}
+
int isupper_l(int c, locale_t) {
return isupper(c);
}
+int isxdigit(int c) {
+ return __in_range(c, '0', '9') || __in_range(c, 'a', 'f') || __in_range(c, 'A', 'F');
+}
+
int isxdigit_l(int c, locale_t) {
return isxdigit(c);
}
+int toascii(int c) {
+ return c & 0x7f;
+}
+
+int _toupper(int c) {
+ // Using EOR rather than AND makes no difference on arm, but saves an
+ // instruction on arm64.
+ return c ^ 0x20;
+}
+
+int toupper(int c) {
+ if (c >= 'a' && c <= 'z') return _toupper(c);
+ return c;
+}
+
int toupper_l(int c, locale_t) {
return toupper(c);
}
-int tolower_l(int c, locale_t) {
- return tolower(c);
+int _tolower(int c) {
+ return c | 0x20;
}
int tolower(int c) {
- if (c >= 'A' && c <= 'Z') return c | 0x20;
+ if (c >= 'A' && c <= 'Z') return _tolower(c);
return c;
}
-int toupper(int c) {
- // Using EOR rather than AND makes no difference on arm, but saves an
- // instruction on arm64.
- if (c >= 'a' && c <= 'z') return c ^ 0x20;
- return c;
+int tolower_l(int c, locale_t) {
+ return tolower(c);
}
diff --git a/libc/include/ctype.h b/libc/include/ctype.h
index e91b0e2..e7df299 100644
--- a/libc/include/ctype.h
+++ b/libc/include/ctype.h
@@ -97,7 +97,7 @@
int isspace(int __ch);
/** Returns true if `ch` is in `[A-Z]`. */
int isupper(int __ch);
-/** Returns true if `ch` is in `[0-9a-f]`. */
+/** Returns true if `ch` is in `[0-9A-Fa-f]`. */
int isxdigit(int __ch);
/** Returns the corresponding lower-case character if `ch` is upper-case, or `ch` otherwise. */
diff --git a/libc/upstream-openbsd/lib/libc/gen/isctype.c b/libc/upstream-openbsd/lib/libc/gen/isctype.c
deleted file mode 100644
index a4e944c..0000000
--- a/libc/upstream-openbsd/lib/libc/gen/isctype.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/* $OpenBSD: isctype.c,v 1.12 2015/09/13 11:38:08 guenther Exp $ */
-/*
- * Copyright (c) 1989 The Regents of the University of California.
- * All rights reserved.
- * (c) UNIX System Laboratories, Inc.
- * All or some portions of this file are derived from material licensed
- * to the University of California by American Telephone and Telegraph
- * Co. or Unix System Laboratories, Inc. and are reproduced herein with
- * the permission of UNIX System Laboratories, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#define _ANSI_LIBRARY
-#include <ctype.h>
-#include <stdio.h>
-
-#undef isalnum
-int
-isalnum(int c)
-{
- return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & (_U|_L|_N)));
-}
-DEF_STRONG(isalnum);
-
-#undef isalpha
-int
-isalpha(int c)
-{
- return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & (_U|_L)));
-}
-DEF_STRONG(isalpha);
-
-#undef isblank
-int
-isblank(int c)
-{
- return (c == ' ' || c == '\t');
-}
-DEF_STRONG(isblank);
-
-#undef iscntrl
-int
-iscntrl(int c)
-{
- return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & _C));
-}
-DEF_STRONG(iscntrl);
-
-#undef isdigit
-int
-isdigit(int c)
-{
- return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & _N));
-}
-DEF_STRONG(isdigit);
-
-#undef isgraph
-int
-isgraph(int c)
-{
- return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & (_P|_U|_L|_N)));
-}
-DEF_STRONG(isgraph);
-
-#undef islower
-int
-islower(int c)
-{
- return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & _L));
-}
-DEF_STRONG(islower);
-
-#undef isprint
-int
-isprint(int c)
-{
- return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & (_P|_U|_L|_N|_B)));
-}
-DEF_STRONG(isprint);
-
-#undef ispunct
-int
-ispunct(int c)
-{
- return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & _P));
-}
-DEF_STRONG(ispunct);
-
-#undef isspace
-int
-isspace(int c)
-{
- return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & _S));
-}
-DEF_STRONG(isspace);
-
-#undef isupper
-int
-isupper(int c)
-{
- return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & _U));
-}
-DEF_STRONG(isupper);
-
-#undef isxdigit
-int
-isxdigit(int c)
-{
- return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & (_N|_X)));
-}
-DEF_STRONG(isxdigit);
-
-#undef isascii
-int
-isascii(int c)
-{
- return ((unsigned int)c <= 0177);
-}
-DEF_WEAK(isascii);
-
-#undef toascii
-int
-toascii(int c)
-{
- return (c & 0177);
-}
-
-#undef _toupper
-int
-_toupper(int c)
-{
- return (c - 'a' + 'A');
-}
-
-#undef _tolower
-int
-_tolower(int c)
-{
- return (c - 'A' + 'a');
-}
diff --git a/tests/ctype_test.cpp b/tests/ctype_test.cpp
index c12518b..826d39a 100644
--- a/tests/ctype_test.cpp
+++ b/tests/ctype_test.cpp
@@ -18,183 +18,278 @@
#include <ctype.h>
+// We test from -1 (EOF) to 0xff, because that's the range for which behavior
+// is actually defined. (It's explicitly undefined below or above that.) Most
+// of our routines are no longer table-based and behave correctly for the
+// entire int range, but that's not true of other C libraries that we might
+// want to compare against, nor of our isalnum(3) and ispunct(3).
+static constexpr int kMin = -1;
+static constexpr int kMax = 256;
+
TEST(ctype, isalnum) {
- EXPECT_TRUE(isalnum('1'));
- EXPECT_TRUE(isalnum('a'));
- EXPECT_TRUE(isalnum('A'));
- EXPECT_FALSE(isalnum('!'));
- EXPECT_FALSE(isalnum(' '));
+ for (int i = kMin; i < kMax; ++i) {
+ if ((i >= '0' && i <= '9') ||
+ (i >= 'A' && i <= 'Z') ||
+ (i >= 'a' && i <= 'z')) {
+ EXPECT_TRUE(isalnum(i)) << i;
+ } else {
+ EXPECT_FALSE(isalnum(i)) << i;
+ }
+ }
}
TEST(ctype, isalnum_l) {
- EXPECT_TRUE(isalnum_l('1', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(isalnum_l('a', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(isalnum_l('A', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(isalnum_l('!', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(isalnum_l(' ', LC_GLOBAL_LOCALE));
+ for (int i = kMin; i < kMax; ++i) {
+ if ((i >= '0' && i <= '9') ||
+ (i >= 'A' && i <= 'Z') ||
+ (i >= 'a' && i <= 'z')) {
+ EXPECT_TRUE(isalnum_l(i, LC_GLOBAL_LOCALE)) << i;
+ } else {
+ EXPECT_FALSE(isalnum_l(i, LC_GLOBAL_LOCALE)) << i;
+ }
+ }
}
TEST(ctype, isalpha) {
- EXPECT_FALSE(isalpha('1'));
- EXPECT_TRUE(isalpha('a'));
- EXPECT_TRUE(isalpha('A'));
- EXPECT_FALSE(isalpha('!'));
- EXPECT_FALSE(isalpha(' '));
+ for (int i = kMin; i < kMax; ++i) {
+ if ((i >= 'A' && i <= 'Z') ||
+ (i >= 'a' && i <= 'z')) {
+ EXPECT_TRUE(isalpha(i)) << i;
+ } else {
+ EXPECT_FALSE(isalpha(i)) << i;
+ }
+ }
}
TEST(ctype, isalpha_l) {
- EXPECT_FALSE(isalpha_l('1', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(isalpha_l('a', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(isalpha_l('A', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(isalpha_l('!', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(isalpha_l(' ', LC_GLOBAL_LOCALE));
+ for (int i = kMin; i < kMax; ++i) {
+ if ((i >= 'A' && i <= 'Z') ||
+ (i >= 'a' && i <= 'z')) {
+ EXPECT_TRUE(isalpha_l(i, LC_GLOBAL_LOCALE)) << i;
+ } else {
+ EXPECT_FALSE(isalpha_l(i, LC_GLOBAL_LOCALE)) << i;
+ }
+ }
}
TEST(ctype, isascii) {
- EXPECT_TRUE(isascii('\x7f'));
- EXPECT_FALSE(isascii('\x80'));
+ for (int i = kMin; i < kMax; ++i) {
+ if (i >= 0 && i <= 0x7f) {
+ EXPECT_TRUE(isascii(i)) << i;
+ } else {
+ EXPECT_FALSE(isascii(i)) << i;
+ }
+ }
}
TEST(ctype, isblank) {
- EXPECT_FALSE(isblank('1'));
- EXPECT_TRUE(isblank(' '));
- EXPECT_TRUE(isblank('\t'));
+ for (int i = kMin; i < kMax; ++i) {
+ if (i == '\t' || i == ' ') {
+ EXPECT_TRUE(isblank(i)) << i;
+ } else {
+ EXPECT_FALSE(isblank(i)) << i;
+ }
+ }
}
TEST(ctype, isblank_l) {
- EXPECT_FALSE(isblank_l('1', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(isblank_l(' ', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(isblank_l('\t', LC_GLOBAL_LOCALE));
+ for (int i = kMin; i < kMax; ++i) {
+ if (i == '\t' || i == ' ') {
+ EXPECT_TRUE(isblank_l(i, LC_GLOBAL_LOCALE)) << i;
+ } else {
+ EXPECT_FALSE(isblank_l(i, LC_GLOBAL_LOCALE)) << i;
+ }
+ }
}
TEST(ctype, iscntrl) {
- EXPECT_FALSE(iscntrl('1'));
- EXPECT_TRUE(iscntrl('\b'));
+ for (int i = kMin; i < kMax; ++i) {
+ if ((i >= 0 && i < ' ') || i == 0x7f) {
+ EXPECT_TRUE(iscntrl(i)) << i;
+ } else {
+ EXPECT_FALSE(iscntrl(i)) << i;
+ }
+ }
}
TEST(ctype, iscntrl_l) {
- EXPECT_FALSE(iscntrl_l('1', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(iscntrl_l('\b', LC_GLOBAL_LOCALE));
+ for (int i = kMin; i < kMax; ++i) {
+ if ((i >= 0 && i < ' ') || i == 0x7f) {
+ EXPECT_TRUE(iscntrl_l(i, LC_GLOBAL_LOCALE)) << i;
+ } else {
+ EXPECT_FALSE(iscntrl_l(i, LC_GLOBAL_LOCALE)) << i;
+ }
+ }
}
TEST(ctype, isdigit) {
- EXPECT_TRUE(isdigit('1'));
- EXPECT_FALSE(isdigit('a'));
- EXPECT_FALSE(isdigit('x'));
+ for (int i = kMin; i < kMax; ++i) {
+ if (i >= '0' && i <= '9') {
+ EXPECT_TRUE(isdigit(i)) << i;
+ } else {
+ EXPECT_FALSE(isdigit(i)) << i;
+ }
+ }
}
TEST(ctype, isdigit_l) {
- EXPECT_TRUE(isdigit_l('1', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(isdigit_l('a', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(isdigit_l('x', LC_GLOBAL_LOCALE));
+ for (int i = kMin; i < kMax; ++i) {
+ if (i >= '0' && i <= '9') {
+ EXPECT_TRUE(isdigit_l(i, LC_GLOBAL_LOCALE)) << i;
+ } else {
+ EXPECT_FALSE(isdigit_l(i, LC_GLOBAL_LOCALE)) << i;
+ }
+ }
}
TEST(ctype, isgraph) {
- EXPECT_TRUE(isgraph('a'));
- EXPECT_TRUE(isgraph('A'));
- EXPECT_TRUE(isgraph('1'));
- EXPECT_TRUE(isgraph('!'));
- EXPECT_FALSE(isgraph(' '));
+ for (int i = kMin; i < kMax; ++i) {
+ if (i >= '!' && i <= '~') {
+ EXPECT_TRUE(isgraph(i)) << i;
+ } else {
+ EXPECT_FALSE(isgraph(i)) << i;
+ }
+ }
}
TEST(ctype, isgraph_l) {
- EXPECT_TRUE(isgraph_l('a', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(isgraph_l('A', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(isgraph_l('1', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(isgraph_l('!', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(isgraph_l(' ', LC_GLOBAL_LOCALE));
+ for (int i = kMin; i < kMax; ++i) {
+ if (i >= '!' && i <= '~') {
+ EXPECT_TRUE(isgraph_l(i, LC_GLOBAL_LOCALE)) << i;
+ } else {
+ EXPECT_FALSE(isgraph_l(i, LC_GLOBAL_LOCALE)) << i;
+ }
+ }
}
TEST(ctype, islower) {
- EXPECT_TRUE(islower('a'));
- EXPECT_FALSE(islower('A'));
- EXPECT_FALSE(islower('!'));
+ for (int i = kMin; i < kMax; ++i) {
+ if (i >= 'a' && i <= 'z') {
+ EXPECT_TRUE(islower(i)) << i;
+ } else {
+ EXPECT_FALSE(islower(i)) << i;
+ }
+ }
}
TEST(ctype, islower_l) {
- EXPECT_TRUE(islower_l('a', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(islower_l('A', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(islower_l('!', LC_GLOBAL_LOCALE));
+ for (int i = kMin; i < kMax; ++i) {
+ if (i >= 'a' && i <= 'z') {
+ EXPECT_TRUE(islower_l(i, LC_GLOBAL_LOCALE)) << i;
+ } else {
+ EXPECT_FALSE(islower_l(i, LC_GLOBAL_LOCALE)) << i;
+ }
+ }
}
TEST(ctype, isprint) {
- EXPECT_TRUE(isprint('a'));
- EXPECT_TRUE(isprint(' '));
- EXPECT_FALSE(isprint('\b'));
+ for (int i = kMin; i < kMax; ++i) {
+ if (i >= ' ' && i <= '~') {
+ EXPECT_TRUE(isprint(i)) << i;
+ } else {
+ EXPECT_FALSE(isprint(i)) << i;
+ }
+ }
}
TEST(ctype, isprint_l) {
- EXPECT_TRUE(isprint_l('a', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(isprint_l(' ', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(isprint_l('\b', LC_GLOBAL_LOCALE));
+ for (int i = kMin; i < kMax; ++i) {
+ if (i >= ' ' && i <= '~') {
+ EXPECT_TRUE(isprint_l(i, LC_GLOBAL_LOCALE)) << i;
+ } else {
+ EXPECT_FALSE(isprint_l(i, LC_GLOBAL_LOCALE)) << i;
+ }
+ }
}
TEST(ctype, ispunct) {
- EXPECT_TRUE(ispunct('!'));
- EXPECT_FALSE(ispunct('a'));
- EXPECT_FALSE(ispunct(' '));
- EXPECT_FALSE(ispunct('\b'));
+ for (int i = kMin; i < kMax; ++i) {
+ if ((i >= '!' && i <= '/') ||
+ (i >= ':' && i <= '@') ||
+ (i >= '[' && i <= '`') ||
+ (i >= '{' && i <= '~')) {
+ EXPECT_TRUE(ispunct(i)) << i;
+ } else {
+ EXPECT_FALSE(ispunct(i)) << i;
+ }
+ }
}
TEST(ctype, ispunct_l) {
- EXPECT_TRUE(ispunct_l('!', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(ispunct_l('a', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(ispunct_l(' ', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(ispunct_l('\b', LC_GLOBAL_LOCALE));
+ for (int i = kMin; i < kMax; ++i) {
+ if ((i >= '!' && i <= '/') ||
+ (i >= ':' && i <= '@') ||
+ (i >= '[' && i <= '`') ||
+ (i >= '{' && i <= '~')) {
+ EXPECT_TRUE(ispunct_l(i, LC_GLOBAL_LOCALE)) << i;
+ } else {
+ EXPECT_FALSE(ispunct_l(i, LC_GLOBAL_LOCALE)) << i;
+ }
+ }
}
TEST(ctype, isspace) {
- EXPECT_TRUE(isspace(' '));
- EXPECT_TRUE(isspace('\f'));
- EXPECT_TRUE(isspace('\n'));
- EXPECT_TRUE(isspace('\r'));
- EXPECT_TRUE(isspace('\t'));
- EXPECT_TRUE(isspace('\v'));
- EXPECT_FALSE(isspace('a'));
- EXPECT_FALSE(isspace('!'));
+ for (int i = kMin; i < kMax; ++i) {
+ if ((i >= '\t' && i <= '\r') || i == ' ') {
+ EXPECT_TRUE(isspace(i)) << i;
+ } else {
+ EXPECT_FALSE(isspace(i)) << i;
+ }
+ }
}
TEST(ctype, isspace_l) {
- EXPECT_TRUE(isspace_l(' ', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(isspace_l('\f', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(isspace_l('\n', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(isspace_l('\r', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(isspace_l('\t', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(isspace_l('\v', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(isspace_l('a', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(isspace_l('!', LC_GLOBAL_LOCALE));
+ for (int i = kMin; i < kMax; ++i) {
+ if ((i >= '\t' && i <= '\r') || i == ' ') {
+ EXPECT_TRUE(isspace_l(i, LC_GLOBAL_LOCALE)) << i;
+ } else {
+ EXPECT_FALSE(isspace_l(i, LC_GLOBAL_LOCALE)) << i;
+ }
+ }
}
TEST(ctype, isupper) {
- EXPECT_TRUE(isupper('A'));
- EXPECT_FALSE(isupper('a'));
- EXPECT_FALSE(isupper('!'));
+ for (int i = kMin; i < kMax; ++i) {
+ if (i >= 'A' && i <= 'Z') {
+ EXPECT_TRUE(isupper(i)) << i;
+ } else {
+ EXPECT_FALSE(isupper(i)) << i;
+ }
+ }
}
TEST(ctype, isupper_l) {
- EXPECT_TRUE(isupper_l('A', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(isupper_l('a', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(isupper_l('!', LC_GLOBAL_LOCALE));
+ for (int i = kMin; i < kMax; ++i) {
+ if (i >= 'A' && i <= 'Z') {
+ EXPECT_TRUE(isupper_l(i, LC_GLOBAL_LOCALE)) << i;
+ } else {
+ EXPECT_FALSE(isupper_l(i, LC_GLOBAL_LOCALE)) << i;
+ }
+ }
}
TEST(ctype, isxdigit) {
- EXPECT_TRUE(isxdigit('0'));
- EXPECT_FALSE(isxdigit('x'));
- EXPECT_TRUE(isxdigit('1'));
- EXPECT_TRUE(isxdigit('a'));
- EXPECT_TRUE(isxdigit('A'));
- EXPECT_FALSE(isxdigit('g'));
- EXPECT_FALSE(isxdigit(' '));
+ for (int i = kMin; i < kMax; ++i) {
+ if ((i >= '0' && i <= '9') ||
+ (i >= 'A' && i <= 'F') ||
+ (i >= 'a' && i <= 'f')) {
+ EXPECT_TRUE(isxdigit(i)) << i;
+ } else {
+ EXPECT_FALSE(isxdigit(i)) << i;
+ }
+ }
}
TEST(ctype, isxdigit_l) {
- EXPECT_TRUE(isxdigit_l('0', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(isxdigit_l('x', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(isxdigit_l('1', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(isxdigit_l('a', LC_GLOBAL_LOCALE));
- EXPECT_TRUE(isxdigit_l('A', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(isxdigit_l('g', LC_GLOBAL_LOCALE));
- EXPECT_FALSE(isxdigit_l(' ', LC_GLOBAL_LOCALE));
+ for (int i = kMin; i < kMax; ++i) {
+ if ((i >= '0' && i <= '9') ||
+ (i >= 'A' && i <= 'F') ||
+ (i >= 'a' && i <= 'f')) {
+ EXPECT_TRUE(isxdigit_l(i, LC_GLOBAL_LOCALE)) << i;
+ } else {
+ EXPECT_FALSE(isxdigit_l(i, LC_GLOBAL_LOCALE)) << i;
+ }
+ }
}
TEST(ctype, toascii) {