Reimplement the <ctype.h> is* functions.
Following on from the towlower()/towupper() changes, add benchmarks for
most of <ctype.h>, rewrite the tests to cover the entire defined range
for all of these functions, and then reimplement most of the functions.
The old table-based implementation is mostly a bad idea on modern
hardware, with only ispunct() showing a significant benefit compared to
any other way I could think of writing it, and isalnum() a marginal but
still convincingly genuine benefit.
My new benchmarks make an effort to test an example from each relevant
range of characters to avoid, say, accidentally optimizing the behavior
of `isalnum('0')` at the expense of `isalnum('z')`.
Interestingly, clang is able to generate what I believe to be the
optimal implementations from the most readable code, which is
impressive. It certainly matched or beat all my attempts to be clever!
The BSD table-based implementations made a special case of EOF despite
having a `_ctype_` table that's offset by 1 to include EOF at index 0.
I'm not sure why they didn't take advantage of that, but removing the
explicit check for EOF measurably improves the generated code on arm and
arm64, so even the two functions that still use the table benefit from
this rewrite.
Here are the benchmark results:
arm64 before:
  BM_ctype_isalnum_n                 3.73 ns         3.73 ns    183727137
  BM_ctype_isalnum_y1                3.82 ns         3.81 ns    186383058
  BM_ctype_isalnum_y2                3.73 ns         3.72 ns    187809830
  BM_ctype_isalnum_y3                3.78 ns         3.77 ns    181383055
  BM_ctype_isalpha_n                 3.75 ns         3.75 ns    189453927
  BM_ctype_isalpha_y1                3.76 ns         3.75 ns    184854043
  BM_ctype_isalpha_y2                4.32 ns         3.78 ns    186326931
  BM_ctype_isascii_n                 2.49 ns         2.48 ns    275583822
  BM_ctype_isascii_y                 2.51 ns         2.51 ns    282123915
  BM_ctype_isblank_n                 3.11 ns         3.10 ns    220472044
  BM_ctype_isblank_y1                3.20 ns         3.19 ns    226088868
  BM_ctype_isblank_y2                3.11 ns         3.11 ns    220809122
  BM_ctype_iscntrl_n                 3.79 ns         3.78 ns    188719938
  BM_ctype_iscntrl_y1                3.72 ns         3.71 ns    186209237
  BM_ctype_iscntrl_y2                3.80 ns         3.80 ns    184315749
  BM_ctype_isdigit_n                 3.76 ns         3.74 ns    188334682
  BM_ctype_isdigit_y                 3.78 ns         3.77 ns    186249335
  BM_ctype_isgraph_n                 3.99 ns         3.98 ns    177814143
  BM_ctype_isgraph_y1                3.98 ns         3.95 ns    175140090
  BM_ctype_isgraph_y2                4.01 ns         4.00 ns    178320453
  BM_ctype_isgraph_y3                3.96 ns         3.95 ns    175412814
  BM_ctype_isgraph_y4                4.01 ns         4.00 ns    175711174
  BM_ctype_islower_n                 3.75 ns         3.74 ns    188604818
  BM_ctype_islower_y                 3.79 ns         3.78 ns    154738238
  BM_ctype_isprint_n                 3.96 ns         3.95 ns    177607734
  BM_ctype_isprint_y1                3.94 ns         3.93 ns    174877244
  BM_ctype_isprint_y2                4.02 ns         4.01 ns    178206135
  BM_ctype_isprint_y3                3.94 ns         3.93 ns    175959069
  BM_ctype_isprint_y4                4.03 ns         4.02 ns    176158314
  BM_ctype_isprint_y5                3.95 ns         3.94 ns    178745462
  BM_ctype_ispunct_n                 3.78 ns         3.77 ns    184727184
  BM_ctype_ispunct_y                 3.76 ns         3.75 ns    187947503
  BM_ctype_isspace_n                 3.74 ns         3.74 ns    185300285
  BM_ctype_isspace_y1                3.77 ns         3.76 ns    187202066
  BM_ctype_isspace_y2                3.73 ns         3.73 ns    184105959
  BM_ctype_isupper_n                 3.81 ns         3.80 ns    185038761
  BM_ctype_isupper_y                 3.71 ns         3.71 ns    185885793
  BM_ctype_isxdigit_n                3.79 ns         3.79 ns    184965673
  BM_ctype_isxdigit_y1               3.76 ns         3.75 ns    188251672
  BM_ctype_isxdigit_y2               3.79 ns         3.78 ns    184187481
  BM_ctype_isxdigit_y3               3.77 ns         3.76 ns    187635540
arm64 after:
  BM_ctype_isalnum_n                 3.37 ns         3.37 ns    205613810
  BM_ctype_isalnum_y1                3.40 ns         3.39 ns    204806361
  BM_ctype_isalnum_y2                3.43 ns         3.43 ns    205066077
  BM_ctype_isalnum_y3                3.50 ns         3.50 ns    200057128
  BM_ctype_isalpha_n                 2.97 ns         2.97 ns    236084076
  BM_ctype_isalpha_y1                2.97 ns         2.97 ns    236083626
  BM_ctype_isalpha_y2                2.97 ns         2.97 ns    236084246
  BM_ctype_isascii_n                 2.55 ns         2.55 ns    272879994
  BM_ctype_isascii_y                 2.46 ns         2.45 ns    286522323
  BM_ctype_isblank_n                 3.18 ns         3.18 ns    220431175
  BM_ctype_isblank_y1                3.18 ns         3.18 ns    220345602
  BM_ctype_isblank_y2                3.18 ns         3.18 ns    220308509
  BM_ctype_iscntrl_n                 3.10 ns         3.10 ns    220344270
  BM_ctype_iscntrl_y1                3.10 ns         3.07 ns    228973615
  BM_ctype_iscntrl_y2                3.07 ns         3.07 ns    229192626
  BM_ctype_isdigit_n                 3.07 ns         3.07 ns    228925676
  BM_ctype_isdigit_y                 3.07 ns         3.07 ns    229182934
  BM_ctype_isgraph_n                 2.66 ns         2.66 ns    264268737
  BM_ctype_isgraph_y1                2.66 ns         2.66 ns    264445277
  BM_ctype_isgraph_y2                2.66 ns         2.66 ns    264327427
  BM_ctype_isgraph_y3                2.66 ns         2.66 ns    264427480
  BM_ctype_isgraph_y4                2.66 ns         2.66 ns    264155250
  BM_ctype_islower_n                 2.66 ns         2.66 ns    264421600
  BM_ctype_islower_y                 2.66 ns         2.66 ns    264341148
  BM_ctype_isprint_n                 2.66 ns         2.66 ns    264415198
  BM_ctype_isprint_y1                2.66 ns         2.66 ns    264268793
  BM_ctype_isprint_y2                2.66 ns         2.66 ns    264419205
  BM_ctype_isprint_y3                2.66 ns         2.66 ns    264205886
  BM_ctype_isprint_y4                2.66 ns         2.66 ns    264440797
  BM_ctype_isprint_y5                2.72 ns         2.72 ns    264333293
  BM_ctype_ispunct_n                 3.52 ns         3.51 ns    198956572
  BM_ctype_ispunct_y                 3.38 ns         3.38 ns    201661792
  BM_ctype_isspace_n                 3.39 ns         3.39 ns    206896620
  BM_ctype_isspace_y1                3.39 ns         3.39 ns    206569020
  BM_ctype_isspace_y2                3.39 ns         3.39 ns    206564415
  BM_ctype_isupper_n                 2.76 ns         2.75 ns    254227134
  BM_ctype_isupper_y                 2.76 ns         2.75 ns    254235314
  BM_ctype_isxdigit_n                3.60 ns         3.60 ns    194418653
  BM_ctype_isxdigit_y1               2.97 ns         2.97 ns    236082424
  BM_ctype_isxdigit_y2               3.48 ns         3.48 ns    200390011
  BM_ctype_isxdigit_y3               3.48 ns         3.48 ns    202255815
arm32 before:
  BM_ctype_isalnum_n                 4.77 ns         4.76 ns    129230464
  BM_ctype_isalnum_y1                4.88 ns         4.87 ns    147939321
  BM_ctype_isalnum_y2                4.74 ns         4.73 ns    145508054
  BM_ctype_isalnum_y3                4.81 ns         4.80 ns    144968914
  BM_ctype_isalpha_n                 4.80 ns         4.79 ns    148262579
  BM_ctype_isalpha_y1                4.74 ns         4.73 ns    145061326
  BM_ctype_isalpha_y2                4.83 ns         4.82 ns    147642546
  BM_ctype_isascii_n                 3.74 ns         3.72 ns    186711139
  BM_ctype_isascii_y                 3.79 ns         3.78 ns    183654780
  BM_ctype_isblank_n                 4.20 ns         4.19 ns    169733252
  BM_ctype_isblank_y1                4.19 ns         4.18 ns    165713363
  BM_ctype_isblank_y2                4.22 ns         4.21 ns    168776265
  BM_ctype_iscntrl_n                 4.75 ns         4.74 ns    145417484
  BM_ctype_iscntrl_y1                4.82 ns         4.81 ns    146283250
  BM_ctype_iscntrl_y2                4.79 ns         4.78 ns    148662453
  BM_ctype_isdigit_n                 4.77 ns         4.76 ns    145789210
  BM_ctype_isdigit_y                 4.84 ns         4.84 ns    146909458
  BM_ctype_isgraph_n                 4.72 ns         4.71 ns    145874663
  BM_ctype_isgraph_y1                4.86 ns         4.85 ns    142037606
  BM_ctype_isgraph_y2                4.79 ns         4.78 ns    145109612
  BM_ctype_isgraph_y3                4.75 ns         4.75 ns    144829039
  BM_ctype_isgraph_y4                4.86 ns         4.85 ns    146769899
  BM_ctype_islower_n                 4.76 ns         4.75 ns    147537637
  BM_ctype_islower_y                 4.79 ns         4.78 ns    145648017
  BM_ctype_isprint_n                 4.82 ns         4.81 ns    147154780
  BM_ctype_isprint_y1                4.76 ns         4.76 ns    145117604
  BM_ctype_isprint_y2                4.87 ns         4.86 ns    145801406
  BM_ctype_isprint_y3                4.79 ns         4.78 ns    148043446
  BM_ctype_isprint_y4                4.77 ns         4.76 ns    145157619
  BM_ctype_isprint_y5                4.91 ns         4.90 ns    147810800
  BM_ctype_ispunct_n                 4.74 ns         4.73 ns    145588611
  BM_ctype_ispunct_y                 4.82 ns         4.81 ns    144065436
  BM_ctype_isspace_n                 4.78 ns         4.77 ns    147153712
  BM_ctype_isspace_y1                4.73 ns         4.72 ns    145252863
  BM_ctype_isspace_y2                4.84 ns         4.83 ns    148615797
  BM_ctype_isupper_n                 4.75 ns         4.74 ns    148276631
  BM_ctype_isupper_y                 4.80 ns         4.79 ns    145529893
  BM_ctype_isxdigit_n                4.78 ns         4.77 ns    147271646
  BM_ctype_isxdigit_y1               4.74 ns         4.74 ns    145142209
  BM_ctype_isxdigit_y2               4.83 ns         4.82 ns    146398497
  BM_ctype_isxdigit_y3               4.78 ns         4.77 ns    147617686
arm32 after:
  BM_ctype_isalnum_n                 4.35 ns         4.35 ns    161086146
  BM_ctype_isalnum_y1                4.36 ns         4.35 ns    160961111
  BM_ctype_isalnum_y2                4.36 ns         4.36 ns    160733210
  BM_ctype_isalnum_y3                4.35 ns         4.35 ns    160897524
  BM_ctype_isalpha_n                 3.67 ns         3.67 ns    189377208
  BM_ctype_isalpha_y1                3.68 ns         3.67 ns    189438146
  BM_ctype_isalpha_y2                3.75 ns         3.69 ns    190971186
  BM_ctype_isascii_n                 3.69 ns         3.68 ns    191029191
  BM_ctype_isascii_y                 3.68 ns         3.68 ns    191011817
  BM_ctype_isblank_n                 4.09 ns         4.09 ns    171887541
  BM_ctype_isblank_y1                4.09 ns         4.09 ns    171829345
  BM_ctype_isblank_y2                4.08 ns         4.07 ns    170585590
  BM_ctype_iscntrl_n                 4.08 ns         4.07 ns    170614383
  BM_ctype_iscntrl_y1                4.13 ns         4.11 ns    171495899
  BM_ctype_iscntrl_y2                4.19 ns         4.18 ns    165255578
  BM_ctype_isdigit_n                 4.25 ns         4.24 ns    165237008
  BM_ctype_isdigit_y                 4.24 ns         4.24 ns    165256149
  BM_ctype_isgraph_n                 3.82 ns         3.81 ns    183610114
  BM_ctype_isgraph_y1                3.82 ns         3.81 ns    183614131
  BM_ctype_isgraph_y2                3.82 ns         3.81 ns    183616840
  BM_ctype_isgraph_y3                3.79 ns         3.79 ns    183620182
  BM_ctype_isgraph_y4                3.82 ns         3.81 ns    185740009
  BM_ctype_islower_n                 3.75 ns         3.74 ns    183619502
  BM_ctype_islower_y                 3.68 ns         3.68 ns    190999901
  BM_ctype_isprint_n                 3.69 ns         3.68 ns    190899544
  BM_ctype_isprint_y1                3.68 ns         3.67 ns    190192384
  BM_ctype_isprint_y2                3.67 ns         3.67 ns    189351466
  BM_ctype_isprint_y3                3.67 ns         3.67 ns    189430348
  BM_ctype_isprint_y4                3.68 ns         3.68 ns    189430161
  BM_ctype_isprint_y5                3.69 ns         3.68 ns    190962419
  BM_ctype_ispunct_n                 4.14 ns         4.14 ns    171034861
  BM_ctype_ispunct_y                 4.19 ns         4.19 ns    168308152
  BM_ctype_isspace_n                 4.50 ns         4.50 ns    156250887
  BM_ctype_isspace_y1                4.48 ns         4.48 ns    155124476
  BM_ctype_isspace_y2                4.50 ns         4.50 ns    155077504
  BM_ctype_isupper_n                 3.68 ns         3.68 ns    191020583
  BM_ctype_isupper_y                 3.68 ns         3.68 ns    191015669
  BM_ctype_isxdigit_n                4.50 ns         4.50 ns    156276745
  BM_ctype_isxdigit_y1               3.28 ns         3.27 ns    214729725
  BM_ctype_isxdigit_y2               4.48 ns         4.48 ns    155265129
  BM_ctype_isxdigit_y3               4.48 ns         4.48 ns    155216846
I've also corrected a small mistake in the documentation for isxdigit().
Test: tests and benchmarks
Change-Id: I4a77859f826c3fc8f0e327e847886882f29ec4a3
diff --git a/benchmarks/ctype_benchmark.cpp b/benchmarks/ctype_benchmark.cpp
index 3c7f48d..eab0133 100644
--- a/benchmarks/ctype_benchmark.cpp
+++ b/benchmarks/ctype_benchmark.cpp
@@ -19,6 +19,63 @@
 #include <benchmark/benchmark.h>
 #include "util.h"
 
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalnum_y1, isalnum('A'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalnum_y2, isalnum('a'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalnum_y3, isalnum('0'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalnum_n, isalnum('_'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalpha_y1, isalpha('A'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalpha_y2, isalpha('a'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isalpha_n, isalpha('_'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isascii_y, isascii('x'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isascii_n, isascii(0x88));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isblank_y1, isblank(' '));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isblank_y2, isblank('\t'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isblank_n, isblank('_'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_iscntrl_y1, iscntrl('\b'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_iscntrl_y2, iscntrl('\x7f'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_iscntrl_n, iscntrl('_'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isdigit_y, isdigit('0'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isdigit_n, isdigit('_'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isgraph_y1, isgraph('A'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isgraph_y2, isgraph('a'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isgraph_y3, isgraph('0'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isgraph_y4, isgraph('_'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isgraph_n, isgraph(' '));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_islower_y, islower('x'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_islower_n, islower('X'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isprint_y1, isprint('A'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isprint_y2, isprint('a'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isprint_y3, isprint('0'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isprint_y4, isprint('_'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isprint_y5, isprint(' '));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isprint_n, isprint('\b'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_ispunct_y, ispunct('_'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_ispunct_n, ispunct('A'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isspace_y1, isspace(' '));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isspace_y2, isspace('\t'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isspace_n, isspace('A'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isupper_y, isupper('X'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isupper_n, isupper('x'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isxdigit_y1, isxdigit('0'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isxdigit_y2, isxdigit('a'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isxdigit_y3, isxdigit('A'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_isxdigit_n, isxdigit('_'));
+
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_toascii_y, toascii('x'));
+BIONIC_TRIVIAL_BENCHMARK(BM_ctype_toascii_n, toascii(0x88));
+
 BIONIC_TRIVIAL_BENCHMARK(BM_ctype_tolower_y, tolower('X'));
 BIONIC_TRIVIAL_BENCHMARK(BM_ctype_tolower_n, tolower('x'));
 
diff --git a/libc/Android.bp b/libc/Android.bp
index 5f82b13..bd025ca 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -438,7 +438,6 @@
         "upstream-openbsd/lib/libc/gen/fnmatch.c",
         "upstream-openbsd/lib/libc/gen/ftok.c",
         "upstream-openbsd/lib/libc/gen/getprogname.c",
-        "upstream-openbsd/lib/libc/gen/isctype.c",
         "upstream-openbsd/lib/libc/gen/setprogname.c",
         "upstream-openbsd/lib/libc/gen/verr.c",
         "upstream-openbsd/lib/libc/gen/verrx.c",
diff --git a/libc/bionic/ctype.cpp b/libc/bionic/ctype.cpp
index b72935b..ba8afca 100644
--- a/libc/bionic/ctype.cpp
+++ b/libc/bionic/ctype.cpp
@@ -28,70 +28,146 @@
 
 #include <ctype.h>
 
+static inline int __in_range(int c, char lo, char hi) {
+  return c >= lo && c <= hi;
+}
+
+int isalnum(int c) {
+  // `isalnum(c)` is `isalpha(c) || isdigit(c)`, but there's no obvious way
+  // to simplify that, and the table lookup is just slightly faster...
+  // Note that this is unsafe for inputs less than -1 (EOF) or greater than
+  // 0xff. This is true of other C libraries too.
+  return (_ctype_[c + 1] & (_CTYPE_U|_CTYPE_L|_CTYPE_N));
+}
+
 int isalnum_l(int c, locale_t) {
   return isalnum(c);
 }
 
+int isalpha(int c) {
+  return __in_range(c, 'A', 'Z') || __in_range(c, 'a', 'z');
+}
+
 int isalpha_l(int c, locale_t) {
   return isalpha(c);
 }
 
+int isascii(int c) {
+  return static_cast<unsigned>(c) < 0x80;
+}
+
+int isblank(int c) {
+  return c == ' ' || c == '\t';
+}
+
 int isblank_l(int c, locale_t) {
   return isblank(c);
 }
 
+int iscntrl(int c) {
+  return (static_cast<unsigned>(c) < ' ') || c == 0x7f;
+}
+
 int iscntrl_l(int c, locale_t) {
   return iscntrl(c);
 }
 
+int isdigit(int c) {
+  return __in_range(c, '0', '9');
+}
+
 int isdigit_l(int c, locale_t) {
   return isdigit(c);
 }
 
+int isgraph(int c) {
+  return __in_range(c, '!', '~');
+}
+
 int isgraph_l(int c, locale_t) {
   return isgraph(c);
 }
 
+int islower(int c) {
+  return __in_range(c, 'a', 'z');
+}
+
 int islower_l(int c, locale_t) {
   return islower(c);
 }
 
+int isprint(int c) {
+  return __in_range(c, ' ', '~');
+}
+
 int isprint_l(int c, locale_t) {
   return isprint(c);
 }
 
+int ispunct(int c) {
+  // `ispunct(c)` is `isgraph(c) && !isalnum(c)`, but there's no obvious way
+  // to simplify that, and the table lookup is just slightly faster...
+  // Note that this is unsafe for inputs less than -1 (EOF) or greater than
+  // 0xff. This is true of other C libraries too.
+  return (_ctype_[c + 1] & _CTYPE_P);
+}
+
 int ispunct_l(int c, locale_t) {
   return ispunct(c);
 }
 
+int isspace(int c) {
+  return c == ' ' || __in_range(c, '\t', '\r');
+}
+
 int isspace_l(int c, locale_t) {
   return isspace(c);
 }
 
+int isupper(int c) {
+  return __in_range(c, 'A', 'Z');
+}
+
 int isupper_l(int c, locale_t) {
   return isupper(c);
 }
 
+int isxdigit(int c) {
+  return __in_range(c, '0', '9') || __in_range(c, 'a', 'f') || __in_range(c, 'A', 'F');
+}
+
 int isxdigit_l(int c, locale_t) {
   return isxdigit(c);
 }
 
+int toascii(int c) {
+  return c & 0x7f;
+}
+
+int _toupper(int c) {
+  // Using EOR rather than AND makes no difference on arm, but saves an
+  // instruction on arm64.
+  return c ^ 0x20;
+}
+
+int toupper(int c) {
+  if (c >= 'a' && c <= 'z') return _toupper(c);
+  return c;
+}
+
 int toupper_l(int c, locale_t) {
   return toupper(c);
 }
 
-int tolower_l(int c, locale_t) {
-  return tolower(c);
+int _tolower(int c) {
+  return c | 0x20;
 }
 
 int tolower(int c) {
-  if (c >= 'A' && c <= 'Z') return c | 0x20;
+  if (c >= 'A' && c <= 'Z') return _tolower(c);
   return c;
 }
 
-int toupper(int c) {
-  // Using EOR rather than AND makes no difference on arm, but saves an
-  // instruction on arm64.
-  if (c >= 'a' && c <= 'z') return c ^ 0x20;
-  return c;
+int tolower_l(int c, locale_t) {
+  return tolower(c);
 }
diff --git a/libc/include/ctype.h b/libc/include/ctype.h
index e91b0e2..e7df299 100644
--- a/libc/include/ctype.h
+++ b/libc/include/ctype.h
@@ -97,7 +97,7 @@
 int isspace(int __ch);
 /** Returns true if `ch` is in `[A-Z]`. */
 int isupper(int __ch);
-/** Returns true if `ch` is in `[0-9a-f]`. */
+/** Returns true if `ch` is in `[0-9A-Fa-f]`. */
 int isxdigit(int __ch);
 
 /** Returns the corresponding lower-case character if `ch` is upper-case, or `ch` otherwise. */
diff --git a/libc/upstream-openbsd/lib/libc/gen/isctype.c b/libc/upstream-openbsd/lib/libc/gen/isctype.c
deleted file mode 100644
index a4e944c..0000000
--- a/libc/upstream-openbsd/lib/libc/gen/isctype.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/*	$OpenBSD: isctype.c,v 1.12 2015/09/13 11:38:08 guenther Exp $ */
-/*
- * Copyright (c) 1989 The Regents of the University of California.
- * All rights reserved.
- * (c) UNIX System Laboratories, Inc.
- * All or some portions of this file are derived from material licensed
- * to the University of California by American Telephone and Telegraph
- * Co. or Unix System Laboratories, Inc. and are reproduced herein with
- * the permission of UNIX System Laboratories, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#define _ANSI_LIBRARY
-#include <ctype.h>
-#include <stdio.h>
-
-#undef isalnum
-int
-isalnum(int c)
-{
-	return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & (_U|_L|_N)));
-}
-DEF_STRONG(isalnum);
-
-#undef isalpha
-int
-isalpha(int c)
-{
-	return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & (_U|_L)));
-}
-DEF_STRONG(isalpha);
-
-#undef isblank
-int
-isblank(int c)
-{
-	return (c == ' ' || c == '\t');
-}
-DEF_STRONG(isblank);
-
-#undef iscntrl
-int
-iscntrl(int c)
-{
-	return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & _C));
-}
-DEF_STRONG(iscntrl);
-
-#undef isdigit
-int
-isdigit(int c)
-{
-	return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & _N));
-}
-DEF_STRONG(isdigit);
-
-#undef isgraph
-int
-isgraph(int c)
-{
-	return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & (_P|_U|_L|_N)));
-}
-DEF_STRONG(isgraph);
-
-#undef islower
-int
-islower(int c)
-{
-	return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & _L));
-}
-DEF_STRONG(islower);
-
-#undef isprint
-int
-isprint(int c)
-{
-	return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & (_P|_U|_L|_N|_B)));
-}
-DEF_STRONG(isprint);
-
-#undef ispunct
-int
-ispunct(int c)
-{
-	return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & _P));
-}
-DEF_STRONG(ispunct);
-
-#undef isspace
-int
-isspace(int c)
-{
-	return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & _S));
-}
-DEF_STRONG(isspace);
-
-#undef isupper
-int
-isupper(int c)
-{
-	return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & _U));
-}
-DEF_STRONG(isupper);
-
-#undef isxdigit
-int
-isxdigit(int c)
-{
-	return (c == EOF ? 0 : ((_ctype_ + 1)[(unsigned char)c] & (_N|_X)));
-}
-DEF_STRONG(isxdigit);
-
-#undef isascii
-int
-isascii(int c)
-{
-	return ((unsigned int)c <= 0177);
-}
-DEF_WEAK(isascii);
-
-#undef toascii
-int
-toascii(int c)
-{
-	return (c & 0177);
-}
-
-#undef _toupper
-int
-_toupper(int c)
-{
-	return (c - 'a' + 'A');
-}
-
-#undef _tolower
-int
-_tolower(int c)
-{
-	return (c - 'A' + 'a');
-}
diff --git a/tests/ctype_test.cpp b/tests/ctype_test.cpp
index c12518b..826d39a 100644
--- a/tests/ctype_test.cpp
+++ b/tests/ctype_test.cpp
@@ -18,183 +18,278 @@
 
 #include <ctype.h>
 
+// We test from -1 (EOF) to 0xff, because that's the range for which behavior
+// is actually defined. (It's explicitly undefined below or above that.) Most
+// of our routines are no longer table-based and behave correctly for the
+// entire int range, but that's not true of other C libraries that we might
+// want to compare against, nor of our isalnum(3) and ispunct(3).
+static constexpr int kMin = -1;
+static constexpr int kMax = 256;
+
 TEST(ctype, isalnum) {
-  EXPECT_TRUE(isalnum('1'));
-  EXPECT_TRUE(isalnum('a'));
-  EXPECT_TRUE(isalnum('A'));
-  EXPECT_FALSE(isalnum('!'));
-  EXPECT_FALSE(isalnum(' '));
+  for (int i = kMin; i < kMax; ++i) {
+    if ((i >= '0' && i <= '9') ||
+        (i >= 'A' && i <= 'Z') ||
+        (i >= 'a' && i <= 'z')) {
+      EXPECT_TRUE(isalnum(i)) << i;
+    } else {
+      EXPECT_FALSE(isalnum(i)) << i;
+    }
+  }
 }
 
 TEST(ctype, isalnum_l) {
-  EXPECT_TRUE(isalnum_l('1', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(isalnum_l('a', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(isalnum_l('A', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(isalnum_l('!', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(isalnum_l(' ', LC_GLOBAL_LOCALE));
+  for (int i = kMin; i < kMax; ++i) {
+    if ((i >= '0' && i <= '9') ||
+        (i >= 'A' && i <= 'Z') ||
+        (i >= 'a' && i <= 'z')) {
+      EXPECT_TRUE(isalnum_l(i, LC_GLOBAL_LOCALE)) << i;
+    } else {
+      EXPECT_FALSE(isalnum_l(i, LC_GLOBAL_LOCALE)) << i;
+    }
+  }
 }
 
 TEST(ctype, isalpha) {
-  EXPECT_FALSE(isalpha('1'));
-  EXPECT_TRUE(isalpha('a'));
-  EXPECT_TRUE(isalpha('A'));
-  EXPECT_FALSE(isalpha('!'));
-  EXPECT_FALSE(isalpha(' '));
+  for (int i = kMin; i < kMax; ++i) {
+    if ((i >= 'A' && i <= 'Z') ||
+        (i >= 'a' && i <= 'z')) {
+      EXPECT_TRUE(isalpha(i)) << i;
+    } else {
+      EXPECT_FALSE(isalpha(i)) << i;
+    }
+  }
 }
 
 TEST(ctype, isalpha_l) {
-  EXPECT_FALSE(isalpha_l('1', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(isalpha_l('a', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(isalpha_l('A', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(isalpha_l('!', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(isalpha_l(' ', LC_GLOBAL_LOCALE));
+  for (int i = kMin; i < kMax; ++i) {
+    if ((i >= 'A' && i <= 'Z') ||
+        (i >= 'a' && i <= 'z')) {
+      EXPECT_TRUE(isalpha_l(i, LC_GLOBAL_LOCALE)) << i;
+    } else {
+      EXPECT_FALSE(isalpha_l(i, LC_GLOBAL_LOCALE)) << i;
+    }
+  }
 }
 
 TEST(ctype, isascii) {
-  EXPECT_TRUE(isascii('\x7f'));
-  EXPECT_FALSE(isascii('\x80'));
+  for (int i = kMin; i < kMax; ++i) {
+    if (i >= 0 && i <= 0x7f) {
+      EXPECT_TRUE(isascii(i)) << i;
+    } else {
+      EXPECT_FALSE(isascii(i)) << i;
+    }
+  }
 }
 
 TEST(ctype, isblank) {
-  EXPECT_FALSE(isblank('1'));
-  EXPECT_TRUE(isblank(' '));
-  EXPECT_TRUE(isblank('\t'));
+  for (int i = kMin; i < kMax; ++i) {
+    if (i == '\t' || i == ' ') {
+      EXPECT_TRUE(isblank(i)) << i;
+    } else {
+      EXPECT_FALSE(isblank(i)) << i;
+    }
+  }
 }
 
 TEST(ctype, isblank_l) {
-  EXPECT_FALSE(isblank_l('1', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(isblank_l(' ', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(isblank_l('\t', LC_GLOBAL_LOCALE));
+  for (int i = kMin; i < kMax; ++i) {
+    if (i == '\t' || i == ' ') {
+      EXPECT_TRUE(isblank_l(i, LC_GLOBAL_LOCALE)) << i;
+    } else {
+      EXPECT_FALSE(isblank_l(i, LC_GLOBAL_LOCALE)) << i;
+    }
+  }
 }
 
 TEST(ctype, iscntrl) {
-  EXPECT_FALSE(iscntrl('1'));
-  EXPECT_TRUE(iscntrl('\b'));
+  for (int i = kMin; i < kMax; ++i) {
+    if ((i >= 0 && i < ' ') || i == 0x7f) {
+      EXPECT_TRUE(iscntrl(i)) << i;
+    } else {
+      EXPECT_FALSE(iscntrl(i)) << i;
+    }
+  }
 }
 
 TEST(ctype, iscntrl_l) {
-  EXPECT_FALSE(iscntrl_l('1', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(iscntrl_l('\b', LC_GLOBAL_LOCALE));
+  for (int i = kMin; i < kMax; ++i) {
+    if ((i >= 0 && i < ' ') || i == 0x7f) {
+      EXPECT_TRUE(iscntrl_l(i, LC_GLOBAL_LOCALE)) << i;
+    } else {
+      EXPECT_FALSE(iscntrl_l(i, LC_GLOBAL_LOCALE)) << i;
+    }
+  }
 }
 
 TEST(ctype, isdigit) {
-  EXPECT_TRUE(isdigit('1'));
-  EXPECT_FALSE(isdigit('a'));
-  EXPECT_FALSE(isdigit('x'));
+  for (int i = kMin; i < kMax; ++i) {
+    if (i >= '0' && i <= '9') {
+      EXPECT_TRUE(isdigit(i)) << i;
+    } else {
+      EXPECT_FALSE(isdigit(i)) << i;
+    }
+  }
 }
 
 TEST(ctype, isdigit_l) {
-  EXPECT_TRUE(isdigit_l('1', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(isdigit_l('a', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(isdigit_l('x', LC_GLOBAL_LOCALE));
+  for (int i = kMin; i < kMax; ++i) {
+    if (i >= '0' && i <= '9') {
+      EXPECT_TRUE(isdigit_l(i, LC_GLOBAL_LOCALE)) << i;
+    } else {
+      EXPECT_FALSE(isdigit_l(i, LC_GLOBAL_LOCALE)) << i;
+    }
+  }
 }
 
 TEST(ctype, isgraph) {
-  EXPECT_TRUE(isgraph('a'));
-  EXPECT_TRUE(isgraph('A'));
-  EXPECT_TRUE(isgraph('1'));
-  EXPECT_TRUE(isgraph('!'));
-  EXPECT_FALSE(isgraph(' '));
+  for (int i = kMin; i < kMax; ++i) {
+    if (i >= '!' && i <= '~') {
+      EXPECT_TRUE(isgraph(i)) << i;
+    } else {
+      EXPECT_FALSE(isgraph(i)) << i;
+    }
+  }
 }
 
 TEST(ctype, isgraph_l) {
-  EXPECT_TRUE(isgraph_l('a', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(isgraph_l('A', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(isgraph_l('1', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(isgraph_l('!', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(isgraph_l(' ', LC_GLOBAL_LOCALE));
+  for (int i = kMin; i < kMax; ++i) {
+    if (i >= '!' && i <= '~') {
+      EXPECT_TRUE(isgraph_l(i, LC_GLOBAL_LOCALE)) << i;
+    } else {
+      EXPECT_FALSE(isgraph_l(i, LC_GLOBAL_LOCALE)) << i;
+    }
+  }
 }
 
 TEST(ctype, islower) {
-  EXPECT_TRUE(islower('a'));
-  EXPECT_FALSE(islower('A'));
-  EXPECT_FALSE(islower('!'));
+  for (int i = kMin; i < kMax; ++i) {
+    if (i >= 'a' && i <= 'z') {
+      EXPECT_TRUE(islower(i)) << i;
+    } else {
+      EXPECT_FALSE(islower(i)) << i;
+    }
+  }
 }
 
 TEST(ctype, islower_l) {
-  EXPECT_TRUE(islower_l('a', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(islower_l('A', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(islower_l('!', LC_GLOBAL_LOCALE));
+  for (int i = kMin; i < kMax; ++i) {
+    if (i >= 'a' && i <= 'z') {
+      EXPECT_TRUE(islower_l(i, LC_GLOBAL_LOCALE)) << i;
+    } else {
+      EXPECT_FALSE(islower_l(i, LC_GLOBAL_LOCALE)) << i;
+    }
+  }
 }
 
 TEST(ctype, isprint) {
-  EXPECT_TRUE(isprint('a'));
-  EXPECT_TRUE(isprint(' '));
-  EXPECT_FALSE(isprint('\b'));
+  for (int i = kMin; i < kMax; ++i) {
+    if (i >= ' ' && i <= '~') {
+      EXPECT_TRUE(isprint(i)) << i;
+    } else {
+      EXPECT_FALSE(isprint(i)) << i;
+    }
+  }
 }
 
 TEST(ctype, isprint_l) {
-  EXPECT_TRUE(isprint_l('a', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(isprint_l(' ', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(isprint_l('\b', LC_GLOBAL_LOCALE));
+  for (int i = kMin; i < kMax; ++i) {  // Sweep all of [kMin, kMax); bounds are defined outside this hunk.
+    if (i >= ' ' && i <= '~') {  // Expected printable range: ' ' through '~' (space included).
+      EXPECT_TRUE(isprint_l(i, LC_GLOBAL_LOCALE)) << i;  // Locale variant, always passed LC_GLOBAL_LOCALE here.
+    } else {
+      EXPECT_FALSE(isprint_l(i, LC_GLOBAL_LOCALE)) << i;  // `<< i` attaches the offending value to any failure.
+    }
+  }
 }
 
 TEST(ctype, ispunct) {
-  EXPECT_TRUE(ispunct('!'));
-  EXPECT_FALSE(ispunct('a'));
-  EXPECT_FALSE(ispunct(' '));
-  EXPECT_FALSE(ispunct('\b'));
+  for (int i = kMin; i < kMax; ++i) {  // Sweep all of [kMin, kMax); bounds are defined outside this hunk.
+    if ((i >= '!' && i <= '/') ||  // Punctuation is expected in four separate runs: '!'..'/',
+        (i >= ':' && i <= '@') ||  // ':'..'@',
+        (i >= '[' && i <= '`') ||  // '['..'`',
+        (i >= '{' && i <= '~')) {  // and '{'..'~'.
+      EXPECT_TRUE(ispunct(i)) << i;  // `<< i` attaches the offending value to any failure.
+    } else {
+      EXPECT_FALSE(ispunct(i)) << i;  // Every value outside those runs must be rejected.
+    }
+  }
 }
 
 TEST(ctype, ispunct_l) {
-  EXPECT_TRUE(ispunct_l('!', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(ispunct_l('a', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(ispunct_l(' ', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(ispunct_l('\b', LC_GLOBAL_LOCALE));
+  for (int i = kMin; i < kMax; ++i) {  // Sweep all of [kMin, kMax); bounds are defined outside this hunk.
+    if ((i >= '!' && i <= '/') ||  // Punctuation is expected in four separate runs: '!'..'/',
+        (i >= ':' && i <= '@') ||  // ':'..'@',
+        (i >= '[' && i <= '`') ||  // '['..'`',
+        (i >= '{' && i <= '~')) {  // and '{'..'~'.
+      EXPECT_TRUE(ispunct_l(i, LC_GLOBAL_LOCALE)) << i;  // Locale variant, always passed LC_GLOBAL_LOCALE here.
+    } else {
+      EXPECT_FALSE(ispunct_l(i, LC_GLOBAL_LOCALE)) << i;  // `<< i` attaches the offending value to any failure.
+    }
+  }
 }
 
 TEST(ctype, isspace) {
-  EXPECT_TRUE(isspace(' '));
-  EXPECT_TRUE(isspace('\f'));
-  EXPECT_TRUE(isspace('\n'));
-  EXPECT_TRUE(isspace('\r'));
-  EXPECT_TRUE(isspace('\t'));
-  EXPECT_TRUE(isspace('\v'));
-  EXPECT_FALSE(isspace('a'));
-  EXPECT_FALSE(isspace('!'));
+  for (int i = kMin; i < kMax; ++i) {  // Sweep all of [kMin, kMax); bounds are defined outside this hunk.
+    if ((i >= '\t' && i <= '\r') || i == ' ') {  // '\t'..'\r' spans \t \n \v \f \r; plus the space character.
+      EXPECT_TRUE(isspace(i)) << i;  // `<< i` attaches the offending value to any failure.
+    } else {
+      EXPECT_FALSE(isspace(i)) << i;  // Every other value must be rejected.
+    }
+  }
 }
 
 TEST(ctype, isspace_l) {
-  EXPECT_TRUE(isspace_l(' ', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(isspace_l('\f', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(isspace_l('\n', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(isspace_l('\r', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(isspace_l('\t', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(isspace_l('\v', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(isspace_l('a', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(isspace_l('!', LC_GLOBAL_LOCALE));
+  for (int i = kMin; i < kMax; ++i) {  // Sweep all of [kMin, kMax); bounds are defined outside this hunk.
+    if ((i >= '\t' && i <= '\r') || i == ' ') {  // '\t'..'\r' spans \t \n \v \f \r; plus the space character.
+      EXPECT_TRUE(isspace_l(i, LC_GLOBAL_LOCALE)) << i;  // Locale variant, always passed LC_GLOBAL_LOCALE here.
+    } else {
+      EXPECT_FALSE(isspace_l(i, LC_GLOBAL_LOCALE)) << i;  // `<< i` attaches the offending value to any failure.
+    }
+  }
 }
 
 TEST(ctype, isupper) {
-  EXPECT_TRUE(isupper('A'));
-  EXPECT_FALSE(isupper('a'));
-  EXPECT_FALSE(isupper('!'));
+  for (int i = kMin; i < kMax; ++i) {  // Sweep all of [kMin, kMax); bounds are defined outside this hunk.
+    if (i >= 'A' && i <= 'Z') {  // Only 'A' through 'Z' are expected to be uppercase.
+      EXPECT_TRUE(isupper(i)) << i;  // `<< i` attaches the offending value to any failure.
+    } else {
+      EXPECT_FALSE(isupper(i)) << i;  // Every value outside the range must be rejected.
+    }
+  }
 }
 
 TEST(ctype, isupper_l) {
-  EXPECT_TRUE(isupper_l('A', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(isupper_l('a', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(isupper_l('!', LC_GLOBAL_LOCALE));
+  for (int i = kMin; i < kMax; ++i) {  // Sweep all of [kMin, kMax); bounds are defined outside this hunk.
+    if (i >= 'A' && i <= 'Z') {  // Only 'A' through 'Z' are expected to be uppercase.
+      EXPECT_TRUE(isupper_l(i, LC_GLOBAL_LOCALE)) << i;  // Locale variant, always passed LC_GLOBAL_LOCALE here.
+    } else {
+      EXPECT_FALSE(isupper_l(i, LC_GLOBAL_LOCALE)) << i;  // `<< i` attaches the offending value to any failure.
+    }
+  }
 }
 
 TEST(ctype, isxdigit) {
-  EXPECT_TRUE(isxdigit('0'));
-  EXPECT_FALSE(isxdigit('x'));
-  EXPECT_TRUE(isxdigit('1'));
-  EXPECT_TRUE(isxdigit('a'));
-  EXPECT_TRUE(isxdigit('A'));
-  EXPECT_FALSE(isxdigit('g'));
-  EXPECT_FALSE(isxdigit(' '));
+  for (int i = kMin; i < kMax; ++i) {  // Sweep all of [kMin, kMax); bounds are defined outside this hunk.
+    if ((i >= '0' && i <= '9') ||  // Hex digits are expected in three runs: decimal digits,
+        (i >= 'A' && i <= 'F') ||  // uppercase 'A'..'F',
+        (i >= 'a' && i <= 'f')) {  // and lowercase 'a'..'f'.
+      EXPECT_TRUE(isxdigit(i)) << i;  // `<< i` attaches the offending value to any failure.
+    } else {
+      EXPECT_FALSE(isxdigit(i)) << i;  // Every value outside those runs must be rejected.
+    }
+  }
 }
 
 TEST(ctype, isxdigit_l) {
-  EXPECT_TRUE(isxdigit_l('0', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(isxdigit_l('x', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(isxdigit_l('1', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(isxdigit_l('a', LC_GLOBAL_LOCALE));
-  EXPECT_TRUE(isxdigit_l('A', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(isxdigit_l('g', LC_GLOBAL_LOCALE));
-  EXPECT_FALSE(isxdigit_l(' ', LC_GLOBAL_LOCALE));
+  for (int i = kMin; i < kMax; ++i) {  // Sweep all of [kMin, kMax); bounds are defined outside this hunk.
+    if ((i >= '0' && i <= '9') ||  // Hex digits are expected in three runs: decimal digits,
+        (i >= 'A' && i <= 'F') ||  // uppercase 'A'..'F',
+        (i >= 'a' && i <= 'f')) {  // and lowercase 'a'..'f'.
+      EXPECT_TRUE(isxdigit_l(i, LC_GLOBAL_LOCALE)) << i;  // Locale variant, always passed LC_GLOBAL_LOCALE here.
+    } else {
+      EXPECT_FALSE(isxdigit_l(i, LC_GLOBAL_LOCALE)) << i;  // `<< i` attaches the offending value to any failure.
+    }
+  }
 }
 
 TEST(ctype, toascii) {