10x printf speedup.

Android is UTF-8. Don't make everyone pay to decode UTF-8 into wide
characters with mbrtowc() just so we can recognize '%'. No UTF-8 multibyte
sequence contains the byte '%', so we can just scan forwards, strchr-style.

Before:

  ---------------------------------------------------------------
  Benchmark                        Time           CPU Iterations
  ---------------------------------------------------------------
  BM_stdio_printf_literal       1290 ns       1290 ns     442554
  BM_stdio_printf_s             1204 ns       1204 ns     582446
  BM_stdio_printf_d             1206 ns       1206 ns     578311
  BM_stdio_printf_1$s           2263 ns       2263 ns     310002

After:

  ---------------------------------------------------------------
  Benchmark                        Time           CPU Iterations
  ---------------------------------------------------------------
  BM_stdio_printf_literal        178 ns        178 ns    3394001
  BM_stdio_printf_s              246 ns        246 ns    2850284
  BM_stdio_printf_d              252 ns        252 ns    2778610
  BM_stdio_printf_1$s            363 ns        363 ns    1929011

Add missing __find_arguments error checking to the wide variant to match
the regular one.

Also replace various char/wchar_t differences with the CHAR_TYPE macro.

Bug: http://b/67371539
Test: ran tests
Change-Id: I18f122009c22699943ab5d666a98ea594a972c40
diff --git a/benchmarks/stdio_benchmark.cpp b/benchmarks/stdio_benchmark.cpp
index 97a03dc..76e9ddb 100644
--- a/benchmarks/stdio_benchmark.cpp
+++ b/benchmarks/stdio_benchmark.cpp
@@ -196,3 +196,12 @@
   }
 }
 BIONIC_BENCHMARK(BM_stdio_printf_d);
+
+static void BM_stdio_printf_1$s(benchmark::State& state) {
+  while (state.KeepRunning()) {
+    char buf[BUFSIZ];
+    snprintf(buf, sizeof(buf), "this is a more typical error message with detail: %1$s",
+             "No such file or directory");
+  }
+}
+BIONIC_BENCHMARK(BM_stdio_printf_1$s);
diff --git a/benchmarks/suites/full.xml b/benchmarks/suites/full.xml
index 240b5e7..9bfd6ff 100644
--- a/benchmarks/suites/full.xml
+++ b/benchmarks/suites/full.xml
@@ -198,6 +198,9 @@
   <name>BM_stdio_printf_d</name>
 </fn>
 <fn>
+  <name>BM_stdio_printf_1$s</name>
+</fn>
+<fn>
   <name>BM_string_memcmp</name>
   <args>AT_ALIGNED_TWOBUF</args>
 </fn>
diff --git a/libc/stdio/vfprintf.cpp b/libc/stdio/vfprintf.cpp
index 10303d9..366b196 100644
--- a/libc/stdio/vfprintf.cpp
+++ b/libc/stdio/vfprintf.cpp
@@ -251,7 +251,6 @@
 #define MAXINT 0x1000   /* largest integer size (intmax_t) */
 
 int __vfprintf(FILE* fp, const char* fmt0, __va_list ap) {
-  char* fmt;           /* format string */
   int ch;              /* character from fmt */
   int n, n2;           /* handy integers (short term usage) */
   char* cp;            /* handy char pointer (short term usage) */
@@ -261,7 +260,6 @@
   int width;           /* width from format (%8d), or 0 */
   int prec;            /* precision from format; <0 for N/A */
   char sign;           /* sign prefix (' ', '+', '-', or \0) */
-  wchar_t wc;
   mbstate_t ps;
   /*
    * We can decompose the printed representation of floating
@@ -316,10 +314,12 @@
    * below longer.
    */
 #define PADSIZE 16 /* pad chunk size */
-  static char blanks[PADSIZE] = { ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-                                  ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' };
-  static char zeroes[PADSIZE] = { '0', '0', '0', '0', '0', '0', '0', '0',
-                                  '0', '0', '0', '0', '0', '0', '0', '0' };
+  static CHAR_TYPE blanks[PADSIZE] = {
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '
+  };
+  static CHAR_TYPE zeroes[PADSIZE] = {
+    '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'
+  };
 
   static const char xdigs_lower[] = "0123456789abcdef";
   static const char xdigs_upper[] = "0123456789ABCDEF";
@@ -444,17 +444,19 @@
   ((argtable != NULL) ? *((type*)(&argtable[nextarg++])) : (nextarg++, va_arg(ap, type)))
 
   _SET_ORIENTATION(fp, -1);
-  /* sorry, fprintf(read_only_file, "") returns EOF, not 0 */
+
+  // Writing "" to a read only file returns EOF, not 0.
   if (cantwrite(fp)) {
     errno = EBADF;
-    return (EOF);
+    return EOF;
   }
 
-  /* optimise fprintf(stderr) (and other unbuffered Unix files) */
-  if ((fp->_flags & (__SNBF | __SWR | __SRW)) == (__SNBF | __SWR) && fp->_file >= 0)
+  // Optimize writes to stderr (and other unbuffered files).
+  if ((fp->_flags & (__SNBF | __SWR | __SRW)) == (__SNBF | __SWR) && fp->_file >= 0) {
     return (__sbprintf(fp, fmt0, ap));
+  }
 
-  fmt = (char*)fmt0;
+  CHAR_TYPE* fmt = const_cast<CHAR_TYPE*>(fmt0);
   argtable = NULL;
   nextarg = 1;
   va_copy(orgap, ap);
@@ -469,25 +471,14 @@
    * Scan the format for conversions (`%' character).
    */
   for (;;) {
-    cp = fmt;
-    while ((n = mbrtowc(&wc, fmt, MB_CUR_MAX, &ps)) > 0) {
-      fmt += n;
-      if (wc == '%') {
-        fmt--;
-        break;
-      }
-    }
-    if (n < 0) {
-      ret = -1;
-      goto error;
-    }
+    for (cp = fmt; (ch = *fmt) != '\0' && ch != '%'; fmt++) continue;
     if (fmt != cp) {
       ptrdiff_t m = fmt - cp;
       if (m < 0 || m > INT_MAX - ret) goto overflow;
       PRINT(cp, m);
       ret += m;
     }
-    if (n == 0) goto done;
+    if (ch == '\0') goto done;
     fmt++; /* skip over '%' */
 
     flags = 0;
@@ -1062,12 +1053,10 @@
  * used since we are attempting to make snprintf thread safe, and alloca is
  * problematic since we have nested functions..)
  */
-static int __find_arguments(const char* fmt0, va_list ap, union arg** argtable,
+static int __find_arguments(const CHAR_TYPE* fmt0, va_list ap, union arg** argtable,
                             size_t* argtablesiz) {
-  char* fmt;                /* format string */
   int ch;                   /* character from fmt */
   int n, n2;                /* handy integer (short term usage) */
-  char* cp;                 /* handy char pointer (short term usage) */
   int flags;                /* flags as above */
   unsigned char* typetable; /* table of types */
   unsigned char stattypetable[STATIC_ARG_TBL_SIZE];
@@ -1075,8 +1064,6 @@
   int tablemax;  /* largest used index in table */
   int nextarg;   /* 1-based argument index */
   int ret = 0;   /* return value */
-  wchar_t wc;
-  mbstate_t ps;
 
   /*
    * Add an argument type to the table, expanding if necessary.
@@ -1135,28 +1122,20 @@
   } else {                 \
     ADDTYPE(T_INT);        \
   }
-  fmt = (char*)fmt0;
+  CHAR_TYPE* fmt = const_cast<CHAR_TYPE*>(fmt0);
+  CHAR_TYPE* cp;
   typetable = stattypetable;
   tablesize = STATIC_ARG_TBL_SIZE;
   tablemax = 0;
   nextarg = 1;
   memset(typetable, T_UNUSED, STATIC_ARG_TBL_SIZE);
-  memset(&ps, 0, sizeof(ps));
 
   /*
    * Scan the format for conversions (`%' character).
    */
   for (;;) {
-    cp = fmt;
-    while ((n = mbrtowc(&wc, fmt, MB_CUR_MAX, &ps)) > 0) {
-      fmt += n;
-      if (wc == '%') {
-        fmt--;
-        break;
-      }
-    }
-    if (n < 0) return (-1);
-    if (n == 0) goto done;
+    for (cp = fmt; (ch = *fmt) != '\0' && ch != '%'; fmt++) continue;
+    if (ch == '\0') goto done;
     fmt++; /* skip over '%' */
 
     flags = 0;
diff --git a/libc/stdio/vfwprintf.cpp b/libc/stdio/vfwprintf.cpp
index 38acccf..7235969 100644
--- a/libc/stdio/vfwprintf.cpp
+++ b/libc/stdio/vfwprintf.cpp
@@ -277,7 +277,6 @@
 #define MAXINT 0x1000   /* largest integer size (intmax_t) */
 
 int __vfwprintf(FILE* __restrict fp, const wchar_t* __restrict fmt0, __va_list ap) {
-  wchar_t* fmt;  /* format string */
   wchar_t ch;    /* character from fmt */
   int n, n2, n3; /* handy integers (short term usage) */
   wchar_t* cp;   /* handy char pointer (short term usage) */
@@ -336,10 +335,12 @@
    * below longer.
    */
 #define PADSIZE 16 /* pad chunk size */
-  static wchar_t blanks[PADSIZE] = { ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-                                     ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' };
-  static wchar_t zeroes[PADSIZE] = { '0', '0', '0', '0', '0', '0', '0', '0',
-                                     '0', '0', '0', '0', '0', '0', '0', '0' };
+  static CHAR_TYPE blanks[PADSIZE] = {
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '
+  };
+  static CHAR_TYPE zeroes[PADSIZE] = {
+    '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'
+  };
 
   static const char xdigs_lower[] = "0123456789abcdef";
   static const char xdigs_upper[] = "0123456789ABCDEF";
@@ -421,25 +422,28 @@
    * Get * arguments, including the form *nn$.  Preserve the nextarg
    * that the argument can be gotten once the type is determined.
    */
-#define GETASTER(val)                                         \
-  n2 = 0;                                                     \
-  cp = fmt;                                                   \
-  while (is_digit(*cp)) {                                     \
-    APPEND_DIGIT(n2, *cp);                                    \
-    cp++;                                                     \
-  }                                                           \
-  if (*cp == '$') {                                           \
-    int hold = nextarg;                                       \
-    if (argtable == NULL) {                                   \
-      argtable = statargtable;                                \
-      __find_arguments(fmt0, orgap, &argtable, &argtablesiz); \
-    }                                                         \
-    nextarg = n2;                                             \
-    val = GETARG(int);                                        \
-    nextarg = hold;                                           \
-    fmt = ++cp;                                               \
-  } else {                                                    \
-    val = GETARG(int);                                        \
+#define GETASTER(val)                                                     \
+  n2 = 0;                                                                 \
+  cp = fmt;                                                               \
+  while (is_digit(*cp)) {                                                 \
+    APPEND_DIGIT(n2, *cp);                                                \
+    cp++;                                                                 \
+  }                                                                       \
+  if (*cp == '$') {                                                       \
+    int hold = nextarg;                                                   \
+    if (argtable == NULL) {                                               \
+      argtable = statargtable;                                            \
+      if (__find_arguments(fmt0, orgap, &argtable, &argtablesiz) == -1) { \
+        ret = -1;                                                         \
+        goto error;                                                       \
+      }                                                                   \
+    }                                                                     \
+    nextarg = n2;                                                         \
+    val = GETARG(int);                                                    \
+    nextarg = hold;                                                       \
+    fmt = ++cp;                                                           \
+  } else {                                                                \
+    val = GETARG(int);                                                    \
   }
 
 /*
@@ -451,17 +455,19 @@
   ((argtable != NULL) ? *((type*)(&argtable[nextarg++])) : (nextarg++, va_arg(ap, type)))
 
   _SET_ORIENTATION(fp, 1);
-  /* sorry, fwprintf(read_only_file, "") returns EOF, not 0 */
+
+  // Writing "" to a read only file returns EOF, not 0.
   if (cantwrite(fp)) {
     errno = EBADF;
-    return (EOF);
+    return EOF;
   }
 
-  /* optimise fwprintf(stderr) (and other unbuffered Unix files) */
-  if ((fp->_flags & (__SNBF | __SWR | __SRW)) == (__SNBF | __SWR) && fp->_file >= 0)
+  // Optimize writes to stderr (and other unbuffered files).
+  if ((fp->_flags & (__SNBF | __SWR | __SRW)) == (__SNBF | __SWR) && fp->_file >= 0) {
     return (__sbprintf(fp, fmt0, ap));
+  }
 
-  fmt = (wchar_t*)fmt0;
+  CHAR_TYPE* fmt = const_cast<CHAR_TYPE*>(fmt0);
   argtable = NULL;
   nextarg = 1;
   va_copy(orgap, ap);
@@ -540,7 +546,10 @@
           nextarg = n;
           if (argtable == NULL) {
             argtable = statargtable;
-            __find_arguments(fmt0, orgap, &argtable, &argtablesiz);
+            if (__find_arguments(fmt0, orgap, &argtable, &argtablesiz) == -1) {
+              ret = -1;
+              goto error;
+            }
           }
           goto rflag;
         }
@@ -572,7 +581,10 @@
           nextarg = n;
           if (argtable == NULL) {
             argtable = statargtable;
-            __find_arguments(fmt0, orgap, &argtable, &argtablesiz);
+            if (__find_arguments(fmt0, orgap, &argtable, &argtablesiz) == -1) {
+              ret = -1;
+              goto error;
+            }
           }
           goto rflag;
         }
@@ -1037,12 +1049,10 @@
  * used since we are attempting to make snprintf thread safe, and alloca is
  * problematic since we have nested functions..)
  */
-static int __find_arguments(const wchar_t* fmt0, va_list ap, union arg** argtable,
+static int __find_arguments(const CHAR_TYPE* fmt0, va_list ap, union arg** argtable,
                             size_t* argtablesiz) {
-  wchar_t* fmt;             /* format string */
   int ch;                   /* character from fmt */
   int n, n2;                /* handy integer (short term usage) */
-  wchar_t* cp;              /* handy char pointer (short term usage) */
   int flags;                /* flags as above */
   unsigned char* typetable; /* table of types */
   unsigned char stattypetable[STATIC_ARG_TBL_SIZE];
@@ -1108,7 +1118,8 @@
   } else {                 \
     ADDTYPE(T_INT);        \
   }
-  fmt = (wchar_t*)fmt0;
+  CHAR_TYPE* fmt = const_cast<CHAR_TYPE*>(fmt0);
+  CHAR_TYPE* cp;
   typetable = stattypetable;
   tablesize = STATIC_ARG_TBL_SIZE;
   tablemax = 0;