Add sscanf %[ tests and fix the bug that fell out.

Strictly, POSIX says "If a '-' is in the scanlist and is not the first
wide character, nor the second where the first wide character is a '^',
nor the last wide character, the behavior is implementation-defined",
but it seems unreasonable for swscanf to interpret `a-c` differently
from sscanf. Make ours behave the same as each other by making swscanf
work the same as sscanf.

Bug: http://b/68672236
Test: ran tests
Change-Id: Ia84805897628d7128e901b468e02504373730e61
diff --git a/libc/stdio/vfscanf.c b/libc/stdio/vfscanf.c
index 853e0cf..887b435 100644
--- a/libc/stdio/vfscanf.c
+++ b/libc/stdio/vfscanf.c
@@ -83,9 +83,6 @@
 #define CT_INT 3    /* integer, i.e., strtoimax or strtoumax */
 #define CT_FLOAT 4  /* floating, i.e., strtod */
 
-#define u_char unsigned char
-#define u_long unsigned long
-
 static u_char* __sccl(char*, u_char*);
 
 /*
@@ -192,9 +189,6 @@
       /*
        * Conversions.
        * Those marked `compat' are for 4.[123]BSD compatibility.
-       *
-       * (According to ANSI, E and X formats are supposed
-       * to the same as e and x.  Sorry about that.)
        */
       case 'D': /* compat */
         flags |= LONG;
diff --git a/libc/stdio/vfwscanf.c b/libc/stdio/vfwscanf.c
index 206f4a2..1030a62 100644
--- a/libc/stdio/vfwscanf.c
+++ b/libc/stdio/vfwscanf.c
@@ -84,12 +84,43 @@
 #define CT_INT 3    /* integer, i.e., strtoimax or strtoumax */
 #define CT_FLOAT 4  /* floating, i.e., strtod */
 
-#define u_char unsigned char
-#define u_long unsigned long
+// An interpretive version of __sccl from vfscanf.c --- a table of all wchar_t values would
+// be a little too expensive, and some kind of compressed version isn't worth the trouble.
+static inline bool in_ccl(wchar_t wc, const wchar_t* ccl) {
+  // Is this a negated set?
+  bool member_result = true;
+  if (*ccl == '^') {
+    member_result = false;
+    ++ccl;
+  }
 
-#define INCCL(_c)                                        \
-  (cclcompl ? (wmemchr(ccls, (_c), ccle - ccls) == NULL) \
-            : (wmemchr(ccls, (_c), ccle - ccls) != NULL))
+  // The first character may be ']' or '-' without being special.
+  if (*ccl == '-' || *ccl == ']') {
+    // A literal match?
+    if (*ccl == wc) return member_result;
+    ++ccl;
+  }
+
+  while (*ccl && *ccl != ']') {
+    // The last character may be '-' without being special.
+    if (*ccl == '-' && ccl[1] != '\0' && ccl[1] != ']') {
+      wchar_t first = *(ccl - 1);
+      wchar_t last = *(ccl + 1);
+      if (first <= last) {
+        // In the range?
+        if (wc >= first && wc <= last) return member_result;
+        ccl += 2;
+        continue;
+      }
+      // A '-' is not considered to be part of a range if the character after
+      // is not greater than the character before, so fall through...
+    }
+    // A literal match?
+    if (*ccl == wc) return member_result;
+    ++ccl;
+  }
+  return !member_result;
+}
 
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wframe-larger-than="
@@ -109,9 +140,7 @@
   int nread;              /* number of characters consumed from fp */
   int base;               /* base argument to strtoimax/strtouimax */
   wchar_t buf[BUF];       /* buffer for numeric conversions */
-  const wchar_t* ccls;    /* character class start */
-  const wchar_t* ccle;    /* character class end */
-  int cclcompl;           /* ccl is complemented? */
+  const wchar_t* ccl;
   wint_t wi;              /* handy wint_t */
   char* mbp;              /* multibyte string pointer for %c %s %[ */
   size_t nconv;           /* number of bytes in mb. conversion */
@@ -127,7 +156,6 @@
   nconversions = 0;
   nread = 0;
   base = 0; /* XXX just to keep gcc happy */
-  ccls = ccle = NULL;
   for (;;) {
     c = *fmt++;
     if (c == 0) {
@@ -210,9 +238,6 @@
       /*
        * Conversions.
        * Those marked `compat' are for 4.[123]BSD compatibility.
-       *
-       * (According to ANSI, E and X formats are supposed
-       * to the same as e and x.  Sorry about that.)
        */
       case 'D': /* compat */
         flags |= LONG;
@@ -266,15 +291,10 @@
         break;
 
       case '[':
-        ccls = fmt;
-        if (*fmt == '^') {
-          cclcompl = 1;
-          fmt++;
-        } else
-          cclcompl = 0;
+        ccl = fmt;
+        if (*fmt == '^') fmt++;
         if (*fmt == ']') fmt++;
         while (*fmt != '\0' && *fmt != ']') fmt++;
-        ccle = fmt;
         fmt++;
         flags |= NOSKIP;
         c = CT_CCL;
@@ -387,12 +407,12 @@
         /* take only those things in the class */
         if ((flags & SUPPRESS) && (flags & LONG)) {
           n = 0;
-          while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && INCCL(wi)) n++;
+          while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && in_ccl(wi, ccl)) n++;
           if (wi != WEOF) __ungetwc(wi, fp);
           if (n == 0) goto match_failure;
         } else if (flags & LONG) {
           p0 = p = va_arg(ap, wchar_t*);
-          while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && INCCL(wi))
+          while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && in_ccl(wi, ccl))
             *p++ = (wchar_t)wi;
           if (wi != WEOF) __ungetwc(wi, fp);
           n = p - p0;
@@ -403,7 +423,7 @@
           if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
           n = 0;
           memset(&mbs, 0, sizeof(mbs));
-          while ((wi = __fgetwc_unlock(fp)) != WEOF && width != 0 && INCCL(wi)) {
+          while ((wi = __fgetwc_unlock(fp)) != WEOF && width != 0 && in_ccl(wi, ccl)) {
             if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
               nconv = wcrtomb(mbp, wi, &mbs);
               if (nconv == (size_t)-1) goto input_failure;
@@ -418,6 +438,7 @@
             n++;
           }
           if (wi != WEOF) __ungetwc(wi, fp);
+          if (n == 0) goto match_failure;
           if (!(flags & SUPPRESS)) {
             *mbp = 0;
             nassigned++;
diff --git a/tests/stdio_test.cpp b/tests/stdio_test.cpp
index 05a19d1..f0e0ab6 100644
--- a/tests/stdio_test.cpp
+++ b/tests/stdio_test.cpp
@@ -923,33 +923,99 @@
 TEST(STDIO_TEST, sscanf_swscanf) {
   struct stuff {
     char s1[123];
-    int i1;
+    int i1, i2;
+    char cs1[3];
+    char s2[3];
+    char c1;
     double d1;
     float f1;
-    char s2[123];
+    char s3[123];
 
     void Check() {
-      ASSERT_STREQ("hello", s1);
-      ASSERT_EQ(123, i1);
-      ASSERT_DOUBLE_EQ(1.23, d1);
-      ASSERT_FLOAT_EQ(9.0f, f1);
-      ASSERT_STREQ("world", s2);
+      EXPECT_STREQ("hello", s1);
+      EXPECT_EQ(123, i1);
+      EXPECT_EQ(456, i2);
+      EXPECT_EQ('a', cs1[0]);
+      EXPECT_EQ('b', cs1[1]);
+      EXPECT_EQ('x', cs1[2]); // No terminating NUL.
+      EXPECT_STREQ("AB", s2); // Terminating NUL.
+      EXPECT_EQ('!', c1);
+      EXPECT_DOUBLE_EQ(1.23, d1);
+      EXPECT_FLOAT_EQ(9.0f, f1);
+      EXPECT_STREQ("world", s3);
     }
   } s;
 
-  memset(&s, 0, sizeof(s));
-  ASSERT_EQ(5, sscanf("  hello 123 1.23 0x1.2p3 world",
-                      "%s %i %lf %f %s",
-                      s.s1, &s.i1, &s.d1, &s.f1, s.s2));
+  memset(&s, 'x', sizeof(s));
+  ASSERT_EQ(9, sscanf("  hello 123 456abAB! 1.23 0x1.2p3 world",
+                      "%s %i%i%2c%[A-Z]%c %lf %f %s",
+                      s.s1, &s.i1, &s.i2, s.cs1, s.s2, &s.c1, &s.d1, &s.f1, s.s3));
   s.Check();
 
-  memset(&s, 0, sizeof(s));
-  ASSERT_EQ(5, swscanf(L"  hello 123 1.23 0x1.2p3 world",
-                       L"%s %i %lf %f %s",
-                       s.s1, &s.i1, &s.d1, &s.f1, s.s2));
+  memset(&s, 'x', sizeof(s));
+  ASSERT_EQ(9, swscanf(L"  hello 123 456abAB! 1.23 0x1.2p3 world",
+                       L"%s %i%i%2c%[A-Z]%c %lf %f %s",
+                       s.s1, &s.i1, &s.i2, s.cs1, s.s2, &s.c1, &s.d1, &s.f1, s.s3));
   s.Check();
 }
 
+template <typename T>
+static void CheckScanf(int sscanf_fn(const T*, const T*, ...),
+                       const T* input, const T* fmt,
+                       int expected_count, const char* expected_string) {
+  char buf[256] = {};
+  ASSERT_EQ(expected_count, sscanf_fn(input, fmt, &buf)) << fmt;
+  ASSERT_STREQ(expected_string, buf) << fmt;
+}
+
+TEST(STDIO_TEST, sscanf_ccl) {
+  // `abc` is just those characters.
+  CheckScanf(sscanf, "abcd", "%[abc]", 1, "abc");
+  // `a-c` is the range 'a' .. 'c'.
+  CheckScanf(sscanf, "abcd", "%[a-c]", 1, "abc");
+  CheckScanf(sscanf, "-d", "%[a-c]", 0, "");
+  CheckScanf(sscanf, "ac-bAd", "%[a--c]", 1, "ac-bA");
+  // `a-c-e` is equivalent to `a-e`.
+  CheckScanf(sscanf, "abcdefg", "%[a-c-e]", 1, "abcde");
+  // `e-a` is equivalent to `ae-` (because 'e' > 'a').
+  CheckScanf(sscanf, "-a-e-b", "%[e-a]", 1, "-a-e-");
+  // An initial '^' negates the set.
+  CheckScanf(sscanf, "abcde", "%[^d]", 1, "abc");
+  CheckScanf(sscanf, "abcdefgh", "%[^c-d]", 1, "ab");
+  CheckScanf(sscanf, "hgfedcba", "%[^c-d]", 1, "hgfe");
+  // The first character may be ']' or '-' without being special.
+  CheckScanf(sscanf, "[[]]x", "%[][]", 1, "[[]]");
+  CheckScanf(sscanf, "-a-x", "%[-a]", 1, "-a-");
+  // The last character may be '-' without being special.
+  CheckScanf(sscanf, "-a-x", "%[a-]", 1, "-a-");
+  // X--Y is [X--] + Y, not [X--] + [--Y] (a bug in my initial implementation).
+  CheckScanf(sscanf, "+,-/.", "%[+--/]", 1, "+,-/");
+}
+
+TEST(STDIO_TEST, swscanf_ccl) {
+  // `abc` is just those characters.
+  CheckScanf(swscanf, L"abcd", L"%[abc]", 1, "abc");
+  // `a-c` is the range 'a' .. 'c'.
+  CheckScanf(swscanf, L"abcd", L"%[a-c]", 1, "abc");
+  CheckScanf(swscanf, L"-d", L"%[a-c]", 0, "");
+  CheckScanf(swscanf, L"ac-bAd", L"%[a--c]", 1, "ac-bA");
+  // `a-c-e` is equivalent to `a-e`.
+  CheckScanf(swscanf, L"abcdefg", L"%[a-c-e]", 1, "abcde");
+  // `e-a` is equivalent to `ae-` (because 'e' > 'a').
+  CheckScanf(swscanf, L"-a-e-b", L"%[e-a]", 1, "-a-e-");
+  // An initial '^' negates the set.
+  CheckScanf(swscanf, L"abcde", L"%[^d]", 1, "abc");
+  CheckScanf(swscanf, L"abcdefgh", L"%[^c-d]", 1, "ab");
+  CheckScanf(swscanf, L"hgfedcba", L"%[^c-d]", 1, "hgfe");
+  // The first character may be ']' or '-' without being special.
+  CheckScanf(swscanf, L"[[]]x", L"%[][]", 1, "[[]]");
+  CheckScanf(swscanf, L"-a-x", L"%[-a]", 1, "-a-");
+  // The last character may be '-' without being special.
+  CheckScanf(swscanf, L"-a-x", L"%[a-]", 1, "-a-");
+  // X--Y is [X--] + Y, not [X--] + [--Y] (a bug in my initial implementation).
+  CheckScanf(swscanf, L"+,-/.", L"%[+--/]", 1, "+,-/");
+}
+
 TEST(STDIO_TEST, cantwrite_EBADF) {
   // If we open a file read-only...
   FILE* fp = fopen("/proc/version", "r");