More scanf cleanup.

Merge CT_CCL and CT_STRING handling before we add %m.

Also fix an accidental scanf/wscanf difference.

Add currently-disabled tests for questionable behavior noticed during
code review that isn't a regression, but should be fixed later.

Bug: http://b/68672236
Bug: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=202240
Test: ran tests
Change-Id: I3eec9b7dfce84f63c68426406224822c52551d64
diff --git a/tests/stdio_test.cpp b/tests/stdio_test.cpp
index f0e0ab6..e060cd9 100644
--- a/tests/stdio_test.cpp
+++ b/tests/stdio_test.cpp
@@ -1016,6 +1016,95 @@
   CheckScanf(swscanf, L"+,-/.", L"%[+--/]", 1, "+,-/");
 }
 
+// https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=202240
+TEST(STDIO_TEST, scanf_wscanf_EOF) {
+  EXPECT_EQ(0, sscanf("b", "ab"));
+  EXPECT_EQ(EOF, sscanf("", "a"));
+  EXPECT_EQ(0, swscanf(L"b", L"ab"));
+  EXPECT_EQ(EOF, swscanf(L"", L"a"));
+}
+
+TEST(STDIO_TEST, scanf_invalid_UTF8) {
+#if 0 // TODO: more tests invented during code review; no regressions, so fix later.
+  char buf[BUFSIZ];
+  wchar_t wbuf[BUFSIZ];
+
+  memset(buf, 0, sizeof(buf));
+  memset(wbuf, 0, sizeof(wbuf));
+  EXPECT_EQ(0, sscanf("\xc0" " foo", "%ls %s", wbuf, buf));
+#endif
+}
+
+TEST(STDIO_TEST, scanf_no_match_no_termination) {
+  char buf[4] = "x";
+  EXPECT_EQ(0, sscanf("d", "%[abc]", buf));
+  EXPECT_EQ('x', buf[0]);
+  EXPECT_EQ(0, swscanf(L"d", L"%[abc]", buf));
+  EXPECT_EQ('x', buf[0]);
+
+  wchar_t wbuf[4] = L"x";
+  EXPECT_EQ(0, swscanf(L"d", L"%l[abc]", wbuf));
+  EXPECT_EQ(L'x', wbuf[0]);
+
+  EXPECT_EQ(EOF, sscanf("", "%s", buf));
+  EXPECT_EQ('x', buf[0]);
+
+  EXPECT_EQ(EOF, swscanf(L"", L"%ls", wbuf));
+  EXPECT_EQ(L'x', wbuf[0]);
+}
+
+TEST(STDIO_TEST, scanf_wscanf_wide_character_class) {
+#if 0 // TODO: more tests invented during code review; no regressions, so fix later.
+  wchar_t buf[BUFSIZ];
+
+  // A wide character shouldn't match an ASCII-only class for scanf or wscanf.
+  memset(buf, 0, sizeof(buf));
+  EXPECT_EQ(1, sscanf("xĀyz", "%l[xy]", buf));
+  EXPECT_EQ(L"x"s, std::wstring(buf));
+  memset(buf, 0, sizeof(buf));
+  EXPECT_EQ(1, swscanf(L"xĀyz", L"%l[xy]", buf));
+  EXPECT_EQ(L"x"s, std::wstring(buf));
+
+  // Even if scanf has wide characters in a class, they won't match...
+  // TODO: is that a bug?
+  memset(buf, 0, sizeof(buf));
+  EXPECT_EQ(1, sscanf("xĀyz", "%l[xĀy]", buf));
+  EXPECT_EQ(L"x"s, std::wstring(buf));
+  // ...unless you use wscanf.
+  memset(buf, 0, sizeof(buf));
+  EXPECT_EQ(1, swscanf(L"xĀyz", L"%l[xĀy]", buf));
+  EXPECT_EQ(L"xĀy"s, std::wstring(buf));
+
+  // Negation only covers ASCII for scanf...
+  memset(buf, 0, sizeof(buf));
+  EXPECT_EQ(1, sscanf("xĀyz", "%l[^ab]", buf));
+  EXPECT_EQ(L"x"s, std::wstring(buf));
+  // ...but covers wide characters for wscanf.
+  memset(buf, 0, sizeof(buf));
+  EXPECT_EQ(1, swscanf(L"xĀyz", L"%l[^ab]", buf));
+  EXPECT_EQ(L"xĀyz"s, std::wstring(buf));
+
+  // We already determined that non-ASCII characters are ignored in scanf classes.
+  memset(buf, 0, sizeof(buf));
+  EXPECT_EQ(1, sscanf("x"
+                      "\xc4\x80" // Matches a byte from each wide char in the class.
+                      "\xc6\x82" // Neither byte is in the class.
+                      "yz",
+                      "%l[xy" "\xc5\x80" "\xc4\x81" "]", buf));
+  EXPECT_EQ(L"x", std::wstring(buf));
+  // bionic and glibc both behave badly for wscanf, so let's call it right for now...
+  memset(buf, 0, sizeof(buf));
+  EXPECT_EQ(1, swscanf(L"x"
+                       L"\xc4\x80"
+                       L"\xc6\x82"
+                       L"yz",
+                       L"%l[xy" L"\xc5\x80" L"\xc4\x81" L"]", buf));
+  // Note that this isn't L"xĀ" --- although the *bytes* matched, they're
+  // not put back together as a wide character.
+  EXPECT_EQ(L"x" L"\xc4" L"\x80", std::wstring(buf));
+#endif
+}
+
 TEST(STDIO_TEST, cantwrite_EBADF) {
   // If we open a file read-only...
   FILE* fp = fopen("/proc/version", "r");