Fix some long-standing UTF-8 bugs. We were incorrectly rejecting U+fffe and U+ffff, and incorrectly accepting characters above U+10ffff (see https://tools.ietf.org/html/rfc3629 section 12 for that restriction). Bug: http://lists.landley.net/pipermail/toybox-landley.net/2017-September/009146.html Test: ran tests Test: also ran the exhaustive test from that email thread Change-Id: I8ae8e41cef01b02933bd4f653ee07791932b79a5

commit: 402c762fc93239d86206a3bded8c17f19dabcd4c [log] [tgz]
author: Elliott Hughes <enh@google.com> Fri Jul 06 17:18:05 2018 -0700
committer: Elliott Hughes <enh@google.com> Fri Jul 06 17:18:05 2018 -0700
tree: 638213ab024e8feffc9ef9e171345e33f63fa0a3
parent: 50acae8f2ae017c49b1d616e93ce9f97f6b3d118 [diff] [blame]
diff --git a/tests/uchar_test.cpp b/tests/uchar_test.cpp
index 8b29667..2b4c4d3 100644
--- a/tests/uchar_test.cpp
+++ b/tests/uchar_test.cpp

@@ -322,6 +322,35 @@
 #endif
 }
 
+TEST(uchar, mbrtoc32_valid_non_characters) {
+#if HAVE_UCHAR
+  ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
+  uselocale(LC_GLOBAL_LOCALE);
+
+  char32_t out[8] = {};
+  ASSERT_EQ(3U, mbrtoc32(out, "\xef\xbf\xbe", 3, nullptr));
+  ASSERT_EQ(0xfffeU, out[0]);
+  ASSERT_EQ(3U, mbrtoc32(out, "\xef\xbf\xbf", 3, nullptr));
+  ASSERT_EQ(0xffffU, out[0]);
+#else
+  GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
+#endif
+}
+
+TEST(uchar, mbrtoc32_out_of_range) {
+#if HAVE_UCHAR
+  ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
+  uselocale(LC_GLOBAL_LOCALE);
+
+  char32_t out[8] = {};
+  errno = 0;
+  ASSERT_EQ(static_cast<size_t>(-1), mbrtoc32(out, "\xf5\x80\x80\x80", 4, nullptr));
+  ASSERT_EQ(EILSEQ, errno);
+#else
+  GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
+#endif
+}
+
 TEST(uchar, mbrtoc32) {
 #if HAVE_UCHAR
   char32_t out[8];
commit	402c762fc93239d86206a3bded8c17f19dabcd4c	[log] [tgz]
author	Elliott Hughes <enh@google.com>	Fri Jul 06 17:18:05 2018 -0700
committer	Elliott Hughes <enh@google.com>	Fri Jul 06 17:18:05 2018 -0700
tree	638213ab024e8feffc9ef9e171345e33f63fa0a3
parent	50acae8f2ae017c49b1d616e93ce9f97f6b3d118 [diff] [blame]