Merge "Disable uchar.start_state test for musl." into main
diff --git a/tests/uchar_test.cpp b/tests/uchar_test.cpp
index 9c2d219..9e42f58 100644
--- a/tests/uchar_test.cpp
+++ b/tests/uchar_test.cpp
@@ -71,19 +71,42 @@
}
TEST(uchar, start_state) {
+ // C23 does not appear to specify the behavior of the conversion functions if
+ // a state is reused before the character is completed. In the wchar.h section
+ // (7.31.6.3) it says:
+ //
+ // If an mbstate_t object has been altered by any of the functions
+ // described in this subclause, and is then used with a different
+ // multibyte character sequence, or in the other conversion direction, or
+ // with a different LC_CTYPE category setting than on earlier function
+ // calls, the behavior is undefined.
+ //
+ // But "described in this subclause" refers to the wchar.h functions, not the
+ // uchar.h ones.
+ //
+ // Since C has no opinion, we need to make a choice. While no caller should
+ // ever do this (what does it mean to begin converting a UTF-32 character
+ // while still in the middle of decoding a UTF-8 sequence?), treating it as a
+ // conversion error (EILSEQ) seems the least surprising. Bionic and glibc both
+ // have that behavior. musl ignores the state (it also doesn't make much sense
+ // to read the state when the entire conversion completes in a single call)
+ // and converts the UTF-32 character.
+#if !defined(ANDROID_HOST_MUSL)
ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
uselocale(LC_GLOBAL_LOCALE);
char out[MB_LEN_MAX];
mbstate_t ps;
- // Any non-initial state is invalid when calling c32rtomb.
memset(&ps, 0, sizeof(ps));
EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(nullptr, "\xc2", 1, &ps));
errno = 0;
EXPECT_EQ(static_cast<size_t>(-1), c32rtomb(out, 0x00a2, &ps));
EXPECT_EQ(EILSEQ, errno);
+ // Similarly (and, I believe, contrary to the standard), musl seems to ignore
+ // the state entirely for the UTF-32 functions rather than reset it.
+
// If the first argument to c32rtomb is nullptr or the second is L'\0' the shift
// state should be reset.
memset(&ps, 0, sizeof(ps));
@@ -95,6 +118,7 @@
EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(nullptr, "\xf0\xa4", 1, &ps));
EXPECT_EQ(1U, c32rtomb(out, L'\0', &ps));
EXPECT_TRUE(mbsinit(&ps));
+#endif
}
TEST(uchar, c16rtomb_null_out) {