Tests: add UTF8 string copying tests for multi-character truncation

The existing test only checked truncation for a single multi-byte
code-point.

Also add defines for 5 and 6 byte UTF8 code-points.
This commit is contained in:
Campbell Barton
2024-10-25 14:27:55 +11:00
parent 6ad22ce40f
commit 04aa583526

View File

@@ -54,6 +54,13 @@
/* 3 byte sequence. */
#define STR_MB_ALPHA_3 "\xe0\xa0\x80"
/* 4 byte sequence. */
#define STR_MB_ALPHA_4 "\xf0\x90\x80\x80"
/* These don't decode into valid code-points and won't work in all UTF8 functions.
 * Use them for functions which support up to #BLI_UTF8_MAX, where failure to test
 * 5 & 6 byte sequences would cause test coverage to be incomplete.
 * See https://stackoverflow.com/a/35027139 for details. */
/* 5 byte sequence (lead byte 0xf8 followed by four 0x80 bytes). */
#define STR_MB_ALPHA_5 "\xf8\x80\x80\x80\x80"
/* 6 byte sequence (lead byte 0xfc followed by five 0x80 bytes). */
#define STR_MB_ALPHA_6 "\xfc\x80\x80\x80\x80\x80"
/* -------------------------------------------------------------------- */
/** \name Test #BLI_str_utf8_invalid_strip
* \{ */
@@ -400,6 +407,106 @@ TEST(string, StrCopyUTF8_TruncateEncoding)
#undef STRNCPY_UTF8_TRUNCATE
}
/**
 * Check #BLI_strncpy_utf8_rlen truncation for sources made of one or two multi-byte sequences,
 * using every destination size from "just fits" down to 1.
 *
 * The expectations encode that `dst_maxncpy` counts the null terminator and that a sequence
 * which does not fit entirely is dropped instead of being copied partially.
 */
TEST(string, StrCopyUTF8_TruncateEncodingMulti)
{
/* Copy `src` into a buffer limited to `dst_maxncpy` bytes, then check that:
 * - The returned length matches `strlen(dst)`.
 * - The copied string equals `dst_expect`.
 * - The guard byte placed directly after the `dst_maxncpy` bytes was not overwritten.
 *
 * The guard is written and compared as the `char` literal '\xff' so the check does not
 * depend on whether plain `char` is signed (comparing a signed `char` against the integer
 * `0xff` would compare -1 with 255 and always fail). */
#define STRNCPY_UTF8_TRUNC_EXPECT(src, dst_expect, dst_maxncpy) \
  do { \
    char dst[(dst_maxncpy) + 1]; \
    dst[(dst_maxncpy)] = '\xff'; \
    const size_t len = BLI_strncpy_utf8_rlen(dst, (src), (dst_maxncpy)); \
    EXPECT_EQ(len, strlen(dst)); \
    EXPECT_STREQ(dst, (dst_expect)); \
    EXPECT_EQ(dst[(dst_maxncpy)], '\xff'); \
  } while (0)

  /* Single characters. */
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_1, STR_MB_ALPHA_1, 2);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_1, "", 1);

  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_2, STR_MB_ALPHA_2, 3);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_2, "", 2);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_2, "", 1);

  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_3, STR_MB_ALPHA_3, 4);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_3, "", 3);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_3, "", 2);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_3, "", 1);

  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_4, STR_MB_ALPHA_4, 5);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_4, "", 4);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_4, "", 3);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_4, "", 2);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_4, "", 1);

  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_5, STR_MB_ALPHA_5, 6);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_5, "", 5);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_5, "", 4);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_5, "", 3);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_5, "", 2);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_5, "", 1);

  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6, STR_MB_ALPHA_6, 7);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6, "", 6);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6, "", 5);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6, "", 4);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6, "", 3);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6, "", 2);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6, "", 1);

  /* Multiple characters: the source always holds two sequences,
   * the second (then the first) is dropped as the destination shrinks. */
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_2 STR_MB_ALPHA_2, STR_MB_ALPHA_2 STR_MB_ALPHA_2, 5);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_2 STR_MB_ALPHA_2, STR_MB_ALPHA_2, 4);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_2 STR_MB_ALPHA_2, STR_MB_ALPHA_2, 3);
  /* Use the two-character source here too, the single character case is covered above. */
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_2 STR_MB_ALPHA_2, "", 2);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_2 STR_MB_ALPHA_2, "", 1);

  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_3 STR_MB_ALPHA_3, STR_MB_ALPHA_3 STR_MB_ALPHA_3, 7);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_3 STR_MB_ALPHA_3, STR_MB_ALPHA_3, 6);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_3 STR_MB_ALPHA_3, STR_MB_ALPHA_3, 5);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_3 STR_MB_ALPHA_3, STR_MB_ALPHA_3, 4);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_3 STR_MB_ALPHA_3, "", 3);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_3 STR_MB_ALPHA_3, "", 2);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_3 STR_MB_ALPHA_3, "", 1);

  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_4 STR_MB_ALPHA_4, STR_MB_ALPHA_4 STR_MB_ALPHA_4, 9);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_4 STR_MB_ALPHA_4, STR_MB_ALPHA_4, 8);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_4 STR_MB_ALPHA_4, STR_MB_ALPHA_4, 7);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_4 STR_MB_ALPHA_4, STR_MB_ALPHA_4, 6);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_4 STR_MB_ALPHA_4, STR_MB_ALPHA_4, 5);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_4 STR_MB_ALPHA_4, "", 4);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_4 STR_MB_ALPHA_4, "", 3);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_4 STR_MB_ALPHA_4, "", 2);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_4 STR_MB_ALPHA_4, "", 1);

  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_5 STR_MB_ALPHA_5, STR_MB_ALPHA_5 STR_MB_ALPHA_5, 11);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_5 STR_MB_ALPHA_5, STR_MB_ALPHA_5, 10);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_5 STR_MB_ALPHA_5, STR_MB_ALPHA_5, 9);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_5 STR_MB_ALPHA_5, STR_MB_ALPHA_5, 8);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_5 STR_MB_ALPHA_5, STR_MB_ALPHA_5, 7);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_5 STR_MB_ALPHA_5, STR_MB_ALPHA_5, 6);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_5 STR_MB_ALPHA_5, "", 5);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_5 STR_MB_ALPHA_5, "", 4);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_5 STR_MB_ALPHA_5, "", 3);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_5 STR_MB_ALPHA_5, "", 2);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_5 STR_MB_ALPHA_5, "", 1);

  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6 STR_MB_ALPHA_6, STR_MB_ALPHA_6 STR_MB_ALPHA_6, 13);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6 STR_MB_ALPHA_6, STR_MB_ALPHA_6, 12);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6 STR_MB_ALPHA_6, STR_MB_ALPHA_6, 11);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6 STR_MB_ALPHA_6, STR_MB_ALPHA_6, 10);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6 STR_MB_ALPHA_6, STR_MB_ALPHA_6, 9);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6 STR_MB_ALPHA_6, STR_MB_ALPHA_6, 8);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6 STR_MB_ALPHA_6, STR_MB_ALPHA_6, 7);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6 STR_MB_ALPHA_6, "", 6);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6 STR_MB_ALPHA_6, "", 5);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6 STR_MB_ALPHA_6, "", 4);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6 STR_MB_ALPHA_6, "", 3);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6 STR_MB_ALPHA_6, "", 2);
  STRNCPY_UTF8_TRUNC_EXPECT(STR_MB_ALPHA_6 STR_MB_ALPHA_6, "", 1);

#undef STRNCPY_UTF8_TRUNC_EXPECT
}
TEST(string, StrCopyUTF8_TerminateEncodingEarly)
{
/* A UTF8 sequence that has a null byte before the sequence ends.