/* SPDX-FileCopyrightText: 2025 Blender Authors
 *
 * SPDX-License-Identifier: Apache-2.0 */

#include "testing/testing.h"

#include "BLI_csv_parse.hh"
#include "BLI_string_ref.hh"

namespace blender::csv_parse::tests {

/**
 * Test wrapper around #detail::find_end_of_simple_field that accepts a #StringRef (so string
 * literals can be passed directly) and forwards it as a character span.
 *
 * \param buffer: Text to scan.
 * \param start: Index in \a buffer at which scanning begins.
 * \param delimiter: Field separator, a comma by default.
 * \return Whatever the wrapped function returns, wrapped in an optional so it can be compared
 * against plain integers in `EXPECT_EQ`.
 */
static std::optional<int64_t> find_end_of_simple_field(const StringRef buffer,
                                                       const int64_t start,
                                                       const char delimiter = ',')
{
  return detail::find_end_of_simple_field(Span<char>(buffer), start, delimiter);
}

/**
 * Test wrapper around #detail::find_end_of_quoted_field that accepts a #StringRef.
 *
 * \param buffer: Text to scan.
 * \param start: Index in \a buffer at which scanning begins.
 * \param quote: Quote character, a double quote by default.
 * \param escape_chars: Characters that escape a following quote; by default a double quote and
 * a backslash.
 * \return The result of the wrapped function; the tests below expect `std::nullopt` when no
 * closing quote is found.
 */
static std::optional<int64_t> find_end_of_quoted_field(
    const StringRef buffer,
    const int64_t start,
    const char quote = '"',
    const Span<char> escape_chars = Span<char>(StringRef("\"\\")))
{
  return detail::find_end_of_quoted_field(Span<char>(buffer), start, quote, escape_chars);
}

/**
 * Test wrapper around #detail::parse_record_fields that copies the parsed fields into owned
 * strings so that they can be compared with `EXPECT_EQ`.
 *
 * \param buffer: Text to parse a single record from.
 * \param start: Index in \a buffer at which the record begins.
 * \param delimiter: Field separator.
 * \param quote: Quote character.
 * \param quote_escape_chars: Characters that escape a following quote.
 * \return The fields of the record, or `std::nullopt` if the wrapped function reports failure.
 */
static std::optional<Vector<std::string>> parse_record_fields(
    const StringRef buffer,
    const int64_t start = 0,
    const char delimiter = ',',
    const char quote = '"',
    const Span<char> quote_escape_chars = Span<char>{'"', '\\'})
{
  Vector<Span<char>> fields;
  const std::optional<int64_t> end_of_record = detail::parse_record_fields(
      Span<char>(buffer), start, delimiter, quote, quote_escape_chars, fields);
  if (!end_of_record.has_value()) {
    return std::nullopt;
  }
  /* The spans reference `buffer`; copy them so the result owns its data. */
  Vector<std::string> result;
  for (const Span<char> field : fields) {
    result.append(std::string(field.begin(), field.end()));
  }
  return result;
}

/** Owned result of parsing a complete CSV string with #parse_csv_fields. */
struct StrParseResult {
  /** True when #parse_csv_in_chunks returned a value. */
  bool success = false;
  /** Fields passed to the header callback. */
  Vector<std::string> column_names;
  /** All records in order, each one a list of field strings. */
  Vector<Vector<std::string>> records;
};

/**
 * Run #parse_csv_in_chunks on \a str and gather the header and all records into owned strings.
 *
 * \param str: Complete CSV text.
 * \param options: Options forwarded to the parser.
 * \return Parsed data; `success` is false when the parser returned no value.
 */
static StrParseResult parse_csv_fields(const StringRef str, const CsvParseOptions &options)
{
  struct Chunk {
    Vector<Vector<std::string>> fields;
  };

  StrParseResult result;
  /* Intentionally not const so that the records can be moved out of the chunks below. */
  std::optional<Vector<Chunk>> chunks = parse_csv_in_chunks<Chunk>(
      Span<char>(str),
      options,
      [&](const CsvRecord &record) {
        for (const int64_t i : record.index_range()) {
          result.column_names.append(record.field_str(i));
        }
      },
      [&](const CsvRecords &records) {
        /* Named `chunk` to avoid shadowing the outer `result`. */
        Chunk chunk;
        for (const int64_t record_i : records.index_range()) {
          const CsvRecord record = records.record(record_i);
          Vector<std::string> fields;
          for (const int64_t column_i : record.index_range()) {
            fields.append(record.field_str(column_i));
          }
          chunk.fields.append(std::move(fields));
        }
        return chunk;
      });
  if (!chunks.has_value()) {
    result.success = false;
    return result;
  }
  result.success = true;
  for (Chunk &chunk : *chunks) {
    /* Move record by record; `std::move` on a const chunk would silently copy instead. */
    for (Vector<std::string> &fields : chunk.fields) {
      result.records.append(std::move(fields));
    }
  }
  return result;
}

/** Unquoted fields end at the delimiter, at a newline character or at the end of the buffer. */
TEST(csv_parse, FindEndOfSimpleField)
{
  EXPECT_EQ(find_end_of_simple_field("123", 0), 3);
  EXPECT_EQ(find_end_of_simple_field("123", 1), 3);
  EXPECT_EQ(find_end_of_simple_field("123", 2), 3);
  EXPECT_EQ(find_end_of_simple_field("123", 3), 3);
  EXPECT_EQ(find_end_of_simple_field("1'3", 3), 3);
  EXPECT_EQ(find_end_of_simple_field("123,", 0), 3);
  EXPECT_EQ(find_end_of_simple_field("123,456", 0), 3);
  EXPECT_EQ(find_end_of_simple_field("123,456,789", 0), 3);
  EXPECT_EQ(find_end_of_simple_field(" 23", 0), 3);
  EXPECT_EQ(find_end_of_simple_field("", 0), 0);
  EXPECT_EQ(find_end_of_simple_field("\n", 0), 0);
  EXPECT_EQ(find_end_of_simple_field("12\n", 0), 2);
  EXPECT_EQ(find_end_of_simple_field("0,12\n", 0), 1);
  EXPECT_EQ(find_end_of_simple_field("0,12\n", 2), 4);
  EXPECT_EQ(find_end_of_simple_field("\r\n", 0), 0);
  EXPECT_EQ(find_end_of_simple_field("12\r\n", 0), 2);
  EXPECT_EQ(find_end_of_simple_field("0,12\r\n", 0), 1);
  EXPECT_EQ(find_end_of_simple_field("0,12\r\n", 2), 4);
  EXPECT_EQ(find_end_of_simple_field("0,\t12\r\n", 2), 5);
  /* With a tab as delimiter the field starting at index 2 is empty. */
  EXPECT_EQ(find_end_of_simple_field("0,\t12\r\n", 2, '\t'), 2);
}

/**
 * The buffers passed here start after the opening quote. The expected value is the index of the
 * closing quote, or `std::nullopt` when every quote in the buffer is escaped or there is none.
 */
TEST(csv_parse, FindEndOfQuotedField)
{
  EXPECT_EQ(find_end_of_quoted_field("", 0), std::nullopt);
  EXPECT_EQ(find_end_of_quoted_field("123", 0), std::nullopt);
  EXPECT_EQ(find_end_of_quoted_field("123\n", 0), std::nullopt);
  EXPECT_EQ(find_end_of_quoted_field("123\r\n", 0), std::nullopt);
  EXPECT_EQ(find_end_of_quoted_field("123\"", 0), 3);
  EXPECT_EQ(find_end_of_quoted_field("\"", 0), 0);
  /* Doubled quotes are escaped quotes, so an even run of quotes never closes the field. */
  EXPECT_EQ(find_end_of_quoted_field("\"\"", 0), std::nullopt);
  EXPECT_EQ(find_end_of_quoted_field("\"\"\"", 0), 2);
  EXPECT_EQ(find_end_of_quoted_field("123\"\"", 0), std::nullopt);
  EXPECT_EQ(find_end_of_quoted_field("123\"\"\"", 0), 5);
  EXPECT_EQ(find_end_of_quoted_field("123\"\"\"\"", 0), std::nullopt);
  EXPECT_EQ(find_end_of_quoted_field("123\"\"\"\"\"", 0), 7);
  EXPECT_EQ(find_end_of_quoted_field("123\"\"0\"\"\"", 0), 8);
  /* Delimiters and newlines inside a quoted field do not end it. */
  EXPECT_EQ(find_end_of_quoted_field(",", 0), std::nullopt);
  EXPECT_EQ(find_end_of_quoted_field(",\"", 0), 1);
  EXPECT_EQ(find_end_of_quoted_field("0,1\"", 0), 3);
  EXPECT_EQ(find_end_of_quoted_field("0,1\n", 0), std::nullopt);
  EXPECT_EQ(find_end_of_quoted_field("0,1\"\"", 0), std::nullopt);
  EXPECT_EQ(find_end_of_quoted_field("0,1\"\"\"", 0), 5);
  EXPECT_EQ(find_end_of_quoted_field("0\n1\n\"", 0), 4);
  EXPECT_EQ(find_end_of_quoted_field("\n\"", 0), 1);
  /* A backslash escapes the following quote as well. */
  EXPECT_EQ(find_end_of_quoted_field("\\\"", 0), std::nullopt);
  EXPECT_EQ(find_end_of_quoted_field("\\\"\"", 0), 2);
  EXPECT_EQ(find_end_of_quoted_field("\\\"\"\"", 0), std::nullopt);
  EXPECT_EQ(find_end_of_quoted_field("\\\"\"\"\"", 0), 4);
}

/**
 * Only the first record of the buffer is parsed. Surrounding quotes are removed from quoted
 * fields, but escape sequences inside them are left as they are.
 */
TEST(csv_parse, ParseRecordFields)
{
  using StrVec = Vector<std::string>;
  EXPECT_EQ(parse_record_fields(""), StrVec());
  EXPECT_EQ(parse_record_fields("1"), StrVec{"1"});
  EXPECT_EQ(parse_record_fields("1,2"), StrVec({"1", "2"}));
  EXPECT_EQ(parse_record_fields("1,2,3"), StrVec({"1", "2", "3"}));
  EXPECT_EQ(parse_record_fields("1\n,2,3"), StrVec({"1"}));
  EXPECT_EQ(parse_record_fields("1, 2\n,3"), StrVec({"1", " 2"}));
  EXPECT_EQ(parse_record_fields("1, 2\r\n,3"), StrVec({"1", " 2"}));
  EXPECT_EQ(parse_record_fields("\"1,2,3\""), StrVec({"1,2,3"}));
  /* A missing closing quote makes the whole record invalid. */
  EXPECT_EQ(parse_record_fields("\"1,2,3"), std::nullopt);
  EXPECT_EQ(parse_record_fields("\"1,\n2\t\r\n,3\""), StrVec({"1,\n2\t\r\n,3"}));
  EXPECT_EQ(parse_record_fields("\"1,2,3\",\"4,5\""), StrVec({"1,2,3", "4,5"}));
  EXPECT_EQ(parse_record_fields(","), StrVec({"", ""}));
  EXPECT_EQ(parse_record_fields(",,"), StrVec({"", "", ""}));
  EXPECT_EQ(parse_record_fields(",,\n"), StrVec({"", "", ""}));
  EXPECT_EQ(parse_record_fields("\r\n,,"), StrVec());
  EXPECT_EQ(parse_record_fields("\"a\"\"b\""), StrVec({"a\"\"b"}));
  EXPECT_EQ(parse_record_fields("\"a\\\"b\""), StrVec({"a\\\"b"}));
  EXPECT_EQ(parse_record_fields("\"a\"\nb"), StrVec({"a"}));
  /* Text between the closing quote and the end of the record is not part of the field. */
  EXPECT_EQ(parse_record_fields("\"a\" \nb"), StrVec({"a"}));
}

/** Records may contain more or fewer fields than the header. */
TEST(csv_parse, ParseCsvBasic)
{
  CsvParseOptions options;
  /* NOTE(review): presumably the smallest chunk size, to exercise chunk splitting -- confirm
   * against the documentation of #CsvParseOptions. */
  options.chunk_size_bytes = 1;
  StrParseResult result = parse_csv_fields("a,b,c\n1,2,3,4\n4\n77,88,99\n", options);
  EXPECT_TRUE(result.success);

  /* Sizes are asserted (not just expected) because the checks below index into the vectors. */
  ASSERT_EQ(result.column_names.size(), 3);
  EXPECT_EQ(result.column_names[0], "a");
  EXPECT_EQ(result.column_names[1], "b");
  EXPECT_EQ(result.column_names[2], "c");

  ASSERT_EQ(result.records.size(), 3);
  ASSERT_EQ(result.records[0].size(), 4);
  ASSERT_EQ(result.records[1].size(), 1);
  ASSERT_EQ(result.records[2].size(), 3);

  EXPECT_EQ(result.records[0][0], "1");
  EXPECT_EQ(result.records[0][1], "2");
  EXPECT_EQ(result.records[0][2], "3");
  EXPECT_EQ(result.records[0][3], "4");

  EXPECT_EQ(result.records[1][0], "4");

  EXPECT_EQ(result.records[2][0], "77");
  EXPECT_EQ(result.records[2][1], "88");
  EXPECT_EQ(result.records[2][2], "99");
}

/** An unterminated quoted field makes parsing fail. */
TEST(csv_parse, ParseCsvMissingEnd)
{
  CsvParseOptions options;
  options.chunk_size_bytes = 1;
  StrParseResult result = parse_csv_fields("a,b,c\n1,\"2", options);
  EXPECT_FALSE(result.success);
}

/** Newlines inside a quoted field are part of the field and do not start a new record. */
TEST(csv_parse, ParseCsvMultiLine)
{
  CsvParseOptions options;
  options.chunk_size_bytes = 1;
  StrParseResult result = parse_csv_fields("a,b,c\n1,\"2\n\n\",3,4", options);
  EXPECT_TRUE(result.success);
  ASSERT_EQ(result.records.size(), 1);
  ASSERT_EQ(result.records[0].size(), 4);
  EXPECT_EQ(result.records[0][0], "1");
  EXPECT_EQ(result.records[0][1], "2\n\n");
  EXPECT_EQ(result.records[0][2], "3");
  EXPECT_EQ(result.records[0][3], "4");
}

/** Empty input parses successfully and yields neither column names nor records. */
TEST(csv_parse, ParseCsvEmpty)
{
  CsvParseOptions options;
  options.chunk_size_bytes = 1;
  StrParseResult result = parse_csv_fields("", options);
  EXPECT_TRUE(result.success);
  EXPECT_EQ(result.column_names.size(), 0);
  EXPECT_EQ(result.records.size(), 0);
}

/** A header line without a trailing newline and without any records is valid. */
TEST(csv_parse, ParseCsvTitlesOnly)
{
  CsvParseOptions options;
  options.chunk_size_bytes = 1;
  StrParseResult result = parse_csv_fields("a,b,c", options);
  EXPECT_TRUE(result.success);
  ASSERT_EQ(result.column_names.size(), 3);
  EXPECT_EQ(result.column_names[0], "a");
  EXPECT_EQ(result.column_names[1], "b");
  EXPECT_EQ(result.column_names[2], "c");
  EXPECT_TRUE(result.records.is_empty());
}

/** A newline at the very end of the input does not produce an additional empty record. */
TEST(csv_parse, ParseCsvTrailingNewline)
{
  CsvParseOptions options;
  options.chunk_size_bytes = 1;
  StrParseResult result = parse_csv_fields("a\n1\n2\n", options);
  EXPECT_TRUE(result.success);
  ASSERT_EQ(result.column_names.size(), 1);
  EXPECT_EQ(result.column_names[0], "a");
  ASSERT_EQ(result.records.size(), 2);
  ASSERT_EQ(result.records[0].size(), 1);
  EXPECT_EQ(result.records[0][0], "1");
  ASSERT_EQ(result.records[1].size(), 1);
  EXPECT_EQ(result.records[1][0], "2");
}

/**
 * With default options, a doubled quote and a backslash followed by a quote both become a
 * single quote; a backslash that is not followed by a quote is kept.
 */
TEST(csv_parse, UnescapeField)
{
  LinearAllocator<> allocator;
  CsvParseOptions options;
  EXPECT_EQ(unescape_field("", options, allocator), "");
  EXPECT_EQ(unescape_field("a", options, allocator), "a");
  EXPECT_EQ(unescape_field("abcd", options, allocator), "abcd");
  EXPECT_EQ(unescape_field("ab\\cd", options, allocator), "ab\\cd");
  EXPECT_EQ(unescape_field("ab\\\"cd", options, allocator), "ab\"cd");
  EXPECT_EQ(unescape_field("ab\"\"cd", options, allocator), "ab\"cd");
  EXPECT_EQ(unescape_field("ab\"\"\"\"cd", options, allocator), "ab\"\"cd");
  EXPECT_EQ(unescape_field("ab\"\"\\\"cd", options, allocator), "ab\"\"cd");
}

}  // namespace blender::csv_parse::tests