Files
test/source/blender/blenlib/tests/BLI_csv_parse_test.cc

274 lines
9.9 KiB
C++

/* SPDX-FileCopyrightText: 2025 Blender Authors
*
* SPDX-License-Identifier: Apache-2.0 */
#include "testing/testing.h"
#include "BLI_csv_parse.hh"
#include "BLI_string_ref.hh"
namespace blender::csv_parse::tests {
static std::optional<int64_t> find_end_of_simple_field(const StringRef buffer,
const int64_t start,
const char delimiter = ',')
{
return detail::find_end_of_simple_field(Span<char>(buffer), start, delimiter);
}
static std::optional<int64_t> find_end_of_quoted_field(
const StringRef buffer,
const int64_t start,
const char quote = '"',
const Span<char> escape_chars = Span<char>(StringRef("\"\\")))
{
return detail::find_end_of_quoted_field(Span<char>(buffer), start, quote, escape_chars);
}
/**
 * Test convenience wrapper around #detail::parse_record_fields. It takes the buffer as a
 * #StringRef and returns the parsed fields as owning strings so that they can be compared
 * against expected values easily.
 *
 * \param buffer: Text that is parsed.
 * \param start: Index in \a buffer at which parsing begins.
 * \param delimiter: Character that separates fields, a comma by default.
 * \param quote: Quote character, a double quote by default.
 * \param quote_escape_chars: Characters forwarded as quote escape characters; by default a
 * double quote and a backslash.
 * \return A copy of every field reported by the wrapped function, or #std::nullopt when the
 * wrapped function does not return an end index.
 */
static std::optional<Vector<std::string>> parse_record_fields(
    const StringRef buffer,
    const int64_t start = 0,
    const char delimiter = ',',
    const char quote = '"',
    /* Reference a string literal (static storage) instead of a temporary initializer list. This
     * is the same default expression that `find_end_of_quoted_field` uses. */
    const Span<char> quote_escape_chars = Span<char>(StringRef("\"\\")))
{
  Vector<Span<char>> fields;
  const std::optional<int64_t> end_of_record = detail::parse_record_fields(
      Span<char>(buffer), start, delimiter, quote, quote_escape_chars, fields);
  if (!end_of_record.has_value()) {
    return std::nullopt;
  }
  /* The spans reference `buffer`, copy them so that the result owns its data. */
  Vector<std::string> result;
  result.reserve(fields.size());
  for (const Span<char> field : fields) {
    result.append(std::string(field.begin(), field.end()));
  }
  return result;
}
/** Outcome of #parse_csv_fields, with every field copied into an owning string. */
struct StrParseResult {
  /** Set to true by #parse_csv_fields when the parser returned a value. */
  bool success{false};
  /** Fields of the record that the parser hands to its first callback. */
  Vector<std::string> column_names;
  /** Fields of all remaining records, one inner vector per record. */
  Vector<Vector<std::string>> records;
};
/**
 * Parses \a str with #parse_csv_in_chunks and gathers everything into owning strings.
 *
 * The first callback receives a single record whose fields are stored as column names. The
 * second callback converts every batch of records into a #Chunk; afterwards the records of all
 * chunks are concatenated in the order in which the parser returned the chunks.
 *
 * \param str: CSV text to parse.
 * \param options: Options forwarded to the parser unchanged.
 * \return The gathered result. `success` is false when the parser did not return a value; in
 * that case `records` stays empty.
 */
static StrParseResult parse_csv_fields(const StringRef str, const CsvParseOptions &options)
{
  struct Chunk {
    Vector<Vector<std::string>> fields;
  };
  StrParseResult result;
  /* Intentionally not `const`, so that the records can be moved out of the chunks below. */
  std::optional<Vector<Chunk>> chunks = parse_csv_in_chunks<Chunk>(
      Span<char>(str),
      options,
      [&](const CsvRecord &record) {
        for (const int64_t i : record.index_range()) {
          result.column_names.append(record.field_str(i));
        }
      },
      [&](const CsvRecords &records) {
        /* Named differently from the outer `result` to avoid shadowing the captured variable. */
        Chunk chunk;
        for (const int64_t record_i : records.index_range()) {
          const CsvRecord record = records.record(record_i);
          Vector<std::string> fields;
          for (const int64_t column_i : record.index_range()) {
            fields.append(record.field_str(column_i));
          }
          chunk.fields.append(std::move(fields));
        }
        return chunk;
      });
  if (!chunks.has_value()) {
    result.success = false;
    return result;
  }
  result.success = true;
  for (Chunk &chunk : *chunks) {
    /* Move record by record; moving from a `const` chunk would silently copy instead. */
    for (Vector<std::string> &record_fields : chunk.fields) {
      result.records.append(std::move(record_fields));
    }
  }
  return result;
}
/** Checks the end index reported for unquoted fields with various buffers and start indices. */
TEST(csv_parse, FindEndOfSimpleField)
{
  struct Case {
    const char *buffer;
    int64_t start;
    int64_t expected_end;
    char delimiter = ',';
  };
  const Case cases[] = {
      /* Without delimiter or newline the field runs until the end of the buffer. */
      {"123", 0, 3},
      {"123", 1, 3},
      {"123", 2, 3},
      {"123", 3, 3},
      {"1'3", 3, 3},
      /* The field stops in front of the first delimiter. */
      {"123,", 0, 3},
      {"123,456", 0, 3},
      {"123,456,789", 0, 3},
      {" 23", 0, 3},
      /* Empty buffer and `\n` line endings. */
      {"", 0, 0},
      {"\n", 0, 0},
      {"12\n", 0, 2},
      {"0,12\n", 0, 1},
      {"0,12\n", 2, 4},
      /* `\r\n` line endings. */
      {"\r\n", 0, 0},
      {"12\r\n", 0, 2},
      {"0,12\r\n", 0, 1},
      {"0,12\r\n", 2, 4},
      /* A tab only ends the field when it is the delimiter. */
      {"0,\t12\r\n", 2, 5},
      {"0,\t12\r\n", 2, 2, '\t'},
  };
  int64_t case_index = 0;
  for (const Case &test_case : cases) {
    EXPECT_EQ(find_end_of_simple_field(test_case.buffer, test_case.start, test_case.delimiter),
              test_case.expected_end)
        << "case index: " << case_index;
    case_index++;
  }
}
/**
 * Checks the index reported for the end of a quoted field. Every buffer is scanned from index 0
 * with the default quote and escape characters of the test helper.
 */
TEST(csv_parse, FindEndOfQuotedField)
{
  struct Case {
    const char *buffer;
    std::optional<int64_t> expected_end;
  };
  const Case cases[] = {
      /* No quote character at all: no end is found. */
      {"", std::nullopt},
      {"123", std::nullopt},
      {"123\n", std::nullopt},
      {"123\r\n", std::nullopt},
      /* Runs of quote characters: only an odd count yields an end, at the last quote. */
      {"123\"", 3},
      {"\"", 0},
      {"\"\"", std::nullopt},
      {"\"\"\"", 2},
      {"123\"\"", std::nullopt},
      {"123\"\"\"", 5},
      {"123\"\"\"\"", std::nullopt},
      {"123\"\"\"\"\"", 7},
      {"123\"\"0\"\"\"", 8},
      /* Delimiters and newlines do not end a quoted field. */
      {",", std::nullopt},
      {",\"", 1},
      {"0,1\"", 3},
      {"0,1\n", std::nullopt},
      {"0,1\"\"", std::nullopt},
      {"0,1\"\"\"", 5},
      {"0\n1\n\"", 4},
      {"\n\"", 1},
      /* A quote that directly follows a backslash does not end the field. */
      {"\\\"", std::nullopt},
      {"\\\"\"", 2},
      {"\\\"\"\"", std::nullopt},
      {"\\\"\"\"\"", 4},
  };
  int64_t case_index = 0;
  for (const Case &test_case : cases) {
    EXPECT_EQ(find_end_of_quoted_field(test_case.buffer, 0), test_case.expected_end)
        << "case index: " << case_index;
    case_index++;
  }
}
/**
 * Checks which fields are extracted from the first record of a buffer. All cases use the default
 * arguments of the test helper.
 */
TEST(csv_parse, ParseRecordFields)
{
  using StrVec = Vector<std::string>;
  struct Case {
    const char *buffer;
    std::optional<StrVec> expected_fields;
  };
  const Case cases[] = {
      /* Plain fields. */
      {"", StrVec()},
      {"1", StrVec{"1"}},
      {"1,2", StrVec({"1", "2"})},
      {"1,2,3", StrVec({"1", "2", "3"})},
      /* Only the fields in front of the first line break are returned. */
      {"1\n,2,3", StrVec({"1"})},
      {"1, 2\n,3", StrVec({"1", " 2"})},
      {"1, 2\r\n,3", StrVec({"1", " 2"})},
      /* Quoted fields may contain delimiters and line breaks, but have to be closed. */
      {"\"1,2,3\"", StrVec({"1,2,3"})},
      {"\"1,2,3", std::nullopt},
      {"\"1,\n2\t\r\n,3\"", StrVec({"1,\n2\t\r\n,3"})},
      {"\"1,2,3\",\"4,5\"", StrVec({"1,2,3", "4,5"})},
      /* Empty fields. */
      {",", StrVec({"", ""})},
      {",,", StrVec({"", "", ""})},
      {",,\n", StrVec({"", "", ""})},
      {"\r\n,,", StrVec()},
      /* Escaped quotes are returned as they appear in the buffer. */
      {"\"a\"\"b\"", StrVec({"a\"\"b"})},
      {"\"a\\\"b\"", StrVec({"a\\\"b"})},
      /* Text between the closing quote and the line break is not part of the field. */
      {"\"a\"\nb", StrVec({"a"})},
      {"\"a\" \nb", StrVec({"a"})},
  };
  int64_t case_index = 0;
  for (const Case &test_case : cases) {
    EXPECT_EQ(parse_record_fields(test_case.buffer), test_case.expected_fields)
        << "case index: " << case_index;
    case_index++;
  }
}
/**
 * Parses a small file with a title row and three records. The records have four, one and three
 * fields, so they do not all match the three column names.
 */
TEST(csv_parse, ParseCsvBasic)
{
  CsvParseOptions options;
  /* NOTE(review): a chunk size of one byte presumably splits the input into as many chunks as
   * possible; confirm against #parse_csv_in_chunks. */
  options.chunk_size_bytes = 1;
  StrParseResult result = parse_csv_fields("a,b,c\n1,2,3,4\n4\n77,88,99\n", options);
  EXPECT_TRUE(result.success);

  /* Sizes use fatal assertions because the element checks below index into the vectors. */
  ASSERT_EQ(result.column_names.size(), 3);
  EXPECT_EQ(result.column_names[0], "a");
  EXPECT_EQ(result.column_names[1], "b");
  EXPECT_EQ(result.column_names[2], "c");

  ASSERT_EQ(result.records.size(), 3);
  ASSERT_EQ(result.records[0].size(), 4);
  ASSERT_EQ(result.records[1].size(), 1);
  ASSERT_EQ(result.records[2].size(), 3);
  EXPECT_EQ(result.records[0][0], "1");
  EXPECT_EQ(result.records[0][1], "2");
  EXPECT_EQ(result.records[0][2], "3");
  EXPECT_EQ(result.records[0][3], "4");
  EXPECT_EQ(result.records[1][0], "4");
  EXPECT_EQ(result.records[2][0], "77");
  EXPECT_EQ(result.records[2][1], "88");
  EXPECT_EQ(result.records[2][2], "99");
}
/** Parsing is expected to fail when a quoted field is never closed. */
TEST(csv_parse, ParseCsvMissingEnd)
{
  /* The quote in front of `2` has no matching closing quote. */
  const StringRef csv = "a,b,c\n1,\"2";
  CsvParseOptions options;
  options.chunk_size_bytes = 1;
  const StrParseResult result = parse_csv_fields(csv, options);
  EXPECT_FALSE(result.success);
}
/**
 * A quoted field may span multiple lines. The line breaks inside the quotes are expected to be
 * part of the field value instead of ending the record.
 */
TEST(csv_parse, ParseCsvMultiLine)
{
  CsvParseOptions options;
  options.chunk_size_bytes = 1;
  StrParseResult result = parse_csv_fields("a,b,c\n1,\"2\n\n\",3,4", options);
  EXPECT_TRUE(result.success);

  /* Sizes use fatal assertions because the element checks below index into the vectors. */
  ASSERT_EQ(result.records.size(), 1);
  ASSERT_EQ(result.records[0].size(), 4);
  EXPECT_EQ(result.records[0][0], "1");
  EXPECT_EQ(result.records[0][1], "2\n\n");
  EXPECT_EQ(result.records[0][2], "3");
  EXPECT_EQ(result.records[0][3], "4");
}
/** An empty input is expected to parse successfully without column names or records. */
TEST(csv_parse, ParseCsvEmpty)
{
  const StringRef csv = "";
  CsvParseOptions options;
  options.chunk_size_bytes = 1;
  const StrParseResult result = parse_csv_fields(csv, options);
  EXPECT_TRUE(result.success);
  EXPECT_TRUE(result.column_names.is_empty());
  EXPECT_TRUE(result.records.is_empty());
}
/** An input that only consists of the title row yields column names but no records. */
TEST(csv_parse, ParseCsvTitlesOnly)
{
  CsvParseOptions options;
  options.chunk_size_bytes = 1;
  StrParseResult result = parse_csv_fields("a,b,c", options);
  EXPECT_TRUE(result.success);

  /* The size uses a fatal assertion because the checks below index into the vector. */
  ASSERT_EQ(result.column_names.size(), 3);
  EXPECT_EQ(result.column_names[0], "a");
  EXPECT_EQ(result.column_names[1], "b");
  EXPECT_EQ(result.column_names[2], "c");
  EXPECT_TRUE(result.records.is_empty());
}
/** A line break at the very end of the input must not produce an additional record. */
TEST(csv_parse, ParseCsvTrailingNewline)
{
  CsvParseOptions options;
  options.chunk_size_bytes = 1;
  StrParseResult result = parse_csv_fields("a\n1\n2\n", options);
  EXPECT_TRUE(result.success);

  /* Sizes use fatal assertions because the element checks below index into the vectors. */
  ASSERT_EQ(result.column_names.size(), 1);
  EXPECT_EQ(result.column_names[0], "a");

  ASSERT_EQ(result.records.size(), 2);
  ASSERT_EQ(result.records[0].size(), 1);
  EXPECT_EQ(result.records[0][0], "1");
  ASSERT_EQ(result.records[1].size(), 1);
  EXPECT_EQ(result.records[1][0], "2");
}
/** Checks the result of #unescape_field with default parse options. */
TEST(csv_parse, UnescapeField)
{
  struct Case {
    const char *escaped;
    const char *expected;
  };
  const Case cases[] = {
      /* Text without quote characters is expected back unchanged, including a lone backslash. */
      {"", ""},
      {"a", "a"},
      {"abcd", "abcd"},
      {"ab\\cd", "ab\\cd"},
      /* Both `\"` and `""` are expected to become a single quote character. */
      {"ab\\\"cd", "ab\"cd"},
      {"ab\"\"cd", "ab\"cd"},
      {"ab\"\"\"\"cd", "ab\"\"cd"},
      {"ab\"\"\\\"cd", "ab\"\"cd"},
  };
  LinearAllocator<> allocator;
  CsvParseOptions options;
  int64_t case_index = 0;
  for (const Case &test_case : cases) {
    EXPECT_EQ(unescape_field(test_case.escaped, options, allocator), test_case.expected)
        << "case index: " << case_index;
    case_index++;
  }
}
} // namespace blender::csv_parse::tests