From 3263dd6333791099ee0656ef40902252714e643e Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Wed, 19 Feb 2025 12:36:12 +0100 Subject: [PATCH] Fix: Geometry Nodes: unescape column names after csv import --- source/blender/blenlib/BLI_csv_parse.hh | 10 +++++++ source/blender/blenlib/BLI_string_ref.hh | 3 ++ source/blender/blenlib/intern/csv_parse.cc | 28 +++++++++++++++++++ .../blenlib/tests/BLI_csv_parse_test.cc | 14 ++++++++++ .../blenlib/tests/BLI_string_ref_test.cc | 6 ++++ source/blender/io/csv/importer/csv_reader.cc | 6 ++-- 6 files changed, 65 insertions(+), 2 deletions(-) diff --git a/source/blender/blenlib/BLI_csv_parse.hh b/source/blender/blenlib/BLI_csv_parse.hh index 9f4138340d0..c37780b36ca 100644 --- a/source/blender/blenlib/BLI_csv_parse.hh +++ b/source/blender/blenlib/BLI_csv_parse.hh @@ -4,6 +4,7 @@ #include "BLI_any.hh" #include "BLI_function_ref.hh" +#include "BLI_linear_allocator.hh" #include "BLI_offset_indices.hh" #include "BLI_string_ref.hh" #include "BLI_vector.hh" @@ -119,6 +120,15 @@ inline std::optional> parse_csv_in_chunks( return result_chunks; } +/** + * Fields in a csv file may contain escaped quote characters (e.g. "" or \"). This function replaces + * these with just the quote character. The returned string may reference the input string if + * it's the same. Otherwise the returned string is allocated in the given allocator. + */ +StringRef unescape_field(const StringRef str, + const CsvParseOptions &options, + LinearAllocator<> &allocator); + /* -------------------------------------------------------------------- */ /** \name #CsvRecord inline functions. 
* \{ */ diff --git a/source/blender/blenlib/BLI_string_ref.hh b/source/blender/blenlib/BLI_string_ref.hh index 33cd01b9dfa..f50465ad6bb 100644 --- a/source/blender/blenlib/BLI_string_ref.hh +++ b/source/blender/blenlib/BLI_string_ref.hh @@ -144,6 +144,7 @@ class StringRef : public StringRefBase { constexpr StringRef(const char *str, int64_t length); constexpr StringRef(const char *begin, const char *one_after_end); constexpr StringRef(std::string_view view); + constexpr StringRef(Span span); StringRef(const std::string &str); constexpr StringRef drop_prefix(int64_t n) const; @@ -558,6 +559,8 @@ constexpr StringRef::StringRef(std::string_view view) { } +constexpr StringRef::StringRef(Span span) : StringRefBase(span.data(), span.size()) {} + /** \} */ /* -------------------------------------------------------------------- */ diff --git a/source/blender/blenlib/intern/csv_parse.cc b/source/blender/blenlib/intern/csv_parse.cc index 5ae3ab82732..6ddd0d8daa3 100644 --- a/source/blender/blenlib/intern/csv_parse.cc +++ b/source/blender/blenlib/intern/csv_parse.cc @@ -157,6 +157,34 @@ std::optional>> parse_csv_in_chunks( return results; } +StringRef unescape_field(const StringRef str, + const CsvParseOptions &options, + LinearAllocator<> &allocator) +{ + const StringRef escape_chars{options.quote_escape_chars}; + if (str.find_first_of(escape_chars) == StringRef::not_found) { + return str; + } + /* The actual unescaped string may be shorter, but not longer. */ + MutableSpan unescaped_str = allocator.allocate_array(str.size()); + int64_t i = 0; + int64_t escaped_size = 0; + while (i < str.size()) { + const char c = str[i]; + if (options.quote_escape_chars.contains(c)) { + if (i + 1 < str.size() && str[i + 1] == options.quote) { + /* Ignore the current escape character. 
*/ + unescaped_str[escaped_size++] = options.quote; + i += 2; + continue; + } + } + unescaped_str[escaped_size++] = c; + i++; + } + return StringRef(unescaped_str.take_front(escaped_size)); +} + namespace detail { std::optional parse_record_fields(const Span buffer, diff --git a/source/blender/blenlib/tests/BLI_csv_parse_test.cc b/source/blender/blenlib/tests/BLI_csv_parse_test.cc index effc2bb2994..53f9ecb87a2 100644 --- a/source/blender/blenlib/tests/BLI_csv_parse_test.cc +++ b/source/blender/blenlib/tests/BLI_csv_parse_test.cc @@ -256,4 +256,18 @@ TEST(csv_parse, ParseCsvTrailingNewline) EXPECT_EQ(result.records[1][0], "2"); } +TEST(csv_parse, UnescapeField) +{ + LinearAllocator<> allocator; + CsvParseOptions options; + EXPECT_EQ(unescape_field("", options, allocator), ""); + EXPECT_EQ(unescape_field("a", options, allocator), "a"); + EXPECT_EQ(unescape_field("abcd", options, allocator), "abcd"); + EXPECT_EQ(unescape_field("ab\\cd", options, allocator), "ab\\cd"); + EXPECT_EQ(unescape_field("ab\\\"cd", options, allocator), "ab\"cd"); + EXPECT_EQ(unescape_field("ab\"\"cd", options, allocator), "ab\"cd"); + EXPECT_EQ(unescape_field("ab\"\"\"\"cd", options, allocator), "ab\"\"cd"); + EXPECT_EQ(unescape_field("ab\"\"\\\"cd", options, allocator), "ab\"\"cd"); +} + } // namespace blender::csv_parse::tests diff --git a/source/blender/blenlib/tests/BLI_string_ref_test.cc b/source/blender/blenlib/tests/BLI_string_ref_test.cc index 7628bdca9d4..356e9a56118 100644 --- a/source/blender/blenlib/tests/BLI_string_ref_test.cc +++ b/source/blender/blenlib/tests/BLI_string_ref_test.cc @@ -163,6 +163,12 @@ TEST(string_ref, StdStringConstructor) EXPECT_EQ(ref.data(), str.data()); } +TEST(string_ref, SpanConstructor) +{ + EXPECT_EQ(StringRef(Span("hello", 5)), "hello"); + EXPECT_EQ(StringRef(Span("hello", 2)), "he"); +} + TEST(string_ref, SubscriptOperator) { StringRef ref("hello"); diff --git a/source/blender/io/csv/importer/csv_reader.cc 
b/source/blender/io/csv/importer/csv_reader.cc index 90476e944c4..ebc95fc6340 100644 --- a/source/blender/io/csv/importer/csv_reader.cc +++ b/source/blender/io/csv/importer/csv_reader.cc @@ -277,13 +277,16 @@ PointCloud *import_csv_as_point_cloud(const CSVImportParams &import_params) return nullptr; } + LinearAllocator<> allocator; Array columns_info; + csv_parse::CsvParseOptions parse_options; const auto parse_header = [&](const csv_parse::CsvRecord &record) { columns_info.reinitialize(record.size()); for (const int i : record.index_range()) { ColumnInfo &column_info = columns_info[i]; - const StringRef name = record.field_str(i); + const StringRef name = csv_parse::unescape_field( + record.field_str(i), parse_options, allocator); column_info.name = name; if (!bke::allow_procedural_attribute_access(name) || bke::attribute_name_is_anonymous(name) || name.is_empty()) @@ -298,7 +301,6 @@ PointCloud *import_csv_as_point_cloud(const CSVImportParams &import_params) }; const Span buffer_span{static_cast(buffer), int64_t(buffer_len)}; - csv_parse::CsvParseOptions parse_options; std::optional> parsed_chunks = csv_parse::parse_csv_in_chunks( buffer_span, parse_options, parse_header, parse_data_chunk);