Fix: Geometry Nodes: unescape column names after csv import

2025-02-19 12:36:12 +01:00
parent 3968be08b5
commit 3263dd6333
6 changed files with 65 additions and 2 deletions
--- a/source/blender/blenlib/BLI_csv_parse.hh
+++ b/source/blender/blenlib/BLI_csv_parse.hh
@@ -4,6 +4,7 @@

 #include "BLI_any.hh"
 #include "BLI_function_ref.hh"
+#include "BLI_linear_allocator.hh"
 #include "BLI_offset_indices.hh"
 #include "BLI_string_ref.hh"
 #include "BLI_vector.hh"
@@ -119,6 +120,15 @@ inline std::optional<Vector<ChunkT>> parse_csv_in_chunks(
  return result_chunks;
 }

+/**
+ * Fields in a csv file may contain escaped quote characters (e.g. "" or \"). This function
+ * replaces these with just the quote character. The returned string may reference the input
+ * string if nothing was unescaped. Otherwise, it is allocated in the given allocator.
+ */
+StringRef unescape_field(const StringRef str,
+                         const CsvParseOptions &options,
+                         LinearAllocator<> &allocator);
+
 /* -------------------------------------------------------------------- */
 /** \name #CsvRecord inline functions.
 * \{ */
--- a/source/blender/blenlib/BLI_string_ref.hh
+++ b/source/blender/blenlib/BLI_string_ref.hh
@@ -144,6 +144,7 @@ class StringRef : public StringRefBase {
  constexpr StringRef(const char *str, int64_t length);
  constexpr StringRef(const char *begin, const char *one_after_end);
  constexpr StringRef(std::string_view view);
+  constexpr StringRef(Span<char> span);
  StringRef(const std::string &str);

  constexpr StringRef drop_prefix(int64_t n) const;
@@ -558,6 +559,8 @@ constexpr StringRef::StringRef(std::string_view view)
 {
 }

+constexpr StringRef::StringRef(Span<char> span) : StringRefBase(span.data(), span.size()) {}
+
 /** \} */

 /* -------------------------------------------------------------------- */
--- a/source/blender/blenlib/intern/csv_parse.cc
+++ b/source/blender/blenlib/intern/csv_parse.cc
@@ -157,6 +157,34 @@ std::optional<Vector<Any<>>> parse_csv_in_chunks(
  return results;
 }

+StringRef unescape_field(const StringRef str,
+                         const CsvParseOptions &options,
+                         LinearAllocator<> &allocator)
+{
+  const StringRef escape_chars{options.quote_escape_chars};
+  if (str.find_first_of(escape_chars) == StringRef::not_found) {
+    return str;
+  }
+  /* Unescaping only ever drops characters, so the input size is an upper bound for the result. */
+  MutableSpan<char> unescaped_str = allocator.allocate_array<char>(str.size());
+  int64_t i = 0;
+  int64_t escaped_size = 0;
+  while (i < str.size()) {
+    const char c = str[i];
+    if (options.quote_escape_chars.contains(c)) {
+      if (i + 1 < str.size() && str[i + 1] == options.quote) {
+        /* Drop the escape character and emit only the quote that follows it. */
+        unescaped_str[escaped_size++] = options.quote;
+        i += 2;
+        continue;
+      }
+    }
+    unescaped_str[escaped_size++] = c;
+    i++;
+  }
+  return StringRef(unescaped_str.take_front(escaped_size));
+}
+
 namespace detail {

 std::optional<int64_t> parse_record_fields(const Span<char> buffer,
--- a/source/blender/blenlib/tests/BLI_csv_parse_test.cc
+++ b/source/blender/blenlib/tests/BLI_csv_parse_test.cc
@@ -256,4 +256,18 @@ TEST(csv_parse, ParseCsvTrailingNewline)
  EXPECT_EQ(result.records[1][0], "2");
 }

+TEST(csv_parse, UnescapeField)
+{
+  LinearAllocator<> allocator;
+  CsvParseOptions options;
+  EXPECT_EQ(unescape_field("", options, allocator), "");
+  EXPECT_EQ(unescape_field("a", options, allocator), "a");
+  EXPECT_EQ(unescape_field("abcd", options, allocator), "abcd");
+  EXPECT_EQ(unescape_field("ab\\cd", options, allocator), "ab\\cd");
+  EXPECT_EQ(unescape_field("ab\\\"cd", options, allocator), "ab\"cd");
+  EXPECT_EQ(unescape_field("ab\"\"cd", options, allocator), "ab\"cd");
+  EXPECT_EQ(unescape_field("ab\"\"\"\"cd", options, allocator), "ab\"\"cd");
+  EXPECT_EQ(unescape_field("ab\"\"\\\"cd", options, allocator), "ab\"\"cd");
+}
+
 }  // namespace blender::csv_parse::tests
--- a/source/blender/blenlib/tests/BLI_string_ref_test.cc
+++ b/source/blender/blenlib/tests/BLI_string_ref_test.cc
@@ -163,6 +163,12 @@ TEST(string_ref, StdStringConstructor)
  EXPECT_EQ(ref.data(), str.data());
 }

+TEST(string_ref, SpanConstructor)
+{
+  EXPECT_EQ(StringRef(Span<char>("hello", 5)), "hello");
+  EXPECT_EQ(StringRef(Span<char>("hello", 2)), "he");
+}
+
 TEST(string_ref, SubscriptOperator)
 {
  StringRef ref("hello");
--- a/source/blender/io/csv/importer/csv_reader.cc
+++ b/source/blender/io/csv/importer/csv_reader.cc
@@ -277,13 +277,16 @@ PointCloud *import_csv_as_point_cloud(const CSVImportParams &import_params)
    return nullptr;
  }

+  LinearAllocator<> allocator;
  Array<ColumnInfo> columns_info;
+  csv_parse::CsvParseOptions parse_options;

  const auto parse_header = [&](const csv_parse::CsvRecord &record) {
    columns_info.reinitialize(record.size());
    for (const int i : record.index_range()) {
      ColumnInfo &column_info = columns_info[i];
-      const StringRef name = record.field_str(i);
+      const StringRef name = csv_parse::unescape_field(
+          record.field_str(i), parse_options, allocator);
      column_info.name = name;
      if (!bke::allow_procedural_attribute_access(name) ||
          bke::attribute_name_is_anonymous(name) || name.is_empty())
@@ -298,7 +301,6 @@ PointCloud *import_csv_as_point_cloud(const CSVImportParams &import_params)
  };

  const Span<char> buffer_span{static_cast<char *>(buffer), int64_t(buffer_len)};
-  csv_parse::CsvParseOptions parse_options;
  std::optional<Vector<ChunkResult>> parsed_chunks = csv_parse::parse_csv_in_chunks<ChunkResult>(
      buffer_span, parse_options, parse_header, parse_data_chunk);