Fix: Geometry Nodes: unescape column names after csv import

This commit is contained in:
Jacques Lucke
2025-02-19 12:36:12 +01:00
parent 3968be08b5
commit 3263dd6333
6 changed files with 65 additions and 2 deletions

View File

@@ -4,6 +4,7 @@
#include "BLI_any.hh"
#include "BLI_function_ref.hh"
#include "BLI_linear_allocator.hh"
#include "BLI_offset_indices.hh"
#include "BLI_string_ref.hh"
#include "BLI_vector.hh"
@@ -119,6 +120,15 @@ inline std::optional<Vector<ChunkT>> parse_csv_in_chunks(
return result_chunks;
}
/**
 * Fields in a csv file may contain escaped quote characters (e.g. "" or \"). This function
 * replaces each such escape sequence with just the quote character.
 *
 * If the input contains none of the escape characters from \a options, the input string is
 * returned as is, so the result may reference the same memory as \a str. Otherwise the returned
 * string is allocated in the given \a allocator.
 */
StringRef unescape_field(const StringRef str,
const CsvParseOptions &options,
LinearAllocator<> &allocator);
/* -------------------------------------------------------------------- */
/** \name #CsvRecord inline functions.
* \{ */

View File

@@ -144,6 +144,7 @@ class StringRef : public StringRefBase {
constexpr StringRef(const char *str, int64_t length);
constexpr StringRef(const char *begin, const char *one_after_end);
constexpr StringRef(std::string_view view);
constexpr StringRef(Span<char> span);
StringRef(const std::string &str);
constexpr StringRef drop_prefix(int64_t n) const;
@@ -558,6 +559,8 @@ constexpr StringRef::StringRef(std::string_view view)
{
}
/** Reference the characters viewed by \a span, using its data pointer and size directly. */
constexpr StringRef::StringRef(Span<char> span)
    : StringRefBase(span.data(), span.size())
{
}
/** \} */
/* -------------------------------------------------------------------- */

View File

@@ -157,6 +157,34 @@ std::optional<Vector<Any<>>> parse_csv_in_chunks(
return results;
}
StringRef unescape_field(const StringRef str,
                         const CsvParseOptions &options,
                         LinearAllocator<> &allocator)
{
  /* Fast path: without any escape character there is nothing to replace, so the input can be
   * returned without copying. */
  const StringRef possible_escapes{options.quote_escape_chars};
  if (str.find_first_of(possible_escapes) == StringRef::not_found) {
    return str;
  }

  /* Every escape sequence collapses two characters into one, so the result never needs more
   * space than the input. */
  const int64_t input_size = str.size();
  MutableSpan<char> buffer = allocator.allocate_array<char>(input_size);

  int64_t written = 0;
  for (int64_t read = 0; read < input_size; read++) {
    const char current = str[read];
    /* An escape character only counts as such when it is directly followed by the quote. */
    const bool is_escaped_quote = options.quote_escape_chars.contains(current) &&
                                  read + 1 < input_size && str[read + 1] == options.quote;
    if (is_escaped_quote) {
      /* Drop the escape character and keep only the quote it protects. */
      buffer[written] = options.quote;
      /* Skip the quote as well; the loop increment steps over the escape character. */
      read++;
    }
    else {
      buffer[written] = current;
    }
    written++;
  }
  return StringRef(buffer.take_front(written));
}
namespace detail {
std::optional<int64_t> parse_record_fields(const Span<char> buffer,

View File

@@ -256,4 +256,18 @@ TEST(csv_parse, ParseCsvTrailingNewline)
EXPECT_EQ(result.records[1][0], "2");
}
TEST(csv_parse, UnescapeField)
{
  LinearAllocator<> allocator;
  CsvParseOptions options;
  /* Shorthand so that every case below only spells out its input and expected output. */
  const auto unescape = [&](const StringRef field) {
    return unescape_field(field, options, allocator);
  };

  /* Inputs without any quote to unescape are passed through. */
  EXPECT_EQ(unescape(""), "");
  EXPECT_EQ(unescape("a"), "a");
  EXPECT_EQ(unescape("abcd"), "abcd");
  /* A backslash that is not followed by a quote is kept. */
  EXPECT_EQ(unescape("ab\\cd"), "ab\\cd");

  /* A single escaped quote, using either a backslash or a doubled quote. */
  EXPECT_EQ(unescape("ab\\\"cd"), "ab\"cd");
  EXPECT_EQ(unescape("ab\"\"cd"), "ab\"cd");

  /* Two consecutive escaped quotes, also with mixed escape styles. */
  EXPECT_EQ(unescape("ab\"\"\"\"cd"), "ab\"\"cd");
  EXPECT_EQ(unescape("ab\"\"\\\"cd"), "ab\"\"cd");
}
} // namespace blender::csv_parse::tests

View File

@@ -163,6 +163,12 @@ TEST(string_ref, StdStringConstructor)
EXPECT_EQ(ref.data(), str.data());
}
TEST(string_ref, SpanConstructor)
{
  /* The span size, not a null terminator, determines the length of the resulting string. */
  const Span<char> full_span("hello", 5);
  const Span<char> prefix_span("hello", 2);
  EXPECT_EQ(StringRef(full_span), "hello");
  EXPECT_EQ(StringRef(prefix_span), "he");
}
TEST(string_ref, SubscriptOperator)
{
StringRef ref("hello");

View File

@@ -277,13 +277,16 @@ PointCloud *import_csv_as_point_cloud(const CSVImportParams &import_params)
return nullptr;
}
LinearAllocator<> allocator;
Array<ColumnInfo> columns_info;
csv_parse::CsvParseOptions parse_options;
const auto parse_header = [&](const csv_parse::CsvRecord &record) {
columns_info.reinitialize(record.size());
for (const int i : record.index_range()) {
ColumnInfo &column_info = columns_info[i];
const StringRef name = record.field_str(i);
const StringRef name = csv_parse::unescape_field(
record.field_str(i), parse_options, allocator);
column_info.name = name;
if (!bke::allow_procedural_attribute_access(name) ||
bke::attribute_name_is_anonymous(name) || name.is_empty())
@@ -298,7 +301,6 @@ PointCloud *import_csv_as_point_cloud(const CSVImportParams &import_params)
};
const Span<char> buffer_span{static_cast<char *>(buffer), int64_t(buffer_len)};
csv_parse::CsvParseOptions parse_options;
std::optional<Vector<ChunkResult>> parsed_chunks = csv_parse::parse_csv_in_chunks<ChunkResult>(
buffer_span, parse_options, parse_header, parse_data_chunk);