Fix: Geometry Nodes: unescape column names after csv import
This commit is contained in:
@@ -4,6 +4,7 @@
|
||||
|
||||
#include "BLI_any.hh"
|
||||
#include "BLI_function_ref.hh"
|
||||
#include "BLI_linear_allocator.hh"
|
||||
#include "BLI_offset_indices.hh"
|
||||
#include "BLI_string_ref.hh"
|
||||
#include "BLI_vector.hh"
|
||||
@@ -119,6 +120,15 @@ inline std::optional<Vector<ChunkT>> parse_csv_in_chunks(
|
||||
return result_chunks;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fields in a csv file may contain escaped quote characters (e.g. "" or \"). This function replaces
|
||||
* these with just the quote character. The returned string may reference the input string if
|
||||
* it's the same. Otherwise the returned string is allocated in the given allocator.
|
||||
*/
|
||||
StringRef unescape_field(const StringRef str,
|
||||
const CsvParseOptions &options,
|
||||
LinearAllocator<> &allocator);
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
/** \name #CsvRecord inline functions.
|
||||
* \{ */
|
||||
|
||||
@@ -144,6 +144,7 @@ class StringRef : public StringRefBase {
|
||||
constexpr StringRef(const char *str, int64_t length);
|
||||
constexpr StringRef(const char *begin, const char *one_after_end);
|
||||
constexpr StringRef(std::string_view view);
|
||||
constexpr StringRef(Span<char> span);
|
||||
StringRef(const std::string &str);
|
||||
|
||||
constexpr StringRef drop_prefix(int64_t n) const;
|
||||
@@ -558,6 +559,8 @@ constexpr StringRef::StringRef(std::string_view view)
|
||||
{
|
||||
}
|
||||
|
||||
/** Build a #StringRef from the data pointer and size of the given span. */
constexpr StringRef::StringRef(Span<char> span)
    : StringRefBase(span.data(), span.size())
{
}
|
||||
|
||||
/** \} */
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
|
||||
@@ -157,6 +157,34 @@ std::optional<Vector<Any<>>> parse_csv_in_chunks(
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
 * Replace every escaped quote in \a str (an escape character from
 * `options.quote_escape_chars` directly followed by `options.quote`) with just the quote.
 *
 * When \a str contains none of the escape characters it is returned as is. Otherwise the
 * result is written to a new buffer that is allocated from \a allocator.
 */
StringRef unescape_field(const StringRef str,
                         const CsvParseOptions &options,
                         LinearAllocator<> &allocator)
{
  const StringRef escape_chars{options.quote_escape_chars};
  const bool has_escape_char = str.find_first_of(escape_chars) != StringRef::not_found;
  if (!has_escape_char) {
    /* Nothing can be escaped, so the input can be referenced directly. */
    return str;
  }

  /* Unescaping only ever drops characters, so the input size is an upper bound. */
  const int64_t input_size = str.size();
  MutableSpan<char> buffer = allocator.allocate_array<char>(input_size);
  int64_t written = 0;
  for (int64_t read = 0; read < input_size; read++) {
    const char current = str[read];
    const bool is_escaped_quote = options.quote_escape_chars.contains(current) &&
                                  read + 1 < input_size && str[read + 1] == options.quote;
    if (is_escaped_quote) {
      /* Drop the escape character and step over the quote it escapes. */
      buffer[written++] = options.quote;
      read++;
    }
    else {
      buffer[written++] = current;
    }
  }
  return StringRef(buffer.take_front(written));
}
|
||||
|
||||
namespace detail {
|
||||
|
||||
std::optional<int64_t> parse_record_fields(const Span<char> buffer,
|
||||
|
||||
@@ -256,4 +256,18 @@ TEST(csv_parse, ParseCsvTrailingNewline)
|
||||
EXPECT_EQ(result.records[1][0], "2");
|
||||
}
|
||||
|
||||
TEST(csv_parse, UnescapeField)
{
  LinearAllocator<> allocator;
  CsvParseOptions options;
  /* Shorthand so that each case only spells out the input and the expected output. */
  const auto unescape = [&](const StringRef field) {
    return unescape_field(field, options, allocator);
  };

  /* Inputs without any escaped quote stay unchanged. */
  EXPECT_EQ(unescape(""), "");
  EXPECT_EQ(unescape("a"), "a");
  EXPECT_EQ(unescape("abcd"), "abcd");
  EXPECT_EQ(unescape("ab\\cd"), "ab\\cd");

  /* A single escaped quote, using either escape character. */
  EXPECT_EQ(unescape("ab\\\"cd"), "ab\"cd");
  EXPECT_EQ(unescape("ab\"\"cd"), "ab\"cd");

  /* Two escaped quotes in a row. */
  EXPECT_EQ(unescape("ab\"\"\"\"cd"), "ab\"\"cd");
  EXPECT_EQ(unescape("ab\"\"\\\"cd"), "ab\"\"cd");
}
|
||||
|
||||
} // namespace blender::csv_parse::tests
|
||||
|
||||
@@ -163,6 +163,12 @@ TEST(string_ref, StdStringConstructor)
|
||||
EXPECT_EQ(ref.data(), str.data());
|
||||
}
|
||||
|
||||
TEST(string_ref, SpanConstructor)
{
  /* The span size decides how many characters the resulting string references. */
  const Span<char> full_span("hello", 5);
  const Span<char> prefix_span("hello", 2);

  EXPECT_EQ(StringRef(full_span), "hello");
  EXPECT_EQ(StringRef(prefix_span), "he");
}
|
||||
|
||||
TEST(string_ref, SubscriptOperator)
|
||||
{
|
||||
StringRef ref("hello");
|
||||
|
||||
@@ -277,13 +277,16 @@ PointCloud *import_csv_as_point_cloud(const CSVImportParams &import_params)
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
LinearAllocator<> allocator;
|
||||
Array<ColumnInfo> columns_info;
|
||||
csv_parse::CsvParseOptions parse_options;
|
||||
|
||||
const auto parse_header = [&](const csv_parse::CsvRecord &record) {
|
||||
columns_info.reinitialize(record.size());
|
||||
for (const int i : record.index_range()) {
|
||||
ColumnInfo &column_info = columns_info[i];
|
||||
const StringRef name = record.field_str(i);
|
||||
const StringRef name = csv_parse::unescape_field(
|
||||
record.field_str(i), parse_options, allocator);
|
||||
column_info.name = name;
|
||||
if (!bke::allow_procedural_attribute_access(name) ||
|
||||
bke::attribute_name_is_anonymous(name) || name.is_empty())
|
||||
@@ -298,7 +301,6 @@ PointCloud *import_csv_as_point_cloud(const CSVImportParams &import_params)
|
||||
};
|
||||
|
||||
const Span<char> buffer_span{static_cast<char *>(buffer), int64_t(buffer_len)};
|
||||
csv_parse::CsvParseOptions parse_options;
|
||||
std::optional<Vector<ChunkResult>> parsed_chunks = csv_parse::parse_csv_in_chunks<ChunkResult>(
|
||||
buffer_span, parse_options, parse_header, parse_data_chunk);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user