Geometry Nodes: Remove extra copy step in CSV import node

Remove intermediate `CsvData` struct and create a point cloud
directly instead. Though the bottleneck is almost certainly parsing
the file, this removes a copy of the attribute values and reduces
peak memory usage.

Also do some small cleanups to the import process: use C++
casting, prefer StringRef over std::string, remove unnecessary
whitespace, and remove non-helpful comments.
Hans Goudey
2025-02-10 12:19:34 -05:00
parent 78da0dda87
commit f01af5f972
4 changed files with 58 additions and 154 deletions
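
For context, a condensed sketch of the new flow, pulled together from the csv_reader.cc hunk further down (file reading, header parsing, and error handling are omitted; variable names are the ones used in the diff):

/* Create the point cloud up front instead of filling an intermediate CsvData. */
PointCloud *pointcloud = BKE_pointcloud_new_nomain(rows_num);
pointcloud->positions_for_write().fill(float3(0));

/* Add one attribute per CSV column and keep a writable span for each. */
bke::MutableAttributeAccessor attributes = pointcloud->attributes_for_write();
Array<bke::GSpanAttributeWriter> attribute_writers(names.size());
Array<GMutableSpan> attribute_data(names.size());
for (const int i : names.index_range()) {
  attribute_writers[i] = attributes.lookup_or_add_for_write_span(
      names[i], bke::AttrDomain::Point, column_types[i]);
  attribute_data[i] = attribute_writers[i].span;
}

/* Parsing writes every cell straight into the attribute spans, so the
 * extra copy from CsvData::to_point_cloud() is no longer needed. */
parse_csv_data(attribute_data, column_types, names, data_buffer, import_params);

for (bke::GSpanAttributeWriter &attr : attribute_writers) {
  attr.finish();
}
return pointcloud;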


@@ -18,11 +18,9 @@ set(INC_SYS
)
set(SRC
importer/csv_data.cc
importer/csv_reader.cc
IO_csv.hh
importer/csv_data.hh
)
set(LIB


@@ -1,61 +0,0 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup csv
*/
#include "BKE_attribute.hh"
#include "BKE_customdata.hh"
#include "BKE_pointcloud.hh"
#include "BLI_array_utils.hh"
#include "csv_data.hh"
namespace blender::io::csv {
CsvData::CsvData(const int64_t rows_num,
const Span<std::string> column_names,
const Span<eCustomDataType> column_types)
: data(column_names.size()),
rows_num(rows_num),
columns_num(column_names.size()),
column_names(column_names),
column_types(column_types)
{
for (const int i : IndexRange(this->columns_num)) {
data[i] = GArray(*bke::custom_data_type_to_cpp_type(this->column_types[i]), rows_num);
}
}
PointCloud *CsvData::to_point_cloud() const
{
PointCloud *point_cloud = BKE_pointcloud_new_nomain(rows_num);
/* Set all positions to be zero */
point_cloud->positions_for_write().fill(float3(0.0f, 0.0f, 0.0f));
/* Fill the attributes */
for (const int i : IndexRange(columns_num)) {
const StringRef column_name = column_names[i];
const eCustomDataType column_type = column_types[i];
const CPPType *cpp_column_type = bke::custom_data_type_to_cpp_type(column_type);
GMutableSpan column_data{*cpp_column_type,
MEM_mallocN_aligned(rows_num * cpp_column_type->size(),
cpp_column_type->alignment(),
__func__),
rows_num * cpp_column_type->size()};
array_utils::copy(GVArray::ForSpan(data[i]), column_data);
CustomData_add_layer_named_with_data(
&point_cloud->pdata, column_type, column_data.data(), rows_num, column_name, nullptr);
}
return point_cloud;
}
} // namespace blender::io::csv


@@ -1,55 +0,0 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup csv
*/
#pragma once
#include <string>
#include "BLI_array.hh"
#include "BLI_generic_array.hh"
#include "DNA_customdata_types.h"
struct PointCloud;
namespace blender::io::csv {
class CsvData {
private:
Array<GArray<>> data;
int64_t rows_num;
int64_t columns_num;
Array<std::string> column_names;
Array<eCustomDataType> column_types;
public:
CsvData(int64_t rows_num, Span<std::string> column_names, Span<eCustomDataType> column_types);
PointCloud *to_point_cloud() const;
template<typename T> void set_data(int64_t row_index, int64_t col_index, const T value)
{
GMutableSpan mutable_span = data[col_index].as_mutable_span();
MutableSpan typed_mutable_span = mutable_span.typed<T>();
typed_mutable_span[row_index] = value;
}
eCustomDataType get_column_type(int64_t col_index) const
{
return column_types[col_index];
}
StringRef get_column_name(int64_t col_index) const
{
return column_names[col_index];
}
};
} // namespace blender::io::csv


@@ -6,20 +6,24 @@
* \ingroup csv
*/
#include <optional>
#include "BKE_attribute.hh"
#include "BKE_pointcloud.hh"
#include "BKE_report.hh"
#include "BLI_fileops.hh"
#include "BLI_generic_span.hh"
#include "BLI_vector.hh"
#include "IO_csv.hh"
#include "IO_string_utils.hh"
#include "csv_data.hh"
namespace blender::io::csv {
static Vector<std::string> get_columns(const StringRef line)
static Vector<StringRef> parse_column_names(const StringRef line)
{
Vector<std::string> columns;
Vector<StringRef> columns;
const char delim = ',';
const char *start = line.begin(), *end = line.end();
const char *cell_start = start, *cell_end = start;
@@ -29,14 +33,14 @@ static Vector<std::string> get_columns(const StringRef line)
while (delim_index != StringRef::not_found) {
cell_end = start + delim_index;
columns.append(std::string(cell_start, cell_end));
columns.append_as(cell_start, cell_end);
cell_start = cell_end + 1;
delim_index = line.find_first_of(delim, delim_index + 1);
}
/* Handle last cell, --end because the end in StringRef is one after the end */
columns.append(std::string(cell_start, --end));
columns.append_as(cell_start, --end);
return columns;
}
@@ -105,62 +109,66 @@ static int64_t get_row_count(StringRef buffer)
return row_count;
}
static void parse_csv_cell(CsvData &csv_data,
int64_t row_index,
int64_t col_index,
static void parse_csv_cell(const Span<GMutableSpan> data,
const Span<eCustomDataType> types,
const Span<StringRef> column_names,
const int64_t row_index,
const int64_t col_index,
const char *start,
const char *end,
const CSVImportParams &import_params)
{
bool success = false;
switch (csv_data.get_column_type(col_index)) {
switch (types[col_index]) {
case CD_PROP_INT32: {
int value = 0;
try_parse_int(start, end, 0, success, value);
csv_data.set_data(row_index, col_index, value);
data[col_index].typed<int>()[row_index] = value;
if (!success) {
std::string column_name = csv_data.get_column_name(col_index);
StringRef column_name = column_names[col_index];
BKE_reportf(import_params.reports,
RPT_ERROR,
"CSV Import: file '%s' has an unexpected value at row %d for column %s of "
"type Integer",
import_params.filepath,
int(row_index),
column_name.c_str());
std::string(column_name).c_str());
}
break;
}
case CD_PROP_FLOAT: {
float value = 0.0f;
try_parse_float(start, end, 0.0f, success, value);
csv_data.set_data(row_index, col_index, value);
data[col_index].typed<float>()[row_index] = value;
if (!success) {
std::string column_name = csv_data.get_column_name(col_index);
StringRef column_name = column_names[col_index];
BKE_reportf(import_params.reports,
RPT_ERROR,
"CSV Import: file '%s' has an unexpected value at row %d for column %s of "
"type Float",
import_params.filepath,
int(row_index),
column_name.c_str());
std::string(column_name).c_str());
}
break;
}
default: {
std::string column_name = csv_data.get_column_name(col_index);
StringRef column_name = column_names[col_index];
BKE_reportf(import_params.reports,
RPT_ERROR,
"CSV Import: file '%s' has an unsupported value at row %d for column %s",
import_params.filepath,
int(row_index),
column_name.c_str());
std::string(column_name).c_str());
break;
}
}
}
static void parse_csv_line(CsvData &csv_data,
static void parse_csv_line(const Span<GMutableSpan> data,
const Span<eCustomDataType> types,
const Span<StringRef> column_names,
int64_t row_index,
const StringRef line,
const CSVImportParams &import_params)
@@ -176,7 +184,8 @@ static void parse_csv_line(CsvData &csv_data,
while (delim_index != StringRef::not_found) {
cell_end = start + delim_index;
parse_csv_cell(csv_data, row_index, col_index, cell_start, cell_end, import_params);
parse_csv_cell(
data, types, column_names, row_index, col_index, cell_start, cell_end, import_params);
col_index++;
cell_start = cell_end + 1;
@@ -184,10 +193,13 @@ static void parse_csv_line(CsvData &csv_data,
}
/* Handle last cell, --end because the end in StringRef is one after the end */
parse_csv_cell(csv_data, row_index, col_index, cell_start, --end, import_params);
parse_csv_cell(
data, types, column_names, row_index, col_index, cell_start, --end, import_params);
}
static void parse_csv_data(CsvData &csv_data,
static void parse_csv_data(const Span<GMutableSpan> data,
const Span<eCustomDataType> types,
const Span<StringRef> column_names,
StringRef buffer,
const CSVImportParams &import_params)
{
@@ -195,7 +207,7 @@ static void parse_csv_data(CsvData &csv_data,
while (!buffer.is_empty()) {
const StringRef line = read_next_line(buffer);
parse_csv_line(csv_data, row_index, line, import_params);
parse_csv_line(data, types, column_names, row_index, line, import_params);
row_index++;
}
@@ -205,7 +217,6 @@ PointCloud *import_csv_as_point_cloud(const CSVImportParams &import_params)
{
size_t buffer_len;
void *buffer = BLI_file_read_text_as_mem(import_params.filepath, 0, &buffer_len);
if (buffer == nullptr) {
BKE_reportf(import_params.reports,
RPT_ERROR,
@@ -216,9 +227,7 @@ PointCloud *import_csv_as_point_cloud(const CSVImportParams &import_params)
BLI_SCOPED_DEFER([&]() { MEM_freeN(buffer); });
StringRef buffer_str{(const char *)buffer, int64_t(buffer_len)};
/* Get row count and columns */
StringRef buffer_str{static_cast<char *>(buffer), int64_t(buffer_len)};
if (buffer_str.is_empty()) {
BKE_reportf(
import_params.reports, RPT_ERROR, "CSV Import: empty file '%s'", import_params.filepath);
@@ -226,7 +235,7 @@ PointCloud *import_csv_as_point_cloud(const CSVImportParams &import_params)
}
const StringRef header = read_next_line(buffer_str);
const Vector<std::string> columns = get_columns(header);
const Vector<StringRef> names = parse_column_names(header);
if (buffer_str.is_empty()) {
BKE_reportf(import_params.reports,
@@ -237,13 +246,13 @@ PointCloud *import_csv_as_point_cloud(const CSVImportParams &import_params)
}
/* Shallow copy buffer to preserve pointers from first row for parsing */
StringRef data_buffer(buffer_str.begin(), buffer_str.end());
const StringRef data_buffer(buffer_str.begin(), buffer_str.end());
const StringRef first_row = read_next_line(buffer_str);
Vector<eCustomDataType> column_types;
if (!get_column_types(first_row, column_types)) {
std::string column_name = columns[column_types.size()];
std::string column_name = names[column_types.size()];
BKE_reportf(import_params.reports,
RPT_ERROR,
"CSV Import: file '%s', Column %s is of unsupported data type",
@@ -252,15 +261,28 @@ PointCloud *import_csv_as_point_cloud(const CSVImportParams &import_params)
return nullptr;
}
const int64_t row_count = get_row_count(buffer_str);
const int64_t rows_num = get_row_count(buffer_str);
/* Create csv data */
CsvData csv_data(row_count, columns, column_types);
PointCloud *pointcloud = BKE_pointcloud_new_nomain(rows_num);
pointcloud->positions_for_write().fill(float3(0));
/* Fill csv data while seeking over the file */
parse_csv_data(csv_data, data_buffer, import_params);
Array<bke::GSpanAttributeWriter> attribute_writers(names.size());
Array<GMutableSpan> attribute_data(names.size());
return csv_data.to_point_cloud();
bke::MutableAttributeAccessor attributes = pointcloud->attributes_for_write();
for (const int i : names.index_range()) {
attribute_writers[i] = attributes.lookup_or_add_for_write_span(
names[i], bke::AttrDomain::Point, column_types[i]);
attribute_data[i] = attribute_writers[i].span;
}
parse_csv_data(attribute_data, column_types, names, data_buffer, import_params);
for (bke::GSpanAttributeWriter &attr : attribute_writers) {
attr.finish();
}
return pointcloud;
}
} // namespace blender::io::csv