Spreadsheet: garbage collect long unavailable columns

Previously, it was possible for the number of stored columns per table to grow
unbounded. While this likely isn't a problem in practice in most cases, one can
imagine cases where people temporarily have thousands of attributes which are
then never used again. We shouldn't have to store any data for these columns
forever.

This patch adds a simple garbage collection mechanism that keeps the number
of stored unavailable columns per spreadsheet table below a certain threshold
(50 currently). Least recently used columns are removed first.

Pull Request: https://projects.blender.org/blender/blender/pulls/139469
This commit is contained in:
Jacques Lucke
2025-05-27 06:29:06 +02:00
parent 6f83928c6b
commit ae6aeb3cc3
5 changed files with 80 additions and 9 deletions

View File

@@ -400,6 +400,7 @@ static void update_visible_columns(SpreadsheetTable &table, DataSource &data_sou
if (!values) {
return;
}
table.column_use_clock++;
SpreadsheetColumn *column = spreadsheet_column_new(spreadsheet_column_id_copy(&column_id));
if (is_extra) {
new_columns.insert(0, column);
@@ -415,11 +416,22 @@ static void update_visible_columns(SpreadsheetTable &table, DataSource &data_sou
return;
}
/* Update last used times of the columns to support garbage collection. */
for (SpreadsheetColumn *column : new_columns) {
const bool clock_was_reset = table.column_use_clock < column->last_used;
if (clock_was_reset || column->is_available()) {
column->last_used = table.column_use_clock;
}
}
/* Update the stored column pointers. */
MEM_SAFE_FREE(table.columns);
table.columns = MEM_calloc_arrayN<SpreadsheetColumn *>(new_columns.size(), __func__);
table.num_columns = new_columns.size();
std::copy_n(new_columns.begin(), new_columns.size(), table.columns);
/* Remove columns that have not been used for a while when there are too many. */
spreadsheet_table_remove_unused_columns(table);
}
static void spreadsheet_main_region_draw(const bContext *C, ARegion *region)

View File

@@ -303,4 +303,46 @@ void spreadsheet_table_remove_unused(SpaceSpreadsheet &sspreadsheet)
[](SpreadsheetTable **table) { spreadsheet_table_free(*table); });
}
/**
 * Garbage collect long-unused, unavailable columns of \a table.
 *
 * Nothing happens while the number of unavailable columns is at or below the target. Once the
 * target is exceeded, the least recently used unavailable columns are freed so that at most
 * the target number of (most recently used) unavailable columns remain. Available columns are
 * never removed.
 */
void spreadsheet_table_remove_unused_columns(SpreadsheetTable &table)
{
  /* Might not be reached exactly if there are many columns with the same last used time. In
   * that case more columns than strictly necessary are removed. */
  const int max_unavailable_columns_target = 50;

  /* Gather the last used times of all unavailable columns in a single pass. */
  Vector<uint32_t> last_used_times;
  for (SpreadsheetColumn *column : Span(table.columns, table.num_columns)) {
    if (!column->is_available()) {
      last_used_times.append(column->last_used);
    }
  }
  const int64_t num_unavailable_columns = last_used_times.size();
  if (num_unavailable_columns <= max_unavailable_columns_target) {
    /* No need to remove columns. */
    return;
  }

  /* Find the threshold time for unavailable columns to remove. After sorting in ascending
   * order, the newest `max_unavailable_columns_target` times are at the end of the array. The
   * element right before them is the most recent time that still has to be removed. */
  std::sort(last_used_times.begin(), last_used_times.end());
  const uint32_t max_last_used_to_remove =
      last_used_times[num_unavailable_columns - 1 - max_unavailable_columns_target];

  dna::array::remove_if<SpreadsheetColumn *>(
      &table.columns,
      &table.num_columns,
      [&](const SpreadsheetColumn *column) {
        if (column->is_available()) {
          /* Available columns should never be removed here. */
          return false;
        }
        /* Columns that have been used more recently than the threshold are kept. */
        return column->last_used <= max_last_used_to_remove;
      },
      [](SpreadsheetColumn **column) { spreadsheet_column_free(*column); });
}
} // namespace blender::ed::spreadsheet

View File

@@ -38,7 +38,6 @@ const SpreadsheetTable *spreadsheet_table_find(const SpaceSpreadsheet &sspreadsh
const SpreadsheetTableID &table_id);
void spreadsheet_table_add(SpaceSpreadsheet &sspreadsheet, SpreadsheetTable *table);
void spreadsheet_table_remove_unused(SpaceSpreadsheet &sspreadsheet);
void spreadsheet_table_remove_if(SpaceSpreadsheet &sspreadsheet,
FunctionRef<bool(SpreadsheetTable &)> predicate);
void spreadsheet_table_remove_unused_columns(SpreadsheetTable &table);
} // namespace blender::ed::spreadsheet

View File

@@ -64,9 +64,9 @@ inline void remove_if(T **items,
{
static_assert(std::is_trivial_v<T>);
/* This sorts the items-to-remove to the back. */
const int remaining = std::partition(*items,
*items + *items_num,
[&](const T &value) { return !predicate(value); }) -
const int remaining = std::stable_partition(*items,
*items + *items_num,
[&](const T &value) { return !predicate(value); }) -
*items;
for (const int i : IndexRange::from_begin_end(remaining, *items_num)) {
destruct_item(&(*items)[i]);

View File

@@ -1091,12 +1091,18 @@ typedef struct SpreadsheetColumn {
* #eSpreadsheetColumnValueType.
*/
uint8_t data_type;
char _pad0[1];
char _pad0[3];
/** #eSpreadsheetColumnFlag. */
uint16_t flag;
uint32_t flag;
/** Width in SPREADSHEET_WIDTH_UNIT. */
float width;
/**
* A logical time set when the column is used. This is used to be able to remove long-unused
* columns when there are too many. This is set from #SpreadsheetTable.column_use_clock.
*/
uint32_t last_used;
/**
* The final column name generated by the data source, also just
* cached at runtime when the data source columns are generated.
@@ -1104,6 +1110,13 @@ typedef struct SpreadsheetColumn {
char *display_name;
SpreadsheetColumnRuntime *runtime;
#ifdef __cplusplus
/** True when the #SPREADSHEET_COLUMN_FLAG_UNAVAILABLE bit is not set in #flag. */
bool is_available() const
{
  const bool is_unavailable = (flag & SPREADSHEET_COLUMN_FLAG_UNAVAILABLE) != 0;
  return !is_unavailable;
}
#endif
} SpreadsheetColumn;
typedef struct SpreadsheetInstanceID {
@@ -1165,7 +1178,12 @@ typedef struct SpreadsheetTable {
* tables when there are too many. This is set from #SpaceSpreadsheet.table_use_clock.
*/
uint32_t last_used;
char _pad[4];
/**
* This is increased whenever a new column is used. It allows for some garbage collection of
* long-unused columns when there are too many.
*/
uint32_t column_use_clock;
} SpreadsheetTable;
typedef struct SpaceSpreadsheet {
@@ -1194,7 +1212,7 @@ typedef struct SpaceSpreadsheet {
/* eSpaceSpreadsheet_Flag. */
uint32_t flag;
/**
* This is increase whenver a new table is used. It allows for some garbage collection of
* This is increased whenever a new table is used. It allows for some garbage collection of
* long-unused tables when there are too many.
*/
uint32_t table_use_clock;