Spreadsheet: garbage collect long unavailable columns
Previously, it was possible for the number of stored columns per table to grow without bound. While this likely isn't a problem in practice in most cases, one can imagine cases where people temporarily have thousands of attributes which are then never used again. We shouldn't have to store any data for these columns forever. This patch adds a simple garbage collection mechanism that keeps the number of stored unavailable columns per spreadsheet table below a certain threshold (currently 50). The least recently used columns are removed first. Pull Request: https://projects.blender.org/blender/blender/pulls/139469
This commit is contained in:
@@ -400,6 +400,7 @@ static void update_visible_columns(SpreadsheetTable &table, DataSource &data_sou
|
||||
if (!values) {
|
||||
return;
|
||||
}
|
||||
table.column_use_clock++;
|
||||
SpreadsheetColumn *column = spreadsheet_column_new(spreadsheet_column_id_copy(&column_id));
|
||||
if (is_extra) {
|
||||
new_columns.insert(0, column);
|
||||
@@ -415,11 +416,22 @@ static void update_visible_columns(SpreadsheetTable &table, DataSource &data_sou
|
||||
return;
|
||||
}
|
||||
|
||||
/* Update last used times of the columns to support garbage collection. */
|
||||
for (SpreadsheetColumn *column : new_columns) {
|
||||
const bool clock_was_reset = table.column_use_clock < column->last_used;
|
||||
if (clock_was_reset || column->is_available()) {
|
||||
column->last_used = table.column_use_clock;
|
||||
}
|
||||
}
|
||||
|
||||
/* Update the stored column pointers. */
|
||||
MEM_SAFE_FREE(table.columns);
|
||||
table.columns = MEM_calloc_arrayN<SpreadsheetColumn *>(new_columns.size(), __func__);
|
||||
table.num_columns = new_columns.size();
|
||||
std::copy_n(new_columns.begin(), new_columns.size(), table.columns);
|
||||
|
||||
/* Remove columns that have not been used for a while when there are too many. */
|
||||
spreadsheet_table_remove_unused_columns(table);
|
||||
}
|
||||
|
||||
static void spreadsheet_main_region_draw(const bContext *C, ARegion *region)
|
||||
|
||||
@@ -303,4 +303,46 @@ void spreadsheet_table_remove_unused(SpaceSpreadsheet &sspreadsheet)
|
||||
[](SpreadsheetTable **table) { spreadsheet_table_free(*table); });
|
||||
}
|
||||
|
||||
/**
 * Garbage collect long-unused unavailable columns of \a table when too many are stored.
 *
 * Available columns are never removed. Among the unavailable ones, the most recently used
 * columns (by #SpreadsheetColumn::last_used) are kept and the least recently used are freed,
 * so that at most `max_unavailable_columns_target` unavailable columns remain.
 */
void spreadsheet_table_remove_unused_columns(SpreadsheetTable &table)
{
  /* Might not be reached exactly if there are many columns with the same last used time. */
  const int max_unavailable_columns_target = 50;

  /* Gather the last used times of all unavailable columns in a single pass. */
  Vector<uint32_t> last_used_times;
  for (SpreadsheetColumn *column : Span(table.columns, table.num_columns)) {
    if (!column->is_available()) {
      last_used_times.append(column->last_used);
    }
  }
  if (last_used_times.size() <= max_unavailable_columns_target) {
    /* No need to remove columns. */
    return;
  }

  /* Find the threshold time for unavailable columns to remove. The times are sorted in ascending
   * order, so the last `max_unavailable_columns_target` entries belong to the columns that should
   * be kept. The entry right before them is the newest time that still has to be removed. The
   * index is never negative because of the size check above. */
  std::sort(last_used_times.begin(), last_used_times.end());
  const int64_t threshold_index = int64_t(last_used_times.size()) - 1 -
                                  max_unavailable_columns_target;
  const uint32_t max_removed_last_used = last_used_times[threshold_index];

  dna::array::remove_if<SpreadsheetColumn *>(
      &table.columns,
      &table.num_columns,
      [&](const SpreadsheetColumn *column) {
        if (column->is_available()) {
          /* Available columns should never be removed here. */
          return false;
        }
        /* Columns that have been used more recently than the threshold are not removed. */
        return column->last_used <= max_removed_last_used;
      },
      [](SpreadsheetColumn **column) { spreadsheet_column_free(*column); });
}
|
||||
|
||||
} // namespace blender::ed::spreadsheet
|
||||
|
||||
@@ -38,7 +38,6 @@ const SpreadsheetTable *spreadsheet_table_find(const SpaceSpreadsheet &sspreadsh
|
||||
const SpreadsheetTableID &table_id);
|
||||
void spreadsheet_table_add(SpaceSpreadsheet &sspreadsheet, SpreadsheetTable *table);
|
||||
void spreadsheet_table_remove_unused(SpaceSpreadsheet &sspreadsheet);
|
||||
void spreadsheet_table_remove_if(SpaceSpreadsheet &sspreadsheet,
|
||||
FunctionRef<bool(SpreadsheetTable &)> predicate);
|
||||
void spreadsheet_table_remove_unused_columns(SpreadsheetTable &table);
|
||||
|
||||
} // namespace blender::ed::spreadsheet
|
||||
|
||||
@@ -64,9 +64,9 @@ inline void remove_if(T **items,
|
||||
{
|
||||
static_assert(std::is_trivial_v<T>);
|
||||
/* This sorts the items-to-remove to the back. */
|
||||
const int remaining = std::partition(*items,
|
||||
*items + *items_num,
|
||||
[&](const T &value) { return !predicate(value); }) -
|
||||
const int remaining = std::stable_partition(*items,
|
||||
*items + *items_num,
|
||||
[&](const T &value) { return !predicate(value); }) -
|
||||
*items;
|
||||
for (const int i : IndexRange::from_begin_end(remaining, *items_num)) {
|
||||
destruct_item(&(*items)[i]);
|
||||
|
||||
@@ -1091,12 +1091,18 @@ typedef struct SpreadsheetColumn {
|
||||
* #eSpreadsheetColumnValueType.
|
||||
*/
|
||||
uint8_t data_type;
|
||||
char _pad0[1];
|
||||
char _pad0[3];
|
||||
/** #eSpreadsheetColumnFlag. */
|
||||
uint16_t flag;
|
||||
uint32_t flag;
|
||||
/** Width in SPREADSHEET_WIDTH_UNIT. */
|
||||
float width;
|
||||
|
||||
/**
|
||||
* A logical time set when the column is used. This is used to be able to remove long-unused
|
||||
* columns when there are too many. This is set from #SpreadsheetTable.column_use_clock.
|
||||
*/
|
||||
uint32_t last_used;
|
||||
|
||||
/**
|
||||
* The final column name generated by the data source, also just
|
||||
* cached at runtime when the data source columns are generated.
|
||||
@@ -1104,6 +1110,13 @@ typedef struct SpreadsheetColumn {
|
||||
char *display_name;
|
||||
|
||||
SpreadsheetColumnRuntime *runtime;
|
||||
|
||||
#ifdef __cplusplus
|
||||
bool is_available() const
|
||||
{
|
||||
return !(flag & SPREADSHEET_COLUMN_FLAG_UNAVAILABLE);
|
||||
}
|
||||
#endif
|
||||
} SpreadsheetColumn;
|
||||
|
||||
typedef struct SpreadsheetInstanceID {
|
||||
@@ -1165,7 +1178,12 @@ typedef struct SpreadsheetTable {
|
||||
* tables when there are too many. This is set from #SpaceSpreadsheet.table_use_clock.
|
||||
*/
|
||||
uint32_t last_used;
|
||||
char _pad[4];
|
||||
|
||||
/**
|
||||
* This is increased whenever a new column is used. It allows for some garbage collection of
|
||||
* long-unused columns when there are too many.
|
||||
*/
|
||||
uint32_t column_use_clock;
|
||||
} SpreadsheetTable;
|
||||
|
||||
typedef struct SpaceSpreadsheet {
|
||||
@@ -1194,7 +1212,7 @@ typedef struct SpaceSpreadsheet {
|
||||
/* eSpaceSpreadsheet_Flag. */
|
||||
uint32_t flag;
|
||||
/**
|
||||
* This is increase whenver a new table is used. It allows for some garbage collection of
|
||||
* This is increased whenever a new table is used. It allows for some garbage collection of
|
||||
* long-unused tables when there are too many.
|
||||
*/
|
||||
uint32_t table_use_clock;
|
||||
|
||||
Reference in New Issue
Block a user