Functions: reduce memory usage in node state
By storing a raw pointer instead of a `Span`, we save 16 bytes
per node state. I measured a ~5% speedup in my setup with
a simple repeat zone.
5c450aea05 added some additional asserts to check for valid
indices. Generally, index-errors in this area lead to wrong
behaviors of geometry nodes very quickly.
This commit is contained in:
@@ -139,9 +139,11 @@ struct NodeState {
|
||||
/**
|
||||
* States of the individual input and output sockets. One can index into these arrays without
|
||||
* locking. However, to access data inside, a lock is needed unless noted otherwise.
|
||||
* Those are not stored as #Span to reduce memory usage. The number of inputs and outputs is
|
||||
* stored on the node already.
|
||||
*/
|
||||
MutableSpan<InputState> inputs;
|
||||
MutableSpan<OutputState> outputs;
|
||||
InputState *inputs;
|
||||
OutputState *outputs;
|
||||
/**
|
||||
* Counts the number of inputs that still have to be provided to this node, until it should run
|
||||
* again. This is used as an optimization so that nodes are not scheduled unnecessarily in many
|
||||
@@ -427,12 +429,12 @@ class Executor {
|
||||
/* Initialize socket states. */
|
||||
const int num_inputs = node.inputs().size();
|
||||
const int num_outputs = node.outputs().size();
|
||||
node_state->inputs = MutableSpan{reinterpret_cast<InputState *>(memory), num_inputs};
|
||||
node_state->inputs = reinterpret_cast<InputState *>(memory);
|
||||
memory += sizeof(InputState) * num_inputs;
|
||||
node_state->outputs = MutableSpan{reinterpret_cast<OutputState *>(memory), num_outputs};
|
||||
node_state->outputs = reinterpret_cast<OutputState *>(memory);
|
||||
|
||||
default_construct_n(node_state->inputs.data(), num_inputs);
|
||||
default_construct_n(node_state->outputs.data(), num_outputs);
|
||||
default_construct_n(node_state->inputs, num_inputs);
|
||||
default_construct_n(node_state->outputs, num_outputs);
|
||||
|
||||
node_states_[i] = node_state;
|
||||
}
|
||||
@@ -591,8 +593,8 @@ class Executor {
|
||||
}
|
||||
else {
|
||||
/* Inputs of unreachable nodes are unused. */
|
||||
for (InputState &input_state : node_state.inputs) {
|
||||
input_state.usage = ValueUsage::Unused;
|
||||
for (const int input_index : node.inputs().index_range()) {
|
||||
node_state.inputs[input_index].usage = ValueUsage::Unused;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -813,7 +815,8 @@ class Executor {
|
||||
}
|
||||
|
||||
bool required_uncomputed_output_exists = false;
|
||||
for (OutputState &output_state : node_state.outputs) {
|
||||
for (const int output_index : node.outputs().index_range()) {
|
||||
OutputState &output_state = node_state.outputs[output_index];
|
||||
output_state.usage_for_execution = output_state.usage;
|
||||
if (output_state.usage == ValueUsage::Used && !output_state.has_been_computed) {
|
||||
required_uncomputed_output_exists = true;
|
||||
@@ -839,7 +842,7 @@ class Executor {
|
||||
node_state.always_used_inputs_requested = true;
|
||||
}
|
||||
|
||||
for (const int input_index : node_state.inputs.index_range()) {
|
||||
for (const int input_index : node.inputs().index_range()) {
|
||||
InputState &input_state = node_state.inputs[input_index];
|
||||
if (input_state.was_ready_for_execution) {
|
||||
continue;
|
||||
@@ -921,7 +924,7 @@ class Executor {
|
||||
return;
|
||||
}
|
||||
Vector<const OutputSocket *> missing_outputs;
|
||||
for (const int i : node_state.outputs.index_range()) {
|
||||
for (const int i : node.outputs().index_range()) {
|
||||
const OutputState &output_state = node_state.outputs[i];
|
||||
if (output_state.usage_for_execution == ValueUsage::Used) {
|
||||
if (!output_state.has_been_computed) {
|
||||
@@ -948,13 +951,15 @@ class Executor {
|
||||
return;
|
||||
}
|
||||
/* If there are outputs that may still be used, the node is not done yet. */
|
||||
for (const OutputState &output_state : node_state.outputs) {
|
||||
for (const int output_index : node.outputs().index_range()) {
|
||||
const OutputState &output_state = node_state.outputs[output_index];
|
||||
if (output_state.usage != ValueUsage::Unused && !output_state.has_been_computed) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* If the node is still waiting for inputs, it is not done yet. */
|
||||
for (const InputState &input_state : node_state.inputs) {
|
||||
for (const int input_index : node.inputs().index_range()) {
|
||||
const InputState &input_state = node_state.inputs[input_index];
|
||||
if (input_state.usage == ValueUsage::Used && !input_state.was_ready_for_execution) {
|
||||
return;
|
||||
}
|
||||
@@ -962,7 +967,7 @@ class Executor {
|
||||
|
||||
node_state.node_has_finished = true;
|
||||
|
||||
for (const int input_index : node_state.inputs.index_range()) {
|
||||
for (const int input_index : node.inputs().index_range()) {
|
||||
const InputSocket &input_socket = node.input(input_index);
|
||||
InputState &input_state = node_state.inputs[input_index];
|
||||
if (input_state.usage == ValueUsage::Maybe) {
|
||||
|
||||
Reference in New Issue
Block a user