Functions: reduce memory usage in node state

By storing raw pointers instead of `MutableSpan`s for the input and
output states, we save 16 bytes per node state. I measured a ~5%
speedup in my setup with a simple repeat zone.

5c450aea05 added some additional asserts to check for valid
indices. Generally, index errors in this area lead to wrong
behavior of geometry nodes very quickly.
This commit is contained in:
Jacques Lucke
2023-09-16 12:30:23 +02:00
parent 5c450aea05
commit bd414cdbda

View File

@@ -139,9 +139,11 @@ struct NodeState {
/**
* States of the individual input and output sockets. One can index into these arrays without
* locking. However, to access data inside, a lock is needed unless noted otherwise.
* Those are not stored as #Span to reduce memory usage. The number of inputs and outputs is
* stored on the node already.
*/
MutableSpan<InputState> inputs;
MutableSpan<OutputState> outputs;
InputState *inputs;
OutputState *outputs;
/**
* Counts the number of inputs that still have to be provided to this node, until it should run
* again. This is used as an optimization so that nodes are not scheduled unnecessarily in many
@@ -427,12 +429,12 @@ class Executor {
/* Initialize socket states. */
const int num_inputs = node.inputs().size();
const int num_outputs = node.outputs().size();
node_state->inputs = MutableSpan{reinterpret_cast<InputState *>(memory), num_inputs};
node_state->inputs = reinterpret_cast<InputState *>(memory);
memory += sizeof(InputState) * num_inputs;
node_state->outputs = MutableSpan{reinterpret_cast<OutputState *>(memory), num_outputs};
node_state->outputs = reinterpret_cast<OutputState *>(memory);
default_construct_n(node_state->inputs.data(), num_inputs);
default_construct_n(node_state->outputs.data(), num_outputs);
default_construct_n(node_state->inputs, num_inputs);
default_construct_n(node_state->outputs, num_outputs);
node_states_[i] = node_state;
}
@@ -591,8 +593,8 @@ class Executor {
}
else {
/* Inputs of unreachable nodes are unused. */
for (InputState &input_state : node_state.inputs) {
input_state.usage = ValueUsage::Unused;
for (const int input_index : node.inputs().index_range()) {
node_state.inputs[input_index].usage = ValueUsage::Unused;
}
}
}
@@ -813,7 +815,8 @@ class Executor {
}
bool required_uncomputed_output_exists = false;
for (OutputState &output_state : node_state.outputs) {
for (const int output_index : node.outputs().index_range()) {
OutputState &output_state = node_state.outputs[output_index];
output_state.usage_for_execution = output_state.usage;
if (output_state.usage == ValueUsage::Used && !output_state.has_been_computed) {
required_uncomputed_output_exists = true;
@@ -839,7 +842,7 @@ class Executor {
node_state.always_used_inputs_requested = true;
}
for (const int input_index : node_state.inputs.index_range()) {
for (const int input_index : node.inputs().index_range()) {
InputState &input_state = node_state.inputs[input_index];
if (input_state.was_ready_for_execution) {
continue;
@@ -921,7 +924,7 @@ class Executor {
return;
}
Vector<const OutputSocket *> missing_outputs;
for (const int i : node_state.outputs.index_range()) {
for (const int i : node.outputs().index_range()) {
const OutputState &output_state = node_state.outputs[i];
if (output_state.usage_for_execution == ValueUsage::Used) {
if (!output_state.has_been_computed) {
@@ -948,13 +951,15 @@ class Executor {
return;
}
/* If there are outputs that may still be used, the node is not done yet. */
for (const OutputState &output_state : node_state.outputs) {
for (const int output_index : node.outputs().index_range()) {
const OutputState &output_state = node_state.outputs[output_index];
if (output_state.usage != ValueUsage::Unused && !output_state.has_been_computed) {
return;
}
}
/* If the node is still waiting for inputs, it is not done yet. */
for (const InputState &input_state : node_state.inputs) {
for (const int input_index : node.inputs().index_range()) {
const InputState &input_state = node_state.inputs[input_index];
if (input_state.usage == ValueUsage::Used && !input_state.was_ready_for_execution) {
return;
}
@@ -962,7 +967,7 @@ class Executor {
node_state.node_has_finished = true;
for (const int input_index : node_state.inputs.index_range()) {
for (const int input_index : node.inputs().index_range()) {
const InputSocket &input_socket = node.input(input_index);
InputState &input_state = node_state.inputs[input_index];
if (input_state.usage == ValueUsage::Maybe) {