Revert "Cycles: Store octree parent nodes in a stack"

This reverts commit bccad10b3be75deb0825b9234087e613678af407.
The stack approach seems slower than storing the parent index in each node.

Pull Request: https://projects.blender.org/blender/blender/pulls/134460
This commit is contained in:
Weizhen Huang
2025-08-12 13:58:41 +02:00
committed by Gitea
parent 146ac0d9fe
commit d717c78ca4
4 changed files with 24 additions and 88 deletions

View File

@@ -328,6 +328,7 @@ void Octree::flatten(KernelOctreeNode *knodes,
/* Loop through all the children and flatten in breadth-first manner, so that children are
* stored in contiguous indices. */
for (int i = 0; i < 8; i++) {
knodes[knode.first_child + i].parent = current_index;
flatten(knodes, knode.first_child + i, internal_ptr->children_[i], child_index);
}
}

View File

@@ -137,11 +137,15 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg,
* to find `t.max`, then store a point `current_P` which lies in the adjacent leaf node. The next
* leaf node is found by checking the higher bits of `current_P`.
*
* The paper suggests keeping a stack of parent nodes, but in practice such a stack (even when the
* size is just 8) slows down performance on GPU. Instead we store the parent index in each node
* directly, since there is sufficient space due to alignment.
*
* \{ */
struct OctreeTracing {
/* Stack of parent nodes of the current node. Plus two for world volume and root node. */
KernelStack<ccl_global const KernelOctreeNode *, VOLUME_OCTREE_MAX_DEPTH + 1> nodes;
/* Current active leaf node. */
ccl_global const KernelOctreeNode *node = nullptr;
/* Current active ray segment, typically spans from the front face to the back face of the
* current leaf node. */
@@ -273,22 +277,6 @@ struct OctreeTracing {
const uint8_t z = ((current_P.z >> scale) & 1u) << 2u;
return (x | y | z) ^ octant_mask;
}
ccl_device_inline_method bool is_empty() const
{
return nodes.is_empty();
}
ccl_device_inline_method void push(ccl_global const KernelOctreeNode *node)
{
nodes.push(node);
}
/* Access the current active leaf node. */
ccl_device_inline_method ccl_global const KernelOctreeNode *get_voxel() const
{
return nodes.top();
}
};
/* Check if an octree node is leaf node. */
@@ -300,11 +288,10 @@ ccl_device_inline bool volume_node_is_leaf(const ccl_global KernelOctreeNode *kn
/* Find the leaf node of the current position, and replace `octree.node` with that node. */
ccl_device void volume_voxel_get(KernelGlobals kg, ccl_private OctreeTracing &octree)
{
const ccl_global KernelOctreeNode *knode = octree.get_voxel();
while (!volume_node_is_leaf(knode)) {
octree.scale--;
knode = &kernel_data_fetch(volume_tree_nodes, knode->first_child + octree.get_octant());
octree.push(knode);
while (!volume_node_is_leaf(octree.node)) {
octree.scale -= 1;
const int child_index = octree.node->first_child + octree.get_octant();
octree.node = &kernel_data_fetch(volume_tree_nodes, child_index);
}
}
@@ -369,7 +356,7 @@ ccl_device_inline Extrema<float> volume_object_get_extrema(KernelGlobals kg,
const int shader_flag = kernel_data_fetch(shaders, (octree.entry.shader & SHADER_MASK)).flags;
if ((path_flag & PATH_RAY_CAMERA) || !(shader_flag & SD_HAS_LIGHT_PATH_NODE)) {
/* Use the baked volume density extrema. */
return octree.get_voxel()->sigma * object_volume_density(kg, octree.entry.object);
return octree.node->sigma * object_volume_density(kg, octree.entry.object);
}
return volume_estimate_extrema<shadow>(kg, ray, sd, state, rng_state, path_flag, octree.entry);
@@ -425,7 +412,7 @@ ccl_device bool volume_octree_setup(KernelGlobals kg,
const ccl_global KernelOctreeRoot *kroot = volume_find_octree_root(kg, entry);
OctreeTracing local(global.t.min);
local.push(&kernel_data_fetch(volume_tree_nodes, kroot->id));
local.node = &kernel_data_fetch(volume_tree_nodes, kroot->id);
local.entry = entry;
/* Convert to object space. */
@@ -460,7 +447,7 @@ ccl_device bool volume_octree_setup(KernelGlobals kg,
global.no_overlap = true;
}
return !global.is_empty() && !global.t.is_empty();
return global.node && !global.t.is_empty();
}
/* Advance to the next adjacent leaf node and update the active interval. */
@@ -487,18 +474,17 @@ ccl_device_inline bool volume_octree_advance(KernelGlobals kg,
/* Outside of the root node, continue tracing using the extrema of the root node. */
octree.t = {octree.t.max, ray->tmax};
octree.nodes.clear();
octree.push(
&kernel_data_fetch(volume_tree_nodes, volume_find_octree_root(kg, octree.entry)->id));
octree.node = &kernel_data_fetch(volume_tree_nodes,
volume_find_octree_root(kg, octree.entry)->id);
}
else {
kernel_assert(octree.next_scale > octree.scale);
/* Fetch the common ancestor of the current and the next leaf nodes. */
octree.nodes.pop(octree.next_scale - octree.scale);
octree.scale = octree.next_scale;
kernel_assert(!octree.is_empty());
for (; octree.scale < octree.next_scale; octree.scale++) {
kernel_assert(octree.node->parent != -1);
octree.node = &kernel_data_fetch(volume_tree_nodes, octree.node->parent);
}
/* Find the current active leaf node. */
volume_voxel_get(kg, octree);

View File

@@ -1707,71 +1707,19 @@ struct KernelOctreeRoot {
};
/* A single node of the flattened volume octree, stored in the device vector
 * `volume_tree_nodes`. */
struct KernelOctreeNode {
/* Index of the parent node in device vector `volume_tree_nodes`. The root node is assigned `-1`
 * when the octree is flattened; traversal follows this index to climb back up to the common
 * ancestor of two leaf nodes. */
int parent;
/* Index of the first child node in device vector `volume_tree_nodes`. All children of the same
 * node are stored in contiguous memory. */
int first_child;
/* NOTE(review): padding member with no visible reader. It is shown next to `parent` in this
 * diff view and presumably is the slot that `parent` takes over — confirm which of the two
 * members belongs to the final struct. */
int pad;
/* Minimal and maximal volume density inside the node. */
/* TODO(weizhen): we can make sigma Spectral for better accuracy. Since only root and leaf nodes
 * need sigma, we can introduce `KernelOctreeInnerNode` to reduce the size of the struct. */
Extrema<float> sigma;
};
/* Fixed-capacity last-in, first-out container. Elements live in an inline array, so the stack can
 * never hold more than `MAX_SIZE` entries. */
template<typename type, int MAX_SIZE> struct KernelStack {
  /* Backing storage; only the slots `[0, index]` hold live elements. */
  type array[MAX_SIZE];
  /* Slot of the most recently pushed element, `-1` while nothing is stored. */
  int index = -1;

  /* --- Queries --- */

  /* Number of elements currently stored. */
  ccl_device_inline_method int size() const
  {
    return index + 1;
  }

  /* True when no element is stored. */
  ccl_device_inline_method bool is_empty() const
  {
    return index < 0;
  }

  /* Copy of the most recently pushed element. The stack must not be empty. */
  ccl_device_inline_method type top() const
  {
    kernel_assert(!is_empty());
    return array[index];
  }

  /* --- Modifiers --- */

  /* Put `elem` on top of the stack. There must be a free slot left. */
  ccl_device_inline_method void push(type elem)
  {
    kernel_assert(index < MAX_SIZE - 1);
    index += 1;
    array[index] = elem;
  }

  /* Drop the top element, shrinking the stack by one. */
  ccl_device_inline_method void pop()
  {
    kernel_assert(!is_empty());
    index -= 1;
  }

  /* Drop the `num` topmost elements at once. `num` may not exceed the current size. */
  ccl_device_inline_method void pop(const int num)
  {
    kernel_assert(num >= 0);
    kernel_assert(index >= num - 1);
    index -= num;
  }

  /* Forget every stored element; afterwards the size is 0. */
  ccl_device_inline_method void clear()
  {
    index = -1;
  }
};
struct KernelLightTreeEmitter {
/* Bounding cone. */
float theta_o;

View File

@@ -908,6 +908,7 @@ void VolumeManager::flatten_octree(DeviceScene *dscene, const Scene *scene) cons
/* Flatten octree. */
const uint current_index = node_index++;
knodes[current_index].parent = -1;
octree->flatten(knodes, current_index, root, node_index);
octree->set_flattened();
}