diff --git a/intern/cycles/bvh/octree.cpp b/intern/cycles/bvh/octree.cpp index bf46c04bf7f..989f0dd001b 100644 --- a/intern/cycles/bvh/octree.cpp +++ b/intern/cycles/bvh/octree.cpp @@ -328,6 +328,7 @@ void Octree::flatten(KernelOctreeNode *knodes, /* Loop through all the children and flatten in breadth-first manner, so that children are * stored in contiguous indices. */ for (int i = 0; i < 8; i++) { + knodes[knode.first_child + i].parent = current_index; flatten(knodes, knode.first_child + i, internal_ptr->children_[i], child_index); } } diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h index e87f6645d67..821b7c06253 100644 --- a/intern/cycles/kernel/integrator/shade_volume.h +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -137,11 +137,15 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg, * to find `t.max`, then store a point `current_P` which lies in the adjacent leaf node. The next * leaf node is found by checking the higher bits of `current_P`. * + * The paper suggests keeping a stack of parent nodes, but in practice such a stack (even when the + * size is just 8) slows down performance on GPU. Instead we store the parent index in every node + * directly, since there is sufficient space due to alignment. + * * \{ */ struct OctreeTracing { - /* Stack of parent nodes of the current node. Plus two for world volume and root node. */ - KernelStack nodes; + /* Current active leaf node. */ + ccl_global const KernelOctreeNode *node = nullptr; /* Current active ray segment, typically spans from the front face to the back face of the * current leaf node. 
*/ @@ -273,22 +277,6 @@ struct OctreeTracing { const uint8_t z = ((current_P.z >> scale) & 1u) << 2u; return (x | y | z) ^ octant_mask; } - - ccl_device_inline_method bool is_empty() const - { - return nodes.is_empty(); - } - - ccl_device_inline_method void push(ccl_global const KernelOctreeNode *node) - { - nodes.push(node); - } - - /* Access the current active leaf node. */ - ccl_device_inline_method ccl_global const KernelOctreeNode *get_voxel() const - { - return nodes.top(); - } }; /* Check if an octree node is leaf node. */ @@ -300,11 +288,10 @@ ccl_device_inline bool volume_node_is_leaf(const ccl_global KernelOctreeNode *kn /* Find the leaf node of the current position, and replace `octree.node` with that node. */ ccl_device void volume_voxel_get(KernelGlobals kg, ccl_private OctreeTracing &octree) { - const ccl_global KernelOctreeNode *knode = octree.get_voxel(); - while (!volume_node_is_leaf(knode)) { - octree.scale--; - knode = &kernel_data_fetch(volume_tree_nodes, knode->first_child + octree.get_octant()); - octree.push(knode); + while (!volume_node_is_leaf(octree.node)) { + octree.scale -= 1; + const int child_index = octree.node->first_child + octree.get_octant(); + octree.node = &kernel_data_fetch(volume_tree_nodes, child_index); } } @@ -369,7 +356,7 @@ ccl_device_inline Extrema volume_object_get_extrema(KernelGlobals kg, const int shader_flag = kernel_data_fetch(shaders, (octree.entry.shader & SHADER_MASK)).flags; if ((path_flag & PATH_RAY_CAMERA) || !(shader_flag & SD_HAS_LIGHT_PATH_NODE)) { /* Use the baked volume density extrema. 
*/ - return octree.get_voxel()->sigma * object_volume_density(kg, octree.entry.object); + return octree.node->sigma * object_volume_density(kg, octree.entry.object); } return volume_estimate_extrema(kg, ray, sd, state, rng_state, path_flag, octree.entry); @@ -425,7 +412,7 @@ ccl_device bool volume_octree_setup(KernelGlobals kg, const ccl_global KernelOctreeRoot *kroot = volume_find_octree_root(kg, entry); OctreeTracing local(global.t.min); - local.push(&kernel_data_fetch(volume_tree_nodes, kroot->id)); + local.node = &kernel_data_fetch(volume_tree_nodes, kroot->id); local.entry = entry; /* Convert to object space. */ @@ -460,7 +447,7 @@ ccl_device bool volume_octree_setup(KernelGlobals kg, global.no_overlap = true; } - return !global.is_empty() && !global.t.is_empty(); + return global.node && !global.t.is_empty(); } /* Advance to the next adjacent leaf node and update the active interval. */ @@ -487,18 +474,17 @@ ccl_device_inline bool volume_octree_advance(KernelGlobals kg, /* Outside of the root node, continue tracing using the extrema of the root node. */ octree.t = {octree.t.max, ray->tmax}; - octree.nodes.clear(); - octree.push( - &kernel_data_fetch(volume_tree_nodes, volume_find_octree_root(kg, octree.entry)->id)); + octree.node = &kernel_data_fetch(volume_tree_nodes, + volume_find_octree_root(kg, octree.entry)->id); } else { kernel_assert(octree.next_scale > octree.scale); /* Fetch the common ancestor of the current and the next leaf nodes. */ - octree.nodes.pop(octree.next_scale - octree.scale); - octree.scale = octree.next_scale; - - kernel_assert(!octree.is_empty()); + for (; octree.scale < octree.next_scale; octree.scale++) { + kernel_assert(octree.node->parent != -1); + octree.node = &kernel_data_fetch(volume_tree_nodes, octree.node->parent); + } /* Find the current active leaf node. 
*/ volume_voxel_get(kg, octree); diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index 55f17acbafe..0672414d6f5 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -1707,71 +1707,19 @@ struct KernelOctreeRoot { }; struct KernelOctreeNode { + /* Index of the parent node in device vector `volume_tree_nodes`. */ + int parent; + /* Index of the first child node in device vector `volume_tree_nodes`. All children of the same * node are stored in contiguous memory. */ int first_child; - int pad; - /* Minimal and maximal volume density inside the node. */ /* TODO(weizhen): we can make sigma Spectral for better accuracy. Since only root and leaf nodes * need sigma, we can introduce `KernelOctreeInnerNode` to reduce the size of the struct. */ Extrema sigma; }; -/* Last-in, first-out stack, holding elements with a maximal size of `MAX_SIZE`. */ -template struct KernelStack { - type array[MAX_SIZE]; - /* Index of the top element. */ - int index = -1; - - ccl_device_inline_method bool is_empty() const - { - return index < 0; - } - - /* Removes the element on top of the stack, and reduces the size by one. */ - ccl_device_inline_method void pop() - { - kernel_assert(!is_empty()); - index--; - } - - /* Removes several elements on top of the stack. */ - ccl_device_inline_method void pop(const int num) - { - kernel_assert(num >= 0); - kernel_assert(index >= num - 1); - index -= num; - } - - /* Inserts a new element at the top of the stack. */ - ccl_device_inline_method void push(type elem) - { - kernel_assert(index < MAX_SIZE - 1); - array[++index] = elem; - } - - /* Access the top element. */ - ccl_device_inline_method type top() const - { - kernel_assert(!is_empty()); - return array[index]; - } - - /* Returns the number of elements in the stack. */ - ccl_device_inline_method int size() const - { - return index + 1; - } - - /* Removes all elements from the stack. Effectively set the size to 0. 
*/ - ccl_device_inline_method void clear() - { - index = -1; - } -}; - struct KernelLightTreeEmitter { /* Bounding cone. */ float theta_o; diff --git a/intern/cycles/scene/volume.cpp b/intern/cycles/scene/volume.cpp index a2fb87ba565..25ffe8e7884 100644 --- a/intern/cycles/scene/volume.cpp +++ b/intern/cycles/scene/volume.cpp @@ -908,6 +908,7 @@ void VolumeManager::flatten_octree(DeviceScene *dscene, const Scene *scene) cons /* Flatten octree. */ const uint current_index = node_index++; + knodes[current_index].parent = -1; octree->flatten(knodes, current_index, root, node_index); octree->set_flattened(); }