Cycles: track the octree traversal path with an explicit stack of nodes.

Summary of what this patch does:
  * Removes `KernelOctreeNode::parent` and the two places that wrote it; an `int pad` member is
    added to `KernelOctreeNode` in the same hunk.
  * Adds a fixed-capacity LIFO container `KernelStack` to kernel/types.h.
  * `OctreeTracing` stores a `KernelStack` of node pointers (`nodes`) instead of a single `node`
    pointer; the active leaf is the top of that stack (`get_voxel()`).
  * `volume_octree_advance()` pops `next_scale - scale` entries to reach the common ancestor
    instead of following parent indices.

Review notes:
  * The template parameter list of `KernelStack` and the template argument list of
    `OctreeTracing::nodes` were missing from the received text and are restored here. The element
    type follows from `push()` / `get_voxel()`. NOTE(review): the capacity expression
    `VOLUME_OCTREE_MAX_DEPTH + 2` is reconstructed from the "Plus two" comment -- confirm the
    constant name against kernel/types.h.
  * The comment above `volume_voxel_get()` referred to `octree.node`, which this patch removes;
    it is updated in the same hunk (line counts unchanged).
  * NOTE(review): indentation and blank context lines were re-created from the hunk line counts
    and the project's 2-space style; re-generate the patch from the working tree before applying.

diff --git a/intern/cycles/bvh/octree.cpp b/intern/cycles/bvh/octree.cpp
index 989f0dd001b..bf46c04bf7f 100644
--- a/intern/cycles/bvh/octree.cpp
+++ b/intern/cycles/bvh/octree.cpp
@@ -328,7 +328,6 @@ void Octree::flatten(KernelOctreeNode *knodes,
     /* Loop through all the children and flatten in breadth-first manner, so that children are
      * stored in contiguous indices. */
     for (int i = 0; i < 8; i++) {
-      knodes[knode.first_child + i].parent = current_index;
       flatten(knodes, knode.first_child + i, internal_ptr->children_[i], child_index);
     }
   }
diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h
index 821b7c06253..e87f6645d67 100644
--- a/intern/cycles/kernel/integrator/shade_volume.h
+++ b/intern/cycles/kernel/integrator/shade_volume.h
@@ -137,15 +137,11 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg,
  * to find `t.max`, then store a point `current_P` which lies in the adjacent leaf node. The next
  * leaf node is found by checking the higher bits of `current_P`.
  *
- * The paper suggests to keep a stack of parent nodes, in practice such a stack (even when the size
- * is just 8) slows down performance on GPU. Instead we store the parent index in the leaf node
- * directly, since there is sufficient space due to alignment.
- *
  * \{ */
 
 struct OctreeTracing {
-  /* Current active leaf node. */
-  ccl_global const KernelOctreeNode *node = nullptr;
+  /* Stack of parent nodes of the current node. Plus two for world volume and root node. */
+  KernelStack<ccl_global const KernelOctreeNode *, VOLUME_OCTREE_MAX_DEPTH + 2> nodes;
 
   /* Current active ray segment, typically spans from the front face to the back face of the
    * current leaf node. */
@@ -277,6 +273,22 @@ struct OctreeTracing {
     const uint8_t z = ((current_P.z >> scale) & 1u) << 2u;
     return (x | y | z) ^ octant_mask;
   }
+
+  ccl_device_inline_method bool is_empty() const
+  {
+    return nodes.is_empty();
+  }
+
+  ccl_device_inline_method void push(ccl_global const KernelOctreeNode *node)
+  {
+    nodes.push(node);
+  }
+
+  /* Access the current active leaf node. */
+  ccl_device_inline_method ccl_global const KernelOctreeNode *get_voxel() const
+  {
+    return nodes.top();
+  }
 };
 
 /* Check if an octree node is leaf node. */
@@ -288,10 +300,11 @@ ccl_device_inline bool volume_node_is_leaf(const ccl_global KernelOctreeNode *kn
 
-/* Find the leaf node of the current position, and replace `octree.node` with that node. */
+/* Descend to the leaf node of the current position, pushing each visited node onto the stack. */
 ccl_device void volume_voxel_get(KernelGlobals kg, ccl_private OctreeTracing &octree)
 {
-  while (!volume_node_is_leaf(octree.node)) {
-    octree.scale -= 1;
-    const int child_index = octree.node->first_child + octree.get_octant();
-    octree.node = &kernel_data_fetch(volume_tree_nodes, child_index);
+  const ccl_global KernelOctreeNode *knode = octree.get_voxel();
+  while (!volume_node_is_leaf(knode)) {
+    octree.scale--;
+    knode = &kernel_data_fetch(volume_tree_nodes, knode->first_child + octree.get_octant());
+    octree.push(knode);
   }
 }
@@ -356,7 +369,7 @@ ccl_device_inline Extrema volume_object_get_extrema(KernelGlobals kg,
   const int shader_flag = kernel_data_fetch(shaders, (octree.entry.shader & SHADER_MASK)).flags;
   if ((path_flag & PATH_RAY_CAMERA) || !(shader_flag & SD_HAS_LIGHT_PATH_NODE)) {
     /* Use the baked volume density extrema. */
-    return octree.node->sigma * object_volume_density(kg, octree.entry.object);
+    return octree.get_voxel()->sigma * object_volume_density(kg, octree.entry.object);
   }
 
   return volume_estimate_extrema(kg, ray, sd, state, rng_state, path_flag, octree.entry);
@@ -412,7 +425,7 @@ ccl_device bool volume_octree_setup(KernelGlobals kg,
   const ccl_global KernelOctreeRoot *kroot = volume_find_octree_root(kg, entry);
 
   OctreeTracing local(global.t.min);
-  local.node = &kernel_data_fetch(volume_tree_nodes, kroot->id);
+  local.push(&kernel_data_fetch(volume_tree_nodes, kroot->id));
   local.entry = entry;
 
   /* Convert to object space. */
@@ -447,7 +460,7 @@ ccl_device bool volume_octree_setup(KernelGlobals kg,
     global.no_overlap = true;
   }
 
-  return global.node && !global.t.is_empty();
+  return !global.is_empty() && !global.t.is_empty();
 }
 
 /* Advance to the next adjacent leaf node and update the active interval. */
@@ -474,17 +487,18 @@ ccl_device_inline bool volume_octree_advance(KernelGlobals kg,
 
     /* Outside of the root node, continue tracing using the extrema of the root node. */
     octree.t = {octree.t.max, ray->tmax};
-    octree.node = &kernel_data_fetch(volume_tree_nodes,
-                                     volume_find_octree_root(kg, octree.entry)->id);
+    octree.nodes.clear();
+    octree.push(
+        &kernel_data_fetch(volume_tree_nodes, volume_find_octree_root(kg, octree.entry)->id));
   }
   else {
     kernel_assert(octree.next_scale > octree.scale);
 
     /* Fetch the common ancestor of the current and the next leaf nodes. */
-    for (; octree.scale < octree.next_scale; octree.scale++) {
-      kernel_assert(octree.node->parent != -1);
-      octree.node = &kernel_data_fetch(volume_tree_nodes, octree.node->parent);
-    }
+    octree.nodes.pop(octree.next_scale - octree.scale);
+    octree.scale = octree.next_scale;
+
+    kernel_assert(!octree.is_empty());
 
     /* Find the current active leaf node. */
     volume_voxel_get(kg, octree);
diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h
index 0672414d6f5..55f17acbafe 100644
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -1707,19 +1707,71 @@ struct KernelOctreeRoot {
 };
 
 struct KernelOctreeNode {
-  /* Index of the parent node in device vector `volume_tree_nodes`. */
-  int parent;
-
   /* Index of the first child node in device vector `volume_tree_nodes`. All children of the same
    * node are stored in contiguous memory. */
   int first_child;
 
+  int pad;
+
   /* Minimal and maximal volume density inside the node. */
   /* TODO(weizhen): we can make sigma Spectral for better accuracy. Since only root and leaf nodes
    * need sigma, we can introduce `KernelOctreeInnerNode` to reduce the size of the struct. */
   Extrema sigma;
 };
 
+/* Last-in, first-out stack, holding elements with a maximal size of `MAX_SIZE`. */
+template<typename type, int MAX_SIZE> struct KernelStack {
+  type array[MAX_SIZE];
+  /* Index of the top element. */
+  int index = -1;
+
+  ccl_device_inline_method bool is_empty() const
+  {
+    return index < 0;
+  }
+
+  /* Removes the element on top of the stack, and reduces the size by one. */
+  ccl_device_inline_method void pop()
+  {
+    kernel_assert(!is_empty());
+    index--;
+  }
+
+  /* Removes several elements on top of the stack. */
+  ccl_device_inline_method void pop(const int num)
+  {
+    kernel_assert(num >= 0);
+    kernel_assert(index >= num - 1);
+    index -= num;
+  }
+
+  /* Inserts a new element at the top of the stack. */
+  ccl_device_inline_method void push(type elem)
+  {
+    kernel_assert(index < MAX_SIZE - 1);
+    array[++index] = elem;
+  }
+
+  /* Access the top element. */
+  ccl_device_inline_method type top() const
+  {
+    kernel_assert(!is_empty());
+    return array[index];
+  }
+
+  /* Returns the number of elements in the stack. */
+  ccl_device_inline_method int size() const
+  {
+    return index + 1;
+  }
+
+  /* Removes all elements from the stack. Effectively set the size to 0. */
+  ccl_device_inline_method void clear()
+  {
+    index = -1;
+  }
+};
+
 struct KernelLightTreeEmitter {
   /* Bounding cone. */
   float theta_o;
diff --git a/intern/cycles/scene/volume.cpp b/intern/cycles/scene/volume.cpp
index 25ffe8e7884..a2fb87ba565 100644
--- a/intern/cycles/scene/volume.cpp
+++ b/intern/cycles/scene/volume.cpp
@@ -908,7 +908,6 @@ void VolumeManager::flatten_octree(DeviceScene *dscene, const Scene *scene) cons
 
     /* Flatten octree. */
     const uint current_index = node_index++;
-    knodes[current_index].parent = -1;
     octree->flatten(knodes, current_index, root, node_index);
     octree->set_flattened();
   }