diff --git a/intern/cycles/blender/light.cpp b/intern/cycles/blender/light.cpp index 85e4c29826e..ead162e9f91 100644 --- a/intern/cycles/blender/light.cpp +++ b/intern/cycles/blender/light.cpp @@ -138,6 +138,8 @@ void BlenderSync::sync_background_light(BL::SpaceView3D &b_v3d) object->set_lightgroup(ustring(b_world ? b_world.lightgroup() : "")); } + object->set_asset_name(ustring(b_world.name())); + /* Create geometry. */ const GeometryKey geom_key{b_world.ptr.data, Geometry::LIGHT}; Geometry *geom = geometry_map.find(geom_key); diff --git a/intern/cycles/bvh/CMakeLists.txt b/intern/cycles/bvh/CMakeLists.txt index da7bfb870e1..4944a714dcc 100644 --- a/intern/cycles/bvh/CMakeLists.txt +++ b/intern/cycles/bvh/CMakeLists.txt @@ -10,6 +10,7 @@ set(INC_SYS ) set(SRC + octree.cpp bvh.cpp bvh2.cpp binning.cpp @@ -36,6 +37,7 @@ if(WITH_CYCLES_DEVICE_METAL) endif() set(SRC_HEADERS + octree.h bvh.h bvh2.h binning.h diff --git a/intern/cycles/bvh/octree.cpp b/intern/cycles/bvh/octree.cpp new file mode 100644 index 00000000000..989f0dd001b --- /dev/null +++ b/intern/cycles/bvh/octree.cpp @@ -0,0 +1,470 @@ +/* SPDX-FileCopyrightText: 2025 Blender Foundation + * + * SPDX-License-Identifier: Apache-2.0 */ + +#include "bvh/octree.h" + +#include "scene/object.h" +#include "scene/volume.h" + +#include "integrator/shader_eval.h" + +#include "util/log.h" +#include "util/progress.h" + +#ifdef WITH_OPENVDB +# include +#endif + +CCL_NAMESPACE_BEGIN + +__forceinline int Octree::flatten_index(int x, int y, int z) const +{ + return x + resolution_ * (y + z * resolution_); +} + +Extrema Octree::get_extrema(const int3 index_min, const int3 index_max) const +{ + const blocked_range3d range( + index_min.x, index_max.x, 32, index_min.y, index_max.y, 32, index_min.z, index_max.z, 32); + + const Extrema identity = {FLT_MAX, -FLT_MAX}; + + auto reduction_func = [&](const blocked_range3d &r, Extrema init) -> Extrema { + for (int z = r.cols().begin(); z < r.cols().end(); ++z) { + for (int y = r.rows().begin(); y < r.rows().end(); ++y) { + for (int x = r.pages().begin(); x < r.pages().end(); ++x) { + init = merge(init, sigmas_[flatten_index(x, y, z)]); + } + } + } + return init; + }; + + auto join_func = [](Extrema a, Extrema b) -> Extrema { + return merge(a, b); + }; + + return parallel_reduce(range, identity, reduction_func, join_func); +} + +__forceinline float3 Octree::position_to_index(const float3 p) const +{ + return (p - root_->bbox.min) * position_to_index_scale_; +} + +int3 Octree::position_to_floor_index(const float3 p) const +{ + const float3 index = round(position_to_index(p)); + return clamp(make_int3(int(index.x), int(index.y), int(index.z)), 0, resolution_ - 1); +} + +int3 Octree::position_to_ceil_index(const float3 p) const +{ + if (any_zero(position_to_index_scale_)) { + /* Octree with degenerate shape, force max index. */ + return make_int3(resolution_); + } + const float3 index = round(position_to_index(p)); + return clamp(make_int3(int(index.x), int(index.y), int(index.z)), 1, resolution_); +} + +__forceinline float3 Octree::index_to_position(int x, int y, int z) const +{ + return root_->bbox.min + make_float3(x, y, z) * index_to_position_scale_; +} + +__forceinline float3 Octree::voxel_size() const +{ + return index_to_position_scale_; +} + +bool Octree::should_split(std::shared_ptr &node) const +{ + const int3 index_min = position_to_floor_index(node->bbox.min); + const int3 index_max = position_to_ceil_index(node->bbox.max); + node->sigma = get_extrema(index_min, index_max); + + const float3 bbox_size = node->bbox.size(); + if (any_zero(bbox_size)) { + /* Octree with degenerate shape, can happen for implicit volume. */ + return false; + } + + /* The threshold is set so that ideally only one sample needs to be taken per node. Value taken + * from "Volume Rendering for Pixar's Elemental". */ + return (node->sigma.range() * len(bbox_size) * scale_ > 1.442f && + node->depth < VOLUME_OCTREE_MAX_DEPTH); +} + +#ifdef WITH_OPENVDB +/* Check if a interior mask grid intersects with a bounding box defined by `p_min` and `p_max`. */ +static bool vdb_voxel_intersect(const float3 p_min, + const float3 p_max, + openvdb::BoolGrid::ConstPtr &grid, + const openvdb::tools::FindActiveValues &find) +{ + if (grid->empty()) { + /* Non-mesh volume. */ + return true; + } + + const openvdb::math::CoordBBox coord_bbox( + openvdb::Coord::floor(grid->worldToIndex({p_min.x, p_min.y, p_min.z})), + openvdb::Coord::ceil(grid->worldToIndex({p_max.x, p_max.y, p_max.z}))); + + /* Check if the bounding box lies inside or partially overlaps the mesh. + * For interior mask grids, all the interior voxels are active. */ + return find.anyActiveValues(coord_bbox, true); +} +#endif + +/* Fill in coordinates for shading the volume density. */ +static void fill_shader_input(device_vector &d_input, + const Octree *octree, + const Object *object, + const Shader *shader, +#ifdef WITH_OPENVDB + openvdb::BoolGrid::ConstPtr &interior_mask, +#endif + const int resolution) +{ + const int object_id = object->get_device_index(); + const uint shader_id = shader->id; + + KernelShaderEvalInput *d_input_data = d_input.data(); + + const float3 voxel_size = octree->voxel_size(); + /* Dilate the voxel in case we miss features at the boundary. */ + const float3 pad = 0.2f * voxel_size; + const float3 padded_size = voxel_size + pad * 2.0f; + + const blocked_range3d range(0, resolution, 8, 0, resolution, 8, 0, resolution, 8); + parallel_for(range, [&](const blocked_range3d &r) { +#ifdef WITH_OPENVDB + /* One accessor per thread is important for cached access. */ + const auto find = openvdb::tools::FindActiveValues(interior_mask->tree()); +#endif + + for (int z = r.cols().begin(); z < r.cols().end(); ++z) { + for (int y = r.rows().begin(); y < r.rows().end(); ++y) { + for (int x = r.pages().begin(); x < r.pages().end(); ++x) { + const int offset = octree->flatten_index(x, y, z); + const float3 p = octree->index_to_position(x, y, z); + +#ifdef WITH_OPENVDB + /* Zero density for cells outside of the mesh. */ + if (!vdb_voxel_intersect(p, p + voxel_size, interior_mask, find)) { + d_input_data[offset * 2].object = OBJECT_NONE; + d_input_data[offset * 2 + 1].object = SHADER_NONE; + continue; + } +#endif + + KernelShaderEvalInput in; + in.object = object_id; + in.prim = __float_as_int(p.x - pad.x); + in.u = p.y - pad.y; + in.v = p.z - pad.z; + d_input_data[offset * 2] = in; + + in.object = shader_id; + in.prim = __float_as_int(padded_size.x); + in.u = padded_size.y; + in.v = padded_size.z; + d_input_data[offset * 2 + 1] = in; + } + } + } + }); +} + +/* Read back the volume density. */ +static void read_shader_output(const device_vector &d_output, + const Octree *octree, + const int num_channels, + const int resolution, + vector> &sigmas) +{ + const float *d_output_data = d_output.data(); + const blocked_range3d range(0, resolution, 32, 0, resolution, 32, 0, resolution, 32); + + parallel_for(range, [&](const blocked_range3d &r) { + for (int z = r.cols().begin(); z < r.cols().end(); ++z) { + for (int y = r.rows().begin(); y < r.rows().end(); ++y) { + for (int x = r.pages().begin(); x < r.pages().end(); ++x) { + const int index = octree->flatten_index(x, y, z); + sigmas[index].min = d_output_data[index * num_channels + 0]; + sigmas[index].max = d_output_data[index * num_channels + 1]; + } + } + } + }); +} + +void Octree::evaluate_volume_density(Device *device, + Progress &progress, +#ifdef WITH_OPENVDB + openvdb::BoolGrid::ConstPtr &interior_mask, +#endif + const Object *object, + const Shader *shader) +{ + /* For heterogeneous volume, the grid resolution is 2^max_depth in each 3D dimension; + * for homogeneous volume, only one grid is needed. */ + resolution_ = VolumeManager::is_homogeneous_volume(object, shader) ? + 1 : + power_of_2(VOLUME_OCTREE_MAX_DEPTH); + index_to_position_scale_ = root_->bbox.size() / float(resolution_); + position_to_index_scale_ = safe_divide(one_float3(), index_to_position_scale_); + + /* Initialize density field. */ + /* TODO(weizhen): maybe lower the resolution depending on the object size. */ + const int size = resolution_ * resolution_ * resolution_; + sigmas_.resize(size); + parallel_for(0, size, [&](int i) { sigmas_[i] = {0.0f, 0.0f}; }); + + /* Min and max. */ + const int num_channels = 2; + + /* Need the size of two `KernelShaderEvalInput`s per voxel for evaluating the shader. */ + const int num_inputs = size * 2; + + /* Evaluate shader on device. */ + ShaderEval shader_eval(device, progress); + shader_eval.eval( + SHADER_EVAL_VOLUME_DENSITY, + num_inputs, + num_channels, + [&](device_vector &d_input) { +#ifdef WITH_OPENVDB + fill_shader_input(d_input, this, object, shader, interior_mask, resolution_); +#else + fill_shader_input(d_input, this, object, shader, resolution_); +#endif + return size; + }, + [&](device_vector &d_output) { + read_shader_output(d_output, this, num_channels, resolution_, sigmas_); + }); +} + +float Octree::volume_scale(const Object *object) const +{ + const Geometry *geom = object->get_geometry(); + if (geom->is_volume()) { + const Volume *volume = static_cast(geom); + if (volume->get_object_space()) { + /* The density changes with object scale, we scale the density accordingly in the final + * render. */ + if (volume->transform_applied) { + const float3 unit = normalize(one_float3()); + return 1.0f / len(transform_direction(&object->get_tfm(), unit)); + } + } + else { + /* The density does not change with object scale, we scale the node in the viewport to it's + * true size. */ + if (!volume->transform_applied) { + const float3 unit = normalize(one_float3()); + return len(transform_direction(&object->get_tfm(), unit)); + } + } + } + else { + /* TODO(weizhen): use the maximal scale of all instances. */ + if (!geom->transform_applied) { + const float3 unit = normalize(one_float3()); + return len(transform_direction(&object->get_tfm(), unit)); + } + } + + return 1.0f; +} + +std::shared_ptr Octree::make_internal(std::shared_ptr &node) +{ + num_nodes_ += 8; + auto internal = std::make_shared(*node); + + /* Create bounding boxes for children. */ + const float3 center = internal->bbox.center(); + for (int i = 0; i < 8; i++) { + const float3 t = make_float3(i & 1, (i >> 1) & 1, (i >> 2) & 1); + const BoundBox bbox(mix(internal->bbox.min, center, t), mix(center, internal->bbox.max, t)); + internal->children_[i] = std::make_shared(bbox, internal->depth + 1); + } + + return internal; +} + +void Octree::recursive_build(std::shared_ptr &octree_node) +{ + if (!should_split(octree_node)) { + return; + } + + /* Make the current node an internal node. */ + auto internal = make_internal(octree_node); + + for (auto &child : internal->children_) { + task_pool_.push([&] { recursive_build(child); }); + } + + octree_node = internal; +} + +void Octree::flatten(KernelOctreeNode *knodes, + const int current_index, + const std::shared_ptr &node, + int &child_index) const +{ + KernelOctreeNode &knode = knodes[current_index]; + knode.sigma = node->sigma; + + if (auto internal_ptr = std::dynamic_pointer_cast(node)) { + knode.first_child = child_index; + child_index += 8; + /* Loop through all the children and flatten in breadth-first manner, so that children are + * stored in contiguous indices. */ + for (int i = 0; i < 8; i++) { + knodes[knode.first_child + i].parent = current_index; + flatten(knodes, knode.first_child + i, internal_ptr->children_[i], child_index); + } + } + else { + knode.first_child = -1; + } +} + +void Octree::set_flattened(const bool flattened) +{ + is_flattened_ = flattened; +} + +bool Octree::is_flattened() const +{ + return is_flattened_; +} + +void Octree::build(Device *device, + Progress &progress, +#ifdef WITH_OPENVDB + openvdb::BoolGrid::ConstPtr &interior_mask, +#endif + const Object *object, + const Shader *shader) +{ + const char *name = object->get_asset_name().c_str(); + progress.set_substatus(string_printf("Evaluating density for %s", name)); + +#ifdef WITH_OPENVDB + evaluate_volume_density(device, progress, interior_mask, object, shader); +#else + evaluate_volume_density(device, progress, object, shader); +#endif + if (progress.get_cancel()) { + return; + } + + progress.set_substatus(string_printf("Building octree for %s", name)); + + scale_ = volume_scale(object); + recursive_build(root_); + + task_pool_.wait_work(); + + is_built_ = true; + sigmas_.clear(); +} + +Octree::Octree(const BoundBox &bbox) +{ + root_ = std::make_shared(bbox, 0); + is_built_ = false; + is_flattened_ = false; +} + +bool Octree::is_built() const +{ + return is_built_; +} + +int Octree::get_num_nodes() const +{ + return num_nodes_; +} + +std::shared_ptr Octree::get_root() const +{ + return root_; +} + +void OctreeNode::visualize(std::string &str) const +{ + const auto *internal = dynamic_cast(this); + + if (!internal) { + /* Skip leaf nodes. */ + return; + } + + /* Create three orthogonal faces for inner nodes. */ + const float3 mid = bbox.center(); + const float3 max = bbox.max; + const float3 min = bbox.min; + const std::string mid_x = to_string(mid.x), mid_y = to_string(mid.y), mid_z = to_string(mid.z), + min_x = to_string(min.x), min_y = to_string(min.y), min_z = to_string(min.z), + max_x = to_string(max.x), max_y = to_string(max.y), max_z = to_string(max.z); + // clang-format off + str += "(" + mid_x + "," + mid_y + "," + min_z + "), " + "(" + mid_x + "," + mid_y + "," + max_z + "), " + "(" + mid_x + "," + max_y + "," + max_z + "), " + "(" + mid_x + "," + max_y + "," + min_z + "), " + "(" + mid_x + "," + min_y + "," + min_z + "), " + "(" + mid_x + "," + min_y + "," + max_z + "), "; + str += "(" + min_x + "," + mid_y + "," + mid_z + "), " + "(" + max_x + "," + mid_y + "," + mid_z + "), " + "(" + max_x + "," + mid_y + "," + max_z + "), " + "(" + min_x + "," + mid_y + "," + max_z + "), " + "(" + min_x + "," + mid_y + "," + min_z + "), " + "(" + max_x + "," + mid_y + "," + min_z + "), "; + str += "(" + mid_x + "," + min_y + "," + mid_z + "), " + "(" + mid_x + "," + max_y + "," + mid_z + "), " + "(" + max_x + "," + max_y + "," + mid_z + "), " + "(" + max_x + "," + min_y + "," + mid_z + "), " + "(" + min_x + "," + min_y + "," + mid_z + "), " + "(" + min_x + "," + max_y + "," + mid_z + "), "; + // clang-format on + for (const auto &child : internal->children_) { + child->visualize(str); + } +} + +void Octree::visualize(std::ofstream &file, const std::string object_name) const +{ + std::string str = "vertices = ["; + root_->visualize(str); + str += + "]\nr = range(len(vertices))\n" + "edges = [(i, i+1 if i%6<5 else i-4) for i in r]\n" + "mesh = bpy.data.meshes.new('Octree')\n" + "mesh.from_pydata(vertices, edges, [])\n" + "mesh.update()\n" + "obj = bpy.data.objects.new('" + + object_name + + "', mesh)\n" + "octree.objects.link(obj)\n" + "bpy.context.view_layer.objects.active = obj\n" + "bpy.ops.object.mode_set(mode='EDIT')\n"; + file << str; + + const float3 center = root_->bbox.center(); + const float3 size = root_->bbox.size() * 0.5f; + file << "bpy.ops.mesh.primitive_cube_add(location = " << center << ", scale = " << size << ")\n"; + file << "bpy.ops.mesh.delete(type='ONLY_FACE')\n" + "bpy.ops.object.mode_set(mode='OBJECT')\n" + "obj.select_set(True)\n"; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/octree.h b/intern/cycles/bvh/octree.h new file mode 100644 index 00000000000..7826c4ff008 --- /dev/null +++ b/intern/cycles/bvh/octree.h @@ -0,0 +1,141 @@ +/* SPDX-FileCopyrightText: 2025 Blender Foundation + * + * SPDX-License-Identifier: Apache-2.0 */ + +/* The volume octree is used to determine the necessary step size when rendering the volume. One + * volume per object per shader is built, and a node splits in eight when the density difference + * inside the node exceeds a certain threshold. */ + +#ifndef __OCTREE_H__ +#define __OCTREE_H__ + +#include "util/boundbox.h" +#include "util/task.h" + +#ifdef WITH_OPENVDB +# include +#endif + +#include + +CCL_NAMESPACE_BEGIN + +class Device; +class Object; +class Progress; +class Shader; +struct KernelOctreeNode; + +struct OctreeNode { + /* Bounding box of the node. */ + BoundBox bbox; + + /* Depth of the node in the octree. */ + int depth; + + /* Minimal and maximal volume density inside the node. */ + Extrema sigma = {0.0f, 0.0f}; + + OctreeNode() : bbox(BoundBox::empty), depth(0) {} + OctreeNode(BoundBox bbox_, int depth_) : bbox(bbox_), depth(depth_) {} + virtual ~OctreeNode() = default; + + /* Visualize node. */ + void visualize(std::string &str) const; +}; + +struct OctreeInternalNode : public OctreeNode { + OctreeInternalNode(OctreeNode &node) : children_(8) + { + bbox = node.bbox; + depth = node.depth; + sigma = node.sigma; + } + + vector> children_; +}; + +class Octree { + public: + Octree(const BoundBox &bbox); + ~Octree() = default; + + /* Build the octree according to the volume density. */ +#ifdef WITH_OPENVDB + void build(Device *, Progress &, openvdb::BoolGrid::ConstPtr &, const Object *, const Shader *); +#else + void build(Device *, Progress &, const Object *, const Shader *); +#endif + + /* Convert the octree into an array of nodes for uploading to the kernel. */ + void flatten(KernelOctreeNode *, const int, const std::shared_ptr &, int &) const; + void set_flattened(const bool = true); + bool is_flattened() const; + + /* Flatten a 3D coordinate in the grid to a 1D index. */ + int flatten_index(int x, int y, int z) const; + /* Convert from index to the position of the lower left corner of the voxel. */ + float3 index_to_position(int x, int y, int z) const; + /* Size of a voxel. */ + float3 voxel_size() const; + + int get_num_nodes() const; + std::shared_ptr get_root() const; + bool is_built() const; + + /* Draw octree nodes as empty boxes with Blender Python API. */ + void visualize(std::ofstream &file, const std::string object_name) const; + + private: + /* The bounding box of the octree is divided into a regular grid with the same resolution in each + * dimension. */ + int resolution_; + /* Extrema of volume densities in the grid. */ + vector> sigmas_; + /* Compute the extrema of all the `sigmas_` in a coordinate bounding box defined by `index_min` + * and `index_max`. */ + Extrema get_extrema(const int3 index_min, const int3 index_max) const; + /* Randomly sample positions inside the grid to evaluate the shader for the density. */ +#ifdef WITH_OPENVDB + void evaluate_volume_density( + Device *, Progress &, openvdb::BoolGrid::ConstPtr &, const Object *, const Shader *); +#else + void evaluate_volume_density(Device *, Progress &, const Object *, const Shader *); +#endif + /* Convert from position in object space to grid index space. */ + float3 position_to_index_scale_; + float3 index_to_position_scale_; + float3 position_to_index(const float3 p) const; + int3 position_to_floor_index(const float3 p) const; + int3 position_to_ceil_index(const float3 p) const; + + /* Whether a node should be split into child nodes. */ + bool should_split(std::shared_ptr &node) const; + /* Scale the node size so that the octree has the similar subdivision levels in viewport and + * final render. */ + float volume_scale(const Object *object) const; + float scale_; + /* Recursively build a node and its child nodes. */ + void recursive_build(std::shared_ptr &); + /* Turn a node into an internal node. */ + std::shared_ptr make_internal(std::shared_ptr &node); + + /* Root node. */ + std::shared_ptr root_; + + /* Whether the octree is already built. */ + bool is_built_; + + /* Whether the octree is already flattened into an array. */ + bool is_flattened_; + + /* Number of nodes in the octree. Incremented while building the tree. */ + std::atomic num_nodes_ = 1; + + /* Task pool for building the octree in parallel. */ + TaskPool task_pool_; +}; + +CCL_NAMESPACE_END + +#endif /* __OCTREE_H__ */ diff --git a/intern/cycles/device/cpu/kernel.cpp b/intern/cycles/device/cpu/kernel.cpp index df17c55aee2..e56f06dadb0 100644 --- a/intern/cycles/device/cpu/kernel.cpp +++ b/intern/cycles/device/cpu/kernel.cpp @@ -24,6 +24,7 @@ CPUKernels::CPUKernels() REGISTER_KERNEL(shader_eval_displace), REGISTER_KERNEL(shader_eval_background), REGISTER_KERNEL(shader_eval_curve_shadow_transparency), + REGISTER_KERNEL(shader_eval_volume_density), /* Adaptive sampling. */ REGISTER_KERNEL(adaptive_sampling_convergence_check), REGISTER_KERNEL(adaptive_sampling_filter_x), diff --git a/intern/cycles/device/cpu/kernel.h b/intern/cycles/device/cpu/kernel.h index 5497c78c0fa..e245f11cc8d 100644 --- a/intern/cycles/device/cpu/kernel.h +++ b/intern/cycles/device/cpu/kernel.h @@ -40,6 +40,7 @@ class CPUKernels { ShaderEvalFunction shader_eval_displace; ShaderEvalFunction shader_eval_background; ShaderEvalFunction shader_eval_curve_shadow_transparency; + ShaderEvalFunction shader_eval_volume_density; /* Adaptive stopping. */ diff --git a/intern/cycles/device/kernel.cpp b/intern/cycles/device/kernel.cpp index 02fff6a5d90..6f3bb6c900a 100644 --- a/intern/cycles/device/kernel.cpp +++ b/intern/cycles/device/kernel.cpp @@ -22,7 +22,8 @@ bool device_kernel_has_shading(DeviceKernel kernel) kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_DEDICATED_LIGHT || kernel == DEVICE_KERNEL_SHADER_EVAL_DISPLACE || kernel == DEVICE_KERNEL_SHADER_EVAL_BACKGROUND || - kernel == DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY); + kernel == DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY || + kernel == DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY); } bool device_kernel_has_intersection(DeviceKernel kernel) @@ -108,6 +109,8 @@ const char *device_kernel_as_string(DeviceKernel kernel) return "shader_eval_background"; case DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY: return "shader_eval_curve_shadow_transparency"; + case DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY: + return "shader_eval_volume_density"; /* Film. */ diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm index 64ddf316d6a..7fda8c14587 100644 --- a/intern/cycles/device/metal/kernel.mm +++ b/intern/cycles/device/metal/kernel.mm @@ -423,7 +423,7 @@ bool MetalKernelPipeline::should_use_binary_archive() const if ((device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND && device_kernel <= DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW) || (device_kernel >= DEVICE_KERNEL_SHADER_EVAL_DISPLACE && - device_kernel <= DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY)) + device_kernel <= DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY)) { /* Archive all shade kernels - they take a long time to compile. */ return true; diff --git a/intern/cycles/device/oneapi/device_impl.cpp b/intern/cycles/device/oneapi/device_impl.cpp index 4d825914855..ed8def3dd30 100644 --- a/intern/cycles/device/oneapi/device_impl.cpp +++ b/intern/cycles/device/oneapi/device_impl.cpp @@ -1292,6 +1292,7 @@ void OneapiDevice::get_adjusted_global_and_local_sizes(SyclQueue *queue, case DEVICE_KERNEL_SHADER_EVAL_DISPLACE: case DEVICE_KERNEL_SHADER_EVAL_BACKGROUND: case DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY: + case DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY: preferred_work_group_size = preferred_work_group_size_shader_evaluation; break; diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index b3b3faa4ae6..228761ea2fe 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -594,6 +594,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features) group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].raygen.module = optix_module; group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].raygen.entryFunctionName = "__raygen__kernel_optix_shader_eval_curve_shadow_transparency"; + group_descs[PG_RGEN_EVAL_VOLUME_DENSITY].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; + group_descs[PG_RGEN_EVAL_VOLUME_DENSITY].raygen.module = optix_module; + group_descs[PG_RGEN_EVAL_VOLUME_DENSITY].raygen.entryFunctionName = + "__raygen__kernel_optix_shader_eval_volume_density"; } # ifdef WITH_OSL @@ -1034,6 +1038,7 @@ bool OptiXDevice::load_osl_kernels() pipeline_groups.push_back(groups[PG_RGEN_EVAL_BACKGROUND]); pipeline_groups.push_back(groups[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY]); pipeline_groups.push_back(groups[PG_RGEN_INIT_FROM_CAMERA]); + pipeline_groups.push_back(groups[PG_RGEN_EVAL_VOLUME_DENSITY]); for (const OptixProgramGroup &group : osl_groups) { if (group != nullptr) { diff --git a/intern/cycles/device/optix/device_impl.h b/intern/cycles/device/optix/device_impl.h index 197ca9196f1..f1db63feb9b 100644 --- a/intern/cycles/device/optix/device_impl.h +++ b/intern/cycles/device/optix/device_impl.h @@ -38,6 +38,7 @@ enum { PG_RGEN_EVAL_BACKGROUND, PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY, PG_RGEN_INIT_FROM_CAMERA, + PG_RGEN_EVAL_VOLUME_DENSITY, /* Miss */ PG_MISS, diff --git a/intern/cycles/device/optix/queue.cpp b/intern/cycles/device/optix/queue.cpp index 4465889a254..d86667e5230 100644 --- a/intern/cycles/device/optix/queue.cpp +++ b/intern/cycles/device/optix/queue.cpp @@ -82,7 +82,8 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, } if (kernel == DEVICE_KERNEL_SHADER_EVAL_DISPLACE || kernel == DEVICE_KERNEL_SHADER_EVAL_BACKGROUND || - kernel == DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY) + kernel == DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY || + kernel == DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY) { set_launch_param(offsetof(KernelParamsOptiX, offset), sizeof(int32_t), 2); } @@ -167,6 +168,10 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY * sizeof(SbtRecord); break; + case DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY: + pipeline = optix_device->pipelines[PIP_SHADE]; + sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_VOLUME_DENSITY * sizeof(SbtRecord); + break; case DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA: pipeline = optix_device->pipelines[PIP_SHADE]; diff --git a/intern/cycles/integrator/shader_eval.cpp b/intern/cycles/integrator/shader_eval.cpp index 90ccb5d94a7..335842b8f22 100644 --- a/intern/cycles/integrator/shader_eval.cpp +++ b/intern/cycles/integrator/shader_eval.cpp @@ -114,6 +114,9 @@ bool ShaderEval::eval_cpu(Device *device, case SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY: kernels.shader_eval_curve_shadow_transparency(kg, input_data, output_data, work_index); break; + case SHADER_EVAL_VOLUME_DENSITY: + kernels.shader_eval_volume_density(kg, input_data, output_data, work_index); + break; } }); }); @@ -139,6 +142,8 @@ bool ShaderEval::eval_gpu(Device *device, case SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY: kernel = DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY; break; + case SHADER_EVAL_VOLUME_DENSITY: + kernel = DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY; }; /* Create device queue. */ diff --git a/intern/cycles/integrator/shader_eval.h b/intern/cycles/integrator/shader_eval.h index d18d25eb60f..00538dd116b 100644 --- a/intern/cycles/integrator/shader_eval.h +++ b/intern/cycles/integrator/shader_eval.h @@ -19,6 +19,7 @@ enum ShaderEvalType { SHADER_EVAL_DISPLACE, SHADER_EVAL_BACKGROUND, SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY, + SHADER_EVAL_VOLUME_DENSITY, }; /* ShaderEval class performs shader evaluation for background light and displacement. */ diff --git a/intern/cycles/kernel/bake/bake.h b/intern/cycles/kernel/bake/bake.h index 68c93cb65e0..51c061b238f 100644 --- a/intern/cycles/kernel/bake/bake.h +++ b/intern/cycles/kernel/bake/bake.h @@ -9,6 +9,7 @@ #include "kernel/camera/projection.h" #include "kernel/integrator/displacement_shader.h" #include "kernel/integrator/surface_shader.h" +#include "kernel/integrator/volume_shader.h" #include "kernel/geom/object.h" #include "kernel/geom/shader_data.h" @@ -115,4 +116,87 @@ ccl_device void kernel_curve_shadow_transparency_evaluate( #endif } +ccl_device void kernel_volume_density_evaluate(KernelGlobals kg, + ccl_global const KernelShaderEvalInput *input, + ccl_global float *output, + const int offset) +{ +#ifdef __VOLUME__ + if (input[offset * 2 + 1].object == SHADER_NONE) { + return; + } + + KernelShaderEvalInput in = input[offset * 2]; + + /* Setup ray. */ + Ray ray; + ray.P = make_float3(__int_as_float(in.prim), in.u, in.v); + ray.D = zero_float3(); + ray.tmin = 0.0f; + /* Motion blur is ignored when computing the extrema of the density, but we also don't expect the + * value to change a lot in one frame. */ + ray.time = 0.5f; + + /* Setup shader data. */ + ShaderData sd; + shader_setup_from_volume(&sd, &ray, in.object); + sd.flag = SD_IS_VOLUME_SHADER_EVAL; + + /* Evaluate extinction and emission without allocating closures. */ + sd.num_closure_left = 0; + /* Evaluate density for camera ray because it usually makes the most visual impact. For shaders + * that depends on ray types, the extrema are estimated on the fly. */ + /* TODO(weizhen): Volume invisible to camera ray might appear noisy. We can at least build a + * separate octree for shadow ray. */ + const uint32_t path_flag = PATH_RAY_CAMERA; + + /* Setup volume stack entry. */ + in = input[offset * 2 + 1]; + const int shader = in.object; + const VolumeStack entry = {sd.object, shader}; + + const float3 voxel_size = make_float3(__int_as_float(in.prim), in.u, in.v); + Extrema extrema = {FLT_MAX, -FLT_MAX}; + /* For heterogeneous volume, we take 16 samples per grid; + * for homogeneous volume, only 1 sample is needed. */ + const int num_samples = volume_is_homogeneous(kg, entry) ? 1 : 16; + + const bool need_transformation = !(kernel_data_fetch(object_flag, sd.object) & + SD_OBJECT_TRANSFORM_APPLIED); + const Transform tfm = need_transformation ? + object_fetch_transform(kg, sd.object, OBJECT_TRANSFORM) : + Transform(); + for (int sample = 0; sample < num_samples; sample++) { + /* Blue noise indexing. The sequence length is the number of samples. */ + const uint3 index = make_uint3(sample + offset * num_samples, 0, 0xffffffff); + + /* Sample a random position inside the voxel. */ + const float3 rand_p = sobol_burley_sample_3D( + index.x, PRNG_BAKE_VOLUME_DENSITY_EVAL, index.y, index.z); + sd.P = ray.P + rand_p * voxel_size; + if (need_transformation) { + /* Convert to world spcace. */ + sd.P = transform_point(&tfm, sd.P); + } + sd.closure_transparent_extinction = zero_float3(); + sd.closure_emission_background = zero_float3(); + + /* Evaluate volume coefficients. */ + volume_shader_eval_entry( + kg, INTEGRATOR_STATE_NULL, &sd, entry, path_flag); + + const float sigma = reduce_max(sd.closure_transparent_extinction); + const float emission = reduce_max(sd.closure_emission_background); + + extrema = merge(extrema, fmaxf(sigma, emission)); + } + + /* Write output. */ + const float scale = object_volume_density(kg, sd.object); + output[offset * 2 + 0] = extrema.min / scale; + output[offset * 2 + 1] = extrema.max / scale; +#endif +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/data_arrays.h b/intern/cycles/kernel/data_arrays.h index f92db26c386..bea3f2320aa 100644 --- a/intern/cycles/kernel/data_arrays.h +++ b/intern/cycles/kernel/data_arrays.h @@ -85,4 +85,9 @@ KERNEL_DATA_ARRAY(TextureInfo, texture_info) /* ies lights */ KERNEL_DATA_ARRAY(float, ies) +/* Volume. */ +KERNEL_DATA_ARRAY(KernelOctreeNode, volume_tree_nodes) +KERNEL_DATA_ARRAY(KernelOctreeRoot, volume_tree_roots) +KERNEL_DATA_ARRAY(int, volume_tree_root_ids) + #undef KERNEL_DATA_ARRAY diff --git a/intern/cycles/kernel/data_template.h b/intern/cycles/kernel/data_template.h index 4d8b8ca8665..67d87e4aeaa 100644 --- a/intern/cycles/kernel/data_template.h +++ b/intern/cycles/kernel/data_template.h @@ -41,10 +41,10 @@ KERNEL_STRUCT_MEMBER(background, int, use_mis) KERNEL_STRUCT_MEMBER(background, int, lightgroup) /* Light Index. */ KERNEL_STRUCT_MEMBER(background, int, light_index) +/* Object Index. */ +KERNEL_STRUCT_MEMBER(background, int, object_index) /* Padding. */ KERNEL_STRUCT_MEMBER(background, int, pad1) -KERNEL_STRUCT_MEMBER(background, int, pad2) -KERNEL_STRUCT_MEMBER(background, int, pad3) KERNEL_STRUCT_END(KernelBackground) /* BVH: own BVH2 if no native device acceleration struct used. */ diff --git a/intern/cycles/kernel/device/cpu/kernel_arch.h b/intern/cycles/kernel/device/cpu/kernel_arch.h index bd0a0724145..e959e3bb9aa 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch.h @@ -81,6 +81,10 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_curve_shadow_transparency)( const KernelShaderEvalInput *input, float *output, const int offset); +void KERNEL_FUNCTION_FULL_NAME(shader_eval_volume_density)(const ThreadKernelGlobalsCPU *kg, + const KernelShaderEvalInput *input, + float *output, + const int offset); /* -------------------------------------------------------------------- * Adaptive sampling. diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h index 6ce6c01a3f3..8fb1acc6c51 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h @@ -134,6 +134,22 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_curve_shadow_transparency)( #endif } +void KERNEL_FUNCTION_FULL_NAME(shader_eval_volume_density)(const ThreadKernelGlobalsCPU *kg, + const KernelShaderEvalInput *input, + float *output, + const int offset) +{ +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, shader_eval_volume_density); + (void)kg; + (void)input; + (void)output; + (void)offset; +#else + kernel_volume_density_evaluate(kg, input, output, offset); +#endif +} + /* -------------------------------------------------------------------- * Adaptive sampling. */ diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index 659e041ea3e..bf165bf40e5 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -955,6 +955,22 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) } ccl_gpu_kernel_postfix +/* Volume Density. */ + +ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) + ccl_gpu_kernel_signature(shader_eval_volume_density, + ccl_global KernelShaderEvalInput *input, + ccl_global float *output, + const int offset, + const int work_size) +{ + int i = ccl_gpu_global_id_x(); + if (i < work_size) { + ccl_gpu_kernel_call(kernel_volume_density_evaluate(nullptr, input, output, offset + i)); + } +} +ccl_gpu_kernel_postfix + /* -------------------------------------------------------------------- * Denoising. */ diff --git a/intern/cycles/kernel/device/oneapi/kernel.cpp b/intern/cycles/kernel/device/oneapi/kernel.cpp index 8f09961267b..9268a3ce967 100644 --- a/intern/cycles/kernel/device/oneapi/kernel.cpp +++ b/intern/cycles/kernel/device/oneapi/kernel.cpp @@ -594,6 +594,11 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context, oneapi_kernel_shader_eval_curve_shadow_transparency); break; } + case DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_shader_eval_volume_density); + break; + } case DEVICE_KERNEL_PREFIX_SUM: { oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_prefix_sum); break; diff --git a/intern/cycles/kernel/device/optix/kernel_osl.cu b/intern/cycles/kernel/device/optix/kernel_osl.cu index 9d3a4b3513b..d32845ae3bc 100644 --- a/intern/cycles/kernel/device/optix/kernel_osl.cu +++ b/intern/cycles/kernel/device/optix/kernel_osl.cu @@ -94,3 +94,11 @@ extern "C" __global__ void __raygen__kernel_optix_shader_eval_curve_shadow_trans const int global_index = kernel_params.offset + optixGetLaunchIndex().x; kernel_curve_shadow_transparency_evaluate(nullptr, input, output, global_index); } + +extern "C" __global__ void __raygen__kernel_optix_shader_eval_volume_density() +{ + KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array; + float *const output = kernel_params.render_buffer; + const int global_index = kernel_params.offset + optixGetLaunchIndex().x; + kernel_volume_density_evaluate(nullptr, input, output, global_index); +} diff --git a/intern/cycles/kernel/integrator/intersect_volume_stack.h b/intern/cycles/kernel/integrator/intersect_volume_stack.h index 9d313744446..2a2428f640a 100644 --- a/intern/cycles/kernel/integrator/intersect_volume_stack.h +++ b/intern/cycles/kernel/integrator/intersect_volume_stack.h @@ -108,7 +108,8 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s * background volume is always assumed to be CG. */ if (kernel_data.background.volume_shader != SHADER_NONE) { if (!(path_flag & PATH_RAY_SHADOW_CATCHER_PASS)) { - INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, stack_index, object) = OBJECT_NONE; + INTEGRATOR_STATE_ARRAY_WRITE( + state, volume_stack, stack_index, object) = kernel_data.background.object_index; INTEGRATOR_STATE_ARRAY_WRITE( state, volume_stack, stack_index, shader) = kernel_data.background.volume_shader; stack_index++; diff --git a/intern/cycles/kernel/integrator/path_state.h b/intern/cycles/kernel/integrator/path_state.h index 30944e8ebda..9e41c8731d3 100644 --- a/intern/cycles/kernel/integrator/path_state.h +++ b/intern/cycles/kernel/integrator/path_state.h @@ -84,7 +84,8 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg, INTEGRATOR_STATE_WRITE(state, isect, type) = PRIMITIVE_NONE; if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) { - INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 0, object) = OBJECT_NONE; + INTEGRATOR_STATE_ARRAY_WRITE( + state, volume_stack, 0, object) = kernel_data.background.object_index; INTEGRATOR_STATE_ARRAY_WRITE( state, volume_stack, 0, shader) = kernel_data.background.volume_shader; INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 1, object) = OBJECT_NONE; diff --git a/intern/cycles/kernel/integrator/shade_shadow.h b/intern/cycles/kernel/integrator/shade_shadow.h index b4e231085e8..af5c0972f2d 100644 --- a/intern/cycles/kernel/integrator/shade_shadow.h +++ b/intern/cycles/kernel/integrator/shade_shadow.h @@ -94,9 +94,7 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg, /* `object` is only needed for light tree with light linking, it is irrelevant for shadow. */ shader_setup_from_volume(shadow_sd, &ray, OBJECT_NONE); - const float step_size = volume_stack_step_size(kg, state); - - volume_shadow_heterogeneous(kg, state, &ray, shadow_sd, throughput, step_size); + volume_shadow_heterogeneous(kg, state, &ray, shadow_sd, throughput); } # endif diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h index 4e860222971..ed12ad326bc 100644 --- a/intern/cycles/kernel/integrator/shade_volume.h +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -57,6 +57,10 @@ struct VolumeIntegrateResult { * todo: this value could be tweaked or turned into a probability to avoid unnecessary * work in volumes and subsurface scattering. */ # define VOLUME_THROUGHPUT_EPSILON 1e-6f +/* TODO(weizhen): tweak this value. */ +# define OVERLAP_EXP 5e-4f +/* Number of mantissa bits of floating-point numbers. */ +# define MANTISSA_BITS 23 /* Volume shader properties * @@ -133,60 +137,430 @@ struct VolumeStep { /* Perform shading at this offset within a step, to integrate over the entire step segment. */ float shade_offset; - /* Maximal steps allowed between `ray->tmin` and `ray->tmax`. */ - int max_steps; + /* Current step. */ + int step; /* Current active segment. */ Interval t; }; -template ccl_device_forceinline void volume_step_init(KernelGlobals kg, const ccl_private RNGState *rng_state, - const float object_step_size, const float tmin, - const float tmax, ccl_private VolumeStep *vstep) { + vstep->step = 0; vstep->t.min = vstep->t.max = tmin; + vstep->shade_offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SHADE_OFFSET); + vstep->offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_OFFSET); +} - if (object_step_size == FLT_MAX) { - /* Homogeneous volume. */ - vstep->size = tmax - tmin; - vstep->shade_offset = 0.0f; - vstep->offset = 1.0f; - vstep->max_steps = 1; +/* -------------------------------------------------------------------- */ +/** \name Hierarchical DDA for ray tracing the volume octree + * + * Following "Efficient Sparse Voxel Octrees" by Samuli Laine and Tero Karras, + * and the implementation in https://dubiousconst282.github.io/2024/10/03/voxel-ray-tracing/ + * + * The ray segment is transformed into octree space [1, 2), with `ray->D` pointing all negative + * directions. At each ray tracing step, we intersect the backface of the current active leaf node + * to find `t.max`, then store a point `current_P` which lies in the adjacent leaf node. The next + * leaf node is found by checking the higher bits of `current_P`. + * + * The paper suggests to keep a stack of parent nodes, in practice such a stack (even when the size + * is just 8) slows down performance on GPU. Instead we store the parent index in the leaf node + * directly, since there is sufficient space due to alignment. + * + * \{ */ + +struct OctreeTracing { + /* Current active leaf node. */ + ccl_global const KernelOctreeNode *node = nullptr; + + /* Current active ray segment, typically spans from the front face to the back face of the + * current leaf node. */ + Interval t; + + /* Ray origin in octree coordinate space. */ + packed_float3 ray_P; + + /* Ray direction in octree coordinate space. */ + packed_float3 ray_D; + + /* Current active position in octree coordinate space. */ + uint3 current_P; + + /* Object and shader which the octree represents. */ + VolumeStack entry = {OBJECT_NONE, SHADER_NONE}; + + /* Scale of the current active leaf node, relative to the smallest possible size representable by + * float. Initialize to the number of float mantissa bits. */ + uint8_t scale = MANTISSA_BITS; + uint8_t next_scale; + /* Mark the dimension (x,y,z) to negate the ray so that we find the correct octant. */ + uint8_t octant_mask; + + /* Whether multiple volumes overlap in the ray segment. */ + bool no_overlap = false; + + /* Maximum and minimum of the densities in the current segment. */ + Extrema sigma = 0.0f; + + ccl_device_inline_method OctreeTracing(const float tmin) + { + /* Initialize t.max to FLT_MAX so that any intersection with the node face is smaller. */ + t = {tmin, FLT_MAX}; } - else { - /* Heterogeneous volume. */ - vstep->max_steps = kernel_data.integrator.volume_max_steps; - const float t = tmax - tmin; - float step_size = min(object_step_size, t); - if (t > vstep->max_steps * step_size) { - /* Increase step size to cover the whole ray segment. */ - step_size = t / (float)vstep->max_steps; + enum Dimension { DIM_X = 1U << 0U, DIM_Y = 1U << 1U, DIM_Z = 1U << 2U }; + + /* Given ray origin `P` and direction `D` in object space, convert them into octree space + * [1.0, 2.0). + * Returns false if ray is leaving the octree or octree has degenerate shape. */ + ccl_device_inline_method bool to_octree_space(ccl_private const float3 &P, + ccl_private const float3 &D, + const float3 scale, + const float3 translation) + { + if (!isfinite_safe(scale)) { + /* Octree with a degenerate shape. */ + return false; } - vstep->size = step_size; - vstep->shade_offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SHADE_OFFSET); + /* Starting point of octree tracing. */ + float3 local_P = (P + D * t.min) * scale + translation; + ray_D = D * scale; - if (shadow) { - /* For shadows we do not offset all segments, since the starting point is already a random - * distance inside the volume. It also appears to create banding artifacts for unknown - * reasons. */ - vstep->offset = 1.0f; - } - else { - vstep->offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_OFFSET); - } + /* Select octant mask to mirror the coordinate system so that ray direction is negative along + * each axis, and adjust `local_P` accordingly. */ + const auto positive = ray_D > 0.0f; + octant_mask = (!!positive.x * DIM_X) | (!!positive.y * DIM_Y) | (!!positive.z * DIM_Z); + local_P = select(positive, 3.0f - local_P, local_P); + + /* Clamp to the largest floating-point number smaller than 2.0f, for numerical stability. */ + local_P = min(local_P, make_float3(1.9999999f)); + current_P = float3_as_uint3(local_P); + + ray_D = -fabs(ray_D); + + /* Ray origin. */ + ray_P = local_P - ray_D * t.min; + + /* Returns false if point lies outside of the octree and the ray is leaving the octree. */ + return all(local_P > 1.0f); + } + + /* Find the bounding box min of the node that `current_P` lies in within the current scale. */ + ccl_device_inline_method float3 floor_pos() const + { + /* Erase bits lower than scale. */ + const uint mask = ~0u << scale; + return make_float3(__uint_as_float(current_P.x & mask), + __uint_as_float(current_P.y & mask), + __uint_as_float(current_P.z & mask)); + } + + /* Find arbitrary position inside the next node. + * We use the end of the current segment offsetted by half of the minimal node size in the normal + * direction of the last face intersection. */ + ccl_device_inline_method void find_next_pos(const float3 bbox_min, + const float3 t, + const float tmax) + { + constexpr float half_size = 1.0f / (2 << VOLUME_OCTREE_MAX_DEPTH); + const uint3 next_P = float3_as_uint3( + select(t == tmax, bbox_min - half_size, ray_D * tmax + ray_P)); + + /* Find the nearest common ancestor of two positions by checking the shared higher bits. */ + const uint diff = (current_P.x ^ next_P.x) | (current_P.y ^ next_P.y) | + (current_P.z ^ next_P.z); + + current_P = next_P; + next_scale = 32u - count_leading_zeros(diff); + } + + /* See `ray_aabb_intersect()`. We only need to intersect the 3 back sides because the ray + * direction is all negative. */ + ccl_device_inline_method float ray_voxel_intersect(const float ray_tmax) + { + const float3 bbox_min = floor_pos(); + + /* Distances to the three surfaces. */ + float3 intersect_t = (bbox_min - ray_P) / ray_D; + + /* Select the smallest element that is larger than `t.min`, to avoid self intersection. */ + intersect_t = select(intersect_t > t.min, intersect_t, make_float3(FLT_MAX)); + + /* The first intersection is given by the smallest t. */ + const float tmax = reduce_min(intersect_t); + + find_next_pos(bbox_min, intersect_t, tmax); + + return fminf(tmax, ray_tmax); + } + + /* Returns the octant of `current_P` in the node at given scale. */ + ccl_device_inline_method int get_octant() const + { + const uint8_t x = (current_P.x >> scale) & 1u; + const uint8_t y = ((current_P.y >> scale) & 1u) << 1u; + const uint8_t z = ((current_P.z >> scale) & 1u) << 2u; + return (x | y | z) ^ octant_mask; + } +}; + +/* Check if an octree node is leaf node. */ +ccl_device_inline bool volume_node_is_leaf(const ccl_global KernelOctreeNode *knode) +{ + return knode->first_child == -1; +} + +/* Find the leaf node of the current position, and replace `octree.node` with that node. */ +ccl_device void volume_voxel_get(KernelGlobals kg, ccl_private OctreeTracing &octree) +{ + while (!volume_node_is_leaf(octree.node)) { + octree.scale -= 1; + const int child_index = octree.node->first_child + octree.get_octant(); + octree.node = &kernel_data_fetch(volume_tree_nodes, child_index); } } -ccl_device_inline bool volume_integrate_advance(const int step, - const ccl_private Ray *ccl_restrict ray, - ccl_private float3 *shade_P, - ccl_private VolumeStep &vstep) +/* If there exists a Light Path Node, it could affect the density evaluation at runtime. + * Randomly sample a few points on the ray to estimate the extrema. */ +template +ccl_device_noinline Extrema volume_estimate_extrema(KernelGlobals kg, + const ccl_private Ray *ccl_restrict ray, + ccl_private ShaderData *ccl_restrict sd, + const IntegratorGenericState state, + const ccl_private RNGState *rng_state, + const uint32_t path_flag, + const VolumeStack entry) +{ + const bool homogeneous = volume_is_homogeneous(kg, entry); + const int samples = homogeneous ? 1 : 4; + const float shade_offset = homogeneous ? + 0.5f : + path_state_rng_2D(kg, rng_state, PRNG_VOLUME_SHADE_OFFSET).y; + const float step_size = (ray->tmax - ray->tmin) / float(samples); + + /* Do not allocate closures. */ + sd->num_closure_left = 0; + + Extrema extrema = {FLT_MAX, -FLT_MAX}; + for (int i = 0; i < samples; i++) { + const float shade_t = min(ray->tmax, ray->tmin + (shade_offset + i) * step_size); + sd->P = ray->P + ray->D * shade_t; + + sd->closure_transparent_extinction = zero_float3(); + sd->closure_emission_background = zero_float3(); + + volume_shader_eval_entry( + kg, state, sd, entry, path_flag); + + const float sigma = reduce_max(sd->closure_transparent_extinction); + const float emission = reduce_max(sd->closure_emission_background); + + extrema = merge(extrema, fmaxf(sigma, emission)); + } + + if (!homogeneous) { + /* Slightly increase the majorant in case the estimation is not accurate. */ + extrema.max = fmaxf(0.5f, extrema.max * 1.5f); + } + + return extrema; +} + +/* Given an octree node, compute it's extrema. + * In most common cases, the extrema are already stored in the node, but if the shader contains + * a light path node, we need to evaluate the densities on the fly. */ +template +ccl_device_inline Extrema volume_object_get_extrema(KernelGlobals kg, + const ccl_private Ray *ccl_restrict ray, + ccl_private ShaderData *ccl_restrict sd, + const IntegratorGenericState state, + const ccl_private OctreeTracing &octree, + const ccl_private RNGState *rng_state, + const uint32_t path_flag) +{ + const int shader_flag = kernel_data_fetch(shaders, (octree.entry.shader & SHADER_MASK)).flags; + if ((path_flag & PATH_RAY_CAMERA) || !(shader_flag & SD_HAS_LIGHT_PATH_NODE)) { + /* Use the baked volume density extrema. */ + return octree.node->sigma * object_volume_density(kg, octree.entry.object); + } + + return volume_estimate_extrema(kg, ray, sd, state, rng_state, path_flag, octree.entry); +} + +/* Find the octree root node in the kernel array that corresponds to the volume stack entry. */ +ccl_device_inline const ccl_global KernelOctreeRoot *volume_find_octree_root( + KernelGlobals kg, const VolumeStack entry) +{ + int root = kernel_data_fetch(volume_tree_root_ids, entry.object); + const ccl_global KernelOctreeRoot *kroot = &kernel_data_fetch(volume_tree_roots, root); + while ((entry.shader & SHADER_MASK) != kroot->shader) { + /* If one object has multiple shaders, we store the index of the last shader, and search + * backwards for the octree with the corresponding shader. */ + kroot = &kernel_data_fetch(volume_tree_roots, --root); + } + return kroot; +} + +/* Find the current active ray segment. + * We might have multiple overlapping octrees, so find the smallest `tmax` of all and store the + * information of that octree in `OctreeTracing`. + * Meanwhile, accumulate the density of all the leaf nodes that overlap with the active segment. */ +template +ccl_device bool volume_octree_setup(KernelGlobals kg, + const ccl_private Ray *ccl_restrict ray, + ccl_private ShaderData *ccl_restrict sd, + const IntegratorGenericState state, + const ccl_private RNGState *rng_state, + const uint32_t path_flag, + ccl_private OctreeTracing &global, + ccl_private VolumeStep &vstep) +{ + if (global.no_overlap) { + /* If the current active octree is already set up. */ + return !global.t.is_empty(); + } + + const VolumeStack skip = global.entry; + + int i = 0; + for (;; i++) { + /* Loop through all the object in the volume stack and find their octrees. */ + const VolumeStack entry = volume_stack_read(state, i); + + if (entry.shader == SHADER_NONE) { + break; + } + + if (entry.object == skip.object && entry.shader == skip.shader) { + continue; + } + + const ccl_global KernelOctreeRoot *kroot = volume_find_octree_root(kg, entry); + + OctreeTracing local(global.t.min); + local.node = &kernel_data_fetch(volume_tree_nodes, kroot->id); + local.entry = entry; + + /* Convert to object space. */ + float3 local_P = ray->P, local_D = ray->D; + if (!(kernel_data_fetch(object_flag, entry.object) & SD_OBJECT_TRANSFORM_APPLIED)) { + const Transform itfm = object_fetch_transform(kg, entry.object, OBJECT_INVERSE_TRANSFORM); + local_P = transform_point(&itfm, ray->P); + local_D = transform_direction(&itfm, ray->D); + } + + /* Convert to octree space. */ + if (local.to_octree_space(local_P, local_D, kroot->scale, kroot->translation)) { + volume_voxel_get(kg, local); + local.t.max = local.ray_voxel_intersect(ray->tmax); + } + else { + /* Current ray segment lies outside of the octree, usually happens with implicit volume, i.e. + * everything behind a surface is considered as volume. */ + local.t.max = ray->tmax; + } + + global.sigma += volume_object_get_extrema( + kg, ray, sd, state, local, rng_state, path_flag); + if (local.t.max <= global.t.max) { + /* Replace the current active octree with the one that has the smallest `tmax`. */ + local.sigma = global.sigma; + global = local; + } + } + + if (!global.node) { + /* Stack empty. */ + return false; + } + + if (global.t.is_empty()) { + return false; + } + + if (i == 1) { + global.no_overlap = true; + } + + /* Step size should ideally be as small as the active voxel span, but not so small that we + * never exit the volume. */ + const int steps_left = kernel_data.integrator.volume_max_steps - vstep.step; + vstep.size = fminf(global.t.length(), 1.0f / global.sigma.range()); + vstep.size = fmaxf(vstep.size, (ray->tmax - vstep.t.min) / float(steps_left)); + + return true; +} + +/* Advance to the next adjacent leaf node and update the active interval. */ +template +ccl_device_inline bool volume_octree_advance(KernelGlobals kg, + const ccl_private Ray *ccl_restrict ray, + ccl_private ShaderData *ccl_restrict sd, + const IntegratorGenericState state, + const ccl_private RNGState *rng_state, + const uint32_t path_flag, + ccl_private OctreeTracing &octree, + ccl_private VolumeStep &vstep) +{ + if (octree.t.max >= ray->tmax) { + /* Reached the last segment. */ + return false; + } + + if (vstep.step > kernel_data.integrator.volume_max_steps) { + /* Exceeds maximal steps. */ + return false; + } + + if (octree.next_scale > MANTISSA_BITS) { + if (fabsf(octree.t.max - ray->tmax) <= OVERLAP_EXP) { + /* This could happen due to numerical issues, when the bounding box overlaps with a + * primitive, but different intersections are registered for octree and ray intersection. */ + return false; + } + + /* Outside of the root node, continue tracing using the extrema of the root node. */ + octree.t = {octree.t.max, ray->tmax}; + octree.node = &kernel_data_fetch(volume_tree_nodes, + volume_find_octree_root(kg, octree.entry)->id); + } + else { + kernel_assert(octree.next_scale > octree.scale); + + /* Fetch the common ancestor of the current and the next leaf nodes. */ + for (; octree.scale < octree.next_scale; octree.scale++) { + kernel_assert(octree.node->parent != -1); + octree.node = &kernel_data_fetch(volume_tree_nodes, octree.node->parent); + } + + /* Find the current active leaf node. */ + volume_voxel_get(kg, octree); + + /* Advance to the next segment. */ + octree.t.min = octree.t.max; + octree.t.max = octree.ray_voxel_intersect(ray->tmax); + } + + octree.sigma = volume_object_get_extrema( + kg, ray, sd, state, octree, rng_state, path_flag); + return volume_octree_setup(kg, ray, sd, state, rng_state, path_flag, octree, vstep); +} + +/* Advance to the next interval. If the step size exceeds the current leaf node, find the next leaf + * node. */ +template +ccl_device bool volume_integrate_advance(KernelGlobals kg, + const ccl_private Ray *ccl_restrict ray, + ccl_private ShaderData *ccl_restrict sd, + const IntegratorGenericState state, + const ccl_private RNGState *rng_state, + const uint32_t path_flag, + ccl_private OctreeTracing &octree, + ccl_private VolumeStep &vstep) { if (vstep.t.max == ray->tmax) { /* Reached the last segment. */ @@ -195,13 +569,33 @@ ccl_device_inline bool volume_integrate_advance(const int step, /* Advance to new position. */ vstep.t.min = vstep.t.max; - vstep.t.max = min(ray->tmax, ray->tmin + (step + vstep.offset) * vstep.size); - const float shade_t = mix(vstep.t.min, vstep.t.max, vstep.shade_offset); - *shade_P = ray->P + ray->D * shade_t; + bool success = true; + if (!octree.node) { + /* Initialize octree. */ + success = volume_octree_setup(kg, ray, sd, state, rng_state, path_flag, octree, vstep); + vstep.t.max = octree.t.min + vstep.offset * vstep.size; + } + else { + float candidate_t_max = vstep.t.max + vstep.size; + if (candidate_t_max >= octree.t.max) { + /* Advance to next voxel. */ + volume_octree_advance(kg, ray, sd, state, rng_state, path_flag, octree, vstep); + candidate_t_max = fminf(candidate_t_max, vstep.t.max + vstep.size); + } + vstep.t.max = candidate_t_max; + } - return step < vstep.max_steps; + /* Clamp to prevent numerical issues. */ + vstep.t.max = clamp(vstep.t.max, vstep.t.min + OVERLAP_EXP, ray->tmax); + + const float shade_t = mix(vstep.t.min, vstep.t.max, vstep.shade_offset); + sd->P = ray->P + ray->D * shade_t; + + return success && vstep.step++ < kernel_data.integrator.volume_max_steps; } +/** \} */ + /* Volume Shadows * * These functions are used to attenuate shadow rays to lights. Both absorption @@ -213,8 +607,7 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, IntegratorShadowState state, ccl_private Ray *ccl_restrict ray, ccl_private ShaderData *ccl_restrict sd, - ccl_private Spectrum *ccl_restrict throughput, - const float object_step_size) + ccl_private Spectrum *ccl_restrict throughput) { /* Load random number state. */ RNGState rng_state; @@ -228,11 +621,15 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, /* Prepare for stepping. */ VolumeStep vstep; - volume_step_init(kg, &rng_state, object_step_size, ray->tmin, ray->tmax, &vstep); + volume_step_init(kg, &rng_state, ray->tmin, &vstep); + + OctreeTracing octree(ray->tmin); + const uint32_t path_flag = PATH_RAY_SHADOW; /* compute extinction at the start */ Spectrum sum = zero_spectrum(); - for (int step = 0; volume_integrate_advance(step, ray, &sd->P, vstep); step++) { + for (; volume_integrate_advance(kg, ray, sd, state, &rng_state, path_flag, octree, vstep);) + { /* compute attenuation over segment */ Spectrum sigma_t = zero_spectrum(); if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) { @@ -240,7 +637,7 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, * because `exp(a)*exp(b) = exp(a+b)`, also do a quick #VOLUME_THROUGHPUT_EPSILON * check then. */ sum += (-sigma_t * vstep.t.length()); - if ((step & 0x07) == 0) { /* TODO: Other interval? */ + if ((vstep.step & 0x07) == 0) { /* TODO: Other interval? */ tp = *throughput * exp(sum); /* stop if nearly all light is blocked */ @@ -541,8 +938,8 @@ ccl_device_forceinline void volume_integrate_step_scattering( } ccl_device_inline void volume_integrate_state_init(KernelGlobals kg, - const ccl_private RNGState *rng_state, const VolumeSampleMethod direct_sample_method, + const ccl_private RNGState *rng_state, ccl_private VolumeIntegrateState &vstate) { vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SCATTER_DISTANCE); @@ -644,7 +1041,6 @@ ccl_device_forceinline void volume_integrate_heterogeneous( ccl_private ShaderData *ccl_restrict sd, const ccl_private RNGState *ccl_restrict rng_state, ccl_global float *ccl_restrict render_buffer, - const float object_step_size, ccl_private LightSample *ls, ccl_private VolumeIntegrateResult &result) { @@ -656,11 +1052,11 @@ ccl_device_forceinline void volume_integrate_heterogeneous( /* Prepare for stepping. */ VolumeStep vstep; - volume_step_init(kg, rng_state, object_step_size, ray->tmin, ray->tmax, &vstep); + volume_step_init(kg, rng_state, ray->tmin, &vstep); /* Initialize volume integration state. */ VolumeIntegrateState vstate ccl_optional_struct_init; - volume_integrate_state_init(kg, rng_state, direct_sample_method, vstate); + volume_integrate_state_init(kg, direct_sample_method, rng_state, vstate); /* Initialize volume integration result. */ const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); @@ -685,7 +1081,10 @@ ccl_device_forceinline void volume_integrate_heterogeneous( # endif Spectrum accum_emission = zero_spectrum(); - for (int step = 0; volume_integrate_advance(step, ray, &sd->P, vstep); step++) { + OctreeTracing octree(ray->tmin); + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + for (; volume_integrate_advance(kg, ray, sd, state, rng_state, path_flag, octree, vstep);) + { /* compute segment */ VolumeShaderCoefficients coeff ccl_optional_struct_init; if (volume_shader_sample(kg, state, sd, &coeff)) { @@ -1010,6 +1409,10 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg, ccl_private Ray *ccl_restrict ray, ccl_global float *ccl_restrict render_buffer) { + if (integrator_state_volume_stack_is_empty(kg, state)) { + return VOLUME_PATH_ATTENUATED; + } + ShaderData sd; /* FIXME: `object` is used for light linking. We read the bottom of the stack for simplicity, but * this does not work for overlapping volumes. */ @@ -1025,13 +1428,9 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg, LightSample ls ccl_optional_struct_init; - /* Step through volume. */ - const float step_size = volume_stack_step_size(kg, state); - /* TODO: expensive to zero closures? */ VolumeIntegrateResult result = {}; - volume_integrate_heterogeneous( - kg, state, ray, &sd, &rng_state, render_buffer, step_size, &ls, result); + volume_integrate_heterogeneous(kg, state, ray, &sd, &rng_state, render_buffer, &ls, result); # if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1 /* The current path throughput which is used later to calculate per-segment throughput. */ diff --git a/intern/cycles/kernel/integrator/volume_stack.h b/intern/cycles/kernel/integrator/volume_stack.h index 37e28ebd439..15db4c0d81b 100644 --- a/intern/cycles/kernel/integrator/volume_stack.h +++ b/intern/cycles/kernel/integrator/volume_stack.h @@ -145,7 +145,7 @@ ccl_device_inline bool volume_is_homogeneous(KernelGlobals kg, if (shader_flag & SD_NEED_VOLUME_ATTRIBUTES) { const int object = entry.object; - if (object == OBJECT_NONE) { + if (object == kernel_data.background.object_index) { /* Volume attributes for world is not supported. */ return true; } @@ -160,27 +160,6 @@ ccl_device_inline bool volume_is_homogeneous(KernelGlobals kg, return true; } -template -ccl_device float volume_stack_step_size(KernelGlobals kg, const IntegratorGenericState state) -{ - float step_size = FLT_MAX; - - for (int i = 0;; i++) { - const VolumeStack entry = volume_stack_read(state, i); - if (entry.shader == SHADER_NONE) { - break; - } - - if (!volume_is_homogeneous(kg, entry)) { - float object_step_size = object_volume_step_size(kg, entry.object); - object_step_size *= kernel_data.integrator.volume_step_rate; - step_size = fminf(object_step_size, step_size); - } - } - - return step_size; -} - enum VolumeSampleMethod { VOLUME_SAMPLE_NONE = 0, VOLUME_SAMPLE_DISTANCE = (1 << 0), diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index ea70caa921b..4d133ace952 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -309,6 +309,9 @@ enum PathTraceDimension { PRNG_SUBSURFACE_DISK = 0, PRNG_SUBSURFACE_DISK_RESAMPLE = 1, + /* Volume density baking. */ + PRNG_BAKE_VOLUME_DENSITY_EVAL = 0, + /* High enough number so we don't need to change it when adding new dimensions, * low enough so there is no uint16_t overflow with many bounces. */ PRNG_BOUNCE_NUM = 16, @@ -951,6 +954,8 @@ struct AttributeMap { #endif #define MAX_VOLUME_CLOSURE 8 // NOLINT +/* Set the maximal resolution to be 128 (2^7) to limit traversing overhead. */ +#define VOLUME_OCTREE_MAX_DEPTH 7 /* This struct is the base class for all closures. The common members are * duplicated in all derived classes since we don't have C++ in the kernel @@ -1030,6 +1035,8 @@ enum ShaderDataFlag { /* Shader flags. */ + /* If Light Path Node is present in the shader graph. */ + SD_HAS_LIGHT_PATH_NODE = (1 << 13), /* Apply a correction term to smooth illumination on grazing angles when using bump mapping. */ SD_USE_BUMP_MAP_CORRECTION = (1 << 15), /* Use front side for direct light sampling. */ @@ -1679,6 +1686,27 @@ struct KernelLightTreeNode { }; static_assert_align(KernelLightTreeNode, 16); +struct KernelOctreeRoot { + packed_float3 scale; + int id; + packed_float3 translation; + int shader; +}; + +struct KernelOctreeNode { + /* Index of the parent node in device vector `volume_tree_nodes`. */ + int parent; + + /* Index of the first child node in device vector `volume_tree_nodes`. All children of the same + * node are stored in contiguous memory. */ + int first_child; + + /* Minimal and maximal volume density inside the node. */ + /* TODO(weizhen): we can make sigma Spectral for better accuracy. Since only root and leaf nodes + * need sigma, we can introduce `KernelOctreeInnerNode` to reduce the size of the struct. */ + Extrema sigma; +}; + struct KernelLightTreeEmitter { /* Bounding cone. */ float theta_o; @@ -1833,6 +1861,7 @@ enum DeviceKernel : int { DEVICE_KERNEL_SHADER_EVAL_DISPLACE, DEVICE_KERNEL_SHADER_EVAL_BACKGROUND, DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY, + DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY, #define DECLARE_FILM_CONVERT_KERNEL(variant) \ DEVICE_KERNEL_FILM_CONVERT_##variant, DEVICE_KERNEL_FILM_CONVERT_##variant##_HALF_RGBA diff --git a/intern/cycles/scene/devicescene.cpp b/intern/cycles/scene/devicescene.cpp index 9450e9cace5..1ad1cee9276 100644 --- a/intern/cycles/scene/devicescene.cpp +++ b/intern/cycles/scene/devicescene.cpp @@ -54,7 +54,10 @@ DeviceScene::DeviceScene(Device *device) shaders(device, "shaders", MEM_GLOBAL), lookup_table(device, "lookup_table", MEM_GLOBAL), sample_pattern_lut(device, "sample_pattern_lut", MEM_GLOBAL), - ies_lights(device, "ies", MEM_GLOBAL) + ies_lights(device, "ies", MEM_GLOBAL), + volume_tree_nodes(device, "volume_tree_nodes", MEM_GLOBAL), + volume_tree_roots(device, "volume_tree_roots", MEM_GLOBAL), + volume_tree_root_ids(device, "volume_tree_root_ids", MEM_GLOBAL) { memset((void *)&data, 0, sizeof(data)); } diff --git a/intern/cycles/scene/devicescene.h b/intern/cycles/scene/devicescene.h index 83dcf10f373..fbb45c970da 100644 --- a/intern/cycles/scene/devicescene.h +++ b/intern/cycles/scene/devicescene.h @@ -86,6 +86,11 @@ class DeviceScene { /* IES lights */ device_vector ies_lights; + /* Volume. */ + device_vector volume_tree_nodes; + device_vector volume_tree_roots; + device_vector volume_tree_root_ids; + KernelData data; DeviceScene(Device *device); diff --git a/intern/cycles/scene/geometry.cpp b/intern/cycles/scene/geometry.cpp index 3c878d493f4..41369d0679e 100644 --- a/intern/cycles/scene/geometry.cpp +++ b/intern/cycles/scene/geometry.cpp @@ -347,6 +347,7 @@ void GeometryManager::device_update_preprocess(Device *device, Scene *scene, Pro bool volume_images_updated = false; for (Geometry *geom : scene->geometry) { + const bool prev_has_volume = geom->has_volume; geom->has_volume = false; update_attribute_realloc_flags(device_update_flags, geom->attributes); @@ -429,6 +430,18 @@ void GeometryManager::device_update_preprocess(Device *device, Scene *scene, Pro device_update_flags |= DEVICE_MESH_DATA_NEEDS_REALLOC; } + if (geom->has_volume) { + if (geom->is_modified()) { + scene->volume_manager->tag_update(geom); + } + if (!prev_has_volume) { + scene->volume_manager->tag_update(); + } + } + else if (prev_has_volume) { + scene->volume_manager->tag_update(geom); + } + if (geom->is_hair()) { Hair *hair = static_cast(geom); diff --git a/intern/cycles/scene/object.cpp b/intern/cycles/scene/object.cpp index 608520dd20d..3d75a8ba76d 100644 --- a/intern/cycles/scene/object.cpp +++ b/intern/cycles/scene/object.cpp @@ -227,6 +227,9 @@ void Object::tag_update(Scene *scene) if (geometry) { if (tfm_is_modified() || motion_is_modified()) { flag |= ObjectManager::TRANSFORM_MODIFIED; + if (geometry->has_volume) { + scene->volume_manager->tag_update(this, flag); + } } if (visibility_is_modified()) { @@ -770,6 +773,10 @@ void ObjectManager::device_update(Device *device, dscene->object_motion.tag_realloc(); dscene->object_flag.tag_realloc(); dscene->object_volume_step.tag_realloc(); + + /* If objects are added to the scene or deleted, the object indices might change, so we need to + * update the root indices of the volume octrees. */ + scene->volume_manager->tag_update_indices(); } if (update_flags & HOLDOUT_MODIFIED) { @@ -809,6 +816,16 @@ void ObjectManager::device_update(Device *device, dscene->object_flag.tag_modified(); dscene->object_volume_step.tag_modified(); } + + /* Update world object index. */ + if (!object->get_geometry()->is_light()) { + continue; + } + + const Light *light = static_cast(object->get_geometry()); + if (light->get_light_type() == LIGHT_BACKGROUND) { + dscene->data.background.object_index = object->index; + } } } diff --git a/intern/cycles/scene/scene.cpp b/intern/cycles/scene/scene.cpp index b4c8cd6cd08..0fb0bdcda9c 100644 --- a/intern/cycles/scene/scene.cpp +++ b/intern/cycles/scene/scene.cpp @@ -67,6 +67,7 @@ Scene ::Scene(const SceneParams ¶ms_, Device *device) particle_system_manager = make_unique(); bake_manager = make_unique(); procedural_manager = make_unique(); + volume_manager = make_unique(); /* Create nodes after managers, since create_node() can tag the managers. */ camera = create_node(); @@ -138,10 +139,9 @@ void Scene::free_memory(bool final) shader_manager->device_free(device, &dscene, this); osl_manager->device_free(device, &dscene, this); light_manager->device_free(device, &dscene); - particle_system_manager->device_free(device, &dscene); - bake_manager->device_free(device, &dscene); + volume_manager->device_free(&dscene); if (final) { image_manager->device_free(device); @@ -165,6 +165,7 @@ void Scene::free_memory(bool final) bake_manager.reset(); update_stats.reset(); procedural_manager.reset(); + volume_manager.reset(); } } @@ -313,6 +314,14 @@ void Scene::device_update(Device *device_, Progress &progress) return; } + /* Evaluate volume shader to build volume octrees. */ + progress.set_status("Updating Volume"); + volume_manager->device_update(device, &dscene, this, progress); + + if (progress.get_cancel() || device->have_error()) { + return; + } + progress.set_status("Updating Camera Volume"); camera->device_update_volume(device, &dscene, this); @@ -965,6 +974,9 @@ template<> void Scene::delete_node(Geometry *node) } else { flag = GeometryManager::MESH_REMOVED; + if (node->has_volume) { + volume_manager->tag_update(node); + } } geometry.erase_by_swap(node); @@ -974,8 +986,14 @@ template<> void Scene::delete_node(Geometry *node) template<> void Scene::delete_node(Object *node) { assert(node->get_owner() == this); + + uint flag = ObjectManager::OBJECT_REMOVED; + if (node->get_geometry()->has_volume) { + volume_manager->tag_update(node, flag); + } + objects.erase_by_swap(node); - object_manager->tag_update(this, ObjectManager::OBJECT_REMOVED); + object_manager->tag_update(this, flag); } template<> void Scene::delete_node(ParticleSystem *node) diff --git a/intern/cycles/scene/scene.h b/intern/cycles/scene/scene.h index 7799c01eef9..90cd5349e3d 100644 --- a/intern/cycles/scene/scene.h +++ b/intern/cycles/scene/scene.h @@ -50,6 +50,7 @@ class BakeData; class RenderStats; class SceneUpdateStats; class Volume; +class VolumeManager; /* Scene Parameters */ @@ -151,6 +152,7 @@ class Scene : public NodeOwner { unique_ptr particle_system_manager; unique_ptr bake_manager; unique_ptr procedural_manager; + unique_ptr volume_manager; /* default shaders */ Shader *default_surface; diff --git a/intern/cycles/scene/shader.cpp b/intern/cycles/scene/shader.cpp index e4b15b9c94c..4bdd5908dc4 100644 --- a/intern/cycles/scene/shader.cpp +++ b/intern/cycles/scene/shader.cpp @@ -18,6 +18,7 @@ #include "scene/shader_nodes.h" #include "scene/svm.h" #include "scene/tables.h" +#include "scene/volume.h" #include "util/log.h" #include "util/murmurhash.h" @@ -104,6 +105,7 @@ Shader::Shader() : Node(get_node_type()) has_volume_attribute_dependency = false; has_volume_connected = false; prev_volume_step_rate = 0.0f; + has_light_path_node = false; emission_estimate = zero_float3(); emission_sampling = EMISSION_SAMPLING_NONE; @@ -397,6 +399,10 @@ void Shader::tag_update(Scene *scene) scene->object_manager->need_flags_update = true; prev_volume_step_rate = volume_step_rate; } + + if (has_volume || prev_has_volume) { + scene->volume_manager->tag_update(this); + } } void Shader::tag_used(Scene *scene) @@ -527,6 +533,15 @@ void ShaderManager::device_update_pre(Device * /*device*/, shader->has_volume_spatial_varying = false; shader->has_volume_attribute_dependency = false; shader->has_displacement = output->input("Displacement")->link != nullptr; + + shader->has_light_path_node = false; + for (ShaderNode *node : shader->graph->nodes) { + if (node->special_type == SHADER_SPECIAL_TYPE_LIGHT_PATH) { + /* TODO: check if the light path node is linked to the volume output. */ + shader->has_light_path_node = true; + break; + } + } } if (shader->reference_count()) { @@ -633,6 +648,10 @@ void ShaderManager::device_update_common(Device * /*device*/, flag |= SD_HAS_CONSTANT_EMISSION; } + if (shader->has_light_path_node) { + flag |= SD_HAS_LIGHT_PATH_NODE; + } + const uint32_t cryptomatte_id = util_murmur_hash3( shader->name.c_str(), shader->name.length(), 0); diff --git a/intern/cycles/scene/shader.h b/intern/cycles/scene/shader.h index 054991c0b47..d74237f0ca1 100644 --- a/intern/cycles/scene/shader.h +++ b/intern/cycles/scene/shader.h @@ -117,6 +117,7 @@ class Shader : public Node { bool has_surface_spatial_varying; bool has_volume_spatial_varying; bool has_volume_attribute_dependency; + bool has_light_path_node; float3 emission_estimate; EmissionSampling emission_sampling; diff --git a/intern/cycles/scene/shader_graph.h b/intern/cycles/scene/shader_graph.h index a428dcb2318..3ceecfc0369 100644 --- a/intern/cycles/scene/shader_graph.h +++ b/intern/cycles/scene/shader_graph.h @@ -56,6 +56,7 @@ enum ShaderNodeSpecialType { SHADER_SPECIAL_TYPE_OUTPUT, SHADER_SPECIAL_TYPE_BUMP, SHADER_SPECIAL_TYPE_OUTPUT_AOV, + SHADER_SPECIAL_TYPE_LIGHT_PATH, }; /* Input diff --git a/intern/cycles/scene/shader_nodes.cpp b/intern/cycles/scene/shader_nodes.cpp index 514d5b80d9e..c100da9124c 100644 --- a/intern/cycles/scene/shader_nodes.cpp +++ b/intern/cycles/scene/shader_nodes.cpp @@ -4199,7 +4199,10 @@ NODE_DEFINE(LightPathNode) return type; } -LightPathNode::LightPathNode() : ShaderNode(get_node_type()) {} +LightPathNode::LightPathNode() : ShaderNode(get_node_type()) +{ + special_type = SHADER_SPECIAL_TYPE_LIGHT_PATH; +} void LightPathNode::compile(SVMCompiler &compiler) { diff --git a/intern/cycles/scene/volume.cpp b/intern/cycles/scene/volume.cpp index 3105d0ce7cf..7465acb54b8 100644 --- a/intern/cycles/scene/volume.cpp +++ b/intern/cycles/scene/volume.cpp @@ -4,20 +4,29 @@ #include "scene/volume.h" #include "scene/attribute.h" +#include "scene/background.h" #include "scene/image_vdb.h" +#include "scene/light.h" +#include "scene/object.h" #include "scene/scene.h" #ifdef WITH_OPENVDB # include +# include # include #endif #include "util/hash.h" #include "util/log.h" #include "util/nanovdb.h" +#include "util/path.h" #include "util/progress.h" #include "util/types.h" +#include "bvh/octree.h" + +#include + CCL_NAMESPACE_BEGIN NODE_DEFINE(Volume) @@ -547,4 +556,454 @@ void Volume::merge_grids(const Scene *scene) #endif } +VolumeManager::VolumeManager() +{ + need_rebuild_ = true; +} + +void VolumeManager::tag_update() +{ + need_rebuild_ = true; +} + +/* Remove changed object from the list of octrees and tag for rebuild. */ +void VolumeManager::tag_update(const Object *object, uint32_t flag) +{ + if (flag & ObjectManager::VISIBILITY_MODIFIED) { + tag_update(); + } + + for (const Node *node : object->get_geometry()->get_used_shaders()) { + const Shader *shader = static_cast(node); + if (shader->has_volume_spatial_varying || (flag & ObjectManager::OBJECT_REMOVED)) { + /* TODO(weizhen): no need to update if the spatial variation is not in world space. */ + tag_update(); + object_octrees_.erase({object, shader}); + } + } + + if (!need_rebuild_ && (flag & ObjectManager::TRANSFORM_MODIFIED)) { + /* Octree is not tagged for rebuild, but the transformation changed, so a redraw is needed. */ + update_visualization_ = true; + } +} + +/* Remove object with changed shader from the list of octrees and tag for rebuild. */ +void VolumeManager::tag_update(const Shader *shader) +{ + tag_update(); + for (auto it = object_octrees_.begin(); it != object_octrees_.end();) { + if (it->first.second == shader) { + it = object_octrees_.erase(it); + } + else { + it++; + } + } +} + +/* Remove object with changed geometry from the list of octrees and tag for rebuild. */ +void VolumeManager::tag_update(const Geometry *geometry) +{ + tag_update(); + /* Tag Octree for update. */ + for (auto it = object_octrees_.begin(); it != object_octrees_.end();) { + const Object *object = it->first.first; + if (object->get_geometry() == geometry) { + it = object_octrees_.erase(it); + } + else { + it++; + } + } + +#ifdef WITH_OPENVDB + /* Tag VDB map for update. */ + for (auto it = vdb_map_.begin(); it != vdb_map_.end();) { + if (it->first.first == geometry) { + it = vdb_map_.erase(it); + } + else { + it++; + } + } +#endif +} + +void VolumeManager::tag_update_indices() +{ + update_root_indices_ = true; +} + +bool VolumeManager::is_homogeneous_volume(const Object *object, const Shader *shader) +{ + if (!shader->has_volume || shader->has_volume_spatial_varying) { + return false; + } + + if (shader->has_volume_attribute_dependency) { + for (Attribute &attr : object->get_geometry()->attributes.attributes) { + /* If both the shader and the object needs volume attributes, the volume is heterogeneous. */ + if (attr.element == ATTR_ELEMENT_VOXEL) { + return false; + } + } + } + + return true; +} + +#ifdef WITH_OPENVDB +openvdb::BoolGrid::ConstPtr VolumeManager::mesh_to_sdf_grid(const Mesh *mesh, + const Shader *shader, + const float half_width) +{ + const int num_verts = mesh->get_verts().size(); + std::vector points(num_verts); + parallel_for(0, num_verts, [&](int i) { + const float3 &vert = mesh->get_verts()[i]; + points[i] = openvdb::Vec3f(vert.x, vert.y, vert.z); + }); + + const int max_num_triangles = mesh->num_triangles(); + std::vector triangles; + triangles.reserve(max_num_triangles); + for (int i = 0; i < max_num_triangles; i++) { + /* Only push triangles with matching shader. */ + const int shader_index = mesh->get_shader()[i]; + if (static_cast(mesh->get_used_shaders()[shader_index]) == shader) { + triangles.emplace_back(mesh->get_triangles()[i * 3], + mesh->get_triangles()[i * 3 + 1], + mesh->get_triangles()[i * 3 + 2]); + } + } + + /* TODO(weizhen): Should consider object instead of mesh size. */ + const float3 mesh_size = mesh->bounds.size(); + const auto vdb_voxel_size = openvdb::Vec3d(mesh_size.x, mesh_size.y, mesh_size.z) / + double(1 << VOLUME_OCTREE_MAX_DEPTH); + + auto xform = openvdb::math::Transform::createLinearTransform(1.0); + xform->postScale(vdb_voxel_size); + + auto sdf_grid = openvdb::tools::meshToLevelSet( + *xform, points, triangles, half_width); + + return openvdb::tools::sdfInteriorMask(*sdf_grid, 0.5 * vdb_voxel_size.length()); +} + +openvdb::BoolGrid::ConstPtr VolumeManager::get_vdb(const Geometry *geom, + const Shader *shader) const +{ + if (geom->is_mesh()) { + if (auto it = vdb_map_.find({geom, shader}); it != vdb_map_.end()) { + return it->second; + } + } + /* Create empty grid. */ + return openvdb::BoolGrid::create(); +} +#endif + +void VolumeManager::initialize_octree(const Scene *scene, Progress &progress) +{ + /* Instanced objects without spatial variation can share one octree. */ + std::map, std::shared_ptr> geometry_octrees; + for (const auto &it : object_octrees_) { + const Shader *shader = it.first.second; + if (!shader->has_volume_spatial_varying) { + if (const Object *object = it.first.first) { + geometry_octrees[{object->get_geometry(), shader}] = it.second; + } + } + } + + /* Loop through the volume objects to initialize their root nodes. */ + for (const Object *object : scene->objects) { + const Geometry *geom = object->get_geometry(); + if (!geom->has_volume) { + continue; + } + + /* Create Octree. */ + for (const Node *node : geom->get_used_shaders()) { + const Shader *shader = static_cast(node); + if (!shader->has_volume) { + continue; + } + + if (object_octrees_.find({object, shader}) == object_octrees_.end()) { + if (geom->is_light()) { + const Light *light = static_cast(geom); + if (light->get_light_type() == LIGHT_BACKGROUND) { + /* World volume is unbounded, use some practical large number instead. */ + const float3 size = make_float3(10000.0f); + object_octrees_[{object, shader}] = std::make_shared(BoundBox(-size, size)); + } + } + else { + const Mesh *mesh = static_cast(geom); + if (is_zero(mesh->bounds.size())) { + continue; + } + if (!shader->has_volume_spatial_varying) { + /* TODO(weizhen): check object attribute. */ + if (auto it = geometry_octrees.find({geom, shader}); it != geometry_octrees.end()) { + /* Share octree with other instances. */ + object_octrees_[{object, shader}] = it->second; + } + else { + auto octree = std::make_shared(mesh->bounds); + geometry_octrees[{geom, shader}] = octree; + object_octrees_[{object, shader}] = octree; + } + } + else { + /* TODO(weizhen): we can still share the octree if the spatial variation is in object + * space, but that might be tricky to determine. */ + object_octrees_[{object, shader}] = std::make_shared(mesh->bounds); + } + } + } + +#ifdef WITH_OPENVDB + if (geom->is_mesh() && !VolumeManager::is_homogeneous_volume(object, shader) && + vdb_map_.find({geom, shader}) == vdb_map_.end()) + { + const Mesh *mesh = static_cast(geom); + const float3 dim = mesh->bounds.size(); + if (dim.x > 0.0f && dim.y > 0.0f && dim.z > 0.0f) { + const char *name = object->get_asset_name().c_str(); + progress.set_substatus(string_printf("Creating SDF grid for %s", name)); + vdb_map_[{geom, shader}] = mesh_to_sdf_grid(mesh, shader, 1.0f); + } + } +#endif + } + } +} + +void VolumeManager::update_num_octree_nodes() +{ + num_octree_nodes_ = 0; + num_octree_roots_ = 0; + + std::set unique_octrees; + for (const auto &it : object_octrees_) { + const Octree *octree = it.second.get(); + if (unique_octrees.find(octree) != unique_octrees.end()) { + continue; + } + + unique_octrees.insert(octree); + + num_octree_roots_++; + num_octree_nodes_ += octree->get_num_nodes(); + } +} + +int VolumeManager::num_octree_nodes() const +{ + return num_octree_nodes_; +} + +int VolumeManager::num_octree_roots() const +{ + return num_octree_roots_; +} + +void VolumeManager::build_octree(Device *device, Progress &progress) +{ + const double start_time = time_dt(); + + for (auto &it : object_octrees_) { + if (it.second->is_built()) { + continue; + } + + const Object *object = it.first.first; + const Shader *shader = it.first.second; +#ifdef WITH_OPENVDB + openvdb::BoolGrid::ConstPtr interior_mask = get_vdb(object->get_geometry(), shader); + it.second->build(device, progress, interior_mask, object, shader); +#else + it.second->build(device, progress, object, shader); +#endif + } + + update_num_octree_nodes(); + + const double build_time = time_dt() - start_time; + + LOG_WORK << object_octrees_.size() << " volume octree(s) with a total of " << num_octree_nodes() + << " nodes are built in " << build_time << " seconds."; +} + +void VolumeManager::update_root_indices(DeviceScene *dscene, const Scene *scene) const +{ + if (object_octrees_.empty()) { + return; + } + + /* Keep track of the root index of the unique octrees. */ + std::map octree_root_indices; + + int *roots = dscene->volume_tree_root_ids.alloc(scene->objects.size()); + + int root_index = 0; + for (const auto &it : object_octrees_) { + const Object *object = it.first.first; + const int object_id = object->get_device_index(); + const Octree *octree = it.second.get(); + auto entry = octree_root_indices.find(octree); + if (entry == octree_root_indices.end()) { + roots[object_id] = root_index; + octree_root_indices[octree] = root_index; + + root_index++; + } + else { + /* Instances share the same octree. */ + roots[object_id] = entry->second; + } + } + + dscene->volume_tree_root_ids.copy_to_device(); +} + +void VolumeManager::flatten_octree(DeviceScene *dscene, const Scene *scene) const +{ + if (object_octrees_.empty()) { + return; + } + + update_root_indices(dscene, scene); + + for (const auto &it : object_octrees_) { + /* Octrees need to be re-flattened. */ + it.second->set_flattened(false); + } + + KernelOctreeRoot *kroots = dscene->volume_tree_roots.alloc(num_octree_roots()); + KernelOctreeNode *knodes = dscene->volume_tree_nodes.alloc(num_octree_nodes()); + + int node_index = 0; + int root_index = 0; + for (const auto &it : object_octrees_) { + std::shared_ptr octree = it.second; + if (octree->is_flattened()) { + continue; + } + + /* If an object has multiple shaders, the root index is overwritten, so we also write the + * shader id, and perform a linear search in the kernel to find the correct octree. */ + kroots[root_index].shader = it.first.second->id; + kroots[root_index].id = node_index; + + /* Transform from object space into octree space. */ + auto root = octree->get_root(); + const float3 scale = 1.0f / root->bbox.size(); + kroots[root_index].scale = scale; + kroots[root_index].translation = -root->bbox.min * scale + 1.0f; + + root_index++; + + /* Flatten octree. */ + const uint current_index = node_index++; + knodes[current_index].parent = -1; + octree->flatten(knodes, current_index, root, node_index); + octree->set_flattened(); + } + + dscene->volume_tree_nodes.copy_to_device(); + dscene->volume_tree_roots.copy_to_device(); + + LOG_WORK << "Memory usage of volume octrees: " + << (dscene->volume_tree_nodes.size() * sizeof(KernelOctreeNode) + + dscene->volume_tree_roots.size() * sizeof(KernelOctreeRoot) + + dscene->volume_tree_root_ids.size() * sizeof(int)) / + (1024.0 * 1024.0) + << "Mb."; +} + +std::string VolumeManager::visualize_octree(const char *filename) const +{ + const std::string filename_full = path_join(OIIO::Filesystem::current_path(), filename); + + std::ofstream file(filename_full); + if (file.is_open()) { + std::ostringstream buffer; + file << "# Visualize volume octree.\n\n" + "import bpy\nimport mathutils\n\n" + "if bpy.context.active_object:\n" + " bpy.context.active_object.select_set(False)\n\n" + "octree = bpy.data.collections.new(name='Octree')\n" + "bpy.context.scene.collection.children.link(octree)\n\n"; + + for (const auto &it : object_octrees_) { + /* Draw Octree. */ + const auto octree = it.second; + const std::string object_name = it.first.first->get_asset_name().string(); + octree->visualize(file, object_name); + + /* Apply transform. */ + const Object *object = it.first.first; + const Geometry *geom = object->get_geometry(); + if (!geom->is_light() && !geom->transform_applied) { + const Transform t = object->get_tfm(); + file << "obj.matrix_world = mathutils.Matrix((" << t.x << ", " << t.y << ", " << t.z + << ", (" << 0 << "," << 0 << "," << 0 << "," << 1 << ")))\n\n"; + } + } + + file.close(); + } + + return filename_full; +} + +void VolumeManager::device_update(Device *device, + DeviceScene *dscene, + const Scene *scene, + Progress &progress) +{ + if (need_rebuild_) { + /* Data needed for volume shader evaluation. */ + device->const_copy_to("data", &dscene->data, sizeof(dscene->data)); + + initialize_octree(scene, progress); + build_octree(device, progress); + flatten_octree(dscene, scene); + + update_visualization_ = true; + need_rebuild_ = false; + update_root_indices_ = false; + } + else if (update_root_indices_) { + update_root_indices(dscene, scene); + update_root_indices_ = false; + } + + if (update_visualization_) { + LOG_DEBUG << "Octree visualization has been written to " << visualize_octree("octree.py"); + update_visualization_ = false; + } +} + +void VolumeManager::device_free(DeviceScene *dscene) +{ + dscene->volume_tree_nodes.free(); + dscene->volume_tree_roots.free(); + dscene->volume_tree_root_ids.free(); +} + +VolumeManager::~VolumeManager() +{ +#ifdef WITH_OPENVDB + for (auto &it : vdb_map_) { + it.second.reset(); + } +#endif +} + CCL_NAMESPACE_END diff --git a/intern/cycles/scene/volume.h b/intern/cycles/scene/volume.h index 7e7f542128e..396e1dceb0c 100644 --- a/intern/cycles/scene/volume.h +++ b/intern/cycles/scene/volume.h @@ -8,8 +8,15 @@ #include "scene/mesh.h" +#ifdef WITH_OPENVDB +# include +#endif + CCL_NAMESPACE_BEGIN +class Object; +class Octree; + class Volume : public Mesh { public: NODE_DECLARE @@ -26,4 +33,64 @@ class Volume : public Mesh { void clear(bool preserve_shaders = false) override; }; +class VolumeManager { + public: + VolumeManager(); + ~VolumeManager(); + + void device_update(Device *, DeviceScene *, const Scene *, Progress &); + void device_free(DeviceScene *); + + /* Tag volume octree for update when scene changes. */ + void tag_update(); + void tag_update(const Shader *shader); + void tag_update(const Object *object, const uint32_t flag); + void tag_update(const Geometry *geometry); + void tag_update_indices(); + + /* Check whether the shader is a homogeneous volume. */ + static bool is_homogeneous_volume(const Object *, const Shader *); + + private: + /* Initialize octrees from the volumes in the scene. */ + void initialize_octree(const Scene *, Progress &); + + /* Build octrees based on the volume density. */ + void build_octree(Device *, Progress &); + + /* Update the object and shader index of octree root nodes. */ + void update_root_indices(DeviceScene *, const Scene *) const; + + /* Converting the octrees into an array for uploading to the kernel. */ + void flatten_octree(DeviceScene *, const Scene *) const; + + /* Count all the nodes of the octrees. */ + void update_num_octree_nodes(); + int num_octree_nodes() const; + int num_octree_roots() const; + + /* When running Blender with `--log-level debug`, an octree visualization is written to + * `filename`, which is a Python script that can be run inside Blender. */ + std::string visualize_octree(const char *filename) const; + + /* One octree per object per shader. */ + std::map, std::shared_ptr> object_octrees_; + + bool update_root_indices_ = false; + bool need_rebuild_; + bool update_visualization_ = false; + int num_octree_nodes_; + int num_octree_roots_; + +#ifdef WITH_OPENVDB + /* Create SDF grid for mesh volumes, to determine whether a certain point is in the + * interior of the mesh. This reduces evaluation time needed for heterogeneous volume. */ + openvdb::BoolGrid::ConstPtr mesh_to_sdf_grid(const Mesh *mesh, + const Shader *shader, + const float half_width); + openvdb::BoolGrid::ConstPtr get_vdb(const Geometry *, const Shader *) const; + std::map, openvdb::BoolGrid::ConstPtr> vdb_map_; +#endif +}; + CCL_NAMESPACE_END diff --git a/intern/cycles/util/log.cpp b/intern/cycles/util/log.cpp index ea681b32226..99ccd9c04dc 100644 --- a/intern/cycles/util/log.cpp +++ b/intern/cycles/util/log.cpp @@ -145,4 +145,10 @@ std::ostream &operator<<(std::ostream &os, const float3 &value) return os; } +std::ostream &operator<<(std::ostream &os, const float4 &value) +{ + os << "(" << value.x << ", " << value.y << ", " << value.z << ", " << value.w << ")"; + return os; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/util/log.h b/intern/cycles/util/log.h index 108e820e1f4..8e0e7d6f319 100644 --- a/intern/cycles/util/log.h +++ b/intern/cycles/util/log.h @@ -162,8 +162,10 @@ template T DCheckNotNull(T &&t, const char *expression) /* Convenient logging of common data structures. */ struct int2; struct float3; +struct float4; std::ostream &operator<<(std::ostream &os, const int2 &value); std::ostream &operator<<(std::ostream &os, const float3 &value); +std::ostream &operator<<(std::ostream &os, const float4 &value); CCL_NAMESPACE_END diff --git a/intern/cycles/util/math_base.h b/intern/cycles/util/math_base.h index 3767e0334b1..fc4ce419673 100644 --- a/intern/cycles/util/math_base.h +++ b/intern/cycles/util/math_base.h @@ -628,6 +628,12 @@ ccl_device_inline float one_minus_cos(const float angle) return angle > 0.02f ? 1.0f - cosf(angle) : 0.5f * sqr(angle); } +/* 2^a. */ +ccl_device_inline int power_of_2(const int a) +{ + return 1 << a; +} + ccl_device_inline float pow20(const float a) { return sqr(sqr(sqr(sqr(a)) * a)); @@ -657,9 +663,9 @@ ccl_device_inline float beta(const float x, const float y) return expf(lgammaf(x) + lgammaf(y) - lgammaf(x + y)); } -ccl_device_inline float xor_signmask(const float x, const int y) +ccl_device_inline float xor_mask(const float x, const uint y) { - return __int_as_float(__float_as_int(x) ^ y); + return __uint_as_float(__float_as_uint(x) ^ y); } ccl_device float bits_to_01(const uint bits) @@ -893,4 +899,50 @@ ccl_device_inline Interval intervals_intersection(const ccl_private Interval< return {max(first.min, second.min), min(first.max, second.max)}; } +/* Defines the minimal and maximal values of a quantity. */ +template struct Extrema { + T min; + T max; + Extrema() = default; + ccl_device_inline_method Extrema(T value) : min(value), max(value) {} + ccl_device_inline_method Extrema(T min_, T max_) : min(min_), max(max_) {} + + ccl_device_inline_method T range() const + { + return max - min; + } +}; + +template ccl_device_inline Extrema operator*(const Extrema a, const T b) +{ + return {a.min * b, a.max * b}; +} + +template +ccl_device_inline Extrema operator+(const ccl_private Extrema &a, + const ccl_private Extrema &b) +{ + return {a.min + b.min, a.max + b.max}; +} + +template +ccl_device_inline Extrema operator+=(ccl_private Extrema &a, const ccl_private Extrema &b) +{ + return a = a + b; +} + +/* Returns the extrema of both extrema. */ +template +ccl_device_inline Extrema merge(const ccl_private Extrema &a, + const ccl_private Extrema &b) +{ + return {min(a.min, b.min), max(a.max, b.max)}; +} + +template +ccl_device_inline Extrema merge(const ccl_private Extrema &a, const ccl_private T &v) +{ + return {min(a.min, v), max(a.max, v)}; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/util/math_float3.h b/intern/cycles/util/math_float3.h index 64868a5db4c..40a4029d07c 100644 --- a/intern/cycles/util/math_float3.h +++ b/intern/cycles/util/math_float3.h @@ -10,6 +10,7 @@ #include "util/types_float3.h" #include "util/types_float4.h" #include "util/types_int3.h" +#include "util/types_uint3.h" CCL_NAMESPACE_BEGIN @@ -212,6 +213,15 @@ ccl_device_inline bool operator==(const float3 a, const float3 b) # endif } +ccl_device_inline int3 operator==(const float3 a, const float b) +{ +# ifdef __KERNEL_SSE__ + return int3(_mm_castps_si128(_mm_cmpeq_ps(a.m128, make_float3(b).m128))); +# else + return make_int3(a.x == b, a.y == b, a.z == b); +# endif +} + ccl_device_inline bool operator!=(const float3 a, const float3 b) { return !(a == b); @@ -235,7 +245,21 @@ ccl_device_inline float dot(const float3 a, const float3 b) # endif } -#endif +ccl_device_inline int3 operator>(const float3 a, const float3 b) +{ +# ifdef __KERNEL_SSE__ + return int3(_mm_castps_si128(_mm_cmpgt_ps(a.m128, b.m128))); +# else + return make_int3(a.x > b.x, a.y > b.y, a.z > b.z); +# endif +} + +ccl_device_inline int3 operator>(const float3 a, const float b) +{ + return a > make_float3(b); +} + +#endif /* __KERNEL_METAL__ */ ccl_device_inline float dot_xy(const float3 a, const float3 b) { @@ -380,6 +404,17 @@ ccl_device_inline float3 sqrt(const float3 a) # endif } +ccl_device_inline float3 round(const float3 a) +{ +# if defined(__KERNEL_NEON__) + return float3(vrndnq_f32(a.m128)); +# elif defined(__KERNEL_SSE__) + return float3(_mm_round_ps(a.m128, _MM_FROUND_NINT)); +# else + return make_float3(roundf(a.x), roundf(a.y), roundf(a.z)); +# endif +} + ccl_device_inline float3 floor(const float3 a) { # ifdef __KERNEL_SSE__ @@ -403,6 +438,11 @@ ccl_device_inline float3 mix(const float3 a, const float3 b, const float t) return a + t * (b - a); } +ccl_device_inline float3 mix(const float3 a, const float3 b, const float3 t) +{ + return a + t * (b - a); +} + ccl_device_inline float3 saturate(const float3 a) { return make_float3(saturatef(a.x), saturatef(a.y), saturatef(a.z)); @@ -438,11 +478,6 @@ ccl_device_inline float3 atan2(const float3 y, const float3 x) return make_float3(atan2f(y.x, x.x), atan2f(y.y, x.y), atan2f(y.z, x.z)); } -ccl_device_inline float3 round(const float3 a) -{ - return make_float3(roundf(a.x), roundf(a.y), roundf(a.z)); -} - ccl_device_inline float3 reflect(const float3 incident, const float3 unit_normal) { return incident - 2.0f * unit_normal * dot(incident, unit_normal); @@ -527,6 +562,11 @@ ccl_device_inline bool is_zero(const float3 a) #endif } +ccl_device_inline bool any_zero(const float3 a) +{ + return (a.x == 0.0f || a.y == 0.0f || a.z == 0.0f); +} + ccl_device_inline float reduce_add(const float3 a) { #if defined(__KERNEL_SSE__) && defined(__KERNEL_NEON__) @@ -766,4 +806,22 @@ ccl_device_inline void copy_v3_v3(ccl_private float *r, const float3 val) r[2] = val.z; } +ccl_device_inline uint3 float3_as_uint3(const float3 f) +{ +#ifdef __KERNEL_METAL__ + return as_type(f); +#else + return make_uint3(__float_as_uint(f.x), __float_as_uint(f.y), __float_as_uint(f.z)); +#endif +} + +ccl_device_inline float3 uint3_as_float3(const uint3 f) +{ +#ifdef __KERNEL_METAL__ + return as_type(f); +#else + return make_float3(__uint_as_float(f.x), __uint_as_float(f.y), __uint_as_float(f.z)); +#endif +} + CCL_NAMESPACE_END diff --git a/intern/cycles/util/math_int3.h b/intern/cycles/util/math_int3.h index 8f708d2b610..80741958179 100644 --- a/intern/cycles/util/math_int3.h +++ b/intern/cycles/util/math_int3.h @@ -107,6 +107,11 @@ ccl_device_inline int3 operator&(const int3 a, const int b) # endif } +ccl_device_inline bool all(const int3 a) +{ + return a.x && a.y && a.z; +} + #endif /* !__KERNEL_METAL__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/util/tbb.h b/intern/cycles/util/tbb.h index f220b0a25a2..cfee7f08995 100644 --- a/intern/cycles/util/tbb.h +++ b/intern/cycles/util/tbb.h @@ -10,6 +10,7 @@ # include "util/windows.h" #endif +#include #include #include #include @@ -26,6 +27,7 @@ CCL_NAMESPACE_BEGIN using tbb::blocked_range; +using tbb::blocked_range3d; using tbb::enumerable_thread_specific; using tbb::parallel_for; using tbb::parallel_for_each;