Cycles: Render volume by ray marching through octrees
One octree per volume per shader based on the density. In preparation for the null scattering
This commit is contained in:
committed by
Weizhen Huang
parent
4e65ab4490
commit
b2b2d9a4f3
@@ -138,6 +138,8 @@ void BlenderSync::sync_background_light(BL::SpaceView3D &b_v3d)
|
||||
object->set_lightgroup(ustring(b_world ? b_world.lightgroup() : ""));
|
||||
}
|
||||
|
||||
object->set_asset_name(ustring(b_world.name()));
|
||||
|
||||
/* Create geometry. */
|
||||
const GeometryKey geom_key{b_world.ptr.data, Geometry::LIGHT};
|
||||
Geometry *geom = geometry_map.find(geom_key);
|
||||
|
||||
@@ -10,6 +10,7 @@ set(INC_SYS
|
||||
)
|
||||
|
||||
set(SRC
|
||||
octree.cpp
|
||||
bvh.cpp
|
||||
bvh2.cpp
|
||||
binning.cpp
|
||||
@@ -36,6 +37,7 @@ if(WITH_CYCLES_DEVICE_METAL)
|
||||
endif()
|
||||
|
||||
set(SRC_HEADERS
|
||||
octree.h
|
||||
bvh.h
|
||||
bvh2.h
|
||||
binning.h
|
||||
|
||||
470
intern/cycles/bvh/octree.cpp
Normal file
470
intern/cycles/bvh/octree.cpp
Normal file
@@ -0,0 +1,470 @@
|
||||
/* SPDX-FileCopyrightText: 2025 Blender Foundation
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 */
|
||||
|
||||
#include "bvh/octree.h"
|
||||
|
||||
#include "scene/object.h"
|
||||
#include "scene/volume.h"
|
||||
|
||||
#include "integrator/shader_eval.h"
|
||||
|
||||
#include "util/log.h"
|
||||
#include "util/progress.h"
|
||||
|
||||
#ifdef WITH_OPENVDB
|
||||
# include <openvdb/tools/FindActiveValues.h>
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
__forceinline int Octree::flatten_index(int x, int y, int z) const
|
||||
{
|
||||
return x + resolution_ * (y + z * resolution_);
|
||||
}
|
||||
|
||||
Extrema<float> Octree::get_extrema(const int3 index_min, const int3 index_max) const
|
||||
{
|
||||
const blocked_range3d<int> range(
|
||||
index_min.x, index_max.x, 32, index_min.y, index_max.y, 32, index_min.z, index_max.z, 32);
|
||||
|
||||
const Extrema<float> identity = {FLT_MAX, -FLT_MAX};
|
||||
|
||||
auto reduction_func = [&](const blocked_range3d<int> &r, Extrema<float> init) -> Extrema<float> {
|
||||
for (int z = r.cols().begin(); z < r.cols().end(); ++z) {
|
||||
for (int y = r.rows().begin(); y < r.rows().end(); ++y) {
|
||||
for (int x = r.pages().begin(); x < r.pages().end(); ++x) {
|
||||
init = merge(init, sigmas_[flatten_index(x, y, z)]);
|
||||
}
|
||||
}
|
||||
}
|
||||
return init;
|
||||
};
|
||||
|
||||
auto join_func = [](Extrema<float> a, Extrema<float> b) -> Extrema<float> {
|
||||
return merge(a, b);
|
||||
};
|
||||
|
||||
return parallel_reduce(range, identity, reduction_func, join_func);
|
||||
}
|
||||
|
||||
__forceinline float3 Octree::position_to_index(const float3 p) const
|
||||
{
|
||||
return (p - root_->bbox.min) * position_to_index_scale_;
|
||||
}
|
||||
|
||||
int3 Octree::position_to_floor_index(const float3 p) const
|
||||
{
|
||||
const float3 index = round(position_to_index(p));
|
||||
return clamp(make_int3(int(index.x), int(index.y), int(index.z)), 0, resolution_ - 1);
|
||||
}
|
||||
|
||||
int3 Octree::position_to_ceil_index(const float3 p) const
|
||||
{
|
||||
if (any_zero(position_to_index_scale_)) {
|
||||
/* Octree with degenerate shape, force max index. */
|
||||
return make_int3(resolution_);
|
||||
}
|
||||
const float3 index = round(position_to_index(p));
|
||||
return clamp(make_int3(int(index.x), int(index.y), int(index.z)), 1, resolution_);
|
||||
}
|
||||
|
||||
__forceinline float3 Octree::index_to_position(int x, int y, int z) const
|
||||
{
|
||||
return root_->bbox.min + make_float3(x, y, z) * index_to_position_scale_;
|
||||
}
|
||||
|
||||
__forceinline float3 Octree::voxel_size() const
|
||||
{
|
||||
return index_to_position_scale_;
|
||||
}
|
||||
|
||||
bool Octree::should_split(std::shared_ptr<OctreeNode> &node) const
|
||||
{
|
||||
const int3 index_min = position_to_floor_index(node->bbox.min);
|
||||
const int3 index_max = position_to_ceil_index(node->bbox.max);
|
||||
node->sigma = get_extrema(index_min, index_max);
|
||||
|
||||
const float3 bbox_size = node->bbox.size();
|
||||
if (any_zero(bbox_size)) {
|
||||
/* Octree with degenerate shape, can happen for implicit volume. */
|
||||
return false;
|
||||
}
|
||||
|
||||
/* The threshold is set so that ideally only one sample needs to be taken per node. Value taken
|
||||
* from "Volume Rendering for Pixar's Elemental". */
|
||||
return (node->sigma.range() * len(bbox_size) * scale_ > 1.442f &&
|
||||
node->depth < VOLUME_OCTREE_MAX_DEPTH);
|
||||
}
|
||||
|
||||
#ifdef WITH_OPENVDB
|
||||
/* Check if a interior mask grid intersects with a bounding box defined by `p_min` and `p_max`. */
|
||||
static bool vdb_voxel_intersect(const float3 p_min,
|
||||
const float3 p_max,
|
||||
openvdb::BoolGrid::ConstPtr &grid,
|
||||
const openvdb::tools::FindActiveValues<openvdb::BoolTree> &find)
|
||||
{
|
||||
if (grid->empty()) {
|
||||
/* Non-mesh volume. */
|
||||
return true;
|
||||
}
|
||||
|
||||
const openvdb::math::CoordBBox coord_bbox(
|
||||
openvdb::Coord::floor(grid->worldToIndex({p_min.x, p_min.y, p_min.z})),
|
||||
openvdb::Coord::ceil(grid->worldToIndex({p_max.x, p_max.y, p_max.z})));
|
||||
|
||||
/* Check if the bounding box lies inside or partially overlaps the mesh.
|
||||
* For interior mask grids, all the interior voxels are active. */
|
||||
return find.anyActiveValues(coord_bbox, true);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Fill in coordinates for shading the volume density. */
|
||||
static void fill_shader_input(device_vector<KernelShaderEvalInput> &d_input,
|
||||
const Octree *octree,
|
||||
const Object *object,
|
||||
const Shader *shader,
|
||||
#ifdef WITH_OPENVDB
|
||||
openvdb::BoolGrid::ConstPtr &interior_mask,
|
||||
#endif
|
||||
const int resolution)
|
||||
{
|
||||
const int object_id = object->get_device_index();
|
||||
const uint shader_id = shader->id;
|
||||
|
||||
KernelShaderEvalInput *d_input_data = d_input.data();
|
||||
|
||||
const float3 voxel_size = octree->voxel_size();
|
||||
/* Dilate the voxel in case we miss features at the boundary. */
|
||||
const float3 pad = 0.2f * voxel_size;
|
||||
const float3 padded_size = voxel_size + pad * 2.0f;
|
||||
|
||||
const blocked_range3d<int> range(0, resolution, 8, 0, resolution, 8, 0, resolution, 8);
|
||||
parallel_for(range, [&](const blocked_range3d<int> &r) {
|
||||
#ifdef WITH_OPENVDB
|
||||
/* One accessor per thread is important for cached access. */
|
||||
const auto find = openvdb::tools::FindActiveValues(interior_mask->tree());
|
||||
#endif
|
||||
|
||||
for (int z = r.cols().begin(); z < r.cols().end(); ++z) {
|
||||
for (int y = r.rows().begin(); y < r.rows().end(); ++y) {
|
||||
for (int x = r.pages().begin(); x < r.pages().end(); ++x) {
|
||||
const int offset = octree->flatten_index(x, y, z);
|
||||
const float3 p = octree->index_to_position(x, y, z);
|
||||
|
||||
#ifdef WITH_OPENVDB
|
||||
/* Zero density for cells outside of the mesh. */
|
||||
if (!vdb_voxel_intersect(p, p + voxel_size, interior_mask, find)) {
|
||||
d_input_data[offset * 2].object = OBJECT_NONE;
|
||||
d_input_data[offset * 2 + 1].object = SHADER_NONE;
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
KernelShaderEvalInput in;
|
||||
in.object = object_id;
|
||||
in.prim = __float_as_int(p.x - pad.x);
|
||||
in.u = p.y - pad.y;
|
||||
in.v = p.z - pad.z;
|
||||
d_input_data[offset * 2] = in;
|
||||
|
||||
in.object = shader_id;
|
||||
in.prim = __float_as_int(padded_size.x);
|
||||
in.u = padded_size.y;
|
||||
in.v = padded_size.z;
|
||||
d_input_data[offset * 2 + 1] = in;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/* Read back the volume density. */
|
||||
static void read_shader_output(const device_vector<float> &d_output,
|
||||
const Octree *octree,
|
||||
const int num_channels,
|
||||
const int resolution,
|
||||
vector<Extrema<float>> &sigmas)
|
||||
{
|
||||
const float *d_output_data = d_output.data();
|
||||
const blocked_range3d<int> range(0, resolution, 32, 0, resolution, 32, 0, resolution, 32);
|
||||
|
||||
parallel_for(range, [&](const blocked_range3d<int> &r) {
|
||||
for (int z = r.cols().begin(); z < r.cols().end(); ++z) {
|
||||
for (int y = r.rows().begin(); y < r.rows().end(); ++y) {
|
||||
for (int x = r.pages().begin(); x < r.pages().end(); ++x) {
|
||||
const int index = octree->flatten_index(x, y, z);
|
||||
sigmas[index].min = d_output_data[index * num_channels + 0];
|
||||
sigmas[index].max = d_output_data[index * num_channels + 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void Octree::evaluate_volume_density(Device *device,
|
||||
Progress &progress,
|
||||
#ifdef WITH_OPENVDB
|
||||
openvdb::BoolGrid::ConstPtr &interior_mask,
|
||||
#endif
|
||||
const Object *object,
|
||||
const Shader *shader)
|
||||
{
|
||||
/* For heterogeneous volume, the grid resolution is 2^max_depth in each 3D dimension;
|
||||
* for homogeneous volume, only one grid is needed. */
|
||||
resolution_ = VolumeManager::is_homogeneous_volume(object, shader) ?
|
||||
1 :
|
||||
power_of_2(VOLUME_OCTREE_MAX_DEPTH);
|
||||
index_to_position_scale_ = root_->bbox.size() / float(resolution_);
|
||||
position_to_index_scale_ = safe_divide(one_float3(), index_to_position_scale_);
|
||||
|
||||
/* Initialize density field. */
|
||||
/* TODO(weizhen): maybe lower the resolution depending on the object size. */
|
||||
const int size = resolution_ * resolution_ * resolution_;
|
||||
sigmas_.resize(size);
|
||||
parallel_for(0, size, [&](int i) { sigmas_[i] = {0.0f, 0.0f}; });
|
||||
|
||||
/* Min and max. */
|
||||
const int num_channels = 2;
|
||||
|
||||
/* Need the size of two `KernelShaderEvalInput`s per voxel for evaluating the shader. */
|
||||
const int num_inputs = size * 2;
|
||||
|
||||
/* Evaluate shader on device. */
|
||||
ShaderEval shader_eval(device, progress);
|
||||
shader_eval.eval(
|
||||
SHADER_EVAL_VOLUME_DENSITY,
|
||||
num_inputs,
|
||||
num_channels,
|
||||
[&](device_vector<KernelShaderEvalInput> &d_input) {
|
||||
#ifdef WITH_OPENVDB
|
||||
fill_shader_input(d_input, this, object, shader, interior_mask, resolution_);
|
||||
#else
|
||||
fill_shader_input(d_input, this, object, shader, resolution_);
|
||||
#endif
|
||||
return size;
|
||||
},
|
||||
[&](device_vector<float> &d_output) {
|
||||
read_shader_output(d_output, this, num_channels, resolution_, sigmas_);
|
||||
});
|
||||
}
|
||||
|
||||
float Octree::volume_scale(const Object *object) const
|
||||
{
|
||||
const Geometry *geom = object->get_geometry();
|
||||
if (geom->is_volume()) {
|
||||
const Volume *volume = static_cast<const Volume *>(geom);
|
||||
if (volume->get_object_space()) {
|
||||
/* The density changes with object scale, we scale the density accordingly in the final
|
||||
* render. */
|
||||
if (volume->transform_applied) {
|
||||
const float3 unit = normalize(one_float3());
|
||||
return 1.0f / len(transform_direction(&object->get_tfm(), unit));
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* The density does not change with object scale, we scale the node in the viewport to it's
|
||||
* true size. */
|
||||
if (!volume->transform_applied) {
|
||||
const float3 unit = normalize(one_float3());
|
||||
return len(transform_direction(&object->get_tfm(), unit));
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* TODO(weizhen): use the maximal scale of all instances. */
|
||||
if (!geom->transform_applied) {
|
||||
const float3 unit = normalize(one_float3());
|
||||
return len(transform_direction(&object->get_tfm(), unit));
|
||||
}
|
||||
}
|
||||
|
||||
return 1.0f;
|
||||
}
|
||||
|
||||
std::shared_ptr<OctreeInternalNode> Octree::make_internal(std::shared_ptr<OctreeNode> &node)
|
||||
{
|
||||
num_nodes_ += 8;
|
||||
auto internal = std::make_shared<OctreeInternalNode>(*node);
|
||||
|
||||
/* Create bounding boxes for children. */
|
||||
const float3 center = internal->bbox.center();
|
||||
for (int i = 0; i < 8; i++) {
|
||||
const float3 t = make_float3(i & 1, (i >> 1) & 1, (i >> 2) & 1);
|
||||
const BoundBox bbox(mix(internal->bbox.min, center, t), mix(center, internal->bbox.max, t));
|
||||
internal->children_[i] = std::make_shared<OctreeNode>(bbox, internal->depth + 1);
|
||||
}
|
||||
|
||||
return internal;
|
||||
}
|
||||
|
||||
void Octree::recursive_build(std::shared_ptr<OctreeNode> &octree_node)
|
||||
{
|
||||
if (!should_split(octree_node)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Make the current node an internal node. */
|
||||
auto internal = make_internal(octree_node);
|
||||
|
||||
for (auto &child : internal->children_) {
|
||||
task_pool_.push([&] { recursive_build(child); });
|
||||
}
|
||||
|
||||
octree_node = internal;
|
||||
}
|
||||
|
||||
void Octree::flatten(KernelOctreeNode *knodes,
|
||||
const int current_index,
|
||||
const std::shared_ptr<OctreeNode> &node,
|
||||
int &child_index) const
|
||||
{
|
||||
KernelOctreeNode &knode = knodes[current_index];
|
||||
knode.sigma = node->sigma;
|
||||
|
||||
if (auto internal_ptr = std::dynamic_pointer_cast<OctreeInternalNode>(node)) {
|
||||
knode.first_child = child_index;
|
||||
child_index += 8;
|
||||
/* Loop through all the children and flatten in breadth-first manner, so that children are
|
||||
* stored in contiguous indices. */
|
||||
for (int i = 0; i < 8; i++) {
|
||||
knodes[knode.first_child + i].parent = current_index;
|
||||
flatten(knodes, knode.first_child + i, internal_ptr->children_[i], child_index);
|
||||
}
|
||||
}
|
||||
else {
|
||||
knode.first_child = -1;
|
||||
}
|
||||
}
|
||||
|
||||
void Octree::set_flattened(const bool flattened)
|
||||
{
|
||||
is_flattened_ = flattened;
|
||||
}
|
||||
|
||||
bool Octree::is_flattened() const
|
||||
{
|
||||
return is_flattened_;
|
||||
}
|
||||
|
||||
void Octree::build(Device *device,
|
||||
Progress &progress,
|
||||
#ifdef WITH_OPENVDB
|
||||
openvdb::BoolGrid::ConstPtr &interior_mask,
|
||||
#endif
|
||||
const Object *object,
|
||||
const Shader *shader)
|
||||
{
|
||||
const char *name = object->get_asset_name().c_str();
|
||||
progress.set_substatus(string_printf("Evaluating density for %s", name));
|
||||
|
||||
#ifdef WITH_OPENVDB
|
||||
evaluate_volume_density(device, progress, interior_mask, object, shader);
|
||||
#else
|
||||
evaluate_volume_density(device, progress, object, shader);
|
||||
#endif
|
||||
if (progress.get_cancel()) {
|
||||
return;
|
||||
}
|
||||
|
||||
progress.set_substatus(string_printf("Building octree for %s", name));
|
||||
|
||||
scale_ = volume_scale(object);
|
||||
recursive_build(root_);
|
||||
|
||||
task_pool_.wait_work();
|
||||
|
||||
is_built_ = true;
|
||||
sigmas_.clear();
|
||||
}
|
||||
|
||||
Octree::Octree(const BoundBox &bbox)
|
||||
{
|
||||
root_ = std::make_shared<OctreeNode>(bbox, 0);
|
||||
is_built_ = false;
|
||||
is_flattened_ = false;
|
||||
}
|
||||
|
||||
bool Octree::is_built() const
|
||||
{
|
||||
return is_built_;
|
||||
}
|
||||
|
||||
int Octree::get_num_nodes() const
|
||||
{
|
||||
return num_nodes_;
|
||||
}
|
||||
|
||||
std::shared_ptr<OctreeNode> Octree::get_root() const
|
||||
{
|
||||
return root_;
|
||||
}
|
||||
|
||||
void OctreeNode::visualize(std::string &str) const
|
||||
{
|
||||
const auto *internal = dynamic_cast<const OctreeInternalNode *>(this);
|
||||
|
||||
if (!internal) {
|
||||
/* Skip leaf nodes. */
|
||||
return;
|
||||
}
|
||||
|
||||
/* Create three orthogonal faces for inner nodes. */
|
||||
const float3 mid = bbox.center();
|
||||
const float3 max = bbox.max;
|
||||
const float3 min = bbox.min;
|
||||
const std::string mid_x = to_string(mid.x), mid_y = to_string(mid.y), mid_z = to_string(mid.z),
|
||||
min_x = to_string(min.x), min_y = to_string(min.y), min_z = to_string(min.z),
|
||||
max_x = to_string(max.x), max_y = to_string(max.y), max_z = to_string(max.z);
|
||||
// clang-format off
|
||||
str += "(" + mid_x + "," + mid_y + "," + min_z + "), "
|
||||
"(" + mid_x + "," + mid_y + "," + max_z + "), "
|
||||
"(" + mid_x + "," + max_y + "," + max_z + "), "
|
||||
"(" + mid_x + "," + max_y + "," + min_z + "), "
|
||||
"(" + mid_x + "," + min_y + "," + min_z + "), "
|
||||
"(" + mid_x + "," + min_y + "," + max_z + "), ";
|
||||
str += "(" + min_x + "," + mid_y + "," + mid_z + "), "
|
||||
"(" + max_x + "," + mid_y + "," + mid_z + "), "
|
||||
"(" + max_x + "," + mid_y + "," + max_z + "), "
|
||||
"(" + min_x + "," + mid_y + "," + max_z + "), "
|
||||
"(" + min_x + "," + mid_y + "," + min_z + "), "
|
||||
"(" + max_x + "," + mid_y + "," + min_z + "), ";
|
||||
str += "(" + mid_x + "," + min_y + "," + mid_z + "), "
|
||||
"(" + mid_x + "," + max_y + "," + mid_z + "), "
|
||||
"(" + max_x + "," + max_y + "," + mid_z + "), "
|
||||
"(" + max_x + "," + min_y + "," + mid_z + "), "
|
||||
"(" + min_x + "," + min_y + "," + mid_z + "), "
|
||||
"(" + min_x + "," + max_y + "," + mid_z + "), ";
|
||||
// clang-format on
|
||||
for (const auto &child : internal->children_) {
|
||||
child->visualize(str);
|
||||
}
|
||||
}
|
||||
|
||||
void Octree::visualize(std::ofstream &file, const std::string object_name) const
|
||||
{
|
||||
std::string str = "vertices = [";
|
||||
root_->visualize(str);
|
||||
str +=
|
||||
"]\nr = range(len(vertices))\n"
|
||||
"edges = [(i, i+1 if i%6<5 else i-4) for i in r]\n"
|
||||
"mesh = bpy.data.meshes.new('Octree')\n"
|
||||
"mesh.from_pydata(vertices, edges, [])\n"
|
||||
"mesh.update()\n"
|
||||
"obj = bpy.data.objects.new('" +
|
||||
object_name +
|
||||
"', mesh)\n"
|
||||
"octree.objects.link(obj)\n"
|
||||
"bpy.context.view_layer.objects.active = obj\n"
|
||||
"bpy.ops.object.mode_set(mode='EDIT')\n";
|
||||
file << str;
|
||||
|
||||
const float3 center = root_->bbox.center();
|
||||
const float3 size = root_->bbox.size() * 0.5f;
|
||||
file << "bpy.ops.mesh.primitive_cube_add(location = " << center << ", scale = " << size << ")\n";
|
||||
file << "bpy.ops.mesh.delete(type='ONLY_FACE')\n"
|
||||
"bpy.ops.object.mode_set(mode='OBJECT')\n"
|
||||
"obj.select_set(True)\n";
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
141
intern/cycles/bvh/octree.h
Normal file
141
intern/cycles/bvh/octree.h
Normal file
@@ -0,0 +1,141 @@
|
||||
/* SPDX-FileCopyrightText: 2025 Blender Foundation
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 */
|
||||
|
||||
/* The volume octree is used to determine the necessary step size when rendering the volume. One
|
||||
* volume per object per shader is built, and a node splits in eight when the density difference
|
||||
* inside the node exceeds a certain threshold. */
|
||||
|
||||
#ifndef __OCTREE_H__
|
||||
#define __OCTREE_H__
|
||||
|
||||
#include "util/boundbox.h"
|
||||
#include "util/task.h"
|
||||
|
||||
#ifdef WITH_OPENVDB
|
||||
# include <openvdb/openvdb.h>
|
||||
#endif
|
||||
|
||||
#include <atomic>
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class Device;
|
||||
class Object;
|
||||
class Progress;
|
||||
class Shader;
|
||||
struct KernelOctreeNode;
|
||||
|
||||
struct OctreeNode {
|
||||
/* Bounding box of the node. */
|
||||
BoundBox bbox;
|
||||
|
||||
/* Depth of the node in the octree. */
|
||||
int depth;
|
||||
|
||||
/* Minimal and maximal volume density inside the node. */
|
||||
Extrema<float> sigma = {0.0f, 0.0f};
|
||||
|
||||
OctreeNode() : bbox(BoundBox::empty), depth(0) {}
|
||||
OctreeNode(BoundBox bbox_, int depth_) : bbox(bbox_), depth(depth_) {}
|
||||
virtual ~OctreeNode() = default;
|
||||
|
||||
/* Visualize node. */
|
||||
void visualize(std::string &str) const;
|
||||
};
|
||||
|
||||
struct OctreeInternalNode : public OctreeNode {
|
||||
OctreeInternalNode(OctreeNode &node) : children_(8)
|
||||
{
|
||||
bbox = node.bbox;
|
||||
depth = node.depth;
|
||||
sigma = node.sigma;
|
||||
}
|
||||
|
||||
vector<std::shared_ptr<OctreeNode>> children_;
|
||||
};
|
||||
|
||||
class Octree {
|
||||
public:
|
||||
Octree(const BoundBox &bbox);
|
||||
~Octree() = default;
|
||||
|
||||
/* Build the octree according to the volume density. */
|
||||
#ifdef WITH_OPENVDB
|
||||
void build(Device *, Progress &, openvdb::BoolGrid::ConstPtr &, const Object *, const Shader *);
|
||||
#else
|
||||
void build(Device *, Progress &, const Object *, const Shader *);
|
||||
#endif
|
||||
|
||||
/* Convert the octree into an array of nodes for uploading to the kernel. */
|
||||
void flatten(KernelOctreeNode *, const int, const std::shared_ptr<OctreeNode> &, int &) const;
|
||||
void set_flattened(const bool = true);
|
||||
bool is_flattened() const;
|
||||
|
||||
/* Flatten a 3D coordinate in the grid to a 1D index. */
|
||||
int flatten_index(int x, int y, int z) const;
|
||||
/* Convert from index to the position of the lower left corner of the voxel. */
|
||||
float3 index_to_position(int x, int y, int z) const;
|
||||
/* Size of a voxel. */
|
||||
float3 voxel_size() const;
|
||||
|
||||
int get_num_nodes() const;
|
||||
std::shared_ptr<OctreeNode> get_root() const;
|
||||
bool is_built() const;
|
||||
|
||||
/* Draw octree nodes as empty boxes with Blender Python API. */
|
||||
void visualize(std::ofstream &file, const std::string object_name) const;
|
||||
|
||||
private:
|
||||
/* The bounding box of the octree is divided into a regular grid with the same resolution in each
|
||||
* dimension. */
|
||||
int resolution_;
|
||||
/* Extrema of volume densities in the grid. */
|
||||
vector<Extrema<float>> sigmas_;
|
||||
/* Compute the extrema of all the `sigmas_` in a coordinate bounding box defined by `index_min`
|
||||
* and `index_max`. */
|
||||
Extrema<float> get_extrema(const int3 index_min, const int3 index_max) const;
|
||||
/* Randomly sample positions inside the grid to evaluate the shader for the density. */
|
||||
#ifdef WITH_OPENVDB
|
||||
void evaluate_volume_density(
|
||||
Device *, Progress &, openvdb::BoolGrid::ConstPtr &, const Object *, const Shader *);
|
||||
#else
|
||||
void evaluate_volume_density(Device *, Progress &, const Object *, const Shader *);
|
||||
#endif
|
||||
/* Convert from position in object space to grid index space. */
|
||||
float3 position_to_index_scale_;
|
||||
float3 index_to_position_scale_;
|
||||
float3 position_to_index(const float3 p) const;
|
||||
int3 position_to_floor_index(const float3 p) const;
|
||||
int3 position_to_ceil_index(const float3 p) const;
|
||||
|
||||
/* Whether a node should be split into child nodes. */
|
||||
bool should_split(std::shared_ptr<OctreeNode> &node) const;
|
||||
/* Scale the node size so that the octree has the similar subdivision levels in viewport and
|
||||
* final render. */
|
||||
float volume_scale(const Object *object) const;
|
||||
float scale_;
|
||||
/* Recursively build a node and its child nodes. */
|
||||
void recursive_build(std::shared_ptr<OctreeNode> &);
|
||||
/* Turn a node into an internal node. */
|
||||
std::shared_ptr<OctreeInternalNode> make_internal(std::shared_ptr<OctreeNode> &node);
|
||||
|
||||
/* Root node. */
|
||||
std::shared_ptr<OctreeNode> root_;
|
||||
|
||||
/* Whether the octree is already built. */
|
||||
bool is_built_;
|
||||
|
||||
/* Whether the octree is already flattened into an array. */
|
||||
bool is_flattened_;
|
||||
|
||||
/* Number of nodes in the octree. Incremented while building the tree. */
|
||||
std::atomic<int> num_nodes_ = 1;
|
||||
|
||||
/* Task pool for building the octree in parallel. */
|
||||
TaskPool task_pool_;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __OCTREE_H__ */
|
||||
@@ -24,6 +24,7 @@ CPUKernels::CPUKernels()
|
||||
REGISTER_KERNEL(shader_eval_displace),
|
||||
REGISTER_KERNEL(shader_eval_background),
|
||||
REGISTER_KERNEL(shader_eval_curve_shadow_transparency),
|
||||
REGISTER_KERNEL(shader_eval_volume_density),
|
||||
/* Adaptive sampling. */
|
||||
REGISTER_KERNEL(adaptive_sampling_convergence_check),
|
||||
REGISTER_KERNEL(adaptive_sampling_filter_x),
|
||||
|
||||
@@ -40,6 +40,7 @@ class CPUKernels {
|
||||
ShaderEvalFunction shader_eval_displace;
|
||||
ShaderEvalFunction shader_eval_background;
|
||||
ShaderEvalFunction shader_eval_curve_shadow_transparency;
|
||||
ShaderEvalFunction shader_eval_volume_density;
|
||||
|
||||
/* Adaptive stopping. */
|
||||
|
||||
|
||||
@@ -22,7 +22,8 @@ bool device_kernel_has_shading(DeviceKernel kernel)
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_DEDICATED_LIGHT ||
|
||||
kernel == DEVICE_KERNEL_SHADER_EVAL_DISPLACE ||
|
||||
kernel == DEVICE_KERNEL_SHADER_EVAL_BACKGROUND ||
|
||||
kernel == DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY);
|
||||
kernel == DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY ||
|
||||
kernel == DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY);
|
||||
}
|
||||
|
||||
bool device_kernel_has_intersection(DeviceKernel kernel)
|
||||
@@ -108,6 +109,8 @@ const char *device_kernel_as_string(DeviceKernel kernel)
|
||||
return "shader_eval_background";
|
||||
case DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY:
|
||||
return "shader_eval_curve_shadow_transparency";
|
||||
case DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY:
|
||||
return "shader_eval_volume_density";
|
||||
|
||||
/* Film. */
|
||||
|
||||
|
||||
@@ -423,7 +423,7 @@ bool MetalKernelPipeline::should_use_binary_archive() const
|
||||
if ((device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND &&
|
||||
device_kernel <= DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW) ||
|
||||
(device_kernel >= DEVICE_KERNEL_SHADER_EVAL_DISPLACE &&
|
||||
device_kernel <= DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY))
|
||||
device_kernel <= DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY))
|
||||
{
|
||||
/* Archive all shade kernels - they take a long time to compile. */
|
||||
return true;
|
||||
|
||||
@@ -1292,6 +1292,7 @@ void OneapiDevice::get_adjusted_global_and_local_sizes(SyclQueue *queue,
|
||||
case DEVICE_KERNEL_SHADER_EVAL_DISPLACE:
|
||||
case DEVICE_KERNEL_SHADER_EVAL_BACKGROUND:
|
||||
case DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY:
|
||||
case DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY:
|
||||
preferred_work_group_size = preferred_work_group_size_shader_evaluation;
|
||||
break;
|
||||
|
||||
|
||||
@@ -594,6 +594,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
||||
group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].raygen.module = optix_module;
|
||||
group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].raygen.entryFunctionName =
|
||||
"__raygen__kernel_optix_shader_eval_curve_shadow_transparency";
|
||||
group_descs[PG_RGEN_EVAL_VOLUME_DENSITY].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
|
||||
group_descs[PG_RGEN_EVAL_VOLUME_DENSITY].raygen.module = optix_module;
|
||||
group_descs[PG_RGEN_EVAL_VOLUME_DENSITY].raygen.entryFunctionName =
|
||||
"__raygen__kernel_optix_shader_eval_volume_density";
|
||||
}
|
||||
|
||||
# ifdef WITH_OSL
|
||||
@@ -1034,6 +1038,7 @@ bool OptiXDevice::load_osl_kernels()
|
||||
pipeline_groups.push_back(groups[PG_RGEN_EVAL_BACKGROUND]);
|
||||
pipeline_groups.push_back(groups[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY]);
|
||||
pipeline_groups.push_back(groups[PG_RGEN_INIT_FROM_CAMERA]);
|
||||
pipeline_groups.push_back(groups[PG_RGEN_EVAL_VOLUME_DENSITY]);
|
||||
|
||||
for (const OptixProgramGroup &group : osl_groups) {
|
||||
if (group != nullptr) {
|
||||
|
||||
@@ -38,6 +38,7 @@ enum {
|
||||
PG_RGEN_EVAL_BACKGROUND,
|
||||
PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY,
|
||||
PG_RGEN_INIT_FROM_CAMERA,
|
||||
PG_RGEN_EVAL_VOLUME_DENSITY,
|
||||
|
||||
/* Miss */
|
||||
PG_MISS,
|
||||
|
||||
@@ -82,7 +82,8 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
}
|
||||
if (kernel == DEVICE_KERNEL_SHADER_EVAL_DISPLACE ||
|
||||
kernel == DEVICE_KERNEL_SHADER_EVAL_BACKGROUND ||
|
||||
kernel == DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY)
|
||||
kernel == DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY ||
|
||||
kernel == DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY)
|
||||
{
|
||||
set_launch_param(offsetof(KernelParamsOptiX, offset), sizeof(int32_t), 2);
|
||||
}
|
||||
@@ -167,6 +168,10 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
sbt_params.raygenRecord = sbt_data_ptr +
|
||||
PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY * sizeof(SbtRecord);
|
||||
break;
|
||||
case DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY:
|
||||
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_VOLUME_DENSITY * sizeof(SbtRecord);
|
||||
break;
|
||||
|
||||
case DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA:
|
||||
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||
|
||||
@@ -114,6 +114,9 @@ bool ShaderEval::eval_cpu(Device *device,
|
||||
case SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY:
|
||||
kernels.shader_eval_curve_shadow_transparency(kg, input_data, output_data, work_index);
|
||||
break;
|
||||
case SHADER_EVAL_VOLUME_DENSITY:
|
||||
kernels.shader_eval_volume_density(kg, input_data, output_data, work_index);
|
||||
break;
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -139,6 +142,8 @@ bool ShaderEval::eval_gpu(Device *device,
|
||||
case SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY:
|
||||
kernel = DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY;
|
||||
break;
|
||||
case SHADER_EVAL_VOLUME_DENSITY:
|
||||
kernel = DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY;
|
||||
};
|
||||
|
||||
/* Create device queue. */
|
||||
|
||||
@@ -19,6 +19,7 @@ enum ShaderEvalType {
|
||||
SHADER_EVAL_DISPLACE,
|
||||
SHADER_EVAL_BACKGROUND,
|
||||
SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY,
|
||||
SHADER_EVAL_VOLUME_DENSITY,
|
||||
};
|
||||
|
||||
/* ShaderEval class performs shader evaluation for background light and displacement. */
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "kernel/camera/projection.h"
|
||||
#include "kernel/integrator/displacement_shader.h"
|
||||
#include "kernel/integrator/surface_shader.h"
|
||||
#include "kernel/integrator/volume_shader.h"
|
||||
|
||||
#include "kernel/geom/object.h"
|
||||
#include "kernel/geom/shader_data.h"
|
||||
@@ -115,4 +116,87 @@ ccl_device void kernel_curve_shadow_transparency_evaluate(
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device void kernel_volume_density_evaluate(KernelGlobals kg,
|
||||
ccl_global const KernelShaderEvalInput *input,
|
||||
ccl_global float *output,
|
||||
const int offset)
|
||||
{
|
||||
#ifdef __VOLUME__
|
||||
if (input[offset * 2 + 1].object == SHADER_NONE) {
|
||||
return;
|
||||
}
|
||||
|
||||
KernelShaderEvalInput in = input[offset * 2];
|
||||
|
||||
/* Setup ray. */
|
||||
Ray ray;
|
||||
ray.P = make_float3(__int_as_float(in.prim), in.u, in.v);
|
||||
ray.D = zero_float3();
|
||||
ray.tmin = 0.0f;
|
||||
/* Motion blur is ignored when computing the extrema of the density, but we also don't expect the
|
||||
* value to change a lot in one frame. */
|
||||
ray.time = 0.5f;
|
||||
|
||||
/* Setup shader data. */
|
||||
ShaderData sd;
|
||||
shader_setup_from_volume(&sd, &ray, in.object);
|
||||
sd.flag = SD_IS_VOLUME_SHADER_EVAL;
|
||||
|
||||
/* Evaluate extinction and emission without allocating closures. */
|
||||
sd.num_closure_left = 0;
|
||||
/* Evaluate density for camera ray because it usually makes the most visual impact. For shaders
|
||||
* that depends on ray types, the extrema are estimated on the fly. */
|
||||
/* TODO(weizhen): Volume invisible to camera ray might appear noisy. We can at least build a
|
||||
* separate octree for shadow ray. */
|
||||
const uint32_t path_flag = PATH_RAY_CAMERA;
|
||||
|
||||
/* Setup volume stack entry. */
|
||||
in = input[offset * 2 + 1];
|
||||
const int shader = in.object;
|
||||
const VolumeStack entry = {sd.object, shader};
|
||||
|
||||
const float3 voxel_size = make_float3(__int_as_float(in.prim), in.u, in.v);
|
||||
Extrema<float> extrema = {FLT_MAX, -FLT_MAX};
|
||||
/* For heterogeneous volume, we take 16 samples per grid;
|
||||
* for homogeneous volume, only 1 sample is needed. */
|
||||
const int num_samples = volume_is_homogeneous(kg, entry) ? 1 : 16;
|
||||
|
||||
const bool need_transformation = !(kernel_data_fetch(object_flag, sd.object) &
|
||||
SD_OBJECT_TRANSFORM_APPLIED);
|
||||
const Transform tfm = need_transformation ?
|
||||
object_fetch_transform(kg, sd.object, OBJECT_TRANSFORM) :
|
||||
Transform();
|
||||
for (int sample = 0; sample < num_samples; sample++) {
|
||||
/* Blue noise indexing. The sequence length is the number of samples. */
|
||||
const uint3 index = make_uint3(sample + offset * num_samples, 0, 0xffffffff);
|
||||
|
||||
/* Sample a random position inside the voxel. */
|
||||
const float3 rand_p = sobol_burley_sample_3D(
|
||||
index.x, PRNG_BAKE_VOLUME_DENSITY_EVAL, index.y, index.z);
|
||||
sd.P = ray.P + rand_p * voxel_size;
|
||||
if (need_transformation) {
|
||||
/* Convert to world spcace. */
|
||||
sd.P = transform_point(&tfm, sd.P);
|
||||
}
|
||||
sd.closure_transparent_extinction = zero_float3();
|
||||
sd.closure_emission_background = zero_float3();
|
||||
|
||||
/* Evaluate volume coefficients. */
|
||||
volume_shader_eval_entry<false,
|
||||
KERNEL_FEATURE_NODE_MASK_VOLUME & ~KERNEL_FEATURE_NODE_LIGHT_PATH>(
|
||||
kg, INTEGRATOR_STATE_NULL, &sd, entry, path_flag);
|
||||
|
||||
const float sigma = reduce_max(sd.closure_transparent_extinction);
|
||||
const float emission = reduce_max(sd.closure_emission_background);
|
||||
|
||||
extrema = merge(extrema, fmaxf(sigma, emission));
|
||||
}
|
||||
|
||||
/* Write output. */
|
||||
const float scale = object_volume_density(kg, sd.object);
|
||||
output[offset * 2 + 0] = extrema.min / scale;
|
||||
output[offset * 2 + 1] = extrema.max / scale;
|
||||
#endif
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
@@ -85,4 +85,9 @@ KERNEL_DATA_ARRAY(TextureInfo, texture_info)
|
||||
/* ies lights */
|
||||
KERNEL_DATA_ARRAY(float, ies)
|
||||
|
||||
/* Volume. */
|
||||
KERNEL_DATA_ARRAY(KernelOctreeNode, volume_tree_nodes)
|
||||
KERNEL_DATA_ARRAY(KernelOctreeRoot, volume_tree_roots)
|
||||
KERNEL_DATA_ARRAY(int, volume_tree_root_ids)
|
||||
|
||||
#undef KERNEL_DATA_ARRAY
|
||||
|
||||
@@ -41,10 +41,10 @@ KERNEL_STRUCT_MEMBER(background, int, use_mis)
|
||||
KERNEL_STRUCT_MEMBER(background, int, lightgroup)
|
||||
/* Light Index. */
|
||||
KERNEL_STRUCT_MEMBER(background, int, light_index)
|
||||
/* Object Index. */
|
||||
KERNEL_STRUCT_MEMBER(background, int, object_index)
|
||||
/* Padding. */
|
||||
KERNEL_STRUCT_MEMBER(background, int, pad1)
|
||||
KERNEL_STRUCT_MEMBER(background, int, pad2)
|
||||
KERNEL_STRUCT_MEMBER(background, int, pad3)
|
||||
KERNEL_STRUCT_END(KernelBackground)
|
||||
|
||||
/* BVH: own BVH2 if no native device acceleration struct used. */
|
||||
|
||||
@@ -81,6 +81,10 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_curve_shadow_transparency)(
|
||||
const KernelShaderEvalInput *input,
|
||||
float *output,
|
||||
const int offset);
|
||||
void KERNEL_FUNCTION_FULL_NAME(shader_eval_volume_density)(const ThreadKernelGlobalsCPU *kg,
|
||||
const KernelShaderEvalInput *input,
|
||||
float *output,
|
||||
const int offset);
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Adaptive sampling.
|
||||
|
||||
@@ -134,6 +134,22 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_curve_shadow_transparency)(
|
||||
#endif
|
||||
}
|
||||
|
||||
void KERNEL_FUNCTION_FULL_NAME(shader_eval_volume_density)(const ThreadKernelGlobalsCPU *kg,
|
||||
const KernelShaderEvalInput *input,
|
||||
float *output,
|
||||
const int offset)
|
||||
{
|
||||
#ifdef KERNEL_STUB
|
||||
STUB_ASSERT(KERNEL_ARCH, shader_eval_volume_density);
|
||||
(void)kg;
|
||||
(void)input;
|
||||
(void)output;
|
||||
(void)offset;
|
||||
#else
|
||||
kernel_volume_density_evaluate(kg, input, output, offset);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Adaptive sampling.
|
||||
*/
|
||||
|
||||
@@ -955,6 +955,22 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
}
|
||||
ccl_gpu_kernel_postfix
|
||||
|
||||
/* Volume Density. */
|
||||
|
||||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
ccl_gpu_kernel_signature(shader_eval_volume_density,
|
||||
ccl_global KernelShaderEvalInput *input,
|
||||
ccl_global float *output,
|
||||
const int offset,
|
||||
const int work_size)
|
||||
{
|
||||
int i = ccl_gpu_global_id_x();
|
||||
if (i < work_size) {
|
||||
ccl_gpu_kernel_call(kernel_volume_density_evaluate(nullptr, input, output, offset + i));
|
||||
}
|
||||
}
|
||||
ccl_gpu_kernel_postfix
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Denoising.
|
||||
*/
|
||||
|
||||
@@ -594,6 +594,11 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
|
||||
oneapi_kernel_shader_eval_curve_shadow_transparency);
|
||||
break;
|
||||
}
|
||||
case DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY: {
|
||||
oneapi_call(
|
||||
kg, cgh, global_size, local_size, args, oneapi_kernel_shader_eval_volume_density);
|
||||
break;
|
||||
}
|
||||
case DEVICE_KERNEL_PREFIX_SUM: {
|
||||
oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_prefix_sum);
|
||||
break;
|
||||
|
||||
@@ -94,3 +94,11 @@ extern "C" __global__ void __raygen__kernel_optix_shader_eval_curve_shadow_trans
|
||||
const int global_index = kernel_params.offset + optixGetLaunchIndex().x;
|
||||
kernel_curve_shadow_transparency_evaluate(nullptr, input, output, global_index);
|
||||
}
|
||||
|
||||
extern "C" __global__ void __raygen__kernel_optix_shader_eval_volume_density()
|
||||
{
|
||||
KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array;
|
||||
float *const output = kernel_params.render_buffer;
|
||||
const int global_index = kernel_params.offset + optixGetLaunchIndex().x;
|
||||
kernel_volume_density_evaluate(nullptr, input, output, global_index);
|
||||
}
|
||||
|
||||
@@ -108,7 +108,8 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s
|
||||
* background volume is always assumed to be CG. */
|
||||
if (kernel_data.background.volume_shader != SHADER_NONE) {
|
||||
if (!(path_flag & PATH_RAY_SHADOW_CATCHER_PASS)) {
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, stack_index, object) = OBJECT_NONE;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(
|
||||
state, volume_stack, stack_index, object) = kernel_data.background.object_index;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(
|
||||
state, volume_stack, stack_index, shader) = kernel_data.background.volume_shader;
|
||||
stack_index++;
|
||||
|
||||
@@ -84,7 +84,8 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg,
|
||||
INTEGRATOR_STATE_WRITE(state, isect, type) = PRIMITIVE_NONE;
|
||||
|
||||
if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) {
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 0, object) = OBJECT_NONE;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(
|
||||
state, volume_stack, 0, object) = kernel_data.background.object_index;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(
|
||||
state, volume_stack, 0, shader) = kernel_data.background.volume_shader;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 1, object) = OBJECT_NONE;
|
||||
|
||||
@@ -94,9 +94,7 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg,
|
||||
/* `object` is only needed for light tree with light linking, it is irrelevant for shadow. */
|
||||
shader_setup_from_volume(shadow_sd, &ray, OBJECT_NONE);
|
||||
|
||||
const float step_size = volume_stack_step_size<true>(kg, state);
|
||||
|
||||
volume_shadow_heterogeneous(kg, state, &ray, shadow_sd, throughput, step_size);
|
||||
volume_shadow_heterogeneous(kg, state, &ray, shadow_sd, throughput);
|
||||
}
|
||||
# endif
|
||||
|
||||
|
||||
@@ -57,6 +57,10 @@ struct VolumeIntegrateResult {
|
||||
* todo: this value could be tweaked or turned into a probability to avoid unnecessary
|
||||
* work in volumes and subsurface scattering. */
|
||||
# define VOLUME_THROUGHPUT_EPSILON 1e-6f
|
||||
/* TODO(weizhen): tweak this value. */
|
||||
# define OVERLAP_EXP 5e-4f
|
||||
/* Number of mantissa bits of floating-point numbers. */
|
||||
# define MANTISSA_BITS 23
|
||||
|
||||
/* Volume shader properties
|
||||
*
|
||||
@@ -133,60 +137,430 @@ struct VolumeStep {
|
||||
/* Perform shading at this offset within a step, to integrate over the entire step segment. */
|
||||
float shade_offset;
|
||||
|
||||
/* Maximal steps allowed between `ray->tmin` and `ray->tmax`. */
|
||||
int max_steps;
|
||||
/* Current step. */
|
||||
int step;
|
||||
|
||||
/* Current active segment. */
|
||||
Interval<float> t;
|
||||
};
|
||||
|
||||
template<const bool shadow>
|
||||
ccl_device_forceinline void volume_step_init(KernelGlobals kg,
|
||||
const ccl_private RNGState *rng_state,
|
||||
const float object_step_size,
|
||||
const float tmin,
|
||||
const float tmax,
|
||||
ccl_private VolumeStep *vstep)
|
||||
{
|
||||
vstep->step = 0;
|
||||
vstep->t.min = vstep->t.max = tmin;
|
||||
vstep->shade_offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SHADE_OFFSET);
|
||||
vstep->offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_OFFSET);
|
||||
}
|
||||
|
||||
if (object_step_size == FLT_MAX) {
|
||||
/* Homogeneous volume. */
|
||||
vstep->size = tmax - tmin;
|
||||
vstep->shade_offset = 0.0f;
|
||||
vstep->offset = 1.0f;
|
||||
vstep->max_steps = 1;
|
||||
/* -------------------------------------------------------------------- */
|
||||
/** \name Hierarchical DDA for ray tracing the volume octree
|
||||
*
|
||||
* Following "Efficient Sparse Voxel Octrees" by Samuli Laine and Tero Karras,
|
||||
* and the implementation in https://dubiousconst282.github.io/2024/10/03/voxel-ray-tracing/
|
||||
*
|
||||
* The ray segment is transformed into octree space [1, 2), with `ray->D` pointing all negative
|
||||
* directions. At each ray tracing step, we intersect the backface of the current active leaf node
|
||||
* to find `t.max`, then store a point `current_P` which lies in the adjacent leaf node. The next
|
||||
* leaf node is found by checking the higher bits of `current_P`.
|
||||
*
|
||||
* The paper suggests to keep a stack of parent nodes, in practice such a stack (even when the size
|
||||
* is just 8) slows down performance on GPU. Instead we store the parent index in the leaf node
|
||||
* directly, since there is sufficient space due to alignment.
|
||||
*
|
||||
* \{ */
|
||||
|
||||
struct OctreeTracing {
|
||||
/* Current active leaf node. */
|
||||
ccl_global const KernelOctreeNode *node = nullptr;
|
||||
|
||||
/* Current active ray segment, typically spans from the front face to the back face of the
|
||||
* current leaf node. */
|
||||
Interval<float> t;
|
||||
|
||||
/* Ray origin in octree coordinate space. */
|
||||
packed_float3 ray_P;
|
||||
|
||||
/* Ray direction in octree coordinate space. */
|
||||
packed_float3 ray_D;
|
||||
|
||||
/* Current active position in octree coordinate space. */
|
||||
uint3 current_P;
|
||||
|
||||
/* Object and shader which the octree represents. */
|
||||
VolumeStack entry = {OBJECT_NONE, SHADER_NONE};
|
||||
|
||||
/* Scale of the current active leaf node, relative to the smallest possible size representable by
|
||||
* float. Initialize to the number of float mantissa bits. */
|
||||
uint8_t scale = MANTISSA_BITS;
|
||||
uint8_t next_scale;
|
||||
/* Mark the dimension (x,y,z) to negate the ray so that we find the correct octant. */
|
||||
uint8_t octant_mask;
|
||||
|
||||
/* Whether multiple volumes overlap in the ray segment. */
|
||||
bool no_overlap = false;
|
||||
|
||||
/* Maximum and minimum of the densities in the current segment. */
|
||||
Extrema<float> sigma = 0.0f;
|
||||
|
||||
ccl_device_inline_method OctreeTracing(const float tmin)
|
||||
{
|
||||
/* Initialize t.max to FLT_MAX so that any intersection with the node face is smaller. */
|
||||
t = {tmin, FLT_MAX};
|
||||
}
|
||||
else {
|
||||
/* Heterogeneous volume. */
|
||||
vstep->max_steps = kernel_data.integrator.volume_max_steps;
|
||||
const float t = tmax - tmin;
|
||||
float step_size = min(object_step_size, t);
|
||||
|
||||
if (t > vstep->max_steps * step_size) {
|
||||
/* Increase step size to cover the whole ray segment. */
|
||||
step_size = t / (float)vstep->max_steps;
|
||||
enum Dimension { DIM_X = 1U << 0U, DIM_Y = 1U << 1U, DIM_Z = 1U << 2U };
|
||||
|
||||
/* Given ray origin `P` and direction `D` in object space, convert them into octree space
|
||||
* [1.0, 2.0).
|
||||
* Returns false if ray is leaving the octree or octree has degenerate shape. */
|
||||
ccl_device_inline_method bool to_octree_space(ccl_private const float3 &P,
|
||||
ccl_private const float3 &D,
|
||||
const float3 scale,
|
||||
const float3 translation)
|
||||
{
|
||||
if (!isfinite_safe(scale)) {
|
||||
/* Octree with a degenerate shape. */
|
||||
return false;
|
||||
}
|
||||
|
||||
vstep->size = step_size;
|
||||
vstep->shade_offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SHADE_OFFSET);
|
||||
/* Starting point of octree tracing. */
|
||||
float3 local_P = (P + D * t.min) * scale + translation;
|
||||
ray_D = D * scale;
|
||||
|
||||
if (shadow) {
|
||||
/* For shadows we do not offset all segments, since the starting point is already a random
|
||||
* distance inside the volume. It also appears to create banding artifacts for unknown
|
||||
* reasons. */
|
||||
vstep->offset = 1.0f;
|
||||
}
|
||||
else {
|
||||
vstep->offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_OFFSET);
|
||||
}
|
||||
/* Select octant mask to mirror the coordinate system so that ray direction is negative along
|
||||
* each axis, and adjust `local_P` accordingly. */
|
||||
const auto positive = ray_D > 0.0f;
|
||||
octant_mask = (!!positive.x * DIM_X) | (!!positive.y * DIM_Y) | (!!positive.z * DIM_Z);
|
||||
local_P = select(positive, 3.0f - local_P, local_P);
|
||||
|
||||
/* Clamp to the largest floating-point number smaller than 2.0f, for numerical stability. */
|
||||
local_P = min(local_P, make_float3(1.9999999f));
|
||||
current_P = float3_as_uint3(local_P);
|
||||
|
||||
ray_D = -fabs(ray_D);
|
||||
|
||||
/* Ray origin. */
|
||||
ray_P = local_P - ray_D * t.min;
|
||||
|
||||
/* Returns false if point lies outside of the octree and the ray is leaving the octree. */
|
||||
return all(local_P > 1.0f);
|
||||
}
|
||||
|
||||
/* Find the bounding box min of the node that `current_P` lies in within the current scale. */
|
||||
ccl_device_inline_method float3 floor_pos() const
|
||||
{
|
||||
/* Erase bits lower than scale. */
|
||||
const uint mask = ~0u << scale;
|
||||
return make_float3(__uint_as_float(current_P.x & mask),
|
||||
__uint_as_float(current_P.y & mask),
|
||||
__uint_as_float(current_P.z & mask));
|
||||
}
|
||||
|
||||
/* Find arbitrary position inside the next node.
|
||||
* We use the end of the current segment offsetted by half of the minimal node size in the normal
|
||||
* direction of the last face intersection. */
|
||||
ccl_device_inline_method void find_next_pos(const float3 bbox_min,
|
||||
const float3 t,
|
||||
const float tmax)
|
||||
{
|
||||
constexpr float half_size = 1.0f / (2 << VOLUME_OCTREE_MAX_DEPTH);
|
||||
const uint3 next_P = float3_as_uint3(
|
||||
select(t == tmax, bbox_min - half_size, ray_D * tmax + ray_P));
|
||||
|
||||
/* Find the nearest common ancestor of two positions by checking the shared higher bits. */
|
||||
const uint diff = (current_P.x ^ next_P.x) | (current_P.y ^ next_P.y) |
|
||||
(current_P.z ^ next_P.z);
|
||||
|
||||
current_P = next_P;
|
||||
next_scale = 32u - count_leading_zeros(diff);
|
||||
}
|
||||
|
||||
/* See `ray_aabb_intersect()`. We only need to intersect the 3 back sides because the ray
|
||||
* direction is all negative. */
|
||||
ccl_device_inline_method float ray_voxel_intersect(const float ray_tmax)
|
||||
{
|
||||
const float3 bbox_min = floor_pos();
|
||||
|
||||
/* Distances to the three surfaces. */
|
||||
float3 intersect_t = (bbox_min - ray_P) / ray_D;
|
||||
|
||||
/* Select the smallest element that is larger than `t.min`, to avoid self intersection. */
|
||||
intersect_t = select(intersect_t > t.min, intersect_t, make_float3(FLT_MAX));
|
||||
|
||||
/* The first intersection is given by the smallest t. */
|
||||
const float tmax = reduce_min(intersect_t);
|
||||
|
||||
find_next_pos(bbox_min, intersect_t, tmax);
|
||||
|
||||
return fminf(tmax, ray_tmax);
|
||||
}
|
||||
|
||||
/* Returns the octant of `current_P` in the node at given scale. */
|
||||
ccl_device_inline_method int get_octant() const
|
||||
{
|
||||
const uint8_t x = (current_P.x >> scale) & 1u;
|
||||
const uint8_t y = ((current_P.y >> scale) & 1u) << 1u;
|
||||
const uint8_t z = ((current_P.z >> scale) & 1u) << 2u;
|
||||
return (x | y | z) ^ octant_mask;
|
||||
}
|
||||
};
|
||||
|
||||
/* Check if an octree node is leaf node. */
|
||||
ccl_device_inline bool volume_node_is_leaf(const ccl_global KernelOctreeNode *knode)
|
||||
{
|
||||
return knode->first_child == -1;
|
||||
}
|
||||
|
||||
/* Find the leaf node of the current position, and replace `octree.node` with that node. */
|
||||
ccl_device void volume_voxel_get(KernelGlobals kg, ccl_private OctreeTracing &octree)
|
||||
{
|
||||
while (!volume_node_is_leaf(octree.node)) {
|
||||
octree.scale -= 1;
|
||||
const int child_index = octree.node->first_child + octree.get_octant();
|
||||
octree.node = &kernel_data_fetch(volume_tree_nodes, child_index);
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device_inline bool volume_integrate_advance(const int step,
|
||||
const ccl_private Ray *ccl_restrict ray,
|
||||
ccl_private float3 *shade_P,
|
||||
ccl_private VolumeStep &vstep)
|
||||
/* If there exists a Light Path Node, it could affect the density evaluation at runtime.
|
||||
* Randomly sample a few points on the ray to estimate the extrema. */
|
||||
template<const bool shadow, typename IntegratorGenericState>
|
||||
ccl_device_noinline Extrema<float> volume_estimate_extrema(KernelGlobals kg,
|
||||
const ccl_private Ray *ccl_restrict ray,
|
||||
ccl_private ShaderData *ccl_restrict sd,
|
||||
const IntegratorGenericState state,
|
||||
const ccl_private RNGState *rng_state,
|
||||
const uint32_t path_flag,
|
||||
const VolumeStack entry)
|
||||
{
|
||||
const bool homogeneous = volume_is_homogeneous(kg, entry);
|
||||
const int samples = homogeneous ? 1 : 4;
|
||||
const float shade_offset = homogeneous ?
|
||||
0.5f :
|
||||
path_state_rng_2D(kg, rng_state, PRNG_VOLUME_SHADE_OFFSET).y;
|
||||
const float step_size = (ray->tmax - ray->tmin) / float(samples);
|
||||
|
||||
/* Do not allocate closures. */
|
||||
sd->num_closure_left = 0;
|
||||
|
||||
Extrema<float> extrema = {FLT_MAX, -FLT_MAX};
|
||||
for (int i = 0; i < samples; i++) {
|
||||
const float shade_t = min(ray->tmax, ray->tmin + (shade_offset + i) * step_size);
|
||||
sd->P = ray->P + ray->D * shade_t;
|
||||
|
||||
sd->closure_transparent_extinction = zero_float3();
|
||||
sd->closure_emission_background = zero_float3();
|
||||
|
||||
volume_shader_eval_entry<shadow, KERNEL_FEATURE_NODE_MASK_VOLUME>(
|
||||
kg, state, sd, entry, path_flag);
|
||||
|
||||
const float sigma = reduce_max(sd->closure_transparent_extinction);
|
||||
const float emission = reduce_max(sd->closure_emission_background);
|
||||
|
||||
extrema = merge(extrema, fmaxf(sigma, emission));
|
||||
}
|
||||
|
||||
if (!homogeneous) {
|
||||
/* Slightly increase the majorant in case the estimation is not accurate. */
|
||||
extrema.max = fmaxf(0.5f, extrema.max * 1.5f);
|
||||
}
|
||||
|
||||
return extrema;
|
||||
}
|
||||
|
||||
/* Given an octree node, compute it's extrema.
|
||||
* In most common cases, the extrema are already stored in the node, but if the shader contains
|
||||
* a light path node, we need to evaluate the densities on the fly. */
|
||||
template<const bool shadow, typename IntegratorGenericState>
|
||||
ccl_device_inline Extrema<float> volume_object_get_extrema(KernelGlobals kg,
|
||||
const ccl_private Ray *ccl_restrict ray,
|
||||
ccl_private ShaderData *ccl_restrict sd,
|
||||
const IntegratorGenericState state,
|
||||
const ccl_private OctreeTracing &octree,
|
||||
const ccl_private RNGState *rng_state,
|
||||
const uint32_t path_flag)
|
||||
{
|
||||
const int shader_flag = kernel_data_fetch(shaders, (octree.entry.shader & SHADER_MASK)).flags;
|
||||
if ((path_flag & PATH_RAY_CAMERA) || !(shader_flag & SD_HAS_LIGHT_PATH_NODE)) {
|
||||
/* Use the baked volume density extrema. */
|
||||
return octree.node->sigma * object_volume_density(kg, octree.entry.object);
|
||||
}
|
||||
|
||||
return volume_estimate_extrema<shadow>(kg, ray, sd, state, rng_state, path_flag, octree.entry);
|
||||
}
|
||||
|
||||
/* Find the octree root node in the kernel array that corresponds to the volume stack entry. */
|
||||
ccl_device_inline const ccl_global KernelOctreeRoot *volume_find_octree_root(
|
||||
KernelGlobals kg, const VolumeStack entry)
|
||||
{
|
||||
int root = kernel_data_fetch(volume_tree_root_ids, entry.object);
|
||||
const ccl_global KernelOctreeRoot *kroot = &kernel_data_fetch(volume_tree_roots, root);
|
||||
while ((entry.shader & SHADER_MASK) != kroot->shader) {
|
||||
/* If one object has multiple shaders, we store the index of the last shader, and search
|
||||
* backwards for the octree with the corresponding shader. */
|
||||
kroot = &kernel_data_fetch(volume_tree_roots, --root);
|
||||
}
|
||||
return kroot;
|
||||
}
|
||||
|
||||
/* Find the current active ray segment.
|
||||
* We might have multiple overlapping octrees, so find the smallest `tmax` of all and store the
|
||||
* information of that octree in `OctreeTracing`.
|
||||
* Meanwhile, accumulate the density of all the leaf nodes that overlap with the active segment. */
|
||||
template<const bool shadow, typename IntegratorGenericState>
|
||||
ccl_device bool volume_octree_setup(KernelGlobals kg,
|
||||
const ccl_private Ray *ccl_restrict ray,
|
||||
ccl_private ShaderData *ccl_restrict sd,
|
||||
const IntegratorGenericState state,
|
||||
const ccl_private RNGState *rng_state,
|
||||
const uint32_t path_flag,
|
||||
ccl_private OctreeTracing &global,
|
||||
ccl_private VolumeStep &vstep)
|
||||
{
|
||||
if (global.no_overlap) {
|
||||
/* If the current active octree is already set up. */
|
||||
return !global.t.is_empty();
|
||||
}
|
||||
|
||||
const VolumeStack skip = global.entry;
|
||||
|
||||
int i = 0;
|
||||
for (;; i++) {
|
||||
/* Loop through all the object in the volume stack and find their octrees. */
|
||||
const VolumeStack entry = volume_stack_read<shadow>(state, i);
|
||||
|
||||
if (entry.shader == SHADER_NONE) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (entry.object == skip.object && entry.shader == skip.shader) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const ccl_global KernelOctreeRoot *kroot = volume_find_octree_root(kg, entry);
|
||||
|
||||
OctreeTracing local(global.t.min);
|
||||
local.node = &kernel_data_fetch(volume_tree_nodes, kroot->id);
|
||||
local.entry = entry;
|
||||
|
||||
/* Convert to object space. */
|
||||
float3 local_P = ray->P, local_D = ray->D;
|
||||
if (!(kernel_data_fetch(object_flag, entry.object) & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
const Transform itfm = object_fetch_transform(kg, entry.object, OBJECT_INVERSE_TRANSFORM);
|
||||
local_P = transform_point(&itfm, ray->P);
|
||||
local_D = transform_direction(&itfm, ray->D);
|
||||
}
|
||||
|
||||
/* Convert to octree space. */
|
||||
if (local.to_octree_space(local_P, local_D, kroot->scale, kroot->translation)) {
|
||||
volume_voxel_get(kg, local);
|
||||
local.t.max = local.ray_voxel_intersect(ray->tmax);
|
||||
}
|
||||
else {
|
||||
/* Current ray segment lies outside of the octree, usually happens with implicit volume, i.e.
|
||||
* everything behind a surface is considered as volume. */
|
||||
local.t.max = ray->tmax;
|
||||
}
|
||||
|
||||
global.sigma += volume_object_get_extrema<shadow>(
|
||||
kg, ray, sd, state, local, rng_state, path_flag);
|
||||
if (local.t.max <= global.t.max) {
|
||||
/* Replace the current active octree with the one that has the smallest `tmax`. */
|
||||
local.sigma = global.sigma;
|
||||
global = local;
|
||||
}
|
||||
}
|
||||
|
||||
if (!global.node) {
|
||||
/* Stack empty. */
|
||||
return false;
|
||||
}
|
||||
|
||||
if (global.t.is_empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (i == 1) {
|
||||
global.no_overlap = true;
|
||||
}
|
||||
|
||||
/* Step size should ideally be as small as the active voxel span, but not so small that we
|
||||
* never exit the volume. */
|
||||
const int steps_left = kernel_data.integrator.volume_max_steps - vstep.step;
|
||||
vstep.size = fminf(global.t.length(), 1.0f / global.sigma.range());
|
||||
vstep.size = fmaxf(vstep.size, (ray->tmax - vstep.t.min) / float(steps_left));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Advance to the next adjacent leaf node and update the active interval. */
|
||||
template<const bool shadow, typename IntegratorGenericState>
|
||||
ccl_device_inline bool volume_octree_advance(KernelGlobals kg,
|
||||
const ccl_private Ray *ccl_restrict ray,
|
||||
ccl_private ShaderData *ccl_restrict sd,
|
||||
const IntegratorGenericState state,
|
||||
const ccl_private RNGState *rng_state,
|
||||
const uint32_t path_flag,
|
||||
ccl_private OctreeTracing &octree,
|
||||
ccl_private VolumeStep &vstep)
|
||||
{
|
||||
if (octree.t.max >= ray->tmax) {
|
||||
/* Reached the last segment. */
|
||||
return false;
|
||||
}
|
||||
|
||||
if (vstep.step > kernel_data.integrator.volume_max_steps) {
|
||||
/* Exceeds maximal steps. */
|
||||
return false;
|
||||
}
|
||||
|
||||
if (octree.next_scale > MANTISSA_BITS) {
|
||||
if (fabsf(octree.t.max - ray->tmax) <= OVERLAP_EXP) {
|
||||
/* This could happen due to numerical issues, when the bounding box overlaps with a
|
||||
* primitive, but different intersections are registered for octree and ray intersection. */
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Outside of the root node, continue tracing using the extrema of the root node. */
|
||||
octree.t = {octree.t.max, ray->tmax};
|
||||
octree.node = &kernel_data_fetch(volume_tree_nodes,
|
||||
volume_find_octree_root(kg, octree.entry)->id);
|
||||
}
|
||||
else {
|
||||
kernel_assert(octree.next_scale > octree.scale);
|
||||
|
||||
/* Fetch the common ancestor of the current and the next leaf nodes. */
|
||||
for (; octree.scale < octree.next_scale; octree.scale++) {
|
||||
kernel_assert(octree.node->parent != -1);
|
||||
octree.node = &kernel_data_fetch(volume_tree_nodes, octree.node->parent);
|
||||
}
|
||||
|
||||
/* Find the current active leaf node. */
|
||||
volume_voxel_get(kg, octree);
|
||||
|
||||
/* Advance to the next segment. */
|
||||
octree.t.min = octree.t.max;
|
||||
octree.t.max = octree.ray_voxel_intersect(ray->tmax);
|
||||
}
|
||||
|
||||
octree.sigma = volume_object_get_extrema<shadow>(
|
||||
kg, ray, sd, state, octree, rng_state, path_flag);
|
||||
return volume_octree_setup<shadow>(kg, ray, sd, state, rng_state, path_flag, octree, vstep);
|
||||
}
|
||||
|
||||
/* Advance to the next interval. If the step size exceeds the current leaf node, find the next leaf
|
||||
* node. */
|
||||
template<const bool shadow, typename IntegratorGenericState>
|
||||
ccl_device bool volume_integrate_advance(KernelGlobals kg,
|
||||
const ccl_private Ray *ccl_restrict ray,
|
||||
ccl_private ShaderData *ccl_restrict sd,
|
||||
const IntegratorGenericState state,
|
||||
const ccl_private RNGState *rng_state,
|
||||
const uint32_t path_flag,
|
||||
ccl_private OctreeTracing &octree,
|
||||
ccl_private VolumeStep &vstep)
|
||||
{
|
||||
if (vstep.t.max == ray->tmax) {
|
||||
/* Reached the last segment. */
|
||||
@@ -195,13 +569,33 @@ ccl_device_inline bool volume_integrate_advance(const int step,
|
||||
|
||||
/* Advance to new position. */
|
||||
vstep.t.min = vstep.t.max;
|
||||
vstep.t.max = min(ray->tmax, ray->tmin + (step + vstep.offset) * vstep.size);
|
||||
const float shade_t = mix(vstep.t.min, vstep.t.max, vstep.shade_offset);
|
||||
*shade_P = ray->P + ray->D * shade_t;
|
||||
bool success = true;
|
||||
if (!octree.node) {
|
||||
/* Initialize octree. */
|
||||
success = volume_octree_setup<shadow>(kg, ray, sd, state, rng_state, path_flag, octree, vstep);
|
||||
vstep.t.max = octree.t.min + vstep.offset * vstep.size;
|
||||
}
|
||||
else {
|
||||
float candidate_t_max = vstep.t.max + vstep.size;
|
||||
if (candidate_t_max >= octree.t.max) {
|
||||
/* Advance to next voxel. */
|
||||
volume_octree_advance<shadow>(kg, ray, sd, state, rng_state, path_flag, octree, vstep);
|
||||
candidate_t_max = fminf(candidate_t_max, vstep.t.max + vstep.size);
|
||||
}
|
||||
vstep.t.max = candidate_t_max;
|
||||
}
|
||||
|
||||
return step < vstep.max_steps;
|
||||
/* Clamp to prevent numerical issues. */
|
||||
vstep.t.max = clamp(vstep.t.max, vstep.t.min + OVERLAP_EXP, ray->tmax);
|
||||
|
||||
const float shade_t = mix(vstep.t.min, vstep.t.max, vstep.shade_offset);
|
||||
sd->P = ray->P + ray->D * shade_t;
|
||||
|
||||
return success && vstep.step++ < kernel_data.integrator.volume_max_steps;
|
||||
}
|
||||
|
||||
/** \} */
|
||||
|
||||
/* Volume Shadows
|
||||
*
|
||||
* These functions are used to attenuate shadow rays to lights. Both absorption
|
||||
@@ -213,8 +607,7 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
|
||||
IntegratorShadowState state,
|
||||
ccl_private Ray *ccl_restrict ray,
|
||||
ccl_private ShaderData *ccl_restrict sd,
|
||||
ccl_private Spectrum *ccl_restrict throughput,
|
||||
const float object_step_size)
|
||||
ccl_private Spectrum *ccl_restrict throughput)
|
||||
{
|
||||
/* Load random number state. */
|
||||
RNGState rng_state;
|
||||
@@ -228,11 +621,15 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
|
||||
|
||||
/* Prepare for stepping. */
|
||||
VolumeStep vstep;
|
||||
volume_step_init<true>(kg, &rng_state, object_step_size, ray->tmin, ray->tmax, &vstep);
|
||||
volume_step_init(kg, &rng_state, ray->tmin, &vstep);
|
||||
|
||||
OctreeTracing octree(ray->tmin);
|
||||
const uint32_t path_flag = PATH_RAY_SHADOW;
|
||||
|
||||
/* compute extinction at the start */
|
||||
Spectrum sum = zero_spectrum();
|
||||
for (int step = 0; volume_integrate_advance(step, ray, &sd->P, vstep); step++) {
|
||||
for (; volume_integrate_advance<true>(kg, ray, sd, state, &rng_state, path_flag, octree, vstep);)
|
||||
{
|
||||
/* compute attenuation over segment */
|
||||
Spectrum sigma_t = zero_spectrum();
|
||||
if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) {
|
||||
@@ -240,7 +637,7 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
|
||||
* because `exp(a)*exp(b) = exp(a+b)`, also do a quick #VOLUME_THROUGHPUT_EPSILON
|
||||
* check then. */
|
||||
sum += (-sigma_t * vstep.t.length());
|
||||
if ((step & 0x07) == 0) { /* TODO: Other interval? */
|
||||
if ((vstep.step & 0x07) == 0) { /* TODO: Other interval? */
|
||||
tp = *throughput * exp(sum);
|
||||
|
||||
/* stop if nearly all light is blocked */
|
||||
@@ -541,8 +938,8 @@ ccl_device_forceinline void volume_integrate_step_scattering(
|
||||
}
|
||||
|
||||
ccl_device_inline void volume_integrate_state_init(KernelGlobals kg,
|
||||
const ccl_private RNGState *rng_state,
|
||||
const VolumeSampleMethod direct_sample_method,
|
||||
const ccl_private RNGState *rng_state,
|
||||
ccl_private VolumeIntegrateState &vstate)
|
||||
{
|
||||
vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SCATTER_DISTANCE);
|
||||
@@ -644,7 +1041,6 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
|
||||
ccl_private ShaderData *ccl_restrict sd,
|
||||
const ccl_private RNGState *ccl_restrict rng_state,
|
||||
ccl_global float *ccl_restrict render_buffer,
|
||||
const float object_step_size,
|
||||
ccl_private LightSample *ls,
|
||||
ccl_private VolumeIntegrateResult &result)
|
||||
{
|
||||
@@ -656,11 +1052,11 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
|
||||
|
||||
/* Prepare for stepping. */
|
||||
VolumeStep vstep;
|
||||
volume_step_init<false>(kg, rng_state, object_step_size, ray->tmin, ray->tmax, &vstep);
|
||||
volume_step_init(kg, rng_state, ray->tmin, &vstep);
|
||||
|
||||
/* Initialize volume integration state. */
|
||||
VolumeIntegrateState vstate ccl_optional_struct_init;
|
||||
volume_integrate_state_init(kg, rng_state, direct_sample_method, vstate);
|
||||
volume_integrate_state_init(kg, direct_sample_method, rng_state, vstate);
|
||||
|
||||
/* Initialize volume integration result. */
|
||||
const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
|
||||
@@ -685,7 +1081,10 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
|
||||
# endif
|
||||
Spectrum accum_emission = zero_spectrum();
|
||||
|
||||
for (int step = 0; volume_integrate_advance(step, ray, &sd->P, vstep); step++) {
|
||||
OctreeTracing octree(ray->tmin);
|
||||
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
|
||||
for (; volume_integrate_advance<false>(kg, ray, sd, state, rng_state, path_flag, octree, vstep);)
|
||||
{
|
||||
/* compute segment */
|
||||
VolumeShaderCoefficients coeff ccl_optional_struct_init;
|
||||
if (volume_shader_sample(kg, state, sd, &coeff)) {
|
||||
@@ -1010,6 +1409,10 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
|
||||
ccl_private Ray *ccl_restrict ray,
|
||||
ccl_global float *ccl_restrict render_buffer)
|
||||
{
|
||||
if (integrator_state_volume_stack_is_empty(kg, state)) {
|
||||
return VOLUME_PATH_ATTENUATED;
|
||||
}
|
||||
|
||||
ShaderData sd;
|
||||
/* FIXME: `object` is used for light linking. We read the bottom of the stack for simplicity, but
|
||||
* this does not work for overlapping volumes. */
|
||||
@@ -1025,13 +1428,9 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
|
||||
|
||||
LightSample ls ccl_optional_struct_init;
|
||||
|
||||
/* Step through volume. */
|
||||
const float step_size = volume_stack_step_size<false>(kg, state);
|
||||
|
||||
/* TODO: expensive to zero closures? */
|
||||
VolumeIntegrateResult result = {};
|
||||
volume_integrate_heterogeneous(
|
||||
kg, state, ray, &sd, &rng_state, render_buffer, step_size, &ls, result);
|
||||
volume_integrate_heterogeneous(kg, state, ray, &sd, &rng_state, render_buffer, &ls, result);
|
||||
|
||||
# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
|
||||
/* The current path throughput which is used later to calculate per-segment throughput. */
|
||||
|
||||
@@ -145,7 +145,7 @@ ccl_device_inline bool volume_is_homogeneous(KernelGlobals kg,
|
||||
|
||||
if (shader_flag & SD_NEED_VOLUME_ATTRIBUTES) {
|
||||
const int object = entry.object;
|
||||
if (object == OBJECT_NONE) {
|
||||
if (object == kernel_data.background.object_index) {
|
||||
/* Volume attributes for world is not supported. */
|
||||
return true;
|
||||
}
|
||||
@@ -160,27 +160,6 @@ ccl_device_inline bool volume_is_homogeneous(KernelGlobals kg,
|
||||
return true;
|
||||
}
|
||||
|
||||
template<const bool shadow, typename IntegratorGenericState>
|
||||
ccl_device float volume_stack_step_size(KernelGlobals kg, const IntegratorGenericState state)
|
||||
{
|
||||
float step_size = FLT_MAX;
|
||||
|
||||
for (int i = 0;; i++) {
|
||||
const VolumeStack entry = volume_stack_read<shadow>(state, i);
|
||||
if (entry.shader == SHADER_NONE) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (!volume_is_homogeneous(kg, entry)) {
|
||||
float object_step_size = object_volume_step_size(kg, entry.object);
|
||||
object_step_size *= kernel_data.integrator.volume_step_rate;
|
||||
step_size = fminf(object_step_size, step_size);
|
||||
}
|
||||
}
|
||||
|
||||
return step_size;
|
||||
}
|
||||
|
||||
enum VolumeSampleMethod {
|
||||
VOLUME_SAMPLE_NONE = 0,
|
||||
VOLUME_SAMPLE_DISTANCE = (1 << 0),
|
||||
|
||||
@@ -309,6 +309,9 @@ enum PathTraceDimension {
|
||||
PRNG_SUBSURFACE_DISK = 0,
|
||||
PRNG_SUBSURFACE_DISK_RESAMPLE = 1,
|
||||
|
||||
/* Volume density baking. */
|
||||
PRNG_BAKE_VOLUME_DENSITY_EVAL = 0,
|
||||
|
||||
/* High enough number so we don't need to change it when adding new dimensions,
|
||||
* low enough so there is no uint16_t overflow with many bounces. */
|
||||
PRNG_BOUNCE_NUM = 16,
|
||||
@@ -951,6 +954,8 @@ struct AttributeMap {
|
||||
#endif
|
||||
|
||||
#define MAX_VOLUME_CLOSURE 8 // NOLINT
|
||||
/* Set the maximal resolution to be 128 (2^7) to limit traversing overhead. */
|
||||
#define VOLUME_OCTREE_MAX_DEPTH 7
|
||||
|
||||
/* This struct is the base class for all closures. The common members are
|
||||
* duplicated in all derived classes since we don't have C++ in the kernel
|
||||
@@ -1030,6 +1035,8 @@ enum ShaderDataFlag {
|
||||
|
||||
/* Shader flags. */
|
||||
|
||||
/* If Light Path Node is present in the shader graph. */
|
||||
SD_HAS_LIGHT_PATH_NODE = (1 << 13),
|
||||
/* Apply a correction term to smooth illumination on grazing angles when using bump mapping. */
|
||||
SD_USE_BUMP_MAP_CORRECTION = (1 << 15),
|
||||
/* Use front side for direct light sampling. */
|
||||
@@ -1679,6 +1686,27 @@ struct KernelLightTreeNode {
|
||||
};
|
||||
static_assert_align(KernelLightTreeNode, 16);
|
||||
|
||||
struct KernelOctreeRoot {
|
||||
packed_float3 scale;
|
||||
int id;
|
||||
packed_float3 translation;
|
||||
int shader;
|
||||
};
|
||||
|
||||
struct KernelOctreeNode {
|
||||
/* Index of the parent node in device vector `volume_tree_nodes`. */
|
||||
int parent;
|
||||
|
||||
/* Index of the first child node in device vector `volume_tree_nodes`. All children of the same
|
||||
* node are stored in contiguous memory. */
|
||||
int first_child;
|
||||
|
||||
/* Minimal and maximal volume density inside the node. */
|
||||
/* TODO(weizhen): we can make sigma Spectral for better accuracy. Since only root and leaf nodes
|
||||
* need sigma, we can introduce `KernelOctreeInnerNode` to reduce the size of the struct. */
|
||||
Extrema<float> sigma;
|
||||
};
|
||||
|
||||
struct KernelLightTreeEmitter {
|
||||
/* Bounding cone. */
|
||||
float theta_o;
|
||||
@@ -1833,6 +1861,7 @@ enum DeviceKernel : int {
|
||||
DEVICE_KERNEL_SHADER_EVAL_DISPLACE,
|
||||
DEVICE_KERNEL_SHADER_EVAL_BACKGROUND,
|
||||
DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY,
|
||||
DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY,
|
||||
|
||||
#define DECLARE_FILM_CONVERT_KERNEL(variant) \
|
||||
DEVICE_KERNEL_FILM_CONVERT_##variant, DEVICE_KERNEL_FILM_CONVERT_##variant##_HALF_RGBA
|
||||
|
||||
@@ -54,7 +54,10 @@ DeviceScene::DeviceScene(Device *device)
|
||||
shaders(device, "shaders", MEM_GLOBAL),
|
||||
lookup_table(device, "lookup_table", MEM_GLOBAL),
|
||||
sample_pattern_lut(device, "sample_pattern_lut", MEM_GLOBAL),
|
||||
ies_lights(device, "ies", MEM_GLOBAL)
|
||||
ies_lights(device, "ies", MEM_GLOBAL),
|
||||
volume_tree_nodes(device, "volume_tree_nodes", MEM_GLOBAL),
|
||||
volume_tree_roots(device, "volume_tree_roots", MEM_GLOBAL),
|
||||
volume_tree_root_ids(device, "volume_tree_root_ids", MEM_GLOBAL)
|
||||
{
|
||||
memset((void *)&data, 0, sizeof(data));
|
||||
}
|
||||
|
||||
@@ -86,6 +86,11 @@ class DeviceScene {
|
||||
/* IES lights */
|
||||
device_vector<float> ies_lights;
|
||||
|
||||
/* Volume. */
|
||||
device_vector<KernelOctreeNode> volume_tree_nodes;
|
||||
device_vector<KernelOctreeRoot> volume_tree_roots;
|
||||
device_vector<int> volume_tree_root_ids;
|
||||
|
||||
KernelData data;
|
||||
|
||||
DeviceScene(Device *device);
|
||||
|
||||
@@ -347,6 +347,7 @@ void GeometryManager::device_update_preprocess(Device *device, Scene *scene, Pro
|
||||
bool volume_images_updated = false;
|
||||
|
||||
for (Geometry *geom : scene->geometry) {
|
||||
const bool prev_has_volume = geom->has_volume;
|
||||
geom->has_volume = false;
|
||||
|
||||
update_attribute_realloc_flags(device_update_flags, geom->attributes);
|
||||
@@ -429,6 +430,18 @@ void GeometryManager::device_update_preprocess(Device *device, Scene *scene, Pro
|
||||
device_update_flags |= DEVICE_MESH_DATA_NEEDS_REALLOC;
|
||||
}
|
||||
|
||||
if (geom->has_volume) {
|
||||
if (geom->is_modified()) {
|
||||
scene->volume_manager->tag_update(geom);
|
||||
}
|
||||
if (!prev_has_volume) {
|
||||
scene->volume_manager->tag_update();
|
||||
}
|
||||
}
|
||||
else if (prev_has_volume) {
|
||||
scene->volume_manager->tag_update(geom);
|
||||
}
|
||||
|
||||
if (geom->is_hair()) {
|
||||
Hair *hair = static_cast<Hair *>(geom);
|
||||
|
||||
|
||||
@@ -227,6 +227,9 @@ void Object::tag_update(Scene *scene)
|
||||
if (geometry) {
|
||||
if (tfm_is_modified() || motion_is_modified()) {
|
||||
flag |= ObjectManager::TRANSFORM_MODIFIED;
|
||||
if (geometry->has_volume) {
|
||||
scene->volume_manager->tag_update(this, flag);
|
||||
}
|
||||
}
|
||||
|
||||
if (visibility_is_modified()) {
|
||||
@@ -770,6 +773,10 @@ void ObjectManager::device_update(Device *device,
|
||||
dscene->object_motion.tag_realloc();
|
||||
dscene->object_flag.tag_realloc();
|
||||
dscene->object_volume_step.tag_realloc();
|
||||
|
||||
/* If objects are added to the scene or deleted, the object indices might change, so we need to
|
||||
* update the root indices of the volume octrees. */
|
||||
scene->volume_manager->tag_update_indices();
|
||||
}
|
||||
|
||||
if (update_flags & HOLDOUT_MODIFIED) {
|
||||
@@ -809,6 +816,16 @@ void ObjectManager::device_update(Device *device,
|
||||
dscene->object_flag.tag_modified();
|
||||
dscene->object_volume_step.tag_modified();
|
||||
}
|
||||
|
||||
/* Update world object index. */
|
||||
if (!object->get_geometry()->is_light()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const Light *light = static_cast<const Light *>(object->get_geometry());
|
||||
if (light->get_light_type() == LIGHT_BACKGROUND) {
|
||||
dscene->data.background.object_index = object->index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -67,6 +67,7 @@ Scene ::Scene(const SceneParams ¶ms_, Device *device)
|
||||
particle_system_manager = make_unique<ParticleSystemManager>();
|
||||
bake_manager = make_unique<BakeManager>();
|
||||
procedural_manager = make_unique<ProceduralManager>();
|
||||
volume_manager = make_unique<VolumeManager>();
|
||||
|
||||
/* Create nodes after managers, since create_node() can tag the managers. */
|
||||
camera = create_node<Camera>();
|
||||
@@ -138,10 +139,9 @@ void Scene::free_memory(bool final)
|
||||
shader_manager->device_free(device, &dscene, this);
|
||||
osl_manager->device_free(device, &dscene, this);
|
||||
light_manager->device_free(device, &dscene);
|
||||
|
||||
particle_system_manager->device_free(device, &dscene);
|
||||
|
||||
bake_manager->device_free(device, &dscene);
|
||||
volume_manager->device_free(&dscene);
|
||||
|
||||
if (final) {
|
||||
image_manager->device_free(device);
|
||||
@@ -165,6 +165,7 @@ void Scene::free_memory(bool final)
|
||||
bake_manager.reset();
|
||||
update_stats.reset();
|
||||
procedural_manager.reset();
|
||||
volume_manager.reset();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -313,6 +314,14 @@ void Scene::device_update(Device *device_, Progress &progress)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Evaluate volume shader to build volume octrees. */
|
||||
progress.set_status("Updating Volume");
|
||||
volume_manager->device_update(device, &dscene, this, progress);
|
||||
|
||||
if (progress.get_cancel() || device->have_error()) {
|
||||
return;
|
||||
}
|
||||
|
||||
progress.set_status("Updating Camera Volume");
|
||||
camera->device_update_volume(device, &dscene, this);
|
||||
|
||||
@@ -965,6 +974,9 @@ template<> void Scene::delete_node(Geometry *node)
|
||||
}
|
||||
else {
|
||||
flag = GeometryManager::MESH_REMOVED;
|
||||
if (node->has_volume) {
|
||||
volume_manager->tag_update(node);
|
||||
}
|
||||
}
|
||||
|
||||
geometry.erase_by_swap(node);
|
||||
@@ -974,8 +986,14 @@ template<> void Scene::delete_node(Geometry *node)
|
||||
template<> void Scene::delete_node(Object *node)
|
||||
{
|
||||
assert(node->get_owner() == this);
|
||||
|
||||
uint flag = ObjectManager::OBJECT_REMOVED;
|
||||
if (node->get_geometry()->has_volume) {
|
||||
volume_manager->tag_update(node, flag);
|
||||
}
|
||||
|
||||
objects.erase_by_swap(node);
|
||||
object_manager->tag_update(this, ObjectManager::OBJECT_REMOVED);
|
||||
object_manager->tag_update(this, flag);
|
||||
}
|
||||
|
||||
template<> void Scene::delete_node(ParticleSystem *node)
|
||||
|
||||
@@ -50,6 +50,7 @@ class BakeData;
|
||||
class RenderStats;
|
||||
class SceneUpdateStats;
|
||||
class Volume;
|
||||
class VolumeManager;
|
||||
|
||||
/* Scene Parameters */
|
||||
|
||||
@@ -151,6 +152,7 @@ class Scene : public NodeOwner {
|
||||
unique_ptr<ParticleSystemManager> particle_system_manager;
|
||||
unique_ptr<BakeManager> bake_manager;
|
||||
unique_ptr<ProceduralManager> procedural_manager;
|
||||
unique_ptr<VolumeManager> volume_manager;
|
||||
|
||||
/* default shaders */
|
||||
Shader *default_surface;
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include "scene/shader_nodes.h"
|
||||
#include "scene/svm.h"
|
||||
#include "scene/tables.h"
|
||||
#include "scene/volume.h"
|
||||
|
||||
#include "util/log.h"
|
||||
#include "util/murmurhash.h"
|
||||
@@ -104,6 +105,7 @@ Shader::Shader() : Node(get_node_type())
|
||||
has_volume_attribute_dependency = false;
|
||||
has_volume_connected = false;
|
||||
prev_volume_step_rate = 0.0f;
|
||||
has_light_path_node = false;
|
||||
|
||||
emission_estimate = zero_float3();
|
||||
emission_sampling = EMISSION_SAMPLING_NONE;
|
||||
@@ -397,6 +399,10 @@ void Shader::tag_update(Scene *scene)
|
||||
scene->object_manager->need_flags_update = true;
|
||||
prev_volume_step_rate = volume_step_rate;
|
||||
}
|
||||
|
||||
if (has_volume || prev_has_volume) {
|
||||
scene->volume_manager->tag_update(this);
|
||||
}
|
||||
}
|
||||
|
||||
void Shader::tag_used(Scene *scene)
|
||||
@@ -527,6 +533,15 @@ void ShaderManager::device_update_pre(Device * /*device*/,
|
||||
shader->has_volume_spatial_varying = false;
|
||||
shader->has_volume_attribute_dependency = false;
|
||||
shader->has_displacement = output->input("Displacement")->link != nullptr;
|
||||
|
||||
shader->has_light_path_node = false;
|
||||
for (ShaderNode *node : shader->graph->nodes) {
|
||||
if (node->special_type == SHADER_SPECIAL_TYPE_LIGHT_PATH) {
|
||||
/* TODO: check if the light path node is linked to the volume output. */
|
||||
shader->has_light_path_node = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (shader->reference_count()) {
|
||||
@@ -633,6 +648,10 @@ void ShaderManager::device_update_common(Device * /*device*/,
|
||||
flag |= SD_HAS_CONSTANT_EMISSION;
|
||||
}
|
||||
|
||||
if (shader->has_light_path_node) {
|
||||
flag |= SD_HAS_LIGHT_PATH_NODE;
|
||||
}
|
||||
|
||||
const uint32_t cryptomatte_id = util_murmur_hash3(
|
||||
shader->name.c_str(), shader->name.length(), 0);
|
||||
|
||||
|
||||
@@ -117,6 +117,7 @@ class Shader : public Node {
|
||||
bool has_surface_spatial_varying;
|
||||
bool has_volume_spatial_varying;
|
||||
bool has_volume_attribute_dependency;
|
||||
bool has_light_path_node;
|
||||
|
||||
float3 emission_estimate;
|
||||
EmissionSampling emission_sampling;
|
||||
|
||||
@@ -56,6 +56,7 @@ enum ShaderNodeSpecialType {
|
||||
SHADER_SPECIAL_TYPE_OUTPUT,
|
||||
SHADER_SPECIAL_TYPE_BUMP,
|
||||
SHADER_SPECIAL_TYPE_OUTPUT_AOV,
|
||||
SHADER_SPECIAL_TYPE_LIGHT_PATH,
|
||||
};
|
||||
|
||||
/* Input
|
||||
|
||||
@@ -4199,7 +4199,10 @@ NODE_DEFINE(LightPathNode)
|
||||
return type;
|
||||
}
|
||||
|
||||
LightPathNode::LightPathNode() : ShaderNode(get_node_type()) {}
|
||||
LightPathNode::LightPathNode() : ShaderNode(get_node_type())
|
||||
{
|
||||
special_type = SHADER_SPECIAL_TYPE_LIGHT_PATH;
|
||||
}
|
||||
|
||||
void LightPathNode::compile(SVMCompiler &compiler)
|
||||
{
|
||||
|
||||
@@ -4,20 +4,29 @@
|
||||
|
||||
#include "scene/volume.h"
|
||||
#include "scene/attribute.h"
|
||||
#include "scene/background.h"
|
||||
#include "scene/image_vdb.h"
|
||||
#include "scene/light.h"
|
||||
#include "scene/object.h"
|
||||
#include "scene/scene.h"
|
||||
|
||||
#ifdef WITH_OPENVDB
|
||||
# include <openvdb/tools/GridTransformer.h>
|
||||
# include <openvdb/tools/LevelSetUtil.h>
|
||||
# include <openvdb/tools/Morphology.h>
|
||||
#endif
|
||||
|
||||
#include "util/hash.h"
|
||||
#include "util/log.h"
|
||||
#include "util/nanovdb.h"
|
||||
#include "util/path.h"
|
||||
#include "util/progress.h"
|
||||
#include "util/types.h"
|
||||
|
||||
#include "bvh/octree.h"
|
||||
|
||||
#include <OpenImageIO/filesystem.h>
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
NODE_DEFINE(Volume)
|
||||
@@ -547,4 +556,454 @@ void Volume::merge_grids(const Scene *scene)
|
||||
#endif
|
||||
}
|
||||
|
||||
VolumeManager::VolumeManager()
|
||||
{
|
||||
need_rebuild_ = true;
|
||||
}
|
||||
|
||||
void VolumeManager::tag_update()
|
||||
{
|
||||
need_rebuild_ = true;
|
||||
}
|
||||
|
||||
/* Remove changed object from the list of octrees and tag for rebuild. */
|
||||
void VolumeManager::tag_update(const Object *object, uint32_t flag)
|
||||
{
|
||||
if (flag & ObjectManager::VISIBILITY_MODIFIED) {
|
||||
tag_update();
|
||||
}
|
||||
|
||||
for (const Node *node : object->get_geometry()->get_used_shaders()) {
|
||||
const Shader *shader = static_cast<const Shader *>(node);
|
||||
if (shader->has_volume_spatial_varying || (flag & ObjectManager::OBJECT_REMOVED)) {
|
||||
/* TODO(weizhen): no need to update if the spatial variation is not in world space. */
|
||||
tag_update();
|
||||
object_octrees_.erase({object, shader});
|
||||
}
|
||||
}
|
||||
|
||||
if (!need_rebuild_ && (flag & ObjectManager::TRANSFORM_MODIFIED)) {
|
||||
/* Octree is not tagged for rebuild, but the transformation changed, so a redraw is needed. */
|
||||
update_visualization_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Remove object with changed shader from the list of octrees and tag for rebuild. */
|
||||
void VolumeManager::tag_update(const Shader *shader)
|
||||
{
|
||||
tag_update();
|
||||
for (auto it = object_octrees_.begin(); it != object_octrees_.end();) {
|
||||
if (it->first.second == shader) {
|
||||
it = object_octrees_.erase(it);
|
||||
}
|
||||
else {
|
||||
it++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Remove object with changed geometry from the list of octrees and tag for rebuild. */
|
||||
void VolumeManager::tag_update(const Geometry *geometry)
|
||||
{
|
||||
tag_update();
|
||||
/* Tag Octree for update. */
|
||||
for (auto it = object_octrees_.begin(); it != object_octrees_.end();) {
|
||||
const Object *object = it->first.first;
|
||||
if (object->get_geometry() == geometry) {
|
||||
it = object_octrees_.erase(it);
|
||||
}
|
||||
else {
|
||||
it++;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef WITH_OPENVDB
|
||||
/* Tag VDB map for update. */
|
||||
for (auto it = vdb_map_.begin(); it != vdb_map_.end();) {
|
||||
if (it->first.first == geometry) {
|
||||
it = vdb_map_.erase(it);
|
||||
}
|
||||
else {
|
||||
it++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void VolumeManager::tag_update_indices()
|
||||
{
|
||||
update_root_indices_ = true;
|
||||
}
|
||||
|
||||
bool VolumeManager::is_homogeneous_volume(const Object *object, const Shader *shader)
|
||||
{
|
||||
if (!shader->has_volume || shader->has_volume_spatial_varying) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (shader->has_volume_attribute_dependency) {
|
||||
for (Attribute &attr : object->get_geometry()->attributes.attributes) {
|
||||
/* If both the shader and the object needs volume attributes, the volume is heterogeneous. */
|
||||
if (attr.element == ATTR_ELEMENT_VOXEL) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef WITH_OPENVDB
|
||||
openvdb::BoolGrid::ConstPtr VolumeManager::mesh_to_sdf_grid(const Mesh *mesh,
|
||||
const Shader *shader,
|
||||
const float half_width)
|
||||
{
|
||||
const int num_verts = mesh->get_verts().size();
|
||||
std::vector<openvdb::Vec3f> points(num_verts);
|
||||
parallel_for(0, num_verts, [&](int i) {
|
||||
const float3 &vert = mesh->get_verts()[i];
|
||||
points[i] = openvdb::Vec3f(vert.x, vert.y, vert.z);
|
||||
});
|
||||
|
||||
const int max_num_triangles = mesh->num_triangles();
|
||||
std::vector<openvdb::Vec3I> triangles;
|
||||
triangles.reserve(max_num_triangles);
|
||||
for (int i = 0; i < max_num_triangles; i++) {
|
||||
/* Only push triangles with matching shader. */
|
||||
const int shader_index = mesh->get_shader()[i];
|
||||
if (static_cast<const Shader *>(mesh->get_used_shaders()[shader_index]) == shader) {
|
||||
triangles.emplace_back(mesh->get_triangles()[i * 3],
|
||||
mesh->get_triangles()[i * 3 + 1],
|
||||
mesh->get_triangles()[i * 3 + 2]);
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO(weizhen): Should consider object instead of mesh size. */
|
||||
const float3 mesh_size = mesh->bounds.size();
|
||||
const auto vdb_voxel_size = openvdb::Vec3d(mesh_size.x, mesh_size.y, mesh_size.z) /
|
||||
double(1 << VOLUME_OCTREE_MAX_DEPTH);
|
||||
|
||||
auto xform = openvdb::math::Transform::createLinearTransform(1.0);
|
||||
xform->postScale(vdb_voxel_size);
|
||||
|
||||
auto sdf_grid = openvdb::tools::meshToLevelSet<openvdb::FloatGrid>(
|
||||
*xform, points, triangles, half_width);
|
||||
|
||||
return openvdb::tools::sdfInteriorMask(*sdf_grid, 0.5 * vdb_voxel_size.length());
|
||||
}
|
||||
|
||||
openvdb::BoolGrid::ConstPtr VolumeManager::get_vdb(const Geometry *geom,
|
||||
const Shader *shader) const
|
||||
{
|
||||
if (geom->is_mesh()) {
|
||||
if (auto it = vdb_map_.find({geom, shader}); it != vdb_map_.end()) {
|
||||
return it->second;
|
||||
}
|
||||
}
|
||||
/* Create empty grid. */
|
||||
return openvdb::BoolGrid::create();
|
||||
}
|
||||
#endif
|
||||
|
||||
void VolumeManager::initialize_octree(const Scene *scene, Progress &progress)
|
||||
{
|
||||
/* Instanced objects without spatial variation can share one octree. */
|
||||
std::map<std::pair<const Geometry *, const Shader *>, std::shared_ptr<Octree>> geometry_octrees;
|
||||
for (const auto &it : object_octrees_) {
|
||||
const Shader *shader = it.first.second;
|
||||
if (!shader->has_volume_spatial_varying) {
|
||||
if (const Object *object = it.first.first) {
|
||||
geometry_octrees[{object->get_geometry(), shader}] = it.second;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Loop through the volume objects to initialize their root nodes. */
|
||||
for (const Object *object : scene->objects) {
|
||||
const Geometry *geom = object->get_geometry();
|
||||
if (!geom->has_volume) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Create Octree. */
|
||||
for (const Node *node : geom->get_used_shaders()) {
|
||||
const Shader *shader = static_cast<const Shader *>(node);
|
||||
if (!shader->has_volume) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (object_octrees_.find({object, shader}) == object_octrees_.end()) {
|
||||
if (geom->is_light()) {
|
||||
const Light *light = static_cast<const Light *>(geom);
|
||||
if (light->get_light_type() == LIGHT_BACKGROUND) {
|
||||
/* World volume is unbounded, use some practical large number instead. */
|
||||
const float3 size = make_float3(10000.0f);
|
||||
object_octrees_[{object, shader}] = std::make_shared<Octree>(BoundBox(-size, size));
|
||||
}
|
||||
}
|
||||
else {
|
||||
const Mesh *mesh = static_cast<const Mesh *>(geom);
|
||||
if (is_zero(mesh->bounds.size())) {
|
||||
continue;
|
||||
}
|
||||
if (!shader->has_volume_spatial_varying) {
|
||||
/* TODO(weizhen): check object attribute. */
|
||||
if (auto it = geometry_octrees.find({geom, shader}); it != geometry_octrees.end()) {
|
||||
/* Share octree with other instances. */
|
||||
object_octrees_[{object, shader}] = it->second;
|
||||
}
|
||||
else {
|
||||
auto octree = std::make_shared<Octree>(mesh->bounds);
|
||||
geometry_octrees[{geom, shader}] = octree;
|
||||
object_octrees_[{object, shader}] = octree;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* TODO(weizhen): we can still share the octree if the spatial variation is in object
|
||||
* space, but that might be tricky to determine. */
|
||||
object_octrees_[{object, shader}] = std::make_shared<Octree>(mesh->bounds);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef WITH_OPENVDB
|
||||
if (geom->is_mesh() && !VolumeManager::is_homogeneous_volume(object, shader) &&
|
||||
vdb_map_.find({geom, shader}) == vdb_map_.end())
|
||||
{
|
||||
const Mesh *mesh = static_cast<const Mesh *>(geom);
|
||||
const float3 dim = mesh->bounds.size();
|
||||
if (dim.x > 0.0f && dim.y > 0.0f && dim.z > 0.0f) {
|
||||
const char *name = object->get_asset_name().c_str();
|
||||
progress.set_substatus(string_printf("Creating SDF grid for %s", name));
|
||||
vdb_map_[{geom, shader}] = mesh_to_sdf_grid(mesh, shader, 1.0f);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void VolumeManager::update_num_octree_nodes()
|
||||
{
|
||||
num_octree_nodes_ = 0;
|
||||
num_octree_roots_ = 0;
|
||||
|
||||
std::set<const Octree *> unique_octrees;
|
||||
for (const auto &it : object_octrees_) {
|
||||
const Octree *octree = it.second.get();
|
||||
if (unique_octrees.find(octree) != unique_octrees.end()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
unique_octrees.insert(octree);
|
||||
|
||||
num_octree_roots_++;
|
||||
num_octree_nodes_ += octree->get_num_nodes();
|
||||
}
|
||||
}
|
||||
|
||||
int VolumeManager::num_octree_nodes() const
|
||||
{
|
||||
return num_octree_nodes_;
|
||||
}
|
||||
|
||||
int VolumeManager::num_octree_roots() const
|
||||
{
|
||||
return num_octree_roots_;
|
||||
}
|
||||
|
||||
void VolumeManager::build_octree(Device *device, Progress &progress)
|
||||
{
|
||||
const double start_time = time_dt();
|
||||
|
||||
for (auto &it : object_octrees_) {
|
||||
if (it.second->is_built()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const Object *object = it.first.first;
|
||||
const Shader *shader = it.first.second;
|
||||
#ifdef WITH_OPENVDB
|
||||
openvdb::BoolGrid::ConstPtr interior_mask = get_vdb(object->get_geometry(), shader);
|
||||
it.second->build(device, progress, interior_mask, object, shader);
|
||||
#else
|
||||
it.second->build(device, progress, object, shader);
|
||||
#endif
|
||||
}
|
||||
|
||||
update_num_octree_nodes();
|
||||
|
||||
const double build_time = time_dt() - start_time;
|
||||
|
||||
LOG_WORK << object_octrees_.size() << " volume octree(s) with a total of " << num_octree_nodes()
|
||||
<< " nodes are built in " << build_time << " seconds.";
|
||||
}
|
||||
|
||||
void VolumeManager::update_root_indices(DeviceScene *dscene, const Scene *scene) const
|
||||
{
|
||||
if (object_octrees_.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Keep track of the root index of the unique octrees. */
|
||||
std::map<const Octree *, int> octree_root_indices;
|
||||
|
||||
int *roots = dscene->volume_tree_root_ids.alloc(scene->objects.size());
|
||||
|
||||
int root_index = 0;
|
||||
for (const auto &it : object_octrees_) {
|
||||
const Object *object = it.first.first;
|
||||
const int object_id = object->get_device_index();
|
||||
const Octree *octree = it.second.get();
|
||||
auto entry = octree_root_indices.find(octree);
|
||||
if (entry == octree_root_indices.end()) {
|
||||
roots[object_id] = root_index;
|
||||
octree_root_indices[octree] = root_index;
|
||||
|
||||
root_index++;
|
||||
}
|
||||
else {
|
||||
/* Instances share the same octree. */
|
||||
roots[object_id] = entry->second;
|
||||
}
|
||||
}
|
||||
|
||||
dscene->volume_tree_root_ids.copy_to_device();
|
||||
}
|
||||
|
||||
void VolumeManager::flatten_octree(DeviceScene *dscene, const Scene *scene) const
|
||||
{
|
||||
if (object_octrees_.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
update_root_indices(dscene, scene);
|
||||
|
||||
for (const auto &it : object_octrees_) {
|
||||
/* Octrees need to be re-flattened. */
|
||||
it.second->set_flattened(false);
|
||||
}
|
||||
|
||||
KernelOctreeRoot *kroots = dscene->volume_tree_roots.alloc(num_octree_roots());
|
||||
KernelOctreeNode *knodes = dscene->volume_tree_nodes.alloc(num_octree_nodes());
|
||||
|
||||
int node_index = 0;
|
||||
int root_index = 0;
|
||||
for (const auto &it : object_octrees_) {
|
||||
std::shared_ptr<Octree> octree = it.second;
|
||||
if (octree->is_flattened()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If an object has multiple shaders, the root index is overwritten, so we also write the
|
||||
* shader id, and perform a linear search in the kernel to find the correct octree. */
|
||||
kroots[root_index].shader = it.first.second->id;
|
||||
kroots[root_index].id = node_index;
|
||||
|
||||
/* Transform from object space into octree space. */
|
||||
auto root = octree->get_root();
|
||||
const float3 scale = 1.0f / root->bbox.size();
|
||||
kroots[root_index].scale = scale;
|
||||
kroots[root_index].translation = -root->bbox.min * scale + 1.0f;
|
||||
|
||||
root_index++;
|
||||
|
||||
/* Flatten octree. */
|
||||
const uint current_index = node_index++;
|
||||
knodes[current_index].parent = -1;
|
||||
octree->flatten(knodes, current_index, root, node_index);
|
||||
octree->set_flattened();
|
||||
}
|
||||
|
||||
dscene->volume_tree_nodes.copy_to_device();
|
||||
dscene->volume_tree_roots.copy_to_device();
|
||||
|
||||
LOG_WORK << "Memory usage of volume octrees: "
|
||||
<< (dscene->volume_tree_nodes.size() * sizeof(KernelOctreeNode) +
|
||||
dscene->volume_tree_roots.size() * sizeof(KernelOctreeRoot) +
|
||||
dscene->volume_tree_root_ids.size() * sizeof(int)) /
|
||||
(1024.0 * 1024.0)
|
||||
<< "Mb.";
|
||||
}
|
||||
|
||||
std::string VolumeManager::visualize_octree(const char *filename) const
|
||||
{
|
||||
const std::string filename_full = path_join(OIIO::Filesystem::current_path(), filename);
|
||||
|
||||
std::ofstream file(filename_full);
|
||||
if (file.is_open()) {
|
||||
std::ostringstream buffer;
|
||||
file << "# Visualize volume octree.\n\n"
|
||||
"import bpy\nimport mathutils\n\n"
|
||||
"if bpy.context.active_object:\n"
|
||||
" bpy.context.active_object.select_set(False)\n\n"
|
||||
"octree = bpy.data.collections.new(name='Octree')\n"
|
||||
"bpy.context.scene.collection.children.link(octree)\n\n";
|
||||
|
||||
for (const auto &it : object_octrees_) {
|
||||
/* Draw Octree. */
|
||||
const auto octree = it.second;
|
||||
const std::string object_name = it.first.first->get_asset_name().string();
|
||||
octree->visualize(file, object_name);
|
||||
|
||||
/* Apply transform. */
|
||||
const Object *object = it.first.first;
|
||||
const Geometry *geom = object->get_geometry();
|
||||
if (!geom->is_light() && !geom->transform_applied) {
|
||||
const Transform t = object->get_tfm();
|
||||
file << "obj.matrix_world = mathutils.Matrix((" << t.x << ", " << t.y << ", " << t.z
|
||||
<< ", (" << 0 << "," << 0 << "," << 0 << "," << 1 << ")))\n\n";
|
||||
}
|
||||
}
|
||||
|
||||
file.close();
|
||||
}
|
||||
|
||||
return filename_full;
|
||||
}
|
||||
|
||||
void VolumeManager::device_update(Device *device,
|
||||
DeviceScene *dscene,
|
||||
const Scene *scene,
|
||||
Progress &progress)
|
||||
{
|
||||
if (need_rebuild_) {
|
||||
/* Data needed for volume shader evaluation. */
|
||||
device->const_copy_to("data", &dscene->data, sizeof(dscene->data));
|
||||
|
||||
initialize_octree(scene, progress);
|
||||
build_octree(device, progress);
|
||||
flatten_octree(dscene, scene);
|
||||
|
||||
update_visualization_ = true;
|
||||
need_rebuild_ = false;
|
||||
update_root_indices_ = false;
|
||||
}
|
||||
else if (update_root_indices_) {
|
||||
update_root_indices(dscene, scene);
|
||||
update_root_indices_ = false;
|
||||
}
|
||||
|
||||
if (update_visualization_) {
|
||||
LOG_DEBUG << "Octree visualization has been written to " << visualize_octree("octree.py");
|
||||
update_visualization_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
void VolumeManager::device_free(DeviceScene *dscene)
|
||||
{
|
||||
dscene->volume_tree_nodes.free();
|
||||
dscene->volume_tree_roots.free();
|
||||
dscene->volume_tree_root_ids.free();
|
||||
}
|
||||
|
||||
VolumeManager::~VolumeManager()
|
||||
{
|
||||
#ifdef WITH_OPENVDB
|
||||
for (auto &it : vdb_map_) {
|
||||
it.second.reset();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
@@ -8,8 +8,15 @@
|
||||
|
||||
#include "scene/mesh.h"
|
||||
|
||||
#ifdef WITH_OPENVDB
|
||||
# include <openvdb/openvdb.h>
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class Object;
|
||||
class Octree;
|
||||
|
||||
class Volume : public Mesh {
|
||||
public:
|
||||
NODE_DECLARE
|
||||
@@ -26,4 +33,64 @@ class Volume : public Mesh {
|
||||
void clear(bool preserve_shaders = false) override;
|
||||
};
|
||||
|
||||
class VolumeManager {
|
||||
public:
|
||||
VolumeManager();
|
||||
~VolumeManager();
|
||||
|
||||
void device_update(Device *, DeviceScene *, const Scene *, Progress &);
|
||||
void device_free(DeviceScene *);
|
||||
|
||||
/* Tag volume octree for update when scene changes. */
|
||||
void tag_update();
|
||||
void tag_update(const Shader *shader);
|
||||
void tag_update(const Object *object, const uint32_t flag);
|
||||
void tag_update(const Geometry *geometry);
|
||||
void tag_update_indices();
|
||||
|
||||
/* Check whether the shader is a homogeneous volume. */
|
||||
static bool is_homogeneous_volume(const Object *, const Shader *);
|
||||
|
||||
private:
|
||||
/* Initialize octrees from the volumes in the scene. */
|
||||
void initialize_octree(const Scene *, Progress &);
|
||||
|
||||
/* Build octrees based on the volume density. */
|
||||
void build_octree(Device *, Progress &);
|
||||
|
||||
/* Update the object and shader index of octree root nodes. */
|
||||
void update_root_indices(DeviceScene *, const Scene *) const;
|
||||
|
||||
/* Converting the octrees into an array for uploading to the kernel. */
|
||||
void flatten_octree(DeviceScene *, const Scene *) const;
|
||||
|
||||
/* Count all the nodes of the octrees. */
|
||||
void update_num_octree_nodes();
|
||||
int num_octree_nodes() const;
|
||||
int num_octree_roots() const;
|
||||
|
||||
/* When running Blender with `--log-level debug`, an octree visualization is written to
|
||||
* `filename`, which is a Python script that can be run inside Blender. */
|
||||
std::string visualize_octree(const char *filename) const;
|
||||
|
||||
/* One octree per object per shader. */
|
||||
std::map<std::pair<const Object *, const Shader *>, std::shared_ptr<Octree>> object_octrees_;
|
||||
|
||||
bool update_root_indices_ = false;
|
||||
bool need_rebuild_;
|
||||
bool update_visualization_ = false;
|
||||
int num_octree_nodes_;
|
||||
int num_octree_roots_;
|
||||
|
||||
#ifdef WITH_OPENVDB
|
||||
/* Create SDF grid for mesh volumes, to determine whether a certain point is in the
|
||||
* interior of the mesh. This reduces evaluation time needed for heterogeneous volume. */
|
||||
openvdb::BoolGrid::ConstPtr mesh_to_sdf_grid(const Mesh *mesh,
|
||||
const Shader *shader,
|
||||
const float half_width);
|
||||
openvdb::BoolGrid::ConstPtr get_vdb(const Geometry *, const Shader *) const;
|
||||
std::map<std::pair<const Geometry *, const Shader *>, openvdb::BoolGrid::ConstPtr> vdb_map_;
|
||||
#endif
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
@@ -145,4 +145,10 @@ std::ostream &operator<<(std::ostream &os, const float3 &value)
|
||||
return os;
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, const float4 &value)
|
||||
{
|
||||
os << "(" << value.x << ", " << value.y << ", " << value.z << ", " << value.w << ")";
|
||||
return os;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
@@ -162,8 +162,10 @@ template<typename T> T DCheckNotNull(T &&t, const char *expression)
|
||||
/* Convenient logging of common data structures. */
|
||||
struct int2;
|
||||
struct float3;
|
||||
struct float4;
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, const int2 &value);
|
||||
std::ostream &operator<<(std::ostream &os, const float3 &value);
|
||||
std::ostream &operator<<(std::ostream &os, const float4 &value);
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
@@ -628,6 +628,12 @@ ccl_device_inline float one_minus_cos(const float angle)
|
||||
return angle > 0.02f ? 1.0f - cosf(angle) : 0.5f * sqr(angle);
|
||||
}
|
||||
|
||||
/* 2^a. */
|
||||
ccl_device_inline int power_of_2(const int a)
|
||||
{
|
||||
return 1 << a;
|
||||
}
|
||||
|
||||
ccl_device_inline float pow20(const float a)
|
||||
{
|
||||
return sqr(sqr(sqr(sqr(a)) * a));
|
||||
@@ -657,9 +663,9 @@ ccl_device_inline float beta(const float x, const float y)
|
||||
return expf(lgammaf(x) + lgammaf(y) - lgammaf(x + y));
|
||||
}
|
||||
|
||||
ccl_device_inline float xor_signmask(const float x, const int y)
|
||||
ccl_device_inline float xor_mask(const float x, const uint y)
|
||||
{
|
||||
return __int_as_float(__float_as_int(x) ^ y);
|
||||
return __uint_as_float(__float_as_uint(x) ^ y);
|
||||
}
|
||||
|
||||
ccl_device float bits_to_01(const uint bits)
|
||||
@@ -893,4 +899,50 @@ ccl_device_inline Interval<T> intervals_intersection(const ccl_private Interval<
|
||||
return {max(first.min, second.min), min(first.max, second.max)};
|
||||
}
|
||||
|
||||
/* Defines the minimal and maximal values of a quantity. */
|
||||
template<typename T> struct Extrema {
|
||||
T min;
|
||||
T max;
|
||||
Extrema() = default;
|
||||
ccl_device_inline_method Extrema(T value) : min(value), max(value) {}
|
||||
ccl_device_inline_method Extrema(T min_, T max_) : min(min_), max(max_) {}
|
||||
|
||||
ccl_device_inline_method T range() const
|
||||
{
|
||||
return max - min;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T> ccl_device_inline Extrema<T> operator*(const Extrema<T> a, const T b)
|
||||
{
|
||||
return {a.min * b, a.max * b};
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
ccl_device_inline Extrema<T> operator+(const ccl_private Extrema<T> &a,
|
||||
const ccl_private Extrema<T> &b)
|
||||
{
|
||||
return {a.min + b.min, a.max + b.max};
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
ccl_device_inline Extrema<T> operator+=(ccl_private Extrema<T> &a, const ccl_private Extrema<T> &b)
|
||||
{
|
||||
return a = a + b;
|
||||
}
|
||||
|
||||
/* Returns the extrema of both extrema. */
|
||||
template<typename T>
|
||||
ccl_device_inline Extrema<T> merge(const ccl_private Extrema<T> &a,
|
||||
const ccl_private Extrema<T> &b)
|
||||
{
|
||||
return {min(a.min, b.min), max(a.max, b.max)};
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
ccl_device_inline Extrema<T> merge(const ccl_private Extrema<T> &a, const ccl_private T &v)
|
||||
{
|
||||
return {min(a.min, v), max(a.max, v)};
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "util/types_float3.h"
|
||||
#include "util/types_float4.h"
|
||||
#include "util/types_int3.h"
|
||||
#include "util/types_uint3.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
@@ -212,6 +213,15 @@ ccl_device_inline bool operator==(const float3 a, const float3 b)
|
||||
# endif
|
||||
}
|
||||
|
||||
ccl_device_inline int3 operator==(const float3 a, const float b)
|
||||
{
|
||||
# ifdef __KERNEL_SSE__
|
||||
return int3(_mm_castps_si128(_mm_cmpeq_ps(a.m128, make_float3(b).m128)));
|
||||
# else
|
||||
return make_int3(a.x == b, a.y == b, a.z == b);
|
||||
# endif
|
||||
}
|
||||
|
||||
ccl_device_inline bool operator!=(const float3 a, const float3 b)
|
||||
{
|
||||
return !(a == b);
|
||||
@@ -235,7 +245,21 @@ ccl_device_inline float dot(const float3 a, const float3 b)
|
||||
# endif
|
||||
}
|
||||
|
||||
#endif
|
||||
ccl_device_inline int3 operator>(const float3 a, const float3 b)
|
||||
{
|
||||
# ifdef __KERNEL_SSE__
|
||||
return int3(_mm_castps_si128(_mm_cmpgt_ps(a.m128, b.m128)));
|
||||
# else
|
||||
return make_int3(a.x > b.x, a.y > b.y, a.z > b.z);
|
||||
# endif
|
||||
}
|
||||
|
||||
ccl_device_inline int3 operator>(const float3 a, const float b)
|
||||
{
|
||||
return a > make_float3(b);
|
||||
}
|
||||
|
||||
#endif /* __KERNEL_METAL__ */
|
||||
|
||||
ccl_device_inline float dot_xy(const float3 a, const float3 b)
|
||||
{
|
||||
@@ -380,6 +404,17 @@ ccl_device_inline float3 sqrt(const float3 a)
|
||||
# endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 round(const float3 a)
|
||||
{
|
||||
# if defined(__KERNEL_NEON__)
|
||||
return float3(vrndnq_f32(a.m128));
|
||||
# elif defined(__KERNEL_SSE__)
|
||||
return float3(_mm_round_ps(a.m128, _MM_FROUND_NINT));
|
||||
# else
|
||||
return make_float3(roundf(a.x), roundf(a.y), roundf(a.z));
|
||||
# endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 floor(const float3 a)
|
||||
{
|
||||
# ifdef __KERNEL_SSE__
|
||||
@@ -403,6 +438,11 @@ ccl_device_inline float3 mix(const float3 a, const float3 b, const float t)
|
||||
return a + t * (b - a);
|
||||
}
|
||||
|
||||
ccl_device_inline float3 mix(const float3 a, const float3 b, const float3 t)
|
||||
{
|
||||
return a + t * (b - a);
|
||||
}
|
||||
|
||||
ccl_device_inline float3 saturate(const float3 a)
|
||||
{
|
||||
return make_float3(saturatef(a.x), saturatef(a.y), saturatef(a.z));
|
||||
@@ -438,11 +478,6 @@ ccl_device_inline float3 atan2(const float3 y, const float3 x)
|
||||
return make_float3(atan2f(y.x, x.x), atan2f(y.y, x.y), atan2f(y.z, x.z));
|
||||
}
|
||||
|
||||
ccl_device_inline float3 round(const float3 a)
|
||||
{
|
||||
return make_float3(roundf(a.x), roundf(a.y), roundf(a.z));
|
||||
}
|
||||
|
||||
ccl_device_inline float3 reflect(const float3 incident, const float3 unit_normal)
|
||||
{
|
||||
return incident - 2.0f * unit_normal * dot(incident, unit_normal);
|
||||
@@ -527,6 +562,11 @@ ccl_device_inline bool is_zero(const float3 a)
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline bool any_zero(const float3 a)
|
||||
{
|
||||
return (a.x == 0.0f || a.y == 0.0f || a.z == 0.0f);
|
||||
}
|
||||
|
||||
ccl_device_inline float reduce_add(const float3 a)
|
||||
{
|
||||
#if defined(__KERNEL_SSE__) && defined(__KERNEL_NEON__)
|
||||
@@ -766,4 +806,22 @@ ccl_device_inline void copy_v3_v3(ccl_private float *r, const float3 val)
|
||||
r[2] = val.z;
|
||||
}
|
||||
|
||||
ccl_device_inline uint3 float3_as_uint3(const float3 f)
|
||||
{
|
||||
#ifdef __KERNEL_METAL__
|
||||
return as_type<uint3>(f);
|
||||
#else
|
||||
return make_uint3(__float_as_uint(f.x), __float_as_uint(f.y), __float_as_uint(f.z));
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 uint3_as_float3(const uint3 f)
|
||||
{
|
||||
#ifdef __KERNEL_METAL__
|
||||
return as_type<float3>(f);
|
||||
#else
|
||||
return make_float3(__uint_as_float(f.x), __uint_as_float(f.y), __uint_as_float(f.z));
|
||||
#endif
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
@@ -107,6 +107,11 @@ ccl_device_inline int3 operator&(const int3 a, const int b)
|
||||
# endif
|
||||
}
|
||||
|
||||
ccl_device_inline bool all(const int3 a)
|
||||
{
|
||||
return a.x && a.y && a.z;
|
||||
}
|
||||
|
||||
#endif /* !__KERNEL_METAL__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
# include "util/windows.h"
|
||||
#endif
|
||||
|
||||
#include <tbb/blocked_range3d.h>
|
||||
#include <tbb/enumerable_thread_specific.h>
|
||||
#include <tbb/parallel_for.h>
|
||||
#include <tbb/parallel_for_each.h>
|
||||
@@ -26,6 +27,7 @@
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
using tbb::blocked_range;
|
||||
using tbb::blocked_range3d;
|
||||
using tbb::enumerable_thread_specific;
|
||||
using tbb::parallel_for;
|
||||
using tbb::parallel_for_each;
|
||||
|
||||
Reference in New Issue
Block a user