diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py index 92d73a1de07..32ad6dedb72 100644 --- a/intern/cycles/blender/addon/engine.py +++ b/intern/cycles/blender/addon/engine.py @@ -194,6 +194,7 @@ def list_render_passes(scene, srl): if srl.use_pass_uv: yield ("UV", "UVA", 'VECTOR') if srl.use_pass_object_index: yield ("IndexOB", "X", 'VALUE') if srl.use_pass_material_index: yield ("IndexMA", "X", 'VALUE') + if crl.use_pass_volume_majorant: yield ("Volume Majorant", "Z", 'VALUE') # Light passes. if srl.use_pass_diffuse_direct: yield ("DiffDir", "RGB", 'COLOR') @@ -207,6 +208,8 @@ def list_render_passes(scene, srl): if srl.use_pass_transmission_color: yield ("TransCol", "RGB", 'COLOR') if crl.use_pass_volume_direct: yield ("VolumeDir", "RGB", 'COLOR') if crl.use_pass_volume_indirect: yield ("VolumeInd", "RGB", 'COLOR') + if crl.use_pass_volume_scatter: yield ("Volume Scatter", "RGB", 'COLOR') + if crl.use_pass_volume_transmit: yield ("Volume Transmit", "RGB", 'COLOR') if srl.use_pass_emit: yield ("Emit", "RGB", 'COLOR') if srl.use_pass_environment: yield ("Env", "RGB", 'COLOR') if srl.use_pass_ambient_occlusion: yield ("AO", "RGB", 'COLOR') diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index d43cae2cf39..e3c39a8c0d7 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -245,6 +245,12 @@ enum_view3d_shading_render_pass = ( ('SAMPLE_COUNT', "Sample Count", "Per-pixel number of samples"), ) +enum_view3d_debug_render_pass = ( + ('VOLUME_SCATTER', "Volume Scatter", "Show the contribution of scattered ray in volume"), + ('VOLUME_TRANSMIT', "Volume Transmit", "Show the contribution of transmitted ray in volume"), + ('VOLUME_MAJORANT', "Volume Majorant", "Show the majorant transmittance of the volume") +) + enum_guiding_distribution = ( ('PARALLAX_AWARE_VMM', "Parallax-Aware VMM", "Use Parallax-aware von Mises-Fisher models as directional distribution", 0), ('DIRECTIONAL_QUAD_TREE', "Directional Quad Tree", "Use Directional Quad Trees as directional distribution", 1), @@ -1485,6 +1491,24 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup): default=False, update=update_render_passes, ) + use_pass_volume_scatter: BoolProperty( + name="Volume Scatter", + description="Contribution of paths that scattered in the volume at the primary ray", + default=False, + update=update_render_passes, + ) + use_pass_volume_transmit: BoolProperty( + name="Volume Transmit", + description="Contribution of paths that transmitted through the volume at the primary ray", + default=False, + update=update_render_passes, + ) + use_pass_volume_majorant: BoolProperty( + name="Volume Majorant", + description="Majorant transmittance of the volume", + default=False, + update=update_render_passes, + ) use_pass_shadow_catcher: BoolProperty( name="Shadow Catcher", @@ -1909,10 +1933,14 @@ class CyclesPreferences(bpy.types.AddonPreferences): class CyclesView3DShadingSettings(bpy.types.PropertyGroup): __slots__ = () + prefs = bpy.context.preferences + use_debug = prefs.experimental.use_cycles_debug and prefs.view.show_developer_ui + render_pass: EnumProperty( name="Render Pass", description="Render pass to show in the 3D Viewport", - items=enum_view3d_shading_render_pass, + items=enum_view3d_shading_render_pass + + enum_view3d_debug_render_pass if use_debug else enum_view3d_shading_render_pass, default='COMBINED', ) show_active_pixels: BoolProperty( diff --git 
a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index da47762bdae..c3b1aa9c42a 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -1054,6 +1054,13 @@ class CYCLES_RENDER_PT_passes_light(CyclesButtonsPanel, Panel): col.prop(cycles_view_layer, "use_pass_volume_direct", text="Direct") col.prop(cycles_view_layer, "use_pass_volume_indirect", text="Indirect") + prefs = context.preferences + use_debug = prefs.experimental.use_cycles_debug and prefs.view.show_developer_ui + if use_debug: + col.prop(cycles_view_layer, "use_pass_volume_scatter", text="Scatter") + col.prop(cycles_view_layer, "use_pass_volume_transmit", text="Transmit") + col.prop(cycles_view_layer, "use_pass_volume_majorant", text="Majorant") + col = layout.column(heading="Other", align=True) col.prop(view_layer, "use_pass_emit", text="Emission") col.prop(view_layer, "use_pass_environment") diff --git a/intern/cycles/blender/sync.cpp b/intern/cycles/blender/sync.cpp index f843696181b..6a297b8f821 100644 --- a/intern/cycles/blender/sync.cpp +++ b/intern/cycles/blender/sync.cpp @@ -686,6 +686,9 @@ static bool get_known_pass_type(BL::RenderPass &b_pass, PassType &type, PassMode MAP_PASS("GlossInd", PASS_GLOSSY_INDIRECT, false); MAP_PASS("TransInd", PASS_TRANSMISSION_INDIRECT, false); MAP_PASS("VolumeInd", PASS_VOLUME_INDIRECT, false); + MAP_PASS("Volume Scatter", PASS_VOLUME_SCATTER, false); + MAP_PASS("Volume Transmit", PASS_VOLUME_TRANSMIT, false); + MAP_PASS("Volume Majorant", PASS_VOLUME_MAJORANT, false); MAP_PASS("DiffCol", PASS_DIFFUSE_COLOR, false); MAP_PASS("GlossCol", PASS_GLOSSY_COLOR, false); diff --git a/intern/cycles/device/cpu/kernel.cpp b/intern/cycles/device/cpu/kernel.cpp index e56f06dadb0..c3f114813a3 100644 --- a/intern/cycles/device/cpu/kernel.cpp +++ b/intern/cycles/device/cpu/kernel.cpp @@ -29,11 +29,15 @@ CPUKernels::CPUKernels() REGISTER_KERNEL(adaptive_sampling_convergence_check), REGISTER_KERNEL(adaptive_sampling_filter_x), REGISTER_KERNEL(adaptive_sampling_filter_y), + /* Volume Scattering Probability Guiding. */ + REGISTER_KERNEL(volume_guiding_filter_x), + REGISTER_KERNEL(volume_guiding_filter_y), /* Cryptomatte. */ REGISTER_KERNEL(cryptomatte_postprocess), /* Film Convert. */ REGISTER_KERNEL_FILM_CONVERT(depth), REGISTER_KERNEL_FILM_CONVERT(mist), + REGISTER_KERNEL_FILM_CONVERT(volume_majorant), REGISTER_KERNEL_FILM_CONVERT(sample_count), REGISTER_KERNEL_FILM_CONVERT(float), REGISTER_KERNEL_FILM_CONVERT(light_path), diff --git a/intern/cycles/device/cpu/kernel.h b/intern/cycles/device/cpu/kernel.h index e245f11cc8d..c1fb1dc5029 100644 --- a/intern/cycles/device/cpu/kernel.h +++ b/intern/cycles/device/cpu/kernel.h @@ -54,28 +54,38 @@ class CPUKernels { const int offset, int stride)>; - using AdaptiveSamplingFilterXFunction = - CPUKernelFunction; + using FilterXFunction = CPUKernelFunction; - using AdaptiveSamplingFilterYFunction = - CPUKernelFunction; + using FilterYFunction = CPUKernelFunction; AdaptiveSamplingConvergenceCheckFunction adaptive_sampling_convergence_check; - AdaptiveSamplingFilterXFunction adaptive_sampling_filter_x; - AdaptiveSamplingFilterYFunction adaptive_sampling_filter_y; + FilterXFunction adaptive_sampling_filter_x; + FilterYFunction adaptive_sampling_filter_y; + + /* Volume Scattering Probability Guiding. */ + CPUKernelFunction + volume_guiding_filter_x; + FilterYFunction volume_guiding_filter_y; /* Cryptomatte. 
*/ @@ -104,6 +114,7 @@ class CPUKernels { KERNEL_FILM_CONVERT_FUNCTION(depth) KERNEL_FILM_CONVERT_FUNCTION(mist) + KERNEL_FILM_CONVERT_FUNCTION(volume_majorant) KERNEL_FILM_CONVERT_FUNCTION(sample_count) KERNEL_FILM_CONVERT_FUNCTION(float) diff --git a/intern/cycles/device/kernel.cpp b/intern/cycles/device/kernel.cpp index 6f3bb6c900a..0478e0cd861 100644 --- a/intern/cycles/device/kernel.cpp +++ b/intern/cycles/device/kernel.cpp @@ -122,6 +122,7 @@ const char *device_kernel_as_string(DeviceKernel kernel) FILM_CONVERT_KERNEL_AS_STRING(DEPTH, depth) FILM_CONVERT_KERNEL_AS_STRING(MIST, mist) + FILM_CONVERT_KERNEL_AS_STRING(VOLUME_MAJORANT, volume_majorant) FILM_CONVERT_KERNEL_AS_STRING(SAMPLE_COUNT, sample_count) FILM_CONVERT_KERNEL_AS_STRING(FLOAT, float) FILM_CONVERT_KERNEL_AS_STRING(LIGHT_PATH, light_path) @@ -154,6 +155,12 @@ const char *device_kernel_as_string(DeviceKernel kernel) case DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS: return "filter_color_postprocess"; + /* Volume Scattering Probability Guiding. */ + case DEVICE_KERNEL_VOLUME_GUIDING_FILTER_X: + return "volume_guiding_filter_x"; + case DEVICE_KERNEL_VOLUME_GUIDING_FILTER_Y: + return "volume_guiding_filter_y"; + /* Cryptomatte. */ case DEVICE_KERNEL_CRYPTOMATTE_POSTPROCESS: return "cryptomatte_postprocess"; diff --git a/intern/cycles/integrator/pass_accessor.cpp b/intern/cycles/integrator/pass_accessor.cpp index 8f816920b00..28510951bf6 100644 --- a/intern/cycles/integrator/pass_accessor.cpp +++ b/intern/cycles/integrator/pass_accessor.cpp @@ -4,6 +4,7 @@ #include "integrator/pass_accessor.h" +#include "kernel/types.h" #include "session/buffers.h" #include "util/log.h" @@ -140,6 +141,9 @@ bool PassAccessor::get_render_tile_pixels(const RenderBuffers *render_buffers, else if (type == PASS_MIST) { get_pass_mist(render_buffers, buffer_params, destination); } + else if (type == PASS_VOLUME_MAJORANT) { + get_pass_volume_majorant(render_buffers, buffer_params, destination); + } else if (type == PASS_SAMPLE_COUNT) { get_pass_sample_count(render_buffers, buffer_params, destination); } diff --git a/intern/cycles/integrator/pass_accessor.h b/intern/cycles/integrator/pass_accessor.h index 503de56e321..1a2bd07cfc4 100644 --- a/intern/cycles/integrator/pass_accessor.h +++ b/intern/cycles/integrator/pass_accessor.h @@ -131,6 +131,7 @@ class PassAccessor { /* Float (scalar) passes. */ DECLARE_PASS_ACCESSOR(depth) DECLARE_PASS_ACCESSOR(mist) + DECLARE_PASS_ACCESSOR(volume_majorant) DECLARE_PASS_ACCESSOR(sample_count) DECLARE_PASS_ACCESSOR(float) diff --git a/intern/cycles/integrator/pass_accessor_cpu.cpp b/intern/cycles/integrator/pass_accessor_cpu.cpp index a613c6ff2b7..8bf1b332b75 100644 --- a/intern/cycles/integrator/pass_accessor_cpu.cpp +++ b/intern/cycles/integrator/pass_accessor_cpu.cpp @@ -105,6 +105,7 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba( /* Float (scalar) passes. */ DEFINE_PASS_ACCESSOR(depth) DEFINE_PASS_ACCESSOR(mist) +DEFINE_PASS_ACCESSOR(volume_majorant) DEFINE_PASS_ACCESSOR(sample_count) DEFINE_PASS_ACCESSOR(float) diff --git a/intern/cycles/integrator/pass_accessor_cpu.h b/intern/cycles/integrator/pass_accessor_cpu.h index 129edb72c9c..19409927b14 100644 --- a/intern/cycles/integrator/pass_accessor_cpu.h +++ b/intern/cycles/integrator/pass_accessor_cpu.h @@ -40,6 +40,7 @@ class PassAccessorCPU : public PassAccessor { /* Float (scalar) passes. 
*/ DECLARE_PASS_ACCESSOR(depth) DECLARE_PASS_ACCESSOR(mist) + DECLARE_PASS_ACCESSOR(volume_majorant) DECLARE_PASS_ACCESSOR(sample_count) DECLARE_PASS_ACCESSOR(float) diff --git a/intern/cycles/integrator/pass_accessor_gpu.cpp b/intern/cycles/integrator/pass_accessor_gpu.cpp index 45b0754e008..5a4d82abd0d 100644 --- a/intern/cycles/integrator/pass_accessor_gpu.cpp +++ b/intern/cycles/integrator/pass_accessor_gpu.cpp @@ -90,6 +90,7 @@ void PassAccessorGPU::run_film_convert_kernels(DeviceKernel kernel, /* Float (scalar) passes. */ DEFINE_PASS_ACCESSOR(depth, DEPTH); DEFINE_PASS_ACCESSOR(mist, MIST); +DEFINE_PASS_ACCESSOR(volume_majorant, VOLUME_MAJORANT); DEFINE_PASS_ACCESSOR(sample_count, SAMPLE_COUNT); DEFINE_PASS_ACCESSOR(float, FLOAT); diff --git a/intern/cycles/integrator/pass_accessor_gpu.h b/intern/cycles/integrator/pass_accessor_gpu.h index 37e47e80cd8..4b39391cb68 100644 --- a/intern/cycles/integrator/pass_accessor_gpu.h +++ b/intern/cycles/integrator/pass_accessor_gpu.h @@ -34,6 +34,7 @@ class PassAccessorGPU : public PassAccessor { /* Float (scalar) passes. */ DECLARE_PASS_ACCESSOR(depth); DECLARE_PASS_ACCESSOR(mist); + DECLARE_PASS_ACCESSOR(volume_majorant); DECLARE_PASS_ACCESSOR(sample_count); DECLARE_PASS_ACCESSOR(float); diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp index 268681cc80d..fa953df8528 100644 --- a/intern/cycles/integrator/path_trace.cpp +++ b/intern/cycles/integrator/path_trace.cpp @@ -196,6 +196,9 @@ void PathTrace::render_pipeline(RenderWork render_work) rebalance(render_work); + /* Reset sample limit. */ + render_scheduler_.set_limit_samples_per_update(0); + /* Prepare all per-thread guiding structures before we start with the next rendering * iteration/progression. */ const bool use_guiding = device_scene_->data.integrator.use_guiding; @@ -203,6 +206,13 @@ void PathTrace::render_pipeline(RenderWork render_work) guiding_prepare_structures(); } + const bool has_volume = device_scene_->data.integrator.use_volumes; + if (has_volume) { + const uint num_rendered_samples = render_scheduler_.get_num_rendered_samples(); + const uint limit = next_power_of_two(num_rendered_samples) - num_rendered_samples; + render_scheduler_.set_limit_samples_per_update(limit); + } + path_trace(render_work); if (render_cancel_.is_requested) { return; @@ -230,6 +240,11 @@ void PathTrace::render_pipeline(RenderWork render_work) return; } + denoise_volume_guiding_buffers(render_work, has_volume); + if (render_cancel_.is_requested) { + return; + } + write_tile_buffer(render_work); update_display(render_work); @@ -634,6 +649,26 @@ void PathTrace::denoise(const RenderWork &render_work) render_scheduler_.report_denoise_time(render_work, time_dt() - start_time); } +void PathTrace::denoise_volume_guiding_buffers(const RenderWork &render_work, + const bool has_volume) +{ + if (!has_volume || !render_scheduler_.volume_guiding_need_denoise()) { + return; + } + + LOG_WORK << "Denoise volume guiding buffers."; + + const double start_time = time_dt(); + + /* TODO: in the multi-GPU case, we can denoise on one device and copy to the rest, instead of + * denoising on each device separately. 
*/ + parallel_for_each(path_trace_works_, [&](unique_ptr &path_trace_work) { + path_trace_work->denoise_volume_guiding_buffers(); + }); + + render_scheduler_.report_volume_guiding_denoise_time(render_work, time_dt() - start_time); +} + void PathTrace::set_output_driver(unique_ptr driver) { output_driver_ = std::move(driver); @@ -714,10 +749,12 @@ void PathTrace::update_display(const RenderWork &render_work) return; } - const PassMode pass_mode = render_work.display.use_denoised_result && - render_state_.has_denoised_result ? - PassMode::DENOISED : - PassMode::NOISY; + const PassType pass_type = film_->get_display_pass(); + const bool show_denoised = (render_work.display.use_denoised_result && + has_denoised_result()) || + is_volume_guiding_pass(pass_type); + + const PassMode pass_mode = show_denoised ? PassMode::DENOISED : PassMode::NOISY; /* TODO(sergey): When using multi-device rendering map the GPUDisplay once and copy data from * all works in parallel. */ diff --git a/intern/cycles/integrator/path_trace.h b/intern/cycles/integrator/path_trace.h index a27f5fffb9c..71900934d1f 100644 --- a/intern/cycles/integrator/path_trace.h +++ b/intern/cycles/integrator/path_trace.h @@ -210,6 +210,7 @@ class PathTrace { void path_trace(RenderWork &render_work); void adaptive_sample(RenderWork &render_work); void denoise(const RenderWork &render_work); + void denoise_volume_guiding_buffers(const RenderWork &render_work, const bool has_volume); void cryptomatte_postprocess(const RenderWork &render_work); void update_display(const RenderWork &render_work); void rebalance(const RenderWork &render_work); diff --git a/intern/cycles/integrator/path_trace_tile.cpp b/intern/cycles/integrator/path_trace_tile.cpp index b0557a86328..31b7c48ba6a 100644 --- a/intern/cycles/integrator/path_trace_tile.cpp +++ b/intern/cycles/integrator/path_trace_tile.cpp @@ -42,7 +42,8 @@ bool PathTraceTile::get_pass_pixels(const string_view pass_name, return false; } - const bool has_denoised_result = path_trace_.has_denoised_result(); + const bool has_denoised_result = path_trace_.has_denoised_result() || + is_volume_guiding_pass(pass->type); if (pass->mode == PassMode::DENOISED && !has_denoised_result) { pass = buffer_params.find_pass(pass->type); if (pass == nullptr) { diff --git a/intern/cycles/integrator/path_trace_work.h b/intern/cycles/integrator/path_trace_work.h index 70ae6fc13e3..42fdbe9bec4 100644 --- a/intern/cycles/integrator/path_trace_work.h +++ b/intern/cycles/integrator/path_trace_work.h @@ -125,6 +125,9 @@ class PathTraceWork { virtual int adaptive_sampling_converge_filter_count_active(const float threshold, bool reset) = 0; + /* Denoise Volume Scattering Probability Guiding buffers. */ + virtual void denoise_volume_guiding_buffers() = 0; + /* Run cryptomatte pass post-processing kernels. 
*/ virtual void cryptomatte_postproces() = 0; diff --git a/intern/cycles/integrator/path_trace_work_cpu.cpp b/intern/cycles/integrator/path_trace_work_cpu.cpp index 3aaf189249f..303028c6407 100644 --- a/intern/cycles/integrator/path_trace_work_cpu.cpp +++ b/intern/cycles/integrator/path_trace_work_cpu.cpp @@ -303,6 +303,45 @@ void PathTraceWorkCPU::cryptomatte_postproces() }); } +void PathTraceWorkCPU::denoise_volume_guiding_buffers() +{ + const int min_x = effective_buffer_params_.full_x; + const int min_y = effective_buffer_params_.full_y; + const int max_x = effective_buffer_params_.width + min_x; + const int max_y = effective_buffer_params_.height + min_y; + const int offset = effective_buffer_params_.offset; + const int stride = effective_buffer_params_.stride; + + float *render_buffer = buffers_->buffer.data(); + + tbb::task_arena local_arena = local_tbb_arena_create(device_); + + const blocked_range2d range(min_x, max_x, min_y, max_y); + + /* Filter in x direction. */ + local_arena.execute([&]() { + parallel_for(range, [&](const blocked_range2d r) { + ThreadKernelGlobalsCPU *kernel_globals = kernel_thread_globals_.data(); + for (int y = r.cols().begin(); y < r.cols().end(); ++y) { + for (int x = r.rows().begin(); x < r.rows().end(); ++x) { + kernels_.volume_guiding_filter_x( + kernel_globals, render_buffer, y, x, min_x, max_x, offset, stride); + } + } + }); + }); + + /* Filter in y direction. Unlike `filter_x`, the inner loop of `filter_y` is serially run inside + * the kernel, to avoid the need of intermediate buffers. */ + local_arena.execute([&]() { + parallel_for(min_x, max_x, [&](int x) { + ThreadKernelGlobalsCPU *kernel_globals = kernel_thread_globals_.data(); + kernels_.volume_guiding_filter_y( + kernel_globals, render_buffer, x, min_y, max_y, offset, stride); + }); + }); +} + #if defined(WITH_PATH_GUIDING) /* NOTE: It seems that this is called before every rendering iteration/progression and not once per * rendering. May be we find a way to call it only once per rendering. */ diff --git a/intern/cycles/integrator/path_trace_work_cpu.h b/intern/cycles/integrator/path_trace_work_cpu.h index cce20ddb3b9..d21d26ba24c 100644 --- a/intern/cycles/integrator/path_trace_work_cpu.h +++ b/intern/cycles/integrator/path_trace_work_cpu.h @@ -51,6 +51,7 @@ class PathTraceWorkCPU : public PathTraceWork { int adaptive_sampling_converge_filter_count_active(const float threshold, bool reset) override; void cryptomatte_postproces() override; + void denoise_volume_guiding_buffers() override; #if defined(WITH_PATH_GUIDING) /* Initializes the per-thread guiding kernel data. 
The function sets the pointers to the diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp index a391484024b..7199e629dbc 100644 --- a/intern/cycles/integrator/path_trace_work_gpu.cpp +++ b/intern/cycles/integrator/path_trace_work_gpu.cpp @@ -1176,6 +1176,29 @@ void PathTraceWorkGPU::cryptomatte_postproces() queue_->enqueue(DEVICE_KERNEL_CRYPTOMATTE_POSTPROCESS, work_size, args); } +void PathTraceWorkGPU::denoise_volume_guiding_buffers() +{ + const DeviceKernelArguments args(&buffers_->buffer.device_pointer, + &effective_buffer_params_.full_x, + &effective_buffer_params_.full_y, + &effective_buffer_params_.width, + &effective_buffer_params_.height, + &effective_buffer_params_.offset, + &effective_buffer_params_.stride); + + { + const int work_size = effective_buffer_params_.width * effective_buffer_params_.height; + DCHECK_GT(work_size, 0); + queue_->enqueue(DEVICE_KERNEL_VOLUME_GUIDING_FILTER_X, work_size, args); + } + + { + const int work_size = effective_buffer_params_.width; + DCHECK_GT(work_size, 0); + queue_->enqueue(DEVICE_KERNEL_VOLUME_GUIDING_FILTER_Y, work_size, args); + } +} + bool PathTraceWorkGPU::copy_render_buffers_from_device() { /* May not exist if cancelled before rendering started. */ diff --git a/intern/cycles/integrator/path_trace_work_gpu.h b/intern/cycles/integrator/path_trace_work_gpu.h index 9e9fe669aac..c77ce980724 100644 --- a/intern/cycles/integrator/path_trace_work_gpu.h +++ b/intern/cycles/integrator/path_trace_work_gpu.h @@ -48,6 +48,7 @@ class PathTraceWorkGPU : public PathTraceWork { int adaptive_sampling_converge_filter_count_active(const float threshold, bool reset) override; void cryptomatte_postproces() override; + void denoise_volume_guiding_buffers() override; protected: void alloc_integrator_soa(); diff --git a/intern/cycles/integrator/render_scheduler.cpp b/intern/cycles/integrator/render_scheduler.cpp index 0d06947efd6..91719393f1c 100644 --- a/intern/cycles/integrator/render_scheduler.cpp +++ b/intern/cycles/integrator/render_scheduler.cpp @@ -55,7 +55,12 @@ bool RenderScheduler::is_denoiser_gpu_used() const void RenderScheduler::set_limit_samples_per_update(const int limit_samples) { - limit_samples_per_update_ = limit_samples; + if (limit_samples_per_update_) { + limit_samples_per_update_ = min(limit_samples_per_update_, limit_samples); + } + else { + limit_samples_per_update_ = limit_samples; + } } void RenderScheduler::set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling) @@ -169,6 +174,7 @@ void RenderScheduler::reset(const BufferParams &buffer_params) adaptive_filter_time_.reset(); display_update_time_.reset(); rebalance_time_.reset(); + volume_guiding_denoise_time_.reset(); } void RenderScheduler::reset_for_next_tile() @@ -547,6 +553,23 @@ void RenderScheduler::report_denoise_time(const RenderWork &render_work, const d LOG_WORK << "Average denoising time: " << denoise_time_.get_average() << " seconds."; } +void RenderScheduler::report_volume_guiding_denoise_time(const RenderWork &render_work, + const double time) +{ + volume_guiding_denoise_time_.add_wall(time); + + const double final_time_approx = approximate_final_time(render_work, time); + + if (work_report_reset_average(render_work)) { + volume_guiding_denoise_time_.reset_average(); + } + + volume_guiding_denoise_time_.add_average(final_time_approx, render_work.path_trace.num_samples); + + LOG_WORK << "Average volume guiding denoising time: " + << volume_guiding_denoise_time_.get_average() << " seconds."; +} + void 
RenderScheduler::report_display_update_time(const RenderWork &render_work, const double time) { display_update_time_.add_wall(time); @@ -963,6 +986,20 @@ float RenderScheduler::work_adaptive_threshold() const return max(state_.adaptive_sampling_threshold, adaptive_sampling_.threshold); } +bool RenderScheduler::volume_guiding_need_denoise() const +{ + if (!is_power_of_two(get_num_rendered_samples())) { + return false; + } + + if (done()) { + /* No need to denoise after the last sample. */ + return false; + } + + return true; +} + bool RenderScheduler::work_need_denoise(bool &delayed, bool &ready_to_display) { delayed = false; diff --git a/intern/cycles/integrator/render_scheduler.h b/intern/cycles/integrator/render_scheduler.h index 486a14eee6d..aec106f7cc7 100644 --- a/intern/cycles/integrator/render_scheduler.h +++ b/intern/cycles/integrator/render_scheduler.h @@ -207,6 +207,9 @@ class RenderScheduler { void report_rebalance_time(const RenderWork &render_work, const double time, bool balance_changed); + void report_volume_guiding_denoise_time(const RenderWork &render_work, const double time); + + bool volume_guiding_need_denoise() const; /* Generate full multi-line report of the rendering process, including rendering parameters, * times, and so on. */ @@ -435,6 +438,7 @@ class RenderScheduler { TimeWithAverage denoise_time_; TimeWithAverage display_update_time_; TimeWithAverage rebalance_time_; + TimeWithAverage volume_guiding_denoise_time_; /* Whether cryptomatte-related work will be scheduled. */ bool need_schedule_cryptomatte_ = false; diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 1727f283641..9306a4df264 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -283,6 +283,7 @@ set(SRC_KERNEL_FILM_HEADERS film/aov_passes.h film/data_passes.h film/denoising_passes.h + film/volume_guiding_denoise.h film/cryptomatte_passes.h film/light_passes.h film/read.h diff --git a/intern/cycles/kernel/data_template.h b/intern/cycles/kernel/data_template.h index 772ecfcf768..07df1279ef7 100644 --- a/intern/cycles/kernel/data_template.h +++ b/intern/cycles/kernel/data_template.h @@ -103,6 +103,12 @@ KERNEL_STRUCT_MEMBER(film, int, pass_diffuse_direct) KERNEL_STRUCT_MEMBER(film, int, pass_glossy_direct) KERNEL_STRUCT_MEMBER(film, int, pass_transmission_direct) KERNEL_STRUCT_MEMBER(film, int, pass_volume_direct) +KERNEL_STRUCT_MEMBER(film, int, pass_volume_scatter) +KERNEL_STRUCT_MEMBER(film, int, pass_volume_scatter_denoised) +KERNEL_STRUCT_MEMBER(film, int, pass_volume_transmit) +KERNEL_STRUCT_MEMBER(film, int, pass_volume_transmit_denoised) +KERNEL_STRUCT_MEMBER(film, int, pass_volume_majorant) +KERNEL_STRUCT_MEMBER(film, int, pass_volume_majorant_sample_count) KERNEL_STRUCT_MEMBER(film, int, pass_emission) KERNEL_STRUCT_MEMBER(film, int, pass_background) KERNEL_STRUCT_MEMBER(film, int, pass_ao) diff --git a/intern/cycles/kernel/device/cpu/kernel_arch.h b/intern/cycles/kernel/device/cpu/kernel_arch.h index e959e3bb9aa..029f28e9e5d 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch.h @@ -50,6 +50,7 @@ KERNEL_INTEGRATOR_SHADE_FUNCTION(megakernel); KERNEL_FILM_CONVERT_FUNCTION(depth) KERNEL_FILM_CONVERT_FUNCTION(mist) KERNEL_FILM_CONVERT_FUNCTION(sample_count) +KERNEL_FILM_CONVERT_FUNCTION(volume_majorant) KERNEL_FILM_CONVERT_FUNCTION(float) KERNEL_FILM_CONVERT_FUNCTION(light_path) @@ -123,4 +124,24 @@ void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const 
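With volumes in the scene, render_pipeline() above caps the samples per update so the total rendered sample count always lands on a power of two, and volume_guiding_need_denoise() only fires at those counts, so the guiding buffers are re-filtered after 1, 2, 4, 8, ... samples and the filtering cost stays logarithmic in the sample count. A minimal sketch of that cadence, using only the helpers the patch itself calls:

/* Illustrative only: n samples rendered so far, e.g. n = 5. */
const uint n = render_scheduler_.get_num_rendered_samples();
const uint limit = next_power_of_two(n) - n; /* 8 - 5 = 3 samples until the next update. */
render_scheduler_.set_limit_samples_per_update(limit);
/* Once those samples are rendered, n == 8, is_power_of_two(n) holds and
 * volume_guiding_need_denoise() schedules the filter pass (unless done()). */

Note that set_limit_samples_per_update() now keeps the minimum of the existing and the requested limit, so this cadence composes conservatively with limits requested elsewhere; passing 0 at the start of the pipeline clears the limit again.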
ThreadKernelGlobal ccl_global float *render_buffer, int pixel_index); +/* -------------------------------------------------------------------- + * Volume Scattering Probability Guiding. + */ + +void KERNEL_FUNCTION_FULL_NAME(volume_guiding_filter_x)(const ThreadKernelGlobalsCPU *kg, + ccl_global float *render_buffer, + const int y, + const int center_x, + const int min_x, + const int max_x, + const int offset, + int stride); +void KERNEL_FUNCTION_FULL_NAME(volume_guiding_filter_y)(const ThreadKernelGlobalsCPU *kg, + ccl_global float *render_buffer, + const int x, + const int center_y, + const int height, + const int offset, + int stride); + #undef KERNEL_ARCH diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h index 8fb1acc6c51..f5a91e4fbce 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h @@ -29,6 +29,7 @@ # include "kernel/film/adaptive_sampling.h" # include "kernel/film/cryptomatte_passes.h" # include "kernel/film/read.h" +# include "kernel/film/volume_guiding_denoise.h" # include "kernel/bake/bake.h" @@ -243,6 +244,56 @@ void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const ThreadKernelGlobal #endif } +/* -------------------------------------------------------------------- + * Volume Scattering Probability Guiding. + */ + +void KERNEL_FUNCTION_FULL_NAME(volume_guiding_filter_x)(const ThreadKernelGlobalsCPU *kg, + ccl_global float *render_buffer, + const int y, + const int center_x, + const int min_x, + const int max_x, + const int offset, + const int stride) +{ +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, volume_guiding_filter_x); + (void)kg; + (void)render_buffer; + (void)y; + (void)center_x; + (void)min_x; + (void)max_x; + (void)offset; + (void)stride; +#else + volume_guiding_filter_x(kg, render_buffer, y, center_x, min_x, max_x, offset, stride); +#endif +} + +void KERNEL_FUNCTION_FULL_NAME(volume_guiding_filter_y)(const ThreadKernelGlobalsCPU *kg, + ccl_global float *render_buffer, + const int x, + const int min_y, + const int max_y, + const int offset, + const int stride) +{ +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, volume_guiding_filter_y); + (void)kg; + (void)render_buffer; + (void)x; + (void)min_y; + (void)max_y; + (void)offset; + (void)stride; +#else + volume_guiding_filter_y(kg, render_buffer, x, min_y, max_y, offset, stride); +#endif +} + /* -------------------------------------------------------------------- * Film Convert. 
*/ @@ -319,6 +370,7 @@ void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const ThreadKernelGlobal KERNEL_FILM_CONVERT_FUNCTION(depth, true) KERNEL_FILM_CONVERT_FUNCTION(mist, true) KERNEL_FILM_CONVERT_FUNCTION(sample_count, true) +KERNEL_FILM_CONVERT_FUNCTION(volume_majorant, true) KERNEL_FILM_CONVERT_FUNCTION(float, true) KERNEL_FILM_CONVERT_FUNCTION(light_path, false) diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index bf165bf40e5..dc28c1731a3 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -42,6 +42,7 @@ #include "kernel/bake/bake.h" #include "kernel/film/adaptive_sampling.h" +#include "kernel/film/volume_guiding_denoise.h" #ifdef __KERNEL_METAL__ # include "kernel/device/metal/context_end.h" @@ -885,6 +886,7 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb /* 1 channel inputs */ KERNEL_FILM_CONVERT_VARIANT(depth, 1) KERNEL_FILM_CONVERT_VARIANT(mist, 1) +KERNEL_FILM_CONVERT_VARIANT(volume_majorant, 1) KERNEL_FILM_CONVERT_VARIANT(sample_count, 1) KERNEL_FILM_CONVERT_VARIANT(float, 1) @@ -1199,3 +1201,47 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) } } ccl_gpu_kernel_postfix + +/* -------------------------------------------------------------------- + * Volume Scattering Probability Guiding. + */ + +ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) + ccl_gpu_kernel_signature(volume_guiding_filter_x, + ccl_global float *render_buffer, + const int sx, + const int sy, + const int sw, + const int sh, + const int offset, + const int stride) +{ + const int work_index = ccl_gpu_global_id_x(); + const int y = work_index / sw; + const int x = work_index % sw; + + if (y < sh) { + ccl_gpu_kernel_call(volume_guiding_filter_x( + nullptr, render_buffer, sy + y, sx + x, sx, sx + sw, offset, stride)); + } +} +ccl_gpu_kernel_postfix + +ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) + ccl_gpu_kernel_signature(volume_guiding_filter_y, + ccl_global float *render_buffer, + const int sx, + const int sy, + const int sw, + const int sh, + const int offset, + const int stride) +{ + const int x = ccl_gpu_global_id_x(); + + if (x < sw) { + ccl_gpu_kernel_call( + volume_guiding_filter_y(nullptr, render_buffer, sx + x, sy, sy + sh, offset, stride)); + } +} +ccl_gpu_kernel_postfix diff --git a/intern/cycles/kernel/device/oneapi/kernel.cpp b/intern/cycles/kernel/device/oneapi/kernel.cpp index 9268a3ce967..9ac6bf1bd2c 100644 --- a/intern/cycles/kernel/device/oneapi/kernel.cpp +++ b/intern/cycles/kernel/device/oneapi/kernel.cpp @@ -603,6 +603,16 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context, oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_prefix_sum); break; } + case DEVICE_KERNEL_VOLUME_GUIDING_FILTER_X: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_volume_guiding_filter_x); + break; + } + case DEVICE_KERNEL_VOLUME_GUIDING_FILTER_Y: { + oneapi_call( + kg, cgh, global_size, local_size, args, oneapi_kernel_volume_guiding_filter_y); + break; + } /* clang-format off */ # define DEVICE_KERNEL_FILM_CONVERT_PARTIAL(VARIANT, variant) \ @@ -621,6 +631,7 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context, DEVICE_KERNEL_FILM_CONVERT(depth, DEPTH); DEVICE_KERNEL_FILM_CONVERT(mist, MIST); + DEVICE_KERNEL_FILM_CONVERT(volume_majorant, VOLUME_MAJORANT); DEVICE_KERNEL_FILM_CONVERT(sample_count, SAMPLE_COUNT); DEVICE_KERNEL_FILM_CONVERT(float, FLOAT); 
DEVICE_KERNEL_FILM_CONVERT(light_path, LIGHT_PATH); diff --git a/intern/cycles/kernel/film/light_passes.h b/intern/cycles/kernel/film/light_passes.h index 152f917bf3e..a81636bd153 100644 --- a/intern/cycles/kernel/film/light_passes.h +++ b/intern/cycles/kernel/film/light_passes.h @@ -197,6 +197,26 @@ ccl_device void film_write_adaptive_buffer(KernelGlobals kg, } } +/* Write the volume and surface contribution for volume scattering probability guiding. */ +ccl_device_inline void film_write_volume_scattering_guiding_pass(KernelGlobals kg, + ccl_global float *ccl_restrict + buffer, + const uint32_t path_flag, + const Spectrum contribution) +{ + int pass_offset = PASS_UNUSED; + if (path_flag & PATH_RAY_VOLUME_PRIMARY_TRANSMIT) { + pass_offset = kernel_data.film.pass_volume_transmit; + } + else if (path_flag & PATH_RAY_VOLUME_SCATTER) { + pass_offset = kernel_data.film.pass_volume_scatter; + } + + if (pass_offset != PASS_UNUSED) { + film_write_pass_spectrum(buffer + pass_offset, contribution); + } +} + /* -------------------------------------------------------------------- * Shadow catcher. */ @@ -337,6 +357,7 @@ ccl_device_inline void film_write_combined_pass(KernelGlobals kg, } film_write_adaptive_buffer(kg, sample, contribution, buffer); + film_write_volume_scattering_guiding_pass(kg, buffer, path_flag, contribution); } /* Write combined pass with transparency. */ @@ -361,6 +382,7 @@ ccl_device_inline void film_write_combined_transparent_pass(KernelGlobals kg, } film_write_adaptive_buffer(kg, sample, contribution, buffer); + film_write_volume_scattering_guiding_pass(kg, buffer, path_flag, contribution); } /* Write background or emission to appropriate pass. */ @@ -575,6 +597,12 @@ ccl_device_inline void film_write_transparent(KernelGlobals kg, #ifdef __SHADOW_CATCHER__ film_write_shadow_catcher_transparent_only(kg, path_flag, transparent, buffer); #endif + + if (path_flag & PATH_RAY_VOLUME_PRIMARY_TRANSMIT) { + kernel_assert(kernel_data.film.pass_volume_transmit != PASS_UNUSED); + film_write_pass_spectrum(buffer + kernel_data.film.pass_volume_transmit, + make_spectrum(transparent)); + } } /* Write holdout to render buffer. */ diff --git a/intern/cycles/kernel/film/read.h b/intern/cycles/kernel/film/read.h index 5d57db9e6e3..3d205ef4b82 100644 --- a/intern/cycles/kernel/film/read.h +++ b/intern/cycles/kernel/film/read.h @@ -154,6 +154,23 @@ ccl_device_inline void film_get_pass_pixel_sample_count( pixel[0] = __float_as_uint(f) * kfilm_convert->scale; } +ccl_device_inline void film_get_pass_pixel_volume_majorant( + const ccl_global KernelFilmConvert *ccl_restrict kfilm_convert, + const ccl_global float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components >= 1); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + + const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer); + + const ccl_global float *in = buffer + kfilm_convert->pass_offset; + const ccl_global float *count = buffer + kfilm_convert->pass_divide; + const float f = *in; + + pixel[0] = (*count != 0.0f) ? 
expf(-(f * scale_exposure) / *count) : 0.0f; +} + ccl_device_inline void film_get_pass_pixel_float(const ccl_global KernelFilmConvert *ccl_restrict kfilm_convert, const ccl_global float *ccl_restrict buffer, diff --git a/intern/cycles/kernel/film/volume_guiding_denoise.h b/intern/cycles/kernel/film/volume_guiding_denoise.h new file mode 100644 index 00000000000..2d031a47245 --- /dev/null +++ b/intern/cycles/kernel/film/volume_guiding_denoise.h @@ -0,0 +1,155 @@ +/* SPDX-FileCopyrightText: 2025 Blender Foundation + * + * SPDX-License-Identifier: Apache-2.0 */ + +#pragma once + +#include "kernel/film/write.h" + +/* Denoise volume scattering probability guiding buffers. */ + +CCL_NAMESPACE_BEGIN + +/* Two-pass Gaussian filter. */ +ccl_device void volume_guiding_filter_x(KernelGlobals kg, + ccl_global float *render_buffer, + const int y, + const int center_x, + const int min_x, + const int max_x, + const int offset, + const int stride) +{ + kernel_assert(kernel_data.film.pass_volume_scatter != PASS_UNUSED); + kernel_assert(kernel_data.film.pass_sample_count != PASS_UNUSED); + + const int radius = 5; + const int filter_width = radius * 2 + 1; + + /* sigma = 1.5 with integral according to + * https://lisyarus.github.io/blog/posts/blur-coefficients-generator.html + * https://bartwronski.com/2021/10/31/practical-gaussian-filter-binomial-filter-and-small-sigma-gaussians/ + */ + const float gaussian_params[filter_width] = {0.0012273699895602f, + 0.0084674212370284f, + 0.0379843612914121f, + 0.1108921888487800f, + 0.2108379677336155f, + 0.2611813817992076f, + 0.2108379677336155f, + 0.1108921888487800f, + 0.0379843612914121f, + 0.0084674212370284f, + 0.0012273699895602f}; + + ccl_global float *buffer = film_pass_pixel_render_buffer( + kg, center_x, y, offset, stride, render_buffer); + + /* Apply Gaussian filter in x direction. */ + float3 scatter = zero_float3(), transmit = zero_float3(); + for (int dx = 0; dx < filter_width; dx++) { + const int x = center_x + dx - radius; + if (x < min_x || x >= max_x) { + /* Ignore boundary pixels. */ + continue; + } + + ccl_global float *buffer = film_pass_pixel_render_buffer( + kg, x, y, offset, stride, render_buffer); + + const float weight = gaussian_params[dx] / + __float_as_uint(buffer[kernel_data.film.pass_sample_count]); + + scatter += fabs(kernel_read_pass_float3(buffer + kernel_data.film.pass_volume_scatter)) * + weight; + transmit += fabs(kernel_read_pass_float3(buffer + kernel_data.film.pass_volume_transmit)) * + weight; + } + + /* Write to the buffer. */ + film_overwrite_pass_float3(buffer + kernel_data.film.pass_volume_scatter_denoised, scatter); + film_overwrite_pass_float3(buffer + kernel_data.film.pass_volume_transmit_denoised, transmit); +} + +ccl_device void volume_guiding_filter_y(KernelGlobals kg, + ccl_global float *render_buffer, + const int x, + const int min_y, + const int max_y, + const int offset, + const int stride) +{ + kernel_assert(kernel_data.film.pass_volume_scatter != PASS_UNUSED); + + const int radius = 5; + const int filter_width = radius * 2 + 1; + + const float gaussian_params[filter_width] = {0.0012273699895602f, + 0.0084674212370284f, + 0.0379843612914121f, + 0.1108921888487800f, + 0.2108379677336155f, + 0.2611813817992076f, + 0.2108379677336155f, + 0.1108921888487800f, + 0.0379843612914121f, + 0.0084674212370284f, + 0.0012273699895602f}; + + /* Store neighboring values to avoid overwriting. */ + float3 scatter_neighbors[filter_width], transmit_neighbors[filter_width]; + + /* Initialze neighbors. 
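film_get_pass_pixel_volume_majorant() above converts the accumulated majorant optical depth back into an average transmittance for display. Written out, with f the accumulated PASS_VOLUME_MAJORANT value, c the pass_volume_majorant_sample_count divisor and s_e the scale/exposure factor:

$$ \text{pixel} = \begin{cases} \exp\!\left(-\dfrac{f \cdot s_e}{c}\right), & c \neq 0 \\ 0, & c = 0, \end{cases} $$

i.e. the debug pass shows $e^{-\bar\tau}$ for the mean per-sample majorant optical depth $\bar\tau$ rather than the raw accumulated depth.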
*/ + for (int i = 0; i < filter_width; i++) { + const int y = min_y + i; + if (i >= radius || y < min_y || y >= max_y) { + /* Out-of-boundary neighbors are initialized with zero. */ + scatter_neighbors[i] = transmit_neighbors[i] = zero_float3(); + } + else { + ccl_global float *buffer = film_pass_pixel_render_buffer( + kg, x, y, offset, stride, render_buffer); + scatter_neighbors[i] = kernel_read_pass_float3( + buffer + kernel_data.film.pass_volume_scatter_denoised); + transmit_neighbors[i] = kernel_read_pass_float3( + buffer + kernel_data.film.pass_volume_transmit_denoised); + } + } + + /* Apply Gaussian filter in y direction. */ + int index = radius; + for (int y = min_y; y < max_y; y++) { + /* Fetch the furthest neighbor to the right. */ + const int next_y = y + radius; + if (next_y < min_y || next_y >= max_y) { + scatter_neighbors[index] = zero_float3(); + transmit_neighbors[index] = zero_float3(); + } + else { + ccl_global float *buffer = film_pass_pixel_render_buffer( + kg, x, next_y, offset, stride, render_buffer); + scatter_neighbors[index] = kernel_read_pass_float3( + buffer + kernel_data.film.pass_volume_scatter_denoised); + transmit_neighbors[index] = kernel_read_pass_float3( + buffer + kernel_data.film.pass_volume_transmit_denoised); + } + + /* Slide the kernel to the right. */ + index = (index + 1) % filter_width; + + /* Apply convolution. */ + float3 scatter = zero_float3(), transmit = zero_float3(); + for (int i = 0; i < filter_width; i++) { + scatter += gaussian_params[i] * scatter_neighbors[(index + i) % filter_width]; + transmit += gaussian_params[i] * transmit_neighbors[(index + i) % filter_width]; + } + + /* Write to the buffers. */ + ccl_global float *buffer = film_pass_pixel_render_buffer( + kg, x, y, offset, stride, render_buffer); + film_overwrite_pass_float3(buffer + kernel_data.film.pass_volume_scatter_denoised, scatter); + film_overwrite_pass_float3(buffer + kernel_data.film.pass_volume_transmit_denoised, transmit); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/write.h b/intern/cycles/kernel/film/write.h index 704b78b6bb4..23ac0428075 100644 --- a/intern/cycles/kernel/film/write.h +++ b/intern/cycles/kernel/film/write.h @@ -39,6 +39,18 @@ ccl_device_forceinline ccl_global float *film_pass_pixel_render_buffer_shadow( return render_buffer + render_buffer_offset; } +ccl_device_forceinline ccl_global float *film_pass_pixel_render_buffer( + KernelGlobals kg, + const int x, + const int y, + const int offset, + const int stride, + ccl_global float *ccl_restrict render_buffer) +{ + const int render_pixel_index = offset + x + y * stride; + return render_buffer + (uint64_t)render_pixel_index * kernel_data.film.pass_stride; +} + /* Accumulate in passes. 
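volume_guiding_filter_x and volume_guiding_filter_y above implement the 11-tap Gaussian as a separable two-pass filter: the x pass writes per-pixel filtered values into the *_denoised passes, and the y pass then convolves those vertically in place, keeping an 11-entry window per column so no intermediate image is needed. A minimal sketch of the wrapped-index convolution that the y pass inlines (convolve_window is a hypothetical helper, not part of the patch):

ccl_device_inline float3 convolve_window(const float3 neighbors[11],
                                         const float weights[11],
                                         const int index)
{
  /* Tap i of the current window lives in slot (index + i) % 11, so sliding the
   * window down one row only overwrites a single slot before re-convolving. */
  float3 sum = zero_float3();
  for (int i = 0; i < 11; i++) {
    sum += weights[i] * neighbors[(index + i) % 11];
  }
  return sum;
}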
*/ ccl_device_inline void film_write_pass_float(ccl_global float *ccl_restrict buffer, @@ -120,7 +132,7 @@ ccl_device_inline float kernel_read_pass_float(const ccl_global float *ccl_restr return *buffer; } -ccl_device_inline float3 kernel_read_pass_float3(ccl_global float *ccl_restrict buffer) +ccl_device_inline float3 kernel_read_pass_float3(const ccl_global float *ccl_restrict buffer) { return make_float3(buffer[0], buffer[1], buffer[2]); } diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h index 138e8fd8da5..4977766d4bb 100644 --- a/intern/cycles/kernel/integrator/intersect_closest.h +++ b/intern/cycles/kernel/integrator/intersect_closest.h @@ -236,7 +236,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel( integrator_path_next(state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); } else { - integrator_path_terminate(state, current_kernel); + integrator_path_terminate(kg, state, render_buffer, current_kernel); } return; } @@ -276,14 +276,14 @@ ccl_device_forceinline void integrator_intersect_next_kernel( #endif } else { - integrator_path_terminate(state, current_kernel); + integrator_path_terminate(kg, state, render_buffer, current_kernel); } } } else { /* Nothing hit, continue with background kernel. */ if (integrator_intersect_skip_lights(kg, state)) { - integrator_path_terminate(state, current_kernel); + integrator_path_terminate(kg, state, render_buffer, current_kernel); } else { integrator_path_next(state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); @@ -338,7 +338,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( } /* Nothing hit, continue with background kernel. */ if (integrator_intersect_skip_lights(kg, state)) { - integrator_path_terminate(state, current_kernel); + integrator_path_terminate(kg, state, render_buffer, current_kernel); } else { integrator_path_next(state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); diff --git a/intern/cycles/kernel/integrator/intersect_subsurface.h b/intern/cycles/kernel/integrator/intersect_subsurface.h index d826d1d4f37..b1e58d7aab1 100644 --- a/intern/cycles/kernel/integrator/intersect_subsurface.h +++ b/intern/cycles/kernel/integrator/intersect_subsurface.h @@ -18,7 +18,7 @@ ccl_device void integrator_intersect_subsurface(KernelGlobals kg, IntegratorStat } #endif - integrator_path_terminate(state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); + integrator_path_terminate(kg, state, nullptr, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/path_state.h b/intern/cycles/kernel/integrator/path_state.h index 9e41c8731d3..d0b194b943b 100644 --- a/intern/cycles/kernel/integrator/path_state.h +++ b/intern/cycles/kernel/integrator/path_state.h @@ -62,6 +62,7 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg, INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = FLT_MAX; INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = 1.0f; INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput; + INTEGRATOR_STATE_WRITE(state, path, optical_depth) = 0.0f; #if defined(__PATH_GUIDING__) if ((kernel_data.kernel_features & KERNEL_FEATURE_PATH_GUIDING)) { INTEGRATOR_STATE_WRITE(state, path, unguided_throughput) = 1.0f; @@ -159,6 +160,10 @@ ccl_device_inline void path_state_next(KernelGlobals kg, if (volume_bounce >= kernel_data.integrator.max_volume_bounce) { flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; } + + if (bounce 
== 1) { + flag &= ~PATH_RAY_VOLUME_PRIMARY_TRANSMIT; + } } else #endif diff --git a/intern/cycles/kernel/integrator/shade_background.h b/intern/cycles/kernel/integrator/shade_background.h index 23baf798a10..f15aa7c5a2e 100644 --- a/intern/cycles/kernel/integrator/shade_background.h +++ b/intern/cycles/kernel/integrator/shade_background.h @@ -204,7 +204,7 @@ ccl_device void integrator_shade_background(KernelGlobals kg, } #endif - integrator_path_terminate(state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_terminate(kg, state, render_buffer, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shade_light.h b/intern/cycles/kernel/integrator/shade_light.h index c8aa4fc2fe4..fd6e8f1ac75 100644 --- a/intern/cycles/kernel/integrator/shade_light.h +++ b/intern/cycles/kernel/integrator/shade_light.h @@ -80,7 +80,7 @@ ccl_device void integrator_shade_light(KernelGlobals kg, INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce; if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) { - integrator_path_terminate(state, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + integrator_path_terminate(kg, state, render_buffer, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); return; } diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h index ee562bb8c6f..50147b3fe0c 100644 --- a/intern/cycles/kernel/integrator/shade_surface.h +++ b/intern/cycles/kernel/integrator/shade_surface.h @@ -847,7 +847,7 @@ ccl_device_forceinline void integrator_shade_surface(KernelGlobals kg, { const int continue_path_label = integrate_surface(kg, state, render_buffer); if (continue_path_label == LABEL_NONE) { - integrator_path_terminate(state, current_kernel); + integrator_path_terminate(kg, state, render_buffer, current_kernel); return; } diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h index 4c2c5954d14..dec61cd7eb0 100644 --- a/intern/cycles/kernel/integrator/shade_volume.h +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -458,19 +458,13 @@ ccl_device_inline bool volume_octree_advance(KernelGlobals kg, const IntegratorGenericState state, const ccl_private RNGState *rng_state, const uint32_t path_flag, - ccl_private OctreeTracing &octree, - const int step) + ccl_private OctreeTracing &octree) { if (octree.t.max >= ray->tmax) { /* Reached the last segment. */ return false; } - if (step >= VOLUME_MAX_STEPS) { - /* Exceeds maximal steps. */ - return false; - } - if (octree.next_scale > MANTISSA_BITS) { if (fabsf(octree.t.max - ray->tmax) <= OVERLAP_EXP) { /* This could happen due to numerical issues, when the bounding box overlaps with a @@ -530,7 +524,7 @@ ccl_device_inline bool volume_octree_advance_shadow(KernelGlobals kg, const float tmin = octree.t.min; while (octree.t.is_empty() || sigma.range() * octree.t.length() < 1.0f) { - if (!volume_octree_advance(kg, ray, sd, state, rng_state, path_flag, octree, 0)) { + if (!volume_octree_advance(kg, ray, sd, state, rng_state, path_flag, octree)) { return !octree.t.is_empty(); } @@ -766,24 +760,37 @@ ccl_device_inline bool volume_valid_direct_ray_segment(KernelGlobals kg, /* Volume Integration */ struct VolumeIntegrateState { - /* Random numbers for scattering. */ + /* Random number. */ float rscatter; - float rchannel; - /* Multiple importance sampling. */ + /* Method used for sampling direct scatter position. 
*/ VolumeSampleMethod direct_sample_method; - bool use_mis; /* Probability of sampling the scatter position using null scattering. */ float distance_pdf; /* Probability of sampling the scatter position using equiangular sampling. */ float equiangular_pdf; + /* Majorant density at the equiangular scatter position. Used to compute the pdf. */ + float sigma_max; /* Ratio tracking estimator of the volume transmittance, with MIS applied. */ float transmittance; + /* Current shading position. */ + float t; + /* Majorant optical depth until now. */ + float optical_depth; /* Steps taken while tracking. Should not exceed `VOLUME_MAX_STEPS`. */ - int step; + uint16_t step; + /* Multiple importance sampling. */ + bool use_mis; - bool stop; + /* Volume scattering probability guiding. */ + bool vspg; + /* The guided probability that the ray is scattered in the volume. `P_vol` in the paper. */ + float scatter_prob; + /* Minimal scale of majorant for achieving the desired scatter probability. */ + float majorant_scale; + /* Scale to apply after direct throughput due to Russian Roulette. */ + float direct_rr_scale; /* Extra fields for path guiding and denoising. */ Spectrum emission; @@ -792,14 +799,432 @@ struct VolumeIntegrateState { # endif }; -ccl_device bool volume_integrate_should_stop(const ccl_private VolumeIntegrateResult &result, - const ccl_private VolumeIntegrateState &vstate) +/* Accumulate transmittance for equiangular distance sampling without MIS. Using telescoping to + * reduce noise. */ +ccl_device_inline void volume_equiangular_transmittance( + KernelGlobals kg, + const IntegratorState state, + const ccl_private Ray *ccl_restrict ray, + const ccl_private Extrema &sigma, + const ccl_private Interval &interval, + ccl_private ShaderData *ccl_restrict sd, + const ccl_private RNGState *rng_state, + const ccl_private VolumeIntegrateState &ccl_restrict vstate, + ccl_private VolumeIntegrateResult &ccl_restrict result) { - if (result.indirect_scatter && result.direct_scatter) { + if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR || vstate.use_mis || + result.direct_scatter) + { + return; + } + + Interval t; + if (interval.contains(result.direct_t)) { + /* Compute transmittance until the direct scatter position. */ + t = {interval.min, result.direct_t}; + result.direct_scatter = true; + } + else { + /* Compute transmittance of the whole segment. */ + t = interval; + } + + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + result.direct_throughput *= volume_transmittance( + kg, state, ray, sd, sigma.range(), t, rng_state, path_flag); +} + +/* Sample the next candidate indirect scatter position following exponential distribution, + * and compute the direct throughput for equiangular sampling if using MIS. + * Returns true if should continue advancing. 
*/ +ccl_device_inline bool volume_indirect_scatter_advance(const ccl_private OctreeTracing &octree, + const bool equiangular, + ccl_private float &residual_optical_depth, + ccl_private VolumeIntegrateState &vstate, + ccl_private VolumeIntegrateResult &result) +{ + const float sigma_max = octree.sigma.max * vstate.majorant_scale; + residual_optical_depth = (octree.t.max - vstate.t) * sigma_max; + if (sigma_max == 0.0f) { return true; } - return vstate.stop; + vstate.t += sample_exponential_distribution(vstate.rscatter, 1.0f / sigma_max); + + const bool segment_has_equiangular = equiangular && octree.t.contains(result.direct_t); + if (segment_has_equiangular && vstate.t > result.direct_t && !result.direct_scatter) { + /* Stepped beyond the equiangular scatter position, compute direct throughput. */ + result.direct_scatter = true; + result.direct_throughput = result.indirect_throughput * vstate.transmittance * + vstate.direct_rr_scale; + vstate.distance_pdf = vstate.transmittance * sigma_max; + vstate.sigma_max = sigma_max; + } + + /* Sampled a position outside the current voxel. */ + return vstate.t > octree.t.max; +} + +/* Adavance to the next candidate indirect scatter position, and compute the direct throughput. */ +ccl_device_inline bool volume_integrate_advance(KernelGlobals kg, + const ccl_private Ray *ccl_restrict ray, + ccl_private ShaderData *ccl_restrict sd, + const IntegratorState state, + ccl_private RNGState *rng_state, + const uint32_t path_flag, + ccl_private OctreeTracing &octree, + ccl_private VolumeIntegrateState &vstate, + ccl_private VolumeIntegrateResult &result) +{ + if (vstate.step++ > VOLUME_MAX_STEPS) { + /* Exceeds maximal steps. */ + return false; + } + + float residual_optical_depth; + vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SCATTER_DISTANCE); + const bool equiangular = (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR) && + vstate.use_mis; + + while ( + volume_indirect_scatter_advance(octree, equiangular, residual_optical_depth, vstate, result)) + { + /* Advance to the next voxel if the sampled distance is beyond the current voxel. */ + if (!volume_octree_advance(kg, ray, sd, state, rng_state, path_flag, octree)) { + return false; + } + + vstate.optical_depth += octree.sigma.max * octree.t.length(); + vstate.t = octree.t.min; + volume_equiangular_transmittance( + kg, state, ray, octree.sigma, octree.t, sd, rng_state, vstate, result); + + /* Scale the random number by the residual depth for reusing. */ + vstate.rscatter = saturatef(1.0f - (1.0f - vstate.rscatter) * expf(residual_optical_depth)); + } + + /* Advance random number offset. */ + rng_state->rng_offset += PRNG_BOUNCE_NUM; + + return true; +} + +/* -------------------------------------------------------------------- */ +/** \name Volume Scattering Probability Guiding + * + * Following https://kehanxuuu.github.io/vspg-website/ by Kehan Xu et. al. + * + * Instead of stopping at the first real scatter event, we step through the entire ray to gather + * candidate scatter positions, and guide the probability of scattering inside a volume or + * transmitting through the volume by the contribution of both types of events. + * + * We only guide primary rays, secondary rays could be supported in the OpenPGL in the future. + * \{ */ + +/* Candidate scatter position for VSPG. 
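The distance sampling in volume_indirect_scatter_advance / volume_integrate_advance above reuses one random number across voxels. Assuming sample_exponential_distribution(r, m) maps a uniform r to the free-flight distance $-m\,\ln(1-r)$, the candidate position advances by $-\ln(1-r)/\sigma_{\max}$, and overshooting the voxel boundary at distance $d$ happens exactly when $r > 1 - e^{-\sigma_{\max} d}$. Conditioning the uniform on that event gives

$$ r' \;=\; \frac{r - \left(1 - e^{-\sigma_{\max} d}\right)}{e^{-\sigma_{\max} d}} \;=\; 1 - (1 - r)\,e^{\sigma_{\max} d}, $$

which is the 1.0f - (1.0f - vstate.rscatter) * expf(residual_optical_depth) rescale (saturatef only guards against round-off under fast math); by the memorylessness of the exponential distribution the rescaled number is again uniform and can drive the sampling in the next voxel.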
*/ +struct VolumeSampleCandidate { + PackedSpectrum emission; + float t; + PackedSpectrum throughput; + float distance_pdf; +# ifdef __DENOISING_FEATURES__ + PackedSpectrum albedo; +# endif + /* Remember the random number so that we sample the sample point for stochastic evaluation. */ + uint lcg_state; +}; + +/* Sample reservoir for VSPG. */ +struct VolumeSampleReservoir { + float total_weight = 0.0f; + float rand; + VolumeSampleCandidate candidate; + + ccl_device_inline_method VolumeSampleReservoir(const float rand_) : rand(rand_) {} + + /* Stream the candidate samples through the reservoir. */ + ccl_device_inline_method void add_sample(const float weight, + const VolumeSampleCandidate new_candidate) + { + if (!(weight > 0.0f)) { + return; + } + + total_weight += weight; + const float thresh = weight / total_weight; + + if ((rand <= thresh) || (total_weight == weight)) { + /* Explicitly select the first candidate in case of numerical issues. */ + candidate = new_candidate; + rand /= thresh; + } + else { + rand = (rand - thresh) / (1.0f - thresh); + } + + /* Ensure the `rand` is always within 0..1 range, which could be violated above when + * `-ffast-math` is used. */ + rand = saturatef(rand); + } + + ccl_device_inline_method bool is_empty() const + { + return total_weight == 0.0f; + } +}; + +/* Estimate volume majorant optical depth `\sum\sigma_{max}t` along the ray, by accumulating the + * result from previous samples in a render buffer. */ +ccl_device_inline float volume_majorant_optical_depth(KernelGlobals kg, + const ccl_global float *buffer) +{ + kernel_assert(kernel_data.film.pass_volume_majorant != PASS_UNUSED); + kernel_assert(kernel_data.film.pass_volume_majorant_sample_count != PASS_UNUSED); + + const ccl_global float *accumulated_optical_depth = buffer + + kernel_data.film.pass_volume_majorant; + const ccl_global float *count = buffer + kernel_data.film.pass_volume_majorant_sample_count; + + /* Assume `FLT_MAX` when we have no information of the optical depth. */ + return (*count == 0.0f) ? FLT_MAX : *accumulated_optical_depth / *count; +} + +/* Compute guided volume scatter probability and the majorant scale needed for achieving the + * scatter probability, for heterogeneous volume. */ +ccl_device_inline void volume_scatter_probability_get(KernelGlobals kg, + const IntegratorState state, + ccl_global float *ccl_restrict render_buffer, + ccl_private VolumeIntegrateState &vstate) +{ + /* Only guide primary rays. */ + vstate.vspg = (INTEGRATOR_STATE(state, path, bounce) == 0); + + if (!vstate.vspg) { + vstate.scatter_prob = 1.0f; + vstate.majorant_scale = 1.0f; + return; + } + + const ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); + + kernel_assert(kernel_data.film.pass_volume_scatter_denoised != PASS_UNUSED); + kernel_assert(kernel_data.film.pass_volume_transmit_denoised != PASS_UNUSED); + + /* Contribution based criterion, see Eq. (15). */ + const float L_scattered = reduce_add( + kernel_read_pass_float3(buffer + kernel_data.film.pass_volume_scatter_denoised)); + const float L_transmitted = reduce_add( + kernel_read_pass_float3(buffer + kernel_data.film.pass_volume_transmit_denoised)); + const float L_volume = L_transmitted + L_scattered; + + /* Compute guided scattering probability. */ + if (L_volume == 0.0f) { + /* Equal probability if no information gathered yet. */ + vstate.scatter_prob = 0.5f; + } + else { + /* Exponential distribution has non-zero probability beyond the boundary, so the scatter + * probability can never reach 1. 
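VolumeSampleCandidate and VolumeSampleReservoir above implement single-random-number weighted reservoir sampling: each streamed candidate replaces the stored one with probability $w_i / W_i$, where $W_i = \sum_{j \le i} w_j$ is the running total_weight, and the stored uniform is renormalized on every decision ($r' = r/\theta$ on acceptance, $r' = (r-\theta)/(1-\theta)$ otherwise, with $\theta = w_i/W_i$) so one random number serves the whole stream. The survivor probability telescopes to the expected result:

$$ \Pr[\text{candidate } i \text{ kept}] \;=\; \frac{w_i}{W_i} \prod_{k > i} \left(1 - \frac{w_k}{W_k}\right) \;=\; \frac{w_i}{W_i} \prod_{k > i} \frac{W_{k-1}}{W_k} \;=\; \frac{w_i}{W_n}, $$

and the final total_weight $W_n$ is what volume_distance_sampling_finalize later reuses as the unguided scatter probability.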
Clamp to avoid scaling the majorant to infinity. */ + vstate.scatter_prob = fminf(L_scattered / L_volume, 0.9999f); + } + + const float optical_depth = volume_majorant_optical_depth(kg, buffer); + + /* There is a non-zero probability of sampling no scatter events in the volume segment. In order + * to reach the desired scattering probability, we might need to upscale the majorant and/or the + * guiding scattering probability. See Eq (25,26). */ + vstate.majorant_scale = (optical_depth == 0.0f) ? + 1.0f : + -fast_logf(1.0f - vstate.scatter_prob) / optical_depth; + if (vstate.majorant_scale < 1.0f) { + vstate.majorant_scale = 1.0f; + vstate.scatter_prob = safe_divide(vstate.scatter_prob, 1.0f - fast_expf(-optical_depth)); + } + else { + vstate.scatter_prob = 1.0f; + } +} + +/* Final guiding decision on sampling scatter or transmit event. */ +ccl_device_inline void volume_distance_sampling_finalize( + KernelGlobals kg, + const IntegratorState state, + const ccl_private Ray *ccl_restrict ray, + ccl_private ShaderData *ccl_restrict sd, + ccl_private VolumeIntegrateState &ccl_restrict vstate, + ccl_private VolumeIntegrateResult &ccl_restrict result, + ccl_private VolumeSampleReservoir &reservoir) +{ + if (reservoir.is_empty()) { + return; + } + + const bool sample_distance = !(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_TERMINATE) && + (vstate.direct_sample_method == VOLUME_SAMPLE_DISTANCE); + + if (!vstate.vspg) { + result.indirect_throughput = reservoir.candidate.throughput; + vstate.emission = reservoir.candidate.emission; +# ifdef __DENOISING_FEATURES__ + vstate.albedo = reservoir.candidate.albedo; +# endif + result.indirect_t = reservoir.candidate.t; + + if (sample_distance) { + /* If using distance sampling for direct light, just copy parameters of indirect light + * since we scatter at the same point. */ + result.direct_scatter = true; + result.direct_t = result.indirect_t; + result.direct_throughput = result.indirect_throughput; + if (vstate.use_mis) { + vstate.distance_pdf = reservoir.candidate.distance_pdf; + } + } + return; + } + + const uint lcg_state = reservoir.candidate.lcg_state; + + if (sample_distance) { + /* Always sample direct scatter, regardless of indirect scatter guiding decision. */ + result.direct_throughput = reservoir.candidate.throughput * reservoir.total_weight; + vstate.distance_pdf = reservoir.candidate.distance_pdf; + } + + /* We only guide scatter decisions, no need to apply on emission and albedo. */ + vstate.emission = mix(vstate.emission, reservoir.candidate.emission, reservoir.total_weight); +# ifdef __DENOISING_FEATURES__ + vstate.albedo = mix(vstate.albedo, reservoir.candidate.albedo, reservoir.total_weight); +# endif + + const float unguided_scatter_prob = reservoir.total_weight; + float guided_scatter_prob; + if (is_zero(result.indirect_throughput)) { + /* Always sample scatter event if the contribution of transmitted event is zero. */ + guided_scatter_prob = 1.0f; + } + else { + /* Defensive resampling. */ + const float alpha = 0.75f; + reservoir.total_weight = mix(reservoir.total_weight, vstate.scatter_prob, alpha); + guided_scatter_prob = reservoir.total_weight; + + /* Add transmitted candidate. 
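+ * Its resampling weight is the guided transmit probability (1 - guided_scatter_prob), so the final reservoir pick chooses between scattering and transmitting according to the guided probabilities.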
*/ + reservoir.add_sample( + 1.0f - guided_scatter_prob, +# ifdef __DENOISING_FEATURES__ + {vstate.emission, reservoir.candidate.t, result.indirect_throughput, 0.0f, vstate.albedo} +# else + {vstate.emission, reservoir.candidate.t, result.indirect_throughput, 0.0f} +# endif + ); + } + + const bool scatter = (reservoir.candidate.distance_pdf > 0.0f); + const float scale = scatter ? unguided_scatter_prob / guided_scatter_prob : + (1.0f - unguided_scatter_prob) / (1.0f - guided_scatter_prob); + result.indirect_throughput = reservoir.candidate.throughput * scale; + + if (!scatter && !sample_distance) { + /* No scatter event sampled. */ + return; + } + + /* Recover the volume coefficients at the scatter position. */ + sd->P = ray->P + ray->D * reservoir.candidate.t; + sd->lcg_state = lcg_state; + VolumeShaderCoefficients coeff ccl_optional_struct_init; + if (!volume_shader_sample(kg, state, sd, &coeff)) { + kernel_assert(false); + return; + } + + kernel_assert(sd->flag & SD_SCATTER); + if (sample_distance) { + /* Direct scatter. */ + result.direct_scatter = true; + result.direct_t = reservoir.candidate.t; + volume_shader_copy_phases(&result.direct_phases, sd); + } + + if (scatter) { + /* Indirect scatter. */ + result.indirect_scatter = true; + result.indirect_t = reservoir.candidate.t; + volume_shader_copy_phases(&result.indirect_phases, sd); + } +} + +/** \} */ + +ccl_device bool volume_integrate_should_stop(const ccl_private VolumeIntegrateResult &result) +{ + if (is_zero(result.indirect_throughput) && is_zero(result.direct_throughput)) { + /* Stopped during Russian Roulette. */ + return true; + } + + /* If we have scattering data for both direct and indirect, we're done. */ + return (result.direct_scatter && result.indirect_scatter); +} + +/* Perform Russian Roulette termination to avoid drawing too many samples for indirect scatter, but + * only if both direct and indirect scatter positions are available, or if no scattering is needed. + */ +ccl_device_inline bool volume_russian_roulette_termination( + const IntegratorState state, + ccl_private VolumeSampleReservoir &reservoir, + ccl_private VolumeIntegrateResult &ccl_restrict result, + ccl_private VolumeIntegrateState &ccl_restrict vstate) +{ + if (result.direct_scatter && result.indirect_scatter) { + return true; + } + + const float thresh = reduce_max(fabs(result.indirect_throughput)); + if (thresh > 0.05f) { + /* Only stop if contribution is low enough. */ + return false; + } + + /* Whether equiangular estimator of the direct throughput depends on the indirect throughput. */ + const bool equiangular = (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR) && + vstate.use_mis && !result.direct_scatter; + /* Whether both indirect and direct scatter are possible. */ + const bool has_scatter_samples = !reservoir.is_empty() && !equiangular; + /* The path is to be terminated, no scatter position is needed along the ray. */ + const bool absorption_only = INTEGRATOR_STATE(state, path, flag) & PATH_RAY_TERMINATE; + + /* Randomly stop indirect scatter. */ + if (absorption_only || has_scatter_samples) { + if (reservoir.rand > thresh) { + result.indirect_throughput = zero_spectrum(); + if (equiangular || (vstate.direct_sample_method == VOLUME_SAMPLE_DISTANCE)) { + /* Direct throughput depends on the indirect throughput, set to 0 for early termination. 
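+ * Both the MIS-weighted equiangular estimator and distance sampling derive the direct throughput from the indirect throughput, so neither can contribute once it is zero.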
*/ + result.direct_throughput = zero_spectrum(); + } + return true; + } + + reservoir.rand = saturatef(reservoir.rand / thresh); + result.indirect_throughput /= thresh; + } + + /* Randomly stop direct scatter. */ + if (equiangular) { + if (reservoir.rand > thresh) { + result.direct_scatter = true; + result.direct_throughput = zero_spectrum(); + reservoir.rand = (reservoir.rand - thresh) / (1.0f - thresh); + } + else { + reservoir.rand /= thresh; + vstate.direct_rr_scale /= thresh; + } + reservoir.rand = saturatef(reservoir.rand); + } + + return false; } /* -------------------------------------------------------------------- */ @@ -854,8 +1279,65 @@ ccl_device_inline float volume_scatter_probability( return dot(coeff.sigma_s / sigma_c, channel_pdf); } +/* Decide between real and null scatter events at the current position. */ +ccl_device_inline void volume_sample_indirect_scatter( + const float sigma_max, + const float prob_s, + const Spectrum sigma_s, + ccl_private ShaderData *ccl_restrict sd, + ccl_private VolumeIntegrateState &ccl_restrict vstate, + ccl_private VolumeIntegrateResult &ccl_restrict result, + const uint lcg_state, + ccl_private VolumeSampleReservoir &reservoir) +{ + const float weight = vstate.transmittance * prob_s; + const Spectrum throughput = result.indirect_throughput * sigma_s / (prob_s * sigma_max); + + if (vstate.vspg) { + /* If we guide the scatter probability, simply put the candidate in the reservoir. */ + reservoir.add_sample( +# ifdef __DENOISING_FEATURES__ + weight, + {vstate.emission, vstate.t, throughput, weight * sigma_max, vstate.albedo, lcg_state} +# else + weight, {vstate.emission, vstate.t, throughput, weight * sigma_max, lcg_state} +# endif + ); + } + else if (!result.indirect_scatter) { + /* If no guiding and indirect scatter position has not been found, decide between real and null + * scatter events. */ + if (reservoir.rand <= prob_s) { + /* Rescale random number for reusing. */ + reservoir.rand /= prob_s; + + /* Sampled scatter event. */ + result.indirect_scatter = true; + volume_shader_copy_phases(&result.indirect_phases, sd); + reservoir.add_sample( +# ifdef __DENOISING_FEATURES__ + weight, + {vstate.emission, vstate.t, throughput, weight * sigma_max, vstate.albedo, lcg_state} +# else + weight, {vstate.emission, vstate.t, throughput, weight * sigma_max, lcg_state} +# endif + ); + + if (vstate.direct_sample_method == VOLUME_SAMPLE_DISTANCE) { + result.direct_scatter = true; + volume_shader_copy_phases(&result.direct_phases, sd); + } + } + else { + /* Rescale random number for reusing. */ + reservoir.rand = (reservoir.rand - prob_s) / (1.0f - prob_s); + } + reservoir.rand = saturatef(reservoir.rand); + } +} + /** - * Sample indirect scatter position along the ray based on weighted delta tracking, from + * Integrate volume based on weighted delta tracking, from * [Spectral and Decomposition Tracking for Rendering Heterogeneous Volumes] * (https://disneyanimation.com/publications/spectral-and-decomposition-tracking-for-rendering-heterogeneous-volumes) * by Peter Kutz et. al. @@ -869,200 +1351,100 @@ ccl_device_inline float volume_scatter_probability( * - If ξ < sigma_s / (sigma_s + |sigma_n|), we sample scatter event and evaluate L_s. * - Otherwise, no real collision happens and we continue the recursive process. * The emission L_e is evaluated at each step. - * - * \param sigma_max: majorant volume density inside the current octree node - * \param interval: interval of t along the ray. 
*/ -ccl_device void volume_sample_indirect_scatter( - KernelGlobals kg, - const IntegratorState state, - const ccl_private Ray *ccl_restrict ray, - ccl_private ShaderData *ccl_restrict sd, - const float sigma_max, - const Interval interval, - ccl_private RNGState *rng_state, - ccl_private VolumeIntegrateState &ccl_restrict vstate, - ccl_private VolumeIntegrateResult &ccl_restrict result) -{ - if (result.indirect_scatter) { - /* Already sampled indirect scatter position. */ - return; - } - - /* Initialization. */ - float t = interval.min; - const float inv_maj = (sigma_max == 0.0f) ? FLT_MAX : 1.0f / sigma_max; - const bool segment_has_equiangular = vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR && - interval.contains(result.direct_t) && vstate.use_mis; - bool direct_scatter = false; - while (vstate.step++ < VOLUME_MAX_STEPS) { - if (reduce_max(fabs(result.indirect_throughput)) < VOLUME_THROUGHPUT_EPSILON) { - /* TODO(weizhen): terminate using Russian Roulette. */ - /* TODO(weizhen): deal with negative transmittance. */ - /* TODO(weizhen): should we stop if direct_scatter not yet found? */ - vstate.stop = true; - result.indirect_throughput = zero_spectrum(); - return; - } - - /* Generate the next distance using random walk. */ - const float rand = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SCATTER_DISTANCE); - t += sample_exponential_distribution(rand, inv_maj); - - /* Advance random number offset. */ - rng_state->rng_offset += PRNG_BOUNCE_NUM; - - if (segment_has_equiangular && t > result.direct_t && !direct_scatter) { - /* Stepped beyond the equiangular scatter position, compute direct throughput. */ - direct_scatter = true; - result.direct_throughput = result.indirect_throughput * vstate.transmittance; - vstate.distance_pdf = vstate.transmittance * sigma_max; - } - - if (t > interval.max) { - break; - } - - sd->P = ray->P + ray->D * t; - VolumeShaderCoefficients coeff ccl_optional_struct_init; - if (!volume_shader_sample(kg, state, sd, &coeff)) { - continue; - } - - /* Emission. */ - if (sd->flag & SD_EMISSION) { - /* Emission = inv_sigma * (L_e + sigma_n * (inv_sigma * (L_e + sigma_n * ···))). */ - const Spectrum emission = inv_maj * coeff.emission; - vstate.emission += result.indirect_throughput * emission; - guiding_record_volume_emission(kg, state, emission); - } - - /* Null scattering coefficients. */ - const Spectrum sigma_n = volume_null_event_coefficients(kg, coeff, sigma_max); - - if (reduce_add(coeff.sigma_s) == 0.0f) { - /* Absorption only. Deterministically choose null scattering and estimate the transmittance - * of the current ray segment. */ - result.indirect_throughput *= sigma_n * inv_maj; - continue; - } - -# ifdef __DENOISING_FEATURES__ - if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DENOISING_FEATURES) { - /* Albedo = inv_sigma * (sigma_s + sigma_n * (inv_sigma * (sigma_s + sigma_n * ···))). */ - vstate.albedo += result.indirect_throughput * coeff.sigma_s * inv_maj; - } -# endif - - const float prob_s = volume_scatter_probability(coeff, sigma_n, result.indirect_throughput); - if (vstate.rchannel < prob_s) { - /* Sampled scatter event. */ - result.indirect_throughput *= coeff.sigma_s * inv_maj / prob_s; - result.indirect_t = t; - result.indirect_scatter = true; - volume_shader_copy_phases(&result.indirect_phases, sd); - - if (vstate.direct_sample_method == VOLUME_SAMPLE_DISTANCE) { - /* If using distance sampling for direct light, just copy parameters of indirect light - * since we scatter at the same point. 
*/ - result.direct_scatter = true; - result.direct_t = result.indirect_t; - result.direct_throughput = result.indirect_throughput; - volume_shader_copy_phases(&result.direct_phases, sd); - if (vstate.use_mis) { - vstate.distance_pdf = vstate.transmittance * prob_s * sigma_max; - } - } - return; - } - - /* Null scattering. Accumulate weight and continue. */ - const float prob_n = 1.0f - prob_s; - result.indirect_throughput *= sigma_n * inv_maj / prob_n; - - if (vstate.use_mis) { - vstate.transmittance *= prob_n; - } - - /* Rescale random number for reusing. */ - vstate.rchannel = (vstate.rchannel - prob_s) / prob_n; - } - - /* No scatter event sampled in the interval. */ -} - -/* Throughput and pdf for equiangular sampling. - * If MIS is used with transmittance-based distance sampling, we compute the direct throughput from - * the indirect throughput in the function above. Otherwise, we use telescoping for higher quality. - */ -ccl_device_inline void volume_equiangular_direct_scatter( - KernelGlobals kg, - const IntegratorState state, - const ccl_private Ray *ccl_restrict ray, - const ccl_private Extrema &sigma, - const ccl_private Interval &t, - ccl_private ShaderData *ccl_restrict sd, - ccl_private RNGState *rng_state, - ccl_private VolumeIntegrateState &vstate, - ccl_private VolumeIntegrateResult &ccl_restrict result) -{ - if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR) { - return; - } - - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - - if (t.contains(result.direct_t)) { - /* Equiangular scatter position is inside the current segment. */ - sd->P = ray->P + ray->D * result.direct_t; - VolumeShaderCoefficients coeff ccl_optional_struct_init; - if (volume_shader_sample(kg, state, sd, &coeff) && (sd->flag & SD_SCATTER)) { - volume_shader_copy_phases(&result.direct_phases, sd); - result.direct_scatter = true; - - if (vstate.use_mis) { - /* Compute distance pdf for multiple importance sampling. */ - const Spectrum sigma_n = volume_null_event_coefficients(kg, coeff, sigma.max); - - vstate.distance_pdf *= volume_scatter_probability( - coeff, sigma_n, result.direct_throughput); - } - else { - /* Compute transmittance until the direct scatter position. */ - const Interval t_ = {t.min, result.direct_t}; - result.direct_throughput *= volume_transmittance( - kg, state, ray, sd, sigma.range(), t_, rng_state, path_flag); - } - - result.direct_throughput *= coeff.sigma_s / vstate.equiangular_pdf; - } - } - else if (result.direct_t > t.max && !vstate.use_mis) { - /* Accumulate transmittance. */ - result.direct_throughput *= volume_transmittance( - kg, state, ray, sd, sigma.range(), t, rng_state, path_flag); - } -} - -/* Find direct and indirect scatter positions inside the current active octree leaf node. 
*/ ccl_device void volume_integrate_step_scattering( KernelGlobals kg, const IntegratorState state, const ccl_private Ray *ccl_restrict ray, - const ccl_private Extrema &sigma, - const ccl_private Interval &interval, + const float sigma_max, ccl_private ShaderData *ccl_restrict sd, - ccl_private RNGState *rng_state, ccl_private VolumeIntegrateState &ccl_restrict vstate, + ccl_private VolumeIntegrateResult &ccl_restrict result, + ccl_private VolumeSampleReservoir &reservoir) +{ + if (volume_russian_roulette_termination(state, reservoir, result, vstate)) { + return; + } + + sd->P = ray->P + ray->D * vstate.t; + VolumeShaderCoefficients coeff ccl_optional_struct_init; + const uint lcg_state = sd->lcg_state; + if (!volume_shader_sample(kg, state, sd, &coeff)) { + return; + } + + kernel_assert(sigma_max != 0.0f); + const float inv_maj = 1.0f / sigma_max; + + /* Emission. */ + if (sd->flag & SD_EMISSION) { + /* Emission = inv_sigma * (L_e + sigma_n * (inv_sigma * (L_e + sigma_n * ···))). */ + const Spectrum emission = inv_maj * coeff.emission; + vstate.emission += result.indirect_throughput * emission; + if (!result.indirect_scatter) { + /* Record emission until scatter position. */ + guiding_record_volume_emission(kg, state, emission); + } + } + + /* Null scattering coefficients. */ + const Spectrum sigma_n = volume_null_event_coefficients(kg, coeff, sigma_max); + + if (reduce_add(coeff.sigma_s) == 0.0f) { + /* Absorption only. Deterministically choose null scattering and estimate the transmittance + * of the current ray segment. */ + result.indirect_throughput *= sigma_n * inv_maj; + return; + } + +# ifdef __DENOISING_FEATURES__ + if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DENOISING_FEATURES) { + /* Albedo = inv_sigma * (sigma_s + sigma_n * (inv_sigma * (sigma_s + sigma_n * ···))). */ + vstate.albedo += result.indirect_throughput * coeff.sigma_s * inv_maj; + } +# endif + + /* Indirect scatter. */ + const float prob_s = volume_scatter_probability(coeff, sigma_n, result.indirect_throughput); + volume_sample_indirect_scatter( + sigma_max, prob_s, coeff.sigma_s, sd, vstate, result, lcg_state, reservoir); + + /* Null scattering. Accumulate weight and continue. */ + const float prob_n = 1.0f - prob_s; + result.indirect_throughput *= safe_divide(sigma_n * inv_maj, prob_n); + vstate.transmittance *= prob_n; +} + +/* Evaluate coefficients at the equiangular scatter position, and update the direct throughput. */ +ccl_device_inline void volume_equiangular_direct_scatter( + KernelGlobals kg, + const IntegratorState state, + const ccl_private Ray *ccl_restrict ray, + ccl_private ShaderData *ccl_restrict sd, + ccl_private VolumeIntegrateState &vstate, ccl_private VolumeIntegrateResult &ccl_restrict result) { - /* Distance sampling for indirect and optional direct lighting. */ - volume_sample_indirect_scatter( - kg, state, ray, sd, sigma.max, interval, rng_state, vstate, result); + if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR || !result.direct_scatter) { + return; + } - /* Equiangular sampling for direct lighting. */ - volume_equiangular_direct_scatter( - kg, state, ray, sigma, interval, sd, rng_state, vstate, result); + sd->P = ray->P + ray->D * result.direct_t; + VolumeShaderCoefficients coeff ccl_optional_struct_init; + if (volume_shader_sample(kg, state, sd, &coeff) && (sd->flag & SD_SCATTER)) { + volume_shader_copy_phases(&result.direct_phases, sd); + + if (vstate.use_mis) { + /* Compute distance pdf for multiple importance sampling. 
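+ * vstate.distance_pdf already holds the accumulated null-collision probability times the scaled majorant (set in volume_indirect_scatter_advance()); multiplying by the real-scatter probability at this position gives the pdf with which distance sampling would have produced this point.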
*/ + const Spectrum sigma_n = volume_null_event_coefficients(kg, coeff, vstate.sigma_max); + vstate.distance_pdf *= volume_scatter_probability(coeff, sigma_n, result.direct_throughput); + } + + result.direct_throughput *= coeff.sigma_s / vstate.equiangular_pdf; + } + else { + /* Scattering coefficient is zero at the sampled position. */ + result.direct_scatter = false; + } } /* Multiple Importance Sampling between equiangular sampling and distance sampling. @@ -1103,7 +1485,7 @@ ccl_device_inline void volume_direct_scatter_mis( const ccl_private EquiangularCoefficients &equiangular_coeffs, ccl_private VolumeIntegrateResult &ccl_restrict result) { - if (!vstate.use_mis || vstate.direct_sample_method == VOLUME_SAMPLE_NONE) { + if (!vstate.use_mis || !result.direct_scatter) { return; } @@ -1120,23 +1502,28 @@ ccl_device_inline void volume_direct_scatter_mis( result.direct_throughput *= 2.0f * mis_weight; } +/** \} */ + ccl_device_inline void volume_integrate_state_init(KernelGlobals kg, + const IntegratorState state, const VolumeSampleMethod direct_sample_method, + ccl_global float *ccl_restrict render_buffer, + const ccl_private OctreeTracing &octree, const ccl_private RNGState *rng_state, ccl_private VolumeIntegrateState &vstate) { vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SCATTER_DISTANCE); - vstate.rchannel = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_COLOR_CHANNEL); /* Multiple importance sampling: pick between equiangular and distance sampling strategy. */ vstate.direct_sample_method = direct_sample_method; vstate.use_mis = (direct_sample_method == VOLUME_SAMPLE_MIS); if (vstate.use_mis) { if (vstate.rscatter < 0.5f) { - vstate.rscatter *= 2.0f; vstate.direct_sample_method = VOLUME_SAMPLE_DISTANCE; + vstate.rscatter *= 2.0f; } else { + /* Rescale for equiangular distance sampling. */ vstate.rscatter = (vstate.rscatter - 0.5f) * 2.0f; vstate.direct_sample_method = VOLUME_SAMPLE_EQUIANGULAR; } @@ -1146,8 +1533,10 @@ ccl_device_inline void volume_integrate_state_init(KernelGlobals kg, vstate.equiangular_pdf = 0.0f; vstate.transmittance = 1.0f; vstate.step = 0; - vstate.stop = false; - + vstate.t = octree.t.min; + vstate.optical_depth = octree.sigma.max * octree.t.length(); + volume_scatter_probability_get(kg, state, render_buffer, vstate); + vstate.direct_rr_scale = 1.0f; vstate.emission = zero_spectrum(); # ifdef __DENOISING_FEATURES__ vstate.albedo = zero_spectrum(); @@ -1162,8 +1551,7 @@ ccl_device_inline void volume_integrate_result_init( ccl_private VolumeIntegrateResult &result) { const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); - result.direct_throughput = (vstate.use_mis || - (vstate.direct_sample_method == VOLUME_SAMPLE_NONE)) ? + result.direct_throughput = (vstate.direct_sample_method == VOLUME_SAMPLE_NONE) ? zero_spectrum() : throughput; result.indirect_throughput = throughput; @@ -1263,36 +1651,46 @@ ccl_device_forceinline void volume_integrate_heterogeneous( { PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INTEGRATE); - EquiangularCoefficients equiangular_coeffs = {zero_float3(), {ray->tmin, ray->tmax}}; - const VolumeSampleMethod direct_sample_method = volume_direct_sample_method( - kg, state, ray, sd, rng_state, &equiangular_coeffs, ls); - - VolumeIntegrateState vstate ccl_optional_struct_init; - volume_integrate_state_init(kg, direct_sample_method, rng_state, vstate); - - /* Initialize volume integration result. 
*/ - volume_integrate_result_init(state, ray, vstate, equiangular_coeffs, result); - OctreeTracing octree(ray->tmin); const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); if (!volume_octree_setup(kg, ray, sd, state, rng_state, path_flag, octree)) { return; } + EquiangularCoefficients equiangular_coeffs = {zero_float3(), {ray->tmin, ray->tmax}}; + const VolumeSampleMethod direct_sample_method = volume_direct_sample_method( + kg, state, ray, sd, rng_state, &equiangular_coeffs, ls); + + /* Initialize reservoir for sampling scatter position. */ + VolumeSampleReservoir reservoir = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_RESERVOIR); + + /* Initialize volume integration state. */ + VolumeIntegrateState vstate ccl_optional_struct_init; + volume_integrate_state_init( + kg, state, direct_sample_method, render_buffer, octree, rng_state, vstate); + + /* Initialize volume integration result. */ + volume_integrate_result_init(state, ray, vstate, equiangular_coeffs, result); + /* Scramble for stepping through volume. */ path_state_rng_scramble(rng_state, 0xe35fad82); - do { - volume_integrate_step_scattering( - kg, state, ray, octree.sigma, octree.t, sd, rng_state, vstate, result); + volume_equiangular_transmittance( + kg, state, ray, octree.sigma, octree.t, sd, rng_state, vstate, result); - if (volume_integrate_should_stop(result, vstate)) { + while ( + volume_integrate_advance(kg, ray, sd, state, rng_state, path_flag, octree, vstate, result)) + { + const float sigma_max = octree.sigma.max * vstate.majorant_scale; + volume_integrate_step_scattering(kg, state, ray, sigma_max, sd, vstate, result, reservoir); + + if (volume_integrate_should_stop(result)) { break; } + } - } while ( - volume_octree_advance(kg, ray, sd, state, rng_state, path_flag, octree, vstate.step)); - + volume_distance_sampling_finalize(kg, state, ray, sd, vstate, result, reservoir); + volume_equiangular_direct_scatter(kg, state, ray, sd, vstate, result); volume_direct_scatter_mis(ray, vstate, equiangular_coeffs, result); /* Write accumulated emission. */ @@ -1310,6 +1708,10 @@ ccl_device_forceinline void volume_integrate_heterogeneous( kg, state, vstate.albedo, result.indirect_scatter, render_buffer); } # endif /* __DENOISING_FEATURES__ */ + + if (INTEGRATOR_STATE(state, path, bounce) == 0) { + INTEGRATOR_STATE_WRITE(state, path, optical_depth) += vstate.optical_depth; + } } /* Path tracing: sample point on light and evaluate light shader, then @@ -1417,6 +1819,11 @@ ccl_device_forceinline void integrate_volume_direct_light( INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_glossy_weight) = pass_glossy_weight; } + if (bounce == 0) { + shadow_flag |= PATH_RAY_VOLUME_SCATTER; + shadow_flag &= ~PATH_RAY_VOLUME_PRIMARY_TRANSMIT; + } + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE( state, path, render_pixel_index); INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_offset) = INTEGRATOR_STATE( @@ -1752,10 +2159,15 @@ ccl_device void integrator_shade_volume(KernelGlobals kg, volume_stack_clean(kg, state); } + /* Assign flag to transmitted volume rays for scattering probability guiding. */ + if (INTEGRATOR_STATE(state, path, bounce) == 0) { + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_VOLUME_PRIMARY_TRANSMIT; + } + const VolumeIntegrateEvent event = volume_integrate(kg, state, &ray, render_buffer); if (event == VOLUME_PATH_MISSED) { /* End path. 
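+ * The terminate call now also receives the globals and render buffer so that write_optical_depth() can record the majorant optical depth of primary rays that transmitted through the volume.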
*/ - integrator_path_terminate(state, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); + integrator_path_terminate(kg, state, render_buffer, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); return; } diff --git a/intern/cycles/kernel/integrator/state_flow.h b/intern/cycles/kernel/integrator/state_flow.h index b80bce66edb..ae6e88dee25 100644 --- a/intern/cycles/kernel/integrator/state_flow.h +++ b/intern/cycles/kernel/integrator/state_flow.h @@ -7,6 +7,8 @@ #include "kernel/globals.h" #include "kernel/types.h" +#include "kernel/film/write.h" + #include "kernel/integrator/state.h" #ifdef __KERNEL_GPU__ @@ -46,6 +48,24 @@ ccl_device_forceinline bool integrator_shadow_path_is_terminated(ConstIntegrator return INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0; } +ccl_device_inline void write_optical_depth(KernelGlobals kg, + IntegratorState state, + ccl_global float *ccl_restrict render_buffer) +{ + if (!render_buffer) { + return; + } + + if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_VOLUME_PRIMARY_TRANSMIT) { + kernel_assert(kernel_data.film.pass_volume_majorant != PASS_UNUSED); + + const float optical_depth = INTEGRATOR_STATE(state, path, optical_depth); + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); + film_write_pass_float(buffer + kernel_data.film.pass_volume_majorant, optical_depth); + film_write_pass_float(buffer + kernel_data.film.pass_volume_majorant_sample_count, 1.0f); + } +} + #ifdef __KERNEL_GPU__ ccl_device_forceinline void integrator_path_init(IntegratorState state, @@ -65,9 +85,13 @@ ccl_device_forceinline void integrator_path_next(IntegratorState state, INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; } -ccl_device_forceinline void integrator_path_terminate(IntegratorState state, +ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg, + IntegratorState state, + ccl_global float *ccl_restrict render_buffer, const DeviceKernel current_kernel) { + write_optical_depth(kg, state, render_buffer); + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; @@ -176,9 +200,13 @@ ccl_device_forceinline void integrator_path_next(IntegratorState state, (void)current_kernel; } -ccl_device_forceinline void integrator_path_terminate(IntegratorState state, +ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg, + IntegratorState state, + ccl_global float *ccl_restrict render_buffer, const DeviceKernel current_kernel) { + write_optical_depth(kg, state, render_buffer); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; (void)current_kernel; } diff --git a/intern/cycles/kernel/integrator/state_template.h b/intern/cycles/kernel/integrator/state_template.h index fbbeaaadcc9..41b8c2208a1 100644 --- a/intern/cycles/kernel/integrator/state_template.h +++ b/intern/cycles/kernel/integrator/state_template.h @@ -39,6 +39,8 @@ KERNEL_STRUCT_MEMBER(path, uint16_t, rng_offset, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) /* enum PathRayMNEE */ KERNEL_STRUCT_MEMBER(path, uint8_t, mnee, KERNEL_FEATURE_PATH_TRACING) +/* Majorant volume optical depth. */ +KERNEL_STRUCT_MEMBER(path, float, optical_depth, KERNEL_FEATURE_PATH_TRACING) /* Multiple importance sampling * The PDF of BSDF sampling at the last scatter point, which is at ray distance * zero and distance. 
Note that transparency and volume attenuation increase diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index 9efaebec28e..ff33ea8fc67 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -291,7 +291,7 @@ enum PathTraceDimension { /* Volume */ PRNG_VOLUME_PHASE = 3, - PRNG_VOLUME_COLOR_CHANNEL = 4, + PRNG_VOLUME_RESERVOIR = 4, PRNG_VOLUME_SCATTER_DISTANCE = 5, PRNG_VOLUME_EXPANSION_ORDER = 6, PRNG_VOLUME_SHADE_OFFSET = 7, @@ -437,6 +437,13 @@ enum PathRayFlag : uint32_t { /* Path is evaluating background for an approximate shadow catcher with non-transparent film. */ PATH_RAY_SHADOW_CATCHER_BACKGROUND = (1U << 31U), + + /* TODO(weizhen): should add another flag to record only the primary scatter, but then we need to change the flag to 64 bits or split path_flags in two. Right now we also write volume scatter if the primary hit is a surface, but that seems fine. */ + /* Volume scattering probability guiding. This flag is added to paths where the primary ray passed through the volume without scattering. */ + PATH_RAY_VOLUME_PRIMARY_TRANSMIT = (1U << 23U), }; // 8bit enum, just in case we need to move more variables in it @@ -505,6 +512,8 @@ enum PassType { PASS_VOLUME, PASS_VOLUME_DIRECT, PASS_VOLUME_INDIRECT, + PASS_VOLUME_SCATTER, + PASS_VOLUME_TRANSMIT, PASS_CATEGORY_LIGHT_END = 31, /* Data passes */ @@ -554,6 +563,10 @@ enum PassType { PASS_GUIDING_PROBABILITY, /* The avg. roughness at the first bounce. */ PASS_GUIDING_AVG_ROUGHNESS, + /* The majorant optical depth along the ray, for volume scattering probability guiding. + * When reading this pass, it is converted to the majorant transmittance. */ + PASS_VOLUME_MAJORANT, + PASS_VOLUME_MAJORANT_SAMPLE_COUNT, PASS_CATEGORY_DATA_END = 63, PASS_BAKE_PRIMITIVE, @@ -1868,6 +1881,7 @@ enum DeviceKernel : int { DECLARE_FILM_CONVERT_KERNEL(DEPTH), DECLARE_FILM_CONVERT_KERNEL(MIST), + DECLARE_FILM_CONVERT_KERNEL(VOLUME_MAJORANT), DECLARE_FILM_CONVERT_KERNEL(SAMPLE_COUNT), DECLARE_FILM_CONVERT_KERNEL(FLOAT), DECLARE_FILM_CONVERT_KERNEL(LIGHT_PATH), @@ -1890,6 +1904,9 @@ enum DeviceKernel : int { DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, + DEVICE_KERNEL_VOLUME_GUIDING_FILTER_X, + DEVICE_KERNEL_VOLUME_GUIDING_FILTER_Y, + DEVICE_KERNEL_CRYPTOMATTE_POSTPROCESS, DEVICE_KERNEL_PREFIX_SUM, diff --git a/intern/cycles/scene/film.cpp b/intern/cycles/scene/film.cpp index 2dc5d9ee685..258f0883201 100644 --- a/intern/cycles/scene/film.cpp +++ b/intern/cycles/scene/film.cpp @@ -187,6 +187,11 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->pass_transmission_indirect = PASS_UNUSED; kfilm->pass_volume_direct = PASS_UNUSED; kfilm->pass_volume_indirect = PASS_UNUSED; + kfilm->pass_volume_scatter = PASS_UNUSED; + kfilm->pass_volume_transmit = PASS_UNUSED; + kfilm->pass_volume_scatter_denoised = PASS_UNUSED; + kfilm->pass_volume_transmit_denoised = PASS_UNUSED; + kfilm->pass_volume_majorant = PASS_UNUSED; kfilm->pass_lightgroup = PASS_UNUSED; /* Mark passes as unused so that the kernel knows the pass is inaccessible. */ @@ -218,6 +223,12 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) if (pass->get_mode() == PassMode::DENOISED) { /* Generally we only storing offsets of the noisy passes. The display pass is an exception * since it is a read operation and not a write.
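+ * The denoised volume guiding passes are a further exception: their offsets are stored here because the kernel reads them when computing the guided scatter probability.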
*/ + if (pass->get_type() == PASS_VOLUME_TRANSMIT) { + kfilm->pass_volume_transmit_denoised = kfilm->pass_stride; + } + else if (pass->get_type() == PASS_VOLUME_SCATTER) { + kfilm->pass_volume_scatter_denoised = kfilm->pass_stride; + } kfilm->pass_stride += pass->get_info().num_components; continue; } @@ -328,6 +339,18 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) case PASS_VOLUME_DIRECT: kfilm->pass_volume_direct = kfilm->pass_stride; break; + case PASS_VOLUME_SCATTER: + kfilm->pass_volume_scatter = kfilm->pass_stride; + break; + case PASS_VOLUME_TRANSMIT: + kfilm->pass_volume_transmit = kfilm->pass_stride; + break; + case PASS_VOLUME_MAJORANT: + kfilm->pass_volume_majorant = kfilm->pass_stride; + break; + case PASS_VOLUME_MAJORANT_SAMPLE_COUNT: + kfilm->pass_volume_majorant_sample_count = kfilm->pass_stride; + break; case PASS_BAKE_PRIMITIVE: kfilm->pass_bake_primitive = kfilm->pass_stride; @@ -478,8 +501,8 @@ void Film::update_passes(Scene *scene) const ObjectManager *object_manager = scene->object_manager.get(); Integrator *integrator = scene->integrator; - if (!is_modified() && !object_manager->need_update() && !integrator->is_modified() && - !background->is_modified()) + if (!object_manager->need_update() && !integrator->is_modified() && !background->is_modified() && + !scene->has_volume_modified()) { return; } @@ -571,6 +594,20 @@ void Film::update_passes(Scene *scene) } } + if (scene->has_volume()) { + add_auto_pass(scene, PASS_VOLUME_SCATTER); + add_auto_pass(scene, PASS_VOLUME_SCATTER, PassMode::DENOISED, "Volume Scatter"); + add_auto_pass(scene, PASS_VOLUME_TRANSMIT); + add_auto_pass(scene, PASS_VOLUME_TRANSMIT, PassMode::DENOISED, "Volume Transmit"); + if (!Pass::contains(scene->passes, PASS_SAMPLE_COUNT)) { + add_auto_pass(scene, PASS_SAMPLE_COUNT); + } + if (!Pass::contains(scene->passes, PASS_VOLUME_MAJORANT)) { + add_auto_pass(scene, PASS_VOLUME_MAJORANT, "Volume Majorant"); + } + add_auto_pass(scene, PASS_VOLUME_MAJORANT_SAMPLE_COUNT); + } + /* Remove duplicates and initialize internal pass info. */ finalize_passes(scene, use_denoise); @@ -669,8 +706,9 @@ void Film::finalize_passes(Scene *scene, const bool use_denoise) /* Disable denoising on passes if denoising is disabled, or if the * pass does not support it. */ - pass->set_mode((use_denoise && pass->get_info().support_denoise) ? pass->get_mode() : - PassMode::NOISY); + const bool need_denoise = pass->get_info().support_denoise && + (use_denoise || is_volume_guiding_pass(pass->get_type())); + pass->set_mode(need_denoise ? pass->get_mode() : PassMode::NOISY); /* Merge duplicate passes. 
*/ bool duplicate_found = false; @@ -722,13 +760,16 @@ uint Film::get_kernel_features(const Scene *scene) const const PassType pass_type = pass->get_type(); const PassMode pass_mode = pass->get_mode(); - if (pass_mode == PassMode::DENOISED || pass_type == PASS_DENOISING_NORMAL || + const bool has_denoise_pass = (pass_mode == PassMode::DENOISED) && + !is_volume_guiding_pass(pass_type); + + if (has_denoise_pass || pass_type == PASS_DENOISING_NORMAL || pass_type == PASS_DENOISING_ALBEDO || pass_type == PASS_DENOISING_DEPTH) { kernel_features |= KERNEL_FEATURE_DENOISING; } - if (pass_type >= PASS_DIFFUSE && pass_type <= PASS_VOLUME_INDIRECT) { + if (pass_type >= PASS_DIFFUSE && pass_type <= PASS_VOLUME_TRANSMIT) { kernel_features |= KERNEL_FEATURE_LIGHT_PASSES; } diff --git a/intern/cycles/scene/pass.cpp b/intern/cycles/scene/pass.cpp index ceee9562f52..c9fd173daa8 100644 --- a/intern/cycles/scene/pass.cpp +++ b/intern/cycles/scene/pass.cpp @@ -64,6 +64,8 @@ const NodeEnum *Pass::get_type_enum() pass_type_enum.insert("volume", PASS_VOLUME); pass_type_enum.insert("volume_direct", PASS_VOLUME_DIRECT); pass_type_enum.insert("volume_indirect", PASS_VOLUME_INDIRECT); + pass_type_enum.insert("volume_scatter", PASS_VOLUME_SCATTER); + pass_type_enum.insert("volume_transmit", PASS_VOLUME_TRANSMIT); /* Data passes. */ pass_type_enum.insert("depth", PASS_DEPTH); @@ -88,6 +90,8 @@ const NodeEnum *Pass::get_type_enum() pass_type_enum.insert("denoising_albedo", PASS_DENOISING_ALBEDO); pass_type_enum.insert("denoising_depth", PASS_DENOISING_DEPTH); pass_type_enum.insert("denoising_previous", PASS_DENOISING_PREVIOUS); + pass_type_enum.insert("volume_majorant", PASS_VOLUME_MAJORANT); + pass_type_enum.insert("volume_majorant_sample_count", PASS_VOLUME_MAJORANT_SAMPLE_COUNT); pass_type_enum.insert("shadow_catcher", PASS_SHADOW_CATCHER); pass_type_enum.insert("shadow_catcher_sample_count", PASS_SHADOW_CATCHER_SAMPLE_COUNT); @@ -274,6 +278,25 @@ PassInfo Pass::get_info(const PassType type, const bool include_albedo, const bo pass_info.num_components = 3; pass_info.use_exposure = true; break; + case PASS_VOLUME_SCATTER: + case PASS_VOLUME_TRANSMIT: + /* TODO(weizhen): Gaussian filter only needs 1 component, but we can have negative pixel + * values in some channels, preventing us from simply adding them together; besides, using RGB + * channels is better for visualization. We can optimize the memory usage by using the RGBE format.
*/ + pass_info.num_components = 3; + pass_info.use_exposure = true; + pass_info.use_filter = false; + pass_info.support_denoise = true; + break; + case PASS_VOLUME_MAJORANT: + pass_info.num_components = 1; + pass_info.use_filter = false; + pass_info.divide_type = PASS_VOLUME_MAJORANT_SAMPLE_COUNT; + break; + case PASS_VOLUME_MAJORANT_SAMPLE_COUNT: + pass_info.num_components = 1; + pass_info.use_filter = false; + break; case PASS_CRYPTOMATTE: pass_info.num_components = 4; @@ -438,4 +461,9 @@ std::ostream &operator<<(std::ostream &os, const Pass &pass) return os; } +bool is_volume_guiding_pass(const PassType pass_type) +{ + return (pass_type == PASS_VOLUME_SCATTER) || (pass_type == PASS_VOLUME_TRANSMIT); +} + CCL_NAMESPACE_END diff --git a/intern/cycles/scene/pass.h b/intern/cycles/scene/pass.h index 2f6e9fcccbf..09f181cda6e 100644 --- a/intern/cycles/scene/pass.h +++ b/intern/cycles/scene/pass.h @@ -95,4 +95,6 @@ class Pass : public Node { std::ostream &operator<<(std::ostream &os, const Pass &pass); +bool is_volume_guiding_pass(const PassType pass_type); + CCL_NAMESPACE_END diff --git a/intern/cycles/scene/scene.cpp b/intern/cycles/scene/scene.cpp index 0fb0bdcda9c..25a6ed38cda 100644 --- a/intern/cycles/scene/scene.cpp +++ b/intern/cycles/scene/scene.cpp @@ -789,6 +789,22 @@ void Scene::tag_shadow_catcher_modified() shadow_catcher_modified_ = true; } +bool Scene::has_volume() +{ + has_volume_modified_ = false; + return dscene.data.integrator.use_volumes; +} + +bool Scene::has_volume_modified() const +{ + return has_volume_modified_; +} + +void Scene::tag_has_volume_modified() +{ + has_volume_modified_ = true; +} + template<> Light *Scene::create_node() { unique_ptr node = make_unique(); diff --git a/intern/cycles/scene/scene.h b/intern/cycles/scene/scene.h index 90cd5349e3d..61af24125f1 100644 --- a/intern/cycles/scene/scene.h +++ b/intern/cycles/scene/scene.h @@ -201,6 +201,9 @@ class Scene : public NodeOwner { bool has_shadow_catcher(); void tag_shadow_catcher_modified(); + bool has_volume(); + bool has_volume_modified() const; + void tag_has_volume_modified(); /* This function is used to create a node of a specified type instead of * calling 'new', and sets the scene as the owner of the node. @@ -245,6 +248,7 @@ class Scene : public NodeOwner { bool has_shadow_catcher_ = false; bool shadow_catcher_modified_ = true; + bool has_volume_modified_ = true; /* Maximum number of closure during session lifetime. */ int max_closure_global; diff --git a/intern/cycles/scene/shader.cpp b/intern/cycles/scene/shader.cpp index 9678623e78c..372d3ddeb0a 100644 --- a/intern/cycles/scene/shader.cpp +++ b/intern/cycles/scene/shader.cpp @@ -546,7 +546,10 @@ void ShaderManager::device_update_pre(Device * /*device*/, /* Set this early as it is needed by volume rendering passes. */ KernelIntegrator *kintegrator = &dscene->data.integrator; - kintegrator->use_volumes = has_volumes; + if (kintegrator->use_volumes != has_volumes) { + scene->tag_has_volume_modified(); + kintegrator->use_volumes = has_volumes; + } } void ShaderManager::device_update_post(Device *device, diff --git a/intern/cycles/util/tbb.h b/intern/cycles/util/tbb.h index cfee7f08995..b2165b733e9 100644 --- a/intern/cycles/util/tbb.h +++ b/intern/cycles/util/tbb.h @@ -10,6 +10,7 @@ # include "util/windows.h" #endif +#include #include #include #include @@ -27,6 +28,7 @@ CCL_NAMESPACE_BEGIN using tbb::blocked_range; +using tbb::blocked_range2d; using tbb::blocked_range3d; using tbb::enumerable_thread_specific; using tbb::parallel_for;
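For reference: the VolumeSampleReservoir added above implements standard single-sample weighted reservoir sampling with random-number reuse. Below is a minimal standalone sketch of that update rule in plain C++; the Candidate struct, the std::clamp guard and the test weights are illustrative assumptions rather than kernel code.

#include <algorithm>
#include <cstdio>

/* Illustrative candidate; the kernel additionally stores throughput, pdf, emission, etc. */
struct Candidate {
  float t = 0.0f;
};

struct Reservoir {
  float total_weight = 0.0f;
  float rand; /* One uniform random number, rescaled after every decision so it can be reused. */
  Candidate candidate;

  explicit Reservoir(const float rand_) : rand(rand_) {}

  void add_sample(const float weight, const Candidate &new_candidate)
  {
    if (!(weight > 0.0f)) {
      return;
    }
    total_weight += weight;
    const float thresh = weight / total_weight;
    if (rand <= thresh || total_weight == weight) {
      /* Keep the new candidate with probability weight / total_weight. */
      candidate = new_candidate;
      rand /= thresh;
    }
    else {
      rand = (rand - thresh) / (1.0f - thresh);
    }
    /* Guard against the rescaling drifting outside the 0..1 range. */
    rand = std::clamp(rand, 0.0f, 1.0f);
  }
};

int main()
{
  Reservoir r(0.37f);
  r.add_sample(0.2f, {1.0f});
  r.add_sample(0.5f, {2.5f});
  r.add_sample(0.3f, {4.0f});
  /* After streaming, candidate i is held with probability weight_i / total_weight. */
  std::printf("picked t = %f, total weight = %f\n", r.candidate.t, r.total_weight);
  return 0;
}

The kernel version additionally folds the defensive-resampling mix and the transmitted candidate into the same stream before the final scatter/transmit decision.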