Cycles: Use RGBE for denoised guiding buffers to reduce memory usage
Co-authored-by: Brecht Van Lommel <brecht@blender.org>
This commit is contained in:
committed by
Weizhen Huang
parent
5cb6014efd
commit
a4f8e0bfa2
@@ -41,6 +41,7 @@ CPUKernels::CPUKernels()
|
||||
REGISTER_KERNEL_FILM_CONVERT(sample_count),
|
||||
REGISTER_KERNEL_FILM_CONVERT(float),
|
||||
REGISTER_KERNEL_FILM_CONVERT(light_path),
|
||||
REGISTER_KERNEL_FILM_CONVERT(rgbe),
|
||||
REGISTER_KERNEL_FILM_CONVERT(float3),
|
||||
REGISTER_KERNEL_FILM_CONVERT(motion),
|
||||
REGISTER_KERNEL_FILM_CONVERT(cryptomatte),
|
||||
|
||||
@@ -119,6 +119,7 @@ class CPUKernels {
|
||||
KERNEL_FILM_CONVERT_FUNCTION(float)
|
||||
|
||||
KERNEL_FILM_CONVERT_FUNCTION(light_path)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(rgbe)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(float3)
|
||||
|
||||
KERNEL_FILM_CONVERT_FUNCTION(motion)
|
||||
|
||||
@@ -126,6 +126,7 @@ const char *device_kernel_as_string(DeviceKernel kernel)
|
||||
FILM_CONVERT_KERNEL_AS_STRING(SAMPLE_COUNT, sample_count)
|
||||
FILM_CONVERT_KERNEL_AS_STRING(FLOAT, float)
|
||||
FILM_CONVERT_KERNEL_AS_STRING(LIGHT_PATH, light_path)
|
||||
FILM_CONVERT_KERNEL_AS_STRING(RGBE, rgbe)
|
||||
FILM_CONVERT_KERNEL_AS_STRING(FLOAT3, float3)
|
||||
FILM_CONVERT_KERNEL_AS_STRING(MOTION, motion)
|
||||
FILM_CONVERT_KERNEL_AS_STRING(CRYPTOMATTE, cryptomatte)
|
||||
|
||||
@@ -316,7 +316,7 @@ void DenoiserGPU::denoise_color_read(const DenoiseContext &context, const Denois
|
||||
const PassAccessorGPU pass_accessor(
|
||||
denoiser_queue_.get(), pass_access_info, 1.0f, context.num_samples);
|
||||
|
||||
PassAccessor::Destination destination(pass_access_info.type);
|
||||
PassAccessor::Destination destination(pass_access_info.type, pass_access_info.mode);
|
||||
destination.d_pixels = context.render_buffers->buffer.device_pointer;
|
||||
destination.num_components = 3;
|
||||
destination.pixel_offset = pass.denoised_offset;
|
||||
|
||||
@@ -53,7 +53,7 @@ class OIDNPass {
|
||||
offset = buffer_params.get_pass_offset(type, mode);
|
||||
need_scale = (type == PASS_DENOISING_ALBEDO || type == PASS_DENOISING_NORMAL);
|
||||
|
||||
const PassInfo pass_info = Pass::get_info(type);
|
||||
const PassInfo pass_info = Pass::get_info(type, mode);
|
||||
num_components = pass_info.num_components;
|
||||
use_compositing = pass_info.use_compositing;
|
||||
use_denoising_albedo = pass_info.use_denoising_albedo;
|
||||
|
||||
@@ -33,9 +33,9 @@ PassAccessor::Destination::Destination(float *pixels, const int num_components)
|
||||
{
|
||||
}
|
||||
|
||||
PassAccessor::Destination::Destination(const PassType pass_type)
|
||||
PassAccessor::Destination::Destination(const PassType pass_type, const PassMode pass_mode)
|
||||
{
|
||||
const PassInfo pass_info = Pass::get_info(pass_type);
|
||||
const PassInfo pass_info = Pass::get_info(pass_type, pass_mode);
|
||||
num_components = pass_info.num_components;
|
||||
}
|
||||
|
||||
@@ -126,12 +126,16 @@ bool PassAccessor::get_render_tile_pixels(const RenderBuffers *render_buffers,
|
||||
const PassType type = pass_access_info_.type;
|
||||
const PassMode mode = pass_access_info_.mode;
|
||||
const PassInfo pass_info = Pass::get_info(
|
||||
type, pass_access_info_.include_albedo, pass_access_info_.is_lightgroup);
|
||||
type, mode, pass_access_info_.include_albedo, pass_access_info_.is_lightgroup);
|
||||
int num_written_components = pass_info.num_components;
|
||||
|
||||
if (pass_info.num_components == 1) {
|
||||
if (is_volume_guiding_pass(type)) {
|
||||
get_pass_rgbe(render_buffers, buffer_params, destination);
|
||||
num_written_components = 3;
|
||||
}
|
||||
/* Single channel passes. */
|
||||
if (mode == PassMode::DENOISED) {
|
||||
else if (mode == PassMode::DENOISED) {
|
||||
/* Denoised passes store their final pixels, no need in special calculation. */
|
||||
get_pass_float(render_buffers, buffer_params, destination);
|
||||
}
|
||||
@@ -226,8 +230,10 @@ void PassAccessor::init_kernel_film_convert(KernelFilmConvert *kfilm_convert,
|
||||
const Destination &destination) const
|
||||
{
|
||||
const PassMode mode = pass_access_info_.mode;
|
||||
const PassInfo &pass_info = Pass::get_info(
|
||||
pass_access_info_.type, pass_access_info_.include_albedo, pass_access_info_.is_lightgroup);
|
||||
const PassInfo &pass_info = Pass::get_info(pass_access_info_.type,
|
||||
mode,
|
||||
pass_access_info_.include_albedo,
|
||||
pass_access_info_.is_lightgroup);
|
||||
|
||||
kfilm_convert->pass_offset = pass_access_info_.offset;
|
||||
kfilm_convert->pass_stride = buffer_params.pass_stride;
|
||||
@@ -290,8 +296,10 @@ bool PassAccessor::set_render_tile_pixels(RenderBuffers *render_buffers, const S
|
||||
return false;
|
||||
}
|
||||
|
||||
const PassInfo pass_info = Pass::get_info(
|
||||
pass_access_info_.type, pass_access_info_.include_albedo, pass_access_info_.is_lightgroup);
|
||||
const PassInfo pass_info = Pass::get_info(pass_access_info_.type,
|
||||
pass_access_info_.mode,
|
||||
pass_access_info_.include_albedo,
|
||||
pass_access_info_.is_lightgroup);
|
||||
|
||||
const BufferParams &buffer_params = render_buffers->params;
|
||||
|
||||
|
||||
@@ -48,7 +48,7 @@ class PassAccessor {
|
||||
|
||||
/* Destination will be initialized with the number of components which is native for the given
|
||||
* pass type. */
|
||||
explicit Destination(const PassType pass_type);
|
||||
explicit Destination(const PassType pass_type, const PassMode pass_mode);
|
||||
|
||||
/* CPU-side pointers. only usable by the `PassAccessorCPU`. */
|
||||
float *pixels = nullptr;
|
||||
@@ -138,6 +138,7 @@ class PassAccessor {
|
||||
/* Float3 passes. */
|
||||
DECLARE_PASS_ACCESSOR(light_path)
|
||||
DECLARE_PASS_ACCESSOR(shadow_catcher)
|
||||
DECLARE_PASS_ACCESSOR(rgbe)
|
||||
DECLARE_PASS_ACCESSOR(float3)
|
||||
|
||||
/* Float4 passes. */
|
||||
|
||||
@@ -112,6 +112,7 @@ DEFINE_PASS_ACCESSOR(float)
|
||||
/* Float3 passes. */
|
||||
DEFINE_PASS_ACCESSOR(light_path)
|
||||
DEFINE_PASS_ACCESSOR(shadow_catcher)
|
||||
DEFINE_PASS_ACCESSOR(rgbe)
|
||||
DEFINE_PASS_ACCESSOR(float3)
|
||||
|
||||
/* Float4 passes. */
|
||||
|
||||
@@ -47,6 +47,7 @@ class PassAccessorCPU : public PassAccessor {
|
||||
/* Float3 passes. */
|
||||
DECLARE_PASS_ACCESSOR(light_path)
|
||||
DECLARE_PASS_ACCESSOR(shadow_catcher)
|
||||
DECLARE_PASS_ACCESSOR(rgbe)
|
||||
DECLARE_PASS_ACCESSOR(float3)
|
||||
|
||||
/* Float4 passes. */
|
||||
|
||||
@@ -96,6 +96,7 @@ DEFINE_PASS_ACCESSOR(float, FLOAT);
|
||||
|
||||
/* Float3 passes. */
|
||||
DEFINE_PASS_ACCESSOR(light_path, LIGHT_PATH);
|
||||
DEFINE_PASS_ACCESSOR(rgbe, RGBE);
|
||||
DEFINE_PASS_ACCESSOR(float3, FLOAT3);
|
||||
|
||||
/* Float4 passes. */
|
||||
|
||||
@@ -40,6 +40,7 @@ class PassAccessorGPU : public PassAccessor {
|
||||
|
||||
/* Float3 passes. */
|
||||
DECLARE_PASS_ACCESSOR(light_path);
|
||||
DECLARE_PASS_ACCESSOR(rgbe);
|
||||
DECLARE_PASS_ACCESSOR(float3);
|
||||
|
||||
/* Float4 passes. */
|
||||
|
||||
@@ -186,9 +186,9 @@ PassAccessor::PassAccessInfo PathTraceWork::get_display_pass_access_info(PassMod
|
||||
}
|
||||
|
||||
PassAccessor::Destination PathTraceWork::get_display_destination_template(
|
||||
const PathTraceDisplay *display) const
|
||||
const PathTraceDisplay *display, const PassMode mode) const
|
||||
{
|
||||
PassAccessor::Destination destination(film_->get_display_pass());
|
||||
PassAccessor::Destination destination(film_->get_display_pass(), mode);
|
||||
|
||||
const int2 display_texture_size = display->get_texture_size();
|
||||
const int texture_x = effective_buffer_params_.full_x - effective_big_tile_params_.full_x +
|
||||
|
||||
@@ -165,8 +165,8 @@ class PathTraceWork {
|
||||
|
||||
/* Get destination which offset and stride are configured so that writing to it will write to a
|
||||
* proper location of GPU display texture, taking current tile and device slice into account. */
|
||||
PassAccessor::Destination get_display_destination_template(
|
||||
const PathTraceDisplay *display) const;
|
||||
PassAccessor::Destination get_display_destination_template(const PathTraceDisplay *display,
|
||||
const PassMode mode) const;
|
||||
|
||||
/* Device which will be used for path tracing.
|
||||
* Note that it is an actual render device (and never is a multi-device). */
|
||||
|
||||
@@ -197,7 +197,7 @@ void PathTraceWorkCPU::copy_to_display(PathTraceDisplay *display,
|
||||
|
||||
const PassAccessorCPU pass_accessor(pass_access_info, kfilm.exposure, num_samples);
|
||||
|
||||
PassAccessor::Destination destination = get_display_destination_template(display);
|
||||
PassAccessor::Destination destination = get_display_destination_template(display, pass_mode);
|
||||
destination.pixels_half_rgba = rgba_half;
|
||||
|
||||
tbb::task_arena local_arena = local_tbb_arena_create(device_);
|
||||
|
||||
@@ -1018,7 +1018,7 @@ void PathTraceWorkGPU::copy_to_display_naive(PathTraceDisplay *display,
|
||||
queue_->zero_to_device(display_rgba_half_);
|
||||
}
|
||||
|
||||
PassAccessor::Destination destination(film_->get_display_pass());
|
||||
PassAccessor::Destination destination(film_->get_display_pass(), pass_mode);
|
||||
destination.d_pixels_half_rgba = display_rgba_half_.device_pointer;
|
||||
|
||||
get_render_tile_film_pixels(destination, pass_mode, num_samples);
|
||||
@@ -1045,7 +1045,7 @@ bool PathTraceWorkGPU::copy_to_display_interop(PathTraceDisplay *display,
|
||||
return false;
|
||||
}
|
||||
|
||||
PassAccessor::Destination destination = get_display_destination_template(display);
|
||||
PassAccessor::Destination destination = get_display_destination_template(display, pass_mode);
|
||||
destination.d_pixels_half_rgba = d_rgba_half;
|
||||
|
||||
get_render_tile_film_pixels(destination, pass_mode, num_samples);
|
||||
|
||||
@@ -420,6 +420,7 @@ set(SRC_UTIL_HEADERS
|
||||
../util/types_int4.h
|
||||
../util/types_int8.h
|
||||
../util/types_spectrum.h
|
||||
../util/types_rgbe.h
|
||||
../util/types_uchar2.h
|
||||
../util/types_uchar3.h
|
||||
../util/types_uchar4.h
|
||||
|
||||
@@ -54,6 +54,7 @@ KERNEL_FILM_CONVERT_FUNCTION(volume_majorant)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(float)
|
||||
|
||||
KERNEL_FILM_CONVERT_FUNCTION(light_path)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(rgbe)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(float3)
|
||||
|
||||
KERNEL_FILM_CONVERT_FUNCTION(motion)
|
||||
|
||||
@@ -374,6 +374,7 @@ KERNEL_FILM_CONVERT_FUNCTION(volume_majorant, true)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(float, true)
|
||||
|
||||
KERNEL_FILM_CONVERT_FUNCTION(light_path, false)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(rgbe, false)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(float3, false)
|
||||
|
||||
KERNEL_FILM_CONVERT_FUNCTION(motion, false)
|
||||
|
||||
@@ -892,6 +892,7 @@ KERNEL_FILM_CONVERT_VARIANT(float, 1)
|
||||
|
||||
/* 3 channel inputs */
|
||||
KERNEL_FILM_CONVERT_VARIANT(light_path, 3)
|
||||
KERNEL_FILM_CONVERT_VARIANT(rgbe, 3)
|
||||
KERNEL_FILM_CONVERT_VARIANT(float3, 3)
|
||||
|
||||
/* 4 channel inputs */
|
||||
|
||||
@@ -277,6 +277,7 @@ ccl_device_forceinline uchar4 make_uchar4(const uchar x,
|
||||
#define coshf(x) cosh(float(x))
|
||||
#define tanhf(x) tanh(float(x))
|
||||
#define saturatef(x) saturate(float(x))
|
||||
#define ldexpf(x, y) ldexp(float(x), int(y))
|
||||
|
||||
/* Use native functions with possibly lower precision for performance,
|
||||
* no issues found so far. */
|
||||
|
||||
@@ -224,6 +224,7 @@ ccl_device_forceinline int __float_as_int(const float x)
|
||||
#define fminf(x, y) sycl::fmin((x), (y))
|
||||
#define fmodf(x, y) sycl::fmod((x), (y))
|
||||
#define lgammaf(x) sycl::lgamma((x))
|
||||
#define ldexpf(x, y) sycl::ldexp((x), (y))
|
||||
|
||||
#define cosf(x) sycl::native::cos(((float)(x)))
|
||||
#define sinf(x) sycl::native::sin(((float)(x)))
|
||||
|
||||
@@ -635,6 +635,7 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
|
||||
DEVICE_KERNEL_FILM_CONVERT(sample_count, SAMPLE_COUNT);
|
||||
DEVICE_KERNEL_FILM_CONVERT(float, FLOAT);
|
||||
DEVICE_KERNEL_FILM_CONVERT(light_path, LIGHT_PATH);
|
||||
DEVICE_KERNEL_FILM_CONVERT(rgbe, RGBE);
|
||||
DEVICE_KERNEL_FILM_CONVERT(float3, FLOAT3);
|
||||
DEVICE_KERNEL_FILM_CONVERT(motion, MOTION);
|
||||
DEVICE_KERNEL_FILM_CONVERT(cryptomatte, CRYPTOMATTE);
|
||||
|
||||
@@ -171,6 +171,22 @@ ccl_device_inline void film_get_pass_pixel_volume_majorant(
|
||||
pixel[0] = (*count != 0.0f) ? expf(-(f * scale_exposure) / *count) : 0.0f;
|
||||
}
|
||||
|
||||
/* Read a pass stored in RGBE format and expand it into three float channels.
 *
 * The pass occupies a single float slot at `kfilm_convert->pass_offset` inside `buffer`; that
 * slot is decoded with `rgbe_to_rgb()` and the resulting RGB triple is written to `pixel[0..2]`.
 *
 * NOTE(review): three values are written while the assert only requires `num_components >= 1`;
 * presumably the caller always provides room for three floats - confirm. */
ccl_device_inline void film_get_pass_pixel_rgbe(const ccl_global KernelFilmConvert *ccl_restrict
                                                    kfilm_convert,
                                                const ccl_global float *ccl_restrict buffer,
                                                ccl_private float *ccl_restrict pixel)
{
  kernel_assert(kfilm_convert->num_components >= 1);
  kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);

  /* The packed value lives in one float-sized slot of the render buffer. */
  const RGBE packed(buffer[kfilm_convert->pass_offset]);
  const float3 rgb = rgbe_to_rgb(packed);

  pixel[0] = rgb.x;
  pixel[1] = rgb.y;
  pixel[2] = rgb.z;
}
|
||||
|
||||
ccl_device_inline void film_get_pass_pixel_float(const ccl_global KernelFilmConvert *ccl_restrict
|
||||
kfilm_convert,
|
||||
const ccl_global float *ccl_restrict buffer,
|
||||
|
||||
@@ -60,15 +60,13 @@ ccl_device void volume_guiding_filter_x(KernelGlobals kg,
|
||||
const float weight = gaussian_params[dx] /
|
||||
__float_as_uint(buffer[kernel_data.film.pass_sample_count]);
|
||||
|
||||
scatter += fabs(kernel_read_pass_float3(buffer + kernel_data.film.pass_volume_scatter)) *
|
||||
weight;
|
||||
transmit += fabs(kernel_read_pass_float3(buffer + kernel_data.film.pass_volume_transmit)) *
|
||||
weight;
|
||||
scatter += kernel_read_pass_float3(buffer + kernel_data.film.pass_volume_scatter) * weight;
|
||||
transmit += kernel_read_pass_float3(buffer + kernel_data.film.pass_volume_transmit) * weight;
|
||||
}
|
||||
|
||||
/* Write to the buffer. */
|
||||
film_overwrite_pass_float3(buffer + kernel_data.film.pass_volume_scatter_denoised, scatter);
|
||||
film_overwrite_pass_float3(buffer + kernel_data.film.pass_volume_transmit_denoised, transmit);
|
||||
film_overwrite_pass_rgbe(buffer + kernel_data.film.pass_volume_scatter_denoised, scatter);
|
||||
film_overwrite_pass_rgbe(buffer + kernel_data.film.pass_volume_transmit_denoised, transmit);
|
||||
}
|
||||
|
||||
ccl_device void volume_guiding_filter_y(KernelGlobals kg,
|
||||
@@ -109,9 +107,9 @@ ccl_device void volume_guiding_filter_y(KernelGlobals kg,
|
||||
else {
|
||||
ccl_global float *buffer = film_pass_pixel_render_buffer(
|
||||
kg, x, y, offset, stride, render_buffer);
|
||||
scatter_neighbors[i] = kernel_read_pass_float3(
|
||||
buffer + kernel_data.film.pass_volume_scatter_denoised);
|
||||
transmit_neighbors[i] = kernel_read_pass_float3(
|
||||
scatter_neighbors[i] = kernel_read_pass_rgbe(buffer +
|
||||
kernel_data.film.pass_volume_scatter_denoised);
|
||||
transmit_neighbors[i] = kernel_read_pass_rgbe(
|
||||
buffer + kernel_data.film.pass_volume_transmit_denoised);
|
||||
}
|
||||
}
|
||||
@@ -128,9 +126,9 @@ ccl_device void volume_guiding_filter_y(KernelGlobals kg,
|
||||
else {
|
||||
ccl_global float *buffer = film_pass_pixel_render_buffer(
|
||||
kg, x, next_y, offset, stride, render_buffer);
|
||||
scatter_neighbors[index] = kernel_read_pass_float3(
|
||||
scatter_neighbors[index] = kernel_read_pass_rgbe(
|
||||
buffer + kernel_data.film.pass_volume_scatter_denoised);
|
||||
transmit_neighbors[index] = kernel_read_pass_float3(
|
||||
transmit_neighbors[index] = kernel_read_pass_rgbe(
|
||||
buffer + kernel_data.film.pass_volume_transmit_denoised);
|
||||
}
|
||||
|
||||
@@ -147,8 +145,10 @@ ccl_device void volume_guiding_filter_y(KernelGlobals kg,
|
||||
/* Write to the buffers. */
|
||||
ccl_global float *buffer = film_pass_pixel_render_buffer(
|
||||
kg, x, y, offset, stride, render_buffer);
|
||||
film_overwrite_pass_float3(buffer + kernel_data.film.pass_volume_scatter_denoised, scatter);
|
||||
film_overwrite_pass_float3(buffer + kernel_data.film.pass_volume_transmit_denoised, transmit);
|
||||
film_overwrite_pass_rgbe(buffer + kernel_data.film.pass_volume_scatter_denoised,
|
||||
fabs(scatter));
|
||||
film_overwrite_pass_rgbe(buffer + kernel_data.film.pass_volume_transmit_denoised,
|
||||
fabs(transmit));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
|
||||
#include "kernel/util/colorspace.h"
|
||||
|
||||
#include "util/types_rgbe.h"
|
||||
|
||||
#ifdef __KERNEL_GPU__
|
||||
# include "util/atomic.h"
|
||||
# define __ATOMIC_PASS_WRITE__
|
||||
@@ -108,6 +110,12 @@ ccl_device_inline void film_write_pass_float4(ccl_global float *ccl_restrict buf
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Encode `value` as RGBE and store it in the single float slot that `buffer` points to,
 * replacing whatever was stored there before (plain store, no accumulation). */
ccl_device_inline void film_overwrite_pass_rgbe(ccl_global float *ccl_restrict buffer,
                                                const float3 value)
{
  const RGBE packed = rgb_to_rgbe(value);
  buffer[0] = packed.f;
}
|
||||
|
||||
/* Overwrite for passes that only write on sample 0. This assumes only a single thread will write
|
||||
* to this pixel and no atomics are needed. */
|
||||
|
||||
@@ -142,4 +150,9 @@ ccl_device_inline float4 kernel_read_pass_float4(ccl_global float *ccl_restrict
|
||||
return make_float4(buffer[0], buffer[1], buffer[2], buffer[3]);
|
||||
}
|
||||
|
||||
/* Decode the RGBE value stored in the single float slot that `buffer` points to and return it as
 * an RGB triple. */
ccl_device_inline float3 kernel_read_pass_rgbe(const ccl_global float *ccl_restrict buffer)
{
  const RGBE packed(buffer[0]);
  return rgbe_to_rgb(packed);
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
@@ -1012,9 +1012,9 @@ ccl_device_inline void volume_scatter_probability_get(KernelGlobals kg,
|
||||
|
||||
/* Contribution based criterion, see Eq. (15). */
|
||||
const float L_scattered = reduce_add(
|
||||
kernel_read_pass_float3(buffer + kernel_data.film.pass_volume_scatter_denoised));
|
||||
kernel_read_pass_rgbe(buffer + kernel_data.film.pass_volume_scatter_denoised));
|
||||
const float L_transmitted = reduce_add(
|
||||
kernel_read_pass_float3(buffer + kernel_data.film.pass_volume_transmit_denoised));
|
||||
kernel_read_pass_rgbe(buffer + kernel_data.film.pass_volume_transmit_denoised));
|
||||
const float L_volume = L_transmitted + L_scattered;
|
||||
|
||||
/* Compute guided scattering probability. */
|
||||
|
||||
@@ -1885,6 +1885,7 @@ enum DeviceKernel : int {
|
||||
DECLARE_FILM_CONVERT_KERNEL(SAMPLE_COUNT),
|
||||
DECLARE_FILM_CONVERT_KERNEL(FLOAT),
|
||||
DECLARE_FILM_CONVERT_KERNEL(LIGHT_PATH),
|
||||
DECLARE_FILM_CONVERT_KERNEL(RGBE),
|
||||
DECLARE_FILM_CONVERT_KERNEL(FLOAT3),
|
||||
DECLARE_FILM_CONVERT_KERNEL(MOTION),
|
||||
DECLARE_FILM_CONVERT_KERNEL(CRYPTOMATTE),
|
||||
|
||||
@@ -143,7 +143,7 @@ Pass::Pass() : Node(get_node_type()), is_auto_(false) {}
|
||||
|
||||
PassInfo Pass::get_info() const
|
||||
{
|
||||
return get_info(type, include_albedo, !lightgroup.empty());
|
||||
return get_info(type, mode, include_albedo, !lightgroup.empty());
|
||||
}
|
||||
|
||||
bool Pass::is_written() const
|
||||
@@ -151,7 +151,10 @@ bool Pass::is_written() const
|
||||
return get_info().is_written;
|
||||
}
|
||||
|
||||
PassInfo Pass::get_info(const PassType type, const bool include_albedo, const bool is_lightgroup)
|
||||
PassInfo Pass::get_info(const PassType type,
|
||||
const PassMode mode,
|
||||
const bool include_albedo,
|
||||
const bool is_lightgroup)
|
||||
{
|
||||
PassInfo pass_info;
|
||||
|
||||
@@ -280,10 +283,9 @@ PassInfo Pass::get_info(const PassType type, const bool include_albedo, const bo
|
||||
break;
|
||||
case PASS_VOLUME_SCATTER:
|
||||
case PASS_VOLUME_TRANSMIT:
|
||||
/* TODO(weizhen): Gaussian filter only needs 1 component, but we can have negative pixel
|
||||
* values in some channels, preventing us from simply add them together; besides, using RGB
|
||||
* channels is better for visualization. We can optimize the memory by using RGBE format. */
|
||||
pass_info.num_components = 3;
|
||||
/* Noisy buffer needs higher precision for accumulating the contribution, denoised buffer is
|
||||
* used directly and thus can have lower precision. */
|
||||
pass_info.num_components = (mode == PassMode::NOISY) ? 3 : 1;
|
||||
pass_info.use_exposure = true;
|
||||
pass_info.use_filter = false;
|
||||
pass_info.support_denoise = true;
|
||||
|
||||
@@ -75,6 +75,7 @@ class Pass : public Node {
|
||||
static const NodeEnum *get_mode_enum();
|
||||
|
||||
static PassInfo get_info(PassType type,
|
||||
const PassMode mode = PassMode::DENOISED,
|
||||
const bool include_albedo = false,
|
||||
const bool is_lightgroup = false);
|
||||
|
||||
|
||||
@@ -68,7 +68,7 @@ BufferPass::BufferPass(const Pass *scene_pass)
|
||||
|
||||
PassInfo BufferPass::get_info() const
|
||||
{
|
||||
return Pass::get_info(type, include_albedo, !lightgroup.empty());
|
||||
return Pass::get_info(type, mode, include_albedo, !lightgroup.empty());
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
|
||||
@@ -39,6 +39,7 @@ set(SRC
|
||||
util_math_fast_test.cpp
|
||||
util_math_float3_test.cpp
|
||||
util_math_float4_test.cpp
|
||||
util_rgbe_test.cpp
|
||||
util_md5_test.cpp
|
||||
util_path_test.cpp
|
||||
util_string_test.cpp
|
||||
|
||||
65
intern/cycles/test/util_rgbe_test.cpp
Normal file
65
intern/cycles/test/util_rgbe_test.cpp
Normal file
@@ -0,0 +1,65 @@
|
||||
/* SPDX-FileCopyrightText: 2025 Blender Foundation
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 */
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "util/log.h"
|
||||
#include "util/types_rgbe.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Encode a set of RGB triples to RGBE, decode them again and compare against the exact values
 * the format is expected to reproduce. */
TEST(RGBE, round_trip)
{
  /* Encode to RGBE and immediately decode back to RGB. */
  const auto round_trip = [](const float3 rgb) { return rgbe_to_rgb(rgb_to_rgbe(rgb)); };

  /* Ordinary values come back quantized to the shared exponent. */
  EXPECT_EQ(round_trip(make_float3(7.334898f, 5.811583f, 2.414717f)),
            make_float3(7.34375f, 5.8125f, 2.40625f));
  EXPECT_EQ(round_trip(make_float3(0.08750992f, 0.05150064f, 0.24991725f)),
            make_float3(0.087890625f, 0.05078125f, 0.25f));

  /* Components much smaller than the largest one are lost. */
  EXPECT_EQ(round_trip(make_float3(4e-6f, 30257.0f, 1.0f)), make_float3(0.0f, 30208.0f, 0.0f));

  /* Zero stays zero. */
  EXPECT_EQ(round_trip(zero_float3()), zero_float3());

  /* Just below half of the smallest representable value: rounds down to zero. */
  EXPECT_EQ(round_trip(make_float3(5.9e-8f, 0.0f, 0.0f)), zero_float3());

  /* Just above half of the smallest representable value: rounds up to it. */
  EXPECT_EQ(round_trip(make_float3(6.0e-8f, 0.0f, 0.0f)),
            make_float3(1.1920928955078125e-7f, 0.0f, 0.0f));

  /* Signs are preserved per component. */
  EXPECT_EQ(round_trip(make_float3(-0.863880f, 0.558654f, -0.223357f)),
            make_float3(-0.86328125f, 0.55859375f, -0.22265625f));

  /* Values beyond the representable range are clamped to it. */
  EXPECT_EQ(round_trip(make_float3(-FLT_MAX, FLT_MAX, 0.0f)),
            make_float3(-65280.0f, 65280.0f, 0.0f));

  /* Infinity is clamped as well; the other components are quantized relative to the clamp. */
  const float inf = __uint_as_float(0x7f800000);
  EXPECT_EQ(round_trip(make_float3(inf, 127.0f, 129.0f)), make_float3(65280.0f, 0.0f, 256.0f));

  /* No test for NaN, undefined behaviour. */
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
@@ -115,6 +115,7 @@ set(SRC_HEADERS
|
||||
types_int4.h
|
||||
types_int8.h
|
||||
types_spectrum.h
|
||||
types_rgbe.h
|
||||
types_uchar2.h
|
||||
types_uchar3.h
|
||||
types_uchar4.h
|
||||
|
||||
@@ -668,6 +668,11 @@ ccl_device_inline float xor_mask(const float x, const uint y)
|
||||
return __uint_as_float(__float_as_uint(x) ^ y);
|
||||
}
|
||||
|
||||
/* Bitwise OR of the bit pattern of `x` with `y`, with the result reinterpreted as a float. */
ccl_device_inline float or_mask(const float x, const uint y)
{
  const uint bits = __float_as_uint(x) | y;
  return __uint_as_float(bits);
}
|
||||
|
||||
ccl_device float bits_to_01(const uint bits)
|
||||
{
|
||||
return bits * (1.0f / (float)0xFFFFFFFF);
|
||||
|
||||
@@ -341,6 +341,11 @@ ccl_device_inline float vector_angle(const float3 a, const float3 b)
|
||||
return 2.0f * fast_atan2f(len(a - b), len(a + b));
|
||||
}
|
||||
|
||||
/* Extract the exponent from the bit pattern of `x` and remove the bias of 127, which for a
 * positive normalized float equals floor(log2(x)) (assuming the usual 32-bit IEEE-754 layout).
 *
 * The sign bit is not masked out, so the result is only meaningful for non-negative input. Zero
 * and subnormal values yield -127. */
ccl_device_inline int floor_log2f(const float x)
{
  const uint biased_exponent = __float_as_uint(x) >> 23;
  return (int)biased_exponent - 127;
}
|
||||
|
||||
/* Based on:
|
||||
*
|
||||
* https://github.com/LiraNuna/glsl-sse2/blob/master/source/vec4.h
|
||||
@@ -351,7 +356,7 @@ ccl_device float fast_log2f(float x)
|
||||
* negative values/NAN's. */
|
||||
x = clamp(x, FLT_MIN, FLT_MAX);
|
||||
const unsigned bits = __float_as_uint(x);
|
||||
const int exponent = (int)(bits >> 23) - 127;
|
||||
const int exponent = floor_log2f(x);
|
||||
const float f = __uint_as_float((bits & 0x007FFFFF) | 0x3f800000) - 1.0f;
|
||||
/* Examined 2130706432 values of log2 on [1.17549435e-38,3.40282347e+38]:
|
||||
* 0.0797524457 avg ULP diff, 3713596 max ULP, 7.62939e-06 max error.
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "util/types_float4.h" // IWYU pragma: export
|
||||
#include "util/types_float8.h" // IWYU pragma: export
|
||||
|
||||
#include "util/types_rgbe.h" // IWYU pragma: export
|
||||
#include "util/types_spectrum.h" // IWYU pragma: export
|
||||
|
||||
#include "util/types_dual.h" // IWYU pragma: export
|
||||
|
||||
109
intern/cycles/util/types_rgbe.h
Normal file
109
intern/cycles/util/types_rgbe.h
Normal file
@@ -0,0 +1,109 @@
|
||||
/* SPDX-FileCopyrightText: 2025 Blender Foundation
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "util/math_fast.h"
|
||||
#include "util/math_float3.h"
|
||||
#include "util/types_base.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
struct RGBE {
|
||||
union {
|
||||
struct {
|
||||
uint8_t r, g, b, e;
|
||||
};
|
||||
float f;
|
||||
};
|
||||
|
||||
RGBE() = default;
|
||||
|
||||
ccl_device_inline_method RGBE(const float f_) : f(f_) {}
|
||||
};
|
||||
|
||||
static_assert(sizeof(RGBE) == 4, "RGBE expected to be exactly 4 bytes");
|
||||
|
||||
/**
|
||||
* RGBE format represents an RGB value with 4 bytes.
|
||||
*
|
||||
* The original implementation by Greg Ward uses 8 bits for RGB each, plus 8 bits shared exponent.
|
||||
* It has the same relative precision as the 0 to 255 range of standard 24-bit image formats, but
|
||||
* offers extended intensity range:
|
||||
* https://www.graphics.cornell.edu/~bjw/rgbe.html
|
||||
* GL_EXT_texture_shared_exponent uses 9 bits for RGB and 5 bits for exponent instead, with smaller
|
||||
* range but higher precision:
|
||||
* https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
|
||||
*
|
||||
* Our implementation is mostly based on GL_EXT_texture_shared_exponent, but uses 8 bits for RGB
|
||||
* each, and adds 3 sign bits to represent negative values. The memory layout is as follows:
|
||||
*
|
||||
* xxxxxxxx xxxxxxxx xxxxxxxx xxx xxxxx
|
||||
* m(R) m(G) m(B) sgn exp
|
||||
*
|
||||
* Each float component is interpreted as
|
||||
 *   f = (-1)^sgn * 0.m * 2^(exp - bias)
|
||||
*
|
||||
* We choose a bias of 15, so that the largest representable value is
|
||||
* RGBE_MAX = 0.11111111 * 2^(31 - 15) = 65280,
|
||||
* and the smallest positive representable value is
|
||||
* RGBE_MIN = 0.00000001 * 2^(0 - 15) = 1.1920929e-7
|
||||
*/
|
||||
|
||||
#define RGBE_EXP_BIAS 15
|
||||
#define RGBE_MANTISSA_BITS 8
|
||||
#define RGBE_EXPONENT_BITS 5
|
||||
#define RGBE_MAX 65280.0f
|
||||
|
||||
ccl_device RGBE rgb_to_rgbe(float3 rgb)
|
||||
{
|
||||
const float max_v = min(reduce_max(fabs(rgb)), RGBE_MAX);
|
||||
if (max_v < ldexpf(0.5f, -RGBE_EXP_BIAS - RGBE_MANTISSA_BITS)) {
|
||||
return RGBE(0.0f);
|
||||
}
|
||||
|
||||
int e = max(-RGBE_EXP_BIAS - 1, floor_log2f(max_v)) + 1;
|
||||
float v = ldexpf(1.0f, RGBE_MANTISSA_BITS - e);
|
||||
|
||||
/* The original implementation by Greg Ward uses `floor`, causing systematic bias when
|
||||
* accumulated in a buffer.
|
||||
* We use `round` instead, but need to deal with overflow. */
|
||||
if (int(roundf(max_v * v)) == power_of_2(RGBE_MANTISSA_BITS)) {
|
||||
e += 1;
|
||||
v *= 0.5f;
|
||||
}
|
||||
|
||||
/* Get sign bits. */
|
||||
const uint sign_bits = ((__float_as_uint(rgb.x) >> 31) << 7) |
|
||||
((__float_as_uint(rgb.y) >> 31) << 6) |
|
||||
((__float_as_uint(rgb.z) >> 31) << 5);
|
||||
|
||||
RGBE rgbe;
|
||||
rgb = min(round(fabs(rgb) * v), make_float3(255.0f));
|
||||
rgbe.r = uint8_t(rgb.x);
|
||||
rgbe.g = uint8_t(rgb.y);
|
||||
rgbe.b = uint8_t(rgb.z);
|
||||
rgbe.e = uint8_t(((e + RGBE_EXP_BIAS) & 0x1Fu) | sign_bits);
|
||||
return rgbe;
|
||||
}
|
||||
|
||||
/* Unpack a 4-byte RGBE value into an RGB triple.
 *
 * Each channel is its 8-bit mantissa scaled by two to the power of the shared exponent (low five
 * bits of the `e` byte, minus the bias and the mantissa width). The sign of each channel is then
 * restored from bits 7, 6 and 5 of the `e` byte. */
ccl_device_inline float3 rgbe_to_rgb(const RGBE rgbe)
{
  /* With all mantissas zero the value is zero, whatever the exponent and sign bits are.
   *
   * Test the mantissa bytes rather than comparing the aliased float against 0.0f: a valid
   * encoding such as r = 1, g = b = e = 0 has a subnormal bit pattern when viewed as a float,
   * and a float comparison that treats subnormals as zero would decode it as zero. */
  if ((rgbe.r | rgbe.g | rgbe.b) == 0) {
    return zero_float3();
  }

  const int e = rgbe.e & 0x1Fu;
  const float f = ldexpf(1.0f, e - (int)(RGBE_EXP_BIAS + RGBE_MANTISSA_BITS));
  float3 result = make_float3(rgbe.r, rgbe.g, rgbe.b) * f;

  /* Set sign bits: move each stored sign to the top bit of the corresponding float. */
  result.x = or_mask(result.x, (uint(rgbe.e) & 0x80u) << 24);
  result.y = or_mask(result.y, (uint(rgbe.e) & 0x40u) << 25);
  result.z = or_mask(result.z, (uint(rgbe.e) & 0x20u) << 26);
  return result;
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
Reference in New Issue
Block a user