/* SPDX-FileCopyrightText: 2021 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup eevee
*
* Random number generator, contains persistent state and sample count logic.
*/
#include "BKE_colortools.hh"
#include "BKE_scene.hh"
#include "BLI_rand.h"
#include "BLI_math_base.hh"
#include "BLI_math_base_safe.h"
#include "eevee_instance.hh"
#include "eevee_sampling.hh"
namespace blender::eevee {
/* -------------------------------------------------------------------- */
/** \name Sampling
* \{ */
void Sampling::init(const Scene *scene)
{
sample_count_ = inst_.is_viewport() ? scene->eevee.taa_samples : scene->eevee.taa_render_samples;
if (inst_.is_image_render) {
sample_count_ = math::max(uint64_t(1), sample_count_);
}
if (sample_count_ == 0) {
BLI_assert(inst_.is_viewport());
sample_count_ = infinite_sample_count_;
}
if (inst_.is_viewport()) {
/* We can't rely on the film module as it is initialized later. */
int pixel_size = BKE_render_preview_pixel_size(&inst_.scene->r);
if (pixel_size > 1) {
/* Enforce to render at least all the film pixel once. */
sample_count_ = max_ii(sample_count_, square_i(pixel_size));
}
}
motion_blur_steps_ = !inst_.is_viewport() && ((scene->r.mode & R_MBLUR) != 0) ?
scene->eevee.motion_blur_steps :
1;
sample_count_ = divide_ceil_u(sample_count_, motion_blur_steps_);
if (scene->eevee.flag & SCE_EEVEE_DOF_JITTER) {
if (sample_count_ == infinite_sample_count_) {
/* Special case for viewport continuous rendering. We clamp to a max sample
* to avoid the jittered dof never converging. */
dof_ring_count_ = 6;
}
else {
dof_ring_count_ = sampling_web_ring_count_get(dof_web_density_, sample_count_);
}
dof_sample_count_ = sampling_web_sample_count_get(dof_web_density_, dof_ring_count_);
/* Change total sample count to fill the web pattern entirely. */
sample_count_ = divide_ceil_u(sample_count_, dof_sample_count_) * dof_sample_count_;
}
else {
dof_ring_count_ = 0;
dof_sample_count_ = 1;
}
/* Only multiply after to have full the full DoF web pattern for each time steps. */
sample_count_ *= motion_blur_steps_;
auto clamp_value_load = [](float value) { return (value > 0.0) ? value : 1e20; };
EEVEE-Next: Sunlight Extraction Sun extraction convert part of light comming from the world to a sun light which increases the quality of the rendering. The goal of this feature is to workaround the limitation of the storage techniques used for environment lighting inside EEVEE. This first implementation works by clamping the world lighting and summing the excess lighting and (and its incomming directions) to deduce the sun position. All the lighting is then transfered into this light power. The sun angle is computed based on the directionnality of the excess lighting, the more divergent the excess lighting is, the bigger the angle. This has a few benefits: - It's stable and behave well under animation. This is because we average a lot of data. - It's fast as it can be done inside the remap shader in one pass. - It requires only one parameter, the clamp threshold. However, it has some issue: - It modifies the lighting as we change the incomming direction for excess lighting away from the chosen sun direction. This could be fixed by masking only lighting around the chosen sun direction (requires 2 passes, slower). - Given that this only average the direction, it behaves poorly if there two opposite bright light sources (it puts the sun in the middle). This could be fixed by extracting more suns, but that becomes more complex and requires even more passes. - It looks bad if the summed lighting is not supposed to be a perfect disk in specular reflections or if the sources are too divergent as the disk is too big and the approximation fails short. This could be mitigated by adding an upper bound to the sun radius. For now we workaround this issue by exposing the sun angle parameter in the UI. A more precise algorithm can be implemented in the future to avoid having to deal with these limitations. A possibility is to use importance sampling to randomize sun position. But that would be only for final render. 
Pull Request: https://projects.blender.org/blender/blender/pulls/121455
2024-05-14 16:36:12 +02:00
clamp_data_.sun_threshold = clamp_value_load(inst_.world.sun_threshold());
clamp_data_.surface_direct = clamp_value_load(scene->eevee.clamp_surface_direct);
clamp_data_.surface_indirect = clamp_value_load(scene->eevee.clamp_surface_indirect);
clamp_data_.volume_direct = clamp_value_load(scene->eevee.clamp_volume_direct);
clamp_data_.volume_indirect = clamp_value_load(scene->eevee.clamp_volume_indirect);
}
void Sampling::init(const Object &probe_object)
{
BLI_assert(inst_.is_baking());
const ::LightProbe &lightprobe = DRW_object_get_data_for_drawing<::LightProbe>(probe_object);
sample_count_ = max_ii(1, lightprobe.grid_bake_samples);
sample_ = 0;
}
void Sampling::end_sync()
{
if (reset_) {
viewport_sample_ = 0;
}
if (inst_.is_viewport()) {
interactive_mode_ = viewport_sample_ < interactive_mode_threshold;
bool interactive_mode_disabled = (inst_.scene->eevee.flag & SCE_EEVEE_TAA_REPROJECTION) == 0 ||
inst_.is_viewport_image_render;
if (interactive_mode_disabled) {
interactive_mode_ = false;
sample_ = viewport_sample_;
}
else if (interactive_mode_) {
int interactive_sample_count = interactive_sample_max_;
if (viewport_sample_ < interactive_sample_count) {
/* Loop over the same starting samples. */
sample_ = sample_ % interactive_sample_count;
}
else {
/* Break out of the loop and resume normal pattern. */
sample_ = interactive_sample_count;
}
}
}
}
void Sampling::step()
{
{
/* Repeat the sequence for all pixels that are being up-scaled. */
uint64_t sample_filter = sample_ / square_i(inst_.film.scaling_factor_get());
EEVEE-Next: Ray-tracing Denoise Pipeline This is a full rewrite of the raytracing denoise pipeline. It uses the same principle as before but now uses compute shaders for every stages and a tile base approach. More aggressive filtering is needed since we are moving towards having no prefiltered screen radiance buffer. Thus we introduce a temporal denoise and a bilateral denoise stage to the denoising. These are optionnal and can be disabled. Note that this patch does not include any tracing part and only samples the reflection probes. It is focused on denoising only. Tracing will come in another PR. The motivation for this is that having hardware raytracing support means we can't prefilter the radiance in screen space so we have to have better denoising. Also this means we can have better surface appearance with support for other BxDF model than GGX. Also GGX support is improved. Technically, the new denoising fixes some implementation mistake the old pipeline did. It separates all 3 stages (spatial, temporal, bilateral) and use random sampling for all stages hoping to create a noisy enough (but still stable) output so that the TAA soaks the remaining noise. However that's not always the case. Depending on the nature of the scene, the input can be very high frequency and might create lots of flickering. That why another solution needs to be found for the higher roughness material as denoising them becomes expensive and low quality. Pull Request: https://projects.blender.org/blender/blender/pulls/110117
2023-08-03 15:32:06 +02:00
if (interactive_mode()) {
sample_filter = sample_filter % interactive_sample_aa_;
}
/* TODO(fclem) we could use some persistent states to speedup the computation. */
double2 r, offset = {0, 0};
/* Using 2,3 primes as per UE4 Temporal AA presentation.
2022-07-01 11:18:58 +10:00
* http://advances.realtimerendering.com/s2014/epic/TemporalAA.pptx (slide 14) */
uint2 primes = {2, 3};
BLI_halton_2d(primes, offset, sample_filter + 1, r);
/* WORKAROUND: We offset the distribution to make the first sample (0,0). This way, we are
* assured that at least one of the samples inside the TAA rotation will match the one from the
* draw manager. This makes sure overlays are correctly composited in static scene. */
data_.dimensions[SAMPLING_FILTER_U] = fractf(r[0] + (1.0 / 2.0));
data_.dimensions[SAMPLING_FILTER_V] = fractf(r[1] + (2.0 / 3.0));
/* TODO de-correlate. */
data_.dimensions[SAMPLING_TIME] = r[0];
data_.dimensions[SAMPLING_CLOSURE] = r[1];
data_.dimensions[SAMPLING_RAYTRACE_X] = r[0];
}
{
double3 r, offset = {0, 0, 0};
uint3 primes = {5, 7, 3};
BLI_halton_3d(primes, offset, sample_ + 1, r);
data_.dimensions[SAMPLING_LENS_U] = r[0];
data_.dimensions[SAMPLING_LENS_V] = r[1];
/* TODO de-correlate. */
data_.dimensions[SAMPLING_LIGHTPROBE] = r[0];
data_.dimensions[SAMPLING_TRANSPARENCY] = r[1];
/* TODO de-correlate. */
data_.dimensions[SAMPLING_AO_U] = r[0];
data_.dimensions[SAMPLING_AO_V] = r[1];
data_.dimensions[SAMPLING_AO_W] = r[2];
/* TODO de-correlate. */
data_.dimensions[SAMPLING_CURVES_U] = r[0];
}
{
EEVEE-Next: Ray-tracing Denoise Pipeline This is a full rewrite of the raytracing denoise pipeline. It uses the same principle as before but now uses compute shaders for every stages and a tile base approach. More aggressive filtering is needed since we are moving towards having no prefiltered screen radiance buffer. Thus we introduce a temporal denoise and a bilateral denoise stage to the denoising. These are optionnal and can be disabled. Note that this patch does not include any tracing part and only samples the reflection probes. It is focused on denoising only. Tracing will come in another PR. The motivation for this is that having hardware raytracing support means we can't prefilter the radiance in screen space so we have to have better denoising. Also this means we can have better surface appearance with support for other BxDF model than GGX. Also GGX support is improved. Technically, the new denoising fixes some implementation mistake the old pipeline did. It separates all 3 stages (spatial, temporal, bilateral) and use random sampling for all stages hoping to create a noisy enough (but still stable) output so that the TAA soaks the remaining noise. However that's not always the case. Depending on the nature of the scene, the input can be very high frequency and might create lots of flickering. That why another solution needs to be found for the higher roughness material as denoising them becomes expensive and low quality. Pull Request: https://projects.blender.org/blender/blender/pulls/110117
2023-08-03 15:32:06 +02:00
uint64_t sample_raytrace = sample_;
if (interactive_mode()) {
sample_raytrace = sample_raytrace % interactive_sample_raytrace_;
}
/* Using leaped Halton sequence so we can reused the same primes as lens. */
double3 r, offset = {0, 0, 0};
uint64_t leap = 13;
uint3 primes = {5, 7, 11};
EEVEE-Next: Volume: Fragment shader voxelization This replaces the compute shader pass for volume material properties voxelization by a fragment shader that is run only once per pixel. The fragment shader then execute the nodetree in a loop for each individual froxel. The motivations are: - faster evaluation of homogenous materials: can evaluate nodetree once and fast write the properties for all froxel in a loop. This matches cycles homogenous material optimization (except that it only considers the first hit). - no invocations for empty froxels: not restricted to box dispach. - support for more than one material: invocations are per pixel. - cleaner implementation (no compute shader specific paths). Implementation wise, this is done by adding a stencil texture when rendering volumetric objects. It is populated during the occupancy phase but it is not directly used (the stencil test is enabled but since we use `imageAtomic` to set the occupancy bits, the fragment shader is forced to be run). The early depth-test is then turned on for the material properties pass, allowing only one fragment to be invoked. This fragment runs the nodetree at the desired frequency: once per direction (homogenous), or once per froxel (heterogenous). Note that I tried to use the frontmost fragment using a depth equal test but it was failing for some reason on Apple silicon producing flickering artifacts. We might reconsider this frontmost fragment approach later since the result is now face order dependant when an object has multiple materials. Pull Request: https://projects.blender.org/blender/blender/pulls/119439
2024-04-05 16:33:58 +02:00
BLI_halton_3d(primes, offset, sample_raytrace * leap + 1, r);
data_.dimensions[SAMPLING_SHADOW_U] = r[0];
data_.dimensions[SAMPLING_SHADOW_V] = r[1];
data_.dimensions[SAMPLING_SHADOW_W] = r[2];
/* TODO de-correlate. */
data_.dimensions[SAMPLING_RAYTRACE_U] = r[0];
data_.dimensions[SAMPLING_RAYTRACE_V] = r[1];
data_.dimensions[SAMPLING_RAYTRACE_W] = r[2];
EEVEE-Next: Volume: Fragment shader voxelization This replaces the compute shader pass for volume material properties voxelization by a fragment shader that is run only once per pixel. The fragment shader then execute the nodetree in a loop for each individual froxel. The motivations are: - faster evaluation of homogenous materials: can evaluate nodetree once and fast write the properties for all froxel in a loop. This matches cycles homogenous material optimization (except that it only considers the first hit). - no invocations for empty froxels: not restricted to box dispach. - support for more than one material: invocations are per pixel. - cleaner implementation (no compute shader specific paths). Implementation wise, this is done by adding a stencil texture when rendering volumetric objects. It is populated during the occupancy phase but it is not directly used (the stencil test is enabled but since we use `imageAtomic` to set the occupancy bits, the fragment shader is forced to be run). The early depth-test is then turned on for the material properties pass, allowing only one fragment to be invoked. This fragment runs the nodetree at the desired frequency: once per direction (homogenous), or once per froxel (heterogenous). Note that I tried to use the frontmost fragment using a depth equal test but it was failing for some reason on Apple silicon producing flickering artifacts. We might reconsider this frontmost fragment approach later since the result is now face order dependant when an object has multiple materials. Pull Request: https://projects.blender.org/blender/blender/pulls/119439
2024-04-05 16:33:58 +02:00
}
{
double3 r, offset = {0, 0, 0};
uint3 primes = {2, 3, 5};
BLI_halton_3d(primes, offset, sample_ + 1, r);
/* WORKAROUND: We offset the distribution to make the first sample (0,0,0). */
/* TODO de-correlate. */
data_.dimensions[SAMPLING_SHADOW_I] = fractf(r[0] + (1.0 / 2.0));
data_.dimensions[SAMPLING_SHADOW_J] = fractf(r[1] + (2.0 / 3.0));
data_.dimensions[SAMPLING_SHADOW_K] = fractf(r[2] + (4.0 / 5.0));
}
EEVEE-Next: Volume: Fragment shader voxelization This replaces the compute shader pass for volume material properties voxelization by a fragment shader that is run only once per pixel. The fragment shader then execute the nodetree in a loop for each individual froxel. The motivations are: - faster evaluation of homogenous materials: can evaluate nodetree once and fast write the properties for all froxel in a loop. This matches cycles homogenous material optimization (except that it only considers the first hit). - no invocations for empty froxels: not restricted to box dispach. - support for more than one material: invocations are per pixel. - cleaner implementation (no compute shader specific paths). Implementation wise, this is done by adding a stencil texture when rendering volumetric objects. It is populated during the occupancy phase but it is not directly used (the stencil test is enabled but since we use `imageAtomic` to set the occupancy bits, the fragment shader is forced to be run). The early depth-test is then turned on for the material properties pass, allowing only one fragment to be invoked. This fragment runs the nodetree at the desired frequency: once per direction (homogenous), or once per froxel (heterogenous). Note that I tried to use the frontmost fragment using a depth equal test but it was failing for some reason on Apple silicon producing flickering artifacts. We might reconsider this frontmost fragment approach later since the result is now face order dependant when an object has multiple materials. Pull Request: https://projects.blender.org/blender/blender/pulls/119439
2024-04-05 16:33:58 +02:00
{
uint64_t sample_volume = sample_;
if (interactive_mode()) {
sample_volume = sample_volume % interactive_sample_volume_;
}
double3 r, offset = {0, 0, 0};
uint3 primes = {2, 3, 5};
BLI_halton_3d(primes, offset, sample_volume + 1, r);
/* WORKAROUND: We offset the distribution to make the first sample (0,0,0). */
data_.dimensions[SAMPLING_VOLUME_U] = fractf(r[0] + (1.0 / 2.0));
data_.dimensions[SAMPLING_VOLUME_V] = fractf(r[1] + (2.0 / 3.0));
data_.dimensions[SAMPLING_VOLUME_W] = fractf(r[2] + (4.0 / 5.0));
}
{
/* Using leaped Halton sequence so we can reused the same primes. */
double2 r, offset = {0, 0};
uint64_t leap = 5;
uint2 primes = {2, 3};
EEVEE-Next: Volume: Fragment shader voxelization This replaces the compute shader pass for volume material properties voxelization by a fragment shader that is run only once per pixel. The fragment shader then execute the nodetree in a loop for each individual froxel. The motivations are: - faster evaluation of homogenous materials: can evaluate nodetree once and fast write the properties for all froxel in a loop. This matches cycles homogenous material optimization (except that it only considers the first hit). - no invocations for empty froxels: not restricted to box dispach. - support for more than one material: invocations are per pixel. - cleaner implementation (no compute shader specific paths). Implementation wise, this is done by adding a stencil texture when rendering volumetric objects. It is populated during the occupancy phase but it is not directly used (the stencil test is enabled but since we use `imageAtomic` to set the occupancy bits, the fragment shader is forced to be run). The early depth-test is then turned on for the material properties pass, allowing only one fragment to be invoked. This fragment runs the nodetree at the desired frequency: once per direction (homogenous), or once per froxel (heterogenous). Note that I tried to use the frontmost fragment using a depth equal test but it was failing for some reason on Apple silicon producing flickering artifacts. We might reconsider this frontmost fragment approach later since the result is now face order dependant when an object has multiple materials. Pull Request: https://projects.blender.org/blender/blender/pulls/119439
2024-04-05 16:33:58 +02:00
BLI_halton_2d(primes, offset, sample_ * leap + 1, r);
data_.dimensions[SAMPLING_SHADOW_X] = r[0];
data_.dimensions[SAMPLING_SHADOW_Y] = r[1];
/* TODO de-correlate. */
data_.dimensions[SAMPLING_SSS_U] = r[0];
data_.dimensions[SAMPLING_SSS_V] = r[1];
}
{
/* Don't leave unused data undefined. */
data_.dimensions[SAMPLING_UNUSED_0] = 0.0f;
data_.dimensions[SAMPLING_UNUSED_1] = 0.0f;
data_.dimensions[SAMPLING_UNUSED_2] = 0.0f;
}
for (int i : IndexRange(SAMPLING_DIMENSION_COUNT)) {
/* These numbers are often fed to `sqrt`. Make sure their values are in the expected range. */
BLI_assert(data_.dimensions[i] >= 0.0f);
BLI_assert(data_.dimensions[i] < 1.0f);
UNUSED_VARS_NDEBUG(i);
}
data_.push_update();
viewport_sample_++;
sample_++;
reset_ = false;
}
void Sampling::reset()
{
  /* Viewport only: request that the sample accumulation restarts.
   * The actual restart happens in `end_sync()`, which checks `reset_`. */
  BLI_assert(inst_.is_viewport());
  reset_ = true;
}
bool Sampling::is_reset() const
{
  /* Viewport only: whether a restart of the sample accumulation has been requested
   * (and not yet consumed by `step()`, which clears the flag). */
  BLI_assert(inst_.is_viewport());
  return reset_;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Sampling patterns
* \{ */
float3 Sampling::sample_ball(const float3 &rand)
{
  /* First random dimension maps uniformly to the cosine of the polar angle. */
  const float cos_theta = rand.x * 2.0f - 1.0f;
  const float sin_theta = sqrtf(fmaxf(0.0f, 1.0f - square_f(cos_theta)));
  /* Second dimension gives the azimuth. */
  const float phi = rand.y * 2.0f * M_PI;
  /* Third dimension drives the radius. */
  const float radius = sqrtf(sqrtf(rand.z));

  float3 point;
  point.x = sin_theta * cosf(phi);
  point.y = sin_theta * sinf(phi);
  point.z = cos_theta;
  return point * radius;
}
float2 Sampling::sample_disk(const float2 &rand)
{
  /* Uniform disk sample: radius is the square root of a uniform variate, angle is uniform. */
  const float angle = rand.y * 2.0f * M_PI;
  const float radius = sqrtf(rand.x);
  return float2(radius * cosf(angle), radius * sinf(angle));
}
float3 Sampling::sample_hemisphere(const float2 &rand)
{
  /* rand.x is used directly as the cosine of the polar angle (Z-up hemisphere). */
  const float z = rand.x;
  const float r = safe_sqrtf(1.0f - square_f(z));
  const float phi = rand.y * 2.0f * M_PI;
  return float3(r * cosf(phi), r * sinf(phi), z);
}
float3 Sampling::sample_sphere(const float2 &rand)
{
  /* rand.x maps to the full [-1, 1] cosine range, covering the whole sphere. */
  const float z = rand.x * 2.0f - 1.0f;
  const float r = safe_sqrtf(1.0f - square_f(z));
  const float phi = rand.y * 2.0f * M_PI;
  return float3(r * cosf(phi), r * sinf(phi), z);
}
float2 Sampling::sample_spiral(const float2 &rand)
{
  /* Fibonacci spiral distribution. */
  float theta = 4.0f * M_PI * (1.0f + sqrtf(5.0f)) * rand.x;
  /* Randomly rotate the whole spiral to decorrelate successive samples. */
  theta += rand.y * 2.0f * M_PI;
  const float radius = sqrtf(rand.x);
  return float2(radius * cosf(theta), radius * sinf(theta));
}
void Sampling::dof_disk_sample_get(float *r_radius, float *r_theta) const
{
  /* Return polar coordinates (normalized radius, angle) on the DoF sampling web for the
   * current sample index. Outputs (0, 0) when DoF jitter is disabled. */
  if (dof_ring_count_ == 0) {
    *r_radius = *r_theta = 0.0f;
    return;
  }

  int s = sample_ - 1;
  int ring = 0;
  int ring_sample_count = 1;
  int ring_sample = 1;

  /* Stride through the web so consecutive samples land far apart. */
  s = s * (dof_web_density_ - 1);
  s = s % dof_sample_count_;

  /* Choose samples so we get faster convergence.
   * The issue here is that we cannot map a low discrepancy sequence to this sampling pattern
   * because the same sample could be chosen twice in relatively short intervals. */
  /* For now just use an ascending sequence with an offset. This gives us relatively quick
   * initial coverage and relatively high distance between samples. */
  /* TODO(@fclem) We can try to order samples based on a LDS into a table to avoid duplicates.
   * The drawback would be some memory consumption and initialize time. */
  int samples_passed = 1;
  while (s >= samples_passed) {
    ring++;
    ring_sample_count = ring * dof_web_density_;
    ring_sample = s - samples_passed;
    ring_sample = (ring_sample + 1) % ring_sample_count;
    samples_passed += ring_sample_count;
  }

  *r_radius = ring / float(dof_ring_count_);
  *r_theta = 2.0f * M_PI * ring_sample / float(ring_sample_count);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Cumulative Distribution Function (CDF)
* \{ */
void Sampling::cdf_from_curvemapping(const CurveMapping &curve, Vector<float> &cdf)
{
  /* Build a discrete CDF by accumulating curve evaluations over [0, 1]. */
  BLI_assert(cdf.size() > 1);
  cdf[0] = 0.0f;
  for (int u : IndexRange(cdf.size() - 1)) {
    const float x = float(u + 1) / float(cdf.size() - 1);
    cdf[u + 1] = cdf[u] + BKE_curvemapping_evaluateF(&curve, 0, x);
  }
  /* Normalize so the distribution integrates to 1. */
  const float total = cdf.last();
  for (float &value : cdf) {
    value /= total;
  }
  /* Force an exact endpoint, guarding against floating point error. */
  cdf.last() = 1.0f;
}
void Sampling::cdf_invert(Vector<float> &cdf, Vector<float> &inverted_cdf)
{
  /* Numerically invert a normalized CDF by linear search + interpolation. */
  BLI_assert(cdf.first() == 0.0f && cdf.last() == 1.0f);
  for (int u : inverted_cdf.index_range()) {
    /* Keep x strictly inside (0, 1) so a bracketing CDF entry always exists. */
    const float x = clamp_f(u / float(inverted_cdf.size() - 1), 1e-5f, 1.0f - 1e-5f);
    for (int i : cdf.index_range().drop_front(1)) {
      if (cdf[i] < x) {
        continue;
      }
      /* `t` lies in [-1, 0]: `i + t` interpolates between index i-1 (t = -1) and i (t = 0). */
      const float t = (x - cdf[i]) / (cdf[i] - cdf[i - 1]);
      inverted_cdf[u] = (float(i) + t) / float(cdf.size() - 1);
      break;
    }
  }
}
/** \} */
} // namespace blender::eevee