Merge branch 'blender-v5.0-release'

This commit is contained in:
Hans Goudey
2025-10-08 10:15:37 -04:00
19 changed files with 525 additions and 213 deletions

View File

@@ -315,7 +315,11 @@ class NodeMenu(Menu):
if groups:
layout.separator()
for group in groups:
props = cls.node_operator(layout, node_tree_group_type[group.bl_idname], label=group.name)
search_weight = -1.0 if group.is_linked_packed else 0.0
props = cls.node_operator(layout,
node_tree_group_type[group.bl_idname],
label=group.name,
search_weight=search_weight)
ops = props.settings.add()
ops.name = "node_tree"
ops.value = "bpy.data.node_groups[{!r}]".format(group.name)

View File

@@ -548,7 +548,10 @@ set(GLSL_SRC
engines/eevee/shaders/eevee_surfel_cluster_build_comp.glsl
engines/eevee/shaders/eevee_surfel_light_comp.glsl
engines/eevee/shaders/eevee_surfel_list_build_comp.glsl
engines/eevee/shaders/eevee_surfel_list_flatten_comp.glsl
engines/eevee/shaders/eevee_surfel_list_lib.glsl
engines/eevee/shaders/eevee_surfel_list_prefix_comp.glsl
engines/eevee/shaders/eevee_surfel_list_prepare_comp.glsl
engines/eevee/shaders/eevee_surfel_list_sort_comp.glsl
engines/eevee/shaders/eevee_surfel_ray_comp.glsl
engines/eevee/shaders/eevee_thickness_lib.glsl

View File

@@ -8,6 +8,8 @@
* An instance contains all structures needed to do a complete render.
*/
#include "CLG_log.h"
#include "BKE_global.hh"
#include "BKE_object.hh"
@@ -18,7 +20,6 @@
#include "DEG_depsgraph_query.hh"
#include "DNA_ID.h"
#include "DNA_lightprobe_types.h"
#include "DNA_modifier_types.h"
@@ -30,17 +31,18 @@
#include "RE_pipeline.h"
#include "eevee_engine.h"
#include "eevee_instance.hh"
#include "DNA_particle_types.h"
#include "draw_common.hh"
#include "draw_context_private.hh"
#include "draw_debug.hh"
#include "draw_view_data.hh"
namespace blender::eevee {
CLG_LogRef Instance::log = {"eevee"};
void *Instance::debug_scope_render_sample = nullptr;
void *Instance::debug_scope_irradiance_setup = nullptr;
void *Instance::debug_scope_irradiance_sample = nullptr;
@@ -885,6 +887,7 @@ void Instance::light_bake_irradiance(
volume_probes.bake.init(probe);
custom_pipeline_wrapper([&]() {
drw_debug_clear();
this->render_sync();
while ((materials.queued_shaders_count > 0) || (materials.queued_textures_count > 0)) {
GPU_pass_cache_wait_for_all();
@@ -921,6 +924,9 @@ void Instance::light_bake_irradiance(
DRW_submission_end();
}
/* Avoid big setup job to be queued with the sampling commands. */
GPU_flush();
});
if (volume_probes.bake.should_break()) {
@@ -928,14 +934,26 @@ void Instance::light_bake_irradiance(
}
sampling.init(probe);
/* Start with 1 sample and progressively ramp up. */
float time_per_sample_ms_smooth = 16.0f;
double last_update_timestamp = BLI_time_now_seconds();
while (!sampling.finished()) {
context_wrapper([&]() {
DebugScope debug_scope(debug_scope_irradiance_sample, "EEVEE.irradiance_sample");
/* Batch ray cast by pack of 16. Avoids too much overhead of the update function & context
* switch. */
/* TODO(fclem): Could make the number of iteration depend on the computation time. */
for (int i = 0; i < 16 && !sampling.finished(); i++) {
int remaining_samples = sampling.sample_count() - sampling.sample_index();
/* In background mode, assume we don't need as much interactivity. */
int time_budget_ms = G.background ? 32 : 16;
/* Batch ray cast. Avoids too much overhead of the context switch. */
int sample_count_in_batch = ceilf(time_budget_ms / max(0.1f, time_per_sample_ms_smooth));
/* Avoid batching too many rays, keep system responsive in case of bad values. */
sample_count_in_batch = min_iii(32, sample_count_in_batch, remaining_samples);
CLOG_INFO(&Instance::log, "IrradianceBake: Casting %d rays.", sample_count_in_batch);
double time_it_begin_ms = BLI_time_now_seconds() * 1000.0;
for (int i = 0; i < sample_count_in_batch && !sampling.finished(); i++) {
sampling.step();
{
/* Critical section. Potential gpu::Shader concurrent usage. */
@@ -947,19 +965,29 @@ void Instance::light_bake_irradiance(
DRW_submission_end();
}
}
};
/* We use GPU_finish to take into account the GPU processing time. */
/* TODO(fclem): Could use timer queries to keep pipelining of GPU commands if that becomes a
* real bottleneck. */
GPU_finish();
double time_it_end_ms = BLI_time_now_seconds() * 1000.0;
float time_per_sample_ms = float(time_it_end_ms - time_it_begin_ms) / sample_count_in_batch;
/* Exponential average. */
time_per_sample_ms_smooth = interpolate(time_per_sample_ms_smooth, time_per_sample_ms, 0.7f);
LightProbeGridCacheFrame *cache_frame;
if (sampling.finished()) {
cache_frame = volume_probes.bake.read_result_packed();
result_update(volume_probes.bake.read_result_packed(), 1.0f);
}
else {
/* TODO(fclem): Only do this read-back if needed. But it might be tricky to know when. */
cache_frame = volume_probes.bake.read_result_unpacked();
double time_since_last_update_ms = BLI_time_now_seconds() - last_update_timestamp;
/* Only readback every 1 second. This readback is relatively expensive. */
if (time_since_last_update_ms > 1.0) {
float progress = sampling.sample_index() / float(sampling.sample_count());
result_update(volume_probes.bake.read_result_unpacked(), progress);
last_update_timestamp = BLI_time_now_seconds();
}
}
float progress = sampling.sample_index() / float(sampling.sample_count());
result_update(cache_frame, progress);
});
if (stop()) {

View File

@@ -12,6 +12,8 @@
#include <fmt/format.h>
#include "CLG_log.h"
#include "BLI_string.h"
#include "BLT_translation.hh"
@@ -123,6 +125,8 @@ class Instance : public DrawEngine {
LightProbeModule light_probes;
VolumeModule volume;
static CLG_LogRef log;
/** Input data. */
Depsgraph *depsgraph;
Manager *manager;

View File

@@ -215,8 +215,10 @@ struct Surfel {
bool32_t double_sided;
/** Surface receiver light set for light linking. */
uint receiver_light_set;
int _pad0;
int _pad1;
/** List index this surfel is in. */
int list_id;
/** Index of this surfel inside the sorted list. Allow access to previous and next surfel id. */
int index_in_sorted_list;
/** Surface radiance: Emission + Direct Lighting. */
SurfelRadiance radiance_direct;
/** Surface radiance: Indirect Lighting. Double buffered to avoid race conditions. */
@@ -281,7 +283,7 @@ struct SurfelListInfoData {
/** Maximum number of list. Is equal to `ray_grid_size.x * ray_grid_size.y`. */
int list_max;
int _pad0;
int list_prefix_sum;
};
BLI_STATIC_ASSERT_ALIGN(SurfelListInfoData, 16)

View File

@@ -793,21 +793,67 @@ void IrradianceBake::sync()
PassSimple &pass = surfel_ray_build_ps_;
pass.init();
{
PassSimple::Sub &sub = pass.sub("ListBuild");
sub.shader_set(inst_.shaders.static_shader_get(SURFEL_LIST_BUILD));
/* Count number of surfel per list. */
PassSimple::Sub &sub = pass.sub("ListPrepare");
sub.shader_set(inst_.shaders.static_shader_get(SURFEL_LIST_PREPARE));
sub.bind_ssbo(SURFEL_BUF_SLOT, &surfels_buf_);
sub.bind_ssbo(CAPTURE_BUF_SLOT, &capture_info_buf_);
sub.bind_ssbo("list_start_buf", &list_start_buf_);
sub.bind_ssbo("list_counter_buf", &list_counter_buf_);
sub.bind_ssbo("list_info_buf", &list_info_buf_);
sub.barrier(GPU_BARRIER_SHADER_STORAGE);
sub.dispatch(&dispatch_per_surfel_);
}
{
/* Prefix sum of list sizes. Outputs an IndexRange per list. */
PassSimple::Sub &sub = pass.sub("ListPrefix");
sub.shader_set(inst_.shaders.static_shader_get(SURFEL_LIST_PREFIX));
sub.bind_ssbo(SURFEL_BUF_SLOT, &surfels_buf_);
sub.bind_ssbo(CAPTURE_BUF_SLOT, &capture_info_buf_);
sub.bind_ssbo("list_counter_buf", &list_counter_buf_);
sub.bind_ssbo("list_range_buf", &list_range_buf_);
sub.bind_ssbo("list_info_buf", &list_info_buf_);
sub.barrier(GPU_BARRIER_SHADER_STORAGE);
sub.dispatch(&dispatch_per_list_);
}
{
/* Copy surfel list sorting data into a flat array.
* All lists data are contiguous in memory using the IndexRange from previous pass. */
PassSimple::Sub &sub = pass.sub("ListFlatten");
sub.shader_set(inst_.shaders.static_shader_get(SURFEL_LIST_FLATTEN));
sub.bind_ssbo(SURFEL_BUF_SLOT, &surfels_buf_);
sub.bind_ssbo(CAPTURE_BUF_SLOT, &capture_info_buf_);
sub.bind_ssbo("list_counter_buf", &list_counter_buf_);
sub.bind_ssbo("list_range_buf", &list_range_buf_);
sub.bind_ssbo("list_item_distance_buf", &list_item_distance_buf_);
sub.bind_ssbo("list_item_surfel_id_buf", &list_item_surfel_id_buf_);
sub.bind_ssbo("list_info_buf", &list_info_buf_);
sub.barrier(GPU_BARRIER_SHADER_STORAGE);
sub.dispatch(&dispatch_per_surfel_);
}
{
/* Radix sort of the list. Output surfel index in the sorted list. */
PassSimple::Sub &sub = pass.sub("ListSort");
sub.shader_set(inst_.shaders.static_shader_get(SURFEL_LIST_SORT));
sub.bind_ssbo(SURFEL_BUF_SLOT, &surfels_buf_);
sub.bind_ssbo(CAPTURE_BUF_SLOT, &capture_info_buf_);
sub.bind_ssbo("list_range_buf", &list_range_buf_);
sub.bind_ssbo("list_item_surfel_id_buf", &list_item_surfel_id_buf_);
sub.bind_ssbo("list_item_distance_buf", &list_item_distance_buf_);
sub.bind_ssbo("sorted_surfel_id_buf", &sorted_surfel_id_buf_);
sub.bind_ssbo("list_info_buf", &list_info_buf_);
sub.barrier(GPU_BARRIER_SHADER_STORAGE);
sub.dispatch(&dispatch_per_surfel_);
}
{
/* Take the sorted lists array and copy adjacent surfel indices back to the Surfels.
* Also relink coplanar surfels to avoid over shadowing. */
PassSimple::Sub &sub = pass.sub("ListBuild");
sub.shader_set(inst_.shaders.static_shader_get(SURFEL_LIST_BUILD));
sub.bind_ssbo(SURFEL_BUF_SLOT, &surfels_buf_);
sub.bind_ssbo(CAPTURE_BUF_SLOT, &capture_info_buf_);
sub.bind_ssbo("list_start_buf", &list_start_buf_);
sub.bind_ssbo("list_range_buf", &list_range_buf_);
sub.bind_ssbo("sorted_surfel_id_buf", &sorted_surfel_id_buf_);
sub.bind_ssbo("list_info_buf", &list_info_buf_);
sub.barrier(GPU_BARRIER_SHADER_STORAGE);
sub.dispatch(&dispatch_per_list_);
@@ -1097,7 +1143,8 @@ void IrradianceBake::surfels_create(const Object &probe_object)
}
if (capture_info_buf_.surfel_len > surfels_buf_.size()) {
printf("IrradianceBake: Allocating %u surfels.\n", capture_info_buf_.surfel_len);
CLOG_INFO(
&Instance::log, "IrradianceBake: Allocating %u surfels.", capture_info_buf_.surfel_len);
size_t max_size = GPU_max_storage_buffer_size();
if (GPU_mem_stats_supported()) {
@@ -1282,7 +1329,16 @@ void IrradianceBake::raylists_build()
dispatch_per_list_.x = divide_ceil_u(list_info_buf_.list_max, SURFEL_LIST_GROUP_SIZE);
list_start_buf_.resize(ceil_to_multiple_u(list_info_buf_.list_max, 4));
list_counter_buf_.resize(ceil_to_multiple_u(list_info_buf_.list_max, 4));
list_range_buf_.resize(ceil_to_multiple_u(list_info_buf_.list_max * 2, 4));
list_item_distance_buf_.resize(ceil_to_multiple_u(max_ii(1, capture_info_buf_.surfel_len), 4));
list_item_surfel_id_buf_.resize(ceil_to_multiple_u(max_ii(1, capture_info_buf_.surfel_len), 4));
sorted_surfel_id_buf_.resize(ceil_to_multiple_u(max_ii(1, capture_info_buf_.surfel_len), 4));
GPU_storagebuf_clear(list_counter_buf_, 0);
/* Clear for the case where there are no list or no surfel.
* Otherwise the irradiance_capture stage will have broken lists. */
GPU_storagebuf_clear(list_start_buf_, -1);
inst_.manager->submit(surfel_ray_build_ps_, ray_view_);
}

View File

@@ -84,6 +84,15 @@ class IrradianceBake {
SurfelListInfoBuf list_info_buf_ = {"list_info_buf_"};
/** List array containing list start surfel index. Cleared to -1. */
StorageArrayBuffer<int, 16, true> list_start_buf_ = {"list_start_buf_"};
/** Count number of surfel per surfel list. Cleared to 0. */
StorageArrayBuffer<int, 16, true> list_counter_buf_ = {"list_counter_buf_"};
/** IndexRange of sorting items for each surfel list. */
StorageArrayBuffer<int, 16, true> list_range_buf_ = {"list_range_buf_"};
/** Sorting items for fast sorting of surfels. */
StorageArrayBuffer<float, 16, true> list_item_distance_buf_ = {"list_item_distance_buf_"};
StorageArrayBuffer<int, 16, true> list_item_surfel_id_buf_ = {"list_item_surfel_id_buf_"};
/** Result of sorting. Needed to be duplicated to avoid race condition. */
StorageArrayBuffer<int, 16, true> sorted_surfel_id_buf_ = {"sorted_surfel_id_buf_"};
/* Dispatch size for per surfel workload. */
int3 dispatch_per_surfel_ = int3(1);

View File

@@ -533,6 +533,12 @@ const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_
return "eevee_surfel_light";
case SURFEL_LIST_BUILD:
return "eevee_surfel_list_build";
case SURFEL_LIST_FLATTEN:
return "eevee_surfel_list_flatten";
case SURFEL_LIST_PREFIX:
return "eevee_surfel_list_prefix";
case SURFEL_LIST_PREPARE:
return "eevee_surfel_list_prepare";
case SURFEL_LIST_SORT:
return "eevee_surfel_list_sort";
case SURFEL_RAY:

View File

@@ -150,6 +150,9 @@ enum eShaderType {
SURFEL_CLUSTER_BUILD,
SURFEL_LIGHT,
SURFEL_LIST_BUILD,
SURFEL_LIST_FLATTEN,
SURFEL_LIST_PREFIX,
SURFEL_LIST_PREPARE,
SURFEL_LIST_SORT,
SURFEL_RAY,

View File

@@ -146,6 +146,9 @@ set(SRC_GLSL_COMP
eevee_surfel_cluster_build_comp.glsl
eevee_surfel_light_comp.glsl
eevee_surfel_list_build_comp.glsl
eevee_surfel_list_flatten_comp.glsl
eevee_surfel_list_prefix_comp.glsl
eevee_surfel_list_prepare_comp.glsl
eevee_surfel_list_sort_comp.glsl
eevee_surfel_ray_comp.glsl
eevee_vertex_copy_comp.glsl

View File

@@ -113,12 +113,18 @@ void main()
/* Walk the ray to get which surfels the irradiance sample is between. */
int surfel_prev = -1;
int surfel_next = list_start_buf[list_index];
for (; surfel_next > -1; surfel_next = surfel_buf[surfel_next].next) {
/* Avoid spinning for eternity. */
for (int i = 0; i < 9999; i++) {
if (surfel_next <= -1) {
break;
}
/* Reminder: List is sorted with highest value first. */
if (surfel_buf[surfel_next].ray_distance < irradiance_sample_ray_distance) {
break;
}
surfel_prev = surfel_next;
surfel_next = surfel_buf[surfel_next].next;
assert(surfel_prev != surfel_next);
}
float3 sky_L = drw_world_incident_vector(P);
@@ -142,20 +148,32 @@ void main()
Surfel surfel = surfel_buf[surfel_next];
irradiance_capture_surfel(surfel, P, sh);
validity_capture_surfel(surfel, P, validity);
#if 0 /* For debugging the volume rays list. */
drw_debug_line(surfel.position, P, float4(0, 1, 0, 1), drw_debug_persistent_lifetime);
#endif
}
else {
irradiance_capture_world(-sky_L, sh);
validity_capture_world(-sky_L, validity);
#if 0 /* For debugging the volume rays list. */
drw_debug_line(P - sky_L, P, float4(0, 1, 1, 1), drw_debug_persistent_lifetime);
#endif
}
if (surfel_prev > -1) {
Surfel surfel = surfel_buf[surfel_prev];
irradiance_capture_surfel(surfel, P, sh);
validity_capture_surfel(surfel, P, validity);
#if 0 /* For debugging the volume rays list. */
drw_debug_line(surfel.position, P, float4(1, 0, 1, 1), drw_debug_persistent_lifetime);
#endif
}
else {
irradiance_capture_world(sky_L, sh);
validity_capture_world(sky_L, validity);
#if 0 /* For debugging the volume rays list. */
drw_debug_line(P + sky_L, P, float4(1, 1, 0, 1), drw_debug_persistent_lifetime);
#endif
}
/* Normalize for storage. We accumulated 2 samples. */

View File

@@ -3,36 +3,120 @@
* SPDX-License-Identifier: GPL-2.0-or-later */
/**
* Takes scene surfel representation and build list of surfels aligning in a given direction.
* Read the result of the sorted buffer and update the `prev` and `next` surfel id inside each
* surfel structure. This step also transform the linked list into a graph in order to avoid lost
* energy from almost coplanar surfaces.
*
* The lists head are allocated to fit the surfel granularity.
*
* Due to alignment the link and list head are split into several int arrays to avoid too much
* memory waste.
*
* Dispatch 1 thread per surfel.
* Dispatched as 1 thread per list.
*/
#include "infos/eevee_lightprobe_volume_infos.hh"
COMPUTE_SHADER_CREATE_INFO(eevee_surfel_list_build)
#include "eevee_surfel_list_lib.glsl"
#include "gpu_shader_index_range_lib.glsl"
/**
 * Tell whether the link going from `surfel[a]` to `surfel[b]` should be kept.
 * The link is rejected when its direction is almost perpendicular to the normal of `surfel[a]`,
 * which is what this shader treats as `surfel[b]` being coplanar with `surfel[a]`.
 * WARNING: The result is not symmetric, only the normal of `a` is used: `f(a, b) != f(b, a)`
 */
bool is_valid_surfel_link(int a, int b)
{
  float3 from_a_to_b = surfel_buf[b].position - surfel_buf[a].position;
  float cos_to_normal = dot(surfel_buf[a].normal, normalize(from_a_to_b));
  /* Keep the negated "less than" form so that the comparison outcome is unchanged for any
   * value of the cosine. */
  return !(abs(cos_to_normal) < 0.05f);
}
void main()
{
int surfel_index = int(gl_GlobalInvocationID.x);
if (surfel_index >= int(capture_info_buf.surfel_len)) {
int list_id = int(gl_GlobalInvocationID.x);
if (list_id >= list_info_buf.list_max) {
return;
}
float ray_distance;
int list_index = surfel_list_index_get(
list_info_buf.ray_grid_size, surfel_buf[surfel_index].position, ray_distance);
/* Do separate assignment to avoid reference to buffer in arguments which is tricky to cross
* compile. */
surfel_buf[surfel_index].ray_distance = ray_distance;
/* NOTE: We only need to init the `list_start_buf` to -1 for the whole list to be valid since
* every surfel will load its `next` value from the list head. */
surfel_buf[surfel_index].next = atomicExchange(list_start_buf[list_index], surfel_index);
const IndexRange list_range = IndexRange(list_range_buf[list_id * 2 + 0],
list_range_buf[list_id * 2 + 1]);
if (list_range.size() == 0) {
list_start_buf[list_id] = -1;
return;
}
const int first_item = list_range.start();
const int last_item = list_range.last();
const int sorted_list_first = sorted_surfel_id_buf[first_item];
{
/* Update surfels linked list. */
int prev = -1;
int curr = sorted_surfel_id_buf[first_item];
for (int i = first_item; i <= last_item; i++) {
int next = (i == last_item) ? -1 : sorted_surfel_id_buf[i + 1];
surfel_buf[curr].next = next;
surfel_buf[curr].prev = prev;
prev = curr;
curr = next;
}
}
/* Update list start for irradiance sample capture. */
list_start_buf[list_id] = sorted_list_first;
/* Now that we have a sorted list, try to avoid connection from coplanar surfels.
* For that we disconnect them and link them to the first non-coplanar surfel.
* Note that this changes the list to a tree, which doesn't affect the rest of the algorithm.
*
* This is a really important step since it allows to clump more surfels into one ray list and
* avoid light leaking through surfaces. If we don't disconnect coplanar surfels, we lose many
* good rays by evaluating null radiance transfer between the coplanar surfels for rays that
* are not directly perpendicular to the surface. */
/* Limiting the total number of search steps avoids TDRs, but may cause overshadowing if the
* limit is reached. */
const int max_search = 2000;
uint search_count = 0;
/* Mutable `foreach`. */
for (int i = sorted_list_first, next = -1; i > -1; i = next) {
next = surfel_buf[i].next;
int valid_next = surfel_buf[i].next;
int valid_prev = surfel_buf[i].prev;
/* Search the list for the first valid next and previous surfel. */
while (search_count < max_search) {
if (valid_next == -1) {
break;
}
if (is_valid_surfel_link(i, valid_next)) {
break;
}
valid_next = surfel_buf[valid_next].next;
search_count++;
}
while (search_count < max_search) {
if (valid_prev == -1) {
break;
}
if (is_valid_surfel_link(i, valid_prev)) {
break;
}
valid_prev = surfel_buf[valid_prev].prev;
search_count++;
}
surfel_buf[i].next = valid_next;
surfel_buf[i].prev = valid_prev;
}
#if 0 /* For debugging the sorted list. */
for (int i = sorted_list_first, next = -1; i > -1; i = next) {
next = surfel_buf[i].next;
if (next != -1) {
drw_debug_line(surfel_buf[next].position,
surfel_buf[i].position,
float4(1, 0, 0, 1),
drw_debug_persistent_lifetime);
}
}
#endif
}

View File

@@ -0,0 +1,29 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/**
* Flatten surfel sorting data into a sequential structure.
* The buffer structure follows the lists OffsetIndices.
*
* Dispatched as 1 thread per surfel.
*/
#include "infos/eevee_lightprobe_volume_infos.hh"
COMPUTE_SHADER_CREATE_INFO(eevee_surfel_list_flatten)
void main()
{
  const int surfel_index = int(gl_GlobalInvocationID.x);
  const int surfel_count = int(capture_info_buf.surfel_len);
  if (surfel_index < surfel_count) {
    const int owner_list = surfel_buf[surfel_index].list_id;
    /* `atomicAdd` returns the counter value before the decrement, so subtracting one gives this
     * surfel its own slot inside the list. */
    const int slot_in_list = atomicAdd(list_counter_buf[owner_list], -1) - 1;
    /* First entry of the pair stored for a list is the start of its range in the flat arrays. */
    const int slot = slot_in_list + list_range_buf[owner_list * 2 + 0];

    list_item_distance_buf[slot] = surfel_buf[surfel_index].ray_distance;
    list_item_surfel_id_buf[slot] = surfel_index;
  }
}

View File

@@ -0,0 +1,28 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/**
* Create a prefix sum of the surfels per list.
* Outputs one IndexRange for each surfel list.
*
* Dispatched as 1 thread per surfel list.
*/
#include "infos/eevee_lightprobe_volume_infos.hh"
COMPUTE_SHADER_CREATE_INFO(eevee_surfel_list_prefix)
void main()
{
  const int list_index = int(gl_GlobalInvocationID.x);
  if (list_index < list_info_buf.list_max) {
    const int item_count = list_counter_buf[list_index];
    /* Reserve `item_count` consecutive items by bumping the shared running total.
     * The value returned is the total before the addition, which is where this list starts. */
    const int range_start = atomicAdd(list_info_buf.list_prefix_sum, item_count);

    /* Each list owns two consecutive integers: range start, then range size. */
    const int range_slot = list_index * 2;
    list_range_buf[range_slot + 0] = range_start;
    list_range_buf[range_slot + 1] = item_count;
  }
}

View File

@@ -0,0 +1,44 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/**
* Takes the scene surfel representation to build lists of surfels aligning with a given direction.
*
* The lists heads are allocated to fit the surfel granularity.
*
* Due to alignment the link and list head are split into several int arrays to avoid too much
* memory waste.
*
* This step only counts the number of surfels per list.
*
* Dispatch 1 thread per surfel.
*/
#include "infos/eevee_lightprobe_volume_infos.hh"
COMPUTE_SHADER_CREATE_INFO(eevee_surfel_list_prepare)
#include "eevee_surfel_list_lib.glsl"
void main()
{
  const uint thread_id = gl_GlobalInvocationID.x;
  const int surfel_index = int(thread_id);
  if (surfel_index >= int(capture_info_buf.surfel_len)) {
    return;
  }

  /* The distance is returned through a local variable and copied to the surfel afterwards.
   * Passing a reference to a buffer member as argument is tricky to cross compile. */
  float distance_on_ray;
  const int owner_list = surfel_list_index_get(
      list_info_buf.ray_grid_size, surfel_buf[surfel_index].position, distance_on_ray);

  surfel_buf[surfel_index].list_id = owner_list;
  surfel_buf[surfel_index].ray_distance = distance_on_ray;
  /* One more surfel inside this list. */
  atomicAdd(list_counter_buf[owner_list], 1);

  /* A single thread resets the running total used by the next step. */
  if (thread_id == 0u) {
    list_info_buf.list_prefix_sum = 0;
  }
}

View File

@@ -3,165 +3,50 @@
* SPDX-License-Identifier: GPL-2.0-or-later */
/**
* Sort a buffer of surfel list by distance along a direction.
* The resulting surfel lists are then the equivalent of a series of ray cast in the same
* direction. The fact that the surfels are sorted gives proper occlusion.
* Sort ranges of surfel inside a list using radix sort.
* The previous step flattens the list data into one big array, with a specific range of data for
* each ray list. This stage operates on these ranges.
*
* Sort by decreasing `ray_distance`. Start of list is the largest value.
* For now the implementation is done in a single pass with brute force.
* All elements of a list scan inside the whole list.
*
* Dispatched as 1 thread per list.
* Dispatched as 1 thread per surfel (array elem).
*/
#include "infos/eevee_lightprobe_volume_infos.hh"
COMPUTE_SHADER_CREATE_INFO(eevee_surfel_list_sort)
/**
* A doubly-linked list implementation.
* IMPORTANT: It is not general purpose as it only cover the cases needed by this shader.
*/
struct List {
int first, last;
};
/* Return the split list after link_index. */
List list_split_after(inout List original, int link_index)
{
int next_link = surfel_buf[link_index].next;
int last_link = original.last;
original.last = link_index;
List split;
split.first = next_link;
split.last = last_link;
surfel_buf[link_index].next = -1;
surfel_buf[next_link].prev = -1;
return split;
}
void list_add_tail(inout List list, int link_index)
{
surfel_buf[link_index].next = -1;
surfel_buf[link_index].prev = list.last;
surfel_buf[list.last].next = link_index;
list.last = link_index;
}
void list_insert_link_before(inout List list, int next_link, int new_link)
{
if (list.first == next_link) {
/* At beginning of list. */
list.first = new_link;
}
int prev_link = surfel_buf[next_link].prev;
surfel_buf[new_link].next = next_link;
surfel_buf[new_link].prev = prev_link;
surfel_buf[next_link].prev = new_link;
if (prev_link != -1) {
surfel_buf[prev_link].next = new_link;
}
}
/**
* Return true if link from `surfel[a]` to `surfel[b]` is valid.
* WARNING: this function is not commutative : `f(a, b) != f(b, a)`
*/
bool is_valid_surfel_link(int a, int b)
{
float3 link_vector = normalize(surfel_buf[b].position - surfel_buf[a].position);
float link_angle_cos = dot(surfel_buf[a].normal, link_vector);
bool is_coplanar = abs(link_angle_cos) < 1.0e-3f;
return !is_coplanar;
}
#include "gpu_shader_index_range_lib.glsl"
void main()
{
int list_index = int(gl_GlobalInvocationID.x);
if (list_index >= list_info_buf.list_max) {
int item_id = int(gl_GlobalInvocationID.x);
if (item_id >= int(capture_info_buf.surfel_len)) {
return;
}
int list_start = list_start_buf[list_index];
int surfel_id = list_item_surfel_id_buf[item_id];
int list_id = surfel_buf[surfel_id].list_id;
float ray_distance = list_item_distance_buf[item_id];
if (list_start == -1) {
/* Empty list. */
return;
}
/* Create Surfel.prev pointers. */
int prev_id = -1;
for (int i = list_start; i > -1; i = surfel_buf[i].next) {
surfel_buf[i].prev = prev_id;
prev_id = i;
}
List sorted_list;
sorted_list.first = list_start;
sorted_list.last = prev_id;
if (sorted_list.first == sorted_list.last) {
/* Only one item. Nothing to sort. */
return;
}
/* Using insertion sort as it is easier to implement. */
List unsorted_list = list_split_after(sorted_list, sorted_list.first);
/* Mutable for-each. */
for (int i = unsorted_list.first, next = 0; i > -1; i = next) {
next = surfel_buf[i].next;
bool insert = false;
for (int j = sorted_list.first; j > -1; j = surfel_buf[j].next) {
if (surfel_buf[j].ray_distance < surfel_buf[i].ray_distance) {
list_insert_link_before(sorted_list, j, i);
insert = true;
break;
IndexRange list_range = IndexRange(list_range_buf[list_id * 2 + 0],
list_range_buf[list_id * 2 + 1]);
int prefix = 0;
/* Prefix sum inside the list range. */
for (int i = list_range.start(); i <= list_range.last(); i++) {
if (list_item_distance_buf[i] > ray_distance) {
prefix++;
}
else if (list_item_distance_buf[i] == ray_distance) {
/* Resolve the case where 2 items have the same value. */
if (i > item_id) {
prefix++;
}
}
if (insert == false) {
list_add_tail(sorted_list, i);
}
}
/* Update list start for irradiance sample capture. */
list_start_buf[list_index] = sorted_list.first;
/* Now that we have a sorted list, try to avoid connection from coplanar surfels.
* For that we disconnect them and link them to the first non-coplanar surfel.
* Note that this changes the list to a tree, which doesn't affect the rest of the algorithm.
*
* This is a really important step since it allows to clump more surfels into one ray list and
* avoid light leaking through surfaces. If we don't disconnect coplanar surfels, we loose many
* good rays by evaluating null radiance transfer between the coplanar surfels for rays that
* are not directly perpendicular to the surface. */
/* Mutable `foreach`. */
for (int i = sorted_list.first, next = 0; i > -1; i = next) {
next = surfel_buf[i].next;
int valid_next = surfel_buf[i].next;
int valid_prev = surfel_buf[i].prev;
/* Search the list for the first valid next and previous surfel. */
while (valid_next > -1) {
if (is_valid_surfel_link(i, valid_next)) {
break;
}
valid_next = surfel_buf[valid_next].next;
}
while (valid_prev > -1) {
if (is_valid_surfel_link(i, valid_prev)) {
break;
}
valid_prev = surfel_buf[valid_prev].prev;
}
surfel_buf[i].next = valid_next;
surfel_buf[i].prev = valid_prev;
}
int sorted_id = list_range.start() + prefix;
sorted_surfel_id_buf[sorted_id] = surfel_id;
surfel_buf[surfel_id].index_in_sorted_list = sorted_id;
}

View File

@@ -126,16 +126,46 @@ COMPUTE_SOURCE("eevee_surfel_cluster_build_comp.glsl")
DO_STATIC_COMPILATION()
GPU_SHADER_CREATE_END()
GPU_SHADER_CREATE_INFO(eevee_surfel_list_build)
GPU_SHADER_CREATE_INFO(eevee_surfel_list_prepare)
LOCAL_GROUP_SIZE(SURFEL_GROUP_SIZE)
BUILTINS(BuiltinBits::TEXTURE_ATOMIC)
TYPEDEF_SOURCE("eevee_defines.hh")
TYPEDEF_SOURCE("eevee_lightprobe_shared.hh")
ADDITIONAL_INFO(eevee_surfel_common)
ADDITIONAL_INFO(draw_view)
STORAGE_BUF(0, read_write, int, list_start_buf[])
STORAGE_BUF(0, read_write, int, list_counter_buf[])
STORAGE_BUF(6, read_write, SurfelListInfoData, list_info_buf)
COMPUTE_SOURCE("eevee_surfel_list_build_comp.glsl")
COMPUTE_SOURCE("eevee_surfel_list_prepare_comp.glsl")
DO_STATIC_COMPILATION()
GPU_SHADER_CREATE_END()
GPU_SHADER_CREATE_INFO(eevee_surfel_list_prefix)
LOCAL_GROUP_SIZE(SURFEL_GROUP_SIZE)
BUILTINS(BuiltinBits::TEXTURE_ATOMIC)
TYPEDEF_SOURCE("eevee_defines.hh")
TYPEDEF_SOURCE("eevee_lightprobe_shared.hh")
ADDITIONAL_INFO(eevee_surfel_common)
ADDITIONAL_INFO(draw_view)
STORAGE_BUF(0, read, int, list_counter_buf[])
STORAGE_BUF(2, write, int, list_range_buf[])
STORAGE_BUF(6, read_write, SurfelListInfoData, list_info_buf)
COMPUTE_SOURCE("eevee_surfel_list_prefix_comp.glsl")
DO_STATIC_COMPILATION()
GPU_SHADER_CREATE_END()
GPU_SHADER_CREATE_INFO(eevee_surfel_list_flatten)
LOCAL_GROUP_SIZE(SURFEL_GROUP_SIZE)
BUILTINS(BuiltinBits::TEXTURE_ATOMIC)
TYPEDEF_SOURCE("eevee_defines.hh")
TYPEDEF_SOURCE("eevee_lightprobe_shared.hh")
ADDITIONAL_INFO(eevee_surfel_common)
ADDITIONAL_INFO(draw_view)
STORAGE_BUF(0, read_write, int, list_counter_buf[])
STORAGE_BUF(1, read, int, list_range_buf[])
STORAGE_BUF(2, write, float, list_item_distance_buf[])
STORAGE_BUF(3, write, int, list_item_surfel_id_buf[])
STORAGE_BUF(6, read, SurfelListInfoData, list_info_buf)
COMPUTE_SOURCE("eevee_surfel_list_flatten_comp.glsl")
DO_STATIC_COMPILATION()
GPU_SHADER_CREATE_END()
@@ -145,12 +175,30 @@ TYPEDEF_SOURCE("eevee_defines.hh")
TYPEDEF_SOURCE("eevee_lightprobe_shared.hh")
ADDITIONAL_INFO(eevee_surfel_common)
ADDITIONAL_INFO(draw_view)
STORAGE_BUF(0, read_write, int, list_start_buf[])
STORAGE_BUF(0, read, int, list_range_buf[])
STORAGE_BUF(1, read, int, list_item_surfel_id_buf[])
STORAGE_BUF(2, read, float, list_item_distance_buf[])
STORAGE_BUF(3, write, int, sorted_surfel_id_buf[])
STORAGE_BUF(6, read, SurfelListInfoData, list_info_buf)
COMPUTE_SOURCE("eevee_surfel_list_sort_comp.glsl")
DO_STATIC_COMPILATION()
GPU_SHADER_CREATE_END()
GPU_SHADER_CREATE_INFO(eevee_surfel_list_build)
LOCAL_GROUP_SIZE(SURFEL_GROUP_SIZE)
BUILTINS(BuiltinBits::TEXTURE_ATOMIC)
TYPEDEF_SOURCE("eevee_defines.hh")
TYPEDEF_SOURCE("eevee_lightprobe_shared.hh")
ADDITIONAL_INFO(eevee_surfel_common)
ADDITIONAL_INFO(draw_view)
STORAGE_BUF(0, write, int, list_start_buf[])
STORAGE_BUF(1, read, int, list_range_buf[])
STORAGE_BUF(3, read, int, sorted_surfel_id_buf[])
STORAGE_BUF(6, read_write, SurfelListInfoData, list_info_buf)
COMPUTE_SOURCE("eevee_surfel_list_build_comp.glsl")
DO_STATIC_COMPILATION()
GPU_SHADER_CREATE_END()
GPU_SHADER_CREATE_INFO(eevee_surfel_ray)
LOCAL_GROUP_SIZE(SURFEL_GROUP_SIZE)
TYPEDEF_SOURCE("eevee_defines.hh")

View File

@@ -1825,39 +1825,46 @@ DRAW_TEST(eevee_shadow_page_mask)
static void test_eevee_surfel_list()
{
GTEST_SKIP() << "Result is non-deterministic. To be revisited.";
GPU_render_begin();
StorageArrayBuffer<int> list_start_buf = {"list_start_buf"};
StorageVectorBuffer<Surfel> surfel_buf = {"surfel_buf"};
CaptureInfoBuf capture_info_buf = {"capture_info_buf"};
SurfelListInfoBuf list_info_buf = {"list_info_buf"};
StorageArrayBuffer<int> list_counter_buf = {"list_counter_buf"};
StorageArrayBuffer<int> list_range_buf = {"list_range_buf"};
StorageArrayBuffer<float> list_item_distance_buf = {"list_item_distance_buf"};
StorageArrayBuffer<int> list_item_surfel_id_buf = {"list_item_surfel_id_buf"};
StorageArrayBuffer<int> sorted_surfel_id_buf = {"sorted_surfel_id_buf"};
/**
* Simulate surfels on a 2x2 projection grid covering [0..2] on the Z axis.
*/
{
Surfel surfel;
surfel.normal = {0.0f, 0.0f, 1.0f};
/* NOTE: Expected link assumes linear increasing processing order [0->5]. But this is
* multithreaded and we can't know the execution order in advance. */
/* 0: Project to (1, 0) = list 1. Unsorted Next = -1; Next = -1; Previous = 3. */
/* 0: Project to (1, 0) = list 1. Next = -1; Previous = 3. */
surfel.position = {1.1f, 0.1f, 0.1f};
surfel_buf.append(surfel);
/* 1: Project to (1, 0) = list 1. Unsorted Next = 0; Next = 2; Previous = -1. */
/* 1: Project to (1, 0) = list 1. Next = 2; Previous = -1. */
surfel.position = {1.1f, 0.2f, 0.5f};
surfel_buf.append(surfel);
/* 2: Project to (1, 0) = list 1. Unsorted Next = 1; Next = 3; Previous = 1. */
/* 2: Project to (1, 0) = list 1. Next = 3; Previous = 1. */
surfel.position = {1.1f, 0.3f, 0.3f};
surfel_buf.append(surfel);
/* 3: Project to (1, 0) = list 1. Unsorted Next = 2; Next = 0; Previous = 2. */
/* 3: Project to (1, 0) = list 1. Next = 0; Previous = 2. */
surfel.position = {1.2f, 0.4f, 0.2f};
surfel_buf.append(surfel);
/* 4: Project to (1, 1) = list 3. Unsorted Next = -1; Next = -1; Previous = -1. */
/* 4: Project to (1, 1) = list 3. Next = -1; Previous = -1. */
surfel.position = {1.0f, 1.0f, 0.5f};
surfel_buf.append(surfel);
/* 5: Project to (0, 1) = list 2. Unsorted Next = -1; Next = -1; Previous = -1. */
/* 5: Project to (0, 1) = list 2. Next = -1; Previous = -1. */
surfel.position = {0.1f, 1.1f, 0.5f};
surfel_buf.append(surfel);
/* 6: Project to (0, 1) = list 2. Next = -1; Previous = -1. Disconnected because coplanar with surfel 5. */
surfel.position = {0.2f, 1.1f, 0.5f};
surfel_buf.append(surfel);
surfel_buf.push_update();
}
@@ -1875,27 +1882,82 @@ static void test_eevee_surfel_list()
list_start_buf.push_update();
GPU_storagebuf_clear(list_start_buf, -1);
}
{
list_counter_buf.resize(ceil_to_multiple_u(list_info_buf.list_max, 4u));
list_counter_buf.push_update();
GPU_storagebuf_clear(list_counter_buf, 0);
}
{
list_range_buf.resize(ceil_to_multiple_u(list_info_buf.list_max * 2, 4u));
list_range_buf.push_update();
GPU_storagebuf_clear(list_range_buf, -1);
}
{
list_item_distance_buf.resize(ceil_to_multiple_u(capture_info_buf.surfel_len, 4u));
list_item_surfel_id_buf.resize(ceil_to_multiple_u(capture_info_buf.surfel_len, 4u));
sorted_surfel_id_buf.resize(ceil_to_multiple_u(capture_info_buf.surfel_len, 4u));
GPU_storagebuf_clear(list_item_distance_buf, -1);
GPU_storagebuf_clear(list_item_surfel_id_buf, -1);
GPU_storagebuf_clear(sorted_surfel_id_buf, -1);
}
/* Top-down view. */
View view = {"RayProjectionView"};
view.sync(float4x4::identity(), math::projection::orthographic<float>(0, 2, 0, 2, 0, 1));
gpu::Shader *sh_build = GPU_shader_create_from_info_name("eevee_surfel_list_build");
gpu::Shader *sh_flatten = GPU_shader_create_from_info_name("eevee_surfel_list_flatten");
gpu::Shader *sh_prefix = GPU_shader_create_from_info_name("eevee_surfel_list_prefix");
gpu::Shader *sh_prepare = GPU_shader_create_from_info_name("eevee_surfel_list_prepare");
gpu::Shader *sh_sort = GPU_shader_create_from_info_name("eevee_surfel_list_sort");
PassSimple pass("Build_and_Sort");
pass.shader_set(sh_build);
pass.bind_ssbo("list_start_buf", list_start_buf);
pass.shader_set(sh_prepare);
pass.bind_ssbo("list_counter_buf", list_counter_buf);
pass.bind_ssbo("list_info_buf", list_info_buf);
pass.bind_ssbo("surfel_buf", surfel_buf);
pass.bind_ssbo("capture_info_buf", capture_info_buf);
pass.dispatch(int3(1, 1, 1));
pass.barrier(GPU_BARRIER_SHADER_STORAGE);
pass.shader_set(sh_prefix);
pass.bind_ssbo("list_counter_buf", list_counter_buf);
pass.bind_ssbo("list_range_buf", list_range_buf);
pass.bind_ssbo("list_info_buf", list_info_buf);
pass.bind_ssbo("surfel_buf", surfel_buf);
pass.bind_ssbo("capture_info_buf", capture_info_buf);
pass.dispatch(int3(1, 1, 1));
pass.barrier(GPU_BARRIER_SHADER_STORAGE);
pass.shader_set(sh_flatten);
pass.bind_ssbo("list_counter_buf", list_counter_buf);
pass.bind_ssbo("list_range_buf", list_range_buf);
pass.bind_ssbo("list_item_distance_buf", list_item_distance_buf);
pass.bind_ssbo("list_item_surfel_id_buf", list_item_surfel_id_buf);
pass.bind_ssbo("list_info_buf", list_info_buf);
pass.bind_ssbo("surfel_buf", surfel_buf);
pass.bind_ssbo("capture_info_buf", capture_info_buf);
pass.dispatch(int3(1, 1, 1));
pass.barrier(GPU_BARRIER_SHADER_STORAGE);
pass.shader_set(sh_sort);
pass.bind_ssbo("list_start_buf", list_start_buf);
pass.bind_ssbo("surfel_buf", surfel_buf);
pass.bind_ssbo("list_range_buf", list_range_buf);
pass.bind_ssbo("list_item_surfel_id_buf", list_item_surfel_id_buf);
pass.bind_ssbo("list_item_distance_buf", list_item_distance_buf);
pass.bind_ssbo("sorted_surfel_id_buf", sorted_surfel_id_buf);
pass.bind_ssbo("list_info_buf", list_info_buf);
pass.bind_ssbo("surfel_buf", surfel_buf);
pass.bind_ssbo("capture_info_buf", capture_info_buf);
pass.dispatch(int3(1, 1, 1));
pass.barrier(GPU_BARRIER_SHADER_STORAGE);
pass.shader_set(sh_build);
pass.bind_ssbo("list_start_buf", list_start_buf);
pass.bind_ssbo("list_range_buf", list_range_buf);
pass.bind_ssbo("sorted_surfel_id_buf", sorted_surfel_id_buf);
pass.bind_ssbo("list_info_buf", list_info_buf);
pass.bind_ssbo("surfel_buf", surfel_buf);
pass.bind_ssbo("capture_info_buf", capture_info_buf);
pass.dispatch(int3(1, 1, 1));
pass.barrier(GPU_BARRIER_BUFFER_UPDATE);
@@ -1906,8 +1968,8 @@ static void test_eevee_surfel_list()
surfel_buf.read();
/* Expect surfel list. */
Vector<int> expect_link_next = {-1, +2, +3, +0, -1, -1};
Vector<int> expect_link_prev = {+3, -1, +1, +2, -1, -1};
Vector<int> expect_link_next = {-1, +2, +3, +0, -1, -1, -1};
Vector<int> expect_link_prev = {+3, -1, +1, +2, -1, -1, -1};
Vector<int> link_next, link_prev;
for (const auto &surfel : Span<Surfel>(surfel_buf.data(), surfel_buf.size())) {
@@ -1915,22 +1977,17 @@ static void test_eevee_surfel_list()
link_prev.append(surfel.prev);
}
#if 0 /* Useful for debugging */
/* NOTE: All of these are unstable by definition (atomic + multi-thread).
* But should be consistent since we only dispatch one thread-group. */
/* Expect last added surfel index. It is the list start index before sorting. */
Vector<int> expect_list_start = {-1, 1, 5, 4};
// Span<int>(list_start_buf.data(), expect_list_start.size()).print_as_lines("list_start");
// link_next.as_span().print_as_lines("link_next");
// link_prev.as_span().print_as_lines("link_prev");
EXPECT_EQ_SPAN(expect_list_start, list_start_buf);
#endif
EXPECT_EQ_SPAN<int>(expect_list_start, list_start_buf);
EXPECT_EQ_SPAN<int>(expect_link_next, link_next);
EXPECT_EQ_SPAN<int>(expect_link_prev, link_prev);
GPU_shader_unbind();
GPU_shader_free(sh_build);
GPU_shader_free(sh_flatten);
GPU_shader_free(sh_prefix);
GPU_shader_free(sh_prepare);
GPU_shader_free(sh_sort);
DRW_shaders_free();
GPU_render_end();

View File

@@ -1342,7 +1342,8 @@ class Preprocessor {
{
const bool skip_drw_debug = filename.find("draw_debug_draw_lib.glsl") != std::string::npos ||
filename.find("draw_debug_draw_display_vert.glsl") !=
std::string::npos;
std::string::npos ||
filename.find("draw_shader_shared.hh") != std::string::npos;
using namespace metadata;
/* TODO: This can trigger false positive caused by disabled #if blocks. */
std::string tokens[] = {"gl_FragCoord",