Merge branch 'blender-v5.0-release'

2025-10-08 10:15:37 -04:00
parent 04ec86437f 085276d8ba
commit 6b55d82ca3
19 changed files with 525 additions and 213 deletions
--- a/scripts/startup/bl_ui/node_add_menu.py
+++ b/scripts/startup/bl_ui/node_add_menu.py
@@ -315,7 +315,11 @@ class NodeMenu(Menu):
            if groups:
                layout.separator()
                for group in groups:
-                    props = cls.node_operator(layout, node_tree_group_type[group.bl_idname], label=group.name)
+                    search_weight = -1.0 if group.is_linked_packed else 0.0
+                    props = cls.node_operator(layout,
+                                              node_tree_group_type[group.bl_idname],
+                                              label=group.name,
+                                              search_weight=search_weight)
                    ops = props.settings.add()
                    ops.name = "node_tree"
                    ops.value = "bpy.data.node_groups[{!r}]".format(group.name)
--- a/source/blender/draw/CMakeLists.txt
+++ b/source/blender/draw/CMakeLists.txt
@@ -548,7 +548,10 @@ set(GLSL_SRC
  engines/eevee/shaders/eevee_surfel_cluster_build_comp.glsl
  engines/eevee/shaders/eevee_surfel_light_comp.glsl
  engines/eevee/shaders/eevee_surfel_list_build_comp.glsl
+  engines/eevee/shaders/eevee_surfel_list_flatten_comp.glsl
  engines/eevee/shaders/eevee_surfel_list_lib.glsl
+  engines/eevee/shaders/eevee_surfel_list_prefix_comp.glsl
+  engines/eevee/shaders/eevee_surfel_list_prepare_comp.glsl
  engines/eevee/shaders/eevee_surfel_list_sort_comp.glsl
  engines/eevee/shaders/eevee_surfel_ray_comp.glsl
  engines/eevee/shaders/eevee_thickness_lib.glsl
--- a/source/blender/draw/engines/eevee/eevee_instance.cc
+++ b/source/blender/draw/engines/eevee/eevee_instance.cc
@@ -8,6 +8,8 @@
 * An instance contains all structures needed to do a complete render.
 */

+#include "CLG_log.h"
+
 #include "BKE_global.hh"
 #include "BKE_object.hh"

@@ -18,7 +20,6 @@

 #include "DEG_depsgraph_query.hh"

-#include "DNA_ID.h"
 #include "DNA_lightprobe_types.h"
 #include "DNA_modifier_types.h"

@@ -30,17 +31,18 @@

 #include "RE_pipeline.h"

-#include "eevee_engine.h"
 #include "eevee_instance.hh"

 #include "DNA_particle_types.h"

-#include "draw_common.hh"
 #include "draw_context_private.hh"
+#include "draw_debug.hh"
 #include "draw_view_data.hh"

 namespace blender::eevee {

+CLG_LogRef Instance::log = {"eevee"};
+
 void *Instance::debug_scope_render_sample = nullptr;
 void *Instance::debug_scope_irradiance_setup = nullptr;
 void *Instance::debug_scope_irradiance_sample = nullptr;
@@ -885,6 +887,7 @@ void Instance::light_bake_irradiance(
  volume_probes.bake.init(probe);

  custom_pipeline_wrapper([&]() {
+    drw_debug_clear();
    this->render_sync();
    while ((materials.queued_shaders_count > 0) || (materials.queued_textures_count > 0)) {
      GPU_pass_cache_wait_for_all();
@@ -921,6 +924,9 @@ void Instance::light_bake_irradiance(

      DRW_submission_end();
    }
+
+    /* Avoid queuing the big setup job together with the sampling commands. */
+    GPU_flush();
  });

  if (volume_probes.bake.should_break()) {
@@ -928,14 +934,26 @@ void Instance::light_bake_irradiance(
  }

  sampling.init(probe);
+
+  /* Start with 1 sample and progressively ramp up. */
+  float time_per_sample_ms_smooth = 16.0f;
+  double last_update_timestamp = BLI_time_now_seconds();
  while (!sampling.finished()) {
    context_wrapper([&]() {
      DebugScope debug_scope(debug_scope_irradiance_sample, "EEVEE.irradiance_sample");

-      /* Batch ray cast by pack of 16. Avoids too much overhead of the update function & context
-       * switch. */
-      /* TODO(fclem): Could make the number of iteration depend on the computation time. */
-      for (int i = 0; i < 16 && !sampling.finished(); i++) {
+      int remaining_samples = sampling.sample_count() - sampling.sample_index();
+      /* In background mode, assume we don't need as much interactivity. */
+      int time_budget_ms = G.background ? 32 : 16;
+      /* Batch ray cast. Avoids too much overhead of the context switch. */
+      int sample_count_in_batch = ceilf(time_budget_ms / max(0.1f, time_per_sample_ms_smooth));
+      /* Avoid batching too many rays, keep system responsive in case of bad values. */
+      sample_count_in_batch = min_iii(32, sample_count_in_batch, remaining_samples);
+
+      CLOG_INFO(&Instance::log, "IrradianceBake: Casting %d rays.", sample_count_in_batch);
+
+      double time_it_begin_ms = BLI_time_now_seconds() * 1000.0;
+      for (int i = 0; i < sample_count_in_batch && !sampling.finished(); i++) {
        sampling.step();
        {
          /* Critical section. Potential gpu::Shader concurrent usage. */
@@ -947,19 +965,29 @@ void Instance::light_bake_irradiance(

          DRW_submission_end();
        }
-      }
+      };
+      /* We use GPU_finish to take into account the GPU processing time. */
+      /* TODO(fclem): Could use timer queries to keep pipelining of GPU commands if that becomes a
+       * real bottleneck. */
+      GPU_finish();
+      double time_it_end_ms = BLI_time_now_seconds() * 1000.0;
+
+      float time_per_sample_ms = float(time_it_end_ms - time_it_begin_ms) / sample_count_in_batch;
+      /* Exponential average. */
+      time_per_sample_ms_smooth = interpolate(time_per_sample_ms_smooth, time_per_sample_ms, 0.7f);

-      LightProbeGridCacheFrame *cache_frame;
      if (sampling.finished()) {
-        cache_frame = volume_probes.bake.read_result_packed();
+        result_update(volume_probes.bake.read_result_packed(), 1.0f);
      }
      else {
-        /* TODO(fclem): Only do this read-back if needed. But it might be tricky to know when. */
-        cache_frame = volume_probes.bake.read_result_unpacked();
+        double time_since_last_update_ms = BLI_time_now_seconds() - last_update_timestamp;
+        /* Read-back is expensive: do it once per second. NOTE: the value is in seconds, not ms. */
+        if (time_since_last_update_ms > 1.0) {
+          float progress = sampling.sample_index() / float(sampling.sample_count());
+          result_update(volume_probes.bake.read_result_unpacked(), progress);
+          last_update_timestamp = BLI_time_now_seconds();
+        }
      }
-
-      float progress = sampling.sample_index() / float(sampling.sample_count());
-      result_update(cache_frame, progress);
    });

    if (stop()) {
--- a/source/blender/draw/engines/eevee/eevee_instance.hh
+++ b/source/blender/draw/engines/eevee/eevee_instance.hh
@@ -12,6 +12,8 @@

 #include <fmt/format.h>

+#include "CLG_log.h"
+
 #include "BLI_string.h"

 #include "BLT_translation.hh"
@@ -123,6 +125,8 @@ class Instance : public DrawEngine {
  LightProbeModule light_probes;
  VolumeModule volume;

+  static CLG_LogRef log;
+
  /** Input data. */
  Depsgraph *depsgraph;
  Manager *manager;
--- a/source/blender/draw/engines/eevee/eevee_lightprobe_shared.hh
+++ b/source/blender/draw/engines/eevee/eevee_lightprobe_shared.hh
@@ -215,8 +215,10 @@ struct Surfel {
  bool32_t double_sided;
  /** Surface receiver light set for light linking. */
  uint receiver_light_set;
-  int _pad0;
-  int _pad1;
+  /** List index this surfel is in. */
+  int list_id;
+  /** Index of this surfel inside the sorted list. Allows access to previous and next surfel id. */
+  int index_in_sorted_list;
  /** Surface radiance: Emission + Direct Lighting. */
  SurfelRadiance radiance_direct;
  /** Surface radiance: Indirect Lighting. Double buffered to avoid race conditions. */
@@ -281,7 +283,7 @@ struct SurfelListInfoData {
  /** Maximum number of list. Is equal to `ray_grid_size.x * ray_grid_size.y`. */
  int list_max;

-  int _pad0;
+  int list_prefix_sum;
 };
 BLI_STATIC_ASSERT_ALIGN(SurfelListInfoData, 16)

--- a/source/blender/draw/engines/eevee/eevee_lightprobe_volume.cc
+++ b/source/blender/draw/engines/eevee/eevee_lightprobe_volume.cc
@@ -793,21 +793,67 @@ void IrradianceBake::sync()
    PassSimple &pass = surfel_ray_build_ps_;
    pass.init();
    {
-      PassSimple::Sub &sub = pass.sub("ListBuild");
-      sub.shader_set(inst_.shaders.static_shader_get(SURFEL_LIST_BUILD));
+      /* Count the number of surfels per list. */
+      PassSimple::Sub &sub = pass.sub("ListPrepare");
+      sub.shader_set(inst_.shaders.static_shader_get(SURFEL_LIST_PREPARE));
      sub.bind_ssbo(SURFEL_BUF_SLOT, &surfels_buf_);
      sub.bind_ssbo(CAPTURE_BUF_SLOT, &capture_info_buf_);
-      sub.bind_ssbo("list_start_buf", &list_start_buf_);
+      sub.bind_ssbo("list_counter_buf", &list_counter_buf_);
      sub.bind_ssbo("list_info_buf", &list_info_buf_);
      sub.barrier(GPU_BARRIER_SHADER_STORAGE);
      sub.dispatch(&dispatch_per_surfel_);
    }
    {
+      /* Prefix sum of list sizes. Outputs an IndexRange per list. */
+      PassSimple::Sub &sub = pass.sub("ListPrefix");
+      sub.shader_set(inst_.shaders.static_shader_get(SURFEL_LIST_PREFIX));
+      sub.bind_ssbo(SURFEL_BUF_SLOT, &surfels_buf_);
+      sub.bind_ssbo(CAPTURE_BUF_SLOT, &capture_info_buf_);
+      sub.bind_ssbo("list_counter_buf", &list_counter_buf_);
+      sub.bind_ssbo("list_range_buf", &list_range_buf_);
+      sub.bind_ssbo("list_info_buf", &list_info_buf_);
+      sub.barrier(GPU_BARRIER_SHADER_STORAGE);
+      sub.dispatch(&dispatch_per_list_);
+    }
+    {
+      /* Copy surfel list sorting data into a flat array.
+       * All lists data are contiguous in memory using the IndexRange from previous pass. */
+      PassSimple::Sub &sub = pass.sub("ListFlatten");
+      sub.shader_set(inst_.shaders.static_shader_get(SURFEL_LIST_FLATTEN));
+      sub.bind_ssbo(SURFEL_BUF_SLOT, &surfels_buf_);
+      sub.bind_ssbo(CAPTURE_BUF_SLOT, &capture_info_buf_);
+      sub.bind_ssbo("list_counter_buf", &list_counter_buf_);
+      sub.bind_ssbo("list_range_buf", &list_range_buf_);
+      sub.bind_ssbo("list_item_distance_buf", &list_item_distance_buf_);
+      sub.bind_ssbo("list_item_surfel_id_buf", &list_item_surfel_id_buf_);
+      sub.bind_ssbo("list_info_buf", &list_info_buf_);
+      sub.barrier(GPU_BARRIER_SHADER_STORAGE);
+      sub.dispatch(&dispatch_per_surfel_);
+    }
+    {
+      /* Radix sort of the list. Output surfel index in the sorted list. */
      PassSimple::Sub &sub = pass.sub("ListSort");
      sub.shader_set(inst_.shaders.static_shader_get(SURFEL_LIST_SORT));
      sub.bind_ssbo(SURFEL_BUF_SLOT, &surfels_buf_);
      sub.bind_ssbo(CAPTURE_BUF_SLOT, &capture_info_buf_);
+      sub.bind_ssbo("list_range_buf", &list_range_buf_);
+      sub.bind_ssbo("list_item_surfel_id_buf", &list_item_surfel_id_buf_);
+      sub.bind_ssbo("list_item_distance_buf", &list_item_distance_buf_);
+      sub.bind_ssbo("sorted_surfel_id_buf", &sorted_surfel_id_buf_);
+      sub.bind_ssbo("list_info_buf", &list_info_buf_);
+      sub.barrier(GPU_BARRIER_SHADER_STORAGE);
+      sub.dispatch(&dispatch_per_surfel_);
+    }
+    {
+      /* Take the sorted lists array and copy adjacent surfel indices back to the Surfels.
+       * Also relink coplanar surfels to avoid over shadowing.  */
+      PassSimple::Sub &sub = pass.sub("ListBuild");
+      sub.shader_set(inst_.shaders.static_shader_get(SURFEL_LIST_BUILD));
+      sub.bind_ssbo(SURFEL_BUF_SLOT, &surfels_buf_);
+      sub.bind_ssbo(CAPTURE_BUF_SLOT, &capture_info_buf_);
      sub.bind_ssbo("list_start_buf", &list_start_buf_);
+      sub.bind_ssbo("list_range_buf", &list_range_buf_);
+      sub.bind_ssbo("sorted_surfel_id_buf", &sorted_surfel_id_buf_);
      sub.bind_ssbo("list_info_buf", &list_info_buf_);
      sub.barrier(GPU_BARRIER_SHADER_STORAGE);
      sub.dispatch(&dispatch_per_list_);
@@ -1097,7 +1143,8 @@ void IrradianceBake::surfels_create(const Object &probe_object)
  }

  if (capture_info_buf_.surfel_len > surfels_buf_.size()) {
-    printf("IrradianceBake: Allocating %u surfels.\n", capture_info_buf_.surfel_len);
+    CLOG_INFO(
+        &Instance::log, "IrradianceBake: Allocating %u surfels.", capture_info_buf_.surfel_len);

    size_t max_size = GPU_max_storage_buffer_size();
    if (GPU_mem_stats_supported()) {
@@ -1282,7 +1329,16 @@ void IrradianceBake::raylists_build()
  dispatch_per_list_.x = divide_ceil_u(list_info_buf_.list_max, SURFEL_LIST_GROUP_SIZE);

  list_start_buf_.resize(ceil_to_multiple_u(list_info_buf_.list_max, 4));
+  list_counter_buf_.resize(ceil_to_multiple_u(list_info_buf_.list_max, 4));
+  list_range_buf_.resize(ceil_to_multiple_u(list_info_buf_.list_max * 2, 4));

+  list_item_distance_buf_.resize(ceil_to_multiple_u(max_ii(1, capture_info_buf_.surfel_len), 4));
+  list_item_surfel_id_buf_.resize(ceil_to_multiple_u(max_ii(1, capture_info_buf_.surfel_len), 4));
+  sorted_surfel_id_buf_.resize(ceil_to_multiple_u(max_ii(1, capture_info_buf_.surfel_len), 4));
+
+  GPU_storagebuf_clear(list_counter_buf_, 0);
+  /* Clear for the case where there are no lists or no surfels.
+   * Otherwise the irradiance_capture stage will have broken lists. */
  GPU_storagebuf_clear(list_start_buf_, -1);
  inst_.manager->submit(surfel_ray_build_ps_, ray_view_);
 }
--- a/source/blender/draw/engines/eevee/eevee_lightprobe_volume.hh
+++ b/source/blender/draw/engines/eevee/eevee_lightprobe_volume.hh
@@ -84,6 +84,15 @@ class IrradianceBake {
  SurfelListInfoBuf list_info_buf_ = {"list_info_buf_"};
  /** List array containing list start surfel index. Cleared to -1. */
  StorageArrayBuffer<int, 16, true> list_start_buf_ = {"list_start_buf_"};
+  /** Number of surfels in each surfel list. Cleared to 0. */
+  StorageArrayBuffer<int, 16, true> list_counter_buf_ = {"list_counter_buf_"};
+  /** IndexRange of sorting items for each surfel list. */
+  StorageArrayBuffer<int, 16, true> list_range_buf_ = {"list_range_buf_"};
+  /** Sorting items for fast sorting of surfels. */
+  StorageArrayBuffer<float, 16, true> list_item_distance_buf_ = {"list_item_distance_buf_"};
+  StorageArrayBuffer<int, 16, true> list_item_surfel_id_buf_ = {"list_item_surfel_id_buf_"};
+  /** Result of sorting. Needs to be a duplicate buffer to avoid race conditions. */
+  StorageArrayBuffer<int, 16, true> sorted_surfel_id_buf_ = {"sorted_surfel_id_buf_"};

  /* Dispatch size for per surfel workload. */
  int3 dispatch_per_surfel_ = int3(1);
--- a/source/blender/draw/engines/eevee/eevee_shader.cc
+++ b/source/blender/draw/engines/eevee/eevee_shader.cc
@@ -533,6 +533,12 @@ const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_
      return "eevee_surfel_light";
    case SURFEL_LIST_BUILD:
      return "eevee_surfel_list_build";
+    case SURFEL_LIST_FLATTEN:
+      return "eevee_surfel_list_flatten";
+    case SURFEL_LIST_PREFIX:
+      return "eevee_surfel_list_prefix";
+    case SURFEL_LIST_PREPARE:
+      return "eevee_surfel_list_prepare";
    case SURFEL_LIST_SORT:
      return "eevee_surfel_list_sort";
    case SURFEL_RAY:
--- a/source/blender/draw/engines/eevee/eevee_shader.hh
+++ b/source/blender/draw/engines/eevee/eevee_shader.hh
@@ -150,6 +150,9 @@ enum eShaderType {
  SURFEL_CLUSTER_BUILD,
  SURFEL_LIGHT,
  SURFEL_LIST_BUILD,
+  SURFEL_LIST_FLATTEN,
+  SURFEL_LIST_PREFIX,
+  SURFEL_LIST_PREPARE,
  SURFEL_LIST_SORT,
  SURFEL_RAY,

--- a/source/blender/draw/engines/eevee/shaders/CMakeLists.txt
+++ b/source/blender/draw/engines/eevee/shaders/CMakeLists.txt
@@ -146,6 +146,9 @@ set(SRC_GLSL_COMP
  eevee_surfel_cluster_build_comp.glsl
  eevee_surfel_light_comp.glsl
  eevee_surfel_list_build_comp.glsl
+  eevee_surfel_list_flatten_comp.glsl
+  eevee_surfel_list_prefix_comp.glsl
+  eevee_surfel_list_prepare_comp.glsl
  eevee_surfel_list_sort_comp.glsl
  eevee_surfel_ray_comp.glsl
  eevee_vertex_copy_comp.glsl
--- a/source/blender/draw/engines/eevee/shaders/eevee_lightprobe_volume_ray_comp.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_lightprobe_volume_ray_comp.glsl
@@ -113,12 +113,18 @@ void main()
  /* Walk the ray to get which surfels the irradiance sample is between. */
  int surfel_prev = -1;
  int surfel_next = list_start_buf[list_index];
-  for (; surfel_next > -1; surfel_next = surfel_buf[surfel_next].next) {
+  /* Avoid spinning for eternity. */
+  for (int i = 0; i < 9999; i++) {
+    if (surfel_next <= -1) {
+      break;
+    }
    /* Reminder: List is sorted with highest value first. */
    if (surfel_buf[surfel_next].ray_distance < irradiance_sample_ray_distance) {
      break;
    }
    surfel_prev = surfel_next;
+    surfel_next = surfel_buf[surfel_next].next;
+    assert(surfel_prev != surfel_next);
  }

  float3 sky_L = drw_world_incident_vector(P);
@@ -142,20 +148,32 @@ void main()
    Surfel surfel = surfel_buf[surfel_next];
    irradiance_capture_surfel(surfel, P, sh);
    validity_capture_surfel(surfel, P, validity);
+#if 0 /* For debugging the volume rays list. */
+    drw_debug_line(surfel.position, P, float4(0, 1, 0, 1), drw_debug_persistent_lifetime);
+#endif
  }
  else {
    irradiance_capture_world(-sky_L, sh);
    validity_capture_world(-sky_L, validity);
+#if 0 /* For debugging the volume rays list. */
+    drw_debug_line(P - sky_L, P, float4(0, 1, 1, 1), drw_debug_persistent_lifetime);
+#endif
  }

  if (surfel_prev > -1) {
    Surfel surfel = surfel_buf[surfel_prev];
    irradiance_capture_surfel(surfel, P, sh);
    validity_capture_surfel(surfel, P, validity);
+#if 0 /* For debugging the volume rays list. */
+    drw_debug_line(surfel.position, P, float4(1, 0, 1, 1), drw_debug_persistent_lifetime);
+#endif
  }
  else {
    irradiance_capture_world(sky_L, sh);
    validity_capture_world(sky_L, validity);
+#if 0 /* For debugging the volume rays list. */
+    drw_debug_line(P + sky_L, P, float4(1, 1, 0, 1), drw_debug_persistent_lifetime);
+#endif
  }

  /* Normalize for storage. We accumulated 2 samples. */
--- a/source/blender/draw/engines/eevee/shaders/eevee_surfel_list_build_comp.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_surfel_list_build_comp.glsl
@@ -3,36 +3,120 @@
 * SPDX-License-Identifier: GPL-2.0-or-later */

 /**
- * Takes scene surfel representation and build list of surfels aligning in a given direction.
+ * Read the result of the sorted buffer and update the `prev` and `next` surfel id inside each
+ * surfel structure. This step also transforms the linked list into a graph in order to avoid lost
+ * energy from almost coplanar surfaces.
 *
- * The lists head are allocated to fit the surfel granularity.
- *
- * Due to alignment the link and list head are split into several int arrays to avoid too much
- * memory waste.
- *
- * Dispatch 1 thread per surfel.
+ * Dispatched as 1 thread per list.
 */

 #include "infos/eevee_lightprobe_volume_infos.hh"

 COMPUTE_SHADER_CREATE_INFO(eevee_surfel_list_build)

-#include "eevee_surfel_list_lib.glsl"
+#include "gpu_shader_index_range_lib.glsl"
+
+/**
+ * Return true if link from `surfel[a]` to `surfel[b]` is valid.
+ * WARNING: this function is not commutative : `f(a, b) != f(b, a)`
+ */
+bool is_valid_surfel_link(int a, int b)
+{
+  float3 link_vector = normalize(surfel_buf[b].position - surfel_buf[a].position);
+  float link_angle_cos = dot(surfel_buf[a].normal, link_vector);
+  bool is_coplanar = abs(link_angle_cos) < 0.05f;
+  return !is_coplanar;
+}

 void main()
 {
-  int surfel_index = int(gl_GlobalInvocationID.x);
-  if (surfel_index >= int(capture_info_buf.surfel_len)) {
+  int list_id = int(gl_GlobalInvocationID.x);
+  if (list_id >= list_info_buf.list_max) {
    return;
  }

-  float ray_distance;
-  int list_index = surfel_list_index_get(
-      list_info_buf.ray_grid_size, surfel_buf[surfel_index].position, ray_distance);
-  /* Do separate assignment to avoid reference to buffer in arguments which is tricky to cross
-   * compile. */
-  surfel_buf[surfel_index].ray_distance = ray_distance;
-  /* NOTE: We only need to init the `list_start_buf` to -1 for the whole list to be valid since
-   * every surfel will load its `next` value from the list head. */
-  surfel_buf[surfel_index].next = atomicExchange(list_start_buf[list_index], surfel_index);
+  const IndexRange list_range = IndexRange(list_range_buf[list_id * 2 + 0],
+                                           list_range_buf[list_id * 2 + 1]);
+  if (list_range.size() == 0) {
+    list_start_buf[list_id] = -1;
+    return;
+  }
+
+  const int first_item = list_range.start();
+  const int last_item = list_range.last();
+
+  const int sorted_list_first = sorted_surfel_id_buf[first_item];
+  {
+    /* Update surfels linked list. */
+    int prev = -1;
+    int curr = sorted_surfel_id_buf[first_item];
+    for (int i = first_item; i <= last_item; i++) {
+      int next = (i == last_item) ? -1 : sorted_surfel_id_buf[i + 1];
+      surfel_buf[curr].next = next;
+      surfel_buf[curr].prev = prev;
+      prev = curr;
+      curr = next;
+    }
+  }
+  /* Update list start for irradiance sample capture. */
+  list_start_buf[list_id] = sorted_list_first;
+
+  /* Now that we have a sorted list, try to avoid connection from coplanar surfels.
+   * For that we disconnect them and link them to the first non-coplanar surfel.
+   * Note that this changes the list to a tree, which doesn't affect the rest of the algorithm.
+   *
+   * This is a really important step since it allows to clump more surfels into one ray list and
+   * avoid light leaking through surfaces. If we don't disconnect coplanar surfels, we lose many
+   * good rays by evaluating null radiance transfer between the coplanar surfels for rays that
+   * are not directly perpendicular to the surface. */
+
+  /* Limiting the total number of search steps avoids TDRs, but may cause overshadowing if the
+   * limit is reached. */
+  const int max_search = 2000;
+  uint search_count = 0;
+
+  /* Mutable `foreach`. */
+  for (int i = sorted_list_first, next = -1; i > -1; i = next) {
+    next = surfel_buf[i].next;
+
+    int valid_next = surfel_buf[i].next;
+    int valid_prev = surfel_buf[i].prev;
+
+    /* Search the list for the first valid next and previous surfel. */
+    while (search_count < max_search) {
+      if (valid_next == -1) {
+        break;
+      }
+      if (is_valid_surfel_link(i, valid_next)) {
+        break;
+      }
+      valid_next = surfel_buf[valid_next].next;
+      search_count++;
+    }
+    while (search_count < max_search) {
+      if (valid_prev == -1) {
+        break;
+      }
+      if (is_valid_surfel_link(i, valid_prev)) {
+        break;
+      }
+      valid_prev = surfel_buf[valid_prev].prev;
+      search_count++;
+    }
+
+    surfel_buf[i].next = valid_next;
+    surfel_buf[i].prev = valid_prev;
+  }
+
+#if 0 /* For debugging the sorted list. */
+  for (int i = sorted_list_first, next = -1; i > -1; i = next) {
+    next = surfel_buf[i].next;
+    if (next != -1) {
+      drw_debug_line(surfel_buf[next].position,
+                     surfel_buf[i].position,
+                     float4(1, 0, 0, 1),
+                     drw_debug_persistent_lifetime);
+    }
+  }
+#endif
 }
--- a/source/blender/draw/engines/eevee/shaders/eevee_surfel_list_flatten_comp.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_surfel_list_flatten_comp.glsl
@@ -0,0 +1,29 @@
+/* SPDX-FileCopyrightText: 2023 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+/**
+ * Flatten surfel sorting data into a sequential structure.
+ * The buffer structure follows the lists OffsetIndices.
+ *
+ * Dispatched as 1 thread per surfel.
+ */
+
+#include "infos/eevee_lightprobe_volume_infos.hh"
+
+COMPUTE_SHADER_CREATE_INFO(eevee_surfel_list_flatten)
+
+void main()
+{
+  int surfel_id = int(gl_GlobalInvocationID.x);
+  if (surfel_id >= int(capture_info_buf.surfel_len)) {
+    return;
+  }
+
+  int list_id = surfel_buf[surfel_id].list_id;
+  int item_id = atomicAdd(list_counter_buf[list_id], -1) - 1;
+  item_id += list_range_buf[list_id * 2 + 0];
+
+  list_item_distance_buf[item_id] = surfel_buf[surfel_id].ray_distance;
+  list_item_surfel_id_buf[item_id] = surfel_id;
+}
--- a/source/blender/draw/engines/eevee/shaders/eevee_surfel_list_prefix_comp.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_surfel_list_prefix_comp.glsl
@@ -0,0 +1,28 @@
+/* SPDX-FileCopyrightText: 2023 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+/**
+ * Create a prefix sum of the surfels per list.
+ * Outputs one IndexRange for each surfel list.
+ *
+ * Dispatched as 1 thread per surfel list.
+ */
+
+#include "infos/eevee_lightprobe_volume_infos.hh"
+
+COMPUTE_SHADER_CREATE_INFO(eevee_surfel_list_prefix)
+
+void main()
+{
+  int list_id = int(gl_GlobalInvocationID.x);
+  if (list_id >= list_info_buf.list_max) {
+    return;
+  }
+
+  int list_item_count = list_counter_buf[list_id];
+  int list_item_start = atomicAdd(list_info_buf.list_prefix_sum, list_item_count);
+
+  list_range_buf[list_id * 2 + 0] = list_item_start;
+  list_range_buf[list_id * 2 + 1] = list_item_count;
+}
--- a/source/blender/draw/engines/eevee/shaders/eevee_surfel_list_prepare_comp.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_surfel_list_prepare_comp.glsl
@@ -0,0 +1,44 @@
+/* SPDX-FileCopyrightText: 2023 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+/**
+ * Takes the scene surfel representation to build lists of surfels aligning with a given direction.
+ *
+ * The lists heads are allocated to fit the surfel granularity.
+ *
+ * Due to alignment the link and list head are split into several int arrays to avoid too much
+ * memory waste.
+ *
+ * This step only counts the number of surfels per list.
+ *
+ * Dispatch 1 thread per surfel.
+ */
+
+#include "infos/eevee_lightprobe_volume_infos.hh"
+
+COMPUTE_SHADER_CREATE_INFO(eevee_surfel_list_prepare)
+
+#include "eevee_surfel_list_lib.glsl"
+
+void main()
+{
+  int surfel_id = int(gl_GlobalInvocationID.x);
+  if (surfel_id >= int(capture_info_buf.surfel_len)) {
+    return;
+  }
+  float ray_distance;
+  int list_id = surfel_list_index_get(
+      list_info_buf.ray_grid_size, surfel_buf[surfel_id].position, ray_distance);
+
+  atomicAdd(list_counter_buf[list_id], 1);
+  /* Do separate assignment to avoid reference to buffer in arguments which is tricky to cross
+   * compile. */
+  surfel_buf[surfel_id].ray_distance = ray_distance;
+  surfel_buf[surfel_id].list_id = list_id;
+
+  /* Clear for next step. */
+  if (gl_GlobalInvocationID.x == 0u) {
+    list_info_buf.list_prefix_sum = 0;
+  }
+}
--- a/source/blender/draw/engines/eevee/shaders/eevee_surfel_list_sort_comp.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_surfel_list_sort_comp.glsl
@@ -3,165 +3,50 @@
 * SPDX-License-Identifier: GPL-2.0-or-later */

 /**
- * Sort a buffer of surfel list by distance along a direction.
- * The resulting surfel lists are then the equivalent of a series of ray cast in the same
- * direction. The fact that the surfels are sorted gives proper occlusion.
+ * Sort ranges of surfels inside a list using radix sort.
+ * The previous step flattens the list data into one big array, with a specific range of data for
+ * each ray list. This stage operates on these ranges.
 *
- * Sort by increasing `ray_distance`. Start of list is smallest value.
+ * For now the implementation is done in a single pass with brute force.
+ * All elements of a list scan inside the whole list.
 *
- * Dispatched as 1 thread per list.
+ * Dispatched as 1 thread per surfel (array elem).
 */

 #include "infos/eevee_lightprobe_volume_infos.hh"

 COMPUTE_SHADER_CREATE_INFO(eevee_surfel_list_sort)

-/**
- * A doubly-linked list implementation.
- * IMPORTANT: It is not general purpose as it only cover the cases needed by this shader.
- */
-struct List {
-  int first, last;
-};
-
-/* Return the split list after link_index. */
-List list_split_after(inout List original, int link_index)
-{
-  int next_link = surfel_buf[link_index].next;
-  int last_link = original.last;
-
-  original.last = link_index;
-
-  List split;
-  split.first = next_link;
-  split.last = last_link;
-
-  surfel_buf[link_index].next = -1;
-  surfel_buf[next_link].prev = -1;
-
-  return split;
-}
-
-void list_add_tail(inout List list, int link_index)
-{
-  surfel_buf[link_index].next = -1;
-  surfel_buf[link_index].prev = list.last;
-  surfel_buf[list.last].next = link_index;
-  list.last = link_index;
-}
-
-void list_insert_link_before(inout List list, int next_link, int new_link)
-{
-  if (list.first == next_link) {
-    /* At beginning of list. */
-    list.first = new_link;
-  }
-  int prev_link = surfel_buf[next_link].prev;
-  surfel_buf[new_link].next = next_link;
-  surfel_buf[new_link].prev = prev_link;
-  surfel_buf[next_link].prev = new_link;
-  if (prev_link != -1) {
-    surfel_buf[prev_link].next = new_link;
-  }
-}
-
-/**
- * Return true if link from `surfel[a]` to `surfel[b]` is valid.
- * WARNING: this function is not commutative : `f(a, b) != f(b, a)`
- */
-bool is_valid_surfel_link(int a, int b)
-{
-  float3 link_vector = normalize(surfel_buf[b].position - surfel_buf[a].position);
-  float link_angle_cos = dot(surfel_buf[a].normal, link_vector);
-  bool is_coplanar = abs(link_angle_cos) < 1.0e-3f;
-  return !is_coplanar;
-}
+#include "gpu_shader_index_range_lib.glsl"

 void main()
 {
-  int list_index = int(gl_GlobalInvocationID.x);
-  if (list_index >= list_info_buf.list_max) {
+  int item_id = int(gl_GlobalInvocationID.x);
+  if (item_id >= int(capture_info_buf.surfel_len)) {
    return;
  }

-  int list_start = list_start_buf[list_index];
+  int surfel_id = list_item_surfel_id_buf[item_id];
+  int list_id = surfel_buf[surfel_id].list_id;
+  float ray_distance = list_item_distance_buf[item_id];

-  if (list_start == -1) {
-    /* Empty list. */
-    return;
-  }
-
-  /* Create Surfel.prev pointers. */
-  int prev_id = -1;
-  for (int i = list_start; i > -1; i = surfel_buf[i].next) {
-    surfel_buf[i].prev = prev_id;
-    prev_id = i;
-  }
-
-  List sorted_list;
-  sorted_list.first = list_start;
-  sorted_list.last = prev_id;
-
-  if (sorted_list.first == sorted_list.last) {
-    /* Only one item. Nothing to sort. */
-    return;
-  }
-
-  /* Using insertion sort as it is easier to implement. */
-
-  List unsorted_list = list_split_after(sorted_list, sorted_list.first);
-
-  /* Mutable for-each. */
-  for (int i = unsorted_list.first, next = 0; i > -1; i = next) {
-    next = surfel_buf[i].next;
-
-    bool insert = false;
-    for (int j = sorted_list.first; j > -1; j = surfel_buf[j].next) {
-      if (surfel_buf[j].ray_distance < surfel_buf[i].ray_distance) {
-        list_insert_link_before(sorted_list, j, i);
-        insert = true;
-        break;
+  IndexRange list_range = IndexRange(list_range_buf[list_id * 2 + 0],
+                                     list_range_buf[list_id * 2 + 1]);
+  int prefix = 0;
+  /* Prefix sum inside the list range. */
+  for (int i = list_range.start(); i <= list_range.last(); i++) {
+    if (list_item_distance_buf[i] > ray_distance) {
+      prefix++;
+    }
+    else if (list_item_distance_buf[i] == ray_distance) {
+      /* Resolve the case where 2 items have the same value. */
+      if (i > item_id) {
+        prefix++;
      }
    }
-    if (insert == false) {
-      list_add_tail(sorted_list, i);
-    }
  }

-  /* Update list start for irradiance sample capture. */
-  list_start_buf[list_index] = sorted_list.first;
-
-  /* Now that we have a sorted list, try to avoid connection from coplanar surfels.
-   * For that we disconnect them and link them to the first non-coplanar surfel.
-   * Note that this changes the list to a tree, which doesn't affect the rest of the algorithm.
-   *
-   * This is a really important step since it allows to clump more surfels into one ray list and
-   * avoid light leaking through surfaces. If we don't disconnect coplanar surfels, we loose many
-   * good rays by evaluating null radiance transfer between the coplanar surfels for rays that
-   * are not directly perpendicular to the surface. */
-
-  /* Mutable `foreach`. */
-  for (int i = sorted_list.first, next = 0; i > -1; i = next) {
-    next = surfel_buf[i].next;
-
-    int valid_next = surfel_buf[i].next;
-    int valid_prev = surfel_buf[i].prev;
-
-    /* Search the list for the first valid next and previous surfel. */
-    while (valid_next > -1) {
-      if (is_valid_surfel_link(i, valid_next)) {
-        break;
-      }
-      valid_next = surfel_buf[valid_next].next;
-    }
-    while (valid_prev > -1) {
-      if (is_valid_surfel_link(i, valid_prev)) {
-        break;
-      }
-      valid_prev = surfel_buf[valid_prev].prev;
-    }
-
-    surfel_buf[i].next = valid_next;
-    surfel_buf[i].prev = valid_prev;
-  }
+  int sorted_id = list_range.start() + prefix;
+  sorted_surfel_id_buf[sorted_id] = surfel_id;
+  surfel_buf[surfel_id].index_in_sorted_list = sorted_id;
 }
--- a/source/blender/draw/engines/eevee/shaders/infos/eevee_lightprobe_volume_infos.hh
+++ b/source/blender/draw/engines/eevee/shaders/infos/eevee_lightprobe_volume_infos.hh
@@ -126,16 +126,46 @@ COMPUTE_SOURCE("eevee_surfel_cluster_build_comp.glsl")
 DO_STATIC_COMPILATION()
 GPU_SHADER_CREATE_END()

-GPU_SHADER_CREATE_INFO(eevee_surfel_list_build)
+GPU_SHADER_CREATE_INFO(eevee_surfel_list_prepare)
 LOCAL_GROUP_SIZE(SURFEL_GROUP_SIZE)
 BUILTINS(BuiltinBits::TEXTURE_ATOMIC)
 TYPEDEF_SOURCE("eevee_defines.hh")
 TYPEDEF_SOURCE("eevee_lightprobe_shared.hh")
 ADDITIONAL_INFO(eevee_surfel_common)
 ADDITIONAL_INFO(draw_view)
-STORAGE_BUF(0, read_write, int, list_start_buf[])
+STORAGE_BUF(0, read_write, int, list_counter_buf[])
 STORAGE_BUF(6, read_write, SurfelListInfoData, list_info_buf)
-COMPUTE_SOURCE("eevee_surfel_list_build_comp.glsl")
+COMPUTE_SOURCE("eevee_surfel_list_prepare_comp.glsl")
+DO_STATIC_COMPILATION()
+GPU_SHADER_CREATE_END()
+
+GPU_SHADER_CREATE_INFO(eevee_surfel_list_prefix)
+LOCAL_GROUP_SIZE(SURFEL_GROUP_SIZE)
+BUILTINS(BuiltinBits::TEXTURE_ATOMIC)
+TYPEDEF_SOURCE("eevee_defines.hh")
+TYPEDEF_SOURCE("eevee_lightprobe_shared.hh")
+ADDITIONAL_INFO(eevee_surfel_common)
+ADDITIONAL_INFO(draw_view)
+STORAGE_BUF(0, read, int, list_counter_buf[])
+STORAGE_BUF(2, write, int, list_range_buf[])
+STORAGE_BUF(6, read_write, SurfelListInfoData, list_info_buf)
+COMPUTE_SOURCE("eevee_surfel_list_prefix_comp.glsl")
+DO_STATIC_COMPILATION()
+GPU_SHADER_CREATE_END()
+
+GPU_SHADER_CREATE_INFO(eevee_surfel_list_flatten)
+LOCAL_GROUP_SIZE(SURFEL_GROUP_SIZE)
+BUILTINS(BuiltinBits::TEXTURE_ATOMIC)
+TYPEDEF_SOURCE("eevee_defines.hh")
+TYPEDEF_SOURCE("eevee_lightprobe_shared.hh")
+ADDITIONAL_INFO(eevee_surfel_common)
+ADDITIONAL_INFO(draw_view)
+STORAGE_BUF(0, read_write, int, list_counter_buf[])
+STORAGE_BUF(1, read, int, list_range_buf[])
+STORAGE_BUF(2, write, float, list_item_distance_buf[])
+STORAGE_BUF(3, write, int, list_item_surfel_id_buf[])
+STORAGE_BUF(6, read, SurfelListInfoData, list_info_buf)
+COMPUTE_SOURCE("eevee_surfel_list_flatten_comp.glsl")
 DO_STATIC_COMPILATION()
 GPU_SHADER_CREATE_END()

@@ -145,12 +175,30 @@ TYPEDEF_SOURCE("eevee_defines.hh")
 TYPEDEF_SOURCE("eevee_lightprobe_shared.hh")
 ADDITIONAL_INFO(eevee_surfel_common)
 ADDITIONAL_INFO(draw_view)
-STORAGE_BUF(0, read_write, int, list_start_buf[])
+STORAGE_BUF(0, read, int, list_range_buf[])
+STORAGE_BUF(1, read, int, list_item_surfel_id_buf[])
+STORAGE_BUF(2, read, float, list_item_distance_buf[])
+STORAGE_BUF(3, write, int, sorted_surfel_id_buf[])
 STORAGE_BUF(6, read, SurfelListInfoData, list_info_buf)
 COMPUTE_SOURCE("eevee_surfel_list_sort_comp.glsl")
 DO_STATIC_COMPILATION()
 GPU_SHADER_CREATE_END()

+GPU_SHADER_CREATE_INFO(eevee_surfel_list_build)
+LOCAL_GROUP_SIZE(SURFEL_GROUP_SIZE)
+BUILTINS(BuiltinBits::TEXTURE_ATOMIC)
+TYPEDEF_SOURCE("eevee_defines.hh")
+TYPEDEF_SOURCE("eevee_lightprobe_shared.hh")
+ADDITIONAL_INFO(eevee_surfel_common)
+ADDITIONAL_INFO(draw_view)
+STORAGE_BUF(0, write, int, list_start_buf[])
+STORAGE_BUF(1, read, int, list_range_buf[])
+STORAGE_BUF(3, read, int, sorted_surfel_id_buf[])
+STORAGE_BUF(6, read_write, SurfelListInfoData, list_info_buf)
+COMPUTE_SOURCE("eevee_surfel_list_build_comp.glsl")
+DO_STATIC_COMPILATION()
+GPU_SHADER_CREATE_END()
+
 GPU_SHADER_CREATE_INFO(eevee_surfel_ray)
 LOCAL_GROUP_SIZE(SURFEL_GROUP_SIZE)
 TYPEDEF_SOURCE("eevee_defines.hh")
--- a/source/blender/draw/tests/eevee_test.cc
+++ b/source/blender/draw/tests/eevee_test.cc
@@ -1825,39 +1825,46 @@ DRAW_TEST(eevee_shadow_page_mask)

 static void test_eevee_surfel_list()
 {
-  GTEST_SKIP() << "Result is non-deterministic. To be revisited.";
-
  GPU_render_begin();
  StorageArrayBuffer<int> list_start_buf = {"list_start_buf"};
  StorageVectorBuffer<Surfel> surfel_buf = {"surfel_buf"};
  CaptureInfoBuf capture_info_buf = {"capture_info_buf"};
  SurfelListInfoBuf list_info_buf = {"list_info_buf"};
+  StorageArrayBuffer<int> list_counter_buf = {"list_counter_buf"};
+  StorageArrayBuffer<int> list_range_buf = {"list_range_buf"};
+  StorageArrayBuffer<float> list_item_distance_buf = {"list_item_distance_buf"};
+  StorageArrayBuffer<int> list_item_surfel_id_buf = {"list_item_surfel_id_buf"};
+  StorageArrayBuffer<int> sorted_surfel_id_buf = {"sorted_surfel_id_buf"};

  /**
   * Simulate surfels on a 2x2 projection grid covering [0..2] on the Z axis.
   */
  {
    Surfel surfel;
+    surfel.normal = {0.0f, 0.0f, 1.0f};
    /* NOTE: Expected link assumes linear increasing processing order [0->5]. But this is
     * multithreaded and we can't know the execution order in advance. */
-    /* 0: Project to (1, 0) = list 1. Unsorted Next = -1; Next = -1; Previous = 3. */
+    /* 0: Project to (1, 0) = list 1. Next = -1; Previous = 3. */
    surfel.position = {1.1f, 0.1f, 0.1f};
    surfel_buf.append(surfel);
-    /* 1: Project to (1, 0) = list 1. Unsorted Next = 0; Next = 2; Previous = -1. */
+    /* 1: Project to (1, 0) = list 1. Next = 2; Previous = -1. */
    surfel.position = {1.1f, 0.2f, 0.5f};
    surfel_buf.append(surfel);
-    /* 2: Project to (1, 0) = list 1. Unsorted Next = 1; Next = 3; Previous = 1. */
+    /* 2: Project to (1, 0) = list 1. Next = 3; Previous = 1. */
    surfel.position = {1.1f, 0.3f, 0.3f};
    surfel_buf.append(surfel);
-    /* 3: Project to (1, 0) = list 1. Unsorted Next = 2; Next = 0; Previous = 2. */
+    /* 3: Project to (1, 0) = list 1. Next = 0; Previous = 2. */
    surfel.position = {1.2f, 0.4f, 0.2f};
    surfel_buf.append(surfel);
-    /* 4: Project to (1, 1) = list 3. Unsorted Next = -1; Next = -1; Previous = -1. */
+    /* 4: Project to (1, 1) = list 3. Next = -1; Previous = -1. */
    surfel.position = {1.0f, 1.0f, 0.5f};
    surfel_buf.append(surfel);
-    /* 5: Project to (0, 1) = list 2. Unsorted Next = -1; Next = -1; Previous = -1. */
+    /* 5: Project to (0, 1) = list 2. Next = -1; Previous = -1. */
    surfel.position = {0.1f, 1.1f, 0.5f};
    surfel_buf.append(surfel);
+    /* 6: Project to (0, 1) = list 2. Next = -1; Previous = -1. Disconnected because coplanar. */
+    surfel.position = {0.2f, 1.1f, 0.5f};
+    surfel_buf.append(surfel);

    surfel_buf.push_update();
  }
@@ -1875,27 +1882,82 @@ static void test_eevee_surfel_list()
    list_start_buf.push_update();
    GPU_storagebuf_clear(list_start_buf, -1);
  }
+  {
+    list_counter_buf.resize(ceil_to_multiple_u(list_info_buf.list_max, 4u));
+    list_counter_buf.push_update();
+    GPU_storagebuf_clear(list_counter_buf, 0);
+  }
+  {
+    list_range_buf.resize(ceil_to_multiple_u(list_info_buf.list_max * 2, 4u));
+    list_range_buf.push_update();
+    GPU_storagebuf_clear(list_range_buf, -1);
+  }
+  {
+    list_item_distance_buf.resize(ceil_to_multiple_u(capture_info_buf.surfel_len, 4u));
+    list_item_surfel_id_buf.resize(ceil_to_multiple_u(capture_info_buf.surfel_len, 4u));
+    sorted_surfel_id_buf.resize(ceil_to_multiple_u(capture_info_buf.surfel_len, 4u));
+    GPU_storagebuf_clear(list_item_distance_buf, -1);
+    GPU_storagebuf_clear(list_item_surfel_id_buf, -1);
+    GPU_storagebuf_clear(sorted_surfel_id_buf, -1);
+  }

  /* Top-down view. */
  View view = {"RayProjectionView"};
  view.sync(float4x4::identity(), math::projection::orthographic<float>(0, 2, 0, 2, 0, 1));

  gpu::Shader *sh_build = GPU_shader_create_from_info_name("eevee_surfel_list_build");
+  gpu::Shader *sh_flatten = GPU_shader_create_from_info_name("eevee_surfel_list_flatten");
+  gpu::Shader *sh_prefix = GPU_shader_create_from_info_name("eevee_surfel_list_prefix");
+  gpu::Shader *sh_prepare = GPU_shader_create_from_info_name("eevee_surfel_list_prepare");
  gpu::Shader *sh_sort = GPU_shader_create_from_info_name("eevee_surfel_list_sort");

  PassSimple pass("Build_and_Sort");
-  pass.shader_set(sh_build);
-  pass.bind_ssbo("list_start_buf", list_start_buf);
+  pass.shader_set(sh_prepare);
+  pass.bind_ssbo("list_counter_buf", list_counter_buf);
+  pass.bind_ssbo("list_info_buf", list_info_buf);
  pass.bind_ssbo("surfel_buf", surfel_buf);
  pass.bind_ssbo("capture_info_buf", capture_info_buf);
+  pass.dispatch(int3(1, 1, 1));
+  pass.barrier(GPU_BARRIER_SHADER_STORAGE);
+
+  pass.shader_set(sh_prefix);
+  pass.bind_ssbo("list_counter_buf", list_counter_buf);
+  pass.bind_ssbo("list_range_buf", list_range_buf);
  pass.bind_ssbo("list_info_buf", list_info_buf);
+  pass.bind_ssbo("surfel_buf", surfel_buf);
+  pass.bind_ssbo("capture_info_buf", capture_info_buf);
+  pass.dispatch(int3(1, 1, 1));
+  pass.barrier(GPU_BARRIER_SHADER_STORAGE);
+
+  pass.shader_set(sh_flatten);
+  pass.bind_ssbo("list_counter_buf", list_counter_buf);
+  pass.bind_ssbo("list_range_buf", list_range_buf);
+  pass.bind_ssbo("list_item_distance_buf", list_item_distance_buf);
+  pass.bind_ssbo("list_item_surfel_id_buf", list_item_surfel_id_buf);
+  pass.bind_ssbo("list_info_buf", list_info_buf);
+  pass.bind_ssbo("surfel_buf", surfel_buf);
+  pass.bind_ssbo("capture_info_buf", capture_info_buf);
  pass.dispatch(int3(1, 1, 1));
  pass.barrier(GPU_BARRIER_SHADER_STORAGE);

  pass.shader_set(sh_sort);
-  pass.bind_ssbo("list_start_buf", list_start_buf);
-  pass.bind_ssbo("surfel_buf", surfel_buf);
+  pass.bind_ssbo("list_range_buf", list_range_buf);
+  pass.bind_ssbo("list_item_surfel_id_buf", list_item_surfel_id_buf);
+  pass.bind_ssbo("list_item_distance_buf", list_item_distance_buf);
+  pass.bind_ssbo("sorted_surfel_id_buf", sorted_surfel_id_buf);
  pass.bind_ssbo("list_info_buf", list_info_buf);
+  pass.bind_ssbo("surfel_buf", surfel_buf);
+  pass.bind_ssbo("capture_info_buf", capture_info_buf);
+  pass.dispatch(int3(1, 1, 1));
+  pass.barrier(GPU_BARRIER_SHADER_STORAGE);
+
+  pass.shader_set(sh_build);
+  pass.bind_ssbo("list_start_buf", list_start_buf);
+  pass.bind_ssbo("list_range_buf", list_range_buf);
+  pass.bind_ssbo("sorted_surfel_id_buf", sorted_surfel_id_buf);
+  pass.bind_ssbo("list_info_buf", list_info_buf);
+  pass.bind_ssbo("surfel_buf", surfel_buf);
+  pass.bind_ssbo("capture_info_buf", capture_info_buf);
  pass.dispatch(int3(1, 1, 1));
  pass.barrier(GPU_BARRIER_BUFFER_UPDATE);

@@ -1906,8 +1968,8 @@ static void test_eevee_surfel_list()
  surfel_buf.read();

  /* Expect surfel list. */
-  Vector<int> expect_link_next = {-1, +2, +3, +0, -1, -1};
-  Vector<int> expect_link_prev = {+3, -1, +1, +2, -1, -1};
+  Vector<int> expect_link_next = {-1, +2, +3, +0, -1, -1, -1};
+  Vector<int> expect_link_prev = {+3, -1, +1, +2, -1, -1, -1};

  Vector<int> link_next, link_prev;
  for (const auto &surfel : Span<Surfel>(surfel_buf.data(), surfel_buf.size())) {
@@ -1915,22 +1977,17 @@ static void test_eevee_surfel_list()
    link_prev.append(surfel.prev);
  }

-#if 0 /* Useful for debugging */
-  /* NOTE: All of these are unstable by definition (atomic + multi-thread).
-   * But should be consistent since we only dispatch one thread-group. */
-  /* Expect last added surfel index. It is the list start index before sorting. */
  Vector<int> expect_list_start = {-1, 1, 5, 4};
-  // Span<int>(list_start_buf.data(), expect_list_start.size()).print_as_lines("list_start");
-  // link_next.as_span().print_as_lines("link_next");
-  // link_prev.as_span().print_as_lines("link_prev");
-  EXPECT_EQ_SPAN(expect_list_start, list_start_buf);
-#endif
+  EXPECT_EQ_SPAN<int>(expect_list_start, list_start_buf);
  EXPECT_EQ_SPAN<int>(expect_link_next, link_next);
  EXPECT_EQ_SPAN<int>(expect_link_prev, link_prev);

  GPU_shader_unbind();

  GPU_shader_free(sh_build);
+  GPU_shader_free(sh_flatten);
+  GPU_shader_free(sh_prefix);
+  GPU_shader_free(sh_prepare);
  GPU_shader_free(sh_sort);
  DRW_shaders_free();
  GPU_render_end();
--- a/source/blender/gpu/glsl_preprocess/glsl_preprocess.hh
+++ b/source/blender/gpu/glsl_preprocess/glsl_preprocess.hh
@@ -1342,7 +1342,8 @@ class Preprocessor {
  {
    const bool skip_drw_debug = filename.find("draw_debug_draw_lib.glsl") != std::string::npos ||
                                filename.find("draw_debug_draw_display_vert.glsl") !=
-                                    std::string::npos;
+                                    std::string::npos ||
+                                filename.find("draw_shader_shared.hh") != std::string::npos;
    using namespace metadata;
    /* TODO: This can trigger false positive caused by disabled #if blocks. */
    std::string tokens[] = {"gl_FragCoord",