From de16ed4e8cd5aabdc4fbd63fdd0b06dd0d497b12 Mon Sep 17 00:00:00 2001
From: Alaska <Alaskayou01@gmail.com>
Date: Mon, 13 Oct 2025 14:40:01 +0200
Subject: [PATCH] Cycles: Improve performance on uneven GPU renders

When a Cycles render is uneven (e.g. a small portion of the image takes
significantly longer to render than the rest of the image), then GPUs
typically suffer from poor performance due to low occupancy unless a
large number of samples is scheduled to compensate for that small
complex region.

Due to how the Cycles render scheduler is set up, Cycles tries to
increase GPU occupancy by increasing the number of scheduled samples,
but also balance render preview update time by scaling back
the number of scheduled samples based on the previous workload's time
per sample to try and fit within the target update time.

However, using the previous workload's time per sample to scale back the
scheduled number of samples gives suboptimal results because the
previous workload's time per sample is usually not representative of the
new time per sample that occurs as GPU occupancy increases, because
increasing GPU occupancy typically results in reduced time per sample.

This commit improves on this issue by assuming that increasing GPU
occupancy linearly improves performance, and adjusts the function
that scales back the sample count to fit within a specific update
window to use this assumption. This leads to Cycles increasing the
amount of work scheduled onto the GPU quicker at the beginning of
uneven/low occupancy scenes, generally leading to improved performance,
especially when rendering at low sample counts or with strict
time limits.

Ref #147954

Pull Request: https://projects.blender.org/blender/blender/pulls/147950
---
 intern/cycles/integrator/render_scheduler.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/intern/cycles/integrator/render_scheduler.cpp b/intern/cycles/integrator/render_scheduler.cpp
index 454ca95b35c..2d27aa7716c 100644
--- a/intern/cycles/integrator/render_scheduler.cpp
+++ b/intern/cycles/integrator/render_scheduler.cpp
@@ -873,8 +873,10 @@ int RenderScheduler::get_num_samples_to_path_trace() const
     /* Keep occupancy at about 0.5 (this is more of an empirical figure which seems to match scenes
      * with good performance without forcing occupancy to be higher). */
     int num_samples_to_occupy = state_.occupancy_num_samples;
+    float ratio_to_increase_occupancy = 1.0f;
     if (state_.occupancy > 0 && state_.occupancy < 0.5f) {
-      num_samples_to_occupy = lround(state_.occupancy_num_samples * 0.7f / state_.occupancy);
+      ratio_to_increase_occupancy = 0.7f / state_.occupancy;
+      num_samples_to_occupy = lround(state_.occupancy_num_samples * ratio_to_increase_occupancy);
     }
 
     /* Time limit for path tracing, which constraints the scheduler from "over-scheduling" work
@@ -917,10 +919,12 @@ int RenderScheduler::get_num_samples_to_path_trace() const
       }
     }
     if (path_tracing_time_limit != 0) {
-      /* Use the per-sample time from the previously rendered batch of samples so that the
-       * correction is applied much quicker. */
+      /* Use the per-sample time from the previously rendered batch of samples, so that the
+       * correction is applied much quicker. Also use the predicted increase in performance from
+       * increased occupancy. */
       const double predicted_render_time = num_samples_to_occupy *
-                                           path_trace_time_.get_last_sample_time();
+                                           path_trace_time_.get_last_sample_time() /
+                                           ratio_to_increase_occupancy;
       if (predicted_render_time > path_tracing_time_limit) {
         num_samples_to_occupy = lround(num_samples_to_occupy *
                                        (path_tracing_time_limit / predicted_render_time));