diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
index 5da69d9138a..284bddd9fec 100644
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -267,6 +267,7 @@ ccl_device_forceinline uchar4 make_uchar4(const uchar x,
 #define atanf(x) atan(float(x))
 #define floorf(x) floor(float(x))
 #define ceilf(x) ceil(float(x))
+#define roundf(x) round(float(x))
 #define hypotf(x, y) hypot(float(x), float(y))
 #define atan2f(x, y) atan2(float(x), float(y))
 #define fmaxf(x, y) fmax(float(x), float(y))
diff --git a/intern/cycles/kernel/device/oneapi/compat.h b/intern/cycles/kernel/device/oneapi/compat.h
index be7b4b8a80d..f83c7c29a66 100644
--- a/intern/cycles/kernel/device/oneapi/compat.h
+++ b/intern/cycles/kernel/device/oneapi/compat.h
@@ -214,6 +214,7 @@ ccl_device_forceinline int __float_as_int(const float x)
 #define atanf(x) sycl::atan((x))
 #define floorf(x) sycl::floor((x))
 #define ceilf(x) sycl::ceil((x))
+#define roundf(x) sycl::round((x))
 #define sinhf(x) sycl::sinh((x))
 #define coshf(x) sycl::cosh((x))
 #define tanhf(x) sycl::tanh((x))
diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h
index ed12ad326bc..f5ffe864a3d 100644
--- a/intern/cycles/kernel/integrator/shade_volume.h
+++ b/intern/cycles/kernel/integrator/shade_volume.h
@@ -78,20 +78,21 @@ struct EquiangularCoefficients {
   Interval<float> t_range;
 };
 
-/* Evaluate shader to get extinction coefficient at P. */
-ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg,
-                                                   IntegratorShadowState state,
-                                                   ccl_private ShaderData *ccl_restrict sd,
-                                                   ccl_private Spectrum *ccl_restrict extinction)
+/* Evaluate extinction coefficient at `sd->P`. */
+template<const bool shadow, typename IntegratorGenericState>
+ccl_device_inline Spectrum volume_shader_eval_extinction(KernelGlobals kg,
+                                                         const IntegratorGenericState state,
+                                                         ccl_private ShaderData *ccl_restrict sd,
+                                                         uint32_t path_flag)
 {
-  volume_shader_eval<true>(kg, state, sd, PATH_RAY_SHADOW);
+  /* Use emission flag to avoid storing phase function. */
+  /* TODO(weizhen): we could add another flag to skip evaluating the emission, but we've run out of
+   * bits for the path flag.*/
+  path_flag |= PATH_RAY_EMISSION;
 
-  if (!(sd->flag & SD_EXTINCTION)) {
-    return false;
-  }
+  volume_shader_eval<shadow>(kg, state, sd, path_flag);
 
-  *extinction = sd->closure_transparent_extinction;
-  return true;
+  return (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction : zero_spectrum();
 }
 
 /* Evaluate shader to get absorption, scattering and emission at P. */
@@ -601,8 +602,136 @@ ccl_device bool volume_integrate_advance(KernelGlobals kg,
  * These functions are used to attenuate shadow rays to lights. Both absorption
  * and scattering will block light, represented by the extinction coefficient. */
 
-/* heterogeneous volume: integrate stepping through the volume until we
- * reach the end, get absorbed entirely, or run out of iterations */
+/* Advance until the majorant optical depth is at least one, or we have reached the end of the
+ * volume. Because telescoping has to take at least one sample per segment, having a larger
+ * segment helps to take less samples. */
+ccl_device_inline bool volume_octree_advance_shadow(KernelGlobals kg,
+                                                    const ccl_private Ray *ccl_restrict ray,
+                                                    ccl_private ShaderData *ccl_restrict sd,
+                                                    const IntegratorShadowState state,
+                                                    ccl_private RNGState *rng_state,
+                                                    const uint32_t path_flag,
+                                                    ccl_private OctreeTracing &octree,
+                                                    ccl_private VolumeStep &vstep)
+{
+  /* Advance random number offset. */
+  rng_state->rng_offset += PRNG_BOUNCE_NUM;
+
+  Extrema<float> sigma = octree.t.is_empty() ? Extrema<float>(FLT_MAX, -FLT_MAX) : octree.sigma;
+  const float tmin = octree.t.min;
+
+  while (octree.t.is_empty() || sigma.range() * octree.t.length() < 1.0f) {
+    if (!volume_octree_advance<true>(kg, ray, sd, state, rng_state, path_flag, octree, vstep)) {
+      return !octree.t.is_empty();
+    }
+
+    octree.sigma = sigma = merge(sigma, octree.sigma);
+    octree.t.min = tmin;
+  }
+
+  return true;
+}
+
+/* Compute transmittance along the ray using
+ * "Unbiased and consistent rendering using biased estimators" by Misso et. al,
+ * https://cs.dartmouth.edu/~wjarosz/publications/misso22unbiased.html
+ *
+ * The telescoping sum is
+ *         T = T_k + \sum_{j=k}^\infty(T_{j+1} - T_{j})
+ * where T_k is a biased estimation of the transmittance T by taking k samples,
+ * and (T_{j+1} - T_{j}) is the debiasing term.
+ * We decide the order k based on the optical thickness, and randomly pick a debiasing term of
+ * order j to evaluate.
+ * In the practice we take the powers of 2 to reuse samples for all orders.
+ *
+ * \param sigma_c: the difference between the density majorant and minorant
+ * \param t: the ray segment between which we compute the transmittance
+ */
+template<const bool shadow, typename IntegratorGenericState>
+ccl_device Spectrum volume_transmittance(KernelGlobals kg,
+                                         IntegratorGenericState state,
+                                         const ccl_private Ray *ray,
+                                         ccl_private ShaderData *ccl_restrict sd,
+                                         const float sigma_c,
+                                         const Interval<float> t,
+                                         const ccl_private RNGState *rng_state,
+                                         const uint32_t path_flag)
+{
+  constexpr float r = 0.9f;
+  const float ray_length = t.length();
+
+  /* Expected number of steps with residual ratio tracking. */
+  const float expected_steps = sigma_c * ray_length;
+  /* Number of samples for the biased estimator. */
+  const int k = clamp(int(roundf(expected_steps)), 1, kernel_data.integrator.volume_max_steps);
+
+  /* Sample the evaluation order of the debiasing term. */
+  /* Use the same random number for all pixels to sync the workload on GPU. */
+  /* TODO(weizhen): need to check if such correlation introduces artefacts. */
+  const float rand = path_rng_1D(
+      kg, 0, rng_state->sample, rng_state->rng_offset + PRNG_VOLUME_EXPANSION_ORDER);
+  /* A hard cut-off to prevent taking too many samples on the GPU. The probability of going beyond
+   * this order is 1e-5f. */
+  constexpr int cut_off = 4;
+  float pmf;
+  /* Number of independent estimators of T_k. */
+  const int N = (sigma_c == 0.0f) ?
+                    1 :
+                    power_of_2(sample_geometric_distribution(rand, r, pmf, cut_off));
+
+  /* Total number of density evaluations. */
+  const int samples = N * k;
+
+  const float shade_offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SHADE_OFFSET);
+  const float step_size = ray_length / float(samples);
+
+  if (N == 1) {
+    /* Only compute the biased estimator. */
+    Spectrum tau_k = zero_spectrum();
+    for (int i = 0; i < k; i++) {
+      const float shade_t = min(t.max, t.min + (shade_offset + i) * step_size);
+      sd->P = ray->P + ray->D * shade_t;
+      tau_k += volume_shader_eval_extinction<shadow>(kg, state, sd, path_flag);
+    }
+    return exp(-tau_k * step_size);
+  }
+
+  /* Estimations of optical thickness. */
+  Spectrum tau_j[2];
+  tau_j[0] = zero_spectrum();
+  tau_j[1] = zero_spectrum();
+  Spectrum tau_j_1 = zero_spectrum();
+
+  Spectrum T_k = zero_spectrum();
+  for (int n = 0; n < N; n++) {
+    Spectrum tau_k = zero_spectrum();
+    for (int i = 0; i < k; i++) {
+      const int step = i * N + n;
+      const float shade_t = min(t.max, t.min + (shade_offset + step) * step_size);
+      sd->P = ray->P + ray->D * shade_t;
+
+      const Spectrum tau = step_size *
+                           volume_shader_eval_extinction<shadow>(kg, state, sd, path_flag);
+
+      tau_k += tau * N;
+      tau_j[step % 2] += tau * 2.0f;
+      tau_j_1 += tau;
+    }
+    T_k += exp(-tau_k);
+  }
+
+  const Spectrum T_j_1 = exp(-tau_j_1);
+
+  /* Eq (16). This is the secondary estimator which averages a few independent estimations. */
+  T_k /= float(N);
+  const Spectrum T_j = 0.5f * (exp(-tau_j[0]) + exp(-tau_j[1]));
+
+  /* Eq (14), single-term primary estimator. */
+  return T_k + (T_j_1 - T_j) / pmf;
+}
+
+/* Compute the volumetric transmittance of the segment [ray->tmin, ray->tmax],
+ * used for the shadow ray throughput. */
 ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
                                             IntegratorShadowState state,
                                             ccl_private Ray *ccl_restrict ray,
@@ -617,43 +746,29 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
   sd->lcg_state = lcg_state_init(
       rng_state.rng_pixel, rng_state.rng_offset, rng_state.sample, 0xd9111870);
 
-  Spectrum tp = *throughput;
+  path_state_rng_scramble(&rng_state, 0x8647ace4);
 
   /* Prepare for stepping. */
   VolumeStep vstep;
   volume_step_init(kg, &rng_state, ray->tmin, &vstep);
 
-  OctreeTracing octree(ray->tmin);
   const uint32_t path_flag = PATH_RAY_SHADOW;
 
-  /* compute extinction at the start */
-  Spectrum sum = zero_spectrum();
-  for (; volume_integrate_advance<true>(kg, ray, sd, state, &rng_state, path_flag, octree, vstep);)
-  {
-    /* compute attenuation over segment */
-    Spectrum sigma_t = zero_spectrum();
-    if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) {
-      /* Compute `expf()` only for every Nth step, to save some calculations
-       * because `exp(a)*exp(b) = exp(a+b)`, also do a quick #VOLUME_THROUGHPUT_EPSILON
-       * check then. */
-      sum += (-sigma_t * vstep.t.length());
-      if ((vstep.step & 0x07) == 0) { /* TODO: Other interval? */
-        tp = *throughput * exp(sum);
+  OctreeTracing octree(ray->tmin);
+  if (!volume_octree_setup<true>(kg, ray, sd, state, &rng_state, path_flag, octree, vstep)) {
+    return;
+  }
 
-        /* stop if nearly all light is blocked */
-        if (reduce_max(tp) < VOLUME_THROUGHPUT_EPSILON) {
-          break;
-        }
-      }
+  while (volume_octree_advance_shadow(kg, ray, sd, state, &rng_state, path_flag, octree, vstep)) {
+    const float sigma = octree.sigma.range();
+    *throughput *= volume_transmittance<true>(
+        kg, state, ray, sd, sigma, octree.t, &rng_state, path_flag);
+
+    if (reduce_max(fabs(*throughput)) < VOLUME_THROUGHPUT_EPSILON) {
+      return;
     }
+    octree.t.min = octree.t.max;
   }
-
-  if (vstep.t.max == ray->tmax) {
-    /* Update throughput in case we haven't done it above. */
-    tp = *throughput * exp(sum);
-  }
-
-  *throughput = tp;
 }
 
 /* Equi-angular sampling as in:
diff --git a/intern/cycles/kernel/sample/mapping.h b/intern/cycles/kernel/sample/mapping.h
index 3b494bc059a..603b0d0d4eb 100644
--- a/intern/cycles/kernel/sample/mapping.h
+++ b/intern/cycles/kernel/sample/mapping.h
@@ -208,4 +208,17 @@ ccl_device float2 regular_polygon_sample(const float corners, float rotation, co
   return make_float2(cr * p.x - sr * p.y, sr * p.x + cr * p.y);
 }
 
+/* Generate random variable x following geometric distribution p(x) = r * (1 - r)^x, 0 <= p <= 1.
+ * Also compute the probability mass function pmf.
+ * The sampled order is truncated at `cut_off`. */
+ccl_device_inline int sample_geometric_distribution(const float rand,
+                                                    const float r,
+                                                    ccl_private float &pmf,
+                                                    const int cut_off = INT_MAX)
+{
+  const int n = min(int(floorf(logf(rand) / logf(1.0f - r))), cut_off);
+  pmf = (n == cut_off) ? powf(1.0f - r, n) : r * powf(1.0f - r, n);
+  return n;
+}
+
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h
index 4d133ace952..c81fe4daca3 100644
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -297,6 +297,7 @@ enum PathTraceDimension {
   PRNG_VOLUME_SHADE_OFFSET = 7,
   PRNG_VOLUME_PHASE_GUIDING_DISTANCE = 8,
   PRNG_VOLUME_PHASE_GUIDING_EQUIANGULAR = 9,
+  PRNG_VOLUME_EXPANSION_ORDER = 12,
 
   /* Subsurface random walk bounces */
   PRNG_SUBSURFACE_BSDF = 0,