Cleanup: Cycles: use constexpr in kernel

instead of lambdas and macro guards. Should be possible after ce0ae95ed3

Pull Request: https://projects.blender.org/blender/blender/pulls/143723
Weizhen Huang
2025-08-01 14:06:13 +02:00
committed by Weizhen Huang
parent f4ae983dfb
commit 1667d69d3b
10 changed files with 61 additions and 110 deletions
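
The change applies one pattern throughout the kernel: functions that used to receive a macro-generated volume-stack read/write lambda as a callable template parameter now take a `const bool shadow` template parameter and select the accessor themselves with `if constexpr`. Below is a minimal standalone C++17 sketch of that before/after; the names and state types are hypothetical stand-ins, not the actual Cycles code.

#include <cstdio>

struct VolumeStack { int object; int shader; };

/* Stand-ins for the two integrator state layouts (main path vs. shadow path). */
struct State       { VolumeStack stack[4]; };
struct ShadowState { VolumeStack stack[4]; };

VolumeStack read_volume_stack(const State &s, const int i) { return s.stack[i]; }
VolumeStack read_shadow_volume_stack(const ShadowState &s, const int i) { return s.stack[i]; }

/* Old style: the caller builds a lambda (via VOLUME_READ_LAMBDA) and passes it in. */
template<typename StackReadOp> float step_size_old(StackReadOp stack_read)
{
  return (stack_read(0).shader >= 0) ? 0.1f : 1.0f;
}

/* New style: a compile-time bool chooses the accessor inside the function. */
template<bool shadow, typename GenericState>
VolumeStack stack_read(const GenericState &state, const int i)
{
  if constexpr (shadow) {
    return read_shadow_volume_stack(state, i);
  }
  else {
    return read_volume_stack(state, i);
  }
}

template<bool shadow, typename GenericState> float step_size_new(const GenericState &state)
{
  return (stack_read<shadow>(state, 0).shader >= 0) ? 0.1f : 1.0f;
}

int main()
{
  State state{};
  ShadowState shadow_state{};

  /* Old call site: the macro expanded to roughly this lambda. */
  auto volume_read_lambda_pass = [&](const int i) { return read_volume_stack(state, i); };
  std::printf("%f\n", step_size_old(volume_read_lambda_pass));

  /* New call sites: the flag is part of the template signature. */
  std::printf("%f\n", step_size_new<false>(state));
  std::printf("%f\n", step_size_new<true>(shadow_state));
  return 0;
}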


@@ -535,10 +535,8 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(cuda_flags ${cuda_flags} --ptxas-options="-v")
endif()
if(${cuda_version} GREATER_EQUAL 110)
# Helps with compatibility when using recent clang host compiler.
set(cuda_flags ${cuda_flags} -std=c++17)
endif()
# Helps with compatibility when using recent clang host compiler.
set(cuda_flags ${cuda_flags} -std=c++17)
set(_cuda_nvcc_args
-arch=${arch}
@@ -700,6 +698,7 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
-Wno-parentheses-equality
-Wno-unused-value
-ffast-math
-std=c++17
${math_flag}
${hip_opt_flags}
-o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})
@@ -870,6 +869,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
COMMAND
${CUDA_NVCC_EXECUTABLE}
--ptx
-std=c++17
-arch=sm_50
${cuda_flags}
${input}


@@ -195,31 +195,6 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \
// clang-format on
/* volumetric lambda functions - use function objects for lambda-like functionality */
#define VOLUME_READ_LAMBDA(function_call) \
struct FnObjectRead { \
KernelGlobals kg; \
ccl_private MetalKernelContext *context; \
int state; \
\
VolumeStack operator()(const int i) const \
{ \
return context->function_call; \
} \
} volume_read_lambda_pass{kg, this, state};
#define VOLUME_WRITE_LAMBDA(function_call) \
struct FnObjectWrite { \
KernelGlobals kg; \
ccl_private MetalKernelContext *context; \
int state; \
\
void operator()(const int i, VolumeStack entry) const \
{ \
context->function_call; \
} \
} volume_write_lambda_pass{kg, this, state};
/* make_type definitions with Metal style element initializers */
ccl_device_forceinline float2 make_float2(const float x, const float y)
{
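
The Metal backend overrode the default lambda macros with the hand-written function objects removed above; the deleted comment suggests capturing lambdas could not be used directly in this context, so a struct storing the captures as members stood in for them. A plain host-C++ sketch of that equivalence, with hypothetical types, just to show what the override emulated; with the `if constexpr` dispatch there is no callable to construct, so the override becomes unnecessary.

#include <cstdio>

struct Context {
  int stack[4] = {7, 3, 0, 0};
  int read_entry(const int i) const { return stack[i]; }
};

/* Function object: the members play the role of a lambda's captures. */
struct FnObjectRead {
  const Context *context;
  int operator()(const int i) const { return context->read_entry(i); }
};

int main()
{
  Context ctx;

  /* Capturing lambda used by the generic (non-Metal) macro... */
  auto read_lambda = [&](const int i) { return ctx.read_entry(i); };
  /* ...and the explicit function object the Metal macro substituted for it. */
  FnObjectRead read_object{&ctx};

  std::printf("%d %d\n", read_lambda(0), read_object(1));
  return 0;
}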


@@ -55,7 +55,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
continue;
}
shader_setup_from_ray(kg, stack_sd, &volume_ray, isect);
volume_stack_enter_exit(kg, state, stack_sd);
volume_stack_enter_exit<false>(kg, state, stack_sd);
}
}
# else
@@ -67,7 +67,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
/* Ignore self, SSS itself already enters and exits the object. */
if (isect.object != volume_ray.self.object) {
shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect);
volume_stack_enter_exit(kg, state, stack_sd);
volume_stack_enter_exit<false>(kg, state, stack_sd);
}
/* Move ray forward. */
volume_ray.tmin = intersection_t_offset(isect.t);


@@ -53,7 +53,7 @@ ccl_device_inline Spectrum integrate_transparent_surface_shadow(KernelGlobals kg
# ifdef __VOLUME__
/* Exit/enter volume. */
shadow_volume_stack_enter_exit(kg, state, shadow_sd);
volume_stack_enter_exit<true>(kg, state, shadow_sd);
# endif
/* Disable transparent shadows for ray portals */
@@ -94,8 +94,7 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg,
/* `object` is only needed for light tree with light linking, it is irrelevant for shadow. */
shader_setup_from_volume(shadow_sd, &ray, OBJECT_NONE);
VOLUME_READ_LAMBDA(integrator_state_read_shadow_volume_stack(state, i));
const float step_size = volume_stack_step_size(kg, volume_read_lambda_pass);
const float step_size = volume_stack_step_size<true>(kg, state);
volume_shadow_heterogeneous(kg, state, &ray, shadow_sd, throughput, step_size);
}


@@ -413,7 +413,7 @@ ccl_device
if (is_transmission) {
#ifdef __VOLUME__
shadow_volume_stack_enter_exit(kg, shadow_state, sd);
volume_stack_enter_exit<true>(kg, shadow_state, sd);
#endif
}
@@ -820,7 +820,7 @@ ccl_device int integrate_surface(KernelGlobals kg,
if (continue_path_label & LABEL_TRANSMIT) {
/* Enter/Exit volume. */
volume_stack_enter_exit(kg, state, &sd);
volume_stack_enter_exit<false>(kg, state, &sd);
}
#endif


@@ -80,8 +80,7 @@ ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg,
ccl_private ShaderData *ccl_restrict sd,
ccl_private Spectrum *ccl_restrict extinction)
{
VOLUME_READ_LAMBDA(integrator_state_read_shadow_volume_stack(state, i))
volume_shader_eval<true>(kg, state, sd, PATH_RAY_SHADOW, volume_read_lambda_pass);
volume_shader_eval<true>(kg, state, sd, PATH_RAY_SHADOW);
if (!(sd->flag & SD_EXTINCTION)) {
return false;
@@ -98,8 +97,7 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg,
ccl_private VolumeShaderCoefficients *coeff)
{
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i))
volume_shader_eval<false>(kg, state, sd, path_flag, volume_read_lambda_pass);
volume_shader_eval<false>(kg, state, sd, path_flag);
if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION))) {
return false;
@@ -1023,8 +1021,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
VOLUME_SAMPLE_DISTANCE;
/* Step through volume. */
VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i))
const float step_size = volume_stack_step_size(kg, volume_read_lambda_pass);
const float step_size = volume_stack_step_size<false>(kg, state);
# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
/* The current path throughput which is used later to calculate per-segment throughput. */


@@ -21,6 +21,7 @@
#include "kernel/film/light_passes.h"
#include "kernel/integrator/guiding.h"
#include "kernel/integrator/volume_stack.h"
CCL_NAMESPACE_BEGIN
@@ -484,12 +485,11 @@ ccl_device_inline bool volume_shader_eval_entry(KernelGlobals kg,
return true;
}
template<const bool shadow, typename StackReadOp, typename ConstIntegratorGenericState>
template<const bool shadow, typename ConstIntegratorGenericState>
ccl_device_inline void volume_shader_eval(KernelGlobals kg,
ConstIntegratorGenericState state,
ccl_private ShaderData *ccl_restrict sd,
const uint32_t path_flag,
StackReadOp stack_read)
const uint32_t path_flag)
{
/* If path is being terminated, we are tracing a shadow ray or evaluating
* emission, then we don't need to store closures. The emission and shadow
@@ -510,7 +510,7 @@ ccl_device_inline void volume_shader_eval(KernelGlobals kg,
sd->object_flag = 0;
for (int i = 0;; i++) {
const VolumeStack entry = stack_read(i);
const VolumeStack entry = volume_stack_read<shadow>(state, i);
if (!volume_shader_eval_entry<shadow, KERNEL_FEATURE_NODE_MASK_VOLUME>(
kg, state, sd, entry, path_flag))
{


@@ -8,24 +8,40 @@ CCL_NAMESPACE_BEGIN
#ifdef __VOLUME__
/* Volumetric read/write lambda functions - default implementations */
# ifndef VOLUME_READ_LAMBDA
# define VOLUME_READ_LAMBDA(function_call) \
auto volume_read_lambda_pass = [=](const int i) { return function_call; };
# define VOLUME_WRITE_LAMBDA(function_call) \
auto volume_write_lambda_pass = [=](const int i, VolumeStack entry) { function_call; };
# endif
/* Volume Stack
*
* This is an array of object/shader ID's that the current segment of the path
* is inside of. */
template<typename StackReadOp, typename StackWriteOp>
template<const bool shadow, typename IntegratorGenericState>
ccl_device_forceinline VolumeStack volume_stack_read(const IntegratorGenericState state,
const int i)
{
if constexpr (shadow) {
return integrator_state_read_shadow_volume_stack(state, i);
}
else {
return integrator_state_read_volume_stack(state, i);
}
}
template<const bool shadow, typename IntegratorGenericState>
ccl_device_forceinline void volume_stack_write(IntegratorGenericState state,
const int i,
const VolumeStack entry)
{
if constexpr (shadow) {
integrator_state_write_shadow_volume_stack(state, i, entry);
}
else {
integrator_state_write_volume_stack(state, i, entry);
}
}
template<const bool shadow, typename IntegratorGenericState>
ccl_device void volume_stack_enter_exit(KernelGlobals kg,
const ccl_private ShaderData *sd,
StackReadOp stack_read,
StackWriteOp stack_write)
IntegratorGenericState state,
const ccl_private ShaderData *sd)
{
# ifdef __KERNEL_USE_DATA_CONSTANTS__
/* If we're using data constants, this fetch disappears.
@@ -46,7 +62,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg,
if (sd->flag & SD_BACKFACING) {
/* Exit volume object: remove from stack. */
for (int i = 0;; i++) {
VolumeStack entry = stack_read(i);
VolumeStack entry = volume_stack_read<shadow>(state, i);
if (entry.shader == SHADER_NONE) {
break;
}
@@ -54,8 +70,8 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg,
if (entry.object == sd->object && entry.shader == sd->shader) {
/* Shift back next stack entries. */
do {
entry = stack_read(i + 1);
stack_write(i, entry);
entry = volume_stack_read<shadow>(state, i + 1);
volume_stack_write<shadow>(state, i, entry);
i++;
} while (entry.shader != SHADER_NONE);
@@ -67,7 +83,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg,
/* Enter volume object: add to stack. */
uint i;
for (i = 0;; i++) {
VolumeStack entry = stack_read(i);
const VolumeStack entry = volume_stack_read<shadow>(state, i);
if (entry.shader == SHADER_NONE) {
break;
}
@@ -86,29 +102,11 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg,
/* Add to the end of the stack. */
const VolumeStack new_entry = {sd->object, sd->shader};
const VolumeStack empty_entry = {OBJECT_NONE, SHADER_NONE};
stack_write(i, new_entry);
stack_write(i + 1, empty_entry);
volume_stack_write<shadow>(state, i, new_entry);
volume_stack_write<shadow>(state, i + 1, empty_entry);
}
}
ccl_device void volume_stack_enter_exit(KernelGlobals kg,
IntegratorState state,
const ccl_private ShaderData *sd)
{
VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i))
VOLUME_WRITE_LAMBDA(integrator_state_write_volume_stack(state, i, entry))
volume_stack_enter_exit(kg, sd, volume_read_lambda_pass, volume_write_lambda_pass);
}
ccl_device void shadow_volume_stack_enter_exit(KernelGlobals kg,
IntegratorShadowState state,
const ccl_private ShaderData *sd)
{
VOLUME_READ_LAMBDA(integrator_state_read_shadow_volume_stack(state, i))
VOLUME_WRITE_LAMBDA(integrator_state_write_shadow_volume_stack(state, i, entry))
volume_stack_enter_exit(kg, sd, volume_read_lambda_pass, volume_write_lambda_pass);
}
/* Clean stack after the last bounce.
*
* It is expected that all volumes are closed manifolds, so at the time when ray
@@ -162,13 +160,13 @@ ccl_device_inline bool volume_is_homogeneous(KernelGlobals kg,
return true;
}
template<typename StackReadOp>
ccl_device float volume_stack_step_size(KernelGlobals kg, StackReadOp stack_read)
template<const bool shadow, typename IntegratorGenericState>
ccl_device float volume_stack_step_size(KernelGlobals kg, const IntegratorGenericState state)
{
float step_size = FLT_MAX;
for (int i = 0;; i++) {
VolumeStack entry = stack_read(i);
const VolumeStack entry = volume_stack_read<shadow>(state, i);
if (entry.shader == SHADER_NONE) {
break;
}
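
The helpers above dispatch with `if constexpr` rather than a runtime `if` because the main-path and shadow-path integrator states are distinct types with distinct accessors: inside a template, the discarded constexpr branch is never instantiated, so the accessor for the other state type does not have to type-check. A simplified sketch with hypothetical state types:

#include <cstdio>

struct VolumeStack { int object; int shader; };

struct MainState   { VolumeStack read(const int i) const { return {i, 1}; } };
struct ShadowState { VolumeStack shadow_read(const int i) const { return {i, 2}; } };

template<bool shadow, typename GenericState>
VolumeStack stack_read(const GenericState &state, const int i)
{
  if constexpr (shadow) {
    return state.shadow_read(i); /* Instantiated only for the shadow state. */
  }
  else {
    return state.read(i);        /* Instantiated only for the main state. */
  }
  /* With a plain runtime `if (shadow)` this would not compile: MainState has
   * no shadow_read() and ShadowState has no read(). */
}

int main()
{
  std::printf("%d %d\n",
              stack_read<false>(MainState{}, 0).shader,
              stack_read<true>(ShadowState{}, 0).shader);
  return 0;
}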


@@ -202,14 +202,9 @@ ccl_device_inline void osl_eval_nodes(KernelGlobals kg,
globals.shade_index = state + 1;
}
/* For surface shaders, we might have an automatic bump shader that needs to be executed before
* the main shader to update globals.N. */
# if __cplusplus < 201703L
if (type == SHADER_TYPE_SURFACE)
# else
if constexpr (type == SHADER_TYPE_SURFACE)
# endif
{
/* For surface shaders, we might have an automatic bump shader that needs to be executed before
* the main shader to update globals.N. */
if constexpr (type == SHADER_TYPE_SURFACE) {
if (sd->flag & SD_HAS_BUMP) {
/* Save state. */
const float3 P = sd->P;
@@ -269,11 +264,7 @@ ccl_device_inline void osl_eval_nodes(KernelGlobals kg,
/* interactive_params_ptr */ (void *)nullptr);
# endif
# if __cplusplus < 201703L
if (type == SHADER_TYPE_DISPLACEMENT) {
# else
if constexpr (type == SHADER_TYPE_DISPLACEMENT) {
# endif
sd->P = globals.P;
}
else if (globals.Ci) {
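
The `__cplusplus < 201703L` guards around these branches are dropped and `if constexpr` is used unconditionally. For that to be valid, `type` must be a compile-time constant; in the kernel it is presumably the non-type template parameter of `osl_eval_nodes`, so each shader-type branch is only compiled into the kernel variant that needs it. A sketch of the pattern with hypothetical names:

#include <cstdio>

enum ShaderType { SHADER_TYPE_SURFACE, SHADER_TYPE_VOLUME, SHADER_TYPE_DISPLACEMENT };

template<ShaderType type> void eval_nodes(float &P, float &N)
{
  if constexpr (type == SHADER_TYPE_SURFACE) {
    N += 1.0f; /* Automatic bump handling: compiled only into the surface variant. */
  }
  if constexpr (type == SHADER_TYPE_DISPLACEMENT) {
    P += 1.0f; /* Displacement output: compiled only into that variant. */
  }
}

int main()
{
  float P = 0.0f, N = 0.0f;
  eval_nodes<SHADER_TYPE_SURFACE>(P, N);
  eval_nodes<SHADER_TYPE_DISPLACEMENT>(P, N);
  std::printf("P=%g N=%g\n", P, N);
  return 0;
}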


@@ -158,19 +158,10 @@ CCL_NAMESPACE_BEGIN
KERNEL_FEATURE_NODE_BUMP_STATE | KERNEL_FEATURE_NODE_PORTAL)
#define KERNEL_FEATURE_NODE_MASK_BUMP KERNEL_FEATURE_NODE_MASK_DISPLACEMENT
/* Must be constexpr on the CPU to avoid compile errors because the state types
* are different depending on the main, shadow or null path. For GPU we don't have
* C++17 everywhere so need to check it. */
#if __cplusplus < 201703L
# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
# define IF_KERNEL_NODES_FEATURE(feature) \
if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
#else
# define IF_KERNEL_FEATURE(feature) \
if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
# define IF_KERNEL_NODES_FEATURE(feature) \
if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
#endif
#define IF_KERNEL_FEATURE(feature) \
if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
#define IF_KERNEL_NODES_FEATURE(feature) \
if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
/* Kernel features */
#define __AO__
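
With C++17 available on every backend, the feature macros always expand to `if constexpr`. As the removed comment explains, this matters because `node_feature_mask` is a compile-time constant and the surrounding state types differ between the main, shadow and null paths, so branches for features a kernel variant does not support must be discarded before they are instantiated. A simplified sketch with hypothetical names:

#include <cstdio>

enum : unsigned { KERNEL_FEATURE_NODE_VOLUME = 1u << 0, KERNEL_FEATURE_NODE_BUMP = 1u << 1 };

#define IF_KERNEL_NODES_FEATURE(feature) \
  if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)

struct MainState   { void scatter_volume() const { std::puts("volume scatter"); } };
struct ShadowState { /* No volume scattering support. */ };

template<unsigned node_feature_mask, typename State>
void eval_nodes(const State &state)
{
  IF_KERNEL_NODES_FEATURE(VOLUME) {
    state.scatter_volume(); /* Not instantiated when the mask lacks VOLUME. */
  }
}

int main()
{
  eval_nodes<KERNEL_FEATURE_NODE_VOLUME>(MainState{});
  eval_nodes<0u>(ShadowState{}); /* Compiles even though ShadowState has no scatter_volume(). */
  return 0;
}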