Cleanup: Cycles: use constexpr in kernel

instead of lambdas and macro guards. Should be possible after ce0ae95ed3

Pull Request: https://projects.blender.org/blender/blender/pulls/143723
Weizhen Huang
2025-08-01 14:06:13 +02:00
committed by Weizhen Huang
parent f4ae983dfb
commit 1667d69d3b
10 changed files with 61 additions and 110 deletions
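
The change applies one pattern throughout the kernel: functions that used to receive a macro-generated volume-stack read/write lambda as a callable template parameter now take a `const bool shadow` template parameter and select the accessor themselves with `if constexpr`. Below is a minimal standalone C++17 sketch of that before/after; the names and state types are hypothetical stand-ins, not the actual Cycles code.

#include <cstdio>

struct VolumeStack { int object; int shader; };

/* Stand-ins for the two integrator state layouts (main path vs. shadow path). */
struct State       { VolumeStack stack[4]; };
struct ShadowState { VolumeStack stack[4]; };

VolumeStack read_volume_stack(const State &s, const int i) { return s.stack[i]; }
VolumeStack read_shadow_volume_stack(const ShadowState &s, const int i) { return s.stack[i]; }

/* Old style: the caller builds a lambda (via VOLUME_READ_LAMBDA) and passes it in. */
template<typename StackReadOp> float step_size_old(StackReadOp stack_read)
{
  return (stack_read(0).shader >= 0) ? 0.1f : 1.0f;
}

/* New style: a compile-time bool chooses the accessor inside the function. */
template<bool shadow, typename GenericState>
VolumeStack stack_read(const GenericState &state, const int i)
{
  if constexpr (shadow) {
    return read_shadow_volume_stack(state, i);
  }
  else {
    return read_volume_stack(state, i);
  }
}

template<bool shadow, typename GenericState> float step_size_new(const GenericState &state)
{
  return (stack_read<shadow>(state, 0).shader >= 0) ? 0.1f : 1.0f;
}

int main()
{
  State state{};
  ShadowState shadow_state{};

  /* Old call site: the macro expanded to roughly this lambda. */
  auto volume_read_lambda_pass = [&](const int i) { return read_volume_stack(state, i); };
  std::printf("%f\n", step_size_old(volume_read_lambda_pass));

  /* New call sites: the flag is part of the template signature. */
  std::printf("%f\n", step_size_new<false>(state));
  std::printf("%f\n", step_size_new<true>(shadow_state));
  return 0;
}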


@@ -535,10 +535,8 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(cuda_flags ${cuda_flags} --ptxas-options="-v")
endif()
if(${cuda_version} GREATER_EQUAL 110)
# Helps with compatibility when using recent clang host compiler.
set(cuda_flags ${cuda_flags} -std=c++17)
endif()
# Helps with compatibility when using recent clang host compiler.
set(cuda_flags ${cuda_flags} -std=c++17)
set(_cuda_nvcc_args
-arch=${arch}
@@ -700,6 +698,7 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
-Wno-parentheses-equality
-Wno-unused-value
-ffast-math
-std=c++17
${math_flag}
${hip_opt_flags}
-o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})
@@ -870,6 +869,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
COMMAND
${CUDA_NVCC_EXECUTABLE}
--ptx
-std=c++17
-arch=sm_50
${cuda_flags}
${input}


@@ -195,31 +195,6 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \
// clang-format on
/* volumetric lambda functions - use function objects for lambda-like functionality */
#define VOLUME_READ_LAMBDA(function_call) \
struct FnObjectRead { \
KernelGlobals kg; \
ccl_private MetalKernelContext *context; \
int state; \
\
VolumeStack operator()(const int i) const \
{ \
return context->function_call; \
} \
} volume_read_lambda_pass{kg, this, state};
#define VOLUME_WRITE_LAMBDA(function_call) \
struct FnObjectWrite { \
KernelGlobals kg; \
ccl_private MetalKernelContext *context; \
int state; \
\
void operator()(const int i, VolumeStack entry) const \
{ \
context->function_call; \
} \
} volume_write_lambda_pass{kg, this, state};
/* make_type definitions with Metal style element initializers */
ccl_device_forceinline float2 make_float2(const float x, const float y)
{
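
The Metal backend overrode the default lambda macros with the hand-written function objects removed above; the deleted comment suggests capturing lambdas could not be used directly in this context, so a struct storing the captures as members stood in for them. A plain host-C++ sketch of that equivalence, with hypothetical types, just to show what the override emulated; with the `if constexpr` dispatch there is no callable to construct, so the override becomes unnecessary.

#include <cstdio>

struct Context {
  int stack[4] = {7, 3, 0, 0};
  int read_entry(const int i) const { return stack[i]; }
};

/* Function object: the members play the role of a lambda's captures. */
struct FnObjectRead {
  const Context *context;
  int operator()(const int i) const { return context->read_entry(i); }
};

int main()
{
  Context ctx;

  /* Capturing lambda used by the generic (non-Metal) macro... */
  auto read_lambda = [&](const int i) { return ctx.read_entry(i); };
  /* ...and the explicit function object the Metal macro substituted for it. */
  FnObjectRead read_object{&ctx};

  std::printf("%d %d\n", read_lambda(0), read_object(1));
  return 0;
}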


@@ -55,7 +55,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
continue;
}
shader_setup_from_ray(kg, stack_sd, &volume_ray, isect);
volume_stack_enter_exit(kg, state, stack_sd);
volume_stack_enter_exit<false>(kg, state, stack_sd);
}
}
# else
@@ -67,7 +67,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
/* Ignore self, SSS itself already enters and exits the object. */
if (isect.object != volume_ray.self.object) {
shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect);
volume_stack_enter_exit(kg, state, stack_sd);
volume_stack_enter_exit<false>(kg, state, stack_sd);
}
/* Move ray forward. */
volume_ray.tmin = intersection_t_offset(isect.t);


@@ -53,7 +53,7 @@ ccl_device_inline Spectrum integrate_transparent_surface_shadow(KernelGlobals kg
# ifdef __VOLUME__
/* Exit/enter volume. */
shadow_volume_stack_enter_exit(kg, state, shadow_sd);
volume_stack_enter_exit<true>(kg, state, shadow_sd);
# endif
/* Disable transparent shadows for ray portals */
@@ -94,8 +94,7 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg,
/* `object` is only needed for light tree with light linking, it is irrelevant for shadow. */
shader_setup_from_volume(shadow_sd, &ray, OBJECT_NONE);
VOLUME_READ_LAMBDA(integrator_state_read_shadow_volume_stack(state, i));
const float step_size = volume_stack_step_size(kg, volume_read_lambda_pass);
const float step_size = volume_stack_step_size<true>(kg, state);
volume_shadow_heterogeneous(kg, state, &ray, shadow_sd, throughput, step_size);
}


@@ -413,7 +413,7 @@ ccl_device
if (is_transmission) {
#ifdef __VOLUME__
shadow_volume_stack_enter_exit(kg, shadow_state, sd);
volume_stack_enter_exit<true>(kg, shadow_state, sd);
#endif
}
@@ -820,7 +820,7 @@ ccl_device int integrate_surface(KernelGlobals kg,
if (continue_path_label & LABEL_TRANSMIT) {
/* Enter/Exit volume. */
volume_stack_enter_exit(kg, state, &sd);
volume_stack_enter_exit<false>(kg, state, &sd);
}
#endif


@@ -80,8 +80,7 @@ ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg,
ccl_private ShaderData *ccl_restrict sd,
ccl_private Spectrum *ccl_restrict extinction)
{
VOLUME_READ_LAMBDA(integrator_state_read_shadow_volume_stack(state, i))
volume_shader_eval<true>(kg, state, sd, PATH_RAY_SHADOW, volume_read_lambda_pass);
volume_shader_eval<true>(kg, state, sd, PATH_RAY_SHADOW);
if (!(sd->flag & SD_EXTINCTION)) {
return false;
@@ -98,8 +97,7 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg,
ccl_private VolumeShaderCoefficients *coeff)
{
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i))
volume_shader_eval<false>(kg, state, sd, path_flag, volume_read_lambda_pass);
volume_shader_eval<false>(kg, state, sd, path_flag);
if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION))) {
return false;
@@ -1023,8 +1021,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
VOLUME_SAMPLE_DISTANCE;
/* Step through volume. */
VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i))
const float step_size = volume_stack_step_size(kg, volume_read_lambda_pass);
const float step_size = volume_stack_step_size<false>(kg, state);
# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
/* The current path throughput which is used later to calculate per-segment throughput. */


@@ -21,6 +21,7 @@
#include "kernel/film/light_passes.h"
#include "kernel/integrator/guiding.h"
#include "kernel/integrator/volume_stack.h"
CCL_NAMESPACE_BEGIN
@@ -484,12 +485,11 @@ ccl_device_inline bool volume_shader_eval_entry(KernelGlobals kg,
return true;
}
template<const bool shadow, typename StackReadOp, typename ConstIntegratorGenericState>
template<const bool shadow, typename ConstIntegratorGenericState>
ccl_device_inline void volume_shader_eval(KernelGlobals kg,
ConstIntegratorGenericState state,
ccl_private ShaderData *ccl_restrict sd,
const uint32_t path_flag,
StackReadOp stack_read)
const uint32_t path_flag)
{
/* If path is being terminated, we are tracing a shadow ray or evaluating
* emission, then we don't need to store closures. The emission and shadow
@@ -510,7 +510,7 @@ ccl_device_inline void volume_shader_eval(KernelGlobals kg,
sd->object_flag = 0;
for (int i = 0;; i++) {
const VolumeStack entry = stack_read(i);
const VolumeStack entry = volume_stack_read<shadow>(state, i);
if (!volume_shader_eval_entry<shadow, KERNEL_FEATURE_NODE_MASK_VOLUME>(
kg, state, sd, entry, path_flag))
{


@@ -8,24 +8,40 @@ CCL_NAMESPACE_BEGIN
#ifdef __VOLUME__
/* Volumetric read/write lambda functions - default implementations */
# ifndef VOLUME_READ_LAMBDA
# define VOLUME_READ_LAMBDA(function_call) \
auto volume_read_lambda_pass = [=](const int i) { return function_call; };
# define VOLUME_WRITE_LAMBDA(function_call) \
auto volume_write_lambda_pass = [=](const int i, VolumeStack entry) { function_call; };
# endif
/* Volume Stack
*
* This is an array of object/shader ID's that the current segment of the path
* is inside of. */
template<typename StackReadOp, typename StackWriteOp>
template<const bool shadow, typename IntegratorGenericState>
ccl_device_forceinline VolumeStack volume_stack_read(const IntegratorGenericState state,
const int i)
{
if constexpr (shadow) {
return integrator_state_read_shadow_volume_stack(state, i);
}
else {
return integrator_state_read_volume_stack(state, i);
}
}
template<const bool shadow, typename IntegratorGenericState>
ccl_device_forceinline void volume_stack_write(IntegratorGenericState state,
const int i,
const VolumeStack entry)
{
if constexpr (shadow) {
integrator_state_write_shadow_volume_stack(state, i, entry);
}
else {
integrator_state_write_volume_stack(state, i, entry);
}
}
template<const bool shadow, typename IntegratorGenericState>
ccl_device void volume_stack_enter_exit(KernelGlobals kg,
const ccl_private ShaderData *sd,
StackReadOp stack_read,
StackWriteOp stack_write)
IntegratorGenericState state,
const ccl_private ShaderData *sd)
{
# ifdef __KERNEL_USE_DATA_CONSTANTS__
/* If we're using data constants, this fetch disappears.
@@ -46,7 +62,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg,
if (sd->flag & SD_BACKFACING) {
/* Exit volume object: remove from stack. */
for (int i = 0;; i++) {
VolumeStack entry = stack_read(i);
VolumeStack entry = volume_stack_read<shadow>(state, i);
if (entry.shader == SHADER_NONE) {
break;
}
@@ -54,8 +70,8 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg,
if (entry.object == sd->object && entry.shader == sd->shader) {
/* Shift back next stack entries. */
do {
entry = stack_read(i + 1);
stack_write(i, entry);
entry = volume_stack_read<shadow>(state, i + 1);
volume_stack_write<shadow>(state, i, entry);
i++;
} while (entry.shader != SHADER_NONE);
@@ -67,7 +83,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg,
/* Enter volume object: add to stack. */
uint i;
for (i = 0;; i++) {
VolumeStack entry = stack_read(i);
const VolumeStack entry = volume_stack_read<shadow>(state, i);
if (entry.shader == SHADER_NONE) {
break;
}
@@ -86,29 +102,11 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg,
/* Add to the end of the stack. */
const VolumeStack new_entry = {sd->object, sd->shader};
const VolumeStack empty_entry = {OBJECT_NONE, SHADER_NONE};
stack_write(i, new_entry);
stack_write(i + 1, empty_entry);
volume_stack_write<shadow>(state, i, new_entry);
volume_stack_write<shadow>(state, i + 1, empty_entry);
}
}
ccl_device void volume_stack_enter_exit(KernelGlobals kg,
IntegratorState state,
const ccl_private ShaderData *sd)
{
VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i))
VOLUME_WRITE_LAMBDA(integrator_state_write_volume_stack(state, i, entry))
volume_stack_enter_exit(kg, sd, volume_read_lambda_pass, volume_write_lambda_pass);
}
ccl_device void shadow_volume_stack_enter_exit(KernelGlobals kg,
IntegratorShadowState state,
const ccl_private ShaderData *sd)
{
VOLUME_READ_LAMBDA(integrator_state_read_shadow_volume_stack(state, i))
VOLUME_WRITE_LAMBDA(integrator_state_write_shadow_volume_stack(state, i, entry))
volume_stack_enter_exit(kg, sd, volume_read_lambda_pass, volume_write_lambda_pass);
}
/* Clean stack after the last bounce.
*
* It is expected that all volumes are closed manifolds, so at the time when ray
@@ -162,13 +160,13 @@ ccl_device_inline bool volume_is_homogeneous(KernelGlobals kg,
return true;
}
template<typename StackReadOp>
ccl_device float volume_stack_step_size(KernelGlobals kg, StackReadOp stack_read)
template<const bool shadow, typename IntegratorGenericState>
ccl_device float volume_stack_step_size(KernelGlobals kg, const IntegratorGenericState state)
{
float step_size = FLT_MAX;
for (int i = 0;; i++) {
VolumeStack entry = stack_read(i);
const VolumeStack entry = volume_stack_read<shadow>(state, i);
if (entry.shader == SHADER_NONE) {
break;
}
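
The helpers above dispatch with `if constexpr` rather than a runtime `if` because the main-path and shadow-path integrator states are distinct types with distinct accessors: inside a template, the discarded constexpr branch is never instantiated, so the accessor for the other state type does not have to type-check. A simplified sketch with hypothetical state types:

#include <cstdio>

struct VolumeStack { int object; int shader; };

struct MainState   { VolumeStack read(const int i) const { return {i, 1}; } };
struct ShadowState { VolumeStack shadow_read(const int i) const { return {i, 2}; } };

template<bool shadow, typename GenericState>
VolumeStack stack_read(const GenericState &state, const int i)
{
  if constexpr (shadow) {
    return state.shadow_read(i); /* Instantiated only for the shadow state. */
  }
  else {
    return state.read(i);        /* Instantiated only for the main state. */
  }
  /* With a plain runtime `if (shadow)` this would not compile: MainState has
   * no shadow_read() and ShadowState has no read(). */
}

int main()
{
  std::printf("%d %d\n",
              stack_read<false>(MainState{}, 0).shader,
              stack_read<true>(ShadowState{}, 0).shader);
  return 0;
}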


@@ -202,14 +202,9 @@ ccl_device_inline void osl_eval_nodes(KernelGlobals kg,
globals.shade_index = state + 1;
}
/* For surface shaders, we might have an automatic bump shader that needs to be executed before
* the main shader to update globals.N. */
# if __cplusplus < 201703L
if (type == SHADER_TYPE_SURFACE)
# else
if constexpr (type == SHADER_TYPE_SURFACE)
# endif
{
/* For surface shaders, we might have an automatic bump shader that needs to be executed before
* the main shader to update globals.N. */
if constexpr (type == SHADER_TYPE_SURFACE) {
if (sd->flag & SD_HAS_BUMP) {
/* Save state. */
const float3 P = sd->P;
@@ -269,11 +264,7 @@ ccl_device_inline void osl_eval_nodes(KernelGlobals kg,
/* interactive_params_ptr */ (void *)nullptr);
# endif
# if __cplusplus < 201703L
if (type == SHADER_TYPE_DISPLACEMENT) {
# else
if constexpr (type == SHADER_TYPE_DISPLACEMENT) {
# endif
sd->P = globals.P;
}
else if (globals.Ci) {
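
The `__cplusplus < 201703L` guards around these branches are dropped and `if constexpr` is used unconditionally. For that to be valid, `type` must be a compile-time constant; in the kernel it is presumably the non-type template parameter of `osl_eval_nodes`, so each shader-type branch is only compiled into the kernel variant that needs it. A sketch of the pattern with hypothetical names:

#include <cstdio>

enum ShaderType { SHADER_TYPE_SURFACE, SHADER_TYPE_VOLUME, SHADER_TYPE_DISPLACEMENT };

template<ShaderType type> void eval_nodes(float &P, float &N)
{
  if constexpr (type == SHADER_TYPE_SURFACE) {
    N += 1.0f; /* Automatic bump handling: compiled only into the surface variant. */
  }
  if constexpr (type == SHADER_TYPE_DISPLACEMENT) {
    P += 1.0f; /* Displacement output: compiled only into that variant. */
  }
}

int main()
{
  float P = 0.0f, N = 0.0f;
  eval_nodes<SHADER_TYPE_SURFACE>(P, N);
  eval_nodes<SHADER_TYPE_DISPLACEMENT>(P, N);
  std::printf("P=%g N=%g\n", P, N);
  return 0;
}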


@@ -158,19 +158,10 @@ CCL_NAMESPACE_BEGIN
KERNEL_FEATURE_NODE_BUMP_STATE | KERNEL_FEATURE_NODE_PORTAL)
#define KERNEL_FEATURE_NODE_MASK_BUMP KERNEL_FEATURE_NODE_MASK_DISPLACEMENT
/* Must be constexpr on the CPU to avoid compile errors because the state types
* are different depending on the main, shadow or null path. For GPU we don't have
* C++17 everywhere so need to check it. */
#if __cplusplus < 201703L
# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
# define IF_KERNEL_NODES_FEATURE(feature) \
if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
#else
# define IF_KERNEL_FEATURE(feature) \
if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
# define IF_KERNEL_NODES_FEATURE(feature) \
if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
#endif
#define IF_KERNEL_FEATURE(feature) \
if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
#define IF_KERNEL_NODES_FEATURE(feature) \
if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
/* Kernel features */
#define __AO__
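
With C++17 available on every backend, the feature macros always expand to `if constexpr`. As the removed comment explains, this matters because `node_feature_mask` is a compile-time constant and the surrounding state types differ between the main, shadow and null paths, so branches for features a kernel variant does not support must be discarded before they are instantiated. A simplified sketch with hypothetical names:

#include <cstdio>

enum : unsigned { KERNEL_FEATURE_NODE_VOLUME = 1u << 0, KERNEL_FEATURE_NODE_BUMP = 1u << 1 };

#define IF_KERNEL_NODES_FEATURE(feature) \
  if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)

struct MainState   { void scatter_volume() const { std::puts("volume scatter"); } };
struct ShadowState { /* No volume scattering support. */ };

template<unsigned node_feature_mask, typename State>
void eval_nodes(const State &state)
{
  IF_KERNEL_NODES_FEATURE(VOLUME) {
    state.scatter_volume(); /* Not instantiated when the mask lacks VOLUME. */
  }
}

int main()
{
  eval_nodes<KERNEL_FEATURE_NODE_VOLUME>(MainState{});
  eval_nodes<0u>(ShadowState{}); /* Compiles even though ShadowState has no scatter_volume(). */
  return 0;
}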