diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 5b5a955ed41..1727f283641 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -535,10 +535,8 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_flags ${cuda_flags} --ptxas-options="-v") endif() - if(${cuda_version} GREATER_EQUAL 110) - # Helps with compatibility when using recent clang host compiler. - set(cuda_flags ${cuda_flags} -std=c++17) - endif() + # Helps with compatibility when using recent clang host compiler. + set(cuda_flags ${cuda_flags} -std=c++17) set(_cuda_nvcc_args -arch=${arch} @@ -700,6 +698,7 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) -Wno-parentheses-equality -Wno-unused-value -ffast-math + -std=c++17 ${math_flag} ${hip_opt_flags} -o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file}) @@ -870,6 +869,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) COMMAND ${CUDA_NVCC_EXECUTABLE} --ptx + -std=c++17 -arch=sm_50 ${cuda_flags} ${input} diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h index e9950ae4484..5da69d9138a 100644 --- a/intern/cycles/kernel/device/metal/compat.h +++ b/intern/cycles/kernel/device/metal/compat.h @@ -195,31 +195,6 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \ // clang-format on -/* volumetric lambda functions - use function objects for lambda-like functionality */ -#define VOLUME_READ_LAMBDA(function_call) \ - struct FnObjectRead { \ - KernelGlobals kg; \ - ccl_private MetalKernelContext *context; \ - int state; \ -\ - VolumeStack operator()(const int i) const \ - { \ - return context->function_call; \ - } \ - } volume_read_lambda_pass{kg, this, state}; - -#define VOLUME_WRITE_LAMBDA(function_call) \ - struct FnObjectWrite { \ - KernelGlobals kg; \ - ccl_private MetalKernelContext *context; \ - int state; \ -\ - void operator()(const int i, VolumeStack entry) const \ - { \ - context->function_call; \ - } \ - } volume_write_lambda_pass{kg, this, state}; - /* make_type definitions with Metal style element initializers */ ccl_device_forceinline float2 make_float2(const float x, const float y) { diff --git a/intern/cycles/kernel/integrator/intersect_volume_stack.h b/intern/cycles/kernel/integrator/intersect_volume_stack.h index 99f198d79cd..9d313744446 100644 --- a/intern/cycles/kernel/integrator/intersect_volume_stack.h +++ b/intern/cycles/kernel/integrator/intersect_volume_stack.h @@ -55,7 +55,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, continue; } shader_setup_from_ray(kg, stack_sd, &volume_ray, isect); - volume_stack_enter_exit(kg, state, stack_sd); + volume_stack_enter_exit(kg, state, stack_sd); } } # else @@ -67,7 +67,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, /* Ignore self, SSS itself already enters and exits the object. */ if (isect.object != volume_ray.self.object) { shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect); - volume_stack_enter_exit(kg, state, stack_sd); + volume_stack_enter_exit(kg, state, stack_sd); } /* Move ray forward. */ volume_ray.tmin = intersection_t_offset(isect.t); diff --git a/intern/cycles/kernel/integrator/shade_shadow.h b/intern/cycles/kernel/integrator/shade_shadow.h index 977fd210cd3..b4e231085e8 100644 --- a/intern/cycles/kernel/integrator/shade_shadow.h +++ b/intern/cycles/kernel/integrator/shade_shadow.h @@ -53,7 +53,7 @@ ccl_device_inline Spectrum integrate_transparent_surface_shadow(KernelGlobals kg # ifdef __VOLUME__ /* Exit/enter volume. */ - shadow_volume_stack_enter_exit(kg, state, shadow_sd); + volume_stack_enter_exit(kg, state, shadow_sd); # endif /* Disable transparent shadows for ray portals */ @@ -94,8 +94,7 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg, /* `object` is only needed for light tree with light linking, it is irrelevant for shadow. */ shader_setup_from_volume(shadow_sd, &ray, OBJECT_NONE); - VOLUME_READ_LAMBDA(integrator_state_read_shadow_volume_stack(state, i)); - const float step_size = volume_stack_step_size(kg, volume_read_lambda_pass); + const float step_size = volume_stack_step_size(kg, state); volume_shadow_heterogeneous(kg, state, &ray, shadow_sd, throughput, step_size); } diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h index 942de4cbb35..ee562bb8c6f 100644 --- a/intern/cycles/kernel/integrator/shade_surface.h +++ b/intern/cycles/kernel/integrator/shade_surface.h @@ -413,7 +413,7 @@ ccl_device if (is_transmission) { #ifdef __VOLUME__ - shadow_volume_stack_enter_exit(kg, shadow_state, sd); + volume_stack_enter_exit(kg, shadow_state, sd); #endif } @@ -820,7 +820,7 @@ ccl_device int integrate_surface(KernelGlobals kg, if (continue_path_label & LABEL_TRANSMIT) { /* Enter/Exit volume. */ - volume_stack_enter_exit(kg, state, &sd); + volume_stack_enter_exit(kg, state, &sd); } #endif diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h index 69962549030..fd7cdcbc02e 100644 --- a/intern/cycles/kernel/integrator/shade_volume.h +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -80,8 +80,7 @@ ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg, ccl_private ShaderData *ccl_restrict sd, ccl_private Spectrum *ccl_restrict extinction) { - VOLUME_READ_LAMBDA(integrator_state_read_shadow_volume_stack(state, i)) - volume_shader_eval(kg, state, sd, PATH_RAY_SHADOW, volume_read_lambda_pass); + volume_shader_eval(kg, state, sd, PATH_RAY_SHADOW); if (!(sd->flag & SD_EXTINCTION)) { return false; @@ -98,8 +97,7 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg, ccl_private VolumeShaderCoefficients *coeff) { const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i)) - volume_shader_eval(kg, state, sd, path_flag, volume_read_lambda_pass); + volume_shader_eval(kg, state, sd, path_flag); if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION))) { return false; @@ -1023,8 +1021,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg, VOLUME_SAMPLE_DISTANCE; /* Step through volume. */ - VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i)) - const float step_size = volume_stack_step_size(kg, volume_read_lambda_pass); + const float step_size = volume_stack_step_size(kg, state); # if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1 /* The current path throughput which is used later to calculate per-segment throughput. */ diff --git a/intern/cycles/kernel/integrator/volume_shader.h b/intern/cycles/kernel/integrator/volume_shader.h index 6c7a44fb498..e38d010cdd7 100644 --- a/intern/cycles/kernel/integrator/volume_shader.h +++ b/intern/cycles/kernel/integrator/volume_shader.h @@ -21,6 +21,7 @@ #include "kernel/film/light_passes.h" #include "kernel/integrator/guiding.h" +#include "kernel/integrator/volume_stack.h" CCL_NAMESPACE_BEGIN @@ -484,12 +485,11 @@ ccl_device_inline bool volume_shader_eval_entry(KernelGlobals kg, return true; } -template +template ccl_device_inline void volume_shader_eval(KernelGlobals kg, ConstIntegratorGenericState state, ccl_private ShaderData *ccl_restrict sd, - const uint32_t path_flag, - StackReadOp stack_read) + const uint32_t path_flag) { /* If path is being terminated, we are tracing a shadow ray or evaluating * emission, then we don't need to store closures. The emission and shadow @@ -510,7 +510,7 @@ ccl_device_inline void volume_shader_eval(KernelGlobals kg, sd->object_flag = 0; for (int i = 0;; i++) { - const VolumeStack entry = stack_read(i); + const VolumeStack entry = volume_stack_read(state, i); if (!volume_shader_eval_entry( kg, state, sd, entry, path_flag)) { diff --git a/intern/cycles/kernel/integrator/volume_stack.h b/intern/cycles/kernel/integrator/volume_stack.h index 094a38caaea..37e28ebd439 100644 --- a/intern/cycles/kernel/integrator/volume_stack.h +++ b/intern/cycles/kernel/integrator/volume_stack.h @@ -8,24 +8,40 @@ CCL_NAMESPACE_BEGIN #ifdef __VOLUME__ -/* Volumetric read/write lambda functions - default implementations */ -# ifndef VOLUME_READ_LAMBDA -# define VOLUME_READ_LAMBDA(function_call) \ - auto volume_read_lambda_pass = [=](const int i) { return function_call; }; -# define VOLUME_WRITE_LAMBDA(function_call) \ - auto volume_write_lambda_pass = [=](const int i, VolumeStack entry) { function_call; }; -# endif - /* Volume Stack * * This is an array of object/shared ID's that the current segment of the path * is inside of. */ -template +template +ccl_device_forceinline VolumeStack volume_stack_read(const IntegratorGenericState state, + const int i) +{ + if constexpr (shadow) { + return integrator_state_read_shadow_volume_stack(state, i); + } + else { + return integrator_state_read_volume_stack(state, i); + } +} + +template +ccl_device_forceinline void volume_stack_write(IntegratorGenericState state, + const int i, + const VolumeStack entry) +{ + if constexpr (shadow) { + integrator_state_write_shadow_volume_stack(state, i, entry); + } + else { + integrator_state_write_volume_stack(state, i, entry); + } +} + +template ccl_device void volume_stack_enter_exit(KernelGlobals kg, - const ccl_private ShaderData *sd, - StackReadOp stack_read, - StackWriteOp stack_write) + IntegratorGenericState state, + const ccl_private ShaderData *sd) { # ifdef __KERNEL_USE_DATA_CONSTANTS__ /* If we're using data constants, this fetch disappears. @@ -46,7 +62,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg, if (sd->flag & SD_BACKFACING) { /* Exit volume object: remove from stack. */ for (int i = 0;; i++) { - VolumeStack entry = stack_read(i); + VolumeStack entry = volume_stack_read(state, i); if (entry.shader == SHADER_NONE) { break; } @@ -54,8 +70,8 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg, if (entry.object == sd->object && entry.shader == sd->shader) { /* Shift back next stack entries. */ do { - entry = stack_read(i + 1); - stack_write(i, entry); + entry = volume_stack_read(state, i + 1); + volume_stack_write(state, i, entry); i++; } while (entry.shader != SHADER_NONE); @@ -67,7 +83,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg, /* Enter volume object: add to stack. */ uint i; for (i = 0;; i++) { - VolumeStack entry = stack_read(i); + const VolumeStack entry = volume_stack_read(state, i); if (entry.shader == SHADER_NONE) { break; } @@ -86,29 +102,11 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg, /* Add to the end of the stack. */ const VolumeStack new_entry = {sd->object, sd->shader}; const VolumeStack empty_entry = {OBJECT_NONE, SHADER_NONE}; - stack_write(i, new_entry); - stack_write(i + 1, empty_entry); + volume_stack_write(state, i, new_entry); + volume_stack_write(state, i + 1, empty_entry); } } -ccl_device void volume_stack_enter_exit(KernelGlobals kg, - IntegratorState state, - const ccl_private ShaderData *sd) -{ - VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i)) - VOLUME_WRITE_LAMBDA(integrator_state_write_volume_stack(state, i, entry)) - volume_stack_enter_exit(kg, sd, volume_read_lambda_pass, volume_write_lambda_pass); -} - -ccl_device void shadow_volume_stack_enter_exit(KernelGlobals kg, - IntegratorShadowState state, - const ccl_private ShaderData *sd) -{ - VOLUME_READ_LAMBDA(integrator_state_read_shadow_volume_stack(state, i)) - VOLUME_WRITE_LAMBDA(integrator_state_write_shadow_volume_stack(state, i, entry)) - volume_stack_enter_exit(kg, sd, volume_read_lambda_pass, volume_write_lambda_pass); -} - /* Clean stack after the last bounce. * * It is expected that all volumes are closed manifolds, so at the time when ray @@ -162,13 +160,13 @@ ccl_device_inline bool volume_is_homogeneous(KernelGlobals kg, return true; } -template -ccl_device float volume_stack_step_size(KernelGlobals kg, StackReadOp stack_read) +template +ccl_device float volume_stack_step_size(KernelGlobals kg, const IntegratorGenericState state) { float step_size = FLT_MAX; for (int i = 0;; i++) { - VolumeStack entry = stack_read(i); + const VolumeStack entry = volume_stack_read(state, i); if (entry.shader == SHADER_NONE) { break; } diff --git a/intern/cycles/kernel/osl/osl.h b/intern/cycles/kernel/osl/osl.h index b43fb1e5ab1..12aa2ec1f47 100644 --- a/intern/cycles/kernel/osl/osl.h +++ b/intern/cycles/kernel/osl/osl.h @@ -202,14 +202,9 @@ ccl_device_inline void osl_eval_nodes(KernelGlobals kg, globals.shade_index = state + 1; } -/* For surface shaders, we might have an automatic bump shader that needs to be executed before - * the main shader to update globals.N. */ -# if __cplusplus < 201703L - if (type == SHADER_TYPE_SURFACE) -# else - if constexpr (type == SHADER_TYPE_SURFACE) -# endif - { + /* For surface shaders, we might have an automatic bump shader that needs to be executed before + * the main shader to update globals.N. */ + if constexpr (type == SHADER_TYPE_SURFACE) { if (sd->flag & SD_HAS_BUMP) { /* Save state. */ const float3 P = sd->P; @@ -269,11 +264,7 @@ ccl_device_inline void osl_eval_nodes(KernelGlobals kg, /* interactive_params_ptr */ (void *)nullptr); # endif -# if __cplusplus < 201703L - if (type == SHADER_TYPE_DISPLACEMENT) { -# else if constexpr (type == SHADER_TYPE_DISPLACEMENT) { -# endif sd->P = globals.P; } else if (globals.Ci) { diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index ac85c0efc3d..33593e9e341 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -158,19 +158,10 @@ CCL_NAMESPACE_BEGIN KERNEL_FEATURE_NODE_BUMP_STATE | KERNEL_FEATURE_NODE_PORTAL) #define KERNEL_FEATURE_NODE_MASK_BUMP KERNEL_FEATURE_NODE_MASK_DISPLACEMENT -/* Must be constexpr on the CPU to avoid compile errors because the state types - * are different depending on the main, shadow or null path. For GPU we don't have - * C++17 everywhere so need to check it. */ -#if __cplusplus < 201703L -# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U) -# define IF_KERNEL_NODES_FEATURE(feature) \ - if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) -#else -# define IF_KERNEL_FEATURE(feature) \ - if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U) -# define IF_KERNEL_NODES_FEATURE(feature) \ - if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) -#endif +#define IF_KERNEL_FEATURE(feature) \ + if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U) +#define IF_KERNEL_NODES_FEATURE(feature) \ + if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) /* Kernel features */ #define __AO__