Cleanup: Cycles: use constexpr in kernel instead of lambda and macro guard

Should be possible after ce0ae95ed3.

Pull Request: https://projects.blender.org/blender/blender/pulls/143723

Committed by: Weizhen Huang
Parent: f4ae983dfb
Commit: 1667d69d3b
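
In short: the kernel previously passed the volume-stack accessors around as functors, generated by the VOLUME_READ_LAMBDA / VOLUME_WRITE_LAMBDA macros (with a Metal-specific functor override), because `if constexpr` was not available on every GPU toolchain. Now that all kernels build as C++17, the accessors become ordinary templates that dispatch on a compile-time `shadow` flag. Both fragments below are condensed from the hunks in this commit; kernel qualifiers are omitted for brevity.

Before, a call site had to materialize a functor and pass it along:

  VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i))
  const float step_size = volume_stack_step_size(kg, volume_read_lambda_pass);

After, the stack flavor is selected by a template argument instead:

  const float step_size = volume_stack_step_size<false>(kg, state);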
@@ -535,10 +535,8 @@ if(WITH_CYCLES_CUDA_BINARIES)
         set(cuda_flags ${cuda_flags} --ptxas-options="-v")
       endif()
 
-      if(${cuda_version} GREATER_EQUAL 110)
-        # Helps with compatibility when using recent clang host compiler.
-        set(cuda_flags ${cuda_flags} -std=c++17)
-      endif()
+      # Helps with compatibility when using recent clang host compiler.
+      set(cuda_flags ${cuda_flags} -std=c++17)
 
       set(_cuda_nvcc_args
         -arch=${arch}
@@ -700,6 +698,7 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
         -Wno-parentheses-equality
         -Wno-unused-value
         -ffast-math
+        -std=c++17
         ${math_flag}
         ${hip_opt_flags}
         -o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})
@@ -870,6 +869,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
       COMMAND
         ${CUDA_NVCC_EXECUTABLE}
         --ptx
+        -std=c++17
         -arch=sm_50
         ${cuda_flags}
         ${input}
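
With all three GPU toolchains (CUDA, HIP, and the OptiX PTX build) now compiling unconditionally with -std=c++17, the kernel can rely on `if constexpr`. What matters here is that the untaken branch of `if constexpr` is discarded at compile time and never type-checked against the concrete template arguments, which is exactly what the volume-stack code needs since the main and shadow state types expose different accessors. A toy sketch of the difference (MainState/ShadowState are invented for illustration, not Cycles code):

  #include <cstdio>

  struct MainState {
    int main_stack;
  };
  struct ShadowState {
    int shadow_stack;
  };

  /* With a plain `if`, both branches would be type-checked for every State,
   * and MainState has no `shadow_stack` member. `if constexpr` discards the
   * untaken branch, so each instantiation only compiles what it uses. */
  template<bool shadow, typename State> int read_stack(const State &state)
  {
    if constexpr (shadow) {
      return state.shadow_stack;
    }
    else {
      return state.main_stack;
    }
  }

  int main()
  {
    const MainState main_state = {1};
    const ShadowState shadow_state = {2};
    printf("%d %d\n", read_stack<false>(main_state), read_stack<true>(shadow_state));
    return 0;
  }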
@@ -195,31 +195,6 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \
 
 // clang-format on
 
-/* volumetric lambda functions - use function objects for lambda-like functionality */
-#define VOLUME_READ_LAMBDA(function_call) \
-  struct FnObjectRead { \
-    KernelGlobals kg; \
-    ccl_private MetalKernelContext *context; \
-    int state; \
-\
-    VolumeStack operator()(const int i) const \
-    { \
-      return context->function_call; \
-    } \
-  } volume_read_lambda_pass{kg, this, state};
-
-#define VOLUME_WRITE_LAMBDA(function_call) \
-  struct FnObjectWrite { \
-    KernelGlobals kg; \
-    ccl_private MetalKernelContext *context; \
-    int state; \
-\
-    void operator()(const int i, VolumeStack entry) const \
-    { \
-      context->function_call; \
-    } \
-  } volume_write_lambda_pass{kg, this, state};
-
 /* make_type definitions with Metal style element initializers */
 ccl_device_forceinline float2 make_float2(const float x, const float y)
 {
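
For illustration, with a call like VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i)), the removed Metal macro expanded to roughly the following explicit functor (the expansion is written out by hand here, it is not part of the diff):

  struct FnObjectRead {
    KernelGlobals kg;
    ccl_private MetalKernelContext *context;
    int state;

    VolumeStack operator()(const int i) const
    {
      return context->integrator_state_read_volume_stack(state, i);
    }
  } volume_read_lambda_pass{kg, this, state};

With the constexpr dispatch there is no functor left to pass around, so the Metal-specific override of the macros disappears along with the default definitions in volume_stack.h below.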
@@ -55,7 +55,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
       continue;
     }
     shader_setup_from_ray(kg, stack_sd, &volume_ray, isect);
-    volume_stack_enter_exit(kg, state, stack_sd);
+    volume_stack_enter_exit<false>(kg, state, stack_sd);
   }
 }
 #  else
@@ -67,7 +67,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
     /* Ignore self, SSS itself already enters and exits the object. */
     if (isect.object != volume_ray.self.object) {
       shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect);
-      volume_stack_enter_exit(kg, state, stack_sd);
+      volume_stack_enter_exit<false>(kg, state, stack_sd);
     }
     /* Move ray forward. */
     volume_ray.tmin = intersection_t_offset(isect.t);
@@ -53,7 +53,7 @@ ccl_device_inline Spectrum integrate_transparent_surface_shadow(KernelGlobals kg
 
 #  ifdef __VOLUME__
     /* Exit/enter volume. */
-    shadow_volume_stack_enter_exit(kg, state, shadow_sd);
+    volume_stack_enter_exit<true>(kg, state, shadow_sd);
 #  endif
 
     /* Disable transparent shadows for ray portals */
@@ -94,8 +94,7 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg,
   /* `object` is only needed for light tree with light linking, it is irrelevant for shadow. */
   shader_setup_from_volume(shadow_sd, &ray, OBJECT_NONE);
 
-  VOLUME_READ_LAMBDA(integrator_state_read_shadow_volume_stack(state, i));
-  const float step_size = volume_stack_step_size(kg, volume_read_lambda_pass);
+  const float step_size = volume_stack_step_size<true>(kg, state);
 
   volume_shadow_heterogeneous(kg, state, &ray, shadow_sd, throughput, step_size);
 }
@@ -413,7 +413,7 @@ ccl_device
 
   if (is_transmission) {
 #ifdef __VOLUME__
-    shadow_volume_stack_enter_exit(kg, shadow_state, sd);
+    volume_stack_enter_exit<true>(kg, shadow_state, sd);
 #endif
   }
 
@@ -820,7 +820,7 @@ ccl_device int integrate_surface(KernelGlobals kg,
 
     if (continue_path_label & LABEL_TRANSMIT) {
       /* Enter/Exit volume. */
-      volume_stack_enter_exit(kg, state, &sd);
+      volume_stack_enter_exit<false>(kg, state, &sd);
     }
 #endif
 
@@ -80,8 +80,7 @@ ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg,
                                                    ccl_private ShaderData *ccl_restrict sd,
                                                    ccl_private Spectrum *ccl_restrict extinction)
 {
-  VOLUME_READ_LAMBDA(integrator_state_read_shadow_volume_stack(state, i))
-  volume_shader_eval<true>(kg, state, sd, PATH_RAY_SHADOW, volume_read_lambda_pass);
+  volume_shader_eval<true>(kg, state, sd, PATH_RAY_SHADOW);
 
   if (!(sd->flag & SD_EXTINCTION)) {
     return false;
@@ -98,8 +97,7 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg,
                                             ccl_private VolumeShaderCoefficients *coeff)
 {
   const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
-  VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i))
-  volume_shader_eval<false>(kg, state, sd, path_flag, volume_read_lambda_pass);
+  volume_shader_eval<false>(kg, state, sd, path_flag);
 
   if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION))) {
     return false;
@@ -1023,8 +1021,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
                                                      VOLUME_SAMPLE_DISTANCE;
 
   /* Step through volume. */
-  VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i))
-  const float step_size = volume_stack_step_size(kg, volume_read_lambda_pass);
+  const float step_size = volume_stack_step_size<false>(kg, state);
 
 #  if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
   /* The current path throughput which is used later to calculate per-segment throughput. */
@@ -21,6 +21,7 @@
 #include "kernel/film/light_passes.h"
 
 #include "kernel/integrator/guiding.h"
+#include "kernel/integrator/volume_stack.h"
 
 CCL_NAMESPACE_BEGIN
 
@@ -484,12 +485,11 @@ ccl_device_inline bool volume_shader_eval_entry(KernelGlobals kg,
   return true;
 }
 
-template<const bool shadow, typename StackReadOp, typename ConstIntegratorGenericState>
+template<const bool shadow, typename ConstIntegratorGenericState>
 ccl_device_inline void volume_shader_eval(KernelGlobals kg,
                                           ConstIntegratorGenericState state,
                                           ccl_private ShaderData *ccl_restrict sd,
-                                          const uint32_t path_flag,
-                                          StackReadOp stack_read)
+                                          const uint32_t path_flag)
 {
   /* If path is being terminated, we are tracing a shadow ray or evaluating
    * emission, then we don't need to store closures. The emission and shadow
@@ -510,7 +510,7 @@ ccl_device_inline void volume_shader_eval(KernelGlobals kg,
   sd->object_flag = 0;
 
   for (int i = 0;; i++) {
-    const VolumeStack entry = stack_read(i);
+    const VolumeStack entry = volume_stack_read<shadow>(state, i);
     if (!volume_shader_eval_entry<shadow, KERNEL_FEATURE_NODE_MASK_VOLUME>(
             kg, state, sd, entry, path_flag))
     {
@@ -8,24 +8,40 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __VOLUME__
 
-/* Volumetric read/write lambda functions - default implementations */
-#  ifndef VOLUME_READ_LAMBDA
-#    define VOLUME_READ_LAMBDA(function_call) \
-      auto volume_read_lambda_pass = [=](const int i) { return function_call; };
-#    define VOLUME_WRITE_LAMBDA(function_call) \
-      auto volume_write_lambda_pass = [=](const int i, VolumeStack entry) { function_call; };
-#  endif
-
 /* Volume Stack
  *
  * This is an array of object/shader ID's that the current segment of the path
  * is inside of. */
 
-template<typename StackReadOp, typename StackWriteOp>
+template<const bool shadow, typename IntegratorGenericState>
+ccl_device_forceinline VolumeStack volume_stack_read(const IntegratorGenericState state,
+                                                     const int i)
+{
+  if constexpr (shadow) {
+    return integrator_state_read_shadow_volume_stack(state, i);
+  }
+  else {
+    return integrator_state_read_volume_stack(state, i);
+  }
+}
+
+template<const bool shadow, typename IntegratorGenericState>
+ccl_device_forceinline void volume_stack_write(IntegratorGenericState state,
+                                               const int i,
+                                               const VolumeStack entry)
+{
+  if constexpr (shadow) {
+    integrator_state_write_shadow_volume_stack(state, i, entry);
+  }
+  else {
+    integrator_state_write_volume_stack(state, i, entry);
+  }
+}
+
+template<const bool shadow, typename IntegratorGenericState>
 ccl_device void volume_stack_enter_exit(KernelGlobals kg,
-                                        const ccl_private ShaderData *sd,
-                                        StackReadOp stack_read,
-                                        StackWriteOp stack_write)
+                                        IntegratorGenericState state,
+                                        const ccl_private ShaderData *sd)
 {
 #  ifdef __KERNEL_USE_DATA_CONSTANTS__
   /* If we're using data constants, this fetch disappears.
@@ -46,7 +62,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg,
   if (sd->flag & SD_BACKFACING) {
     /* Exit volume object: remove from stack. */
     for (int i = 0;; i++) {
-      VolumeStack entry = stack_read(i);
+      VolumeStack entry = volume_stack_read<shadow>(state, i);
       if (entry.shader == SHADER_NONE) {
         break;
       }
@@ -54,8 +70,8 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg,
       if (entry.object == sd->object && entry.shader == sd->shader) {
         /* Shift back next stack entries. */
         do {
-          entry = stack_read(i + 1);
-          stack_write(i, entry);
+          entry = volume_stack_read<shadow>(state, i + 1);
+          volume_stack_write<shadow>(state, i, entry);
           i++;
         } while (entry.shader != SHADER_NONE);
 
@@ -67,7 +83,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg,
     /* Enter volume object: add to stack. */
     uint i;
     for (i = 0;; i++) {
-      VolumeStack entry = stack_read(i);
+      const VolumeStack entry = volume_stack_read<shadow>(state, i);
       if (entry.shader == SHADER_NONE) {
         break;
       }
@@ -86,29 +102,11 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg,
     /* Add to the end of the stack. */
     const VolumeStack new_entry = {sd->object, sd->shader};
     const VolumeStack empty_entry = {OBJECT_NONE, SHADER_NONE};
-    stack_write(i, new_entry);
-    stack_write(i + 1, empty_entry);
+    volume_stack_write<shadow>(state, i, new_entry);
+    volume_stack_write<shadow>(state, i + 1, empty_entry);
   }
 }
 
-ccl_device void volume_stack_enter_exit(KernelGlobals kg,
-                                        IntegratorState state,
-                                        const ccl_private ShaderData *sd)
-{
-  VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i))
-  VOLUME_WRITE_LAMBDA(integrator_state_write_volume_stack(state, i, entry))
-  volume_stack_enter_exit(kg, sd, volume_read_lambda_pass, volume_write_lambda_pass);
-}
-
-ccl_device void shadow_volume_stack_enter_exit(KernelGlobals kg,
-                                               IntegratorShadowState state,
-                                               const ccl_private ShaderData *sd)
-{
-  VOLUME_READ_LAMBDA(integrator_state_read_shadow_volume_stack(state, i))
-  VOLUME_WRITE_LAMBDA(integrator_state_write_shadow_volume_stack(state, i, entry))
-  volume_stack_enter_exit(kg, sd, volume_read_lambda_pass, volume_write_lambda_pass);
-}
-
 /* Clean stack after the last bounce.
  *
  * It is expected that all volumes are closed manifolds, so at the time when ray
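
With the functors gone, the two thin wrappers deleted above are no longer needed either: call sites name the stack flavor directly through the template argument, as the earlier hunks in this commit show:

  /* Main path, e.g. in integrate_surface(): */
  volume_stack_enter_exit<false>(kg, state, &sd);

  /* Shadow path, e.g. for transparent shadows: */
  volume_stack_enter_exit<true>(kg, shadow_state, sd);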
@@ -162,13 +160,13 @@ ccl_device_inline bool volume_is_homogeneous(KernelGlobals kg,
   return true;
 }
 
-template<typename StackReadOp>
-ccl_device float volume_stack_step_size(KernelGlobals kg, StackReadOp stack_read)
+template<const bool shadow, typename IntegratorGenericState>
+ccl_device float volume_stack_step_size(KernelGlobals kg, const IntegratorGenericState state)
 {
   float step_size = FLT_MAX;
 
   for (int i = 0;; i++) {
-    VolumeStack entry = stack_read(i);
+    const VolumeStack entry = volume_stack_read<shadow>(state, i);
     if (entry.shader == SHADER_NONE) {
       break;
     }
@@ -202,14 +202,9 @@ ccl_device_inline void osl_eval_nodes(KernelGlobals kg,
     globals.shade_index = state + 1;
   }
 
-  /* For surface shaders, we might have an automatic bump shader that needs to be executed before
-   * the main shader to update globals.N. */
-#  if __cplusplus < 201703L
-  if (type == SHADER_TYPE_SURFACE)
-#  else
-  if constexpr (type == SHADER_TYPE_SURFACE)
-#  endif
-  {
+  /* For surface shaders, we might have an automatic bump shader that needs to be executed before
+   * the main shader to update globals.N. */
+  if constexpr (type == SHADER_TYPE_SURFACE) {
     if (sd->flag & SD_HAS_BUMP) {
       /* Save state. */
       const float3 P = sd->P;
@@ -269,11 +264,7 @@ ccl_device_inline void osl_eval_nodes(KernelGlobals kg,
       /* interactive_params_ptr */ (void *)nullptr);
 #  endif
 
-#  if __cplusplus < 201703L
-  if (type == SHADER_TYPE_DISPLACEMENT) {
-#  else
   if constexpr (type == SHADER_TYPE_DISPLACEMENT) {
-#  endif
     sd->P = globals.P;
   }
   else if (globals.Ci) {
@@ -158,19 +158,10 @@ CCL_NAMESPACE_BEGIN
    KERNEL_FEATURE_NODE_BUMP_STATE | KERNEL_FEATURE_NODE_PORTAL)
 #define KERNEL_FEATURE_NODE_MASK_BUMP KERNEL_FEATURE_NODE_MASK_DISPLACEMENT
 
-/* Must be constexpr on the CPU to avoid compile errors because the state types
- * are different depending on the main, shadow or null path. For GPU we don't have
- * C++17 everywhere so need to check it. */
-#if __cplusplus < 201703L
-#  define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
-#  define IF_KERNEL_NODES_FEATURE(feature) \
-    if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
-#else
-#  define IF_KERNEL_FEATURE(feature) \
-    if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
-#  define IF_KERNEL_NODES_FEATURE(feature) \
-    if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
-#endif
+#define IF_KERNEL_FEATURE(feature) \
+  if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
+#define IF_KERNEL_NODES_FEATURE(feature) \
+  if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
 
 /* Kernel features */
 #define __AO__
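
The removed comment explained why these macros must use `if constexpr`: the state types differ between the main, shadow, and null paths, so a plain `if` would force both branches to type-check for every path. With C++17 guaranteed everywhere, only the constexpr variant remains. A self-contained toy sketch of how such a macro prunes branches at compile time (the feature bit value and eval_node function are stand-ins, not Cycles code):

  #include <cstdint>
  #include <cstdio>

  /* Stand-in feature bit (hypothetical value, for illustration only). */
  #define KERNEL_FEATURE_NODE_VOLUME (1U << 0)

  /* Same shape as the macro in the diff: relies on a constexpr-usable
   * `node_feature_mask` being in scope, here a template parameter. */
  #define IF_KERNEL_NODES_FEATURE(feature) \
    if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)

  template<uint32_t node_feature_mask> void eval_node()
  {
    IF_KERNEL_NODES_FEATURE(VOLUME)
    {
      /* Discarded entirely in instantiations whose mask lacks the bit. */
      printf("volume nodes compiled in\n");
    }
  }

  int main()
  {
    eval_node<KERNEL_FEATURE_NODE_VOLUME>(); /* prints */
    eval_node<0u>();                         /* branch discarded */
    return 0;
  }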