File: test2/intern/cycles/kernel/integrator/state_flow.h
Commit 5abf42012d by Sebastian Herholz (2025-05-22 13:46:30 +02:00): Cycles: Guiding: cleaning up and refactoring the guiding code
In detail:
- Direct accesses of state attributes are replaced with the INTEGRATOR_STATE and INTEGRATOR_STATE_WRITE macros.
- Unified the checks for the __PATH_GUIDING__ define to use #  if defined(__PATH_GUIDING__).
- Even if __PATH_GUIDING__ is defined, the code now also checks at runtime whether the feature is enabled, using if (kernel_data.kernel_features & KERNEL_FEATURE_PATH_GUIDING). This is important for later GPU ports.
- The kernel usage of the guiding field and of the surface and volume sampling distributions is wrapped behind device-specific macros (at the moment only for the CPU). This will make a later GPU port easier.
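
For illustration, the compile-time plus runtime gating described above follows this pattern (a minimal sketch; the concrete nested form, written as "#  if defined(__PATH_GUIDING__)", can be seen in the shadow-path functions of the file below):

  #if defined(__PATH_GUIDING__)
  if (kernel_data.kernel_features & KERNEL_FEATURE_PATH_GUIDING) {
    /* Guiding-specific work runs only when the feature is enabled at runtime. */
  }
  #endif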

/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
 *
 * SPDX-License-Identifier: Apache-2.0 */

#pragma once

#include "kernel/globals.h"
#include "kernel/types.h"

#include "kernel/integrator/state.h"

#include "util/atomic.h"

CCL_NAMESPACE_BEGIN

/* Control Flow
 *
 * Utilities for control flow between kernels. The implementation is different between CPU and
 * GPU devices. For the latter, part of the logic is handled on the host side with wavefronts.
 *
 * There is a main path for regular path tracing of camera rays. Shadow rays for next
 * event estimation branch off from this into their own path, which may be computed in
 * parallel while the main path continues. Additionally, shading kernels are sorted using
 * a key for coherence.
 *
 * Each kernel on the main path must call one of these functions. These may not be called
 * multiple times from the same kernel.
 *
 * integrator_path_init(kg, state, next_kernel)
 * integrator_path_next(kg, state, current_kernel, next_kernel)
 * integrator_path_terminate(kg, state, current_kernel)
 *
 * For the shadow path similar functions are used, and again each shadow kernel must call
 * one of them, and only once.
 */
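
/* Illustrative usage (not part of this header): a hypothetical main-path kernel that either
 * forwards the path to the next kernel or terminates it. The function name and the
 * continue_path parameter are placeholders; the only contract is that exactly one of the
 * three functions above is called per kernel invocation.
 *
 *   ccl_device void integrator_example_shade(KernelGlobals kg,
 *                                            IntegratorState state,
 *                                            const bool continue_path)
 *   {
 *     if (continue_path) {
 *       // Hand the path over to the next kernel.
 *       integrator_path_next(kg,
 *                            state,
 *                            DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE,
 *                            DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
 *     }
 *     else {
 *       // No more work for this path: clear queued_kernel so it reads as terminated.
 *       integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE);
 *     }
 *   }
 */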

ccl_device_forceinline bool integrator_path_is_terminated(ConstIntegratorState state)
{
  return INTEGRATOR_STATE(state, path, queued_kernel) == 0;
}

ccl_device_forceinline bool integrator_shadow_path_is_terminated(ConstIntegratorShadowState state)
{
  return INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0;
}

#ifdef __KERNEL_GPU__

/* On the GPU the integrator runs as a wavefront: per-kernel queue counters are updated with
 * atomics and read back by the host to decide which kernel to launch next. */

ccl_device_forceinline void integrator_path_init(KernelGlobals kg,
                                                 IntegratorState state,
                                                 const DeviceKernel next_kernel)
{
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
}

ccl_device_forceinline void integrator_path_next(KernelGlobals kg,
                                                 IntegratorState state,
                                                 const DeviceKernel current_kernel,
                                                 const DeviceKernel next_kernel)
{
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
}

ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg,
                                                      IntegratorState state,
                                                      const DeviceKernel current_kernel)
{
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
}

ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
    KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
{
  /* Allocate a slot for the new shadow path by atomically bumping the global index. */
  IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32(
      &kernel_integrator_state.next_shadow_path_index[0], 1);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
#  if defined(__PATH_GUIDING__)
  if ((kernel_data.kernel_features & KERNEL_FEATURE_PATH_GUIDING)) {
    INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = nullptr;
  }
#  endif
  return shadow_state;
}

ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg,
                                                        IntegratorShadowState state,
                                                        const DeviceKernel current_kernel,
                                                        const DeviceKernel next_kernel)
{
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
}

ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg,
                                                             IntegratorShadowState state,
                                                             const DeviceKernel current_kernel)
{
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
}
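
/* Illustrative usage (not part of this header): branching a shadow path off the main path from a
 * shading kernel. The function name is a placeholder; integrator_shadow_path_init() returns the
 * shadow state that subsequent shadow kernels operate on independently of the main path.
 *
 *   ccl_device void integrator_example_branch_shadow(KernelGlobals kg,
 *                                                    IntegratorState state,
 *                                                    const bool is_ao)
 *   {
 *     IntegratorShadowState shadow_state = integrator_shadow_path_init(
 *         kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, is_ao);
 *     // ... copy ray, throughput and related data into shadow_state ...
 *   }
 *
 * The shadow kernels themselves then call integrator_shadow_path_next() or
 * integrator_shadow_path_terminate() exactly once each, mirroring the main path. */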

/* Sort first by truncated state index (for good locality), then by key (for good coherence). */
#  define INTEGRATOR_SORT_KEY(key, state) \
    (key + kernel_data.max_shaders * (state / kernel_integrator_state.sort_partition_divisor))
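
/* Worked example (illustrative numbers, not actual scene values): with kernel_data.max_shaders
 * = 64 and kernel_integrator_state.sort_partition_divisor = 65536, a path at state index 200000
 * with shader key 5 gets
 *
 *   5 + 64 * (200000 / 65536) = 5 + 64 * 3 = 197
 *
 * so paths are grouped first by the partition of the state array they live in (locality) and
 * only within a partition by shader key (coherence). */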

ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg,
                                                        IntegratorState state,
                                                        const DeviceKernel next_kernel,
                                                        const uint32_t key)
{
  const int key_ = INTEGRATOR_SORT_KEY(key, state);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
#  if defined(__KERNEL_LOCAL_ATOMIC_SORT__)
  if (!kernel_integrator_state.sort_key_counter[next_kernel]) {
    return;
  }
#  endif
  atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
}

ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg,
                                                        IntegratorState state,
                                                        const DeviceKernel current_kernel,
                                                        const DeviceKernel next_kernel,
                                                        const uint32_t key)
{
  const int key_ = INTEGRATOR_SORT_KEY(key, state);
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
#  if defined(__KERNEL_LOCAL_ATOMIC_SORT__)
  if (!kernel_integrator_state.sort_key_counter[next_kernel]) {
    return;
  }
#  endif
  atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
}
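
/* Illustrative usage (not part of this header): a kernel transition that queues the surface
 * shading kernel with a sort key. The surface shader index is a typical key; the function name
 * is a placeholder.
 *
 *   ccl_device void integrator_example_queue_shade_surface(KernelGlobals kg,
 *                                                          IntegratorState state,
 *                                                          const uint32_t shader_index)
 *   {
 *     integrator_path_next_sorted(kg,
 *                                 state,
 *                                 DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST,
 *                                 DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE,
 *                                 shader_index);
 *   }
 *
 * On the CPU the key is ignored (see the functions below); on the GPU it feeds the sort-key
 * counters used to order work for the next kernel launch. */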

#else

/* On the CPU kernels are called directly one after another, so only the queued kernel needs to
 * be tracked in the path state; there are no wavefront queue counters to update. */

ccl_device_forceinline void integrator_path_init(KernelGlobals kg,
                                                 IntegratorState state,
                                                 const DeviceKernel next_kernel)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
}

ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg,
                                                        IntegratorState state,
                                                        const DeviceKernel next_kernel,
                                                        const uint32_t key)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  (void)key;
}

ccl_device_forceinline void integrator_path_next(KernelGlobals kg,
                                                 IntegratorState state,
                                                 const DeviceKernel current_kernel,
                                                 const DeviceKernel next_kernel)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  (void)current_kernel;
}

ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg,
                                                      IntegratorState state,
                                                      const DeviceKernel current_kernel)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
  (void)current_kernel;
}

ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg,
                                                        IntegratorState state,
                                                        const DeviceKernel current_kernel,
                                                        const DeviceKernel next_kernel,
                                                        const uint32_t key)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  (void)key;
  (void)current_kernel;
}

ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
    KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
{
  /* The CPU path state owns dedicated AO and shadow sub-states, so no allocation is needed. */
  IntegratorShadowState shadow_state = (is_ao) ? &state->ao : &state->shadow;
  INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
#  if defined(__PATH_GUIDING__)
  if ((kernel_data.kernel_features & KERNEL_FEATURE_PATH_GUIDING)) {
    INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = nullptr;
  }
#  endif
  return shadow_state;
}

ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg,
                                                        IntegratorShadowState state,
                                                        const DeviceKernel current_kernel,
                                                        const DeviceKernel next_kernel)
{
  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
  (void)current_kernel;
}

ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg,
                                                             IntegratorShadowState state,
                                                             const DeviceKernel current_kernel)
{
  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
  (void)current_kernel;
}

#endif

CCL_NAMESPACE_END