In detail:
- Direct accesses to state attributes are replaced with the INTEGRATOR_STATE and INTEGRATOR_STATE_WRITE macros.
- Unified the checks for the __PATH_GUIDING__ define to use # if defined(__PATH_GUIDING__).
- Even if __PATH_GUIDING__ is defined, we now check at runtime whether the feature is enabled using if (kernel_data.kernel_features & KERNEL_FEATURE_PATH_GUIDING). This is important for later GPU ports (see the sketch after this list).
- The kernel usage of the guiding field, surface, and volume sampling distributions is wrapped behind device-specific macros (currently CPU only). This will make a later GPU port easier.
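For illustration, a minimal sketch of the combined compile-time and runtime guard described above, following the pattern used in the file below. The wrapping function example_reset_guiding_segment is hypothetical; the macros, the KERNEL_FEATURE_PATH_GUIDING flag, and the shadow_path path_segment field are the ones that appear in the code.

/* Illustrative sketch only: combines the compile-time __PATH_GUIDING__ check with the
 * runtime feature check, and accesses state through the macros instead of touching
 * struct members directly. The function itself is hypothetical. */
ccl_device_forceinline void example_reset_guiding_segment(KernelGlobals kg,
                                                          IntegratorShadowState state)
{
#if defined(__PATH_GUIDING__)
  /* Even with guiding compiled in, only act when the feature is enabled for this
   * render; this runtime check is what makes later GPU ports easier. */
  if (kernel_data.kernel_features & KERNEL_FEATURE_PATH_GUIDING) {
    /* Access goes through INTEGRATOR_STATE_WRITE instead of a direct member access
     * such as state->shadow_path.path_segment = nullptr. */
    INTEGRATOR_STATE_WRITE(state, shadow_path, path_segment) = nullptr;
  }
#endif
  (void)kg;
  (void)state;
}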
236 lines · 10 KiB · C
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
 *
 * SPDX-License-Identifier: Apache-2.0 */

#pragma once

#include "kernel/globals.h"
#include "kernel/types.h"

#include "kernel/integrator/state.h"

#include "util/atomic.h"

CCL_NAMESPACE_BEGIN

/* Control Flow
 *
 * Utilities for control flow between kernels. The implementation is different between CPU and
 * GPU devices. For the latter, part of the logic is handled on the host side with wavefronts.
 *
 * There is a main path for regular path tracing from the camera. Shadows for next
 * event estimation branch off from this into their own path, which may be computed in
 * parallel while the main path continues. Additionally, shading kernels are sorted using
 * a key for coherence.
 *
 * Each kernel on the main path must call one of these functions. These may not be called
 * multiple times from the same kernel.
 *
 * integrator_path_init(kg, state, next_kernel)
 * integrator_path_next(kg, state, current_kernel, next_kernel)
 * integrator_path_terminate(kg, state, current_kernel)
 *
 * For the shadow path, similar functions are used, and again each shadow kernel must call
 * one of them, and only once.
 */

ccl_device_forceinline bool integrator_path_is_terminated(ConstIntegratorState state)
{
  return INTEGRATOR_STATE(state, path, queued_kernel) == 0;
}

ccl_device_forceinline bool integrator_shadow_path_is_terminated(ConstIntegratorShadowState state)
{
  return INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0;
}

#ifdef __KERNEL_GPU__

ccl_device_forceinline void integrator_path_init(KernelGlobals kg,
                                                 IntegratorState state,
                                                 const DeviceKernel next_kernel)
{
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
}

ccl_device_forceinline void integrator_path_next(KernelGlobals kg,
                                                 IntegratorState state,
                                                 const DeviceKernel current_kernel,
                                                 const DeviceKernel next_kernel)
{
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
}

ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg,
                                                      IntegratorState state,
                                                      const DeviceKernel current_kernel)
{
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
}

ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
    KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
{
  IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32(
      &kernel_integrator_state.next_shadow_path_index[0], 1);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
# if defined(__PATH_GUIDING__)
  if ((kernel_data.kernel_features & KERNEL_FEATURE_PATH_GUIDING)) {
    INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = nullptr;
  }
# endif
  return shadow_state;
}

ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg,
                                                        IntegratorShadowState state,
                                                        const DeviceKernel current_kernel,
                                                        const DeviceKernel next_kernel)
{
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
}

ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg,
                                                             IntegratorShadowState state,
                                                             const DeviceKernel current_kernel)
{
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
}

/* Sort first by truncated state index (for good locality), then by key (for good coherence). */
# define INTEGRATOR_SORT_KEY(key, state) \
  (key + kernel_data.max_shaders * (state / kernel_integrator_state.sort_partition_divisor))

ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg,
                                                        IntegratorState state,
                                                        const DeviceKernel next_kernel,
                                                        const uint32_t key)
{
  const int key_ = INTEGRATOR_SORT_KEY(key, state);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;

# if defined(__KERNEL_LOCAL_ATOMIC_SORT__)
  if (!kernel_integrator_state.sort_key_counter[next_kernel]) {
    return;
  }
# endif

  atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
}

ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg,
                                                        IntegratorState state,
                                                        const DeviceKernel current_kernel,
                                                        const DeviceKernel next_kernel,
                                                        const uint32_t key)
{
  const int key_ = INTEGRATOR_SORT_KEY(key, state);
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;

# if defined(__KERNEL_LOCAL_ATOMIC_SORT__)
  if (!kernel_integrator_state.sort_key_counter[next_kernel]) {
    return;
  }
# endif

  atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
}

#else

ccl_device_forceinline void integrator_path_init(KernelGlobals kg,
                                                 IntegratorState state,
                                                 const DeviceKernel next_kernel)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
}

ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg,
                                                        IntegratorState state,
                                                        const DeviceKernel next_kernel,
                                                        const uint32_t key)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  (void)key;
}

ccl_device_forceinline void integrator_path_next(KernelGlobals kg,
                                                 IntegratorState state,
                                                 const DeviceKernel current_kernel,
                                                 const DeviceKernel next_kernel)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  (void)current_kernel;
}

ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg,
                                                      IntegratorState state,
                                                      const DeviceKernel current_kernel)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
  (void)current_kernel;
}

ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg,
                                                        IntegratorState state,
                                                        const DeviceKernel current_kernel,
                                                        const DeviceKernel next_kernel,
                                                        const uint32_t key)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  (void)key;
  (void)current_kernel;
}

ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
    KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
{
  IntegratorShadowState shadow_state = (is_ao) ? &state->ao : &state->shadow;
  INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
# if defined(__PATH_GUIDING__)
  if ((kernel_data.kernel_features & KERNEL_FEATURE_PATH_GUIDING)) {
    INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = nullptr;
  }
# endif
  return shadow_state;
}

ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg,
                                                        IntegratorShadowState state,
                                                        const DeviceKernel current_kernel,
                                                        const DeviceKernel next_kernel)
{
  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
  (void)current_kernel;
}

ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg,
                                                             IntegratorShadowState state,
                                                             const DeviceKernel current_kernel)
{
  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
  (void)current_kernel;
}

#endif

CCL_NAMESPACE_END