Files
test2/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h
Hugh Delaney 930a942dd0 Refactor: Cycles: Move block sizes into common header
This change puts all the block size macros in the same common header, so
they can be included in host side code without needing to also include
the kernels that are defined in the device headers that contained these
values.

This change also removes a magic number used to enqueue a kernel, which
happened to agree with the GPU_PARALLEL_SORT_BLOCK_SIZE macro.

Pull Request: https://projects.blender.org/blender/blender/pulls/143646
2025-08-01 13:26:02 +02:00

37 lines
895 B
C

/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
*
* SPDX-License-Identifier: Apache-2.0 */
#pragma once
CCL_NAMESPACE_BEGIN
/* Parallel prefix sum.
*
* TODO: actually make this work in parallel.
*
* This is used for an array the size of the number of shaders in the scene
* which is not usually huge, so might not be a significant bottleneck. */
#include "util/atomic.h"
/* Exclusive prefix sum of `counter` into `prefix_sum`.
 *
 * Only the invocation with `global_id == 0` does any work; every other
 * invocation returns without touching memory. That invocation walks the
 * `num_values` entries serially: `prefix_sum[i]` receives the sum of all
 * counter entries preceding index `i`, and `counter[i]` is reset to zero. */
__device__ void gpu_parallel_prefix_sum(const int global_id,
                                        ccl_global int *counter,
                                        ccl_global int *prefix_sum,
                                        const int num_values)
{
  if (global_id == 0) {
    int running_total = 0;
    int index = 0;
    while (index < num_values) {
      /* Read the count before clearing it; the output slot gets the total
       * accumulated so far, excluding the current entry. */
      const int count = counter[index];
      prefix_sum[index] = running_total;
      counter[index] = 0;
      running_total += count;
      index++;
    }
  }
}
CCL_NAMESPACE_END