Files
test/intern/cycles/kernel/kernel_id_passes.h
Brecht Van Lommel 0803119725 Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.

Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.

Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles

Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)

For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.

Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-21 14:55:54 +02:00

107 lines
3.8 KiB
C

/*
* Copyright 2018 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
CCL_NAMESPACE_BEGIN
/* Element of ID pass stored in the render buffers.
* It is `float2` semantically, but it must be unaligned since the offset of ID passes in the
* render buffers might not meet expected by compiler alignment. */
typedef struct IDPassBufferElement {
float x;
float y;
} IDPassBufferElement;
ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
int num_slots,
float id,
float weight)
{
kernel_assert(id != ID_NONE);
if (weight == 0.0f) {
return;
}
for (int slot = 0; slot < num_slots; slot++) {
ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer;
#ifdef __ATOMIC_PASS_WRITE__
/* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
if (id_buffer[slot].x == ID_NONE) {
/* Use an atomic to claim this slot.
* If a different thread got here first, try again from this slot on. */
float old_id = atomic_compare_and_swap_float(buffer + slot * 2, ID_NONE, id);
if (old_id != ID_NONE && old_id != id) {
continue;
}
atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight);
break;
}
/* If there already is a slot for that ID, add the weight.
* If no slot was found, add it to the last. */
else if (id_buffer[slot].x == id || slot == num_slots - 1) {
atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight);
break;
}
#else /* __ATOMIC_PASS_WRITE__ */
/* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
if (id_buffer[slot].x == ID_NONE) {
id_buffer[slot].x = id;
id_buffer[slot].y = weight;
break;
}
/* If there already is a slot for that ID, add the weight.
* If no slot was found, add it to the last. */
else if (id_buffer[slot].x == id || slot == num_slots - 1) {
id_buffer[slot].y += weight;
break;
}
#endif /* __ATOMIC_PASS_WRITE__ */
}
}
ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots)
{
ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer;
for (int slot = 1; slot < num_slots; ++slot) {
if (id_buffer[slot].x == ID_NONE) {
return;
}
/* Since we're dealing with a tiny number of elements, insertion sort should be fine. */
int i = slot;
while (i > 0 && id_buffer[i].y > id_buffer[i - 1].y) {
const IDPassBufferElement swap = id_buffer[i];
id_buffer[i] = id_buffer[i - 1];
id_buffer[i - 1] = swap;
--i;
}
}
}
/* post-sorting for Cryptomatte */
ccl_device_inline void kernel_cryptomatte_post(const KernelGlobals *kg,
ccl_global float *render_buffer,
int pixel_index)
{
const int pass_stride = kernel_data.film.pass_stride;
const uint64_t render_buffer_offset = (uint64_t)pixel_index * pass_stride;
ccl_global float *cryptomatte_buffer = render_buffer + render_buffer_offset +
kernel_data.film.pass_cryptomatte;
kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
}
CCL_NAMESPACE_END