/* SPDX-FileCopyrightText: 2022 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
/** \file
* \ingroup draw
*
* Wrapper classes that make it easier to use GPU objects in C++.
*
* All Buffers need to be sent to GPU memory before being used. This is done by calling
* `push_update()`.
*
* A Storage[Array]Buffer can hold much more data than a Uniform[Array]Buffer,
* which can only hold 16KB of data.
*
* None of these types are copyable, and the buffer types are additionally not movable.
*
* `draw::UniformArrayBuffer<T, len>`
* Uniform buffer object containing an array of T with len elements.
* Data can be accessed using the [] operator.
*
* `draw::UniformBuffer<T>`
* A uniform buffer object class inheriting from T.
* Data can be accessed just like a normal T object.
*
* `draw::StorageArrayBuffer<T, len>`
* Storage buffer object containing an array of T with len elements.
* The item count can be changed after creation using `resize()`.
* However, this requires the invalidation of the whole buffer and
* discarding all data inside it.
* Data can be accessed using the [] operator.
*
* `draw::StorageVectorBuffer<T, len>`
* Same as `StorageArrayBuffer` but has a length counter and acts like a `blender::Vector` you can
* clear and append to.
*
* `draw::StorageBuffer<T>`
* A storage buffer object class inheriting from T.
* Data can be accessed just like a normal T object.
*
* `draw::Texture`
* A simple wrapper to #GPUTexture. A #draw::Texture can be created without allocation.
* The `ensure_[1d|2d|3d|cube][_array]()` methods are here to make sure the underlying texture
* meets the requirements, creating (or recreating) the #GPUTexture if needed.
*
* `draw::TextureFromPool`
* A GPUTexture from the viewport texture pool. This texture can be shared with other engines
* and its content is undefined when acquiring it.
* A #draw::TextureFromPool is acquired for rendering using `acquire()` and released once the
* rendering is done using `release()`. The same texture can be acquired & released multiple
* times in one draw loop.
* The `sync()` method *MUST* be called once during the cache populate (aka: Sync) phase.
*
* `draw::Framebuffer`
* Simple wrapper to #GPUFramebuffer that can be moved.
*/
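/* Usage sketch for the buffer types above (illustrative only; `MyShaderData` is a
* hypothetical 16-byte-aligned struct and the code is assumed to live inside the
* `blender::draw` namespace):
*
*   struct MyShaderData {
*     float4 color;
*     float4 params;
*   };
*
*   UniformBuffer<MyShaderData> data_buf;
*   data_buf.color = float4(1.0f);  // Accessed like a plain MyShaderData.
*   data_buf.push_update();         // Upload to GPU memory before use.
*
*   UniformArrayBuffer<float4, 16> colors_buf;
*   colors_buf[0] = float4(0.5f);
*   colors_buf.push_update();
*/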
#include "DRW_render.hh"
#include "MEM_guardedalloc.h"
#include "draw_manager_c.hh"
#include "draw_texture_pool.hh"
#include "BKE_global.hh"
#include "BLI_math_vector_types.hh"
#include "BLI_span.hh"
#include "BLI_utildefines.h"
#include "BLI_utility_mixins.hh"
#include "BLI_vector.hh"
#include "GPU_framebuffer.hh"
#include "GPU_storage_buffer.hh"
#include "GPU_texture.hh"
#include "GPU_uniform_buffer.hh"
namespace blender::draw {
/* -------------------------------------------------------------------- */
/** \name Implementation Details
* \{ */
namespace detail {
template<
/** Type of the values stored in this uniform buffer. */
typename T,
/** The number of values that can be stored in this uniform buffer. */
int64_t len,
/** True if the buffer only resides on GPU memory and cannot be accessed. */
bool device_only>
class DataBuffer {
protected:
T *data_ = nullptr;
int64_t len_ = len;
BLI_STATIC_ASSERT(((sizeof(T) * len) % 16) == 0,
"Buffer size needs to be aligned to the size of float4.");
public:
/**
* Get the value at the given index. This invokes undefined behavior when the
* index is out of bounds.
*/
const T &operator[](int64_t index) const
{
BLI_STATIC_ASSERT(!device_only, "");
BLI_assert(index >= 0);
BLI_assert(index < len_);
return data_[index];
}
T &operator[](int64_t index)
{
BLI_STATIC_ASSERT(!device_only, "");
BLI_assert(index >= 0);
BLI_assert(index < len_);
return data_[index];
}
/**
* Get a pointer to the beginning of the array.
*/
const T *data() const
{
BLI_STATIC_ASSERT(!device_only, "");
return data_;
}
T *data()
{
BLI_STATIC_ASSERT(!device_only, "");
return data_;
}
/**
* Iterator
*/
const T *begin() const
{
BLI_STATIC_ASSERT(!device_only, "");
return data_;
}
const T *end() const
{
BLI_STATIC_ASSERT(!device_only, "");
return data_ + len_;
}
T *begin()
{
BLI_STATIC_ASSERT(!device_only, "");
return data_;
}
T *end()
{
BLI_STATIC_ASSERT(!device_only, "");
return data_ + len_;
}
operator Span<T>() const
{
BLI_STATIC_ASSERT(!device_only, "");
return Span<T>(data_, len_);
}
};
template<typename T, int64_t len, bool device_only>
class UniformCommon : public DataBuffer<T, len, false>, NonMovable, NonCopyable {
protected:
GPUUniformBuf *ubo_;
#ifndef NDEBUG
const char *name_ = typeid(T).name();
#else
const char *name_ = "UniformBuffer";
#endif
public:
UniformCommon(const char *name = nullptr)
{
if (name) {
name_ = name;
}
ubo_ = GPU_uniformbuf_create_ex(sizeof(T) * len, nullptr, name_);
}
~UniformCommon()
{
GPU_uniformbuf_free(ubo_);
}
void push_update()
{
GPU_uniformbuf_update(ubo_, this->data_);
}
/* To be able to use it with DRW_shgroup_*_ref(). */
operator GPUUniformBuf *() const
{
return ubo_;
}
/* To be able to use it with DRW_shgroup_*_ref(). */
GPUUniformBuf **operator&()
{
return &ubo_;
}
};
template<typename T, int64_t len, bool device_only>
class StorageCommon : public DataBuffer<T, len, false>, NonMovable, NonCopyable {
protected:
GPUStorageBuf *ssbo_;
#ifndef NDEBUG
const char *name_ = typeid(T).name();
#else
const char *name_ = "StorageBuffer";
#endif
public:
StorageCommon(const char *name = nullptr)
{
if (name) {
name_ = name;
}
this->len_ = len;
constexpr GPUUsageType usage = device_only ? GPU_USAGE_DEVICE_ONLY : GPU_USAGE_DYNAMIC;
ssbo_ = GPU_storagebuf_create_ex(sizeof(T) * this->len_, nullptr, usage, this->name_);
}
~StorageCommon()
{
GPU_storagebuf_free(ssbo_);
}
void push_update()
{
BLI_assert(device_only == false);
GPU_storagebuf_update(ssbo_, this->data_);
}
void clear_to_zero()
{
GPU_storagebuf_clear_to_zero(ssbo_);
}
void async_flush_to_host()
{
GPU_storagebuf_sync_to_host(ssbo_);
}
void read()
{
GPU_storagebuf_read(ssbo_, this->data_);
}
operator GPUStorageBuf *() const
{
return ssbo_;
}
/* To be able to use it with DRW_shgroup_*_ref(). */
GPUStorageBuf **operator&()
{
return &ssbo_;
}
};
} // namespace detail
/** \} */
/* -------------------------------------------------------------------- */
/** \name Uniform Buffers
* \{ */
template<
/** Type of the values stored in this uniform buffer. */
typename T,
/** The number of values that can be stored in this uniform buffer. */
int64_t len
/** True if the buffer only resides on GPU memory and cannot be accessed. */
/* TODO(@fclem): Currently unsupported. */
/* bool device_only = false */>
class UniformArrayBuffer : public detail::UniformCommon<T, len, false> {
public:
UniformArrayBuffer(const char *name = nullptr) : detail::UniformCommon<T, len, false>(name)
{
/* TODO(@fclem): We should map memory instead. */
this->data_ = (T *)MEM_mallocN_aligned(len * sizeof(T), 16, this->name_);
}
~UniformArrayBuffer()
{
MEM_freeN(this->data_);
}
};
template<
/** Type of the values stored in this uniform buffer. */
typename T
/** True if the buffer only resides on GPU memory and cannot be accessed. */
/* TODO(@fclem): Currently unsupported. */
/* bool device_only = false */>
class UniformBuffer : public T, public detail::UniformCommon<T, 1, false> {
public:
UniformBuffer(const char *name = nullptr) : detail::UniformCommon<T, 1, false>(name)
{
/* TODO(@fclem): How could we map this? */
this->data_ = static_cast<T *>(this);
}
UniformBuffer<T> &operator=(const T &other)
{
*static_cast<T *>(this) = other;
return *this;
}
};
/** \} */
/* -------------------------------------------------------------------- */
/** \name Storage Buffer
* \{ */
template<
/** Type of the values stored in this storage buffer. */
typename T,
/** The number of values that can be stored in this storage buffer at creation. */
int64_t len = (512u + (sizeof(T) - 1)) / sizeof(T),
/** True if created on device and no host memory is allocated. */
bool device_only = false>
class StorageArrayBuffer : public detail::StorageCommon<T, len, device_only> {
public:
StorageArrayBuffer(const char *name = nullptr) : detail::StorageCommon<T, len, device_only>(name)
{
/* TODO(@fclem): We should map memory instead. */
this->data_ = (T *)MEM_mallocN_aligned(len * sizeof(T), 16, this->name_);
}
~StorageArrayBuffer()
{
MEM_freeN(this->data_);
}
/* Resize to \a new_size elements. */
void resize(int64_t new_size)
{
BLI_assert(new_size > 0);
if (new_size != this->len_) {
/* Manual realloc since MEM_reallocN_aligned does not exist. */
T *new_data_ = (T *)MEM_mallocN_aligned(new_size * sizeof(T), 16, this->name_);
memcpy(new_data_, this->data_, min_uu(this->len_, new_size) * sizeof(T));
MEM_freeN(this->data_);
this->data_ = new_data_;
GPU_storagebuf_free(this->ssbo_);
this->len_ = new_size;
constexpr GPUUsageType usage = device_only ? GPU_USAGE_DEVICE_ONLY : GPU_USAGE_DYNAMIC;
this->ssbo_ = GPU_storagebuf_create_ex(sizeof(T) * this->len_, nullptr, usage, this->name_);
}
}
/* Resize on access. */
T &get_or_resize(int64_t index)
{
BLI_assert(index >= 0);
if (index >= this->len_) {
size_t size = power_of_2_max_u(index + 1);
this->resize(size);
}
return this->data_[index];
}
/*
* Ensure the allocated size is not much larger than the currently required size,
* using the same heuristic as `get_or_resize`.
*/
void trim_to_next_power_of_2(int64_t required_size)
{
/* Don't go below the size used at creation. */
required_size = std::max(required_size, len);
size_t target_size = power_of_2_max_u(required_size);
if (this->len_ > target_size) {
this->resize(target_size);
}
}
int64_t size() const
{
return this->len_;
}
MutableSpan<T> as_span() const
{
return {this->data_, this->len_};
}
static void swap(StorageArrayBuffer &a, StorageArrayBuffer &b)
{
std::swap(a.data_, b.data_);
std::swap(a.ssbo_, b.ssbo_);
std::swap(a.len_, b.len_);
std::swap(a.name_, b.name_);
}
};
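/* Usage sketch for #StorageArrayBuffer (illustrative only; `ObjectData` is a hypothetical
* 16-byte-aligned struct and the sizes are arbitrary):
*
*   struct ObjectData {
*     float4 color;
*     float4 position;
*   };
*
*   StorageArrayBuffer<ObjectData, 64> objects_buf;
*   // Grows the allocation to the next power of two if the index is out of range.
*   // Any resize recreates the GPU buffer, so the data must be pushed again afterwards.
*   objects_buf.get_or_resize(100).color = float4(1.0f);
*   objects_buf.push_update();
*/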
template<
/** Type of the values stored in this storage buffer. */
typename T,
/** The number of values that can be stored in this storage buffer at creation. */
int64_t len = (512u + (sizeof(T) - 1)) / sizeof(T)>
class StorageVectorBuffer : public StorageArrayBuffer<T, len, false> {
private:
/* Number of items, not the allocated length. */
int64_t item_len_ = 0;
public:
StorageVectorBuffer(const char *name = nullptr) : StorageArrayBuffer<T, len, false>(name){};
~StorageVectorBuffer(){};
/**
* Set the item count to zero, but do not free memory or resize the buffer.
*/
void clear()
{
item_len_ = 0;
}
/**
* Set the item count to zero
* and trim the buffer if the current size is much larger than the current item count.
*/
void clear_and_trim()
{
this->trim_to_next_power_of_2(item_len_);
clear();
}
/**
* Insert a new element at the end of the vector.
* This might cause a reallocation if the capacity is exceeded.
*
* This is similar to std::vector::push_back.
*/
void append(const T &value)
{
this->append_as(value);
}
void append(T &&value)
{
this->append_as(std::move(value));
}
template<typename... ForwardT> void append_as(ForwardT &&...value)
{
if (item_len_ >= this->len_) {
size_t size = power_of_2_max_u(item_len_ + 1);
this->resize(size);
}
T *ptr = &this->data_[item_len_++];
new (ptr) T(std::forward<ForwardT>(value)...);
}
void extend(const Span<T> values)
{
/* TODO(fclem): Optimize to a single memcpy. */
for (auto v : values) {
this->append(v);
}
}
int64_t size() const
{
return item_len_;
}
bool is_empty() const
{
return this->size() == 0;
}
/* Avoid confusion with the other clear. */
void clear_to_zero() = delete;
static void swap(StorageVectorBuffer &a, StorageVectorBuffer &b)
{
StorageArrayBuffer<T, len, false>::swap(a, b);
std::swap(a.item_len_, b.item_len_);
}
};
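/* Usage sketch for #StorageVectorBuffer, typically refilled during each sync phase
* (illustrative only; `CullingData` is a hypothetical 16-byte-aligned struct):
*
*   struct CullingData {
*     float4 bounds_sphere;
*   };
*
*   StorageVectorBuffer<CullingData, 16> culling_buf;
*   culling_buf.clear();
*   for (int i = 0; i < 10; i++) {
*     CullingData data = {};
*     data.bounds_sphere = float4(0.0f, 0.0f, 0.0f, 1.0f);
*     culling_buf.append(data);
*   }
*   culling_buf.push_update();
*   // culling_buf.size() is the appended item count, not the allocated length.
*/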
template<
/** Type of the values stored in this storage buffer. */
typename T,
/** True if created on device and no host memory is allocated. */
bool device_only = false>
class StorageBuffer : public T, public detail::StorageCommon<T, 1, device_only> {
public:
StorageBuffer(const char *name = nullptr) : detail::StorageCommon<T, 1, device_only>(name)
{
/* TODO(@fclem): How could we map this? */
this->data_ = static_cast<T *>(this);
}
StorageBuffer<T> &operator=(const T &other)
{
*static_cast<T *>(this) = other;
return *this;
}
static void swap(StorageBuffer<T> &a, StorageBuffer<T> &b)
{
/* Swap content, but not `data_` pointers since they point to `this`. */
std::swap(static_cast<T &>(a), static_cast<T &>(b));
std::swap(a.ssbo_, b.ssbo_);
}
};
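/* Usage sketch for reading a #StorageBuffer back to the host (illustrative only;
* `SceneStats` is a hypothetical 16-byte-aligned struct filled on the GPU):
*
*   struct SceneStats {
*     uint32_t visible_count;
*     uint32_t _pad0, _pad1, _pad2;
*   };
*
*   StorageBuffer<SceneStats> stats_buf;
*   // ... dispatch GPU work that writes into stats_buf ...
*   stats_buf.async_flush_to_host();  // Schedule the copy inside the GPU command stream.
*   stats_buf.read();                 // Waits until the results are available on the host.
*   uint32_t count = stats_buf.visible_count;
*/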
/** \} */
/* -------------------------------------------------------------------- */
/** \name Texture
* \{ */
class Texture : NonCopyable {
protected:
GPUTexture *tx_ = nullptr;
GPUTexture *stencil_view_ = nullptr;
Vector<GPUTexture *, 0> mip_views_;
Vector<GPUTexture *, 0> layer_views_;
GPUTexture *layer_range_view_ = nullptr;
const char *name_;
public:
Texture(const char *name = "gpu::Texture") : name_(name) {}
Texture(const char *name,
eGPUTextureFormat format,
eGPUTextureUsage usage,
int extent,
float *data = nullptr,
bool cubemap = false,
int mip_len = 1)
: name_(name)
{
tx_ = create(extent, 0, 0, mip_len, format, usage, data, false, cubemap);
}
Texture(const char *name,
eGPUTextureFormat format,
eGPUTextureUsage usage,
int extent,
int layers,
float *data = nullptr,
bool cubemap = false,
int mip_len = 1)
: name_(name)
{
tx_ = create(extent, layers, 0, mip_len, format, usage, data, true, cubemap);
}
Texture(const char *name,
eGPUTextureFormat format,
eGPUTextureUsage usage,
int2 extent,
float *data = nullptr,
int mip_len = 1)
: name_(name)
{
tx_ = create(UNPACK2(extent), 0, mip_len, format, usage, data, false, false);
}
Texture(const char *name,
eGPUTextureFormat format,
eGPUTextureUsage usage,
int2 extent,
int layers,
float *data = nullptr,
int mip_len = 1)
: name_(name)
{
tx_ = create(UNPACK2(extent), layers, mip_len, format, usage, data, true, false);
}
Texture(const char *name,
eGPUTextureFormat format,
eGPUTextureUsage usage,
int3 extent,
float *data = nullptr,
int mip_len = 1)
: name_(name)
{
tx_ = create(UNPACK3(extent), mip_len, format, usage, data, false, false);
}
Texture(Texture &&other) = default;
~Texture()
{
free();
}
/* To be able to use it with DRW_shgroup_uniform_texture(). */
operator GPUTexture *() const
{
BLI_assert(tx_ != nullptr);
return tx_;
}
/* To be able to use it with DRW_shgroup_uniform_texture_ref(). */
GPUTexture **operator&()
{
return &tx_;
}
/** WORKAROUND: used when needing a ref to the Texture and not the GPUTexture. */
Texture *ptr()
{
return this;
}
Texture &operator=(Texture &&a)
{
if (this != std::addressof(a)) {
this->free();
this->tx_ = a.tx_;
this->name_ = a.name_;
this->stencil_view_ = a.stencil_view_;
this->layer_range_view_ = a.layer_range_view_;
this->mip_views_ = std::move(a.mip_views_);
this->layer_views_ = std::move(a.layer_views_);
a.tx_ = nullptr;
a.name_ = nullptr;
a.stencil_view_ = nullptr;
a.layer_range_view_ = nullptr;
a.mip_views_.clear();
a.layer_views_.clear();
}
return *this;
}
/**
* Ensure the texture has the correct properties, recreating it if needed.
* Return true if a texture has been created.
*/
bool ensure_1d(eGPUTextureFormat format,
int extent,
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_GENERAL,
float *data = nullptr,
int mip_len = 1)
{
return ensure_impl(extent, 0, 0, mip_len, format, usage, data, false, false);
}
/**
* Ensure the texture has the correct properties, recreating it if needed.
* Return true if a texture has been created.
*/
bool ensure_1d_array(eGPUTextureFormat format,
int extent,
int layers,
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_GENERAL,
float *data = nullptr,
int mip_len = 1)
{
BLI_assert(layers > 0);
return ensure_impl(extent, layers, 0, mip_len, format, usage, data, true, false);
}
/**
* Ensure the texture has the correct properties, recreating it if needed.
* Return true if a texture has been created.
*/
bool ensure_2d(eGPUTextureFormat format,
int2 extent,
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_GENERAL,
float *data = nullptr,
int mip_len = 1)
{
return ensure_impl(UNPACK2(extent), 0, mip_len, format, usage, data, false, false);
}
/**
* Ensure the texture has the correct properties, recreating it if needed.
* Return true if a texture has been created.
*/
bool ensure_2d_array(eGPUTextureFormat format,
int2 extent,
int layers,
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_GENERAL,
float *data = nullptr,
int mip_len = 1)
{
BLI_assert(layers > 0);
return ensure_impl(UNPACK2(extent), layers, mip_len, format, usage, data, true, false);
}
/**
* Ensure the texture has the correct properties, recreating it if needed.
* Return true if a texture has been created.
*/
bool ensure_3d(eGPUTextureFormat format,
int3 extent,
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_GENERAL,
float *data = nullptr,
int mip_len = 1)
{
return ensure_impl(UNPACK3(extent), mip_len, format, usage, data, false, false);
}
/**
* Ensure the texture has the correct properties, recreating it if needed.
* Return true if a texture has been created.
*/
bool ensure_cube(eGPUTextureFormat format,
int extent,
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_GENERAL,
float *data = nullptr,
int mip_len = 1)
{
return ensure_impl(extent, extent, 0, mip_len, format, usage, data, false, true);
}
/**
* Ensure the texture has the correct properties, recreating it if needed.
* Return true if a texture has been created.
*/
bool ensure_cube_array(eGPUTextureFormat format,
int extent,
int layers,
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_GENERAL,
float *data = nullptr,
int mip_len = 1)
{
return ensure_impl(extent, extent, layers, mip_len, format, usage, data, true, true);
}
/**
* Ensure the availability of mipmap views.
* MIP view covers all layers of array textures.
*/
bool ensure_mip_views(bool cube_as_array = false)
{
int mip_len = GPU_texture_mip_count(tx_);
if (mip_views_.size() != mip_len) {
for (GPUTexture *&view : mip_views_) {
GPU_TEXTURE_FREE_SAFE(view);
}
eGPUTextureFormat format = GPU_texture_format(tx_);
for (auto i : IndexRange(mip_len)) {
mip_views_.append(
GPU_texture_create_view(name_, tx_, format, i, 1, 0, 9999, cube_as_array, false));
}
return true;
}
return false;
}
GPUTexture *mip_view(int miplvl)
{
BLI_assert_msg(miplvl < mip_views_.size(),
"Incorrect mip level requested. "
"Might be missing call to ensure_mip_views().");
return mip_views_[miplvl];
}
int mip_count() const
{
return GPU_texture_mip_count(tx_);
}
/**
* Ensure the availability of layer views.
* A layer view is created for each layer of the array texture.
* Returns true if the views were (re)created.
*/
bool ensure_layer_views(bool cube_as_array = false)
{
int layer_len = GPU_texture_layer_count(tx_);
if (layer_views_.size() != layer_len) {
for (GPUTexture *&view : layer_views_) {
GPU_TEXTURE_FREE_SAFE(view);
}
eGPUTextureFormat format = GPU_texture_format(tx_);
for (auto i : IndexRange(layer_len)) {
layer_views_.append(
GPU_texture_create_view(name_, tx_, format, 0, 9999, i, 1, cube_as_array, false));
}
return true;
}
return false;
}
GPUTexture *layer_view(int layer)
{
return layer_views_[layer];
}
GPUTexture *stencil_view(bool cube_as_array = false)
{
if (stencil_view_ == nullptr) {
eGPUTextureFormat format = GPU_texture_format(tx_);
stencil_view_ = GPU_texture_create_view(
name_, tx_, format, 0, 9999, 0, 9999, cube_as_array, true);
}
return stencil_view_;
}
/**
* The layer range view covers only the given range.
* This can only be called to create one range.
* The view is recreated if:
* - The source texture is recreated.
* - The `layer_len` is different from the last call to this function.
* IMPORTANT: It is not recreated if the `layer_start` is different from the last call.
* IMPORTANT: If this view is recreated, any reference to it should be updated.
*/
GPUTexture *layer_range_view(int layer_start, int layer_len, bool cube_as_array = false)
{
BLI_assert(this->is_valid());
/* Make sure the range is valid as the GPU_texture_layer_count only returns the effective
* (clipped) range and not the requested range. */
BLI_assert_msg((layer_start + layer_len) <= GPU_texture_layer_count(tx_),
"Layer range needs to be valid");
int view_layer_len = (layer_range_view_) ? GPU_texture_layer_count(layer_range_view_) : -1;
if (layer_len != view_layer_len) {
GPU_TEXTURE_FREE_SAFE(layer_range_view_);
eGPUTextureFormat format = GPU_texture_format(tx_);
layer_range_view_ = GPU_texture_create_view(
name_, tx_, format, 0, 9999, layer_start, layer_len, cube_as_array, false);
}
return layer_range_view_;
}
/**
* Returns true if the texture has been allocated or acquired from the pool.
*/
bool is_valid() const
{
return tx_ != nullptr;
}
int width() const
{
return GPU_texture_width(tx_);
}
int height() const
{
return GPU_texture_height(tx_);
}
int depth() const
{
return GPU_texture_depth(tx_);
}
int pixel_count() const
{
return GPU_texture_width(tx_) * GPU_texture_height(tx_);
}
bool is_depth() const
{
return GPU_texture_has_depth_format(tx_);
}
bool is_stencil() const
{
return GPU_texture_has_stencil_format(tx_);
}
bool is_integer() const
{
return GPU_texture_has_integer_format(tx_);
}
bool is_cube() const
{
return GPU_texture_is_cube(tx_);
}
bool is_array() const
{
return GPU_texture_is_array(tx_);
}
int3 size(int miplvl = 0) const
{
int3 size(1);
GPU_texture_get_mipmap_size(tx_, miplvl, size);
return size;
}
/**
* Clear the entirety of the texture using one pixel worth of data.
*/
void clear(float4 values)
{
GPU_texture_clear(tx_, GPU_DATA_FLOAT, &values[0]);
}
/**
* Clear the entirety of the texture using one pixel worth of data.
*/
void clear(uint4 values)
{
GPU_texture_clear(tx_, GPU_DATA_UINT, &values[0]);
}
/**
* Clear the entirety of the texture using one pixel worth of data.
*/
void clear(int4 values)
{
GPU_texture_clear(tx_, GPU_DATA_INT, &values[0]);
}
/**
* Clear the texture to NaN for floats, or to a debug value for integers.
* (For debugging uninitialized data issues)
*/
void debug_clear()
{
if (GPU_texture_has_float_format(this->tx_) || GPU_texture_has_normalized_format(this->tx_)) {
this->clear(float4(NAN_FLT));
}
else if (GPU_texture_has_integer_format(this->tx_)) {
if (GPU_texture_has_signed_format(this->tx_)) {
this->clear(int4(0xF0F0F0F0));
}
else {
this->clear(uint4(0xF0F0F0F0));
}
}
}
/**
* Returns a buffer containing the texture data for the specified miplvl.
* The memory block needs to be manually freed by MEM_freeN().
*/
template<typename T> T *read(eGPUDataFormat format, int miplvl = 0)
{
return reinterpret_cast<T *>(GPU_texture_read(tx_, format, miplvl));
}
void filter_mode(bool do_filter)
{
GPU_texture_filter_mode(tx_, do_filter);
}
/**
* Free the internal texture but not the #draw::Texture itself.
*/
void free()
{
GPU_TEXTURE_FREE_SAFE(tx_);
for (GPUTexture *&view : mip_views_) {
GPU_TEXTURE_FREE_SAFE(view);
}
for (GPUTexture *&view : layer_views_) {
GPU_TEXTURE_FREE_SAFE(view);
}
GPU_TEXTURE_FREE_SAFE(stencil_view_);
GPU_TEXTURE_FREE_SAFE(layer_range_view_);
mip_views_.clear();
layer_views_.clear();
}
/**
* Swap the content of the two textures.
*/
static void swap(Texture &a, Texture &b)
{
std::swap(a.tx_, b.tx_);
std::swap(a.name_, b.name_);
std::swap(a.stencil_view_, b.stencil_view_);
std::swap(a.layer_range_view_, b.layer_range_view_);
std::swap(a.mip_views_, b.mip_views_);
std::swap(a.layer_views_, b.layer_views_);
}
private:
bool ensure_impl(int w,
int h = 0,
int d = 0,
int mip_len = 1,
eGPUTextureFormat format = GPU_RGBA8,
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_GENERAL,
float *data = nullptr,
bool layered = false,
bool cubemap = false)
{
/* TODO(@fclem): In the future, we need to check if mip_count did not change.
* For now it's ok as we always define all MIP levels. */
if (tx_) {
int3 size(0);
GPU_texture_get_mipmap_size(tx_, 0, size);
if (size != int3(w, h, d) || GPU_texture_format(tx_) != format ||
GPU_texture_is_cube(tx_) != cubemap || GPU_texture_is_array(tx_) != layered)
{
free();
}
}
if (tx_ == nullptr) {
tx_ = create(w, h, d, mip_len, format, usage, data, layered, cubemap);
if (data == nullptr && (G.debug & G_DEBUG_GPU)) {
debug_clear();
}
return true;
}
return false;
}
GPUTexture *create(int w,
int h,
int d,
int mip_len,
eGPUTextureFormat format,
eGPUTextureUsage usage,
float *data,
bool layered,
bool cubemap)
{
if (h == 0) {
return GPU_texture_create_1d(name_, w, mip_len, format, usage, data);
}
else if (cubemap) {
if (layered) {
return GPU_texture_create_cube_array(name_, w, d, mip_len, format, usage, data);
}
else {
return GPU_texture_create_cube(name_, w, mip_len, format, usage, data);
}
}
else if (d == 0) {
if (layered) {
return GPU_texture_create_1d_array(name_, w, h, mip_len, format, usage, data);
}
else {
return GPU_texture_create_2d(name_, w, h, mip_len, format, usage, data);
}
}
else {
if (layered) {
return GPU_texture_create_2d_array(name_, w, h, d, mip_len, format, usage, data);
}
else {
return GPU_texture_create_3d(name_, w, h, d, mip_len, format, usage, data);
}
}
}
};
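/* Usage sketch for #draw::Texture (illustrative only; format, extent and usage flags are
* arbitrary):
*
*   Texture color_tx_ = {"my_color_tx"};
*   // Allocates (or re-allocates) only if the requested properties differ from the current
*   // ones. Returns true when the underlying #GPUTexture was (re)created.
*   bool recreated = color_tx_.ensure_2d(
*       GPU_RGBA16F, int2(1920, 1080), GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_SHADER_READ);
*   if (recreated) {
*     // References to the old texture (views, frame-buffer attachments) must be updated.
*   }
*   color_tx_.clear(float4(0.0f));
*/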
class TextureFromPool : public Texture, NonMovable {
public:
TextureFromPool(const char *name = "gpu::Texture") : Texture(name){};
/* Always use `release()` after rendering. */
void acquire(int2 extent,
eGPUTextureFormat format,
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_GENERAL)
{
BLI_assert(this->tx_ == nullptr);
this->tx_ = DRW_texture_pool_texture_acquire(
DST.vmempool->texture_pool, UNPACK2(extent), format, usage);
if (G.debug & G_DEBUG_GPU) {
debug_clear();
}
}
void release()
{
/* Allows multiple release. */
if (this->tx_ == nullptr) {
return;
}
DRW_texture_pool_texture_release(DST.vmempool->texture_pool, this->tx_);
this->tx_ = nullptr;
}
/**
* Swap the content of the two textures.
* Also change ownership accordingly if needed.
*/
static void swap(TextureFromPool &a, Texture &b)
{
Texture::swap(a, b);
DRW_texture_pool_give_texture_ownership(DST.vmempool->texture_pool, a);
DRW_texture_pool_take_texture_ownership(DST.vmempool->texture_pool, b);
}
static void swap(Texture &a, TextureFromPool &b)
{
swap(b, a);
}
static void swap(TextureFromPool &a, TextureFromPool &b)
{
Texture::swap(a, b);
}
/** WORKAROUND: used when needing a ref to the Texture and not the GPUTexture. */
TextureFromPool *ptr()
{
return this;
}
/** Remove methods that are forbidden with this type of texture. */
bool ensure_1d(int, int, eGPUTextureFormat, eGPUTextureUsage, float *) = delete;
bool ensure_1d_array(int, int, int, eGPUTextureFormat, eGPUTextureUsage, float *) = delete;
bool ensure_2d(int, int, int, eGPUTextureFormat, eGPUTextureUsage, float *) = delete;
bool ensure_2d_array(int, int, int, int, eGPUTextureFormat, eGPUTextureUsage, float *) = delete;
bool ensure_3d(int, int, int, int, eGPUTextureFormat, eGPUTextureUsage, float *) = delete;
bool ensure_cube(int, int, eGPUTextureFormat, eGPUTextureUsage, float *) = delete;
bool ensure_cube_array(int, int, int, eGPUTextureFormat, eGPUTextureUsage, float *) = delete;
void filter_mode(bool) = delete;
void free() = delete;
GPUTexture *mip_view(int) = delete;
GPUTexture *layer_view(int) = delete;
GPUTexture *stencil_view() = delete;
};
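/* Usage sketch for #TextureFromPool (illustrative only): the texture is borrowed from the
* viewport pool for the duration of one render pass and given back afterwards.
*
*   TextureFromPool tmp_tx_ = {"my_tmp_tx"};
*   tmp_tx_.acquire(int2(1920, 1080), GPU_R11F_G11F_B10F);
*   // ... bind and render with tmp_tx_ ...
*   tmp_tx_.release();
*/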
class TextureRef : public Texture {
public:
TextureRef() = default;
~TextureRef()
{
this->tx_ = nullptr;
}
void wrap(GPUTexture *tex)
{
this->tx_ = tex;
}
/** Remove methods that are forbidden with this type of texture. */
bool ensure_1d(int, int, eGPUTextureFormat, float *) = delete;
bool ensure_1d_array(int, int, int, eGPUTextureFormat, float *) = delete;
bool ensure_2d(int, int, int, eGPUTextureFormat, float *) = delete;
bool ensure_2d_array(int, int, int, int, eGPUTextureFormat, float *) = delete;
bool ensure_3d(int, int, int, int, eGPUTextureFormat, float *) = delete;
bool ensure_cube(int, int, eGPUTextureFormat, float *) = delete;
bool ensure_cube_array(int, int, int, eGPUTextureFormat, float *) = delete;
void filter_mode(bool) = delete;
void free() = delete;
GPUTexture *mip_view(int) = delete;
GPUTexture *layer_view(int) = delete;
GPUTexture *stencil_view() = delete;
};
/**
* Dummy type to bind texture as image.
* It is just a GPUTexture in disguise.
*/
class Image {};
static inline Image *as_image(GPUTexture *tex)
{
return reinterpret_cast<Image *>(tex);
}
static inline Image **as_image(GPUTexture **tex)
{
return reinterpret_cast<Image **>(tex);
}
static inline GPUTexture *as_texture(Image *img)
{
return reinterpret_cast<GPUTexture *>(img);
}
static inline GPUTexture **as_texture(Image **img)
{
return reinterpret_cast<GPUTexture **>(img);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Framebuffer
* \{ */
class Framebuffer : NonCopyable {
private:
GPUFrameBuffer *fb_ = nullptr;
const char *name_;
public:
Framebuffer() : name_(""){};
Framebuffer(const char *name) : name_(name){};
~Framebuffer()
{
GPU_FRAMEBUFFER_FREE_SAFE(fb_);
}
void ensure(GPUAttachment depth = GPU_ATTACHMENT_NONE,
GPUAttachment color1 = GPU_ATTACHMENT_NONE,
GPUAttachment color2 = GPU_ATTACHMENT_NONE,
GPUAttachment color3 = GPU_ATTACHMENT_NONE,
GPUAttachment color4 = GPU_ATTACHMENT_NONE,
GPUAttachment color5 = GPU_ATTACHMENT_NONE,
GPUAttachment color6 = GPU_ATTACHMENT_NONE,
GPUAttachment color7 = GPU_ATTACHMENT_NONE,
GPUAttachment color8 = GPU_ATTACHMENT_NONE)
{
if (fb_ == nullptr) {
fb_ = GPU_framebuffer_create(name_);
}
GPUAttachment config[] = {
depth, color1, color2, color3, color4, color5, color6, color7, color8};
GPU_framebuffer_config_array(fb_, config, sizeof(config) / sizeof(GPUAttachment));
}
/**
* Empty frame-buffer configuration.
*/
void ensure(int2 target_size)
{
if (fb_ == nullptr) {
fb_ = GPU_framebuffer_create(name_);
}
GPU_framebuffer_default_size(fb_, UNPACK2(target_size));
}
void bind()
{
GPU_framebuffer_bind(fb_);
}
void clear_depth(float depth)
{
GPU_framebuffer_clear_depth(fb_, depth);
}
Framebuffer &operator=(Framebuffer &&a)
{
if (*this != a) {
this->fb_ = a.fb_;
this->name_ = a.name_;
a.fb_ = nullptr;
}
return *this;
}
operator GPUFrameBuffer *() const
{
return fb_;
}
GPUFrameBuffer **operator&()
{
return &fb_;
}
/**
* Swap the content of the two frame-buffers.
*/
static void swap(Framebuffer &a, Framebuffer &b)
{
std::swap(a.fb_, b.fb_);
std::swap(a.name_, b.name_);
}
};
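/* Usage sketch for #draw::Framebuffer (illustrative only; `depth_tx_` and `color_tx_` are
* assumed to be valid #draw::Texture members):
*
*   Framebuffer main_fb_ = {"my_main_fb"};
*   // The first attachment is always the depth attachment, color attachments follow.
*   main_fb_.ensure(GPU_ATTACHMENT_TEXTURE(depth_tx_), GPU_ATTACHMENT_TEXTURE(color_tx_));
*   main_fb_.bind();
*   main_fb_.clear_depth(1.0f);
*/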
/** \} */
/* -------------------------------------------------------------------- */
/** \name Double & Triple buffering util
*
* This is not strictly related to a GPU type and could be moved elsewhere.
* \{ */
template<typename T, int64_t len> class SwapChain {
private:
BLI_STATIC_ASSERT(len > 1, "A swap-chain needs more than 1 unit in length.");
std::array<T, len> chain_;
public:
void swap()
{
for (auto i : IndexRange(len - 1)) {
auto i_next = (i + 1) % len;
if constexpr (std::is_trivial_v<T>) {
std::swap(chain_[i], chain_[i_next]);
}
else {
T::swap(chain_[i], chain_[i_next]);
}
}
}
constexpr int64_t size()
{
return len;
}
T &current()
{
return chain_[0];
}
T &previous()
{
/* Avoid modulo operation with negative numbers. */
return chain_[(0 + len - 1) % len];
}
T &next()
{
return chain_[(0 + 1) % len];
}
const T &current() const
{
return chain_[0];
}
const T &previous() const
{
/* Avoid modulo operation with negative numbers. */
return chain_[(0 + len - 1) % len];
}
const T &next() const
{
return chain_[(0 + 1) % len];
}
};
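/* Usage sketch for #SwapChain (illustrative only): double-buffering a texture so the
* previous frame's result stays available, e.g. for temporal effects.
*
*   SwapChain<Texture, 2> history_tx_;
*   // Once per frame:
*   history_tx_.swap();
*   history_tx_.current().ensure_2d(GPU_RGBA16F, int2(1920, 1080));
*   // `history_tx_.previous()` still refers to the texture rendered last frame.
*/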
/** \} */
} // namespace blender::draw