Files
test/source/blender/gpu/vulkan/vk_data_conversion.hh
Jeroen Bakker ad93324580 Vulkan: Add support for Framebuffer Depth Blit
This PR adds support for `GPU_framebuffer_blit` when used with `GPU_DEPTH_BIT`.
The challenge is that not all GPUs support using a depth texture as a blit destination.

An AMD GPU doesn't support a depth buffer with stencil buffer as blit destination.
![image](/attachments/130e7e38-2dbc-4653-950c-2234bd5bad52)
> NOTE: AMD GPUs don't support 24 bit unsigned normalized depth textures at all.

In all cases where we use depth blitting we are blitting the whole texture,
so we can use a texture copy instead.

A negative effect is that we need to unbind the framebuffer when copying
depth textures, but a positive effect is that we can use a data transfer function, which should
theoretically be faster.

This should be revisited when we investigate areas to improve the
performance of the Vulkan backend.

Pull Request: https://projects.blender.org/blender/blender/pulls/112674
2023-09-28 10:00:24 +02:00

247 lines
8.5 KiB
C++

/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup gpu
*/
#pragma once
#include "BLI_math_vector_types.hh"
#include "gpu_texture_private.hh"
namespace blender::gpu {
/**
* Convert a host buffer to a device buffer.
*
* Pixels are read from `src_buffer` (laid out as `host_format`) and written to `dst_buffer`
* (laid out as `device_format`).
*
* \param dst_buffer: device buffer; receives the converted pixels.
* \param src_buffer: host buffer; only read.
* \param buffer_size: number of pixels to convert from the start of the given buffer.
* \param host_format: format of the host buffer.
* \param device_format: format of the device buffer.
*
* \note Will assert when the host_format/device_format combination isn't valid
* (#validate_data_format) or supported. Some combinations aren't supported in Vulkan due to
* platform incompatibility.
*/
void convert_host_to_device(void *dst_buffer,
const void *src_buffer,
size_t buffer_size,
eGPUDataFormat host_format,
eGPUTextureFormat device_format);
/**
* Convert a device buffer to a host buffer.
*
* Pixels are read from `src_buffer` (laid out as `device_format`) and written to `dst_buffer`
* (laid out as `host_format`).
*
* \param dst_buffer: host buffer; receives the converted pixels.
* \param src_buffer: device buffer; only read.
* \param buffer_size: number of pixels to convert from the start of the given buffer.
* \param host_format: format of the host buffer.
* \param device_format: format of the device buffer.
*
* \note Will assert when the host_format/device_format combination isn't valid
* (#validate_data_format) or supported. Some combinations aren't supported in Vulkan due to
* platform incompatibility.
*/
void convert_device_to_host(void *dst_buffer,
const void *src_buffer,
size_t buffer_size,
eGPUDataFormat host_format,
eGPUTextureFormat device_format);
/**
* Check whether all attributes of the given vertex format are natively supported, or whether a
* conversion needs to happen.
*
* \param vertex_format: the vertex format to check if an associated buffer requires conversion
* being done on the host.
*
* \return NOTE(review): presumably `true` when at least one attribute requires conversion (as the
* function name suggests); the implementation is not visible in this header -- confirm.
*/
bool conversion_needed(const GPUVertFormat &vertex_format);
/**
* Convert the given `data` to contain Vulkan natively supported data formats.
*
* When the fetch mode of a vertex attribute is set to GPU_FETCH_INT_TO_FLOAT and the attribute
* is an int32_t or uint32_t, the conversion will be done. Attributes of 16 or 8 bits are
* supported natively and will be converted by Vulkan.
*
* \param data: Buffer to convert. Data will be converted in place.
* \param vertex_format: Vertex format of the given data. Attributes that aren't supported will be
* converted to a supported one.
* \param vertex_len: Number of vertices of the given data buffer.
* The number of vertices to convert.
*/
void convert_in_place(void *data, const GPUVertFormat &vertex_format, const uint vertex_len);
/* -------------------------------------------------------------------- */
/** \name Floating point conversions
* \{ */
/**
 * Description of a IEEE 754-1985 style floating point data type.
 *
 * The bit layout, from the least significant bit upwards, is: mantissa, exponent, and an optional
 * sign bit. All accessors work on the raw bits stored in the low bits of a `uint32_t`.
 *
 * \tparam HasSignBit: whether the format stores a sign bit directly above the exponent.
 * \tparam MantissaBitLen: number of bits used to store the mantissa.
 * \tparam ExponentBitLen: number of bits used to store the (biased) exponent.
 */
template<bool HasSignBit, uint8_t MantissaBitLen, uint8_t ExponentBitLen>
class FloatingPointFormat {
 public:
  /** True when the format stores a sign bit. */
  static constexpr bool HAS_SIGN = HasSignBit;
  /** Bit position of the sign bit (directly above the exponent). */
  static constexpr uint8_t SIGN_SHIFT = MantissaBitLen + ExponentBitLen;
  /** Mask applied after shifting by #SIGN_SHIFT; zero for formats without a sign bit. */
  static constexpr uint32_t SIGN_MASK = HasSignBit ? 1 : 0;

  /** Number of mantissa bits. */
  static constexpr uint8_t MANTISSA_LEN = MantissaBitLen;
  /** The mantissa occupies the least significant bits. */
  static constexpr uint8_t MANTISSA_SHIFT = 0;
  /** Mask (before shifting) with every mantissa bit set. */
  static constexpr uint32_t MANTISSA_MASK = (uint32_t(1) << MantissaBitLen) - 1;
  /** Mantissa pattern written when encoding a NaN: all mantissa bits set. */
  static constexpr uint32_t MANTISSA_NAN_MASK = MANTISSA_MASK;

  /** Bit position of the exponent (directly above the mantissa). */
  static constexpr uint8_t EXPONENT_SHIFT = MantissaBitLen;
  /** Number of exponent bits. */
  static constexpr uint8_t EXPONENT_LEN = ExponentBitLen;
  /** Mask (before shifting) with every exponent bit set. */
  static constexpr uint32_t EXPONENT_MASK = (uint32_t(1) << ExponentBitLen) - 1;
  /** Value subtracted from the stored exponent to get the actual exponent. */
  static constexpr int32_t EXPONENT_BIAS = (1 << (ExponentBitLen - 1)) - 1;
  /** Stored exponent value reserved for infinity and NaN: all exponent bits set. */
  static constexpr int32_t EXPONENT_SPECIAL_MASK = EXPONENT_MASK;

  /** Extract the mantissa bits. */
  static constexpr uint32_t get_mantissa(uint32_t floating_point_number)
  {
    return (floating_point_number >> MANTISSA_SHIFT) & MANTISSA_MASK;
  }

  /** Return the number with all mantissa bits set to zero. */
  static constexpr uint32_t clear_mantissa(uint32_t floating_point_number)
  {
    return floating_point_number & ~(MANTISSA_MASK << MANTISSA_SHIFT);
  }

  /**
   * Return the number with its mantissa replaced by `mantissa`.
   * `mantissa` is not masked; the caller must make sure it fits in #MANTISSA_LEN bits.
   */
  static constexpr uint32_t set_mantissa(uint32_t mantissa, uint32_t floating_point_number)
  {
    uint32_t result = clear_mantissa(floating_point_number);
    result |= mantissa << MANTISSA_SHIFT;
    return result;
  }

  /** Extract the stored (biased) exponent. */
  static constexpr uint32_t get_exponent(uint32_t floating_point_number)
  {
    return (floating_point_number >> EXPONENT_SHIFT) & EXPONENT_MASK;
  }

  /** Return the number with all exponent bits set to zero. */
  static constexpr uint32_t clear_exponent(uint32_t floating_point_number)
  {
    return floating_point_number & ~(EXPONENT_MASK << EXPONENT_SHIFT);
  }

  /**
   * Return the number with its stored (biased) exponent replaced by `exponent`.
   * `exponent` is not masked; the caller must make sure it fits in #EXPONENT_LEN bits.
   */
  static constexpr uint32_t set_exponent(uint32_t exponent, uint32_t floating_point_number)
  {
    uint32_t result = clear_exponent(floating_point_number);
    result |= exponent << EXPONENT_SHIFT;
    return result;
  }

  /** Is the sign bit set? Always false for formats without a sign bit. */
  static constexpr bool is_signed(uint32_t floating_point_number)
  {
    if constexpr (HasSignBit) {
      return (floating_point_number >> SIGN_SHIFT) & SIGN_MASK;
    }
    return false;
  }

  /** Return the number with the bit at #SIGN_SHIFT set to zero. */
  static constexpr uint32_t clear_sign(uint32_t floating_point_number)
  {
    /* Build the mask from an unsigned one: for 32 bit floats #SIGN_SHIFT is 31 and shifting a
     * signed `1` that far would shift into the sign bit of `int`. */
    return floating_point_number & ~(uint32_t(1) << SIGN_SHIFT);
  }

  /**
   * Return the number with its sign bit replaced by `sign`.
   * The number is returned unchanged for formats without a sign bit.
   */
  static constexpr uint32_t set_sign(bool sign, uint32_t floating_point_number)
  {
    if constexpr (!HasSignBit) {
      return floating_point_number;
    }
    uint32_t result = clear_sign(floating_point_number);
    result |= uint32_t(sign) << SIGN_SHIFT;
    return result;
  }
};
/* Signed, 23 mantissa bits, 8 exponent bits (32 bits in total). */
using FormatF32 = FloatingPointFormat<true, 23, 8>;
/* Signed, 10 mantissa bits, 5 exponent bits (16 bits in total). */
using FormatF16 = FloatingPointFormat<true, 10, 5>;
/* Unsigned, 6 mantissa bits, 5 exponent bits (11 bits in total).
 * NOTE(review): presumably used for the 11 bit components of packed float textures -- confirm
 * against the callers. */
using FormatF11 = FloatingPointFormat<false, 6, 5>;
/* Unsigned, 5 mantissa bits, 5 exponent bits (10 bits in total).
 * NOTE(review): presumably used for the 10 bit component of packed float textures -- confirm
 * against the callers. */
using FormatF10 = FloatingPointFormat<false, 5, 5>;
/**
 * Convert between low precision floating point formats (including 32 bit floats).
 *
 * The input and output values are bits (uint32_t) as this function does bit-wise operations to
 * convert between the formats. Additional conversion rules can be applied through the template
 * parameters. All format properties are compile-time constants, so the branches that don't apply
 * to a given pair of formats are discarded by `if constexpr`.
 *
 * Conversion rules, in order of evaluation:
 * - Negative values that aren't NaN (including -Inf) become 0 when the destination has no sign
 *   bit and `ClampNegativeToZero` is set.
 * - Zero (of either sign) becomes 0.
 * - Infinity stays infinity; NaN becomes a NaN with all destination mantissa bits set.
 * - When the destination has fewer exponent bits than the source: values whose exponent is too
 *   large are clamped to the largest finite destination value, values whose unbiased exponent is
 *   below `-DestinationFormat::EXPONENT_BIAS` become 0.
 * - Otherwise the exponent is re-biased and the mantissa is truncated (no rounding) or
 *   zero-extended to the destination mantissa length.
 *
 * \note Sub-normal values are not re-normalized: the exponent field is only re-biased. Sub-normal
 * inputs, and results that end up with a zero exponent field, are therefore not converted
 * exactly.
 *
 * \param value: bits of the number, laid out as described by `SourceFormat`.
 * \return bits of the converted number, laid out as described by `DestinationFormat`.
 */
template<
    /**
     * FloatingPointFormat of the value that is converted to.
     */
    typename DestinationFormat,
    /**
     * FloatingPointFormat of the value that is converted from.
     */
    typename SourceFormat,
    /**
     * Should negative values be clamped to zero when DestinationFormat doesn't contain a sign
     * bit. Also -Inf will be clamped to zero.
     *
     * When set to `false` and DestinationFormat doesn't contain a sign bit the value will be
     * made absolute.
     */
    bool ClampNegativeToZero = true>
uint32_t convert_float_formats(uint32_t value)
{
  const bool is_signed = SourceFormat::is_signed(value);
  uint32_t mantissa = SourceFormat::get_mantissa(value);
  int32_t exponent = SourceFormat::get_exponent(value);

  const bool is_special = (exponent == SourceFormat::EXPONENT_SPECIAL_MASK);
  const bool is_nan = is_special && mantissa;
  const bool is_inf = is_special && (mantissa == 0);
  const bool is_zero = (exponent == 0 && mantissa == 0);

  /* Sign conversion */
  if constexpr (!DestinationFormat::HAS_SIGN && ClampNegativeToZero) {
    if (is_signed && !is_nan) {
      return 0;
    }
  }
  if (is_zero) {
    return 0;
  }

  if (is_inf) {
    /* The source mantissa is already zero and is kept as is. */
    exponent = DestinationFormat::EXPONENT_SPECIAL_MASK;
  }
  else if (is_nan) {
    exponent = DestinationFormat::EXPONENT_SPECIAL_MASK;
    mantissa = DestinationFormat::MANTISSA_NAN_MASK;
  }
  else {
    /* Exponent conversion: work with the unbiased exponent. */
    exponent -= SourceFormat::EXPONENT_BIAS;

    /* Clamping when destination has a smaller exponent range. */
    bool is_overflow = false;
    if constexpr (SourceFormat::EXPONENT_LEN > DestinationFormat::EXPONENT_LEN) {
      if (exponent > DestinationFormat::EXPONENT_BIAS) {
        is_overflow = true;
      }
      else if (exponent < -DestinationFormat::EXPONENT_BIAS) {
        return 0;
      }
    }

    if (is_overflow) {
      /* Too large to represent: clamp to the largest finite value. That is the highest exponent
       * field below the Inf/NaN pattern combined with an all-ones mantissa. */
      exponent = DestinationFormat::EXPONENT_SPECIAL_MASK - 1;
      mantissa = DestinationFormat::MANTISSA_MASK;
    }
    else {
      exponent += DestinationFormat::EXPONENT_BIAS;

      /* Mantissa conversion: keep the most significant bits. */
      if constexpr (SourceFormat::MANTISSA_LEN > DestinationFormat::MANTISSA_LEN) {
        mantissa = mantissa >> (SourceFormat::MANTISSA_LEN - DestinationFormat::MANTISSA_LEN);
      }
      else if constexpr (SourceFormat::MANTISSA_LEN < DestinationFormat::MANTISSA_LEN) {
        mantissa = mantissa << (DestinationFormat::MANTISSA_LEN - SourceFormat::MANTISSA_LEN);
      }
    }
  }

  uint32_t result = 0;
  result = DestinationFormat::set_sign(is_signed, result);
  result = DestinationFormat::set_exponent(exponent, result);
  result = DestinationFormat::set_mantissa(mantissa, result);
  return result;
}
/* \} */
}; // namespace blender::gpu