Vulkan: Convert 3 Component Texture Formats

3 component texture formats are often not supported by Vulkan devices; sometimes the support is less than 5%. The 4 component variants have more than 90% support. This PR builds on top of the existing Vulkan data conversion to add the ability to convert between RGB16F<=>RGBA16F and RGB32F<=>RGBA32F texture formats. This allows using color management other than Standard/sRGB. In most places the 3 component texture formats are being phased out, but OCIO, external render engines and the real-time compositor still use them. Pull Request: https://projects.blender.org/blender/blender/pulls/114708
2023-11-10 15:19:24 +01:00
parent 5052e0d407
commit 2a24e29241
4 changed files with 187 additions and 7 deletions
--- a/source/blender/gpu/vulkan/vk_data_conversion.cc
+++ b/source/blender/gpu/vulkan/vk_data_conversion.cc
@@ -63,14 +63,31 @@ enum class ConversionType {
  FLOAT_TO_B10F_G11F_R11F,
  B10F_G11F_R11F_TO_FLOAT,

+  FLOAT3_TO_HALF4,
+  HALF4_TO_FLOAT3,
+
+  FLOAT3_TO_FLOAT4,
+  FLOAT4_TO_FLOAT3,
+
  /**
   * The requested conversion isn't supported.
   */
  UNSUPPORTED,
 };

-static ConversionType type_of_conversion_float(eGPUTextureFormat device_format)
+static ConversionType type_of_conversion_float(const eGPUTextureFormat host_format,
+                                               const eGPUTextureFormat device_format)
 {
+  if (host_format != device_format) {
+    if (host_format == GPU_RGB16F && device_format == GPU_RGBA16F) {
+      return ConversionType::FLOAT3_TO_HALF4;
+    }
+    if (host_format == GPU_RGB32F && device_format == GPU_RGBA32F) {
+      return ConversionType::FLOAT3_TO_FLOAT4;
+    }
+    return ConversionType::UNSUPPORTED;
+  }
+
  switch (device_format) {
    case GPU_RGBA32F:
    case GPU_RG32F:
@@ -486,13 +503,15 @@ static ConversionType type_of_conversion_r10g10b10a2(eGPUTextureFormat device_fo
  return ConversionType::UNSUPPORTED;
 }

-static ConversionType host_to_device(eGPUDataFormat host_format, eGPUTextureFormat device_format)
+static ConversionType host_to_device(const eGPUDataFormat host_format,
+                                     const eGPUTextureFormat host_texture_format,
+                                     const eGPUTextureFormat device_format)
 {
  BLI_assert(validate_data_format(device_format, host_format));

  switch (host_format) {
    case GPU_DATA_FLOAT:
-      return type_of_conversion_float(device_format);
+      return type_of_conversion_float(host_texture_format, device_format);
    case GPU_DATA_UINT:
      return type_of_conversion_uint(device_format);
    case GPU_DATA_INT:
@@ -540,6 +559,8 @@ static ConversionType reversed(ConversionType type)
      CASE_PAIR(FLOAT, SRGBA8)
      CASE_PAIR(FLOAT, DEPTH_COMPONENT24)
      CASE_PAIR(FLOAT, B10F_G11F_R11F)
+      CASE_PAIR(FLOAT3, HALF4)
+      CASE_PAIR(FLOAT3, FLOAT4)

    case ConversionType::UNSUPPORTED:
      return ConversionType::UNSUPPORTED;
@@ -599,6 +620,42 @@ using FLOAT4 = PixelValue<ColorSceneLinear4f<eAlpha::Premultiplied>>;
 class B10F_G11G_R11F : public PixelValue<uint32_t> {
 };

+class HALF4 : public PixelValue<uint64_t> { /* RGBA16F texel: four 16-bit channels, red lowest. */
+ public:
+  uint64_t get_r() const
+  {
+    return value & 0xffff; /* Red: bits 0-15. */
+  }
+  /** Store the low 16 bits of `new_value` as red; the other channels are left untouched. */
+  void set_r(uint64_t new_value)
+  {
+    value = (value & 0xffffffffffff0000) | (new_value & 0xffff);
+  }
+  uint64_t get_g() const
+  {
+    return (value >> 16) & 0xffff; /* Green: bits 16-31. */
+  }
+  /** Store the low 16 bits of `new_value` as green; the other channels are left untouched. */
+  void set_g(uint64_t new_value)
+  {
+    value = (value & 0xffffffff0000ffff) | ((new_value & 0xffff) << 16);
+  }
+  uint64_t get_b() const
+  {
+    return (value >> 32) & 0xffff; /* Blue: bits 32-47. */
+  }
+  /** Store the low 16 bits of `new_value` as blue; the other channels are left untouched. */
+  void set_b(uint64_t new_value)
+  {
+    value = (value & 0xffff0000ffffffff) | ((new_value & 0xffff) << 32);
+  }
+  /** Store the low 16 bits of `new_value` as alpha (bits 48-63); this class has no `get_a`. */
+  void set_a(uint64_t new_value)
+  {
+    value = (value & 0xffffffffffff) | ((new_value & 0xffff) << 48);
+  }
+};
+
 class DepthComponent24 : public ComponentValue<uint32_t> {
 public:
  operator uint32_t() const
@@ -738,6 +795,36 @@ static void convert(FLOAT4 &dst, const SRGBA8 &src)
  dst.value = src.value.decode();
 }

+static void convert(FLOAT3 &dst, const HALF4 &src) /* Widen r/g/b to 32-bit; alpha is not read. */
+{
+  dst.value.x = uint32_t_to_float(convert_float_formats<FormatF32, FormatF16>(src.get_r()));
+  dst.value.y = uint32_t_to_float(convert_float_formats<FormatF32, FormatF16>(src.get_g()));
+  dst.value.z = uint32_t_to_float(convert_float_formats<FormatF32, FormatF16>(src.get_b()));
+}
+/** Narrow x/y/z to 16-bit floats and set alpha to 1.0; all four channels of `dst` are written. */
+static void convert(HALF4 &dst, const FLOAT3 &src)
+{
+  dst.set_r(convert_float_formats<FormatF16, FormatF32>(float_to_uint32_t(src.value.x)));
+  dst.set_g(convert_float_formats<FormatF16, FormatF32>(float_to_uint32_t(src.value.y)));
+  dst.set_b(convert_float_formats<FormatF16, FormatF32>(float_to_uint32_t(src.value.z)));
+  dst.set_a(convert_float_formats<FormatF16, FormatF32>(float_to_uint32_t(1.0f)));
+}
+/** Copy r/g/b of a 4 component pixel into x/y/z; the alpha component is discarded. */
+static void convert(FLOAT3 &dst, const FLOAT4 &src)
+{
+  dst.value.x = src.value.r;
+  dst.value.y = src.value.g;
+  dst.value.z = src.value.b;
+}
+/** Copy x/y/z into r/g/b and set alpha to 1.0, as the 3 component source carries no alpha. */
+static void convert(FLOAT4 &dst, const FLOAT3 &src)
+{
+  dst.value.r = src.value.x;
+  dst.value.g = src.value.y;
+  dst.value.b = src.value.z;
+  dst.value.a = 1.0f;
+}
+
 constexpr uint32_t MASK_10_BITS = 0b1111111111;
 constexpr uint32_t MASK_11_BITS = 0b11111111111;
 constexpr uint8_t SHIFT_B = 22;
@@ -918,6 +1005,20 @@ static void convert_buffer(void *dst_memory,
    case ConversionType::B10F_G11F_R11F_TO_FLOAT:
      convert_per_pixel<FLOAT3, B10F_G11G_R11F>(dst_memory, src_memory, buffer_size);
      break;
+
+    case ConversionType::FLOAT3_TO_HALF4:
+      convert_per_pixel<HALF4, FLOAT3>(dst_memory, src_memory, buffer_size);
+      break;
+    case ConversionType::HALF4_TO_FLOAT3:
+      convert_per_pixel<FLOAT3, HALF4>(dst_memory, src_memory, buffer_size);
+      break;
+
+    case ConversionType::FLOAT3_TO_FLOAT4:
+      convert_per_pixel<FLOAT4, FLOAT3>(dst_memory, src_memory, buffer_size);
+      break;
+    case ConversionType::FLOAT4_TO_FLOAT3:
+      convert_per_pixel<FLOAT3, FLOAT4>(dst_memory, src_memory, buffer_size);
+      break;
  }
 }

@@ -929,9 +1030,10 @@ void convert_host_to_device(void *dst_buffer,
                            const void *src_buffer,
                            size_t buffer_size,
                            eGPUDataFormat host_format,
+                            eGPUTextureFormat host_texture_format,
                            eGPUTextureFormat device_format)
 {
-  ConversionType conversion_type = host_to_device(host_format, device_format);
+  ConversionType conversion_type = host_to_device(host_format, host_texture_format, device_format);
  BLI_assert(conversion_type != ConversionType::UNSUPPORTED);
  convert_buffer(dst_buffer, src_buffer, buffer_size, device_format, conversion_type);
 }
@@ -940,9 +1042,11 @@ void convert_device_to_host(void *dst_buffer,
                            const void *src_buffer,
                            size_t buffer_size,
                            eGPUDataFormat host_format,
+                            eGPUTextureFormat host_texture_format,
                            eGPUTextureFormat device_format)
 {
-  ConversionType conversion_type = reversed(host_to_device(host_format, device_format));
+  ConversionType conversion_type = reversed(
+      host_to_device(host_format, host_texture_format, device_format));
  BLI_assert_msg(conversion_type != ConversionType::UNSUPPORTED,
                 "Data conversion between host_format and device_format isn't supported (yet).");
  convert_buffer(dst_buffer, src_buffer, buffer_size, device_format, conversion_type);
--- a/source/blender/gpu/vulkan/vk_data_conversion.hh
+++ b/source/blender/gpu/vulkan/vk_data_conversion.hh
@@ -24,6 +24,7 @@ struct VKWorkarounds;
 * \param src_buffer: host buffer.
 * \param buffer_size: number of pixels to convert from the start of the given buffer.
 * \param host_format: format of the host buffer.
+ * \param host_texture_format: texture format of the host buffer.
 * \param device_format: format of the device buffer.
 *
 * \note Will assert when the host_format/device_format combination isn't valid
@@ -34,6 +35,7 @@ void convert_host_to_device(void *dst_buffer,
                            const void *src_buffer,
                            size_t buffer_size,
                            eGPUDataFormat host_format,
+                            eGPUTextureFormat host_texture_format,
                            eGPUTextureFormat device_format);

 /**
@@ -43,6 +45,7 @@ void convert_host_to_device(void *dst_buffer,
 * \param src_buffer: device buffer.
 * \param buffer_size: number of pixels to convert from the start of the given buffer.
 * \param host_format: format of the host buffer
+ * \param host_texture_format: texture format of the host buffer.
 * \param device_format: format of the device buffer.
 *
 * \note Will assert when the host_format/device_format combination isn't valid
@@ -53,6 +56,7 @@ void convert_device_to_host(void *dst_buffer,
                            const void *src_buffer,
                            size_t buffer_size,
                            eGPUDataFormat host_format,
+                            eGPUTextureFormat host_texture_format,
                            eGPUTextureFormat device_format);

 /**
--- a/source/blender/gpu/vulkan/vk_data_conversion_test.cc
+++ b/source/blender/gpu/vulkan/vk_data_conversion_test.cc
@@ -254,4 +254,68 @@ TEST(VulkanDataConversion, vertex_format_multiple_attributes)
  }
 }

+TEST(VulkanDataConversion, texture_rgb16f_as_floats_to_rgba16f) /* RGB16F round trip. */
+{
+  const size_t num_pixels = 4; /* `input` holds 3 floats per pixel. */
+  float input[] = {
+      1.0,
+      0.5,
+      0.2,
+
+      0.2,
+      1.0,
+      0.3,
+
+      0.4,
+      0.2,
+      1.0,
+
+      1.0,
+      1.0,
+      1.0,
+  };
+  /* Host to device: each pixel is converted into one 64-bit element of `device`. */
+  uint64_t device[num_pixels];
+  convert_host_to_device(device, input, num_pixels, GPU_DATA_FLOAT, GPU_RGB16F, GPU_RGBA16F);
+  /* Device to host: convert back into 3 floats per pixel. */
+  float read_back[num_pixels * 3];
+  convert_device_to_host(read_back, device, num_pixels, GPU_DATA_FLOAT, GPU_RGB16F, GPU_RGBA16F);
+  /* The round trip must reproduce every input component within a tolerance of 0.01. */
+  for (int i : IndexRange(num_pixels * 3)) {
+    EXPECT_NEAR(input[i], read_back[i], 0.01);
+  }
+}
+
+TEST(VulkanDataConversion, texture_rgb32f_as_floats_to_rgba32f) /* RGB32F round trip. */
+{
+  const size_t num_pixels = 4; /* `input` holds 3 floats per pixel. */
+  float input[] = {
+      1.0,
+      0.5,
+      0.2,
+
+      0.2,
+      1.0,
+      0.3,
+
+      0.4,
+      0.2,
+      1.0,
+
+      1.0,
+      1.0,
+      1.0,
+  };
+  /* Host to device: each pixel is expanded to 4 floats in `device`. */
+  float device[num_pixels * 4];
+  convert_host_to_device(device, input, num_pixels, GPU_DATA_FLOAT, GPU_RGB32F, GPU_RGBA32F);
+  /* Device to host: convert back into 3 floats per pixel. */
+  float read_back[num_pixels * 3];
+  convert_device_to_host(read_back, device, num_pixels, GPU_DATA_FLOAT, GPU_RGB32F, GPU_RGBA32F);
+  /* The round trip must reproduce every input component within a tolerance of 0.01. */
+  for (int i : IndexRange(num_pixels * 3)) {
+    EXPECT_NEAR(input[i], read_back[i], 0.01);
+  }
+}
+
 }  // namespace blender::gpu::tests
--- a/source/blender/gpu/vulkan/vk_texture.cc
+++ b/source/blender/gpu/vulkan/vk_texture.cc
@@ -244,7 +244,7 @@ void VKTexture::read_sub(
  context.flush();

  convert_device_to_host(
-      r_data, staging_buffer.mapped_memory_get(), sample_len, format, device_format_);
+      r_data, staging_buffer.mapped_memory_get(), sample_len, format, format_, device_format_);
 }

 void *VKTexture::read(int mip, eGPUDataFormat format)
@@ -284,7 +284,7 @@ void VKTexture::update_sub(
  VKBuffer staging_buffer;
  staging_buffer.create(device_memory_size, GPU_USAGE_DYNAMIC, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
  convert_host_to_device(
-      staging_buffer.mapped_memory_get(), data, sample_len, format, device_format_);
+      staging_buffer.mapped_memory_get(), data, sample_len, format, format_, device_format_);

  VkBufferImageCopy region = {};
  region.imageExtent.width = extent.x;
@@ -330,6 +330,14 @@ bool VKTexture::init_internal()
  if (device_format_ == GPU_DEPTH24_STENCIL8 && workarounds.not_aligned_pixel_formats) {
    device_format_ = GPU_DEPTH32F_STENCIL8;
  }
+  /* R16G16B16 formats are typically not supported (<1%) but R16G16B16A16 is
+   * typically supported (+90%). */
+  if (device_format_ == GPU_RGB16F) {
+    device_format_ = GPU_RGBA16F;
+  }
+  if (device_format_ == GPU_RGB32F) {
+    device_format_ = GPU_RGBA32F;
+  }

  if (!allocate()) {
    return false;