Metal: Fix failing texture unit tests

Resolves failing unit tests where missing texture read support for 1D/3D textures was required. Also resolves a number of missing format support cases when determining texture component count and byte size for reading/writing. Authored by Apple: Michael Parkin-White Pull Request: https://projects.blender.org/blender/blender/pulls/108751
2023-06-12 08:43:10 +02:00
parent 6504ba980e
commit 99931a5154
4 changed files with 124 additions and 13 deletions
--- a/source/blender/gpu/intern/gpu_texture_private.hh
+++ b/source/blender/gpu/intern/gpu_texture_private.hh
@@ -751,7 +751,8 @@ inline size_t to_bytesize(eGPUTextureFormat tex_format, eGPUDataFormat data_form
   * Standard component len calculation does not apply, as the texture formats contain multiple
   * channels, but associated data format contains several compacted components. */
  if ((tex_format == GPU_R11F_G11F_B10F && data_format == GPU_DATA_10_11_11_REV) ||
-      (tex_format == GPU_RGB10_A2 && data_format == GPU_DATA_2_10_10_10_REV))
+      ((tex_format == GPU_RGB10_A2 || tex_format == GPU_RGB10_A2UI) &&
+       data_format == GPU_DATA_2_10_10_10_REV))
  {
    return 4;
  }
--- a/source/blender/gpu/metal/mtl_texture.hh
+++ b/source/blender/gpu/metal/mtl_texture.hh
@@ -483,6 +483,7 @@ inline std::string tex_data_format_to_msl_type_str(eGPUDataFormat type)
    case GPU_DATA_UINT_24_8:
      return "uint"; /* Problematic type - but will match alignment. */
    case GPU_DATA_10_11_11_REV:
+    case GPU_DATA_2_10_10_10_REV:
      return "float"; /* Problematic type - each component will be read as a float. */
    default:
      BLI_assert(false);
@@ -508,6 +509,7 @@ inline std::string tex_data_format_to_msl_texture_template_type(eGPUDataFormat t
    case GPU_DATA_UINT_24_8:
      return "uint"; /* Problematic type. */
    case GPU_DATA_10_11_11_REV:
+    case GPU_DATA_2_10_10_10_REV:
      return "float"; /* Problematic type. */
    default:
      BLI_assert(false);
--- a/source/blender/gpu/metal/mtl_texture.mm
+++ b/source/blender/gpu/metal/mtl_texture.mm
@@ -559,7 +559,8 @@ void gpu::MTLTexture::update_sub(
    }

    /* Safety Checks. */
-    if (type == GPU_DATA_UINT_24_8 || type == GPU_DATA_10_11_11_REV) {
+    if (type == GPU_DATA_UINT_24_8 || type == GPU_DATA_10_11_11_REV ||
+        type == GPU_DATA_2_10_10_10_REV) {
      BLI_assert(can_use_direct_blit &&
                 "Special input data type must be a 1-1 mapping with destination texture as it "
                 "cannot easily be split");
@@ -726,7 +727,7 @@ void gpu::MTLTexture::update_sub(
            MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
            cs.bind_pso(pso);
            cs.bind_compute_bytes(&params, sizeof(params), 0);
-            cs.bind_compute_buffer(staging_buffer, 0, 1);
+            cs.bind_compute_buffer(staging_buffer, 0, 1, true);
            cs.bind_compute_texture(texture_handle, 0);
            [compute_encoder
                      dispatchThreads:MTLSizeMake(extent[0], 1, 1) /* Width, Height, Layer */
@@ -746,7 +747,7 @@ void gpu::MTLTexture::update_sub(
            MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
            cs.bind_pso(pso);
            cs.bind_compute_bytes(&params, sizeof(params), 0);
-            cs.bind_compute_buffer(staging_buffer, 0, 1);
+            cs.bind_compute_buffer(staging_buffer, 0, 1, true);
            cs.bind_compute_texture(texture_handle, 0);
            [compute_encoder
                      dispatchThreads:MTLSizeMake(extent[0], extent[1], 1) /* Width, layers, nil */
@@ -806,7 +807,7 @@ void gpu::MTLTexture::update_sub(
            MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
            cs.bind_pso(pso);
            cs.bind_compute_bytes(&params, sizeof(params), 0);
-            cs.bind_compute_buffer(staging_buffer, 0, 1);
+            cs.bind_compute_buffer(staging_buffer, 0, 1, true);
            cs.bind_compute_texture(texture_handle, 0);
            [compute_encoder
                      dispatchThreads:MTLSizeMake(
@@ -827,7 +828,7 @@ void gpu::MTLTexture::update_sub(
            MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
            cs.bind_pso(pso);
            cs.bind_compute_bytes(&params, sizeof(params), 0);
-            cs.bind_compute_buffer(staging_buffer, 0, 1);
+            cs.bind_compute_buffer(staging_buffer, 0, 1, true);
            cs.bind_compute_texture(texture_handle, 0);
            [compute_encoder dispatchThreads:MTLSizeMake(extent[0],
                                                         extent[1],
@@ -870,7 +871,7 @@ void gpu::MTLTexture::update_sub(
          MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
          cs.bind_pso(pso);
          cs.bind_compute_bytes(&params, sizeof(params), 0);
-          cs.bind_compute_buffer(staging_buffer, 0, 1);
+          cs.bind_compute_buffer(staging_buffer, 0, 1, true);
          cs.bind_compute_texture(texture_handle, 0);
          [compute_encoder
                    dispatchThreads:MTLSizeMake(
@@ -1433,9 +1434,13 @@ void gpu::MTLTexture::read_internal(int mip,
    BLI_assert(validate_data_format(format_, data_format));
  }

-  /* SPECIAL Workaround for R11G11B10 textures requesting a read using: GPU_DATA_10_11_11_REV. */
-  if (desired_output_format == GPU_DATA_10_11_11_REV) {
-    BLI_assert(format_ == GPU_R11F_G11F_B10F);
+  /* SPECIAL Workaround for R11G11B10, GPU_RGB10_A2, GPU_RGB10_A2UI textures requesting a read
+   * using: GPU_DATA_10_11_11_REV. */
+  if (desired_output_format == GPU_DATA_10_11_11_REV ||
+      desired_output_format == GPU_DATA_2_10_10_10_REV)
+  {
+    BLI_assert(format_ == GPU_R11F_G11F_B10F || format_ == GPU_RGB10_A2 ||
+               format_ == GPU_RGB10_A2UI);

    /* override parameters - we'll be able to use simple copy, as bpp will match at 4 bytes. */
    image_bpp = sizeof(int);
@@ -1516,12 +1521,54 @@ void gpu::MTLTexture::read_internal(int mip,

    /* Perform per-texture type read. */
    switch (type_) {
+      case GPU_TEXTURE_1D: {
+        if (can_use_simple_read) {
+          /* Use Blit Encoder READ. */
+          id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
+          if (G.debug & G_DEBUG_GPU) {
+            [enc insertDebugSignpost:@"GPUTextureRead1D"];
+          }
+          [enc copyFromTexture:read_texture
+                           sourceSlice:0
+                           sourceLevel:mip
+                          sourceOrigin:MTLOriginMake(x_off, 0, 0)
+                            sourceSize:MTLSizeMake(width, 1, 1)
+                              toBuffer:destination_buffer
+                     destinationOffset:0
+                destinationBytesPerRow:bytes_per_row
+              destinationBytesPerImage:bytes_per_image];
+          copy_successful = true;
+        }
+        else {
+
+          /* Use Compute READ. */
+          id<MTLComputeCommandEncoder> compute_encoder =
+              ctx->main_command_buffer.ensure_begin_compute_encoder();
+          id<MTLComputePipelineState> pso = texture_read_1d_get_kernel(
+              compute_specialization_kernel);
+          TextureReadParams params = {
+              mip,
+              {width, 1, 1},
+              {x_off, 0, 0},
+          };
+
+          /* Bind resources via compute state for optimal state caching performance. */
+          MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
+          cs.bind_pso(pso);
+          cs.bind_compute_bytes(&params, sizeof(params), 0);
+          cs.bind_compute_buffer(destination_buffer, 0, 1, true);
+          cs.bind_compute_texture(read_texture, 0);
+          [compute_encoder dispatchThreads:MTLSizeMake(width, 1, 1) /* Width, Height, Layer */
+                     threadsPerThreadgroup:MTLSizeMake(8, 8, 1)];
+          copy_successful = true;
+        }
+      } break;
      case GPU_TEXTURE_2D: {
        if (can_use_simple_read) {
          /* Use Blit Encoder READ. */
          id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
          if (G.debug & G_DEBUG_GPU) {
-            [enc insertDebugSignpost:@"GPUTextureRead"];
+            [enc insertDebugSignpost:@"GPUTextureRead2D"];
          }
          [enc copyFromTexture:read_texture
                           sourceSlice:0
@@ -1564,7 +1611,7 @@ void gpu::MTLTexture::read_internal(int mip,
          /* Use Blit Encoder READ. */
          id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
          if (G.debug & G_DEBUG_GPU) {
-            [enc insertDebugSignpost:@"GPUTextureRead"];
+            [enc insertDebugSignpost:@"GPUTextureRead2DArray"];
          }
          int base_slice = z_off;
          int final_slice = base_slice + depth;
@@ -1610,11 +1657,55 @@ void gpu::MTLTexture::read_internal(int mip,
        }
      } break;

+      case GPU_TEXTURE_3D: {
+        if (can_use_simple_read) {
+          /* Use Blit Encoder READ. */
+          id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
+          if (G.debug & G_DEBUG_GPU) {
+            [enc insertDebugSignpost:@"GPUTextureRead3D"];
+          }
+          [enc copyFromTexture:read_texture
+                           sourceSlice:0
+                           sourceLevel:mip
+                          sourceOrigin:MTLOriginMake(x_off, y_off, z_off)
+                            sourceSize:MTLSizeMake(width, height, depth)
+                              toBuffer:destination_buffer
+                     destinationOffset:0
+                destinationBytesPerRow:bytes_per_row
+              destinationBytesPerImage:bytes_per_image];
+          copy_successful = true;
+        }
+        else {
+
+          /* Use Compute READ. */
+          id<MTLComputeCommandEncoder> compute_encoder =
+              ctx->main_command_buffer.ensure_begin_compute_encoder();
+          id<MTLComputePipelineState> pso = texture_read_3d_get_kernel(
+              compute_specialization_kernel);
+          TextureReadParams params = {
+              mip,
+              {width, height, depth},
+              {x_off, y_off, z_off},
+          };
+
+          /* Bind resources via compute state for optimal state caching performance. */
+          MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
+          cs.bind_pso(pso);
+          cs.bind_compute_bytes(&params, sizeof(params), 0);
+          cs.bind_compute_buffer(destination_buffer, 0, 1, true);
+          cs.bind_compute_texture(read_texture, 0);
+          [compute_encoder
+                    dispatchThreads:MTLSizeMake(width, height, depth) /* Width, Height, Layer */
+              threadsPerThreadgroup:MTLSizeMake(4, 4, 4)];
+          copy_successful = true;
+        }
+      } break;
+
      case GPU_TEXTURE_CUBE_ARRAY: {
        if (can_use_simple_read) {
          id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
          if (G.debug & G_DEBUG_GPU) {
-            [enc insertDebugSignpost:@"GPUTextureRead"];
+            [enc insertDebugSignpost:@"GPUTextureReadCubeArray"];
          }
          int base_slice = z_off;
          int final_slice = base_slice + depth;
--- a/source/blender/gpu/metal/mtl_texture_util.mm
+++ b/source/blender/gpu/metal/mtl_texture_util.mm
@@ -191,6 +191,9 @@ size_t get_mtl_format_bytesize(MTLPixelFormat tex_format)
    case MTLPixelFormatRGBA8Uint:
    case MTLPixelFormatRGBA8Sint:
    case MTLPixelFormatRGBA8Unorm:
+    case MTLPixelFormatRGBA8Snorm:
+    case MTLPixelFormatRGB10A2Uint:
+    case MTLPixelFormatRGB10A2Unorm:
      return 4;
    case MTLPixelFormatRGBA32Uint:
    case MTLPixelFormatRGBA32Sint:
@@ -200,10 +203,13 @@ size_t get_mtl_format_bytesize(MTLPixelFormat tex_format)
    case MTLPixelFormatRGBA16Sint:
    case MTLPixelFormatRGBA16Float:
    case MTLPixelFormatRGBA16Unorm:
+    case MTLPixelFormatRGBA16Snorm:
      return 8;
    case MTLPixelFormatRG8Uint:
    case MTLPixelFormatRG8Sint:
    case MTLPixelFormatRG8Unorm:
+    case MTLPixelFormatRG8Snorm:
+    case MTLPixelFormatRG8Unorm_sRGB:
      return 2;
    case MTLPixelFormatRG32Uint:
    case MTLPixelFormatRG32Sint:
@@ -212,6 +218,8 @@ size_t get_mtl_format_bytesize(MTLPixelFormat tex_format)
    case MTLPixelFormatRG16Uint:
    case MTLPixelFormatRG16Sint:
    case MTLPixelFormatRG16Float:
+    case MTLPixelFormatRG16Unorm:
+    case MTLPixelFormatRG16Snorm:
      return 4;
    case MTLPixelFormatR8Uint:
    case MTLPixelFormatR8Sint:
@@ -225,6 +233,7 @@ size_t get_mtl_format_bytesize(MTLPixelFormat tex_format)
    case MTLPixelFormatR16Sint:
    case MTLPixelFormatR16Float:
    case MTLPixelFormatR16Snorm:
+    case MTLPixelFormatR16Unorm:
      return 2;
    case MTLPixelFormatRG11B10Float:
      return 4;
@@ -249,6 +258,7 @@ int get_mtl_format_num_components(MTLPixelFormat tex_format)
    case MTLPixelFormatRGBA8Uint:
    case MTLPixelFormatRGBA8Sint:
    case MTLPixelFormatRGBA8Unorm:
+    case MTLPixelFormatRGBA8Snorm:
    case MTLPixelFormatRGBA32Uint:
    case MTLPixelFormatRGBA32Sint:
    case MTLPixelFormatRGBA32Float:
@@ -256,7 +266,10 @@ int get_mtl_format_num_components(MTLPixelFormat tex_format)
    case MTLPixelFormatRGBA16Sint:
    case MTLPixelFormatRGBA16Float:
    case MTLPixelFormatRGBA16Unorm:
+    case MTLPixelFormatRGBA16Snorm:
    case MTLPixelFormatRGBA8Unorm_sRGB:
+    case MTLPixelFormatRGB10A2Uint:
+    case MTLPixelFormatRGB10A2Unorm:
      return 4;

    case MTLPixelFormatRG11B10Float:
@@ -272,17 +285,21 @@ int get_mtl_format_num_components(MTLPixelFormat tex_format)
    case MTLPixelFormatRG16Sint:
    case MTLPixelFormatRG16Float:
    case MTLPixelFormatDepth32Float_Stencil8:
+    case MTLPixelFormatRG16Snorm:
+    case MTLPixelFormatRG16Unorm:
      return 2;

    case MTLPixelFormatR8Uint:
    case MTLPixelFormatR8Sint:
    case MTLPixelFormatR8Unorm:
+    case MTLPixelFormatR8Snorm:
    case MTLPixelFormatR32Uint:
    case MTLPixelFormatR32Sint:
    case MTLPixelFormatR32Float:
    case MTLPixelFormatR16Uint:
    case MTLPixelFormatR16Sint:
    case MTLPixelFormatR16Float:
+    case MTLPixelFormatR16Unorm:
    case MTLPixelFormatR16Snorm:
    case MTLPixelFormatDepth32Float:
    case MTLPixelFormatDepth16Unorm: